ext4_journal: employ better ways to handle revoke blocks during checkpoint.
[lwext4.git] / lwext4 / ext4_journal.c
1 /*
2  * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3  * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * - Redistributions of source code must retain the above copyright
11  *   notice, this list of conditions and the following disclaimer.
12  * - Redistributions in binary form must reproduce the above copyright
13  *   notice, this list of conditions and the following disclaimer in the
14  *   documentation and/or other materials provided with the distribution.
15  * - The name of the author may not be used to endorse or promote products
16  *   derived from this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30 /** @addtogroup lwext4
31  * @{
32  */
33 /**
34  * @file  ext4_journal.c
35  * @brief Journal handle functions
36  */
37
38 #include "ext4_config.h"
39 #include "ext4_types.h"
40 #include "ext4_fs.h"
41 #include "ext4_super.h"
42 #include "ext4_errno.h"
43 #include "ext4_blockdev.h"
44 #include "ext4_crc32c.h"
45 #include "ext4_debug.h"
46 #include "tree.h"
47
48 #include <string.h>
49 #include <stdlib.h>
50
51 struct revoke_entry {
52         ext4_fsblk_t block;
53         uint32_t trans_id;
54         RB_ENTRY(revoke_entry) revoke_node;
55 };
56
57 struct recover_info {
58         uint32_t start_trans_id;
59         uint32_t last_trans_id;
60         uint32_t this_trans_id;
61         RB_HEAD(jbd_revoke, revoke_entry) revoke_root;
62 };
63
/**@brief Argument bundle handed to the per-tag replay callback. */
struct replay_arg {
        /* Recovery state, including the revoke tree. */
        struct recover_info *info;

        /* Journal block index of the log walker; advanced once per tag. */
        uint32_t *this_block;

        /* Id of the transaction the descriptor block belongs to. */
        uint32_t this_trans_id;
};
69
70 static int
71 jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b)
72 {
73         if (a->block > b->block)
74                 return 1;
75         else if (a->block < b->block)
76                 return -1;
77         return 0;
78 }
79
80 RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
81                      jbd_revoke_entry_cmp, static inline)
82
83 #define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry))
84 #define jbd_free_revoke_entry(addr) free(addr)
85
86 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
87                    ext4_lblk_t iblock,
88                    ext4_fsblk_t *fblock);
89
90 int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s)
91 {
92         int rc;
93         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
94         uint64_t offset;
95         ext4_fsblk_t fblock;
96         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
97         if (rc != EOK)
98                 return rc;
99
100         offset = fblock * ext4_sb_get_block_size(&fs->sb);
101         return ext4_block_writebytes(fs->bdev, offset, s,
102                                      EXT4_SUPERBLOCK_SIZE);
103 }
104
105 int jbd_sb_read(struct jbd_fs *jbd_fs, struct jbd_sb *s)
106 {
107         int rc;
108         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
109         uint64_t offset;
110         ext4_fsblk_t fblock;
111         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
112         if (rc != EOK)
113                 return rc;
114
115         offset = fblock * ext4_sb_get_block_size(&fs->sb);
116         return ext4_block_readbytes(fs->bdev, offset, s,
117                                     EXT4_SUPERBLOCK_SIZE);
118 }
119
120 static bool jbd_verify_sb(struct jbd_sb *sb)
121 {
122         struct jbd_bhdr *header = &sb->header;
123         if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER)
124                 return false;
125
126         if (jbd_get32(header, blocktype) != JBD_SUPERBLOCK &&
127             jbd_get32(header, blocktype) != JBD_SUPERBLOCK_V2)
128                 return false;
129
130         return true;
131 }
132
133 static int jbd_write_sb(struct jbd_fs *jbd_fs)
134 {
135         int rc = EOK;
136         if (jbd_fs->dirty) {
137                 rc = jbd_sb_write(jbd_fs, &jbd_fs->sb);
138                 if (rc != EOK)
139                         return rc;
140
141                 jbd_fs->dirty = false;
142         }
143         return rc;
144 }
145
146 int jbd_get_fs(struct ext4_fs *fs,
147                struct jbd_fs *jbd_fs)
148 {
149         int rc;
150         uint32_t journal_ino;
151
152         memset(jbd_fs, 0, sizeof(struct jbd_fs));
153         journal_ino = ext4_get32(&fs->sb, journal_inode_number);
154
155         rc = ext4_fs_get_inode_ref(fs,
156                                    journal_ino,
157                                    &jbd_fs->inode_ref);
158         if (rc != EOK) {
159                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
160                 return rc;
161         }
162         rc = jbd_sb_read(jbd_fs, &jbd_fs->sb);
163         if (rc != EOK) {
164                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
165                 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
166                 return rc;
167         }
168         if (!jbd_verify_sb(&jbd_fs->sb)) {
169                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
170                 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
171                 rc = EIO;
172         }
173
174         return rc;
175 }
176
177 int jbd_put_fs(struct jbd_fs *jbd_fs)
178 {
179         int rc = EOK;
180         rc = jbd_write_sb(jbd_fs);
181
182         ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
183         return rc;
184 }
185
186 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
187                    ext4_lblk_t iblock,
188                    ext4_fsblk_t *fblock)
189 {
190         int rc = ext4_fs_get_inode_dblk_idx(
191                         &jbd_fs->inode_ref,
192                         iblock,
193                         fblock,
194                         false);
195         return rc;
196 }
197
198 int jbd_block_get(struct jbd_fs *jbd_fs,
199                   struct ext4_block *block,
200                   ext4_fsblk_t fblock)
201 {
202         /* TODO: journal device. */
203         int rc;
204         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
205         rc = jbd_inode_bmap(jbd_fs, iblock,
206                             &fblock);
207         if (rc != EOK)
208                 return rc;
209
210         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
211         rc = ext4_block_get(bdev, block, fblock);
212         return rc;
213 }
214
215 int jbd_block_get_noread(struct jbd_fs *jbd_fs,
216                          struct ext4_block *block,
217                          ext4_fsblk_t fblock)
218 {
219         /* TODO: journal device. */
220         int rc;
221         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
222         rc = jbd_inode_bmap(jbd_fs, iblock,
223                             &fblock);
224         if (rc != EOK)
225                 return rc;
226
227         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
228         rc = ext4_block_get_noread(bdev, block, fblock);
229         return rc;
230 }
231
232 int jbd_block_set(struct jbd_fs *jbd_fs,
233                   struct ext4_block *block)
234 {
235         return ext4_block_set(jbd_fs->inode_ref.fs->bdev,
236                               block);
237 }
238
239 /*
240  * helper functions to deal with 32 or 64bit block numbers.
241  */
242 int jbd_tag_bytes(struct jbd_fs *jbd_fs)
243 {
244         int size;
245
246         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
247                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
248                 return sizeof(struct jbd_block_tag3);
249
250         size = sizeof(struct jbd_block_tag);
251
252         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
253                                      JBD_FEATURE_INCOMPAT_CSUM_V2))
254                 size += sizeof(uint16_t);
255
256         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
257                                      JBD_FEATURE_INCOMPAT_64BIT))
258                 return size;
259
260         return size - sizeof(uint32_t);
261 }
262
263 /**@brief: tag information. */
264 struct tag_info {
265         int tag_bytes;
266         ext4_fsblk_t block;
267         bool uuid_exist;
268         uint8_t uuid[UUID_SIZE];
269         bool last_tag;
270 };
271
272 static int
273 jbd_extract_block_tag(struct jbd_fs *jbd_fs,
274                       void *__tag,
275                       int tag_bytes,
276                       int32_t remain_buf_size,
277                       struct tag_info *tag_info)
278 {
279         char *uuid_start;
280         tag_info->tag_bytes = tag_bytes;
281         tag_info->uuid_exist = false;
282         tag_info->last_tag = false;
283
284         if (remain_buf_size - tag_bytes < 0)
285                 return EINVAL;
286
287         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
288                                      JBD_FEATURE_INCOMPAT_CSUM_V3)) {
289                 struct jbd_block_tag3 *tag = __tag;
290                 tag_info->block = jbd_get32(tag, blocknr);
291                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
292                                              JBD_FEATURE_INCOMPAT_64BIT))
293                          tag_info->block |=
294                                  (uint64_t)jbd_get32(tag, blocknr_high) << 32;
295
296                 if (jbd_get32(tag, flags) & JBD_FLAG_ESCAPE)
297                         tag_info->block = 0;
298
299                 if (!(jbd_get32(tag, flags) & JBD_FLAG_SAME_UUID)) {
300                         if (remain_buf_size - tag_bytes < UUID_SIZE)
301                                 return EINVAL;
302
303                         uuid_start = (char *)tag + tag_bytes;
304                         tag_info->uuid_exist = true;
305                         tag_info->tag_bytes += UUID_SIZE;
306                         memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
307                 }
308
309                 if (jbd_get32(tag, flags) & JBD_FLAG_LAST_TAG)
310                         tag_info->last_tag = true;
311
312         } else {
313                 struct jbd_block_tag *tag = __tag;
314                 tag_info->block = jbd_get32(tag, blocknr);
315                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
316                                              JBD_FEATURE_INCOMPAT_64BIT))
317                          tag_info->block |=
318                                  (uint64_t)jbd_get32(tag, blocknr_high) << 32;
319
320                 if (jbd_get16(tag, flags) & JBD_FLAG_ESCAPE)
321                         tag_info->block = 0;
322
323                 if (!(jbd_get16(tag, flags) & JBD_FLAG_SAME_UUID)) {
324                         if (remain_buf_size - tag_bytes < UUID_SIZE)
325                                 return EINVAL;
326
327                         uuid_start = (char *)tag + tag_bytes;
328                         tag_info->uuid_exist = true;
329                         tag_info->tag_bytes += UUID_SIZE;
330                         memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
331                 }
332
333                 if (jbd_get16(tag, flags) & JBD_FLAG_LAST_TAG)
334                         tag_info->last_tag = true;
335
336         }
337         return EOK;
338 }
339
340 static int
341 jbd_write_block_tag(struct jbd_fs *jbd_fs,
342                     void *__tag,
343                     int32_t remain_buf_size,
344                     struct tag_info *tag_info)
345 {
346         char *uuid_start;
347         int tag_bytes = jbd_tag_bytes(jbd_fs);
348
349         tag_info->tag_bytes = tag_bytes;
350
351         if (remain_buf_size - tag_bytes < 0)
352                 return EINVAL;
353
354         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
355                                      JBD_FEATURE_INCOMPAT_CSUM_V3)) {
356                 struct jbd_block_tag3 *tag = __tag;
357                 jbd_set32(tag, blocknr, tag_info->block);
358                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
359                                              JBD_FEATURE_INCOMPAT_64BIT))
360                         jbd_set32(tag, blocknr_high, tag_info->block >> 32);
361
362                 if (tag_info->uuid_exist) {
363                         if (remain_buf_size - tag_bytes < UUID_SIZE)
364                                 return EINVAL;
365
366                         uuid_start = (char *)tag + tag_bytes;
367                         tag_info->tag_bytes += UUID_SIZE;
368                         memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
369                 } else
370                         jbd_set32(tag, flags,
371                                   jbd_get32(tag, flags) | JBD_FLAG_SAME_UUID);
372
373                 if (tag_info->last_tag)
374                         jbd_set32(tag, flags,
375                                   jbd_get32(tag, flags) | JBD_FLAG_LAST_TAG);
376
377         } else {
378                 struct jbd_block_tag *tag = __tag;
379                 jbd_set32(tag, blocknr, tag_info->block);
380                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
381                                              JBD_FEATURE_INCOMPAT_64BIT))
382                         jbd_set32(tag, blocknr_high, tag_info->block >> 32);
383
384                 if (tag_info->uuid_exist) {
385                         if (remain_buf_size - tag_bytes < UUID_SIZE)
386                                 return EINVAL;
387
388                         uuid_start = (char *)tag + tag_bytes;
389                         tag_info->tag_bytes += UUID_SIZE;
390                         memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
391                 } else
392                         jbd_set16(tag, flags,
393                                   jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID);
394
395                 if (tag_info->last_tag)
396                         jbd_set16(tag, flags,
397                                   jbd_get16(tag, flags) | JBD_FLAG_LAST_TAG);
398
399         }
400         return EOK;
401 }
402
403 static void
404 jbd_iterate_block_table(struct jbd_fs *jbd_fs,
405                         void *__tag_start,
406                         int32_t tag_tbl_size,
407                         void (*func)(struct jbd_fs * jbd_fs,
408                                         ext4_fsblk_t block,
409                                         uint8_t *uuid,
410                                         void *arg),
411                         void *arg)
412 {
413         char *tag_start, *tag_ptr;
414         int tag_bytes = jbd_tag_bytes(jbd_fs);
415         tag_start = __tag_start;
416         tag_ptr = tag_start;
417
418         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
419                                      JBD_FEATURE_INCOMPAT_CSUM_V2) ||
420             JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
421                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
422                 tag_tbl_size -= sizeof(struct jbd_block_tail);
423
424         while (tag_tbl_size) {
425                 struct tag_info tag_info;
426                 int rc = jbd_extract_block_tag(jbd_fs,
427                                       tag_ptr,
428                                       tag_bytes,
429                                       tag_tbl_size,
430                                       &tag_info);
431                 if (rc != EOK)
432                         break;
433
434                 if (func)
435                         func(jbd_fs, tag_info.block, tag_info.uuid, arg);
436
437                 if (tag_info.last_tag)
438                         break;
439
440                 tag_ptr += tag_info.tag_bytes;
441                 tag_tbl_size -= tag_info.tag_bytes;
442         }
443 }
444
445 static void jbd_display_block_tags(struct jbd_fs *jbd_fs,
446                                    ext4_fsblk_t block,
447                                    uint8_t *uuid,
448                                    void *arg)
449 {
450         uint32_t *iblock = arg;
451         ext4_dbg(DEBUG_JBD, "Block in block_tag: %" PRIu64 "\n", block);
452         (*iblock)++;
453         (void)jbd_fs;
454         (void)uuid;
455         return;
456 }
457
458 static struct revoke_entry *
459 jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)
460 {
461         struct revoke_entry tmp = {
462                 .block = block
463         };
464
465         return RB_FIND(jbd_revoke, &info->revoke_root, &tmp);
466 }
467
468 static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
469                                   ext4_fsblk_t block,
470                                   uint8_t *uuid __unused,
471                                   void *__arg)
472 {
473         int r;
474         struct replay_arg *arg = __arg;
475         struct recover_info *info = arg->info;
476         uint32_t *this_block = arg->this_block;
477         struct revoke_entry *revoke_entry;
478         struct ext4_block journal_block, ext4_block;
479         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
480
481         (*this_block)++;
482
483         revoke_entry = jbd_revoke_entry_lookup(info, block);
484         if (revoke_entry &&
485             arg->this_trans_id < revoke_entry->trans_id)
486                 return;
487
488         ext4_dbg(DEBUG_JBD,
489                  "Replaying block in block_tag: %" PRIu64 "\n",
490                  block);
491
492         r = jbd_block_get(jbd_fs, &journal_block, *this_block);
493         if (r != EOK)
494                 return;
495
496         if (block) {
497                 r = ext4_block_get_noread(fs->bdev, &ext4_block, block);
498                 if (r != EOK) {
499                         jbd_block_set(jbd_fs, &journal_block);
500                         return;
501                 }
502
503                 memcpy(ext4_block.data,
504                         journal_block.data,
505                         jbd_get32(&jbd_fs->sb, blocksize));
506
507                 ext4_bcache_set_dirty(ext4_block.buf);
508                 ext4_block_set(fs->bdev, &ext4_block);
509         } else {
510                 uint16_t mount_count, state;
511                 mount_count = ext4_get16(&fs->sb, mount_count);
512                 state = ext4_get16(&fs->sb, state);
513
514                 memcpy(&fs->sb,
515                         journal_block.data + EXT4_SUPERBLOCK_OFFSET,
516                         EXT4_SUPERBLOCK_SIZE);
517
518                 /* Mark system as mounted */
519                 ext4_set16(&fs->sb, state, state);
520                 r = ext4_sb_write(fs->bdev, &fs->sb);
521                 if (r != EOK)
522                         return;
523
524                 /*Update mount count*/
525                 ext4_set16(&fs->sb, mount_count, mount_count);
526         }
527
528         jbd_block_set(jbd_fs, &journal_block);
529         
530         return;
531 }
532
533 static void jbd_add_revoke_block_tags(struct recover_info *info,
534                                       ext4_fsblk_t block)
535 {
536         struct revoke_entry *revoke_entry;
537
538         ext4_dbg(DEBUG_JBD, "Add block %" PRIu64 " to revoke tree\n", block);
539         revoke_entry = jbd_revoke_entry_lookup(info, block);
540         if (revoke_entry) {
541                 revoke_entry->trans_id = info->this_trans_id;
542                 return;
543         }
544
545         revoke_entry = jbd_alloc_revoke_entry();
546         ext4_assert(revoke_entry);
547         revoke_entry->block = block;
548         revoke_entry->trans_id = info->this_trans_id;
549         RB_INSERT(jbd_revoke, &info->revoke_root, revoke_entry);
550
551         return;
552 }
553
554 static void jbd_destroy_revoke_tree(struct recover_info *info)
555 {
556         while (!RB_EMPTY(&info->revoke_root)) {
557                 struct revoke_entry *revoke_entry =
558                         RB_MIN(jbd_revoke, &info->revoke_root);
559                 ext4_assert(revoke_entry);
560                 RB_REMOVE(jbd_revoke, &info->revoke_root, revoke_entry);
561                 jbd_free_revoke_entry(revoke_entry);
562         }
563 }
564
/* Make sure we wrap around the log correctly!
 * When var has reached or passed the superblock's maxlen it is pulled back
 * by the size of the usable log area (maxlen - first). var must be a
 * modifiable lvalue; it is evaluated more than once. */
#define wrap(sb, var)                                                   \
do {                                                                    \
        if ((var) >= jbd_get32((sb), maxlen))                           \
                (var) -= (jbd_get32((sb), maxlen) -                     \
                          jbd_get32((sb), first));                      \
} while (0)
571
/* Passes of jbd_iterate_log(), run in this order by jbd_recover(). */
#define ACTION_SCAN    0 /* find the range of transaction ids in the log */
#define ACTION_REVOKE  1 /* collect revoke records into the revoke tree */
#define ACTION_RECOVER 2 /* replay journalled blocks */
575
576
577 static void jbd_build_revoke_tree(struct jbd_fs *jbd_fs,
578                                   struct jbd_bhdr *header,
579                                   struct recover_info *info)
580 {
581         char *blocks_entry;
582         struct jbd_revoke_header *revoke_hdr =
583                 (struct jbd_revoke_header *)header;
584         uint32_t i, nr_entries, record_len = 4;
585         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
586                                      JBD_FEATURE_INCOMPAT_64BIT))
587                 record_len = 8;
588
589         nr_entries = (jbd_get32(revoke_hdr, count) -
590                         sizeof(struct jbd_revoke_header)) /
591                         record_len;
592
593         blocks_entry = (char *)(revoke_hdr + 1);
594
595         for (i = 0;i < nr_entries;i++) {
596                 if (record_len == 8) {
597                         uint64_t *blocks =
598                                 (uint64_t *)blocks_entry;
599                         jbd_add_revoke_block_tags(info, to_be64(*blocks));
600                 } else {
601                         uint32_t *blocks =
602                                 (uint32_t *)blocks_entry;
603                         jbd_add_revoke_block_tags(info, to_be32(*blocks));
604                 }
605                 blocks_entry += record_len;
606         }
607 }
608
609 static void jbd_debug_descriptor_block(struct jbd_fs *jbd_fs,
610                                        struct jbd_bhdr *header,
611                                        uint32_t *iblock)
612 {
613         jbd_iterate_block_table(jbd_fs,
614                                 header + 1,
615                                 jbd_get32(&jbd_fs->sb, blocksize) -
616                                         sizeof(struct jbd_bhdr),
617                                 jbd_display_block_tags,
618                                 iblock);
619 }
620
621 static void jbd_replay_descriptor_block(struct jbd_fs *jbd_fs,
622                                         struct jbd_bhdr *header,
623                                         struct replay_arg *arg)
624 {
625         jbd_iterate_block_table(jbd_fs,
626                                 header + 1,
627                                 jbd_get32(&jbd_fs->sb, blocksize) -
628                                         sizeof(struct jbd_bhdr),
629                                 jbd_replay_block_tags,
630                                 arg);
631 }
632
633 int jbd_iterate_log(struct jbd_fs *jbd_fs,
634                     struct recover_info *info,
635                     int action)
636 {
637         int r = EOK;
638         bool log_end = false;
639         struct jbd_sb *sb = &jbd_fs->sb;
640         uint32_t start_trans_id, this_trans_id;
641         uint32_t start_block, this_block;
642
643         start_trans_id = this_trans_id = jbd_get32(sb, sequence);
644         start_block = this_block = jbd_get32(sb, start);
645
646         ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %" PRIu32 "\n",
647                             start_trans_id);
648
649         while (!log_end) {
650                 struct ext4_block block;
651                 struct jbd_bhdr *header;
652                 if (action != ACTION_SCAN)
653                         if (this_trans_id > info->last_trans_id) {
654                                 log_end = true;
655                                 continue;
656                         }
657
658                 r = jbd_block_get(jbd_fs, &block, this_block);
659                 if (r != EOK)
660                         break;
661
662                 header = (struct jbd_bhdr *)block.data;
663                 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER) {
664                         jbd_block_set(jbd_fs, &block);
665                         log_end = true;
666                         continue;
667                 }
668
669                 if (jbd_get32(header, sequence) != this_trans_id) {
670                         if (action != ACTION_SCAN)
671                                 r = EIO;
672
673                         jbd_block_set(jbd_fs, &block);
674                         log_end = true;
675                         continue;
676                 }
677
678                 switch (jbd_get32(header, blocktype)) {
679                 case JBD_DESCRIPTOR_BLOCK:
680                         ext4_dbg(DEBUG_JBD, "Descriptor block: %" PRIu32", "
681                                             "trans_id: %" PRIu32"\n",
682                                             this_block, this_trans_id);
683                         if (action == ACTION_RECOVER) {
684                                 struct replay_arg replay_arg;
685                                 replay_arg.info = info;
686                                 replay_arg.this_block = &this_block;
687                                 replay_arg.this_trans_id = this_trans_id;
688
689                                 jbd_replay_descriptor_block(jbd_fs,
690                                                 header, &replay_arg);
691                         } else
692                                 jbd_debug_descriptor_block(jbd_fs,
693                                                 header, &this_block);
694
695                         break;
696                 case JBD_COMMIT_BLOCK:
697                         ext4_dbg(DEBUG_JBD, "Commit block: %" PRIu32", "
698                                             "trans_id: %" PRIu32"\n",
699                                             this_block, this_trans_id);
700                         this_trans_id++;
701                         break;
702                 case JBD_REVOKE_BLOCK:
703                         ext4_dbg(DEBUG_JBD, "Revoke block: %" PRIu32", "
704                                             "trans_id: %" PRIu32"\n",
705                                             this_block, this_trans_id);
706                         if (action == ACTION_REVOKE) {
707                                 info->this_trans_id = this_trans_id;
708                                 jbd_build_revoke_tree(jbd_fs,
709                                                 header, info);
710                         }
711                         break;
712                 default:
713                         log_end = true;
714                         break;
715                 }
716                 jbd_block_set(jbd_fs, &block);
717                 this_block++;
718                 wrap(sb, this_block);
719                 if (this_block == start_block)
720                         log_end = true;
721
722         }
723         ext4_dbg(DEBUG_JBD, "End of journal.\n");
724         if (r == EOK && action == ACTION_SCAN) {
725                 info->start_trans_id = start_trans_id;
726                 if (this_trans_id > start_trans_id)
727                         info->last_trans_id = this_trans_id - 1;
728                 else
729                         info->last_trans_id = this_trans_id;
730         }
731
732         return r;
733 }
734
735 int jbd_recover(struct jbd_fs *jbd_fs)
736 {
737         int r;
738         struct recover_info info;
739         struct jbd_sb *sb = &jbd_fs->sb;
740         if (!sb->start)
741                 return EOK;
742
743         RB_INIT(&info.revoke_root);
744
745         r = jbd_iterate_log(jbd_fs, &info, ACTION_SCAN);
746         if (r != EOK)
747                 return r;
748
749         r = jbd_iterate_log(jbd_fs, &info, ACTION_REVOKE);
750         if (r != EOK)
751                 return r;
752
753         r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);
754         if (r == EOK) {
755                 uint32_t features_incompatible =
756                         ext4_get32(&jbd_fs->inode_ref.fs->sb,
757                                    features_incompatible);
758                 jbd_set32(&jbd_fs->sb, start, 0);
759                 features_incompatible &= ~EXT4_FINCOM_RECOVER;
760                 ext4_set32(&jbd_fs->inode_ref.fs->sb,
761                            features_incompatible,
762                            features_incompatible);
763                 jbd_fs->dirty = true;
764         }
765         jbd_destroy_revoke_tree(&info);
766         return r;
767 }
768
769 void jbd_journal_write_sb(struct jbd_journal *journal)
770 {
771         struct jbd_fs *jbd_fs = journal->jbd_fs;
772         jbd_set32(&jbd_fs->sb, start, journal->start);
773         jbd_set32(&jbd_fs->sb, sequence, journal->trans_id);
774         jbd_fs->dirty = true;
775 }
776
777 int jbd_journal_start(struct jbd_fs *jbd_fs,
778                       struct jbd_journal *journal)
779 {
780         journal->first = jbd_get32(&jbd_fs->sb, first);
781         journal->start = journal->first;
782         journal->last = journal->first;
783         journal->trans_id = 1;
784         journal->alloc_trans_id = 1;
785
786         journal->block_size = jbd_get32(&jbd_fs->sb, blocksize);
787
788         TAILQ_INIT(&journal->trans_queue);
789         TAILQ_INIT(&journal->cp_queue);
790         journal->jbd_fs = jbd_fs;
791         jbd_journal_write_sb(journal);
792         return jbd_write_sb(jbd_fs);
793 }
794
795 int jbd_journal_stop(struct jbd_journal *journal)
796 {
797         journal->start = 0;
798         journal->trans_id = 0;
799         jbd_journal_write_sb(journal);
800         return jbd_write_sb(journal->jbd_fs);
801 }
802
803 static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal,
804                                         struct jbd_trans *trans)
805 {
806         uint32_t start_block = journal->last++;
807         trans->alloc_blocks++;
808         wrap(&journal->jbd_fs->sb, journal->last);
809         return start_block;
810 }
811
812 struct jbd_trans *
813 jbd_journal_new_trans(struct jbd_journal *journal)
814 {
815         struct jbd_trans *trans = calloc(1, sizeof(struct jbd_trans));
816         if (!trans)
817                 return NULL;
818
819         /* We will assign a trans_id to this transaction,
820          * once it has been committed.*/
821         trans->journal = journal;
822         trans->error = EOK;
823         return trans;
824 }
825
826 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
827                           struct ext4_buf *buf __unused,
828                           int res,
829                           void *arg);
830
831 int jbd_trans_add_block(struct jbd_trans *trans,
832                         struct ext4_block *block)
833 {
834         struct jbd_buf *buf;
835         /* We do not need to add those unmodified buffer to
836          * a transaction. */
837         if (!ext4_bcache_test_flag(block->buf, BC_DIRTY))
838                 return EOK;
839
840         buf = calloc(1, sizeof(struct jbd_buf));
841         if (!buf)
842                 return ENOMEM;
843
844         buf->trans = trans;
845         buf->block = *block;
846         ext4_bcache_inc_ref(block->buf);
847
848         block->buf->end_write = jbd_trans_end_write;
849         block->buf->end_write_arg = trans;
850
851         trans->data_cnt++;
852         LIST_INSERT_HEAD(&trans->buf_list, buf, buf_node);
853         return EOK;
854 }
855
856 int jbd_trans_revoke_block(struct jbd_trans *trans,
857                            ext4_fsblk_t lba)
858 {
859         struct jbd_revoke_rec *rec =
860                 calloc(1, sizeof(struct jbd_revoke_rec));
861         if (!rec)
862                 return ENOMEM;
863
864         rec->lba = lba;
865         LIST_INSERT_HEAD(&trans->revoke_list, rec, revoke_node);
866         return EOK;
867 }
868
869 void jbd_journal_free_trans(struct jbd_journal *journal,
870                             struct jbd_trans *trans,
871                             bool abort)
872 {
873         struct jbd_buf *jbd_buf, *tmp;
874         struct jbd_revoke_rec *rec, *tmp2;
875         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
876         LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node,
877                           tmp) {
878                 if (abort)
879                         ext4_block_set(fs->bdev, &jbd_buf->block);
880
881                 LIST_REMOVE(jbd_buf, buf_node);
882                 free(jbd_buf);
883         }
884         LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
885                           tmp2) {
886                 LIST_REMOVE(rec, revoke_node);
887                 free(rec);
888         }
889
890         free(trans);
891 }
892
893 static int jbd_trans_write_commit_block(struct jbd_trans *trans)
894 {
895         int rc;
896         struct jbd_commit_header *header;
897         uint32_t commit_iblock = 0;
898         struct ext4_block commit_block;
899         struct jbd_journal *journal = trans->journal;
900
901         commit_iblock = jbd_journal_alloc_block(journal, trans);
902         rc = jbd_block_get_noread(journal->jbd_fs,
903                         &commit_block, commit_iblock);
904         if (rc != EOK)
905                 return rc;
906
907         header = (struct jbd_commit_header *)commit_block.data;
908         jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER);
909         jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);
910         jbd_set32(&header->header, sequence, trans->trans_id);
911
912         ext4_bcache_set_dirty(commit_block.buf);
913         rc = jbd_block_set(journal->jbd_fs, &commit_block);
914         if (rc != EOK)
915                 return rc;
916
917         return EOK;
918 }
919
920 static int jbd_journal_prepare(struct jbd_journal *journal,
921                                struct jbd_trans *trans)
922 {
923         int rc = EOK, i = 0;
924         int32_t tag_tbl_size;
925         uint32_t desc_iblock = 0;
926         uint32_t data_iblock = 0;
927         char *tag_start = NULL, *tag_ptr = NULL;
928         struct jbd_buf *jbd_buf;
929         struct ext4_block desc_block, data_block;
930
931         LIST_FOREACH(jbd_buf, &trans->buf_list, buf_node) {
932                 struct tag_info tag_info;
933                 bool uuid_exist = false;
934 again:
935                 if (!desc_iblock) {
936                         struct jbd_bhdr *bhdr;
937                         desc_iblock = jbd_journal_alloc_block(journal, trans);
938                         rc = jbd_block_get_noread(journal->jbd_fs,
939                                            &desc_block, desc_iblock);
940                         if (rc != EOK)
941                                 break;
942
943                         ext4_bcache_set_dirty(desc_block.buf);
944
945                         bhdr = (struct jbd_bhdr *)desc_block.data;
946                         jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
947                         jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK);
948                         jbd_set32(bhdr, sequence, trans->trans_id);
949
950                         tag_start = (char *)(bhdr + 1);
951                         tag_ptr = tag_start;
952                         uuid_exist = true;
953                         tag_tbl_size = journal->block_size -
954                                 sizeof(struct jbd_bhdr);
955
956                         if (!trans->start_iblock)
957                                 trans->start_iblock = desc_iblock;
958
959                 }
960                 tag_info.block = jbd_buf->block.lb_id;
961                 tag_info.uuid_exist = uuid_exist;
962                 if (i == trans->data_cnt - 1)
963                         tag_info.last_tag = true;
964
965                 if (uuid_exist)
966                         memcpy(tag_info.uuid, journal->jbd_fs->sb.uuid,
967                                         UUID_SIZE);
968
969                 rc = jbd_write_block_tag(journal->jbd_fs,
970                                 tag_ptr,
971                                 tag_tbl_size,
972                                 &tag_info);
973                 if (rc != EOK) {
974                         jbd_block_set(journal->jbd_fs, &desc_block);
975                         desc_iblock = 0;
976                         goto again;
977                 }
978
979                 data_iblock = jbd_journal_alloc_block(journal, trans);
980                 rc = jbd_block_get_noread(journal->jbd_fs,
981                                 &data_block, data_iblock);
982                 if (rc != EOK)
983                         break;
984
985                 ext4_bcache_set_dirty(data_block.buf);
986
987                 memcpy(data_block.data, jbd_buf->block.data,
988                         journal->block_size);
989
990                 rc = jbd_block_set(journal->jbd_fs, &data_block);
991                 if (rc != EOK)
992                         break;
993
994                 tag_ptr += tag_info.tag_bytes;
995                 tag_tbl_size -= tag_info.tag_bytes;
996
997                 i++;
998         }
999         if (rc == EOK && desc_iblock)
1000                 jbd_block_set(journal->jbd_fs, &desc_block);
1001
1002         return rc;
1003 }
1004
1005 static int
1006 jbd_journal_prepare_revoke(struct jbd_journal *journal,
1007                            struct jbd_trans *trans)
1008 {
1009         int rc = EOK, i = 0;
1010         int32_t tag_tbl_size;
1011         uint32_t desc_iblock = 0;
1012         char *blocks_entry = NULL;
1013         struct jbd_revoke_rec *rec, *tmp;
1014         struct ext4_block desc_block;
1015         struct jbd_revoke_header *header = NULL;
1016         int32_t record_len = 4;
1017
1018         if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1019                                      JBD_FEATURE_INCOMPAT_64BIT))
1020                 record_len = 8;
1021
1022         LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1023                           tmp) {
1024 again:
1025                 if (!desc_iblock) {
1026                         struct jbd_bhdr *bhdr;
1027                         desc_iblock = jbd_journal_alloc_block(journal, trans);
1028                         rc = jbd_block_get_noread(journal->jbd_fs,
1029                                            &desc_block, desc_iblock);
1030                         if (rc != EOK) {
1031                                 break;
1032                         }
1033
1034                         ext4_bcache_set_dirty(desc_block.buf);
1035
1036                         bhdr = (struct jbd_bhdr *)desc_block.data;
1037                         jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1038                         jbd_set32(bhdr, blocktype, JBD_REVOKE_BLOCK);
1039                         jbd_set32(bhdr, sequence, trans->trans_id);
1040                         
1041                         header = (struct jbd_revoke_header *)bhdr;
1042                         blocks_entry = (char *)(header + 1);
1043                         tag_tbl_size = journal->block_size -
1044                                 sizeof(struct jbd_revoke_header);
1045
1046                         if (!trans->start_iblock)
1047                                 trans->start_iblock = desc_iblock;
1048
1049                 }
1050
1051                 if (tag_tbl_size < record_len) {
1052                         jbd_set32(header, count,
1053                                   journal->block_size - tag_tbl_size);
1054                         jbd_block_set(journal->jbd_fs, &desc_block);
1055                         desc_iblock = 0;
1056                         header = NULL;
1057                         goto again;
1058                 }
1059                 if (record_len == 8) {
1060                         uint64_t *blocks =
1061                                 (uint64_t *)blocks_entry;
1062                         *blocks = to_be64(rec->lba);
1063                 } else {
1064                         uint32_t *blocks =
1065                                 (uint32_t *)blocks_entry;
1066                         *blocks = to_be32(rec->lba);
1067                 }
1068                 blocks_entry += record_len;
1069                 tag_tbl_size -= record_len;
1070
1071                 i++;
1072         }
1073         if (rc == EOK && desc_iblock) {
1074                 if (header != NULL)
1075                         jbd_set32(header, count,
1076                                   journal->block_size - tag_tbl_size);
1077
1078                 jbd_block_set(journal->jbd_fs, &desc_block);
1079         }
1080
1081         return rc;
1082 }
1083
1084 void
1085 jbd_journal_submit_trans(struct jbd_journal *journal,
1086                          struct jbd_trans *trans)
1087 {
1088         TAILQ_INSERT_TAIL(&journal->trans_queue,
1089                           trans,
1090                           trans_node);
1091 }
1092
1093 void jbd_journal_cp_trans(struct jbd_journal *journal, struct jbd_trans *trans)
1094 {
1095         struct jbd_buf *jbd_buf, *tmp;
1096         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1097         LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node,
1098                         tmp) {
1099                 ext4_block_set(fs->bdev, &jbd_buf->block);
1100         }
1101 }
1102
1103 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
1104                           struct ext4_buf *buf __unused,
1105                           int res,
1106                           void *arg)
1107 {
1108         struct jbd_trans *trans = arg;
1109         struct jbd_journal *journal = trans->journal;
1110         if (res != EOK)
1111                 trans->error = res;
1112
1113         trans->written_cnt++;
1114         if (trans->written_cnt == trans->data_cnt) {
1115 again:
1116                 TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
1117                 journal->start = trans->start_iblock +
1118                                  trans->alloc_blocks;
1119                 journal->trans_id = trans->trans_id + 1;
1120                 jbd_journal_write_sb(journal);
1121                 jbd_write_sb(journal->jbd_fs);
1122                 jbd_journal_free_trans(journal, trans, false);
1123
1124                 if ((trans = TAILQ_FIRST(&journal->cp_queue))) {
1125                         if (trans->data_cnt) {
1126                                 jbd_journal_cp_trans(journal, trans);
1127                                 return;
1128                         }
1129                         goto again;
1130                 }
1131         }
1132 }
1133
1134 /*
1135  * XXX: one should disable cache writeback first.
1136  */
1137 void jbd_journal_commit_one(struct jbd_journal *journal)
1138 {
1139         int rc = EOK;
1140         uint32_t last = journal->last;
1141         struct jbd_trans *trans;
1142         if ((trans = TAILQ_FIRST(&journal->trans_queue))) {
1143                 TAILQ_REMOVE(&journal->trans_queue, trans, trans_node);
1144
1145                 trans->trans_id = journal->alloc_trans_id;
1146                 rc = jbd_journal_prepare(journal, trans);
1147                 if (rc != EOK)
1148                         goto Finish;
1149
1150                 rc = jbd_journal_prepare_revoke(journal, trans);
1151                 if (rc != EOK)
1152                         goto Finish;
1153
1154                 rc = jbd_trans_write_commit_block(trans);
1155                 if (rc != EOK)
1156                         goto Finish;
1157
1158                 journal->alloc_trans_id++;
1159                 if (TAILQ_EMPTY(&journal->cp_queue)) {
1160                         if (trans->data_cnt) {
1161                                 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
1162                                                 trans_node);
1163                                 jbd_journal_cp_trans(journal, trans);
1164                         } else {
1165                                 journal->start = trans->start_iblock +
1166                                         trans->alloc_blocks;
1167                                 journal->trans_id = trans->trans_id + 1;
1168                                 jbd_journal_write_sb(journal);
1169                                 jbd_write_sb(journal->jbd_fs);
1170                                 jbd_journal_free_trans(journal, trans, false);
1171                         }
1172                 } else
1173                         TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
1174                                         trans_node);
1175         }
1176 Finish:
1177         if (rc != EOK) {
1178                 journal->last = last;
1179                 jbd_journal_free_trans(journal, trans, true);
1180         }
1181 }
1182
1183 /**
1184  * @}
1185  */