ext4_journal: journal write part 2.
[lwext4.git] / lwext4 / ext4_journal.c
1 /*
2  * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3  * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * - Redistributions of source code must retain the above copyright
11  *   notice, this list of conditions and the following disclaimer.
12  * - Redistributions in binary form must reproduce the above copyright
13  *   notice, this list of conditions and the following disclaimer in the
14  *   documentation and/or other materials provided with the distribution.
15  * - The name of the author may not be used to endorse or promote products
16  *   derived from this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30 /** @addtogroup lwext4
31  * @{
32  */
33 /**
34  * @file  ext4_journal.c
35  * @brief Journal handle functions
36  */
37
38 #include "ext4_config.h"
39 #include "ext4_types.h"
40 #include "ext4_fs.h"
41 #include "ext4_super.h"
42 #include "ext4_errno.h"
43 #include "ext4_blockdev.h"
44 #include "ext4_crc32c.h"
45 #include "ext4_debug.h"
46 #include "tree.h"
47
48 #include <string.h>
49 #include <stdlib.h>
50
/**@brief Node of the revoke tree that is built while replaying the journal. */
struct revoke_entry {
        /* Block number taken from a revoke record; the tree is keyed on it. */
        ext4_fsblk_t block;
        /* Id of the transaction whose revoke block most recently named it. */
        uint32_t trans_id;
        /* Red-black tree linkage. */
        RB_ENTRY(revoke_entry) revoke_node;
};
56
/**@brief State shared by the passes of jbd_iterate_log(). */
struct recover_info {
        /* Transaction id the log starts with (filled in by the scan pass). */
        uint32_t start_trans_id;
        /* Last transaction id the revoke/recover passes may process
         * (filled in by the scan pass). */
        uint32_t last_trans_id;
        /* Transaction id of the revoke block currently being recorded. */
        uint32_t this_trans_id;
        /* Root of the tree of revoked blocks. */
        RB_HEAD(jbd_revoke, revoke_entry) revoke_root;
};
63
/**@brief Argument bundle handed to jbd_replay_block_tags(). */
struct replay_arg {
        /* Recovery state, including the revoke tree. */
        struct recover_info *info;
        /* Journal block cursor of the caller; advanced once per tag. */
        uint32_t *this_block;
        /* Id of the transaction the descriptor block belongs to. */
        uint32_t this_trans_id;
};
69
70 static int
71 jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b)
72 {
73         if (a->block > b->block)
74                 return 1;
75         else if (a->block < b->block)
76                 return -1;
77         return 0;
78 }
79
/* Instantiate the red-black tree helpers for the revoke tree, keyed through
 * jbd_revoke_entry_cmp(). */
RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
                     jbd_revoke_entry_cmp, static inline)

/* Revoke entries are heap allocated and zero-initialised. */
#define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry))
#define jbd_free_revoke_entry(addr) free(addr)
85
/* Forward declaration: the superblock helpers below need it before the
 * definition appears. */
int jbd_inode_bmap(struct jbd_fs *jbd_fs,
                   ext4_lblk_t iblock,
                   ext4_fsblk_t *fblock);
89
90 int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s)
91 {
92         int rc;
93         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
94         uint64_t offset;
95         ext4_fsblk_t fblock;
96         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
97         if (rc != EOK)
98                 return rc;
99
100         offset = fblock * ext4_sb_get_block_size(&fs->sb);
101         return ext4_block_writebytes(fs->bdev, offset, s,
102                                      EXT4_SUPERBLOCK_SIZE);
103 }
104
105 int jbd_sb_read(struct jbd_fs *jbd_fs, struct jbd_sb *s)
106 {
107         int rc;
108         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
109         uint64_t offset;
110         ext4_fsblk_t fblock;
111         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
112         if (rc != EOK)
113                 return rc;
114
115         offset = fblock * ext4_sb_get_block_size(&fs->sb);
116         return ext4_block_readbytes(fs->bdev, offset, s,
117                                     EXT4_SUPERBLOCK_SIZE);
118 }
119
120 static bool jbd_verify_sb(struct jbd_sb *sb)
121 {
122         struct jbd_bhdr *header = &sb->header;
123         if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER)
124                 return false;
125
126         if (jbd_get32(header, blocktype) != JBD_SUPERBLOCK &&
127             jbd_get32(header, blocktype) != JBD_SUPERBLOCK_V2)
128                 return false;
129
130         return true;
131 }
132
133 static int jbd_write_sb(struct jbd_fs *jbd_fs)
134 {
135         int rc = EOK;
136         if (jbd_fs->dirty) {
137                 rc = jbd_sb_write(jbd_fs, &jbd_fs->sb);
138                 if (rc != EOK)
139                         return rc;
140
141                 jbd_fs->dirty = false;
142         }
143         return rc;
144 }
145
146 int jbd_get_fs(struct ext4_fs *fs,
147                struct jbd_fs *jbd_fs)
148 {
149         int rc;
150         uint32_t journal_ino;
151
152         memset(jbd_fs, 0, sizeof(struct jbd_fs));
153         journal_ino = ext4_get32(&fs->sb, journal_inode_number);
154
155         rc = ext4_fs_get_inode_ref(fs,
156                                    journal_ino,
157                                    &jbd_fs->inode_ref);
158         if (rc != EOK) {
159                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
160                 return rc;
161         }
162         rc = jbd_sb_read(jbd_fs, &jbd_fs->sb);
163         if (rc != EOK) {
164                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
165                 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
166                 return rc;
167         }
168         if (!jbd_verify_sb(&jbd_fs->sb)) {
169                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
170                 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
171                 rc = EIO;
172         }
173
174         return rc;
175 }
176
177 int jbd_put_fs(struct jbd_fs *jbd_fs)
178 {
179         int rc = EOK;
180         rc = jbd_write_sb(jbd_fs);
181
182         ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
183         return rc;
184 }
185
186 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
187                    ext4_lblk_t iblock,
188                    ext4_fsblk_t *fblock)
189 {
190         int rc = ext4_fs_get_inode_dblk_idx(
191                         &jbd_fs->inode_ref,
192                         iblock,
193                         fblock,
194                         false);
195         return rc;
196 }
197
198 int jbd_block_get(struct jbd_fs *jbd_fs,
199                   struct ext4_block *block,
200                   ext4_fsblk_t fblock)
201 {
202         /* TODO: journal device. */
203         int rc;
204         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
205         rc = jbd_inode_bmap(jbd_fs, iblock,
206                             &fblock);
207         if (rc != EOK)
208                 return rc;
209
210         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
211         rc = ext4_block_get(bdev, block, fblock);
212         return rc;
213 }
214
215 int jbd_block_get_noread(struct jbd_fs *jbd_fs,
216                          struct ext4_block *block,
217                          ext4_fsblk_t fblock)
218 {
219         /* TODO: journal device. */
220         int rc;
221         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
222         rc = jbd_inode_bmap(jbd_fs, iblock,
223                             &fblock);
224         if (rc != EOK)
225                 return rc;
226
227         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
228         rc = ext4_block_get_noread(bdev, block, fblock);
229         return rc;
230 }
231
232 int jbd_block_set(struct jbd_fs *jbd_fs,
233                   struct ext4_block *block)
234 {
235         return ext4_block_set(jbd_fs->inode_ref.fs->bdev,
236                               block);
237 }
238
239 /*
240  * helper functions to deal with 32 or 64bit block numbers.
241  */
242 int jbd_tag_bytes(struct jbd_fs *jbd_fs)
243 {
244         int size;
245
246         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
247                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
248                 return sizeof(struct jbd_block_tag3);
249
250         size = sizeof(struct jbd_block_tag);
251
252         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
253                                      JBD_FEATURE_INCOMPAT_CSUM_V2))
254                 size += sizeof(uint16_t);
255
256         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
257                                      JBD_FEATURE_INCOMPAT_64BIT))
258                 return size;
259
260         return size - sizeof(uint32_t);
261 }
262
/**@brief Decoded form of one descriptor-block tag. */
struct tag_info {
        /* Bytes occupied by the tag, including a trailing UUID if present. */
        int tag_bytes;
        /* Block number carried by the tag. */
        ext4_fsblk_t block;
        /* True when a UUID follows the tag; uuid[] is only valid then. */
        bool uuid_exist;
        uint8_t uuid[UUID_SIZE];
        /* True when the tag carries JBD_FLAG_LAST_TAG. */
        bool last_tag;
};
271
272 static int
273 jbd_extract_block_tag(struct jbd_fs *jbd_fs,
274                       void *__tag,
275                       int tag_bytes,
276                       int32_t remain_buf_size,
277                       struct tag_info *tag_info)
278 {
279         char *uuid_start;
280         tag_info->tag_bytes = tag_bytes;
281         tag_info->uuid_exist = false;
282         tag_info->last_tag = false;
283
284         if (remain_buf_size - tag_bytes < 0)
285                 return EINVAL;
286
287         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
288                                      JBD_FEATURE_INCOMPAT_CSUM_V3)) {
289                 struct jbd_block_tag3 *tag = __tag;
290                 tag_info->block = jbd_get32(tag, blocknr);
291                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
292                                              JBD_FEATURE_INCOMPAT_64BIT))
293                          tag_info->block |=
294                                  (uint64_t)jbd_get32(tag, blocknr_high) << 32;
295
296                 if (jbd_get32(tag, flags) & JBD_FLAG_ESCAPE)
297                         tag_info->block = 0;
298
299                 if (!(jbd_get32(tag, flags) & JBD_FLAG_SAME_UUID)) {
300                         if (remain_buf_size - tag_bytes < UUID_SIZE)
301                                 return EINVAL;
302
303                         uuid_start = (char *)tag + tag_bytes;
304                         tag_info->uuid_exist = true;
305                         tag_info->tag_bytes += UUID_SIZE;
306                         memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
307                 }
308
309                 if (jbd_get32(tag, flags) & JBD_FLAG_LAST_TAG)
310                         tag_info->last_tag = true;
311
312         } else {
313                 struct jbd_block_tag *tag = __tag;
314                 tag_info->block = jbd_get32(tag, blocknr);
315                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
316                                              JBD_FEATURE_INCOMPAT_64BIT))
317                          tag_info->block |=
318                                  (uint64_t)jbd_get32(tag, blocknr_high) << 32;
319
320                 if (jbd_get16(tag, flags) & JBD_FLAG_ESCAPE)
321                         tag_info->block = 0;
322
323                 if (!(jbd_get16(tag, flags) & JBD_FLAG_SAME_UUID)) {
324                         if (remain_buf_size - tag_bytes < UUID_SIZE)
325                                 return EINVAL;
326
327                         uuid_start = (char *)tag + tag_bytes;
328                         tag_info->uuid_exist = true;
329                         tag_info->tag_bytes += UUID_SIZE;
330                         memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
331                 }
332
333                 if (jbd_get16(tag, flags) & JBD_FLAG_LAST_TAG)
334                         tag_info->last_tag = true;
335
336         }
337         return EOK;
338 }
339
340 static int
341 jbd_write_block_tag(struct jbd_fs *jbd_fs,
342                     void *__tag,
343                     int32_t remain_buf_size,
344                     struct tag_info *tag_info)
345 {
346         char *uuid_start;
347         int tag_bytes = jbd_tag_bytes(jbd_fs);
348
349         tag_info->tag_bytes = tag_bytes;
350
351         if (remain_buf_size - tag_bytes < 0)
352                 return EINVAL;
353
354         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
355                                      JBD_FEATURE_INCOMPAT_CSUM_V3)) {
356                 struct jbd_block_tag3 *tag = __tag;
357                 jbd_set32(tag, blocknr, tag_info->block);
358                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
359                                              JBD_FEATURE_INCOMPAT_64BIT))
360                         jbd_set32(tag, blocknr_high, tag_info->block >> 32);
361
362                 if (!tag_info->uuid_exist) {
363                         if (remain_buf_size - tag_bytes < UUID_SIZE)
364                                 return EINVAL;
365
366                         uuid_start = (char *)tag + tag_bytes;
367                         tag_info->tag_bytes += UUID_SIZE;
368                         memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
369                         jbd_set32(tag, flags,
370                                   jbd_get32(tag, flags) | JBD_FLAG_SAME_UUID);
371                 }
372
373                 if (tag_info->last_tag)
374                         jbd_set32(tag, flags,
375                                   jbd_get32(tag, flags) | JBD_FLAG_LAST_TAG);
376
377         } else {
378                 struct jbd_block_tag *tag = __tag;
379                 jbd_set32(tag, blocknr, tag_info->block);
380                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
381                                              JBD_FEATURE_INCOMPAT_64BIT))
382                         jbd_set32(tag, blocknr_high, tag_info->block >> 32);
383
384                 if (!tag_info->uuid_exist) {
385                         if (remain_buf_size - tag_bytes < UUID_SIZE)
386                                 return EINVAL;
387
388                         uuid_start = (char *)tag + tag_bytes;
389                         tag_info->tag_bytes += UUID_SIZE;
390                         memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
391                         jbd_set16(tag, flags,
392                                   jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID);
393                 }
394
395                 if (tag_info->last_tag)
396                         jbd_set16(tag, flags,
397                                   jbd_get16(tag, flags) | JBD_FLAG_LAST_TAG);
398
399         }
400         return EOK;
401 }
402
403 static void
404 jbd_iterate_block_table(struct jbd_fs *jbd_fs,
405                         void *__tag_start,
406                         int32_t tag_tbl_size,
407                         void (*func)(struct jbd_fs * jbd_fs,
408                                         ext4_fsblk_t block,
409                                         uint8_t *uuid,
410                                         void *arg),
411                         void *arg)
412 {
413         char *tag_start, *tag_ptr;
414         int tag_bytes = jbd_tag_bytes(jbd_fs);
415         tag_start = __tag_start;
416         tag_ptr = tag_start;
417
418         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
419                                      JBD_FEATURE_INCOMPAT_CSUM_V2) ||
420             JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
421                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
422                 tag_tbl_size -= sizeof(struct jbd_block_tail);
423
424         while (tag_tbl_size) {
425                 struct tag_info tag_info;
426                 int rc = jbd_extract_block_tag(jbd_fs,
427                                       tag_ptr,
428                                       tag_bytes,
429                                       tag_tbl_size,
430                                       &tag_info);
431                 if (rc != EOK)
432                         break;
433
434                 if (func)
435                         func(jbd_fs, tag_info.block, tag_info.uuid, arg);
436
437                 if (tag_info.last_tag)
438                         break;
439
440                 tag_ptr += tag_info.tag_bytes;
441                 tag_tbl_size -= tag_info.tag_bytes;
442         }
443 }
444
445 static void jbd_display_block_tags(struct jbd_fs *jbd_fs,
446                                    ext4_fsblk_t block,
447                                    uint8_t *uuid,
448                                    void *arg)
449 {
450         uint32_t *iblock = arg;
451         ext4_dbg(DEBUG_JBD, "Block in block_tag: %" PRIu64 "\n", block);
452         (*iblock)++;
453         (void)jbd_fs;
454         (void)uuid;
455         return;
456 }
457
458 static struct revoke_entry *
459 jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)
460 {
461         struct revoke_entry tmp = {
462                 .block = block
463         };
464
465         return RB_FIND(jbd_revoke, &info->revoke_root, &tmp);
466 }
467
468 static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
469                                   ext4_fsblk_t block,
470                                   uint8_t *uuid __unused,
471                                   void *__arg)
472 {
473         int r;
474         struct replay_arg *arg = __arg;
475         struct recover_info *info = arg->info;
476         uint32_t *this_block = arg->this_block;
477         struct revoke_entry *revoke_entry;
478         struct ext4_block journal_block, ext4_block;
479         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
480
481         (*this_block)++;
482
483         revoke_entry = jbd_revoke_entry_lookup(info, block);
484         if (revoke_entry &&
485             arg->this_trans_id < revoke_entry->trans_id)
486                 return;
487
488         ext4_dbg(DEBUG_JBD,
489                  "Replaying block in block_tag: %" PRIu64 "\n",
490                  block);
491
492         r = jbd_block_get(jbd_fs, &journal_block, *this_block);
493         if (r != EOK)
494                 return;
495
496         if (block) {
497                 r = ext4_block_get_noread(fs->bdev, &ext4_block, block);
498                 if (r != EOK) {
499                         jbd_block_set(jbd_fs, &journal_block);
500                         return;
501                 }
502
503                 memcpy(ext4_block.data,
504                         journal_block.data,
505                         jbd_get32(&jbd_fs->sb, blocksize));
506
507                 ext4_bcache_set_dirty(ext4_block.buf);
508                 ext4_block_set(fs->bdev, &ext4_block);
509         } else {
510                 uint16_t mount_count, state;
511                 mount_count = ext4_get16(&fs->sb, mount_count);
512                 state = ext4_get16(&fs->sb, state);
513
514                 memcpy(&fs->sb,
515                         journal_block.data + EXT4_SUPERBLOCK_OFFSET,
516                         EXT4_SUPERBLOCK_SIZE);
517
518                 /* Mark system as mounted */
519                 ext4_set16(&fs->sb, state, state);
520                 r = ext4_sb_write(fs->bdev, &fs->sb);
521                 if (r != EOK)
522                         return;
523
524                 /*Update mount count*/
525                 ext4_set16(&fs->sb, mount_count, mount_count);
526         }
527
528         jbd_block_set(jbd_fs, &journal_block);
529         
530         return;
531 }
532
533 static void jbd_add_revoke_block_tags(struct recover_info *info,
534                                       ext4_fsblk_t block)
535 {
536         struct revoke_entry *revoke_entry;
537
538         ext4_dbg(DEBUG_JBD, "Add block %" PRIu64 " to revoke tree\n", block);
539         revoke_entry = jbd_revoke_entry_lookup(info, block);
540         if (revoke_entry) {
541                 revoke_entry->trans_id = info->this_trans_id;
542                 return;
543         }
544
545         revoke_entry = jbd_alloc_revoke_entry();
546         ext4_assert(revoke_entry);
547         revoke_entry->block = block;
548         revoke_entry->trans_id = info->this_trans_id;
549         RB_INSERT(jbd_revoke, &info->revoke_root, revoke_entry);
550
551         return;
552 }
553
554 static void jbd_destroy_revoke_tree(struct recover_info *info)
555 {
556         while (!RB_EMPTY(&info->revoke_root)) {
557                 struct revoke_entry *revoke_entry =
558                         RB_MIN(jbd_revoke, &info->revoke_root);
559                 ext4_assert(revoke_entry);
560                 RB_REMOVE(jbd_revoke, &info->revoke_root, revoke_entry);
561                 jbd_free_revoke_entry(revoke_entry);
562         }
563 }
564
/* Make sure we wrap around the log correctly!
 * A block index that reached maxlen is moved back by the length of the
 * usable log area (maxlen - first). `var` must be a modifiable lvalue and
 * is evaluated more than once. */
#define wrap(sb, var)                                                   \
do {                                                                    \
        if ((var) >= jbd_get32((sb), maxlen))                           \
                (var) -= (jbd_get32((sb), maxlen) -                     \
                          jbd_get32((sb), first));                      \
} while (0)

/* Passes of jbd_iterate_log(). */
#define ACTION_SCAN 0
#define ACTION_REVOKE 1
#define ACTION_RECOVER 2
575
576
577 static void jbd_build_revoke_tree(struct jbd_fs *jbd_fs,
578                                   struct jbd_bhdr *header,
579                                   struct recover_info *info)
580 {
581         char *blocks_entry;
582         struct jbd_revoke_header *revoke_hdr =
583                 (struct jbd_revoke_header *)header;
584         uint32_t i, nr_entries, record_len = 4;
585         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
586                                      JBD_FEATURE_INCOMPAT_64BIT))
587                 record_len = 8;
588
589         nr_entries = (jbd_get32(revoke_hdr, count) -
590                         sizeof(struct jbd_revoke_header)) /
591                         record_len;
592
593         blocks_entry = (char *)(revoke_hdr + 1);
594
595         for (i = 0;i < nr_entries;i++) {
596                 if (record_len == 8) {
597                         uint64_t *blocks =
598                                 (uint64_t *)blocks_entry;
599                         jbd_add_revoke_block_tags(info, to_be64(*blocks));
600                 } else {
601                         uint32_t *blocks =
602                                 (uint32_t *)blocks_entry;
603                         jbd_add_revoke_block_tags(info, to_be32(*blocks));
604                 }
605                 blocks_entry += record_len;
606         }
607 }
608
609 static void jbd_debug_descriptor_block(struct jbd_fs *jbd_fs,
610                                        struct jbd_bhdr *header,
611                                        uint32_t *iblock)
612 {
613         jbd_iterate_block_table(jbd_fs,
614                                 header + 1,
615                                 jbd_get32(&jbd_fs->sb, blocksize) -
616                                         sizeof(struct jbd_bhdr),
617                                 jbd_display_block_tags,
618                                 iblock);
619 }
620
621 static void jbd_replay_descriptor_block(struct jbd_fs *jbd_fs,
622                                         struct jbd_bhdr *header,
623                                         struct replay_arg *arg)
624 {
625         jbd_iterate_block_table(jbd_fs,
626                                 header + 1,
627                                 jbd_get32(&jbd_fs->sb, blocksize) -
628                                         sizeof(struct jbd_bhdr),
629                                 jbd_replay_block_tags,
630                                 arg);
631 }
632
633 int jbd_iterate_log(struct jbd_fs *jbd_fs,
634                     struct recover_info *info,
635                     int action)
636 {
637         int r = EOK;
638         bool log_end = false;
639         struct jbd_sb *sb = &jbd_fs->sb;
640         uint32_t start_trans_id, this_trans_id;
641         uint32_t start_block, this_block;
642
643         start_trans_id = this_trans_id = jbd_get32(sb, sequence);
644         start_block = this_block = jbd_get32(sb, start);
645
646         ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %" PRIu32 "\n",
647                             start_trans_id);
648
649         while (!log_end) {
650                 struct ext4_block block;
651                 struct jbd_bhdr *header;
652                 if (action != ACTION_SCAN)
653                         if (this_trans_id > info->last_trans_id) {
654                                 log_end = true;
655                                 continue;
656                         }
657
658                 r = jbd_block_get(jbd_fs, &block, this_block);
659                 if (r != EOK)
660                         break;
661
662                 header = (struct jbd_bhdr *)block.data;
663                 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER) {
664                         jbd_block_set(jbd_fs, &block);
665                         log_end = true;
666                         continue;
667                 }
668
669                 if (jbd_get32(header, sequence) != this_trans_id) {
670                         if (action != ACTION_SCAN)
671                                 r = EIO;
672
673                         jbd_block_set(jbd_fs, &block);
674                         log_end = true;
675                         continue;
676                 }
677
678                 switch (jbd_get32(header, blocktype)) {
679                 case JBD_DESCRIPTOR_BLOCK:
680                         ext4_dbg(DEBUG_JBD, "Descriptor block: %" PRIu32", "
681                                             "trans_id: %" PRIu32"\n",
682                                             this_block, this_trans_id);
683                         if (action == ACTION_RECOVER) {
684                                 struct replay_arg replay_arg;
685                                 replay_arg.info = info;
686                                 replay_arg.this_block = &this_block;
687                                 replay_arg.this_trans_id = this_trans_id;
688
689                                 jbd_replay_descriptor_block(jbd_fs,
690                                                 header, &replay_arg);
691                         } else
692                                 jbd_debug_descriptor_block(jbd_fs,
693                                                 header, &this_block);
694
695                         break;
696                 case JBD_COMMIT_BLOCK:
697                         ext4_dbg(DEBUG_JBD, "Commit block: %" PRIu32", "
698                                             "trans_id: %" PRIu32"\n",
699                                             this_block, this_trans_id);
700                         this_trans_id++;
701                         break;
702                 case JBD_REVOKE_BLOCK:
703                         ext4_dbg(DEBUG_JBD, "Revoke block: %" PRIu32", "
704                                             "trans_id: %" PRIu32"\n",
705                                             this_block, this_trans_id);
706                         if (action == ACTION_REVOKE) {
707                                 info->this_trans_id = this_trans_id;
708                                 jbd_build_revoke_tree(jbd_fs,
709                                                 header, info);
710                         }
711                         break;
712                 default:
713                         log_end = true;
714                         break;
715                 }
716                 jbd_block_set(jbd_fs, &block);
717                 this_block++;
718                 wrap(sb, this_block);
719                 if (this_block == start_block)
720                         log_end = true;
721
722         }
723         ext4_dbg(DEBUG_JBD, "End of journal.\n");
724         if (r == EOK && action == ACTION_SCAN) {
725                 info->start_trans_id = start_trans_id;
726                 if (this_trans_id > start_trans_id)
727                         info->last_trans_id = this_trans_id - 1;
728                 else
729                         info->last_trans_id = this_trans_id;
730         }
731
732         return r;
733 }
734
735 int jbd_recover(struct jbd_fs *jbd_fs)
736 {
737         int r;
738         struct recover_info info;
739         struct jbd_sb *sb = &jbd_fs->sb;
740         if (!sb->start)
741                 return EOK;
742
743         RB_INIT(&info.revoke_root);
744
745         r = jbd_iterate_log(jbd_fs, &info, ACTION_SCAN);
746         if (r != EOK)
747                 return r;
748
749         r = jbd_iterate_log(jbd_fs, &info, ACTION_REVOKE);
750         if (r != EOK)
751                 return r;
752
753         r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);
754         if (r == EOK) {
755                 jbd_set32(&jbd_fs->sb, start, 0);
756                 jbd_fs->dirty = true;
757         }
758         jbd_destroy_revoke_tree(&info);
759         return r;
760 }
761
762 void jbd_journal_write_sb(struct jbd_journal *journal)
763 {
764         struct jbd_fs *jbd_fs = journal->jbd_fs;
765         jbd_set32(&jbd_fs->sb, start, journal->first);
766         jbd_set32(&jbd_fs->sb, sequence, journal->trans_id);
767         jbd_fs->dirty = true;
768 }
769
770 int jbd_journal_start(struct jbd_fs *jbd_fs,
771                       struct jbd_journal *journal)
772 {
773         journal->first = jbd_get32(&jbd_fs->sb, first);
774         journal->start = journal->first;
775         journal->last = journal->first;
776         journal->trans_id = 1;
777         journal->alloc_trans_id = 1;
778
779         journal->block_size = jbd_get32(&jbd_fs->sb, blocksize);
780
781         TAILQ_INIT(&journal->trans_queue);
782         journal->jbd_fs = jbd_fs;
783         jbd_journal_write_sb(journal);
784         return jbd_write_sb(jbd_fs);
785 }
786
787 int jbd_journal_stop(struct jbd_journal *journal)
788 {
789         journal->start = 0;
790         journal->trans_id = 0;
791         jbd_journal_write_sb(journal);
792         return jbd_write_sb(journal->jbd_fs);
793 }
794
795 static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal,
796                                         struct jbd_trans *trans)
797 {
798         uint32_t start_block = journal->last++;
799         trans->alloc_blocks++;
800         wrap(&journal->jbd_fs->sb, journal->last);
801         return start_block;
802 }
803
804 struct jbd_trans *
805 jbd_journal_new_trans(struct jbd_journal *journal)
806 {
807         struct jbd_trans *trans = calloc(1, sizeof(struct jbd_trans));
808         if (!trans)
809                 return NULL;
810
811         /* We will assign a trans_id to this transaction,
812          * once it has been committed.*/
813         trans->journal = journal;
814         trans->error = EOK;
815         return trans;
816 }
817
/* Forward declaration: jbd_trans_add_block() installs this function as the
 * end_write callback of the buffers it journals. */
static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
                          struct ext4_buf *buf __unused,
                          int res,
                          void *arg);
822
823 int jbd_trans_add_block(struct jbd_trans *trans,
824                         struct ext4_block *block)
825 {
826         struct jbd_buf *buf = calloc(1, sizeof(struct jbd_buf));
827         if (!buf)
828                 return ENOMEM;
829
830         buf->trans = trans;
831         buf->block = *block;
832         ext4_bcache_inc_ref(block->buf);
833
834         block->buf->end_write = jbd_trans_end_write;
835         block->buf->end_write_arg = trans;
836
837         trans->data_cnt++;
838         LIST_INSERT_HEAD(&trans->buf_list, buf, buf_node);
839         return EOK;
840 }
841
842 int jbd_trans_revoke_block(struct jbd_trans *trans,
843                            ext4_fsblk_t lba)
844 {
845         struct jbd_revoke_rec *rec =
846                 calloc(1, sizeof(struct jbd_revoke_rec));
847         if (!rec)
848                 return ENOMEM;
849
850         rec->lba = lba;
851         LIST_INSERT_HEAD(&trans->revoke_list, rec, revoke_node);
852         return EOK;
853 }
854
855 void jbd_journal_free_trans(struct jbd_journal *journal,
856                             struct jbd_trans *trans,
857                             bool abort)
858 {
859         struct jbd_buf *jbd_buf, *tmp;
860         struct jbd_revoke_rec *rec, *tmp2;
861         LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node,
862                           tmp) {
863                 if (abort)
864                         ext4_block_set(journal->jbd_fs->bdev, &jbd_buf->block);
865
866                 LIST_REMOVE(jbd_buf, buf_node);
867                 free(jbd_buf);
868         }
869         LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
870                           tmp2) {
871                 LIST_REMOVE(rec, revoke_node);
872                 free(rec);
873         }
874
875         free(trans);
876 }
877
878 static int jbd_trans_write_commit_block(struct jbd_trans *trans)
879 {
880         int rc;
881         struct jbd_commit_header *header;
882         uint32_t commit_iblock = 0;
883         struct ext4_block commit_block;
884         struct jbd_journal *journal = trans->journal;
885
886         commit_iblock = jbd_journal_alloc_block(journal, trans);
887         rc = jbd_block_get_noread(journal->jbd_fs,
888                         &commit_block, commit_iblock);
889         if (rc != EOK)
890                 return rc;
891
892         header = (struct jbd_commit_header *)commit_block.data;
893         header->header.magic = JBD_MAGIC_NUMBER;
894         header->header.blocktype = JBD_COMMIT_BLOCK;
895         header->header.sequence = trans->trans_id;
896
897         ext4_bcache_set_dirty(commit_block.buf);
898         rc = jbd_block_set(journal->jbd_fs, &commit_block);
899         if (rc != EOK)
900                 return rc;
901
902         return EOK;
903 }
904
905 static int jbd_journal_prepare(struct jbd_journal *journal,
906                                struct jbd_trans *trans)
907 {
908         int rc = EOK, i = 0;
909         int32_t tag_tbl_size;
910         uint32_t desc_iblock = 0;
911         uint32_t data_iblock = 0;
912         char *tag_start = NULL, *tag_ptr = NULL;
913         struct jbd_buf *jbd_buf;
914         struct ext4_block desc_block, data_block;
915
916         LIST_FOREACH(jbd_buf, &trans->buf_list, buf_node) {
917                 struct tag_info tag_info;
918                 bool uuid_exist = false;
919 again:
920                 if (!desc_iblock) {
921                         struct jbd_bhdr *bhdr;
922                         desc_iblock = jbd_journal_alloc_block(journal, trans);
923                         rc = jbd_block_get_noread(journal->jbd_fs,
924                                            &desc_block, desc_iblock);
925                         if (!rc)
926                                 break;
927
928                         ext4_bcache_set_dirty(desc_block.buf);
929
930                         bhdr = (struct jbd_bhdr *)desc_block.data;
931                         bhdr->magic = JBD_MAGIC_NUMBER;
932                         bhdr->blocktype = JBD_DESCRIPTOR_BLOCK;
933                         bhdr->sequence = trans->trans_id;
934
935                         tag_start = (char *)(bhdr + 1);
936                         tag_ptr = tag_start;
937                         uuid_exist = true;
938                         tag_tbl_size = journal->block_size -
939                                 sizeof(struct jbd_bhdr);
940                 }
941                 tag_info.block = jbd_buf->block.lb_id;
942                 tag_info.uuid_exist = uuid_exist;
943                 if (i == trans->data_cnt - 1)
944                         tag_info.last_tag = true;
945
946                 if (uuid_exist)
947                         memcpy(tag_info.uuid, journal->jbd_fs->sb.uuid,
948                                         UUID_SIZE);
949
950                 rc = jbd_write_block_tag(journal->jbd_fs,
951                                 tag_ptr,
952                                 tag_tbl_size,
953                                 &tag_info);
954                 if (rc != EOK) {
955                         jbd_block_set(journal->jbd_fs, &desc_block);
956                         desc_iblock = 0;
957                         goto again;
958                 }
959
960                 data_iblock = jbd_journal_alloc_block(journal, trans);
961                 rc = jbd_block_get_noread(journal->jbd_fs,
962                                 &data_block, data_iblock);
963                 if (rc != EOK)
964                         break;
965
966                 ext4_bcache_set_dirty(data_block.buf);
967
968                 memcpy(data_block.data, jbd_buf->block.data,
969                         journal->block_size);
970
971                 rc = jbd_block_set(journal->jbd_fs, &data_block);
972                 if (rc != EOK)
973                         break;
974
975                 tag_ptr += tag_info.tag_bytes;
976                 tag_tbl_size -= tag_info.tag_bytes;
977
978                 i++;
979         }
980         if (rc == EOK && desc_iblock)
981                 jbd_block_set(journal->jbd_fs, &desc_block);
982
983         return rc;
984 }
985
986 static int
987 jbd_journal_prepare_revoke(struct jbd_journal *journal,
988                            struct jbd_trans *trans)
989 {
990         int rc = EOK, i = 0;
991         int32_t tag_tbl_size;
992         uint32_t desc_iblock = 0;
993         char *blocks_entry = NULL;
994         struct jbd_revoke_rec *rec, *tmp;
995         struct ext4_block desc_block;
996         struct jbd_revoke_header *header = NULL;
997         int32_t record_len = 4;
998
999         if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1000                                      JBD_FEATURE_INCOMPAT_64BIT))
1001                 record_len = 8;
1002
1003         LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1004                           tmp) {
1005 again:
1006                 if (!desc_iblock) {
1007                         struct jbd_bhdr *bhdr;
1008                         desc_iblock = jbd_journal_alloc_block(journal, trans);
1009                         rc = jbd_block_get_noread(journal->jbd_fs,
1010                                            &desc_block, desc_iblock);
1011                         if (!rc) {
1012                                 break;
1013                         }
1014
1015                         ext4_bcache_set_dirty(desc_block.buf);
1016
1017                         bhdr = (struct jbd_bhdr *)desc_block.data;
1018                         bhdr->magic = JBD_MAGIC_NUMBER;
1019                         bhdr->blocktype = JBD_REVOKE_BLOCK;
1020                         bhdr->sequence = trans->trans_id;
1021                         
1022                         header = (struct jbd_revoke_header *)bhdr;
1023                         blocks_entry = (char *)(header + 1);
1024                         tag_tbl_size = journal->block_size -
1025                                 sizeof(struct jbd_revoke_header);
1026                 }
1027
1028                 if (tag_tbl_size < record_len) {
1029                         header->count = journal->block_size - tag_tbl_size;
1030                         jbd_block_set(journal->jbd_fs, &desc_block);
1031                         desc_iblock = 0;
1032                         header = NULL;
1033                         goto again;
1034                 }
1035                 if (record_len == 8) {
1036                         uint64_t *blocks =
1037                                 (uint64_t *)blocks_entry;
1038                         *blocks = to_be64(rec->lba);
1039                 } else {
1040                         uint32_t *blocks =
1041                                 (uint32_t *)blocks_entry;
1042                         *blocks = to_be32(rec->lba);
1043                 }
1044                 blocks_entry += record_len;
1045                 tag_tbl_size -= record_len;
1046
1047                 i++;
1048         }
1049         if (rc == EOK && desc_iblock) {
1050                 if (header != NULL)
1051                         header->count = journal->block_size - tag_tbl_size;
1052
1053                 jbd_block_set(journal->jbd_fs, &desc_block);
1054         }
1055
1056         return rc;
1057 }
1058
1059 void
1060 jbd_journal_submit_trans(struct jbd_journal *journal,
1061                          struct jbd_trans *trans)
1062 {
1063         TAILQ_INSERT_TAIL(&journal->trans_queue,
1064                           trans,
1065                           trans_node);
1066 }
1067
1068 void jbd_journal_cp_trans(struct jbd_journal *journal, struct jbd_trans *trans)
1069 {
1070         struct jbd_buf *jbd_buf, *tmp;
1071         LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node,
1072                         tmp) {
1073                 ext4_block_set(journal->jbd_fs->bdev, &jbd_buf->block);
1074         }
1075 }
1076
1077 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
1078                           struct ext4_buf *buf __unused,
1079                           int res,
1080                           void *arg)
1081 {
1082         struct jbd_trans *trans = arg;
1083         struct jbd_journal *journal = trans->journal;
1084         if (res != EOK)
1085                 trans->error = res;
1086
1087         trans->written_cnt++;
1088         if (trans->written_cnt == trans->data_cnt) {
1089                 TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
1090                 journal->start += trans->alloc_blocks;
1091                 journal->trans_id = ++trans->trans_id;
1092                 jbd_journal_write_sb(journal);
1093                 jbd_journal_free_trans(journal, trans, false);
1094
1095                 if ((trans = TAILQ_FIRST(&journal->cp_queue))) {
1096                         jbd_journal_cp_trans(journal, trans);
1097                 }
1098         }
1099 }
1100
1101 /*
1102  * XXX: one should disable cache writeback first.
1103  */
1104 static void
1105 jbd_journal_commit_one(struct jbd_journal *journal)
1106 {
1107         int rc = EOK;
1108         uint32_t last = journal->last;
1109         struct jbd_trans *trans;
1110         if ((trans = TAILQ_FIRST(&journal->trans_queue))) {
1111                 TAILQ_REMOVE(&journal->trans_queue, trans, trans_node);
1112
1113                 trans->trans_id = journal->alloc_trans_id;
1114                 rc = jbd_journal_prepare(journal, trans);
1115                 if (rc != EOK)
1116                         goto Finish;
1117
1118                 rc = jbd_journal_prepare_revoke(journal, trans);
1119                 if (rc != EOK)
1120                         goto Finish;
1121
1122                 rc = jbd_trans_write_commit_block(trans);
1123                 if (rc != EOK)
1124                         goto Finish;
1125
1126                 journal->alloc_trans_id++;
1127                 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
1128                           trans_node);
1129                 if (trans == TAILQ_FIRST(&journal->cp_queue)) {
1130                         jbd_journal_cp_trans(journal, trans);
1131                 }
1132         }
1133 Finish:
1134         if (rc != EOK) {
1135                 journal->last = last;
1136                 jbd_journal_free_trans(journal, trans, true);
1137         }
1138 }
1139
1140 /**
1141  * @}
1142  */