ext4_journal: journal write v1.
[lwext4.git] / lwext4 / ext4_journal.c
1 /*
2  * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3  * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * - Redistributions of source code must retain the above copyright
11  *   notice, this list of conditions and the following disclaimer.
12  * - Redistributions in binary form must reproduce the above copyright
13  *   notice, this list of conditions and the following disclaimer in the
14  *   documentation and/or other materials provided with the distribution.
15  * - The name of the author may not be used to endorse or promote products
16  *   derived from this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30 /** @addtogroup lwext4
31  * @{
32  */
33 /**
34  * @file  ext4_journal.c
35  * @brief Journal handle functions
36  */
37
38 #include "ext4_config.h"
39 #include "ext4_types.h"
40 #include "ext4_fs.h"
41 #include "ext4_super.h"
42 #include "ext4_errno.h"
43 #include "ext4_blockdev.h"
44 #include "ext4_crc32c.h"
45 #include "ext4_debug.h"
46 #include "tree.h"
47
48 #include <string.h>
49 #include <stdlib.h>
50
51 struct revoke_entry {
52         ext4_fsblk_t block;
53         uint32_t trans_id;
54         RB_ENTRY(revoke_entry) revoke_node;
55 };
56
57 struct recover_info {
58         uint32_t start_trans_id;
59         uint32_t last_trans_id;
60         uint32_t this_trans_id;
61         RB_HEAD(jbd_revoke, revoke_entry) revoke_root;
62 };
63
/**@brief Argument bundle handed to jbd_replay_block_tags(). */
struct replay_arg {
	/** Recovery state, including the revoke tree. */
	struct recover_info *info;

	/** Cursor into the journal; advanced once per replayed tag. */
	uint32_t *this_block;

	/** Id of the transaction the descriptor block belongs to. */
	uint32_t this_trans_id;
};
69
70 static int
71 jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b)
72 {
73         if (a->block > b->block)
74                 return 1;
75         else if (a->block < b->block)
76                 return -1;
77         return 0;
78 }
79
80 RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
81                      jbd_revoke_entry_cmp, static inline)
82
83 #define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry))
84 #define jbd_free_revoke_entry(addr) free(addr)
85
86 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
87                    ext4_lblk_t iblock,
88                    ext4_fsblk_t *fblock);
89
90 int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s)
91 {
92         int rc;
93         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
94         uint64_t offset;
95         ext4_fsblk_t fblock;
96         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
97         if (rc != EOK)
98                 return rc;
99
100         offset = fblock * ext4_sb_get_block_size(&fs->sb);
101         return ext4_block_writebytes(fs->bdev, offset, s,
102                                      EXT4_SUPERBLOCK_SIZE);
103 }
104
105 int jbd_sb_read(struct jbd_fs *jbd_fs, struct jbd_sb *s)
106 {
107         int rc;
108         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
109         uint64_t offset;
110         ext4_fsblk_t fblock;
111         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
112         if (rc != EOK)
113                 return rc;
114
115         offset = fblock * ext4_sb_get_block_size(&fs->sb);
116         return ext4_block_readbytes(fs->bdev, offset, s,
117                                     EXT4_SUPERBLOCK_SIZE);
118 }
119
120 static bool jbd_verify_sb(struct jbd_sb *sb)
121 {
122         struct jbd_bhdr *header = &sb->header;
123         if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER)
124                 return false;
125
126         if (jbd_get32(header, blocktype) != JBD_SUPERBLOCK &&
127             jbd_get32(header, blocktype) != JBD_SUPERBLOCK_V2)
128                 return false;
129
130         return true;
131 }
132
133 static int jbd_write_sb(struct jbd_fs *jbd_fs)
134 {
135         int rc = EOK;
136         if (jbd_fs->dirty) {
137                 rc = jbd_sb_write(jbd_fs, &jbd_fs->sb);
138                 if (rc != EOK)
139                         return rc;
140
141                 jbd_fs->dirty = false;
142         }
143         return rc;
144 }
145
146 int jbd_get_fs(struct ext4_fs *fs,
147                struct jbd_fs *jbd_fs)
148 {
149         int rc;
150         uint32_t journal_ino;
151
152         memset(jbd_fs, 0, sizeof(struct jbd_fs));
153         journal_ino = ext4_get32(&fs->sb, journal_inode_number);
154
155         rc = ext4_fs_get_inode_ref(fs,
156                                    journal_ino,
157                                    &jbd_fs->inode_ref);
158         if (rc != EOK) {
159                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
160                 return rc;
161         }
162         rc = jbd_sb_read(jbd_fs, &jbd_fs->sb);
163         if (rc != EOK) {
164                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
165                 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
166                 return rc;
167         }
168         if (!jbd_verify_sb(&jbd_fs->sb)) {
169                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
170                 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
171                 rc = EIO;
172         }
173
174         return rc;
175 }
176
177 int jbd_put_fs(struct jbd_fs *jbd_fs)
178 {
179         int rc = EOK;
180         rc = jbd_write_sb(jbd_fs);
181
182         ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
183         return rc;
184 }
185
186 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
187                    ext4_lblk_t iblock,
188                    ext4_fsblk_t *fblock)
189 {
190         int rc = ext4_fs_get_inode_dblk_idx(
191                         &jbd_fs->inode_ref,
192                         iblock,
193                         fblock,
194                         false);
195         return rc;
196 }
197
198 int jbd_block_get(struct jbd_fs *jbd_fs,
199                   struct ext4_block *block,
200                   ext4_fsblk_t fblock)
201 {
202         /* TODO: journal device. */
203         int rc;
204         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
205         rc = jbd_inode_bmap(jbd_fs, iblock,
206                             &fblock);
207         if (rc != EOK)
208                 return rc;
209
210         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
211         rc = ext4_block_get(bdev, block, fblock);
212         return rc;
213 }
214
215 int jbd_block_get_noread(struct jbd_fs *jbd_fs,
216                          struct ext4_block *block,
217                          ext4_fsblk_t fblock)
218 {
219         /* TODO: journal device. */
220         int rc;
221         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
222         rc = jbd_inode_bmap(jbd_fs, iblock,
223                             &fblock);
224         if (rc != EOK)
225                 return rc;
226
227         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
228         rc = ext4_block_get_noread(bdev, block, fblock);
229         return rc;
230 }
231
232 int jbd_block_set(struct jbd_fs *jbd_fs,
233                   struct ext4_block *block)
234 {
235         return ext4_block_set(jbd_fs->inode_ref.fs->bdev,
236                               block);
237 }
238
239 /*
240  * helper functions to deal with 32 or 64bit block numbers.
241  */
242 int jbd_tag_bytes(struct jbd_fs *jbd_fs)
243 {
244         int size;
245
246         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
247                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
248                 return sizeof(struct jbd_block_tag3);
249
250         size = sizeof(struct jbd_block_tag);
251
252         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
253                                      JBD_FEATURE_INCOMPAT_CSUM_V2))
254                 size += sizeof(uint16_t);
255
256         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
257                                      JBD_FEATURE_INCOMPAT_64BIT))
258                 return size;
259
260         return size - sizeof(uint32_t);
261 }
262
263 /**@brief: tag information. */
264 struct tag_info {
265         int tag_bytes;
266         ext4_fsblk_t block;
267         bool uuid_exist;
268         uint8_t uuid[UUID_SIZE];
269         bool last_tag;
270 };
271
272 static int
273 jbd_extract_block_tag(struct jbd_fs *jbd_fs,
274                       void *__tag,
275                       int tag_bytes,
276                       int32_t remain_buf_size,
277                       struct tag_info *tag_info)
278 {
279         char *uuid_start;
280         tag_info->tag_bytes = tag_bytes;
281         tag_info->uuid_exist = false;
282         tag_info->last_tag = false;
283
284         if (remain_buf_size - tag_bytes < 0)
285                 return EINVAL;
286
287         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
288                                      JBD_FEATURE_INCOMPAT_CSUM_V3)) {
289                 struct jbd_block_tag3 *tag = __tag;
290                 tag_info->block = jbd_get32(tag, blocknr);
291                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
292                                              JBD_FEATURE_INCOMPAT_64BIT))
293                          tag_info->block |=
294                                  (uint64_t)jbd_get32(tag, blocknr_high) << 32;
295
296                 if (jbd_get32(tag, flags) & JBD_FLAG_ESCAPE)
297                         tag_info->block = 0;
298
299                 if (!(jbd_get32(tag, flags) & JBD_FLAG_SAME_UUID)) {
300                         if (remain_buf_size - tag_bytes < UUID_SIZE)
301                                 return EINVAL;
302
303                         uuid_start = (char *)tag + tag_bytes;
304                         tag_info->uuid_exist = true;
305                         tag_info->tag_bytes += UUID_SIZE;
306                         memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
307                 }
308
309                 if (jbd_get32(tag, flags) & JBD_FLAG_LAST_TAG)
310                         tag_info->last_tag = true;
311
312         } else {
313                 struct jbd_block_tag *tag = __tag;
314                 tag_info->block = jbd_get32(tag, blocknr);
315                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
316                                              JBD_FEATURE_INCOMPAT_64BIT))
317                          tag_info->block |=
318                                  (uint64_t)jbd_get32(tag, blocknr_high) << 32;
319
320                 if (jbd_get16(tag, flags) & JBD_FLAG_ESCAPE)
321                         tag_info->block = 0;
322
323                 if (!(jbd_get16(tag, flags) & JBD_FLAG_SAME_UUID)) {
324                         if (remain_buf_size - tag_bytes < UUID_SIZE)
325                                 return EINVAL;
326
327                         uuid_start = (char *)tag + tag_bytes;
328                         tag_info->uuid_exist = true;
329                         tag_info->tag_bytes += UUID_SIZE;
330                         memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
331                 }
332
333                 if (jbd_get16(tag, flags) & JBD_FLAG_LAST_TAG)
334                         tag_info->last_tag = true;
335
336         }
337         return EOK;
338 }
339
340 static int
341 jbd_write_block_tag(struct jbd_fs *jbd_fs,
342                     void *__tag,
343                     int32_t remain_buf_size,
344                     struct tag_info *tag_info)
345 {
346         char *uuid_start;
347         int tag_bytes = jbd_tag_bytes(jbd_fs);
348
349         tag_info->tag_bytes = tag_bytes;
350
351         if (remain_buf_size - tag_bytes < 0)
352                 return EINVAL;
353
354         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
355                                      JBD_FEATURE_INCOMPAT_CSUM_V3)) {
356                 struct jbd_block_tag3 *tag = __tag;
357                 jbd_set32(tag, blocknr, tag_info->block);
358                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
359                                              JBD_FEATURE_INCOMPAT_64BIT))
360                         jbd_set32(tag, blocknr_high, tag_info->block >> 32);
361
362                 if (!tag_info->uuid_exist) {
363                         if (remain_buf_size - tag_bytes < UUID_SIZE)
364                                 return EINVAL;
365
366                         uuid_start = (char *)tag + tag_bytes;
367                         tag_info->tag_bytes += UUID_SIZE;
368                         memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
369                         jbd_set32(tag, flags,
370                                   jbd_get32(tag, flags) | JBD_FLAG_SAME_UUID);
371                 }
372
373                 if (tag_info->last_tag)
374                         jbd_set32(tag, flags,
375                                   jbd_get32(tag, flags) | JBD_FLAG_LAST_TAG);
376
377         } else {
378                 struct jbd_block_tag *tag = __tag;
379                 jbd_set32(tag, blocknr, tag_info->block);
380                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
381                                              JBD_FEATURE_INCOMPAT_64BIT))
382                         jbd_set32(tag, blocknr_high, tag_info->block >> 32);
383
384                 if (!tag_info->uuid_exist) {
385                         if (remain_buf_size - tag_bytes < UUID_SIZE)
386                                 return EINVAL;
387
388                         uuid_start = (char *)tag + tag_bytes;
389                         tag_info->tag_bytes += UUID_SIZE;
390                         memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
391                         jbd_set16(tag, flags,
392                                   jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID);
393                 }
394
395                 if (tag_info->last_tag)
396                         jbd_set16(tag, flags,
397                                   jbd_get16(tag, flags) | JBD_FLAG_LAST_TAG);
398
399         }
400         return EOK;
401 }
402
403 static void
404 jbd_iterate_block_table(struct jbd_fs *jbd_fs,
405                         void *__tag_start,
406                         int32_t tag_tbl_size,
407                         void (*func)(struct jbd_fs * jbd_fs,
408                                         ext4_fsblk_t block,
409                                         uint8_t *uuid,
410                                         void *arg),
411                         void *arg)
412 {
413         ext4_fsblk_t block = 0;
414         char *tag_start, *tag_ptr;
415         int tag_bytes = jbd_tag_bytes(jbd_fs);
416         tag_start = __tag_start;
417         tag_ptr = tag_start;
418
419         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
420                                      JBD_FEATURE_INCOMPAT_CSUM_V2) ||
421             JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
422                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
423                 tag_tbl_size -= sizeof(struct jbd_block_tail);
424
425         while (tag_tbl_size) {
426                 struct tag_info tag_info;
427                 int rc = jbd_extract_block_tag(jbd_fs,
428                                       tag_ptr,
429                                       tag_bytes,
430                                       tag_tbl_size,
431                                       &tag_info);
432                 if (rc != EOK)
433                         break;
434
435                 if (func)
436                         func(jbd_fs, block, tag_info.uuid, arg);
437
438                 if (tag_info.last_tag)
439                         break;
440
441                 tag_ptr += tag_info.tag_bytes;
442                 tag_tbl_size -= tag_info.tag_bytes;
443         }
444 }
445
446 static void jbd_display_block_tags(struct jbd_fs *jbd_fs,
447                                    ext4_fsblk_t block,
448                                    uint8_t *uuid,
449                                    void *arg)
450 {
451         uint32_t *iblock = arg;
452         ext4_dbg(DEBUG_JBD, "Block in block_tag: %" PRIu64 "\n", block);
453         (*iblock)++;
454         (void)jbd_fs;
455         (void)uuid;
456         return;
457 }
458
459 static struct revoke_entry *
460 jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)
461 {
462         struct revoke_entry tmp = {
463                 .block = block
464         };
465
466         return RB_FIND(jbd_revoke, &info->revoke_root, &tmp);
467 }
468
469 static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
470                                   ext4_fsblk_t block,
471                                   uint8_t *uuid __unused,
472                                   void *__arg)
473 {
474         int r;
475         struct replay_arg *arg = __arg;
476         struct recover_info *info = arg->info;
477         uint32_t *this_block = arg->this_block;
478         struct revoke_entry *revoke_entry;
479         struct ext4_block journal_block, ext4_block;
480         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
481
482         (*this_block)++;
483
484         revoke_entry = jbd_revoke_entry_lookup(info, block);
485         if (revoke_entry &&
486             arg->this_trans_id < revoke_entry->trans_id)
487                 return;
488
489         ext4_dbg(DEBUG_JBD,
490                  "Replaying block in block_tag: %" PRIu64 "\n",
491                  block);
492
493         r = jbd_block_get(jbd_fs, &journal_block, *this_block);
494         if (r != EOK)
495                 return;
496
497         if (block) {
498                 r = ext4_block_get_noread(fs->bdev, &ext4_block, block);
499                 if (r != EOK) {
500                         jbd_block_set(jbd_fs, &journal_block);
501                         return;
502                 }
503
504                 memcpy(ext4_block.data,
505                         journal_block.data,
506                         jbd_get32(&jbd_fs->sb, blocksize));
507
508                 ext4_bcache_set_dirty(ext4_block.buf);
509                 ext4_block_set(fs->bdev, &ext4_block);
510         } else {
511                 uint16_t mount_count, state;
512                 mount_count = ext4_get16(&fs->sb, mount_count);
513                 state = ext4_get16(&fs->sb, state);
514
515                 memcpy(&fs->sb,
516                         journal_block.data + EXT4_SUPERBLOCK_OFFSET,
517                         EXT4_SUPERBLOCK_SIZE);
518
519                 /* Mark system as mounted */
520                 ext4_set16(&fs->sb, state, state);
521                 r = ext4_sb_write(fs->bdev, &fs->sb);
522                 if (r != EOK)
523                         return;
524
525                 /*Update mount count*/
526                 ext4_set16(&fs->sb, mount_count, mount_count);
527         }
528
529         jbd_block_set(jbd_fs, &journal_block);
530         
531         return;
532 }
533
534 static void jbd_add_revoke_block_tags(struct recover_info *info,
535                                       ext4_fsblk_t block)
536 {
537         struct revoke_entry *revoke_entry;
538
539         ext4_dbg(DEBUG_JBD, "Add block %" PRIu64 " to revoke tree\n", block);
540         revoke_entry = jbd_revoke_entry_lookup(info, block);
541         if (revoke_entry) {
542                 revoke_entry->trans_id = info->this_trans_id;
543                 return;
544         }
545
546         revoke_entry = jbd_alloc_revoke_entry();
547         ext4_assert(revoke_entry);
548         revoke_entry->block = block;
549         revoke_entry->trans_id = info->this_trans_id;
550         RB_INSERT(jbd_revoke, &info->revoke_root, revoke_entry);
551
552         return;
553 }
554
555 static void jbd_destroy_revoke_tree(struct recover_info *info)
556 {
557         while (!RB_EMPTY(&info->revoke_root)) {
558                 struct revoke_entry *revoke_entry =
559                         RB_MIN(jbd_revoke, &info->revoke_root);
560                 ext4_assert(revoke_entry);
561                 RB_REMOVE(jbd_revoke, &info->revoke_root, revoke_entry);
562                 jbd_free_revoke_entry(revoke_entry);
563         }
564 }
565
/*
 * Fold a journal block index that reached or passed the superblock's
 * maxlen back towards the start of the log by subtracting the log
 * length (maxlen - first).
 */
#define wrap(sb, var)							\
do {									\
	if ((var) >= jbd_get32((sb), maxlen))				\
		(var) -= (jbd_get32((sb), maxlen) -			\
			  jbd_get32((sb), first));			\
} while (0)

/* Passes understood by jbd_iterate_log(). */
#define ACTION_SCAN 0
#define ACTION_REVOKE 1
#define ACTION_RECOVER 2
576
577
578 static void jbd_build_revoke_tree(struct jbd_fs *jbd_fs,
579                                   struct jbd_bhdr *header,
580                                   struct recover_info *info)
581 {
582         char *blocks_entry;
583         struct jbd_revoke_header *revoke_hdr =
584                 (struct jbd_revoke_header *)header;
585         uint32_t i, nr_entries, record_len = 4;
586         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
587                                      JBD_FEATURE_INCOMPAT_64BIT))
588                 record_len = 8;
589
590         nr_entries = (jbd_get32(revoke_hdr, count) -
591                         sizeof(struct jbd_revoke_header)) /
592                         record_len;
593
594         blocks_entry = (char *)(revoke_hdr + 1);
595
596         for (i = 0;i < nr_entries;i++) {
597                 if (record_len == 8) {
598                         uint64_t *blocks =
599                                 (uint64_t *)blocks_entry;
600                         jbd_add_revoke_block_tags(info, to_be64(*blocks));
601                 } else {
602                         uint32_t *blocks =
603                                 (uint32_t *)blocks_entry;
604                         jbd_add_revoke_block_tags(info, to_be32(*blocks));
605                 }
606                 blocks_entry += record_len;
607         }
608 }
609
610 static void jbd_debug_descriptor_block(struct jbd_fs *jbd_fs,
611                                        struct jbd_bhdr *header,
612                                        uint32_t *iblock)
613 {
614         jbd_iterate_block_table(jbd_fs,
615                                 header + 1,
616                                 jbd_get32(&jbd_fs->sb, blocksize) -
617                                         sizeof(struct jbd_bhdr),
618                                 jbd_display_block_tags,
619                                 iblock);
620 }
621
622 static void jbd_replay_descriptor_block(struct jbd_fs *jbd_fs,
623                                         struct jbd_bhdr *header,
624                                         struct replay_arg *arg)
625 {
626         jbd_iterate_block_table(jbd_fs,
627                                 header + 1,
628                                 jbd_get32(&jbd_fs->sb, blocksize) -
629                                         sizeof(struct jbd_bhdr),
630                                 jbd_replay_block_tags,
631                                 arg);
632 }
633
634 int jbd_iterate_log(struct jbd_fs *jbd_fs,
635                     struct recover_info *info,
636                     int action)
637 {
638         int r = EOK;
639         bool log_end = false;
640         struct jbd_sb *sb = &jbd_fs->sb;
641         uint32_t start_trans_id, this_trans_id;
642         uint32_t start_block, this_block;
643
644         start_trans_id = this_trans_id = jbd_get32(sb, sequence);
645         start_block = this_block = jbd_get32(sb, start);
646
647         ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %" PRIu32 "\n",
648                             start_trans_id);
649
650         while (!log_end) {
651                 struct ext4_block block;
652                 struct jbd_bhdr *header;
653                 if (action != ACTION_SCAN)
654                         if (this_trans_id > info->last_trans_id) {
655                                 log_end = true;
656                                 continue;
657                         }
658
659                 r = jbd_block_get(jbd_fs, &block, this_block);
660                 if (r != EOK)
661                         break;
662
663                 header = (struct jbd_bhdr *)block.data;
664                 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER) {
665                         jbd_block_set(jbd_fs, &block);
666                         log_end = true;
667                         continue;
668                 }
669
670                 if (jbd_get32(header, sequence) != this_trans_id) {
671                         if (action != ACTION_SCAN)
672                                 r = EIO;
673
674                         jbd_block_set(jbd_fs, &block);
675                         log_end = true;
676                         continue;
677                 }
678
679                 switch (jbd_get32(header, blocktype)) {
680                 case JBD_DESCRIPTOR_BLOCK:
681                         ext4_dbg(DEBUG_JBD, "Descriptor block: %" PRIu32", "
682                                             "trans_id: %" PRIu32"\n",
683                                             this_block, this_trans_id);
684                         if (action == ACTION_RECOVER) {
685                                 struct replay_arg replay_arg;
686                                 replay_arg.info = info;
687                                 replay_arg.this_block = &this_block;
688                                 replay_arg.this_trans_id = this_trans_id;
689
690                                 jbd_replay_descriptor_block(jbd_fs,
691                                                 header, &replay_arg);
692                         } else
693                                 jbd_debug_descriptor_block(jbd_fs,
694                                                 header, &this_block);
695
696                         break;
697                 case JBD_COMMIT_BLOCK:
698                         ext4_dbg(DEBUG_JBD, "Commit block: %" PRIu32", "
699                                             "trans_id: %" PRIu32"\n",
700                                             this_block, this_trans_id);
701                         this_trans_id++;
702                         break;
703                 case JBD_REVOKE_BLOCK:
704                         ext4_dbg(DEBUG_JBD, "Revoke block: %" PRIu32", "
705                                             "trans_id: %" PRIu32"\n",
706                                             this_block, this_trans_id);
707                         if (action == ACTION_REVOKE) {
708                                 info->this_trans_id = this_trans_id;
709                                 jbd_build_revoke_tree(jbd_fs,
710                                                 header, info);
711                         }
712                         break;
713                 default:
714                         log_end = true;
715                         break;
716                 }
717                 jbd_block_set(jbd_fs, &block);
718                 this_block++;
719                 wrap(sb, this_block);
720                 if (this_block == start_block)
721                         log_end = true;
722
723         }
724         ext4_dbg(DEBUG_JBD, "End of journal.\n");
725         if (r == EOK && action == ACTION_SCAN) {
726                 info->start_trans_id = start_trans_id;
727                 if (this_trans_id > start_trans_id)
728                         info->last_trans_id = this_trans_id - 1;
729                 else
730                         info->last_trans_id = this_trans_id;
731         }
732
733         return r;
734 }
735
736 int jbd_recover(struct jbd_fs *jbd_fs)
737 {
738         int r;
739         struct recover_info info;
740         struct jbd_sb *sb = &jbd_fs->sb;
741         if (!sb->start)
742                 return EOK;
743
744         RB_INIT(&info.revoke_root);
745
746         r = jbd_iterate_log(jbd_fs, &info, ACTION_SCAN);
747         if (r != EOK)
748                 return r;
749
750         r = jbd_iterate_log(jbd_fs, &info, ACTION_REVOKE);
751         if (r != EOK)
752                 return r;
753
754         r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);
755         if (r == EOK) {
756                 jbd_set32(&jbd_fs->sb, start, 0);
757                 jbd_fs->dirty = true;
758         }
759         jbd_destroy_revoke_tree(&info);
760         return r;
761 }
762
763 void jbd_journal_write_sb(struct jbd_journal *journal)
764 {
765         struct jbd_fs *jbd_fs = journal->jbd_fs;
766         jbd_set32(&jbd_fs->sb, start, journal->first);
767         jbd_set32(&jbd_fs->sb, sequence, journal->trans_id);
768         jbd_fs->dirty = true;
769 }
770
771 int jbd_journal_start(struct jbd_fs *jbd_fs,
772                       struct jbd_journal *journal)
773 {
774         journal->first = jbd_get32(&jbd_fs->sb, first);
775         journal->start = journal->first;
776         journal->last = journal->first;
777         journal->trans_id = 1;
778
779         journal->block_size = jbd_get32(&jbd_fs->sb, blocksize);
780
781         TAILQ_INIT(&journal->trans_queue);
782         journal->jbd_fs = jbd_fs;
783         jbd_journal_write_sb(journal);
784         return jbd_write_sb(jbd_fs);
785 }
786
787 int jbd_journal_stop(struct jbd_journal *journal)
788 {
789         journal->start = 0;
790         journal->trans_id = 0;
791         jbd_journal_write_sb(journal);
792         return jbd_write_sb(journal->jbd_fs);
793 }
794
795 static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal)
796 {
797         uint32_t start_block = journal->last++;
798         wrap(&journal->jbd_fs->sb, journal->last);
799         return start_block;
800 }
801
802 struct jbd_trans *
803 jbd_journal_new_trans(struct jbd_journal *journal)
804 {
805         struct jbd_trans *trans = calloc(1, sizeof(struct jbd_trans));
806         if (!trans)
807                 return NULL;
808
809         /* We will assign a trans_id to this transaction,
810          * once it has been committed.*/
811         trans->journal = journal;
812         trans->error = EOK;
813         return trans;
814 }
815
816 int jbd_trans_add_block(struct jbd_trans *trans,
817                         struct ext4_block *block)
818 {
819         struct jbd_buf *buf = calloc(1, sizeof(struct jbd_buf));
820         if (!buf)
821                 return ENOMEM;
822
823         buf->trans = trans;
824         buf->block = *block;
825         ext4_bcache_inc_ref(block->buf);
826         trans->data_cnt++;
827         LIST_INSERT_HEAD(&trans->buf_list, buf, buf_node);
828         return EOK;
829 }
830
831 int jbd_trans_revoke_block(struct jbd_trans *trans,
832                            ext4_fsblk_t lba)
833 {
834         struct jbd_revoke_rec *rec =
835                 calloc(1, sizeof(struct jbd_revoke_rec));
836         if (!rec)
837                 return ENOMEM;
838
839         rec->lba = lba;
840         LIST_INSERT_HEAD(&trans->revoke_list, rec, revoke_node);
841         return EOK;
842 }
843
844 void jbd_journal_free_trans(struct jbd_journal *journal,
845                             struct jbd_trans *trans)
846 {
847         struct jbd_buf *jbd_buf, *tmp;
848         struct jbd_revoke_rec *rec, *tmp2;
849         LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node,
850                           tmp) {
851                 ext4_block_set(journal->jbd_fs->bdev, &jbd_buf->block);
852                 LIST_REMOVE(jbd_buf, buf_node);
853                 free(jbd_buf);
854         }
855         LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
856                           tmp2) {
857                 LIST_REMOVE(rec, revoke_node);
858                 free(rec);
859         }
860
861         free(trans);
862 }
863
864 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
865                           struct ext4_buf *buf __unused,
866                           int res,
867                           void *arg)
868 {
869         struct jbd_trans *trans = arg;
870         trans->error = res;
871 }
872
873 static int jbd_trans_write_commit_block(struct jbd_trans *trans)
874 {
875         int rc;
876         struct jbd_commit_header *header;
877         uint32_t commit_iblock = 0;
878         struct ext4_block commit_block;
879         struct jbd_journal *journal = trans->journal;
880
881         commit_iblock = jbd_journal_alloc_block(trans->journal);
882         rc = jbd_block_get_noread(journal->jbd_fs,
883                         &commit_block, commit_iblock);
884         if (rc != EOK)
885                 return rc;
886
887         header = (struct jbd_commit_header *)commit_block.data;
888         header->header.magic = JBD_MAGIC_NUMBER;
889         header->header.blocktype = JBD_COMMIT_BLOCK;
890         header->header.sequence = trans->trans_id;
891
892         ext4_bcache_set_dirty(commit_block.buf);
893         rc = jbd_block_set(journal->jbd_fs, &commit_block);
894         if (rc != EOK)
895                 return rc;
896
897         return EOK;
898 }
899
900 static int jbd_journal_prepare(struct jbd_journal *journal,
901                                struct jbd_trans *trans)
902 {
903         int rc = EOK, i = 0;
904         int32_t tag_tbl_size;
905         uint32_t desc_iblock = 0;
906         uint32_t data_iblock = 0;
907         char *tag_start = NULL, *tag_ptr = NULL;
908         struct jbd_buf *jbd_buf;
909         struct ext4_block desc_block, data_block;
910
911         LIST_FOREACH(jbd_buf, &trans->buf_list, buf_node) {
912                 struct tag_info tag_info;
913                 bool uuid_exist = false;
914 again:
915                 if (!desc_iblock) {
916                         struct jbd_bhdr *bhdr;
917                         desc_iblock = jbd_journal_alloc_block(journal);
918                         rc = jbd_block_get_noread(journal->jbd_fs,
919                                            &desc_block, desc_iblock);
920                         if (!rc)
921                                 break;
922
923                         ext4_bcache_set_dirty(desc_block.buf);
924
925                         bhdr = (struct jbd_bhdr *)desc_block.data;
926                         bhdr->magic = JBD_MAGIC_NUMBER;
927                         bhdr->blocktype = JBD_DESCRIPTOR_BLOCK;
928                         bhdr->sequence = trans->trans_id;
929
930                         tag_start = (char *)(bhdr + 1);
931                         tag_ptr = tag_start;
932                         uuid_exist = true;
933                         tag_tbl_size = journal->block_size -
934                                 sizeof(struct jbd_bhdr);
935                 }
936                 tag_info.block = jbd_buf->block.lb_id;
937                 tag_info.uuid_exist = uuid_exist;
938                 if (i == trans->data_cnt - 1)
939                         tag_info.last_tag = true;
940
941                 if (uuid_exist)
942                         memcpy(tag_info.uuid, journal->jbd_fs->sb.uuid,
943                                         UUID_SIZE);
944
945                 rc = jbd_write_block_tag(journal->jbd_fs,
946                                 tag_ptr,
947                                 tag_tbl_size,
948                                 &tag_info);
949                 if (rc != EOK) {
950                         jbd_block_set(journal->jbd_fs, &desc_block);
951                         desc_iblock = 0;
952                         goto again;
953                 }
954
955                 data_iblock = jbd_journal_alloc_block(journal);
956                 rc = jbd_block_get_noread(journal->jbd_fs,
957                                 &data_block, data_iblock);
958                 if (rc != EOK)
959                         break;
960
961                 ext4_bcache_set_dirty(data_block.buf);
962
963                 memcpy(data_block.data, jbd_buf->block.data,
964                         journal->block_size);
965
966                 rc = jbd_block_set(journal->jbd_fs, &data_block);
967                 if (rc != EOK)
968                         break;
969
970                 tag_ptr += tag_info.tag_bytes;
971                 tag_tbl_size -= tag_info.tag_bytes;
972
973                 i++;
974         }
975         if (rc == EOK && desc_iblock)
976                 jbd_block_set(journal->jbd_fs, &desc_block);
977
978         return rc;
979 }
980
981 static int
982 jbd_journal_prepare_revoke(struct jbd_journal *journal,
983                            struct jbd_trans *trans)
984 {
985         int rc = EOK, i = 0;
986         int32_t tag_tbl_size;
987         uint32_t desc_iblock = 0;
988         char *blocks_entry = NULL;
989         struct jbd_revoke_rec *rec, *tmp;
990         struct ext4_block desc_block;
991         struct jbd_revoke_header *header = NULL;
992         int32_t record_len = 4;
993
994         if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
995                                      JBD_FEATURE_INCOMPAT_64BIT))
996                 record_len = 8;
997
998         LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
999                           tmp) {
1000 again:
1001                 if (!desc_iblock) {
1002                         struct jbd_bhdr *bhdr;
1003                         desc_iblock = jbd_journal_alloc_block(journal);
1004                         rc = jbd_block_get_noread(journal->jbd_fs,
1005                                            &desc_block, desc_iblock);
1006                         if (!rc) {
1007                                 break;
1008                         }
1009
1010                         ext4_bcache_set_dirty(desc_block.buf);
1011
1012                         bhdr = (struct jbd_bhdr *)desc_block.data;
1013                         bhdr->magic = JBD_MAGIC_NUMBER;
1014                         bhdr->blocktype = JBD_REVOKE_BLOCK;
1015                         bhdr->sequence = trans->trans_id;
1016                         
1017                         header = (struct jbd_revoke_header *)bhdr;
1018                         blocks_entry = (char *)(header + 1);
1019                         tag_tbl_size = journal->block_size -
1020                                 sizeof(struct jbd_revoke_header);
1021                 }
1022
1023                 if (tag_tbl_size < record_len) {
1024                         header->count = journal->block_size - tag_tbl_size;
1025                         jbd_block_set(journal->jbd_fs, &desc_block);
1026                         desc_iblock = 0;
1027                         header = NULL;
1028                         goto again;
1029                 }
1030                 if (record_len == 8) {
1031                         uint64_t *blocks =
1032                                 (uint64_t *)blocks_entry;
1033                         *blocks = to_be64(rec->lba);
1034                 } else {
1035                         uint32_t *blocks =
1036                                 (uint32_t *)blocks_entry;
1037                         *blocks = to_be32(rec->lba);
1038                 }
1039                 blocks_entry += record_len;
1040                 tag_tbl_size -= record_len;
1041
1042                 i++;
1043         }
1044         if (rc == EOK && desc_iblock) {
1045                 if (header != NULL)
1046                         header->count = journal->block_size - tag_tbl_size;
1047
1048                 jbd_block_set(journal->jbd_fs, &desc_block);
1049         }
1050
1051         return rc;
1052 }
1053
1054 void
1055 jbd_journal_submit_trans(struct jbd_journal *journal,
1056                          struct jbd_trans *trans)
1057 {
1058         TAILQ_INSERT_TAIL(&journal->trans_queue,
1059                           trans,
1060                           trans_node);
1061 }
1062
1063 /*
1064  * XXX: one should disable cache writeback first.
1065  */
1066 void
1067 jbd_journal_commit_to_disk(struct jbd_journal *journal)
1068 {
1069         int rc;
1070         uint32_t last = journal->last,
1071                  trans_id = journal->trans_id,
1072                  start = journal->start;
1073         struct jbd_trans *trans, *tmp;
1074         TAILQ_FOREACH_SAFE(trans, &journal->trans_queue,
1075                            trans_node,
1076                            tmp) {
1077                 struct jbd_buf *jbd_buf;
1078                 TAILQ_REMOVE(&journal->trans_queue, trans, trans_node);
1079
1080                 trans->trans_id = trans_id + 1;
1081                 rc = jbd_journal_prepare(journal, trans);
1082                 if (rc != EOK) {
1083                         journal->last = last;
1084                         jbd_journal_free_trans(journal, trans);
1085                         continue;
1086                 }
1087                 rc = jbd_journal_prepare_revoke(journal, trans);
1088                 if (rc != EOK) {
1089                         journal->last = last;
1090                         jbd_journal_free_trans(journal, trans);
1091                         continue;
1092                 }
1093                 rc = jbd_trans_write_commit_block(trans);
1094                 if (rc != EOK) {
1095                         journal->last = last;
1096                         jbd_journal_free_trans(journal, trans);
1097                         continue;
1098                 }
1099                 LIST_FOREACH(jbd_buf, &trans->buf_list, buf_node) {
1100                         struct ext4_block *block = &jbd_buf->block;
1101                         block->buf->end_write = jbd_trans_end_write;
1102                         block->buf->end_write_arg = trans;
1103                         ext4_block_set(journal->jbd_fs->inode_ref.fs->bdev,
1104                                         block);
1105                 }
1106                 if (trans->error != EOK) {
1107                         journal->last = last;
1108                         jbd_journal_free_trans(journal, trans);
1109                         continue;
1110                 }
1111
1112                 start = last;
1113                 trans_id++;
1114                 last = journal->last;
1115                 jbd_journal_free_trans(journal, trans);
1116         }
1117         
1118         journal->start = start;
1119         journal->trans_id = trans_id;
1120         jbd_journal_write_sb(journal);
1121 }
1122
1123 /**
1124  * @}
1125  */