2 * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3 * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 /** @addtogroup lwext4
34 * @file ext4_journal.c
35 * @brief Journal handle functions
38 #include "ext4_config.h"
39 #include "ext4_types.h"
41 #include "ext4_super.h"
42 #include "ext4_journal.h"
43 #include "ext4_errno.h"
44 #include "ext4_blockdev.h"
45 #include "ext4_crc32c.h"
46 #include "ext4_debug.h"
52 /**@brief Revoke entry during journal replay.*/
54 /**@brief Block number not to be replayed.*/
57 /**@brief For any transaction id smaller
58 * than trans_id, records of @block
59 * in those transactions should not
63 /**@brief Revoke tree node.*/
64 RB_ENTRY(revoke_entry) revoke_node;
67 /**@brief Valid journal replay information.*/
69 /**@brief Starting transaction id.*/
70 uint32_t start_trans_id;
72 /**@brief Ending transaction id.*/
73 uint32_t last_trans_id;
75 /**@brief Used as internal argument.*/
76 uint32_t this_trans_id;
78 /**@brief RB-Tree storing revoke entries.*/
79 RB_HEAD(jbd_revoke, revoke_entry) revoke_root;
82 /**@brief Journal replay internal arguments.*/
84 /**@brief Journal replay information.*/
85 struct recover_info *info;
87 /**@brief Current block we are on.*/
90 /**@brief Current trans_id we are on.*/
91 uint32_t this_trans_id;
95 jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b)
97 if (a->block > b->block)
99 else if (a->block < b->block)
104 RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
105 jbd_revoke_entry_cmp, static inline)
107 #define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry))
108 #define jbd_free_revoke_entry(addr) free(addr)
110 /**@brief Data block lookup helper.*/
111 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
113 ext4_fsblk_t *fblock);
115 /**@brief Write jbd superblock to disk.
116 * @param jbd_fs jbd filesystem
117 * @param s jbd superblock
118 * @return standard error code*/
119 int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s)
122 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
125 rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
129 offset = fblock * ext4_sb_get_block_size(&fs->sb);
130 return ext4_block_writebytes(fs->bdev, offset, s,
131 EXT4_SUPERBLOCK_SIZE);
134 /**@brief Read jbd superblock from disk.
135 * @param jbd_fs jbd filesystem
136 * @param s jbd superblock
137 * @return standard error code*/
138 int jbd_sb_read(struct jbd_fs *jbd_fs, struct jbd_sb *s)
141 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
144 rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
148 offset = fblock * ext4_sb_get_block_size(&fs->sb);
149 return ext4_block_readbytes(fs->bdev, offset, s,
150 EXT4_SUPERBLOCK_SIZE);
153 /**@brief Verify jbd superblock.
154 * @param sb jbd superblock
155 * @return true if jbd superblock is valid */
156 static bool jbd_verify_sb(struct jbd_sb *sb)
158 struct jbd_bhdr *header = &sb->header;
159 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER)
162 if (jbd_get32(header, blocktype) != JBD_SUPERBLOCK &&
163 jbd_get32(header, blocktype) != JBD_SUPERBLOCK_V2)
169 /**@brief Write back dirty jbd superblock to disk.
170 * @param jbd_fs jbd filesystem
171 * @return standard error code*/
172 static int jbd_write_sb(struct jbd_fs *jbd_fs)
176 rc = jbd_sb_write(jbd_fs, &jbd_fs->sb);
180 jbd_fs->dirty = false;
185 /**@brief Get reference to jbd filesystem.
186 * @param fs Filesystem to load journal of
187 * @param jbd_fs jbd filesystem
188 * @return standard error code*/
189 int jbd_get_fs(struct ext4_fs *fs,
190 struct jbd_fs *jbd_fs)
193 uint32_t journal_ino;
195 memset(jbd_fs, 0, sizeof(struct jbd_fs));
196 /* See if there is journal inode on this filesystem.*/
197 /* FIXME: detection on existance ofbkejournal bdev is
199 journal_ino = ext4_get32(&fs->sb, journal_inode_number);
201 rc = ext4_fs_get_inode_ref(fs,
205 memset(jbd_fs, 0, sizeof(struct jbd_fs));
208 rc = jbd_sb_read(jbd_fs, &jbd_fs->sb);
210 memset(jbd_fs, 0, sizeof(struct jbd_fs));
211 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
214 if (!jbd_verify_sb(&jbd_fs->sb)) {
215 memset(jbd_fs, 0, sizeof(struct jbd_fs));
216 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
223 /**@brief Put reference of jbd filesystem.
224 * @param jbd_fs jbd filesystem
225 * @return standard error code*/
226 int jbd_put_fs(struct jbd_fs *jbd_fs)
229 rc = jbd_write_sb(jbd_fs);
231 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
235 /**@brief Data block lookup helper.
236 * @param jbd_fs jbd filesystem
237 * @param iblock block index
238 * @param fblock logical block address
239 * @return standard error code*/
240 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
242 ext4_fsblk_t *fblock)
244 int rc = ext4_fs_get_inode_dblk_idx(
252 /**@brief jbd block get function (through cache).
253 * @param jbd_fs jbd filesystem
254 * @param block block descriptor
255 * @param fblock jbd logical block address
256 * @return standard error code*/
257 int jbd_block_get(struct jbd_fs *jbd_fs,
258 struct ext4_block *block,
261 /* TODO: journal device. */
263 ext4_lblk_t iblock = (ext4_lblk_t)fblock;
265 /* Lookup the logical block address of
267 rc = jbd_inode_bmap(jbd_fs, iblock,
272 struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
273 rc = ext4_block_get(bdev, block, fblock);
275 /* If succeeded, mark buffer as BC_FLUSH to indicate
276 * that data should be written to disk immediately.*/
278 ext4_bcache_set_flag(block->buf, BC_FLUSH);
283 /**@brief jbd block get function (through cache, don't read).
284 * @param jbd_fs jbd filesystem
285 * @param block block descriptor
286 * @param fblock jbd logical block address
287 * @return standard error code*/
288 int jbd_block_get_noread(struct jbd_fs *jbd_fs,
289 struct ext4_block *block,
292 /* TODO: journal device. */
294 ext4_lblk_t iblock = (ext4_lblk_t)fblock;
295 rc = jbd_inode_bmap(jbd_fs, iblock,
300 struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
301 rc = ext4_block_get_noread(bdev, block, fblock);
303 ext4_bcache_set_flag(block->buf, BC_FLUSH);
308 /**@brief jbd block set procedure (through cache).
309 * @param jbd_fs jbd filesystem
310 * @param block block descriptor
311 * @return standard error code*/
312 int jbd_block_set(struct jbd_fs *jbd_fs,
313 struct ext4_block *block)
315 return ext4_block_set(jbd_fs->inode_ref.fs->bdev,
319 /**@brief helper functions to calculate
320 * block tag size, not including UUID part.
321 * @param jbd_fs jbd filesystem
322 * @return tag size in bytes*/
323 int jbd_tag_bytes(struct jbd_fs *jbd_fs)
327 /* It is very easy to deal with the case which
328 * JBD_FEATURE_INCOMPAT_CSUM_V3 is enabled.*/
329 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
330 JBD_FEATURE_INCOMPAT_CSUM_V3))
331 return sizeof(struct jbd_block_tag3);
333 size = sizeof(struct jbd_block_tag);
335 /* If JBD_FEATURE_INCOMPAT_CSUM_V2 is enabled,
336 * add 2 bytes to size.*/
337 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
338 JBD_FEATURE_INCOMPAT_CSUM_V2))
339 size += sizeof(uint16_t);
341 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
342 JBD_FEATURE_INCOMPAT_64BIT))
345 /* If block number is 4 bytes in size,
346 * minus 4 bytes from size */
347 return size - sizeof(uint32_t);
350 /**@brief Tag information. */
352 /**@brief Tag size in bytes, including UUID part.*/
355 /**@brief block number stored in this tag.*/
358 /**@brief whether UUID part exists or not.*/
361 /**@brief UUID content if UUID part exists.*/
362 uint8_t uuid[UUID_SIZE];
364 /**@brief Is this the last tag? */
368 /**@brief Extract information from a block tag.
369 * @param __tag pointer to the block tag
370 * @param tag_bytes block tag size of this jbd filesystem
371 * @param remaining size in buffer containing the block tag
372 * @param tag_info information of this tag.
373 * @return EOK when succeed, otherwise return EINVAL.*/
375 jbd_extract_block_tag(struct jbd_fs *jbd_fs,
378 int32_t remain_buf_size,
379 struct tag_info *tag_info)
382 tag_info->tag_bytes = tag_bytes;
383 tag_info->uuid_exist = false;
384 tag_info->last_tag = false;
386 /* See whether it is possible to hold a valid block tag.*/
387 if (remain_buf_size - tag_bytes < 0)
390 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
391 JBD_FEATURE_INCOMPAT_CSUM_V3)) {
392 struct jbd_block_tag3 *tag = __tag;
393 tag_info->block = jbd_get32(tag, blocknr);
394 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
395 JBD_FEATURE_INCOMPAT_64BIT))
397 (uint64_t)jbd_get32(tag, blocknr_high) << 32;
399 if (jbd_get32(tag, flags) & JBD_FLAG_ESCAPE)
402 if (!(jbd_get32(tag, flags) & JBD_FLAG_SAME_UUID)) {
403 /* See whether it is possible to hold UUID part.*/
404 if (remain_buf_size - tag_bytes < UUID_SIZE)
407 uuid_start = (char *)tag + tag_bytes;
408 tag_info->uuid_exist = true;
409 tag_info->tag_bytes += UUID_SIZE;
410 memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
413 if (jbd_get32(tag, flags) & JBD_FLAG_LAST_TAG)
414 tag_info->last_tag = true;
417 struct jbd_block_tag *tag = __tag;
418 tag_info->block = jbd_get32(tag, blocknr);
419 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
420 JBD_FEATURE_INCOMPAT_64BIT))
422 (uint64_t)jbd_get32(tag, blocknr_high) << 32;
424 if (jbd_get16(tag, flags) & JBD_FLAG_ESCAPE)
427 if (!(jbd_get16(tag, flags) & JBD_FLAG_SAME_UUID)) {
428 /* See whether it is possible to hold UUID part.*/
429 if (remain_buf_size - tag_bytes < UUID_SIZE)
432 uuid_start = (char *)tag + tag_bytes;
433 tag_info->uuid_exist = true;
434 tag_info->tag_bytes += UUID_SIZE;
435 memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
438 if (jbd_get16(tag, flags) & JBD_FLAG_LAST_TAG)
439 tag_info->last_tag = true;
445 /**@brief Write information to a block tag.
446 * @param __tag pointer to the block tag
447 * @param remaining size in buffer containing the block tag
448 * @param tag_info information of this tag.
449 * @return EOK when succeed, otherwise return EINVAL.*/
451 jbd_write_block_tag(struct jbd_fs *jbd_fs,
453 int32_t remain_buf_size,
454 struct tag_info *tag_info)
457 int tag_bytes = jbd_tag_bytes(jbd_fs);
459 tag_info->tag_bytes = tag_bytes;
461 /* See whether it is possible to hold a valid block tag.*/
462 if (remain_buf_size - tag_bytes < 0)
465 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
466 JBD_FEATURE_INCOMPAT_CSUM_V3)) {
467 struct jbd_block_tag3 *tag = __tag;
468 jbd_set32(tag, blocknr, tag_info->block);
469 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
470 JBD_FEATURE_INCOMPAT_64BIT))
471 jbd_set32(tag, blocknr_high, tag_info->block >> 32);
473 if (tag_info->uuid_exist) {
474 /* See whether it is possible to hold UUID part.*/
475 if (remain_buf_size - tag_bytes < UUID_SIZE)
478 uuid_start = (char *)tag + tag_bytes;
479 tag_info->tag_bytes += UUID_SIZE;
480 memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
482 jbd_set32(tag, flags,
483 jbd_get32(tag, flags) | JBD_FLAG_SAME_UUID);
485 if (tag_info->last_tag)
486 jbd_set32(tag, flags,
487 jbd_get32(tag, flags) | JBD_FLAG_LAST_TAG);
490 struct jbd_block_tag *tag = __tag;
491 jbd_set32(tag, blocknr, tag_info->block);
492 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
493 JBD_FEATURE_INCOMPAT_64BIT))
494 jbd_set32(tag, blocknr_high, tag_info->block >> 32);
496 if (tag_info->uuid_exist) {
497 /* See whether it is possible to hold UUID part.*/
498 if (remain_buf_size - tag_bytes < UUID_SIZE)
501 uuid_start = (char *)tag + tag_bytes;
502 tag_info->tag_bytes += UUID_SIZE;
503 memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
505 jbd_set16(tag, flags,
506 jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID);
508 if (tag_info->last_tag)
509 jbd_set16(tag, flags,
510 jbd_get16(tag, flags) | JBD_FLAG_LAST_TAG);
516 /**@brief Iterate all block tags in a block.
517 * @param jbd_fs jbd filesystem
518 * @param __tag_start pointer to the block
519 * @param tag_tbl_size size of the block
520 * @param func callback routine to indicate that
521 * a block tag is found
522 * @param arg additional argument to be passed to func */
524 jbd_iterate_block_table(struct jbd_fs *jbd_fs,
526 int32_t tag_tbl_size,
527 void (*func)(struct jbd_fs * jbd_fs,
533 char *tag_start, *tag_ptr;
534 int tag_bytes = jbd_tag_bytes(jbd_fs);
535 tag_start = __tag_start;
538 /* Cut off the size of block tail storing checksum. */
539 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
540 JBD_FEATURE_INCOMPAT_CSUM_V2) ||
541 JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
542 JBD_FEATURE_INCOMPAT_CSUM_V3))
543 tag_tbl_size -= sizeof(struct jbd_block_tail);
545 while (tag_tbl_size) {
546 struct tag_info tag_info;
547 int rc = jbd_extract_block_tag(jbd_fs,
556 func(jbd_fs, tag_info.block, tag_info.uuid, arg);
558 /* Stop the iteration when we reach the last tag. */
559 if (tag_info.last_tag)
562 tag_ptr += tag_info.tag_bytes;
563 tag_tbl_size -= tag_info.tag_bytes;
567 static void jbd_display_block_tags(struct jbd_fs *jbd_fs,
572 uint32_t *iblock = arg;
573 ext4_dbg(DEBUG_JBD, "Block in block_tag: %" PRIu64 "\n", block);
580 static struct revoke_entry *
581 jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)
583 struct revoke_entry tmp = {
587 return RB_FIND(jbd_revoke, &info->revoke_root, &tmp);
590 /**@brief Replay a block in a transaction.
591 * @param jbd_fs jbd filesystem
592 * @param block block address to be replayed.*/
593 static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
595 uint8_t *uuid __unused,
599 struct replay_arg *arg = __arg;
600 struct recover_info *info = arg->info;
601 uint32_t *this_block = arg->this_block;
602 struct revoke_entry *revoke_entry;
603 struct ext4_block journal_block, ext4_block;
604 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
608 /* We replay this block only if the current transaction id
609 * is equal or greater than that in revoke entry.*/
610 revoke_entry = jbd_revoke_entry_lookup(info, block);
612 arg->this_trans_id < revoke_entry->trans_id)
616 "Replaying block in block_tag: %" PRIu64 "\n",
619 r = jbd_block_get(jbd_fs, &journal_block, *this_block);
623 /* We need special treatment for ext4 superblock. */
625 r = ext4_block_get_noread(fs->bdev, &ext4_block, block);
627 jbd_block_set(jbd_fs, &journal_block);
631 memcpy(ext4_block.data,
633 jbd_get32(&jbd_fs->sb, blocksize));
635 ext4_bcache_set_dirty(ext4_block.buf);
636 ext4_block_set(fs->bdev, &ext4_block);
638 uint16_t mount_count, state;
639 mount_count = ext4_get16(&fs->sb, mount_count);
640 state = ext4_get16(&fs->sb, state);
643 journal_block.data + EXT4_SUPERBLOCK_OFFSET,
644 EXT4_SUPERBLOCK_SIZE);
646 /* Mark system as mounted */
647 ext4_set16(&fs->sb, state, state);
648 r = ext4_sb_write(fs->bdev, &fs->sb);
652 /*Update mount count*/
653 ext4_set16(&fs->sb, mount_count, mount_count);
656 jbd_block_set(jbd_fs, &journal_block);
661 /**@brief Add block address to revoke tree, along with
662 * its transaction id.
663 * @param info journal replay info
664 * @param block block address to be replayed.*/
665 static void jbd_add_revoke_block_tags(struct recover_info *info,
668 struct revoke_entry *revoke_entry;
670 ext4_dbg(DEBUG_JBD, "Add block %" PRIu64 " to revoke tree\n", block);
671 /* If the revoke entry with respect to the block address
672 * exists already, update its transaction id.*/
673 revoke_entry = jbd_revoke_entry_lookup(info, block);
675 revoke_entry->trans_id = info->this_trans_id;
679 revoke_entry = jbd_alloc_revoke_entry();
680 ext4_assert(revoke_entry);
681 revoke_entry->block = block;
682 revoke_entry->trans_id = info->this_trans_id;
683 RB_INSERT(jbd_revoke, &info->revoke_root, revoke_entry);
688 static void jbd_destroy_revoke_tree(struct recover_info *info)
690 while (!RB_EMPTY(&info->revoke_root)) {
691 struct revoke_entry *revoke_entry =
692 RB_MIN(jbd_revoke, &info->revoke_root);
693 ext4_assert(revoke_entry);
694 RB_REMOVE(jbd_revoke, &info->revoke_root, revoke_entry);
695 jbd_free_revoke_entry(revoke_entry);
699 /* Make sure we wrap around the log correctly! */
700 #define wrap(sb, var) \
702 if (var >= jbd_get32((sb), maxlen)) \
703 var -= (jbd_get32((sb), maxlen) - jbd_get32((sb), first)); \
706 #define ACTION_SCAN 0
707 #define ACTION_REVOKE 1
708 #define ACTION_RECOVER 2
710 /**@brief Add entries in a revoke block to revoke tree.
711 * @param jbd_fs jbd filesystem
712 * @param header revoke block header
713 * @param recover_info journal replay info*/
714 static void jbd_build_revoke_tree(struct jbd_fs *jbd_fs,
715 struct jbd_bhdr *header,
716 struct recover_info *info)
719 struct jbd_revoke_header *revoke_hdr =
720 (struct jbd_revoke_header *)header;
721 uint32_t i, nr_entries, record_len = 4;
723 /* If we are working on a 64bit jbd filesystem, */
724 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
725 JBD_FEATURE_INCOMPAT_64BIT))
728 nr_entries = (jbd_get32(revoke_hdr, count) -
729 sizeof(struct jbd_revoke_header)) /
732 blocks_entry = (char *)(revoke_hdr + 1);
734 for (i = 0;i < nr_entries;i++) {
735 if (record_len == 8) {
737 (uint64_t *)blocks_entry;
738 jbd_add_revoke_block_tags(info, to_be64(*blocks));
741 (uint32_t *)blocks_entry;
742 jbd_add_revoke_block_tags(info, to_be32(*blocks));
744 blocks_entry += record_len;
748 static void jbd_debug_descriptor_block(struct jbd_fs *jbd_fs,
749 struct jbd_bhdr *header,
752 jbd_iterate_block_table(jbd_fs,
754 jbd_get32(&jbd_fs->sb, blocksize) -
755 sizeof(struct jbd_bhdr),
756 jbd_display_block_tags,
760 static void jbd_replay_descriptor_block(struct jbd_fs *jbd_fs,
761 struct jbd_bhdr *header,
762 struct replay_arg *arg)
764 jbd_iterate_block_table(jbd_fs,
766 jbd_get32(&jbd_fs->sb, blocksize) -
767 sizeof(struct jbd_bhdr),
768 jbd_replay_block_tags,
772 /**@brief The core routine of journal replay.
773 * @param jbd_fs jbd filesystem
774 * @param recover_info journal replay info
775 * @param action action needed to be taken
776 * @return standard error code*/
777 int jbd_iterate_log(struct jbd_fs *jbd_fs,
778 struct recover_info *info,
782 bool log_end = false;
783 struct jbd_sb *sb = &jbd_fs->sb;
784 uint32_t start_trans_id, this_trans_id;
785 uint32_t start_block, this_block;
787 /* We start iterating valid blocks in the whole journal.*/
788 start_trans_id = this_trans_id = jbd_get32(sb, sequence);
789 start_block = this_block = jbd_get32(sb, start);
791 ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %" PRIu32 "\n",
795 struct ext4_block block;
796 struct jbd_bhdr *header;
797 /* If we are not scanning for the last
798 * valid transaction in the journal,
799 * we will stop when we reach the end of
801 if (action != ACTION_SCAN)
802 if (this_trans_id > info->last_trans_id) {
807 r = jbd_block_get(jbd_fs, &block, this_block);
811 header = (struct jbd_bhdr *)block.data;
812 /* This block does not have a valid magic number,
813 * so we have reached the end of the journal.*/
814 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER) {
815 jbd_block_set(jbd_fs, &block);
820 /* If the transaction id we found is not expected,
821 * we may have reached the end of the journal.
823 * If we are not scanning the journal, something
824 * bad might have taken place. :-( */
825 if (jbd_get32(header, sequence) != this_trans_id) {
826 if (action != ACTION_SCAN)
829 jbd_block_set(jbd_fs, &block);
834 switch (jbd_get32(header, blocktype)) {
835 case JBD_DESCRIPTOR_BLOCK:
836 ext4_dbg(DEBUG_JBD, "Descriptor block: %" PRIu32", "
837 "trans_id: %" PRIu32"\n",
838 this_block, this_trans_id);
839 if (action == ACTION_RECOVER) {
840 struct replay_arg replay_arg;
841 replay_arg.info = info;
842 replay_arg.this_block = &this_block;
843 replay_arg.this_trans_id = this_trans_id;
845 jbd_replay_descriptor_block(jbd_fs,
846 header, &replay_arg);
848 jbd_debug_descriptor_block(jbd_fs,
849 header, &this_block);
852 case JBD_COMMIT_BLOCK:
853 ext4_dbg(DEBUG_JBD, "Commit block: %" PRIu32", "
854 "trans_id: %" PRIu32"\n",
855 this_block, this_trans_id);
856 /* This is the end of a transaction,
857 * we may now proceed to the next transaction.
861 case JBD_REVOKE_BLOCK:
862 ext4_dbg(DEBUG_JBD, "Revoke block: %" PRIu32", "
863 "trans_id: %" PRIu32"\n",
864 this_block, this_trans_id);
865 if (action == ACTION_REVOKE) {
866 info->this_trans_id = this_trans_id;
867 jbd_build_revoke_tree(jbd_fs,
875 jbd_block_set(jbd_fs, &block);
877 wrap(sb, this_block);
878 if (this_block == start_block)
882 ext4_dbg(DEBUG_JBD, "End of journal.\n");
883 if (r == EOK && action == ACTION_SCAN) {
884 /* We have finished scanning the journal. */
885 info->start_trans_id = start_trans_id;
886 if (this_trans_id > start_trans_id)
887 info->last_trans_id = this_trans_id - 1;
889 info->last_trans_id = this_trans_id;
895 /**@brief Replay journal.
896 * @param jbd_fs jbd filesystem
897 * @return standard error code*/
898 int jbd_recover(struct jbd_fs *jbd_fs)
901 struct recover_info info;
902 struct jbd_sb *sb = &jbd_fs->sb;
906 RB_INIT(&info.revoke_root);
908 r = jbd_iterate_log(jbd_fs, &info, ACTION_SCAN);
912 r = jbd_iterate_log(jbd_fs, &info, ACTION_REVOKE);
916 r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);
918 /* If we successfully replay the journal,
919 * clear EXT4_FINCOM_RECOVER flag on the
920 * ext4 superblock, and set the start of
922 uint32_t features_incompatible =
923 ext4_get32(&jbd_fs->inode_ref.fs->sb,
924 features_incompatible);
925 jbd_set32(&jbd_fs->sb, start, 0);
926 features_incompatible &= ~EXT4_FINCOM_RECOVER;
927 ext4_set32(&jbd_fs->inode_ref.fs->sb,
928 features_incompatible,
929 features_incompatible);
930 jbd_fs->dirty = true;
931 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
932 &jbd_fs->inode_ref.fs->sb);
934 jbd_destroy_revoke_tree(&info);
938 void jbd_journal_write_sb(struct jbd_journal *journal)
940 struct jbd_fs *jbd_fs = journal->jbd_fs;
941 jbd_set32(&jbd_fs->sb, start, journal->start);
942 jbd_set32(&jbd_fs->sb, sequence, journal->trans_id);
943 jbd_fs->dirty = true;
946 /**@brief Start accessing the journal.
947 * @param jbd_fs jbd filesystem
948 * @param journal current journal session
949 * @return standard error code*/
950 int jbd_journal_start(struct jbd_fs *jbd_fs,
951 struct jbd_journal *journal)
954 uint32_t features_incompatible =
955 ext4_get32(&jbd_fs->inode_ref.fs->sb,
956 features_incompatible);
957 features_incompatible |= EXT4_FINCOM_RECOVER;
958 ext4_set32(&jbd_fs->inode_ref.fs->sb,
959 features_incompatible,
960 features_incompatible);
961 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
962 &jbd_fs->inode_ref.fs->sb);
966 journal->first = jbd_get32(&jbd_fs->sb, first);
967 journal->start = journal->first;
968 journal->last = journal->first;
969 journal->trans_id = 1;
970 journal->alloc_trans_id = 1;
972 journal->block_size = jbd_get32(&jbd_fs->sb, blocksize);
974 TAILQ_INIT(&journal->trans_queue);
975 TAILQ_INIT(&journal->cp_queue);
976 journal->jbd_fs = jbd_fs;
977 jbd_journal_write_sb(journal);
978 return jbd_write_sb(jbd_fs);
981 /**@brief Stop accessing the journal.
982 * @param journal current journal session
983 * @return standard error code*/
984 int jbd_journal_stop(struct jbd_journal *journal)
987 struct jbd_fs *jbd_fs = journal->jbd_fs;
988 uint32_t features_incompatible;
990 /* Commit all the transactions to the journal.*/
991 jbd_journal_commit_all(journal);
992 /* Make sure that journalled content have reached
994 ext4_block_cache_flush(jbd_fs->inode_ref.fs->bdev);
996 features_incompatible =
997 ext4_get32(&jbd_fs->inode_ref.fs->sb,
998 features_incompatible);
999 features_incompatible &= ~EXT4_FINCOM_RECOVER;
1000 ext4_set32(&jbd_fs->inode_ref.fs->sb,
1001 features_incompatible,
1002 features_incompatible);
1003 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1004 &jbd_fs->inode_ref.fs->sb);
1009 journal->trans_id = 0;
1010 jbd_journal_write_sb(journal);
1011 return jbd_write_sb(journal->jbd_fs);
1014 /**@brief Allocate a block in the journal.
1015 * @param journal current journal session
1016 * @param trans transaction
1017 * @return allocated block address*/
1018 static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal,
1019 struct jbd_trans *trans)
1021 uint32_t start_block;
1023 start_block = journal->last++;
1024 trans->alloc_blocks++;
1025 wrap(&journal->jbd_fs->sb, journal->last);
1027 /* If there is no space left, flush all journalled
1028 * blocks to disk first.*/
1029 if (journal->last == journal->start)
1030 ext4_block_cache_flush(journal->jbd_fs->inode_ref.fs->bdev);
1035 /**@brief Allocate a new transaction
1036 * @param journal current journal session
1037 * @return transaction allocated*/
1039 jbd_journal_new_trans(struct jbd_journal *journal)
1041 struct jbd_trans *trans = calloc(1, sizeof(struct jbd_trans));
1045 /* We will assign a trans_id to this transaction,
1046 * once it has been committed.*/
1047 trans->journal = journal;
1052 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
1053 struct ext4_buf *buf __unused,
1057 /**@brief Add block to a transaction
1058 * @param trans transaction
1059 * @param block block descriptor
1060 * @return standard error code*/
1061 int jbd_trans_add_block(struct jbd_trans *trans,
1062 struct ext4_block *block)
1064 struct jbd_buf *buf;
1065 /* We do not need to add those unmodified buffer to
1067 if (!ext4_bcache_test_flag(block->buf, BC_DIRTY))
1070 buf = calloc(1, sizeof(struct jbd_buf));
1075 buf->block = *block;
1076 ext4_bcache_inc_ref(block->buf);
1078 /* If the content reach the disk, notify us
1079 * so that we may do a checkpoint. */
1080 block->buf->end_write = jbd_trans_end_write;
1081 block->buf->end_write_arg = trans;
1084 LIST_INSERT_HEAD(&trans->buf_list, buf, buf_node);
1088 /**@brief Add block to be revoked to a transaction
1089 * @param trans transaction
1090 * @param lba logical block address
1091 * @return standard error code*/
1092 int jbd_trans_revoke_block(struct jbd_trans *trans,
1095 struct jbd_revoke_rec *rec =
1096 calloc(1, sizeof(struct jbd_revoke_rec));
1101 LIST_INSERT_HEAD(&trans->revoke_list, rec, revoke_node);
1105 /**@brief Free a transaction
1106 * @param journal current journal session
1107 * @param trans transaction
1108 * @param abort discard all the modifications on the block?
1109 * @return standard error code*/
1110 void jbd_journal_free_trans(struct jbd_journal *journal,
1111 struct jbd_trans *trans,
1114 struct jbd_buf *jbd_buf, *tmp;
1115 struct jbd_revoke_rec *rec, *tmp2;
1116 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1117 LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node,
1120 ext4_bcache_clear_dirty(jbd_buf->block.buf);
1121 ext4_block_set(fs->bdev, &jbd_buf->block);
1124 LIST_REMOVE(jbd_buf, buf_node);
1127 LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1129 LIST_REMOVE(rec, revoke_node);
1136 /**@brief Write commit block for a transaction
1137 * @param trans transaction
1138 * @return standard error code*/
1139 static int jbd_trans_write_commit_block(struct jbd_trans *trans)
1142 struct jbd_commit_header *header;
1143 uint32_t commit_iblock = 0;
1144 struct ext4_block commit_block;
1145 struct jbd_journal *journal = trans->journal;
1147 commit_iblock = jbd_journal_alloc_block(journal, trans);
1148 rc = jbd_block_get_noread(journal->jbd_fs,
1149 &commit_block, commit_iblock);
1153 header = (struct jbd_commit_header *)commit_block.data;
1154 jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER);
1155 jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);
1156 jbd_set32(&header->header, sequence, trans->trans_id);
1158 ext4_bcache_set_dirty(commit_block.buf);
1159 rc = jbd_block_set(journal->jbd_fs, &commit_block);
1166 /**@brief Write descriptor block for a transaction
1167 * @param journal current journal session
1168 * @param trans transaction
1169 * @return standard error code*/
1170 static int jbd_journal_prepare(struct jbd_journal *journal,
1171 struct jbd_trans *trans)
1173 int rc = EOK, i = 0;
1174 int32_t tag_tbl_size;
1175 uint32_t desc_iblock = 0;
1176 uint32_t data_iblock = 0;
1177 char *tag_start = NULL, *tag_ptr = NULL;
1178 struct jbd_buf *jbd_buf;
1179 struct ext4_block desc_block, data_block;
1181 LIST_FOREACH(jbd_buf, &trans->buf_list, buf_node) {
1182 struct tag_info tag_info;
1183 bool uuid_exist = false;
1186 struct jbd_bhdr *bhdr;
1187 desc_iblock = jbd_journal_alloc_block(journal, trans);
1188 rc = jbd_block_get_noread(journal->jbd_fs,
1189 &desc_block, desc_iblock);
1193 ext4_bcache_set_dirty(desc_block.buf);
1195 bhdr = (struct jbd_bhdr *)desc_block.data;
1196 jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1197 jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK);
1198 jbd_set32(bhdr, sequence, trans->trans_id);
1200 tag_start = (char *)(bhdr + 1);
1201 tag_ptr = tag_start;
1203 tag_tbl_size = journal->block_size -
1204 sizeof(struct jbd_bhdr);
1206 if (!trans->start_iblock)
1207 trans->start_iblock = desc_iblock;
1210 tag_info.block = jbd_buf->block.lb_id;
1211 tag_info.uuid_exist = uuid_exist;
1212 if (i == trans->data_cnt - 1)
1213 tag_info.last_tag = true;
1216 memcpy(tag_info.uuid, journal->jbd_fs->sb.uuid,
1219 rc = jbd_write_block_tag(journal->jbd_fs,
1224 jbd_block_set(journal->jbd_fs, &desc_block);
1229 data_iblock = jbd_journal_alloc_block(journal, trans);
1230 rc = jbd_block_get_noread(journal->jbd_fs,
1231 &data_block, data_iblock);
1235 ext4_bcache_set_dirty(data_block.buf);
1237 memcpy(data_block.data, jbd_buf->block.data,
1238 journal->block_size);
1240 rc = jbd_block_set(journal->jbd_fs, &data_block);
1244 tag_ptr += tag_info.tag_bytes;
1245 tag_tbl_size -= tag_info.tag_bytes;
1249 if (rc == EOK && desc_iblock)
1250 jbd_block_set(journal->jbd_fs, &desc_block);
1255 /**@brief Write revoke block for a transaction
 *
 * Walks trans->revoke_list and stores every record's lba, converted to
 * big-endian, into revoke blocks allocated from the journal. A record
 * takes 8 bytes when record_len is 8 and 4 bytes otherwise. Each revoke
 * block begins with a jbd_revoke_header whose count field is set to the
 * number of bytes used in the block, header included.
 *
 * NOTE(review): several short lines of this function (return type,
 * braces, branch bodies, loop tail, return statement) are not visible in
 * this listing; comments marked "presumably" describe them by inference
 * and must be confirmed against the full file.
1256 * @param journal current journal session
1257 * @param trans transaction
1258 * @return standard error code*/
1260 jbd_journal_prepare_revoke(struct jbd_journal *journal,
1261 struct jbd_trans *trans)
1263 int rc = EOK, i = 0;
/* Bytes still free in the revoke block currently being filled. */
1264 int32_t tag_tbl_size;
/* Journal block number of the current revoke block. Starts at 0 and is
 * tested for non-zero before the final flush at the end. */
1265 uint32_t desc_iblock = 0;
/* Write cursor inside the current revoke block's data. */
1266 char *blocks_entry = NULL;
1267 struct jbd_revoke_rec *rec, *tmp;
1268 struct ext4_block desc_block;
1269 struct jbd_revoke_header *header = NULL;
/* Size in bytes of one revoked block number as stored in a revoke block. */
1270 int32_t record_len = 4;
/* The statement guarded by this 64BIT feature test is not visible here;
 * presumably it widens record_len to 8, which is the value the 8-byte
 * branch further down checks for - TODO confirm. */
1272 if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1273 JBD_FEATURE_INCOMPAT_64BIT))
/* One iteration per revoke record attached to the transaction. */
1276 LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
/* Open a fresh revoke block: allocate a journal block, obtain it through
 * jbd_block_get_noread(), mark its buffer dirty and fill in the common
 * block header (magic, JBD_REVOKE_BLOCK type, transaction id).
 * NOTE(review): the condition guarding this setup and the handling of a
 * non-EOK rc are not visible; presumably it runs only while desc_iblock
 * is 0. */
1280 struct jbd_bhdr *bhdr;
1281 desc_iblock = jbd_journal_alloc_block(journal, trans);
1282 rc = jbd_block_get_noread(journal->jbd_fs,
1283 &desc_block, desc_iblock);
1288 ext4_bcache_set_dirty(desc_block.buf);
1290 bhdr = (struct jbd_bhdr *)desc_block.data;
1291 jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1292 jbd_set32(bhdr, blocktype, JBD_REVOKE_BLOCK);
1293 jbd_set32(bhdr, sequence, trans->trans_id);
/* The revoke header sits at the start of the block; records are written
 * immediately after it, and the free space is the block size minus the
 * header size. */
1295 header = (struct jbd_revoke_header *)bhdr;
1296 blocks_entry = (char *)(header + 1);
1297 tag_tbl_size = journal->block_size -
1298 sizeof(struct jbd_revoke_header);
/* Record this block as the transaction's start block if none is set yet. */
1300 if (!trans->start_iblock)
1301 trans->start_iblock = desc_iblock;
/* Not enough room left for one more record: store the used byte count
 * (block size minus free space, so the header is included) and hand the
 * block to jbd_block_set(). The statements that follow in this branch
 * are not visible; presumably they reset the state so that a new revoke
 * block is opened for the current record. */
1305 if (tag_tbl_size < record_len) {
1306 jbd_set32(header, count,
1307 journal->block_size - tag_tbl_size);
1308 jbd_block_set(journal->jbd_fs, &desc_block);
/* Store the revoked block number big-endian: as a 64-bit value when
 * record_len is 8, otherwise (presumably the else branch, whose keyword
 * line is not visible) as a 32-bit value. */
1313 if (record_len == 8) {
1315 (uint64_t *)blocks_entry;
1316 *blocks = to_be64(rec->lba);
1319 (uint32_t *)blocks_entry;
1320 *blocks = to_be32(rec->lba);
/* Advance the cursor and shrink the free space by one record. */
1322 blocks_entry += record_len;
1323 tag_tbl_size -= record_len;
/* Final flush: if no error occurred and a revoke block is still open,
 * store its used byte count and hand it to jbd_block_set(). The result
 * of jbd_block_set() is not captured on the visible lines. */
1327 if (rc == EOK && desc_iblock) {
1329 jbd_set32(header, count,
1330 journal->block_size - tag_tbl_size);
1332 jbd_block_set(journal->jbd_fs, &desc_block);
1338 /**@brief Submit the transaction to transaction queue.
 *
 * Appends an entry to the tail of journal->trans_queue. Nothing is
 * committed here; jbd_journal_commit_one() is what takes entries off the
 * head of that queue and commits them.
 *
 * NOTE(review): the remaining TAILQ_INSERT_TAIL arguments are on a line
 * not visible in this listing - presumably trans and its trans_node
 * link; confirm against the full file.
1339 * @param journal current journal session
1340 * @param trans transaction*/
1342 jbd_journal_submit_trans(struct jbd_journal *journal,
1343 struct jbd_trans *trans)
1345 TAILQ_INSERT_TAIL(&journal->trans_queue,
1350 /**@brief Put references of block descriptors in a transaction.
 *
 * For every jbd_buf on trans->buf_list, passes a copy of its block
 * descriptor to ext4_block_set() on the file system's block device.
1351 * @param journal current journal session
1352 * @param trans transaction*/
1353 void jbd_journal_cp_trans(struct jbd_journal *journal, struct jbd_trans *trans)
1355 struct jbd_buf *jbd_buf, *tmp;
/* The file system is reached through the journal's inode reference. */
1356 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
/* Iterate over the transaction's buffers. Each iteration takes a
 * by-value copy of jbd_buf->block, and that copy is what
 * ext4_block_set() receives. */
1357 LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node,
1359 struct ext4_block block = jbd_buf->block;
1360 ext4_block_set(fs->bdev, &block);
1364 /**@brief Update the start block of the journal when
1365 * all the contents in a transaction reach the disk.*/
/* Counts one more written block for the transaction passed in arg. When
 * written_cnt reaches data_cnt the transaction is removed from
 * journal->cp_queue and freed; if it was the head of that queue, the
 * journal's start block and trans_id are moved forward and the
 * superblock helpers are called.
 *
 * bc and buf are marked __unused. The remaining parameters, including
 * arg, are declared on lines not visible in this listing, as are any
 * checks placed before the counter update - confirm against the full
 * file. */
1366 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
1367 struct ext4_buf *buf __unused,
1371 struct jbd_trans *trans = arg;
1372 struct jbd_journal *journal = trans->journal;
/* Sampled before trans is possibly removed from the queue below. */
1373 bool first_in_queue =
1374 trans == TAILQ_FIRST(&journal->cp_queue);
1378 trans->written_cnt++;
/* Every data block of this transaction has now been reported written. */
1379 if (trans->written_cnt == trans->data_cnt) {
1380 TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
/* The head transaction is done: move the journal start just past its
 * blocks (wrap() then adjusts journal->start against the superblock)
 * and move trans_id to the next transaction id. */
1382 if (first_in_queue) {
1383 journal->start = trans->start_iblock +
1384 trans->alloc_blocks;
1385 wrap(&journal->jbd_fs->sb, journal->start);
1386 journal->trans_id = trans->trans_id + 1;
1388 jbd_journal_free_trans(journal, trans, false);
/* Walk the new queue heads. A transaction with no data blocks is removed
 * and freed after the journal start is moved past it. The final three
 * assignments point the journal start at the transaction's own first
 * block instead; presumably they form the else branch for a head that
 * still has data, followed by a break - those lines are not visible. */
1390 if (first_in_queue) {
1391 while ((trans = TAILQ_FIRST(&journal->cp_queue))) {
1392 if (!trans->data_cnt) {
1393 TAILQ_REMOVE(&journal->cp_queue,
1396 journal->start = trans->start_iblock +
1397 trans->alloc_blocks;
1398 wrap(&journal->jbd_fs->sb, journal->start);
1399 journal->trans_id = trans->trans_id + 1;
1400 jbd_journal_free_trans(journal,
1403 journal->start = trans->start_iblock;
1404 wrap(&journal->jbd_fs->sb, journal->start);
1405 journal->trans_id = trans->trans_id;
/* Push the updated start and trans_id out through the journal
 * superblock helpers. */
1409 jbd_journal_write_sb(journal);
1410 jbd_write_sb(journal->jbd_fs);
1415 /**@brief Commit a transaction to the journal immediately.
 *
 * Assigns the transaction the id held in journal->alloc_trans_id, then
 * calls jbd_journal_prepare(), jbd_journal_prepare_revoke() and
 * jbd_trans_write_commit_block() in that order. Afterwards the
 * transaction is either placed on journal->cp_queue or freed, depending
 * on whether that queue is empty and whether the transaction has data
 * blocks.
 *
 * NOTE(review): the rc checks between the steps, the else keywords, the
 * error label and the return statement are on lines not visible in this
 * listing; comments marked "presumably" must be confirmed against the
 * full file.
1416 * @param journal current journal session
1417 * @param trans transaction
1418 * @return standard error code*/
1419 int jbd_journal_commit_trans(struct jbd_journal *journal,
1420 struct jbd_trans *trans)
/* Saved so that journal->last can be restored at the end of the function. */
1423 uint32_t last = journal->last;
1425 trans->trans_id = journal->alloc_trans_id;
1426 rc = jbd_journal_prepare(journal, trans);
1430 rc = jbd_journal_prepare_revoke(journal, trans);
1434 rc = jbd_trans_write_commit_block(trans);
/* The id has been used by this transaction; advance the allocator. */
1438 journal->alloc_trans_id++;
/* Checkpoint queue is empty. */
1439 if (TAILQ_EMPTY(&journal->cp_queue)) {
/* The transaction has data blocks: point the journal start at its first
 * block, call the superblock helpers, append it to cp_queue and pass
 * its buffers to jbd_journal_cp_trans(). */
1440 if (trans->data_cnt) {
1441 journal->start = trans->start_iblock;
1442 wrap(&journal->jbd_fs->sb, journal->start);
1443 journal->trans_id = trans->trans_id;
1444 jbd_journal_write_sb(journal);
1445 jbd_write_sb(journal->jbd_fs);
1446 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
1448 jbd_journal_cp_trans(journal, trans);
/* Presumably the else branch (no data blocks): move the journal start
 * past this transaction's blocks, advance trans_id, call
 * jbd_journal_write_sb() and free the transaction.
 * NOTE(review): unlike the branch above, no jbd_write_sb() call is
 * visible here - confirm whether that is intentional. */
1450 journal->start = trans->start_iblock +
1451 trans->alloc_blocks;
1452 wrap(&journal->jbd_fs->sb, journal->start);
1453 journal->trans_id = trans->trans_id + 1;
1454 jbd_journal_write_sb(journal);
1455 jbd_journal_free_trans(journal, trans, false);
/* Presumably the branch taken when cp_queue is not empty: queue the
 * transaction behind the existing ones and pass its buffers to
 * jbd_journal_cp_trans() only if it has data blocks. */
1458 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
1460 if (trans->data_cnt)
1461 jbd_journal_cp_trans(journal, trans);
/* Restore journal->last to the value saved on entry and free the
 * transaction with the third argument set to true. Presumably this is
 * the failure path reached when one of the steps above returned an
 * error; the label or condition selecting it is not visible. */
1466 journal->last = last;
1467 jbd_journal_free_trans(journal, trans, true);
1472 /**@brief Commit one transaction on transaction queue
1474 * @param journal current journal session.*/
/* Takes the first transaction from journal->trans_queue, if any, unlinks
 * it from the queue and commits it with jbd_journal_commit_trans(). Does
 * nothing when the queue is empty.
 * NOTE(review): jbd_journal_commit_trans() returns an int, but the
 * result is discarded here and this function returns void, so a failed
 * commit is not reported to the caller. */
1475 void jbd_journal_commit_one(struct jbd_journal *journal)
1477 struct jbd_trans *trans;
1479 if ((trans = TAILQ_FIRST(&journal->trans_queue))) {
1480 TAILQ_REMOVE(&journal->trans_queue, trans, trans_node);
1481 jbd_journal_commit_trans(journal, trans);
1485 /**@brief Commit all the transactions on transaction queue
1487 * @param journal current journal session.*/
/* Calls jbd_journal_commit_one() repeatedly until journal->trans_queue
 * is empty. Each call removes the head of the queue before committing
 * it, so the queue shrinks by one entry per iteration. */
1488 void jbd_journal_commit_all(struct jbd_journal *journal)
1490 while (!TAILQ_EMPTY(&journal->trans_queue)) {
1491 jbd_journal_commit_one(journal);