2 * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3 * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 /** @addtogroup lwext4
34 * @file ext4_journal.c
35 * @brief Journal handle functions
38 #include "ext4_config.h"
39 #include "ext4_types.h"
40 #include "ext4_misc.h"
41 #include "ext4_errno.h"
42 #include "ext4_debug.h"
45 #include "ext4_super.h"
46 #include "ext4_journal.h"
47 #include "ext4_blockdev.h"
48 #include "ext4_crc32.h"
49 #include "ext4_journal.h"
54 /**@brief Revoke entry during journal replay.*/
/* Entries are ordered by their block number (see jbd_revoke_entry_cmp) and
 * linked into the jbd_revoke RB-tree through revoke_node. */
56 /**@brief Block number not to be replayed.*/
59 /**@brief For any transaction id smaller
60 * than trans_id, records of @block
61 * in those transactions should not
65 /**@brief Revoke tree node.*/
66 RB_ENTRY(revoke_entry) revoke_node;
69 /**@brief Valid journal replay information.*/
/* jbd_iterate_log() records start_trans_id and last_trans_id at the end of an
 * ACTION_SCAN pass and sets this_trans_id before revoke records are added
 * during ACTION_REVOKE. */
71 /**@brief Starting transaction id.*/
72 uint32_t start_trans_id;
74 /**@brief Ending transaction id.*/
75 uint32_t last_trans_id;
77 /**@brief Used as internal argument.*/
78 uint32_t this_trans_id;
80 /**@brief Number of transactions gone through.*/
83 /**@brief RB-Tree storing revoke entries.*/
84 RB_HEAD(jbd_revoke, revoke_entry) revoke_root;
87 /**@brief Journal replay internal arguments.*/
/* Built by jbd_iterate_log() for each descriptor block during ACTION_RECOVER
 * and consumed by jbd_replay_block_tags(). */
89 /**@brief Journal replay information.*/
90 struct recover_info *info;
92 /**@brief Current block we are on.*/
95 /**@brief Current trans_id we are on.*/
96 uint32_t this_trans_id;
/* Ordering callback for the jbd_revoke RB-tree: orders revoke entries by
 * their block number.
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably the usual 1 / -1 / 0 result - confirm. */
100 jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b)
102 if (a->block > b->block)
104 else if (a->block < b->block)
/* Ordering callback for the jbd_block RB-tree of block records; the visible
 * branch compares the lba fields of the two records. */
110 jbd_block_rec_cmp(struct jbd_block_rec *a, struct jbd_block_rec *b)
114 else if (a->lba < b->lba)
/* Generate the static inline RB-tree operations for the two trees used in
 * this file: jbd_revoke (revoke entries, linked through revoke_node) and
 * jbd_block (block records, linked through block_rec_node). */
119 RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
120 jbd_revoke_entry_cmp, static inline)
121 RB_GENERATE_INTERNAL(jbd_block, jbd_block_rec, block_rec_node,
122 jbd_block_rec_cmp, static inline)
/* Revoke entries are zero-filled heap objects: allocated with calloc() and
 * released with free(). calloc() may yield NULL, so users of the allocation
 * macro have to check the result. */
124 #define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry))
125 #define jbd_free_revoke_entry(addr) free(addr)
/* Report whether the journal superblock advertises a metadata checksum
 * feature; CSUM_V2 is tested before CSUM_V3.
 * NOTE(review): the returned values are not visible in this excerpt.
 * jbd_block_tag_csum_set() keeps the result in a variable named `ver`, so
 * this presumably returns the checksum version and 0 otherwise - confirm. */
127 static int jbd_has_csum(struct jbd_sb *jbd_sb)
129 if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V2))
132 if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V3))
/* crc32c over JBD_SUPERBLOCK_SIZE bytes of the journal superblock, computed
 * with the checksum field temporarily set to 0; the stored value is put back
 * afterwards. The local checksum starts at 0 and is only computed when
 * jbd_has_csum() is non-zero. Without CONFIG_META_CSUM_ENABLE the macro
 * fallback evaluates to 0. */
138 #if CONFIG_META_CSUM_ENABLE
139 static uint32_t jbd_sb_csum(struct jbd_sb *jbd_sb)
141 uint32_t checksum = 0;
143 if (jbd_has_csum(jbd_sb)) {
144 uint32_t orig_checksum = jbd_sb->checksum;
145 jbd_set32(jbd_sb, checksum, 0);
146 /* Calculate crc32c checksum against the whole superblock */
147 checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_sb,
148 JBD_SUPERBLOCK_SIZE);
149 jbd_sb->checksum = orig_checksum;
154 #define jbd_sb_csum(...) 0
/* Recompute the superblock checksum with jbd_sb_csum() and store it in the
 * checksum field through jbd_set32(). The store is guarded by a
 * jbd_has_csum() test (the statement it controls is not visible here). */
157 static void jbd_sb_csum_set(struct jbd_sb *jbd_sb)
159 if (!jbd_has_csum(jbd_sb))
162 jbd_set32(jbd_sb, checksum, jbd_sb_csum(jbd_sb));
/* Compare a freshly computed superblock checksum with the stored one, read
 * through jbd_get32(). Without CONFIG_META_CSUM_ENABLE verification is a
 * macro that always evaluates to true.
 * NOTE(review): the result for superblocks without a checksum feature is on
 * a line not visible in this excerpt. */
165 #if CONFIG_META_CSUM_ENABLE
167 jbd_verify_sb_csum(struct jbd_sb *jbd_sb)
169 if (!jbd_has_csum(jbd_sb))
172 return jbd_sb_csum(jbd_sb) == jbd_get32(jbd_sb, checksum);
175 #define jbd_verify_sb_csum(...) true
/* Checksum of a journal metadata block: crc32c seeded with the journal UUID
 * and then continued over the block starting at bhdr. The block tail - the
 * last sizeof(struct jbd_block_tail) bytes of the block - holds the stored
 * checksum, which is saved first and restored afterwards. The local checksum
 * starts at 0 and is only computed when jbd_has_csum() is non-zero; without
 * CONFIG_META_CSUM_ENABLE the macro fallback evaluates to 0.
 * NOTE(review): the statement between saving and hashing (presumably
 * clearing tail->checksum) and the length argument of the second crc32c
 * call are not visible in this excerpt - confirm. */
178 #if CONFIG_META_CSUM_ENABLE
179 static uint32_t jbd_meta_csum(struct jbd_fs *jbd_fs,
180 struct jbd_bhdr *bhdr)
182 uint32_t checksum = 0;
184 if (jbd_has_csum(&jbd_fs->sb)) {
185 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
186 struct jbd_block_tail *tail =
187 (struct jbd_block_tail *)((char *)bhdr + block_size -
188 sizeof(struct jbd_block_tail));
189 uint32_t orig_checksum = tail->checksum;
192 /* First calculate crc32c checksum against fs uuid */
193 checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
194 sizeof(jbd_fs->sb.uuid));
195 /* Calculate crc32c checksum against the whole block */
196 checksum = ext4_crc32c(checksum, bhdr,
198 tail->checksum = orig_checksum;
203 #define jbd_meta_csum(...) 0
/* Store the metadata block checksum in the block tail, converted with
 * to_be32(). The tail is located at block_size - sizeof(struct
 * jbd_block_tail) bytes from bhdr. The store is guarded by a jbd_has_csum()
 * test (the statement it controls is not visible here). */
206 static void jbd_meta_csum_set(struct jbd_fs *jbd_fs,
207 struct jbd_bhdr *bhdr)
209 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
210 struct jbd_block_tail *tail = (struct jbd_block_tail *)
211 ((char *)bhdr + block_size -
212 sizeof(struct jbd_block_tail));
213 if (!jbd_has_csum(&jbd_fs->sb))
216 tail->checksum = to_be32(jbd_meta_csum(jbd_fs, bhdr));
/* Verify a metadata block: the computed checksum must equal the value stored
 * in the block tail after conversion with to_be32(). Without
 * CONFIG_META_CSUM_ENABLE verification is a macro that always evaluates to
 * true.
 * NOTE(review): the result for journals without a checksum feature is on a
 * line not visible in this excerpt. */
219 #if CONFIG_META_CSUM_ENABLE
221 jbd_verify_meta_csum(struct jbd_fs *jbd_fs,
222 struct jbd_bhdr *bhdr)
224 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
225 struct jbd_block_tail *tail = (struct jbd_block_tail *)
226 ((char *)bhdr + block_size -
227 sizeof(struct jbd_block_tail));
228 if (!jbd_has_csum(&jbd_fs->sb))
231 return jbd_meta_csum(jbd_fs, bhdr) == to_be32(tail->checksum);
234 #define jbd_verify_meta_csum(...) true
/* Checksum of a commit block: crc32c seeded with the journal UUID and then
 * continued over the block starting at header. chksum_type, chksum_size and
 * chksum[0] are zeroed for the calculation and restored afterwards, so the
 * header is unchanged on return. The local checksum starts at 0 and is only
 * computed when jbd_has_csum() is non-zero; without CONFIG_META_CSUM_ENABLE
 * the macro fallback evaluates to 0.
 * NOTE(review): the length argument of the second crc32c call is not visible
 * in this excerpt; block_size is read just above, presumably for it. */
237 #if CONFIG_META_CSUM_ENABLE
238 static uint32_t jbd_commit_csum(struct jbd_fs *jbd_fs,
239 struct jbd_commit_header *header)
241 uint32_t checksum = 0;
243 if (jbd_has_csum(&jbd_fs->sb)) {
244 uint32_t orig_checksum_type = header->chksum_type,
245 orig_checksum_size = header->chksum_size,
246 orig_checksum = header->chksum[0];
247 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
248 header->chksum_type = 0;
249 header->chksum_size = 0;
250 header->chksum[0] = 0;
252 /* First calculate crc32c checksum against fs uuid */
253 checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
254 sizeof(jbd_fs->sb.uuid));
255 /* Calculate crc32c checksum against the whole block */
256 checksum = ext4_crc32c(checksum, header,
259 header->chksum_type = orig_checksum_type;
260 header->chksum_size = orig_checksum_size;
261 header->chksum[0] = orig_checksum;
266 #define jbd_commit_csum(...) 0
269 static void jbd_commit_csum_set(struct jbd_fs *jbd_fs,
270 struct jbd_commit_header *header)
272 if (!jbd_has_csum(&jbd_fs->sb))
275 header->chksum_type = 0;
276 header->chksum_size = 0;
277 header->chksum[0] = jbd_commit_csum(jbd_fs, header);
/* Verify a commit block: header->chksum[0] must equal the computed checksum
 * converted with to_be32(). Without CONFIG_META_CSUM_ENABLE verification is
 * a macro that always evaluates to true.
 * NOTE(review): the result for journals without a checksum feature is on a
 * line not visible in this excerpt. */
280 #if CONFIG_META_CSUM_ENABLE
281 static bool jbd_verify_commit_csum(struct jbd_fs *jbd_fs,
282 struct jbd_commit_header *header)
284 if (!jbd_has_csum(&jbd_fs->sb))
287 return header->chksum[0] == to_be32(jbd_commit_csum(jbd_fs,
291 #define jbd_verify_commit_csum(...) true
/* Checksum of a journalled data block. With a v2/v3 checksum feature the
 * crc32c is seeded with the journal UUID, continued over `sequence` and then
 * over the buffer. Otherwise, if the second feature test succeeds, ext4_crc32
 * is run over the buffer starting from `csum`. The local checksum starts at 0
 * when neither branch is taken; without CONFIG_META_CSUM_ENABLE the macro
 * fallback evaluates to 0.
 * NOTE(review): the second test passes JBD_FEATURE_COMPAT_CHECKSUM, a COMPAT
 * flag by its name, to JBD_HAS_INCOMPAT_FEATURE - this looks like it checks
 * the wrong feature word; confirm against the feature macros.
 * NOTE(review): `sequence` is hashed in host byte order via &sequence; other
 * on-disk values in this file go through to_be32() - confirm the intended
 * byte order. */
294 #if CONFIG_META_CSUM_ENABLE
296 * NOTE: We only make use of @csum parameter when
297 * JBD_FEATURE_COMPAT_CHECKSUM is enabled.
299 static uint32_t jbd_block_csum(struct jbd_fs *jbd_fs, const void *buf,
303 uint32_t checksum = 0;
305 if (jbd_has_csum(&jbd_fs->sb)) {
306 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
307 /* First calculate crc32c checksum against fs uuid */
308 checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
309 sizeof(jbd_fs->sb.uuid));
310 /* Then calculate crc32c checksum against sequence no. */
311 checksum = ext4_crc32c(checksum, &sequence,
313 /* Calculate crc32c checksum against the whole block */
314 checksum = ext4_crc32c(checksum, buf,
316 } else if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
317 JBD_FEATURE_COMPAT_CHECKSUM)) {
318 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
319 /* Calculate crc32 checksum against the whole block */
320 checksum = ext4_crc32(csum, buf,
326 #define jbd_block_csum(...) 0
/* Store a data block checksum in a block tag. Two layouts are handled:
 * struct jbd_block_tag receives a value cast to uint16_t, struct
 * jbd_block_tag3 receives the full to_be32() value; the selection between
 * them (based on `ver`) is on lines not visible in this excerpt.
 * NOTE(review): (uint16_t)to_be32(checksum) truncates the 32-bit big-endian
 * value; on a little-endian host that keeps the byte-swapped upper half of
 * the checksum rather than a big-endian 16-bit value - confirm whether a
 * 16-bit conversion of the low half was intended. */
329 static void jbd_block_tag_csum_set(struct jbd_fs *jbd_fs, void *__tag,
332 int ver = jbd_has_csum(&jbd_fs->sb);
337 struct jbd_block_tag *tag = __tag;
338 tag->checksum = (uint16_t)to_be32(checksum);
340 struct jbd_block_tag3 *tag = __tag;
341 tag->checksum = to_be32(checksum);
345 /**@brief Write jbd superblock to disk.
346 * @param jbd_fs jbd filesystem
347 * @param s jbd superblock
348 * @return standard error code*/
/* The superblock lives in journal inode block 0: that index is mapped with
 * jbd_inode_bmap() and the byte offset is the mapped block number times the
 * ext4 block size.
 * NOTE(review): the transfer length is EXT4_SUPERBLOCK_SIZE, while the
 * checksum code uses JBD_SUPERBLOCK_SIZE - confirm both constants agree. */
349 static int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s)
352 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
355 rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
360 offset = fblock * ext4_sb_get_block_size(&fs->sb);
361 return ext4_block_writebytes(fs->bdev, offset, s,
362 EXT4_SUPERBLOCK_SIZE);
365 /**@brief Read jbd superblock from disk.
366 * @param jbd_fs jbd filesystem
367 * @param s jbd superblock
368 * @return standard error code*/
/* Mirror of jbd_sb_write(): journal inode block 0 is mapped with
 * jbd_inode_bmap() and EXT4_SUPERBLOCK_SIZE bytes are read from the
 * resulting byte offset into s. */
369 static int jbd_sb_read(struct jbd_fs *jbd_fs, struct jbd_sb *s)
372 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
375 rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
379 offset = fblock * ext4_sb_get_block_size(&fs->sb);
380 return ext4_block_readbytes(fs->bdev, offset, s,
381 EXT4_SUPERBLOCK_SIZE);
384 /**@brief Verify jbd superblock.
385 * @param sb jbd superblock
386 * @return true if jbd superblock is valid */
/* Three checks in order: the header magic must be JBD_MAGIC_NUMBER, the
 * block type must be JBD_SUPERBLOCK or JBD_SUPERBLOCK_V2, and finally the
 * superblock checksum must verify. */
387 static bool jbd_verify_sb(struct jbd_sb *sb)
389 struct jbd_bhdr *header = &sb->header;
390 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER)
393 if (jbd_get32(header, blocktype) != JBD_SUPERBLOCK &&
394 jbd_get32(header, blocktype) != JBD_SUPERBLOCK_V2)
397 return jbd_verify_sb_csum(sb);
400 /**@brief Write back dirty jbd superblock to disk.
401 * @param jbd_fs jbd filesystem
402 * @return standard error code*/
/* Writes the in-memory superblock with jbd_sb_write() and clears the dirty
 * flag afterwards.
 * NOTE(review): the dirty test and the error check between the two visible
 * statements are on lines not present in this excerpt. No call to
 * jbd_sb_csum_set() is visible on this path - confirm the checksum is
 * refreshed elsewhere before the write. */
403 static int jbd_write_sb(struct jbd_fs *jbd_fs)
407 rc = jbd_sb_write(jbd_fs, &jbd_fs->sb);
411 jbd_fs->dirty = false;
416 /**@brief Get reference to jbd filesystem.
417 * @param fs Filesystem to load journal of
418 * @param jbd_fs jbd filesystem
419 * @return standard error code*/
420 int jbd_get_fs(struct ext4_fs *fs,
421 struct jbd_fs *jbd_fs)
424 uint32_t journal_ino;
426 memset(jbd_fs, 0, sizeof(struct jbd_fs));
427 /* See if there is journal inode on this filesystem.*/
428 /* FIXME: detection on existance ofbkejournal bdev is
430 journal_ino = ext4_get32(&fs->sb, journal_inode_number);
432 rc = ext4_fs_get_inode_ref(fs,
436 memset(jbd_fs, 0, sizeof(struct jbd_fs));
439 rc = jbd_sb_read(jbd_fs, &jbd_fs->sb);
441 memset(jbd_fs, 0, sizeof(struct jbd_fs));
442 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
445 if (!jbd_verify_sb(&jbd_fs->sb)) {
446 memset(jbd_fs, 0, sizeof(struct jbd_fs));
447 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
454 /**@brief Put reference of jbd filesystem.
455 * @param jbd_fs jbd filesystem
456 * @return standard error code*/
/* Writes the journal superblock back with jbd_write_sb() and then releases
 * the journal inode reference taken by jbd_get_fs(). */
457 int jbd_put_fs(struct jbd_fs *jbd_fs)
460 rc = jbd_write_sb(jbd_fs);
462 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
466 /**@brief Data block lookup helper.
467 * @param jbd_fs jbd filesystem
468 * @param iblock block index
469 * @param fblock logical block address
470 * @return standard error code*/
/* Delegates to ext4_fs_get_inode_dblk_idx(); the arguments of that call are
 * on lines not visible in this excerpt. The result is written through
 * fblock (an output parameter, as used by jbd_sb_read/jbd_sb_write). */
471 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
473 ext4_fsblk_t *fblock)
475 int rc = ext4_fs_get_inode_dblk_idx(
483 /**@brief jbd block get function (through cache).
484 * @param jbd_fs jbd filesystem
485 * @param block block descriptor
486 * @param fblock jbd logical block address
487 * @return standard error code*/
/* The incoming fblock is treated as a block index inside the journal inode:
 * it is cast to ext4_lblk_t, mapped with jbd_inode_bmap() and the block is
 * then fetched from the filesystem block device with ext4_block_get(). The
 * buffer is flagged BC_FLUSH and BC_TMP (the condition guarding the flag
 * calls is not visible in this excerpt). */
488 static int jbd_block_get(struct jbd_fs *jbd_fs,
489 struct ext4_block *block,
492 /* TODO: journal device. */
494 ext4_lblk_t iblock = (ext4_lblk_t)fblock;
496 /* Lookup the logical block address of
498 rc = jbd_inode_bmap(jbd_fs, iblock,
503 struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
504 rc = ext4_block_get(bdev, block, fblock);
506 /* If succeeded, mark buffer as BC_FLUSH to indicate
507 * that data should be written to disk immediately.*/
509 ext4_bcache_set_flag(block->buf, BC_FLUSH);
510 /* As we don't want to occupy too much space
511 * in block cache, we set this buffer BC_TMP.*/
512 ext4_bcache_set_flag(block->buf, BC_TMP);
518 /**@brief jbd block get function (through cache, don't read).
519 * @param jbd_fs jbd filesystem
520 * @param block block descriptor
521 * @param fblock jbd logical block address
522 * @return standard error code*/
/* Same mapping as jbd_block_get(), but the buffer is obtained with
 * ext4_block_get_noread(). Only BC_FLUSH is set on the buffer here; unlike
 * jbd_block_get() no BC_TMP flag is visible. */
523 static int jbd_block_get_noread(struct jbd_fs *jbd_fs,
524 struct ext4_block *block,
527 /* TODO: journal device. */
529 ext4_lblk_t iblock = (ext4_lblk_t)fblock;
530 rc = jbd_inode_bmap(jbd_fs, iblock,
535 struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
536 rc = ext4_block_get_noread(bdev, block, fblock);
538 ext4_bcache_set_flag(block->buf, BC_FLUSH);
543 /**@brief jbd block set procedure (through cache).
544 * @param jbd_fs jbd filesystem
545 * @param block block descriptor
546 * @return standard error code*/
/* Thin wrapper: hands the block back through ext4_block_set() on the block
 * device of the filesystem that owns the journal inode. */
547 static int jbd_block_set(struct jbd_fs *jbd_fs,
548 struct ext4_block *block)
550 return ext4_block_set(jbd_fs->inode_ref.fs->bdev,
554 /**@brief helper functions to calculate
555 * block tag size, not including UUID part.
556 * @param jbd_fs jbd filesystem
557 * @return tag size in bytes*/
/* Size rules visible below: with CSUM_V3 the tag is a struct jbd_block_tag3.
 * Otherwise the size starts at sizeof(struct jbd_block_tag), grows by
 * sizeof(uint16_t) with CSUM_V2 and shrinks by sizeof(uint32_t) on the final
 * return, which applies when the 64BIT branch did not return first (that
 * branch's statement is not visible in this excerpt). */
558 static int jbd_tag_bytes(struct jbd_fs *jbd_fs)
562 /* It is very easy to deal with the case which
563 * JBD_FEATURE_INCOMPAT_CSUM_V3 is enabled.*/
564 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
565 JBD_FEATURE_INCOMPAT_CSUM_V3))
566 return sizeof(struct jbd_block_tag3);
568 size = sizeof(struct jbd_block_tag);
570 /* If JBD_FEATURE_INCOMPAT_CSUM_V2 is enabled,
571 * add 2 bytes to size.*/
572 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
573 JBD_FEATURE_INCOMPAT_CSUM_V2))
574 size += sizeof(uint16_t);
576 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
577 JBD_FEATURE_INCOMPAT_64BIT))
580 /* If block number is 4 bytes in size,
581 * minus 4 bytes from size */
582 return size - sizeof(uint32_t);
585 /**@brief Tag information. */
/* Decoded, host-side view of one block tag. jbd_extract_block_tag() fills it
 * from an on-disk tag and jbd_write_block_tag() serialises it back; both
 * update tag_bytes to include the UUID part when one is present. */
587 /**@brief Tag size in bytes, including UUID part.*/
590 /**@brief block number stored in this tag.*/
593 /**@brief whether UUID part exists or not.*/
596 /**@brief UUID content if UUID part exists.*/
597 uint8_t uuid[UUID_SIZE];
599 /**@brief Is this the last tag? */
602 /**@brief crc32c checksum. */
606 /**@brief Extract information from a block tag.
607 * @param __tag pointer to the block tag
608 * @param tag_bytes block tag size of this jbd filesystem
609 * @param remaining size in buffer containing the block tag
610 * @param tag_info information of this tag.
611 * @return EOK when succeed, otherwise return EINVAL.*/
/* Two tag layouts are decoded: struct jbd_block_tag3 when CSUM_V3 is set
 * (32-bit flags) and struct jbd_block_tag otherwise (16-bit flags). In both
 * cases the low block number is read first and the high 32 bits are merged
 * in when the 64BIT feature is set. Unless JBD_FLAG_SAME_UUID is set a UUID
 * of UUID_SIZE bytes follows the tag; it is copied out and tag_bytes grows
 * accordingly. Both the tag itself and the UUID part are checked against
 * remain_buf_size before being read. The statement taken for
 * JBD_FLAG_ESCAPE is on a line not visible in this excerpt. */
613 jbd_extract_block_tag(struct jbd_fs *jbd_fs,
616 int32_t remain_buf_size,
617 struct tag_info *tag_info)
620 tag_info->tag_bytes = tag_bytes;
621 tag_info->uuid_exist = false;
622 tag_info->last_tag = false;
624 /* See whether it is possible to hold a valid block tag.*/
625 if (remain_buf_size - tag_bytes < 0)
628 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
629 JBD_FEATURE_INCOMPAT_CSUM_V3)) {
630 struct jbd_block_tag3 *tag = __tag;
631 tag_info->block = jbd_get32(tag, blocknr);
632 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
633 JBD_FEATURE_INCOMPAT_64BIT))
635 (uint64_t)jbd_get32(tag, blocknr_high) << 32;
637 if (jbd_get32(tag, flags) & JBD_FLAG_ESCAPE)
640 if (!(jbd_get32(tag, flags) & JBD_FLAG_SAME_UUID)) {
641 /* See whether it is possible to hold UUID part.*/
642 if (remain_buf_size - tag_bytes < UUID_SIZE)
645 uuid_start = (char *)tag + tag_bytes;
646 tag_info->uuid_exist = true;
647 tag_info->tag_bytes += UUID_SIZE;
648 memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
651 if (jbd_get32(tag, flags) & JBD_FLAG_LAST_TAG)
652 tag_info->last_tag = true;
655 struct jbd_block_tag *tag = __tag;
656 tag_info->block = jbd_get32(tag, blocknr);
657 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
658 JBD_FEATURE_INCOMPAT_64BIT))
660 (uint64_t)jbd_get32(tag, blocknr_high) << 32;
662 if (jbd_get16(tag, flags) & JBD_FLAG_ESCAPE)
665 if (!(jbd_get16(tag, flags) & JBD_FLAG_SAME_UUID)) {
666 /* See whether it is possible to hold UUID part.*/
667 if (remain_buf_size - tag_bytes < UUID_SIZE)
670 uuid_start = (char *)tag + tag_bytes;
671 tag_info->uuid_exist = true;
672 tag_info->tag_bytes += UUID_SIZE;
673 memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
676 if (jbd_get16(tag, flags) & JBD_FLAG_LAST_TAG)
677 tag_info->last_tag = true;
683 /**@brief Write information to a block tag.
684 * @param __tag pointer to the block tag
685 * @param remaining size in buffer containing the block tag
686 * @param tag_info information of this tag.
687 * @return EOK when succeed, otherwise return EINVAL.*/
/* Counterpart of jbd_extract_block_tag(). The tag size comes from
 * jbd_tag_bytes() and is stored in tag_info->tag_bytes, which grows by
 * UUID_SIZE when a UUID part is written. The tag structure is zeroed first,
 * then block number (high half only with the 64BIT feature), UUID or
 * JBD_FLAG_SAME_UUID, checksum and JBD_FLAG_LAST_TAG are filled in. The
 * CSUM_V3 layout uses 32-bit flag accessors, the other layout 16-bit ones.
 * NOTE(review): the memset length is sizeof the full tag structure, while
 * the space check only guarantees tag_bytes; jbd_tag_bytes() can report
 * less than sizeof(struct jbd_block_tag) - confirm the zeroing cannot run
 * past the end of the buffer. */
689 jbd_write_block_tag(struct jbd_fs *jbd_fs,
691 int32_t remain_buf_size,
692 struct tag_info *tag_info)
695 int tag_bytes = jbd_tag_bytes(jbd_fs);
697 tag_info->tag_bytes = tag_bytes;
699 /* See whether it is possible to hold a valid block tag.*/
700 if (remain_buf_size - tag_bytes < 0)
703 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
704 JBD_FEATURE_INCOMPAT_CSUM_V3)) {
705 struct jbd_block_tag3 *tag = __tag;
706 memset(tag, 0, sizeof(struct jbd_block_tag3));
707 jbd_set32(tag, blocknr, (uint32_t)tag_info->block);
708 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
709 JBD_FEATURE_INCOMPAT_64BIT))
710 jbd_set32(tag, blocknr_high, tag_info->block >> 32);
712 if (tag_info->uuid_exist) {
713 /* See whether it is possible to hold UUID part.*/
714 if (remain_buf_size - tag_bytes < UUID_SIZE)
717 uuid_start = (char *)tag + tag_bytes;
718 tag_info->tag_bytes += UUID_SIZE;
719 memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
721 jbd_set32(tag, flags,
722 jbd_get32(tag, flags) | JBD_FLAG_SAME_UUID);
724 jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
726 if (tag_info->last_tag)
727 jbd_set32(tag, flags,
728 jbd_get32(tag, flags) | JBD_FLAG_LAST_TAG);
731 struct jbd_block_tag *tag = __tag;
732 memset(tag, 0, sizeof(struct jbd_block_tag));
733 jbd_set32(tag, blocknr, (uint32_t)tag_info->block);
734 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
735 JBD_FEATURE_INCOMPAT_64BIT))
736 jbd_set32(tag, blocknr_high, tag_info->block >> 32);
738 if (tag_info->uuid_exist) {
739 /* See whether it is possible to hold UUID part.*/
740 if (remain_buf_size - tag_bytes < UUID_SIZE)
743 uuid_start = (char *)tag + tag_bytes;
744 tag_info->tag_bytes += UUID_SIZE;
745 memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
747 jbd_set16(tag, flags,
748 jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID);
750 jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
752 if (tag_info->last_tag)
753 jbd_set16(tag, flags,
754 jbd_get16(tag, flags) | JBD_FLAG_LAST_TAG);
760 /**@brief Iterate all block tags in a block.
761 * @param jbd_fs jbd filesystem
762 * @param __tag_start pointer to the block
763 * @param tag_tbl_size size of the block
764 * @param func callback routine to indicate that
765 * a block tag is found
766 * @param arg additional argument to be passed to func */
/* Walks the tag table one tag at a time: each tag is decoded with
 * jbd_extract_block_tag(), func is invoked with the tag's block number and
 * UUID buffer, and the cursor and remaining size advance by the decoded
 * tag_bytes. With CSUM_V2 or CSUM_V3 the block tail is excluded from the
 * table first. Iteration stops at the tag flagged as last.
 * NOTE(review): the loop condition only tests tag_tbl_size for non-zero and
 * the handling of a failed extraction is not visible in this excerpt;
 * termination on a short remainder presumably relies on that failure path -
 * confirm.
 * NOTE(review): func receives tag_info.uuid even when the tag carried no
 * UUID part, in which case the buffer was not written by the extraction. */
768 jbd_iterate_block_table(struct jbd_fs *jbd_fs,
770 int32_t tag_tbl_size,
771 void (*func)(struct jbd_fs * jbd_fs,
777 char *tag_start, *tag_ptr;
778 int tag_bytes = jbd_tag_bytes(jbd_fs);
779 tag_start = __tag_start;
782 /* Cut off the size of block tail storing checksum. */
783 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
784 JBD_FEATURE_INCOMPAT_CSUM_V2) ||
785 JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
786 JBD_FEATURE_INCOMPAT_CSUM_V3))
787 tag_tbl_size -= sizeof(struct jbd_block_tail);
789 while (tag_tbl_size) {
790 struct tag_info tag_info;
791 int rc = jbd_extract_block_tag(jbd_fs,
800 func(jbd_fs, tag_info.block, tag_info.uuid, arg);
802 /* Stop the iteration when we reach the last tag. */
803 if (tag_info.last_tag)
806 tag_ptr += tag_info.tag_bytes;
807 tag_tbl_size -= tag_info.tag_bytes;
/* Tag callback used for passes other than ACTION_RECOVER (see
 * jbd_debug_descriptor_block): logs the block number found in a tag. arg is
 * taken as a pointer to the caller's current journal block index; what is
 * done with it is on lines not visible in this excerpt. */
811 static void jbd_display_block_tags(struct jbd_fs *jbd_fs,
816 uint32_t *iblock = arg;
817 ext4_dbg(DEBUG_JBD, "Block in block_tag: %" PRIu64 "\n", block);
/* Look up the revoke entry for a block in info->revoke_root. A temporary
 * entry on the stack serves as the search key for RB_FIND(); the result is
 * the RB_FIND() return value. */
824 static struct revoke_entry *
825 jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)
827 struct revoke_entry tmp = {
831 return RB_FIND(jbd_revoke, &info->revoke_root, &tmp);
834 /**@brief Replay a block in a transaction.
835 * @param jbd_fs jbd filesystem
836 * @param block block address to be replayed.*/
/* Tag callback for ACTION_RECOVER; __arg is the struct replay_arg built by
 * jbd_iterate_log(). The block is first looked up in the revoke tree and the
 * current transaction id is compared with the entry's trans_id. The journal
 * copy is read at *this_block. For an ordinary block, blocksize bytes are
 * copied into a buffer obtained without reading, which is then marked dirty
 * and released. In the superblock path the in-memory mount_count and state
 * are saved beforehand and written back into fs->sb around the
 * ext4_sb_write() call, using data taken from the journal block at
 * EXT4_SUPERBLOCK_OFFSET.
 * The function returns void, so failures of the I/O calls cannot be
 * reported to the caller. */
837 static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
839 uint8_t *uuid __unused,
843 struct replay_arg *arg = __arg;
844 struct recover_info *info = arg->info;
845 uint32_t *this_block = arg->this_block;
846 struct revoke_entry *revoke_entry;
847 struct ext4_block journal_block, ext4_block;
848 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
852 /* We replay this block only if the current transaction id
853 * is equal or greater than that in revoke entry.*/
854 revoke_entry = jbd_revoke_entry_lookup(info, block);
856 arg->this_trans_id < revoke_entry->trans_id)
860 "Replaying block in block_tag: %" PRIu64 "\n",
863 r = jbd_block_get(jbd_fs, &journal_block, *this_block);
867 /* We need special treatment for ext4 superblock. */
869 r = ext4_block_get_noread(fs->bdev, &ext4_block, block);
871 jbd_block_set(jbd_fs, &journal_block);
875 memcpy(ext4_block.data,
877 jbd_get32(&jbd_fs->sb, blocksize));
879 ext4_bcache_set_dirty(ext4_block.buf);
880 ext4_block_set(fs->bdev, &ext4_block);
882 uint16_t mount_count, state;
883 mount_count = ext4_get16(&fs->sb, mount_count);
884 state = ext4_get16(&fs->sb, state);
887 journal_block.data + EXT4_SUPERBLOCK_OFFSET,
888 EXT4_SUPERBLOCK_SIZE);
890 /* Mark system as mounted */
891 ext4_set16(&fs->sb, state, state);
892 r = ext4_sb_write(fs->bdev, &fs->sb);
896 /*Update mount count*/
897 ext4_set16(&fs->sb, mount_count, mount_count);
900 jbd_block_set(jbd_fs, &journal_block);
905 /**@brief Add block address to revoke tree, along with
906 * its transaction id.
907 * @param info journal replay info
908 * @param block block address to be replayed.*/
/* An existing entry for the block only has its trans_id refreshed from
 * info->this_trans_id; otherwise a new entry is allocated, filled in and
 * inserted into info->revoke_root.
 * NOTE(review): allocation failure is only caught by ext4_assert(); if that
 * macro is compiled out, a NULL entry would be dereferenced - confirm. */
909 static void jbd_add_revoke_block_tags(struct recover_info *info,
912 struct revoke_entry *revoke_entry;
914 ext4_dbg(DEBUG_JBD, "Add block %" PRIu64 " to revoke tree\n", block);
915 /* If the revoke entry with respect to the block address
916 * exists already, update its transaction id.*/
917 revoke_entry = jbd_revoke_entry_lookup(info, block);
919 revoke_entry->trans_id = info->this_trans_id;
923 revoke_entry = jbd_alloc_revoke_entry();
924 ext4_assert(revoke_entry);
925 revoke_entry->block = block;
926 revoke_entry->trans_id = info->this_trans_id;
927 RB_INSERT(jbd_revoke, &info->revoke_root, revoke_entry);
/* Free every revoke entry: repeatedly take the minimum node of
 * info->revoke_root, unlink it and release it, until the tree is empty. */
932 static void jbd_destroy_revoke_tree(struct recover_info *info)
934 while (!RB_EMPTY(&info->revoke_root)) {
935 struct revoke_entry *revoke_entry =
936 RB_MIN(jbd_revoke, &info->revoke_root);
937 ext4_assert(revoke_entry);
938 RB_REMOVE(jbd_revoke, &info->revoke_root, revoke_entry);
939 jbd_free_revoke_entry(revoke_entry);
943 /* Make sure we wrap around the log correctly! */
/* When var has reached or passed the superblock's maxlen it is reduced by
 * (maxlen - first), i.e. moved back into the range starting at first. The
 * macro expands var several times, so the argument must be a plain lvalue
 * without side effects. */
944 #define wrap(sb, var) \
946 if (var >= jbd_get32((sb), maxlen)) \
947 var -= (jbd_get32((sb), maxlen) - jbd_get32((sb), first)); \
/* Passes of jbd_iterate_log(), run in this order by jbd_recover(). */
950 #define ACTION_SCAN 0
951 #define ACTION_REVOKE 1
952 #define ACTION_RECOVER 2
954 /**@brief Add entries in a revoke block to revoke tree.
955 * @param jbd_fs jbd filesystem
956 * @param header revoke block header
957 * @param recover_info journal replay info*/
/* The revoke records start right after struct jbd_revoke_header. The number
 * of records is derived from the header's count field minus the header
 * size; each record is converted from big-endian and added with
 * jbd_add_revoke_block_tags(). Records are 8 bytes wide in one branch and 4
 * bytes (the initial record_len) in the other; the assignment that selects
 * 8 for 64BIT journals is on a line not visible in this excerpt.
 * NOTE(review): count comes from disk and no validation is visible here; a
 * count smaller than the header size would wrap the unsigned subtraction,
 * and a count larger than the block would read past it - confirm callers
 * or hidden lines bound it. */
958 static void jbd_build_revoke_tree(struct jbd_fs *jbd_fs,
959 struct jbd_bhdr *header,
960 struct recover_info *info)
963 struct jbd_revoke_header *revoke_hdr =
964 (struct jbd_revoke_header *)header;
965 uint32_t i, nr_entries, record_len = 4;
967 /* If we are working on a 64bit jbd filesystem, */
968 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
969 JBD_FEATURE_INCOMPAT_64BIT))
972 nr_entries = (jbd_get32(revoke_hdr, count) -
973 sizeof(struct jbd_revoke_header)) /
976 blocks_entry = (char *)(revoke_hdr + 1);
978 for (i = 0;i < nr_entries;i++) {
979 if (record_len == 8) {
981 (uint64_t *)blocks_entry;
982 jbd_add_revoke_block_tags(info, to_be64(*blocks));
985 (uint32_t *)blocks_entry;
986 jbd_add_revoke_block_tags(info, to_be32(*blocks));
988 blocks_entry += record_len;
/* Walk the tags of a descriptor block with jbd_display_block_tags as the
 * callback. The table size passed on is the journal block size minus the
 * block header size. */
992 static void jbd_debug_descriptor_block(struct jbd_fs *jbd_fs,
993 struct jbd_bhdr *header,
996 jbd_iterate_block_table(jbd_fs,
998 jbd_get32(&jbd_fs->sb, blocksize) -
999 sizeof(struct jbd_bhdr),
1000 jbd_display_block_tags,
/* Walk the tags of a descriptor block with jbd_replay_block_tags as the
 * callback, so that each tagged block is replayed. The table size passed on
 * is the journal block size minus the block header size. */
1004 static void jbd_replay_descriptor_block(struct jbd_fs *jbd_fs,
1005 struct jbd_bhdr *header,
1006 struct replay_arg *arg)
1008 jbd_iterate_block_table(jbd_fs,
1010 jbd_get32(&jbd_fs->sb, blocksize) -
1011 sizeof(struct jbd_bhdr),
1012 jbd_replay_block_tags,
1016 /**@brief The core routine of journal replay.
1017 * @param jbd_fs jbd filesystem
1018 * @param recover_info journal replay info
1019 * @param action action needed to be taken
1020 * @return standard error code*/
/* One pass over the log, starting at the sequence number and start block
 * recorded in the journal superblock. Each block is fetched with
 * jbd_block_get(); a block without JBD_MAGIC_NUMBER, or with a sequence
 * number other than the expected transaction id, is treated as a possible
 * end of the journal. Valid blocks are dispatched on their type:
 *  - descriptor blocks are checksum-verified, then replayed in
 *    ACTION_RECOVER or only displayed otherwise;
 *  - commit blocks are checksum-verified and mark the end of a transaction;
 *  - revoke blocks are checksum-verified and, in ACTION_REVOKE, added to
 *    the revoke tree under the current transaction id.
 * The current block index is wrapped with wrap() and compared against the
 * start block. An ACTION_SCAN pass resets info->trans_cnt on entry and, on
 * success, records start_trans_id and last_trans_id in info. The other
 * passes stop once the transaction id exceeds info->last_trans_id.
 * NOTE(review): the transaction id comparisons are plain unsigned
 * comparisons; behaviour across a 32-bit sequence wrap-around is not
 * handled in the visible lines. */
1021 static int jbd_iterate_log(struct jbd_fs *jbd_fs,
1022 struct recover_info *info,
1026 bool log_end = false;
1027 struct jbd_sb *sb = &jbd_fs->sb;
1028 uint32_t start_trans_id, this_trans_id;
1029 uint32_t start_block, this_block;
1031 /* We start iterating valid blocks in the whole journal.*/
1032 start_trans_id = this_trans_id = jbd_get32(sb, sequence);
1033 start_block = this_block = jbd_get32(sb, start);
1034 if (action == ACTION_SCAN)
1035 info->trans_cnt = 0;
1036 else if (!info->trans_cnt)
1039 ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %" PRIu32 "\n",
1043 struct ext4_block block;
1044 struct jbd_bhdr *header;
1045 /* If we are not scanning for the last
1046 * valid transaction in the journal,
1047 * we will stop when we reach the end of
1049 if (action != ACTION_SCAN)
1050 if (this_trans_id > info->last_trans_id) {
1055 r = jbd_block_get(jbd_fs, &block, this_block);
1059 header = (struct jbd_bhdr *)block.data;
1060 /* This block does not have a valid magic number,
1061 * so we have reached the end of the journal.*/
1062 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER) {
1063 jbd_block_set(jbd_fs, &block);
1068 /* If the transaction id we found is not expected,
1069 * we may have reached the end of the journal.
1071 * If we are not scanning the journal, something
1072 * bad might have taken place. :-( */
1073 if (jbd_get32(header, sequence) != this_trans_id) {
1074 if (action != ACTION_SCAN)
1077 jbd_block_set(jbd_fs, &block);
1082 switch (jbd_get32(header, blocktype)) {
1083 case JBD_DESCRIPTOR_BLOCK:
1084 if (!jbd_verify_meta_csum(jbd_fs, header)) {
1086 DBG_WARN "Descriptor block checksum failed."
1087 "Journal block: %" PRIu32"\n",
1092 ext4_dbg(DEBUG_JBD, "Descriptor block: %" PRIu32", "
1093 "trans_id: %" PRIu32"\n",
1094 this_block, this_trans_id);
1095 if (action == ACTION_RECOVER) {
1096 struct replay_arg replay_arg;
1097 replay_arg.info = info;
1098 replay_arg.this_block = &this_block;
1099 replay_arg.this_trans_id = this_trans_id;
1101 jbd_replay_descriptor_block(jbd_fs,
1102 header, &replay_arg);
1104 jbd_debug_descriptor_block(jbd_fs,
1105 header, &this_block);
1108 case JBD_COMMIT_BLOCK:
1109 if (!jbd_verify_commit_csum(jbd_fs,
1110 (struct jbd_commit_header *)header)) {
1112 DBG_WARN "Commit block checksum failed."
1113 "Journal block: %" PRIu32"\n",
1118 ext4_dbg(DEBUG_JBD, "Commit block: %" PRIu32", "
1119 "trans_id: %" PRIu32"\n",
1120 this_block, this_trans_id);
1121 /* This is the end of a transaction,
1122 * we may now proceed to the next transaction.
1127 case JBD_REVOKE_BLOCK:
1128 if (!jbd_verify_meta_csum(jbd_fs, header)) {
1130 DBG_WARN "Revoke block checksum failed."
1131 "Journal block: %" PRIu32"\n",
1136 ext4_dbg(DEBUG_JBD, "Revoke block: %" PRIu32", "
1137 "trans_id: %" PRIu32"\n",
1138 this_block, this_trans_id);
1139 if (action == ACTION_REVOKE) {
1140 info->this_trans_id = this_trans_id;
1141 jbd_build_revoke_tree(jbd_fs,
1149 jbd_block_set(jbd_fs, &block);
1151 wrap(sb, this_block);
1152 if (this_block == start_block)
1156 ext4_dbg(DEBUG_JBD, "End of journal.\n");
1157 if (r == EOK && action == ACTION_SCAN) {
1158 /* We have finished scanning the journal. */
1159 info->start_trans_id = start_trans_id;
1160 if (this_trans_id > start_trans_id)
1161 info->last_trans_id = this_trans_id - 1;
1163 info->last_trans_id = this_trans_id;
1169 /**@brief Replay journal.
1170 * @param jbd_fs jbd filesystem
1171 * @return standard error code*/
/* Runs jbd_iterate_log() three times over the same journal: ACTION_SCAN to
 * find the valid transaction range, ACTION_REVOKE to build the revoke tree
 * and ACTION_RECOVER to replay blocks. After the final pass the journal
 * superblock's start is set to 0 and marked dirty, EXT4_FINCOM_RECOVER is
 * cleared in the ext4 superblock and that superblock is written out. The
 * revoke tree is destroyed at the end.
 * NOTE(review): the error checks after each pass are on lines not visible
 * in this excerpt; if the ACTION_REVOKE pass can return early on error, the
 * partially built revoke tree would not reach jbd_destroy_revoke_tree() -
 * confirm. */
1172 int jbd_recover(struct jbd_fs *jbd_fs)
1175 struct recover_info info;
1176 struct jbd_sb *sb = &jbd_fs->sb;
1180 RB_INIT(&info.revoke_root);
1182 r = jbd_iterate_log(jbd_fs, &info, ACTION_SCAN);
1186 r = jbd_iterate_log(jbd_fs, &info, ACTION_REVOKE);
1190 r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);
1192 /* If we successfully replay the journal,
1193 * clear EXT4_FINCOM_RECOVER flag on the
1194 * ext4 superblock, and set the start of
1196 uint32_t features_incompatible =
1197 ext4_get32(&jbd_fs->inode_ref.fs->sb,
1198 features_incompatible);
1199 jbd_set32(&jbd_fs->sb, start, 0);
1200 features_incompatible &= ~EXT4_FINCOM_RECOVER;
1201 ext4_set32(&jbd_fs->inode_ref.fs->sb,
1202 features_incompatible,
1203 features_incompatible);
1204 jbd_fs->dirty = true;
1205 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1206 &jbd_fs->inode_ref.fs->sb);
1208 jbd_destroy_revoke_tree(&info);
// Copy the session's start block and current transaction id into the
// in-memory journal superblock and mark it dirty. Nothing is written to
// disk here; callers follow up with jbd_write_sb() where needed.
1212 static void jbd_journal_write_sb(struct jbd_journal *journal)
1214 struct jbd_fs *jbd_fs = journal->jbd_fs;
1215 jbd_set32(&jbd_fs->sb, start, journal->start);
1216 jbd_set32(&jbd_fs->sb, sequence, journal->trans_id);
1217 jbd_fs->dirty = true;
1220 /**@brief Start accessing the journal.
1221 * @param jbd_fs jbd filesystem
1222 * @param journal current journal session
1223 * @return standard error code*/
/* Sequence visible below: EXT4_FINCOM_RECOVER is set in the ext4 superblock
 * and written to disk first; the session then starts at the journal's first
 * block with transaction id 1. One journal block is obtained without
 * reading, zero-filled, marked dirty and released (which block is on lines
 * not visible here). On failure of those two block calls the journal
 * structure is cleared. Finally the queues and the block record tree are
 * initialised and the journal superblock is updated and written. */
1224 int jbd_journal_start(struct jbd_fs *jbd_fs,
1225 struct jbd_journal *journal)
1228 uint32_t features_incompatible =
1229 ext4_get32(&jbd_fs->inode_ref.fs->sb,
1230 features_incompatible);
1231 struct ext4_block block = EXT4_BLOCK_ZERO();
1232 features_incompatible |= EXT4_FINCOM_RECOVER;
1233 ext4_set32(&jbd_fs->inode_ref.fs->sb,
1234 features_incompatible,
1235 features_incompatible);
1236 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1237 &jbd_fs->inode_ref.fs->sb);
1241 journal->first = jbd_get32(&jbd_fs->sb, first);
1242 journal->start = journal->first;
1243 journal->last = journal->first;
1244 journal->trans_id = 1;
1245 journal->alloc_trans_id = 1;
1247 journal->block_size = jbd_get32(&jbd_fs->sb, blocksize);
1249 r = jbd_block_get_noread(jbd_fs,
1253 memset(journal, 0, sizeof(struct jbd_journal));
1256 memset(block.data, 0, journal->block_size);
1257 ext4_bcache_set_dirty(block.buf);
1258 r = jbd_block_set(jbd_fs, &block);
1260 memset(journal, 0, sizeof(struct jbd_journal));
1264 TAILQ_INIT(&journal->trans_queue);
1265 TAILQ_INIT(&journal->cp_queue);
1266 RB_INIT(&journal->block_rec_root);
1267 journal->jbd_fs = jbd_fs;
1268 jbd_journal_write_sb(journal);
1269 return jbd_write_sb(jbd_fs);
/* Signature of the write-completion handler. jbd_journal_flush_trans()
 * calls it directly with the block cache, the buffer, the result of the
 * direct write and the jbd_buf. The remaining parameters are on lines not
 * visible in this excerpt. */
1272 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
1273 struct ext4_buf *buf __unused,
/* Push the buffers of a transaction out to their final location. For each
 * jbd_buf in trans->buf_queue: when the block record belongs to a different
 * transaction, the journalled copy is read from jbd_buf->jbd_lba into a
 * scratch buffer, written with ext4_blocks_set_direct() and completion is
 * signalled through jbd_trans_end_write(); the other visible path flushes
 * the cached buffer with ext4_block_flush_buf().
 * NOTE(review): ext4_block_get() is invoked inside ext4_assert(); if that
 * macro drops its argument in non-debug builds the read would never happen
 * and jbd_block.data would be used unset - confirm the macro definition.
 * NOTE(review): the malloc() result is only checked through ext4_assert(),
 * and the matching free() is not visible in this excerpt. */
1277 static void jbd_journal_flush_trans(struct jbd_trans *trans)
1279 struct jbd_buf *jbd_buf, *tmp;
1280 struct jbd_journal *journal = trans->journal;
1281 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1282 void *tmp_data = malloc(journal->block_size);
1283 ext4_assert(tmp_data);
1285 TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
1287 struct ext4_buf *buf = jbd_buf->block_rec->buf;
1288 /* The buffer in memory is still dirty. */
1290 if (jbd_buf->block_rec->trans != trans) {
1292 struct ext4_block jbd_block = EXT4_BLOCK_ZERO();
1293 ext4_assert(ext4_block_get(fs->bdev,
1295 jbd_buf->jbd_lba) == EOK);
1296 memcpy(tmp_data, jbd_block.data,
1297 journal->block_size);
1298 ext4_block_set(fs->bdev, &jbd_block);
1299 r = ext4_blocks_set_direct(fs->bdev, tmp_data,
1301 jbd_trans_end_write(fs->bdev->bc, buf, r, jbd_buf);
1303 ext4_block_flush_buf(fs->bdev, buf);
/* Advance the journal past a transaction: start moves to the block after the
 * transaction's allocated range (wrapped into the log), trans_id becomes the
 * transaction's id plus one, the transaction is freed and the in-memory
 * journal superblock is updated. jbd_journal_purge_cp_trans() uses this for
 * transactions whose data_cnt is zero. */
1312 jbd_journal_skip_pure_revoke(struct jbd_journal *journal,
1313 struct jbd_trans *trans)
1315 journal->start = trans->start_iblock +
1316 trans->alloc_blocks;
1317 wrap(&journal->jbd_fs->sb, journal->start);
1318 journal->trans_id = trans->trans_id + 1;
1319 jbd_journal_free_trans(journal,
1321 jbd_journal_write_sb(journal);
/**@brief Retire transactions from the head of the checkpoint queue.
 *        - A transaction with no data blocks is removed from the queue
 *          and passed to jbd_journal_skip_pure_revoke().
 *        - A transaction whose written_cnt equals data_cnt is removed,
 *          freed, and the journal superblock is written.
 *        - Otherwise, when flush is false, the journal superblock is
 *          written with values derived from that transaction's
 *          start_iblock; jbd_journal_flush_trans() covers the
 *          remaining case.
 * @param journal current journal session
 * NOTE(review): the assignment targets of several expressions, the
 *               other parameters (callers pass two booleans, one used
 *               here as `flush`) and the loop exits are not visible in
 *               this excerpt.*/
1325 jbd_journal_purge_cp_trans(struct jbd_journal *journal,
1329 struct jbd_trans *trans;
1330 while ((trans = TAILQ_FIRST(&journal->cp_queue))) {
/* Nothing to wait for: the transaction carries no data blocks. */
1331 if (!trans->data_cnt) {
1332 TAILQ_REMOVE(&journal->cp_queue,
1335 jbd_journal_skip_pure_revoke(journal, trans);
/* Every data block of this transaction has been written. */
1337 if (trans->data_cnt ==
1338 trans->written_cnt) {
1340 trans->start_iblock +
1341 trans->alloc_blocks;
1342 wrap(&journal->jbd_fs->sb,
1345 trans->trans_id + 1;
1346 TAILQ_REMOVE(&journal->cp_queue,
1349 jbd_journal_free_trans(journal,
1352 jbd_journal_write_sb(journal);
1353 } else if (!flush) {
1355 trans->start_iblock;
1356 wrap(&journal->jbd_fs->sb,
1360 jbd_journal_write_sb(journal);
1363 jbd_journal_flush_trans(trans);
1370 /**@brief Stop accessing the journal.
 *        Runs jbd_journal_purge_cp_trans() on the checkpoint queue,
 *        warns if block records remain, clears EXT4_FINCOM_RECOVER in
 *        the filesystem superblock's features_incompatible field and
 *        writes that superblock, then sets journal->trans_id to 0 and
 *        writes the journal superblock.
1371 * @param journal current journal session
1372 * @return standard error code (the value of the final jbd_write_sb()
 *         call on the last visible path)*/
1373 int jbd_journal_stop(struct jbd_journal *journal)
1376 struct jbd_fs *jbd_fs = journal->jbd_fs;
1377 uint32_t features_incompatible;
1379 /* Make sure that journalled content have reached
1381 jbd_journal_purge_cp_trans(journal, true, false);
1383 /* There should be no block record in this journal
1385 if (!RB_EMPTY(&journal->block_rec_root))
1387 DBG_WARN "There are still block records "
1388 "in this journal session!\n");
1390 features_incompatible =
1391 ext4_get32(&jbd_fs->inode_ref.fs->sb,
1392 features_incompatible);
1393 features_incompatible &= ~EXT4_FINCOM_RECOVER;
1394 ext4_set32(&jbd_fs->inode_ref.fs->sb,
1395 features_incompatible,
1396 features_incompatible);
1397 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1398 &jbd_fs->inode_ref.fs->sb);
1403 journal->trans_id = 0;
1404 jbd_journal_write_sb(journal);
1405 return jbd_write_sb(journal->jbd_fs);
1408 /**@brief Allocate a block in the journal.
 *        Takes the block at journal->last, advances and wraps
 *        journal->last, and counts the block in trans->alloc_blocks.
1409 * @param journal current journal session
1410 * @param trans transaction
1411 * @return allocated block address*/
1412 static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal,
1413 struct jbd_trans *trans)
1415 uint32_t start_block;
/* Hand out the current tail block and move the tail forward. */
1417 start_block = journal->last++;
1418 trans->alloc_blocks++;
1419 wrap(&journal->jbd_fs->sb, journal->last);
1421 /* If there is no space left, flush all journalled
1422 * blocks to disk first.*/
/* The tail catching up with the start is treated as "no space". */
1423 if (journal->last == journal->start)
1424 jbd_journal_purge_cp_trans(journal, true, false);
1429 /**@brief Allocate a new transaction
 *        The object is zero-initialised by calloc(), bound to the
 *        journal, given an initial data checksum of EXT4_CRC32_INIT
 *        and an empty buffer queue.
1430 * @param journal current journal session
1431 * @return transaction allocated
 * NOTE(review): handling of a failed calloc() is not visible in this
 *               excerpt - confirm.*/
1433 jbd_journal_new_trans(struct jbd_journal *journal)
1435 struct jbd_trans *trans = calloc(1, sizeof(struct jbd_trans));
1439 /* We will assign a trans_id to this transaction,
1440 * once it has been committed.*/
1441 trans->journal = journal;
1442 trans->data_csum = EXT4_CRC32_INIT;
1444 TAILQ_INIT(&trans->buf_queue);
1448 /**@brief Gain access to a block before making any modifications.
 *        If the buffer is dirty and tracked by the journal on behalf
 *        of a different transaction, it is flushed first.
1449 * @param journal current journal session
1450 * @param trans transaction
1451 * @param block descriptor
1452 * @return standard error code.*/
1453 int jbd_trans_get_access(struct jbd_journal *journal,
1454 struct jbd_trans *trans,
1455 struct ext4_block *block)
1458 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
/* end_write_arg holds the jbd_buf when the journal tracks this buffer
 * (see the end_write check below). */
1459 struct jbd_buf *jbd_buf = block->buf->end_write_arg;
1461 /* If the buffer has already been modified, we should
1462 * flush dirty data in this buffer to disk.*/
1463 if (ext4_bcache_test_flag(block->buf, BC_DIRTY) &&
1464 block->buf->end_write == jbd_trans_end_write) {
1465 ext4_assert(jbd_buf);
/* Only flush when the pending change belongs to another transaction. */
1466 if (jbd_buf->trans != trans)
1467 r = ext4_block_flush_buf(fs->bdev, block->buf);
/**@brief Look up a block record in the journal's red-black tree.
 *        A stack-allocated jbd_block_rec serves as the search key (its
 *        initialiser is not visible in this excerpt; callers pass an
 *        lba as the second argument).
 * @param journal current journal session
 * @return result of RB_FIND() on journal->block_rec_root*/
1473 static struct jbd_block_rec *
1474 jbd_trans_block_rec_lookup(struct jbd_journal *journal,
1477 struct jbd_block_rec tmp = {
1481 return RB_FIND(jbd_block,
1482 &journal->block_rec_root,
/**@brief Hand a block record over to another transaction.
 *        The record is unlinked from the list it is on, linked at the
 *        head of new_trans->tbrec_list, and its trans and buf fields
 *        are updated.
 * @param block_rec block record to move
 * @param new_trans transaction that becomes the owner
 * @param new_buf buffer to associate with the record*/
1487 jbd_trans_change_ownership(struct jbd_block_rec *block_rec,
1488 struct jbd_trans *new_trans,
1489 struct ext4_buf *new_buf)
1491 LIST_REMOVE(block_rec, tbrec_node);
1492 /* Now this block record belongs to this transaction. */
1493 LIST_INSERT_HEAD(&new_trans->tbrec_list, block_rec, tbrec_node);
1494 block_rec->trans = new_trans;
1495 block_rec->buf = new_buf;
/**@brief Obtain the block record for an lba on behalf of a transaction.
 *        An existing record found by jbd_trans_block_rec_lookup() is
 *        re-owned via jbd_trans_change_ownership(); otherwise a new
 *        record is allocated, initialised, linked into
 *        trans->tbrec_list and inserted into the journal's tree.
 * @param trans transaction that will own the record
 * @param buf buffer to associate with the record
 * NOTE(review): the branch structure, the lba parameter declaration
 *               and the return statements are not visible in this
 *               excerpt.*/
1498 static inline struct jbd_block_rec *
1499 jbd_trans_insert_block_rec(struct jbd_trans *trans,
1501 struct ext4_buf *buf)
1503 struct jbd_block_rec *block_rec;
1504 block_rec = jbd_trans_block_rec_lookup(trans->journal, lba);
1506 jbd_trans_change_ownership(block_rec, trans, buf);
/* New record, zero-filled by calloc(). */
1509 block_rec = calloc(1, sizeof(struct jbd_block_rec));
1513 block_rec->lba = lba;
1514 block_rec->buf = buf;
1515 block_rec->trans = trans;
1516 TAILQ_INIT(&block_rec->dirty_buf_queue);
1517 LIST_INSERT_HEAD(&trans->tbrec_list, block_rec, tbrec_node);
1518 RB_INSERT(jbd_block, &trans->journal->block_rec_root, block_rec);
/**@brief Settle the dirty-buffer queue of a block record when a
 *        transaction is done with it.
 *        Two code paths are visible:
 *        - every jbd_buf on block_rec->dirty_buf_queue is completed
 *          through jbd_trans_end_write() using a stand-in ext4_buf;
 *        - the last jbd_buf on the queue is picked, its journalled
 *          copy (at jbd_lba) is copied into the cache block for
 *          block_rec->lba, the record is re-owned by that jbd_buf's
 *          transaction, and the cache block is marked dirty with the
 *          journal completion callback attached.
 * @param journal current journal session
 * @param trans transaction that has finished with the record
 * @param block_rec block record being settled
 * NOTE(review): the condition selecting between the two paths and the
 *               remaining parameter(s) are not visible in this excerpt.
 * NOTE(review): both block-get calls sit inside ext4_assert(); confirm
 *               that macro always evaluates its argument.*/
1523 jbd_trans_finish_callback(struct jbd_journal *journal,
1524 const struct jbd_trans *trans,
1525 struct jbd_block_rec *block_rec,
1528 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
/* The check below tests whether this transaction still owns the
 * record. */
1529 if (block_rec->trans != trans)
1533 struct jbd_buf *jbd_buf, *tmp;
1534 TAILQ_FOREACH_SAFE(jbd_buf,
1535 &block_rec->dirty_buf_queue,
1538 /* All we need is a fake ext4_buf. */
1539 struct ext4_buf buf;
1541 jbd_trans_end_write(fs->bdev->bc,
1547 struct jbd_buf *jbd_buf;
1548 struct ext4_block jbd_block = EXT4_BLOCK_ZERO(),
1549 block = EXT4_BLOCK_ZERO();
1550 jbd_buf = TAILQ_LAST(&block_rec->dirty_buf_queue,
/* Read the journalled copy and obtain (without reading) the cache
 * block of the final location. */
1553 ext4_assert(ext4_block_get(fs->bdev,
1555 jbd_buf->jbd_lba) == EOK);
1556 ext4_assert(ext4_block_get_noread(fs->bdev,
1558 block_rec->lba) == EOK);
1559 memcpy(block.data, jbd_block.data,
1560 journal->block_size);
1562 jbd_trans_change_ownership(block_rec,
1563 jbd_buf->trans, block.buf);
/* Have the journal notified when this buffer is written. */
1565 block.buf->end_write = jbd_trans_end_write;
1566 block.buf->end_write_arg = jbd_buf;
1568 ext4_bcache_set_flag(jbd_block.buf, BC_TMP);
1569 ext4_bcache_set_dirty(block.buf);
1571 ext4_block_set(fs->bdev, &jbd_block);
1572 ext4_block_set(fs->bdev, &block);
/**@brief Drop a block record, but only while the given transaction
 *        still owns it: the record is unlinked from its list and
 *        removed from the journal's red-black tree.
 * @param journal current journal session
 * @param block_rec block record to drop
 * @param trans transaction expected to own the record
 * NOTE(review): release of the record's memory is not visible in this
 *               excerpt - confirm.*/
1579 jbd_trans_remove_block_rec(struct jbd_journal *journal,
1580 struct jbd_block_rec *block_rec,
1581 struct jbd_trans *trans)
1583 /* If this block record doesn't belong to this transaction,
1585 if (block_rec->trans == trans) {
1586 LIST_REMOVE(block_rec, tbrec_node);
1587 RB_REMOVE(jbd_block,
1588 &journal->block_rec_root,
1594 /**@brief Add block to a transaction and mark it dirty.
 *        A jbd_buf is allocated for the block, attached to the block
 *        record of its lba and to the transaction's buf_queue, and the
 *        buffer's end_write callback is pointed at the journal.
1595 * @param trans transaction
1596 * @param block block descriptor
1597 * @return standard error code*/
1598 int jbd_trans_set_block_dirty(struct jbd_trans *trans,
1599 struct ext4_block *block)
1601 struct jbd_buf *buf;
1603 struct jbd_block_rec *block_rec;
/* Check whether the buffer is already tracked by the journal and, if
 * so, whether for this very transaction (the action taken is not
 * visible in this excerpt). */
1604 if (block->buf->end_write == jbd_trans_end_write) {
1605 buf = block->buf->end_write_arg;
1606 if (buf && buf->trans == trans)
1609 buf = calloc(1, sizeof(struct jbd_buf));
1613 if ((block_rec = jbd_trans_insert_block_rec(trans,
1615 block->buf)) == NULL) {
1620 TAILQ_INSERT_TAIL(&block_rec->dirty_buf_queue,
1624 buf->block_rec = block_rec;
/* Keep a copy of the descriptor and take an extra reference on the
 * buffer for it. */
1626 buf->block = *block;
1627 ext4_bcache_inc_ref(block->buf);
1629 /* If the content reach the disk, notify us
1630 * so that we may do a checkpoint. */
1631 block->buf->end_write = jbd_trans_end_write;
1632 block->buf->end_write_arg = buf;
1635 TAILQ_INSERT_HEAD(&trans->buf_queue, buf, buf_node);
1637 ext4_bcache_set_dirty(block->buf);
1641 /**@brief Add block to be revoked to a transaction
 *        A zero-filled jbd_revoke_rec is allocated and linked at the
 *        head of trans->revoke_list.
1642 * @param trans transaction
1643 * @param lba logical block address
1644 * @return standard error code
 * NOTE(review): the allocation check and the store of lba into the
 *               record are not visible in this excerpt.*/
1645 int jbd_trans_revoke_block(struct jbd_trans *trans,
1648 struct jbd_revoke_rec *rec =
1649 calloc(1, sizeof(struct jbd_revoke_rec));
1654 LIST_INSERT_HEAD(&trans->revoke_list, rec, revoke_node);
1658 /**@brief Try to add block to be revoked to a transaction.
1659 * If @lba still remains in a transaction on the checkpoint
1660 * queue, add @lba as a revoked block to the transaction.
1661 * @param trans transaction
1662 * @param lba logical block address
1663 * @return standard error code*/
1664 int jbd_trans_try_revoke_block(struct jbd_trans *trans,
1668 struct jbd_journal *journal = trans->journal;
1669 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1670 struct jbd_block_rec *block_rec =
1671 jbd_trans_block_rec_lookup(journal, lba);
1673 /* Make sure we don't flush any buffers belonging to this transaction. */
1674 if (block_rec && block_rec->trans != trans) {
1675 /* If the buffer has not been flushed yet, flush it now. */
1676 if (block_rec->buf) {
1677 r = ext4_block_flush_buf(fs->bdev, block_rec->buf);
/* NOTE(review): the result of jbd_trans_revoke_block() is not
 * assigned on this line; its use is not visible in this excerpt -
 * confirm. */
1683 jbd_trans_revoke_block(trans, lba);
1689 /**@brief Free a transaction
 *        Walks the transaction's buffer queue, revoke list and block
 *        record list and detaches every entry.
1690 * @param journal current journal session
1691 * @param trans transaction
1692 * @param abort discard all the modifications on the block?
1693 * The function is declared void; no status is returned.*/
1694 void jbd_journal_free_trans(struct jbd_journal *journal,
1695 struct jbd_trans *trans,
1698 struct jbd_buf *jbd_buf, *tmp;
1699 struct jbd_revoke_rec *rec, *tmp2;
1700 struct jbd_block_rec *block_rec, *tmp3;
1701 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1702 TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
1704 block_rec = jbd_buf->block_rec;
/* Detach the journal callback, drop the dirty state and release the
 * reference held through jbd_buf->block (presumably guarded by the
 * abort flag; the guard is not visible in this excerpt). */
1706 jbd_buf->block.buf->end_write = NULL;
1707 jbd_buf->block.buf->end_write_arg = NULL;
1708 ext4_bcache_clear_dirty(jbd_buf->block.buf);
1709 ext4_block_set(fs->bdev, &jbd_buf->block);
1712 TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1715 jbd_trans_finish_callback(journal,
1719 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
/* Revoke records are simply unlinked. */
1722 LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1724 LIST_REMOVE(rec, revoke_node);
/* Block records still owned by this transaction are removed. */
1727 LIST_FOREACH_SAFE(block_rec, &trans->tbrec_list, tbrec_node,
1729 jbd_trans_remove_block_rec(journal, block_rec, trans);
1735 /**@brief Write commit block for a transaction
 *        A journal block is allocated, filled with a commit header
 *        (magic, block type, sequence = trans->trans_id), optionally
 *        the CRC32 data checksum, then checksummed, marked dirty and
 *        released.
1736 * @param trans transaction
1737 * @return standard error code
 * NOTE(review): the checksum fields are gated by
 *               JBD_HAS_INCOMPAT_FEATURE() while the flag passed is
 *               named JBD_FEATURE_COMPAT_CHECKSUM; confirm whether the
 *               COMPAT feature set should be tested instead.*/
1738 static int jbd_trans_write_commit_block(struct jbd_trans *trans)
1741 struct jbd_commit_header *header;
1742 uint32_t commit_iblock = 0;
1743 struct ext4_block commit_block;
1744 struct jbd_journal *journal = trans->journal;
1746 commit_iblock = jbd_journal_alloc_block(journal, trans);
/* The block is obtained without reading its old content. */
1747 rc = jbd_block_get_noread(journal->jbd_fs,
1748 &commit_block, commit_iblock);
1752 header = (struct jbd_commit_header *)commit_block.data;
1753 jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER);
1754 jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);
1755 jbd_set32(&header->header, sequence, trans->trans_id);
1757 if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1758 JBD_FEATURE_COMPAT_CHECKSUM)) {
1759 jbd_set32(header, chksum_type, JBD_CRC32_CHKSUM);
1760 jbd_set32(header, chksum_size, JBD_CRC32_CHKSUM_SIZE);
1761 jbd_set32(header, chksum[0], trans->data_csum);
1763 jbd_commit_csum_set(journal->jbd_fs, header);
1764 ext4_bcache_set_dirty(commit_block.buf);
1765 rc = jbd_block_set(journal->jbd_fs, &commit_block);
1772 /**@brief Write descriptor blocks and journalled data blocks for a
 *        transaction.
 *        Visible steps:
 *        - a reverse pass over trans->buf_queue detaches buffers from
 *          the tail of the queue, stopping at the first dirty one;
 *        - a forward pass detaches any buffer that is not dirty and,
 *          for each remaining one, computes a checksum with
 *          jbd_block_csum(), allocates and initialises a descriptor
 *          block when one is needed, writes a tag with
 *          jbd_write_block_tag(), then allocates a journal block,
 *          copies the buffer's data into it and records its address
 *          in jbd_buf->jbd_lba;
 *        - finally the open descriptor block is checksummed and
 *          released and trans->data_csum receives the checksum.
 *        trans->start_iblock is set to the first descriptor block
 *        when it is still zero.
1773 * @param journal current journal session
1774 * @param trans transaction
1775 * @return standard error code
 * NOTE(review): several conditions (when a new descriptor block is
 *               started, error exits) are not visible in this excerpt.
 * NOTE(review): the return values of the jbd_block_set() calls on
 *               desc_block are not captured - confirm this is
 *               intended.*/
1776 static int jbd_journal_prepare(struct jbd_journal *journal,
1777 struct jbd_trans *trans)
1779 int rc = EOK, i = 0;
/* Bytes still free in the current descriptor block's tag table. */
1780 int32_t tag_tbl_size = 0;
1781 uint32_t desc_iblock = 0;
1782 uint32_t data_iblock = 0;
1783 char *tag_start = NULL, *tag_ptr = NULL;
1784 struct jbd_buf *jbd_buf, *tmp;
1785 struct ext4_block desc_block, data_block;
1786 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1787 uint32_t checksum = EXT4_CRC32_INIT;
1789 /* Try to remove any non-dirty buffers from the tail of
1791 TAILQ_FOREACH_REVERSE_SAFE(jbd_buf, &trans->buf_queue,
1792 jbd_trans_buf, buf_node, tmp) {
1793 /* We stop the iteration when we find a dirty buffer. */
1794 if (ext4_bcache_test_flag(jbd_buf->block.buf,
1798 TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1802 jbd_buf->block.buf->end_write = NULL;
1803 jbd_buf->block.buf->end_write_arg = NULL;
1804 jbd_trans_finish_callback(journal,
1809 /* The buffer has not been modified, just release
1811 jbd_trans_remove_block_rec(journal,
1812 jbd_buf->block_rec, trans);
1815 ext4_block_set(fs->bdev, &jbd_buf->block);
1816 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1820 TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node, tmp) {
1821 struct tag_info tag_info;
1822 bool uuid_exist = false;
1823 if (!ext4_bcache_test_flag(jbd_buf->block.buf,
1825 TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1829 jbd_buf->block.buf->end_write = NULL;
1830 jbd_buf->block.buf->end_write_arg = NULL;
1831 jbd_trans_finish_callback(journal,
1836 /* The buffer has not been modified, just release
1838 jbd_trans_remove_block_rec(journal,
1839 jbd_buf->block_rec, trans);
1842 ext4_block_set(fs->bdev, &jbd_buf->block);
1843 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1847 checksum = jbd_block_csum(journal->jbd_fs,
1848 jbd_buf->block.data,
1853 struct jbd_bhdr *bhdr;
1854 desc_iblock = jbd_journal_alloc_block(journal, trans);
1855 rc = jbd_block_get_noread(journal->jbd_fs,
1856 &desc_block, desc_iblock);
1860 ext4_bcache_set_dirty(desc_block.buf);
1862 bhdr = (struct jbd_bhdr *)desc_block.data;
1863 jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1864 jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK);
1865 jbd_set32(bhdr, sequence, trans->trans_id);
1867 tag_start = (char *)(bhdr + 1);
1868 tag_ptr = tag_start;
1870 tag_tbl_size = journal->block_size -
1871 sizeof(struct jbd_bhdr);
1873 if (jbd_has_csum(&journal->jbd_fs->sb))
1874 tag_tbl_size -= sizeof(struct jbd_block_tail);
1876 if (!trans->start_iblock)
1877 trans->start_iblock = desc_iblock;
1880 tag_info.block = jbd_buf->block.lb_id;
1881 tag_info.uuid_exist = uuid_exist;
1882 if (i == trans->data_cnt - 1)
1883 tag_info.last_tag = true;
1885 tag_info.last_tag = false;
1887 tag_info.checksum = checksum;
1890 memcpy(tag_info.uuid, journal->jbd_fs->sb.uuid,
1893 rc = jbd_write_block_tag(journal->jbd_fs,
1898 jbd_meta_csum_set(journal->jbd_fs,
1899 (struct jbd_bhdr *)desc_block.data);
1900 jbd_block_set(journal->jbd_fs, &desc_block);
1905 data_iblock = jbd_journal_alloc_block(journal, trans);
1906 rc = jbd_block_get_noread(journal->jbd_fs,
1907 &data_block, data_iblock);
1911 ext4_bcache_set_dirty(data_block.buf);
1913 memcpy(data_block.data, jbd_buf->block.data,
1914 journal->block_size);
1915 jbd_buf->jbd_lba = data_block.lb_id;
1917 rc = jbd_block_set(journal->jbd_fs, &data_block);
1921 tag_ptr += tag_info.tag_bytes;
1922 tag_tbl_size -= tag_info.tag_bytes;
1926 if (rc == EOK && desc_iblock) {
1927 jbd_meta_csum_set(journal->jbd_fs,
1928 (struct jbd_bhdr *)desc_block.data);
1929 trans->data_csum = checksum;
1930 jbd_block_set(journal->jbd_fs, &desc_block);
1936 /**@brief Write revoke block for a transaction
 *        Each entry of trans->revoke_list is stored big-endian in a
 *        revoke block, as a 64-bit value when record_len is 8 and as a
 *        32-bit value otherwise. When the current block cannot hold
 *        another record it is finalised (count, checksum) and
 *        released.
1937 * @param journal current journal session
1938 * @param trans transaction
1939 * @return standard error code
 * NOTE(review): the statement that changes record_len under
 *               JBD_FEATURE_INCOMPAT_64BIT and the condition that
 *               starts a new revoke block are not visible in this
 *               excerpt.*/
1941 jbd_journal_prepare_revoke(struct jbd_journal *journal,
1942 struct jbd_trans *trans)
1944 int rc = EOK, i = 0;
/* Bytes still free for records in the current revoke block. */
1945 int32_t tag_tbl_size = 0;
1946 uint32_t desc_iblock = 0;
1947 char *blocks_entry = NULL;
1948 struct jbd_revoke_rec *rec, *tmp;
1949 struct ext4_block desc_block;
1950 struct jbd_revoke_header *header = NULL;
/* Size in bytes of one revoked-block record; starts at 4. */
1951 int32_t record_len = 4;
1953 if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1954 JBD_FEATURE_INCOMPAT_64BIT))
1957 LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1961 struct jbd_bhdr *bhdr;
1962 desc_iblock = jbd_journal_alloc_block(journal, trans);
1963 rc = jbd_block_get_noread(journal->jbd_fs,
1964 &desc_block, desc_iblock);
1969 ext4_bcache_set_dirty(desc_block.buf);
1971 bhdr = (struct jbd_bhdr *)desc_block.data;
1972 jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1973 jbd_set32(bhdr, blocktype, JBD_REVOKE_BLOCK);
1974 jbd_set32(bhdr, sequence, trans->trans_id);
/* Records start right after the revoke header. */
1976 header = (struct jbd_revoke_header *)bhdr;
1977 blocks_entry = (char *)(header + 1);
1978 tag_tbl_size = journal->block_size -
1979 sizeof(struct jbd_revoke_header);
1981 if (jbd_has_csum(&journal->jbd_fs->sb))
1982 tag_tbl_size -= sizeof(struct jbd_block_tail);
1984 if (!trans->start_iblock)
1985 trans->start_iblock = desc_iblock;
/* No room for another record: finalise and release this block. */
1989 if (tag_tbl_size < record_len) {
/* count is stored as block_size minus the space still free. */
1990 jbd_set32(header, count,
1991 journal->block_size - tag_tbl_size);
1992 jbd_meta_csum_set(journal->jbd_fs,
1993 (struct jbd_bhdr *)desc_block.data);
1994 jbd_block_set(journal->jbd_fs, &desc_block);
1999 if (record_len == 8) {
2001 (uint64_t *)blocks_entry;
2002 *blocks = to_be64(rec->lba);
2005 (uint32_t *)blocks_entry;
2006 *blocks = to_be32((uint32_t)rec->lba);
2008 blocks_entry += record_len;
2009 tag_tbl_size -= record_len;
/* Finalise the last, partially filled revoke block. */
2013 if (rc == EOK && desc_iblock) {
2015 jbd_set32(header, count,
2016 journal->block_size - tag_tbl_size);
2018 jbd_meta_csum_set(journal->jbd_fs,
2019 (struct jbd_bhdr *)desc_block.data);
2020 jbd_block_set(journal->jbd_fs, &desc_block);
2026 /**@brief Put references of block descriptors in a transaction.
 *        For every jbd_buf on trans->buf_queue a local copy of its
 *        block descriptor is passed to ext4_block_set().
2027 * @param journal current journal session
2028 * @param trans transaction*/
2029 void jbd_journal_cp_trans(struct jbd_journal *journal, struct jbd_trans *trans)
2031 struct jbd_buf *jbd_buf, *tmp;
2032 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
2033 TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
/* Work on a copy so that jbd_buf->block itself is left unchanged by
 * the call. */
2035 struct ext4_block block = jbd_buf->block;
2036 ext4_block_set(fs->bdev, &block);
2040 /**@brief Update the start block of the journal when
2041 * all the contents in a transaction reach the disk.
 *        The jbd_buf passed as arg is unlinked from its transaction
 *        and block record, and the transaction's written_cnt is
 *        incremented.
 * @param buf buffer whose write finished; its end_write fields are
 *            cleared when the block record is still owned by the
 *            transaction
 * NOTE(review): the remaining parameters (result code, arg) are
 *               declared on lines not visible in this excerpt.*/
2042 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
2043 struct ext4_buf *buf,
2047 struct jbd_buf *jbd_buf = arg;
2048 struct jbd_trans *trans = jbd_buf->trans;
2049 struct jbd_block_rec *block_rec = jbd_buf->block_rec;
2050 struct jbd_journal *journal = trans->journal;
/* Sampled up front, before any queue is modified below. */
2051 bool first_in_queue =
2052 trans == TAILQ_FIRST(&journal->cp_queue);
2056 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
2057 TAILQ_REMOVE(&block_rec->dirty_buf_queue,
2061 jbd_trans_finish_callback(journal,
2065 if (block_rec->trans == trans) {
2066 block_rec->buf = NULL;
2067 /* Clear the end_write and end_write_arg fields. */
2068 buf->end_write = NULL;
2069 buf->end_write_arg = NULL;
2074 trans->written_cnt++;
2075 if (trans->written_cnt == trans->data_cnt) {
2076 /* If it is the first transaction on checkpoint queue,
2077 * we will shift the start of the journal to the next
2078 * transaction, and remove subsequent written
2079 * transactions from checkpoint queue until we find
2080 * an unwritten one. */
2081 if (first_in_queue) {
2082 journal->start = trans->start_iblock +
2083 trans->alloc_blocks;
2084 wrap(&journal->jbd_fs->sb, journal->start);
2085 journal->trans_id = trans->trans_id + 1;
2086 TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
2087 jbd_journal_free_trans(journal, trans, false);
/* Retire following transactions without forcing a flush, then
 * write both superblocks. */
2089 jbd_journal_purge_cp_trans(journal, false, true);
2090 jbd_journal_write_sb(journal);
2091 jbd_write_sb(journal->jbd_fs);
2096 /**@brief Commit a transaction to the journal immediately.
 *        The transaction receives journal->alloc_trans_id as its id,
 *        its data and revoke blocks are prepared, and a commit block
 *        is written. It is then either queued on the checkpoint queue
 *        or freed at once when it has no data blocks and the queue is
 *        empty.
2097 * @param journal current journal session
2098 * @param trans transaction
2099 * @return standard error code*/
2100 int jbd_journal_commit_trans(struct jbd_journal *journal,
2101 struct jbd_trans *trans)
/* Remember the journal tail so it can be restored further down. */
2104 uint32_t last = journal->last;
2106 trans->trans_id = journal->alloc_trans_id;
2107 rc = jbd_journal_prepare(journal, trans);
2111 rc = jbd_journal_prepare_revoke(journal, trans);
2115 if (TAILQ_EMPTY(&trans->buf_queue) &&
2116 LIST_EMPTY(&trans->revoke_list)) {
2117 /* Since there are no entries in both buffer list
2118 * and revoke entry list, we do not consider trans as
2119 * complete transaction and just return EOK.*/
2120 jbd_journal_free_trans(journal, trans, false);
2124 rc = jbd_trans_write_commit_block(trans);
2128 journal->alloc_trans_id++;
2129 if (TAILQ_EMPTY(&journal->cp_queue)) {
/* With data blocks: the journal start is set to this transaction's
 * first block and the transaction is queued for checkpointing. */
2130 if (trans->data_cnt) {
2131 journal->start = trans->start_iblock;
2132 wrap(&journal->jbd_fs->sb, journal->start);
2133 journal->trans_id = trans->trans_id;
2134 jbd_journal_write_sb(journal);
2135 jbd_write_sb(journal->jbd_fs);
2136 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
2138 jbd_journal_cp_trans(journal, trans);
/* Without data blocks: the journal start moves past the transaction
 * and the transaction is freed. */
2140 journal->start = trans->start_iblock +
2141 trans->alloc_blocks;
2142 wrap(&journal->jbd_fs->sb, journal->start);
2143 journal->trans_id = trans->trans_id + 1;
2144 jbd_journal_write_sb(journal);
2145 jbd_journal_free_trans(journal, trans, false);
2148 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
2150 if (trans->data_cnt)
2151 jbd_journal_cp_trans(journal, trans);
/* Restore the saved journal tail and free the transaction with the
 * abort flag set (presumably the failure path; the label or condition
 * is not visible in this excerpt). */
2156 journal->last = last;
2157 jbd_journal_free_trans(journal, trans, true);