2 * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3 * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 /** @addtogroup lwext4
34 * @file ext4_journal.c
35 * @brief Journal handle functions
38 #include "ext4_config.h"
39 #include "ext4_types.h"
40 #include "ext4_misc.h"
41 #include "ext4_errno.h"
42 #include "ext4_debug.h"
45 #include "ext4_super.h"
46 #include "ext4_journal.h"
47 #include "ext4_blockdev.h"
48 #include "ext4_crc32.h"
49 #include "ext4_journal.h"
54 /**@brief Revoke entry during journal replay.*/
56 /**@brief Block number not to be replayed.*/
59 /**@brief For any transaction id smaller
60 * than trans_id, records of @block
61 * in those transactions should not
65 /**@brief Revoke tree node.*/
66 RB_ENTRY(revoke_entry) revoke_node;
69 /**@brief Valid journal replay information.*/
71 /**@brief Starting transaction id.*/
72 uint32_t start_trans_id;
74 /**@brief Ending transaction id.*/
75 uint32_t last_trans_id;
77 /**@brief Used as internal argument.*/
78 uint32_t this_trans_id;
80 /**@brief Number of transactions gone through.*/
83 /**@brief RB-Tree storing revoke entries.*/
84 RB_HEAD(jbd_revoke, revoke_entry) revoke_root;
87 /**@brief Journal replay internal arguments.*/
89 /**@brief Journal replay information.*/
90 struct recover_info *info;
92 /**@brief Current block we are on.*/
95 /**@brief Current trans_id we are on.*/
96 uint32_t this_trans_id;
99 /* Make sure we wrap around the log correctly! */
100 #define wrap(sb, var) \
102 if (var >= jbd_get32((sb), maxlen)) \
103 var -= (jbd_get32((sb), maxlen) - jbd_get32((sb), first)); \
108 jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b)
110 if (a->block > b->block)
112 else if (a->block < b->block)
118 jbd_block_rec_cmp(struct jbd_block_rec *a, struct jbd_block_rec *b)
122 else if (a->lba < b->lba)
127 RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
128 jbd_revoke_entry_cmp, static inline)
129 RB_GENERATE_INTERNAL(jbd_block, jbd_block_rec, block_rec_node,
130 jbd_block_rec_cmp, static inline)
132 #define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry))
133 #define jbd_free_revoke_entry(addr) free(addr)
135 static int jbd_has_csum(struct jbd_sb *jbd_sb)
137 if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V2))
140 if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V3))
146 #if CONFIG_META_CSUM_ENABLE
147 static uint32_t jbd_sb_csum(struct jbd_sb *jbd_sb)
149 uint32_t checksum = 0;
151 if (jbd_has_csum(jbd_sb)) {
152 uint32_t orig_checksum = jbd_sb->checksum;
153 jbd_set32(jbd_sb, checksum, 0);
154 /* Calculate crc32c checksum against the whole superblock */
155 checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_sb,
156 JBD_SUPERBLOCK_SIZE);
157 jbd_sb->checksum = orig_checksum;
162 #define jbd_sb_csum(...) 0
165 static void jbd_sb_csum_set(struct jbd_sb *jbd_sb)
167 if (!jbd_has_csum(jbd_sb))
170 jbd_set32(jbd_sb, checksum, jbd_sb_csum(jbd_sb));
173 #if CONFIG_META_CSUM_ENABLE
175 jbd_verify_sb_csum(struct jbd_sb *jbd_sb)
177 if (!jbd_has_csum(jbd_sb))
180 return jbd_sb_csum(jbd_sb) == jbd_get32(jbd_sb, checksum);
183 #define jbd_verify_sb_csum(...) true
186 #if CONFIG_META_CSUM_ENABLE
187 static uint32_t jbd_meta_csum(struct jbd_fs *jbd_fs,
188 struct jbd_bhdr *bhdr)
190 uint32_t checksum = 0;
192 if (jbd_has_csum(&jbd_fs->sb)) {
193 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
194 struct jbd_block_tail *tail =
195 (struct jbd_block_tail *)((char *)bhdr + block_size -
196 sizeof(struct jbd_block_tail));
197 uint32_t orig_checksum = tail->checksum;
200 /* First calculate crc32c checksum against fs uuid */
201 checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
202 sizeof(jbd_fs->sb.uuid));
203 /* Calculate crc32c checksum against the whole block */
204 checksum = ext4_crc32c(checksum, bhdr,
206 tail->checksum = orig_checksum;
211 #define jbd_meta_csum(...) 0
214 static void jbd_meta_csum_set(struct jbd_fs *jbd_fs,
215 struct jbd_bhdr *bhdr)
217 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
218 struct jbd_block_tail *tail = (struct jbd_block_tail *)
219 ((char *)bhdr + block_size -
220 sizeof(struct jbd_block_tail));
221 if (!jbd_has_csum(&jbd_fs->sb))
224 tail->checksum = to_be32(jbd_meta_csum(jbd_fs, bhdr));
227 #if CONFIG_META_CSUM_ENABLE
229 jbd_verify_meta_csum(struct jbd_fs *jbd_fs,
230 struct jbd_bhdr *bhdr)
232 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
233 struct jbd_block_tail *tail = (struct jbd_block_tail *)
234 ((char *)bhdr + block_size -
235 sizeof(struct jbd_block_tail));
236 if (!jbd_has_csum(&jbd_fs->sb))
239 return jbd_meta_csum(jbd_fs, bhdr) == to_be32(tail->checksum);
242 #define jbd_verify_meta_csum(...) true
245 #if CONFIG_META_CSUM_ENABLE
246 static uint32_t jbd_commit_csum(struct jbd_fs *jbd_fs,
247 struct jbd_commit_header *header)
249 uint32_t checksum = 0;
251 if (jbd_has_csum(&jbd_fs->sb)) {
252 uint32_t orig_checksum_type = header->chksum_type,
253 orig_checksum_size = header->chksum_size,
254 orig_checksum = header->chksum[0];
255 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
256 header->chksum_type = 0;
257 header->chksum_size = 0;
258 header->chksum[0] = 0;
260 /* First calculate crc32c checksum against fs uuid */
261 checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
262 sizeof(jbd_fs->sb.uuid));
263 /* Calculate crc32c checksum against the whole block */
264 checksum = ext4_crc32c(checksum, header,
267 header->chksum_type = orig_checksum_type;
268 header->chksum_size = orig_checksum_size;
269 header->chksum[0] = orig_checksum;
274 #define jbd_commit_csum(...) 0
277 static void jbd_commit_csum_set(struct jbd_fs *jbd_fs,
278 struct jbd_commit_header *header)
280 if (!jbd_has_csum(&jbd_fs->sb))
283 header->chksum_type = 0;
284 header->chksum_size = 0;
285 header->chksum[0] = jbd_commit_csum(jbd_fs, header);
288 #if CONFIG_META_CSUM_ENABLE
289 static bool jbd_verify_commit_csum(struct jbd_fs *jbd_fs,
290 struct jbd_commit_header *header)
292 if (!jbd_has_csum(&jbd_fs->sb))
295 return header->chksum[0] == to_be32(jbd_commit_csum(jbd_fs,
299 #define jbd_verify_commit_csum(...) true
302 #if CONFIG_META_CSUM_ENABLE
304 * NOTE: We only make use of @csum parameter when
305 * JBD_FEATURE_COMPAT_CHECKSUM is enabled.
307 static uint32_t jbd_block_csum(struct jbd_fs *jbd_fs, const void *buf,
311 uint32_t checksum = 0;
313 if (jbd_has_csum(&jbd_fs->sb)) {
314 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
315 /* First calculate crc32c checksum against fs uuid */
316 checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
317 sizeof(jbd_fs->sb.uuid));
318 /* Then calculate crc32c checksum against sequence no. */
319 checksum = ext4_crc32c(checksum, &sequence,
321 /* Calculate crc32c checksum against the whole block */
322 checksum = ext4_crc32c(checksum, buf,
324 } else if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
325 JBD_FEATURE_COMPAT_CHECKSUM)) {
326 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
327 /* Calculate crc32 checksum against the whole block */
328 checksum = ext4_crc32(csum, buf,
334 #define jbd_block_csum(...) 0
337 static void jbd_block_tag_csum_set(struct jbd_fs *jbd_fs, void *__tag,
340 int ver = jbd_has_csum(&jbd_fs->sb);
345 struct jbd_block_tag *tag = __tag;
346 tag->checksum = (uint16_t)to_be32(checksum);
348 struct jbd_block_tag3 *tag = __tag;
349 tag->checksum = to_be32(checksum);
353 /**@brief Write jbd superblock to disk.
354 * @param jbd_fs jbd filesystem
355 * @param s jbd superblock
356 * @return standard error code*/
357 static int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s)
360 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
363 rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
368 offset = fblock * ext4_sb_get_block_size(&fs->sb);
369 return ext4_block_writebytes(fs->bdev, offset, s,
370 EXT4_SUPERBLOCK_SIZE);
373 /**@brief Read jbd superblock from disk.
374 * @param jbd_fs jbd filesystem
375 * @param s jbd superblock
376 * @return standard error code*/
377 static int jbd_sb_read(struct jbd_fs *jbd_fs, struct jbd_sb *s)
380 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
383 rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
387 offset = fblock * ext4_sb_get_block_size(&fs->sb);
388 return ext4_block_readbytes(fs->bdev, offset, s,
389 EXT4_SUPERBLOCK_SIZE);
392 /**@brief Verify jbd superblock.
393 * @param sb jbd superblock
394 * @return true if jbd superblock is valid */
395 static bool jbd_verify_sb(struct jbd_sb *sb)
397 struct jbd_bhdr *header = &sb->header;
398 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER)
401 if (jbd_get32(header, blocktype) != JBD_SUPERBLOCK &&
402 jbd_get32(header, blocktype) != JBD_SUPERBLOCK_V2)
405 return jbd_verify_sb_csum(sb);
408 /**@brief Write back dirty jbd superblock to disk.
409 * @param jbd_fs jbd filesystem
410 * @return standard error code*/
411 static int jbd_write_sb(struct jbd_fs *jbd_fs)
415 rc = jbd_sb_write(jbd_fs, &jbd_fs->sb);
419 jbd_fs->dirty = false;
424 /**@brief Get reference to jbd filesystem.
425 * @param fs Filesystem to load journal of
426 * @param jbd_fs jbd filesystem
427 * @return standard error code*/
428 int jbd_get_fs(struct ext4_fs *fs,
429 struct jbd_fs *jbd_fs)
432 uint32_t journal_ino;
434 memset(jbd_fs, 0, sizeof(struct jbd_fs));
435 /* See if there is journal inode on this filesystem.*/
436 /* FIXME: detection on existance ofbkejournal bdev is
438 journal_ino = ext4_get32(&fs->sb, journal_inode_number);
440 rc = ext4_fs_get_inode_ref(fs,
444 memset(jbd_fs, 0, sizeof(struct jbd_fs));
447 rc = jbd_sb_read(jbd_fs, &jbd_fs->sb);
449 memset(jbd_fs, 0, sizeof(struct jbd_fs));
450 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
453 if (!jbd_verify_sb(&jbd_fs->sb)) {
454 memset(jbd_fs, 0, sizeof(struct jbd_fs));
455 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
462 /**@brief Put reference of jbd filesystem.
463 * @param jbd_fs jbd filesystem
464 * @return standard error code*/
465 int jbd_put_fs(struct jbd_fs *jbd_fs)
468 rc = jbd_write_sb(jbd_fs);
470 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
474 /**@brief Data block lookup helper.
475 * @param jbd_fs jbd filesystem
476 * @param iblock block index
477 * @param fblock logical block address
478 * @return standard error code*/
479 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
481 ext4_fsblk_t *fblock)
483 int rc = ext4_fs_get_inode_dblk_idx(
491 /**@brief jbd block get function (through cache).
492 * @param jbd_fs jbd filesystem
493 * @param block block descriptor
494 * @param fblock jbd logical block address
495 * @return standard error code*/
496 static int jbd_block_get(struct jbd_fs *jbd_fs,
497 struct ext4_block *block,
500 /* TODO: journal device. */
502 ext4_lblk_t iblock = (ext4_lblk_t)fblock;
504 /* Lookup the logical block address of
506 rc = jbd_inode_bmap(jbd_fs, iblock,
511 struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
512 rc = ext4_block_get(bdev, block, fblock);
514 /* If succeeded, mark buffer as BC_FLUSH to indicate
515 * that data should be written to disk immediately.*/
517 ext4_bcache_set_flag(block->buf, BC_FLUSH);
518 /* As we don't want to occupy too much space
519 * in block cache, we set this buffer BC_TMP.*/
520 ext4_bcache_set_flag(block->buf, BC_TMP);
526 /**@brief jbd block get function (through cache, don't read).
527 * @param jbd_fs jbd filesystem
528 * @param block block descriptor
529 * @param fblock jbd logical block address
530 * @return standard error code*/
531 static int jbd_block_get_noread(struct jbd_fs *jbd_fs,
532 struct ext4_block *block,
535 /* TODO: journal device. */
537 ext4_lblk_t iblock = (ext4_lblk_t)fblock;
538 rc = jbd_inode_bmap(jbd_fs, iblock,
543 struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
544 rc = ext4_block_get_noread(bdev, block, fblock);
546 ext4_bcache_set_flag(block->buf, BC_FLUSH);
551 /**@brief jbd block set procedure (through cache).
552 * @param jbd_fs jbd filesystem
553 * @param block block descriptor
554 * @return standard error code*/
555 static int jbd_block_set(struct jbd_fs *jbd_fs,
556 struct ext4_block *block)
558 return ext4_block_set(jbd_fs->inode_ref.fs->bdev,
562 /**@brief helper functions to calculate
563 * block tag size, not including UUID part.
564 * @param jbd_fs jbd filesystem
565 * @return tag size in bytes*/
566 static int jbd_tag_bytes(struct jbd_fs *jbd_fs)
570 /* It is very easy to deal with the case which
571 * JBD_FEATURE_INCOMPAT_CSUM_V3 is enabled.*/
572 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
573 JBD_FEATURE_INCOMPAT_CSUM_V3))
574 return sizeof(struct jbd_block_tag3);
576 size = sizeof(struct jbd_block_tag);
578 /* If JBD_FEATURE_INCOMPAT_CSUM_V2 is enabled,
579 * add 2 bytes to size.*/
580 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
581 JBD_FEATURE_INCOMPAT_CSUM_V2))
582 size += sizeof(uint16_t);
584 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
585 JBD_FEATURE_INCOMPAT_64BIT))
588 /* If block number is 4 bytes in size,
589 * minus 4 bytes from size */
590 return size - sizeof(uint32_t);
593 /**@brief Tag information. */
595 /**@brief Tag size in bytes, including UUID part.*/
598 /**@brief block number stored in this tag.*/
601 /**@brief whether UUID part exists or not.*/
604 /**@brief UUID content if UUID part exists.*/
605 uint8_t uuid[UUID_SIZE];
607 /**@brief Is this the last tag? */
610 /**@brief crc32c checksum. */
614 /**@brief Extract information from a block tag.
615 * @param __tag pointer to the block tag
616 * @param tag_bytes block tag size of this jbd filesystem
617 * @param remaining size in buffer containing the block tag
618 * @param tag_info information of this tag.
619 * @return EOK when succeed, otherwise return EINVAL.*/
621 jbd_extract_block_tag(struct jbd_fs *jbd_fs,
624 int32_t remain_buf_size,
625 struct tag_info *tag_info)
628 tag_info->tag_bytes = tag_bytes;
629 tag_info->uuid_exist = false;
630 tag_info->last_tag = false;
632 /* See whether it is possible to hold a valid block tag.*/
633 if (remain_buf_size - tag_bytes < 0)
636 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
637 JBD_FEATURE_INCOMPAT_CSUM_V3)) {
638 struct jbd_block_tag3 *tag = __tag;
639 tag_info->block = jbd_get32(tag, blocknr);
640 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
641 JBD_FEATURE_INCOMPAT_64BIT))
643 (uint64_t)jbd_get32(tag, blocknr_high) << 32;
645 if (jbd_get32(tag, flags) & JBD_FLAG_ESCAPE)
648 if (!(jbd_get32(tag, flags) & JBD_FLAG_SAME_UUID)) {
649 /* See whether it is possible to hold UUID part.*/
650 if (remain_buf_size - tag_bytes < UUID_SIZE)
653 uuid_start = (char *)tag + tag_bytes;
654 tag_info->uuid_exist = true;
655 tag_info->tag_bytes += UUID_SIZE;
656 memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
659 if (jbd_get32(tag, flags) & JBD_FLAG_LAST_TAG)
660 tag_info->last_tag = true;
663 struct jbd_block_tag *tag = __tag;
664 tag_info->block = jbd_get32(tag, blocknr);
665 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
666 JBD_FEATURE_INCOMPAT_64BIT))
668 (uint64_t)jbd_get32(tag, blocknr_high) << 32;
670 if (jbd_get16(tag, flags) & JBD_FLAG_ESCAPE)
673 if (!(jbd_get16(tag, flags) & JBD_FLAG_SAME_UUID)) {
674 /* See whether it is possible to hold UUID part.*/
675 if (remain_buf_size - tag_bytes < UUID_SIZE)
678 uuid_start = (char *)tag + tag_bytes;
679 tag_info->uuid_exist = true;
680 tag_info->tag_bytes += UUID_SIZE;
681 memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
684 if (jbd_get16(tag, flags) & JBD_FLAG_LAST_TAG)
685 tag_info->last_tag = true;
691 /**@brief Write information to a block tag.
692 * @param __tag pointer to the block tag
693 * @param remaining size in buffer containing the block tag
694 * @param tag_info information of this tag.
695 * @return EOK when succeed, otherwise return EINVAL.*/
697 jbd_write_block_tag(struct jbd_fs *jbd_fs,
699 int32_t remain_buf_size,
700 struct tag_info *tag_info)
703 int tag_bytes = jbd_tag_bytes(jbd_fs);
705 tag_info->tag_bytes = tag_bytes;
707 /* See whether it is possible to hold a valid block tag.*/
708 if (remain_buf_size - tag_bytes < 0)
711 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
712 JBD_FEATURE_INCOMPAT_CSUM_V3)) {
713 struct jbd_block_tag3 *tag = __tag;
714 memset(tag, 0, sizeof(struct jbd_block_tag3));
715 jbd_set32(tag, blocknr, (uint32_t)tag_info->block);
716 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
717 JBD_FEATURE_INCOMPAT_64BIT))
718 jbd_set32(tag, blocknr_high, tag_info->block >> 32);
720 if (tag_info->uuid_exist) {
721 /* See whether it is possible to hold UUID part.*/
722 if (remain_buf_size - tag_bytes < UUID_SIZE)
725 uuid_start = (char *)tag + tag_bytes;
726 tag_info->tag_bytes += UUID_SIZE;
727 memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
729 jbd_set32(tag, flags,
730 jbd_get32(tag, flags) | JBD_FLAG_SAME_UUID);
732 jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
734 if (tag_info->last_tag)
735 jbd_set32(tag, flags,
736 jbd_get32(tag, flags) | JBD_FLAG_LAST_TAG);
739 struct jbd_block_tag *tag = __tag;
740 memset(tag, 0, sizeof(struct jbd_block_tag));
741 jbd_set32(tag, blocknr, (uint32_t)tag_info->block);
742 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
743 JBD_FEATURE_INCOMPAT_64BIT))
744 jbd_set32(tag, blocknr_high, tag_info->block >> 32);
746 if (tag_info->uuid_exist) {
747 /* See whether it is possible to hold UUID part.*/
748 if (remain_buf_size - tag_bytes < UUID_SIZE)
751 uuid_start = (char *)tag + tag_bytes;
752 tag_info->tag_bytes += UUID_SIZE;
753 memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
755 jbd_set16(tag, flags,
756 jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID);
758 jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
760 if (tag_info->last_tag)
761 jbd_set16(tag, flags,
762 jbd_get16(tag, flags) | JBD_FLAG_LAST_TAG);
768 /**@brief Iterate all block tags in a block.
769 * @param jbd_fs jbd filesystem
770 * @param __tag_start pointer to the block
771 * @param tag_tbl_size size of the block
772 * @param func callback routine to indicate that
773 * a block tag is found
774 * @param arg additional argument to be passed to func */
776 jbd_iterate_block_table(struct jbd_fs *jbd_fs,
778 int32_t tag_tbl_size,
779 void (*func)(struct jbd_fs * jbd_fs,
785 char *tag_start, *tag_ptr;
786 int tag_bytes = jbd_tag_bytes(jbd_fs);
787 tag_start = __tag_start;
790 /* Cut off the size of block tail storing checksum. */
791 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
792 JBD_FEATURE_INCOMPAT_CSUM_V2) ||
793 JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
794 JBD_FEATURE_INCOMPAT_CSUM_V3))
795 tag_tbl_size -= sizeof(struct jbd_block_tail);
797 while (tag_tbl_size) {
798 struct tag_info tag_info;
799 int rc = jbd_extract_block_tag(jbd_fs,
808 func(jbd_fs, tag_info.block, tag_info.uuid, arg);
810 /* Stop the iteration when we reach the last tag. */
811 if (tag_info.last_tag)
814 tag_ptr += tag_info.tag_bytes;
815 tag_tbl_size -= tag_info.tag_bytes;
819 static void jbd_display_block_tags(struct jbd_fs *jbd_fs,
824 uint32_t *iblock = arg;
825 ext4_dbg(DEBUG_JBD, "Block in block_tag: %" PRIu64 "\n", block);
827 wrap(&jbd_fs->sb, *iblock);
833 static struct revoke_entry *
834 jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)
836 struct revoke_entry tmp = {
840 return RB_FIND(jbd_revoke, &info->revoke_root, &tmp);
843 /**@brief Replay a block in a transaction.
844 * @param jbd_fs jbd filesystem
845 * @param block block address to be replayed.*/
846 static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
848 uint8_t *uuid __unused,
852 struct replay_arg *arg = __arg;
853 struct recover_info *info = arg->info;
854 uint32_t *this_block = arg->this_block;
855 struct revoke_entry *revoke_entry;
856 struct ext4_block journal_block, ext4_block;
857 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
860 wrap(&jbd_fs->sb, *this_block);
862 /* We replay this block only if the current transaction id
863 * is equal or greater than that in revoke entry.*/
864 revoke_entry = jbd_revoke_entry_lookup(info, block);
866 arg->this_trans_id < revoke_entry->trans_id)
870 "Replaying block in block_tag: %" PRIu64 "\n",
873 r = jbd_block_get(jbd_fs, &journal_block, *this_block);
877 /* We need special treatment for ext4 superblock. */
879 r = ext4_block_get_noread(fs->bdev, &ext4_block, block);
881 jbd_block_set(jbd_fs, &journal_block);
885 memcpy(ext4_block.data,
887 jbd_get32(&jbd_fs->sb, blocksize));
889 ext4_bcache_set_dirty(ext4_block.buf);
890 ext4_block_set(fs->bdev, &ext4_block);
892 uint16_t mount_count, state;
893 mount_count = ext4_get16(&fs->sb, mount_count);
894 state = ext4_get16(&fs->sb, state);
897 journal_block.data + EXT4_SUPERBLOCK_OFFSET,
898 EXT4_SUPERBLOCK_SIZE);
900 /* Mark system as mounted */
901 ext4_set16(&fs->sb, state, state);
902 r = ext4_sb_write(fs->bdev, &fs->sb);
906 /*Update mount count*/
907 ext4_set16(&fs->sb, mount_count, mount_count);
910 jbd_block_set(jbd_fs, &journal_block);
915 /**@brief Add block address to revoke tree, along with
916 * its transaction id.
917 * @param info journal replay info
918 * @param block block address to be replayed.*/
919 static void jbd_add_revoke_block_tags(struct recover_info *info,
922 struct revoke_entry *revoke_entry;
924 ext4_dbg(DEBUG_JBD, "Add block %" PRIu64 " to revoke tree\n", block);
925 /* If the revoke entry with respect to the block address
926 * exists already, update its transaction id.*/
927 revoke_entry = jbd_revoke_entry_lookup(info, block);
929 revoke_entry->trans_id = info->this_trans_id;
933 revoke_entry = jbd_alloc_revoke_entry();
934 ext4_assert(revoke_entry);
935 revoke_entry->block = block;
936 revoke_entry->trans_id = info->this_trans_id;
937 RB_INSERT(jbd_revoke, &info->revoke_root, revoke_entry);
942 static void jbd_destroy_revoke_tree(struct recover_info *info)
944 while (!RB_EMPTY(&info->revoke_root)) {
945 struct revoke_entry *revoke_entry =
946 RB_MIN(jbd_revoke, &info->revoke_root);
947 ext4_assert(revoke_entry);
948 RB_REMOVE(jbd_revoke, &info->revoke_root, revoke_entry);
949 jbd_free_revoke_entry(revoke_entry);
954 #define ACTION_SCAN 0
955 #define ACTION_REVOKE 1
956 #define ACTION_RECOVER 2
958 /**@brief Add entries in a revoke block to revoke tree.
959 * @param jbd_fs jbd filesystem
960 * @param header revoke block header
961 * @param recover_info journal replay info*/
962 static void jbd_build_revoke_tree(struct jbd_fs *jbd_fs,
963 struct jbd_bhdr *header,
964 struct recover_info *info)
967 struct jbd_revoke_header *revoke_hdr =
968 (struct jbd_revoke_header *)header;
969 uint32_t i, nr_entries, record_len = 4;
971 /* If we are working on a 64bit jbd filesystem, */
972 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
973 JBD_FEATURE_INCOMPAT_64BIT))
976 nr_entries = (jbd_get32(revoke_hdr, count) -
977 sizeof(struct jbd_revoke_header)) /
980 blocks_entry = (char *)(revoke_hdr + 1);
982 for (i = 0;i < nr_entries;i++) {
983 if (record_len == 8) {
985 (uint64_t *)blocks_entry;
986 jbd_add_revoke_block_tags(info, to_be64(*blocks));
989 (uint32_t *)blocks_entry;
990 jbd_add_revoke_block_tags(info, to_be32(*blocks));
992 blocks_entry += record_len;
996 static void jbd_debug_descriptor_block(struct jbd_fs *jbd_fs,
997 struct jbd_bhdr *header,
1000 jbd_iterate_block_table(jbd_fs,
1002 jbd_get32(&jbd_fs->sb, blocksize) -
1003 sizeof(struct jbd_bhdr),
1004 jbd_display_block_tags,
1008 static void jbd_replay_descriptor_block(struct jbd_fs *jbd_fs,
1009 struct jbd_bhdr *header,
1010 struct replay_arg *arg)
1012 jbd_iterate_block_table(jbd_fs,
1014 jbd_get32(&jbd_fs->sb, blocksize) -
1015 sizeof(struct jbd_bhdr),
1016 jbd_replay_block_tags,
1020 /**@brief The core routine of journal replay.
1021 * @param jbd_fs jbd filesystem
1022 * @param recover_info journal replay info
1023 * @param action action needed to be taken
1024 * @return standard error code*/
1025 static int jbd_iterate_log(struct jbd_fs *jbd_fs,
1026 struct recover_info *info,
1030 bool log_end = false;
1031 struct jbd_sb *sb = &jbd_fs->sb;
1032 uint32_t start_trans_id, this_trans_id;
1033 uint32_t start_block, this_block;
1035 /* We start iterating valid blocks in the whole journal.*/
1036 start_trans_id = this_trans_id = jbd_get32(sb, sequence);
1037 start_block = this_block = jbd_get32(sb, start);
1038 if (action == ACTION_SCAN)
1039 info->trans_cnt = 0;
1040 else if (!info->trans_cnt)
1043 ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %" PRIu32 "\n",
1047 struct ext4_block block;
1048 struct jbd_bhdr *header;
1049 /* If we are not scanning for the last
1050 * valid transaction in the journal,
1051 * we will stop when we reach the end of
1053 if (action != ACTION_SCAN)
1054 if (this_trans_id > info->last_trans_id) {
1059 r = jbd_block_get(jbd_fs, &block, this_block);
1063 header = (struct jbd_bhdr *)block.data;
1064 /* This block does not have a valid magic number,
1065 * so we have reached the end of the journal.*/
1066 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER) {
1067 jbd_block_set(jbd_fs, &block);
1072 /* If the transaction id we found is not expected,
1073 * we may have reached the end of the journal.
1075 * If we are not scanning the journal, something
1076 * bad might have taken place. :-( */
1077 if (jbd_get32(header, sequence) != this_trans_id) {
1078 if (action != ACTION_SCAN)
1081 jbd_block_set(jbd_fs, &block);
1086 switch (jbd_get32(header, blocktype)) {
1087 case JBD_DESCRIPTOR_BLOCK:
1088 if (!jbd_verify_meta_csum(jbd_fs, header)) {
1090 DBG_WARN "Descriptor block checksum failed."
1091 "Journal block: %" PRIu32"\n",
1096 ext4_dbg(DEBUG_JBD, "Descriptor block: %" PRIu32", "
1097 "trans_id: %" PRIu32"\n",
1098 this_block, this_trans_id);
1099 if (action == ACTION_RECOVER) {
1100 struct replay_arg replay_arg;
1101 replay_arg.info = info;
1102 replay_arg.this_block = &this_block;
1103 replay_arg.this_trans_id = this_trans_id;
1105 jbd_replay_descriptor_block(jbd_fs,
1106 header, &replay_arg);
1108 jbd_debug_descriptor_block(jbd_fs,
1109 header, &this_block);
1112 case JBD_COMMIT_BLOCK:
1113 if (!jbd_verify_commit_csum(jbd_fs,
1114 (struct jbd_commit_header *)header)) {
1116 DBG_WARN "Commit block checksum failed."
1117 "Journal block: %" PRIu32"\n",
1122 ext4_dbg(DEBUG_JBD, "Commit block: %" PRIu32", "
1123 "trans_id: %" PRIu32"\n",
1124 this_block, this_trans_id);
1125 /* This is the end of a transaction,
1126 * we may now proceed to the next transaction.
1131 case JBD_REVOKE_BLOCK:
1132 if (!jbd_verify_meta_csum(jbd_fs, header)) {
1134 DBG_WARN "Revoke block checksum failed."
1135 "Journal block: %" PRIu32"\n",
1140 ext4_dbg(DEBUG_JBD, "Revoke block: %" PRIu32", "
1141 "trans_id: %" PRIu32"\n",
1142 this_block, this_trans_id);
1143 if (action == ACTION_REVOKE) {
1144 info->this_trans_id = this_trans_id;
1145 jbd_build_revoke_tree(jbd_fs,
1153 jbd_block_set(jbd_fs, &block);
1155 wrap(sb, this_block);
1156 if (this_block == start_block)
1160 ext4_dbg(DEBUG_JBD, "End of journal.\n");
1161 if (r == EOK && action == ACTION_SCAN) {
1162 /* We have finished scanning the journal. */
1163 info->start_trans_id = start_trans_id;
1164 if (this_trans_id > start_trans_id)
1165 info->last_trans_id = this_trans_id - 1;
1167 info->last_trans_id = this_trans_id;
1173 /**@brief Replay journal.
1174 * @param jbd_fs jbd filesystem
1175 * @return standard error code*/
1176 int jbd_recover(struct jbd_fs *jbd_fs)
1179 struct recover_info info;
1180 struct jbd_sb *sb = &jbd_fs->sb;
1184 RB_INIT(&info.revoke_root);
1186 r = jbd_iterate_log(jbd_fs, &info, ACTION_SCAN);
1190 r = jbd_iterate_log(jbd_fs, &info, ACTION_REVOKE);
1194 r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);
1196 /* If we successfully replay the journal,
1197 * clear EXT4_FINCOM_RECOVER flag on the
1198 * ext4 superblock, and set the start of
1200 uint32_t features_incompatible =
1201 ext4_get32(&jbd_fs->inode_ref.fs->sb,
1202 features_incompatible);
1203 jbd_set32(&jbd_fs->sb, start, 0);
1204 features_incompatible &= ~EXT4_FINCOM_RECOVER;
1205 ext4_set32(&jbd_fs->inode_ref.fs->sb,
1206 features_incompatible,
1207 features_incompatible);
1208 jbd_fs->dirty = true;
1209 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1210 &jbd_fs->inode_ref.fs->sb);
1212 jbd_destroy_revoke_tree(&info);
1216 static void jbd_journal_write_sb(struct jbd_journal *journal)
1218 struct jbd_fs *jbd_fs = journal->jbd_fs;
1219 jbd_set32(&jbd_fs->sb, start, journal->start);
1220 jbd_set32(&jbd_fs->sb, sequence, journal->trans_id);
1221 jbd_fs->dirty = true;
1224 /**@brief Start accessing the journal.
1225 * @param jbd_fs jbd filesystem
1226 * @param journal current journal session
1227 * @return standard error code*/
1228 int jbd_journal_start(struct jbd_fs *jbd_fs,
1229 struct jbd_journal *journal)
1232 uint32_t features_incompatible =
1233 ext4_get32(&jbd_fs->inode_ref.fs->sb,
1234 features_incompatible);
1235 struct ext4_block block = EXT4_BLOCK_ZERO();
1236 features_incompatible |= EXT4_FINCOM_RECOVER;
1237 ext4_set32(&jbd_fs->inode_ref.fs->sb,
1238 features_incompatible,
1239 features_incompatible);
1240 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1241 &jbd_fs->inode_ref.fs->sb);
1245 journal->first = jbd_get32(&jbd_fs->sb, first);
1246 journal->start = journal->first;
1247 journal->last = journal->first;
1248 journal->trans_id = 1;
1249 journal->alloc_trans_id = 1;
1251 journal->block_size = jbd_get32(&jbd_fs->sb, blocksize);
1253 r = jbd_block_get_noread(jbd_fs,
1257 memset(journal, 0, sizeof(struct jbd_journal));
1260 memset(block.data, 0, journal->block_size);
1261 ext4_bcache_set_dirty(block.buf);
1262 r = jbd_block_set(jbd_fs, &block);
1264 memset(journal, 0, sizeof(struct jbd_journal));
1268 TAILQ_INIT(&journal->trans_queue);
1269 TAILQ_INIT(&journal->cp_queue);
1270 RB_INIT(&journal->block_rec_root);
1271 journal->jbd_fs = jbd_fs;
1272 jbd_journal_write_sb(journal);
1273 return jbd_write_sb(jbd_fs);
1276 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
1277 struct ext4_buf *buf __unused,
1281 static void jbd_journal_flush_trans(struct jbd_trans *trans)
1283 struct jbd_buf *jbd_buf, *tmp;
1284 struct jbd_journal *journal = trans->journal;
1285 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1286 void *tmp_data = malloc(journal->block_size);
1287 ext4_assert(tmp_data);
1289 TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
1291 struct ext4_buf *buf = jbd_buf->block_rec->buf;
1292 /* The buffer in memory is still dirty. */
1294 if (jbd_buf->block_rec->trans != trans) {
1296 struct ext4_block jbd_block = EXT4_BLOCK_ZERO();
1297 ext4_assert(ext4_block_get(fs->bdev,
1299 jbd_buf->jbd_lba) == EOK);
1300 memcpy(tmp_data, jbd_block.data,
1301 journal->block_size);
1302 ext4_block_set(fs->bdev, &jbd_block);
1303 r = ext4_blocks_set_direct(fs->bdev, tmp_data,
1305 jbd_trans_end_write(fs->bdev->bc, buf, r, jbd_buf);
1307 ext4_block_flush_buf(fs->bdev, buf);
/* Move the journal past a transaction and free it.
 *
 * journal->start is set to the block following the transaction's allocated
 * blocks (start_iblock + alloc_blocks) and passed through wrap();
 * journal->trans_id becomes the id after this transaction's; the
 * transaction is freed and jbd_journal_write_sb() is called.
 * In this file it is called from jbd_journal_purge_cp_trans() for
 * transactions whose data_cnt is zero. */
1316 jbd_journal_skip_pure_revoke(struct jbd_journal *journal,
1317 struct jbd_trans *trans)
1319 journal->start = trans->start_iblock +
1320 trans->alloc_blocks;
1321 wrap(&journal->jbd_fs->sb, journal->start);
1322 journal->trans_id = trans->trans_id + 1;
1323 jbd_journal_free_trans(journal,
1325 jbd_journal_write_sb(journal);
/* Process transactions at the head of the checkpoint queue.
 *
 * Repeatedly takes the first transaction on journal->cp_queue:
 *  - data_cnt == 0: removed from the queue and handed to
 *    jbd_journal_skip_pure_revoke();
 *  - data_cnt == written_cnt: the journal start / trans_id are moved past
 *    the transaction, it is removed from the queue and freed, and
 *    jbd_journal_write_sb() is called;
 *  - otherwise, when `flush` is false, values derived from the
 *    transaction's start_iblock are stored and jbd_journal_write_sb() is
 *    called; jbd_journal_flush_trans() is also visible for the remaining
 *    case.
 * Callers in this file pass two boolean arguments after `journal`; their
 * declarations, the assignment targets and the loop exits are on lines not
 * present in this listing - confirm against the full file. */
1329 jbd_journal_purge_cp_trans(struct jbd_journal *journal,
1333 struct jbd_trans *trans;
1334 while ((trans = TAILQ_FIRST(&journal->cp_queue))) {
/* A transaction without data blocks has nothing to wait for. */
1335 if (!trans->data_cnt) {
1336 TAILQ_REMOVE(&journal->cp_queue,
1339 jbd_journal_skip_pure_revoke(journal, trans);
/* Every data block of this transaction has been reported as written. */
1341 if (trans->data_cnt ==
1342 trans->written_cnt) {
1344 trans->start_iblock +
1345 trans->alloc_blocks;
1346 wrap(&journal->jbd_fs->sb,
1349 trans->trans_id + 1;
1350 TAILQ_REMOVE(&journal->cp_queue,
1353 jbd_journal_free_trans(journal,
1356 jbd_journal_write_sb(journal);
1357 } else if (!flush) {
1359 trans->start_iblock;
1360 wrap(&journal->jbd_fs->sb,
1364 jbd_journal_write_sb(journal);
1367 jbd_journal_flush_trans(trans);
1374 /**@brief Stop accessing the journal.
 *        Visible steps: jbd_journal_purge_cp_trans(journal, true, false)
 *        is run, a warning is logged if journal->block_rec_root is not
 *        empty, EXT4_FINCOM_RECOVER is cleared from the ext4 superblock's
 *        features_incompatible field and that superblock is written with
 *        ext4_sb_write(), then journal->trans_id is set to 0 and the
 *        journal superblock is written.
 *        NOTE(review): handling of the ext4_sb_write() result is on lines
 *        not present in this listing.
1375 * @param journal current journal session
1376 * @return standard error code; on the visible final path, the result of
 *         jbd_write_sb()*/
1377 int jbd_journal_stop(struct jbd_journal *journal)
1380 struct jbd_fs *jbd_fs = journal->jbd_fs;
1381 uint32_t features_incompatible;
1383 /* Make sure that journalled content have reached
1385 jbd_journal_purge_cp_trans(journal, true, false);
1387 /* There should be no block record in this journal
1389 if (!RB_EMPTY(&journal->block_rec_root))
1391 DBG_WARN "There are still block records "
1392 "in this journal session!\n");
1394 features_incompatible =
1395 ext4_get32(&jbd_fs->inode_ref.fs->sb,
1396 features_incompatible);
1397 features_incompatible &= ~EXT4_FINCOM_RECOVER;
1398 ext4_set32(&jbd_fs->inode_ref.fs->sb,
1399 features_incompatible,
1400 features_incompatible);
1401 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1402 &jbd_fs->inode_ref.fs->sb);
1407 journal->trans_id = 0;
1408 jbd_journal_write_sb(journal);
1409 return jbd_write_sb(journal->jbd_fs);
1412 /**@brief Allocate a block in the journal.
 *        The block handed out is the current journal->last; last is then
 *        advanced and passed through wrap(), and the transaction's
 *        alloc_blocks counter is incremented.
1413 * @param journal current journal session
1414 * @param trans transaction
1415 * @return allocated block address*/
1416 static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal,
1417 struct jbd_trans *trans)
1419 uint32_t start_block;
/* Take the current tail block and advance the tail. */
1421 start_block = journal->last++;
/* alloc_blocks is later added to start_iblock when the journal start is
 * moved past this transaction. */
1422 trans->alloc_blocks++;
1423 wrap(&journal->jbd_fs->sb, journal->last);
1425 /* If there is no space left, flush all journalled
1426 * blocks to disk first.*/
/* "No space left" is detected as the tail catching up with the start. */
1427 if (journal->last == journal->start)
1428 jbd_journal_purge_cp_trans(journal, true, false);
1433 /**@brief Allocate a new transaction
 *        The transaction is allocated zero-filled with calloc(), bound to
 *        @journal, given an initial data checksum of EXT4_CRC32_INIT and
 *        an empty buffer queue.
 *        NOTE(review): handling of a failed calloc() is on lines not
 *        present in this listing.
1434 * @param journal current journal session
1435 * @return transaction allocated*/
1437 jbd_journal_new_trans(struct jbd_journal *journal)
1439 struct jbd_trans *trans = calloc(1, sizeof(struct jbd_trans));
1443 /* We will assign a trans_id to this transaction,
1444 * once it has been committed.*/
1445 trans->journal = journal;
1446 trans->data_csum = EXT4_CRC32_INIT;
1448 TAILQ_INIT(&trans->buf_queue);
1452 /**@brief Gain access to a block before making any modifications.
 *        If the buffer is dirty and its end_write hook is
 *        jbd_trans_end_write, it is already tracked by the journal; when
 *        the tracking jbd_buf belongs to a different transaction, the
 *        buffer is flushed first.
1453 * @param journal current journal session
1454 * @param trans transaction
1455 * @param block block descriptor
1456 * @return standard error code.*/
1457 int jbd_trans_get_access(struct jbd_journal *journal,
1458 struct jbd_trans *trans,
1459 struct ext4_block *block)
1462 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
/* Only meaningful when end_write is jbd_trans_end_write (checked below). */
1463 struct jbd_buf *jbd_buf = block->buf->end_write_arg;
1465 /* If the buffer has already been modified, we should
1466 * flush dirty data in this buffer to disk.*/
1467 if (ext4_bcache_test_flag(block->buf, BC_DIRTY) &&
1468 block->buf->end_write == jbd_trans_end_write) {
1469 ext4_assert(jbd_buf);
/* Buffers dirtied by this same transaction are left alone. */
1470 if (jbd_buf->trans != trans)
1471 r = ext4_block_flush_buf(fs->bdev, block->buf);
/* Find the block record for a block in the journal's red-black tree
 * (journal->block_rec_root), using a temporary jbd_block_rec as the search
 * key. Callers in this file pass the block's lba as the second argument
 * and treat a NULL result as "no record". */
1477 static struct jbd_block_rec *
1478 jbd_trans_block_rec_lookup(struct jbd_journal *journal,
1481 struct jbd_block_rec tmp = {
1485 return RB_FIND(jbd_block,
1486 &journal->block_rec_root,
/* Hand a block record over to another transaction: unlink it from the
 * tbrec list it is currently on, insert it at the head of
 * new_trans->tbrec_list, and point it at new_trans and new_buf. */
1491 jbd_trans_change_ownership(struct jbd_block_rec *block_rec,
1492 struct jbd_trans *new_trans,
1493 struct ext4_buf *new_buf)
1495 LIST_REMOVE(block_rec, tbrec_node);
1496 /* Now this block record belongs to this transaction. */
1497 LIST_INSERT_HEAD(&new_trans->tbrec_list, block_rec, tbrec_node);
1498 block_rec->trans = new_trans;
1499 block_rec->buf = new_buf;
/* Obtain the block record for `lba` on behalf of `trans`.
 *
 * The journal's tree is searched first; the visible code transfers
 * ownership of a record to `trans` with jbd_trans_change_ownership().
 * A new zero-filled record is also allocated, initialised with lba, buf and
 * trans, given an empty dirty_buf_queue, linked into trans->tbrec_list and
 * inserted into the tree. The condition choosing between the two paths and
 * the return statements are on lines not present in this listing
 * (presumably "record found" vs "not found" - confirm).
 * The caller in this file treats a NULL return as failure. */
1502 static inline struct jbd_block_rec *
1503 jbd_trans_insert_block_rec(struct jbd_trans *trans,
1505 struct ext4_buf *buf)
1507 struct jbd_block_rec *block_rec;
1508 block_rec = jbd_trans_block_rec_lookup(trans->journal, lba);
1510 jbd_trans_change_ownership(block_rec, trans, buf);
1513 block_rec = calloc(1, sizeof(struct jbd_block_rec));
1517 block_rec->lba = lba;
1518 block_rec->buf = buf;
1519 block_rec->trans = trans;
1520 TAILQ_INIT(&block_rec->dirty_buf_queue);
1521 LIST_INSERT_HEAD(&trans->tbrec_list, block_rec, tbrec_node);
1522 RB_INSERT(jbd_block, &trans->journal->block_rec_root, block_rec);
/* Follow-up work for a block record once a transaction is done with one of
 * its buffers.
 *
 * Nothing visible is done unless the record is owned by `trans` (the
 * statement guarded by the ownership test is not present in this listing -
 * presumably an early return). Two code paths follow; the condition that
 * selects between them is on lines not shown:
 *  - one walks block_rec->dirty_buf_queue and calls jbd_trans_end_write()
 *    for the queued jbd_bufs using a stack ext4_buf as a stand-in;
 *  - the other takes the last jbd_buf on dirty_buf_queue, reads its
 *    journalled copy (jbd_buf->jbd_lba), copies it into the cache buffer
 *    for block_rec->lba, transfers the record to that jbd_buf's
 *    transaction, installs jbd_trans_end_write as the buffer's end_write
 *    hook and marks the buffer dirty. */
1527 jbd_trans_finish_callback(struct jbd_journal *journal,
1528 const struct jbd_trans *trans,
1529 struct jbd_block_rec *block_rec,
1532 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1533 if (block_rec->trans != trans)
1537 struct jbd_buf *jbd_buf, *tmp;
1538 TAILQ_FOREACH_SAFE(jbd_buf,
1539 &block_rec->dirty_buf_queue,
1542 /* All we need is a fake ext4_buf. */
1543 struct ext4_buf buf;
1545 jbd_trans_end_write(fs->bdev->bc,
1551 struct jbd_buf *jbd_buf;
1552 struct ext4_block jbd_block = EXT4_BLOCK_ZERO(),
1553 block = EXT4_BLOCK_ZERO();
1554 jbd_buf = TAILQ_LAST(&block_rec->dirty_buf_queue,
/* NOTE(review): both block fetches below happen inside ext4_assert(); if
 * ext4_assert() can be compiled out, the fetches would vanish with it -
 * confirm against ext4_debug.h. */
1557 ext4_assert(ext4_block_get(fs->bdev,
1559 jbd_buf->jbd_lba) == EOK);
1560 ext4_assert(ext4_block_get_noread(fs->bdev,
1562 block_rec->lba) == EOK);
/* Rebuild the block's contents from its journalled copy. */
1563 memcpy(block.data, jbd_block.data,
1564 journal->block_size);
1566 jbd_trans_change_ownership(block_rec,
1567 jbd_buf->trans, block.buf);
/* The new owner is notified when this buffer is eventually written. */
1569 block.buf->end_write = jbd_trans_end_write;
1570 block.buf->end_write_arg = jbd_buf;
1572 ext4_bcache_set_flag(jbd_block.buf, BC_TMP);
1573 ext4_bcache_set_dirty(block.buf);
1575 ext4_block_set(fs->bdev, &jbd_block);
1576 ext4_block_set(fs->bdev, &block);
/* Drop a block record if it is owned by `trans`: the record is unlinked
 * from its tbrec list and removed from journal->block_rec_root. Records
 * owned by another transaction are not touched by the visible code.
 * NOTE(review): whether the record's memory is released here is on lines
 * not present in this listing. */
1583 jbd_trans_remove_block_rec(struct jbd_journal *journal,
1584 struct jbd_block_rec *block_rec,
1585 struct jbd_trans *trans)
1587 /* If this block record doesn't belong to this transaction,
1589 if (block_rec->trans == trans) {
1590 LIST_REMOVE(block_rec, tbrec_node);
1591 RB_REMOVE(jbd_block,
1592 &journal->block_rec_root,
1598 /**@brief Add block to a transaction and mark it dirty.
 *        A jbd_buf is allocated for the block, attached to the block's
 *        record (created or taken over via jbd_trans_insert_block_rec())
 *        and queued on the transaction; the cache buffer gets an extra
 *        reference and jbd_trans_end_write as its end_write hook.
 *        A buffer whose hook already points at a jbd_buf of this same
 *        transaction is detected up front (the statement taken in that
 *        case is not present in this listing).
1599 * @param trans transaction
1600 * @param block block descriptor
1601 * @return standard error code*/
1602 int jbd_trans_set_block_dirty(struct jbd_trans *trans,
1603 struct ext4_block *block)
1605 struct jbd_buf *jbd_buf;
1606 struct jbd_block_rec *block_rec;
/* Is the buffer already tracked by the journal? */
1608 if (block->buf->end_write == jbd_trans_end_write) {
1609 jbd_buf = block->buf->end_write_arg;
1610 if (jbd_buf && jbd_buf->trans == trans)
1613 jbd_buf = calloc(1, sizeof(struct jbd_buf));
1617 if ((block_rec = jbd_trans_insert_block_rec(trans,
1619 block->buf)) == NULL) {
1624 TAILQ_INSERT_TAIL(&block_rec->dirty_buf_queue,
1628 jbd_buf->block_rec = block_rec;
1629 jbd_buf->trans = trans;
/* The descriptor is stored by value and an extra reference is taken on the
 * buffer; jbd_journal_cp_trans() and jbd_journal_free_trans() later call
 * ext4_block_set() on this stored descriptor. */
1630 jbd_buf->block = *block;
1631 ext4_bcache_inc_ref(block->buf);
1633 /* If the content reach the disk, notify us
1634 * so that we may do a checkpoint. */
1635 block->buf->end_write = jbd_trans_end_write;
1636 block->buf->end_write_arg = jbd_buf;
1639 TAILQ_INSERT_HEAD(&trans->buf_queue, jbd_buf, buf_node);
1641 ext4_bcache_set_dirty(block->buf);
1645 /**@brief Add block to be revoked to a transaction
 *        A zero-filled jbd_revoke_rec is allocated and inserted at the
 *        head of trans->revoke_list. The lines that check the allocation
 *        and store @lba in the record are not present in this listing.
1646 * @param trans transaction
1647 * @param lba logical block address
1648 * @return standard error code*/
1649 int jbd_trans_revoke_block(struct jbd_trans *trans,
1652 struct jbd_revoke_rec *rec =
1653 calloc(1, sizeof(struct jbd_revoke_rec));
1658 LIST_INSERT_HEAD(&trans->revoke_list, rec, revoke_node);
1662 /**@brief Try to add block to be revoked to a transaction.
1663 * If @lba still remains in a transaction on the checkpoint
1664 * queue, add @lba as a revoked block to the transaction.
 * The block record for @lba is looked up; when it exists and is owned by
 * a different transaction, its buffer (if any) is flushed and the block
 * is then revoked in @trans.
1665 * @param trans transaction
1666 * @param lba logical block address
1667 * @return standard error code*/
1668 int jbd_trans_try_revoke_block(struct jbd_trans *trans,
1672 struct jbd_journal *journal = trans->journal;
1673 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1674 struct jbd_block_rec *block_rec =
1675 jbd_trans_block_rec_lookup(journal, lba);
1677 /* Make sure we don't flush any buffers belonging to this transaction. */
1678 if (block_rec && block_rec->trans != trans) {
1679 /* If the buffer has not been flushed yet, flush it now. */
1680 if (block_rec->buf) {
1681 r = ext4_block_flush_buf(fs->bdev, block_rec->buf);
/* NOTE(review): the int result of jbd_trans_revoke_block() is not captured
 * on this line - confirm whether an allocation failure there should be
 * propagated. */
1687 jbd_trans_revoke_block(trans, lba);
1693 /**@brief Free a transaction
 *        Visible work, in order: every jbd_buf on trans->buf_queue is
 *        processed (hook fields cleared, buffer marked clean and released,
 *        jbd_buf unlinked from its record's dirty_buf_queue,
 *        jbd_trans_finish_callback() invoked, jbd_buf unlinked from
 *        buf_queue); every revoke record is unlinked from revoke_list;
 *        every block record on tbrec_list is passed to
 *        jbd_trans_remove_block_rec().
 *        NOTE(review): which of the per-buffer steps depend on @abort, and
 *        the calls that release memory, are on lines not present in this
 *        listing.
1694 * @param journal current journal session
1695 * @param trans transaction
1696 * @param abort discard all the modifications on the block?
1697 * The function is declared void, so it returns no value.*/
1698 void jbd_journal_free_trans(struct jbd_journal *journal,
1699 struct jbd_trans *trans,
1702 struct jbd_buf *jbd_buf, *tmp;
1703 struct jbd_revoke_rec *rec, *tmp2;
1704 struct jbd_block_rec *block_rec, *tmp3;
1705 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1706 TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
1708 block_rec = jbd_buf->block_rec;
/* Detach the journal hook, drop the dirty state and release the buffer. */
1710 jbd_buf->block.buf->end_write = NULL;
1711 jbd_buf->block.buf->end_write_arg = NULL;
1712 ext4_bcache_clear_dirty(jbd_buf->block.buf);
1713 ext4_block_set(fs->bdev, &jbd_buf->block);
1716 TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1719 jbd_trans_finish_callback(journal,
1723 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1726 LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1728 LIST_REMOVE(rec, revoke_node);
1731 LIST_FOREACH_SAFE(block_rec, &trans->tbrec_list, tbrec_node,
1733 jbd_trans_remove_block_rec(journal, block_rec, trans);
1739 /**@brief Write commit block for a transaction
 *        One journal block is allocated for the transaction, fetched
 *        without reading, filled with a commit header (JBD_MAGIC_NUMBER,
 *        JBD_COMMIT_BLOCK, sequence = trans->trans_id), optionally given
 *        the CRC32 data checksum fields, checksummed with
 *        jbd_commit_csum_set(), marked dirty and released with
 *        jbd_block_set().
1740 * @param trans transaction
1741 * @return standard error code*/
1742 static int jbd_trans_write_commit_block(struct jbd_trans *trans)
1745 struct jbd_commit_header *header;
1746 uint32_t commit_iblock = 0;
1747 struct ext4_block commit_block;
1748 struct jbd_journal *journal = trans->journal;
1750 commit_iblock = jbd_journal_alloc_block(journal, trans);
1751 rc = jbd_block_get_noread(journal->jbd_fs,
1752 &commit_block, commit_iblock);
1756 header = (struct jbd_commit_header *)commit_block.data;
1757 jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER);
1758 jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);
1759 jbd_set32(&header->header, sequence, trans->trans_id);
/* NOTE(review): the macro name tests the *incompat* feature word, but the
 * flag passed is named JBD_FEATURE_COMPAT_CHECKSUM - confirm whether a
 * compat-feature test was intended here. */
1761 if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1762 JBD_FEATURE_COMPAT_CHECKSUM)) {
1763 jbd_set32(header, chksum_type, JBD_CRC32_CHKSUM);
1764 jbd_set32(header, chksum_size, JBD_CRC32_CHKSUM_SIZE);
/* data_csum is the value stored by jbd_journal_prepare(). */
1765 jbd_set32(header, chksum[0], trans->data_csum);
1767 jbd_commit_csum_set(journal->jbd_fs, header);
1768 ext4_bcache_set_dirty(commit_block.buf);
1769 rc = jbd_block_set(journal->jbd_fs, &commit_block);
1776 /**@brief Write descriptor block for a transaction
 *        Outline of the visible statements:
 *        - a reverse pass over trans->buf_queue and then a forward pass;
 *          both contain code that unlinks a jbd_buf from its record's
 *          dirty_buf_queue, clears the buffer's end_write hook fields,
 *          calls jbd_trans_finish_callback() and
 *          jbd_trans_remove_block_rec(), releases the buffer and removes
 *          the jbd_buf from buf_queue (per the existing comments, for
 *          buffers that were not modified);
 *        - in the forward pass, for a buffer that is kept: the running
 *          checksum is updated with jbd_block_csum(); a descriptor block
 *          is allocated and its header initialised (JBD_MAGIC_NUMBER,
 *          JBD_DESCRIPTOR_BLOCK, sequence = trans->trans_id), with the tag
 *          table sized as block_size minus the header (and minus the block
 *          tail when jbd_has_csum() is true); trans->start_iblock is set
 *          to the descriptor block if it was still zero; a tag is written
 *          with jbd_write_block_tag(); a journal data block is allocated,
 *          the buffer contents are copied into it and its address is saved
 *          in jbd_buf->jbd_lba; the tag pointer and remaining table size
 *          are then advanced;
 *        - finally, if rc is EOK and a descriptor block is open, its
 *          checksum is set, trans->data_csum receives the running checksum
 *          and the descriptor block is released.
 *        NOTE(review): the conditions that decide when a new descriptor
 *        block is started and the error exits are on lines not present in
 *        this listing.
1777 * @param journal current journal session
1778 * @param trans transaction
1779 * @return standard error code*/
1780 static int jbd_journal_prepare(struct jbd_journal *journal,
1781 struct jbd_trans *trans)
1783 int rc = EOK, i = 0;
/* Bytes still available in the current descriptor block's tag table. */
1784 int32_t tag_tbl_size = 0;
1785 uint32_t desc_iblock = 0;
1786 uint32_t data_iblock = 0;
1787 char *tag_start = NULL, *tag_ptr = NULL;
1788 struct jbd_buf *jbd_buf, *tmp;
1789 struct ext4_block desc_block, data_block;
1790 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
/* Running checksum over the journalled data; stored in trans->data_csum
 * at the end of the function. */
1791 uint32_t checksum = EXT4_CRC32_INIT;
1793 /* Try to remove any non-dirty buffers from the tail of
1795 TAILQ_FOREACH_REVERSE_SAFE(jbd_buf, &trans->buf_queue,
1796 jbd_trans_buf, buf_node, tmp) {
1797 /* We stop the iteration when we find a dirty buffer. */
1798 if (ext4_bcache_test_flag(jbd_buf->block.buf,
1802 TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1806 jbd_buf->block.buf->end_write = NULL;
1807 jbd_buf->block.buf->end_write_arg = NULL;
1808 jbd_trans_finish_callback(journal,
1813 /* The buffer has not been modified, just release
1815 jbd_trans_remove_block_rec(journal,
1816 jbd_buf->block_rec, trans);
1819 ext4_block_set(fs->bdev, &jbd_buf->block);
1820 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1824 TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node, tmp) {
1825 struct tag_info tag_info;
1826 bool uuid_exist = false;
1827 if (!ext4_bcache_test_flag(jbd_buf->block.buf,
1829 TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1833 jbd_buf->block.buf->end_write = NULL;
1834 jbd_buf->block.buf->end_write_arg = NULL;
1835 jbd_trans_finish_callback(journal,
1840 /* The buffer has not been modified, just release
1842 jbd_trans_remove_block_rec(journal,
1843 jbd_buf->block_rec, trans);
1846 ext4_block_set(fs->bdev, &jbd_buf->block);
1847 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1851 checksum = jbd_block_csum(journal->jbd_fs,
1852 jbd_buf->block.data,
1857 struct jbd_bhdr *bhdr;
1858 desc_iblock = jbd_journal_alloc_block(journal, trans);
1859 rc = jbd_block_get_noread(journal->jbd_fs,
1860 &desc_block, desc_iblock);
1864 ext4_bcache_set_dirty(desc_block.buf);
1866 bhdr = (struct jbd_bhdr *)desc_block.data;
1867 jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1868 jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK);
1869 jbd_set32(bhdr, sequence, trans->trans_id);
1871 tag_start = (char *)(bhdr + 1);
1872 tag_ptr = tag_start;
1874 tag_tbl_size = journal->block_size -
1875 sizeof(struct jbd_bhdr);
1877 if (jbd_has_csum(&journal->jbd_fs->sb))
1878 tag_tbl_size -= sizeof(struct jbd_block_tail);
1880 if (!trans->start_iblock)
1881 trans->start_iblock = desc_iblock;
1884 tag_info.block = jbd_buf->block.lb_id;
1885 tag_info.uuid_exist = uuid_exist;
1886 if (i == trans->data_cnt - 1)
1887 tag_info.last_tag = true;
1889 tag_info.last_tag = false;
1891 tag_info.checksum = checksum;
1894 memcpy(tag_info.uuid, journal->jbd_fs->sb.uuid,
1897 rc = jbd_write_block_tag(journal->jbd_fs,
1902 jbd_meta_csum_set(journal->jbd_fs,
1903 (struct jbd_bhdr *)desc_block.data);
1904 jbd_block_set(journal->jbd_fs, &desc_block);
1909 data_iblock = jbd_journal_alloc_block(journal, trans);
1910 rc = jbd_block_get_noread(journal->jbd_fs,
1911 &data_block, data_iblock);
1915 ext4_bcache_set_dirty(data_block.buf);
1917 memcpy(data_block.data, jbd_buf->block.data,
1918 journal->block_size);
1919 jbd_buf->jbd_lba = data_block.lb_id;
1921 rc = jbd_block_set(journal->jbd_fs, &data_block);
1925 tag_ptr += tag_info.tag_bytes;
1926 tag_tbl_size -= tag_info.tag_bytes;
1930 if (rc == EOK && desc_iblock) {
1931 jbd_meta_csum_set(journal->jbd_fs,
1932 (struct jbd_bhdr *)desc_block.data);
1933 trans->data_csum = checksum;
1934 jbd_block_set(journal->jbd_fs, &desc_block);
1940 /**@brief Write revoke block for a transaction
 *        Walks trans->revoke_list and stores each record's lba, big-endian,
 *        into revoke blocks allocated from the journal. Entries are 4 bytes
 *        wide by default; a test of JBD_FEATURE_INCOMPAT_64BIT precedes
 *        the loop and the loop has an 8-byte branch (the statement that
 *        switches record_len is not present in this listing). A block is
 *        closed (count set, checksum set, released) when fewer than
 *        record_len bytes remain, and once more after the loop if a block
 *        is still open.
1941 * @param journal current journal session
1942 * @param trans transaction
1943 * @return standard error code*/
1945 jbd_journal_prepare_revoke(struct jbd_journal *journal,
1946 struct jbd_trans *trans)
1948 int rc = EOK, i = 0;
/* Bytes still available for entries in the current revoke block. */
1949 int32_t tag_tbl_size = 0;
1950 uint32_t desc_iblock = 0;
1951 char *blocks_entry = NULL;
1952 struct jbd_revoke_rec *rec, *tmp;
1953 struct ext4_block desc_block;
1954 struct jbd_revoke_header *header = NULL;
/* Size in bytes of one revoked-block entry. */
1955 int32_t record_len = 4;
1957 if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1958 JBD_FEATURE_INCOMPAT_64BIT))
1961 LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1965 struct jbd_bhdr *bhdr;
1966 desc_iblock = jbd_journal_alloc_block(journal, trans);
1967 rc = jbd_block_get_noread(journal->jbd_fs,
1968 &desc_block, desc_iblock);
1973 ext4_bcache_set_dirty(desc_block.buf);
1975 bhdr = (struct jbd_bhdr *)desc_block.data;
1976 jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1977 jbd_set32(bhdr, blocktype, JBD_REVOKE_BLOCK);
1978 jbd_set32(bhdr, sequence, trans->trans_id);
/* Entries start right after the revoke header. */
1980 header = (struct jbd_revoke_header *)bhdr;
1981 blocks_entry = (char *)(header + 1);
1982 tag_tbl_size = journal->block_size -
1983 sizeof(struct jbd_revoke_header);
1985 if (jbd_has_csum(&journal->jbd_fs->sb))
1986 tag_tbl_size -= sizeof(struct jbd_block_tail);
1988 if (!trans->start_iblock)
1989 trans->start_iblock = desc_iblock;
/* Not enough room for another entry: close the current revoke block. */
1993 if (tag_tbl_size < record_len) {
/* NOTE(review): when jbd_has_csum() is true the tail size has already been
 * subtracted from tag_tbl_size, so block_size - tag_tbl_size covers the
 * header, the entries AND the tail - confirm that this is the value
 * intended for count. The same expression is used after the loop. */
1994 jbd_set32(header, count,
1995 journal->block_size - tag_tbl_size);
1996 jbd_meta_csum_set(journal->jbd_fs,
1997 (struct jbd_bhdr *)desc_block.data);
1998 jbd_block_set(journal->jbd_fs, &desc_block);
2003 if (record_len == 8) {
2005 (uint64_t *)blocks_entry;
2006 *blocks = to_be64(rec->lba);
2009 (uint32_t *)blocks_entry;
2010 *blocks = to_be32((uint32_t)rec->lba);
2012 blocks_entry += record_len;
2013 tag_tbl_size -= record_len;
/* Close the last, partially filled revoke block. */
2017 if (rc == EOK && desc_iblock) {
2019 jbd_set32(header, count,
2020 journal->block_size - tag_tbl_size);
2022 jbd_meta_csum_set(journal->jbd_fs,
2023 (struct jbd_bhdr *)desc_block.data);
2024 jbd_block_set(journal->jbd_fs, &desc_block);
2030 /**@brief Put references of block descriptors in a transaction.
 *        For every jbd_buf on trans->buf_queue, ext4_block_set() is called
 *        on a local copy of the stored block descriptor, so the descriptor
 *        kept inside the jbd_buf is not the object passed to
 *        ext4_block_set().
2031 * @param journal current journal session
2032 * @param trans transaction*/
2033 void jbd_journal_cp_trans(struct jbd_journal *journal, struct jbd_trans *trans)
2035 struct jbd_buf *jbd_buf, *tmp;
2036 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
2037 TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
2039 struct ext4_block block = jbd_buf->block;
2040 ext4_block_set(fs->bdev, &block);
2044 /**@brief Update the start block of the journal when
2045 * all the contents in a transaction reach the disk.*/
/* Buffer write-completion callback installed as ext4_buf::end_write.
 *
 * `arg` is the jbd_buf that was stored in end_write_arg. The jbd_buf is
 * unlinked from its transaction's buf_queue and from its block record's
 * dirty_buf_queue, jbd_trans_finish_callback() is invoked, and - if the
 * block record is still owned by this transaction - the record's buf
 * pointer and the buffer's hook fields are cleared. The transaction's
 * written_cnt is then incremented; once it equals data_cnt and the
 * transaction was at the head of the checkpoint queue when the callback
 * started, the journal start is moved past it, the transaction is freed,
 * further transactions are processed with jbd_journal_purge_cp_trans() and
 * the journal superblock is written.
 * NOTE(review): handling of the write result parameter and the release of
 * the jbd_buf are on lines not present in this listing. */
2046 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
2047 struct ext4_buf *buf,
2051 struct jbd_buf *jbd_buf = arg;
2052 struct jbd_trans *trans = jbd_buf->trans;
2053 struct jbd_block_rec *block_rec = jbd_buf->block_rec;
2054 struct jbd_journal *journal = trans->journal;
/* Sampled before any queue manipulation below. */
2055 bool first_in_queue =
2056 trans == TAILQ_FIRST(&journal->cp_queue);
2060 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
2061 TAILQ_REMOVE(&block_rec->dirty_buf_queue,
2065 jbd_trans_finish_callback(journal,
2069 if (block_rec->trans == trans) {
2070 block_rec->buf = NULL;
2071 /* Clear the end_write and end_write_arg fields. */
2072 buf->end_write = NULL;
2073 buf->end_write_arg = NULL;
2078 trans->written_cnt++;
2079 if (trans->written_cnt == trans->data_cnt) {
2080 /* If it is the first transaction on checkpoint queue,
2081 * we will shift the start of the journal to the next
2082 * transaction, and remove subsequent written
2083 * transactions from checkpoint queue until we find
2084 * an unwritten one. */
2085 if (first_in_queue) {
2086 journal->start = trans->start_iblock +
2087 trans->alloc_blocks;
2088 wrap(&journal->jbd_fs->sb, journal->start);
2089 journal->trans_id = trans->trans_id + 1;
2090 TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
2091 jbd_journal_free_trans(journal, trans, false);
2093 jbd_journal_purge_cp_trans(journal, false, true);
2094 jbd_journal_write_sb(journal);
2095 jbd_write_sb(journal->jbd_fs);
2100 /**@brief Commit a transaction to the journal immediately.
 *        Visible sequence: the transaction takes journal->alloc_trans_id
 *        as its id; descriptor/data blocks and revoke blocks are written
 *        with jbd_journal_prepare() and jbd_journal_prepare_revoke(); a
 *        transaction with neither buffers nor revoke records is freed
 *        without being committed; otherwise the commit block is written
 *        and alloc_trans_id is incremented. The transaction is then either
 *        placed on the checkpoint queue (with its buffer references put
 *        via jbd_journal_cp_trans() when it has data blocks) or, when the
 *        queue is empty and it has no data blocks, the journal start is
 *        moved past it and it is freed.
 *        NOTE(review): the error checks after each step and the function's
 *        return statements are on lines not present in this listing.
2101 * @param journal current journal session
2102 * @param trans transaction
2103 * @return standard error code*/
2104 int jbd_journal_commit_trans(struct jbd_journal *journal,
2105 struct jbd_trans *trans)
/* Saved journal tail; it is written back to journal->last at the end of
 * the function next to jbd_journal_free_trans(..., true) (presumably the
 * error path - confirm). */
2108 uint32_t last = journal->last;
2110 trans->trans_id = journal->alloc_trans_id;
2111 rc = jbd_journal_prepare(journal, trans);
2115 rc = jbd_journal_prepare_revoke(journal, trans);
2119 if (TAILQ_EMPTY(&trans->buf_queue) &&
2120 LIST_EMPTY(&trans->revoke_list)) {
2121 /* Since there are no entries in both buffer list
2122 * and revoke entry list, we do not consider trans as
2123 * complete transaction and just return EOK.*/
2124 jbd_journal_free_trans(journal, trans, false);
2128 rc = jbd_trans_write_commit_block(trans);
2132 journal->alloc_trans_id++;
2133 if (TAILQ_EMPTY(&journal->cp_queue)) {
/* First pending transaction: the journal start and trans_id are pointed at
 * it and both superblock write helpers are called before it is queued. */
2134 if (trans->data_cnt) {
2135 journal->start = trans->start_iblock;
2136 wrap(&journal->jbd_fs->sb, journal->start);
2137 journal->trans_id = trans->trans_id;
2138 jbd_journal_write_sb(journal);
2139 jbd_write_sb(journal->jbd_fs);
2140 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
2142 jbd_journal_cp_trans(journal, trans);
2144 journal->start = trans->start_iblock +
2145 trans->alloc_blocks;
2146 wrap(&journal->jbd_fs->sb, journal->start);
2147 journal->trans_id = trans->trans_id + 1;
2148 jbd_journal_write_sb(journal);
2149 jbd_journal_free_trans(journal, trans, false);
2152 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
2154 if (trans->data_cnt)
2155 jbd_journal_cp_trans(journal, trans);
2160 journal->last = last;
2161 jbd_journal_free_trans(journal, trans, true);