2 * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3 * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 /** @addtogroup lwext4
34 * @file ext4_journal.c
35 * @brief Journal handle functions
38 #include "ext4_config.h"
39 #include "ext4_types.h"
40 #include "ext4_misc.h"
41 #include "ext4_errno.h"
42 #include "ext4_debug.h"
45 #include "ext4_super.h"
46 #include "ext4_journal.h"
47 #include "ext4_blockdev.h"
48 #include "ext4_crc32.h"
49 #include "ext4_journal.h"
/* Node of the replay-time revoke tree. The replay code reads and writes its
 * `block` and `trans_id` members and links it through `revoke_node`. */
54 /**@brief Revoke entry during journal replay.*/
56 /**@brief Block number not to be replayed.*/
59 /**@brief For any transaction id smaller
60 * than trans_id, records of @block
61 * in those transactions should not
65 /**@brief Revoke tree node.*/
66 RB_ENTRY(revoke_entry) revoke_node;
/* State shared by the passes of jbd_iterate_log(): the scan pass resets
 * trans_cnt and records start_trans_id / last_trans_id, the revoke pass
 * stores the id being processed in this_trans_id and fills revoke_root. */
69 /**@brief Valid journal replay information.*/
71 /**@brief Starting transaction id.*/
72 uint32_t start_trans_id;
74 /**@brief Ending transaction id.*/
75 uint32_t last_trans_id;
77 /**@brief Used as internal argument.*/
78 uint32_t this_trans_id;
80 /**@brief No of transactions went through.*/
83 /**@brief RB-Tree storing revoke entries.*/
84 RB_HEAD(jbd_revoke, revoke_entry) revoke_root;
/* Bundle handed to jbd_replay_block_tags() as its opaque argument; it is
 * filled in by jbd_iterate_log() before a descriptor block is replayed. */
87 /**@brief Journal replay internal arguments.*/
89 /**@brief Journal replay information.*/
90 struct recover_info *info;
92 /**@brief Current block we are on.*/
95 /**@brief Current trans_id we are on.*/
96 uint32_t this_trans_id;
/* wrap(sb, var): when var has reached or passed the superblock's maxlen,
 * pull it back by (maxlen - first).
 * NOTE: var is evaluated more than once and is assigned to, so it must be
 * a side-effect-free lvalue. */
99 /* Make sure we wrap around the log correctly! */
100 #define wrap(sb, var) \
102 if (var >= jbd_get32((sb), maxlen)) \
103 var -= (jbd_get32((sb), maxlen) - jbd_get32((sb), first)); \
/* Comparator for the jbd_revoke RB-tree: orders two revoke entries by
 * their `block` member. */
108 jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b)
110 if (a->block > b->block)
112 else if (a->block < b->block)
/* Comparator for the jbd_block RB-tree: orders block records by `lba`. */
118 jbd_block_rec_cmp(struct jbd_block_rec *a, struct jbd_block_rec *b)
122 else if (a->lba < b->lba)
/* Comparator for the jbd_revoke_tree RB-tree: orders revoke records by
 * `lba`. */
128 jbd_revoke_rec_cmp(struct jbd_revoke_rec *a, struct jbd_revoke_rec *b)
132 else if (a->lba < b->lba)
/* Instantiate the RB-tree helper functions (as static inline) for the three
 * trees used in this file, each bound to its own comparator. */
137 RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
138 jbd_revoke_entry_cmp, static inline)
139 RB_GENERATE_INTERNAL(jbd_block, jbd_block_rec, block_rec_node,
140 jbd_block_rec_cmp, static inline)
141 RB_GENERATE_INTERNAL(jbd_revoke_tree, jbd_revoke_rec, revoke_node,
142 jbd_revoke_rec_cmp, static inline)
/* Revoke entries are zero-initialised heap objects (calloc) and are
 * released with free(). The allocation may yield NULL. */
144 #define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry))
145 #define jbd_free_revoke_entry(addr) free(addr)
/* Probes the journal superblock for the CSUM_V2 and then the CSUM_V3
 * incompat feature. Callers in this file use the result both as a truth
 * value and as a checksum version number. */
147 static int jbd_has_csum(struct jbd_sb *jbd_sb)
149 if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V2))
152 if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V3))
/* Computes the crc32c of the journal superblock over JBD_SUPERBLOCK_SIZE
 * bytes. The stored checksum field is zeroed for the computation and put
 * back afterwards, so the superblock is left unchanged. When checksums are
 * not enabled on the journal the result stays 0; when
 * CONFIG_META_CSUM_ENABLE is off the whole routine is the constant 0. */
158 #if CONFIG_META_CSUM_ENABLE
159 static uint32_t jbd_sb_csum(struct jbd_sb *jbd_sb)
161 uint32_t checksum = 0;
163 if (jbd_has_csum(jbd_sb)) {
164 uint32_t orig_checksum = jbd_sb->checksum;
165 jbd_set32(jbd_sb, checksum, 0);
166 /* Calculate crc32c checksum against the whole superblock */
167 checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_sb,
168 JBD_SUPERBLOCK_SIZE);
169 jbd_sb->checksum = orig_checksum;
174 #define jbd_sb_csum(...) 0
/* Recomputes the superblock checksum with jbd_sb_csum() and stores it in
 * the superblock's checksum field via jbd_set32(). Guarded by
 * jbd_has_csum(). */
177 static void jbd_sb_csum_set(struct jbd_sb *jbd_sb)
179 if (!jbd_has_csum(jbd_sb))
182 jbd_set32(jbd_sb, checksum, jbd_sb_csum(jbd_sb));
/* Compares a freshly computed superblock checksum with the stored one
 * (read through jbd_get32). Always true when CONFIG_META_CSUM_ENABLE is
 * off. */
185 #if CONFIG_META_CSUM_ENABLE
187 jbd_verify_sb_csum(struct jbd_sb *jbd_sb)
189 if (!jbd_has_csum(jbd_sb))
192 return jbd_sb_csum(jbd_sb) == jbd_get32(jbd_sb, checksum);
195 #define jbd_verify_sb_csum(...) true
/* Computes the crc32c of a journal metadata block. The checksum is seeded
 * with the journal superblock's uuid and then run over the block that
 * starts at bhdr. The block tail sits in the last
 * sizeof(struct jbd_block_tail) bytes of the block; its checksum is saved
 * beforehand and restored afterwards. Constant 0 when
 * CONFIG_META_CSUM_ENABLE is off. */
198 #if CONFIG_META_CSUM_ENABLE
199 static uint32_t jbd_meta_csum(struct jbd_fs *jbd_fs,
200 struct jbd_bhdr *bhdr)
202 uint32_t checksum = 0;
204 if (jbd_has_csum(&jbd_fs->sb)) {
205 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
206 struct jbd_block_tail *tail =
207 (struct jbd_block_tail *)((char *)bhdr + block_size -
208 sizeof(struct jbd_block_tail));
209 uint32_t orig_checksum = tail->checksum;
212 /* First calculate crc32c checksum against fs uuid */
213 checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
214 sizeof(jbd_fs->sb.uuid));
215 /* Calculate crc32c checksum against the whole block */
216 checksum = ext4_crc32c(checksum, bhdr,
218 tail->checksum = orig_checksum;
223 #define jbd_meta_csum(...) 0
/* Stores the metadata block checksum, converted with to_be32(), in the
 * block tail located at the end of the block. Guarded by jbd_has_csum(). */
226 static void jbd_meta_csum_set(struct jbd_fs *jbd_fs,
227 struct jbd_bhdr *bhdr)
229 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
230 struct jbd_block_tail *tail = (struct jbd_block_tail *)
231 ((char *)bhdr + block_size -
232 sizeof(struct jbd_block_tail));
233 if (!jbd_has_csum(&jbd_fs->sb))
236 tail->checksum = to_be32(jbd_meta_csum(jbd_fs, bhdr));
/* Recomputes the metadata block checksum and compares it with the
 * byte-swapped value found in the block tail. Always true when
 * CONFIG_META_CSUM_ENABLE is off. */
239 #if CONFIG_META_CSUM_ENABLE
241 jbd_verify_meta_csum(struct jbd_fs *jbd_fs,
242 struct jbd_bhdr *bhdr)
244 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
245 struct jbd_block_tail *tail = (struct jbd_block_tail *)
246 ((char *)bhdr + block_size -
247 sizeof(struct jbd_block_tail));
248 if (!jbd_has_csum(&jbd_fs->sb))
251 return jbd_meta_csum(jbd_fs, bhdr) == to_be32(tail->checksum);
254 #define jbd_verify_meta_csum(...) true
/* Computes the crc32c of a commit block. chksum_type, chksum_size and
 * chksum[0] of the header are zeroed for the computation and restored
 * afterwards. The checksum is seeded with the journal uuid and then run
 * over the commit header. Constant 0 when CONFIG_META_CSUM_ENABLE is
 * off. */
257 #if CONFIG_META_CSUM_ENABLE
258 static uint32_t jbd_commit_csum(struct jbd_fs *jbd_fs,
259 struct jbd_commit_header *header)
261 uint32_t checksum = 0;
263 if (jbd_has_csum(&jbd_fs->sb)) {
264 uint32_t orig_checksum_type = header->chksum_type,
265 orig_checksum_size = header->chksum_size,
266 orig_checksum = header->chksum[0];
267 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
268 header->chksum_type = 0;
269 header->chksum_size = 0;
270 header->chksum[0] = 0;
272 /* First calculate crc32c checksum against fs uuid */
273 checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
274 sizeof(jbd_fs->sb.uuid));
275 /* Calculate crc32c checksum against the whole block */
276 checksum = ext4_crc32c(checksum, header,
279 header->chksum_type = orig_checksum_type;
280 header->chksum_size = orig_checksum_size;
281 header->chksum[0] = orig_checksum;
286 #define jbd_commit_csum(...) 0
289 static void jbd_commit_csum_set(struct jbd_fs *jbd_fs,
290 struct jbd_commit_header *header)
292 if (!jbd_has_csum(&jbd_fs->sb))
295 header->chksum_type = 0;
296 header->chksum_size = 0;
297 header->chksum[0] = jbd_commit_csum(jbd_fs, header);
/* Compares chksum[0] of a commit header with the byte-swapped result of
 * jbd_commit_csum(). Always true when CONFIG_META_CSUM_ENABLE is off. */
300 #if CONFIG_META_CSUM_ENABLE
301 static bool jbd_verify_commit_csum(struct jbd_fs *jbd_fs,
302 struct jbd_commit_header *header)
304 if (!jbd_has_csum(&jbd_fs->sb))
307 return header->chksum[0] == to_be32(jbd_commit_csum(jbd_fs,
311 #define jbd_verify_commit_csum(...) true
/* Computes the checksum of a journalled data block. With v2/v3 checksums
 * the crc32c is seeded with the journal uuid, then covers the sequence
 * number and then the block contents. The other branch runs ext4_crc32()
 * over the block starting from the caller-supplied csum.
 * NOTE(review): that other branch tests JBD_FEATURE_COMPAT_CHECKSUM through
 * JBD_HAS_INCOMPAT_FEATURE, i.e. a compat flag is looked up in the incompat
 * feature word; this looks unintended and should be confirmed. */
314 #if CONFIG_META_CSUM_ENABLE
316 * NOTE: We only make use of @csum parameter when
317 * JBD_FEATURE_COMPAT_CHECKSUM is enabled.
319 static uint32_t jbd_block_csum(struct jbd_fs *jbd_fs, const void *buf,
323 uint32_t checksum = 0;
325 if (jbd_has_csum(&jbd_fs->sb)) {
326 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
327 /* First calculate crc32c checksum against fs uuid */
328 checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
329 sizeof(jbd_fs->sb.uuid));
330 /* Then calculate crc32c checksum against sequence no. */
331 checksum = ext4_crc32c(checksum, &sequence,
333 /* Calculate crc32c checksum against the whole block */
334 checksum = ext4_crc32c(checksum, buf,
336 } else if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
337 JBD_FEATURE_COMPAT_CHECKSUM)) {
338 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
339 /* Calculate crc32 checksum against the whole block */
340 checksum = ext4_crc32(csum, buf,
346 #define jbd_block_csum(...) 0
349 static void jbd_block_tag_csum_set(struct jbd_fs *jbd_fs, void *__tag,
352 int ver = jbd_has_csum(&jbd_fs->sb);
357 struct jbd_block_tag *tag = __tag;
358 tag->checksum = (uint16_t)to_be32(checksum);
360 struct jbd_block_tag3 *tag = __tag;
361 tag->checksum = to_be32(checksum);
/* The journal superblock lives in block 0 of the journal inode: the block
 * is mapped with jbd_inode_bmap() and the byte offset is that block number
 * times the ext4 block size.
 * NOTE(review): the transfer length is EXT4_SUPERBLOCK_SIZE while the
 * checksum code uses JBD_SUPERBLOCK_SIZE; confirm both are the same. */
365 /**@brief Write jbd superblock to disk.
366 * @param jbd_fs jbd filesystem
367 * @param s jbd superblock
368 * @return standard error code*/
369 static int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s)
372 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
375 rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
380 offset = fblock * ext4_sb_get_block_size(&fs->sb);
381 return ext4_block_writebytes(fs->bdev, offset, s,
382 EXT4_SUPERBLOCK_SIZE);
/* Mirror of jbd_sb_write(): maps block 0 of the journal inode and reads
 * EXT4_SUPERBLOCK_SIZE bytes from the resulting byte offset into s. */
385 /**@brief Read jbd superblock from disk.
386 * @param jbd_fs jbd filesystem
387 * @param s jbd superblock
388 * @return standard error code*/
389 static int jbd_sb_read(struct jbd_fs *jbd_fs, struct jbd_sb *s)
392 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
395 rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
399 offset = fblock * ext4_sb_get_block_size(&fs->sb);
400 return ext4_block_readbytes(fs->bdev, offset, s,
401 EXT4_SUPERBLOCK_SIZE);
/* Checks, in order: the magic number in the block header, that the block
 * type is JBD_SUPERBLOCK or JBD_SUPERBLOCK_V2, and finally the superblock
 * checksum. */
404 /**@brief Verify jbd superblock.
405 * @param sb jbd superblock
406 * @return true if jbd superblock is valid */
407 static bool jbd_verify_sb(struct jbd_sb *sb)
409 struct jbd_bhdr *header = &sb->header;
410 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER)
413 if (jbd_get32(header, blocktype) != JBD_SUPERBLOCK &&
414 jbd_get32(header, blocktype) != JBD_SUPERBLOCK_V2)
417 return jbd_verify_sb_csum(sb);
/* Writes the in-memory superblock held in jbd_fs through jbd_sb_write()
 * and clears the jbd_fs->dirty flag. */
420 /**@brief Write back dirty jbd superblock to disk.
421 * @param jbd_fs jbd filesystem
422 * @return standard error code*/
423 static int jbd_write_sb(struct jbd_fs *jbd_fs)
427 rc = jbd_sb_write(jbd_fs, &jbd_fs->sb);
431 jbd_fs->dirty = false;
436 /**@brief Get reference to jbd filesystem.
437 * @param fs Filesystem to load journal of
438 * @param jbd_fs jbd filesystem
439 * @return standard error code*/
440 int jbd_get_fs(struct ext4_fs *fs,
441 struct jbd_fs *jbd_fs)
444 uint32_t journal_ino;
446 memset(jbd_fs, 0, sizeof(struct jbd_fs));
447 /* See if there is journal inode on this filesystem.*/
448 /* FIXME: detection on existance ofbkejournal bdev is
450 journal_ino = ext4_get32(&fs->sb, journal_inode_number);
452 rc = ext4_fs_get_inode_ref(fs,
456 memset(jbd_fs, 0, sizeof(struct jbd_fs));
459 rc = jbd_sb_read(jbd_fs, &jbd_fs->sb);
461 memset(jbd_fs, 0, sizeof(struct jbd_fs));
462 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
465 if (!jbd_verify_sb(&jbd_fs->sb)) {
466 memset(jbd_fs, 0, sizeof(struct jbd_fs));
467 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
472 jbd_fs->bdev = fs->bdev;
/* Writes the journal superblock back with jbd_write_sb() and then releases
 * the reference to the journal inode. */
477 /**@brief Put reference of jbd filesystem.
478 * @param jbd_fs jbd filesystem
479 * @return standard error code*/
480 int jbd_put_fs(struct jbd_fs *jbd_fs)
483 rc = jbd_write_sb(jbd_fs);
485 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
/* Thin wrapper around ext4_fs_get_inode_dblk_idx(): callers in this file
 * pass a block index inside the journal inode and receive the mapped block
 * address through fblock. */
489 /**@brief Data block lookup helper.
490 * @param jbd_fs jbd filesystem
491 * @param iblock block index
492 * @param fblock logical block address
493 * @return standard error code*/
494 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
496 ext4_fsblk_t *fblock)
498 int rc = ext4_fs_get_inode_dblk_idx(
/* The incoming fblock is first treated as a block index inside the journal
 * inode (it is narrowed to ext4_lblk_t) and mapped with jbd_inode_bmap().
 * The mapped block is then fetched through the block cache of jbd_fs->bdev.
 * The resulting buffer is flagged BC_FLUSH and BC_TMP. */
506 /**@brief jbd block get function (through cache).
507 * @param jbd_fs jbd filesystem
508 * @param block block descriptor
509 * @param fblock jbd logical block address
510 * @return standard error code*/
511 static int jbd_block_get(struct jbd_fs *jbd_fs,
512 struct ext4_block *block,
515 /* TODO: journal device. */
517 struct ext4_blockdev *bdev = jbd_fs->bdev;
518 ext4_lblk_t iblock = (ext4_lblk_t)fblock;
520 /* Lookup the logical block address of
522 rc = jbd_inode_bmap(jbd_fs, iblock,
527 rc = ext4_block_get(bdev, block, fblock);
529 /* If succeeded, mark buffer as BC_FLUSH to indicate
530 * that data should be written to disk immediately.*/
532 ext4_bcache_set_flag(block->buf, BC_FLUSH);
533 /* As we don't want to occupy too much space
534 * in block cache, we set this buffer BC_TMP.*/
535 ext4_bcache_set_flag(block->buf, BC_TMP);
/* Same mapping step as jbd_block_get(), but the buffer is obtained with
 * ext4_block_get_noread(), and only the BC_FLUSH flag is set on it in the
 * code shown here. */
541 /**@brief jbd block get function (through cache, don't read).
542 * @param jbd_fs jbd filesystem
543 * @param block block descriptor
544 * @param fblock jbd logical block address
545 * @return standard error code*/
546 static int jbd_block_get_noread(struct jbd_fs *jbd_fs,
547 struct ext4_block *block,
550 /* TODO: journal device. */
552 struct ext4_blockdev *bdev = jbd_fs->bdev;
553 ext4_lblk_t iblock = (ext4_lblk_t)fblock;
554 rc = jbd_inode_bmap(jbd_fs, iblock,
559 rc = ext4_block_get_noread(bdev, block, fblock);
561 ext4_bcache_set_flag(block->buf, BC_FLUSH);
/* Releases a block obtained with jbd_block_get()/jbd_block_get_noread() by
 * forwarding it to ext4_block_set() on jbd_fs->bdev. */
566 /**@brief jbd block set procedure (through cache).
567 * @param jbd_fs jbd filesystem
568 * @param block block descriptor
569 * @return standard error code*/
570 static int jbd_block_set(struct jbd_fs *jbd_fs,
571 struct ext4_block *block)
573 struct ext4_blockdev *bdev = jbd_fs->bdev;
574 return ext4_block_set(bdev, block);
/* Size rules visible below: with CSUM_V3 the tag is a whole
 * struct jbd_block_tag3. Otherwise the base is struct jbd_block_tag, plus
 * sizeof(uint16_t) when CSUM_V2 is set, and minus sizeof(uint32_t) on the
 * path taken for journals with 4-byte block numbers. */
577 /**@brief helper functions to calculate
578 * block tag size, not including UUID part.
579 * @param jbd_fs jbd filesystem
580 * @return tag size in bytes*/
581 static int jbd_tag_bytes(struct jbd_fs *jbd_fs)
585 /* It is very easy to deal with the case which
586 * JBD_FEATURE_INCOMPAT_CSUM_V3 is enabled.*/
587 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
588 JBD_FEATURE_INCOMPAT_CSUM_V3))
589 return sizeof(struct jbd_block_tag3);
591 size = sizeof(struct jbd_block_tag);
593 /* If JBD_FEATURE_INCOMPAT_CSUM_V2 is enabled,
594 * add 2 bytes to size.*/
595 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
596 JBD_FEATURE_INCOMPAT_CSUM_V2))
597 size += sizeof(uint16_t);
599 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
600 JBD_FEATURE_INCOMPAT_64BIT))
603 /* If block number is 4 bytes in size,
604 * minus 4 bytes from size */
605 return size - sizeof(uint32_t);
/* Decoded form of an on-disk block tag; produced by
 * jbd_extract_block_tag() and consumed by jbd_write_block_tag(). */
608 /**@brief Tag information. */
610 /**@brief Tag size in bytes, including UUID part.*/
613 /**@brief block number stored in this tag.*/
616 /**@brief whether UUID part exists or not.*/
619 /**@brief UUID content if UUID part exists.*/
620 uint8_t uuid[UUID_SIZE];
622 /**@brief Is this the last tag? */
625 /**@brief crc32c checksum. */
/* Decodes one tag into tag_info. Two layouts are handled: struct
 * jbd_block_tag3 (CSUM_V3, 32-bit flags) and struct jbd_block_tag (16-bit
 * flags). In both, the high block number half is only merged in for 64BIT
 * journals. When JBD_FLAG_SAME_UUID is not set, a UUID of UUID_SIZE bytes
 * follows the tag: it is copied out and tag_info->tag_bytes grows
 * accordingly. Bounds are checked against remain_buf_size before the tag
 * and before the UUID are read. */
629 /**@brief Extract information from a block tag.
630 * @param __tag pointer to the block tag
631 * @param tag_bytes block tag size of this jbd filesystem
632 * @param remaining size in buffer containing the block tag
633 * @param tag_info information of this tag.
634 * @return EOK when succeed, otherwise return EINVAL.*/
636 jbd_extract_block_tag(struct jbd_fs *jbd_fs,
639 int32_t remain_buf_size,
640 struct tag_info *tag_info)
643 tag_info->tag_bytes = tag_bytes;
644 tag_info->uuid_exist = false;
645 tag_info->last_tag = false;
647 /* See whether it is possible to hold a valid block tag.*/
648 if (remain_buf_size - tag_bytes < 0)
651 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
652 JBD_FEATURE_INCOMPAT_CSUM_V3)) {
653 struct jbd_block_tag3 *tag = __tag;
654 tag_info->block = jbd_get32(tag, blocknr);
655 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
656 JBD_FEATURE_INCOMPAT_64BIT))
658 (uint64_t)jbd_get32(tag, blocknr_high) << 32;
660 if (jbd_get32(tag, flags) & JBD_FLAG_ESCAPE)
663 if (!(jbd_get32(tag, flags) & JBD_FLAG_SAME_UUID)) {
664 /* See whether it is possible to hold UUID part.*/
665 if (remain_buf_size - tag_bytes < UUID_SIZE)
668 uuid_start = (char *)tag + tag_bytes;
669 tag_info->uuid_exist = true;
670 tag_info->tag_bytes += UUID_SIZE;
671 memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
674 if (jbd_get32(tag, flags) & JBD_FLAG_LAST_TAG)
675 tag_info->last_tag = true;
678 struct jbd_block_tag *tag = __tag;
679 tag_info->block = jbd_get32(tag, blocknr);
680 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
681 JBD_FEATURE_INCOMPAT_64BIT))
683 (uint64_t)jbd_get32(tag, blocknr_high) << 32;
685 if (jbd_get16(tag, flags) & JBD_FLAG_ESCAPE)
688 if (!(jbd_get16(tag, flags) & JBD_FLAG_SAME_UUID)) {
689 /* See whether it is possible to hold UUID part.*/
690 if (remain_buf_size - tag_bytes < UUID_SIZE)
693 uuid_start = (char *)tag + tag_bytes;
694 tag_info->uuid_exist = true;
695 tag_info->tag_bytes += UUID_SIZE;
696 memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
699 if (jbd_get16(tag, flags) & JBD_FLAG_LAST_TAG)
700 tag_info->last_tag = true;
/* Encodes tag_info into an on-disk tag; the counterpart of
 * jbd_extract_block_tag(). The tag is zeroed first, then the block number
 * (high half only for 64BIT journals), the optional UUID, the checksum and
 * the LAST_TAG flag are written. tag_info->tag_bytes is updated to the
 * number of bytes consumed, including the UUID when one is written. The
 * JBD_FLAG_SAME_UUID flag is set on the path that does not write a UUID
 * (presumably the else branch of the uuid_exist test). */
706 /**@brief Write information to a block tag.
707 * @param __tag pointer to the block tag
708 * @param remaining size in buffer containing the block tag
709 * @param tag_info information of this tag.
710 * @return EOK when succeed, otherwise return EINVAL.*/
712 jbd_write_block_tag(struct jbd_fs *jbd_fs,
714 int32_t remain_buf_size,
715 struct tag_info *tag_info)
718 int tag_bytes = jbd_tag_bytes(jbd_fs);
720 tag_info->tag_bytes = tag_bytes;
722 /* See whether it is possible to hold a valid block tag.*/
723 if (remain_buf_size - tag_bytes < 0)
726 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
727 JBD_FEATURE_INCOMPAT_CSUM_V3)) {
728 struct jbd_block_tag3 *tag = __tag;
729 memset(tag, 0, sizeof(struct jbd_block_tag3));
730 jbd_set32(tag, blocknr, (uint32_t)tag_info->block);
731 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
732 JBD_FEATURE_INCOMPAT_64BIT))
733 jbd_set32(tag, blocknr_high, tag_info->block >> 32);
735 if (tag_info->uuid_exist) {
736 /* See whether it is possible to hold UUID part.*/
737 if (remain_buf_size - tag_bytes < UUID_SIZE)
740 uuid_start = (char *)tag + tag_bytes;
741 tag_info->tag_bytes += UUID_SIZE;
742 memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
744 jbd_set32(tag, flags,
745 jbd_get32(tag, flags) | JBD_FLAG_SAME_UUID);
747 jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
749 if (tag_info->last_tag)
750 jbd_set32(tag, flags,
751 jbd_get32(tag, flags) | JBD_FLAG_LAST_TAG);
754 struct jbd_block_tag *tag = __tag;
755 memset(tag, 0, sizeof(struct jbd_block_tag));
756 jbd_set32(tag, blocknr, (uint32_t)tag_info->block);
757 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
758 JBD_FEATURE_INCOMPAT_64BIT))
759 jbd_set32(tag, blocknr_high, tag_info->block >> 32);
761 if (tag_info->uuid_exist) {
762 /* See whether it is possible to hold UUID part.*/
763 if (remain_buf_size - tag_bytes < UUID_SIZE)
766 uuid_start = (char *)tag + tag_bytes;
767 tag_info->tag_bytes += UUID_SIZE;
768 memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
770 jbd_set16(tag, flags,
771 jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID);
773 jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
775 if (tag_info->last_tag)
776 jbd_set16(tag, flags,
777 jbd_get16(tag, flags) | JBD_FLAG_LAST_TAG);
/* Walks the tag table of a descriptor block. When CSUM_V2 or CSUM_V3 is
 * enabled the trailing struct jbd_block_tail is excluded from the table
 * size. Each tag is decoded with jbd_extract_block_tag() and func is
 * invoked with the decoded block number and UUID. The walk ends after a tag
 * flagged as last; otherwise it advances by the decoded tag size until the
 * remaining size reaches zero. */
783 /**@brief Iterate all block tags in a block.
784 * @param jbd_fs jbd filesystem
785 * @param __tag_start pointer to the block
786 * @param tag_tbl_size size of the block
787 * @param func callback routine to indicate that
788 * a block tag is found
789 * @param arg additional argument to be passed to func */
791 jbd_iterate_block_table(struct jbd_fs *jbd_fs,
793 int32_t tag_tbl_size,
794 void (*func)(struct jbd_fs * jbd_fs,
800 char *tag_start, *tag_ptr;
801 int tag_bytes = jbd_tag_bytes(jbd_fs);
802 tag_start = __tag_start;
805 /* Cut off the size of block tail storing checksum. */
806 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
807 JBD_FEATURE_INCOMPAT_CSUM_V2) ||
808 JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
809 JBD_FEATURE_INCOMPAT_CSUM_V3))
810 tag_tbl_size -= sizeof(struct jbd_block_tail);
812 while (tag_tbl_size) {
813 struct tag_info tag_info;
814 int rc = jbd_extract_block_tag(jbd_fs,
823 func(jbd_fs, tag_info.block, tag_info.uuid, arg);
825 /* Stop the iteration when we reach the last tag. */
826 if (tag_info.last_tag)
829 tag_ptr += tag_info.tag_bytes;
830 tag_tbl_size -= tag_info.tag_bytes;
/* Tag callback used when a descriptor block is only inspected: logs the
 * tagged block number and keeps the caller's journal block cursor (passed
 * through arg) inside the log area by applying wrap(). */
834 static void jbd_display_block_tags(struct jbd_fs *jbd_fs,
839 uint32_t *iblock = arg;
840 ext4_dbg(DEBUG_JBD, "Block in block_tag: %" PRIu64 "\n", block);
842 wrap(&jbd_fs->sb, *iblock);
/* Looks up the revoke entry for a block address in info->revoke_root using
 * a temporary key entry; the result of RB_FIND is returned as is. */
848 static struct revoke_entry *
849 jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)
851 struct revoke_entry tmp = {
855 return RB_FIND(jbd_revoke, &info->revoke_root, &tmp);
/* Tag callback of the recover pass. The journal block cursor
 * (arg->this_block) is kept inside the log with wrap(). A revoke entry for
 * the block is looked up and compared against the current transaction id to
 * decide whether the block is skipped. For a replayed block two paths are
 * visible: one copies `blocksize` bytes from the journal block into the
 * target block obtained with ext4_block_get_noread() and marks it dirty;
 * the other copies EXT4_SUPERBLOCK_SIZE bytes found at
 * EXT4_SUPERBLOCK_OFFSET in the journal block, writes the ext4 superblock,
 * and re-applies the state and mount_count values read beforehand. */
858 /**@brief Replay a block in a transaction.
859 * @param jbd_fs jbd filesystem
860 * @param block block address to be replayed.*/
861 static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
863 uint8_t *uuid __unused,
867 struct replay_arg *arg = __arg;
868 struct recover_info *info = arg->info;
869 uint32_t *this_block = arg->this_block;
870 struct revoke_entry *revoke_entry;
871 struct ext4_block journal_block, ext4_block;
872 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
875 wrap(&jbd_fs->sb, *this_block);
877 /* We replay this block only if the current transaction id
878 * is equal or greater than that in revoke entry.*/
879 revoke_entry = jbd_revoke_entry_lookup(info, block);
881 arg->this_trans_id < revoke_entry->trans_id)
885 "Replaying block in block_tag: %" PRIu64 "\n",
888 r = jbd_block_get(jbd_fs, &journal_block, *this_block);
892 /* We need special treatment for ext4 superblock. */
894 r = ext4_block_get_noread(fs->bdev, &ext4_block, block);
896 jbd_block_set(jbd_fs, &journal_block);
900 memcpy(ext4_block.data,
902 jbd_get32(&jbd_fs->sb, blocksize));
904 ext4_bcache_set_dirty(ext4_block.buf);
905 ext4_block_set(fs->bdev, &ext4_block);
907 uint16_t mount_count, state;
908 mount_count = ext4_get16(&fs->sb, mount_count);
909 state = ext4_get16(&fs->sb, state);
912 journal_block.data + EXT4_SUPERBLOCK_OFFSET,
913 EXT4_SUPERBLOCK_SIZE);
915 /* Mark system as mounted */
916 ext4_set16(&fs->sb, state, state);
917 r = ext4_sb_write(fs->bdev, &fs->sb);
921 /*Update mount count*/
922 ext4_set16(&fs->sb, mount_count, mount_count);
925 jbd_block_set(jbd_fs, &journal_block);
/* Records that `block` is revoked as of info->this_trans_id. An existing
 * tree entry gets its trans_id overwritten; otherwise a new entry is
 * allocated, filled in and inserted into info->revoke_root.
 * NOTE(review): allocation failure is only covered by ext4_assert(). */
930 /**@brief Add block address to revoke tree, along with
931 * its transaction id.
932 * @param info journal replay info
933 * @param block block address to be replayed.*/
934 static void jbd_add_revoke_block_tags(struct recover_info *info,
937 struct revoke_entry *revoke_entry;
939 ext4_dbg(DEBUG_JBD, "Add block %" PRIu64 " to revoke tree\n", block);
940 /* If the revoke entry with respect to the block address
941 * exists already, update its transaction id.*/
942 revoke_entry = jbd_revoke_entry_lookup(info, block);
944 revoke_entry->trans_id = info->this_trans_id;
948 revoke_entry = jbd_alloc_revoke_entry();
949 ext4_assert(revoke_entry);
950 revoke_entry->block = block;
951 revoke_entry->trans_id = info->this_trans_id;
952 RB_INSERT(jbd_revoke, &info->revoke_root, revoke_entry);
/* Empties info->revoke_root: repeatedly takes the minimum entry, unlinks it
 * from the tree and frees it until the tree is empty. */
957 static void jbd_destroy_revoke_tree(struct recover_info *info)
959 while (!RB_EMPTY(&info->revoke_root)) {
960 struct revoke_entry *revoke_entry =
961 RB_MIN(jbd_revoke, &info->revoke_root);
962 ext4_assert(revoke_entry);
963 RB_REMOVE(jbd_revoke, &info->revoke_root, revoke_entry);
964 jbd_free_revoke_entry(revoke_entry);
/* Pass selectors for jbd_iterate_log(); jbd_recover() runs the passes in
 * the order SCAN, REVOKE, RECOVER. */
969 #define ACTION_SCAN 0
970 #define ACTION_REVOKE 1
971 #define ACTION_RECOVER 2
973 /**@brief Add entries in a revoke block to revoke tree.
974 * @param jbd_fs jbd filesystem
975 * @param header revoke block header
976 * @param recover_info journal replay info*/
977 static void jbd_build_revoke_tree(struct jbd_fs *jbd_fs,
978 struct jbd_bhdr *header,
979 struct recover_info *info)
982 struct jbd_revoke_header *revoke_hdr =
983 (struct jbd_revoke_header *)header;
984 uint32_t i, nr_entries, record_len = 4;
986 /* If we are working on a 64bit jbd filesystem, */
987 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
988 JBD_FEATURE_INCOMPAT_64BIT))
991 nr_entries = (jbd_get32(revoke_hdr, count) -
992 sizeof(struct jbd_revoke_header)) /
995 blocks_entry = (char *)(revoke_hdr + 1);
997 for (i = 0;i < nr_entries;i++) {
998 if (record_len == 8) {
1000 (uint64_t *)blocks_entry;
1001 jbd_add_revoke_block_tags(info, to_be64(*blocks));
1004 (uint32_t *)blocks_entry;
1005 jbd_add_revoke_block_tags(info, to_be32(*blocks));
1007 blocks_entry += record_len;
/* Walks the tags of a descriptor block with jbd_display_block_tags() as the
 * callback. The table size passed on is the journal block size minus the
 * block header. */
1011 static void jbd_debug_descriptor_block(struct jbd_fs *jbd_fs,
1012 struct jbd_bhdr *header,
1015 jbd_iterate_block_table(jbd_fs,
1017 jbd_get32(&jbd_fs->sb, blocksize) -
1018 sizeof(struct jbd_bhdr),
1019 jbd_display_block_tags,
/* Walks the tags of a descriptor block with jbd_replay_block_tags() as the
 * callback. The table size passed on is the journal block size minus the
 * block header. */
1023 static void jbd_replay_descriptor_block(struct jbd_fs *jbd_fs,
1024 struct jbd_bhdr *header,
1025 struct replay_arg *arg)
1027 jbd_iterate_block_table(jbd_fs,
1029 jbd_get32(&jbd_fs->sb, blocksize) -
1030 sizeof(struct jbd_bhdr),
1031 jbd_replay_block_tags,
/* One pass over the journal, starting at the `start` block and `sequence`
 * transaction id found in the journal superblock.
 *  - ACTION_SCAN resets info->trans_cnt and, once the pass ends with EOK,
 *    records start_trans_id and last_trans_id in info.
 *  - ACTION_REVOKE feeds revoke blocks to jbd_build_revoke_tree().
 *  - ACTION_RECOVER replays descriptor blocks; the other actions only dump
 *    their tags for debugging.
 * Every block is checked for the journal magic number and for the expected
 * transaction id. Descriptor, commit and revoke blocks are also checksum
 * verified. After each block the cursor is wrapped with wrap() and compared
 * with the start block to detect a full turn through the log. */
1035 /**@brief The core routine of journal replay.
1036 * @param jbd_fs jbd filesystem
1037 * @param recover_info journal replay info
1038 * @param action action needed to be taken
1039 * @return standard error code*/
1040 static int jbd_iterate_log(struct jbd_fs *jbd_fs,
1041 struct recover_info *info,
1045 bool log_end = false;
1046 struct jbd_sb *sb = &jbd_fs->sb;
1047 uint32_t start_trans_id, this_trans_id;
1048 uint32_t start_block, this_block;
1050 /* We start iterating valid blocks in the whole journal.*/
1051 start_trans_id = this_trans_id = jbd_get32(sb, sequence);
1052 start_block = this_block = jbd_get32(sb, start);
1053 if (action == ACTION_SCAN)
1054 info->trans_cnt = 0;
1055 else if (!info->trans_cnt)
1058 ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %" PRIu32 "\n",
1062 struct ext4_block block;
1063 struct jbd_bhdr *header;
1064 /* If we are not scanning for the last
1065 * valid transaction in the journal,
1066 * we will stop when we reach the end of
1068 if (action != ACTION_SCAN)
1069 if (this_trans_id > info->last_trans_id) {
1074 r = jbd_block_get(jbd_fs, &block, this_block);
1078 header = (struct jbd_bhdr *)block.data;
1079 /* This block does not have a valid magic number,
1080 * so we have reached the end of the journal.*/
1081 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER) {
1082 jbd_block_set(jbd_fs, &block);
1087 /* If the transaction id we found is not expected,
1088 * we may have reached the end of the journal.
1090 * If we are not scanning the journal, something
1091 * bad might have taken place. :-( */
1092 if (jbd_get32(header, sequence) != this_trans_id) {
1093 if (action != ACTION_SCAN)
1096 jbd_block_set(jbd_fs, &block);
1101 switch (jbd_get32(header, blocktype)) {
1102 case JBD_DESCRIPTOR_BLOCK:
1103 if (!jbd_verify_meta_csum(jbd_fs, header)) {
1105 DBG_WARN "Descriptor block checksum failed."
1106 "Journal block: %" PRIu32"\n",
1111 ext4_dbg(DEBUG_JBD, "Descriptor block: %" PRIu32", "
1112 "trans_id: %" PRIu32"\n",
1113 this_block, this_trans_id);
1114 if (action == ACTION_RECOVER) {
1115 struct replay_arg replay_arg;
1116 replay_arg.info = info;
1117 replay_arg.this_block = &this_block;
1118 replay_arg.this_trans_id = this_trans_id;
1120 jbd_replay_descriptor_block(jbd_fs,
1121 header, &replay_arg);
1123 jbd_debug_descriptor_block(jbd_fs,
1124 header, &this_block);
1127 case JBD_COMMIT_BLOCK:
1128 if (!jbd_verify_commit_csum(jbd_fs,
1129 (struct jbd_commit_header *)header)) {
1131 DBG_WARN "Commit block checksum failed."
1132 "Journal block: %" PRIu32"\n",
1137 ext4_dbg(DEBUG_JBD, "Commit block: %" PRIu32", "
1138 "trans_id: %" PRIu32"\n",
1139 this_block, this_trans_id);
1140 /* This is the end of a transaction,
1141 * we may now proceed to the next transaction.
1146 case JBD_REVOKE_BLOCK:
1147 if (!jbd_verify_meta_csum(jbd_fs, header)) {
1149 DBG_WARN "Revoke block checksum failed."
1150 "Journal block: %" PRIu32"\n",
1155 ext4_dbg(DEBUG_JBD, "Revoke block: %" PRIu32", "
1156 "trans_id: %" PRIu32"\n",
1157 this_block, this_trans_id);
1158 if (action == ACTION_REVOKE) {
1159 info->this_trans_id = this_trans_id;
1160 jbd_build_revoke_tree(jbd_fs,
1168 jbd_block_set(jbd_fs, &block);
1170 wrap(sb, this_block);
1171 if (this_block == start_block)
1175 ext4_dbg(DEBUG_JBD, "End of journal.\n");
1176 if (r == EOK && action == ACTION_SCAN) {
1177 /* We have finished scanning the journal. */
1178 info->start_trans_id = start_trans_id;
1179 if (this_trans_id > start_trans_id)
1180 info->last_trans_id = this_trans_id - 1;
1182 info->last_trans_id = this_trans_id;
/* Drives the three passes of jbd_iterate_log() (scan, revoke, recover)
 * over a revoke tree initialised here. In the success branch the journal
 * superblock's `start` is set to 0 and jbd_fs is marked dirty, and
 * EXT4_FINCOM_RECOVER is cleared in the ext4 superblock, which is then
 * written out. The revoke tree is destroyed before returning. */
1188 /**@brief Replay journal.
1189 * @param jbd_fs jbd filesystem
1190 * @return standard error code*/
1191 int jbd_recover(struct jbd_fs *jbd_fs)
1194 struct recover_info info;
1195 struct jbd_sb *sb = &jbd_fs->sb;
1199 RB_INIT(&info.revoke_root);
1201 r = jbd_iterate_log(jbd_fs, &info, ACTION_SCAN);
1205 r = jbd_iterate_log(jbd_fs, &info, ACTION_REVOKE);
1209 r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);
1211 /* If we successfully replay the journal,
1212 * clear EXT4_FINCOM_RECOVER flag on the
1213 * ext4 superblock, and set the start of
1215 uint32_t features_incompatible =
1216 ext4_get32(&jbd_fs->inode_ref.fs->sb,
1217 features_incompatible);
1218 jbd_set32(&jbd_fs->sb, start, 0);
1219 features_incompatible &= ~EXT4_FINCOM_RECOVER;
1220 ext4_set32(&jbd_fs->inode_ref.fs->sb,
1221 features_incompatible,
1222 features_incompatible);
1223 jbd_fs->dirty = true;
1224 r = ext4_sb_write(jbd_fs->bdev,
1225 &jbd_fs->inode_ref.fs->sb);
1227 jbd_destroy_revoke_tree(&info);
/* Copies the session's start block and transaction id into the in-memory
 * journal superblock and marks jbd_fs dirty. Nothing is written to disk
 * here. */
1231 static void jbd_journal_write_sb(struct jbd_journal *journal)
1233 struct jbd_fs *jbd_fs = journal->jbd_fs;
1234 jbd_set32(&jbd_fs->sb, start, journal->start);
1235 jbd_set32(&jbd_fs->sb, sequence, journal->trans_id);
1236 jbd_fs->dirty = true;
/* Opens a journal session. EXT4_FINCOM_RECOVER is set in the ext4
 * superblock, which is then written out. The session starts at the
 * journal's `first` block with transaction id 1. One journal block is
 * obtained without reading, zero-filled, marked dirty and released. The
 * checkpoint queue and block record tree are initialised, the journal
 * superblock is updated and written, and the session is attached to the
 * block device. On the visible failure paths the journal structure is
 * zeroed. */
1239 /**@brief Start accessing the journal.
1240 * @param jbd_fs jbd filesystem
1241 * @param journal current journal session
1242 * @return standard error code*/
1243 int jbd_journal_start(struct jbd_fs *jbd_fs,
1244 struct jbd_journal *journal)
1247 uint32_t features_incompatible =
1248 ext4_get32(&jbd_fs->inode_ref.fs->sb,
1249 features_incompatible);
1250 struct ext4_block block = EXT4_BLOCK_ZERO();
1251 features_incompatible |= EXT4_FINCOM_RECOVER;
1252 ext4_set32(&jbd_fs->inode_ref.fs->sb,
1253 features_incompatible,
1254 features_incompatible);
1255 r = ext4_sb_write(jbd_fs->bdev,
1256 &jbd_fs->inode_ref.fs->sb);
1260 journal->first = jbd_get32(&jbd_fs->sb, first);
1261 journal->start = journal->first;
1262 journal->last = journal->first;
1263 journal->trans_id = 1;
1264 journal->alloc_trans_id = 1;
1266 journal->block_size = jbd_get32(&jbd_fs->sb, blocksize);
1268 r = jbd_block_get_noread(jbd_fs,
1272 memset(journal, 0, sizeof(struct jbd_journal));
1275 memset(block.data, 0, journal->block_size);
1276 ext4_bcache_set_dirty(block.buf);
1277 r = jbd_block_set(jbd_fs, &block);
1279 memset(journal, 0, sizeof(struct jbd_journal));
1283 TAILQ_INIT(&journal->cp_queue);
1284 RB_INIT(&journal->block_rec_root);
1285 journal->jbd_fs = jbd_fs;
1286 jbd_journal_write_sb(journal);
1287 r = jbd_write_sb(jbd_fs);
1291 jbd_fs->bdev->journal = journal;
/* Declaration of the end-of-write routine; jbd_journal_flush_trans() calls
 * it directly with the block cache, the buffer, the write result and the
 * jbd buffer. */
1295 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
1296 struct ext4_buf *buf __unused,
1301 * This routine is only suitable to committed transactions. */
1302 static void jbd_journal_flush_trans(struct jbd_trans *trans)
1304 struct jbd_buf *jbd_buf, *tmp;
1305 struct jbd_journal *journal = trans->journal;
1306 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1307 void *tmp_data = malloc(journal->block_size);
1308 ext4_assert(tmp_data);
1310 TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
1312 struct ext4_buf *buf;
1313 struct ext4_block block;
1314 /* The buffer is not yet flushed. */
1315 buf = ext4_bcache_find_get(fs->bdev->bc, &block,
1316 jbd_buf->block_rec->lba);
1317 if (!(buf && ext4_bcache_test_flag(buf, BC_UPTODATE) &&
1318 jbd_buf->block_rec->trans == trans)) {
1320 struct ext4_block jbd_block = EXT4_BLOCK_ZERO();
1321 ext4_assert(jbd_block_get(journal->jbd_fs,
1323 jbd_buf->jbd_lba) == EOK);
1324 memcpy(tmp_data, jbd_block.data,
1325 journal->block_size);
1326 ext4_block_set(fs->bdev, &jbd_block);
1327 r = ext4_blocks_set_direct(fs->bdev, tmp_data,
1328 jbd_buf->block_rec->lba, 1);
1329 jbd_trans_end_write(fs->bdev->bc, buf, r, jbd_buf);
1331 ext4_block_flush_buf(fs->bdev, buf);
1334 ext4_block_set(fs->bdev, &block);
1341 jbd_journal_skip_pure_revoke(struct jbd_journal *journal,
1342 struct jbd_trans *trans)
1344 journal->start = trans->start_iblock +
1345 trans->alloc_blocks;
1346 wrap(&journal->jbd_fs->sb, journal->start);
1347 journal->trans_id = trans->trans_id + 1;
1348 jbd_journal_free_trans(journal,
1350 jbd_journal_write_sb(journal);
1354 jbd_journal_purge_cp_trans(struct jbd_journal *journal,
1358 struct jbd_trans *trans;
1359 while ((trans = TAILQ_FIRST(&journal->cp_queue))) {
1360 if (!trans->data_cnt) {
1361 TAILQ_REMOVE(&journal->cp_queue,
1364 jbd_journal_skip_pure_revoke(journal, trans);
1366 if (trans->data_cnt ==
1367 trans->written_cnt) {
1369 trans->start_iblock +
1370 trans->alloc_blocks;
1371 wrap(&journal->jbd_fs->sb,
1374 trans->trans_id + 1;
1375 TAILQ_REMOVE(&journal->cp_queue,
1378 jbd_journal_free_trans(journal,
1381 jbd_journal_write_sb(journal);
1382 } else if (!flush) {
1384 trans->start_iblock;
1385 wrap(&journal->jbd_fs->sb,
1389 jbd_journal_write_sb(journal);
1392 jbd_journal_flush_trans(trans);
/* Shut the journal session down.
 *
 * Steps visible in this routine:
 *  - jbd_journal_purge_cp_trans(journal, true, false) is run on the
 *    checkpoint queue with flushing enabled;
 *  - a warning string is reported when journal->block_rec_root is not
 *    empty afterwards;
 *  - EXT4_FINCOM_RECOVER is cleared in the filesystem superblock's
 *    features_incompatible word and that superblock is written through
 *    ext4_sb_write();
 *  - journal->trans_id is reset to 0, jbd_journal_write_sb() is called
 *    and the result of jbd_write_sb() becomes the return value. */
1399 /**@brief Stop accessing the journal.
1400 * @param journal current journal session
1401 * @return standard error code*/
1402 int jbd_journal_stop(struct jbd_journal *journal)
1405 struct jbd_fs *jbd_fs = journal->jbd_fs;
1406 uint32_t features_incompatible;
1408 /* Make sure that journalled content have reached
1410 jbd_journal_purge_cp_trans(journal, true, false);
1412 /* There should be no block record in this journal
1414 if (!RB_EMPTY(&journal->block_rec_root))
1416 DBG_WARN "There are still block records "
1417 "in this journal session!\n");
1419 features_incompatible =
1420 ext4_get32(&jbd_fs->inode_ref.fs->sb,
1421 features_incompatible);
1422 features_incompatible &= ~EXT4_FINCOM_RECOVER;
1423 ext4_set32(&jbd_fs->inode_ref.fs->sb,
1424 features_incompatible,
1425 features_incompatible);
1426 r = ext4_sb_write(jbd_fs->bdev,
1427 &jbd_fs->inode_ref.fs->sb);
1432 journal->trans_id = 0;
1433 jbd_journal_write_sb(journal);
1434 return jbd_write_sb(journal->jbd_fs);
1437 /**@brief Allocate a block in the journal.
1438 * @param journal current journal session
1439 * @param trans transaction
1440 * @return allocated block address*/
1441 static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal,
1442 struct jbd_trans *trans)
1444 uint32_t start_block;
1446 start_block = journal->last++;
1447 trans->alloc_blocks++;
1448 wrap(&journal->jbd_fs->sb, journal->last);
1450 /* If there is no space left, flush all journalled
1451 * blocks to disk first.*/
1452 if (journal->last == journal->start)
1453 jbd_journal_purge_cp_trans(journal, true, false);
1458 static struct jbd_block_rec *
1459 jbd_trans_block_rec_lookup(struct jbd_journal *journal,
1462 struct jbd_block_rec tmp = {
1466 return RB_FIND(jbd_block,
1467 &journal->block_rec_root,
1472 jbd_trans_change_ownership(struct jbd_block_rec *block_rec,
1473 struct jbd_trans *new_trans)
1475 LIST_REMOVE(block_rec, tbrec_node);
1477 /* Now this block record belongs to this transaction. */
1478 LIST_INSERT_HEAD(&new_trans->tbrec_list, block_rec, tbrec_node);
1480 block_rec->trans = new_trans;
1483 static inline struct jbd_block_rec *
1484 jbd_trans_insert_block_rec(struct jbd_trans *trans,
1487 struct jbd_block_rec *block_rec;
1488 block_rec = jbd_trans_block_rec_lookup(trans->journal, lba);
1490 jbd_trans_change_ownership(block_rec, trans);
1493 block_rec = calloc(1, sizeof(struct jbd_block_rec));
1497 block_rec->lba = lba;
1498 block_rec->trans = trans;
1499 TAILQ_INIT(&block_rec->dirty_buf_queue);
1500 LIST_INSERT_HEAD(&trans->tbrec_list, block_rec, tbrec_node);
1501 RB_INSERT(jbd_block, &trans->journal->block_rec_root, block_rec);
1506 * This routine will do the dirty works.
1509 jbd_trans_finish_callback(struct jbd_journal *journal,
1510 const struct jbd_trans *trans,
1511 struct jbd_block_rec *block_rec,
/* Nothing is done unless block_rec is currently owned by trans.
 * One path walks block_rec->dirty_buf_queue and reports each entry
 * through jbd_trans_end_write().  The other path takes the last entry
 * of that queue and either copies the journalled data back into the
 * block (re-arming the end_write hook on it) or only transfers the
 * ownership of the record. */
1515 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1516 if (block_rec->trans != trans)
1520 struct jbd_buf *jbd_buf, *tmp;
1521 TAILQ_FOREACH_SAFE(jbd_buf,
1522 &block_rec->dirty_buf_queue,
1525 jbd_trans_end_write(fs->bdev->bc,
1532 * We have to roll back data if the block is going to be
1535 struct jbd_buf *jbd_buf;
1536 struct ext4_block jbd_block = EXT4_BLOCK_ZERO(),
1537 block = EXT4_BLOCK_ZERO();
1538 jbd_buf = TAILQ_LAST(&block_rec->dirty_buf_queue,
/* NOTE(review): both block fetches below are arguments of
 * ext4_assert(); if that macro can expand to nothing in some build
 * configuration the fetches would not run -- confirm. */
1542 ext4_assert(ext4_block_get_noread(fs->bdev,
1544 block_rec->lba) == EOK);
1545 ext4_assert(jbd_block_get(journal->jbd_fs,
1547 jbd_buf->jbd_lba) == EOK);
1548 memcpy(block.data, jbd_block.data,
1549 journal->block_size);
1551 jbd_trans_change_ownership(block_rec,
1554 block.buf->end_write = jbd_trans_end_write;
1555 block.buf->end_write_arg = jbd_buf;
1557 ext4_bcache_set_flag(jbd_block.buf, BC_TMP);
1558 ext4_bcache_set_dirty(block.buf);
1560 ext4_block_set(fs->bdev, &jbd_block);
1561 ext4_block_set(fs->bdev, &block);
1564 /* The revoked buffer is yet written. */
1565 jbd_trans_change_ownership(block_rec,
1573 jbd_trans_remove_block_rec(struct jbd_journal *journal,
1574 struct jbd_block_rec *block_rec,
1575 struct jbd_trans *trans)
1577 /* If this block record doesn't belong to this transaction,
1579 if (block_rec->trans == trans) {
1580 LIST_REMOVE(block_rec, tbrec_node);
1581 RB_REMOVE(jbd_block,
1582 &journal->block_rec_root,
1588 /**@brief Add block to a transaction and mark it dirty.
1589 * @param trans transaction
1590 * @param block block descriptor
1591 * @return standard error code*/
1592 int jbd_trans_set_block_dirty(struct jbd_trans *trans,
1593 struct ext4_block *block)
1595 struct jbd_buf *jbd_buf;
1596 struct jbd_revoke_rec *rec, tmp_rec = {
1599 struct jbd_block_rec *block_rec;
1601 if (block->buf->end_write == jbd_trans_end_write) {
1602 jbd_buf = block->buf->end_write_arg;
1603 if (jbd_buf && jbd_buf->trans == trans)
1606 jbd_buf = calloc(1, sizeof(struct jbd_buf));
1610 if ((block_rec = jbd_trans_insert_block_rec(trans,
1611 block->lb_id)) == NULL) {
1616 TAILQ_INSERT_TAIL(&block_rec->dirty_buf_queue,
1620 jbd_buf->block_rec = block_rec;
1621 jbd_buf->trans = trans;
1622 jbd_buf->block = *block;
1623 ext4_bcache_inc_ref(block->buf);
1625 /* If the content reach the disk, notify us
1626 * so that we may do a checkpoint. */
1627 block->buf->end_write = jbd_trans_end_write;
1628 block->buf->end_write_arg = jbd_buf;
1631 TAILQ_INSERT_HEAD(&trans->buf_queue, jbd_buf, buf_node);
1633 ext4_bcache_set_dirty(block->buf);
1634 rec = RB_FIND(jbd_revoke_tree,
1635 &trans->revoke_root,
1638 RB_REMOVE(jbd_revoke_tree, &trans->revoke_root,
1644 /**@brief Add block to be revoked to a transaction
1645 * @param trans transaction
1646 * @param lba logical block address
1647 * @return standard error code*/
1648 int jbd_trans_revoke_block(struct jbd_trans *trans,
1651 struct jbd_revoke_rec *rec =
1652 calloc(1, sizeof(struct jbd_revoke_rec));
1657 RB_INSERT(jbd_revoke_tree, &trans->revoke_root, rec);
1661 /**@brief Try to add block to be revoked to a transaction.
1662 * If @lba still remains in an transaction on checkpoint
1663 * queue, add @lba as a revoked block to the transaction.
1664 * @param trans transaction
1665 * @param lba logical block address
1666 * @return standard error code*/
1667 int jbd_trans_try_revoke_block(struct jbd_trans *trans,
1670 struct jbd_journal *journal = trans->journal;
1671 struct jbd_block_rec *block_rec =
1672 jbd_trans_block_rec_lookup(journal, lba);
1675 if (block_rec->trans == trans) {
1676 struct jbd_buf *jbd_buf =
1677 TAILQ_LAST(&block_rec->dirty_buf_queue,
1679 /* If there are still unwritten buffers. */
1680 if (TAILQ_FIRST(&block_rec->dirty_buf_queue) !=
1682 jbd_trans_revoke_block(trans, lba);
1685 jbd_trans_revoke_block(trans, lba);
/* Dismantle a transaction.
 *
 * Three containers hanging off the transaction are emptied in turn:
 *  - buf_queue: each entry is unlinked from its block record's dirty
 *    queue, passed to jbd_trans_finish_callback() and removed from the
 *    queue;
 *  - revoke_root: every revoke record is removed from the tree;
 *  - tbrec_list: every block record is offered to
 *    jbd_trans_remove_block_rec(). */
1691 /**@brief Free a transaction
1692 * @param journal current journal session
1693 * @param trans transaction
1694 * @param abort discard all the modifications on the block?
1695 * @return nothing (the function is declared void)*/
1696 void jbd_journal_free_trans(struct jbd_journal *journal,
1697 struct jbd_trans *trans,
1700 struct jbd_buf *jbd_buf, *tmp;
1701 struct jbd_revoke_rec *rec, *tmp2;
1702 struct jbd_block_rec *block_rec, *tmp3;
1703 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1704 TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
1706 block_rec = jbd_buf->block_rec;
/* Detach the completion hook, drop the dirty state and release the
 * buffer (presumably only on the abort path -- confirm). */
1708 jbd_buf->block.buf->end_write = NULL;
1709 jbd_buf->block.buf->end_write_arg = NULL;
1710 ext4_bcache_clear_dirty(jbd_buf->block.buf);
1711 ext4_block_set(fs->bdev, &jbd_buf->block);
1714 TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1717 jbd_trans_finish_callback(journal,
1722 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1725 RB_FOREACH_SAFE(rec, jbd_revoke_tree, &trans->revoke_root,
1727 RB_REMOVE(jbd_revoke_tree, &trans->revoke_root, rec);
1730 LIST_FOREACH_SAFE(block_rec, &trans->tbrec_list, tbrec_node,
1732 jbd_trans_remove_block_rec(journal, block_rec, trans);
1738 /**@brief Write commit block for a transaction
1739 * @param trans transaction
1740 * @return standard error code*/
1741 static int jbd_trans_write_commit_block(struct jbd_trans *trans)
1744 struct ext4_block block;
1745 struct jbd_commit_header *header;
1746 uint32_t commit_iblock, orig_commit_iblock;
1747 struct jbd_journal *journal = trans->journal;
1749 commit_iblock = jbd_journal_alloc_block(journal, trans);
1750 rc = jbd_block_get_noread(journal->jbd_fs, &block, commit_iblock);
1754 header = (struct jbd_commit_header *)block.data;
1755 jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER);
1756 jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);
1757 jbd_set32(&header->header, sequence, trans->trans_id);
1759 if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1760 JBD_FEATURE_COMPAT_CHECKSUM)) {
1761 jbd_set32(header, chksum_type, JBD_CRC32_CHKSUM);
1762 jbd_set32(header, chksum_size, JBD_CRC32_CHKSUM_SIZE);
1763 jbd_set32(header, chksum[0], trans->data_csum);
1765 jbd_commit_csum_set(journal->jbd_fs, header);
1766 ext4_bcache_set_dirty(block.buf);
1767 ext4_bcache_set_flag(block.buf, BC_TMP);
1768 rc = jbd_block_set(journal->jbd_fs, &block);
1772 orig_commit_iblock = commit_iblock;
1774 wrap(&journal->jbd_fs->sb, commit_iblock);
1776 /* To prevent accidental reference to stale journalling metadata. */
1777 if (orig_commit_iblock < commit_iblock) {
1778 rc = jbd_block_get_noread(journal->jbd_fs, &block, commit_iblock);
1782 memset(block.data, 0, journal->block_size);
1783 ext4_bcache_set_dirty(block.buf);
1784 ext4_bcache_set_flag(block.buf, BC_TMP);
1785 rc = jbd_block_set(journal->jbd_fs, &block);
/* Stage the data buffers of a transaction into the journal area.
 *
 * What the visible code does, in order:
 *  1. trans->buf_queue is walked from its tail; per the comments in that
 *     loop, non-dirty buffers are removed until a dirty one is met.
 *     A removed buffer is unlinked from its block record's dirty queue,
 *     its end_write hook is cleared, jbd_trans_finish_callback() and
 *     jbd_trans_remove_block_rec() are called for it, the buffer is
 *     released and the entry leaves the queue.
 *  2. The queue is walked from its head; entries failing the flag test
 *     receive the same treatment.
 *  3. For the other entries a checksum is taken with jbd_block_csum(),
 *     a block tagged JBD_DESCRIPTOR_BLOCK with sequence = trans_id is
 *     set up through jbd_journal_alloc_block(), and a tag is written
 *     with jbd_write_block_tag().
 *  4. One more journal block is allocated, the buffer contents are
 *     copied into it and its journal address is stored in
 *     jbd_buf->jbd_lba.
 *  5. When rc is EOK and a descriptor block is in use at the end, the
 *     descriptor gets its checksum, the checksum variable is stored in
 *     trans->data_csum and the descriptor is released.
 *
 * trans->start_iblock is set to the descriptor block if it is still
 * zero.  Journal blocks written here are marked dirty and BC_TMP. */
1791 /**@brief Write descriptor block for a transaction
1792 * @param journal current journal session
1793 * @param trans transaction
1794 * @return standard error code*/
1795 static int jbd_journal_prepare(struct jbd_journal *journal,
1796 struct jbd_trans *trans)
1798 int rc = EOK, i = 0;
1799 struct ext4_block desc_block = EXT4_BLOCK_ZERO(),
1800 data_block = EXT4_BLOCK_ZERO();
1801 int32_t tag_tbl_size = 0;
1802 uint32_t desc_iblock = 0;
1803 uint32_t data_iblock = 0;
1804 char *tag_start = NULL, *tag_ptr = NULL;
1805 struct jbd_buf *jbd_buf, *tmp;
1806 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1807 uint32_t checksum = EXT4_CRC32_INIT;
1808 struct jbd_bhdr *bhdr = NULL;
1811 /* Try to remove any non-dirty buffers from the tail of
1813 TAILQ_FOREACH_REVERSE_SAFE(jbd_buf, &trans->buf_queue,
1814 jbd_trans_buf, buf_node, tmp) {
1815 struct jbd_revoke_rec tmp_rec = {
1816 .lba = jbd_buf->block_rec->lba
1818 /* We stop the iteration when we find a dirty buffer. */
1819 if (ext4_bcache_test_flag(jbd_buf->block.buf,
1823 TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1827 jbd_buf->block.buf->end_write = NULL;
1828 jbd_buf->block.buf->end_write_arg = NULL;
1829 jbd_trans_finish_callback(journal,
1833 RB_FIND(jbd_revoke_tree,
1834 &trans->revoke_root,
1836 jbd_trans_remove_block_rec(journal,
1837 jbd_buf->block_rec, trans);
1840 ext4_block_set(fs->bdev, &jbd_buf->block);
1841 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
/* Forward pass over the buffers that are left in the queue. */
1845 TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node, tmp) {
1846 struct tag_info tag_info;
1847 bool uuid_exist = false;
1848 struct jbd_revoke_rec tmp_rec = {
1849 .lba = jbd_buf->block_rec->lba
1851 if (!ext4_bcache_test_flag(jbd_buf->block.buf,
1853 TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1857 jbd_buf->block.buf->end_write = NULL;
1858 jbd_buf->block.buf->end_write_arg = NULL;
1860 /* The buffer has not been modified, just release
1862 jbd_trans_finish_callback(journal,
1866 RB_FIND(jbd_revoke_tree,
1867 &trans->revoke_root,
1869 jbd_trans_remove_block_rec(journal,
1870 jbd_buf->block_rec, trans);
1873 ext4_block_set(fs->bdev, &jbd_buf->block);
1874 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1878 checksum = jbd_block_csum(journal->jbd_fs,
1879 jbd_buf->block.data,
1884 desc_iblock = jbd_journal_alloc_block(journal, trans);
1885 rc = jbd_block_get_noread(journal->jbd_fs, &desc_block, desc_iblock);
1889 bhdr = (struct jbd_bhdr *)desc_block.data;
1890 jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1891 jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK);
1892 jbd_set32(bhdr, sequence, trans->trans_id);
1894 tag_start = (char *)(bhdr + 1);
1895 tag_ptr = tag_start;
1897 tag_tbl_size = journal->block_size -
1898 sizeof(struct jbd_bhdr);
1900 if (jbd_has_csum(&journal->jbd_fs->sb))
1901 tag_tbl_size -= sizeof(struct jbd_block_tail);
1903 if (!trans->start_iblock)
1904 trans->start_iblock = desc_iblock;
1906 ext4_bcache_set_dirty(desc_block.buf);
1907 ext4_bcache_set_flag(desc_block.buf, BC_TMP);
1909 tag_info.block = jbd_buf->block.lb_id;
1910 tag_info.uuid_exist = uuid_exist;
1911 if (i == trans->data_cnt - 1)
1912 tag_info.last_tag = true;
1914 tag_info.last_tag = false;
1916 tag_info.checksum = checksum;
1919 memcpy(tag_info.uuid, journal->jbd_fs->sb.uuid,
1922 rc = jbd_write_block_tag(journal->jbd_fs,
1927 jbd_meta_csum_set(journal->jbd_fs, bhdr);
1929 rc = jbd_block_set(journal->jbd_fs, &desc_block);
1936 data_iblock = jbd_journal_alloc_block(journal, trans);
1937 rc = jbd_block_get_noread(journal->jbd_fs, &data_block, data_iblock);
1940 ext4_bcache_clear_dirty(desc_block.buf);
1941 jbd_block_set(journal->jbd_fs, &desc_block);
1945 data = data_block.data;
1946 memcpy(data, jbd_buf->block.data,
1947 journal->block_size);
1948 ext4_bcache_set_dirty(data_block.buf);
1949 ext4_bcache_set_flag(data_block.buf, BC_TMP);
1950 rc = jbd_block_set(journal->jbd_fs, &data_block);
1953 ext4_bcache_clear_dirty(desc_block.buf);
1954 jbd_block_set(journal->jbd_fs, &desc_block);
1957 jbd_buf->jbd_lba = data_iblock;
1959 tag_ptr += tag_info.tag_bytes;
1960 tag_tbl_size -= tag_info.tag_bytes;
1964 if (rc == EOK && desc_iblock) {
1965 jbd_meta_csum_set(journal->jbd_fs,
1966 (struct jbd_bhdr *)bhdr);
1967 trans->data_csum = checksum;
1968 rc = jbd_block_set(journal->jbd_fs, &desc_block);
/* Emit the revoke records of a transaction into the journal.
 *
 * Every record found in trans->revoke_root is stored big-endian: as a
 * 64-bit value when record_len is 8, as a 32-bit value otherwise.
 * record_len starts at 4; the JBD_FEATURE_INCOMPAT_64BIT test presumably
 * selects the wider form -- confirm.  Records are packed behind a
 * jbd_revoke_header inside blocks tagged JBD_REVOKE_BLOCK.  When fewer
 * than record_len bytes remain, the header's count field is filled in,
 * the block is checksummed and released; the same is done for the block
 * still in use once the walk ends with rc == EOK.
 *
 * NOTE(review): count is computed as block_size - tag_tbl_size, and
 * tag_tbl_size has sizeof(struct jbd_block_tail) subtracted when
 * checksums are enabled, so in that configuration count also covers the
 * tail size -- confirm that this is what readers of the block expect. */
1974 /**@brief Write revoke block for a transaction
1975 * @param journal current journal session
1976 * @param trans transaction
1977 * @return standard error code*/
1979 jbd_journal_prepare_revoke(struct jbd_journal *journal,
1980 struct jbd_trans *trans)
1982 int rc = EOK, i = 0;
1983 struct ext4_block desc_block = EXT4_BLOCK_ZERO();
1984 int32_t tag_tbl_size = 0;
1985 uint32_t desc_iblock = 0;
1986 char *blocks_entry = NULL;
1987 struct jbd_revoke_rec *rec, *tmp;
1988 struct jbd_revoke_header *header = NULL;
1989 int32_t record_len = 4;
1990 struct jbd_bhdr *bhdr = NULL;
1992 if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1993 JBD_FEATURE_INCOMPAT_64BIT))
1996 RB_FOREACH_SAFE(rec, jbd_revoke_tree, &trans->revoke_root,
2000 desc_iblock = jbd_journal_alloc_block(journal, trans);
2001 rc = jbd_block_get_noread(journal->jbd_fs, &desc_block,
2006 bhdr = (struct jbd_bhdr *)desc_block.data;
2007 jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
2008 jbd_set32(bhdr, blocktype, JBD_REVOKE_BLOCK);
2009 jbd_set32(bhdr, sequence, trans->trans_id);
2011 header = (struct jbd_revoke_header *)bhdr;
2012 blocks_entry = (char *)(header + 1);
2013 tag_tbl_size = journal->block_size -
2014 sizeof(struct jbd_revoke_header);
2016 if (jbd_has_csum(&journal->jbd_fs->sb))
2017 tag_tbl_size -= sizeof(struct jbd_block_tail);
2019 if (!trans->start_iblock)
2020 trans->start_iblock = desc_iblock;
2022 ext4_bcache_set_dirty(desc_block.buf);
2023 ext4_bcache_set_flag(desc_block.buf, BC_TMP);
2026 if (tag_tbl_size < record_len) {
2027 jbd_set32(header, count,
2028 journal->block_size - tag_tbl_size);
2029 jbd_meta_csum_set(journal->jbd_fs, bhdr);
2033 rc = jbd_block_set(journal->jbd_fs, &desc_block);
2039 if (record_len == 8) {
2041 (uint64_t *)blocks_entry;
2042 *blocks = to_be64(rec->lba);
2045 (uint32_t *)blocks_entry;
2046 *blocks = to_be32((uint32_t)rec->lba);
2048 blocks_entry += record_len;
2049 tag_tbl_size -= record_len;
2053 if (rc == EOK && desc_iblock) {
2055 jbd_set32(header, count,
2056 journal->block_size - tag_tbl_size);
2058 jbd_meta_csum_set(journal->jbd_fs, bhdr);
2059 rc = jbd_block_set(journal->jbd_fs, &desc_block);
2065 /**@brief Put references of block descriptors in a transaction.
2066 * @param journal current journal session
2067 * @param trans transaction*/
2068 void jbd_journal_cp_trans(struct jbd_journal *journal, struct jbd_trans *trans)
2070 struct jbd_buf *jbd_buf, *tmp;
2071 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
2072 TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
2074 struct ext4_block block = jbd_buf->block;
2075 ext4_block_set(fs->bdev, &block);
/* Write-completion callback installed on journalled buffers.
 *
 * arg is the jbd_buf tracking the buffer.  The entry is removed from its
 * transaction's buf_queue and from its block record's dirty queue, and
 * jbd_trans_finish_callback() is invoked.  If the block record is still
 * owned by this transaction and buf is non-NULL, the hook is cleared on
 * the buffer.  The transaction's written_cnt is then incremented.
 *
 * Once written_cnt equals data_cnt and the transaction was at the head
 * of the checkpoint queue when the callback was entered, the journal
 * start is moved past the transaction, the transaction is freed, one
 * purge step is run on the checkpoint queue and the journal superblock
 * is written. */
2079 /**@brief Update the start block of the journal when
2080 * all the contents in a transaction reach the disk.*/
2081 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
2082 struct ext4_buf *buf,
2086 struct jbd_buf *jbd_buf = arg;
2087 struct jbd_trans *trans = jbd_buf->trans;
2088 struct jbd_block_rec *block_rec = jbd_buf->block_rec;
2089 struct jbd_journal *journal = trans->journal;
/* Sampled before any queue is modified below. */
2090 bool first_in_queue =
2091 trans == TAILQ_FIRST(&journal->cp_queue);
2095 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
2096 TAILQ_REMOVE(&block_rec->dirty_buf_queue,
2100 jbd_trans_finish_callback(journal,
2105 if (block_rec->trans == trans && buf) {
2106 /* Clear the end_write and end_write_arg fields. */
2107 buf->end_write = NULL;
2108 buf->end_write_arg = NULL;
2113 trans->written_cnt++;
2114 if (trans->written_cnt == trans->data_cnt) {
2115 /* If it is the first transaction on checkpoint queue,
2116 * we will shift the start of the journal to the next
2117 * transaction, and remove subsequent written
2118 * transactions from checkpoint queue until we find
2119 * an unwritten one. */
2120 if (first_in_queue) {
2121 journal->start = trans->start_iblock +
2122 trans->alloc_blocks;
2123 wrap(&journal->jbd_fs->sb, journal->start);
2124 journal->trans_id = trans->trans_id + 1;
2125 TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
2126 jbd_journal_free_trans(journal, trans, false);
2128 jbd_journal_purge_cp_trans(journal, false, true);
2129 jbd_journal_write_sb(journal);
2130 jbd_write_sb(journal->jbd_fs);
/* Write one transaction into the journal.
 *
 * Sequence visible in this routine:
 *  - the transaction takes journal->alloc_trans_id as its id;
 *  - jbd_journal_prepare() and jbd_journal_prepare_revoke() are run;
 *  - a transaction with an empty buffer queue and an empty revoke tree
 *    is freed instead of being committed;
 *  - jbd_trans_write_commit_block() writes the commit record and
 *    journal->alloc_trans_id is incremented;
 *  - for every revoke record, the matching block record (if any) has the
 *    last entry of its dirty queue completed through
 *    jbd_trans_end_write();
 *  - the transaction is appended to the checkpoint queue, or, when that
 *    queue is empty and the transaction carries no data blocks, the
 *    journal start is moved past it and it is freed;
 *  - for an rc other than EOK and ENOSPC, journal->last is restored to
 *    the value saved on entry and the transaction is freed with
 *    abort = true. */
2135 /**@brief Commit a transaction to the journal immediately.
2136 * @param journal current journal session
2137 * @param trans transaction
2138 * @return standard error code*/
2139 static int __jbd_journal_commit_trans(struct jbd_journal *journal,
2140 struct jbd_trans *trans)
2143 uint32_t last = journal->last;
2144 struct jbd_revoke_rec *rec, *tmp;
2146 trans->trans_id = journal->alloc_trans_id;
2147 rc = jbd_journal_prepare(journal, trans);
2151 rc = jbd_journal_prepare_revoke(journal, trans);
2155 if (TAILQ_EMPTY(&trans->buf_queue) &&
2156 RB_EMPTY(&trans->revoke_root)) {
2157 /* Since there are no entries in both buffer list
2158 * and revoke entry list, we do not consider trans as
2159 * complete transaction and just return EOK.*/
2160 jbd_journal_free_trans(journal, trans, false);
2164 rc = jbd_trans_write_commit_block(trans);
2168 journal->alloc_trans_id++;
2170 /* Complete the checkpoint of buffers which are revoked. */
2171 RB_FOREACH_SAFE(rec, jbd_revoke_tree, &trans->revoke_root,
2173 struct jbd_block_rec *block_rec =
2174 jbd_trans_block_rec_lookup(journal, rec->lba);
2175 struct jbd_buf *jbd_buf = NULL;
2177 jbd_buf = TAILQ_LAST(&block_rec->dirty_buf_queue,
2180 struct ext4_buf *buf;
2181 struct ext4_block block = EXT4_BLOCK_ZERO();
2183 * We do this to reset the ext4_buf::end_write and
2184 * ext4_buf::end_write_arg fields so that the checkpoint
2185 * callback won't be triggered again.
2187 buf = ext4_bcache_find_get(journal->jbd_fs->bdev->bc,
2189 jbd_buf->block_rec->lba);
2190 jbd_trans_end_write(journal->jbd_fs->bdev->bc,
2195 ext4_block_set(journal->jbd_fs->bdev, &block);
2199 if (TAILQ_EMPTY(&journal->cp_queue)) {
2201 * This transaction is going to be the first object in the
2203 * When the first transaction in checkpoint queue is completely
2204 * written to disk, we shift the tail of the log to right.
2206 if (trans->data_cnt) {
2207 journal->start = trans->start_iblock;
2208 wrap(&journal->jbd_fs->sb, journal->start);
2209 journal->trans_id = trans->trans_id;
2210 jbd_journal_write_sb(journal);
2211 jbd_write_sb(journal->jbd_fs);
2212 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
2214 jbd_journal_cp_trans(journal, trans);
2216 journal->start = trans->start_iblock +
2217 trans->alloc_blocks;
2218 wrap(&journal->jbd_fs->sb, journal->start);
2219 journal->trans_id = trans->trans_id + 1;
2220 jbd_journal_write_sb(journal);
2221 jbd_journal_free_trans(journal, trans, false);
2224 /* No need to do anything to the JBD superblock. */
2225 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
2227 if (trans->data_cnt)
2228 jbd_journal_cp_trans(journal, trans);
2231 if (rc != EOK && rc != ENOSPC) {
2232 journal->last = last;
2233 jbd_journal_free_trans(journal, trans, true);
2238 /**@brief Allocate a new transaction
2239 * @param journal current journal session
2240 * @return transaction allocated*/
2242 jbd_journal_new_trans(struct jbd_journal *journal)
2244 struct jbd_trans *trans = NULL;
2245 trans = calloc(1, sizeof(struct jbd_trans));
2249 /* We will assign a trans_id to this transaction,
2250 * once it has been committed.*/
2251 trans->journal = journal;
2252 trans->data_csum = EXT4_CRC32_INIT;
2254 TAILQ_INIT(&trans->buf_queue);
2258 /**@brief Commit a transaction to the journal immediately.
2259 * @param journal current journal session
2260 * @param trans transaction
2261 * @return standard error code*/
2262 int jbd_journal_commit_trans(struct jbd_journal *journal,
2263 struct jbd_trans *trans)
2266 r = __jbd_journal_commit_trans(journal, trans);