2 * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3 * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 /** @addtogroup lwext4
34 * @file ext4_journal.c
35 * @brief Journal handle functions
38 #include "ext4_config.h"
39 #include "ext4_types.h"
41 #include "ext4_super.h"
42 #include "ext4_journal.h"
43 #include "ext4_errno.h"
44 #include "ext4_blockdev.h"
45 #include "ext4_crc32.h"
46 #include "ext4_debug.h"
51 /**@brief Revoke entry during journal replay.*/
53 /**@brief Block number not to be replayed.*/
56 /**@brief For any transaction id smaller
57 * than trans_id, records of @block
58 * in those transactions should not
62 /**@brief Revoke tree node.*/
63 RB_ENTRY(revoke_entry) revoke_node;
66 /**@brief Valid journal replay information.*/
68 /**@brief Starting transaction id.*/
69 uint32_t start_trans_id;
71 /**@brief Ending transaction id.*/
72 uint32_t last_trans_id;
74 /**@brief Used as internal argument.*/
75 uint32_t this_trans_id;
77 /**@brief RB-Tree storing revoke entries.*/
78 RB_HEAD(jbd_revoke, revoke_entry) revoke_root;
81 /**@brief Journal replay internal arguments.*/
83 /**@brief Journal replay information.*/
84 struct recover_info *info;
86 /**@brief Current block we are on.*/
89 /**@brief Current trans_id we are on.*/
90 uint32_t this_trans_id;
94 jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b)
96 if (a->block > b->block)
98 else if (a->block < b->block)
104 jbd_block_rec_cmp(struct jbd_block_rec *a, struct jbd_block_rec *b)
108 else if (a->lba < b->lba)
113 RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
114 jbd_revoke_entry_cmp, static inline)
115 RB_GENERATE_INTERNAL(jbd_block, jbd_block_rec, block_rec_node,
116 jbd_block_rec_cmp, static inline)
118 #define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry))
119 #define jbd_free_revoke_entry(addr) free(addr)
121 static int jbd_has_csum(struct jbd_sb *jbd_sb)
123 if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V2))
126 if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V3))
132 #if CONFIG_META_CSUM_ENABLE
/**@brief Compute the crc32c checksum of a jbd superblock.
 * @param jbd_sb jbd superblock
 * @return checksum value; it stays 0 when jbd_has_csum() reports that no
 *         checksum feature is enabled.
 *         NOTE(review): the return statement is not visible in this listing. */
133 static uint32_t jbd_sb_csum(struct jbd_sb *jbd_sb)
135 uint32_t checksum = 0;
137 if (jbd_has_csum(jbd_sb)) {
138 uint32_t orig_checksum = jbd_sb->checksum; /* saved so the field can be restored below */
139 jbd_set32(jbd_sb, checksum, 0); /* the checksum field itself is hashed as zero */
140 /* Calculate crc32c checksum against the whole superblock */
141 checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_sb,
142 JBD_SUPERBLOCK_SIZE);
143 jbd_sb->checksum = orig_checksum; /* put the stored checksum back */
148 #define jbd_sb_csum(...) 0
151 static void jbd_sb_csum_set(struct jbd_sb *jbd_sb)
153 if (!jbd_has_csum(jbd_sb))
156 jbd_set32(jbd_sb, checksum, jbd_sb_csum(jbd_sb));
159 #if CONFIG_META_CSUM_ENABLE
161 jbd_verify_sb_csum(struct jbd_sb *jbd_sb)
163 if (!jbd_has_csum(jbd_sb))
166 return jbd_sb_csum(jbd_sb) == jbd_get32(jbd_sb, checksum);
169 #define jbd_verify_sb_csum(...) true
172 #if CONFIG_META_CSUM_ENABLE
/**@brief Compute the crc32c checksum of a journal metadata block.
 *        The checksum is seeded with the crc32c of the journal superblock
 *        uuid and then continued over the block that starts at bhdr.
 * @param jbd_fs jbd filesystem
 * @param bhdr header at the start of the block
 * @return checksum value; it stays 0 when jbd_has_csum() reports that no
 *         checksum feature is enabled.
 *         NOTE(review): the return statement is not visible in this listing. */
173 static uint32_t jbd_meta_csum(struct jbd_fs *jbd_fs,
174 struct jbd_bhdr *bhdr)
176 uint32_t checksum = 0;
178 if (jbd_has_csum(&jbd_fs->sb)) {
179 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
180 struct jbd_block_tail *tail = /* the tail occupies the last bytes of the block */
181 (struct jbd_block_tail *)((char *)bhdr + block_size -
182 sizeof(struct jbd_block_tail));
183 uint32_t orig_checksum = tail->checksum; /* saved so the tail can be restored below */
186 /* First calculate crc32c checksum against fs uuid */
187 checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
188 sizeof(jbd_fs->sb.uuid));
189 /* Calculate crc32c checksum against the whole block */
190 checksum = ext4_crc32c(checksum, bhdr,
192 tail->checksum = orig_checksum; /* put the stored checksum back */
197 #define jbd_meta_csum(...) 0
200 static void jbd_meta_csum_set(struct jbd_fs *jbd_fs,
201 struct jbd_bhdr *bhdr)
203 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
204 struct jbd_block_tail *tail = (struct jbd_block_tail *)
205 ((char *)bhdr + block_size -
206 sizeof(struct jbd_block_tail));
207 if (!jbd_has_csum(&jbd_fs->sb))
210 tail->checksum = to_be32(jbd_meta_csum(jbd_fs, bhdr));
213 #if CONFIG_META_CSUM_ENABLE
215 jbd_verify_meta_csum(struct jbd_fs *jbd_fs,
216 struct jbd_bhdr *bhdr)
218 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
219 struct jbd_block_tail *tail = (struct jbd_block_tail *)
220 ((char *)bhdr + block_size -
221 sizeof(struct jbd_block_tail));
222 if (!jbd_has_csum(&jbd_fs->sb))
225 return jbd_meta_csum(jbd_fs, bhdr) == to_be32(tail->checksum);
228 #define jbd_verify_meta_csum(...) true
231 #if CONFIG_META_CSUM_ENABLE
/**@brief Compute the crc32c checksum of a commit block.
 *        The chksum_type, chksum_size and chksum[0] fields of the header are
 *        zeroed while hashing and restored afterwards.
 * @param jbd_fs jbd filesystem
 * @param header commit block header
 * @return checksum value; it stays 0 when jbd_has_csum() reports that no
 *         checksum feature is enabled.
 *         NOTE(review): the return statement is not visible in this listing. */
232 static uint32_t jbd_commit_csum(struct jbd_fs *jbd_fs,
233 struct jbd_commit_header *header)
235 uint32_t checksum = 0;
237 if (jbd_has_csum(&jbd_fs->sb)) {
238 uint32_t orig_checksum_type = header->chksum_type, /* saved for restoring below */
239 orig_checksum_size = header->chksum_size,
240 orig_checksum = header->chksum[0];
241 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
242 header->chksum_type = 0; /* checksum fields are hashed as zero */
243 header->chksum_size = 0;
244 header->chksum[0] = 0;
246 /* First calculate crc32c checksum against fs uuid */
247 checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
248 sizeof(jbd_fs->sb.uuid));
249 /* Calculate crc32c checksum against the whole block */
250 checksum = ext4_crc32c(checksum, header,
253 header->chksum_type = orig_checksum_type; /* put the stored fields back */
254 header->chksum_size = orig_checksum_size;
255 header->chksum[0] = orig_checksum;
260 #define jbd_commit_csum(...) 0
/**@brief Store the commit block checksum into a commit header.
 *        Only acts on the header when jbd_has_csum() reports a checksum
 *        feature on the journal superblock.
 * @param jbd_fs jbd filesystem
 * @param header commit block header to update */
263 static void jbd_commit_csum_set(struct jbd_fs *jbd_fs,
264 struct jbd_commit_header *header)
266 if (!jbd_has_csum(&jbd_fs->sb))
269 header->chksum_type = 0;
270 header->chksum_size = 0;
/* Store the value big-endian: jbd_verify_commit_csum() compares chksum[0]
 * against to_be32(jbd_commit_csum()), so a host-order store would never
 * verify on a little-endian machine. */
271 header->chksum[0] = to_be32(jbd_commit_csum(jbd_fs, header));
274 #if CONFIG_META_CSUM_ENABLE
275 static bool jbd_verify_commit_csum(struct jbd_fs *jbd_fs,
276 struct jbd_commit_header *header)
278 if (!jbd_has_csum(&jbd_fs->sb))
281 return header->chksum[0] == to_be32(jbd_commit_csum(jbd_fs,
285 #define jbd_verify_commit_csum(...) true
288 #if CONFIG_META_CSUM_ENABLE
/**@brief Compute the crc32c checksum of a journalled block.
 *        The checksum is seeded with the crc32c of the journal superblock
 *        uuid and then continued over buf.
 * @param jbd_fs jbd filesystem
 * @param buf block content to be hashed
 * @return checksum value; it stays 0 when jbd_has_csum() reports that no
 *         checksum feature is enabled.
 *         NOTE(review): the return statement is not visible in this listing. */
289 static uint32_t jbd_block_csum(struct jbd_fs *jbd_fs, const void *buf)
291 uint32_t checksum = 0;
293 if (jbd_has_csum(&jbd_fs->sb)) {
294 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
295 /* First calculate crc32c checksum against fs uuid */
296 checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
297 sizeof(jbd_fs->sb.uuid));
298 /* Calculate crc32c checksum against the whole block */
299 checksum = ext4_crc32c(checksum, buf,
305 #define jbd_block_csum(...) 0
308 static void jbd_block_tag_csum_set(struct jbd_fs *jbd_fs, void *__tag,
311 int ver = jbd_has_csum(&jbd_fs->sb);
316 struct jbd_block_tag *tag = __tag;
317 tag->checksum = (uint16_t)to_be32(checksum);
319 struct jbd_block_tag3 *tag = __tag;
320 tag->checksum = to_be32(checksum);
324 /**@brief Write jbd superblock to disk.
325 * @param jbd_fs jbd filesystem
326 * @param s jbd superblock
327 * @return standard error code*/
328 static int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s)
331 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
334 rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
339 offset = fblock * ext4_sb_get_block_size(&fs->sb);
340 return ext4_block_writebytes(fs->bdev, offset, s,
341 EXT4_SUPERBLOCK_SIZE);
344 /**@brief Read jbd superblock from disk.
345 * @param jbd_fs jbd filesystem
346 * @param s jbd superblock
347 * @return standard error code*/
348 static int jbd_sb_read(struct jbd_fs *jbd_fs, struct jbd_sb *s)
351 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
354 rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
358 offset = fblock * ext4_sb_get_block_size(&fs->sb);
359 return ext4_block_readbytes(fs->bdev, offset, s,
360 EXT4_SUPERBLOCK_SIZE);
363 /**@brief Verify jbd superblock.
364 * @param sb jbd superblock
365 * @return true if jbd superblock is valid */
366 static bool jbd_verify_sb(struct jbd_sb *sb)
368 struct jbd_bhdr *header = &sb->header;
369 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER)
372 if (jbd_get32(header, blocktype) != JBD_SUPERBLOCK &&
373 jbd_get32(header, blocktype) != JBD_SUPERBLOCK_V2)
376 return jbd_verify_sb_csum(sb);
379 /**@brief Write back dirty jbd superblock to disk.
380 * @param jbd_fs jbd filesystem
381 * @return standard error code*/
382 static int jbd_write_sb(struct jbd_fs *jbd_fs)
386 rc = jbd_sb_write(jbd_fs, &jbd_fs->sb);
390 jbd_fs->dirty = false;
395 /**@brief Get reference to jbd filesystem.
396 * @param fs Filesystem to load journal of
397 * @param jbd_fs jbd filesystem
398 * @return standard error code*/
399 int jbd_get_fs(struct ext4_fs *fs,
400 struct jbd_fs *jbd_fs)
403 uint32_t journal_ino;
405 memset(jbd_fs, 0, sizeof(struct jbd_fs));
406 /* See if there is journal inode on this filesystem.*/
407 /* FIXME: detection of the existence of a journal bdev is not done here; only the journal inode is used. */
409 journal_ino = ext4_get32(&fs->sb, journal_inode_number);
411 rc = ext4_fs_get_inode_ref(fs,
415 memset(jbd_fs, 0, sizeof(struct jbd_fs)); /* no reference was obtained, just reset the handle */
418 rc = jbd_sb_read(jbd_fs, &jbd_fs->sb);
/* Release the inode reference BEFORE wiping jbd_fs: the reference is
 * stored inside jbd_fs, so zeroing first would put a blank reference
 * and never release the real one. */
420 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
421 memset(jbd_fs, 0, sizeof(struct jbd_fs));
424 if (!jbd_verify_sb(&jbd_fs->sb)) {
/* Same ordering as above: put the reference first, then reset. */
425 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
426 memset(jbd_fs, 0, sizeof(struct jbd_fs));
433 /**@brief Put reference of jbd filesystem.
434 * @param jbd_fs jbd filesystem
435 * @return standard error code*/
436 int jbd_put_fs(struct jbd_fs *jbd_fs)
439 rc = jbd_write_sb(jbd_fs);
441 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
445 /**@brief Data block lookup helper.
446 * @param jbd_fs jbd filesystem
447 * @param iblock block index
448 * @param fblock logical block address
449 * @return standard error code*/
450 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
452 ext4_fsblk_t *fblock)
454 int rc = ext4_fs_get_inode_dblk_idx(
462 /**@brief jbd block get function (through cache).
463 * @param jbd_fs jbd filesystem
464 * @param block block descriptor
465 * @param fblock jbd logical block address
466 * @return standard error code*/
467 static int jbd_block_get(struct jbd_fs *jbd_fs,
468 struct ext4_block *block,
471 /* TODO: journal device. */
473 ext4_lblk_t iblock = (ext4_lblk_t)fblock;
475 /* Lookup the logical block address of
477 rc = jbd_inode_bmap(jbd_fs, iblock,
482 struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
483 rc = ext4_block_get(bdev, block, fblock);
485 /* If succeeded, mark buffer as BC_FLUSH to indicate
486 * that data should be written to disk immediately.*/
488 ext4_bcache_set_flag(block->buf, BC_FLUSH);
489 /* As we don't want to occupy too much space
490 * in block cache, we set this buffer BC_TMP.*/
491 ext4_bcache_set_flag(block->buf, BC_TMP);
497 /**@brief jbd block get function (through cache, don't read).
498 * @param jbd_fs jbd filesystem
499 * @param block block descriptor
500 * @param fblock jbd logical block address
501 * @return standard error code*/
502 static int jbd_block_get_noread(struct jbd_fs *jbd_fs,
503 struct ext4_block *block,
506 /* TODO: journal device. */
508 ext4_lblk_t iblock = (ext4_lblk_t)fblock;
509 rc = jbd_inode_bmap(jbd_fs, iblock,
514 struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
515 rc = ext4_block_get_noread(bdev, block, fblock);
517 ext4_bcache_set_flag(block->buf, BC_FLUSH);
522 /**@brief jbd block set procedure (through cache).
523 * @param jbd_fs jbd filesystem
524 * @param block block descriptor
525 * @return standard error code*/
526 static int jbd_block_set(struct jbd_fs *jbd_fs,
527 struct ext4_block *block)
529 return ext4_block_set(jbd_fs->inode_ref.fs->bdev,
533 /**@brief helper functions to calculate
534 * block tag size, not including UUID part.
535 * @param jbd_fs jbd filesystem
536 * @return tag size in bytes*/
537 static int jbd_tag_bytes(struct jbd_fs *jbd_fs)
541 /* It is very easy to deal with the case which
542 * JBD_FEATURE_INCOMPAT_CSUM_V3 is enabled.*/
543 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
544 JBD_FEATURE_INCOMPAT_CSUM_V3))
545 return sizeof(struct jbd_block_tag3);
547 size = sizeof(struct jbd_block_tag);
549 /* If JBD_FEATURE_INCOMPAT_CSUM_V2 is enabled,
550 * add 2 bytes to size.*/
551 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
552 JBD_FEATURE_INCOMPAT_CSUM_V2))
553 size += sizeof(uint16_t);
555 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
556 JBD_FEATURE_INCOMPAT_64BIT))
559 /* If block number is 4 bytes in size,
560 * minus 4 bytes from size */
561 return size - sizeof(uint32_t);
564 /**@brief Tag information. */
566 /**@brief Tag size in bytes, including UUID part.*/
569 /**@brief block number stored in this tag.*/
572 /**@brief whether UUID part exists or not.*/
575 /**@brief UUID content if UUID part exists.*/
576 uint8_t uuid[UUID_SIZE];
578 /**@brief Is this the last tag? */
581 /**@brief crc32c checksum. */
585 /**@brief Extract information from a block tag.
586 * @param __tag pointer to the block tag
587 * @param tag_bytes block tag size of this jbd filesystem
588 * @param remain_buf_size remaining size in buffer containing the block tag
589 * @param tag_info information of this tag.
590 * @return EOK when succeed, otherwise return EINVAL.*/
592 jbd_extract_block_tag(struct jbd_fs *jbd_fs,
595 int32_t remain_buf_size,
596 struct tag_info *tag_info)
599 tag_info->tag_bytes = tag_bytes;
600 tag_info->uuid_exist = false;
601 tag_info->last_tag = false;
603 /* See whether it is possible to hold a valid block tag.*/
604 if (remain_buf_size - tag_bytes < 0)
607 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
608 JBD_FEATURE_INCOMPAT_CSUM_V3)) {
609 struct jbd_block_tag3 *tag = __tag;
610 tag_info->block = jbd_get32(tag, blocknr);
611 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
612 JBD_FEATURE_INCOMPAT_64BIT))
614 (uint64_t)jbd_get32(tag, blocknr_high) << 32;
616 if (jbd_get32(tag, flags) & JBD_FLAG_ESCAPE)
619 if (!(jbd_get32(tag, flags) & JBD_FLAG_SAME_UUID)) {
620 /* See whether it is possible to hold UUID part.*/
621 if (remain_buf_size - tag_bytes < UUID_SIZE)
624 uuid_start = (char *)tag + tag_bytes;
625 tag_info->uuid_exist = true;
626 tag_info->tag_bytes += UUID_SIZE;
627 memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
630 if (jbd_get32(tag, flags) & JBD_FLAG_LAST_TAG)
631 tag_info->last_tag = true;
634 struct jbd_block_tag *tag = __tag;
635 tag_info->block = jbd_get32(tag, blocknr);
636 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
637 JBD_FEATURE_INCOMPAT_64BIT))
639 (uint64_t)jbd_get32(tag, blocknr_high) << 32;
641 if (jbd_get16(tag, flags) & JBD_FLAG_ESCAPE)
644 if (!(jbd_get16(tag, flags) & JBD_FLAG_SAME_UUID)) {
645 /* See whether it is possible to hold UUID part.*/
646 if (remain_buf_size - tag_bytes < UUID_SIZE)
649 uuid_start = (char *)tag + tag_bytes;
650 tag_info->uuid_exist = true;
651 tag_info->tag_bytes += UUID_SIZE;
652 memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
655 if (jbd_get16(tag, flags) & JBD_FLAG_LAST_TAG)
656 tag_info->last_tag = true;
662 /**@brief Write information to a block tag.
663 * @param __tag pointer to the block tag
664 * @param remain_buf_size remaining size in buffer containing the block tag
665 * @param tag_info information of this tag.
666 * @return EOK when succeed, otherwise return EINVAL.*/
668 jbd_write_block_tag(struct jbd_fs *jbd_fs,
670 int32_t remain_buf_size,
671 struct tag_info *tag_info)
674 int tag_bytes = jbd_tag_bytes(jbd_fs);
676 tag_info->tag_bytes = tag_bytes;
678 /* See whether it is possible to hold a valid block tag.*/
679 if (remain_buf_size - tag_bytes < 0)
682 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
683 JBD_FEATURE_INCOMPAT_CSUM_V3)) {
684 struct jbd_block_tag3 *tag = __tag;
685 memset(tag, 0, sizeof(struct jbd_block_tag3));
686 jbd_set32(tag, blocknr, tag_info->block);
687 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
688 JBD_FEATURE_INCOMPAT_64BIT))
689 jbd_set32(tag, blocknr_high, tag_info->block >> 32);
691 if (tag_info->uuid_exist) {
692 /* See whether it is possible to hold UUID part.*/
693 if (remain_buf_size - tag_bytes < UUID_SIZE)
696 uuid_start = (char *)tag + tag_bytes;
697 tag_info->tag_bytes += UUID_SIZE;
698 memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
700 jbd_set32(tag, flags,
701 jbd_get32(tag, flags) | JBD_FLAG_SAME_UUID);
703 jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
705 if (tag_info->last_tag)
706 jbd_set32(tag, flags,
707 jbd_get32(tag, flags) | JBD_FLAG_LAST_TAG);
710 struct jbd_block_tag *tag = __tag;
711 memset(tag, 0, sizeof(struct jbd_block_tag));
712 jbd_set32(tag, blocknr, tag_info->block);
713 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
714 JBD_FEATURE_INCOMPAT_64BIT))
715 jbd_set32(tag, blocknr_high, tag_info->block >> 32);
717 if (tag_info->uuid_exist) {
718 /* See whether it is possible to hold UUID part.*/
719 if (remain_buf_size - tag_bytes < UUID_SIZE)
722 uuid_start = (char *)tag + tag_bytes;
723 tag_info->tag_bytes += UUID_SIZE;
724 memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
726 jbd_set16(tag, flags,
727 jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID);
729 jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
731 if (tag_info->last_tag)
732 jbd_set16(tag, flags,
733 jbd_get16(tag, flags) | JBD_FLAG_LAST_TAG);
739 /**@brief Iterate all block tags in a block.
740 * @param jbd_fs jbd filesystem
741 * @param __tag_start pointer to the block
742 * @param tag_tbl_size size of the block
743 * @param func callback routine to indicate that
744 * a block tag is found
745 * @param arg additional argument to be passed to func */
747 jbd_iterate_block_table(struct jbd_fs *jbd_fs,
749 int32_t tag_tbl_size,
750 void (*func)(struct jbd_fs * jbd_fs,
756 char *tag_start, *tag_ptr;
757 int tag_bytes = jbd_tag_bytes(jbd_fs);
758 tag_start = __tag_start;
761 /* Cut off the size of block tail storing checksum. */
762 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
763 JBD_FEATURE_INCOMPAT_CSUM_V2) ||
764 JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
765 JBD_FEATURE_INCOMPAT_CSUM_V3))
766 tag_tbl_size -= sizeof(struct jbd_block_tail);
768 while (tag_tbl_size) {
769 struct tag_info tag_info;
770 int rc = jbd_extract_block_tag(jbd_fs,
779 func(jbd_fs, tag_info.block, tag_info.uuid, arg);
781 /* Stop the iteration when we reach the last tag. */
782 if (tag_info.last_tag)
785 tag_ptr += tag_info.tag_bytes;
786 tag_tbl_size -= tag_info.tag_bytes;
790 static void jbd_display_block_tags(struct jbd_fs *jbd_fs,
795 uint32_t *iblock = arg;
796 ext4_dbg(DEBUG_JBD, "Block in block_tag: %" PRIu64 "\n", block);
803 static struct revoke_entry *
804 jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)
806 struct revoke_entry tmp = {
810 return RB_FIND(jbd_revoke, &info->revoke_root, &tmp);
813 /**@brief Replay a block in a transaction.
814 * @param jbd_fs jbd filesystem
815 * @param block block address to be replayed.*/
816 static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
818 uint8_t *uuid __unused,
822 struct replay_arg *arg = __arg;
823 struct recover_info *info = arg->info;
824 uint32_t *this_block = arg->this_block;
825 struct revoke_entry *revoke_entry;
826 struct ext4_block journal_block, ext4_block;
827 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
831 /* We replay this block only if the current transaction id
832 * is equal or greater than that in revoke entry.*/
833 revoke_entry = jbd_revoke_entry_lookup(info, block);
835 arg->this_trans_id < revoke_entry->trans_id)
839 "Replaying block in block_tag: %" PRIu64 "\n",
842 r = jbd_block_get(jbd_fs, &journal_block, *this_block);
846 /* We need special treatment for ext4 superblock. */
848 r = ext4_block_get_noread(fs->bdev, &ext4_block, block);
850 jbd_block_set(jbd_fs, &journal_block);
854 memcpy(ext4_block.data,
856 jbd_get32(&jbd_fs->sb, blocksize));
858 ext4_bcache_set_dirty(ext4_block.buf);
859 ext4_block_set(fs->bdev, &ext4_block);
861 uint16_t mount_count, state;
862 mount_count = ext4_get16(&fs->sb, mount_count);
863 state = ext4_get16(&fs->sb, state);
866 journal_block.data + EXT4_SUPERBLOCK_OFFSET,
867 EXT4_SUPERBLOCK_SIZE);
869 /* Mark system as mounted */
870 ext4_set16(&fs->sb, state, state);
871 r = ext4_sb_write(fs->bdev, &fs->sb);
875 /*Update mount count*/
876 ext4_set16(&fs->sb, mount_count, mount_count);
879 jbd_block_set(jbd_fs, &journal_block);
884 /**@brief Add block address to revoke tree, along with
885 * its transaction id.
886 * @param info journal replay info
887 * @param block block address to be revoked.*/
888 static void jbd_add_revoke_block_tags(struct recover_info *info,
891 struct revoke_entry *revoke_entry;
893 ext4_dbg(DEBUG_JBD, "Add block %" PRIu64 " to revoke tree\n", block);
894 /* If the revoke entry with respect to the block address
895 * exists already, update its transaction id.*/
896 revoke_entry = jbd_revoke_entry_lookup(info, block);
898 revoke_entry->trans_id = info->this_trans_id;
902 revoke_entry = jbd_alloc_revoke_entry();
903 ext4_assert(revoke_entry);
904 revoke_entry->block = block;
905 revoke_entry->trans_id = info->this_trans_id;
906 RB_INSERT(jbd_revoke, &info->revoke_root, revoke_entry);
911 static void jbd_destroy_revoke_tree(struct recover_info *info)
913 while (!RB_EMPTY(&info->revoke_root)) {
914 struct revoke_entry *revoke_entry =
915 RB_MIN(jbd_revoke, &info->revoke_root);
916 ext4_assert(revoke_entry);
917 RB_REMOVE(jbd_revoke, &info->revoke_root, revoke_entry);
918 jbd_free_revoke_entry(revoke_entry);
922 /* Make sure we wrap around the log correctly! */
923 #define wrap(sb, var) \
925 if (var >= jbd_get32((sb), maxlen)) \
926 var -= (jbd_get32((sb), maxlen) - jbd_get32((sb), first)); \
929 #define ACTION_SCAN 0
930 #define ACTION_REVOKE 1
931 #define ACTION_RECOVER 2
933 /**@brief Add entries in a revoke block to revoke tree.
934 * @param jbd_fs jbd filesystem
935 * @param header revoke block header
936 * @param info journal replay info*/
937 static void jbd_build_revoke_tree(struct jbd_fs *jbd_fs,
938 struct jbd_bhdr *header,
939 struct recover_info *info)
942 struct jbd_revoke_header *revoke_hdr =
943 (struct jbd_revoke_header *)header;
944 uint32_t i, nr_entries, record_len = 4;
946 /* If we are working on a 64bit jbd filesystem, */
947 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
948 JBD_FEATURE_INCOMPAT_64BIT))
951 nr_entries = (jbd_get32(revoke_hdr, count) -
952 sizeof(struct jbd_revoke_header)) /
955 blocks_entry = (char *)(revoke_hdr + 1);
957 for (i = 0;i < nr_entries;i++) {
958 if (record_len == 8) {
960 (uint64_t *)blocks_entry;
961 jbd_add_revoke_block_tags(info, to_be64(*blocks));
964 (uint32_t *)blocks_entry;
965 jbd_add_revoke_block_tags(info, to_be32(*blocks));
967 blocks_entry += record_len;
971 static void jbd_debug_descriptor_block(struct jbd_fs *jbd_fs,
972 struct jbd_bhdr *header,
975 jbd_iterate_block_table(jbd_fs,
977 jbd_get32(&jbd_fs->sb, blocksize) -
978 sizeof(struct jbd_bhdr),
979 jbd_display_block_tags,
983 static void jbd_replay_descriptor_block(struct jbd_fs *jbd_fs,
984 struct jbd_bhdr *header,
985 struct replay_arg *arg)
987 jbd_iterate_block_table(jbd_fs,
989 jbd_get32(&jbd_fs->sb, blocksize) -
990 sizeof(struct jbd_bhdr),
991 jbd_replay_block_tags,
995 /**@brief The core routine of journal replay.
996 * @param jbd_fs jbd filesystem
997 * @param info journal replay info
998 * @param action action needed to be taken
999 * @return standard error code*/
1000 static int jbd_iterate_log(struct jbd_fs *jbd_fs,
1001 struct recover_info *info,
1005 bool log_end = false;
1006 struct jbd_sb *sb = &jbd_fs->sb;
1007 uint32_t start_trans_id, this_trans_id;
1008 uint32_t start_block, this_block;
1010 /* We start iterating valid blocks in the whole journal.*/
1011 start_trans_id = this_trans_id = jbd_get32(sb, sequence);
1012 start_block = this_block = jbd_get32(sb, start);
1014 ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %" PRIu32 "\n",
1018 struct ext4_block block;
1019 struct jbd_bhdr *header;
1020 /* If we are not scanning for the last
1021 * valid transaction in the journal,
1022 * we will stop when we reach the end of
1024 if (action != ACTION_SCAN)
1025 if (this_trans_id > info->last_trans_id) {
1030 r = jbd_block_get(jbd_fs, &block, this_block);
1034 header = (struct jbd_bhdr *)block.data;
1035 /* This block does not have a valid magic number,
1036 * so we have reached the end of the journal.*/
1037 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER) {
1038 jbd_block_set(jbd_fs, &block);
1043 /* If the transaction id we found is not expected,
1044 * we may have reached the end of the journal.
1046 * If we are not scanning the journal, something
1047 * bad might have taken place. :-( */
1048 if (jbd_get32(header, sequence) != this_trans_id) {
1049 if (action != ACTION_SCAN)
1052 jbd_block_set(jbd_fs, &block);
1057 switch (jbd_get32(header, blocktype)) {
1058 case JBD_DESCRIPTOR_BLOCK:
1059 if (!jbd_verify_meta_csum(jbd_fs, header)) {
1061 DBG_WARN "Descriptor block checksum failed."
1062 "Journal block: %" PRIu32"\n",
1067 ext4_dbg(DEBUG_JBD, "Descriptor block: %" PRIu32", "
1068 "trans_id: %" PRIu32"\n",
1069 this_block, this_trans_id);
1070 if (action == ACTION_RECOVER) {
1071 struct replay_arg replay_arg;
1072 replay_arg.info = info;
1073 replay_arg.this_block = &this_block;
1074 replay_arg.this_trans_id = this_trans_id;
1076 jbd_replay_descriptor_block(jbd_fs,
1077 header, &replay_arg);
1079 jbd_debug_descriptor_block(jbd_fs,
1080 header, &this_block);
1083 case JBD_COMMIT_BLOCK:
1084 if (!jbd_verify_commit_csum(jbd_fs,
1085 (struct jbd_commit_header *)header)) {
1087 DBG_WARN "Commit block checksum failed."
1088 "Journal block: %" PRIu32"\n",
1093 ext4_dbg(DEBUG_JBD, "Commit block: %" PRIu32", "
1094 "trans_id: %" PRIu32"\n",
1095 this_block, this_trans_id);
1096 /* This is the end of a transaction,
1097 * we may now proceed to the next transaction.
1101 case JBD_REVOKE_BLOCK:
1102 if (!jbd_verify_meta_csum(jbd_fs, header)) {
1104 DBG_WARN "Revoke block checksum failed."
1105 "Journal block: %" PRIu32"\n",
1110 ext4_dbg(DEBUG_JBD, "Revoke block: %" PRIu32", "
1111 "trans_id: %" PRIu32"\n",
1112 this_block, this_trans_id);
1113 if (action == ACTION_REVOKE) {
1114 info->this_trans_id = this_trans_id;
1115 jbd_build_revoke_tree(jbd_fs,
1123 jbd_block_set(jbd_fs, &block);
1125 wrap(sb, this_block);
1126 if (this_block == start_block)
1130 ext4_dbg(DEBUG_JBD, "End of journal.\n");
1131 if (r == EOK && action == ACTION_SCAN) {
1132 /* We have finished scanning the journal. */
1133 info->start_trans_id = start_trans_id;
1134 if (this_trans_id > start_trans_id)
1135 info->last_trans_id = this_trans_id - 1;
1137 info->last_trans_id = this_trans_id;
1143 /**@brief Replay journal.
1144 * @param jbd_fs jbd filesystem
1145 * @return standard error code*/
1146 int jbd_recover(struct jbd_fs *jbd_fs)
1149 struct recover_info info;
1150 struct jbd_sb *sb = &jbd_fs->sb;
1154 RB_INIT(&info.revoke_root);
1156 r = jbd_iterate_log(jbd_fs, &info, ACTION_SCAN);
1160 r = jbd_iterate_log(jbd_fs, &info, ACTION_REVOKE);
1164 r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);
1166 /* If we successfully replay the journal,
1167 * clear EXT4_FINCOM_RECOVER flag on the
1168 * ext4 superblock, and set the start of
1170 uint32_t features_incompatible =
1171 ext4_get32(&jbd_fs->inode_ref.fs->sb,
1172 features_incompatible);
1173 jbd_set32(&jbd_fs->sb, start, 0);
1174 features_incompatible &= ~EXT4_FINCOM_RECOVER;
1175 ext4_set32(&jbd_fs->inode_ref.fs->sb,
1176 features_incompatible,
1177 features_incompatible);
1178 jbd_fs->dirty = true;
1179 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1180 &jbd_fs->inode_ref.fs->sb);
1182 jbd_destroy_revoke_tree(&info);
1186 static void jbd_journal_write_sb(struct jbd_journal *journal)
1188 struct jbd_fs *jbd_fs = journal->jbd_fs;
1189 jbd_set32(&jbd_fs->sb, start, journal->start);
1190 jbd_set32(&jbd_fs->sb, sequence, journal->trans_id);
1191 jbd_fs->dirty = true;
1194 /**@brief Start accessing the journal.
1195 * @param jbd_fs jbd filesystem
1196 * @param journal current journal session
1197 * @return standard error code*/
1198 int jbd_journal_start(struct jbd_fs *jbd_fs,
1199 struct jbd_journal *journal)
1202 uint32_t features_incompatible =
1203 ext4_get32(&jbd_fs->inode_ref.fs->sb,
1204 features_incompatible);
1205 features_incompatible |= EXT4_FINCOM_RECOVER;
1206 ext4_set32(&jbd_fs->inode_ref.fs->sb,
1207 features_incompatible,
1208 features_incompatible);
1209 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1210 &jbd_fs->inode_ref.fs->sb);
1214 journal->first = jbd_get32(&jbd_fs->sb, first);
1215 journal->start = journal->first;
1216 journal->last = journal->first;
1217 journal->trans_id = 1;
1218 journal->alloc_trans_id = 1;
1220 journal->block_size = jbd_get32(&jbd_fs->sb, blocksize);
1222 TAILQ_INIT(&journal->trans_queue);
1223 TAILQ_INIT(&journal->cp_queue);
1224 RB_INIT(&journal->block_rec_root);
1225 journal->jbd_fs = jbd_fs;
1226 jbd_journal_write_sb(journal);
1227 return jbd_write_sb(jbd_fs);
1230 static void jbd_journal_flush_trans(struct jbd_trans *trans)
1232 struct jbd_buf *jbd_buf, *tmp;
1233 struct jbd_journal *journal = trans->journal;
1234 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1235 TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
1237 struct ext4_block block = jbd_buf->block;
1238 ext4_block_flush_buf(fs->bdev, block.buf);
1243 jbd_journal_skip_pure_revoke(struct jbd_journal *journal,
1244 struct jbd_trans *trans)
1246 journal->start = trans->start_iblock +
1247 trans->alloc_blocks;
1248 wrap(&journal->jbd_fs->sb, journal->start);
1249 journal->trans_id = trans->trans_id + 1;
1250 jbd_journal_free_trans(journal,
1252 jbd_journal_write_sb(journal);
1256 jbd_journal_purge_cp_trans(struct jbd_journal *journal,
1259 struct jbd_trans *trans;
1260 while ((trans = TAILQ_FIRST(&journal->cp_queue))) {
1261 if (!trans->data_cnt) {
1262 TAILQ_REMOVE(&journal->cp_queue,
1265 jbd_journal_skip_pure_revoke(journal, trans);
1267 if (trans->data_cnt ==
1268 trans->written_cnt) {
1270 trans->start_iblock +
1271 trans->alloc_blocks;
1272 wrap(&journal->jbd_fs->sb,
1275 trans->trans_id + 1;
1276 TAILQ_REMOVE(&journal->cp_queue,
1279 jbd_journal_free_trans(journal,
1282 jbd_journal_write_sb(journal);
1283 } else if (!flush) {
1285 trans->start_iblock;
1286 wrap(&journal->jbd_fs->sb,
1290 jbd_journal_write_sb(journal);
1293 jbd_journal_flush_trans(trans);
1298 /**@brief Stop accessing the journal.
 *        Commits all queued transactions, purges the checkpoint queue with
 *        flush set, clears EXT4_FINCOM_RECOVER in the filesystem superblock
 *        and writes it with ext4_sb_write(), then resets journal->trans_id
 *        to 0 and writes the journal superblock.
1299 * @param journal current journal session
1300 * @return standard error code (the last visible statement returns the
 *         result of jbd_write_sb())*/
1301 int jbd_journal_stop(struct jbd_journal *journal)
1304 struct jbd_fs *jbd_fs = journal->jbd_fs;
1305 uint32_t features_incompatible;
1307 /* Commit all the transactions to the journal.*/
1308 jbd_journal_commit_all(journal);
1310 /* Make sure that journalled content have reached
1312 jbd_journal_purge_cp_trans(journal, true);
1314 /* There should be no block record in this journal
1316 if (!RB_EMPTY(&journal->block_rec_root))
1318 DBG_WARN "There are still block records "
1319 "in this journal session!\n");
1321 features_incompatible =
1322 ext4_get32(&jbd_fs->inode_ref.fs->sb,
1323 features_incompatible);
1324 features_incompatible &= ~EXT4_FINCOM_RECOVER;
1325 ext4_set32(&jbd_fs->inode_ref.fs->sb,
1326 features_incompatible,
1327 features_incompatible);
1328 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1329 &jbd_fs->inode_ref.fs->sb);
1334 journal->trans_id = 0;
1335 jbd_journal_write_sb(journal);
1336 return jbd_write_sb(journal->jbd_fs);
1339 /**@brief Allocate a block in the journal.
 *        start_block takes the current journal->last, which is then
 *        advanced; the allocation is counted in trans->alloc_blocks.
1340 * @param journal current journal session
1341 * @param trans transaction
1342 * @return allocated block address*/
1343 static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal,
1344 struct jbd_trans *trans)
1346 uint32_t start_block;
/* Take the current value of journal->last and post-increment it. */
1348 start_block = journal->last++;
1349 trans->alloc_blocks++;
/* NOTE(review): wrap() is defined outside this view; presumably it folds
 * journal->last back into the journal area - confirm. */
1350 wrap(&journal->jbd_fs->sb, journal->last);
1352 /* If there is no space left, flush all journalled
1353 * blocks to disk first.*/
1354 if (journal->last == journal->start)
1355 jbd_journal_purge_cp_trans(journal, true);
1360 /**@brief Allocate a new transaction
 *        The structure comes zero-filled from calloc() and is bound to
 *        the given journal session.
1361 * @param journal current journal session
1362 * @return transaction allocated*/
1364 jbd_journal_new_trans(struct jbd_journal *journal)
1366 struct jbd_trans *trans = calloc(1, sizeof(struct jbd_trans));
/* NOTE(review): a check of the calloc() result is not visible in this
 * listing - confirm it exists on the elided lines. */
1370 /* We will assign a trans_id to this transaction,
1371 * once it has been committed.*/
1372 trans->journal = journal;
/* Start with an empty buffer queue. */
1374 TAILQ_INIT(&trans->buf_queue);
/* Forward declaration: jbd_trans_end_write() is defined further down,
 * but buffers' end_write pointers are compared against it before that. */
1378 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
1379 struct ext4_buf *buf __unused,
1383 /**@brief Gain access to a block before making any modifications.
 *        If the buffer is dirty and hooked to jbd_trans_end_write() by a
 *        different transaction, it is flushed to disk first.
1384 * @param journal current journal session
1385 * @param trans transaction
1386 * @param block block descriptor
1387 * @return standard error code.*/
1388 int jbd_trans_get_access(struct jbd_journal *journal,
1389 struct jbd_trans *trans,
1390 struct ext4_block *block)
1393 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
/* For a buffer registered by jbd_trans_set_block_dirty(), end_write_arg
 * holds its struct jbd_buf. */
1394 struct jbd_buf *jbd_buf = block->buf->end_write_arg;
1396 /* If the buffer has already been modified, we should
1397 * flush dirty data in this buffer to disk.*/
1398 if (ext4_bcache_test_flag(block->buf, BC_DIRTY) &&
1399 block->buf->end_write == jbd_trans_end_write) {
1400 ext4_assert(jbd_buf);
/* Only a buffer owned by a different transaction is flushed. */
1401 if (jbd_buf->trans != trans)
1402 r = ext4_block_flush_buf(fs->bdev, block->buf);
/**@brief Look up a block record in the journal's block_rec_root tree.
 * @param journal current journal session
 * @return result of RB_FIND(); NOTE(review): presumably NULL when no
 *         record matches. The remaining parameter (callers pass an lba)
 *         and the initializer of the search key are on lines elided from
 *         this listing.*/
1408 static struct jbd_block_rec *
1409 jbd_trans_block_rec_lookup(struct jbd_journal *journal,
/* Stack-allocated record used only as the search key. */
1412 struct jbd_block_rec tmp = {
1416 return RB_FIND(jbd_block,
1417 &journal->block_rec_root,
/**@brief Attach the block record of a logical block to a transaction,
 *        either by taking over the record already known to the journal or
 *        by allocating a new one.
 * @param trans transaction that takes the record
 * @param buf buffer stored in a newly allocated record
 * @return NOTE(review): the return statements are elided from this
 *         listing; jbd_trans_set_block_dirty() treats NULL as failure.
 *         The lba parameter is declared on an elided line.*/
1421 static inline struct jbd_block_rec *
1422 jbd_trans_insert_block_rec(struct jbd_trans *trans,
1424 struct ext4_buf *buf)
1426 struct jbd_block_rec *block_rec;
1427 block_rec = jbd_trans_block_rec_lookup(trans->journal, lba);
/* NOTE(review): the test selecting this path is elided; presumably it
 * runs when the lookup found a record. The record is first unlinked from
 * the list it is currently on. */
1429 LIST_REMOVE(block_rec, tbrec_node);
1430 /* Data should be flushed to disk already. */
1431 ext4_assert(!block_rec->buf);
1432 /* Now this block record belongs to this transaction. */
1433 LIST_INSERT_HEAD(&trans->tbrec_list, block_rec, tbrec_node);
1434 block_rec->trans = trans;
/* Allocate a fresh, zero-filled record. */
1437 block_rec = calloc(1, sizeof(struct jbd_block_rec));
1441 block_rec->lba = lba;
1442 block_rec->buf = buf;
1443 block_rec->trans = trans;
/* Link the record into the transaction's list and the journal's tree. */
1444 LIST_INSERT_HEAD(&trans->tbrec_list, block_rec, tbrec_node);
1445 RB_INSERT(jbd_block, &trans->journal->block_rec_root, block_rec);
/**@brief Detach a block record from its list and from the journal's
 *        block_rec_root tree, provided it is owned by the given
 *        transaction.
 * @param journal current journal session
 * @param block_rec block record
 * @param trans transaction expected to own the record*/
1450 jbd_trans_remove_block_rec(struct jbd_journal *journal,
1451 struct jbd_block_rec *block_rec,
1452 struct jbd_trans *trans)
1454 /* If this block record doesn't belong to this transaction,
1456 if (block_rec->trans == trans) {
1457 LIST_REMOVE(block_rec, tbrec_node);
1458 RB_REMOVE(jbd_block,
1459 &journal->block_rec_root,
1465 /**@brief Add block to a transaction and mark it dirty.
 *        A buffer that is neither dirty nor already hooked to
 *        jbd_trans_end_write() gets a new struct jbd_buf and a block
 *        record, and is queued on trans->buf_queue.
1466 * @param trans transaction
1467 * @param block block descriptor
1468 * @return standard error code*/
1469 int jbd_trans_set_block_dirty(struct jbd_trans *trans,
1470 struct ext4_block *block)
1472 struct jbd_buf *buf;
/* Only buffers not yet tracked by the journal are registered. */
1474 if (!ext4_bcache_test_flag(block->buf, BC_DIRTY) &&
1475 block->buf->end_write != jbd_trans_end_write) {
1476 struct jbd_block_rec *block_rec;
1477 buf = calloc(1, sizeof(struct jbd_buf));
/* NULL from jbd_trans_insert_block_rec() is the failure case; its
 * handling is on lines elided from this listing. */
1481 if ((block_rec = jbd_trans_insert_block_rec(trans,
1483 block->buf)) == NULL) {
1488 buf->block_rec = block_rec;
/* buf->block keeps a copy of the descriptor; a reference is taken on the
 * underlying buffer. */
1490 buf->block = *block;
1491 ext4_bcache_inc_ref(block->buf);
1493 /* When the content reaches the disk, notify us
1494 * so that we may do a checkpoint. */
1495 block->buf->end_write = jbd_trans_end_write;
1496 block->buf->end_write_arg = buf;
1499 TAILQ_INSERT_HEAD(&trans->buf_queue, buf, buf_node);
1501 ext4_bcache_set_dirty(block->buf);
1506 /**@brief Add block to be revoked to a transaction
 *        Allocates a zero-filled revoke record and puts it at the head of
 *        trans->revoke_list.
1507 * @param trans transaction
1508 * @param lba logical block address
1509 * @return standard error code*/
1510 int jbd_trans_revoke_block(struct jbd_trans *trans,
1513 struct jbd_revoke_rec *rec =
1514 calloc(1, sizeof(struct jbd_revoke_rec));
/* NOTE(review): the calloc() check and the assignment of lba to the
 * record are not visible in this listing - confirm on the elided lines. */
1519 LIST_INSERT_HEAD(&trans->revoke_list, rec, revoke_node);
1523 /**@brief Try to add block to be revoked to a transaction.
1524 * If @lba still remains in a transaction on the checkpoint
1525 * queue, add @lba as a revoked block to the transaction.
1526 * @param trans transaction
1527 * @param lba logical block address
1528 * @return standard error code*/
1529 int jbd_trans_try_revoke_block(struct jbd_trans *trans,
1533 struct jbd_journal *journal = trans->journal;
1534 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
/* A block record exists for @lba if some transaction still tracks it. */
1535 struct jbd_block_rec *block_rec =
1536 jbd_trans_block_rec_lookup(journal, lba);
1538 /* Make sure we don't flush any buffers belonging to this transaction. */
1539 if (block_rec && block_rec->trans != trans) {
1540 /* If the buffer has not been flushed yet, flush it now. */
1541 if (block_rec->buf) {
1542 r = ext4_block_flush_buf(fs->bdev, block_rec->buf);
/* NOTE(review): the int result of jbd_trans_revoke_block() is not used
 * on this line. */
1548 jbd_trans_revoke_block(trans, lba);
1554 /**@brief Free a transaction
 *        Walks the buffer queue, the revoke list and the block record
 *        list of the transaction and unlinks their entries.
1555 * @param journal current journal session
1556 * @param trans transaction
1557 * @param abort discard all the modifications on the block?
1558 * @return none (the function is declared void)*/
1559 void jbd_journal_free_trans(struct jbd_journal *journal,
1560 struct jbd_trans *trans,
1563 struct jbd_buf *jbd_buf, *tmp;
1564 struct jbd_revoke_rec *rec, *tmp2;
1565 struct jbd_block_rec *block_rec, *tmp3;
1566 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1567 TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
/* NOTE(review): the condition guarding the next four lines is elided;
 * presumably this is the abort path - confirm. The buffer is unhooked
 * from the end-of-write callback, its dirty flag is cleared and the block
 * is handed back with ext4_block_set(). */
1570 jbd_buf->block.buf->end_write = NULL;
1571 jbd_buf->block.buf->end_write_arg = NULL;
1572 ext4_bcache_clear_dirty(jbd_buf->block.buf);
1573 ext4_block_set(fs->bdev, &jbd_buf->block);
1576 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
/* Drop every revoke record of the transaction. */
1579 LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1581 LIST_REMOVE(rec, revoke_node);
/* Detach the block records still owned by this transaction. */
1584 LIST_FOREACH_SAFE(block_rec, &trans->tbrec_list, tbrec_node,
1586 jbd_trans_remove_block_rec(journal, block_rec, trans);
1592 /**@brief Write commit block for a transaction
 *        Allocates one journal block, fills in a commit header carrying
 *        the transaction id, sets its checksum, marks the block dirty and
 *        hands it back with jbd_block_set().
1593 * @param trans transaction
1594 * @return standard error code*/
1595 static int jbd_trans_write_commit_block(struct jbd_trans *trans)
1598 struct jbd_commit_header *header;
1599 uint32_t commit_iblock = 0;
1600 struct ext4_block commit_block;
1601 struct jbd_journal *journal = trans->journal;
/* The commit block is counted in trans->alloc_blocks like any other
 * journal block of the transaction. */
1603 commit_iblock = jbd_journal_alloc_block(journal, trans);
1604 rc = jbd_block_get_noread(journal->jbd_fs,
1605 &commit_block, commit_iblock);
/* The block data starts with the commit header. */
1609 header = (struct jbd_commit_header *)commit_block.data;
1610 jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER);
1611 jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);
1612 jbd_set32(&header->header, sequence, trans->trans_id);
/* The checksum is set after the header fields above are in place. */
1614 jbd_commit_csum_set(journal->jbd_fs, header);
1615 ext4_bcache_set_dirty(commit_block.buf);
1616 rc = jbd_block_set(journal->jbd_fs, &commit_block);
1623 /**@brief Write descriptor block for a transaction
 *        Buffers on trans->buf_queue that are not dirty are released.
 *        For each remaining buffer a block tag is written into a
 *        descriptor block and the buffer's data is copied into a newly
 *        allocated journal block.
1624 * @param journal current journal session
1625 * @param trans transaction
1626 * @return standard error code*/
1627 static int jbd_journal_prepare(struct jbd_journal *journal,
1628 struct jbd_trans *trans)
1630 int rc = EOK, i = 0;
1631 int32_t tag_tbl_size;
1632 uint32_t desc_iblock = 0;
1633 uint32_t data_iblock = 0;
1634 char *tag_start = NULL, *tag_ptr = NULL;
1635 struct jbd_buf *jbd_buf, *tmp;
1636 struct ext4_block desc_block, data_block;
1637 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1639 /* Try to remove any non-dirty buffers from the tail of
1641 TAILQ_FOREACH_REVERSE_SAFE(jbd_buf, &trans->buf_queue,
1642 jbd_trans_buf, buf_node, tmp) {
1643 /* We stop the iteration when we find a dirty buffer. */
1644 if (ext4_bcache_test_flag(jbd_buf->block.buf,
1648 /* The buffer has not been modified, just release
1650 jbd_trans_remove_block_rec(journal,
1651 jbd_buf->block_rec, trans);
1654 jbd_buf->block.buf->end_write = NULL;
1655 jbd_buf->block.buf->end_write_arg = NULL;
1656 ext4_block_set(fs->bdev, &jbd_buf->block);
1657 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1661 TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node, tmp) {
1662 struct tag_info tag_info;
1663 bool uuid_exist = false;
1665 if (!ext4_bcache_test_flag(jbd_buf->block.buf,
1667 /* The buffer has not been modified, just release
1669 jbd_trans_remove_block_rec(journal,
1670 jbd_buf->block_rec, trans);
1673 jbd_buf->block.buf->end_write = NULL;
1674 jbd_buf->block.buf->end_write_arg = NULL;
1675 ext4_block_set(fs->bdev, &jbd_buf->block);
1676 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1680 checksum = jbd_block_csum(journal->jbd_fs,
1681 jbd_buf->block.data);
1684 struct jbd_bhdr *bhdr;
1685 desc_iblock = jbd_journal_alloc_block(journal, trans);
1686 rc = jbd_block_get_noread(journal->jbd_fs,
1687 &desc_block, desc_iblock);
1691 ext4_bcache_set_dirty(desc_block.buf);
1693 bhdr = (struct jbd_bhdr *)desc_block.data;
1694 jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1695 jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK);
1696 jbd_set32(bhdr, sequence, trans->trans_id);
1698 tag_start = (char *)(bhdr + 1);
1699 tag_ptr = tag_start;
1701 tag_tbl_size = journal->block_size -
1702 sizeof(struct jbd_bhdr);
1704 if (jbd_has_csum(&journal->jbd_fs->sb))
1705 tag_tbl_size -= sizeof(struct jbd_block_tail);
1707 if (!trans->start_iblock)
1708 trans->start_iblock = desc_iblock;
1711 tag_info.block = jbd_buf->block.lb_id;
1712 tag_info.uuid_exist = uuid_exist;
1713 if (i == trans->data_cnt - 1)
1714 tag_info.last_tag = true;
1716 tag_info.last_tag = false;
1717 tag_info.checksum = checksum;
1720 memcpy(tag_info.uuid, journal->jbd_fs->sb.uuid,
1723 rc = jbd_write_block_tag(journal->jbd_fs,
1728 jbd_meta_csum_set(journal->jbd_fs,
1729 (struct jbd_bhdr *)desc_block.data);
1730 jbd_block_set(journal->jbd_fs, &desc_block);
1735 data_iblock = jbd_journal_alloc_block(journal, trans);
1736 rc = jbd_block_get_noread(journal->jbd_fs,
1737 &data_block, data_iblock);
1741 ext4_bcache_set_dirty(data_block.buf);
1743 memcpy(data_block.data, jbd_buf->block.data,
1744 journal->block_size);
1746 rc = jbd_block_set(journal->jbd_fs, &data_block);
1750 tag_ptr += tag_info.tag_bytes;
1751 tag_tbl_size -= tag_info.tag_bytes;
1755 if (rc == EOK && desc_iblock) {
1756 jbd_meta_csum_set(journal->jbd_fs,
1757 (struct jbd_bhdr *)desc_block.data);
1758 jbd_block_set(journal->jbd_fs, &desc_block);
1764 /**@brief Write revoke block for a transaction
 *        The block numbers on trans->revoke_list are stored big-endian
 *        into one or more journal blocks of type JBD_REVOKE_BLOCK.
1765 * @param journal current journal session
1766 * @param trans transaction
1767 * @return standard error code*/
1769 jbd_journal_prepare_revoke(struct jbd_journal *journal,
1770 struct jbd_trans *trans)
1772 int rc = EOK, i = 0;
1773 int32_t tag_tbl_size;
1774 uint32_t desc_iblock = 0;
1775 char *blocks_entry = NULL;
1776 struct jbd_revoke_rec *rec, *tmp;
1777 struct ext4_block desc_block;
1778 struct jbd_revoke_header *header = NULL;
/* Size in bytes of one revoked block number inside the revoke block. */
1779 int32_t record_len = 4;
/* NOTE(review): the statement under this test is elided; the
 * record_len == 8 branch below suggests it widens record_len to 8 for
 * 64-bit journals - confirm. */
1781 if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1782 JBD_FEATURE_INCOMPAT_64BIT))
1785 LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
/* Start a new revoke block (the condition opening this section is on an
 * elided line). */
1789 struct jbd_bhdr *bhdr;
1790 desc_iblock = jbd_journal_alloc_block(journal, trans);
1791 rc = jbd_block_get_noread(journal->jbd_fs,
1792 &desc_block, desc_iblock);
1797 ext4_bcache_set_dirty(desc_block.buf);
1799 bhdr = (struct jbd_bhdr *)desc_block.data;
1800 jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1801 jbd_set32(bhdr, blocktype, JBD_REVOKE_BLOCK);
1802 jbd_set32(bhdr, sequence, trans->trans_id);
/* Records start right behind the revoke header. */
1804 header = (struct jbd_revoke_header *)bhdr;
1805 blocks_entry = (char *)(header + 1);
1806 tag_tbl_size = journal->block_size -
1807 sizeof(struct jbd_revoke_header);
/* With checksums enabled, room for a block tail is kept free. */
1809 if (jbd_has_csum(&journal->jbd_fs->sb))
1810 tag_tbl_size -= sizeof(struct jbd_block_tail);
/* Remember the first journal block used by this transaction. */
1812 if (!trans->start_iblock)
1813 trans->start_iblock = desc_iblock;
/* No room for another record: finish the current revoke block. count is
 * set to block_size minus the remaining record space. */
1817 if (tag_tbl_size < record_len) {
1818 jbd_set32(header, count,
1819 journal->block_size - tag_tbl_size);
1820 jbd_meta_csum_set(journal->jbd_fs,
1821 (struct jbd_bhdr *)desc_block.data);
1822 jbd_block_set(journal->jbd_fs, &desc_block);
/* Store the block number big-endian, 64 or 32 bits wide. */
1827 if (record_len == 8) {
1829 (uint64_t *)blocks_entry;
1830 *blocks = to_be64(rec->lba);
1833 (uint32_t *)blocks_entry;
1834 *blocks = to_be32(rec->lba);
/* Advance the write position and shrink the remaining space. */
1836 blocks_entry += record_len;
1837 tag_tbl_size -= record_len;
/* Finish the last revoke block if one was started. */
1841 if (rc == EOK && desc_iblock) {
1843 jbd_set32(header, count,
1844 journal->block_size - tag_tbl_size);
1846 jbd_meta_csum_set(journal->jbd_fs,
1847 (struct jbd_bhdr *)desc_block.data);
1848 jbd_block_set(journal->jbd_fs, &desc_block);
1854 /**@brief Submit the transaction to transaction queue.
 *        The transaction is appended to the tail of journal->trans_queue;
 *        jbd_journal_commit_one() takes transactions from its head.
1855 * @param journal current journal session
1856 * @param trans transaction*/
1858 jbd_journal_submit_trans(struct jbd_journal *journal,
1859 struct jbd_trans *trans)
1861 TAILQ_INSERT_TAIL(&journal->trans_queue,
1866 /**@brief Put references of block descriptors in a transaction.
 *        Calls ext4_block_set() on a copy of every block descriptor
 *        queued on trans->buf_queue.
1867 * @param journal current journal session
1868 * @param trans transaction*/
1869 void jbd_journal_cp_trans(struct jbd_journal *journal, struct jbd_trans *trans)
1871 struct jbd_buf *jbd_buf, *tmp;
1872 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1873 TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
/* ext4_block_set() is given a local copy, so the descriptor stored in
 * jbd_buf is not the object passed to it. */
1875 struct ext4_block block = jbd_buf->block;
1876 ext4_block_set(fs->bdev, &block);
1880 /**@brief Update the start block of the journal when
1881 * all the contents in a transaction reach the disk.
 *        Installed as the end_write callback of journalled buffers by
 *        jbd_trans_set_block_dirty().*/
1882 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
1883 struct ext4_buf *buf,
/* arg is the struct jbd_buf stored in the buffer's end_write_arg. */
1887 struct jbd_buf *jbd_buf = arg;
1888 struct jbd_trans *trans = jbd_buf->trans;
1889 struct jbd_journal *journal = trans->journal;
/* True when this transaction is at the head of the checkpoint queue. */
1890 bool first_in_queue =
1891 trans == TAILQ_FIRST(&journal->cp_queue);
/* The buffer leaves the transaction's queue and the block record no
 * longer points at it. */
1895 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1896 jbd_buf->block_rec->buf = NULL;
1899 /* Clear the end_write and end_write_arg fields. */
1900 buf->end_write = NULL;
1901 buf->end_write_arg = NULL;
/* One more data block of this transaction has been written. */
1903 trans->written_cnt++;
1904 if (trans->written_cnt == trans->data_cnt) {
1905 /* If it is the first transaction on checkpoint queue,
1906 * we will shift the start of the journal to the next
1907 * transaction, and remove subsequent written
1908 * transactions from checkpoint queue until we find
1909 * an unwritten one. */
1910 if (first_in_queue) {
1911 journal->start = trans->start_iblock +
1912 trans->alloc_blocks;
1913 wrap(&journal->jbd_fs->sb, journal->start);
1914 journal->trans_id = trans->trans_id + 1;
1915 TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
1916 jbd_journal_free_trans(journal, trans, false);
/* Retire further transactions from the queue without forcing a flush,
 * then write the journal superblock. */
1918 jbd_journal_purge_cp_trans(journal, false);
1919 jbd_journal_write_sb(journal);
1920 jbd_write_sb(journal->jbd_fs);
1925 /**@brief Commit a transaction to the journal immediately.
 *        Writes the descriptor/data blocks, the revoke blocks and the
 *        commit block of the transaction, then either queues it for
 *        checkpointing or frees it.
1926 * @param journal current journal session
1927 * @param trans transaction
1928 * @return standard error code*/
1929 int jbd_journal_commit_trans(struct jbd_journal *journal,
1930 struct jbd_trans *trans)
/* Remembered so that journal->last can be restored further down. */
1933 uint32_t last = journal->last;
/* The transaction receives the journal's next free transaction id. */
1935 trans->trans_id = journal->alloc_trans_id;
1936 rc = jbd_journal_prepare(journal, trans);
1940 rc = jbd_journal_prepare_revoke(journal, trans);
1944 if (TAILQ_EMPTY(&trans->buf_queue) &&
1945 LIST_EMPTY(&trans->revoke_list)) {
1946 /* Since there are no entries in either the buffer list
1947 * or the revoke entry list, we do not consider trans a
1948 * complete transaction and just return EOK.*/
1949 jbd_journal_free_trans(journal, trans, false);
1953 rc = jbd_trans_write_commit_block(trans);
/* Advance the id handed to the next transaction. */
1957 journal->alloc_trans_id++;
/* No transaction is waiting on the checkpoint queue. */
1958 if (TAILQ_EMPTY(&journal->cp_queue)) {
/* With data blocks: the journal starts at this transaction, which is
 * put on the checkpoint queue. */
1959 if (trans->data_cnt) {
1960 journal->start = trans->start_iblock;
1961 wrap(&journal->jbd_fs->sb, journal->start);
1962 journal->trans_id = trans->trans_id;
1963 jbd_journal_write_sb(journal);
1964 jbd_write_sb(journal->jbd_fs);
1965 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
1967 jbd_journal_cp_trans(journal, trans);
/* NOTE(review): presumably the data_cnt == 0 branch (the else is
 * elided): the journal start moves past the transaction and it is freed
 * right away. */
1969 journal->start = trans->start_iblock +
1970 trans->alloc_blocks;
1971 wrap(&journal->jbd_fs->sb, journal->start);
1972 journal->trans_id = trans->trans_id + 1;
1973 jbd_journal_write_sb(journal);
1974 jbd_journal_free_trans(journal, trans, false);
/* NOTE(review): presumably the non-empty checkpoint queue case (the else
 * is elided): the transaction is queued behind the existing ones. */
1977 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
1979 if (trans->data_cnt)
1980 jbd_journal_cp_trans(journal, trans);
/* NOTE(review): presumably the error path (its label is elided): the
 * journal's last block index is rolled back and the transaction is freed
 * with abort set. */
1985 journal->last = last;
1986 jbd_journal_free_trans(journal, trans, true);
1991 /**@brief Commit one transaction on transaction queue
 *        The transaction at the head of journal->trans_queue, if any, is
 *        unlinked and passed to jbd_journal_commit_trans().
1993 * @param journal current journal session.*/
1994 void jbd_journal_commit_one(struct jbd_journal *journal)
1996 struct jbd_trans *trans;
1998 if ((trans = TAILQ_FIRST(&journal->trans_queue))) {
1999 TAILQ_REMOVE(&journal->trans_queue, trans, trans_node);
/* NOTE(review): the int result of jbd_journal_commit_trans() is not used
 * on this line. */
2000 jbd_journal_commit_trans(journal, trans);
2004 /**@brief Commit all the transactions on transaction queue
2006 * @param journal current journal session.*/
2007 void jbd_journal_commit_all(struct jbd_journal *journal)
/* Each jbd_journal_commit_one() call unlinks the head of trans_queue, so
 * the loop ends once the queue is empty. */
2009 while (!TAILQ_EMPTY(&journal->trans_queue)) {
2010 jbd_journal_commit_one(journal);