2 * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3 * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 /** @addtogroup lwext4
34 * @file ext4_journal.c
35 * @brief Journal handle functions
38 #include "ext4_config.h"
39 #include "ext4_types.h"
40 #include "ext4_misc.h"
41 #include "ext4_errno.h"
42 #include "ext4_debug.h"
45 #include "ext4_super.h"
46 #include "ext4_journal.h"
47 #include "ext4_blockdev.h"
48 #include "ext4_crc32.h"
49 #include "ext4_journal.h"
54 /**@brief Revoke entry during journal replay.*/
56 /**@brief Block number not to be replayed.*/
59 /**@brief For any transaction id smaller
60 * than trans_id, records of @block
61 * in those transactions should not
65 /**@brief Revoke tree node.*/
66 RB_ENTRY(revoke_entry) revoke_node;
69 /**@brief Valid journal replay information.*/
71 /**@brief Starting transaction id.*/
72 uint32_t start_trans_id;
74 /**@brief Ending transaction id.*/
75 uint32_t last_trans_id;
77 /**@brief Used as internal argument.*/
78 uint32_t this_trans_id;
80 /**@brief No of transactions went through.*/
83 /**@brief RB-Tree storing revoke entries.*/
84 RB_HEAD(jbd_revoke, revoke_entry) revoke_root;
/**@brief  Argument bundle handed to the per-tag replay callback.*/
struct replay_arg {
	/**@brief  Replay state shared by all passes.*/
	struct recover_info *info;

	/**@brief  Journal block index the walker is currently on.*/
	uint32_t *this_block;

	/**@brief  Id of the transaction the walker is currently in.*/
	uint32_t this_trans_id;
};
/* Keep a journal block index inside the log: once it reaches maxlen it is
 * moved back by (maxlen - first). Note that var is evaluated more than once. */
#define wrap(sb, var)                                                   \
	do {                                                            \
		if (var >= jbd_get32((sb), maxlen))                     \
			var -= (jbd_get32((sb), maxlen) -               \
				jbd_get32((sb), first));                \
	} while (0)
/**@brief  Signed distance between two transaction ids.
 * @param  x first transaction id
 * @param  y second transaction id
 * @return x - y, computed in unsigned 32-bit arithmetic and converted
 *         to int32_t, so the sign tells which id is ahead even after
 *         the counter has wrapped around*/
static inline int32_t
trans_id_diff(uint32_t x, uint32_t y)
{
	return (int32_t)(x - y);
}
114 jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b)
116 if (a->block > b->block)
118 else if (a->block < b->block)
124 jbd_block_rec_cmp(struct jbd_block_rec *a, struct jbd_block_rec *b)
128 else if (a->lba < b->lba)
134 jbd_revoke_rec_cmp(struct jbd_revoke_rec *a, struct jbd_revoke_rec *b)
138 else if (a->lba < b->lba)
143 RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
144 jbd_revoke_entry_cmp, static inline)
145 RB_GENERATE_INTERNAL(jbd_block, jbd_block_rec, block_rec_node,
146 jbd_block_rec_cmp, static inline)
147 RB_GENERATE_INTERNAL(jbd_revoke_tree, jbd_revoke_rec, revoke_node,
148 jbd_revoke_rec_cmp, static inline)
/* Allocation helpers for replay revoke entries; a new entry comes back
 * zero-filled from ext4_calloc(). */
#define jbd_alloc_revoke_entry() \
	ext4_calloc(1, sizeof(struct revoke_entry))
#define jbd_free_revoke_entry(addr) \
	ext4_free(addr)
/* Reports whether the journal superblock advertises metadata checksums.
 * JBD_FEATURE_INCOMPAT_CSUM_V2 is tested first, JBD_FEATURE_INCOMPAT_CSUM_V3
 * second. Callers in this file treat a zero result as "no checksum".
 * NOTE(review): the values returned for each feature are not visible in
 * this view - confirm them before relying on a specific non-zero code. */
153 static int jbd_has_csum(struct jbd_sb *jbd_sb)
155 if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V2))
158 if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V3))
#if CONFIG_META_CSUM_ENABLE
/**@brief  Compute the checksum of the journal superblock.
 * @param  jbd_sb jbd superblock
 * @return crc32c over JBD_SUPERBLOCK_SIZE bytes with the checksum field
 *         treated as zero, or 0 when checksums are not enabled*/
static uint32_t jbd_sb_csum(struct jbd_sb *jbd_sb)
{
	uint32_t saved, crc;

	if (!jbd_has_csum(jbd_sb))
		return 0;

	/* The stored checksum must not take part in the calculation:
	 * blank it for the duration of the crc and put it back after. */
	saved = jbd_sb->checksum;
	jbd_set32(jbd_sb, checksum, 0);
	crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_sb, JBD_SUPERBLOCK_SIZE);
	jbd_sb->checksum = saved;

	return crc;
}
#else
#define jbd_sb_csum(...) 0
#endif
183 static void jbd_sb_csum_set(struct jbd_sb *jbd_sb)
185 if (!jbd_has_csum(jbd_sb))
188 jbd_set32(jbd_sb, checksum, jbd_sb_csum(jbd_sb));
#if CONFIG_META_CSUM_ENABLE
/**@brief  Check the checksum stored in the journal superblock.
 * @param  jbd_sb jbd superblock
 * @return true when checksums are not enabled or the stored value
 *         matches the computed one*/
static bool
jbd_verify_sb_csum(struct jbd_sb *jbd_sb)
{
	return !jbd_has_csum(jbd_sb) ||
	       jbd_sb_csum(jbd_sb) == jbd_get32(jbd_sb, checksum);
}
#else
#define jbd_verify_sb_csum(...) true
#endif
#if CONFIG_META_CSUM_ENABLE
/**@brief  Compute the checksum of a journal metadata block.
 * @param  jbd_fs jbd filesystem
 * @param  bhdr   start of the block (its header)
 * @return crc32c seeded with the journal uuid and run over the whole
 *         block with the tail checksum treated as zero, or 0 when
 *         checksums are not enabled*/
static uint32_t jbd_meta_csum(struct jbd_fs *jbd_fs,
			      struct jbd_bhdr *bhdr)
{
	struct jbd_block_tail *tail;
	uint32_t blk_len, saved, crc;

	if (!jbd_has_csum(&jbd_fs->sb))
		return 0;

	/* The checksum lives in a tail placed at the very end of the block. */
	blk_len = jbd_get32(&jbd_fs->sb, blocksize);
	tail = (struct jbd_block_tail *)((char *)bhdr + blk_len -
					 sizeof(struct jbd_block_tail));
	saved = tail->checksum;
	tail->checksum = 0;

	crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
			  sizeof(jbd_fs->sb.uuid));
	crc = ext4_crc32c(crc, bhdr, blk_len);

	tail->checksum = saved;
	return crc;
}
#else
#define jbd_meta_csum(...) 0
#endif
232 static void jbd_meta_csum_set(struct jbd_fs *jbd_fs,
233 struct jbd_bhdr *bhdr)
235 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
236 struct jbd_block_tail *tail = (struct jbd_block_tail *)
237 ((char *)bhdr + block_size -
238 sizeof(struct jbd_block_tail));
239 if (!jbd_has_csum(&jbd_fs->sb))
242 tail->checksum = to_be32(jbd_meta_csum(jbd_fs, bhdr));
#if CONFIG_META_CSUM_ENABLE
/**@brief  Check the checksum stored in the tail of a journal metadata block.
 * @param  jbd_fs jbd filesystem
 * @param  bhdr   start of the block (its header)
 * @return true when checksums are not enabled or the stored value
 *         matches the computed one*/
static bool
jbd_verify_meta_csum(struct jbd_fs *jbd_fs,
		     struct jbd_bhdr *bhdr)
{
	struct jbd_block_tail *tail;
	uint32_t blk_len;

	if (!jbd_has_csum(&jbd_fs->sb))
		return true;

	blk_len = jbd_get32(&jbd_fs->sb, blocksize);
	tail = (struct jbd_block_tail *)((char *)bhdr + blk_len -
					 sizeof(struct jbd_block_tail));
	return jbd_meta_csum(jbd_fs, bhdr) == to_be32(tail->checksum);
}
#else
#define jbd_verify_meta_csum(...) true
#endif
#if CONFIG_META_CSUM_ENABLE
/**@brief  Compute the checksum of a commit block.
 * @param  jbd_fs jbd filesystem
 * @param  header commit block header (start of the block)
 * @return crc32c seeded with the journal uuid and run over the whole
 *         block with chksum_type, chksum_size and chksum[0] treated
 *         as zero, or 0 when checksums are not enabled*/
static uint32_t jbd_commit_csum(struct jbd_fs *jbd_fs,
				struct jbd_commit_header *header)
{
	uint32_t saved_type, saved_size, saved_csum;
	uint32_t blk_len, crc;

	if (!jbd_has_csum(&jbd_fs->sb))
		return 0;

	blk_len = jbd_get32(&jbd_fs->sb, blocksize);

	/* Blank the checksum related fields while the crc is running. */
	saved_type = header->chksum_type;
	saved_size = header->chksum_size;
	saved_csum = header->chksum[0];
	header->chksum_type = 0;
	header->chksum_size = 0;
	header->chksum[0] = 0;

	crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
			  sizeof(jbd_fs->sb.uuid));
	crc = ext4_crc32c(crc, header, blk_len);

	header->chksum_type = saved_type;
	header->chksum_size = saved_size;
	header->chksum[0] = saved_csum;

	return crc;
}
#else
#define jbd_commit_csum(...) 0
#endif
295 static void jbd_commit_csum_set(struct jbd_fs *jbd_fs,
296 struct jbd_commit_header *header)
298 if (!jbd_has_csum(&jbd_fs->sb))
301 header->chksum_type = 0;
302 header->chksum_size = 0;
303 header->chksum[0] = jbd_commit_csum(jbd_fs, header);
#if CONFIG_META_CSUM_ENABLE
/**@brief  Check the checksum stored in a commit block.
 * @param  jbd_fs jbd filesystem
 * @param  header commit block header (start of the block)
 * @return true when checksums are not enabled or chksum[0] matches
 *         the computed checksum*/
static bool jbd_verify_commit_csum(struct jbd_fs *jbd_fs,
				   struct jbd_commit_header *header)
{
	uint32_t expected;

	if (!jbd_has_csum(&jbd_fs->sb))
		return true;

	expected = to_be32(jbd_commit_csum(jbd_fs, header));
	return header->chksum[0] == expected;
}
#else
#define jbd_verify_commit_csum(...) true
#endif
/* jbd_block_csum: checksum of one journalled block held in buf.
 * With metadata checksums enabled (jbd_has_csum), the result is a crc32c
 * seeded with the journal uuid, continued over the bytes of `sequence` and
 * then over the block itself. Otherwise, when the
 * JBD_FEATURE_COMPAT_CHECKSUM test succeeds, the result is ext4_crc32()
 * over the block, seeded with `csum`. In every other case 0 is returned
 * (also the value of the stub macro when CONFIG_META_CSUM_ENABLE is off).
 * NOTE(review): a COMPAT feature bit is tested with
 * JBD_HAS_INCOMPAT_FEATURE - confirm this is not meant to be the COMPAT
 * test. The comment in that branch says crc32c while the call is
 * ext4_crc32 - confirm which one is intended.
 * NOTE(review): `sequence` is fed to the crc in host byte order - verify
 * against the on-disk format. */
320 #if CONFIG_META_CSUM_ENABLE
322 * NOTE: We only make use of @csum parameter when
323 * JBD_FEATURE_COMPAT_CHECKSUM is enabled.
325 static uint32_t jbd_block_csum(struct jbd_fs *jbd_fs, const void *buf,
329 uint32_t checksum = 0;
331 if (jbd_has_csum(&jbd_fs->sb)) {
332 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
333 /* First calculate crc32c checksum against fs uuid */
334 checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
335 sizeof(jbd_fs->sb.uuid));
336 /* Then calculate crc32c checksum against sequence no. */
337 checksum = ext4_crc32c(checksum, &sequence,
339 /* Calculate crc32c checksum against tho whole block */
340 checksum = ext4_crc32c(checksum, buf,
342 } else if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
343 JBD_FEATURE_COMPAT_CHECKSUM)) {
344 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
345 /* Calculate crc32c checksum against tho whole block */
346 checksum = ext4_crc32(csum, buf,
352 #define jbd_block_csum(...) 0
355 static void jbd_block_tag_csum_set(struct jbd_fs *jbd_fs, void *__tag,
358 int ver = jbd_has_csum(&jbd_fs->sb);
363 struct jbd_block_tag *tag = __tag;
364 tag->checksum = (uint16_t)to_be32(checksum);
366 struct jbd_block_tag3 *tag = __tag;
367 tag->checksum = to_be32(checksum);
/* jbd_sb_write: maps journal block 0 through jbd_inode_bmap(), turns the
 * resulting block address into a byte offset using the ext4 block size and
 * writes EXT4_SUPERBLOCK_SIZE bytes of `s` there with
 * ext4_block_writebytes(), whose result is returned.
 * NOTE(review): the statements between the lookup and the offset
 * computation are not visible in this view - confirm how a failed lookup
 * is handled and whether the checksum is refreshed before the write. */
371 /**@brief Write jbd superblock to disk.
372 * @param jbd_fs jbd filesystem
373 * @param s jbd superblock
374 * @return standard error code*/
375 static int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s)
378 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
381 rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
386 offset = fblock * ext4_sb_get_block_size(&fs->sb);
387 return ext4_block_writebytes(fs->bdev, offset, s,
388 EXT4_SUPERBLOCK_SIZE);
391 /**@brief Read jbd superblock from disk.
392 * @param jbd_fs jbd filesystem
393 * @param s jbd superblock
394 * @return standard error code*/
395 static int jbd_sb_read(struct jbd_fs *jbd_fs, struct jbd_sb *s)
398 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
401 rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
405 offset = fblock * ext4_sb_get_block_size(&fs->sb);
406 return ext4_block_readbytes(fs->bdev, offset, s,
407 EXT4_SUPERBLOCK_SIZE);
410 /**@brief Verify jbd superblock.
411 * @param sb jbd superblock
412 * @return true if jbd superblock is valid */
413 static bool jbd_verify_sb(struct jbd_sb *sb)
415 struct jbd_bhdr *header = &sb->header;
416 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER)
419 if (jbd_get32(header, blocktype) != JBD_SUPERBLOCK &&
420 jbd_get32(header, blocktype) != JBD_SUPERBLOCK_V2)
423 return jbd_verify_sb_csum(sb);
/* jbd_write_sb: hands the in-memory journal superblock (jbd_fs->sb) to
 * jbd_sb_write() and clears jbd_fs->dirty once that has been done.
 * NOTE(review): the conditions guarding the write and the clearing of the
 * flag are not visible in this view - presumably the write only happens
 * while the dirty flag is set, and the flag stays set when the write
 * fails; confirm. */
426 /**@brief Write back dirty jbd superblock to disk.
427 * @param jbd_fs jbd filesystem
428 * @return standard error code*/
429 static int jbd_write_sb(struct jbd_fs *jbd_fs)
433 rc = jbd_sb_write(jbd_fs, &jbd_fs->sb);
437 jbd_fs->dirty = false;
442 /**@brief Get reference to jbd filesystem.
443 * @param fs Filesystem to load journal of
444 * @param jbd_fs jbd filesystem
445 * @return standard error code*/
446 int jbd_get_fs(struct ext4_fs *fs,
447 struct jbd_fs *jbd_fs)
450 uint32_t journal_ino;
452 memset(jbd_fs, 0, sizeof(struct jbd_fs));
453 /* See if there is journal inode on this filesystem.*/
454 /* FIXME: detection on existance ofbkejournal bdev is
456 journal_ino = ext4_get32(&fs->sb, journal_inode_number);
458 rc = ext4_fs_get_inode_ref(fs,
462 memset(jbd_fs, 0, sizeof(struct jbd_fs));
465 rc = jbd_sb_read(jbd_fs, &jbd_fs->sb);
467 memset(jbd_fs, 0, sizeof(struct jbd_fs));
468 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
471 if (!jbd_verify_sb(&jbd_fs->sb)) {
472 memset(jbd_fs, 0, sizeof(struct jbd_fs));
473 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
478 jbd_fs->bdev = fs->bdev;
483 /**@brief Put reference of jbd filesystem.
484 * @param jbd_fs jbd filesystem
485 * @return standard error code*/
486 int jbd_put_fs(struct jbd_fs *jbd_fs)
489 rc = jbd_write_sb(jbd_fs);
491 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
/* jbd_inode_bmap: thin wrapper around ext4_fs_get_inode_dblk_idx(), used
 * to translate a block index inside the journal inode (iblock) into a
 * block address that is stored through fblock.
 * NOTE(review): the arguments of the wrapped call are not visible in this
 * view - confirm them against ext4_fs_get_inode_dblk_idx()'s prototype. */
495 /**@brief Data block lookup helper.
496 * @param jbd_fs jbd filesystem
497 * @param iblock block index
498 * @param fblock logical block address
499 * @return standard error code*/
500 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
502 ext4_fsblk_t *fblock)
504 int rc = ext4_fs_get_inode_dblk_idx(
512 /**@brief jbd block get function (through cache).
513 * @param jbd_fs jbd filesystem
514 * @param block block descriptor
515 * @param fblock jbd logical block address
516 * @return standard error code*/
517 static int jbd_block_get(struct jbd_fs *jbd_fs,
518 struct ext4_block *block,
521 /* TODO: journal device. */
523 struct ext4_blockdev *bdev = jbd_fs->bdev;
524 ext4_lblk_t iblock = (ext4_lblk_t)fblock;
526 /* Lookup the logical block address of
528 rc = jbd_inode_bmap(jbd_fs, iblock,
533 rc = ext4_block_get(bdev, block, fblock);
535 /* If succeeded, mark buffer as BC_FLUSH to indicate
536 * that data should be written to disk immediately.*/
538 ext4_bcache_set_flag(block->buf, BC_FLUSH);
539 /* As we don't want to occupy too much space
540 * in block cache, we set this buffer BC_TMP.*/
541 ext4_bcache_set_flag(block->buf, BC_TMP);
547 /**@brief jbd block get function (through cache, don't read).
548 * @param jbd_fs jbd filesystem
549 * @param block block descriptor
550 * @param fblock jbd logical block address
551 * @return standard error code*/
552 static int jbd_block_get_noread(struct jbd_fs *jbd_fs,
553 struct ext4_block *block,
556 /* TODO: journal device. */
558 struct ext4_blockdev *bdev = jbd_fs->bdev;
559 ext4_lblk_t iblock = (ext4_lblk_t)fblock;
560 rc = jbd_inode_bmap(jbd_fs, iblock,
565 rc = ext4_block_get_noread(bdev, block, fblock);
567 ext4_bcache_set_flag(block->buf, BC_FLUSH);
572 /**@brief jbd block set procedure (through cache).
573 * @param jbd_fs jbd filesystem
574 * @param block block descriptor
575 * @return standard error code*/
576 static int jbd_block_set(struct jbd_fs *jbd_fs,
577 struct ext4_block *block)
579 struct ext4_blockdev *bdev = jbd_fs->bdev;
580 return ext4_block_set(bdev, block);
583 /**@brief helper functions to calculate
584 * block tag size, not including UUID part.
585 * @param jbd_fs jbd filesystem
586 * @return tag size in bytes*/
587 static int jbd_tag_bytes(struct jbd_fs *jbd_fs)
591 /* It is very easy to deal with the case which
592 * JBD_FEATURE_INCOMPAT_CSUM_V3 is enabled.*/
593 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
594 JBD_FEATURE_INCOMPAT_CSUM_V3))
595 return sizeof(struct jbd_block_tag3);
597 size = sizeof(struct jbd_block_tag);
599 /* If JBD_FEATURE_INCOMPAT_CSUM_V2 is enabled,
600 * add 2 bytes to size.*/
601 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
602 JBD_FEATURE_INCOMPAT_CSUM_V2))
603 size += sizeof(uint16_t);
605 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
606 JBD_FEATURE_INCOMPAT_64BIT))
609 /* If block number is 4 bytes in size,
610 * minus 4 bytes from size */
611 return size - sizeof(uint32_t);
614 /**@brief Tag information. */
616 /**@brief Tag size in bytes, including UUID part.*/
619 /**@brief block number stored in this tag.*/
622 /**@brief Is the first 4 bytes of block equals to
623 * JBD_MAGIC_NUMBER? */
626 /**@brief whether UUID part exists or not.*/
629 /**@brief UUID content if UUID part exists.*/
630 uint8_t uuid[UUID_SIZE];
632 /**@brief Is this the last tag? */
635 /**@brief crc32c checksum. */
639 /**@brief Extract information from a block tag.
640 * @param __tag pointer to the block tag
641 * @param tag_bytes block tag size of this jbd filesystem
642 * @param remaining size in buffer containing the block tag
643 * @param tag_info information of this tag.
644 * @return EOK when succeed, otherwise return EINVAL.*/
646 jbd_extract_block_tag(struct jbd_fs *jbd_fs,
649 int32_t remain_buf_size,
650 struct tag_info *tag_info)
653 tag_info->tag_bytes = tag_bytes;
654 tag_info->uuid_exist = false;
655 tag_info->last_tag = false;
656 tag_info->is_escape = false;
658 /* See whether it is possible to hold a valid block tag.*/
659 if (remain_buf_size - tag_bytes < 0)
662 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
663 JBD_FEATURE_INCOMPAT_CSUM_V3)) {
664 struct jbd_block_tag3 *tag = __tag;
665 tag_info->block = jbd_get32(tag, blocknr);
666 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
667 JBD_FEATURE_INCOMPAT_64BIT))
669 (uint64_t)jbd_get32(tag, blocknr_high) << 32;
671 if (jbd_get32(tag, flags) & JBD_FLAG_ESCAPE)
672 tag_info->is_escape = true;
674 if (!(jbd_get32(tag, flags) & JBD_FLAG_SAME_UUID)) {
675 /* See whether it is possible to hold UUID part.*/
676 if (remain_buf_size - tag_bytes < UUID_SIZE)
679 uuid_start = (char *)tag + tag_bytes;
680 tag_info->uuid_exist = true;
681 tag_info->tag_bytes += UUID_SIZE;
682 memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
685 if (jbd_get32(tag, flags) & JBD_FLAG_LAST_TAG)
686 tag_info->last_tag = true;
689 struct jbd_block_tag *tag = __tag;
690 tag_info->block = jbd_get32(tag, blocknr);
691 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
692 JBD_FEATURE_INCOMPAT_64BIT))
694 (uint64_t)jbd_get32(tag, blocknr_high) << 32;
696 if (jbd_get16(tag, flags) & JBD_FLAG_ESCAPE)
697 tag_info->is_escape = true;
699 if (!(jbd_get16(tag, flags) & JBD_FLAG_SAME_UUID)) {
700 /* See whether it is possible to hold UUID part.*/
701 if (remain_buf_size - tag_bytes < UUID_SIZE)
704 uuid_start = (char *)tag + tag_bytes;
705 tag_info->uuid_exist = true;
706 tag_info->tag_bytes += UUID_SIZE;
707 memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
710 if (jbd_get16(tag, flags) & JBD_FLAG_LAST_TAG)
711 tag_info->last_tag = true;
717 /**@brief Write information to a block tag.
718 * @param __tag pointer to the block tag
719 * @param remaining size in buffer containing the block tag
720 * @param tag_info information of this tag.
721 * @return EOK when succeed, otherwise return EINVAL.*/
723 jbd_write_block_tag(struct jbd_fs *jbd_fs,
725 int32_t remain_buf_size,
726 struct tag_info *tag_info)
729 int tag_bytes = jbd_tag_bytes(jbd_fs);
731 tag_info->tag_bytes = tag_bytes;
733 /* See whether it is possible to hold a valid block tag.*/
734 if (remain_buf_size - tag_bytes < 0)
737 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
738 JBD_FEATURE_INCOMPAT_CSUM_V3)) {
739 struct jbd_block_tag3 *tag = __tag;
740 memset(tag, 0, sizeof(struct jbd_block_tag3));
741 jbd_set32(tag, blocknr, (uint32_t)tag_info->block);
742 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
743 JBD_FEATURE_INCOMPAT_64BIT))
744 jbd_set32(tag, blocknr_high, tag_info->block >> 32);
746 if (tag_info->uuid_exist) {
747 /* See whether it is possible to hold UUID part.*/
748 if (remain_buf_size - tag_bytes < UUID_SIZE)
751 uuid_start = (char *)tag + tag_bytes;
752 tag_info->tag_bytes += UUID_SIZE;
753 memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
755 jbd_set32(tag, flags,
756 jbd_get32(tag, flags) | JBD_FLAG_SAME_UUID);
758 jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
760 if (tag_info->last_tag)
761 jbd_set32(tag, flags,
762 jbd_get32(tag, flags) | JBD_FLAG_LAST_TAG);
764 if (tag_info->is_escape)
765 jbd_set32(tag, flags,
766 jbd_get32(tag, flags) | JBD_FLAG_ESCAPE);
769 struct jbd_block_tag *tag = __tag;
770 memset(tag, 0, sizeof(struct jbd_block_tag));
771 jbd_set32(tag, blocknr, (uint32_t)tag_info->block);
772 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
773 JBD_FEATURE_INCOMPAT_64BIT))
774 jbd_set32(tag, blocknr_high, tag_info->block >> 32);
776 if (tag_info->uuid_exist) {
777 /* See whether it is possible to hold UUID part.*/
778 if (remain_buf_size - tag_bytes < UUID_SIZE)
781 uuid_start = (char *)tag + tag_bytes;
782 tag_info->tag_bytes += UUID_SIZE;
783 memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
785 jbd_set16(tag, flags,
786 jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID);
788 jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
790 if (tag_info->last_tag)
791 jbd_set16(tag, flags,
792 jbd_get16(tag, flags) | JBD_FLAG_LAST_TAG);
795 if (tag_info->is_escape)
796 jbd_set16(tag, flags,
797 jbd_get16(tag, flags) | JBD_FLAG_ESCAPE);
803 /**@brief Iterate all block tags in a block.
804 * @param jbd_fs jbd filesystem
805 * @param __tag_start pointer to the block
806 * @param tag_tbl_size size of the block
807 * @param func callback routine to indicate that
808 * a block tag is found
809 * @param arg additional argument to be passed to func */
811 jbd_iterate_block_table(struct jbd_fs *jbd_fs,
813 int32_t tag_tbl_size,
814 void (*func)(struct jbd_fs * jbd_fs,
815 struct tag_info *tag_info,
819 char *tag_start, *tag_ptr;
820 int tag_bytes = jbd_tag_bytes(jbd_fs);
821 tag_start = __tag_start;
824 /* Cut off the size of block tail storing checksum. */
825 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
826 JBD_FEATURE_INCOMPAT_CSUM_V2) ||
827 JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
828 JBD_FEATURE_INCOMPAT_CSUM_V3))
829 tag_tbl_size -= sizeof(struct jbd_block_tail);
831 while (tag_tbl_size) {
832 struct tag_info tag_info;
833 int rc = jbd_extract_block_tag(jbd_fs,
842 func(jbd_fs, &tag_info, arg);
844 /* Stop the iteration when we reach the last tag. */
845 if (tag_info.last_tag)
848 tag_ptr += tag_info.tag_bytes;
849 tag_tbl_size -= tag_info.tag_bytes;
853 static void jbd_display_block_tags(struct jbd_fs *jbd_fs,
854 struct tag_info *tag_info,
857 uint32_t *iblock = arg;
858 ext4_dbg(DEBUG_JBD, "Block in block_tag: %" PRIu64 "\n", tag_info->block);
860 wrap(&jbd_fs->sb, *iblock);
865 static struct revoke_entry *
866 jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)
868 struct revoke_entry tmp = {
872 return RB_FIND(jbd_revoke, &info->revoke_root, &tmp);
/* jbd_replay_block_tags: tag callback used while replaying a descriptor
 * block. __arg is a struct replay_arg. The journal block index it points
 * to is wrapped back into the log, the revoke tree is consulted for the
 * tag's block, and the journal copy at that index is fetched with
 * jbd_block_get().
 *  - tag_info->block != 0: the journal copy is written over the
 *    filesystem block (obtained without reading it); for escaped blocks
 *    the first word is set back to JBD_MAGIC_NUMBER, then the buffer is
 *    marked dirty and released.
 *  - otherwise (the ext4 superblock): mount_count and state of the
 *    in-memory superblock are saved, EXT4_SUPERBLOCK_SIZE bytes at
 *    EXT4_SUPERBLOCK_OFFSET of the journal copy replace fs->sb, the saved
 *    state is put back, the superblock is written, and the saved
 *    mount_count is put back.
 * The journal block is released with jbd_block_set() at the end.
 * NOTE(review): the statements that follow the revoke test and the error
 * checks are not visible in this view. The visible test is
 * trans_id_diff(this_trans_id, revoke trans_id) <= 0, which suggests a
 * block is skipped unless its transaction is strictly newer than the
 * revoke record - the "equal or greater" wording below looks off; confirm.
 * NOTE(review): check that the journal block is also released when
 * ext4_sb_write() fails. */
875 /**@brief Replay a block in a transaction.
876 * @param jbd_fs jbd filesystem
877 * @param tag_info tag_info of the logged block.*/
878 static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
879 struct tag_info *tag_info,
883 struct replay_arg *arg = __arg;
884 struct recover_info *info = arg->info;
885 uint32_t *this_block = arg->this_block;
886 struct revoke_entry *revoke_entry;
887 struct ext4_block journal_block, ext4_block;
888 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
891 wrap(&jbd_fs->sb, *this_block);
893 /* We replay this block only if the current transaction id
894 * is equal or greater than that in revoke entry.*/
895 revoke_entry = jbd_revoke_entry_lookup(info, tag_info->block);
897 trans_id_diff(arg->this_trans_id, revoke_entry->trans_id) <= 0)
901 "Replaying block in block_tag: %" PRIu64 "\n",
904 r = jbd_block_get(jbd_fs, &journal_block, *this_block);
908 /* We need special treatment for ext4 superblock. */
909 if (tag_info->block) {
910 r = ext4_block_get_noread(fs->bdev, &ext4_block, tag_info->block);
912 jbd_block_set(jbd_fs, &journal_block);
916 memcpy(ext4_block.data,
918 jbd_get32(&jbd_fs->sb, blocksize));
920 if (tag_info->is_escape)
921 ((struct jbd_bhdr *)ext4_block.data)->magic =
922 to_be32(JBD_MAGIC_NUMBER);
924 ext4_bcache_set_dirty(ext4_block.buf);
925 ext4_block_set(fs->bdev, &ext4_block);
927 uint16_t mount_count, state;
928 mount_count = ext4_get16(&fs->sb, mount_count);
929 state = ext4_get16(&fs->sb, state);
932 journal_block.data + EXT4_SUPERBLOCK_OFFSET,
933 EXT4_SUPERBLOCK_SIZE);
935 /* Mark system as mounted */
936 ext4_set16(&fs->sb, state, state);
937 r = ext4_sb_write(fs->bdev, &fs->sb);
941 /*Update mount count*/
942 ext4_set16(&fs->sb, mount_count, mount_count);
945 jbd_block_set(jbd_fs, &journal_block);
950 /**@brief Add block address to revoke tree, along with
951 * its transaction id.
952 * @param info journal replay info
953 * @param block block address to be replayed.*/
954 static void jbd_add_revoke_block_tags(struct recover_info *info,
957 struct revoke_entry *revoke_entry;
959 ext4_dbg(DEBUG_JBD, "Add block %" PRIu64 " to revoke tree\n", block);
960 /* If the revoke entry with respect to the block address
961 * exists already, update its transaction id.*/
962 revoke_entry = jbd_revoke_entry_lookup(info, block);
964 revoke_entry->trans_id = info->this_trans_id;
968 revoke_entry = jbd_alloc_revoke_entry();
969 ext4_assert(revoke_entry);
970 revoke_entry->block = block;
971 revoke_entry->trans_id = info->this_trans_id;
972 RB_INSERT(jbd_revoke, &info->revoke_root, revoke_entry);
977 static void jbd_destroy_revoke_tree(struct recover_info *info)
979 while (!RB_EMPTY(&info->revoke_root)) {
980 struct revoke_entry *revoke_entry =
981 RB_MIN(jbd_revoke, &info->revoke_root);
982 ext4_assert(revoke_entry);
983 RB_REMOVE(jbd_revoke, &info->revoke_root, revoke_entry);
984 jbd_free_revoke_entry(revoke_entry);
/* Passes understood by jbd_iterate_log(). */
#define ACTION_SCAN    0 /* walk the log to find its range of transactions */
#define ACTION_REVOKE  1 /* feed the revoke blocks into the revoke tree */
#define ACTION_RECOVER 2 /* replay the blocks listed in descriptor blocks */
993 /**@brief Add entries in a revoke block to revoke tree.
994 * @param jbd_fs jbd filesystem
995 * @param header revoke block header
996 * @param recover_info journal replay info*/
997 static void jbd_build_revoke_tree(struct jbd_fs *jbd_fs,
998 struct jbd_bhdr *header,
999 struct recover_info *info)
1002 struct jbd_revoke_header *revoke_hdr =
1003 (struct jbd_revoke_header *)header;
1004 uint32_t i, nr_entries, record_len = 4;
1006 /* If we are working on a 64bit jbd filesystem, */
1007 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
1008 JBD_FEATURE_INCOMPAT_64BIT))
1011 nr_entries = (jbd_get32(revoke_hdr, count) -
1012 sizeof(struct jbd_revoke_header)) /
1015 blocks_entry = (char *)(revoke_hdr + 1);
1017 for (i = 0;i < nr_entries;i++) {
1018 if (record_len == 8) {
1020 (uint64_t *)blocks_entry;
1021 jbd_add_revoke_block_tags(info, to_be64(*blocks));
1024 (uint32_t *)blocks_entry;
1025 jbd_add_revoke_block_tags(info, to_be32(*blocks));
1027 blocks_entry += record_len;
1031 static void jbd_debug_descriptor_block(struct jbd_fs *jbd_fs,
1032 struct jbd_bhdr *header,
1035 jbd_iterate_block_table(jbd_fs,
1037 jbd_get32(&jbd_fs->sb, blocksize) -
1038 sizeof(struct jbd_bhdr),
1039 jbd_display_block_tags,
1043 static void jbd_replay_descriptor_block(struct jbd_fs *jbd_fs,
1044 struct jbd_bhdr *header,
1045 struct replay_arg *arg)
1047 jbd_iterate_block_table(jbd_fs,
1049 jbd_get32(&jbd_fs->sb, blocksize) -
1050 sizeof(struct jbd_bhdr),
1051 jbd_replay_block_tags,
/* jbd_iterate_log: walks the journal block by block. The walk starts at
 * the transaction id (sequence) and block (start) recorded in the journal
 * superblock.
 *  - ACTION_SCAN resets info->trans_cnt before walking; the other actions
 *    stop once the transaction id has gone past info->last_trans_id.
 *  - A block whose magic is not JBD_MAGIC_NUMBER, or whose sequence is not
 *    the expected transaction id, is released and treated as the end of
 *    the log.
 *  - Descriptor blocks: the metadata checksum is verified, then the tag
 *    table is replayed for ACTION_RECOVER and only walked for the other
 *    actions; both advance this_block over the data blocks.
 *  - Commit blocks: the commit checksum is verified; the block marks the
 *    end of a transaction.
 *  - Revoke blocks: the metadata checksum is verified; for ACTION_REVOKE
 *    the entries are added to the revoke tree under the current
 *    transaction id.
 *  After each block this_block is wrapped back into the log and compared
 *  with the block the walk started from.
 *  - After a successful ACTION_SCAN, info->start_trans_id is set to the
 *    starting id and info->last_trans_id to this_trans_id - 1 when the
 *    walk advanced past the starting id, else to this_trans_id.
 * NOTE(review): several statements (loop header, error assignments, the
 * increments of this_block / this_trans_id / trans_cnt) are not visible in
 * this view - confirm them before changing the order of anything here. */
1055 /**@brief The core routine of journal replay.
1056 * @param jbd_fs jbd filesystem
1057 * @param recover_info journal replay info
1058 * @param action action needed to be taken
1059 * @return standard error code*/
1060 static int jbd_iterate_log(struct jbd_fs *jbd_fs,
1061 struct recover_info *info,
1065 bool log_end = false;
1066 struct jbd_sb *sb = &jbd_fs->sb;
1067 uint32_t start_trans_id, this_trans_id;
1068 uint32_t start_block, this_block;
1070 /* We start iterating valid blocks in the whole journal.*/
1071 start_trans_id = this_trans_id = jbd_get32(sb, sequence);
1072 start_block = this_block = jbd_get32(sb, start);
1073 if (action == ACTION_SCAN)
1074 info->trans_cnt = 0;
1075 else if (!info->trans_cnt)
1078 ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %" PRIu32 "\n",
1082 struct ext4_block block;
1083 struct jbd_bhdr *header;
1084 /* If we are not scanning for the last
1085 * valid transaction in the journal,
1086 * we will stop when we reach the end of
1088 if (action != ACTION_SCAN)
1089 if (trans_id_diff(this_trans_id, info->last_trans_id) > 0) {
1094 r = jbd_block_get(jbd_fs, &block, this_block);
1098 header = (struct jbd_bhdr *)block.data;
1099 /* This block does not have a valid magic number,
1100 * so we have reached the end of the journal.*/
1101 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER) {
1102 jbd_block_set(jbd_fs, &block);
1107 /* If the transaction id we found is not expected,
1108 * we may have reached the end of the journal.
1110 * If we are not scanning the journal, something
1111 * bad might have taken place. :-( */
1112 if (jbd_get32(header, sequence) != this_trans_id) {
1113 if (action != ACTION_SCAN)
1116 jbd_block_set(jbd_fs, &block);
1121 switch (jbd_get32(header, blocktype)) {
1122 case JBD_DESCRIPTOR_BLOCK:
1123 if (!jbd_verify_meta_csum(jbd_fs, header)) {
1125 DBG_WARN "Descriptor block checksum failed."
1126 "Journal block: %" PRIu32"\n",
1131 ext4_dbg(DEBUG_JBD, "Descriptor block: %" PRIu32", "
1132 "trans_id: %" PRIu32"\n",
1133 this_block, this_trans_id);
1134 if (action == ACTION_RECOVER) {
1135 struct replay_arg replay_arg;
1136 replay_arg.info = info;
1137 replay_arg.this_block = &this_block;
1138 replay_arg.this_trans_id = this_trans_id;
1140 jbd_replay_descriptor_block(jbd_fs,
1141 header, &replay_arg);
1143 jbd_debug_descriptor_block(jbd_fs,
1144 header, &this_block);
1147 case JBD_COMMIT_BLOCK:
1148 if (!jbd_verify_commit_csum(jbd_fs,
1149 (struct jbd_commit_header *)header)) {
1151 DBG_WARN "Commit block checksum failed."
1152 "Journal block: %" PRIu32"\n",
1157 ext4_dbg(DEBUG_JBD, "Commit block: %" PRIu32", "
1158 "trans_id: %" PRIu32"\n",
1159 this_block, this_trans_id);
1160 /* This is the end of a transaction,
1161 * we may now proceed to the next transaction.
1166 case JBD_REVOKE_BLOCK:
1167 if (!jbd_verify_meta_csum(jbd_fs, header)) {
1169 DBG_WARN "Revoke block checksum failed."
1170 "Journal block: %" PRIu32"\n",
1175 ext4_dbg(DEBUG_JBD, "Revoke block: %" PRIu32", "
1176 "trans_id: %" PRIu32"\n",
1177 this_block, this_trans_id);
1178 if (action == ACTION_REVOKE) {
1179 info->this_trans_id = this_trans_id;
1180 jbd_build_revoke_tree(jbd_fs,
1188 jbd_block_set(jbd_fs, &block);
1190 wrap(sb, this_block);
1191 if (this_block == start_block)
1195 ext4_dbg(DEBUG_JBD, "End of journal.\n");
1196 if (r == EOK && action == ACTION_SCAN) {
1197 /* We have finished scanning the journal. */
1198 info->start_trans_id = start_trans_id;
1199 if (trans_id_diff(this_trans_id, start_trans_id) > 0)
1200 info->last_trans_id = this_trans_id - 1;
1202 info->last_trans_id = this_trans_id;
1208 /**@brief Replay journal.
1209 * @param jbd_fs jbd filesystem
1210 * @return standard error code*/
1211 int jbd_recover(struct jbd_fs *jbd_fs)
1214 struct recover_info info;
1215 struct jbd_sb *sb = &jbd_fs->sb;
1219 RB_INIT(&info.revoke_root);
1221 r = jbd_iterate_log(jbd_fs, &info, ACTION_SCAN);
1225 r = jbd_iterate_log(jbd_fs, &info, ACTION_REVOKE);
1229 r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);
1231 /* If we successfully replay the journal,
1232 * clear EXT4_FINCOM_RECOVER flag on the
1233 * ext4 superblock, and set the start of
1235 uint32_t features_incompatible =
1236 ext4_get32(&jbd_fs->inode_ref.fs->sb,
1237 features_incompatible);
1238 jbd_set32(&jbd_fs->sb, start, 0);
1239 features_incompatible &= ~EXT4_FINCOM_RECOVER;
1240 ext4_set32(&jbd_fs->inode_ref.fs->sb,
1241 features_incompatible,
1242 features_incompatible);
1243 jbd_fs->dirty = true;
1244 r = ext4_sb_write(jbd_fs->bdev,
1245 &jbd_fs->inode_ref.fs->sb);
1247 jbd_destroy_revoke_tree(&info);
1251 static void jbd_journal_write_sb(struct jbd_journal *journal)
1253 struct jbd_fs *jbd_fs = journal->jbd_fs;
1254 jbd_set32(&jbd_fs->sb, start, journal->start);
1255 jbd_set32(&jbd_fs->sb, sequence, journal->trans_id);
1256 jbd_fs->dirty = true;
1259 /**@brief Start accessing the journal.
1260 * @param jbd_fs jbd filesystem
1261 * @param journal current journal session
1262 * @return standard error code*/
1263 int jbd_journal_start(struct jbd_fs *jbd_fs,
1264 struct jbd_journal *journal)
1267 uint32_t features_incompatible =
1268 ext4_get32(&jbd_fs->inode_ref.fs->sb,
1269 features_incompatible);
1270 struct ext4_block block = EXT4_BLOCK_ZERO();
1271 features_incompatible |= EXT4_FINCOM_RECOVER;
1272 ext4_set32(&jbd_fs->inode_ref.fs->sb,
1273 features_incompatible,
1274 features_incompatible);
1275 r = ext4_sb_write(jbd_fs->bdev,
1276 &jbd_fs->inode_ref.fs->sb);
1280 journal->first = jbd_get32(&jbd_fs->sb, first);
1281 journal->start = journal->first;
1282 journal->last = journal->first;
1283 journal->trans_id = 1;
1284 journal->alloc_trans_id = 1;
1286 journal->block_size = jbd_get32(&jbd_fs->sb, blocksize);
1288 r = jbd_block_get_noread(jbd_fs,
1292 memset(journal, 0, sizeof(struct jbd_journal));
1295 memset(block.data, 0, journal->block_size);
1296 ext4_bcache_set_dirty(block.buf);
1297 r = jbd_block_set(jbd_fs, &block);
1299 memset(journal, 0, sizeof(struct jbd_journal));
1303 TAILQ_INIT(&journal->cp_queue);
1304 RB_INIT(&journal->block_rec_root);
1305 journal->jbd_fs = jbd_fs;
1306 jbd_journal_write_sb(journal);
1307 r = jbd_write_sb(jbd_fs);
1311 jbd_fs->bdev->journal = journal;
/* Forward declaration of jbd_trans_end_write(), which is called by
 * jbd_journal_flush_trans() below with a block cache, a buffer, a result
 * code and the jbd_buf that was written.
 * NOTE(review): the remaining parameters of this prototype are not
 * visible in this view. */
1315 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
1316 struct ext4_buf *buf __unused,
1321 * This routine is only suitable to committed transactions. */
/* Push the buffers still queued on a transaction to their final
 * location.
 *
 * For every jbd_buf on trans->buf_queue the cache is asked for the
 * target block (jbd_buf->block_rec->lba). If there is no cached buffer,
 * or it is not BC_UPTODATE, or the block record is owned by a different
 * transaction, the journalled copy at jbd_buf->jbd_lba is read, copied
 * into a bounce buffer, written directly to the target LBA and
 * jbd_trans_end_write() is invoked by hand with the write result.
 * ext4_block_flush_buf() is the other visible way a buffer leaves here. */
1322 static void jbd_journal_flush_trans(struct jbd_trans *trans)
1324 struct jbd_buf *jbd_buf, *tmp;
1325 struct jbd_journal *journal = trans->journal;
1326 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
/* Bounce buffer of one journal block, released at the end of the
 * function.
 * NOTE(review): an allocation failure is only caught by ext4_assert();
 * confirm that this macro is active in every build configuration. */
1327 void *tmp_data = ext4_malloc(journal->block_size);
1328 ext4_assert(tmp_data);
1330 TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
1332 struct ext4_buf *buf;
1333 struct ext4_block block;
1334 /* The buffer is not yet flushed. */
1335 buf = ext4_bcache_find_get(fs->bdev->bc, &block,
1336 jbd_buf->block_rec->lba);
1337 if (!(buf && ext4_bcache_test_flag(buf, BC_UPTODATE) &&
1338 jbd_buf->block_rec->trans == trans)) {
1340 struct ext4_block jbd_block = EXT4_BLOCK_ZERO();
/* NOTE(review): the journal read is performed inside the assertion
 * expression. If ext4_assert() can expand to nothing, the block is never
 * fetched and the memcpy below reads from an unset descriptor - confirm
 * against the macro definition. */
1341 ext4_assert(jbd_block_get(journal->jbd_fs,
1343 jbd_buf->jbd_lba) == EOK);
1344 memcpy(tmp_data, jbd_block.data,
1345 journal->block_size);
1346 ext4_block_set(fs->bdev, &jbd_block);
/* Write the journalled data straight to the target block, then run the
 * checkpoint callback manually with the result of that write. */
1347 r = ext4_blocks_set_direct(fs->bdev, tmp_data,
1348 jbd_buf->block_rec->lba, 1);
1349 jbd_trans_end_write(fs->bdev->bc, buf, r, jbd_buf);
1351 ext4_block_flush_buf(fs->bdev, buf);
1354 ext4_block_set(fs->bdev, &block);
1357 ext4_free(tmp_data);
/* Move the journal tail past a transaction and dispose of it.
 *
 * journal->start becomes trans->start_iblock + trans->alloc_blocks
 * (passed through wrap()), journal->trans_id becomes trans->trans_id + 1,
 * the transaction is freed and the in-memory JBD superblock is updated.
 * jbd_journal_purge_cp_trans() calls this for a transaction whose
 * data_cnt is zero. */
1361 jbd_journal_skip_pure_revoke(struct jbd_journal *journal,
1362 struct jbd_trans *trans)
1364 journal->start = trans->start_iblock +
1365 trans->alloc_blocks;
1366 wrap(&journal->jbd_fs->sb, journal->start);
1367 journal->trans_id = trans->trans_id + 1;
1368 jbd_journal_free_trans(journal,
1370 jbd_journal_write_sb(journal);
/* Work through the checkpoint queue from its head.
 *
 * Visible cases for the transaction at the head of journal->cp_queue:
 *  - data_cnt is zero: it is unlinked and handed to
 *    jbd_journal_skip_pure_revoke();
 *  - data_cnt equals written_cnt: a position derived from
 *    start_iblock + alloc_blocks and the id trans_id + 1 are computed,
 *    the transaction is unlinked and freed and the in-memory JBD
 *    superblock is updated;
 *  - otherwise, with flush false, a position derived from start_iblock
 *    is recorded and the in-memory JBD superblock is updated;
 *  - jbd_journal_flush_trans() is the remaining visible action,
 *    presumably taken when flush is true - confirm.
 * Callers in this file pass a third boolean argument as well; what it
 * controls cannot be read from the statements shown here. */
1374 jbd_journal_purge_cp_trans(struct jbd_journal *journal,
1378 struct jbd_trans *trans;
1379 while ((trans = TAILQ_FIRST(&journal->cp_queue))) {
1380 if (!trans->data_cnt) {
1381 TAILQ_REMOVE(&journal->cp_queue,
1384 jbd_journal_skip_pure_revoke(journal, trans);
1386 if (trans->data_cnt ==
1387 trans->written_cnt) {
1389 trans->start_iblock +
1390 trans->alloc_blocks;
1391 wrap(&journal->jbd_fs->sb,
1394 trans->trans_id + 1;
1395 TAILQ_REMOVE(&journal->cp_queue,
1398 jbd_journal_free_trans(journal,
1401 jbd_journal_write_sb(journal);
1402 } else if (!flush) {
1404 trans->start_iblock;
1405 wrap(&journal->jbd_fs->sb,
1409 jbd_journal_write_sb(journal);
1412 jbd_journal_flush_trans(trans);
1419 /**@brief Stop accessing the journal.
 *        Purges the checkpoint queue with flushing requested, emits a
 *        warning when block records are still present in the session,
 *        clears EXT4_FINCOM_RECOVER in the ext4 superblock and writes
 *        it, then sets journal->trans_id to 0 and writes the JBD
 *        superblock. The value returned at the end is the result of
 *        jbd_write_sb().
1420 * @param journal current journal session
1421 * @return standard error code*/
1422 int jbd_journal_stop(struct jbd_journal *journal)
1425 struct jbd_fs *jbd_fs = journal->jbd_fs;
1426 uint32_t features_incompatible;
1428 /* Make sure that journalled content have reached
1430 jbd_journal_purge_cp_trans(journal, true, false);
1432 /* There should be no block record in this journal
1434 if (!RB_EMPTY(&journal->block_rec_root))
1436 DBG_WARN "There are still block records "
1437 "in this journal session!\n");
1439 features_incompatible =
1440 ext4_get32(&jbd_fs->inode_ref.fs->sb,
1441 features_incompatible);
1442 features_incompatible &= ~EXT4_FINCOM_RECOVER;
1443 ext4_set32(&jbd_fs->inode_ref.fs->sb,
1444 features_incompatible,
1445 features_incompatible);
1446 r = ext4_sb_write(jbd_fs->bdev,
1447 &jbd_fs->inode_ref.fs->sb);
1452 journal->trans_id = 0;
1453 jbd_journal_write_sb(journal);
1454 return jbd_write_sb(journal->jbd_fs);
1457 /**@brief Allocate a block in the journal.
 *        The block handed out is the current value of journal->last;
 *        journal->last is then advanced and wrapped, and the
 *        transaction's alloc_blocks counter is incremented.
1458 * @param journal current journal session
1459 * @param trans transaction
1460 * @return allocated block address*/
1461 static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal,
1462 struct jbd_trans *trans)
1464 uint32_t start_block;
/* start_block keeps the pre-increment head of the log; only
 * journal->last is passed through wrap() here. */
1466 start_block = journal->last++;
1467 trans->alloc_blocks++;
1468 wrap(&journal->jbd_fs->sb, journal->last);
1470 /* If there is no space left, flush all journalled
1471 * blocks to disk first.*/
/* "No space left" is detected as the advanced head meeting the tail. */
1472 if (journal->last == journal->start)
1473 jbd_journal_purge_cp_trans(journal, true, false);
/* Look up a block record in the journal session.
 *
 * A jbd_block_rec built on the stack serves as the search key for
 * RB_FIND() on journal->block_rec_root; the result of RB_FIND() is
 * returned as is (NULL when the tree holds no matching record, per the
 * BSD tree macros). Callers in this file pass the logical block address
 * as second argument. */
1478 static struct jbd_block_rec *
1479 jbd_trans_block_rec_lookup(struct jbd_journal *journal,
1482 struct jbd_block_rec tmp = {
1486 return RB_FIND(jbd_block,
1487 &journal->block_rec_root,
/* Hand a block record over to another transaction: the record is
 * unlinked from the list it is currently on, put at the head of
 * new_trans->tbrec_list, and its trans pointer is updated to match. */
1492 jbd_trans_change_ownership(struct jbd_block_rec *block_rec,
1493 struct jbd_trans *new_trans)
1495 LIST_REMOVE(block_rec, tbrec_node);
1497 /* Now this block record belongs to this transaction. */
1498 LIST_INSERT_HEAD(&new_trans->tbrec_list, block_rec, tbrec_node);
1500 block_rec->trans = new_trans;
/* Find or create the block record for a logical block address and make
 * the given transaction its owner.
 *
 * The journal-wide tree is searched first; an existing record is
 * re-assigned with jbd_trans_change_ownership(). A new record is
 * zero-allocated, filled in, linked on trans->tbrec_list and inserted
 * into journal->block_rec_root. jbd_trans_set_block_dirty() treats a
 * NULL result as failure. */
1503 static inline struct jbd_block_rec *
1504 jbd_trans_insert_block_rec(struct jbd_trans *trans,
1507 struct jbd_block_rec *block_rec;
1508 block_rec = jbd_trans_block_rec_lookup(trans->journal, lba);
1510 jbd_trans_change_ownership(block_rec, trans);
/* No usable record: build a fresh one owned by this transaction. */
1513 block_rec = ext4_calloc(1, sizeof(struct jbd_block_rec));
1517 block_rec->lba = lba;
1518 block_rec->trans = trans;
1519 TAILQ_INIT(&block_rec->dirty_buf_queue);
1520 LIST_INSERT_HEAD(&trans->tbrec_list, block_rec, tbrec_node);
1521 RB_INSERT(jbd_block, &trans->journal->block_rec_root, block_rec);
1526 * This routine will do the dirty works.
/* Settle the buffers hanging off a block record when a transaction lets
 * go of it. The record is compared against the transaction first
 * (block_rec->trans != trans).
 *
 * Two treatments are visible below:
 *  - every jbd_buf on block_rec->dirty_buf_queue is passed to
 *    jbd_trans_end_write();
 *  - the last jbd_buf on that queue is used to roll the block back: the
 *    journalled copy is read and copied over the cached block, the
 *    record is handed to another transaction, the end-of-write callback
 *    is re-armed and the block is marked dirty.
 * Callers in this file pass two further boolean arguments after
 * block_rec; presumably they select between these treatments - confirm. */
1529 jbd_trans_finish_callback(struct jbd_journal *journal,
1530 const struct jbd_trans *trans,
1531 struct jbd_block_rec *block_rec,
1535 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1536 if (block_rec->trans != trans)
1540 struct jbd_buf *jbd_buf, *tmp;
1541 TAILQ_FOREACH_SAFE(jbd_buf,
1542 &block_rec->dirty_buf_queue,
1545 jbd_trans_end_write(fs->bdev->bc,
1552 * We have to roll back data if the block is going to be
1555 struct jbd_buf *jbd_buf;
1556 struct ext4_block jbd_block = EXT4_BLOCK_ZERO(),
1557 block = EXT4_BLOCK_ZERO();
/* NOTE(review): both block fetches below are made inside ext4_assert()
 * expressions. If that macro can expand to nothing, neither block is
 * obtained and the memcpy that follows works on zeroed descriptors -
 * confirm against the macro definition. */
1558 jbd_buf = TAILQ_LAST(&block_rec->dirty_buf_queue,
1562 ext4_assert(ext4_block_get_noread(fs->bdev,
1564 block_rec->lba) == EOK);
1565 ext4_assert(jbd_block_get(journal->jbd_fs,
1567 jbd_buf->jbd_lba) == EOK);
1568 memcpy(block.data, jbd_block.data,
1569 journal->block_size);
1571 jbd_trans_change_ownership(block_rec,
1574 block.buf->end_write = jbd_trans_end_write;
1575 block.buf->end_write_arg = jbd_buf;
/* The journal copy is flagged BC_TMP, the restored block is marked
 * dirty, and both descriptors are released. */
1577 ext4_bcache_set_flag(jbd_block.buf, BC_TMP);
1578 ext4_bcache_set_dirty(block.buf);
1580 ext4_block_set(fs->bdev, &jbd_block);
1581 ext4_block_set(fs->bdev, &block);
1584 /* The revoked buffer is yet written. */
1585 jbd_trans_change_ownership(block_rec,
/* Dispose of a block record that is owned by the given transaction: it
 * is unlinked from the transaction's record list, removed from
 * journal->block_rec_root and freed. The ownership test
 * (block_rec->trans == trans) guards all three steps. */
1593 jbd_trans_remove_block_rec(struct jbd_journal *journal,
1594 struct jbd_block_rec *block_rec,
1595 struct jbd_trans *trans)
1597 /* If this block record doesn't belong to this transaction,
1599 if (block_rec->trans == trans) {
1600 LIST_REMOVE(block_rec, tbrec_node);
1601 RB_REMOVE(jbd_block,
1602 &journal->block_rec_root,
1604 ext4_free(block_rec);
1608 /**@brief Add block to a transaction and mark it dirty.
 *        A jbd_buf is allocated for the block, attached to the block
 *        record of block->lb_id and to trans->buf_queue, and registered
 *        as argument of the buffer's end-of-write callback. A revoke
 *        record for the same block is looked up in the transaction and
 *        removed.
1609 * @param trans transaction
1610 * @param block block descriptor
1611 * @return standard error code*/
1612 int jbd_trans_set_block_dirty(struct jbd_trans *trans,
1613 struct ext4_block *block)
1615 struct jbd_buf *jbd_buf;
1616 struct jbd_revoke_rec *rec, tmp_rec = {
1619 struct jbd_block_rec *block_rec;
/* The buffer already carries the journal callback: check whether the
 * jbd_buf registered on it belongs to this very transaction. */
1621 if (block->buf->end_write == jbd_trans_end_write) {
1622 jbd_buf = block->buf->end_write_arg;
1623 if (jbd_buf && jbd_buf->trans == trans)
/* Track the buffer with a new, zeroed jbd_buf and bind it to the block
 * record of this logical block. */
1626 jbd_buf = ext4_calloc(1, sizeof(struct jbd_buf));
1630 if ((block_rec = jbd_trans_insert_block_rec(trans,
1631 block->lb_id)) == NULL) {
1636 TAILQ_INSERT_TAIL(&block_rec->dirty_buf_queue,
1640 jbd_buf->block_rec = block_rec;
1641 jbd_buf->trans = trans;
/* jbd_buf keeps its own copy of the descriptor and an extra reference
 * on the cache buffer. */
1642 jbd_buf->block = *block;
1643 ext4_bcache_inc_ref(block->buf);
1645 /* When the content reaches the disk, notify us
1646 * so that we may do a checkpoint. */
1647 block->buf->end_write = jbd_trans_end_write;
1648 block->buf->end_write_arg = jbd_buf;
1651 TAILQ_INSERT_HEAD(&trans->buf_queue, jbd_buf, buf_node);
1653 ext4_bcache_set_dirty(block->buf);
/* A block that is written again is taken off this transaction's revoke
 * tree. */
1654 rec = RB_FIND(jbd_revoke_tree,
1655 &trans->revoke_root,
1658 RB_REMOVE(jbd_revoke_tree, &trans->revoke_root,
1666 /**@brief Add block to be revoked to a transaction
 *        The transaction's revoke tree is searched with a key built on
 *        the stack; a zero-allocated record is inserted into
 *        trans->revoke_root (presumably only when the search finds
 *        nothing - confirm).
1667 * @param trans transaction
1668 * @param lba logical block address
1669 * @return standard error code*/
1670 int jbd_trans_revoke_block(struct jbd_trans *trans,
1673 struct jbd_revoke_rec tmp_rec = {
/* Look for an existing revoke record first. */
1676 rec = RB_FIND(jbd_revoke_tree,
1677 &trans->revoke_root,
/* Create the record and add it to the tree. */
1682 rec = ext4_calloc(1, sizeof(struct jbd_revoke_rec));
1687 RB_INSERT(jbd_revoke_tree, &trans->revoke_root, rec);
1691 /**@brief Try to add block to be revoked to a transaction.
1692 * If @lba still remains in a transaction on the checkpoint
1693 * queue, add @lba as a revoked block to the transaction.
 *        The decision is based on the block record found for @lba in
 *        the journal session and on the state of its dirty buffer
 *        queue.
1694 * @param trans transaction
1695 * @param lba logical block address
1696 * @return standard error code*/
1697 int jbd_trans_try_revoke_block(struct jbd_trans *trans,
1700 struct jbd_journal *journal = trans->journal;
1701 struct jbd_block_rec *block_rec =
1702 jbd_trans_block_rec_lookup(journal, lba);
/* NOTE(review): jbd_trans_revoke_block() returns an int, but its result
 * is discarded at both call sites below - confirm that a failure there
 * may safely go unreported. */
1705 if (block_rec->trans == trans) {
1706 struct jbd_buf *jbd_buf =
1707 TAILQ_LAST(&block_rec->dirty_buf_queue,
1709 /* If there are still unwritten buffers. */
1710 if (TAILQ_FIRST(&block_rec->dirty_buf_queue) !=
1712 jbd_trans_revoke_block(trans, lba);
1715 jbd_trans_revoke_block(trans, lba);
1721 /**@brief Free a transaction
 *        Three structures owned by the transaction are emptied in turn:
 *        the buffer queue, the revoke tree and the list of block
 *        records.
1722 * @param journal current journal session
1723 * @param trans transaction
1724 * @param abort discard all the modifications on the block?
1725 * @return none (the function is declared void)*/
1726 void jbd_journal_free_trans(struct jbd_journal *journal,
1727 struct jbd_trans *trans,
1730 struct jbd_buf *jbd_buf, *tmp;
1731 struct jbd_revoke_rec *rec, *tmp2;
1732 struct jbd_block_rec *block_rec, *tmp3;
1733 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
/* Buffer queue: the callback is detached from the cache buffer, its
 * dirty state is cleared and the descriptor is released (presumably
 * only when aborting - confirm); each jbd_buf is then unlinked from
 * its block record and from the transaction. */
1734 TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
1736 block_rec = jbd_buf->block_rec;
1738 jbd_buf->block.buf->end_write = NULL;
1739 jbd_buf->block.buf->end_write_arg = NULL;
1740 ext4_bcache_clear_dirty(jbd_buf->block.buf);
1741 ext4_block_set(fs->bdev, &jbd_buf->block);
1744 TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1747 jbd_trans_finish_callback(journal,
1752 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
/* Revoke tree: every record is removed from the tree. */
1755 RB_FOREACH_SAFE(rec, jbd_revoke_tree, &trans->revoke_root,
1757 RB_REMOVE(jbd_revoke_tree, &trans->revoke_root, rec);
/* Block records: those still owned by this transaction are dropped. */
1760 LIST_FOREACH_SAFE(block_rec, &trans->tbrec_list, tbrec_node,
1762 jbd_trans_remove_block_rec(journal, block_rec, trans);
1768 /**@brief Write commit block for a transaction
 *        One journal block is allocated for the commit record. When
 *        orig_commit_iblock is smaller than commit_iblock, the block at
 *        commit_iblock is zero-filled first. The commit header (magic,
 *        block type, transaction id and, if enabled, the data
 *        checksum) is then written to the block at orig_commit_iblock.
1769 * @param trans transaction
1770 * @return standard error code*/
1771 static int jbd_trans_write_commit_block(struct jbd_trans *trans)
1774 struct ext4_block block;
1775 struct jbd_commit_header *header;
1776 uint32_t commit_iblock, orig_commit_iblock;
1777 struct jbd_journal *journal = trans->journal;
1779 commit_iblock = jbd_journal_alloc_block(journal, trans);
1780 orig_commit_iblock = commit_iblock;
1782 wrap(&journal->jbd_fs->sb, commit_iblock);
1784 /* To prevent accidental reference to stale journalling metadata. */
1785 if (orig_commit_iblock < commit_iblock) {
1786 rc = jbd_block_get_noread(journal->jbd_fs, &block, commit_iblock);
1790 memset(block.data, 0, journal->block_size);
1791 ext4_bcache_set_dirty(block.buf);
1792 ext4_bcache_set_flag(block.buf, BC_TMP);
1793 rc = jbd_block_set(journal->jbd_fs, &block);
/* Build the commit record in the block that was allocated above. */
1798 rc = jbd_block_get_noread(journal->jbd_fs, &block, orig_commit_iblock);
1802 header = (struct jbd_commit_header *)block.data;
1803 jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER);
1804 jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);
1805 jbd_set32(&header->header, sequence, trans->trans_id);
/* NOTE(review): the INCOMPAT test macro is applied to
 * JBD_FEATURE_COMPAT_CHECKSUM, a flag whose name places it in the
 * compat feature set - confirm that the intended feature word is being
 * tested. */
1807 if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1808 JBD_FEATURE_COMPAT_CHECKSUM)) {
1809 jbd_set32(header, chksum_type, JBD_CRC32_CHKSUM);
1810 jbd_set32(header, chksum_size, JBD_CRC32_CHKSUM_SIZE);
1811 jbd_set32(header, chksum[0], trans->data_csum);
1813 jbd_commit_csum_set(journal->jbd_fs, header);
1814 ext4_bcache_set_dirty(block.buf);
1815 ext4_bcache_set_flag(block.buf, BC_TMP);
1816 rc = jbd_block_set(journal->jbd_fs, &block);
1820 /**@brief Write descriptor block for a transaction
 *        Two passes are made over trans->buf_queue. The first one walks
 *        the queue from its tail and releases buffers until it meets a
 *        dirty one. The second one walks it from the head: buffers that
 *        fail the flag test are released in the same way, every other
 *        buffer is checksummed, described by a tag in a descriptor
 *        block and copied into a newly allocated journal data block
 *        whose index is stored in jbd_buf->jbd_lba.
 *        The first descriptor block allocated becomes
 *        trans->start_iblock when that field is still zero, and the
 *        running checksum is stored in trans->data_csum at the end.
 *        The first 32-bit word of each payload is compared with the
 *        big-endian JBD magic number, and the magic field of the
 *        journal copy is overwritten with 0 further down; presumably
 *        this is the escape handling reported through
 *        tag_info.is_escape - confirm.
1821 * @param journal current journal session
1822 * @param trans transaction
1823 * @return standard error code*/
1824 static int jbd_journal_prepare(struct jbd_journal *journal,
1825 struct jbd_trans *trans)
1827 int rc = EOK, i = 0;
1828 struct ext4_block desc_block = EXT4_BLOCK_ZERO(),
1829 data_block = EXT4_BLOCK_ZERO();
1830 int32_t tag_tbl_size = 0;
1831 uint32_t desc_iblock = 0;
1832 uint32_t data_iblock = 0;
1833 char *tag_start = NULL, *tag_ptr = NULL;
1834 struct jbd_buf *jbd_buf, *tmp;
1835 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1836 uint32_t checksum = EXT4_CRC32_INIT;
1837 struct jbd_bhdr *bhdr = NULL;
1840 /* Try to remove any non-dirty buffers from the tail of
1842 TAILQ_FOREACH_REVERSE_SAFE(jbd_buf, &trans->buf_queue,
1843 jbd_trans_buf, buf_node, tmp) {
1844 struct jbd_revoke_rec tmp_rec = {
1845 .lba = jbd_buf->block_rec->lba
1847 /* We stop the iteration when we find a dirty buffer. */
1848 if (ext4_bcache_test_flag(jbd_buf->block.buf,
1852 TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1856 jbd_buf->block.buf->end_write = NULL;
1857 jbd_buf->block.buf->end_write_arg = NULL;
1858 jbd_trans_finish_callback(journal,
1862 RB_FIND(jbd_revoke_tree,
1863 &trans->revoke_root,
1865 jbd_trans_remove_block_rec(journal,
1866 jbd_buf->block_rec, trans);
1869 ext4_block_set(fs->bdev, &jbd_buf->block);
1870 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
/* Second pass, from the head of the queue: release the buffers that
 * fail the flag test and journal all the others. */
1874 TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node, tmp) {
1875 struct tag_info tag_info;
1876 bool uuid_exist = false;
1877 bool is_escape = false;
1878 struct jbd_revoke_rec tmp_rec = {
1879 .lba = jbd_buf->block_rec->lba
1881 if (!ext4_bcache_test_flag(jbd_buf->block.buf,
1883 TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1887 jbd_buf->block.buf->end_write = NULL;
1888 jbd_buf->block.buf->end_write_arg = NULL;
1890 /* The buffer has not been modified, just release
1892 jbd_trans_finish_callback(journal,
1896 RB_FIND(jbd_revoke_tree,
1897 &trans->revoke_root,
1899 jbd_trans_remove_block_rec(journal,
1900 jbd_buf->block_rec, trans);
1903 ext4_block_set(fs->bdev, &jbd_buf->block);
1904 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1908 checksum = jbd_block_csum(journal->jbd_fs,
1909 jbd_buf->block.data,
1912 if (((struct jbd_bhdr *)jbd_buf->block.data)->magic ==
1913 to_be32(JBD_MAGIC_NUMBER))
1918 desc_iblock = jbd_journal_alloc_block(journal, trans);
1919 rc = jbd_block_get_noread(journal->jbd_fs, &desc_block, desc_iblock);
1923 bhdr = (struct jbd_bhdr *)desc_block.data;
1924 jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1925 jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK);
1926 jbd_set32(bhdr, sequence, trans->trans_id);
1928 tag_start = (char *)(bhdr + 1);
1929 tag_ptr = tag_start;
1931 tag_tbl_size = journal->block_size -
1932 sizeof(struct jbd_bhdr);
1934 if (jbd_has_csum(&journal->jbd_fs->sb))
1935 tag_tbl_size -= sizeof(struct jbd_block_tail);
1937 if (!trans->start_iblock)
1938 trans->start_iblock = desc_iblock;
1940 ext4_bcache_set_dirty(desc_block.buf);
1941 ext4_bcache_set_flag(desc_block.buf, BC_TMP);
1943 tag_info.block = jbd_buf->block.lb_id;
1944 tag_info.uuid_exist = uuid_exist;
1945 tag_info.is_escape = is_escape;
1946 if (i == trans->data_cnt - 1)
1947 tag_info.last_tag = true;
1949 tag_info.last_tag = false;
1951 tag_info.checksum = checksum;
1954 memcpy(tag_info.uuid, journal->jbd_fs->sb.uuid,
1957 rc = jbd_write_block_tag(journal->jbd_fs,
1962 jbd_meta_csum_set(journal->jbd_fs, bhdr);
1964 rc = jbd_block_set(journal->jbd_fs, &desc_block);
1971 data_iblock = jbd_journal_alloc_block(journal, trans);
1972 rc = jbd_block_get_noread(journal->jbd_fs, &data_block, data_iblock);
1975 ext4_bcache_clear_dirty(desc_block.buf);
1976 jbd_block_set(journal->jbd_fs, &desc_block);
1980 data = data_block.data;
1981 memcpy(data, jbd_buf->block.data,
1982 journal->block_size);
1984 ((struct jbd_bhdr *)data)->magic = 0;
1986 ext4_bcache_set_dirty(data_block.buf);
1987 ext4_bcache_set_flag(data_block.buf, BC_TMP);
1988 rc = jbd_block_set(journal->jbd_fs, &data_block);
1991 ext4_bcache_clear_dirty(desc_block.buf);
1992 jbd_block_set(journal->jbd_fs, &desc_block);
1995 jbd_buf->jbd_lba = data_iblock;
1997 tag_ptr += tag_info.tag_bytes;
1998 tag_tbl_size -= tag_info.tag_bytes;
2002 if (rc == EOK && desc_iblock) {
2003 jbd_meta_csum_set(journal->jbd_fs,
2004 (struct jbd_bhdr *)bhdr);
2005 trans->data_csum = checksum;
2006 rc = jbd_block_set(journal->jbd_fs, &desc_block);
2012 /**@brief Write revoke block for a transaction
 *        Walks trans->revoke_root and stores each revoked logical block
 *        address, big-endian, in revoke blocks allocated from the
 *        journal. A block that cannot take another entry is finalised
 *        (count and checksum) and released; the last block in use is
 *        finalised after the loop.
2013 * @param journal current journal session
2014 * @param trans transaction
2015 * @return standard error code*/
2017 jbd_journal_prepare_revoke(struct jbd_journal *journal,
2018 struct jbd_trans *trans)
2020 int rc = EOK, i = 0;
2021 struct ext4_block desc_block = EXT4_BLOCK_ZERO();
2022 int32_t tag_tbl_size = 0;
2023 uint32_t desc_iblock = 0;
2024 char *blocks_entry = NULL;
2025 struct jbd_revoke_rec *rec, *tmp;
2026 struct jbd_revoke_header *header = NULL;
2027 int32_t record_len = 4;
2028 struct jbd_bhdr *bhdr = NULL;
/* Entries are 4 bytes wide by default; the 64-bit journal feature is
 * tested here and an 8-byte form is handled further down. */
2030 if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
2031 JBD_FEATURE_INCOMPAT_64BIT))
2034 RB_FOREACH_SAFE(rec, jbd_revoke_tree, &trans->revoke_root,
/* Open a new revoke block: allocate it, stamp the common header and
 * work out how many bytes are left for entries. */
2038 desc_iblock = jbd_journal_alloc_block(journal, trans);
2039 rc = jbd_block_get_noread(journal->jbd_fs, &desc_block,
2044 bhdr = (struct jbd_bhdr *)desc_block.data;
2045 jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
2046 jbd_set32(bhdr, blocktype, JBD_REVOKE_BLOCK);
2047 jbd_set32(bhdr, sequence, trans->trans_id);
2049 header = (struct jbd_revoke_header *)bhdr;
2050 blocks_entry = (char *)(header + 1);
2051 tag_tbl_size = journal->block_size -
2052 sizeof(struct jbd_revoke_header);
2054 if (jbd_has_csum(&journal->jbd_fs->sb))
2055 tag_tbl_size -= sizeof(struct jbd_block_tail);
/* The first block allocated for the transaction marks its start. */
2057 if (!trans->start_iblock)
2058 trans->start_iblock = desc_iblock;
2060 ext4_bcache_set_dirty(desc_block.buf);
2061 ext4_bcache_set_flag(desc_block.buf, BC_TMP);
/* No room for another entry: store the byte count, checksum the block
 * and release it.
 * NOTE(review): when the checksum tail has been reserved above,
 * block_size - tag_tbl_size also counts the bytes of that tail - confirm
 * that the count field is meant to include them. */
2064 if (tag_tbl_size < record_len) {
2065 jbd_set32(header, count,
2066 journal->block_size - tag_tbl_size);
2067 jbd_meta_csum_set(journal->jbd_fs, bhdr);
2071 rc = jbd_block_set(journal->jbd_fs, &desc_block);
/* Store the address big-endian in the width selected above and move
 * the write position forward. */
2077 if (record_len == 8) {
2079 (uint64_t *)blocks_entry;
2080 *blocks = to_be64(rec->lba);
2083 (uint32_t *)blocks_entry;
2084 *blocks = to_be32((uint32_t)rec->lba);
2086 blocks_entry += record_len;
2087 tag_tbl_size -= record_len;
/* Finalise the revoke block that is still open, if any was allocated. */
2091 if (rc == EOK && desc_iblock) {
2093 jbd_set32(header, count,
2094 journal->block_size - tag_tbl_size);
2096 jbd_meta_csum_set(journal->jbd_fs, bhdr);
2097 rc = jbd_block_set(journal->jbd_fs, &desc_block);
2103 /**@brief Put references of block descriptors in a transaction.
 *        Every jbd_buf on trans->buf_queue has its block descriptor
 *        released through ext4_block_set() on the filesystem's block
 *        device.
2104 * @param journal current journal session
2105 * @param trans transaction*/
2106 void jbd_journal_cp_trans(struct jbd_journal *journal, struct jbd_trans *trans)
2108 struct jbd_buf *jbd_buf, *tmp;
2109 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
2110 TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
/* The release goes through a local copy of the descriptor; presumably
 * so that jbd_buf->block stays intact and jbd_buf itself is not touched
 * after the call - confirm against ext4_block_set(). */
2112 struct ext4_block block = jbd_buf->block;
2113 ext4_block_set(fs->bdev, &block);
2117 /**@brief Update the start block of the journal when
2118 * all the contents in a transaction reach the disk.*/
/* The callback argument is the jbd_buf that tracks the written buffer.
 * It is unlinked from its transaction and from its block record, the
 * transaction's written_cnt is incremented, and once written_cnt equals
 * data_cnt for the transaction at the head of the checkpoint queue the
 * journal tail is moved forward. */
2119 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
2120 struct ext4_buf *buf,
2124 struct jbd_buf *jbd_buf = arg;
2125 struct jbd_trans *trans = jbd_buf->trans;
2126 struct jbd_block_rec *block_rec = jbd_buf->block_rec;
2127 struct jbd_journal *journal = trans->journal;
/* Sampled up front, before any queue is modified below. */
2128 bool first_in_queue =
2129 trans == TAILQ_FIRST(&journal->cp_queue);
/* This buffer is done: detach it from the transaction and from its
 * block record. */
2133 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
2134 TAILQ_REMOVE(&block_rec->dirty_buf_queue,
2138 jbd_trans_finish_callback(journal,
/* buf may be absent; some callers in this file pass the result of a
 * cache lookup here. */
2143 if (block_rec->trans == trans && buf) {
2144 /* Clear the end_write and end_write_arg fields. */
2145 buf->end_write = NULL;
2146 buf->end_write_arg = NULL;
2151 trans->written_cnt++;
2152 if (trans->written_cnt == trans->data_cnt) {
2153 /* If it is the first transaction on checkpoint queue,
2154 * we will shift the start of the journal to the next
2155 * transaction, and remove subsequent written
2156 * transactions from checkpoint queue until we find
2157 * an unwritten one. */
2158 if (first_in_queue) {
2159 journal->start = trans->start_iblock +
2160 trans->alloc_blocks;
2161 wrap(&journal->jbd_fs->sb, journal->start);
2162 journal->trans_id = trans->trans_id + 1;
2163 TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
2164 jbd_journal_free_trans(journal, trans, false);
/* Purge without flushing, then persist the new tail; the result of
 * jbd_write_sb() is not examined here. */
2166 jbd_journal_purge_cp_trans(journal, false, true);
2167 jbd_journal_write_sb(journal);
2168 jbd_write_sb(journal->jbd_fs);
2173 /**@brief Commit a transaction to the journal immediately.
 *        Order of work: assign the transaction id, write data and
 *        descriptor blocks, write revoke blocks, write the commit
 *        block, complete the checkpoint of revoked buffers, then queue
 *        the transaction for checkpointing. A transaction with neither
 *        buffers nor revoke records is freed without a commit block.
 *        The value of journal->last on entry is saved so that it can
 *        be restored on failure.
2174 * @param journal current journal session
2175 * @param trans transaction
2176 * @return standard error code*/
2177 static int __jbd_journal_commit_trans(struct jbd_journal *journal,
2178 struct jbd_trans *trans)
2181 uint32_t last = journal->last;
2182 struct jbd_revoke_rec *rec, *tmp;
/* The transaction takes the next id to be allocated; the counter itself
 * is only advanced after the commit block has been written. */
2184 trans->trans_id = journal->alloc_trans_id;
2185 rc = jbd_journal_prepare(journal, trans);
2189 rc = jbd_journal_prepare_revoke(journal, trans);
2193 if (TAILQ_EMPTY(&trans->buf_queue) &&
2194 RB_EMPTY(&trans->revoke_root)) {
2195 /* Since there are no entries in both buffer list
2196 * and revoke entry list, we do not consider trans as
2197 * complete transaction and just return EOK.*/
2198 jbd_journal_free_trans(journal, trans, false);
2202 rc = jbd_trans_write_commit_block(trans);
2206 journal->alloc_trans_id++;
2208 /* Complete the checkpoint of buffers which are revoked. */
2209 RB_FOREACH_SAFE(rec, jbd_revoke_tree, &trans->revoke_root,
2211 struct jbd_block_rec *block_rec =
2212 jbd_trans_block_rec_lookup(journal, rec->lba);
2213 struct jbd_buf *jbd_buf = NULL;
2215 jbd_buf = TAILQ_LAST(&block_rec->dirty_buf_queue,
2218 struct ext4_buf *buf;
2219 struct ext4_block block = EXT4_BLOCK_ZERO();
2221 * We do this to reset the ext4_buf::end_write and
2222 * ext4_buf::end_write_arg fields so that the checkpoint
2223 * callback won't be triggered again.
2225 buf = ext4_bcache_find_get(journal->jbd_fs->bdev->bc,
2227 jbd_buf->block_rec->lba);
2228 jbd_trans_end_write(journal->jbd_fs->bdev->bc,
2233 ext4_block_set(journal->jbd_fs->bdev, &block);
2237 if (TAILQ_EMPTY(&journal->cp_queue)) {
2239 * This transaction is going to be the first object in the
2241 * When the first transaction in checkpoint queue is completely
2242 * written to disk, we shift the tail of the log to right.
2244 if (trans->data_cnt) {
2245 journal->start = trans->start_iblock;
2246 wrap(&journal->jbd_fs->sb, journal->start);
2247 journal->trans_id = trans->trans_id;
2248 jbd_journal_write_sb(journal);
2249 jbd_write_sb(journal->jbd_fs);
2250 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
2252 jbd_journal_cp_trans(journal, trans);
2254 journal->start = trans->start_iblock +
2255 trans->alloc_blocks;
2256 wrap(&journal->jbd_fs->sb, journal->start);
2257 journal->trans_id = trans->trans_id + 1;
2258 jbd_journal_write_sb(journal);
2259 jbd_journal_free_trans(journal, trans, false);
2262 /* No need to do anything to the JBD superblock. */
2263 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
2265 if (trans->data_cnt)
2266 jbd_journal_cp_trans(journal, trans);
/* Any failure other than ENOSPC rewinds the log head to its value on
 * entry and frees the transaction with the abort argument set. */
2269 if (rc != EOK && rc != ENOSPC) {
2270 journal->last = last;
2271 jbd_journal_free_trans(journal, trans, true);
2276 /**@brief Allocate a new transaction
 *        The structure is zero-allocated with ext4_calloc(), bound to
 *        the journal session, its data checksum is seeded with
 *        EXT4_CRC32_INIT and its buffer queue is initialised.
2277 * @param journal current journal session
2278 * @return transaction allocated*/
2280 jbd_journal_new_trans(struct jbd_journal *journal)
2282 struct jbd_trans *trans = NULL;
2283 trans = ext4_calloc(1, sizeof(struct jbd_trans));
2287 /* We will assign a trans_id to this transaction,
2288 * once it has been committed.*/
2289 trans->journal = journal;
2290 trans->data_csum = EXT4_CRC32_INIT;
2292 TAILQ_INIT(&trans->buf_queue);
2296 /**@brief Commit a transaction to the journal immediately.
2297 * @param journal current journal session
2298 * @param trans transaction
2299 * @return standard error code*/
2300 int jbd_journal_commit_trans(struct jbd_journal *journal,
2301 struct jbd_trans *trans)
2304 r = __jbd_journal_commit_trans(journal, trans);