From: Kaho Ng Date: Wed, 27 Apr 2016 11:11:31 +0000 (+0800) Subject: ext4_journal: numorous changes. X-Git-Url: https://git.carlh.net/gitweb/?a=commitdiff_plain;h=8281a97813ef19452be1ce6ec1a6c40e483f12f1;p=lwext4.git ext4_journal: numorous changes. - Do not utilize jbd_block_rec::buf because it can be unreliable that jbd_block_rec::buf may be freed. - Do not need to flush buffers before a new transaction is going to reference them. - add some comments to the changes. --- diff --git a/include/ext4_bcache.h b/include/ext4_bcache.h index bceb75a..b7ae438 100644 --- a/include/ext4_bcache.h +++ b/include/ext4_bcache.h @@ -239,6 +239,12 @@ struct ext4_buf *ext4_buf_lowest_lru(struct ext4_bcache *bc); * @param buf buffer*/ void ext4_bcache_drop_buf(struct ext4_bcache *bc, struct ext4_buf *buf); +/**@brief Invalidate a buffer. + * @param bc block cache descriptor + * @param buf buffer*/ +void ext4_bcache_invalidate_buf(struct ext4_bcache *bc, + struct ext4_buf *buf); + /**@brief Invalidate a range of buffers. * @param bc block cache descriptor * @param from starting lba diff --git a/include/ext4_blockdev.h b/include/ext4_blockdev.h index f6a6d36..92759e4 100644 --- a/include/ext4_blockdev.h +++ b/include/ext4_blockdev.h @@ -124,6 +124,8 @@ struct ext4_blockdev { /**@brief The filesystem this block device belongs to. */ struct ext4_fs *fs; + + void *journal; }; /**@brief Static initialization of the block device.*/ diff --git a/include/ext4_journal.h b/include/ext4_journal.h index 6961e15..76a4a87 100644 --- a/include/ext4_journal.h +++ b/include/ext4_journal.h @@ -47,7 +47,6 @@ extern "C" { #include "misc/tree.h" struct jbd_fs { - /* If journal block device is used, bdev will be non-null */ struct ext4_blockdev *bdev; struct ext4_inode_ref inode_ref; struct jbd_sb sb; @@ -56,7 +55,7 @@ struct jbd_fs { }; struct jbd_buf { - uint64_t jbd_lba; + uint32_t jbd_lba; struct ext4_block block; struct jbd_trans *trans; struct jbd_block_rec *block_rec; @@ -66,12 +65,11 @@ struct jbd_buf { struct jbd_revoke_rec { ext4_fsblk_t lba; - LIST_ENTRY(jbd_revoke_rec) revoke_node; + RB_ENTRY(jbd_revoke_rec) revoke_node; }; struct jbd_block_rec { ext4_fsblk_t lba; - struct ext4_buf *buf; struct jbd_trans *trans; RB_ENTRY(jbd_block_rec) block_rec_node; LIST_ENTRY(jbd_block_rec) tbrec_node; @@ -91,7 +89,7 @@ struct jbd_trans { struct jbd_journal *journal; TAILQ_HEAD(jbd_trans_buf, jbd_buf) buf_queue; - LIST_HEAD(jbd_revoke_list, jbd_revoke_rec) revoke_list; + RB_HEAD(jbd_revoke_tree, jbd_revoke_rec) revoke_root; LIST_HEAD(jbd_trans_block_rec, jbd_block_rec) tbrec_list; TAILQ_ENTRY(jbd_trans) trans_node; }; @@ -105,7 +103,6 @@ struct jbd_journal { uint32_t block_size; - TAILQ_HEAD(jbd_trans_queue, jbd_trans) trans_queue; TAILQ_HEAD(jbd_cp_queue, jbd_trans) cp_queue; RB_HEAD(jbd_block, jbd_block_rec) block_rec_root; @@ -122,10 +119,8 @@ int jbd_recover(struct jbd_fs *jbd_fs); int jbd_journal_start(struct jbd_fs *jbd_fs, struct jbd_journal *journal); int jbd_journal_stop(struct jbd_journal *journal); -struct jbd_trans *jbd_journal_new_trans(struct jbd_journal *journal); -int jbd_trans_get_access(struct jbd_journal *journal, - struct jbd_trans *trans, - struct ext4_block *block); +struct jbd_trans * +jbd_journal_new_trans(struct jbd_journal *journal); int jbd_trans_set_block_dirty(struct jbd_trans *trans, struct ext4_block *block); int jbd_trans_revoke_block(struct jbd_trans *trans, @@ -137,6 +132,10 @@ void jbd_journal_free_trans(struct jbd_journal *journal, bool abort); int jbd_journal_commit_trans(struct jbd_journal *journal, struct jbd_trans *trans); +void +jbd_journal_purge_cp_trans(struct jbd_journal *journal, + bool flush, + bool once); #ifdef __cplusplus } diff --git a/src/ext4_balloc.c b/src/ext4_balloc.c index 4b17c29..73f5631 100644 --- a/src/ext4_balloc.c +++ b/src/ext4_balloc.c @@ -225,7 +225,9 @@ int ext4_balloc_free_block(struct ext4_inode_ref *inode_ref, ext4_fsblk_t baddr) } ext4_bcache_invalidate_lba(fs->bdev->bc, baddr, 1); /* Release block group reference */ - return ext4_fs_put_block_group_ref(&bg_ref); + rc = ext4_fs_put_block_group_ref(&bg_ref); + + return rc; } int ext4_balloc_free_blocks(struct ext4_inode_ref *inode_ref, @@ -342,6 +344,7 @@ int ext4_balloc_free_blocks(struct ext4_inode_ref *inode_ref, ext4_bcache_invalidate_lba(fs->bdev->bc, first, count); /*All blocks should be released*/ ext4_assert(count == 0); + return rc; } diff --git a/src/ext4_bcache.c b/src/ext4_bcache.c index 1a1766a..e1e3b0c 100644 --- a/src/ext4_bcache.c +++ b/src/ext4_bcache.c @@ -177,6 +177,19 @@ void ext4_bcache_drop_buf(struct ext4_bcache *bc, struct ext4_buf *buf) bc->ref_blocks--; } +void ext4_bcache_invalidate_buf(struct ext4_bcache *bc, + struct ext4_buf *buf) +{ + buf->end_write = NULL; + buf->end_write_arg = NULL; + + /* Clear both dirty and up-to-date flags. */ + if (ext4_bcache_test_flag(buf, BC_DIRTY)) + ext4_bcache_remove_dirty_node(bc, buf); + + ext4_bcache_clear_dirty(buf); +} + void ext4_bcache_invalidate_lba(struct ext4_bcache *bc, uint64_t from, uint32_t cnt) @@ -187,13 +200,7 @@ void ext4_bcache_invalidate_lba(struct ext4_bcache *bc, if (buf->lba > end) break; - /* Clear both dirty and up-to-date flags. */ - if (ext4_bcache_test_flag(buf, BC_DIRTY)) - ext4_bcache_remove_dirty_node(bc, buf); - - buf->end_write = NULL; - buf->end_write_arg = NULL; - ext4_bcache_clear_dirty(buf); + ext4_bcache_invalidate_buf(bc, buf); } } diff --git a/src/ext4_blockdev.c b/src/ext4_blockdev.c index 4b33cd2..19653bb 100644 --- a/src/ext4_blockdev.c +++ b/src/ext4_blockdev.c @@ -41,6 +41,8 @@ #include "ext4_debug.h" #include "ext4_blockdev.h" +#include "ext4_fs.h" +#include "ext4_journal.h" #include #include @@ -188,6 +190,8 @@ int ext4_block_cache_shake(struct ext4_blockdev *bdev) if (bdev->bc->dont_shake) return EOK; + bdev->bc->dont_shake = true; + while (!RB_EMPTY(&bdev->bc->lru_root) && ext4_bcache_is_full(bdev->bc)) { @@ -202,6 +206,7 @@ int ext4_block_cache_shake(struct ext4_blockdev *bdev) ext4_bcache_drop_buf(bdev->bc, buf); } + bdev->bc->dont_shake = false; return r; } diff --git a/src/ext4_journal.c b/src/ext4_journal.c index 1b5a99e..9fe984e 100644 --- a/src/ext4_journal.c +++ b/src/ext4_journal.c @@ -124,10 +124,22 @@ jbd_block_rec_cmp(struct jbd_block_rec *a, struct jbd_block_rec *b) return 0; } +static int +jbd_revoke_rec_cmp(struct jbd_revoke_rec *a, struct jbd_revoke_rec *b) +{ + if (a->lba > b->lba) + return 1; + else if (a->lba < b->lba) + return -1; + return 0; +} + RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node, jbd_revoke_entry_cmp, static inline) RB_GENERATE_INTERNAL(jbd_block, jbd_block_rec, block_rec_node, jbd_block_rec_cmp, static inline) +RB_GENERATE_INTERNAL(jbd_revoke_tree, jbd_revoke_rec, revoke_node, + jbd_revoke_rec_cmp, static inline) #define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry)) #define jbd_free_revoke_entry(addr) free(addr) @@ -456,6 +468,9 @@ int jbd_get_fs(struct ext4_fs *fs, rc = EIO; } + if (rc == EOK) + jbd_fs->bdev = fs->bdev; + return rc; } @@ -499,6 +514,7 @@ static int jbd_block_get(struct jbd_fs *jbd_fs, { /* TODO: journal device. */ int rc; + struct ext4_blockdev *bdev = jbd_fs->bdev; ext4_lblk_t iblock = (ext4_lblk_t)fblock; /* Lookup the logical block address of @@ -508,7 +524,6 @@ static int jbd_block_get(struct jbd_fs *jbd_fs, if (rc != EOK) return rc; - struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev; rc = ext4_block_get(bdev, block, fblock); /* If succeeded, mark buffer as BC_FLUSH to indicate @@ -534,13 +549,13 @@ static int jbd_block_get_noread(struct jbd_fs *jbd_fs, { /* TODO: journal device. */ int rc; + struct ext4_blockdev *bdev = jbd_fs->bdev; ext4_lblk_t iblock = (ext4_lblk_t)fblock; rc = jbd_inode_bmap(jbd_fs, iblock, &fblock); if (rc != EOK) return rc; - struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev; rc = ext4_block_get_noread(bdev, block, fblock); if (rc == EOK) ext4_bcache_set_flag(block->buf, BC_FLUSH); @@ -555,8 +570,8 @@ static int jbd_block_get_noread(struct jbd_fs *jbd_fs, static int jbd_block_set(struct jbd_fs *jbd_fs, struct ext4_block *block) { - return ext4_block_set(jbd_fs->inode_ref.fs->bdev, - block); + struct ext4_blockdev *bdev = jbd_fs->bdev; + return ext4_block_set(bdev, block); } /**@brief helper functions to calculate @@ -1206,7 +1221,7 @@ int jbd_recover(struct jbd_fs *jbd_fs) features_incompatible, features_incompatible); jbd_fs->dirty = true; - r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev, + r = ext4_sb_write(jbd_fs->bdev, &jbd_fs->inode_ref.fs->sb); } jbd_destroy_revoke_tree(&info); @@ -1237,7 +1252,7 @@ int jbd_journal_start(struct jbd_fs *jbd_fs, ext4_set32(&jbd_fs->inode_ref.fs->sb, features_incompatible, features_incompatible); - r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev, + r = ext4_sb_write(jbd_fs->bdev, &jbd_fs->inode_ref.fs->sb); if (r != EOK) return r; @@ -1265,12 +1280,16 @@ int jbd_journal_start(struct jbd_fs *jbd_fs, return r; } - TAILQ_INIT(&journal->trans_queue); TAILQ_INIT(&journal->cp_queue); RB_INIT(&journal->block_rec_root); journal->jbd_fs = jbd_fs; jbd_journal_write_sb(journal); - return jbd_write_sb(jbd_fs); + r = jbd_write_sb(jbd_fs); + if (r != EOK) + return r; + + jbd_fs->bdev->journal = journal; + return EOK; } static void jbd_trans_end_write(struct ext4_bcache *bc __unused, @@ -1278,6 +1297,8 @@ static void jbd_trans_end_write(struct ext4_bcache *bc __unused, int res, void *arg); +/* + * This routine is only suitable to committed transactions. */ static void jbd_journal_flush_trans(struct jbd_trans *trans) { struct jbd_buf *jbd_buf, *tmp; @@ -1288,25 +1309,29 @@ static void jbd_journal_flush_trans(struct jbd_trans *trans) TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node, tmp) { - struct ext4_buf *buf = jbd_buf->block_rec->buf; - /* The buffer in memory is still dirty. */ - if (buf) { - if (jbd_buf->block_rec->trans != trans) { - int r; - struct ext4_block jbd_block = EXT4_BLOCK_ZERO(); - ext4_assert(ext4_block_get(fs->bdev, - &jbd_block, - jbd_buf->jbd_lba) == EOK); - memcpy(tmp_data, jbd_block.data, - journal->block_size); - ext4_block_set(fs->bdev, &jbd_block); - r = ext4_blocks_set_direct(fs->bdev, tmp_data, - buf->lba, 1); - jbd_trans_end_write(fs->bdev->bc, buf, r, jbd_buf); - } else - ext4_block_flush_buf(fs->bdev, buf); + struct ext4_buf *buf; + struct ext4_block block; + /* The buffer is not yet flushed. */ + buf = ext4_bcache_find_get(fs->bdev->bc, &block, + jbd_buf->block_rec->lba); + if (!(buf && ext4_bcache_test_flag(buf, BC_UPTODATE) && + jbd_buf->block_rec->trans == trans)) { + int r; + struct ext4_block jbd_block = EXT4_BLOCK_ZERO(); + ext4_assert(jbd_block_get(journal->jbd_fs, + &jbd_block, + jbd_buf->jbd_lba) == EOK); + memcpy(tmp_data, jbd_block.data, + journal->block_size); + ext4_block_set(fs->bdev, &jbd_block); + r = ext4_blocks_set_direct(fs->bdev, tmp_data, + jbd_buf->block_rec->lba, 1); + jbd_trans_end_write(fs->bdev->bc, buf, r, jbd_buf); + } else + ext4_block_flush_buf(fs->bdev, buf); - } + if (buf) + ext4_block_set(fs->bdev, &block); } free(tmp_data); @@ -1325,7 +1350,7 @@ jbd_journal_skip_pure_revoke(struct jbd_journal *journal, jbd_journal_write_sb(journal); } -static void +void jbd_journal_purge_cp_trans(struct jbd_journal *journal, bool flush, bool once) @@ -1398,7 +1423,7 @@ int jbd_journal_stop(struct jbd_journal *journal) ext4_set32(&jbd_fs->inode_ref.fs->sb, features_incompatible, features_incompatible); - r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev, + r = ext4_sb_write(jbd_fs->bdev, &jbd_fs->inode_ref.fs->sb); if (r != EOK) return r; @@ -1430,50 +1455,6 @@ static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal, return start_block; } -/**@brief Allocate a new transaction - * @param journal current journal session - * @return transaction allocated*/ -struct jbd_trans * -jbd_journal_new_trans(struct jbd_journal *journal) -{ - struct jbd_trans *trans = calloc(1, sizeof(struct jbd_trans)); - if (!trans) - return NULL; - - /* We will assign a trans_id to this transaction, - * once it has been committed.*/ - trans->journal = journal; - trans->data_csum = EXT4_CRC32_INIT; - trans->error = EOK; - TAILQ_INIT(&trans->buf_queue); - return trans; -} - -/**@brief gain access to it before making any modications. - * @param journal current journal session - * @param trans transaction - * @param block descriptor - * @return standard error code.*/ -int jbd_trans_get_access(struct jbd_journal *journal, - struct jbd_trans *trans, - struct ext4_block *block) -{ - int r = EOK; - struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs; - struct jbd_buf *jbd_buf = block->buf->end_write_arg; - - /* If the buffer has already been modified, we should - * flush dirty data in this buffer to disk.*/ - if (ext4_bcache_test_flag(block->buf, BC_DIRTY) && - block->buf->end_write == jbd_trans_end_write) { - ext4_assert(jbd_buf); - if (jbd_buf->trans != trans) - r = ext4_block_flush_buf(fs->bdev, block->buf); - - } - return r; -} - static struct jbd_block_rec * jbd_trans_block_rec_lookup(struct jbd_journal *journal, ext4_fsblk_t lba) @@ -1489,25 +1470,24 @@ jbd_trans_block_rec_lookup(struct jbd_journal *journal, static void jbd_trans_change_ownership(struct jbd_block_rec *block_rec, - struct jbd_trans *new_trans, - struct ext4_buf *new_buf) + struct jbd_trans *new_trans) { LIST_REMOVE(block_rec, tbrec_node); - /* Now this block record belongs to this transaction. */ - LIST_INSERT_HEAD(&new_trans->tbrec_list, block_rec, tbrec_node); + if (new_trans) { + /* Now this block record belongs to this transaction. */ + LIST_INSERT_HEAD(&new_trans->tbrec_list, block_rec, tbrec_node); + } block_rec->trans = new_trans; - block_rec->buf = new_buf; } static inline struct jbd_block_rec * jbd_trans_insert_block_rec(struct jbd_trans *trans, - ext4_fsblk_t lba, - struct ext4_buf *buf) + ext4_fsblk_t lba) { struct jbd_block_rec *block_rec; block_rec = jbd_trans_block_rec_lookup(trans->journal, lba); if (block_rec) { - jbd_trans_change_ownership(block_rec, trans, buf); + jbd_trans_change_ownership(block_rec, trans); return block_rec; } block_rec = calloc(1, sizeof(struct jbd_block_rec)); @@ -1515,7 +1495,6 @@ jbd_trans_insert_block_rec(struct jbd_trans *trans, return NULL; block_rec->lba = lba; - block_rec->buf = buf; block_rec->trans = trans; TAILQ_INIT(&block_rec->dirty_buf_queue); LIST_INSERT_HEAD(&trans->tbrec_list, block_rec, tbrec_node); @@ -1523,11 +1502,15 @@ jbd_trans_insert_block_rec(struct jbd_trans *trans, return block_rec; } +/* + * This routine will do the dirty works. + */ static void jbd_trans_finish_callback(struct jbd_journal *journal, const struct jbd_trans *trans, struct jbd_block_rec *block_rec, - bool abort) + bool abort, + bool revoke) { struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs; if (block_rec->trans != trans) @@ -1539,42 +1522,49 @@ jbd_trans_finish_callback(struct jbd_journal *journal, &block_rec->dirty_buf_queue, dirty_buf_node, tmp) { - /* All we need is a fake ext4_buf. */ - struct ext4_buf buf; - jbd_trans_end_write(fs->bdev->bc, - &buf, + NULL, EOK, jbd_buf); } } else { + /* + * We have to roll back data if the block is going to be + * aborted. + */ struct jbd_buf *jbd_buf; struct ext4_block jbd_block = EXT4_BLOCK_ZERO(), block = EXT4_BLOCK_ZERO(); jbd_buf = TAILQ_LAST(&block_rec->dirty_buf_queue, jbd_buf_dirty); if (jbd_buf) { - ext4_assert(ext4_block_get(fs->bdev, - &jbd_block, - jbd_buf->jbd_lba) == EOK); - ext4_assert(ext4_block_get_noread(fs->bdev, - &block, - block_rec->lba) == EOK); - memcpy(block.data, jbd_block.data, - journal->block_size); + if (!revoke) { + ext4_assert(ext4_block_get_noread(fs->bdev, + &block, + block_rec->lba) == EOK); + ext4_assert(jbd_block_get(journal->jbd_fs, + &jbd_block, + jbd_buf->jbd_lba) == EOK); + memcpy(block.data, jbd_block.data, + journal->block_size); - jbd_trans_change_ownership(block_rec, - jbd_buf->trans, block.buf); + jbd_trans_change_ownership(block_rec, + jbd_buf->trans); - block.buf->end_write = jbd_trans_end_write; - block.buf->end_write_arg = jbd_buf; + block.buf->end_write = jbd_trans_end_write; + block.buf->end_write_arg = jbd_buf; - ext4_bcache_set_flag(jbd_block.buf, BC_TMP); - ext4_bcache_set_dirty(block.buf); + ext4_bcache_set_flag(jbd_block.buf, BC_TMP); + ext4_bcache_set_dirty(block.buf); - ext4_block_set(fs->bdev, &jbd_block); - ext4_block_set(fs->bdev, &block); - return; + ext4_block_set(fs->bdev, &jbd_block); + ext4_block_set(fs->bdev, &block); + return; + } else { + /* The revoked buffer is yet written. */ + jbd_trans_change_ownership(block_rec, + jbd_buf->trans); + } } } } @@ -1603,6 +1593,9 @@ int jbd_trans_set_block_dirty(struct jbd_trans *trans, struct ext4_block *block) { struct jbd_buf *jbd_buf; + struct jbd_revoke_rec *rec, tmp_rec = { + .lba = block->lb_id + }; struct jbd_block_rec *block_rec; if (block->buf->end_write == jbd_trans_end_write) { @@ -1615,8 +1608,7 @@ int jbd_trans_set_block_dirty(struct jbd_trans *trans, return ENOMEM; if ((block_rec = jbd_trans_insert_block_rec(trans, - block->lb_id, - block->buf)) == NULL) { + block->lb_id)) == NULL) { free(jbd_buf); return ENOMEM; } @@ -1639,6 +1631,13 @@ int jbd_trans_set_block_dirty(struct jbd_trans *trans, TAILQ_INSERT_HEAD(&trans->buf_queue, jbd_buf, buf_node); ext4_bcache_set_dirty(block->buf); + rec = RB_FIND(jbd_revoke_tree, + &trans->revoke_root, + &tmp_rec); + if (rec) + RB_REMOVE(jbd_revoke_tree, &trans->revoke_root, + rec); + return EOK; } @@ -1655,7 +1654,7 @@ int jbd_trans_revoke_block(struct jbd_trans *trans, return ENOMEM; rec->lba = lba; - LIST_INSERT_HEAD(&trans->revoke_list, rec, revoke_node); + RB_INSERT(jbd_revoke_tree, &trans->revoke_root, rec); return EOK; } @@ -1668,23 +1667,22 @@ int jbd_trans_revoke_block(struct jbd_trans *trans, int jbd_trans_try_revoke_block(struct jbd_trans *trans, ext4_fsblk_t lba) { - int r = EOK; struct jbd_journal *journal = trans->journal; - struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs; struct jbd_block_rec *block_rec = jbd_trans_block_rec_lookup(journal, lba); - /* Make sure we don't flush any buffers belong to this transaction. */ - if (block_rec && block_rec->trans != trans) { - /* If the buffer has not been flushed yet, flush it now. */ - if (block_rec->buf) { - r = ext4_block_flush_buf(fs->bdev, block_rec->buf); - if (r != EOK) - return r; - - } + if (block_rec) { + if (block_rec->trans == trans) { + struct jbd_buf *jbd_buf = + TAILQ_LAST(&block_rec->dirty_buf_queue, + jbd_buf_dirty); + /* If there are still unwritten buffers. */ + if (TAILQ_FIRST(&block_rec->dirty_buf_queue) != + jbd_buf) + jbd_trans_revoke_block(trans, lba); - jbd_trans_revoke_block(trans, lba); + } else + jbd_trans_revoke_block(trans, lba); } return EOK; @@ -1719,13 +1717,14 @@ void jbd_journal_free_trans(struct jbd_journal *journal, jbd_trans_finish_callback(journal, trans, block_rec, - abort); + abort, + false); TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node); free(jbd_buf); } - LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node, + RB_FOREACH_SAFE(rec, jbd_revoke_tree, &trans->revoke_root, tmp2) { - LIST_REMOVE(rec, revoke_node); + RB_REMOVE(jbd_revoke_tree, &trans->revoke_root, rec); free(rec); } LIST_FOREACH_SAFE(block_rec, &trans->tbrec_list, tbrec_node, @@ -1742,18 +1741,17 @@ void jbd_journal_free_trans(struct jbd_journal *journal, static int jbd_trans_write_commit_block(struct jbd_trans *trans) { int rc; + struct ext4_block block; struct jbd_commit_header *header; - uint32_t commit_iblock = 0; - struct ext4_block commit_block; + uint32_t commit_iblock, orig_commit_iblock; struct jbd_journal *journal = trans->journal; commit_iblock = jbd_journal_alloc_block(journal, trans); - rc = jbd_block_get_noread(journal->jbd_fs, - &commit_block, commit_iblock); + rc = jbd_block_get_noread(journal->jbd_fs, &block, commit_iblock); if (rc != EOK) return rc; - header = (struct jbd_commit_header *)commit_block.data; + header = (struct jbd_commit_header *)block.data; jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER); jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK); jbd_set32(&header->header, sequence, trans->trans_id); @@ -1765,12 +1763,29 @@ static int jbd_trans_write_commit_block(struct jbd_trans *trans) jbd_set32(header, chksum[0], trans->data_csum); } jbd_commit_csum_set(journal->jbd_fs, header); - ext4_bcache_set_dirty(commit_block.buf); - rc = jbd_block_set(journal->jbd_fs, &commit_block); + ext4_bcache_set_dirty(block.buf); + ext4_bcache_set_flag(block.buf, BC_TMP); + rc = jbd_block_set(journal->jbd_fs, &block); if (rc != EOK) return rc; - return EOK; + orig_commit_iblock = commit_iblock; + commit_iblock++; + wrap(&journal->jbd_fs->sb, commit_iblock); + + /* To prevent accidental reference to stale journalling metadata. */ + if (orig_commit_iblock < commit_iblock) { + rc = jbd_block_get_noread(journal->jbd_fs, &block, commit_iblock); + if (rc != EOK) + return rc; + + memset(block.data, 0, journal->block_size); + ext4_bcache_set_dirty(block.buf); + ext4_bcache_set_flag(block.buf, BC_TMP); + rc = jbd_block_set(journal->jbd_fs, &block); + } + + return rc; } /**@brief Write descriptor block for a transaction @@ -1781,19 +1796,25 @@ static int jbd_journal_prepare(struct jbd_journal *journal, struct jbd_trans *trans) { int rc = EOK, i = 0; + struct ext4_block desc_block = EXT4_BLOCK_ZERO(), + data_block = EXT4_BLOCK_ZERO(); int32_t tag_tbl_size = 0; uint32_t desc_iblock = 0; uint32_t data_iblock = 0; char *tag_start = NULL, *tag_ptr = NULL; struct jbd_buf *jbd_buf, *tmp; - struct ext4_block desc_block, data_block; struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs; uint32_t checksum = EXT4_CRC32_INIT; + struct jbd_bhdr *bhdr = NULL; + void *data; /* Try to remove any non-dirty buffers from the tail of * buf_queue. */ TAILQ_FOREACH_REVERSE_SAFE(jbd_buf, &trans->buf_queue, jbd_trans_buf, buf_node, tmp) { + struct jbd_revoke_rec tmp_rec = { + .lba = jbd_buf->block_rec->lba + }; /* We stop the iteration when we find a dirty buffer. */ if (ext4_bcache_test_flag(jbd_buf->block.buf, BC_DIRTY)) @@ -1808,12 +1829,12 @@ static int jbd_journal_prepare(struct jbd_journal *journal, jbd_trans_finish_callback(journal, trans, jbd_buf->block_rec, - true); - - /* The buffer has not been modified, just release - * that jbd_buf. */ + true, + RB_FIND(jbd_revoke_tree, + &trans->revoke_root, + &tmp_rec)); jbd_trans_remove_block_rec(journal, - jbd_buf->block_rec, trans); + jbd_buf->block_rec, trans); trans->data_cnt--; ext4_block_set(fs->bdev, &jbd_buf->block); @@ -1824,6 +1845,9 @@ static int jbd_journal_prepare(struct jbd_journal *journal, TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node, tmp) { struct tag_info tag_info; bool uuid_exist = false; + struct jbd_revoke_rec tmp_rec = { + .lba = jbd_buf->block_rec->lba + }; if (!ext4_bcache_test_flag(jbd_buf->block.buf, BC_DIRTY)) { TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue, @@ -1832,13 +1856,16 @@ static int jbd_journal_prepare(struct jbd_journal *journal, jbd_buf->block.buf->end_write = NULL; jbd_buf->block.buf->end_write_arg = NULL; - jbd_trans_finish_callback(journal, - trans, - jbd_buf->block_rec, - true); /* The buffer has not been modified, just release * that jbd_buf. */ + jbd_trans_finish_callback(journal, + trans, + jbd_buf->block_rec, + true, + RB_FIND(jbd_revoke_tree, + &trans->revoke_root, + &tmp_rec)); jbd_trans_remove_block_rec(journal, jbd_buf->block_rec, trans); trans->data_cnt--; @@ -1854,15 +1881,11 @@ static int jbd_journal_prepare(struct jbd_journal *journal, trans->trans_id); again: if (!desc_iblock) { - struct jbd_bhdr *bhdr; desc_iblock = jbd_journal_alloc_block(journal, trans); - rc = jbd_block_get_noread(journal->jbd_fs, - &desc_block, desc_iblock); + rc = jbd_block_get_noread(journal->jbd_fs, &desc_block, desc_iblock); if (rc != EOK) break; - ext4_bcache_set_dirty(desc_block.buf); - bhdr = (struct jbd_bhdr *)desc_block.data; jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER); jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK); @@ -1880,6 +1903,8 @@ again: if (!trans->start_iblock) trans->start_iblock = desc_iblock; + ext4_bcache_set_dirty(desc_block.buf); + ext4_bcache_set_flag(desc_block.buf, BC_TMP); } tag_info.block = jbd_buf->block.lb_id; tag_info.uuid_exist = uuid_exist; @@ -1899,28 +1924,37 @@ again: tag_tbl_size, &tag_info); if (rc != EOK) { - jbd_meta_csum_set(journal->jbd_fs, - (struct jbd_bhdr *)desc_block.data); - jbd_block_set(journal->jbd_fs, &desc_block); + jbd_meta_csum_set(journal->jbd_fs, bhdr); desc_iblock = 0; + rc = jbd_block_set(journal->jbd_fs, &desc_block); + if (rc != EOK) + break; + goto again; } data_iblock = jbd_journal_alloc_block(journal, trans); - rc = jbd_block_get_noread(journal->jbd_fs, - &data_block, data_iblock); - if (rc != EOK) + rc = jbd_block_get_noread(journal->jbd_fs, &data_block, data_iblock); + if (rc != EOK) { + desc_iblock = 0; + ext4_bcache_clear_dirty(desc_block.buf); + jbd_block_set(journal->jbd_fs, &desc_block); break; + } - ext4_bcache_set_dirty(data_block.buf); - - memcpy(data_block.data, jbd_buf->block.data, + data = data_block.data; + memcpy(data, jbd_buf->block.data, journal->block_size); - jbd_buf->jbd_lba = data_block.lb_id; - + ext4_bcache_set_dirty(data_block.buf); + ext4_bcache_set_flag(data_block.buf, BC_TMP); rc = jbd_block_set(journal->jbd_fs, &data_block); - if (rc != EOK) + if (rc != EOK) { + desc_iblock = 0; + ext4_bcache_clear_dirty(desc_block.buf); + jbd_block_set(journal->jbd_fs, &desc_block); break; + } + jbd_buf->jbd_lba = data_iblock; tag_ptr += tag_info.tag_bytes; tag_tbl_size -= tag_info.tag_bytes; @@ -1929,9 +1963,9 @@ again: } if (rc == EOK && desc_iblock) { jbd_meta_csum_set(journal->jbd_fs, - (struct jbd_bhdr *)desc_block.data); + (struct jbd_bhdr *)bhdr); trans->data_csum = checksum; - jbd_block_set(journal->jbd_fs, &desc_block); + rc = jbd_block_set(journal->jbd_fs, &desc_block); } return rc; @@ -1946,31 +1980,28 @@ jbd_journal_prepare_revoke(struct jbd_journal *journal, struct jbd_trans *trans) { int rc = EOK, i = 0; + struct ext4_block desc_block = EXT4_BLOCK_ZERO(); int32_t tag_tbl_size = 0; uint32_t desc_iblock = 0; char *blocks_entry = NULL; struct jbd_revoke_rec *rec, *tmp; - struct ext4_block desc_block; struct jbd_revoke_header *header = NULL; int32_t record_len = 4; + struct jbd_bhdr *bhdr = NULL; if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb, JBD_FEATURE_INCOMPAT_64BIT)) record_len = 8; - LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node, + RB_FOREACH_SAFE(rec, jbd_revoke_tree, &trans->revoke_root, tmp) { again: if (!desc_iblock) { - struct jbd_bhdr *bhdr; desc_iblock = jbd_journal_alloc_block(journal, trans); - rc = jbd_block_get_noread(journal->jbd_fs, - &desc_block, desc_iblock); - if (rc != EOK) { + rc = jbd_block_get_noread(journal->jbd_fs, &desc_block, + desc_iblock); + if (rc != EOK) break; - } - - ext4_bcache_set_dirty(desc_block.buf); bhdr = (struct jbd_bhdr *)desc_block.data; jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER); @@ -1988,16 +2019,21 @@ again: if (!trans->start_iblock) trans->start_iblock = desc_iblock; + ext4_bcache_set_dirty(desc_block.buf); + ext4_bcache_set_flag(desc_block.buf, BC_TMP); } if (tag_tbl_size < record_len) { jbd_set32(header, count, journal->block_size - tag_tbl_size); - jbd_meta_csum_set(journal->jbd_fs, - (struct jbd_bhdr *)desc_block.data); - jbd_block_set(journal->jbd_fs, &desc_block); + jbd_meta_csum_set(journal->jbd_fs, bhdr); + bhdr = NULL; desc_iblock = 0; header = NULL; + rc = jbd_block_set(journal->jbd_fs, &desc_block); + if (rc != EOK) + break; + goto again; } if (record_len == 8) { @@ -2019,9 +2055,8 @@ again: jbd_set32(header, count, journal->block_size - tag_tbl_size); - jbd_meta_csum_set(journal->jbd_fs, - (struct jbd_bhdr *)desc_block.data); - jbd_block_set(journal->jbd_fs, &desc_block); + jbd_meta_csum_set(journal->jbd_fs, bhdr); + rc = jbd_block_set(journal->jbd_fs, &desc_block); } return rc; @@ -2065,9 +2100,9 @@ static void jbd_trans_end_write(struct ext4_bcache *bc __unused, jbd_trans_finish_callback(journal, trans, jbd_buf->block_rec, + false, false); - if (block_rec->trans == trans) { - block_rec->buf = NULL; + if (block_rec->trans == trans && buf) { /* Clear the end_write and end_write_arg fields. */ buf->end_write = NULL; buf->end_write_arg = NULL; @@ -2101,11 +2136,12 @@ static void jbd_trans_end_write(struct ext4_bcache *bc __unused, * @param journal current journal session * @param trans transaction * @return standard error code*/ -int jbd_journal_commit_trans(struct jbd_journal *journal, - struct jbd_trans *trans) +static int __jbd_journal_commit_trans(struct jbd_journal *journal, + struct jbd_trans *trans) { int rc = EOK; uint32_t last = journal->last; + struct jbd_revoke_rec *rec, *tmp; trans->trans_id = journal->alloc_trans_id; rc = jbd_journal_prepare(journal, trans); @@ -2117,7 +2153,7 @@ int jbd_journal_commit_trans(struct jbd_journal *journal, goto Finish; if (TAILQ_EMPTY(&trans->buf_queue) && - LIST_EMPTY(&trans->revoke_list)) { + RB_EMPTY(&trans->revoke_root)) { /* Since there are no entries in both buffer list * and revoke entry list, we do not consider trans as * complete transaction and just return EOK.*/ @@ -2130,7 +2166,43 @@ int jbd_journal_commit_trans(struct jbd_journal *journal, goto Finish; journal->alloc_trans_id++; + + /* Complete the checkpoint of buffers which are revoked. */ + RB_FOREACH_SAFE(rec, jbd_revoke_tree, &trans->revoke_root, + tmp) { + struct jbd_block_rec *block_rec = + jbd_trans_block_rec_lookup(journal, rec->lba); + struct jbd_buf *jbd_buf = NULL; + if (block_rec) + jbd_buf = TAILQ_LAST(&block_rec->dirty_buf_queue, + jbd_buf_dirty); + if (jbd_buf) { + struct ext4_buf *buf; + struct ext4_block block = EXT4_BLOCK_ZERO(); + /* + * We do this to reset the ext4_buf::end_write and + * ext4_buf::end_write_arg fields so that the checkpoint + * callback won't be triggered again. + */ + buf = ext4_bcache_find_get(journal->jbd_fs->bdev->bc, + &block, + jbd_buf->block_rec->lba); + jbd_trans_end_write(journal->jbd_fs->bdev->bc, + buf, + EOK, + jbd_buf); + if (buf) + ext4_block_set(journal->jbd_fs->bdev, &block); + } + } + if (TAILQ_EMPTY(&journal->cp_queue)) { + /* + * This transaction is going to be the first object in the + * checkpoint queue. + * When the first transaction in checkpoint queue is completely + * written to disk, we shift the tail of the log to right. + */ if (trans->data_cnt) { journal->start = trans->start_iblock; wrap(&journal->jbd_fs->sb, journal->start); @@ -2149,20 +2221,52 @@ int jbd_journal_commit_trans(struct jbd_journal *journal, jbd_journal_free_trans(journal, trans, false); } } else { + /* No need to do anything to the JBD superblock. */ TAILQ_INSERT_TAIL(&journal->cp_queue, trans, trans_node); if (trans->data_cnt) jbd_journal_cp_trans(journal, trans); - } Finish: - if (rc != EOK) { + if (rc != EOK && rc != ENOSPC) { journal->last = last; jbd_journal_free_trans(journal, trans, true); } return rc; } +/**@brief Allocate a new transaction + * @param journal current journal session + * @return transaction allocated*/ +struct jbd_trans * +jbd_journal_new_trans(struct jbd_journal *journal) +{ + struct jbd_trans *trans = NULL; + trans = calloc(1, sizeof(struct jbd_trans)); + if (!trans) + return NULL; + + /* We will assign a trans_id to this transaction, + * once it has been committed.*/ + trans->journal = journal; + trans->data_csum = EXT4_CRC32_INIT; + trans->error = EOK; + TAILQ_INIT(&trans->buf_queue); + return trans; +} + +/**@brief Commit a transaction to the journal immediately. + * @param journal current journal session + * @param trans transaction + * @return standard error code*/ +int jbd_journal_commit_trans(struct jbd_journal *journal, + struct jbd_trans *trans) +{ + int r = EOK; + r = __jbd_journal_commit_trans(journal, trans); + return r; +} + /** * @} */ diff --git a/src/ext4_trans.c b/src/ext4_trans.c index cf7f2ca..7a91dbc 100644 --- a/src/ext4_trans.c +++ b/src/ext4_trans.c @@ -44,20 +44,6 @@ #include "ext4_fs.h" #include "ext4_journal.h" -static int ext4_trans_get_write_access(struct ext4_fs *fs __unused, - struct ext4_block *block __unused) -{ - int r = EOK; -#if CONFIG_JOURNALING_ENABLE - if (fs->jbd_journal && fs->curr_trans) { - struct jbd_journal *journal = fs->jbd_journal; - struct jbd_trans *trans = fs->curr_trans; - r = jbd_trans_get_access(journal, trans, block); - } -#endif - return r; -} - int ext4_trans_set_block_dirty(struct ext4_buf *buf) { int r = EOK; @@ -86,10 +72,6 @@ int ext4_trans_block_get_noread(struct ext4_blockdev *bdev, if (r != EOK) return r; - r = ext4_trans_get_write_access(bdev->fs, b); - if (r != EOK) - ext4_block_set(bdev, b); - return r; } @@ -101,10 +83,6 @@ int ext4_trans_block_get(struct ext4_blockdev *bdev, if (r != EOK) return r; - r = ext4_trans_get_write_access(bdev->fs, b); - if (r != EOK) - ext4_block_set(bdev, b); - return r; }