ext4_journal: numorous changes.
authorKaho Ng <ngkaho1234@gmail.com>
Wed, 27 Apr 2016 11:11:31 +0000 (19:11 +0800)
committerKaho Ng <ngkaho1234@gmail.com>
Sat, 30 Apr 2016 08:18:00 +0000 (16:18 +0800)
 - Do not utilize jbd_block_rec::buf because it can be unreliable that
   jbd_block_rec::buf may be freed.
 - Do not need to flush buffers before a new transaction is going to reference
   them.
 - add some comments to the changes.

include/ext4_bcache.h
include/ext4_blockdev.h
include/ext4_journal.h
src/ext4_balloc.c
src/ext4_bcache.c
src/ext4_blockdev.c
src/ext4_journal.c
src/ext4_trans.c

index bceb75a12fdb490091b04d3e2ed4b4fdc14889d3..b7ae43857c594de0525921b0294a002bf9666df3 100644 (file)
@@ -239,6 +239,12 @@ struct ext4_buf *ext4_buf_lowest_lru(struct ext4_bcache *bc);
  * @param   buf buffer*/
 void ext4_bcache_drop_buf(struct ext4_bcache *bc, struct ext4_buf *buf);
 
+/**@brief   Invalidate a buffer.
+ * @param   bc block cache descriptor
+ * @param   buf buffer*/
+void ext4_bcache_invalidate_buf(struct ext4_bcache *bc,
+                               struct ext4_buf *buf);
+
 /**@brief   Invalidate a range of buffers.
  * @param   bc block cache descriptor
  * @param   from starting lba
index f6a6d368f3b1ec02677ad2b9064c6bd99b61d672..92759e4eeeba8e89f16a8f419e7e1b41b1e39470 100644 (file)
@@ -124,6 +124,8 @@ struct ext4_blockdev {
 
        /**@brief   The filesystem this block device belongs to. */
        struct ext4_fs *fs;
+
+       void *journal;
 };
 
 /**@brief   Static initialization of the block device.*/
index 6961e1542601057d30aef36daa97d3444dd57c7d..76a4a87bb36e464bbc2bf1d87e83628ccf232313 100644 (file)
@@ -47,7 +47,6 @@ extern "C" {
 #include "misc/tree.h"
 
 struct jbd_fs {
-       /* If journal block device is used, bdev will be non-null */
        struct ext4_blockdev *bdev;
        struct ext4_inode_ref inode_ref;
        struct jbd_sb sb;
@@ -56,7 +55,7 @@ struct jbd_fs {
 };
 
 struct jbd_buf {
-       uint64_t jbd_lba;
+       uint32_t jbd_lba;
        struct ext4_block block;
        struct jbd_trans *trans;
        struct jbd_block_rec *block_rec;
@@ -66,12 +65,11 @@ struct jbd_buf {
 
 struct jbd_revoke_rec {
        ext4_fsblk_t lba;
-       LIST_ENTRY(jbd_revoke_rec) revoke_node;
+       RB_ENTRY(jbd_revoke_rec) revoke_node;
 };
 
 struct jbd_block_rec {
        ext4_fsblk_t lba;
-       struct ext4_buf *buf;
        struct jbd_trans *trans;
        RB_ENTRY(jbd_block_rec) block_rec_node;
        LIST_ENTRY(jbd_block_rec) tbrec_node;
@@ -91,7 +89,7 @@ struct jbd_trans {
        struct jbd_journal *journal;
 
        TAILQ_HEAD(jbd_trans_buf, jbd_buf) buf_queue;
-       LIST_HEAD(jbd_revoke_list, jbd_revoke_rec) revoke_list;
+       RB_HEAD(jbd_revoke_tree, jbd_revoke_rec) revoke_root;
        LIST_HEAD(jbd_trans_block_rec, jbd_block_rec) tbrec_list;
        TAILQ_ENTRY(jbd_trans) trans_node;
 };
@@ -105,7 +103,6 @@ struct jbd_journal {
 
        uint32_t block_size;
 
-       TAILQ_HEAD(jbd_trans_queue, jbd_trans) trans_queue;
        TAILQ_HEAD(jbd_cp_queue, jbd_trans) cp_queue;
        RB_HEAD(jbd_block, jbd_block_rec) block_rec_root;
 
@@ -122,10 +119,8 @@ int jbd_recover(struct jbd_fs *jbd_fs);
 int jbd_journal_start(struct jbd_fs *jbd_fs,
                      struct jbd_journal *journal);
 int jbd_journal_stop(struct jbd_journal *journal);
-struct jbd_trans *jbd_journal_new_trans(struct jbd_journal *journal);
-int jbd_trans_get_access(struct jbd_journal *journal,
-                        struct jbd_trans *trans,
-                        struct ext4_block *block);
+struct jbd_trans *
+jbd_journal_new_trans(struct jbd_journal *journal);
 int jbd_trans_set_block_dirty(struct jbd_trans *trans,
                              struct ext4_block *block);
 int jbd_trans_revoke_block(struct jbd_trans *trans,
@@ -137,6 +132,10 @@ void jbd_journal_free_trans(struct jbd_journal *journal,
                            bool abort);
 int jbd_journal_commit_trans(struct jbd_journal *journal,
                             struct jbd_trans *trans);
+void
+jbd_journal_purge_cp_trans(struct jbd_journal *journal,
+                          bool flush,
+                          bool once);
 
 #ifdef __cplusplus
 }
index 4b17c29420aff352ec3668c0e55d4d3da75c8657..73f5631886178dd0907cd449dcd97650cd29e851 100644 (file)
@@ -225,7 +225,9 @@ int ext4_balloc_free_block(struct ext4_inode_ref *inode_ref, ext4_fsblk_t baddr)
        }
        ext4_bcache_invalidate_lba(fs->bdev->bc, baddr, 1);
        /* Release block group reference */
-       return ext4_fs_put_block_group_ref(&bg_ref);
+       rc = ext4_fs_put_block_group_ref(&bg_ref);
+
+       return rc;
 }
 
 int ext4_balloc_free_blocks(struct ext4_inode_ref *inode_ref,
@@ -342,6 +344,7 @@ int ext4_balloc_free_blocks(struct ext4_inode_ref *inode_ref,
        ext4_bcache_invalidate_lba(fs->bdev->bc, first, count);
        /*All blocks should be released*/
        ext4_assert(count == 0);
+
        return rc;
 }
 
index 1a1766ac4ab978339e5e9e95a9d3c14d4bc64e17..e1e3b0ccbc13cb0c5ed0d790d27b5c1a5a87cadb 100644 (file)
@@ -177,6 +177,19 @@ void ext4_bcache_drop_buf(struct ext4_bcache *bc, struct ext4_buf *buf)
        bc->ref_blocks--;
 }
 
+void ext4_bcache_invalidate_buf(struct ext4_bcache *bc,
+                               struct ext4_buf *buf)
+{
+       buf->end_write = NULL;
+       buf->end_write_arg = NULL;
+
+       /* Clear both dirty and up-to-date flags. */
+       if (ext4_bcache_test_flag(buf, BC_DIRTY))
+               ext4_bcache_remove_dirty_node(bc, buf);
+
+       ext4_bcache_clear_dirty(buf);
+}
+
 void ext4_bcache_invalidate_lba(struct ext4_bcache *bc,
                                uint64_t from,
                                uint32_t cnt)
@@ -187,13 +200,7 @@ void ext4_bcache_invalidate_lba(struct ext4_bcache *bc,
                if (buf->lba > end)
                        break;
 
-               /* Clear both dirty and up-to-date flags. */
-               if (ext4_bcache_test_flag(buf, BC_DIRTY))
-                       ext4_bcache_remove_dirty_node(bc, buf);
-
-               buf->end_write = NULL;
-               buf->end_write_arg = NULL;
-               ext4_bcache_clear_dirty(buf);
+               ext4_bcache_invalidate_buf(bc, buf);
        }
 }
 
index 4b33cd28743e52dfc80d94462d3ac158372ed7db..19653bb8d8c0603d6f804946a08a315780009d91 100644 (file)
@@ -41,6 +41,8 @@
 #include "ext4_debug.h"
 
 #include "ext4_blockdev.h"
+#include "ext4_fs.h"
+#include "ext4_journal.h"
 
 #include <string.h>
 #include <stdlib.h>
@@ -188,6 +190,8 @@ int ext4_block_cache_shake(struct ext4_blockdev *bdev)
        if (bdev->bc->dont_shake)
                return EOK;
 
+       bdev->bc->dont_shake = true;
+
        while (!RB_EMPTY(&bdev->bc->lru_root) &&
                ext4_bcache_is_full(bdev->bc)) {
 
@@ -202,6 +206,7 @@ int ext4_block_cache_shake(struct ext4_blockdev *bdev)
 
                ext4_bcache_drop_buf(bdev->bc, buf);
        }
+       bdev->bc->dont_shake = false;
        return r;
 }
 
index 1b5a99efd41d63b616bd9398116445b1d33230a5..9fe984e1d8b45c6e1f3a3abe1fbe3ac5df18ec2e 100644 (file)
@@ -124,10 +124,22 @@ jbd_block_rec_cmp(struct jbd_block_rec *a, struct jbd_block_rec *b)
        return 0;
 }
 
+static int
+jbd_revoke_rec_cmp(struct jbd_revoke_rec *a, struct jbd_revoke_rec *b)
+{
+       if (a->lba > b->lba)
+               return 1;
+       else if (a->lba < b->lba)
+               return -1;
+       return 0;
+}
+
 RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
                     jbd_revoke_entry_cmp, static inline)
 RB_GENERATE_INTERNAL(jbd_block, jbd_block_rec, block_rec_node,
                     jbd_block_rec_cmp, static inline)
+RB_GENERATE_INTERNAL(jbd_revoke_tree, jbd_revoke_rec, revoke_node,
+                    jbd_revoke_rec_cmp, static inline)
 
 #define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry))
 #define jbd_free_revoke_entry(addr) free(addr)
@@ -456,6 +468,9 @@ int jbd_get_fs(struct ext4_fs *fs,
                rc = EIO;
        }
 
+       if (rc == EOK)
+               jbd_fs->bdev = fs->bdev;
+
        return rc;
 }
 
@@ -499,6 +514,7 @@ static int jbd_block_get(struct jbd_fs *jbd_fs,
 {
        /* TODO: journal device. */
        int rc;
+       struct ext4_blockdev *bdev = jbd_fs->bdev;
        ext4_lblk_t iblock = (ext4_lblk_t)fblock;
 
        /* Lookup the logical block address of
@@ -508,7 +524,6 @@ static int jbd_block_get(struct jbd_fs *jbd_fs,
        if (rc != EOK)
                return rc;
 
-       struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
        rc = ext4_block_get(bdev, block, fblock);
 
        /* If succeeded, mark buffer as BC_FLUSH to indicate
@@ -534,13 +549,13 @@ static int jbd_block_get_noread(struct jbd_fs *jbd_fs,
 {
        /* TODO: journal device. */
        int rc;
+       struct ext4_blockdev *bdev = jbd_fs->bdev;
        ext4_lblk_t iblock = (ext4_lblk_t)fblock;
        rc = jbd_inode_bmap(jbd_fs, iblock,
                            &fblock);
        if (rc != EOK)
                return rc;
 
-       struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
        rc = ext4_block_get_noread(bdev, block, fblock);
        if (rc == EOK)
                ext4_bcache_set_flag(block->buf, BC_FLUSH);
@@ -555,8 +570,8 @@ static int jbd_block_get_noread(struct jbd_fs *jbd_fs,
 static int jbd_block_set(struct jbd_fs *jbd_fs,
                  struct ext4_block *block)
 {
-       return ext4_block_set(jbd_fs->inode_ref.fs->bdev,
-                             block);
+       struct ext4_blockdev *bdev = jbd_fs->bdev;
+       return ext4_block_set(bdev, block);
 }
 
 /**@brief  helper functions to calculate
@@ -1206,7 +1221,7 @@ int jbd_recover(struct jbd_fs *jbd_fs)
                           features_incompatible,
                           features_incompatible);
                jbd_fs->dirty = true;
-               r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
+               r = ext4_sb_write(jbd_fs->bdev,
                                  &jbd_fs->inode_ref.fs->sb);
        }
        jbd_destroy_revoke_tree(&info);
@@ -1237,7 +1252,7 @@ int jbd_journal_start(struct jbd_fs *jbd_fs,
        ext4_set32(&jbd_fs->inode_ref.fs->sb,
                        features_incompatible,
                        features_incompatible);
-       r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
+       r = ext4_sb_write(jbd_fs->bdev,
                        &jbd_fs->inode_ref.fs->sb);
        if (r != EOK)
                return r;
@@ -1265,12 +1280,16 @@ int jbd_journal_start(struct jbd_fs *jbd_fs,
                return r;
        }
 
-       TAILQ_INIT(&journal->trans_queue);
        TAILQ_INIT(&journal->cp_queue);
        RB_INIT(&journal->block_rec_root);
        journal->jbd_fs = jbd_fs;
        jbd_journal_write_sb(journal);
-       return jbd_write_sb(jbd_fs);
+       r = jbd_write_sb(jbd_fs);
+       if (r != EOK)
+               return r;
+
+       jbd_fs->bdev->journal = journal;
+       return EOK;
 }
 
 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
@@ -1278,6 +1297,8 @@ static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
                          int res,
                          void *arg);
 
+/*
+ * This routine is only suitable to committed transactions. */
 static void jbd_journal_flush_trans(struct jbd_trans *trans)
 {
        struct jbd_buf *jbd_buf, *tmp;
@@ -1288,25 +1309,29 @@ static void jbd_journal_flush_trans(struct jbd_trans *trans)
 
        TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
                        tmp) {
-               struct ext4_buf *buf = jbd_buf->block_rec->buf;
-               /* The buffer in memory is still dirty. */
-               if (buf) {
-                       if (jbd_buf->block_rec->trans != trans) {
-                               int r;
-                               struct ext4_block jbd_block = EXT4_BLOCK_ZERO();
-                               ext4_assert(ext4_block_get(fs->bdev,
-                                                       &jbd_block,
-                                                       jbd_buf->jbd_lba) == EOK);
-                               memcpy(tmp_data, jbd_block.data,
-                                               journal->block_size);
-                               ext4_block_set(fs->bdev, &jbd_block);
-                               r = ext4_blocks_set_direct(fs->bdev, tmp_data,
-                                               buf->lba, 1);
-                               jbd_trans_end_write(fs->bdev->bc, buf, r, jbd_buf);
-                       } else
-                               ext4_block_flush_buf(fs->bdev, buf);
+               struct ext4_buf *buf;
+               struct ext4_block block;
+               /* The buffer is not yet flushed. */
+               buf = ext4_bcache_find_get(fs->bdev->bc, &block,
+                                          jbd_buf->block_rec->lba);
+               if (!(buf && ext4_bcache_test_flag(buf, BC_UPTODATE) &&
+                     jbd_buf->block_rec->trans == trans)) {
+                       int r;
+                       struct ext4_block jbd_block = EXT4_BLOCK_ZERO();
+                       ext4_assert(jbd_block_get(journal->jbd_fs,
+                                               &jbd_block,
+                                               jbd_buf->jbd_lba) == EOK);
+                       memcpy(tmp_data, jbd_block.data,
+                                       journal->block_size);
+                       ext4_block_set(fs->bdev, &jbd_block);
+                       r = ext4_blocks_set_direct(fs->bdev, tmp_data,
+                                       jbd_buf->block_rec->lba, 1);
+                       jbd_trans_end_write(fs->bdev->bc, buf, r, jbd_buf);
+               } else
+                       ext4_block_flush_buf(fs->bdev, buf);
 
-               }
+               if (buf)
+                       ext4_block_set(fs->bdev, &block);
        }
 
        free(tmp_data);
@@ -1325,7 +1350,7 @@ jbd_journal_skip_pure_revoke(struct jbd_journal *journal,
        jbd_journal_write_sb(journal);
 }
 
-static void
+void
 jbd_journal_purge_cp_trans(struct jbd_journal *journal,
                           bool flush,
                           bool once)
@@ -1398,7 +1423,7 @@ int jbd_journal_stop(struct jbd_journal *journal)
        ext4_set32(&jbd_fs->inode_ref.fs->sb,
                        features_incompatible,
                        features_incompatible);
-       r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
+       r = ext4_sb_write(jbd_fs->bdev,
                        &jbd_fs->inode_ref.fs->sb);
        if (r != EOK)
                return r;
@@ -1430,50 +1455,6 @@ static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal,
        return start_block;
 }
 
-/**@brief  Allocate a new transaction
- * @param  journal current journal session
- * @return transaction allocated*/
-struct jbd_trans *
-jbd_journal_new_trans(struct jbd_journal *journal)
-{
-       struct jbd_trans *trans = calloc(1, sizeof(struct jbd_trans));
-       if (!trans)
-               return NULL;
-
-       /* We will assign a trans_id to this transaction,
-        * once it has been committed.*/
-       trans->journal = journal;
-       trans->data_csum = EXT4_CRC32_INIT;
-       trans->error = EOK;
-       TAILQ_INIT(&trans->buf_queue);
-       return trans;
-}
-
-/**@brief  gain access to it before making any modications.
- * @param  journal current journal session
- * @param  trans transaction
- * @param  block descriptor
- * @return standard error code.*/
-int jbd_trans_get_access(struct jbd_journal *journal,
-                        struct jbd_trans *trans,
-                        struct ext4_block *block)
-{
-       int r = EOK;
-       struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
-       struct jbd_buf *jbd_buf = block->buf->end_write_arg;
-
-       /* If the buffer has already been modified, we should
-        * flush dirty data in this buffer to disk.*/
-       if (ext4_bcache_test_flag(block->buf, BC_DIRTY) &&
-           block->buf->end_write == jbd_trans_end_write) {
-               ext4_assert(jbd_buf);
-               if (jbd_buf->trans != trans)
-                       r = ext4_block_flush_buf(fs->bdev, block->buf);
-
-       }
-       return r;
-}
-
 static struct jbd_block_rec *
 jbd_trans_block_rec_lookup(struct jbd_journal *journal,
                           ext4_fsblk_t lba)
@@ -1489,25 +1470,24 @@ jbd_trans_block_rec_lookup(struct jbd_journal *journal,
 
 static void
 jbd_trans_change_ownership(struct jbd_block_rec *block_rec,
-                          struct jbd_trans *new_trans,
-                          struct ext4_buf *new_buf)
+                          struct jbd_trans *new_trans)
 {
        LIST_REMOVE(block_rec, tbrec_node);
-       /* Now this block record belongs to this transaction. */
-       LIST_INSERT_HEAD(&new_trans->tbrec_list, block_rec, tbrec_node);
+       if (new_trans) {
+               /* Now this block record belongs to this transaction. */
+               LIST_INSERT_HEAD(&new_trans->tbrec_list, block_rec, tbrec_node);
+       }
        block_rec->trans = new_trans;
-       block_rec->buf = new_buf;
 }
 
 static inline struct jbd_block_rec *
 jbd_trans_insert_block_rec(struct jbd_trans *trans,
-                          ext4_fsblk_t lba,
-                          struct ext4_buf *buf)
+                          ext4_fsblk_t lba)
 {
        struct jbd_block_rec *block_rec;
        block_rec = jbd_trans_block_rec_lookup(trans->journal, lba);
        if (block_rec) {
-               jbd_trans_change_ownership(block_rec, trans, buf);
+               jbd_trans_change_ownership(block_rec, trans);
                return block_rec;
        }
        block_rec = calloc(1, sizeof(struct jbd_block_rec));
@@ -1515,7 +1495,6 @@ jbd_trans_insert_block_rec(struct jbd_trans *trans,
                return NULL;
 
        block_rec->lba = lba;
-       block_rec->buf = buf;
        block_rec->trans = trans;
        TAILQ_INIT(&block_rec->dirty_buf_queue);
        LIST_INSERT_HEAD(&trans->tbrec_list, block_rec, tbrec_node);
@@ -1523,11 +1502,15 @@ jbd_trans_insert_block_rec(struct jbd_trans *trans,
        return block_rec;
 }
 
+/*
+ * This routine will do the dirty works.
+ */
 static void
 jbd_trans_finish_callback(struct jbd_journal *journal,
                          const struct jbd_trans *trans,
                          struct jbd_block_rec *block_rec,
-                         bool abort)
+                         bool abort,
+                         bool revoke)
 {
        struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
        if (block_rec->trans != trans)
@@ -1539,42 +1522,49 @@ jbd_trans_finish_callback(struct jbd_journal *journal,
                                &block_rec->dirty_buf_queue,
                                dirty_buf_node,
                                tmp) {
-                       /* All we need is a fake ext4_buf. */
-                       struct ext4_buf buf;
-
                        jbd_trans_end_write(fs->bdev->bc,
-                                       &buf,
+                                       NULL,
                                        EOK,
                                        jbd_buf);
                }
        } else {
+               /*
+                * We have to roll back data if the block is going to be
+                * aborted.
+                */
                struct jbd_buf *jbd_buf;
                struct ext4_block jbd_block = EXT4_BLOCK_ZERO(),
                                  block = EXT4_BLOCK_ZERO();
                jbd_buf = TAILQ_LAST(&block_rec->dirty_buf_queue,
                                jbd_buf_dirty);
                if (jbd_buf) {
-                       ext4_assert(ext4_block_get(fs->bdev,
-                                               &jbd_block,
-                                               jbd_buf->jbd_lba) == EOK);
-                       ext4_assert(ext4_block_get_noread(fs->bdev,
-                                               &block,
-                                               block_rec->lba) == EOK);
-                       memcpy(block.data, jbd_block.data,
-                                       journal->block_size);
+                       if (!revoke) {
+                               ext4_assert(ext4_block_get_noread(fs->bdev,
+                                                       &block,
+                                                       block_rec->lba) == EOK);
+                               ext4_assert(jbd_block_get(journal->jbd_fs,
+                                                       &jbd_block,
+                                                       jbd_buf->jbd_lba) == EOK);
+                               memcpy(block.data, jbd_block.data,
+                                               journal->block_size);
 
-                       jbd_trans_change_ownership(block_rec,
-                                       jbd_buf->trans, block.buf);
+                               jbd_trans_change_ownership(block_rec,
+                                               jbd_buf->trans);
 
-                       block.buf->end_write = jbd_trans_end_write;
-                       block.buf->end_write_arg = jbd_buf;
+                               block.buf->end_write = jbd_trans_end_write;
+                               block.buf->end_write_arg = jbd_buf;
 
-                       ext4_bcache_set_flag(jbd_block.buf, BC_TMP);
-                       ext4_bcache_set_dirty(block.buf);
+                               ext4_bcache_set_flag(jbd_block.buf, BC_TMP);
+                               ext4_bcache_set_dirty(block.buf);
 
-                       ext4_block_set(fs->bdev, &jbd_block);
-                       ext4_block_set(fs->bdev, &block);
-                       return;
+                               ext4_block_set(fs->bdev, &jbd_block);
+                               ext4_block_set(fs->bdev, &block);
+                               return;
+                       } else {
+                               /* The revoked buffer is yet written. */
+                               jbd_trans_change_ownership(block_rec,
+                                               jbd_buf->trans);
+                       }
                }
        }
 }
@@ -1603,6 +1593,9 @@ int jbd_trans_set_block_dirty(struct jbd_trans *trans,
                              struct ext4_block *block)
 {
        struct jbd_buf *jbd_buf;
+       struct jbd_revoke_rec *rec, tmp_rec = {
+               .lba = block->lb_id
+       };
        struct jbd_block_rec *block_rec;
 
        if (block->buf->end_write == jbd_trans_end_write) {
@@ -1615,8 +1608,7 @@ int jbd_trans_set_block_dirty(struct jbd_trans *trans,
                return ENOMEM;
 
        if ((block_rec = jbd_trans_insert_block_rec(trans,
-                                       block->lb_id,
-                                       block->buf)) == NULL) {
+                                       block->lb_id)) == NULL) {
                free(jbd_buf);
                return ENOMEM;
        }
@@ -1639,6 +1631,13 @@ int jbd_trans_set_block_dirty(struct jbd_trans *trans,
        TAILQ_INSERT_HEAD(&trans->buf_queue, jbd_buf, buf_node);
 
        ext4_bcache_set_dirty(block->buf);
+       rec = RB_FIND(jbd_revoke_tree,
+                       &trans->revoke_root,
+                       &tmp_rec);
+       if (rec)
+               RB_REMOVE(jbd_revoke_tree, &trans->revoke_root,
+                         rec);
+
        return EOK;
 }
 
@@ -1655,7 +1654,7 @@ int jbd_trans_revoke_block(struct jbd_trans *trans,
                return ENOMEM;
 
        rec->lba = lba;
-       LIST_INSERT_HEAD(&trans->revoke_list, rec, revoke_node);
+       RB_INSERT(jbd_revoke_tree, &trans->revoke_root, rec);
        return EOK;
 }
 
@@ -1668,23 +1667,22 @@ int jbd_trans_revoke_block(struct jbd_trans *trans,
 int jbd_trans_try_revoke_block(struct jbd_trans *trans,
                               ext4_fsblk_t lba)
 {
-       int r = EOK;
        struct jbd_journal *journal = trans->journal;
-       struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
        struct jbd_block_rec *block_rec =
                jbd_trans_block_rec_lookup(journal, lba);
 
-       /* Make sure we don't flush any buffers belong to this transaction. */
-       if (block_rec && block_rec->trans != trans) {
-               /* If the buffer has not been flushed yet, flush it now. */
-               if (block_rec->buf) {
-                       r = ext4_block_flush_buf(fs->bdev, block_rec->buf);
-                       if (r != EOK)
-                               return r;
-
-               }
+       if (block_rec) {
+               if (block_rec->trans == trans) {
+                       struct jbd_buf *jbd_buf =
+                               TAILQ_LAST(&block_rec->dirty_buf_queue,
+                                       jbd_buf_dirty);
+                       /* If there are still unwritten buffers. */
+                       if (TAILQ_FIRST(&block_rec->dirty_buf_queue) !=
+                           jbd_buf)
+                               jbd_trans_revoke_block(trans, lba);
 
-               jbd_trans_revoke_block(trans, lba);
+               } else
+                       jbd_trans_revoke_block(trans, lba);
        }
 
        return EOK;
@@ -1719,13 +1717,14 @@ void jbd_journal_free_trans(struct jbd_journal *journal,
                jbd_trans_finish_callback(journal,
                                trans,
                                block_rec,
-                               abort);
+                               abort,
+                               false);
                TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
                free(jbd_buf);
        }
-       LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
+       RB_FOREACH_SAFE(rec, jbd_revoke_tree, &trans->revoke_root,
                          tmp2) {
-               LIST_REMOVE(rec, revoke_node);
+               RB_REMOVE(jbd_revoke_tree, &trans->revoke_root, rec);
                free(rec);
        }
        LIST_FOREACH_SAFE(block_rec, &trans->tbrec_list, tbrec_node,
@@ -1742,18 +1741,17 @@ void jbd_journal_free_trans(struct jbd_journal *journal,
 static int jbd_trans_write_commit_block(struct jbd_trans *trans)
 {
        int rc;
+       struct ext4_block block;
        struct jbd_commit_header *header;
-       uint32_t commit_iblock = 0;
-       struct ext4_block commit_block;
+       uint32_t commit_iblock, orig_commit_iblock;
        struct jbd_journal *journal = trans->journal;
 
        commit_iblock = jbd_journal_alloc_block(journal, trans);
-       rc = jbd_block_get_noread(journal->jbd_fs,
-                       &commit_block, commit_iblock);
+       rc = jbd_block_get_noread(journal->jbd_fs, &block, commit_iblock);
        if (rc != EOK)
                return rc;
 
-       header = (struct jbd_commit_header *)commit_block.data;
+       header = (struct jbd_commit_header *)block.data;
        jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER);
        jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);
        jbd_set32(&header->header, sequence, trans->trans_id);
@@ -1765,12 +1763,29 @@ static int jbd_trans_write_commit_block(struct jbd_trans *trans)
                jbd_set32(header, chksum[0], trans->data_csum);
        }
        jbd_commit_csum_set(journal->jbd_fs, header);
-       ext4_bcache_set_dirty(commit_block.buf);
-       rc = jbd_block_set(journal->jbd_fs, &commit_block);
+       ext4_bcache_set_dirty(block.buf);
+       ext4_bcache_set_flag(block.buf, BC_TMP);
+       rc = jbd_block_set(journal->jbd_fs, &block);
        if (rc != EOK)
                return rc;
 
-       return EOK;
+       orig_commit_iblock = commit_iblock;
+       commit_iblock++;
+       wrap(&journal->jbd_fs->sb, commit_iblock);
+
+       /* To prevent accidental reference to stale journalling metadata. */
+       if (orig_commit_iblock < commit_iblock) {
+               rc = jbd_block_get_noread(journal->jbd_fs, &block, commit_iblock);
+               if (rc != EOK)
+                       return rc;
+
+               memset(block.data, 0, journal->block_size);
+               ext4_bcache_set_dirty(block.buf);
+               ext4_bcache_set_flag(block.buf, BC_TMP);
+               rc = jbd_block_set(journal->jbd_fs, &block);
+       }
+
+       return rc;
 }
 
 /**@brief  Write descriptor block for a transaction
@@ -1781,19 +1796,25 @@ static int jbd_journal_prepare(struct jbd_journal *journal,
                               struct jbd_trans *trans)
 {
        int rc = EOK, i = 0;
+       struct ext4_block desc_block = EXT4_BLOCK_ZERO(),
+                         data_block = EXT4_BLOCK_ZERO();
        int32_t tag_tbl_size = 0;
        uint32_t desc_iblock = 0;
        uint32_t data_iblock = 0;
        char *tag_start = NULL, *tag_ptr = NULL;
        struct jbd_buf *jbd_buf, *tmp;
-       struct ext4_block desc_block, data_block;
        struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
        uint32_t checksum = EXT4_CRC32_INIT;
+       struct jbd_bhdr *bhdr = NULL;
+       void *data;
 
        /* Try to remove any non-dirty buffers from the tail of
         * buf_queue. */
        TAILQ_FOREACH_REVERSE_SAFE(jbd_buf, &trans->buf_queue,
                        jbd_trans_buf, buf_node, tmp) {
+               struct jbd_revoke_rec tmp_rec = {
+                       .lba = jbd_buf->block_rec->lba
+               };
                /* We stop the iteration when we find a dirty buffer. */
                if (ext4_bcache_test_flag(jbd_buf->block.buf,
                                        BC_DIRTY))
@@ -1808,12 +1829,12 @@ static int jbd_journal_prepare(struct jbd_journal *journal,
                jbd_trans_finish_callback(journal,
                                trans,
                                jbd_buf->block_rec,
-                               true);
-
-               /* The buffer has not been modified, just release
-                * that jbd_buf. */
+                               true,
+                               RB_FIND(jbd_revoke_tree,
+                                       &trans->revoke_root,
+                                       &tmp_rec));
                jbd_trans_remove_block_rec(journal,
-                               jbd_buf->block_rec, trans);
+                                       jbd_buf->block_rec, trans);
                trans->data_cnt--;
 
                ext4_block_set(fs->bdev, &jbd_buf->block);
@@ -1824,6 +1845,9 @@ static int jbd_journal_prepare(struct jbd_journal *journal,
        TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node, tmp) {
                struct tag_info tag_info;
                bool uuid_exist = false;
+               struct jbd_revoke_rec tmp_rec = {
+                       .lba = jbd_buf->block_rec->lba
+               };
                if (!ext4_bcache_test_flag(jbd_buf->block.buf,
                                           BC_DIRTY)) {
                        TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
@@ -1832,13 +1856,16 @@ static int jbd_journal_prepare(struct jbd_journal *journal,
 
                        jbd_buf->block.buf->end_write = NULL;
                        jbd_buf->block.buf->end_write_arg = NULL;
-                       jbd_trans_finish_callback(journal,
-                                       trans,
-                                       jbd_buf->block_rec,
-                                       true);
 
                        /* The buffer has not been modified, just release
                         * that jbd_buf. */
+                       jbd_trans_finish_callback(journal,
+                                       trans,
+                                       jbd_buf->block_rec,
+                                       true,
+                                       RB_FIND(jbd_revoke_tree,
+                                               &trans->revoke_root,
+                                               &tmp_rec));
                        jbd_trans_remove_block_rec(journal,
                                        jbd_buf->block_rec, trans);
                        trans->data_cnt--;
@@ -1854,15 +1881,11 @@ static int jbd_journal_prepare(struct jbd_journal *journal,
                                          trans->trans_id);
 again:
                if (!desc_iblock) {
-                       struct jbd_bhdr *bhdr;
                        desc_iblock = jbd_journal_alloc_block(journal, trans);
-                       rc = jbd_block_get_noread(journal->jbd_fs,
-                                          &desc_block, desc_iblock);
+                       rc = jbd_block_get_noread(journal->jbd_fs, &desc_block, desc_iblock);
                        if (rc != EOK)
                                break;
 
-                       ext4_bcache_set_dirty(desc_block.buf);
-
                        bhdr = (struct jbd_bhdr *)desc_block.data;
                        jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
                        jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK);
@@ -1880,6 +1903,8 @@ again:
                        if (!trans->start_iblock)
                                trans->start_iblock = desc_iblock;
 
+                       ext4_bcache_set_dirty(desc_block.buf);
+                       ext4_bcache_set_flag(desc_block.buf, BC_TMP);
                }
                tag_info.block = jbd_buf->block.lb_id;
                tag_info.uuid_exist = uuid_exist;
@@ -1899,28 +1924,37 @@ again:
                                tag_tbl_size,
                                &tag_info);
                if (rc != EOK) {
-                       jbd_meta_csum_set(journal->jbd_fs,
-                                       (struct jbd_bhdr *)desc_block.data);
-                       jbd_block_set(journal->jbd_fs, &desc_block);
+                       jbd_meta_csum_set(journal->jbd_fs, bhdr);
                        desc_iblock = 0;
+                       rc = jbd_block_set(journal->jbd_fs, &desc_block);
+                       if (rc != EOK)
+                               break;
+
                        goto again;
                }
 
                data_iblock = jbd_journal_alloc_block(journal, trans);
-               rc = jbd_block_get_noread(journal->jbd_fs,
-                               &data_block, data_iblock);
-               if (rc != EOK)
+               rc = jbd_block_get_noread(journal->jbd_fs, &data_block, data_iblock);
+               if (rc != EOK) {
+                       desc_iblock = 0;
+                       ext4_bcache_clear_dirty(desc_block.buf);
+                       jbd_block_set(journal->jbd_fs, &desc_block);
                        break;
+               }
 
-               ext4_bcache_set_dirty(data_block.buf);
-
-               memcpy(data_block.data, jbd_buf->block.data,
+               data = data_block.data;
+               memcpy(data, jbd_buf->block.data,
                        journal->block_size);
-               jbd_buf->jbd_lba = data_block.lb_id;
-
+               ext4_bcache_set_dirty(data_block.buf);
+               ext4_bcache_set_flag(data_block.buf, BC_TMP);
                rc = jbd_block_set(journal->jbd_fs, &data_block);
-               if (rc != EOK)
+               if (rc != EOK) {
+                       desc_iblock = 0;
+                       ext4_bcache_clear_dirty(desc_block.buf);
+                       jbd_block_set(journal->jbd_fs, &desc_block);
                        break;
+               }
+               jbd_buf->jbd_lba = data_iblock;
 
                tag_ptr += tag_info.tag_bytes;
                tag_tbl_size -= tag_info.tag_bytes;
@@ -1929,9 +1963,9 @@ again:
        }
        if (rc == EOK && desc_iblock) {
                jbd_meta_csum_set(journal->jbd_fs,
-                               (struct jbd_bhdr *)desc_block.data);
+                               (struct jbd_bhdr *)bhdr);
                trans->data_csum = checksum;
-               jbd_block_set(journal->jbd_fs, &desc_block);
+               rc = jbd_block_set(journal->jbd_fs, &desc_block);
        }
 
        return rc;
@@ -1946,31 +1980,28 @@ jbd_journal_prepare_revoke(struct jbd_journal *journal,
                           struct jbd_trans *trans)
 {
        int rc = EOK, i = 0;
+       struct ext4_block desc_block = EXT4_BLOCK_ZERO();
        int32_t tag_tbl_size = 0;
        uint32_t desc_iblock = 0;
        char *blocks_entry = NULL;
        struct jbd_revoke_rec *rec, *tmp;
-       struct ext4_block desc_block;
        struct jbd_revoke_header *header = NULL;
        int32_t record_len = 4;
+       struct jbd_bhdr *bhdr = NULL;
 
        if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
                                     JBD_FEATURE_INCOMPAT_64BIT))
                record_len = 8;
 
-       LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
+       RB_FOREACH_SAFE(rec, jbd_revoke_tree, &trans->revoke_root,
                          tmp) {
 again:
                if (!desc_iblock) {
-                       struct jbd_bhdr *bhdr;
                        desc_iblock = jbd_journal_alloc_block(journal, trans);
-                       rc = jbd_block_get_noread(journal->jbd_fs,
-                                          &desc_block, desc_iblock);
-                       if (rc != EOK) {
+                       rc = jbd_block_get_noread(journal->jbd_fs, &desc_block,
+                                                 desc_iblock);
+                       if (rc != EOK)
                                break;
-                       }
-
-                       ext4_bcache_set_dirty(desc_block.buf);
 
                        bhdr = (struct jbd_bhdr *)desc_block.data;
                        jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
@@ -1988,16 +2019,21 @@ again:
                        if (!trans->start_iblock)
                                trans->start_iblock = desc_iblock;
 
+                       ext4_bcache_set_dirty(desc_block.buf);
+                       ext4_bcache_set_flag(desc_block.buf, BC_TMP);
                }
 
                if (tag_tbl_size < record_len) {
                        jbd_set32(header, count,
                                  journal->block_size - tag_tbl_size);
-                       jbd_meta_csum_set(journal->jbd_fs,
-                                       (struct jbd_bhdr *)desc_block.data);
-                       jbd_block_set(journal->jbd_fs, &desc_block);
+                       jbd_meta_csum_set(journal->jbd_fs, bhdr);
+                       bhdr = NULL;
                        desc_iblock = 0;
                        header = NULL;
+                       rc = jbd_block_set(journal->jbd_fs, &desc_block);
+                       if (rc != EOK)
+                               break;
+
                        goto again;
                }
                if (record_len == 8) {
@@ -2019,9 +2055,8 @@ again:
                        jbd_set32(header, count,
                                  journal->block_size - tag_tbl_size);
 
-               jbd_meta_csum_set(journal->jbd_fs,
-                               (struct jbd_bhdr *)desc_block.data);
-               jbd_block_set(journal->jbd_fs, &desc_block);
+               jbd_meta_csum_set(journal->jbd_fs, bhdr);
+               rc = jbd_block_set(journal->jbd_fs, &desc_block);
        }
 
        return rc;
@@ -2065,9 +2100,9 @@ static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
        jbd_trans_finish_callback(journal,
                        trans,
                        jbd_buf->block_rec,
+                       false,
                        false);
-       if (block_rec->trans == trans) {
-               block_rec->buf = NULL;
+       if (block_rec->trans == trans && buf) {
                /* Clear the end_write and end_write_arg fields. */
                buf->end_write = NULL;
                buf->end_write_arg = NULL;
@@ -2101,11 +2136,12 @@ static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
  * @param  journal current journal session
  * @param  trans transaction
  * @return standard error code*/
-int jbd_journal_commit_trans(struct jbd_journal *journal,
-                            struct jbd_trans *trans)
+static int __jbd_journal_commit_trans(struct jbd_journal *journal,
+                                     struct jbd_trans *trans)
 {
        int rc = EOK;
        uint32_t last = journal->last;
+       struct jbd_revoke_rec *rec, *tmp;
 
        trans->trans_id = journal->alloc_trans_id;
        rc = jbd_journal_prepare(journal, trans);
@@ -2117,7 +2153,7 @@ int jbd_journal_commit_trans(struct jbd_journal *journal,
                goto Finish;
 
        if (TAILQ_EMPTY(&trans->buf_queue) &&
-           LIST_EMPTY(&trans->revoke_list)) {
+           RB_EMPTY(&trans->revoke_root)) {
                /* Since there are no entries in both buffer list
                 * and revoke entry list, we do not consider trans as
                 * complete transaction and just return EOK.*/
@@ -2130,7 +2166,43 @@ int jbd_journal_commit_trans(struct jbd_journal *journal,
                goto Finish;
 
        journal->alloc_trans_id++;
+
+       /* Complete the checkpoint of buffers which are revoked. */
+       RB_FOREACH_SAFE(rec, jbd_revoke_tree, &trans->revoke_root,
+                       tmp) {
+               struct jbd_block_rec *block_rec =
+                       jbd_trans_block_rec_lookup(journal, rec->lba);
+               struct jbd_buf *jbd_buf = NULL;
+               if (block_rec)
+                       jbd_buf = TAILQ_LAST(&block_rec->dirty_buf_queue,
+                                       jbd_buf_dirty);
+               if (jbd_buf) {
+                       struct ext4_buf *buf;
+                       struct ext4_block block = EXT4_BLOCK_ZERO();
+                       /*
+                        * We do this to reset the ext4_buf::end_write and
+                        * ext4_buf::end_write_arg fields so that the checkpoint
+                        * callback won't be triggered again.
+                        */
+                       buf = ext4_bcache_find_get(journal->jbd_fs->bdev->bc,
+                                       &block,
+                                       jbd_buf->block_rec->lba);
+                       jbd_trans_end_write(journal->jbd_fs->bdev->bc,
+                                       buf,
+                                       EOK,
+                                       jbd_buf);
+                       if (buf)
+                               ext4_block_set(journal->jbd_fs->bdev, &block);
+               }
+       }
+
        if (TAILQ_EMPTY(&journal->cp_queue)) {
+               /*
+                * This transaction is going to be the first object in the
+                * checkpoint queue.
+                * When the first transaction in checkpoint queue is completely
+                * written to disk, we shift the tail of the log to right.
+                */
                if (trans->data_cnt) {
                        journal->start = trans->start_iblock;
                        wrap(&journal->jbd_fs->sb, journal->start);
@@ -2149,20 +2221,52 @@ int jbd_journal_commit_trans(struct jbd_journal *journal,
                        jbd_journal_free_trans(journal, trans, false);
                }
        } else {
+               /* No need to do anything to the JBD superblock. */
                TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
                                trans_node);
                if (trans->data_cnt)
                        jbd_journal_cp_trans(journal, trans);
-
        }
 Finish:
-       if (rc != EOK) {
+       if (rc != EOK && rc != ENOSPC) {
                journal->last = last;
                jbd_journal_free_trans(journal, trans, true);
        }
        return rc;
 }
 
+/**@brief  Allocate a new transaction
+ * @param  journal current journal session
+ * @return transaction allocated*/
+struct jbd_trans *
+jbd_journal_new_trans(struct jbd_journal *journal)
+{
+       struct jbd_trans *trans = NULL;
+       trans = calloc(1, sizeof(struct jbd_trans));
+       if (!trans)
+               return NULL;
+
+       /* We will assign a trans_id to this transaction,
+        * once it has been committed.*/
+       trans->journal = journal;
+       trans->data_csum = EXT4_CRC32_INIT;
+       trans->error = EOK;
+       TAILQ_INIT(&trans->buf_queue);
+       return trans;
+}
+
+/**@brief  Commit a transaction to the journal immediately.
+ * @param  journal current journal session
+ * @param  trans transaction
+ * @return standard error code*/
+int jbd_journal_commit_trans(struct jbd_journal *journal,
+                            struct jbd_trans *trans)
+{
+       int r = EOK;
+       r = __jbd_journal_commit_trans(journal, trans);
+       return r;
+}
+
 /**
  * @}
  */
index cf7f2ca09b3ce917e237a8a0367cee2fbe4daac2..7a91dbc61341cb51c2b277a4070b0b55115d013c 100644 (file)
 #include "ext4_fs.h"
 #include "ext4_journal.h"
 
-static int ext4_trans_get_write_access(struct ext4_fs *fs __unused,
-                               struct ext4_block *block __unused)
-{
-       int r = EOK;
-#if CONFIG_JOURNALING_ENABLE
-       if (fs->jbd_journal && fs->curr_trans) {
-               struct jbd_journal *journal = fs->jbd_journal;
-               struct jbd_trans *trans = fs->curr_trans;
-               r = jbd_trans_get_access(journal, trans, block);
-       }
-#endif
-       return r;
-}
-
 int ext4_trans_set_block_dirty(struct ext4_buf *buf)
 {
        int r = EOK;
@@ -86,10 +72,6 @@ int ext4_trans_block_get_noread(struct ext4_blockdev *bdev,
        if (r != EOK)
                return r;
 
-       r = ext4_trans_get_write_access(bdev->fs, b);
-       if (r != EOK)
-               ext4_block_set(bdev, b);
-
        return r;
 }
 
@@ -101,10 +83,6 @@ int ext4_trans_block_get(struct ext4_blockdev *bdev,
        if (r != EOK)
                return r;
 
-       r = ext4_trans_get_write_access(bdev->fs, b);
-       if (r != EOK)
-               ext4_block_set(bdev, b);
-
        return r;
 }