ext4_journal: initial journal replay support.
[lwext4.git] / lwext4 / ext4_journal.c
index 033ea45b0092d64eb97fcadac856db5a49a724fd..e6d9c83f219949d41da72f2d4254f9e115c6f02f 100644 (file)
 #include "ext4_blockdev.h"
 #include "ext4_crc32c.h"
 #include "ext4_debug.h"
+#include "tree.h"
 
 #include <string.h>
+#include <malloc.h>
+
+struct revoke_entry {  /* One revoked block; a node of the jbd_revoke RB-tree. */
+       ext4_fsblk_t block;     /* Revoked block number; the only field the tree comparator looks at. */
+       uint32_t trans_id;      /* info->this_trans_id at the time the block was last added. */
+       RB_ENTRY(revoke_entry) revoke_node;     /* Tree linkage. */
+};
+
+struct recover_info {  /* State shared by the jbd_iterate_log() passes run from jbd_recover(). */
+       uint32_t start_trans_id;        /* NOTE(review): presumably the first transaction id in the log -- set outside this view, confirm. */
+       uint32_t last_trans_id;         /* NOTE(review): presumably the last valid transaction id -- set outside this view, confirm. */
+       uint32_t this_trans_id;         /* Transaction id of the revoke block currently being added to the tree. */
+       RB_HEAD(jbd_revoke, revoke_entry) revoke_root;  /* Root of the revoke tree. */
+};
+
+struct replay_arg {    /* Argument bundle handed to jbd_replay_block_tags(). */
+       struct recover_info *info;      /* Recovery state; supplies the revoke tree. */
+       uint32_t *this_block;           /* Journal block cursor; incremented once per tag. */
+};
+
+/*
+ * Ordering function for the jbd_revoke RB-tree.
+ *
+ * Entries are ordered by block number alone; trans_id is ignored.
+ * Returns 0 when both entries name the same block, -1 when a sorts
+ * before b and 1 when a sorts after b.
+ */
+static int
+jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b)
+{
+       if (a->block == b->block)
+               return 0;
+
+       return (a->block < b->block) ? -1 : 1;
+}
+
+RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
+                    jbd_revoke_entry_cmp, static inline) /* instantiates the jbd_revoke tree operations used through RB_FIND/RB_INSERT/RB_REMOVE/RB_MIN in this file */
+
+#define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry)) /* zero-filled entry; calloc() may return NULL */
+#define jbd_free_revoke_entry(addr) free(addr) /* releases an entry from jbd_alloc_revoke_entry() */
 
 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
                   ext4_lblk_t iblock,
@@ -50,12 +86,12 @@ int jbd_sb_read(struct jbd_fs *jbd_fs, struct jbd_sb *s)
 
 static bool jbd_verify_sb(struct jbd_sb *sb)
 {
-       struct jbd_bhdr *bhdr = &sb->header;
-       if (bhdr->magic != to_be32(JBD_MAGIC_NUMBER))
+       struct jbd_bhdr *header = &sb->header;
+       if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER)
                return false;
 
-       if (bhdr->blocktype != to_be32(JBD_SUPERBLOCK) &&
-           bhdr->blocktype != to_be32(JBD_SUPERBLOCK_V2))
+       if (jbd_get32(header, blocktype) != JBD_SUPERBLOCK &&
+           jbd_get32(header, blocktype) != JBD_SUPERBLOCK_V2)
                return false;
 
        return true;
@@ -89,6 +125,9 @@ int jbd_get_fs(struct ext4_fs *fs,
+/*
+ * Write the journal superblock back if jbd_fs->dirty is set, then drop
+ * the reference held on the journal inode.
+ *
+ * The inode reference is released even when the superblock write fails.
+ * Returns the superblock write error if there was one, otherwise the
+ * result of ext4_fs_put_inode_ref().
+ */
 int jbd_put_fs(struct jbd_fs *jbd_fs)
 {
        int rc;
+       int sb_rc = EOK;
+
+       /* Keep the write status: recovery relies on this update landing. */
+       if (jbd_fs->dirty)
+               sb_rc = jbd_sb_write(jbd_fs, &jbd_fs->sb);
+
        rc = ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
+       if (sb_rc != EOK)
+               return sb_rc;
+
        return rc;
 }
@@ -146,21 +185,293 @@ int jbd_block_set(struct jbd_fs *jbd_fs,
                              block);
 }
 
+/*
+ * Size in bytes of one block tag in a descriptor block, as selected by
+ * the journal superblock's incompat features:
+ *  - CSUM_V3: sizeof(struct jbd_block_tag3), regardless of other bits;
+ *  - otherwise sizeof(struct jbd_block_tag), plus a uint16_t when
+ *    CSUM_V2 is set, minus a uint32_t when 64BIT is not set.
+ */
+int jbd_tag_bytes(struct jbd_fs *jbd_fs)
+{
+       struct jbd_sb *sb = &jbd_fs->sb;
+       int size = sizeof(struct jbd_block_tag);
+
+       if (JBD_HAS_INCOMPAT_FEATURE(sb, JBD_FEATURE_INCOMPAT_CSUM_V3))
+               return sizeof(struct jbd_block_tag3);
+
+       if (JBD_HAS_INCOMPAT_FEATURE(sb, JBD_FEATURE_INCOMPAT_CSUM_V2))
+               size += sizeof(uint16_t);
+
+       /* Without 64BIT the high 32 bits of the block number are absent. */
+       if (!JBD_HAS_INCOMPAT_FEATURE(sb, JBD_FEATURE_INCOMPAT_64BIT))
+               size -= sizeof(uint32_t);
+
+       return size;
+}
+
+/*
+ * Decode one descriptor-block tag.
+ *
+ * @jbd_fs      journal whose superblock features select the tag layout
+ * @tag_bytes   size of the tag itself; a UUID, when present, starts
+ *              right behind it
+ * @__tag       first byte of the tag
+ * @block       out: block number from the tag, forced to 0 when
+ *              JBD_FLAG_ESCAPE is set
+ * @uuid_exist  out: true when JBD_FLAG_SAME_UUID is clear
+ * @uuid        out: UUID_SIZE bytes copied from behind the tag; written
+ *              only when *uuid_exist ends up true
+ * @last_tag    out: true when JBD_FLAG_LAST_TAG is set
+ *
+ * NOTE(review): the UUID copy is not bounds-checked here and the caller
+ * only checks that tag_bytes fit in the table -- confirm a truncated
+ * table cannot reach this point.
+ * NOTE(review): an escaped tag reports block 0 and the replay callback
+ * does not treat 0 specially -- confirm this is the intended handling.
+ */
+static void
+jbd_extract_block_tag(struct jbd_fs *jbd_fs,
+                     uint32_t tag_bytes,
+                     void *__tag,
+                     ext4_fsblk_t *block,
+                     bool *uuid_exist,
+                     uint8_t *uuid,
+                     bool *last_tag)
+{
+       uint32_t tag_flags;
+       ext4_fsblk_t nr;
+
+       /* The two layouts differ in field placement and in flag width. */
+       if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
+                                    JBD_FEATURE_INCOMPAT_CSUM_V3)) {
+               struct jbd_block_tag3 *tag3 = __tag;
+
+               nr = jbd_get32(tag3, blocknr);
+               if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
+                                            JBD_FEATURE_INCOMPAT_64BIT))
+                       nr |= (uint64_t)jbd_get32(tag3, blocknr_high) << 32;
+
+               tag_flags = jbd_get32(tag3, flags);
+       } else {
+               struct jbd_block_tag *tag1 = __tag;
+
+               nr = jbd_get32(tag1, blocknr);
+               if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
+                                            JBD_FEATURE_INCOMPAT_64BIT))
+                       nr |= (uint64_t)jbd_get32(tag1, blocknr_high) << 32;
+
+               tag_flags = jbd_get16(tag1, flags);
+       }
+
+       if (tag_flags & JBD_FLAG_ESCAPE)
+               nr = 0;
+       *block = nr;
+
+       *uuid_exist = false;
+       if (!(tag_flags & JBD_FLAG_SAME_UUID)) {
+               memcpy(uuid, (char *)__tag + tag_bytes, UUID_SIZE);
+               *uuid_exist = true;
+       }
+
+       *last_tag = false;
+       if (tag_flags & JBD_FLAG_LAST_TAG)
+               *last_tag = true;
+}
+
+/*
+ * Walk the tags of a descriptor block and call @func for each of them.
+ *
+ * @jbd_fs        journal the table belongs to
+ * @__tag_start   first byte of the tag table
+ * @tag_tbl_size  bytes available from @__tag_start; sizeof(struct
+ *                jbd_block_tail) is subtracted when CSUM_V2 or CSUM_V3
+ *                is enabled
+ * @func          per-tag callback, may be NULL; receives the decoded
+ *                block number and a UUID buffer
+ * @arg           opaque pointer forwarded to @func
+ *
+ * The walk ends after the tag flagged as last, or once a whole tag no
+ * longer fits in the remaining table space.  The UUID buffer handed to
+ * @func is refreshed only by tags that carry a UUID.
+ */
+static void
+jbd_iterate_block_table(struct jbd_fs *jbd_fs,
+                       void *__tag_start,
+                       uint32_t tag_tbl_size,
+                       void (*func)(struct jbd_fs * jbd_fs,
+                                       ext4_fsblk_t block,
+                                       uint8_t *uuid,
+                                       void *arg),
+                       void *arg)
+{
+       char *table = __tag_start;
+       uint32_t tag_bytes = jbd_tag_bytes(jbd_fs);
+       size_t offset = 0;
+       ext4_fsblk_t block = 0;
+       uint8_t uuid[UUID_SIZE];
+
+       if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
+                                    JBD_FEATURE_INCOMPAT_CSUM_V2) ||
+           JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
+                                    JBD_FEATURE_INCOMPAT_CSUM_V3))
+               tag_tbl_size -= sizeof(struct jbd_block_tail);
+
+       while (offset + tag_bytes <= tag_tbl_size) {
+               bool uuid_exist;
+               bool last_tag;
+
+               jbd_extract_block_tag(jbd_fs, tag_bytes, table + offset,
+                                     &block, &uuid_exist, uuid, &last_tag);
+               if (func)
+                       func(jbd_fs, block, uuid, arg);
+
+               if (last_tag)
+                       break;
+
+               /* A tag with its own UUID occupies UUID_SIZE extra bytes. */
+               offset += uuid_exist ? tag_bytes + UUID_SIZE : tag_bytes;
+       }
+}
+
+/*
+ * Tag callback that logs the tag's block number and advances the
+ * journal block counter that @arg points to by one.
+ *
+ * @jbd_fs and @uuid are unused.
+ */
+static void jbd_display_block_tags(struct jbd_fs *jbd_fs,
+                                  ext4_fsblk_t block,
+                                  uint8_t *uuid,
+                                  void *arg)
+{
+       uint32_t *cursor = arg;
+
+       (void)jbd_fs;
+       (void)uuid;
+
+       ext4_dbg(DEBUG_JBD, "Block in block_tag: %" PRIu64 "\n", block);
+       *cursor += 1;
+}
+
+/*
+ * Find the revoke tree entry for @block.
+ *
+ * Returns the matching entry, or NULL when @block is not in the tree.
+ */
+static struct revoke_entry *
+jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)
+{
+       struct revoke_entry key;
+
+       /* Only .block takes part in the comparison; zero the rest. */
+       memset(&key, 0, sizeof(key));
+       key.block = block;
+
+       return RB_FIND(jbd_revoke, &info->revoke_root, &key);
+}
+
+/*
+ * Tag callback for the ACTION_RECOVER pass: copy the journal block that
+ * backs one tag to filesystem block @block.
+ *
+ * @jbd_fs  journal being replayed
+ * @block   block number decoded from the tag
+ * @uuid    unused
+ * @__arg   struct replay_arg: recovery state plus the journal cursor
+ *
+ * The cursor is advanced for every tag, revoked or not, and wraps from
+ * the superblock's maxlen back to first, so a transaction that straddles
+ * the end of the circular log is read from the right journal blocks.
+ *
+ * Failures to get either block make the tag be skipped; the callback
+ * signature offers no way to report them.
+ *
+ * NOTE(review): any revoke entry for @block suppresses the replay; the
+ * entry's trans_id is not compared with the transaction being replayed.
+ * Confirm whether a revoke should only cover transactions up to its own.
+ */
+static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
+                                 ext4_fsblk_t block,
+                                 uint8_t *uuid __unused,
+                                 void *__arg)
+{
+       int r;
+       struct replay_arg *arg = __arg;
+       struct recover_info *info = arg->info;
+       uint32_t *this_block = arg->this_block;
+       uint32_t log_end = jbd_get32(&jbd_fs->sb, maxlen);
+       uint32_t log_first = jbd_get32(&jbd_fs->sb, first);
+       struct revoke_entry *revoke_entry;
+       struct ext4_block journal_block, ext4_block;
+
+       ext4_dbg(DEBUG_JBD,
+                "Replaying block in block_tag: %" PRIu64 "\n",
+                block);
+
+       /* Step to this tag's data block, wrapping around the log. */
+       (*this_block)++;
+       if (*this_block >= log_end)
+               *this_block -= log_end - log_first;
+
+       revoke_entry = jbd_revoke_entry_lookup(info, block);
+       if (revoke_entry)
+               return;
+
+       r = jbd_block_get(jbd_fs, &journal_block, *this_block);
+       if (r != EOK)
+               return;
+
+       /* The target is overwritten in full, so it need not be read. */
+       r = ext4_block_get_noread(jbd_fs->bdev, &ext4_block, block);
+       if (r != EOK) {
+               jbd_block_set(jbd_fs, &journal_block);
+               return;
+       }
+
+       memcpy(ext4_block.data,
+              journal_block.data,
+              jbd_get32(&jbd_fs->sb, blocksize));
+
+       ext4_block.dirty = true;
+       ext4_block_set(jbd_fs->bdev, &ext4_block);
+       jbd_block_set(jbd_fs, &journal_block);
+}
+
+/*
+ * Mark @block as revoked by transaction info->this_trans_id.
+ *
+ * An existing entry for @block only has its trans_id overwritten;
+ * otherwise a new entry is allocated and inserted into the revoke tree.
+ *
+ * NOTE(review): allocation failure is caught by ext4_assert() only.
+ */
+static void jbd_add_revoke_block_tags(struct recover_info *info,
+                                     ext4_fsblk_t block)
+{
+       struct revoke_entry *entry;
+
+       ext4_dbg(DEBUG_JBD, "Add block %" PRIu64 " to revoke tree\n", block);
+
+       entry = jbd_revoke_entry_lookup(info, block);
+       if (!entry) {
+               entry = jbd_alloc_revoke_entry();
+               ext4_assert(entry);
+               entry->block = block;
+               entry->trans_id = info->this_trans_id;
+               RB_INSERT(jbd_revoke, &info->revoke_root, entry);
+               return;
+       }
+
+       entry->trans_id = info->this_trans_id;
+}
+
+/*
+ * Empty the revoke tree: repeatedly unlink the smallest entry and free
+ * it until no entry is left.
+ */
+static void jbd_destroy_revoke_tree(struct recover_info *info)
+{
+       struct revoke_entry *entry;
+
+       for (entry = RB_MIN(jbd_revoke, &info->revoke_root);
+            entry != NULL;
+            entry = RB_MIN(jbd_revoke, &info->revoke_root)) {
+               RB_REMOVE(jbd_revoke, &info->revoke_root, entry);
+               jbd_free_revoke_entry(entry);
+       }
+}
+
 /* Make sure we wrap around the log correctly! */
 #define wrap(sb, var)                                          \
 do {                                                                   \
-       if (var >= to_be32((sb)->maxlen))                                       \
-               var -= (to_be32((sb)->maxlen) - to_be32((sb)->first));  \
+       if (var >= jbd_get32((sb), maxlen))                                     \
+               var -= (jbd_get32((sb), maxlen) - jbd_get32((sb), first));      \
 } while (0)
 
 #define ACTION_SCAN 0
 #define ACTION_REVOKE 1
 #define ACTION_RECOVER 2
 
-struct recover_info {
-       uint32_t start_trans_id;
-       uint32_t last_trans_id;
-};
+
+/*
+ * Add every block number listed in a revoke block to info's revoke tree.
+ *
+ * @jbd_fs  journal being recovered
+ * @header  start of the revoke block (a struct jbd_revoke_header)
+ * @info    recovery state owning the revoke tree
+ *
+ * The header's count field is the number of bytes used in the block,
+ * header included; records are 8 bytes wide with the 64BIT feature and
+ * 4 bytes otherwise.  On-disk values are big-endian: count is read with
+ * jbd_get32() and each record is assembled byte by byte, which is
+ * independent of host byte order and of the record's alignment.
+ *
+ * A count smaller than the header or larger than the journal block size
+ * cannot be valid; such a block is logged and ignored.
+ */
+static void jbd_build_revoke_tree(struct jbd_fs *jbd_fs,
+                                 struct jbd_bhdr *header,
+                                 struct recover_info *info)
+{
+       struct jbd_revoke_header *revoke_hdr =
+               (struct jbd_revoke_header *)header;
+       const uint8_t *record = (const uint8_t *)(revoke_hdr + 1);
+       uint32_t used_bytes = jbd_get32(revoke_hdr, count);
+       uint32_t block_bytes = jbd_get32(&jbd_fs->sb, blocksize);
+       uint32_t i, j, nr_entries, record_len = 4;
+
+       if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
+                                    JBD_FEATURE_INCOMPAT_64BIT))
+               record_len = 8;
+
+       /* Guards the subtraction below and keeps reads inside the block. */
+       if (used_bytes < sizeof(struct jbd_revoke_header) ||
+           used_bytes > block_bytes) {
+               ext4_dbg(DEBUG_JBD,
+                        "Bad revoke block count: %" PRIu32 "\n",
+                        used_bytes);
+               return;
+       }
+
+       nr_entries = (used_bytes - sizeof(struct jbd_revoke_header)) /
+                       record_len;
+
+       for (i = 0; i < nr_entries; i++) {
+               ext4_fsblk_t block = 0;
+
+               /* Most significant byte first. */
+               for (j = 0; j < record_len; j++)
+                       block = (block << 8) | record[j];
+
+               jbd_add_revoke_block_tags(info, block);
+               record += record_len;
+       }
+}
+
+/*
+ * Run jbd_display_block_tags() over the tag table of a descriptor block.
+ *
+ * The table starts right behind @header and is taken to extend to the
+ * end of the journal block.  @iblock is the block counter the callback
+ * advances once per tag.
+ */
+static void jbd_debug_descriptor_block(struct jbd_fs *jbd_fs,
+                                      struct jbd_bhdr *header,
+                                      uint32_t *iblock)
+{
+       void *tag_table = header + 1;
+       uint32_t table_bytes = jbd_get32(&jbd_fs->sb, blocksize) -
+                               sizeof(struct jbd_bhdr);
+
+       jbd_iterate_block_table(jbd_fs, tag_table, table_bytes,
+                               jbd_display_block_tags, iblock);
+}
+
+/*
+ * Run jbd_replay_block_tags() over the tag table of a descriptor block.
+ *
+ * The table starts right behind @header and is taken to extend to the
+ * end of the journal block.  @arg carries the recovery state and the
+ * journal block cursor for the callback.
+ */
+static void jbd_replay_descriptor_block(struct jbd_fs *jbd_fs,
+                                       struct jbd_bhdr *header,
+                                       struct replay_arg *arg)
+{
+       void *tag_table = header + 1;
+       uint32_t table_bytes = jbd_get32(&jbd_fs->sb, blocksize) -
+                               sizeof(struct jbd_bhdr);
+
+       jbd_iterate_block_table(jbd_fs, tag_table, table_bytes,
+                               jbd_replay_block_tags, arg);
+}
 
 int jbd_iterate_log(struct jbd_fs *jbd_fs,
                    struct recover_info *info,
@@ -172,8 +483,8 @@ int jbd_iterate_log(struct jbd_fs *jbd_fs,
        uint32_t start_trans_id, this_trans_id;
        uint32_t start_block, this_block;
 
-       start_trans_id = this_trans_id = to_be32(sb->sequence);
-       start_block = this_block = to_be32(sb->start);
+       start_trans_id = this_trans_id = jbd_get32(sb, sequence);
+       start_block = this_block = jbd_get32(sb, start);
 
        ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %" PRIu32 "\n",
                            start_trans_id);
@@ -192,24 +503,37 @@ int jbd_iterate_log(struct jbd_fs *jbd_fs,
                        break;
 
                header = (struct jbd_bhdr *)block.data;
-               if (header->magic != to_be32(JBD_MAGIC_NUMBER)) {
+               if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER) {
+                       jbd_block_set(jbd_fs, &block);
                        log_end = true;
                        continue;
                }
 
-               if (header->sequence != to_be32(this_trans_id)) {
-                       if (this_trans_id <= info->last_trans_id)
+               if (jbd_get32(header, sequence) != this_trans_id) {
+                       if (action != ACTION_SCAN)
                                r = EIO;
 
+                       jbd_block_set(jbd_fs, &block);
                        log_end = true;
                        continue;
                }
 
-               switch (header->blocktype) {
+               switch (jbd_get32(header, blocktype)) {
                case JBD_DESCRIPTOR_BLOCK:
                        ext4_dbg(DEBUG_JBD, "Descriptor block: %u, "
                                            "trans_id: %u\n",
                                            this_block, this_trans_id);
+                       if (action == ACTION_SCAN)
+                               jbd_debug_descriptor_block(jbd_fs,
+                                               header, &this_block);
+                       else if (action == ACTION_RECOVER) {
+                               struct replay_arg replay_arg;
+                               replay_arg.info = info;
+                               replay_arg.this_block = &this_block;
+                               jbd_replay_descriptor_block(jbd_fs,
+                                               header, &replay_arg);
+                       }
+
                        break;
                case JBD_COMMIT_BLOCK:
                        ext4_dbg(DEBUG_JBD, "Commit block: %u, "
@@ -221,6 +545,11 @@ int jbd_iterate_log(struct jbd_fs *jbd_fs,
                        ext4_dbg(DEBUG_JBD, "Revoke block: %u, "
                                            "trans_id: %u\n",
                                            this_block, this_trans_id);
+                       if (action == ACTION_REVOKE) {
+                               info->this_trans_id = this_trans_id;
+                               jbd_build_revoke_tree(jbd_fs,
+                                               header, info);
+                       }
                        break;
                default:
                        log_end = true;
@@ -253,6 +582,21 @@ int jbd_recover(struct jbd_fs *jbd_fs)
        if (!sb->start)
                return EOK;
 
+       RB_INIT(&info.revoke_root);
+
        r = jbd_iterate_log(jbd_fs, &info, ACTION_SCAN);
+       if (r != EOK)
+               return r;
+
+       r = jbd_iterate_log(jbd_fs, &info, ACTION_REVOKE);
+       if (r != EOK)
+               return r;
+
+       r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);
+       if (r == EOK) {
+               jbd_set32(&jbd_fs->sb, start, 0);
+               jbd_fs->dirty = true;
+       }
+       jbd_destroy_revoke_tree(&info);
        return r;
 }