ext4_journal: fix not wrapping blocks when recovering.
[lwext4.git] / src / ext4_journal.c
1 /*
2  * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3  * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * - Redistributions of source code must retain the above copyright
11  *   notice, this list of conditions and the following disclaimer.
12  * - Redistributions in binary form must reproduce the above copyright
13  *   notice, this list of conditions and the following disclaimer in the
14  *   documentation and/or other materials provided with the distribution.
15  * - The name of the author may not be used to endorse or promote products
16  *   derived from this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30 /** @addtogroup lwext4
31  * @{
32  */
33 /**
34  * @file  ext4_journal.c
35  * @brief Journal handle functions
36  */
37
38 #include "ext4_config.h"
39 #include "ext4_types.h"
40 #include "ext4_misc.h"
41 #include "ext4_errno.h"
42 #include "ext4_debug.h"
43
44 #include "ext4_fs.h"
45 #include "ext4_super.h"
46 #include "ext4_journal.h"
47 #include "ext4_blockdev.h"
48 #include "ext4_crc32.h"
49 #include "ext4_journal.h"
50
51 #include <string.h>
52 #include <stdlib.h>
53
54 /**@brief  Revoke entry during journal replay.*/
55 struct revoke_entry {
56         /**@brief  Block number not to be replayed.*/
57         ext4_fsblk_t block;
58
59         /**@brief  For any transaction id smaller
60          *         than trans_id, records of @block
61          *         in those transactions should not
62          *         be replayed.*/
63         uint32_t trans_id;
64
65         /**@brief  Revoke tree node.*/
66         RB_ENTRY(revoke_entry) revoke_node;
67 };
68
69 /**@brief  Valid journal replay information.*/
70 struct recover_info {
71         /**@brief  Starting transaction id.*/
72         uint32_t start_trans_id;
73
74         /**@brief  Ending transaction id.*/
75         uint32_t last_trans_id;
76
77         /**@brief  Used as internal argument.*/
78         uint32_t this_trans_id;
79
80         /**@brief  No of transactions went through.*/
81         uint32_t trans_cnt;
82
83         /**@brief  RB-Tree storing revoke entries.*/
84         RB_HEAD(jbd_revoke, revoke_entry) revoke_root;
85 };
86
/**@brief  Argument bundle handed to the per-tag replay callback.*/
struct replay_arg {
	/**@brief  Recovery state (revoke tree, transaction ids).*/
	struct recover_info *info;

	/**@brief  Journal block index currently being processed;
	 *         the callback advances it through this pointer.*/
	uint32_t *this_block;

	/**@brief  Id of the transaction currently being processed.*/
	uint32_t this_trans_id;
};
98
/* Make sure we wrap around the log correctly!
 *
 * When block index @var has reached or passed the journal's maxlen,
 * it is pulled back by (maxlen - first).
 * @var must be a modifiable lvalue. Both arguments are expanded more
 * than once, so they must be free of side effects. */
#define wrap(sb, var)							\
do {									\
	if ((var) >= jbd_get32((sb), maxlen))				\
		(var) -= (jbd_get32((sb), maxlen) -			\
			  jbd_get32((sb), first));			\
} while (0)
105
106
107 static int
108 jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b)
109 {
110         if (a->block > b->block)
111                 return 1;
112         else if (a->block < b->block)
113                 return -1;
114         return 0;
115 }
116
117 static int
118 jbd_block_rec_cmp(struct jbd_block_rec *a, struct jbd_block_rec *b)
119 {
120         if (a->lba > b->lba)
121                 return 1;
122         else if (a->lba < b->lba)
123                 return -1;
124         return 0;
125 }
126
127 RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
128                      jbd_revoke_entry_cmp, static inline)
129 RB_GENERATE_INTERNAL(jbd_block, jbd_block_rec, block_rec_node,
130                      jbd_block_rec_cmp, static inline)
131
/* Heap helpers for revoke entries. Allocation goes through calloc, so
 * a new entry starts out zero-filled (or is NULL on failure). */
#define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry))
#define jbd_free_revoke_entry(addr) free(addr)
134
135 static int jbd_has_csum(struct jbd_sb *jbd_sb)
136 {
137         if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V2))
138                 return 2;
139
140         if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V3))
141                 return 3;
142
143         return 0;
144 }
145
#if CONFIG_META_CSUM_ENABLE
/**@brief  Compute the crc32c of the jbd superblock.
 * @param  jbd_sb jbd superblock
 * @return checksum, or 0 when the journal has no checksum feature*/
static uint32_t jbd_sb_csum(struct jbd_sb *jbd_sb)
{
	uint32_t saved;
	uint32_t crc;

	if (!jbd_has_csum(jbd_sb))
		return 0;

	/* The checksum field must read as zero while being hashed;
	 * the stored value is put back afterwards. */
	saved = jbd_sb->checksum;
	jbd_set32(jbd_sb, checksum, 0);
	crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_sb, JBD_SUPERBLOCK_SIZE);
	jbd_sb->checksum = saved;

	return crc;
}
#else
#define jbd_sb_csum(...) 0
#endif
164
165 static void jbd_sb_csum_set(struct jbd_sb *jbd_sb)
166 {
167         if (!jbd_has_csum(jbd_sb))
168                 return;
169
170         jbd_set32(jbd_sb, checksum, jbd_sb_csum(jbd_sb));
171 }
172
#if CONFIG_META_CSUM_ENABLE
/**@brief  Check the stored superblock checksum.
 * @param  jbd_sb jbd superblock
 * @return true when checksums are not in use or the stored value
 *         matches the computed one*/
static bool
jbd_verify_sb_csum(struct jbd_sb *jbd_sb)
{
	if (jbd_has_csum(jbd_sb))
		return jbd_sb_csum(jbd_sb) == jbd_get32(jbd_sb, checksum);

	return true;
}
#else
#define jbd_verify_sb_csum(...) true
#endif
185
#if CONFIG_META_CSUM_ENABLE
/**@brief  Compute the crc32c of a journal metadata block.
 *         The hash is seeded with the journal UUID and then covers
 *         the whole block, with the tail checksum field zeroed
 *         for the duration of the computation.
 * @param  jbd_fs jbd filesystem
 * @param  bhdr start of the block
 * @return checksum, or 0 when the journal has no checksum feature*/
static uint32_t jbd_meta_csum(struct jbd_fs *jbd_fs,
			      struct jbd_bhdr *bhdr)
{
	struct jbd_block_tail *tail;
	uint32_t blk_sz;
	uint32_t saved;
	uint32_t crc;

	if (!jbd_has_csum(&jbd_fs->sb))
		return 0;

	/* The tail occupies the last bytes of the block. */
	blk_sz = jbd_get32(&jbd_fs->sb, blocksize);
	tail = (struct jbd_block_tail *)((char *)bhdr + blk_sz -
					 sizeof(struct jbd_block_tail));

	saved = tail->checksum;
	tail->checksum = 0;

	/* Hash the fs uuid first, then the whole block. */
	crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
			  sizeof(jbd_fs->sb.uuid));
	crc = ext4_crc32c(crc, bhdr, blk_sz);

	tail->checksum = saved;
	return crc;
}
#else
#define jbd_meta_csum(...) 0
#endif
213
214 static void jbd_meta_csum_set(struct jbd_fs *jbd_fs,
215                               struct jbd_bhdr *bhdr)
216 {
217         uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
218         struct jbd_block_tail *tail = (struct jbd_block_tail *)
219                                 ((char *)bhdr + block_size -
220                                 sizeof(struct jbd_block_tail));
221         if (!jbd_has_csum(&jbd_fs->sb))
222                 return;
223
224         tail->checksum = to_be32(jbd_meta_csum(jbd_fs, bhdr));
225 }
226
#if CONFIG_META_CSUM_ENABLE
/**@brief  Check the checksum stored in a metadata block's tail.
 * @param  jbd_fs jbd filesystem
 * @param  bhdr start of the block
 * @return true when checksums are not in use or the stored value
 *         matches the computed one*/
static bool
jbd_verify_meta_csum(struct jbd_fs *jbd_fs,
		     struct jbd_bhdr *bhdr)
{
	struct jbd_block_tail *tail;
	uint32_t blk_sz;

	if (!jbd_has_csum(&jbd_fs->sb))
		return true;

	/* The tail occupies the last bytes of the block. */
	blk_sz = jbd_get32(&jbd_fs->sb, blocksize);
	tail = (struct jbd_block_tail *)((char *)bhdr + blk_sz -
					 sizeof(struct jbd_block_tail));

	return jbd_meta_csum(jbd_fs, bhdr) == to_be32(tail->checksum);
}
#else
#define jbd_verify_meta_csum(...) true
#endif
244
#if CONFIG_META_CSUM_ENABLE
/**@brief  Compute the crc32c of a commit block.
 *         chksum_type, chksum_size and chksum[0] are zeroed while
 *         hashing and restored before returning.
 * @param  jbd_fs jbd filesystem
 * @param  header commit block header (start of the block)
 * @return checksum, or 0 when the journal has no checksum feature*/
static uint32_t jbd_commit_csum(struct jbd_fs *jbd_fs,
			      struct jbd_commit_header *header)
{
	uint32_t saved_type, saved_size, saved_csum;
	uint32_t blk_sz;
	uint32_t crc;

	if (!jbd_has_csum(&jbd_fs->sb))
		return 0;

	saved_type = header->chksum_type;
	saved_size = header->chksum_size;
	saved_csum = header->chksum[0];
	blk_sz = jbd_get32(&jbd_fs->sb, blocksize);

	header->chksum_type = 0;
	header->chksum_size = 0;
	header->chksum[0] = 0;

	/* Hash the fs uuid first, then the whole block. */
	crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
			  sizeof(jbd_fs->sb.uuid));
	crc = ext4_crc32c(crc, header, blk_sz);

	header->chksum_type = saved_type;
	header->chksum_size = saved_size;
	header->chksum[0] = saved_csum;

	return crc;
}
#else
#define jbd_commit_csum(...) 0
#endif
276
277 static void jbd_commit_csum_set(struct jbd_fs *jbd_fs,
278                               struct jbd_commit_header *header)
279 {
280         if (!jbd_has_csum(&jbd_fs->sb))
281                 return;
282
283         header->chksum_type = 0;
284         header->chksum_size = 0;
285         header->chksum[0] = jbd_commit_csum(jbd_fs, header);
286 }
287
#if CONFIG_META_CSUM_ENABLE
/**@brief  Check the checksum stored in a commit block.
 * @param  jbd_fs jbd filesystem
 * @param  header commit block header (start of the block)
 * @return true when checksums are not in use or chksum[0] matches
 *         the big-endian form of the computed checksum*/
static bool jbd_verify_commit_csum(struct jbd_fs *jbd_fs,
				   struct jbd_commit_header *header)
{
	uint32_t expected;

	if (!jbd_has_csum(&jbd_fs->sb))
		return true;

	expected = to_be32(jbd_commit_csum(jbd_fs, header));
	return header->chksum[0] == expected;
}
#else
#define jbd_verify_commit_csum(...) true
#endif
301
#if CONFIG_META_CSUM_ENABLE
/**@brief  Compute the checksum of a data block logged to the journal.
 * @param  jbd_fs jbd filesystem
 * @param  buf block content (one journal block in size)
 * @param  csum running checksum; only used on the
 *         JBD_FEATURE_COMPAT_CHECKSUM path
 * @param  sequence sequence number mixed into the V2/V3 checksum
 * @return checksum, or 0 when no checksum feature applies
 *
 * NOTE(review): JBD_FEATURE_COMPAT_CHECKSUM is tested through
 *               JBD_HAS_INCOMPAT_FEATURE; this looks like it wants
 *               the compat-feature test instead - confirm. */
static uint32_t jbd_block_csum(struct jbd_fs *jbd_fs, const void *buf,
			       uint32_t csum,
			       uint32_t sequence)
{
	uint32_t blk_sz = jbd_get32(&jbd_fs->sb, blocksize);
	uint32_t crc;

	if (jbd_has_csum(&jbd_fs->sb)) {
		/* crc32c over fs uuid, then sequence no., then the
		 * whole block. */
		crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
				  sizeof(jbd_fs->sb.uuid));
		crc = ext4_crc32c(crc, &sequence, sizeof(uint32_t));
		return ext4_crc32c(crc, buf, blk_sz);
	}

	if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
				     JBD_FEATURE_COMPAT_CHECKSUM)) {
		/* Plain crc32 of the whole block, chained onto @csum. */
		return ext4_crc32(csum, buf, blk_sz);
	}

	return 0;
}
#else
#define jbd_block_csum(...) 0
#endif
336
337 static void jbd_block_tag_csum_set(struct jbd_fs *jbd_fs, void *__tag,
338                                    uint32_t checksum)
339 {
340         int ver = jbd_has_csum(&jbd_fs->sb);
341         if (!ver)
342                 return;
343
344         if (ver == 2) {
345                 struct jbd_block_tag *tag = __tag;
346                 tag->checksum = (uint16_t)to_be32(checksum);
347         } else {
348                 struct jbd_block_tag3 *tag = __tag;
349                 tag->checksum = to_be32(checksum);
350         }
351 }
352
353 /**@brief  Write jbd superblock to disk.
354  * @param  jbd_fs jbd filesystem
355  * @param  s jbd superblock
356  * @return standard error code*/
357 static int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s)
358 {
359         int rc;
360         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
361         uint64_t offset;
362         ext4_fsblk_t fblock;
363         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
364         if (rc != EOK)
365                 return rc;
366
367         jbd_sb_csum_set(s);
368         offset = fblock * ext4_sb_get_block_size(&fs->sb);
369         return ext4_block_writebytes(fs->bdev, offset, s,
370                                      EXT4_SUPERBLOCK_SIZE);
371 }
372
373 /**@brief  Read jbd superblock from disk.
374  * @param  jbd_fs jbd filesystem
375  * @param  s jbd superblock
376  * @return standard error code*/
377 static int jbd_sb_read(struct jbd_fs *jbd_fs, struct jbd_sb *s)
378 {
379         int rc;
380         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
381         uint64_t offset;
382         ext4_fsblk_t fblock;
383         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
384         if (rc != EOK)
385                 return rc;
386
387         offset = fblock * ext4_sb_get_block_size(&fs->sb);
388         return ext4_block_readbytes(fs->bdev, offset, s,
389                                     EXT4_SUPERBLOCK_SIZE);
390 }
391
392 /**@brief  Verify jbd superblock.
393  * @param  sb jbd superblock
394  * @return true if jbd superblock is valid */
395 static bool jbd_verify_sb(struct jbd_sb *sb)
396 {
397         struct jbd_bhdr *header = &sb->header;
398         if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER)
399                 return false;
400
401         if (jbd_get32(header, blocktype) != JBD_SUPERBLOCK &&
402             jbd_get32(header, blocktype) != JBD_SUPERBLOCK_V2)
403                 return false;
404
405         return jbd_verify_sb_csum(sb);
406 }
407
408 /**@brief  Write back dirty jbd superblock to disk.
409  * @param  jbd_fs jbd filesystem
410  * @return standard error code*/
411 static int jbd_write_sb(struct jbd_fs *jbd_fs)
412 {
413         int rc = EOK;
414         if (jbd_fs->dirty) {
415                 rc = jbd_sb_write(jbd_fs, &jbd_fs->sb);
416                 if (rc != EOK)
417                         return rc;
418
419                 jbd_fs->dirty = false;
420         }
421         return rc;
422 }
423
424 /**@brief  Get reference to jbd filesystem.
425  * @param  fs Filesystem to load journal of
426  * @param  jbd_fs jbd filesystem
427  * @return standard error code*/
428 int jbd_get_fs(struct ext4_fs *fs,
429                struct jbd_fs *jbd_fs)
430 {
431         int rc;
432         uint32_t journal_ino;
433
434         memset(jbd_fs, 0, sizeof(struct jbd_fs));
435         /* See if there is journal inode on this filesystem.*/
436         /* FIXME: detection on existance ofbkejournal bdev is
437          *        missing.*/
438         journal_ino = ext4_get32(&fs->sb, journal_inode_number);
439
440         rc = ext4_fs_get_inode_ref(fs,
441                                    journal_ino,
442                                    &jbd_fs->inode_ref);
443         if (rc != EOK) {
444                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
445                 return rc;
446         }
447         rc = jbd_sb_read(jbd_fs, &jbd_fs->sb);
448         if (rc != EOK) {
449                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
450                 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
451                 return rc;
452         }
453         if (!jbd_verify_sb(&jbd_fs->sb)) {
454                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
455                 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
456                 rc = EIO;
457         }
458
459         return rc;
460 }
461
462 /**@brief  Put reference of jbd filesystem.
463  * @param  jbd_fs jbd filesystem
464  * @return standard error code*/
465 int jbd_put_fs(struct jbd_fs *jbd_fs)
466 {
467         int rc = EOK;
468         rc = jbd_write_sb(jbd_fs);
469
470         ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
471         return rc;
472 }
473
474 /**@brief  Data block lookup helper.
475  * @param  jbd_fs jbd filesystem
476  * @param  iblock block index
477  * @param  fblock logical block address
478  * @return standard error code*/
479 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
480                    ext4_lblk_t iblock,
481                    ext4_fsblk_t *fblock)
482 {
483         int rc = ext4_fs_get_inode_dblk_idx(
484                         &jbd_fs->inode_ref,
485                         iblock,
486                         fblock,
487                         false);
488         return rc;
489 }
490
491 /**@brief   jbd block get function (through cache).
492  * @param   jbd_fs jbd filesystem
493  * @param   block block descriptor
494  * @param   fblock jbd logical block address
495  * @return  standard error code*/
496 static int jbd_block_get(struct jbd_fs *jbd_fs,
497                   struct ext4_block *block,
498                   ext4_fsblk_t fblock)
499 {
500         /* TODO: journal device. */
501         int rc;
502         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
503
504         /* Lookup the logical block address of
505          * fblock.*/
506         rc = jbd_inode_bmap(jbd_fs, iblock,
507                             &fblock);
508         if (rc != EOK)
509                 return rc;
510
511         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
512         rc = ext4_block_get(bdev, block, fblock);
513
514         /* If succeeded, mark buffer as BC_FLUSH to indicate
515          * that data should be written to disk immediately.*/
516         if (rc == EOK) {
517                 ext4_bcache_set_flag(block->buf, BC_FLUSH);
518                 /* As we don't want to occupy too much space
519                  * in block cache, we set this buffer BC_TMP.*/
520                 ext4_bcache_set_flag(block->buf, BC_TMP);
521         }
522
523         return rc;
524 }
525
526 /**@brief   jbd block get function (through cache, don't read).
527  * @param   jbd_fs jbd filesystem
528  * @param   block block descriptor
529  * @param   fblock jbd logical block address
530  * @return  standard error code*/
531 static int jbd_block_get_noread(struct jbd_fs *jbd_fs,
532                          struct ext4_block *block,
533                          ext4_fsblk_t fblock)
534 {
535         /* TODO: journal device. */
536         int rc;
537         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
538         rc = jbd_inode_bmap(jbd_fs, iblock,
539                             &fblock);
540         if (rc != EOK)
541                 return rc;
542
543         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
544         rc = ext4_block_get_noread(bdev, block, fblock);
545         if (rc == EOK)
546                 ext4_bcache_set_flag(block->buf, BC_FLUSH);
547
548         return rc;
549 }
550
551 /**@brief   jbd block set procedure (through cache).
552  * @param   jbd_fs jbd filesystem
553  * @param   block block descriptor
554  * @return  standard error code*/
555 static int jbd_block_set(struct jbd_fs *jbd_fs,
556                   struct ext4_block *block)
557 {
558         return ext4_block_set(jbd_fs->inode_ref.fs->bdev,
559                               block);
560 }
561
562 /**@brief  helper functions to calculate
563  *         block tag size, not including UUID part.
564  * @param  jbd_fs jbd filesystem
565  * @return tag size in bytes*/
566 static int jbd_tag_bytes(struct jbd_fs *jbd_fs)
567 {
568         int size;
569
570         /* It is very easy to deal with the case which
571          * JBD_FEATURE_INCOMPAT_CSUM_V3 is enabled.*/
572         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
573                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
574                 return sizeof(struct jbd_block_tag3);
575
576         size = sizeof(struct jbd_block_tag);
577
578         /* If JBD_FEATURE_INCOMPAT_CSUM_V2 is enabled,
579          * add 2 bytes to size.*/
580         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
581                                      JBD_FEATURE_INCOMPAT_CSUM_V2))
582                 size += sizeof(uint16_t);
583
584         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
585                                      JBD_FEATURE_INCOMPAT_64BIT))
586                 return size;
587
588         /* If block number is 4 bytes in size,
589          * minus 4 bytes from size */
590         return size - sizeof(uint32_t);
591 }
592
593 /**@brief  Tag information. */
594 struct tag_info {
595         /**@brief  Tag size in bytes, including UUID part.*/
596         int tag_bytes;
597
598         /**@brief  block number stored in this tag.*/
599         ext4_fsblk_t block;
600
601         /**@brief  whether UUID part exists or not.*/
602         bool uuid_exist;
603
604         /**@brief  UUID content if UUID part exists.*/
605         uint8_t uuid[UUID_SIZE];
606
607         /**@brief  Is this the last tag? */
608         bool last_tag;
609
610         /**@brief  crc32c checksum. */
611         uint32_t checksum;
612 };
613
614 /**@brief  Extract information from a block tag.
615  * @param  __tag pointer to the block tag
616  * @param  tag_bytes block tag size of this jbd filesystem
617  * @param  remaining size in buffer containing the block tag
618  * @param  tag_info information of this tag.
619  * @return  EOK when succeed, otherwise return EINVAL.*/
620 static int
621 jbd_extract_block_tag(struct jbd_fs *jbd_fs,
622                       void *__tag,
623                       int tag_bytes,
624                       int32_t remain_buf_size,
625                       struct tag_info *tag_info)
626 {
627         char *uuid_start;
628         tag_info->tag_bytes = tag_bytes;
629         tag_info->uuid_exist = false;
630         tag_info->last_tag = false;
631
632         /* See whether it is possible to hold a valid block tag.*/
633         if (remain_buf_size - tag_bytes < 0)
634                 return EINVAL;
635
636         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
637                                      JBD_FEATURE_INCOMPAT_CSUM_V3)) {
638                 struct jbd_block_tag3 *tag = __tag;
639                 tag_info->block = jbd_get32(tag, blocknr);
640                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
641                                              JBD_FEATURE_INCOMPAT_64BIT))
642                          tag_info->block |=
643                                  (uint64_t)jbd_get32(tag, blocknr_high) << 32;
644
645                 if (jbd_get32(tag, flags) & JBD_FLAG_ESCAPE)
646                         tag_info->block = 0;
647
648                 if (!(jbd_get32(tag, flags) & JBD_FLAG_SAME_UUID)) {
649                         /* See whether it is possible to hold UUID part.*/
650                         if (remain_buf_size - tag_bytes < UUID_SIZE)
651                                 return EINVAL;
652
653                         uuid_start = (char *)tag + tag_bytes;
654                         tag_info->uuid_exist = true;
655                         tag_info->tag_bytes += UUID_SIZE;
656                         memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
657                 }
658
659                 if (jbd_get32(tag, flags) & JBD_FLAG_LAST_TAG)
660                         tag_info->last_tag = true;
661
662         } else {
663                 struct jbd_block_tag *tag = __tag;
664                 tag_info->block = jbd_get32(tag, blocknr);
665                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
666                                              JBD_FEATURE_INCOMPAT_64BIT))
667                          tag_info->block |=
668                                  (uint64_t)jbd_get32(tag, blocknr_high) << 32;
669
670                 if (jbd_get16(tag, flags) & JBD_FLAG_ESCAPE)
671                         tag_info->block = 0;
672
673                 if (!(jbd_get16(tag, flags) & JBD_FLAG_SAME_UUID)) {
674                         /* See whether it is possible to hold UUID part.*/
675                         if (remain_buf_size - tag_bytes < UUID_SIZE)
676                                 return EINVAL;
677
678                         uuid_start = (char *)tag + tag_bytes;
679                         tag_info->uuid_exist = true;
680                         tag_info->tag_bytes += UUID_SIZE;
681                         memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
682                 }
683
684                 if (jbd_get16(tag, flags) & JBD_FLAG_LAST_TAG)
685                         tag_info->last_tag = true;
686
687         }
688         return EOK;
689 }
690
691 /**@brief  Write information to a block tag.
692  * @param  __tag pointer to the block tag
693  * @param  remaining size in buffer containing the block tag
694  * @param  tag_info information of this tag.
695  * @return  EOK when succeed, otherwise return EINVAL.*/
696 static int
697 jbd_write_block_tag(struct jbd_fs *jbd_fs,
698                     void *__tag,
699                     int32_t remain_buf_size,
700                     struct tag_info *tag_info)
701 {
702         char *uuid_start;
703         int tag_bytes = jbd_tag_bytes(jbd_fs);
704
705         tag_info->tag_bytes = tag_bytes;
706
707         /* See whether it is possible to hold a valid block tag.*/
708         if (remain_buf_size - tag_bytes < 0)
709                 return EINVAL;
710
711         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
712                                      JBD_FEATURE_INCOMPAT_CSUM_V3)) {
713                 struct jbd_block_tag3 *tag = __tag;
714                 memset(tag, 0, sizeof(struct jbd_block_tag3));
715                 jbd_set32(tag, blocknr, (uint32_t)tag_info->block);
716                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
717                                              JBD_FEATURE_INCOMPAT_64BIT))
718                         jbd_set32(tag, blocknr_high, tag_info->block >> 32);
719
720                 if (tag_info->uuid_exist) {
721                         /* See whether it is possible to hold UUID part.*/
722                         if (remain_buf_size - tag_bytes < UUID_SIZE)
723                                 return EINVAL;
724
725                         uuid_start = (char *)tag + tag_bytes;
726                         tag_info->tag_bytes += UUID_SIZE;
727                         memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
728                 } else
729                         jbd_set32(tag, flags,
730                                   jbd_get32(tag, flags) | JBD_FLAG_SAME_UUID);
731
732                 jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
733
734                 if (tag_info->last_tag)
735                         jbd_set32(tag, flags,
736                                   jbd_get32(tag, flags) | JBD_FLAG_LAST_TAG);
737
738         } else {
739                 struct jbd_block_tag *tag = __tag;
740                 memset(tag, 0, sizeof(struct jbd_block_tag));
741                 jbd_set32(tag, blocknr, (uint32_t)tag_info->block);
742                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
743                                              JBD_FEATURE_INCOMPAT_64BIT))
744                         jbd_set32(tag, blocknr_high, tag_info->block >> 32);
745
746                 if (tag_info->uuid_exist) {
747                         /* See whether it is possible to hold UUID part.*/
748                         if (remain_buf_size - tag_bytes < UUID_SIZE)
749                                 return EINVAL;
750
751                         uuid_start = (char *)tag + tag_bytes;
752                         tag_info->tag_bytes += UUID_SIZE;
753                         memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
754                 } else
755                         jbd_set16(tag, flags,
756                                   jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID);
757
758                 jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
759
760                 if (tag_info->last_tag)
761                         jbd_set16(tag, flags,
762                                   jbd_get16(tag, flags) | JBD_FLAG_LAST_TAG);
763
764         }
765         return EOK;
766 }
767
768 /**@brief  Iterate all block tags in a block.
769  * @param  jbd_fs jbd filesystem
770  * @param  __tag_start pointer to the block
771  * @param  tag_tbl_size size of the block
772  * @param  func callback routine to indicate that
773  *         a block tag is found
774  * @param  arg additional argument to be passed to func */
775 static void
776 jbd_iterate_block_table(struct jbd_fs *jbd_fs,
777                         void *__tag_start,
778                         int32_t tag_tbl_size,
779                         void (*func)(struct jbd_fs * jbd_fs,
780                                         ext4_fsblk_t block,
781                                         uint8_t *uuid,
782                                         void *arg),
783                         void *arg)
784 {
785         char *tag_start, *tag_ptr;
786         int tag_bytes = jbd_tag_bytes(jbd_fs);
787         tag_start = __tag_start;
788         tag_ptr = tag_start;
789
790         /* Cut off the size of block tail storing checksum. */
791         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
792                                      JBD_FEATURE_INCOMPAT_CSUM_V2) ||
793             JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
794                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
795                 tag_tbl_size -= sizeof(struct jbd_block_tail);
796
797         while (tag_tbl_size) {
798                 struct tag_info tag_info;
799                 int rc = jbd_extract_block_tag(jbd_fs,
800                                       tag_ptr,
801                                       tag_bytes,
802                                       tag_tbl_size,
803                                       &tag_info);
804                 if (rc != EOK)
805                         break;
806
807                 if (func)
808                         func(jbd_fs, tag_info.block, tag_info.uuid, arg);
809
810                 /* Stop the iteration when we reach the last tag. */
811                 if (tag_info.last_tag)
812                         break;
813
814                 tag_ptr += tag_info.tag_bytes;
815                 tag_tbl_size -= tag_info.tag_bytes;
816         }
817 }
818
819 static void jbd_display_block_tags(struct jbd_fs *jbd_fs,
820                                    ext4_fsblk_t block,
821                                    uint8_t *uuid,
822                                    void *arg)
823 {
824         uint32_t *iblock = arg;
825         ext4_dbg(DEBUG_JBD, "Block in block_tag: %" PRIu64 "\n", block);
826         (*iblock)++;
827         wrap(&jbd_fs->sb, *iblock);
828         (void)jbd_fs;
829         (void)uuid;
830         return;
831 }
832
833 static struct revoke_entry *
834 jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)
835 {
836         struct revoke_entry tmp = {
837                 .block = block
838         };
839
840         return RB_FIND(jbd_revoke, &info->revoke_root, &tmp);
841 }
842
843 /**@brief  Replay a block in a transaction.
844  * @param  jbd_fs jbd filesystem
845  * @param  block  block address to be replayed.*/
846 static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
847                                   ext4_fsblk_t block,
848                                   uint8_t *uuid __unused,
849                                   void *__arg)
850 {
851         int r;
852         struct replay_arg *arg = __arg;
853         struct recover_info *info = arg->info;
854         uint32_t *this_block = arg->this_block;
855         struct revoke_entry *revoke_entry;
856         struct ext4_block journal_block, ext4_block;
857         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
858
859         (*this_block)++;
860         wrap(&jbd_fs->sb, *this_block);
861
862         /* We replay this block only if the current transaction id
863          * is equal or greater than that in revoke entry.*/
864         revoke_entry = jbd_revoke_entry_lookup(info, block);
865         if (revoke_entry &&
866             arg->this_trans_id < revoke_entry->trans_id)
867                 return;
868
869         ext4_dbg(DEBUG_JBD,
870                  "Replaying block in block_tag: %" PRIu64 "\n",
871                  block);
872
873         r = jbd_block_get(jbd_fs, &journal_block, *this_block);
874         if (r != EOK)
875                 return;
876
877         /* We need special treatment for ext4 superblock. */
878         if (block) {
879                 r = ext4_block_get_noread(fs->bdev, &ext4_block, block);
880                 if (r != EOK) {
881                         jbd_block_set(jbd_fs, &journal_block);
882                         return;
883                 }
884
885                 memcpy(ext4_block.data,
886                         journal_block.data,
887                         jbd_get32(&jbd_fs->sb, blocksize));
888
889                 ext4_bcache_set_dirty(ext4_block.buf);
890                 ext4_block_set(fs->bdev, &ext4_block);
891         } else {
892                 uint16_t mount_count, state;
893                 mount_count = ext4_get16(&fs->sb, mount_count);
894                 state = ext4_get16(&fs->sb, state);
895
896                 memcpy(&fs->sb,
897                         journal_block.data + EXT4_SUPERBLOCK_OFFSET,
898                         EXT4_SUPERBLOCK_SIZE);
899
900                 /* Mark system as mounted */
901                 ext4_set16(&fs->sb, state, state);
902                 r = ext4_sb_write(fs->bdev, &fs->sb);
903                 if (r != EOK)
904                         return;
905
906                 /*Update mount count*/
907                 ext4_set16(&fs->sb, mount_count, mount_count);
908         }
909
910         jbd_block_set(jbd_fs, &journal_block);
911         
912         return;
913 }
914
915 /**@brief  Add block address to revoke tree, along with
916  *         its transaction id.
917  * @param  info  journal replay info
918  * @param  block  block address to be replayed.*/
919 static void jbd_add_revoke_block_tags(struct recover_info *info,
920                                       ext4_fsblk_t block)
921 {
922         struct revoke_entry *revoke_entry;
923
924         ext4_dbg(DEBUG_JBD, "Add block %" PRIu64 " to revoke tree\n", block);
925         /* If the revoke entry with respect to the block address
926          * exists already, update its transaction id.*/
927         revoke_entry = jbd_revoke_entry_lookup(info, block);
928         if (revoke_entry) {
929                 revoke_entry->trans_id = info->this_trans_id;
930                 return;
931         }
932
933         revoke_entry = jbd_alloc_revoke_entry();
934         ext4_assert(revoke_entry);
935         revoke_entry->block = block;
936         revoke_entry->trans_id = info->this_trans_id;
937         RB_INSERT(jbd_revoke, &info->revoke_root, revoke_entry);
938
939         return;
940 }
941
942 static void jbd_destroy_revoke_tree(struct recover_info *info)
943 {
944         while (!RB_EMPTY(&info->revoke_root)) {
945                 struct revoke_entry *revoke_entry =
946                         RB_MIN(jbd_revoke, &info->revoke_root);
947                 ext4_assert(revoke_entry);
948                 RB_REMOVE(jbd_revoke, &info->revoke_root, revoke_entry);
949                 jbd_free_revoke_entry(revoke_entry);
950         }
951 }
952
953
/* Values for the `action` argument of jbd_iterate_log(); jbd_recover()
 * runs the log walk once per value, in this order:
 *  - ACTION_SCAN:    count committed transactions and record the first
 *                    and last transaction ids in the recover_info.
 *  - ACTION_REVOKE:  feed every revoke block into the revoke tree.
 *  - ACTION_RECOVER: replay the blocks listed in descriptor blocks. */
954 #define ACTION_SCAN 0
955 #define ACTION_REVOKE 1
956 #define ACTION_RECOVER 2
957
958 /**@brief  Add entries in a revoke block to revoke tree.
959  * @param  jbd_fs jbd filesystem
960  * @param  header revoke block header
961  * @param  recover_info  journal replay info*/
962 static void jbd_build_revoke_tree(struct jbd_fs *jbd_fs,
963                                   struct jbd_bhdr *header,
964                                   struct recover_info *info)
965 {
966         char *blocks_entry;
967         struct jbd_revoke_header *revoke_hdr =
968                 (struct jbd_revoke_header *)header;
969         uint32_t i, nr_entries, record_len = 4;
970
971         /* If we are working on a 64bit jbd filesystem, */
972         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
973                                      JBD_FEATURE_INCOMPAT_64BIT))
974                 record_len = 8;
975
976         nr_entries = (jbd_get32(revoke_hdr, count) -
977                         sizeof(struct jbd_revoke_header)) /
978                         record_len;
979
980         blocks_entry = (char *)(revoke_hdr + 1);
981
982         for (i = 0;i < nr_entries;i++) {
983                 if (record_len == 8) {
984                         uint64_t *blocks =
985                                 (uint64_t *)blocks_entry;
986                         jbd_add_revoke_block_tags(info, to_be64(*blocks));
987                 } else {
988                         uint32_t *blocks =
989                                 (uint32_t *)blocks_entry;
990                         jbd_add_revoke_block_tags(info, to_be32(*blocks));
991                 }
992                 blocks_entry += record_len;
993         }
994 }
995
996 static void jbd_debug_descriptor_block(struct jbd_fs *jbd_fs,
997                                        struct jbd_bhdr *header,
998                                        uint32_t *iblock)
999 {
1000         jbd_iterate_block_table(jbd_fs,
1001                                 header + 1,
1002                                 jbd_get32(&jbd_fs->sb, blocksize) -
1003                                         sizeof(struct jbd_bhdr),
1004                                 jbd_display_block_tags,
1005                                 iblock);
1006 }
1007
1008 static void jbd_replay_descriptor_block(struct jbd_fs *jbd_fs,
1009                                         struct jbd_bhdr *header,
1010                                         struct replay_arg *arg)
1011 {
1012         jbd_iterate_block_table(jbd_fs,
1013                                 header + 1,
1014                                 jbd_get32(&jbd_fs->sb, blocksize) -
1015                                         sizeof(struct jbd_bhdr),
1016                                 jbd_replay_block_tags,
1017                                 arg);
1018 }
1019
1020 /**@brief  The core routine of journal replay.
1021  * @param  jbd_fs jbd filesystem
1022  * @param  recover_info  journal replay info
1023  * @param  action action needed to be taken
1024  * @return standard error code*/
1025 static int jbd_iterate_log(struct jbd_fs *jbd_fs,
1026                            struct recover_info *info,
1027                            int action)
1028 {
1029         int r = EOK;
1030         bool log_end = false;
1031         struct jbd_sb *sb = &jbd_fs->sb;
1032         uint32_t start_trans_id, this_trans_id;
1033         uint32_t start_block, this_block;
1034
1035         /* We start iterating valid blocks in the whole journal.*/
1036         start_trans_id = this_trans_id = jbd_get32(sb, sequence);
1037         start_block = this_block = jbd_get32(sb, start);
1038         if (action == ACTION_SCAN)
1039                 info->trans_cnt = 0;
1040         else if (!info->trans_cnt)
1041                 log_end = true;
1042
1043         ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %" PRIu32 "\n",
1044                             start_trans_id);
1045
1046         while (!log_end) {
1047                 struct ext4_block block;
1048                 struct jbd_bhdr *header;
1049                 /* If we are not scanning for the last
1050                  * valid transaction in the journal,
1051                  * we will stop when we reach the end of
1052                  * the journal.*/
1053                 if (action != ACTION_SCAN)
1054                         if (this_trans_id > info->last_trans_id) {
1055                                 log_end = true;
1056                                 continue;
1057                         }
1058
1059                 r = jbd_block_get(jbd_fs, &block, this_block);
1060                 if (r != EOK)
1061                         break;
1062
1063                 header = (struct jbd_bhdr *)block.data;
1064                 /* This block does not have a valid magic number,
1065                  * so we have reached the end of the journal.*/
1066                 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER) {
1067                         jbd_block_set(jbd_fs, &block);
1068                         log_end = true;
1069                         continue;
1070                 }
1071
1072                 /* If the transaction id we found is not expected,
1073                  * we may have reached the end of the journal.
1074                  *
1075                  * If we are not scanning the journal, something
1076                  * bad might have taken place. :-( */
1077                 if (jbd_get32(header, sequence) != this_trans_id) {
1078                         if (action != ACTION_SCAN)
1079                                 r = EIO;
1080
1081                         jbd_block_set(jbd_fs, &block);
1082                         log_end = true;
1083                         continue;
1084                 }
1085
1086                 switch (jbd_get32(header, blocktype)) {
1087                 case JBD_DESCRIPTOR_BLOCK:
1088                         if (!jbd_verify_meta_csum(jbd_fs, header)) {
1089                                 ext4_dbg(DEBUG_JBD,
1090                                         DBG_WARN "Descriptor block checksum failed."
1091                                                 "Journal block: %" PRIu32"\n",
1092                                                 this_block);
1093                                 log_end = true;
1094                                 break;
1095                         }
1096                         ext4_dbg(DEBUG_JBD, "Descriptor block: %" PRIu32", "
1097                                             "trans_id: %" PRIu32"\n",
1098                                             this_block, this_trans_id);
1099                         if (action == ACTION_RECOVER) {
1100                                 struct replay_arg replay_arg;
1101                                 replay_arg.info = info;
1102                                 replay_arg.this_block = &this_block;
1103                                 replay_arg.this_trans_id = this_trans_id;
1104
1105                                 jbd_replay_descriptor_block(jbd_fs,
1106                                                 header, &replay_arg);
1107                         } else
1108                                 jbd_debug_descriptor_block(jbd_fs,
1109                                                 header, &this_block);
1110
1111                         break;
1112                 case JBD_COMMIT_BLOCK:
1113                         if (!jbd_verify_commit_csum(jbd_fs,
1114                                         (struct jbd_commit_header *)header)) {
1115                                 ext4_dbg(DEBUG_JBD,
1116                                         DBG_WARN "Commit block checksum failed."
1117                                                 "Journal block: %" PRIu32"\n",
1118                                                 this_block);
1119                                 log_end = true;
1120                                 break;
1121                         }
1122                         ext4_dbg(DEBUG_JBD, "Commit block: %" PRIu32", "
1123                                             "trans_id: %" PRIu32"\n",
1124                                             this_block, this_trans_id);
1125                         /* This is the end of a transaction,
1126                          * we may now proceed to the next transaction.
1127                          */
1128                         this_trans_id++;
1129                         info->trans_cnt++;
1130                         break;
1131                 case JBD_REVOKE_BLOCK:
1132                         if (!jbd_verify_meta_csum(jbd_fs, header)) {
1133                                 ext4_dbg(DEBUG_JBD,
1134                                         DBG_WARN "Revoke block checksum failed."
1135                                                 "Journal block: %" PRIu32"\n",
1136                                                 this_block);
1137                                 log_end = true;
1138                                 break;
1139                         }
1140                         ext4_dbg(DEBUG_JBD, "Revoke block: %" PRIu32", "
1141                                             "trans_id: %" PRIu32"\n",
1142                                             this_block, this_trans_id);
1143                         if (action == ACTION_REVOKE) {
1144                                 info->this_trans_id = this_trans_id;
1145                                 jbd_build_revoke_tree(jbd_fs,
1146                                                 header, info);
1147                         }
1148                         break;
1149                 default:
1150                         log_end = true;
1151                         break;
1152                 }
1153                 jbd_block_set(jbd_fs, &block);
1154                 this_block++;
1155                 wrap(sb, this_block);
1156                 if (this_block == start_block)
1157                         log_end = true;
1158
1159         }
1160         ext4_dbg(DEBUG_JBD, "End of journal.\n");
1161         if (r == EOK && action == ACTION_SCAN) {
1162                 /* We have finished scanning the journal. */
1163                 info->start_trans_id = start_trans_id;
1164                 if (this_trans_id > start_trans_id)
1165                         info->last_trans_id = this_trans_id - 1;
1166                 else
1167                         info->last_trans_id = this_trans_id;
1168         }
1169
1170         return r;
1171 }
1172
1173 /**@brief  Replay journal.
1174  * @param  jbd_fs jbd filesystem
1175  * @return standard error code*/
1176 int jbd_recover(struct jbd_fs *jbd_fs)
1177 {
1178         int r;
1179         struct recover_info info;
1180         struct jbd_sb *sb = &jbd_fs->sb;
1181         if (!sb->start)
1182                 return EOK;
1183
1184         RB_INIT(&info.revoke_root);
1185
1186         r = jbd_iterate_log(jbd_fs, &info, ACTION_SCAN);
1187         if (r != EOK)
1188                 return r;
1189
1190         r = jbd_iterate_log(jbd_fs, &info, ACTION_REVOKE);
1191         if (r != EOK)
1192                 return r;
1193
1194         r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);
1195         if (r == EOK) {
1196                 /* If we successfully replay the journal,
1197                  * clear EXT4_FINCOM_RECOVER flag on the
1198                  * ext4 superblock, and set the start of
1199                  * journal to 0.*/
1200                 uint32_t features_incompatible =
1201                         ext4_get32(&jbd_fs->inode_ref.fs->sb,
1202                                    features_incompatible);
1203                 jbd_set32(&jbd_fs->sb, start, 0);
1204                 features_incompatible &= ~EXT4_FINCOM_RECOVER;
1205                 ext4_set32(&jbd_fs->inode_ref.fs->sb,
1206                            features_incompatible,
1207                            features_incompatible);
1208                 jbd_fs->dirty = true;
1209                 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1210                                   &jbd_fs->inode_ref.fs->sb);
1211         }
1212         jbd_destroy_revoke_tree(&info);
1213         return r;
1214 }
1215
1216 static void jbd_journal_write_sb(struct jbd_journal *journal)
1217 {
1218         struct jbd_fs *jbd_fs = journal->jbd_fs;
1219         jbd_set32(&jbd_fs->sb, start, journal->start);
1220         jbd_set32(&jbd_fs->sb, sequence, journal->trans_id);
1221         jbd_fs->dirty = true;
1222 }
1223
1224 /**@brief  Start accessing the journal.
1225  * @param  jbd_fs jbd filesystem
1226  * @param  journal current journal session
1227  * @return standard error code*/
1228 int jbd_journal_start(struct jbd_fs *jbd_fs,
1229                       struct jbd_journal *journal)
1230 {
1231         int r;
1232         uint32_t features_incompatible =
1233                         ext4_get32(&jbd_fs->inode_ref.fs->sb,
1234                                    features_incompatible);
1235         struct ext4_block block = EXT4_BLOCK_ZERO();
1236         features_incompatible |= EXT4_FINCOM_RECOVER;
1237         ext4_set32(&jbd_fs->inode_ref.fs->sb,
1238                         features_incompatible,
1239                         features_incompatible);
1240         r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1241                         &jbd_fs->inode_ref.fs->sb);
1242         if (r != EOK)
1243                 return r;
1244
1245         journal->first = jbd_get32(&jbd_fs->sb, first);
1246         journal->start = journal->first;
1247         journal->last = journal->first;
1248         journal->trans_id = 1;
1249         journal->alloc_trans_id = 1;
1250
1251         journal->block_size = jbd_get32(&jbd_fs->sb, blocksize);
1252
1253         r = jbd_block_get_noread(jbd_fs,
1254                          &block,
1255                          journal->start);
1256         if (r != EOK) {
1257                 memset(journal, 0, sizeof(struct jbd_journal));
1258                 return r;
1259         }
1260         memset(block.data, 0, journal->block_size);
1261         ext4_bcache_set_dirty(block.buf);
1262         r = jbd_block_set(jbd_fs, &block);
1263         if (r != EOK) {
1264                 memset(journal, 0, sizeof(struct jbd_journal));
1265                 return r;
1266         }
1267
1268         TAILQ_INIT(&journal->trans_queue);
1269         TAILQ_INIT(&journal->cp_queue);
1270         RB_INIT(&journal->block_rec_root);
1271         journal->jbd_fs = jbd_fs;
1272         jbd_journal_write_sb(journal);
1273         return jbd_write_sb(jbd_fs);
1274 }
1275
/* Forward declaration: the functions below call jbd_trans_end_write()
 * and compare buffers' end_write callback against it. `res` is the
 * result code of the write and `arg` is the struct jbd_buf the callers
 * below pass in; the definition is not part of this section. */
1276 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
1277                           struct ext4_buf *buf __unused,
1278                           int res,
1279                           void *arg);
1280
1281 static void jbd_journal_flush_trans(struct jbd_trans *trans)
1282 {
1283         struct jbd_buf *jbd_buf, *tmp;
1284         struct jbd_journal *journal = trans->journal;
1285         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1286         void *tmp_data = malloc(journal->block_size);
1287         ext4_assert(tmp_data);
1288
1289         TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
1290                         tmp) {
1291                 struct ext4_buf *buf = jbd_buf->block_rec->buf;
1292                 /* The buffer in memory is still dirty. */
1293                 if (buf) {
1294                         if (jbd_buf->block_rec->trans != trans) {
1295                                 int r;
1296                                 struct ext4_block jbd_block = EXT4_BLOCK_ZERO();
1297                                 ext4_assert(ext4_block_get(fs->bdev,
1298                                                         &jbd_block,
1299                                                         jbd_buf->jbd_lba) == EOK);
1300                                 memcpy(tmp_data, jbd_block.data,
1301                                                 journal->block_size);
1302                                 ext4_block_set(fs->bdev, &jbd_block);
1303                                 r = ext4_blocks_set_direct(fs->bdev, tmp_data,
1304                                                 buf->lba, 1);
1305                                 jbd_trans_end_write(fs->bdev->bc, buf, r, jbd_buf);
1306                         } else
1307                                 ext4_block_flush_buf(fs->bdev, buf);
1308
1309                 }
1310         }
1311
1312         free(tmp_data);
1313 }
1314
1315 static void
1316 jbd_journal_skip_pure_revoke(struct jbd_journal *journal,
1317                              struct jbd_trans *trans)
1318 {
1319         journal->start = trans->start_iblock +
1320                 trans->alloc_blocks;
1321         wrap(&journal->jbd_fs->sb, journal->start);
1322         journal->trans_id = trans->trans_id + 1;
1323         jbd_journal_free_trans(journal,
1324                         trans, false);
1325         jbd_journal_write_sb(journal);
1326 }
1327
1328 static void
1329 jbd_journal_purge_cp_trans(struct jbd_journal *journal,
1330                            bool flush,
1331                            bool once)
1332 {
1333         struct jbd_trans *trans;
1334         while ((trans = TAILQ_FIRST(&journal->cp_queue))) {
1335                 if (!trans->data_cnt) {
1336                         TAILQ_REMOVE(&journal->cp_queue,
1337                                         trans,
1338                                         trans_node);
1339                         jbd_journal_skip_pure_revoke(journal, trans);
1340                 } else {
1341                         if (trans->data_cnt ==
1342                                         trans->written_cnt) {
1343                                 journal->start =
1344                                         trans->start_iblock +
1345                                         trans->alloc_blocks;
1346                                 wrap(&journal->jbd_fs->sb,
1347                                                 journal->start);
1348                                 journal->trans_id =
1349                                         trans->trans_id + 1;
1350                                 TAILQ_REMOVE(&journal->cp_queue,
1351                                                 trans,
1352                                                 trans_node);
1353                                 jbd_journal_free_trans(journal,
1354                                                 trans,
1355                                                 false);
1356                                 jbd_journal_write_sb(journal);
1357                         } else if (!flush) {
1358                                 journal->start =
1359                                         trans->start_iblock;
1360                                 wrap(&journal->jbd_fs->sb,
1361                                                 journal->start);
1362                                 journal->trans_id =
1363                                         trans->trans_id;
1364                                 jbd_journal_write_sb(journal);
1365                                 break;
1366                         } else
1367                                 jbd_journal_flush_trans(trans);
1368                 }
1369                 if (once)
1370                         break;
1371         }
1372 }
1373
1374 /**@brief  Stop accessing the journal.
1375  * @param  journal current journal session
1376  * @return standard error code*/
1377 int jbd_journal_stop(struct jbd_journal *journal)
1378 {
1379         int r;
1380         struct jbd_fs *jbd_fs = journal->jbd_fs;
1381         uint32_t features_incompatible;
1382
1383         /* Make sure that journalled content have reached
1384          * the disk.*/
1385         jbd_journal_purge_cp_trans(journal, true, false);
1386
1387         /* There should be no block record in this journal
1388          * session. */
1389         if (!RB_EMPTY(&journal->block_rec_root))
1390                 ext4_dbg(DEBUG_JBD,
1391                          DBG_WARN "There are still block records "
1392                                   "in this journal session!\n");
1393
1394         features_incompatible =
1395                 ext4_get32(&jbd_fs->inode_ref.fs->sb,
1396                            features_incompatible);
1397         features_incompatible &= ~EXT4_FINCOM_RECOVER;
1398         ext4_set32(&jbd_fs->inode_ref.fs->sb,
1399                         features_incompatible,
1400                         features_incompatible);
1401         r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1402                         &jbd_fs->inode_ref.fs->sb);
1403         if (r != EOK)
1404                 return r;
1405
1406         journal->start = 0;
1407         journal->trans_id = 0;
1408         jbd_journal_write_sb(journal);
1409         return jbd_write_sb(journal->jbd_fs);
1410 }
1411
1412 /**@brief  Allocate a block in the journal.
1413  * @param  journal current journal session
1414  * @param  trans transaction
1415  * @return allocated block address*/
1416 static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal,
1417                                         struct jbd_trans *trans)
1418 {
1419         uint32_t start_block;
1420
1421         start_block = journal->last++;
1422         trans->alloc_blocks++;
1423         wrap(&journal->jbd_fs->sb, journal->last);
1424         
1425         /* If there is no space left, flush all journalled
1426          * blocks to disk first.*/
1427         if (journal->last == journal->start)
1428                 jbd_journal_purge_cp_trans(journal, true, false);
1429
1430         return start_block;
1431 }
1432
1433 /**@brief  Allocate a new transaction
1434  * @param  journal current journal session
1435  * @return transaction allocated*/
1436 struct jbd_trans *
1437 jbd_journal_new_trans(struct jbd_journal *journal)
1438 {
1439         struct jbd_trans *trans = calloc(1, sizeof(struct jbd_trans));
1440         if (!trans)
1441                 return NULL;
1442
1443         /* We will assign a trans_id to this transaction,
1444          * once it has been committed.*/
1445         trans->journal = journal;
1446         trans->data_csum = EXT4_CRC32_INIT;
1447         trans->error = EOK;
1448         TAILQ_INIT(&trans->buf_queue);
1449         return trans;
1450 }
1451
1452 /**@brief  gain access to it before making any modications.
1453  * @param  journal current journal session
1454  * @param  trans transaction
1455  * @param  block descriptor
1456  * @return standard error code.*/
1457 int jbd_trans_get_access(struct jbd_journal *journal,
1458                          struct jbd_trans *trans,
1459                          struct ext4_block *block)
1460 {
1461         int r = EOK;
1462         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1463         struct jbd_buf *jbd_buf = block->buf->end_write_arg;
1464
1465         /* If the buffer has already been modified, we should
1466          * flush dirty data in this buffer to disk.*/
1467         if (ext4_bcache_test_flag(block->buf, BC_DIRTY) &&
1468             block->buf->end_write == jbd_trans_end_write) {
1469                 ext4_assert(jbd_buf);
1470                 if (jbd_buf->trans != trans)
1471                         r = ext4_block_flush_buf(fs->bdev, block->buf);
1472
1473         }
1474         return r;
1475 }
1476
1477 static struct jbd_block_rec *
1478 jbd_trans_block_rec_lookup(struct jbd_journal *journal,
1479                            ext4_fsblk_t lba)
1480 {
1481         struct jbd_block_rec tmp = {
1482                 .lba = lba
1483         };
1484
1485         return RB_FIND(jbd_block,
1486                        &journal->block_rec_root,
1487                        &tmp);
1488 }
1489
1490 static void
1491 jbd_trans_change_ownership(struct jbd_block_rec *block_rec,
1492                            struct jbd_trans *new_trans,
1493                            struct ext4_buf *new_buf)
1494 {
1495         LIST_REMOVE(block_rec, tbrec_node);
1496         /* Now this block record belongs to this transaction. */
1497         LIST_INSERT_HEAD(&new_trans->tbrec_list, block_rec, tbrec_node);
1498         block_rec->trans = new_trans;
1499         block_rec->buf = new_buf;
1500 }
1501
1502 static inline struct jbd_block_rec *
1503 jbd_trans_insert_block_rec(struct jbd_trans *trans,
1504                            ext4_fsblk_t lba,
1505                            struct ext4_buf *buf)
1506 {
1507         struct jbd_block_rec *block_rec;
1508         block_rec = jbd_trans_block_rec_lookup(trans->journal, lba);
1509         if (block_rec) {
1510                 jbd_trans_change_ownership(block_rec, trans, buf);
1511                 return block_rec;
1512         }
1513         block_rec = calloc(1, sizeof(struct jbd_block_rec));
1514         if (!block_rec)
1515                 return NULL;
1516
1517         block_rec->lba = lba;
1518         block_rec->buf = buf;
1519         block_rec->trans = trans;
1520         TAILQ_INIT(&block_rec->dirty_buf_queue);
1521         LIST_INSERT_HEAD(&trans->tbrec_list, block_rec, tbrec_node);
1522         RB_INSERT(jbd_block, &trans->journal->block_rec_root, block_rec);
1523         return block_rec;
1524 }
1525
1526 static void
1527 jbd_trans_finish_callback(struct jbd_journal *journal,
1528                           const struct jbd_trans *trans,
1529                           struct jbd_block_rec *block_rec,
1530                           bool abort)
1531 {
1532         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1533         if (block_rec->trans != trans)
1534                 return;
1535
1536         if (!abort) {
1537                 struct jbd_buf *jbd_buf, *tmp;
1538                 TAILQ_FOREACH_SAFE(jbd_buf,
1539                                 &block_rec->dirty_buf_queue,
1540                                 dirty_buf_node,
1541                                 tmp) {
1542                         /* All we need is a fake ext4_buf. */
1543                         struct ext4_buf buf;
1544
1545                         jbd_trans_end_write(fs->bdev->bc,
1546                                         &buf,
1547                                         EOK,
1548                                         jbd_buf);
1549                 }
1550         } else {
1551                 struct jbd_buf *jbd_buf;
1552                 struct ext4_block jbd_block = EXT4_BLOCK_ZERO(),
1553                                   block = EXT4_BLOCK_ZERO();
1554                 jbd_buf = TAILQ_LAST(&block_rec->dirty_buf_queue,
1555                                 jbd_buf_dirty);
1556                 if (jbd_buf) {
1557                         ext4_assert(ext4_block_get(fs->bdev,
1558                                                 &jbd_block,
1559                                                 jbd_buf->jbd_lba) == EOK);
1560                         ext4_assert(ext4_block_get_noread(fs->bdev,
1561                                                 &block,
1562                                                 block_rec->lba) == EOK);
1563                         memcpy(block.data, jbd_block.data,
1564                                         journal->block_size);
1565
1566                         jbd_trans_change_ownership(block_rec,
1567                                         jbd_buf->trans, block.buf);
1568
1569                         block.buf->end_write = jbd_trans_end_write;
1570                         block.buf->end_write_arg = jbd_buf;
1571
1572                         ext4_bcache_set_flag(jbd_block.buf, BC_TMP);
1573                         ext4_bcache_set_dirty(block.buf);
1574
1575                         ext4_block_set(fs->bdev, &jbd_block);
1576                         ext4_block_set(fs->bdev, &block);
1577                         return;
1578                 }
1579         }
1580 }
1581
1582 static inline void
1583 jbd_trans_remove_block_rec(struct jbd_journal *journal,
1584                            struct jbd_block_rec *block_rec,
1585                            struct jbd_trans *trans)
1586 {
1587         /* If this block record doesn't belong to this transaction,
1588          * give up.*/
1589         if (block_rec->trans == trans) {
1590                 LIST_REMOVE(block_rec, tbrec_node);
1591                 RB_REMOVE(jbd_block,
1592                                 &journal->block_rec_root,
1593                                 block_rec);
1594                 free(block_rec);
1595         }
1596 }
1597
1598 /**@brief  Add block to a transaction and mark it dirty.
1599  * @param  trans transaction
1600  * @param  block block descriptor
1601  * @return standard error code*/
1602 int jbd_trans_set_block_dirty(struct jbd_trans *trans,
1603                               struct ext4_block *block)
1604 {
1605         struct jbd_buf *jbd_buf;
1606         struct jbd_block_rec *block_rec;
1607
1608         if (block->buf->end_write == jbd_trans_end_write) {
1609                 jbd_buf = block->buf->end_write_arg;
1610                 if (jbd_buf && jbd_buf->trans == trans)
1611                         return EOK;
1612         }
1613         jbd_buf = calloc(1, sizeof(struct jbd_buf));
1614         if (!jbd_buf)
1615                 return ENOMEM;
1616
1617         if ((block_rec = jbd_trans_insert_block_rec(trans,
1618                                         block->lb_id,
1619                                         block->buf)) == NULL) {
1620                 free(jbd_buf);
1621                 return ENOMEM;
1622         }
1623
1624         TAILQ_INSERT_TAIL(&block_rec->dirty_buf_queue,
1625                         jbd_buf,
1626                         dirty_buf_node);
1627
1628         jbd_buf->block_rec = block_rec;
1629         jbd_buf->trans = trans;
1630         jbd_buf->block = *block;
1631         ext4_bcache_inc_ref(block->buf);
1632
1633         /* If the content reach the disk, notify us
1634          * so that we may do a checkpoint. */
1635         block->buf->end_write = jbd_trans_end_write;
1636         block->buf->end_write_arg = jbd_buf;
1637
1638         trans->data_cnt++;
1639         TAILQ_INSERT_HEAD(&trans->buf_queue, jbd_buf, buf_node);
1640
1641         ext4_bcache_set_dirty(block->buf);
1642         return EOK;
1643 }
1644
1645 /**@brief  Add block to be revoked to a transaction
1646  * @param  trans transaction
1647  * @param  lba logical block address
1648  * @return standard error code*/
1649 int jbd_trans_revoke_block(struct jbd_trans *trans,
1650                            ext4_fsblk_t lba)
1651 {
1652         struct jbd_revoke_rec *rec =
1653                 calloc(1, sizeof(struct jbd_revoke_rec));
1654         if (!rec)
1655                 return ENOMEM;
1656
1657         rec->lba = lba;
1658         LIST_INSERT_HEAD(&trans->revoke_list, rec, revoke_node);
1659         return EOK;
1660 }
1661
1662 /**@brief  Try to add block to be revoked to a transaction.
1663  *         If @lba still remains in an transaction on checkpoint
1664  *         queue, add @lba as a revoked block to the transaction.
1665  * @param  trans transaction
1666  * @param  lba logical block address
1667  * @return standard error code*/
1668 int jbd_trans_try_revoke_block(struct jbd_trans *trans,
1669                                ext4_fsblk_t lba)
1670 {
1671         int r = EOK;
1672         struct jbd_journal *journal = trans->journal;
1673         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1674         struct jbd_block_rec *block_rec =
1675                 jbd_trans_block_rec_lookup(journal, lba);
1676
1677         /* Make sure we don't flush any buffers belong to this transaction. */
1678         if (block_rec && block_rec->trans != trans) {
1679                 /* If the buffer has not been flushed yet, flush it now. */
1680                 if (block_rec->buf) {
1681                         r = ext4_block_flush_buf(fs->bdev, block_rec->buf);
1682                         if (r != EOK)
1683                                 return r;
1684
1685                 }
1686
1687                 jbd_trans_revoke_block(trans, lba);
1688         }
1689
1690         return EOK;
1691 }
1692
1693 /**@brief  Free a transaction
1694  * @param  journal current journal session
1695  * @param  trans transaction
1696  * @param  abort discard all the modifications on the block?
1697  * @return standard error code*/
1698 void jbd_journal_free_trans(struct jbd_journal *journal,
1699                             struct jbd_trans *trans,
1700                             bool abort)
1701 {
1702         struct jbd_buf *jbd_buf, *tmp;
1703         struct jbd_revoke_rec *rec, *tmp2;
1704         struct jbd_block_rec *block_rec, *tmp3;
1705         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1706         TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
1707                           tmp) {
1708                 block_rec = jbd_buf->block_rec;
1709                 if (abort) {
1710                         jbd_buf->block.buf->end_write = NULL;
1711                         jbd_buf->block.buf->end_write_arg = NULL;
1712                         ext4_bcache_clear_dirty(jbd_buf->block.buf);
1713                         ext4_block_set(fs->bdev, &jbd_buf->block);
1714                 }
1715
1716                 TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1717                         jbd_buf,
1718                         dirty_buf_node);
1719                 jbd_trans_finish_callback(journal,
1720                                 trans,
1721                                 block_rec,
1722                                 abort);
1723                 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1724                 free(jbd_buf);
1725         }
1726         LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1727                           tmp2) {
1728                 LIST_REMOVE(rec, revoke_node);
1729                 free(rec);
1730         }
1731         LIST_FOREACH_SAFE(block_rec, &trans->tbrec_list, tbrec_node,
1732                           tmp3) {
1733                 jbd_trans_remove_block_rec(journal, block_rec, trans);
1734         }
1735
1736         free(trans);
1737 }
1738
1739 /**@brief  Write commit block for a transaction
1740  * @param  trans transaction
1741  * @return standard error code*/
1742 static int jbd_trans_write_commit_block(struct jbd_trans *trans)
1743 {
1744         int rc;
1745         struct jbd_commit_header *header;
1746         uint32_t commit_iblock = 0;
1747         struct ext4_block commit_block;
1748         struct jbd_journal *journal = trans->journal;
1749
1750         commit_iblock = jbd_journal_alloc_block(journal, trans);
1751         rc = jbd_block_get_noread(journal->jbd_fs,
1752                         &commit_block, commit_iblock);
1753         if (rc != EOK)
1754                 return rc;
1755
1756         header = (struct jbd_commit_header *)commit_block.data;
1757         jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER);
1758         jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);
1759         jbd_set32(&header->header, sequence, trans->trans_id);
1760
1761         if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1762                                 JBD_FEATURE_COMPAT_CHECKSUM)) {
1763                 jbd_set32(header, chksum_type, JBD_CRC32_CHKSUM);
1764                 jbd_set32(header, chksum_size, JBD_CRC32_CHKSUM_SIZE);
1765                 jbd_set32(header, chksum[0], trans->data_csum);
1766         }
1767         jbd_commit_csum_set(journal->jbd_fs, header);
1768         ext4_bcache_set_dirty(commit_block.buf);
1769         rc = jbd_block_set(journal->jbd_fs, &commit_block);
1770         if (rc != EOK)
1771                 return rc;
1772
1773         return EOK;
1774 }
1775
1776 /**@brief  Write descriptor block for a transaction
1777  * @param  journal current journal session
1778  * @param  trans transaction
1779  * @return standard error code*/
1780 static int jbd_journal_prepare(struct jbd_journal *journal,
1781                                struct jbd_trans *trans)
1782 {
1783         int rc = EOK, i = 0;
1784         int32_t tag_tbl_size = 0;
1785         uint32_t desc_iblock = 0;
1786         uint32_t data_iblock = 0;
1787         char *tag_start = NULL, *tag_ptr = NULL;
1788         struct jbd_buf *jbd_buf, *tmp;
1789         struct ext4_block desc_block, data_block;
1790         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1791         uint32_t checksum = EXT4_CRC32_INIT;
1792
1793         /* Try to remove any non-dirty buffers from the tail of
1794          * buf_queue. */
1795         TAILQ_FOREACH_REVERSE_SAFE(jbd_buf, &trans->buf_queue,
1796                         jbd_trans_buf, buf_node, tmp) {
1797                 /* We stop the iteration when we find a dirty buffer. */
1798                 if (ext4_bcache_test_flag(jbd_buf->block.buf,
1799                                         BC_DIRTY))
1800                         break;
1801         
1802                 TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1803                         jbd_buf,
1804                         dirty_buf_node);
1805
1806                 jbd_buf->block.buf->end_write = NULL;
1807                 jbd_buf->block.buf->end_write_arg = NULL;
1808                 jbd_trans_finish_callback(journal,
1809                                 trans,
1810                                 jbd_buf->block_rec,
1811                                 true);
1812
1813                 /* The buffer has not been modified, just release
1814                  * that jbd_buf. */
1815                 jbd_trans_remove_block_rec(journal,
1816                                 jbd_buf->block_rec, trans);
1817                 trans->data_cnt--;
1818
1819                 ext4_block_set(fs->bdev, &jbd_buf->block);
1820                 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1821                 free(jbd_buf);
1822         }
1823
1824         TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node, tmp) {
1825                 struct tag_info tag_info;
1826                 bool uuid_exist = false;
1827                 if (!ext4_bcache_test_flag(jbd_buf->block.buf,
1828                                            BC_DIRTY)) {
1829                         TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1830                                         jbd_buf,
1831                                         dirty_buf_node);
1832
1833                         jbd_buf->block.buf->end_write = NULL;
1834                         jbd_buf->block.buf->end_write_arg = NULL;
1835                         jbd_trans_finish_callback(journal,
1836                                         trans,
1837                                         jbd_buf->block_rec,
1838                                         true);
1839
1840                         /* The buffer has not been modified, just release
1841                          * that jbd_buf. */
1842                         jbd_trans_remove_block_rec(journal,
1843                                         jbd_buf->block_rec, trans);
1844                         trans->data_cnt--;
1845
1846                         ext4_block_set(fs->bdev, &jbd_buf->block);
1847                         TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1848                         free(jbd_buf);
1849                         continue;
1850                 }
1851                 checksum = jbd_block_csum(journal->jbd_fs,
1852                                           jbd_buf->block.data,
1853                                           checksum,
1854                                           trans->trans_id);
1855 again:
1856                 if (!desc_iblock) {
1857                         struct jbd_bhdr *bhdr;
1858                         desc_iblock = jbd_journal_alloc_block(journal, trans);
1859                         rc = jbd_block_get_noread(journal->jbd_fs,
1860                                            &desc_block, desc_iblock);
1861                         if (rc != EOK)
1862                                 break;
1863
1864                         ext4_bcache_set_dirty(desc_block.buf);
1865
1866                         bhdr = (struct jbd_bhdr *)desc_block.data;
1867                         jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1868                         jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK);
1869                         jbd_set32(bhdr, sequence, trans->trans_id);
1870
1871                         tag_start = (char *)(bhdr + 1);
1872                         tag_ptr = tag_start;
1873                         uuid_exist = true;
1874                         tag_tbl_size = journal->block_size -
1875                                 sizeof(struct jbd_bhdr);
1876
1877                         if (jbd_has_csum(&journal->jbd_fs->sb))
1878                                 tag_tbl_size -= sizeof(struct jbd_block_tail);
1879
1880                         if (!trans->start_iblock)
1881                                 trans->start_iblock = desc_iblock;
1882
1883                 }
1884                 tag_info.block = jbd_buf->block.lb_id;
1885                 tag_info.uuid_exist = uuid_exist;
1886                 if (i == trans->data_cnt - 1)
1887                         tag_info.last_tag = true;
1888                 else
1889                         tag_info.last_tag = false;
1890
1891                 tag_info.checksum = checksum;
1892
1893                 if (uuid_exist)
1894                         memcpy(tag_info.uuid, journal->jbd_fs->sb.uuid,
1895                                         UUID_SIZE);
1896
1897                 rc = jbd_write_block_tag(journal->jbd_fs,
1898                                 tag_ptr,
1899                                 tag_tbl_size,
1900                                 &tag_info);
1901                 if (rc != EOK) {
1902                         jbd_meta_csum_set(journal->jbd_fs,
1903                                         (struct jbd_bhdr *)desc_block.data);
1904                         jbd_block_set(journal->jbd_fs, &desc_block);
1905                         desc_iblock = 0;
1906                         goto again;
1907                 }
1908
1909                 data_iblock = jbd_journal_alloc_block(journal, trans);
1910                 rc = jbd_block_get_noread(journal->jbd_fs,
1911                                 &data_block, data_iblock);
1912                 if (rc != EOK)
1913                         break;
1914
1915                 ext4_bcache_set_dirty(data_block.buf);
1916
1917                 memcpy(data_block.data, jbd_buf->block.data,
1918                         journal->block_size);
1919                 jbd_buf->jbd_lba = data_block.lb_id;
1920
1921                 rc = jbd_block_set(journal->jbd_fs, &data_block);
1922                 if (rc != EOK)
1923                         break;
1924
1925                 tag_ptr += tag_info.tag_bytes;
1926                 tag_tbl_size -= tag_info.tag_bytes;
1927
1928                 i++;
1929         }
1930         if (rc == EOK && desc_iblock) {
1931                 jbd_meta_csum_set(journal->jbd_fs,
1932                                 (struct jbd_bhdr *)desc_block.data);
1933                 trans->data_csum = checksum;
1934                 jbd_block_set(journal->jbd_fs, &desc_block);
1935         }
1936
1937         return rc;
1938 }
1939
1940 /**@brief  Write revoke block for a transaction
1941  * @param  journal current journal session
1942  * @param  trans transaction
1943  * @return standard error code*/
1944 static int
1945 jbd_journal_prepare_revoke(struct jbd_journal *journal,
1946                            struct jbd_trans *trans)
1947 {
1948         int rc = EOK, i = 0;
1949         int32_t tag_tbl_size = 0;
1950         uint32_t desc_iblock = 0;
1951         char *blocks_entry = NULL;
1952         struct jbd_revoke_rec *rec, *tmp;
1953         struct ext4_block desc_block;
1954         struct jbd_revoke_header *header = NULL;
1955         int32_t record_len = 4;
1956
1957         if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1958                                      JBD_FEATURE_INCOMPAT_64BIT))
1959                 record_len = 8;
1960
1961         LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1962                           tmp) {
1963 again:
1964                 if (!desc_iblock) {
1965                         struct jbd_bhdr *bhdr;
1966                         desc_iblock = jbd_journal_alloc_block(journal, trans);
1967                         rc = jbd_block_get_noread(journal->jbd_fs,
1968                                            &desc_block, desc_iblock);
1969                         if (rc != EOK) {
1970                                 break;
1971                         }
1972
1973                         ext4_bcache_set_dirty(desc_block.buf);
1974
1975                         bhdr = (struct jbd_bhdr *)desc_block.data;
1976                         jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1977                         jbd_set32(bhdr, blocktype, JBD_REVOKE_BLOCK);
1978                         jbd_set32(bhdr, sequence, trans->trans_id);
1979                         
1980                         header = (struct jbd_revoke_header *)bhdr;
1981                         blocks_entry = (char *)(header + 1);
1982                         tag_tbl_size = journal->block_size -
1983                                 sizeof(struct jbd_revoke_header);
1984
1985                         if (jbd_has_csum(&journal->jbd_fs->sb))
1986                                 tag_tbl_size -= sizeof(struct jbd_block_tail);
1987
1988                         if (!trans->start_iblock)
1989                                 trans->start_iblock = desc_iblock;
1990
1991                 }
1992
1993                 if (tag_tbl_size < record_len) {
1994                         jbd_set32(header, count,
1995                                   journal->block_size - tag_tbl_size);
1996                         jbd_meta_csum_set(journal->jbd_fs,
1997                                         (struct jbd_bhdr *)desc_block.data);
1998                         jbd_block_set(journal->jbd_fs, &desc_block);
1999                         desc_iblock = 0;
2000                         header = NULL;
2001                         goto again;
2002                 }
2003                 if (record_len == 8) {
2004                         uint64_t *blocks =
2005                                 (uint64_t *)blocks_entry;
2006                         *blocks = to_be64(rec->lba);
2007                 } else {
2008                         uint32_t *blocks =
2009                                 (uint32_t *)blocks_entry;
2010                         *blocks = to_be32((uint32_t)rec->lba);
2011                 }
2012                 blocks_entry += record_len;
2013                 tag_tbl_size -= record_len;
2014
2015                 i++;
2016         }
2017         if (rc == EOK && desc_iblock) {
2018                 if (header != NULL)
2019                         jbd_set32(header, count,
2020                                   journal->block_size - tag_tbl_size);
2021
2022                 jbd_meta_csum_set(journal->jbd_fs,
2023                                 (struct jbd_bhdr *)desc_block.data);
2024                 jbd_block_set(journal->jbd_fs, &desc_block);
2025         }
2026
2027         return rc;
2028 }
2029
2030 /**@brief  Put references of block descriptors in a transaction.
2031  * @param  journal current journal session
2032  * @param  trans transaction*/
2033 void jbd_journal_cp_trans(struct jbd_journal *journal, struct jbd_trans *trans)
2034 {
2035         struct jbd_buf *jbd_buf, *tmp;
2036         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
2037         TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
2038                         tmp) {
2039                 struct ext4_block block = jbd_buf->block;
2040                 ext4_block_set(fs->bdev, &block);
2041         }
2042 }
2043
2044 /**@brief  Update the start block of the journal when
2045  *         all the contents in a transaction reach the disk.*/
2046 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
2047                           struct ext4_buf *buf,
2048                           int res,
2049                           void *arg)
2050 {
2051         struct jbd_buf *jbd_buf = arg;
2052         struct jbd_trans *trans = jbd_buf->trans;
2053         struct jbd_block_rec *block_rec = jbd_buf->block_rec;
2054         struct jbd_journal *journal = trans->journal;
2055         bool first_in_queue =
2056                 trans == TAILQ_FIRST(&journal->cp_queue);
2057         if (res != EOK)
2058                 trans->error = res;
2059
2060         TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
2061         TAILQ_REMOVE(&block_rec->dirty_buf_queue,
2062                         jbd_buf,
2063                         dirty_buf_node);
2064
2065         jbd_trans_finish_callback(journal,
2066                         trans,
2067                         jbd_buf->block_rec,
2068                         false);
2069         if (block_rec->trans == trans) {
2070                 block_rec->buf = NULL;
2071                 /* Clear the end_write and end_write_arg fields. */
2072                 buf->end_write = NULL;
2073                 buf->end_write_arg = NULL;
2074         }
2075
2076         free(jbd_buf);
2077
2078         trans->written_cnt++;
2079         if (trans->written_cnt == trans->data_cnt) {
2080                 /* If it is the first transaction on checkpoint queue,
2081                  * we will shift the start of the journal to the next
2082                  * transaction, and remove subsequent written
2083                  * transactions from checkpoint queue until we find
2084                  * an unwritten one. */
2085                 if (first_in_queue) {
2086                         journal->start = trans->start_iblock +
2087                                 trans->alloc_blocks;
2088                         wrap(&journal->jbd_fs->sb, journal->start);
2089                         journal->trans_id = trans->trans_id + 1;
2090                         TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
2091                         jbd_journal_free_trans(journal, trans, false);
2092
2093                         jbd_journal_purge_cp_trans(journal, false, true);
2094                         jbd_journal_write_sb(journal);
2095                         jbd_write_sb(journal->jbd_fs);
2096                 }
2097         }
2098 }
2099
2100 /**@brief  Commit a transaction to the journal immediately.
2101  * @param  journal current journal session
2102  * @param  trans transaction
2103  * @return standard error code*/
2104 int jbd_journal_commit_trans(struct jbd_journal *journal,
2105                              struct jbd_trans *trans)
2106 {
2107         int rc = EOK;
2108         uint32_t last = journal->last;
2109
2110         trans->trans_id = journal->alloc_trans_id;
2111         rc = jbd_journal_prepare(journal, trans);
2112         if (rc != EOK)
2113                 goto Finish;
2114
2115         rc = jbd_journal_prepare_revoke(journal, trans);
2116         if (rc != EOK)
2117                 goto Finish;
2118
2119         if (TAILQ_EMPTY(&trans->buf_queue) &&
2120             LIST_EMPTY(&trans->revoke_list)) {
2121                 /* Since there are no entries in both buffer list
2122                  * and revoke entry list, we do not consider trans as
2123                  * complete transaction and just return EOK.*/
2124                 jbd_journal_free_trans(journal, trans, false);
2125                 goto Finish;
2126         }
2127
2128         rc = jbd_trans_write_commit_block(trans);
2129         if (rc != EOK)
2130                 goto Finish;
2131
2132         journal->alloc_trans_id++;
2133         if (TAILQ_EMPTY(&journal->cp_queue)) {
2134                 if (trans->data_cnt) {
2135                         journal->start = trans->start_iblock;
2136                         wrap(&journal->jbd_fs->sb, journal->start);
2137                         journal->trans_id = trans->trans_id;
2138                         jbd_journal_write_sb(journal);
2139                         jbd_write_sb(journal->jbd_fs);
2140                         TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
2141                                         trans_node);
2142                         jbd_journal_cp_trans(journal, trans);
2143                 } else {
2144                         journal->start = trans->start_iblock +
2145                                 trans->alloc_blocks;
2146                         wrap(&journal->jbd_fs->sb, journal->start);
2147                         journal->trans_id = trans->trans_id + 1;
2148                         jbd_journal_write_sb(journal);
2149                         jbd_journal_free_trans(journal, trans, false);
2150                 }
2151         } else {
2152                 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
2153                                 trans_node);
2154                 if (trans->data_cnt)
2155                         jbd_journal_cp_trans(journal, trans);
2156
2157         }
2158 Finish:
2159         if (rc != EOK) {
2160                 journal->last = last;
2161                 jbd_journal_free_trans(journal, trans, true);
2162         }
2163         return rc;
2164 }
2165
2166 /**
2167  * @}
2168  */