ext4_journal: add once parameter to loops.
[lwext4.git] / src / ext4_journal.c
1 /*
2  * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3  * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * - Redistributions of source code must retain the above copyright
11  *   notice, this list of conditions and the following disclaimer.
12  * - Redistributions in binary form must reproduce the above copyright
13  *   notice, this list of conditions and the following disclaimer in the
14  *   documentation and/or other materials provided with the distribution.
15  * - The name of the author may not be used to endorse or promote products
16  *   derived from this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30 /** @addtogroup lwext4
31  * @{
32  */
33 /**
34  * @file  ext4_journal.c
35  * @brief Journal handle functions
36  */
37
38 #include "ext4_config.h"
39 #include "ext4_types.h"
40 #include "ext4_misc.h"
41 #include "ext4_errno.h"
42 #include "ext4_debug.h"
43
44 #include "ext4_fs.h"
45 #include "ext4_super.h"
46 #include "ext4_journal.h"
47 #include "ext4_blockdev.h"
48 #include "ext4_crc32.h"
49 #include "ext4_journal.h"
50
51 #include <string.h>
52 #include <stdlib.h>
53
54 /**@brief  Revoke entry during journal replay.*/
55 struct revoke_entry {
56         /**@brief  Block number not to be replayed.*/
57         ext4_fsblk_t block;
58
59         /**@brief  For any transaction id smaller
60          *         than trans_id, records of @block
61          *         in those transactions should not
62          *         be replayed.*/
63         uint32_t trans_id;
64
65         /**@brief  Revoke tree node.*/
66         RB_ENTRY(revoke_entry) revoke_node;
67 };
68
69 /**@brief  Valid journal replay information.*/
70 struct recover_info {
71         /**@brief  Starting transaction id.*/
72         uint32_t start_trans_id;
73
74         /**@brief  Ending transaction id.*/
75         uint32_t last_trans_id;
76
77         /**@brief  Used as internal argument.*/
78         uint32_t this_trans_id;
79
80         /**@brief  No of transactions went through.*/
81         uint32_t trans_cnt;
82
83         /**@brief  RB-Tree storing revoke entries.*/
84         RB_HEAD(jbd_revoke, revoke_entry) revoke_root;
85 };
86
/**@brief  Internal arguments handed to the journal replay callbacks.*/
struct replay_arg {
        struct recover_info *info;      /**< Journal replay information.*/
        uint32_t *this_block;           /**< Current block we are on.*/
        uint32_t this_trans_id;         /**< Current trans_id we are on.*/
};
98
99 static int
100 jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b)
101 {
102         if (a->block > b->block)
103                 return 1;
104         else if (a->block < b->block)
105                 return -1;
106         return 0;
107 }
108
109 static int
110 jbd_block_rec_cmp(struct jbd_block_rec *a, struct jbd_block_rec *b)
111 {
112         if (a->lba > b->lba)
113                 return 1;
114         else if (a->lba < b->lba)
115                 return -1;
116         return 0;
117 }
118
/* Instantiate the red-black tree routines for the revoke tree
 * (struct revoke_entry linked through revoke_node, ordered by
 * jbd_revoke_entry_cmp) as static inline functions. */
RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
                     jbd_revoke_entry_cmp, static inline)
/* Same for the block record tree (struct jbd_block_rec linked through
 * block_rec_node, ordered by jbd_block_rec_cmp). */
RB_GENERATE_INTERNAL(jbd_block, jbd_block_rec, block_rec_node,
                     jbd_block_rec_cmp, static inline)

/* Revoke entries are zero-initialized heap objects (calloc/free). */
#define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry))
#define jbd_free_revoke_entry(addr) free(addr)
126
127 static int jbd_has_csum(struct jbd_sb *jbd_sb)
128 {
129         if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V2))
130                 return 2;
131
132         if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V3))
133                 return 3;
134
135         return 0;
136 }
137
138 #if CONFIG_META_CSUM_ENABLE
139 static uint32_t jbd_sb_csum(struct jbd_sb *jbd_sb)
140 {
141         uint32_t checksum = 0;
142
143         if (jbd_has_csum(jbd_sb)) {
144                 uint32_t orig_checksum = jbd_sb->checksum;
145                 jbd_set32(jbd_sb, checksum, 0);
146                 /* Calculate crc32c checksum against tho whole superblock */
147                 checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_sb,
148                                 JBD_SUPERBLOCK_SIZE);
149                 jbd_sb->checksum = orig_checksum;
150         }
151         return checksum;
152 }
153 #else
154 #define jbd_sb_csum(...) 0
155 #endif
156
157 static void jbd_sb_csum_set(struct jbd_sb *jbd_sb)
158 {
159         if (!jbd_has_csum(jbd_sb))
160                 return;
161
162         jbd_set32(jbd_sb, checksum, jbd_sb_csum(jbd_sb));
163 }
164
165 #if CONFIG_META_CSUM_ENABLE
166 static bool
167 jbd_verify_sb_csum(struct jbd_sb *jbd_sb)
168 {
169         if (!jbd_has_csum(jbd_sb))
170                 return true;
171
172         return jbd_sb_csum(jbd_sb) == jbd_get32(jbd_sb, checksum);
173 }
174 #else
175 #define jbd_verify_sb_csum(...) true
176 #endif
177
178 #if CONFIG_META_CSUM_ENABLE
179 static uint32_t jbd_meta_csum(struct jbd_fs *jbd_fs,
180                               struct jbd_bhdr *bhdr)
181 {
182         uint32_t checksum = 0;
183
184         if (jbd_has_csum(&jbd_fs->sb)) {
185                 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
186                 struct jbd_block_tail *tail =
187                         (struct jbd_block_tail *)((char *)bhdr + block_size -
188                                 sizeof(struct jbd_block_tail));
189                 uint32_t orig_checksum = tail->checksum;
190                 tail->checksum = 0;
191
192                 /* First calculate crc32c checksum against fs uuid */
193                 checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
194                                        sizeof(jbd_fs->sb.uuid));
195                 /* Calculate crc32c checksum against tho whole block */
196                 checksum = ext4_crc32c(checksum, bhdr,
197                                 block_size);
198                 tail->checksum = orig_checksum;
199         }
200         return checksum;
201 }
202 #else
203 #define jbd_meta_csum(...) 0
204 #endif
205
206 static void jbd_meta_csum_set(struct jbd_fs *jbd_fs,
207                               struct jbd_bhdr *bhdr)
208 {
209         uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
210         struct jbd_block_tail *tail = (struct jbd_block_tail *)
211                                 ((char *)bhdr + block_size -
212                                 sizeof(struct jbd_block_tail));
213         if (!jbd_has_csum(&jbd_fs->sb))
214                 return;
215
216         tail->checksum = to_be32(jbd_meta_csum(jbd_fs, bhdr));
217 }
218
219 #if CONFIG_META_CSUM_ENABLE
220 static bool
221 jbd_verify_meta_csum(struct jbd_fs *jbd_fs,
222                      struct jbd_bhdr *bhdr)
223 {
224         uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
225         struct jbd_block_tail *tail = (struct jbd_block_tail *)
226                                 ((char *)bhdr + block_size -
227                                 sizeof(struct jbd_block_tail));
228         if (!jbd_has_csum(&jbd_fs->sb))
229                 return true;
230
231         return jbd_meta_csum(jbd_fs, bhdr) == to_be32(tail->checksum);
232 }
233 #else
234 #define jbd_verify_meta_csum(...) true
235 #endif
236
237 #if CONFIG_META_CSUM_ENABLE
238 static uint32_t jbd_commit_csum(struct jbd_fs *jbd_fs,
239                               struct jbd_commit_header *header)
240 {
241         uint32_t checksum = 0;
242
243         if (jbd_has_csum(&jbd_fs->sb)) {
244                 uint32_t orig_checksum_type = header->chksum_type,
245                          orig_checksum_size = header->chksum_size,
246                          orig_checksum = header->chksum[0];
247                 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
248                 header->chksum_type = 0;
249                 header->chksum_size = 0;
250                 header->chksum[0] = 0;
251
252                 /* First calculate crc32c checksum against fs uuid */
253                 checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
254                                        sizeof(jbd_fs->sb.uuid));
255                 /* Calculate crc32c checksum against tho whole block */
256                 checksum = ext4_crc32c(checksum, header,
257                                 block_size);
258
259                 header->chksum_type = orig_checksum_type;
260                 header->chksum_size = orig_checksum_size;
261                 header->chksum[0] = orig_checksum;
262         }
263         return checksum;
264 }
265 #else
266 #define jbd_commit_csum(...) 0
267 #endif
268
269 static void jbd_commit_csum_set(struct jbd_fs *jbd_fs,
270                               struct jbd_commit_header *header)
271 {
272         if (!jbd_has_csum(&jbd_fs->sb))
273                 return;
274
275         header->chksum_type = 0;
276         header->chksum_size = 0;
277         header->chksum[0] = jbd_commit_csum(jbd_fs, header);
278 }
279
280 #if CONFIG_META_CSUM_ENABLE
281 static bool jbd_verify_commit_csum(struct jbd_fs *jbd_fs,
282                                    struct jbd_commit_header *header)
283 {
284         if (!jbd_has_csum(&jbd_fs->sb))
285                 return true;
286
287         return header->chksum[0] == to_be32(jbd_commit_csum(jbd_fs,
288                                             header));
289 }
290 #else
291 #define jbd_verify_commit_csum(...) true
292 #endif
293
294 #if CONFIG_META_CSUM_ENABLE
295 /*
296  * NOTE: We only make use of @csum parameter when
297  *       JBD_FEATURE_COMPAT_CHECKSUM is enabled.
298  */
299 static uint32_t jbd_block_csum(struct jbd_fs *jbd_fs, const void *buf,
300                                uint32_t csum,
301                                uint32_t sequence)
302 {
303         uint32_t checksum = 0;
304
305         if (jbd_has_csum(&jbd_fs->sb)) {
306                 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
307                 /* First calculate crc32c checksum against fs uuid */
308                 checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
309                                        sizeof(jbd_fs->sb.uuid));
310                 /* Then calculate crc32c checksum against sequence no. */
311                 checksum = ext4_crc32c(checksum, &sequence,
312                                 sizeof(uint32_t));
313                 /* Calculate crc32c checksum against tho whole block */
314                 checksum = ext4_crc32c(checksum, buf,
315                                 block_size);
316         } else if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
317                                      JBD_FEATURE_COMPAT_CHECKSUM)) {
318                 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
319                 /* Calculate crc32c checksum against tho whole block */
320                 checksum = ext4_crc32(csum, buf,
321                                 block_size);
322         }
323         return checksum;
324 }
325 #else
326 #define jbd_block_csum(...) 0
327 #endif
328
329 static void jbd_block_tag_csum_set(struct jbd_fs *jbd_fs, void *__tag,
330                                    uint32_t checksum)
331 {
332         int ver = jbd_has_csum(&jbd_fs->sb);
333         if (!ver)
334                 return;
335
336         if (ver == 2) {
337                 struct jbd_block_tag *tag = __tag;
338                 tag->checksum = (uint16_t)to_be32(checksum);
339         } else {
340                 struct jbd_block_tag3 *tag = __tag;
341                 tag->checksum = to_be32(checksum);
342         }
343 }
344
345 /**@brief  Write jbd superblock to disk.
346  * @param  jbd_fs jbd filesystem
347  * @param  s jbd superblock
348  * @return standard error code*/
349 static int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s)
350 {
351         int rc;
352         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
353         uint64_t offset;
354         ext4_fsblk_t fblock;
355         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
356         if (rc != EOK)
357                 return rc;
358
359         jbd_sb_csum_set(s);
360         offset = fblock * ext4_sb_get_block_size(&fs->sb);
361         return ext4_block_writebytes(fs->bdev, offset, s,
362                                      EXT4_SUPERBLOCK_SIZE);
363 }
364
365 /**@brief  Read jbd superblock from disk.
366  * @param  jbd_fs jbd filesystem
367  * @param  s jbd superblock
368  * @return standard error code*/
369 static int jbd_sb_read(struct jbd_fs *jbd_fs, struct jbd_sb *s)
370 {
371         int rc;
372         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
373         uint64_t offset;
374         ext4_fsblk_t fblock;
375         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
376         if (rc != EOK)
377                 return rc;
378
379         offset = fblock * ext4_sb_get_block_size(&fs->sb);
380         return ext4_block_readbytes(fs->bdev, offset, s,
381                                     EXT4_SUPERBLOCK_SIZE);
382 }
383
384 /**@brief  Verify jbd superblock.
385  * @param  sb jbd superblock
386  * @return true if jbd superblock is valid */
387 static bool jbd_verify_sb(struct jbd_sb *sb)
388 {
389         struct jbd_bhdr *header = &sb->header;
390         if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER)
391                 return false;
392
393         if (jbd_get32(header, blocktype) != JBD_SUPERBLOCK &&
394             jbd_get32(header, blocktype) != JBD_SUPERBLOCK_V2)
395                 return false;
396
397         return jbd_verify_sb_csum(sb);
398 }
399
400 /**@brief  Write back dirty jbd superblock to disk.
401  * @param  jbd_fs jbd filesystem
402  * @return standard error code*/
403 static int jbd_write_sb(struct jbd_fs *jbd_fs)
404 {
405         int rc = EOK;
406         if (jbd_fs->dirty) {
407                 rc = jbd_sb_write(jbd_fs, &jbd_fs->sb);
408                 if (rc != EOK)
409                         return rc;
410
411                 jbd_fs->dirty = false;
412         }
413         return rc;
414 }
415
416 /**@brief  Get reference to jbd filesystem.
417  * @param  fs Filesystem to load journal of
418  * @param  jbd_fs jbd filesystem
419  * @return standard error code*/
420 int jbd_get_fs(struct ext4_fs *fs,
421                struct jbd_fs *jbd_fs)
422 {
423         int rc;
424         uint32_t journal_ino;
425
426         memset(jbd_fs, 0, sizeof(struct jbd_fs));
427         /* See if there is journal inode on this filesystem.*/
428         /* FIXME: detection on existance ofbkejournal bdev is
429          *        missing.*/
430         journal_ino = ext4_get32(&fs->sb, journal_inode_number);
431
432         rc = ext4_fs_get_inode_ref(fs,
433                                    journal_ino,
434                                    &jbd_fs->inode_ref);
435         if (rc != EOK) {
436                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
437                 return rc;
438         }
439         rc = jbd_sb_read(jbd_fs, &jbd_fs->sb);
440         if (rc != EOK) {
441                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
442                 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
443                 return rc;
444         }
445         if (!jbd_verify_sb(&jbd_fs->sb)) {
446                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
447                 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
448                 rc = EIO;
449         }
450
451         return rc;
452 }
453
454 /**@brief  Put reference of jbd filesystem.
455  * @param  jbd_fs jbd filesystem
456  * @return standard error code*/
457 int jbd_put_fs(struct jbd_fs *jbd_fs)
458 {
459         int rc = EOK;
460         rc = jbd_write_sb(jbd_fs);
461
462         ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
463         return rc;
464 }
465
466 /**@brief  Data block lookup helper.
467  * @param  jbd_fs jbd filesystem
468  * @param  iblock block index
469  * @param  fblock logical block address
470  * @return standard error code*/
471 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
472                    ext4_lblk_t iblock,
473                    ext4_fsblk_t *fblock)
474 {
475         int rc = ext4_fs_get_inode_dblk_idx(
476                         &jbd_fs->inode_ref,
477                         iblock,
478                         fblock,
479                         false);
480         return rc;
481 }
482
483 /**@brief   jbd block get function (through cache).
484  * @param   jbd_fs jbd filesystem
485  * @param   block block descriptor
486  * @param   fblock jbd logical block address
487  * @return  standard error code*/
488 static int jbd_block_get(struct jbd_fs *jbd_fs,
489                   struct ext4_block *block,
490                   ext4_fsblk_t fblock)
491 {
492         /* TODO: journal device. */
493         int rc;
494         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
495
496         /* Lookup the logical block address of
497          * fblock.*/
498         rc = jbd_inode_bmap(jbd_fs, iblock,
499                             &fblock);
500         if (rc != EOK)
501                 return rc;
502
503         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
504         rc = ext4_block_get(bdev, block, fblock);
505
506         /* If succeeded, mark buffer as BC_FLUSH to indicate
507          * that data should be written to disk immediately.*/
508         if (rc == EOK) {
509                 ext4_bcache_set_flag(block->buf, BC_FLUSH);
510                 /* As we don't want to occupy too much space
511                  * in block cache, we set this buffer BC_TMP.*/
512                 ext4_bcache_set_flag(block->buf, BC_TMP);
513         }
514
515         return rc;
516 }
517
518 /**@brief   jbd block get function (through cache, don't read).
519  * @param   jbd_fs jbd filesystem
520  * @param   block block descriptor
521  * @param   fblock jbd logical block address
522  * @return  standard error code*/
523 static int jbd_block_get_noread(struct jbd_fs *jbd_fs,
524                          struct ext4_block *block,
525                          ext4_fsblk_t fblock)
526 {
527         /* TODO: journal device. */
528         int rc;
529         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
530         rc = jbd_inode_bmap(jbd_fs, iblock,
531                             &fblock);
532         if (rc != EOK)
533                 return rc;
534
535         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
536         rc = ext4_block_get_noread(bdev, block, fblock);
537         if (rc == EOK)
538                 ext4_bcache_set_flag(block->buf, BC_FLUSH);
539
540         return rc;
541 }
542
543 /**@brief   jbd block set procedure (through cache).
544  * @param   jbd_fs jbd filesystem
545  * @param   block block descriptor
546  * @return  standard error code*/
547 static int jbd_block_set(struct jbd_fs *jbd_fs,
548                   struct ext4_block *block)
549 {
550         return ext4_block_set(jbd_fs->inode_ref.fs->bdev,
551                               block);
552 }
553
554 /**@brief  helper functions to calculate
555  *         block tag size, not including UUID part.
556  * @param  jbd_fs jbd filesystem
557  * @return tag size in bytes*/
558 static int jbd_tag_bytes(struct jbd_fs *jbd_fs)
559 {
560         int size;
561
562         /* It is very easy to deal with the case which
563          * JBD_FEATURE_INCOMPAT_CSUM_V3 is enabled.*/
564         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
565                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
566                 return sizeof(struct jbd_block_tag3);
567
568         size = sizeof(struct jbd_block_tag);
569
570         /* If JBD_FEATURE_INCOMPAT_CSUM_V2 is enabled,
571          * add 2 bytes to size.*/
572         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
573                                      JBD_FEATURE_INCOMPAT_CSUM_V2))
574                 size += sizeof(uint16_t);
575
576         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
577                                      JBD_FEATURE_INCOMPAT_64BIT))
578                 return size;
579
580         /* If block number is 4 bytes in size,
581          * minus 4 bytes from size */
582         return size - sizeof(uint32_t);
583 }
584
585 /**@brief  Tag information. */
586 struct tag_info {
587         /**@brief  Tag size in bytes, including UUID part.*/
588         int tag_bytes;
589
590         /**@brief  block number stored in this tag.*/
591         ext4_fsblk_t block;
592
593         /**@brief  whether UUID part exists or not.*/
594         bool uuid_exist;
595
596         /**@brief  UUID content if UUID part exists.*/
597         uint8_t uuid[UUID_SIZE];
598
599         /**@brief  Is this the last tag? */
600         bool last_tag;
601
602         /**@brief  crc32c checksum. */
603         uint32_t checksum;
604 };
605
606 /**@brief  Extract information from a block tag.
607  * @param  __tag pointer to the block tag
608  * @param  tag_bytes block tag size of this jbd filesystem
609  * @param  remaining size in buffer containing the block tag
610  * @param  tag_info information of this tag.
611  * @return  EOK when succeed, otherwise return EINVAL.*/
612 static int
613 jbd_extract_block_tag(struct jbd_fs *jbd_fs,
614                       void *__tag,
615                       int tag_bytes,
616                       int32_t remain_buf_size,
617                       struct tag_info *tag_info)
618 {
619         char *uuid_start;
620         tag_info->tag_bytes = tag_bytes;
621         tag_info->uuid_exist = false;
622         tag_info->last_tag = false;
623
624         /* See whether it is possible to hold a valid block tag.*/
625         if (remain_buf_size - tag_bytes < 0)
626                 return EINVAL;
627
628         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
629                                      JBD_FEATURE_INCOMPAT_CSUM_V3)) {
630                 struct jbd_block_tag3 *tag = __tag;
631                 tag_info->block = jbd_get32(tag, blocknr);
632                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
633                                              JBD_FEATURE_INCOMPAT_64BIT))
634                          tag_info->block |=
635                                  (uint64_t)jbd_get32(tag, blocknr_high) << 32;
636
637                 if (jbd_get32(tag, flags) & JBD_FLAG_ESCAPE)
638                         tag_info->block = 0;
639
640                 if (!(jbd_get32(tag, flags) & JBD_FLAG_SAME_UUID)) {
641                         /* See whether it is possible to hold UUID part.*/
642                         if (remain_buf_size - tag_bytes < UUID_SIZE)
643                                 return EINVAL;
644
645                         uuid_start = (char *)tag + tag_bytes;
646                         tag_info->uuid_exist = true;
647                         tag_info->tag_bytes += UUID_SIZE;
648                         memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
649                 }
650
651                 if (jbd_get32(tag, flags) & JBD_FLAG_LAST_TAG)
652                         tag_info->last_tag = true;
653
654         } else {
655                 struct jbd_block_tag *tag = __tag;
656                 tag_info->block = jbd_get32(tag, blocknr);
657                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
658                                              JBD_FEATURE_INCOMPAT_64BIT))
659                          tag_info->block |=
660                                  (uint64_t)jbd_get32(tag, blocknr_high) << 32;
661
662                 if (jbd_get16(tag, flags) & JBD_FLAG_ESCAPE)
663                         tag_info->block = 0;
664
665                 if (!(jbd_get16(tag, flags) & JBD_FLAG_SAME_UUID)) {
666                         /* See whether it is possible to hold UUID part.*/
667                         if (remain_buf_size - tag_bytes < UUID_SIZE)
668                                 return EINVAL;
669
670                         uuid_start = (char *)tag + tag_bytes;
671                         tag_info->uuid_exist = true;
672                         tag_info->tag_bytes += UUID_SIZE;
673                         memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
674                 }
675
676                 if (jbd_get16(tag, flags) & JBD_FLAG_LAST_TAG)
677                         tag_info->last_tag = true;
678
679         }
680         return EOK;
681 }
682
683 /**@brief  Write information to a block tag.
684  * @param  __tag pointer to the block tag
685  * @param  remaining size in buffer containing the block tag
686  * @param  tag_info information of this tag.
687  * @return  EOK when succeed, otherwise return EINVAL.*/
688 static int
689 jbd_write_block_tag(struct jbd_fs *jbd_fs,
690                     void *__tag,
691                     int32_t remain_buf_size,
692                     struct tag_info *tag_info)
693 {
694         char *uuid_start;
695         int tag_bytes = jbd_tag_bytes(jbd_fs);
696
697         tag_info->tag_bytes = tag_bytes;
698
699         /* See whether it is possible to hold a valid block tag.*/
700         if (remain_buf_size - tag_bytes < 0)
701                 return EINVAL;
702
703         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
704                                      JBD_FEATURE_INCOMPAT_CSUM_V3)) {
705                 struct jbd_block_tag3 *tag = __tag;
706                 memset(tag, 0, sizeof(struct jbd_block_tag3));
707                 jbd_set32(tag, blocknr, (uint32_t)tag_info->block);
708                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
709                                              JBD_FEATURE_INCOMPAT_64BIT))
710                         jbd_set32(tag, blocknr_high, tag_info->block >> 32);
711
712                 if (tag_info->uuid_exist) {
713                         /* See whether it is possible to hold UUID part.*/
714                         if (remain_buf_size - tag_bytes < UUID_SIZE)
715                                 return EINVAL;
716
717                         uuid_start = (char *)tag + tag_bytes;
718                         tag_info->tag_bytes += UUID_SIZE;
719                         memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
720                 } else
721                         jbd_set32(tag, flags,
722                                   jbd_get32(tag, flags) | JBD_FLAG_SAME_UUID);
723
724                 jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
725
726                 if (tag_info->last_tag)
727                         jbd_set32(tag, flags,
728                                   jbd_get32(tag, flags) | JBD_FLAG_LAST_TAG);
729
730         } else {
731                 struct jbd_block_tag *tag = __tag;
732                 memset(tag, 0, sizeof(struct jbd_block_tag));
733                 jbd_set32(tag, blocknr, (uint32_t)tag_info->block);
734                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
735                                              JBD_FEATURE_INCOMPAT_64BIT))
736                         jbd_set32(tag, blocknr_high, tag_info->block >> 32);
737
738                 if (tag_info->uuid_exist) {
739                         /* See whether it is possible to hold UUID part.*/
740                         if (remain_buf_size - tag_bytes < UUID_SIZE)
741                                 return EINVAL;
742
743                         uuid_start = (char *)tag + tag_bytes;
744                         tag_info->tag_bytes += UUID_SIZE;
745                         memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
746                 } else
747                         jbd_set16(tag, flags,
748                                   jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID);
749
750                 jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
751
752                 if (tag_info->last_tag)
753                         jbd_set16(tag, flags,
754                                   jbd_get16(tag, flags) | JBD_FLAG_LAST_TAG);
755
756         }
757         return EOK;
758 }
759
760 /**@brief  Iterate all block tags in a block.
761  * @param  jbd_fs jbd filesystem
762  * @param  __tag_start pointer to the block
763  * @param  tag_tbl_size size of the block
764  * @param  func callback routine to indicate that
765  *         a block tag is found
766  * @param  arg additional argument to be passed to func */
767 static void
768 jbd_iterate_block_table(struct jbd_fs *jbd_fs,
769                         void *__tag_start,
770                         int32_t tag_tbl_size,
771                         void (*func)(struct jbd_fs * jbd_fs,
772                                         ext4_fsblk_t block,
773                                         uint8_t *uuid,
774                                         void *arg),
775                         void *arg)
776 {
777         char *tag_start, *tag_ptr;
778         int tag_bytes = jbd_tag_bytes(jbd_fs);
779         tag_start = __tag_start;
780         tag_ptr = tag_start;
781
782         /* Cut off the size of block tail storing checksum. */
783         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
784                                      JBD_FEATURE_INCOMPAT_CSUM_V2) ||
785             JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
786                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
787                 tag_tbl_size -= sizeof(struct jbd_block_tail);
788
789         while (tag_tbl_size) {
790                 struct tag_info tag_info;
791                 int rc = jbd_extract_block_tag(jbd_fs,
792                                       tag_ptr,
793                                       tag_bytes,
794                                       tag_tbl_size,
795                                       &tag_info);
796                 if (rc != EOK)
797                         break;
798
799                 if (func)
800                         func(jbd_fs, tag_info.block, tag_info.uuid, arg);
801
802                 /* Stop the iteration when we reach the last tag. */
803                 if (tag_info.last_tag)
804                         break;
805
806                 tag_ptr += tag_info.tag_bytes;
807                 tag_tbl_size -= tag_info.tag_bytes;
808         }
809 }
810
811 static void jbd_display_block_tags(struct jbd_fs *jbd_fs,
812                                    ext4_fsblk_t block,
813                                    uint8_t *uuid,
814                                    void *arg)
815 {
816         uint32_t *iblock = arg;
817         ext4_dbg(DEBUG_JBD, "Block in block_tag: %" PRIu64 "\n", block);
818         (*iblock)++;
819         (void)jbd_fs;
820         (void)uuid;
821         return;
822 }
823
824 static struct revoke_entry *
825 jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)
826 {
827         struct revoke_entry tmp = {
828                 .block = block
829         };
830
831         return RB_FIND(jbd_revoke, &info->revoke_root, &tmp);
832 }
833
834 /**@brief  Replay a block in a transaction.
835  * @param  jbd_fs jbd filesystem
836  * @param  block  block address to be replayed.*/
837 static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
838                                   ext4_fsblk_t block,
839                                   uint8_t *uuid __unused,
840                                   void *__arg)
841 {
842         int r;
843         struct replay_arg *arg = __arg;
844         struct recover_info *info = arg->info;
845         uint32_t *this_block = arg->this_block;
846         struct revoke_entry *revoke_entry;
847         struct ext4_block journal_block, ext4_block;
848         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
849
850         (*this_block)++;
851
852         /* We replay this block only if the current transaction id
853          * is equal or greater than that in revoke entry.*/
854         revoke_entry = jbd_revoke_entry_lookup(info, block);
855         if (revoke_entry &&
856             arg->this_trans_id < revoke_entry->trans_id)
857                 return;
858
859         ext4_dbg(DEBUG_JBD,
860                  "Replaying block in block_tag: %" PRIu64 "\n",
861                  block);
862
863         r = jbd_block_get(jbd_fs, &journal_block, *this_block);
864         if (r != EOK)
865                 return;
866
867         /* We need special treatment for ext4 superblock. */
868         if (block) {
869                 r = ext4_block_get_noread(fs->bdev, &ext4_block, block);
870                 if (r != EOK) {
871                         jbd_block_set(jbd_fs, &journal_block);
872                         return;
873                 }
874
875                 memcpy(ext4_block.data,
876                         journal_block.data,
877                         jbd_get32(&jbd_fs->sb, blocksize));
878
879                 ext4_bcache_set_dirty(ext4_block.buf);
880                 ext4_block_set(fs->bdev, &ext4_block);
881         } else {
882                 uint16_t mount_count, state;
883                 mount_count = ext4_get16(&fs->sb, mount_count);
884                 state = ext4_get16(&fs->sb, state);
885
886                 memcpy(&fs->sb,
887                         journal_block.data + EXT4_SUPERBLOCK_OFFSET,
888                         EXT4_SUPERBLOCK_SIZE);
889
890                 /* Mark system as mounted */
891                 ext4_set16(&fs->sb, state, state);
892                 r = ext4_sb_write(fs->bdev, &fs->sb);
893                 if (r != EOK)
894                         return;
895
896                 /*Update mount count*/
897                 ext4_set16(&fs->sb, mount_count, mount_count);
898         }
899
900         jbd_block_set(jbd_fs, &journal_block);
901         
902         return;
903 }
904
905 /**@brief  Add block address to revoke tree, along with
906  *         its transaction id.
907  * @param  info  journal replay info
908  * @param  block  block address to be replayed.*/
909 static void jbd_add_revoke_block_tags(struct recover_info *info,
910                                       ext4_fsblk_t block)
911 {
912         struct revoke_entry *revoke_entry;
913
914         ext4_dbg(DEBUG_JBD, "Add block %" PRIu64 " to revoke tree\n", block);
915         /* If the revoke entry with respect to the block address
916          * exists already, update its transaction id.*/
917         revoke_entry = jbd_revoke_entry_lookup(info, block);
918         if (revoke_entry) {
919                 revoke_entry->trans_id = info->this_trans_id;
920                 return;
921         }
922
923         revoke_entry = jbd_alloc_revoke_entry();
924         ext4_assert(revoke_entry);
925         revoke_entry->block = block;
926         revoke_entry->trans_id = info->this_trans_id;
927         RB_INSERT(jbd_revoke, &info->revoke_root, revoke_entry);
928
929         return;
930 }
931
932 static void jbd_destroy_revoke_tree(struct recover_info *info)
933 {
934         while (!RB_EMPTY(&info->revoke_root)) {
935                 struct revoke_entry *revoke_entry =
936                         RB_MIN(jbd_revoke, &info->revoke_root);
937                 ext4_assert(revoke_entry);
938                 RB_REMOVE(jbd_revoke, &info->revoke_root, revoke_entry);
939                 jbd_free_revoke_entry(revoke_entry);
940         }
941 }
942
/* Make sure we wrap around the log correctly!
 *
 * When @var has reached or passed the `maxlen` field of the journal
 * superblock @sb, it is moved back by (maxlen - first). @var must be
 * an lvalue; both arguments are evaluated more than once, so they
 * must not have side effects. */
#define wrap(sb, var)                                                   \
do {                                                                    \
        if ((var) >= jbd_get32((sb), maxlen))                           \
                (var) -= (jbd_get32((sb), maxlen) -                     \
                          jbd_get32((sb), first));                      \
} while (0)

/* Passes understood by jbd_iterate_log(). */
#define ACTION_SCAN 0
#define ACTION_REVOKE 1
#define ACTION_RECOVER 2
953
954 /**@brief  Add entries in a revoke block to revoke tree.
955  * @param  jbd_fs jbd filesystem
956  * @param  header revoke block header
957  * @param  recover_info  journal replay info*/
958 static void jbd_build_revoke_tree(struct jbd_fs *jbd_fs,
959                                   struct jbd_bhdr *header,
960                                   struct recover_info *info)
961 {
962         char *blocks_entry;
963         struct jbd_revoke_header *revoke_hdr =
964                 (struct jbd_revoke_header *)header;
965         uint32_t i, nr_entries, record_len = 4;
966
967         /* If we are working on a 64bit jbd filesystem, */
968         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
969                                      JBD_FEATURE_INCOMPAT_64BIT))
970                 record_len = 8;
971
972         nr_entries = (jbd_get32(revoke_hdr, count) -
973                         sizeof(struct jbd_revoke_header)) /
974                         record_len;
975
976         blocks_entry = (char *)(revoke_hdr + 1);
977
978         for (i = 0;i < nr_entries;i++) {
979                 if (record_len == 8) {
980                         uint64_t *blocks =
981                                 (uint64_t *)blocks_entry;
982                         jbd_add_revoke_block_tags(info, to_be64(*blocks));
983                 } else {
984                         uint32_t *blocks =
985                                 (uint32_t *)blocks_entry;
986                         jbd_add_revoke_block_tags(info, to_be32(*blocks));
987                 }
988                 blocks_entry += record_len;
989         }
990 }
991
992 static void jbd_debug_descriptor_block(struct jbd_fs *jbd_fs,
993                                        struct jbd_bhdr *header,
994                                        uint32_t *iblock)
995 {
996         jbd_iterate_block_table(jbd_fs,
997                                 header + 1,
998                                 jbd_get32(&jbd_fs->sb, blocksize) -
999                                         sizeof(struct jbd_bhdr),
1000                                 jbd_display_block_tags,
1001                                 iblock);
1002 }
1003
1004 static void jbd_replay_descriptor_block(struct jbd_fs *jbd_fs,
1005                                         struct jbd_bhdr *header,
1006                                         struct replay_arg *arg)
1007 {
1008         jbd_iterate_block_table(jbd_fs,
1009                                 header + 1,
1010                                 jbd_get32(&jbd_fs->sb, blocksize) -
1011                                         sizeof(struct jbd_bhdr),
1012                                 jbd_replay_block_tags,
1013                                 arg);
1014 }
1015
1016 /**@brief  The core routine of journal replay.
1017  * @param  jbd_fs jbd filesystem
1018  * @param  recover_info  journal replay info
1019  * @param  action action needed to be taken
1020  * @return standard error code*/
1021 static int jbd_iterate_log(struct jbd_fs *jbd_fs,
1022                            struct recover_info *info,
1023                            int action)
1024 {
1025         int r = EOK;
1026         bool log_end = false;
1027         struct jbd_sb *sb = &jbd_fs->sb;
1028         uint32_t start_trans_id, this_trans_id;
1029         uint32_t start_block, this_block;
1030
1031         /* We start iterating valid blocks in the whole journal.*/
1032         start_trans_id = this_trans_id = jbd_get32(sb, sequence);
1033         start_block = this_block = jbd_get32(sb, start);
1034         if (action == ACTION_SCAN)
1035                 info->trans_cnt = 0;
1036         else if (!info->trans_cnt)
1037                 log_end = true;
1038
1039         ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %" PRIu32 "\n",
1040                             start_trans_id);
1041
1042         while (!log_end) {
1043                 struct ext4_block block;
1044                 struct jbd_bhdr *header;
1045                 /* If we are not scanning for the last
1046                  * valid transaction in the journal,
1047                  * we will stop when we reach the end of
1048                  * the journal.*/
1049                 if (action != ACTION_SCAN)
1050                         if (this_trans_id > info->last_trans_id) {
1051                                 log_end = true;
1052                                 continue;
1053                         }
1054
1055                 r = jbd_block_get(jbd_fs, &block, this_block);
1056                 if (r != EOK)
1057                         break;
1058
1059                 header = (struct jbd_bhdr *)block.data;
1060                 /* This block does not have a valid magic number,
1061                  * so we have reached the end of the journal.*/
1062                 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER) {
1063                         jbd_block_set(jbd_fs, &block);
1064                         log_end = true;
1065                         continue;
1066                 }
1067
1068                 /* If the transaction id we found is not expected,
1069                  * we may have reached the end of the journal.
1070                  *
1071                  * If we are not scanning the journal, something
1072                  * bad might have taken place. :-( */
1073                 if (jbd_get32(header, sequence) != this_trans_id) {
1074                         if (action != ACTION_SCAN)
1075                                 r = EIO;
1076
1077                         jbd_block_set(jbd_fs, &block);
1078                         log_end = true;
1079                         continue;
1080                 }
1081
1082                 switch (jbd_get32(header, blocktype)) {
1083                 case JBD_DESCRIPTOR_BLOCK:
1084                         if (!jbd_verify_meta_csum(jbd_fs, header)) {
1085                                 ext4_dbg(DEBUG_JBD,
1086                                         DBG_WARN "Descriptor block checksum failed."
1087                                                 "Journal block: %" PRIu32"\n",
1088                                                 this_block);
1089                                 log_end = true;
1090                                 break;
1091                         }
1092                         ext4_dbg(DEBUG_JBD, "Descriptor block: %" PRIu32", "
1093                                             "trans_id: %" PRIu32"\n",
1094                                             this_block, this_trans_id);
1095                         if (action == ACTION_RECOVER) {
1096                                 struct replay_arg replay_arg;
1097                                 replay_arg.info = info;
1098                                 replay_arg.this_block = &this_block;
1099                                 replay_arg.this_trans_id = this_trans_id;
1100
1101                                 jbd_replay_descriptor_block(jbd_fs,
1102                                                 header, &replay_arg);
1103                         } else
1104                                 jbd_debug_descriptor_block(jbd_fs,
1105                                                 header, &this_block);
1106
1107                         break;
1108                 case JBD_COMMIT_BLOCK:
1109                         if (!jbd_verify_commit_csum(jbd_fs,
1110                                         (struct jbd_commit_header *)header)) {
1111                                 ext4_dbg(DEBUG_JBD,
1112                                         DBG_WARN "Commit block checksum failed."
1113                                                 "Journal block: %" PRIu32"\n",
1114                                                 this_block);
1115                                 log_end = true;
1116                                 break;
1117                         }
1118                         ext4_dbg(DEBUG_JBD, "Commit block: %" PRIu32", "
1119                                             "trans_id: %" PRIu32"\n",
1120                                             this_block, this_trans_id);
1121                         /* This is the end of a transaction,
1122                          * we may now proceed to the next transaction.
1123                          */
1124                         this_trans_id++;
1125                         info->trans_cnt++;
1126                         break;
1127                 case JBD_REVOKE_BLOCK:
1128                         if (!jbd_verify_meta_csum(jbd_fs, header)) {
1129                                 ext4_dbg(DEBUG_JBD,
1130                                         DBG_WARN "Revoke block checksum failed."
1131                                                 "Journal block: %" PRIu32"\n",
1132                                                 this_block);
1133                                 log_end = true;
1134                                 break;
1135                         }
1136                         ext4_dbg(DEBUG_JBD, "Revoke block: %" PRIu32", "
1137                                             "trans_id: %" PRIu32"\n",
1138                                             this_block, this_trans_id);
1139                         if (action == ACTION_REVOKE) {
1140                                 info->this_trans_id = this_trans_id;
1141                                 jbd_build_revoke_tree(jbd_fs,
1142                                                 header, info);
1143                         }
1144                         break;
1145                 default:
1146                         log_end = true;
1147                         break;
1148                 }
1149                 jbd_block_set(jbd_fs, &block);
1150                 this_block++;
1151                 wrap(sb, this_block);
1152                 if (this_block == start_block)
1153                         log_end = true;
1154
1155         }
1156         ext4_dbg(DEBUG_JBD, "End of journal.\n");
1157         if (r == EOK && action == ACTION_SCAN) {
1158                 /* We have finished scanning the journal. */
1159                 info->start_trans_id = start_trans_id;
1160                 if (this_trans_id > start_trans_id)
1161                         info->last_trans_id = this_trans_id - 1;
1162                 else
1163                         info->last_trans_id = this_trans_id;
1164         }
1165
1166         return r;
1167 }
1168
1169 /**@brief  Replay journal.
1170  * @param  jbd_fs jbd filesystem
1171  * @return standard error code*/
1172 int jbd_recover(struct jbd_fs *jbd_fs)
1173 {
1174         int r;
1175         struct recover_info info;
1176         struct jbd_sb *sb = &jbd_fs->sb;
1177         if (!sb->start)
1178                 return EOK;
1179
1180         RB_INIT(&info.revoke_root);
1181
1182         r = jbd_iterate_log(jbd_fs, &info, ACTION_SCAN);
1183         if (r != EOK)
1184                 return r;
1185
1186         r = jbd_iterate_log(jbd_fs, &info, ACTION_REVOKE);
1187         if (r != EOK)
1188                 return r;
1189
1190         r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);
1191         if (r == EOK) {
1192                 /* If we successfully replay the journal,
1193                  * clear EXT4_FINCOM_RECOVER flag on the
1194                  * ext4 superblock, and set the start of
1195                  * journal to 0.*/
1196                 uint32_t features_incompatible =
1197                         ext4_get32(&jbd_fs->inode_ref.fs->sb,
1198                                    features_incompatible);
1199                 jbd_set32(&jbd_fs->sb, start, 0);
1200                 features_incompatible &= ~EXT4_FINCOM_RECOVER;
1201                 ext4_set32(&jbd_fs->inode_ref.fs->sb,
1202                            features_incompatible,
1203                            features_incompatible);
1204                 jbd_fs->dirty = true;
1205                 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1206                                   &jbd_fs->inode_ref.fs->sb);
1207         }
1208         jbd_destroy_revoke_tree(&info);
1209         return r;
1210 }
1211
1212 static void jbd_journal_write_sb(struct jbd_journal *journal)
1213 {
1214         struct jbd_fs *jbd_fs = journal->jbd_fs;
1215         jbd_set32(&jbd_fs->sb, start, journal->start);
1216         jbd_set32(&jbd_fs->sb, sequence, journal->trans_id);
1217         jbd_fs->dirty = true;
1218 }
1219
1220 /**@brief  Start accessing the journal.
1221  * @param  jbd_fs jbd filesystem
1222  * @param  journal current journal session
1223  * @return standard error code*/
1224 int jbd_journal_start(struct jbd_fs *jbd_fs,
1225                       struct jbd_journal *journal)
1226 {
1227         int r;
1228         uint32_t features_incompatible =
1229                         ext4_get32(&jbd_fs->inode_ref.fs->sb,
1230                                    features_incompatible);
1231         struct ext4_block block = EXT4_BLOCK_ZERO();
1232         features_incompatible |= EXT4_FINCOM_RECOVER;
1233         ext4_set32(&jbd_fs->inode_ref.fs->sb,
1234                         features_incompatible,
1235                         features_incompatible);
1236         r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1237                         &jbd_fs->inode_ref.fs->sb);
1238         if (r != EOK)
1239                 return r;
1240
1241         journal->first = jbd_get32(&jbd_fs->sb, first);
1242         journal->start = journal->first;
1243         journal->last = journal->first;
1244         journal->trans_id = 1;
1245         journal->alloc_trans_id = 1;
1246
1247         journal->block_size = jbd_get32(&jbd_fs->sb, blocksize);
1248
1249         r = jbd_block_get_noread(jbd_fs,
1250                          &block,
1251                          journal->start);
1252         if (r != EOK) {
1253                 memset(journal, 0, sizeof(struct jbd_journal));
1254                 return r;
1255         }
1256         memset(block.data, 0, journal->block_size);
1257         ext4_bcache_set_dirty(block.buf);
1258         r = jbd_block_set(jbd_fs, &block);
1259         if (r != EOK) {
1260                 memset(journal, 0, sizeof(struct jbd_journal));
1261                 return r;
1262         }
1263
1264         TAILQ_INIT(&journal->trans_queue);
1265         TAILQ_INIT(&journal->cp_queue);
1266         RB_INIT(&journal->block_rec_root);
1267         journal->jbd_fs = jbd_fs;
1268         jbd_journal_write_sb(journal);
1269         return jbd_write_sb(jbd_fs);
1270 }
1271
1272 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
1273                           struct ext4_buf *buf __unused,
1274                           int res,
1275                           void *arg);
1276
1277 static void jbd_journal_flush_trans(struct jbd_trans *trans)
1278 {
1279         struct jbd_buf *jbd_buf, *tmp;
1280         struct jbd_journal *journal = trans->journal;
1281         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1282         void *tmp_data = malloc(journal->block_size);
1283         ext4_assert(tmp_data);
1284
1285         TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
1286                         tmp) {
1287                 struct ext4_buf *buf = jbd_buf->block_rec->buf;
1288                 /* The buffer in memory is still dirty. */
1289                 if (buf) {
1290                         if (jbd_buf->block_rec->trans != trans) {
1291                                 int r;
1292                                 struct ext4_block jbd_block = EXT4_BLOCK_ZERO();
1293                                 ext4_assert(ext4_block_get(fs->bdev,
1294                                                         &jbd_block,
1295                                                         jbd_buf->jbd_lba) == EOK);
1296                                 memcpy(tmp_data, jbd_block.data,
1297                                                 journal->block_size);
1298                                 ext4_block_set(fs->bdev, &jbd_block);
1299                                 r = ext4_blocks_set_direct(fs->bdev, tmp_data,
1300                                                 buf->lba, 1);
1301                                 jbd_trans_end_write(fs->bdev->bc, buf, r, jbd_buf);
1302                         } else
1303                                 ext4_block_flush_buf(fs->bdev, buf);
1304
1305                 }
1306         }
1307
1308         free(tmp_data);
1309 }
1310
1311 static void
1312 jbd_journal_skip_pure_revoke(struct jbd_journal *journal,
1313                              struct jbd_trans *trans)
1314 {
1315         journal->start = trans->start_iblock +
1316                 trans->alloc_blocks;
1317         wrap(&journal->jbd_fs->sb, journal->start);
1318         journal->trans_id = trans->trans_id + 1;
1319         jbd_journal_free_trans(journal,
1320                         trans, false);
1321         jbd_journal_write_sb(journal);
1322 }
1323
1324 static void
1325 jbd_journal_purge_cp_trans(struct jbd_journal *journal,
1326                            bool flush,
1327                            bool once)
1328 {
1329         struct jbd_trans *trans;
1330         while ((trans = TAILQ_FIRST(&journal->cp_queue))) {
1331                 if (!trans->data_cnt) {
1332                         TAILQ_REMOVE(&journal->cp_queue,
1333                                         trans,
1334                                         trans_node);
1335                         jbd_journal_skip_pure_revoke(journal, trans);
1336                 } else {
1337                         if (trans->data_cnt ==
1338                                         trans->written_cnt) {
1339                                 journal->start =
1340                                         trans->start_iblock +
1341                                         trans->alloc_blocks;
1342                                 wrap(&journal->jbd_fs->sb,
1343                                                 journal->start);
1344                                 journal->trans_id =
1345                                         trans->trans_id + 1;
1346                                 TAILQ_REMOVE(&journal->cp_queue,
1347                                                 trans,
1348                                                 trans_node);
1349                                 jbd_journal_free_trans(journal,
1350                                                 trans,
1351                                                 false);
1352                                 jbd_journal_write_sb(journal);
1353                         } else if (!flush) {
1354                                 journal->start =
1355                                         trans->start_iblock;
1356                                 wrap(&journal->jbd_fs->sb,
1357                                                 journal->start);
1358                                 journal->trans_id =
1359                                         trans->trans_id;
1360                                 jbd_journal_write_sb(journal);
1361                                 break;
1362                         } else
1363                                 jbd_journal_flush_trans(trans);
1364                 }
1365                 if (once)
1366                         break;
1367         }
1368 }
1369
1370 /**@brief  Stop accessing the journal.
1371  * @param  journal current journal session
1372  * @return standard error code*/
1373 int jbd_journal_stop(struct jbd_journal *journal)
1374 {
1375         int r;
1376         struct jbd_fs *jbd_fs = journal->jbd_fs;
1377         uint32_t features_incompatible;
1378
1379         /* Make sure that journalled content have reached
1380          * the disk.*/
1381         jbd_journal_purge_cp_trans(journal, true, false);
1382
1383         /* There should be no block record in this journal
1384          * session. */
1385         if (!RB_EMPTY(&journal->block_rec_root))
1386                 ext4_dbg(DEBUG_JBD,
1387                          DBG_WARN "There are still block records "
1388                                   "in this journal session!\n");
1389
1390         features_incompatible =
1391                 ext4_get32(&jbd_fs->inode_ref.fs->sb,
1392                            features_incompatible);
1393         features_incompatible &= ~EXT4_FINCOM_RECOVER;
1394         ext4_set32(&jbd_fs->inode_ref.fs->sb,
1395                         features_incompatible,
1396                         features_incompatible);
1397         r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1398                         &jbd_fs->inode_ref.fs->sb);
1399         if (r != EOK)
1400                 return r;
1401
1402         journal->start = 0;
1403         journal->trans_id = 0;
1404         jbd_journal_write_sb(journal);
1405         return jbd_write_sb(journal->jbd_fs);
1406 }
1407
1408 /**@brief  Allocate a block in the journal.
1409  * @param  journal current journal session
1410  * @param  trans transaction
1411  * @return allocated block address*/
1412 static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal,
1413                                         struct jbd_trans *trans)
1414 {
1415         uint32_t start_block;
1416
1417         start_block = journal->last++;
1418         trans->alloc_blocks++;
1419         wrap(&journal->jbd_fs->sb, journal->last);
1420         
1421         /* If there is no space left, flush all journalled
1422          * blocks to disk first.*/
1423         if (journal->last == journal->start)
1424                 jbd_journal_purge_cp_trans(journal, true, false);
1425
1426         return start_block;
1427 }
1428
1429 /**@brief  Allocate a new transaction
1430  * @param  journal current journal session
1431  * @return transaction allocated*/
1432 struct jbd_trans *
1433 jbd_journal_new_trans(struct jbd_journal *journal)
1434 {
1435         struct jbd_trans *trans = calloc(1, sizeof(struct jbd_trans));
1436         if (!trans)
1437                 return NULL;
1438
1439         /* We will assign a trans_id to this transaction,
1440          * once it has been committed.*/
1441         trans->journal = journal;
1442         trans->data_csum = EXT4_CRC32_INIT;
1443         trans->error = EOK;
1444         TAILQ_INIT(&trans->buf_queue);
1445         return trans;
1446 }
1447
1448 /**@brief  gain access to it before making any modications.
1449  * @param  journal current journal session
1450  * @param  trans transaction
1451  * @param  block descriptor
1452  * @return standard error code.*/
1453 int jbd_trans_get_access(struct jbd_journal *journal,
1454                          struct jbd_trans *trans,
1455                          struct ext4_block *block)
1456 {
1457         int r = EOK;
1458         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1459         struct jbd_buf *jbd_buf = block->buf->end_write_arg;
1460
1461         /* If the buffer has already been modified, we should
1462          * flush dirty data in this buffer to disk.*/
1463         if (ext4_bcache_test_flag(block->buf, BC_DIRTY) &&
1464             block->buf->end_write == jbd_trans_end_write) {
1465                 ext4_assert(jbd_buf);
1466                 if (jbd_buf->trans != trans)
1467                         r = ext4_block_flush_buf(fs->bdev, block->buf);
1468
1469         }
1470         return r;
1471 }
1472
1473 static struct jbd_block_rec *
1474 jbd_trans_block_rec_lookup(struct jbd_journal *journal,
1475                            ext4_fsblk_t lba)
1476 {
1477         struct jbd_block_rec tmp = {
1478                 .lba = lba
1479         };
1480
1481         return RB_FIND(jbd_block,
1482                        &journal->block_rec_root,
1483                        &tmp);
1484 }
1485
1486 static void
1487 jbd_trans_change_ownership(struct jbd_block_rec *block_rec,
1488                            struct jbd_trans *new_trans,
1489                            struct ext4_buf *new_buf)
1490 {
1491         LIST_REMOVE(block_rec, tbrec_node);
1492         /* Now this block record belongs to this transaction. */
1493         LIST_INSERT_HEAD(&new_trans->tbrec_list, block_rec, tbrec_node);
1494         block_rec->trans = new_trans;
1495         block_rec->buf = new_buf;
1496 }
1497
1498 static inline struct jbd_block_rec *
1499 jbd_trans_insert_block_rec(struct jbd_trans *trans,
1500                            ext4_fsblk_t lba,
1501                            struct ext4_buf *buf)
1502 {
1503         struct jbd_block_rec *block_rec;
1504         block_rec = jbd_trans_block_rec_lookup(trans->journal, lba);
1505         if (block_rec) {
1506                 jbd_trans_change_ownership(block_rec, trans, buf);
1507                 return block_rec;
1508         }
1509         block_rec = calloc(1, sizeof(struct jbd_block_rec));
1510         if (!block_rec)
1511                 return NULL;
1512
1513         block_rec->lba = lba;
1514         block_rec->buf = buf;
1515         block_rec->trans = trans;
1516         TAILQ_INIT(&block_rec->dirty_buf_queue);
1517         LIST_INSERT_HEAD(&trans->tbrec_list, block_rec, tbrec_node);
1518         RB_INSERT(jbd_block, &trans->journal->block_rec_root, block_rec);
1519         return block_rec;
1520 }
1521
1522 static void
1523 jbd_trans_finish_callback(struct jbd_journal *journal,
1524                           const struct jbd_trans *trans,
1525                           struct jbd_block_rec *block_rec,
1526                           bool abort)
1527 {
1528         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1529         if (block_rec->trans != trans)
1530                 return;
1531
1532         if (!abort) {
1533                 struct jbd_buf *jbd_buf, *tmp;
1534                 TAILQ_FOREACH_SAFE(jbd_buf,
1535                                 &block_rec->dirty_buf_queue,
1536                                 dirty_buf_node,
1537                                 tmp) {
1538                         /* All we need is a fake ext4_buf. */
1539                         struct ext4_buf buf;
1540
1541                         jbd_trans_end_write(fs->bdev->bc,
1542                                         &buf,
1543                                         EOK,
1544                                         jbd_buf);
1545                 }
1546         } else {
1547                 struct jbd_buf *jbd_buf;
1548                 struct ext4_block jbd_block = EXT4_BLOCK_ZERO(),
1549                                   block = EXT4_BLOCK_ZERO();
1550                 jbd_buf = TAILQ_LAST(&block_rec->dirty_buf_queue,
1551                                 jbd_buf_dirty);
1552                 if (jbd_buf) {
1553                         ext4_assert(ext4_block_get(fs->bdev,
1554                                                 &jbd_block,
1555                                                 jbd_buf->jbd_lba) == EOK);
1556                         ext4_assert(ext4_block_get_noread(fs->bdev,
1557                                                 &block,
1558                                                 block_rec->lba) == EOK);
1559                         memcpy(block.data, jbd_block.data,
1560                                         journal->block_size);
1561
1562                         jbd_trans_change_ownership(block_rec,
1563                                         jbd_buf->trans, block.buf);
1564
1565                         block.buf->end_write = jbd_trans_end_write;
1566                         block.buf->end_write_arg = jbd_buf;
1567
1568                         ext4_bcache_set_flag(jbd_block.buf, BC_TMP);
1569                         ext4_bcache_set_dirty(block.buf);
1570
1571                         ext4_block_set(fs->bdev, &jbd_block);
1572                         ext4_block_set(fs->bdev, &block);
1573                         return;
1574                 }
1575         }
1576 }
1577
1578 static inline void
1579 jbd_trans_remove_block_rec(struct jbd_journal *journal,
1580                            struct jbd_block_rec *block_rec,
1581                            struct jbd_trans *trans)
1582 {
1583         /* If this block record doesn't belong to this transaction,
1584          * give up.*/
1585         if (block_rec->trans == trans) {
1586                 LIST_REMOVE(block_rec, tbrec_node);
1587                 RB_REMOVE(jbd_block,
1588                                 &journal->block_rec_root,
1589                                 block_rec);
1590                 free(block_rec);
1591         }
1592 }
1593
1594 /**@brief  Add block to a transaction and mark it dirty.
1595  * @param  trans transaction
1596  * @param  block block descriptor
1597  * @return standard error code*/
1598 int jbd_trans_set_block_dirty(struct jbd_trans *trans,
1599                               struct ext4_block *block)
1600 {
1601         struct jbd_buf *buf;
1602
1603         struct jbd_block_rec *block_rec;
1604         if (block->buf->end_write == jbd_trans_end_write) {
1605                 buf = block->buf->end_write_arg;
1606                 if (buf && buf->trans == trans)
1607                         return EOK;
1608         }
1609         buf = calloc(1, sizeof(struct jbd_buf));
1610         if (!buf)
1611                 return ENOMEM;
1612
1613         if ((block_rec = jbd_trans_insert_block_rec(trans,
1614                                         block->lb_id,
1615                                         block->buf)) == NULL) {
1616                 free(buf);
1617                 return ENOMEM;
1618         }
1619
1620         TAILQ_INSERT_TAIL(&block_rec->dirty_buf_queue,
1621                         buf,
1622                         dirty_buf_node);
1623
1624         buf->block_rec = block_rec;
1625         buf->trans = trans;
1626         buf->block = *block;
1627         ext4_bcache_inc_ref(block->buf);
1628
1629         /* If the content reach the disk, notify us
1630          * so that we may do a checkpoint. */
1631         block->buf->end_write = jbd_trans_end_write;
1632         block->buf->end_write_arg = buf;
1633
1634         trans->data_cnt++;
1635         TAILQ_INSERT_HEAD(&trans->buf_queue, buf, buf_node);
1636
1637         ext4_bcache_set_dirty(block->buf);
1638         return EOK;
1639 }
1640
1641 /**@brief  Add block to be revoked to a transaction
1642  * @param  trans transaction
1643  * @param  lba logical block address
1644  * @return standard error code*/
1645 int jbd_trans_revoke_block(struct jbd_trans *trans,
1646                            ext4_fsblk_t lba)
1647 {
1648         struct jbd_revoke_rec *rec =
1649                 calloc(1, sizeof(struct jbd_revoke_rec));
1650         if (!rec)
1651                 return ENOMEM;
1652
1653         rec->lba = lba;
1654         LIST_INSERT_HEAD(&trans->revoke_list, rec, revoke_node);
1655         return EOK;
1656 }
1657
1658 /**@brief  Try to add block to be revoked to a transaction.
1659  *         If @lba still remains in an transaction on checkpoint
1660  *         queue, add @lba as a revoked block to the transaction.
1661  * @param  trans transaction
1662  * @param  lba logical block address
1663  * @return standard error code*/
1664 int jbd_trans_try_revoke_block(struct jbd_trans *trans,
1665                                ext4_fsblk_t lba)
1666 {
1667         int r = EOK;
1668         struct jbd_journal *journal = trans->journal;
1669         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1670         struct jbd_block_rec *block_rec =
1671                 jbd_trans_block_rec_lookup(journal, lba);
1672
1673         /* Make sure we don't flush any buffers belong to this transaction. */
1674         if (block_rec && block_rec->trans != trans) {
1675                 /* If the buffer has not been flushed yet, flush it now. */
1676                 if (block_rec->buf) {
1677                         r = ext4_block_flush_buf(fs->bdev, block_rec->buf);
1678                         if (r != EOK)
1679                                 return r;
1680
1681                 }
1682
1683                 jbd_trans_revoke_block(trans, lba);
1684         }
1685
1686         return EOK;
1687 }
1688
1689 /**@brief  Free a transaction
1690  * @param  journal current journal session
1691  * @param  trans transaction
1692  * @param  abort discard all the modifications on the block?
1693  * @return standard error code*/
1694 void jbd_journal_free_trans(struct jbd_journal *journal,
1695                             struct jbd_trans *trans,
1696                             bool abort)
1697 {
1698         struct jbd_buf *jbd_buf, *tmp;
1699         struct jbd_revoke_rec *rec, *tmp2;
1700         struct jbd_block_rec *block_rec, *tmp3;
1701         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1702         TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
1703                           tmp) {
1704                 block_rec = jbd_buf->block_rec;
1705                 if (abort) {
1706                         jbd_buf->block.buf->end_write = NULL;
1707                         jbd_buf->block.buf->end_write_arg = NULL;
1708                         ext4_bcache_clear_dirty(jbd_buf->block.buf);
1709                         ext4_block_set(fs->bdev, &jbd_buf->block);
1710                 }
1711
1712                 TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1713                         jbd_buf,
1714                         dirty_buf_node);
1715                 jbd_trans_finish_callback(journal,
1716                                 trans,
1717                                 block_rec,
1718                                 abort);
1719                 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1720                 free(jbd_buf);
1721         }
1722         LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1723                           tmp2) {
1724                 LIST_REMOVE(rec, revoke_node);
1725                 free(rec);
1726         }
1727         LIST_FOREACH_SAFE(block_rec, &trans->tbrec_list, tbrec_node,
1728                           tmp3) {
1729                 jbd_trans_remove_block_rec(journal, block_rec, trans);
1730         }
1731
1732         free(trans);
1733 }
1734
1735 /**@brief  Write commit block for a transaction
1736  * @param  trans transaction
1737  * @return standard error code*/
1738 static int jbd_trans_write_commit_block(struct jbd_trans *trans)
1739 {
1740         int rc;
1741         struct jbd_commit_header *header;
1742         uint32_t commit_iblock = 0;
1743         struct ext4_block commit_block;
1744         struct jbd_journal *journal = trans->journal;
1745
1746         commit_iblock = jbd_journal_alloc_block(journal, trans);
1747         rc = jbd_block_get_noread(journal->jbd_fs,
1748                         &commit_block, commit_iblock);
1749         if (rc != EOK)
1750                 return rc;
1751
1752         header = (struct jbd_commit_header *)commit_block.data;
1753         jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER);
1754         jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);
1755         jbd_set32(&header->header, sequence, trans->trans_id);
1756
1757         if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1758                                 JBD_FEATURE_COMPAT_CHECKSUM)) {
1759                 jbd_set32(header, chksum_type, JBD_CRC32_CHKSUM);
1760                 jbd_set32(header, chksum_size, JBD_CRC32_CHKSUM_SIZE);
1761                 jbd_set32(header, chksum[0], trans->data_csum);
1762         }
1763         jbd_commit_csum_set(journal->jbd_fs, header);
1764         ext4_bcache_set_dirty(commit_block.buf);
1765         rc = jbd_block_set(journal->jbd_fs, &commit_block);
1766         if (rc != EOK)
1767                 return rc;
1768
1769         return EOK;
1770 }
1771
1772 /**@brief  Write descriptor block for a transaction
1773  * @param  journal current journal session
1774  * @param  trans transaction
1775  * @return standard error code*/
1776 static int jbd_journal_prepare(struct jbd_journal *journal,
1777                                struct jbd_trans *trans)
1778 {
1779         int rc = EOK, i = 0;
1780         int32_t tag_tbl_size = 0;
1781         uint32_t desc_iblock = 0;
1782         uint32_t data_iblock = 0;
1783         char *tag_start = NULL, *tag_ptr = NULL;
1784         struct jbd_buf *jbd_buf, *tmp;
1785         struct ext4_block desc_block, data_block;
1786         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1787         uint32_t checksum = EXT4_CRC32_INIT;
1788
1789         /* Try to remove any non-dirty buffers from the tail of
1790          * buf_queue. */
1791         TAILQ_FOREACH_REVERSE_SAFE(jbd_buf, &trans->buf_queue,
1792                         jbd_trans_buf, buf_node, tmp) {
1793                 /* We stop the iteration when we find a dirty buffer. */
1794                 if (ext4_bcache_test_flag(jbd_buf->block.buf,
1795                                         BC_DIRTY))
1796                         break;
1797         
1798                 TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1799                         jbd_buf,
1800                         dirty_buf_node);
1801
1802                 jbd_buf->block.buf->end_write = NULL;
1803                 jbd_buf->block.buf->end_write_arg = NULL;
1804                 jbd_trans_finish_callback(journal,
1805                                 trans,
1806                                 jbd_buf->block_rec,
1807                                 true);
1808
1809                 /* The buffer has not been modified, just release
1810                  * that jbd_buf. */
1811                 jbd_trans_remove_block_rec(journal,
1812                                 jbd_buf->block_rec, trans);
1813                 trans->data_cnt--;
1814
1815                 ext4_block_set(fs->bdev, &jbd_buf->block);
1816                 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1817                 free(jbd_buf);
1818         }
1819
1820         TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node, tmp) {
1821                 struct tag_info tag_info;
1822                 bool uuid_exist = false;
1823                 if (!ext4_bcache_test_flag(jbd_buf->block.buf,
1824                                            BC_DIRTY)) {
1825                         TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1826                                         jbd_buf,
1827                                         dirty_buf_node);
1828
1829                         jbd_buf->block.buf->end_write = NULL;
1830                         jbd_buf->block.buf->end_write_arg = NULL;
1831                         jbd_trans_finish_callback(journal,
1832                                         trans,
1833                                         jbd_buf->block_rec,
1834                                         true);
1835
1836                         /* The buffer has not been modified, just release
1837                          * that jbd_buf. */
1838                         jbd_trans_remove_block_rec(journal,
1839                                         jbd_buf->block_rec, trans);
1840                         trans->data_cnt--;
1841
1842                         ext4_block_set(fs->bdev, &jbd_buf->block);
1843                         TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1844                         free(jbd_buf);
1845                         continue;
1846                 }
1847                 checksum = jbd_block_csum(journal->jbd_fs,
1848                                           jbd_buf->block.data,
1849                                           checksum,
1850                                           trans->trans_id);
1851 again:
1852                 if (!desc_iblock) {
1853                         struct jbd_bhdr *bhdr;
1854                         desc_iblock = jbd_journal_alloc_block(journal, trans);
1855                         rc = jbd_block_get_noread(journal->jbd_fs,
1856                                            &desc_block, desc_iblock);
1857                         if (rc != EOK)
1858                                 break;
1859
1860                         ext4_bcache_set_dirty(desc_block.buf);
1861
1862                         bhdr = (struct jbd_bhdr *)desc_block.data;
1863                         jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1864                         jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK);
1865                         jbd_set32(bhdr, sequence, trans->trans_id);
1866
1867                         tag_start = (char *)(bhdr + 1);
1868                         tag_ptr = tag_start;
1869                         uuid_exist = true;
1870                         tag_tbl_size = journal->block_size -
1871                                 sizeof(struct jbd_bhdr);
1872
1873                         if (jbd_has_csum(&journal->jbd_fs->sb))
1874                                 tag_tbl_size -= sizeof(struct jbd_block_tail);
1875
1876                         if (!trans->start_iblock)
1877                                 trans->start_iblock = desc_iblock;
1878
1879                 }
1880                 tag_info.block = jbd_buf->block.lb_id;
1881                 tag_info.uuid_exist = uuid_exist;
1882                 if (i == trans->data_cnt - 1)
1883                         tag_info.last_tag = true;
1884                 else
1885                         tag_info.last_tag = false;
1886
1887                 tag_info.checksum = checksum;
1888
1889                 if (uuid_exist)
1890                         memcpy(tag_info.uuid, journal->jbd_fs->sb.uuid,
1891                                         UUID_SIZE);
1892
1893                 rc = jbd_write_block_tag(journal->jbd_fs,
1894                                 tag_ptr,
1895                                 tag_tbl_size,
1896                                 &tag_info);
1897                 if (rc != EOK) {
1898                         jbd_meta_csum_set(journal->jbd_fs,
1899                                         (struct jbd_bhdr *)desc_block.data);
1900                         jbd_block_set(journal->jbd_fs, &desc_block);
1901                         desc_iblock = 0;
1902                         goto again;
1903                 }
1904
1905                 data_iblock = jbd_journal_alloc_block(journal, trans);
1906                 rc = jbd_block_get_noread(journal->jbd_fs,
1907                                 &data_block, data_iblock);
1908                 if (rc != EOK)
1909                         break;
1910
1911                 ext4_bcache_set_dirty(data_block.buf);
1912
1913                 memcpy(data_block.data, jbd_buf->block.data,
1914                         journal->block_size);
1915                 jbd_buf->jbd_lba = data_block.lb_id;
1916
1917                 rc = jbd_block_set(journal->jbd_fs, &data_block);
1918                 if (rc != EOK)
1919                         break;
1920
1921                 tag_ptr += tag_info.tag_bytes;
1922                 tag_tbl_size -= tag_info.tag_bytes;
1923
1924                 i++;
1925         }
1926         if (rc == EOK && desc_iblock) {
1927                 jbd_meta_csum_set(journal->jbd_fs,
1928                                 (struct jbd_bhdr *)desc_block.data);
1929                 trans->data_csum = checksum;
1930                 jbd_block_set(journal->jbd_fs, &desc_block);
1931         }
1932
1933         return rc;
1934 }
1935
1936 /**@brief  Write revoke block for a transaction
1937  * @param  journal current journal session
1938  * @param  trans transaction
1939  * @return standard error code*/
1940 static int
1941 jbd_journal_prepare_revoke(struct jbd_journal *journal,
1942                            struct jbd_trans *trans)
1943 {
1944         int rc = EOK, i = 0;
1945         int32_t tag_tbl_size = 0;
1946         uint32_t desc_iblock = 0;
1947         char *blocks_entry = NULL;
1948         struct jbd_revoke_rec *rec, *tmp;
1949         struct ext4_block desc_block;
1950         struct jbd_revoke_header *header = NULL;
1951         int32_t record_len = 4;
1952
1953         if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1954                                      JBD_FEATURE_INCOMPAT_64BIT))
1955                 record_len = 8;
1956
1957         LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1958                           tmp) {
1959 again:
1960                 if (!desc_iblock) {
1961                         struct jbd_bhdr *bhdr;
1962                         desc_iblock = jbd_journal_alloc_block(journal, trans);
1963                         rc = jbd_block_get_noread(journal->jbd_fs,
1964                                            &desc_block, desc_iblock);
1965                         if (rc != EOK) {
1966                                 break;
1967                         }
1968
1969                         ext4_bcache_set_dirty(desc_block.buf);
1970
1971                         bhdr = (struct jbd_bhdr *)desc_block.data;
1972                         jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1973                         jbd_set32(bhdr, blocktype, JBD_REVOKE_BLOCK);
1974                         jbd_set32(bhdr, sequence, trans->trans_id);
1975                         
1976                         header = (struct jbd_revoke_header *)bhdr;
1977                         blocks_entry = (char *)(header + 1);
1978                         tag_tbl_size = journal->block_size -
1979                                 sizeof(struct jbd_revoke_header);
1980
1981                         if (jbd_has_csum(&journal->jbd_fs->sb))
1982                                 tag_tbl_size -= sizeof(struct jbd_block_tail);
1983
1984                         if (!trans->start_iblock)
1985                                 trans->start_iblock = desc_iblock;
1986
1987                 }
1988
1989                 if (tag_tbl_size < record_len) {
1990                         jbd_set32(header, count,
1991                                   journal->block_size - tag_tbl_size);
1992                         jbd_meta_csum_set(journal->jbd_fs,
1993                                         (struct jbd_bhdr *)desc_block.data);
1994                         jbd_block_set(journal->jbd_fs, &desc_block);
1995                         desc_iblock = 0;
1996                         header = NULL;
1997                         goto again;
1998                 }
1999                 if (record_len == 8) {
2000                         uint64_t *blocks =
2001                                 (uint64_t *)blocks_entry;
2002                         *blocks = to_be64(rec->lba);
2003                 } else {
2004                         uint32_t *blocks =
2005                                 (uint32_t *)blocks_entry;
2006                         *blocks = to_be32((uint32_t)rec->lba);
2007                 }
2008                 blocks_entry += record_len;
2009                 tag_tbl_size -= record_len;
2010
2011                 i++;
2012         }
2013         if (rc == EOK && desc_iblock) {
2014                 if (header != NULL)
2015                         jbd_set32(header, count,
2016                                   journal->block_size - tag_tbl_size);
2017
2018                 jbd_meta_csum_set(journal->jbd_fs,
2019                                 (struct jbd_bhdr *)desc_block.data);
2020                 jbd_block_set(journal->jbd_fs, &desc_block);
2021         }
2022
2023         return rc;
2024 }
2025
2026 /**@brief  Put references of block descriptors in a transaction.
2027  * @param  journal current journal session
2028  * @param  trans transaction*/
2029 void jbd_journal_cp_trans(struct jbd_journal *journal, struct jbd_trans *trans)
2030 {
2031         struct jbd_buf *jbd_buf, *tmp;
2032         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
2033         TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
2034                         tmp) {
2035                 struct ext4_block block = jbd_buf->block;
2036                 ext4_block_set(fs->bdev, &block);
2037         }
2038 }
2039
2040 /**@brief  Update the start block of the journal when
2041  *         all the contents in a transaction reach the disk.*/
2042 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
2043                           struct ext4_buf *buf,
2044                           int res,
2045                           void *arg)
2046 {
2047         struct jbd_buf *jbd_buf = arg;
2048         struct jbd_trans *trans = jbd_buf->trans;
2049         struct jbd_block_rec *block_rec = jbd_buf->block_rec;
2050         struct jbd_journal *journal = trans->journal;
2051         bool first_in_queue =
2052                 trans == TAILQ_FIRST(&journal->cp_queue);
2053         if (res != EOK)
2054                 trans->error = res;
2055
2056         TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
2057         TAILQ_REMOVE(&block_rec->dirty_buf_queue,
2058                         jbd_buf,
2059                         dirty_buf_node);
2060
2061         jbd_trans_finish_callback(journal,
2062                         trans,
2063                         jbd_buf->block_rec,
2064                         false);
2065         if (block_rec->trans == trans) {
2066                 block_rec->buf = NULL;
2067                 /* Clear the end_write and end_write_arg fields. */
2068                 buf->end_write = NULL;
2069                 buf->end_write_arg = NULL;
2070         }
2071
2072         free(jbd_buf);
2073
2074         trans->written_cnt++;
2075         if (trans->written_cnt == trans->data_cnt) {
2076                 /* If it is the first transaction on checkpoint queue,
2077                  * we will shift the start of the journal to the next
2078                  * transaction, and remove subsequent written
2079                  * transactions from checkpoint queue until we find
2080                  * an unwritten one. */
2081                 if (first_in_queue) {
2082                         journal->start = trans->start_iblock +
2083                                 trans->alloc_blocks;
2084                         wrap(&journal->jbd_fs->sb, journal->start);
2085                         journal->trans_id = trans->trans_id + 1;
2086                         TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
2087                         jbd_journal_free_trans(journal, trans, false);
2088
2089                         jbd_journal_purge_cp_trans(journal, false, true);
2090                         jbd_journal_write_sb(journal);
2091                         jbd_write_sb(journal->jbd_fs);
2092                 }
2093         }
2094 }
2095
2096 /**@brief  Commit a transaction to the journal immediately.
2097  * @param  journal current journal session
2098  * @param  trans transaction
2099  * @return standard error code*/
2100 int jbd_journal_commit_trans(struct jbd_journal *journal,
2101                              struct jbd_trans *trans)
2102 {
2103         int rc = EOK;
2104         uint32_t last = journal->last;
2105
2106         trans->trans_id = journal->alloc_trans_id;
2107         rc = jbd_journal_prepare(journal, trans);
2108         if (rc != EOK)
2109                 goto Finish;
2110
2111         rc = jbd_journal_prepare_revoke(journal, trans);
2112         if (rc != EOK)
2113                 goto Finish;
2114
2115         if (TAILQ_EMPTY(&trans->buf_queue) &&
2116             LIST_EMPTY(&trans->revoke_list)) {
2117                 /* Since there are no entries in both buffer list
2118                  * and revoke entry list, we do not consider trans as
2119                  * complete transaction and just return EOK.*/
2120                 jbd_journal_free_trans(journal, trans, false);
2121                 goto Finish;
2122         }
2123
2124         rc = jbd_trans_write_commit_block(trans);
2125         if (rc != EOK)
2126                 goto Finish;
2127
2128         journal->alloc_trans_id++;
2129         if (TAILQ_EMPTY(&journal->cp_queue)) {
2130                 if (trans->data_cnt) {
2131                         journal->start = trans->start_iblock;
2132                         wrap(&journal->jbd_fs->sb, journal->start);
2133                         journal->trans_id = trans->trans_id;
2134                         jbd_journal_write_sb(journal);
2135                         jbd_write_sb(journal->jbd_fs);
2136                         TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
2137                                         trans_node);
2138                         jbd_journal_cp_trans(journal, trans);
2139                 } else {
2140                         journal->start = trans->start_iblock +
2141                                 trans->alloc_blocks;
2142                         wrap(&journal->jbd_fs->sb, journal->start);
2143                         journal->trans_id = trans->trans_id + 1;
2144                         jbd_journal_write_sb(journal);
2145                         jbd_journal_free_trans(journal, trans, false);
2146                 }
2147         } else {
2148                 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
2149                                 trans_node);
2150                 if (trans->data_cnt)
2151                         jbd_journal_cp_trans(journal, trans);
2152
2153         }
2154 Finish:
2155         if (rc != EOK) {
2156                 journal->last = last;
2157                 jbd_journal_free_trans(journal, trans, true);
2158         }
2159         return rc;
2160 }
2161
2162 /**
2163  * @}
2164  */