e64d766977fd055ae317cc1ea555d92ed85f876c
[lwext4.git] / lwext4 / ext4_journal.c
1 /*
2  * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3  * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * - Redistributions of source code must retain the above copyright
11  *   notice, this list of conditions and the following disclaimer.
12  * - Redistributions in binary form must reproduce the above copyright
13  *   notice, this list of conditions and the following disclaimer in the
14  *   documentation and/or other materials provided with the distribution.
15  * - The name of the author may not be used to endorse or promote products
16  *   derived from this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30 /** @addtogroup lwext4
31  * @{
32  */
33 /**
34  * @file  ext4_journal.c
35  * @brief Journal handle functions
36  */
37
38 #include "ext4_config.h"
39 #include "ext4_types.h"
40 #include "ext4_fs.h"
41 #include "ext4_super.h"
42 #include "ext4_journal.h"
43 #include "ext4_errno.h"
44 #include "ext4_blockdev.h"
45 #include "ext4_crc32c.h"
46 #include "ext4_debug.h"
47 #include "tree.h"
48
49 #include <string.h>
50 #include <stdlib.h>
51
52 /**@brief  Revoke entry during journal replay.*/
53 struct revoke_entry {
54         /**@brief  Block number not to be replayed.*/
55         ext4_fsblk_t block;
56
57         /**@brief  For any transaction id smaller
58          *         than trans_id, records of @block
59          *         in those transactions should not
60          *         be replayed.*/
61         uint32_t trans_id;
62
63         /**@brief  Revoke tree node.*/
64         RB_ENTRY(revoke_entry) revoke_node;
65 };
66
67 /**@brief  Valid journal replay information.*/
68 struct recover_info {
69         /**@brief  Starting transaction id.*/
70         uint32_t start_trans_id;
71
72         /**@brief  Ending transaction id.*/
73         uint32_t last_trans_id;
74
75         /**@brief  Used as internal argument.*/
76         uint32_t this_trans_id;
77
78         /**@brief  RB-Tree storing revoke entries.*/
79         RB_HEAD(jbd_revoke, revoke_entry) revoke_root;
80 };
81
/**@brief  Argument bundle handed to the per-tag replay callback.*/
struct replay_arg {
        /**@brief  Replay state, including the revoke tree.*/
        struct recover_info *info;

        /**@brief  Points at the caller's journal block cursor; the
         *         callback advances it by one for every tag it sees.*/
        uint32_t *this_block;

        /**@brief  Id of the transaction currently being replayed.*/
        uint32_t this_trans_id;
};
93
94 static int
95 jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b)
96 {
97         if (a->block > b->block)
98                 return 1;
99         else if (a->block < b->block)
100                 return -1;
101         return 0;
102 }
103
104 RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
105                      jbd_revoke_entry_cmp, static inline)
106
107 #define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry))
108 #define jbd_free_revoke_entry(addr) free(addr)
109
110 /**@brief  Data block lookup helper.*/
111 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
112                    ext4_lblk_t iblock,
113                    ext4_fsblk_t *fblock);
114
115 /**@brief  Write jbd superblock to disk.
116  * @param  jbd_fs jbd filesystem
117  * @param  s jbd superblock
118  * @return standard error code*/
119 int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s)
120 {
121         int rc;
122         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
123         uint64_t offset;
124         ext4_fsblk_t fblock;
125         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
126         if (rc != EOK)
127                 return rc;
128
129         offset = fblock * ext4_sb_get_block_size(&fs->sb);
130         return ext4_block_writebytes(fs->bdev, offset, s,
131                                      EXT4_SUPERBLOCK_SIZE);
132 }
133
134 /**@brief  Read jbd superblock from disk.
135  * @param  jbd_fs jbd filesystem
136  * @param  s jbd superblock
137  * @return standard error code*/
138 int jbd_sb_read(struct jbd_fs *jbd_fs, struct jbd_sb *s)
139 {
140         int rc;
141         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
142         uint64_t offset;
143         ext4_fsblk_t fblock;
144         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
145         if (rc != EOK)
146                 return rc;
147
148         offset = fblock * ext4_sb_get_block_size(&fs->sb);
149         return ext4_block_readbytes(fs->bdev, offset, s,
150                                     EXT4_SUPERBLOCK_SIZE);
151 }
152
153 /**@brief  Verify jbd superblock.
154  * @param  sb jbd superblock
155  * @return true if jbd superblock is valid */
156 static bool jbd_verify_sb(struct jbd_sb *sb)
157 {
158         struct jbd_bhdr *header = &sb->header;
159         if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER)
160                 return false;
161
162         if (jbd_get32(header, blocktype) != JBD_SUPERBLOCK &&
163             jbd_get32(header, blocktype) != JBD_SUPERBLOCK_V2)
164                 return false;
165
166         return true;
167 }
168
169 /**@brief  Write back dirty jbd superblock to disk.
170  * @param  jbd_fs jbd filesystem
171  * @return standard error code*/
172 static int jbd_write_sb(struct jbd_fs *jbd_fs)
173 {
174         int rc = EOK;
175         if (jbd_fs->dirty) {
176                 rc = jbd_sb_write(jbd_fs, &jbd_fs->sb);
177                 if (rc != EOK)
178                         return rc;
179
180                 jbd_fs->dirty = false;
181         }
182         return rc;
183 }
184
185 /**@brief  Get reference to jbd filesystem.
186  * @param  fs Filesystem to load journal of
187  * @param  jbd_fs jbd filesystem
188  * @return standard error code*/
189 int jbd_get_fs(struct ext4_fs *fs,
190                struct jbd_fs *jbd_fs)
191 {
192         int rc;
193         uint32_t journal_ino;
194
195         memset(jbd_fs, 0, sizeof(struct jbd_fs));
196         /* See if there is journal inode on this filesystem.*/
197         /* FIXME: detection on existance ofbkejournal bdev is
198          *        missing.*/
199         journal_ino = ext4_get32(&fs->sb, journal_inode_number);
200
201         rc = ext4_fs_get_inode_ref(fs,
202                                    journal_ino,
203                                    &jbd_fs->inode_ref);
204         if (rc != EOK) {
205                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
206                 return rc;
207         }
208         rc = jbd_sb_read(jbd_fs, &jbd_fs->sb);
209         if (rc != EOK) {
210                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
211                 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
212                 return rc;
213         }
214         if (!jbd_verify_sb(&jbd_fs->sb)) {
215                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
216                 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
217                 rc = EIO;
218         }
219
220         return rc;
221 }
222
223 /**@brief  Put reference of jbd filesystem.
224  * @param  jbd_fs jbd filesystem
225  * @return standard error code*/
226 int jbd_put_fs(struct jbd_fs *jbd_fs)
227 {
228         int rc = EOK;
229         rc = jbd_write_sb(jbd_fs);
230
231         ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
232         return rc;
233 }
234
235 /**@brief  Data block lookup helper.
236  * @param  jbd_fs jbd filesystem
237  * @param  iblock block index
238  * @param  fblock logical block address
239  * @return standard error code*/
240 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
241                    ext4_lblk_t iblock,
242                    ext4_fsblk_t *fblock)
243 {
244         int rc = ext4_fs_get_inode_dblk_idx(
245                         &jbd_fs->inode_ref,
246                         iblock,
247                         fblock,
248                         false);
249         return rc;
250 }
251
252 /**@brief   jbd block get function (through cache).
253  * @param   jbd_fs jbd filesystem
254  * @param   block block descriptor
255  * @param   fblock jbd logical block address
256  * @return  standard error code*/
257 int jbd_block_get(struct jbd_fs *jbd_fs,
258                   struct ext4_block *block,
259                   ext4_fsblk_t fblock)
260 {
261         /* TODO: journal device. */
262         int rc;
263         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
264
265         /* Lookup the logical block address of
266          * fblock.*/
267         rc = jbd_inode_bmap(jbd_fs, iblock,
268                             &fblock);
269         if (rc != EOK)
270                 return rc;
271
272         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
273         rc = ext4_block_get(bdev, block, fblock);
274
275         /* If succeeded, mark buffer as BC_FLUSH to indicate
276          * that data should be written to disk immediately.*/
277         if (rc == EOK)
278                 ext4_bcache_set_flag(block->buf, BC_FLUSH);
279
280         return rc;
281 }
282
283 /**@brief   jbd block get function (through cache, don't read).
284  * @param   jbd_fs jbd filesystem
285  * @param   block block descriptor
286  * @param   fblock jbd logical block address
287  * @return  standard error code*/
288 int jbd_block_get_noread(struct jbd_fs *jbd_fs,
289                          struct ext4_block *block,
290                          ext4_fsblk_t fblock)
291 {
292         /* TODO: journal device. */
293         int rc;
294         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
295         rc = jbd_inode_bmap(jbd_fs, iblock,
296                             &fblock);
297         if (rc != EOK)
298                 return rc;
299
300         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
301         rc = ext4_block_get_noread(bdev, block, fblock);
302         if (rc == EOK)
303                 ext4_bcache_set_flag(block->buf, BC_FLUSH);
304
305         return rc;
306 }
307
308 /**@brief   jbd block set procedure (through cache).
309  * @param   jbd_fs jbd filesystem
310  * @param   block block descriptor
311  * @return  standard error code*/
312 int jbd_block_set(struct jbd_fs *jbd_fs,
313                   struct ext4_block *block)
314 {
315         return ext4_block_set(jbd_fs->inode_ref.fs->bdev,
316                               block);
317 }
318
319 /**@brief  helper functions to calculate
320  *         block tag size, not including UUID part.
321  * @param  jbd_fs jbd filesystem
322  * @return tag size in bytes*/
323 int jbd_tag_bytes(struct jbd_fs *jbd_fs)
324 {
325         int size;
326
327         /* It is very easy to deal with the case which
328          * JBD_FEATURE_INCOMPAT_CSUM_V3 is enabled.*/
329         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
330                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
331                 return sizeof(struct jbd_block_tag3);
332
333         size = sizeof(struct jbd_block_tag);
334
335         /* If JBD_FEATURE_INCOMPAT_CSUM_V2 is enabled,
336          * add 2 bytes to size.*/
337         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
338                                      JBD_FEATURE_INCOMPAT_CSUM_V2))
339                 size += sizeof(uint16_t);
340
341         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
342                                      JBD_FEATURE_INCOMPAT_64BIT))
343                 return size;
344
345         /* If block number is 4 bytes in size,
346          * minus 4 bytes from size */
347         return size - sizeof(uint32_t);
348 }
349
350 /**@brief  Tag information. */
351 struct tag_info {
352         /**@brief  Tag size in bytes, including UUID part.*/
353         int tag_bytes;
354
355         /**@brief  block number stored in this tag.*/
356         ext4_fsblk_t block;
357
358         /**@brief  whether UUID part exists or not.*/
359         bool uuid_exist;
360
361         /**@brief  UUID content if UUID part exists.*/
362         uint8_t uuid[UUID_SIZE];
363
364         /**@brief  Is this the last tag? */
365         bool last_tag;
366 };
367
368 /**@brief  Extract information from a block tag.
369  * @param  __tag pointer to the block tag
370  * @param  tag_bytes block tag size of this jbd filesystem
371  * @param  remaining size in buffer containing the block tag
372  * @param  tag_info information of this tag.
373  * @return  EOK when succeed, otherwise return EINVAL.*/
374 static int
375 jbd_extract_block_tag(struct jbd_fs *jbd_fs,
376                       void *__tag,
377                       int tag_bytes,
378                       int32_t remain_buf_size,
379                       struct tag_info *tag_info)
380 {
381         char *uuid_start;
382         tag_info->tag_bytes = tag_bytes;
383         tag_info->uuid_exist = false;
384         tag_info->last_tag = false;
385
386         /* See whether it is possible to hold a valid block tag.*/
387         if (remain_buf_size - tag_bytes < 0)
388                 return EINVAL;
389
390         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
391                                      JBD_FEATURE_INCOMPAT_CSUM_V3)) {
392                 struct jbd_block_tag3 *tag = __tag;
393                 tag_info->block = jbd_get32(tag, blocknr);
394                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
395                                              JBD_FEATURE_INCOMPAT_64BIT))
396                          tag_info->block |=
397                                  (uint64_t)jbd_get32(tag, blocknr_high) << 32;
398
399                 if (jbd_get32(tag, flags) & JBD_FLAG_ESCAPE)
400                         tag_info->block = 0;
401
402                 if (!(jbd_get32(tag, flags) & JBD_FLAG_SAME_UUID)) {
403                         /* See whether it is possible to hold UUID part.*/
404                         if (remain_buf_size - tag_bytes < UUID_SIZE)
405                                 return EINVAL;
406
407                         uuid_start = (char *)tag + tag_bytes;
408                         tag_info->uuid_exist = true;
409                         tag_info->tag_bytes += UUID_SIZE;
410                         memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
411                 }
412
413                 if (jbd_get32(tag, flags) & JBD_FLAG_LAST_TAG)
414                         tag_info->last_tag = true;
415
416         } else {
417                 struct jbd_block_tag *tag = __tag;
418                 tag_info->block = jbd_get32(tag, blocknr);
419                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
420                                              JBD_FEATURE_INCOMPAT_64BIT))
421                          tag_info->block |=
422                                  (uint64_t)jbd_get32(tag, blocknr_high) << 32;
423
424                 if (jbd_get16(tag, flags) & JBD_FLAG_ESCAPE)
425                         tag_info->block = 0;
426
427                 if (!(jbd_get16(tag, flags) & JBD_FLAG_SAME_UUID)) {
428                         /* See whether it is possible to hold UUID part.*/
429                         if (remain_buf_size - tag_bytes < UUID_SIZE)
430                                 return EINVAL;
431
432                         uuid_start = (char *)tag + tag_bytes;
433                         tag_info->uuid_exist = true;
434                         tag_info->tag_bytes += UUID_SIZE;
435                         memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
436                 }
437
438                 if (jbd_get16(tag, flags) & JBD_FLAG_LAST_TAG)
439                         tag_info->last_tag = true;
440
441         }
442         return EOK;
443 }
444
445 /**@brief  Write information to a block tag.
446  * @param  __tag pointer to the block tag
447  * @param  remaining size in buffer containing the block tag
448  * @param  tag_info information of this tag.
449  * @return  EOK when succeed, otherwise return EINVAL.*/
450 static int
451 jbd_write_block_tag(struct jbd_fs *jbd_fs,
452                     void *__tag,
453                     int32_t remain_buf_size,
454                     struct tag_info *tag_info)
455 {
456         char *uuid_start;
457         int tag_bytes = jbd_tag_bytes(jbd_fs);
458
459         tag_info->tag_bytes = tag_bytes;
460
461         /* See whether it is possible to hold a valid block tag.*/
462         if (remain_buf_size - tag_bytes < 0)
463                 return EINVAL;
464
465         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
466                                      JBD_FEATURE_INCOMPAT_CSUM_V3)) {
467                 struct jbd_block_tag3 *tag = __tag;
468                 jbd_set32(tag, blocknr, tag_info->block);
469                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
470                                              JBD_FEATURE_INCOMPAT_64BIT))
471                         jbd_set32(tag, blocknr_high, tag_info->block >> 32);
472
473                 if (tag_info->uuid_exist) {
474                         /* See whether it is possible to hold UUID part.*/
475                         if (remain_buf_size - tag_bytes < UUID_SIZE)
476                                 return EINVAL;
477
478                         uuid_start = (char *)tag + tag_bytes;
479                         tag_info->tag_bytes += UUID_SIZE;
480                         memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
481                 } else
482                         jbd_set32(tag, flags,
483                                   jbd_get32(tag, flags) | JBD_FLAG_SAME_UUID);
484
485                 if (tag_info->last_tag)
486                         jbd_set32(tag, flags,
487                                   jbd_get32(tag, flags) | JBD_FLAG_LAST_TAG);
488
489         } else {
490                 struct jbd_block_tag *tag = __tag;
491                 jbd_set32(tag, blocknr, tag_info->block);
492                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
493                                              JBD_FEATURE_INCOMPAT_64BIT))
494                         jbd_set32(tag, blocknr_high, tag_info->block >> 32);
495
496                 if (tag_info->uuid_exist) {
497                         /* See whether it is possible to hold UUID part.*/
498                         if (remain_buf_size - tag_bytes < UUID_SIZE)
499                                 return EINVAL;
500
501                         uuid_start = (char *)tag + tag_bytes;
502                         tag_info->tag_bytes += UUID_SIZE;
503                         memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
504                 } else
505                         jbd_set16(tag, flags,
506                                   jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID);
507
508                 if (tag_info->last_tag)
509                         jbd_set16(tag, flags,
510                                   jbd_get16(tag, flags) | JBD_FLAG_LAST_TAG);
511
512         }
513         return EOK;
514 }
515
516 /**@brief  Iterate all block tags in a block.
517  * @param  jbd_fs jbd filesystem
518  * @param  __tag_start pointer to the block
519  * @param  tag_tbl_size size of the block
520  * @param  func callback routine to indicate that
521  *         a block tag is found
522  * @param  arg additional argument to be passed to func */
523 static void
524 jbd_iterate_block_table(struct jbd_fs *jbd_fs,
525                         void *__tag_start,
526                         int32_t tag_tbl_size,
527                         void (*func)(struct jbd_fs * jbd_fs,
528                                         ext4_fsblk_t block,
529                                         uint8_t *uuid,
530                                         void *arg),
531                         void *arg)
532 {
533         char *tag_start, *tag_ptr;
534         int tag_bytes = jbd_tag_bytes(jbd_fs);
535         tag_start = __tag_start;
536         tag_ptr = tag_start;
537
538         /* Cut off the size of block tail storing checksum. */
539         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
540                                      JBD_FEATURE_INCOMPAT_CSUM_V2) ||
541             JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
542                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
543                 tag_tbl_size -= sizeof(struct jbd_block_tail);
544
545         while (tag_tbl_size) {
546                 struct tag_info tag_info;
547                 int rc = jbd_extract_block_tag(jbd_fs,
548                                       tag_ptr,
549                                       tag_bytes,
550                                       tag_tbl_size,
551                                       &tag_info);
552                 if (rc != EOK)
553                         break;
554
555                 if (func)
556                         func(jbd_fs, tag_info.block, tag_info.uuid, arg);
557
558                 /* Stop the iteration when we reach the last tag. */
559                 if (tag_info.last_tag)
560                         break;
561
562                 tag_ptr += tag_info.tag_bytes;
563                 tag_tbl_size -= tag_info.tag_bytes;
564         }
565 }
566
567 static void jbd_display_block_tags(struct jbd_fs *jbd_fs,
568                                    ext4_fsblk_t block,
569                                    uint8_t *uuid,
570                                    void *arg)
571 {
572         uint32_t *iblock = arg;
573         ext4_dbg(DEBUG_JBD, "Block in block_tag: %" PRIu64 "\n", block);
574         (*iblock)++;
575         (void)jbd_fs;
576         (void)uuid;
577         return;
578 }
579
580 static struct revoke_entry *
581 jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)
582 {
583         struct revoke_entry tmp = {
584                 .block = block
585         };
586
587         return RB_FIND(jbd_revoke, &info->revoke_root, &tmp);
588 }
589
590 /**@brief  Replay a block in a transaction.
591  * @param  jbd_fs jbd filesystem
592  * @param  block  block address to be replayed.*/
593 static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
594                                   ext4_fsblk_t block,
595                                   uint8_t *uuid __unused,
596                                   void *__arg)
597 {
598         int r;
599         struct replay_arg *arg = __arg;
600         struct recover_info *info = arg->info;
601         uint32_t *this_block = arg->this_block;
602         struct revoke_entry *revoke_entry;
603         struct ext4_block journal_block, ext4_block;
604         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
605
606         (*this_block)++;
607
608         /* We replay this block only if the current transaction id
609          * is equal or greater than that in revoke entry.*/
610         revoke_entry = jbd_revoke_entry_lookup(info, block);
611         if (revoke_entry &&
612             arg->this_trans_id < revoke_entry->trans_id)
613                 return;
614
615         ext4_dbg(DEBUG_JBD,
616                  "Replaying block in block_tag: %" PRIu64 "\n",
617                  block);
618
619         r = jbd_block_get(jbd_fs, &journal_block, *this_block);
620         if (r != EOK)
621                 return;
622
623         /* We need special treatment for ext4 superblock. */
624         if (block) {
625                 r = ext4_block_get_noread(fs->bdev, &ext4_block, block);
626                 if (r != EOK) {
627                         jbd_block_set(jbd_fs, &journal_block);
628                         return;
629                 }
630
631                 memcpy(ext4_block.data,
632                         journal_block.data,
633                         jbd_get32(&jbd_fs->sb, blocksize));
634
635                 ext4_bcache_set_dirty(ext4_block.buf);
636                 ext4_block_set(fs->bdev, &ext4_block);
637         } else {
638                 uint16_t mount_count, state;
639                 mount_count = ext4_get16(&fs->sb, mount_count);
640                 state = ext4_get16(&fs->sb, state);
641
642                 memcpy(&fs->sb,
643                         journal_block.data + EXT4_SUPERBLOCK_OFFSET,
644                         EXT4_SUPERBLOCK_SIZE);
645
646                 /* Mark system as mounted */
647                 ext4_set16(&fs->sb, state, state);
648                 r = ext4_sb_write(fs->bdev, &fs->sb);
649                 if (r != EOK)
650                         return;
651
652                 /*Update mount count*/
653                 ext4_set16(&fs->sb, mount_count, mount_count);
654         }
655
656         jbd_block_set(jbd_fs, &journal_block);
657         
658         return;
659 }
660
661 /**@brief  Add block address to revoke tree, along with
662  *         its transaction id.
663  * @param  info  journal replay info
664  * @param  block  block address to be replayed.*/
665 static void jbd_add_revoke_block_tags(struct recover_info *info,
666                                       ext4_fsblk_t block)
667 {
668         struct revoke_entry *revoke_entry;
669
670         ext4_dbg(DEBUG_JBD, "Add block %" PRIu64 " to revoke tree\n", block);
671         /* If the revoke entry with respect to the block address
672          * exists already, update its transaction id.*/
673         revoke_entry = jbd_revoke_entry_lookup(info, block);
674         if (revoke_entry) {
675                 revoke_entry->trans_id = info->this_trans_id;
676                 return;
677         }
678
679         revoke_entry = jbd_alloc_revoke_entry();
680         ext4_assert(revoke_entry);
681         revoke_entry->block = block;
682         revoke_entry->trans_id = info->this_trans_id;
683         RB_INSERT(jbd_revoke, &info->revoke_root, revoke_entry);
684
685         return;
686 }
687
688 static void jbd_destroy_revoke_tree(struct recover_info *info)
689 {
690         while (!RB_EMPTY(&info->revoke_root)) {
691                 struct revoke_entry *revoke_entry =
692                         RB_MIN(jbd_revoke, &info->revoke_root);
693                 ext4_assert(revoke_entry);
694                 RB_REMOVE(jbd_revoke, &info->revoke_root, revoke_entry);
695                 jbd_free_revoke_entry(revoke_entry);
696         }
697 }
698
/* Make sure we wrap around the log correctly!
 *
 * A block index that has reached or passed the maxlen field of the jbd
 * superblock is moved back by (maxlen - first).
 *
 * sb:  pointer to the jbd superblock
 * var: modifiable block index; it is evaluated more than once, so it
 *      must not have side effects. */
#define wrap(sb, var)                                                   \
do {                                                                    \
        if ((var) >= jbd_get32((sb), maxlen))                           \
                (var) -= (jbd_get32((sb), maxlen) -                     \
                          jbd_get32((sb), first));                      \
} while (0)

/* Values of the action argument of jbd_iterate_log(). */
#define ACTION_SCAN 0    /* Scan for the last valid transaction. */
#define ACTION_REVOKE 1  /* NOTE(review): presumably the pass collecting
                          * revoke records - confirm in jbd_iterate_log(). */
#define ACTION_RECOVER 2 /* Replay the blocks listed in descriptor blocks. */
709
710 /**@brief  Add entries in a revoke block to revoke tree.
711  * @param  jbd_fs jbd filesystem
712  * @param  header revoke block header
713  * @param  recover_info  journal replay info*/
714 static void jbd_build_revoke_tree(struct jbd_fs *jbd_fs,
715                                   struct jbd_bhdr *header,
716                                   struct recover_info *info)
717 {
718         char *blocks_entry;
719         struct jbd_revoke_header *revoke_hdr =
720                 (struct jbd_revoke_header *)header;
721         uint32_t i, nr_entries, record_len = 4;
722
723         /* If we are working on a 64bit jbd filesystem, */
724         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
725                                      JBD_FEATURE_INCOMPAT_64BIT))
726                 record_len = 8;
727
728         nr_entries = (jbd_get32(revoke_hdr, count) -
729                         sizeof(struct jbd_revoke_header)) /
730                         record_len;
731
732         blocks_entry = (char *)(revoke_hdr + 1);
733
734         for (i = 0;i < nr_entries;i++) {
735                 if (record_len == 8) {
736                         uint64_t *blocks =
737                                 (uint64_t *)blocks_entry;
738                         jbd_add_revoke_block_tags(info, to_be64(*blocks));
739                 } else {
740                         uint32_t *blocks =
741                                 (uint32_t *)blocks_entry;
742                         jbd_add_revoke_block_tags(info, to_be32(*blocks));
743                 }
744                 blocks_entry += record_len;
745         }
746 }
747
748 static void jbd_debug_descriptor_block(struct jbd_fs *jbd_fs,
749                                        struct jbd_bhdr *header,
750                                        uint32_t *iblock)
751 {
752         jbd_iterate_block_table(jbd_fs,
753                                 header + 1,
754                                 jbd_get32(&jbd_fs->sb, blocksize) -
755                                         sizeof(struct jbd_bhdr),
756                                 jbd_display_block_tags,
757                                 iblock);
758 }
759
760 static void jbd_replay_descriptor_block(struct jbd_fs *jbd_fs,
761                                         struct jbd_bhdr *header,
762                                         struct replay_arg *arg)
763 {
764         jbd_iterate_block_table(jbd_fs,
765                                 header + 1,
766                                 jbd_get32(&jbd_fs->sb, blocksize) -
767                                         sizeof(struct jbd_bhdr),
768                                 jbd_replay_block_tags,
769                                 arg);
770 }
771
772 /**@brief  The core routine of journal replay.
773  * @param  jbd_fs jbd filesystem
774  * @param  recover_info  journal replay info
775  * @param  action action needed to be taken
776  * @return standard error code*/
777 int jbd_iterate_log(struct jbd_fs *jbd_fs,
778                     struct recover_info *info,
779                     int action)
780 {
781         int r = EOK;
782         bool log_end = false;
783         struct jbd_sb *sb = &jbd_fs->sb;
784         uint32_t start_trans_id, this_trans_id;
785         uint32_t start_block, this_block;
786
787         /* We start iterating valid blocks in the whole journal.*/
788         start_trans_id = this_trans_id = jbd_get32(sb, sequence);
789         start_block = this_block = jbd_get32(sb, start);
790
791         ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %" PRIu32 "\n",
792                             start_trans_id);
793
794         while (!log_end) {
795                 struct ext4_block block;
796                 struct jbd_bhdr *header;
797                 /* If we are not scanning for the last
798                  * valid transaction in the journal,
799                  * we will stop when we reach the end of
800                  * the journal.*/
801                 if (action != ACTION_SCAN)
802                         if (this_trans_id > info->last_trans_id) {
803                                 log_end = true;
804                                 continue;
805                         }
806
807                 r = jbd_block_get(jbd_fs, &block, this_block);
808                 if (r != EOK)
809                         break;
810
811                 header = (struct jbd_bhdr *)block.data;
812                 /* This block does not have a valid magic number,
813                  * so we have reached the end of the journal.*/
814                 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER) {
815                         jbd_block_set(jbd_fs, &block);
816                         log_end = true;
817                         continue;
818                 }
819
820                 /* If the transaction id we found is not expected,
821                  * we may have reached the end of the journal.
822                  *
823                  * If we are not scanning the journal, something
824                  * bad might have taken place. :-( */
825                 if (jbd_get32(header, sequence) != this_trans_id) {
826                         if (action != ACTION_SCAN)
827                                 r = EIO;
828
829                         jbd_block_set(jbd_fs, &block);
830                         log_end = true;
831                         continue;
832                 }
833
834                 switch (jbd_get32(header, blocktype)) {
835                 case JBD_DESCRIPTOR_BLOCK:
836                         ext4_dbg(DEBUG_JBD, "Descriptor block: %" PRIu32", "
837                                             "trans_id: %" PRIu32"\n",
838                                             this_block, this_trans_id);
839                         if (action == ACTION_RECOVER) {
840                                 struct replay_arg replay_arg;
841                                 replay_arg.info = info;
842                                 replay_arg.this_block = &this_block;
843                                 replay_arg.this_trans_id = this_trans_id;
844
845                                 jbd_replay_descriptor_block(jbd_fs,
846                                                 header, &replay_arg);
847                         } else
848                                 jbd_debug_descriptor_block(jbd_fs,
849                                                 header, &this_block);
850
851                         break;
852                 case JBD_COMMIT_BLOCK:
853                         ext4_dbg(DEBUG_JBD, "Commit block: %" PRIu32", "
854                                             "trans_id: %" PRIu32"\n",
855                                             this_block, this_trans_id);
856                         /* This is the end of a transaction,
857                          * we may now proceed to the next transaction.
858                          */
859                         this_trans_id++;
860                         break;
861                 case JBD_REVOKE_BLOCK:
862                         ext4_dbg(DEBUG_JBD, "Revoke block: %" PRIu32", "
863                                             "trans_id: %" PRIu32"\n",
864                                             this_block, this_trans_id);
865                         if (action == ACTION_REVOKE) {
866                                 info->this_trans_id = this_trans_id;
867                                 jbd_build_revoke_tree(jbd_fs,
868                                                 header, info);
869                         }
870                         break;
871                 default:
872                         log_end = true;
873                         break;
874                 }
875                 jbd_block_set(jbd_fs, &block);
876                 this_block++;
877                 wrap(sb, this_block);
878                 if (this_block == start_block)
879                         log_end = true;
880
881         }
882         ext4_dbg(DEBUG_JBD, "End of journal.\n");
883         if (r == EOK && action == ACTION_SCAN) {
884                 /* We have finished scanning the journal. */
885                 info->start_trans_id = start_trans_id;
886                 if (this_trans_id > start_trans_id)
887                         info->last_trans_id = this_trans_id - 1;
888                 else
889                         info->last_trans_id = this_trans_id;
890         }
891
892         return r;
893 }
894
895 /**@brief  Replay journal.
896  * @param  jbd_fs jbd filesystem
897  * @return standard error code*/
898 int jbd_recover(struct jbd_fs *jbd_fs)
899 {
900         int r;
901         struct recover_info info;
902         struct jbd_sb *sb = &jbd_fs->sb;
903         if (!sb->start)
904                 return EOK;
905
906         RB_INIT(&info.revoke_root);
907
908         r = jbd_iterate_log(jbd_fs, &info, ACTION_SCAN);
909         if (r != EOK)
910                 return r;
911
912         r = jbd_iterate_log(jbd_fs, &info, ACTION_REVOKE);
913         if (r != EOK)
914                 return r;
915
916         r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);
917         if (r == EOK) {
918                 /* If we successfully replay the journal,
919                  * clear EXT4_FINCOM_RECOVER flag on the
920                  * ext4 superblock, and set the start of
921                  * journal to 0.*/
922                 uint32_t features_incompatible =
923                         ext4_get32(&jbd_fs->inode_ref.fs->sb,
924                                    features_incompatible);
925                 jbd_set32(&jbd_fs->sb, start, 0);
926                 features_incompatible &= ~EXT4_FINCOM_RECOVER;
927                 ext4_set32(&jbd_fs->inode_ref.fs->sb,
928                            features_incompatible,
929                            features_incompatible);
930                 jbd_fs->dirty = true;
931                 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
932                                   &jbd_fs->inode_ref.fs->sb);
933         }
934         jbd_destroy_revoke_tree(&info);
935         return r;
936 }
937
938 void jbd_journal_write_sb(struct jbd_journal *journal)
939 {
940         struct jbd_fs *jbd_fs = journal->jbd_fs;
941         jbd_set32(&jbd_fs->sb, start, journal->start);
942         jbd_set32(&jbd_fs->sb, sequence, journal->trans_id);
943         jbd_fs->dirty = true;
944 }
945
946 /**@brief  Start accessing the journal.
947  * @param  jbd_fs jbd filesystem
948  * @param  journal current journal session
949  * @return standard error code*/
950 int jbd_journal_start(struct jbd_fs *jbd_fs,
951                       struct jbd_journal *journal)
952 {
953         int r;
954         uint32_t features_incompatible =
955                         ext4_get32(&jbd_fs->inode_ref.fs->sb,
956                                    features_incompatible);
957         features_incompatible |= EXT4_FINCOM_RECOVER;
958         ext4_set32(&jbd_fs->inode_ref.fs->sb,
959                         features_incompatible,
960                         features_incompatible);
961         r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
962                         &jbd_fs->inode_ref.fs->sb);
963         if (r != EOK)
964                 return r;
965
966         journal->first = jbd_get32(&jbd_fs->sb, first);
967         journal->start = journal->first;
968         journal->last = journal->first;
969         journal->trans_id = 1;
970         journal->alloc_trans_id = 1;
971
972         journal->block_size = jbd_get32(&jbd_fs->sb, blocksize);
973
974         TAILQ_INIT(&journal->trans_queue);
975         TAILQ_INIT(&journal->cp_queue);
976         journal->jbd_fs = jbd_fs;
977         jbd_journal_write_sb(journal);
978         return jbd_write_sb(jbd_fs);
979 }
980
981 /**@brief  Stop accessing the journal.
982  * @param  journal current journal session
983  * @return standard error code*/
984 int jbd_journal_stop(struct jbd_journal *journal)
985 {
986         int r;
987         struct jbd_fs *jbd_fs = journal->jbd_fs;
988         uint32_t features_incompatible;
989
990         /* Commit all the transactions to the journal.*/
991         jbd_journal_commit_all(journal);
992         /* Make sure that journalled content have reached
993          * the disk.*/
994         ext4_block_cache_flush(jbd_fs->inode_ref.fs->bdev);
995
996         features_incompatible =
997                 ext4_get32(&jbd_fs->inode_ref.fs->sb,
998                            features_incompatible);
999         features_incompatible &= ~EXT4_FINCOM_RECOVER;
1000         ext4_set32(&jbd_fs->inode_ref.fs->sb,
1001                         features_incompatible,
1002                         features_incompatible);
1003         r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1004                         &jbd_fs->inode_ref.fs->sb);
1005         if (r != EOK)
1006                 return r;
1007
1008         journal->start = 0;
1009         journal->trans_id = 0;
1010         jbd_journal_write_sb(journal);
1011         return jbd_write_sb(journal->jbd_fs);
1012 }
1013
1014 /**@brief  Allocate a block in the journal.
1015  * @param  journal current journal session
1016  * @param  trans transaction
1017  * @return allocated block address*/
1018 static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal,
1019                                         struct jbd_trans *trans)
1020 {
1021         uint32_t start_block;
1022
1023         start_block = journal->last++;
1024         trans->alloc_blocks++;
1025         wrap(&journal->jbd_fs->sb, journal->last);
1026         
1027         /* If there is no space left, flush all journalled
1028          * blocks to disk first.*/
1029         if (journal->last == journal->start)
1030                 ext4_block_cache_flush(journal->jbd_fs->inode_ref.fs->bdev);
1031
1032         return start_block;
1033 }
1034
1035 /**@brief  Allocate a new transaction
1036  * @param  journal current journal session
1037  * @return transaction allocated*/
1038 struct jbd_trans *
1039 jbd_journal_new_trans(struct jbd_journal *journal)
1040 {
1041         struct jbd_trans *trans = calloc(1, sizeof(struct jbd_trans));
1042         if (!trans)
1043                 return NULL;
1044
1045         /* We will assign a trans_id to this transaction,
1046          * once it has been committed.*/
1047         trans->journal = journal;
1048         trans->error = EOK;
1049         return trans;
1050 }
1051
1052 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
1053                           struct ext4_buf *buf __unused,
1054                           int res,
1055                           void *arg);
1056
1057 /**@brief  Add block to a transaction
1058  * @param  trans transaction
1059  * @param  block block descriptor
1060  * @return standard error code*/
1061 int jbd_trans_add_block(struct jbd_trans *trans,
1062                         struct ext4_block *block)
1063 {
1064         struct jbd_buf *buf;
1065         /* We do not need to add those unmodified buffer to
1066          * a transaction. */
1067         if (!ext4_bcache_test_flag(block->buf, BC_DIRTY))
1068                 return EOK;
1069
1070         buf = calloc(1, sizeof(struct jbd_buf));
1071         if (!buf)
1072                 return ENOMEM;
1073
1074         buf->trans = trans;
1075         buf->block = *block;
1076         ext4_bcache_inc_ref(block->buf);
1077
1078         /* If the content reach the disk, notify us
1079          * so that we may do a checkpoint. */
1080         block->buf->end_write = jbd_trans_end_write;
1081         block->buf->end_write_arg = trans;
1082
1083         trans->data_cnt++;
1084         LIST_INSERT_HEAD(&trans->buf_list, buf, buf_node);
1085         return EOK;
1086 }
1087
1088 /**@brief  Add block to be revoked to a transaction
1089  * @param  trans transaction
1090  * @param  lba logical block address
1091  * @return standard error code*/
1092 int jbd_trans_revoke_block(struct jbd_trans *trans,
1093                            ext4_fsblk_t lba)
1094 {
1095         struct jbd_revoke_rec *rec =
1096                 calloc(1, sizeof(struct jbd_revoke_rec));
1097         if (!rec)
1098                 return ENOMEM;
1099
1100         rec->lba = lba;
1101         LIST_INSERT_HEAD(&trans->revoke_list, rec, revoke_node);
1102         return EOK;
1103 }
1104
1105 /**@brief  Free a transaction
1106  * @param  journal current journal session
1107  * @param  trans transaction
1108  * @param  abort discard all the modifications on the block?
1109  * @return standard error code*/
1110 void jbd_journal_free_trans(struct jbd_journal *journal,
1111                             struct jbd_trans *trans,
1112                             bool abort)
1113 {
1114         struct jbd_buf *jbd_buf, *tmp;
1115         struct jbd_revoke_rec *rec, *tmp2;
1116         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1117         LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node,
1118                           tmp) {
1119                 if (abort) {
1120                         ext4_bcache_clear_dirty(jbd_buf->block.buf);
1121                         ext4_block_set(fs->bdev, &jbd_buf->block);
1122                 }
1123
1124                 LIST_REMOVE(jbd_buf, buf_node);
1125                 free(jbd_buf);
1126         }
1127         LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1128                           tmp2) {
1129                 LIST_REMOVE(rec, revoke_node);
1130                 free(rec);
1131         }
1132
1133         free(trans);
1134 }
1135
1136 /**@brief  Write commit block for a transaction
1137  * @param  trans transaction
1138  * @return standard error code*/
1139 static int jbd_trans_write_commit_block(struct jbd_trans *trans)
1140 {
1141         int rc;
1142         struct jbd_commit_header *header;
1143         uint32_t commit_iblock = 0;
1144         struct ext4_block commit_block;
1145         struct jbd_journal *journal = trans->journal;
1146
1147         commit_iblock = jbd_journal_alloc_block(journal, trans);
1148         rc = jbd_block_get_noread(journal->jbd_fs,
1149                         &commit_block, commit_iblock);
1150         if (rc != EOK)
1151                 return rc;
1152
1153         header = (struct jbd_commit_header *)commit_block.data;
1154         jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER);
1155         jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);
1156         jbd_set32(&header->header, sequence, trans->trans_id);
1157
1158         ext4_bcache_set_dirty(commit_block.buf);
1159         rc = jbd_block_set(journal->jbd_fs, &commit_block);
1160         if (rc != EOK)
1161                 return rc;
1162
1163         return EOK;
1164 }
1165
1166 /**@brief  Write descriptor block for a transaction
1167  * @param  journal current journal session
1168  * @param  trans transaction
1169  * @return standard error code*/
1170 static int jbd_journal_prepare(struct jbd_journal *journal,
1171                                struct jbd_trans *trans)
1172 {
1173         int rc = EOK, i = 0;
1174         int32_t tag_tbl_size;
1175         uint32_t desc_iblock = 0;
1176         uint32_t data_iblock = 0;
1177         char *tag_start = NULL, *tag_ptr = NULL;
1178         struct jbd_buf *jbd_buf;
1179         struct ext4_block desc_block, data_block;
1180
1181         LIST_FOREACH(jbd_buf, &trans->buf_list, buf_node) {
1182                 struct tag_info tag_info;
1183                 bool uuid_exist = false;
1184 again:
1185                 if (!desc_iblock) {
1186                         struct jbd_bhdr *bhdr;
1187                         desc_iblock = jbd_journal_alloc_block(journal, trans);
1188                         rc = jbd_block_get_noread(journal->jbd_fs,
1189                                            &desc_block, desc_iblock);
1190                         if (rc != EOK)
1191                                 break;
1192
1193                         ext4_bcache_set_dirty(desc_block.buf);
1194
1195                         bhdr = (struct jbd_bhdr *)desc_block.data;
1196                         jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1197                         jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK);
1198                         jbd_set32(bhdr, sequence, trans->trans_id);
1199
1200                         tag_start = (char *)(bhdr + 1);
1201                         tag_ptr = tag_start;
1202                         uuid_exist = true;
1203                         tag_tbl_size = journal->block_size -
1204                                 sizeof(struct jbd_bhdr);
1205
1206                         if (!trans->start_iblock)
1207                                 trans->start_iblock = desc_iblock;
1208
1209                 }
1210                 tag_info.block = jbd_buf->block.lb_id;
1211                 tag_info.uuid_exist = uuid_exist;
1212                 if (i == trans->data_cnt - 1)
1213                         tag_info.last_tag = true;
1214
1215                 if (uuid_exist)
1216                         memcpy(tag_info.uuid, journal->jbd_fs->sb.uuid,
1217                                         UUID_SIZE);
1218
1219                 rc = jbd_write_block_tag(journal->jbd_fs,
1220                                 tag_ptr,
1221                                 tag_tbl_size,
1222                                 &tag_info);
1223                 if (rc != EOK) {
1224                         jbd_block_set(journal->jbd_fs, &desc_block);
1225                         desc_iblock = 0;
1226                         goto again;
1227                 }
1228
1229                 data_iblock = jbd_journal_alloc_block(journal, trans);
1230                 rc = jbd_block_get_noread(journal->jbd_fs,
1231                                 &data_block, data_iblock);
1232                 if (rc != EOK)
1233                         break;
1234
1235                 ext4_bcache_set_dirty(data_block.buf);
1236
1237                 memcpy(data_block.data, jbd_buf->block.data,
1238                         journal->block_size);
1239
1240                 rc = jbd_block_set(journal->jbd_fs, &data_block);
1241                 if (rc != EOK)
1242                         break;
1243
1244                 tag_ptr += tag_info.tag_bytes;
1245                 tag_tbl_size -= tag_info.tag_bytes;
1246
1247                 i++;
1248         }
1249         if (rc == EOK && desc_iblock)
1250                 jbd_block_set(journal->jbd_fs, &desc_block);
1251
1252         return rc;
1253 }
1254
1255 /**@brief  Write revoke block for a transaction
1256  * @param  journal current journal session
1257  * @param  trans transaction
1258  * @return standard error code*/
1259 static int
1260 jbd_journal_prepare_revoke(struct jbd_journal *journal,
1261                            struct jbd_trans *trans)
1262 {
1263         int rc = EOK, i = 0;
1264         int32_t tag_tbl_size;
1265         uint32_t desc_iblock = 0;
1266         char *blocks_entry = NULL;
1267         struct jbd_revoke_rec *rec, *tmp;
1268         struct ext4_block desc_block;
1269         struct jbd_revoke_header *header = NULL;
1270         int32_t record_len = 4;
1271
1272         if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1273                                      JBD_FEATURE_INCOMPAT_64BIT))
1274                 record_len = 8;
1275
1276         LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1277                           tmp) {
1278 again:
1279                 if (!desc_iblock) {
1280                         struct jbd_bhdr *bhdr;
1281                         desc_iblock = jbd_journal_alloc_block(journal, trans);
1282                         rc = jbd_block_get_noread(journal->jbd_fs,
1283                                            &desc_block, desc_iblock);
1284                         if (rc != EOK) {
1285                                 break;
1286                         }
1287
1288                         ext4_bcache_set_dirty(desc_block.buf);
1289
1290                         bhdr = (struct jbd_bhdr *)desc_block.data;
1291                         jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1292                         jbd_set32(bhdr, blocktype, JBD_REVOKE_BLOCK);
1293                         jbd_set32(bhdr, sequence, trans->trans_id);
1294                         
1295                         header = (struct jbd_revoke_header *)bhdr;
1296                         blocks_entry = (char *)(header + 1);
1297                         tag_tbl_size = journal->block_size -
1298                                 sizeof(struct jbd_revoke_header);
1299
1300                         if (!trans->start_iblock)
1301                                 trans->start_iblock = desc_iblock;
1302
1303                 }
1304
1305                 if (tag_tbl_size < record_len) {
1306                         jbd_set32(header, count,
1307                                   journal->block_size - tag_tbl_size);
1308                         jbd_block_set(journal->jbd_fs, &desc_block);
1309                         desc_iblock = 0;
1310                         header = NULL;
1311                         goto again;
1312                 }
1313                 if (record_len == 8) {
1314                         uint64_t *blocks =
1315                                 (uint64_t *)blocks_entry;
1316                         *blocks = to_be64(rec->lba);
1317                 } else {
1318                         uint32_t *blocks =
1319                                 (uint32_t *)blocks_entry;
1320                         *blocks = to_be32(rec->lba);
1321                 }
1322                 blocks_entry += record_len;
1323                 tag_tbl_size -= record_len;
1324
1325                 i++;
1326         }
1327         if (rc == EOK && desc_iblock) {
1328                 if (header != NULL)
1329                         jbd_set32(header, count,
1330                                   journal->block_size - tag_tbl_size);
1331
1332                 jbd_block_set(journal->jbd_fs, &desc_block);
1333         }
1334
1335         return rc;
1336 }
1337
1338 /**@brief  Submit the transaction to transaction queue.
1339  * @param  journal current journal session
1340  * @param  trans transaction*/
1341 void
1342 jbd_journal_submit_trans(struct jbd_journal *journal,
1343                          struct jbd_trans *trans)
1344 {
1345         TAILQ_INSERT_TAIL(&journal->trans_queue,
1346                           trans,
1347                           trans_node);
1348 }
1349
1350 /**@brief  Put references of block descriptors in a transaction.
1351  * @param  journal current journal session
1352  * @param  trans transaction*/
1353 void jbd_journal_cp_trans(struct jbd_journal *journal, struct jbd_trans *trans)
1354 {
1355         struct jbd_buf *jbd_buf, *tmp;
1356         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1357         LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node,
1358                         tmp) {
1359                 struct ext4_block block = jbd_buf->block;
1360                 ext4_block_set(fs->bdev, &block);
1361         }
1362 }
1363
1364 /**@brief  Update the start block of the journal when
1365  *         all the contents in a transaction reach the disk.*/
1366 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
1367                           struct ext4_buf *buf __unused,
1368                           int res,
1369                           void *arg)
1370 {
1371         struct jbd_trans *trans = arg;
1372         struct jbd_journal *journal = trans->journal;
1373         bool first_in_queue =
1374                 trans == TAILQ_FIRST(&journal->cp_queue);
1375         if (res != EOK)
1376                 trans->error = res;
1377
1378         trans->written_cnt++;
1379         if (trans->written_cnt == trans->data_cnt) {
1380                 TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
1381
1382                 if (first_in_queue) {
1383                         journal->start = trans->start_iblock +
1384                                 trans->alloc_blocks;
1385                         wrap(&journal->jbd_fs->sb, journal->start);
1386                         journal->trans_id = trans->trans_id + 1;
1387                 }
1388                 jbd_journal_free_trans(journal, trans, false);
1389
1390                 if (first_in_queue) {
1391                         while ((trans = TAILQ_FIRST(&journal->cp_queue))) {
1392                                 if (!trans->data_cnt) {
1393                                         TAILQ_REMOVE(&journal->cp_queue,
1394                                                      trans,
1395                                                      trans_node);
1396                                         journal->start = trans->start_iblock +
1397                                                 trans->alloc_blocks;
1398                                         wrap(&journal->jbd_fs->sb, journal->start);
1399                                         journal->trans_id = trans->trans_id + 1;
1400                                         jbd_journal_free_trans(journal,
1401                                                                trans, false);
1402                                 } else {
1403                                         journal->start = trans->start_iblock;
1404                                         wrap(&journal->jbd_fs->sb, journal->start);
1405                                         journal->trans_id = trans->trans_id;
1406                                         break;
1407                                 }
1408                         }
1409                         jbd_journal_write_sb(journal);
1410                         jbd_write_sb(journal->jbd_fs);
1411                 }
1412         }
1413 }
1414
1415 /**@brief  Commit a transaction to the journal immediately.
1416  * @param  journal current journal session
1417  * @param  trans transaction
1418  * @return standard error code*/
1419 int jbd_journal_commit_trans(struct jbd_journal *journal,
1420                              struct jbd_trans *trans)
1421 {
1422         int rc = EOK;
1423         uint32_t last = journal->last;
1424
1425         trans->trans_id = journal->alloc_trans_id;
1426         rc = jbd_journal_prepare(journal, trans);
1427         if (rc != EOK)
1428                 goto Finish;
1429
1430         rc = jbd_journal_prepare_revoke(journal, trans);
1431         if (rc != EOK)
1432                 goto Finish;
1433
1434         rc = jbd_trans_write_commit_block(trans);
1435         if (rc != EOK)
1436                 goto Finish;
1437
1438         journal->alloc_trans_id++;
1439         if (TAILQ_EMPTY(&journal->cp_queue)) {
1440                 if (trans->data_cnt) {
1441                         journal->start = trans->start_iblock;
1442                         wrap(&journal->jbd_fs->sb, journal->start);
1443                         journal->trans_id = trans->trans_id;
1444                         jbd_journal_write_sb(journal);
1445                         jbd_write_sb(journal->jbd_fs);
1446                         TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
1447                                         trans_node);
1448                         jbd_journal_cp_trans(journal, trans);
1449                 } else {
1450                         journal->start = trans->start_iblock +
1451                                 trans->alloc_blocks;
1452                         wrap(&journal->jbd_fs->sb, journal->start);
1453                         journal->trans_id = trans->trans_id + 1;
1454                         jbd_journal_write_sb(journal);
1455                         jbd_journal_free_trans(journal, trans, false);
1456                 }
1457         } else {
1458                 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
1459                                 trans_node);
1460                 if (trans->data_cnt)
1461                         jbd_journal_cp_trans(journal, trans);
1462
1463         }
1464 Finish:
1465         if (rc != EOK) {
1466                 journal->last = last;
1467                 jbd_journal_free_trans(journal, trans, true);
1468         }
1469         return rc;
1470 }
1471
1472 /**@brief  Commit one transaction on transaction queue
1473  *         to the journal.
1474  * @param  journal current journal session.*/
1475 void jbd_journal_commit_one(struct jbd_journal *journal)
1476 {
1477         struct jbd_trans *trans;
1478
1479         if ((trans = TAILQ_FIRST(&journal->trans_queue))) {
1480                 TAILQ_REMOVE(&journal->trans_queue, trans, trans_node);
1481                 jbd_journal_commit_trans(journal, trans);
1482         }
1483 }
1484
1485 /**@brief  Commit all the transactions on transaction queue
1486  *         to the journal.
1487  * @param  journal current journal session.*/
1488 void jbd_journal_commit_all(struct jbd_journal *journal)
1489 {
1490         while (!TAILQ_EMPTY(&journal->trans_queue)) {
1491                 jbd_journal_commit_one(journal);
1492         }
1493 }
1494
1495 /**
1496  * @}
1497  */