a97bbe41b0c15313272eec335ee50695f3b2a8e3
[lwext4.git] / lwext4 / ext4_journal.c
1 /*
2  * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3  * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * - Redistributions of source code must retain the above copyright
11  *   notice, this list of conditions and the following disclaimer.
12  * - Redistributions in binary form must reproduce the above copyright
13  *   notice, this list of conditions and the following disclaimer in the
14  *   documentation and/or other materials provided with the distribution.
15  * - The name of the author may not be used to endorse or promote products
16  *   derived from this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30 /** @addtogroup lwext4
31  * @{
32  */
33 /**
34  * @file  ext4_journal.c
35  * @brief Journal handle functions
36  */
37
38 #include "ext4_config.h"
39 #include "ext4_types.h"
40 #include "ext4_fs.h"
41 #include "ext4_super.h"
42 #include "ext4_journal.h"
43 #include "ext4_errno.h"
44 #include "ext4_blockdev.h"
45 #include "ext4_crc32.h"
46 #include "ext4_debug.h"
47
48 #include <string.h>
49 #include <stdlib.h>
50
51 /**@brief  Revoke entry during journal replay.*/
52 struct revoke_entry {
53         /**@brief  Block number not to be replayed.*/
54         ext4_fsblk_t block;
55
56         /**@brief  For any transaction id smaller
57          *         than trans_id, records of @block
58          *         in those transactions should not
59          *         be replayed.*/
60         uint32_t trans_id;
61
62         /**@brief  Revoke tree node.*/
63         RB_ENTRY(revoke_entry) revoke_node;
64 };
65
66 /**@brief  Valid journal replay information.*/
67 struct recover_info {
68         /**@brief  Starting transaction id.*/
69         uint32_t start_trans_id;
70
71         /**@brief  Ending transaction id.*/
72         uint32_t last_trans_id;
73
74         /**@brief  Used as internal argument.*/
75         uint32_t this_trans_id;
76
77         /**@brief  RB-Tree storing revoke entries.*/
78         RB_HEAD(jbd_revoke, revoke_entry) revoke_root;
79 };
80
/**@brief  Argument bundle handed to the journal replay callbacks.*/
struct replay_arg {
        /**@brief  Replay information shared by the whole recovery.*/
        struct recover_info *info;

        /**@brief  Pointer to the index of the journal block we are
         *         currently on.*/
        uint32_t *this_block;

        /**@brief  Id of the transaction we are currently on.*/
        uint32_t this_trans_id;
};
92
93 static int
94 jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b)
95 {
96         if (a->block > b->block)
97                 return 1;
98         else if (a->block < b->block)
99                 return -1;
100         return 0;
101 }
102
103 static int
104 jbd_block_rec_cmp(struct jbd_block_rec *a, struct jbd_block_rec *b)
105 {
106         if (a->lba > b->lba)
107                 return 1;
108         else if (a->lba < b->lba)
109                 return -1;
110         return 0;
111 }
112
113 RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
114                      jbd_revoke_entry_cmp, static inline)
115 RB_GENERATE_INTERNAL(jbd_block, jbd_block_rec, block_rec_node,
116                      jbd_block_rec_cmp, static inline)
117
118 #define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry))
119 #define jbd_free_revoke_entry(addr) free(addr)
120
121 static int jbd_has_csum(struct jbd_sb *jbd_sb)
122 {
123         if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V2))
124                 return 2;
125
126         if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V3))
127                 return 3;
128
129         return 0;
130 }
131
#if CONFIG_META_CSUM_ENABLE
/**@brief  Compute the crc32c checksum of the jbd superblock.
 * @param  jbd_sb jbd superblock
 * @return checksum, or 0 when the journal has no checksum feature*/
static uint32_t jbd_sb_csum(struct jbd_sb *jbd_sb)
{
        uint32_t saved_csum;
        uint32_t crc;

        if (!jbd_has_csum(jbd_sb))
                return 0;

        /* The checksum field is zeroed while the superblock is
         * hashed and put back afterwards. */
        saved_csum = jbd_sb->checksum;
        jbd_set32(jbd_sb, checksum, 0);

        /* crc32c over the whole superblock */
        crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_sb, JBD_SUPERBLOCK_SIZE);

        jbd_sb->checksum = saved_csum;
        return crc;
}
#else
#define jbd_sb_csum(...) 0
#endif
150
151 static void jbd_sb_csum_set(struct jbd_sb *jbd_sb)
152 {
153         if (!jbd_has_csum(jbd_sb))
154                 return;
155
156         jbd_set32(jbd_sb, checksum, jbd_sb_csum(jbd_sb));
157 }
158
#if CONFIG_META_CSUM_ENABLE
/**@brief  Check the checksum stored in the jbd superblock.
 * @param  jbd_sb jbd superblock
 * @return true if the checksum matches or if the journal has no
 *         checksum feature*/
static bool
jbd_verify_sb_csum(struct jbd_sb *jbd_sb)
{
        bool matches = true;

        if (jbd_has_csum(jbd_sb))
                matches = (jbd_sb_csum(jbd_sb) ==
                           jbd_get32(jbd_sb, checksum));

        return matches;
}
#else
#define jbd_verify_sb_csum(...) true
#endif
171
#if CONFIG_META_CSUM_ENABLE
/**@brief  Compute the crc32c checksum of a journal metadata block.
 *         The block tail that stores the checksum occupies the last
 *         sizeof(struct jbd_block_tail) bytes of the block.
 * @param  jbd_fs jbd filesystem
 * @param  bhdr start of the metadata block
 * @return checksum, or 0 when the journal has no checksum feature*/
static uint32_t jbd_meta_csum(struct jbd_fs *jbd_fs,
                              struct jbd_bhdr *bhdr)
{
        struct jbd_block_tail *tail;
        uint32_t block_size;
        uint32_t saved_csum;
        uint32_t crc;

        if (!jbd_has_csum(&jbd_fs->sb))
                return 0;

        block_size = jbd_get32(&jbd_fs->sb, blocksize);
        tail = (struct jbd_block_tail *)((char *)bhdr + block_size) - 1;

        /* Hash the block with a zeroed checksum field. */
        saved_csum = tail->checksum;
        tail->checksum = 0;

        /* Seed with the uuid stored in the jbd superblock ... */
        crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
                          sizeof(jbd_fs->sb.uuid));
        /* ... then continue over the whole block. */
        crc = ext4_crc32c(crc, bhdr, block_size);

        tail->checksum = saved_csum;
        return crc;
}
#else
#define jbd_meta_csum(...) 0
#endif
199
200 static void jbd_meta_csum_set(struct jbd_fs *jbd_fs,
201                               struct jbd_bhdr *bhdr)
202 {
203         uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
204         struct jbd_block_tail *tail = (struct jbd_block_tail *)
205                                 ((char *)bhdr + block_size -
206                                 sizeof(struct jbd_block_tail));
207         if (!jbd_has_csum(&jbd_fs->sb))
208                 return;
209
210         tail->checksum = to_be32(jbd_meta_csum(jbd_fs, bhdr));
211 }
212
#if CONFIG_META_CSUM_ENABLE
/**@brief  Check the checksum stored in the tail of a journal
 *         metadata block.
 * @param  jbd_fs jbd filesystem
 * @param  bhdr start of the metadata block
 * @return true if the checksum matches or if the journal has no
 *         checksum feature*/
static bool
jbd_verify_meta_csum(struct jbd_fs *jbd_fs,
                     struct jbd_bhdr *bhdr)
{
        struct jbd_block_tail *tail;
        uint32_t block_size;

        if (!jbd_has_csum(&jbd_fs->sb))
                return true;

        block_size = jbd_get32(&jbd_fs->sb, blocksize);
        tail = (struct jbd_block_tail *)((char *)bhdr + block_size) - 1;

        return jbd_meta_csum(jbd_fs, bhdr) == to_be32(tail->checksum);
}
#else
#define jbd_verify_meta_csum(...) true
#endif
230
#if CONFIG_META_CSUM_ENABLE
/**@brief  Compute the crc32c checksum of a commit block.
 * @param  jbd_fs jbd filesystem
 * @param  header commit block header (start of the commit block)
 * @return checksum, or 0 when the journal has no checksum feature*/
static uint32_t jbd_commit_csum(struct jbd_fs *jbd_fs,
                              struct jbd_commit_header *header)
{
        uint32_t saved_type, saved_size, saved_csum;
        uint32_t block_size;
        uint32_t crc;

        if (!jbd_has_csum(&jbd_fs->sb))
                return 0;

        /* Remember the checksum related fields, then hash the block
         * with all of them zeroed. */
        saved_type = header->chksum_type;
        saved_size = header->chksum_size;
        saved_csum = header->chksum[0];
        block_size = jbd_get32(&jbd_fs->sb, blocksize);

        header->chksum_type = 0;
        header->chksum_size = 0;
        header->chksum[0] = 0;

        /* Seed with the uuid stored in the jbd superblock ... */
        crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
                          sizeof(jbd_fs->sb.uuid));
        /* ... then continue over the whole block. */
        crc = ext4_crc32c(crc, header, block_size);

        header->chksum_type = saved_type;
        header->chksum_size = saved_size;
        header->chksum[0] = saved_csum;

        return crc;
}
#else
#define jbd_commit_csum(...) 0
#endif
262
263 static void jbd_commit_csum_set(struct jbd_fs *jbd_fs,
264                               struct jbd_commit_header *header)
265 {
266         if (!jbd_has_csum(&jbd_fs->sb))
267                 return;
268
269         header->chksum_type = 0;
270         header->chksum_size = 0;
271         header->chksum[0] = jbd_commit_csum(jbd_fs, header);
272 }
273
#if CONFIG_META_CSUM_ENABLE
/**@brief  Check the checksum stored in a commit block header.
 * @param  jbd_fs jbd filesystem
 * @param  header commit block header (start of the commit block)
 * @return true if the checksum matches or if the journal has no
 *         checksum feature*/
static bool jbd_verify_commit_csum(struct jbd_fs *jbd_fs,
                                   struct jbd_commit_header *header)
{
        bool matches = true;

        if (jbd_has_csum(&jbd_fs->sb))
                matches = (header->chksum[0] ==
                           to_be32(jbd_commit_csum(jbd_fs, header)));

        return matches;
}
#else
#define jbd_verify_commit_csum(...) true
#endif
287
#if CONFIG_META_CSUM_ENABLE
/**@brief  Compute the crc32c checksum of one block worth of data.
 * @param  jbd_fs jbd filesystem
 * @param  buf buffer holding at least one journal block
 * @return checksum, or 0 when the journal has no checksum feature*/
static uint32_t jbd_block_csum(struct jbd_fs *jbd_fs, const void *buf)
{
        uint32_t block_size;
        uint32_t crc;

        if (!jbd_has_csum(&jbd_fs->sb))
                return 0;

        block_size = jbd_get32(&jbd_fs->sb, blocksize);

        /* Seed with the uuid stored in the jbd superblock ... */
        crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
                          sizeof(jbd_fs->sb.uuid));
        /* ... then continue over the whole block. */
        return ext4_crc32c(crc, buf, block_size);
}
#else
#define jbd_block_csum(...) 0
#endif
307
308 static void jbd_block_tag_csum_set(struct jbd_fs *jbd_fs, void *__tag,
309                                    uint32_t checksum)
310 {
311         int ver = jbd_has_csum(&jbd_fs->sb);
312         if (!ver)
313                 return;
314
315         if (ver == 2) {
316                 struct jbd_block_tag *tag = __tag;
317                 tag->checksum = (uint16_t)to_be32(checksum);
318         } else {
319                 struct jbd_block_tag3 *tag = __tag;
320                 tag->checksum = to_be32(checksum);
321         }
322 }
323
324 /**@brief  Write jbd superblock to disk.
325  * @param  jbd_fs jbd filesystem
326  * @param  s jbd superblock
327  * @return standard error code*/
328 static int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s)
329 {
330         int rc;
331         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
332         uint64_t offset;
333         ext4_fsblk_t fblock;
334         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
335         if (rc != EOK)
336                 return rc;
337
338         jbd_sb_csum_set(s);
339         offset = fblock * ext4_sb_get_block_size(&fs->sb);
340         return ext4_block_writebytes(fs->bdev, offset, s,
341                                      EXT4_SUPERBLOCK_SIZE);
342 }
343
344 /**@brief  Read jbd superblock from disk.
345  * @param  jbd_fs jbd filesystem
346  * @param  s jbd superblock
347  * @return standard error code*/
348 static int jbd_sb_read(struct jbd_fs *jbd_fs, struct jbd_sb *s)
349 {
350         int rc;
351         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
352         uint64_t offset;
353         ext4_fsblk_t fblock;
354         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
355         if (rc != EOK)
356                 return rc;
357
358         offset = fblock * ext4_sb_get_block_size(&fs->sb);
359         return ext4_block_readbytes(fs->bdev, offset, s,
360                                     EXT4_SUPERBLOCK_SIZE);
361 }
362
363 /**@brief  Verify jbd superblock.
364  * @param  sb jbd superblock
365  * @return true if jbd superblock is valid */
366 static bool jbd_verify_sb(struct jbd_sb *sb)
367 {
368         struct jbd_bhdr *header = &sb->header;
369         if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER)
370                 return false;
371
372         if (jbd_get32(header, blocktype) != JBD_SUPERBLOCK &&
373             jbd_get32(header, blocktype) != JBD_SUPERBLOCK_V2)
374                 return false;
375
376         return jbd_verify_sb_csum(sb);
377 }
378
379 /**@brief  Write back dirty jbd superblock to disk.
380  * @param  jbd_fs jbd filesystem
381  * @return standard error code*/
382 static int jbd_write_sb(struct jbd_fs *jbd_fs)
383 {
384         int rc = EOK;
385         if (jbd_fs->dirty) {
386                 rc = jbd_sb_write(jbd_fs, &jbd_fs->sb);
387                 if (rc != EOK)
388                         return rc;
389
390                 jbd_fs->dirty = false;
391         }
392         return rc;
393 }
394
395 /**@brief  Get reference to jbd filesystem.
396  * @param  fs Filesystem to load journal of
397  * @param  jbd_fs jbd filesystem
398  * @return standard error code*/
399 int jbd_get_fs(struct ext4_fs *fs,
400                struct jbd_fs *jbd_fs)
401 {
402         int rc;
403         uint32_t journal_ino;
404
405         memset(jbd_fs, 0, sizeof(struct jbd_fs));
406         /* See if there is journal inode on this filesystem.*/
407         /* FIXME: detection on existance ofbkejournal bdev is
408          *        missing.*/
409         journal_ino = ext4_get32(&fs->sb, journal_inode_number);
410
411         rc = ext4_fs_get_inode_ref(fs,
412                                    journal_ino,
413                                    &jbd_fs->inode_ref);
414         if (rc != EOK) {
415                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
416                 return rc;
417         }
418         rc = jbd_sb_read(jbd_fs, &jbd_fs->sb);
419         if (rc != EOK) {
420                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
421                 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
422                 return rc;
423         }
424         if (!jbd_verify_sb(&jbd_fs->sb)) {
425                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
426                 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
427                 rc = EIO;
428         }
429
430         return rc;
431 }
432
433 /**@brief  Put reference of jbd filesystem.
434  * @param  jbd_fs jbd filesystem
435  * @return standard error code*/
436 int jbd_put_fs(struct jbd_fs *jbd_fs)
437 {
438         int rc = EOK;
439         rc = jbd_write_sb(jbd_fs);
440
441         ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
442         return rc;
443 }
444
445 /**@brief  Data block lookup helper.
446  * @param  jbd_fs jbd filesystem
447  * @param  iblock block index
448  * @param  fblock logical block address
449  * @return standard error code*/
450 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
451                    ext4_lblk_t iblock,
452                    ext4_fsblk_t *fblock)
453 {
454         int rc = ext4_fs_get_inode_dblk_idx(
455                         &jbd_fs->inode_ref,
456                         iblock,
457                         fblock,
458                         false);
459         return rc;
460 }
461
462 /**@brief   jbd block get function (through cache).
463  * @param   jbd_fs jbd filesystem
464  * @param   block block descriptor
465  * @param   fblock jbd logical block address
466  * @return  standard error code*/
467 static int jbd_block_get(struct jbd_fs *jbd_fs,
468                   struct ext4_block *block,
469                   ext4_fsblk_t fblock)
470 {
471         /* TODO: journal device. */
472         int rc;
473         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
474
475         /* Lookup the logical block address of
476          * fblock.*/
477         rc = jbd_inode_bmap(jbd_fs, iblock,
478                             &fblock);
479         if (rc != EOK)
480                 return rc;
481
482         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
483         rc = ext4_block_get(bdev, block, fblock);
484
485         /* If succeeded, mark buffer as BC_FLUSH to indicate
486          * that data should be written to disk immediately.*/
487         if (rc == EOK) {
488                 ext4_bcache_set_flag(block->buf, BC_FLUSH);
489                 /* As we don't want to occupy too much space
490                  * in block cache, we set this buffer BC_TMP.*/
491                 ext4_bcache_set_flag(block->buf, BC_TMP);
492         }
493
494         return rc;
495 }
496
497 /**@brief   jbd block get function (through cache, don't read).
498  * @param   jbd_fs jbd filesystem
499  * @param   block block descriptor
500  * @param   fblock jbd logical block address
501  * @return  standard error code*/
502 static int jbd_block_get_noread(struct jbd_fs *jbd_fs,
503                          struct ext4_block *block,
504                          ext4_fsblk_t fblock)
505 {
506         /* TODO: journal device. */
507         int rc;
508         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
509         rc = jbd_inode_bmap(jbd_fs, iblock,
510                             &fblock);
511         if (rc != EOK)
512                 return rc;
513
514         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
515         rc = ext4_block_get_noread(bdev, block, fblock);
516         if (rc == EOK)
517                 ext4_bcache_set_flag(block->buf, BC_FLUSH);
518
519         return rc;
520 }
521
522 /**@brief   jbd block set procedure (through cache).
523  * @param   jbd_fs jbd filesystem
524  * @param   block block descriptor
525  * @return  standard error code*/
526 static int jbd_block_set(struct jbd_fs *jbd_fs,
527                   struct ext4_block *block)
528 {
529         return ext4_block_set(jbd_fs->inode_ref.fs->bdev,
530                               block);
531 }
532
533 /**@brief  helper functions to calculate
534  *         block tag size, not including UUID part.
535  * @param  jbd_fs jbd filesystem
536  * @return tag size in bytes*/
537 static int jbd_tag_bytes(struct jbd_fs *jbd_fs)
538 {
539         int size;
540
541         /* It is very easy to deal with the case which
542          * JBD_FEATURE_INCOMPAT_CSUM_V3 is enabled.*/
543         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
544                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
545                 return sizeof(struct jbd_block_tag3);
546
547         size = sizeof(struct jbd_block_tag);
548
549         /* If JBD_FEATURE_INCOMPAT_CSUM_V2 is enabled,
550          * add 2 bytes to size.*/
551         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
552                                      JBD_FEATURE_INCOMPAT_CSUM_V2))
553                 size += sizeof(uint16_t);
554
555         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
556                                      JBD_FEATURE_INCOMPAT_64BIT))
557                 return size;
558
559         /* If block number is 4 bytes in size,
560          * minus 4 bytes from size */
561         return size - sizeof(uint32_t);
562 }
563
564 /**@brief  Tag information. */
565 struct tag_info {
566         /**@brief  Tag size in bytes, including UUID part.*/
567         int tag_bytes;
568
569         /**@brief  block number stored in this tag.*/
570         ext4_fsblk_t block;
571
572         /**@brief  whether UUID part exists or not.*/
573         bool uuid_exist;
574
575         /**@brief  UUID content if UUID part exists.*/
576         uint8_t uuid[UUID_SIZE];
577
578         /**@brief  Is this the last tag? */
579         bool last_tag;
580
581         /**@brief  crc32c checksum. */
582         uint32_t checksum;
583 };
584
585 /**@brief  Extract information from a block tag.
586  * @param  __tag pointer to the block tag
587  * @param  tag_bytes block tag size of this jbd filesystem
588  * @param  remaining size in buffer containing the block tag
589  * @param  tag_info information of this tag.
590  * @return  EOK when succeed, otherwise return EINVAL.*/
591 static int
592 jbd_extract_block_tag(struct jbd_fs *jbd_fs,
593                       void *__tag,
594                       int tag_bytes,
595                       int32_t remain_buf_size,
596                       struct tag_info *tag_info)
597 {
598         char *uuid_start;
599         tag_info->tag_bytes = tag_bytes;
600         tag_info->uuid_exist = false;
601         tag_info->last_tag = false;
602
603         /* See whether it is possible to hold a valid block tag.*/
604         if (remain_buf_size - tag_bytes < 0)
605                 return EINVAL;
606
607         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
608                                      JBD_FEATURE_INCOMPAT_CSUM_V3)) {
609                 struct jbd_block_tag3 *tag = __tag;
610                 tag_info->block = jbd_get32(tag, blocknr);
611                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
612                                              JBD_FEATURE_INCOMPAT_64BIT))
613                          tag_info->block |=
614                                  (uint64_t)jbd_get32(tag, blocknr_high) << 32;
615
616                 if (jbd_get32(tag, flags) & JBD_FLAG_ESCAPE)
617                         tag_info->block = 0;
618
619                 if (!(jbd_get32(tag, flags) & JBD_FLAG_SAME_UUID)) {
620                         /* See whether it is possible to hold UUID part.*/
621                         if (remain_buf_size - tag_bytes < UUID_SIZE)
622                                 return EINVAL;
623
624                         uuid_start = (char *)tag + tag_bytes;
625                         tag_info->uuid_exist = true;
626                         tag_info->tag_bytes += UUID_SIZE;
627                         memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
628                 }
629
630                 if (jbd_get32(tag, flags) & JBD_FLAG_LAST_TAG)
631                         tag_info->last_tag = true;
632
633         } else {
634                 struct jbd_block_tag *tag = __tag;
635                 tag_info->block = jbd_get32(tag, blocknr);
636                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
637                                              JBD_FEATURE_INCOMPAT_64BIT))
638                          tag_info->block |=
639                                  (uint64_t)jbd_get32(tag, blocknr_high) << 32;
640
641                 if (jbd_get16(tag, flags) & JBD_FLAG_ESCAPE)
642                         tag_info->block = 0;
643
644                 if (!(jbd_get16(tag, flags) & JBD_FLAG_SAME_UUID)) {
645                         /* See whether it is possible to hold UUID part.*/
646                         if (remain_buf_size - tag_bytes < UUID_SIZE)
647                                 return EINVAL;
648
649                         uuid_start = (char *)tag + tag_bytes;
650                         tag_info->uuid_exist = true;
651                         tag_info->tag_bytes += UUID_SIZE;
652                         memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
653                 }
654
655                 if (jbd_get16(tag, flags) & JBD_FLAG_LAST_TAG)
656                         tag_info->last_tag = true;
657
658         }
659         return EOK;
660 }
661
662 /**@brief  Write information to a block tag.
663  * @param  __tag pointer to the block tag
664  * @param  remaining size in buffer containing the block tag
665  * @param  tag_info information of this tag.
666  * @return  EOK when succeed, otherwise return EINVAL.*/
667 static int
668 jbd_write_block_tag(struct jbd_fs *jbd_fs,
669                     void *__tag,
670                     int32_t remain_buf_size,
671                     struct tag_info *tag_info)
672 {
673         char *uuid_start;
674         int tag_bytes = jbd_tag_bytes(jbd_fs);
675
676         tag_info->tag_bytes = tag_bytes;
677
678         /* See whether it is possible to hold a valid block tag.*/
679         if (remain_buf_size - tag_bytes < 0)
680                 return EINVAL;
681
682         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
683                                      JBD_FEATURE_INCOMPAT_CSUM_V3)) {
684                 struct jbd_block_tag3 *tag = __tag;
685                 memset(tag, 0, sizeof(struct jbd_block_tag3));
686                 jbd_set32(tag, blocknr, tag_info->block);
687                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
688                                              JBD_FEATURE_INCOMPAT_64BIT))
689                         jbd_set32(tag, blocknr_high, tag_info->block >> 32);
690
691                 if (tag_info->uuid_exist) {
692                         /* See whether it is possible to hold UUID part.*/
693                         if (remain_buf_size - tag_bytes < UUID_SIZE)
694                                 return EINVAL;
695
696                         uuid_start = (char *)tag + tag_bytes;
697                         tag_info->tag_bytes += UUID_SIZE;
698                         memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
699                 } else
700                         jbd_set32(tag, flags,
701                                   jbd_get32(tag, flags) | JBD_FLAG_SAME_UUID);
702
703                 jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
704
705                 if (tag_info->last_tag)
706                         jbd_set32(tag, flags,
707                                   jbd_get32(tag, flags) | JBD_FLAG_LAST_TAG);
708
709         } else {
710                 struct jbd_block_tag *tag = __tag;
711                 memset(tag, 0, sizeof(struct jbd_block_tag));
712                 jbd_set32(tag, blocknr, tag_info->block);
713                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
714                                              JBD_FEATURE_INCOMPAT_64BIT))
715                         jbd_set32(tag, blocknr_high, tag_info->block >> 32);
716
717                 if (tag_info->uuid_exist) {
718                         /* See whether it is possible to hold UUID part.*/
719                         if (remain_buf_size - tag_bytes < UUID_SIZE)
720                                 return EINVAL;
721
722                         uuid_start = (char *)tag + tag_bytes;
723                         tag_info->tag_bytes += UUID_SIZE;
724                         memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
725                 } else
726                         jbd_set16(tag, flags,
727                                   jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID);
728
729                 jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
730
731                 if (tag_info->last_tag)
732                         jbd_set16(tag, flags,
733                                   jbd_get16(tag, flags) | JBD_FLAG_LAST_TAG);
734
735         }
736         return EOK;
737 }
738
739 /**@brief  Iterate all block tags in a block.
740  * @param  jbd_fs jbd filesystem
741  * @param  __tag_start pointer to the block
742  * @param  tag_tbl_size size of the block
743  * @param  func callback routine to indicate that
744  *         a block tag is found
745  * @param  arg additional argument to be passed to func */
746 static void
747 jbd_iterate_block_table(struct jbd_fs *jbd_fs,
748                         void *__tag_start,
749                         int32_t tag_tbl_size,
750                         void (*func)(struct jbd_fs * jbd_fs,
751                                         ext4_fsblk_t block,
752                                         uint8_t *uuid,
753                                         void *arg),
754                         void *arg)
755 {
756         char *tag_start, *tag_ptr;
757         int tag_bytes = jbd_tag_bytes(jbd_fs);
758         tag_start = __tag_start;
759         tag_ptr = tag_start;
760
761         /* Cut off the size of block tail storing checksum. */
762         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
763                                      JBD_FEATURE_INCOMPAT_CSUM_V2) ||
764             JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
765                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
766                 tag_tbl_size -= sizeof(struct jbd_block_tail);
767
768         while (tag_tbl_size) {
769                 struct tag_info tag_info;
770                 int rc = jbd_extract_block_tag(jbd_fs,
771                                       tag_ptr,
772                                       tag_bytes,
773                                       tag_tbl_size,
774                                       &tag_info);
775                 if (rc != EOK)
776                         break;
777
778                 if (func)
779                         func(jbd_fs, tag_info.block, tag_info.uuid, arg);
780
781                 /* Stop the iteration when we reach the last tag. */
782                 if (tag_info.last_tag)
783                         break;
784
785                 tag_ptr += tag_info.tag_bytes;
786                 tag_tbl_size -= tag_info.tag_bytes;
787         }
788 }
789
790 static void jbd_display_block_tags(struct jbd_fs *jbd_fs,
791                                    ext4_fsblk_t block,
792                                    uint8_t *uuid,
793                                    void *arg)
794 {
795         uint32_t *iblock = arg;
796         ext4_dbg(DEBUG_JBD, "Block in block_tag: %" PRIu64 "\n", block);
797         (*iblock)++;
798         (void)jbd_fs;
799         (void)uuid;
800         return;
801 }
802
803 static struct revoke_entry *
804 jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)
805 {
806         struct revoke_entry tmp = {
807                 .block = block
808         };
809
810         return RB_FIND(jbd_revoke, &info->revoke_root, &tmp);
811 }
812
813 /**@brief  Replay a block in a transaction.
814  * @param  jbd_fs jbd filesystem
815  * @param  block  block address to be replayed.*/
816 static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
817                                   ext4_fsblk_t block,
818                                   uint8_t *uuid __unused,
819                                   void *__arg)
820 {
821         int r;
822         struct replay_arg *arg = __arg;
823         struct recover_info *info = arg->info;
824         uint32_t *this_block = arg->this_block;
825         struct revoke_entry *revoke_entry;
826         struct ext4_block journal_block, ext4_block;
827         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
828
829         (*this_block)++;
830
831         /* We replay this block only if the current transaction id
832          * is equal or greater than that in revoke entry.*/
833         revoke_entry = jbd_revoke_entry_lookup(info, block);
834         if (revoke_entry &&
835             arg->this_trans_id < revoke_entry->trans_id)
836                 return;
837
838         ext4_dbg(DEBUG_JBD,
839                  "Replaying block in block_tag: %" PRIu64 "\n",
840                  block);
841
842         r = jbd_block_get(jbd_fs, &journal_block, *this_block);
843         if (r != EOK)
844                 return;
845
846         /* We need special treatment for ext4 superblock. */
847         if (block) {
848                 r = ext4_block_get_noread(fs->bdev, &ext4_block, block);
849                 if (r != EOK) {
850                         jbd_block_set(jbd_fs, &journal_block);
851                         return;
852                 }
853
854                 memcpy(ext4_block.data,
855                         journal_block.data,
856                         jbd_get32(&jbd_fs->sb, blocksize));
857
858                 ext4_bcache_set_dirty(ext4_block.buf);
859                 ext4_block_set(fs->bdev, &ext4_block);
860         } else {
861                 uint16_t mount_count, state;
862                 mount_count = ext4_get16(&fs->sb, mount_count);
863                 state = ext4_get16(&fs->sb, state);
864
865                 memcpy(&fs->sb,
866                         journal_block.data + EXT4_SUPERBLOCK_OFFSET,
867                         EXT4_SUPERBLOCK_SIZE);
868
869                 /* Mark system as mounted */
870                 ext4_set16(&fs->sb, state, state);
871                 r = ext4_sb_write(fs->bdev, &fs->sb);
872                 if (r != EOK)
873                         return;
874
875                 /*Update mount count*/
876                 ext4_set16(&fs->sb, mount_count, mount_count);
877         }
878
879         jbd_block_set(jbd_fs, &journal_block);
880         
881         return;
882 }
883
884 /**@brief  Add block address to revoke tree, along with
885  *         its transaction id.
886  * @param  info  journal replay info
887  * @param  block  block address to be replayed.*/
888 static void jbd_add_revoke_block_tags(struct recover_info *info,
889                                       ext4_fsblk_t block)
890 {
891         struct revoke_entry *revoke_entry;
892
893         ext4_dbg(DEBUG_JBD, "Add block %" PRIu64 " to revoke tree\n", block);
894         /* If the revoke entry with respect to the block address
895          * exists already, update its transaction id.*/
896         revoke_entry = jbd_revoke_entry_lookup(info, block);
897         if (revoke_entry) {
898                 revoke_entry->trans_id = info->this_trans_id;
899                 return;
900         }
901
902         revoke_entry = jbd_alloc_revoke_entry();
903         ext4_assert(revoke_entry);
904         revoke_entry->block = block;
905         revoke_entry->trans_id = info->this_trans_id;
906         RB_INSERT(jbd_revoke, &info->revoke_root, revoke_entry);
907
908         return;
909 }
910
911 static void jbd_destroy_revoke_tree(struct recover_info *info)
912 {
913         while (!RB_EMPTY(&info->revoke_root)) {
914                 struct revoke_entry *revoke_entry =
915                         RB_MIN(jbd_revoke, &info->revoke_root);
916                 ext4_assert(revoke_entry);
917                 RB_REMOVE(jbd_revoke, &info->revoke_root, revoke_entry);
918                 jbd_free_revoke_entry(revoke_entry);
919         }
920 }
921
/* Make sure we wrap around the log correctly!
 *
 * If @var has reached the journal superblock's maxlen, step it back by
 * (maxlen - first). @var must be an lvalue; it is evaluated more than
 * once, so it must not have side effects. */
#define wrap(sb, var)                                                   \
do {                                                                    \
        if ((var) >= jbd_get32((sb), maxlen))                           \
                (var) -= (jbd_get32((sb), maxlen) -                     \
                          jbd_get32((sb), first));                      \
} while (0)

/* Passes understood by jbd_iterate_log(). */
#define ACTION_SCAN 0    /* walk the log and find the last transaction */
#define ACTION_REVOKE 1  /* collect revoke records into the revoke tree */
#define ACTION_RECOVER 2 /* replay the blocks named by descriptor blocks */
932
933 /**@brief  Add entries in a revoke block to revoke tree.
934  * @param  jbd_fs jbd filesystem
935  * @param  header revoke block header
936  * @param  recover_info  journal replay info*/
937 static void jbd_build_revoke_tree(struct jbd_fs *jbd_fs,
938                                   struct jbd_bhdr *header,
939                                   struct recover_info *info)
940 {
941         char *blocks_entry;
942         struct jbd_revoke_header *revoke_hdr =
943                 (struct jbd_revoke_header *)header;
944         uint32_t i, nr_entries, record_len = 4;
945
946         /* If we are working on a 64bit jbd filesystem, */
947         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
948                                      JBD_FEATURE_INCOMPAT_64BIT))
949                 record_len = 8;
950
951         nr_entries = (jbd_get32(revoke_hdr, count) -
952                         sizeof(struct jbd_revoke_header)) /
953                         record_len;
954
955         blocks_entry = (char *)(revoke_hdr + 1);
956
957         for (i = 0;i < nr_entries;i++) {
958                 if (record_len == 8) {
959                         uint64_t *blocks =
960                                 (uint64_t *)blocks_entry;
961                         jbd_add_revoke_block_tags(info, to_be64(*blocks));
962                 } else {
963                         uint32_t *blocks =
964                                 (uint32_t *)blocks_entry;
965                         jbd_add_revoke_block_tags(info, to_be32(*blocks));
966                 }
967                 blocks_entry += record_len;
968         }
969 }
970
971 static void jbd_debug_descriptor_block(struct jbd_fs *jbd_fs,
972                                        struct jbd_bhdr *header,
973                                        uint32_t *iblock)
974 {
975         jbd_iterate_block_table(jbd_fs,
976                                 header + 1,
977                                 jbd_get32(&jbd_fs->sb, blocksize) -
978                                         sizeof(struct jbd_bhdr),
979                                 jbd_display_block_tags,
980                                 iblock);
981 }
982
983 static void jbd_replay_descriptor_block(struct jbd_fs *jbd_fs,
984                                         struct jbd_bhdr *header,
985                                         struct replay_arg *arg)
986 {
987         jbd_iterate_block_table(jbd_fs,
988                                 header + 1,
989                                 jbd_get32(&jbd_fs->sb, blocksize) -
990                                         sizeof(struct jbd_bhdr),
991                                 jbd_replay_block_tags,
992                                 arg);
993 }
994
995 /**@brief  The core routine of journal replay.
996  * @param  jbd_fs jbd filesystem
997  * @param  recover_info  journal replay info
998  * @param  action action needed to be taken
999  * @return standard error code*/
1000 static int jbd_iterate_log(struct jbd_fs *jbd_fs,
1001                            struct recover_info *info,
1002                            int action)
1003 {
1004         int r = EOK;
1005         bool log_end = false;
1006         struct jbd_sb *sb = &jbd_fs->sb;
1007         uint32_t start_trans_id, this_trans_id;
1008         uint32_t start_block, this_block;
1009
1010         /* We start iterating valid blocks in the whole journal.*/
1011         start_trans_id = this_trans_id = jbd_get32(sb, sequence);
1012         start_block = this_block = jbd_get32(sb, start);
1013
1014         ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %" PRIu32 "\n",
1015                             start_trans_id);
1016
1017         while (!log_end) {
1018                 struct ext4_block block;
1019                 struct jbd_bhdr *header;
1020                 /* If we are not scanning for the last
1021                  * valid transaction in the journal,
1022                  * we will stop when we reach the end of
1023                  * the journal.*/
1024                 if (action != ACTION_SCAN)
1025                         if (this_trans_id > info->last_trans_id) {
1026                                 log_end = true;
1027                                 continue;
1028                         }
1029
1030                 r = jbd_block_get(jbd_fs, &block, this_block);
1031                 if (r != EOK)
1032                         break;
1033
1034                 header = (struct jbd_bhdr *)block.data;
1035                 /* This block does not have a valid magic number,
1036                  * so we have reached the end of the journal.*/
1037                 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER) {
1038                         jbd_block_set(jbd_fs, &block);
1039                         log_end = true;
1040                         continue;
1041                 }
1042
1043                 /* If the transaction id we found is not expected,
1044                  * we may have reached the end of the journal.
1045                  *
1046                  * If we are not scanning the journal, something
1047                  * bad might have taken place. :-( */
1048                 if (jbd_get32(header, sequence) != this_trans_id) {
1049                         if (action != ACTION_SCAN)
1050                                 r = EIO;
1051
1052                         jbd_block_set(jbd_fs, &block);
1053                         log_end = true;
1054                         continue;
1055                 }
1056
1057                 switch (jbd_get32(header, blocktype)) {
1058                 case JBD_DESCRIPTOR_BLOCK:
1059                         if (!jbd_verify_meta_csum(jbd_fs, header)) {
1060                                 ext4_dbg(DEBUG_JBD,
1061                                         DBG_WARN "Descriptor block checksum failed."
1062                                                 "Journal block: %" PRIu32"\n",
1063                                                 this_block);
1064                                 log_end = true;
1065                                 break;
1066                         }
1067                         ext4_dbg(DEBUG_JBD, "Descriptor block: %" PRIu32", "
1068                                             "trans_id: %" PRIu32"\n",
1069                                             this_block, this_trans_id);
1070                         if (action == ACTION_RECOVER) {
1071                                 struct replay_arg replay_arg;
1072                                 replay_arg.info = info;
1073                                 replay_arg.this_block = &this_block;
1074                                 replay_arg.this_trans_id = this_trans_id;
1075
1076                                 jbd_replay_descriptor_block(jbd_fs,
1077                                                 header, &replay_arg);
1078                         } else
1079                                 jbd_debug_descriptor_block(jbd_fs,
1080                                                 header, &this_block);
1081
1082                         break;
1083                 case JBD_COMMIT_BLOCK:
1084                         if (!jbd_verify_commit_csum(jbd_fs,
1085                                         (struct jbd_commit_header *)header)) {
1086                                 ext4_dbg(DEBUG_JBD,
1087                                         DBG_WARN "Commit block checksum failed."
1088                                                 "Journal block: %" PRIu32"\n",
1089                                                 this_block);
1090                                 log_end = true;
1091                                 break;
1092                         }
1093                         ext4_dbg(DEBUG_JBD, "Commit block: %" PRIu32", "
1094                                             "trans_id: %" PRIu32"\n",
1095                                             this_block, this_trans_id);
1096                         /* This is the end of a transaction,
1097                          * we may now proceed to the next transaction.
1098                          */
1099                         this_trans_id++;
1100                         break;
1101                 case JBD_REVOKE_BLOCK:
1102                         if (!jbd_verify_meta_csum(jbd_fs, header)) {
1103                                 ext4_dbg(DEBUG_JBD,
1104                                         DBG_WARN "Revoke block checksum failed."
1105                                                 "Journal block: %" PRIu32"\n",
1106                                                 this_block);
1107                                 log_end = true;
1108                                 break;
1109                         }
1110                         ext4_dbg(DEBUG_JBD, "Revoke block: %" PRIu32", "
1111                                             "trans_id: %" PRIu32"\n",
1112                                             this_block, this_trans_id);
1113                         if (action == ACTION_REVOKE) {
1114                                 info->this_trans_id = this_trans_id;
1115                                 jbd_build_revoke_tree(jbd_fs,
1116                                                 header, info);
1117                         }
1118                         break;
1119                 default:
1120                         log_end = true;
1121                         break;
1122                 }
1123                 jbd_block_set(jbd_fs, &block);
1124                 this_block++;
1125                 wrap(sb, this_block);
1126                 if (this_block == start_block)
1127                         log_end = true;
1128
1129         }
1130         ext4_dbg(DEBUG_JBD, "End of journal.\n");
1131         if (r == EOK && action == ACTION_SCAN) {
1132                 /* We have finished scanning the journal. */
1133                 info->start_trans_id = start_trans_id;
1134                 if (this_trans_id > start_trans_id)
1135                         info->last_trans_id = this_trans_id - 1;
1136                 else
1137                         info->last_trans_id = this_trans_id;
1138         }
1139
1140         return r;
1141 }
1142
1143 /**@brief  Replay journal.
1144  * @param  jbd_fs jbd filesystem
1145  * @return standard error code*/
1146 int jbd_recover(struct jbd_fs *jbd_fs)
1147 {
1148         int r;
1149         struct recover_info info;
1150         struct jbd_sb *sb = &jbd_fs->sb;
1151         if (!sb->start)
1152                 return EOK;
1153
1154         RB_INIT(&info.revoke_root);
1155
1156         r = jbd_iterate_log(jbd_fs, &info, ACTION_SCAN);
1157         if (r != EOK)
1158                 return r;
1159
1160         r = jbd_iterate_log(jbd_fs, &info, ACTION_REVOKE);
1161         if (r != EOK)
1162                 return r;
1163
1164         r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);
1165         if (r == EOK) {
1166                 /* If we successfully replay the journal,
1167                  * clear EXT4_FINCOM_RECOVER flag on the
1168                  * ext4 superblock, and set the start of
1169                  * journal to 0.*/
1170                 uint32_t features_incompatible =
1171                         ext4_get32(&jbd_fs->inode_ref.fs->sb,
1172                                    features_incompatible);
1173                 jbd_set32(&jbd_fs->sb, start, 0);
1174                 features_incompatible &= ~EXT4_FINCOM_RECOVER;
1175                 ext4_set32(&jbd_fs->inode_ref.fs->sb,
1176                            features_incompatible,
1177                            features_incompatible);
1178                 jbd_fs->dirty = true;
1179                 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1180                                   &jbd_fs->inode_ref.fs->sb);
1181         }
1182         jbd_destroy_revoke_tree(&info);
1183         return r;
1184 }
1185
1186 static void jbd_journal_write_sb(struct jbd_journal *journal)
1187 {
1188         struct jbd_fs *jbd_fs = journal->jbd_fs;
1189         jbd_set32(&jbd_fs->sb, start, journal->start);
1190         jbd_set32(&jbd_fs->sb, sequence, journal->trans_id);
1191         jbd_fs->dirty = true;
1192 }
1193
1194 /**@brief  Start accessing the journal.
1195  * @param  jbd_fs jbd filesystem
1196  * @param  journal current journal session
1197  * @return standard error code*/
1198 int jbd_journal_start(struct jbd_fs *jbd_fs,
1199                       struct jbd_journal *journal)
1200 {
1201         int r;
1202         uint32_t features_incompatible =
1203                         ext4_get32(&jbd_fs->inode_ref.fs->sb,
1204                                    features_incompatible);
1205         features_incompatible |= EXT4_FINCOM_RECOVER;
1206         ext4_set32(&jbd_fs->inode_ref.fs->sb,
1207                         features_incompatible,
1208                         features_incompatible);
1209         r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1210                         &jbd_fs->inode_ref.fs->sb);
1211         if (r != EOK)
1212                 return r;
1213
1214         journal->first = jbd_get32(&jbd_fs->sb, first);
1215         journal->start = journal->first;
1216         journal->last = journal->first;
1217         journal->trans_id = 1;
1218         journal->alloc_trans_id = 1;
1219
1220         journal->block_size = jbd_get32(&jbd_fs->sb, blocksize);
1221
1222         TAILQ_INIT(&journal->trans_queue);
1223         TAILQ_INIT(&journal->cp_queue);
1224         RB_INIT(&journal->block_rec_root);
1225         journal->jbd_fs = jbd_fs;
1226         jbd_journal_write_sb(journal);
1227         return jbd_write_sb(jbd_fs);
1228 }
1229
1230 static void jbd_journal_flush_trans(struct jbd_trans *trans)
1231 {
1232         struct jbd_buf *jbd_buf, *tmp;
1233         struct jbd_journal *journal = trans->journal;
1234         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1235         TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
1236                         tmp) {
1237                 struct ext4_block block = jbd_buf->block;
1238                 ext4_block_flush_buf(fs->bdev, block.buf);
1239         }
1240 }
1241
1242 static void
1243 jbd_journal_skip_pure_revoke(struct jbd_journal *journal,
1244                              struct jbd_trans *trans)
1245 {
1246         journal->start = trans->start_iblock +
1247                 trans->alloc_blocks;
1248         wrap(&journal->jbd_fs->sb, journal->start);
1249         journal->trans_id = trans->trans_id + 1;
1250         jbd_journal_free_trans(journal,
1251                         trans, false);
1252         jbd_journal_write_sb(journal);
1253 }
1254
1255 static void
1256 jbd_journal_purge_cp_trans(struct jbd_journal *journal,
1257                            bool flush)
1258 {
1259         struct jbd_trans *trans;
1260         while ((trans = TAILQ_FIRST(&journal->cp_queue))) {
1261                 if (!trans->data_cnt) {
1262                         TAILQ_REMOVE(&journal->cp_queue,
1263                                         trans,
1264                                         trans_node);
1265                         jbd_journal_skip_pure_revoke(journal, trans);
1266                 } else {
1267                         if (trans->data_cnt ==
1268                                         trans->written_cnt) {
1269                                 journal->start =
1270                                         trans->start_iblock +
1271                                         trans->alloc_blocks;
1272                                 wrap(&journal->jbd_fs->sb,
1273                                                 journal->start);
1274                                 journal->trans_id =
1275                                         trans->trans_id + 1;
1276                                 TAILQ_REMOVE(&journal->cp_queue,
1277                                                 trans,
1278                                                 trans_node);
1279                                 jbd_journal_free_trans(journal,
1280                                                 trans,
1281                                                 false);
1282                                 jbd_journal_write_sb(journal);
1283                         } else if (!flush) {
1284                                 journal->start =
1285                                         trans->start_iblock;
1286                                 wrap(&journal->jbd_fs->sb,
1287                                                 journal->start);
1288                                 journal->trans_id =
1289                                         trans->trans_id;
1290                                 jbd_journal_write_sb(journal);
1291                                 break;
1292                         } else
1293                                 jbd_journal_flush_trans(trans);
1294                 }
1295         }
1296 }
1297
1298 /**@brief  Stop accessing the journal.
1299  * @param  journal current journal session
1300  * @return standard error code*/
1301 int jbd_journal_stop(struct jbd_journal *journal)
1302 {
1303         int r;
1304         struct jbd_fs *jbd_fs = journal->jbd_fs;
1305         uint32_t features_incompatible;
1306
1307         /* Commit all the transactions to the journal.*/
1308         jbd_journal_commit_all(journal);
1309
1310         /* Make sure that journalled content have reached
1311          * the disk.*/
1312         jbd_journal_purge_cp_trans(journal, true);
1313
1314         /* There should be no block record in this journal
1315          * session. */
1316         if (!RB_EMPTY(&journal->block_rec_root))
1317                 ext4_dbg(DEBUG_JBD,
1318                          DBG_WARN "There are still block records "
1319                                   "in this journal session!\n");
1320
1321         features_incompatible =
1322                 ext4_get32(&jbd_fs->inode_ref.fs->sb,
1323                            features_incompatible);
1324         features_incompatible &= ~EXT4_FINCOM_RECOVER;
1325         ext4_set32(&jbd_fs->inode_ref.fs->sb,
1326                         features_incompatible,
1327                         features_incompatible);
1328         r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1329                         &jbd_fs->inode_ref.fs->sb);
1330         if (r != EOK)
1331                 return r;
1332
1333         journal->start = 0;
1334         journal->trans_id = 0;
1335         jbd_journal_write_sb(journal);
1336         return jbd_write_sb(journal->jbd_fs);
1337 }
1338
1339 /**@brief  Allocate a block in the journal.
1340  * @param  journal current journal session
1341  * @param  trans transaction
1342  * @return allocated block address*/
1343 static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal,
1344                                         struct jbd_trans *trans)
1345 {
1346         uint32_t start_block;
1347
1348         start_block = journal->last++;
1349         trans->alloc_blocks++;
1350         wrap(&journal->jbd_fs->sb, journal->last);
1351         
1352         /* If there is no space left, flush all journalled
1353          * blocks to disk first.*/
1354         if (journal->last == journal->start)
1355                 jbd_journal_purge_cp_trans(journal, true);
1356
1357         return start_block;
1358 }
1359
1360 /**@brief  Allocate a new transaction
1361  * @param  journal current journal session
1362  * @return transaction allocated*/
1363 struct jbd_trans *
1364 jbd_journal_new_trans(struct jbd_journal *journal)
1365 {
1366         struct jbd_trans *trans = calloc(1, sizeof(struct jbd_trans));
1367         if (!trans)
1368                 return NULL;
1369
1370         /* We will assign a trans_id to this transaction,
1371          * once it has been committed.*/
1372         trans->journal = journal;
1373         trans->error = EOK;
1374         TAILQ_INIT(&trans->buf_queue);
1375         return trans;
1376 }
1377
1378 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
1379                           struct ext4_buf *buf __unused,
1380                           int res,
1381                           void *arg);
1382
1383 /**@brief  gain access to it before making any modications.
1384  * @param  journal current journal session
1385  * @param  trans transaction
1386  * @param  block descriptor
1387  * @return standard error code.*/
1388 int jbd_trans_get_access(struct jbd_journal *journal,
1389                          struct jbd_trans *trans,
1390                          struct ext4_block *block)
1391 {
1392         int r = EOK;
1393         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1394         struct jbd_buf *jbd_buf = block->buf->end_write_arg;
1395
1396         /* If the buffer has already been modified, we should
1397          * flush dirty data in this buffer to disk.*/
1398         if (ext4_bcache_test_flag(block->buf, BC_DIRTY) &&
1399             block->buf->end_write == jbd_trans_end_write) {
1400                 ext4_assert(jbd_buf);
1401                 if (jbd_buf->trans != trans)
1402                         r = ext4_block_flush_buf(fs->bdev, block->buf);
1403
1404         }
1405         return r;
1406 }
1407
1408 static struct jbd_block_rec *
1409 jbd_trans_block_rec_lookup(struct jbd_journal *journal,
1410                            ext4_fsblk_t lba)
1411 {
1412         struct jbd_block_rec tmp = {
1413                 .lba = lba
1414         };
1415
1416         return RB_FIND(jbd_block,
1417                        &journal->block_rec_root,
1418                        &tmp);
1419 }
1420
1421 static inline struct jbd_block_rec *
1422 jbd_trans_insert_block_rec(struct jbd_trans *trans,
1423                            ext4_fsblk_t lba,
1424                            struct ext4_buf *buf)
1425 {
1426         struct jbd_block_rec *block_rec;
1427         block_rec = jbd_trans_block_rec_lookup(trans->journal, lba);
1428         if (block_rec) {
1429                 LIST_REMOVE(block_rec, tbrec_node);
1430                 /* Data should be flushed to disk already. */
1431                 ext4_assert(!block_rec->buf);
1432                 /* Now this block record belongs to this transaction. */
1433                 LIST_INSERT_HEAD(&trans->tbrec_list, block_rec, tbrec_node);
1434                 block_rec->trans = trans;
1435                 return block_rec;
1436         }
1437         block_rec = calloc(1, sizeof(struct jbd_block_rec));
1438         if (!block_rec)
1439                 return NULL;
1440
1441         block_rec->lba = lba;
1442         block_rec->buf = buf;
1443         block_rec->trans = trans;
1444         LIST_INSERT_HEAD(&trans->tbrec_list, block_rec, tbrec_node);
1445         RB_INSERT(jbd_block, &trans->journal->block_rec_root, block_rec);
1446         return block_rec;
1447 }
1448
1449 static inline void
1450 jbd_trans_remove_block_rec(struct jbd_journal *journal,
1451                            struct jbd_block_rec *block_rec,
1452                            struct jbd_trans *trans)
1453 {
1454         /* If this block record doesn't belong to this transaction,
1455          * give up.*/
1456         if (block_rec->trans == trans) {
1457                 LIST_REMOVE(block_rec, tbrec_node);
1458                 RB_REMOVE(jbd_block,
1459                                 &journal->block_rec_root,
1460                                 block_rec);
1461                 free(block_rec);
1462         }
1463 }
1464
1465 /**@brief  Add block to a transaction and mark it dirty.
1466  * @param  trans transaction
1467  * @param  block block descriptor
1468  * @return standard error code*/
1469 int jbd_trans_set_block_dirty(struct jbd_trans *trans,
1470                               struct ext4_block *block)
1471 {
1472         struct jbd_buf *buf;
1473
1474         if (!ext4_bcache_test_flag(block->buf, BC_DIRTY) &&
1475             block->buf->end_write != jbd_trans_end_write) {
1476                 struct jbd_block_rec *block_rec;
1477                 buf = calloc(1, sizeof(struct jbd_buf));
1478                 if (!buf)
1479                         return ENOMEM;
1480
1481                 if ((block_rec = jbd_trans_insert_block_rec(trans,
1482                                         block->lb_id,
1483                                         block->buf)) == NULL) {
1484                         free(buf);
1485                         return ENOMEM;
1486                 }
1487
1488                 buf->block_rec = block_rec;
1489                 buf->trans = trans;
1490                 buf->block = *block;
1491                 ext4_bcache_inc_ref(block->buf);
1492
1493                 /* If the content reach the disk, notify us
1494                  * so that we may do a checkpoint. */
1495                 block->buf->end_write = jbd_trans_end_write;
1496                 block->buf->end_write_arg = buf;
1497
1498                 trans->data_cnt++;
1499                 TAILQ_INSERT_HEAD(&trans->buf_queue, buf, buf_node);
1500
1501                 ext4_bcache_set_dirty(block->buf);
1502         }
1503         return EOK;
1504 }
1505
1506 /**@brief  Add block to be revoked to a transaction
1507  * @param  trans transaction
1508  * @param  lba logical block address
1509  * @return standard error code*/
1510 int jbd_trans_revoke_block(struct jbd_trans *trans,
1511                            ext4_fsblk_t lba)
1512 {
1513         struct jbd_revoke_rec *rec =
1514                 calloc(1, sizeof(struct jbd_revoke_rec));
1515         if (!rec)
1516                 return ENOMEM;
1517
1518         rec->lba = lba;
1519         LIST_INSERT_HEAD(&trans->revoke_list, rec, revoke_node);
1520         return EOK;
1521 }
1522
1523 /**@brief  Try to add block to be revoked to a transaction.
1524  *         If @lba still remains in an transaction on checkpoint
1525  *         queue, add @lba as a revoked block to the transaction.
1526  * @param  trans transaction
1527  * @param  lba logical block address
1528  * @return standard error code*/
1529 int jbd_trans_try_revoke_block(struct jbd_trans *trans,
1530                                ext4_fsblk_t lba)
1531 {
1532         int r = EOK;
1533         struct jbd_journal *journal = trans->journal;
1534         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1535         struct jbd_block_rec *block_rec =
1536                 jbd_trans_block_rec_lookup(journal, lba);
1537
1538         /* Make sure we don't flush any buffers belong to this transaction. */
1539         if (block_rec && block_rec->trans != trans) {
1540                 /* If the buffer has not been flushed yet, flush it now. */
1541                 if (block_rec->buf) {
1542                         r = ext4_block_flush_buf(fs->bdev, block_rec->buf);
1543                         if (r != EOK)
1544                                 return r;
1545
1546                 }
1547
1548                 jbd_trans_revoke_block(trans, lba);
1549         }
1550
1551         return EOK;
1552 }
1553
1554 /**@brief  Free a transaction
1555  * @param  journal current journal session
1556  * @param  trans transaction
1557  * @param  abort discard all the modifications on the block?
1558  * @return standard error code*/
1559 void jbd_journal_free_trans(struct jbd_journal *journal,
1560                             struct jbd_trans *trans,
1561                             bool abort)
1562 {
1563         struct jbd_buf *jbd_buf, *tmp;
1564         struct jbd_revoke_rec *rec, *tmp2;
1565         struct jbd_block_rec *block_rec, *tmp3;
1566         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1567         TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
1568                           tmp) {
1569                 if (abort) {
1570                         jbd_buf->block.buf->end_write = NULL;
1571                         jbd_buf->block.buf->end_write_arg = NULL;
1572                         ext4_bcache_clear_dirty(jbd_buf->block.buf);
1573                         ext4_block_set(fs->bdev, &jbd_buf->block);
1574                 }
1575
1576                 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1577                 free(jbd_buf);
1578         }
1579         LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1580                           tmp2) {
1581                 LIST_REMOVE(rec, revoke_node);
1582                 free(rec);
1583         }
1584         LIST_FOREACH_SAFE(block_rec, &trans->tbrec_list, tbrec_node,
1585                           tmp3) {
1586                 jbd_trans_remove_block_rec(journal, block_rec, trans);
1587         }
1588
1589         free(trans);
1590 }
1591
1592 /**@brief  Write commit block for a transaction
1593  * @param  trans transaction
1594  * @return standard error code*/
1595 static int jbd_trans_write_commit_block(struct jbd_trans *trans)
1596 {
1597         int rc;
1598         struct jbd_commit_header *header;
1599         uint32_t commit_iblock = 0;
1600         struct ext4_block commit_block;
1601         struct jbd_journal *journal = trans->journal;
1602
1603         commit_iblock = jbd_journal_alloc_block(journal, trans);
1604         rc = jbd_block_get_noread(journal->jbd_fs,
1605                         &commit_block, commit_iblock);
1606         if (rc != EOK)
1607                 return rc;
1608
1609         header = (struct jbd_commit_header *)commit_block.data;
1610         jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER);
1611         jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);
1612         jbd_set32(&header->header, sequence, trans->trans_id);
1613
1614         jbd_commit_csum_set(journal->jbd_fs, header);
1615         ext4_bcache_set_dirty(commit_block.buf);
1616         rc = jbd_block_set(journal->jbd_fs, &commit_block);
1617         if (rc != EOK)
1618                 return rc;
1619
1620         return EOK;
1621 }
1622
1623 /**@brief  Write descriptor block for a transaction
1624  * @param  journal current journal session
1625  * @param  trans transaction
1626  * @return standard error code*/
1627 static int jbd_journal_prepare(struct jbd_journal *journal,
1628                                struct jbd_trans *trans)
1629 {
1630         int rc = EOK, i = 0;
1631         int32_t tag_tbl_size;
1632         uint32_t desc_iblock = 0;
1633         uint32_t data_iblock = 0;
1634         char *tag_start = NULL, *tag_ptr = NULL;
1635         struct jbd_buf *jbd_buf, *tmp;
1636         struct ext4_block desc_block, data_block;
1637         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1638
1639         /* Try to remove any non-dirty buffers from the tail of
1640          * buf_queue. */
1641         TAILQ_FOREACH_REVERSE_SAFE(jbd_buf, &trans->buf_queue,
1642                         jbd_trans_buf, buf_node, tmp) {
1643                 /* We stop the iteration when we find a dirty buffer. */
1644                 if (ext4_bcache_test_flag(jbd_buf->block.buf,
1645                                         BC_DIRTY))
1646                         break;
1647
1648                 /* The buffer has not been modified, just release
1649                  * that jbd_buf. */
1650                 jbd_trans_remove_block_rec(journal,
1651                                 jbd_buf->block_rec, trans);
1652                 trans->data_cnt--;
1653
1654                 jbd_buf->block.buf->end_write = NULL;
1655                 jbd_buf->block.buf->end_write_arg = NULL;
1656                 ext4_block_set(fs->bdev, &jbd_buf->block);
1657                 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1658                 free(jbd_buf);
1659         }
1660
1661         TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node, tmp) {
1662                 struct tag_info tag_info;
1663                 bool uuid_exist = false;
1664                 uint32_t checksum;
1665                 if (!ext4_bcache_test_flag(jbd_buf->block.buf,
1666                                            BC_DIRTY)) {
1667                         /* The buffer has not been modified, just release
1668                          * that jbd_buf. */
1669                         jbd_trans_remove_block_rec(journal,
1670                                         jbd_buf->block_rec, trans);
1671                         trans->data_cnt--;
1672
1673                         jbd_buf->block.buf->end_write = NULL;
1674                         jbd_buf->block.buf->end_write_arg = NULL;
1675                         ext4_block_set(fs->bdev, &jbd_buf->block);
1676                         TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1677                         free(jbd_buf);
1678                         continue;
1679                 }
1680                 checksum = jbd_block_csum(journal->jbd_fs,
1681                                           jbd_buf->block.data);
1682 again:
1683                 if (!desc_iblock) {
1684                         struct jbd_bhdr *bhdr;
1685                         desc_iblock = jbd_journal_alloc_block(journal, trans);
1686                         rc = jbd_block_get_noread(journal->jbd_fs,
1687                                            &desc_block, desc_iblock);
1688                         if (rc != EOK)
1689                                 break;
1690
1691                         ext4_bcache_set_dirty(desc_block.buf);
1692
1693                         bhdr = (struct jbd_bhdr *)desc_block.data;
1694                         jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1695                         jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK);
1696                         jbd_set32(bhdr, sequence, trans->trans_id);
1697
1698                         tag_start = (char *)(bhdr + 1);
1699                         tag_ptr = tag_start;
1700                         uuid_exist = true;
1701                         tag_tbl_size = journal->block_size -
1702                                 sizeof(struct jbd_bhdr);
1703
1704                         if (jbd_has_csum(&journal->jbd_fs->sb))
1705                                 tag_tbl_size -= sizeof(struct jbd_block_tail);
1706
1707                         if (!trans->start_iblock)
1708                                 trans->start_iblock = desc_iblock;
1709
1710                 }
1711                 tag_info.block = jbd_buf->block.lb_id;
1712                 tag_info.uuid_exist = uuid_exist;
1713                 if (i == trans->data_cnt - 1)
1714                         tag_info.last_tag = true;
1715                 else
1716                         tag_info.last_tag = false;
1717                 tag_info.checksum = checksum;
1718
1719                 if (uuid_exist)
1720                         memcpy(tag_info.uuid, journal->jbd_fs->sb.uuid,
1721                                         UUID_SIZE);
1722
1723                 rc = jbd_write_block_tag(journal->jbd_fs,
1724                                 tag_ptr,
1725                                 tag_tbl_size,
1726                                 &tag_info);
1727                 if (rc != EOK) {
1728                         jbd_meta_csum_set(journal->jbd_fs,
1729                                         (struct jbd_bhdr *)desc_block.data);
1730                         jbd_block_set(journal->jbd_fs, &desc_block);
1731                         desc_iblock = 0;
1732                         goto again;
1733                 }
1734
1735                 data_iblock = jbd_journal_alloc_block(journal, trans);
1736                 rc = jbd_block_get_noread(journal->jbd_fs,
1737                                 &data_block, data_iblock);
1738                 if (rc != EOK)
1739                         break;
1740
1741                 ext4_bcache_set_dirty(data_block.buf);
1742
1743                 memcpy(data_block.data, jbd_buf->block.data,
1744                         journal->block_size);
1745
1746                 rc = jbd_block_set(journal->jbd_fs, &data_block);
1747                 if (rc != EOK)
1748                         break;
1749
1750                 tag_ptr += tag_info.tag_bytes;
1751                 tag_tbl_size -= tag_info.tag_bytes;
1752
1753                 i++;
1754         }
1755         if (rc == EOK && desc_iblock) {
1756                 jbd_meta_csum_set(journal->jbd_fs,
1757                                 (struct jbd_bhdr *)desc_block.data);
1758                 jbd_block_set(journal->jbd_fs, &desc_block);
1759         }
1760
1761         return rc;
1762 }
1763
1764 /**@brief  Write revoke block for a transaction
1765  * @param  journal current journal session
1766  * @param  trans transaction
1767  * @return standard error code*/
1768 static int
1769 jbd_journal_prepare_revoke(struct jbd_journal *journal,
1770                            struct jbd_trans *trans)
1771 {
1772         int rc = EOK, i = 0;
1773         int32_t tag_tbl_size;
1774         uint32_t desc_iblock = 0;
1775         char *blocks_entry = NULL;
1776         struct jbd_revoke_rec *rec, *tmp;
1777         struct ext4_block desc_block;
1778         struct jbd_revoke_header *header = NULL;
1779         int32_t record_len = 4;
1780
1781         if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1782                                      JBD_FEATURE_INCOMPAT_64BIT))
1783                 record_len = 8;
1784
1785         LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1786                           tmp) {
1787 again:
1788                 if (!desc_iblock) {
1789                         struct jbd_bhdr *bhdr;
1790                         desc_iblock = jbd_journal_alloc_block(journal, trans);
1791                         rc = jbd_block_get_noread(journal->jbd_fs,
1792                                            &desc_block, desc_iblock);
1793                         if (rc != EOK) {
1794                                 break;
1795                         }
1796
1797                         ext4_bcache_set_dirty(desc_block.buf);
1798
1799                         bhdr = (struct jbd_bhdr *)desc_block.data;
1800                         jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1801                         jbd_set32(bhdr, blocktype, JBD_REVOKE_BLOCK);
1802                         jbd_set32(bhdr, sequence, trans->trans_id);
1803                         
1804                         header = (struct jbd_revoke_header *)bhdr;
1805                         blocks_entry = (char *)(header + 1);
1806                         tag_tbl_size = journal->block_size -
1807                                 sizeof(struct jbd_revoke_header);
1808
1809                         if (jbd_has_csum(&journal->jbd_fs->sb))
1810                                 tag_tbl_size -= sizeof(struct jbd_block_tail);
1811
1812                         if (!trans->start_iblock)
1813                                 trans->start_iblock = desc_iblock;
1814
1815                 }
1816
1817                 if (tag_tbl_size < record_len) {
1818                         jbd_set32(header, count,
1819                                   journal->block_size - tag_tbl_size);
1820                         jbd_meta_csum_set(journal->jbd_fs,
1821                                         (struct jbd_bhdr *)desc_block.data);
1822                         jbd_block_set(journal->jbd_fs, &desc_block);
1823                         desc_iblock = 0;
1824                         header = NULL;
1825                         goto again;
1826                 }
1827                 if (record_len == 8) {
1828                         uint64_t *blocks =
1829                                 (uint64_t *)blocks_entry;
1830                         *blocks = to_be64(rec->lba);
1831                 } else {
1832                         uint32_t *blocks =
1833                                 (uint32_t *)blocks_entry;
1834                         *blocks = to_be32(rec->lba);
1835                 }
1836                 blocks_entry += record_len;
1837                 tag_tbl_size -= record_len;
1838
1839                 i++;
1840         }
1841         if (rc == EOK && desc_iblock) {
1842                 if (header != NULL)
1843                         jbd_set32(header, count,
1844                                   journal->block_size - tag_tbl_size);
1845
1846                 jbd_meta_csum_set(journal->jbd_fs,
1847                                 (struct jbd_bhdr *)desc_block.data);
1848                 jbd_block_set(journal->jbd_fs, &desc_block);
1849         }
1850
1851         return rc;
1852 }
1853
1854 /**@brief  Submit the transaction to transaction queue.
1855  * @param  journal current journal session
1856  * @param  trans transaction*/
1857 void
1858 jbd_journal_submit_trans(struct jbd_journal *journal,
1859                          struct jbd_trans *trans)
1860 {
1861         TAILQ_INSERT_TAIL(&journal->trans_queue,
1862                           trans,
1863                           trans_node);
1864 }
1865
1866 /**@brief  Put references of block descriptors in a transaction.
1867  * @param  journal current journal session
1868  * @param  trans transaction*/
1869 void jbd_journal_cp_trans(struct jbd_journal *journal, struct jbd_trans *trans)
1870 {
1871         struct jbd_buf *jbd_buf, *tmp;
1872         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1873         TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
1874                         tmp) {
1875                 struct ext4_block block = jbd_buf->block;
1876                 ext4_block_set(fs->bdev, &block);
1877         }
1878 }
1879
1880 /**@brief  Update the start block of the journal when
1881  *         all the contents in a transaction reach the disk.*/
1882 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
1883                           struct ext4_buf *buf,
1884                           int res,
1885                           void *arg)
1886 {
1887         struct jbd_buf *jbd_buf = arg;
1888         struct jbd_trans *trans = jbd_buf->trans;
1889         struct jbd_journal *journal = trans->journal;
1890         bool first_in_queue =
1891                 trans == TAILQ_FIRST(&journal->cp_queue);
1892         if (res != EOK)
1893                 trans->error = res;
1894
1895         TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1896         jbd_buf->block_rec->buf = NULL;
1897         free(jbd_buf);
1898
1899         /* Clear the end_write and end_write_arg fields. */
1900         buf->end_write = NULL;
1901         buf->end_write_arg = NULL;
1902
1903         trans->written_cnt++;
1904         if (trans->written_cnt == trans->data_cnt) {
1905                 /* If it is the first transaction on checkpoint queue,
1906                  * we will shift the start of the journal to the next
1907                  * transaction, and remove subsequent written
1908                  * transactions from checkpoint queue until we find
1909                  * an unwritten one. */
1910                 if (first_in_queue) {
1911                         journal->start = trans->start_iblock +
1912                                 trans->alloc_blocks;
1913                         wrap(&journal->jbd_fs->sb, journal->start);
1914                         journal->trans_id = trans->trans_id + 1;
1915                         TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
1916                         jbd_journal_free_trans(journal, trans, false);
1917
1918                         jbd_journal_purge_cp_trans(journal, false);
1919                         jbd_journal_write_sb(journal);
1920                         jbd_write_sb(journal->jbd_fs);
1921                 }
1922         }
1923 }
1924
1925 /**@brief  Commit a transaction to the journal immediately.
1926  * @param  journal current journal session
1927  * @param  trans transaction
1928  * @return standard error code*/
1929 int jbd_journal_commit_trans(struct jbd_journal *journal,
1930                              struct jbd_trans *trans)
1931 {
1932         int rc = EOK;
1933         uint32_t last = journal->last;
1934
1935         trans->trans_id = journal->alloc_trans_id;
1936         rc = jbd_journal_prepare(journal, trans);
1937         if (rc != EOK)
1938                 goto Finish;
1939
1940         rc = jbd_journal_prepare_revoke(journal, trans);
1941         if (rc != EOK)
1942                 goto Finish;
1943
1944         if (TAILQ_EMPTY(&trans->buf_queue) &&
1945             LIST_EMPTY(&trans->revoke_list)) {
1946                 /* Since there are no entries in both buffer list
1947                  * and revoke entry list, we do not consider trans as
1948                  * complete transaction and just return EOK.*/
1949                 jbd_journal_free_trans(journal, trans, false);
1950                 goto Finish;
1951         }
1952
1953         rc = jbd_trans_write_commit_block(trans);
1954         if (rc != EOK)
1955                 goto Finish;
1956
1957         journal->alloc_trans_id++;
1958         if (TAILQ_EMPTY(&journal->cp_queue)) {
1959                 if (trans->data_cnt) {
1960                         journal->start = trans->start_iblock;
1961                         wrap(&journal->jbd_fs->sb, journal->start);
1962                         journal->trans_id = trans->trans_id;
1963                         jbd_journal_write_sb(journal);
1964                         jbd_write_sb(journal->jbd_fs);
1965                         TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
1966                                         trans_node);
1967                         jbd_journal_cp_trans(journal, trans);
1968                 } else {
1969                         journal->start = trans->start_iblock +
1970                                 trans->alloc_blocks;
1971                         wrap(&journal->jbd_fs->sb, journal->start);
1972                         journal->trans_id = trans->trans_id + 1;
1973                         jbd_journal_write_sb(journal);
1974                         jbd_journal_free_trans(journal, trans, false);
1975                 }
1976         } else {
1977                 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
1978                                 trans_node);
1979                 if (trans->data_cnt)
1980                         jbd_journal_cp_trans(journal, trans);
1981
1982         }
1983 Finish:
1984         if (rc != EOK) {
1985                 journal->last = last;
1986                 jbd_journal_free_trans(journal, trans, true);
1987         }
1988         return rc;
1989 }
1990
1991 /**@brief  Commit one transaction on transaction queue
1992  *         to the journal.
1993  * @param  journal current journal session.*/
1994 void jbd_journal_commit_one(struct jbd_journal *journal)
1995 {
1996         struct jbd_trans *trans;
1997
1998         if ((trans = TAILQ_FIRST(&journal->trans_queue))) {
1999                 TAILQ_REMOVE(&journal->trans_queue, trans, trans_node);
2000                 jbd_journal_commit_trans(journal, trans);
2001         }
2002 }
2003
2004 /**@brief  Commit all the transactions on transaction queue
2005  *         to the journal.
2006  * @param  journal current journal session.*/
2007 void jbd_journal_commit_all(struct jbd_journal *journal)
2008 {
2009         while (!TAILQ_EMPTY(&journal->trans_queue)) {
2010                 jbd_journal_commit_one(journal);
2011         }
2012 }
2013
2014 /**
2015  * @}
2016  */