ext4_journal: two changes below
[lwext4.git] / lwext4 / ext4_extent_full.c
1 /*
2  * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3  * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * - Redistributions of source code must retain the above copyright
10  *   notice, this list of conditions and the following disclaimer.
11  * - Redistributions in binary form must reproduce the above copyright
12  *   notice, this list of conditions and the following disclaimer in the
13  *   documentation and/or other materials provided with the distribution.
14  * - The name of the author may not be used to endorse or promote products
15  *   derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28
29 #include "ext4_config.h"
30 #include "ext4_blockdev.h"
31 #include "ext4_fs.h"
32 #include "ext4_super.h"
33 #include "ext4_crc32c.h"
34 #include "ext4_balloc.h"
35 #include "ext4_debug.h"
36
37 #include <stdlib.h>
38 #include <string.h>
39 #include <inttypes.h>
40 #include <stddef.h>
41
42 #include "ext4_extent.h"
43
44 #if CONFIG_EXTENT_FULL
45
46 /*
47  * used by extent splitting.
48  */
49 #define EXT4_EXT_MARK_UNWRIT1 0x02 /* mark first half unwritten */
50 #define EXT4_EXT_MARK_UNWRIT2 0x04 /* mark second half unwritten */
51 #define EXT4_EXT_DATA_VALID1 0x08  /* first half contains valid data */
52 #define EXT4_EXT_DATA_VALID2 0x10  /* second half contains valid data */
53 #define EXT4_EXT_NO_COMBINE 0x20   /* do not combine two extents */
54
55 static struct ext4_extent_tail *
56 find_ext4_extent_tail(struct ext4_extent_header *eh)
57 {
58         return (struct ext4_extent_tail *)(((char *)eh) +
59                                            EXT4_EXTENT_TAIL_OFFSET(eh));
60 }
61
62 static struct ext4_extent_header *ext_inode_hdr(struct ext4_inode *inode)
63 {
64         return (struct ext4_extent_header *)inode->blocks;
65 }
66
67 static struct ext4_extent_header *ext_block_hdr(struct ext4_block *block)
68 {
69         return (struct ext4_extent_header *)block->data;
70 }
71
72 static uint16_t ext_depth(struct ext4_inode *inode)
73 {
74         return to_le16(ext_inode_hdr(inode)->depth);
75 }
76
77 static uint16_t ext4_ext_get_actual_len(struct ext4_extent *ext)
78 {
79         return (to_le16(ext->block_count) <= EXT_INIT_MAX_LEN
80                     ? to_le16(ext->block_count)
81                     : (to_le16(ext->block_count) - EXT_INIT_MAX_LEN));
82 }
83
84 static void ext4_ext_mark_initialized(struct ext4_extent *ext)
85 {
86         ext->block_count = to_le16(ext4_ext_get_actual_len(ext));
87 }
88
89 static void ext4_ext_mark_unwritten(struct ext4_extent *ext)
90 {
91         ext->block_count |= to_le16(EXT_INIT_MAX_LEN);
92 }
93
94 static int ext4_ext_is_unwritten(struct ext4_extent *ext)
95 {
96         /* Extent with ee_len of 0x8000 is treated as an initialized extent */
97         return (to_le16(ext->block_count) > EXT_INIT_MAX_LEN);
98 }
99
100 /*
101  * ext4_ext_pblock:
102  * combine low and high parts of physical block number into ext4_fsblk_t
103  */
104 static ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex)
105 {
106         ext4_fsblk_t block;
107
108         block = to_le32(ex->start_lo);
109         block |= ((ext4_fsblk_t)to_le16(ex->start_hi) << 31) << 1;
110         return block;
111 }
112
113 /*
114  * ext4_idx_pblock:
115  * combine low and high parts of a leaf physical block number into ext4_fsblk_t
116  */
117 static ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_index *ix)
118 {
119         ext4_fsblk_t block;
120
121         block = to_le32(ix->leaf_lo);
122         block |= ((ext4_fsblk_t)to_le16(ix->leaf_hi) << 31) << 1;
123         return block;
124 }
125
126 /*
127  * ext4_ext_store_pblock:
128  * stores a large physical block number into an extent struct,
129  * breaking it into parts
130  */
131 static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
132 {
133         ex->start_lo = to_le32((uint32_t)(pb & 0xffffffff));
134         ex->start_hi = to_le16((uint16_t)((pb >> 32)) & 0xffff);
135 }
136
137 /*
138  * ext4_idx_store_pblock:
139  * stores a large physical block number into an index struct,
140  * breaking it into parts
141  */
142 static void ext4_idx_store_pblock(struct ext4_extent_index *ix, ext4_fsblk_t pb)
143 {
144         ix->leaf_lo = to_le32((uint32_t)(pb & 0xffffffff));
145         ix->leaf_hi = to_le16((uint16_t)((pb >> 32)) & 0xffff);
146 }
147
148 static int ext4_allocate_single_block(struct ext4_inode_ref *inode_ref,
149                                       ext4_fsblk_t goal,
150                                       ext4_fsblk_t *blockp)
151 {
152         return ext4_balloc_alloc_block(inode_ref, goal, blockp);
153 }
154
155 static ext4_fsblk_t ext4_new_meta_blocks(struct ext4_inode_ref *inode_ref,
156                                          ext4_fsblk_t goal,
157                                          uint32_t flags __unused,
158                                          uint32_t *count, int *errp)
159 {
160         ext4_fsblk_t block = 0;
161
162         *errp = ext4_allocate_single_block(inode_ref, goal, &block);
163         if (count)
164                 *count = 1;
165         return block;
166 }
167
168 static void ext4_ext_free_blocks(struct ext4_inode_ref *inode_ref,
169                                  ext4_fsblk_t block, uint32_t count,
170                                  uint32_t flags __unused)
171 {
172         ext4_balloc_free_blocks(inode_ref, block, count);
173 }
174
175 static size_t ext4_ext_space_block(struct ext4_inode_ref *inode_ref)
176 {
177         size_t size;
178         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
179
180         size = (block_size - sizeof(struct ext4_extent_header)) /
181                sizeof(struct ext4_extent);
182 #ifdef AGGRESSIVE_TEST
183         if (size > 6)
184                 size = 6;
185 #endif
186         return size;
187 }
188
189 static size_t ext4_ext_space_block_idx(struct ext4_inode_ref *inode_ref)
190 {
191         size_t size;
192         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
193
194         size = (block_size - sizeof(struct ext4_extent_header)) /
195                sizeof(struct ext4_extent_index);
196 #ifdef AGGRESSIVE_TEST
197         if (size > 5)
198                 size = 5;
199 #endif
200         return size;
201 }
202
203 static size_t ext4_ext_space_root(struct ext4_inode_ref *inode_ref)
204 {
205         size_t size;
206
207         size = sizeof(inode_ref->inode->blocks);
208         size -= sizeof(struct ext4_extent_header);
209         size /= sizeof(struct ext4_extent);
210 #ifdef AGGRESSIVE_TEST
211         if (size > 3)
212                 size = 3;
213 #endif
214         return size;
215 }
216
217 static size_t ext4_ext_space_root_idx(struct ext4_inode_ref *inode_ref)
218 {
219         size_t size;
220
221         size = sizeof(inode_ref->inode->blocks);
222         size -= sizeof(struct ext4_extent_header);
223         size /= sizeof(struct ext4_extent_index);
224 #ifdef AGGRESSIVE_TEST
225         if (size > 4)
226                 size = 4;
227 #endif
228         return size;
229 }
230
231 static size_t ext4_ext_max_entries(struct ext4_inode_ref *inode_ref,
232                                    uint32_t depth)
233 {
234         size_t max;
235
236         if (depth == ext_depth(inode_ref->inode)) {
237                 if (depth == 0)
238                         max = ext4_ext_space_root(inode_ref);
239                 else
240                         max = ext4_ext_space_root_idx(inode_ref);
241         } else {
242                 if (depth == 0)
243                         max = ext4_ext_space_block(inode_ref);
244                 else
245                         max = ext4_ext_space_block_idx(inode_ref);
246         }
247
248         return max;
249 }
250
251 static ext4_fsblk_t ext4_ext_find_goal(struct ext4_inode_ref *inode_ref,
252                                        struct ext4_extent_path *path,
253                                        ext4_lblk_t block)
254 {
255         if (path) {
256                 uint32_t depth = path->depth;
257                 struct ext4_extent *ex;
258
259                 /*
260                  * Try to predict block placement assuming that we are
261                  * filling in a file which will eventually be
262                  * non-sparse --- i.e., in the case of libbfd writing
263                  * an ELF object sections out-of-order but in a way
264                  * the eventually results in a contiguous object or
265                  * executable file, or some database extending a table
266                  * space file.  However, this is actually somewhat
267                  * non-ideal if we are writing a sparse file such as
268                  * qemu or KVM writing a raw image file that is going
269                  * to stay fairly sparse, since it will end up
270                  * fragmenting the file system's free space.  Maybe we
271                  * should have some hueristics or some way to allow
272                  * userspace to pass a hint to file system,
273                  * especially if the latter case turns out to be
274                  * common.
275                  */
276                 ex = path[depth].extent;
277                 if (ex) {
278                         ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
279                         ext4_lblk_t ext_block = to_le32(ex->first_block);
280
281                         if (block > ext_block)
282                                 return ext_pblk + (block - ext_block);
283                         else
284                                 return ext_pblk - (ext_block - block);
285                 }
286
287                 /* it looks like index is empty;
288                  * try to find starting block from index itself */
289                 if (path[depth].block.lb_id)
290                         return path[depth].block.lb_id;
291         }
292
293         /* OK. use inode's group */
294         return ext4_fs_inode_to_goal_block(inode_ref);
295 }
296
297 /*
298  * Allocation for a meta data block
299  */
300 static ext4_fsblk_t ext4_ext_new_meta_block(struct ext4_inode_ref *inode_ref,
301                                             struct ext4_extent_path *path,
302                                             struct ext4_extent *ex, int *err,
303                                             uint32_t flags)
304 {
305         ext4_fsblk_t goal, newblock;
306
307         goal = ext4_ext_find_goal(inode_ref, path, to_le32(ex->first_block));
308         newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, err);
309         return newblock;
310 }
311
312 #if CONFIG_META_CSUM_ENABLE
313 static uint32_t ext4_ext_block_csum(struct ext4_inode_ref *inode_ref,
314                                     struct ext4_extent_header *eh)
315 {
316         uint32_t checksum = 0;
317         struct ext4_sblock *sb = &inode_ref->fs->sb;
318
319         if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
320                 uint32_t ino_index = to_le32(inode_ref->index);
321                 uint32_t ino_gen =
322                         to_le32(ext4_inode_get_generation(inode_ref->inode));
323                 /* First calculate crc32 checksum against fs uuid */
324                 checksum = ext4_crc32c(EXT4_CRC32_INIT, sb->uuid,
325                                 sizeof(sb->uuid));
326                 /* Then calculate crc32 checksum against inode number
327                  * and inode generation */
328                 checksum = ext4_crc32c(checksum, &ino_index,
329                                      sizeof(ino_index));
330                 checksum = ext4_crc32c(checksum, &ino_gen,
331                                      sizeof(ino_gen));
332                 /* Finally calculate crc32 checksum against 
333                  * the entire extent block up to the checksum field */
334                 checksum = ext4_crc32c(checksum, eh,
335                                 EXT4_EXTENT_TAIL_OFFSET(eh));
336         }
337         return checksum;
338 }
339 #else
340 #define ext4_ext_block_csum(...) 0
341 #endif
342
343 static void ext4_extent_block_csum_set(struct ext4_inode_ref *inode_ref __unused,
344                                        struct ext4_extent_header *eh)
345 {
346         struct ext4_extent_tail *tail;
347
348         tail = find_ext4_extent_tail(eh);
349         tail->et_checksum = to_le32(ext4_ext_block_csum(inode_ref, eh));
350 }
351
352 static int ext4_ext_dirty(struct ext4_inode_ref *inode_ref,
353                           struct ext4_extent_path *path)
354 {
355         if (path->block.lb_id)
356                 path->block.dirty = true;
357         else
358                 inode_ref->dirty = true;
359
360         return EOK;
361 }
362
363 static void ext4_ext_drop_refs(struct ext4_inode_ref *inode_ref,
364                                struct ext4_extent_path *path, bool keep_other)
365 {
366         int32_t depth, i;
367
368         if (!path)
369                 return;
370         if (keep_other)
371                 depth = 0;
372         else
373                 depth = path->depth;
374
375         for (i = 0; i <= depth; i++, path++) {
376                 if (path->block.lb_id) {
377                         if (path->block.dirty)
378                                 ext4_extent_block_csum_set(inode_ref,
379                                                 path->header);
380
381                         ext4_block_set(inode_ref->fs->bdev, &path->block);
382                 }
383         }
384 }
385
386 /*
387  * Check that whether the basic information inside the extent header
388  * is correct or not.
389  */
390 static int ext4_ext_check(struct ext4_inode_ref *inode_ref,
391                           struct ext4_extent_header *eh, uint16_t depth,
392                           ext4_fsblk_t pblk __unused)
393 {
394         struct ext4_extent_tail *tail;
395         struct ext4_sblock *sb = &inode_ref->fs->sb;
396         const char *error_msg;
397         (void)error_msg;
398
399         if (to_le16(eh->magic) != EXT4_EXTENT_MAGIC) {
400                 error_msg = "invalid magic";
401                 goto corrupted;
402         }
403         if (to_le16(eh->depth) != depth) {
404                 error_msg = "unexpected eh_depth";
405                 goto corrupted;
406         }
407         if (eh->max_entries_count == 0) {
408                 error_msg = "invalid eh_max";
409                 goto corrupted;
410         }
411         if (to_le16(eh->entries_count) > to_le16(eh->max_entries_count)) {
412                 error_msg = "invalid eh_entries";
413                 goto corrupted;
414         }
415
416         tail = find_ext4_extent_tail(eh);
417         if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
418                 if (tail->et_checksum != to_le32(ext4_ext_block_csum(inode_ref, eh))) {
419                         ext4_dbg(DEBUG_EXTENT,
420                                  DBG_WARN "Extent block checksum failed."
421                                  "Blocknr: %" PRIu64"\n",
422                                  pblk);
423
424                 }
425         }
426
427         return EOK;
428
429 corrupted:
430         ext4_dbg(DEBUG_EXTENT, "Bad extents B+ tree block: %s. "
431                                "Blocknr: %" PRId64 "\n",
432                  error_msg, pblk);
433         return EIO;
434 }
435
436 static int read_extent_tree_block(struct ext4_inode_ref *inode_ref,
437                                   ext4_fsblk_t pblk, int32_t depth,
438                                   struct ext4_block *bh,
439                                   uint32_t flags __unused)
440 {
441         int err;
442
443         err = ext4_block_get(inode_ref->fs->bdev, bh, pblk);
444         if (err != EOK)
445                 goto errout;
446
447         err = ext4_ext_check(inode_ref, ext_block_hdr(bh), depth, pblk);
448         if (err != EOK)
449                 goto errout;
450
451         return EOK;
452 errout:
453         if (bh->lb_id)
454                 ext4_block_set(inode_ref->fs->bdev, bh);
455
456         return err;
457 }
458
459 /*
460  * ext4_ext_binsearch_idx:
461  * binary search for the closest index of the given block
462  * the header must be checked before calling this
463  */
464 static void ext4_ext_binsearch_idx(struct ext4_extent_path *path,
465                                    ext4_lblk_t block)
466 {
467         struct ext4_extent_header *eh = path->header;
468         struct ext4_extent_index *r, *l, *m;
469
470         l = EXT_FIRST_INDEX(eh) + 1;
471         r = EXT_LAST_INDEX(eh);
472         while (l <= r) {
473                 m = l + (r - l) / 2;
474                 if (block < to_le32(m->first_block))
475                         r = m - 1;
476                 else
477                         l = m + 1;
478         }
479
480         path->index = l - 1;
481 }
482
483 /*
484  * ext4_ext_binsearch:
485  * binary search for closest extent of the given block
486  * the header must be checked before calling this
487  */
488 static void ext4_ext_binsearch(struct ext4_extent_path *path, ext4_lblk_t block)
489 {
490         struct ext4_extent_header *eh = path->header;
491         struct ext4_extent *r, *l, *m;
492
493         if (eh->entries_count == 0) {
494                 /*
495                  * this leaf is empty:
496                  * we get such a leaf in split/add case
497                  */
498                 return;
499         }
500
501         l = EXT_FIRST_EXTENT(eh) + 1;
502         r = EXT_LAST_EXTENT(eh);
503
504         while (l <= r) {
505                 m = l + (r - l) / 2;
506                 if (block < to_le32(m->first_block))
507                         r = m - 1;
508                 else
509                         l = m + 1;
510         }
511
512         path->extent = l - 1;
513 }
514
515 static int ext4_find_extent(struct ext4_inode_ref *inode_ref, ext4_lblk_t block,
516                             struct ext4_extent_path **orig_path, uint32_t flags)
517 {
518         struct ext4_extent_header *eh;
519         struct ext4_block bh = EXT4_BLOCK_ZERO();
520         ext4_fsblk_t buf_block = 0;
521         struct ext4_extent_path *path = *orig_path;
522         int32_t depth, ppos = 0;
523         int32_t i;
524         int ret;
525
526         eh = ext_inode_hdr(inode_ref->inode);
527         depth = ext_depth(inode_ref->inode);
528
529         if (path) {
530                 ext4_ext_drop_refs(inode_ref, path, 0);
531                 if (depth > path[0].maxdepth) {
532                         free(path);
533                         *orig_path = path = NULL;
534                 }
535         }
536         if (!path) {
537                 int32_t path_depth = depth + 1;
538                 /* account possible depth increase */
539                 path = calloc(1, sizeof(struct ext4_extent_path) *
540                                      (path_depth + 1));
541                 if (!path)
542                         return ENOMEM;
543                 path[0].maxdepth = path_depth;
544         }
545         path[0].header = eh;
546         path[0].block = bh;
547
548         i = depth;
549         /* walk through the tree */
550         while (i) {
551                 ext4_ext_binsearch_idx(path + ppos, block);
552                 path[ppos].p_block = ext4_idx_pblock(path[ppos].index);
553                 path[ppos].depth = i;
554                 path[ppos].extent = NULL;
555                 buf_block = path[ppos].p_block;
556
557                 i--;
558                 ppos++;
559                 if (!path[ppos].block.lb_id ||
560                     path[ppos].block.lb_id != buf_block) {
561                         ret = read_extent_tree_block(inode_ref, buf_block, i,
562                                                      &bh, flags);
563                         if (ret != EOK) {
564                                 goto err;
565                         }
566                         if (ppos > depth) {
567                                 ext4_block_set(inode_ref->fs->bdev, &bh);
568                                 ret = EIO;
569                                 goto err;
570                         }
571
572                         eh = ext_block_hdr(&bh);
573                         path[ppos].block = bh;
574                         path[ppos].header = eh;
575                 }
576         }
577
578         path[ppos].depth = i;
579         path[ppos].extent = NULL;
580         path[ppos].index = NULL;
581
582         /* find extent */
583         ext4_ext_binsearch(path + ppos, block);
584         /* if not an empty leaf */
585         if (path[ppos].extent)
586                 path[ppos].p_block = ext4_ext_pblock(path[ppos].extent);
587
588         *orig_path = path;
589
590         ret = EOK;
591         return ret;
592
593 err:
594         ext4_ext_drop_refs(inode_ref, path, 0);
595         free(path);
596         if (orig_path)
597                 *orig_path = NULL;
598         return ret;
599 }
600
601 static void ext4_ext_init_header(struct ext4_inode_ref *inode_ref,
602                                  struct ext4_extent_header *eh, int32_t depth)
603 {
604         eh->entries_count = 0;
605         eh->max_entries_count = to_le16(ext4_ext_max_entries(inode_ref, depth));
606         eh->magic = to_le16(EXT4_EXTENT_MAGIC);
607         eh->depth = depth;
608 }
609
610 /*
611  * Be cautious, the buffer_head returned is not yet mark dirtied. */
612 static int ext4_ext_split_node(struct ext4_inode_ref *inode_ref,
613                                struct ext4_extent_path *path, int32_t at,
614                                struct ext4_extent *newext,
615                                ext4_fsblk_t *sibling, struct ext4_block *new_bh)
616 {
617         int ret;
618         ext4_fsblk_t newblock;
619         struct ext4_block bh = EXT4_BLOCK_ZERO();
620         int32_t depth = ext_depth(inode_ref->inode);
621
622         ext4_assert(sibling);
623
624         /* FIXME: currently we split at the point after the current extent. */
625         newblock = ext4_ext_new_meta_block(inode_ref, path, newext, &ret, 0);
626         if (ret)
627                 goto cleanup;
628
629         /*  For write access.# */
630         ret = ext4_block_get_noread(inode_ref->fs->bdev, &bh, newblock);
631         if (ret != EOK)
632                 goto cleanup;
633
634         if (at == depth) {
635                 /* start copy from next extent */
636                 ptrdiff_t m = EXT_MAX_EXTENT(path[at].header) - path[at].extent;
637                 struct ext4_extent_header *neh;
638                 neh = ext_block_hdr(&bh);
639                 ext4_ext_init_header(inode_ref, neh, 0);
640                 if (m) {
641                         struct ext4_extent *ex;
642                         ex = EXT_FIRST_EXTENT(neh);
643                         memmove(ex, path[at].extent + 1,
644                                 sizeof(struct ext4_extent) * m);
645                         neh->entries_count =
646                             to_le16(to_le16(neh->entries_count) + m);
647                         path[at].header->entries_count = to_le16(
648                             to_le16(path[at].header->entries_count) - m);
649                         ret = ext4_ext_dirty(inode_ref, path + at);
650                         if (ret)
651                                 goto cleanup;
652                 }
653         } else {
654                 ptrdiff_t m = EXT_MAX_INDEX(path[at].header) - path[at].index;
655                 struct ext4_extent_header *neh;
656                 neh = ext_block_hdr(&bh);
657                 ext4_ext_init_header(inode_ref, neh, depth - at);
658                 if (m) {
659                         struct ext4_extent_index *ix;
660                         ix = EXT_FIRST_INDEX(neh);
661                         memmove(ix, path[at].index + 1,
662                                 sizeof(struct ext4_extent) * m);
663                         neh->entries_count =
664                             to_le16(to_le16(neh->entries_count) + m);
665                         path[at].header->entries_count = to_le16(
666                             to_le16(path[at].header->entries_count) - m);
667                         ret = ext4_ext_dirty(inode_ref, path + at);
668                         if (ret)
669                                 goto cleanup;
670                 }
671         }
672 cleanup:
673         if (ret) {
674                 if (bh.lb_id) {
675                         ext4_block_set(inode_ref->fs->bdev, &bh);
676                 }
677                 if (newblock)
678                         ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
679
680                 newblock = 0;
681         }
682         *sibling = newblock;
683         *new_bh = bh;
684         return ret;
685 }
686
687 static ext4_lblk_t ext4_ext_block_index(struct ext4_extent_header *eh)
688 {
689         if (eh->depth)
690                 return to_le32(EXT_FIRST_INDEX(eh)->first_block);
691
692         return to_le32(EXT_FIRST_EXTENT(eh)->first_block);
693 }
694
695 struct ext_split_trans {
696         ext4_fsblk_t ptr;
697         struct ext4_extent_path path;
698         int switch_to;
699 };
700
701 static int ext4_ext_insert_index(struct ext4_inode_ref *inode_ref,
702                                  struct ext4_extent_path *path,
703                                  int32_t at,
704                                  struct ext4_extent *newext,
705                                  ext4_lblk_t insert_index,
706                                  ext4_fsblk_t insert_block,
707                                  struct ext_split_trans *spt,
708                                  bool *need_grow)
709 {
710         struct ext4_extent_index *ix;
711         struct ext4_extent_path *curp = path + at;
712         struct ext4_block bh = EXT4_BLOCK_ZERO();
713         int32_t len;
714         int err;
715         struct ext4_extent_header *eh;
716
717         *need_grow = false;
718
719         if (curp->index && insert_index == to_le32(curp->index->first_block))
720                 return EIO;
721
722         if (to_le16(curp->header->entries_count) ==
723             to_le16(curp->header->max_entries_count)) {
724                 if (at) {
725                         struct ext4_extent_header *neh;
726                         err = ext4_ext_split_node(inode_ref, path, at, newext,
727                                                   &spt->ptr, &bh);
728                         if (err != EOK)
729                                 goto out;
730
731                         neh = ext_block_hdr(&bh);
732                         if (insert_index > to_le32(curp->index->first_block)) {
733                                 /* Make decision which node should be used to
734                                  * insert the index.*/
735                                 if (to_le16(neh->entries_count) >
736                                     to_le16(curp->header->entries_count)) {
737                                         eh = curp->header;
738                                         /* insert after */
739                                         ix = EXT_LAST_INDEX(eh) + 1;
740                                 } else {
741                                         eh = neh;
742                                         ix = EXT_FIRST_INDEX(eh);
743                                 }
744                         } else {
745                                 eh = curp->header;
746                                 /* insert before */
747                                 ix = EXT_LAST_INDEX(eh);
748                         }
749                 } else {
750                         err = EOK;
751                         *need_grow = true;
752                         goto out;
753                 }
754         } else {
755                 eh = curp->header;
756                 if (curp->index == NULL) {
757                         ix = EXT_FIRST_INDEX(eh);
758                         curp->index = ix;
759                 } else if (insert_index > to_le32(curp->index->first_block)) {
760                         /* insert after */
761                         ix = curp->index + 1;
762                 } else {
763                         /* insert before */
764                         ix = curp->index;
765                 }
766         }
767
768         len = EXT_LAST_INDEX(eh) - ix + 1;
769         ext4_assert(len >= 0);
770         if (len > 0)
771                 memmove(ix + 1, ix, len * sizeof(struct ext4_extent_index));
772
773         if (ix > EXT_MAX_INDEX(eh)) {
774                 err = EIO;
775                 goto out;
776         }
777
778         ix->first_block = to_le32(insert_index);
779         ext4_idx_store_pblock(ix, insert_block);
780         eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
781
782         if (ix > EXT_LAST_INDEX(eh)) {
783                 err = EIO;
784                 goto out;
785         }
786
787         if (eh == curp->header)
788                 err = ext4_ext_dirty(inode_ref, curp);
789         else
790                 err = EOK;
791
792 out:
793         if (err != EOK || *need_grow) {
794                 if (bh.lb_id)
795                         ext4_block_set(inode_ref->fs->bdev, &bh);
796
797                 spt->ptr = 0;
798         } else if (bh.lb_id) {
799                 /* If we got a sibling leaf. */
800                 ext4_extent_block_csum_set(inode_ref, ext_block_hdr(&bh));
801                 bh.dirty = true;
802
803                 spt->path.p_block = ext4_idx_pblock(ix);
804                 spt->path.depth = to_le16(eh->depth);
805                 spt->path.maxdepth = 0;
806                 spt->path.extent = NULL;
807                 spt->path.index = ix;
808                 spt->path.header = eh;
809                 spt->path.block = bh;
810
811                 /*
812                  * If newext->ee_block can be included into the
813                  * right sub-tree.
814                  */
815                 if (to_le32(newext->first_block) >=
816                     ext4_ext_block_index(ext_block_hdr(&bh)))
817                         spt->switch_to = 1;
818                 else {
819                         curp->index = ix;
820                         curp->p_block = ext4_idx_pblock(ix);
821                 }
822
823         } else {
824                 spt->ptr = 0;
825                 curp->index = ix;
826                 curp->p_block = ext4_idx_pblock(ix);
827         }
828         return err;
829 }
830
831 /*
832  * ext4_ext_correct_indexes:
833  * if leaf gets modified and modified extent is first in the leaf,
834  * then we have to correct all indexes above.
835  */
836 static int ext4_ext_correct_indexes(struct ext4_inode_ref *inode_ref,
837                                     struct ext4_extent_path *path)
838 {
839         struct ext4_extent_header *eh;
840         int32_t depth = ext_depth(inode_ref->inode);
841         struct ext4_extent *ex;
842         uint32_t border;
843         int32_t k;
844         int err = EOK;
845
846         eh = path[depth].header;
847         ex = path[depth].extent;
848
849         if (ex == NULL || eh == NULL) {
850                 return EIO;
851         }
852
853         if (depth == 0) {
854                 /* there is no tree at all */
855                 return EOK;
856         }
857
858         if (ex != EXT_FIRST_EXTENT(eh)) {
859                 /* we correct tree if first leaf got modified only */
860                 return EOK;
861         }
862
863         /*
864          * TODO: we need correction if border is smaller than current one
865          */
866         k = depth - 1;
867         border = path[depth].extent->first_block;
868         path[k].index->first_block = border;
869         err = ext4_ext_dirty(inode_ref, path + k);
870         if (err != EOK)
871                 return err;
872
873         while (k--) {
874                 /* change all left-side indexes */
875                 if (path[k + 1].index != EXT_FIRST_INDEX(path[k + 1].header))
876                         break;
877                 path[k].index->first_block = border;
878                 err = ext4_ext_dirty(inode_ref, path + k);
879                 if (err != EOK)
880                         break;
881         }
882
883         return err;
884 }
885
886 static bool ext4_ext_can_prepend(struct ext4_extent *ex1,
887                                  struct ext4_extent *ex2)
888 {
889         if (ext4_ext_pblock(ex2) + ext4_ext_get_actual_len(ex2) !=
890             ext4_ext_pblock(ex1))
891                 return false;
892
893 #ifdef AGGRESSIVE_TEST
894         if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) > 4)
895                 return 0;
896 #else
897         if (ext4_ext_is_unwritten(ex1)) {
898                 if (ext4_ext_get_actual_len(ex1) +
899                         ext4_ext_get_actual_len(ex2) >
900                     EXT_UNWRITTEN_MAX_LEN)
901                         return false;
902         } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
903                    EXT_INIT_MAX_LEN)
904                 return false;
905 #endif
906
907         if (to_le32(ex2->first_block) + ext4_ext_get_actual_len(ex2) !=
908             to_le32(ex1->first_block))
909                 return false;
910
911         return true;
912 }
913
914 static bool ext4_ext_can_append(struct ext4_extent *ex1,
915                                 struct ext4_extent *ex2)
916 {
917         if (ext4_ext_pblock(ex1) + ext4_ext_get_actual_len(ex1) !=
918             ext4_ext_pblock(ex2))
919                 return false;
920
921 #ifdef AGGRESSIVE_TEST
922         if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) > 4)
923                 return 0;
924 #else
925         if (ext4_ext_is_unwritten(ex1)) {
926                 if (ext4_ext_get_actual_len(ex1) +
927                         ext4_ext_get_actual_len(ex2) >
928                     EXT_UNWRITTEN_MAX_LEN)
929                         return false;
930         } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
931                    EXT_INIT_MAX_LEN)
932                 return false;
933 #endif
934
935         if (to_le32(ex1->first_block) + ext4_ext_get_actual_len(ex1) !=
936             to_le32(ex2->first_block))
937                 return false;
938
939         return true;
940 }
941
942 static int ext4_ext_insert_leaf(struct ext4_inode_ref *inode_ref,
943                                 struct ext4_extent_path *path,
944                                 int32_t at,
945                                 struct ext4_extent *newext,
946                                 struct ext_split_trans *spt,
947                                 uint32_t flags,
948                                 bool *need_grow)
949 {
950         struct ext4_extent_path *curp = path + at;
951         struct ext4_extent *ex = curp->extent;
952         struct ext4_block bh = EXT4_BLOCK_ZERO();
953         int32_t len;
954         int err = EOK;
955         int unwritten;
956         struct ext4_extent_header *eh = NULL;
957
958         *need_grow = false;
959
960         if (curp->extent &&
961             to_le32(newext->first_block) == to_le32(curp->extent->first_block))
962                 return EIO;
963
964         if (!(flags & EXT4_EXT_NO_COMBINE)) {
965                 if (curp->extent && ext4_ext_can_append(curp->extent, newext)) {
966                         unwritten = ext4_ext_is_unwritten(curp->extent);
967                         curp->extent->block_count =
968                             to_le16(ext4_ext_get_actual_len(curp->extent) +
969                                     ext4_ext_get_actual_len(newext));
970                         if (unwritten)
971                                 ext4_ext_mark_unwritten(curp->extent);
972                         err = ext4_ext_dirty(inode_ref, curp);
973                         goto out;
974                 }
975
976                 if (curp->extent &&
977                     ext4_ext_can_prepend(curp->extent, newext)) {
978                         unwritten = ext4_ext_is_unwritten(curp->extent);
979                         curp->extent->first_block = newext->first_block;
980                         curp->extent->block_count =
981                             to_le16(ext4_ext_get_actual_len(curp->extent) +
982                                     ext4_ext_get_actual_len(newext));
983                         if (unwritten)
984                                 ext4_ext_mark_unwritten(curp->extent);
985                         err = ext4_ext_dirty(inode_ref, curp);
986                         goto out;
987                 }
988         }
989
990         if (to_le16(curp->header->entries_count) ==
991             to_le16(curp->header->max_entries_count)) {
992                 if (at) {
993                         struct ext4_extent_header *neh;
994                         err = ext4_ext_split_node(inode_ref, path, at, newext,
995                                                   &spt->ptr, &bh);
996                         if (err != EOK)
997                                 goto out;
998
999                         neh = ext_block_hdr(&bh);
1000                         if (to_le32(newext->first_block) >
1001                             to_le32(curp->extent->first_block)) {
1002                                 if (to_le16(neh->entries_count) >
1003                                     to_le16(curp->header->entries_count)) {
1004                                         eh = curp->header;
1005                                         /* insert after */
1006                                         ex = EXT_LAST_EXTENT(eh) + 1;
1007                                 } else {
1008                                         eh = neh;
1009                                         ex = EXT_FIRST_EXTENT(eh);
1010                                 }
1011                         } else {
1012                                 eh = curp->header;
1013                                 /* insert before */
1014                                 ex = EXT_LAST_EXTENT(eh);
1015                         }
1016                 } else {
1017                         err = EOK;
1018                         *need_grow = true;
1019                         goto out;
1020                 }
1021         } else {
1022                 eh = curp->header;
1023                 if (curp->extent == NULL) {
1024                         ex = EXT_FIRST_EXTENT(eh);
1025                         curp->extent = ex;
1026                 } else if (to_le32(newext->first_block) >
1027                            to_le32(curp->extent->first_block)) {
1028                         /* insert after */
1029                         ex = curp->extent + 1;
1030                 } else {
1031                         /* insert before */
1032                         ex = curp->extent;
1033                 }
1034         }
1035
1036         len = EXT_LAST_EXTENT(eh) - ex + 1;
1037         ext4_assert(len >= 0);
1038         if (len > 0)
1039                 memmove(ex + 1, ex, len * sizeof(struct ext4_extent));
1040
1041         if (ex > EXT_MAX_EXTENT(eh)) {
1042                 err = EIO;
1043                 goto out;
1044         }
1045
1046         ex->first_block = newext->first_block;
1047         ex->block_count = newext->block_count;
1048         ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
1049         eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
1050
1051         if (ex > EXT_LAST_EXTENT(eh)) {
1052                 err = EIO;
1053                 goto out;
1054         }
1055
1056         if (eh == curp->header) {
1057                 err = ext4_ext_correct_indexes(inode_ref, path);
1058                 if (err != EOK)
1059                         goto out;
1060                 err = ext4_ext_dirty(inode_ref, curp);
1061         } else
1062                 err = EOK;
1063
1064 out:
1065         if (err != EOK || *need_grow) {
1066                 if (bh.lb_id)
1067                         ext4_block_set(inode_ref->fs->bdev, &bh);
1068
1069                 spt->ptr = 0;
1070         } else if (bh.lb_id) {
1071                 /* If we got a sibling leaf. */
1072                 ext4_extent_block_csum_set(inode_ref, ext_block_hdr(&bh));
1073                 bh.dirty = true;
1074
1075                 spt->path.p_block = ext4_ext_pblock(ex);
1076                 spt->path.depth = to_le16(eh->depth);
1077                 spt->path.maxdepth = 0;
1078                 spt->path.extent = ex;
1079                 spt->path.index = NULL;
1080                 spt->path.header = eh;
1081                 spt->path.block = bh;
1082
1083                 /*
1084                  * If newext->ee_block can be included into the
1085                  * right sub-tree.
1086                  */
1087                 if (to_le32(newext->first_block) >=
1088                     ext4_ext_block_index(ext_block_hdr(&bh)))
1089                         spt->switch_to = 1;
1090                 else {
1091                         curp->extent = ex;
1092                         curp->p_block = ext4_ext_pblock(ex);
1093                 }
1094
1095         } else {
1096                 spt->ptr = 0;
1097                 curp->extent = ex;
1098                 curp->p_block = ext4_ext_pblock(ex);
1099         }
1100
1101         return err;
1102 }
1103
1104 /*
1105  * ext4_ext_grow_indepth:
1106  * implements tree growing procedure:
1107  * - allocates new block
1108  * - moves top-level data (index block or leaf) into the new block
1109  * - initializes new top-level, creating index that points to the
1110  *   just created block
1111  */
1112 static int ext4_ext_grow_indepth(struct ext4_inode_ref *inode_ref,
1113                                  uint32_t flags)
1114 {
1115         struct ext4_extent_header *neh;
1116         struct ext4_block bh = EXT4_BLOCK_ZERO();
1117         ext4_fsblk_t newblock, goal = 0;
1118         int err = EOK;
1119
1120         /* Try to prepend new index to old one */
1121         if (ext_depth(inode_ref->inode))
1122                 goal = ext4_idx_pblock(
1123                     EXT_FIRST_INDEX(ext_inode_hdr(inode_ref->inode)));
1124         else
1125                 goal = ext4_fs_inode_to_goal_block(inode_ref);
1126
1127         newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, &err);
1128         if (newblock == 0)
1129                 return err;
1130
1131         /* # */
1132         err = ext4_block_get_noread(inode_ref->fs->bdev, &bh, newblock);
1133         if (err != EOK) {
1134                 ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
1135                 return err;
1136         }
1137
1138         /* move top-level index/leaf into new block */
1139         memmove(bh.data, inode_ref->inode->blocks,
1140                 sizeof(inode_ref->inode->blocks));
1141
1142         /* set size of new block */
1143         neh = ext_block_hdr(&bh);
1144         /* old root could have indexes or leaves
1145          * so calculate e_max right way */
1146         if (ext_depth(inode_ref->inode))
1147                 neh->max_entries_count =
1148                     to_le16(ext4_ext_space_block_idx(inode_ref));
1149         else
1150                 neh->max_entries_count =
1151                     to_le16(ext4_ext_space_block(inode_ref));
1152
1153         neh->magic = to_le16(EXT4_EXTENT_MAGIC);
1154         ext4_extent_block_csum_set(inode_ref, neh);
1155
1156         /* Update top-level index: num,max,pointer */
1157         neh = ext_inode_hdr(inode_ref->inode);
1158         neh->entries_count = to_le16(1);
1159         ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
1160         if (neh->depth == 0) {
1161                 /* Root extent block becomes index block */
1162                 neh->max_entries_count =
1163                     to_le16(ext4_ext_space_root_idx(inode_ref));
1164                 EXT_FIRST_INDEX(neh)
1165                     ->first_block = EXT_FIRST_EXTENT(neh)->first_block;
1166         }
1167         neh->depth = to_le16(to_le16(neh->depth) + 1);
1168
1169         bh.dirty = true;
1170         inode_ref->dirty = true;
1171         ext4_block_set(inode_ref->fs->bdev, &bh);
1172
1173         return err;
1174 }
1175
1176 __unused static void print_path(struct ext4_extent_path *path)
1177 {
1178         int32_t i = path->depth;
1179         while (i >= 0) {
1180
1181                 ptrdiff_t a =
1182                     (path->extent)
1183                         ? (path->extent - EXT_FIRST_EXTENT(path->header))
1184                         : 0;
1185                 ptrdiff_t b =
1186                     (path->index)
1187                         ? (path->index - EXT_FIRST_INDEX(path->header))
1188                         : 0;
1189
1190                 (void)a;
1191                 (void)b;
1192                 ext4_dbg(DEBUG_EXTENT,
1193                          "depth %" PRId32 ", p_block: %" PRIu64 ","
1194                          "p_ext offset: %td, p_idx offset: %td\n",
1195                          i, path->p_block, a, b);
1196                 i--;
1197                 path++;
1198         }
1199 }
1200
1201 static void ext4_ext_replace_path(struct ext4_inode_ref *inode_ref,
1202                                   struct ext4_extent_path *path,
1203                                   struct ext_split_trans *spt,
1204                                   int32_t level)
1205 {
1206         int32_t depth = ext_depth(inode_ref->inode);
1207         int32_t i = depth - level;
1208         ext4_ext_drop_refs(inode_ref, path + i, 1);
1209         path[i] = spt[level].path;
1210 }
1211
1212 static int ext4_ext_insert_extent(struct ext4_inode_ref *inode_ref,
1213                                   struct ext4_extent_path **ppath,
1214                                   struct ext4_extent *newext, uint32_t flags)
1215 {
1216         int32_t i, depth, level;
1217         int ret = EOK;
1218         ext4_fsblk_t ptr = 0;
1219         bool need_grow = false;
1220         struct ext4_extent_path *path = *ppath;
1221         struct ext_split_trans *spt = NULL;
1222         struct ext_split_trans newblock;
1223
1224         memset(&newblock, 0, sizeof(newblock));
1225
1226         depth = ext_depth(inode_ref->inode);
1227         for (i = depth, level = 0; i >= 0; i--, level++)
1228                 if (EXT_HAS_FREE_INDEX(path + i))
1229                         break;
1230
1231         if (level) {
1232                 spt = calloc(1, sizeof(struct ext_split_trans) * (level));
1233                 if (!spt) {
1234                         ret = ENOMEM;
1235                         goto out;
1236                 }
1237         }
1238         i = 0;
1239 again:
1240         depth = ext_depth(inode_ref->inode);
1241
1242         do {
1243                 if (!i) {
1244                         ret = ext4_ext_insert_leaf(inode_ref, path, depth - i,
1245                                                    newext, &newblock, flags,
1246                                                    &need_grow);
1247                 } else {
1248                         ret = ext4_ext_insert_index(
1249                             inode_ref, path, depth - i, newext,
1250                             ext4_ext_block_index(
1251                                 ext_block_hdr(&spt[i - 1].path.block)),
1252                             spt[i - 1].ptr, &newblock,
1253                             &need_grow);
1254                 }
1255                 ptr = newblock.ptr;
1256
1257                 if (ret != EOK)
1258                         goto out;
1259
1260                 else if (spt && ptr && !ret) {
1261                         /* Prepare for the next iteration after splitting. */
1262                         spt[i] = newblock;
1263                 }
1264
1265                 i++;
1266         } while (ptr != 0 && i <= depth);
1267
1268         if (need_grow) {
1269                 ret = ext4_ext_grow_indepth(inode_ref, 0);
1270                 if (ret)
1271                         goto out;
1272                 ret = ext4_find_extent(inode_ref, to_le32(newext->first_block),
1273                                        ppath, 0);
1274                 if (ret)
1275                         goto out;
1276                 i = depth;
1277                 path = *ppath;
1278                 goto again;
1279         }
1280 out:
1281         if (ret) {
1282                 if (path)
1283                         ext4_ext_drop_refs(inode_ref, path, 0);
1284
1285                 while (--level >= 0 && spt) {
1286                         if (spt[level].ptr) {
1287                                 ext4_ext_free_blocks(inode_ref, spt[level].ptr,
1288                                                      1, 0);
1289                                 ext4_ext_drop_refs(inode_ref, &spt[level].path,
1290                                                    1);
1291                         }
1292                 }
1293         } else {
1294                 while (--level >= 0 && spt) {
1295                         if (spt[level].switch_to)
1296                                 ext4_ext_replace_path(inode_ref, path, spt,
1297                                                       level);
1298                         else if (spt[level].ptr)
1299                                 ext4_ext_drop_refs(inode_ref, &spt[level].path,
1300                                                    1);
1301                 }
1302         }
1303         if (spt)
1304                 free(spt);
1305
1306         return ret;
1307 }
1308
1309 static void ext4_ext_remove_blocks(struct ext4_inode_ref *inode_ref,
1310                                    struct ext4_extent *ex, ext4_lblk_t from,
1311                                    ext4_lblk_t to)
1312 {
1313         ext4_lblk_t len = to - from + 1;
1314         ext4_lblk_t num;
1315         ext4_fsblk_t start;
1316         num = from - to_le32(ex->first_block);
1317         start = ext4_ext_pblock(ex) + num;
1318         ext4_dbg(DEBUG_EXTENT,
1319                  "Freeing %" PRIu32 " at %" PRIu64 ", %" PRIu32 "\n", from,
1320                  start, len);
1321
1322         ext4_ext_free_blocks(inode_ref, start, len, 0);
1323 }
1324
1325 static int ext4_ext_remove_idx(struct ext4_inode_ref *inode_ref,
1326                                struct ext4_extent_path *path, int32_t depth)
1327 {
1328         int err = EOK;
1329         int32_t i = depth;
1330         ext4_fsblk_t leaf;
1331
1332         /* free index block */
1333         leaf = ext4_idx_pblock(path[i].index);
1334
1335         if (path[i].index != EXT_LAST_INDEX(path[i].header)) {
1336                 ptrdiff_t len = EXT_LAST_INDEX(path[i].header) - path[i].index;
1337                 memmove(path[i].index, path[i].index + 1,
1338                         len * sizeof(struct ext4_extent_index));
1339         }
1340
1341         path[i].header->entries_count =
1342             to_le16(to_le16(path[i].header->entries_count) - 1);
1343         err = ext4_ext_dirty(inode_ref, path + i);
1344         if (err != EOK)
1345                 return err;
1346
1347         ext4_dbg(DEBUG_EXTENT, "IDX: Freeing %" PRIu32 " at %" PRIu64 ", %d\n",
1348                  to_le32(path[i].index->first_block), leaf, 1);
1349         ext4_ext_free_blocks(inode_ref, leaf, 1, 0);
1350
1351         while (i > 0) {
1352                 if (path[i].index != EXT_FIRST_INDEX(path[i].header))
1353                         break;
1354
1355                 path[i - 1].index->first_block = path[i].index->first_block;
1356                 err = ext4_ext_dirty(inode_ref, path + i - 1);
1357                 if (err != EOK)
1358                         break;
1359
1360                 i--;
1361         }
1362         return err;
1363 }
1364
1365 static int ext4_ext_remove_leaf(struct ext4_inode_ref *inode_ref,
1366                                 struct ext4_extent_path *path, ext4_lblk_t from,
1367                                 ext4_lblk_t to)
1368 {
1369
1370         int32_t depth = ext_depth(inode_ref->inode);
1371         struct ext4_extent *ex = path[depth].extent;
1372         struct ext4_extent *start_ex, *ex2 = NULL;
1373         struct ext4_extent_header *eh = path[depth].header;
1374         int32_t len;
1375         int err = EOK;
1376         uint16_t new_entries;
1377
1378         start_ex = ex;
1379         new_entries = to_le16(eh->entries_count);
1380         while (ex <= EXT_LAST_EXTENT(path[depth].header) &&
1381                to_le32(ex->first_block) <= to) {
1382                 int32_t new_len = 0;
1383                 int unwritten;
1384                 ext4_lblk_t start, new_start;
1385                 ext4_fsblk_t newblock;
1386                 new_start = start = to_le32(ex->first_block);
1387                 len = ext4_ext_get_actual_len(ex);
1388                 newblock = ext4_ext_pblock(ex);
1389                 if (start < from) {
1390                         len -= from - start;
1391                         new_len = from - start;
1392                         start = from;
1393                         start_ex++;
1394                 } else {
1395                         if (start + len - 1 > to) {
1396                                 len -= start + len - 1 - to;
1397                                 new_len = start + len - 1 - to;
1398                                 new_start = to + 1;
1399                                 newblock += to + 1 - start;
1400                                 ex2 = ex;
1401                         }
1402                 }
1403
1404                 ext4_ext_remove_blocks(inode_ref, ex, start, start + len - 1);
1405                 ex->first_block = to_le32(new_start);
1406                 if (!new_len)
1407                         new_entries--;
1408                 else {
1409                         unwritten = ext4_ext_is_unwritten(ex);
1410                         ex->block_count = to_le16(new_len);
1411                         ext4_ext_store_pblock(ex, newblock);
1412                         if (unwritten)
1413                                 ext4_ext_mark_unwritten(ex);
1414                 }
1415
1416                 ex += 1;
1417         }
1418
1419         if (ex2 == NULL)
1420                 ex2 = ex;
1421
1422         if (ex2 <= EXT_LAST_EXTENT(eh))
1423                 memmove(start_ex, ex2, EXT_LAST_EXTENT(eh) - ex2 + 1);
1424
1425         eh->entries_count = to_le16(new_entries);
1426         ext4_ext_dirty(inode_ref, path + depth);
1427         if (path[depth].extent == EXT_FIRST_EXTENT(eh) && eh->entries_count)
1428                 err = ext4_ext_correct_indexes(inode_ref, path);
1429
1430         /* if this leaf is free, then we should
1431          * remove it from index block above */
1432         if (err == EOK && eh->entries_count == 0 && path[depth].block.lb_id)
1433                 err = ext4_ext_remove_idx(inode_ref, path, depth - 1);
1434         else if (depth > 0)
1435                 path[depth - 1].index++;
1436
1437         return err;
1438 }
1439
1440 static bool ext4_ext_more_to_rm(struct ext4_extent_path *path, ext4_lblk_t to)
1441 {
1442         if (!to_le16(path->header->entries_count))
1443                 return false;
1444
1445         if (path->index > EXT_LAST_INDEX(path->header))
1446                 return false;
1447
1448         if (to_le32(path->index->first_block) > to)
1449                 return false;
1450
1451         return true;
1452 }
1453
1454 int ext4_extent_remove_space(struct ext4_inode_ref *inode_ref, ext4_lblk_t from,
1455                           ext4_lblk_t to)
1456 {
1457         struct ext4_extent_path *path = NULL;
1458         int ret = EOK;
1459         int32_t depth = ext_depth(inode_ref->inode);
1460         int32_t i;
1461
1462         ret = ext4_find_extent(inode_ref, from, &path, 0);
1463         if (ret)
1464                 goto out;
1465
1466         if (!path[depth].extent) {
1467                 ret = EOK;
1468                 goto out;
1469         }
1470
1471         bool in_range = IN_RANGE(from, to_le32(path[depth].extent->first_block),
1472                         ext4_ext_get_actual_len(path[depth].extent));
1473
1474         if (!in_range) {
1475                 ret = EOK;
1476                 goto out;
1477         }
1478
1479         /* If we do remove_space inside the range of an extent */
1480         if ((to_le32(path[depth].extent->first_block) < from) &&
1481             (to < to_le32(path[depth].extent->first_block) +
1482                         ext4_ext_get_actual_len(path[depth].extent) - 1)) {
1483
1484                 struct ext4_extent *ex = path[depth].extent, newex;
1485                 int unwritten = ext4_ext_is_unwritten(ex);
1486                 ext4_lblk_t ee_block = to_le32(ex->first_block);
1487                 int32_t len = ext4_ext_get_actual_len(ex);
1488                 ext4_fsblk_t newblock =
1489                         to + 1 - ee_block + ext4_ext_pblock(ex);
1490
1491                 ex->block_count = to_le16(from - ee_block);
1492                 if (unwritten)
1493                         ext4_ext_mark_unwritten(ex);
1494
1495                 ext4_ext_dirty(inode_ref, path + depth);
1496
1497                 newex.first_block = to_le32(to + 1);
1498                 newex.block_count = to_le16(ee_block + len - 1 - to);
1499                 ext4_ext_store_pblock(&newex, newblock);
1500                 if (unwritten)
1501                         ext4_ext_mark_unwritten(&newex);
1502
1503                 ret = ext4_ext_insert_extent(inode_ref, &path, &newex, 0);
1504                 goto out;
1505         }
1506
1507         i = depth;
1508         while (i >= 0) {
1509                 if (i == depth) {
1510                         struct ext4_extent_header *eh;
1511                         struct ext4_extent *first_ex, *last_ex;
1512                         ext4_lblk_t leaf_from, leaf_to;
1513                         eh = path[i].header;
1514                         ext4_assert(to_le16(eh->entries_count) > 0);
1515                         first_ex = EXT_FIRST_EXTENT(eh);
1516                         last_ex = EXT_LAST_EXTENT(eh);
1517                         leaf_from = to_le32(first_ex->first_block);
1518                         leaf_to = to_le32(last_ex->first_block) +
1519                                   ext4_ext_get_actual_len(last_ex) - 1;
1520                         if (leaf_from < from)
1521                                 leaf_from = from;
1522
1523                         if (leaf_to > to)
1524                                 leaf_to = to;
1525
1526                         ext4_ext_remove_leaf(inode_ref, path, leaf_from,
1527                                         leaf_to);
1528                         ext4_ext_drop_refs(inode_ref, path + i, 0);
1529                         i--;
1530                         continue;
1531                 }
1532
1533                 struct ext4_extent_header *eh;
1534                 eh = path[i].header;
1535                 if (ext4_ext_more_to_rm(path + i, to)) {
1536                         struct ext4_block bh = EXT4_BLOCK_ZERO();
1537                         if (path[i + 1].block.lb_id)
1538                                 ext4_ext_drop_refs(inode_ref, path + i + 1, 0);
1539
1540                         ret = read_extent_tree_block(inode_ref,
1541                                         ext4_idx_pblock(path[i].index),
1542                                         depth - i - 1, &bh, 0);
1543                         if (ret)
1544                                 goto out;
1545
1546                         path[i].p_block =
1547                                         ext4_idx_pblock(path[i].index);
1548                         path[i + 1].block = bh;
1549                         path[i + 1].header = ext_block_hdr(&bh);
1550                         path[i + 1].depth = depth - i - 1;
1551                         if (i + 1 == depth)
1552                                 path[i + 1].extent = EXT_FIRST_EXTENT(
1553                                         path[i + 1].header);
1554                         else
1555                                 path[i + 1].index =
1556                                         EXT_FIRST_INDEX(path[i + 1].header);
1557
1558                         i++;
1559                 } else {
1560                         if (i > 0) {
1561                                 if (!eh->entries_count)
1562                                         ret = ext4_ext_remove_idx(inode_ref, path,
1563                                                         i - 1);
1564                                 else
1565                                         path[i - 1].index++;
1566
1567                         }
1568
1569                         if (i)
1570                                 ext4_block_set(inode_ref->fs->bdev,
1571                                                 &path[i].block);
1572
1573
1574                         i--;
1575                 }
1576
1577         }
1578
1579         /* TODO: flexible tree reduction should be here */
1580         if (path->header->entries_count == 0) {
1581                 /*
1582                  * truncate to zero freed all the tree,
1583                  * so we need to correct eh_depth
1584                  */
1585                 ext_inode_hdr(inode_ref->inode)->depth = 0;
1586                 ext_inode_hdr(inode_ref->inode)->max_entries_count =
1587                     to_le16(ext4_ext_space_root(inode_ref));
1588                 ret = ext4_ext_dirty(inode_ref, path);
1589         }
1590
1591 out:
1592         ext4_ext_drop_refs(inode_ref, path, 0);
1593         free(path);
1594         path = NULL;
1595         return ret;
1596 }
1597
1598 static int ext4_ext_split_extent_at(struct ext4_inode_ref *inode_ref,
1599                                     struct ext4_extent_path **ppath,
1600                                     ext4_lblk_t split, uint32_t split_flag)
1601 {
1602         struct ext4_extent *ex, newex;
1603         ext4_fsblk_t newblock;
1604         ext4_lblk_t ee_block;
1605         int32_t ee_len;
1606         int32_t depth = ext_depth(inode_ref->inode);
1607         int err = EOK;
1608
1609         ex = (*ppath)[depth].extent;
1610         ee_block = to_le32(ex->first_block);
1611         ee_len = ext4_ext_get_actual_len(ex);
1612         newblock = split - ee_block + ext4_ext_pblock(ex);
1613
1614         if (split == ee_block) {
1615                 /*
1616                  * case b: block @split is the block that the extent begins with
1617                  * then we just change the state of the extent, and splitting
1618                  * is not needed.
1619                  */
1620                 if (split_flag & EXT4_EXT_MARK_UNWRIT2)
1621                         ext4_ext_mark_unwritten(ex);
1622                 else
1623                         ext4_ext_mark_initialized(ex);
1624
1625                 err = ext4_ext_dirty(inode_ref, *ppath + depth);
1626                 goto out;
1627         }
1628
1629         ex->block_count = to_le16(split - ee_block);
1630         if (split_flag & EXT4_EXT_MARK_UNWRIT1)
1631                 ext4_ext_mark_unwritten(ex);
1632
1633         err = ext4_ext_dirty(inode_ref, *ppath + depth);
1634         if (err != EOK)
1635                 goto out;
1636
1637         newex.first_block = to_le32(split);
1638         newex.block_count = to_le16(ee_len - (split - ee_block));
1639         ext4_ext_store_pblock(&newex, newblock);
1640         if (split_flag & EXT4_EXT_MARK_UNWRIT2)
1641                 ext4_ext_mark_unwritten(&newex);
1642         err = ext4_ext_insert_extent(inode_ref, ppath, &newex,
1643                                      EXT4_EXT_NO_COMBINE);
1644         if (err != EOK)
1645                 goto restore_extent_len;
1646
1647 out:
1648         return err;
1649 restore_extent_len:
1650         ex->block_count = to_le16(ee_len);
1651         err = ext4_ext_dirty(inode_ref, *ppath + depth);
1652         return err;
1653 }
1654
1655 static int ext4_ext_convert_to_initialized(struct ext4_inode_ref *inode_ref,
1656                                            struct ext4_extent_path **ppath,
1657                                            ext4_lblk_t split, uint32_t blocks)
1658 {
1659         int32_t depth = ext_depth(inode_ref->inode), err = EOK;
1660         struct ext4_extent *ex = (*ppath)[depth].extent;
1661
1662         ext4_assert(to_le32(ex->first_block) <= split);
1663
1664         if (split + blocks ==
1665             to_le32(ex->first_block) + ext4_ext_get_actual_len(ex)) {
1666                 /* split and initialize right part */
1667                 err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1668                                                EXT4_EXT_MARK_UNWRIT1);
1669         } else if (to_le32(ex->first_block) == split) {
1670                 /* split and initialize left part */
1671                 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1672                                                EXT4_EXT_MARK_UNWRIT2);
1673         } else {
1674                 /* split 1 extent to 3 and initialize the 2nd */
1675                 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1676                                                EXT4_EXT_MARK_UNWRIT1 |
1677                                                    EXT4_EXT_MARK_UNWRIT2);
1678                 if (!err) {
1679                         err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1680                                                        EXT4_EXT_MARK_UNWRIT1);
1681                 }
1682         }
1683
1684         return err;
1685 }
1686
1687 static ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_extent_path *path)
1688 {
1689         int32_t depth;
1690
1691         depth = path->depth;
1692
1693         if (depth == 0 && path->extent == NULL)
1694                 return EXT_MAX_BLOCKS;
1695
1696         while (depth >= 0) {
1697                 if (depth == path->depth) {
1698                         /* leaf */
1699                         if (path[depth].extent &&
1700                             path[depth].extent !=
1701                                 EXT_LAST_EXTENT(path[depth].header))
1702                                 return to_le32(
1703                                     path[depth].extent[1].first_block);
1704                 } else {
1705                         /* index */
1706                         if (path[depth].index !=
1707                             EXT_LAST_INDEX(path[depth].header))
1708                                 return to_le32(
1709                                     path[depth].index[1].first_block);
1710                 }
1711                 depth--;
1712         }
1713
1714         return EXT_MAX_BLOCKS;
1715 }
1716
1717 static int ext4_ext_zero_unwritten_range(struct ext4_inode_ref *inode_ref,
1718                                          ext4_fsblk_t block,
1719                                          uint32_t blocks_count)
1720 {
1721         int err = EOK;
1722         uint32_t i;
1723         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
1724         for (i = 0; i < blocks_count; i++) {
1725                 struct ext4_block bh = EXT4_BLOCK_ZERO();
1726                 err = ext4_block_get_noread(inode_ref->fs->bdev, &bh, block + i);
1727                 if (err != EOK)
1728                         break;
1729
1730                 memset(bh.data, 0, block_size);
1731                 bh.dirty = true;
1732                 err = ext4_block_set(inode_ref->fs->bdev, &bh);
1733                 if (err != EOK)
1734                         break;
1735         }
1736         return err;
1737 }
1738
1739 int ext4_extent_get_blocks(struct ext4_inode_ref *inode_ref, ext4_fsblk_t iblock,
1740                         uint32_t max_blocks, ext4_fsblk_t *result, bool create,
1741                         uint32_t *blocks_count)
1742 {
1743         struct ext4_extent_path *path = NULL;
1744         struct ext4_extent newex, *ex;
1745         ext4_fsblk_t goal;
1746         int err = EOK;
1747         int32_t depth;
1748         uint32_t allocated = 0;
1749         ext4_fsblk_t next, newblock;
1750
1751         if (result)
1752                 *result = 0;
1753
1754         if (blocks_count)
1755                 *blocks_count = 0;
1756
1757         /* find extent for this block */
1758         err = ext4_find_extent(inode_ref, iblock, &path, 0);
1759         if (err != EOK) {
1760                 path = NULL;
1761                 goto out2;
1762         }
1763
1764         depth = ext_depth(inode_ref->inode);
1765
1766         /*
1767          * consistent leaf must not be empty
1768          * this situations is possible, though, _during_ tree modification
1769          * this is why assert can't be put in ext4_ext_find_extent()
1770          */
1771         ex = path[depth].extent;
1772         if (ex) {
1773                 ext4_lblk_t ee_block = to_le32(ex->first_block);
1774                 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
1775                 uint16_t ee_len = ext4_ext_get_actual_len(ex);
1776                 /* if found exent covers block, simple return it */
1777                 if (IN_RANGE(iblock, ee_block, ee_len)) {
1778                         /* number of remain blocks in the extent */
1779                         allocated = ee_len - (iblock - ee_block);
1780
1781                         if (!ext4_ext_is_unwritten(ex)) {
1782                                 newblock = iblock - ee_block + ee_start;
1783                                 goto out;
1784                         }
1785
1786                         if (!create) {
1787                                 newblock = 0;
1788                                 goto out;
1789                         }
1790
1791                         uint32_t zero_range;
1792                         zero_range = allocated;
1793                         if (zero_range > max_blocks)
1794                                 zero_range = max_blocks;
1795
1796                         newblock = iblock - ee_block + ee_start;
1797                         err = ext4_ext_zero_unwritten_range(inode_ref, newblock,
1798                                         zero_range);
1799                         if (err != EOK)
1800                                 goto out2;
1801
1802                         err = ext4_ext_convert_to_initialized(inode_ref, &path,
1803                                         iblock, zero_range);
1804                         if (err != EOK)
1805                                 goto out2;
1806
1807                         goto out;
1808                 }
1809         }
1810
1811         /*
1812          * requested block isn't allocated yet
1813          * we couldn't try to create block if create flag is zero
1814          */
1815         if (!create) {
1816                 goto out2;
1817         }
1818
1819         /* find next allocated block so that we know how many
1820          * blocks we can allocate without ovelapping next extent */
1821         next = ext4_ext_next_allocated_block(path);
1822         allocated = next - iblock;
1823         if (allocated > max_blocks)
1824                 allocated = max_blocks;
1825
1826         /* allocate new block */
1827         goal = ext4_ext_find_goal(inode_ref, path, iblock);
1828         newblock = ext4_new_meta_blocks(inode_ref, goal, 0, &allocated, &err);
1829         if (!newblock)
1830                 goto out2;
1831
1832         /* try to insert new extent into found leaf and return */
1833         newex.first_block = to_le32(iblock);
1834         ext4_ext_store_pblock(&newex, newblock);
1835         newex.block_count = to_le16(allocated);
1836         err = ext4_ext_insert_extent(inode_ref, &path, &newex, 0);
1837         if (err != EOK) {
1838                 /* free data blocks we just allocated */
1839                 ext4_ext_free_blocks(inode_ref, ext4_ext_pblock(&newex),
1840                                      to_le16(newex.block_count), 0);
1841                 goto out2;
1842         }
1843
1844         /* previous routine could use block we allocated */
1845         newblock = ext4_ext_pblock(&newex);
1846
1847 out:
1848         if (allocated > max_blocks)
1849                 allocated = max_blocks;
1850
1851         if (result)
1852                 *result = newblock;
1853
1854         if (blocks_count)
1855                 *blocks_count = allocated;
1856
1857 out2:
1858         if (path) {
1859                 ext4_ext_drop_refs(inode_ref, path, 0);
1860                 free(path);
1861         }
1862
1863         return err;
1864 }
1865 #endif