ext4_extent: fix passing wrong size to memmove.
[lwext4.git] / src / ext4_extent.c
1 /*
2  * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3  * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * - Redistributions of source code must retain the above copyright
10  *   notice, this list of conditions and the following disclaimer.
11  * - Redistributions in binary form must reproduce the above copyright
12  *   notice, this list of conditions and the following disclaimer in the
13  *   documentation and/or other materials provided with the distribution.
14  * - The name of the author may not be used to endorse or promote products
15  *   derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28
29 #include "ext4_config.h"
30 #include "ext4_types.h"
31 #include "ext4_misc.h"
32 #include "ext4_errno.h"
33 #include "ext4_debug.h"
34
35 #include "ext4_blockdev.h"
36 #include "ext4_trans.h"
37 #include "ext4_fs.h"
38 #include "ext4_super.h"
39 #include "ext4_crc32.h"
40 #include "ext4_balloc.h"
41 #include "ext4_extent.h"
42
43 #include <stdlib.h>
44 #include <string.h>
45 #include <inttypes.h>
46 #include <stddef.h>
47
48
49 /*
50  * used by extent splitting.
51  */
52 #define EXT4_EXT_MARK_UNWRIT1 0x02 /* mark first half unwritten */
53 #define EXT4_EXT_MARK_UNWRIT2 0x04 /* mark second half unwritten */
54 #define EXT4_EXT_DATA_VALID1 0x08  /* first half contains valid data */
55 #define EXT4_EXT_DATA_VALID2 0x10  /* second half contains valid data */
56 #define EXT4_EXT_NO_COMBINE 0x20   /* do not combine two extents */
57
58 static struct ext4_extent_tail *
59 find_ext4_extent_tail(struct ext4_extent_header *eh)
60 {
61         return (struct ext4_extent_tail *)(((char *)eh) +
62                                            EXT4_EXTENT_TAIL_OFFSET(eh));
63 }
64
65 static struct ext4_extent_header *ext_inode_hdr(struct ext4_inode *inode)
66 {
67         return (struct ext4_extent_header *)inode->blocks;
68 }
69
70 static struct ext4_extent_header *ext_block_hdr(struct ext4_block *block)
71 {
72         return (struct ext4_extent_header *)block->data;
73 }
74
75 static uint16_t ext_depth(struct ext4_inode *inode)
76 {
77         return to_le16(ext_inode_hdr(inode)->depth);
78 }
79
80 static uint16_t ext4_ext_get_actual_len(struct ext4_extent *ext)
81 {
82         return (to_le16(ext->block_count) <= EXT_INIT_MAX_LEN
83                     ? to_le16(ext->block_count)
84                     : (to_le16(ext->block_count) - EXT_INIT_MAX_LEN));
85 }
86
87 static void ext4_ext_mark_initialized(struct ext4_extent *ext)
88 {
89         ext->block_count = to_le16(ext4_ext_get_actual_len(ext));
90 }
91
92 static void ext4_ext_mark_unwritten(struct ext4_extent *ext)
93 {
94         ext->block_count |= to_le16(EXT_INIT_MAX_LEN);
95 }
96
97 static int ext4_ext_is_unwritten(struct ext4_extent *ext)
98 {
99         /* Extent with ee_len of 0x8000 is treated as an initialized extent */
100         return (to_le16(ext->block_count) > EXT_INIT_MAX_LEN);
101 }
102
103 /*
104  * ext4_ext_pblock:
105  * combine low and high parts of physical block number into ext4_fsblk_t
106  */
107 static ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex)
108 {
109         ext4_fsblk_t block;
110
111         block = to_le32(ex->start_lo);
112         block |= ((ext4_fsblk_t)to_le16(ex->start_hi) << 31) << 1;
113         return block;
114 }
115
116 /*
117  * ext4_idx_pblock:
118  * combine low and high parts of a leaf physical block number into ext4_fsblk_t
119  */
120 static ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_index *ix)
121 {
122         ext4_fsblk_t block;
123
124         block = to_le32(ix->leaf_lo);
125         block |= ((ext4_fsblk_t)to_le16(ix->leaf_hi) << 31) << 1;
126         return block;
127 }
128
129 /*
130  * ext4_ext_store_pblock:
131  * stores a large physical block number into an extent struct,
132  * breaking it into parts
133  */
134 static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
135 {
136         ex->start_lo = to_le32((uint32_t)(pb & 0xffffffff));
137         ex->start_hi = to_le16((uint16_t)((pb >> 32)) & 0xffff);
138 }
139
140 /*
141  * ext4_idx_store_pblock:
142  * stores a large physical block number into an index struct,
143  * breaking it into parts
144  */
145 static void ext4_idx_store_pblock(struct ext4_extent_index *ix, ext4_fsblk_t pb)
146 {
147         ix->leaf_lo = to_le32((uint32_t)(pb & 0xffffffff));
148         ix->leaf_hi = to_le16((uint16_t)((pb >> 32)) & 0xffff);
149 }
150
151 static int ext4_allocate_single_block(struct ext4_inode_ref *inode_ref,
152                                       ext4_fsblk_t goal,
153                                       ext4_fsblk_t *blockp)
154 {
155         return ext4_balloc_alloc_block(inode_ref, goal, blockp);
156 }
157
158 static ext4_fsblk_t ext4_new_meta_blocks(struct ext4_inode_ref *inode_ref,
159                                          ext4_fsblk_t goal,
160                                          uint32_t flags __unused,
161                                          uint32_t *count, int *errp)
162 {
163         ext4_fsblk_t block = 0;
164
165         *errp = ext4_allocate_single_block(inode_ref, goal, &block);
166         if (count)
167                 *count = 1;
168         return block;
169 }
170
171 static void ext4_ext_free_blocks(struct ext4_inode_ref *inode_ref,
172                                  ext4_fsblk_t block, uint32_t count,
173                                  uint32_t flags __unused)
174 {
175         ext4_balloc_free_blocks(inode_ref, block, count);
176 }
177
178 static uint16_t ext4_ext_space_block(struct ext4_inode_ref *inode_ref)
179 {
180         uint16_t size;
181         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
182
183         size = (block_size - sizeof(struct ext4_extent_header)) /
184                sizeof(struct ext4_extent);
185 #ifdef AGGRESSIVE_TEST
186         if (size > 6)
187                 size = 6;
188 #endif
189         return size;
190 }
191
192 static uint16_t ext4_ext_space_block_idx(struct ext4_inode_ref *inode_ref)
193 {
194         uint16_t size;
195         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
196
197         size = (block_size - sizeof(struct ext4_extent_header)) /
198                sizeof(struct ext4_extent_index);
199 #ifdef AGGRESSIVE_TEST
200         if (size > 5)
201                 size = 5;
202 #endif
203         return size;
204 }
205
206 static uint16_t ext4_ext_space_root(struct ext4_inode_ref *inode_ref)
207 {
208         uint16_t size;
209
210         size = sizeof(inode_ref->inode->blocks);
211         size -= sizeof(struct ext4_extent_header);
212         size /= sizeof(struct ext4_extent);
213 #ifdef AGGRESSIVE_TEST
214         if (size > 3)
215                 size = 3;
216 #endif
217         return size;
218 }
219
220 static uint16_t ext4_ext_space_root_idx(struct ext4_inode_ref *inode_ref)
221 {
222         uint16_t size;
223
224         size = sizeof(inode_ref->inode->blocks);
225         size -= sizeof(struct ext4_extent_header);
226         size /= sizeof(struct ext4_extent_index);
227 #ifdef AGGRESSIVE_TEST
228         if (size > 4)
229                 size = 4;
230 #endif
231         return size;
232 }
233
234 static uint16_t ext4_ext_max_entries(struct ext4_inode_ref *inode_ref,
235                                    uint32_t depth)
236 {
237         uint16_t max;
238
239         if (depth == ext_depth(inode_ref->inode)) {
240                 if (depth == 0)
241                         max = ext4_ext_space_root(inode_ref);
242                 else
243                         max = ext4_ext_space_root_idx(inode_ref);
244         } else {
245                 if (depth == 0)
246                         max = ext4_ext_space_block(inode_ref);
247                 else
248                         max = ext4_ext_space_block_idx(inode_ref);
249         }
250
251         return max;
252 }
253
254 static ext4_fsblk_t ext4_ext_find_goal(struct ext4_inode_ref *inode_ref,
255                                        struct ext4_extent_path *path,
256                                        ext4_lblk_t block)
257 {
258         if (path) {
259                 uint32_t depth = path->depth;
260                 struct ext4_extent *ex;
261
262                 /*
263                  * Try to predict block placement assuming that we are
264                  * filling in a file which will eventually be
265                  * non-sparse --- i.e., in the case of libbfd writing
266                  * an ELF object sections out-of-order but in a way
267                  * the eventually results in a contiguous object or
268                  * executable file, or some database extending a table
269                  * space file.  However, this is actually somewhat
270                  * non-ideal if we are writing a sparse file such as
271                  * qemu or KVM writing a raw image file that is going
272                  * to stay fairly sparse, since it will end up
273                  * fragmenting the file system's free space.  Maybe we
274                  * should have some hueristics or some way to allow
275                  * userspace to pass a hint to file system,
276                  * especially if the latter case turns out to be
277                  * common.
278                  */
279                 ex = path[depth].extent;
280                 if (ex) {
281                         ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
282                         ext4_lblk_t ext_block = to_le32(ex->first_block);
283
284                         if (block > ext_block)
285                                 return ext_pblk + (block - ext_block);
286                         else
287                                 return ext_pblk - (ext_block - block);
288                 }
289
290                 /* it looks like index is empty;
291                  * try to find starting block from index itself */
292                 if (path[depth].block.lb_id)
293                         return path[depth].block.lb_id;
294         }
295
296         /* OK. use inode's group */
297         return ext4_fs_inode_to_goal_block(inode_ref);
298 }
299
300 /*
301  * Allocation for a meta data block
302  */
303 static ext4_fsblk_t ext4_ext_new_meta_block(struct ext4_inode_ref *inode_ref,
304                                             struct ext4_extent_path *path,
305                                             struct ext4_extent *ex, int *err,
306                                             uint32_t flags)
307 {
308         ext4_fsblk_t goal, newblock;
309
310         goal = ext4_ext_find_goal(inode_ref, path, to_le32(ex->first_block));
311         newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, err);
312         return newblock;
313 }
314
315 #if CONFIG_META_CSUM_ENABLE
316 static uint32_t ext4_ext_block_csum(struct ext4_inode_ref *inode_ref,
317                                     struct ext4_extent_header *eh)
318 {
319         uint32_t checksum = 0;
320         struct ext4_sblock *sb = &inode_ref->fs->sb;
321
322         if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
323                 uint32_t ino_index = to_le32(inode_ref->index);
324                 uint32_t ino_gen =
325                         to_le32(ext4_inode_get_generation(inode_ref->inode));
326                 /* First calculate crc32 checksum against fs uuid */
327                 checksum = ext4_crc32c(EXT4_CRC32_INIT, sb->uuid,
328                                 sizeof(sb->uuid));
329                 /* Then calculate crc32 checksum against inode number
330                  * and inode generation */
331                 checksum = ext4_crc32c(checksum, &ino_index,
332                                      sizeof(ino_index));
333                 checksum = ext4_crc32c(checksum, &ino_gen,
334                                      sizeof(ino_gen));
335                 /* Finally calculate crc32 checksum against 
336                  * the entire extent block up to the checksum field */
337                 checksum = ext4_crc32c(checksum, eh,
338                                 EXT4_EXTENT_TAIL_OFFSET(eh));
339         }
340         return checksum;
341 }
342 #else
343 #define ext4_ext_block_csum(...) 0
344 #endif
345
346 static void ext4_extent_block_csum_set(struct ext4_inode_ref *inode_ref __unused,
347                                        struct ext4_extent_header *eh)
348 {
349         struct ext4_extent_tail *tail;
350
351         tail = find_ext4_extent_tail(eh);
352         tail->et_checksum = to_le32(ext4_ext_block_csum(inode_ref, eh));
353 }
354
355 static int ext4_ext_dirty(struct ext4_inode_ref *inode_ref,
356                           struct ext4_extent_path *path)
357 {
358         if (path->block.lb_id)
359                 ext4_trans_set_block_dirty(path->block.buf);
360         else
361                 inode_ref->dirty = true;
362
363         return EOK;
364 }
365
366 static void ext4_ext_drop_refs(struct ext4_inode_ref *inode_ref,
367                                struct ext4_extent_path *path, bool keep_other)
368 {
369         int32_t depth, i;
370
371         if (!path)
372                 return;
373         if (keep_other)
374                 depth = 0;
375         else
376                 depth = path->depth;
377
378         for (i = 0; i <= depth; i++, path++) {
379                 if (path->block.lb_id) {
380                         if (ext4_bcache_test_flag(path->block.buf, BC_DIRTY))
381                                 ext4_extent_block_csum_set(inode_ref,
382                                                 path->header);
383
384                         ext4_block_set(inode_ref->fs->bdev, &path->block);
385                 }
386         }
387 }
388
389 /*
390  * Check that whether the basic information inside the extent header
391  * is correct or not.
392  */
393 static int ext4_ext_check(struct ext4_inode_ref *inode_ref,
394                           struct ext4_extent_header *eh, uint16_t depth,
395                           ext4_fsblk_t pblk __unused)
396 {
397         struct ext4_extent_tail *tail;
398         struct ext4_sblock *sb = &inode_ref->fs->sb;
399         const char *error_msg;
400         (void)error_msg;
401
402         if (to_le16(eh->magic) != EXT4_EXTENT_MAGIC) {
403                 error_msg = "invalid magic";
404                 goto corrupted;
405         }
406         if (to_le16(eh->depth) != depth) {
407                 error_msg = "unexpected eh_depth";
408                 goto corrupted;
409         }
410         if (eh->max_entries_count == 0) {
411                 error_msg = "invalid eh_max";
412                 goto corrupted;
413         }
414         if (to_le16(eh->entries_count) > to_le16(eh->max_entries_count)) {
415                 error_msg = "invalid eh_entries";
416                 goto corrupted;
417         }
418
419         tail = find_ext4_extent_tail(eh);
420         if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
421                 if (tail->et_checksum != to_le32(ext4_ext_block_csum(inode_ref, eh))) {
422                         ext4_dbg(DEBUG_EXTENT,
423                                  DBG_WARN "Extent block checksum failed."
424                                  "Blocknr: %" PRIu64"\n",
425                                  pblk);
426
427                 }
428         }
429
430         return EOK;
431
432 corrupted:
433         ext4_dbg(DEBUG_EXTENT, "Bad extents B+ tree block: %s. "
434                                "Blocknr: %" PRId64 "\n",
435                  error_msg, pblk);
436         return EIO;
437 }
438
439 static int read_extent_tree_block(struct ext4_inode_ref *inode_ref,
440                                   ext4_fsblk_t pblk, int32_t depth,
441                                   struct ext4_block *bh,
442                                   uint32_t flags __unused)
443 {
444         int err;
445
446         err = ext4_trans_block_get(inode_ref->fs->bdev, bh, pblk);
447         if (err != EOK)
448                 goto errout;
449
450         err = ext4_ext_check(inode_ref, ext_block_hdr(bh), depth, pblk);
451         if (err != EOK)
452                 goto errout;
453
454         return EOK;
455 errout:
456         if (bh->lb_id)
457                 ext4_block_set(inode_ref->fs->bdev, bh);
458
459         return err;
460 }
461
462 /*
463  * ext4_ext_binsearch_idx:
464  * binary search for the closest index of the given block
465  * the header must be checked before calling this
466  */
467 static void ext4_ext_binsearch_idx(struct ext4_extent_path *path,
468                                    ext4_lblk_t block)
469 {
470         struct ext4_extent_header *eh = path->header;
471         struct ext4_extent_index *r, *l, *m;
472
473         l = EXT_FIRST_INDEX(eh) + 1;
474         r = EXT_LAST_INDEX(eh);
475         while (l <= r) {
476                 m = l + (r - l) / 2;
477                 if (block < to_le32(m->first_block))
478                         r = m - 1;
479                 else
480                         l = m + 1;
481         }
482
483         path->index = l - 1;
484 }
485
486 /*
487  * ext4_ext_binsearch:
488  * binary search for closest extent of the given block
489  * the header must be checked before calling this
490  */
491 static void ext4_ext_binsearch(struct ext4_extent_path *path, ext4_lblk_t block)
492 {
493         struct ext4_extent_header *eh = path->header;
494         struct ext4_extent *r, *l, *m;
495
496         if (eh->entries_count == 0) {
497                 /*
498                  * this leaf is empty:
499                  * we get such a leaf in split/add case
500                  */
501                 return;
502         }
503
504         l = EXT_FIRST_EXTENT(eh) + 1;
505         r = EXT_LAST_EXTENT(eh);
506
507         while (l <= r) {
508                 m = l + (r - l) / 2;
509                 if (block < to_le32(m->first_block))
510                         r = m - 1;
511                 else
512                         l = m + 1;
513         }
514
515         path->extent = l - 1;
516 }
517
518 static int ext4_find_extent(struct ext4_inode_ref *inode_ref, ext4_lblk_t block,
519                             struct ext4_extent_path **orig_path, uint32_t flags)
520 {
521         struct ext4_extent_header *eh;
522         struct ext4_block bh = EXT4_BLOCK_ZERO();
523         ext4_fsblk_t buf_block = 0;
524         struct ext4_extent_path *path = *orig_path;
525         int32_t depth, ppos = 0;
526         int32_t i;
527         int ret;
528
529         eh = ext_inode_hdr(inode_ref->inode);
530         depth = ext_depth(inode_ref->inode);
531
532         if (path) {
533                 ext4_ext_drop_refs(inode_ref, path, 0);
534                 if (depth > path[0].maxdepth) {
535                         free(path);
536                         *orig_path = path = NULL;
537                 }
538         }
539         if (!path) {
540                 int32_t path_depth = depth + 1;
541                 /* account possible depth increase */
542                 path = calloc(1, sizeof(struct ext4_extent_path) *
543                                      (path_depth + 1));
544                 if (!path)
545                         return ENOMEM;
546                 path[0].maxdepth = path_depth;
547         }
548         path[0].header = eh;
549         path[0].block = bh;
550
551         i = depth;
552         /* walk through the tree */
553         while (i) {
554                 ext4_ext_binsearch_idx(path + ppos, block);
555                 path[ppos].p_block = ext4_idx_pblock(path[ppos].index);
556                 path[ppos].depth = i;
557                 path[ppos].extent = NULL;
558                 buf_block = path[ppos].p_block;
559
560                 i--;
561                 ppos++;
562                 if (!path[ppos].block.lb_id ||
563                     path[ppos].block.lb_id != buf_block) {
564                         ret = read_extent_tree_block(inode_ref, buf_block, i,
565                                                      &bh, flags);
566                         if (ret != EOK) {
567                                 goto err;
568                         }
569                         if (ppos > depth) {
570                                 ext4_block_set(inode_ref->fs->bdev, &bh);
571                                 ret = EIO;
572                                 goto err;
573                         }
574
575                         eh = ext_block_hdr(&bh);
576                         path[ppos].block = bh;
577                         path[ppos].header = eh;
578                 }
579         }
580
581         path[ppos].depth = i;
582         path[ppos].extent = NULL;
583         path[ppos].index = NULL;
584
585         /* find extent */
586         ext4_ext_binsearch(path + ppos, block);
587         /* if not an empty leaf */
588         if (path[ppos].extent)
589                 path[ppos].p_block = ext4_ext_pblock(path[ppos].extent);
590
591         *orig_path = path;
592
593         ret = EOK;
594         return ret;
595
596 err:
597         ext4_ext_drop_refs(inode_ref, path, 0);
598         free(path);
599         if (orig_path)
600                 *orig_path = NULL;
601         return ret;
602 }
603
604 static void ext4_ext_init_header(struct ext4_inode_ref *inode_ref,
605                                  struct ext4_extent_header *eh, int32_t depth)
606 {
607         eh->entries_count = 0;
608         eh->max_entries_count = to_le16(ext4_ext_max_entries(inode_ref, depth));
609         eh->magic = to_le16(EXT4_EXTENT_MAGIC);
610         eh->depth = depth;
611 }
612
613 /*
614  * Be cautious, the buffer_head returned is not yet mark dirtied. */
615 static int ext4_ext_split_node(struct ext4_inode_ref *inode_ref,
616                                struct ext4_extent_path *path, int32_t at,
617                                struct ext4_extent *newext,
618                                ext4_fsblk_t *sibling, struct ext4_block *new_bh)
619 {
620         int ret;
621         ext4_fsblk_t newblock;
622         struct ext4_block bh = EXT4_BLOCK_ZERO();
623         int32_t depth = ext_depth(inode_ref->inode);
624
625         ext4_assert(sibling);
626
627         /* FIXME: currently we split at the point after the current extent. */
628         newblock = ext4_ext_new_meta_block(inode_ref, path, newext, &ret, 0);
629         if (ret)
630                 goto cleanup;
631
632         /*  For write access.# */
633         ret = ext4_trans_block_get_noread(inode_ref->fs->bdev, &bh, newblock);
634         if (ret != EOK)
635                 goto cleanup;
636
637         if (at == depth) {
638                 /* start copy from next extent */
639                 ptrdiff_t m = EXT_MAX_EXTENT(path[at].header) - path[at].extent;
640                 struct ext4_extent_header *neh;
641                 neh = ext_block_hdr(&bh);
642                 ext4_ext_init_header(inode_ref, neh, 0);
643                 if (m) {
644                         struct ext4_extent *ex;
645                         ex = EXT_FIRST_EXTENT(neh);
646                         memmove(ex, path[at].extent + 1,
647                                 sizeof(struct ext4_extent) * m);
648                         neh->entries_count =
649                             to_le16(to_le16(neh->entries_count) + m);
650                         path[at].header->entries_count = to_le16(
651                             to_le16(path[at].header->entries_count) - m);
652                         ret = ext4_ext_dirty(inode_ref, path + at);
653                         if (ret)
654                                 goto cleanup;
655                 }
656         } else {
657                 ptrdiff_t m = EXT_MAX_INDEX(path[at].header) - path[at].index;
658                 struct ext4_extent_header *neh;
659                 neh = ext_block_hdr(&bh);
660                 ext4_ext_init_header(inode_ref, neh, depth - at);
661                 if (m) {
662                         struct ext4_extent_index *ix;
663                         ix = EXT_FIRST_INDEX(neh);
664                         memmove(ix, path[at].index + 1,
665                                 sizeof(struct ext4_extent) * m);
666                         neh->entries_count =
667                             to_le16(to_le16(neh->entries_count) + m);
668                         path[at].header->entries_count = to_le16(
669                             to_le16(path[at].header->entries_count) - m);
670                         ret = ext4_ext_dirty(inode_ref, path + at);
671                         if (ret)
672                                 goto cleanup;
673                 }
674         }
675 cleanup:
676         if (ret) {
677                 if (bh.lb_id) {
678                         ext4_block_set(inode_ref->fs->bdev, &bh);
679                 }
680                 if (newblock)
681                         ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
682
683                 newblock = 0;
684         }
685         *sibling = newblock;
686         *new_bh = bh;
687         return ret;
688 }
689
690 static ext4_lblk_t ext4_ext_block_index(struct ext4_extent_header *eh)
691 {
692         if (eh->depth)
693                 return to_le32(EXT_FIRST_INDEX(eh)->first_block);
694
695         return to_le32(EXT_FIRST_EXTENT(eh)->first_block);
696 }
697
698 struct ext_split_trans {
699         ext4_fsblk_t ptr;
700         struct ext4_extent_path path;
701         int switch_to;
702 };
703
704 static int ext4_ext_insert_index(struct ext4_inode_ref *inode_ref,
705                                  struct ext4_extent_path *path,
706                                  int32_t at,
707                                  struct ext4_extent *newext,
708                                  ext4_lblk_t insert_index,
709                                  ext4_fsblk_t insert_block,
710                                  struct ext_split_trans *spt,
711                                  bool *need_grow)
712 {
713         struct ext4_extent_index *ix;
714         struct ext4_extent_path *curp = path + at;
715         struct ext4_block bh = EXT4_BLOCK_ZERO();
716         int32_t len;
717         int err;
718         struct ext4_extent_header *eh;
719
720         *need_grow = false;
721
722         if (curp->index && insert_index == to_le32(curp->index->first_block))
723                 return EIO;
724
725         if (to_le16(curp->header->entries_count) ==
726             to_le16(curp->header->max_entries_count)) {
727                 if (at) {
728                         struct ext4_extent_header *neh;
729                         err = ext4_ext_split_node(inode_ref, path, at, newext,
730                                                   &spt->ptr, &bh);
731                         if (err != EOK)
732                                 goto out;
733
734                         neh = ext_block_hdr(&bh);
735                         if (insert_index > to_le32(curp->index->first_block)) {
736                                 /* Make decision which node should be used to
737                                  * insert the index.*/
738                                 if (to_le16(neh->entries_count) >
739                                     to_le16(curp->header->entries_count)) {
740                                         eh = curp->header;
741                                         /* insert after */
742                                         ix = EXT_LAST_INDEX(eh) + 1;
743                                 } else {
744                                         eh = neh;
745                                         ix = EXT_FIRST_INDEX(eh);
746                                 }
747                         } else {
748                                 eh = curp->header;
749                                 /* insert before */
750                                 ix = EXT_LAST_INDEX(eh);
751                         }
752                 } else {
753                         err = EOK;
754                         *need_grow = true;
755                         goto out;
756                 }
757         } else {
758                 eh = curp->header;
759                 if (curp->index == NULL) {
760                         ix = EXT_FIRST_INDEX(eh);
761                         curp->index = ix;
762                 } else if (insert_index > to_le32(curp->index->first_block)) {
763                         /* insert after */
764                         ix = curp->index + 1;
765                 } else {
766                         /* insert before */
767                         ix = curp->index;
768                 }
769         }
770
771         len = EXT_LAST_INDEX(eh) - ix + 1;
772         ext4_assert(len >= 0);
773         if (len > 0)
774                 memmove(ix + 1, ix, len * sizeof(struct ext4_extent_index));
775
776         if (ix > EXT_MAX_INDEX(eh)) {
777                 err = EIO;
778                 goto out;
779         }
780
781         ix->first_block = to_le32(insert_index);
782         ext4_idx_store_pblock(ix, insert_block);
783         eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
784
785         if (ix > EXT_LAST_INDEX(eh)) {
786                 err = EIO;
787                 goto out;
788         }
789
790         if (eh == curp->header)
791                 err = ext4_ext_dirty(inode_ref, curp);
792         else
793                 err = EOK;
794
795 out:
796         if (err != EOK || *need_grow) {
797                 if (bh.lb_id)
798                         ext4_block_set(inode_ref->fs->bdev, &bh);
799
800                 spt->ptr = 0;
801         } else if (bh.lb_id) {
802                 /* If we got a sibling leaf. */
803                 ext4_extent_block_csum_set(inode_ref, ext_block_hdr(&bh));
804                 ext4_trans_set_block_dirty(bh.buf);
805
806                 spt->path.p_block = ext4_idx_pblock(ix);
807                 spt->path.depth = to_le16(eh->depth);
808                 spt->path.maxdepth = 0;
809                 spt->path.extent = NULL;
810                 spt->path.index = ix;
811                 spt->path.header = eh;
812                 spt->path.block = bh;
813
814                 /*
815                  * If newext->ee_block can be included into the
816                  * right sub-tree.
817                  */
818                 if (to_le32(newext->first_block) >=
819                     ext4_ext_block_index(ext_block_hdr(&bh)))
820                         spt->switch_to = 1;
821                 else {
822                         curp->index = ix;
823                         curp->p_block = ext4_idx_pblock(ix);
824                 }
825
826         } else {
827                 spt->ptr = 0;
828                 curp->index = ix;
829                 curp->p_block = ext4_idx_pblock(ix);
830         }
831         return err;
832 }
833
834 /*
835  * ext4_ext_correct_indexes:
836  * if leaf gets modified and modified extent is first in the leaf,
837  * then we have to correct all indexes above.
838  */
839 static int ext4_ext_correct_indexes(struct ext4_inode_ref *inode_ref,
840                                     struct ext4_extent_path *path)
841 {
842         struct ext4_extent_header *eh;
843         int32_t depth = ext_depth(inode_ref->inode);
844         struct ext4_extent *ex;
845         uint32_t border;
846         int32_t k;
847         int err = EOK;
848
849         eh = path[depth].header;
850         ex = path[depth].extent;
851
852         if (ex == NULL || eh == NULL)
853                 return EIO;
854
855         if (depth == 0) {
856                 /* there is no tree at all */
857                 return EOK;
858         }
859
860         if (ex != EXT_FIRST_EXTENT(eh)) {
861                 /* we correct tree if first leaf got modified only */
862                 return EOK;
863         }
864
865         k = depth - 1;
866         border = path[depth].extent->first_block;
867         path[k].index->first_block = border;
868         err = ext4_ext_dirty(inode_ref, path + k);
869         if (err != EOK)
870                 return err;
871
872         while (k--) {
873                 /* change all left-side indexes */
874                 if (path[k + 1].index != EXT_FIRST_INDEX(path[k + 1].header))
875                         break;
876                 path[k].index->first_block = border;
877                 err = ext4_ext_dirty(inode_ref, path + k);
878                 if (err != EOK)
879                         break;
880         }
881
882         return err;
883 }
884
885 static bool ext4_ext_can_prepend(struct ext4_extent *ex1,
886                                  struct ext4_extent *ex2)
887 {
888         if (ext4_ext_pblock(ex2) + ext4_ext_get_actual_len(ex2) !=
889             ext4_ext_pblock(ex1))
890                 return false;
891
892 #ifdef AGGRESSIVE_TEST
893         if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) > 4)
894                 return 0;
895 #else
896         if (ext4_ext_is_unwritten(ex1)) {
897                 if (ext4_ext_get_actual_len(ex1) +
898                         ext4_ext_get_actual_len(ex2) >
899                     EXT_UNWRITTEN_MAX_LEN)
900                         return false;
901         } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
902                    EXT_INIT_MAX_LEN)
903                 return false;
904 #endif
905
906         if (to_le32(ex2->first_block) + ext4_ext_get_actual_len(ex2) !=
907             to_le32(ex1->first_block))
908                 return false;
909
910         return true;
911 }
912
913 static bool ext4_ext_can_append(struct ext4_extent *ex1,
914                                 struct ext4_extent *ex2)
915 {
916         if (ext4_ext_pblock(ex1) + ext4_ext_get_actual_len(ex1) !=
917             ext4_ext_pblock(ex2))
918                 return false;
919
920 #ifdef AGGRESSIVE_TEST
921         if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) > 4)
922                 return 0;
923 #else
924         if (ext4_ext_is_unwritten(ex1)) {
925                 if (ext4_ext_get_actual_len(ex1) +
926                         ext4_ext_get_actual_len(ex2) >
927                     EXT_UNWRITTEN_MAX_LEN)
928                         return false;
929         } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
930                    EXT_INIT_MAX_LEN)
931                 return false;
932 #endif
933
934         if (to_le32(ex1->first_block) + ext4_ext_get_actual_len(ex1) !=
935             to_le32(ex2->first_block))
936                 return false;
937
938         return true;
939 }
940
941 static int ext4_ext_insert_leaf(struct ext4_inode_ref *inode_ref,
942                                 struct ext4_extent_path *path,
943                                 int32_t at,
944                                 struct ext4_extent *newext,
945                                 struct ext_split_trans *spt,
946                                 uint32_t flags,
947                                 bool *need_grow)
948 {
949         struct ext4_extent_path *curp = path + at;
950         struct ext4_extent *ex = curp->extent;
951         struct ext4_block bh = EXT4_BLOCK_ZERO();
952         int32_t len;
953         int err = EOK;
954         int unwritten;
955         struct ext4_extent_header *eh = NULL;
956
957         *need_grow = false;
958
959         if (curp->extent &&
960             to_le32(newext->first_block) == to_le32(curp->extent->first_block))
961                 return EIO;
962
963         if (!(flags & EXT4_EXT_NO_COMBINE)) {
964                 if (curp->extent && ext4_ext_can_append(curp->extent, newext)) {
965                         unwritten = ext4_ext_is_unwritten(curp->extent);
966                         curp->extent->block_count =
967                             to_le16(ext4_ext_get_actual_len(curp->extent) +
968                                     ext4_ext_get_actual_len(newext));
969                         if (unwritten)
970                                 ext4_ext_mark_unwritten(curp->extent);
971                         err = ext4_ext_dirty(inode_ref, curp);
972                         goto out;
973                 }
974
975                 if (curp->extent &&
976                     ext4_ext_can_prepend(curp->extent, newext)) {
977                         unwritten = ext4_ext_is_unwritten(curp->extent);
978                         curp->extent->first_block = newext->first_block;
979                         curp->extent->block_count =
980                             to_le16(ext4_ext_get_actual_len(curp->extent) +
981                                     ext4_ext_get_actual_len(newext));
982                         if (unwritten)
983                                 ext4_ext_mark_unwritten(curp->extent);
984                         err = ext4_ext_dirty(inode_ref, curp);
985                         goto out;
986                 }
987         }
988
989         if (to_le16(curp->header->entries_count) ==
990             to_le16(curp->header->max_entries_count)) {
991                 if (at) {
992                         struct ext4_extent_header *neh;
993                         err = ext4_ext_split_node(inode_ref, path, at, newext,
994                                                   &spt->ptr, &bh);
995                         if (err != EOK)
996                                 goto out;
997
998                         neh = ext_block_hdr(&bh);
999                         if (to_le32(newext->first_block) >
1000                             to_le32(curp->extent->first_block)) {
1001                                 if (to_le16(neh->entries_count) >
1002                                     to_le16(curp->header->entries_count)) {
1003                                         eh = curp->header;
1004                                         /* insert after */
1005                                         ex = EXT_LAST_EXTENT(eh) + 1;
1006                                 } else {
1007                                         eh = neh;
1008                                         ex = EXT_FIRST_EXTENT(eh);
1009                                 }
1010                         } else {
1011                                 eh = curp->header;
1012                                 /* insert before */
1013                                 ex = EXT_LAST_EXTENT(eh);
1014                         }
1015                 } else {
1016                         err = EOK;
1017                         *need_grow = true;
1018                         goto out;
1019                 }
1020         } else {
1021                 eh = curp->header;
1022                 if (curp->extent == NULL) {
1023                         ex = EXT_FIRST_EXTENT(eh);
1024                         curp->extent = ex;
1025                 } else if (to_le32(newext->first_block) >
1026                            to_le32(curp->extent->first_block)) {
1027                         /* insert after */
1028                         ex = curp->extent + 1;
1029                 } else {
1030                         /* insert before */
1031                         ex = curp->extent;
1032                 }
1033         }
1034
1035         len = EXT_LAST_EXTENT(eh) - ex + 1;
1036         ext4_assert(len >= 0);
1037         if (len > 0)
1038                 memmove(ex + 1, ex, len * sizeof(struct ext4_extent));
1039
1040         if (ex > EXT_MAX_EXTENT(eh)) {
1041                 err = EIO;
1042                 goto out;
1043         }
1044
1045         ex->first_block = newext->first_block;
1046         ex->block_count = newext->block_count;
1047         ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
1048         eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
1049
1050         if (ex > EXT_LAST_EXTENT(eh)) {
1051                 err = EIO;
1052                 goto out;
1053         }
1054
1055         if (eh == curp->header) {
1056                 err = ext4_ext_correct_indexes(inode_ref, path);
1057                 if (err != EOK)
1058                         goto out;
1059                 err = ext4_ext_dirty(inode_ref, curp);
1060         } else
1061                 err = EOK;
1062
1063 out:
1064         if (err != EOK || *need_grow) {
1065                 if (bh.lb_id)
1066                         ext4_block_set(inode_ref->fs->bdev, &bh);
1067
1068                 spt->ptr = 0;
1069         } else if (bh.lb_id) {
1070                 /* If we got a sibling leaf. */
1071                 ext4_extent_block_csum_set(inode_ref, ext_block_hdr(&bh));
1072                 ext4_trans_set_block_dirty(bh.buf);
1073
1074                 spt->path.p_block = ext4_ext_pblock(ex);
1075                 spt->path.depth = to_le16(eh->depth);
1076                 spt->path.maxdepth = 0;
1077                 spt->path.extent = ex;
1078                 spt->path.index = NULL;
1079                 spt->path.header = eh;
1080                 spt->path.block = bh;
1081
1082                 /*
1083                  * If newext->ee_block can be included into the
1084                  * right sub-tree.
1085                  */
1086                 if (to_le32(newext->first_block) >=
1087                     ext4_ext_block_index(ext_block_hdr(&bh)))
1088                         spt->switch_to = 1;
1089                 else {
1090                         curp->extent = ex;
1091                         curp->p_block = ext4_ext_pblock(ex);
1092                 }
1093
1094         } else {
1095                 spt->ptr = 0;
1096                 curp->extent = ex;
1097                 curp->p_block = ext4_ext_pblock(ex);
1098         }
1099
1100         return err;
1101 }
1102
1103 /*
1104  * ext4_ext_grow_indepth:
1105  * implements tree growing procedure:
1106  * - allocates new block
1107  * - moves top-level data (index block or leaf) into the new block
1108  * - initializes new top-level, creating index that points to the
1109  *   just created block
1110  */
1111 static int ext4_ext_grow_indepth(struct ext4_inode_ref *inode_ref,
1112                                  uint32_t flags)
1113 {
1114         struct ext4_extent_header *neh;
1115         struct ext4_block bh = EXT4_BLOCK_ZERO();
1116         ext4_fsblk_t newblock, goal = 0;
1117         int err = EOK;
1118
1119         /* Try to prepend new index to old one */
1120         if (ext_depth(inode_ref->inode))
1121                 goal = ext4_idx_pblock(
1122                     EXT_FIRST_INDEX(ext_inode_hdr(inode_ref->inode)));
1123         else
1124                 goal = ext4_fs_inode_to_goal_block(inode_ref);
1125
1126         newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, &err);
1127         if (newblock == 0)
1128                 return err;
1129
1130         /* # */
1131         err = ext4_trans_block_get_noread(inode_ref->fs->bdev, &bh, newblock);
1132         if (err != EOK) {
1133                 ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
1134                 return err;
1135         }
1136
1137         /* move top-level index/leaf into new block */
1138         memmove(bh.data, inode_ref->inode->blocks,
1139                 sizeof(inode_ref->inode->blocks));
1140
1141         /* set size of new block */
1142         neh = ext_block_hdr(&bh);
1143         /* old root could have indexes or leaves
1144          * so calculate e_max right way */
1145         if (ext_depth(inode_ref->inode))
1146                 neh->max_entries_count =
1147                     to_le16(ext4_ext_space_block_idx(inode_ref));
1148         else
1149                 neh->max_entries_count =
1150                     to_le16(ext4_ext_space_block(inode_ref));
1151
1152         neh->magic = to_le16(EXT4_EXTENT_MAGIC);
1153         ext4_extent_block_csum_set(inode_ref, neh);
1154
1155         /* Update top-level index: num,max,pointer */
1156         neh = ext_inode_hdr(inode_ref->inode);
1157         neh->entries_count = to_le16(1);
1158         ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
1159         if (neh->depth == 0) {
1160                 /* Root extent block becomes index block */
1161                 neh->max_entries_count =
1162                     to_le16(ext4_ext_space_root_idx(inode_ref));
1163                 EXT_FIRST_INDEX(neh)
1164                     ->first_block = EXT_FIRST_EXTENT(neh)->first_block;
1165         }
1166         neh->depth = to_le16(to_le16(neh->depth) + 1);
1167
1168         ext4_trans_set_block_dirty(bh.buf);
1169         inode_ref->dirty = true;
1170         ext4_block_set(inode_ref->fs->bdev, &bh);
1171
1172         return err;
1173 }
1174
1175 __unused static void print_path(struct ext4_extent_path *path)
1176 {
1177         int32_t i = path->depth;
1178         while (i >= 0) {
1179
1180                 ptrdiff_t a =
1181                     (path->extent)
1182                         ? (path->extent - EXT_FIRST_EXTENT(path->header))
1183                         : 0;
1184                 ptrdiff_t b =
1185                     (path->index)
1186                         ? (path->index - EXT_FIRST_INDEX(path->header))
1187                         : 0;
1188
1189                 (void)a;
1190                 (void)b;
1191                 ext4_dbg(DEBUG_EXTENT,
1192                          "depth %" PRId32 ", p_block: %" PRIu64 ","
1193                          "p_ext offset: %td, p_idx offset: %td\n",
1194                          i, path->p_block, a, b);
1195                 i--;
1196                 path++;
1197         }
1198 }
1199
1200 static void ext4_ext_replace_path(struct ext4_inode_ref *inode_ref,
1201                                   struct ext4_extent_path *path,
1202                                   struct ext_split_trans *spt,
1203                                   int32_t level)
1204 {
1205         int32_t depth = ext_depth(inode_ref->inode);
1206         int32_t i = depth - level;
1207         ext4_ext_drop_refs(inode_ref, path + i, 1);
1208         path[i] = spt[level].path;
1209 }
1210
1211 static int ext4_ext_insert_extent(struct ext4_inode_ref *inode_ref,
1212                                   struct ext4_extent_path **ppath,
1213                                   struct ext4_extent *newext, uint32_t flags)
1214 {
1215         int32_t i, depth, level;
1216         int ret = EOK;
1217         ext4_fsblk_t ptr = 0;
1218         bool need_grow = false;
1219         struct ext4_extent_path *path = *ppath;
1220         struct ext_split_trans *spt = NULL;
1221         struct ext_split_trans newblock;
1222
1223         memset(&newblock, 0, sizeof(newblock));
1224
1225         depth = ext_depth(inode_ref->inode);
1226         for (i = depth, level = 0; i >= 0; i--, level++)
1227                 if (EXT_HAS_FREE_INDEX(path + i))
1228                         break;
1229
1230         if (level) {
1231                 spt = calloc(1, sizeof(struct ext_split_trans) * (level));
1232                 if (!spt) {
1233                         ret = ENOMEM;
1234                         goto out;
1235                 }
1236         }
1237         i = 0;
1238 again:
1239         depth = ext_depth(inode_ref->inode);
1240
1241         do {
1242                 if (!i) {
1243                         ret = ext4_ext_insert_leaf(inode_ref, path, depth - i,
1244                                                    newext, &newblock, flags,
1245                                                    &need_grow);
1246                 } else {
1247                         ret = ext4_ext_insert_index(
1248                             inode_ref, path, depth - i, newext,
1249                             ext4_ext_block_index(
1250                                 ext_block_hdr(&spt[i - 1].path.block)),
1251                             spt[i - 1].ptr, &newblock,
1252                             &need_grow);
1253                 }
1254                 ptr = newblock.ptr;
1255
1256                 if (ret != EOK)
1257                         goto out;
1258
1259                 else if (spt && ptr && !ret) {
1260                         /* Prepare for the next iteration after splitting. */
1261                         spt[i] = newblock;
1262                 }
1263
1264                 i++;
1265         } while (ptr != 0 && i <= depth);
1266
1267         if (need_grow) {
1268                 ret = ext4_ext_grow_indepth(inode_ref, 0);
1269                 if (ret)
1270                         goto out;
1271                 ret = ext4_find_extent(inode_ref, to_le32(newext->first_block),
1272                                        ppath, 0);
1273                 if (ret)
1274                         goto out;
1275                 i = depth;
1276                 path = *ppath;
1277                 goto again;
1278         }
1279 out:
1280         if (ret) {
1281                 if (path)
1282                         ext4_ext_drop_refs(inode_ref, path, 0);
1283
1284                 while (--level >= 0 && spt) {
1285                         if (spt[level].ptr) {
1286                                 ext4_ext_free_blocks(inode_ref, spt[level].ptr,
1287                                                      1, 0);
1288                                 ext4_ext_drop_refs(inode_ref, &spt[level].path,
1289                                                    1);
1290                         }
1291                 }
1292         } else {
1293                 while (--level >= 0 && spt) {
1294                         if (spt[level].switch_to)
1295                                 ext4_ext_replace_path(inode_ref, path, spt,
1296                                                       level);
1297                         else if (spt[level].ptr)
1298                                 ext4_ext_drop_refs(inode_ref, &spt[level].path,
1299                                                    1);
1300                 }
1301         }
1302         if (spt)
1303                 free(spt);
1304
1305         return ret;
1306 }
1307
1308 static void ext4_ext_remove_blocks(struct ext4_inode_ref *inode_ref,
1309                                    struct ext4_extent *ex, ext4_lblk_t from,
1310                                    ext4_lblk_t to)
1311 {
1312         ext4_lblk_t len = to - from + 1;
1313         ext4_lblk_t num;
1314         ext4_fsblk_t start;
1315         num = from - to_le32(ex->first_block);
1316         start = ext4_ext_pblock(ex) + num;
1317         ext4_dbg(DEBUG_EXTENT,
1318                  "Freeing %" PRIu32 " at %" PRIu64 ", %" PRIu32 "\n", from,
1319                  start, len);
1320
1321         ext4_ext_free_blocks(inode_ref, start, len, 0);
1322 }
1323
1324 static int ext4_ext_remove_idx(struct ext4_inode_ref *inode_ref,
1325                                struct ext4_extent_path *path, int32_t depth)
1326 {
1327         int err = EOK;
1328         int32_t i = depth;
1329         ext4_fsblk_t leaf;
1330
1331         /* free index block */
1332         leaf = ext4_idx_pblock(path[i].index);
1333
1334         if (path[i].index != EXT_LAST_INDEX(path[i].header)) {
1335                 ptrdiff_t len = EXT_LAST_INDEX(path[i].header) - path[i].index;
1336                 memmove(path[i].index, path[i].index + 1,
1337                         len * sizeof(struct ext4_extent_index));
1338         }
1339
1340         path[i].header->entries_count =
1341             to_le16(to_le16(path[i].header->entries_count) - 1);
1342         err = ext4_ext_dirty(inode_ref, path + i);
1343         if (err != EOK)
1344                 return err;
1345
1346         ext4_dbg(DEBUG_EXTENT, "IDX: Freeing %" PRIu32 " at %" PRIu64 ", %d\n",
1347                  to_le32(path[i].index->first_block), leaf, 1);
1348         ext4_ext_free_blocks(inode_ref, leaf, 1, 0);
1349
1350         while (i > 0) {
1351                 if (path[i].index != EXT_FIRST_INDEX(path[i].header))
1352                         break;
1353
1354                 path[i - 1].index->first_block = path[i].index->first_block;
1355                 err = ext4_ext_dirty(inode_ref, path + i - 1);
1356                 if (err != EOK)
1357                         break;
1358
1359                 i--;
1360         }
1361         return err;
1362 }
1363
1364 static int ext4_ext_remove_leaf(struct ext4_inode_ref *inode_ref,
1365                                 struct ext4_extent_path *path, ext4_lblk_t from,
1366                                 ext4_lblk_t to)
1367 {
1368
1369         int32_t depth = ext_depth(inode_ref->inode);
1370         struct ext4_extent *ex = path[depth].extent;
1371         struct ext4_extent *start_ex, *ex2 = NULL;
1372         struct ext4_extent_header *eh = path[depth].header;
1373         int32_t len;
1374         int err = EOK;
1375         uint16_t new_entries;
1376
1377         start_ex = ex;
1378         new_entries = to_le16(eh->entries_count);
1379         while (ex <= EXT_LAST_EXTENT(path[depth].header) &&
1380                to_le32(ex->first_block) <= to) {
1381                 int32_t new_len = 0;
1382                 int unwritten;
1383                 ext4_lblk_t start, new_start;
1384                 ext4_fsblk_t newblock;
1385                 new_start = start = to_le32(ex->first_block);
1386                 len = ext4_ext_get_actual_len(ex);
1387                 newblock = ext4_ext_pblock(ex);
1388                 if (start < from) {
1389                         len -= from - start;
1390                         new_len = from - start;
1391                         start = from;
1392                         start_ex++;
1393                 } else {
1394                         if (start + len - 1 > to) {
1395                                 len -= start + len - 1 - to;
1396                                 new_len = start + len - 1 - to;
1397                                 new_start = to + 1;
1398                                 newblock += to + 1 - start;
1399                                 ex2 = ex;
1400                         }
1401                 }
1402
1403                 ext4_ext_remove_blocks(inode_ref, ex, start, start + len - 1);
1404                 ex->first_block = to_le32(new_start);
1405                 if (!new_len)
1406                         new_entries--;
1407                 else {
1408                         unwritten = ext4_ext_is_unwritten(ex);
1409                         ex->block_count = to_le16(new_len);
1410                         ext4_ext_store_pblock(ex, newblock);
1411                         if (unwritten)
1412                                 ext4_ext_mark_unwritten(ex);
1413                 }
1414
1415                 ex += 1;
1416         }
1417
1418         if (ex2 == NULL)
1419                 ex2 = ex;
1420
1421         if (ex2 <= EXT_LAST_EXTENT(eh))
1422                 memmove(start_ex, ex2,
1423                         (EXT_LAST_EXTENT(eh) - ex2 + 1) * sizeof(struct ext4_extent));
1424
1425         eh->entries_count = to_le16(new_entries);
1426         ext4_ext_dirty(inode_ref, path + depth);
1427         if (path[depth].extent == EXT_FIRST_EXTENT(eh) && eh->entries_count)
1428                 err = ext4_ext_correct_indexes(inode_ref, path);
1429
1430         /* if this leaf is free, then we should
1431          * remove it from index block above */
1432         if (err == EOK && eh->entries_count == 0 && path[depth].block.lb_id)
1433                 err = ext4_ext_remove_idx(inode_ref, path, depth - 1);
1434         else if (depth > 0)
1435                 path[depth - 1].index++;
1436
1437         return err;
1438 }
1439
1440 static bool ext4_ext_more_to_rm(struct ext4_extent_path *path, ext4_lblk_t to)
1441 {
1442         if (!to_le16(path->header->entries_count))
1443                 return false;
1444
1445         if (path->index > EXT_LAST_INDEX(path->header))
1446                 return false;
1447
1448         if (to_le32(path->index->first_block) > to)
1449                 return false;
1450
1451         return true;
1452 }
1453
1454 int ext4_extent_remove_space(struct ext4_inode_ref *inode_ref, ext4_lblk_t from,
1455                           ext4_lblk_t to)
1456 {
1457         struct ext4_extent_path *path = NULL;
1458         int ret = EOK;
1459         int32_t depth = ext_depth(inode_ref->inode);
1460         int32_t i;
1461
1462         ret = ext4_find_extent(inode_ref, from, &path, 0);
1463         if (ret)
1464                 goto out;
1465
1466         if (!path[depth].extent) {
1467                 ret = EOK;
1468                 goto out;
1469         }
1470
1471         bool in_range = IN_RANGE(from, to_le32(path[depth].extent->first_block),
1472                         ext4_ext_get_actual_len(path[depth].extent));
1473
1474         if (!in_range) {
1475                 ret = EOK;
1476                 goto out;
1477         }
1478
1479         /* If we do remove_space inside the range of an extent */
1480         if ((to_le32(path[depth].extent->first_block) < from) &&
1481             (to < to_le32(path[depth].extent->first_block) +
1482                         ext4_ext_get_actual_len(path[depth].extent) - 1)) {
1483
1484                 struct ext4_extent *ex = path[depth].extent, newex;
1485                 int unwritten = ext4_ext_is_unwritten(ex);
1486                 ext4_lblk_t ee_block = to_le32(ex->first_block);
1487                 int32_t len = ext4_ext_get_actual_len(ex);
1488                 ext4_fsblk_t newblock =
1489                         to + 1 - ee_block + ext4_ext_pblock(ex);
1490
1491                 ex->block_count = to_le16(from - ee_block);
1492                 if (unwritten)
1493                         ext4_ext_mark_unwritten(ex);
1494
1495                 ext4_ext_dirty(inode_ref, path + depth);
1496
1497                 newex.first_block = to_le32(to + 1);
1498                 newex.block_count = to_le16(ee_block + len - 1 - to);
1499                 ext4_ext_store_pblock(&newex, newblock);
1500                 if (unwritten)
1501                         ext4_ext_mark_unwritten(&newex);
1502
1503                 ret = ext4_ext_insert_extent(inode_ref, &path, &newex, 0);
1504                 goto out;
1505         }
1506
1507         i = depth;
1508         while (i >= 0) {
1509                 if (i == depth) {
1510                         struct ext4_extent_header *eh;
1511                         struct ext4_extent *first_ex, *last_ex;
1512                         ext4_lblk_t leaf_from, leaf_to;
1513                         eh = path[i].header;
1514                         ext4_assert(to_le16(eh->entries_count) > 0);
1515                         first_ex = EXT_FIRST_EXTENT(eh);
1516                         last_ex = EXT_LAST_EXTENT(eh);
1517                         leaf_from = to_le32(first_ex->first_block);
1518                         leaf_to = to_le32(last_ex->first_block) +
1519                                   ext4_ext_get_actual_len(last_ex) - 1;
1520                         if (leaf_from < from)
1521                                 leaf_from = from;
1522
1523                         if (leaf_to > to)
1524                                 leaf_to = to;
1525
1526                         ext4_ext_remove_leaf(inode_ref, path, leaf_from,
1527                                         leaf_to);
1528                         ext4_ext_drop_refs(inode_ref, path + i, 0);
1529                         i--;
1530                         continue;
1531                 }
1532
1533                 struct ext4_extent_header *eh;
1534                 eh = path[i].header;
1535                 if (ext4_ext_more_to_rm(path + i, to)) {
1536                         struct ext4_block bh = EXT4_BLOCK_ZERO();
1537                         if (path[i + 1].block.lb_id)
1538                                 ext4_ext_drop_refs(inode_ref, path + i + 1, 0);
1539
1540                         ret = read_extent_tree_block(inode_ref,
1541                                         ext4_idx_pblock(path[i].index),
1542                                         depth - i - 1, &bh, 0);
1543                         if (ret)
1544                                 goto out;
1545
1546                         path[i].p_block =
1547                                         ext4_idx_pblock(path[i].index);
1548                         path[i + 1].block = bh;
1549                         path[i + 1].header = ext_block_hdr(&bh);
1550                         path[i + 1].depth = depth - i - 1;
1551                         if (i + 1 == depth)
1552                                 path[i + 1].extent = EXT_FIRST_EXTENT(
1553                                         path[i + 1].header);
1554                         else
1555                                 path[i + 1].index =
1556                                         EXT_FIRST_INDEX(path[i + 1].header);
1557
1558                         i++;
1559                 } else {
1560                         if (i > 0) {
1561                                 if (!eh->entries_count)
1562                                         ret = ext4_ext_remove_idx(inode_ref, path,
1563                                                         i - 1);
1564                                 else
1565                                         path[i - 1].index++;
1566
1567                         }
1568
1569                         if (i)
1570                                 ext4_block_set(inode_ref->fs->bdev,
1571                                                 &path[i].block);
1572
1573
1574                         i--;
1575                 }
1576
1577         }
1578
1579         /* TODO: flexible tree reduction should be here */
1580         if (path->header->entries_count == 0) {
1581                 /*
1582                  * truncate to zero freed all the tree,
1583                  * so we need to correct eh_depth
1584                  */
1585                 ext_inode_hdr(inode_ref->inode)->depth = 0;
1586                 ext_inode_hdr(inode_ref->inode)->max_entries_count =
1587                     to_le16(ext4_ext_space_root(inode_ref));
1588                 ret = ext4_ext_dirty(inode_ref, path);
1589         }
1590
1591 out:
1592         ext4_ext_drop_refs(inode_ref, path, 0);
1593         free(path);
1594         path = NULL;
1595         return ret;
1596 }
1597
1598 static int ext4_ext_split_extent_at(struct ext4_inode_ref *inode_ref,
1599                                     struct ext4_extent_path **ppath,
1600                                     ext4_lblk_t split, uint32_t split_flag)
1601 {
1602         struct ext4_extent *ex, newex;
1603         ext4_fsblk_t newblock;
1604         ext4_lblk_t ee_block;
1605         int32_t ee_len;
1606         int32_t depth = ext_depth(inode_ref->inode);
1607         int err = EOK;
1608
1609         ex = (*ppath)[depth].extent;
1610         ee_block = to_le32(ex->first_block);
1611         ee_len = ext4_ext_get_actual_len(ex);
1612         newblock = split - ee_block + ext4_ext_pblock(ex);
1613
1614         if (split == ee_block) {
1615                 /*
1616                  * case b: block @split is the block that the extent begins with
1617                  * then we just change the state of the extent, and splitting
1618                  * is not needed.
1619                  */
1620                 if (split_flag & EXT4_EXT_MARK_UNWRIT2)
1621                         ext4_ext_mark_unwritten(ex);
1622                 else
1623                         ext4_ext_mark_initialized(ex);
1624
1625                 err = ext4_ext_dirty(inode_ref, *ppath + depth);
1626                 goto out;
1627         }
1628
1629         ex->block_count = to_le16(split - ee_block);
1630         if (split_flag & EXT4_EXT_MARK_UNWRIT1)
1631                 ext4_ext_mark_unwritten(ex);
1632
1633         err = ext4_ext_dirty(inode_ref, *ppath + depth);
1634         if (err != EOK)
1635                 goto out;
1636
1637         newex.first_block = to_le32(split);
1638         newex.block_count = to_le16(ee_len - (split - ee_block));
1639         ext4_ext_store_pblock(&newex, newblock);
1640         if (split_flag & EXT4_EXT_MARK_UNWRIT2)
1641                 ext4_ext_mark_unwritten(&newex);
1642         err = ext4_ext_insert_extent(inode_ref, ppath, &newex,
1643                                      EXT4_EXT_NO_COMBINE);
1644         if (err != EOK)
1645                 goto restore_extent_len;
1646
1647 out:
1648         return err;
1649 restore_extent_len:
1650         ex->block_count = to_le16(ee_len);
1651         err = ext4_ext_dirty(inode_ref, *ppath + depth);
1652         return err;
1653 }
1654
1655 static int ext4_ext_convert_to_initialized(struct ext4_inode_ref *inode_ref,
1656                                            struct ext4_extent_path **ppath,
1657                                            ext4_lblk_t split, uint32_t blocks)
1658 {
1659         int32_t depth = ext_depth(inode_ref->inode), err = EOK;
1660         struct ext4_extent *ex = (*ppath)[depth].extent;
1661
1662         ext4_assert(to_le32(ex->first_block) <= split);
1663
1664         if (split + blocks ==
1665             to_le32(ex->first_block) + ext4_ext_get_actual_len(ex)) {
1666                 /* split and initialize right part */
1667                 err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1668                                                EXT4_EXT_MARK_UNWRIT1);
1669         } else if (to_le32(ex->first_block) == split) {
1670                 /* split and initialize left part */
1671                 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1672                                                EXT4_EXT_MARK_UNWRIT2);
1673         } else {
1674                 /* split 1 extent to 3 and initialize the 2nd */
1675                 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1676                                                EXT4_EXT_MARK_UNWRIT1 |
1677                                                    EXT4_EXT_MARK_UNWRIT2);
1678                 if (!err) {
1679                         err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1680                                                        EXT4_EXT_MARK_UNWRIT1);
1681                 }
1682         }
1683
1684         return err;
1685 }
1686
1687 static ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_extent_path *path)
1688 {
1689         int32_t depth;
1690
1691         depth = path->depth;
1692
1693         if (depth == 0 && path->extent == NULL)
1694                 return EXT_MAX_BLOCKS;
1695
1696         while (depth >= 0) {
1697                 if (depth == path->depth) {
1698                         /* leaf */
1699                         if (path[depth].extent &&
1700                             path[depth].extent !=
1701                                 EXT_LAST_EXTENT(path[depth].header))
1702                                 return to_le32(
1703                                     path[depth].extent[1].first_block);
1704                 } else {
1705                         /* index */
1706                         if (path[depth].index !=
1707                             EXT_LAST_INDEX(path[depth].header))
1708                                 return to_le32(
1709                                     path[depth].index[1].first_block);
1710                 }
1711                 depth--;
1712         }
1713
1714         return EXT_MAX_BLOCKS;
1715 }
1716
1717 static int ext4_ext_zero_unwritten_range(struct ext4_inode_ref *inode_ref,
1718                                          ext4_fsblk_t block,
1719                                          uint32_t blocks_count)
1720 {
1721         int err = EOK;
1722         uint32_t i;
1723         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
1724         for (i = 0; i < blocks_count; i++) {
1725                 struct ext4_block bh = EXT4_BLOCK_ZERO();
1726                 err = ext4_trans_block_get_noread(inode_ref->fs->bdev, &bh, block + i);
1727                 if (err != EOK)
1728                         break;
1729
1730                 memset(bh.data, 0, block_size);
1731                 ext4_trans_set_block_dirty(bh.buf);
1732                 err = ext4_block_set(inode_ref->fs->bdev, &bh);
1733                 if (err != EOK)
1734                         break;
1735         }
1736         return err;
1737 }
1738
1739 int ext4_extent_get_blocks(struct ext4_inode_ref *inode_ref, ext4_lblk_t iblock,
1740                         uint32_t max_blocks, ext4_fsblk_t *result, bool create,
1741                         uint32_t *blocks_count)
1742 {
1743         struct ext4_extent_path *path = NULL;
1744         struct ext4_extent newex, *ex;
1745         ext4_fsblk_t goal;
1746         int err = EOK;
1747         int32_t depth;
1748         uint32_t allocated = 0;
1749         ext4_lblk_t next;
1750         ext4_fsblk_t newblock;
1751
1752         if (result)
1753                 *result = 0;
1754
1755         if (blocks_count)
1756                 *blocks_count = 0;
1757
1758         /* find extent for this block */
1759         err = ext4_find_extent(inode_ref, iblock, &path, 0);
1760         if (err != EOK) {
1761                 path = NULL;
1762                 goto out2;
1763         }
1764
1765         depth = ext_depth(inode_ref->inode);
1766
1767         /*
1768          * consistent leaf must not be empty
1769          * this situations is possible, though, _during_ tree modification
1770          * this is why assert can't be put in ext4_ext_find_extent()
1771          */
1772         ex = path[depth].extent;
1773         if (ex) {
1774                 ext4_lblk_t ee_block = to_le32(ex->first_block);
1775                 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
1776                 uint16_t ee_len = ext4_ext_get_actual_len(ex);
1777                 /* if found exent covers block, simple return it */
1778                 if (IN_RANGE(iblock, ee_block, ee_len)) {
1779                         /* number of remain blocks in the extent */
1780                         allocated = ee_len - (iblock - ee_block);
1781
1782                         if (!ext4_ext_is_unwritten(ex)) {
1783                                 newblock = iblock - ee_block + ee_start;
1784                                 goto out;
1785                         }
1786
1787                         if (!create) {
1788                                 newblock = 0;
1789                                 goto out;
1790                         }
1791
1792                         uint32_t zero_range;
1793                         zero_range = allocated;
1794                         if (zero_range > max_blocks)
1795                                 zero_range = max_blocks;
1796
1797                         newblock = iblock - ee_block + ee_start;
1798                         err = ext4_ext_zero_unwritten_range(inode_ref, newblock,
1799                                         zero_range);
1800                         if (err != EOK)
1801                                 goto out2;
1802
1803                         err = ext4_ext_convert_to_initialized(inode_ref, &path,
1804                                         iblock, zero_range);
1805                         if (err != EOK)
1806                                 goto out2;
1807
1808                         goto out;
1809                 }
1810         }
1811
1812         /*
1813          * requested block isn't allocated yet
1814          * we couldn't try to create block if create flag is zero
1815          */
1816         if (!create) {
1817                 goto out2;
1818         }
1819
1820         /* find next allocated block so that we know how many
1821          * blocks we can allocate without ovelapping next extent */
1822         next = ext4_ext_next_allocated_block(path);
1823         allocated = next - iblock;
1824         if (allocated > max_blocks)
1825                 allocated = max_blocks;
1826
1827         /* allocate new block */
1828         goal = ext4_ext_find_goal(inode_ref, path, iblock);
1829         newblock = ext4_new_meta_blocks(inode_ref, goal, 0, &allocated, &err);
1830         if (!newblock)
1831                 goto out2;
1832
1833         /* try to insert new extent into found leaf and return */
1834         newex.first_block = to_le32(iblock);
1835         ext4_ext_store_pblock(&newex, newblock);
1836         newex.block_count = to_le16(allocated);
1837         err = ext4_ext_insert_extent(inode_ref, &path, &newex, 0);
1838         if (err != EOK) {
1839                 /* free data blocks we just allocated */
1840                 ext4_ext_free_blocks(inode_ref, ext4_ext_pblock(&newex),
1841                                      to_le16(newex.block_count), 0);
1842                 goto out2;
1843         }
1844
1845         /* previous routine could use block we allocated */
1846         newblock = ext4_ext_pblock(&newex);
1847
1848 out:
1849         if (allocated > max_blocks)
1850                 allocated = max_blocks;
1851
1852         if (result)
1853                 *result = newblock;
1854
1855         if (blocks_count)
1856                 *blocks_count = allocated;
1857
1858 out2:
1859         if (path) {
1860                 ext4_ext_drop_refs(inode_ref, path, 0);
1861                 free(path);
1862         }
1863
1864         return err;
1865 }