Multiple fixes related to basic block type
[lwext4.git] / lwext4 / ext4_extent_full.c
1 /*
2  * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3  * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * - Redistributions of source code must retain the above copyright
10  *   notice, this list of conditions and the following disclaimer.
11  * - Redistributions in binary form must reproduce the above copyright
12  *   notice, this list of conditions and the following disclaimer in the
13  *   documentation and/or other materials provided with the distribution.
14  * - The name of the author may not be used to endorse or promote products
15  *   derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28
29 #include "ext4_config.h"
30 #include "ext4_blockdev.h"
31 #include "ext4_fs.h"
32 #include "ext4_super.h"
33 #include "ext4_balloc.h"
34 #include "ext4_debug.h"
35
36 #include <stdlib.h>
37 #include <string.h>
38 #include <inttypes.h>
39 #include <stddef.h>
40
41 #include "ext4_extent.h"
42
43 #if CONFIG_EXTENT_FULL
44
45 /*
46  * used by extent splitting.
47  */
48 #define EXT4_EXT_MARK_UNWRIT1 0x02 /* mark first half unwritten */
49 #define EXT4_EXT_MARK_UNWRIT2 0x04 /* mark second half unwritten */
50 #define EXT4_EXT_DATA_VALID1 0x08  /* first half contains valid data */
51 #define EXT4_EXT_DATA_VALID2 0x10  /* second half contains valid data */
52 #define EXT4_EXT_NO_COMBINE 0x20   /* do not combine two extents */
53
/* Return a pointer to the checksum tail that sits right after the
 * last possible entry of an extent node. */
static struct ext4_extent_tail *
find_ext4_extent_tail(struct ext4_extent_header *eh)
{
	char *base = (char *)eh;

	return (struct ext4_extent_tail *)(base + EXT4_EXTENT_TAIL_OFFSET(eh));
}
60
61 static struct ext4_extent_header *ext_inode_hdr(struct ext4_inode *inode)
62 {
63         return (struct ext4_extent_header *)inode->blocks;
64 }
65
66 static struct ext4_extent_header *ext_block_hdr(struct ext4_block *block)
67 {
68         return (struct ext4_extent_header *)block->data;
69 }
70
71 static uint16_t ext_depth(struct ext4_inode *inode)
72 {
73         return to_le16(ext_inode_hdr(inode)->depth);
74 }
75
76 static uint16_t ext4_ext_get_actual_len(struct ext4_extent *ext)
77 {
78         return (to_le16(ext->block_count) <= EXT_INIT_MAX_LEN
79                     ? to_le16(ext->block_count)
80                     : (to_le16(ext->block_count) - EXT_INIT_MAX_LEN));
81 }
82
83 static void ext4_ext_mark_initialized(struct ext4_extent *ext)
84 {
85         ext->block_count = to_le16(ext4_ext_get_actual_len(ext));
86 }
87
88 static void ext4_ext_mark_unwritten(struct ext4_extent *ext)
89 {
90         ext->block_count |= to_le16(EXT_INIT_MAX_LEN);
91 }
92
93 static int ext4_ext_is_unwritten(struct ext4_extent *ext)
94 {
95         /* Extent with ee_len of 0x8000 is treated as an initialized extent */
96         return (to_le16(ext->block_count) > EXT_INIT_MAX_LEN);
97 }
98
/*
 * ext4_ext_pblock:
 * combine low and high parts of physical block number into ext4_fsblk_t
 */
static ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex)
{
	ext4_fsblk_t block;

	/* Low 32 bits of the 48-bit physical block number. */
	block = to_le32(ex->start_lo);
	/* High 16 bits. The shift is split as (<<31)<<1 so the expression
	 * stays well-defined even where a plain <<32 would exceed the
	 * promoted operand width. */
	block |= ((ext4_fsblk_t)to_le16(ex->start_hi) << 31) << 1;
	return block;
}
111
/*
 * ext4_idx_pblock:
 * combine low and high parts of a leaf physical block number into ext4_fsblk_t
 */
static ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_index *ix)
{
	ext4_fsblk_t block;

	/* Low 32 bits of the child node's physical block number. */
	block = to_le32(ix->leaf_lo);
	/* High 16 bits; (<<31)<<1 instead of <<32 — see ext4_ext_pblock. */
	block |= ((ext4_fsblk_t)to_le16(ix->leaf_hi) << 31) << 1;
	return block;
}
124
/*
 * ext4_ext_store_pblock:
 * stores a large physical block number into an extent struct,
 * breaking it into parts
 */
static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
{
	/* Low 32 bits go to start_lo; the remaining high bits (shifted
	 * as (>>31)>>1 to avoid an over-wide shift) go to start_hi. */
	ex->start_lo = to_le32((unsigned long)(pb & 0xffffffff));
	ex->start_hi = to_le16((unsigned long)((pb >> 31) >> 1) & 0xffff);
}
135
/*
 * ext4_idx_store_pblock:
 * stores a large physical block number into an index struct,
 * breaking it into parts
 */
static void ext4_idx_store_pblock(struct ext4_extent_index *ix, ext4_fsblk_t pb)
{
	/* Same lo/hi split as ext4_ext_store_pblock, for index entries. */
	ix->leaf_lo = to_le32((unsigned long)(pb & 0xffffffff));
	ix->leaf_hi = to_le16((unsigned long)((pb >> 31) >> 1) & 0xffff);
}
146
/* Allocate one block near @goal via the block allocator; the new block
 * number is returned through @blockp. Returns EOK or an error code. */
static int ext4_allocate_single_block(struct ext4_inode_ref *inode_ref,
                                      ext4_fsblk_t goal,
                                      ext4_fsblk_t *blockp)
{
        return ext4_balloc_alloc_block(inode_ref, goal, blockp);
}
153
154 static ext4_fsblk_t ext4_new_meta_blocks(struct ext4_inode_ref *inode_ref,
155                                          ext4_fsblk_t goal,
156                                          uint32_t flags __unused,
157                                          uint32_t *count, int *errp)
158 {
159         ext4_fsblk_t block = 0;
160
161         *errp = ext4_allocate_single_block(inode_ref, goal, &block);
162         if (count)
163                 *count = 1;
164         return block;
165 }
166
/* Return @count physical blocks starting at @block to the allocator.
 * @flags is accepted for interface parity with the kernel but unused. */
static void ext4_ext_free_blocks(struct ext4_inode_ref *inode_ref,
                                 ext4_fsblk_t block, uint32_t count,
                                 uint32_t flags __unused)
{
        ext4_balloc_free_blocks(inode_ref, block, count);
}
173
174 static size_t ext4_ext_space_block(struct ext4_inode_ref *inode_ref)
175 {
176         size_t size;
177         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
178
179         size = (block_size - sizeof(struct ext4_extent_header)) /
180                sizeof(struct ext4_extent);
181         return size;
182 }
183
184 static size_t ext4_ext_space_block_idx(struct ext4_inode_ref *inode_ref)
185 {
186         size_t size;
187         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
188
189         size = (block_size - sizeof(struct ext4_extent_header)) /
190                sizeof(struct ext4_extent_index);
191         return size;
192 }
193
194 static size_t ext4_ext_space_root(struct ext4_inode_ref *inode_ref)
195 {
196         size_t size;
197
198         size = sizeof(inode_ref->inode->blocks);
199         size -= sizeof(struct ext4_extent_header);
200         size /= sizeof(struct ext4_extent);
201         return size;
202 }
203
204 static size_t ext4_ext_space_root_idx(struct ext4_inode_ref *inode_ref)
205 {
206         size_t size;
207
208         size = sizeof(inode_ref->inode->blocks);
209         size -= sizeof(struct ext4_extent_header);
210         size /= sizeof(struct ext4_extent_index);
211         return size;
212 }
213
214 static size_t ext4_ext_max_entries(struct ext4_inode_ref *inode_ref,
215                                    uint32_t depth)
216 {
217         size_t max;
218
219         if (depth == ext_depth(inode_ref->inode)) {
220                 if (depth == 0)
221                         max = ext4_ext_space_root(inode_ref);
222                 else
223                         max = ext4_ext_space_root_idx(inode_ref);
224         } else {
225                 if (depth == 0)
226                         max = ext4_ext_space_block(inode_ref);
227                 else
228                         max = ext4_ext_space_block_idx(inode_ref);
229         }
230
231         return max;
232 }
233
/*
 * Pick a physical-block allocation goal for logical block @block,
 * preferring locality with the extent found in @path. Falls back to a
 * goal derived from the inode's block group when no path hint exists.
 */
static ext4_fsblk_t ext4_ext_find_goal(struct ext4_inode_ref *inode_ref,
				       struct ext4_extent_path *path,
				       ext4_lblk_t block)
{
	if (path) {
		uint32_t depth = path->depth;
		struct ext4_extent *ex;

		/*
		 * Try to predict block placement assuming that we are
		 * filling in a file which will eventually be
		 * non-sparse --- i.e., in the case of libbfd writing
		 * an ELF object sections out-of-order but in a way
		 * the eventually results in a contiguous object or
		 * executable file, or some database extending a table
		 * space file.  However, this is actually somewhat
		 * non-ideal if we are writing a sparse file such as
		 * qemu or KVM writing a raw image file that is going
		 * to stay fairly sparse, since it will end up
		 * fragmenting the file system's free space.  Maybe we
		 * should have some hueristics or some way to allow
		 * userspace to pass a hint to file system,
		 * especially if the latter case turns out to be
		 * common.
		 */
		ex = path[depth].extent;
		if (ex) {
			ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
			ext4_lblk_t ext_block = to_le32(ex->first_block);

			/* Extrapolate linearly from the nearest extent. */
			if (block > ext_block)
				return ext_pblk + (block - ext_block);
			else
				return ext_pblk - (ext_block - block);
		}

		/* it looks like index is empty;
		 * try to find starting block from index itself */
		if (path[depth].block.lb_id)
			return path[depth].block.lb_id;
	}

	/* OK. use inode's group */
	return ext4_fs_inode_to_goal_block(inode_ref);
}
279
280 /*
281  * Allocation for a meta data block
282  */
283 static ext4_fsblk_t ext4_ext_new_meta_block(struct ext4_inode_ref *inode_ref,
284                                             struct ext4_extent_path *path,
285                                             struct ext4_extent *ex, int *err,
286                                             uint32_t flags)
287 {
288         ext4_fsblk_t goal, newblock;
289
290         goal = ext4_ext_find_goal(inode_ref, path, to_le32(ex->first_block));
291         newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, err);
292         return newblock;
293 }
294
295 static int ext4_ext_dirty(struct ext4_inode_ref *inode_ref,
296                           struct ext4_extent_path *path)
297 {
298         if (path->block.lb_id)
299                 path->block.dirty = true;
300         else
301                 inode_ref->dirty = true;
302
303         return EOK;
304 }
305
306 static void ext4_ext_drop_refs(struct ext4_inode_ref *inode_ref,
307                                struct ext4_extent_path *path, bool keep_other)
308 {
309         int32_t depth, i;
310
311         if (!path)
312                 return;
313         if (keep_other)
314                 depth = 0;
315         else
316                 depth = path->depth;
317
318         for (i = 0; i <= depth; i++, path++)
319                 if (path->block.lb_id) {
320                         ext4_block_set(inode_ref->fs->bdev, &path->block);
321                 }
322 }
323
/*
 * Temporarily we don't need to support checksum.
 * Always returns 0, so every stored checksum of 0 "matches".
 */
static uint32_t ext4_ext_block_csum(struct ext4_inode_ref *inode_ref __unused,
                                    struct ext4_extent_header *eh __unused)
{
        /*TODO: should we add crc32 here ?*/
        /*return ext4_crc32c(inode->i_csum, eh, EXT4_EXTENT_TAIL_OFFSET(eh));*/
        return 0;
}
334
335 static void ext4_extent_block_csum_set(struct ext4_inode_ref *inode_ref,
336                                        struct ext4_extent_header *eh)
337 {
338         struct ext4_extent_tail *tail;
339
340         tail = find_ext4_extent_tail(eh);
341         tail->et_checksum = ext4_ext_block_csum(inode_ref, eh);
342 }
343
/*
 * Check that whether the basic information inside the extent header
 * is correct or not. @depth is the level this node is expected to sit
 * at; @pblk is used only for the diagnostic message. Returns EOK for a
 * valid node, EIO for a corrupted one.
 */
static int ext4_ext_check(struct ext4_inode_ref *inode_ref,
                          struct ext4_extent_header *eh, uint16_t depth,
                          ext4_fsblk_t pblk __unused)
{
        struct ext4_extent_tail *tail;
        const char *error_msg;
        /* silence "set but unused" when ext4_dbg compiles to nothing */
        (void)error_msg;

        if (to_le16(eh->magic) != EXT4_EXTENT_MAGIC) {
                error_msg = "invalid magic";
                goto corrupted;
        }
        if (to_le16(eh->depth) != depth) {
                error_msg = "unexpected eh_depth";
                goto corrupted;
        }
        /* eh_max is stored little-endian, but 0 is 0 in any byte order. */
        if (eh->max_entries_count == 0) {
                error_msg = "invalid eh_max";
                goto corrupted;
        }
        if (to_le16(eh->entries_count) > to_le16(eh->max_entries_count)) {
                error_msg = "invalid eh_entries";
                goto corrupted;
        }

        /* Checksum mismatch is currently tolerated (csum is a stub). */
        tail = find_ext4_extent_tail(eh);
        if (tail->et_checksum != ext4_ext_block_csum(inode_ref, eh)) {
                /* FIXME: Warning: extent checksum damaged? */
        }

        return EOK;

corrupted:
        ext4_dbg(DEBUG_EXTENT, "Bad extents B+ tree block: %s. "
                               "Blocknr: %" PRId64 "\n",
                 error_msg, pblk);
        return EIO;
}
386
387 static int read_extent_tree_block(struct ext4_inode_ref *inode_ref,
388                                   ext4_fsblk_t pblk, int32_t depth,
389                                   struct ext4_block *bh,
390                                   uint32_t flags __unused)
391 {
392         int err;
393
394         err = ext4_block_get(inode_ref->fs->bdev, bh, pblk);
395         if (err != EOK)
396                 goto errout;
397
398         err = ext4_ext_check(inode_ref, ext_block_hdr(bh), depth, pblk);
399         if (err != EOK)
400                 goto errout;
401
402         return EOK;
403 errout:
404         if (bh->lb_id)
405                 ext4_block_set(inode_ref->fs->bdev, bh);
406
407         return err;
408 }
409
/*
 * ext4_ext_binsearch_idx:
 * binary search for the closest index of the given block
 * the header must be checked before calling this
 */
static void ext4_ext_binsearch_idx(struct ext4_extent_path *path,
                                   ext4_lblk_t block)
{
        struct ext4_extent_header *eh = path->header;
        struct ext4_extent_index *r, *l, *m;

        /* Start at the second entry: the search below leaves l pointing
         * one past the last entry with first_block <= @block, so the
         * result (l - 1) can never underrun the first entry. */
        l = EXT_FIRST_INDEX(eh) + 1;
        r = EXT_LAST_INDEX(eh);
        while (l <= r) {
                m = l + (r - l) / 2;
                if (block < to_le32(m->first_block))
                        r = m - 1;
                else
                        l = m + 1;
        }

        /* Closest index whose range may contain @block. */
        path->index = l - 1;
}
433
/*
 * ext4_ext_binsearch:
 * binary search for closest extent of the given block
 * the header must be checked before calling this
 */
static void ext4_ext_binsearch(struct ext4_extent_path *path, ext4_lblk_t block)
{
        struct ext4_extent_header *eh = path->header;
        struct ext4_extent *r, *l, *m;

        if (eh->entries_count == 0) {
                /*
                 * this leaf is empty:
                 * we get such a leaf in split/add case
                 */
                return;
        }

        /* Start at the second entry so the final (l - 1) cannot underrun
         * the first entry; see ext4_ext_binsearch_idx. */
        l = EXT_FIRST_EXTENT(eh) + 1;
        r = EXT_LAST_EXTENT(eh);

        while (l <= r) {
                m = l + (r - l) / 2;
                if (block < to_le32(m->first_block))
                        r = m - 1;
                else
                        l = m + 1;
        }

        /* Closest extent whose range may contain @block. */
        path->extent = l - 1;
}
465
466 #define EXT4_EXT_PATH_INC_DEPTH 1
467
/*
 * Walk the extent tree to logical block @block, filling *orig_path with
 * one ext4_extent_path element per level (root at index 0, leaf last).
 * An existing path is reused when deep enough, otherwise reallocated.
 * On error the path is released and *orig_path is set to NULL.
 */
static int ext4_find_extent(struct ext4_inode_ref *inode_ref, ext4_lblk_t block,
                            struct ext4_extent_path **orig_path, uint32_t flags)
{
        struct ext4_extent_header *eh;
        struct ext4_block bh = EXT4_BLOCK_ZERO();
        ext4_fsblk_t buf_block = 0;
        struct ext4_extent_path *path = *orig_path;
        int32_t depth, ppos = 0;
        int32_t i;
        int ret;

        eh = ext_inode_hdr(inode_ref->inode);
        depth = ext_depth(inode_ref->inode);

        /* Drop references held by a previous walk; reallocate if the
         * tree has grown deeper than the old path can hold. */
        if (path) {
                ext4_ext_drop_refs(inode_ref, path, 0);
                if (depth > path[0].maxdepth) {
                        free(path);
                        *orig_path = path = NULL;
                }
        }
        if (!path) {
                int32_t path_depth = depth + EXT4_EXT_PATH_INC_DEPTH;
                /* account possible depth increase */
                path = calloc(1, sizeof(struct ext4_extent_path) *
                                     (path_depth + 1));
                if (!path)
                        return ENOMEM;
                path[0].maxdepth = path_depth;
        }
        path[0].header = eh;
        path[0].block = bh;

        i = depth;
        /* walk through the tree */
        while (i) {
                ext4_ext_binsearch_idx(path + ppos, block);
                path[ppos].p_block = ext4_idx_pblock(path[ppos].index);
                path[ppos].depth = i;
                path[ppos].extent = NULL;
                buf_block = path[ppos].p_block;

                i--;
                ppos++;
                /* Only (re)read the child when the cached entry does not
                 * already reference the wanted block. */
                if (!path[ppos].block.lb_id ||
                    path[ppos].block.lb_id != buf_block) {
                        ret = read_extent_tree_block(inode_ref, buf_block, i,
                                                     &bh, flags);
                        if (ret != EOK) {
                                goto err;
                        }
                        /* Sanity: the walk must never exceed the declared
                         * depth; a deeper descent means a corrupted tree. */
                        if (ppos > depth) {
                                ext4_block_set(inode_ref->fs->bdev, &bh);
                                ret = EIO;
                                goto err;
                        }

                        eh = ext_block_hdr(&bh);
                        path[ppos].block = bh;
                        path[ppos].header = eh;
                }
        }

        /* Leaf level reached (i == 0). */
        path[ppos].depth = i;
        path[ppos].extent = NULL;
        path[ppos].index = NULL;

        /* find extent */
        ext4_ext_binsearch(path + ppos, block);
        /* if not an empty leaf */
        if (path[ppos].extent)
                path[ppos].p_block = ext4_ext_pblock(path[ppos].extent);

        *orig_path = path;

        ret = EOK;
        return ret;

err:
        ext4_ext_drop_refs(inode_ref, path, 0);
        free(path);
        if (orig_path)
                *orig_path = NULL;
        return ret;
}
553
554 static void ext4_ext_init_header(struct ext4_inode_ref *inode_ref,
555                                  struct ext4_extent_header *eh, int32_t depth)
556 {
557         eh->entries_count = 0;
558         eh->max_entries_count = to_le16(ext4_ext_max_entries(inode_ref, depth));
559         eh->magic = to_le16(EXT4_EXTENT_MAGIC);
560         eh->depth = depth;
561 }
562
563 /*
564  * Be cautious, the buffer_head returned is not yet mark dirtied. */
565 static int ext4_ext_split_node(struct ext4_inode_ref *inode_ref,
566                                struct ext4_extent_path *path, int32_t at,
567                                struct ext4_extent *newext,
568                                ext4_fsblk_t *sibling, struct ext4_block *new_bh)
569 {
570         int ret;
571         ext4_fsblk_t newblock;
572         struct ext4_block bh = EXT4_BLOCK_ZERO();
573         int32_t depth = ext_depth(inode_ref->inode);
574
575         ext4_assert(sibling);
576
577         /* FIXME: currently we split at the point after the current extent. */
578         newblock = ext4_ext_new_meta_block(inode_ref, path, newext, &ret, 0);
579         if (ret)
580                 goto cleanup;
581
582         /*  For write access.# */
583         ret = ext4_block_get(inode_ref->fs->bdev, &bh, newblock);
584         if (ret != EOK)
585                 goto cleanup;
586
587         if (at == depth) {
588                 /* start copy from next extent */
589                 ptrdiff_t m = EXT_MAX_EXTENT(path[at].header) - path[at].extent;
590                 struct ext4_extent_header *neh;
591                 neh = ext_block_hdr(&bh);
592                 ext4_ext_init_header(inode_ref, neh, 0);
593                 if (m) {
594                         struct ext4_extent *ex;
595                         ex = EXT_FIRST_EXTENT(neh);
596                         memmove(ex, path[at].extent + 1,
597                                 sizeof(struct ext4_extent) * m);
598                         neh->entries_count =
599                             to_le16(to_le16(neh->entries_count) + m);
600                         path[at].header->entries_count = to_le16(
601                             to_le16(path[at].header->entries_count) - m);
602                         ret = ext4_ext_dirty(inode_ref, path + at);
603                         if (ret)
604                                 goto cleanup;
605                 }
606         } else {
607                 ptrdiff_t m = EXT_MAX_INDEX(path[at].header) - path[at].index;
608                 struct ext4_extent_header *neh;
609                 neh = ext_block_hdr(&bh);
610                 ext4_ext_init_header(inode_ref, neh, depth - at);
611                 if (m) {
612                         struct ext4_extent_index *ix;
613                         ix = EXT_FIRST_INDEX(neh);
614                         memmove(ix, path[at].index + 1,
615                                 sizeof(struct ext4_extent) * m);
616                         neh->entries_count =
617                             to_le16(to_le16(neh->entries_count) + m);
618                         path[at].header->entries_count = to_le16(
619                             to_le16(path[at].header->entries_count) - m);
620                         ret = ext4_ext_dirty(inode_ref, path + at);
621                         if (ret)
622                                 goto cleanup;
623                 }
624         }
625 cleanup:
626         if (ret) {
627                 if (bh.lb_id) {
628                         ext4_block_set(inode_ref->fs->bdev, &bh);
629                 }
630                 if (newblock)
631                         ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
632
633                 newblock = 0;
634         }
635         *sibling = newblock;
636         *new_bh = bh;
637         return ret;
638 }
639
640 static ext4_lblk_t ext4_ext_block_index(struct ext4_extent_header *eh)
641 {
642         if (eh->depth)
643                 return to_le32(EXT_FIRST_INDEX(eh)->first_block);
644
645         return to_le32(EXT_FIRST_EXTENT(eh)->first_block);
646 }
647
/* Returned by the insert helpers when the in-inode root is full and the
 * tree must grow one level before the entry can be placed. */
#define EXT_INODE_HDR_NEED_GROW 0x1

/* Describes the outcome of splitting a node during an insert. */
struct ext_split_trans {
	ext4_fsblk_t ptr;		/* sibling block number, 0 if none */
	struct ext4_extent_path path;	/* path element for the sibling */
	int switch_to;			/* nonzero: continue in the sibling */
};
655
/*
 * Insert a new index entry (@insert_index -> @insert_block) into the
 * node at level @at of @path. When the node is full it is split via
 * ext4_ext_split_node() and @spt describes the resulting sibling; when
 * the full node is the in-inode root, EXT_INODE_HDR_NEED_GROW is
 * returned so the caller can grow the tree first.
 */
static int ext4_ext_insert_index(struct ext4_inode_ref *inode_ref,
                                 struct ext4_extent_path *path, int32_t at,
                                 struct ext4_extent *newext,
                                 ext4_lblk_t insert_index,
                                 ext4_fsblk_t insert_block,
                                 struct ext_split_trans *spt)
{
        struct ext4_extent_index *ix;
        struct ext4_extent_path *curp = path + at;
        struct ext4_block bh = EXT4_BLOCK_ZERO();
        int32_t len;
        int err;
        struct ext4_extent_header *eh;

        /* Duplicate logical start blocks are not allowed. */
        if (curp->index && insert_index == to_le32(curp->index->first_block))
                return EIO;

        if (to_le16(curp->header->entries_count) ==
            to_le16(curp->header->max_entries_count)) {
                if (at) {
                        /* Node full: split it, then pick which half gets
                         * the new entry. */
                        struct ext4_extent_header *neh;
                        err = ext4_ext_split_node(inode_ref, path, at, newext,
                                                  &spt->ptr, &bh);
                        if (err != EOK)
                                goto out;

                        neh = ext_block_hdr(&bh);
                        if (insert_index > to_le32(curp->index->first_block)) {
                                /* Make decision which node should be used to
                                 * insert the index.*/
                                if (to_le16(neh->entries_count) >
                                    to_le16(curp->header->entries_count)) {
                                        eh = curp->header;
                                        /* insert after */
                                        ix = EXT_LAST_INDEX(eh) + 1;
                                } else {
                                        eh = neh;
                                        ix = EXT_FIRST_INDEX(eh);
                                }
                        } else {
                                eh = curp->header;
                                /* insert before */
                                ix = EXT_LAST_INDEX(eh);
                        }
                } else {
                        /* Full in-inode root: tree must grow one level. */
                        err = EXT_INODE_HDR_NEED_GROW;
                        goto out;
                }
        } else {
                eh = curp->header;
                if (curp->index == NULL) {
                        ix = EXT_FIRST_INDEX(eh);
                        curp->index = ix;
                } else if (insert_index > to_le32(curp->index->first_block)) {
                        /* insert after */
                        ix = curp->index + 1;
                } else {
                        /* insert before */
                        ix = curp->index;
                }
        }

        /* Shift entries at and after @ix one slot right to make room. */
        len = EXT_LAST_INDEX(eh) - ix + 1;
        ext4_assert(len >= 0);
        if (len > 0)
                memmove(ix + 1, ix, len * sizeof(struct ext4_extent_index));

        if (ix > EXT_MAX_INDEX(eh)) {
                err = EIO;
                goto out;
        }

        ix->first_block = to_le32(insert_index);
        ext4_idx_store_pblock(ix, insert_block);
        eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);

        if (ix > EXT_LAST_INDEX(eh)) {
                err = EIO;
                goto out;
        }

        /* The sibling's buffer is dirtied below; only the current node
         * needs an explicit dirty here. */
        if (eh == curp->header)
                err = ext4_ext_dirty(inode_ref, curp);
        else
                err = EOK;

out:
        if (err != EOK) {
                if (bh.lb_id)
                        ext4_block_set(inode_ref->fs->bdev, &bh);

                spt->ptr = 0;
        } else if (bh.lb_id) {
                /* If we got a sibling leaf. */
                bh.dirty = true;

                spt->path.p_block = ext4_idx_pblock(ix);
                spt->path.depth = to_le16(eh->depth);
                spt->path.maxdepth = 0;
                spt->path.extent = NULL;
                spt->path.index = ix;
                spt->path.header = eh;
                spt->path.block = bh;

                /*
                 * If newext->ee_block can be included into the
                 * right sub-tree.
                 */
                if (to_le32(newext->first_block) >=
                    ext4_ext_block_index(ext_block_hdr(&bh)))
                        spt->switch_to = 1;
                else {
                        curp->index = ix;
                        curp->p_block = ext4_idx_pblock(ix);
                }

        } else {
                spt->ptr = 0;
                curp->index = ix;
                curp->p_block = ext4_idx_pblock(ix);
        }
        return err;
}
779
780 /*
781  * ext4_ext_correct_indexes:
782  * if leaf gets modified and modified extent is first in the leaf,
783  * then we have to correct all indexes above.
784  */
static int ext4_ext_correct_indexes(struct ext4_inode_ref *inode_ref,
                                    struct ext4_extent_path *path)
{
        struct ext4_extent_header *eh;
        int32_t depth = ext_depth(inode_ref->inode);
        struct ext4_extent *ex;
        uint32_t border;
        int32_t k;
        int err = EOK;

        eh = path[depth].header;
        ex = path[depth].extent;

        if (ex == NULL || eh == NULL) {
                return EIO;
        }

        if (depth == 0) {
                /* there is no tree at all */
                return EOK;
        }

        if (ex != EXT_FIRST_EXTENT(eh)) {
                /* we correct tree if first leaf got modified only */
                return EOK;
        }

        /*
         * TODO: we need correction if border is smaller than current one
         */
        /* border is copied raw: both sides are on-disk le32 values. */
        k = depth - 1;
        border = path[depth].extent->first_block;
        path[k].index->first_block = border;
        err = ext4_ext_dirty(inode_ref, path + k);
        if (err != EOK)
                return err;

        while (k--) {
                /* change all left-side indexes */
                if (path[k + 1].index != EXT_FIRST_INDEX(path[k + 1].header))
                        break;
                path[k].index->first_block = border;
                err = ext4_ext_dirty(inode_ref, path + k);
                if (err != EOK)
                        break;
        }

        return err;
}
834
835 static bool ext4_ext_can_prepend(struct ext4_extent *ex1,
836                                  struct ext4_extent *ex2)
837 {
838         if (ext4_ext_pblock(ex2) + ext4_ext_get_actual_len(ex2) !=
839             ext4_ext_pblock(ex1))
840                 return false;
841
842         if (ext4_ext_is_unwritten(ex1)) {
843                 if (ext4_ext_get_actual_len(ex1) +
844                         ext4_ext_get_actual_len(ex2) >
845                     EXT_UNWRITTEN_MAX_LEN)
846                         return false;
847         } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
848                    EXT_INIT_MAX_LEN)
849                 return false;
850
851         if (to_le32(ex2->first_block) + ext4_ext_get_actual_len(ex2) !=
852             to_le32(ex1->first_block))
853                 return false;
854
855         return true;
856 }
857
858 static bool ext4_ext_can_append(struct ext4_extent *ex1,
859                                 struct ext4_extent *ex2)
860 {
861         if (ext4_ext_pblock(ex1) + ext4_ext_get_actual_len(ex1) !=
862             ext4_ext_pblock(ex2))
863                 return false;
864
865         if (ext4_ext_is_unwritten(ex1)) {
866                 if (ext4_ext_get_actual_len(ex1) +
867                         ext4_ext_get_actual_len(ex2) >
868                     EXT_UNWRITTEN_MAX_LEN)
869                         return false;
870         } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
871                    EXT_INIT_MAX_LEN)
872                 return false;
873
874         if (to_le32(ex1->first_block) + ext4_ext_get_actual_len(ex1) !=
875             to_le32(ex2->first_block))
876                 return false;
877
878         return true;
879 }
880
/*
 * Insert @newext into the node at level @at of @path.  Despite the
 * name the routine is used for the node @curp points at; on success the
 * cached path is updated to reference the inserted entry.  If the node
 * is full it is split via ext4_ext_split_node() and the sibling's path
 * entry is returned through @spt so the caller can add an index for it
 * one level up.  Returns EXT_INODE_HDR_NEED_GROW when the in-inode
 * root itself is full.
 */
static int ext4_ext_insert_leaf(struct ext4_inode_ref *inode_ref,
                                struct ext4_extent_path *path, int32_t at,
                                struct ext4_extent *newext,
                                struct ext_split_trans *spt, uint32_t flags)
{
        struct ext4_extent_path *curp = path + at;
        struct ext4_extent *ex = curp->extent;
        struct ext4_block bh = EXT4_BLOCK_ZERO();
        int32_t len;
        int err = EOK;
        int unwritten;
        struct ext4_extent_header *eh = NULL;

        /* Duplicate logical start block: refuse the insertion. */
        if (curp->extent &&
            to_le32(newext->first_block) == to_le32(curp->extent->first_block))
                return EIO;

        if (!(flags & EXT4_EXT_NO_COMBINE)) {
                /* Try to merge @newext onto the end of the current extent. */
                if (curp->extent && ext4_ext_can_append(curp->extent, newext)) {
                        unwritten = ext4_ext_is_unwritten(curp->extent);
                        curp->extent->block_count =
                            to_le16(ext4_ext_get_actual_len(curp->extent) +
                                    ext4_ext_get_actual_len(newext));
                        /* Rewriting block_count clears the unwritten bit,
                         * so re-apply it. */
                        if (unwritten)
                                ext4_ext_mark_unwritten(curp->extent);
                        err = ext4_ext_dirty(inode_ref, curp);
                        goto out;
                }

                /* Or glue @newext in front of the current extent. */
                if (curp->extent &&
                    ext4_ext_can_prepend(curp->extent, newext)) {
                        unwritten = ext4_ext_is_unwritten(curp->extent);
                        curp->extent->first_block = newext->first_block;
                        curp->extent->block_count =
                            to_le16(ext4_ext_get_actual_len(curp->extent) +
                                    ext4_ext_get_actual_len(newext));
                        if (unwritten)
                                ext4_ext_mark_unwritten(curp->extent);
                        err = ext4_ext_dirty(inode_ref, curp);
                        goto out;
                }
        }

        if (to_le16(curp->header->entries_count) ==
            to_le16(curp->header->max_entries_count)) {
                /* Node is full. */
                if (at) {
                        struct ext4_extent_header *neh;
                        /* Split; the new sibling block is returned via
                         * @bh and its physical address via @spt->ptr. */
                        err = ext4_ext_split_node(inode_ref, path, at, newext,
                                                  &spt->ptr, &bh);
                        if (err != EOK)
                                goto out;

                        /* Decide whether the insertion slot now lives in
                         * the old node or in the new sibling. */
                        neh = ext_block_hdr(&bh);
                        if (to_le32(newext->first_block) >
                            to_le32(curp->extent->first_block)) {
                                if (to_le16(neh->entries_count) >
                                    to_le16(curp->header->entries_count)) {
                                        eh = curp->header;
                                        /* insert after */
                                        ex = EXT_LAST_EXTENT(eh) + 1;
                                } else {
                                        eh = neh;
                                        ex = EXT_FIRST_EXTENT(eh);
                                }
                        } else {
                                eh = curp->header;
                                /* insert before */
                                ex = EXT_LAST_EXTENT(eh);
                        }
                } else {
                        /* The in-inode root is full: the caller must grow
                         * the tree by one level and retry. */
                        err = EXT_INODE_HDR_NEED_GROW;
                        goto out;
                }
        } else {
                eh = curp->header;
                if (curp->extent == NULL) {
                        ex = EXT_FIRST_EXTENT(eh);
                        curp->extent = ex;
                } else if (to_le32(newext->first_block) >
                           to_le32(curp->extent->first_block)) {
                        /* insert after */
                        ex = curp->extent + 1;
                } else {
                        /* insert before */
                        ex = curp->extent;
                }
        }

        /* Shift entries at and after @ex one slot right to open a hole
         * for the new entry. */
        len = EXT_LAST_EXTENT(eh) - ex + 1;
        ext4_assert(len >= 0);
        if (len > 0)
                memmove(ex + 1, ex, len * sizeof(struct ext4_extent));

        if (ex > EXT_MAX_EXTENT(eh)) {
                err = EIO;
                goto out;
        }

        ex->first_block = newext->first_block;
        ex->block_count = newext->block_count;
        ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
        eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);

        if (ex > EXT_LAST_EXTENT(eh)) {
                err = EIO;
                goto out;
        }

        if (eh == curp->header) {
                /* Inserted into the original node: fix parent indexes
                 * and write the node back. */
                err = ext4_ext_correct_indexes(inode_ref, path);
                if (err != EOK)
                        goto out;
                err = ext4_ext_dirty(inode_ref, curp);
        } else
                err = EOK;

out:
        if (err != EOK) {
                /* Failure: release the sibling buffer (its block is
                 * freed by the caller via spt->ptr bookkeeping). */
                if (bh.lb_id)
                        ext4_block_set(inode_ref->fs->bdev, &bh);

                spt->ptr = 0;
        } else if (bh.lb_id) {
                /* If we got a sibling leaf. */
                bh.dirty = true;

                /* Hand the sibling's path entry to the caller so it can
                 * insert an index for it one level up. */
                spt->path.p_block = ext4_ext_pblock(ex);
                spt->path.depth = to_le16(eh->depth);
                spt->path.maxdepth = 0;
                spt->path.extent = ex;
                spt->path.index = NULL;
                spt->path.header = eh;
                spt->path.block = bh;

                /*
                 * If newext->ee_block can be included into the
                 * right sub-tree.
                 */
                if (to_le32(newext->first_block) >=
                    ext4_ext_block_index(ext_block_hdr(&bh)))
                        spt->switch_to = 1;
                else {
                        curp->extent = ex;
                        curp->p_block = ext4_ext_pblock(ex);
                }

        } else {
                /* No split happened: just refresh the cached position. */
                spt->ptr = 0;
                curp->extent = ex;
                curp->p_block = ext4_ext_pblock(ex);
        }

        return err;
}
1035
1036 /*
1037  * ext4_ext_grow_indepth:
1038  * implements tree growing procedure:
1039  * - allocates new block
1040  * - moves top-level data (index block or leaf) into the new block
1041  * - initializes new top-level, creating index that points to the
1042  *   just created block
1043  */
1044 static int ext4_ext_grow_indepth(struct ext4_inode_ref *inode_ref,
1045                                  uint32_t flags)
1046 {
1047         struct ext4_extent_header *neh;
1048         struct ext4_block bh = EXT4_BLOCK_ZERO();
1049         ext4_fsblk_t newblock, goal = 0;
1050         int err = EOK;
1051
1052         /* Try to prepend new index to old one */
1053         if (ext_depth(inode_ref->inode))
1054                 goal = ext4_idx_pblock(
1055                     EXT_FIRST_INDEX(ext_inode_hdr(inode_ref->inode)));
1056         else
1057                 goal = ext4_fs_inode_to_goal_block(inode_ref);
1058
1059         newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, &err);
1060         if (newblock == 0)
1061                 return err;
1062
1063         /* # */
1064         err = ext4_block_get(inode_ref->fs->bdev, &bh, newblock);
1065         if (err != EOK) {
1066                 ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
1067                 return err;
1068         }
1069
1070         /* move top-level index/leaf into new block */
1071         memmove(bh.data, inode_ref->inode->blocks,
1072                 sizeof(inode_ref->inode->blocks));
1073
1074         /* set size of new block */
1075         neh = ext_block_hdr(&bh);
1076         /* old root could have indexes or leaves
1077          * so calculate e_max right way */
1078         if (ext_depth(inode_ref->inode))
1079                 neh->max_entries_count =
1080                     to_le16(ext4_ext_space_block_idx(inode_ref));
1081         else
1082                 neh->max_entries_count =
1083                     to_le16(ext4_ext_space_block(inode_ref));
1084
1085         neh->magic = to_le16(EXT4_EXTENT_MAGIC);
1086         ext4_extent_block_csum_set(inode_ref, neh);
1087
1088         /* Update top-level index: num,max,pointer */
1089         neh = ext_inode_hdr(inode_ref->inode);
1090         neh->entries_count = to_le16(1);
1091         ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
1092         if (neh->depth == 0) {
1093                 /* Root extent block becomes index block */
1094                 neh->max_entries_count =
1095                     to_le16(ext4_ext_space_root_idx(inode_ref));
1096                 EXT_FIRST_INDEX(neh)
1097                     ->first_block = EXT_FIRST_EXTENT(neh)->first_block;
1098         }
1099         neh->depth = to_le16(to_le16(neh->depth) + 1);
1100
1101         bh.dirty = true;
1102         inode_ref->dirty = true;
1103         ext4_block_set(inode_ref->fs->bdev, &bh);
1104
1105         return err;
1106 }
1107
1108 __unused static void print_path(struct ext4_extent_path *path)
1109 {
1110         int32_t i = path->depth;
1111         while (i >= 0) {
1112
1113                 ptrdiff_t a =
1114                     (path->extent)
1115                         ? (path->extent - EXT_FIRST_EXTENT(path->header))
1116                         : 0;
1117                 ptrdiff_t b =
1118                     (path->index)
1119                         ? (path->index - EXT_FIRST_INDEX(path->header))
1120                         : 0;
1121
1122                 (void)a;
1123                 (void)b;
1124                 ext4_dbg(DEBUG_EXTENT,
1125                          "depth %" PRId32 ", p_block: %" PRIu64 ","
1126                          "p_ext offset: %td, p_idx offset: %td\n",
1127                          i, path->p_block, a, b);
1128                 i--;
1129                 path++;
1130         }
1131 }
1132
1133 static void ext4_ext_replace_path(struct ext4_inode_ref *inode_ref,
1134                                   struct ext4_extent_path *path,
1135                                   struct ext_split_trans *spt, int32_t depth,
1136                                   int32_t level)
1137 {
1138         int32_t i = depth - level;
1139
1140         ext4_ext_drop_refs(inode_ref, path + i, 1);
1141         path[i] = spt->path;
1142 }
1143
/*
 * Insert @newext into the extent tree rooted at *@ppath, splitting full
 * nodes bottom-up and growing the tree by one level when even the
 * in-inode root is full.  On failure every block allocated by partial
 * splits is freed again.
 */
static int ext4_ext_insert_extent(struct ext4_inode_ref *inode_ref,
                                  struct ext4_extent_path **ppath,
                                  struct ext4_extent *newext, uint32_t flags)
{
        int32_t i, depth, level;
        int ret = EOK;
        ext4_fsblk_t ptr = 0;
        struct ext4_extent_path *path = *ppath;
        struct ext_split_trans *spt = NULL;
        struct ext_split_trans newblock;

        memset(&newblock, 0, sizeof(newblock));

        /* Count the full levels from the leaf upwards, stopping at the
         * first node with a free slot. */
        depth = ext_depth(inode_ref->inode);
        for (i = depth, level = 0; i >= 0; i--, level++)
                if (EXT_HAS_FREE_INDEX(path + i))
                        break;

        /* One split record per level that may have to split. */
        if (level) {
                spt = calloc(1, sizeof(struct ext_split_trans) * (level));
                if (!spt) {
                        ret = ENOMEM;
                        goto out;
                }
        }
        i = 0;
again:
        depth = ext_depth(inode_ref->inode);

        /* Walk upwards: insert the extent into the leaf, then insert an
         * index entry for every sibling block produced by a split. */
        do {
                if (!i) {
                        ret = ext4_ext_insert_leaf(inode_ref, path, depth - i,
                                                   newext, &newblock, flags);
                } else {
                        ret = ext4_ext_insert_index(
                            inode_ref, path, depth - i, newext,
                            ext4_ext_block_index(
                                ext_block_hdr(&spt[i - 1].path.block)),
                            spt[i - 1].ptr, &newblock);
                }
                ptr = newblock.ptr;

                if (ret && ret != EXT_INODE_HDR_NEED_GROW)
                        goto out;
                else if (spt && ptr && !ret) {
                        /* Prepare for the next iteration after splitting. */
                        spt[i] = newblock;
                }

                i++;
        } while (ptr != 0 && i <= depth);

        /* The root itself was full: grow the tree one level, re-resolve
         * the path and retry the whole insertion. */
        if (ret == EXT_INODE_HDR_NEED_GROW) {
                ret = ext4_ext_grow_indepth(inode_ref, 0);
                if (ret)
                        goto out;
                ret = ext4_find_extent(inode_ref, to_le32(newext->first_block),
                                       ppath, 0);
                if (ret)
                        goto out;
                i = depth;
                path = *ppath;
                goto again;
        }
out:
        if (ret) {
                /* Failure: release the path and free every sibling block
                 * allocated by the partial splits. */
                if (path)
                        ext4_ext_drop_refs(inode_ref, path, 0);

                while (--level >= 0 && spt) {
                        if (spt[level].ptr) {
                                ext4_ext_free_blocks(inode_ref, spt[level].ptr,
                                                     1, 0);
                                ext4_ext_drop_refs(inode_ref, &spt[level].path,
                                                   1);
                        }
                }
        } else {
                /* Success: switch the cached path to the sibling where
                 * the inserted range now lives, release the rest. */
                while (--level >= 0 && spt) {
                        if (spt[level].switch_to)
                                ext4_ext_replace_path(inode_ref, path, spt,
                                                      depth, level);
                        else if (spt[level].ptr)
                                ext4_ext_drop_refs(inode_ref, &spt[level].path,
                                                   1);
                }
        }
        if (spt)
                free(spt);

        return ret;
}
1236
1237 static void ext4_ext_remove_blocks(struct ext4_inode_ref *inode_ref,
1238                                    struct ext4_extent *ex, ext4_lblk_t from,
1239                                    ext4_lblk_t to)
1240 {
1241         ext4_lblk_t len = to - from + 1;
1242         ext4_lblk_t num;
1243         ext4_fsblk_t start;
1244         num = from - to_le32(ex->first_block);
1245         start = ext4_ext_pblock(ex) + num;
1246         ext4_dbg(DEBUG_EXTENT,
1247                  "Freeing %" PRIu32 " at %" PRIu64 ", %" PRIu32 "\n", from,
1248                  start, len);
1249
1250         ext4_ext_free_blocks(inode_ref, start, len, 0);
1251 }
1252
/*
 * Remove the index entry at level @depth of @path, free the child
 * block it points to, and propagate the new first logical block up to
 * the parent indexes where necessary.
 */
static int ext4_ext_remove_idx(struct ext4_inode_ref *inode_ref,
                               struct ext4_extent_path *path, int32_t depth)
{
        int err = EOK;
        int32_t i = depth;
        ext4_fsblk_t leaf;

        /* free index block */
        leaf = ext4_idx_pblock(path[i].index);

        /* Close the gap left by the removed entry. */
        if (path[i].index != EXT_LAST_INDEX(path[i].header)) {
                ptrdiff_t len = EXT_LAST_INDEX(path[i].header) - path[i].index;
                memmove(path[i].index, path[i].index + 1,
                        len * sizeof(struct ext4_extent_index));
        }

        path[i].header->entries_count =
            to_le16(to_le16(path[i].header->entries_count) - 1);
        err = ext4_ext_dirty(inode_ref, path + i);
        if (err != EOK)
                return err;

        /* NOTE(review): first_block is read AFTER the memmove shifted
         * the entries, so the logged logical block may belong to the
         * successor entry — debug output only, verify if it matters. */
        ext4_dbg(DEBUG_EXTENT, "IDX: Freeing %" PRIu32 " at %" PRIu64 ", %d\n",
                 to_le32(path[i].index->first_block), leaf, 1);
        ext4_ext_free_blocks(inode_ref, leaf, 1, 0);

        /* If the removed entry was the first one of its node, parents
         * pointing at it must learn the new first logical block. */
        while (i > 0) {
                if (path[i].index != EXT_FIRST_INDEX(path[i].header))
                        break;

                path[i - 1].index->first_block = path[i].index->first_block;
                err = ext4_ext_dirty(inode_ref, path + i - 1);
                if (err != EOK)
                        break;

                i--;
        }
        return err;
}
1292
1293 static int ext4_ext_remove_leaf(struct ext4_inode_ref *inode_ref,
1294                                 struct ext4_extent_path *path, ext4_lblk_t from,
1295                                 ext4_lblk_t to)
1296 {
1297
1298         int32_t depth = ext_depth(inode_ref->inode);
1299         struct ext4_extent *ex = path[depth].extent;
1300         struct ext4_extent *start_ex, *ex2 = NULL;
1301         struct ext4_extent_header *eh = path[depth].header;
1302         int32_t len;
1303         int err = EOK;
1304         uint16_t new_entries;
1305
1306         start_ex = ex;
1307         new_entries = to_le16(eh->entries_count);
1308         while (ex <= EXT_LAST_EXTENT(path[depth].header) &&
1309                to_le32(ex->first_block) <= to) {
1310                 int32_t new_len = 0;
1311                 int unwritten;
1312                 ext4_fsblk_t start, new_start;
1313                 new_start = start = to_le32(ex->first_block);
1314                 len = ext4_ext_get_actual_len(ex);
1315                 if (start < from) {
1316                         start = from;
1317                         len -= from - start;
1318                         new_len = from - start;
1319                         start_ex++;
1320                 }
1321                 if (start + len - 1 > to) {
1322                         len -= start + len - 1 - to;
1323                         new_len = start + len - 1 - to;
1324                         new_start += to + 1;
1325                         ex2 = ex;
1326                 }
1327
1328                 ext4_ext_remove_blocks(inode_ref, ex, start, start + len - 1);
1329                 ex->first_block = to_le32(new_start);
1330                 if (!new_len)
1331                         new_entries--;
1332                 else {
1333                         unwritten = ext4_ext_is_unwritten(ex);
1334                         ex->block_count = to_le16(new_len);
1335                         if (unwritten)
1336                                 ext4_ext_mark_unwritten(ex);
1337                 }
1338
1339                 ex += 1;
1340         }
1341
1342         if (ex2 == NULL)
1343                 ex2 = ex;
1344
1345         if (ex2 <= EXT_LAST_EXTENT(eh))
1346                 memmove(start_ex, ex2, EXT_LAST_EXTENT(eh) - ex2 + 1);
1347
1348         eh->entries_count = to_le16(new_entries);
1349         ext4_ext_dirty(inode_ref, path + depth);
1350         if (path[depth].extent == EXT_FIRST_EXTENT(eh) && eh->entries_count)
1351                 err = ext4_ext_correct_indexes(inode_ref, path);
1352
1353         /* if this leaf is free, then we should
1354          * remove it from index block above */
1355         if (err == EOK && eh->entries_count == 0 && path[depth].block.lb_id)
1356                 err = ext4_ext_remove_idx(inode_ref, path, depth - 1);
1357
1358         return err;
1359 }
1360
1361 static int ext4_ext_more_to_rm(struct ext4_extent_path *path, ext4_lblk_t to)
1362 {
1363         if (!to_le16(path->header->entries_count))
1364                 return 0;
1365
1366         if (path->index > EXT_LAST_INDEX(path->header))
1367                 return 0;
1368
1369         if (to_le32(path->index->first_block) > to)
1370                 return 0;
1371
1372         return 1;
1373 }
1374
/*
 * Remove the logical block range [@from, @to] (inclusive) from the
 * extent tree of @inode_ref: free the backing blocks, prune emptied
 * index blocks on the way up, and collapse the tree to depth 0 when it
 * becomes empty.
 */
int ext4_extent_remove_space(struct ext4_inode_ref *inode_ref, ext4_lblk_t from,
                          ext4_lblk_t to)
{
        struct ext4_extent_path *path = NULL;
        int ret = EOK;
        int32_t depth = ext_depth(inode_ref->inode);
        int32_t i;

        ret = ext4_find_extent(inode_ref, from, &path, 0);
        if (ret)
                goto out;

        /* Nothing to do when @from is not covered by any extent. */
        if (!path[depth].extent ||
            !IN_RANGE(from, to_le32(path[depth].extent->first_block),
                      ext4_ext_get_actual_len(path[depth].extent))) {
                ret = EOK;
                goto out;
        }

        /* Depth-first walk: strip leaves, then step back up and descend
         * into the next child that still overlaps the range. */
        i = depth;
        while (i >= 0) {
                if (i == depth) {
                        /* Leaf level: clamp the removal range to this
                         * leaf and remove the covered extents. */
                        struct ext4_extent_header *eh;
                        struct ext4_extent *first_ex, *last_ex;
                        ext4_lblk_t leaf_from, leaf_to;
                        eh = path[i].header;
                        ext4_assert(to_le16(eh->entries_count) > 0);
                        first_ex = EXT_FIRST_EXTENT(eh);
                        last_ex = EXT_LAST_EXTENT(eh);
                        leaf_from = to_le32(first_ex->first_block);
                        leaf_to = to_le32(last_ex->first_block) +
                                  ext4_ext_get_actual_len(last_ex) - 1;
                        if (leaf_from < from)
                                leaf_from = from;

                        if (leaf_to > to)
                                leaf_to = to;

                        ext4_ext_remove_leaf(inode_ref, path, leaf_from,
                                             leaf_to);
                        ext4_ext_drop_refs(inode_ref, path + i, 0);
                        i--;
                        continue;
                } else {
                        /* Index level. */
                        struct ext4_extent_header *eh;
                        eh = path[i].header;
                        if (ext4_ext_more_to_rm(path + i, to)) {
                                /* Descend into the next child block. */
                                struct ext4_block bh = EXT4_BLOCK_ZERO();
                                if (path[i + 1].block.lb_id)
                                        ext4_ext_drop_refs(inode_ref,
                                                           path + i + 1, 0);

                                ret = read_extent_tree_block(
                                    inode_ref, ext4_idx_pblock(path[i].index),
                                    depth - i - 1, &bh, 0);
                                if (ret)
                                        goto out;

                                /* Refresh the cached path for the child
                                 * level just read. */
                                path[i].p_block =
                                    ext4_idx_pblock(path[i].index);
                                path[i + 1].block = bh;
                                path[i + 1].header = ext_block_hdr(&bh);
                                path[i + 1].depth = depth - i - 1;
                                if (i + 1 == depth)
                                        path[i + 1].extent = EXT_FIRST_EXTENT(
                                            path[i + 1].header);
                                else
                                        path[i + 1].index =
                                            EXT_FIRST_INDEX(path[i + 1].header);

                                i++;
                        } else {
                                /* This node is finished; if it became
                                 * empty, remove its entry from the
                                 * parent, then move up one level. */
                                if (!eh->entries_count && i > 0) {

                                        ret = ext4_ext_remove_idx(inode_ref,
                                                                  path, i - 1);
                                }
                                if (i) {
                                        ext4_block_set(inode_ref->fs->bdev,
                                                       &path[i].block);
                                }
                                i--;
                        }
                }
        }

        /* TODO: flexible tree reduction should be here */
        if (path->header->entries_count == 0) {
                /*
                 * truncate to zero freed all the tree,
                 * so we need to correct eh_depth
                 */
                ext_inode_hdr(inode_ref->inode)->depth = 0;
                ext_inode_hdr(inode_ref->inode)->max_entries_count =
                    to_le16(ext4_ext_space_root(inode_ref));
                ret = ext4_ext_dirty(inode_ref, path);
        }

out:
        /* NOTE(review): if ext4_find_extent() failed, @path may still be
         * NULL here — confirm ext4_ext_drop_refs() tolerates a NULL
         * path before relying on this error path. */
        ext4_ext_drop_refs(inode_ref, path, 0);
        free(path);
        path = NULL;
        return ret;
}
1479
/*
 * Split the extent at the leaf of *@ppath at logical block @split.
 * @split_flag selects which half ends up marked unwritten:
 * EXT4_EXT_MARK_UNWRIT1 marks the left part, EXT4_EXT_MARK_UNWRIT2 the
 * right part.  If insertion of the right half fails, the original
 * extent length is restored.
 */
static int ext4_ext_split_extent_at(struct ext4_inode_ref *inode_ref,
                                    struct ext4_extent_path **ppath,
                                    ext4_lblk_t split, uint32_t split_flag)
{
        struct ext4_extent *ex, newex;
        ext4_fsblk_t newblock;
        ext4_lblk_t ee_block;
        int32_t ee_len;
        int32_t depth = ext_depth(inode_ref->inode);
        int err = EOK;

        ex = (*ppath)[depth].extent;
        ee_block = to_le32(ex->first_block);
        ee_len = ext4_ext_get_actual_len(ex);
        /* Physical block where the right half will start. */
        newblock = split - ee_block + ext4_ext_pblock(ex);

        if (split == ee_block) {
                /*
                 * case b: block @split is the block that the extent begins with
                 * then we just change the state of the extent, and splitting
                 * is not needed.
                 */
                if (split_flag & EXT4_EXT_MARK_UNWRIT2)
                        ext4_ext_mark_unwritten(ex);
                else
                        ext4_ext_mark_initialized(ex);

                err = ext4_ext_dirty(inode_ref, *ppath + depth);
                goto out;
        }

        /* Shrink the original extent to the left half... */
        ex->block_count = to_le16(split - ee_block);
        if (split_flag & EXT4_EXT_MARK_UNWRIT1)
                ext4_ext_mark_unwritten(ex);

        err = ext4_ext_dirty(inode_ref, *ppath + depth);
        if (err != EOK)
                goto out;

        /* ...and insert the right half as a new extent. */
        newex.first_block = to_le32(split);
        newex.block_count = to_le16(ee_len - (split - ee_block));
        ext4_ext_store_pblock(&newex, newblock);
        if (split_flag & EXT4_EXT_MARK_UNWRIT2)
                ext4_ext_mark_unwritten(&newex);
        err = ext4_ext_insert_extent(inode_ref, ppath, &newex,
                                     EXT4_EXT_NO_COMBINE);
        if (err != EOK)
                goto restore_extent_len;

out:
        return err;
restore_extent_len:
        /* Insertion failed: undo the shrink so the tree stays intact. */
        ex->block_count = to_le16(ee_len);
        err = ext4_ext_dirty(inode_ref, *ppath + depth);
        return err;
}
1536
1537 static int ext4_ext_convert_to_initialized(struct ext4_inode_ref *inode_ref,
1538                                            struct ext4_extent_path **ppath,
1539                                            ext4_lblk_t split, uint32_t blocks)
1540 {
1541         int32_t depth = ext_depth(inode_ref->inode), err = EOK;
1542         struct ext4_extent *ex = (*ppath)[depth].extent;
1543
1544         ext4_assert(to_le32(ex->first_block) <= split);
1545
1546         if (split + blocks ==
1547             to_le32(ex->first_block) + ext4_ext_get_actual_len(ex)) {
1548                 /* split and initialize right part */
1549                 err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1550                                                EXT4_EXT_MARK_UNWRIT1);
1551         } else if (to_le32(ex->first_block) == split) {
1552                 /* split and initialize left part */
1553                 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1554                                                EXT4_EXT_MARK_UNWRIT2);
1555         } else {
1556                 /* split 1 extent to 3 and initialize the 2nd */
1557                 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1558                                                EXT4_EXT_MARK_UNWRIT1 |
1559                                                    EXT4_EXT_MARK_UNWRIT2);
1560                 if (!err) {
1561                         err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1562                                                        EXT4_EXT_MARK_UNWRIT1);
1563                 }
1564         }
1565
1566         return err;
1567 }
1568
1569 /*
1570  * ext4_ext_next_allocated_block:
1571  * returns allocated block in subsequent extent or EXT_MAX_BLOCKS.
1572  * NOTE: it considers block number from index entry as
1573  * allocated block. Thus, index entries have to be consistent
1574  * with leaves.
1575  */
1576 #define EXT_MAX_BLOCKS (ext4_lblk_t) - 1
1577
1578 static ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_extent_path *path)
1579 {
1580         int32_t depth;
1581
1582         depth = path->depth;
1583
1584         if (depth == 0 && path->extent == NULL)
1585                 return EXT_MAX_BLOCKS;
1586
1587         while (depth >= 0) {
1588                 if (depth == path->depth) {
1589                         /* leaf */
1590                         if (path[depth].extent &&
1591                             path[depth].extent !=
1592                                 EXT_LAST_EXTENT(path[depth].header))
1593                                 return to_le32(
1594                                     path[depth].extent[1].first_block);
1595                 } else {
1596                         /* index */
1597                         if (path[depth].index !=
1598                             EXT_LAST_INDEX(path[depth].header))
1599                                 return to_le32(
1600                                     path[depth].index[1].first_block);
1601                 }
1602                 depth--;
1603         }
1604
1605         return EXT_MAX_BLOCKS;
1606 }
1607
1608 static int ext4_ext_zero_unwritten_range(struct ext4_inode_ref *inode_ref,
1609                                          ext4_fsblk_t block,
1610                                          uint32_t blocks_count)
1611 {
1612         int err = EOK;
1613         uint32_t i;
1614         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
1615         for (i = 0; i < blocks_count; i++) {
1616                 uint32_t block_u32 = (uint32_t)block + (uint32_t)i;
1617                 struct ext4_block bh = EXT4_BLOCK_ZERO();
1618                 err = ext4_block_get(inode_ref->fs->bdev, &bh, block_u32);
1619                 if (err != EOK)
1620                         break;
1621
1622                 memset(bh.data, 0, block_size);
1623                 bh.dirty = true;
1624                 err = ext4_block_set(inode_ref->fs->bdev, &bh);
1625                 if (err != EOK)
1626                         break;
1627         }
1628         return err;
1629 }
1630
/* Map logical block @iblock of @inode_ref to a physical block, optionally
 * allocating (and, for unwritten extents, zeroing + converting) when
 * @create is true.
 *
 * On success *result holds the physical block (0 when an unwritten
 * extent is hit with create == false) and *blocks_count the number of
 * contiguous blocks mapped, clamped to @max_blocks. Both output
 * pointers may be NULL. Returns EOK or an error code.
 *
 * NOTE(review): @iblock is a *logical* block number but is typed
 * ext4_fsblk_t (physical, 64-bit); it is narrowed via to_le32() below.
 * Presumably callers only pass values that fit ext4_lblk_t — confirm;
 * fixing the type is a signature change and out of scope here. */
int ext4_extent_get_blocks(struct ext4_inode_ref *inode_ref, ext4_fsblk_t iblock,
			uint32_t max_blocks, ext4_fsblk_t *result, bool create,
			uint32_t *blocks_count)
{
	struct ext4_extent_path *path = NULL;
	struct ext4_extent newex, *ex;
	ext4_fsblk_t goal;
	int err = EOK;
	int32_t depth;
	uint32_t allocated = 0;
	ext4_fsblk_t next, newblock;

	/* Default the outputs so early exits report "nothing mapped". */
	if (result)
		*result = 0;

	if (blocks_count)
		*blocks_count = 0;

	/* find extent for this block */
	err = ext4_find_extent(inode_ref, iblock, &path, 0);
	if (err != EOK) {
		path = NULL;
		goto out2;
	}

	depth = ext_depth(inode_ref->inode);

	/*
	 * consistent leaf must not be empty
	 * this situations is possible, though, _during_ tree modification
	 * this is why assert can't be put in ext4_ext_find_extent()
	 */
	if ((ex = path[depth].extent)) {
		ext4_lblk_t ee_block = to_le32(ex->first_block);
		ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
		uint16_t ee_len = ext4_ext_get_actual_len(ex);
		/* if found exent covers block, simple return it */
		if (IN_RANGE(iblock, ee_block, ee_len)) {
			/* number of remain blocks in the extent */
			allocated = ee_len - (iblock - ee_block);

			if (ext4_ext_is_unwritten(ex)) {
				if (create) {
					/* Zero at most max_blocks worth of
					 * the unwritten range, then mark
					 * that part initialized. */
					uint32_t zero_range;
					zero_range = allocated;
					if (zero_range > max_blocks)
						zero_range = max_blocks;

					newblock = iblock - ee_block + ee_start;
					err = ext4_ext_zero_unwritten_range(
					    inode_ref, newblock, zero_range);
					if (err != EOK)
						goto out2;

					err = ext4_ext_convert_to_initialized(
					    inode_ref, &path, iblock,
					    zero_range);
					if (err != EOK)
						goto out2;

				} else {
					/* Reading an unwritten extent:
					 * report "no physical block". */
					newblock = 0;
				}
			} else {
				newblock = iblock - ee_block + ee_start;
			}
			goto out;
		}
	}

	/*
	 * requested block isn't allocated yet
	 * we couldn't try to create block if create flag is zero
	 */
	if (!create) {
		goto out2;
	}

	/* find next allocated block so that we know how many
	 * blocks we can allocate without ovelapping next extent */
	next = ext4_ext_next_allocated_block(path);
	allocated = next - iblock;
	if (allocated > max_blocks)
		allocated = max_blocks;

	/* allocate new block */
	goal = ext4_ext_find_goal(inode_ref, path, iblock);
	/* On failure newblock is 0 and err carries the reason. */
	newblock = ext4_new_meta_blocks(inode_ref, goal, 0, &allocated, &err);
	if (!newblock)
		goto out2;

	/* try to insert new extent into found leaf and return */
	newex.first_block = to_le32(iblock);
	ext4_ext_store_pblock(&newex, newblock);
	newex.block_count = to_le16(allocated);
	err = ext4_ext_insert_extent(inode_ref, &path, &newex, 0);
	if (err != EOK) {
		/* free data blocks we just allocated */
		ext4_ext_free_blocks(inode_ref, ext4_ext_pblock(&newex),
				     to_le16(newex.block_count), 0);
		goto out2;
	}

	/* previous routine could use block we allocated */
	newblock = ext4_ext_pblock(&newex);

out:
	if (allocated > max_blocks)
		allocated = max_blocks;

	if (result)
		*result = newblock;

	if (blocks_count)
		*blocks_count = allocated;

out2:
	if (path) {
		ext4_ext_drop_refs(inode_ref, path, 0);
		free(path);
	}

	return err;
}
1755 #endif