Fix indentation level
[lwext4.git] / lwext4 / ext4_extent_full.c
1 /*
2  * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3  * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * - Redistributions of source code must retain the above copyright
10  *   notice, this list of conditions and the following disclaimer.
11  * - Redistributions in binary form must reproduce the above copyright
12  *   notice, this list of conditions and the following disclaimer in the
13  *   documentation and/or other materials provided with the distribution.
14  * - The name of the author may not be used to endorse or promote products
15  *   derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28
29 #include "ext4_config.h"
30 #include "ext4_blockdev.h"
31 #include "ext4_fs.h"
32 #include "ext4_super.h"
33 #include "ext4_balloc.h"
34 #include "ext4_debug.h"
35
36 #include <stdlib.h>
37 #include <string.h>
38 #include <inttypes.h>
39 #include <stddef.h>
40
41 #include "ext4_extent.h"
42
43 #if CONFIG_EXTENT_FULL
44
/*
 * Flag bits used by the extent splitting code.
 */
#define EXT4_EXT_MARK_UNWRIT1 0x02 /* first half is to be marked unwritten */
#define EXT4_EXT_MARK_UNWRIT2 0x04 /* second half is to be marked unwritten */
#define EXT4_EXT_DATA_VALID1 0x08  /* first half holds valid data */
#define EXT4_EXT_DATA_VALID2 0x10  /* second half holds valid data */
#define EXT4_EXT_NO_COMBINE 0x20   /* two extents must not be combined */
53
/*
 * Return the tail record of an extent block; it lives
 * EXT4_EXTENT_TAIL_OFFSET(eh) bytes past the start of the header.
 */
static struct ext4_extent_tail *
find_ext4_extent_tail(struct ext4_extent_header *eh)
{
        char *base = (char *)eh;

        return (struct ext4_extent_tail *)(base + EXT4_EXTENT_TAIL_OFFSET(eh));
}
60
61 static struct ext4_extent_header *ext_inode_hdr(struct ext4_inode *inode)
62 {
63         return (struct ext4_extent_header *)inode->blocks;
64 }
65
66 static struct ext4_extent_header *ext_block_hdr(struct ext4_block *block)
67 {
68         return (struct ext4_extent_header *)block->data;
69 }
70
71 static uint16_t ext_depth(struct ext4_inode *inode)
72 {
73         return to_le16(ext_inode_hdr(inode)->depth);
74 }
75
76 static uint16_t ext4_ext_get_actual_len(struct ext4_extent *ext)
77 {
78         return (to_le16(ext->block_count) <= EXT_INIT_MAX_LEN
79                     ? to_le16(ext->block_count)
80                     : (to_le16(ext->block_count) - EXT_INIT_MAX_LEN));
81 }
82
83 static void ext4_ext_mark_initialized(struct ext4_extent *ext)
84 {
85         ext->block_count = to_le16(ext4_ext_get_actual_len(ext));
86 }
87
88 static void ext4_ext_mark_unwritten(struct ext4_extent *ext)
89 {
90         ext->block_count |= to_le16(EXT_INIT_MAX_LEN);
91 }
92
93 static int ext4_ext_is_unwritten(struct ext4_extent *ext)
94 {
95         /* Extent with ee_len of 0x8000 is treated as an initialized extent */
96         return (to_le16(ext->block_count) > EXT_INIT_MAX_LEN);
97 }
98
99 /*
100  * ext4_ext_pblock:
101  * combine low and high parts of physical block number into ext4_fsblk_t
102  */
103 static ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex)
104 {
105         ext4_fsblk_t block;
106
107         block = to_le32(ex->start_lo);
108         block |= ((ext4_fsblk_t)to_le16(ex->start_hi) << 31) << 1;
109         return block;
110 }
111
112 /*
113  * ext4_idx_pblock:
114  * combine low and high parts of a leaf physical block number into ext4_fsblk_t
115  */
116 static ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_index *ix)
117 {
118         ext4_fsblk_t block;
119
120         block = to_le32(ix->leaf_lo);
121         block |= ((ext4_fsblk_t)to_le16(ix->leaf_hi) << 31) << 1;
122         return block;
123 }
124
125 /*
126  * ext4_ext_store_pblock:
127  * stores a large physical block number into an extent struct,
128  * breaking it into parts
129  */
130 static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
131 {
132         ex->start_lo = to_le32((unsigned long)(pb & 0xffffffff));
133         ex->start_hi = to_le16((unsigned long)((pb >> 31) >> 1) & 0xffff);
134 }
135
136 /*
137  * ext4_idx_store_pblock:
138  * stores a large physical block number into an index struct,
139  * breaking it into parts
140  */
141 static void ext4_idx_store_pblock(struct ext4_extent_index *ix, ext4_fsblk_t pb)
142 {
143         ix->leaf_lo = to_le32((unsigned long)(pb & 0xffffffff));
144         ix->leaf_hi = to_le16((unsigned long)((pb >> 31) >> 1) & 0xffff);
145 }
146
147 static int ext4_allocate_single_block(struct ext4_inode_ref *inode_ref,
148                                       ext4_fsblk_t goal,
149                                       ext4_fsblk_t *blockp)
150 {
151         return ext4_balloc_alloc_block(inode_ref, goal, blockp);
152 }
153
154 static ext4_fsblk_t ext4_new_meta_blocks(struct ext4_inode_ref *inode_ref,
155                                          ext4_fsblk_t goal,
156                                          uint32_t flags __unused,
157                                          uint32_t *count, int *errp)
158 {
159         ext4_fsblk_t block = 0;
160
161         *errp = ext4_allocate_single_block(inode_ref, goal, &block);
162         if (count)
163                 *count = 1;
164         return block;
165 }
166
167 static void ext4_ext_free_blocks(struct ext4_inode_ref *inode_ref,
168                                  ext4_fsblk_t block, uint32_t count,
169                                  uint32_t flags __unused)
170 {
171         ext4_balloc_free_blocks(inode_ref, block, count);
172 }
173
174 static size_t ext4_ext_space_block(struct ext4_inode_ref *inode_ref)
175 {
176         size_t size;
177         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
178
179         size = (block_size - sizeof(struct ext4_extent_header)) /
180                sizeof(struct ext4_extent);
181         return size;
182 }
183
184 static size_t ext4_ext_space_block_idx(struct ext4_inode_ref *inode_ref)
185 {
186         size_t size;
187         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
188
189         size = (block_size - sizeof(struct ext4_extent_header)) /
190                sizeof(struct ext4_extent_index);
191         return size;
192 }
193
194 static size_t ext4_ext_space_root(struct ext4_inode_ref *inode_ref)
195 {
196         size_t size;
197
198         size = sizeof(inode_ref->inode->blocks);
199         size -= sizeof(struct ext4_extent_header);
200         size /= sizeof(struct ext4_extent);
201         return size;
202 }
203
204 static size_t ext4_ext_space_root_idx(struct ext4_inode_ref *inode_ref)
205 {
206         size_t size;
207
208         size = sizeof(inode_ref->inode->blocks);
209         size -= sizeof(struct ext4_extent_header);
210         size /= sizeof(struct ext4_extent_index);
211         return size;
212 }
213
214 static size_t ext4_ext_max_entries(struct ext4_inode_ref *inode_ref,
215                                    uint32_t depth)
216 {
217         size_t max;
218
219         if (depth == ext_depth(inode_ref->inode)) {
220                 if (depth == 0)
221                         max = ext4_ext_space_root(inode_ref);
222                 else
223                         max = ext4_ext_space_root_idx(inode_ref);
224         } else {
225                 if (depth == 0)
226                         max = ext4_ext_space_block(inode_ref);
227                 else
228                         max = ext4_ext_space_block_idx(inode_ref);
229         }
230
231         return max;
232 }
233
234 static ext4_fsblk_t ext4_ext_find_goal(struct ext4_inode_ref *inode_ref,
235                                        struct ext4_extent_path *path,
236                                        ext4_lblk_t block)
237 {
238         if (path) {
239                 uint32_t depth = path->depth;
240                 struct ext4_extent *ex;
241
242                 /*
243                  * Try to predict block placement assuming that we are
244                  * filling in a file which will eventually be
245                  * non-sparse --- i.e., in the case of libbfd writing
246                  * an ELF object sections out-of-order but in a way
247                  * the eventually results in a contiguous object or
248                  * executable file, or some database extending a table
249                  * space file.  However, this is actually somewhat
250                  * non-ideal if we are writing a sparse file such as
251                  * qemu or KVM writing a raw image file that is going
252                  * to stay fairly sparse, since it will end up
253                  * fragmenting the file system's free space.  Maybe we
254                  * should have some hueristics or some way to allow
255                  * userspace to pass a hint to file system,
256                  * especially if the latter case turns out to be
257                  * common.
258                  */
259                 ex = path[depth].extent;
260                 if (ex) {
261                         ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
262                         ext4_lblk_t ext_block = to_le32(ex->first_block);
263
264                         if (block > ext_block)
265                                 return ext_pblk + (block - ext_block);
266                         else
267                                 return ext_pblk - (ext_block - block);
268                 }
269
270                 /* it looks like index is empty;
271                  * try to find starting block from index itself */
272                 if (path[depth].block.lb_id)
273                         return path[depth].block.lb_id;
274         }
275
276         /* OK. use inode's group */
277         return ext4_fs_inode_to_goal_block(inode_ref);
278 }
279
280 /*
281  * Allocation for a meta data block
282  */
283 static ext4_fsblk_t ext4_ext_new_meta_block(struct ext4_inode_ref *inode_ref,
284                                             struct ext4_extent_path *path,
285                                             struct ext4_extent *ex, int *err,
286                                             uint32_t flags)
287 {
288         ext4_fsblk_t goal, newblock;
289
290         goal = ext4_ext_find_goal(inode_ref, path, to_le32(ex->first_block));
291         newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, err);
292         return newblock;
293 }
294
295 static int ext4_ext_dirty(struct ext4_inode_ref *inode_ref,
296                           struct ext4_extent_path *path)
297 {
298         if (path->block.lb_id)
299                 path->block.dirty = true;
300         else
301                 inode_ref->dirty = true;
302
303         return EOK;
304 }
305
306 static void ext4_ext_drop_refs(struct ext4_inode_ref *inode_ref,
307                                struct ext4_extent_path *path, bool keep_other)
308 {
309         int32_t depth, i;
310
311         if (!path)
312                 return;
313         if (keep_other)
314                 depth = 0;
315         else
316                 depth = path->depth;
317
318         for (i = 0; i <= depth; i++, path++) {
319                 if (path->block.lb_id) {
320                         ext4_block_set(inode_ref->fs->bdev, &path->block);
321                 }
322         }
323 }
324
325 /*
326  * Temporarily we don't need to support checksum.
327  */
328 static uint32_t ext4_ext_block_csum(struct ext4_inode_ref *inode_ref __unused,
329                                     struct ext4_extent_header *eh __unused)
330 {
331         /*TODO: should we add crc32 here ?*/
332         /*return ext4_crc32c(inode->i_csum, eh, EXT4_EXTENT_TAIL_OFFSET(eh));*/
333         return 0;
334 }
335
336 static void ext4_extent_block_csum_set(struct ext4_inode_ref *inode_ref,
337                                        struct ext4_extent_header *eh)
338 {
339         struct ext4_extent_tail *tail;
340
341         tail = find_ext4_extent_tail(eh);
342         tail->et_checksum = ext4_ext_block_csum(inode_ref, eh);
343 }
344
345 /*
346  * Check that whether the basic information inside the extent header
347  * is correct or not.
348  */
349 static int ext4_ext_check(struct ext4_inode_ref *inode_ref,
350                           struct ext4_extent_header *eh, uint16_t depth,
351                           ext4_fsblk_t pblk __unused)
352 {
353         struct ext4_extent_tail *tail;
354         const char *error_msg;
355         (void)error_msg;
356
357         if (to_le16(eh->magic) != EXT4_EXTENT_MAGIC) {
358                 error_msg = "invalid magic";
359                 goto corrupted;
360         }
361         if (to_le16(eh->depth) != depth) {
362                 error_msg = "unexpected eh_depth";
363                 goto corrupted;
364         }
365         if (eh->max_entries_count == 0) {
366                 error_msg = "invalid eh_max";
367                 goto corrupted;
368         }
369         if (to_le16(eh->entries_count) > to_le16(eh->max_entries_count)) {
370                 error_msg = "invalid eh_entries";
371                 goto corrupted;
372         }
373
374         tail = find_ext4_extent_tail(eh);
375         if (tail->et_checksum != ext4_ext_block_csum(inode_ref, eh)) {
376                 /* FIXME: Warning: extent checksum damaged? */
377         }
378
379         return EOK;
380
381 corrupted:
382         ext4_dbg(DEBUG_EXTENT, "Bad extents B+ tree block: %s. "
383                                "Blocknr: %" PRId64 "\n",
384                  error_msg, pblk);
385         return EIO;
386 }
387
388 static int read_extent_tree_block(struct ext4_inode_ref *inode_ref,
389                                   ext4_fsblk_t pblk, int32_t depth,
390                                   struct ext4_block *bh,
391                                   uint32_t flags __unused)
392 {
393         int err;
394
395         err = ext4_block_get(inode_ref->fs->bdev, bh, pblk);
396         if (err != EOK)
397                 goto errout;
398
399         err = ext4_ext_check(inode_ref, ext_block_hdr(bh), depth, pblk);
400         if (err != EOK)
401                 goto errout;
402
403         return EOK;
404 errout:
405         if (bh->lb_id)
406                 ext4_block_set(inode_ref->fs->bdev, bh);
407
408         return err;
409 }
410
411 /*
412  * ext4_ext_binsearch_idx:
413  * binary search for the closest index of the given block
414  * the header must be checked before calling this
415  */
416 static void ext4_ext_binsearch_idx(struct ext4_extent_path *path,
417                                    ext4_lblk_t block)
418 {
419         struct ext4_extent_header *eh = path->header;
420         struct ext4_extent_index *r, *l, *m;
421
422         l = EXT_FIRST_INDEX(eh) + 1;
423         r = EXT_LAST_INDEX(eh);
424         while (l <= r) {
425                 m = l + (r - l) / 2;
426                 if (block < to_le32(m->first_block))
427                         r = m - 1;
428                 else
429                         l = m + 1;
430         }
431
432         path->index = l - 1;
433 }
434
435 /*
436  * ext4_ext_binsearch:
437  * binary search for closest extent of the given block
438  * the header must be checked before calling this
439  */
440 static void ext4_ext_binsearch(struct ext4_extent_path *path, ext4_lblk_t block)
441 {
442         struct ext4_extent_header *eh = path->header;
443         struct ext4_extent *r, *l, *m;
444
445         if (eh->entries_count == 0) {
446                 /*
447                  * this leaf is empty:
448                  * we get such a leaf in split/add case
449                  */
450                 return;
451         }
452
453         l = EXT_FIRST_EXTENT(eh) + 1;
454         r = EXT_LAST_EXTENT(eh);
455
456         while (l <= r) {
457                 m = l + (r - l) / 2;
458                 if (block < to_le32(m->first_block))
459                         r = m - 1;
460                 else
461                         l = m + 1;
462         }
463
464         path->extent = l - 1;
465 }
466
467 #define EXT4_EXT_PATH_INC_DEPTH 1
468
469 static int ext4_find_extent(struct ext4_inode_ref *inode_ref, ext4_lblk_t block,
470                             struct ext4_extent_path **orig_path, uint32_t flags)
471 {
472         struct ext4_extent_header *eh;
473         struct ext4_block bh = EXT4_BLOCK_ZERO();
474         ext4_fsblk_t buf_block = 0;
475         struct ext4_extent_path *path = *orig_path;
476         int32_t depth, ppos = 0;
477         int32_t i;
478         int ret;
479
480         eh = ext_inode_hdr(inode_ref->inode);
481         depth = ext_depth(inode_ref->inode);
482
483         if (path) {
484                 ext4_ext_drop_refs(inode_ref, path, 0);
485                 if (depth > path[0].maxdepth) {
486                         free(path);
487                         *orig_path = path = NULL;
488                 }
489         }
490         if (!path) {
491                 int32_t path_depth = depth + EXT4_EXT_PATH_INC_DEPTH;
492                 /* account possible depth increase */
493                 path = calloc(1, sizeof(struct ext4_extent_path) *
494                                      (path_depth + 1));
495                 if (!path)
496                         return ENOMEM;
497                 path[0].maxdepth = path_depth;
498         }
499         path[0].header = eh;
500         path[0].block = bh;
501
502         i = depth;
503         /* walk through the tree */
504         while (i) {
505                 ext4_ext_binsearch_idx(path + ppos, block);
506                 path[ppos].p_block = ext4_idx_pblock(path[ppos].index);
507                 path[ppos].depth = i;
508                 path[ppos].extent = NULL;
509                 buf_block = path[ppos].p_block;
510
511                 i--;
512                 ppos++;
513                 if (!path[ppos].block.lb_id ||
514                     path[ppos].block.lb_id != buf_block) {
515                         ret = read_extent_tree_block(inode_ref, buf_block, i,
516                                                      &bh, flags);
517                         if (ret != EOK) {
518                                 goto err;
519                         }
520                         if (ppos > depth) {
521                                 ext4_block_set(inode_ref->fs->bdev, &bh);
522                                 ret = EIO;
523                                 goto err;
524                         }
525
526                         eh = ext_block_hdr(&bh);
527                         path[ppos].block = bh;
528                         path[ppos].header = eh;
529                 }
530         }
531
532         path[ppos].depth = i;
533         path[ppos].extent = NULL;
534         path[ppos].index = NULL;
535
536         /* find extent */
537         ext4_ext_binsearch(path + ppos, block);
538         /* if not an empty leaf */
539         if (path[ppos].extent)
540                 path[ppos].p_block = ext4_ext_pblock(path[ppos].extent);
541
542         *orig_path = path;
543
544         ret = EOK;
545         return ret;
546
547 err:
548         ext4_ext_drop_refs(inode_ref, path, 0);
549         free(path);
550         if (orig_path)
551                 *orig_path = NULL;
552         return ret;
553 }
554
555 static void ext4_ext_init_header(struct ext4_inode_ref *inode_ref,
556                                  struct ext4_extent_header *eh, int32_t depth)
557 {
558         eh->entries_count = 0;
559         eh->max_entries_count = to_le16(ext4_ext_max_entries(inode_ref, depth));
560         eh->magic = to_le16(EXT4_EXTENT_MAGIC);
561         eh->depth = depth;
562 }
563
564 /*
565  * Be cautious, the buffer_head returned is not yet mark dirtied. */
566 static int ext4_ext_split_node(struct ext4_inode_ref *inode_ref,
567                                struct ext4_extent_path *path, int32_t at,
568                                struct ext4_extent *newext,
569                                ext4_fsblk_t *sibling, struct ext4_block *new_bh)
570 {
571         int ret;
572         ext4_fsblk_t newblock;
573         struct ext4_block bh = EXT4_BLOCK_ZERO();
574         int32_t depth = ext_depth(inode_ref->inode);
575
576         ext4_assert(sibling);
577
578         /* FIXME: currently we split at the point after the current extent. */
579         newblock = ext4_ext_new_meta_block(inode_ref, path, newext, &ret, 0);
580         if (ret)
581                 goto cleanup;
582
583         /*  For write access.# */
584         ret = ext4_block_get(inode_ref->fs->bdev, &bh, newblock);
585         if (ret != EOK)
586                 goto cleanup;
587
588         if (at == depth) {
589                 /* start copy from next extent */
590                 ptrdiff_t m = EXT_MAX_EXTENT(path[at].header) - path[at].extent;
591                 struct ext4_extent_header *neh;
592                 neh = ext_block_hdr(&bh);
593                 ext4_ext_init_header(inode_ref, neh, 0);
594                 if (m) {
595                         struct ext4_extent *ex;
596                         ex = EXT_FIRST_EXTENT(neh);
597                         memmove(ex, path[at].extent + 1,
598                                 sizeof(struct ext4_extent) * m);
599                         neh->entries_count =
600                             to_le16(to_le16(neh->entries_count) + m);
601                         path[at].header->entries_count = to_le16(
602                             to_le16(path[at].header->entries_count) - m);
603                         ret = ext4_ext_dirty(inode_ref, path + at);
604                         if (ret)
605                                 goto cleanup;
606                 }
607         } else {
608                 ptrdiff_t m = EXT_MAX_INDEX(path[at].header) - path[at].index;
609                 struct ext4_extent_header *neh;
610                 neh = ext_block_hdr(&bh);
611                 ext4_ext_init_header(inode_ref, neh, depth - at);
612                 if (m) {
613                         struct ext4_extent_index *ix;
614                         ix = EXT_FIRST_INDEX(neh);
615                         memmove(ix, path[at].index + 1,
616                                 sizeof(struct ext4_extent) * m);
617                         neh->entries_count =
618                             to_le16(to_le16(neh->entries_count) + m);
619                         path[at].header->entries_count = to_le16(
620                             to_le16(path[at].header->entries_count) - m);
621                         ret = ext4_ext_dirty(inode_ref, path + at);
622                         if (ret)
623                                 goto cleanup;
624                 }
625         }
626 cleanup:
627         if (ret) {
628                 if (bh.lb_id) {
629                         ext4_block_set(inode_ref->fs->bdev, &bh);
630                 }
631                 if (newblock)
632                         ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
633
634                 newblock = 0;
635         }
636         *sibling = newblock;
637         *new_bh = bh;
638         return ret;
639 }
640
641 static ext4_lblk_t ext4_ext_block_index(struct ext4_extent_header *eh)
642 {
643         if (eh->depth)
644                 return to_le32(EXT_FIRST_INDEX(eh)->first_block);
645
646         return to_le32(EXT_FIRST_EXTENT(eh)->first_block);
647 }
648
649 #define EXT_INODE_HDR_NEED_GROW 0x1
650
651 struct ext_split_trans {
652         ext4_fsblk_t ptr;
653         struct ext4_extent_path path;
654         int switch_to;
655 };
656
657 static int ext4_ext_insert_index(struct ext4_inode_ref *inode_ref,
658                                  struct ext4_extent_path *path, int32_t at,
659                                  struct ext4_extent *newext,
660                                  ext4_lblk_t insert_index,
661                                  ext4_fsblk_t insert_block,
662                                  struct ext_split_trans *spt)
663 {
664         struct ext4_extent_index *ix;
665         struct ext4_extent_path *curp = path + at;
666         struct ext4_block bh = EXT4_BLOCK_ZERO();
667         int32_t len;
668         int err;
669         struct ext4_extent_header *eh;
670
671         if (curp->index && insert_index == to_le32(curp->index->first_block))
672                 return EIO;
673
674         if (to_le16(curp->header->entries_count) ==
675             to_le16(curp->header->max_entries_count)) {
676                 if (at) {
677                         struct ext4_extent_header *neh;
678                         err = ext4_ext_split_node(inode_ref, path, at, newext,
679                                                   &spt->ptr, &bh);
680                         if (err != EOK)
681                                 goto out;
682
683                         neh = ext_block_hdr(&bh);
684                         if (insert_index > to_le32(curp->index->first_block)) {
685                                 /* Make decision which node should be used to
686                                  * insert the index.*/
687                                 if (to_le16(neh->entries_count) >
688                                     to_le16(curp->header->entries_count)) {
689                                         eh = curp->header;
690                                         /* insert after */
691                                         ix = EXT_LAST_INDEX(eh) + 1;
692                                 } else {
693                                         eh = neh;
694                                         ix = EXT_FIRST_INDEX(eh);
695                                 }
696                         } else {
697                                 eh = curp->header;
698                                 /* insert before */
699                                 ix = EXT_LAST_INDEX(eh);
700                         }
701                 } else {
702                         err = EXT_INODE_HDR_NEED_GROW;
703                         goto out;
704                 }
705         } else {
706                 eh = curp->header;
707                 if (curp->index == NULL) {
708                         ix = EXT_FIRST_INDEX(eh);
709                         curp->index = ix;
710                 } else if (insert_index > to_le32(curp->index->first_block)) {
711                         /* insert after */
712                         ix = curp->index + 1;
713                 } else {
714                         /* insert before */
715                         ix = curp->index;
716                 }
717         }
718
719         len = EXT_LAST_INDEX(eh) - ix + 1;
720         ext4_assert(len >= 0);
721         if (len > 0)
722                 memmove(ix + 1, ix, len * sizeof(struct ext4_extent_index));
723
724         if (ix > EXT_MAX_INDEX(eh)) {
725                 err = EIO;
726                 goto out;
727         }
728
729         ix->first_block = to_le32(insert_index);
730         ext4_idx_store_pblock(ix, insert_block);
731         eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
732
733         if (ix > EXT_LAST_INDEX(eh)) {
734                 err = EIO;
735                 goto out;
736         }
737
738         if (eh == curp->header)
739                 err = ext4_ext_dirty(inode_ref, curp);
740         else
741                 err = EOK;
742
743 out:
744         if (err != EOK) {
745                 if (bh.lb_id)
746                         ext4_block_set(inode_ref->fs->bdev, &bh);
747
748                 spt->ptr = 0;
749         } else if (bh.lb_id) {
750                 /* If we got a sibling leaf. */
751                 bh.dirty = true;
752
753                 spt->path.p_block = ext4_idx_pblock(ix);
754                 spt->path.depth = to_le16(eh->depth);
755                 spt->path.maxdepth = 0;
756                 spt->path.extent = NULL;
757                 spt->path.index = ix;
758                 spt->path.header = eh;
759                 spt->path.block = bh;
760
761                 /*
762                  * If newext->ee_block can be included into the
763                  * right sub-tree.
764                  */
765                 if (to_le32(newext->first_block) >=
766                     ext4_ext_block_index(ext_block_hdr(&bh)))
767                         spt->switch_to = 1;
768                 else {
769                         curp->index = ix;
770                         curp->p_block = ext4_idx_pblock(ix);
771                 }
772
773         } else {
774                 spt->ptr = 0;
775                 curp->index = ix;
776                 curp->p_block = ext4_idx_pblock(ix);
777         }
778         return err;
779 }
780
781 /*
782  * ext4_ext_correct_indexes:
783  * if leaf gets modified and modified extent is first in the leaf,
784  * then we have to correct all indexes above.
785  */
786 static int ext4_ext_correct_indexes(struct ext4_inode_ref *inode_ref,
787                                     struct ext4_extent_path *path)
788 {
789         struct ext4_extent_header *eh;
790         int32_t depth = ext_depth(inode_ref->inode);
791         struct ext4_extent *ex;
792         uint32_t border;
793         int32_t k;
794         int err = EOK;
795
796         eh = path[depth].header;
797         ex = path[depth].extent;
798
799         if (ex == NULL || eh == NULL) {
800                 return EIO;
801         }
802
803         if (depth == 0) {
804                 /* there is no tree at all */
805                 return EOK;
806         }
807
808         if (ex != EXT_FIRST_EXTENT(eh)) {
809                 /* we correct tree if first leaf got modified only */
810                 return EOK;
811         }
812
813         /*
814          * TODO: we need correction if border is smaller than current one
815          */
816         k = depth - 1;
817         border = path[depth].extent->first_block;
818         path[k].index->first_block = border;
819         err = ext4_ext_dirty(inode_ref, path + k);
820         if (err != EOK)
821                 return err;
822
823         while (k--) {
824                 /* change all left-side indexes */
825                 if (path[k + 1].index != EXT_FIRST_INDEX(path[k + 1].header))
826                         break;
827                 path[k].index->first_block = border;
828                 err = ext4_ext_dirty(inode_ref, path + k);
829                 if (err != EOK)
830                         break;
831         }
832
833         return err;
834 }
835
836 static bool ext4_ext_can_prepend(struct ext4_extent *ex1,
837                                  struct ext4_extent *ex2)
838 {
839         if (ext4_ext_pblock(ex2) + ext4_ext_get_actual_len(ex2) !=
840             ext4_ext_pblock(ex1))
841                 return false;
842
843         if (ext4_ext_is_unwritten(ex1)) {
844                 if (ext4_ext_get_actual_len(ex1) +
845                         ext4_ext_get_actual_len(ex2) >
846                     EXT_UNWRITTEN_MAX_LEN)
847                         return false;
848         } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
849                    EXT_INIT_MAX_LEN)
850                 return false;
851
852         if (to_le32(ex2->first_block) + ext4_ext_get_actual_len(ex2) !=
853             to_le32(ex1->first_block))
854                 return false;
855
856         return true;
857 }
858
859 static bool ext4_ext_can_append(struct ext4_extent *ex1,
860                                 struct ext4_extent *ex2)
861 {
862         if (ext4_ext_pblock(ex1) + ext4_ext_get_actual_len(ex1) !=
863             ext4_ext_pblock(ex2))
864                 return false;
865
866         if (ext4_ext_is_unwritten(ex1)) {
867                 if (ext4_ext_get_actual_len(ex1) +
868                         ext4_ext_get_actual_len(ex2) >
869                     EXT_UNWRITTEN_MAX_LEN)
870                         return false;
871         } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
872                    EXT_INIT_MAX_LEN)
873                 return false;
874
875         if (to_le32(ex1->first_block) + ext4_ext_get_actual_len(ex1) !=
876             to_le32(ex2->first_block))
877                 return false;
878
879         return true;
880 }
881
882 static int ext4_ext_insert_leaf(struct ext4_inode_ref *inode_ref,
883                                 struct ext4_extent_path *path, int32_t at,
884                                 struct ext4_extent *newext,
885                                 struct ext_split_trans *spt, uint32_t flags)
886 {
887         struct ext4_extent_path *curp = path + at;
888         struct ext4_extent *ex = curp->extent;
889         struct ext4_block bh = EXT4_BLOCK_ZERO();
890         int32_t len;
891         int err = EOK;
892         int unwritten;
893         struct ext4_extent_header *eh = NULL;
894
895         if (curp->extent &&
896             to_le32(newext->first_block) == to_le32(curp->extent->first_block))
897                 return EIO;
898
899         if (!(flags & EXT4_EXT_NO_COMBINE)) {
900                 if (curp->extent && ext4_ext_can_append(curp->extent, newext)) {
901                         unwritten = ext4_ext_is_unwritten(curp->extent);
902                         curp->extent->block_count =
903                             to_le16(ext4_ext_get_actual_len(curp->extent) +
904                                     ext4_ext_get_actual_len(newext));
905                         if (unwritten)
906                                 ext4_ext_mark_unwritten(curp->extent);
907                         err = ext4_ext_dirty(inode_ref, curp);
908                         goto out;
909                 }
910
911                 if (curp->extent &&
912                     ext4_ext_can_prepend(curp->extent, newext)) {
913                         unwritten = ext4_ext_is_unwritten(curp->extent);
914                         curp->extent->first_block = newext->first_block;
915                         curp->extent->block_count =
916                             to_le16(ext4_ext_get_actual_len(curp->extent) +
917                                     ext4_ext_get_actual_len(newext));
918                         if (unwritten)
919                                 ext4_ext_mark_unwritten(curp->extent);
920                         err = ext4_ext_dirty(inode_ref, curp);
921                         goto out;
922                 }
923         }
924
925         if (to_le16(curp->header->entries_count) ==
926             to_le16(curp->header->max_entries_count)) {
927                 if (at) {
928                         struct ext4_extent_header *neh;
929                         err = ext4_ext_split_node(inode_ref, path, at, newext,
930                                                   &spt->ptr, &bh);
931                         if (err != EOK)
932                                 goto out;
933
934                         neh = ext_block_hdr(&bh);
935                         if (to_le32(newext->first_block) >
936                             to_le32(curp->extent->first_block)) {
937                                 if (to_le16(neh->entries_count) >
938                                     to_le16(curp->header->entries_count)) {
939                                         eh = curp->header;
940                                         /* insert after */
941                                         ex = EXT_LAST_EXTENT(eh) + 1;
942                                 } else {
943                                         eh = neh;
944                                         ex = EXT_FIRST_EXTENT(eh);
945                                 }
946                         } else {
947                                 eh = curp->header;
948                                 /* insert before */
949                                 ex = EXT_LAST_EXTENT(eh);
950                         }
951                 } else {
952                         err = EXT_INODE_HDR_NEED_GROW;
953                         goto out;
954                 }
955         } else {
956                 eh = curp->header;
957                 if (curp->extent == NULL) {
958                         ex = EXT_FIRST_EXTENT(eh);
959                         curp->extent = ex;
960                 } else if (to_le32(newext->first_block) >
961                            to_le32(curp->extent->first_block)) {
962                         /* insert after */
963                         ex = curp->extent + 1;
964                 } else {
965                         /* insert before */
966                         ex = curp->extent;
967                 }
968         }
969
970         len = EXT_LAST_EXTENT(eh) - ex + 1;
971         ext4_assert(len >= 0);
972         if (len > 0)
973                 memmove(ex + 1, ex, len * sizeof(struct ext4_extent));
974
975         if (ex > EXT_MAX_EXTENT(eh)) {
976                 err = EIO;
977                 goto out;
978         }
979
980         ex->first_block = newext->first_block;
981         ex->block_count = newext->block_count;
982         ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
983         eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
984
985         if (ex > EXT_LAST_EXTENT(eh)) {
986                 err = EIO;
987                 goto out;
988         }
989
990         if (eh == curp->header) {
991                 err = ext4_ext_correct_indexes(inode_ref, path);
992                 if (err != EOK)
993                         goto out;
994                 err = ext4_ext_dirty(inode_ref, curp);
995         } else
996                 err = EOK;
997
998 out:
999         if (err != EOK) {
1000                 if (bh.lb_id)
1001                         ext4_block_set(inode_ref->fs->bdev, &bh);
1002
1003                 spt->ptr = 0;
1004         } else if (bh.lb_id) {
1005                 /* If we got a sibling leaf. */
1006                 bh.dirty = true;
1007
1008                 spt->path.p_block = ext4_ext_pblock(ex);
1009                 spt->path.depth = to_le16(eh->depth);
1010                 spt->path.maxdepth = 0;
1011                 spt->path.extent = ex;
1012                 spt->path.index = NULL;
1013                 spt->path.header = eh;
1014                 spt->path.block = bh;
1015
1016                 /*
1017                  * If newext->ee_block can be included into the
1018                  * right sub-tree.
1019                  */
1020                 if (to_le32(newext->first_block) >=
1021                     ext4_ext_block_index(ext_block_hdr(&bh)))
1022                         spt->switch_to = 1;
1023                 else {
1024                         curp->extent = ex;
1025                         curp->p_block = ext4_ext_pblock(ex);
1026                 }
1027
1028         } else {
1029                 spt->ptr = 0;
1030                 curp->extent = ex;
1031                 curp->p_block = ext4_ext_pblock(ex);
1032         }
1033
1034         return err;
1035 }
1036
1037 /*
1038  * ext4_ext_grow_indepth:
1039  * implements tree growing procedure:
1040  * - allocates new block
1041  * - moves top-level data (index block or leaf) into the new block
1042  * - initializes new top-level, creating index that points to the
1043  *   just created block
1044  */
1045 static int ext4_ext_grow_indepth(struct ext4_inode_ref *inode_ref,
1046                                  uint32_t flags)
1047 {
1048         struct ext4_extent_header *neh;
1049         struct ext4_block bh = EXT4_BLOCK_ZERO();
1050         ext4_fsblk_t newblock, goal = 0;
1051         int err = EOK;
1052
1053         /* Try to prepend new index to old one */
1054         if (ext_depth(inode_ref->inode))
1055                 goal = ext4_idx_pblock(
1056                     EXT_FIRST_INDEX(ext_inode_hdr(inode_ref->inode)));
1057         else
1058                 goal = ext4_fs_inode_to_goal_block(inode_ref);
1059
1060         newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, &err);
1061         if (newblock == 0)
1062                 return err;
1063
1064         /* # */
1065         err = ext4_block_get(inode_ref->fs->bdev, &bh, newblock);
1066         if (err != EOK) {
1067                 ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
1068                 return err;
1069         }
1070
1071         /* move top-level index/leaf into new block */
1072         memmove(bh.data, inode_ref->inode->blocks,
1073                 sizeof(inode_ref->inode->blocks));
1074
1075         /* set size of new block */
1076         neh = ext_block_hdr(&bh);
1077         /* old root could have indexes or leaves
1078          * so calculate e_max right way */
1079         if (ext_depth(inode_ref->inode))
1080                 neh->max_entries_count =
1081                     to_le16(ext4_ext_space_block_idx(inode_ref));
1082         else
1083                 neh->max_entries_count =
1084                     to_le16(ext4_ext_space_block(inode_ref));
1085
1086         neh->magic = to_le16(EXT4_EXTENT_MAGIC);
1087         ext4_extent_block_csum_set(inode_ref, neh);
1088
1089         /* Update top-level index: num,max,pointer */
1090         neh = ext_inode_hdr(inode_ref->inode);
1091         neh->entries_count = to_le16(1);
1092         ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
1093         if (neh->depth == 0) {
1094                 /* Root extent block becomes index block */
1095                 neh->max_entries_count =
1096                     to_le16(ext4_ext_space_root_idx(inode_ref));
1097                 EXT_FIRST_INDEX(neh)
1098                     ->first_block = EXT_FIRST_EXTENT(neh)->first_block;
1099         }
1100         neh->depth = to_le16(to_le16(neh->depth) + 1);
1101
1102         bh.dirty = true;
1103         inode_ref->dirty = true;
1104         ext4_block_set(inode_ref->fs->bdev, &bh);
1105
1106         return err;
1107 }
1108
1109 __unused static void print_path(struct ext4_extent_path *path)
1110 {
1111         int32_t i = path->depth;
1112         while (i >= 0) {
1113
1114                 ptrdiff_t a =
1115                     (path->extent)
1116                         ? (path->extent - EXT_FIRST_EXTENT(path->header))
1117                         : 0;
1118                 ptrdiff_t b =
1119                     (path->index)
1120                         ? (path->index - EXT_FIRST_INDEX(path->header))
1121                         : 0;
1122
1123                 (void)a;
1124                 (void)b;
1125                 ext4_dbg(DEBUG_EXTENT,
1126                          "depth %" PRId32 ", p_block: %" PRIu64 ","
1127                          "p_ext offset: %td, p_idx offset: %td\n",
1128                          i, path->p_block, a, b);
1129                 i--;
1130                 path++;
1131         }
1132 }
1133
1134 static void ext4_ext_replace_path(struct ext4_inode_ref *inode_ref,
1135                                   struct ext4_extent_path *path,
1136                                   struct ext_split_trans *spt, int32_t depth,
1137                                   int32_t level)
1138 {
1139         int32_t i = depth - level;
1140
1141         ext4_ext_drop_refs(inode_ref, path + i, 1);
1142         path[i] = spt->path;
1143 }
1144
1145 static int ext4_ext_insert_extent(struct ext4_inode_ref *inode_ref,
1146                                   struct ext4_extent_path **ppath,
1147                                   struct ext4_extent *newext, uint32_t flags)
1148 {
1149         int32_t i, depth, level;
1150         int ret = EOK;
1151         ext4_fsblk_t ptr = 0;
1152         struct ext4_extent_path *path = *ppath;
1153         struct ext_split_trans *spt = NULL;
1154         struct ext_split_trans newblock;
1155
1156         memset(&newblock, 0, sizeof(newblock));
1157
1158         depth = ext_depth(inode_ref->inode);
1159         for (i = depth, level = 0; i >= 0; i--, level++)
1160                 if (EXT_HAS_FREE_INDEX(path + i))
1161                         break;
1162
1163         if (level) {
1164                 spt = calloc(1, sizeof(struct ext_split_trans) * (level));
1165                 if (!spt) {
1166                         ret = ENOMEM;
1167                         goto out;
1168                 }
1169         }
1170         i = 0;
1171 again:
1172         depth = ext_depth(inode_ref->inode);
1173
1174         do {
1175                 if (!i) {
1176                         ret = ext4_ext_insert_leaf(inode_ref, path, depth - i,
1177                                                    newext, &newblock, flags);
1178                 } else {
1179                         ret = ext4_ext_insert_index(
1180                             inode_ref, path, depth - i, newext,
1181                             ext4_ext_block_index(
1182                                 ext_block_hdr(&spt[i - 1].path.block)),
1183                             spt[i - 1].ptr, &newblock);
1184                 }
1185                 ptr = newblock.ptr;
1186
1187                 if (ret && ret != EXT_INODE_HDR_NEED_GROW)
1188                         goto out;
1189                 else if (spt && ptr && !ret) {
1190                         /* Prepare for the next iteration after splitting. */
1191                         spt[i] = newblock;
1192                 }
1193
1194                 i++;
1195         } while (ptr != 0 && i <= depth);
1196
1197         if (ret == EXT_INODE_HDR_NEED_GROW) {
1198                 ret = ext4_ext_grow_indepth(inode_ref, 0);
1199                 if (ret)
1200                         goto out;
1201                 ret = ext4_find_extent(inode_ref, to_le32(newext->first_block),
1202                                        ppath, 0);
1203                 if (ret)
1204                         goto out;
1205                 i = depth;
1206                 path = *ppath;
1207                 goto again;
1208         }
1209 out:
1210         if (ret) {
1211                 if (path)
1212                         ext4_ext_drop_refs(inode_ref, path, 0);
1213
1214                 while (--level >= 0 && spt) {
1215                         if (spt[level].ptr) {
1216                                 ext4_ext_free_blocks(inode_ref, spt[level].ptr,
1217                                                      1, 0);
1218                                 ext4_ext_drop_refs(inode_ref, &spt[level].path,
1219                                                    1);
1220                         }
1221                 }
1222         } else {
1223                 while (--level >= 0 && spt) {
1224                         if (spt[level].switch_to)
1225                                 ext4_ext_replace_path(inode_ref, path, spt,
1226                                                       depth, level);
1227                         else if (spt[level].ptr)
1228                                 ext4_ext_drop_refs(inode_ref, &spt[level].path,
1229                                                    1);
1230                 }
1231         }
1232         if (spt)
1233                 free(spt);
1234
1235         return ret;
1236 }
1237
1238 static void ext4_ext_remove_blocks(struct ext4_inode_ref *inode_ref,
1239                                    struct ext4_extent *ex, ext4_lblk_t from,
1240                                    ext4_lblk_t to)
1241 {
1242         ext4_lblk_t len = to - from + 1;
1243         ext4_lblk_t num;
1244         ext4_fsblk_t start;
1245         num = from - to_le32(ex->first_block);
1246         start = ext4_ext_pblock(ex) + num;
1247         ext4_dbg(DEBUG_EXTENT,
1248                  "Freeing %" PRIu32 " at %" PRIu64 ", %" PRIu32 "\n", from,
1249                  start, len);
1250
1251         ext4_ext_free_blocks(inode_ref, start, len, 0);
1252 }
1253
1254 static int ext4_ext_remove_idx(struct ext4_inode_ref *inode_ref,
1255                                struct ext4_extent_path *path, int32_t depth)
1256 {
1257         int err = EOK;
1258         int32_t i = depth;
1259         ext4_fsblk_t leaf;
1260
1261         /* free index block */
1262         leaf = ext4_idx_pblock(path[i].index);
1263
1264         if (path[i].index != EXT_LAST_INDEX(path[i].header)) {
1265                 ptrdiff_t len = EXT_LAST_INDEX(path[i].header) - path[i].index;
1266                 memmove(path[i].index, path[i].index + 1,
1267                         len * sizeof(struct ext4_extent_index));
1268         }
1269
1270         path[i].header->entries_count =
1271             to_le16(to_le16(path[i].header->entries_count) - 1);
1272         err = ext4_ext_dirty(inode_ref, path + i);
1273         if (err != EOK)
1274                 return err;
1275
1276         ext4_dbg(DEBUG_EXTENT, "IDX: Freeing %" PRIu32 " at %" PRIu64 ", %d\n",
1277                  to_le32(path[i].index->first_block), leaf, 1);
1278         ext4_ext_free_blocks(inode_ref, leaf, 1, 0);
1279
1280         while (i > 0) {
1281                 if (path[i].index != EXT_FIRST_INDEX(path[i].header))
1282                         break;
1283
1284                 path[i - 1].index->first_block = path[i].index->first_block;
1285                 err = ext4_ext_dirty(inode_ref, path + i - 1);
1286                 if (err != EOK)
1287                         break;
1288
1289                 i--;
1290         }
1291         return err;
1292 }
1293
1294 static int ext4_ext_remove_leaf(struct ext4_inode_ref *inode_ref,
1295                                 struct ext4_extent_path *path, ext4_lblk_t from,
1296                                 ext4_lblk_t to)
1297 {
1298
1299         int32_t depth = ext_depth(inode_ref->inode);
1300         struct ext4_extent *ex = path[depth].extent;
1301         struct ext4_extent *start_ex, *ex2 = NULL;
1302         struct ext4_extent_header *eh = path[depth].header;
1303         int32_t len;
1304         int err = EOK;
1305         uint16_t new_entries;
1306
1307         start_ex = ex;
1308         new_entries = to_le16(eh->entries_count);
1309         while (ex <= EXT_LAST_EXTENT(path[depth].header) &&
1310                to_le32(ex->first_block) <= to) {
1311                 int32_t new_len = 0;
1312                 int unwritten;
1313                 ext4_fsblk_t start, new_start;
1314                 new_start = start = to_le32(ex->first_block);
1315                 len = ext4_ext_get_actual_len(ex);
1316                 if (start < from) {
1317                         start = from;
1318                         len -= from - start;
1319                         new_len = from - start;
1320                         start_ex++;
1321                 }
1322                 if (start + len - 1 > to) {
1323                         len -= start + len - 1 - to;
1324                         new_len = start + len - 1 - to;
1325                         new_start += to + 1;
1326                         ex2 = ex;
1327                 }
1328
1329                 ext4_ext_remove_blocks(inode_ref, ex, start, start + len - 1);
1330                 ex->first_block = to_le32(new_start);
1331                 if (!new_len)
1332                         new_entries--;
1333                 else {
1334                         unwritten = ext4_ext_is_unwritten(ex);
1335                         ex->block_count = to_le16(new_len);
1336                         if (unwritten)
1337                                 ext4_ext_mark_unwritten(ex);
1338                 }
1339
1340                 ex += 1;
1341         }
1342
1343         if (ex2 == NULL)
1344                 ex2 = ex;
1345
1346         if (ex2 <= EXT_LAST_EXTENT(eh))
1347                 memmove(start_ex, ex2, EXT_LAST_EXTENT(eh) - ex2 + 1);
1348
1349         eh->entries_count = to_le16(new_entries);
1350         ext4_ext_dirty(inode_ref, path + depth);
1351         if (path[depth].extent == EXT_FIRST_EXTENT(eh) && eh->entries_count)
1352                 err = ext4_ext_correct_indexes(inode_ref, path);
1353
1354         /* if this leaf is free, then we should
1355          * remove it from index block above */
1356         if (err == EOK && eh->entries_count == 0 && path[depth].block.lb_id)
1357                 err = ext4_ext_remove_idx(inode_ref, path, depth - 1);
1358
1359         return err;
1360 }
1361
1362 static int ext4_ext_more_to_rm(struct ext4_extent_path *path, ext4_lblk_t to)
1363 {
1364         if (!to_le16(path->header->entries_count))
1365                 return 0;
1366
1367         if (path->index > EXT_LAST_INDEX(path->header))
1368                 return 0;
1369
1370         if (to_le32(path->index->first_block) > to)
1371                 return 0;
1372
1373         return 1;
1374 }
1375
1376 int ext4_extent_remove_space(struct ext4_inode_ref *inode_ref, ext4_lblk_t from,
1377                           ext4_lblk_t to)
1378 {
1379         struct ext4_extent_path *path = NULL;
1380         int ret = EOK;
1381         int32_t depth = ext_depth(inode_ref->inode);
1382         int32_t i;
1383
1384         ret = ext4_find_extent(inode_ref, from, &path, 0);
1385         if (ret)
1386                 goto out;
1387
1388         if (!path[depth].extent ||
1389             !IN_RANGE(from, to_le32(path[depth].extent->first_block),
1390                       ext4_ext_get_actual_len(path[depth].extent))) {
1391                 ret = EOK;
1392                 goto out;
1393         }
1394
1395         i = depth;
1396         while (i >= 0) {
1397                 if (i == depth) {
1398                         struct ext4_extent_header *eh;
1399                         struct ext4_extent *first_ex, *last_ex;
1400                         ext4_lblk_t leaf_from, leaf_to;
1401                         eh = path[i].header;
1402                         ext4_assert(to_le16(eh->entries_count) > 0);
1403                         first_ex = EXT_FIRST_EXTENT(eh);
1404                         last_ex = EXT_LAST_EXTENT(eh);
1405                         leaf_from = to_le32(first_ex->first_block);
1406                         leaf_to = to_le32(last_ex->first_block) +
1407                                   ext4_ext_get_actual_len(last_ex) - 1;
1408                         if (leaf_from < from)
1409                                 leaf_from = from;
1410
1411                         if (leaf_to > to)
1412                                 leaf_to = to;
1413
1414                         ext4_ext_remove_leaf(inode_ref, path, leaf_from,
1415                                         leaf_to);
1416                         ext4_ext_drop_refs(inode_ref, path + i, 0);
1417                         i--;
1418                         continue;
1419                 }
1420
1421                 struct ext4_extent_header *eh;
1422                 eh = path[i].header;
1423                 if (ext4_ext_more_to_rm(path + i, to)) {
1424                         struct ext4_block bh = EXT4_BLOCK_ZERO();
1425                         if (path[i + 1].block.lb_id)
1426                                 ext4_ext_drop_refs(inode_ref, path + i + 1, 0);
1427
1428                         ret = read_extent_tree_block(inode_ref,
1429                                         ext4_idx_pblock(path[i].index),
1430                                         depth - i - 1, &bh, 0);
1431                         if (ret)
1432                                 goto out;
1433
1434                         path[i].p_block =
1435                                         ext4_idx_pblock(path[i].index);
1436                         path[i + 1].block = bh;
1437                         path[i + 1].header = ext_block_hdr(&bh);
1438                         path[i + 1].depth = depth - i - 1;
1439                         if (i + 1 == depth)
1440                                 path[i + 1].extent = EXT_FIRST_EXTENT(
1441                                         path[i + 1].header);
1442                         else
1443                                 path[i + 1].index =
1444                                         EXT_FIRST_INDEX(path[i + 1].header);
1445
1446                         i++;
1447                 } else {
1448                         if (!eh->entries_count && i > 0)
1449                                 ret = ext4_ext_remove_idx(inode_ref, path,
1450                                                 i - 1);
1451
1452
1453                         if (i)
1454                                 ext4_block_set(inode_ref->fs->bdev,
1455                                                 &path[i].block);
1456
1457
1458                         i--;
1459                 }
1460
1461         }
1462
1463         /* TODO: flexible tree reduction should be here */
1464         if (path->header->entries_count == 0) {
1465                 /*
1466                  * truncate to zero freed all the tree,
1467                  * so we need to correct eh_depth
1468                  */
1469                 ext_inode_hdr(inode_ref->inode)->depth = 0;
1470                 ext_inode_hdr(inode_ref->inode)->max_entries_count =
1471                     to_le16(ext4_ext_space_root(inode_ref));
1472                 ret = ext4_ext_dirty(inode_ref, path);
1473         }
1474
1475 out:
1476         ext4_ext_drop_refs(inode_ref, path, 0);
1477         free(path);
1478         path = NULL;
1479         return ret;
1480 }
1481
1482 static int ext4_ext_split_extent_at(struct ext4_inode_ref *inode_ref,
1483                                     struct ext4_extent_path **ppath,
1484                                     ext4_lblk_t split, uint32_t split_flag)
1485 {
1486         struct ext4_extent *ex, newex;
1487         ext4_fsblk_t newblock;
1488         ext4_lblk_t ee_block;
1489         int32_t ee_len;
1490         int32_t depth = ext_depth(inode_ref->inode);
1491         int err = EOK;
1492
1493         ex = (*ppath)[depth].extent;
1494         ee_block = to_le32(ex->first_block);
1495         ee_len = ext4_ext_get_actual_len(ex);
1496         newblock = split - ee_block + ext4_ext_pblock(ex);
1497
1498         if (split == ee_block) {
1499                 /*
1500                  * case b: block @split is the block that the extent begins with
1501                  * then we just change the state of the extent, and splitting
1502                  * is not needed.
1503                  */
1504                 if (split_flag & EXT4_EXT_MARK_UNWRIT2)
1505                         ext4_ext_mark_unwritten(ex);
1506                 else
1507                         ext4_ext_mark_initialized(ex);
1508
1509                 err = ext4_ext_dirty(inode_ref, *ppath + depth);
1510                 goto out;
1511         }
1512
1513         ex->block_count = to_le16(split - ee_block);
1514         if (split_flag & EXT4_EXT_MARK_UNWRIT1)
1515                 ext4_ext_mark_unwritten(ex);
1516
1517         err = ext4_ext_dirty(inode_ref, *ppath + depth);
1518         if (err != EOK)
1519                 goto out;
1520
1521         newex.first_block = to_le32(split);
1522         newex.block_count = to_le16(ee_len - (split - ee_block));
1523         ext4_ext_store_pblock(&newex, newblock);
1524         if (split_flag & EXT4_EXT_MARK_UNWRIT2)
1525                 ext4_ext_mark_unwritten(&newex);
1526         err = ext4_ext_insert_extent(inode_ref, ppath, &newex,
1527                                      EXT4_EXT_NO_COMBINE);
1528         if (err != EOK)
1529                 goto restore_extent_len;
1530
1531 out:
1532         return err;
1533 restore_extent_len:
1534         ex->block_count = to_le16(ee_len);
1535         err = ext4_ext_dirty(inode_ref, *ppath + depth);
1536         return err;
1537 }
1538
1539 static int ext4_ext_convert_to_initialized(struct ext4_inode_ref *inode_ref,
1540                                            struct ext4_extent_path **ppath,
1541                                            ext4_lblk_t split, uint32_t blocks)
1542 {
1543         int32_t depth = ext_depth(inode_ref->inode), err = EOK;
1544         struct ext4_extent *ex = (*ppath)[depth].extent;
1545
1546         ext4_assert(to_le32(ex->first_block) <= split);
1547
1548         if (split + blocks ==
1549             to_le32(ex->first_block) + ext4_ext_get_actual_len(ex)) {
1550                 /* split and initialize right part */
1551                 err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1552                                                EXT4_EXT_MARK_UNWRIT1);
1553         } else if (to_le32(ex->first_block) == split) {
1554                 /* split and initialize left part */
1555                 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1556                                                EXT4_EXT_MARK_UNWRIT2);
1557         } else {
1558                 /* split 1 extent to 3 and initialize the 2nd */
1559                 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1560                                                EXT4_EXT_MARK_UNWRIT1 |
1561                                                    EXT4_EXT_MARK_UNWRIT2);
1562                 if (!err) {
1563                         err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1564                                                        EXT4_EXT_MARK_UNWRIT1);
1565                 }
1566         }
1567
1568         return err;
1569 }
1570
1571 /*
1572  * ext4_ext_next_allocated_block:
1573  * returns allocated block in subsequent extent or EXT_MAX_BLOCKS.
1574  * NOTE: it considers block number from index entry as
1575  * allocated block. Thus, index entries have to be consistent
1576  * with leaves.
1577  */
/*
 * Largest value of ext4_lblk_t; returned when no further allocated block
 * exists.  The replacement list is fully parenthesized so the macro acts
 * as a single operand wherever it is expanded (e.g. after sizeof).
 */
#define EXT_MAX_BLOCKS ((ext4_lblk_t)-1)
1579
1580 static ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_extent_path *path)
1581 {
1582         int32_t depth;
1583
1584         depth = path->depth;
1585
1586         if (depth == 0 && path->extent == NULL)
1587                 return EXT_MAX_BLOCKS;
1588
1589         while (depth >= 0) {
1590                 if (depth == path->depth) {
1591                         /* leaf */
1592                         if (path[depth].extent &&
1593                             path[depth].extent !=
1594                                 EXT_LAST_EXTENT(path[depth].header))
1595                                 return to_le32(
1596                                     path[depth].extent[1].first_block);
1597                 } else {
1598                         /* index */
1599                         if (path[depth].index !=
1600                             EXT_LAST_INDEX(path[depth].header))
1601                                 return to_le32(
1602                                     path[depth].index[1].first_block);
1603                 }
1604                 depth--;
1605         }
1606
1607         return EXT_MAX_BLOCKS;
1608 }
1609
1610 static int ext4_ext_zero_unwritten_range(struct ext4_inode_ref *inode_ref,
1611                                          ext4_fsblk_t block,
1612                                          uint32_t blocks_count)
1613 {
1614         int err = EOK;
1615         uint32_t i;
1616         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
1617         for (i = 0; i < blocks_count; i++) {
1618                 struct ext4_block bh = EXT4_BLOCK_ZERO();
1619                 err = ext4_block_get(inode_ref->fs->bdev, &bh, block + i);
1620                 if (err != EOK)
1621                         break;
1622
1623                 memset(bh.data, 0, block_size);
1624                 bh.dirty = true;
1625                 err = ext4_block_set(inode_ref->fs->bdev, &bh);
1626                 if (err != EOK)
1627                         break;
1628         }
1629         return err;
1630 }
1631
1632 int ext4_extent_get_blocks(struct ext4_inode_ref *inode_ref, ext4_fsblk_t iblock,
1633                         uint32_t max_blocks, ext4_fsblk_t *result, bool create,
1634                         uint32_t *blocks_count)
1635 {
1636         struct ext4_extent_path *path = NULL;
1637         struct ext4_extent newex, *ex;
1638         ext4_fsblk_t goal;
1639         int err = EOK;
1640         int32_t depth;
1641         uint32_t allocated = 0;
1642         ext4_fsblk_t next, newblock;
1643
1644         if (result)
1645                 *result = 0;
1646
1647         if (blocks_count)
1648                 *blocks_count = 0;
1649
1650         /* find extent for this block */
1651         err = ext4_find_extent(inode_ref, iblock, &path, 0);
1652         if (err != EOK) {
1653                 path = NULL;
1654                 goto out2;
1655         }
1656
1657         depth = ext_depth(inode_ref->inode);
1658
1659         /*
1660          * consistent leaf must not be empty
1661          * this situations is possible, though, _during_ tree modification
1662          * this is why assert can't be put in ext4_ext_find_extent()
1663          */
1664         if ((ex = path[depth].extent)) {
1665                 ext4_lblk_t ee_block = to_le32(ex->first_block);
1666                 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
1667                 uint16_t ee_len = ext4_ext_get_actual_len(ex);
1668                 /* if found exent covers block, simple return it */
1669                 if (IN_RANGE(iblock, ee_block, ee_len)) {
1670                         /* number of remain blocks in the extent */
1671                         allocated = ee_len - (iblock - ee_block);
1672
1673                         if (!ext4_ext_is_unwritten(ex)) {
1674                                 newblock = iblock - ee_block + ee_start;
1675                                 goto out;
1676                         }
1677
1678                         if (!create) {
1679                                 newblock = 0;
1680                                 goto out;
1681                         }
1682
1683                         uint32_t zero_range;
1684                         zero_range = allocated;
1685                         if (zero_range > max_blocks)
1686                                 zero_range = max_blocks;
1687
1688                         newblock = iblock - ee_block + ee_start;
1689                         err = ext4_ext_zero_unwritten_range(inode_ref, newblock,
1690                                         zero_range);
1691                         if (err != EOK)
1692                                 goto out2;
1693
1694                         err = ext4_ext_convert_to_initialized(inode_ref, &path,
1695                                         iblock, zero_range);
1696                         if (err != EOK)
1697                                 goto out2;
1698
1699                         goto out;
1700                 }
1701         }
1702
1703         /*
1704          * requested block isn't allocated yet
1705          * we couldn't try to create block if create flag is zero
1706          */
1707         if (!create) {
1708                 goto out2;
1709         }
1710
1711         /* find next allocated block so that we know how many
1712          * blocks we can allocate without ovelapping next extent */
1713         next = ext4_ext_next_allocated_block(path);
1714         allocated = next - iblock;
1715         if (allocated > max_blocks)
1716                 allocated = max_blocks;
1717
1718         /* allocate new block */
1719         goal = ext4_ext_find_goal(inode_ref, path, iblock);
1720         newblock = ext4_new_meta_blocks(inode_ref, goal, 0, &allocated, &err);
1721         if (!newblock)
1722                 goto out2;
1723
1724         /* try to insert new extent into found leaf and return */
1725         newex.first_block = to_le32(iblock);
1726         ext4_ext_store_pblock(&newex, newblock);
1727         newex.block_count = to_le16(allocated);
1728         err = ext4_ext_insert_extent(inode_ref, &path, &newex, 0);
1729         if (err != EOK) {
1730                 /* free data blocks we just allocated */
1731                 ext4_ext_free_blocks(inode_ref, ext4_ext_pblock(&newex),
1732                                      to_le16(newex.block_count), 0);
1733                 goto out2;
1734         }
1735
1736         /* previous routine could use block we allocated */
1737         newblock = ext4_ext_pblock(&newex);
1738
1739 out:
1740         if (allocated > max_blocks)
1741                 allocated = max_blocks;
1742
1743         if (result)
1744                 *result = newblock;
1745
1746         if (blocks_count)
1747                 *blocks_count = allocated;
1748
1749 out2:
1750         if (path) {
1751                 ext4_ext_drop_refs(inode_ref, path, 0);
1752                 free(path);
1753         }
1754
1755         return err;
1756 }
1757 #endif