Introduce EXT4_BLOCK_ZERO macro to initialize ext4_block
[lwext4.git] / lwext4 / ext4_extent_full.c
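The new EXT4_BLOCK_ZERO() initializer is used throughout the listing below (ext4_find_extent, ext4_ext_split_node, ext4_ext_insert_index, ext4_ext_insert_leaf, ext4_ext_grow_indepth, ext4_ext_remove_space) to start on-stack struct ext4_block handles out empty. Its definition lives in ext4_blockdev.h and is not part of this listing; a minimal sketch of what such an initializer could look like, assuming only the lb_id, data and dirty members that this file actually touches:

/* Hypothetical sketch only -- see ext4_blockdev.h for the real definition. */
#define EXT4_BLOCK_ZERO() \
	{.lb_id = 0, .data = NULL, .dirty = false}

With that, "struct ext4_block bh = EXT4_BLOCK_ZERO();" leaves the handle empty until ext4_block_get() attaches a real buffer to it.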
1 /*
2  * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3  * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * - Redistributions of source code must retain the above copyright
10  *   notice, this list of conditions and the following disclaimer.
11  * - Redistributions in binary form must reproduce the above copyright
12  *   notice, this list of conditions and the following disclaimer in the
13  *   documentation and/or other materials provided with the distribution.
14  * - The name of the author may not be used to endorse or promote products
15  *   derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28
29 #include "ext4_blockdev.h"
30 #include "ext4_super.h"
31 #include "ext4_balloc.h"
32 #include "ext4_debug.h"
33
34 #include <stdlib.h>
35 #include <string.h>
36 #include <inttypes.h>
37 #include <stddef.h>
38
39 #include "ext4_extent.h"
40
41 /*
42  * used by extent splitting.
43  */
44 #define EXT4_EXT_MARK_UNWRIT1 0x02 /* mark first half unwritten */
45 #define EXT4_EXT_MARK_UNWRIT2 0x04 /* mark second half unwritten */
46 #define EXT4_EXT_DATA_VALID1 0x08  /* first half contains valid data */
47 #define EXT4_EXT_DATA_VALID2 0x10  /* second half contains valid data */
48 #define EXT4_EXT_NO_COMBINE 0x20   /* do not combine two extents */
49
50 static struct ext4_extent_tail *
51 find_ext4_extent_tail(struct ext4_extent_header *eh)
52 {
53         return (struct ext4_extent_tail *)(((char *)eh) +
54                                            EXT4_EXTENT_TAIL_OFFSET(eh));
55 }
56
57 static struct ext4_extent_header *ext_inode_hdr(struct ext4_inode *inode)
58 {
59         return (struct ext4_extent_header *)inode->blocks;
60 }
61
62 static struct ext4_extent_header *ext_block_hdr(struct ext4_block *block)
63 {
64         return (struct ext4_extent_header *)block->data;
65 }
66
67 static uint16_t ext_depth(struct ext4_inode *inode)
68 {
69         return to_le16(ext_inode_hdr(inode)->depth);
70 }
71
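/*
 * An extent whose stored length exceeds EXT_INIT_MAX_LEN (0x8000) is an
 * unwritten (preallocated) extent; its real length is the stored value
 * minus EXT_INIT_MAX_LEN.  The helpers below encode and decode that
 * convention.
 */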
72 static uint16_t ext4_ext_get_actual_len(struct ext4_extent *ext)
73 {
74         return (to_le16(ext->block_count) <= EXT_INIT_MAX_LEN
75                     ? to_le16(ext->block_count)
76                     : (to_le16(ext->block_count) - EXT_INIT_MAX_LEN));
77 }
78
79 static void ext4_ext_mark_initialized(struct ext4_extent *ext)
80 {
81         ext->block_count = to_le16(ext4_ext_get_actual_len(ext));
82 }
83
84 static void ext4_ext_mark_unwritten(struct ext4_extent *ext)
85 {
86         ext->block_count |= to_le16(EXT_INIT_MAX_LEN);
87 }
88
89 static int ext4_ext_is_unwritten(struct ext4_extent *ext)
90 {
91         /* Extent with ee_len of 0x8000 is treated as an initialized extent */
92         return (to_le16(ext->block_count) > EXT_INIT_MAX_LEN);
93 }
94
95 /*
96  * ext4_ext_pblock:
97  * combine low and high parts of physical block number into ext4_fsblk_t
98  */
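/*
 * For example, start_hi = 0x0001 and start_lo = 0x23456789 combine into
 * the 48-bit physical block number 0x123456789.
 */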
99 static ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex)
100 {
101         ext4_fsblk_t block;
102
103         block = to_le32(ex->start_lo);
104         block |= ((ext4_fsblk_t)to_le16(ex->start_hi) << 31) << 1;
105         return block;
106 }
107
108 /*
109  * ext4_idx_pblock:
110  * combine low and high parts of a leaf physical block number into ext4_fsblk_t
111  */
112 static ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_index *ix)
113 {
114         ext4_fsblk_t block;
115
116         block = to_le32(ix->leaf_lo);
117         block |= ((ext4_fsblk_t)to_le16(ix->leaf_hi) << 31) << 1;
118         return block;
119 }
120
121 /*
122  * ext4_ext_store_pblock:
123  * stores a large physical block number into an extent struct,
124  * breaking it into parts
125  */
126 static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
127 {
128         ex->start_lo = to_le32((unsigned long)(pb & 0xffffffff));
129         ex->start_hi = to_le16((unsigned long)((pb >> 31) >> 1) & 0xffff);
130 }
131
132 /*
133  * ext4_idx_store_pblock:
134  * stores a large physical block number into an index struct,
135  * breaking it into parts
136  */
137 static void ext4_idx_store_pblock(struct ext4_extent_index *ix, ext4_fsblk_t pb)
138 {
139         ix->leaf_lo = to_le32((unsigned long)(pb & 0xffffffff));
140         ix->leaf_hi = to_le16((unsigned long)((pb >> 31) >> 1) & 0xffff);
141 }
142
143 static int ext4_allocate_single_block(struct ext4_inode_ref *inode_ref,
144                                       ext4_fsblk_t goal __unused,
145                                       ext4_fsblk_t *blockp)
146 {
147         return ext4_balloc_alloc_block(inode_ref, (uint32_t *)blockp);
148 }
149
150 static ext4_fsblk_t ext4_new_meta_blocks(struct ext4_inode_ref *inode_ref,
151                                          ext4_fsblk_t goal,
152                                          uint32_t flags __unused,
153                                          uint32_t *count, int *errp)
154 {
155         ext4_fsblk_t block = 0;
156
157         *errp = ext4_allocate_single_block(inode_ref, goal, &block);
158         if (count)
159                 *count = 1;
160         return block;
161 }
162
163 static void ext4_ext_free_blocks(struct ext4_inode_ref *inode_ref,
164                                  ext4_fsblk_t block, uint32_t count,
165                                  uint32_t flags __unused)
166 {
167         uint32_t i;
168         for (i = 0; i < count; i++)
169                 ext4_balloc_free_block(inode_ref, (uint32_t)block + i);
170 }
171
172 static size_t ext4_ext_space_block(struct ext4_inode_ref *inode_ref)
173 {
174         size_t size;
175         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
176
177         size = (block_size - sizeof(struct ext4_extent_header)) /
178                sizeof(struct ext4_extent);
179         return size;
180 }
181
182 static size_t ext4_ext_space_block_idx(struct ext4_inode_ref *inode_ref)
183 {
184         size_t size;
185         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
186
187         size = (block_size - sizeof(struct ext4_extent_header)) /
188                sizeof(struct ext4_extent_index);
189         return size;
190 }
191
192 static size_t ext4_ext_space_root(struct ext4_inode_ref *inode_ref)
193 {
194         size_t size;
195
196         size = sizeof(inode_ref->inode->blocks);
197         size -= sizeof(struct ext4_extent_header);
198         size /= sizeof(struct ext4_extent);
199         return size;
200 }
201
202 static size_t ext4_ext_space_root_idx(struct ext4_inode_ref *inode_ref)
203 {
204         size_t size;
205
206         size = sizeof(inode_ref->inode->blocks);
207         size -= sizeof(struct ext4_extent_header);
208         size /= sizeof(struct ext4_extent_index);
209         return size;
210 }
211
212 static size_t ext4_ext_max_entries(struct ext4_inode_ref *inode_ref,
213                                    uint32_t depth)
214 {
215         size_t max;
216
217         if (depth == ext_depth(inode_ref->inode)) {
218                 if (depth == 0)
219                         max = ext4_ext_space_root(inode_ref);
220                 else
221                         max = ext4_ext_space_root_idx(inode_ref);
222         } else {
223                 if (depth == 0)
224                         max = ext4_ext_space_block(inode_ref);
225                 else
226                         max = ext4_ext_space_block_idx(inode_ref);
227         }
228
229         return max;
230 }
231
232 static ext4_fsblk_t ext4_ext_find_goal(struct ext4_extent_path *path,
233                                        ext4_lblk_t block)
234 {
235         if (path) {
236                 uint32_t depth = path->depth;
237                 struct ext4_extent *ex;
238
239                 /*
240                  * Try to predict block placement assuming that we are
241                  * filling in a file which will eventually be
242                  * non-sparse --- i.e., in the case of libbfd writing
243                  * an ELF object's sections out-of-order but in a way
244                  * that eventually results in a contiguous object or
245                  * executable file, or some database extending a table
246                  * space file.  However, this is actually somewhat
247                  * non-ideal if we are writing a sparse file such as
248                  * qemu or KVM writing a raw image file that is going
249                  * to stay fairly sparse, since it will end up
250                  * fragmenting the file system's free space.  Maybe we
251                  * should have some heuristics or some way to allow
252                  * userspace to pass a hint to the file system,
253                  * especially if the latter case turns out to be
254                  * common.
255                  */
256                 ex = path[depth].extent;
257                 if (ex) {
258                         ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
259                         ext4_lblk_t ext_block = to_le32(ex->first_block);
260
261                         if (block > ext_block)
262                                 return ext_pblk + (block - ext_block);
263                         else
264                                 return ext_pblk - (ext_block - block);
265                 }
266
267                 /* it looks like index is empty;
268                  * try to find starting block from index itself */
269                 if (path[depth].block.lb_id)
270                         return path[depth].block.lb_id;
271         }
272
273         /* OK. use inode's group */
274         return 0;
275 }
276
277 /*
278  * Allocation for a meta data block
279  */
280 static ext4_fsblk_t ext4_ext_new_meta_block(struct ext4_inode_ref *inode_ref,
281                                             struct ext4_extent_path *path,
282                                             struct ext4_extent *ex, int *err,
283                                             uint32_t flags)
284 {
285         ext4_fsblk_t goal, newblock;
286
287         goal = ext4_ext_find_goal(path, to_le32(ex->first_block));
288         newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, err);
289         return newblock;
290 }
291
292 static int ext4_ext_dirty(struct ext4_inode_ref *inode_ref,
293                           struct ext4_extent_path *path)
294 {
295         if (path->block.lb_id)
296                 path->block.dirty = true;
297         else
298                 inode_ref->dirty = true;
299
300         return EOK;
301 }
302
303 static void ext4_ext_drop_refs(struct ext4_inode_ref *inode_ref,
304                                struct ext4_extent_path *path, bool keep_other)
305 {
306         int32_t depth, i;
307
308         if (!path)
309                 return;
310         if (keep_other)
311                 depth = 0;
312         else
313                 depth = path->depth;
314
315         for (i = 0; i <= depth; i++, path++)
316                 if (path->block.lb_id) {
317                         ext4_block_set(inode_ref->fs->bdev, &path->block);
318                 }
319 }
320
321 /*
322  * Extent block checksums are not supported for now.
323  */
324 static uint32_t ext4_ext_block_csum(struct ext4_inode_ref *inode_ref __unused,
325                                     struct ext4_extent_header *eh __unused)
326 {
327         /*TODO: should we add crc32 here ?*/
328         /*return ext4_crc32c(inode->i_csum, eh, EXT4_EXTENT_TAIL_OFFSET(eh));*/
329         return 0;
330 }
331
332 static void ext4_extent_block_csum_set(struct ext4_inode_ref *inode_ref,
333                                        struct ext4_extent_header *eh)
334 {
335         struct ext4_extent_tail *tail;
336
337         tail = find_ext4_extent_tail(eh);
338         tail->et_checksum = ext4_ext_block_csum(inode_ref, eh);
339 }
340
341 /*
342  * Check whether the basic information inside the extent header
343  * is correct.
344  */
345 static int ext4_ext_check(struct ext4_inode_ref *inode_ref,
346                           struct ext4_extent_header *eh, uint16_t depth,
347                           ext4_fsblk_t pblk __unused)
348 {
349         struct ext4_extent_tail *tail;
350         const char *error_msg;
351         (void)error_msg;
352
353         if (to_le16(eh->magic) != EXT4_EXTENT_MAGIC) {
354                 error_msg = "invalid magic";
355                 goto corrupted;
356         }
357         if (to_le16(eh->depth) != depth) {
358                 error_msg = "unexpected eh_depth";
359                 goto corrupted;
360         }
361         if (eh->max_entries_count == 0) {
362                 error_msg = "invalid eh_max";
363                 goto corrupted;
364         }
365         if (to_le16(eh->entries_count) > to_le16(eh->max_entries_count)) {
366                 error_msg = "invalid eh_entries";
367                 goto corrupted;
368         }
369
370         tail = find_ext4_extent_tail(eh);
371         if (tail->et_checksum != ext4_ext_block_csum(inode_ref, eh)) {
372                 /* FIXME: Warning: extent checksum damaged? */
373         }
374
375         return EOK;
376
377 corrupted:
378         ext4_dbg(DEBUG_EXTENT, "Bad extents B+ tree block: %s. "
379                                "Blocknr: %" PRId64 "\n",
380                  error_msg, pblk);
381         return EIO;
382 }
383
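/*
 * Read one block of the extent tree from the block device and validate
 * its header (magic, depth and entry counts) before returning it; the
 * buffer is released again if validation fails.
 */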
384 static int read_extent_tree_block(struct ext4_inode_ref *inode_ref,
385                                   ext4_fsblk_t pblk, int32_t depth,
386                                   struct ext4_block *bh,
387                                   uint32_t flags __unused)
388 {
389         int err;
390
391         err = ext4_block_get(inode_ref->fs->bdev, bh, pblk);
392         if (err != EOK)
393                 goto errout;
394
395         err = ext4_ext_check(inode_ref, ext_block_hdr(bh), depth, pblk);
396         if (err != EOK)
397                 goto errout;
398
399         return EOK;
400 errout:
401         if (bh->lb_id)
402                 ext4_block_set(inode_ref->fs->bdev, bh);
403
404         return err;
405 }
406
407 /*
408  * ext4_ext_binsearch_idx:
409  * binary search for the closest index of the given block
410  * the header must be checked before calling this
411  */
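/*
 * On return path->index points at the last entry whose first_block is
 * less than or equal to the requested block; starting the search at the
 * second entry guarantees the first entry is used when the block
 * precedes every key in the node.
 */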
412 static void ext4_ext_binsearch_idx(struct ext4_extent_path *path,
413                                    ext4_lblk_t block)
414 {
415         struct ext4_extent_header *eh = path->header;
416         struct ext4_extent_index *r, *l, *m;
417
418         l = EXT_FIRST_INDEX(eh) + 1;
419         r = EXT_LAST_INDEX(eh);
420         while (l <= r) {
421                 m = l + (r - l) / 2;
422                 if (block < to_le32(m->first_block))
423                         r = m - 1;
424                 else
425                         l = m + 1;
426         }
427
428         path->index = l - 1;
429 }
430
431 /*
432  * ext4_ext_binsearch:
433  * binary search for closest extent of the given block
434  * the header must be checked before calling this
435  */
436 static void ext4_ext_binsearch(struct ext4_extent_path *path, ext4_lblk_t block)
437 {
438         struct ext4_extent_header *eh = path->header;
439         struct ext4_extent *r, *l, *m;
440
441         if (eh->entries_count == 0) {
442                 /*
443                  * this leaf is empty:
444                  * we get such a leaf in split/add case
445                  */
446                 return;
447         }
448
449         l = EXT_FIRST_EXTENT(eh) + 1;
450         r = EXT_LAST_EXTENT(eh);
451
452         while (l <= r) {
453                 m = l + (r - l) / 2;
454                 if (block < to_le32(m->first_block))
455                         r = m - 1;
456                 else
457                         l = m + 1;
458         }
459
460         path->extent = l - 1;
461 }
462
463 #define EXT4_EXT_PATH_INC_DEPTH 1
464
465 static int ext4_find_extent(struct ext4_inode_ref *inode_ref, ext4_lblk_t block,
466                             struct ext4_extent_path **orig_path, uint32_t flags)
467 {
468         struct ext4_extent_header *eh;
469         struct ext4_block bh = EXT4_BLOCK_ZERO();
470         ext4_fsblk_t buf_block = 0;
471         struct ext4_extent_path *path = *orig_path;
472         int32_t depth, ppos = 0;
473         int32_t i;
474         int ret;
475
476         eh = ext_inode_hdr(inode_ref->inode);
477         depth = ext_depth(inode_ref->inode);
478
479         if (path) {
480                 ext4_ext_drop_refs(inode_ref, path, 0);
481                 if (depth > path[0].maxdepth) {
482                         free(path);
483                         *orig_path = path = NULL;
484                 }
485         }
486         if (!path) {
487                 int32_t path_depth = depth + EXT4_EXT_PATH_INC_DEPTH;
488                 /* account possible depth increase */
489                 path = calloc(1, sizeof(struct ext4_extent_path) *
490                                      (path_depth + 1));
491                 if (!path)
492                         return ENOMEM;
493                 path[0].maxdepth = path_depth;
494         }
495         path[0].header = eh;
496         path[0].block = bh;
497
498         i = depth;
499         /* walk through the tree */
500         while (i) {
501                 ext4_ext_binsearch_idx(path + ppos, block);
502                 path[ppos].p_block = ext4_idx_pblock(path[ppos].index);
503                 path[ppos].depth = i;
504                 path[ppos].extent = NULL;
505                 buf_block = path[ppos].p_block;
506
507                 i--;
508                 ppos++;
509                 if (!path[ppos].block.lb_id ||
510                     path[ppos].block.lb_id != buf_block) {
511                         ret = read_extent_tree_block(inode_ref, buf_block, i,
512                                                      &bh, flags);
513                         if (ret != EOK) {
514                                 goto err;
515                         }
516                         if (ppos > depth) {
517                                 ext4_block_set(inode_ref->fs->bdev, &bh);
518                                 ret = EIO;
519                                 goto err;
520                         }
521
522                         eh = ext_block_hdr(&bh);
523                         path[ppos].block = bh;
524                         path[ppos].header = eh;
525                 }
526         }
527
528         path[ppos].depth = i;
529         path[ppos].extent = NULL;
530         path[ppos].index = NULL;
531
532         /* find extent */
533         ext4_ext_binsearch(path + ppos, block);
534         /* if not an empty leaf */
535         if (path[ppos].extent)
536                 path[ppos].p_block = ext4_ext_pblock(path[ppos].extent);
537
538         *orig_path = path;
539
540         ret = EOK;
541         return ret;
542
543 err:
544         ext4_ext_drop_refs(inode_ref, path, 0);
545         free(path);
546         if (orig_path)
547                 *orig_path = NULL;
548         return ret;
549 }
550
551 static void ext4_ext_init_header(struct ext4_inode_ref *inode_ref,
552                                  struct ext4_extent_header *eh, int32_t depth)
553 {
554         eh->entries_count = 0;
555         eh->max_entries_count = to_le16(ext4_ext_max_entries(inode_ref, depth));
556         eh->magic = to_le16(EXT4_EXTENT_MAGIC);
557         eh->depth = to_le16(depth);
558 }
559
560 /*
561  * Be cautious: the block buffer returned is not yet marked dirty. */
562 static int ext4_ext_split_node(struct ext4_inode_ref *inode_ref,
563                                struct ext4_extent_path *path, int32_t at,
564                                struct ext4_extent *newext,
565                                ext4_fsblk_t *sibling, struct ext4_block *new_bh)
566 {
567         int ret;
568         ext4_fsblk_t newblock;
569         struct ext4_block bh = EXT4_BLOCK_ZERO();
570         int32_t depth = ext_depth(inode_ref->inode);
571
572         ext4_assert(sibling);
573
574         /* FIXME: currently we split at the point after the current extent. */
575         newblock = ext4_ext_new_meta_block(inode_ref, path, newext, &ret, 0);
576         if (ret)
577                 goto cleanup;
578
579         /* For write access. */
580         ret = ext4_block_get(inode_ref->fs->bdev, &bh, newblock);
581         if (ret != EOK)
582                 goto cleanup;
583
584         if (at == depth) {
585                 /* start copy from next extent */
586                 ptrdiff_t m = EXT_MAX_EXTENT(path[at].header) - path[at].extent;
587                 struct ext4_extent_header *neh;
588                 neh = ext_block_hdr(&bh);
589                 ext4_ext_init_header(inode_ref, neh, 0);
590                 if (m) {
591                         struct ext4_extent *ex;
592                         ex = EXT_FIRST_EXTENT(neh);
593                         memmove(ex, path[at].extent + 1,
594                                 sizeof(struct ext4_extent) * m);
595                         neh->entries_count =
596                             to_le16(to_le16(neh->entries_count) + m);
597                         path[at].header->entries_count = to_le16(
598                             to_le16(path[at].header->entries_count) - m);
599                         ret = ext4_ext_dirty(inode_ref, path + at);
600                         if (ret)
601                                 goto cleanup;
602                 }
603         } else {
604                 ptrdiff_t m = EXT_MAX_INDEX(path[at].header) - path[at].index;
605                 struct ext4_extent_header *neh;
606                 neh = ext_block_hdr(&bh);
607                 ext4_ext_init_header(inode_ref, neh, depth - at);
608                 if (m) {
609                         struct ext4_extent_index *ix;
610                         ix = EXT_FIRST_INDEX(neh);
611                         memmove(ix, path[at].index + 1,
612                                 sizeof(struct ext4_extent_index) * m);
613                         neh->entries_count =
614                             to_le16(to_le16(neh->entries_count) + m);
615                         path[at].header->entries_count = to_le16(
616                             to_le16(path[at].header->entries_count) - m);
617                         ret = ext4_ext_dirty(inode_ref, path + at);
618                         if (ret)
619                                 goto cleanup;
620                 }
621         }
622 cleanup:
623         if (ret) {
624                 if (bh.lb_id) {
625                         ext4_block_set(inode_ref->fs->bdev, &bh);
626                 }
627                 if (newblock)
628                         ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
629
630                 newblock = 0;
631         }
632         *sibling = newblock;
633         *new_bh = bh;
634         return ret;
635 }
636
637 static ext4_lblk_t ext4_ext_block_index(struct ext4_extent_header *eh)
638 {
639         if (eh->depth)
640                 return to_le32(EXT_FIRST_INDEX(eh)->first_block);
641
642         return to_le32(EXT_FIRST_EXTENT(eh)->first_block);
643 }
644
645 #define EXT_INODE_HDR_NEED_GROW 0x1
646
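/*
 * Bookkeeping for one node split performed during an insert: the block
 * number of the new sibling, a path element describing the position in
 * that sibling, and a flag telling the caller to switch the search path
 * over to the sibling.
 */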
647 struct ext_split_trans {
648         ext4_fsblk_t ptr;
649         struct ext4_extent_path path;
650         int switch_to;
651 };
652
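/*
 * Insert the index entry (insert_index -> insert_block) into the index
 * node at path[at].  A full node is split via ext4_ext_split_node() and
 * the split is recorded in *spt; when the full node is the in-inode
 * root, EXT_INODE_HDR_NEED_GROW is returned instead.
 */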
653 static int ext4_ext_insert_index(struct ext4_inode_ref *inode_ref,
654                                  struct ext4_extent_path *path, int32_t at,
655                                  struct ext4_extent *newext,
656                                  ext4_lblk_t insert_index,
657                                  ext4_fsblk_t insert_block,
658                                  struct ext_split_trans *spt)
659 {
660         struct ext4_extent_index *ix;
661         struct ext4_extent_path *curp = path + at;
662         struct ext4_block bh = EXT4_BLOCK_ZERO();
663         int32_t len;
664         int err;
665         struct ext4_extent_header *eh;
666
667         if (curp->index && insert_index == to_le32(curp->index->first_block))
668                 return EIO;
669
670         if (to_le16(curp->header->entries_count) ==
671             to_le16(curp->header->max_entries_count)) {
672                 if (at) {
673                         struct ext4_extent_header *neh;
674                         err = ext4_ext_split_node(inode_ref, path, at, newext,
675                                                   &spt->ptr, &bh);
676                         if (err != EOK)
677                                 goto out;
678
679                         neh = ext_block_hdr(&bh);
680                         if (insert_index > to_le32(curp->index->first_block)) {
681                                 /* Make decision which node should be used to
682                                  * insert the index.*/
683                                 if (to_le16(neh->entries_count) >
684                                     to_le16(curp->header->entries_count)) {
685                                         eh = curp->header;
686                                         /* insert after */
687                                         ix = EXT_LAST_INDEX(eh) + 1;
688                                 } else {
689                                         eh = neh;
690                                         ix = EXT_FIRST_INDEX(eh);
691                                 }
692                         } else {
693                                 eh = curp->header;
694                                 /* insert before */
695                                 ix = EXT_LAST_INDEX(eh);
696                         }
697                 } else {
698                         err = EXT_INODE_HDR_NEED_GROW;
699                         goto out;
700                 }
701         } else {
702                 eh = curp->header;
703                 if (curp->index == NULL) {
704                         ix = EXT_FIRST_INDEX(eh);
705                         curp->index = ix;
706                 } else if (insert_index > to_le32(curp->index->first_block)) {
707                         /* insert after */
708                         ix = curp->index + 1;
709                 } else {
710                         /* insert before */
711                         ix = curp->index;
712                 }
713         }
714
715         len = EXT_LAST_INDEX(eh) - ix + 1;
716         ext4_assert(len >= 0);
717         if (len > 0)
718                 memmove(ix + 1, ix, len * sizeof(struct ext4_extent_index));
719
720         if (ix > EXT_MAX_INDEX(eh)) {
721                 err = EIO;
722                 goto out;
723         }
724
725         ix->first_block = to_le32(insert_index);
726         ext4_idx_store_pblock(ix, insert_block);
727         eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
728
729         if (ix > EXT_LAST_INDEX(eh)) {
730                 err = EIO;
731                 goto out;
732         }
733
734         if (eh == curp->header)
735                 err = ext4_ext_dirty(inode_ref, curp);
736         else
737                 err = EOK;
738
739 out:
740         if (err != EOK) {
741                 if (bh.lb_id)
742                         ext4_block_set(inode_ref->fs->bdev, &bh);
743
744                 spt->ptr = 0;
745         } else if (bh.lb_id) {
746                 /* If we got a sibling leaf. */
747                 bh.dirty = true;
748
749                 spt->path.p_block = ext4_idx_pblock(ix);
750                 spt->path.depth = to_le16(eh->depth);
751                 spt->path.maxdepth = 0;
752                 spt->path.extent = NULL;
753                 spt->path.index = ix;
754                 spt->path.header = eh;
755                 spt->path.block = bh;
756
757                 /*
758                  * If newext->ee_block can be included into the
759                  * right sub-tree.
760                  */
761                 if (to_le32(newext->first_block) >=
762                     ext4_ext_block_index(ext_block_hdr(&bh)))
763                         spt->switch_to = 1;
764                 else {
765                         curp->index = ix;
766                         curp->p_block = ext4_idx_pblock(ix);
767                 }
768
769         } else {
770                 spt->ptr = 0;
771                 curp->index = ix;
772                 curp->p_block = ext4_idx_pblock(ix);
773         }
774         return err;
775 }
776
777 /*
778  * ext4_ext_correct_indexes:
779  * if leaf gets modified and modified extent is first in the leaf,
780  * then we have to correct all indexes above.
781  */
782 static int ext4_ext_correct_indexes(struct ext4_inode_ref *inode_ref,
783                                     struct ext4_extent_path *path)
784 {
785         struct ext4_extent_header *eh;
786         int32_t depth = ext_depth(inode_ref->inode);
787         struct ext4_extent *ex;
788         uint32_t border;
789         int32_t k;
790         int err = EOK;
791
792         eh = path[depth].header;
793         ex = path[depth].extent;
794
795         if (ex == NULL || eh == NULL) {
796                 return EIO;
797         }
798
799         if (depth == 0) {
800                 /* there is no tree at all */
801                 return EOK;
802         }
803
804         if (ex != EXT_FIRST_EXTENT(eh)) {
805                 /* we correct tree if first leaf got modified only */
806                 return EOK;
807         }
808
809         /*
810          * TODO: we need correction if border is smaller than current one
811          */
812         k = depth - 1;
813         border = path[depth].extent->first_block;
814         path[k].index->first_block = border;
815         err = ext4_ext_dirty(inode_ref, path + k);
816         if (err != EOK)
817                 return err;
818
819         while (k--) {
820                 /* change all left-side indexes */
821                 if (path[k + 1].index != EXT_FIRST_INDEX(path[k + 1].header))
822                         break;
823                 path[k].index->first_block = border;
824                 err = ext4_ext_dirty(inode_ref, path + k);
825                 if (err != EOK)
826                         break;
827         }
828
829         return err;
830 }
831
832 static bool ext4_ext_can_prepend(struct ext4_extent *ex1,
833                                  struct ext4_extent *ex2)
834 {
835         if (ext4_ext_pblock(ex2) + ext4_ext_get_actual_len(ex2) !=
836             ext4_ext_pblock(ex1))
837                 return false;
838
839         if (ext4_ext_is_unwritten(ex1)) {
840                 if (ext4_ext_get_actual_len(ex1) +
841                         ext4_ext_get_actual_len(ex2) >
842                     EXT_UNWRITTEN_MAX_LEN)
843                         return false;
844         } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
845                    EXT_INIT_MAX_LEN)
846                 return false;
847
848         if (to_le32(ex2->first_block) + ext4_ext_get_actual_len(ex2) !=
849             to_le32(ex1->first_block))
850                 return false;
851
852         return true;
853 }
854
855 static bool ext4_ext_can_append(struct ext4_extent *ex1,
856                                 struct ext4_extent *ex2)
857 {
858         if (ext4_ext_pblock(ex1) + ext4_ext_get_actual_len(ex1) !=
859             ext4_ext_pblock(ex2))
860                 return false;
861
862         if (ext4_ext_is_unwritten(ex1)) {
863                 if (ext4_ext_get_actual_len(ex1) +
864                         ext4_ext_get_actual_len(ex2) >
865                     EXT_UNWRITTEN_MAX_LEN)
866                         return false;
867         } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
868                    EXT_INIT_MAX_LEN)
869                 return false;
870
871         if (to_le32(ex1->first_block) + ext4_ext_get_actual_len(ex1) !=
872             to_le32(ex2->first_block))
873                 return false;
874
875         return true;
876 }
877
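/*
 * Insert newext into the leaf at path[at].  Unless EXT4_EXT_NO_COMBINE
 * is set, the new extent is first merged into the neighbouring extent
 * when the two are logically and physically contiguous.  A full leaf is
 * split via ext4_ext_split_node(); when the full leaf is the in-inode
 * root, EXT_INODE_HDR_NEED_GROW is returned instead.
 */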
878 static int ext4_ext_insert_leaf(struct ext4_inode_ref *inode_ref,
879                                 struct ext4_extent_path *path, int32_t at,
880                                 struct ext4_extent *newext,
881                                 struct ext_split_trans *spt, uint32_t flags)
882 {
883         struct ext4_extent_path *curp = path + at;
884         struct ext4_extent *ex = curp->extent;
885         struct ext4_block bh = EXT4_BLOCK_ZERO();
886         int32_t len;
887         int err = EOK;
888         int unwritten;
889         struct ext4_extent_header *eh = NULL;
890
891         if (curp->extent &&
892             to_le32(newext->first_block) == to_le32(curp->extent->first_block))
893                 return EIO;
894
895         if (!(flags & EXT4_EXT_NO_COMBINE)) {
896                 if (curp->extent && ext4_ext_can_append(curp->extent, newext)) {
897                         unwritten = ext4_ext_is_unwritten(curp->extent);
898                         curp->extent->block_count =
899                             to_le16(ext4_ext_get_actual_len(curp->extent) +
900                                     ext4_ext_get_actual_len(newext));
901                         if (unwritten)
902                                 ext4_ext_mark_unwritten(curp->extent);
903                         err = ext4_ext_dirty(inode_ref, curp);
904                         goto out;
905                 }
906
907                 if (curp->extent &&
908                     ext4_ext_can_prepend(curp->extent, newext)) {
909                         unwritten = ext4_ext_is_unwritten(curp->extent);
910                         curp->extent->first_block = newext->first_block;
911                         curp->extent->block_count =
912                             to_le16(ext4_ext_get_actual_len(curp->extent) +
913                                     ext4_ext_get_actual_len(newext));
914                         if (unwritten)
915                                 ext4_ext_mark_unwritten(curp->extent);
916                         err = ext4_ext_dirty(inode_ref, curp);
917                         goto out;
918                 }
919         }
920
921         if (to_le16(curp->header->entries_count) ==
922             to_le16(curp->header->max_entries_count)) {
923                 if (at) {
924                         struct ext4_extent_header *neh;
925                         err = ext4_ext_split_node(inode_ref, path, at, newext,
926                                                   &spt->ptr, &bh);
927                         if (err != EOK)
928                                 goto out;
929
930                         neh = ext_block_hdr(&bh);
931                         if (to_le32(newext->first_block) >
932                             to_le32(curp->extent->first_block)) {
933                                 if (to_le16(neh->entries_count) >
934                                     to_le16(curp->header->entries_count)) {
935                                         eh = curp->header;
936                                         /* insert after */
937                                         ex = EXT_LAST_EXTENT(eh) + 1;
938                                 } else {
939                                         eh = neh;
940                                         ex = EXT_FIRST_EXTENT(eh);
941                                 }
942                         } else {
943                                 eh = curp->header;
944                                 /* insert before */
945                                 ex = EXT_LAST_EXTENT(eh);
946                         }
947                 } else {
948                         err = EXT_INODE_HDR_NEED_GROW;
949                         goto out;
950                 }
951         } else {
952                 eh = curp->header;
953                 if (curp->extent == NULL) {
954                         ex = EXT_FIRST_EXTENT(eh);
955                         curp->extent = ex;
956                 } else if (to_le32(newext->first_block) >
957                            to_le32(curp->extent->first_block)) {
958                         /* insert after */
959                         ex = curp->extent + 1;
960                 } else {
961                         /* insert before */
962                         ex = curp->extent;
963                 }
964         }
965
966         len = EXT_LAST_EXTENT(eh) - ex + 1;
967         ext4_assert(len >= 0);
968         if (len > 0)
969                 memmove(ex + 1, ex, len * sizeof(struct ext4_extent));
970
971         if (ex > EXT_MAX_EXTENT(eh)) {
972                 err = EIO;
973                 goto out;
974         }
975
976         ex->first_block = newext->first_block;
977         ex->block_count = newext->block_count;
978         ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
979         eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
980
981         if (ex > EXT_LAST_EXTENT(eh)) {
982                 err = EIO;
983                 goto out;
984         }
985
986         if (eh == curp->header) {
987                 err = ext4_ext_correct_indexes(inode_ref, path);
988                 if (err != EOK)
989                         goto out;
990                 err = ext4_ext_dirty(inode_ref, curp);
991         } else
992                 err = EOK;
993
994 out:
995         if (err != EOK) {
996                 if (bh.lb_id)
997                         ext4_block_set(inode_ref->fs->bdev, &bh);
998
999                 spt->ptr = 0;
1000         } else if (bh.lb_id) {
1001                 /* If we got a sibling leaf. */
1002                 bh.dirty = true;
1003
1004                 spt->path.p_block = ext4_ext_pblock(ex);
1005                 spt->path.depth = to_le16(eh->depth);
1006                 spt->path.maxdepth = 0;
1007                 spt->path.extent = ex;
1008                 spt->path.index = NULL;
1009                 spt->path.header = eh;
1010                 spt->path.block = bh;
1011
1012                 /*
1013                  * If newext->ee_block can be included into the
1014                  * right sub-tree.
1015                  */
1016                 if (to_le32(newext->first_block) >=
1017                     ext4_ext_block_index(ext_block_hdr(&bh)))
1018                         spt->switch_to = 1;
1019                 else {
1020                         curp->extent = ex;
1021                         curp->p_block = ext4_ext_pblock(ex);
1022                 }
1023
1024         } else {
1025                 spt->ptr = 0;
1026                 curp->extent = ex;
1027                 curp->p_block = ext4_ext_pblock(ex);
1028         }
1029
1030         return err;
1031 }
1032
1033 /*
1034  * ext4_ext_grow_indepth:
1035  * implements tree growing procedure:
1036  * - allocates new block
1037  * - moves top-level data (index block or leaf) into the new block
1038  * - initializes new top-level, creating index that points to the
1039  *   just created block
1040  */
1041 static int ext4_ext_grow_indepth(struct ext4_inode_ref *inode_ref,
1042                                  uint32_t flags)
1043 {
1044         struct ext4_extent_header *neh;
1045         struct ext4_block bh = EXT4_BLOCK_ZERO();
1046         ext4_fsblk_t newblock, goal = 0;
1047         int err = EOK;
1048
1049         /* Try to prepend new index to old one */
1050         if (ext_depth(inode_ref->inode))
1051                 goal = ext4_idx_pblock(
1052                     EXT_FIRST_INDEX(ext_inode_hdr(inode_ref->inode)));
1053         else
1054                 goal = 0;
1055
1056         newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, &err);
1057         if (newblock == 0)
1058                 return err;
1059
1060         /* Get the newly allocated block for write access. */
1061         err = ext4_block_get(inode_ref->fs->bdev, &bh, newblock);
1062         if (err != EOK) {
1063                 ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
1064                 return err;
1065         }
1066
1067         /* move top-level index/leaf into new block */
1068         memmove(bh.data, inode_ref->inode->blocks,
1069                 sizeof(inode_ref->inode->blocks));
1070
1071         /* set size of new block */
1072         neh = ext_block_hdr(&bh);
1073         /* old root could have indexes or leaves
1074          * so calculate e_max the right way */
1075         if (ext_depth(inode_ref->inode))
1076                 neh->max_entries_count =
1077                     to_le16(ext4_ext_space_block_idx(inode_ref));
1078         else
1079                 neh->max_entries_count =
1080                     to_le16(ext4_ext_space_block(inode_ref));
1081
1082         neh->magic = to_le16(EXT4_EXTENT_MAGIC);
1083         ext4_extent_block_csum_set(inode_ref, neh);
1084
1085         /* Update top-level index: num,max,pointer */
1086         neh = ext_inode_hdr(inode_ref->inode);
1087         neh->entries_count = to_le16(1);
1088         ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
1089         if (neh->depth == 0) {
1090                 /* Root extent block becomes index block */
1091                 neh->max_entries_count =
1092                     to_le16(ext4_ext_space_root_idx(inode_ref));
1093                 EXT_FIRST_INDEX(neh)
1094                     ->first_block = EXT_FIRST_EXTENT(neh)->first_block;
1095         }
1096         neh->depth = to_le16(to_le16(neh->depth) + 1);
1097
1098         bh.dirty = true;
1099         inode_ref->dirty = true;
1100         ext4_block_set(inode_ref->fs->bdev, &bh);
1101
1102         return err;
1103 }
1104
1105 __unused static void print_path(struct ext4_extent_path *path)
1106 {
1107         int32_t i = path->depth;
1108         while (i >= 0) {
1109
1110                 ptrdiff_t a =
1111                     (path->extent)
1112                         ? (path->extent - EXT_FIRST_EXTENT(path->header))
1113                         : 0;
1114                 ptrdiff_t b =
1115                     (path->index)
1116                         ? (path->index - EXT_FIRST_INDEX(path->header))
1117                         : 0;
1118
1119                 (void)a;
1120                 (void)b;
1121                 ext4_dbg(DEBUG_EXTENT,
1122                          "depth %" PRId32 ", p_block: %" PRIu64 ","
1123                          "p_ext offset: %td, p_idx offset: %td\n",
1124                          i, path->p_block, a, b);
1125                 i--;
1126                 path++;
1127         }
1128 }
1129
1130 static void ext4_ext_replace_path(struct ext4_inode_ref *inode_ref,
1131                                   struct ext4_extent_path *path,
1132                                   struct ext_split_trans *spt, int32_t depth,
1133                                   int32_t level)
1134 {
1135         int32_t i = depth - level;
1136
1137         ext4_ext_drop_refs(inode_ref, path + i, 1);
1138         path[i] = spt->path;
1139 }
1140
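/*
 * Insert a new extent into the tree: find the deepest level that still
 * has a free slot, insert bottom-up from the leaf, propagating a new
 * index entry for every node that had to be split, and grow the tree in
 * depth when even the root is full.
 */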
1141 static int ext4_ext_insert_extent(struct ext4_inode_ref *inode_ref,
1142                                   struct ext4_extent_path **ppath,
1143                                   struct ext4_extent *newext, uint32_t flags)
1144 {
1145         int32_t i, depth, level;
1146         int ret = EOK;
1147         ext4_fsblk_t ptr = 0;
1148         struct ext4_extent_path *path = *ppath;
1149         struct ext_split_trans *spt = NULL;
1150         struct ext_split_trans newblock;
1151
1152         memset(&newblock, 0, sizeof(newblock));
1153
1154         depth = ext_depth(inode_ref->inode);
1155         for (i = depth, level = 0; i >= 0; i--, level++)
1156                 if (EXT_HAS_FREE_INDEX(path + i))
1157                         break;
1158
1159         if (level) {
1160                 spt = calloc(1, sizeof(struct ext_split_trans) * (level));
1161                 if (!spt) {
1162                         ret = ENOMEM;
1163                         goto out;
1164                 }
1165         }
1166         i = 0;
1167 again:
1168         depth = ext_depth(inode_ref->inode);
1169
1170         do {
1171                 if (!i) {
1172                         ret = ext4_ext_insert_leaf(inode_ref, path, depth - i,
1173                                                    newext, &newblock, flags);
1174                 } else {
1175                         ret = ext4_ext_insert_index(
1176                             inode_ref, path, depth - i, newext,
1177                             ext4_ext_block_index(
1178                                 ext_block_hdr(&spt[i - 1].path.block)),
1179                             spt[i - 1].ptr, &newblock);
1180                 }
1181                 ptr = newblock.ptr;
1182
1183                 if (ret && ret != EXT_INODE_HDR_NEED_GROW)
1184                         goto out;
1185                 else if (spt && ptr && !ret) {
1186                         /* Prepare for the next iteration after splitting. */
1187                         spt[i] = newblock;
1188                 }
1189
1190                 i++;
1191         } while (ptr != 0 && i <= depth);
1192
1193         if (ret == EXT_INODE_HDR_NEED_GROW) {
1194                 ret = ext4_ext_grow_indepth(inode_ref, 0);
1195                 if (ret)
1196                         goto out;
1197                 ret = ext4_find_extent(inode_ref, to_le32(newext->first_block),
1198                                        ppath, 0);
1199                 if (ret)
1200                         goto out;
1201                 i = depth;
1202                 path = *ppath;
1203                 goto again;
1204         }
1205 out:
1206         if (ret) {
1207                 if (path)
1208                         ext4_ext_drop_refs(inode_ref, path, 0);
1209
1210                 while (--level >= 0 && spt) {
1211                         if (spt[level].ptr) {
1212                                 ext4_ext_free_blocks(inode_ref, spt[level].ptr,
1213                                                      1, 0);
1214                                 ext4_ext_drop_refs(inode_ref, &spt[level].path,
1215                                                    1);
1216                         }
1217                 }
1218         } else {
1219                 while (--level >= 0 && spt) {
1220                         if (spt[level].switch_to)
1221                                 ext4_ext_replace_path(inode_ref, path, spt,
1222                                                       depth, level);
1223                         else if (spt[level].ptr)
1224                                 ext4_ext_drop_refs(inode_ref, &spt[level].path,
1225                                                    1);
1226                 }
1227         }
1228         if (spt)
1229                 free(spt);
1230
1231         return ret;
1232 }
1233
1234 static void ext4_ext_remove_blocks(struct ext4_inode_ref *inode_ref,
1235                                    struct ext4_extent *ex, ext4_lblk_t from,
1236                                    ext4_lblk_t to)
1237 {
1238         ext4_lblk_t len = to - from + 1;
1239         ext4_lblk_t num;
1240         ext4_fsblk_t start;
1241         num = from - to_le32(ex->first_block);
1242         start = ext4_ext_pblock(ex) + num;
1243         ext4_dbg(DEBUG_EXTENT,
1244                  "Freeing %" PRIu32 " at %" PRIu64 ", %" PRIu32 "\n", from,
1245                  start, len);
1246
1247         ext4_ext_free_blocks(inode_ref, start, len, 0);
1248 }
1249
1250 static int ext4_ext_remove_idx(struct ext4_inode_ref *inode_ref,
1251                                struct ext4_extent_path *path, int32_t depth)
1252 {
1253         int err = EOK;
1254         int32_t i = depth;
1255         ext4_fsblk_t leaf;
1256
1257         /* free index block */
1258         leaf = ext4_idx_pblock(path[i].index);
1259
1260         if (path[i].index != EXT_LAST_INDEX(path[i].header)) {
1261                 ptrdiff_t len = EXT_LAST_INDEX(path[i].header) - path[i].index;
1262                 memmove(path[i].index, path[i].index + 1,
1263                         len * sizeof(struct ext4_extent_index));
1264         }
1265
1266         path[i].header->entries_count =
1267             to_le16(to_le16(path[i].header->entries_count) - 1);
1268         err = ext4_ext_dirty(inode_ref, path + i);
1269         if (err != EOK)
1270                 return err;
1271
1272         ext4_dbg(DEBUG_EXTENT, "IDX: Freeing %" PRIu32 " at %" PRIu64 ", %d\n",
1273                  to_le32(path[i].index->first_block), leaf, 1);
1274         ext4_ext_free_blocks(inode_ref, leaf, 1, 0);
1275
1276         while (i > 0) {
1277                 if (path[i].index != EXT_FIRST_INDEX(path[i].header))
1278                         break;
1279
1280                 path[i - 1].index->first_block = path[i].index->first_block;
1281                 err = ext4_ext_dirty(inode_ref, path + i - 1);
1282                 if (err != EOK)
1283                         break;
1284
1285                 i--;
1286         }
1287         return err;
1288 }
1289
1290 static int ext4_ext_remove_leaf(struct ext4_inode_ref *inode_ref,
1291                                 struct ext4_extent_path *path, ext4_lblk_t from,
1292                                 ext4_lblk_t to)
1293 {
1294
1295         int32_t depth = ext_depth(inode_ref->inode);
1296         struct ext4_extent *ex = path[depth].extent;
1297         struct ext4_extent *start_ex, *ex2 = NULL;
1298         struct ext4_extent_header *eh = path[depth].header;
1299         int32_t len;
1300         int err = EOK;
1301         uint16_t new_entries;
1302
1303         start_ex = ex;
1304         new_entries = to_le16(eh->entries_count);
1305         while (ex <= EXT_LAST_EXTENT(path[depth].header) &&
1306                to_le32(ex->first_block) <= to) {
1307                 int32_t new_len = 0;
1308                 int unwritten;
1309                 ext4_fsblk_t start, new_start;
1310                 new_start = start = to_le32(ex->first_block);
1311                 len = ext4_ext_get_actual_len(ex);
1312                 if (start < from) {
1313                         len -= from - start;
1314                         new_len = from - start;
1315                         start = from;
1316                         start_ex++;
1317                 }
1318                 if (start + len - 1 > to) {
1319                         new_len = start + len - 1 - to;
1320                         len -= new_len;
1321                         new_start = to + 1;
1322                         ex2 = ex;
1323                 }
1324
1325                 ext4_ext_remove_blocks(inode_ref, ex, start, start + len - 1);
1326                 ex->first_block = to_le32(new_start);
1327                 if (!new_len)
1328                         new_entries--;
1329                 else {
1330                         unwritten = ext4_ext_is_unwritten(ex);
1331                         ex->block_count = to_le16(new_len);
1332                         if (unwritten)
1333                                 ext4_ext_mark_unwritten(ex);
1334                 }
1335
1336                 ex += 1;
1337         }
1338
1339         if (ex2 == NULL)
1340                 ex2 = ex;
1341
1342         if (ex2 <= EXT_LAST_EXTENT(eh))
1343                 memmove(start_ex, ex2, (EXT_LAST_EXTENT(eh) - ex2 + 1) * sizeof(struct ext4_extent));
1344
1345         eh->entries_count = to_le16(new_entries);
1346         ext4_ext_dirty(inode_ref, path + depth);
1347         if (path[depth].extent == EXT_FIRST_EXTENT(eh) && eh->entries_count)
1348                 err = ext4_ext_correct_indexes(inode_ref, path);
1349
1350         /* if this leaf is free, then we should
1351          * remove it from index block above */
1352         if (err == EOK && eh->entries_count == 0 && path[depth].block.lb_id)
1353                 err = ext4_ext_remove_idx(inode_ref, path, depth - 1);
1354
1355         return err;
1356 }
1357
1358 static int ext4_ext_more_to_rm(struct ext4_extent_path *path, ext4_lblk_t to)
1359 {
1360         if (!to_le16(path->header->entries_count))
1361                 return 0;
1362
1363         if (path->index > EXT_LAST_INDEX(path->header))
1364                 return 0;
1365
1366         if (to_le32(path->index->first_block) > to)
1367                 return 0;
1368
1369         return 1;
1370 }
1371
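/*
 * Remove the logical block range [from, to] from the extent tree.  The
 * tree is walked iteratively: index nodes that still overlap the range
 * are descended into, leaves are trimmed, and index blocks that become
 * empty are freed on the way back up.
 */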
1372 int ext4_ext_remove_space(struct ext4_inode_ref *inode_ref, ext4_lblk_t from,
1373                           ext4_lblk_t to)
1374 {
1375         struct ext4_extent_path *path = NULL;
1376         int ret = EOK;
1377         int32_t depth = ext_depth(inode_ref->inode);
1378         int32_t i;
1379
1380         ret = ext4_find_extent(inode_ref, from, &path, 0);
1381         if (ret)
1382                 goto out;
1383
1384         if (!path[depth].extent ||
1385             !IN_RANGE(from, to_le32(path[depth].extent->first_block),
1386                       ext4_ext_get_actual_len(path[depth].extent))) {
1387                 ret = EOK;
1388                 goto out;
1389         }
1390
1391         i = depth;
1392         while (i >= 0) {
1393                 if (i == depth) {
1394                         struct ext4_extent_header *eh;
1395                         struct ext4_extent *first_ex, *last_ex;
1396                         ext4_lblk_t leaf_from, leaf_to;
1397                         eh = path[i].header;
1398                         ext4_assert(to_le16(eh->entries_count) > 0);
1399                         first_ex = EXT_FIRST_EXTENT(eh);
1400                         last_ex = EXT_LAST_EXTENT(eh);
1401                         leaf_from = to_le32(first_ex->first_block);
1402                         leaf_to = to_le32(last_ex->first_block) +
1403                                   ext4_ext_get_actual_len(last_ex) - 1;
1404                         if (leaf_from < from)
1405                                 leaf_from = from;
1406
1407                         if (leaf_to > to)
1408                                 leaf_to = to;
1409
1410                         ext4_ext_remove_leaf(inode_ref, path, leaf_from,
1411                                              leaf_to);
1412                         ext4_ext_drop_refs(inode_ref, path + i, 0);
1413                         i--;
1414                         continue;
1415                 } else {
1416                         struct ext4_extent_header *eh;
1417                         eh = path[i].header;
1418                         if (ext4_ext_more_to_rm(path + i, to)) {
1419                                 struct ext4_block bh = EXT4_BLOCK_ZERO();
1420                                 if (path[i + 1].block.lb_id)
1421                                         ext4_ext_drop_refs(inode_ref,
1422                                                            path + i + 1, 0);
1423
1424                                 ret = read_extent_tree_block(
1425                                     inode_ref, ext4_idx_pblock(path[i].index),
1426                                     depth - i - 1, &bh, 0);
1427                                 if (ret)
1428                                         goto out;
1429
1430                                 path[i].p_block =
1431                                     ext4_idx_pblock(path[i].index);
1432                                 path[i + 1].block = bh;
1433                                 path[i + 1].header = ext_block_hdr(&bh);
1434                                 path[i + 1].depth = depth - i - 1;
1435                                 if (i + 1 == depth)
1436                                         path[i + 1].extent = EXT_FIRST_EXTENT(
1437                                             path[i + 1].header);
1438                                 else
1439                                         path[i + 1].index =
1440                                             EXT_FIRST_INDEX(path[i + 1].header);
1441
1442                                 i++;
1443                         } else {
1444                                 if (!eh->entries_count && i > 0) {
1445
1446                                         ret = ext4_ext_remove_idx(inode_ref,
1447                                                                   path, i - 1);
1448                                 }
1449                                 if (i) {
1450                                         ext4_block_set(inode_ref->fs->bdev,
1451                                                        &path[i].block);
1452                                 }
1453                                 i--;
1454                         }
1455                 }
1456         }
1457
1458         /* TODO: flexible tree reduction should be here */
1459         if (path->header->entries_count == 0) {
1460                 /*
1461                  * truncating to zero freed the whole tree,
1462                  * so we need to correct eh_depth
1463                  */
1464                 ext_inode_hdr(inode_ref->inode)->depth = 0;
1465                 ext_inode_hdr(inode_ref->inode)->max_entries_count =
1466                     to_le16(ext4_ext_space_root(inode_ref));
1467                 ret = ext4_ext_dirty(inode_ref, path);
1468         }
1469
1470 out:
1471         ext4_ext_drop_refs(inode_ref, path, 0);
1472         free(path);
1473         path = NULL;
1474         return ret;
1475 }
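
/*
 * For reference, a sketch of the removal walk implemented above (this is
 * a paraphrase of the loop, not additional behaviour): the logical range
 * [from, to] is removed by a depth-first walk of the extent tree.
 *
 *     i = depth;
 *     while (i >= 0) {
 *             if (i == depth)
 *                     remove the overlapping extents from this leaf,
 *                     then go up (i--);
 *             else if (ext4_ext_more_to_rm(path + i, to))
 *                     read the next child block and descend (i++);
 *             else
 *                     drop the now-empty index entry, release its block
 *                     and go up (i--);
 *     }
 */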
1476
1477 static int ext4_ext_split_extent_at(struct ext4_inode_ref *inode_ref,
1478                                     struct ext4_extent_path **ppath,
1479                                     ext4_lblk_t split, uint32_t split_flag)
1480 {
1481         struct ext4_extent *ex, newex;
1482         ext4_fsblk_t newblock;
1483         ext4_lblk_t ee_block;
1484         int32_t ee_len;
1485         int32_t depth = ext_depth(inode_ref->inode);
1486         int err = EOK;
1487
1488         ex = (*ppath)[depth].extent;
1489         ee_block = to_le32(ex->first_block);
1490         ee_len = ext4_ext_get_actual_len(ex);
1491         newblock = split - ee_block + ext4_ext_pblock(ex);
1492
1493         if (split == ee_block) {
1494                 /*
1495                  * case b: block @split is the block the extent begins with,
1496                  * so we just change the state of the extent; splitting
1497                  * is not needed.
1498                  */
1499                 if (split_flag & EXT4_EXT_MARK_UNWRIT2)
1500                         ext4_ext_mark_unwritten(ex);
1501                 else
1502                         ext4_ext_mark_initialized(ex);
1503
1504                 err = ext4_ext_dirty(inode_ref, *ppath + depth);
1505                 goto out;
1506         }
1507
1508         ex->block_count = to_le16(split - ee_block);
1509         if (split_flag & EXT4_EXT_MARK_UNWRIT1)
1510                 ext4_ext_mark_unwritten(ex);
1511
1512         err = ext4_ext_dirty(inode_ref, *ppath + depth);
1513         if (err != EOK)
1514                 goto out;
1515
1516         newex.first_block = to_le32(split);
1517         newex.block_count = to_le16(ee_len - (split - ee_block));
1518         ext4_ext_store_pblock(&newex, newblock);
1519         if (split_flag & EXT4_EXT_MARK_UNWRIT2)
1520                 ext4_ext_mark_unwritten(&newex);
1521         err = ext4_ext_insert_extent(inode_ref, ppath, &newex,
1522                                      EXT4_EXT_NO_COMBINE);
1523         if (err != EOK)
1524                 goto restore_extent_len;
1525
1526 out:
1527         return err;
1528 restore_extent_len:
1529         ex->block_count = to_le16(ee_len);
1530         err = ext4_ext_dirty(inode_ref, *ppath + depth);
1531         return err;
1532 }
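
/*
 * Worked example for ext4_ext_split_extent_at() above (the numbers are
 * illustrative only, not taken from this file): given an extent mapping
 * logical blocks 100..119 to physical blocks 5000..5019, a call with
 * split == 108 produces
 *
 *     ex    : first_block = 100, block_count =  8, pblock = 5000
 *     newex : first_block = 108, block_count = 12, pblock = 5008
 *
 * and newex is inserted with EXT4_EXT_NO_COMBINE so that the two halves
 * are not merged straight back together.  EXT4_EXT_MARK_UNWRIT1 and
 * EXT4_EXT_MARK_UNWRIT2 select which halves end up marked unwritten.
 */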
1533
1534 static int ext4_ext_convert_to_initialized(struct ext4_inode_ref *inode_ref,
1535                                            struct ext4_extent_path **ppath,
1536                                            ext4_lblk_t split, uint32_t blocks)
1537 {
1538         int32_t depth = ext_depth(inode_ref->inode), err = EOK;
1539         struct ext4_extent *ex = (*ppath)[depth].extent;
1540
1541         ext4_assert(to_le32(ex->first_block) <= split);
1542
1543         if (split + blocks ==
1544             to_le32(ex->first_block) + ext4_ext_get_actual_len(ex)) {
1545                 /* split and initialize right part */
1546                 err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1547                                                EXT4_EXT_MARK_UNWRIT1);
1548         } else if (to_le32(ex->first_block) == split) {
1549                 /* split and initialize left part */
1550                 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1551                                                EXT4_EXT_MARK_UNWRIT2);
1552         } else {
1553                 /* split 1 extent to 3 and initialize the 2nd */
1554                 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1555                                                EXT4_EXT_MARK_UNWRIT1 |
1556                                                    EXT4_EXT_MARK_UNWRIT2);
1557                 if (!err) {
1558                         err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1559                                                        EXT4_EXT_MARK_UNWRIT1);
1560                 }
1561         }
1562
1563         return err;
1564 }
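
/*
 * The three cases handled by ext4_ext_convert_to_initialized() above,
 * with U = unwritten and I = the range being initialized (illustrative):
 *
 *     range at the tail:    [ U U U U | I I ]     one split at split
 *     range at the head:    [ I I | U U U U ]     one split at split + blocks
 *     range in the middle:  [ U U | I I | U U ]   two splits, rightmost first
 */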
1565
1566 int ext4_ext_tree_init(struct ext4_inode_ref *inode_ref)
1567 {
1568         struct ext4_extent_header *eh;
1569
1570         eh = ext_inode_hdr(inode_ref->inode);
1571         eh->depth = 0;
1572         eh->entries_count = 0;
1573         eh->magic = to_le16(EXT4_EXTENT_MAGIC);
1574         eh->max_entries_count = to_le16(ext4_ext_space_root(inode_ref));
1575         inode_ref->dirty = true;
1576         return EOK;
1577 }
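
/*
 * A possible call site for ext4_ext_tree_init() (a sketch assuming a new
 * extent-mapped inode has just been allocated; the surrounding names are
 * hypothetical, not from this file):
 *
 *     struct ext4_inode_ref ref;
 *     ...load the freshly allocated inode into ref...
 *     int rc = ext4_ext_tree_init(&ref);
 *
 * On success the inode->blocks area holds an empty root header with
 * depth 0, ready for the first extent to be inserted directly into it.
 */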
1578
1579 /*
1580  * ext4_ext_next_allocated_block:
1581  * returns the allocated block in the next extent, or EXT_MAX_BLOCKS.
1582  * NOTE: it considers the block number from an index entry as an
1583  * allocated block. Thus, index entries have to be consistent
1584  * with the leaves.
1585  */
1586 #define EXT_MAX_BLOCKS ((ext4_lblk_t)-1)
1587
1588 static ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_extent_path *path)
1589 {
1590         int32_t depth;
1591
1592         depth = path->depth;
1593
1594         if (depth == 0 && path->extent == NULL)
1595                 return EXT_MAX_BLOCKS;
1596
1597         while (depth >= 0) {
1598                 if (depth == path->depth) {
1599                         /* leaf */
1600                         if (path[depth].extent &&
1601                             path[depth].extent !=
1602                                 EXT_LAST_EXTENT(path[depth].header))
1603                                 return to_le32(
1604                                     path[depth].extent[1].first_block);
1605                 } else {
1606                         /* index */
1607                         if (path[depth].index !=
1608                             EXT_LAST_INDEX(path[depth].header))
1609                                 return to_le32(
1610                                     path[depth].index[1].first_block);
1611                 }
1612                 depth--;
1613         }
1614
1615         return EXT_MAX_BLOCKS;
1616 }
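
/*
 * Usage sketch (mirroring ext4_ext_get_blocks() further below): the
 * EXT_MAX_BLOCKS sentinel lets a caller cap an allocation so that it
 * cannot run into the next allocated extent:
 *
 *     next = ext4_ext_next_allocated_block(path);
 *     allocated = next - iblock;
 *     if (allocated > max_blocks)
 *             allocated = max_blocks;
 *
 * When nothing is allocated past iblock, next == EXT_MAX_BLOCKS and the
 * cap on max_blocks is what limits the request.
 */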
1617
1618 static int ext4_ext_zero_unwritten_range(struct ext4_inode_ref *inode_ref,
1619                                          ext4_fsblk_t block,
1620                                          uint32_t blocks_count)
1621 {
1622         int err = EOK;
1623         uint32_t i;
1624         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
1625         for (i = 0; i < blocks_count; i++) {
1626                 uint32_t block_u32 = (uint32_t)block + (uint32_t)i;
1627                 struct ext4_block bh = EXT4_BLOCK_ZERO();
1628                 err = ext4_block_get(inode_ref->fs->bdev, &bh, block_u32);
1629                 if (err != EOK)
1630                         break;
1631
1632                 memset(bh.data, 0, block_size);
1633                 bh.dirty = true;
1634                 err = ext4_block_set(inode_ref->fs->bdev, &bh);
1635                 if (err != EOK)
1636                         break;
1637         }
1638         return err;
1639 }
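
/*
 * Note on EXT4_BLOCK_ZERO() as used above: the macro is defined elsewhere
 * (presumably in ext4_blockdev.h) and is assumed to expand to a zero
 * initializer for struct ext4_block, along the lines of
 *
 *     #define EXT4_BLOCK_ZERO() \
 *             {.lb_id = 0, .data = 0, .dirty = 0}
 *
 * so that fields such as lb_id, data and dirty are in a known state
 * before ext4_block_get() fills the handle in.
 */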
1640
1641 int ext4_ext_get_blocks(struct ext4_inode_ref *inode_ref, ext4_fsblk_t iblock,
1642                         uint32_t max_blocks, ext4_fsblk_t *result, bool create,
1643                         uint32_t *blocks_count)
1644 {
1645         struct ext4_extent_path *path = NULL;
1646         struct ext4_extent newex, *ex;
1647         ext4_fsblk_t goal;
1648         int err = EOK;
1649         int32_t depth;
1650         uint32_t allocated = 0;
1651         ext4_fsblk_t next, newblock;
1652
1653         if (result)
1654                 *result = 0;
1655
1656         if (blocks_count)
1657                 *blocks_count = 0;
1658
1659         /* find extent for this block */
1660         err = ext4_find_extent(inode_ref, iblock, &path, 0);
1661         if (err != EOK) {
1662                 path = NULL;
1663                 goto out2;
1664         }
1665
1666         depth = ext_depth(inode_ref->inode);
1667
1668         /*
1669          * a consistent leaf must not be empty;
1670          * this situation is possible, though, _during_ tree modification;
1671          * this is why the assert can't be put in ext4_ext_find_extent()
1672          */
1673         if ((ex = path[depth].extent)) {
1674                 ext4_lblk_t ee_block = to_le32(ex->first_block);
1675                 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
1676                 uint16_t ee_len = ext4_ext_get_actual_len(ex);
1677                 /* if the found extent covers the block, simply return it */
1678                 if (IN_RANGE(iblock, ee_block, ee_len)) {
1679                         /* number of remaining blocks in the extent */
1680                         allocated = ee_len - (iblock - ee_block);
1681
1682                         if (ext4_ext_is_unwritten(ex)) {
1683                                 if (create) {
1684                                         uint32_t zero_range;
1685                                         zero_range = allocated;
1686                                         if (zero_range > max_blocks)
1687                                                 zero_range = max_blocks;
1688
1689                                         newblock = iblock - ee_block + ee_start;
1690                                         err = ext4_ext_zero_unwritten_range(
1691                                             inode_ref, newblock, zero_range);
1692                                         if (err != EOK)
1693                                                 goto out2;
1694
1695                                         err = ext4_ext_convert_to_initialized(
1696                                             inode_ref, &path, iblock,
1697                                             zero_range);
1698                                         if (err != EOK)
1699                                                 goto out2;
1700
1701                                 } else {
1702                                         newblock = 0;
1703                                 }
1704                         } else {
1705                                 newblock = iblock - ee_block + ee_start;
1706                         }
1707                         goto out;
1708                 }
1709         }
1710
1711         /*
1712          * the requested block isn't allocated yet;
1713          * we must not try to create it if the create flag is not set
1714          */
1715         if (!create) {
1716                 goto out2;
1717         }
1718
1719         /* find the next allocated block so that we know how many
1720          * blocks we can allocate without overlapping the next extent */
1721         next = ext4_ext_next_allocated_block(path);
1722         allocated = next - iblock;
1723         if (allocated > max_blocks)
1724                 allocated = max_blocks;
1725
1726         /* allocate new block */
1727         goal = ext4_ext_find_goal(path, iblock);
1728         newblock = ext4_new_meta_blocks(inode_ref, goal, 0, &allocated, &err);
1729         if (!newblock)
1730                 goto out2;
1731
1732         /* try to insert new extent into found leaf and return */
1733         newex.first_block = to_le32(iblock);
1734         ext4_ext_store_pblock(&newex, newblock);
1735         newex.block_count = to_le16(allocated);
1736         err = ext4_ext_insert_extent(inode_ref, &path, &newex, 0);
1737         if (err != EOK) {
1738                 /* free data blocks we just allocated */
1739                 ext4_ext_free_blocks(inode_ref, ext4_ext_pblock(&newex),
1740                                      to_le16(newex.block_count), 0);
1741                 goto out2;
1742         }
1743
1744         /* the previous routine may have used the block we just allocated */
1745         newblock = ext4_ext_pblock(&newex);
1746
1747 out:
1748         if (allocated > max_blocks)
1749                 allocated = max_blocks;
1750
1751         if (result)
1752                 *result = newblock;
1753
1754         if (blocks_count)
1755                 *blocks_count = allocated;
1756
1757 out2:
1758         if (path) {
1759                 ext4_ext_drop_refs(inode_ref, path, 0);
1760                 free(path);
1761         }
1762
1763         return err;
1764 }
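
/*
 * Usage sketch for ext4_ext_get_blocks() (a hypothetical caller, not
 * taken from this file): map one logical block, allocating it on demand:
 *
 *     ext4_fsblk_t fblock = 0;
 *     uint32_t count = 0;
 *     int rc = ext4_ext_get_blocks(inode_ref, lblock, 1, &fblock,
 *                                  true, &count);
 *
 * On success fblock holds the physical block backing lblock and count the
 * number of consecutive blocks mapped or allocated.  With create == false
 * the function only reports existing mappings; holes and unwritten
 * extents yield fblock == 0 with rc == EOK.
 */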