src/ext4_extent.c (lwext4)
1 /*
2  * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3  * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * - Redistributions of source code must retain the above copyright
10  *   notice, this list of conditions and the following disclaimer.
11  * - Redistributions in binary form must reproduce the above copyright
12  *   notice, this list of conditions and the following disclaimer in the
13  *   documentation and/or other materials provided with the distribution.
14  * - The name of the author may not be used to endorse or promote products
15  *   derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28
29 #include "ext4_config.h"
30 #include "ext4_types.h"
31 #include "ext4_misc.h"
32 #include "ext4_errno.h"
33 #include "ext4_debug.h"
34
35 #include "ext4_blockdev.h"
36 #include "ext4_trans.h"
37 #include "ext4_fs.h"
38 #include "ext4_super.h"
39 #include "ext4_crc32.h"
40 #include "ext4_balloc.h"
41 #include "ext4_extent.h"
42
43 #include <stdlib.h>
44 #include <string.h>
45 #include <inttypes.h>
46 #include <stddef.h>
47
48 /*
49  * used by extent splitting.
50  */
51 #define EXT4_EXT_MARK_UNWRIT1 0x02 /* mark first half unwritten */
52 #define EXT4_EXT_MARK_UNWRIT2 0x04 /* mark second half unwritten */
53 #define EXT4_EXT_DATA_VALID1 0x08  /* first half contains valid data */
54 #define EXT4_EXT_DATA_VALID2 0x10  /* second half contains valid data */
55 #define EXT4_EXT_NO_COMBINE 0x20   /* do not combine two extents */
56
57 static struct ext4_extent_tail *
58 find_ext4_extent_tail(struct ext4_extent_header *eh)
59 {
60         return (struct ext4_extent_tail *)(((char *)eh) +
61                                            EXT4_EXTENT_TAIL_OFFSET(eh));
62 }
63
64 static struct ext4_extent_header *ext_inode_hdr(struct ext4_inode *inode)
65 {
66         return (struct ext4_extent_header *)inode->blocks;
67 }
68
69 static struct ext4_extent_header *ext_block_hdr(struct ext4_block *block)
70 {
71         return (struct ext4_extent_header *)block->data;
72 }
73
74 static uint16_t ext_depth(struct ext4_inode *inode)
75 {
76         return to_le16(ext_inode_hdr(inode)->depth);
77 }
78
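/*
 * ext4_ext_get_actual_len:
 * return the real number of blocks covered by an extent.
 * An unwritten extent stores its length biased by EXT_INIT_MAX_LEN;
 * a length of exactly EXT_INIT_MAX_LEN denotes an initialized extent
 * of maximum size (see ext4_ext_is_unwritten below).
 */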
79 static uint16_t ext4_ext_get_actual_len(struct ext4_extent *ext)
80 {
81         return (to_le16(ext->block_count) <= EXT_INIT_MAX_LEN
82                     ? to_le16(ext->block_count)
83                     : (to_le16(ext->block_count) - EXT_INIT_MAX_LEN));
84 }
85
86 static void ext4_ext_mark_initialized(struct ext4_extent *ext)
87 {
88         ext->block_count = to_le16(ext4_ext_get_actual_len(ext));
89 }
90
91 static void ext4_ext_mark_unwritten(struct ext4_extent *ext)
92 {
93         ext->block_count |= to_le16(EXT_INIT_MAX_LEN);
94 }
95
96 static int ext4_ext_is_unwritten(struct ext4_extent *ext)
97 {
98         /* Extent with ee_len of 0x8000 is treated as an initialized extent */
99         return (to_le16(ext->block_count) > EXT_INIT_MAX_LEN);
100 }
101
102 /*
103  * ext4_ext_pblock:
104  * combine low and high parts of physical block number into ext4_fsblk_t
105  */
106 static ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex)
107 {
108         ext4_fsblk_t block;
109
110         block = to_le32(ex->start_lo);
111         block |= ((ext4_fsblk_t)to_le16(ex->start_hi) << 31) << 1;
112         return block;
113 }
114
115 /*
116  * ext4_idx_pblock:
117  * combine low and high parts of a leaf physical block number into ext4_fsblk_t
118  */
119 static ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_index *ix)
120 {
121         ext4_fsblk_t block;
122
123         block = to_le32(ix->leaf_lo);
124         block |= ((ext4_fsblk_t)to_le16(ix->leaf_hi) << 31) << 1;
125         return block;
126 }
127
128 /*
129  * ext4_ext_store_pblock:
130  * stores a large physical block number into an extent struct,
131  * breaking it into parts
132  */
133 static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
134 {
135         ex->start_lo = to_le32((uint32_t)(pb & 0xffffffff));
136         ex->start_hi = to_le16((uint16_t)((pb >> 32)) & 0xffff);
137 }
138
139 /*
140  * ext4_idx_store_pblock:
141  * stores a large physical block number into an index struct,
142  * breaking it into parts
143  */
144 static void ext4_idx_store_pblock(struct ext4_extent_index *ix, ext4_fsblk_t pb)
145 {
146         ix->leaf_lo = to_le32((uint32_t)(pb & 0xffffffff));
147         ix->leaf_hi = to_le16((uint16_t)((pb >> 32)) & 0xffff);
148 }
149
150 static int ext4_allocate_single_block(struct ext4_inode_ref *inode_ref,
151                                       ext4_fsblk_t goal, ext4_fsblk_t *blockp)
152 {
153         return ext4_balloc_alloc_block(inode_ref, goal, blockp);
154 }
155
156 static ext4_fsblk_t ext4_new_meta_blocks(struct ext4_inode_ref *inode_ref,
157                                          ext4_fsblk_t goal,
158                                          uint32_t flags __unused,
159                                          uint32_t *count, int *errp)
160 {
161         ext4_fsblk_t block = 0;
162
163         *errp = ext4_allocate_single_block(inode_ref, goal, &block);
164         if (count)
165                 *count = 1;
166         return block;
167 }
168
169 static void ext4_ext_free_blocks(struct ext4_inode_ref *inode_ref,
170                                  ext4_fsblk_t block, uint32_t count,
171                                  uint32_t flags __unused)
172 {
173         ext4_balloc_free_blocks(inode_ref, block, count);
174 }
175
176 static uint16_t ext4_ext_space_block(struct ext4_inode_ref *inode_ref)
177 {
178         uint16_t size;
179         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
180
181         size = (block_size - sizeof(struct ext4_extent_header)) /
182                sizeof(struct ext4_extent);
183 #ifdef AGGRESSIVE_TEST
184         if (size > 6)
185                 size = 6;
186 #endif
187         return size;
188 }
189
190 static uint16_t ext4_ext_space_block_idx(struct ext4_inode_ref *inode_ref)
191 {
192         uint16_t size;
193         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
194
195         size = (block_size - sizeof(struct ext4_extent_header)) /
196                sizeof(struct ext4_extent_index);
197 #ifdef AGGRESSIVE_TEST
198         if (size > 5)
199                 size = 5;
200 #endif
201         return size;
202 }
203
204 static uint16_t ext4_ext_space_root(struct ext4_inode_ref *inode_ref)
205 {
206         uint16_t size;
207
208         size = sizeof(inode_ref->inode->blocks);
209         size -= sizeof(struct ext4_extent_header);
210         size /= sizeof(struct ext4_extent);
211 #ifdef AGGRESSIVE_TEST
212         if (size > 3)
213                 size = 3;
214 #endif
215         return size;
216 }
217
218 static uint16_t ext4_ext_space_root_idx(struct ext4_inode_ref *inode_ref)
219 {
220         uint16_t size;
221
222         size = sizeof(inode_ref->inode->blocks);
223         size -= sizeof(struct ext4_extent_header);
224         size /= sizeof(struct ext4_extent_index);
225 #ifdef AGGRESSIVE_TEST
226         if (size > 4)
227                 size = 4;
228 #endif
229         return size;
230 }
231
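/*
 * ext4_ext_max_entries:
 * capacity of a tree node at the given depth. The root node lives in
 * the inode's i_block area, all other nodes occupy a full filesystem
 * block; depth 0 nodes hold extents, deeper nodes hold indexes.
 */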
232 static uint16_t ext4_ext_max_entries(struct ext4_inode_ref *inode_ref,
233                                      uint32_t depth)
234 {
235         uint16_t max;
236
237         if (depth == ext_depth(inode_ref->inode)) {
238                 if (depth == 0)
239                         max = ext4_ext_space_root(inode_ref);
240                 else
241                         max = ext4_ext_space_root_idx(inode_ref);
242         } else {
243                 if (depth == 0)
244                         max = ext4_ext_space_block(inode_ref);
245                 else
246                         max = ext4_ext_space_block_idx(inode_ref);
247         }
248
249         return max;
250 }
251
252 static ext4_fsblk_t ext4_ext_find_goal(struct ext4_inode_ref *inode_ref,
253                                        struct ext4_extent_path *path,
254                                        ext4_lblk_t block)
255 {
256         if (path) {
257                 uint32_t depth = path->depth;
258                 struct ext4_extent *ex;
259
260                 /*
261                  * Try to predict block placement assuming that we are
262                  * filling in a file which will eventually be
263                  * non-sparse --- i.e., in the case of libbfd writing
264                  * an ELF object's sections out-of-order but in a way
265                  * that eventually results in a contiguous object or
266                  * executable file, or some database extending a table
267                  * space file.  However, this is actually somewhat
268                  * non-ideal if we are writing a sparse file such as
269                  * qemu or KVM writing a raw image file that is going
270                  * to stay fairly sparse, since it will end up
271                  * fragmenting the file system's free space.  Maybe we
272                  * should have some heuristics or some way to allow
273                  * userspace to pass a hint to the file system,
274                  * especially if the latter case turns out to be
275                  * common.
276                  */
277                 ex = path[depth].extent;
278                 if (ex) {
279                         ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
280                         ext4_lblk_t ext_block = to_le32(ex->first_block);
281
282                         if (block > ext_block)
283                                 return ext_pblk + (block - ext_block);
284                         else
285                                 return ext_pblk - (ext_block - block);
286                 }
287
288                 /* it looks like index is empty;
289                  * try to find starting block from index itself */
290                 if (path[depth].block.lb_id)
291                         return path[depth].block.lb_id;
292         }
293
294         /* OK. use inode's group */
295         return ext4_fs_inode_to_goal_block(inode_ref);
296 }
297
298 /*
299  * Allocation for a meta data block
300  */
301 static ext4_fsblk_t ext4_ext_new_meta_block(struct ext4_inode_ref *inode_ref,
302                                             struct ext4_extent_path *path,
303                                             struct ext4_extent *ex, int *err,
304                                             uint32_t flags)
305 {
306         ext4_fsblk_t goal, newblock;
307
308         goal = ext4_ext_find_goal(inode_ref, path, to_le32(ex->first_block));
309         newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, err);
310         return newblock;
311 }
312
313 #if CONFIG_META_CSUM_ENABLE
314 static uint32_t ext4_ext_block_csum(struct ext4_inode_ref *inode_ref,
315                                     struct ext4_extent_header *eh)
316 {
317         uint32_t checksum = 0;
318         struct ext4_sblock *sb = &inode_ref->fs->sb;
319
320         if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
321                 uint32_t ino_index = to_le32(inode_ref->index);
322                 uint32_t ino_gen =
323                     to_le32(ext4_inode_get_generation(inode_ref->inode));
324                 /* First calculate crc32 checksum against fs uuid */
325                 checksum =
326                     ext4_crc32c(EXT4_CRC32_INIT, sb->uuid, sizeof(sb->uuid));
327                 /* Then calculate crc32 checksum against inode number
328                  * and inode generation */
329                 checksum = ext4_crc32c(checksum, &ino_index, sizeof(ino_index));
330                 checksum = ext4_crc32c(checksum, &ino_gen, sizeof(ino_gen));
331                 /* Finally calculate crc32 checksum against
332                  * the entire extent block up to the checksum field */
333                 checksum =
334                     ext4_crc32c(checksum, eh, EXT4_EXTENT_TAIL_OFFSET(eh));
335         }
336         return checksum;
337 }
338 #else
339 #define ext4_ext_block_csum(...) 0
340 #endif
341
342 static void
343 ext4_extent_block_csum_set(struct ext4_inode_ref *inode_ref __unused,
344                            struct ext4_extent_header *eh)
345 {
346         struct ext4_extent_tail *tail;
347
348         tail = find_ext4_extent_tail(eh);
349         tail->et_checksum = to_le32(ext4_ext_block_csum(inode_ref, eh));
350 }
351
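/*
 * ext4_ext_dirty:
 * mark the node referenced by this path level as modified: the block
 * buffer of an on-disk node, or the inode itself when the node is the
 * root stored inside the inode.
 */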
352 static int ext4_ext_dirty(struct ext4_inode_ref *inode_ref,
353                           struct ext4_extent_path *path)
354 {
355         if (path->block.lb_id)
356                 ext4_trans_set_block_dirty(path->block.buf);
357         else
358                 inode_ref->dirty = true;
359
360         return EOK;
361 }
362
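/*
 * ext4_ext_drop_refs:
 * release the block buffers referenced by a path, refreshing the
 * checksum of any dirty extent block first. With keep_other set only
 * the level passed in is released.
 */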
363 static void ext4_ext_drop_refs(struct ext4_inode_ref *inode_ref,
364                                struct ext4_extent_path *path, bool keep_other)
365 {
366         int32_t depth, i;
367
368         if (!path)
369                 return;
370         if (keep_other)
371                 depth = 0;
372         else
373                 depth = path->depth;
374
375         for (i = 0; i <= depth; i++, path++) {
376                 if (path->block.lb_id) {
377                         if (ext4_bcache_test_flag(path->block.buf, BC_DIRTY))
378                                 ext4_extent_block_csum_set(inode_ref,
379                                                            path->header);
380
381                         ext4_block_set(inode_ref->fs->bdev, &path->block);
382                 }
383         }
384 }
385
386 /*
387  * Check whether the basic information inside the extent header
388  * is correct.
389  */
390 static int ext4_ext_check(struct ext4_inode_ref *inode_ref,
391                           struct ext4_extent_header *eh, uint16_t depth,
392                           ext4_fsblk_t pblk __unused)
393 {
394         struct ext4_extent_tail *tail;
395         struct ext4_sblock *sb = &inode_ref->fs->sb;
396         const char *error_msg;
397         (void)error_msg;
398
399         if (to_le16(eh->magic) != EXT4_EXTENT_MAGIC) {
400                 error_msg = "invalid magic";
401                 goto corrupted;
402         }
403         if (to_le16(eh->depth) != depth) {
404                 error_msg = "unexpected eh_depth";
405                 goto corrupted;
406         }
407         if (eh->max_entries_count == 0) {
408                 error_msg = "invalid eh_max";
409                 goto corrupted;
410         }
411         if (to_le16(eh->entries_count) > to_le16(eh->max_entries_count)) {
412                 error_msg = "invalid eh_entries";
413                 goto corrupted;
414         }
415
416         tail = find_ext4_extent_tail(eh);
417         if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
418                 if (tail->et_checksum !=
419                     to_le32(ext4_ext_block_csum(inode_ref, eh))) {
420                         ext4_dbg(DEBUG_EXTENT,
421                                  DBG_WARN "Extent block checksum failed. "
422                                           "Blocknr: %" PRIu64 "\n",
423                                  pblk);
424                 }
425         }
426
427         return EOK;
428
429 corrupted:
430         ext4_dbg(DEBUG_EXTENT, "Bad extents B+ tree block: %s. "
431                                "Blocknr: %" PRIu64 "\n",
432                  error_msg, pblk);
433         return EIO;
434 }
435
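/*
 * read_extent_tree_block:
 * read the extent tree node at @pblk and verify its header against the
 * expected depth; the buffer is released again if verification fails.
 */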
436 static int read_extent_tree_block(struct ext4_inode_ref *inode_ref,
437                                   ext4_fsblk_t pblk, int32_t depth,
438                                   struct ext4_block *bh,
439                                   uint32_t flags __unused)
440 {
441         int err;
442
443         err = ext4_trans_block_get(inode_ref->fs->bdev, bh, pblk);
444         if (err != EOK)
445                 goto errout;
446
447         err = ext4_ext_check(inode_ref, ext_block_hdr(bh), depth, pblk);
448         if (err != EOK)
449                 goto errout;
450
451         return EOK;
452 errout:
453         if (bh->lb_id)
454                 ext4_block_set(inode_ref->fs->bdev, bh);
455
456         return err;
457 }
458
459 /*
460  * ext4_ext_binsearch_idx:
461  * binary search for the closest index of the given block
462  * the header must be checked before calling this
463  */
464 static void ext4_ext_binsearch_idx(struct ext4_extent_path *path,
465                                    ext4_lblk_t block)
466 {
467         struct ext4_extent_header *eh = path->header;
468         struct ext4_extent_index *r, *l, *m;
469
470         l = EXT_FIRST_INDEX(eh) + 1;
471         r = EXT_LAST_INDEX(eh);
472         while (l <= r) {
473                 m = l + (r - l) / 2;
474                 if (block < to_le32(m->first_block))
475                         r = m - 1;
476                 else
477                         l = m + 1;
478         }
479
480         path->index = l - 1;
481 }
482
483 /*
484  * ext4_ext_binsearch:
485  * binary search for closest extent of the given block
486  * the header must be checked before calling this
487  */
488 static void ext4_ext_binsearch(struct ext4_extent_path *path, ext4_lblk_t block)
489 {
490         struct ext4_extent_header *eh = path->header;
491         struct ext4_extent *r, *l, *m;
492
493         if (eh->entries_count == 0) {
494                 /*
495                  * this leaf is empty:
496                  * we get such a leaf in split/add case
497                  */
498                 return;
499         }
500
501         l = EXT_FIRST_EXTENT(eh) + 1;
502         r = EXT_LAST_EXTENT(eh);
503
504         while (l <= r) {
505                 m = l + (r - l) / 2;
506                 if (block < to_le32(m->first_block))
507                         r = m - 1;
508                 else
509                         l = m + 1;
510         }
511
512         path->extent = l - 1;
513 }
514
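/*
 * ext4_find_extent:
 * walk the tree from the in-inode root down to the leaf covering
 * @block, filling one ext4_extent_path entry per level (header, index,
 * extent and block buffer). The path array is (re)allocated as needed
 * and returned through @orig_path; the caller releases it with
 * ext4_ext_drop_refs() followed by free().
 *
 * Hypothetical caller sketch (for illustration only, not code from
 * this file):
 *
 *   struct ext4_extent_path *path = NULL;
 *   int r = ext4_find_extent(inode_ref, lblk, &path, 0);
 *   if (r == EOK) {
 *       uint16_t depth = ext_depth(inode_ref->inode);
 *       if (path[depth].extent) {
 *           // path[depth].extent covers (or neighbours) lblk
 *       }
 *       ext4_ext_drop_refs(inode_ref, path, 0);
 *       free(path);
 *   }
 */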
515 static int ext4_find_extent(struct ext4_inode_ref *inode_ref, ext4_lblk_t block,
516                             struct ext4_extent_path **orig_path, uint32_t flags)
517 {
518         struct ext4_extent_header *eh;
519         struct ext4_block bh = EXT4_BLOCK_ZERO();
520         ext4_fsblk_t buf_block = 0;
521         struct ext4_extent_path *path = *orig_path;
522         int32_t depth, ppos = 0;
523         int32_t i;
524         int ret;
525
526         eh = ext_inode_hdr(inode_ref->inode);
527         depth = ext_depth(inode_ref->inode);
528
529         if (path) {
530                 ext4_ext_drop_refs(inode_ref, path, 0);
531                 if (depth > path[0].maxdepth) {
532                         free(path);
533                         *orig_path = path = NULL;
534                 }
535         }
536         if (!path) {
537                 int32_t path_depth = depth + 1;
538                 /* account possible depth increase */
539                 path = calloc(1, sizeof(struct ext4_extent_path) *
540                                      (path_depth + 1));
541                 if (!path)
542                         return ENOMEM;
543                 path[0].maxdepth = path_depth;
544         }
545         path[0].header = eh;
546         path[0].block = bh;
547
548         i = depth;
549         /* walk through the tree */
550         while (i) {
551                 ext4_ext_binsearch_idx(path + ppos, block);
552                 path[ppos].p_block = ext4_idx_pblock(path[ppos].index);
553                 path[ppos].depth = i;
554                 path[ppos].extent = NULL;
555                 buf_block = path[ppos].p_block;
556
557                 i--;
558                 ppos++;
559                 if (!path[ppos].block.lb_id ||
560                     path[ppos].block.lb_id != buf_block) {
561                         ret = read_extent_tree_block(inode_ref, buf_block, i,
562                                                      &bh, flags);
563                         if (ret != EOK) {
564                                 goto err;
565                         }
566                         if (ppos > depth) {
567                                 ext4_block_set(inode_ref->fs->bdev, &bh);
568                                 ret = EIO;
569                                 goto err;
570                         }
571
572                         eh = ext_block_hdr(&bh);
573                         path[ppos].block = bh;
574                         path[ppos].header = eh;
575                 }
576         }
577
578         path[ppos].depth = i;
579         path[ppos].extent = NULL;
580         path[ppos].index = NULL;
581
582         /* find extent */
583         ext4_ext_binsearch(path + ppos, block);
584         /* if not an empty leaf */
585         if (path[ppos].extent)
586                 path[ppos].p_block = ext4_ext_pblock(path[ppos].extent);
587
588         *orig_path = path;
589
590         ret = EOK;
591         return ret;
592
593 err:
594         ext4_ext_drop_refs(inode_ref, path, 0);
595         free(path);
596         if (orig_path)
597                 *orig_path = NULL;
598         return ret;
599 }
600
601 static void ext4_ext_init_header(struct ext4_inode_ref *inode_ref,
602                                  struct ext4_extent_header *eh, int32_t depth)
603 {
604         eh->entries_count = 0;
605         eh->max_entries_count = to_le16(ext4_ext_max_entries(inode_ref, depth));
606         eh->magic = to_le16(EXT4_EXTENT_MAGIC);
607         eh->depth = depth;
608 }
609
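/*
 * ext4_ext_insert_index:
 * insert the index entry (@insert_index -> @insert_block) into the node
 * at path[@at], keeping entries sorted by logical block. Fails with EIO
 * if the key already exists or the node is full; with @set_to_ix the
 * path is updated to point at the new entry.
 */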
610 static int ext4_ext_insert_index(struct ext4_inode_ref *inode_ref,
611                                  struct ext4_extent_path *path, int at,
612                                  ext4_lblk_t insert_index,
613                                  ext4_fsblk_t insert_block, bool set_to_ix)
614 {
615         struct ext4_extent_index *ix;
616         struct ext4_extent_path *curp = path + at;
617         int len, err;
618         struct ext4_extent_header *eh;
619
620         if (curp->index && insert_index == to_le32(curp->index->first_block))
621                 return EIO;
622
623         if (to_le16(curp->header->entries_count) ==
624             to_le16(curp->header->max_entries_count))
625                 return EIO;
626
627         eh = curp->header;
628         if (curp->index == NULL) {
629                 ix = EXT_FIRST_INDEX(eh);
630                 curp->index = ix;
631         } else if (insert_index > to_le32(curp->index->first_block)) {
632                 /* insert after */
633                 ix = curp->index + 1;
634         } else {
635                 /* insert before */
636                 ix = curp->index;
637         }
638
639         if (ix > EXT_MAX_INDEX(eh))
640                 return EIO;
641
642         len = EXT_LAST_INDEX(eh) - ix + 1;
643         ext4_assert(len >= 0);
644         if (len > 0)
645                 memmove(ix + 1, ix, len * sizeof(struct ext4_extent_index));
646
647         ix->first_block = to_le32(insert_index);
648         ext4_idx_store_pblock(ix, insert_block);
649         eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
650
651         if (ix > EXT_LAST_INDEX(eh)) {
652                 err = EIO;
653                 goto out;
654         }
655
656         err = ext4_ext_dirty(inode_ref, curp);
657
658 out:
659         if (err == EOK && set_to_ix) {
660                 curp->index = ix;
661                 curp->p_block = ext4_idx_pblock(ix);
662         }
663         return err;
664 }
665
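/*
 * ext4_ext_split_node:
 * split the nodes on @path from the leaf up to level @at: the entries
 * after the current position at each level are moved into newly
 * allocated blocks (recorded in @npath), and an index for the new
 * right branch is inserted at level @at - 1. *ins_right_leaf reports
 * whether @newext belongs in the new right branch.
 */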
666 static int ext4_ext_split_node(struct ext4_inode_ref *inode_ref,
667                                struct ext4_extent_path *path, int at,
668                                struct ext4_extent *newext,
669                                struct ext4_extent_path *npath,
670                                bool *ins_right_leaf)
671 {
672         int i, npath_at, ret;
673         ext4_lblk_t insert_index;
674         ext4_fsblk_t newblock = 0;
675         int depth = ext_depth(inode_ref->inode);
676         npath_at = depth - at;
677
678         ext4_assert(at > 0);
679
680         if (path[depth].extent != EXT_MAX_EXTENT(path[depth].header))
681                 insert_index = to_le32(path[depth].extent[1].first_block);
682         else
683                 insert_index = to_le32(newext->first_block);
684
685         for (i = depth; i >= at; i--, npath_at--) {
686                 struct ext4_block bh = EXT4_BLOCK_ZERO();
687
688                 /* FIXME: currently we split at the point after the current
689                  * extent. */
690                 newblock =
691                     ext4_ext_new_meta_block(inode_ref, path, newext, &ret, 0);
692                 if (ret != EOK)
693                         goto cleanup;
694
695                 /*  For write access.*/
696                 ret = ext4_trans_block_get_noread(inode_ref->fs->bdev, &bh,
697                                                   newblock);
698                 if (ret != EOK)
699                         goto cleanup;
700
701                 if (i == depth) {
702                         /* start copy from next extent */
703                         int m = EXT_MAX_EXTENT(path[i].header) - path[i].extent;
704                         struct ext4_extent_header *neh;
705                         struct ext4_extent *ex;
706                         neh = ext_block_hdr(&bh);
707                         ex = EXT_FIRST_EXTENT(neh);
708                         ext4_ext_init_header(inode_ref, neh, 0);
709                         if (m) {
710                                 memmove(ex, path[i].extent + 1,
711                                         sizeof(struct ext4_extent) * m);
712                                 neh->entries_count =
713                                     to_le16(to_le16(neh->entries_count) + m);
714                                 path[i].header->entries_count = to_le16(
715                                     to_le16(path[i].header->entries_count) - m);
716                                 ret = ext4_ext_dirty(inode_ref, path + i);
717                                 if (ret != EOK)
718                                         goto cleanup;
719
720                                 npath[npath_at].p_block = ext4_ext_pblock(ex);
721                                 npath[npath_at].extent = ex;
722                         } else {
723                                 npath[npath_at].p_block = 0;
724                                 npath[npath_at].extent = NULL;
725                         }
726
727                         npath[npath_at].depth = to_le16(neh->depth);
728                         npath[npath_at].maxdepth = 0;
729                         npath[npath_at].index = NULL;
730                         npath[npath_at].header = neh;
731                         npath[npath_at].block = bh;
732
733                         ext4_trans_set_block_dirty(bh.buf);
734                 } else {
735                         int m = EXT_MAX_INDEX(path[i].header) - path[i].index;
736                         struct ext4_extent_header *neh;
737                         struct ext4_extent_index *ix;
738                         neh = ext_block_hdr(&bh);
739                         ix = EXT_FIRST_INDEX(neh);
740                         ext4_ext_init_header(inode_ref, neh, depth - i);
741                         ix->first_block = to_le32(insert_index);
742                         ext4_idx_store_pblock(ix,
743                                               npath[npath_at + 1].block.lb_id);
744                         neh->entries_count = to_le16(1);
745                         if (m) {
746                                 memmove(ix + 1, path[i].index + 1,
747                                         sizeof(struct ext4_extent_index) * m);
748                                 neh->entries_count =
749                                     to_le16(to_le16(neh->entries_count) + m);
750                                 path[i].header->entries_count = to_le16(
751                                     to_le16(path[i].header->entries_count) - m);
752                                 ret = ext4_ext_dirty(inode_ref, path + i);
753                                 if (ret != EOK)
754                                         goto cleanup;
755                         }
756
757                         npath[npath_at].p_block = ext4_idx_pblock(ix);
758                         npath[npath_at].depth = to_le16(neh->depth);
759                         npath[npath_at].maxdepth = 0;
760                         npath[npath_at].extent = NULL;
761                         npath[npath_at].index = ix;
762                         npath[npath_at].header = neh;
763                         npath[npath_at].block = bh;
764
765                         ext4_trans_set_block_dirty(bh.buf);
766                 }
767         }
768         newblock = 0;
769
770         /*
771          * If newext->first_block can be included into the
772          * right sub-tree.
773          */
774         if (to_le32(newext->first_block) < insert_index)
775                 *ins_right_leaf = false;
776         else
777                 *ins_right_leaf = true;
778
779         ret = ext4_ext_insert_index(inode_ref, path, at - 1, insert_index,
780                                     npath[0].block.lb_id, *ins_right_leaf);
781
782 cleanup:
783         if (ret != EOK) {
784                 if (newblock)
785                         ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
786
787                 npath_at = depth - at;
788                 while (npath_at >= 0) {
789                         if (npath[npath_at].block.lb_id) {
790                                 newblock = npath[npath_at].block.lb_id;
791                                 ext4_block_set(inode_ref->fs->bdev,
792                                                &npath[npath_at].block);
793                                 ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
794                                 memset(&npath[npath_at].block, 0,
795                                        sizeof(struct ext4_block));
796                         }
797                         npath_at--;
798                 }
799         }
800         return ret;
801 }
802
803 /*
804  * ext4_ext_correct_indexes:
805  * if leaf gets modified and modified extent is first in the leaf,
806  * then we have to correct all indexes above.
807  */
808 static int ext4_ext_correct_indexes(struct ext4_inode_ref *inode_ref,
809                                     struct ext4_extent_path *path)
810 {
811         struct ext4_extent_header *eh;
812         int32_t depth = ext_depth(inode_ref->inode);
813         struct ext4_extent *ex;
814         uint32_t border;
815         int32_t k;
816         int err = EOK;
817
818         eh = path[depth].header;
819         ex = path[depth].extent;
820
821         if (ex == NULL || eh == NULL)
822                 return EIO;
823
824         if (depth == 0) {
825                 /* there is no tree at all */
826                 return EOK;
827         }
828
829         if (ex != EXT_FIRST_EXTENT(eh)) {
830                 /* we correct tree if first leaf got modified only */
831                 return EOK;
832         }
833
834         k = depth - 1;
835         border = path[depth].extent->first_block;
836         path[k].index->first_block = border;
837         err = ext4_ext_dirty(inode_ref, path + k);
838         if (err != EOK)
839                 return err;
840
841         while (k--) {
842                 /* change all left-side indexes */
843                 if (path[k + 1].index != EXT_FIRST_INDEX(path[k + 1].header))
844                         break;
845                 path[k].index->first_block = border;
846                 err = ext4_ext_dirty(inode_ref, path + k);
847                 if (err != EOK)
848                         break;
849         }
850
851         return err;
852 }
853
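/*
 * ext4_ext_can_prepend / ext4_ext_can_append:
 * two extents may be merged only when they are both logically and
 * physically contiguous and the combined length stays within
 * EXT_INIT_MAX_LEN (EXT_UNWRITTEN_MAX_LEN for unwritten extents).
 */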
854 static inline bool ext4_ext_can_prepend(struct ext4_extent *ex1,
855                                         struct ext4_extent *ex2)
856 {
857         if (ext4_ext_pblock(ex2) + ext4_ext_get_actual_len(ex2) !=
858             ext4_ext_pblock(ex1))
859                 return 0;
860
861 #ifdef AGGRESSIVE_TEST
862         if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) > 4)
863                 return 0;
864 #else
865         if (ext4_ext_is_unwritten(ex1)) {
866                 if (ext4_ext_get_actual_len(ex1) +
867                         ext4_ext_get_actual_len(ex2) >
868                     EXT_UNWRITTEN_MAX_LEN)
869                         return 0;
870         } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
871                    EXT_INIT_MAX_LEN)
872                 return 0;
873 #endif
874
875         if (to_le32(ex2->first_block) + ext4_ext_get_actual_len(ex2) !=
876             to_le32(ex1->first_block))
877                 return 0;
878
879         return 1;
880 }
881
882 static inline bool ext4_ext_can_append(struct ext4_extent *ex1,
883                                        struct ext4_extent *ex2)
884 {
885         if (ext4_ext_pblock(ex1) + ext4_ext_get_actual_len(ex1) !=
886             ext4_ext_pblock(ex2))
887                 return 0;
888
889 #ifdef AGGRESSIVE_TEST
890         if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) > 4)
891                 return 0;
892 #else
893         if (ext4_ext_is_unwritten(ex1)) {
894                 if (ext4_ext_get_actual_len(ex1) +
895                         ext4_ext_get_actual_len(ex2) >
896                     EXT_UNWRITTEN_MAX_LEN)
897                         return 0;
898         } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
899                    EXT_INIT_MAX_LEN)
900                 return 0;
901 #endif
902
903         if (to_le32(ex1->first_block) + ext4_ext_get_actual_len(ex1) !=
904             to_le32(ex2->first_block))
905                 return 0;
906
907         return 1;
908 }
909
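/*
 * ext4_ext_insert_leaf:
 * place @newext into the leaf at path[@at]: first try to merge it with
 * the neighbouring extent (unless EXT4_EXT_NO_COMBINE is set), otherwise
 * shift the following entries and insert a new record. If the leaf is
 * full, *need_split is set and EIO is returned so the caller can split
 * the node and retry.
 */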
910 static int ext4_ext_insert_leaf(struct ext4_inode_ref *inode_ref,
911                                 struct ext4_extent_path *path, int at,
912                                 struct ext4_extent *newext, int flags,
913                                 bool *need_split)
914 {
915         struct ext4_extent_path *curp = path + at;
916         struct ext4_extent *ex = curp->extent;
917         int len, err, unwritten;
918         struct ext4_extent_header *eh;
919
920         *need_split = false;
921
922         if (curp->extent &&
923             to_le32(newext->first_block) == to_le32(curp->extent->first_block))
924                 return EIO;
925
926         if (!(flags & EXT4_EXT_NO_COMBINE)) {
927                 if (curp->extent && ext4_ext_can_append(curp->extent, newext)) {
928                         unwritten = ext4_ext_is_unwritten(curp->extent);
929                         curp->extent->block_count =
930                             to_le16(ext4_ext_get_actual_len(curp->extent) +
931                                     ext4_ext_get_actual_len(newext));
932                         if (unwritten)
933                                 ext4_ext_mark_unwritten(curp->extent);
934
935                         err = ext4_ext_dirty(inode_ref, curp);
936                         goto out;
937                 }
938
939                 if (curp->extent &&
940                     ext4_ext_can_prepend(curp->extent, newext)) {
941                         unwritten = ext4_ext_is_unwritten(curp->extent);
942                         curp->extent->first_block = newext->first_block;
943                         curp->extent->block_count =
944                             to_le16(ext4_ext_get_actual_len(curp->extent) +
945                                     ext4_ext_get_actual_len(newext));
946                         if (unwritten)
947                                 ext4_ext_mark_unwritten(curp->extent);
948
949                         err = ext4_ext_dirty(inode_ref, curp);
950                         goto out;
951                 }
952         }
953
954         if (to_le16(curp->header->entries_count) ==
955             to_le16(curp->header->max_entries_count)) {
956                 err = EIO;
957                 *need_split = true;
958                 goto out;
959         } else {
960                 eh = curp->header;
961                 if (curp->extent == NULL) {
962                         ex = EXT_FIRST_EXTENT(eh);
963                         curp->extent = ex;
964                 } else if (to_le32(newext->first_block) >
965                            to_le32(curp->extent->first_block)) {
966                         /* insert after */
967                         ex = curp->extent + 1;
968                 } else {
969                         /* insert before */
970                         ex = curp->extent;
971                 }
972         }
973
974         len = EXT_LAST_EXTENT(eh) - ex + 1;
975         ext4_assert(len >= 0);
976         if (len > 0)
977                 memmove(ex + 1, ex, len * sizeof(struct ext4_extent));
978
979         if (ex > EXT_MAX_EXTENT(eh)) {
980                 err = EIO;
981                 goto out;
982         }
983
984         ex->first_block = newext->first_block;
985         ex->block_count = newext->block_count;
986         ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
987         eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
988
989         if (ex > EXT_LAST_EXTENT(eh)) {
990                 err = EIO;
991                 goto out;
992         }
993
994         err = ext4_ext_correct_indexes(inode_ref, path);
995         if (err != EOK)
996                 goto out;
997         err = ext4_ext_dirty(inode_ref, curp);
998
999 out:
1000         if (err == EOK) {
1001                 curp->extent = ex;
1002                 curp->p_block = ext4_ext_pblock(ex);
1003         }
1004
1005         return err;
1006 }
1007
1008 /*
1009  * ext4_ext_grow_indepth:
1010  * implements tree growing procedure:
1011  * - allocates new block
1012  * - moves top-level data (index block or leaf) into the new block
1013  * - initializes new top-level, creating index that points to the
1014  *   just created block
1015  */
1016 static int ext4_ext_grow_indepth(struct ext4_inode_ref *inode_ref,
1017                                  uint32_t flags)
1018 {
1019         struct ext4_extent_header *neh;
1020         struct ext4_block bh = EXT4_BLOCK_ZERO();
1021         ext4_fsblk_t newblock, goal = 0;
1022         int err = EOK;
1023
1024         /* Try to prepend new index to old one */
1025         if (ext_depth(inode_ref->inode))
1026                 goal = ext4_idx_pblock(
1027                     EXT_FIRST_INDEX(ext_inode_hdr(inode_ref->inode)));
1028         else
1029                 goal = ext4_fs_inode_to_goal_block(inode_ref);
1030
1031         newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, &err);
1032         if (newblock == 0)
1033                 return err;
1034
1035         /* Get a buffer for the new block; its previous contents are not needed. */
1036         err = ext4_trans_block_get_noread(inode_ref->fs->bdev, &bh, newblock);
1037         if (err != EOK) {
1038                 ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
1039                 return err;
1040         }
1041
1042         /* move top-level index/leaf into new block */
1043         memmove(bh.data, inode_ref->inode->blocks,
1044                 sizeof(inode_ref->inode->blocks));
1045
1046         /* set size of new block */
1047         neh = ext_block_hdr(&bh);
1048         /* old root could have indexes or leaves
1049          * so calculate e_max the right way */
1050         if (ext_depth(inode_ref->inode))
1051                 neh->max_entries_count =
1052                     to_le16(ext4_ext_space_block_idx(inode_ref));
1053         else
1054                 neh->max_entries_count =
1055                     to_le16(ext4_ext_space_block(inode_ref));
1056
1057         neh->magic = to_le16(EXT4_EXTENT_MAGIC);
1058         ext4_extent_block_csum_set(inode_ref, neh);
1059
1060         /* Update top-level index: num,max,pointer */
1061         neh = ext_inode_hdr(inode_ref->inode);
1062         neh->entries_count = to_le16(1);
1063         ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
1064         if (neh->depth == 0) {
1065                 /* Root extent block becomes index block */
1066                 neh->max_entries_count =
1067                     to_le16(ext4_ext_space_root_idx(inode_ref));
1068                 EXT_FIRST_INDEX(neh)
1069                     ->first_block = EXT_FIRST_EXTENT(neh)->first_block;
1070         }
1071         neh->depth = to_le16(to_le16(neh->depth) + 1);
1072
1073         ext4_trans_set_block_dirty(bh.buf);
1074         inode_ref->dirty = true;
1075         ext4_block_set(inode_ref->fs->bdev, &bh);
1076
1077         return err;
1078 }
1079
1080 static inline void ext4_ext_replace_path(struct ext4_inode_ref *inode_ref,
1081                                          struct ext4_extent_path *path,
1082                                          struct ext4_extent_path *newpath,
1083                                          int at)
1084 {
1085         ext4_ext_drop_refs(inode_ref, path + at, 1);
1086         path[at] = *newpath;
1087         memset(newpath, 0, sizeof(struct ext4_extent_path));
1088 }
1089
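/*
 * ext4_ext_insert_extent:
 * insert @newext into the extent tree. If the target leaf is full, the
 * closest ancestor with a free index slot is located and the tree is
 * split from the leaf up to it (growing the tree in depth when no
 * ancestor has room), after which the insert is retried.
 */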
1090 int ext4_ext_insert_extent(struct ext4_inode_ref *inode_ref,
1091                            struct ext4_extent_path **ppath,
1092                            struct ext4_extent *newext, int flags)
1093 {
1094         int depth, level = 0, ret = 0;
1095         struct ext4_extent_path *path = *ppath;
1096         struct ext4_extent_path *npath = NULL;
1097         bool ins_right_leaf = false;
1098         bool need_split;
1099
1100 again:
1101         depth = ext_depth(inode_ref->inode);
1102         ret = ext4_ext_insert_leaf(inode_ref, path, depth, newext, flags,
1103                                    &need_split);
1104         if (ret == EIO && need_split == true) {
1105                 int i;
1106                 for (i = depth, level = 0; i >= 0; i--, level++)
1107                         if (EXT_HAS_FREE_INDEX(path + i))
1108                                 break;
1109
1110                 /* Do we need to grow the tree? */
1111                 if (i < 0) {
1112                         ret = ext4_ext_grow_indepth(inode_ref, 0);
1113                         if (ret != EOK)
1114                                 goto out;
1115
1116                         ret = ext4_find_extent(
1117                             inode_ref, to_le32(newext->first_block), ppath, 0);
1118                         if (ret != EOK)
1119                                 goto out;
1120
1121                         path = *ppath;
1122                         /*
1123                          * After growing the tree, there should be free space in
1124                          * the only child node of the root.
1125                          */
1126                         level--;
1127                         depth++;
1128                 }
1129
1130                 i = depth - (level - 1);
1131                 /* We split from leaf to the i-th node */
1132                 if (level > 0) {
1133                         npath = calloc(1, sizeof(struct ext4_extent_path) *
1134                                               (level));
1135                         if (!npath) {
1136                                 ret = ENOMEM;
1137                                 goto out;
1138                         }
1139                         ret = ext4_ext_split_node(inode_ref, path, i, newext,
1140                                                   npath, &ins_right_leaf);
1141                         if (ret != EOK)
1142                                 goto out;
1143
1144                         while (--level >= 0) {
1145                                 if (ins_right_leaf)
1146                                         ext4_ext_replace_path(inode_ref, path,
1147                                                               &npath[level],
1148                                                               i + level);
1149                                 else if (npath[level].block.lb_id)
1150                                         ext4_ext_drop_refs(inode_ref,
1151                                                            npath + level, 1);
1152                         }
1153                 }
1154                 goto again;
1155         }
1156
1157 out:
1158         if (ret != EOK) {
1159                 if (path)
1160                         ext4_ext_drop_refs(inode_ref, path, 0);
1161
1162                 while (--level >= 0 && npath) {
1163                         if (npath[level].block.lb_id) {
1164                                 ext4_fsblk_t block = npath[level].block.lb_id;
1165                                 ext4_ext_free_blocks(inode_ref, block, 1, 0);
1166                                 ext4_ext_drop_refs(inode_ref, npath + level, 1);
1167                         }
1168                 }
1169         }
1170         if (npath)
1171                 free(npath);
1172
1173         return ret;
1174 }
1175
1176 static void ext4_ext_remove_blocks(struct ext4_inode_ref *inode_ref,
1177                                    struct ext4_extent *ex, ext4_lblk_t from,
1178                                    ext4_lblk_t to)
1179 {
1180         ext4_lblk_t len = to - from + 1;
1181         ext4_lblk_t num;
1182         ext4_fsblk_t start;
1183         num = from - to_le32(ex->first_block);
1184         start = ext4_ext_pblock(ex) + num;
1185         ext4_dbg(DEBUG_EXTENT,
1186                  "Freeing %" PRIu32 " at %" PRIu64 ", %" PRIu32 "\n", from,
1187                  start, len);
1188
1189         ext4_ext_free_blocks(inode_ref, start, len, 0);
1190 }
1191
1192 static int ext4_ext_remove_idx(struct ext4_inode_ref *inode_ref,
1193                                struct ext4_extent_path *path, int32_t depth)
1194 {
1195         int err = EOK;
1196         int32_t i = depth;
1197         ext4_fsblk_t leaf;
1198
1199         /* free index block */
1200         leaf = ext4_idx_pblock(path[i].index);
1201
1202         if (path[i].index != EXT_LAST_INDEX(path[i].header)) {
1203                 ptrdiff_t len = EXT_LAST_INDEX(path[i].header) - path[i].index;
1204                 memmove(path[i].index, path[i].index + 1,
1205                         len * sizeof(struct ext4_extent_index));
1206         }
1207
1208         path[i].header->entries_count =
1209             to_le16(to_le16(path[i].header->entries_count) - 1);
1210         err = ext4_ext_dirty(inode_ref, path + i);
1211         if (err != EOK)
1212                 return err;
1213
1214         ext4_dbg(DEBUG_EXTENT, "IDX: Freeing %" PRIu32 " at %" PRIu64 ", %d\n",
1215                  to_le32(path[i].index->first_block), leaf, 1);
1216         ext4_ext_free_blocks(inode_ref, leaf, 1, 0);
1217
1218         while (i > 0) {
1219                 if (path[i].index != EXT_FIRST_INDEX(path[i].header))
1220                         break;
1221
1222                 path[i - 1].index->first_block = path[i].index->first_block;
1223                 err = ext4_ext_dirty(inode_ref, path + i - 1);
1224                 if (err != EOK)
1225                         break;
1226
1227                 i--;
1228         }
1229         return err;
1230 }
1231
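/*
 * ext4_ext_remove_leaf:
 * remove the part of each extent in this leaf that overlaps [from, to]:
 * the covered physical blocks are freed, extent records are trimmed or
 * dropped, and an emptied leaf is unlinked from its parent index.
 */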
1232 static int ext4_ext_remove_leaf(struct ext4_inode_ref *inode_ref,
1233                                 struct ext4_extent_path *path, ext4_lblk_t from,
1234                                 ext4_lblk_t to)
1235 {
1236
1237         int32_t depth = ext_depth(inode_ref->inode);
1238         struct ext4_extent *ex = path[depth].extent;
1239         struct ext4_extent *start_ex, *ex2 = NULL;
1240         struct ext4_extent_header *eh = path[depth].header;
1241         int32_t len;
1242         int err = EOK;
1243         uint16_t new_entries;
1244
1245         start_ex = ex;
1246         new_entries = to_le16(eh->entries_count);
1247         while (ex <= EXT_LAST_EXTENT(path[depth].header) &&
1248                to_le32(ex->first_block) <= to) {
1249                 int32_t new_len = 0;
1250                 int unwritten;
1251                 ext4_lblk_t start, new_start;
1252                 ext4_fsblk_t newblock;
1253                 new_start = start = to_le32(ex->first_block);
1254                 len = ext4_ext_get_actual_len(ex);
1255                 newblock = ext4_ext_pblock(ex);
1256                 if (start < from) {
1257                         len -= from - start;
1258                         new_len = from - start;
1259                         start = from;
1260                         start_ex++;
1261                 } else {
1262                         if (start + len - 1 > to) {
1263                                 new_len = start + len - 1 - to;
1264                                 len -= new_len;
1265                                 new_start = to + 1;
1266                                 newblock += to + 1 - start;
1267                                 ex2 = ex;
1268                         }
1269                 }
1270
1271                 ext4_ext_remove_blocks(inode_ref, ex, start, start + len - 1);
1272                 ex->first_block = to_le32(new_start);
1273                 if (!new_len)
1274                         new_entries--;
1275                 else {
1276                         unwritten = ext4_ext_is_unwritten(ex);
1277                         ex->block_count = to_le16(new_len);
1278                         ext4_ext_store_pblock(ex, newblock);
1279                         if (unwritten)
1280                                 ext4_ext_mark_unwritten(ex);
1281                 }
1282
1283                 ex += 1;
1284         }
1285
1286         if (ex2 == NULL)
1287                 ex2 = ex;
1288
1289         if (ex2 <= EXT_LAST_EXTENT(eh))
1290                 memmove(start_ex, ex2, (EXT_LAST_EXTENT(eh) - ex2 + 1) *
1291                                            sizeof(struct ext4_extent));
1292
1293         eh->entries_count = to_le16(new_entries);
1294         ext4_ext_dirty(inode_ref, path + depth);
1295         if (path[depth].extent == EXT_FIRST_EXTENT(eh) && eh->entries_count) {
1296                 err = ext4_ext_correct_indexes(inode_ref, path);
1297                 if (err != EOK)
1298                         return err;
1299         }
1300
1301         /* if this leaf is free, then we should
1302          * remove it from index block above */
1303         if (eh->entries_count == 0 && path[depth].block.lb_id)
1304                 err = ext4_ext_remove_idx(inode_ref, path, depth - 1);
1305         else if (depth > 0)
1306                 path[depth - 1].index++;
1307
1308         return err;
1309 }
1310
1311 static bool ext4_ext_more_to_rm(struct ext4_extent_path *path, ext4_lblk_t to)
1312 {
1313         if (!to_le16(path->header->entries_count))
1314                 return false;
1315
1316         if (path->index > EXT_LAST_INDEX(path->header))
1317                 return false;
1318
1319         if (to_le32(path->index->first_block) > to)
1320                 return false;
1321
1322         return true;
1323 }
1324
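/*
 * ext4_extent_remove_space:
 * remove the logical block range [from, to] from the inode's extent
 * tree. A range lying strictly inside one extent splits it in two;
 * otherwise the tree is walked depth-first, trimming leaves, dropping
 * emptied index nodes and finally resetting the root when the whole
 * tree becomes empty.
 */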
1325 int ext4_extent_remove_space(struct ext4_inode_ref *inode_ref, ext4_lblk_t from,
1326                              ext4_lblk_t to)
1327 {
1328         struct ext4_extent_path *path = NULL;
1329         int ret = EOK;
1330         int32_t depth = ext_depth(inode_ref->inode);
1331         int32_t i;
1332
1333         ret = ext4_find_extent(inode_ref, from, &path, 0);
1334         if (ret != EOK)
1335                 goto out;
1336
1337         if (!path[depth].extent) {
1338                 ret = EOK;
1339                 goto out;
1340         }
1341
1342         bool in_range = IN_RANGE(from, to_le32(path[depth].extent->first_block),
1343                                  ext4_ext_get_actual_len(path[depth].extent));
1344
1345         if (!in_range) {
1346                 ret = EOK;
1347                 goto out;
1348         }
1349
1350         /* If the range to remove lies strictly inside a single extent, split it */
1351         if ((to_le32(path[depth].extent->first_block) < from) &&
1352             (to < to_le32(path[depth].extent->first_block) +
1353                       ext4_ext_get_actual_len(path[depth].extent) - 1)) {
1354
1355                 struct ext4_extent *ex = path[depth].extent, newex;
1356                 int unwritten = ext4_ext_is_unwritten(ex);
1357                 ext4_lblk_t ee_block = to_le32(ex->first_block);
1358                 int32_t len = ext4_ext_get_actual_len(ex);
1359                 ext4_fsblk_t newblock = to + 1 - ee_block + ext4_ext_pblock(ex);
1360
1361                 ex->block_count = to_le16(from - ee_block);
1362                 if (unwritten)
1363                         ext4_ext_mark_unwritten(ex);
1364
1365                 ext4_ext_dirty(inode_ref, path + depth);
1366
1367                 newex.first_block = to_le32(to + 1);
1368                 newex.block_count = to_le16(ee_block + len - 1 - to);
1369                 ext4_ext_store_pblock(&newex, newblock);
1370                 if (unwritten)
1371                         ext4_ext_mark_unwritten(&newex);
1372
1373                 ret = ext4_ext_insert_extent(inode_ref, &path, &newex, 0);
1374                 goto out;
1375         }
1376
1377         i = depth;
1378         while (i >= 0) {
1379                 if (i == depth) {
1380                         struct ext4_extent_header *eh;
1381                         struct ext4_extent *first_ex, *last_ex;
1382                         ext4_lblk_t leaf_from, leaf_to;
1383                         eh = path[i].header;
1384                         ext4_assert(to_le16(eh->entries_count) > 0);
1385                         first_ex = EXT_FIRST_EXTENT(eh);
1386                         last_ex = EXT_LAST_EXTENT(eh);
1387                         leaf_from = to_le32(first_ex->first_block);
1388                         leaf_to = to_le32(last_ex->first_block) +
1389                                   ext4_ext_get_actual_len(last_ex) - 1;
1390                         if (leaf_from < from)
1391                                 leaf_from = from;
1392
1393                         if (leaf_to > to)
1394                                 leaf_to = to;
1395
1396                         ext4_ext_remove_leaf(inode_ref, path, leaf_from,
1397                                              leaf_to);
1398                         ext4_ext_drop_refs(inode_ref, path + i, 0);
1399                         i--;
1400                         continue;
1401                 }
1402
1403                 struct ext4_extent_header *eh;
1404                 eh = path[i].header;
1405                 if (ext4_ext_more_to_rm(path + i, to)) {
1406                         struct ext4_block bh = EXT4_BLOCK_ZERO();
1407                         if (path[i + 1].block.lb_id)
1408                                 ext4_ext_drop_refs(inode_ref, path + i + 1, 0);
1409
1410                         ret = read_extent_tree_block(
1411                             inode_ref, ext4_idx_pblock(path[i].index),
1412                             depth - i - 1, &bh, 0);
1413                         if (ret != EOK)
1414                                 goto out;
1415
1416                         path[i].p_block = ext4_idx_pblock(path[i].index);
1417                         path[i + 1].block = bh;
1418                         path[i + 1].header = ext_block_hdr(&bh);
1419                         path[i + 1].depth = depth - i - 1;
1420                         if (i + 1 == depth)
1421                                 path[i + 1].extent =
1422                                     EXT_FIRST_EXTENT(path[i + 1].header);
1423                         else
1424                                 path[i + 1].index =
1425                                     EXT_FIRST_INDEX(path[i + 1].header);
1426
1427                         i++;
1428                 } else {
1429                         if (i > 0) {
1430                                 if (!eh->entries_count)
1431                                         ret = ext4_ext_remove_idx(inode_ref,
1432                                                                   path, i - 1);
1433                                 else
1434                                         path[i - 1].index++;
1435                         }
1436
1437                         if (i)
1438                                 ext4_block_set(inode_ref->fs->bdev,
1439                                                &path[i].block);
1440
1441                         i--;
1442                 }
1443         }
1444
1445         /* TODO: flexible tree reduction should be here */
1446         if (path->header->entries_count == 0) {
1447                 /*
1448                  * truncating to zero freed the whole tree,
1449                  * so we need to correct eh_depth
1450                  */
1451                 ext_inode_hdr(inode_ref->inode)->depth = 0;
1452                 ext_inode_hdr(inode_ref->inode)->max_entries_count =
1453                     to_le16(ext4_ext_space_root(inode_ref));
1454                 ret = ext4_ext_dirty(inode_ref, path);
1455         }
1456
1457 out:
1458         ext4_ext_drop_refs(inode_ref, path, 0);
1459         free(path);
1460         path = NULL;
1461         return ret;
1462 }
1463
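/*
 * Split the extent found at the leaf level of @ppath into two extents at
 * logical block @split.  If @split equals the first block of the extent,
 * no split is needed and only the written/unwritten state is updated.
 * @split_flag (EXT4_EXT_MARK_UNWRIT1/2) selects which halves are marked
 * unwritten; the new right half is inserted with EXT4_EXT_NO_COMBINE so
 * that it is not combined with a neighbouring extent.
 */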
1464 static int ext4_ext_split_extent_at(struct ext4_inode_ref *inode_ref,
1465                                     struct ext4_extent_path **ppath,
1466                                     ext4_lblk_t split, uint32_t split_flag)
1467 {
1468         struct ext4_extent *ex, newex;
1469         ext4_fsblk_t newblock;
1470         ext4_lblk_t ee_block;
1471         int32_t ee_len;
1472         int32_t depth = ext_depth(inode_ref->inode);
1473         int err = EOK;
1474
1475         ex = (*ppath)[depth].extent;
1476         ee_block = to_le32(ex->first_block);
1477         ee_len = ext4_ext_get_actual_len(ex);
1478         newblock = split - ee_block + ext4_ext_pblock(ex);
1479
1480         if (split == ee_block) {
1481                 /*
1482                  * case b: block @split is the block that the extent begins with,
1483                  * so we just change the state of the extent; splitting
1484                  * is not needed.
1485                  */
1486                 if (split_flag & EXT4_EXT_MARK_UNWRIT2)
1487                         ext4_ext_mark_unwritten(ex);
1488                 else
1489                         ext4_ext_mark_initialized(ex);
1490
1491                 err = ext4_ext_dirty(inode_ref, *ppath + depth);
1492                 goto out;
1493         }
1494
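        /*
         * case a: @split lies inside the extent; shrink the original
         * extent so it ends at @split - 1 and insert a new extent that
         * starts at @split.
         */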
1495         ex->block_count = to_le16(split - ee_block);
1496         if (split_flag & EXT4_EXT_MARK_UNWRIT1)
1497                 ext4_ext_mark_unwritten(ex);
1498
1499         err = ext4_ext_dirty(inode_ref, *ppath + depth);
1500         if (err != EOK)
1501                 goto out;
1502
1503         newex.first_block = to_le32(split);
1504         newex.block_count = to_le16(ee_len - (split - ee_block));
1505         ext4_ext_store_pblock(&newex, newblock);
1506         if (split_flag & EXT4_EXT_MARK_UNWRIT2)
1507                 ext4_ext_mark_unwritten(&newex);
1508         err = ext4_ext_insert_extent(inode_ref, ppath, &newex,
1509                                      EXT4_EXT_NO_COMBINE);
1510         if (err != EOK)
1511                 goto restore_extent_len;
1512
1513 out:
1514         return err;
1515 restore_extent_len:
1516         ex->block_count = to_le16(ee_len);
1517         err = ext4_ext_dirty(inode_ref, *ppath + depth);
1518         return err;
1519 }
1520
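/*
 * Convert the range of @blocks blocks starting at logical block @split,
 * which lies inside an unwritten extent, to the initialized state.  The
 * extent is split with ext4_ext_split_extent_at() so that only the
 * covered range ends up marked initialized.
 */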
1521 static int ext4_ext_convert_to_initialized(struct ext4_inode_ref *inode_ref,
1522                                            struct ext4_extent_path **ppath,
1523                                            ext4_lblk_t split, uint32_t blocks)
1524 {
1525         int32_t depth = ext_depth(inode_ref->inode), err = EOK;
1526         struct ext4_extent *ex = (*ppath)[depth].extent;
1527
1528         ext4_assert(to_le32(ex->first_block) <= split);
1529
1530         if (split + blocks ==
1531             to_le32(ex->first_block) + ext4_ext_get_actual_len(ex)) {
1532                 /* split and initialize right part */
1533                 err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1534                                                EXT4_EXT_MARK_UNWRIT1);
1535         } else if (to_le32(ex->first_block) == split) {
1536                 /* split and initialize left part */
1537                 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1538                                                EXT4_EXT_MARK_UNWRIT2);
1539         } else {
1540                 /* split 1 extent to 3 and initialize the 2nd */
1541                 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1542                                                EXT4_EXT_MARK_UNWRIT1 |
1543                                                    EXT4_EXT_MARK_UNWRIT2);
1544                 if (err == EOK) {
1545                         err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1546                                                        EXT4_EXT_MARK_UNWRIT1);
1547                 }
1548         }
1549
1550         return err;
1551 }
1552
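/*
 * Return the logical block at which the next allocated extent after the
 * position referenced by @path starts, or EXT_MAX_BLOCKS if there is no
 * such extent.
 */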
1553 static ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_extent_path *path)
1554 {
1555         int32_t depth;
1556
1557         depth = path->depth;
1558
1559         if (depth == 0 && path->extent == NULL)
1560                 return EXT_MAX_BLOCKS;
1561
1562         while (depth >= 0) {
1563                 if (depth == path->depth) {
1564                         /* leaf */
1565                         if (path[depth].extent &&
1566                             path[depth].extent !=
1567                                 EXT_LAST_EXTENT(path[depth].header))
1568                                 return to_le32(
1569                                     path[depth].extent[1].first_block);
1570                 } else {
1571                         /* index */
1572                         if (path[depth].index !=
1573                             EXT_LAST_INDEX(path[depth].header))
1574                                 return to_le32(
1575                                     path[depth].index[1].first_block);
1576                 }
1577                 depth--;
1578         }
1579
1580         return EXT_MAX_BLOCKS;
1581 }
1582
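/*
 * Zero @blocks_count physical blocks starting at @block by writing
 * zero-filled buffers through the block cache.
 */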
1583 static int ext4_ext_zero_unwritten_range(struct ext4_inode_ref *inode_ref,
1584                                          ext4_fsblk_t block,
1585                                          uint32_t blocks_count)
1586 {
1587         int err = EOK;
1588         uint32_t i;
1589         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
1590         for (i = 0; i < blocks_count; i++) {
1591                 struct ext4_block bh = EXT4_BLOCK_ZERO();
1592                 err = ext4_trans_block_get_noread(inode_ref->fs->bdev, &bh,
1593                                                   block + i);
1594                 if (err != EOK)
1595                         break;
1596
1597                 memset(bh.data, 0, block_size);
1598                 ext4_trans_set_block_dirty(bh.buf);
1599                 err = ext4_block_set(inode_ref->fs->bdev, &bh);
1600                 if (err != EOK)
1601                         break;
1602         }
1603         return err;
1604 }
1605
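/*
 * Debug helper: dump every level of an extent path (physical block and
 * extent/index offsets) to the debug log.
 */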
1606 __unused static void print_path(struct ext4_extent_path *path)
1607 {
1608         int32_t i = path->depth;
1609         while (i >= 0) {
1610
1611                 ptrdiff_t a =
1612                     (path->extent)
1613                         ? (path->extent - EXT_FIRST_EXTENT(path->header))
1614                         : 0;
1615                 ptrdiff_t b =
1616                     (path->index)
1617                         ? (path->index - EXT_FIRST_INDEX(path->header))
1618                         : 0;
1619
1620                 (void)a;
1621                 (void)b;
1622                 ext4_dbg(DEBUG_EXTENT,
1623                          "depth %" PRId32 ", p_block: %" PRIu64 ","
1624                          "p_ext offset: %td, p_idx offset: %td\n",
1625                          i, path->p_block, a, b);
1626                 i--;
1627                 path++;
1628         }
1629 }
1630
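/*
 * Map logical block @iblock of the inode to a physical block.  On success
 * *result holds the physical block (0 if the block is not mapped) and
 * *blocks_count the number of contiguously mapped blocks, capped at
 * @max_blocks.  When @create is true, missing blocks are allocated and
 * unwritten extents are zeroed and converted to the initialized state.
 */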
1631 int ext4_extent_get_blocks(struct ext4_inode_ref *inode_ref, ext4_lblk_t iblock,
1632                            uint32_t max_blocks, ext4_fsblk_t *result,
1633                            bool create, uint32_t *blocks_count)
1634 {
1635         struct ext4_extent_path *path = NULL;
1636         struct ext4_extent newex, *ex;
1637         ext4_fsblk_t goal;
1638         int err = EOK;
1639         int32_t depth;
1640         uint32_t allocated = 0;
1641         ext4_lblk_t next;
1642         ext4_fsblk_t newblock;
1643
1644         if (result)
1645                 *result = 0;
1646
1647         if (blocks_count)
1648                 *blocks_count = 0;
1649
1650         /* find extent for this block */
1651         err = ext4_find_extent(inode_ref, iblock, &path, 0);
1652         if (err != EOK) {
1653                 path = NULL;
1654                 goto out2;
1655         }
1656
1657         depth = ext_depth(inode_ref->inode);
1658
1659         /*
1660          * a consistent leaf must not be empty;
1661          * this situation is possible, though, _during_ tree modification,
1662          * which is why the assert can't be put in ext4_ext_find_extent()
1663          */
1664         ex = path[depth].extent;
1665         if (ex) {
1666                 ext4_lblk_t ee_block = to_le32(ex->first_block);
1667                 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
1668                 uint16_t ee_len = ext4_ext_get_actual_len(ex);
1669                 /* if the found extent covers the block, simply return it */
1670                 if (IN_RANGE(iblock, ee_block, ee_len)) {
1671                         /* number of remaining blocks in the extent */
1672                         allocated = ee_len - (iblock - ee_block);
1673
1674                         if (!ext4_ext_is_unwritten(ex)) {
1675                                 newblock = iblock - ee_block + ee_start;
1676                                 goto out;
1677                         }
1678
1679                         if (!create) {
1680                                 newblock = 0;
1681                                 goto out;
1682                         }
1683
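                        /*
                         * the extent is unwritten and we were asked to
                         * create: zero the covered blocks and convert the
                         * range to the initialized state
                         */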
1684                         uint32_t zero_range;
1685                         zero_range = allocated;
1686                         if (zero_range > max_blocks)
1687                                 zero_range = max_blocks;
1688
1689                         newblock = iblock - ee_block + ee_start;
1690                         err = ext4_ext_zero_unwritten_range(inode_ref, newblock,
1691                                                             zero_range);
1692                         if (err != EOK)
1693                                 goto out2;
1694
1695                         err = ext4_ext_convert_to_initialized(
1696                             inode_ref, &path, iblock, zero_range);
1697                         if (err != EOK)
1698                                 goto out2;
1699
1700                         goto out;
1701                 }
1702         }
1703
1704         /*
1705          * the requested block isn't allocated yet;
1706          * we can't try to create it if the create flag is zero
1707          */
1708         if (!create) {
1709                 goto out2;
1710         }
1711
1712         /* find the next allocated block so that we know how many
1713          * blocks we can allocate without overlapping the next extent */
1714         next = ext4_ext_next_allocated_block(path);
1715         allocated = next - iblock;
1716         if (allocated > max_blocks)
1717                 allocated = max_blocks;
1718
1719         /* allocate new block */
1720         goal = ext4_ext_find_goal(inode_ref, path, iblock);
1721         newblock = ext4_new_meta_blocks(inode_ref, goal, 0, &allocated, &err);
1722         if (!newblock)
1723                 goto out2;
1724
1725         /* try to insert the new extent into the found leaf and return */
1726         newex.first_block = to_le32(iblock);
1727         ext4_ext_store_pblock(&newex, newblock);
1728         newex.block_count = to_le16(allocated);
1729         err = ext4_ext_insert_extent(inode_ref, &path, &newex, 0);
1730         if (err != EOK) {
1731                 /* free data blocks we just allocated */
1732                 ext4_ext_free_blocks(inode_ref, ext4_ext_pblock(&newex),
1733                                      to_le16(newex.block_count), 0);
1734                 goto out2;
1735         }
1736
1737         /* previous routine could use block we allocated */
1738         newblock = ext4_ext_pblock(&newex);
1739
1740 out:
1741         if (allocated > max_blocks)
1742                 allocated = max_blocks;
1743
1744         if (result)
1745                 *result = newblock;
1746
1747         if (blocks_count)
1748                 *blocks_count = allocated;
1749
1750 out2:
1751         if (path) {
1752                 ext4_ext_drop_refs(inode_ref, path, 0);
1753                 free(path);
1754         }
1755
1756         return err;
1757 }