Reconstruct source directory tree.
[lwext4.git] / src / ext4_extent.c
1 /*
2  * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3  * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * - Redistributions of source code must retain the above copyright
10  *   notice, this list of conditions and the following disclaimer.
11  * - Redistributions in binary form must reproduce the above copyright
12  *   notice, this list of conditions and the following disclaimer in the
13  *   documentation and/or other materials provided with the distribution.
14  * - The name of the author may not be used to endorse or promote products
15  *   derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28
29 #include "ext4_config.h"
30 #include "ext4_blockdev.h"
31 #include "ext4_fs.h"
32 #include "ext4_super.h"
33 #include "ext4_crc32.h"
34 #include "ext4_balloc.h"
35 #include "ext4_debug.h"
36
37 #include <stdlib.h>
38 #include <string.h>
39 #include <inttypes.h>
40 #include <stddef.h>
41
42 #include "ext4_extent.h"
43
/*
 * Flags used by extent splitting.
 * Each value is a distinct bit, so several flags can be OR-ed together.
 * "first half" / "second half" refer to the two pieces produced by a split.
 */
#define EXT4_EXT_MARK_UNWRIT1 0x02 /* mark first half unwritten */
#define EXT4_EXT_MARK_UNWRIT2 0x04 /* mark second half unwritten */
#define EXT4_EXT_DATA_VALID1 0x08  /* first half contains valid data */
#define EXT4_EXT_DATA_VALID2 0x10  /* second half contains valid data */
#define EXT4_EXT_NO_COMBINE 0x20   /* do not combine two extents */
52
/*
 * find_ext4_extent_tail:
 * return the checksum tail of an extent block, located
 * EXT4_EXTENT_TAIL_OFFSET(eh) bytes past the start of the header.
 */
static struct ext4_extent_tail *
find_ext4_extent_tail(struct ext4_extent_header *eh)
{
        char *base = (char *)eh;

        return (struct ext4_extent_tail *)(base + EXT4_EXTENT_TAIL_OFFSET(eh));
}
59
60 static struct ext4_extent_header *ext_inode_hdr(struct ext4_inode *inode)
61 {
62         return (struct ext4_extent_header *)inode->blocks;
63 }
64
65 static struct ext4_extent_header *ext_block_hdr(struct ext4_block *block)
66 {
67         return (struct ext4_extent_header *)block->data;
68 }
69
70 static uint16_t ext_depth(struct ext4_inode *inode)
71 {
72         return to_le16(ext_inode_hdr(inode)->depth);
73 }
74
75 static uint16_t ext4_ext_get_actual_len(struct ext4_extent *ext)
76 {
77         return (to_le16(ext->block_count) <= EXT_INIT_MAX_LEN
78                     ? to_le16(ext->block_count)
79                     : (to_le16(ext->block_count) - EXT_INIT_MAX_LEN));
80 }
81
82 static void ext4_ext_mark_initialized(struct ext4_extent *ext)
83 {
84         ext->block_count = to_le16(ext4_ext_get_actual_len(ext));
85 }
86
87 static void ext4_ext_mark_unwritten(struct ext4_extent *ext)
88 {
89         ext->block_count |= to_le16(EXT_INIT_MAX_LEN);
90 }
91
92 static int ext4_ext_is_unwritten(struct ext4_extent *ext)
93 {
94         /* Extent with ee_len of 0x8000 is treated as an initialized extent */
95         return (to_le16(ext->block_count) > EXT_INIT_MAX_LEN);
96 }
97
98 /*
99  * ext4_ext_pblock:
100  * combine low and high parts of physical block number into ext4_fsblk_t
101  */
102 static ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex)
103 {
104         ext4_fsblk_t block;
105
106         block = to_le32(ex->start_lo);
107         block |= ((ext4_fsblk_t)to_le16(ex->start_hi) << 31) << 1;
108         return block;
109 }
110
111 /*
112  * ext4_idx_pblock:
113  * combine low and high parts of a leaf physical block number into ext4_fsblk_t
114  */
115 static ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_index *ix)
116 {
117         ext4_fsblk_t block;
118
119         block = to_le32(ix->leaf_lo);
120         block |= ((ext4_fsblk_t)to_le16(ix->leaf_hi) << 31) << 1;
121         return block;
122 }
123
124 /*
125  * ext4_ext_store_pblock:
126  * stores a large physical block number into an extent struct,
127  * breaking it into parts
128  */
129 static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
130 {
131         ex->start_lo = to_le32((uint32_t)(pb & 0xffffffff));
132         ex->start_hi = to_le16((uint16_t)((pb >> 32)) & 0xffff);
133 }
134
135 /*
136  * ext4_idx_store_pblock:
137  * stores a large physical block number into an index struct,
138  * breaking it into parts
139  */
140 static void ext4_idx_store_pblock(struct ext4_extent_index *ix, ext4_fsblk_t pb)
141 {
142         ix->leaf_lo = to_le32((uint32_t)(pb & 0xffffffff));
143         ix->leaf_hi = to_le16((uint16_t)((pb >> 32)) & 0xffff);
144 }
145
146 static int ext4_allocate_single_block(struct ext4_inode_ref *inode_ref,
147                                       ext4_fsblk_t goal,
148                                       ext4_fsblk_t *blockp)
149 {
150         return ext4_balloc_alloc_block(inode_ref, goal, blockp);
151 }
152
153 static ext4_fsblk_t ext4_new_meta_blocks(struct ext4_inode_ref *inode_ref,
154                                          ext4_fsblk_t goal,
155                                          uint32_t flags __unused,
156                                          uint32_t *count, int *errp)
157 {
158         ext4_fsblk_t block = 0;
159
160         *errp = ext4_allocate_single_block(inode_ref, goal, &block);
161         if (count)
162                 *count = 1;
163         return block;
164 }
165
166 static void ext4_ext_free_blocks(struct ext4_inode_ref *inode_ref,
167                                  ext4_fsblk_t block, uint32_t count,
168                                  uint32_t flags __unused)
169 {
170         ext4_balloc_free_blocks(inode_ref, block, count);
171 }
172
173 static uint16_t ext4_ext_space_block(struct ext4_inode_ref *inode_ref)
174 {
175         uint16_t size;
176         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
177
178         size = (block_size - sizeof(struct ext4_extent_header)) /
179                sizeof(struct ext4_extent);
180 #ifdef AGGRESSIVE_TEST
181         if (size > 6)
182                 size = 6;
183 #endif
184         return size;
185 }
186
187 static uint16_t ext4_ext_space_block_idx(struct ext4_inode_ref *inode_ref)
188 {
189         uint16_t size;
190         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
191
192         size = (block_size - sizeof(struct ext4_extent_header)) /
193                sizeof(struct ext4_extent_index);
194 #ifdef AGGRESSIVE_TEST
195         if (size > 5)
196                 size = 5;
197 #endif
198         return size;
199 }
200
201 static uint16_t ext4_ext_space_root(struct ext4_inode_ref *inode_ref)
202 {
203         uint16_t size;
204
205         size = sizeof(inode_ref->inode->blocks);
206         size -= sizeof(struct ext4_extent_header);
207         size /= sizeof(struct ext4_extent);
208 #ifdef AGGRESSIVE_TEST
209         if (size > 3)
210                 size = 3;
211 #endif
212         return size;
213 }
214
215 static uint16_t ext4_ext_space_root_idx(struct ext4_inode_ref *inode_ref)
216 {
217         uint16_t size;
218
219         size = sizeof(inode_ref->inode->blocks);
220         size -= sizeof(struct ext4_extent_header);
221         size /= sizeof(struct ext4_extent_index);
222 #ifdef AGGRESSIVE_TEST
223         if (size > 4)
224                 size = 4;
225 #endif
226         return size;
227 }
228
229 static uint16_t ext4_ext_max_entries(struct ext4_inode_ref *inode_ref,
230                                    uint32_t depth)
231 {
232         uint16_t max;
233
234         if (depth == ext_depth(inode_ref->inode)) {
235                 if (depth == 0)
236                         max = ext4_ext_space_root(inode_ref);
237                 else
238                         max = ext4_ext_space_root_idx(inode_ref);
239         } else {
240                 if (depth == 0)
241                         max = ext4_ext_space_block(inode_ref);
242                 else
243                         max = ext4_ext_space_block_idx(inode_ref);
244         }
245
246         return max;
247 }
248
249 static ext4_fsblk_t ext4_ext_find_goal(struct ext4_inode_ref *inode_ref,
250                                        struct ext4_extent_path *path,
251                                        ext4_lblk_t block)
252 {
253         if (path) {
254                 uint32_t depth = path->depth;
255                 struct ext4_extent *ex;
256
257                 /*
258                  * Try to predict block placement assuming that we are
259                  * filling in a file which will eventually be
260                  * non-sparse --- i.e., in the case of libbfd writing
261                  * an ELF object sections out-of-order but in a way
262                  * the eventually results in a contiguous object or
263                  * executable file, or some database extending a table
264                  * space file.  However, this is actually somewhat
265                  * non-ideal if we are writing a sparse file such as
266                  * qemu or KVM writing a raw image file that is going
267                  * to stay fairly sparse, since it will end up
268                  * fragmenting the file system's free space.  Maybe we
269                  * should have some hueristics or some way to allow
270                  * userspace to pass a hint to file system,
271                  * especially if the latter case turns out to be
272                  * common.
273                  */
274                 ex = path[depth].extent;
275                 if (ex) {
276                         ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
277                         ext4_lblk_t ext_block = to_le32(ex->first_block);
278
279                         if (block > ext_block)
280                                 return ext_pblk + (block - ext_block);
281                         else
282                                 return ext_pblk - (ext_block - block);
283                 }
284
285                 /* it looks like index is empty;
286                  * try to find starting block from index itself */
287                 if (path[depth].block.lb_id)
288                         return path[depth].block.lb_id;
289         }
290
291         /* OK. use inode's group */
292         return ext4_fs_inode_to_goal_block(inode_ref);
293 }
294
295 /*
296  * Allocation for a meta data block
297  */
298 static ext4_fsblk_t ext4_ext_new_meta_block(struct ext4_inode_ref *inode_ref,
299                                             struct ext4_extent_path *path,
300                                             struct ext4_extent *ex, int *err,
301                                             uint32_t flags)
302 {
303         ext4_fsblk_t goal, newblock;
304
305         goal = ext4_ext_find_goal(inode_ref, path, to_le32(ex->first_block));
306         newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, err);
307         return newblock;
308 }
309
#if CONFIG_META_CSUM_ENABLE
/*
 * ext4_ext_block_csum:
 * crc32c of an extent block, chained over the filesystem uuid, the inode
 * number, the inode generation and the block contents up to the tail.
 * Returns 0 when the METADATA_CSUM feature is not set in the superblock.
 */
static uint32_t ext4_ext_block_csum(struct ext4_inode_ref *inode_ref,
                                    struct ext4_extent_header *eh)
{
        struct ext4_sblock *sb = &inode_ref->fs->sb;
        uint32_t ino_index;
        uint32_t ino_gen;
        uint32_t crc;

        if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
                return 0;

        ino_index = to_le32(inode_ref->index);
        ino_gen = to_le32(ext4_inode_get_generation(inode_ref->inode));

        /* seed with the filesystem uuid */
        crc = ext4_crc32c(EXT4_CRC32_INIT, sb->uuid, sizeof(sb->uuid));
        /* mix in inode number and inode generation */
        crc = ext4_crc32c(crc, &ino_index, sizeof(ino_index));
        crc = ext4_crc32c(crc, &ino_gen, sizeof(ino_gen));
        /* cover the extent block up to the checksum field */
        crc = ext4_crc32c(crc, eh, EXT4_EXTENT_TAIL_OFFSET(eh));

        return crc;
}
#else
#define ext4_ext_block_csum(...) 0
#endif
340
341 static void ext4_extent_block_csum_set(struct ext4_inode_ref *inode_ref __unused,
342                                        struct ext4_extent_header *eh)
343 {
344         struct ext4_extent_tail *tail;
345
346         tail = find_ext4_extent_tail(eh);
347         tail->et_checksum = to_le32(ext4_ext_block_csum(inode_ref, eh));
348 }
349
350 static int ext4_ext_dirty(struct ext4_inode_ref *inode_ref,
351                           struct ext4_extent_path *path)
352 {
353         if (path->block.lb_id)
354                 ext4_trans_set_block_dirty(path->block.buf);
355         else
356                 inode_ref->dirty = true;
357
358         return EOK;
359 }
360
361 static void ext4_ext_drop_refs(struct ext4_inode_ref *inode_ref,
362                                struct ext4_extent_path *path, bool keep_other)
363 {
364         int32_t depth, i;
365
366         if (!path)
367                 return;
368         if (keep_other)
369                 depth = 0;
370         else
371                 depth = path->depth;
372
373         for (i = 0; i <= depth; i++, path++) {
374                 if (path->block.lb_id) {
375                         if (ext4_bcache_test_flag(path->block.buf, BC_DIRTY))
376                                 ext4_extent_block_csum_set(inode_ref,
377                                                 path->header);
378
379                         ext4_block_set(inode_ref->fs->bdev, &path->block);
380                 }
381         }
382 }
383
384 /*
385  * Check that whether the basic information inside the extent header
386  * is correct or not.
387  */
388 static int ext4_ext_check(struct ext4_inode_ref *inode_ref,
389                           struct ext4_extent_header *eh, uint16_t depth,
390                           ext4_fsblk_t pblk __unused)
391 {
392         struct ext4_extent_tail *tail;
393         struct ext4_sblock *sb = &inode_ref->fs->sb;
394         const char *error_msg;
395         (void)error_msg;
396
397         if (to_le16(eh->magic) != EXT4_EXTENT_MAGIC) {
398                 error_msg = "invalid magic";
399                 goto corrupted;
400         }
401         if (to_le16(eh->depth) != depth) {
402                 error_msg = "unexpected eh_depth";
403                 goto corrupted;
404         }
405         if (eh->max_entries_count == 0) {
406                 error_msg = "invalid eh_max";
407                 goto corrupted;
408         }
409         if (to_le16(eh->entries_count) > to_le16(eh->max_entries_count)) {
410                 error_msg = "invalid eh_entries";
411                 goto corrupted;
412         }
413
414         tail = find_ext4_extent_tail(eh);
415         if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
416                 if (tail->et_checksum != to_le32(ext4_ext_block_csum(inode_ref, eh))) {
417                         ext4_dbg(DEBUG_EXTENT,
418                                  DBG_WARN "Extent block checksum failed."
419                                  "Blocknr: %" PRIu64"\n",
420                                  pblk);
421
422                 }
423         }
424
425         return EOK;
426
427 corrupted:
428         ext4_dbg(DEBUG_EXTENT, "Bad extents B+ tree block: %s. "
429                                "Blocknr: %" PRId64 "\n",
430                  error_msg, pblk);
431         return EIO;
432 }
433
434 static int read_extent_tree_block(struct ext4_inode_ref *inode_ref,
435                                   ext4_fsblk_t pblk, int32_t depth,
436                                   struct ext4_block *bh,
437                                   uint32_t flags __unused)
438 {
439         int err;
440
441         err = ext4_trans_block_get(inode_ref->fs->bdev, bh, pblk);
442         if (err != EOK)
443                 goto errout;
444
445         err = ext4_ext_check(inode_ref, ext_block_hdr(bh), depth, pblk);
446         if (err != EOK)
447                 goto errout;
448
449         return EOK;
450 errout:
451         if (bh->lb_id)
452                 ext4_block_set(inode_ref->fs->bdev, bh);
453
454         return err;
455 }
456
457 /*
458  * ext4_ext_binsearch_idx:
459  * binary search for the closest index of the given block
460  * the header must be checked before calling this
461  */
462 static void ext4_ext_binsearch_idx(struct ext4_extent_path *path,
463                                    ext4_lblk_t block)
464 {
465         struct ext4_extent_header *eh = path->header;
466         struct ext4_extent_index *r, *l, *m;
467
468         l = EXT_FIRST_INDEX(eh) + 1;
469         r = EXT_LAST_INDEX(eh);
470         while (l <= r) {
471                 m = l + (r - l) / 2;
472                 if (block < to_le32(m->first_block))
473                         r = m - 1;
474                 else
475                         l = m + 1;
476         }
477
478         path->index = l - 1;
479 }
480
481 /*
482  * ext4_ext_binsearch:
483  * binary search for closest extent of the given block
484  * the header must be checked before calling this
485  */
486 static void ext4_ext_binsearch(struct ext4_extent_path *path, ext4_lblk_t block)
487 {
488         struct ext4_extent_header *eh = path->header;
489         struct ext4_extent *r, *l, *m;
490
491         if (eh->entries_count == 0) {
492                 /*
493                  * this leaf is empty:
494                  * we get such a leaf in split/add case
495                  */
496                 return;
497         }
498
499         l = EXT_FIRST_EXTENT(eh) + 1;
500         r = EXT_LAST_EXTENT(eh);
501
502         while (l <= r) {
503                 m = l + (r - l) / 2;
504                 if (block < to_le32(m->first_block))
505                         r = m - 1;
506                 else
507                         l = m + 1;
508         }
509
510         path->extent = l - 1;
511 }
512
/*
 * ext4_find_extent:
 * walk the extent tree from the root header stored in the inode down to
 * the leaf responsible for logical block `block`, recording every visited
 * node in a path array.
 *
 * @inode_ref: inode whose tree is walked
 * @block:     logical block to look up
 * @orig_path: in/out path array. A non-NULL *orig_path is reused after its
 *             block references are dropped, unless the tree is deeper than
 *             its recorded maxdepth, in which case it is freed and a new
 *             array is allocated.
 * @flags:     passed through to read_extent_tree_block()
 *
 * On success returns EOK and stores the path in *orig_path: path[0]
 * describes the root, the last filled entry describes the leaf, with
 * .extent set by ext4_ext_binsearch() (left NULL for an empty leaf).
 * Returns ENOMEM if the path cannot be allocated. On a read or check
 * failure the references are dropped, the path is freed, *orig_path is
 * set to NULL and the error is returned.
 */
static int ext4_find_extent(struct ext4_inode_ref *inode_ref, ext4_lblk_t block,
                            struct ext4_extent_path **orig_path, uint32_t flags)
{
        struct ext4_extent_header *eh;
        struct ext4_block bh = EXT4_BLOCK_ZERO();
        ext4_fsblk_t buf_block = 0;
        struct ext4_extent_path *path = *orig_path;
        int32_t depth, ppos = 0;
        int32_t i;
        int ret;

        eh = ext_inode_hdr(inode_ref->inode);
        depth = ext_depth(inode_ref->inode);

        if (path) {
                /* release whatever blocks the caller's path still holds */
                ext4_ext_drop_refs(inode_ref, path, 0);
                if (depth > path[0].maxdepth) {
                        /* too small for the current tree: start over */
                        free(path);
                        *orig_path = path = NULL;
                }
        }
        if (!path) {
                int32_t path_depth = depth + 1;
                /* account possible depth increase */
                path = calloc(1, sizeof(struct ext4_extent_path) *
                                     (path_depth + 1));
                if (!path)
                        return ENOMEM;
                path[0].maxdepth = path_depth;
        }
        /* entry 0 is the root: header in the inode, zeroed block */
        path[0].header = eh;
        path[0].block = bh;

        i = depth;
        /* walk through the tree */
        while (i) {
                /* choose the child index at this level and note its block */
                ext4_ext_binsearch_idx(path + ppos, block);
                path[ppos].p_block = ext4_idx_pblock(path[ppos].index);
                path[ppos].depth = i;
                path[ppos].extent = NULL;
                buf_block = path[ppos].p_block;

                i--;
                ppos++;
                /* read the child unless this entry already holds that block */
                if (!path[ppos].block.lb_id ||
                    path[ppos].block.lb_id != buf_block) {
                        ret = read_extent_tree_block(inode_ref, buf_block, i,
                                                     &bh, flags);
                        if (ret != EOK) {
                                goto err;
                        }
                        if (ppos > depth) {
                                /* deeper than the root header announced */
                                ext4_block_set(inode_ref->fs->bdev, &bh);
                                ret = EIO;
                                goto err;
                        }

                        eh = ext_block_hdr(&bh);
                        path[ppos].block = bh;
                        path[ppos].header = eh;
                }
        }

        /* ppos now addresses the leaf entry (i is 0 here) */
        path[ppos].depth = i;
        path[ppos].extent = NULL;
        path[ppos].index = NULL;

        /* find extent */
        ext4_ext_binsearch(path + ppos, block);
        /* if not an empty leaf */
        if (path[ppos].extent)
                path[ppos].p_block = ext4_ext_pblock(path[ppos].extent);

        *orig_path = path;

        ret = EOK;
        return ret;

err:
        /* undo everything: references, the array and the caller's pointer */
        ext4_ext_drop_refs(inode_ref, path, 0);
        free(path);
        if (orig_path)
                *orig_path = NULL;
        return ret;
}
598
599 static void ext4_ext_init_header(struct ext4_inode_ref *inode_ref,
600                                  struct ext4_extent_header *eh, int32_t depth)
601 {
602         eh->entries_count = 0;
603         eh->max_entries_count = to_le16(ext4_ext_max_entries(inode_ref, depth));
604         eh->magic = to_le16(EXT4_EXTENT_MAGIC);
605         eh->depth = depth;
606 }
607
608 /*
609  * Be cautious, the buffer_head returned is not yet mark dirtied. */
610 static int ext4_ext_split_node(struct ext4_inode_ref *inode_ref,
611                                struct ext4_extent_path *path, int32_t at,
612                                struct ext4_extent *newext,
613                                ext4_fsblk_t *sibling, struct ext4_block *new_bh)
614 {
615         int ret;
616         ext4_fsblk_t newblock;
617         struct ext4_block bh = EXT4_BLOCK_ZERO();
618         int32_t depth = ext_depth(inode_ref->inode);
619
620         ext4_assert(sibling);
621
622         /* FIXME: currently we split at the point after the current extent. */
623         newblock = ext4_ext_new_meta_block(inode_ref, path, newext, &ret, 0);
624         if (ret)
625                 goto cleanup;
626
627         /*  For write access.# */
628         ret = ext4_trans_block_get_noread(inode_ref->fs->bdev, &bh, newblock);
629         if (ret != EOK)
630                 goto cleanup;
631
632         if (at == depth) {
633                 /* start copy from next extent */
634                 ptrdiff_t m = EXT_MAX_EXTENT(path[at].header) - path[at].extent;
635                 struct ext4_extent_header *neh;
636                 neh = ext_block_hdr(&bh);
637                 ext4_ext_init_header(inode_ref, neh, 0);
638                 if (m) {
639                         struct ext4_extent *ex;
640                         ex = EXT_FIRST_EXTENT(neh);
641                         memmove(ex, path[at].extent + 1,
642                                 sizeof(struct ext4_extent) * m);
643                         neh->entries_count =
644                             to_le16(to_le16(neh->entries_count) + m);
645                         path[at].header->entries_count = to_le16(
646                             to_le16(path[at].header->entries_count) - m);
647                         ret = ext4_ext_dirty(inode_ref, path + at);
648                         if (ret)
649                                 goto cleanup;
650                 }
651         } else {
652                 ptrdiff_t m = EXT_MAX_INDEX(path[at].header) - path[at].index;
653                 struct ext4_extent_header *neh;
654                 neh = ext_block_hdr(&bh);
655                 ext4_ext_init_header(inode_ref, neh, depth - at);
656                 if (m) {
657                         struct ext4_extent_index *ix;
658                         ix = EXT_FIRST_INDEX(neh);
659                         memmove(ix, path[at].index + 1,
660                                 sizeof(struct ext4_extent) * m);
661                         neh->entries_count =
662                             to_le16(to_le16(neh->entries_count) + m);
663                         path[at].header->entries_count = to_le16(
664                             to_le16(path[at].header->entries_count) - m);
665                         ret = ext4_ext_dirty(inode_ref, path + at);
666                         if (ret)
667                                 goto cleanup;
668                 }
669         }
670 cleanup:
671         if (ret) {
672                 if (bh.lb_id) {
673                         ext4_block_set(inode_ref->fs->bdev, &bh);
674                 }
675                 if (newblock)
676                         ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
677
678                 newblock = 0;
679         }
680         *sibling = newblock;
681         *new_bh = bh;
682         return ret;
683 }
684
685 static ext4_lblk_t ext4_ext_block_index(struct ext4_extent_header *eh)
686 {
687         if (eh->depth)
688                 return to_le32(EXT_FIRST_INDEX(eh)->first_block);
689
690         return to_le32(EXT_FIRST_EXTENT(eh)->first_block);
691 }
692
/*
 * Per-level record of a node split performed while inserting.
 * Filled in by ext4_ext_insert_index().
 */
struct ext_split_trans {
        /* block number of the sibling created by the split, 0 if none */
        ext4_fsblk_t ptr;
        /* path entry describing the node that received the new index */
        struct ext4_extent_path path;
        /* set to 1 when the new extent belongs in the sibling sub-tree */
        int switch_to;
};
698
699 static int ext4_ext_insert_index(struct ext4_inode_ref *inode_ref,
700                                  struct ext4_extent_path *path,
701                                  int32_t at,
702                                  struct ext4_extent *newext,
703                                  ext4_lblk_t insert_index,
704                                  ext4_fsblk_t insert_block,
705                                  struct ext_split_trans *spt,
706                                  bool *need_grow)
707 {
708         struct ext4_extent_index *ix;
709         struct ext4_extent_path *curp = path + at;
710         struct ext4_block bh = EXT4_BLOCK_ZERO();
711         int32_t len;
712         int err;
713         struct ext4_extent_header *eh;
714
715         *need_grow = false;
716
717         if (curp->index && insert_index == to_le32(curp->index->first_block))
718                 return EIO;
719
720         if (to_le16(curp->header->entries_count) ==
721             to_le16(curp->header->max_entries_count)) {
722                 if (at) {
723                         struct ext4_extent_header *neh;
724                         err = ext4_ext_split_node(inode_ref, path, at, newext,
725                                                   &spt->ptr, &bh);
726                         if (err != EOK)
727                                 goto out;
728
729                         neh = ext_block_hdr(&bh);
730                         if (insert_index > to_le32(curp->index->first_block)) {
731                                 /* Make decision which node should be used to
732                                  * insert the index.*/
733                                 if (to_le16(neh->entries_count) >
734                                     to_le16(curp->header->entries_count)) {
735                                         eh = curp->header;
736                                         /* insert after */
737                                         ix = EXT_LAST_INDEX(eh) + 1;
738                                 } else {
739                                         eh = neh;
740                                         ix = EXT_FIRST_INDEX(eh);
741                                 }
742                         } else {
743                                 eh = curp->header;
744                                 /* insert before */
745                                 ix = EXT_LAST_INDEX(eh);
746                         }
747                 } else {
748                         err = EOK;
749                         *need_grow = true;
750                         goto out;
751                 }
752         } else {
753                 eh = curp->header;
754                 if (curp->index == NULL) {
755                         ix = EXT_FIRST_INDEX(eh);
756                         curp->index = ix;
757                 } else if (insert_index > to_le32(curp->index->first_block)) {
758                         /* insert after */
759                         ix = curp->index + 1;
760                 } else {
761                         /* insert before */
762                         ix = curp->index;
763                 }
764         }
765
766         len = EXT_LAST_INDEX(eh) - ix + 1;
767         ext4_assert(len >= 0);
768         if (len > 0)
769                 memmove(ix + 1, ix, len * sizeof(struct ext4_extent_index));
770
771         if (ix > EXT_MAX_INDEX(eh)) {
772                 err = EIO;
773                 goto out;
774         }
775
776         ix->first_block = to_le32(insert_index);
777         ext4_idx_store_pblock(ix, insert_block);
778         eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
779
780         if (ix > EXT_LAST_INDEX(eh)) {
781                 err = EIO;
782                 goto out;
783         }
784
785         if (eh == curp->header)
786                 err = ext4_ext_dirty(inode_ref, curp);
787         else
788                 err = EOK;
789
790 out:
791         if (err != EOK || *need_grow) {
792                 if (bh.lb_id)
793                         ext4_block_set(inode_ref->fs->bdev, &bh);
794
795                 spt->ptr = 0;
796         } else if (bh.lb_id) {
797                 /* If we got a sibling leaf. */
798                 ext4_extent_block_csum_set(inode_ref, ext_block_hdr(&bh));
799                 ext4_trans_set_block_dirty(bh.buf);
800
801                 spt->path.p_block = ext4_idx_pblock(ix);
802                 spt->path.depth = to_le16(eh->depth);
803                 spt->path.maxdepth = 0;
804                 spt->path.extent = NULL;
805                 spt->path.index = ix;
806                 spt->path.header = eh;
807                 spt->path.block = bh;
808
809                 /*
810                  * If newext->ee_block can be included into the
811                  * right sub-tree.
812                  */
813                 if (to_le32(newext->first_block) >=
814                     ext4_ext_block_index(ext_block_hdr(&bh)))
815                         spt->switch_to = 1;
816                 else {
817                         curp->index = ix;
818                         curp->p_block = ext4_idx_pblock(ix);
819                 }
820
821         } else {
822                 spt->ptr = 0;
823                 curp->index = ix;
824                 curp->p_block = ext4_idx_pblock(ix);
825         }
826         return err;
827 }
828
829 /*
830  * ext4_ext_correct_indexes:
831  * if leaf gets modified and modified extent is first in the leaf,
832  * then we have to correct all indexes above.
833  */
834 static int ext4_ext_correct_indexes(struct ext4_inode_ref *inode_ref,
835                                     struct ext4_extent_path *path)
836 {
837         struct ext4_extent_header *eh;
838         int32_t depth = ext_depth(inode_ref->inode);
839         struct ext4_extent *ex;
840         uint32_t border;
841         int32_t k;
842         int err = EOK;
843
844         eh = path[depth].header;
845         ex = path[depth].extent;
846
847         if (ex == NULL || eh == NULL)
848                 return EIO;
849
850         if (depth == 0) {
851                 /* there is no tree at all */
852                 return EOK;
853         }
854
855         if (ex != EXT_FIRST_EXTENT(eh)) {
856                 /* we correct tree if first leaf got modified only */
857                 return EOK;
858         }
859
860         k = depth - 1;
861         border = path[depth].extent->first_block;
862         path[k].index->first_block = border;
863         err = ext4_ext_dirty(inode_ref, path + k);
864         if (err != EOK)
865                 return err;
866
867         while (k--) {
868                 /* change all left-side indexes */
869                 if (path[k + 1].index != EXT_FIRST_INDEX(path[k + 1].header))
870                         break;
871                 path[k].index->first_block = border;
872                 err = ext4_ext_dirty(inode_ref, path + k);
873                 if (err != EOK)
874                         break;
875         }
876
877         return err;
878 }
879
880 static bool ext4_ext_can_prepend(struct ext4_extent *ex1,
881                                  struct ext4_extent *ex2)
882 {
883         if (ext4_ext_pblock(ex2) + ext4_ext_get_actual_len(ex2) !=
884             ext4_ext_pblock(ex1))
885                 return false;
886
887 #ifdef AGGRESSIVE_TEST
888         if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) > 4)
889                 return 0;
890 #else
891         if (ext4_ext_is_unwritten(ex1)) {
892                 if (ext4_ext_get_actual_len(ex1) +
893                         ext4_ext_get_actual_len(ex2) >
894                     EXT_UNWRITTEN_MAX_LEN)
895                         return false;
896         } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
897                    EXT_INIT_MAX_LEN)
898                 return false;
899 #endif
900
901         if (to_le32(ex2->first_block) + ext4_ext_get_actual_len(ex2) !=
902             to_le32(ex1->first_block))
903                 return false;
904
905         return true;
906 }
907
908 static bool ext4_ext_can_append(struct ext4_extent *ex1,
909                                 struct ext4_extent *ex2)
910 {
911         if (ext4_ext_pblock(ex1) + ext4_ext_get_actual_len(ex1) !=
912             ext4_ext_pblock(ex2))
913                 return false;
914
915 #ifdef AGGRESSIVE_TEST
916         if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) > 4)
917                 return 0;
918 #else
919         if (ext4_ext_is_unwritten(ex1)) {
920                 if (ext4_ext_get_actual_len(ex1) +
921                         ext4_ext_get_actual_len(ex2) >
922                     EXT_UNWRITTEN_MAX_LEN)
923                         return false;
924         } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
925                    EXT_INIT_MAX_LEN)
926                 return false;
927 #endif
928
929         if (to_le32(ex1->first_block) + ext4_ext_get_actual_len(ex1) !=
930             to_le32(ex2->first_block))
931                 return false;
932
933         return true;
934 }
935
936 static int ext4_ext_insert_leaf(struct ext4_inode_ref *inode_ref,
937                                 struct ext4_extent_path *path,
938                                 int32_t at,
939                                 struct ext4_extent *newext,
940                                 struct ext_split_trans *spt,
941                                 uint32_t flags,
942                                 bool *need_grow)
943 {
944         struct ext4_extent_path *curp = path + at;
945         struct ext4_extent *ex = curp->extent;
946         struct ext4_block bh = EXT4_BLOCK_ZERO();
947         int32_t len;
948         int err = EOK;
949         int unwritten;
950         struct ext4_extent_header *eh = NULL;
951
952         *need_grow = false;
953
954         if (curp->extent &&
955             to_le32(newext->first_block) == to_le32(curp->extent->first_block))
956                 return EIO;
957
958         if (!(flags & EXT4_EXT_NO_COMBINE)) {
959                 if (curp->extent && ext4_ext_can_append(curp->extent, newext)) {
960                         unwritten = ext4_ext_is_unwritten(curp->extent);
961                         curp->extent->block_count =
962                             to_le16(ext4_ext_get_actual_len(curp->extent) +
963                                     ext4_ext_get_actual_len(newext));
964                         if (unwritten)
965                                 ext4_ext_mark_unwritten(curp->extent);
966                         err = ext4_ext_dirty(inode_ref, curp);
967                         goto out;
968                 }
969
970                 if (curp->extent &&
971                     ext4_ext_can_prepend(curp->extent, newext)) {
972                         unwritten = ext4_ext_is_unwritten(curp->extent);
973                         curp->extent->first_block = newext->first_block;
974                         curp->extent->block_count =
975                             to_le16(ext4_ext_get_actual_len(curp->extent) +
976                                     ext4_ext_get_actual_len(newext));
977                         if (unwritten)
978                                 ext4_ext_mark_unwritten(curp->extent);
979                         err = ext4_ext_dirty(inode_ref, curp);
980                         goto out;
981                 }
982         }
983
984         if (to_le16(curp->header->entries_count) ==
985             to_le16(curp->header->max_entries_count)) {
986                 if (at) {
987                         struct ext4_extent_header *neh;
988                         err = ext4_ext_split_node(inode_ref, path, at, newext,
989                                                   &spt->ptr, &bh);
990                         if (err != EOK)
991                                 goto out;
992
993                         neh = ext_block_hdr(&bh);
994                         if (to_le32(newext->first_block) >
995                             to_le32(curp->extent->first_block)) {
996                                 if (to_le16(neh->entries_count) >
997                                     to_le16(curp->header->entries_count)) {
998                                         eh = curp->header;
999                                         /* insert after */
1000                                         ex = EXT_LAST_EXTENT(eh) + 1;
1001                                 } else {
1002                                         eh = neh;
1003                                         ex = EXT_FIRST_EXTENT(eh);
1004                                 }
1005                         } else {
1006                                 eh = curp->header;
1007                                 /* insert before */
1008                                 ex = EXT_LAST_EXTENT(eh);
1009                         }
1010                 } else {
1011                         err = EOK;
1012                         *need_grow = true;
1013                         goto out;
1014                 }
1015         } else {
1016                 eh = curp->header;
1017                 if (curp->extent == NULL) {
1018                         ex = EXT_FIRST_EXTENT(eh);
1019                         curp->extent = ex;
1020                 } else if (to_le32(newext->first_block) >
1021                            to_le32(curp->extent->first_block)) {
1022                         /* insert after */
1023                         ex = curp->extent + 1;
1024                 } else {
1025                         /* insert before */
1026                         ex = curp->extent;
1027                 }
1028         }
1029
1030         len = EXT_LAST_EXTENT(eh) - ex + 1;
1031         ext4_assert(len >= 0);
1032         if (len > 0)
1033                 memmove(ex + 1, ex, len * sizeof(struct ext4_extent));
1034
1035         if (ex > EXT_MAX_EXTENT(eh)) {
1036                 err = EIO;
1037                 goto out;
1038         }
1039
1040         ex->first_block = newext->first_block;
1041         ex->block_count = newext->block_count;
1042         ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
1043         eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
1044
1045         if (ex > EXT_LAST_EXTENT(eh)) {
1046                 err = EIO;
1047                 goto out;
1048         }
1049
1050         if (eh == curp->header) {
1051                 err = ext4_ext_correct_indexes(inode_ref, path);
1052                 if (err != EOK)
1053                         goto out;
1054                 err = ext4_ext_dirty(inode_ref, curp);
1055         } else
1056                 err = EOK;
1057
1058 out:
1059         if (err != EOK || *need_grow) {
1060                 if (bh.lb_id)
1061                         ext4_block_set(inode_ref->fs->bdev, &bh);
1062
1063                 spt->ptr = 0;
1064         } else if (bh.lb_id) {
1065                 /* If we got a sibling leaf. */
1066                 ext4_extent_block_csum_set(inode_ref, ext_block_hdr(&bh));
1067                 ext4_trans_set_block_dirty(bh.buf);
1068
1069                 spt->path.p_block = ext4_ext_pblock(ex);
1070                 spt->path.depth = to_le16(eh->depth);
1071                 spt->path.maxdepth = 0;
1072                 spt->path.extent = ex;
1073                 spt->path.index = NULL;
1074                 spt->path.header = eh;
1075                 spt->path.block = bh;
1076
1077                 /*
1078                  * If newext->ee_block can be included into the
1079                  * right sub-tree.
1080                  */
1081                 if (to_le32(newext->first_block) >=
1082                     ext4_ext_block_index(ext_block_hdr(&bh)))
1083                         spt->switch_to = 1;
1084                 else {
1085                         curp->extent = ex;
1086                         curp->p_block = ext4_ext_pblock(ex);
1087                 }
1088
1089         } else {
1090                 spt->ptr = 0;
1091                 curp->extent = ex;
1092                 curp->p_block = ext4_ext_pblock(ex);
1093         }
1094
1095         return err;
1096 }
1097
1098 /*
1099  * ext4_ext_grow_indepth:
1100  * implements tree growing procedure:
1101  * - allocates new block
1102  * - moves top-level data (index block or leaf) into the new block
1103  * - initializes new top-level, creating index that points to the
1104  *   just created block
1105  */
1106 static int ext4_ext_grow_indepth(struct ext4_inode_ref *inode_ref,
1107                                  uint32_t flags)
1108 {
1109         struct ext4_extent_header *neh;
1110         struct ext4_block bh = EXT4_BLOCK_ZERO();
1111         ext4_fsblk_t newblock, goal = 0;
1112         int err = EOK;
1113
1114         /* Try to prepend new index to old one */
1115         if (ext_depth(inode_ref->inode))
1116                 goal = ext4_idx_pblock(
1117                     EXT_FIRST_INDEX(ext_inode_hdr(inode_ref->inode)));
1118         else
1119                 goal = ext4_fs_inode_to_goal_block(inode_ref);
1120
1121         newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, &err);
1122         if (newblock == 0)
1123                 return err;
1124
1125         /* # */
1126         err = ext4_trans_block_get_noread(inode_ref->fs->bdev, &bh, newblock);
1127         if (err != EOK) {
1128                 ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
1129                 return err;
1130         }
1131
1132         /* move top-level index/leaf into new block */
1133         memmove(bh.data, inode_ref->inode->blocks,
1134                 sizeof(inode_ref->inode->blocks));
1135
1136         /* set size of new block */
1137         neh = ext_block_hdr(&bh);
1138         /* old root could have indexes or leaves
1139          * so calculate e_max right way */
1140         if (ext_depth(inode_ref->inode))
1141                 neh->max_entries_count =
1142                     to_le16(ext4_ext_space_block_idx(inode_ref));
1143         else
1144                 neh->max_entries_count =
1145                     to_le16(ext4_ext_space_block(inode_ref));
1146
1147         neh->magic = to_le16(EXT4_EXTENT_MAGIC);
1148         ext4_extent_block_csum_set(inode_ref, neh);
1149
1150         /* Update top-level index: num,max,pointer */
1151         neh = ext_inode_hdr(inode_ref->inode);
1152         neh->entries_count = to_le16(1);
1153         ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
1154         if (neh->depth == 0) {
1155                 /* Root extent block becomes index block */
1156                 neh->max_entries_count =
1157                     to_le16(ext4_ext_space_root_idx(inode_ref));
1158                 EXT_FIRST_INDEX(neh)
1159                     ->first_block = EXT_FIRST_EXTENT(neh)->first_block;
1160         }
1161         neh->depth = to_le16(to_le16(neh->depth) + 1);
1162
1163         ext4_trans_set_block_dirty(bh.buf);
1164         inode_ref->dirty = true;
1165         ext4_block_set(inode_ref->fs->bdev, &bh);
1166
1167         return err;
1168 }
1169
1170 __unused static void print_path(struct ext4_extent_path *path)
1171 {
1172         int32_t i = path->depth;
1173         while (i >= 0) {
1174
1175                 ptrdiff_t a =
1176                     (path->extent)
1177                         ? (path->extent - EXT_FIRST_EXTENT(path->header))
1178                         : 0;
1179                 ptrdiff_t b =
1180                     (path->index)
1181                         ? (path->index - EXT_FIRST_INDEX(path->header))
1182                         : 0;
1183
1184                 (void)a;
1185                 (void)b;
1186                 ext4_dbg(DEBUG_EXTENT,
1187                          "depth %" PRId32 ", p_block: %" PRIu64 ","
1188                          "p_ext offset: %td, p_idx offset: %td\n",
1189                          i, path->p_block, a, b);
1190                 i--;
1191                 path++;
1192         }
1193 }
1194
1195 static void ext4_ext_replace_path(struct ext4_inode_ref *inode_ref,
1196                                   struct ext4_extent_path *path,
1197                                   struct ext_split_trans *spt,
1198                                   int32_t level)
1199 {
1200         int32_t depth = ext_depth(inode_ref->inode);
1201         int32_t i = depth - level;
1202         ext4_ext_drop_refs(inode_ref, path + i, 1);
1203         path[i] = spt[level].path;
1204 }
1205
1206 static int ext4_ext_insert_extent(struct ext4_inode_ref *inode_ref,
1207                                   struct ext4_extent_path **ppath,
1208                                   struct ext4_extent *newext, uint32_t flags)
1209 {
1210         int32_t i, depth, level;
1211         int ret = EOK;
1212         ext4_fsblk_t ptr = 0;
1213         bool need_grow = false;
1214         struct ext4_extent_path *path = *ppath;
1215         struct ext_split_trans *spt = NULL;
1216         struct ext_split_trans newblock;
1217
1218         memset(&newblock, 0, sizeof(newblock));
1219
1220         depth = ext_depth(inode_ref->inode);
1221         for (i = depth, level = 0; i >= 0; i--, level++)
1222                 if (EXT_HAS_FREE_INDEX(path + i))
1223                         break;
1224
1225         if (level) {
1226                 spt = calloc(1, sizeof(struct ext_split_trans) * (level));
1227                 if (!spt) {
1228                         ret = ENOMEM;
1229                         goto out;
1230                 }
1231         }
1232         i = 0;
1233 again:
1234         depth = ext_depth(inode_ref->inode);
1235
1236         do {
1237                 if (!i) {
1238                         ret = ext4_ext_insert_leaf(inode_ref, path, depth - i,
1239                                                    newext, &newblock, flags,
1240                                                    &need_grow);
1241                 } else {
1242                         ret = ext4_ext_insert_index(
1243                             inode_ref, path, depth - i, newext,
1244                             ext4_ext_block_index(
1245                                 ext_block_hdr(&spt[i - 1].path.block)),
1246                             spt[i - 1].ptr, &newblock,
1247                             &need_grow);
1248                 }
1249                 ptr = newblock.ptr;
1250
1251                 if (ret != EOK)
1252                         goto out;
1253
1254                 else if (spt && ptr && !ret) {
1255                         /* Prepare for the next iteration after splitting. */
1256                         spt[i] = newblock;
1257                 }
1258
1259                 i++;
1260         } while (ptr != 0 && i <= depth);
1261
1262         if (need_grow) {
1263                 ret = ext4_ext_grow_indepth(inode_ref, 0);
1264                 if (ret)
1265                         goto out;
1266                 ret = ext4_find_extent(inode_ref, to_le32(newext->first_block),
1267                                        ppath, 0);
1268                 if (ret)
1269                         goto out;
1270                 i = depth;
1271                 path = *ppath;
1272                 goto again;
1273         }
1274 out:
1275         if (ret) {
1276                 if (path)
1277                         ext4_ext_drop_refs(inode_ref, path, 0);
1278
1279                 while (--level >= 0 && spt) {
1280                         if (spt[level].ptr) {
1281                                 ext4_ext_free_blocks(inode_ref, spt[level].ptr,
1282                                                      1, 0);
1283                                 ext4_ext_drop_refs(inode_ref, &spt[level].path,
1284                                                    1);
1285                         }
1286                 }
1287         } else {
1288                 while (--level >= 0 && spt) {
1289                         if (spt[level].switch_to)
1290                                 ext4_ext_replace_path(inode_ref, path, spt,
1291                                                       level);
1292                         else if (spt[level].ptr)
1293                                 ext4_ext_drop_refs(inode_ref, &spt[level].path,
1294                                                    1);
1295                 }
1296         }
1297         if (spt)
1298                 free(spt);
1299
1300         return ret;
1301 }
1302
1303 static void ext4_ext_remove_blocks(struct ext4_inode_ref *inode_ref,
1304                                    struct ext4_extent *ex, ext4_lblk_t from,
1305                                    ext4_lblk_t to)
1306 {
1307         ext4_lblk_t len = to - from + 1;
1308         ext4_lblk_t num;
1309         ext4_fsblk_t start;
1310         num = from - to_le32(ex->first_block);
1311         start = ext4_ext_pblock(ex) + num;
1312         ext4_dbg(DEBUG_EXTENT,
1313                  "Freeing %" PRIu32 " at %" PRIu64 ", %" PRIu32 "\n", from,
1314                  start, len);
1315
1316         ext4_ext_free_blocks(inode_ref, start, len, 0);
1317 }
1318
1319 static int ext4_ext_remove_idx(struct ext4_inode_ref *inode_ref,
1320                                struct ext4_extent_path *path, int32_t depth)
1321 {
1322         int err = EOK;
1323         int32_t i = depth;
1324         ext4_fsblk_t leaf;
1325
1326         /* free index block */
1327         leaf = ext4_idx_pblock(path[i].index);
1328
1329         if (path[i].index != EXT_LAST_INDEX(path[i].header)) {
1330                 ptrdiff_t len = EXT_LAST_INDEX(path[i].header) - path[i].index;
1331                 memmove(path[i].index, path[i].index + 1,
1332                         len * sizeof(struct ext4_extent_index));
1333         }
1334
1335         path[i].header->entries_count =
1336             to_le16(to_le16(path[i].header->entries_count) - 1);
1337         err = ext4_ext_dirty(inode_ref, path + i);
1338         if (err != EOK)
1339                 return err;
1340
1341         ext4_dbg(DEBUG_EXTENT, "IDX: Freeing %" PRIu32 " at %" PRIu64 ", %d\n",
1342                  to_le32(path[i].index->first_block), leaf, 1);
1343         ext4_ext_free_blocks(inode_ref, leaf, 1, 0);
1344
1345         while (i > 0) {
1346                 if (path[i].index != EXT_FIRST_INDEX(path[i].header))
1347                         break;
1348
1349                 path[i - 1].index->first_block = path[i].index->first_block;
1350                 err = ext4_ext_dirty(inode_ref, path + i - 1);
1351                 if (err != EOK)
1352                         break;
1353
1354                 i--;
1355         }
1356         return err;
1357 }
1358
1359 static int ext4_ext_remove_leaf(struct ext4_inode_ref *inode_ref,
1360                                 struct ext4_extent_path *path, ext4_lblk_t from,
1361                                 ext4_lblk_t to)
1362 {
1363
1364         int32_t depth = ext_depth(inode_ref->inode);
1365         struct ext4_extent *ex = path[depth].extent;
1366         struct ext4_extent *start_ex, *ex2 = NULL;
1367         struct ext4_extent_header *eh = path[depth].header;
1368         int32_t len;
1369         int err = EOK;
1370         uint16_t new_entries;
1371
1372         start_ex = ex;
1373         new_entries = to_le16(eh->entries_count);
1374         while (ex <= EXT_LAST_EXTENT(path[depth].header) &&
1375                to_le32(ex->first_block) <= to) {
1376                 int32_t new_len = 0;
1377                 int unwritten;
1378                 ext4_lblk_t start, new_start;
1379                 ext4_fsblk_t newblock;
1380                 new_start = start = to_le32(ex->first_block);
1381                 len = ext4_ext_get_actual_len(ex);
1382                 newblock = ext4_ext_pblock(ex);
1383                 if (start < from) {
1384                         len -= from - start;
1385                         new_len = from - start;
1386                         start = from;
1387                         start_ex++;
1388                 } else {
1389                         if (start + len - 1 > to) {
1390                                 len -= start + len - 1 - to;
1391                                 new_len = start + len - 1 - to;
1392                                 new_start = to + 1;
1393                                 newblock += to + 1 - start;
1394                                 ex2 = ex;
1395                         }
1396                 }
1397
1398                 ext4_ext_remove_blocks(inode_ref, ex, start, start + len - 1);
1399                 ex->first_block = to_le32(new_start);
1400                 if (!new_len)
1401                         new_entries--;
1402                 else {
1403                         unwritten = ext4_ext_is_unwritten(ex);
1404                         ex->block_count = to_le16(new_len);
1405                         ext4_ext_store_pblock(ex, newblock);
1406                         if (unwritten)
1407                                 ext4_ext_mark_unwritten(ex);
1408                 }
1409
1410                 ex += 1;
1411         }
1412
1413         if (ex2 == NULL)
1414                 ex2 = ex;
1415
1416         if (ex2 <= EXT_LAST_EXTENT(eh))
1417                 memmove(start_ex, ex2, EXT_LAST_EXTENT(eh) - ex2 + 1);
1418
1419         eh->entries_count = to_le16(new_entries);
1420         ext4_ext_dirty(inode_ref, path + depth);
1421         if (path[depth].extent == EXT_FIRST_EXTENT(eh) && eh->entries_count)
1422                 err = ext4_ext_correct_indexes(inode_ref, path);
1423
1424         /* if this leaf is free, then we should
1425          * remove it from index block above */
1426         if (err == EOK && eh->entries_count == 0 && path[depth].block.lb_id)
1427                 err = ext4_ext_remove_idx(inode_ref, path, depth - 1);
1428         else if (depth > 0)
1429                 path[depth - 1].index++;
1430
1431         return err;
1432 }
1433
1434 static bool ext4_ext_more_to_rm(struct ext4_extent_path *path, ext4_lblk_t to)
1435 {
1436         if (!to_le16(path->header->entries_count))
1437                 return false;
1438
1439         if (path->index > EXT_LAST_INDEX(path->header))
1440                 return false;
1441
1442         if (to_le32(path->index->first_block) > to)
1443                 return false;
1444
1445         return true;
1446 }
1447
1448 int ext4_extent_remove_space(struct ext4_inode_ref *inode_ref, ext4_lblk_t from,
1449                           ext4_lblk_t to)
1450 {
1451         struct ext4_extent_path *path = NULL;
1452         int ret = EOK;
1453         int32_t depth = ext_depth(inode_ref->inode);
1454         int32_t i;
1455
1456         ret = ext4_find_extent(inode_ref, from, &path, 0);
1457         if (ret)
1458                 goto out;
1459
1460         if (!path[depth].extent) {
1461                 ret = EOK;
1462                 goto out;
1463         }
1464
1465         bool in_range = IN_RANGE(from, to_le32(path[depth].extent->first_block),
1466                         ext4_ext_get_actual_len(path[depth].extent));
1467
1468         if (!in_range) {
1469                 ret = EOK;
1470                 goto out;
1471         }
1472
1473         /* If we do remove_space inside the range of an extent */
1474         if ((to_le32(path[depth].extent->first_block) < from) &&
1475             (to < to_le32(path[depth].extent->first_block) +
1476                         ext4_ext_get_actual_len(path[depth].extent) - 1)) {
1477
1478                 struct ext4_extent *ex = path[depth].extent, newex;
1479                 int unwritten = ext4_ext_is_unwritten(ex);
1480                 ext4_lblk_t ee_block = to_le32(ex->first_block);
1481                 int32_t len = ext4_ext_get_actual_len(ex);
1482                 ext4_fsblk_t newblock =
1483                         to + 1 - ee_block + ext4_ext_pblock(ex);
1484
1485                 ex->block_count = to_le16(from - ee_block);
1486                 if (unwritten)
1487                         ext4_ext_mark_unwritten(ex);
1488
1489                 ext4_ext_dirty(inode_ref, path + depth);
1490
1491                 newex.first_block = to_le32(to + 1);
1492                 newex.block_count = to_le16(ee_block + len - 1 - to);
1493                 ext4_ext_store_pblock(&newex, newblock);
1494                 if (unwritten)
1495                         ext4_ext_mark_unwritten(&newex);
1496
1497                 ret = ext4_ext_insert_extent(inode_ref, &path, &newex, 0);
1498                 goto out;
1499         }
1500
1501         i = depth;
1502         while (i >= 0) {
1503                 if (i == depth) {
1504                         struct ext4_extent_header *eh;
1505                         struct ext4_extent *first_ex, *last_ex;
1506                         ext4_lblk_t leaf_from, leaf_to;
1507                         eh = path[i].header;
1508                         ext4_assert(to_le16(eh->entries_count) > 0);
1509                         first_ex = EXT_FIRST_EXTENT(eh);
1510                         last_ex = EXT_LAST_EXTENT(eh);
1511                         leaf_from = to_le32(first_ex->first_block);
1512                         leaf_to = to_le32(last_ex->first_block) +
1513                                   ext4_ext_get_actual_len(last_ex) - 1;
1514                         if (leaf_from < from)
1515                                 leaf_from = from;
1516
1517                         if (leaf_to > to)
1518                                 leaf_to = to;
1519
1520                         ext4_ext_remove_leaf(inode_ref, path, leaf_from,
1521                                         leaf_to);
1522                         ext4_ext_drop_refs(inode_ref, path + i, 0);
1523                         i--;
1524                         continue;
1525                 }
1526
1527                 struct ext4_extent_header *eh;
1528                 eh = path[i].header;
1529                 if (ext4_ext_more_to_rm(path + i, to)) {
1530                         struct ext4_block bh = EXT4_BLOCK_ZERO();
1531                         if (path[i + 1].block.lb_id)
1532                                 ext4_ext_drop_refs(inode_ref, path + i + 1, 0);
1533
1534                         ret = read_extent_tree_block(inode_ref,
1535                                         ext4_idx_pblock(path[i].index),
1536                                         depth - i - 1, &bh, 0);
1537                         if (ret)
1538                                 goto out;
1539
1540                         path[i].p_block =
1541                                         ext4_idx_pblock(path[i].index);
1542                         path[i + 1].block = bh;
1543                         path[i + 1].header = ext_block_hdr(&bh);
1544                         path[i + 1].depth = depth - i - 1;
1545                         if (i + 1 == depth)
1546                                 path[i + 1].extent = EXT_FIRST_EXTENT(
1547                                         path[i + 1].header);
1548                         else
1549                                 path[i + 1].index =
1550                                         EXT_FIRST_INDEX(path[i + 1].header);
1551
1552                         i++;
1553                 } else {
1554                         if (i > 0) {
1555                                 if (!eh->entries_count)
1556                                         ret = ext4_ext_remove_idx(inode_ref, path,
1557                                                         i - 1);
1558                                 else
1559                                         path[i - 1].index++;
1560
1561                         }
1562
1563                         if (i)
1564                                 ext4_block_set(inode_ref->fs->bdev,
1565                                                 &path[i].block);
1566
1567
1568                         i--;
1569                 }
1570
1571         }
1572
1573         /* TODO: flexible tree reduction should be here */
1574         if (path->header->entries_count == 0) {
1575                 /*
1576                  * truncate to zero freed all the tree,
1577                  * so we need to correct eh_depth
1578                  */
1579                 ext_inode_hdr(inode_ref->inode)->depth = 0;
1580                 ext_inode_hdr(inode_ref->inode)->max_entries_count =
1581                     to_le16(ext4_ext_space_root(inode_ref));
1582                 ret = ext4_ext_dirty(inode_ref, path);
1583         }
1584
1585 out:
1586         ext4_ext_drop_refs(inode_ref, path, 0);
1587         free(path);
1588         path = NULL;
1589         return ret;
1590 }
1591
1592 static int ext4_ext_split_extent_at(struct ext4_inode_ref *inode_ref,
1593                                     struct ext4_extent_path **ppath,
1594                                     ext4_lblk_t split, uint32_t split_flag)
1595 {
1596         struct ext4_extent *ex, newex;
1597         ext4_fsblk_t newblock;
1598         ext4_lblk_t ee_block;
1599         int32_t ee_len;
1600         int32_t depth = ext_depth(inode_ref->inode);
1601         int err = EOK;
1602
1603         ex = (*ppath)[depth].extent;
1604         ee_block = to_le32(ex->first_block);
1605         ee_len = ext4_ext_get_actual_len(ex);
1606         newblock = split - ee_block + ext4_ext_pblock(ex);
1607
1608         if (split == ee_block) {
1609                 /*
1610                  * case b: block @split is the block that the extent begins with
1611                  * then we just change the state of the extent, and splitting
1612                  * is not needed.
1613                  */
1614                 if (split_flag & EXT4_EXT_MARK_UNWRIT2)
1615                         ext4_ext_mark_unwritten(ex);
1616                 else
1617                         ext4_ext_mark_initialized(ex);
1618
1619                 err = ext4_ext_dirty(inode_ref, *ppath + depth);
1620                 goto out;
1621         }
1622
1623         ex->block_count = to_le16(split - ee_block);
1624         if (split_flag & EXT4_EXT_MARK_UNWRIT1)
1625                 ext4_ext_mark_unwritten(ex);
1626
1627         err = ext4_ext_dirty(inode_ref, *ppath + depth);
1628         if (err != EOK)
1629                 goto out;
1630
1631         newex.first_block = to_le32(split);
1632         newex.block_count = to_le16(ee_len - (split - ee_block));
1633         ext4_ext_store_pblock(&newex, newblock);
1634         if (split_flag & EXT4_EXT_MARK_UNWRIT2)
1635                 ext4_ext_mark_unwritten(&newex);
1636         err = ext4_ext_insert_extent(inode_ref, ppath, &newex,
1637                                      EXT4_EXT_NO_COMBINE);
1638         if (err != EOK)
1639                 goto restore_extent_len;
1640
1641 out:
1642         return err;
1643 restore_extent_len:
1644         ex->block_count = to_le16(ee_len);
1645         err = ext4_ext_dirty(inode_ref, *ppath + depth);
1646         return err;
1647 }
1648
1649 static int ext4_ext_convert_to_initialized(struct ext4_inode_ref *inode_ref,
1650                                            struct ext4_extent_path **ppath,
1651                                            ext4_lblk_t split, uint32_t blocks)
1652 {
1653         int32_t depth = ext_depth(inode_ref->inode), err = EOK;
1654         struct ext4_extent *ex = (*ppath)[depth].extent;
1655
1656         ext4_assert(to_le32(ex->first_block) <= split);
1657
1658         if (split + blocks ==
1659             to_le32(ex->first_block) + ext4_ext_get_actual_len(ex)) {
1660                 /* split and initialize right part */
1661                 err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1662                                                EXT4_EXT_MARK_UNWRIT1);
1663         } else if (to_le32(ex->first_block) == split) {
1664                 /* split and initialize left part */
1665                 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1666                                                EXT4_EXT_MARK_UNWRIT2);
1667         } else {
1668                 /* split 1 extent to 3 and initialize the 2nd */
1669                 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1670                                                EXT4_EXT_MARK_UNWRIT1 |
1671                                                    EXT4_EXT_MARK_UNWRIT2);
1672                 if (!err) {
1673                         err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1674                                                        EXT4_EXT_MARK_UNWRIT1);
1675                 }
1676         }
1677
1678         return err;
1679 }
1680
1681 static ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_extent_path *path)
1682 {
1683         int32_t depth;
1684
1685         depth = path->depth;
1686
1687         if (depth == 0 && path->extent == NULL)
1688                 return EXT_MAX_BLOCKS;
1689
1690         while (depth >= 0) {
1691                 if (depth == path->depth) {
1692                         /* leaf */
1693                         if (path[depth].extent &&
1694                             path[depth].extent !=
1695                                 EXT_LAST_EXTENT(path[depth].header))
1696                                 return to_le32(
1697                                     path[depth].extent[1].first_block);
1698                 } else {
1699                         /* index */
1700                         if (path[depth].index !=
1701                             EXT_LAST_INDEX(path[depth].header))
1702                                 return to_le32(
1703                                     path[depth].index[1].first_block);
1704                 }
1705                 depth--;
1706         }
1707
1708         return EXT_MAX_BLOCKS;
1709 }
1710
1711 static int ext4_ext_zero_unwritten_range(struct ext4_inode_ref *inode_ref,
1712                                          ext4_fsblk_t block,
1713                                          uint32_t blocks_count)
1714 {
1715         int err = EOK;
1716         uint32_t i;
1717         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
1718         for (i = 0; i < blocks_count; i++) {
1719                 struct ext4_block bh = EXT4_BLOCK_ZERO();
1720                 err = ext4_trans_block_get_noread(inode_ref->fs->bdev, &bh, block + i);
1721                 if (err != EOK)
1722                         break;
1723
1724                 memset(bh.data, 0, block_size);
1725                 ext4_trans_set_block_dirty(bh.buf);
1726                 err = ext4_block_set(inode_ref->fs->bdev, &bh);
1727                 if (err != EOK)
1728                         break;
1729         }
1730         return err;
1731 }
1732
1733 int ext4_extent_get_blocks(struct ext4_inode_ref *inode_ref, ext4_lblk_t iblock,
1734                         uint32_t max_blocks, ext4_fsblk_t *result, bool create,
1735                         uint32_t *blocks_count)
1736 {
1737         struct ext4_extent_path *path = NULL;
1738         struct ext4_extent newex, *ex;
1739         ext4_fsblk_t goal;
1740         int err = EOK;
1741         int32_t depth;
1742         uint32_t allocated = 0;
1743         ext4_lblk_t next;
1744         ext4_fsblk_t newblock;
1745
1746         if (result)
1747                 *result = 0;
1748
1749         if (blocks_count)
1750                 *blocks_count = 0;
1751
1752         /* find extent for this block */
1753         err = ext4_find_extent(inode_ref, iblock, &path, 0);
1754         if (err != EOK) {
1755                 path = NULL;
1756                 goto out2;
1757         }
1758
1759         depth = ext_depth(inode_ref->inode);
1760
1761         /*
1762          * consistent leaf must not be empty
1763          * this situations is possible, though, _during_ tree modification
1764          * this is why assert can't be put in ext4_ext_find_extent()
1765          */
1766         ex = path[depth].extent;
1767         if (ex) {
1768                 ext4_lblk_t ee_block = to_le32(ex->first_block);
1769                 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
1770                 uint16_t ee_len = ext4_ext_get_actual_len(ex);
1771                 /* if found exent covers block, simple return it */
1772                 if (IN_RANGE(iblock, ee_block, ee_len)) {
1773                         /* number of remain blocks in the extent */
1774                         allocated = ee_len - (iblock - ee_block);
1775
1776                         if (!ext4_ext_is_unwritten(ex)) {
1777                                 newblock = iblock - ee_block + ee_start;
1778                                 goto out;
1779                         }
1780
1781                         if (!create) {
1782                                 newblock = 0;
1783                                 goto out;
1784                         }
1785
1786                         uint32_t zero_range;
1787                         zero_range = allocated;
1788                         if (zero_range > max_blocks)
1789                                 zero_range = max_blocks;
1790
1791                         newblock = iblock - ee_block + ee_start;
1792                         err = ext4_ext_zero_unwritten_range(inode_ref, newblock,
1793                                         zero_range);
1794                         if (err != EOK)
1795                                 goto out2;
1796
1797                         err = ext4_ext_convert_to_initialized(inode_ref, &path,
1798                                         iblock, zero_range);
1799                         if (err != EOK)
1800                                 goto out2;
1801
1802                         goto out;
1803                 }
1804         }
1805
1806         /*
1807          * requested block isn't allocated yet
1808          * we couldn't try to create block if create flag is zero
1809          */
1810         if (!create) {
1811                 goto out2;
1812         }
1813
1814         /* find next allocated block so that we know how many
1815          * blocks we can allocate without ovelapping next extent */
1816         next = ext4_ext_next_allocated_block(path);
1817         allocated = next - iblock;
1818         if (allocated > max_blocks)
1819                 allocated = max_blocks;
1820
1821         /* allocate new block */
1822         goal = ext4_ext_find_goal(inode_ref, path, iblock);
1823         newblock = ext4_new_meta_blocks(inode_ref, goal, 0, &allocated, &err);
1824         if (!newblock)
1825                 goto out2;
1826
1827         /* try to insert new extent into found leaf and return */
1828         newex.first_block = to_le32(iblock);
1829         ext4_ext_store_pblock(&newex, newblock);
1830         newex.block_count = to_le16(allocated);
1831         err = ext4_ext_insert_extent(inode_ref, &path, &newex, 0);
1832         if (err != EOK) {
1833                 /* free data blocks we just allocated */
1834                 ext4_ext_free_blocks(inode_ref, ext4_ext_pblock(&newex),
1835                                      to_le16(newex.block_count), 0);
1836                 goto out2;
1837         }
1838
1839         /* previous routine could use block we allocated */
1840         newblock = ext4_ext_pblock(&newex);
1841
1842 out:
1843         if (allocated > max_blocks)
1844                 allocated = max_blocks;
1845
1846         if (result)
1847                 *result = newblock;
1848
1849         if (blocks_count)
1850                 *blocks_count = allocated;
1851
1852 out2:
1853         if (path) {
1854                 ext4_ext_drop_refs(inode_ref, path, 0);
1855                 free(path);
1856         }
1857
1858         return err;
1859 }