Introduce CONFIG_META_CSUM_ENABLE flag
[lwext4.git] / lwext4 / ext4_extent_full.c
1 /*
2  * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3  * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * - Redistributions of source code must retain the above copyright
10  *   notice, this list of conditions and the following disclaimer.
11  * - Redistributions in binary form must reproduce the above copyright
12  *   notice, this list of conditions and the following disclaimer in the
13  *   documentation and/or other materials provided with the distribution.
14  * - The name of the author may not be used to endorse or promote products
15  *   derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28
29 #include "ext4_config.h"
30 #include "ext4_blockdev.h"
31 #include "ext4_fs.h"
32 #include "ext4_super.h"
33 #include "ext4_crc32c.h"
34 #include "ext4_balloc.h"
35 #include "ext4_debug.h"
36
37 #include <stdlib.h>
38 #include <string.h>
39 #include <inttypes.h>
40 #include <stddef.h>
41
42 #include "ext4_extent.h"
43
44 #if CONFIG_EXTENT_FULL
45
46 /*
47  * used by extent splitting.
48  */
49 #define EXT4_EXT_MARK_UNWRIT1 0x02 /* mark first half unwritten */
50 #define EXT4_EXT_MARK_UNWRIT2 0x04 /* mark second half unwritten */
51 #define EXT4_EXT_DATA_VALID1 0x08  /* first half contains valid data */
52 #define EXT4_EXT_DATA_VALID2 0x10  /* second half contains valid data */
53 #define EXT4_EXT_NO_COMBINE 0x20   /* do not combine two extents */
54
55 static struct ext4_extent_tail *
56 find_ext4_extent_tail(struct ext4_extent_header *eh)
57 {
58         return (struct ext4_extent_tail *)(((char *)eh) +
59                                            EXT4_EXTENT_TAIL_OFFSET(eh));
60 }
61
62 static struct ext4_extent_header *ext_inode_hdr(struct ext4_inode *inode)
63 {
64         return (struct ext4_extent_header *)inode->blocks;
65 }
66
67 static struct ext4_extent_header *ext_block_hdr(struct ext4_block *block)
68 {
69         return (struct ext4_extent_header *)block->data;
70 }
71
72 static uint16_t ext_depth(struct ext4_inode *inode)
73 {
74         return to_le16(ext_inode_hdr(inode)->depth);
75 }
76
77 static uint16_t ext4_ext_get_actual_len(struct ext4_extent *ext)
78 {
79         return (to_le16(ext->block_count) <= EXT_INIT_MAX_LEN
80                     ? to_le16(ext->block_count)
81                     : (to_le16(ext->block_count) - EXT_INIT_MAX_LEN));
82 }
83
84 static void ext4_ext_mark_initialized(struct ext4_extent *ext)
85 {
86         ext->block_count = to_le16(ext4_ext_get_actual_len(ext));
87 }
88
89 static void ext4_ext_mark_unwritten(struct ext4_extent *ext)
90 {
91         ext->block_count |= to_le16(EXT_INIT_MAX_LEN);
92 }
93
94 static int ext4_ext_is_unwritten(struct ext4_extent *ext)
95 {
96         /* Extent with ee_len of 0x8000 is treated as an initialized extent */
97         return (to_le16(ext->block_count) > EXT_INIT_MAX_LEN);
98 }
99
100 /*
101  * ext4_ext_pblock:
102  * combine low and high parts of physical block number into ext4_fsblk_t
103  */
104 static ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex)
105 {
106         ext4_fsblk_t block;
107
108         block = to_le32(ex->start_lo);
109         block |= ((ext4_fsblk_t)to_le16(ex->start_hi) << 31) << 1;
110         return block;
111 }
112
113 /*
114  * ext4_idx_pblock:
115  * combine low and high parts of a leaf physical block number into ext4_fsblk_t
116  */
117 static ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_index *ix)
118 {
119         ext4_fsblk_t block;
120
121         block = to_le32(ix->leaf_lo);
122         block |= ((ext4_fsblk_t)to_le16(ix->leaf_hi) << 31) << 1;
123         return block;
124 }
125
126 /*
127  * ext4_ext_store_pblock:
128  * stores a large physical block number into an extent struct,
129  * breaking it into parts
130  */
131 static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
132 {
133         ex->start_lo = to_le32((uint32_t)(pb & 0xffffffff));
134         ex->start_hi = to_le16((uint16_t)((pb >> 32)) & 0xffff);
135 }
136
137 /*
138  * ext4_idx_store_pblock:
139  * stores a large physical block number into an index struct,
140  * breaking it into parts
141  */
142 static void ext4_idx_store_pblock(struct ext4_extent_index *ix, ext4_fsblk_t pb)
143 {
144         ix->leaf_lo = to_le32((uint32_t)(pb & 0xffffffff));
145         ix->leaf_hi = to_le16((uint16_t)((pb >> 32)) & 0xffff);
146 }
147
148 static int ext4_allocate_single_block(struct ext4_inode_ref *inode_ref,
149                                       ext4_fsblk_t goal,
150                                       ext4_fsblk_t *blockp)
151 {
152         return ext4_balloc_alloc_block(inode_ref, goal, blockp);
153 }
154
155 static ext4_fsblk_t ext4_new_meta_blocks(struct ext4_inode_ref *inode_ref,
156                                          ext4_fsblk_t goal,
157                                          uint32_t flags __unused,
158                                          uint32_t *count, int *errp)
159 {
160         ext4_fsblk_t block = 0;
161
162         *errp = ext4_allocate_single_block(inode_ref, goal, &block);
163         if (count)
164                 *count = 1;
165         return block;
166 }
167
168 static void ext4_ext_free_blocks(struct ext4_inode_ref *inode_ref,
169                                  ext4_fsblk_t block, uint32_t count,
170                                  uint32_t flags __unused)
171 {
172         ext4_balloc_free_blocks(inode_ref, block, count);
173 }
174
175 static size_t ext4_ext_space_block(struct ext4_inode_ref *inode_ref)
176 {
177         size_t size;
178         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
179
180         size = (block_size - sizeof(struct ext4_extent_header)) /
181                sizeof(struct ext4_extent);
182 #ifdef AGGRESSIVE_TEST
183         if (size > 6)
184                 size = 6;
185 #endif
186         return size;
187 }
188
189 static size_t ext4_ext_space_block_idx(struct ext4_inode_ref *inode_ref)
190 {
191         size_t size;
192         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
193
194         size = (block_size - sizeof(struct ext4_extent_header)) /
195                sizeof(struct ext4_extent_index);
196 #ifdef AGGRESSIVE_TEST
197         if (size > 5)
198                 size = 5;
199 #endif
200         return size;
201 }
202
203 static size_t ext4_ext_space_root(struct ext4_inode_ref *inode_ref)
204 {
205         size_t size;
206
207         size = sizeof(inode_ref->inode->blocks);
208         size -= sizeof(struct ext4_extent_header);
209         size /= sizeof(struct ext4_extent);
210 #ifdef AGGRESSIVE_TEST
211         if (size > 3)
212                 size = 3;
213 #endif
214         return size;
215 }
216
217 static size_t ext4_ext_space_root_idx(struct ext4_inode_ref *inode_ref)
218 {
219         size_t size;
220
221         size = sizeof(inode_ref->inode->blocks);
222         size -= sizeof(struct ext4_extent_header);
223         size /= sizeof(struct ext4_extent_index);
224 #ifdef AGGRESSIVE_TEST
225         if (size > 4)
226                 size = 4;
227 #endif
228         return size;
229 }
230
231 static size_t ext4_ext_max_entries(struct ext4_inode_ref *inode_ref,
232                                    uint32_t depth)
233 {
234         size_t max;
235
236         if (depth == ext_depth(inode_ref->inode)) {
237                 if (depth == 0)
238                         max = ext4_ext_space_root(inode_ref);
239                 else
240                         max = ext4_ext_space_root_idx(inode_ref);
241         } else {
242                 if (depth == 0)
243                         max = ext4_ext_space_block(inode_ref);
244                 else
245                         max = ext4_ext_space_block_idx(inode_ref);
246         }
247
248         return max;
249 }
250
251 static ext4_fsblk_t ext4_ext_find_goal(struct ext4_inode_ref *inode_ref,
252                                        struct ext4_extent_path *path,
253                                        ext4_lblk_t block)
254 {
255         if (path) {
256                 uint32_t depth = path->depth;
257                 struct ext4_extent *ex;
258
259                 /*
260                  * Try to predict block placement assuming that we are
261                  * filling in a file which will eventually be
262                  * non-sparse --- i.e., in the case of libbfd writing
263                  * an ELF object sections out-of-order but in a way
264                  * the eventually results in a contiguous object or
265                  * executable file, or some database extending a table
266                  * space file.  However, this is actually somewhat
267                  * non-ideal if we are writing a sparse file such as
268                  * qemu or KVM writing a raw image file that is going
269                  * to stay fairly sparse, since it will end up
270                  * fragmenting the file system's free space.  Maybe we
271                  * should have some hueristics or some way to allow
272                  * userspace to pass a hint to file system,
273                  * especially if the latter case turns out to be
274                  * common.
275                  */
276                 ex = path[depth].extent;
277                 if (ex) {
278                         ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
279                         ext4_lblk_t ext_block = to_le32(ex->first_block);
280
281                         if (block > ext_block)
282                                 return ext_pblk + (block - ext_block);
283                         else
284                                 return ext_pblk - (ext_block - block);
285                 }
286
287                 /* it looks like index is empty;
288                  * try to find starting block from index itself */
289                 if (path[depth].block.lb_id)
290                         return path[depth].block.lb_id;
291         }
292
293         /* OK. use inode's group */
294         return ext4_fs_inode_to_goal_block(inode_ref);
295 }
296
297 /*
298  * Allocation for a meta data block
299  */
300 static ext4_fsblk_t ext4_ext_new_meta_block(struct ext4_inode_ref *inode_ref,
301                                             struct ext4_extent_path *path,
302                                             struct ext4_extent *ex, int *err,
303                                             uint32_t flags)
304 {
305         ext4_fsblk_t goal, newblock;
306
307         goal = ext4_ext_find_goal(inode_ref, path, to_le32(ex->first_block));
308         newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, err);
309         return newblock;
310 }
311
312 #if CONFIG_META_CSUM_ENABLE
313 static uint32_t ext4_ext_block_csum(struct ext4_inode_ref *inode_ref,
314                                     struct ext4_extent_header *eh)
315 {
316         uint32_t checksum = 0;
317         struct ext4_sblock *sb = &inode_ref->fs->sb;
318
319         if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
320                 uint32_t ino_index = to_le32(inode_ref->index);
321                 uint32_t ino_gen =
322                         to_le32(ext4_inode_get_generation(inode_ref->inode));
323                 /* First calculate crc32 checksum against fs uuid */
324                 checksum = ext4_crc32c(EXT4_CRC32_INIT, sb->uuid,
325                                 sizeof(sb->uuid));
326                 /* Then calculate crc32 checksum against inode number
327                  * and inode generation */
328                 checksum = ext4_crc32c(checksum, &ino_index,
329                                      sizeof(ino_index));
330                 checksum = ext4_crc32c(checksum, &ino_gen,
331                                      sizeof(ino_gen));
332                 /* Finally calculate crc32 checksum against 
333                  * the entire extent block up to the checksum field */
334                 checksum = ext4_crc32c(checksum, eh,
335                                 EXT4_EXTENT_TAIL_OFFSET(eh));
336         }
337         return checksum;
338 }
339 #else
340 #define ext4_ext_block_csum(...) 0
341 #endif
342
343 static void ext4_extent_block_csum_set(struct ext4_inode_ref *inode_ref __unused,
344                                        struct ext4_extent_header *eh)
345 {
346         struct ext4_extent_tail *tail;
347
348         tail = find_ext4_extent_tail(eh);
349         tail->et_checksum = to_le32(ext4_ext_block_csum(inode_ref, eh));
350 }
351
352 static int ext4_ext_dirty(struct ext4_inode_ref *inode_ref,
353                           struct ext4_extent_path *path)
354 {
355         if (path->block.lb_id)
356                 path->block.dirty = true;
357         else
358                 inode_ref->dirty = true;
359
360         return EOK;
361 }
362
363 static void ext4_ext_drop_refs(struct ext4_inode_ref *inode_ref,
364                                struct ext4_extent_path *path, bool keep_other)
365 {
366         int32_t depth, i;
367
368         if (!path)
369                 return;
370         if (keep_other)
371                 depth = 0;
372         else
373                 depth = path->depth;
374
375         for (i = 0; i <= depth; i++, path++) {
376                 if (path->block.lb_id) {
377                         if (path->block.dirty)
378                                 ext4_extent_block_csum_set(inode_ref,
379                                                 path->header);
380
381                         ext4_block_set(inode_ref->fs->bdev, &path->block);
382                 }
383         }
384 }
385
386 /*
387  * Check that whether the basic information inside the extent header
388  * is correct or not.
389  */
390 static int ext4_ext_check(struct ext4_inode_ref *inode_ref,
391                           struct ext4_extent_header *eh, uint16_t depth,
392                           ext4_fsblk_t pblk __unused)
393 {
394         struct ext4_extent_tail *tail;
395         const char *error_msg;
396         (void)error_msg;
397
398         if (to_le16(eh->magic) != EXT4_EXTENT_MAGIC) {
399                 error_msg = "invalid magic";
400                 goto corrupted;
401         }
402         if (to_le16(eh->depth) != depth) {
403                 error_msg = "unexpected eh_depth";
404                 goto corrupted;
405         }
406         if (eh->max_entries_count == 0) {
407                 error_msg = "invalid eh_max";
408                 goto corrupted;
409         }
410         if (to_le16(eh->entries_count) > to_le16(eh->max_entries_count)) {
411                 error_msg = "invalid eh_entries";
412                 goto corrupted;
413         }
414
415         tail = find_ext4_extent_tail(eh);
416         struct ext4_sblock *sb = &inode_ref->fs->sb;
417         if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
418                 if (tail->et_checksum != to_le32(ext4_ext_block_csum(inode_ref, eh))) {
419                         /* FIXME: Warning: extent checksum damaged? */
420                 }
421         }
422
423         return EOK;
424
425 corrupted:
426         ext4_dbg(DEBUG_EXTENT, "Bad extents B+ tree block: %s. "
427                                "Blocknr: %" PRId64 "\n",
428                  error_msg, pblk);
429         return EIO;
430 }
431
432 static int read_extent_tree_block(struct ext4_inode_ref *inode_ref,
433                                   ext4_fsblk_t pblk, int32_t depth,
434                                   struct ext4_block *bh,
435                                   uint32_t flags __unused)
436 {
437         int err;
438
439         err = ext4_block_get(inode_ref->fs->bdev, bh, pblk);
440         if (err != EOK)
441                 goto errout;
442
443         err = ext4_ext_check(inode_ref, ext_block_hdr(bh), depth, pblk);
444         if (err != EOK)
445                 goto errout;
446
447         return EOK;
448 errout:
449         if (bh->lb_id)
450                 ext4_block_set(inode_ref->fs->bdev, bh);
451
452         return err;
453 }
454
455 /*
456  * ext4_ext_binsearch_idx:
457  * binary search for the closest index of the given block
458  * the header must be checked before calling this
459  */
460 static void ext4_ext_binsearch_idx(struct ext4_extent_path *path,
461                                    ext4_lblk_t block)
462 {
463         struct ext4_extent_header *eh = path->header;
464         struct ext4_extent_index *r, *l, *m;
465
466         l = EXT_FIRST_INDEX(eh) + 1;
467         r = EXT_LAST_INDEX(eh);
468         while (l <= r) {
469                 m = l + (r - l) / 2;
470                 if (block < to_le32(m->first_block))
471                         r = m - 1;
472                 else
473                         l = m + 1;
474         }
475
476         path->index = l - 1;
477 }
478
479 /*
480  * ext4_ext_binsearch:
481  * binary search for closest extent of the given block
482  * the header must be checked before calling this
483  */
484 static void ext4_ext_binsearch(struct ext4_extent_path *path, ext4_lblk_t block)
485 {
486         struct ext4_extent_header *eh = path->header;
487         struct ext4_extent *r, *l, *m;
488
489         if (eh->entries_count == 0) {
490                 /*
491                  * this leaf is empty:
492                  * we get such a leaf in split/add case
493                  */
494                 return;
495         }
496
497         l = EXT_FIRST_EXTENT(eh) + 1;
498         r = EXT_LAST_EXTENT(eh);
499
500         while (l <= r) {
501                 m = l + (r - l) / 2;
502                 if (block < to_le32(m->first_block))
503                         r = m - 1;
504                 else
505                         l = m + 1;
506         }
507
508         path->extent = l - 1;
509 }
510
511 static int ext4_find_extent(struct ext4_inode_ref *inode_ref, ext4_lblk_t block,
512                             struct ext4_extent_path **orig_path, uint32_t flags)
513 {
514         struct ext4_extent_header *eh;
515         struct ext4_block bh = EXT4_BLOCK_ZERO();
516         ext4_fsblk_t buf_block = 0;
517         struct ext4_extent_path *path = *orig_path;
518         int32_t depth, ppos = 0;
519         int32_t i;
520         int ret;
521
522         eh = ext_inode_hdr(inode_ref->inode);
523         depth = ext_depth(inode_ref->inode);
524
525         if (path) {
526                 ext4_ext_drop_refs(inode_ref, path, 0);
527                 if (depth > path[0].maxdepth) {
528                         free(path);
529                         *orig_path = path = NULL;
530                 }
531         }
532         if (!path) {
533                 int32_t path_depth = depth + 1;
534                 /* account possible depth increase */
535                 path = calloc(1, sizeof(struct ext4_extent_path) *
536                                      (path_depth + 1));
537                 if (!path)
538                         return ENOMEM;
539                 path[0].maxdepth = path_depth;
540         }
541         path[0].header = eh;
542         path[0].block = bh;
543
544         i = depth;
545         /* walk through the tree */
546         while (i) {
547                 ext4_ext_binsearch_idx(path + ppos, block);
548                 path[ppos].p_block = ext4_idx_pblock(path[ppos].index);
549                 path[ppos].depth = i;
550                 path[ppos].extent = NULL;
551                 buf_block = path[ppos].p_block;
552
553                 i--;
554                 ppos++;
555                 if (!path[ppos].block.lb_id ||
556                     path[ppos].block.lb_id != buf_block) {
557                         ret = read_extent_tree_block(inode_ref, buf_block, i,
558                                                      &bh, flags);
559                         if (ret != EOK) {
560                                 goto err;
561                         }
562                         if (ppos > depth) {
563                                 ext4_block_set(inode_ref->fs->bdev, &bh);
564                                 ret = EIO;
565                                 goto err;
566                         }
567
568                         eh = ext_block_hdr(&bh);
569                         path[ppos].block = bh;
570                         path[ppos].header = eh;
571                 }
572         }
573
574         path[ppos].depth = i;
575         path[ppos].extent = NULL;
576         path[ppos].index = NULL;
577
578         /* find extent */
579         ext4_ext_binsearch(path + ppos, block);
580         /* if not an empty leaf */
581         if (path[ppos].extent)
582                 path[ppos].p_block = ext4_ext_pblock(path[ppos].extent);
583
584         *orig_path = path;
585
586         ret = EOK;
587         return ret;
588
589 err:
590         ext4_ext_drop_refs(inode_ref, path, 0);
591         free(path);
592         if (orig_path)
593                 *orig_path = NULL;
594         return ret;
595 }
596
597 static void ext4_ext_init_header(struct ext4_inode_ref *inode_ref,
598                                  struct ext4_extent_header *eh, int32_t depth)
599 {
600         eh->entries_count = 0;
601         eh->max_entries_count = to_le16(ext4_ext_max_entries(inode_ref, depth));
602         eh->magic = to_le16(EXT4_EXTENT_MAGIC);
603         eh->depth = depth;
604 }
605
606 /*
607  * Be cautious, the buffer_head returned is not yet mark dirtied. */
608 static int ext4_ext_split_node(struct ext4_inode_ref *inode_ref,
609                                struct ext4_extent_path *path, int32_t at,
610                                struct ext4_extent *newext,
611                                ext4_fsblk_t *sibling, struct ext4_block *new_bh)
612 {
613         int ret;
614         ext4_fsblk_t newblock;
615         struct ext4_block bh = EXT4_BLOCK_ZERO();
616         int32_t depth = ext_depth(inode_ref->inode);
617
618         ext4_assert(sibling);
619
620         /* FIXME: currently we split at the point after the current extent. */
621         newblock = ext4_ext_new_meta_block(inode_ref, path, newext, &ret, 0);
622         if (ret)
623                 goto cleanup;
624
625         /*  For write access.# */
626         ret = ext4_block_get(inode_ref->fs->bdev, &bh, newblock);
627         if (ret != EOK)
628                 goto cleanup;
629
630         if (at == depth) {
631                 /* start copy from next extent */
632                 ptrdiff_t m = EXT_MAX_EXTENT(path[at].header) - path[at].extent;
633                 struct ext4_extent_header *neh;
634                 neh = ext_block_hdr(&bh);
635                 ext4_ext_init_header(inode_ref, neh, 0);
636                 if (m) {
637                         struct ext4_extent *ex;
638                         ex = EXT_FIRST_EXTENT(neh);
639                         memmove(ex, path[at].extent + 1,
640                                 sizeof(struct ext4_extent) * m);
641                         neh->entries_count =
642                             to_le16(to_le16(neh->entries_count) + m);
643                         path[at].header->entries_count = to_le16(
644                             to_le16(path[at].header->entries_count) - m);
645                         ret = ext4_ext_dirty(inode_ref, path + at);
646                         if (ret)
647                                 goto cleanup;
648                 }
649         } else {
650                 ptrdiff_t m = EXT_MAX_INDEX(path[at].header) - path[at].index;
651                 struct ext4_extent_header *neh;
652                 neh = ext_block_hdr(&bh);
653                 ext4_ext_init_header(inode_ref, neh, depth - at);
654                 if (m) {
655                         struct ext4_extent_index *ix;
656                         ix = EXT_FIRST_INDEX(neh);
657                         memmove(ix, path[at].index + 1,
658                                 sizeof(struct ext4_extent) * m);
659                         neh->entries_count =
660                             to_le16(to_le16(neh->entries_count) + m);
661                         path[at].header->entries_count = to_le16(
662                             to_le16(path[at].header->entries_count) - m);
663                         ret = ext4_ext_dirty(inode_ref, path + at);
664                         if (ret)
665                                 goto cleanup;
666                 }
667         }
668 cleanup:
669         if (ret) {
670                 if (bh.lb_id) {
671                         ext4_block_set(inode_ref->fs->bdev, &bh);
672                 }
673                 if (newblock)
674                         ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
675
676                 newblock = 0;
677         }
678         *sibling = newblock;
679         *new_bh = bh;
680         return ret;
681 }
682
683 static ext4_lblk_t ext4_ext_block_index(struct ext4_extent_header *eh)
684 {
685         if (eh->depth)
686                 return to_le32(EXT_FIRST_INDEX(eh)->first_block);
687
688         return to_le32(EXT_FIRST_EXTENT(eh)->first_block);
689 }
690
691 struct ext_split_trans {
692         ext4_fsblk_t ptr;
693         struct ext4_extent_path path;
694         int switch_to;
695 };
696
697 static int ext4_ext_insert_index(struct ext4_inode_ref *inode_ref,
698                                  struct ext4_extent_path *path,
699                                  int32_t at,
700                                  struct ext4_extent *newext,
701                                  ext4_lblk_t insert_index,
702                                  ext4_fsblk_t insert_block,
703                                  struct ext_split_trans *spt,
704                                  bool *need_grow)
705 {
706         struct ext4_extent_index *ix;
707         struct ext4_extent_path *curp = path + at;
708         struct ext4_block bh = EXT4_BLOCK_ZERO();
709         int32_t len;
710         int err;
711         struct ext4_extent_header *eh;
712
713         *need_grow = false;
714
715         if (curp->index && insert_index == to_le32(curp->index->first_block))
716                 return EIO;
717
718         if (to_le16(curp->header->entries_count) ==
719             to_le16(curp->header->max_entries_count)) {
720                 if (at) {
721                         struct ext4_extent_header *neh;
722                         err = ext4_ext_split_node(inode_ref, path, at, newext,
723                                                   &spt->ptr, &bh);
724                         if (err != EOK)
725                                 goto out;
726
727                         neh = ext_block_hdr(&bh);
728                         if (insert_index > to_le32(curp->index->first_block)) {
729                                 /* Make decision which node should be used to
730                                  * insert the index.*/
731                                 if (to_le16(neh->entries_count) >
732                                     to_le16(curp->header->entries_count)) {
733                                         eh = curp->header;
734                                         /* insert after */
735                                         ix = EXT_LAST_INDEX(eh) + 1;
736                                 } else {
737                                         eh = neh;
738                                         ix = EXT_FIRST_INDEX(eh);
739                                 }
740                         } else {
741                                 eh = curp->header;
742                                 /* insert before */
743                                 ix = EXT_LAST_INDEX(eh);
744                         }
745                 } else {
746                         err = EOK;
747                         *need_grow = true;
748                         goto out;
749                 }
750         } else {
751                 eh = curp->header;
752                 if (curp->index == NULL) {
753                         ix = EXT_FIRST_INDEX(eh);
754                         curp->index = ix;
755                 } else if (insert_index > to_le32(curp->index->first_block)) {
756                         /* insert after */
757                         ix = curp->index + 1;
758                 } else {
759                         /* insert before */
760                         ix = curp->index;
761                 }
762         }
763
764         len = EXT_LAST_INDEX(eh) - ix + 1;
765         ext4_assert(len >= 0);
766         if (len > 0)
767                 memmove(ix + 1, ix, len * sizeof(struct ext4_extent_index));
768
769         if (ix > EXT_MAX_INDEX(eh)) {
770                 err = EIO;
771                 goto out;
772         }
773
774         ix->first_block = to_le32(insert_index);
775         ext4_idx_store_pblock(ix, insert_block);
776         eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
777
778         if (ix > EXT_LAST_INDEX(eh)) {
779                 err = EIO;
780                 goto out;
781         }
782
783         if (eh == curp->header)
784                 err = ext4_ext_dirty(inode_ref, curp);
785         else
786                 err = EOK;
787
788 out:
789         if (err != EOK || *need_grow) {
790                 if (bh.lb_id)
791                         ext4_block_set(inode_ref->fs->bdev, &bh);
792
793                 spt->ptr = 0;
794         } else if (bh.lb_id) {
795                 /* If we got a sibling leaf. */
796                 ext4_extent_block_csum_set(inode_ref, ext_block_hdr(&bh));
797                 bh.dirty = true;
798
799                 spt->path.p_block = ext4_idx_pblock(ix);
800                 spt->path.depth = to_le16(eh->depth);
801                 spt->path.maxdepth = 0;
802                 spt->path.extent = NULL;
803                 spt->path.index = ix;
804                 spt->path.header = eh;
805                 spt->path.block = bh;
806
807                 /*
808                  * If newext->ee_block can be included into the
809                  * right sub-tree.
810                  */
811                 if (to_le32(newext->first_block) >=
812                     ext4_ext_block_index(ext_block_hdr(&bh)))
813                         spt->switch_to = 1;
814                 else {
815                         curp->index = ix;
816                         curp->p_block = ext4_idx_pblock(ix);
817                 }
818
819         } else {
820                 spt->ptr = 0;
821                 curp->index = ix;
822                 curp->p_block = ext4_idx_pblock(ix);
823         }
824         return err;
825 }
826
827 /*
828  * ext4_ext_correct_indexes:
829  * if leaf gets modified and modified extent is first in the leaf,
830  * then we have to correct all indexes above.
831  */
832 static int ext4_ext_correct_indexes(struct ext4_inode_ref *inode_ref,
833                                     struct ext4_extent_path *path)
834 {
835         struct ext4_extent_header *eh;
836         int32_t depth = ext_depth(inode_ref->inode);
837         struct ext4_extent *ex;
838         uint32_t border;
839         int32_t k;
840         int err = EOK;
841
842         eh = path[depth].header;
843         ex = path[depth].extent;
844
845         if (ex == NULL || eh == NULL) {
846                 return EIO;
847         }
848
849         if (depth == 0) {
850                 /* there is no tree at all */
851                 return EOK;
852         }
853
854         if (ex != EXT_FIRST_EXTENT(eh)) {
855                 /* we correct tree if first leaf got modified only */
856                 return EOK;
857         }
858
859         /*
860          * TODO: we need correction if border is smaller than current one
861          */
862         k = depth - 1;
863         border = path[depth].extent->first_block;
864         path[k].index->first_block = border;
865         err = ext4_ext_dirty(inode_ref, path + k);
866         if (err != EOK)
867                 return err;
868
869         while (k--) {
870                 /* change all left-side indexes */
871                 if (path[k + 1].index != EXT_FIRST_INDEX(path[k + 1].header))
872                         break;
873                 path[k].index->first_block = border;
874                 err = ext4_ext_dirty(inode_ref, path + k);
875                 if (err != EOK)
876                         break;
877         }
878
879         return err;
880 }
881
882 static bool ext4_ext_can_prepend(struct ext4_extent *ex1,
883                                  struct ext4_extent *ex2)
884 {
885         if (ext4_ext_pblock(ex2) + ext4_ext_get_actual_len(ex2) !=
886             ext4_ext_pblock(ex1))
887                 return false;
888
889 #ifdef AGGRESSIVE_TEST
890         if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) > 4)
891                 return 0;
892 #else
893         if (ext4_ext_is_unwritten(ex1)) {
894                 if (ext4_ext_get_actual_len(ex1) +
895                         ext4_ext_get_actual_len(ex2) >
896                     EXT_UNWRITTEN_MAX_LEN)
897                         return false;
898         } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
899                    EXT_INIT_MAX_LEN)
900                 return false;
901 #endif
902
903         if (to_le32(ex2->first_block) + ext4_ext_get_actual_len(ex2) !=
904             to_le32(ex1->first_block))
905                 return false;
906
907         return true;
908 }
909
910 static bool ext4_ext_can_append(struct ext4_extent *ex1,
911                                 struct ext4_extent *ex2)
912 {
913         if (ext4_ext_pblock(ex1) + ext4_ext_get_actual_len(ex1) !=
914             ext4_ext_pblock(ex2))
915                 return false;
916
917 #ifdef AGGRESSIVE_TEST
918         if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) > 4)
919                 return 0;
920 #else
921         if (ext4_ext_is_unwritten(ex1)) {
922                 if (ext4_ext_get_actual_len(ex1) +
923                         ext4_ext_get_actual_len(ex2) >
924                     EXT_UNWRITTEN_MAX_LEN)
925                         return false;
926         } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
927                    EXT_INIT_MAX_LEN)
928                 return false;
929 #endif
930
931         if (to_le32(ex1->first_block) + ext4_ext_get_actual_len(ex1) !=
932             to_le32(ex2->first_block))
933                 return false;
934
935         return true;
936 }
937
938 static int ext4_ext_insert_leaf(struct ext4_inode_ref *inode_ref,
939                                 struct ext4_extent_path *path,
940                                 int32_t at,
941                                 struct ext4_extent *newext,
942                                 struct ext_split_trans *spt,
943                                 uint32_t flags,
944                                 bool *need_grow)
945 {
946         struct ext4_extent_path *curp = path + at;
947         struct ext4_extent *ex = curp->extent;
948         struct ext4_block bh = EXT4_BLOCK_ZERO();
949         int32_t len;
950         int err = EOK;
951         int unwritten;
952         struct ext4_extent_header *eh = NULL;
953
954         *need_grow = false;
955
956         if (curp->extent &&
957             to_le32(newext->first_block) == to_le32(curp->extent->first_block))
958                 return EIO;
959
960         if (!(flags & EXT4_EXT_NO_COMBINE)) {
961                 if (curp->extent && ext4_ext_can_append(curp->extent, newext)) {
962                         unwritten = ext4_ext_is_unwritten(curp->extent);
963                         curp->extent->block_count =
964                             to_le16(ext4_ext_get_actual_len(curp->extent) +
965                                     ext4_ext_get_actual_len(newext));
966                         if (unwritten)
967                                 ext4_ext_mark_unwritten(curp->extent);
968                         err = ext4_ext_dirty(inode_ref, curp);
969                         goto out;
970                 }
971
972                 if (curp->extent &&
973                     ext4_ext_can_prepend(curp->extent, newext)) {
974                         unwritten = ext4_ext_is_unwritten(curp->extent);
975                         curp->extent->first_block = newext->first_block;
976                         curp->extent->block_count =
977                             to_le16(ext4_ext_get_actual_len(curp->extent) +
978                                     ext4_ext_get_actual_len(newext));
979                         if (unwritten)
980                                 ext4_ext_mark_unwritten(curp->extent);
981                         err = ext4_ext_dirty(inode_ref, curp);
982                         goto out;
983                 }
984         }
985
986         if (to_le16(curp->header->entries_count) ==
987             to_le16(curp->header->max_entries_count)) {
988                 if (at) {
989                         struct ext4_extent_header *neh;
990                         err = ext4_ext_split_node(inode_ref, path, at, newext,
991                                                   &spt->ptr, &bh);
992                         if (err != EOK)
993                                 goto out;
994
995                         neh = ext_block_hdr(&bh);
996                         if (to_le32(newext->first_block) >
997                             to_le32(curp->extent->first_block)) {
998                                 if (to_le16(neh->entries_count) >
999                                     to_le16(curp->header->entries_count)) {
1000                                         eh = curp->header;
1001                                         /* insert after */
1002                                         ex = EXT_LAST_EXTENT(eh) + 1;
1003                                 } else {
1004                                         eh = neh;
1005                                         ex = EXT_FIRST_EXTENT(eh);
1006                                 }
1007                         } else {
1008                                 eh = curp->header;
1009                                 /* insert before */
1010                                 ex = EXT_LAST_EXTENT(eh);
1011                         }
1012                 } else {
1013                         err = EOK;
1014                         *need_grow = true;
1015                         goto out;
1016                 }
1017         } else {
1018                 eh = curp->header;
1019                 if (curp->extent == NULL) {
1020                         ex = EXT_FIRST_EXTENT(eh);
1021                         curp->extent = ex;
1022                 } else if (to_le32(newext->first_block) >
1023                            to_le32(curp->extent->first_block)) {
1024                         /* insert after */
1025                         ex = curp->extent + 1;
1026                 } else {
1027                         /* insert before */
1028                         ex = curp->extent;
1029                 }
1030         }
1031
1032         len = EXT_LAST_EXTENT(eh) - ex + 1;
1033         ext4_assert(len >= 0);
1034         if (len > 0)
1035                 memmove(ex + 1, ex, len * sizeof(struct ext4_extent));
1036
1037         if (ex > EXT_MAX_EXTENT(eh)) {
1038                 err = EIO;
1039                 goto out;
1040         }
1041
1042         ex->first_block = newext->first_block;
1043         ex->block_count = newext->block_count;
1044         ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
1045         eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
1046
1047         if (ex > EXT_LAST_EXTENT(eh)) {
1048                 err = EIO;
1049                 goto out;
1050         }
1051
1052         if (eh == curp->header) {
1053                 err = ext4_ext_correct_indexes(inode_ref, path);
1054                 if (err != EOK)
1055                         goto out;
1056                 err = ext4_ext_dirty(inode_ref, curp);
1057         } else
1058                 err = EOK;
1059
1060 out:
1061         if (err != EOK || *need_grow) {
1062                 if (bh.lb_id)
1063                         ext4_block_set(inode_ref->fs->bdev, &bh);
1064
1065                 spt->ptr = 0;
1066         } else if (bh.lb_id) {
1067                 /* If we got a sibling leaf. */
1068                 ext4_extent_block_csum_set(inode_ref, ext_block_hdr(&bh));
1069                 bh.dirty = true;
1070
1071                 spt->path.p_block = ext4_ext_pblock(ex);
1072                 spt->path.depth = to_le16(eh->depth);
1073                 spt->path.maxdepth = 0;
1074                 spt->path.extent = ex;
1075                 spt->path.index = NULL;
1076                 spt->path.header = eh;
1077                 spt->path.block = bh;
1078
1079                 /*
1080                  * If newext->ee_block can be included into the
1081                  * right sub-tree.
1082                  */
1083                 if (to_le32(newext->first_block) >=
1084                     ext4_ext_block_index(ext_block_hdr(&bh)))
1085                         spt->switch_to = 1;
1086                 else {
1087                         curp->extent = ex;
1088                         curp->p_block = ext4_ext_pblock(ex);
1089                 }
1090
1091         } else {
1092                 spt->ptr = 0;
1093                 curp->extent = ex;
1094                 curp->p_block = ext4_ext_pblock(ex);
1095         }
1096
1097         return err;
1098 }
1099
1100 /*
1101  * ext4_ext_grow_indepth:
1102  * implements tree growing procedure:
1103  * - allocates new block
1104  * - moves top-level data (index block or leaf) into the new block
1105  * - initializes new top-level, creating index that points to the
1106  *   just created block
1107  */
1108 static int ext4_ext_grow_indepth(struct ext4_inode_ref *inode_ref,
1109                                  uint32_t flags)
1110 {
1111         struct ext4_extent_header *neh;
1112         struct ext4_block bh = EXT4_BLOCK_ZERO();
1113         ext4_fsblk_t newblock, goal = 0;
1114         int err = EOK;
1115
1116         /* Try to prepend new index to old one */
1117         if (ext_depth(inode_ref->inode))
1118                 goal = ext4_idx_pblock(
1119                     EXT_FIRST_INDEX(ext_inode_hdr(inode_ref->inode)));
1120         else
1121                 goal = ext4_fs_inode_to_goal_block(inode_ref);
1122
1123         newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, &err);
1124         if (newblock == 0)
1125                 return err;
1126
1127         /* # */
1128         err = ext4_block_get(inode_ref->fs->bdev, &bh, newblock);
1129         if (err != EOK) {
1130                 ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
1131                 return err;
1132         }
1133
1134         /* move top-level index/leaf into new block */
1135         memmove(bh.data, inode_ref->inode->blocks,
1136                 sizeof(inode_ref->inode->blocks));
1137
1138         /* set size of new block */
1139         neh = ext_block_hdr(&bh);
1140         /* old root could have indexes or leaves
1141          * so calculate e_max right way */
1142         if (ext_depth(inode_ref->inode))
1143                 neh->max_entries_count =
1144                     to_le16(ext4_ext_space_block_idx(inode_ref));
1145         else
1146                 neh->max_entries_count =
1147                     to_le16(ext4_ext_space_block(inode_ref));
1148
1149         neh->magic = to_le16(EXT4_EXTENT_MAGIC);
1150
1151         /* Update top-level index: num,max,pointer */
1152         neh = ext_inode_hdr(inode_ref->inode);
1153         neh->entries_count = to_le16(1);
1154         ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
1155         if (neh->depth == 0) {
1156                 /* Root extent block becomes index block */
1157                 neh->max_entries_count =
1158                     to_le16(ext4_ext_space_root_idx(inode_ref));
1159                 EXT_FIRST_INDEX(neh)
1160                     ->first_block = EXT_FIRST_EXTENT(neh)->first_block;
1161         }
1162         neh->depth = to_le16(to_le16(neh->depth) + 1);
1163
1164         ext4_extent_block_csum_set(inode_ref, neh);
1165         bh.dirty = true;
1166         inode_ref->dirty = true;
1167         ext4_block_set(inode_ref->fs->bdev, &bh);
1168
1169         return err;
1170 }
1171
1172 __unused static void print_path(struct ext4_extent_path *path)
1173 {
1174         int32_t i = path->depth;
1175         while (i >= 0) {
1176
1177                 ptrdiff_t a =
1178                     (path->extent)
1179                         ? (path->extent - EXT_FIRST_EXTENT(path->header))
1180                         : 0;
1181                 ptrdiff_t b =
1182                     (path->index)
1183                         ? (path->index - EXT_FIRST_INDEX(path->header))
1184                         : 0;
1185
1186                 (void)a;
1187                 (void)b;
1188                 ext4_dbg(DEBUG_EXTENT,
1189                          "depth %" PRId32 ", p_block: %" PRIu64 ","
1190                          "p_ext offset: %td, p_idx offset: %td\n",
1191                          i, path->p_block, a, b);
1192                 i--;
1193                 path++;
1194         }
1195 }
1196
1197 static void ext4_ext_replace_path(struct ext4_inode_ref *inode_ref,
1198                                   struct ext4_extent_path *path,
1199                                   struct ext_split_trans *spt,
1200                                   int32_t level)
1201 {
1202         int32_t depth = ext_depth(inode_ref->inode);
1203         int32_t i = depth - level;
1204         ext4_ext_drop_refs(inode_ref, path + i, 1);
1205         path[i] = spt[level].path;
1206 }
1207
1208 static int ext4_ext_insert_extent(struct ext4_inode_ref *inode_ref,
1209                                   struct ext4_extent_path **ppath,
1210                                   struct ext4_extent *newext, uint32_t flags)
1211 {
1212         int32_t i, depth, level;
1213         int ret = EOK;
1214         ext4_fsblk_t ptr = 0;
1215         bool need_grow = false;
1216         struct ext4_extent_path *path = *ppath;
1217         struct ext_split_trans *spt = NULL;
1218         struct ext_split_trans newblock;
1219
1220         memset(&newblock, 0, sizeof(newblock));
1221
1222         depth = ext_depth(inode_ref->inode);
1223         for (i = depth, level = 0; i >= 0; i--, level++)
1224                 if (EXT_HAS_FREE_INDEX(path + i))
1225                         break;
1226
1227         if (level) {
1228                 spt = calloc(1, sizeof(struct ext_split_trans) * (level));
1229                 if (!spt) {
1230                         ret = ENOMEM;
1231                         goto out;
1232                 }
1233         }
1234         i = 0;
1235 again:
1236         depth = ext_depth(inode_ref->inode);
1237
1238         do {
1239                 if (!i) {
1240                         ret = ext4_ext_insert_leaf(inode_ref, path, depth - i,
1241                                                    newext, &newblock, flags,
1242                                                    &need_grow);
1243                 } else {
1244                         ret = ext4_ext_insert_index(
1245                             inode_ref, path, depth - i, newext,
1246                             ext4_ext_block_index(
1247                                 ext_block_hdr(&spt[i - 1].path.block)),
1248                             spt[i - 1].ptr, &newblock,
1249                             &need_grow);
1250                 }
1251                 ptr = newblock.ptr;
1252
1253                 if (ret != EOK)
1254                         goto out;
1255
1256                 else if (spt && ptr && !ret) {
1257                         /* Prepare for the next iteration after splitting. */
1258                         spt[i] = newblock;
1259                 }
1260
1261                 i++;
1262         } while (ptr != 0 && i <= depth);
1263
1264         if (need_grow) {
1265                 ret = ext4_ext_grow_indepth(inode_ref, 0);
1266                 if (ret)
1267                         goto out;
1268                 ret = ext4_find_extent(inode_ref, to_le32(newext->first_block),
1269                                        ppath, 0);
1270                 if (ret)
1271                         goto out;
1272                 i = depth;
1273                 path = *ppath;
1274                 goto again;
1275         }
1276 out:
1277         if (ret) {
1278                 if (path)
1279                         ext4_ext_drop_refs(inode_ref, path, 0);
1280
1281                 while (--level >= 0 && spt) {
1282                         if (spt[level].ptr) {
1283                                 ext4_ext_free_blocks(inode_ref, spt[level].ptr,
1284                                                      1, 0);
1285                                 ext4_ext_drop_refs(inode_ref, &spt[level].path,
1286                                                    1);
1287                         }
1288                 }
1289         } else {
1290                 while (--level >= 0 && spt) {
1291                         if (spt[level].switch_to)
1292                                 ext4_ext_replace_path(inode_ref, path, spt,
1293                                                       level);
1294                         else if (spt[level].ptr)
1295                                 ext4_ext_drop_refs(inode_ref, &spt[level].path,
1296                                                    1);
1297                 }
1298         }
1299         if (spt)
1300                 free(spt);
1301
1302         return ret;
1303 }
1304
1305 static void ext4_ext_remove_blocks(struct ext4_inode_ref *inode_ref,
1306                                    struct ext4_extent *ex, ext4_lblk_t from,
1307                                    ext4_lblk_t to)
1308 {
1309         ext4_lblk_t len = to - from + 1;
1310         ext4_lblk_t num;
1311         ext4_fsblk_t start;
1312         num = from - to_le32(ex->first_block);
1313         start = ext4_ext_pblock(ex) + num;
1314         ext4_dbg(DEBUG_EXTENT,
1315                  "Freeing %" PRIu32 " at %" PRIu64 ", %" PRIu32 "\n", from,
1316                  start, len);
1317
1318         ext4_ext_free_blocks(inode_ref, start, len, 0);
1319 }
1320
1321 static int ext4_ext_remove_idx(struct ext4_inode_ref *inode_ref,
1322                                struct ext4_extent_path *path, int32_t depth)
1323 {
1324         int err = EOK;
1325         int32_t i = depth;
1326         ext4_fsblk_t leaf;
1327
1328         /* free index block */
1329         leaf = ext4_idx_pblock(path[i].index);
1330
1331         if (path[i].index != EXT_LAST_INDEX(path[i].header)) {
1332                 ptrdiff_t len = EXT_LAST_INDEX(path[i].header) - path[i].index;
1333                 memmove(path[i].index, path[i].index + 1,
1334                         len * sizeof(struct ext4_extent_index));
1335         }
1336
1337         path[i].header->entries_count =
1338             to_le16(to_le16(path[i].header->entries_count) - 1);
1339         err = ext4_ext_dirty(inode_ref, path + i);
1340         if (err != EOK)
1341                 return err;
1342
1343         ext4_dbg(DEBUG_EXTENT, "IDX: Freeing %" PRIu32 " at %" PRIu64 ", %d\n",
1344                  to_le32(path[i].index->first_block), leaf, 1);
1345         ext4_ext_free_blocks(inode_ref, leaf, 1, 0);
1346
1347         while (i > 0) {
1348                 if (path[i].index != EXT_FIRST_INDEX(path[i].header))
1349                         break;
1350
1351                 path[i - 1].index->first_block = path[i].index->first_block;
1352                 err = ext4_ext_dirty(inode_ref, path + i - 1);
1353                 if (err != EOK)
1354                         break;
1355
1356                 i--;
1357         }
1358         return err;
1359 }
1360
1361 static int ext4_ext_remove_leaf(struct ext4_inode_ref *inode_ref,
1362                                 struct ext4_extent_path *path, ext4_lblk_t from,
1363                                 ext4_lblk_t to)
1364 {
1365
1366         int32_t depth = ext_depth(inode_ref->inode);
1367         struct ext4_extent *ex = path[depth].extent;
1368         struct ext4_extent *start_ex, *ex2 = NULL;
1369         struct ext4_extent_header *eh = path[depth].header;
1370         int32_t len;
1371         int err = EOK;
1372         uint16_t new_entries;
1373
1374         start_ex = ex;
1375         new_entries = to_le16(eh->entries_count);
1376         while (ex <= EXT_LAST_EXTENT(path[depth].header) &&
1377                to_le32(ex->first_block) <= to) {
1378                 int32_t new_len = 0;
1379                 int unwritten;
1380                 ext4_lblk_t start, new_start;
1381                 ext4_fsblk_t newblock;
1382                 new_start = start = to_le32(ex->first_block);
1383                 len = ext4_ext_get_actual_len(ex);
1384                 newblock = ext4_ext_pblock(ex);
1385                 if (start < from) {
1386                         len -= from - start;
1387                         new_len = from - start;
1388                         start = from;
1389                         start_ex++;
1390                 } else {
1391                         if (start + len - 1 > to) {
1392                                 len -= start + len - 1 - to;
1393                                 new_len = start + len - 1 - to;
1394                                 new_start = to + 1;
1395                                 newblock += to + 1 - start;
1396                                 ex2 = ex;
1397                         }
1398                 }
1399
1400                 ext4_ext_remove_blocks(inode_ref, ex, start, start + len - 1);
1401                 ex->first_block = to_le32(new_start);
1402                 if (!new_len)
1403                         new_entries--;
1404                 else {
1405                         unwritten = ext4_ext_is_unwritten(ex);
1406                         ex->block_count = to_le16(new_len);
1407                         ext4_ext_store_pblock(ex, newblock);
1408                         if (unwritten)
1409                                 ext4_ext_mark_unwritten(ex);
1410                 }
1411
1412                 ex += 1;
1413         }
1414
1415         if (ex2 == NULL)
1416                 ex2 = ex;
1417
1418         if (ex2 <= EXT_LAST_EXTENT(eh))
1419                 memmove(start_ex, ex2, EXT_LAST_EXTENT(eh) - ex2 + 1);
1420
1421         eh->entries_count = to_le16(new_entries);
1422         ext4_ext_dirty(inode_ref, path + depth);
1423         if (path[depth].extent == EXT_FIRST_EXTENT(eh) && eh->entries_count)
1424                 err = ext4_ext_correct_indexes(inode_ref, path);
1425
1426         /* if this leaf is free, then we should
1427          * remove it from index block above */
1428         if (err == EOK && eh->entries_count == 0 && path[depth].block.lb_id)
1429                 err = ext4_ext_remove_idx(inode_ref, path, depth - 1);
1430         else if (depth > 0)
1431                 path[depth - 1].index++;
1432
1433         return err;
1434 }
1435
1436 static bool ext4_ext_more_to_rm(struct ext4_extent_path *path, ext4_lblk_t to)
1437 {
1438         if (!to_le16(path->header->entries_count))
1439                 return false;
1440
1441         if (path->index > EXT_LAST_INDEX(path->header))
1442                 return false;
1443
1444         if (to_le32(path->index->first_block) > to)
1445                 return false;
1446
1447         return true;
1448 }
1449
1450 int ext4_extent_remove_space(struct ext4_inode_ref *inode_ref, ext4_lblk_t from,
1451                           ext4_lblk_t to)
1452 {
1453         struct ext4_extent_path *path = NULL;
1454         int ret = EOK;
1455         int32_t depth = ext_depth(inode_ref->inode);
1456         int32_t i;
1457
1458         ret = ext4_find_extent(inode_ref, from, &path, 0);
1459         if (ret)
1460                 goto out;
1461
1462         if (!path[depth].extent) {
1463                 ret = EOK;
1464                 goto out;
1465         }
1466
1467         bool in_range = IN_RANGE(from, to_le32(path[depth].extent->first_block),
1468                         ext4_ext_get_actual_len(path[depth].extent));
1469
1470         if (!in_range) {
1471                 ret = EOK;
1472                 goto out;
1473         }
1474
1475         /* If we do remove_space inside the range of an extent */
1476         if ((to_le32(path[depth].extent->first_block) < from) &&
1477             (to < to_le32(path[depth].extent->first_block) +
1478                         ext4_ext_get_actual_len(path[depth].extent) - 1)) {
1479
1480                 struct ext4_extent *ex = path[depth].extent, newex;
1481                 int unwritten = ext4_ext_is_unwritten(ex);
1482                 ext4_lblk_t ee_block = to_le32(ex->first_block);
1483                 int32_t len = ext4_ext_get_actual_len(ex);
1484                 ext4_fsblk_t newblock =
1485                         to + 1 - ee_block + ext4_ext_pblock(ex);
1486
1487                 ex->block_count = to_le16(from - ee_block);
1488                 if (unwritten)
1489                         ext4_ext_mark_unwritten(ex);
1490
1491                 ext4_ext_dirty(inode_ref, path + depth);
1492
1493                 newex.first_block = to_le32(to + 1);
1494                 newex.block_count = to_le16(ee_block + len - 1 - to);
1495                 ext4_ext_store_pblock(&newex, newblock);
1496                 if (unwritten)
1497                         ext4_ext_mark_unwritten(&newex);
1498
1499                 ret = ext4_ext_insert_extent(inode_ref, &path, &newex, 0);
1500                 goto out;
1501         }
1502
1503         i = depth;
1504         while (i >= 0) {
1505                 if (i == depth) {
1506                         struct ext4_extent_header *eh;
1507                         struct ext4_extent *first_ex, *last_ex;
1508                         ext4_lblk_t leaf_from, leaf_to;
1509                         eh = path[i].header;
1510                         ext4_assert(to_le16(eh->entries_count) > 0);
1511                         first_ex = EXT_FIRST_EXTENT(eh);
1512                         last_ex = EXT_LAST_EXTENT(eh);
1513                         leaf_from = to_le32(first_ex->first_block);
1514                         leaf_to = to_le32(last_ex->first_block) +
1515                                   ext4_ext_get_actual_len(last_ex) - 1;
1516                         if (leaf_from < from)
1517                                 leaf_from = from;
1518
1519                         if (leaf_to > to)
1520                                 leaf_to = to;
1521
1522                         ext4_ext_remove_leaf(inode_ref, path, leaf_from,
1523                                         leaf_to);
1524                         ext4_ext_drop_refs(inode_ref, path + i, 0);
1525                         i--;
1526                         continue;
1527                 }
1528
1529                 struct ext4_extent_header *eh;
1530                 eh = path[i].header;
1531                 if (ext4_ext_more_to_rm(path + i, to)) {
1532                         struct ext4_block bh = EXT4_BLOCK_ZERO();
1533                         if (path[i + 1].block.lb_id)
1534                                 ext4_ext_drop_refs(inode_ref, path + i + 1, 0);
1535
1536                         ret = read_extent_tree_block(inode_ref,
1537                                         ext4_idx_pblock(path[i].index),
1538                                         depth - i - 1, &bh, 0);
1539                         if (ret)
1540                                 goto out;
1541
1542                         path[i].p_block =
1543                                         ext4_idx_pblock(path[i].index);
1544                         path[i + 1].block = bh;
1545                         path[i + 1].header = ext_block_hdr(&bh);
1546                         path[i + 1].depth = depth - i - 1;
1547                         if (i + 1 == depth)
1548                                 path[i + 1].extent = EXT_FIRST_EXTENT(
1549                                         path[i + 1].header);
1550                         else
1551                                 path[i + 1].index =
1552                                         EXT_FIRST_INDEX(path[i + 1].header);
1553
1554                         i++;
1555                 } else {
1556                         if (i > 0) {
1557                                 if (!eh->entries_count)
1558                                         ret = ext4_ext_remove_idx(inode_ref, path,
1559                                                         i - 1);
1560                                 else
1561                                         path[i - 1].index++;
1562
1563                         }
1564
1565                         if (i)
1566                                 ext4_block_set(inode_ref->fs->bdev,
1567                                                 &path[i].block);
1568
1569
1570                         i--;
1571                 }
1572
1573         }
1574
1575         /* TODO: flexible tree reduction should be here */
1576         if (path->header->entries_count == 0) {
1577                 /*
1578                  * truncate to zero freed all the tree,
1579                  * so we need to correct eh_depth
1580                  */
1581                 ext_inode_hdr(inode_ref->inode)->depth = 0;
1582                 ext_inode_hdr(inode_ref->inode)->max_entries_count =
1583                     to_le16(ext4_ext_space_root(inode_ref));
1584                 ret = ext4_ext_dirty(inode_ref, path);
1585         }
1586
1587 out:
1588         ext4_ext_drop_refs(inode_ref, path, 0);
1589         free(path);
1590         path = NULL;
1591         return ret;
1592 }
1593
1594 static int ext4_ext_split_extent_at(struct ext4_inode_ref *inode_ref,
1595                                     struct ext4_extent_path **ppath,
1596                                     ext4_lblk_t split, uint32_t split_flag)
1597 {
1598         struct ext4_extent *ex, newex;
1599         ext4_fsblk_t newblock;
1600         ext4_lblk_t ee_block;
1601         int32_t ee_len;
1602         int32_t depth = ext_depth(inode_ref->inode);
1603         int err = EOK;
1604
1605         ex = (*ppath)[depth].extent;
1606         ee_block = to_le32(ex->first_block);
1607         ee_len = ext4_ext_get_actual_len(ex);
1608         newblock = split - ee_block + ext4_ext_pblock(ex);
1609
1610         if (split == ee_block) {
1611                 /*
1612                  * case b: block @split is the block that the extent begins with
1613                  * then we just change the state of the extent, and splitting
1614                  * is not needed.
1615                  */
1616                 if (split_flag & EXT4_EXT_MARK_UNWRIT2)
1617                         ext4_ext_mark_unwritten(ex);
1618                 else
1619                         ext4_ext_mark_initialized(ex);
1620
1621                 err = ext4_ext_dirty(inode_ref, *ppath + depth);
1622                 goto out;
1623         }
1624
1625         ex->block_count = to_le16(split - ee_block);
1626         if (split_flag & EXT4_EXT_MARK_UNWRIT1)
1627                 ext4_ext_mark_unwritten(ex);
1628
1629         err = ext4_ext_dirty(inode_ref, *ppath + depth);
1630         if (err != EOK)
1631                 goto out;
1632
1633         newex.first_block = to_le32(split);
1634         newex.block_count = to_le16(ee_len - (split - ee_block));
1635         ext4_ext_store_pblock(&newex, newblock);
1636         if (split_flag & EXT4_EXT_MARK_UNWRIT2)
1637                 ext4_ext_mark_unwritten(&newex);
1638         err = ext4_ext_insert_extent(inode_ref, ppath, &newex,
1639                                      EXT4_EXT_NO_COMBINE);
1640         if (err != EOK)
1641                 goto restore_extent_len;
1642
1643 out:
1644         return err;
1645 restore_extent_len:
1646         ex->block_count = to_le16(ee_len);
1647         err = ext4_ext_dirty(inode_ref, *ppath + depth);
1648         return err;
1649 }
1650
1651 static int ext4_ext_convert_to_initialized(struct ext4_inode_ref *inode_ref,
1652                                            struct ext4_extent_path **ppath,
1653                                            ext4_lblk_t split, uint32_t blocks)
1654 {
1655         int32_t depth = ext_depth(inode_ref->inode), err = EOK;
1656         struct ext4_extent *ex = (*ppath)[depth].extent;
1657
1658         ext4_assert(to_le32(ex->first_block) <= split);
1659
1660         if (split + blocks ==
1661             to_le32(ex->first_block) + ext4_ext_get_actual_len(ex)) {
1662                 /* split and initialize right part */
1663                 err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1664                                                EXT4_EXT_MARK_UNWRIT1);
1665         } else if (to_le32(ex->first_block) == split) {
1666                 /* split and initialize left part */
1667                 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1668                                                EXT4_EXT_MARK_UNWRIT2);
1669         } else {
1670                 /* split 1 extent to 3 and initialize the 2nd */
1671                 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1672                                                EXT4_EXT_MARK_UNWRIT1 |
1673                                                    EXT4_EXT_MARK_UNWRIT2);
1674                 if (!err) {
1675                         err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1676                                                        EXT4_EXT_MARK_UNWRIT1);
1677                 }
1678         }
1679
1680         return err;
1681 }
1682
1683 static ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_extent_path *path)
1684 {
1685         int32_t depth;
1686
1687         depth = path->depth;
1688
1689         if (depth == 0 && path->extent == NULL)
1690                 return EXT_MAX_BLOCKS;
1691
1692         while (depth >= 0) {
1693                 if (depth == path->depth) {
1694                         /* leaf */
1695                         if (path[depth].extent &&
1696                             path[depth].extent !=
1697                                 EXT_LAST_EXTENT(path[depth].header))
1698                                 return to_le32(
1699                                     path[depth].extent[1].first_block);
1700                 } else {
1701                         /* index */
1702                         if (path[depth].index !=
1703                             EXT_LAST_INDEX(path[depth].header))
1704                                 return to_le32(
1705                                     path[depth].index[1].first_block);
1706                 }
1707                 depth--;
1708         }
1709
1710         return EXT_MAX_BLOCKS;
1711 }
1712
1713 static int ext4_ext_zero_unwritten_range(struct ext4_inode_ref *inode_ref,
1714                                          ext4_fsblk_t block,
1715                                          uint32_t blocks_count)
1716 {
1717         int err = EOK;
1718         uint32_t i;
1719         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
1720         for (i = 0; i < blocks_count; i++) {
1721                 struct ext4_block bh = EXT4_BLOCK_ZERO();
1722                 err = ext4_block_get(inode_ref->fs->bdev, &bh, block + i);
1723                 if (err != EOK)
1724                         break;
1725
1726                 memset(bh.data, 0, block_size);
1727                 bh.dirty = true;
1728                 err = ext4_block_set(inode_ref->fs->bdev, &bh);
1729                 if (err != EOK)
1730                         break;
1731         }
1732         return err;
1733 }
1734
1735 int ext4_extent_get_blocks(struct ext4_inode_ref *inode_ref, ext4_fsblk_t iblock,
1736                         uint32_t max_blocks, ext4_fsblk_t *result, bool create,
1737                         uint32_t *blocks_count)
1738 {
1739         struct ext4_extent_path *path = NULL;
1740         struct ext4_extent newex, *ex;
1741         ext4_fsblk_t goal;
1742         int err = EOK;
1743         int32_t depth;
1744         uint32_t allocated = 0;
1745         ext4_fsblk_t next, newblock;
1746
1747         if (result)
1748                 *result = 0;
1749
1750         if (blocks_count)
1751                 *blocks_count = 0;
1752
1753         /* find extent for this block */
1754         err = ext4_find_extent(inode_ref, iblock, &path, 0);
1755         if (err != EOK) {
1756                 path = NULL;
1757                 goto out2;
1758         }
1759
1760         depth = ext_depth(inode_ref->inode);
1761
1762         /*
1763          * consistent leaf must not be empty
1764          * this situations is possible, though, _during_ tree modification
1765          * this is why assert can't be put in ext4_ext_find_extent()
1766          */
1767         ex = path[depth].extent;
1768         if (ex) {
1769                 ext4_lblk_t ee_block = to_le32(ex->first_block);
1770                 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
1771                 uint16_t ee_len = ext4_ext_get_actual_len(ex);
1772                 /* if found exent covers block, simple return it */
1773                 if (IN_RANGE(iblock, ee_block, ee_len)) {
1774                         /* number of remain blocks in the extent */
1775                         allocated = ee_len - (iblock - ee_block);
1776
1777                         if (!ext4_ext_is_unwritten(ex)) {
1778                                 newblock = iblock - ee_block + ee_start;
1779                                 goto out;
1780                         }
1781
1782                         if (!create) {
1783                                 newblock = 0;
1784                                 goto out;
1785                         }
1786
1787                         uint32_t zero_range;
1788                         zero_range = allocated;
1789                         if (zero_range > max_blocks)
1790                                 zero_range = max_blocks;
1791
1792                         newblock = iblock - ee_block + ee_start;
1793                         err = ext4_ext_zero_unwritten_range(inode_ref, newblock,
1794                                         zero_range);
1795                         if (err != EOK)
1796                                 goto out2;
1797
1798                         err = ext4_ext_convert_to_initialized(inode_ref, &path,
1799                                         iblock, zero_range);
1800                         if (err != EOK)
1801                                 goto out2;
1802
1803                         goto out;
1804                 }
1805         }
1806
1807         /*
1808          * requested block isn't allocated yet
1809          * we couldn't try to create block if create flag is zero
1810          */
1811         if (!create) {
1812                 goto out2;
1813         }
1814
1815         /* find next allocated block so that we know how many
1816          * blocks we can allocate without ovelapping next extent */
1817         next = ext4_ext_next_allocated_block(path);
1818         allocated = next - iblock;
1819         if (allocated > max_blocks)
1820                 allocated = max_blocks;
1821
1822         /* allocate new block */
1823         goal = ext4_ext_find_goal(inode_ref, path, iblock);
1824         newblock = ext4_new_meta_blocks(inode_ref, goal, 0, &allocated, &err);
1825         if (!newblock)
1826                 goto out2;
1827
1828         /* try to insert new extent into found leaf and return */
1829         newex.first_block = to_le32(iblock);
1830         ext4_ext_store_pblock(&newex, newblock);
1831         newex.block_count = to_le16(allocated);
1832         err = ext4_ext_insert_extent(inode_ref, &path, &newex, 0);
1833         if (err != EOK) {
1834                 /* free data blocks we just allocated */
1835                 ext4_ext_free_blocks(inode_ref, ext4_ext_pblock(&newex),
1836                                      to_le16(newex.block_count), 0);
1837                 goto out2;
1838         }
1839
1840         /* previous routine could use block we allocated */
1841         newblock = ext4_ext_pblock(&newex);
1842
1843 out:
1844         if (allocated > max_blocks)
1845                 allocated = max_blocks;
1846
1847         if (result)
1848                 *result = newblock;
1849
1850         if (blocks_count)
1851                 *blocks_count = allocated;
1852
1853 out2:
1854         if (path) {
1855                 ext4_ext_drop_refs(inode_ref, path, 0);
1856                 free(path);
1857         }
1858
1859         return err;
1860 }
1861 #endif