/*
 * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
 * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
29 #include "ext4_config.h"
30 #include "ext4_blockdev.h"
32 #include "ext4_super.h"
33 #include "ext4_balloc.h"
34 #include "ext4_debug.h"
41 #include "ext4_extent.h"
43 #if CONFIG_EXTENT_FULL
46 * used by extent splitting.
48 #define EXT4_EXT_MARK_UNWRIT1 0x02 /* mark first half unwritten */
49 #define EXT4_EXT_MARK_UNWRIT2 0x04 /* mark second half unwritten */
50 #define EXT4_EXT_DATA_VALID1 0x08 /* first half contains valid data */
51 #define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */
52 #define EXT4_EXT_NO_COMBINE 0x20 /* do not combine two extents */
/* Locate the checksum tail stored after the last possible entry of an
 * extent block described by @eh. */
static struct ext4_extent_tail *
find_ext4_extent_tail(struct ext4_extent_header *eh)
{
	return (struct ext4_extent_tail *)(((char *)eh) +
					   EXT4_EXTENT_TAIL_OFFSET(eh));
}
61 static struct ext4_extent_header *ext_inode_hdr(struct ext4_inode *inode)
63 return (struct ext4_extent_header *)inode->blocks;
66 static struct ext4_extent_header *ext_block_hdr(struct ext4_block *block)
68 return (struct ext4_extent_header *)block->data;
71 static uint16_t ext_depth(struct ext4_inode *inode)
73 return to_le16(ext_inode_hdr(inode)->depth);
76 static uint16_t ext4_ext_get_actual_len(struct ext4_extent *ext)
78 return (to_le16(ext->block_count) <= EXT_INIT_MAX_LEN
79 ? to_le16(ext->block_count)
80 : (to_le16(ext->block_count) - EXT_INIT_MAX_LEN));
83 static void ext4_ext_mark_initialized(struct ext4_extent *ext)
85 ext->block_count = to_le16(ext4_ext_get_actual_len(ext));
88 static void ext4_ext_mark_unwritten(struct ext4_extent *ext)
90 ext->block_count |= to_le16(EXT_INIT_MAX_LEN);
93 static int ext4_ext_is_unwritten(struct ext4_extent *ext)
95 /* Extent with ee_len of 0x8000 is treated as an initialized extent */
96 return (to_le16(ext->block_count) > EXT_INIT_MAX_LEN);
101 * combine low and high parts of physical block number into ext4_fsblk_t
103 static ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex)
107 block = to_le32(ex->start_lo);
108 block |= ((ext4_fsblk_t)to_le16(ex->start_hi) << 31) << 1;
114 * combine low and high parts of a leaf physical block number into ext4_fsblk_t
116 static ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_index *ix)
120 block = to_le32(ix->leaf_lo);
121 block |= ((ext4_fsblk_t)to_le16(ix->leaf_hi) << 31) << 1;
126 * ext4_ext_store_pblock:
127 * stores a large physical block number into an extent struct,
128 * breaking it into parts
130 static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
132 ex->start_lo = to_le32((unsigned long)(pb & 0xffffffff));
133 ex->start_hi = to_le16((unsigned long)((pb >> 31) >> 1) & 0xffff);
137 * ext4_idx_store_pblock:
138 * stores a large physical block number into an index struct,
139 * breaking it into parts
141 static void ext4_idx_store_pblock(struct ext4_extent_index *ix, ext4_fsblk_t pb)
143 ix->leaf_lo = to_le32((unsigned long)(pb & 0xffffffff));
144 ix->leaf_hi = to_le16((unsigned long)((pb >> 31) >> 1) & 0xffff);
147 static int ext4_allocate_single_block(struct ext4_inode_ref *inode_ref,
149 ext4_fsblk_t *blockp)
151 return ext4_balloc_alloc_block(inode_ref, goal, blockp);
154 static ext4_fsblk_t ext4_new_meta_blocks(struct ext4_inode_ref *inode_ref,
156 uint32_t flags __unused,
157 uint32_t *count, int *errp)
159 ext4_fsblk_t block = 0;
161 *errp = ext4_allocate_single_block(inode_ref, goal, &block);
167 static void ext4_ext_free_blocks(struct ext4_inode_ref *inode_ref,
168 ext4_fsblk_t block, uint32_t count,
169 uint32_t flags __unused)
171 ext4_balloc_free_blocks(inode_ref, block, count);
174 static size_t ext4_ext_space_block(struct ext4_inode_ref *inode_ref)
177 uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
179 size = (block_size - sizeof(struct ext4_extent_header)) /
180 sizeof(struct ext4_extent);
184 static size_t ext4_ext_space_block_idx(struct ext4_inode_ref *inode_ref)
187 uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
189 size = (block_size - sizeof(struct ext4_extent_header)) /
190 sizeof(struct ext4_extent_index);
194 static size_t ext4_ext_space_root(struct ext4_inode_ref *inode_ref)
198 size = sizeof(inode_ref->inode->blocks);
199 size -= sizeof(struct ext4_extent_header);
200 size /= sizeof(struct ext4_extent);
204 static size_t ext4_ext_space_root_idx(struct ext4_inode_ref *inode_ref)
208 size = sizeof(inode_ref->inode->blocks);
209 size -= sizeof(struct ext4_extent_header);
210 size /= sizeof(struct ext4_extent_index);
214 static size_t ext4_ext_max_entries(struct ext4_inode_ref *inode_ref,
219 if (depth == ext_depth(inode_ref->inode)) {
221 max = ext4_ext_space_root(inode_ref);
223 max = ext4_ext_space_root_idx(inode_ref);
226 max = ext4_ext_space_block(inode_ref);
228 max = ext4_ext_space_block_idx(inode_ref);
234 static ext4_fsblk_t ext4_ext_find_goal(struct ext4_inode_ref *inode_ref,
235 struct ext4_extent_path *path,
239 uint32_t depth = path->depth;
240 struct ext4_extent *ex;
243 * Try to predict block placement assuming that we are
244 * filling in a file which will eventually be
245 * non-sparse --- i.e., in the case of libbfd writing
246 * an ELF object sections out-of-order but in a way
247 * the eventually results in a contiguous object or
248 * executable file, or some database extending a table
249 * space file. However, this is actually somewhat
250 * non-ideal if we are writing a sparse file such as
251 * qemu or KVM writing a raw image file that is going
252 * to stay fairly sparse, since it will end up
253 * fragmenting the file system's free space. Maybe we
254 * should have some hueristics or some way to allow
255 * userspace to pass a hint to file system,
256 * especially if the latter case turns out to be
259 ex = path[depth].extent;
261 ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
262 ext4_lblk_t ext_block = to_le32(ex->first_block);
264 if (block > ext_block)
265 return ext_pblk + (block - ext_block);
267 return ext_pblk - (ext_block - block);
270 /* it looks like index is empty;
271 * try to find starting block from index itself */
272 if (path[depth].block.lb_id)
273 return path[depth].block.lb_id;
276 /* OK. use inode's group */
277 return ext4_fs_inode_to_goal_block(inode_ref);
281 * Allocation for a meta data block
283 static ext4_fsblk_t ext4_ext_new_meta_block(struct ext4_inode_ref *inode_ref,
284 struct ext4_extent_path *path,
285 struct ext4_extent *ex, int *err,
288 ext4_fsblk_t goal, newblock;
290 goal = ext4_ext_find_goal(inode_ref, path, to_le32(ex->first_block));
291 newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, err);
295 static int ext4_ext_dirty(struct ext4_inode_ref *inode_ref,
296 struct ext4_extent_path *path)
298 if (path->block.lb_id)
299 path->block.dirty = true;
301 inode_ref->dirty = true;
306 static void ext4_ext_drop_refs(struct ext4_inode_ref *inode_ref,
307 struct ext4_extent_path *path, bool keep_other)
318 for (i = 0; i <= depth; i++, path++)
319 if (path->block.lb_id) {
320 ext4_block_set(inode_ref->fs->bdev, &path->block);
325 * Temporarily we don't need to support checksum.
327 static uint32_t ext4_ext_block_csum(struct ext4_inode_ref *inode_ref __unused,
328 struct ext4_extent_header *eh __unused)
330 /*TODO: should we add crc32 here ?*/
331 /*return ext4_crc32c(inode->i_csum, eh, EXT4_EXTENT_TAIL_OFFSET(eh));*/
335 static void ext4_extent_block_csum_set(struct ext4_inode_ref *inode_ref,
336 struct ext4_extent_header *eh)
338 struct ext4_extent_tail *tail;
340 tail = find_ext4_extent_tail(eh);
341 tail->et_checksum = ext4_ext_block_csum(inode_ref, eh);
345 * Check that whether the basic information inside the extent header
348 static int ext4_ext_check(struct ext4_inode_ref *inode_ref,
349 struct ext4_extent_header *eh, uint16_t depth,
350 ext4_fsblk_t pblk __unused)
352 struct ext4_extent_tail *tail;
353 const char *error_msg;
356 if (to_le16(eh->magic) != EXT4_EXTENT_MAGIC) {
357 error_msg = "invalid magic";
360 if (to_le16(eh->depth) != depth) {
361 error_msg = "unexpected eh_depth";
364 if (eh->max_entries_count == 0) {
365 error_msg = "invalid eh_max";
368 if (to_le16(eh->entries_count) > to_le16(eh->max_entries_count)) {
369 error_msg = "invalid eh_entries";
373 tail = find_ext4_extent_tail(eh);
374 if (tail->et_checksum != ext4_ext_block_csum(inode_ref, eh)) {
375 /* FIXME: Warning: extent checksum damaged? */
381 ext4_dbg(DEBUG_EXTENT, "Bad extents B+ tree block: %s. "
382 "Blocknr: %" PRId64 "\n",
387 static int read_extent_tree_block(struct ext4_inode_ref *inode_ref,
388 ext4_fsblk_t pblk, int32_t depth,
389 struct ext4_block *bh,
390 uint32_t flags __unused)
394 err = ext4_block_get(inode_ref->fs->bdev, bh, pblk);
398 err = ext4_ext_check(inode_ref, ext_block_hdr(bh), depth, pblk);
405 ext4_block_set(inode_ref->fs->bdev, bh);
411 * ext4_ext_binsearch_idx:
412 * binary search for the closest index of the given block
413 * the header must be checked before calling this
415 static void ext4_ext_binsearch_idx(struct ext4_extent_path *path,
418 struct ext4_extent_header *eh = path->header;
419 struct ext4_extent_index *r, *l, *m;
421 l = EXT_FIRST_INDEX(eh) + 1;
422 r = EXT_LAST_INDEX(eh);
425 if (block < to_le32(m->first_block))
435 * ext4_ext_binsearch:
436 * binary search for closest extent of the given block
437 * the header must be checked before calling this
439 static void ext4_ext_binsearch(struct ext4_extent_path *path, ext4_lblk_t block)
441 struct ext4_extent_header *eh = path->header;
442 struct ext4_extent *r, *l, *m;
444 if (eh->entries_count == 0) {
446 * this leaf is empty:
447 * we get such a leaf in split/add case
452 l = EXT_FIRST_EXTENT(eh) + 1;
453 r = EXT_LAST_EXTENT(eh);
457 if (block < to_le32(m->first_block))
463 path->extent = l - 1;
466 #define EXT4_EXT_PATH_INC_DEPTH 1
468 static int ext4_find_extent(struct ext4_inode_ref *inode_ref, ext4_lblk_t block,
469 struct ext4_extent_path **orig_path, uint32_t flags)
471 struct ext4_extent_header *eh;
472 struct ext4_block bh = EXT4_BLOCK_ZERO();
473 ext4_fsblk_t buf_block = 0;
474 struct ext4_extent_path *path = *orig_path;
475 int32_t depth, ppos = 0;
479 eh = ext_inode_hdr(inode_ref->inode);
480 depth = ext_depth(inode_ref->inode);
483 ext4_ext_drop_refs(inode_ref, path, 0);
484 if (depth > path[0].maxdepth) {
486 *orig_path = path = NULL;
490 int32_t path_depth = depth + EXT4_EXT_PATH_INC_DEPTH;
491 /* account possible depth increase */
492 path = calloc(1, sizeof(struct ext4_extent_path) *
496 path[0].maxdepth = path_depth;
502 /* walk through the tree */
504 ext4_ext_binsearch_idx(path + ppos, block);
505 path[ppos].p_block = ext4_idx_pblock(path[ppos].index);
506 path[ppos].depth = i;
507 path[ppos].extent = NULL;
508 buf_block = path[ppos].p_block;
512 if (!path[ppos].block.lb_id ||
513 path[ppos].block.lb_id != buf_block) {
514 ret = read_extent_tree_block(inode_ref, buf_block, i,
520 ext4_block_set(inode_ref->fs->bdev, &bh);
525 eh = ext_block_hdr(&bh);
526 path[ppos].block = bh;
527 path[ppos].header = eh;
531 path[ppos].depth = i;
532 path[ppos].extent = NULL;
533 path[ppos].index = NULL;
536 ext4_ext_binsearch(path + ppos, block);
537 /* if not an empty leaf */
538 if (path[ppos].extent)
539 path[ppos].p_block = ext4_ext_pblock(path[ppos].extent);
547 ext4_ext_drop_refs(inode_ref, path, 0);
554 static void ext4_ext_init_header(struct ext4_inode_ref *inode_ref,
555 struct ext4_extent_header *eh, int32_t depth)
557 eh->entries_count = 0;
558 eh->max_entries_count = to_le16(ext4_ext_max_entries(inode_ref, depth));
559 eh->magic = to_le16(EXT4_EXTENT_MAGIC);
564 * Be cautious, the buffer_head returned is not yet mark dirtied. */
565 static int ext4_ext_split_node(struct ext4_inode_ref *inode_ref,
566 struct ext4_extent_path *path, int32_t at,
567 struct ext4_extent *newext,
568 ext4_fsblk_t *sibling, struct ext4_block *new_bh)
571 ext4_fsblk_t newblock;
572 struct ext4_block bh = EXT4_BLOCK_ZERO();
573 int32_t depth = ext_depth(inode_ref->inode);
575 ext4_assert(sibling);
577 /* FIXME: currently we split at the point after the current extent. */
578 newblock = ext4_ext_new_meta_block(inode_ref, path, newext, &ret, 0);
582 /* For write access.# */
583 ret = ext4_block_get(inode_ref->fs->bdev, &bh, newblock);
588 /* start copy from next extent */
589 ptrdiff_t m = EXT_MAX_EXTENT(path[at].header) - path[at].extent;
590 struct ext4_extent_header *neh;
591 neh = ext_block_hdr(&bh);
592 ext4_ext_init_header(inode_ref, neh, 0);
594 struct ext4_extent *ex;
595 ex = EXT_FIRST_EXTENT(neh);
596 memmove(ex, path[at].extent + 1,
597 sizeof(struct ext4_extent) * m);
599 to_le16(to_le16(neh->entries_count) + m);
600 path[at].header->entries_count = to_le16(
601 to_le16(path[at].header->entries_count) - m);
602 ret = ext4_ext_dirty(inode_ref, path + at);
607 ptrdiff_t m = EXT_MAX_INDEX(path[at].header) - path[at].index;
608 struct ext4_extent_header *neh;
609 neh = ext_block_hdr(&bh);
610 ext4_ext_init_header(inode_ref, neh, depth - at);
612 struct ext4_extent_index *ix;
613 ix = EXT_FIRST_INDEX(neh);
614 memmove(ix, path[at].index + 1,
615 sizeof(struct ext4_extent) * m);
617 to_le16(to_le16(neh->entries_count) + m);
618 path[at].header->entries_count = to_le16(
619 to_le16(path[at].header->entries_count) - m);
620 ret = ext4_ext_dirty(inode_ref, path + at);
628 ext4_block_set(inode_ref->fs->bdev, &bh);
631 ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
640 static ext4_lblk_t ext4_ext_block_index(struct ext4_extent_header *eh)
643 return to_le32(EXT_FIRST_INDEX(eh)->first_block);
645 return to_le32(EXT_FIRST_EXTENT(eh)->first_block);
648 #define EXT_INODE_HDR_NEED_GROW 0x1
650 struct ext_split_trans {
652 struct ext4_extent_path path;
656 static int ext4_ext_insert_index(struct ext4_inode_ref *inode_ref,
657 struct ext4_extent_path *path, int32_t at,
658 struct ext4_extent *newext,
659 ext4_lblk_t insert_index,
660 ext4_fsblk_t insert_block,
661 struct ext_split_trans *spt)
663 struct ext4_extent_index *ix;
664 struct ext4_extent_path *curp = path + at;
665 struct ext4_block bh = EXT4_BLOCK_ZERO();
668 struct ext4_extent_header *eh;
670 if (curp->index && insert_index == to_le32(curp->index->first_block))
673 if (to_le16(curp->header->entries_count) ==
674 to_le16(curp->header->max_entries_count)) {
676 struct ext4_extent_header *neh;
677 err = ext4_ext_split_node(inode_ref, path, at, newext,
682 neh = ext_block_hdr(&bh);
683 if (insert_index > to_le32(curp->index->first_block)) {
684 /* Make decision which node should be used to
685 * insert the index.*/
686 if (to_le16(neh->entries_count) >
687 to_le16(curp->header->entries_count)) {
690 ix = EXT_LAST_INDEX(eh) + 1;
693 ix = EXT_FIRST_INDEX(eh);
698 ix = EXT_LAST_INDEX(eh);
701 err = EXT_INODE_HDR_NEED_GROW;
706 if (curp->index == NULL) {
707 ix = EXT_FIRST_INDEX(eh);
709 } else if (insert_index > to_le32(curp->index->first_block)) {
711 ix = curp->index + 1;
718 len = EXT_LAST_INDEX(eh) - ix + 1;
719 ext4_assert(len >= 0);
721 memmove(ix + 1, ix, len * sizeof(struct ext4_extent_index));
723 if (ix > EXT_MAX_INDEX(eh)) {
728 ix->first_block = to_le32(insert_index);
729 ext4_idx_store_pblock(ix, insert_block);
730 eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
732 if (ix > EXT_LAST_INDEX(eh)) {
737 if (eh == curp->header)
738 err = ext4_ext_dirty(inode_ref, curp);
745 ext4_block_set(inode_ref->fs->bdev, &bh);
748 } else if (bh.lb_id) {
749 /* If we got a sibling leaf. */
752 spt->path.p_block = ext4_idx_pblock(ix);
753 spt->path.depth = to_le16(eh->depth);
754 spt->path.maxdepth = 0;
755 spt->path.extent = NULL;
756 spt->path.index = ix;
757 spt->path.header = eh;
758 spt->path.block = bh;
761 * If newext->ee_block can be included into the
764 if (to_le32(newext->first_block) >=
765 ext4_ext_block_index(ext_block_hdr(&bh)))
769 curp->p_block = ext4_idx_pblock(ix);
775 curp->p_block = ext4_idx_pblock(ix);
781 * ext4_ext_correct_indexes:
782 * if leaf gets modified and modified extent is first in the leaf,
783 * then we have to correct all indexes above.
785 static int ext4_ext_correct_indexes(struct ext4_inode_ref *inode_ref,
786 struct ext4_extent_path *path)
788 struct ext4_extent_header *eh;
789 int32_t depth = ext_depth(inode_ref->inode);
790 struct ext4_extent *ex;
795 eh = path[depth].header;
796 ex = path[depth].extent;
798 if (ex == NULL || eh == NULL) {
803 /* there is no tree at all */
807 if (ex != EXT_FIRST_EXTENT(eh)) {
808 /* we correct tree if first leaf got modified only */
813 * TODO: we need correction if border is smaller than current one
816 border = path[depth].extent->first_block;
817 path[k].index->first_block = border;
818 err = ext4_ext_dirty(inode_ref, path + k);
823 /* change all left-side indexes */
824 if (path[k + 1].index != EXT_FIRST_INDEX(path[k + 1].header))
826 path[k].index->first_block = border;
827 err = ext4_ext_dirty(inode_ref, path + k);
835 static bool ext4_ext_can_prepend(struct ext4_extent *ex1,
836 struct ext4_extent *ex2)
838 if (ext4_ext_pblock(ex2) + ext4_ext_get_actual_len(ex2) !=
839 ext4_ext_pblock(ex1))
842 if (ext4_ext_is_unwritten(ex1)) {
843 if (ext4_ext_get_actual_len(ex1) +
844 ext4_ext_get_actual_len(ex2) >
845 EXT_UNWRITTEN_MAX_LEN)
847 } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
851 if (to_le32(ex2->first_block) + ext4_ext_get_actual_len(ex2) !=
852 to_le32(ex1->first_block))
858 static bool ext4_ext_can_append(struct ext4_extent *ex1,
859 struct ext4_extent *ex2)
861 if (ext4_ext_pblock(ex1) + ext4_ext_get_actual_len(ex1) !=
862 ext4_ext_pblock(ex2))
865 if (ext4_ext_is_unwritten(ex1)) {
866 if (ext4_ext_get_actual_len(ex1) +
867 ext4_ext_get_actual_len(ex2) >
868 EXT_UNWRITTEN_MAX_LEN)
870 } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
874 if (to_le32(ex1->first_block) + ext4_ext_get_actual_len(ex1) !=
875 to_le32(ex2->first_block))
881 static int ext4_ext_insert_leaf(struct ext4_inode_ref *inode_ref,
882 struct ext4_extent_path *path, int32_t at,
883 struct ext4_extent *newext,
884 struct ext_split_trans *spt, uint32_t flags)
886 struct ext4_extent_path *curp = path + at;
887 struct ext4_extent *ex = curp->extent;
888 struct ext4_block bh = EXT4_BLOCK_ZERO();
892 struct ext4_extent_header *eh = NULL;
895 to_le32(newext->first_block) == to_le32(curp->extent->first_block))
898 if (!(flags & EXT4_EXT_NO_COMBINE)) {
899 if (curp->extent && ext4_ext_can_append(curp->extent, newext)) {
900 unwritten = ext4_ext_is_unwritten(curp->extent);
901 curp->extent->block_count =
902 to_le16(ext4_ext_get_actual_len(curp->extent) +
903 ext4_ext_get_actual_len(newext));
905 ext4_ext_mark_unwritten(curp->extent);
906 err = ext4_ext_dirty(inode_ref, curp);
911 ext4_ext_can_prepend(curp->extent, newext)) {
912 unwritten = ext4_ext_is_unwritten(curp->extent);
913 curp->extent->first_block = newext->first_block;
914 curp->extent->block_count =
915 to_le16(ext4_ext_get_actual_len(curp->extent) +
916 ext4_ext_get_actual_len(newext));
918 ext4_ext_mark_unwritten(curp->extent);
919 err = ext4_ext_dirty(inode_ref, curp);
924 if (to_le16(curp->header->entries_count) ==
925 to_le16(curp->header->max_entries_count)) {
927 struct ext4_extent_header *neh;
928 err = ext4_ext_split_node(inode_ref, path, at, newext,
933 neh = ext_block_hdr(&bh);
934 if (to_le32(newext->first_block) >
935 to_le32(curp->extent->first_block)) {
936 if (to_le16(neh->entries_count) >
937 to_le16(curp->header->entries_count)) {
940 ex = EXT_LAST_EXTENT(eh) + 1;
943 ex = EXT_FIRST_EXTENT(eh);
948 ex = EXT_LAST_EXTENT(eh);
951 err = EXT_INODE_HDR_NEED_GROW;
956 if (curp->extent == NULL) {
957 ex = EXT_FIRST_EXTENT(eh);
959 } else if (to_le32(newext->first_block) >
960 to_le32(curp->extent->first_block)) {
962 ex = curp->extent + 1;
969 len = EXT_LAST_EXTENT(eh) - ex + 1;
970 ext4_assert(len >= 0);
972 memmove(ex + 1, ex, len * sizeof(struct ext4_extent));
974 if (ex > EXT_MAX_EXTENT(eh)) {
979 ex->first_block = newext->first_block;
980 ex->block_count = newext->block_count;
981 ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
982 eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
984 if (ex > EXT_LAST_EXTENT(eh)) {
989 if (eh == curp->header) {
990 err = ext4_ext_correct_indexes(inode_ref, path);
993 err = ext4_ext_dirty(inode_ref, curp);
1000 ext4_block_set(inode_ref->fs->bdev, &bh);
1003 } else if (bh.lb_id) {
1004 /* If we got a sibling leaf. */
1007 spt->path.p_block = ext4_ext_pblock(ex);
1008 spt->path.depth = to_le16(eh->depth);
1009 spt->path.maxdepth = 0;
1010 spt->path.extent = ex;
1011 spt->path.index = NULL;
1012 spt->path.header = eh;
1013 spt->path.block = bh;
1016 * If newext->ee_block can be included into the
1019 if (to_le32(newext->first_block) >=
1020 ext4_ext_block_index(ext_block_hdr(&bh)))
1024 curp->p_block = ext4_ext_pblock(ex);
1030 curp->p_block = ext4_ext_pblock(ex);
1037 * ext4_ext_grow_indepth:
1038 * implements tree growing procedure:
1039 * - allocates new block
1040 * - moves top-level data (index block or leaf) into the new block
1041 * - initializes new top-level, creating index that points to the
1042 * just created block
1044 static int ext4_ext_grow_indepth(struct ext4_inode_ref *inode_ref,
1047 struct ext4_extent_header *neh;
1048 struct ext4_block bh = EXT4_BLOCK_ZERO();
1049 ext4_fsblk_t newblock, goal = 0;
1052 /* Try to prepend new index to old one */
1053 if (ext_depth(inode_ref->inode))
1054 goal = ext4_idx_pblock(
1055 EXT_FIRST_INDEX(ext_inode_hdr(inode_ref->inode)));
1057 goal = ext4_fs_inode_to_goal_block(inode_ref);
1059 newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, &err);
1064 err = ext4_block_get(inode_ref->fs->bdev, &bh, newblock);
1066 ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
1070 /* move top-level index/leaf into new block */
1071 memmove(bh.data, inode_ref->inode->blocks,
1072 sizeof(inode_ref->inode->blocks));
1074 /* set size of new block */
1075 neh = ext_block_hdr(&bh);
1076 /* old root could have indexes or leaves
1077 * so calculate e_max right way */
1078 if (ext_depth(inode_ref->inode))
1079 neh->max_entries_count =
1080 to_le16(ext4_ext_space_block_idx(inode_ref));
1082 neh->max_entries_count =
1083 to_le16(ext4_ext_space_block(inode_ref));
1085 neh->magic = to_le16(EXT4_EXTENT_MAGIC);
1086 ext4_extent_block_csum_set(inode_ref, neh);
1088 /* Update top-level index: num,max,pointer */
1089 neh = ext_inode_hdr(inode_ref->inode);
1090 neh->entries_count = to_le16(1);
1091 ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
1092 if (neh->depth == 0) {
1093 /* Root extent block becomes index block */
1094 neh->max_entries_count =
1095 to_le16(ext4_ext_space_root_idx(inode_ref));
1096 EXT_FIRST_INDEX(neh)
1097 ->first_block = EXT_FIRST_EXTENT(neh)->first_block;
1099 neh->depth = to_le16(to_le16(neh->depth) + 1);
1102 inode_ref->dirty = true;
1103 ext4_block_set(inode_ref->fs->bdev, &bh);
1108 __unused static void print_path(struct ext4_extent_path *path)
1110 int32_t i = path->depth;
1115 ? (path->extent - EXT_FIRST_EXTENT(path->header))
1119 ? (path->index - EXT_FIRST_INDEX(path->header))
1124 ext4_dbg(DEBUG_EXTENT,
1125 "depth %" PRId32 ", p_block: %" PRIu64 ","
1126 "p_ext offset: %td, p_idx offset: %td\n",
1127 i, path->p_block, a, b);
1133 static void ext4_ext_replace_path(struct ext4_inode_ref *inode_ref,
1134 struct ext4_extent_path *path,
1135 struct ext_split_trans *spt, int32_t depth,
1138 int32_t i = depth - level;
1140 ext4_ext_drop_refs(inode_ref, path + i, 1);
1141 path[i] = spt->path;
1144 static int ext4_ext_insert_extent(struct ext4_inode_ref *inode_ref,
1145 struct ext4_extent_path **ppath,
1146 struct ext4_extent *newext, uint32_t flags)
1148 int32_t i, depth, level;
1150 ext4_fsblk_t ptr = 0;
1151 struct ext4_extent_path *path = *ppath;
1152 struct ext_split_trans *spt = NULL;
1153 struct ext_split_trans newblock;
1155 memset(&newblock, 0, sizeof(newblock));
1157 depth = ext_depth(inode_ref->inode);
1158 for (i = depth, level = 0; i >= 0; i--, level++)
1159 if (EXT_HAS_FREE_INDEX(path + i))
1163 spt = calloc(1, sizeof(struct ext_split_trans) * (level));
1171 depth = ext_depth(inode_ref->inode);
1175 ret = ext4_ext_insert_leaf(inode_ref, path, depth - i,
1176 newext, &newblock, flags);
1178 ret = ext4_ext_insert_index(
1179 inode_ref, path, depth - i, newext,
1180 ext4_ext_block_index(
1181 ext_block_hdr(&spt[i - 1].path.block)),
1182 spt[i - 1].ptr, &newblock);
1186 if (ret && ret != EXT_INODE_HDR_NEED_GROW)
1188 else if (spt && ptr && !ret) {
1189 /* Prepare for the next iteration after splitting. */
1194 } while (ptr != 0 && i <= depth);
1196 if (ret == EXT_INODE_HDR_NEED_GROW) {
1197 ret = ext4_ext_grow_indepth(inode_ref, 0);
1200 ret = ext4_find_extent(inode_ref, to_le32(newext->first_block),
1211 ext4_ext_drop_refs(inode_ref, path, 0);
1213 while (--level >= 0 && spt) {
1214 if (spt[level].ptr) {
1215 ext4_ext_free_blocks(inode_ref, spt[level].ptr,
1217 ext4_ext_drop_refs(inode_ref, &spt[level].path,
1222 while (--level >= 0 && spt) {
1223 if (spt[level].switch_to)
1224 ext4_ext_replace_path(inode_ref, path, spt,
1226 else if (spt[level].ptr)
1227 ext4_ext_drop_refs(inode_ref, &spt[level].path,
1237 static void ext4_ext_remove_blocks(struct ext4_inode_ref *inode_ref,
1238 struct ext4_extent *ex, ext4_lblk_t from,
1241 ext4_lblk_t len = to - from + 1;
1244 num = from - to_le32(ex->first_block);
1245 start = ext4_ext_pblock(ex) + num;
1246 ext4_dbg(DEBUG_EXTENT,
1247 "Freeing %" PRIu32 " at %" PRIu64 ", %" PRIu32 "\n", from,
1250 ext4_ext_free_blocks(inode_ref, start, len, 0);
1253 static int ext4_ext_remove_idx(struct ext4_inode_ref *inode_ref,
1254 struct ext4_extent_path *path, int32_t depth)
1260 /* free index block */
1261 leaf = ext4_idx_pblock(path[i].index);
1263 if (path[i].index != EXT_LAST_INDEX(path[i].header)) {
1264 ptrdiff_t len = EXT_LAST_INDEX(path[i].header) - path[i].index;
1265 memmove(path[i].index, path[i].index + 1,
1266 len * sizeof(struct ext4_extent_index));
1269 path[i].header->entries_count =
1270 to_le16(to_le16(path[i].header->entries_count) - 1);
1271 err = ext4_ext_dirty(inode_ref, path + i);
1275 ext4_dbg(DEBUG_EXTENT, "IDX: Freeing %" PRIu32 " at %" PRIu64 ", %d\n",
1276 to_le32(path[i].index->first_block), leaf, 1);
1277 ext4_ext_free_blocks(inode_ref, leaf, 1, 0);
1280 if (path[i].index != EXT_FIRST_INDEX(path[i].header))
1283 path[i - 1].index->first_block = path[i].index->first_block;
1284 err = ext4_ext_dirty(inode_ref, path + i - 1);
1293 static int ext4_ext_remove_leaf(struct ext4_inode_ref *inode_ref,
1294 struct ext4_extent_path *path, ext4_lblk_t from,
1298 int32_t depth = ext_depth(inode_ref->inode);
1299 struct ext4_extent *ex = path[depth].extent;
1300 struct ext4_extent *start_ex, *ex2 = NULL;
1301 struct ext4_extent_header *eh = path[depth].header;
1304 uint16_t new_entries;
1307 new_entries = to_le16(eh->entries_count);
1308 while (ex <= EXT_LAST_EXTENT(path[depth].header) &&
1309 to_le32(ex->first_block) <= to) {
1310 int32_t new_len = 0;
1312 ext4_fsblk_t start, new_start;
1313 new_start = start = to_le32(ex->first_block);
1314 len = ext4_ext_get_actual_len(ex);
1317 len -= from - start;
1318 new_len = from - start;
1321 if (start + len - 1 > to) {
1322 len -= start + len - 1 - to;
1323 new_len = start + len - 1 - to;
1324 new_start += to + 1;
1328 ext4_ext_remove_blocks(inode_ref, ex, start, start + len - 1);
1329 ex->first_block = to_le32(new_start);
1333 unwritten = ext4_ext_is_unwritten(ex);
1334 ex->block_count = to_le16(new_len);
1336 ext4_ext_mark_unwritten(ex);
1345 if (ex2 <= EXT_LAST_EXTENT(eh))
1346 memmove(start_ex, ex2, EXT_LAST_EXTENT(eh) - ex2 + 1);
1348 eh->entries_count = to_le16(new_entries);
1349 ext4_ext_dirty(inode_ref, path + depth);
1350 if (path[depth].extent == EXT_FIRST_EXTENT(eh) && eh->entries_count)
1351 err = ext4_ext_correct_indexes(inode_ref, path);
1353 /* if this leaf is free, then we should
1354 * remove it from index block above */
1355 if (err == EOK && eh->entries_count == 0 && path[depth].block.lb_id)
1356 err = ext4_ext_remove_idx(inode_ref, path, depth - 1);
1361 static int ext4_ext_more_to_rm(struct ext4_extent_path *path, ext4_lblk_t to)
1363 if (!to_le16(path->header->entries_count))
1366 if (path->index > EXT_LAST_INDEX(path->header))
1369 if (to_le32(path->index->first_block) > to)
1375 int ext4_extent_remove_space(struct ext4_inode_ref *inode_ref, ext4_lblk_t from,
1378 struct ext4_extent_path *path = NULL;
1380 int32_t depth = ext_depth(inode_ref->inode);
1383 ret = ext4_find_extent(inode_ref, from, &path, 0);
1387 if (!path[depth].extent ||
1388 !IN_RANGE(from, to_le32(path[depth].extent->first_block),
1389 ext4_ext_get_actual_len(path[depth].extent))) {
1397 struct ext4_extent_header *eh;
1398 struct ext4_extent *first_ex, *last_ex;
1399 ext4_lblk_t leaf_from, leaf_to;
1400 eh = path[i].header;
1401 ext4_assert(to_le16(eh->entries_count) > 0);
1402 first_ex = EXT_FIRST_EXTENT(eh);
1403 last_ex = EXT_LAST_EXTENT(eh);
1404 leaf_from = to_le32(first_ex->first_block);
1405 leaf_to = to_le32(last_ex->first_block) +
1406 ext4_ext_get_actual_len(last_ex) - 1;
1407 if (leaf_from < from)
1413 ext4_ext_remove_leaf(inode_ref, path, leaf_from,
1415 ext4_ext_drop_refs(inode_ref, path + i, 0);
1419 struct ext4_extent_header *eh;
1420 eh = path[i].header;
1421 if (ext4_ext_more_to_rm(path + i, to)) {
1422 struct ext4_block bh = EXT4_BLOCK_ZERO();
1423 if (path[i + 1].block.lb_id)
1424 ext4_ext_drop_refs(inode_ref,
1427 ret = read_extent_tree_block(
1428 inode_ref, ext4_idx_pblock(path[i].index),
1429 depth - i - 1, &bh, 0);
1434 ext4_idx_pblock(path[i].index);
1435 path[i + 1].block = bh;
1436 path[i + 1].header = ext_block_hdr(&bh);
1437 path[i + 1].depth = depth - i - 1;
1439 path[i + 1].extent = EXT_FIRST_EXTENT(
1440 path[i + 1].header);
1443 EXT_FIRST_INDEX(path[i + 1].header);
1447 if (!eh->entries_count && i > 0) {
1449 ret = ext4_ext_remove_idx(inode_ref,
1453 ext4_block_set(inode_ref->fs->bdev,
1461 /* TODO: flexible tree reduction should be here */
1462 if (path->header->entries_count == 0) {
1464 * truncate to zero freed all the tree,
1465 * so we need to correct eh_depth
1467 ext_inode_hdr(inode_ref->inode)->depth = 0;
1468 ext_inode_hdr(inode_ref->inode)->max_entries_count =
1469 to_le16(ext4_ext_space_root(inode_ref));
1470 ret = ext4_ext_dirty(inode_ref, path);
1474 ext4_ext_drop_refs(inode_ref, path, 0);
/* Split the extent under *ppath at logical block @split.
 *
 * If @split equals the extent's first logical block, no split is needed:
 * only the written/unwritten state of the extent is changed.  Otherwise
 * the extent is shrunk in place to end at @split and a second extent
 * starting at @split (same data, shifted physical start) is inserted.
 * @split_flag is a mask of EXT4_EXT_MARK_UNWRIT1 / EXT4_EXT_MARK_UNWRIT2
 * selecting which half ends up marked unwritten.
 *
 * @param inode_ref  i-node whose extent tree is modified
 * @param ppath      in/out path to the extent; may be reallocated by
 *                   ext4_ext_insert_extent() below
 * @param split      logical block at which to split
 * @param split_flag EXT4_EXT_MARK_UNWRIT1/2 mask
 * @return EOK on success, error code otherwise (per visible err usage)
 *
 * NOTE(review): this excerpt is missing several original lines (the
 * opening brace, an `else`, error-check branches, declarations of
 * ee_len/err, and the restore_extent_len label); the statements below
 * are preserved verbatim — confirm against the full source.
 */
static int ext4_ext_split_extent_at(struct ext4_inode_ref *inode_ref,
				    struct ext4_extent_path **ppath,
				    ext4_lblk_t split, uint32_t split_flag)
	struct ext4_extent *ex, newex;
	ext4_fsblk_t newblock;
	ext4_lblk_t ee_block;
	int32_t depth = ext_depth(inode_ref->inode);
	/* Extent the path currently points at, and its logical span. */
	ex = (*ppath)[depth].extent;
	ee_block = to_le32(ex->first_block);
	ee_len = ext4_ext_get_actual_len(ex);
	/* Physical block where the right-hand half will begin. */
	newblock = split - ee_block + ext4_ext_pblock(ex);
	if (split == ee_block) {
		/*
		 * case b: block @split is the block that the extent begins with
		 * then we just change the state of the extent, and splitting
		 * is not needed.
		 */
		if (split_flag & EXT4_EXT_MARK_UNWRIT2)
			ext4_ext_mark_unwritten(ex);
		ext4_ext_mark_initialized(ex); /* else-branch in full source */
		err = ext4_ext_dirty(inode_ref, *ppath + depth);
	/* case a: shrink the left half in place ... */
	ex->block_count = to_le16(split - ee_block);
	if (split_flag & EXT4_EXT_MARK_UNWRIT1)
		ext4_ext_mark_unwritten(ex);
	err = ext4_ext_dirty(inode_ref, *ppath + depth);
	/* ... then insert the right half as a new extent, without
	 * merging it back into its neighbour. */
	newex.first_block = to_le32(split);
	newex.block_count = to_le16(ee_len - (split - ee_block));
	ext4_ext_store_pblock(&newex, newblock);
	if (split_flag & EXT4_EXT_MARK_UNWRIT2)
		ext4_ext_mark_unwritten(&newex);
	err = ext4_ext_insert_extent(inode_ref, ppath, &newex,
				     EXT4_EXT_NO_COMBINE);
		goto restore_extent_len; /* insert failed: undo the shrink */
	/* restore_extent_len: put the original length back on error. */
	ex->block_count = to_le16(ee_len);
	err = ext4_ext_dirty(inode_ref, *ppath + depth);
/* Convert the unwritten extent containing [@split, @split + @blocks)
 * to initialized, by splitting so that exactly that range becomes its
 * own extent and clearing the unwritten flag on it.
 *
 * Three cases, by where the range sits inside the extent:
 *  - range is the tail of the extent: one split at @split, left half
 *    stays unwritten (UNWRIT1);
 *  - range is the head of the extent: one split at @split + @blocks,
 *    right half stays unwritten (UNWRIT2);
 *  - range is in the middle: two splits, producing three extents with
 *    the middle one initialized.
 *
 * @param inode_ref  i-node whose extent tree is modified
 * @param ppath      in/out path; may be reallocated by the splits
 * @param split      first logical block of the range to initialize
 * @param blocks     number of blocks to initialize
 * @return EOK on success, error code otherwise
 *
 * NOTE(review): the opening brace, the `} else {` before the
 * three-way split, and the trailing error-propagation/closing lines
 * are missing from this excerpt; statements kept verbatim.
 */
static int ext4_ext_convert_to_initialized(struct ext4_inode_ref *inode_ref,
					   struct ext4_extent_path **ppath,
					   ext4_lblk_t split, uint32_t blocks)
	int32_t depth = ext_depth(inode_ref->inode), err = EOK;
	struct ext4_extent *ex = (*ppath)[depth].extent;
	/* Range must start inside (or at the start of) this extent. */
	ext4_assert(to_le32(ex->first_block) <= split);
	if (split + blocks ==
	    to_le32(ex->first_block) + ext4_ext_get_actual_len(ex)) {
		/* split and initialize right part */
		err = ext4_ext_split_extent_at(inode_ref, ppath, split,
					       EXT4_EXT_MARK_UNWRIT1);
	} else if (to_le32(ex->first_block) == split) {
		/* split and initialize left part */
		err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
					       EXT4_EXT_MARK_UNWRIT2);
		/* split 1 extent to 3 and initialize the 2nd */
		err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
					       EXT4_EXT_MARK_UNWRIT1 |
					       EXT4_EXT_MARK_UNWRIT2);
		err = ext4_ext_split_extent_at(inode_ref, ppath, split,
					       EXT4_EXT_MARK_UNWRIT1);
/*
 * ext4_ext_next_allocated_block:
 * returns allocated block in subsequent extent or EXT_MAX_BLOCKS.
 * NOTE: it considers block number from index entry as
 * allocated block. Thus, index entries have to be consistent
 * with leaves.
 */
/* Fully parenthesized so the cast binds to -1 regardless of the
 * expression the macro expands into (e.g. `x * EXT_MAX_BLOCKS` would
 * otherwise parse as `x * (ext4_lblk_t) - 1`). Value is the maximum
 * ext4_lblk_t, i.e. all-ones. */
#define EXT_MAX_BLOCKS ((ext4_lblk_t)-1)
/* Walk the path from the leaf upward and return the logical block of
 * the next allocated extent (or next index entry's first block) after
 * the current position; EXT_MAX_BLOCKS if there is none.
 *
 * NOTE(review): several lines are missing from this excerpt — the
 * opening brace, the `return to_le32(` halves of both return
 * statements, the `} else {` separating the leaf and index cases, the
 * `depth--` step, and closing braces.  Visible statements kept
 * verbatim; confirm structure against the full source.
 */
static ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_extent_path *path)
	depth = path->depth;
	/* Empty root leaf: nothing allocated at all. */
	if (depth == 0 && path->extent == NULL)
		return EXT_MAX_BLOCKS;
	while (depth >= 0) {
		if (depth == path->depth) {
			/* leaf level: peek at the extent after the current one */
			if (path[depth].extent &&
			    path[depth].extent !=
				EXT_LAST_EXTENT(path[depth].header))
				path[depth].extent[1].first_block);
			/* index level: peek at the next index entry */
			if (path[depth].index !=
			    EXT_LAST_INDEX(path[depth].header))
				path[depth].index[1].first_block);
	/* No later extent or index entry on any level of the path. */
	return EXT_MAX_BLOCKS;
/* Zero-fill @blocks_count on-disk blocks starting at a given physical
 * block, by reading each block into the cache, memset()ing it to zero,
 * and writing it back.  Used when handing out blocks from an unwritten
 * extent so stale device contents are never exposed.
 *
 * @return EOK on success, error from block get/set otherwise (per
 *         visible err usage).
 *
 * NOTE(review): the parameter line declaring the starting block
 * (presumably `ext4_fsblk_t block`), the declarations of `i`/`err`,
 * the per-call error checks, and the final return are missing from
 * this excerpt.  The cast of `block` to uint32_t below truncates a
 * 64-bit physical block number — TODO confirm intent in full source.
 */
static int ext4_ext_zero_unwritten_range(struct ext4_inode_ref *inode_ref,
					 uint32_t blocks_count)
	uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
	for (i = 0; i < blocks_count; i++) {
		uint32_t block_u32 = (uint32_t)block + (uint32_t)i;
		struct ext4_block bh = EXT4_BLOCK_ZERO();
		/* Fetch the block through the cache, zero it, mark dirty. */
		err = ext4_block_get(inode_ref->fs->bdev, &bh, block_u32);
		memset(bh.data, 0, block_size);
		err = ext4_block_set(inode_ref->fs->bdev, &bh);
1631 int ext4_extent_get_blocks(struct ext4_inode_ref *inode_ref, ext4_fsblk_t iblock,
1632 uint32_t max_blocks, ext4_fsblk_t *result, bool create,
1633 uint32_t *blocks_count)
1635 struct ext4_extent_path *path = NULL;
1636 struct ext4_extent newex, *ex;
1640 uint32_t allocated = 0;
1641 ext4_fsblk_t next, newblock;
1649 /* find extent for this block */
1650 err = ext4_find_extent(inode_ref, iblock, &path, 0);
1656 depth = ext_depth(inode_ref->inode);
1659 * consistent leaf must not be empty
1660 * this situations is possible, though, _during_ tree modification
1661 * this is why assert can't be put in ext4_ext_find_extent()
1663 if ((ex = path[depth].extent)) {
1664 ext4_lblk_t ee_block = to_le32(ex->first_block);
1665 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
1666 uint16_t ee_len = ext4_ext_get_actual_len(ex);
1667 /* if found exent covers block, simple return it */
1668 if (IN_RANGE(iblock, ee_block, ee_len)) {
1669 /* number of remain blocks in the extent */
1670 allocated = ee_len - (iblock - ee_block);
1672 if (ext4_ext_is_unwritten(ex)) {
1674 uint32_t zero_range;
1675 zero_range = allocated;
1676 if (zero_range > max_blocks)
1677 zero_range = max_blocks;
1679 newblock = iblock - ee_block + ee_start;
1680 err = ext4_ext_zero_unwritten_range(
1681 inode_ref, newblock, zero_range);
1685 err = ext4_ext_convert_to_initialized(
1686 inode_ref, &path, iblock,
1695 newblock = iblock - ee_block + ee_start;
1702 * requested block isn't allocated yet
1703 * we couldn't try to create block if create flag is zero
1709 /* find next allocated block so that we know how many
1710 * blocks we can allocate without ovelapping next extent */
1711 next = ext4_ext_next_allocated_block(path);
1712 allocated = next - iblock;
1713 if (allocated > max_blocks)
1714 allocated = max_blocks;
1716 /* allocate new block */
1717 goal = ext4_ext_find_goal(inode_ref, path, iblock);
1718 newblock = ext4_new_meta_blocks(inode_ref, goal, 0, &allocated, &err);
1722 /* try to insert new extent into found leaf and return */
1723 newex.first_block = to_le32(iblock);
1724 ext4_ext_store_pblock(&newex, newblock);
1725 newex.block_count = to_le16(allocated);
1726 err = ext4_ext_insert_extent(inode_ref, &path, &newex, 0);
1728 /* free data blocks we just allocated */
1729 ext4_ext_free_blocks(inode_ref, ext4_ext_pblock(&newex),
1730 to_le16(newex.block_count), 0);
1734 /* previous routine could use block we allocated */
1735 newblock = ext4_ext_pblock(&newex);
1738 if (allocated > max_blocks)
1739 allocated = max_blocks;
1745 *blocks_count = allocated;
1749 ext4_ext_drop_refs(inode_ref, path, 0);