2 * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3 * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include "ext4_config.h"
30 #include "ext4_types.h"
31 #include "ext4_misc.h"
32 #include "ext4_errno.h"
33 #include "ext4_debug.h"
35 #include "ext4_blockdev.h"
36 #include "ext4_trans.h"
38 #include "ext4_super.h"
39 #include "ext4_crc32.h"
40 #include "ext4_balloc.h"
41 #include "ext4_extent.h"
/* Flag bits passed to the extent-splitting helpers (see
 * ext4_ext_split_extent_at / ext4_ext_insert_leaf below). */
49 * used by extent splitting.
51 #define EXT4_EXT_MARK_UNWRIT1 0x02 /* mark first half unwritten */
52 #define EXT4_EXT_MARK_UNWRIT2 0x04 /* mark second half unwritten */
53 #define EXT4_EXT_DATA_VALID1 0x08 /* first half contains valid data */
54 #define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */
55 #define EXT4_EXT_NO_COMBINE 0x20 /* do not combine two extents */
/*
 * Locate the ext4_extent_tail (checksum area) that sits after the last
 * possible entry of the node described by @eh.
 */
static struct ext4_extent_tail *
find_ext4_extent_tail(struct ext4_extent_header *eh)
{
	char *base = (char *)eh;

	return (struct ext4_extent_tail *)(base + EXT4_EXTENT_TAIL_OFFSET(eh));
}
64 static struct ext4_extent_header *ext_inode_hdr(struct ext4_inode *inode)
66 return (struct ext4_extent_header *)inode->blocks;
69 static struct ext4_extent_header *ext_block_hdr(struct ext4_block *block)
71 return (struct ext4_extent_header *)block->data;
74 static uint16_t ext_depth(struct ext4_inode *inode)
76 return to_le16(ext_inode_hdr(inode)->depth);
79 static uint16_t ext4_ext_get_actual_len(struct ext4_extent *ext)
81 return (to_le16(ext->block_count) <= EXT_INIT_MAX_LEN
82 ? to_le16(ext->block_count)
83 : (to_le16(ext->block_count) - EXT_INIT_MAX_LEN));
86 static void ext4_ext_mark_initialized(struct ext4_extent *ext)
88 ext->block_count = to_le16(ext4_ext_get_actual_len(ext));
91 static void ext4_ext_mark_unwritten(struct ext4_extent *ext)
93 ext->block_count |= to_le16(EXT_INIT_MAX_LEN);
96 static int ext4_ext_is_unwritten(struct ext4_extent *ext)
98 /* Extent with ee_len of 0x8000 is treated as an initialized extent */
99 return (to_le16(ext->block_count) > EXT_INIT_MAX_LEN);
104 * combine low and high parts of physical block number into ext4_fsblk_t
106 static ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex)
110 block = to_le32(ex->start_lo);
111 block |= ((ext4_fsblk_t)to_le16(ex->start_hi) << 31) << 1;
117 * combine low and high parts of a leaf physical block number into ext4_fsblk_t
119 static ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_index *ix)
123 block = to_le32(ix->leaf_lo);
124 block |= ((ext4_fsblk_t)to_le16(ix->leaf_hi) << 31) << 1;
129 * ext4_ext_store_pblock:
130 * stores a large physical block number into an extent struct,
131 * breaking it into parts
133 static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
135 ex->start_lo = to_le32((uint32_t)(pb & 0xffffffff));
136 ex->start_hi = to_le16((uint16_t)((pb >> 32)) & 0xffff);
140 * ext4_idx_store_pblock:
141 * stores a large physical block number into an index struct,
142 * breaking it into parts
144 static void ext4_idx_store_pblock(struct ext4_extent_index *ix, ext4_fsblk_t pb)
146 ix->leaf_lo = to_le32((uint32_t)(pb & 0xffffffff));
147 ix->leaf_hi = to_le16((uint16_t)((pb >> 32)) & 0xffff);
150 static int ext4_allocate_single_block(struct ext4_inode_ref *inode_ref,
151 ext4_fsblk_t goal, ext4_fsblk_t *blockp)
153 return ext4_balloc_alloc_block(inode_ref, goal, blockp);
156 static ext4_fsblk_t ext4_new_meta_blocks(struct ext4_inode_ref *inode_ref,
158 uint32_t flags __unused,
159 uint32_t *count, int *errp)
161 ext4_fsblk_t block = 0;
163 *errp = ext4_allocate_single_block(inode_ref, goal, &block);
169 static void ext4_ext_free_blocks(struct ext4_inode_ref *inode_ref,
170 ext4_fsblk_t block, uint32_t count,
171 uint32_t flags __unused)
173 ext4_balloc_free_blocks(inode_ref, block, count);
/* Capacity of a full on-disk tree block in ext4_extent entries:
 * (block size - header size) / entry size. */
176 static uint16_t ext4_ext_space_block(struct ext4_inode_ref *inode_ref)
179 uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
181 size = (block_size - sizeof(struct ext4_extent_header)) /
182 sizeof(struct ext4_extent);
/* NOTE(review): the AGGRESSIVE_TEST branch (artificially small nodes)
 * is truncated in this view — confirm against the full file. */
183 #ifdef AGGRESSIVE_TEST
/* Capacity of a full on-disk tree block in ext4_extent_index entries. */
190 static uint16_t ext4_ext_space_block_idx(struct ext4_inode_ref *inode_ref)
193 uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
195 size = (block_size - sizeof(struct ext4_extent_header)) /
196 sizeof(struct ext4_extent_index);
197 #ifdef AGGRESSIVE_TEST
/* Capacity of the in-inode root (i_block[] area) in ext4_extent entries. */
204 static uint16_t ext4_ext_space_root(struct ext4_inode_ref *inode_ref)
208 size = sizeof(inode_ref->inode->blocks);
209 size -= sizeof(struct ext4_extent_header);
210 size /= sizeof(struct ext4_extent);
211 #ifdef AGGRESSIVE_TEST
/* Capacity of the in-inode root in ext4_extent_index entries. */
218 static uint16_t ext4_ext_space_root_idx(struct ext4_inode_ref *inode_ref)
222 size = sizeof(inode_ref->inode->blocks);
223 size -= sizeof(struct ext4_extent_header);
224 size /= sizeof(struct ext4_extent_index);
225 #ifdef AGGRESSIVE_TEST
/* Upper bound of entries for a node at the given depth: the in-inode
 * root has a smaller capacity than a full disk block, and leaves
 * (depth 0 nodes) hold extents while interior nodes hold indexes.
 * NOTE(review): the depth parameter line of the signature is not
 * visible in this chunk. */
232 static uint16_t ext4_ext_max_entries(struct ext4_inode_ref *inode_ref,
237 if (depth == ext_depth(inode_ref->inode)) {
239 max = ext4_ext_space_root(inode_ref);
241 max = ext4_ext_space_root_idx(inode_ref);
244 max = ext4_ext_space_block(inode_ref);
246 max = ext4_ext_space_block_idx(inode_ref);
/* Choose a physical-block allocation goal for logical block @block:
 * prefer a position relative to the extent found in @path, else the
 * block holding the deepest tree node, else the inode's group. */
252 static ext4_fsblk_t ext4_ext_find_goal(struct ext4_inode_ref *inode_ref,
253 struct ext4_extent_path *path,
257 uint32_t depth = path->depth;
258 struct ext4_extent *ex;
261 * Try to predict block placement assuming that we are
262 * filling in a file which will eventually be
263 * non-sparse --- i.e., in the case of libbfd writing
264 * an ELF object sections out-of-order but in a way
265 * the eventually results in a contiguous object or
266 * executable file, or some database extending a table
267 * space file. However, this is actually somewhat
268 * non-ideal if we are writing a sparse file such as
269 * qemu or KVM writing a raw image file that is going
270 * to stay fairly sparse, since it will end up
271 * fragmenting the file system's free space. Maybe we
272 * should have some hueristics or some way to allow
273 * userspace to pass a hint to file system,
274 * especially if the latter case turns out to be
277 ex = path[depth].extent;
279 ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
280 ext4_lblk_t ext_block = to_le32(ex->first_block);
/* Place the goal at the same offset from the neighbouring extent. */
282 if (block > ext_block)
283 return ext_pblk + (block - ext_block);
285 return ext_pblk - (ext_block - block);
288 /* it looks like index is empty;
289 * try to find starting block from index itself */
290 if (path[depth].block.lb_id)
291 return path[depth].block.lb_id;
294 /* OK. use inode's group */
295 return ext4_fs_inode_to_goal_block(inode_ref);
299 * Allocation for a meta data block
301 static ext4_fsblk_t ext4_ext_new_meta_block(struct ext4_inode_ref *inode_ref,
302 struct ext4_extent_path *path,
303 struct ext4_extent *ex, int *err,
306 ext4_fsblk_t goal, newblock;
308 goal = ext4_ext_find_goal(inode_ref, path, to_le32(ex->first_block));
309 newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, err);
/* Metadata checksum of an extent tree node: crc32c seeded with the fs
 * UUID, folded with the inode number and generation, then run over the
 * node contents up to (excluding) the checksum field.  When metadata
 * checksumming is compiled out, the #else stub below always yields 0. */
313 #if CONFIG_META_CSUM_ENABLE
314 static uint32_t ext4_ext_block_csum(struct ext4_inode_ref *inode_ref,
315 struct ext4_extent_header *eh)
317 uint32_t checksum = 0;
318 struct ext4_sblock *sb = &inode_ref->fs->sb;
320 if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
321 uint32_t ino_index = to_le32(inode_ref->index);
323 to_le32(ext4_inode_get_generation(inode_ref->inode));
324 /* First calculate crc32 checksum against fs uuid */
326 ext4_crc32c(EXT4_CRC32_INIT, sb->uuid, sizeof(sb->uuid));
327 /* Then calculate crc32 checksum against inode number
328 * and inode generation */
329 checksum = ext4_crc32c(checksum, &ino_index, sizeof(ino_index));
330 checksum = ext4_crc32c(checksum, &ino_gen, sizeof(ino_gen));
331 /* Finally calculate crc32 checksum against
332 * the entire extent block up to the checksum field */
334 ext4_crc32c(checksum, eh, EXT4_EXTENT_TAIL_OFFSET(eh));
339 #define ext4_ext_block_csum(...) 0
/* Recompute and store the checksum into the node's trailing tail area. */
343 ext4_extent_block_csum_set(struct ext4_inode_ref *inode_ref __unused,
344 struct ext4_extent_header *eh)
346 struct ext4_extent_tail *tail;
348 tail = find_ext4_extent_tail(eh);
349 tail->et_checksum = to_le32(ext4_ext_block_csum(inode_ref, eh));
352 static int ext4_ext_dirty(struct ext4_inode_ref *inode_ref,
353 struct ext4_extent_path *path)
355 if (path->block.lb_id)
356 ext4_trans_set_block_dirty(path->block.buf);
358 inode_ref->dirty = true;
/* Release the block references held by a path array; blocks that are
 * dirty get their checksum refreshed before being put back. */
363 static void ext4_ext_drop_refs(struct ext4_inode_ref *inode_ref,
364 struct ext4_extent_path *path, bool keep_other)
375 for (i = 0; i <= depth; i++, path++) {
376 if (path->block.lb_id) {
377 if (ext4_bcache_test_flag(path->block.buf, BC_DIRTY))
378 ext4_extent_block_csum_set(inode_ref,
381 ext4_block_set(inode_ref->fs->bdev, &path->block);
387 * Check that whether the basic information inside the extent header
/* Validates magic, depth, entry counts and — when the metadata_csum
 * RO-compat feature is set — the trailing checksum.  Corruption is
 * reported via ext4_dbg with a descriptive message. */
390 static int ext4_ext_check(struct ext4_inode_ref *inode_ref,
391 struct ext4_extent_header *eh, uint16_t depth,
392 ext4_fsblk_t pblk __unused)
394 struct ext4_extent_tail *tail;
395 struct ext4_sblock *sb = &inode_ref->fs->sb;
396 const char *error_msg;
399 if (to_le16(eh->magic) != EXT4_EXTENT_MAGIC) {
400 error_msg = "invalid magic";
403 if (to_le16(eh->depth) != depth) {
404 error_msg = "unexpected eh_depth";
407 if (eh->max_entries_count == 0) {
408 error_msg = "invalid eh_max";
411 if (to_le16(eh->entries_count) > to_le16(eh->max_entries_count)) {
412 error_msg = "invalid eh_entries";
416 tail = find_ext4_extent_tail(eh);
417 if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
418 if (tail->et_checksum !=
419 to_le32(ext4_ext_block_csum(inode_ref, eh))) {
420 ext4_dbg(DEBUG_EXTENT,
421 DBG_WARN "Extent block checksum failed."
422 "Blocknr: %" PRIu64 "\n",
430 ext4_dbg(DEBUG_EXTENT, "Bad extents B+ tree block: %s. "
431 "Blocknr: %" PRId64 "\n",
/* Read one tree node at @pblk and sanity-check its header against the
 * expected @depth; the reference is released again on check failure. */
436 static int read_extent_tree_block(struct ext4_inode_ref *inode_ref,
437 ext4_fsblk_t pblk, int32_t depth,
438 struct ext4_block *bh,
439 uint32_t flags __unused)
443 err = ext4_trans_block_get(inode_ref->fs->bdev, bh, pblk);
447 err = ext4_ext_check(inode_ref, ext_block_hdr(bh), depth, pblk);
454 ext4_block_set(inode_ref->fs->bdev, bh);
460 * ext4_ext_binsearch_idx:
461 * binary search for the closest index of the given block
462 * the header must be checked before calling this
/* Result is stored into path->index (binary-search loop body is
 * truncated in this view). */
464 static void ext4_ext_binsearch_idx(struct ext4_extent_path *path,
467 struct ext4_extent_header *eh = path->header;
468 struct ext4_extent_index *r, *l, *m;
470 l = EXT_FIRST_INDEX(eh) + 1;
471 r = EXT_LAST_INDEX(eh);
474 if (block < to_le32(m->first_block))
484 * ext4_ext_binsearch:
485 * binary search for closest extent of the given block
486 * the header must be checked before calling this
488 static void ext4_ext_binsearch(struct ext4_extent_path *path, ext4_lblk_t block)
490 struct ext4_extent_header *eh = path->header;
491 struct ext4_extent *r, *l, *m;
493 if (eh->entries_count == 0) {
495 * this leaf is empty:
496 * we get such a leaf in split/add case
501 l = EXT_FIRST_EXTENT(eh) + 1;
502 r = EXT_LAST_EXTENT(eh);
506 if (block < to_le32(m->first_block))
/* l converged one past the closest extent <= block. */
512 path->extent = l - 1;
/* Walk the extent tree from the root down to the leaf covering @block,
 * filling *orig_path with one ext4_extent_path entry per level.  An
 * existing path array is reused when deep enough, otherwise it is
 * reallocated (with room for one extra level to allow tree growth).
 * On error all block references are dropped. */
515 static int ext4_find_extent(struct ext4_inode_ref *inode_ref, ext4_lblk_t block,
516 struct ext4_extent_path **orig_path, uint32_t flags)
518 struct ext4_extent_header *eh;
519 struct ext4_block bh = EXT4_BLOCK_ZERO();
520 ext4_fsblk_t buf_block = 0;
521 struct ext4_extent_path *path = *orig_path;
522 int32_t depth, ppos = 0;
526 eh = ext_inode_hdr(inode_ref->inode);
527 depth = ext_depth(inode_ref->inode);
530 ext4_ext_drop_refs(inode_ref, path, 0);
531 if (depth > path[0].maxdepth) {
533 *orig_path = path = NULL;
537 int32_t path_depth = depth + 1;
538 /* account possible depth increase */
539 path = calloc(1, sizeof(struct ext4_extent_path) *
543 path[0].maxdepth = path_depth;
549 /* walk through the tree */
551 ext4_ext_binsearch_idx(path + ppos, block);
552 path[ppos].p_block = ext4_idx_pblock(path[ppos].index);
553 path[ppos].depth = i;
554 path[ppos].extent = NULL;
555 buf_block = path[ppos].p_block;
/* Only (re)read the child block if the cached one differs. */
559 if (!path[ppos].block.lb_id ||
560 path[ppos].block.lb_id != buf_block) {
561 ret = read_extent_tree_block(inode_ref, buf_block, i,
567 ext4_block_set(inode_ref->fs->bdev, &bh);
572 eh = ext_block_hdr(&bh);
573 path[ppos].block = bh;
574 path[ppos].header = eh;
578 path[ppos].depth = i;
579 path[ppos].extent = NULL;
580 path[ppos].index = NULL;
583 ext4_ext_binsearch(path + ppos, block);
584 /* if not an empty leaf */
585 if (path[ppos].extent)
586 path[ppos].p_block = ext4_ext_pblock(path[ppos].extent);
594 ext4_ext_drop_refs(inode_ref, path, 0);
601 static void ext4_ext_init_header(struct ext4_inode_ref *inode_ref,
602 struct ext4_extent_header *eh, int32_t depth)
604 eh->entries_count = 0;
605 eh->max_entries_count = to_le16(ext4_ext_max_entries(inode_ref, depth));
606 eh->magic = to_le16(EXT4_EXTENT_MAGIC);
/* Insert an index entry (@insert_index -> @insert_block) into the node
 * at path[at], keeping entries sorted; optionally repoint the path slot
 * at the new entry when @set_to_ix is true.  Fails when the node is
 * full or the key already exists. */
610 static int ext4_ext_insert_index(struct ext4_inode_ref *inode_ref,
611 struct ext4_extent_path *path, int at,
612 ext4_lblk_t insert_index,
613 ext4_fsblk_t insert_block, bool set_to_ix)
615 struct ext4_extent_index *ix;
616 struct ext4_extent_path *curp = path + at;
618 struct ext4_extent_header *eh;
620 if (curp->index && insert_index == to_le32(curp->index->first_block))
623 if (to_le16(curp->header->entries_count) ==
624 to_le16(curp->header->max_entries_count))
/* Pick the slot: first slot for an empty node, after or at the
 * current index depending on the key order. */
628 if (curp->index == NULL) {
629 ix = EXT_FIRST_INDEX(eh);
631 } else if (insert_index > to_le32(curp->index->first_block)) {
633 ix = curp->index + 1;
639 if (ix > EXT_MAX_INDEX(eh))
/* Shift the tail of the array right to open the slot. */
642 len = EXT_LAST_INDEX(eh) - ix + 1;
643 ext4_assert(len >= 0);
645 memmove(ix + 1, ix, len * sizeof(struct ext4_extent_index));
647 ix->first_block = to_le32(insert_index);
648 ext4_idx_store_pblock(ix, insert_block);
649 eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
651 if (ix > EXT_LAST_INDEX(eh)) {
656 err = ext4_ext_dirty(inode_ref, curp);
659 if (err == EOK && set_to_ix) {
661 curp->p_block = ext4_idx_pblock(ix);
/* Split the tree along @path from the leaf up to level @at: for each
 * level a new sibling node is allocated and the entries after the
 * current position are moved into it; the new chain is described in
 * @npath and finally linked in via ext4_ext_insert_index().  On
 * failure all freshly allocated blocks are released again.
 * *ins_right_leaf tells the caller which sibling should receive the
 * extent being inserted. */
666 static int ext4_ext_split_node(struct ext4_inode_ref *inode_ref,
667 struct ext4_extent_path *path, int at,
668 struct ext4_extent *newext,
669 struct ext4_extent_path *npath,
670 bool *ins_right_leaf)
672 int i, npath_at, ret;
673 ext4_lblk_t insert_index;
674 ext4_fsblk_t newblock = 0;
675 int depth = ext_depth(inode_ref->inode);
676 npath_at = depth - at;
/* The split key is the first block after the current leaf extent,
 * or the new extent's start when splitting at the leaf's end. */
680 if (path[depth].extent != EXT_MAX_EXTENT(path[depth].header))
681 insert_index = path[depth].extent[1].first_block;
683 insert_index = newext->first_block;
685 for (i = depth; i >= at; i--, npath_at--) {
686 struct ext4_block bh = EXT4_BLOCK_ZERO();
688 /* FIXME: currently we split at the point after the current
691 ext4_ext_new_meta_block(inode_ref, path, newext, &ret, 0);
695 /* For write access.*/
696 ret = ext4_trans_block_get_noread(inode_ref->fs->bdev, &bh,
702 /* start copy from next extent */
703 int m = EXT_MAX_EXTENT(path[i].header) - path[i].extent;
704 struct ext4_extent_header *neh;
705 struct ext4_extent *ex;
706 neh = ext_block_hdr(&bh);
707 ex = EXT_FIRST_EXTENT(neh);
708 ext4_ext_init_header(inode_ref, neh, 0);
710 memmove(ex, path[i].extent + 1,
711 sizeof(struct ext4_extent) * m);
713 to_le16(to_le16(neh->entries_count) + m);
714 path[i].header->entries_count = to_le16(
715 to_le16(path[i].header->entries_count) - m);
716 ret = ext4_ext_dirty(inode_ref, path + i);
720 npath[npath_at].p_block = ext4_ext_pblock(ex);
721 npath[npath_at].extent = ex;
723 npath[npath_at].p_block = 0;
724 npath[npath_at].extent = NULL;
727 npath[npath_at].depth = to_le16(neh->depth);
728 npath[npath_at].maxdepth = 0;
729 npath[npath_at].index = NULL;
730 npath[npath_at].header = neh;
731 npath[npath_at].block = bh;
733 ext4_trans_set_block_dirty(bh.buf);
/* Interior level: move the index entries after the current one. */
735 int m = EXT_MAX_INDEX(path[i].header) - path[i].index;
736 struct ext4_extent_header *neh;
737 struct ext4_extent_index *ix;
738 neh = ext_block_hdr(&bh);
739 ix = EXT_FIRST_INDEX(neh);
740 ext4_ext_init_header(inode_ref, neh, depth - i);
741 ix->first_block = to_le32(insert_index);
742 ext4_idx_store_pblock(ix,
743 npath[npath_at + 1].block.lb_id);
744 neh->entries_count = to_le16(1);
/* NOTE(review): this memmove copies ext4_extent_index entries but
 * sizes them with sizeof(struct ext4_extent).  The two structs happen
 * to be the same size on disk, so this works, but
 * sizeof(struct ext4_extent_index) would be the correct and
 * self-documenting choice — confirm against upstream. */
746 memmove(ix + 1, path[i].index + 1,
747 sizeof(struct ext4_extent) * m);
749 to_le16(to_le16(neh->entries_count) + m);
750 path[i].header->entries_count = to_le16(
751 to_le16(path[i].header->entries_count) - m);
752 ret = ext4_ext_dirty(inode_ref, path + i);
757 npath[npath_at].p_block = ext4_idx_pblock(ix);
758 npath[npath_at].depth = to_le16(neh->depth);
759 npath[npath_at].maxdepth = 0;
760 npath[npath_at].extent = NULL;
761 npath[npath_at].index = ix;
762 npath[npath_at].header = neh;
763 npath[npath_at].block = bh;
765 ext4_trans_set_block_dirty(bh.buf);
771 * If newext->first_block can be included into the
774 if (to_le32(newext->first_block) < insert_index)
775 *ins_right_leaf = false;
777 *ins_right_leaf = true;
779 ret = ext4_ext_insert_index(inode_ref, path, at - 1, insert_index,
780 npath[0].block.lb_id, *ins_right_leaf);
/* Error path: give back every block allocated for the new chain. */
785 ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
787 npath_at = depth - at;
788 while (npath_at >= 0) {
789 if (npath[npath_at].block.lb_id) {
790 newblock = npath[npath_at].block.lb_id;
791 ext4_block_set(inode_ref->fs->bdev,
792 &npath[npath_at].block);
793 ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
794 memset(&npath[npath_at].block, 0,
795 sizeof(struct ext4_block));
804 * ext4_ext_correct_indexes:
805 * if leaf gets modified and modified extent is first in the leaf,
806 * then we have to correct all indexes above.
808 static int ext4_ext_correct_indexes(struct ext4_inode_ref *inode_ref,
809 struct ext4_extent_path *path)
811 struct ext4_extent_header *eh;
812 int32_t depth = ext_depth(inode_ref->inode);
813 struct ext4_extent *ex;
818 eh = path[depth].header;
819 ex = path[depth].extent;
821 if (ex == NULL || eh == NULL)
825 /* there is no tree at all */
829 if (ex != EXT_FIRST_EXTENT(eh)) {
830 /* we correct tree if first leaf got modified only */
/* Propagate the new first logical block upward. */
835 border = path[depth].extent->first_block;
836 path[k].index->first_block = border;
837 err = ext4_ext_dirty(inode_ref, path + k);
842 /* change all left-side indexes */
843 if (path[k + 1].index != EXT_FIRST_INDEX(path[k + 1].header))
845 path[k].index->first_block = border;
846 err = ext4_ext_dirty(inode_ref, path + k);
/* True when @ex2 can be merged in front of @ex1: ex2 must end exactly
 * where ex1 begins, both physically and logically, and the combined
 * length must not exceed the per-state maximum. */
854 static inline bool ext4_ext_can_prepend(struct ext4_extent *ex1,
855 struct ext4_extent *ex2)
857 if (ext4_ext_pblock(ex2) + ext4_ext_get_actual_len(ex2) !=
858 ext4_ext_pblock(ex1))
861 #ifdef AGGRESSIVE_TEST
862 if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) > 4)
865 if (ext4_ext_is_unwritten(ex1)) {
866 if (ext4_ext_get_actual_len(ex1) +
867 ext4_ext_get_actual_len(ex2) >
868 EXT_UNWRITTEN_MAX_LEN)
870 } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
875 if (to_le32(ex2->first_block) + ext4_ext_get_actual_len(ex2) !=
876 to_le32(ex1->first_block))
/* True when @ex2 can be merged after @ex1: ex1 must end exactly where
 * ex2 begins, physically and logically, within the length limit for
 * the extent's written/unwritten state. */
882 static inline bool ext4_ext_can_append(struct ext4_extent *ex1,
883 struct ext4_extent *ex2)
885 if (ext4_ext_pblock(ex1) + ext4_ext_get_actual_len(ex1) !=
886 ext4_ext_pblock(ex2))
889 #ifdef AGGRESSIVE_TEST
890 if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) > 4)
893 if (ext4_ext_is_unwritten(ex1)) {
894 if (ext4_ext_get_actual_len(ex1) +
895 ext4_ext_get_actual_len(ex2) >
896 EXT_UNWRITTEN_MAX_LEN)
898 } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
903 if (to_le32(ex1->first_block) + ext4_ext_get_actual_len(ex1) !=
904 to_le32(ex2->first_block))
/* Insert @newext into the leaf at path[at].  Unless EXT4_EXT_NO_COMBINE
 * is set, first try to merge it with the neighbouring extent (append or
 * prepend).  Otherwise open a slot in sorted position and copy it in;
 * fails when the leaf is full (caller then splits the tree). */
910 static int ext4_ext_insert_leaf(struct ext4_inode_ref *inode_ref,
911 struct ext4_extent_path *path, int at,
912 struct ext4_extent *newext, int flags,
915 struct ext4_extent_path *curp = path + at;
916 struct ext4_extent *ex = curp->extent;
917 int len, err, unwritten;
918 struct ext4_extent_header *eh;
923 to_le32(newext->first_block) == to_le32(curp->extent->first_block))
926 if (!(flags & EXT4_EXT_NO_COMBINE)) {
927 if (curp->extent && ext4_ext_can_append(curp->extent, newext)) {
928 unwritten = ext4_ext_is_unwritten(curp->extent);
929 curp->extent->block_count =
930 to_le16(ext4_ext_get_actual_len(curp->extent) +
931 ext4_ext_get_actual_len(newext));
933 ext4_ext_mark_unwritten(curp->extent);
935 err = ext4_ext_dirty(inode_ref, curp);
940 ext4_ext_can_prepend(curp->extent, newext)) {
941 unwritten = ext4_ext_is_unwritten(curp->extent);
942 curp->extent->first_block = newext->first_block;
943 curp->extent->block_count =
944 to_le16(ext4_ext_get_actual_len(curp->extent) +
945 ext4_ext_get_actual_len(newext));
947 ext4_ext_mark_unwritten(curp->extent);
949 err = ext4_ext_dirty(inode_ref, curp);
/* No merge possible: need a free slot in this leaf. */
954 if (to_le16(curp->header->entries_count) ==
955 to_le16(curp->header->max_entries_count)) {
961 if (curp->extent == NULL) {
962 ex = EXT_FIRST_EXTENT(eh);
964 } else if (to_le32(newext->first_block) >
965 to_le32(curp->extent->first_block)) {
967 ex = curp->extent + 1;
/* Shift the tail right to make room at the insertion point. */
974 len = EXT_LAST_EXTENT(eh) - ex + 1;
975 ext4_assert(len >= 0);
977 memmove(ex + 1, ex, len * sizeof(struct ext4_extent));
979 if (ex > EXT_MAX_EXTENT(eh)) {
984 ex->first_block = newext->first_block;
985 ex->block_count = newext->block_count;
986 ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
987 eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
989 if (ex > EXT_LAST_EXTENT(eh)) {
994 err = ext4_ext_correct_indexes(inode_ref, path);
997 err = ext4_ext_dirty(inode_ref, curp);
1002 curp->p_block = ext4_ext_pblock(ex);
1009 * ext4_ext_grow_indepth:
1010 * implements tree growing procedure:
1011 * - allocates new block
1012 * - moves top-level data (index block or leaf) into the new block
1013 * - initializes new top-level, creating index that points to the
1014 * just created block
1016 static int ext4_ext_grow_indepth(struct ext4_inode_ref *inode_ref,
1019 struct ext4_extent_header *neh;
1020 struct ext4_block bh = EXT4_BLOCK_ZERO();
1021 ext4_fsblk_t newblock, goal = 0;
1024 /* Try to prepend new index to old one */
1025 if (ext_depth(inode_ref->inode))
1026 goal = ext4_idx_pblock(
1027 EXT_FIRST_INDEX(ext_inode_hdr(inode_ref->inode)));
1029 goal = ext4_fs_inode_to_goal_block(inode_ref);
1031 newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, &err);
1036 err = ext4_trans_block_get_noread(inode_ref->fs->bdev, &bh, newblock);
1038 ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
1042 /* move top-level index/leaf into new block */
1043 memmove(bh.data, inode_ref->inode->blocks,
1044 sizeof(inode_ref->inode->blocks))
1046 /* set size of new block */
1047 neh = ext_block_hdr(&bh);
1048 /* old root could have indexes or leaves
1049 * so calculate e_max right way */
1050 if (ext_depth(inode_ref->inode))
1051 neh->max_entries_count =
1052 to_le16(ext4_ext_space_block_idx(inode_ref));
1054 neh->max_entries_count =
1055 to_le16(ext4_ext_space_block(inode_ref));
1057 neh->magic = to_le16(EXT4_EXTENT_MAGIC);
1058 ext4_extent_block_csum_set(inode_ref, neh);
1060 /* Update top-level index: num,max,pointer */
1061 neh = ext_inode_hdr(inode_ref->inode);
1062 neh->entries_count = to_le16(1);
1063 ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
1064 if (neh->depth == 0) {
1065 /* Root extent block becomes index block */
1066 neh->max_entries_count =
1067 to_le16(ext4_ext_space_root_idx(inode_ref));
1068 EXT_FIRST_INDEX(neh)
1069 ->first_block = EXT_FIRST_EXTENT(neh)->first_block;
1071 neh->depth = to_le16(to_le16(neh->depth) + 1);
1073 ext4_trans_set_block_dirty(bh.buf);
1074 inode_ref->dirty = true;
1075 ext4_block_set(inode_ref->fs->bdev, &bh);
1080 static inline void ext4_ext_replace_path(struct ext4_inode_ref *inode_ref,
1081 struct ext4_extent_path *path,
1082 struct ext4_extent_path *newpath,
1085 ext4_ext_drop_refs(inode_ref, path + at, 1);
1086 path[at] = *newpath;
1087 memset(newpath, 0, sizeof(struct ext4_extent_path));
/* Public entry: insert @newext into the tree described by *ppath.
 * First try the leaf directly; when it is full, grow the tree depth if
 * no level has a free index, otherwise split from the leaf up to the
 * first level with free space and retry.  On error every node of the
 * partially built new chain is released and freed. */
1090 int ext4_ext_insert_extent(struct ext4_inode_ref *inode_ref,
1091 struct ext4_extent_path **ppath,
1092 struct ext4_extent *newext, int flags)
1094 int depth, level = 0, ret = 0;
1095 struct ext4_extent_path *path = *ppath;
1096 struct ext4_extent_path *npath = NULL;
1097 bool ins_right_leaf = false;
1101 depth = ext_depth(inode_ref->inode);
1102 ret = ext4_ext_insert_leaf(inode_ref, path, depth, newext, flags,
1104 if (ret == EIO && need_split == true) {
/* Find the deepest level that still has a free index slot. */
1106 for (i = depth, level = 0; i >= 0; i--, level++)
1107 if (EXT_HAS_FREE_INDEX(path + i))
1110 /* Do we need to grow the tree? */
1112 ret = ext4_ext_grow_indepth(inode_ref, 0);
1116 ret = ext4_find_extent(
1117 inode_ref, to_le32(newext->first_block), ppath, 0);
1123 * After growing the tree, there should be free space in
1124 * the only child node of the root.
1130 i = depth - (level - 1);
1131 /* We split from leaf to the i-th node */
1133 npath = calloc(1, sizeof(struct ext4_extent_path) *
1139 ret = ext4_ext_split_node(inode_ref, path, i, newext,
1140 npath, &ins_right_leaf)
1144 while (--level >= 0) {
1146 ext4_ext_replace_path(inode_ref, path,
1149 else if (npath[level].block.lb_id)
1150 ext4_ext_drop_refs(inode_ref,
1160 ext4_ext_drop_refs(inode_ref, path, 0);
/* Error cleanup: free any blocks left in the unused split chain. */
1162 while (--level >= 0 && npath) {
1163 if (npath[level].block.lb_id) {
1164 ext4_fsblk_t block = npath[level].block.lb_id;
1165 ext4_ext_free_blocks(inode_ref, block, 1, 0);
1166 ext4_ext_drop_refs(inode_ref, npath + level, 1);
1176 static void ext4_ext_remove_blocks(struct ext4_inode_ref *inode_ref,
1177 struct ext4_extent *ex, ext4_lblk_t from,
1180 ext4_lblk_t len = to - from + 1;
1183 num = from - to_le32(ex->first_block);
1184 start = ext4_ext_pblock(ex) + num;
1185 ext4_dbg(DEBUG_EXTENT,
1186 "Freeing %" PRIu32 " at %" PRIu64 ", %" PRIu32 "\n", from,
1189 ext4_ext_free_blocks(inode_ref, start, len, 0);
/* Remove the index entry at path[depth] and free the child block it
 * points to; if the removed entry was first in its node, propagate the
 * new first key to the parent. */
1192 static int ext4_ext_remove_idx(struct ext4_inode_ref *inode_ref,
1193 struct ext4_extent_path *path, int32_t depth)
1199 /* free index block */
1200 leaf = ext4_idx_pblock(path[i].index);
1202 if (path[i].index != EXT_LAST_INDEX(path[i].header)) {
1203 ptrdiff_t len = EXT_LAST_INDEX(path[i].header) - path[i].index;
1204 memmove(path[i].index, path[i].index + 1,
1205 len * sizeof(struct ext4_extent_index));
1208 path[i].header->entries_count =
1209 to_le16(to_le16(path[i].header->entries_count) - 1);
1210 err = ext4_ext_dirty(inode_ref, path + i);
1214 ext4_dbg(DEBUG_EXTENT, "IDX: Freeing %" PRIu32 " at %" PRIu64 ", %d\n",
1215 to_le32(path[i].index->first_block), leaf, 1);
1216 ext4_ext_free_blocks(inode_ref, leaf, 1, 0);
/* Only the first entry's key is mirrored in the parent node. */
1219 if (path[i].index != EXT_FIRST_INDEX(path[i].header))
1222 path[i - 1].index->first_block = path[i].index->first_block;
1223 err = ext4_ext_dirty(inode_ref, path + i - 1);
/* Remove the logical range [from, to] from the leaf at path[depth]:
 * each overlapped extent is trimmed (head, tail, or fully removed) and
 * its freed blocks are returned to the allocator; surviving tail
 * extents are compacted and the indexes above are corrected.  An empty
 * leaf is unlinked from its parent. */
1232 static int ext4_ext_remove_leaf(struct ext4_inode_ref *inode_ref,
1233 struct ext4_extent_path *path, ext4_lblk_t from,
1237 int32_t depth = ext_depth(inode_ref->inode);
1238 struct ext4_extent *ex = path[depth].extent;
1239 struct ext4_extent *start_ex, *ex2 = NULL;
1240 struct ext4_extent_header *eh = path[depth].header;
1243 uint16_t new_entries;
1246 new_entries = to_le16(eh->entries_count);
1247 while (ex <= EXT_LAST_EXTENT(path[depth].header) &&
1248 to_le32(ex->first_block) <= to) {
1249 int32_t new_len = 0;
1251 ext4_lblk_t start, new_start;
1252 ext4_fsblk_t newblock;
1253 new_start = start = to_le32(ex->first_block);
1254 len = ext4_ext_get_actual_len(ex);
1255 newblock = ext4_ext_pblock(ex);
/* Range starts inside this extent: keep the head. */
1257 len -= from - start;
1258 new_len = from - start;
/* Range ends inside this extent: keep the tail. */
1262 if (start + len - 1 > to) {
1263 len -= start + len - 1 - to;
1264 new_len = start + len - 1 - to;
1266 newblock += to + 1 - start;
1271 ext4_ext_remove_blocks(inode_ref, ex, start, start + len - 1);
1272 ex->first_block = to_le32(new_start);
1276 unwritten = ext4_ext_is_unwritten(ex);
1277 ex->block_count = to_le16(new_len);
1278 ext4_ext_store_pblock(ex, newblock);
1280 ext4_ext_mark_unwritten(ex);
/* Compact: move surviving extents over the removed ones. */
1289 if (ex2 <= EXT_LAST_EXTENT(eh))
1290 memmove(start_ex, ex2, (EXT_LAST_EXTENT(eh) - ex2 + 1) *
1291 sizeof(struct ext4_extent));
1293 eh->entries_count = to_le16(new_entries);
1294 ext4_ext_dirty(inode_ref, path + depth);
1295 if (path[depth].extent == EXT_FIRST_EXTENT(eh) && eh->entries_count) {
1296 err = ext4_ext_correct_indexes(inode_ref, path);
1301 /* if this leaf is free, then we should
1302 * remove it from index block above */
1303 if (eh->entries_count == 0 && path[depth].block.lb_id)
1304 err = ext4_ext_remove_idx(inode_ref, path, depth - 1);
1306 path[depth - 1].index++;
1311 static bool ext4_ext_more_to_rm(struct ext4_extent_path *path, ext4_lblk_t to)
1313 if (!to_le16(path->header->entries_count))
1316 if (path->index > EXT_LAST_INDEX(path->header))
1319 if (to_le32(path->index->first_block) > to)
/* Public entry: punch the logical range [from, to] out of the inode's
 * extent tree.  A removal strictly inside one extent splits it in two;
 * otherwise the tree is walked depth-first, leaves are trimmed via
 * ext4_ext_remove_leaf() and emptied index nodes are unlinked.  When
 * everything is gone the root header is reset to depth 0. */
1325 int ext4_extent_remove_space(struct ext4_inode_ref *inode_ref, ext4_lblk_t from,
1328 struct ext4_extent_path *path = NULL;
1330 int32_t depth = ext_depth(inode_ref->inode);
1333 ret = ext4_find_extent(inode_ref, from, &path, 0);
1337 if (!path[depth].extent) {
1342 bool in_range = IN_RANGE(from, to_le32(path[depth].extent->first_block),
1343 ext4_ext_get_actual_len(path[depth].extent));
1350 /* If we do remove_space inside the range of an extent */
1351 if ((to_le32(path[depth].extent->first_block) < from) &&
1352 (to < to_le32(path[depth].extent->first_block) +
1353 ext4_ext_get_actual_len(path[depth].extent) - 1)) {
1355 struct ext4_extent *ex = path[depth].extent, newex;
1356 int unwritten = ext4_ext_is_unwritten(ex);
1357 ext4_lblk_t ee_block = to_le32(ex->first_block);
1358 int32_t len = ext4_ext_get_actual_len(ex);
1359 ext4_fsblk_t newblock = to + 1 - ee_block + ext4_ext_pblock(ex);
/* Truncate the original extent to the part before the hole ... */
1361 ex->block_count = to_le16(from - ee_block);
1363 ext4_ext_mark_unwritten(ex);
1365 ext4_ext_dirty(inode_ref, path + depth);
/* ... and insert a new extent covering the part after the hole. */
1367 newex.first_block = to_le32(to + 1);
1368 newex.block_count = to_le16(ee_block + len - 1 - to);
1369 ext4_ext_store_pblock(&newex, newblock);
1371 ext4_ext_mark_unwritten(&newex);
1373 ret = ext4_ext_insert_extent(inode_ref, &path, &newex, 0);
1380 struct ext4_extent_header *eh;
1381 struct ext4_extent *first_ex, *last_ex;
1382 ext4_lblk_t leaf_from, leaf_to;
1383 eh = path[i].header;
1384 ext4_assert(to_le16(eh->entries_count) > 0);
1385 first_ex = EXT_FIRST_EXTENT(eh);
1386 last_ex = EXT_LAST_EXTENT(eh);
1387 leaf_from = to_le32(first_ex->first_block);
1388 leaf_to = to_le32(last_ex->first_block) +
1389 ext4_ext_get_actual_len(last_ex) - 1;
/* Clamp the removal range to this leaf's coverage. */
1390 if (leaf_from < from)
1396 ext4_ext_remove_leaf(inode_ref, path, leaf_from,
1398 ext4_ext_drop_refs(inode_ref, path + i, 0);
1403 struct ext4_extent_header *eh;
1404 eh = path[i].header;
1405 if (ext4_ext_more_to_rm(path + i, to)) {
1406 struct ext4_block bh = EXT4_BLOCK_ZERO();
1407 if (path[i + 1].block.lb_id)
1408 ext4_ext_drop_refs(inode_ref, path + i + 1, 0);
/* Descend into the next child that still overlaps the range. */
1410 ret = read_extent_tree_block(
1411 inode_ref, ext4_idx_pblock(path[i].index),
1412 depth - i - 1, &bh, 0);
1416 path[i].p_block = ext4_idx_pblock(path[i].index);
1417 path[i + 1].block = bh;
1418 path[i + 1].header = ext_block_hdr(&bh);
1419 path[i + 1].depth = depth - i - 1;
1421 path[i + 1].extent =
1422 EXT_FIRST_EXTENT(path[i + 1].header);
1425 EXT_FIRST_INDEX(path[i + 1].header);
/* This level is exhausted: drop emptied nodes and go back up. */
1430 if (!eh->entries_count)
1431 ret = ext4_ext_remove_idx(inode_ref,
1434 path[i - 1].index++;
1438 ext4_block_set(inode_ref->fs->bdev,
1445 /* TODO: flexible tree reduction should be here */
1446 if (path->header->entries_count == 0) {
1448 * truncate to zero freed all the tree,
1449 * so we need to correct eh_depth
1451 ext_inode_hdr(inode_ref->inode)->depth = 0;
1452 ext_inode_hdr(inode_ref->inode)->max_entries_count =
1453 to_le16(ext4_ext_space_root(inode_ref));
1454 ret = ext4_ext_dirty(inode_ref, path);
1458 ext4_ext_drop_refs(inode_ref, path, 0);
/*
 * Split the extent at *ppath[depth] into two extents at logical block
 * @split, optionally marking either half unwritten via @split_flag
 * (EXT4_EXT_MARK_UNWRIT1 / EXT4_EXT_MARK_UNWRIT2).
 *
 * NOTE(review): this chunk is a sampled excerpt — several original lines
 * (declarations of ee_len/err, range asserts, error branches and the
 * restore_extent_len label/return) are missing between the numbered
 * lines below; confirm against the full source before relying on the
 * exact control flow.
 */
1464 static int ext4_ext_split_extent_at(struct ext4_inode_ref *inode_ref,
1465 struct ext4_extent_path **ppath,
1466 ext4_lblk_t split, uint32_t split_flag)
1468 struct ext4_extent *ex, newex;
1469 ext4_fsblk_t newblock;
1470 ext4_lblk_t ee_block;
/* Work on the leaf-level extent found by the preceding tree lookup. */
1472 int32_t depth = ext_depth(inode_ref->inode);
1475 ex = (*ppath)[depth].extent;
1476 ee_block = to_le32(ex->first_block);
1477 ee_len = ext4_ext_get_actual_len(ex);
/* Physical block where the second (right) half will start. */
1478 newblock = split - ee_block + ext4_ext_pblock(ex);
1480 if (split == ee_block) {
1482 * case b: block @split is the block that the extent begins with
1483 * then we just change the state of the extent, and splitting
/* No real split needed: just flip the written/unwritten state. */
1486 if (split_flag & EXT4_EXT_MARK_UNWRIT2)
1487 ext4_ext_mark_unwritten(ex);
1489 ext4_ext_mark_initialized(ex);
1491 err = ext4_ext_dirty(inode_ref, *ppath + depth);
/* Shrink the original extent to cover only [ee_block, split). */
1495 ex->block_count = to_le16(split - ee_block);
1496 if (split_flag & EXT4_EXT_MARK_UNWRIT1)
1497 ext4_ext_mark_unwritten(ex);
1499 err = ext4_ext_dirty(inode_ref, *ppath + depth);
/* Build the right half covering [split, ee_block + ee_len). */
1503 newex.first_block = to_le32(split);
1504 newex.block_count = to_le16(ee_len - (split - ee_block));
1505 ext4_ext_store_pblock(&newex, newblock);
1506 if (split_flag & EXT4_EXT_MARK_UNWRIT2)
1507 ext4_ext_mark_unwritten(&newex);
/* NO_COMBINE: keep the two halves distinct instead of re-merging. */
1508 err = ext4_ext_insert_extent(inode_ref, ppath, &newex,
1509 EXT4_EXT_NO_COMBINE);
1511 goto restore_extent_len;
/* Error path: undo the shrink so the on-disk extent stays intact. */
1516 ex->block_count = to_le16(ee_len);
1517 err = ext4_ext_dirty(inode_ref, *ppath + depth);
/*
 * Convert the sub-range [@split, @split + @blocks) of the unwritten
 * extent at *ppath[depth] to the initialized (written) state, splitting
 * the extent into two or three pieces as needed.
 *
 * NOTE(review): sampled excerpt — the surrounding braces, the else
 * branch introduction and the trailing error handling/return are not
 * visible here; verify the full source for the complete flow.
 */
1521 static int ext4_ext_convert_to_initialized(struct ext4_inode_ref *inode_ref,
1522 struct ext4_extent_path **ppath,
1523 ext4_lblk_t split, uint32_t blocks)
1525 int32_t depth = ext_depth(inode_ref->inode), err = EOK;
1526 struct ext4_extent *ex = (*ppath)[depth].extent;
/* The range to initialize must start inside the found extent. */
1528 ext4_assert(to_le32(ex->first_block) <= split);
/* Case 1: range reaches the extent's end — split once at @split,
 * leaving the left part unwritten and the right part initialized. */
1530 if (split + blocks ==
1531 to_le32(ex->first_block) + ext4_ext_get_actual_len(ex)) {
1532 /* split and initialize right part */
1533 err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1534 EXT4_EXT_MARK_UNWRIT1);
/* Case 2: range starts at the extent's start — split once at the
 * range end, leaving the right part unwritten. */
1535 } else if (to_le32(ex->first_block) == split) {
1536 /* split and initialize left part */
1537 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1538 EXT4_EXT_MARK_UNWRIT2);
/* Case 3: range lies strictly inside — split at the range end first
 * (both halves unwritten), then split again at the range start so the
 * middle piece becomes initialized. */
1540 /* split 1 extent to 3 and initialize the 2nd */
1541 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1542 EXT4_EXT_MARK_UNWRIT1 |
1543 EXT4_EXT_MARK_UNWRIT2);
1545 err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1546 EXT4_EXT_MARK_UNWRIT1);
/*
 * Return the logical block number of the first allocated block that
 * follows the position described by @path, or EXT_MAX_BLOCKS when no
 * further extent exists at any level.
 *
 * NOTE(review): sampled excerpt — the depth declaration, the return
 * statements inside the loop and the depth decrement are among the
 * missing lines; the walk presumably climbs from the leaf toward the
 * root looking for a next sibling — confirm against the full source.
 */
1553 static ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_extent_path *path)
1557 depth = path->depth;
/* Empty tree: a depth-0 path with no extent has nothing after it. */
1559 if (depth == 0 && path->extent == NULL)
1560 return EXT_MAX_BLOCKS;
1562 while (depth >= 0) {
1563 if (depth == path->depth) {
/* Leaf level: if the current extent is not the last one in its
 * node, the next extent's first_block is the answer. */
1565 if (path[depth].extent &&
1566 path[depth].extent !=
1567 EXT_LAST_EXTENT(path[depth].header))
1569 path[depth].extent[1].first_block);
/* Index level: a following index entry bounds the next subtree. */
1572 if (path[depth].index !=
1573 EXT_LAST_INDEX(path[depth].header))
1575 path[depth].index[1].first_block);
/* Reached past the root without finding a successor. */
1580 return EXT_MAX_BLOCKS;
/*
 * Zero-fill @blocks_count consecutive physical blocks on disk, one
 * block at a time, through the transaction-aware block cache.
 *
 * NOTE(review): sampled excerpt — the starting-block parameter, the
 * i/err declarations and the per-iteration error checks/return are
 * missing from this view; confirm error handling in the full source.
 */
1583 static int ext4_ext_zero_unwritten_range(struct ext4_inode_ref *inode_ref,
1585 uint32_t blocks_count)
1589 uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
1590 for (i = 0; i < blocks_count; i++) {
1591 struct ext4_block bh = EXT4_BLOCK_ZERO();
/* noread: the block is being overwritten entirely, so skip the
 * read-from-disk step when fetching it into the cache. */
1592 err = ext4_trans_block_get_noread(inode_ref->fs->bdev, &bh,
1597 memset(bh.data, 0, block_size);
/* Mark dirty so the zeroed contents are flushed by the journal. */
1598 ext4_trans_set_block_dirty(bh.buf);
1599 err = ext4_block_set(inode_ref->fs->bdev, &bh);
/*
 * Debug helper: dump each level of an extent-tree @path — its depth,
 * physical block, and the offsets of the current extent/index entries
 * within their node. Compiled in but possibly unreferenced (__unused).
 *
 * NOTE(review): sampled excerpt — the loop construct and the a/b
 * offset declarations (with their NULL-pointer guards) are missing
 * from this view.
 */
1606 __unused static void print_path(struct ext4_extent_path *path)
1608 int32_t i = path->depth;
/* Offset of the current extent within its node (when present). */
1613 ? (path->extent - EXT_FIRST_EXTENT(path->header))
/* Offset of the current index within its node (when present). */
1617 ? (path->index - EXT_FIRST_INDEX(path->header))
/* %td matches the ptrdiff_t produced by the pointer subtractions. */
1622 ext4_dbg(DEBUG_EXTENT,
1623 "depth %" PRId32 ", p_block: %" PRIu64 ","
1624 "p_ext offset: %td, p_idx offset: %td\n",
1625 i, path->p_block, a, b);
1631 int ext4_extent_get_blocks(struct ext4_inode_ref *inode_ref, ext4_lblk_t iblock,
1632 uint32_t max_blocks, ext4_fsblk_t *result,
1633 bool create, uint32_t *blocks_count)
1635 struct ext4_extent_path *path = NULL;
1636 struct ext4_extent newex, *ex;
1640 uint32_t allocated = 0;
1642 ext4_fsblk_t newblock;
1650 /* find extent for this block */
1651 err = ext4_find_extent(inode_ref, iblock, &path, 0);
1657 depth = ext_depth(inode_ref->inode);
1660 * consistent leaf must not be empty
1661 * this situations is possible, though, _during_ tree modification
1662 * this is why assert can't be put in ext4_ext_find_extent()
1664 ex = path[depth].extent;
1666 ext4_lblk_t ee_block = to_le32(ex->first_block);
1667 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
1668 uint16_t ee_len = ext4_ext_get_actual_len(ex);
1669 /* if found exent covers block, simple return it */
1670 if (IN_RANGE(iblock, ee_block, ee_len)) {
1671 /* number of remain blocks in the extent */
1672 allocated = ee_len - (iblock - ee_block);
1674 if (!ext4_ext_is_unwritten(ex)) {
1675 newblock = iblock - ee_block + ee_start;
1684 uint32_t zero_range;
1685 zero_range = allocated;
1686 if (zero_range > max_blocks)
1687 zero_range = max_blocks;
1689 newblock = iblock - ee_block + ee_start;
1690 err = ext4_ext_zero_unwritten_range(inode_ref, newblock,
1695 err = ext4_ext_convert_to_initialized(
1696 inode_ref, &path, iblock, zero_range);
1705 * requested block isn't allocated yet
1706 * we couldn't try to create block if create flag is zero
1712 /* find next allocated block so that we know how many
1713 * blocks we can allocate without ovelapping next extent */
1714 next = ext4_ext_next_allocated_block(path);
1715 allocated = next - iblock;
1716 if (allocated > max_blocks)
1717 allocated = max_blocks;
1719 /* allocate new block */
1720 goal = ext4_ext_find_goal(inode_ref, path, iblock);
1721 newblock = ext4_new_meta_blocks(inode_ref, goal, 0, &allocated, &err);
1725 /* try to insert new extent into found leaf and return */
1726 newex.first_block = to_le32(iblock);
1727 ext4_ext_store_pblock(&newex, newblock);
1728 newex.block_count = to_le16(allocated);
1729 err = ext4_ext_insert_extent(inode_ref, &path, &newex, 0);
1731 /* free data blocks we just allocated */
1732 ext4_ext_free_blocks(inode_ref, ext4_ext_pblock(&newex),
1733 to_le16(newex.block_count), 0);
1737 /* previous routine could use block we allocated */
1738 newblock = ext4_ext_pblock(&newex);
1741 if (allocated > max_blocks)
1742 allocated = max_blocks;
1748 *blocks_count = allocated;
1752 ext4_ext_drop_refs(inode_ref, path, 0);