/*
 * Copyright (c) 2017 Grzegorz Kostka (kostka.grzegorz@gmail.com)
 * Copyright (c) 2017 Kaho Ng (ngkaho1234@gmail.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 */
#include <ext4_config.h>
#include <ext4_types.h>
#include <ext4_misc.h>
#include <ext4_errno.h>
#include <ext4_debug.h>

#include <ext4_blockdev.h>
#include <ext4_trans.h>

#include <ext4_super.h>
#include <ext4_crc32.h>
#include <ext4_balloc.h>
#include <ext4_extent.h>

#include <inttypes.h>
#include <stdlib.h>
#include <string.h>
/*
 * Flags used by extent splitting.
 */
#define EXT4_EXT_MARK_UNWRIT1 0x02 /* mark first half unwritten */
#define EXT4_EXT_MARK_UNWRIT2 0x04 /* mark second half unwritten */
#define EXT4_EXT_DATA_VALID1 0x08  /* first half contains valid data */
#define EXT4_EXT_DATA_VALID2 0x10  /* second half contains valid data */
#define EXT4_EXT_NO_COMBINE 0x20   /* do not combine two extents */

/* The MSB of the 16-bit length field marks an extent as unwritten. */
#define EXT4_EXT_UNWRITTEN_MASK (1L << 15)

/* A written extent may cover 2^15 blocks; an unwritten one 2^15 - 1. */
#define EXT4_EXT_MAX_LEN_WRITTEN (1L << 15)
#define EXT4_EXT_MAX_LEN_UNWRITTEN \
	(EXT4_EXT_MAX_LEN_WRITTEN - 1)
/* Raw (little-endian-decoded) length field of an extent. */
#define EXT4_EXT_GET_LEN(ex) to_le16((ex)->block_count)
/* Length with the unwritten bit masked off.  NOTE(fix): this must be a
 * pure read ('&', not '&='); on little-endian builds to_le16(x) expands
 * to (x), so the old '&=' form compound-assigned through the lvalue and
 * silently cleared the unwritten flag in the on-disk extent. */
#define EXT4_EXT_GET_LEN_UNWRITTEN(ex) \
	(EXT4_EXT_GET_LEN(ex) & ~(EXT4_EXT_UNWRITTEN_MASK))
#define EXT4_EXT_SET_LEN(ex, count) \
	((ex)->block_count = to_le16(count))

/* An extent is unwritten iff its raw length exceeds the written max. */
#define EXT4_EXT_IS_UNWRITTEN(ex) \
	(EXT4_EXT_GET_LEN(ex) > EXT4_EXT_MAX_LEN_WRITTEN)
#define EXT4_EXT_SET_UNWRITTEN(ex) \
	((ex)->block_count |= to_le16(EXT4_EXT_UNWRITTEN_MASK))
#define EXT4_EXT_SET_WRITTEN(ex) \
	((ex)->block_count &= ~(to_le16(EXT4_EXT_UNWRITTEN_MASK)))
59 * Array of ext4_ext_path contains path to some extent.
60 * Creation/lookup routines use it for traversal/splitting/etc.
61 * Truncate uses it to simulate recursive walking.
63 struct ext4_extent_path {
65 struct ext4_block block;
68 struct ext4_extent_header *header;
69 struct ext4_extent_index *index;
70 struct ext4_extent *extent;
/*
 * This is the extent tail on-disk structure.
 * All other extent structures are 12 bytes long. It turns out that
 * block_size % 12 >= 4 for at least all powers of 2 greater than 512, which
 * covers all valid ext4 block sizes. Therefore, this tail structure can be
 * crammed into the end of the block without having to rebalance the tree.
 */
struct ext4_extent_tail {
	uint32_t et_checksum; /* crc32c(uuid+inum+extent_block) */
};
/*
 * This is the extent on-disk structure.
 * It's used at the bottom of the tree.
 */
struct ext4_extent {
	uint32_t first_block; /* First logical block extent covers */
	uint16_t block_count; /* Number of blocks covered by extent */
	uint16_t start_hi;    /* High 16 bits of physical block */
	uint32_t start_lo;    /* Low 32 bits of physical block */
};
/*
 * This is index on-disk structure.
 * It's used at all the levels except the bottom.
 */
struct ext4_extent_index {
	uint32_t first_block; /* Index covers logical blocks from 'block' */

	/*
	 * Pointer to the physical block of the next
	 * level. leaf or next index could be there
	 * high 16 bits of physical block
	 */
	uint32_t leaf_lo; /* Low 32 bits of physical block */
	uint16_t leaf_hi; /* High 16 bits of physical block */
	uint16_t padding; /* Unused on-disk padding */
};
/*
 * Each block (leaves and indexes), even inode-stored has header.
 */
struct ext4_extent_header {
	uint16_t magic;             /* Magic (EXT4_EXTENT_MAGIC) */
	uint16_t entries_count;     /* Number of valid entries */
	uint16_t max_entries_count; /* Capacity of store in entries */
	uint16_t depth;             /* Has tree real underlying blocks? */
	uint32_t generation;        /* generation of the tree */
};
#define EXT4_EXTENT_MAGIC 0xF30A

/* First extent entry right behind a node header. */
#define EXT4_EXTENT_FIRST(header)                                              \
	((struct ext4_extent *)(((char *)(header)) +                           \
				sizeof(struct ext4_extent_header)))

/* First index entry right behind a node header. */
#define EXT4_EXTENT_FIRST_INDEX(header)                                        \
	((struct ext4_extent_index *)(((char *)(header)) +                     \
				      sizeof(struct ext4_extent_header)))

/*
 * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an
 * initialized extent. This is 2^15 and not (2^16 - 1), since we use the
 * MSB of ee_len field in the extent datastructure to signify if this
 * particular extent is an initialized extent or an uninitialized (i.e.
 * unwritten) one.
 * EXT_UNINIT_MAX_LEN is the maximum number of blocks we can have in an
 * uninitialized extent.
 * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an
 * uninitialized one. In other words, if MSB of ee_len is set, it is an
 * uninitialized extent with only one special scenario when ee_len = 0x8000.
 * In this case we can not have an uninitialized extent of zero length and
 * thus we make it as a special case of initialized extent with 0x8000 length.
 * This way we get better extent-to-group alignment for initialized extents.
 * Hence, the maximum number of blocks we can have in an *initialized*
 * extent is 2^15 (32768) and in an *uninitialized* extent is 2^15-1 (32767).
 */
#define EXT_INIT_MAX_LEN (1L << 15)
#define EXT_UNWRITTEN_MAX_LEN (EXT_INIT_MAX_LEN - 1)
#define EXT_EXTENT_SIZE sizeof(struct ext4_extent)
/* Fixed: the index entry type in this file is 'struct ext4_extent_index';
 * the old 'struct ext4_extent_idx' spelling names a type that is never
 * declared here and would fail to compile at any expansion site. */
#define EXT_INDEX_SIZE sizeof(struct ext4_extent_index)

#define EXT_FIRST_EXTENT(__hdr__)                                              \
	((struct ext4_extent *)(((char *)(__hdr__)) +                          \
				sizeof(struct ext4_extent_header)))
#define EXT_FIRST_INDEX(__hdr__)                                               \
	((struct ext4_extent_index *)(((char *)(__hdr__)) +                    \
				      sizeof(struct ext4_extent_header)))
/* True while the node still has room for at least one more entry. */
#define EXT_HAS_FREE_INDEX(__path__)                                           \
	(to_le16((__path__)->header->entries_count) <                          \
	 to_le16((__path__)->header->max_entries_count))
#define EXT_LAST_EXTENT(__hdr__)                                               \
	(EXT_FIRST_EXTENT((__hdr__)) + to_le16((__hdr__)->entries_count) - 1)
#define EXT_LAST_INDEX(__hdr__)                                                \
	(EXT_FIRST_INDEX((__hdr__)) + to_le16((__hdr__)->entries_count) - 1)
#define EXT_MAX_EXTENT(__hdr__)                                                \
	(EXT_FIRST_EXTENT((__hdr__)) + to_le16((__hdr__)->max_entries_count) - 1)
#define EXT_MAX_INDEX(__hdr__)                                                 \
	(EXT_FIRST_INDEX((__hdr__)) + to_le16((__hdr__)->max_entries_count) - 1)

/* Offset of the checksum tail: right after the last possible entry. */
#define EXT4_EXTENT_TAIL_OFFSET(hdr)                                           \
	(sizeof(struct ext4_extent_header) +                                   \
	 (sizeof(struct ext4_extent) * to_le16((hdr)->max_entries_count)))
186 /**@brief Get logical number of the block covered by extent.
187 * @param extent Extent to load number from
188 * @return Logical number of the first block covered by extent */
189 static inline uint32_t ext4_extent_get_first_block(struct ext4_extent *extent)
191 return to_le32(extent->first_block);
194 /**@brief Set logical number of the first block covered by extent.
195 * @param extent Extent to set number to
196 * @param iblock Logical number of the first block covered by extent */
197 static inline void ext4_extent_set_first_block(struct ext4_extent *extent,
200 extent->first_block = to_le32(iblock);
203 /**@brief Get number of blocks covered by extent.
204 * @param extent Extent to load count from
205 * @return Number of blocks covered by extent */
206 static inline uint16_t ext4_extent_get_block_count(struct ext4_extent *extent)
208 if (EXT4_EXT_IS_UNWRITTEN(extent))
209 return EXT4_EXT_GET_LEN_UNWRITTEN(extent);
211 return EXT4_EXT_GET_LEN(extent);
213 /**@brief Set number of blocks covered by extent.
214 * @param extent Extent to load count from
215 * @param count Number of blocks covered by extent
216 * @param unwritten Whether the extent is unwritten or not */
217 static inline void ext4_extent_set_block_count(struct ext4_extent *extent,
218 uint16_t count, bool unwritten)
220 EXT4_EXT_SET_LEN(extent, count);
222 EXT4_EXT_SET_UNWRITTEN(extent);
225 /**@brief Get physical number of the first block covered by extent.
226 * @param extent Extent to load number
227 * @return Physical number of the first block covered by extent */
228 static inline uint64_t ext4_extent_get_start(struct ext4_extent *extent)
230 return ((uint64_t)to_le16(extent->start_hi)) << 32 |
231 ((uint64_t)to_le32(extent->start_lo));
235 /**@brief Set physical number of the first block covered by extent.
236 * @param extent Extent to load number
237 * @param fblock Physical number of the first block covered by extent */
238 static inline void ext4_extent_set_start(struct ext4_extent *extent, uint64_t fblock)
240 extent->start_lo = to_le32((fblock << 32) >> 32);
241 extent->start_hi = to_le16((uint16_t)(fblock >> 32));
245 /**@brief Get logical number of the block covered by extent index.
246 * @param index Extent index to load number from
247 * @return Logical number of the first block covered by extent index */
248 static inline uint32_t
249 ext4_extent_index_get_first_block(struct ext4_extent_index *index)
251 return to_le32(index->first_block);
254 /**@brief Set logical number of the block covered by extent index.
255 * @param index Extent index to set number to
256 * @param iblock Logical number of the first block covered by extent index */
258 ext4_extent_index_set_first_block(struct ext4_extent_index *index,
261 index->first_block = to_le32(iblock);
264 /**@brief Get physical number of block where the child node is located.
265 * @param index Extent index to load number from
266 * @return Physical number of the block with child node */
267 static inline uint64_t
268 ext4_extent_index_get_leaf(struct ext4_extent_index *index)
270 return ((uint64_t)to_le16(index->leaf_hi)) << 32 |
271 ((uint64_t)to_le32(index->leaf_lo));
274 /**@brief Set physical number of block where the child node is located.
275 * @param index Extent index to set number to
276 * @param fblock Ohysical number of the block with child node */
277 static inline void ext4_extent_index_set_leaf(struct ext4_extent_index *index,
280 index->leaf_lo = to_le32((fblock << 32) >> 32);
281 index->leaf_hi = to_le16((uint16_t)(fblock >> 32));
284 /**@brief Get magic value from extent header.
285 * @param header Extent header to load value from
286 * @return Magic value of extent header */
287 static inline uint16_t
288 ext4_extent_header_get_magic(struct ext4_extent_header *header)
290 return to_le16(header->magic);
293 /**@brief Set magic value to extent header.
294 * @param header Extent header to set value to
295 * @param magic Magic value of extent header */
296 static inline void ext4_extent_header_set_magic(struct ext4_extent_header *header,
299 header->magic = to_le16(magic);
302 /**@brief Get number of entries from extent header
303 * @param header Extent header to get value from
304 * @return Number of entries covered by extent header */
305 static inline uint16_t
306 ext4_extent_header_get_entries_count(struct ext4_extent_header *header)
308 return to_le16(header->entries_count);
311 /**@brief Set number of entries to extent header
312 * @param header Extent header to set value to
313 * @param count Number of entries covered by extent header */
315 ext4_extent_header_set_entries_count(struct ext4_extent_header *header,
318 header->entries_count = to_le16(count);
321 /**@brief Get maximum number of entries from extent header
322 * @param header Extent header to get value from
323 * @return Maximum number of entries covered by extent header */
324 static inline uint16_t
325 ext4_extent_header_get_max_entries_count(struct ext4_extent_header *header)
327 return to_le16(header->max_entries_count);
330 /**@brief Set maximum number of entries to extent header
331 * @param header Extent header to set value to
332 * @param max_count Maximum number of entries covered by extent header */
334 ext4_extent_header_set_max_entries_count(struct ext4_extent_header *header,
337 header->max_entries_count = to_le16(max_count);
340 /**@brief Get depth of extent subtree.
341 * @param header Extent header to get value from
342 * @return Depth of extent subtree */
343 static inline uint16_t
344 ext4_extent_header_get_depth(struct ext4_extent_header *header)
346 return to_le16(header->depth);
349 /**@brief Set depth of extent subtree.
350 * @param header Extent header to set value to
351 * @param depth Depth of extent subtree */
353 ext4_extent_header_set_depth(struct ext4_extent_header *header, uint16_t depth)
355 header->depth = to_le16(depth);
358 /**@brief Get generation from extent header
359 * @param header Extent header to get value from
360 * @return Generation */
361 static inline uint32_t
362 ext4_extent_header_get_generation(struct ext4_extent_header *header)
364 return to_le32(header->generation);
367 /**@brief Set generation to extent header
368 * @param header Extent header to set value to
369 * @param generation Generation */
371 ext4_extent_header_set_generation(struct ext4_extent_header *header,
374 header->generation = to_le32(generation);
377 void ext4_extent_tree_init(struct ext4_inode_ref *inode_ref)
379 /* Initialize extent root header */
380 struct ext4_extent_header *header =
381 ext4_inode_get_extent_header(inode_ref->inode);
382 ext4_extent_header_set_depth(header, 0);
383 ext4_extent_header_set_entries_count(header, 0);
384 ext4_extent_header_set_generation(header, 0);
385 ext4_extent_header_set_magic(header, EXT4_EXTENT_MAGIC);
387 uint16_t max_entries = (EXT4_INODE_BLOCKS * sizeof(uint32_t) -
388 sizeof(struct ext4_extent_header)) /
389 sizeof(struct ext4_extent);
391 ext4_extent_header_set_max_entries_count(header, max_entries);
392 inode_ref->dirty = true;
396 static struct ext4_extent_tail *
397 find_ext4_extent_tail(struct ext4_extent_header *eh)
399 return (struct ext4_extent_tail *)(((char *)eh) +
400 EXT4_EXTENT_TAIL_OFFSET(eh));
403 static struct ext4_extent_header *ext_inode_hdr(struct ext4_inode *inode)
405 return (struct ext4_extent_header *)inode->blocks;
408 static struct ext4_extent_header *ext_block_hdr(struct ext4_block *block)
410 return (struct ext4_extent_header *)block->data;
413 static uint16_t ext_depth(struct ext4_inode *inode)
415 return to_le16(ext_inode_hdr(inode)->depth);
418 static uint16_t ext4_ext_get_actual_len(struct ext4_extent *ext)
420 return (to_le16(ext->block_count) <= EXT_INIT_MAX_LEN
421 ? to_le16(ext->block_count)
422 : (to_le16(ext->block_count) - EXT_INIT_MAX_LEN));
425 static void ext4_ext_mark_initialized(struct ext4_extent *ext)
427 ext->block_count = to_le16(ext4_ext_get_actual_len(ext));
430 static void ext4_ext_mark_unwritten(struct ext4_extent *ext)
432 ext->block_count |= to_le16(EXT_INIT_MAX_LEN);
435 static int ext4_ext_is_unwritten(struct ext4_extent *ext)
437 /* Extent with ee_len of 0x8000 is treated as an initialized extent */
438 return (to_le16(ext->block_count) > EXT_INIT_MAX_LEN);
443 * combine low and high parts of physical block number into ext4_fsblk_t
445 static ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex)
449 block = to_le32(ex->start_lo);
450 block |= ((ext4_fsblk_t)to_le16(ex->start_hi) << 31) << 1;
456 * combine low and high parts of a leaf physical block number into ext4_fsblk_t
458 static ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_index *ix)
462 block = to_le32(ix->leaf_lo);
463 block |= ((ext4_fsblk_t)to_le16(ix->leaf_hi) << 31) << 1;
468 * ext4_ext_store_pblock:
469 * stores a large physical block number into an extent struct,
470 * breaking it into parts
472 static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
474 ex->start_lo = to_le32((uint32_t)(pb & 0xffffffff));
475 ex->start_hi = to_le16((uint16_t)((pb >> 32)) & 0xffff);
479 * ext4_idx_store_pblock:
480 * stores a large physical block number into an index struct,
481 * breaking it into parts
483 static void ext4_idx_store_pblock(struct ext4_extent_index *ix, ext4_fsblk_t pb)
485 ix->leaf_lo = to_le32((uint32_t)(pb & 0xffffffff));
486 ix->leaf_hi = to_le16((uint16_t)((pb >> 32)) & 0xffff);
489 static int ext4_allocate_single_block(struct ext4_inode_ref *inode_ref,
490 ext4_fsblk_t goal, ext4_fsblk_t *blockp)
492 return ext4_balloc_alloc_block(inode_ref, goal, blockp);
495 static ext4_fsblk_t ext4_new_meta_blocks(struct ext4_inode_ref *inode_ref,
497 uint32_t flags __unused,
498 uint32_t *count, int *errp)
500 ext4_fsblk_t block = 0;
502 *errp = ext4_allocate_single_block(inode_ref, goal, &block);
508 static void ext4_ext_free_blocks(struct ext4_inode_ref *inode_ref,
509 ext4_fsblk_t block, uint32_t count,
510 uint32_t flags __unused)
512 ext4_balloc_free_blocks(inode_ref, block, count);
515 static uint16_t ext4_ext_space_block(struct ext4_inode_ref *inode_ref)
518 uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
520 size = (block_size - sizeof(struct ext4_extent_header)) /
521 sizeof(struct ext4_extent);
522 #ifdef AGGRESSIVE_TEST
529 static uint16_t ext4_ext_space_block_idx(struct ext4_inode_ref *inode_ref)
532 uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
534 size = (block_size - sizeof(struct ext4_extent_header)) /
535 sizeof(struct ext4_extent_index);
536 #ifdef AGGRESSIVE_TEST
543 static uint16_t ext4_ext_space_root(struct ext4_inode_ref *inode_ref)
547 size = sizeof(inode_ref->inode->blocks);
548 size -= sizeof(struct ext4_extent_header);
549 size /= sizeof(struct ext4_extent);
550 #ifdef AGGRESSIVE_TEST
557 static uint16_t ext4_ext_space_root_idx(struct ext4_inode_ref *inode_ref)
561 size = sizeof(inode_ref->inode->blocks);
562 size -= sizeof(struct ext4_extent_header);
563 size /= sizeof(struct ext4_extent_index);
564 #ifdef AGGRESSIVE_TEST
571 static uint16_t ext4_ext_max_entries(struct ext4_inode_ref *inode_ref,
576 if (depth == ext_depth(inode_ref->inode)) {
578 max = ext4_ext_space_root(inode_ref);
580 max = ext4_ext_space_root_idx(inode_ref);
583 max = ext4_ext_space_block(inode_ref);
585 max = ext4_ext_space_block_idx(inode_ref);
591 static ext4_fsblk_t ext4_ext_find_goal(struct ext4_inode_ref *inode_ref,
592 struct ext4_extent_path *path,
596 uint32_t depth = path->depth;
597 struct ext4_extent *ex;
600 * Try to predict block placement assuming that we are
601 * filling in a file which will eventually be
602 * non-sparse --- i.e., in the case of libbfd writing
603 * an ELF object sections out-of-order but in a way
604 * the eventually results in a contiguous object or
605 * executable file, or some database extending a table
606 * space file. However, this is actually somewhat
607 * non-ideal if we are writing a sparse file such as
608 * qemu or KVM writing a raw image file that is going
609 * to stay fairly sparse, since it will end up
610 * fragmenting the file system's free space. Maybe we
611 * should have some hueristics or some way to allow
612 * userspace to pass a hint to file system,
613 * especially if the latter case turns out to be
616 ex = path[depth].extent;
618 ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
619 ext4_lblk_t ext_block = to_le32(ex->first_block);
621 if (block > ext_block)
622 return ext_pblk + (block - ext_block);
624 return ext_pblk - (ext_block - block);
627 /* it looks like index is empty;
628 * try to find starting block from index itself */
629 if (path[depth].block.lb_id)
630 return path[depth].block.lb_id;
633 /* OK. use inode's group */
634 return ext4_fs_inode_to_goal_block(inode_ref);
638 * Allocation for a meta data block
640 static ext4_fsblk_t ext4_ext_new_meta_block(struct ext4_inode_ref *inode_ref,
641 struct ext4_extent_path *path,
642 struct ext4_extent *ex, int *err,
645 ext4_fsblk_t goal, newblock;
647 goal = ext4_ext_find_goal(inode_ref, path, to_le32(ex->first_block));
648 newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, err);
652 #if CONFIG_META_CSUM_ENABLE
653 static uint32_t ext4_ext_block_csum(struct ext4_inode_ref *inode_ref,
654 struct ext4_extent_header *eh)
656 uint32_t checksum = 0;
657 struct ext4_sblock *sb = &inode_ref->fs->sb;
659 if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
660 uint32_t ino_index = to_le32(inode_ref->index);
662 to_le32(ext4_inode_get_generation(inode_ref->inode));
663 /* First calculate crc32 checksum against fs uuid */
665 ext4_crc32c(EXT4_CRC32_INIT, sb->uuid, sizeof(sb->uuid));
666 /* Then calculate crc32 checksum against inode number
667 * and inode generation */
668 checksum = ext4_crc32c(checksum, &ino_index, sizeof(ino_index));
669 checksum = ext4_crc32c(checksum, &ino_gen, sizeof(ino_gen));
670 /* Finally calculate crc32 checksum against
671 * the entire extent block up to the checksum field */
673 ext4_crc32c(checksum, eh, EXT4_EXTENT_TAIL_OFFSET(eh));
678 #define ext4_ext_block_csum(...) 0
682 ext4_extent_block_csum_set(struct ext4_inode_ref *inode_ref __unused,
683 struct ext4_extent_header *eh)
685 struct ext4_extent_tail *tail;
687 tail = find_ext4_extent_tail(eh);
688 tail->et_checksum = to_le32(ext4_ext_block_csum(inode_ref, eh));
691 static int ext4_ext_dirty(struct ext4_inode_ref *inode_ref,
692 struct ext4_extent_path *path)
694 if (path->block.lb_id)
695 ext4_trans_set_block_dirty(path->block.buf);
697 inode_ref->dirty = true;
702 static void ext4_ext_drop_refs(struct ext4_inode_ref *inode_ref,
703 struct ext4_extent_path *path, bool keep_other)
714 for (i = 0; i <= depth; i++, path++) {
715 if (path->block.lb_id) {
716 if (ext4_bcache_test_flag(path->block.buf, BC_DIRTY))
717 ext4_extent_block_csum_set(inode_ref,
720 ext4_block_set(inode_ref->fs->bdev, &path->block);
726 * Check that whether the basic information inside the extent header
729 static int ext4_ext_check(struct ext4_inode_ref *inode_ref,
730 struct ext4_extent_header *eh, uint16_t depth,
731 ext4_fsblk_t pblk __unused)
733 struct ext4_extent_tail *tail;
734 struct ext4_sblock *sb = &inode_ref->fs->sb;
735 const char *error_msg;
738 if (to_le16(eh->magic) != EXT4_EXTENT_MAGIC) {
739 error_msg = "invalid magic";
742 if (to_le16(eh->depth) != depth) {
743 error_msg = "unexpected eh_depth";
746 if (eh->max_entries_count == 0) {
747 error_msg = "invalid eh_max";
750 if (to_le16(eh->entries_count) > to_le16(eh->max_entries_count)) {
751 error_msg = "invalid eh_entries";
755 tail = find_ext4_extent_tail(eh);
756 if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
757 if (tail->et_checksum !=
758 to_le32(ext4_ext_block_csum(inode_ref, eh))) {
759 ext4_dbg(DEBUG_EXTENT,
760 DBG_WARN "Extent block checksum failed."
761 "Blocknr: %" PRIu64 "\n",
769 ext4_dbg(DEBUG_EXTENT, "Bad extents B+ tree block: %s. "
770 "Blocknr: %" PRId64 "\n",
775 static int read_extent_tree_block(struct ext4_inode_ref *inode_ref,
776 ext4_fsblk_t pblk, int32_t depth,
777 struct ext4_block *bh,
778 uint32_t flags __unused)
782 err = ext4_trans_block_get(inode_ref->fs->bdev, bh, pblk);
786 err = ext4_ext_check(inode_ref, ext_block_hdr(bh), depth, pblk);
793 ext4_block_set(inode_ref->fs->bdev, bh);
799 * ext4_ext_binsearch_idx:
800 * binary search for the closest index of the given block
801 * the header must be checked before calling this
803 static void ext4_ext_binsearch_idx(struct ext4_extent_path *path,
806 struct ext4_extent_header *eh = path->header;
807 struct ext4_extent_index *r, *l, *m;
809 l = EXT_FIRST_INDEX(eh) + 1;
810 r = EXT_LAST_INDEX(eh);
813 if (block < to_le32(m->first_block))
823 * ext4_ext_binsearch:
824 * binary search for closest extent of the given block
825 * the header must be checked before calling this
827 static void ext4_ext_binsearch(struct ext4_extent_path *path, ext4_lblk_t block)
829 struct ext4_extent_header *eh = path->header;
830 struct ext4_extent *r, *l, *m;
832 if (eh->entries_count == 0) {
834 * this leaf is empty:
835 * we get such a leaf in split/add case
840 l = EXT_FIRST_EXTENT(eh) + 1;
841 r = EXT_LAST_EXTENT(eh);
845 if (block < to_le32(m->first_block))
851 path->extent = l - 1;
854 static int ext4_find_extent(struct ext4_inode_ref *inode_ref, ext4_lblk_t block,
855 struct ext4_extent_path **orig_path, uint32_t flags)
857 struct ext4_extent_header *eh;
858 struct ext4_block bh = EXT4_BLOCK_ZERO();
859 ext4_fsblk_t buf_block = 0;
860 struct ext4_extent_path *path = *orig_path;
861 int32_t depth, ppos = 0;
865 eh = ext_inode_hdr(inode_ref->inode);
866 depth = ext_depth(inode_ref->inode);
869 ext4_ext_drop_refs(inode_ref, path, 0);
870 if (depth > path[0].maxdepth) {
872 *orig_path = path = NULL;
876 int32_t path_depth = depth + 1;
877 /* account possible depth increase */
878 path = ext4_calloc(1, sizeof(struct ext4_extent_path) *
882 path[0].maxdepth = path_depth;
888 /* walk through the tree */
890 ext4_ext_binsearch_idx(path + ppos, block);
891 path[ppos].p_block = ext4_idx_pblock(path[ppos].index);
892 path[ppos].depth = i;
893 path[ppos].extent = NULL;
894 buf_block = path[ppos].p_block;
898 if (!path[ppos].block.lb_id ||
899 path[ppos].block.lb_id != buf_block) {
900 ret = read_extent_tree_block(inode_ref, buf_block, i,
906 ext4_block_set(inode_ref->fs->bdev, &bh);
911 eh = ext_block_hdr(&bh);
912 path[ppos].block = bh;
913 path[ppos].header = eh;
917 path[ppos].depth = i;
918 path[ppos].extent = NULL;
919 path[ppos].index = NULL;
922 ext4_ext_binsearch(path + ppos, block);
923 /* if not an empty leaf */
924 if (path[ppos].extent)
925 path[ppos].p_block = ext4_ext_pblock(path[ppos].extent);
933 ext4_ext_drop_refs(inode_ref, path, 0);
940 static void ext4_ext_init_header(struct ext4_inode_ref *inode_ref,
941 struct ext4_extent_header *eh, int32_t depth)
943 eh->entries_count = 0;
944 eh->max_entries_count = to_le16(ext4_ext_max_entries(inode_ref, depth));
945 eh->magic = to_le16(EXT4_EXTENT_MAGIC);
949 static int ext4_ext_insert_index(struct ext4_inode_ref *inode_ref,
950 struct ext4_extent_path *path, int at,
951 ext4_lblk_t insert_index,
952 ext4_fsblk_t insert_block, bool set_to_ix)
954 struct ext4_extent_index *ix;
955 struct ext4_extent_path *curp = path + at;
957 struct ext4_extent_header *eh;
959 if (curp->index && insert_index == to_le32(curp->index->first_block))
962 if (to_le16(curp->header->entries_count) ==
963 to_le16(curp->header->max_entries_count))
967 if (curp->index == NULL) {
968 ix = EXT_FIRST_INDEX(eh);
970 } else if (insert_index > to_le32(curp->index->first_block)) {
972 ix = curp->index + 1;
978 if (ix > EXT_MAX_INDEX(eh))
981 len = EXT_LAST_INDEX(eh) - ix + 1;
982 ext4_assert(len >= 0);
984 memmove(ix + 1, ix, len * sizeof(struct ext4_extent_index));
986 ix->first_block = to_le32(insert_index);
987 ext4_idx_store_pblock(ix, insert_block);
988 eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
990 if (ix > EXT_LAST_INDEX(eh)) {
995 err = ext4_ext_dirty(inode_ref, curp);
998 if (err == EOK && set_to_ix) {
1000 curp->p_block = ext4_idx_pblock(ix);
1005 static int ext4_ext_split_node(struct ext4_inode_ref *inode_ref,
1006 struct ext4_extent_path *path, int at,
1007 struct ext4_extent *newext,
1008 struct ext4_extent_path *npath,
1009 bool *ins_right_leaf)
1011 int i, npath_at, ret;
1012 ext4_lblk_t insert_index;
1013 ext4_fsblk_t newblock = 0;
1014 int depth = ext_depth(inode_ref->inode);
1015 npath_at = depth - at;
1017 ext4_assert(at > 0);
1019 if (path[depth].extent != EXT_MAX_EXTENT(path[depth].header))
1020 insert_index = path[depth].extent[1].first_block;
1022 insert_index = newext->first_block;
1024 for (i = depth; i >= at; i--, npath_at--) {
1025 struct ext4_block bh = EXT4_BLOCK_ZERO();
1027 /* FIXME: currently we split at the point after the current
1030 ext4_ext_new_meta_block(inode_ref, path, newext, &ret, 0);
1034 /* For write access.*/
1035 ret = ext4_trans_block_get_noread(inode_ref->fs->bdev, &bh,
1041 /* start copy from next extent */
1042 int m = EXT_MAX_EXTENT(path[i].header) - path[i].extent;
1043 struct ext4_extent_header *neh;
1044 struct ext4_extent *ex;
1045 neh = ext_block_hdr(&bh);
1046 ex = EXT_FIRST_EXTENT(neh);
1047 ext4_ext_init_header(inode_ref, neh, 0);
1049 memmove(ex, path[i].extent + 1,
1050 sizeof(struct ext4_extent) * m);
1051 neh->entries_count =
1052 to_le16(to_le16(neh->entries_count) + m);
1053 path[i].header->entries_count = to_le16(
1054 to_le16(path[i].header->entries_count) - m);
1055 ret = ext4_ext_dirty(inode_ref, path + i);
1059 npath[npath_at].p_block = ext4_ext_pblock(ex);
1060 npath[npath_at].extent = ex;
1062 npath[npath_at].p_block = 0;
1063 npath[npath_at].extent = NULL;
1066 npath[npath_at].depth = to_le16(neh->depth);
1067 npath[npath_at].maxdepth = 0;
1068 npath[npath_at].index = NULL;
1069 npath[npath_at].header = neh;
1070 npath[npath_at].block = bh;
1072 ext4_trans_set_block_dirty(bh.buf);
1074 int m = EXT_MAX_INDEX(path[i].header) - path[i].index;
1075 struct ext4_extent_header *neh;
1076 struct ext4_extent_index *ix;
1077 neh = ext_block_hdr(&bh);
1078 ix = EXT_FIRST_INDEX(neh);
1079 ext4_ext_init_header(inode_ref, neh, depth - i);
1080 ix->first_block = to_le32(insert_index);
1081 ext4_idx_store_pblock(ix,
1082 npath[npath_at + 1].block.lb_id);
1083 neh->entries_count = to_le16(1);
1085 memmove(ix + 1, path[i].index + 1,
1086 sizeof(struct ext4_extent) * m);
1087 neh->entries_count =
1088 to_le16(to_le16(neh->entries_count) + m);
1089 path[i].header->entries_count = to_le16(
1090 to_le16(path[i].header->entries_count) - m);
1091 ret = ext4_ext_dirty(inode_ref, path + i);
1096 npath[npath_at].p_block = ext4_idx_pblock(ix);
1097 npath[npath_at].depth = to_le16(neh->depth);
1098 npath[npath_at].maxdepth = 0;
1099 npath[npath_at].extent = NULL;
1100 npath[npath_at].index = ix;
1101 npath[npath_at].header = neh;
1102 npath[npath_at].block = bh;
1104 ext4_trans_set_block_dirty(bh.buf);
1110 * If newext->first_block can be included into the
1113 if (to_le32(newext->first_block) < insert_index)
1114 *ins_right_leaf = false;
1116 *ins_right_leaf = true;
1118 ret = ext4_ext_insert_index(inode_ref, path, at - 1, insert_index,
1119 npath[0].block.lb_id, *ins_right_leaf);
1124 ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
1126 npath_at = depth - at;
1127 while (npath_at >= 0) {
1128 if (npath[npath_at].block.lb_id) {
1129 newblock = npath[npath_at].block.lb_id;
1130 ext4_block_set(inode_ref->fs->bdev,
1131 &npath[npath_at].block);
1132 ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
1133 memset(&npath[npath_at].block, 0,
1134 sizeof(struct ext4_block));
1143 * ext4_ext_correct_indexes:
1144 * if leaf gets modified and modified extent is first in the leaf,
1145 * then we have to correct all indexes above.
1147 static int ext4_ext_correct_indexes(struct ext4_inode_ref *inode_ref,
1148 struct ext4_extent_path *path)
1150 struct ext4_extent_header *eh;
1151 int32_t depth = ext_depth(inode_ref->inode);
1152 struct ext4_extent *ex;
1157 eh = path[depth].header;
1158 ex = path[depth].extent;
1160 if (ex == NULL || eh == NULL)
1164 /* there is no tree at all */
1168 if (ex != EXT_FIRST_EXTENT(eh)) {
1169 /* we correct tree if first leaf got modified only */
1174 border = path[depth].extent->first_block;
1175 path[k].index->first_block = border;
1176 err = ext4_ext_dirty(inode_ref, path + k);
1181 /* change all left-side indexes */
1182 if (path[k + 1].index != EXT_FIRST_INDEX(path[k + 1].header))
1184 path[k].index->first_block = border;
1185 err = ext4_ext_dirty(inode_ref, path + k);
1193 static inline bool ext4_ext_can_prepend(struct ext4_extent *ex1,
1194 struct ext4_extent *ex2)
1196 if (ext4_ext_pblock(ex2) + ext4_ext_get_actual_len(ex2) !=
1197 ext4_ext_pblock(ex1))
1200 #ifdef AGGRESSIVE_TEST
1201 if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) > 4)
1204 if (ext4_ext_is_unwritten(ex1)) {
1205 if (ext4_ext_get_actual_len(ex1) +
1206 ext4_ext_get_actual_len(ex2) >
1207 EXT_UNWRITTEN_MAX_LEN)
1209 } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
1214 if (to_le32(ex2->first_block) + ext4_ext_get_actual_len(ex2) !=
1215 to_le32(ex1->first_block))
1221 static inline bool ext4_ext_can_append(struct ext4_extent *ex1,
1222 struct ext4_extent *ex2)
1224 if (ext4_ext_pblock(ex1) + ext4_ext_get_actual_len(ex1) !=
1225 ext4_ext_pblock(ex2))
1228 #ifdef AGGRESSIVE_TEST
1229 if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) > 4)
1232 if (ext4_ext_is_unwritten(ex1)) {
1233 if (ext4_ext_get_actual_len(ex1) +
1234 ext4_ext_get_actual_len(ex2) >
1235 EXT_UNWRITTEN_MAX_LEN)
1237 } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
1242 if (to_le32(ex1->first_block) + ext4_ext_get_actual_len(ex1) !=
1243 to_le32(ex2->first_block))
/*
 * Insert @newext into the node at level @at of @path.
 * First tries to coalesce @newext with the extent the path points at
 * (append, then prepend) unless EXT4_EXT_NO_COMBINE is set; otherwise
 * shifts the following extents and inserts a fresh entry.  Sets
 * *need_split when the node is already full so the caller can split.
 * NOTE(review): several lines of this body are not visible in this
 * view (return statements, part of the error handling) — verify the
 * exact control flow against the full file before relying on it.
 */
1249 static int ext4_ext_insert_leaf(struct ext4_inode_ref *inode_ref,
1250 struct ext4_extent_path *path, int at,
1251 struct ext4_extent *newext, int flags,
1254 struct ext4_extent_path *curp = path + at;
1255 struct ext4_extent *ex = curp->extent;
1256 int len, err, unwritten;
1257 struct ext4_extent_header *eh;
1259 *need_split = false;
/* Reject an insert whose logical start duplicates the current extent. */
1262 to_le32(newext->first_block) == to_le32(curp->extent->first_block))
1265 if (!(flags & EXT4_EXT_NO_COMBINE)) {
/* Fast path 1: grow the current extent forward to absorb newext. */
1266 if (curp->extent && ext4_ext_can_append(curp->extent, newext)) {
1267 unwritten = ext4_ext_is_unwritten(curp->extent);
1268 curp->extent->block_count =
1269 to_le16(ext4_ext_get_actual_len(curp->extent) +
1270 ext4_ext_get_actual_len(newext));
/* Re-mark unwritten: rewriting block_count clears the unwritten bit. */
1272 ext4_ext_mark_unwritten(curp->extent);
1274 err = ext4_ext_dirty(inode_ref, curp);
/* Fast path 2: grow the current extent backward (prepend). */
1279 ext4_ext_can_prepend(curp->extent, newext)) {
1280 unwritten = ext4_ext_is_unwritten(curp->extent);
1281 curp->extent->first_block = newext->first_block;
1282 curp->extent->block_count =
1283 to_le16(ext4_ext_get_actual_len(curp->extent) +
1284 ext4_ext_get_actual_len(newext));
1286 ext4_ext_mark_unwritten(curp->extent);
1288 err = ext4_ext_dirty(inode_ref, curp);
/* Node full: caller must split the tree before retrying. */
1293 if (to_le16(curp->header->entries_count) ==
1294 to_le16(curp->header->max_entries_count)) {
/* Pick the insertion slot: first slot of an empty node, or just after
 * the extent the path points at when newext sorts after it. */
1300 if (curp->extent == NULL) {
1301 ex = EXT_FIRST_EXTENT(eh);
1303 } else if (to_le32(newext->first_block) >
1304 to_le32(curp->extent->first_block)) {
1306 ex = curp->extent + 1;
/* Shift the tail of the node one slot right to open a gap at ex. */
1313 len = EXT_LAST_EXTENT(eh) - ex + 1;
1314 ext4_assert(len >= 0);
1316 memmove(ex + 1, ex, len * sizeof(struct ext4_extent));
1318 if (ex > EXT_MAX_EXTENT(eh)) {
/* Copy the new extent into the gap and bump the entry count. */
1323 ex->first_block = newext->first_block;
1324 ex->block_count = newext->block_count;
1325 ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
1326 eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
1328 if (ex > EXT_LAST_EXTENT(eh)) {
/* If the node's first entry changed, fix parent index keys. */
1333 err = ext4_ext_correct_indexes(inode_ref, path);
1336 err = ext4_ext_dirty(inode_ref, curp);
1341 curp->p_block = ext4_ext_pblock(ex);
1348 * ext4_ext_grow_indepth:
1349 * implements tree growing procedure:
1350 * - allocates new block
1351 * - moves top-level data (index block or leaf) into the new block
1352 * - initializes new top-level, creating index that points to the
1353 * just created block
1355 static int ext4_ext_grow_indepth(struct ext4_inode_ref *inode_ref,
1358 struct ext4_extent_header *neh;
1359 struct ext4_block bh = EXT4_BLOCK_ZERO();
1360 ext4_fsblk_t newblock, goal = 0;
/* Allocation goal: near the first child of the root if the tree has
 * depth, otherwise near the inode itself. */
1363 /* Try to prepend new index to old one */
1364 if (ext_depth(inode_ref->inode))
1365 goal = ext4_idx_pblock(
1366 EXT_FIRST_INDEX(ext_inode_hdr(inode_ref->inode)));
1368 goal = ext4_fs_inode_to_goal_block(inode_ref);
1370 newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, &err);
/* Grab the block without reading it — we overwrite it entirely. */
1375 err = ext4_trans_block_get_noread(inode_ref->fs->bdev, &bh, newblock);
/* On failure, give the freshly allocated metadata block back. */
1377 ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
1381 /* move top-level index/leaf into new block */
1382 memmove(bh.data, inode_ref->inode->blocks,
1383 sizeof(inode_ref->inode->blocks));
1385 /* set size of new block */
1386 neh = ext_block_hdr(&bh);
1387 /* old root could have indexes or leaves
1388 * so calculate e_max right way */
1389 if (ext_depth(inode_ref->inode))
1390 neh->max_entries_count =
1391 to_le16(ext4_ext_space_block_idx(inode_ref));
1393 neh->max_entries_count =
1394 to_le16(ext4_ext_space_block(inode_ref));
/* Stamp magic and checksum so the block validates on later reads. */
1396 neh->magic = to_le16(EXT4_EXTENT_MAGIC);
1397 ext4_extent_block_csum_set(inode_ref, neh);
1399 /* Update top-level index: num,max,pointer */
1400 neh = ext_inode_hdr(inode_ref->inode);
1401 neh->entries_count = to_le16(1);
1402 ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
1403 if (neh->depth == 0) {
1404 /* Root extent block becomes index block */
1405 neh->max_entries_count =
1406 to_le16(ext4_ext_space_root_idx(inode_ref));
/* Preserve the logical start: the old first extent's key becomes the
 * new index entry's key (header still holds the old leaf layout here). */
1407 EXT_FIRST_INDEX(neh)
1408 ->first_block = EXT_FIRST_EXTENT(neh)->first_block;
1410 neh->depth = to_le16(to_le16(neh->depth) + 1);
/* Persist: dirty the new child block and the inode's root. */
1412 ext4_trans_set_block_dirty(bh.buf);
1413 inode_ref->dirty = true;
1414 ext4_block_set(inode_ref->fs->bdev, &bh);
/*
 * Replace entry @at of @path with *@newpath, releasing the old entry's
 * block reference first and then clearing *@newpath so the caller's
 * copy no longer owns the transferred reference (ownership moves into
 * @path).
 */
1419 static inline void ext4_ext_replace_path(struct ext4_inode_ref *inode_ref,
1420 struct ext4_extent_path *path,
1421 struct ext4_extent_path *newpath,
1424 ext4_ext_drop_refs(inode_ref, path + at, 1);
1425 path[at] = *newpath;
1426 memset(newpath, 0, sizeof(struct ext4_extent_path));
/*
 * Insert @newext into the extent tree of @inode_ref.
 * Tries a plain leaf insert first; when the leaf is full (insert_leaf
 * reports EIO with need_split set) it walks up looking for a node with
 * free space, growing the tree depth when even the root is full, then
 * splits the nodes between that level and the leaf and retries.
 * @ppath may be reallocated (tree growth re-runs the lookup).
 * NOTE(review): this view is missing a number of lines (error returns,
 * the retry insert after the split) — verify the exact flow against
 * the full file.
 */
1429 int ext4_ext_insert_extent(struct ext4_inode_ref *inode_ref,
1430 struct ext4_extent_path **ppath,
1431 struct ext4_extent *newext, int flags)
1433 int depth, level = 0, ret = 0;
1434 struct ext4_extent_path *path = *ppath;
1435 struct ext4_extent_path *npath = NULL;
1436 bool ins_right_leaf = false;
1440 depth = ext_depth(inode_ref->inode);
/* First attempt: insert straight into the leaf at the current depth. */
1441 ret = ext4_ext_insert_leaf(inode_ref, path, depth, newext, flags,
1443 if (ret == EIO && need_split == true) {
/* Leaf is full — find the deepest ancestor with a free index slot;
 * level counts how many levels must be split. */
1445 for (i = depth, level = 0; i >= 0; i--, level++)
1446 if (EXT_HAS_FREE_INDEX(path + i))
1449 /* Do we need to grow the tree? */
1451 ret = ext4_ext_grow_indepth(inode_ref, 0);
/* Depth changed: redo the lookup to rebuild the path. */
1455 ret = ext4_find_extent(
1456 inode_ref, to_le32(newext->first_block), ppath, 0);
1462 * After growing the tree, there should be free space in
1463 * the only child node of the root.
1469 i = depth - (level - 1);
1470 /* We split from leaf to the i-th node */
/* Scratch path entries for the newly created right-hand nodes. */
1472 npath = ext4_calloc(1, sizeof(struct ext4_extent_path) *
1478 ret = ext4_ext_split_node(inode_ref, path, i, newext,
1479 npath, &ins_right_leaf);
/* Swap the split-off nodes into the live path where the new extent
 * will land; drop references to the ones we keep on the left. */
1483 while (--level >= 0) {
1485 ext4_ext_replace_path(inode_ref, path,
1488 else if (npath[level].block.lb_id)
1489 ext4_ext_drop_refs(inode_ref,
/* Cleanup: release path refs and, on the error path, free any split
 * blocks that never made it into the tree. */
1499 ext4_ext_drop_refs(inode_ref, path, 0);
1501 while (--level >= 0 && npath) {
1502 if (npath[level].block.lb_id) {
1503 ext4_fsblk_t block = npath[level].block.lb_id;
1504 ext4_ext_free_blocks(inode_ref, block, 1, 0);
1505 ext4_ext_drop_refs(inode_ref, npath + level, 1);
/*
 * Free the physical blocks backing the logical range [@from, @to]
 * (inclusive) of extent @ex.  @from is offset into the extent to find
 * the first physical block; @to - @from + 1 blocks are released.
 */
1515 static void ext4_ext_remove_blocks(struct ext4_inode_ref *inode_ref,
1516 struct ext4_extent *ex, ext4_lblk_t from,
1519 ext4_lblk_t len = to - from + 1;
/* Translate logical offset within the extent to a physical block. */
1522 num = from - to_le32(ex->first_block);
1523 start = ext4_ext_pblock(ex) + num;
1524 ext4_dbg(DEBUG_EXTENT,
1525 "Freeing %" PRIu32 " at %" PRIu64 ", %" PRIu32 "\n", from,
1528 ext4_ext_free_blocks(inode_ref, start, len, 0);
/*
 * Remove the index entry that @path[depth] points at, free the child
 * block it referenced, and propagate the new first-key upward when the
 * removed entry was (or left behind) the first one in its node.
 * NOTE(review): lines declaring `i`, `leaf`, `err` and some returns are
 * not visible in this view — presumably i starts at @depth and walks
 * upward; confirm against the full file.
 */
1531 static int ext4_ext_remove_idx(struct ext4_inode_ref *inode_ref,
1532 struct ext4_extent_path *path, int32_t depth)
1538 /* free index block */
1539 leaf = ext4_idx_pblock(path[i].index);
/* Close the gap: shift the following index entries one slot left. */
1541 if (path[i].index != EXT_LAST_INDEX(path[i].header)) {
1542 ptrdiff_t len = EXT_LAST_INDEX(path[i].header) - path[i].index;
1543 memmove(path[i].index, path[i].index + 1,
1544 len * sizeof(struct ext4_extent_index));
1547 path[i].header->entries_count =
1548 to_le16(to_le16(path[i].header->entries_count) - 1);
1549 err = ext4_ext_dirty(inode_ref, path + i);
1553 ext4_dbg(DEBUG_EXTENT, "IDX: Freeing %" PRIu32 " at %" PRIu64 ", %d\n",
1554 to_le32(path[i].index->first_block), leaf, 1);
1555 ext4_ext_free_blocks(inode_ref, leaf, 1, 0);
1558 * We may need to correct the paths after the first extents/indexes in
1559 * a node being modified.
1561 * We do not need to consider whether there's any extents presenting or
1562 * not, as garbage will be cleared soon.
/* Only the first entry's key is mirrored in the parent; stop as soon
 * as the modified entry is not the first one of its node. */
1565 if (path[i].index != EXT_FIRST_INDEX(path[i].header))
1568 path[i - 1].index->first_block = path[i].index->first_block;
1569 err = ext4_ext_dirty(inode_ref, path + i - 1);
/*
 * Remove the logical range [@from, ...@to] from the leaf node that
 * @path ends in: trims extents that partially overlap the range, frees
 * the underlying blocks, compacts the surviving extents to the front
 * of the node, and — when the leaf becomes empty — removes its index
 * entry from the parent.
 * NOTE(review): this view is missing several lines (loop advance,
 * start_ex/ex2 bookkeeping, some branches) — verify the exact
 * compaction logic against the full file.
 */
1578 static int ext4_ext_remove_leaf(struct ext4_inode_ref *inode_ref,
1579 struct ext4_extent_path *path, ext4_lblk_t from,
1583 int32_t depth = ext_depth(inode_ref->inode);
1584 struct ext4_extent *ex = path[depth].extent;
1585 struct ext4_extent *start_ex, *ex2 = NULL;
1586 struct ext4_extent_header *eh = path[depth].header;
1589 uint16_t new_entries;
1592 new_entries = to_le16(eh->entries_count);
/* Walk every extent in the leaf whose logical start is <= @to. */
1593 while (ex <= EXT_LAST_EXTENT(path[depth].header) &&
1594 to_le32(ex->first_block) <= to) {
1595 int32_t new_len = 0;
1597 ext4_lblk_t start, new_start;
1598 ext4_fsblk_t newblock;
1599 new_start = start = to_le32(ex->first_block);
1600 len = ext4_ext_get_actual_len(ex);
1601 newblock = ext4_ext_pblock(ex);
1604 * The position that we start truncation is inside the range of an
1605 * extent. Here we should calculate the new length of that extent and
1606 * may start the removal from the next extent.
/* Keep the leading part [start, from): shrink removal window. */
1609 len -= from - start;
1610 new_len = from - start;
1616 * The last block to be truncated is inside the range of an
1617 * extent. We need to calculate the new length and the new
1618 * start of the extent.
1620 if (start + len - 1 > to) {
/* Keep the trailing part (to, start+len): extent's new start is
 * to + 1, and its physical start advances by the same amount. */
1621 new_len = start + len - 1 - to;
1624 newblock += to + 1 - start;
/* Free the physical blocks for the part being removed. */
1629 ext4_ext_remove_blocks(inode_ref, ex, start, start + len - 1);
1631 * Set the first block of the extent if it is presented.
1633 ex->first_block = to_le32(new_start);
1636 * If the new length of the current extent we are working on is
/* Rewrite the surviving extent; block_count assignment clears the
 * unwritten bit, so restore it afterwards. */
1642 unwritten = ext4_ext_is_unwritten(ex);
1643 ex->block_count = to_le16(new_len);
1644 ext4_ext_store_pblock(ex, newblock);
1646 ext4_ext_mark_unwritten(ex);
1656 * Move any remaining extents to the starting position of the node.
1658 if (ex2 <= EXT_LAST_EXTENT(eh))
1659 memmove(start_ex, ex2, (EXT_LAST_EXTENT(eh) - ex2 + 1) *
1660 sizeof(struct ext4_extent));
1662 eh->entries_count = to_le16(new_entries);
1663 ext4_ext_dirty(inode_ref, path + depth);
1666 * If the extent pointer is pointed to the first extent of the node, and
1667 * there's still extents presenting, we may need to correct the indexes
1670 if (path[depth].extent == EXT_FIRST_EXTENT(eh) && eh->entries_count) {
1671 err = ext4_ext_correct_indexes(inode_ref, path);
1676 /* if this leaf is free, then we should
1677 * remove it from index block above */
1678 if (eh->entries_count == 0 && path[depth].block.lb_id)
1679 err = ext4_ext_remove_idx(inode_ref, path, depth - 1);
/* Otherwise advance the parent index so the caller's upward walk
 * continues with the next child. */
1681 path[depth - 1].index++;
1687 * Check if there's more to remove at a specific level.
/*
 * Return true while the index node at @path still has entries whose
 * subtree can contain blocks inside the removal range ending at @to:
 * the node must be non-empty, the index pointer still within bounds,
 * and its key not beyond @to.
 */
1689 static bool ext4_ext_more_to_rm(struct ext4_extent_path *path, ext4_lblk_t to)
1691 if (!to_le16(path->header->entries_count))
1694 if (path->index > EXT_LAST_INDEX(path->header))
1697 if (to_le32(path->index->first_block) > to)
/*
 * Remove the logical block range [@from, @to] from the inode's extent
 * tree.  Handles the special "punch a hole inside one extent" case by
 * splitting that extent in two, then walks the tree iteratively
 * (simulating recursion via the path array), removing leaves and empty
 * index nodes, and finally resets the root when the tree becomes empty.
 * NOTE(review): this view is missing many lines (loop headers, error
 * exits, the descent/ascent bookkeeping) — verify the exact traversal
 * against the full file.
 */
1703 int ext4_extent_remove_space(struct ext4_inode_ref *inode_ref, ext4_lblk_t from,
1706 struct ext4_extent_path *path = NULL;
1708 int32_t depth = ext_depth(inode_ref->inode);
1711 ret = ext4_find_extent(inode_ref, from, &path, 0);
/* Nothing mapped at @from: no leaf extent to remove. */
1715 if (!path[depth].extent) {
1720 bool in_range = IN_RANGE(from, to_le32(path[depth].extent->first_block),
1721 ext4_ext_get_actual_len(path[depth].extent));
1728 /* If we do remove_space inside the range of an extent */
1729 if ((to_le32(path[depth].extent->first_block) < from) &&
1730 (to < to_le32(path[depth].extent->first_block) +
1731 ext4_ext_get_actual_len(path[depth].extent) - 1)) {
/* Hole-punch inside a single extent: truncate the existing extent to
 * end at from-1 and insert a new extent starting at to+1 that maps
 * the tail, preserving the unwritten flag on both halves. */
1733 struct ext4_extent *ex = path[depth].extent, newex;
1734 int unwritten = ext4_ext_is_unwritten(ex);
1735 ext4_lblk_t ee_block = to_le32(ex->first_block);
1736 int32_t len = ext4_ext_get_actual_len(ex);
1737 ext4_fsblk_t newblock = to + 1 - ee_block + ext4_ext_pblock(ex);
1739 ex->block_count = to_le16(from - ee_block);
1741 ext4_ext_mark_unwritten(ex);
1743 ext4_ext_dirty(inode_ref, path + depth);
1745 newex.first_block = to_le32(to + 1);
1746 newex.block_count = to_le16(ee_block + len - 1 - to);
1747 ext4_ext_store_pblock(&newex, newblock);
1749 ext4_ext_mark_unwritten(&newex);
1751 ret = ext4_ext_insert_extent(inode_ref, &path, &newex, 0);
/* Leaf level: clamp the removal window to this leaf's coverage and
 * delegate to ext4_ext_remove_leaf(). */
1758 struct ext4_extent_header *eh;
1759 struct ext4_extent *first_ex, *last_ex;
1760 ext4_lblk_t leaf_from, leaf_to;
1761 eh = path[i].header;
1762 ext4_assert(to_le16(eh->entries_count) > 0);
1763 first_ex = EXT_FIRST_EXTENT(eh);
1764 last_ex = EXT_LAST_EXTENT(eh);
1765 leaf_from = to_le32(first_ex->first_block);
1766 leaf_to = to_le32(last_ex->first_block) +
1767 ext4_ext_get_actual_len(last_ex) - 1;
1768 if (leaf_from < from)
1774 ext4_ext_remove_leaf(inode_ref, path, leaf_from,
1776 ext4_ext_drop_refs(inode_ref, path + i, 0);
/* Index level: either descend into the next child that still overlaps
 * the range, or — once the node is exhausted — clean it up and ascend. */
1781 struct ext4_extent_header *eh;
1782 eh = path[i].header;
1783 if (ext4_ext_more_to_rm(path + i, to)) {
1784 struct ext4_block bh = EXT4_BLOCK_ZERO();
1785 if (path[i + 1].block.lb_id)
1786 ext4_ext_drop_refs(inode_ref, path + i + 1, 0);
1788 ret = read_extent_tree_block(
1789 inode_ref, ext4_idx_pblock(path[i].index),
1790 depth - i - 1, &bh, 0);
/* Extend the path one level down with the freshly read child. */
1794 path[i].p_block = ext4_idx_pblock(path[i].index);
1795 path[i + 1].block = bh;
1796 path[i + 1].header = ext_block_hdr(&bh);
1797 path[i + 1].depth = depth - i - 1;
1799 path[i + 1].extent =
1800 EXT_FIRST_EXTENT(path[i + 1].header);
1803 EXT_FIRST_INDEX(path[i + 1].header);
1809 * Garbage entries will finally be cleared here.
1811 if (!eh->entries_count)
1812 ret = ext4_ext_remove_idx(inode_ref,
1815 path[i - 1].index++;
1819 ext4_block_set(inode_ref->fs->bdev,
1826 /* TODO: flexible tree reduction should be here */
1827 if (path->header->entries_count == 0) {
1829 * truncate to zero freed all the tree,
1830 * so we need to correct eh_depth
1832 ext_inode_hdr(inode_ref->inode)->depth = 0;
1833 ext_inode_hdr(inode_ref->inode)->max_entries_count =
1834 to_le16(ext4_ext_space_root(inode_ref));
1835 ret = ext4_ext_dirty(inode_ref, path);
1839 ext4_ext_drop_refs(inode_ref, path, 0);
/*
 * Split the extent that @ppath ends in at logical block @split.
 * When @split equals the extent's start, only the written/unwritten
 * state is changed (no split needed).  Otherwise the extent is
 * truncated to end at split-1 and a second extent covering
 * [split, end] is inserted with EXT4_EXT_NO_COMBINE.  @split_flag's
 * EXT4_EXT_MARK_UNWRIT1/2 control which half ends up unwritten.  If
 * the insert fails, the original extent length is restored.
 * NOTE(review): a few lines (returns, the restore label) are missing
 * from this view — confirm error paths against the full file.
 */
1845 static int ext4_ext_split_extent_at(struct ext4_inode_ref *inode_ref,
1846 struct ext4_extent_path **ppath,
1847 ext4_lblk_t split, uint32_t split_flag)
1849 struct ext4_extent *ex, newex;
1850 ext4_fsblk_t newblock;
1851 ext4_lblk_t ee_block;
1853 int32_t depth = ext_depth(inode_ref->inode);
1856 ex = (*ppath)[depth].extent;
1857 ee_block = to_le32(ex->first_block);
1858 ee_len = ext4_ext_get_actual_len(ex);
/* Physical block where the second half will begin. */
1859 newblock = split - ee_block + ext4_ext_pblock(ex);
1861 if (split == ee_block) {
1863 * case b: block @split is the block that the extent begins with
1864 * then we just change the state of the extent, and splitting
1867 if (split_flag & EXT4_EXT_MARK_UNWRIT2)
1868 ext4_ext_mark_unwritten(ex);
1870 ext4_ext_mark_initialized(ex);
1872 err = ext4_ext_dirty(inode_ref, *ppath + depth);
/* Real split: first half keeps [ee_block, split). */
1876 ex->block_count = to_le16(split - ee_block);
1877 if (split_flag & EXT4_EXT_MARK_UNWRIT1)
1878 ext4_ext_mark_unwritten(ex);
1880 err = ext4_ext_dirty(inode_ref, *ppath + depth);
/* Second half: [split, ee_block + ee_len), inserted as a separate
 * extent — NO_COMBINE prevents it merging straight back. */
1884 newex.first_block = to_le32(split);
1885 newex.block_count = to_le16(ee_len - (split - ee_block));
1886 ext4_ext_store_pblock(&newex, newblock);
1887 if (split_flag & EXT4_EXT_MARK_UNWRIT2)
1888 ext4_ext_mark_unwritten(&newex);
1889 err = ext4_ext_insert_extent(inode_ref, ppath, &newex,
1890 EXT4_EXT_NO_COMBINE);
1892 goto restore_extent_len;
/* Insert failed: undo the truncation so the tree stays consistent. */
1897 ex->block_count = to_le16(ee_len);
1898 err = ext4_ext_dirty(inode_ref, *ppath + depth);
/*
 * Convert @blocks blocks starting at logical block @split, inside the
 * unwritten extent that @ppath ends in, to the initialized (written)
 * state.  Depending on where the converted range sits, this performs
 * one split (range touches the extent's start or end) or two splits
 * (range in the middle → three extents, the middle one initialized).
 */
1902 static int ext4_ext_convert_to_initialized(struct ext4_inode_ref *inode_ref,
1903 struct ext4_extent_path **ppath,
1904 ext4_lblk_t split, uint32_t blocks)
1906 int32_t depth = ext_depth(inode_ref->inode), err = EOK;
1907 struct ext4_extent *ex = (*ppath)[depth].extent;
1909 ext4_assert(to_le32(ex->first_block) <= split);
1911 if (split + blocks ==
1912 to_le32(ex->first_block) + ext4_ext_get_actual_len(ex)) {
1913 /* split and initialize right part */
1914 err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1915 EXT4_EXT_MARK_UNWRIT1);
1916 } else if (to_le32(ex->first_block) == split) {
1917 /* split and initialize left part */
1918 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1919 EXT4_EXT_MARK_UNWRIT2);
1921 /* split 1 extent to 3 and initialize the 2nd */
/* First cut at split+blocks keeps both resulting halves unwritten... */
1922 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1923 EXT4_EXT_MARK_UNWRIT1 |
1924 EXT4_EXT_MARK_UNWRIT2);
/* ...then the cut at split leaves only the left piece unwritten, so
 * the middle [split, split+blocks) range ends up initialized. */
1926 err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1927 EXT4_EXT_MARK_UNWRIT1);
/*
 * Return the logical block number of the first allocated block AFTER
 * the position that @path points at, or EXT_MAX_BLOCKS when there is
 * none.  Walks up from the leaf: the next sibling extent at leaf
 * level, otherwise the next index entry at the nearest ancestor.
 * NOTE(review): the loop-decrement and return lines are not all
 * visible in this view — confirm against the full file.
 */
1934 static ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_extent_path *path)
1938 depth = path->depth;
/* Empty tree: nothing allocated at all. */
1940 if (depth == 0 && path->extent == NULL)
1941 return EXT_MAX_BLOCKS;
1943 while (depth >= 0) {
1944 if (depth == path->depth) {
/* Leaf level: next extent in the same leaf, if any. */
1946 if (path[depth].extent &&
1947 path[depth].extent !=
1948 EXT_LAST_EXTENT(path[depth].header))
1950 path[depth].extent[1].first_block);
/* Index level: next child subtree's first key, if any. */
1953 if (path[depth].index !=
1954 EXT_LAST_INDEX(path[depth].header))
1956 path[depth].index[1].first_block);
1961 return EXT_MAX_BLOCKS;
/*
 * Zero-fill @blocks_count on-disk blocks starting at the block the
 * caller passes (parameter name not visible in this view).  Each block
 * is fetched without reading (it is fully overwritten), memset to
 * zero, marked dirty, and released.  Used before exposing a formerly
 * unwritten range as initialized data.
 */
1964 static int ext4_ext_zero_unwritten_range(struct ext4_inode_ref *inode_ref,
1966 uint32_t blocks_count)
1970 uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
1971 for (i = 0; i < blocks_count; i++) {
1972 struct ext4_block bh = EXT4_BLOCK_ZERO();
/* noread: contents are irrelevant, the whole block gets overwritten. */
1973 err = ext4_trans_block_get_noread(inode_ref->fs->bdev, &bh,
1978 memset(bh.data, 0, block_size);
1979 ext4_trans_set_block_dirty(bh.buf);
1980 err = ext4_block_set(inode_ref->fs->bdev, &bh);
/*
 * Debug helper: dump each level of @path — depth, cached physical
 * block, and the offsets of the current extent/index within their
 * node (computed as pointer differences, hence %td).  Compiled but
 * normally unreferenced (__unused).
 */
1987 __unused static void print_path(struct ext4_extent_path *path)
1989 int32_t i = path->depth;
1994 ? (path->extent - EXT_FIRST_EXTENT(path->header))
1998 ? (path->index - EXT_FIRST_INDEX(path->header))
2003 ext4_dbg(DEBUG_EXTENT,
2004 "depth %" PRId32 ", p_block: %" PRIu64 ","
2005 "p_ext offset: %td, p_idx offset: %td\n",
2006 i, path->p_block, a, b);
2012 int ext4_extent_get_blocks(struct ext4_inode_ref *inode_ref, ext4_lblk_t iblock,
2013 uint32_t max_blocks, ext4_fsblk_t *result,
2014 bool create, uint32_t *blocks_count)
2016 struct ext4_extent_path *path = NULL;
2017 struct ext4_extent newex, *ex;
2021 uint32_t allocated = 0;
2023 ext4_fsblk_t newblock;
2031 /* find extent for this block */
2032 err = ext4_find_extent(inode_ref, iblock, &path, 0);
2038 depth = ext_depth(inode_ref->inode);
2041 * consistent leaf must not be empty
2042 * this situations is possible, though, _during_ tree modification
2043 * this is why assert can't be put in ext4_ext_find_extent()
2045 ex = path[depth].extent;
2047 ext4_lblk_t ee_block = to_le32(ex->first_block);
2048 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
2049 uint16_t ee_len = ext4_ext_get_actual_len(ex);
2050 /* if found exent covers block, simple return it */
2051 if (IN_RANGE(iblock, ee_block, ee_len)) {
2052 /* number of remain blocks in the extent */
2053 allocated = ee_len - (iblock - ee_block);
2055 if (!ext4_ext_is_unwritten(ex)) {
2056 newblock = iblock - ee_block + ee_start;
2065 uint32_t zero_range;
2066 zero_range = allocated;
2067 if (zero_range > max_blocks)
2068 zero_range = max_blocks;
2070 newblock = iblock - ee_block + ee_start;
2071 err = ext4_ext_zero_unwritten_range(inode_ref, newblock,
2076 err = ext4_ext_convert_to_initialized(
2077 inode_ref, &path, iblock, zero_range);
2086 * requested block isn't allocated yet
2087 * we couldn't try to create block if create flag is zero
2093 /* find next allocated block so that we know how many
2094 * blocks we can allocate without ovelapping next extent */
2095 next = ext4_ext_next_allocated_block(path);
2096 allocated = next - iblock;
2097 if (allocated > max_blocks)
2098 allocated = max_blocks;
2100 /* allocate new block */
2101 goal = ext4_ext_find_goal(inode_ref, path, iblock);
2102 newblock = ext4_new_meta_blocks(inode_ref, goal, 0, &allocated, &err);
2106 /* try to insert new extent into found leaf and return */
2107 newex.first_block = to_le32(iblock);
2108 ext4_ext_store_pblock(&newex, newblock);
2109 newex.block_count = to_le16(allocated);
2110 err = ext4_ext_insert_extent(inode_ref, &path, &newex, 0);
2112 /* free data blocks we just allocated */
2113 ext4_ext_free_blocks(inode_ref, ext4_ext_pblock(&newex),
2114 to_le16(newex.block_count), 0);
2118 /* previous routine could use block we allocated */
2119 newblock = ext4_ext_pblock(&newex);
2122 if (allocated > max_blocks)
2123 allocated = max_blocks;
2129 *blocks_count = allocated;
2133 ext4_ext_drop_refs(inode_ref, path, 0);