2 * Copyright (c) 2017 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3 * Copyright (c) 2017 Kaho Ng (ngkaho1234@gmail.com)
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
11 #include <ext4_config.h>
12 #include <ext4_types.h>
13 #include <ext4_misc.h>
14 #include <ext4_errno.h>
15 #include <ext4_debug.h>
17 #include <ext4_blockdev.h>
18 #include <ext4_trans.h>
20 #include <ext4_super.h>
21 #include <ext4_crc32.h>
22 #include <ext4_balloc.h>
23 #include <ext4_extent.h>
30 #if CONFIG_EXTENTS_ENABLE
32 * used by extent splitting.
34 #define EXT4_EXT_MARK_UNWRIT1 0x02 /* mark first half unwritten */
35 #define EXT4_EXT_MARK_UNWRIT2 0x04 /* mark second half unwritten */
36 #define EXT4_EXT_DATA_VALID1 0x08 /* first half contains valid data */
37 #define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */
38 #define EXT4_EXT_NO_COMBINE 0x20 /* do not combine two extents */
40 #define EXT4_EXT_UNWRITTEN_MASK (1L << 15)
42 #define EXT4_EXT_MAX_LEN_WRITTEN (1L << 15)
43 #define EXT4_EXT_MAX_LEN_UNWRITTEN \
44 (EXT4_EXT_MAX_LEN_WRITTEN - 1)
46 #define EXT4_EXT_GET_LEN(ex) to_le16((ex)->block_count)
47 #define EXT4_EXT_GET_LEN_UNWRITTEN(ex) \
48 (EXT4_EXT_GET_LEN(ex) &= ~(EXT4_EXT_UNWRITTEN_MASK))
49 #define EXT4_EXT_SET_LEN(ex, count) \
50 ((ex)->block_count = to_le16(count))
52 #define EXT4_EXT_IS_UNWRITTEN(ex) \
53 (EXT4_EXT_GET_LEN(ex) > EXT4_EXT_MAX_LEN_WRITTEN)
54 #define EXT4_EXT_SET_UNWRITTEN(ex) \
55 ((ex)->block_count |= to_le16(EXT4_EXT_UNWRITTEN_MASK))
56 #define EXT4_EXT_SET_WRITTEN(ex) \
57 ((ex)->block_count &= ~(to_le16(EXT4_EXT_UNWRITTEN_MASK)))
60 * Array of ext4_ext_path contains path to some extent.
61 * Creation/lookup routines use it for traversal/splitting/etc.
62 * Truncate uses it to simulate recursive walking.
64 struct ext4_extent_path {
66 struct ext4_block block;
69 struct ext4_extent_header *header;
70 struct ext4_extent_index *index;
71 struct ext4_extent *extent;
/*
 * This is the extent tail on-disk structure.
 * All other extent structures are 12 bytes long. It turns out that
 * block_size % 12 >= 4 for at least all powers of 2 greater than 512, which
 * covers all valid ext4 block sizes. Therefore, this tail structure can be
 * crammed into the end of the block without having to rebalance the tree.
 */
struct ext4_extent_tail
{
	uint32_t et_checksum; /* crc32c(uuid+inum+extent_block) */
};
/*
 * This is the extent on-disk structure.
 * It's used at the bottom of the tree.
 */
struct ext4_extent {
	uint32_t first_block; /* First logical block extent covers */
	uint16_t block_count; /* Number of blocks covered by extent */
	uint16_t start_hi;    /* High 16 bits of physical block */
	uint32_t start_lo;    /* Low 32 bits of physical block */
};
/*
 * This is index on-disk structure.
 * It's used at all the levels except the bottom.
 */
struct ext4_extent_index {
	uint32_t first_block; /* Index covers logical blocks from 'block' */

	/*
	 * Pointer to the physical block of the next
	 * level. leaf or next index could be there
	 * high 16 bits of physical block
	 */
	uint32_t leaf_lo; /* Low 32 bits of physical block of the child node */
	uint16_t leaf_hi; /* High 16 bits of physical block of the child node */
	uint16_t padding; /* Unused; keeps the entry 12 bytes like an extent */
};
/*
 * Each block (leaves and indexes), even inode-stored has header.
 */
struct ext4_extent_header {
	uint16_t magic;		    /* Must be EXT4_EXTENT_MAGIC (0xF30A) */
	uint16_t entries_count;	    /* Number of valid entries */
	uint16_t max_entries_count; /* Capacity of store in entries */
	uint16_t depth;		    /* Has tree real underlying blocks? */
	uint32_t generation;	    /* generation of the tree */
};
#define EXT4_EXTENT_MAGIC 0xF30A

#define EXT4_EXTENT_FIRST(header)                                              \
	((struct ext4_extent *)(((char *)(header)) +                           \
				sizeof(struct ext4_extent_header)))

#define EXT4_EXTENT_FIRST_INDEX(header)                                        \
	((struct ext4_extent_index *)(((char *)(header)) +                     \
				      sizeof(struct ext4_extent_header)))

/*
 * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an
 * initialized extent. This is 2^15 and not (2^16 - 1), since we use the
 * MSB of ee_len field in the extent datastructure to signify if this
 * particular extent is an initialized extent or an uninitialized (i.e.
 * unwritten) one.
 * EXT_UNINIT_MAX_LEN is the maximum number of blocks we can have in an
 * uninitialized extent.
 * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an
 * uninitialized one. In other words, if MSB of ee_len is set, it is an
 * uninitialized extent with only one special scenario when ee_len = 0x8000.
 * In this case we can not have an uninitialized extent of zero length and
 * thus we make it as a special case of initialized extent with 0x8000 length.
 * This way we get better extent-to-group alignment for initialized extents.
 * Hence, the maximum number of blocks we can have in an *initialized*
 * extent is 2^15 (32768) and in an *uninitialized* extent is 2^15-1 (32767).
 */
#define EXT_INIT_MAX_LEN (1L << 15)
#define EXT_UNWRITTEN_MAX_LEN (EXT_INIT_MAX_LEN - 1)

#define EXT_EXTENT_SIZE sizeof(struct ext4_extent)
/* NOTE(review): originally referenced the undeclared 'struct ext4_extent_idx';
 * the index type declared in this file is 'struct ext4_extent_index'. */
#define EXT_INDEX_SIZE sizeof(struct ext4_extent_index)

#define EXT_FIRST_EXTENT(__hdr__)                                              \
	((struct ext4_extent *)(((char *)(__hdr__)) +                          \
				sizeof(struct ext4_extent_header)))
#define EXT_FIRST_INDEX(__hdr__)                                               \
	((struct ext4_extent_index *)(((char *)(__hdr__)) +                    \
				      sizeof(struct ext4_extent_header)))
#define EXT_HAS_FREE_INDEX(__path__)                                           \
	(to_le16((__path__)->header->entries_count) <                          \
	 to_le16((__path__)->header->max_entries_count))
#define EXT_LAST_EXTENT(__hdr__)                                               \
	(EXT_FIRST_EXTENT((__hdr__)) + to_le16((__hdr__)->entries_count) - 1)
#define EXT_LAST_INDEX(__hdr__)                                                \
	(EXT_FIRST_INDEX((__hdr__)) + to_le16((__hdr__)->entries_count) - 1)
#define EXT_MAX_EXTENT(__hdr__)                                                \
	(EXT_FIRST_EXTENT((__hdr__)) + to_le16((__hdr__)->max_entries_count) - 1)
#define EXT_MAX_INDEX(__hdr__)                                                 \
	(EXT_FIRST_INDEX((__hdr__)) + to_le16((__hdr__)->max_entries_count) - 1)

/* Byte offset of the checksum tail within an extent block. */
#define EXT4_EXTENT_TAIL_OFFSET(hdr)                                           \
	(sizeof(struct ext4_extent_header) +                                   \
	 (sizeof(struct ext4_extent) * to_le16((hdr)->max_entries_count)))
187 /**@brief Get logical number of the block covered by extent.
188 * @param extent Extent to load number from
189 * @return Logical number of the first block covered by extent */
190 static inline uint32_t ext4_extent_get_first_block(struct ext4_extent *extent)
192 return to_le32(extent->first_block);
195 /**@brief Set logical number of the first block covered by extent.
196 * @param extent Extent to set number to
197 * @param iblock Logical number of the first block covered by extent */
198 static inline void ext4_extent_set_first_block(struct ext4_extent *extent,
201 extent->first_block = to_le32(iblock);
204 /**@brief Get number of blocks covered by extent.
205 * @param extent Extent to load count from
206 * @return Number of blocks covered by extent */
207 static inline uint16_t ext4_extent_get_block_count(struct ext4_extent *extent)
209 if (EXT4_EXT_IS_UNWRITTEN(extent))
210 return EXT4_EXT_GET_LEN_UNWRITTEN(extent);
212 return EXT4_EXT_GET_LEN(extent);
214 /**@brief Set number of blocks covered by extent.
215 * @param extent Extent to load count from
216 * @param count Number of blocks covered by extent
217 * @param unwritten Whether the extent is unwritten or not */
218 static inline void ext4_extent_set_block_count(struct ext4_extent *extent,
219 uint16_t count, bool unwritten)
221 EXT4_EXT_SET_LEN(extent, count);
223 EXT4_EXT_SET_UNWRITTEN(extent);
226 /**@brief Get physical number of the first block covered by extent.
227 * @param extent Extent to load number
228 * @return Physical number of the first block covered by extent */
229 static inline uint64_t ext4_extent_get_start(struct ext4_extent *extent)
231 return ((uint64_t)to_le16(extent->start_hi)) << 32 |
232 ((uint64_t)to_le32(extent->start_lo));
236 /**@brief Set physical number of the first block covered by extent.
237 * @param extent Extent to load number
238 * @param fblock Physical number of the first block covered by extent */
239 static inline void ext4_extent_set_start(struct ext4_extent *extent, uint64_t fblock)
241 extent->start_lo = to_le32((fblock << 32) >> 32);
242 extent->start_hi = to_le16((uint16_t)(fblock >> 32));
246 /**@brief Get logical number of the block covered by extent index.
247 * @param index Extent index to load number from
248 * @return Logical number of the first block covered by extent index */
249 static inline uint32_t
250 ext4_extent_index_get_first_block(struct ext4_extent_index *index)
252 return to_le32(index->first_block);
255 /**@brief Set logical number of the block covered by extent index.
256 * @param index Extent index to set number to
257 * @param iblock Logical number of the first block covered by extent index */
259 ext4_extent_index_set_first_block(struct ext4_extent_index *index,
262 index->first_block = to_le32(iblock);
265 /**@brief Get physical number of block where the child node is located.
266 * @param index Extent index to load number from
267 * @return Physical number of the block with child node */
268 static inline uint64_t
269 ext4_extent_index_get_leaf(struct ext4_extent_index *index)
271 return ((uint64_t)to_le16(index->leaf_hi)) << 32 |
272 ((uint64_t)to_le32(index->leaf_lo));
275 /**@brief Set physical number of block where the child node is located.
276 * @param index Extent index to set number to
277 * @param fblock Ohysical number of the block with child node */
278 static inline void ext4_extent_index_set_leaf(struct ext4_extent_index *index,
281 index->leaf_lo = to_le32((fblock << 32) >> 32);
282 index->leaf_hi = to_le16((uint16_t)(fblock >> 32));
285 /**@brief Get magic value from extent header.
286 * @param header Extent header to load value from
287 * @return Magic value of extent header */
288 static inline uint16_t
289 ext4_extent_header_get_magic(struct ext4_extent_header *header)
291 return to_le16(header->magic);
294 /**@brief Set magic value to extent header.
295 * @param header Extent header to set value to
296 * @param magic Magic value of extent header */
297 static inline void ext4_extent_header_set_magic(struct ext4_extent_header *header,
300 header->magic = to_le16(magic);
303 /**@brief Get number of entries from extent header
304 * @param header Extent header to get value from
305 * @return Number of entries covered by extent header */
306 static inline uint16_t
307 ext4_extent_header_get_entries_count(struct ext4_extent_header *header)
309 return to_le16(header->entries_count);
312 /**@brief Set number of entries to extent header
313 * @param header Extent header to set value to
314 * @param count Number of entries covered by extent header */
316 ext4_extent_header_set_entries_count(struct ext4_extent_header *header,
319 header->entries_count = to_le16(count);
322 /**@brief Get maximum number of entries from extent header
323 * @param header Extent header to get value from
324 * @return Maximum number of entries covered by extent header */
325 static inline uint16_t
326 ext4_extent_header_get_max_entries_count(struct ext4_extent_header *header)
328 return to_le16(header->max_entries_count);
331 /**@brief Set maximum number of entries to extent header
332 * @param header Extent header to set value to
333 * @param max_count Maximum number of entries covered by extent header */
335 ext4_extent_header_set_max_entries_count(struct ext4_extent_header *header,
338 header->max_entries_count = to_le16(max_count);
341 /**@brief Get depth of extent subtree.
342 * @param header Extent header to get value from
343 * @return Depth of extent subtree */
344 static inline uint16_t
345 ext4_extent_header_get_depth(struct ext4_extent_header *header)
347 return to_le16(header->depth);
350 /**@brief Set depth of extent subtree.
351 * @param header Extent header to set value to
352 * @param depth Depth of extent subtree */
354 ext4_extent_header_set_depth(struct ext4_extent_header *header, uint16_t depth)
356 header->depth = to_le16(depth);
359 /**@brief Get generation from extent header
360 * @param header Extent header to get value from
361 * @return Generation */
362 static inline uint32_t
363 ext4_extent_header_get_generation(struct ext4_extent_header *header)
365 return to_le32(header->generation);
368 /**@brief Set generation to extent header
369 * @param header Extent header to set value to
370 * @param generation Generation */
372 ext4_extent_header_set_generation(struct ext4_extent_header *header,
375 header->generation = to_le32(generation);
378 void ext4_extent_tree_init(struct ext4_inode_ref *inode_ref)
380 /* Initialize extent root header */
381 struct ext4_extent_header *header =
382 ext4_inode_get_extent_header(inode_ref->inode);
383 ext4_extent_header_set_depth(header, 0);
384 ext4_extent_header_set_entries_count(header, 0);
385 ext4_extent_header_set_generation(header, 0);
386 ext4_extent_header_set_magic(header, EXT4_EXTENT_MAGIC);
388 uint16_t max_entries = (EXT4_INODE_BLOCKS * sizeof(uint32_t) -
389 sizeof(struct ext4_extent_header)) /
390 sizeof(struct ext4_extent);
392 ext4_extent_header_set_max_entries_count(header, max_entries);
393 inode_ref->dirty = true;
397 static struct ext4_extent_tail *
398 find_ext4_extent_tail(struct ext4_extent_header *eh)
400 return (struct ext4_extent_tail *)(((char *)eh) +
401 EXT4_EXTENT_TAIL_OFFSET(eh));
404 static struct ext4_extent_header *ext_inode_hdr(struct ext4_inode *inode)
406 return (struct ext4_extent_header *)inode->blocks;
409 static struct ext4_extent_header *ext_block_hdr(struct ext4_block *block)
411 return (struct ext4_extent_header *)block->data;
414 static uint16_t ext_depth(struct ext4_inode *inode)
416 return to_le16(ext_inode_hdr(inode)->depth);
419 static uint16_t ext4_ext_get_actual_len(struct ext4_extent *ext)
421 return (to_le16(ext->block_count) <= EXT_INIT_MAX_LEN
422 ? to_le16(ext->block_count)
423 : (to_le16(ext->block_count) - EXT_INIT_MAX_LEN));
426 static void ext4_ext_mark_initialized(struct ext4_extent *ext)
428 ext->block_count = to_le16(ext4_ext_get_actual_len(ext));
431 static void ext4_ext_mark_unwritten(struct ext4_extent *ext)
433 ext->block_count |= to_le16(EXT_INIT_MAX_LEN);
436 static int ext4_ext_is_unwritten(struct ext4_extent *ext)
438 /* Extent with ee_len of 0x8000 is treated as an initialized extent */
439 return (to_le16(ext->block_count) > EXT_INIT_MAX_LEN);
444 * combine low and high parts of physical block number into ext4_fsblk_t
446 static ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex)
450 block = to_le32(ex->start_lo);
451 block |= ((ext4_fsblk_t)to_le16(ex->start_hi) << 31) << 1;
457 * combine low and high parts of a leaf physical block number into ext4_fsblk_t
459 static ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_index *ix)
463 block = to_le32(ix->leaf_lo);
464 block |= ((ext4_fsblk_t)to_le16(ix->leaf_hi) << 31) << 1;
469 * ext4_ext_store_pblock:
470 * stores a large physical block number into an extent struct,
471 * breaking it into parts
473 static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
475 ex->start_lo = to_le32((uint32_t)(pb & 0xffffffff));
476 ex->start_hi = to_le16((uint16_t)((pb >> 32)) & 0xffff);
480 * ext4_idx_store_pblock:
481 * stores a large physical block number into an index struct,
482 * breaking it into parts
484 static void ext4_idx_store_pblock(struct ext4_extent_index *ix, ext4_fsblk_t pb)
486 ix->leaf_lo = to_le32((uint32_t)(pb & 0xffffffff));
487 ix->leaf_hi = to_le16((uint16_t)((pb >> 32)) & 0xffff);
490 static int ext4_allocate_single_block(struct ext4_inode_ref *inode_ref,
491 ext4_fsblk_t goal, ext4_fsblk_t *blockp)
493 return ext4_balloc_alloc_block(inode_ref, goal, blockp);
496 static ext4_fsblk_t ext4_new_meta_blocks(struct ext4_inode_ref *inode_ref,
498 uint32_t flags __unused,
499 uint32_t *count, int *errp)
501 ext4_fsblk_t block = 0;
503 *errp = ext4_allocate_single_block(inode_ref, goal, &block);
509 static void ext4_ext_free_blocks(struct ext4_inode_ref *inode_ref,
510 ext4_fsblk_t block, uint32_t count,
511 uint32_t flags __unused)
513 ext4_balloc_free_blocks(inode_ref, block, count);
516 static uint16_t ext4_ext_space_block(struct ext4_inode_ref *inode_ref)
519 uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
521 size = (block_size - sizeof(struct ext4_extent_header)) /
522 sizeof(struct ext4_extent);
523 #ifdef AGGRESSIVE_TEST
530 static uint16_t ext4_ext_space_block_idx(struct ext4_inode_ref *inode_ref)
533 uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
535 size = (block_size - sizeof(struct ext4_extent_header)) /
536 sizeof(struct ext4_extent_index);
537 #ifdef AGGRESSIVE_TEST
544 static uint16_t ext4_ext_space_root(struct ext4_inode_ref *inode_ref)
548 size = sizeof(inode_ref->inode->blocks);
549 size -= sizeof(struct ext4_extent_header);
550 size /= sizeof(struct ext4_extent);
551 #ifdef AGGRESSIVE_TEST
558 static uint16_t ext4_ext_space_root_idx(struct ext4_inode_ref *inode_ref)
562 size = sizeof(inode_ref->inode->blocks);
563 size -= sizeof(struct ext4_extent_header);
564 size /= sizeof(struct ext4_extent_index);
565 #ifdef AGGRESSIVE_TEST
572 static uint16_t ext4_ext_max_entries(struct ext4_inode_ref *inode_ref,
577 if (depth == ext_depth(inode_ref->inode)) {
579 max = ext4_ext_space_root(inode_ref);
581 max = ext4_ext_space_root_idx(inode_ref);
584 max = ext4_ext_space_block(inode_ref);
586 max = ext4_ext_space_block_idx(inode_ref);
592 static ext4_fsblk_t ext4_ext_find_goal(struct ext4_inode_ref *inode_ref,
593 struct ext4_extent_path *path,
597 uint32_t depth = path->depth;
598 struct ext4_extent *ex;
601 * Try to predict block placement assuming that we are
602 * filling in a file which will eventually be
603 * non-sparse --- i.e., in the case of libbfd writing
604 * an ELF object sections out-of-order but in a way
605 * the eventually results in a contiguous object or
606 * executable file, or some database extending a table
607 * space file. However, this is actually somewhat
608 * non-ideal if we are writing a sparse file such as
609 * qemu or KVM writing a raw image file that is going
610 * to stay fairly sparse, since it will end up
611 * fragmenting the file system's free space. Maybe we
612 * should have some hueristics or some way to allow
613 * userspace to pass a hint to file system,
614 * especially if the latter case turns out to be
617 ex = path[depth].extent;
619 ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
620 ext4_lblk_t ext_block = to_le32(ex->first_block);
622 if (block > ext_block)
623 return ext_pblk + (block - ext_block);
625 return ext_pblk - (ext_block - block);
628 /* it looks like index is empty;
629 * try to find starting block from index itself */
630 if (path[depth].block.lb_id)
631 return path[depth].block.lb_id;
634 /* OK. use inode's group */
635 return ext4_fs_inode_to_goal_block(inode_ref);
639 * Allocation for a meta data block
641 static ext4_fsblk_t ext4_ext_new_meta_block(struct ext4_inode_ref *inode_ref,
642 struct ext4_extent_path *path,
643 struct ext4_extent *ex, int *err,
646 ext4_fsblk_t goal, newblock;
648 goal = ext4_ext_find_goal(inode_ref, path, to_le32(ex->first_block));
649 newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, err);
653 #if CONFIG_META_CSUM_ENABLE
654 static uint32_t ext4_ext_block_csum(struct ext4_inode_ref *inode_ref,
655 struct ext4_extent_header *eh)
657 uint32_t checksum = 0;
658 struct ext4_sblock *sb = &inode_ref->fs->sb;
660 if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
661 uint32_t ino_index = to_le32(inode_ref->index);
663 to_le32(ext4_inode_get_generation(inode_ref->inode));
664 /* First calculate crc32 checksum against fs uuid */
666 ext4_crc32c(EXT4_CRC32_INIT, sb->uuid, sizeof(sb->uuid));
667 /* Then calculate crc32 checksum against inode number
668 * and inode generation */
669 checksum = ext4_crc32c(checksum, &ino_index, sizeof(ino_index));
670 checksum = ext4_crc32c(checksum, &ino_gen, sizeof(ino_gen));
671 /* Finally calculate crc32 checksum against
672 * the entire extent block up to the checksum field */
674 ext4_crc32c(checksum, eh, EXT4_EXTENT_TAIL_OFFSET(eh));
679 #define ext4_ext_block_csum(...) 0
683 ext4_extent_block_csum_set(struct ext4_inode_ref *inode_ref __unused,
684 struct ext4_extent_header *eh)
686 struct ext4_extent_tail *tail;
688 tail = find_ext4_extent_tail(eh);
689 tail->et_checksum = to_le32(ext4_ext_block_csum(inode_ref, eh));
692 static int ext4_ext_dirty(struct ext4_inode_ref *inode_ref,
693 struct ext4_extent_path *path)
695 if (path->block.lb_id)
696 ext4_trans_set_block_dirty(path->block.buf);
698 inode_ref->dirty = true;
703 static void ext4_ext_drop_refs(struct ext4_inode_ref *inode_ref,
704 struct ext4_extent_path *path, bool keep_other)
715 for (i = 0; i <= depth; i++, path++) {
716 if (path->block.lb_id) {
717 if (ext4_bcache_test_flag(path->block.buf, BC_DIRTY))
718 ext4_extent_block_csum_set(inode_ref,
721 ext4_block_set(inode_ref->fs->bdev, &path->block);
727 * Check that whether the basic information inside the extent header
730 static int ext4_ext_check(struct ext4_inode_ref *inode_ref,
731 struct ext4_extent_header *eh, uint16_t depth,
732 ext4_fsblk_t pblk __unused)
734 struct ext4_extent_tail *tail;
735 struct ext4_sblock *sb = &inode_ref->fs->sb;
736 const char *error_msg;
739 if (to_le16(eh->magic) != EXT4_EXTENT_MAGIC) {
740 error_msg = "invalid magic";
743 if (to_le16(eh->depth) != depth) {
744 error_msg = "unexpected eh_depth";
747 if (eh->max_entries_count == 0) {
748 error_msg = "invalid eh_max";
751 if (to_le16(eh->entries_count) > to_le16(eh->max_entries_count)) {
752 error_msg = "invalid eh_entries";
756 tail = find_ext4_extent_tail(eh);
757 if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
758 if (tail->et_checksum !=
759 to_le32(ext4_ext_block_csum(inode_ref, eh))) {
760 ext4_dbg(DEBUG_EXTENT,
761 DBG_WARN "Extent block checksum failed."
762 "Blocknr: %" PRIu64 "\n",
770 ext4_dbg(DEBUG_EXTENT, "Bad extents B+ tree block: %s. "
771 "Blocknr: %" PRId64 "\n",
776 static int read_extent_tree_block(struct ext4_inode_ref *inode_ref,
777 ext4_fsblk_t pblk, int32_t depth,
778 struct ext4_block *bh,
779 uint32_t flags __unused)
783 err = ext4_trans_block_get(inode_ref->fs->bdev, bh, pblk);
787 err = ext4_ext_check(inode_ref, ext_block_hdr(bh), depth, pblk);
794 ext4_block_set(inode_ref->fs->bdev, bh);
800 * ext4_ext_binsearch_idx:
801 * binary search for the closest index of the given block
802 * the header must be checked before calling this
804 static void ext4_ext_binsearch_idx(struct ext4_extent_path *path,
807 struct ext4_extent_header *eh = path->header;
808 struct ext4_extent_index *r, *l, *m;
810 l = EXT_FIRST_INDEX(eh) + 1;
811 r = EXT_LAST_INDEX(eh);
814 if (block < to_le32(m->first_block))
824 * ext4_ext_binsearch:
825 * binary search for closest extent of the given block
826 * the header must be checked before calling this
828 static void ext4_ext_binsearch(struct ext4_extent_path *path, ext4_lblk_t block)
830 struct ext4_extent_header *eh = path->header;
831 struct ext4_extent *r, *l, *m;
833 if (eh->entries_count == 0) {
835 * this leaf is empty:
836 * we get such a leaf in split/add case
841 l = EXT_FIRST_EXTENT(eh) + 1;
842 r = EXT_LAST_EXTENT(eh);
846 if (block < to_le32(m->first_block))
852 path->extent = l - 1;
855 static int ext4_find_extent(struct ext4_inode_ref *inode_ref, ext4_lblk_t block,
856 struct ext4_extent_path **orig_path, uint32_t flags)
858 struct ext4_extent_header *eh;
859 struct ext4_block bh = EXT4_BLOCK_ZERO();
860 ext4_fsblk_t buf_block = 0;
861 struct ext4_extent_path *path = *orig_path;
862 int32_t depth, ppos = 0;
866 eh = ext_inode_hdr(inode_ref->inode);
867 depth = ext_depth(inode_ref->inode);
870 ext4_ext_drop_refs(inode_ref, path, 0);
871 if (depth > path[0].maxdepth) {
873 *orig_path = path = NULL;
877 int32_t path_depth = depth + 1;
878 /* account possible depth increase */
879 path = ext4_calloc(1, sizeof(struct ext4_extent_path) *
883 path[0].maxdepth = path_depth;
889 /* walk through the tree */
891 ext4_ext_binsearch_idx(path + ppos, block);
892 path[ppos].p_block = ext4_idx_pblock(path[ppos].index);
893 path[ppos].depth = i;
894 path[ppos].extent = NULL;
895 buf_block = path[ppos].p_block;
899 if (!path[ppos].block.lb_id ||
900 path[ppos].block.lb_id != buf_block) {
901 ret = read_extent_tree_block(inode_ref, buf_block, i,
907 ext4_block_set(inode_ref->fs->bdev, &bh);
912 eh = ext_block_hdr(&bh);
913 path[ppos].block = bh;
914 path[ppos].header = eh;
918 path[ppos].depth = i;
919 path[ppos].extent = NULL;
920 path[ppos].index = NULL;
923 ext4_ext_binsearch(path + ppos, block);
924 /* if not an empty leaf */
925 if (path[ppos].extent)
926 path[ppos].p_block = ext4_ext_pblock(path[ppos].extent);
934 ext4_ext_drop_refs(inode_ref, path, 0);
941 static void ext4_ext_init_header(struct ext4_inode_ref *inode_ref,
942 struct ext4_extent_header *eh, int32_t depth)
944 eh->entries_count = 0;
945 eh->max_entries_count = to_le16(ext4_ext_max_entries(inode_ref, depth));
946 eh->magic = to_le16(EXT4_EXTENT_MAGIC);
950 static int ext4_ext_insert_index(struct ext4_inode_ref *inode_ref,
951 struct ext4_extent_path *path, int at,
952 ext4_lblk_t insert_index,
953 ext4_fsblk_t insert_block, bool set_to_ix)
955 struct ext4_extent_index *ix;
956 struct ext4_extent_path *curp = path + at;
958 struct ext4_extent_header *eh;
960 if (curp->index && insert_index == to_le32(curp->index->first_block))
963 if (to_le16(curp->header->entries_count) ==
964 to_le16(curp->header->max_entries_count))
968 if (curp->index == NULL) {
969 ix = EXT_FIRST_INDEX(eh);
971 } else if (insert_index > to_le32(curp->index->first_block)) {
973 ix = curp->index + 1;
979 if (ix > EXT_MAX_INDEX(eh))
982 len = EXT_LAST_INDEX(eh) - ix + 1;
983 ext4_assert(len >= 0);
985 memmove(ix + 1, ix, len * sizeof(struct ext4_extent_index));
987 ix->first_block = to_le32(insert_index);
988 ext4_idx_store_pblock(ix, insert_block);
989 eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
991 if (ix > EXT_LAST_INDEX(eh)) {
996 err = ext4_ext_dirty(inode_ref, curp);
999 if (err == EOK && set_to_ix) {
1001 curp->p_block = ext4_idx_pblock(ix);
1006 static int ext4_ext_split_node(struct ext4_inode_ref *inode_ref,
1007 struct ext4_extent_path *path, int at,
1008 struct ext4_extent *newext,
1009 struct ext4_extent_path *npath,
1010 bool *ins_right_leaf)
1012 int i, npath_at, ret;
1013 ext4_lblk_t insert_index;
1014 ext4_fsblk_t newblock = 0;
1015 int depth = ext_depth(inode_ref->inode);
1016 npath_at = depth - at;
1018 ext4_assert(at > 0);
1020 if (path[depth].extent != EXT_MAX_EXTENT(path[depth].header))
1021 insert_index = path[depth].extent[1].first_block;
1023 insert_index = newext->first_block;
1025 for (i = depth; i >= at; i--, npath_at--) {
1026 struct ext4_block bh = EXT4_BLOCK_ZERO();
1028 /* FIXME: currently we split at the point after the current
1031 ext4_ext_new_meta_block(inode_ref, path, newext, &ret, 0);
1035 /* For write access.*/
1036 ret = ext4_trans_block_get_noread(inode_ref->fs->bdev, &bh,
1042 /* start copy from next extent */
1043 int m = EXT_MAX_EXTENT(path[i].header) - path[i].extent;
1044 struct ext4_extent_header *neh;
1045 struct ext4_extent *ex;
1046 neh = ext_block_hdr(&bh);
1047 ex = EXT_FIRST_EXTENT(neh);
1048 ext4_ext_init_header(inode_ref, neh, 0);
1050 memmove(ex, path[i].extent + 1,
1051 sizeof(struct ext4_extent) * m);
1052 neh->entries_count =
1053 to_le16(to_le16(neh->entries_count) + m);
1054 path[i].header->entries_count = to_le16(
1055 to_le16(path[i].header->entries_count) - m);
1056 ret = ext4_ext_dirty(inode_ref, path + i);
1060 npath[npath_at].p_block = ext4_ext_pblock(ex);
1061 npath[npath_at].extent = ex;
1063 npath[npath_at].p_block = 0;
1064 npath[npath_at].extent = NULL;
1067 npath[npath_at].depth = to_le16(neh->depth);
1068 npath[npath_at].maxdepth = 0;
1069 npath[npath_at].index = NULL;
1070 npath[npath_at].header = neh;
1071 npath[npath_at].block = bh;
1073 ext4_trans_set_block_dirty(bh.buf);
1075 int m = EXT_MAX_INDEX(path[i].header) - path[i].index;
1076 struct ext4_extent_header *neh;
1077 struct ext4_extent_index *ix;
1078 neh = ext_block_hdr(&bh);
1079 ix = EXT_FIRST_INDEX(neh);
1080 ext4_ext_init_header(inode_ref, neh, depth - i);
1081 ix->first_block = to_le32(insert_index);
1082 ext4_idx_store_pblock(ix,
1083 npath[npath_at + 1].block.lb_id);
1084 neh->entries_count = to_le16(1);
1086 memmove(ix + 1, path[i].index + 1,
1087 sizeof(struct ext4_extent) * m);
1088 neh->entries_count =
1089 to_le16(to_le16(neh->entries_count) + m);
1090 path[i].header->entries_count = to_le16(
1091 to_le16(path[i].header->entries_count) - m);
1092 ret = ext4_ext_dirty(inode_ref, path + i);
1097 npath[npath_at].p_block = ext4_idx_pblock(ix);
1098 npath[npath_at].depth = to_le16(neh->depth);
1099 npath[npath_at].maxdepth = 0;
1100 npath[npath_at].extent = NULL;
1101 npath[npath_at].index = ix;
1102 npath[npath_at].header = neh;
1103 npath[npath_at].block = bh;
1105 ext4_trans_set_block_dirty(bh.buf);
1111 * If newext->first_block can be included into the
1114 if (to_le32(newext->first_block) < insert_index)
1115 *ins_right_leaf = false;
1117 *ins_right_leaf = true;
1119 ret = ext4_ext_insert_index(inode_ref, path, at - 1, insert_index,
1120 npath[0].block.lb_id, *ins_right_leaf);
1125 ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
1127 npath_at = depth - at;
1128 while (npath_at >= 0) {
1129 if (npath[npath_at].block.lb_id) {
1130 newblock = npath[npath_at].block.lb_id;
1131 ext4_block_set(inode_ref->fs->bdev,
1132 &npath[npath_at].block);
1133 ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
1134 memset(&npath[npath_at].block, 0,
1135 sizeof(struct ext4_block));
1144 * ext4_ext_correct_indexes:
1145 * if leaf gets modified and modified extent is first in the leaf,
1146 * then we have to correct all indexes above.
1148 static int ext4_ext_correct_indexes(struct ext4_inode_ref *inode_ref,
1149 struct ext4_extent_path *path)
1151 struct ext4_extent_header *eh;
1152 int32_t depth = ext_depth(inode_ref->inode);
1153 struct ext4_extent *ex;
1158 eh = path[depth].header;
1159 ex = path[depth].extent;
1161 if (ex == NULL || eh == NULL)
1165 /* there is no tree at all */
1169 if (ex != EXT_FIRST_EXTENT(eh)) {
1170 /* we correct tree if first leaf got modified only */
1175 border = path[depth].extent->first_block;
1176 path[k].index->first_block = border;
1177 err = ext4_ext_dirty(inode_ref, path + k);
1182 /* change all left-side indexes */
1183 if (path[k + 1].index != EXT_FIRST_INDEX(path[k + 1].header))
1185 path[k].index->first_block = border;
1186 err = ext4_ext_dirty(inode_ref, path + k);
1194 static inline bool ext4_ext_can_prepend(struct ext4_extent *ex1,
1195 struct ext4_extent *ex2)
1197 if (ext4_ext_pblock(ex2) + ext4_ext_get_actual_len(ex2) !=
1198 ext4_ext_pblock(ex1))
1201 #ifdef AGGRESSIVE_TEST
1202 if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) > 4)
1205 if (ext4_ext_is_unwritten(ex1)) {
1206 if (ext4_ext_get_actual_len(ex1) +
1207 ext4_ext_get_actual_len(ex2) >
1208 EXT_UNWRITTEN_MAX_LEN)
1210 } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
1215 if (to_le32(ex2->first_block) + ext4_ext_get_actual_len(ex2) !=
1216 to_le32(ex1->first_block))
1222 static inline bool ext4_ext_can_append(struct ext4_extent *ex1,
1223 struct ext4_extent *ex2)
1225 if (ext4_ext_pblock(ex1) + ext4_ext_get_actual_len(ex1) !=
1226 ext4_ext_pblock(ex2))
1229 #ifdef AGGRESSIVE_TEST
1230 if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) > 4)
1233 if (ext4_ext_is_unwritten(ex1)) {
1234 if (ext4_ext_get_actual_len(ex1) +
1235 ext4_ext_get_actual_len(ex2) >
1236 EXT_UNWRITTEN_MAX_LEN)
1238 } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
1243 if (to_le32(ex1->first_block) + ext4_ext_get_actual_len(ex1) !=
1244 to_le32(ex2->first_block))
/* Insert @newext into the node referenced by path[at].
 * Visible behavior: unless EXT4_EXT_NO_COMBINE is set, first try to merge
 * newext into the extent at the cursor (append, then prepend). If no merge
 * applies and the node is full, the caller is told to split via *need_split
 * (presumably together with an error return — elided here; TODO confirm).
 * Otherwise an insertion slot is opened with memmove() and newext is copied in.
 * NOTE(review): this view elides some source lines (gaps in original
 * numbering); comments describe only the visible statements. */
1250 static int ext4_ext_insert_leaf(struct ext4_inode_ref *inode_ref,
1251 struct ext4_extent_path *path, int at,
1252 struct ext4_extent *newext, int flags,
1255 struct ext4_extent_path *curp = path + at;
1256 struct ext4_extent *ex = curp->extent;
1257 int len, err, unwritten;
1258 struct ext4_extent_header *eh;
1260 *need_split = false;
/* Reject an insert whose logical start collides with the cursor extent. */
1263 to_le32(newext->first_block) == to_le32(curp->extent->first_block))
1266 if (!(flags & EXT4_EXT_NO_COMBINE)) {
/* Fast path 1: glue newext onto the end of the cursor extent. The
 * unwritten state is sampled before the length is rewritten, since
 * the state bit lives inside block_count. */
1267 if (curp->extent && ext4_ext_can_append(curp->extent, newext)) {
1268 unwritten = ext4_ext_is_unwritten(curp->extent);
1269 curp->extent->block_count =
1270 to_le16(ext4_ext_get_actual_len(curp->extent) +
1271 ext4_ext_get_actual_len(newext));
1273 ext4_ext_mark_unwritten(curp->extent);
1275 err = ext4_ext_dirty(inode_ref, curp);
/* Fast path 2: glue newext in front of the cursor extent; the extent's
 * logical start moves back to newext's first block. */
1280 ext4_ext_can_prepend(curp->extent, newext)) {
1281 unwritten = ext4_ext_is_unwritten(curp->extent);
1282 curp->extent->first_block = newext->first_block;
1283 curp->extent->block_count =
1284 to_le16(ext4_ext_get_actual_len(curp->extent) +
1285 ext4_ext_get_actual_len(newext));
1287 ext4_ext_mark_unwritten(curp->extent);
1289 err = ext4_ext_dirty(inode_ref, curp);
/* Node full: no room for another entry — a split is required. */
1294 if (to_le16(curp->header->entries_count) ==
1295 to_le16(curp->header->max_entries_count)) {
/* Pick the insertion slot: before the first extent on an empty cursor,
 * or just after the cursor extent when newext sorts above it. */
1301 if (curp->extent == NULL) {
1302 ex = EXT_FIRST_EXTENT(eh);
1304 } else if (to_le32(newext->first_block) >
1305 to_le32(curp->extent->first_block)) {
1307 ex = curp->extent + 1;
/* Shift the tail of the node one slot right to open a hole at ex. */
1314 len = EXT_LAST_EXTENT(eh) - ex + 1;
1315 ext4_assert(len >= 0);
1317 memmove(ex + 1, ex, len * sizeof(struct ext4_extent));
1319 if (ex > EXT_MAX_EXTENT(eh)) {
/* Copy newext into the hole and bump the entry count. */
1324 ex->first_block = newext->first_block;
1325 ex->block_count = newext->block_count;
1326 ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
1327 eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
1329 if (ex > EXT_LAST_EXTENT(eh)) {
/* Inserting at the front of a node may change the node's first key,
 * so parent index entries must be re-synchronized. */
1334 err = ext4_ext_correct_indexes(inode_ref, path);
1337 err = ext4_ext_dirty(inode_ref, curp);
1342 curp->p_block = ext4_ext_pblock(ex);
1349 * ext4_ext_grow_indepth:
1350 * implements tree growing procedure:
1351 * - allocates new block
1352 * - moves top-level data (index block or leaf) into the new block
1353 * - initializes new top-level, creating index that points to the
1354 * just created block
/* Grow the extent tree by one level (see block comment above): the current
 * root stored inside the inode is copied into a freshly allocated block,
 * and the inode root is rewritten as a one-entry index pointing at it.
 * NOTE(review): this view elides some source lines (gaps in original
 * numbering); comments describe only the visible statements. */
1356 static int ext4_ext_grow_indepth(struct ext4_inode_ref *inode_ref,
1359 struct ext4_extent_header *neh;
1360 struct ext4_block bh = EXT4_BLOCK_ZERO();
1361 ext4_fsblk_t newblock, goal = 0;
1364 /* Try to prepend new index to old one */
/* Allocation goal: near the first child of the current root if the tree
 * already has depth, otherwise near the inode's natural goal block. */
1365 if (ext_depth(inode_ref->inode))
1366 goal = ext4_idx_pblock(
1367 EXT_FIRST_INDEX(ext_inode_hdr(inode_ref->inode)));
1369 goal = ext4_fs_inode_to_goal_block(inode_ref);
1371 newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, &err);
/* noread: the block is fully overwritten below, so skip reading it. */
1376 err = ext4_trans_block_get_noread(inode_ref->fs->bdev, &bh, newblock);
/* Error path: give the freshly allocated metadata block back. */
1378 ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
1382 /* move top-level index/leaf into new block */
1383 memmove(bh.data, inode_ref->inode->blocks,
1384 sizeof(inode_ref->inode->blocks));
1386 /* set size of new block */
1387 neh = ext_block_hdr(&bh);
1388 /* old root could have indexes or leaves
1389 * so calculate e_max right way */
1390 if (ext_depth(inode_ref->inode))
1391 neh->max_entries_count =
1392 to_le16(ext4_ext_space_block_idx(inode_ref));
1394 neh->max_entries_count =
1395 to_le16(ext4_ext_space_block(inode_ref));
/* Stamp the magic and checksum so the new block verifies on re-read. */
1397 neh->magic = to_le16(EXT4_EXTENT_MAGIC);
1398 ext4_extent_block_csum_set(inode_ref, neh);
1400 /* Update top-level index: num,max,pointer */
1401 neh = ext_inode_hdr(inode_ref->inode);
1402 neh->entries_count = to_le16(1);
1403 ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
1404 if (neh->depth == 0) {
1405 /* Root extent block becomes index block */
1406 neh->max_entries_count =
1407 to_le16(ext4_ext_space_root_idx(inode_ref));
/* The new index entry inherits the first logical block of the old
 * (still in-place) leaf content. */
1408 EXT_FIRST_INDEX(neh)
1409 ->first_block = EXT_FIRST_EXTENT(neh)->first_block;
1411 neh->depth = to_le16(to_le16(neh->depth) + 1);
/* Persist: dirty the new child block and the inode, then release bh. */
1413 ext4_trans_set_block_dirty(bh.buf);
1414 inode_ref->dirty = true;
1415 ext4_block_set(inode_ref->fs->bdev, &bh);
/* Replace path[at] with *newpath: drop the old element's block reference,
 * move newpath in by value, and zero newpath so the caller's error/cleanup
 * code will not free or drop the transferred block a second time. */
1420 static inline void ext4_ext_replace_path(struct ext4_inode_ref *inode_ref,
1421 struct ext4_extent_path *path,
1422 struct ext4_extent_path *newpath,
1425 ext4_ext_drop_refs(inode_ref, path + at, 1);
1426 path[at] = *newpath;
1427 memset(newpath, 0, sizeof(struct ext4_extent_path));
/* Top-level extent insertion. Visible behavior: try a plain leaf insert
 * first; if the leaf is full (EIO + need_split), walk upward looking for a
 * node with a free index slot, grow the tree by one level when none exists,
 * then split nodes from the leaf up to that level and retry. On failure the
 * blocks allocated for the split path (npath) are freed.
 * NOTE(review): this view elides some source lines (gaps in original
 * numbering); comments describe only the visible statements. */
1430 int ext4_ext_insert_extent(struct ext4_inode_ref *inode_ref,
1431 struct ext4_extent_path **ppath,
1432 struct ext4_extent *newext, int flags)
1434 int depth, level = 0, ret = 0;
1435 struct ext4_extent_path *path = *ppath;
1436 struct ext4_extent_path *npath = NULL;
1437 bool ins_right_leaf = false;
1441 depth = ext_depth(inode_ref->inode);
/* First attempt: insert directly into the leaf at the current depth. */
1442 ret = ext4_ext_insert_leaf(inode_ref, path, depth, newext, flags,
/* EIO + need_split is the "leaf full" signal from ext4_ext_insert_leaf. */
1444 if (ret == EIO && need_split == true) {
/* Scan from leaf to root for the first node with a free index slot;
 * level counts how many levels must be split below it. */
1446 for (i = depth, level = 0; i >= 0; i--, level++)
1447 if (EXT_HAS_FREE_INDEX(path + i))
1450 /* Do we need to grow the tree? */
1452 ret = ext4_ext_grow_indepth(inode_ref, 0);
/* Re-lookup after growing: the old path no longer matches the tree. */
1456 ret = ext4_find_extent(
1457 inode_ref, to_le32(newext->first_block), ppath, 0);
1463 * After growing the tree, there should be free space in
1464 * the only child node of the root.
/* i is the highest tree level that must be split. */
1470 i = depth - (level - 1);
1471 /* We split from leaf to the i-th node */
1473 npath = ext4_calloc(1, sizeof(struct ext4_extent_path) *
1479 ret = ext4_ext_split_node(inode_ref, path, i, newext,
1480 npath, &ins_right_leaf);
/* Splice the newly created right-hand nodes into the live path where
 * the insert continued into them; otherwise just drop their refs. */
1484 while (--level >= 0) {
1486 ext4_ext_replace_path(inode_ref, path,
1489 else if (npath[level].block.lb_id)
1490 ext4_ext_drop_refs(inode_ref,
1500 ext4_ext_drop_refs(inode_ref, path, 0);
/* Error/cleanup path: free any split blocks still owned by npath
 * (entries consumed by ext4_ext_replace_path were zeroed, so they
 * are skipped here — no double free). */
1502 while (--level >= 0 && npath) {
1503 if (npath[level].block.lb_id) {
1504 ext4_fsblk_t block = npath[level].block.lb_id;
1505 ext4_ext_free_blocks(inode_ref, block, 1, 0);
1506 ext4_ext_drop_refs(inode_ref, npath + level, 1);
/* Free the physical blocks backing logical range [from, to] of extent @ex.
 * The physical start is derived from the extent's pblock plus the offset of
 * @from within the extent; to is inclusive, hence len = to - from + 1.
 * NOTE(review): this view elides some source lines (gaps in original
 * numbering); comments describe only the visible statements. */
1516 static void ext4_ext_remove_blocks(struct ext4_inode_ref *inode_ref,
1517 struct ext4_extent *ex, ext4_lblk_t from,
1520 ext4_lblk_t len = to - from + 1;
/* Offset of the first freed block inside the extent. */
1523 num = from - to_le32(ex->first_block);
1524 start = ext4_ext_pblock(ex) + num;
1525 ext4_dbg(DEBUG_EXTENT,
1526 "Freeing %" PRIu32 " at %" PRIu64 ", %" PRIu32 "\n", from,
1529 ext4_ext_free_blocks(inode_ref, start, len, 0);
/* Remove the index entry at path[depth]-level that points to a now-empty
 * child, free that child block, and propagate the first_block key change to
 * the parent when the removed entry was the node's first.
 * NOTE(review): this view elides some source lines (gaps in original
 * numbering); comments describe only the visible statements. */
1532 static int ext4_ext_remove_idx(struct ext4_inode_ref *inode_ref,
1533 struct ext4_extent_path *path, int32_t depth)
1539 /* free index block */
1540 leaf = ext4_idx_pblock(path[i].index);
/* Close the gap left by the removed entry (no-op when it is the last). */
1542 if (path[i].index != EXT_LAST_INDEX(path[i].header)) {
1543 ptrdiff_t len = EXT_LAST_INDEX(path[i].header) - path[i].index;
1544 memmove(path[i].index, path[i].index + 1,
1545 len * sizeof(struct ext4_extent_index));
1548 path[i].header->entries_count =
1549 to_le16(to_le16(path[i].header->entries_count) - 1);
1550 err = ext4_ext_dirty(inode_ref, path + i);
1554 ext4_dbg(DEBUG_EXTENT, "IDX: Freeing %" PRIu32 " at %" PRIu64 ", %d\n",
1555 to_le32(path[i].index->first_block), leaf, 1);
/* Release the child block that the removed index entry referenced. */
1556 ext4_ext_free_blocks(inode_ref, leaf, 1, 0);
1559 * We may need to correct the paths after the first extents/indexes in
1560 * a node being modified.
1562 * We do not need to consider whether there's any extents presenting or
1563 * not, as garbage will be cleared soon.
/* Only a change to the node's FIRST entry alters the parent's key. */
1566 if (path[i].index != EXT_FIRST_INDEX(path[i].header))
1569 path[i - 1].index->first_block = path[i].index->first_block;
1570 err = ext4_ext_dirty(inode_ref, path + i - 1);
/* Remove all extents (or parts of extents) overlapping logical range
 * [from, to] inside the leaf at path[depth]. Extents straddling either
 * boundary are trimmed rather than deleted; survivors are compacted to the
 * front of the node, parent keys are corrected, and an emptied leaf is
 * detached from its parent index.
 * NOTE(review): this view elides some source lines (gaps in original
 * numbering); comments describe only the visible statements. */
1579 static int ext4_ext_remove_leaf(struct ext4_inode_ref *inode_ref,
1580 struct ext4_extent_path *path, ext4_lblk_t from,
1584 int32_t depth = ext_depth(inode_ref->inode);
1585 struct ext4_extent *ex = path[depth].extent;
1586 struct ext4_extent *start_ex, *ex2 = NULL;
1587 struct ext4_extent_header *eh = path[depth].header;
1590 uint16_t new_entries;
1593 new_entries = to_le16(eh->entries_count);
/* Walk forward over every extent whose logical start is within range. */
1594 while (ex <= EXT_LAST_EXTENT(path[depth].header) &&
1595 to_le32(ex->first_block) <= to) {
1596 int32_t new_len = 0;
1598 ext4_lblk_t start, new_start;
1599 ext4_fsblk_t newblock;
1600 new_start = start = to_le32(ex->first_block);
1601 len = ext4_ext_get_actual_len(ex);
1602 newblock = ext4_ext_pblock(ex);
1605 * The position that we start truncation is inside the range of an
1606 * extent. Here we should calculate the new length of that extent and
1607 * may start the removal from the next extent.
/* Keep the head [start, from): shrink to new_len, free only the tail. */
1610 len -= from - start;
1611 new_len = from - start;
1617 * The last block to be truncated is inside the range of an
1618 * extent. We need to calculate the new length and the new
1619 * start of the extent.
/* Keep the tail (to, start+len-1]: the surviving piece starts at to+1,
 * and its physical start is advanced by the same amount. */
1621 if (start + len - 1 > to) {
1622 new_len = start + len - 1 - to;
1625 newblock += to + 1 - start;
/* Release the physical blocks of the portion being removed. */
1630 ext4_ext_remove_blocks(inode_ref, ex, start, start + len - 1);
1632 * Set the first block of the extent if it is presented.
1634 ex->first_block = to_le32(new_start);
1637 * If the new length of the current extent we are working on is
/* Rewrite the trimmed extent, preserving its unwritten state (the state
 * bit lives in block_count and is clobbered by the length store). */
1643 unwritten = ext4_ext_is_unwritten(ex);
1644 ex->block_count = to_le16(new_len);
1645 ext4_ext_store_pblock(ex, newblock);
1647 ext4_ext_mark_unwritten(ex);
1657 * Move any remaining extents to the starting position of the node.
1659 if (ex2 <= EXT_LAST_EXTENT(eh))
1660 memmove(start_ex, ex2, (EXT_LAST_EXTENT(eh) - ex2 + 1) *
1661 sizeof(struct ext4_extent));
1663 eh->entries_count = to_le16(new_entries);
1664 ext4_ext_dirty(inode_ref, path + depth);
1667 * If the extent pointer is pointed to the first extent of the node, and
1668 * there's still extents presenting, we may need to correct the indexes
1671 if (path[depth].extent == EXT_FIRST_EXTENT(eh) && eh->entries_count) {
1672 err = ext4_ext_correct_indexes(inode_ref, path);
1677 /* if this leaf is free, then we should
1678 * remove it from index block above */
1679 if (eh->entries_count == 0 && path[depth].block.lb_id)
1680 err = ext4_ext_remove_idx(inode_ref, path, depth - 1);
/* Leaf survived: advance the parent's index cursor past it. */
1682 path[depth - 1].index++;
1688 * Check if there's more to remove at a specific level.
/* Return whether the index node at @path still has entries intersecting the
 * removal range (i.e. entries whose first logical block is <= @to). Visible
 * early-outs: empty node, cursor past the last index, or the cursor entry
 * already starting beyond @to. */
1690 static bool ext4_ext_more_to_rm(struct ext4_extent_path *path, ext4_lblk_t to)
1692 if (!to_le16(path->header->entries_count))
1695 if (path->index > EXT_LAST_INDEX(path->header))
1698 if (to_le32(path->index->first_block) > to)
/* Remove logical blocks [from, to] from the inode's extent tree (truncate /
 * punch-hole entry point). Visible behavior: locate @from; when the range is
 * strictly inside a single extent, split it by shrinking the head in place
 * and inserting a new tail extent; otherwise iteratively remove whole leaves
 * and descend/ascend through index nodes, clearing emptied ones. A fully
 * emptied tree is reset to a depth-0 root inside the inode.
 * NOTE(review): this view elides some source lines (gaps in original
 * numbering); comments describe only the visible statements. */
1704 int ext4_extent_remove_space(struct ext4_inode_ref *inode_ref, ext4_lblk_t from,
1707 struct ext4_extent_path *path = NULL;
1709 int32_t depth = ext_depth(inode_ref->inode);
1712 ret = ext4_find_extent(inode_ref, from, &path, 0);
/* Nothing mapped at or after @from — nothing to remove here. */
1716 if (!path[depth].extent) {
1721 bool in_range = IN_RANGE(from, to_le32(path[depth].extent->first_block),
1722 ext4_ext_get_actual_len(path[depth].extent));
1729 /* If we do remove_space inside the range of an extent */
1730 if ((to_le32(path[depth].extent->first_block) < from) &&
1731 (to < to_le32(path[depth].extent->first_block) +
1732 ext4_ext_get_actual_len(path[depth].extent) - 1)) {
/* Hole strictly inside one extent: shrink it to [ee_block, from) and
 * insert a new extent covering (to, end], preserving unwritten state. */
1734 struct ext4_extent *ex = path[depth].extent, newex;
1735 int unwritten = ext4_ext_is_unwritten(ex);
1736 ext4_lblk_t ee_block = to_le32(ex->first_block);
1737 int32_t len = ext4_ext_get_actual_len(ex);
/* Physical start of the surviving tail (offset to+1 within the extent). */
1738 ext4_fsblk_t newblock = to + 1 - ee_block + ext4_ext_pblock(ex);
1740 ex->block_count = to_le16(from - ee_block);
1742 ext4_ext_mark_unwritten(ex);
1744 ext4_ext_dirty(inode_ref, path + depth);
1746 newex.first_block = to_le32(to + 1);
1747 newex.block_count = to_le16(ee_block + len - 1 - to);
1748 ext4_ext_store_pblock(&newex, newblock);
1750 ext4_ext_mark_unwritten(&newex);
1752 ret = ext4_ext_insert_extent(inode_ref, &path, &newex, 0);
/* Leaf level: clamp the leaf's own [leaf_from, leaf_to] to the removal
 * range and strip the overlapping extents. */
1759 struct ext4_extent_header *eh;
1760 struct ext4_extent *first_ex, *last_ex;
1761 ext4_lblk_t leaf_from, leaf_to;
1762 eh = path[i].header;
1763 ext4_assert(to_le16(eh->entries_count) > 0);
1764 first_ex = EXT_FIRST_EXTENT(eh);
1765 last_ex = EXT_LAST_EXTENT(eh);
1766 leaf_from = to_le32(first_ex->first_block);
1767 leaf_to = to_le32(last_ex->first_block) +
1768 ext4_ext_get_actual_len(last_ex) - 1;
1769 if (leaf_from < from)
1775 ext4_ext_remove_leaf(inode_ref, path, leaf_from,
1777 ext4_ext_drop_refs(inode_ref, path + i, 0);
/* Index level: descend into the next child that still intersects the
 * range, loading its block and extending the path. */
1782 struct ext4_extent_header *eh;
1783 eh = path[i].header;
1784 if (ext4_ext_more_to_rm(path + i, to)) {
1785 struct ext4_block bh = EXT4_BLOCK_ZERO();
1786 if (path[i + 1].block.lb_id)
1787 ext4_ext_drop_refs(inode_ref, path + i + 1, 0);
1789 ret = read_extent_tree_block(
1790 inode_ref, ext4_idx_pblock(path[i].index),
1791 depth - i - 1, &bh, 0);
1795 path[i].p_block = ext4_idx_pblock(path[i].index);
1796 path[i + 1].block = bh;
1797 path[i + 1].header = ext_block_hdr(&bh);
1798 path[i + 1].depth = depth - i - 1;
/* The child starts as either a leaf (extent cursor) or an index node. */
1800 path[i + 1].extent =
1801 EXT_FIRST_EXTENT(path[i + 1].header);
1804 EXT_FIRST_INDEX(path[i + 1].header);
1810 * Garbage entries will finally be cleared here.
/* Ascend: drop an emptied index node, otherwise advance the parent. */
1812 if (!eh->entries_count)
1813 ret = ext4_ext_remove_idx(inode_ref,
1816 path[i - 1].index++;
1820 ext4_block_set(inode_ref->fs->bdev,
1827 /* TODO: flexible tree reduction should be here */
1828 if (path->header->entries_count == 0) {
1830 * truncate to zero freed all the tree,
1831 * so we need to correct eh_depth
1833 ext_inode_hdr(inode_ref->inode)->depth = 0;
1834 ext_inode_hdr(inode_ref->inode)->max_entries_count =
1835 to_le16(ext4_ext_space_root(inode_ref));
1836 ret = ext4_ext_dirty(inode_ref, path);
1840 ext4_ext_drop_refs(inode_ref, path, 0);
/* Split the extent at path depth so that a new extent begins at logical
 * block @split. @split_flag selects which half is marked unwritten
 * (EXT4_EXT_MARK_UNWRIT1 = left/first half, ..UNWRIT2 = right/second half).
 * If @split equals the extent's first block, no split is needed — only the
 * written/unwritten state changes. If inserting the new right-hand extent
 * fails, the original length is restored (restore_extent_len).
 * NOTE(review): this view elides some source lines (gaps in original
 * numbering); comments describe only the visible statements. */
1846 static int ext4_ext_split_extent_at(struct ext4_inode_ref *inode_ref,
1847 struct ext4_extent_path **ppath,
1848 ext4_lblk_t split, uint32_t split_flag)
1850 struct ext4_extent *ex, newex;
1851 ext4_fsblk_t newblock;
1852 ext4_lblk_t ee_block;
1854 int32_t depth = ext_depth(inode_ref->inode);
1857 ex = (*ppath)[depth].extent;
1858 ee_block = to_le32(ex->first_block);
1859 ee_len = ext4_ext_get_actual_len(ex);
/* Physical block corresponding to logical block @split inside @ex. */
1860 newblock = split - ee_block + ext4_ext_pblock(ex);
1862 if (split == ee_block) {
1864 * case b: block @split is the block that the extent begins with
1865 * then we just change the state of the extent, and splitting
1868 if (split_flag & EXT4_EXT_MARK_UNWRIT2)
1869 ext4_ext_mark_unwritten(ex);
1871 ext4_ext_mark_initialized(ex);
1873 err = ext4_ext_dirty(inode_ref, *ppath + depth);
/* General case: truncate @ex to the left half [ee_block, split). */
1877 ex->block_count = to_le16(split - ee_block);
1878 if (split_flag & EXT4_EXT_MARK_UNWRIT1)
1879 ext4_ext_mark_unwritten(ex);
1881 err = ext4_ext_dirty(inode_ref, *ppath + depth);
/* Build and insert the right half [split, ee_block + ee_len).
 * NO_COMBINE keeps it a distinct extent (the halves may differ in
 * written state, so they must not be re-merged). */
1885 newex.first_block = to_le32(split);
1886 newex.block_count = to_le16(ee_len - (split - ee_block));
1887 ext4_ext_store_pblock(&newex, newblock);
1888 if (split_flag & EXT4_EXT_MARK_UNWRIT2)
1889 ext4_ext_mark_unwritten(&newex);
1890 err = ext4_ext_insert_extent(inode_ref, ppath, &newex,
1891 EXT4_EXT_NO_COMBINE);
1893 goto restore_extent_len;
/* Insert failed: undo the truncation so the on-disk extent is unchanged. */
1898 ex->block_count = to_le16(ee_len);
1899 err = ext4_ext_dirty(inode_ref, *ppath + depth);
/* Convert the sub-range [split, split + blocks) of the unwritten extent at
 * path depth to the written (initialized) state, using one or two calls to
 * ext4_ext_split_extent_at(). Three visible cases: range at the extent's
 * tail (split once, right part initialized), range at the head (split once,
 * left part initialized), or range in the middle (split twice, 1-into-3,
 * with only the middle piece initialized).
 * NOTE(review): this view elides some source lines (gaps in original
 * numbering). */
1903 static int ext4_ext_convert_to_initialized(struct ext4_inode_ref *inode_ref,
1904 struct ext4_extent_path **ppath,
1905 ext4_lblk_t split, uint32_t blocks)
1907 int32_t depth = ext_depth(inode_ref->inode), err = EOK;
1908 struct ext4_extent *ex = (*ppath)[depth].extent;
1910 ext4_assert(to_le32(ex->first_block) <= split);
/* Case 1: range reaches the extent's end — keep left half unwritten. */
1912 if (split + blocks ==
1913 to_le32(ex->first_block) + ext4_ext_get_actual_len(ex)) {
1914 /* split and initialize right part */
1915 err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1916 EXT4_EXT_MARK_UNWRIT1);
/* Case 2: range starts at the extent's start — keep right half unwritten. */
1917 } else if (to_le32(ex->first_block) == split) {
1918 /* split and initialize left part */
1919 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1920 EXT4_EXT_MARK_UNWRIT2);
/* Case 3: range in the middle — first split at split+blocks keeping both
 * halves unwritten, then split again at split so only the middle piece
 * becomes written. */
1922 /* split 1 extent to 3 and initialize the 2nd */
1923 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1924 EXT4_EXT_MARK_UNWRIT1 |
1925 EXT4_EXT_MARK_UNWRIT2);
1927 err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1928 EXT4_EXT_MARK_UNWRIT1);
/* Return the logical block number of the next allocated extent after the
 * current @path position, or EXT_MAX_BLOCKS if no further extent exists.
 * Visible behavior: walk from the leaf back toward the root; at the leaf
 * level the sibling extent's first_block is the answer, at index levels the
 * sibling index entry's first_block is.
 * NOTE(review): this view elides some source lines (gaps in original
 * numbering); comments describe only the visible statements. */
1935 static ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_extent_path *path)
1939 depth = path->depth;
/* Empty tree: a depth-0 path with no extent has nothing allocated. */
1941 if (depth == 0 && path->extent == NULL)
1942 return EXT_MAX_BLOCKS;
1944 while (depth >= 0) {
/* Leaf level: answer is the next extent in this leaf, if any. */
1945 if (depth == path->depth) {
1947 if (path[depth].extent &&
1948 path[depth].extent !=
1949 EXT_LAST_EXTENT(path[depth].header))
1951 path[depth].extent[1].first_block);
/* Index level: answer is the next index entry's subtree start. */
1954 if (path[depth].index !=
1955 EXT_LAST_INDEX(path[depth].header))
1957 path[depth].index[1].first_block);
1962 return EXT_MAX_BLOCKS;
/* Zero-fill @blocks_count consecutive blocks on disk (used when converting
 * unwritten extents so stale device data is never exposed). Each block is
 * fetched with noread (contents are fully overwritten), memset to zero,
 * marked dirty, and released.
 * NOTE(review): this view elides some source lines (gaps in original
 * numbering). */
1965 static int ext4_ext_zero_unwritten_range(struct ext4_inode_ref *inode_ref,
1967 uint32_t blocks_count)
1971 uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
1972 for (i = 0; i < blocks_count; i++) {
1973 struct ext4_block bh = EXT4_BLOCK_ZERO();
/* noread: skip reading a block we are about to overwrite entirely. */
1974 err = ext4_trans_block_get_noread(inode_ref->fs->bdev, &bh,
1979 memset(bh.data, 0, block_size);
1980 ext4_trans_set_block_dirty(bh.buf);
1981 err = ext4_block_set(inode_ref->fs->bdev, &bh);
/* Debug helper: dump each element of an extent path — its depth, physical
 * block, and the offsets of the extent/index cursors within their node
 * (computed as pointer differences from the node's first entry; printed
 * with %td, the ptrdiff_t conversion). Marked __unused so builds without
 * callers do not warn. */
1988 __unused static void print_path(struct ext4_extent_path *path)
1990 int32_t i = path->depth;
1995 ? (path->extent - EXT_FIRST_EXTENT(path->header))
1999 ? (path->index - EXT_FIRST_INDEX(path->header))
2004 ext4_dbg(DEBUG_EXTENT,
2005 "depth %" PRId32 ", p_block: %" PRIu64 ","
2006 "p_ext offset: %td, p_idx offset: %td\n",
2007 i, path->p_block, a, b);
2013 int ext4_extent_get_blocks(struct ext4_inode_ref *inode_ref, ext4_lblk_t iblock,
2014 uint32_t max_blocks, ext4_fsblk_t *result,
2015 bool create, uint32_t *blocks_count)
2017 struct ext4_extent_path *path = NULL;
2018 struct ext4_extent newex, *ex;
2022 uint32_t allocated = 0;
2024 ext4_fsblk_t newblock;
2032 /* find extent for this block */
2033 err = ext4_find_extent(inode_ref, iblock, &path, 0);
2039 depth = ext_depth(inode_ref->inode);
2042 * consistent leaf must not be empty
2043 * this situations is possible, though, _during_ tree modification
2044 * this is why assert can't be put in ext4_ext_find_extent()
2046 ex = path[depth].extent;
2048 ext4_lblk_t ee_block = to_le32(ex->first_block);
2049 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
2050 uint16_t ee_len = ext4_ext_get_actual_len(ex);
2051 /* if found exent covers block, simple return it */
2052 if (IN_RANGE(iblock, ee_block, ee_len)) {
2053 /* number of remain blocks in the extent */
2054 allocated = ee_len - (iblock - ee_block);
2056 if (!ext4_ext_is_unwritten(ex)) {
2057 newblock = iblock - ee_block + ee_start;
2066 uint32_t zero_range;
2067 zero_range = allocated;
2068 if (zero_range > max_blocks)
2069 zero_range = max_blocks;
2071 newblock = iblock - ee_block + ee_start;
2072 err = ext4_ext_zero_unwritten_range(inode_ref, newblock,
2077 err = ext4_ext_convert_to_initialized(
2078 inode_ref, &path, iblock, zero_range);
2087 * requested block isn't allocated yet
2088 * we couldn't try to create block if create flag is zero
2094 /* find next allocated block so that we know how many
2095 * blocks we can allocate without ovelapping next extent */
2096 next = ext4_ext_next_allocated_block(path);
2097 allocated = next - iblock;
2098 if (allocated > max_blocks)
2099 allocated = max_blocks;
2101 /* allocate new block */
2102 goal = ext4_ext_find_goal(inode_ref, path, iblock);
2103 newblock = ext4_new_meta_blocks(inode_ref, goal, 0, &allocated, &err);
2107 /* try to insert new extent into found leaf and return */
2108 newex.first_block = to_le32(iblock);
2109 ext4_ext_store_pblock(&newex, newblock);
2110 newex.block_count = to_le16(allocated);
2111 err = ext4_ext_insert_extent(inode_ref, &path, &newex, 0);
2113 /* free data blocks we just allocated */
2114 ext4_ext_free_blocks(inode_ref, ext4_ext_pblock(&newex),
2115 to_le16(newex.block_count), 0);
2119 /* previous routine could use block we allocated */
2120 newblock = ext4_ext_pblock(&newex);
2123 if (allocated > max_blocks)
2124 allocated = max_blocks;
2130 *blocks_count = allocated;
2134 ext4_ext_drop_refs(inode_ref, path, 0);