Make ext4_xattr & ext4_extents GPL licensed
[lwext4.git] / src / ext4_extent.c
1 /*
2  * Copyright (c) 2017 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3  * Copyright (c) 2017 Kaho Ng (ngkaho1234@gmail.com)
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License
7  * as published by the Free Software Foundation; either version 2
8  * of the License, or (at your option) any later version.
9  */
10
11 #include <ext4_config.h>
12 #include <ext4_types.h>
13 #include <ext4_misc.h>
14 #include <ext4_errno.h>
15 #include <ext4_debug.h>
16
17 #include <ext4_blockdev.h>
18 #include <ext4_trans.h>
19 #include <ext4_fs.h>
20 #include <ext4_super.h>
21 #include <ext4_crc32.h>
22 #include <ext4_balloc.h>
23 #include <ext4_extent.h>
24
25 #include <stdlib.h>
26 #include <string.h>
27 #include <inttypes.h>
28 #include <stddef.h>
29
30 /*
31  * used by extent splitting.
32  */
33 #define EXT4_EXT_MARK_UNWRIT1 0x02 /* mark first half unwritten */
34 #define EXT4_EXT_MARK_UNWRIT2 0x04 /* mark second half unwritten */
35 #define EXT4_EXT_DATA_VALID1 0x08  /* first half contains valid data */
36 #define EXT4_EXT_DATA_VALID2 0x10  /* second half contains valid data */
37 #define EXT4_EXT_NO_COMBINE 0x20   /* do not combine two extents */
38
39 #define EXT4_EXT_UNWRITTEN_MASK (1L << 15)
40
41 #define EXT4_EXT_MAX_LEN_WRITTEN (1L << 15)
42 #define EXT4_EXT_MAX_LEN_UNWRITTEN \
43     (EXT4_EXT_MAX_LEN_WRITTEN - 1)
44
45 #define EXT4_EXT_GET_LEN(ex) to_le16((ex)->block_count)
46 #define EXT4_EXT_GET_LEN_UNWRITTEN(ex) \
47     (EXT4_EXT_GET_LEN(ex) &= ~(EXT4_EXT_UNWRITTEN_MASK))
48 #define EXT4_EXT_SET_LEN(ex, count) \
49     ((ex)->block_count = to_le16(count))
50
51 #define EXT4_EXT_IS_UNWRITTEN(ex) \
52     (EXT4_EXT_GET_LEN(ex) > EXT4_EXT_MAX_LEN_WRITTEN)
53 #define EXT4_EXT_SET_UNWRITTEN(ex) \
54     ((ex)->block_count |= to_le16(EXT4_EXT_UNWRITTEN_MASK))
55 #define EXT4_EXT_SET_WRITTEN(ex) \
56     ((ex)->block_count &= ~(to_le16(EXT4_EXT_UNWRITTEN_MASK)))
57
58 /*
59  * Array of ext4_ext_path contains path to some extent.
60  * Creation/lookup routines use it for traversal/splitting/etc.
61  * Truncate uses it to simulate recursive walking.
62  */
63 struct ext4_extent_path {
64     ext4_fsblk_t p_block;
65     struct ext4_block block;
66     int32_t depth;
67     int32_t maxdepth;
68     struct ext4_extent_header *header;
69     struct ext4_extent_index *index;
70     struct ext4_extent *extent;
71 };
72
73
74 #pragma pack(push, 1)
75
76 /*
77  * This is the extent tail on-disk structure.
78  * All other extent structures are 12 bytes long.  It turns out that
79  * block_size % 12 >= 4 for at least all powers of 2 greater than 512, which
80  * covers all valid ext4 block sizes.  Therefore, this tail structure can be
81  * crammed into the end of the block without having to rebalance the tree.
82  */
83 struct ext4_extent_tail
84 {
85     uint32_t et_checksum; /* crc32c(uuid+inum+extent_block) */
86 };
87
88 /*
89  * This is the extent on-disk structure.
90  * It's used at the bottom of the tree.
91  */
92 struct ext4_extent {
93     uint32_t first_block; /* First logical block extent covers */
94     uint16_t block_count; /* Number of blocks covered by extent */
95     uint16_t start_hi;    /* High 16 bits of physical block */
96     uint32_t start_lo;    /* Low 32 bits of physical block */
97 };
98
99 /*
100  * This is index on-disk structure.
101  * It's used at all the levels except the bottom.
102  */
103 struct ext4_extent_index {
104     uint32_t first_block; /* Index covers logical blocks from 'block' */
105
106     /**
107      * Pointer to the physical block of the next
108      * level. leaf or next index could be there
109      * high 16 bits of physical block
110      */
111     uint32_t leaf_lo;
112     uint16_t leaf_hi;
113     uint16_t padding;
114 };
115
116 /*
117  * Each block (leaves and indexes), even inode-stored has header.
118  */
119 struct ext4_extent_header {
120     uint16_t magic;
121     uint16_t entries_count;     /* Number of valid entries */
122     uint16_t max_entries_count; /* Capacity of store in entries */
123     uint16_t depth;             /* Has tree real underlying blocks? */
124     uint32_t generation;    /* generation of the tree */
125 };
126
127 #pragma pack(pop)
128
129
130 #define EXT4_EXTENT_MAGIC 0xF30A
131
132 #define EXT4_EXTENT_FIRST(header)                                              \
133     ((struct ext4_extent *)(((char *)(header)) +                           \
134                 sizeof(struct ext4_extent_header)))
135
136 #define EXT4_EXTENT_FIRST_INDEX(header)                                        \
137     ((struct ext4_extent_index *)(((char *)(header)) +                     \
138                       sizeof(struct ext4_extent_header)))
139
140 /*
141  * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an
142  * initialized extent. This is 2^15 and not (2^16 - 1), since we use the
143  * MSB of ee_len field in the extent datastructure to signify if this
144  * particular extent is an initialized extent or an uninitialized (i.e.
145  * preallocated).
146  * EXT_UNINIT_MAX_LEN is the maximum number of blocks we can have in an
147  * uninitialized extent.
148  * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an
149  * uninitialized one. In other words, if MSB of ee_len is set, it is an
150  * uninitialized extent with only one special scenario when ee_len = 0x8000.
151  * In this case we can not have an uninitialized extent of zero length and
152  * thus we make it as a special case of initialized extent with 0x8000 length.
153  * This way we get better extent-to-group alignment for initialized extents.
154  * Hence, the maximum number of blocks we can have in an *initialized*
155  * extent is 2^15 (32768) and in an *uninitialized* extent is 2^15-1 (32767).
156  */
157 #define EXT_INIT_MAX_LEN (1L << 15)
158 #define EXT_UNWRITTEN_MAX_LEN (EXT_INIT_MAX_LEN - 1)
159
160 #define EXT_EXTENT_SIZE sizeof(struct ext4_extent)
161 #define EXT_INDEX_SIZE sizeof(struct ext4_extent_idx)
162
163 #define EXT_FIRST_EXTENT(__hdr__)                                              \
164     ((struct ext4_extent *)(((char *)(__hdr__)) +                          \
165                 sizeof(struct ext4_extent_header)))
166 #define EXT_FIRST_INDEX(__hdr__)                                               \
167     ((struct ext4_extent_index *)(((char *)(__hdr__)) +                    \
168                     sizeof(struct ext4_extent_header)))
169 #define EXT_HAS_FREE_INDEX(__path__)                                           \
170     (to_le16((__path__)->header->entries_count) <                                \
171                     to_le16((__path__)->header->max_entries_count))
172 #define EXT_LAST_EXTENT(__hdr__)                                               \
173     (EXT_FIRST_EXTENT((__hdr__)) + to_le16((__hdr__)->entries_count) - 1)
174 #define EXT_LAST_INDEX(__hdr__)                                                \
175     (EXT_FIRST_INDEX((__hdr__)) + to_le16((__hdr__)->entries_count) - 1)
176 #define EXT_MAX_EXTENT(__hdr__)                                                \
177     (EXT_FIRST_EXTENT((__hdr__)) + to_le16((__hdr__)->max_entries_count) - 1)
178 #define EXT_MAX_INDEX(__hdr__)                                                 \
179     (EXT_FIRST_INDEX((__hdr__)) + to_le16((__hdr__)->max_entries_count) - 1)
180
181 #define EXT4_EXTENT_TAIL_OFFSET(hdr)                                           \
182     (sizeof(struct ext4_extent_header) +                                   \
183      (sizeof(struct ext4_extent) * to_le16((hdr)->max_entries_count)))
184
185
186 /**@brief Get logical number of the block covered by extent.
187  * @param extent Extent to load number from
188  * @return Logical number of the first block covered by extent */
189 static inline uint32_t ext4_extent_get_first_block(struct ext4_extent *extent)
190 {
191     return to_le32(extent->first_block);
192 }
193
194 /**@brief Set logical number of the first block covered by extent.
195  * @param extent Extent to set number to
196  * @param iblock Logical number of the first block covered by extent */
197 static inline void ext4_extent_set_first_block(struct ext4_extent *extent,
198         uint32_t iblock)
199 {
200     extent->first_block = to_le32(iblock);
201 }
202
203 /**@brief Get number of blocks covered by extent.
204  * @param extent Extent to load count from
205  * @return Number of blocks covered by extent */
206 static inline uint16_t ext4_extent_get_block_count(struct ext4_extent *extent)
207 {
208     if (EXT4_EXT_IS_UNWRITTEN(extent))
209         return EXT4_EXT_GET_LEN_UNWRITTEN(extent);
210     else
211         return EXT4_EXT_GET_LEN(extent);
212 }
213 /**@brief Set number of blocks covered by extent.
214  * @param extent Extent to load count from
215  * @param count  Number of blocks covered by extent
216  * @param unwritten Whether the extent is unwritten or not */
217 static inline void ext4_extent_set_block_count(struct ext4_extent *extent,
218                            uint16_t count, bool unwritten)
219 {
220     EXT4_EXT_SET_LEN(extent, count);
221     if (unwritten)
222         EXT4_EXT_SET_UNWRITTEN(extent);
223 }
224
225 /**@brief Get physical number of the first block covered by extent.
226  * @param extent Extent to load number
227  * @return Physical number of the first block covered by extent */
228 static inline uint64_t ext4_extent_get_start(struct ext4_extent *extent)
229 {
230     return ((uint64_t)to_le16(extent->start_hi)) << 32 |
231            ((uint64_t)to_le32(extent->start_lo));
232 }
233
234
235 /**@brief Set physical number of the first block covered by extent.
236  * @param extent Extent to load number
237  * @param fblock Physical number of the first block covered by extent */
238 static inline void ext4_extent_set_start(struct ext4_extent *extent, uint64_t fblock)
239 {
240     extent->start_lo = to_le32((fblock << 32) >> 32);
241     extent->start_hi = to_le16((uint16_t)(fblock >> 32));
242 }
243
244
245 /**@brief Get logical number of the block covered by extent index.
246  * @param index Extent index to load number from
247  * @return Logical number of the first block covered by extent index */
248 static inline uint32_t
249 ext4_extent_index_get_first_block(struct ext4_extent_index *index)
250 {
251     return to_le32(index->first_block);
252 }
253
254 /**@brief Set logical number of the block covered by extent index.
255  * @param index  Extent index to set number to
256  * @param iblock Logical number of the first block covered by extent index */
257 static inline void
258 ext4_extent_index_set_first_block(struct ext4_extent_index *index,
259                   uint32_t iblock)
260 {
261     index->first_block = to_le32(iblock);
262 }
263
264 /**@brief Get physical number of block where the child node is located.
265  * @param index Extent index to load number from
266  * @return Physical number of the block with child node */
267 static inline uint64_t
268 ext4_extent_index_get_leaf(struct ext4_extent_index *index)
269 {
270     return ((uint64_t)to_le16(index->leaf_hi)) << 32 |
271            ((uint64_t)to_le32(index->leaf_lo));
272 }
273
274 /**@brief Set physical number of block where the child node is located.
275  * @param index  Extent index to set number to
276  * @param fblock Ohysical number of the block with child node */
277 static inline void ext4_extent_index_set_leaf(struct ext4_extent_index *index,
278                           uint64_t fblock)
279 {
280     index->leaf_lo = to_le32((fblock << 32) >> 32);
281     index->leaf_hi = to_le16((uint16_t)(fblock >> 32));
282 }
283
284 /**@brief Get magic value from extent header.
285  * @param header Extent header to load value from
286  * @return Magic value of extent header */
287 static inline uint16_t
288 ext4_extent_header_get_magic(struct ext4_extent_header *header)
289 {
290     return to_le16(header->magic);
291 }
292
293 /**@brief Set magic value to extent header.
294  * @param header Extent header to set value to
295  * @param magic  Magic value of extent header */
296 static inline void ext4_extent_header_set_magic(struct ext4_extent_header *header,
297                         uint16_t magic)
298 {
299     header->magic = to_le16(magic);
300 }
301
302 /**@brief Get number of entries from extent header
303  * @param header Extent header to get value from
304  * @return Number of entries covered by extent header */
305 static inline uint16_t
306 ext4_extent_header_get_entries_count(struct ext4_extent_header *header)
307 {
308     return to_le16(header->entries_count);
309 }
310
311 /**@brief Set number of entries to extent header
312  * @param header Extent header to set value to
313  * @param count  Number of entries covered by extent header */
314 static inline void
315 ext4_extent_header_set_entries_count(struct ext4_extent_header *header,
316                      uint16_t count)
317 {
318     header->entries_count = to_le16(count);
319 }
320
321 /**@brief Get maximum number of entries from extent header
322  * @param header Extent header to get value from
323  * @return Maximum number of entries covered by extent header */
324 static inline uint16_t
325 ext4_extent_header_get_max_entries_count(struct ext4_extent_header *header)
326 {
327     return to_le16(header->max_entries_count);
328 }
329
330 /**@brief Set maximum number of entries to extent header
331  * @param header    Extent header to set value to
332  * @param max_count Maximum number of entries covered by extent header */
333 static inline void
334 ext4_extent_header_set_max_entries_count(struct ext4_extent_header *header,
335                           uint16_t max_count)
336 {
337     header->max_entries_count = to_le16(max_count);
338 }
339
340 /**@brief Get depth of extent subtree.
341  * @param header Extent header to get value from
342  * @return Depth of extent subtree */
343 static inline uint16_t
344 ext4_extent_header_get_depth(struct ext4_extent_header *header)
345 {
346     return to_le16(header->depth);
347 }
348
349 /**@brief Set depth of extent subtree.
350  * @param header Extent header to set value to
351  * @param depth  Depth of extent subtree */
352 static inline void
353 ext4_extent_header_set_depth(struct ext4_extent_header *header, uint16_t depth)
354 {
355     header->depth = to_le16(depth);
356 }
357
358 /**@brief Get generation from extent header
359  * @param header Extent header to get value from
360  * @return Generation */
361 static inline uint32_t
362 ext4_extent_header_get_generation(struct ext4_extent_header *header)
363 {
364     return to_le32(header->generation);
365 }
366
367 /**@brief Set generation to extent header
368  * @param header     Extent header to set value to
369  * @param generation Generation */
370 static inline void
371 ext4_extent_header_set_generation(struct ext4_extent_header *header,
372                        uint32_t generation)
373 {
374     header->generation = to_le32(generation);
375 }
376
377 void ext4_extent_tree_init(struct ext4_inode_ref *inode_ref)
378 {
379     /* Initialize extent root header */
380     struct ext4_extent_header *header =
381             ext4_inode_get_extent_header(inode_ref->inode);
382     ext4_extent_header_set_depth(header, 0);
383     ext4_extent_header_set_entries_count(header, 0);
384     ext4_extent_header_set_generation(header, 0);
385     ext4_extent_header_set_magic(header, EXT4_EXTENT_MAGIC);
386
387     uint16_t max_entries = (EXT4_INODE_BLOCKS * sizeof(uint32_t) -
388             sizeof(struct ext4_extent_header)) /
389                     sizeof(struct ext4_extent);
390
391     ext4_extent_header_set_max_entries_count(header, max_entries);
392     inode_ref->dirty  = true;
393 }
394
395
396 static struct ext4_extent_tail *
397 find_ext4_extent_tail(struct ext4_extent_header *eh)
398 {
399         return (struct ext4_extent_tail *)(((char *)eh) +
400                                            EXT4_EXTENT_TAIL_OFFSET(eh));
401 }
402
403 static struct ext4_extent_header *ext_inode_hdr(struct ext4_inode *inode)
404 {
405         return (struct ext4_extent_header *)inode->blocks;
406 }
407
408 static struct ext4_extent_header *ext_block_hdr(struct ext4_block *block)
409 {
410         return (struct ext4_extent_header *)block->data;
411 }
412
413 static uint16_t ext_depth(struct ext4_inode *inode)
414 {
415         return to_le16(ext_inode_hdr(inode)->depth);
416 }
417
418 static uint16_t ext4_ext_get_actual_len(struct ext4_extent *ext)
419 {
420         return (to_le16(ext->block_count) <= EXT_INIT_MAX_LEN
421                     ? to_le16(ext->block_count)
422                     : (to_le16(ext->block_count) - EXT_INIT_MAX_LEN));
423 }
424
425 static void ext4_ext_mark_initialized(struct ext4_extent *ext)
426 {
427         ext->block_count = to_le16(ext4_ext_get_actual_len(ext));
428 }
429
430 static void ext4_ext_mark_unwritten(struct ext4_extent *ext)
431 {
432         ext->block_count |= to_le16(EXT_INIT_MAX_LEN);
433 }
434
435 static int ext4_ext_is_unwritten(struct ext4_extent *ext)
436 {
437         /* Extent with ee_len of 0x8000 is treated as an initialized extent */
438         return (to_le16(ext->block_count) > EXT_INIT_MAX_LEN);
439 }
440
441 /*
442  * ext4_ext_pblock:
443  * combine low and high parts of physical block number into ext4_fsblk_t
444  */
445 static ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex)
446 {
447         ext4_fsblk_t block;
448
449         block = to_le32(ex->start_lo);
450         block |= ((ext4_fsblk_t)to_le16(ex->start_hi) << 31) << 1;
451         return block;
452 }
453
454 /*
455  * ext4_idx_pblock:
456  * combine low and high parts of a leaf physical block number into ext4_fsblk_t
457  */
458 static ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_index *ix)
459 {
460         ext4_fsblk_t block;
461
462         block = to_le32(ix->leaf_lo);
463         block |= ((ext4_fsblk_t)to_le16(ix->leaf_hi) << 31) << 1;
464         return block;
465 }
466
467 /*
468  * ext4_ext_store_pblock:
469  * stores a large physical block number into an extent struct,
470  * breaking it into parts
471  */
472 static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
473 {
474         ex->start_lo = to_le32((uint32_t)(pb & 0xffffffff));
475         ex->start_hi = to_le16((uint16_t)((pb >> 32)) & 0xffff);
476 }
477
478 /*
479  * ext4_idx_store_pblock:
480  * stores a large physical block number into an index struct,
481  * breaking it into parts
482  */
483 static void ext4_idx_store_pblock(struct ext4_extent_index *ix, ext4_fsblk_t pb)
484 {
485         ix->leaf_lo = to_le32((uint32_t)(pb & 0xffffffff));
486         ix->leaf_hi = to_le16((uint16_t)((pb >> 32)) & 0xffff);
487 }
488
489 static int ext4_allocate_single_block(struct ext4_inode_ref *inode_ref,
490                                       ext4_fsblk_t goal, ext4_fsblk_t *blockp)
491 {
492         return ext4_balloc_alloc_block(inode_ref, goal, blockp);
493 }
494
495 static ext4_fsblk_t ext4_new_meta_blocks(struct ext4_inode_ref *inode_ref,
496                                          ext4_fsblk_t goal,
497                                          uint32_t flags __unused,
498                                          uint32_t *count, int *errp)
499 {
500         ext4_fsblk_t block = 0;
501
502         *errp = ext4_allocate_single_block(inode_ref, goal, &block);
503         if (count)
504                 *count = 1;
505         return block;
506 }
507
508 static void ext4_ext_free_blocks(struct ext4_inode_ref *inode_ref,
509                                  ext4_fsblk_t block, uint32_t count,
510                                  uint32_t flags __unused)
511 {
512         ext4_balloc_free_blocks(inode_ref, block, count);
513 }
514
515 static uint16_t ext4_ext_space_block(struct ext4_inode_ref *inode_ref)
516 {
517         uint16_t size;
518         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
519
520         size = (block_size - sizeof(struct ext4_extent_header)) /
521                sizeof(struct ext4_extent);
522 #ifdef AGGRESSIVE_TEST
523         if (size > 6)
524                 size = 6;
525 #endif
526         return size;
527 }
528
529 static uint16_t ext4_ext_space_block_idx(struct ext4_inode_ref *inode_ref)
530 {
531         uint16_t size;
532         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
533
534         size = (block_size - sizeof(struct ext4_extent_header)) /
535                sizeof(struct ext4_extent_index);
536 #ifdef AGGRESSIVE_TEST
537         if (size > 5)
538                 size = 5;
539 #endif
540         return size;
541 }
542
543 static uint16_t ext4_ext_space_root(struct ext4_inode_ref *inode_ref)
544 {
545         uint16_t size;
546
547         size = sizeof(inode_ref->inode->blocks);
548         size -= sizeof(struct ext4_extent_header);
549         size /= sizeof(struct ext4_extent);
550 #ifdef AGGRESSIVE_TEST
551         if (size > 3)
552                 size = 3;
553 #endif
554         return size;
555 }
556
557 static uint16_t ext4_ext_space_root_idx(struct ext4_inode_ref *inode_ref)
558 {
559         uint16_t size;
560
561         size = sizeof(inode_ref->inode->blocks);
562         size -= sizeof(struct ext4_extent_header);
563         size /= sizeof(struct ext4_extent_index);
564 #ifdef AGGRESSIVE_TEST
565         if (size > 4)
566                 size = 4;
567 #endif
568         return size;
569 }
570
571 static uint16_t ext4_ext_max_entries(struct ext4_inode_ref *inode_ref,
572                                      uint32_t depth)
573 {
574         uint16_t max;
575
576         if (depth == ext_depth(inode_ref->inode)) {
577                 if (depth == 0)
578                         max = ext4_ext_space_root(inode_ref);
579                 else
580                         max = ext4_ext_space_root_idx(inode_ref);
581         } else {
582                 if (depth == 0)
583                         max = ext4_ext_space_block(inode_ref);
584                 else
585                         max = ext4_ext_space_block_idx(inode_ref);
586         }
587
588         return max;
589 }
590
591 static ext4_fsblk_t ext4_ext_find_goal(struct ext4_inode_ref *inode_ref,
592                                        struct ext4_extent_path *path,
593                                        ext4_lblk_t block)
594 {
595         if (path) {
596                 uint32_t depth = path->depth;
597                 struct ext4_extent *ex;
598
599                 /*
600                  * Try to predict block placement assuming that we are
601                  * filling in a file which will eventually be
602                  * non-sparse --- i.e., in the case of libbfd writing
603                  * an ELF object sections out-of-order but in a way
604                  * the eventually results in a contiguous object or
605                  * executable file, or some database extending a table
606                  * space file.  However, this is actually somewhat
607                  * non-ideal if we are writing a sparse file such as
608                  * qemu or KVM writing a raw image file that is going
609                  * to stay fairly sparse, since it will end up
610                  * fragmenting the file system's free space.  Maybe we
611                  * should have some hueristics or some way to allow
612                  * userspace to pass a hint to file system,
613                  * especially if the latter case turns out to be
614                  * common.
615                  */
616                 ex = path[depth].extent;
617                 if (ex) {
618                         ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
619                         ext4_lblk_t ext_block = to_le32(ex->first_block);
620
621                         if (block > ext_block)
622                                 return ext_pblk + (block - ext_block);
623                         else
624                                 return ext_pblk - (ext_block - block);
625                 }
626
627                 /* it looks like index is empty;
628                  * try to find starting block from index itself */
629                 if (path[depth].block.lb_id)
630                         return path[depth].block.lb_id;
631         }
632
633         /* OK. use inode's group */
634         return ext4_fs_inode_to_goal_block(inode_ref);
635 }
636
637 /*
638  * Allocation for a meta data block
639  */
640 static ext4_fsblk_t ext4_ext_new_meta_block(struct ext4_inode_ref *inode_ref,
641                                             struct ext4_extent_path *path,
642                                             struct ext4_extent *ex, int *err,
643                                             uint32_t flags)
644 {
645         ext4_fsblk_t goal, newblock;
646
647         goal = ext4_ext_find_goal(inode_ref, path, to_le32(ex->first_block));
648         newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, err);
649         return newblock;
650 }
651
652 #if CONFIG_META_CSUM_ENABLE
653 static uint32_t ext4_ext_block_csum(struct ext4_inode_ref *inode_ref,
654                                     struct ext4_extent_header *eh)
655 {
656         uint32_t checksum = 0;
657         struct ext4_sblock *sb = &inode_ref->fs->sb;
658
659         if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
660                 uint32_t ino_index = to_le32(inode_ref->index);
661                 uint32_t ino_gen =
662                     to_le32(ext4_inode_get_generation(inode_ref->inode));
663                 /* First calculate crc32 checksum against fs uuid */
664                 checksum =
665                     ext4_crc32c(EXT4_CRC32_INIT, sb->uuid, sizeof(sb->uuid));
666                 /* Then calculate crc32 checksum against inode number
667                  * and inode generation */
668                 checksum = ext4_crc32c(checksum, &ino_index, sizeof(ino_index));
669                 checksum = ext4_crc32c(checksum, &ino_gen, sizeof(ino_gen));
670                 /* Finally calculate crc32 checksum against
671                  * the entire extent block up to the checksum field */
672                 checksum =
673                     ext4_crc32c(checksum, eh, EXT4_EXTENT_TAIL_OFFSET(eh));
674         }
675         return checksum;
676 }
677 #else
678 #define ext4_ext_block_csum(...) 0
679 #endif
680
681 static void
682 ext4_extent_block_csum_set(struct ext4_inode_ref *inode_ref __unused,
683                            struct ext4_extent_header *eh)
684 {
685         struct ext4_extent_tail *tail;
686
687         tail = find_ext4_extent_tail(eh);
688         tail->et_checksum = to_le32(ext4_ext_block_csum(inode_ref, eh));
689 }
690
691 static int ext4_ext_dirty(struct ext4_inode_ref *inode_ref,
692                           struct ext4_extent_path *path)
693 {
694         if (path->block.lb_id)
695                 ext4_trans_set_block_dirty(path->block.buf);
696         else
697                 inode_ref->dirty = true;
698
699         return EOK;
700 }
701
702 static void ext4_ext_drop_refs(struct ext4_inode_ref *inode_ref,
703                                struct ext4_extent_path *path, bool keep_other)
704 {
705         int32_t depth, i;
706
707         if (!path)
708                 return;
709         if (keep_other)
710                 depth = 0;
711         else
712                 depth = path->depth;
713
714         for (i = 0; i <= depth; i++, path++) {
715                 if (path->block.lb_id) {
716                         if (ext4_bcache_test_flag(path->block.buf, BC_DIRTY))
717                                 ext4_extent_block_csum_set(inode_ref,
718                                                            path->header);
719
720                         ext4_block_set(inode_ref->fs->bdev, &path->block);
721                 }
722         }
723 }
724
725 /*
726  * Check that whether the basic information inside the extent header
727  * is correct or not.
728  */
729 static int ext4_ext_check(struct ext4_inode_ref *inode_ref,
730                           struct ext4_extent_header *eh, uint16_t depth,
731                           ext4_fsblk_t pblk __unused)
732 {
733         struct ext4_extent_tail *tail;
734         struct ext4_sblock *sb = &inode_ref->fs->sb;
735         const char *error_msg;
736         (void)error_msg;
737
738         if (to_le16(eh->magic) != EXT4_EXTENT_MAGIC) {
739                 error_msg = "invalid magic";
740                 goto corrupted;
741         }
742         if (to_le16(eh->depth) != depth) {
743                 error_msg = "unexpected eh_depth";
744                 goto corrupted;
745         }
746         if (eh->max_entries_count == 0) {
747                 error_msg = "invalid eh_max";
748                 goto corrupted;
749         }
750         if (to_le16(eh->entries_count) > to_le16(eh->max_entries_count)) {
751                 error_msg = "invalid eh_entries";
752                 goto corrupted;
753         }
754
755         tail = find_ext4_extent_tail(eh);
756         if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
757                 if (tail->et_checksum !=
758                     to_le32(ext4_ext_block_csum(inode_ref, eh))) {
759                         ext4_dbg(DEBUG_EXTENT,
760                                  DBG_WARN "Extent block checksum failed."
761                                           "Blocknr: %" PRIu64 "\n",
762                                  pblk);
763                 }
764         }
765
766         return EOK;
767
768 corrupted:
769         ext4_dbg(DEBUG_EXTENT, "Bad extents B+ tree block: %s. "
770                                "Blocknr: %" PRId64 "\n",
771                  error_msg, pblk);
772         return EIO;
773 }
774
775 static int read_extent_tree_block(struct ext4_inode_ref *inode_ref,
776                                   ext4_fsblk_t pblk, int32_t depth,
777                                   struct ext4_block *bh,
778                                   uint32_t flags __unused)
779 {
780         int err;
781
782         err = ext4_trans_block_get(inode_ref->fs->bdev, bh, pblk);
783         if (err != EOK)
784                 goto errout;
785
786         err = ext4_ext_check(inode_ref, ext_block_hdr(bh), depth, pblk);
787         if (err != EOK)
788                 goto errout;
789
790         return EOK;
791 errout:
792         if (bh->lb_id)
793                 ext4_block_set(inode_ref->fs->bdev, bh);
794
795         return err;
796 }
797
798 /*
799  * ext4_ext_binsearch_idx:
800  * binary search for the closest index of the given block
801  * the header must be checked before calling this
802  */
803 static void ext4_ext_binsearch_idx(struct ext4_extent_path *path,
804                                    ext4_lblk_t block)
805 {
806         struct ext4_extent_header *eh = path->header;
807         struct ext4_extent_index *r, *l, *m;
808
809         l = EXT_FIRST_INDEX(eh) + 1;
810         r = EXT_LAST_INDEX(eh);
811         while (l <= r) {
812                 m = l + (r - l) / 2;
813                 if (block < to_le32(m->first_block))
814                         r = m - 1;
815                 else
816                         l = m + 1;
817         }
818
819         path->index = l - 1;
820 }
821
822 /*
823  * ext4_ext_binsearch:
824  * binary search for closest extent of the given block
825  * the header must be checked before calling this
826  */
827 static void ext4_ext_binsearch(struct ext4_extent_path *path, ext4_lblk_t block)
828 {
829         struct ext4_extent_header *eh = path->header;
830         struct ext4_extent *r, *l, *m;
831
832         if (eh->entries_count == 0) {
833                 /*
834                  * this leaf is empty:
835                  * we get such a leaf in split/add case
836                  */
837                 return;
838         }
839
840         l = EXT_FIRST_EXTENT(eh) + 1;
841         r = EXT_LAST_EXTENT(eh);
842
843         while (l <= r) {
844                 m = l + (r - l) / 2;
845                 if (block < to_le32(m->first_block))
846                         r = m - 1;
847                 else
848                         l = m + 1;
849         }
850
851         path->extent = l - 1;
852 }
853
854 static int ext4_find_extent(struct ext4_inode_ref *inode_ref, ext4_lblk_t block,
855                             struct ext4_extent_path **orig_path, uint32_t flags)
856 {
857         struct ext4_extent_header *eh;
858         struct ext4_block bh = EXT4_BLOCK_ZERO();
859         ext4_fsblk_t buf_block = 0;
860         struct ext4_extent_path *path = *orig_path;
861         int32_t depth, ppos = 0;
862         int32_t i;
863         int ret;
864
865         eh = ext_inode_hdr(inode_ref->inode);
866         depth = ext_depth(inode_ref->inode);
867
868         if (path) {
869                 ext4_ext_drop_refs(inode_ref, path, 0);
870                 if (depth > path[0].maxdepth) {
871                         ext4_free(path);
872                         *orig_path = path = NULL;
873                 }
874         }
875         if (!path) {
876                 int32_t path_depth = depth + 1;
877                 /* account possible depth increase */
878                 path = ext4_calloc(1, sizeof(struct ext4_extent_path) *
879                                      (path_depth + 1));
880                 if (!path)
881                         return ENOMEM;
882                 path[0].maxdepth = path_depth;
883         }
884         path[0].header = eh;
885         path[0].block = bh;
886
887         i = depth;
888         /* walk through the tree */
889         while (i) {
890                 ext4_ext_binsearch_idx(path + ppos, block);
891                 path[ppos].p_block = ext4_idx_pblock(path[ppos].index);
892                 path[ppos].depth = i;
893                 path[ppos].extent = NULL;
894                 buf_block = path[ppos].p_block;
895
896                 i--;
897                 ppos++;
898                 if (!path[ppos].block.lb_id ||
899                     path[ppos].block.lb_id != buf_block) {
900                         ret = read_extent_tree_block(inode_ref, buf_block, i,
901                                                      &bh, flags);
902                         if (ret != EOK) {
903                                 goto err;
904                         }
905                         if (ppos > depth) {
906                                 ext4_block_set(inode_ref->fs->bdev, &bh);
907                                 ret = EIO;
908                                 goto err;
909                         }
910
911                         eh = ext_block_hdr(&bh);
912                         path[ppos].block = bh;
913                         path[ppos].header = eh;
914                 }
915         }
916
917         path[ppos].depth = i;
918         path[ppos].extent = NULL;
919         path[ppos].index = NULL;
920
921         /* find extent */
922         ext4_ext_binsearch(path + ppos, block);
923         /* if not an empty leaf */
924         if (path[ppos].extent)
925                 path[ppos].p_block = ext4_ext_pblock(path[ppos].extent);
926
927         *orig_path = path;
928
929         ret = EOK;
930         return ret;
931
932 err:
933         ext4_ext_drop_refs(inode_ref, path, 0);
934         ext4_free(path);
935         if (orig_path)
936                 *orig_path = NULL;
937         return ret;
938 }
939
940 static void ext4_ext_init_header(struct ext4_inode_ref *inode_ref,
941                                  struct ext4_extent_header *eh, int32_t depth)
942 {
943         eh->entries_count = 0;
944         eh->max_entries_count = to_le16(ext4_ext_max_entries(inode_ref, depth));
945         eh->magic = to_le16(EXT4_EXTENT_MAGIC);
946         eh->depth = depth;
947 }
948
949 static int ext4_ext_insert_index(struct ext4_inode_ref *inode_ref,
950                                  struct ext4_extent_path *path, int at,
951                                  ext4_lblk_t insert_index,
952                                  ext4_fsblk_t insert_block, bool set_to_ix)
953 {
954         struct ext4_extent_index *ix;
955         struct ext4_extent_path *curp = path + at;
956         int len, err;
957         struct ext4_extent_header *eh;
958
959         if (curp->index && insert_index == to_le32(curp->index->first_block))
960                 return EIO;
961
962         if (to_le16(curp->header->entries_count) ==
963             to_le16(curp->header->max_entries_count))
964                 return EIO;
965
966         eh = curp->header;
967         if (curp->index == NULL) {
968                 ix = EXT_FIRST_INDEX(eh);
969                 curp->index = ix;
970         } else if (insert_index > to_le32(curp->index->first_block)) {
971                 /* insert after */
972                 ix = curp->index + 1;
973         } else {
974                 /* insert before */
975                 ix = curp->index;
976         }
977
978         if (ix > EXT_MAX_INDEX(eh))
979                 return EIO;
980
981         len = EXT_LAST_INDEX(eh) - ix + 1;
982         ext4_assert(len >= 0);
983         if (len > 0)
984                 memmove(ix + 1, ix, len * sizeof(struct ext4_extent_index));
985
986         ix->first_block = to_le32(insert_index);
987         ext4_idx_store_pblock(ix, insert_block);
988         eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
989
990         if (ix > EXT_LAST_INDEX(eh)) {
991                 err = EIO;
992                 goto out;
993         }
994
995         err = ext4_ext_dirty(inode_ref, curp);
996
997 out:
998         if (err == EOK && set_to_ix) {
999                 curp->index = ix;
1000                 curp->p_block = ext4_idx_pblock(ix);
1001         }
1002         return err;
1003 }
1004
1005 static int ext4_ext_split_node(struct ext4_inode_ref *inode_ref,
1006                                struct ext4_extent_path *path, int at,
1007                                struct ext4_extent *newext,
1008                                struct ext4_extent_path *npath,
1009                                bool *ins_right_leaf)
1010 {
1011         int i, npath_at, ret;
1012         ext4_lblk_t insert_index;
1013         ext4_fsblk_t newblock = 0;
1014         int depth = ext_depth(inode_ref->inode);
1015         npath_at = depth - at;
1016
1017         ext4_assert(at > 0);
1018
1019         if (path[depth].extent != EXT_MAX_EXTENT(path[depth].header))
1020                 insert_index = path[depth].extent[1].first_block;
1021         else
1022                 insert_index = newext->first_block;
1023
1024         for (i = depth; i >= at; i--, npath_at--) {
1025                 struct ext4_block bh = EXT4_BLOCK_ZERO();
1026
1027                 /* FIXME: currently we split at the point after the current
1028                  * extent. */
1029                 newblock =
1030                     ext4_ext_new_meta_block(inode_ref, path, newext, &ret, 0);
1031                 if (ret != EOK)
1032                         goto cleanup;
1033
1034                 /*  For write access.*/
1035                 ret = ext4_trans_block_get_noread(inode_ref->fs->bdev, &bh,
1036                                                   newblock);
1037                 if (ret != EOK)
1038                         goto cleanup;
1039
1040                 if (i == depth) {
1041                         /* start copy from next extent */
1042                         int m = EXT_MAX_EXTENT(path[i].header) - path[i].extent;
1043                         struct ext4_extent_header *neh;
1044                         struct ext4_extent *ex;
1045                         neh = ext_block_hdr(&bh);
1046                         ex = EXT_FIRST_EXTENT(neh);
1047                         ext4_ext_init_header(inode_ref, neh, 0);
1048                         if (m) {
1049                                 memmove(ex, path[i].extent + 1,
1050                                         sizeof(struct ext4_extent) * m);
1051                                 neh->entries_count =
1052                                     to_le16(to_le16(neh->entries_count) + m);
1053                                 path[i].header->entries_count = to_le16(
1054                                     to_le16(path[i].header->entries_count) - m);
1055                                 ret = ext4_ext_dirty(inode_ref, path + i);
1056                                 if (ret != EOK)
1057                                         goto cleanup;
1058
1059                                 npath[npath_at].p_block = ext4_ext_pblock(ex);
1060                                 npath[npath_at].extent = ex;
1061                         } else {
1062                                 npath[npath_at].p_block = 0;
1063                                 npath[npath_at].extent = NULL;
1064                         }
1065
1066                         npath[npath_at].depth = to_le16(neh->depth);
1067                         npath[npath_at].maxdepth = 0;
1068                         npath[npath_at].index = NULL;
1069                         npath[npath_at].header = neh;
1070                         npath[npath_at].block = bh;
1071
1072                         ext4_trans_set_block_dirty(bh.buf);
1073                 } else {
1074                         int m = EXT_MAX_INDEX(path[i].header) - path[i].index;
1075                         struct ext4_extent_header *neh;
1076                         struct ext4_extent_index *ix;
1077                         neh = ext_block_hdr(&bh);
1078                         ix = EXT_FIRST_INDEX(neh);
1079                         ext4_ext_init_header(inode_ref, neh, depth - i);
1080                         ix->first_block = to_le32(insert_index);
1081                         ext4_idx_store_pblock(ix,
1082                                               npath[npath_at + 1].block.lb_id);
1083                         neh->entries_count = to_le16(1);
1084                         if (m) {
1085                                 memmove(ix + 1, path[i].index + 1,
1086                                         sizeof(struct ext4_extent) * m);
1087                                 neh->entries_count =
1088                                     to_le16(to_le16(neh->entries_count) + m);
1089                                 path[i].header->entries_count = to_le16(
1090                                     to_le16(path[i].header->entries_count) - m);
1091                                 ret = ext4_ext_dirty(inode_ref, path + i);
1092                                 if (ret != EOK)
1093                                         goto cleanup;
1094                         }
1095
1096                         npath[npath_at].p_block = ext4_idx_pblock(ix);
1097                         npath[npath_at].depth = to_le16(neh->depth);
1098                         npath[npath_at].maxdepth = 0;
1099                         npath[npath_at].extent = NULL;
1100                         npath[npath_at].index = ix;
1101                         npath[npath_at].header = neh;
1102                         npath[npath_at].block = bh;
1103
1104                         ext4_trans_set_block_dirty(bh.buf);
1105                 }
1106         }
1107         newblock = 0;
1108
1109         /*
1110          * If newext->first_block can be included into the
1111          * right sub-tree.
1112          */
1113         if (to_le32(newext->first_block) < insert_index)
1114                 *ins_right_leaf = false;
1115         else
1116                 *ins_right_leaf = true;
1117
1118         ret = ext4_ext_insert_index(inode_ref, path, at - 1, insert_index,
1119                                     npath[0].block.lb_id, *ins_right_leaf);
1120
1121 cleanup:
1122         if (ret != EOK) {
1123                 if (newblock)
1124                         ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
1125
1126                 npath_at = depth - at;
1127                 while (npath_at >= 0) {
1128                         if (npath[npath_at].block.lb_id) {
1129                                 newblock = npath[npath_at].block.lb_id;
1130                                 ext4_block_set(inode_ref->fs->bdev,
1131                                                &npath[npath_at].block);
1132                                 ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
1133                                 memset(&npath[npath_at].block, 0,
1134                                        sizeof(struct ext4_block));
1135                         }
1136                         npath_at--;
1137                 }
1138         }
1139         return ret;
1140 }
1141
1142 /*
1143  * ext4_ext_correct_indexes:
1144  * if leaf gets modified and modified extent is first in the leaf,
1145  * then we have to correct all indexes above.
1146  */
1147 static int ext4_ext_correct_indexes(struct ext4_inode_ref *inode_ref,
1148                                     struct ext4_extent_path *path)
1149 {
1150         struct ext4_extent_header *eh;
1151         int32_t depth = ext_depth(inode_ref->inode);
1152         struct ext4_extent *ex;
1153         uint32_t border;
1154         int32_t k;
1155         int err = EOK;
1156
1157         eh = path[depth].header;
1158         ex = path[depth].extent;
1159
1160         if (ex == NULL || eh == NULL)
1161                 return EIO;
1162
1163         if (depth == 0) {
1164                 /* there is no tree at all */
1165                 return EOK;
1166         }
1167
1168         if (ex != EXT_FIRST_EXTENT(eh)) {
1169                 /* we correct tree if first leaf got modified only */
1170                 return EOK;
1171         }
1172
1173         k = depth - 1;
1174         border = path[depth].extent->first_block;
1175         path[k].index->first_block = border;
1176         err = ext4_ext_dirty(inode_ref, path + k);
1177         if (err != EOK)
1178                 return err;
1179
1180         while (k--) {
1181                 /* change all left-side indexes */
1182                 if (path[k + 1].index != EXT_FIRST_INDEX(path[k + 1].header))
1183                         break;
1184                 path[k].index->first_block = border;
1185                 err = ext4_ext_dirty(inode_ref, path + k);
1186                 if (err != EOK)
1187                         break;
1188         }
1189
1190         return err;
1191 }
1192
1193 static inline bool ext4_ext_can_prepend(struct ext4_extent *ex1,
1194                                         struct ext4_extent *ex2)
1195 {
1196         if (ext4_ext_pblock(ex2) + ext4_ext_get_actual_len(ex2) !=
1197             ext4_ext_pblock(ex1))
1198                 return 0;
1199
1200 #ifdef AGGRESSIVE_TEST
1201         if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) > 4)
1202                 return 0;
1203 #else
1204         if (ext4_ext_is_unwritten(ex1)) {
1205                 if (ext4_ext_get_actual_len(ex1) +
1206                         ext4_ext_get_actual_len(ex2) >
1207                     EXT_UNWRITTEN_MAX_LEN)
1208                         return 0;
1209         } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
1210                    EXT_INIT_MAX_LEN)
1211                 return 0;
1212 #endif
1213
1214         if (to_le32(ex2->first_block) + ext4_ext_get_actual_len(ex2) !=
1215             to_le32(ex1->first_block))
1216                 return 0;
1217
1218         return 1;
1219 }
1220
1221 static inline bool ext4_ext_can_append(struct ext4_extent *ex1,
1222                                        struct ext4_extent *ex2)
1223 {
1224         if (ext4_ext_pblock(ex1) + ext4_ext_get_actual_len(ex1) !=
1225             ext4_ext_pblock(ex2))
1226                 return 0;
1227
1228 #ifdef AGGRESSIVE_TEST
1229         if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) > 4)
1230                 return 0;
1231 #else
1232         if (ext4_ext_is_unwritten(ex1)) {
1233                 if (ext4_ext_get_actual_len(ex1) +
1234                         ext4_ext_get_actual_len(ex2) >
1235                     EXT_UNWRITTEN_MAX_LEN)
1236                         return 0;
1237         } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
1238                    EXT_INIT_MAX_LEN)
1239                 return 0;
1240 #endif
1241
1242         if (to_le32(ex1->first_block) + ext4_ext_get_actual_len(ex1) !=
1243             to_le32(ex2->first_block))
1244                 return 0;
1245
1246         return 1;
1247 }
1248
1249 static int ext4_ext_insert_leaf(struct ext4_inode_ref *inode_ref,
1250                                 struct ext4_extent_path *path, int at,
1251                                 struct ext4_extent *newext, int flags,
1252                                 bool *need_split)
1253 {
1254         struct ext4_extent_path *curp = path + at;
1255         struct ext4_extent *ex = curp->extent;
1256         int len, err, unwritten;
1257         struct ext4_extent_header *eh;
1258
1259         *need_split = false;
1260
1261         if (curp->extent &&
1262             to_le32(newext->first_block) == to_le32(curp->extent->first_block))
1263                 return EIO;
1264
1265         if (!(flags & EXT4_EXT_NO_COMBINE)) {
1266                 if (curp->extent && ext4_ext_can_append(curp->extent, newext)) {
1267                         unwritten = ext4_ext_is_unwritten(curp->extent);
1268                         curp->extent->block_count =
1269                             to_le16(ext4_ext_get_actual_len(curp->extent) +
1270                                     ext4_ext_get_actual_len(newext));
1271                         if (unwritten)
1272                                 ext4_ext_mark_unwritten(curp->extent);
1273
1274                         err = ext4_ext_dirty(inode_ref, curp);
1275                         goto out;
1276                 }
1277
1278                 if (curp->extent &&
1279                     ext4_ext_can_prepend(curp->extent, newext)) {
1280                         unwritten = ext4_ext_is_unwritten(curp->extent);
1281                         curp->extent->first_block = newext->first_block;
1282                         curp->extent->block_count =
1283                             to_le16(ext4_ext_get_actual_len(curp->extent) +
1284                                     ext4_ext_get_actual_len(newext));
1285                         if (unwritten)
1286                                 ext4_ext_mark_unwritten(curp->extent);
1287
1288                         err = ext4_ext_dirty(inode_ref, curp);
1289                         goto out;
1290                 }
1291         }
1292
1293         if (to_le16(curp->header->entries_count) ==
1294             to_le16(curp->header->max_entries_count)) {
1295                 err = EIO;
1296                 *need_split = true;
1297                 goto out;
1298         } else {
1299                 eh = curp->header;
1300                 if (curp->extent == NULL) {
1301                         ex = EXT_FIRST_EXTENT(eh);
1302                         curp->extent = ex;
1303                 } else if (to_le32(newext->first_block) >
1304                            to_le32(curp->extent->first_block)) {
1305                         /* insert after */
1306                         ex = curp->extent + 1;
1307                 } else {
1308                         /* insert before */
1309                         ex = curp->extent;
1310                 }
1311         }
1312
1313         len = EXT_LAST_EXTENT(eh) - ex + 1;
1314         ext4_assert(len >= 0);
1315         if (len > 0)
1316                 memmove(ex + 1, ex, len * sizeof(struct ext4_extent));
1317
1318         if (ex > EXT_MAX_EXTENT(eh)) {
1319                 err = EIO;
1320                 goto out;
1321         }
1322
1323         ex->first_block = newext->first_block;
1324         ex->block_count = newext->block_count;
1325         ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
1326         eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
1327
1328         if (ex > EXT_LAST_EXTENT(eh)) {
1329                 err = EIO;
1330                 goto out;
1331         }
1332
1333         err = ext4_ext_correct_indexes(inode_ref, path);
1334         if (err != EOK)
1335                 goto out;
1336         err = ext4_ext_dirty(inode_ref, curp);
1337
1338 out:
1339         if (err == EOK) {
1340                 curp->extent = ex;
1341                 curp->p_block = ext4_ext_pblock(ex);
1342         }
1343
1344         return err;
1345 }
1346
1347 /*
1348  * ext4_ext_grow_indepth:
1349  * implements tree growing procedure:
1350  * - allocates new block
1351  * - moves top-level data (index block or leaf) into the new block
1352  * - initializes new top-level, creating index that points to the
1353  *   just created block
1354  */
1355 static int ext4_ext_grow_indepth(struct ext4_inode_ref *inode_ref,
1356                                  uint32_t flags)
1357 {
1358         struct ext4_extent_header *neh;
1359         struct ext4_block bh = EXT4_BLOCK_ZERO();
1360         ext4_fsblk_t newblock, goal = 0;
1361         int err = EOK;
1362
1363         /* Try to prepend new index to old one */
1364         if (ext_depth(inode_ref->inode))
1365                 goal = ext4_idx_pblock(
1366                     EXT_FIRST_INDEX(ext_inode_hdr(inode_ref->inode)));
1367         else
1368                 goal = ext4_fs_inode_to_goal_block(inode_ref);
1369
1370         newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, &err);
1371         if (newblock == 0)
1372                 return err;
1373
1374         /* # */
1375         err = ext4_trans_block_get_noread(inode_ref->fs->bdev, &bh, newblock);
1376         if (err != EOK) {
1377                 ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
1378                 return err;
1379         }
1380
1381         /* move top-level index/leaf into new block */
1382         memmove(bh.data, inode_ref->inode->blocks,
1383                 sizeof(inode_ref->inode->blocks));
1384
1385         /* set size of new block */
1386         neh = ext_block_hdr(&bh);
1387         /* old root could have indexes or leaves
1388          * so calculate e_max right way */
1389         if (ext_depth(inode_ref->inode))
1390                 neh->max_entries_count =
1391                     to_le16(ext4_ext_space_block_idx(inode_ref));
1392         else
1393                 neh->max_entries_count =
1394                     to_le16(ext4_ext_space_block(inode_ref));
1395
1396         neh->magic = to_le16(EXT4_EXTENT_MAGIC);
1397         ext4_extent_block_csum_set(inode_ref, neh);
1398
1399         /* Update top-level index: num,max,pointer */
1400         neh = ext_inode_hdr(inode_ref->inode);
1401         neh->entries_count = to_le16(1);
1402         ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
1403         if (neh->depth == 0) {
1404                 /* Root extent block becomes index block */
1405                 neh->max_entries_count =
1406                     to_le16(ext4_ext_space_root_idx(inode_ref));
1407                 EXT_FIRST_INDEX(neh)
1408                     ->first_block = EXT_FIRST_EXTENT(neh)->first_block;
1409         }
1410         neh->depth = to_le16(to_le16(neh->depth) + 1);
1411
1412         ext4_trans_set_block_dirty(bh.buf);
1413         inode_ref->dirty = true;
1414         ext4_block_set(inode_ref->fs->bdev, &bh);
1415
1416         return err;
1417 }
1418
1419 static inline void ext4_ext_replace_path(struct ext4_inode_ref *inode_ref,
1420                                          struct ext4_extent_path *path,
1421                                          struct ext4_extent_path *newpath,
1422                                          int at)
1423 {
1424         ext4_ext_drop_refs(inode_ref, path + at, 1);
1425         path[at] = *newpath;
1426         memset(newpath, 0, sizeof(struct ext4_extent_path));
1427 }
1428
1429 int ext4_ext_insert_extent(struct ext4_inode_ref *inode_ref,
1430                            struct ext4_extent_path **ppath,
1431                            struct ext4_extent *newext, int flags)
1432 {
1433         int depth, level = 0, ret = 0;
1434         struct ext4_extent_path *path = *ppath;
1435         struct ext4_extent_path *npath = NULL;
1436         bool ins_right_leaf = false;
1437         bool need_split;
1438
1439 again:
1440         depth = ext_depth(inode_ref->inode);
1441         ret = ext4_ext_insert_leaf(inode_ref, path, depth, newext, flags,
1442                                    &need_split);
1443         if (ret == EIO && need_split == true) {
1444                 int i;
1445                 for (i = depth, level = 0; i >= 0; i--, level++)
1446                         if (EXT_HAS_FREE_INDEX(path + i))
1447                                 break;
1448
1449                 /* Do we need to grow the tree? */
1450                 if (i < 0) {
1451                         ret = ext4_ext_grow_indepth(inode_ref, 0);
1452                         if (ret != EOK)
1453                                 goto out;
1454
1455                         ret = ext4_find_extent(
1456                             inode_ref, to_le32(newext->first_block), ppath, 0);
1457                         if (ret != EOK)
1458                                 goto out;
1459
1460                         path = *ppath;
1461                         /*
1462                          * After growing the tree, there should be free space in
1463                          * the only child node of the root.
1464                          */
1465                         level--;
1466                         depth++;
1467                 }
1468
1469                 i = depth - (level - 1);
1470                 /* We split from leaf to the i-th node */
1471                 if (level > 0) {
1472                         npath = ext4_calloc(1, sizeof(struct ext4_extent_path) *
1473                                               (level));
1474                         if (!npath) {
1475                                 ret = ENOMEM;
1476                                 goto out;
1477                         }
1478                         ret = ext4_ext_split_node(inode_ref, path, i, newext,
1479                                                   npath, &ins_right_leaf);
1480                         if (ret != EOK)
1481                                 goto out;
1482
1483                         while (--level >= 0) {
1484                                 if (ins_right_leaf)
1485                                         ext4_ext_replace_path(inode_ref, path,
1486                                                               &npath[level],
1487                                                               i + level);
1488                                 else if (npath[level].block.lb_id)
1489                                         ext4_ext_drop_refs(inode_ref,
1490                                                            npath + level, 1);
1491                         }
1492                 }
1493                 goto again;
1494         }
1495
1496 out:
1497         if (ret != EOK) {
1498                 if (path)
1499                         ext4_ext_drop_refs(inode_ref, path, 0);
1500
1501                 while (--level >= 0 && npath) {
1502                         if (npath[level].block.lb_id) {
1503                                 ext4_fsblk_t block = npath[level].block.lb_id;
1504                                 ext4_ext_free_blocks(inode_ref, block, 1, 0);
1505                                 ext4_ext_drop_refs(inode_ref, npath + level, 1);
1506                         }
1507                 }
1508         }
1509         if (npath)
1510                 ext4_free(npath);
1511
1512         return ret;
1513 }
1514
1515 static void ext4_ext_remove_blocks(struct ext4_inode_ref *inode_ref,
1516                                    struct ext4_extent *ex, ext4_lblk_t from,
1517                                    ext4_lblk_t to)
1518 {
1519         ext4_lblk_t len = to - from + 1;
1520         ext4_lblk_t num;
1521         ext4_fsblk_t start;
1522         num = from - to_le32(ex->first_block);
1523         start = ext4_ext_pblock(ex) + num;
1524         ext4_dbg(DEBUG_EXTENT,
1525                  "Freeing %" PRIu32 " at %" PRIu64 ", %" PRIu32 "\n", from,
1526                  start, len);
1527
1528         ext4_ext_free_blocks(inode_ref, start, len, 0);
1529 }
1530
1531 static int ext4_ext_remove_idx(struct ext4_inode_ref *inode_ref,
1532                                struct ext4_extent_path *path, int32_t depth)
1533 {
1534         int err = EOK;
1535         int32_t i = depth;
1536         ext4_fsblk_t leaf;
1537
1538         /* free index block */
1539         leaf = ext4_idx_pblock(path[i].index);
1540
1541         if (path[i].index != EXT_LAST_INDEX(path[i].header)) {
1542                 ptrdiff_t len = EXT_LAST_INDEX(path[i].header) - path[i].index;
1543                 memmove(path[i].index, path[i].index + 1,
1544                         len * sizeof(struct ext4_extent_index));
1545         }
1546
1547         path[i].header->entries_count =
1548             to_le16(to_le16(path[i].header->entries_count) - 1);
1549         err = ext4_ext_dirty(inode_ref, path + i);
1550         if (err != EOK)
1551                 return err;
1552
1553         ext4_dbg(DEBUG_EXTENT, "IDX: Freeing %" PRIu32 " at %" PRIu64 ", %d\n",
1554                  to_le32(path[i].index->first_block), leaf, 1);
1555         ext4_ext_free_blocks(inode_ref, leaf, 1, 0);
1556
1557         /*
1558          * We may need to correct the paths after the first extents/indexes in
1559          * a node being modified.
1560          *
1561          * We do not need to consider whether there's any extents presenting or
1562          * not, as garbage will be cleared soon.
1563          */
1564         while (i > 0) {
1565                 if (path[i].index != EXT_FIRST_INDEX(path[i].header))
1566                         break;
1567
1568                 path[i - 1].index->first_block = path[i].index->first_block;
1569                 err = ext4_ext_dirty(inode_ref, path + i - 1);
1570                 if (err != EOK)
1571                         break;
1572
1573                 i--;
1574         }
1575         return err;
1576 }
1577
1578 static int ext4_ext_remove_leaf(struct ext4_inode_ref *inode_ref,
1579                                 struct ext4_extent_path *path, ext4_lblk_t from,
1580                                 ext4_lblk_t to)
1581 {
1582
1583         int32_t depth = ext_depth(inode_ref->inode);
1584         struct ext4_extent *ex = path[depth].extent;
1585         struct ext4_extent *start_ex, *ex2 = NULL;
1586         struct ext4_extent_header *eh = path[depth].header;
1587         int32_t len;
1588         int err = EOK;
1589         uint16_t new_entries;
1590
1591         start_ex = ex;
1592         new_entries = to_le16(eh->entries_count);
1593         while (ex <= EXT_LAST_EXTENT(path[depth].header) &&
1594                to_le32(ex->first_block) <= to) {
1595                 int32_t new_len = 0;
1596                 int unwritten;
1597                 ext4_lblk_t start, new_start;
1598                 ext4_fsblk_t newblock;
1599                 new_start = start = to_le32(ex->first_block);
1600                 len = ext4_ext_get_actual_len(ex);
1601                 newblock = ext4_ext_pblock(ex);
1602                 /*
1603                  * The 1st case:
1604                  *   The position that we start truncation is inside the range of an
1605                  *   extent. Here we should calculate the new length of that extent and
1606                  *   may start the removal from the next extent.
1607                  */
1608                 if (start < from) {
1609                         len -= from - start;
1610                         new_len = from - start;
1611                         start = from;
1612                         start_ex++;
1613                 } else {
1614                         /*
1615                          * The second case:
1616                          *   The last block to be truncated is inside the range of an
1617                          *   extent. We need to calculate the new length and the new
1618                          *   start of the extent.
1619                          */
1620                         if (start + len - 1 > to) {
1621                                 new_len = start + len - 1 - to;
1622                                 len -= new_len;
1623                                 new_start = to + 1;
1624                                 newblock += to + 1 - start;
1625                                 ex2 = ex;
1626                         }
1627                 }
1628
1629                 ext4_ext_remove_blocks(inode_ref, ex, start, start + len - 1);
1630                 /*
1631                  * Set the first block of the extent if it is presented.
1632                  */
1633                 ex->first_block = to_le32(new_start);
1634
1635                 /*
1636                  * If the new length of the current extent we are working on is
1637                  * zero, remove it.
1638                  */
1639                 if (!new_len)
1640                         new_entries--;
1641                 else {
1642                         unwritten = ext4_ext_is_unwritten(ex);
1643                         ex->block_count = to_le16(new_len);
1644                         ext4_ext_store_pblock(ex, newblock);
1645                         if (unwritten)
1646                                 ext4_ext_mark_unwritten(ex);
1647                 }
1648
1649                 ex += 1;
1650         }
1651
1652         if (ex2 == NULL)
1653                 ex2 = ex;
1654
1655         /*
1656          * Move any remaining extents to the starting position of the node.
1657          */
1658         if (ex2 <= EXT_LAST_EXTENT(eh))
1659                 memmove(start_ex, ex2, (EXT_LAST_EXTENT(eh) - ex2 + 1) *
1660                                            sizeof(struct ext4_extent));
1661
1662         eh->entries_count = to_le16(new_entries);
1663         ext4_ext_dirty(inode_ref, path + depth);
1664
1665         /*
1666          * If the extent pointer is pointed to the first extent of the node, and
1667          * there's still extents presenting, we may need to correct the indexes
1668          * of the paths.
1669          */
1670         if (path[depth].extent == EXT_FIRST_EXTENT(eh) && eh->entries_count) {
1671                 err = ext4_ext_correct_indexes(inode_ref, path);
1672                 if (err != EOK)
1673                         return err;
1674         }
1675
1676         /* if this leaf is free, then we should
1677          * remove it from index block above */
1678         if (eh->entries_count == 0 && path[depth].block.lb_id)
1679                 err = ext4_ext_remove_idx(inode_ref, path, depth - 1);
1680         else if (depth > 0)
1681                 path[depth - 1].index++;
1682
1683         return err;
1684 }
1685
1686 /*
1687  * Check if there's more to remove at a specific level.
1688  */
1689 static bool ext4_ext_more_to_rm(struct ext4_extent_path *path, ext4_lblk_t to)
1690 {
1691         if (!to_le16(path->header->entries_count))
1692                 return false;
1693
1694         if (path->index > EXT_LAST_INDEX(path->header))
1695                 return false;
1696
1697         if (to_le32(path->index->first_block) > to)
1698                 return false;
1699
1700         return true;
1701 }
1702
1703 int ext4_extent_remove_space(struct ext4_inode_ref *inode_ref, ext4_lblk_t from,
1704                              ext4_lblk_t to)
1705 {
1706         struct ext4_extent_path *path = NULL;
1707         int ret = EOK;
1708         int32_t depth = ext_depth(inode_ref->inode);
1709         int32_t i;
1710
1711         ret = ext4_find_extent(inode_ref, from, &path, 0);
1712         if (ret != EOK)
1713                 goto out;
1714
1715         if (!path[depth].extent) {
1716                 ret = EOK;
1717                 goto out;
1718         }
1719
1720         bool in_range = IN_RANGE(from, to_le32(path[depth].extent->first_block),
1721                                  ext4_ext_get_actual_len(path[depth].extent));
1722
1723         if (!in_range) {
1724                 ret = EOK;
1725                 goto out;
1726         }
1727
1728         /* If we do remove_space inside the range of an extent */
1729         if ((to_le32(path[depth].extent->first_block) < from) &&
1730             (to < to_le32(path[depth].extent->first_block) +
1731                       ext4_ext_get_actual_len(path[depth].extent) - 1)) {
1732
1733                 struct ext4_extent *ex = path[depth].extent, newex;
1734                 int unwritten = ext4_ext_is_unwritten(ex);
1735                 ext4_lblk_t ee_block = to_le32(ex->first_block);
1736                 int32_t len = ext4_ext_get_actual_len(ex);
1737                 ext4_fsblk_t newblock = to + 1 - ee_block + ext4_ext_pblock(ex);
1738
1739                 ex->block_count = to_le16(from - ee_block);
1740                 if (unwritten)
1741                         ext4_ext_mark_unwritten(ex);
1742
1743                 ext4_ext_dirty(inode_ref, path + depth);
1744
1745                 newex.first_block = to_le32(to + 1);
1746                 newex.block_count = to_le16(ee_block + len - 1 - to);
1747                 ext4_ext_store_pblock(&newex, newblock);
1748                 if (unwritten)
1749                         ext4_ext_mark_unwritten(&newex);
1750
1751                 ret = ext4_ext_insert_extent(inode_ref, &path, &newex, 0);
1752                 goto out;
1753         }
1754
1755         i = depth;
1756         while (i >= 0) {
1757                 if (i == depth) {
1758                         struct ext4_extent_header *eh;
1759                         struct ext4_extent *first_ex, *last_ex;
1760                         ext4_lblk_t leaf_from, leaf_to;
1761                         eh = path[i].header;
1762                         ext4_assert(to_le16(eh->entries_count) > 0);
1763                         first_ex = EXT_FIRST_EXTENT(eh);
1764                         last_ex = EXT_LAST_EXTENT(eh);
1765                         leaf_from = to_le32(first_ex->first_block);
1766                         leaf_to = to_le32(last_ex->first_block) +
1767                                   ext4_ext_get_actual_len(last_ex) - 1;
1768                         if (leaf_from < from)
1769                                 leaf_from = from;
1770
1771                         if (leaf_to > to)
1772                                 leaf_to = to;
1773
1774                         ext4_ext_remove_leaf(inode_ref, path, leaf_from,
1775                                              leaf_to);
1776                         ext4_ext_drop_refs(inode_ref, path + i, 0);
1777                         i--;
1778                         continue;
1779                 }
1780
1781                 struct ext4_extent_header *eh;
1782                 eh = path[i].header;
1783                 if (ext4_ext_more_to_rm(path + i, to)) {
1784                         struct ext4_block bh = EXT4_BLOCK_ZERO();
1785                         if (path[i + 1].block.lb_id)
1786                                 ext4_ext_drop_refs(inode_ref, path + i + 1, 0);
1787
1788                         ret = read_extent_tree_block(
1789                             inode_ref, ext4_idx_pblock(path[i].index),
1790                             depth - i - 1, &bh, 0);
1791                         if (ret != EOK)
1792                                 goto out;
1793
1794                         path[i].p_block = ext4_idx_pblock(path[i].index);
1795                         path[i + 1].block = bh;
1796                         path[i + 1].header = ext_block_hdr(&bh);
1797                         path[i + 1].depth = depth - i - 1;
1798                         if (i + 1 == depth)
1799                                 path[i + 1].extent =
1800                                     EXT_FIRST_EXTENT(path[i + 1].header);
1801                         else
1802                                 path[i + 1].index =
1803                                     EXT_FIRST_INDEX(path[i + 1].header);
1804
1805                         i++;
1806                 } else {
1807                         if (i > 0) {
1808                                 /*
1809                                  * Garbage entries will finally be cleared here.
1810                                  */
1811                                 if (!eh->entries_count)
1812                                         ret = ext4_ext_remove_idx(inode_ref,
1813                                                                   path, i - 1);
1814                                 else
1815                                         path[i - 1].index++;
1816                         }
1817
1818                         if (i)
1819                                 ext4_block_set(inode_ref->fs->bdev,
1820                                                &path[i].block);
1821
1822                         i--;
1823                 }
1824         }
1825
1826         /* TODO: flexible tree reduction should be here */
1827         if (path->header->entries_count == 0) {
1828                 /*
1829                  * truncate to zero freed all the tree,
1830                  * so we need to correct eh_depth
1831                  */
1832                 ext_inode_hdr(inode_ref->inode)->depth = 0;
1833                 ext_inode_hdr(inode_ref->inode)->max_entries_count =
1834                     to_le16(ext4_ext_space_root(inode_ref));
1835                 ret = ext4_ext_dirty(inode_ref, path);
1836         }
1837
1838 out:
1839         ext4_ext_drop_refs(inode_ref, path, 0);
1840         ext4_free(path);
1841         path = NULL;
1842         return ret;
1843 }
1844
1845 static int ext4_ext_split_extent_at(struct ext4_inode_ref *inode_ref,
1846                                     struct ext4_extent_path **ppath,
1847                                     ext4_lblk_t split, uint32_t split_flag)
1848 {
1849         struct ext4_extent *ex, newex;
1850         ext4_fsblk_t newblock;
1851         ext4_lblk_t ee_block;
1852         int32_t ee_len;
1853         int32_t depth = ext_depth(inode_ref->inode);
1854         int err = EOK;
1855
1856         ex = (*ppath)[depth].extent;
1857         ee_block = to_le32(ex->first_block);
1858         ee_len = ext4_ext_get_actual_len(ex);
1859         newblock = split - ee_block + ext4_ext_pblock(ex);
1860
1861         if (split == ee_block) {
1862                 /*
1863                  * case b: block @split is the block that the extent begins with
1864                  * then we just change the state of the extent, and splitting
1865                  * is not needed.
1866                  */
1867                 if (split_flag & EXT4_EXT_MARK_UNWRIT2)
1868                         ext4_ext_mark_unwritten(ex);
1869                 else
1870                         ext4_ext_mark_initialized(ex);
1871
1872                 err = ext4_ext_dirty(inode_ref, *ppath + depth);
1873                 goto out;
1874         }
1875
1876         ex->block_count = to_le16(split - ee_block);
1877         if (split_flag & EXT4_EXT_MARK_UNWRIT1)
1878                 ext4_ext_mark_unwritten(ex);
1879
1880         err = ext4_ext_dirty(inode_ref, *ppath + depth);
1881         if (err != EOK)
1882                 goto out;
1883
1884         newex.first_block = to_le32(split);
1885         newex.block_count = to_le16(ee_len - (split - ee_block));
1886         ext4_ext_store_pblock(&newex, newblock);
1887         if (split_flag & EXT4_EXT_MARK_UNWRIT2)
1888                 ext4_ext_mark_unwritten(&newex);
1889         err = ext4_ext_insert_extent(inode_ref, ppath, &newex,
1890                                      EXT4_EXT_NO_COMBINE);
1891         if (err != EOK)
1892                 goto restore_extent_len;
1893
1894 out:
1895         return err;
1896 restore_extent_len:
1897         ex->block_count = to_le16(ee_len);
1898         err = ext4_ext_dirty(inode_ref, *ppath + depth);
1899         return err;
1900 }
1901
1902 static int ext4_ext_convert_to_initialized(struct ext4_inode_ref *inode_ref,
1903                                            struct ext4_extent_path **ppath,
1904                                            ext4_lblk_t split, uint32_t blocks)
1905 {
1906         int32_t depth = ext_depth(inode_ref->inode), err = EOK;
1907         struct ext4_extent *ex = (*ppath)[depth].extent;
1908
1909         ext4_assert(to_le32(ex->first_block) <= split);
1910
1911         if (split + blocks ==
1912             to_le32(ex->first_block) + ext4_ext_get_actual_len(ex)) {
1913                 /* split and initialize right part */
1914                 err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1915                                                EXT4_EXT_MARK_UNWRIT1);
1916         } else if (to_le32(ex->first_block) == split) {
1917                 /* split and initialize left part */
1918                 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1919                                                EXT4_EXT_MARK_UNWRIT2);
1920         } else {
1921                 /* split 1 extent to 3 and initialize the 2nd */
1922                 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1923                                                EXT4_EXT_MARK_UNWRIT1 |
1924                                                    EXT4_EXT_MARK_UNWRIT2);
1925                 if (err == EOK) {
1926                         err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1927                                                        EXT4_EXT_MARK_UNWRIT1);
1928                 }
1929         }
1930
1931         return err;
1932 }
1933
1934 static ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_extent_path *path)
1935 {
1936         int32_t depth;
1937
1938         depth = path->depth;
1939
1940         if (depth == 0 && path->extent == NULL)
1941                 return EXT_MAX_BLOCKS;
1942
1943         while (depth >= 0) {
1944                 if (depth == path->depth) {
1945                         /* leaf */
1946                         if (path[depth].extent &&
1947                             path[depth].extent !=
1948                                 EXT_LAST_EXTENT(path[depth].header))
1949                                 return to_le32(
1950                                     path[depth].extent[1].first_block);
1951                 } else {
1952                         /* index */
1953                         if (path[depth].index !=
1954                             EXT_LAST_INDEX(path[depth].header))
1955                                 return to_le32(
1956                                     path[depth].index[1].first_block);
1957                 }
1958                 depth--;
1959         }
1960
1961         return EXT_MAX_BLOCKS;
1962 }
1963
1964 static int ext4_ext_zero_unwritten_range(struct ext4_inode_ref *inode_ref,
1965                                          ext4_fsblk_t block,
1966                                          uint32_t blocks_count)
1967 {
1968         int err = EOK;
1969         uint32_t i;
1970         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
1971         for (i = 0; i < blocks_count; i++) {
1972                 struct ext4_block bh = EXT4_BLOCK_ZERO();
1973                 err = ext4_trans_block_get_noread(inode_ref->fs->bdev, &bh,
1974                                                   block + i);
1975                 if (err != EOK)
1976                         break;
1977
1978                 memset(bh.data, 0, block_size);
1979                 ext4_trans_set_block_dirty(bh.buf);
1980                 err = ext4_block_set(inode_ref->fs->bdev, &bh);
1981                 if (err != EOK)
1982                         break;
1983         }
1984         return err;
1985 }
1986
1987 __unused static void print_path(struct ext4_extent_path *path)
1988 {
1989         int32_t i = path->depth;
1990         while (i >= 0) {
1991
1992                 ptrdiff_t a =
1993                     (path->extent)
1994                         ? (path->extent - EXT_FIRST_EXTENT(path->header))
1995                         : 0;
1996                 ptrdiff_t b =
1997                     (path->index)
1998                         ? (path->index - EXT_FIRST_INDEX(path->header))
1999                         : 0;
2000
2001                 (void)a;
2002                 (void)b;
2003                 ext4_dbg(DEBUG_EXTENT,
2004                          "depth %" PRId32 ", p_block: %" PRIu64 ","
2005                          "p_ext offset: %td, p_idx offset: %td\n",
2006                          i, path->p_block, a, b);
2007                 i--;
2008                 path++;
2009         }
2010 }
2011
2012 int ext4_extent_get_blocks(struct ext4_inode_ref *inode_ref, ext4_lblk_t iblock,
2013                            uint32_t max_blocks, ext4_fsblk_t *result,
2014                            bool create, uint32_t *blocks_count)
2015 {
2016         struct ext4_extent_path *path = NULL;
2017         struct ext4_extent newex, *ex;
2018         ext4_fsblk_t goal;
2019         int err = EOK;
2020         int32_t depth;
2021         uint32_t allocated = 0;
2022         ext4_lblk_t next;
2023         ext4_fsblk_t newblock;
2024
2025         if (result)
2026                 *result = 0;
2027
2028         if (blocks_count)
2029                 *blocks_count = 0;
2030
2031         /* find extent for this block */
2032         err = ext4_find_extent(inode_ref, iblock, &path, 0);
2033         if (err != EOK) {
2034                 path = NULL;
2035                 goto out2;
2036         }
2037
2038         depth = ext_depth(inode_ref->inode);
2039
2040         /*
2041          * consistent leaf must not be empty
2042          * this situations is possible, though, _during_ tree modification
2043          * this is why assert can't be put in ext4_ext_find_extent()
2044          */
2045         ex = path[depth].extent;
2046         if (ex) {
2047                 ext4_lblk_t ee_block = to_le32(ex->first_block);
2048                 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
2049                 uint16_t ee_len = ext4_ext_get_actual_len(ex);
2050                 /* if found exent covers block, simple return it */
2051                 if (IN_RANGE(iblock, ee_block, ee_len)) {
2052                         /* number of remain blocks in the extent */
2053                         allocated = ee_len - (iblock - ee_block);
2054
2055                         if (!ext4_ext_is_unwritten(ex)) {
2056                                 newblock = iblock - ee_block + ee_start;
2057                                 goto out;
2058                         }
2059
2060                         if (!create) {
2061                                 newblock = 0;
2062                                 goto out;
2063                         }
2064
2065                         uint32_t zero_range;
2066                         zero_range = allocated;
2067                         if (zero_range > max_blocks)
2068                                 zero_range = max_blocks;
2069
2070                         newblock = iblock - ee_block + ee_start;
2071                         err = ext4_ext_zero_unwritten_range(inode_ref, newblock,
2072                                                             zero_range);
2073                         if (err != EOK)
2074                                 goto out2;
2075
2076                         err = ext4_ext_convert_to_initialized(
2077                             inode_ref, &path, iblock, zero_range);
2078                         if (err != EOK)
2079                                 goto out2;
2080
2081                         goto out;
2082                 }
2083         }
2084
2085         /*
2086          * requested block isn't allocated yet
2087          * we couldn't try to create block if create flag is zero
2088          */
2089         if (!create) {
2090                 goto out2;
2091         }
2092
2093         /* find next allocated block so that we know how many
2094          * blocks we can allocate without ovelapping next extent */
2095         next = ext4_ext_next_allocated_block(path);
2096         allocated = next - iblock;
2097         if (allocated > max_blocks)
2098                 allocated = max_blocks;
2099
2100         /* allocate new block */
2101         goal = ext4_ext_find_goal(inode_ref, path, iblock);
2102         newblock = ext4_new_meta_blocks(inode_ref, goal, 0, &allocated, &err);
2103         if (!newblock)
2104                 goto out2;
2105
2106         /* try to insert new extent into found leaf and return */
2107         newex.first_block = to_le32(iblock);
2108         ext4_ext_store_pblock(&newex, newblock);
2109         newex.block_count = to_le16(allocated);
2110         err = ext4_ext_insert_extent(inode_ref, &path, &newex, 0);
2111         if (err != EOK) {
2112                 /* free data blocks we just allocated */
2113                 ext4_ext_free_blocks(inode_ref, ext4_ext_pblock(&newex),
2114                                      to_le16(newex.block_count), 0);
2115                 goto out2;
2116         }
2117
2118         /* previous routine could use block we allocated */
2119         newblock = ext4_ext_pblock(&newex);
2120
2121 out:
2122         if (allocated > max_blocks)
2123                 allocated = max_blocks;
2124
2125         if (result)
2126                 *result = newblock;
2127
2128         if (blocks_count)
2129                 *blocks_count = allocated;
2130
2131 out2:
2132         if (path) {
2133                 ext4_ext_drop_refs(inode_ref, path, 0);
2134                 ext4_free(path);
2135         }
2136
2137         return err;
2138 }