2 * Copyright (c) 2013 Grzegorz Kostka (kostka.grzegorz@gmail.com)
6 * Copyright (c) 2012 Martin Sucha
7 * Copyright (c) 2012 Frantisek Princ
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
14 * - Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * - Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * - The name of the author may not be used to endorse or promote products
20 * derived from this software without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
23 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
25 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
27 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
31 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 /** @addtogroup lwext4
39 * @brief Ext4 data structure definitions.
49 #include "ext4_config.h"
50 #include "ext4_blockdev.h"
56 #define EXT4_CHECKSUM_CRC32C 1
61 * Structure of the super block
64 uint32_t inodes_count; /* I-nodes count */
65 uint32_t blocks_count_lo; /* Blocks count */
66 uint32_t reserved_blocks_count_lo; /* Reserved blocks count */
67 uint32_t free_blocks_count_lo; /* Free blocks count */
68 uint32_t free_inodes_count; /* Free inodes count */
69 uint32_t first_data_block; /* First Data Block */
70 uint32_t log_block_size; /* Block size */
71 uint32_t log_cluster_size; /* Obsoleted fragment size */
72 uint32_t blocks_per_group; /* Number of blocks per group */
73 uint32_t frags_per_group; /* Obsoleted fragments per group */
74 uint32_t inodes_per_group; /* Number of inodes per group */
75 uint32_t mount_time; /* Mount time */
76 uint32_t write_time; /* Write time */
77 uint16_t mount_count; /* Mount count */
78 uint16_t max_mount_count; /* Maximal mount count */
79 uint16_t magic; /* Magic signature */
80 uint16_t state; /* File system state */
81 uint16_t errors; /* Behavior when detecting errors */
82 uint16_t minor_rev_level; /* Minor revision level */
83 uint32_t last_check_time; /* Time of last check */
84 uint32_t check_interval; /* Maximum time between checks */
85 uint32_t creator_os; /* Creator OS */
86 uint32_t rev_level; /* Revision level */
87 uint16_t def_resuid; /* Default uid for reserved blocks */
88 uint16_t def_resgid; /* Default gid for reserved blocks */
90 /* Fields for EXT4_DYNAMIC_REV superblocks only. */
91 uint32_t first_inode; /* First non-reserved inode */
92 uint16_t inode_size; /* Size of inode structure */
93 uint16_t block_group_index; /* Block group index of this superblock */
94 uint32_t features_compatible; /* Compatible feature set */
95 uint32_t features_incompatible; /* Incompatible feature set */
96 uint32_t features_read_only; /* Readonly-compatible feature set */
97 uint8_t uuid[UUID_SIZE]; /* 128-bit uuid for volume */
98 char volume_name[16]; /* Volume name */
99 char last_mounted[64]; /* Directory where last mounted */
100 uint32_t algorithm_usage_bitmap; /* For compression */
103 * Performance hints. Directory preallocation should only
104 * happen if the EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on.
106 uint8_t s_prealloc_blocks; /* Number of blocks to try to preallocate */
107 uint8_t s_prealloc_dir_blocks; /* Number to preallocate for dirs */
108 uint16_t s_reserved_gdt_blocks; /* Per group desc for online growth */
111 * Journaling support valid if EXT4_FEATURE_COMPAT_HAS_JOURNAL set.
113 uint8_t journal_uuid[UUID_SIZE]; /* UUID of journal superblock */
114 uint32_t journal_inode_number; /* Inode number of journal file */
115 uint32_t journal_dev; /* Device number of journal file */
116 uint32_t last_orphan; /* Head of list of inodes to delete */
117 uint32_t hash_seed[4]; /* HTREE hash seed */
118 uint8_t default_hash_version; /* Default hash version to use */
119 uint8_t journal_backup_type;
120 uint16_t desc_size; /* Size of group descriptor */
121 uint32_t default_mount_opts; /* Default mount options */
122 uint32_t first_meta_bg; /* First metablock block group */
123 uint32_t mkfs_time; /* When the filesystem was created */
124 uint32_t journal_blocks[17]; /* Backup of the journal inode */
126 /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */
127 uint32_t blocks_count_hi; /* Blocks count */
128 uint32_t reserved_blocks_count_hi; /* Reserved blocks count */
129 uint32_t free_blocks_count_hi; /* Free blocks count */
130 uint16_t min_extra_isize; /* All inodes have at least # bytes */
131 uint16_t want_extra_isize; /* New inodes should reserve # bytes */
132 uint32_t flags; /* Miscellaneous flags */
133 uint16_t raid_stride; /* RAID stride */
134 uint16_t mmp_interval; /* # seconds to wait in MMP checking */
135 uint64_t mmp_block; /* Block for multi-mount protection */
136 uint32_t raid_stripe_width; /* Blocks on all data disks (N * stride) */
137 uint8_t log_groups_per_flex; /* FLEX_BG group size */
138 uint8_t checksum_type;
139 uint16_t reserved_pad;
140 uint64_t kbytes_written; /* Number of lifetime kilobytes written */
141 uint32_t snapshot_inum; /* I-node number of active snapshot */
142 uint32_t snapshot_id; /* Sequential ID of active snapshot */
144 snapshot_r_blocks_count; /* Reserved blocks for active snapshot's
147 snapshot_list; /* I-node number of the head of the on-disk snapshot
149 uint32_t error_count; /* Number of file system errors */
150 uint32_t first_error_time; /* First time an error happened */
151 uint32_t first_error_ino; /* I-node involved in first error */
152 uint64_t first_error_block; /* Block involved of first error */
153 uint8_t first_error_func[32]; /* Function where the error happened */
154 uint32_t first_error_line; /* Line number where error happened */
155 uint32_t last_error_time; /* Most recent time of an error */
156 uint32_t last_error_ino; /* I-node involved in last error */
157 uint32_t last_error_line; /* Line number where error happened */
158 uint64_t last_error_block; /* Block involved of last error */
159 uint8_t last_error_func[32]; /* Function where the error happened */
160 uint8_t mount_opts[64];
161 uint32_t usr_quota_inum; /* inode for tracking user quota */
162 uint32_t grp_quota_inum; /* inode for tracking group quota */
163 uint32_t overhead_clusters; /* overhead blocks/clusters in fs */
164 uint32_t backup_bgs[2]; /* groups with sparse_super2 SBs */
165 uint8_t encrypt_algos[4]; /* Encryption algorithms in use */
166 uint8_t encrypt_pw_salt[16]; /* Salt used for string2key algorithm */
167 uint32_t lpf_ino; /* Location of the lost+found inode */
168 uint32_t padding[100]; /* Padding to the end of the block */
169 uint32_t checksum; /* crc32c(superblock) */
170 } __attribute__((packed));
172 #define EXT4_SUPERBLOCK_MAGIC 0xEF53
173 #define EXT4_SUPERBLOCK_SIZE 1024
174 #define EXT4_SUPERBLOCK_OFFSET 1024
176 #define EXT4_SUPERBLOCK_OS_LINUX 0
177 #define EXT4_SUPERBLOCK_OS_HURD 1
180 * Misc. filesystem flags
182 #define EXT4_SUPERBLOCK_FLAGS_SIGNED_HASH 0x0001
183 #define EXT4_SUPERBLOCK_FLAGS_UNSIGNED_HASH 0x0002
184 #define EXT4_SUPERBLOCK_FLAGS_TEST_FILESYS 0x0004
188 #define EXT4_SUPERBLOCK_STATE_VALID_FS 0x0001 /* Unmounted cleanly */
189 #define EXT4_SUPERBLOCK_STATE_ERROR_FS 0x0002 /* Errors detected */
190 #define EXT4_SUPERBLOCK_STATE_ORPHAN_FS 0x0004 /* Orphans being recovered */
193 * Behaviour when errors detected
/*
 * Values describing the behaviour to apply when errors are detected.
 */
#define EXT4_SUPERBLOCK_ERRORS_CONTINUE 1 /* Continue execution */
#define EXT4_SUPERBLOCK_ERRORS_RO 2       /* Remount fs read-only */
#define EXT4_SUPERBLOCK_ERRORS_PANIC 3    /* Panic */
/* Default behaviour: continue execution. */
#define EXT4_SUPERBLOCK_ERRORS_DEFAULT EXT4_SUPERBLOCK_ERRORS_CONTINUE
201 * Compatible features
203 #define EXT4_FCOM_DIR_PREALLOC 0x0001
204 #define EXT4_FCOM_IMAGIC_INODES 0x0002
205 #define EXT4_FCOM_HAS_JOURNAL 0x0004
206 #define EXT4_FCOM_EXT_ATTR 0x0008
207 #define EXT4_FCOM_RESIZE_INODE 0x0010
208 #define EXT4_FCOM_DIR_INDEX 0x0020
211 * Read-only compatible features
213 #define EXT4_FRO_COM_SPARSE_SUPER 0x0001
214 #define EXT4_FRO_COM_LARGE_FILE 0x0002
215 #define EXT4_FRO_COM_BTREE_DIR 0x0004
216 #define EXT4_FRO_COM_HUGE_FILE 0x0008
217 #define EXT4_FRO_COM_GDT_CSUM 0x0010
218 #define EXT4_FRO_COM_DIR_NLINK 0x0020
219 #define EXT4_FRO_COM_EXTRA_ISIZE 0x0040
220 #define EXT4_FRO_COM_QUOTA 0x0100
221 #define EXT4_FRO_COM_BIGALLOC 0x0200
222 #define EXT4_FRO_COM_METADATA_CSUM 0x0400
225 * Incompatible features
227 #define EXT4_FINCOM_COMPRESSION 0x0001
228 #define EXT4_FINCOM_FILETYPE 0x0002
229 #define EXT4_FINCOM_RECOVER 0x0004 /* Needs recovery */
230 #define EXT4_FINCOM_JOURNAL_DEV 0x0008 /* Journal device */
231 #define EXT4_FINCOM_META_BG 0x0010
232 #define EXT4_FINCOM_EXTENTS 0x0040 /* extents support */
233 #define EXT4_FINCOM_64BIT 0x0080
234 #define EXT4_FINCOM_MMP 0x0100
235 #define EXT4_FINCOM_FLEX_BG 0x0200
236 #define EXT4_FINCOM_EA_INODE 0x0400 /* EA in inode */
237 #define EXT4_FINCOM_DIRDATA 0x1000 /* data in dirent */
238 #define EXT4_FINCOM_BG_USE_META_CSUM 0x2000 /* use crc32c for bg */
239 #define EXT4_FINCOM_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */
240 #define EXT4_FINCOM_INLINE_DATA 0x8000 /* data in inode */
243 * EXT2 supported feature set
245 #define EXT2_SUPPORTED_FCOM 0x0000
247 #define EXT2_SUPPORTED_FINCOM \
248 (EXT4_FINCOM_FILETYPE | EXT4_FINCOM_META_BG)
250 #define EXT2_SUPPORTED_FRO_COM \
251 (EXT4_FRO_COM_SPARSE_SUPER | \
252 EXT4_FRO_COM_LARGE_FILE)
255 * EXT3 supported feature set
257 #define EXT3_SUPPORTED_FCOM (EXT4_FCOM_DIR_INDEX)
259 #define EXT3_SUPPORTED_FINCOM \
260 (EXT4_FINCOM_FILETYPE | EXT4_FINCOM_META_BG)
262 #define EXT3_SUPPORTED_FRO_COM \
263 (EXT4_FRO_COM_SPARSE_SUPER | EXT4_FRO_COM_LARGE_FILE)
266 * EXT4 supported feature set
268 #define EXT4_SUPPORTED_FCOM (EXT4_FCOM_DIR_INDEX)
270 #define EXT4_SUPPORTED_FINCOM \
271 (EXT4_FINCOM_FILETYPE | EXT4_FINCOM_META_BG | \
272 EXT4_FINCOM_EXTENTS | EXT4_FINCOM_FLEX_BG | \
275 #define EXT4_SUPPORTED_FRO_COM \
276 (EXT4_FRO_COM_SPARSE_SUPER | \
277 EXT4_FRO_COM_METADATA_CSUM | \
278 EXT4_FRO_COM_LARGE_FILE | EXT4_FRO_COM_GDT_CSUM | \
279 EXT4_FRO_COM_DIR_NLINK | \
280 EXT4_FRO_COM_EXTRA_ISIZE | EXT4_FRO_COM_HUGE_FILE)
283 * RECOVER - journaling in lwext4 is not supported
284 * (probably won't be ever...)
285 * MMP - multi-mount protection (impossible scenario)
/*
 * Mask of incompatible-feature bits that are ignored: RECOVER and MMP.
 * The expansion is parenthesized so the mask behaves as a single operand
 * when combined with other operators (e.g. ~EXT_FINCOM_IGNORED or
 * features & EXT_FINCOM_IGNORED).
 */
#define EXT_FINCOM_IGNORED \
	(EXT4_FINCOM_RECOVER | EXT4_FINCOM_MMP)
291 /*TODO: Features incompatible to implement*/
292 #define EXT4_SUPPORTED_FINCOM
293 (EXT4_FINCOM_INLINE_DATA)
295 /*TODO: Features read only to implement*/
296 #define EXT4_SUPPORTED_FRO_COM
297 EXT4_FRO_COM_BIGALLOC |\
302 struct ext4_blockdev *bdev;
303 struct ext4_sblock sb;
305 uint64_t inode_block_limits[4];
306 uint64_t inode_blocks_per_level[4];
308 uint32_t last_inode_bg_id;
311 /* Inode table/bitmap not in use */
312 #define EXT4_BLOCK_GROUP_INODE_UNINIT 0x0001
313 /* Block bitmap not in use */
314 #define EXT4_BLOCK_GROUP_BLOCK_UNINIT 0x0002
315 /* On-disk itable initialized to zero */
316 #define EXT4_BLOCK_GROUP_ITABLE_ZEROED 0x0004
319 * Structure of a blocks group descriptor
322 uint32_t block_bitmap_lo; /* Blocks bitmap block */
323 uint32_t inode_bitmap_lo; /* Inodes bitmap block */
324 uint32_t inode_table_first_block_lo; /* Inodes table block */
325 uint16_t free_blocks_count_lo; /* Free blocks count */
326 uint16_t free_inodes_count_lo; /* Free inodes count */
327 uint16_t used_dirs_count_lo; /* Directories count */
328 uint16_t flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */
329 uint32_t exclude_bitmap_lo; /* Exclude bitmap for snapshots */
330 uint16_t block_bitmap_csum_lo; /* crc32c(s_uuid+grp_num+bbitmap) LE */
331 uint16_t inode_bitmap_csum_lo; /* crc32c(s_uuid+grp_num+ibitmap) LE */
332 uint16_t itable_unused_lo; /* Unused inodes count */
333 uint16_t checksum; /* crc16(sb_uuid+group+desc) */
335 uint32_t block_bitmap_hi; /* Blocks bitmap block MSB */
336 uint32_t inode_bitmap_hi; /* I-nodes bitmap block MSB */
337 uint32_t inode_table_first_block_hi; /* I-nodes table block MSB */
338 uint16_t free_blocks_count_hi; /* Free blocks count MSB */
339 uint16_t free_inodes_count_hi; /* Free i-nodes count MSB */
340 uint16_t used_dirs_count_hi; /* Directories count MSB */
341 uint16_t itable_unused_hi; /* Unused inodes count MSB */
342 uint32_t exclude_bitmap_hi; /* Exclude bitmap block MSB */
343 uint16_t block_bitmap_csum_hi; /* crc32c(s_uuid+grp_num+bbitmap) BE */
344 uint16_t inode_bitmap_csum_hi; /* crc32c(s_uuid+grp_num+ibitmap) BE */
345 uint32_t reserved; /* Padding */
348 struct ext4_block_group_ref {
349 struct ext4_block block;
350 struct ext4_bgroup *block_group;
356 #define EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE 32
357 #define EXT4_MAX_BLOCK_GROUP_DESCRIPTOR_SIZE 64
359 #define EXT4_MIN_BLOCK_SIZE 1024 /* 1 KiB */
360 #define EXT4_MAX_BLOCK_SIZE 65536 /* 64 KiB */
361 #define EXT4_REV0_INODE_SIZE 128
363 #define EXT4_INODE_BLOCK_SIZE 512
365 #define EXT4_INODE_DIRECT_BLOCK_COUNT 12
366 #define EXT4_INODE_INDIRECT_BLOCK EXT4_INODE_DIRECT_BLOCK_COUNT
367 #define EXT4_INODE_DOUBLE_INDIRECT_BLOCK (EXT4_INODE_INDIRECT_BLOCK + 1)
368 #define EXT4_INODE_TRIPPLE_INDIRECT_BLOCK (EXT4_INODE_DOUBLE_INDIRECT_BLOCK + 1)
369 #define EXT4_INODE_BLOCKS (EXT4_INODE_TRIPPLE_INDIRECT_BLOCK + 1)
370 #define EXT4_INODE_INDIRECT_BLOCK_COUNT \
371 (EXT4_INODE_BLOCKS - EXT4_INODE_DIRECT_BLOCK_COUNT)
374 * Structure of an inode on the disk
377 uint16_t mode; /* File mode */
378 uint16_t uid; /* Low 16 bits of owner uid */
379 uint32_t size_lo; /* Size in bytes */
380 uint32_t access_time; /* Access time */
381 uint32_t change_inode_time; /* I-node change time */
382 uint32_t modification_time; /* Modification time */
383 uint32_t deletion_time; /* Deletion time */
384 uint16_t gid; /* Low 16 bits of group id */
385 uint16_t links_count; /* Links count */
386 uint32_t blocks_count_lo; /* Blocks count */
387 uint32_t flags; /* File flags */
388 uint32_t unused_osd1; /* OS dependent - not used in HelenOS */
389 uint32_t blocks[EXT4_INODE_BLOCKS]; /* Pointers to blocks */
390 uint32_t generation; /* File version (for NFS) */
391 uint32_t file_acl_lo; /* File ACL */
393 uint32_t obso_faddr; /* Obsoleted fragment address */
397 uint16_t blocks_high;
398 uint16_t file_acl_high;
401 uint16_t checksum_lo; /* crc32c(uuid+inum+inode) LE */
411 } __attribute__((packed)) osd2;
413 uint16_t extra_isize;
414 uint16_t checksum_hi; /* crc32c(uuid+inum+inode) BE */
415 uint32_t ctime_extra; /* Extra change time (nsec << 2 | epoch) */
416 uint32_t mtime_extra; /* Extra Modification time (nsec << 2 | epoch) */
417 uint32_t atime_extra; /* Extra Access time (nsec << 2 | epoch) */
418 uint32_t crtime; /* File creation time */
420 crtime_extra; /* Extra file creation time (nsec << 2 | epoch) */
421 uint32_t version_hi; /* High 32 bits for 64-bit version */
422 } __attribute__((packed));
424 #define EXT4_INODE_MODE_FIFO 0x1000
425 #define EXT4_INODE_MODE_CHARDEV 0x2000
426 #define EXT4_INODE_MODE_DIRECTORY 0x4000
427 #define EXT4_INODE_MODE_BLOCKDEV 0x6000
428 #define EXT4_INODE_MODE_FILE 0x8000
429 #define EXT4_INODE_MODE_SOFTLINK 0xA000
430 #define EXT4_INODE_MODE_SOCKET 0xC000
431 #define EXT4_INODE_MODE_TYPE_MASK 0xF000
436 #define EXT4_INODE_FLAG_SECRM 0x00000001 /* Secure deletion */
437 #define EXT4_INODE_FLAG_UNRM 0x00000002 /* Undelete */
438 #define EXT4_INODE_FLAG_COMPR 0x00000004 /* Compress file */
439 #define EXT4_INODE_FLAG_SYNC 0x00000008 /* Synchronous updates */
440 #define EXT4_INODE_FLAG_IMMUTABLE 0x00000010 /* Immutable file */
441 #define EXT4_INODE_FLAG_APPEND 0x00000020 /* writes to file may only append */
442 #define EXT4_INODE_FLAG_NODUMP 0x00000040 /* do not dump file */
443 #define EXT4_INODE_FLAG_NOATIME 0x00000080 /* do not update atime */
445 /* Compression flags */
446 #define EXT4_INODE_FLAG_DIRTY 0x00000100
447 #define EXT4_INODE_FLAG_COMPRBLK \
448 0x00000200 /* One or more compressed clusters */
449 #define EXT4_INODE_FLAG_NOCOMPR 0x00000400 /* Don't compress */
450 #define EXT4_INODE_FLAG_ECOMPR 0x00000800 /* Compression error */
452 #define EXT4_INODE_FLAG_INDEX 0x00001000 /* hash-indexed directory */
453 #define EXT4_INODE_FLAG_IMAGIC 0x00002000 /* AFS directory */
454 #define EXT4_INODE_FLAG_JOURNAL_DATA \
455 0x00004000 /* File data should be journaled */
456 #define EXT4_INODE_FLAG_NOTAIL 0x00008000 /* File tail should not be merged */
457 #define EXT4_INODE_FLAG_DIRSYNC \
458 0x00010000 /* Dirsync behaviour (directories only) */
459 #define EXT4_INODE_FLAG_TOPDIR 0x00020000 /* Top of directory hierarchies */
460 #define EXT4_INODE_FLAG_HUGE_FILE 0x00040000 /* Set to each huge file */
461 #define EXT4_INODE_FLAG_EXTENTS 0x00080000 /* Inode uses extents */
462 #define EXT4_INODE_FLAG_EA_INODE 0x00200000 /* Inode used for large EA */
463 #define EXT4_INODE_FLAG_EOFBLOCKS 0x00400000 /* Blocks allocated beyond EOF */
464 #define EXT4_INODE_FLAG_RESERVED 0x80000000 /* reserved for ext4 lib */
466 #define EXT4_INODE_ROOT_INDEX 2
468 struct ext4_inode_ref {
469 struct ext4_block block;
470 struct ext4_inode *inode;
476 #define EXT4_DIRECTORY_FILENAME_LEN 255
478 /**@brief Directory entry types. */
479 enum { EXT4_DE_UNKNOWN = 0,
488 #define EXT4_DIRENTRY_DIR_CSUM 0xDE
/*
 * One byte of a directory entry that is read in one of two ways: as the
 * higher 8 bits of the name length, or as the type of the referenced inode.
 * Both members are uint8_t, so the union occupies a single byte.
 */
490 union ext4_dir_en_internal {
491 uint8_t name_length_high; /* Higher 8 bits of name length */
492 uint8_t inode_type; /* Type of referenced inode (in rev >= 0.5) */
493 } __attribute__((packed));
496 * Linked list directory entry structure
499 uint32_t inode; /* I-node for the entry */
500 uint16_t entry_len; /* Distance to the next directory entry */
501 uint8_t name_len; /* Lower 8 bits of name length */
503 union ext4_dir_en_internal in;
505 uint8_t name[EXT4_DIRECTORY_FILENAME_LEN]; /* Entry name */
506 } __attribute__((packed));
508 struct ext4_dir_iter {
509 struct ext4_inode_ref *inode_ref;
510 struct ext4_block curr_blk;
512 struct ext4_dir_en *curr;
515 struct ext4_dir_search_result {
516 struct ext4_block block;
517 struct ext4_dir_en *dentry;
520 /* Structures for indexed directory */
522 struct ext4_dir_idx_climit {
527 struct ext4_dir_idx_dot_en {
529 uint16_t entry_length;
535 struct ext4_dir_idx_rinfo {
536 uint32_t reserved_zero;
537 uint8_t hash_version;
539 uint8_t indirect_levels;
540 uint8_t unused_flags;
543 struct ext4_dir_idx_entry {
548 struct ext4_dir_idx_root {
549 struct ext4_dir_idx_dot_en dots[2];
550 struct ext4_dir_idx_rinfo info;
551 struct ext4_dir_idx_entry en[];
554 struct ext4_fake_dir_entry {
556 uint16_t entry_length;
561 struct ext4_dir_idx_node {
562 struct ext4_fake_dir_entry fake;
563 struct ext4_dir_idx_entry entries[];
566 struct ext4_dir_idx_block {
568 struct ext4_dir_idx_entry *entries;
569 struct ext4_dir_idx_entry *position;
573 * This goes at the end of each htree block.
575 struct ext4_dir_idx_tail {
577 uint32_t checksum; /* crc32c(uuid+inum+dirblock) */
581 * This is a bogus directory entry at the end of each leaf block that
584 struct ext4_dir_entry_tail {
585 uint32_t reserved_zero1; /* Pretend to be unused */
586 uint16_t rec_len; /* 12 */
587 uint8_t reserved_zero2; /* Zero name length */
588 uint8_t reserved_ft; /* 0xDE, fake file type */
589 uint32_t checksum; /* crc32c(uuid+inum+dirblock) */
592 #define EXT4_DIRENT_TAIL(block, blocksize) \
593 ((struct ext4_dir_entry_tail *)(((char *)(block)) + ((blocksize) - \
594 sizeof(struct ext4_dir_entry_tail))))
596 #define EXT4_ERR_BAD_DX_DIR (-25000)
598 #define EXT4_LINK_MAX 65000
600 #define EXT4_BAD_INO 1
601 #define EXT4_ROOT_INO 2
602 #define EXT4_BOOT_LOADER_INO 5
603 #define EXT4_UNDEL_DIR_INO 6
604 #define EXT4_RESIZE_INO 7
605 #define EXT4_JOURNAL_INO 8
607 #define EXT4_GOOD_OLD_FIRST_INO 11
/* Bit 15 of the 16-bit block_count field flags an extent as unwritten. */
#define EXT4_EXT_UNWRITTEN_MASK (1L << 15)

/* Largest block counts representable for written / unwritten extents. */
#define EXT4_EXT_MAX_LEN_WRITTEN (1L << 15)
#define EXT4_EXT_MAX_LEN_UNWRITTEN \
	(EXT4_EXT_MAX_LEN_WRITTEN - 1)

/* Raw block_count of an extent, passed through to_le16(). */
#define EXT4_EXT_GET_LEN(ex) to_le16((ex)->block_count)
/*
 * Block count with the unwritten flag bit masked off.
 * This is a pure read: the extent itself is not modified.
 */
#define EXT4_EXT_GET_LEN_UNWRITTEN(ex) \
	(EXT4_EXT_GET_LEN(ex) & ~(EXT4_EXT_UNWRITTEN_MASK))
/* Store a new block count (flag bit included, if the caller set it). */
#define EXT4_EXT_SET_LEN(ex, count) \
	((ex)->block_count = to_le16(count))

/*
 * An extent counts as unwritten only when the raw length is strictly above
 * EXT4_EXT_MAX_LEN_WRITTEN; a raw value of exactly 0x8000 is a written
 * extent of maximal length.
 */
#define EXT4_EXT_IS_UNWRITTEN(ex) \
	(EXT4_EXT_GET_LEN(ex) > EXT4_EXT_MAX_LEN_WRITTEN)
/* Set the unwritten flag bit in place. */
#define EXT4_EXT_SET_UNWRITTEN(ex) \
	((ex)->block_count |= to_le16(EXT4_EXT_UNWRITTEN_MASK))
/* Clear the unwritten flag bit in place. */
#define EXT4_EXT_SET_WRITTEN(ex) \
	((ex)->block_count &= ~(to_le16(EXT4_EXT_UNWRITTEN_MASK)))
628 * This is the extent tail on-disk structure.
629 * All other extent structures are 12 bytes long. It turns out that
630 * block_size % 12 >= 4 for at least all powers of 2 greater than 512, which
631 * covers all valid ext4 block sizes. Therefore, this tail structure can be
632 * crammed into the end of the block without having to rebalance the tree.
634 struct ext4_extent_tail
636 uint32_t et_checksum; /* crc32c(uuid+inum+extent_block) */
640 * This is the extent on-disk structure.
641 * It's used at the bottom of the tree.
644 uint32_t first_block; /* First logical block extent covers */
645 uint16_t block_count; /* Number of blocks covered by extent */
646 uint16_t start_hi; /* High 16 bits of physical block */
647 uint32_t start_lo; /* Low 32 bits of physical block */
651 * This is index on-disk structure.
652 * It's used at all the levels except the bottom.
654 struct ext4_extent_index {
655 uint32_t first_block; /* Index covers logical blocks from 'block' */
658 * Pointer to the physical block of the next
659 * level. leaf or next index could be there
660 * high 16 bits of physical block
668 * Each block (leaves and indexes), even inode-stored has header.
670 struct ext4_extent_header {
672 uint16_t entries_count; /* Number of valid entries */
673 uint16_t max_entries_count; /* Capacity of store in entries */
674 uint16_t depth; /* Has tree real underlying blocks? */
675 uint32_t generation; /* generation of the tree */
682 typedef uint32_t ext4_lblk_t;
683 typedef uint64_t ext4_fsblk_t;
686 * Array of ext4_ext_path contains path to some extent.
687 * Creation/lookup routines use it for traversal/splitting/etc.
688 * Truncate uses it to simulate recursive walking.
690 struct ext4_extent_path {
691 ext4_fsblk_t p_block;
692 struct ext4_block block;
695 struct ext4_extent_header *header;
696 struct ext4_extent_index *index;
697 struct ext4_extent *extent;
702 #define EXT4_EXTENT_MAGIC 0xF30A
704 #define EXT4_EXTENT_FIRST(header) \
705 ((struct ext4_extent *)(((char *)(header)) + \
706 sizeof(struct ext4_extent_header)))
708 #define EXT4_EXTENT_FIRST_INDEX(header) \
709 ((struct ext4_extent_index *)(((char *)(header)) + \
710 sizeof(struct ext4_extent_header)))
713 * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an
714 * initialized extent. This is 2^15 and not (2^16 - 1), since we use the
715 * MSB of ee_len field in the extent datastructure to signify if this
716 * particular extent is an initialized extent or an uninitialized (i.e.
718 * EXT_UNINIT_MAX_LEN is the maximum number of blocks we can have in an
719 * uninitialized extent.
720 * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an
721 * uninitialized one. In other words, if MSB of ee_len is set, it is an
722 * uninitialized extent with only one special scenario when ee_len = 0x8000.
723 * In this case we can not have an uninitialized extent of zero length and
724 * thus we make it as a special case of initialized extent with 0x8000 length.
725 * This way we get better extent-to-group alignment for initialized extents.
726 * Hence, the maximum number of blocks we can have in an *initialized*
727 * extent is 2^15 (32768) and in an *uninitialized* extent is 2^15-1 (32767).
729 #define EXT_INIT_MAX_LEN (1L << 15)
730 #define EXT_UNWRITTEN_MAX_LEN (EXT_INIT_MAX_LEN - 1)
/* Byte sizes of one extent record and of one extent index record. */
#define EXT_EXTENT_SIZE sizeof(struct ext4_extent)
#define EXT_INDEX_SIZE sizeof(struct ext4_extent_index)
735 #define EXT_FIRST_EXTENT(__hdr__) \
736 ((struct ext4_extent *)(((char *)(__hdr__)) + \
737 sizeof(struct ext4_extent_header)))
738 #define EXT_FIRST_INDEX(__hdr__) \
739 ((struct ext4_extent_index *)(((char *)(__hdr__)) + \
740 sizeof(struct ext4_extent_header)))
741 #define EXT_HAS_FREE_INDEX(__path__) \
742 ((__path__)->header->entries_count < (__path__)->header->max_entries_count)
743 #define EXT_LAST_EXTENT(__hdr__) \
744 (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->entries_count - 1)
745 #define EXT_LAST_INDEX(__hdr__) \
746 (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->entries_count - 1)
747 #define EXT_MAX_EXTENT(__hdr__) \
748 (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->max_entries_count - 1)
749 #define EXT_MAX_INDEX(__hdr__) \
750 (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->max_entries_count - 1)
752 #define EXT4_EXTENT_TAIL_OFFSET(hdr) \
753 (sizeof(struct ext4_extent_header) + \
754 (sizeof(struct ext4_extent) * (hdr)->max_entries_count))
757 * ext4_ext_next_allocated_block:
758 * returns allocated block in subsequent extent or EXT_MAX_BLOCKS.
759 * NOTE: it considers block number from index entry as
760 * allocated block. Thus, index entries have to be consistent
/*
 * All-ones value of ext4_lblk_t. The cast expression is wrapped in
 * parentheses so the macro expands to a single operand wherever it is used.
 */
#define EXT_MAX_BLOCKS ((ext4_lblk_t)(-1))
765 #define IN_RANGE(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
768 /******************************************************************************/
770 /* EXT3 HTree directory indexing */
771 #define EXT2_HTREE_LEGACY 0
772 #define EXT2_HTREE_HALF_MD4 1
773 #define EXT2_HTREE_TEA 2
774 #define EXT2_HTREE_LEGACY_UNSIGNED 3
775 #define EXT2_HTREE_HALF_MD4_UNSIGNED 4
776 #define EXT2_HTREE_TEA_UNSIGNED 5
778 #define EXT2_HTREE_EOF 0x7FFFFFFFUL
780 struct ext4_hash_info {
783 uint32_t hash_version;
784 const uint32_t *seed;
787 /* Extended Attribute(EA) */
789 /* Magic value in attribute blocks */
790 #define EXT4_XATTR_MAGIC 0xEA020000
792 /* Maximum number of references to one attribute block */
793 #define EXT4_XATTR_REFCOUNT_MAX 1024
796 #define EXT4_XATTR_INDEX_USER 1
797 #define EXT4_XATTR_INDEX_POSIX_ACL_ACCESS 2
798 #define EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT 3
799 #define EXT4_XATTR_INDEX_TRUSTED 4
800 #define EXT4_XATTR_INDEX_LUSTRE 5
801 #define EXT4_XATTR_INDEX_SECURITY 6
802 #define EXT4_XATTR_INDEX_SYSTEM 7
803 #define EXT4_XATTR_INDEX_RICHACL 8
804 #define EXT4_XATTR_INDEX_ENCRYPTION 9
/*
 * Header of an extended-attribute block. EXT4_XATTR_BHDR() in this file
 * obtains it by casting (block)->data, and EXT4_XATTR_BFIRST() treats the
 * bytes immediately following this header as the first ext4_xattr_entry.
 * Packed, so the compiler inserts no padding between members.
 */
806 struct ext4_xattr_header {
807 uint32_t h_magic; /* magic number for identification */
808 uint32_t h_refcount; /* reference count */
809 uint32_t h_blocks; /* number of disk blocks used */
810 uint32_t h_hash; /* hash value of all attributes */
811 uint32_t h_checksum; /* crc32c(uuid+id+xattrblock) */
812 /* id = inum if refcount=1, blknum otherwise */
813 uint32_t h_reserved[3]; /* zero right now */
814 } __attribute__((packed));
/*
 * Header of the extended-attribute area kept inside an inode.
 * EXT4_XATTR_IHDR() in this file locates it at
 * raw_inode + EXT4_GOOD_OLD_INODE_SIZE + extra_isize, and
 * EXT4_XATTR_IFIRST() treats the bytes right after it as the first entry.
 */
816 struct ext4_xattr_ibody_header {
817 uint32_t h_magic; /* magic number for identification */
818 } __attribute__((packed));
/*
 * Fixed-size descriptor of a single extended attribute.
 * The attribute name is not a member: EXT4_XATTR_NAME() in this file
 * addresses it as the bytes directly after this struct, and
 * EXT4_XATTR_LEN() sizes an entry as sizeof(struct) + name length rounded
 * up to EXT4_XATTR_PAD bytes.
 */
820 struct ext4_xattr_entry {
821 uint8_t e_name_len; /* length of name */
822 uint8_t e_name_index; /* attribute name index */
823 uint16_t e_value_offs; /* offset in disk block of value */
824 uint32_t e_value_block; /* disk block attribute is stored on (n/i) */
825 uint32_t e_value_size; /* size of attribute value */
826 uint32_t e_hash; /* hash value of name and value */
827 } __attribute__((packed));
829 struct ext4_xattr_item {
830 /* This attribute should be stored in inode body */
839 RB_ENTRY(ext4_xattr_item) node;
842 struct ext4_xattr_ref {
844 struct ext4_block block;
845 struct ext4_inode_ref *inode_ref;
851 struct ext4_xattr_item *iter_from;
853 RB_HEAD(ext4_xattr_tree,
854 ext4_xattr_item) root;
857 #define EXT4_XATTR_ITERATE_CONT 0
858 #define EXT4_XATTR_ITERATE_STOP 1
859 #define EXT4_XATTR_ITERATE_PAUSE 2
861 #define EXT4_GOOD_OLD_INODE_SIZE 128
863 #define EXT4_XATTR_PAD_BITS 2
864 #define EXT4_XATTR_PAD (1<<EXT4_XATTR_PAD_BITS)
865 #define EXT4_XATTR_ROUND (EXT4_XATTR_PAD-1)
866 #define EXT4_XATTR_LEN(name_len) \
867 (((name_len) + EXT4_XATTR_ROUND + \
868 sizeof(struct ext4_xattr_entry)) & ~EXT4_XATTR_ROUND)
869 #define EXT4_XATTR_NEXT(entry) \
870 ((struct ext4_xattr_entry *)( \
871 (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len)))
872 #define EXT4_XATTR_SIZE(size) \
873 (((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND)
874 #define EXT4_XATTR_NAME(entry) \
875 ((char *)((entry) + 1))
/*
 * Address of the in-inode xattr header: EXT4_GOOD_OLD_INODE_SIZE bytes plus
 * (raw_inode)->extra_isize bytes past the start of the raw inode.
 * The argument is parenthesized before the char * cast so that expression
 * arguments such as (inodes + 1) are cast as a whole.
 */
#define EXT4_XATTR_IHDR(raw_inode) \
	((struct ext4_xattr_ibody_header *) \
		((char *)(raw_inode) + \
		EXT4_GOOD_OLD_INODE_SIZE + \
		(raw_inode)->extra_isize))
882 #define EXT4_XATTR_IFIRST(hdr) \
883 ((struct ext4_xattr_entry *)((hdr)+1))
885 #define EXT4_XATTR_BHDR(block) \
886 ((struct ext4_xattr_header *)((block)->data))
887 #define EXT4_XATTR_ENTRY(ptr) \
888 ((struct ext4_xattr_entry *)(ptr))
889 #define EXT4_XATTR_BFIRST(block) \
890 EXT4_XATTR_ENTRY(EXT4_XATTR_BHDR(block)+1)
891 #define EXT4_XATTR_IS_LAST_ENTRY(entry) \
892 (*(uint32_t *)(entry) == 0)
894 #define EXT4_ZERO_XATTR_VALUE ((void *)-1)
896 /*****************************************************************************/
899 * JBD stores integers in big endian.
902 #define JBD_MAGIC_NUMBER 0xc03b3998U /* The first 4 bytes of /dev/random! */
905 * Descriptor block types:
908 #define JBD_DESCRIPTOR_BLOCK 1
909 #define JBD_COMMIT_BLOCK 2
910 #define JBD_SUPERBLOCK 3
911 #define JBD_SUPERBLOCK_V2 4
912 #define JBD_REVOKE_BLOCK 5
915 * Standard header for all descriptor blocks:
926 #define JBD_CRC32_CHKSUM 1
927 #define JBD_MD5_CHKSUM 2
928 #define JBD_SHA1_CHKSUM 3
929 #define JBD_CRC32C_CHKSUM 4
931 #define JBD_CRC32_CHKSUM_SIZE 4
933 #define JBD_CHECKSUM_BYTES (32 / sizeof(uint32_t))
935 * Commit block header for storing transactional checksums:
937 * NOTE: If FEATURE_COMPAT_CHECKSUM (checksum v1) is set, the h_chksum*
938 * fields are used to store a checksum of the descriptor and data blocks.
940 * If FEATURE_INCOMPAT_CSUM_V2 (checksum v2) is set, then the h_chksum
941 * field is used to store crc32c(uuid+commit_block). Each journal metadata
942 * block gets its own checksum, and data block checksums are stored in
943 * journal_block_tag (in the descriptor). The other h_chksum* fields are
946 * If FEATURE_INCOMPAT_CSUM_V3 is set, the descriptor block uses
947 * journal_block_tag3_t to store a full 32-bit checksum. Everything else
950 * Checksum v1, v2, and v3 are mutually exclusive features.
952 struct jbd_commit_header {
953 struct jbd_bhdr header;
957 uint32_t chksum[JBD_CHECKSUM_BYTES];
959 uint32_t commit_nsec;
963 * The block tag: used to describe a single buffer in the journal
965 struct jbd_block_tag3 {
966 uint32_t blocknr; /* The on-disk block number */
967 uint32_t flags; /* See below */
968 uint32_t blocknr_high; /* most-significant high 32bits. */
969 uint32_t checksum; /* crc32c(uuid+seq+block) */
972 struct jbd_block_tag {
973 uint32_t blocknr; /* The on-disk block number */
974 uint16_t checksum; /* truncated crc32c(uuid+seq+block) */
975 uint16_t flags; /* See below */
976 uint32_t blocknr_high; /* most-significant high 32bits. */
979 /* Definitions for the journal tag flags word: */
980 #define JBD_FLAG_ESCAPE 1 /* on-disk block is escaped */
981 #define JBD_FLAG_SAME_UUID 2 /* block has same uuid as previous */
982 #define JBD_FLAG_DELETED 4 /* block deleted by this transaction */
983 #define JBD_FLAG_LAST_TAG 8 /* last tag in this descriptor block */
985 /* Tail of descriptor block, for checksumming */
986 struct jbd_block_tail {
991 * The revoke descriptor: used on disk to describe a series of blocks to
992 * be revoked from the log
994 struct jbd_revoke_header {
995 struct jbd_bhdr header; /* NOTE(review): presumably the common journal block header - confirm against struct jbd_bhdr */
996 uint32_t count; /* Count of bytes used in the block */
999 /* Tail of a revoke block, used for checksumming. */
1000 struct jbd_revoke_tail {
/* Maximum number of user ids that fit in the users area. */
#define JBD_USERS_MAX 48

/* Size in bytes of the users area: JBD_USERS_MAX ids of UUID_SIZE bytes each. */
#define JBD_USERS_SIZE (UUID_SIZE * JBD_USERS_MAX)
/*
 * The journal superblock. All fields are in big-endian byte order.
 */
1012 struct jbd_bhdr header; /* NOTE(review): presumably the common journal block header - confirm against struct jbd_bhdr */
1015 /* Static information describing the journal */
1016 uint32_t blocksize; /* Journal device block size */
1017 uint32_t maxlen; /* Total number of blocks in the journal file */
1018 uint32_t first; /* First block of log information */
1021 /* Dynamic information describing the current state of the log */
1022 uint32_t sequence; /* First commit ID expected in the log */
1023 uint32_t start; /* Block number of the start of the log */
1026 /* Error value, as set by journal_abort(). */
1030 /* Remaining fields are only valid in a version-2 superblock */
1031 uint32_t feature_compat; /* Compatible feature set */
1032 uint32_t feature_incompat; /* Incompatible feature set */
1033 uint32_t feature_ro_compat; /* Read-only-compatible feature set */
1035 uint8_t uuid[UUID_SIZE]; /* 128-bit UUID of the journal */
1038 uint32_t nr_users; /* Number of filesystems sharing the log */
1040 uint32_t dynsuper; /* Block number of the dynamic superblock copy */
1043 uint32_t max_transaction; /* Limit of journal blocks per transaction */
1044 uint32_t max_trandata; /* Limit of data blocks per transaction */
1047 uint8_t checksum_type; /* Checksum type */
1048 uint8_t padding2[3]; /* NOTE(review): presumably alignment padding after checksum_type - confirm */
1049 uint32_t padding[42]; /* NOTE(review): presumably reserved space - confirm */
1050 uint32_t checksum; /* crc32c(superblock) */
1053 uint8_t users[JBD_USERS_SIZE]; /* Ids of all filesystems sharing the log */
/* Size in bytes of struct jbd_sb. */
#define JBD_SUPERBLOCK_SIZE sizeof(struct jbd_sb)

/*
 * Feature tests on a journal superblock.
 *
 * jsb  - pointer to a journal superblock whose fields are stored in
 *        big-endian byte order.
 * mask - feature bit(s) in CPU byte order (JBD_FEATURE_* values).
 *
 * Each macro evaluates to non-zero only when the header block type is at
 * least 2 and at least one bit of mask is set in the matching feature word.
 *
 * The block type is converted to CPU byte order before the ordered
 * comparison: comparing two byte-swapped values with >= does not preserve
 * numeric ordering on little-endian hosts. The feature word is tested in
 * its stored byte order against a swapped mask, which is safe because
 * bitwise AND is insensitive to byte order.
 *
 * Note: jsb is evaluated more than once; do not pass expressions with
 * side effects.
 */
#define JBD_HAS_COMPAT_FEATURE(jsb,mask)				\
	(to_be32((jsb)->header.blocktype) >= 2 &&			\
	 ((jsb)->feature_compat & to_be32((mask))))
#define JBD_HAS_RO_COMPAT_FEATURE(jsb,mask)				\
	(to_be32((jsb)->header.blocktype) >= 2 &&			\
	 ((jsb)->feature_ro_compat & to_be32((mask))))
#define JBD_HAS_INCOMPAT_FEATURE(jsb,mask)				\
	(to_be32((jsb)->header.blocktype) >= 2 &&			\
	 ((jsb)->feature_incompat & to_be32((mask))))
/* Compatible feature bits. */
#define JBD_FEATURE_COMPAT_CHECKSUM		0x00000001

/* Incompatible feature bits. */
#define JBD_FEATURE_INCOMPAT_REVOKE		0x00000001
#define JBD_FEATURE_INCOMPAT_64BIT		0x00000002
#define JBD_FEATURE_INCOMPAT_ASYNC_COMMIT	0x00000004
#define JBD_FEATURE_INCOMPAT_CSUM_V2		0x00000008
#define JBD_FEATURE_INCOMPAT_CSUM_V3		0x00000010

/* Feature sets known to this implementation, one mask per feature word. */
#define JBD_KNOWN_COMPAT_FEATURES	0
#define JBD_KNOWN_ROCOMPAT_FEATURES	0
#define JBD_KNOWN_INCOMPAT_FEATURES		\
	(JBD_FEATURE_INCOMPAT_REVOKE |		\
	 JBD_FEATURE_INCOMPAT_64BIT |		\
	 JBD_FEATURE_INCOMPAT_ASYNC_COMMIT |	\
	 JBD_FEATURE_INCOMPAT_CSUM_V2 |		\
	 JBD_FEATURE_INCOMPAT_CSUM_V3)
1088 /* If a journal block device is used, bdev will be non-null. */
1089 struct ext4_blockdev *bdev;
1090 struct ext4_inode_ref inode_ref; /* NOTE(review): presumably refers to the journal inode - confirm against callers */
1097 struct ext4_block block; /* NOTE(review): presumably the block this journal buffer tracks - confirm */
1098 struct jbd_trans *trans; /* NOTE(review): presumably the transaction this buffer belongs to - confirm */
1099 struct jbd_block_rec *block_rec; /* Associated block record */
1100 LIST_ENTRY(jbd_buf) buf_node; /* List linkage between jbd_buf elements */
/* One entry of a list of revoke records. */
1103 struct jbd_revoke_rec {
1105 LIST_ENTRY(jbd_revoke_rec) revoke_node; /* List linkage between jbd_revoke_rec elements */
/* Per-block record kept as a node of a red-black tree. */
1108 struct jbd_block_rec {
1110 struct ext4_buf *buf; /* NOTE(review): presumably the cached buffer for this block - confirm */
1111 struct jbd_trans *trans; /* NOTE(review): presumably the transaction currently associated with the block - confirm */
1112 RB_ENTRY(jbd_block_rec) block_rec_node; /* Red-black tree linkage */
1118 uint32_t start_iblock; /* NOTE(review): presumably the first journal block used by this transaction - confirm */
1124 struct jbd_journal *journal; /* NOTE(review): presumably the journal this transaction belongs to - confirm */
1126 LIST_HEAD(jbd_trans_buf, jbd_buf) buf_list; /* Head of a list of jbd_buf elements */
1127 LIST_HEAD(jbd_revoke_list, jbd_revoke_rec) revoke_list; /* Head of a list of jbd_revoke_rec elements */
1128 TAILQ_ENTRY(jbd_trans) trans_node; /* Tail-queue linkage between jbd_trans elements */
/* Journal state: transaction queues and the tree of block records. */
1131 struct jbd_journal {
1136 uint32_t alloc_trans_id; /* NOTE(review): presumably the next transaction id to hand out - confirm */
1138 uint32_t block_size; /* NOTE(review): presumably the journal block size in bytes - confirm */
1140 TAILQ_HEAD(jbd_trans_queue, jbd_trans) trans_queue; /* Tail queue of jbd_trans elements */
1141 TAILQ_HEAD(jbd_cp_queue, jbd_trans) cp_queue; /* Second tail queue of jbd_trans; NOTE(review): presumably the checkpoint queue - confirm */
1142 RB_HEAD(jbd_block, jbd_block_rec) block_rec_root; /* Root of the red-black tree of jbd_block_rec nodes */
1144 struct jbd_fs *jbd_fs; /* NOTE(review): presumably back-pointer to the owning journal filesystem state - confirm */
/*****************************************************************************/

/* All-ones 32-bit constant; NOTE(review): presumably the CRC32 seed value. */
#define EXT4_CRC32_INIT (0xFFFFFFFFUL)

/*****************************************************************************/
/**
 * @brief Reverse the byte order of a 64-bit value.
 * @param n value to convert
 * @return n with its eight bytes in reverse order
 */
static inline uint64_t reorder64(uint64_t n)
{
	uint64_t swapped = 0;

	/* Peel bytes off the low end of n and push them onto swapped. */
	for (unsigned int i = 0; i < sizeof(n); i++) {
		swapped = (swapped << 8) | (n & 0xff);
		n >>= 8;
	}

	return swapped;
}
/**
 * @brief Reverse the byte order of a 32-bit value.
 * @param n value to convert
 * @return n with its four bytes in reverse order
 */
static inline uint32_t reorder32(uint32_t n)
{
	/* Shift first, then mask: the two outer bytes need no mask at all. */
	const uint32_t moved_up = (n << 24) | ((n << 8) & 0x00ff0000);
	const uint32_t moved_down = ((n >> 8) & 0x0000ff00) | (n >> 24);

	return moved_up | moved_down;
}
/**
 * @brief Swap the two bytes of a 16-bit value.
 * @param n value to convert
 * @return n with its low and high bytes exchanged
 */
static inline uint16_t reorder16(uint16_t n)
{
	const unsigned int low_byte = n & 0xffu;
	const unsigned int high_byte = (n >> 8) & 0xffu;

	return (uint16_t)((low_byte << 8) | high_byte);
}
/*
 * Byte-order conversion helpers.
 *
 * to_leNN() converts between CPU order and little-endian, to_beNN() between
 * CPU order and big-endian. When CONFIG_BIG_ENDIAN is defined the
 * little-endian helpers swap bytes via reorderNN() and the big-endian ones
 * are identities; otherwise the roles are reversed.
 *
 * The identity forms wrap their argument in parentheses so that an
 * expression argument keeps its grouping after expansion, e.g.
 * to_le32(a + b) * 2 yields (a + b) * 2 rather than a + b * 2.
 */
#ifdef CONFIG_BIG_ENDIAN
#define to_le64(_n) reorder64(_n)
#define to_le32(_n) reorder32(_n)
#define to_le16(_n) reorder16(_n)

#define to_be64(_n) (_n)
#define to_be32(_n) (_n)
#define to_be16(_n) (_n)

#else
#define to_le64(_n) (_n)
#define to_le32(_n) (_n)
#define to_le16(_n) (_n)

#define to_be64(_n) reorder64(_n)
#define to_be32(_n) reorder32(_n)
#define to_be16(_n) reorder16(_n)
#endif
/****************************Access macros to ext4 structures*****************/

/*
 * Field accessors for structures whose multi-byte fields are stored in
 * little-endian byte order.
 *
 * s - pointer to the structure
 * f - name of the field to access
 * v - value to store, in CPU byte order
 *
 * The getters evaluate to the field value converted with to_leNN(); the
 * setters are single statements that store the converted value. The 8-bit
 * variants need no conversion.
 */
#define ext4_get32(s, f) to_le32((s)->f)
#define ext4_get16(s, f) to_le16((s)->f)
#define ext4_get8(s, f) ((s)->f)

#define ext4_set32(s, f, v)                                                    \
	do {                                                                   \
		(s)->f = to_le32(v);                                           \
	} while (0)
#define ext4_set16(s, f, v)                                                    \
	do {                                                                   \
		(s)->f = to_le16(v);                                           \
	} while (0)
/*
 * The parameter list must follow the macro name directly; placed on a
 * continuation line it turns the macro into an object-like one that cannot
 * be invoked with arguments.
 */
#define ext4_set8(s, f, v)                                                     \
	do {                                                                   \
		(s)->f = (v);                                                  \
	} while (0)
/****************************Access macros to jbd2 structures*****************/

/*
 * Field accessors for journal structures whose multi-byte fields are stored
 * in big-endian byte order.
 *
 * s - pointer to the structure
 * f - name of the field to access
 * v - value to store, in CPU byte order
 *
 * The getters evaluate to the field value converted with to_beNN(); the
 * setters are single statements that store the converted value. The 8-bit
 * variants need no conversion.
 */
#define jbd_get32(s, f) to_be32((s)->f)
#define jbd_get16(s, f) to_be16((s)->f)
#define jbd_get8(s, f) ((s)->f)

#define jbd_set32(s, f, v)                                                     \
	do {                                                                   \
		(s)->f = to_be32(v);                                           \
	} while (0)
#define jbd_set16(s, f, v)                                                     \
	do {                                                                   \
		(s)->f = to_be16(v);                                           \
	} while (0)
/*
 * The parameter list must follow the macro name directly; placed on a
 * continuation line it turns the macro into an object-like one that cannot
 * be invoked with arguments.
 */
#define jbd_set8(s, f, v)                                                      \
	do {                                                                   \
		(s)->f = (v);                                                  \
	} while (0)
/*
 * Marks a declaration as intentionally unused to silence compiler warnings.
 * Some system headers already define this name, so only define it when it
 * is not yet available.
 */
#ifndef __unused
#define __unused __attribute__ ((__unused__))
#endif
/*
 * Byte offset of member `field` within `type`.
 *
 * Only defined when the standard macro from <stddef.h> is not already
 * visible, so that including both headers does not redefine it. The
 * compiler builtin is preferred where available because the classic
 * null-pointer form is not strictly conforming; that form is kept as the
 * fallback for other compilers.
 */
#ifndef offsetof
#if defined(__GNUC__)
#define offsetof(type, field) __builtin_offsetof(type, field)
#else
#define offsetof(type, field) 		\
	((size_t)(&(((type *)0)->field)))
#endif
#endif
1249 #endif /* EXT4_TYPES_H_ */