inode 中存储了文件的元信息,其中包括把文件的逻辑块号转换为磁盘物理块地址所需的索引。Linux 上用一个含 15 个指针的数组来存储这些块地址:前 12 个是直接索引,后面三个分别是一级、二级和三级间接索引,如下图所示。
(图片源自 https://upload.wikimedia.org/wikipedia/commons/a/a2/Ext2-inode.gif)
以linux-4.17.1 源码为例,为了便于理解,我将相关定义从各文件中拷过来,其中转换部分 ext4_block_to_path 函数的代码如下:
// ---------------------------------------------------------------------
// linux-4.17.1/fs/ext4/ext4.h
/*
* Macro-instructions used to manage several block sizes
*
* The *_LOG_SIZE constants are the base-2 logarithms of the matching
* *_SIZE constants (2^10 = 1024, 2^16 = 65536).
*/
#define EXT4_MIN_BLOCK_SIZE 1024
#define EXT4_MAX_BLOCK_SIZE 65536
#define EXT4_MIN_BLOCK_LOG_SIZE 10
#define EXT4_MAX_BLOCK_LOG_SIZE 16
#define EXT4_MAX_CLUSTER_LOG_SIZE 30
/*
* Block size of superblock @s. Kernel builds read the s_blocksize field
* directly; other builds derive it by shifting the minimum block size left
* by the s_log_block_size field.
*/
#ifdef __KERNEL__
# define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize)
#else
# define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size)
#endif
/*
* Number of __u32-sized block addresses that fit in one block,
* e.g. 1024 / 4 = 256 when sizeof(__u32) is 4 and the block size is 1024.
*/
#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof(__u32))
/*
* Cached s_addr_per_block_bits value. For reference, it is assigned in
* linux-4.17.1/fs/ext4/super.c as:
*   sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
*/
#define EXT4_ADDR_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_addr_per_block_bits)
/*
* Constants relative to the data blocks
*
* Slot indices in the inode's block-pointer array: slots 0..11 are direct,
* slot 12 is the single-indirect pointer, slot 13 the double-indirect
* pointer, slot 14 the triple-indirect pointer; EXT4_N_BLOCKS evaluates to
* 15, the total number of slots.
*/
#define EXT4_NDIR_BLOCKS 12
#define EXT4_IND_BLOCK EXT4_NDIR_BLOCKS
#define EXT4_DIND_BLOCK (EXT4_IND_BLOCK + 1)
#define EXT4_TIND_BLOCK (EXT4_DIND_BLOCK + 1)
#define EXT4_N_BLOCKS (EXT4_TIND_BLOCK + 1)
// ---------------------------------------------------------------------
// linux-4.17.1/fs/ext4/indirect.c
/**
* ext4_block_to_path - parse the block number into array of offsets
* @inode: inode in question (we are only interested in its superblock)
* @i_block: block number to be parsed
* @offsets: array to store the offsets in
* @boundary: set this non-zero if the referred-to block is likely to be
* followed (on disk) by an indirect block.
* NOTE(review): when @boundary is non-NULL the code stores
* final - 1 - (i_block & (ptrs - 1)), which is 0 for the last slot of
* the final-level pointer array and positive otherwise - confirm the
* intended sense of "non-zero" against the callers.
*
* To store the locations of file's data ext4 uses a data structure common
* for UNIX filesystems - tree of pointers anchored in the inode, with
* data blocks at leaves and indirect blocks in intermediate nodes.
* This function translates the block number into path in that tree -
* return value is the path length and @offsets[n] is the offset of
* pointer to (n+1)th node in the nth one. If @i_block is out of range
* (negative or too large) warning is printed and zero returned.
*
* Path length by range: 1 for a direct block, 2 via the single-indirect
* slot, 3 via the double-indirect slot, 4 via the triple-indirect slot.
*
* Note: function doesn't find node addresses, so no IO is needed. All
* we need to know is the capacity of indirect blocks (taken from the
* inode->i_sb).
*/
/*
* Portability note: the last comparison (check that we fit into triple
* indirect block) is spelled differently, because otherwise on an
* architecture with 32-bit longs and 8Kb pages we might get into trouble
* if our filesystem had 8Kb blocks. We might use long long, but that would
* kill us on x86. Oh, well, at least the sign propagation does not matter -
* i_block would have to be negative in the very beginning, so we would not
* get there at all.
*/
/*
* The numeric examples in the trailing comments below assume 1 KiB blocks,
* i.e. ptrs = 256 and ptrs_bits = 8.
*/
static int ext4_block_to_path(struct inode *inode,
ext4_lblk_t i_block,
ext4_lblk_t offsets[4], int *boundary)
{
int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb); // pointers per block: 1024 / 4 = 256
int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb); // log2 of ptrs: ilog2(256) = 8
const long direct_blocks = EXT4_NDIR_BLOCKS, // 12 blocks reachable directly
indirect_blocks = ptrs, // 256 blocks via the single-indirect slot
double_blocks = (1 << (ptrs_bits * 2)); // ptrs * ptrs = 2^16 = 65536 via the double-indirect slot
int n = 0; // path length so far; stays 0 if i_block is out of range
int final = 0; // number of slots in the last-level pointer array; stays 0 if out of range
// Each failed test below leaves i_block reduced by the size of the range
// just ruled out, so later branches see an index relative to their own range.
if (i_block < direct_blocks) { // i_block < 12: direct block
offsets[n++] = i_block; // offsets[0] = i_block (slot in the inode's array)
final = direct_blocks; // final = 12
} else if ((i_block -= direct_blocks) < indirect_blocks) { // i_block -= 12; then i_block < 256
offsets[n++] = EXT4_IND_BLOCK; // offsets[0] = 12 (single-indirect slot)
offsets[n++] = i_block; // offsets[1] = i_block (index inside the indirect block)
final = ptrs; // final = 256
} else if ((i_block -= indirect_blocks) < double_blocks) { // i_block -= 256; then i_block < 65536
offsets[n++] = EXT4_DIND_BLOCK; // offsets[0] = 13 (double-indirect slot)
offsets[n++] = i_block >> ptrs_bits; // offsets[1] = i_block / 256 (quotient)
offsets[n++] = i_block & (ptrs - 1); // offsets[2] = i_block % 256 (remainder)
final = ptrs; // final = 256
} else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) { // i_block -= 65536; then i_block / 65536 < 256
offsets[n++] = EXT4_TIND_BLOCK; // offsets[0] = 14 (triple-indirect slot)
offsets[n++] = i_block >> (ptrs_bits * 2); // offsets[1] = i_block / 65536 (quotient)
offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1); // offsets[2] = (i_block / 256) % 256
offsets[n++] = i_block & (ptrs - 1); // offsets[3] = i_block % 256
final = ptrs; // final = 256
} else {
// Out of range: the three subtractions above are added back so the
// warning reports the block number the caller passed in.
ext4_warning(inode->i_sb, "block %lu > max in inode %lu",
i_block + direct_blocks +
indirect_blocks + double_blocks, inode->i_ino);
}
if (boundary)
*boundary = final - 1 - (i_block & (ptrs - 1)); // final - 1 - (i_block % 256); negative on the out-of-range path because final is still 0
return n;
}
参考资料:
[1] inode. https://en.wikipedia.org/wiki/Inode
[2] inode pointer structure. https://en.wikipedia.org/wiki/Inode_pointer_structure
[3] Ext3 for large filesystems. [Posted June 12, 2006 by corbet] . https://lwn.net/Articles/187321/
[4] 《Unix 操作系统设计》.Maurice J. Bach 著, 陈葆珏, 王旭, 柳纯录, 冯切山译
[5] 《Operating System Concepts》. Avi Silberschatz, Peter Baer Galvin, Greg Gagne