linux stat lstat 内核实现分析

文章详细解析了Linux中stat,lstat和fstat系统调用的实现过程,包括它们如何获取文件或符号链接的元数据。主要关注点在于stat和lstat在处理符号链接时的区别,以及stat结构体的组成。同时,文章提到了EXT4文件系统如何注册并处理getattr操作,以提供文件的特定属性信息。
摘要由CSDN通过智能技术生成

stat是获取文件元数据,如果文件是一个符号链接,则会获得其指向的实际文件元数据。

lstat也是获取文件元数据,但如果文件是一个符号链接,则会获得链接本身文件元数据。

fstat是根据文件描述符获得文件元数据,效果和stat相同。这个函数本次不重点分析,可以根据本方法自己查看

系统调用表项

这里注册系统调用处理函数

< arch/x86/entry/syscalls/syscall_64.tbl >

4	common	stat			sys_newstat
5	common	fstat			sys_newfstat
6	common	lstat			sys_newlstat

调用函数头文件

这里声明系统调用头文件

< include/linux/syscalls.h >

asmlinkage long sys_newstat(const char __user *filename,
				struct stat __user *statbuf);
asmlinkage long sys_newlstat(const char __user *filename,
				struct stat __user *statbuf);
asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf);

lstat系统调用接口定义

实现函数入口

< fs/stat.c >

SYSCALL_DEFINE2(newstat, const char __user *, filename,
		struct stat __user *, statbuf)
{
	struct kstat stat;
	int error = vfs_stat(filename, &stat);

	if (error)
		return error;
	return cp_new_stat(&stat, statbuf);
}

SYSCALL_DEFINE2(newlstat, const char __user *, filename,
		struct stat __user *, statbuf)
{
	struct kstat stat;
	int error;

	error = vfs_lstat(filename, &stat);
	if (error)
		return error;

	return cp_new_stat(&stat, statbuf);
}

看得出,这两个函数流程一样。

从系统调用接口看,filename是文件路径,返回值是struct stat类型的结构体

接下来看看stat结构体

< arch/x86/include/uapi/asm/stat.h >

struct stat {
	unsigned long  st_dev;
	unsigned long  st_ino;
	unsigned short st_mode;
	unsigned short st_nlink;
	unsigned short st_uid;
	unsigned short st_gid;
	unsigned long  st_rdev;
	unsigned long  st_size;
	unsigned long  st_blksize;
	unsigned long  st_blocks;
	unsigned long  st_atime;
	unsigned long  st_atime_nsec;
	unsigned long  st_mtime;
	unsigned long  st_mtime_nsec;
	unsigned long  st_ctime;
	unsigned long  st_ctime_nsec;
	unsigned long  __unused4;
	unsigned long  __unused5;
};

man 2 stat

通过查看可知,和struct stat是一致的

struct stat {
               dev_t     st_dev;         /* ID of device containing file */
               ino_t     st_ino;         /* Inode number */
               mode_t    st_mode;        /* File type and mode */
               nlink_t   st_nlink;       /* Number of hard links */
               uid_t     st_uid;         /* User ID of owner */
               gid_t     st_gid;         /* Group ID of owner */
               dev_t     st_rdev;        /* Device ID (if special file) */
               off_t     st_size;        /* Total size, in bytes */
               blksize_t st_blksize;     /* Block size for filesystem I/O */
               blkcnt_t  st_blocks;      /* Number of 512B blocks allocated */

               /* Since Linux 2.6, the kernel supports nanosecond
                  precision for the following timestamp fields.
                  For the details before Linux 2.6, see NOTES. */

               struct timespec st_atim;  /* Time of last access */
               struct timespec st_mtim;  /* Time of last modification */
               struct timespec st_ctim;  /* Time of last status change */

           #define st_atime st_atim.tv_sec      /* Backward compatibility */
           #define st_mtime st_mtim.tv_sec
           #define st_ctime st_ctim.tv_sec
           };

下面看真正的实现函数

vfs_lstat()分析

< include/linux/fs.h >

static inline int vfs_lstat(const char __user *name, struct kstat *stat)
{
	return vfs_fstatat(AT_FDCWD, name, stat, AT_SYMLINK_NOFOLLOW);
}

vfs_stat()分析

和vfs_lstat的区别唯一的差别就是没有设置AT_SYMLINK_NOFOLLOW

static inline int vfs_stat(const char __user *filename, struct kstat *stat)
{
	return vfs_fstatat(AT_FDCWD, filename, stat, 0);
}

vfs_fstatat()

< fs/stat.c >

int vfs_fstatat(int dfd, const char __user *filename,
			      struct kstat *stat, int flags)
{
	int ret;
	int statx_flags = flags | AT_NO_AUTOMOUNT;
	struct filename *name;

    /* getname_flags 函数的作用是从用户空间复制文件路径到内核空间 */
	name = getname_flags(filename, getname_statx_lookup_flags(statx_flags), NULL);
	ret = vfs_statx(dfd, name, statx_flags, stat, STATX_BASIC_STATS);
	putname(name);

	return ret;
}

vfs_statx()

< fs/stat.c >

static int vfs_statx(int dfd, struct filename *filename, int flags,
	      struct kstat *stat, u32 request_mask)
{
	struct path path;
	unsigned int lookup_flags = getname_statx_lookup_flags(flags);
	int error;

	if (flags & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT | AT_EMPTY_PATH |
		      AT_STATX_SYNC_TYPE))
		return -EINVAL;

retry:
	error = filename_lookup(dfd, filename, lookup_flags, &path, NULL);
	if (error)
		goto out;

	error = vfs_getattr(&path, stat, request_mask, flags);

	stat->mnt_id = real_mount(path.mnt)->mnt_id;
	stat->result_mask |= STATX_MNT_ID;

	if (path.mnt->mnt_root == path.dentry)
		stat->attributes |= STATX_ATTR_MOUNT_ROOT;
	stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT;

	/* Handle STATX_DIOALIGN for block devices. */
	if (request_mask & STATX_DIOALIGN) {
		struct inode *inode = d_backing_inode(path.dentry);

		if (S_ISBLK(inode->i_mode))
			bdev_statx_dioalign(inode, stat);
	}

	path_put(&path);
	if (retry_estale(error, lookup_flags)) {
		lookup_flags |= LOOKUP_REVAL;
		goto retry;
	}
out:
	return error;
}

vfs_getattr()

< fs/stat.c >

int vfs_getattr(const struct path *path, struct kstat *stat,
		u32 request_mask, unsigned int query_flags)
{
	int retval;
	
    /* 对path 指向的文件进行安全检 */
	retval = security_inode_getattr(path);
	if (retval)
		return retval;
    /* security_inode_getattr已经检查过安全了,因此这里通过非安全的方法获得文件的基本扩展属性 */
	return vfs_getattr_nosec(path, stat, request_mask, query_flags);
}

vfs_getattr_nosec()

< fs/stat.c >

int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
		      u32 request_mask, unsigned int query_flags)
{
	struct user_namespace *mnt_userns;
    /* 通过path转换为inode */
	struct inode *inode = d_backing_inode(path->dentry);

	memset(stat, 0, sizeof(*stat));
	stat->result_mask |= STATX_BASIC_STATS;
	query_flags &= AT_STATX_SYNC_TYPE;

	/* allow the fs to override these if it really wants to */
	/* SB_NOATIME means filesystem supplies dummy atime value */
	if (inode->i_sb->s_flags & SB_NOATIME)
		stat->result_mask &= ~STATX_ATIME;

	/*
	 * Note: If you add another clause to set an attribute flag, please
	 * update attributes_mask below.
	 */
	if (IS_AUTOMOUNT(inode))
		stat->attributes |= STATX_ATTR_AUTOMOUNT;

	if (IS_DAX(inode))
		stat->attributes |= STATX_ATTR_DAX;

	stat->attributes_mask |= (STATX_ATTR_AUTOMOUNT |
				  STATX_ATTR_DAX);

	mnt_userns = mnt_user_ns(path->mnt);
	if (inode->i_op->getattr)           /* 通过文件系统注册的接口,获取属性 */
		return inode->i_op->getattr(mnt_userns, path, stat,
					    request_mask, query_flags);

    /* 文件系统没有提供接口,就使用通用接口获取 */
	generic_fillattr(mnt_userns, inode, stat);
	return 0;
}

ext4文件系统注册接口

< fs/ext4/namei.c >

const struct inode_operations ext4_dir_inode_operations = {
	.create		= ext4_create,
	.lookup		= ext4_lookup,
	.link		= ext4_link,
	.unlink		= ext4_unlink,
	.symlink	= ext4_symlink,
	.mkdir		= ext4_mkdir,
	.rmdir		= ext4_rmdir,
	.mknod		= ext4_mknod,
	.tmpfile	= ext4_tmpfile,
	.rename		= ext4_rename2,
	.setattr	= ext4_setattr,
	.getattr	= ext4_getattr,
	.listxattr	= ext4_listxattr,
	.get_acl	= ext4_get_acl,
	.set_acl	= ext4_set_acl,
	.fiemap         = ext4_fiemap,
	.fileattr_get	= ext4_fileattr_get,
	.fileattr_set	= ext4_fileattr_set,
};

ext4_getattr()

< fs/ext4/inode.c >

int ext4_getattr(struct user_namespace *mnt_userns, const struct path *path,
		 struct kstat *stat, u32 request_mask, unsigned int query_flags)
{
	struct inode *inode = d_inode(path->dentry);
	struct ext4_inode *raw_inode;
	struct ext4_inode_info *ei = EXT4_I(inode);
	unsigned int flags;

	/* 判断是否需要获取i_crtime */
	if ((request_mask & STATX_BTIME) &&
	    EXT4_FITS_IN_INODE(raw_inode, ei, i_crtime)) {   /*  也要判断ext4是否有i_crtime */
		stat->result_mask |= STATX_BTIME;
		stat->btime.tv_sec = ei->i_crtime.tv_sec;
		stat->btime.tv_nsec = ei->i_crtime.tv_nsec;
	}

	/*
	 * Return the DIO alignment restrictions if requested.  We only return
	 * this information when requested, since on encrypted files it might
	 * take a fair bit of work to get if the file wasn't opened recently.
	 */
	if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) {
		u32 dio_align = ext4_dio_alignment(inode);

		stat->result_mask |= STATX_DIOALIGN;
		if (dio_align == 1) {
			struct block_device *bdev = inode->i_sb->s_bdev;

			/* iomap defaults */
			stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
			stat->dio_offset_align = bdev_logical_block_size(bdev);
		} else {
			stat->dio_mem_align = dio_align;
			stat->dio_offset_align = dio_align;
		}
	}

	flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
	if (flags & EXT4_APPEND_FL)
		stat->attributes |= STATX_ATTR_APPEND;
	if (flags & EXT4_COMPR_FL)
		stat->attributes |= STATX_ATTR_COMPRESSED;
	if (flags & EXT4_ENCRYPT_FL)
		stat->attributes |= STATX_ATTR_ENCRYPTED;
	if (flags & EXT4_IMMUTABLE_FL)
		stat->attributes |= STATX_ATTR_IMMUTABLE;
	if (flags & EXT4_NODUMP_FL)
		stat->attributes |= STATX_ATTR_NODUMP;
	if (flags & EXT4_VERITY_FL)
		stat->attributes |= STATX_ATTR_VERITY;

	stat->attributes_mask |= (STATX_ATTR_APPEND |
				  STATX_ATTR_COMPRESSED |
				  STATX_ATTR_ENCRYPTED |
				  STATX_ATTR_IMMUTABLE |
				  STATX_ATTR_NODUMP |
				  STATX_ATTR_VERITY);

	generic_fillattr(mnt_userns, inode, stat);
	return 0;
}

可见,只要带着STATX_BTIME信息,无论stat,或者lstat,或者fstat 都能获取到i_crtime信息

当然,前提是使用statx系统调用

generic_fillattr() 分析

就是将inode信息填充到stat结构体

< fs/stat.c >

void generic_fillattr(struct user_namespace *mnt_userns, struct inode *inode,
		      struct kstat *stat)
{
	stat->dev = inode->i_sb->s_dev;
	stat->ino = inode->i_ino;
	stat->mode = inode->i_mode;
	stat->nlink = inode->i_nlink;
	stat->uid = i_uid_into_mnt(mnt_userns, inode);
	stat->gid = i_gid_into_mnt(mnt_userns, inode);
	stat->rdev = inode->i_rdev;
	stat->size = i_size_read(inode);
	stat->atime = inode->i_atime;
	stat->mtime = inode->i_mtime;
	stat->ctime = inode->i_ctime;
	stat->blksize = i_blocksize(inode);
	stat->blocks = inode->i_blocks;
}

至此,inode信息就完全获取到了

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值