文件系统初始化
/********************************************************************/
init/main.c: start_kernel():
vfs_caches_init_early();
vfs_caches_init(totalram_pages);
/*
 * Early VFS cache set-up: runs the early dentry-cache initialiser and
 * then the early inode-cache initialiser.
 */
void __init vfs_caches_init_early(void)
{
dcache_init_early();
inode_init_early();
}
/********************************************************************/
dentry cache and inode cache: hash table
/*
 * Early set-up of the dentry hash table.
 *
 * Does nothing when hashdist is set.  Otherwise the table is allocated
 * with HASH_EARLY and every bucket head is initialised.
 */
static void __init dcache_init_early(void)
{
	unsigned int nr_buckets;
	unsigned int i;

	/*
	 * Hashes distributed across NUMA nodes: defer the allocation
	 * until vmalloc space is available.
	 */
	if (hashdist)
		return;

	dentry_hashtable = alloc_large_system_hash("Dentry cache",
						   sizeof(struct hlist_bl_head),
						   dhash_entries,
						   13,
						   HASH_EARLY,
						   &d_hash_shift,
						   &d_hash_mask,
						   0);

	/* the table holds 2^d_hash_shift bucket heads */
	nr_buckets = 1U << d_hash_shift;
	for (i = 0; i < nr_buckets; i++)
		INIT_HLIST_BL_HEAD(&dentry_hashtable[i]);
}
/*
 * Early set-up of the inode hash table.
 *
 * Does nothing when hashdist is set.  Otherwise the table is allocated
 * with HASH_EARLY and every bucket head is initialised.
 */
void __init inode_init_early(void)
{
	unsigned int nr_buckets;
	unsigned int i = 0;

	/*
	 * Hashes distributed across NUMA nodes: defer the allocation
	 * until vmalloc space is available.
	 */
	if (hashdist)
		return;

	inode_hashtable = alloc_large_system_hash("Inode-cache",
						  sizeof(struct hlist_head),
						  ihash_entries,
						  14,
						  HASH_EARLY,
						  &i_hash_shift,
						  &i_hash_mask,
						  0);

	/* the table holds 2^i_hash_shift bucket heads */
	nr_buckets = 1U << i_hash_shift;
	while (i < nr_buckets) {
		INIT_HLIST_HEAD(inode_hashtable + i);
		i++;
	}
}
/********************************************************************/
/*
 * Second-stage VFS cache initialisation.
 *
 * @mempages: number of memory pages; a reserve is subtracted from it
 *            before the remainder is handed to files_init().
 *
 * Creates the "names_cache" slab cache and then runs the dcache, inode,
 * file, mount, block-device and character-device initialisers in order.
 */
void __init vfs_caches_init(unsigned long mempages)
{
unsigned long reserve;
/* Base hash sizes on available memory, with a reserve equal to
150% of current kernel size */
/* the reserve is capped at mempages - 1, so at least one page remains */
reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
mempages -= reserve;
/* slab cache "names_cache" for file names; each object is PATH_MAX bytes */
names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
dcache_init();
inode_init();
files_init(mempages);
mnt_init();
bdev_cache_init();
chrdev_init();
}
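/*dcache_init_early 和 dcache_init的区别是什么?
*dcache_init_early 比 dcache_init少了kmem_cache_create
*inode_init_early和inode_init同样如此
*另外, dentry hash表只会在两者之一中分配: hashdist为0时在dcache_init_early中
*(使用HASH_EARLY)分配, hashdist非0时dcache_init_early直接返回, 推迟到dcache_init中分配.
*/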
/*
 * Late dentry-cache set-up: always creates the dentry slab cache, and
 * allocates the dentry hash table only when hashdist is set (otherwise
 * the table may already exist from dcache_init_early()).
 */
static void __init dcache_init(void)
{
	unsigned int nr_buckets;
	unsigned int i;

	/*
	 * No constructor is used: one could be added for stable state
	 * like the lists, but it is probably not worth it because of the
	 * cache nature of the dcache.
	 */
	dentry_cache = KMEM_CACHE(dentry,
		SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);

	/* Hash may have been set up in dcache_init_early */
	if (!hashdist)
		return;

	dentry_hashtable = alloc_large_system_hash("Dentry cache",
						   sizeof(struct hlist_bl_head),
						   dhash_entries,
						   13,
						   0,
						   &d_hash_shift,
						   &d_hash_mask,
						   0);

	/* the table holds 2^d_hash_shift bucket heads */
	nr_buckets = 1U << d_hash_shift;
	for (i = 0; i < nr_buckets; i++)
		INIT_HLIST_BL_HEAD(&dentry_hashtable[i]);
}
/*
 * File-related set-up: create the slab cache for struct file and compute
 * the maximum number of files.
 *
 * @mempages: number of memory pages used to derive the file limit.
 */
void __init files_init(unsigned long mempages)
{
unsigned long n;
filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
/*
* One file with associated inode and dcache is very roughly 1K.
* Per default don't use more than 10% of our memory for files.
*/
/* mempages * (PAGE_SIZE / 1024) is the memory size in KiB; take a tenth */
n = (mempages * (PAGE_SIZE / 1024)) / 10;
/* never go below NR_FILE */
files_stat.max_files = max_t(unsigned long, n, NR_FILE);
files_defer_init();
lg_lock_init(files_lglock);
percpu_counter_init(&nr_files, 0);
}
/********************************************************************/
/*
 * Set up the mount infrastructure at boot: create the slab cache for
 * struct mount, initialise sysfs, create the "fs" kobject, register
 * rootfs and build the initial mount tree.
 *
 * Failures of sysfs_init() and of the "fs" kobject creation are logged
 * and otherwise tolerated; the remaining steps still run.
 */
void __init mnt_init(void)
{
	int err;

	/* slab cache for struct mount (created with SLAB_PANIC) */
	mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);

	err = sysfs_init();
	if (err)
		printk(KERN_WARNING "%s: sysfs_init error: %d\n",
			__func__, err);

	fs_kobj = kobject_create_and_add("fs", NULL);
	if (!fs_kobj)
		printk(KERN_WARNING "%s: kobj create error\n", __func__);

	init_rootfs();
	init_mount_tree();
}
/*
 * Initialise sysfs: create the sysfs_dirent slab cache, initialise the
 * sysfs inode layer, register the sysfs filesystem type and mount it
 * internally (sysfs_mnt).
 *
 * Returns 0 on success or a negative errno.  On any failure after the
 * slab cache was created, the cache is destroyed again and
 * sysfs_dir_cachep is reset to NULL; if the internal mount fails, the
 * filesystem type is unregistered as well.
 */
int __init sysfs_init(void)
{
	int err = -ENOMEM;

	sysfs_dir_cachep = kmem_cache_create("sysfs_dir_cache",
					      sizeof(struct sysfs_dirent),
					      0, 0, NULL);
	if (!sysfs_dir_cachep)
		goto out;

	err = sysfs_inode_init();
	if (err)
		goto out_err;

	err = register_filesystem(&sysfs_fs_type);
	if (err)
		goto out_err;

	sysfs_mnt = kern_mount(&sysfs_fs_type);
	if (IS_ERR(sysfs_mnt)) {
		printk(KERN_ERR "sysfs: could not mount!\n");
		err = PTR_ERR(sysfs_mnt);
		/* do not leave an error pointer behind in the global */
		sysfs_mnt = NULL;
		unregister_filesystem(&sysfs_fs_type);
		goto out_err;
	}
out:
	return err;
out_err:
	kmem_cache_destroy(sysfs_dir_cachep);
	sysfs_dir_cachep = NULL;
	goto out;
}
file_system_type
/*先看一下register_filesystem(&sysfs_fs_type)
* and kern_mount(&sysfs_fs_type);*/
/*从函数register_filesystem可以看出所说的注册文件系统:就是根据文件系统的名字在
*file_systems指向的单向链表中查找,如果没有在加入该list中,已有这个名字【也就是已注册】
*返回busy flag.
*/
crash> file_system_type
struct file_system_type {
const char *name;
int fs_flags;
struct dentry *(*mount)(struct file_system_type *, int, const char *, void *);
void (*kill_sb)(struct super_block *);
struct module *owner;
struct file_system_type *next;
struct hlist_head fs_supers;
struct lock_class_key s_lock_key;
struct lock_class_key s_umount_key;
struct lock_class_key s_vfs_rename_key;
struct lock_class_key i_lock_key;
struct lock_class_key i_mutex_key;
struct lock_class_key i_mutex_dir_key;
}
SIZE: 28
/*
 * Register filesystem type @fs under its name.
 *
 * find_filesystem() yields either the link that already points at a type
 * with the same name, or the NULL link that terminates the list.  In the
 * first case nothing is changed and -EBUSY is returned; in the second
 * @fs is stored into that link (i.e. appended) and 0 is returned.
 */
int register_filesystem(struct file_system_type * fs)
{
	struct file_system_type **slot;

	slot = find_filesystem(fs->name, strlen(fs->name));
	if (*slot != NULL)
		return -EBUSY;

	*slot = fs;
	return 0;
}
/*
 * Walk the singly linked file_systems list looking for a type whose name
 * has exactly @len characters and equals the first @len characters of
 * @name.
 *
 * Returns the address of the link pointing at the match, or the address
 * of the terminating NULL link when there is no match.
 */
static struct file_system_type **find_filesystem(const char *name, unsigned len)
{
	struct file_system_type **link = &file_systems;

	while (*link) {
		const char *cur = (*link)->name;

		if (strlen(cur) == len && strncmp(cur, name, len) == 0)
			break;
		link = &(*link)->next;
	}
	return link;
}
crash> sym file_systems
c0815088 (b) file_systems
crash> file_systems
file_systems = $3 = (struct file_system_type *) 0xc06e3f80 <sysfs_fs_type>
crash> list file_system_type.next -s file_system_type.name 0xc06e3f80
c06e3f80
name = 0xc061108d "sysfs"
c06e4600
name = 0xc0603ace "rootfs"
c06e3a74
name = 0xc06467f5 "bdev"
c06e3d00
name = 0xc0652628 "proc"
c06e2c88
name = 0xc06012c2 "tmpfs"
c06fdd60
name = 0xc06583c1 "sockfs"
c06f1540
name = 0xc063694f "usbfs"
c06e388c
name = 0xc060382c "pipefs"
c06e3bd8
name = 0xc06043ef "anon_inodefs"
c06e3ff4
name = 0xc06057e2 "devpts"
c06e4154
name = 0xc060759d "ext3"
c06e4178
name = 0xc0608361 "ext2"
c06e41c4
name = 0xc0608fe5 "ext4"
c06e442c
name = 0xc060c8ec "cramfs"
c06e4460
name = 0xc060c8ed "ramfs"
c06e4674
name = 0xc060d234 "vfat"
c06e4690
name = 0xc060d239 "msdos"
c06e47a0
name = 0xc060d3fb "fuseblk"
c06e4784
name = 0xc05fa8d3 "fuse"
c06e47ec
name = 0xc060d44c "fusectl"
/*这里加 -h参数都解不出来?另外file_systems表示该变量存储在那个位置*/
crash> list file_system_type.next -s file_system_type.name file_systems
c0815088
name = 0xc06e3f80 <sysfs_fs_type> "\215\020a\300"
c0815098
name = 0xc0815098 <namespace_sem+12> "\230P\201\300\230P\201\300\024"
crash> sym file_systems
c0815088 (b) file_systems
crash> file_systems /*变量的内容是struct file_system_type *类型,list后面参数说的肯定是这个意思*/
file_systems = $3 = (struct file_system_type *) 0xc06e3f80 <sysfs_fs_type>
/********************************************************************/
从下面我们可以看出结构体 mount/vfsmount/dentry/super_block/file_system_type 之间的关系
crash> struct mount
struct mount {
struct list_head mnt_hash;
struct mount *mnt_parent;
struct dentry *mnt_mountpoint;
struct vfsmount mnt;
struct mnt_pcp *mnt_pcp;
atomic_t mnt_longterm;
struct list_head mnt_mounts;
struct list_head mnt_child;
struct list_head mnt_instance;
const char *mnt_devname;
struct list_head mnt_list;
struct list_head mnt_expire;
struct list_head mnt_share;
struct list_head mnt_slave_list;
struct list_head mnt_slave;
struct mount *mnt_master;
struct mnt_namespace *mnt_ns;
struct hlist_head mnt_fsnotify_marks;
__u32 mnt_fsnotify_mask;
int mnt_id;
int mnt_group_id;
int mnt_expiry_mark;
int mnt_pinned;
int mnt_ghosts;
}
SIZE: 140
crash> vfsmount
struct vfsmount {
struct dentry *mnt_root;
struct super_block *mnt_sb;
int mnt_flags;
}
/* kern_mount(): kern_mount_data() with no filesystem-specific data */
#define kern_mount(type) kern_mount_data(type, NULL)

/*
 * Mount filesystem @type via vfs_kern_mount() with the MS_KERNMOUNT flag,
 * using the type's own name as the device name.  @data is passed through
 * unchanged.  Returns whatever vfs_kern_mount() returns.
 */
struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
{
	return vfs_kern_mount(type, MS_KERNMOUNT, type->name, data);
}
/*
 * Create a mount of filesystem @type.
 *
 * Allocates a struct mount, has mount_fs() produce the root dentry, and
 * points the embedded vfsmount at that root and at the root's superblock.
 * The new mount is its own parent and its mountpoint is its own root.
 *
 * Returns a pointer to the vfsmount embedded in the new struct mount.
 *
 * NOTE(review): no failure checks on alloc_vfsmnt() or mount_fs() are
 * visible here - confirm whether they were elided from this excerpt.
 */
struct vfsmount *
vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
{
struct mount *mnt;
struct dentry *root;
mnt = alloc_vfsmnt(name);
/* MS_KERNMOUNT requests are flagged MNT_INTERNAL */
if (flags & MS_KERNMOUNT)
mnt->mnt.mnt_flags = MNT_INTERNAL;
root = mount_fs(type, flags, name, data);
mnt->mnt.mnt_root = root;
mnt->mnt.mnt_sb = root->d_sb;
mnt->mnt_mountpoint = mnt->mnt.mnt_root;
mnt->mnt_parent = mnt;
/* add the mount to the superblock's s_mounts list under vfsmount_lock */
br_write_lock(vfsmount_lock);
list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts);
br_write_unlock(vfsmount_lock);
return &mnt->mnt;
}
/*
 * Ask filesystem @type to mount itself and return the resulting root
 * dentry.  All four arguments are forwarded unchanged to type->mount().
 *
 * NOTE(review): sb, secdata and error are assigned but not otherwise
 * used in the code shown here.
 */
struct dentry *
mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
{
struct dentry *root;
struct super_block *sb;
char *secdata = NULL;
int error = -ENOMEM;
/* call the concrete filesystem's ->mount() method, which creates the root dentry and the super_block */
root = type->mount(type, flags, name, data);
/* the superblock is reachable from the root dentry */
sb = root->d_sb;
return root;
}
/*bdi是个什么东西?该函数只是注册了文件系统rootfs_fs_type*/
/*
 * Mount namespace.  init_mount_tree() obtains one from create_mnt_ns()
 * and stores it in init_task.nsproxy->mnt_ns.
 */
struct mnt_namespace {
atomic_t count;
struct mount * root; /* a struct mount, not a struct vfsmount */
struct list_head list;
wait_queue_head_t poll;
int event;
};
/*
 * A (mount, dentry) pair.  init_mount_tree() fills one with the rootfs
 * vfsmount and that mount's mnt_root before passing it to set_fs_pwd()
 * and set_fs_root().
 */
struct path {
struct vfsmount *mnt;
struct dentry *dentry;
};
/*
* A structure to contain pointers to all per-process
* namespaces - fs (mount), uts, network, sysvipc, etc.
*
* 'count' is the number of tasks holding a reference.
* The count for each namespace, then, will be the number
* of nsproxies pointing to it, not the number of tasks.
*
* The nsproxy is shared by tasks which share all namespaces.
* As soon as a single namespace is cloned or unshared, the
* nsproxy is copied.
*/
struct nsproxy {
atomic_t count;
struct uts_namespace *uts_ns;
struct ipc_namespace *ipc_ns;
struct mnt_namespace *mnt_ns; /* mount namespace; set for init_task by init_mount_tree() */
struct pid_namespace *pid_ns;
struct net *net_ns;
};
rootfs
/*
 * Prepare rootfs: initialise ramfs_backing_dev_info, then register the
 * rootfs filesystem type.  No mount is performed here.
 *
 * Returns 0 on success.  If bdi_init() fails its error is returned
 * directly; if registration fails the backing-dev info is destroyed
 * again and the registration error is returned.
 */
int __init init_rootfs(void)
{
	int ret = bdi_init(&ramfs_backing_dev_info);

	if (ret != 0)
		return ret;

	ret = register_filesystem(&rootfs_fs_type);
	if (ret == 0)
		return 0;

	/* registration failed: undo bdi_init() */
	bdi_destroy(&ramfs_backing_dev_info);
	return ret;
}
/*
 * Build the initial mount tree: mount "rootfs", wrap it in a new mount
 * namespace installed on init_task, and make the rootfs root both the
 * working directory and the root directory of the current task.
 *
 * NOTE(review): no failure checks on do_kern_mount() or create_mnt_ns()
 * are visible here - confirm whether they were elided from this excerpt.
 */
static void __init init_mount_tree(void)
{
struct vfsmount *mnt;
struct mnt_namespace *ns;
struct path root;
mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
ns = create_mnt_ns(mnt);
init_task.nsproxy->mnt_ns = ns;
get_mnt_ns(ns);
root.mnt = mnt;
root.dentry = mnt->mnt_root;
/* current working directory and root directory */
set_fs_pwd(current->fs, &root);
set_fs_root(current->fs, &root);
}
/*
 * Filesystem type descriptor for "rootfs".  Registered by init_rootfs()
 * and mounted by name in init_mount_tree().
 */
static struct file_system_type rootfs_fs_type = {
.name = "rootfs",
.mount = rootfs_mount,
.kill_sb = kill_litter_super,
};
/*
 * ->mount method of rootfs: delegates to mount_nodev() with MS_NOUSER
 * added to @flags and ramfs_fill_super() as the superblock filler.
 * @dev_name is not used.
 */
static struct dentry *rootfs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
return mount_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super);
}
/*
 * Mount a filesystem that has no backing device.
 *
 * @fs_type:    filesystem type handed to sget()
 * @flags:      mount flags, stored in s_flags
 * @data:       filesystem-specific data forwarded to @fill_super
 * @fill_super: callback that populates the new superblock
 *
 * Obtains a superblock from sget() using set_anon_super, lets
 * @fill_super populate it, marks it MS_ACTIVE and returns dget() of
 * its s_root.
 *
 * NOTE(review): the sget() result is used unchecked and 'error' is stored
 * but never inspected in the code shown here, so a failing @fill_super
 * still leads to MS_ACTIVE being set - confirm whether the checks were
 * elided from this excerpt.
 */
struct dentry *mount_nodev(struct file_system_type *fs_type,
int flags, void *data,
int (*fill_super)(struct super_block *, void *, int))
{
int error;
struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
s->s_flags = flags;
/* third argument is 1 when MS_SILENT is set in flags, else 0 */
error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
s->s_flags |= MS_ACTIVE;
return dget(s->s_root);
}
/*
 * Populate superblock @sb for ramfs.
 *
 * @sb:     superblock to fill in
 * @data:   mount options, saved on @sb and parsed into the fs info
 * @silent: not used here
 *
 * Allocates the per-superblock ramfs_fs_info, parses the mount options,
 * sets the superblock constants and creates the root directory inode and
 * dentry.
 *
 * Returns 0 on success, -ENOMEM when the fs info or the root dentry
 * cannot be allocated, or the error from ramfs_parse_options().
 */
int ramfs_fill_super(struct super_block *sb, void *data, int silent)
{
	struct ramfs_fs_info *info;
	struct inode *root_inode;
	int ret;

	save_mount_options(sb, data);

	info = kzalloc(sizeof(struct ramfs_fs_info), GFP_KERNEL);
	/* s_fs_info is set even when the allocation failed (it is then NULL) */
	sb->s_fs_info = info;
	if (info == NULL)
		return -ENOMEM;

	ret = ramfs_parse_options(data, &info->mount_opts);
	if (ret != 0)
		return ret;

	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_blocksize = PAGE_CACHE_SIZE;
	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
	sb->s_magic = RAMFS_MAGIC;
	sb->s_op = &ramfs_ops;
	sb->s_time_gran = 1;

	/* root directory inode; its mode comes from the parsed mount options */
	root_inode = ramfs_get_inode(sb, NULL,
				     S_IFDIR | info->mount_opts.mode, 0);
	sb->s_root = d_make_root(root_inode);
	if (sb->s_root == NULL)
		return -ENOMEM;

	return 0;
}
/*
 * Allocate a root dentry named "/" for @root_inode and attach the inode
 * to it.
 *
 * Returns the new dentry, or NULL when @root_inode is NULL or the dentry
 * cannot be allocated.  When the allocation fails, @root_inode is
 * released with iput().
 */
struct dentry *d_make_root(struct inode *root_inode)
{
	static const struct qstr name = { .name = "/", .len = 1 };
	struct dentry *root;

	if (!root_inode)
		return NULL;

	root = __d_alloc(root_inode->i_sb, &name);
	if (!root) {
		iput(root_inode);
		return NULL;
	}

	d_instantiate(root, root_inode);
	return root;
}
/*
* Prepare the namespace - decide what/where to mount, load ramdisks, etc.
*
* Flow as shown here: wait for device probing, resolve the root device
* from saved_root_name (if set), optionally wait for it to appear, mount
* it, then mount devtmpfs on "dev", move the current directory onto "/"
* and chroot into it.
*/
void __init prepare_namespace(void)
{
/* NOTE(review): is_floppy is not used in the code shown here */
int is_floppy;
/* wait for the known devices to complete their probing
*
* Note: this is a potential source of long boot delays.
* For example, it is not atypical to wait 5 seconds here
* for the touchpad of a laptop to initialize.
*/
wait_for_device_probe();
if (saved_root_name[0]) {
root_device_name = saved_root_name;
/* names starting with "mtd" or "ubi" are mounted by name, skipping the dev_t lookup */
if (!strncmp(root_device_name, "mtd", 3) ||
!strncmp(root_device_name, "ubi", 3)) {
mount_block_root(root_device_name, root_mountflags);
goto out;
}
ROOT_DEV = name_to_dev_t(root_device_name);
/* strip a leading "/dev/" from the name */
if (strncmp(root_device_name, "/dev/", 5) == 0)
root_device_name += 5;
}
/* wait for any asynchronous scanning to complete */
if ((ROOT_DEV == 0) && root_wait) {
printk(KERN_INFO "Waiting for root device %s...\n",
saved_root_name);
/* poll every 100 ms until probing is done and the name resolves to a non-zero dev_t */
while (driver_probe_done() != 0 ||
(ROOT_DEV = name_to_dev_t(saved_root_name)) == 0)
msleep(100);
async_synchronize_full();
}
mount_root();
out:
devtmpfs_mount("dev");
/* move the mount at "." onto "/", then make "." the root directory */
sys_mount(".", "/", NULL, MS_MOVE, NULL);
sys_chroot((const char __user __force *)".");
}
/*
 * Mount the root filesystem.  With CONFIG_BLOCK enabled, creates the
 * "/dev/root" node for ROOT_DEV and mounts it with root_mountflags;
 * without CONFIG_BLOCK the function body shown here is empty.
 */
void __init mount_root(void)
{
#ifdef CONFIG_BLOCK
create_dev("/dev/root", ROOT_DEV);
mount_block_root("/dev/root", root_mountflags);
#endif
}
注册的文件系统和 mount的文件系统
crash> mount
MOUNT SUPERBLK TYPE DEVNAME DIRNAME
ee01d940 ee021c00 rootfs rootfs /
ee01d440 edb20000 ext4 /dev/root /
ee01de40 ee264000 tmpfs tmpfs /dev
ee01dda0 ed8f7400 devpts devpts /dev/pts
ee01dd00 ee022800 proc proc /proc
ee01dc60 edb61c00 sysfs sysfs /sys
ee01d300 ed40ec00 tmpfs tmpfs /mnt/secure
ee01d3a0 ed40e800 tmpfs tmpfs /mnt/asec
ed4d2f00 ed40e400 tmpfs tmpfs /mnt/obb
ed4d2e60 ed40e000 ext4 /dev/block/mmcblk1p3 /system
ed4d2dc0 edb0e400 ext4 /dev/block/mmcblk1p5 /data
ed4d2d20 edb7a800 ext4 /dev/block/mmcblk1p6 /cache
ed4d2c80 ed572c00 ext4 /dev/block/mmcblk1p7 /misc
ed4d26e0 e3ea4c00 vfat /dev/block/vold/179:1 /mnt/sdcard
ed4d2780 e3ea4c00 vfat /dev/block/vold/179:1 /mnt/secure/asec
ed4d2820 e3cb8800 tmpfs tmpfs /mnt/sdcard/.android_secure
ed4d2500 e8404c00 vfat /dev/block/vold/179:25 /mnt/sdcardEx
何时mount的文件系统
如sysfs:
system/core/init/readme.txt: mount sysfs sysfs /sys
system/core/init/init.c:mount("sysfs", "/sys", "sysfs", 0, NULL);
mkdir("/dev", 0755);
mkdir("/proc", 0755);
mkdir("/sys", 0755);
mount("tmpfs", "/dev", "tmpfs", MS_NOSUID, "mode=0755");
mkdir("/dev/pts", 0755);
mkdir("/dev/socket", 0755);
mount("devpts", "/dev/pts", "devpts", 0, NULL);
mount("proc", "/proc", "proc", 0, NULL);
mount("sysfs", "/sys", "sysfs", 0, NULL);
打开文件的进程和文件之间的关系
crash> task_struct | grep file
struct files_struct *files;
crash> files_struct
struct files_struct {
atomic_t count;
struct fdtable *fdt;
struct fdtable fdtab;
spinlock_t file_lock;
int next_fd;
unsigned long close_on_exec_init[1];
unsigned long open_fds_init[1];
struct file *fd_array[32];
}
crash> file
struct file {
union {
struct list_head fu_list;
struct rcu_head fu_rcuhead;
} f_u;
struct path f_path;
const struct file_operations *f_op;
spinlock_t f_lock;
int f_sb_list_cpu;
atomic_long_t f_count;
unsigned int f_flags;
fmode_t f_mode;
loff_t f_pos;
struct fown_struct f_owner;
const struct cred *f_cred;
struct file_ra_state f_ra;
u64 f_version;
void *private_data;
struct list_head f_ep_links;
struct list_head f_tfile_llink;
struct address_space *f_mapping;
}
SIZE: 152
crash> file_operations
struct file_operations {
struct module *owner;
loff_t (*llseek)(struct file *, loff_t, int);
ssize_t (*read)(struct file *, char *, size_t, loff_t *);
ssize_t (*write)(struct file *, const char *, size_t, loff_t *);
ssize_t (*aio_read)(struct kiocb *, const struct iovec *, unsigned long, loff_t);
ssize_t (*aio_write)(struct kiocb *, const struct iovec *, unsigned long, loff_t);
int (*readdir)(struct file *, void *, filldir_t);
unsigned int (*poll)(struct file *, struct poll_table_struct *);
long (*unlocked_ioctl)(struct file *, unsigned int, unsigned long);
long (*compat_ioctl)(struct file *, unsigned int, unsigned long);
int (*mmap)(struct file *, struct vm_area_struct *);
int (*open)(struct inode *, struct file *);
int (*flush)(struct file *, fl_owner_t);
int (*release)(struct inode *, struct file *);
int (*fsync)(struct file *, loff_t, loff_t, int);
int (*aio_fsync)(struct kiocb *, int);
int (*fasync)(int, struct file *, int);
int (*lock)(struct file *, int, struct file_lock *);
ssize_t (*sendpage)(struct file *, struct page *, int, size_t, loff_t *, int);
unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
int (*check_flags)(int);
int (*flock)(struct file *, int, struct file_lock *);
ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
int (*setlease)(struct file *, long, struct file_lock **);
long (*fallocate)(struct file *, int, loff_t, loff_t);
}
SIZE: 104