进程proc文件介绍

进程proc文件

/proc/${pid}/ 下是每个进程的proc信息;/proc/self比较特殊,它是一个符号链接,哪个进程访问它,它就指向哪个进程自己的/proc/${pid}目录。

在这里插入图片描述

1)内存相关:

  • status: 进程使用虚拟内存和物理内存(RSS)信息
  • statm: 内存详细信息;
  • maps: 进程各内存段对应的内核VMA映射,以及加载的动态库信息;(映射分为文件映射和匿名映射)
  • map_files: 映射文件信息
  • smaps: 是maps的扩展,比maps更详细,展示内核每个VMA的信息,可以看堆的实际物理内存。
  • oom_xxx: oom评分;

2)调度相关

  • sched

  • schedstat

  • stack: 可以追踪死锁问题,死循环问题,打印进程的调用栈;

3)文件IO

  • fd:打开文件信息

  • fdinfo

  • mountinfo

  • mounts

  • mountstats

  • io: io读写统计信息

  • limits: 进程的限制参数:最常用coredump的文件大小。

4)系统、状态相关

  • net: 网络参数

  • ns: 使用的命名空间;

  • cwd: 当前运行目录

  • cmdline:启动参数

  • exe:可执行程序

  • environ: 进程的环境变量

  • coredump_filter: 位掩码,控制哪些类型的内存段(匿名/文件映射、私有/共享等)会被写入coredump文件。

  • comm: 进程名

  • cgroup: 控制组信息

  • setgroups:当前用户命名空间内是否允许调用setgroups(2)(取值为allow或deny);

  • loginuid: 登录的uid信息。

内核部分

进程相关的proc文件主要在fs/proc/base.c中定义。

在数组tid_base_stuff中定义线程的文件列表,tgid_base_stuff数组中定义进程的列表。

每一项代表一个proc文件,如果是目录,需要定义一个inode_operations和一个file_operations。

/*
 * Table of /proc entries for a thread (the article text above describes
 * tid_base_stuff as the per-thread list and tgid_base_stuff as the
 * per-process list).
 *
 * Each element is built by one of the DIR/REG/ONE/NOD/LNK macros and gives
 * the entry name, its permission bits (not for LNK) and the handler(s) that
 * implement it.  Entries wrapped in #ifdef only exist when the corresponding
 * kernel config option is enabled.
 */
static const struct pid_entry tid_base_stuff[] = {
	DIR("fd",        S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
	DIR("fdinfo",    S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
	DIR("ns",	 S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
#ifdef CONFIG_NET
	DIR("net",        S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
#endif
	REG("environ",   S_IRUSR, proc_environ_operations),
	REG("auxv",      S_IRUSR, proc_auxv_operations),
	ONE("status",    S_IRUGO, proc_pid_status),
	ONE("personality", S_IRUSR, proc_pid_personality),
	ONE("limits",	 S_IRUGO, proc_pid_limits),
#ifdef CONFIG_SCHED_DEBUG
	REG("sched",     S_IRUGO|S_IWUSR, proc_pid_sched_operations),
#endif
	NOD("comm",      S_IFREG|S_IRUGO|S_IWUSR,
			 &proc_tid_comm_inode_operations,
			 &proc_pid_set_comm_operations, {}),
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
	ONE("syscall",   S_IRUSR, proc_pid_syscall),
#endif
	REG("cmdline",   S_IRUGO, proc_pid_cmdline_ops),
	ONE("stat",      S_IRUGO, proc_tid_stat),
	ONE("statm",     S_IRUGO, proc_pid_statm),
	REG("maps",      S_IRUGO, proc_pid_maps_operations),
#ifdef CONFIG_PROC_CHILDREN
	REG("children",  S_IRUGO, proc_tid_children_operations),
#endif
#ifdef CONFIG_NUMA
	REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations),
#endif
	REG("mem",       S_IRUSR|S_IWUSR, proc_mem_operations),
	LNK("cwd",       proc_cwd_link),
	LNK("root",      proc_root_link),
	LNK("exe",       proc_exe_link),
	REG("mounts",    S_IRUGO, proc_mounts_operations),
	REG("mountinfo",  S_IRUGO, proc_mountinfo_operations),
#ifdef CONFIG_PROC_PAGE_MONITOR
	REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
	REG("smaps",     S_IRUGO, proc_pid_smaps_operations),
	REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
	REG("pagemap",    S_IRUSR, proc_pagemap_operations),
#endif
#ifdef CONFIG_SECURITY
	DIR("attr",      S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
#endif
#ifdef CONFIG_KALLSYMS
	ONE("wchan",     S_IRUGO, proc_pid_wchan),
#endif
#ifdef CONFIG_STACKTRACE
	ONE("stack",      S_IRUSR, proc_pid_stack),
#endif
#ifdef CONFIG_SCHED_INFO
	ONE("schedstat", S_IRUGO, proc_pid_schedstat),
#endif
#ifdef CONFIG_LATENCYTOP
	REG("latency",  S_IRUGO, proc_lstats_operations),
#endif
#ifdef CONFIG_PROC_PID_CPUSET
	ONE("cpuset",    S_IRUGO, proc_cpuset_show),
#endif
#ifdef CONFIG_CGROUPS
	ONE("cgroup",  S_IRUGO, proc_cgroup_show),
#endif
	ONE("oom_score", S_IRUGO, proc_oom_score),
	REG("oom_adj",   S_IRUGO|S_IWUSR, proc_oom_adj_operations),
	REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDITSYSCALL
	REG("loginuid",  S_IWUSR|S_IRUGO, proc_loginuid_operations),
	REG("sessionid",  S_IRUGO, proc_sessionid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
	REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
	REG("fail-nth", 0644, proc_fail_nth_operations),
#endif
#ifdef CONFIG_TASK_IO_ACCOUNTING
	ONE("io",	S_IRUSR, proc_tid_io_accounting),
#endif
#ifdef CONFIG_USER_NS
	REG("uid_map",    S_IRUGO|S_IWUSR, proc_uid_map_operations),
	REG("gid_map",    S_IRUGO|S_IWUSR, proc_gid_map_operations),
	REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
	REG("setgroups",  S_IRUGO|S_IWUSR, proc_setgroups_operations),
#endif
#ifdef CONFIG_LIVEPATCH
	ONE("patch_state",  S_IRUSR, proc_pid_patch_state),
#endif
};

从proc根目录的初始化开始跟踪,看这些文件是如何创建出来的。

proc根目录的初始化函数: <fs/proc/root.c>

/*
 * Boot-time (__init) setup of the procfs root.
 *
 * Runs the procfs sub-initialisers, creates the fixed top-level entries
 * ("mounts" symlink, "fs", "driver", "bus" directories and the "fs/nfsd"
 * mount point), and finally registers proc_fs_type with the VFS.
 */
void __init proc_root_init(void)
{
	proc_init_kmemcache();
	set_proc_pid_nlink();
	proc_self_init();
	proc_thread_self_init();
	/* /proc/mounts is a symlink to the reader's own self/mounts. */
	proc_symlink("mounts", NULL, "self/mounts");

	proc_net_init();
	proc_mkdir("fs", NULL);
	proc_mkdir("driver", NULL);
	proc_create_mount_point("fs/nfsd"); /* somewhere for the nfsd filesystem to be mounted */
#if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE)
	/* just give it a mountpoint */
	proc_create_mount_point("openprom");
#endif
	proc_tty_init();
	proc_mkdir("bus", NULL);
	proc_sys_init();

	/* Registration is the last step, after the entries above exist. */
	register_filesystem(&proc_fs_type);
}

根目录的proc_root

/*
 * Statically initialised directory entry for the procfs root.
 *
 * It is a directory (S_IFDIR) that is readable and searchable by everyone,
 * is its own parent, starts with an empty subdir tree (RB_ROOT), and uses
 * proc_root_inode_operations / proc_root_operations.  namelen (5) matches
 * the length of the name string "/proc".
 */
struct proc_dir_entry proc_root = {
	.low_ino	= PROC_ROOT_INO, 
	.namelen	= 5, 
	.mode		= S_IFDIR | S_IRUGO | S_IXUGO, 
	.nlink		= 2, 
	.refcnt		= REFCOUNT_INIT(1),
	.proc_iops	= &proc_root_inode_operations, 
	.proc_fops	= &proc_root_operations,
	.parent		= &proc_root,
	.subdir		= RB_ROOT,
	.name		= "/proc",
};

inode的lookup函数对应proc_root_lookup,查找该目录下的inode列表:

/*
 * Inode operations of the procfs root directory: name lookup is handled by
 * proc_root_lookup and attribute queries by proc_root_getattr.
 */
static const struct inode_operations proc_root_inode_operations = {
	.lookup		= proc_root_lookup,
	.getattr	= proc_root_getattr,
};

proc_root_lookup中会创建各pid对应的目录,调用关系如下:

proc_root_lookup

-> proc_pid_lookup

->-> proc_pid_instantiate

proc_pid_instantiate创建pid对应的proc目录,指定该目录的inode_operations和file_operations为proc_tgid_base_inode_operations和proc_tgid_base_operations。

/*
 * Create the directory inode for a /proc/<pid> entry and attach it to
 * @dentry.
 *
 * @dentry: dentry being looked up; its superblock is used for the new inode.
 * @task:   task the directory describes.
 * @ptr:    not used by this function.
 *
 * Returns ERR_PTR(-ENOENT) if the inode cannot be created, otherwise the
 * result of d_splice_alias().
 */
static struct dentry *proc_pid_instantiate(struct dentry * dentry,
				   struct task_struct *task, const void *ptr)
{
	struct inode *inode;

	/* Directory, readable and searchable by everyone. */
	inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
	if (!inode)
		return ERR_PTR(-ENOENT);

	/* Lookup and readdir on this directory go through the tgid tables. */
	inode->i_op = &proc_tgid_base_inode_operations;
	inode->i_fop = &proc_tgid_base_operations;
	inode->i_flags|=S_IMMUTABLE;

	set_nlink(inode, nlink_tgid);
	pid_update_inode(task, inode);

	d_set_d_op(dentry, &pid_dentry_operations);
	return d_splice_alias(inode, dentry);
}

proc_tgid_base_inode_operations的lookup函数和proc_tgid_base_operations的iterate_shared 都会遍历数组tgid_base_stuff。

/*
 * Directory iteration for /proc/<tgid>: delegates to proc_pident_readdir()
 * with the whole tgid_base_stuff table.
 */
static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
{
	return proc_pident_readdir(file, ctx,
				   tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
}

/*
 * File operations installed on a /proc/<tgid> directory inode.  Directory
 * listing (.iterate_shared) is served by proc_tgid_base_readdir; read and
 * llseek use the generic helpers.
 */
static const struct file_operations proc_tgid_base_operations = {
	.read		= generic_read_dir,
	.iterate_shared	= proc_tgid_base_readdir,
	.llseek		= generic_file_llseek,
};

/*
 * Name lookup inside /proc/<tgid>: delegates to proc_pident_lookup() with
 * the whole tgid_base_stuff table.  @flags is not used here.
 */
static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
	return proc_pident_lookup(dir, dentry,
				  tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
}

重点文件的实现

内存

  • maps

包括匿名映射,和文件映射,内核对应VMA结构,每个VMA的起始地址和结束地址。

56018dfba000-56018e0c2000 r-xp 00000000 08:01 2359769                    /usr/bin/bash
56018e2c1000-56018e2c5000 r--p 00107000 08:01 2359769                    /usr/bin/bash
56018e2c5000-56018e2ce000 rw-p 0010b000 08:01 2359769                    /usr/bin/bash
56018e2ce000-56018e2d8000 rw-p 00000000 00:00 0
56018efcc000-56018f027000 rw-p 00000000 00:00 0                          [heap]
7f93f9542000-7f93f954d000 r-xp 00000000 08:01 2362059                    /usr/lib64/libnss_files-2.28.so
 ......
7f93fa174000-7f93fa175000 rw-p 00000000 00:00 0
7fffc82fa000-7fffc831b000 rw-p 00000000 00:00 0                          [stack]
7fffc83a1000-7fffc83a3000 r--p 00000000 00:00 0                          [vvar]
7fffc83a3000-7fffc83a5000 r-xp 00000000 00:00 0                          [vdso]
ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]

内核文件fs/proc/task_mmu.c

  • smaps

此文件给出每个虚拟内存段(VMA)实际占用的物理内存信息,如RSS和PSS。

7fffc83a3000-7fffc83a5000 r-xp 00000000 00:00 0                          [vdso]
Size:                  8 kB
Rss:                   4 kB
Pss:                   0 kB
Shared_Clean:          4 kB
Shared_Dirty:          0 kB
Private_Clean:         0 kB
Private_Dirty:         0 kB
Referenced:            4 kB
Anonymous:             0 kB
AnonHugePages:         0 kB
ShmemPmdMapped:        0 kB
Shared_Hugetlb:        0 kB
Private_Hugetlb:       0 kB
Swap:                  0 kB
SwapPss:               0 kB
KernelPageSize:        4 kB
MMUPageSize:           4 kB
Locked:                0 kB
VmFlags: rd ex mr mw me de
  • statm
[root@a82dc075718f self]# cat statm
3031 824 681 264 0 165 0

内存信息,共7个字段,单位为页(page),各字段含义对照内核实现如下:

/*
 * Show handler for /proc/<pid>/statm.
 *
 * Writes one line of seven space-separated decimal numbers to @m:
 *   size resident shared text 0 data 0
 * The fifth and seventh fields are always printed as 0.  If the task has no
 * mm (get_task_mm() returns NULL) all counters stay 0.
 *
 * @ns and @pid are not used by this function.  Always returns 0.
 */
int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
			struct pid *pid, struct task_struct *task)
{
	unsigned long size = 0, resident = 0, shared = 0, text = 0, data = 0;
	struct mm_struct *mm = get_task_mm(task);

	if (mm) {
		/* task_statm() returns size and fills the other counters. */
		size = task_statm(mm, &shared, &text, &data, &resident);
		/* Drop the reference taken by get_task_mm(). */
		mmput(mm);
	}
	/*
	 * For quick read, open code by putting numbers directly
	 * expected format is
	 * seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n",
	 *               size, resident, shared, text, data);
	 */
	seq_put_decimal_ull(m, "", size);
	seq_put_decimal_ull(m, " ", resident);
	seq_put_decimal_ull(m, " ", shared);
	seq_put_decimal_ull(m, " ", text);
	seq_put_decimal_ull(m, " ", 0);
	seq_put_decimal_ull(m, " ", data);
	seq_put_decimal_ull(m, " ", 0);
	seq_putc(m, '\n');

	return 0;
}

size: 虚拟内存;

resident:物理内存;

shared: 共享内存,so等;

text: 代码段;

data: 数据段和栈。

  • status
[root@a82dc075718f self]# cat status
Name:   bash
Umask:  0022
State:  S (sleeping)
Tgid:   1
Ngid:   0
Pid:    1
PPid:   0
TracerPid:      0
Uid:    0       0       0       0
Gid:    0       0       0       0
FDSize: 256
Groups:
NStgid: 1
NSpid:  1
NSpgid: 1
NSsid:  1
VmPeak:    12160 kB
VmSize:    12124 kB
VmLck:         0 kB
VmPin:         0 kB
VmHWM:      3296 kB
VmRSS:      3296 kB
RssAnon:             572 kB
RssFile:            2724 kB
RssShmem:              0 kB
VmData:      528 kB
VmStk:       132 kB
VmExe:      1056 kB
VmLib:      2156 kB
VmPTE:        52 kB
VmPMD:        12 kB
VmSwap:        0 kB
HugetlbPages:          0 kB
Threads:        1
SigQ:   0/7864
SigPnd: 0000000000000000
ShdPnd: 0000000000000000
SigBlk: 0000000000010000
SigIgn: 0000000000380004
SigCgt: 000000004b817efb
CapInh: 00000000a80425fb
CapPrm: 00000000a80425fb
CapEff: 00000000a80425fb
CapBnd: 00000000a80425fb
CapAmb: 0000000000000000
Seccomp:        2
Speculation_Store_Bypass:       thread force mitigated
Cpus_allowed:   00000000,00000000,00000000,00000003
Cpus_allowed_list:      0-1
Mems_allowed:   1
Mems_allowed_list:      0
voluntary_ctxt_switches:        1116
nonvoluntary_ctxt_switches:     76

VmSize: 虚拟内存总数;

VmRSS:物理内存,共享内存没有在多个进程之间平均。另外有一个PSS,是多进程平分共享内存的占用。

RssAnon:匿名映射大小;

RssFile: 文件映射大小。

调度

  • stack

内核态调用栈信息,可以在一定程度上跟踪死锁、死循环等问题;注意它打印的是进程在内核中的调用栈,而不是用户态栈。

  • sched

调度信息,以及优先级。

[root@a82dc075718f self]# cat sched
bash (3996, #threads: 1)
-------------------------------------------------------------------
se.exec_start                                :      15948317.402577
se.vruntime                                  :           919.743657
se.sum_exec_runtime                          :           248.854012
se.nr_migrations                             :                  301
nr_switches                                  :                  771
nr_voluntary_switches                        :                  724
nr_involuntary_switches                      :                   47
se.load.weight                               :              1048576
se.avg.load_sum                              :               313658
se.avg.util_sum                              :               311654
se.avg.load_avg                              :                    6
se.avg.util_avg                              :                    6
se.avg.last_update_time                      :       15948317402577
policy                                       :                    0
prio                                         :                  120
clock-delta                                  :                  204
  • schedstat

    [root@a82dc075718f self]# cat schedstat
    302087201 23041833 1038
    

3个值分别是

sum_exec_runtime: 总共执行的时间;

run_delay:在run_queue中等待调度的总时长;

pcount: 该进程被调度到CPU上运行的次数。

schedstat文件读取schedinfo信息,需要打开CONFIG_SCHED_INFO宏

#ifdef CONFIG_SCHED_INFO
/*
 * Provides /proc/PID/schedstat
 *
 * Prints three numbers on one line: task->se.sum_exec_runtime,
 * task->sched_info.run_delay and task->sched_info.pcount.  When
 * sched_info_on() reports that collection is off, "0 0 0" is printed
 * instead.  @ns and @pid are not used.  Always returns 0.
 */
static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns,
			      struct pid *pid, struct task_struct *task)
{
	if (unlikely(!sched_info_on()))
		seq_printf(m, "0 0 0\n");
	else
		seq_printf(m, "%llu %llu %lu\n",
		   (unsigned long long)task->se.sum_exec_runtime,
		   (unsigned long long)task->sched_info.run_delay,
		   task->sched_info.pcount);

	return 0;
}
#endif

sched_info数据结构

/*
 * Per-task scheduling statistics.  Every member is compiled in only when
 * CONFIG_SCHED_INFO is defined; without it the structure is empty.
 */
struct sched_info {
#ifdef CONFIG_SCHED_INFO
	/* Cumulative counters: */

	/* # of times we have run on this CPU: */
	unsigned long			pcount;

	/* Time spent waiting on a runqueue: */
	unsigned long long		run_delay;

	/* Timestamps: */

	/* When did we last run on a CPU? */
	unsigned long long		last_arrival;

	/* When were we last queued to run? */
	unsigned long long		last_queued;

#endif /* CONFIG_SCHED_INFO */
};

pcount: 调度到该进程的次数。

run_delay: 在队列中等待的总时间。

last_arrival: 上一次执行的时间;

last_queued: 上一次进入队列的时机。

sched info只打印了run_delay和pcount。

  • stat

这个文件打印的字段太多,不便直接阅读,对照函数**do_task_stat<fs/proc/array.c>**来解释即可。其中既有调度信息,也有内存段信息,比较杂乱。

  • wchan
cat /proc/2406/wchan
futex_wait_queue_me

进程睡眠(阻塞)时在内核中所等待的函数名,即进程当前阻塞在哪个内核函数上。

/*
 * Show handler for /proc/<pid>/wchan.
 *
 * Prints the symbol name for the address returned by get_wchan(task).
 * A single '0' is printed instead when the caller fails the
 * ptrace_may_access() check, when get_wchan() returns 0, or when
 * lookup_symbol_name() reports failure (non-zero).  No trailing newline is
 * written in either case.  @ns and @pid are not used.  Always returns 0.
 */
static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
			  struct pid *pid, struct task_struct *task)
{
	unsigned long wchan;
	char symname[KSYM_NAME_LEN];

	/* Callers without read access get the same output as "no wchan". */
	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
		goto print0;

	wchan = get_wchan(task);
	if (wchan && !lookup_symbol_name(wchan, symname)) {
		seq_puts(m, symname);
		return 0;
	}

print0:
	seq_putc(m, '0');
	return 0;
}
#endif /* CONFIG_KALLSYMS */

io

  • io

读写文件的字符,次数,实际从磁盘读写的字节数。

测试命令:
dd if=/dev/zero of=/tmp/test.dat &

test:/tmp # cat /proc/3828/io
rchar: 323934931         // 读出的总字节数,read或者pread()中的长度参数总和(pagecache中统计而来,不代表实际磁盘的读入)
wchar: 323929600        // 写入的总字节数,write或者pwrite中的长度参数总和
syscr: 632687               // read()或者pread()总的调用次数
syscw: 632675              // write()或者pwrite()总的调用次数
read_bytes: 0                      // 实际从磁盘中读取的字节总数   (这里if=/dev/zero 所以没有实际的读入字节数)
write_bytes: 323932160      // 实际写入到磁盘中的字节总数
cancelled_write_bytes: 0     // 由于截断pagecache导致应该发生而没有发生的写入字节数(可能为负数)

打开io统计,要开内核CONFIG_TASK_IO_ACCOUNTING, CONFIG_TASK_XACCT宏。

#ifdef CONFIG_TASK_IO_ACCOUNTING
	ONE("io",	S_IRUSR, proc_tid_io_accounting),
#endif

#ifdef CONFIG_TASK_IO_ACCOUNTING
/*
 * Print the I/O accounting counters of @task to @m.
 *
 * @task:  task whose counters are reported.
 * @m:     seq_file receiving the seven "name: value" lines.
 * @whole: when non-zero, also add task->signal->ioac and the ioac of every
 *         thread visited by while_each_thread() to the totals.
 *
 * Returns 0 on success, the error from mutex_lock_killable() if taking
 * cred_guard_mutex fails, or -EACCES if ptrace_may_access() denies the
 * caller.
 */
static int do_io_accounting(struct task_struct *task, struct seq_file *m, int whole)
{
	/* Start from a local copy of the task's own counters. */
	struct task_io_accounting acct = task->ioac;
	unsigned long flags;
	int result;

	result = mutex_lock_killable(&task->signal->cred_guard_mutex);
	if (result)
		return result;

	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
		result = -EACCES;
		goto out_unlock;
	}

	/* Aggregation is skipped if lock_task_sighand() does not succeed. */
	if (whole && lock_task_sighand(task, &flags)) {
		struct task_struct *t = task;

		task_io_accounting_add(&acct, &task->signal->ioac);
		while_each_thread(task, t)
			task_io_accounting_add(&acct, &t->ioac);

		unlock_task_sighand(task, &flags);
	}
	seq_printf(m,
		   "rchar: %llu\n"
		   "wchar: %llu\n"
		   "syscr: %llu\n"
		   "syscw: %llu\n"
		   "read_bytes: %llu\n"
		   "write_bytes: %llu\n"
		   "cancelled_write_bytes: %llu\n",
		   (unsigned long long)acct.rchar,
		   (unsigned long long)acct.wchar,
		   (unsigned long long)acct.syscr,
		   (unsigned long long)acct.syscw,
		   (unsigned long long)acct.read_bytes,
		   (unsigned long long)acct.write_bytes,
		   (unsigned long long)acct.cancelled_write_bytes);
	result = 0;

out_unlock:
	mutex_unlock(&task->signal->cred_guard_mutex);
	return result;
}

task_struct 有io统计的成员。

/*
 * Abridged excerpt of task_struct: only the I/O accounting member is shown,
 * the "......" line stands for all the other members that are left out.
 */
struct task_struct {
	......
	struct task_io_accounting	ioac;
}
/*
 * Per-task I/O counters.  The character and syscall counters exist only
 * with CONFIG_TASK_XACCT; the storage-level byte counters exist only with
 * CONFIG_TASK_IO_ACCOUNTING.
 */
struct task_io_accounting {
#ifdef CONFIG_TASK_XACCT
	/* bytes read */
	u64 rchar;
	/*  bytes written */
	u64 wchar;
	/* # of read syscalls */
	u64 syscr;
	/* # of write syscalls */
	u64 syscw;
#endif /* CONFIG_TASK_XACCT */

#ifdef CONFIG_TASK_IO_ACCOUNTING
	/*
	 * The number of bytes which this task has caused to be read from
	 * storage.
	 */
	u64 read_bytes;

	/*
	 * The number of bytes which this task has caused, or shall cause to be
	 * written to disk.
	 */
	u64 write_bytes;

	/*
	 * A task can cause "negative" IO too.  If this task truncates some
	 * dirty pagecache, some IO which another task has been accounted for
	 * (in its write_bytes) will not be happening.  We _could_ just
	 * subtract that from the truncating task's write_bytes, but there is
	 * information loss in doing that.
	 */
	u64 cancelled_write_bytes;
#endif /* CONFIG_TASK_IO_ACCOUNTING */
};
  • fd
    打开文件以及fd的对应关系
[root@a82dc075718f fd]# ls -l
total 0
lrwx------ 1 root root 64 Mar 12 03:39 0 -> /dev/pts/0
lrwx------ 1 root root 64 Mar 12 03:39 1 -> /dev/pts/0
lrwx------ 1 root root 64 Mar 12 03:39 2 -> /dev/pts/0
lrwx------ 1 root root 64 Mar 12 07:40 255 -> /dev/pts/0
  • limits

进程的限制参数,使用ulimit可以修改限制参数,如ulimit -c unlimited,取消coredump文件的大小限制。

cat limits
Limit                     Soft Limit           Hard Limit           Units
Max cpu time              unlimited            unlimited            seconds
Max file size             unlimited            unlimited            bytes
Max data size             unlimited            unlimited            bytes
Max stack size            8388608              unlimited            bytes
Max core file size        0                    unlimited            bytes
Max resident set          unlimited            unlimited            bytes
Max processes             unlimited            unlimited            processes
Max open files            1048576              1048576              files
Max locked memory         83968000             83968000             bytes
Max address space         unlimited            unlimited            bytes
Max file locks            unlimited            unlimited            locks
Max pending signals       7864                 7864                 signals
Max msgqueue size         819200               819200               bytes
Max nice priority         0                    0
Max realtime priority     0                    0
Max realtime timeout      unlimited            unlimited            us

参考资料

https://www.cnblogs.com/aspirs/p/13896571.html

  • 0
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值