Linux top命令的cpu使用率和内存使用率_top命令的cpu占用率怎么算的

open("/proc/stat", O_RDONLY)            = 4
open("/proc/uptime", O_RDONLY)          = 5
open("/proc/1/stat", O_RDONLY)          = 7
open("/proc/1/statm", O_RDONLY)         = 7
open("/etc/nsswitch.conf", O_RDONLY|O_CLOEXEC) = 7
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 7
open("/lib64/libnss\_files.so.2", O_RDONLY|O_CLOEXEC) = 7
open("/etc/passwd", O_RDONLY|O_CLOEXEC) = 7
open("/proc/2/stat", O_RDONLY)          = 7
open("/proc/2/statm", O_RDONLY)         = 7
open("/proc/3/stat", O_RDONLY)          = 7
open("/proc/3/statm", O_RDONLY)         = 7
open("/proc/5/stat", O_RDONLY)          = 7
open("/proc/5/statm", O_RDONLY)         = 7
open("/proc/7/stat", O_RDONLY)          = 7
open("/proc/7/statm", O_RDONLY)         = 7
open("/proc/8/stat", O_RDONLY)          = 7
open("/proc/8/statm", O_RDONLY)         = 7
open("/proc/9/stat", O_RDONLY)          = 7
open("/proc/9/statm", O_RDONLY)         = 7
open("/proc/10/stat", O_RDONLY)         = 7
open("/proc/10/statm", O_RDONLY)        = 7
open("/proc/11/stat", O_RDONLY)         = 7
open("/proc/11/statm", O_RDONLY)        = 7
open("/proc/12/stat", O_RDONLY)         = 7
open("/proc/12/statm", O_RDONLY)        = 7
open("/proc/13/stat", O_RDONLY)         = 7
open("/proc/13/statm", O_RDONLY)        = 7
open("/proc/14/stat", O_RDONLY)         = 7
open("/proc/14/statm", O_RDONLY)        = 7
open("/proc/16/stat", O_RDONLY)         = 7
open("/proc/16/statm", O_RDONLY)        = 7
open("/proc/17/stat", O_RDONLY)         = 7
open("/proc/17/statm", O_RDONLY)        = 7
open("/proc/18/stat", O_RDONLY)         = 7
open("/proc/18/statm", O_RDONLY)        = 7
open("/proc/19/stat", O_RDONLY)         = 7
open("/proc/19/statm", O_RDONLY)        = 7
open("/proc/21/stat", O_RDONLY)         = 7
open("/proc/21/statm", O_RDONLY)        = 7
open("/proc/22/stat", O_RDONLY)         = 7
open("/proc/22/statm", O_RDONLY)        = 7
open("/proc/23/stat", O_RDONLY)         = 7
open("/proc/23/statm", O_RDONLY)        = 7
open("/proc/24/stat", O_RDONLY)         = 7

1.3 内核相关源码

// linux-3.10/include/linux/kernel\_stat.h

/\*
 \* 'kernel\_stat.h' contains the definitions needed for doing
 \* some kernel statistics (CPU usage, context switches ...),
 \* used by rstatd/perfmeter
 \*/

enum cpu\_usage\_stat {
	CPUTIME_USER,
	CPUTIME_NICE,
	CPUTIME_SYSTEM,
	CPUTIME_SOFTIRQ,
	CPUTIME_IRQ,
	CPUTIME_IDLE,
	CPUTIME_IOWAIT,
	CPUTIME_STEAL,
	CPUTIME_GUEST,
	CPUTIME_GUEST_NICE,
	NR_STATS,
};

struct kernel\_cpustat {
	u64 cpustat[NR_STATS];
};

DECLARE\_PER\_CPU(struct kernel\_cpustat, kernel_cpustat);

/\* Must have preemption disabled for this to be meaningful. \*/
#define kcpustat\_this\_cpu (&\_\_get\_cpu\_var(kernel\_cpustat))
#define kcpustat\_cpu(cpu) per\_cpu(kernel\_cpustat, cpu)

// linux-3.10/kernel/sched/core.c

DEFINE\_PER\_CPU(struct kernel\_cpustat, kernel_cpustat);
EXPORT\_PER\_CPU\_SYMBOL(kernel_cpustat);

[root@localhost ~]# cat /proc/kallsyms | grep '\<kernel_cpustat\>'
0000000000015b00 A kernel_cpustat

[root@localhost ~]# cat /proc/kallsyms | grep '\<__per_cpu_start\>'
0000000000000000 A __per_cpu_start
[root@localhost ~]# cat /proc/kallsyms | grep '\<__per_cpu_end\>'
000000000001d000 A __per_cpu_end

kernel_cpustat 的地址(0x15b00)位于 __per_cpu_start(0x0)和 __per_cpu_end(0x1d000)之间,说明它是内核中的 per-cpu 变量。

cpu 使用率的统计数据保存在 per-cpu 变量 kernel_cpustat 的 cpustat[] 数组中(以 enum cpu_usage_stat 的各个成员为下标),它是在时钟中断中更新的:时钟中断处理程序每次都会调用 account_process_tick 函数,把这一个 tick 的时间累加到对应的统计项上:

// linux-3.10/kernel/sched/cputime.c

/\*
 \* Account a single tick of cpu time.
 \* @p: the process that the cpu time gets accounted to
 \* @user\_tick: indicates if the tick is a user or a system tick
 \*/
void account\_process\_tick(struct task\_struct \*p, int user_tick)
{
	cputime\_t one_jiffy_scaled = cputime\_to\_scaled(cputime_one_jiffy);
	struct rq \*rq = this\_rq();

	if (vtime\_accounting\_enabled())
		return;

	if (sched_clock_irqtime) {
		irqtime\_account\_process\_tick(p, user_tick, rq);
		return;
	}
	
	// 统计CPUTIME\_STEAL
	if (steal\_account\_process\_tick())
		return;

	if (user_tick)
		// (1) 统计用户时间(包括 CPUTIME_USER 和 CPUTIME_NICE )
		account\_user\_time(p, cputime_one_jiffy, one_jiffy_scaled);
		
	else if ((p != rq->idle) || (irq\_count() != HARDIRQ_OFFSET))
		// (2) 统计内核时间(包括 CPUTIME_SYSTEM、CPUTIME_IRQ 和 CPUTIME_SOFTIRQ )
		account\_system\_time(p, HARDIRQ_OFFSET, cputime_one_jiffy,
				    one_jiffy_scaled);
				    
	else
		// (3) 统计空闲时间(包括 CPUTIME_IOWAIT 和 CPUTIME_IDLE)
		account\_idle\_time(cputime_one_jiffy);
}


(1) 统计用户时间(包括 CPUTIME_USER 和 CPUTIME_NICE )

// linux-3.10/kernel/sched/cputime.c

/\*
 \* Account user cpu time to a process.
 \* @p: the process that the cpu time gets accounted to
 \* @cputime: the cpu time spent in user space since the last update
 \* @cputime\_scaled: cputime scaled by cpu frequency
 \*/
void account\_user\_time(struct task\_struct \*p, cputime\_t cputime,
		       cputime\_t cputime_scaled)
{
	int index;

	/\* Add user time to process. \*/
	p->utime += cputime;
	p->utimescaled += cputime_scaled;
	account\_group\_user\_time(p, cputime);

	index = (TASK\_NICE(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;

	/\* Add user time to cpustat. \*/
	task\_group\_account\_field(p, index, (__force u64) cputime);

	/\* Account for user time used \*/
	acct\_account\_cputime(p);
}

(2) 统计内核时间(包括 CPUTIME_SYSTEM、CPUTIME_IRQ 和 CPUTIME_SOFTIRQ )

// linux-3.10/kernel/sched/cputime.c

/\*
 \* Account system cpu time to a process.
 \* @p: the process that the cpu time gets accounted to
 \* @hardirq_offset: the offset to subtract from hardirq\_count()
 \* @cputime: the cpu time spent in kernel space since the last update
 \* @cputime_scaled: cputime scaled by cpu frequency
 \*/
void account\_system\_time(struct task\_struct \*p, int hardirq_offset,
			 cputime\_t cputime, cputime\_t cputime_scaled)
{
	int index;

	if ((p->flags & PF_VCPU) && (irq\_count() - hardirq_offset == 0)) {
		// 统计 CPUTIME\_GUEST 和 CPUTIME\_GUEST\_NICE
		account\_guest\_time(p, cputime, cputime_scaled);
		return;
	}

	if (hardirq\_count() - hardirq_offset)
		index = CPUTIME_IRQ;
	else if (in\_serving\_softirq())
		index = CPUTIME_SOFTIRQ;
	else
		index = CPUTIME_SYSTEM;

	\_\_account\_system\_time(p, cputime, cputime_scaled, index);
}

(3) 统计空闲时间(包括 CPUTIME_IOWAIT 和 CPUTIME_IDLE)

// linux-3.10/kernel/sched/cputime.c

/\*
 \* Account for idle time.
 \* @cputime: the cpu time spent in idle wait
 \*/
void account\_idle\_time(cputime\_t cputime)
{
	u64 \*cpustat = kcpustat_this_cpu->cpustat;
	struct rq \*rq = this\_rq();

	if (atomic\_read(&rq->nr_iowait) > 0)
		cpustat[CPUTIME_IOWAIT] += (__force u64) cputime;
	else
		cpustat[CPUTIME_IDLE] += (__force u64) cputime;
}

(4)统计CPUTIME_STEAL

/\*
 \* Account for involuntary wait time.
 \* @cputime: the cpu time spent in involuntary wait
 \*/
void account\_steal\_time(cputime\_t cputime)
{
	u64 \*cpustat = kcpustat_this_cpu->cpustat;

	cpustat[CPUTIME_STEAL] += (__force u64) cputime;
}

(5)统计 CPUTIME_GUEST 和 CPUTIME_GUEST_NICE

// linux-3.10/kernel/sched/cputime.c

/\*
 \* Account guest cpu time to a process.
 \* @p: the process that the cpu time gets accounted to
 \* @cputime: the cpu time spent in virtual machine since the last update
 \* @cputime\_scaled: cputime scaled by cpu frequency
 \*/
static void account\_guest\_time(struct task\_struct \*p, cputime\_t cputime,
			       cputime\_t cputime_scaled)
{
	u64 \*cpustat = kcpustat_this_cpu->cpustat;

	/\* Add guest time to process. \*/
	p->utime += cputime;
	p->utimescaled += cputime_scaled;
	account\_group\_user\_time(p, cputime);
	p->gtime += cputime;

	/\* Add guest time to cpustat. \*/
	if (TASK\_NICE(p) > 0) {
		cpustat[CPUTIME_NICE] += (__force u64) cputime;
		cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime;
	} else {
		cpustat[CPUTIME_USER] += (__force u64) cputime;
		cpustat[CPUTIME_GUEST] += (__force u64) cputime;
	}
}

从 account_guest_time 函数中我们可以看到 guest cpu time 是被统计到了 用户态 cpu 时间中,即:
user(通常缩写为 us),代表用户态 CPU 时间,包括了 guest 时间。CPUTIME_USER成员中包含了CPUTIME_GUEST成员。
nice(通常缩写为 ni),代表低优先级用户态 CPU 时间,包括了 guest_nice 时间。CPUTIME_NICE成员中包含了CPUTIME_GUEST_NICE成员。

二、内存使用率

2.1 总内存有关的数据

KiB Mem :  7890812 total,  6313084 free,   501864 used,  1075864 buff/cache
KiB Swap:  8126460 total,  8126460 free,        0 used.  7008280 avail Mem

默认情况下,第1行显示物理内存:total = free + used + buff/cache

运行top后,按E可以切换内存大小显示的单位:

GiB Mem :      7.5 total,      6.0 free,      0.5 used,      1.0 buff/cache
GiB Swap:      7.7 total,      7.7 free,      0.0 used.      6.7 avail Mem

2.2 进程使用内存有关的数据

  PID USER      PR  NI    VIRT    RES    SHR S  %CPU %MEM     TIME+ COMMAND
17487 root      20   0  162148   2324   1548 R   6.2  0.0   0:00.01 top
    1 root      20   0  193800   6972   4200 S   0.0  0.1   0:18.30 systemd
    2 root      20   0       0      0      0 S   0.0  0.0   0:00.17 kthreadd
    3 root      20   0       0      0      0 S   0.0  0.0   0:01.04 ksoftirqd/0
    5 root       0 -20       0      0      0 S   0.0  0.0   0:00.00 kworker/0:0H


其中与进程内存有关的字段VIRT、RES、SHR和%MEM

 VIRT  --  Virtual Memory Size (KiB)
           The total amount of virtual memory used by the task.  It includes all code, data and shared libraries plus pages that have been swapped out and pages that have  been  mapped  but  not
           used.

RES  --  Resident Memory Size (KiB)
           The non-swapped physical memory a task is using.

RES 中有一些物理内存是被一些进程给共享的。

 SHR  --  Shared Memory Size (KiB)
           The amount of shared memory available to a task, not all of which is typically resident.  It simply reflects memory that could be potentially shared with other processes.

 %MEM  --  Memory Usage (RES)
           A task's currently used share of available physical memory.

运行top后,按g,再按3进入内存模式:

  PID %MEM    VIRT    RES   CODE    DATA    SHR nMaj nDRT  %CPU COMMAND
 5736  1.3 3269676 100932     16 2134452  40200   85    0   0.0 gnome-shell
 4635  0.4  358196  29168      4   94984   7028    1    0   0.0 firewalld
 5265  0.3  264324  26332   2284   90140  10616    0    0   0.0 X
 5184  0.3 1012032  21440    468  641332  12948    0    0   0.0 libvirtd
 4533  0.2  620088  17908    108  529600   5352    2    0   0.0 polkitd
 5177  0.2  573828  17408      4  304768   6116    1    0   0.0 tuned
 ......

在内存模式中,我们可以看到各个进程内存的 %MEM、VIRT、RES、CODE、DATA、SHR、nMaj、nDRT。通过 strace 来跟踪 top 进程,你会发现这些信息都是从 /proc/[pid]/statm 和 /proc/[pid]/stat 这两个文件里面读取的。
其中 nMaj(Major Page Fault,主缺页中断,即需要从磁盘读入内容才能完成的缺页次数)来自 /proc/[pid]/stat,%MEM 是由 RES 计算而来的,其余的内存信息都是从 statm 文件里面读取的。如下是 top 命令中的字段和 statm 中字段的对应关系:

[root@localhost]# cat /proc/1/statm
48450 1743 1050 353 0 37524 0

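(原文此处的图片缺失,以下为文字版的对应关系。statm 中各字段的单位是页,本机页大小为 4 KiB,top 显示时换算成 KiB,可与上文 top 输出中 PID 1 的 VIRT/RES/SHR 对照。)

statm 第1列 size     = 48450 页  ->  VIRT = 48450 × 4 = 193800 KiB
statm 第2列 resident = 1743 页   ->  RES  = 1743 × 4  = 6972 KiB
statm 第3列 shared   = 1050 页   ->  SHR  = 1050 × 4  = 4200 KiB
statm 第4列 text     = 353 页    ->  CODE = 353 × 4   = 1412 KiB
statm 第5列 lib      = 0         ->  top 中没有对应字段(Linux 2.6 起恒为 0)
statm 第6列 data     = 37524 页  ->  DATA = 37524 × 4 = 150096 KiB
statm 第7列 dt       = 0         ->  nDRT(Linux 2.6 起恒为 0)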

CODE  --  Code Size (KiB)
           The amount of physical memory devoted to executable code, also known as the Text Resident Set size or TRS.

DATA  --  Data + Stack Size (KiB)
           The amount of physical memory devoted to other than executable code, also known as the Data Resident Set size or DRS.

nMaj  --  Major Page Fault Count
           The number of major page faults that have occurred for a task.  A page fault occurs when a process attempts to read from or write to a virtual page that is not  currently  present  in
           its address space.  A major page fault is when auxiliary storage access is involved in making that page available.

与nMaj相对应的还有nMin:

nMin  --  Minor Page Fault count
           The  number  of  minor page faults that have occurred for a task.  A page fault occurs when a process attempts to read from or write to a virtual page that is not currently present in
           its address space.  A minor page fault does not involve auxiliary storage access in making that page available.

major fault与minor fault的区别:

major fault:
user space address触发缺页异常时,若被访问的地址映射的物理页已经被swap到磁盘空间,需要从磁盘中将页面换入。
user space address触发缺页异常时,若被访问的地址空间是被mmap映射到磁盘文件的话且page cache中还未缓存文件内容,需要通过磁盘IO将内容读入page cache。

minor fault:
当user space address触发缺页异常时,kernel可直接从buddy system中分配出内存用来满足该缺页异常即minor page fault
简单来说,major fault和minor fault的区别就是是否会触发读写磁盘的动作。

来源于Linux内存管理:缺页异常(一)

nDRT  --  Dirty Pages Count
           The number of pages that have been modified since they were last written to auxiliary storage.  Dirty pages must be written to auxiliary storage before the corresponding physical mem‐
           ory location can be used for some other virtual page.

2.3 内存使用率的来源

总的内存使用率数据来源:/proc/meminfo

open("/proc/meminfo", O_RDONLY)         = 6
open("/proc/meminfo", O_RDONLY|O_CLOEXEC) = 8

每个进程内存使用率数据来源:/proc/pid/statm

open("/proc/2/stat", O_RDONLY)          = 7
open("/proc/2/statm", O_RDONLY)         = 7
open("/proc/3/stat", O_RDONLY)          = 7
open("/proc/3/statm", O_RDONLY)         = 7
open("/proc/5/stat", O_RDONLY)          = 7
open("/proc/5/statm", O_RDONLY)         = 7
open("/proc/7/stat", O_RDONLY)          = 7
open("/proc/7/statm", O_RDONLY)         = 7
open("/proc/8/stat", O_RDONLY)          = 7
open("/proc/8/statm", O_RDONLY)         = 7
open("/proc/9/stat", O_RDONLY)          = 7


**网上学习资料一大堆,但如果学到的知识不成体系,遇到问题时只是浅尝辄止,不再深入研究,那么很难做到真正的技术提升。**

**[需要这份系统化的资料的朋友,可以点击这里获取!](https://bbs.csdn.net/topics/618542503)**

**一个人可以走得很快,但一群人才能走得更远!不论你是正从事IT行业的老鸟或是对IT行业感兴趣的新人,都欢迎加入我们的圈子(技术交流、学习资源、职场吐槽、大厂内推、面试辅导),让我们一起学习成长!**
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值