open(“/proc/stat”, O_RDONLY) = 4
open(“/proc/uptime”, O_RDONLY) = 5
open(“/proc/1/stat”, O_RDONLY) = 7
open(“/proc/1/statm”, O_RDONLY) = 7
open(“/etc/nsswitch.conf”, O_RDONLY|O_CLOEXEC) = 7
open(“/etc/ld.so.cache”, O_RDONLY|O_CLOEXEC) = 7
open(“/lib64/libnss_files.so.2”, O_RDONLY|O_CLOEXEC) = 7
open(“/etc/passwd”, O_RDONLY|O_CLOEXEC) = 7
open(“/proc/2/stat”, O_RDONLY) = 7
open(“/proc/2/statm”, O_RDONLY) = 7
open(“/proc/3/stat”, O_RDONLY) = 7
open(“/proc/3/statm”, O_RDONLY) = 7
open(“/proc/5/stat”, O_RDONLY) = 7
open(“/proc/5/statm”, O_RDONLY) = 7
open(“/proc/7/stat”, O_RDONLY) = 7
open(“/proc/7/statm”, O_RDONLY) = 7
open(“/proc/8/stat”, O_RDONLY) = 7
open(“/proc/8/statm”, O_RDONLY) = 7
open(“/proc/9/stat”, O_RDONLY) = 7
open(“/proc/9/statm”, O_RDONLY) = 7
open(“/proc/10/stat”, O_RDONLY) = 7
open(“/proc/10/statm”, O_RDONLY) = 7
open(“/proc/11/stat”, O_RDONLY) = 7
open(“/proc/11/statm”, O_RDONLY) = 7
open(“/proc/12/stat”, O_RDONLY) = 7
open(“/proc/12/statm”, O_RDONLY) = 7
open(“/proc/13/stat”, O_RDONLY) = 7
open(“/proc/13/statm”, O_RDONLY) = 7
open(“/proc/14/stat”, O_RDONLY) = 7
open(“/proc/14/statm”, O_RDONLY) = 7
open(“/proc/16/stat”, O_RDONLY) = 7
open(“/proc/16/statm”, O_RDONLY) = 7
open(“/proc/17/stat”, O_RDONLY) = 7
open(“/proc/17/statm”, O_RDONLY) = 7
open(“/proc/18/stat”, O_RDONLY) = 7
open(“/proc/18/statm”, O_RDONLY) = 7
open(“/proc/19/stat”, O_RDONLY) = 7
open(“/proc/19/statm”, O_RDONLY) = 7
open(“/proc/21/stat”, O_RDONLY) = 7
open(“/proc/21/statm”, O_RDONLY) = 7
open(“/proc/22/stat”, O_RDONLY) = 7
open(“/proc/22/statm”, O_RDONLY) = 7
open(“/proc/23/stat”, O_RDONLY) = 7
open(“/proc/23/statm”, O_RDONLY) = 7
open(“/proc/24/stat”, O_RDONLY) = 7
### 1.3 内核相关源码
// linux-3.10/include/linux/kernel_stat.h
/*
* 'kernel_stat.h' contains the definitions needed for doing
* some kernel statistics (CPU usage, context switches ...),
* used by rstatd/perfmeter
*/
/*
 * Index of each CPU-time counter in kernel_cpustat.cpustat[].
 * NR_STATS comes last, so it equals the number of counters and is used as
 * the length of that array.
 */
enum cpu_usage_stat {
CPUTIME_USER, /* user time of tasks whose nice value is <= 0; guest time is added here too */
CPUTIME_NICE, /* user time of tasks whose nice value is > 0; guest_nice time is added here too */
CPUTIME_SYSTEM, /* kernel time that is neither hardirq nor softirq time */
CPUTIME_SOFTIRQ, /* kernel time spent while serving a softirq */
CPUTIME_IRQ, /* kernel time spent in hardirq context */
CPUTIME_IDLE, /* idle time when rq->nr_iowait is not > 0 */
CPUTIME_IOWAIT, /* idle time while rq->nr_iowait > 0 */
CPUTIME_STEAL, /* involuntary wait time */
CPUTIME_GUEST, /* guest time of tasks whose nice value is <= 0 */
CPUTIME_GUEST_NICE, /* guest time of tasks whose nice value is > 0 */
NR_STATS, /* number of counters, not a counter itself */
};
/* A set of NR_STATS 64-bit time counters, indexed by enum cpu_usage_stat. */
struct kernel_cpustat {
u64 cpustat[NR_STATS];
};
/* Declares the per-CPU variable kernel_cpustat (type struct kernel_cpustat). */
DECLARE_PER_CPU(struct kernel_cpustat, kernel_cpustat);
/* Must have preemption disabled for this to be meaningful. */
/* kcpustat_this_cpu evaluates to a pointer (note the &) to the executing CPU's copy. */
#define kcpustat_this_cpu (&__get_cpu_var(kernel_cpustat))
/* kcpustat_cpu(cpu) evaluates to the copy that belongs to the given cpu (not a pointer). */
#define kcpustat_cpu(cpu) per_cpu(kernel_cpustat, cpu)
// linux-3.10/kernel/sched/core.c
/* Definition of the per-CPU kernel_cpustat variable that kernel_stat.h declares. */
DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat);
/* NOTE(review): presumably exports the per-CPU symbol for use by modules - confirm. */
EXPORT_PER_CPU_SYMBOL(kernel_cpustat);
[root@localhost ~]# cat /proc/kallsyms | grep '\<kernel_cpustat\>'
0000000000015b00 A kernel_cpustat
[root@localhost ~]# cat /proc/kallsyms | grep '\<__per_cpu_start\>'
0000000000000000 A __per_cpu_start
[root@localhost ~]# cat /proc/kallsyms | grep '\<__per_cpu_end\>'
000000000001d000 A __per_cpu_end
kernel\_cpustat 的地址位于 \_\_per\_cpu\_start 和 \_\_per\_cpu\_end 范围内,是内核中的 per-cpu 变量。
CPU 使用率数据保存在 per-cpu 的 struct kernel\_cpustat 中(其 cpustat 数组以 enum cpu\_usage\_stat 为下标),这些数据是在时钟中断中更新的:时钟中断处理程序每次都会调用 account\_process\_tick 函数来更新 kernel\_cpustat 结构体:
// linux-3.10/kernel/sched/cputime.c
/*
* Account a single tick of cpu time.
* @p: the process that the cpu time gets accounted to
* @user_tick: indicates if the tick is a user or a system tick
*/
void account_process_tick(struct task_struct *p, int user_tick)
{
cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
struct rq *rq = this_rq();
if (vtime\_accounting\_enabled())
return;
if (sched_clock_irqtime) {
irqtime\_account\_process\_tick(p, user_tick, rq);
return;
}
// 统计CPUTIME\_STEAL
if (steal\_account\_process\_tick())
return;
if (user_tick)
(1) 统计用户时间(包括 CPUTIME_USER 和 CPUTIME_NICE )
account\_user\_time(p, cputime_one_jiffy, one_jiffy_scaled);
else if ((p != rq->idle) || (irq\_count() != HARDIRQ_OFFSET))
(2) 统计内核时间(包括 CPUTIME_SYSTEM、CPUTIME_IRQ 和 CPUTIME_SOFTIRQ )
account\_system\_time(p, HARDIRQ_OFFSET, cputime_one_jiffy,
one_jiffy_scaled);
else
(3) 统计空闲时间(包括 CPUTIME_IOWAIT 和 CPUTIME_IDLE)
account\_idle\_time(cputime_one_jiffy);
}
(1) 统计用户时间(包括 CPUTIME\_USER 和 CPUTIME\_NICE )
// linux-3.10/kernel/sched/cputime.c
/*
 * Account user cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in user space since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 *
 * The time is added to the task's own utime/utimescaled and to one cpustat
 * field: CPUTIME_NICE when TASK_NICE(p) > 0, CPUTIME_USER otherwise.
 */
void account_user_time(struct task_struct *p, cputime_t cputime,
		       cputime_t cputime_scaled)
{
	int index;

	/* Add user time to process. */
	p->utime += cputime;
	p->utimescaled += cputime_scaled;
	account_group_user_time(p, cputime);

	index = (TASK_NICE(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;

	/* Add user time to cpustat. */
	task_group_account_field(p, index, (__force u64) cputime);

	/* Account for user time used */
	acct_account_cputime(p);
}
(2) 统计内核时间(包括 CPUTIME\_SYSTEM、CPUTIME\_IRQ 和 CPUTIME\_SOFTIRQ )
// linux-3.10/kernel/sched/cputime.c
* Account system cpu time to a process.
* @p: the process that the cpu time gets accounted to
* @hardirq_offset: the offset to subtract from hardirq_count()
* @cputime: the cpu time spent in kernel space since the last update
* @cputime_scaled: cputime scaled by cpu frequency
*/
void account_system_time(struct task_struct *p, int hardirq_offset,
cputime_t cputime, cputime_t cputime_scaled)
{
int index;
if ((p->flags & PF_VCPU) && (irq\_count() - hardirq_offset == 0)) {
// 统计 CPUTIME\_GUEST 和 CPUTIME\_GUEST\_NICE
account\_guest\_time(p, cputime, cputime_scaled);
return;
}
if (hardirq\_count() - hardirq_offset)
index = CPUTIME_IRQ;
else if (in\_serving\_softirq())
index = CPUTIME_SOFTIRQ;
else
index = CPUTIME_SYSTEM;
\_\_account\_system\_time(p, cputime, cputime_scaled, index);
}
(3) 统计空闲时间(包括 CPUTIME\_IOWAIT 和 CPUTIME\_IDLE)
// linux-3.10/kernel/sched/cputime.c
/*
 * Account for idle time.
 * @cputime: the cpu time spent in idle wait
 *
 * The time goes to the executing CPU's CPUTIME_IOWAIT counter while
 * rq->nr_iowait is greater than zero, and to CPUTIME_IDLE otherwise.
 */
void account_idle_time(cputime_t cputime)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;
	struct rq *rq = this_rq();

	if (atomic_read(&rq->nr_iowait) > 0)
		cpustat[CPUTIME_IOWAIT] += (__force u64) cputime;
	else
		cpustat[CPUTIME_IDLE] += (__force u64) cputime;
}
(4)统计CPUTIME\_STEAL
/*
* Account for involuntary wait time.
* @cputime: the cpu time spent in involuntary wait
*
* Adds @cputime to the CPUTIME_STEAL counter reached through
* kcpustat_this_cpu; no per-task field is touched here.
*/
void account_steal_time(cputime_t cputime)
{
u64 *cpustat = kcpustat_this_cpu->cpustat;
cpustat[CPUTIME_STEAL] += (__force u64) cputime;
}
(5)统计 CPUTIME\_GUEST 和 CPUTIME\_GUEST\_NICE
// linux-3.10/kernel/sched/cputime.c
/*
 * Account guest cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in virtual machine since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 *
 * Guest time is counted twice on purpose: once in the matching user-time
 * counter (CPUTIME_USER or CPUTIME_NICE) and once in the matching guest
 * counter (CPUTIME_GUEST or CPUTIME_GUEST_NICE). The task's utime and gtime
 * are both increased as well.
 */
static void account_guest_time(struct task_struct *p, cputime_t cputime,
			       cputime_t cputime_scaled)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;

	/* Add guest time to process. */
	p->utime += cputime;
	p->utimescaled += cputime_scaled;
	account_group_user_time(p, cputime);
	p->gtime += cputime;

	/* Add guest time to cpustat. */
	if (TASK_NICE(p) > 0) {
		cpustat[CPUTIME_NICE] += (__force u64) cputime;
		cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime;
	} else {
		cpustat[CPUTIME_USER] += (__force u64) cputime;
		cpustat[CPUTIME_GUEST] += (__force u64) cputime;
	}
}
从 account\_guest\_time 函数中我们可以看到 guest cpu time 是被统计到了 用户态 cpu 时间中,即:
user(通常缩写为 us),代表用户态 CPU 时间,包括了 guest 时间。CPUTIME\_USER成员中包含了CPUTIME\_GUEST成员。
nice(通常缩写为 ni),代表低优先级用户态 CPU 时间,包括了 guest\_nice 时间。CPUTIME\_NICE成员中包含了CPUTIME\_GUEST\_NICE成员。
## 二、内存使用率
### 2.1 总内存有关的数据
KiB Mem : 7890812 total, 6313084 free, 501864 used, 1075864 buff/cache
KiB Swap: 8126460 total, 8126460 free, 0 used. 7008280 avail Mem
默认情况下,第1行显示物理内存:total = free + used + buff/cache
运行top后,按E可以切换内存大小显示的单位:
GiB Mem : 7.5 total, 6.0 free, 0.5 used, 1.0 buff/cache
GiB Swap: 7.7 total, 7.7 free, 0.0 used. 6.7 avail Mem
### 2.2 进程使用内存有关的数据
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
17487 root 20 0 162148 2324 1548 R 6.2 0.0 0:00.01 top
1 root 20 0 193800 6972 4200 S 0.0 0.1 0:18.30 systemd
2 root 20 0 0 0 0 S 0.0 0.0 0:00.17 kthreadd
3 root 20 0 0 0 0 S 0.0 0.0 0:01.04 ksoftirqd/0
5 root 0 -20 0 0 0 S 0.0 0.0 0:00.00 kworker/0:0H
其中与进程内存有关的字段VIRT、RES、SHR和%MEM
VIRT – Virtual Memory Size (KiB)
The total amount of virtual memory used by the task. It includes all code, data and shared libraries plus pages that have been swapped out and pages that have been mapped but not
used.
RES – Resident Memory Size (KiB)
The non-swapped physical memory a task is using.
RES 中有一些物理内存是被一些进程给共享的。
SHR – Shared Memory Size (KiB)
The amount of shared memory available to a task, not all of which is typically resident. It simply reflects memory that could be potentially shared with other processes.
%MEM – Memory Usage (RES)
A task’s currently used share of available physical memory.
运行top后,按g,再按3进入内存模式:
PID %MEM VIRT RES CODE DATA SHR nMaj nDRT %CPU COMMAND
5736 1.3 3269676 100932 16 2134452 40200 85 0 0.0 gnome-shell
4635 0.4 358196 29168 4 94984 7028 1 0 0.0 firewalld
5265 0.3 264324 26332 2284 90140 10616 0 0 0.0 X
5184 0.3 1012032 21440 468 641332 12948 0 0 0.0 libvirtd
4533 0.2 620088 17908 108 529600 5352 2 0 0.0 polkitd
5177 0.2 573828 17408 4 304768 6116 1 0 0.0 tuned
…
在内存模式中,我们可以看到各个进程内存的 %MEM、VIRT、RES、CODE、DATA、SHR、nMaj、nDRT,这些信息通过 strace 来跟踪 top 进程,你会发现这些信息都是从 /proc/[pid]/statm 和 /proc/[pid]/stat 这两个文件里面读取的。
其中,nMaj(Major Page Fault,主缺页中断,指所访问的内容不在内存中、需要从磁盘读取的缺页次数)来自 stat 文件,%MEM 是由 RES 计算而来的,其余的内存信息都是从 statm 文件里面读取的。如下是 top 命令中的字段和 statm 中字段的对应关系:
[root@localhost]# cat /proc/1/statm
48450 1743 1050 353 0 37524 0
![在这里插入图片描述](https://img-blog.csdnimg.cn/4ff58bbb3134473d917e7b4afc3bf594.png)
CODE – Code Size (KiB)
The amount of physical memory devoted to executable code, also known as the Text Resident Set size or TRS.
DATA – Data + Stack Size (KiB)
The amount of physical memory devoted to other than executable code, also known as the Data Resident Set size or DRS.
nMaj – Major Page Fault Count
The number of major page faults that have occurred for a task. A page fault occurs when a process attempts to read from or write to a virtual page that is not currently present in
its address space. A major page fault is when auxiliary storage access is involved in making that page available.
与nMaj相对应的还有nMin:
nMin – Minor Page Fault count
The number of minor page faults that have occurred for a task. A page fault occurs when a process attempts to read from or write to a virtual page that is not currently present in
its address space. A minor page fault does not involve auxiliary storage access in making that page available.
major fault与minor fault的区别:
major fault:
user space address触发缺页异常时,若被访问的地址映射的物理页已经被swap到磁盘空间,需要从磁盘中将页面换入。
user space address触发缺页异常时,若被访问的地址空间是被mmap映射到磁盘文件的话且page cache中还未缓存文件内容,需要通过磁盘IO将内容读入page cache。
minor fault:
当user space address触发缺页异常时,kernel可直接从buddy system中分配出内存用来满足该缺页异常即minor page fault
简单来说,major fault和minor fault的区别就是是否会触发读写磁盘的动作。
来源于[Linux内存管理:缺页异常(一)](https://bbs.csdn.net/topics/618542503)
nDRT – Dirty Pages Count
The number of pages that have been modified since they were last written to auxiliary storage. Dirty pages must be written to auxiliary storage before the corresponding physical memory
location can be used for some other virtual page.
### 2.3 内存使用率的来源
总的内存使用率数据来源:/proc/meminfo
open(“/proc/meminfo”, O_RDONLY) = 6
open(“/proc/meminfo”, O_RDONLY|O_CLOEXEC) = 8
每个进程内存使用率数据来源:/proc/[pid]/stat 和 /proc/[pid]/statm
open(“/proc/2/stat”, O_RDONLY) = 7
open(“/proc/2/statm”, O_RDONLY) = 7
open(“/proc/3/stat”, O_RDONLY) = 7
open(“/proc/3/statm”, O_RDONLY) = 7
open(“/proc/5/stat”, O_RDONLY) = 7
open(“/proc/5/statm”, O_RDONLY) = 7
open(“/proc/7/stat”, O_RDONLY) = 7
open(“/proc/7/statm”, O_RDONLY) = 7
open(“/proc/8/stat”, O_RDONLY) = 7
open(“/proc/8/statm”, O_RDONLY) = 7
open(“/proc/9/stat”, O_RDONLY) = 7
open(“/proc/9/statm”, O_RDONLY) = 7
先自我介绍一下,小编浙江大学毕业,去过华为、字节跳动等大厂,目前在阿里
深知大多数程序员,想要提升技能,往往是自己摸索成长,但自己不成体系的自学效果低效又漫长,而且极易碰到天花板技术停滞不前!
因此收集整理了一份《2024年最新Linux运维全套学习资料》,初衷也很简单,就是希望能够帮助到想自学提升又不知道该从何学起的朋友。
既有适合小白学习的零基础资料,也有适合3年以上经验的小伙伴深入学习提升的进阶课程,涵盖了95%以上运维知识点,真正体系化!
由于文件比较多,这里只是将部分目录截图出来,全套包含大厂面经、学习笔记、源码讲义、实战项目、大纲路线、讲解视频,并且后续会持续更新
**深知大多数程序员,想要提升技能,往往是自己摸索成长,但自己不成体系的自学效果低效又漫长,而且极易碰到天花板技术停滞不前!**
因此收集整理了一份《2024年最新Linux运维全套学习资料》,初衷也很简单,就是希望能够帮助到想自学提升又不知道该从何学起的朋友。
[外链图片转存中…(img-I8mGMIB4-1714134176249)]
[外链图片转存中…(img-wLz8fqVZ-1714134176249)]
[外链图片转存中…(img-zX95GDBh-1714134176250)]
[外链图片转存中…(img-ECvo70mp-1714134176250)]
[外链图片转存中…(img-4p8OHcMf-1714134176251)]
既有适合小白学习的零基础资料,也有适合3年以上经验的小伙伴深入学习提升的进阶课程,涵盖了95%以上运维知识点,真正体系化!
由于文件比较多,这里只是将部分目录截图出来,全套包含大厂面经、学习笔记、源码讲义、实战项目、大纲路线、讲解视频,并且后续会持续更新