进程
进程的状态
R,表示进程正在CPU就绪队列中,正在运行或正在等待运行
D,不可中断状态睡眠,一般表示进程正在跟硬件交互,并且交互过程中不允许被其他进程或中断打断
Z,是僵尸进程,子进程结束了但相应资源还没被父进程回收
S,是可中断状态睡眠,表示进程正在等待某个事件而被系统挂起,当进程等待的事件发生时会被唤醒并进入R状态
I,是idle的缩写就是空闲进程,用在不可中断睡眠的内核线程上,硬件交互导致的不可中断进程用D表示,但对
某些内核线程来说,它们有可能实际上并没有任何负载,用Idle正是为了区分这种情况,注意D状态
的进程会导致平均负载升高,I状态的进程不会
T,stopped或者traced的缩写,表示进程处于暂停或跟踪状态,向一个进程发送SIGSTOP信号,它就会因响应这个信号而进入暂停状态,
再发送SIGCONT信号,进程又会恢复运行,用gdb调试并在断点处停下时,这个进程就会变成t(跟踪)状态
X,表示进程已经消亡,所以不会在top或者ps中看到它
man ps的结果
PROCESS STATE CODES
Here are the different values that the s, stat and state output specifiers (header "STAT" or "S") will display to describe the state of a
process.
D Uninterruptible sleep (usually IO)
R Running or runnable (on run queue)
S Interruptible sleep (waiting for an event to complete)
T Stopped, either by a job control signal or because it is being traced.
W paging (not valid since the 2.6.xx kernel)
X dead (should never be seen)
Z Defunct ("zombie") process, terminated but not reaped by its parent.
For BSD formats and when the stat keyword is used, additional characters may be displayed:
< high-priority (not nice to other users)
N low-priority (nice to other users)
L has pages locked into memory (for real-time and custom IO)
s is a session leader
l is multi-threaded (using CLONE_THREAD, like NPTL pthreads do)
+ is in the foreground process group
不可中断状态是为了保证进程数据与硬件状态一致,在正常情况下,不可中断状态在很短时间内就会结束,所以
短时间的不可中断可以忽略
但如果系统或硬件发生了故障,进程可能会在不可中断状态保持很久,甚至导致系统中出现大量不可中断进程,
这时,就需要注意了,系统是不是出现了I/O性能问题
僵尸进程,父进程没有及时回收导致的
子进程结束后会向父进程发送SIGCHLD信号,父进程接收此信号后可以调用wait()/waitpid()回收子进程
父进程也可以显式忽略这个信号(把SIGCHLD设为SIG_IGN),此时子进程退出后由内核直接回收,不会变成僵尸进程;如果父进程先退出,子进程会被init接管并由init回收
进程组 和 会话
进程组 表示一组相互关联的进程,比如每个子进程都是父进程所在组的成员
会话 是指共享同一个控制终端的一个或多个进程组
通过SSH登录服务器,打开一个控制终端TTY,这个控制终端就对应一个会话,在终端中运行的命令以及它们的子进程就构成了一个个的进程组,其中在后台运行的命令构成后台进程组,在前台运行的命令构成前台进程组
例子分析
#define _GNU_SOURCE
/* Default size in bytes of one read buffer (64 MiB). Parenthesized so the
 * macro expands to a single value inside larger expressions. */
#define BUF_SIZE (64 * 1024 * 1024)
/* Default number of buffers' worth of data each child process reads. */
#define BUF_COUNT 20
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <unistd.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <dirent.h>
#include <string.h>
#include <sys/file.h>
#include <fcntl.h>
#include <ctype.h>
/*
 * Scan /dev and return the path of the first entry (in readdir order) whose
 * name starts with "sd" or "xvd".
 *
 * Returns a heap-allocated string such as "/dev/sda" that the caller owns and
 * must free(), or NULL when /dev cannot be opened, the path buffer cannot be
 * allocated, or no entry matches.
 */
char *select_disk()
{
    enum { DISK_PATH_CAP = 512 };
    const char *sd_prefix = "sd";
    const char *xvd_prefix = "xvd";

    DIR *dirptr = opendir("/dev/");
    if (dirptr == NULL)
    {
        perror("Failed to open dir");
        return NULL;
    }

    char *result = calloc(DISK_PATH_CAP, sizeof *result);
    if (result == NULL)
    {
        perror("Failed to allocate disk path");
        closedir(dirptr);
        return NULL;
    }

    struct dirent *entry;
    while ((entry = readdir(dirptr)) != NULL)
    {
        if (strncmp(sd_prefix, entry->d_name, 2) == 0 ||
            strncmp(xvd_prefix, entry->d_name, 3) == 0)
        {
            snprintf(result, DISK_PATH_CAP, "/dev/%s", entry->d_name);
            /* Release the directory stream on the success path too. */
            closedir(dirptr);
            return result;
        }
    }

    /* No matching device: release everything acquired above. */
    closedir(dirptr);
    free(result);
    return NULL;
}
/*
 * Parse a strictly positive decimal integer from str.
 *
 * Returns the parsed value (> 0) on success. Returns -1 and prints a
 * diagnostic to stderr when strtol reports an error (e.g. out of range), when
 * str does not start with a number, or when the value is zero or negative.
 * Characters following the leading number are ignored.
 */
long int get_value(char *str)
{
    char *endptr = NULL;

    /* strtol only sets errno on failure, so clear any stale value first;
     * otherwise an earlier error could make a valid result look invalid. */
    errno = 0;
    long int value = strtol(str, &endptr, 10);
    if (errno != 0)
    {
        perror("strtol");
        return -1;
    }

    if (endptr == str)
    {
        /* Not an errno condition, so do not use perror here. */
        fprintf(stderr, "not number\n");
        return -1;
    }

    if (value <= 0)
    {
        fprintf(stderr, "not positive number\n");
        return -1;
    }

    return value;
}
/*
 * Child-process worker: read from `disk` using direct I/O (O_DIRECT) until at
 * least count * buffer_size bytes have been read or the end of the device is
 * reached.
 *
 * This function never returns. It ends the calling process with _exit(0) on
 * success and _exit(1) if the device cannot be opened, the buffer cannot be
 * allocated, or a read fails.
 */
void sub_process(const char *disk, size_t buffer_size, size_t count)
{
    /* No mode argument: it is only meaningful together with O_CREAT. */
    int fd = open(disk, O_RDONLY | O_DIRECT | O_LARGEFILE);
    if (fd < 0)
    {
        perror("failed to open disk");
        _exit(1);
    }

    /* The read buffer is allocated with 512-byte alignment for O_DIRECT. */
    void *buf = NULL;
    int alloc_rc = posix_memalign(&buf, 512, buffer_size);
    if (alloc_rc != 0)
    {
        /* posix_memalign returns the error code instead of setting errno. */
        fprintf(stderr, "failed to allocate buffer: %s\n", strerror(alloc_rc));
        close(fd);
        _exit(1);
    }

    size_t read_bytes = 0;
    while (read_bytes < count * buffer_size)
    {
        /* read() returns a signed count: -1 on error, 0 at end of file. */
        ssize_t ret = read(fd, buf, buffer_size);
        if (ret < 0)
        {
            perror("failed to read contents");
            close(fd);
            free(buf);
            _exit(1);
        }
        if (ret == 0)
        {
            /* Device exhausted before the target; stop instead of spinning. */
            break;
        }
        read_bytes += (size_t)ret;
    }

    close(fd);
    free(buf);
    _exit(0);
}
/*
 * Entry point.
 *
 * Options:
 *   -d <disk>   device to read; when omitted, select_disk() picks one
 *   -s <size>   size in bytes of each read buffer (default BUF_SIZE)
 *   -c <count>  number of buffers each child reads (default BUF_COUNT)
 *
 * Forks two children running sub_process() every 50 seconds, forever.
 * Exits with status 1 on an invalid option, an invalid -s/-c value, or when
 * no disk is available.
 */
int main(int argc, char **argv)
{
    int status = 0;
    int c = 0;
    char *disk = NULL;
    char *size = NULL;
    char *count = NULL;

    while ((c = getopt(argc, argv, "d:s:c:")) != -1)
    {
        switch (c)
        {
        case 'd':
            disk = optarg;
            break;
        case 's':
            size = optarg;
            break;
        case 'c':
            count = optarg;
            break;
        case '?':
            /* Report on stderr: buffered stdout output would be discarded
             * by an immediate _exit when stdout is not a terminal. */
            fprintf(stderr, "Illegal option: -%c\n",
                    isprint((unsigned char)optopt) ? optopt : '#');
            exit(1);
        default:
            exit(1);
        }
    }

    if (disk == NULL)
    {
        disk = select_disk();
    }
    if (disk == NULL)
    {
        fprintf(stderr, "No disk given with -d and none found under /dev\n");
        exit(1);
    }

    long int buffer_size = BUF_SIZE;
    long int buffer_count = BUF_COUNT;
    if (size != NULL)
    {
        buffer_size = get_value(size);
        if (buffer_size < 0)
        {
            exit(1);
        }
    }
    if (count != NULL)
    {
        buffer_count = get_value(count);
        if (buffer_count < 0)
        {
            exit(1);
        }
    }

    printf("Reading data from disk %s with buffer size %ld and count %ld\n", disk, buffer_size, buffer_count);
    /* Flush before forking so the banner is actually emitted when stdout is
     * fully buffered (the parent never reaches a normal exit). */
    fflush(stdout);

    for (;;)
    {
        for (int i = 0; i < 2; i++)
        {
            pid_t pid = fork();
            if (pid < 0)
            {
                /* Keep going; the next round retries after the sleep. */
                perror("fork");
            }
            else if (pid == 0)
            {
                /* Child: sub_process() terminates the process itself. */
                sub_process(disk, (size_t)buffer_size, (size_t)buffer_count);
            }
        }
        sleep(50);
    }

    /*
     * NOTE: unreachable. The loop above never exits, so finished children
     * are never reaped and accumulate as zombies. This is the defect the
     * surrounding case study diagnoses; it is deliberately left in place so
     * the recorded ps/pstree output stays reproducible.
     */
    while (wait(&status) > 0);
    return 0;
}
运行程序并分析
#运行程序
./app -d /dev/vda1 -s 1024000 -c 200
#dstat分析程序
----system---- ----total-cpu-usage---- ---load-avg--- -net/total- ------memory-usage----- -dsk/total- ---procs--- ----swap--- --io/total- ---system-- ---paging--
time |usr sys idl wai hiq siq| 1m 5m 15m | recv send| used buff cach free| read writ|run blk new| used free| read writ| int csw | in out
29-01 21:30:16| 0 0 99 0 0 0|0.34 0.12 0.36| 0 0 | 159M 26.0M 426M 374M| 134k 37k|0.0 0.0 0.0| 0 0 |0.86 2.29 | 115 302 | 0 0
29-01 21:30:17| 1 1 0 98 0 0|0.47 0.15 0.37| 54B 106B| 159M 26.0M 426M 374M| 108M 0 | 0 2.0 0| 0 0 | 111 0 | 250 563 | 0 0
29-01 21:30:18| 1 1 0 98 0 0|0.47 0.15 0.37| 172B 2362B| 159M 26.0M 426M 374M| 104M 0 | 0 2.0 0| 0 0 | 107 0 | 237 525 | 0 0
29-01 21:30:19| 0 1 0 99 0 0|0.47 0.15 0.37| 106B 678B| 159M 26.0M 426M 374M| 108M 0 | 0 2.0 0| 0 0 | 111 0 | 237 514 | 0 0
29-01 21:30:20| 0 1 0 99 0 0|0.47 0.15 0.37| 108B 672B| 159M 26.0M 426M 374M| 107M 0 | 0 2.0 0| 0 0 | 110 0 | 233 520 | 0 0
29-01 21:30:21| 1 1 0 98 0 0|0.47 0.15 0.37| 54B 2998B| 159M 26.0M 426M 374M| 108M 0 | 0 2.0 0| 0 0 | 111 0 | 226 516 | 0 0
29-01 21:30:22| 0 1 0 99 0 0|0.60 0.18 0.38| 54B 618B| 159M 26.0M 426M 374M| 76M 0 |1.0 2.0 0| 0 0 |78.0 0 | 189 444 | 0 0
29-01 21:30:23| 1 0 0 99 0 0|0.60 0.18 0.38| 54B 618B| 159M 26.0M 426M 374M| 109M 0 |1.0 2.0 0| 0 0 | 112 0 | 232 518 | 0 0
29-01 21:30:24| 0 1 0 99 0 0|0.60 0.18 0.38| 54B 618B| 159M 26.0M 426M 374M| 104M 0 | 0 2.0 0| 0 0 | 106 0 | 226 500 | 0 0
29-01 21:30:25| 1 0 0 99 0 0|0.60 0.18 0.38| 54B 618B| 159M 26.0M 426M 374M| 105M 0 | 0 2.0 0| 0 0 | 108 0 | 225 507 | 0 0
29-01 21:30:26| 1 1 0 98 0 0|0.60 0.18 0.38| 54B 618B| 159M 26.0M 426M 374M| 111M 0 | 0 2.0 0| 0 0 | 114 0 | 229 513 | 0 0 ^C
#用pidstat分析程序
09:32:58 PM UID PID kB_rd/s kB_wr/s kB_ccwr/s Command
09:32:59 PM 0 21027 56122.45 0.00 0.00 app
09:32:59 PM 0 21028 56122.45 0.00 0.00 app
09:32:59 PM UID PID kB_rd/s kB_wr/s kB_ccwr/s Command
09:33:00 PM 0 21027 54455.45 0.00 0.00 app
09:33:00 PM 0 21028 55445.54 0.00 0.00 app
pidstat查看单个进程都很正常,pidstat全部进程发现是有app进程io很高
应该就是短进程导致的
通过strace 分析进程,查看clone函数
strace -p 21084 -ff -e trace=clone
strace: Process 21084 attached
clone(strace: Process 21091 attached
child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f0ec9f1da10) = 21091
[pid 21084] clone(strace: Process 21092 attached
child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f0ec9f1da10) = 21092
[pid 21091] +++ exited with 0 +++
[pid 21084] --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=21091, si_uid=0, si_status=0, si_utime=0, si_stime=0} ---
[pid 21092] +++ exited with 0 +++
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=21092, si_uid=0, si_status=0, si_utime=0, si_stime=0} ---
#分析调用open的进程
opensnoop
PID COMM FD ERR PATH
21266 app 3 0 /dev/vda1
21267 app 3 0 /dev/vda1
21273 app 3 0 /dev/vda1
21272 app 3 0 /dev/vda1
#查看app进程,有很多僵尸进程
ps aux | grep app
root 21232 0.0 0.0 4228 716 pts/2 S+ 21:54 0:00 ./app -d /dev/vda1 -s 102400 -c 100
root 21233 0.0 0.0 0 0 pts/2 Z+ 21:54 0:00 [app] <defunct>
root 21234 0.0 0.0 0 0 pts/2 Z+ 21:54 0:00 [app] <defunct>
root 21235 0.0 0.0 0 0 pts/2 Z+ 21:54 0:00 [app] <defunct>
root 21236 0.0 0.0 0 0 pts/2 Z+ 21:54 0:00 [app] <defunct>
#通过pstree分析这个程序,查看父子进程关系
pstree -aps 21339
systemd,1 --switched-root --system --deserialize 21
└─sshd,2149 -D
└─sshd,20461
└─bash,20463
└─app,21339 -d /dev/vda1 -s 102400 -c 100
├─(app,21340)
├─(app,21341)
├─(app,21342)
├─(app,21343)
├─(app,21344)
├─(app,21345)
├─(app,21346)
├─(app,21347)
├─(app,21348)
├─(app,21349)
├─(app,21350)
├─(app,21351)
├─(app,21354)
├─(app,21355)
├─(app,21356)
├─(app,21357)
├─(app,21358)
└─(app,21359)
perf record -g,再perf report分析app进程
分析app这个c程序,发现其代码有问题
//这里用的是直接I/O的方式读取数据的
void sub_process(const char *disk, size_t buffer_size, size_t count)
{
int fd = open(disk, O_RDONLY | O_DIRECT | O_LARGEFILE, 0755);
。。。
//这里的 wait()实际没机会执行,是在for死循环外面的
for (;;)
{
for (i = 0; i < 2; i++)
{
if (fork() == 0)
{
sub_process(disk, buffer_size, buffer_count);
}
}
sleep(5);
}
while (wait(&status) > 0);
总结
iowait高不一定代表I/O有性能瓶颈,当系统中只有I/O类型的进程在运行时,iowait也会很高,但实际上,磁盘的读写远没有达到性能瓶颈的程度
因此,碰到iowait升高时,需要先用dstat,pidstat等工具,确认是不是磁盘I/O的问题,然后再找到是哪些进程导致了I/O问题
等待I/O的进程一般是不可中断状态,所以用ps命令找到的D状态(不可中断状态)的进程,多为可疑进程
如果是僵尸进程,可以用perf工具,来分析系统的CPU时钟事件,在参考的例子中,是直接I/O导致的问题
对应源码找到位置中的问题
僵尸进程可以用perf top,再加上pstree找出父进程,检查wait/waitpid调用,或者SIGCHLD信号处理函数注册