问题描述
公司原有的程序最近遇到一个有趣的问题, 系统有内存3T , 内存消耗2.5T , 这个时候日志显示打开gzip 格式文件失败。 程序是老派的C程序, 打开gzip文件的方式是这样的:
fd = popen("gzip -dc xxx.gz","r");
我在后面添加了打印errno , 发现居然是ENOMEM (12) 这个错误!
问题调研
查询关键字 popen + ENOMEM , 一下子很多帖子都提出了是popen 过程的clone 调用在复制父进程的时候失败。 感觉有些道理, 又没那么有道理: 有道理的是如果真的尝试完全复制父进程, 那么由于父进程已经消耗了系统的绝大部分内存, 绝对没有可能复制成功; 没道理则是由于linux 的进程复制应该是copy-on-write , 这个进程复制之后立刻就换壳执行gzip 了 , 内存消耗应该不大才对!
手头有一台5G 内存的linux 虚拟机, 写下以下代码测试之 ;
我先使用4G 内存, 然后分别使用popen 或者zlib 库的接口来尝试读取一个gzip格式的文件。
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/wait.h>
#include <unistd.h>
#include <iostream>
#include <zlib.h>
int main()
{
#define SIZE 4000000000L
#define USE_ZLIB 0
char * used = (char *)malloc(sizeof(char) *SIZE );
for( long i = 0 ; i< SIZE ; i++ )
{
used[i] = '1';
}
std::cout<<" 1!"<<std::endl;
sleep(1);
std::cout<<" 2!"<<std::endl;
if ( USE_ZLIB )
{
gzFile fp = gzopen("test.gz","r");
if( fp == NULL )
{
std::cout<<"zlib err !"<<std::endl;
}
else
{
std::cout<<"zlib corr!"<<std::endl;
char buff[1024];
while( gzgets(fp,buff,1024) != NULL )
{
std::cout<<buff;
}
}
}
else
{
FILE * fp = popen("gzip -dc test.gz","r");
sleep(1);
std::cout<<" 3!"<<std::endl;
if( fp == NULL )
{
std::cout<<" err !"<<std::endl;
}
else
{
std::cout<<" corr!"<<std::endl;
char buff[1024];
while( fgets(buff,1024,fp) != NULL )
{
std::cout<<buff;
}
}
}
return 0 ;
}
编译命令:
g++ -g test.cpp -lz
果然 , 使用popen 的时候失败了, 但是将USE_ZLIB 宏设置为1 (改用zlib 库读取) 就可以正常运行!
./a.out
1!
2!
3!
err !
执行
strace ./a.out
execve(“./a.out”, [“./a.out”], [/* 23 vars */]) = 0
brk(0) = 0x15b7000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f24c1895000
access(“/etc/ld.so.preload”, R_OK) = -1 ENOENT (No such file or directory)
open(“/etc/ld.so.cache”, O_RDONLY) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=40201, …}) = 0
mmap(NULL, 40201, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f24c188b000
close(3) = 0
open(“/usr/lib64/libstdc++.so.6”, O_RDONLY) = 3
read(3, “\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\360c\5\0\0\0\0\0”…, 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=987096, …}) = 0
mmap(NULL, 3166648, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f24c1371000
mprotect(0x7f24c1459000, 2097152, PROT_NONE) = 0
mmap(0x7f24c1659000, 36864, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0xe8000) = 0x7f24c1659000
mmap(0x7f24c1662000, 82360, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f24c1662000
close(3) = 0
open(“/lib64/libm.so.6”, O_RDONLY) = 3
read(3, “\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0p>\0\0\0\0\0\0”…, 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=596360, …}) = 0
mmap(NULL, 2633912, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f24c10ed000
mprotect(0x7f24c1170000, 2093056, PROT_NONE) = 0
mmap(0x7f24c136f000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x82000) = 0x7f24c136f000
close(3) = 0
open(“/lib64/libgcc_s.so.1”, O_RDONLY) = 3
read(3, “\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\20)\0\0\0\0\0\0”…, 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=90880, …}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f24c188a000
mmap(NULL, 2186584, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f24c0ed7000
mprotect(0x7f24c0eed000, 2093056, PROT_NONE) = 0
mmap(0x7f24c10ec000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x15000) = 0x7f24c10ec000
close(3) = 0
open(“/lib64/libc.so.6”, O_RDONLY) = 3
read(3, “\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0000\356\1\0\0\0\0\0”…, 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=1923352, …}) = 0
mmap(NULL, 3750184, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f24c0b43000
mprotect(0x7f24c0ccd000, 2097152, PROT_NONE) = 0
mmap(0x7f24c0ecd000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x18a000) = 0x7f24c0ecd000
mmap(0x7f24c0ed3000, 14632, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f24c0ed3000
close(3) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f24c1889000
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f24c1887000
arch_prctl(ARCH_SET_FS, 0x7f24c1887720) = 0
mprotect(0x7f24c0ecd000, 16384, PROT_READ) = 0
mprotect(0x7f24c136f000, 4096, PROT_READ) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f24c1886000
mprotect(0x7f24c1659000, 28672, PROT_READ) = 0
mprotect(0x7f24c1896000, 4096, PROT_READ) = 0
munmap(0x7f24c188b000, 40201) = 0
mmap(NULL, 1100001280, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f247f238000
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 1), …}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f24c1894000
write(1, ” 1!\n”, 4 1!
) = 4
rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0
rt_sigaction(SIGCHLD, NULL, {SIG_DFL, [], 0}, 8) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
nanosleep({1, 0}, 0x7ffd9908b5f0) = 0
write(1, ” 2!\n”, 4 2!
) = 4
brk(0) = 0x15b7000
brk(0x15d8000) = 0x15d8000
pipe2([3, 4], O_CLOEXEC) = 0
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f24c18879f0) = -1 ENOMEM (Cannot allocate memory)
close(4) = 0
close(3) = 0
rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0
rt_sigaction(SIGCHLD, NULL, {SIG_DFL, [], 0}, 8) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
nanosleep({1, 0}, 0x7ffd9908b5f0) = 0
write(1, ” 3!\n”, 4 3!
) = 4
write(1, ” err !\n”, 7 err !
) = 7
exit_group(0) = ?
+++ exited with 0 +++
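这下没差了: strace 显示失败的正是popen 内部的clone 调用。 虽然这里不确定clone的时候到底需要多少内存, 但是应该是和父进程内存消耗有一定的关系: copy-on-write 只是推迟了物理页的复制, 内核在fork/clone 时仍然要复制父进程的页表, 并且按照overcommit 策略 (vm.overcommit_memory) 为父进程的私有可写内存再做一次提交记账, 父进程占用越大, 这一步就越容易返回ENOMEM。 父进程内存消耗过大的时候不建议使用popen 。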
这里我直接换用zlib库了, 但是如果一定要使用popen 的话 , 网友提供下面这个解决方案 :
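使用vfork 写一个函数来代替popen : vfork 的子进程在exec 之前直接借用父进程的地址空间, 不复制页表, 也不需要额外的内存提交 (注意vfork 底层同样是clone 系统调用, 只是带有CLONE_VM|CLONE_VFORK 标志, 而不是"没有clone")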
/*
 * Cached result of open_max(); 0 means "not determined yet".
 * A compile-time OPEN_MAX is deliberately not used as the initial value,
 * so the limit is always queried at run time.
 */
static long openmax = 0;

/*
 * Fallback used when the limit cannot be determined. If OPEN_MAX is
 * indeterminate, we're not guaranteed that this is adequate.
 */
#define OPEN_MAX_GUESS 1024

/*
 * Returns the maximum number of open file descriptors per process as
 * reported by sysconf(_SC_OPEN_MAX), caching the answer after the first
 * call.
 *
 * The result is always positive: when sysconf() reports the limit as
 * indeterminate (negative result, errno left at 0) or fails (errno set),
 * OPEN_MAX_GUESS is returned instead. A failure is additionally reported
 * on stderr. Returning the raw -1 on failure would end up as the element
 * count of the caller's calloc().
 */
long open_max(void)
{
    if (openmax == 0) { /* first time through */
        errno = 0;
        openmax = sysconf(_SC_OPEN_MAX);
        if (openmax <= 0) {
            if (errno != 0)
                perror("sysconf error for _SC_OPEN_MAX");
            openmax = OPEN_MAX_GUESS; /* indeterminate or failed: guess */
        }
    }
    return(openmax);
}
/*
 * Table indexed by file descriptor: childpid[fd] holds the pid of the child
 * that vpopen() started for the stream using that descriptor. It is
 * allocated with calloc() on the first vpopen() call, so unused slots are 0;
 * vpclose() resets a slot to 0 when the stream is closed.
 */
static pid_t *childpid = NULL; /* ptr to array allocated at run-time */
/* Number of entries in childpid[]; assigned from open_max() on the first vpopen() call. */
static int maxfd; /* from our open_max(), {Prog openmax} */
FILE *vpopen(const char* cmdstring, const char *type)
{
int pfd[2];
FILE *fp;
pid_t pid;
if((type[0]!='r' && type[0]!='w')||type[1]!=0)
{
errno = EINVAL;
return(NULL);
}
if (childpid == NULL) { /* first time through */
/* allocate zeroed out array for child pids */
maxfd = open_max();
if ( (childpid = (pid_t *)calloc(maxfd, sizeof(pid_t))) == NULL)
return(NULL);
}
if(pipe(pfd)!=0)
{
return NULL;
}
if((pid = vfork())<0)
{
return(NULL); /* errno set by fork() */
}
else if (pid == 0) { /* child */
if (*type == 'r')
{
close(pfd[0]);
if (pfd[1] != STDOUT_FILENO) {
dup2(pfd[1], STDOUT_FILENO);
close(pfd[1]);
}
}
else
{
close(pfd[1]);
if (pfd[0] != STDIN_FILENO) {
dup2(pfd[0], STDIN_FILENO);
close(pfd[0]);
}
}
/* close all descriptors in childpid[] */
for (int i = 0; i < maxfd; i++)
if (childpid[ i ] > 0)
close(i);
execl("/bin/sh", "sh", "-c", cmdstring, (char *) 0);
_exit(127);
}
if (*type == 'r') {
close(pfd[1]);
if ( (fp = fdopen(pfd[0], type)) == NULL)
return(NULL);
} else {
close(pfd[0]);
if ( (fp = fdopen(pfd[1], type)) == NULL)
return(NULL);
}
childpid[fileno(fp)] = pid; /* remember child pid for this fd */
return(fp);
}
int vpclose(FILE *fp)
{
int fd, stat;
pid_t pid;
if (childpid == NULL)
return(-1); /* popen() has never been called */
fd = fileno(fp);
if ( (pid = childpid[fd]) == 0)
return(-1); /* fp wasn't opened by popen() */
childpid[fd] = 0;
if (fclose(fp) == EOF)
return(-1);
while (waitpid(pid, &stat, 0) < 0)
if (errno != EINTR)
return(-1); /* error other than EINTR from waitpid() */
return(stat); /* return child's termination status */
}