popen 遇到 ENOMEM 失败的 bug 调研

问题描述

公司原有的程序最近遇到一个有趣的问题, 系统有内存3T , 内存消耗2.5T , 这个时候日志显示打开gzip 格式文件失败。 程序是老派的C程序, 打开gzip文件的方式是这样的:

fd = popen("gzip -dc xxx.gz","r");

我在后面添加了打印errno , 发现居然是ENOMEM (12) 这个错误!

问题调研

查询关键字 popen + ENOMEM,一下子搜到很多帖子,都指出是 popen 过程中的 clone 调用在复制父进程的时候失败。感觉有些道理,又没那么有道理:有道理的是,如果真的尝试完全复制父进程,那么由于父进程已经消耗了系统的绝大部分内存,绝对没有可能复制成功;没道理的是,Linux 的进程复制应该是 copy-on-write,子进程复制之后立刻就 exec 换壳去执行 gzip 了,内存消耗应该不大才对!

手头有一台5G 内存的linux 虚拟机, 写下以下代码测试之 ;
我先使用4G 内存, 然后分别使用popen 或者zlib 库的接口来尝试读取一个gzip格式的文件。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <iostream>
#include <zlib.h>
int main()
{
#define SIZE 4000000000L 
#define USE_ZLIB 0

    char * used = (char *)malloc(sizeof(char) *SIZE  );
    for( long i = 0 ; i< SIZE ; i++ )
    {
        used[i] = '1';
    }
    std::cout<<" 1!"<<std::endl;
    sleep(1);
    std::cout<<" 2!"<<std::endl;
    if ( USE_ZLIB )
    {
        gzFile fp = gzopen("test.gz","r");
        if( fp == NULL )
        {
            std::cout<<"zlib err !"<<std::endl;
        }
        else
        {
            std::cout<<"zlib corr!"<<std::endl;
            char buff[1024];
            while( gzgets(fp,buff,1024) != NULL )
            {
                std::cout<<buff;
            }
        }
    }
    else
    {
        FILE * fp = popen("gzip -dc test.gz","r");
        sleep(1);
        std::cout<<" 3!"<<std::endl;
        if( fp == NULL )
        {
            std::cout<<" err !"<<std::endl;
        }
        else
        {
            std::cout<<" corr!"<<std::endl;
            char buff[1024];
            while( fgets(buff,1024,fp) != NULL )
            {
                std::cout<<buff;
            }
        }
    }
    return 0 ;

}

编译命令:

g++ -g test.cpp -lz

果然,使用 popen 的时候失败了,但是将 USE_ZLIB 宏设置为 1 就可以正常运行!

./a.out
1!
2!
3!
err !

执行

strace ./a.out
execve(“./a.out”, [“./a.out”], [/* 23 vars */]) = 0
brk(0) = 0x15b7000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f24c1895000
access(“/etc/ld.so.preload”, R_OK) = -1 ENOENT (No such file or directory)
open(“/etc/ld.so.cache”, O_RDONLY) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=40201, …}) = 0
mmap(NULL, 40201, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f24c188b000
close(3) = 0
open(“/usr/lib64/libstdc++.so.6”, O_RDONLY) = 3
read(3, “\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\360c\5\0\0\0\0\0”…, 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=987096, …}) = 0
mmap(NULL, 3166648, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f24c1371000
mprotect(0x7f24c1459000, 2097152, PROT_NONE) = 0
mmap(0x7f24c1659000, 36864, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0xe8000) = 0x7f24c1659000
mmap(0x7f24c1662000, 82360, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f24c1662000
close(3) = 0
open(“/lib64/libm.so.6”, O_RDONLY) = 3
read(3, “\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0p>\0\0\0\0\0\0”…, 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=596360, …}) = 0
mmap(NULL, 2633912, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f24c10ed000
mprotect(0x7f24c1170000, 2093056, PROT_NONE) = 0
mmap(0x7f24c136f000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x82000) = 0x7f24c136f000
close(3) = 0
open(“/lib64/libgcc_s.so.1”, O_RDONLY) = 3
read(3, “\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\20)\0\0\0\0\0\0”…, 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=90880, …}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f24c188a000
mmap(NULL, 2186584, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f24c0ed7000
mprotect(0x7f24c0eed000, 2093056, PROT_NONE) = 0
mmap(0x7f24c10ec000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x15000) = 0x7f24c10ec000
close(3) = 0
open(“/lib64/libc.so.6”, O_RDONLY) = 3
read(3, “\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0000\356\1\0\0\0\0\0”…, 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=1923352, …}) = 0
mmap(NULL, 3750184, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f24c0b43000
mprotect(0x7f24c0ccd000, 2097152, PROT_NONE) = 0
mmap(0x7f24c0ecd000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x18a000) = 0x7f24c0ecd000
mmap(0x7f24c0ed3000, 14632, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f24c0ed3000
close(3) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f24c1889000
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f24c1887000
arch_prctl(ARCH_SET_FS, 0x7f24c1887720) = 0
mprotect(0x7f24c0ecd000, 16384, PROT_READ) = 0
mprotect(0x7f24c136f000, 4096, PROT_READ) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f24c1886000
mprotect(0x7f24c1659000, 28672, PROT_READ) = 0
mprotect(0x7f24c1896000, 4096, PROT_READ) = 0
munmap(0x7f24c188b000, 40201) = 0
mmap(NULL, 1100001280, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f247f238000
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 1), …}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f24c1894000
write(1, ” 1!\n”, 4 1!
) = 4
rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0
rt_sigaction(SIGCHLD, NULL, {SIG_DFL, [], 0}, 8) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
nanosleep({1, 0}, 0x7ffd9908b5f0) = 0
write(1, ” 2!\n”, 4 2!
) = 4
brk(0) = 0x15b7000
brk(0x15d8000) = 0x15d8000
pipe2([3, 4], O_CLOEXEC) = 0

clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f24c18879f0) = -1 ENOMEM (Cannot allocate memory)
close(4) = 0
close(3) = 0
rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0
rt_sigaction(SIGCHLD, NULL, {SIG_DFL, [], 0}, 8) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0

nanosleep({1, 0}, 0x7ffd9908b5f0) = 0
write(1, ” 3!\n”, 4 3!
) = 4
write(1, ” err !\n”, 7 err !
) = 7
exit_group(0) = ?
+++ exited with 0 +++

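这下可以确认了:失败的正是 popen 内部的 clone 调用。虽然这里不确定 clone 的时候到底需要多少内存,但是应该是和父进程内存消耗有一定的关系。一个可能的解释是:copy-on-write 虽然不会立刻复制物理页,但内核在 fork/clone 时仍然要为子进程将来可能写入的私有可写内存做提交(commit)记账,当 overcommit 检查认为内存不够时,clone 就会直接返回 ENOMEM(具体行为取决于 vm.overcommit_memory 的设置)。因此,父进程内存消耗过大的时候不建议使用 popen。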
这里我直接换用zlib库了, 但是如果一定要使用popen 的话 , 网友提供下面这个解决方案 :

使用 vfork(创建子进程时不复制父进程的地址空间)写一个函数来代替 popen

//#ifdef  OPEN_MAX
//static long openmax = OPEN_MAX;
//#else
static long openmax = 0;    /* cached result of open_max(); 0 = not determined yet */
//#endif

/*
 * If OPEN_MAX is indeterminate, we're not
 * guaranteed that this is adequate.
 */
#define OPEN_MAX_GUESS 1024

/*
 * Return the per-process limit on open file descriptors.
 *
 * The value is obtained from sysconf(_SC_OPEN_MAX) on the first call and
 * cached in `openmax` for later calls. When sysconf() cannot supply a usable
 * (positive) value -- either because the limit is indeterminate or because
 * the call failed -- OPEN_MAX_GUESS is used instead, so the result is always
 * positive and safe to use as an array size. A genuine sysconf() failure is
 * additionally reported on stderr.
 */
long open_max(void)
{
    if (openmax == 0) {      /* first time through */
        errno = 0;           /* lets us tell "indeterminate" from "failed" */
        const long limit = sysconf(_SC_OPEN_MAX);

        if (limit > 0) {
            openmax = limit;
        } else {
            if (limit < 0 && errno != 0)
                perror("sysconf error for _SC_OPEN_MAX");
            openmax = OPEN_MAX_GUESS;   /* indeterminate or failed: fall back */
        }
    }

    return(openmax);
}

static pid_t    *childpid = NULL;  /* ptr to array allocated at run-time */
static int      maxfd;  /* from our open_max(), {Prog openmax} */

/*
 * popen() work-alike that creates the child with vfork() instead of fork().
 *
 * cmdstring  command line handed to "/bin/sh -c".
 * type       "r" to read the command's standard output, or "w" to write to
 *            its standard input; anything else is rejected.
 *
 * Returns a stdio stream connected to the child, to be closed with
 * vpclose(), or NULL on failure with errno set (EINVAL for bad arguments,
 * EMFILE when the descriptor cannot be tracked, otherwise whatever the
 * failing call reported). The child's pid is remembered in childpid[],
 * indexed by the descriptor of the returned stream.
 *
 * NOTE(review): POSIX leaves the behavior undefined when a vfork() child
 * calls anything other than _exit() or an exec function; the close()/dup2()
 * calls below rely on the platform tolerating that (as the original code
 * did) -- confirm for every target system.
 */
FILE *vpopen(const char* cmdstring, const char *type)
{
    int pfd[2];
    FILE *fp;
    pid_t   pid;

    if (cmdstring == NULL || type == NULL ||
        (type[0] != 'r' && type[0] != 'w') || type[1] != 0)
    {
        errno = EINVAL;
        return(NULL);
    }

    if (childpid == NULL) {     /* first time through */
        /* allocate zeroed out array for child pids */
        const long limit = open_max();
        if (limit <= 0 || (long)(int)limit != limit) {
            errno = ENOMEM;     /* no usable table size */
            return(NULL);
        }
        pid_t *table = (pid_t *)calloc(limit, sizeof(pid_t));
        if (table == NULL)
            return(NULL);
        maxfd = (int)limit;
        childpid = table;
    }

    if (pipe(pfd) != 0)
    {
        return NULL;
    }

    /* Which end stays with us, which end becomes the child's stdin/stdout. */
    const bool reading   = (type[0] == 'r');
    const int  parent_fd = reading ? pfd[0] : pfd[1];
    const int  child_fd  = reading ? pfd[1] : pfd[0];
    const int  target_fd = reading ? STDOUT_FILENO : STDIN_FILENO;

    if (parent_fd >= maxfd) {
        /* childpid[] has no slot for this descriptor. */
        close(pfd[0]);
        close(pfd[1]);
        errno = EMFILE;
        return(NULL);
    }

    /*
     * Wrap our end in a stream before spawning anything, so that a failure
     * here cannot leave a running child behind.
     */
    if ((fp = fdopen(parent_fd, type)) == NULL) {
        const int saved = errno;
        close(pfd[0]);
        close(pfd[1]);
        errno = saved;
        return(NULL);
    }

    if ((pid = vfork()) < 0)
    {
        const int saved = errno;
        fclose(fp);             /* also closes parent_fd */
        close(child_fd);
        errno = saved;          /* report the vfork() failure */
        return(NULL);
    }
    else if (pid == 0) {    /* child */
        close(parent_fd);
        if (child_fd != target_fd) {
            if (dup2(child_fd, target_fd) < 0)
                _exit(127);
            close(child_fd);
        }

        /* close all descriptors in childpid[] */
        for (int i = 0; i < maxfd; i++)
            if (childpid[i] > 0)
                close(i);

        execl("/bin/sh", "sh", "-c", cmdstring, (char *) 0);
        _exit(127);             /* only reached when execl() failed */
    }

    /* parent */
    close(child_fd);
    childpid[parent_fd] = pid;  /* remember child pid for this fd */
    return(fp);
}


int vpclose(FILE *fp)
{
    int     fd, stat;  
    pid_t   pid;  

    if (childpid == NULL)  
        return(-1);     /* popen() has never been called */  

    fd = fileno(fp);  
    if ( (pid = childpid[fd]) == 0)  
        return(-1);     /* fp wasn't opened by popen() */  

    childpid[fd] = 0;  
    if (fclose(fp) == EOF)  
        return(-1);  

    while (waitpid(pid, &stat, 0) < 0)  
        if (errno != EINTR)  
            return(-1); /* error other than EINTR from waitpid() */  

    return(stat);   /* return child's termination status */  

}
  • 2
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值