遇到一个popen遭遇ENOMEM (Cannot allocate memory)的问题,记录一下
我需要在程序里获取标准输出的内容,于是在一个模块里使用了popen这个函数,本来一直运行着都没问题,但是最近这个模块老是出问题,最后定位到是popen调用出错。返回的errno是ENOMEM (Cannot allocate memory),查看popen的文档并没有ENOMEM相关的说明,到网上搜索,有人说popen无非是由pipe+fork+execl组合而成的一个函数,可以跟一下看看是哪个函数的问题。
使用
strace -o app.strace ./app
跟进去看程序的调用信息,找到了对应的出错点
pipe2([17, 18], O_CLOEXEC) = 0
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f084002aab0) = -1 ENOMEM (Cannot allocate memory)
close(18) = 0
close(17)
可见,实际是clone函数出错了,man clone,确实有
ENOMEM Cannot allocate sufficient memory to allocate a task structure for the child, or to copy those parts of the caller's context that need to be copied.
写一个使用了fork的小程序,使用strace查看,
13097 getrlimit(RLIMIT_STACK, {rlim_cur=512*1024, rlim_max=512*1024}) = 0
13097 clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7feed3ccf9f0) = 13098
13097 fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 10), ...}) = 0
13097 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0 <unfinished ...>
的确,fork函数调用了clone的。
我们都知道,linux使用fork创建子进程会copy父进程的堆,栈,静态存储区,文件描述符等等,那么就有可能父进程内存使用过多,导致子进程无法再从剩余的内存上分配内存。
同样使用system也有
rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0
clone(child_stack=0, flags=CLONE_PARENT_SETTID|SIGCHLD, parent_tidptr=0x7fffed9206f8) = 32710
wait4(32710, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0, NULL) = 32710
那么使用vfork呢
vfork() = 13123
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 10), ...}) = 0
fstat(1, <unfinished ...>
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0 <unfinished ...>
可见,用vfork就并没有调用clone
于是自己写了一个函数来代替popen
<pre name="code" class="cpp" style="white-space: pre-wrap; word-wrap: break-word;">//#ifdef OPEN_MAX
//static long openmax = OPEN_MAX;
//#else
static long openmax = 0; /* cached descriptor limit; 0 means "not determined yet" */
//#endif
/*
 * Fallback used when the limit is indeterminate or cannot be queried.
 * There is no guarantee that this value is adequate.
 */
#define OPEN_MAX_GUESS 1024

/*
 * Return the maximum number of open file descriptors per process.
 *
 * The value is obtained from sysconf(_SC_OPEN_MAX) on the first call and
 * cached in `openmax` for later calls.  If sysconf() reports the limit as
 * indeterminate, or fails, OPEN_MAX_GUESS is returned instead, so the result
 * is always positive and can safely be used as an array size.
 */
long open_max(void)
{
    if (openmax > 0)
        return openmax; /* already determined */

    errno = 0; /* lets us tell "indeterminate" (errno untouched) from a real error */
    openmax = sysconf(_SC_OPEN_MAX);
    if (openmax <= 0) {
        /*
         * Report real failures on stderr: callers use this module to capture
         * a command's standard output, so stdout must not be polluted.
         */
        if (errno != 0)
            perror("sysconf error for _SC_OPEN_MAX");
        openmax = OPEN_MAX_GUESS;
    }
    return openmax;
}
static pid_t *childpid = NULL; /* table of child pids indexed by stream fd, allocated at run-time */
static int maxfd; /* number of entries in childpid[], taken from open_max() */

/*
 * popen() replacement that creates the child with vfork() instead of fork().
 *
 * cmdstring: command line handed to "/bin/sh -c".
 * type:      "r" to read the command's standard output,
 *            "w" to write to the command's standard input.
 *
 * Returns a stream connected to the child, which must be closed with
 * vpclose(), or NULL with errno set on failure.  Every step that can fail
 * (table allocation, pipe, fdopen, fd range check) is done before the child
 * is created, so a failure never leaves a running child or an open
 * descriptor behind.
 */
FILE *vpopen(const char* cmdstring, const char *type)
{
    int pfd[2];
    int parent_fd;  /* pipe end kept by the caller */
    int child_fd;   /* pipe end that becomes the child's stdin/stdout */
    int target_fd;  /* STDIN_FILENO or STDOUT_FILENO in the child */
    int saved_errno;
    FILE *fp;
    pid_t pid;

    if (cmdstring == NULL || type == NULL ||
        (type[0] != 'r' && type[0] != 'w') || type[1] != 0) {
        errno = EINVAL;
        return NULL;
    }

    if (childpid == NULL) { /* first time through */
        long limit = open_max();
        int nfds = (int)limit;
        pid_t *table;

        /* reject a failed query and a limit that does not fit in maxfd */
        if (limit <= 0 || (long)nfds != limit) {
            errno = ENOMEM;
            return NULL;
        }
        /* zeroed array: 0 means "no child associated with this fd" */
        table = (pid_t *)calloc((size_t)nfds, sizeof(pid_t));
        if (table == NULL)
            return NULL;
        maxfd = nfds;
        childpid = table;
    }

    if (pipe(pfd) != 0)
        return NULL;

    if (*type == 'r') {
        parent_fd = pfd[0];
        child_fd = pfd[1];
        target_fd = STDOUT_FILENO;
    } else {
        parent_fd = pfd[1];
        child_fd = pfd[0];
        target_fd = STDIN_FILENO;
    }

    /* the fd is used as an index into childpid[]; never write out of bounds */
    if (parent_fd >= maxfd) {
        close(pfd[0]);
        close(pfd[1]);
        errno = EMFILE;
        return NULL;
    }

    /* create the stream before the child exists so a failure needs no reaping */
    fp = fdopen(parent_fd, type);
    if (fp == NULL) {
        saved_errno = errno;
        close(pfd[0]);
        close(pfd[1]);
        errno = saved_errno;
        return NULL;
    }

    pid = vfork();
    if (pid < 0) {
        /* release both pipe ends, but report the vfork() error to the caller */
        saved_errno = errno;
        fclose(fp); /* also closes parent_fd */
        close(child_fd);
        errno = saved_errno;
        return NULL;
    }

    if (pid == 0) { /* child: only system calls from here until exec */
        close(parent_fd);
        if (child_fd != target_fd) {
            if (dup2(child_fd, target_fd) < 0)
                _exit(127);
            close(child_fd);
        }
        /* close the descriptors of all streams still open from earlier vpopen() calls */
        for (int i = 0; i < maxfd; i++)
            if (childpid[i] > 0)
                close(i);
        execl("/bin/sh", "sh", "-c", cmdstring, (char *) 0);
        _exit(127); /* exec failed */
    }

    /* parent */
    close(child_fd);
    childpid[parent_fd] = pid; /* remember child pid for this fd */
    return fp;
}
int vpclose(FILE *fp)
{
int fd, stat;
pid_t pid;
if (childpid == NULL)
return(-1); /* popen() has never been called */
fd = fileno(fp);
if ( (pid = childpid[fd]) == 0)
return(-1); /* fp wasn't opened by popen() */
childpid[fd] = 0;
if (fclose(fp) == EOF)
return(-1);
while (waitpid(pid, &stat, 0) < 0)
if (errno != EINTR)
return(-1); /* error other than EINTR from waitpid() */
return(stat); /* return child's termination status */
}
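</pre><pre name="code" class="cpp" style="white-space: pre-wrap; word-wrap: break-word;">//我一度怀疑是程序使用的内存多,导致fork时无法给子进程分配内存
//但是文档上都说,fork时并非马上分配,只有实际使用到时才真正分配。
//如果这样的话,那出现ENOMEM 的错误是为什么呢?不解
//(补充:即使是写时复制,内核在fork时仍然要复制页表,并对父进程可写的私有内存做提交量(overcommit)检查;当父进程占用内存很大、而系统可用内存加交换空间不足或overcommit策略较严格时,clone会直接返回ENOMEM。)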