Linux-Select上层调用到底层实现过程
1、select简介
文章中的内核文件基于2.6.2版本内核。
头文件:
#include <sys/select.h>
select应用层的使用:select简单使用及其Demo
2、调用逻辑
2.1、应用层调用select函数
int select (int maxfd + 1,fd_set *readset,fd_set *writeset,
fd_set *exceptset,const struct timeval * timeout);
2.2、调用系统的sys_select函数
以下代码为内核2.6.2内核源码
asmlinkage long
sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp)
{
fd_set_bits fds;
char *bits;
long timeout;
int ret, size, max_fdset;
timeout = MAX_SCHEDULE_TIMEOUT;
if (tvp) {
time_t sec, usec;
if ((ret = verify_area(VERIFY_READ, tvp, sizeof(*tvp)))
|| (ret = __get_user(sec, &tvp->tv_sec))
|| (ret = __get_user(usec, &tvp->tv_usec)))
goto out_nofds;
ret = -EINVAL;
if (sec < 0 || usec < 0)
goto out_nofds;
if ((unsigned long) sec < MAX_SELECT_SECONDS) {
timeout = ROUND_UP(usec, 1000000/HZ);
timeout += sec * (unsigned long) HZ;
}
}
ret = -EINVAL;
if (n < 0)
goto out_nofds;
/* max_fdset can increase, so grab it once to avoid race */
max_fdset = current->files->max_fdset;
if (n > max_fdset)
n = max_fdset;
/*
* We need 6 bitmaps (in/out/ex for both incoming and outgoing),
* since we used fdset we need to allocate memory in units of
* long-words.
*/
ret = -ENOMEM;
size = FDS_BYTES(n);
bits = select_bits_alloc(size);
if (!bits)
goto out_nofds;
fds.in = (unsigned long *) bits;
fds.out = (unsigned long *) (bits + size);
fds.ex = (unsigned long *) (bits + 2*size);
fds.res_in = (unsigned long *) (bits + 3*size);
fds.res_out = (unsigned long *) (bits + 4*size);
fds.res_ex = (unsigned long *) (bits + 5*size);
if ((ret = get_fd_set(n, inp, fds.in)) ||
(ret = get_fd_set(n, outp, fds.out)) ||
(ret = get_fd_set(n, exp, fds.ex)))
goto out;
zero_fd_set(n, fds.res_in);
zero_fd_set(n, fds.res_out);
zero_fd_set(n, fds.res_ex);
ret = do_select(n, &fds, &timeout);
if (tvp && !(current->personality & STICKY_TIMEOUTS)) {
time_t sec = 0, usec = 0;
if (timeout) {
sec = timeout / HZ;
usec = timeout % HZ;
usec *= (1000000/HZ);
}
put_user(sec, &tvp->tv_sec);
put_user(usec, &tvp->tv_usec);
}
if (ret < 0)
goto out;
if (!ret) {
ret = -ERESTARTNOHAND;
if (signal_pending(current))
goto out;
ret = 0;
}
set_fd_set(n, inp, fds.res_in);
set_fd_set(n, outp, fds.res_out);
set_fd_set(n, exp, fds.res_ex);
out:
select_bits_free(bits, size);
out_nofds:
return ret;
}
在以上代码中最主要的功能是调用do_select(n, &fds, &timeout)
2.3、系统的sys_select调用do_select
int do_select(int n, fd_set_bits *fds, long *timeout)
{
struct poll_wqueues table;
poll_table *wait;
int retval, i;
long __timeout = *timeout;
spin_lock(¤t->files->file_lock);
retval = max_select_fd(n, fds);
spin_unlock(¤t->files->file_lock);
if (retval < 0)
return retval;
n = retval;
poll_initwait(&table);
wait = &table.pt;
if (!__timeout)
wait = NULL;
retval = 0;
for (;;) {
unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
set_current_state(TASK_INTERRUPTIBLE);
inp = fds->in; outp = fds->out; exp = fds->ex;
rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex;
for (i = 0; i < n; ++rinp, ++routp, ++rexp) {
unsigned long in, out, ex, all_bits, bit = 1, mask, j;
unsigned long res_in = 0, res_out = 0, res_ex = 0;
struct file_operations *f_op = NULL;
struct file *file = NULL;
in = *inp++; out = *outp++; ex = *exp++;
all_bits = in | out | ex;
if (all_bits == 0) {
i += __NFDBITS;
continue;
}
for (j = 0; j < __NFDBITS; ++j, ++i, bit <<= 1) {
if (i >= n)
break;
if (!(bit & all_bits))
continue;
file = fget(i);
if (file) {
f_op = file->f_op;
mask = DEFAULT_POLLMASK;
if (f_op && f_op->poll)
mask = (*f_op->poll)(file, retval ? NULL : wait);
fput(file);
if ((mask & POLLIN_SET) && (in & bit)) {
res_in |= bit;
retval++;
}
if ((mask & POLLOUT_SET) && (out & bit)) {
res_out |= bit;
retval++;
}
if ((mask & POLLEX_SET) && (ex & bit)) {
res_ex |= bit;
retval++;
}
}
}
if (res_in)
*rinp = res_in;
if (res_out)
*routp = res_out;
if (res_ex)
*rexp = res_ex;
}
wait = NULL;
if (retval || !__timeout || signal_pending(current))
break;
if(table.error) {
retval = table.error;
break;
}
__timeout = schedule_timeout(__timeout);
}
__set_current_state(TASK_RUNNING);
poll_freewait(&table);
/*
* Up-to-date the caller timeout.
*/
*timeout = __timeout;
return retval;
}
从这开始才是真的的select函数的实现。
在do_select函数中,从第二个参数fd_set_bits *fds我们可以发现,我们app层传入的fd_set *readset,fd_set *writeset,fd_set *exceptset这三个参数都整合到了fd_set_bits的指针中去了,所以我们在进行监听的时候这三个参数随便填一个就OK了。
之后进入这个函数,首先进行了监听个数(n)和超时时间(timeout)的判断,之后便进入了死循环中。
在这个死循环中我们还有还有一个for循环,在这个for循环中我们发现了我们select传入的第一个参数(监听的最大的FD加一),这也就解释我们为什么要给最大的fd加一的原因了(for循环中给轮询到上限所使用的)。
接着在这死循环中能引起返回的有两种情况:
①、发生错误时:
if(table.error) {
retval = table.error;
break;
这里也就是我们调用select时错误返回的地方。
②、正确返回时:
if (retval || !__timeout || signal_pending(current))
break;
在这个if判断中三种情况都能实现do_select的返回,第一:监听到有句柄有输入输出时的正确返回retval 。第二:超时返回__timeout ,第三:被信号打断导致返回signal_pending。