分析 accept 系统调用流程(基于 Linux 6.1.4 内核)
SYSCALL_DEFINE4(accept4) / SYSCALL_DEFINE3(accept)
< net/socket.c >
/*
 * accept4 system call entry point.
 *
 * Forwards fd, the user-space peer address buffer, the user-space
 * address-length pointer and the caller-supplied flags unchanged to
 * __sys_accept4() and returns whatever it returns.
 */
SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
int __user *, upeer_addrlen, int, flags)
{
return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
}
/*
 * accept system call entry point.
 *
 * Takes the same first three arguments as accept4 and calls
 * __sys_accept4() with the flags argument fixed to 0.
 */
SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
int __user *, upeer_addrlen)
{
return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
}
__sys_accept4()
< net/socket.c >
/*
 * Resolve @fd to its file and run the accept on it.
 *
 * Returns -EBADF when fdget() yields no file; otherwise returns the
 * result of __sys_accept4_file(). The fdget() lookup is always paired
 * with fdput() once the file-based helper has returned.
 */
int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
		  int __user *upeer_addrlen, int flags)
{
	struct fd listener = fdget(fd);
	int rc;

	/* No file behind this descriptor: nothing to put back. */
	if (!listener.file)
		return -EBADF;

	rc = __sys_accept4_file(listener.file, upeer_sockaddr,
				upeer_addrlen, flags);
	fdput(listener);

	return rc;
}
__sys_accept4_file()
< net/socket.c >
/*
 * Accept a connection on @file and bind the result to a new descriptor.
 *
 * Returns the new descriptor number on success. Returns -EINVAL if
 * @flags contains anything other than SOCK_CLOEXEC / SOCK_NONBLOCK,
 * the negative value from get_unused_fd_flags() if no descriptor could
 * be reserved, or the error encoded in do_accept()'s result.
 */
static int __sys_accept4_file(struct file *file, struct sockaddr __user *upeer_sockaddr,
			      int __user *upeer_addrlen, int flags)
{
	struct file *accepted;
	int fd;

	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;

	/* Where the two constants differ, rewrite SOCK_NONBLOCK as O_NONBLOCK. */
	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) {
		flags &= ~SOCK_NONBLOCK;
		flags |= O_NONBLOCK;
	}

	/* Reserve the descriptor number first; it is installed only on success. */
	fd = get_unused_fd_flags(flags);
	if (unlikely(fd < 0))
		return fd;

	/* The function that does the actual work. */
	accepted = do_accept(file, 0, upeer_sockaddr, upeer_addrlen, flags);
	if (!IS_ERR(accepted)) {
		fd_install(fd, accepted);
		return fd;
	}

	/* Accept failed: give the reserved descriptor back. */
	put_unused_fd(fd);
	return PTR_ERR(accepted);
}
do_accept()
< net/socket.c >
/*
 * Accept one connection on the socket behind @file and return the new
 * socket's struct file.
 *
 * @file:           file of the listening socket
 * @file_flags:     extra flags OR-ed into the listening file's f_flags
 *                  when calling the protocol's accept handler
 * @upeer_sockaddr: user buffer for the peer address, or NULL to skip
 * @upeer_addrlen:  user pointer passed to move_addr_to_user() together
 *                  with @upeer_sockaddr
 * @flags:          flags passed to sock_alloc_file() for the new file
 *
 * Returns the new file on success, or an ERR_PTR():
 *   -ENOTSOCK      @file is not a socket
 *   -ENFILE        sock_alloc() failed
 *   -ECONNABORTED  getname() on the new socket returned a negative value
 *   otherwise the error from sock_alloc_file(), security_socket_accept(),
 *   the protocol accept handler, or move_addr_to_user().
 */
struct file *do_accept(struct file *file, unsigned file_flags,
struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags)
{
struct socket *sock, *newsock;
struct file *newfile;
int err, len;
struct sockaddr_storage address;
sock = sock_from_file(file);
if (!sock)
return ERR_PTR(-ENOTSOCK);
newsock = sock_alloc();
if (!newsock)
return ERR_PTR(-ENFILE);
/* The new socket takes its type and ops table from the listening socket. */
newsock->type = sock->type;
newsock->ops = sock->ops;
/*
 * We don't need try_module_get here, as the listening socket (sock)
 * has the protocol module (sock->ops->owner) held.
 */
__module_get(newsock->ops->owner);
newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
/*
 * NOTE(review): no cleanup of newsock on this path - presumably
 * sock_alloc_file() releases it on failure; confirm against its definition.
 */
if (IS_ERR(newfile))
return newfile;
/* From here on, every failure exits through out_fd, which drops newfile. */
err = security_socket_accept(sock, newsock);
if (err)
goto out_fd;
/* Dispatch to the protocol-specific accept handler from the ops table. */
err = sock->ops->accept(sock, newsock, sock->file->f_flags | file_flags,
false);
if (err < 0)
goto out_fd;
/* Only fetch and copy out the address when the caller supplied a buffer. */
if (upeer_sockaddr) {
/* NOTE(review): the literal 2 presumably selects the peer address - confirm. */
len = newsock->ops->getname(newsock,
(struct sockaddr *)&address, 2);
if (len < 0) {
err = -ECONNABORTED;
goto out_fd;
}
err = move_addr_to_user(&address,
len, upeer_sockaddr, upeer_addrlen);
if (err < 0)
goto out_fd;
}
/* File flags are not inherited via accept() unlike another OSes. */
return newfile;
out_fd:
fput(newfile);
return ERR_PTR(err);
}
accept()注册(以 DCCP 的 proto_ops 表为例)
.accept = inet_accept
< net/dccp/ipv4.c >
/*
 * Socket-level operations table for PF_INET DCCP sockets.
 *
 * Of interest for the accept path: .accept points at inet_accept, which
 * is what do_accept() reaches through sock->ops->accept. Only .poll and
 * .listen use DCCP-named handlers here; the other entries are inet_*,
 * sock_common_* or sock_no_* handlers.
 */
static const struct proto_ops inet_dccp_ops = {
.family = PF_INET,
.owner = THIS_MODULE,
.release = inet_release,
.bind = inet_bind,
.connect = inet_stream_connect,
.socketpair = sock_no_socketpair,
.accept = inet_accept,
.getname = inet_getname,
/* FIXME: work on tcp_poll to rename it to inet_csk_poll */
.poll = dccp_poll,
.ioctl = inet_ioctl,
.gettstamp = sock_gettstamp,
/* FIXME: work on inet_listen to rename it to sock_common_listen */
.listen = inet_dccp_listen,
.shutdown = inet_shutdown,
.setsockopt = sock_common_setsockopt,
.getsockopt = sock_common_getsockopt,
.sendmsg = inet_sendmsg,
.recvmsg = sock_common_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
};
inet_accept()
< net/ipv4/af_inet.c >
/*
 * proto_ops accept handler: obtain a connected struct sock from the
 * listening socket's protocol and attach it to @newsock.
 *
 * @sock:    listening socket
 * @newsock: pre-allocated socket that receives the accepted connection
 * @flags:   passed through to the protocol's accept handler
 * @kern:    passed through to the protocol's accept handler
 *
 * Returns 0 on success. If the protocol handler returns no sock, returns
 * err, which starts as -EINVAL and is handed to the handler by pointer so
 * it can overwrite it.
 */
int inet_accept(struct socket *sock, struct socket *newsock, int flags,
bool kern)
{
struct sock *sk1 = sock->sk, *sk2;
int err = -EINVAL;
/* IPV6_ADDRFORM can change sk->sk_prot under us. */
sk2 = READ_ONCE(sk1->sk_prot)->accept(sk1, flags, &err, kern); /* take one sock off the queue of successfully established connections and assign it to sk2 */
if (!sk2)
goto do_err;
/* Everything below runs with sk2 locked until release_sock(). */
lock_sock(sk2);
sock_rps_record_flow(sk2); /* flow-table related handling */
/* Warn if the accepted sock is in a state outside the four listed here. */
WARN_ON(!((1 << sk2->sk_state) &
(TCPF_ESTABLISHED | TCPF_SYN_RECV |
TCPF_CLOSE_WAIT | TCPF_CLOSE)));
/* Carry the SOCK_SUPPORT_ZC flag over from the listening socket. */
if (test_bit(SOCK_SUPPORT_ZC, &sock->flags))
set_bit(SOCK_SUPPORT_ZC, &newsock->flags);
/* Attach the accepted sock to the new struct socket. */
sock_graft(sk2, newsock);
newsock->state = SS_CONNECTED;
err = 0;
release_sock(sk2);
do_err:
return err;
}