[网络编程]socket创建流程

今天分析下socket的创建流程

关于用户态socket如何通过系统调用进入内核态的sys_socket,这里就不再分析,直接看内核态socket入口
/*
 * socket() system call entry point.
 *
 * @family, @type, @protocol: the three user-supplied arguments. @type may
 * carry SOCK_CLOEXEC / SOCK_NONBLOCK in the bits outside SOCK_TYPE_MASK.
 *
 * Creates the socket via sock_create() and then binds it to a file
 * descriptor via sock_map_fd(). Returns the value produced by
 * sock_map_fd() on success, -EINVAL for unknown flag bits, or the
 * negative value returned by whichever step failed.
 */
SYSCALL_DEFINE3( socket , int, family, int, type, int, protocol)
{
     int retval;
     struct socket *sock;
     int flags;

     /*
      * Compile-time checks on the SOCK_* constants: the flag bits must not
      * overlap the type mask, and SOCK_CLOEXEC must equal O_CLOEXEC.
      */
     BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
     BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
     BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
     BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);

     /* Split @type into flag bits and the real socket type; reject unknown flags. */
     flags = type & ~SOCK_TYPE_MASK;
     if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
          return -EINVAL;
     type &= SOCK_TYPE_MASK;

     /* Where SOCK_NONBLOCK and O_NONBLOCK differ, translate to the O_* value. */
     if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
          flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;                以上为一系列参数的检查和配置

     /* Step 1: build the struct socket. */
      retval = sock_create(family, type, protocol, &sock);                           sock创建
     if (retval < 0)
          goto out;

     /* Step 2: attach it to a file descriptor; only the two O_* flags are passed on. */
      retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));     与文件系统关联
     if (retval < 0)
          goto out_release;

out:
     /* It may be already another descriptor 8) Not kernel problem. */
     return retval;

out_release:
     /* Mapping failed after the socket was created: release it before returning. */
     sock_release(sock);
     return retval;
}
/*
 * sock_create - create a socket in the calling task's network namespace.
 *
 * Thin wrapper: forwards family/type/protocol and the result pointer to
 * __sock_create(), supplying current->nsproxy->net_ns as the namespace
 * and 0 for the trailing "kern" argument. Returns whatever
 * __sock_create() returns.
 */
int  sock_create (int family, int type, int protocol, struct socket **res)
{
     return  __sock_create (current->nsproxy->net_ns, family, type, protocol, res, 0);
}
static int   __sock_create(struct net *net, int family, int type, int protocol,
               struct socket **res, int kern)
->sock =   sock_alloc();
->pf = rcu_dereference(net_families[family]);      取得协议族操作表
->err = pf->create(net, sock, protocol);           调用协议族操作创建函数

/*
 * sock_alloc - allocate a struct socket together with its inode.
 *
 * Allocates a new inode on the superblock of sock_mnt and derives the
 * struct socket from it via SOCKET_I(). Returns the socket, or NULL if
 * the inode could not be allocated.
 */
static struct socket * sock_alloc(void)
{
     struct inode *inode;
     struct socket *sock;

     /* The inode comes from the socket mount's superblock. */
     inode = new_inode(sock_mnt->mnt_sb);             在网络文件系统中创建inode结构
     if (!inode)
          return NULL;

     /* Map the inode to its associated struct socket. */
     sock = SOCKET_I(inode);                                   从inode结构得到socket结构

     /* Mark the inode as a socket, with rwx for user/group/other, owned by the caller's fsuid/fsgid. */
     kmemcheck_annotate_bitfield(sock, type);               进行初始化工作
     inode->i_mode = S_IFSOCK | S_IRWXUGO;
     inode->i_uid = current_fsuid();
     inode->i_gid = current_fsgid();

     /* Account for the new socket in the per-CPU sockets_in_use counter. */
     percpu_add(sockets_in_use, 1);
     return sock;
}
->pf = rcu_dereference( net_families[family]);      取得协议族操作表
net_families数组内容是通过sock_register注册的
/*
 * sock_register - install a protocol family's ops in net_families[].
 * @ops: descriptor whose ->family selects the slot to fill.
 *
 * Returns 0 on success, -ENOBUFS when ops->family is not below NPROTO,
 * or -EEXIST when the slot is already occupied.
 */
int sock_register(const struct net_proto_family *ops)
{
     int err;

     /* Reject family numbers that would index past the table. */
     if (ops->family >= NPROTO) {
          printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
                 NPROTO);
          return -ENOBUFS;
     }

     /* The slot check and the store are done together under net_family_lock. */
     spin_lock(&net_family_lock);
     if (net_families[ops->family])
          err = -EEXIST;
     else {
            net_families[ops->family] = ops;
          err = 0;
     }
     spin_unlock(&net_family_lock);

     /* NOTE: this message is printed unconditionally, i.e. also when err is -EEXIST. */
     printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
     return err;
}

我们如果调用socket( PF_INET,SOCK_STREAM,0);
在inet_init()中
(void)sock_register(& inet_family_ops);      将 PF_INET协议族的函数操作列表进行注册
/*
 * Protocol-family descriptor for PF_INET. Its .create hook is
 * inet_create, which is what a pf->create(net, sock, protocol) call
 * resolves to for this family.
 */
static struct net_proto_family   inet_family_ops  = {
     .family =   PF_INET,
     .create = inet_create,
     .owner     = THIS_MODULE,
};
我们看下inet_create函数

static int   inet_create(struct net *net, struct socket *sock, int protocol)
{
     struct sock *sk;
     struct inet_protosw *answer;
     struct inet_sock *inet;
     struct proto *answer_prot;
     unsigned char answer_flags;
     char answer_no_check;
     int try_loading_module = 0;
     int err;

     if (unlikely(!inet_ehash_secret))                   若inet_ehash_secret尚未初始化,则对非SOCK_RAW、非SOCK_DGRAM类型的socket调用build_ehash_secret()进行初始化
          if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
               build_ehash_secret();

     sock->state = SS_UNCONNECTED;               将socket的状态设为未连接状态

     /* Look for the requested type/protocol pair. */
lookup_protocol:
     err = -ESOCKTNOSUPPORT;
     rcu_read_lock();
     list_for_each_entry_rcu(answer, & inetsw[sock->type], list) {            
           查看内核是否注册了协议处理
inetsw的结构由void inet_register_protosw(struct inet_protosw *p)函数进行了注册
          而inet_init()中
          for (q = inetsw_array; q < & inetsw_array[INETSW_ARRAY_LEN]; ++q)
                          inet_register_protosw (q);
追根溯源

     static struct inet_protosw inetsw_array[] =
{
     {
          .type =        SOCK_STREAM,                       流套接字
          .protocol =   IPPROTO_TCP,
          .prot =       &tcp_prot,
           .ops =        &inet_stream_ops,
          .capability = -1,
          .no_check =   0,
          .flags =      INET_PROTOSW_PERMANENT |
                     INET_PROTOSW_ICSK,
     },

     {
          .type =        SOCK_DGRAM,              数据报套接字
          .protocol =   IPPROTO_UDP,
          .prot =       &udp_prot,
          .ops =        &inet_dgram_ops,
          .capability = -1,
          .no_check =   UDP_CSUM_DEFAULT,
          .flags =      INET_PROTOSW_PERMANENT,
       },


       {
            .type =        SOCK_RAW,                 原始套接字
            .protocol =   IPPROTO_IP,     /* wild card */
            .prot =       &raw_prot,
            .ops =        &inet_sockraw_ops,
            .capability = CAP_NET_RAW,
            .no_check =   UDP_CSUM_DEFAULT,
            .flags =      INET_PROTOSW_REUSE,
       }
}
      sock->ops = answer->ops;         即为   .ops =        &inet_stream_ops,
          。。。

     err = -ENOBUFS;
     sk = sk_alloc(net, PF_INET, GFP_KERNEL,   answer_prot);    创建sock结构      
以TCP为例,answer_prot 即 answer->prot,为 &tcp_prot(注意不是 inet_stream_ops;inet_stream_ops 是 answer->ops,已在上面赋给了 sock->ops)
                                          对sock的sk_prot的设置:sk->sk_prot = sk->sk_prot_creator = prot;
                                                                                                                             
     if (sk == NULL)
          goto out;

  。。。

      sock_init_data(sock, sk);                  初始化sock与socket结构,并将两者进行关联

     sk->sk_destruct        = inet_sock_destruct;
     sk->sk_protocol        = protocol;
     sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;

     inet->uc_ttl     = -1;
     inet->mc_loop     = 1;
     inet->mc_ttl     = 1;
     inet->mc_all     = 1;
     inet->mc_index     = 0;
     inet->mc_list     = NULL;

     sk_refcnt_debug_inc(sk);

     if (inet->num) {
          /* It assumes that any protocol which allows
          * the user to assign a number at socket
          * creation time automatically
          * shares.
          */
          inet->sport = htons(inet->num);
          /* Add to protocol hash chains. */
          sk->sk_prot->hash(sk);
     }

     if (sk->sk_prot->init) {
          err =   sk->sk_prot->init(sk);
          if (err)
               sk_common_release(sk);
     }
out:
     return err;
out_rcu_unlock:
     rcu_read_unlock();
     goto out;
}

/*
 * proto_ops table for PF_INET stream sockets.
 *
 * Most entries point at generic inet_* / sock_* handlers; poll, sendmsg,
 * sendpage and splice_read point at tcp_* handlers. socketpair and mmap
 * are wired to the sock_no_* stubs. The compat_* entries exist only
 * when CONFIG_COMPAT is defined.
 */
const struct proto_ops inet_stream_ops = {            该结构为TCP类型的sock 操作函数列表   包括listen bind accept  poll 等函数
     .family             = PF_INET,
     .owner             = THIS_MODULE,
     .release        = inet_release,
     .bind             = inet_bind,
     .connect        = inet_stream_connect,
     .socketpair        = sock_no_socketpair,
     .accept             = inet_accept,
     .getname        = inet_getname,
     .poll             = tcp_poll,
     .ioctl             = inet_ioctl,
     .listen             = inet_listen,
     .shutdown        = inet_shutdown,
     .setsockopt        = sock_common_setsockopt,
     .getsockopt        = sock_common_getsockopt,
     .sendmsg        = tcp_sendmsg,
     .recvmsg        = sock_common_recvmsg,
     .mmap             = sock_no_mmap,
     .sendpage        = tcp_sendpage,
     .splice_read        = tcp_splice_read,
#ifdef CONFIG_COMPAT
     .compat_setsockopt = compat_sock_common_setsockopt,
     .compat_getsockopt = compat_sock_common_getsockopt,
#endif
};

/*
 * struct proto instance for TCP (.name = "TCP").
 *
 * Bundles the TCP handlers (connect, close, recvmsg, backlog_rcv, ...),
 * the hash/unhash/get_port hooks, pointers to TCP's memory-accounting
 * variables and sysctl arrays, and the per-socket object size
 * (sizeof(struct tcp_sock)). The compat_* entries exist only when
 * CONFIG_COMPAT is defined.
 */
struct proto tcp_prot = {               TCP协议注册的协议层处理函数
     .name               = "TCP",
     .owner               = THIS_MODULE,
     .close               = tcp_close,
     .connect          = tcp_v4_connect,
     .disconnect          = tcp_disconnect,
     .accept               = inet_csk_accept,
     .ioctl               = tcp_ioctl,
       .init               = tcp_v4_init_sock,
     .destroy          = tcp_v4_destroy_sock,
     .shutdown          = tcp_shutdown,
     .setsockopt          = tcp_setsockopt,
     .getsockopt          = tcp_getsockopt,
     .recvmsg          = tcp_recvmsg,
     .backlog_rcv          = tcp_v4_do_rcv,
     .hash               = inet_hash,
     .unhash               = inet_unhash,
     .get_port          = inet_csk_get_port,
     .enter_memory_pressure     = tcp_enter_memory_pressure,
     .sockets_allocated     = &tcp_sockets_allocated,
     .orphan_count          = &tcp_orphan_count,
     .memory_allocated     = &tcp_memory_allocated,
     .memory_pressure     = &tcp_memory_pressure,
     .sysctl_mem          = sysctl_tcp_mem,
     .sysctl_wmem          = sysctl_tcp_wmem,
     .sysctl_rmem          = sysctl_tcp_rmem,
     .max_header          = MAX_TCP_HEADER,
     .obj_size          = sizeof(struct tcp_sock),
     .slab_flags          = SLAB_DESTROY_BY_RCU,
     .twsk_prot          = &tcp_timewait_sock_ops,
     .rsk_prot          = &tcp_request_sock_ops,
     .h.hashinfo          = &tcp_hashinfo,
#ifdef CONFIG_COMPAT
     .compat_setsockopt     = compat_tcp_setsockopt,
     .compat_getsockopt     = compat_tcp_getsockopt,
#endif
};

sock_create(family, type, protocol, &sock);                           
socket的创建过程基本就是这样:先从socket文件系统(sockfs)中申请inode,并由此得到socket结构;再申请新的sock结构,并将两者进行关联;最后根据socket(family, type, protocol)传入的协议族、套接字类型(流式套接字还是数据报套接字)等参数,挂载相应的处理函数集。
我们再看下
sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));  的工作

/*
 * sock_map_fd - bind a socket to a newly allocated file descriptor.
 * @sock:  socket to expose through a file.
 * @flags: passed through to sock_alloc_fd() and sock_attach_fd().
 *
 * Returns the descriptor on success. If sock_alloc_fd() fails its
 * negative result is returned unchanged; if sock_attach_fd() fails the
 * file and descriptor are given back and that error is returned.
 */
int sock_map_fd(struct socket *sock, int flags)
{
     struct file *newfile;
     int fd =   sock_alloc_fd(&newfile, flags);               为socket分配文件号与文件结构

     if (likely(fd >= 0)) {
          int err =   sock_attach_fd(sock, newfile, flags);   将socket与file文件结构进行关联

          /* Attach failed: undo the allocation before reporting the error. */
          if (unlikely(err < 0)) {
               put_filp(newfile);
               put_unused_fd(fd);
               return err;
          }
          /* Install the file under fd only after the attach succeeded. */
            fd_install(fd, newfile);            将文件号与文件挂钩
     }
     return fd;
}

/*
 * sock_attach_fd - wire a struct socket to a struct file.
 * @sock:  socket being attached.
 * @file:  file structure to initialise for it.
 * @flags: only the O_NONBLOCK bit is copied into file->f_flags.
 *
 * Allocates a dentry with an empty name under the root of sock_mnt's
 * superblock, instantiates it with the socket's inode, then initialises
 * @file with socket_file_ops and cross-links the two objects
 * (sock->file and file->private_data).
 *
 * Returns 0 on success or -ENOMEM if the dentry cannot be allocated.
 */
static int   sock_attach_fd(struct socket *sock, struct file *file, int flags)
{
     struct dentry *dentry;
     struct qstr name = { .name = "" };

      dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);          分配目录项
     if (unlikely(!dentry))
          return -ENOMEM;

     dentry->d_op = &sockfs_dentry_operations;
     /*
      * We don't want to push this dentry into the global dentry hash table.
      * Instead we pretend it is already hashed by clearing DCACHE_UNHASHED.
      * This permits a working /proc/$pid/fd/XXX on sockets.
      */
     dentry->d_flags &= ~DCACHE_UNHASHED;
     d_instantiate(dentry, SOCK_INODE(sock));                  将目录项与inode关联

     /* Link socket -> file, then initialise the file as read/write with the socket file ops. */
     sock->file = file;
       init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE,
            &
socket_file_ops );            
对file结构成员赋值,并将file->f_op 更新为 socket_file_ops socket类型文件的操作函数集
     SOCK_INODE(sock)->i_fop = &socket_file_ops;
     file->f_flags = O_RDWR | (flags & O_NONBLOCK);
     file->f_pos = 0;
     /* Link file -> socket. */
       file->private_data = sock;              file的私有数据被更新为socket变量

     return 0;
}

摘一幅《Linux内核源代码情景分析》中的图,主要数据结构的组织关系大概就是这样
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值