socket系统调用

当user space调用socket 编程是,最终会调用到linux kernel系统调用如下:
SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
{
    int retval;
    struct socket *sock;
    int flags;

    
//主要是建立一个struct socket的结构变量
    retval = sock_create(family, type, protocol, &sock);
    if (retval < 0)
        goto out;
//为socket 分配一个文件描述符
    retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
    if (retval < 0)
        goto out_release;

out:
    /* It may be already another descriptor 8) Not kernel problem. */
    return retval;

out_release:
    sock_release(sock);
    return retval;
}

int sock_create(int family, int type, int protocol, struct socket **res)
{
    return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
}
int __sock_create(struct net *net, int family, int type, int protocol,
             struct socket **res, int kern)
{
    int err;
    struct socket *sock;
    const struct net_proto_family *pf;

    /*
     *      Check protocol is in range
     */
    if (family < 0 || family >= NPROTO)
        return -EAFNOSUPPORT;
    if (type < 0 || type >= SOCK_MAX)
        return -EINVAL;

    /* Compatibility.

       This uglymoron is moved from INET layer to here to avoid
       deadlock in module load.
     */
    if (family == PF_INET && type == SOCK_PACKET) {
        pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
                 current->comm);
        family = PF_PACKET;
    }
//secure check,如果没有开的话为null
    err = security_socket_create(family, type, protocol, kern);
    if (err)
        return err;

    /*
     *    Allocate the socket and allow the family to set things up. if
     *    the protocol is 0, the family is instructed to select an appropriate
     *    default.
     */
//申请一个新的inode节点,并分配一个struct socket的结构体
    sock = sock_alloc();
    if (!sock) {
        net_warn_ratelimited("socket: no more sockets\n");
        return -ENFILE;    /* Not exactly a match, but its the
                   closest posix thing */
    }

    sock->type = type;

#ifdef CONFIG_MODULES
    /* Attempt to load a protocol module if the find failed.
     *
     * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
     * requested real, full-featured networking support upon configuration.
     * Otherwise module support will break!
     */
    if (rcu_access_pointer(net_families[family]) == NULL)
        request_module("net-pf-%d", family);
#endif

    rcu_read_lock();
//根据family得到    const struct net_proto_family *pf;

    pf = rcu_dereference(net_families[family]);
    err = -EAFNOSUPPORT;
    if (!pf)
        goto out_release;

    /*
     * We will call the ->create function, that possibly is in a loadable
     * module, so we have to bump that loadable module refcnt first.
     */
    if (!try_module_get(pf->owner))
        goto out_release;

    /* Now protected by module ref count */
    rcu_read_unlock();
//调用net_proto_family的create函数
    err = pf->create(net, sock, protocol, kern);
    if (err < 0)
        goto out_module_put;
}
 这里的net_families 定义如下:
static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
在kernel中会通过sock_register 来往net_families中添加选项
int sock_register(const struct net_proto_family *ops)
{
    int err;

    if (ops->family >= NPROTO) {
        pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
        return -ENOBUFS;
    }

    spin_lock(&net_family_lock);
    if (rcu_dereference_protected(net_families[ops->family],
                      lockdep_is_held(&net_family_lock)))
        err = -EEXIST;
    else {
        rcu_assign_pointer(net_families[ops->family], ops);
        err = 0;
    }
    spin_unlock(&net_family_lock);

    pr_info("NET: Registered protocol family %d\n", ops->family);
    return err;
}

这里以inet 协议为例
其源码在net/ipv4/af_inet.c 中的inet_init
    (void)sock_register(&inet_family_ops);

static const struct net_proto_family inet_family_ops = {
    .family = PF_INET,
    .create = inet_create,
    .owner    = THIS_MODULE,
};

所以最终在__sock_create调用的就是inet_create
static int inet_create(struct net *net, struct socket *sock, int protocol,
               int kern)
{
    struct sock *sk;
    struct inet_protosw *answer;
    struct inet_sock *inet;
    struct proto *answer_prot;
    unsigned char answer_flags;
    int try_loading_module = 0;
    int err;

    if (protocol < 0 || protocol >= IPPROTO_MAX)
        return -EINVAL;
//表示未连接状态
    sock->state = SS_UNCONNECTED;
//申请一个struct sock *sk;后面基本就是对sk的赋值
    sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, kern);
    if (!sk)
        goto out;

    err = 0;
    if (INET_PROTOSW_REUSE & answer_flags)
        sk->sk_reuse = SK_CAN_REUSE;

    inet = inet_sk(sk);
    inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;

    inet->nodefrag = 0;

    if (SOCK_RAW == sock->type) {
        inet->inet_num = protocol;
        if (IPPROTO_RAW == protocol)
            inet->hdrincl = 1;
    }

    if (net->ipv4.sysctl_ip_no_pmtu_disc)
        inet->pmtudisc = IP_PMTUDISC_DONT;
    else
        inet->pmtudisc = IP_PMTUDISC_WANT;

    inet->inet_id = 0;

    sock_init_data(sock, sk);

    sk->sk_destruct       = inet_sock_destruct;
    sk->sk_protocol       = protocol;
    sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;

    inet->uc_ttl    = -1;
    inet->mc_loop    = 1;
    inet->mc_ttl    = 1;
    inet->mc_all    = 1;
    inet->mc_index    = 0;
    inet->mc_list    = NULL;
    inet->rcv_tos    = 0;

    sk_refcnt_debug_inc(sk);

    if (inet->inet_num) {
        /* It assumes that any protocol which allows
         * the user to assign a number at socket
         * creation time automatically
         * shares.
         */
        inet->inet_sport = htons(inet->inet_num);
        /* Add to protocol hash chains. */
        err = sk->sk_prot->hash(sk);
        if (err) {
            sk_common_release(sk);
            goto out;
        }
    }

    if (sk->sk_prot->init) {
        err = sk->sk_prot->init(sk);
        if (err) {
            sk_common_release(sk);
            goto out;
        }
    }

    if (!kern) {
        err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
        if (err) {
            sk_common_release(sk);
            goto out;
        }
    }

}

回到SYSCALL_DEFINE3 中,剩下就是sock_map_fd
static int sock_map_fd(struct socket *sock, int flags)
{
    struct file *newfile;
根据flags找到一个没有使用的fd
    int fd = get_unused_fd_flags(flags);
    if (unlikely(fd < 0))
        return fd;
//申请一个新的sock file
    newfile = sock_alloc_file(sock, flags, NULL);
    if (likely(!IS_ERR(newfile))) {
//安装这个file,也就是建立fd和file的关系
        fd_install(fd, newfile);
        return fd;
    }

    put_unused_fd(fd);
    return PTR_ERR(newfile);
}


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值