socket.linux内核,Linux内核Socket实现之------Socket创建(2)

转载请注明:http://blog.chinaunix.net/uid-20788636-id-4408276.html

1.2 sock_map_fd函数

在用户空间创建了一个socket后,返回值是一个文件描述符,下面分析一下创建socket时怎么和文件描述符联系的。在SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)最后调用sock_map_fd进行关联,其中返回的retval就是用户空间获取的文件描述符fd,sock就是调用sock_create创建成功的socket.

sock_map_fd()主要用于对socket的*file指针初始化,经过sock_map_fd()操作后,socket就通过其*file指针与VFS管理的文件进行了关联,便可以进行文件的各种操作,如read、write、lseek、ioctl等.

/* Only O_CLOEXEC and O_NONBLOCK are passed on from the caller's flags. */
retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));

/*
 * sock_map_fd - attach a socket to a freshly reserved file descriptor.
 * @sock:  socket to expose through a descriptor
 * @flags: forwarded to get_unused_fd_flags() and sock_alloc_file()
 *
 * Returns the descriptor on success.  On failure returns the negative value
 * from get_unused_fd_flags(), or PTR_ERR() of the error pointer returned by
 * sock_alloc_file().
 */
static int sock_map_fd(struct socket *sock, int flags)
{
	struct file *newfile;
	int fd;

	/* Reserve an unused descriptor according to flags (see section 1.2.1). */
	fd = get_unused_fd_flags(flags);
	if (unlikely(fd < 0))
		return fd;

	newfile = sock_alloc_file(sock, flags, NULL);
	if (unlikely(IS_ERR(newfile))) {
		/* Nothing to install: give the reserved descriptor back. */
		put_unused_fd(fd);
		return PTR_ERR(newfile);
	}

	fd_install(fd, newfile);
	return fd;
}

1.2.1   get_unused_fd_flags函数

get_unused_fd_flags()函数调用__alloc_fd分配一个新的可用的fd

int __alloc_fd(struct files_struct *files,

unsigned start, unsigned end, unsigned

flags)

{

unsigned

int fd;

int

error;

struct

fdtable *fdt;

spin_lock(&files->file_lock);

repeat:

/*得到本进程的文件描述符表*/

fdt

= files_fdtable(files);

fd

= start;//从start开始,这里的start为0

/* files->next_fd为上一次查找确定的下一个可用空闲的文件描述符,这样可以提高获取的效率,如果fd小于files->next_fd的话就可以直接使用next_fd */

if

(fd < files->next_fd)

fd

= files->next_fd;

/*当fd小于目前进程支持的最大的描述符号,那么可以通过fds_bits位图,从fd位开始查找,找到下一个0位,即下一个空闲描述符。*/

if

(fd < fdt->max_fds)

fd

= find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd);

/*

* N.B. For clone tasks sharing a files

structure, this test

* will limit the total number of files that

can be opened.

*/

error

= -EMFILE;

if

(fd >= end)

goto

out;

/*如需要则扩展文件描述符表*/

error

= expand_files(files, fd);

if

(error < 0)

goto

out;

/*

* If we needed to expand the fs array we

* might have blocked - try again.

*/

if

(error)

goto

repeat;

/*

设置next_fd,用于下次加速查找空闲的fd。

当start大于next_fd时,不会设置next_fd以避免文件描述符的不连续

*/

if

(start <= files->next_fd)

files->next_fd

= fd + 1;

/*将fd添加到已打开的文件描述符表中*/

__set_open_fd(fd,

fdt);

if

(flags & O_CLOEXEC)

__set_close_on_exec(fd,

fdt);

else

__clear_close_on_exec(fd,

fdt);

error

= fd;

#if 1

/*

Sanity check */

if

(rcu_dereference_raw(fdt->fd[fd]) != NULL) {

printk(KERN_WARNING

"alloc_fd: slot %d not NULL!\n", fd);

rcu_assign_pointer(fdt->fd[fd],

NULL);

}

#endif

out:

spin_unlock(&files->file_lock);

return

error;

}

1.2.2 sock_alloc_file函数

struct file *sock_alloc_file(struct socket

*sock, int flags, const char *dname)

{

struct

qstr name = { .name = "" };

struct

path path;

struct

file *file;

if

(dname) {//这里的dname为空

name.name

= dname;

name.len

= strlen(name.name);

}

else if (sock->sk) {

/*这里的name应该是TCP根据struct proto tcp_prot */

name.name

= sock->sk->sk_prot_creator->name;

name.len

= strlen(name.name);

}

/*申请一个新的dentry,其中sock_mnt->mnt_sb在前面已经分析过了,是一个sock_fs_type文件系统挂载点,*/

path.dentry

= d_alloc_pseudo(sock_mnt->mnt_sb, &name);

if

(unlikely(!path.dentry))

return

ERR_PTR(-ENOMEM);

path.mnt

= mntget(sock_mnt);

/*将文件操作的函数绑定到inode,对于dentry是在sockfs_mount函数中sockfs_dentry_operations,该函数在sock_init是调用,在前面有分析*/

d_instantiate(path.dentry,

SOCK_INODE(sock));

SOCK_INODE(sock)->i_fop

= &socket_file_ops;

/*申请新的file,将path,file,关联起来*/

file

= alloc_file(&path, FMODE_READ | FMODE_WRITE,

&socket_file_ops);

if

(unlikely(IS_ERR(file))) {

/*

drop dentry, keep inode */

ihold(path.dentry->d_inode);

path_put(&path);

return

file;

}

sock->file

= file;//sock->file和刚分配的file关联起来

file->f_flags

= O_RDWR | (flags & O_NONBLOCK);//设置file的标志

file->private_data

= sock;//file的私有数据指针指向sock.

return

file;

}

68602d4e0fbeb203f47ef7a8ae5450b9.png

Socket创建流程图

附录:对于sk_alloc分配的内存大小问题分析

在分析中经常看到 inet = inet_sk(sk); 这种类型的强制转换,其中inet被定义为struct inet_sock *inet;。从结构体的定义来看,struct sock的大小小于struct inet_sock,如果只按struct sock的大小分配内存,这样的强制类型转换就会访问越界;但在实际分配的过程中,分配的大小是prot->obj_size(对TCP而言即sizeof(struct tcp_sock)),该大小足以容纳这些结构体,因此转换是安全的。

/*
 * sk_alloc - allocate and minimally initialise a struct sock.
 * @net:      network namespace recorded in the new sock
 * @family:   protocol family stored in sk_family
 * @priority: allocation flags; __GFP_ZERO is always added
 * @prot:     protocol descriptor handed to sk_prot_alloc()
 *
 * Returns the new sock, or the NULL returned by sk_prot_alloc().
 */
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
		      struct proto *prot)
{
	struct sock *sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);

	if (!sk)
		return sk;

	sk->sk_family = family;
	/*
	 * See comment in struct sock definition to understand
	 * why we need sk_prot_creator -acme
	 */
	sk->sk_prot_creator = prot;
	sk->sk_prot = prot;
	sock_lock_init(sk);
	sock_net_set(sk, get_net(net));
	atomic_set(&sk->sk_wmem_alloc, 1);

	sock_update_classid(sk);
	sock_update_netprioidx(sk);

	return sk;
}

/*
 * sk_prot_alloc - allocate the memory backing a sock for protocol @prot.
 * @prot:     protocol descriptor; supplies the slab cache and obj_size
 * @priority: allocation flags
 * @family:   protocol family, passed to security_sk_alloc()
 *
 * Two allocation paths exist: (1) from the protocol's own slab cache when
 * prot->slab is set, (2) a plain kmalloc() of prot->obj_size bytes otherwise.
 *
 * Returns the new object, or NULL when the allocation, the security hook or
 * try_module_get() fails.
 */
static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
		int family)
{
	struct sock *sk;
	struct kmem_cache *slab;

	slab = prot->slab;
	if (slab != NULL) {
		/*
		 * (1) Slab-cache path.  __GFP_ZERO is masked off for the
		 * allocator; when the caller asked for it, the object is
		 * cleared below through the protocol's clear_sk hook or
		 * sk_prot_clear_nulls().
		 */
		sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
		if (!sk)
			return sk;
		if (priority & __GFP_ZERO) {
			if (prot->clear_sk)
				prot->clear_sk(sk, prot->obj_size);
			else
				sk_prot_clear_nulls(sk, prot->obj_size);
		}
	} else {
		/* (2) Plain allocation of prot->obj_size bytes. */
		sk = kmalloc(prot->obj_size, priority);
	}

	if (sk != NULL) {
		kmemcheck_annotate_bitfield(sk, flags);

		if (security_sk_alloc(sk, family, priority))
			goto out_free;

		if (!try_module_get(prot->owner))
			goto out_free_sec;
		sk_tx_queue_clear(sk);
	}

	return sk;

out_free_sec:
	security_sk_free(sk);
out_free:
	/* Release through the same allocator the object came from. */
	if (slab != NULL)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	return NULL;
}

(1)第一种情况:sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO),这里的slab来自slab = prot->slab;,而prot就是函数参数传递过来的struct proto *prot。再看一下这个参数是怎么来的:在inet_create函数中调用sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot);,其中answer_prot = answer->prot;。再看一下answer->prot是如何来的?

在inet_create函数中通过遍历inetsw数组获取到struct inet_protosw *answer;

/*
 * Walk the inetsw list for this socket type looking for an entry that fits
 * the requested protocol.  err is 0 when the loop exits through a break and
 * -EPROTONOSUPPORT when an entry was visited without matching.
 */
list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {
	err = 0;

	if (protocol == answer->protocol) {
		/* Check the non-wild match. */
		if (protocol != IPPROTO_IP)
			break;
	} else if (protocol == IPPROTO_IP) {
		/* Wild case 1: caller passed IPPROTO_IP, adopt the entry's protocol. */
		protocol = answer->protocol;
		break;
	} else if (answer->protocol == IPPROTO_IP) {
		/* Wild case 2: the entry itself is the IPPROTO_IP wildcard. */
		break;
	}

	err = -EPROTONOSUPPORT;
}

其中inetsw中的链表项来自下面的inetsw_array数组,如果是SOCK_STREAM类型的socket,这里的prot = &tcp_prot

static

struct inet_protosw inetsw_array[] =

{

{

.type =       SOCK_STREAM,

.protocol =   IPPROTO_TCP,

.prot =       &tcp_prot,

.ops =        &inet_stream_ops,

.no_check =   0,

.flags =      INET_PROTOSW_PERMANENT |

INET_PROTOSW_ICSK,

},

{

.type =       SOCK_DGRAM,

.protocol =   IPPROTO_UDP,

.prot =       &udp_prot,

.ops =        &inet_dgram_ops,

.no_check =   UDP_CSUM_DEFAULT,

.flags =      INET_PROTOSW_PERMANENT,

},

{

.type =       SOCK_DGRAM,

.protocol =   IPPROTO_ICMP,

.prot =       &ping_prot,

.ops =        &inet_dgram_ops,

.no_check =   UDP_CSUM_DEFAULT,

.flags =      INET_PROTOSW_REUSE,

},

{

.type =       SOCK_RAW,

.protocol =   IPPROTO_IP,       /* wild card */

.prot =       &raw_prot,

.ops =

&inet_sockraw_ops,

.no_check =   UDP_CSUM_DEFAULT,

.flags =      INET_PROTOSW_REUSE,

}

};

再看一下tcp_prot的定义:

struct

proto tcp_prot = {

.name                         =

"TCP",

.owner                        =

THIS_MODULE,

.close                          =

tcp_close,

.connect            = tcp_v4_connect,

.disconnect                = tcp_disconnect,

.accept                       =

inet_csk_accept,

.ioctl                            =

tcp_ioctl,

.init                     =

tcp_v4_init_sock,

.destroy            = tcp_v4_destroy_sock,

.shutdown                 = tcp_shutdown,

.setsockopt               = tcp_setsockopt,

.getsockopt               = tcp_getsockopt,

.recvmsg           = tcp_recvmsg,

.sendmsg                   = tcp_sendmsg,

.sendpage                  = tcp_sendpage,

.backlog_rcv              = tcp_v4_do_rcv,

.release_cb               = tcp_release_cb,

.mtu_reduced          = tcp_v4_mtu_reduced,

.hash                           =

inet_hash,

.unhash                      =

inet_unhash,

.get_port          = inet_csk_get_port,

.enter_memory_pressure       = tcp_enter_memory_pressure,

.stream_memory_free    = tcp_stream_memory_free,

.sockets_allocated  = &tcp_sockets_allocated,

.orphan_count                   = &tcp_orphan_count,

.memory_allocated = &tcp_memory_allocated,

.memory_pressure = &tcp_memory_pressure,

.sysctl_mem             = sysctl_tcp_mem,

.sysctl_wmem          = sysctl_tcp_wmem,

.sysctl_rmem            = sysctl_tcp_rmem,

.max_header            = MAX_TCP_HEADER,

.obj_size           =

sizeof(struct tcp_sock),

.slab_flags                 = SLAB_DESTROY_BY_RCU,

.twsk_prot                 = &tcp_timewait_sock_ops,

.rsk_prot           = &tcp_request_sock_ops,

.h.hashinfo                = &tcp_hashinfo,

.no_autobind            = true,

#ifdef

CONFIG_COMPAT

.compat_setsockopt        = compat_tcp_setsockopt,

.compat_getsockopt        = compat_tcp_getsockopt,

#endif

#ifdef

CONFIG_MEMCG_KMEM

.init_cgroup               = tcp_init_cgroup,

.destroy_cgroup                = tcp_destroy_cgroup,

.proto_cgroup          = tcp_proto_cgroup,

#endif

};

在af_inet.c文件中的inet_init函数中,通过proto_register对tcp_prot进行注册:

/*
 * inet_init - bring up the IPv4 protocol family.
 *
 * Registers the TCP, UDP, RAW and PING struct proto objects, registers the
 * inet socket family, adds the base protocols, fills the inetsw lists from
 * inetsw_array and then initialises the individual IPv4 modules.
 *
 * Returns 0 on success.  Returns -EINVAL when the reserved-ports allocation
 * fails, or the non-zero value from the failing proto_register() call; in
 * the latter case the protos registered so far are unregistered and the
 * reserved-ports allocation is freed.
 */
static int __init inet_init(void)
{
	struct inet_protosw *q;
	struct list_head *r;
	int rc = -EINVAL;

	BUILD_BUG_ON(sizeof(struct inet_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb));

	sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
	if (!sysctl_local_reserved_ports)
		goto out;

	/*
	 * Register tcp_prot.  The second argument (alloc_slab) is 1, so
	 * proto_register() creates the slab cache stored in tcp_prot.slab.
	 */
	rc = proto_register(&tcp_prot, 1);
	if (rc)
		goto out_free_reserved_ports;

	rc = proto_register(&udp_prot, 1);
	if (rc)
		goto out_unregister_tcp_proto;

	rc = proto_register(&raw_prot, 1);
	if (rc)
		goto out_unregister_udp_proto;

	rc = proto_register(&ping_prot, 1);
	if (rc)
		goto out_unregister_raw_proto;

	/*
	 *	Tell SOCKET that we are alive...
	 */

	(void)sock_register(&inet_family_ops);

#ifdef CONFIG_SYSCTL
	ip_static_sysctl_init();
#endif

	/*
	 *	Add all the base protocols.
	 */

	if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
		pr_crit("%s: Cannot add ICMP protocol\n", __func__);
	if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
		pr_crit("%s: Cannot add UDP protocol\n", __func__);
	if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
		pr_crit("%s: Cannot add TCP protocol\n", __func__);
#ifdef CONFIG_IP_MULTICAST
	if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
		pr_crit("%s: Cannot add IGMP protocol\n", __func__);
#endif

	/*
	 * Register the socket-side information for inet_create.
	 * First initialise every list head in inetsw.
	 */
	for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
		INIT_LIST_HEAD(r);

	/*
	 * Add each inetsw_array entry to the matching inetsw list, so that
	 * inet_create can walk those lists.
	 */
	for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)
		inet_register_protosw(q);

	/*
	 *	Set the ARP module up
	 */

	arp_init();

	/*
	 *	Set the IP module up
	 */

	ip_init();

	tcp_v4_init();

	/* Setup TCP slab cache for open requests. */
	tcp_init();

	/* Setup UDP memory threshold */
	udp_init();

	/* Add UDP-Lite (RFC 3828) */
	udplite4_register();

	ping_init();

	/*
	 *	Set the ICMP layer up
	 */

	if (icmp_init() < 0)
		panic("Failed to create the ICMP control socket.\n");

	/*
	 *	Initialise the multicast router
	 */
#if defined(CONFIG_IP_MROUTE)
	if (ip_mr_init())
		pr_crit("%s: Cannot init ipv4 mroute\n", __func__);
#endif

	/*
	 *	Initialise per-cpu ipv4 mibs
	 */

	if (init_ipv4_mibs())
		pr_crit("%s: Cannot init ipv4 mibs\n", __func__);

	ipv4_proc_init();

	ipfrag_init();

	dev_add_pack(&ip_packet_type);

	rc = 0;
out:
	return rc;

	/* Error unwinding: undo the registrations in reverse order. */
out_unregister_raw_proto:
	proto_unregister(&raw_prot);
out_unregister_udp_proto:
	proto_unregister(&udp_prot);
out_unregister_tcp_proto:
	proto_unregister(&tcp_prot);
out_free_reserved_ports:
	kfree(sysctl_local_reserved_ports);
	goto out;
}

在proto_register函数中,主要关注对prot->slab的初始化。

int

proto_register(struct proto *prot, int alloc_slab)

{

if (alloc_slab) {

prot->slab = kmem_cache_create(prot->name,prot->obj_size, 0,

SLAB_HWCACHE_ALIGN |

prot->slab_flags,

NULL);//这里的饿prot->obj_size为.obj_size               =

sizeof(struct tcp_sock),

if (prot->slab == NULL) {

pr_crit("%s:

Can't create sock SLAB cache!\n",

prot->name);

goto out;

}

……………………..

}

(2)对于第二种情况,主要prot->obj_size,就是struct proto tcp_prot中初始化的.obj_size            = sizeof(struct tcp_sock)。sk = kmalloc(prot->obj_size, priority);---------------------------(2)

下面是五个相关的数据结构,其中tcp_sock结构体占用的空间是最大的。对TCP socket而言,分配内存空间时是按prot->obj_size即tcp_sock的大小分配的,这样在后面进行强制转换的过程中可以保证正确。

99fbd601ae2a9fa4b09ef70fa337f945.png

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值