linux0.99网络模块-应用层 or 传输层读写

本文深入剖析Linux内核中关于UDP和TCP的read/write函数,探讨应用层通过系统调用与内核交互的细节,涉及sock结构体的初始化、端口分配以及数据包处理流程。通过对关键代码的分析,展示了如何根据socket类型创建及监听套接字,并处理TCP连接请求。
摘要由CSDN通过智能技术生成
应用层的操作大都会以系统调用的形式调用相应的内核函数,本文就分析一下相关的系统调用以及其他一些辅助函数。
本文重点分析UDP与TCP的read/write函数,以达到承前启后的目的(其他函数读者可以大致浏览一下)。

create

 884 static int
 885 ip_proto_create (struct socket *sock, int protocol)
 886 {
 887   volatile struct sock *sk;
 888   struct proto *prot;
 889   int err;
 891   sk = kmalloc (sizeof (*sk), GFP_KERNEL);
 892   if (sk == NULL)
 893     return (-ENOMEM);
 894   sk->num = 0;
 897   switch (sock->type)
 898     {
 899     case SOCK_STREAM:
 900     case SOCK_SEQPACKET:
 901        if (protocol && protocol != IPPROTO_TCP)
 902      {
 903         kfree_s ((void *)sk, sizeof (*sk));
 904         return (-EPROTONOSUPPORT);
 905      }
 906        sk->no_check = TCP_NO_CHECK;
 907        prot = &tcp_prot;
 908        break;
 910     case SOCK_DGRAM:
 911        if (protocol && protocol != IPPROTO_UDP)
 912      {
 913         kfree_s ((void *)sk, sizeof (*sk));
 914         return (-EPROTONOSUPPORT);
 915      }
 916        sk->no_check = UDP_NO_CHECK;
 917        prot=&udp_prot;
 918        break;
 920      case SOCK_RAW:
 921        if (!suser())
 922      {
 923         kfree_s ((void *)sk, sizeof (*sk));
 924         return (-EPERM);
 925      }
 927        if (!protocol)
 928      {
 929         kfree_s ((void *)sk, sizeof (*sk));
 930         return (-EPROTONOSUPPORT);
 931      }
 932        prot = &raw_prot;
 933        sk->reuse = 1;
 934        sk->no_check = 0; /* doesn't matter no checksum is preformed
 935                 anyway. */
 936        sk->num = protocol;
 937        break;
 939     case SOCK_PACKET:
 940        if (!suser())
 941      {
 942         kfree_s ((void *)sk, sizeof (*sk));
 943         return (-EPERM);
 944      }
 946        if (!protocol)
 947      {
 948         kfree_s ((void *)sk, sizeof (*sk));
 949         return (-EPROTONOSUPPORT);
 950      }
 951        prot = &packet_prot;
 952        sk->reuse = 1;
 953        sk->no_check = 0; /* doesn't matter no checksum is preformed
 954                 anyway. */
 955        sk->num = protocol;
 956        break;
 959     default:
 960        kfree_s ((void *)sk, sizeof (*sk));
 961        return (-ESOCKTNOSUPPORT);
 963     }
以上根据sock的不同类型赋值不同的协议
 964   sk->protocol = protocol;
 965   sk->wmem_alloc = 0;
 966   sk->rmem_alloc = 0;
 967   sk->pair = NULL;
 968   sk->opt = NULL;
 969   sk->send_seq = 0;
 970   sk->acked_seq = 0;
 971   sk->copied_seq = 0;
 972   sk->fin_seq = 0;
 973   sk->proc = 0;
 974   sk->rtt = TCP_WRITE_TIME;
 975   sk->packets_out = 0;
 976   sk->cong_window = 1; /* start with only sending one packet at a time. */
 977   sk->exp_growth = 1;  /* if set cong_window grow exponentially every time
 978               we get an ack. */
 979   sk->urginline = 0;
 980   sk->intr = 0;
 981   sk->linger = 0;
 982   sk->destroy = 0;
 983   sk->reuse = 0;
 984   sk->priority = 1;
 985   sk->shutdown = 0;
 986   sk->urg = 0;
 987   sk->keepopen = 0;
 988   sk->done = 0;
 989   sk->ack_backlog = 0;
 990   sk->window = 0;
 991   sk->bytes_rcv = 0;
 992   sk->state = TCP_CLOSE;
 993   sk->dead = 0;
 994   sk->ack_timed = 0;
 995   sk->send_tmp = NULL;
 996   sk->mss = 0; /* we will try not to send any packets smaller
 997            than this. */
 999   /* this is how many unacked bytes we will accept for
1000      this socket.  */
1002   sk->max_unacked = 2048; /* needs to be at most 2 full packets. */
1004   /* how many packets we should send before forcing an ack.
1005      if this is set to zero it is the same as sk->delay_acks = 0 */
1007   sk->max_ack_backlog = MAX_ACK_BACKLOG;
1008   sk->inuse = 0;
1009   sk->delay_acks = 1; /* default to waiting a while before sending
1010              acks.  */
1011   sk->wback = NULL;
1012   sk->wfront = NULL;
1013   sk->rqueue = NULL;
1014   sk->mtu = 576;
1015   sk->prot = prot;
1016   sk->sleep = sock->wait;
1017   sk->daddr = 0;
1018   sk->saddr = MY_IP_ADDR;
1019   sk->err = 0;
1020   sk->next = NULL;
1021   sk->pair = NULL;
1022   sk->send_tail = NULL;
1023   sk->send_head = NULL;
1024   sk->time_wait.len = TCP_CONNECT_TIME;
1025   sk->time_wait.when = 0;
1026   sk->time_wait.sk = sk;
1027   sk->time_wait.next = NULL;
1028   sk->timeout = 0;
1029   sk->back_log = NULL;
1030   sk->blog = 0;
1031   sock->data =(void *) sk;
1032   sk->dummy_th.doff = sizeof (sk->dummy_th)/4;
1033   sk->dummy_th.res1=0;
1034   sk->dummy_th.res2=0;
1035   sk->dummy_th.urg_ptr = 0;
1036   sk->dummy_th.fin = 0;
1037   sk->dummy_th.syn = 0;
1038   sk->dummy_th.rst = 0;
1039   sk->dummy_th.psh = 0;
1040   sk->dummy_th.ack = 0;
1041   sk->dummy_th.urg = 0;
1042   sk->dummy_th.dest = 0;
1044   if (sk->num)
1045     {
1046       /* it assumes that any protocol which allows
1047      the user to assign a number at socket
1048      creation time automatically
1049      shares. */
1050       put_sock (sk->num, sk);
1051       sk->dummy_th.source = net16(sk->num);
1052     }
1054   if (sk->prot->init)
1055     {
1056        err = sk->prot->init(sk);
1057        if (err != 0)
1058      {
1059         destroy_sock (sk);
1060         return (err);
1061      }
1062     }
1063   return (0);
1064 }
代码虽长,逻辑却很简单:根据sock类型的不同选择相应的协议,创建sock类型变量sk并保存到sock的data域中;之后如果端口号大于0,就将其加入到sock_array数组中;最后调用初始化函数进行初始化。


listen

 808 static int
 809 ip_proto_listen(struct socket *sock, int backlog)
 810 {
 811   volatile struct sock *sk;
 812   sk = sock->data;
(对应上面create函数的1031行)
 813    if (sk == NULL)
 814      {
 815     printk ("Warning: sock->data = NULL: %d\n" ,__LINE__);
 816     return (0);
 817      }
 
 819   /* we may need to bind the socket. */
 820   if (sk->num == 0)
 821     {
 822       sk->num = get_new_socknum (sk->prot, 0);
 823       if (sk->num == 0) return (-EAGAIN);
 824       put_sock (sk->num, sk);
 825       sk->dummy_th.source = net16(sk->num);
 826     }
 上面的create函数中,如果sock类型是SOCK_STREAM,创建的是TCP连接,它没有设置num属性。因此现在需要为其寻找一个端口,找到之后把该sock加入到sock_array数组中
 828   /* we might as well re use these. */
 829   sk->max_ack_backlog = backlog;     //最大积压确认队列
 830   sk->ack_backlog = 0;         
 831   sk->state = TCP_LISTEN;     //设置状态为listen
 832   return (0);
 833 }
可以看到listen也比较简单,从参数指明的sock的data域中取出在创建过程中保存的sk,如果没有分配端口的话就为其分配端口,并把该sk加入到sock_array中,同时把该端口赋值到dummy_th的source字段。设置最大积压确认队列的大小,设置状态为listen。对于TCP连接来说,只是创建socket并不会把它加入到sock_array中,在调用listen后才被加入到sock_array.




tcp_conn_request(这个函数是从tcp_rcv中调用来建立TCP连接的,可以参考《linux0.99网络模块-传输层(TCP接收)》)


1455 /* This routine handles a connection request.  This should make sure
1456    we haven't already responded. */
1457 /* Because of the way BSD works, we have to send a syn/ack now. This also
1458  means it will be harder to close a socket which is listening. */
1460 static  void
1461 tcp_conn_request(volatile struct sock *sk, struct sk_buff *skb,
1462          unsigned long daddr,
1463          unsigned long saddr, struct options *opt, struct device *dev)
1464 {
1465   struct sk_buff *buff;
1466   struct tcp_header *t1;
1467   unsigned char *ptr;
1468   volatile struct sock *newsk;
1469   struct tcp_header *th;
1470   int tmp;
1471   th = skb->h.th;

1473   PRINTK ("tcp_conn_request (sk = %X, skb = %X, daddr = %X, sadd4= %X, \n"
1474       "                  opt = %X, dev = %X)\n",
1475       sk, skb, daddr, saddr, opt, dev);
1477   /* if the socket is dead, don't accept the connection. */
1478   if (!sk->dead)
1479     {
1480        wake_up(sk->sleep);
1481     }
1482   else
1483     {
1484        PRINTK ("tcp_conn_request on dead socket\n");
1485        tcp_reset (daddr, saddr, th, sk->prot, opt, dev);
1486        kfree_skb (skb, FREE_READ);
1487        return;
1488     }

1490   /* make sure we can accept more.  This will prevent a flurry of
1491      syns from eating up all our memory. */
1492   if (sk->ack_backlog >= sk->max_ack_backlog)
1493     {
1494        kfree_skb (skb, FREE_READ);
1495        return;
1496     }
因为可能会有非常多的主机想要连接到服务器,在服务器接受连接之前,这些TCP连接会暂时保存,为了防止内存耗尽,需要限制最多积压确认数

1498   /* we need to build a new sock struct. */
1499   /* It is sort of bad to have a socket without an inode attached to
1500      it, but the wake_up's will just wake up the listening socket,
1501      and if the listening socket is destroyed before this is taken
1502      off of the queue, this will take care of it. */

1504   newsk = kmalloc(sizeof (struct sock), GFP_ATOMIC);
1505   if (newsk == NULL)
1506     {
1507        /* just ignore the syn.  It will get retransmitted. */
1508        kfree_skb (skb, FREE_READ);
1509        return;
1510     }

1513   PRINTK ("newsk = %X\n", newsk);
1514   memcpy ((void *)newsk, (void *)sk, sizeof (*newsk));
1515   newsk->wback = NULL;
1516   newsk->wfront = NULL;
1517   newsk->rqueue = NULL;
1518   newsk->send_head = NULL;
1519   newsk->send_tail = NULL;
1520   newsk->back_log = NULL;
1521   newsk->blog = 0;
1522   newsk->intr = 0;
1523   newsk->proc = 0;
1524   newsk->done = 0;
1525   newsk->send_tmp = NULL;
1526   newsk->pair = NULL;
1527   newsk->wmem_alloc = 0;
1528   newsk->rmem_alloc = 0;
1530   newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
1532   newsk->err = 0;
1533   newsk->shutdown = 0;
1534   newsk->ack_backlog = 0;
1535   newsk->acked_seq = skb->h.th->seq+1;
1536   newsk->fin_seq = skb->h.th->seq;
1537   newsk->copied_seq = skb->h.th->seq;
1538   newsk->state = TCP_SYN_RECV;
1539   newsk->timeout = 0;
1540   newsk->send_seq = timer_seq*SEQ_TICK-seq_offset;
1541   newsk->rcv_ack_seq = newsk->send_seq;
1542   newsk->urg =0;
1543   newsk->retransmits = 0;
1544   newsk->destroy = 0;
1545   newsk->time_wait.sk = newsk;
1546   newsk->time_wait.next = NULL;
1547   newsk->dummy_th.source = skb->h.th->dest;
1548   newsk->dummy_th.dest = skb->h.th->source;
1549   /* swap these two, they are from our point of view. */
1550   newsk->daddr=saddr;
1551   newsk->saddr=daddr;

1553   put_sock (newsk->num,newsk);
1554   newsk->dummy_th.res1=0;
1555   newsk->dummy_th.doff=6;
1556   newsk->dummy_th.fin=0;
1557   newsk->dummy_th.syn=0;
1558   newsk->dummy_th.rst=0;
1559   newsk->dummy_th.psh=0;
1560   newsk->dummy_th.ack=0;
1561   newsk->dummy_th.urg=0;
1562   newsk->dummy_th.res2=0;
1563   newsk->acked_seq = skb->h.th->seq+1;
1564   newsk->copied_seq
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值