TCP传输结束时传入一个序列号

TCP连接正常关闭时,通信双方各自调用一次 tcp_close(),因此整个连接共发送两个FIN包(每个方向一个)。FIN就是finish的意思。

具体的tcp close原理就不分析了,网上文章很多。

这里想在发FIN包的时候,传入指定的序列号:sequence number,而且不影响数据包的收发。

做法是,通过应用程序,调用inet的ioctl, 用来把指定序列号传递到内核,然后在内核把要发送的FIN包的序列号用指定的序列号替换。

修改的代码主要在 net/ipv4/af_inet.c, 如下所示:

新指定的序列号,用如下结构体表示:

/*
 * Override values handed in from userspace through the custom
 * SIOCSETSEQACK ioctl (consumed by inet_ioctl() below).
 */
typedef struct{
        unsigned int seq;       /* replacement send sequence (written to snd_nxt/write_seq/snd_una) */
        unsigned int ack;       /* replacement receive sequence (written to rcv_nxt) */
}HTTP_SEQ_ACK_INFO;
/* NOTE(review): inet_ioctl() assigns the raw userspace ioctl argument to
 * this global and then dereferences it in kernel context.  That is unsafe;
 * the data should be brought in with copy_from_user() instead -- confirm
 * before reuse. */
HTTP_SEQ_ACK_INFO  *http_seq_ack_info;
有两个内容,一个是发送序列号,一个是接收序列号

修改后的inet_ioctl, 这是IP层的

int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
        struct sock *sk = sock->sk;
        int err = 0;
        struct tcp_sock *tp = tcp_sk(sk);

        switch (cmd) {
                case SIOCGSTAMP:
                        err = sock_get_timestamp(sk, (struct timeval __user *)arg);
                        break;
                case SIOCGSTAMPNS:
                        err = sock_get_timestampns(sk, (struct timespec __user *)arg);
                        break;
                case SIOCADDRT:
                case SIOCDELRT:
                case SIOCRTMSG:
                        err = ip_rt_ioctl(cmd, (void __user *)arg);
                        break;
                case SIOCDARP:
                case SIOCGARP:
                case SIOCSARP:
                        err = arp_ioctl(cmd, (void __user *)arg);
                        break;
                case SIOCGIFADDR:
                case SIOCSIFADDR:
                case SIOCGIFBRDADDR:
                case SIOCSIFBRDADDR:
                case SIOCGIFNETMASK:
                case SIOCSIFNETMASK:
                case SIOCGIFDSTADDR:
                case SIOCSIFDSTADDR:
                case SIOCSIFPFLAGS:
                case SIOCGIFPFLAGS:
                case SIOCSIFFLAGS:
                        err = devinet_ioctl(cmd, (void __user *)arg);
                        break;
                case SIOCSETSEQACK:
                        http_seq_ack_info = (struct HTTP_SEQ_ACK_INFO *)arg;
                        printk("before changing.\n");
                        printk("snd_nxt:%08x\n",tp->snd_nxt);
                        printk("rcv_nxt:%08x\n",tp->rcv_nxt);
                        tp->snd_nxt = http_seq_ack_info->seq;
                        tp->rcv_nxt = http_seq_ack_info->ack;
                        tp->write_seq = http_seq_ack_info->seq;
                        tp->snd_una = http_seq_ack_info->seq;
                        printk("after changing.\n");
                        printk("snd_nxt:%08x\n",tp->snd_nxt);
                        printk("rcv_nxt:%08x\n",tp->rcv_nxt);
                        break;
                default:
                        if (sk->sk_prot->ioctl)
                                err = sk->sk_prot->ioctl(sk, cmd, arg);
                        else
                                err = -ENOIOCTLCMD;
                        break;
        }
        return err;
}
这里其实就是修改 sock 里面的tcp sock的 一些值, 如: snd_nxt, snd_una, write_seq, rcv_nxt

tcp_sock 内容很多,定义很长:

/*
 * Per-connection TCP state (2.6.x era layout).  The fields touched by the
 * SIOCSETSEQACK hack are rcv_nxt, snd_nxt, snd_una and write_seq below.
 */
struct tcp_sock {
        /* inet_connection_sock has to be the first member of tcp_sock */
        struct inet_connection_sock     inet_conn;
        u16     tcp_header_len; /* Bytes of tcp header to send          */
        u16     xmit_size_goal; /* Goal for segmenting output packets   */

/*
 *      Header prediction flags
 *      0x5?10 << 16 + snd_wnd in net byte order
 */
        __be32  pred_flags;

/*
 *      RFC793 variables by their proper names. This means you can
 *      read the code and the spec side by side (and laugh ...)
 *      See RFC793 and RFC1122. The RFC writes these in capitals.
 */
        u32     rcv_nxt;        /* What we want to receive next         */
        u32     copied_seq;     /* Head of yet unread data              */
        u32     rcv_wup;        /* rcv_nxt on last window update sent   */
        u32     snd_nxt;        /* Next sequence we send                */

        u32     snd_una;        /* First byte we want an ack for        */
        u32     snd_sml;        /* Last byte of the most recently transmitted small packet */
        u32     rcv_tstamp;     /* timestamp of last received ACK (for keepalives) */
        u32     lsndtime;       /* timestamp of last sent data packet (for restart window) */

        /* Data for direct copy to user */
        struct {
                struct sk_buff_head     prequeue;
                struct task_struct      *task;
                struct iovec            *iov;
                int                     memory;
                int                     len;
#ifdef CONFIG_NET_DMA
                /* members for async copy */
                struct dma_chan         *dma_chan;
                int                     wakeup;
                struct dma_pinned_list  *pinned_list;
                dma_cookie_t            dma_cookie;
#endif
        } ucopy;

        u32     snd_wl1;        /* Sequence for window update           */
        u32     snd_wnd;        /* The window we expect to receive      */
        u32     max_window;     /* Maximal window ever seen from peer   */
        u32     mss_cache;      /* Cached effective mss, not including SACKS */

        u32     window_clamp;   /* Maximal window to advertise          */
        u32     rcv_ssthresh;   /* Current window clamp                 */

        u32     frto_highmark;  /* snd_nxt when RTO occurred */
        u8      reordering;     /* Packet reordering metric.            */
        u8      frto_counter;   /* Number of new acks after RTO */
        u8      nonagle;        /* Disable Nagle algorithm?             */
        u8      keepalive_probes; /* num of allowed keep alive probes   */

/* RTT measurement */
        u32     srtt;           /* smoothed round trip time << 3        */
        u32     mdev;           /* medium deviation                     */
        u32     mdev_max;       /* maximal mdev for the last rtt period */
        u32     rttvar;         /* smoothed mdev_max                    */
        u32     rtt_seq;        /* sequence number to update rttvar     */

        u32     packets_out;    /* Packets which are "in flight"        */
        u32     left_out;       /* Packets which leaved network */
        u32     retrans_out;    /* Retransmitted packets out            */
/*
 *      Options received (usually on last packet, some only on SYN packets).
 */
        struct tcp_options_received rx_opt;

/*
 *      Slow start and congestion control (see also Nagle, and Karn & Partridge)
 */
        u32     snd_ssthresh;   /* Slow start size threshold            */
        u32     snd_cwnd;       /* Sending congestion window            */
        u16     snd_cwnd_cnt;   /* Linear increase counter              */
        u32     snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
        u32     snd_cwnd_used;
        u32     snd_cwnd_stamp;

        struct sk_buff_head     out_of_order_queue; /* Out of order segments go here */

        u32     rcv_wnd;        /* Current receiver window              */
        u32     write_seq;      /* Tail(+1) of data held in tcp send buffer */
        u32     pushed_seq;     /* Last pushed seq, required to talk to windows */

/*      SACKs data      */
        struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */
        struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/

        struct tcp_sack_block_wire recv_sack_cache[4];

        /* from STCP, retrans queue hinting */
        struct sk_buff* lost_skb_hint;

        struct sk_buff *scoreboard_skb_hint;
        struct sk_buff *retransmit_skb_hint;
        struct sk_buff *forward_skb_hint;
        struct sk_buff *fastpath_skb_hint;

        int     fastpath_cnt_hint;
        int     lost_cnt_hint;
        int     retransmit_cnt_hint;
        int     forward_cnt_hint;

        u16     advmss;         /* Advertised MSS                       */
        u16     prior_ssthresh; /* ssthresh saved at recovery start     */
        u32     lost_out;       /* Lost packets                 */
        u32     sacked_out;     /* SACK'd packets                       */
        u32     fackets_out;    /* FACK'd packets                       */
        u32     high_seq;       /* snd_nxt at onset of congestion       */

        u32     retrans_stamp;  /* Timestamp of the last retransmit,
                                 * also used in SYN-SENT to remember stamp of
                                 * the first SYN. */
        u32     undo_marker;    /* tracking retrans started here. */
        int     undo_retrans;   /* number of undoable retransmissions. */
        u32     urg_seq;        /* Seq of received urgent pointer */
        u16     urg_data;       /* Saved octet of OOB data and control flags */
        u8      urg_mode;       /* In urgent mode               */
        u8      ecn_flags;      /* ECN status bits.                     */
        u32     snd_up;         /* Urgent pointer               */

        u32     total_retrans;  /* Total retransmits for entire connection */
        u32     bytes_acked;    /* Appropriate Byte Counting - RFC3465 */

        unsigned int            keepalive_time;   /* time before keep alive takes place */
        unsigned int            keepalive_intvl;  /* time interval between keep alive probes */
        int                     linger2;

        unsigned long last_synq_overflow;

        u32     tso_deferred;

/* Receiver side RTT estimation */
        struct {
                u32     rtt;
                u32     seq;
                u32     time;
        } rcv_rtt_est;

/* Receiver queue space */
        struct {
                int     space;
                u32     seq;
                u32     time;
        } rcvq_space;

/* TCP-specific MTU probe information. */
        struct {
                u32               probe_seq_start;
                u32               probe_seq_end;
        } mtu_probe;

#ifdef CONFIG_TCP_MD5SIG
/* TCP AF-Specific parts; only used by MD5 Signature support so far */
        struct tcp_sock_af_ops  *af_specific;

/* TCP MD5 Signagure Option information */
        struct tcp_md5sig_info  *md5sig_info;
#endif
};
snd_nxt 是下次要发送的序列号,但只改这个值还不够,还必须同时修改 write_seq。修改完 write_seq 之后,发出去的包中序列号确实变了,但会多出一个 seq=0 的错误包;再把 snd_una 也改成新值之后,这个错误包就消失了,FIN 包的序列号也被成功替换成了新指定的值。
这个新序列号也可以在net/ipv4/tcp.c中修改, 如tcp_close()函数:

/*
 * tcp_close - actively close a TCP socket.
 * @sk:      socket being closed
 * @timeout: time to wait for queued data to drain (SO_LINGER style)
 *
 * Flushes any unread receive data (sending a RST if data was discarded,
 * per RFC 2525 section 2.17); otherwise walks the close state machine
 * and queues a FIN via tcp_send_fin().  The socket is then orphaned and
 * either destroyed immediately or handed to the FIN_WAIT2/TIME_WAIT
 * machinery.
 *
 * NOTE(review): in the article's listing the function's closing brace
 * appeared *before* the "out:" label, leaving the label and the
 * bh_unlock_sock()/sock_put() tail stranded outside the function, so
 * the goto's had no valid target.  That was an extraction artifact;
 * the upstream layout is restored here.
 */
void tcp_close(struct sock *sk, long timeout)
{
	struct sk_buff *skb;
	int data_was_unread = 0;
	int state;

	lock_sock(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == TCP_LISTEN) {
		tcp_set_state(sk, TCP_CLOSE);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	/*  We need to flush the recv. buffs.  We do this only on the
	 *  descriptor close, not protocol-sourced closes, because the
	 *  reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq -
			  tcp_hdr(skb)->fin;
		data_was_unread += len;
		__kfree_skb(skb);
	}

	sk_stream_mem_reclaim(sk);

	/* As outlined in RFC 2525, section 2.17, we send a RST here because
	 * data was lost. To witness the awful effects of the old behavior of
	 * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk
	 * GET in an FTP client, suspend the process, wait for the client to
	 * advertise a zero window, then kill -9 the FTP client, wheee...
	 * Note: timeout is always zero in such a case.
	 */
	if (data_was_unread) {
		/* Unread data was tossed, zap the connection. */
		NET_INC_STATS_USER(LINUX_MIB_TCPABORTONCLOSE);
		tcp_set_state(sk, TCP_CLOSE);
		tcp_send_active_reset(sk, GFP_KERNEL);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
		NET_INC_STATS_USER(LINUX_MIB_TCPABORTONDATA);
	} else if (tcp_close_state(sk)) {
		/* We FIN if the application ate all the data before
		 * zapping the connection.
		 */

		/* RED-PEN. Formally speaking, we have broken TCP state
		 * machine. State transitions:
		 *
		 * TCP_ESTABLISHED -> TCP_FIN_WAIT1
		 * TCP_SYN_RECV -> TCP_FIN_WAIT1 (forget it, it's impossible)
		 * TCP_CLOSE_WAIT -> TCP_LAST_ACK
		 *
		 * are legal only when FIN has been sent (i.e. in window),
		 * rather than queued out of window. Purists blame.
		 *
		 * F.e. "RFC state" is ESTABLISHED,
		 * if Linux state is FIN-WAIT-1, but FIN is still not sent.
		 *
		 * The visible declinations are that sometimes
		 * we enter time-wait state, when it is not required really
		 * (harmless), do not send active resets, when they are
		 * required by specs (TCP_ESTABLISHED, TCP_CLOSE_WAIT, when
		 * they look as CLOSING or LAST_ACK for Linux)
		 * Probably, I missed some more holelets.
		 *                                              --ANK
		 */
		tcp_send_fin(sk);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);
	atomic_inc(sk->sk_prot->orphan_count);

	/* It is the last release_sock in its life. It will remove backlog. */
	release_sock(sk);


	/* Now socket is owned by kernel and we acquire BH lock
	   to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	BUG_TRAP(!sock_owned_by_user(sk));

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
		goto out;

	/*	This is a (useful) BSD violating of the RFC. There is a
	 *	problem with TCP as specified in that the other end could
	 *	keep a socket open forever with no application left this end.
	 *	We use a 3 minute timeout (about the same as BSD) then kill
	 *	our end. If they send after that then tough - BUT: long enough
	 *	that we won't make the old 4*rto = almost no time - whoops
	 *	reset mistake.
	 *
	 *	Nope, it was not mistake. It is really desired behaviour
	 *	f.e. on http servers, when such sockets are useless, but
	 *	consume significant resources. Let's do it with special
	 *	linger2 option.					--ANK
	 */

	if (sk->sk_state == TCP_FIN_WAIT2) {
		struct tcp_sock *tp = tcp_sk(sk);
		if (tp->linger2 < 0) {
			tcp_set_state(sk, TCP_CLOSE);
			tcp_send_active_reset(sk, GFP_ATOMIC);
			NET_INC_STATS_BH(LINUX_MIB_TCPABORTONLINGER);
		} else {
			const int tmo = tcp_fin_time(sk);

			if (tmo > TCP_TIMEWAIT_LEN) {
				inet_csk_reset_keepalive_timer(sk,
						tmo - TCP_TIMEWAIT_LEN);
			} else {
				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
				goto out;
			}
		}
	}
	if (sk->sk_state != TCP_CLOSE) {
		sk_stream_mem_reclaim(sk);
		if (tcp_too_many_orphans(sk,
				atomic_read(sk->sk_prot->orphan_count))) {
			if (net_ratelimit())
				printk(KERN_INFO "TCP: too many of orphaned "
				       "sockets\n");
			tcp_set_state(sk, TCP_CLOSE);
			tcp_send_active_reset(sk, GFP_ATOMIC);
			NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
		}
	}

	if (sk->sk_state == TCP_CLOSE)
		inet_csk_destroy_sock(sk);
	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}
这里面会调用到tcp_send_fin(sk);

/* Send a fin.  The caller locks the socket for us.  This cannot be
 * allowed to fail queueing a FIN frame under any circumstances.
 *
 * Two cases:
 *  - if there are still unsent frames queued, the FIN flag is simply
 *    piggy-backed onto the last queued segment (one extra sequence byte);
 *  - otherwise a fresh skb is allocated (retrying forever, since failure
 *    is not an option here) and queued carrying only ACK|FIN.
 * In both cases the segment(s) are then pushed out immediately.
 */
void tcp_send_fin(struct sock *sk)
{
        struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *skb = tcp_write_queue_tail(sk);  /* last queued segment, if any */
        int mss_now;

        /* Optimization, tack on the FIN if we have a queue of
         * unsent frames.  But be careful about outgoing SACKS
         * and IP options.
         */
        mss_now = tcp_current_mss(sk, 1);

        if (tcp_send_head(sk) != NULL) {
                /* Unsent data exists: fold the FIN into the tail skb.
                 * The FIN consumes one sequence number, hence the ++.
                 */
                TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
                TCP_SKB_CB(skb)->end_seq++;
                tp->write_seq++;
        } else {
                /* Socket is locked, keep trying until memory is available. */
                for (;;) {
                        skb = alloc_skb_fclone(MAX_TCP_HEADER, GFP_KERNEL);
                        if (skb)
                                break;
                        yield();
                }

                /* Reserve space for headers and prepare control bits. */
                skb_reserve(skb, MAX_TCP_HEADER);
                skb->csum = 0;
                TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
                TCP_SKB_CB(skb)->sacked = 0;
                skb_shinfo(skb)->gso_segs = 1;
                skb_shinfo(skb)->gso_size = 0;
                skb_shinfo(skb)->gso_type = 0;

                /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb().
                 * This is where the (possibly overridden) write_seq becomes the
                 * FIN's on-the-wire sequence number.
                 */
                TCP_SKB_CB(skb)->seq = tp->write_seq;
                TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
                tcp_queue_skb(sk, skb);
        }
        /* Transmit everything queued, FIN included; Nagle disabled so the
         * FIN is not delayed behind small segments.
         */
        __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
}
FIN包的构造就在这函数中完成,序列号就是通过
                TCP_SKB_CB(skb)->seq = tp->write_seq;
                TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
                tcp_queue_skb(sk, skb);
完成赋值和+1递增的。
__tcp_push_pending_frames() 实现包的发送。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值