协议栈(Netfilter)

这里写图片描述

ip_rcv()

/*
 * ip_rcv - entry point for a received IPv4 packet.
 *
 * Sanity-checks the IP header (minimum header length, version 4, header
 * checksum, total length versus buffer length), trims the skb down to the
 * total length announced in the header, clears the IP control block and
 * then passes the packet to the NF_INET_PRE_ROUTING netfilter hook with
 * ip_rcv_finish() as the continuation function.
 *
 * @skb: the received packet
 * @dev: the device the packet arrived on (used for statistics and as the
 *       hook's input device)
 * @pt, @orig_dev: not referenced in this function body
 *
 * Returns whatever NF_HOOK() evaluates to when the packet passes all
 * checks; otherwise NET_RX_DROP.  On the inhdr_error/drop paths the skb is
 * freed here; on the "out" path skb is already NULL, so nothing is freed.
 */
int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
   struct iphdr *iph;
   u32 len;

   /* When the interface is in promisc. mode, drop all the crap
    * that it receives, do not try to analyse it.
    */
   /* Frame was addressed to another host's MAC address: drop it. */
   if (skb->pkt_type == PACKET_OTHERHOST)
      goto drop;


   IP_UPD_PO_STATS_BH(dev_net(dev), IPSTATS_MIB_IN, skb->len);

   /* If the skb is shared, continue with a private clone; NULL means failure. */
   if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
      IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
      goto out;
   }

   /* Make sure at least sizeof(struct iphdr) bytes are available at skb->data. */
   if (!pskb_may_pull(skb, sizeof(struct iphdr)))
      goto inhdr_error;

   iph = ip_hdr(skb); // ip_hdr() -> skb_network_header()

   /*
    * RFC1122: 3.2.1.2 MUST silently discard any IP frame that fails the checksum.
    *
    * Is the datagram acceptable?
    *
    * 1. Length at least the size of an ip header
    * 2. Version of 4
    * 3. Checksums correctly. [Speed optimisation for later, skip loopback checksums]
    * 4. Doesn't have a bogus length
    */

   if (iph->ihl < 5 || iph->version != 4)
      goto inhdr_error;

   /* Make sure the full header (iph->ihl * 4 bytes, options included) is available. */
   if (!pskb_may_pull(skb, iph->ihl*4))
      goto inhdr_error;

   /* Re-read the header pointer after the second pskb_may_pull() call. */
   iph = ip_hdr(skb);

   if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
      goto inhdr_error;

   len = ntohs(iph->tot_len);
   if (skb->len < len) { /* buffer is shorter than the header's total length */
      IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS);
      goto drop;
   } else if (len < (iph->ihl*4)) /* total length is smaller than the header itself */
      goto inhdr_error;

   /* Our transport medium may have padded the buffer out. Now we know it
    * is IP we can trim to the true length of the frame.
    * Note this now means skb->len holds ntohs(iph->tot_len).
    */
   if (pskb_trim_rcsum(skb, len)) {
      IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
      goto drop;
   }

   /* Remove any debris in the socket control block */
   memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));

   /* Must drop socket now because of tproxy. */
   skb_orphan(skb);

   return NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, dev, NULL,
             ip_rcv_finish); /* run the PRE_ROUTING hooks via the NF_HOOK macro */

inhdr_error:
   IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
drop:
   kfree_skb(skb);
out:
   return NET_RX_DROP;
}

NF_HOOK

/*
 * NF_HOOK - convenience form of NF_HOOK_THRESH() that passes INT_MIN as
 * the priority threshold.
 */
#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \
   NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, INT_MIN)
 
 
/*
 * NF_HOOK_THRESH - run the netfilter hooks for (pf, hook) on skb and, if
 * they let the packet through, call okfn(skb).
 *
 * Evaluates to the value returned by nf_hook_thresh(), except when that
 * value is 1: then okfn(skb) is invoked and the macro evaluates to okfn's
 * return value instead.
 *
 * Implemented as a GNU statement expression.  Note that skb appears twice
 * in the expansion, so the argument must be free of side effects.
 *
 * Any comment inside the body must sit BEFORE the line-continuation
 * backslash: text after the backslash stops the line from being spliced
 * and truncates the macro definition.
 */
#define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh)         \
({int __ret;                               \
if ((__ret=nf_hook_thresh(pf, hook, (skb), indev, outdev, okfn, thresh, 1)) == 1)\
   __ret = (okfn)(skb); /* hooks accepted the packet: run okfn */ \
__ret;})
 
 
/*
 * nf_hook_thresh - inline front end for nf_hook_slow().
 *
 * Returns 1 without traversing any hooks when @cond is zero or, unless
 * CONFIG_NETFILTER_DEBUG is defined, when the hook list for (pf, hook) is
 * empty.  Otherwise returns the result of nf_hook_slow().
 */
static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
             struct sk_buff *skb,
             struct net_device *indev,
             struct net_device *outdev,
             int (*okfn)(struct sk_buff *), int thresh,
             int cond)
{
   int must_traverse = (cond != 0);

#ifndef CONFIG_NETFILTER_DEBUG
   /* Fast path: no hook registered at this point, nothing to run. */
   if (must_traverse && list_empty(&nf_hooks[pf][hook]))
      must_traverse = 0;
#endif

   return must_traverse
      ? nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh)
      : 1;
}
 
 
/*
 * nf_hook_slow - run the hooks registered for (pf, hook) on skb and map
 * the resulting verdict to a return code.
 *
 * Return value:
 *    1       verdict was NF_ACCEPT or NF_STOP (NF_HOOK_THRESH then calls okfn)
 *    -EPERM  verdict was NF_DROP; the skb has been freed here
 *    0       any other verdict, including NF_QUEUE when nf_queue() returned
 *            non-zero
 *
 * The whole traversal runs inside an RCU read-side critical section.
 */
int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
       struct net_device *indev,
       struct net_device *outdev,
       int (*okfn)(struct sk_buff *),
       int hook_thresh)
{
   struct list_head *elem;
   unsigned int verdict;
   int ret = 0;

   /* We may already have this, but read-locks nest anyway */
   rcu_read_lock();

   /* Cursor starts at the list head; nf_iterate() advances it. */
   elem = &nf_hooks[pf][hook];
next_hook:
   verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev,
              outdev, &elem, okfn, hook_thresh);
   if (verdict == NF_ACCEPT || verdict == NF_STOP) {
      ret = 1;
   } else if (verdict == NF_DROP) {
      kfree_skb(skb);
      ret = -EPERM;
   } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
      /* The bits above NF_VERDICT_BITS are passed on to nf_queue(). */
      /* When nf_queue() returns 0, traversal resumes from the current cursor. */
      if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
               verdict >> NF_VERDICT_BITS))
         goto next_hook;
   }
   rcu_read_unlock();
   return ret;
}
 
 
/*
 * nf_iterate - call the hook functions on a hook list, starting after *i.
 *
 * @head:        the hook list being walked
 * @i:           in/out cursor into the list; updated as the walk proceeds
 * @hook_thresh: entries whose priority is below this value are skipped
 *
 * Each remaining entry's hook function is called in turn.  The walk stops
 * at the first verdict that is neither NF_ACCEPT nor NF_REPEAT and returns
 * that verdict.  On NF_REPEAT the cursor is stepped back one element so the
 * loop's next advance lands on the same entry again.  If the end of the
 * list is reached, NF_ACCEPT is returned.
 */
unsigned int nf_iterate(struct list_head *head,
         struct sk_buff *skb,
         unsigned int hook,
         const struct net_device *indev,
         const struct net_device *outdev,
         struct list_head **i,
         int (*okfn)(struct sk_buff *),
         int hook_thresh)
{
   unsigned int verdict;

   /*
    * The caller must not block between calls to this
    * function because of risk of continuing from deleted element.
    */
   list_for_each_continue_rcu(*i, head) { /* walk the nf_hook_ops entries on this hook list */
      /* NOTE(review): the cast assumes the list_head is the first member of nf_hook_ops - confirm. */
      struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;

      if (hook_thresh > elem->priority)
         continue;

      /* Optimization: we don't need to hold module
         reference here, since function can't sleep. --RR */
      verdict = elem->hook(hook, skb, indev, outdev, okfn); /* run the hook function */
      if (verdict != NF_ACCEPT) {
#ifdef CONFIG_NETFILTER_DEBUG
         /* Debug builds: log an out-of-range verdict and move on to the next hook. */
         if (unlikely((verdict & NF_VERDICT_MASK)
                     > NF_MAX_VERDICT)) {
            NFDEBUG("Evil return from %p(%u).\n",
               elem->hook, hook);
            continue;
         }
#endif
         if (verdict != NF_REPEAT)
            return verdict;
         /* NF_REPEAT: step back so the next iteration revisits this entry. */
         *i = (*i)->prev;
      }
   }
   return NF_ACCEPT;
}

ip_rcv_finish()

/*
 * ip_rcv_finish - continuation of ip_rcv() once the PRE_ROUTING hooks
 * have let the packet through.
 *
 * Attaches an input route to the skb if it does not have one yet, updates
 * the optional per-class route accounting, processes IP options when the
 * header carries any, bumps the multicast/broadcast input counters and
 * finally hands the packet to the route's input handler via dst_input().
 *
 * Returns the result of dst_input(), or NET_RX_DROP after freeing the skb
 * when the route lookup or the option processing fails.
 */
static int ip_rcv_finish(struct sk_buff *skb)
{
   const struct iphdr *iph = ip_hdr(skb);
   struct rtable *rt;

   /*
    * Initialise the virtual path cache for the packet. It describes
    * how the packet travels inside Linux networking.
    */
   if (skb_dst(skb) == NULL) { /* no route attached to the skb yet */
      int err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos,
                skb->dev); /* look up the input route */
      if (unlikely(err)) { /* lookup failed: count the reason and drop */
         if (err == -EHOSTUNREACH)
            IP_INC_STATS_BH(dev_net(skb->dev),
                  IPSTATS_MIB_INADDRERRORS);
         else if (err == -ENETUNREACH)
            IP_INC_STATS_BH(dev_net(skb->dev),
                  IPSTATS_MIB_INNOROUTES);
         goto drop;
      }
   }

#ifdef CONFIG_NET_CLS_ROUTE
   /*
    * Per-CPU accounting keyed by the route's tclassid: the low byte
    * indexes the o_* counters, bits 16-23 index the i_* counters.
    */
   if (unlikely(skb_dst(skb)->tclassid)) {
      struct ip_rt_acct *st = per_cpu_ptr(ip_rt_acct, smp_processor_id());
      u32 idx = skb_dst(skb)->tclassid;
      st[idx&0xFF].o_packets++;
      st[idx&0xFF].o_bytes += skb->len;
      st[(idx>>16)&0xFF].i_packets++;
      st[(idx>>16)&0xFF].i_bytes += skb->len;
   }
#endif

   if (iph->ihl > 5 && ip_rcv_options(skb)) /* header carries IP options: parse them */
      goto drop; /* option parsing failed: drop */

   rt = skb_rtable(skb);
   if (rt->rt_type == RTN_MULTICAST) {
      IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INMCAST,
            skb->len);
   } else if (rt->rt_type == RTN_BROADCAST)
      IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INBCAST,
            skb->len);

   return dst_input(skb); /* hand over to the route's input handler */

drop:
   kfree_skb(skb);
   return NET_RX_DROP;
}
 
 
/*
 * dst_input - deliver skb through the input handler of the destination
 * entry attached to it (skb_dst(skb)->input) and return that handler's
 * result.
 */
static inline int dst_input(struct sk_buff *skb)
{
   int (*input_fn)(struct sk_buff *) = skb_dst(skb)->input;

   return input_fn(skb);
}

dst_entry、rtable、flowi

/*
 * struct dst_entry - destination entry attached to an skb (see skb_dst()).
 *
 * Carries, among other things, the device and the input/output handlers;
 * dst_input() dispatches a received packet through ->input.
 */
struct dst_entry
{
   struct rcu_head       rcu_head;
   struct dst_entry   *child;
   struct net_device       *dev;
   short        error;
   short        obsolete;
   int          flags;
   /* Bit values for ->flags */
#define DST_HOST      1
#define DST_NOXFRM    2
#define DST_NOPOLICY      4
#define DST_NOHASH    8
   unsigned long     expires;

   unsigned short    header_len;    /* more space at head required */
   unsigned short    trailer_len;   /* space to reserve at tail */

   unsigned int      rate_tokens;
   unsigned long     rate_last; /* rate limiting for ICMP */

   struct dst_entry   *path;

   struct neighbour   *neighbour;
   struct hh_cache       *hh;
   /* The #else branch keeps a pointer-sized slot when CONFIG_XFRM is off. */
#ifdef CONFIG_XFRM
   struct xfrm_state  *xfrm;
#else
   void         *__pad1;
#endif
   int          (*input)(struct sk_buff*);   /* called by dst_input() */
   int          (*output)(struct sk_buff*);

   struct  dst_ops            *ops;

   u32          metrics[RTAX_MAX];

   /* tclassid feeds the per-class accounting in ip_rcv_finish(). */
#ifdef CONFIG_NET_CLS_ROUTE
   __u32        tclassid;
#else
   __u32        __pad2;
#endif


   /*
    * Align __refcnt to a 64 bytes alignment
    * (L1_CACHE_SIZE would be too much)
    */
#ifdef CONFIG_64BIT
   long         __pad_to_align_refcnt[2];
#else
   long         __pad_to_align_refcnt[1];
#endif
   /*
    * __refcnt wants to be on a different cache line from
    * input/output/ops or performance tanks badly
    */
   atomic_t      __refcnt;  /* client references   */
   int          __use;
   unsigned long     lastuse;
   /* One "next" pointer, typed per protocol-specific route structure. */
   union {
      struct dst_entry *next;
      struct rtable    *rt_next;
      struct rt6_info   *rt6_next;
      struct dn_route  *dn_next;
   };
};
 
 
/*
 * struct rtable - IPv4 route entry.
 *
 * The embedded dst_entry (u.dst) is the first member, which is what lets
 * skb_rtable() cast the skb's struct dst_entry pointer to struct rtable.
 */
struct rtable
{
   union
   {
      struct dst_entry   dst;
   } u;

   /* Cache lookup keys */
   struct flowi      fl;

   struct in_device   *idev;

   int          rt_genid;
   unsigned      rt_flags;
   __u16        rt_type;   /* compared with RTN_MULTICAST / RTN_BROADCAST in ip_rcv_finish() */

   __be32       rt_dst;    /* Path destination    */
   __be32       rt_src;    /* Path source    */
   int          rt_iif;

   /* Info on neighbour */
   __be32       rt_gateway;

   /* Miscellaneous cached information */
   __be32       rt_spec_dst; /* RFC1122 specific destination */
   struct inet_peer   *peer; /* long-living peer info */
};
 
 
/*
 * struct flowi - flow description; struct rtable embeds one as its cache
 * lookup key.
 *
 * Holds the interface indices, a mark, a per-address-family union of
 * addresses (nl_u), the protocol and flags, and a union of upper-layer
 * identifiers (uli_u).  The fl*_xxx macros defined inside the struct are
 * shorthands for the nested union members.
 */
struct flowi {
   int    oif;
   int    iif;
   __u32  mark;

   /* Address part: one variant per address family. */
   union {
      struct {
         __be32       daddr;
         __be32       saddr;
         __u8         tos;
         __u8         scope;
      } ip4_u;

      struct {
         struct in6_addr       daddr;
         struct in6_addr       saddr;
         __be32       flowlabel;
      } ip6_u;

      struct {
         __le16       daddr;
         __le16       saddr;
         __u8         scope;
      } dn_u;
   } nl_u;
#define fld_dst       nl_u.dn_u.daddr
#define fld_src       nl_u.dn_u.saddr
#define fld_scope  nl_u.dn_u.scope
#define fl6_dst       nl_u.ip6_u.daddr
#define fl6_src       nl_u.ip6_u.saddr
#define fl6_flowlabel  nl_u.ip6_u.flowlabel
#define fl4_dst       nl_u.ip4_u.daddr
#define fl4_src       nl_u.ip4_u.saddr
#define fl4_tos       nl_u.ip4_u.tos
#define fl4_scope  nl_u.ip4_u.scope

   __u8   proto;
   __u8   flags;
#define FLOWI_FLAG_ANYSRC 0x01
   /* Upper-layer part: ports, ICMP type/code, SPI or MH type. */
   union {
      struct {
         __be16 sport;
         __be16 dport;
      } ports;

      struct {
         __u8   type;
         __u8   code;
      } icmpt;

      struct {
         __le16 sport;
         __le16 dport;
      } dnports;

      __be32    spi;

      struct {
         __u8   type;
      } mht;
   } uli_u;
#define fl_ip_sport    uli_u.ports.sport
#define fl_ip_dport    uli_u.ports.dport
#define fl_icmp_type   uli_u.icmpt.type
#define fl_icmp_code   uli_u.icmpt.code
#define fl_ipsec_spi   uli_u.spi
#define fl_mh_type uli_u.mht.type
   __u32           secid; /* used by xfrm; see secid.txt */
} __attribute__((__aligned__(BITS_PER_LONG/8)));
 
 
/*
 * skb_dst - return the destination entry stored in skb->_skb_dst,
 * converted to a struct dst_entry pointer.
 */
static inline struct dst_entry *skb_dst(const struct sk_buff *skb)
{
   struct dst_entry *entry = (struct dst_entry *)skb->_skb_dst;

   return entry;
}
 
 
/*
 * skb_rtable - return the skb's destination entry viewed as an IPv4
 * route (struct rtable); this is a plain pointer cast of skb_dst(skb).
 */
static inline struct rtable *skb_rtable(const struct sk_buff *skb)
{
   struct rtable *route = (struct rtable *)skb_dst(skb);

   return route;
}
  • 2
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值