邻居子系统:地址解析协议

地址解析协议 

无端ARP

一般来说,发出一个ARPOP_REQUEST是由于发送方想和一个IP地址通信,需要找到其对应的L2地址。但有时候,发送方生成一个ARPOP_REQUEST是为了通知接收方一些信息,而不是请求信息。这种封包就称为无端ARP,它通常用在下面列出的情况中:

  • L2地址发生变化,改变地址的节点通过无端ARP来触发L2和L3的关联更新
  • 重复地址检测,同一个局域网中不应该出现两台主机拥有相同的L3地址,但这种情况总会发生。主机可以使用无端ARP来检测重复地址的存在。如果你发出一个目的地是你自己地址的ARP请求,那么只有当存在一台与你IP配置相同的主机时,才会收到应答
  • 虚拟IP,故障迁移。通常为了使一个站点有冗余度,该站点除了有一台活跃服务器外,还有一定数量的配置类似的主机处于备用模式。当活跃服务器由于某些原因宕机后,与heartbeat定时器相关的一个机制就会检测到这个故障,并启动新活跃服务器的选择。这个服务器会生成一个无端ARP包来更新网络中所有其他主机上的ARP缓存。因为新服务器继承了旧服务器的IP地址,发出的ARPOP_REQUEST不会被应答,但所有的接收者会更新它们的缓存。

调整ARP选项

  • 编译选项
    • ARPD(CONFIG_ARPD),这个选项允许用户空间的守护进程处理ARP包,这样做在超大型且流量比较大的网络中,可以提高传输性能。
  • /proc选项
    • 这个选项中的大部分特性可以配置为全局的,也可以给每个设备单独配置。

初始化ARP协议

   1:  void __init arp_init(void)
   2:  {
   3:      neigh_table_init(&arp_tbl);
   4:   
   5:      dev_add_pack(&arp_packet_type);
   6:      arp_proc_init();
   7:  #ifdef CONFIG_SYSCTL
   8:      neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4,
   9:                    NET_IPV4_NEIGH, "ipv4", NULL, NULL);
  10:  #endif
  11:      register_netdevice_notifier(&arp_netdev_notifier);
  12:  }

初始化函数首先是注册一个虚函数表和ARP协议使用的其他常用参数;这个工作由neigh_table_init函数完成。dev_add_pack主要用于注册接收封包函数,arp_proc_init函数会建立/proc/net/arp文件,读取该文件就可以看到ARP缓存的内容。若内核支持sysctl,就能依靠neigh_sysctl_register创建目录/proc/sys/net/ipv4/neigh,用于输出neigh_parms结构的默认调节参数。register_netdevice_notifier向内核注册一个回调函数,用于接收设备状态和配置变化的通知。

  • arp_tbl表
   1:  struct neigh_table arp_tbl = {
   2:      .family =    AF_INET,
   3:      .entry_size =    sizeof(struct neighbour) + 4,
   4:      .key_len =    4,
   5:      .hash =        arp_hash,
   6:      .constructor =    arp_constructor,
   7:      .proxy_redo =    parp_redo,
   8:      .id =        "arp_cache",
   9:      .parms = {
  10:          .tbl =            &arp_tbl,
  11:          .base_reachable_time =    30 * HZ,
  12:          .retrans_time =    1 * HZ,
  13:          .gc_staletime =    60 * HZ,
  14:          .reachable_time =        30 * HZ,
  15:          .delay_probe_time =    5 * HZ,
  16:          .queue_len =        3,
  17:          .ucast_probes =    3,
  18:          .mcast_probes =    3,
  19:          .anycast_delay =    1 * HZ,
  20:          .proxy_delay =        (8 * HZ) / 10,
  21:          .proxy_qlen =        64,
  22:          .locktime =        1 * HZ,
  23:      },
  24:      .gc_interval =    30 * HZ,
  25:      .gc_thresh1 =    128,
  26:      .gc_thresh2 =    512,
  27:      .gc_thresh3 =    1024,
  28:  };

初始化neighbour结构

   1:  static int arp_constructor(struct neighbour *neigh)
   2:  {
   3:      __be32 addr = *(__be32*)neigh->primary_key;
   4:      struct net_device *dev = neigh->dev;
   5:      struct in_device *in_dev;
   6:      struct neigh_parms *parms;
   7:   
   8:      rcu_read_lock();
   9:      in_dev = __in_dev_get_rcu(dev);
  10:      if (in_dev == NULL) {
  11:          rcu_read_unlock();
  12:          return -EINVAL;
  13:      }
  14:   
  15:      neigh->type = inet_addr_type(dev_net(dev), addr);
  16:   
  17:      parms = in_dev->arp_parms;
  18:      __neigh_parms_put(neigh->parms);
  19:      neigh->parms = neigh_parms_clone(parms);
  20:      rcu_read_unlock();
  21:   
  22:      if (!dev->header_ops) {
  23:          neigh->nud_state = NUD_NOARP;
  24:          neigh->ops = &arp_direct_ops;
  25:          neigh->output = neigh->ops->queue_xmit;
  26:      } else {
  27:          /* Good devices (checked by reading texts, but only Ethernet is
  28:             tested)
  29:  
  30:             ARPHRD_ETHER: (ethernet, apfddi)
  31:             ARPHRD_FDDI: (fddi)
  32:             ARPHRD_IEEE802: (tr)
  33:             ARPHRD_METRICOM: (strip)
  34:             ARPHRD_ARCNET:
  35:             etc. etc. etc.
  36:  
  37:             ARPHRD_IPDDP will also work, if author repairs it.
  38:             I did not it, because this driver does not work even
  39:             in old paradigm.
  40:           */
  41:   
  42:  #if 1
  43:          /* So... these "amateur" devices are hopeless.
  44:             The only thing, that I can say now:
  45:             It is very sad that we need to keep ugly obsolete
  46:             code to make them happy.
  47:  
  48:             They should be moved to more reasonable state, now
  49:             they use rebuild_header INSTEAD OF hard_start_xmit!!!
  50:             Besides that, they are sort of out of date
  51:             (a lot of redundant clones/copies, useless in 2.1),
  52:             I wonder why people believe that they work.
  53:           */
  54:          switch (dev->type) {
  55:          default:
  56:              break;
  57:          case ARPHRD_ROSE:
  58:  #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
  59:          case ARPHRD_AX25:
  60:  #if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
  61:          case ARPHRD_NETROM:
  62:  #endif
  63:              neigh->ops = &arp_broken_ops;
  64:              neigh->output = neigh->ops->output;
  65:              return 0;
  66:  #endif
  67:          ;}
  68:  #endif
  69:          if (neigh->type == RTN_MULTICAST) {
  70:              neigh->nud_state = NUD_NOARP;
  71:              arp_mc_map(addr, neigh->ha, dev, 1);
  72:          } else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) {
  73:              neigh->nud_state = NUD_NOARP;
  74:              memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
  75:          } else if (neigh->type == RTN_BROADCAST || dev->flags&IFF_POINTOPOINT) {
  76:              neigh->nud_state = NUD_NOARP;
  77:              memcpy(neigh->ha, dev->broadcast, dev->addr_len);
  78:          }
  79:   
  80:          if (dev->header_ops->cache)
  81:              neigh->ops = &arp_hh_ops;
  82:          else
  83:              neigh->ops = &arp_generic_ops;
  84:   
  85:          if (neigh->nud_state&NUD_VALID)
  86:              neigh->output = neigh->ops->connected_output;
  87:          else
  88:              neigh->output = neigh->ops->output;
  89:      }
  90:      return 0;
  91:  }
   1:  static const struct neigh_ops arp_generic_ops = {
   2:      .family =        AF_INET,
   3:      .solicit =        arp_solicit,
   4:      .error_report =        arp_error_report,
   5:      .output =        neigh_resolve_output,
   6:      .connected_output =    neigh_connected_output,
   7:      .hh_output =        dev_queue_xmit,
   8:      .queue_xmit =        dev_queue_xmit,
   9:  };
  10:   
  11:  static const struct neigh_ops arp_hh_ops = {
  12:      .family =        AF_INET,
  13:      .solicit =        arp_solicit,
  14:      .error_report =        arp_error_report,
  15:      .output =        neigh_resolve_output,
  16:      .connected_output =    neigh_resolve_output,
  17:      .hh_output =        dev_queue_xmit,
  18:      .queue_xmit =        dev_queue_xmit,
  19:  };
  20:   
  21:  static const struct neigh_ops arp_direct_ops = {
  22:      .family =        AF_INET,
  23:      .output =        dev_queue_xmit,
  24:      .connected_output =    dev_queue_xmit,
  25:      .hh_output =        dev_queue_xmit,
  26:      .queue_xmit =        dev_queue_xmit,
  27:  };
  28:   
  29:  const struct neigh_ops arp_broken_ops = {
  30:      .family =        AF_INET,
  31:      .solicit =        arp_solicit,
  32:      .error_report =        arp_error_report,
  33:      .output =        neigh_compat_output,
  34:      .connected_output =    neigh_compat_output,
  35:      .hh_output =        dev_queue_xmit,
  36:      .queue_xmit =        dev_queue_xmit,
  37:  };

 

image

传输和接收ARP包

image

arp_send:

   1:  static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
   2:  {
   3:      __be32 saddr = 0;
   4:      u8  *dst_ha = NULL;
   5:      struct net_device *dev = neigh->dev;
   6:      __be32 target = *(__be32*)neigh->primary_key;
   7:      int probes = atomic_read(&neigh->probes);
   8:      struct in_device *in_dev = in_dev_get(dev);
   9:   
  10:      if (!in_dev)
  11:          return;
  12:   
  13:      switch (IN_DEV_ARP_ANNOUNCE(in_dev)) {
  14:      default:
  15:      case 0:        /* By default announce any local IP */
  16:          if (skb && inet_addr_type(dev_net(dev), ip_hdr(skb)->saddr) == RTN_LOCAL)
  17:              saddr = ip_hdr(skb)->saddr;
  18:          break;
  19:      case 1:        /* Restrict announcements of saddr in same subnet */
  20:          if (!skb)
  21:              break;
  22:          saddr = ip_hdr(skb)->saddr;
  23:          if (inet_addr_type(dev_net(dev), saddr) == RTN_LOCAL) {
  24:              /* saddr should be known to target */
  25:              if (inet_addr_onlink(in_dev, target, saddr))
  26:                  break;
  27:          }
  28:          saddr = 0;
  29:          break;
  30:      case 2:        /* Avoid secondary IPs, get a primary/preferred one */
  31:          break;
  32:      }
  33:   
  34:      if (in_dev)
  35:          in_dev_put(in_dev);
  36:      if (!saddr)
  37:          saddr = inet_select_addr(dev, target, RT_SCOPE_LINK);
  38:   
  39:      if ((probes -= neigh->parms->ucast_probes) < 0) {
  40:          if (!(neigh->nud_state&NUD_VALID))
  41:              printk(KERN_DEBUG "trying to ucast probe in NUD_INVALID\n");
  42:          dst_ha = neigh->ha;
  43:          read_lock_bh(&neigh->lock);
  44:      } else if ((probes -= neigh->parms->app_probes) < 0) {
  45:  #ifdef CONFIG_ARPD
  46:          neigh_app_ns(neigh);
  47:  #endif
  48:          return;
  49:      }
  50:   
  51:      arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr,
  52:           dst_ha, dev->dev_addr, NULL);
  53:      if (dst_ha)
  54:          read_unlock_bh(&neigh->lock);
  55:  }

neigh,L3地址需要被解析的邻居;skb,保存数据封包的缓冲区,该封包的传输由solicitation请求触发。

  • skb缓冲区中IP头的源IP地址,和arp_solicit选择的要放到ARP头中的源IP地址,如果封包是本地网络产生的,IP包头中源IP地址就是本机的。若封包是要转发的,源IP地址就是最初发送方的
  • skb缓冲区IP包头中的目的地址,和arp_solicit请求解析的目的IP地址

arp_solicit的主要任务:

  • 选择要放在ARP头中的源IP地址,区分内核生成的请求和用户空间产生的请求
    • 对于内核生成的请求,使用arp_send传输solicitation请求
    • 对于用户空间请求,arp_solicit调用neigh_app_ns来通知相应的用户空间程序:需要生成一个solicitation请求。如果内核不支持ARPD,arp_solicit就不做solicitation请求,直接返回。
  • 更新生成的solicitation请求数目
  • 使用arp_send函数传输请求

 

image

ARP_ANNOUNCE与源IP地址的选择:

大部分主机只有一个IP地址,因此将其直接拷贝到ARP头中。若一个主机有多个IP地址,ARP_ANNOUNCE的设置就会影响IP的选择。主要用到三个函数:

  1. inet_addr_type,在该函数的输入参数中给一个IP地址,它就返回该地址的类型。arp_solicit关心的返回值是RTN_LOCAL(即该地址属于本机)
  2. inet_addr_onlink,输入一个设备和两个IP地址,该函数会检查这两个地址是否属于同一个子网
  3. inet_select_addr,输入一个设备,一个IP地址(通常不是本地主机的地址)和一个scope,该函数会在设备配置信息中查找一个IP地址,该地址应该和输入的IP地址位于同一个子网中,并且其scope小于或等于输入的scope。
   1:  static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
   2:             struct packet_type *pt, struct net_device *orig_dev)
   3:  {
   4:      struct arphdr *arp;
   5:   
   6:      /* ARP header, plus 2 device addresses, plus 2 IP addresses.  */
   7:      if (!pskb_may_pull(skb, arp_hdr_len(dev)))
   8:          goto freeskb;
   9:   
  10:      arp = arp_hdr(skb);
  11:      if (arp->ar_hln != dev->addr_len ||
  12:          dev->flags & IFF_NOARP ||
  13:          skb->pkt_type == PACKET_OTHERHOST ||
  14:          skb->pkt_type == PACKET_LOOPBACK ||
  15:          arp->ar_pln != 4)
  16:          goto freeskb;
  17:   
  18:      if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
  19:          goto out_of_mem;
  20:   
  21:      memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
  22:   
  23:      return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, skb, dev, NULL, arp_process);
  24:   
  25:  freeskb:
  26:      kfree_skb(skb);
  27:  out_of_mem:
  28:      return 0;
  29:  }
   1:  static int arp_process(struct sk_buff *skb)
   2:  {
   3:      struct net_device *dev = skb->dev;
   4:      struct in_device *in_dev = in_dev_get(dev);
   5:      struct arphdr *arp;
   6:      unsigned char *arp_ptr;
   7:      struct rtable *rt;
   8:      unsigned char *sha;
   9:      __be32 sip, tip;
  10:      u16 dev_type = dev->type;
  11:      int addr_type;
  12:      struct neighbour *n;
  13:      struct net *net = dev_net(dev);
  14:   
  15:      /* arp_rcv below verifies the ARP header and verifies the device
  16:       * is ARP'able.
  17:       */
  18:   
  19:      if (in_dev == NULL)
  20:          goto out;
  21:   
  22:      arp = arp_hdr(skb);
  23:   
  24:      switch (dev_type) {
  25:      default:
  26:          if (arp->ar_pro != htons(ETH_P_IP) ||
  27:              htons(dev_type) != arp->ar_hrd)
  28:              goto out;
  29:          break;
  30:      case ARPHRD_ETHER:
  31:      case ARPHRD_IEEE802_TR:
  32:      case ARPHRD_FDDI:
  33:      case ARPHRD_IEEE802:
  34:          /*
  35:           * ETHERNET, Token Ring and Fibre Channel (which are IEEE 802
  36:           * devices, according to RFC 2625) devices will accept ARP
  37:           * hardware types of either 1 (Ethernet) or 6 (IEEE 802.2).
  38:           * This is the case also of FDDI, where the RFC 1390 says that
  39:           * FDDI devices should accept ARP hardware of (1) Ethernet,
  40:           * however, to be more robust, we'll accept both 1 (Ethernet)
  41:           * or 6 (IEEE 802.2)
  42:           */
  43:          if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
  44:               arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
  45:              arp->ar_pro != htons(ETH_P_IP))
  46:              goto out;
  47:          break;
  48:      case ARPHRD_AX25:
  49:          if (arp->ar_pro != htons(AX25_P_IP) ||
  50:              arp->ar_hrd != htons(ARPHRD_AX25))
  51:              goto out;
  52:          break;
  53:      case ARPHRD_NETROM:
  54:          if (arp->ar_pro != htons(AX25_P_IP) ||
  55:              arp->ar_hrd != htons(ARPHRD_NETROM))
  56:              goto out;
  57:          break;
  58:      }
  59:   
  60:      /* Understand only these message types */
  61:   
  62:      if (arp->ar_op != htons(ARPOP_REPLY) &&
  63:          arp->ar_op != htons(ARPOP_REQUEST))
  64:          goto out;
  65:   
  66:  /*
  67:   *    Extract fields
  68:   */
  69:      arp_ptr= (unsigned char *)(arp+1);
  70:      sha    = arp_ptr;
  71:      arp_ptr += dev->addr_len;
  72:      memcpy(&sip, arp_ptr, 4);
  73:      arp_ptr += 4;
  74:      arp_ptr += dev->addr_len;
  75:      memcpy(&tip, arp_ptr, 4);
  76:  /*
  77:   *    Check for bad requests for 127.x.x.x and requests for multicast
  78:   *    addresses.  If this is one such, delete it.
  79:   */
  80:      if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
  81:          goto out;
  82:   
  83:  /*
  84:   *     Special case: We must set Frame Relay source Q.922 address
  85:   */
  86:      if (dev_type == ARPHRD_DLCI)
  87:          sha = dev->broadcast;
  88:   
  89:  /*
  90:   *  Process entry.  The idea here is we want to send a reply if it is a
  91:   *  request for us or if it is a request for someone else that we hold
  92:   *  a proxy for.  We want to add an entry to our cache if it is a reply
  93:   *  to us or if it is a request for our address.
  94:   *  (The assumption for this last is that if someone is requesting our
  95:   *  address, they are probably intending to talk to us, so it saves time
  96:   *  if we cache their address.  Their address is also probably not in
  97:   *  our cache, since ours is not in their cache.)
  98:   *
  99:   *  Putting this another way, we only care about replies if they are to
 100:   *  us, in which case we add them to the cache.  For requests, we care
 101:   *  about those for us and those for our proxies.  We reply to both,
 102:   *  and in the case of requests for us we add the requester to the arp
 103:   *  cache.
 104:   */
 105:   
 106:      /* Special case: IPv4 duplicate address detection packet (RFC2131) */
 107:      if (sip == 0) {
 108:          if (arp->ar_op == htons(ARPOP_REQUEST) &&
 109:              inet_addr_type(net, tip) == RTN_LOCAL &&
 110:              !arp_ignore(in_dev, sip, tip))
 111:              arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
 112:                   dev->dev_addr, sha);
 113:          goto out;
 114:      }
 115:   
 116:      if (arp->ar_op == htons(ARPOP_REQUEST) &&
 117:          ip_route_input(skb, tip, sip, 0, dev) == 0) {
 118:   
 119:          rt = skb_rtable(skb);
 120:          addr_type = rt->rt_type;
 121:   
 122:          if (addr_type == RTN_LOCAL) {
 123:              int dont_send = 0;
 124:   
 125:              if (!dont_send)
 126:                  dont_send |= arp_ignore(in_dev,sip,tip);
 127:              if (!dont_send && IN_DEV_ARPFILTER(in_dev))
 128:                  dont_send |= arp_filter(sip,tip,dev);
 129:              if (!dont_send) {
 130:                  n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
 131:                  if (n) {
 132:                      arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha);
 133:                      neigh_release(n);
 134:                  }
 135:              }
 136:              goto out;
 137:          } else if (IN_DEV_FORWARD(in_dev)) {
 138:                  if (addr_type == RTN_UNICAST  && rt->u.dst.dev != dev &&
 139:                   (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) {
 140:                  n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
 141:                  if (n)
 142:                      neigh_release(n);
 143:   
 144:                  if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED ||
 145:                      skb->pkt_type == PACKET_HOST ||
 146:                      in_dev->arp_parms->proxy_delay == 0) {
 147:                      arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha);
 148:                  } else {
 149:                      pneigh_enqueue(&arp_tbl, in_dev->arp_parms, skb);
 150:                      in_dev_put(in_dev);
 151:                      return 0;
 152:                  }
 153:                  goto out;
 154:              }
 155:          }
 156:      }
 157:   
 158:      /* Update our ARP tables */
 159:   
 160:      n = __neigh_lookup(&arp_tbl, &sip, dev, 0);
 161:   
 162:      if (IPV4_DEVCONF_ALL(dev_net(dev), ARP_ACCEPT)) {
 163:          /* Unsolicited ARP is not accepted by default.
 164:             It is possible, that this option should be enabled for some
 165:             devices (strip is candidate)
 166:           */
 167:          if (n == NULL &&
 168:              arp->ar_op == htons(ARPOP_REPLY) &&
 169:              inet_addr_type(net, sip) == RTN_UNICAST)
 170:              n = __neigh_lookup(&arp_tbl, &sip, dev, 1);
 171:      }
 172:   
 173:      if (n) {
 174:          int state = NUD_REACHABLE;
 175:          int override;
 176:   
 177:          /* If several different ARP replies follows back-to-back,
 178:             use the FIRST one. It is possible, if several proxy
 179:             agents are active. Taking the first reply prevents
 180:             arp trashing and chooses the fastest router.
 181:           */
 182:          override = time_after(jiffies, n->updated + n->parms->locktime);
 183:   
 184:          /* Broadcast replies and request packets
 185:             do not assert neighbour reachability.
 186:           */
 187:          if (arp->ar_op != htons(ARPOP_REPLY) ||
 188:              skb->pkt_type != PACKET_HOST)
 189:              state = NUD_STALE;
 190:          neigh_update(n, sha, state, override ? NEIGH_UPDATE_F_OVERRIDE : 0);
 191:          neigh_release(n);
 192:      }
 193:   
 194:  out:
 195:      if (in_dev)
 196:          in_dev_put(in_dev);
 197:      consume_skb(skb);
 198:      return 0;
 199:  }

转载于:https://my.oschina.net/longscu/blog/61649

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值