linux tcp/ip协议栈源码分析---arp协议的实现

参见linux内核网络栈源代码情景分析一书
arp协议的实现围绕一个“数组+链表”(即哈希表)的数据结构展开,主要包括对缓存表项的增、删、改、查,以及定时器等回调函数的设置。
相关数据结构:
相关数据结构

arp协议流程图:
这里写图片描述

/*
 * One ARP cache entry: the mapping from an IP address to a hardware (MAC)
 * address. Entries are chained through `next` into the per-bucket lists
 * held in arp_tables[].
 */
struct arp_table
{
    struct arp_table        *next;          /* Linked entry list        */
    /* Time (jiffies) this entry was last used; compared against ARP_TIMEOUT to decide expiry. */
    unsigned long           last_used;      /* For expiry           */
    /* ATF_* status bits, e.g. ATF_PERM (never expired by the periodic sweep) or ATF_COM (resolved). */
    unsigned int            flags;          /* Control status       */
    unsigned long           ip;         /* ip address of entry      */
    unsigned long           mask;           /* netmask - used for generalised proxy arps (tridge)       */
    unsigned char           ha[MAX_ADDR_LEN];   /* Hardware address     */
    unsigned char           hlen;           /* Length of hardware address   */
    unsigned short          htype;          /* Type of hardware in use  */
    struct device           *dev;           /* Device the entry is tied to  */

    /*
     *  The following entries are only used for unresolved hw addresses.
     */
    /* Retransmit timer, armed while a request for this entry is outstanding. */
    struct timer_list       timer;          /* expire timer         */
    /* Number of request retransmissions still allowed. */
    int             retries;        /* remaining retries        */
    /* Packets that could not be sent because the mapping is not resolved yet;
       they are transmitted once the entry becomes complete. */
    struct sk_buff_head     skb;            /* list of queued packets   */
};


/*
 *  Configurable Parameters (don't touch unless you know what you are doing
 */

/*
 *  If an arp request is send, ARP_RES_TIME is the timeout value until the
 *  next request is send.
 */
/* If no reply has arrived ARP_RES_TIME ticks after a request went out, the request is sent again. */
#define ARP_RES_TIME        (250*(HZ/10))

/*
 *  The number of times an arp request is send, until the host is
 *  considered unreachable.
 */

/* Initial value of arp_table.retries for a new unresolved entry. */
#define ARP_MAX_TRIES       3

/*
 *  After that time, an unused entry is deleted from the arp table.
 */
/* Maximum idle time (in ticks) of a non-permanent cache entry. */
#define ARP_TIMEOUT     (600*HZ)

/*
 *  How often is the function 'arp_check_expire' called.
 *  An entry is invalidated in the time between ARP_TIMEOUT and
 *  (ARP_TIMEOUT+ARP_CHECK_INTERVAL).
 */
/* Interval (in ticks, not seconds) between two sweeps of the cache for expired entries. */
#define ARP_CHECK_INTERVAL  (60 * HZ)

/*
 * Controls how arp_lookup() treats the proxy list:
 *   PROXY_EXACT - proxy entries must match the IP address exactly
 *   PROXY_ANY   - proxy entries match if the address agrees under the entry's netmask
 *   PROXY_NONE  - the proxy list is not searched at all
 */
enum proxy {
   PROXY_EXACT=0,
   PROXY_ANY,
   PROXY_NONE,
};

/* Forward declarations. */
static void arp_check_expire (unsigned long);  
static struct arp_table *arp_lookup(unsigned long paddr, enum proxy proxy);


/*
 * Timer driving the periodic cache sweep; its callback is arp_check_expire.
 * NOTE(review): the initialiser is positional - presumably
 * { next, prev, expires, data, function }; confirm against struct timer_list.
 */
static struct timer_list arp_timer =
    { NULL, NULL, ARP_CHECK_INTERVAL, 0L, &arp_check_expire };

/*
 * The default arp netmask is just 255.255.255.255 which means it's
 * a single machine entry. Only proxy entries can have other netmasks
 *
*/
/* Default mask: all bits set. */
#define DEF_ARP_NETMASK (~0)


/*
 *  The size of the hash table. Must be a power of two.
 *  Maybe we should remove hashing in the future for arp and concentrate
 *  on Patrick Schaaf's Host-Cache-Lookup...
 */

/* Number of ordinary hash buckets (the proxy bucket is not counted). */
#define ARP_TABLE_SIZE  16

/* The ugly +1 here is to cater for proxy entries. They are put in their 
   own list for efficiency of lookup. If you don't want to find a proxy
   entry then don't look in the last entry, otherwise do 
*/
/* Total number of buckets, including the extra one used for proxy entries. */
#define FULL_ARP_TABLE_SIZE (ARP_TABLE_SIZE+1)
/* The ARP cache itself: an array of singly linked lists, all initially empty. */
struct arp_table *arp_tables[FULL_ARP_TABLE_SIZE] =
{
    NULL,
};


/*
 *  The last bits in the IP address are used for the cache lookup.
 *      A special entry is used for proxy arp entries
 */
/* Bucket index for an IP address: the low bits of htonl(paddr), masked to the table size. */
#define HASH(paddr)         (htonl(paddr) & (ARP_TABLE_SIZE - 1))
/* Index of the bucket that holds the proxy entries (the last slot of arp_tables). */
#define PROXY_HASH ARP_TABLE_SIZE

/*
 *  Periodic sweep over every bucket of the ARP cache (callback of
 *  arp_timer). An entry is unlinked and freed when it has been idle for
 *  longer than ARP_TIMEOUT and does not carry ATF_PERM; permanent entries
 *  are always kept.
 *  Note: Only fully resolved entries, which don't have any packets in
 *  the queue, can be deleted here, since ARP_TIMEOUT is much greater than
 *  ARP_MAX_TRIES*ARP_RES_TIME.
 *
 *  dummy: timer argument, not used.
 */
static void arp_check_expire(unsigned long dummy)
{
    unsigned long now = jiffies;    /* snapshot of the current time */
    unsigned long flags;
    int bucket;

    save_flags(flags);
    cli();

    for (bucket = 0; bucket < FULL_ARP_TABLE_SIZE; bucket++)
    {
        /* `link` always points at the pointer that references `cur`,
           so unlinking is a single store. */
        struct arp_table **link = &arp_tables[bucket];
        struct arp_table *cur;

        while ((cur = *link) != NULL)
        {
            /* Keep entries that are permanent or still fresh. */
            if ((cur->flags & ATF_PERM) || (now - cur->last_used) <= ARP_TIMEOUT)
            {
                link = &cur->next;
                continue;
            }

            *link = cur->next;              /* remove from list */
            del_timer(&cur->timer);         /* Paranoia */
            kfree_s(cur, sizeof(struct arp_table));
        }
    }
    restore_flags(flags);

    /*
     *  Re-arm the sweep timer. Only the interval is stored in `expires`;
     *  NOTE(review): this presumes add_timer() treats it as relative to
     *  the current time - confirm against the timer implementation.
     */
    del_timer(&arp_timer);
    arp_timer.expires = ARP_CHECK_INTERVAL;
    add_timer(&arp_timer);
}


/*
 *  Release everything owned by one (already unlinked) ARP entry:
 *    1. every sk_buff still waiting on the entry's queue,
 *    2. the entry's timer,
 *    3. the entry structure itself.
 */
static void arp_release_entry(struct arp_table *entry)
{
    unsigned long flags;
    struct sk_buff *pending;

    save_flags(flags);
    cli();

    /* Drop the packets that were never sent because resolution did not finish. */
    for (;;)
    {
        pending = skb_dequeue(&entry->skb);
        if (pending == NULL)
            break;
        skb_device_lock(pending);
        restore_flags(flags);
        dev_kfree_skb(pending, FREE_WRITE);
    }
    restore_flags(flags);

    del_timer(&entry->timer);
    kfree_s(entry, sizeof(struct arp_table));
}

/*
 *  Purge a device from the ARP queue.
 *
 *  Device notifier: on NETDEV_DOWN every cache entry (ordinary and proxy)
 *  tied to the device is unlinked and released. All other events are
 *  ignored.
 *
 *  event: notifier event code
 *  ptr:   the struct device the event refers to
 *  Returns NOTIFY_DONE in all cases.
 */
int arp_device_event(unsigned long event, void *ptr)
{
    struct device *dev=ptr;
    int i;
    unsigned long flags;

    if(event!=NETDEV_DOWN)
        return NOTIFY_DONE;
    /*
     *  This is a bit OTT - maybe we need some arp semaphores instead.
     */

    save_flags(flags);
    cli();
    /* Walk every bucket, including the proxy bucket. */
    for (i = 0; i < FULL_ARP_TABLE_SIZE; i++)
    {
        struct arp_table *entry;
        struct arp_table **pentry = &arp_tables[i];

        while ((entry = *pentry) != NULL)
        {
            if(entry->dev==dev)
            {
                *pentry = entry->next;  /* remove from list */
                /*
                 * Release through arp_release_entry() rather than a bare
                 * kfree_s(): an unresolved entry may still hold queued
                 * packets, which would otherwise be leaked. The helper
                 * also deletes the entry's timer.
                 */
                arp_release_entry(entry);
            }
            else
                pentry = &entry->next;  /* go to next entry */
        }
    }
    restore_flags(flags);
    return NOTIFY_DONE;
}


/*
 *  Create and send an arp packet. If (dest_hw == NULL), we create a broadcast
 *  message.
 *
 *  type:    ARP opcode stored in ar_op (e.g. ARPOP_REQUEST, ARPOP_REPLY)
 *  ptype:   protocol type handed to the device's hard_header routine
 *  dest_ip: target protocol address
 *  dev:     device to transmit on; nothing is sent if it has IFF_NOARP set
 *  src_ip:  sender protocol address
 *  dest_hw: target hardware address, or NULL (link header uses
 *           dev->broadcast and the ARP target field is zeroed)
 *  src_hw:  sender hardware address; if NULL the ARP sender field is
 *           filled from dev->dev_addr
 */
void arp_send(int type, int ptype, unsigned long dest_ip, struct device *dev, unsigned long src_ip, unsigned char *dest_hw, unsigned char *src_hw)
{
    struct sk_buff *skb;
    struct arphdr *arp;
    unsigned char *arp_ptr;
    /*
     * The hard_header call below already tolerates src_hw == NULL, but the
     * payload copy used to dereference it unconditionally. Use the device's
     * own address for the payload in that case.
     */
    unsigned char *sender_hw = src_hw ? src_hw : dev->dev_addr;
    /* Link-level header + ARP header + two (hardware address, 4-byte IP) pairs. */
    unsigned int frame_len = sizeof(struct arphdr) + dev->hard_header_len + 2*(dev->addr_len+4);

    /*
     *  No arp on this interface.
     */

    if(dev->flags&IFF_NOARP)
        return;

    /*
     *  Allocate a buffer
     */
    skb = alloc_skb(frame_len, GFP_ATOMIC);
    if (skb == NULL)
    {
        printk("ARP: no memory to send an arp packet\n");
        return;
    }
    skb->len = frame_len;
    skb->arp = 1;
    skb->dev = dev;
    skb->free = 1;

    /*
     *  Fill the device header for the ARP frame
     */
    dev->hard_header(skb->data,dev,ptype,dest_hw?dest_hw:dev->broadcast,src_hw,skb->len,skb);

    /* Fill out the arp protocol part. */
    arp = (struct arphdr *) (skb->data + dev->hard_header_len);
    arp->ar_hrd = htons(dev->type);
    /* Protocol being resolved: IP, with a distinct protocol id on AX.25 devices. */
#ifdef CONFIG_AX25
    arp->ar_pro = (dev->type != ARPHRD_AX25)? htons(ETH_P_IP) : htons(AX25_P_IP);
#else
    arp->ar_pro = htons(ETH_P_IP);
#endif
    arp->ar_hln = dev->addr_len;
    arp->ar_pln = 4;
    arp->ar_op = htons(type);

    /* Variable part: sender hw, sender ip, target hw, target ip. */
    arp_ptr=(unsigned char *)(arp+1);

    memcpy(arp_ptr, sender_hw, dev->addr_len);
    arp_ptr+=dev->addr_len;
    memcpy(arp_ptr, &src_ip,4);
    arp_ptr+=4;
    if (dest_hw != NULL)
        memcpy(arp_ptr, dest_hw, dev->addr_len);
    else
        memset(arp_ptr, 0, dev->addr_len);
    arp_ptr+=dev->addr_len;
    memcpy(arp_ptr, &dest_ip, 4);

    /* Hand the finished frame to the link layer. */
    dev_queue_xmit(skb, dev, 0);
}


/*
 *  Timer callback for an unresolved entry: called if the entry is not
 *  resolved within ARP_RES_TIME. Either another request is sent, or - once
 *  the retry budget is used up - the entry and all packets waiting on it
 *  are dropped.
 *
 *  arg: the struct arp_table pointer, cast to unsigned long.
 */
static void arp_expire_request (unsigned long arg)
{
    struct arp_table *entry = (struct arp_table *) arg;
    struct arp_table **link;
    unsigned long flags;

    save_flags(flags);
    cli();

    /*
     *  Since all timeouts are handled with interrupts enabled, there is a
     *  small chance, that this entry has just been resolved by an incoming
     *  packet. This is the only race condition, but it is handled...
     */
    if (entry->flags & ATF_COM)
    {
        restore_flags(flags);
        return;
    }

    /* Retries left: re-arm the timer and send the request again. */
    if (--entry->retries > 0)
    {
        unsigned long target_ip = entry->ip;
        struct device *out_dev = entry->dev;

        del_timer(&entry->timer);
        entry->timer.expires = ARP_RES_TIME;
        add_timer(&entry->timer);
        restore_flags(flags);
        arp_send(ARPOP_REQUEST, ETH_P_ARP, target_ip, out_dev, out_dev->pa_addr,
             NULL, out_dev->dev_addr);
        return;
    }

    /*
     *  Arp request timed out. Delete entry and all waiting packets.
     *  The entry holds no back pointer to its list, so its bucket is
     *  searched for it. Proxy entries shouldn't really time out; the proxy
     *  bucket is handled only for completeness.
     */
    link = (entry->flags & ATF_PUBL) ? &arp_tables[PROXY_HASH]
                                     : &arp_tables[HASH(entry->ip)];

    for (; *link != NULL; link = &(*link)->next)
    {
        if (*link != entry)
            continue;

        *link = entry->next;    /* delete from linked list */
        del_timer(&entry->timer);
        restore_flags(flags);
        arp_release_entry(entry);
        return;
    }

    /*
     *  We should never arrive here.
     */
    restore_flags(flags);
    printk("Possible ARP queue corruption.\n");
}


/*
 *  Transmit every packet that was queued on `entry` while its address was
 *  still unresolved. Must only be called for a complete (ATF_COM) entry;
 *  otherwise a message is logged and nothing is sent.
 *
 *  entry:   the cache entry whose queue is flushed
 *  hw_dest: not used by this routine
 */
static void arp_send_q(struct arp_table *entry, unsigned char *hw_dest)
{
    unsigned long flags;
    struct sk_buff *skb;

    if ((entry->flags & ATF_COM) == 0)
    {
        printk("arp_send_q: incomplete entry for %s\n",
                in_ntoa(entry->ip));
        return;
    }

    save_flags(flags);
    cli();

    /*
     *  Empty the entire queue, building its data up ready to send
     */
    for (skb = skb_dequeue(&entry->skb); skb != NULL; skb = skb_dequeue(&entry->skb))
    {
        IS_SKB(skb);
        skb_device_lock(skb);
        restore_flags(flags);

        /* The packet was queued without a usable link header; have the
           device rebuild it now. A zero return is treated as success. */
        if (skb->dev->rebuild_header(skb->data,skb->dev,skb->raddr,skb) != 0)
        {
            /* This routine is only ever called when 'entry' is
               complete. Thus this can't fail. */
            printk("arp_send_q: The impossible occurred. Please notify Alan.\n");
            printk("arp_send_q: active entity %s\n",in_ntoa(entry->ip));
            printk("arp_send_q: failed to find %s\n",in_ntoa(skb->raddr));
            continue;
        }

        /* Mark the header as resolved and pass the packet to the link
           layer, using the owning socket's priority when there is one. */
        skb->arp = 1;
        dev_queue_xmit(skb, skb->dev, skb->sk == NULL ? 0 : skb->sk->priority);
    }
    restore_flags(flags);
}


/*
 *  Delete an ARP mapping entry in the cache.
 *
 *  Removes every entry whose IP equals ip_addr, first from the address's
 *  ordinary hash bucket and then from the proxy list.
 *
 *  ip_addr: address whose entries are to be removed
 *  force:   if zero, a permanent (ATF_PERM) entry is left alone and the
 *           search stops as soon as one is met
 *
 *  Interrupts are enabled on return, on every path. (The early return for
 *  a protected permanent entry previously left them disabled.)
 */
void arp_destroy(unsigned long ip_addr, int force)
{
    struct arp_table *entry;
    struct arp_table **pentry;
    unsigned long hash = HASH(ip_addr);
    int pass;

    /* Pass 0 scans the ordinary bucket, pass 1 the proxy list. Scanning
       each list exactly once replaces the old checked_proxies bookkeeping,
       which could visit the proxy list twice or skip it after a restart. */
    for (pass = 0; pass < 2; pass++)
    {
        struct arp_table **head = &arp_tables[pass == 0 ? hash : PROXY_HASH];

        cli();
        pentry = head;
        while ((entry = *pentry) != NULL)
        {
            if (entry->ip != ip_addr)
            {
                pentry = &entry->next;
                continue;
            }

            /* Permanent entries are only removed when forced. */
            if ((entry->flags & ATF_PERM) && !force)
            {
                sti();
                return;
            }

            *pentry = entry->next;
            del_timer(&entry->timer);
            sti();
            arp_release_entry(entry);

            /* The list may have changed while interrupts were enabled,
               so restart this list from its head. */
            cli();
            pentry = head;
        }
        sti();
    }
}


/*
 *  Receive an arp request by the device layer. Maybe I rewrite it, to
 *  use the incoming packet for the reply. The time for the current
 *  "overhead" isn't that high...
 */
// 处理从链路层上报的数据包
int arp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
{
/*
 *  We shouldn't use this type conversion. Check later.
 */

    struct arphdr *arp = (struct arphdr *)skb->h.raw;
    unsigned char *arp_ptr= (unsigned char *)(arp+1);
    struct arp_table *entry;
    struct arp_table *proxy_entry;
    int addr_hint,hlen,htype;
    unsigned long hash;
    unsigned char ha[MAX_ADDR_LEN]; /* So we can enable ints again. */
    long sip,tip;
    unsigned char *sha,*tha;

/*
 *  The hardware length of the packet should match the hardware length
 *  of the device.  Similarly, the hardware types should match.  The
 *  device should be ARP-able.  Also, if pln is not 4, then the lookup
 *  is not from an IP number.  We can't currently handle this, so toss
 *  it. 
 */  
    // 硬件地址长度和类型是否相等,协议长度是否等于4,即ip协议,目前只支持这种
    if (arp->ar_hln != dev->addr_len    || 
            dev->type != ntohs(arp->ar_hrd) || 
        dev->flags & IFF_NOARP          ||
        arp->ar_pln != 4)
    {
        kfree_skb(skb, FREE_READ);
        return 0;
    }

/*
 *  Another test.
 *  The logic here is that the protocol being looked up by arp should 
 *  match the protocol the device speaks.  If it doesn't, there is a
 *  problem, so toss the packet.
 */
    switch(dev->type)
    {
#ifdef CONFIG_AX25
        case ARPHRD_AX25:
            if(arp->ar_pro != htons(AX25_P_IP))
            {
                kfree_skb(skb, FREE_READ);
                return 0;
            }
            break;
#endif
        case ARPHRD_ETHER:
        case ARPHRD_ARCNET:
            if(arp->ar_pro != htons(ETH_P_IP))
            {
                kfree_skb(skb, FREE_READ);
                return 0;
            }
            break;

        default:
            printk("ARP: dev->type mangled!\n");
            kfree_skb(skb, FREE_READ);
            return 0;
    }

/*
 *  Extract fields
 */
    // 硬件长度和类型
    hlen  = dev->addr_len;
    htype = dev->type;
    // arp层数据
    // arp_ptr指向数据首地址,sha等于发送者的硬件地址
    sha=arp_ptr;
    // 移动硬件长度hlen个字节
    arp_ptr+=hlen;
    // sip等于发送者ip
    memcpy(&sip,arp_ptr,4);
    // 继续移动4个字节,即移动ip长度个字节
    arp_ptr+=4;
    // 接收者的硬件地址
    tha=arp_ptr;
    // 继续移动
    arp_ptr+=hlen;
    // 接收者ip
    memcpy(&tip,arp_ptr,4);

/* 
 *  Check for bad requests for 127.0.0.1.  If this is one such, delete it.
 */
    // 该包是自己发的
    if(tip == INADDR_LOOPBACK)
    {
        kfree_skb(skb, FREE_READ);
        return 0;
    }

/*
 *  Process entry.  The idea here is we want to send a reply if it is a
 *  request for us or if it is a request for someone else that we hold
 *  a proxy for.  We want to add an entry to our cache if it is a reply
 *  to us or if it is a request for our address.  
 *  (The assumption for this last is that if someone is requesting our 
 *  address, they are probably intending to talk to us, so it saves time 
 *  if we cache their address.  Their address is also probably not in 
 *  our cache, since ours is not in their cache.)
 * 
 *  Putting this another way, we only care about replies if they are to
 *  us, in which case we add them to the cache.  For requests, we care
 *  about those for us and those for our proxies.  We reply to both,
 *  and in the case of requests for us we add the requester to the arp 
 *  cache.
 */
    // 检测接收者ip类型
    addr_hint = ip_chk_addr(tip);
    // 如果该包是一个回复包
    if(arp->ar_op == htons(ARPOP_REPLY))
    {   // 但是接收地址不是本机地址,则丢弃
        if(addr_hint!=IS_MYADDR)
        {
/* 
 *  Replies to other machines get tossed. 
 */
            kfree_skb(skb, FREE_READ);
            return 0;
        }
/*
 *  Fall through to code below that adds sender to cache. 
 */
    }
    // 是一个请求包
    else
    { 
/* 
 *  It is now an arp request 
 */
/*
 * Only reply for the real device address or when it's in our proxy tables
 */     // 接收者地址不是本机,这时候需要判断本机代理的arp缓存中是否有接收者的数据
        if(tip!=dev->pa_addr)
        {
/*
 *  To get in here, it is a request for someone else.  We need to
 *  check if that someone else is one of our proxies.  If it isn't,
 *  we can toss it.
 */
            cli();
            for(proxy_entry=arp_tables[PROXY_HASH];
                proxy_entry;
                proxy_entry = proxy_entry->next)
            {
              /* we will respond to a proxy arp request
                 if the masked arp table ip matches the masked
                 tip. This allows a single proxy arp table
                 entry to be used on a gateway machine to handle
                 all requests for a whole network, rather than
                 having to use a huge number of proxy arp entries
                 and having to keep them uptodate.
                 */
              if (proxy_entry->dev != dev && proxy_entry->htype == htype &&
                  !((proxy_entry->ip^tip)&proxy_entry->mask))
                break;

            }
            // 找到了发送回复包
            if (proxy_entry)
            {
                memcpy(ha, proxy_entry->ha, hlen);
                sti();
                arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,ha);
                kfree_skb(skb, FREE_READ);
                return 0;
            }
            // 找不到则丢弃包
            else
            {
                sti();
                kfree_skb(skb, FREE_READ);
                return 0;
            }
        }
        // 接收者是本机的包,则发送回复包
        else
        {
/*
 *  To get here, it must be an arp request for us.  We need to reply.
 */
            arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr);
        }
    }


/*
 * Now all replies are handled.  Next, anything that falls through to here
 * needs to be added to the arp cache, or have its entry updated if it is 
 * there.
 */
    // 更新arp缓存的数据
    hash = HASH(sip);
    cli();
    for(entry=arp_tables[hash];entry;entry=entry->next)
        if(entry->ip==sip && entry->htype==htype)
            break;
    // 如果本来就有发送者ip的缓存项,则更新数据
    if(entry)
    {
/*
 *  Entry found; update it.
 */
        memcpy(entry->ha, sha, hlen);
        entry->hlen = hlen;
        entry->last_used = jiffies;
        // 判断该ip对应的arp项是否处于已经完成解析状态,不是的话,把他置为解析完成并且发送缓存在sk_buff中的包
        if (!(entry->flags & ATF_COM))
        {
/*
 *  This entry was incomplete.  Delete the retransmit timer
 *  and switch to complete status.
 */         // 删除定时器
            del_timer(&entry->timer);
            // 修改该数据的状态为解析完成
            entry->flags |= ATF_COM;
            sti();
/* 
 *  Send out waiting packets. We might have problems, if someone is 
 *  manually removing entries right now -- entry might become invalid 
 *  underneath us.
 */         // 发送滞留的包
            arp_send_q(entry, sha);
        }
        else
        {
            sti();
        }
    }
    // 没有找到发送者ip对应的数据,则插入一条新的arp缓存项
    else
    {
/*
 *  No entry found.  Need to add a new entry to the arp table.
 */
        entry = (struct arp_table *)kmalloc(sizeof(struct arp_table),GFP_ATOMIC);
        if(entry == NULL)
        {
            sti();
            printk("ARP: no memory for new arp entry\n");

            kfree_skb(skb, FREE_READ);
            return 0;
        }

        entry->mask = DEF_ARP_NETMASK;
        entry->ip = sip;
        entry->hlen = hlen;
        entry->htype = htype;
        entry->flags = ATF_COM;
        init_timer(&entry->timer);
        memcpy(entry->ha, sha, hlen);
        entry->last_used = jiffies;
        entry->dev = skb->dev;
        skb_queue_head_init(&entry->skb);
        // 头插法
        entry->next = arp_tables[hash];
        arp_tables[hash] = entry;
        sti();
    }

/*
 *  Replies have been sent, and entries have been added.  All done.
 */
    kfree_skb(skb, FREE_READ);
    return 0;
}


/*
 *  Find an arp mapping in the cache. If not found, post a request.
 */
// 在根据ip在arp缓存表里找相关的数据,找不到则发送arp请求去找
int arp_find(unsigned char *haddr, unsigned long paddr, struct device *dev,
       unsigned long saddr, struct sk_buff *skb)
{
    struct arp_table *entry;
    unsigned long hash;
#ifdef CONFIG_IP_MULTICAST
    unsigned long taddr;
#endif  

    switch (ip_chk_addr(paddr))
    {   // 如果找的是本机ip和mac地址的映射,则返回本机的硬件地址,并设置完成解析标记
        case IS_MYADDR:
            printk("ARP: arp called for own IP address\n");
            memcpy(haddr, dev->dev_addr, dev->addr_len);
            skb->arp = 1;
            return 0;
#ifdef CONFIG_IP_MULTICAST
        // 如果多播地址,则组装对应的mac地址
        case IS_MULTICAST:
            if(dev->type==ARPHRD_ETHER || dev->type==ARPHRD_IEEE802)
            {   
                // ip多播地址和mac多播地址的关系是ip多播地址的后23位直接映射到mac多播地址,而mac多播地址的前25位是固定的
                // 前面25位是固定的
                haddr[0]=0x01;
                haddr[1]=0x00;
                haddr[2]=0x5e;
                taddr=ntohl(paddr);
                // 取后8位进行赋值
                haddr[5]=taddr&0xff;
                // 剩下的值往右挪8位
                taddr=taddr>>8;
                // 再取8位进行赋值
                haddr[4]=taddr&0xff;
                // 剩下的值往右挪8位
                taddr=taddr>>8;
                // 取7位进行赋值即可
                haddr[3]=taddr&0x7f;
                return 0;
            }
        /*
         *  If a device does not support multicast broadcast the stuff (eg AX.25 for now)
         */
#endif
        // 广播地址,则取mac广播地址进行赋值
        case IS_BROADCAST:
            memcpy(haddr, dev->broadcast, dev->addr_len);
            skb->arp = 1;
            return 0;
    }

    hash = HASH(paddr);
    cli();

    /*
     *  Find an entry
     */
    // 通过ip找对应的arp缓存,并设置不从代理里找
    entry = arp_lookup(paddr, PROXY_NONE);
    // 找到
    if (entry != NULL)  /* It exists */
    {       // 缓存中有对应的数据但没有完成解析,先把数据包挂到该条数据的sk_buff队列中
            if (!(entry->flags & ATF_COM))
            {
            /*
             *  A request was already send, but no reply yet. Thus
             *  queue the packet with the previous attempt
             */

                if (skb != NULL)
                {
                    skb_queue_tail(&entry->skb, skb);
                    skb_device_unlock(skb);
                }
                sti();
                return 1;
            }
        // 缓存中有对应的数据并且已经解析完成
        /*
         *  Update the record
         */
        // 更新缓存中该条数据的信息
        entry->last_used = jiffies;
        // 把找到的arp缓存项的mac地址赋值给haddr
        memcpy(haddr, entry->ha, dev->addr_len);
        // 设置arp解析完成标记位
        if (skb)
            skb->arp = 1;
        sti();
        return 0;
    }

    /*
     *  Create a new unresolved entry.
     */
    // 没有找到该ip对应的数据,则新增一条数据
    entry = (struct arp_table *) kmalloc(sizeof(struct arp_table),
                    GFP_ATOMIC);
    if (entry != NULL)
    {   
        // 初始化新增数据的内容
        entry->mask = DEF_ARP_NETMASK;
        entry->ip = paddr;
        entry->hlen = dev->addr_len;
        entry->htype = dev->type;
        entry->flags = 0;
        memset(entry->ha, 0, dev->addr_len);
        entry->dev = dev;
        entry->last_used = jiffies;
        init_timer(&entry->timer);
        entry->timer.function = arp_expire_request;
        entry->timer.data = (unsigned long)entry;
        entry->timer.expires = ARP_RES_TIME;
        entry->next = arp_tables[hash];
        arp_tables[hash] = entry;
        add_timer(&entry->timer);
        entry->retries = ARP_MAX_TRIES;
        skb_queue_head_init(&entry->skb);
        if (skb != NULL)
        {
            skb_queue_tail(&entry->skb, skb);
            skb_device_unlock(skb);
        }
    }
    else
    {
        if (skb != NULL && skb->free)
            kfree_skb(skb, FREE_WRITE);
    }
    sti();

    /*
     *  If we didn't find an entry, we will try to send an ARP packet.
     */
    // 加完新增的数据后,发送arp进行ip和mac地址的解析
    arp_send(ARPOP_REQUEST, ETH_P_ARP, paddr, dev, saddr, NULL, 
         dev->dev_addr);

    return 1;
}


/*
 *  Write the contents of the ARP cache to a PROCfs file.
 *
 *  buffer: output buffer; a header line and one line per entry are
 *          formatted into it
 *  start:  set to the position in buffer where the requested data begins
 *  offset: offset of the requested data within the complete listing
 *  length: number of bytes requested
 *  Returns the number of bytes available at *start, at most `length`.
 */

#define HBUFFERLEN 30

int arp_get_info(char *buffer, char **start, off_t offset, int length)
{
    int len=0;
    off_t begin=0;
    off_t pos=0;
    int size;
    struct arp_table *entry;
    char hbuffer[HBUFFERLEN];
    int i,j,k;
    int full=0;     /* set once enough data has been produced */
    const char hexbuf[] =  "0123456789ABCDEF";

    size = sprintf(buffer,"IP address       HW type     Flags       HW address            Mask\n");

    pos+=size;
    len+=size;

    cli();
    /* Stop scanning buckets as soon as the request is satisfied; a plain
       `break` only leaves the inner loop, and carrying on with the
       remaining buckets would keep writing past the requested window. */
    for(i=0; i<FULL_ARP_TABLE_SIZE && !full; i++)
    {
        for(entry=arp_tables[i]; entry!=NULL; entry=entry->next)
        {
/*
 *  Convert hardware address to XX:XX:XX:XX ... form.
 */
#ifdef CONFIG_AX25

            if(entry->htype==ARPHRD_AX25)
                 strcpy(hbuffer,ax2asc((ax25_address *)entry->ha));
            else {
#endif

            for(k=0,j=0;k<HBUFFERLEN-3 && j<entry->hlen;j++)
            {
                hbuffer[k++]=hexbuf[ (entry->ha[j]>>4)&15 ];
                hbuffer[k++]=hexbuf[  entry->ha[j]&15     ];
                hbuffer[k++]=':';
            }
            /* Overwrite the trailing ':' with the terminator. With a
               zero-length address nothing was written, so k must not be
               decremented (that would store to hbuffer[-1]). */
            if(k>0)
                --k;
            hbuffer[k]=0;

#ifdef CONFIG_AX25
            }
#endif
            /* Two separate sprintf calls because in_ntoa() is used twice
               for one line. NOTE(review): presumably it returns a shared
               static buffer - confirm. */
            size = sprintf(buffer+len,
                "%-17s0x%-10x0x%-10x%s",
                in_ntoa(entry->ip),
                (unsigned int)entry->htype,
                entry->flags,
                hbuffer);
            size += sprintf(buffer+len+size,
                 "     %-17s\n",
                  entry->mask==DEF_ARP_NETMASK?
                   "*":in_ntoa(entry->mask));

            len+=size;
            pos=begin+len;

            /* Everything so far lies before the requested offset:
               discard it and reuse the buffer. */
            if(pos<offset)
            {
                len=0;
                begin=pos;
            }
            if(pos>offset+length)
            {
                full=1;
                break;
            }
        }
    }
    sti();

    *start=buffer+(offset-begin);   /* Start of wanted data */
    len-=(offset-begin);        /* Start slop */
    if(len>length)
        len=length;             /* Ending slop */
    return len;
}


/*
 *  Find an entry in the ARP table by IP address; returns NULL if there is
 *  none. The address's ordinary hash bucket is searched first. Unless
 *  proxy is PROXY_NONE, the proxy list is searched next:
 *      PROXY_EXACT - only an exact IP match is accepted
 *      otherwise   - the entry's netmask is applied
 */
static struct arp_table *arp_lookup(unsigned long paddr, enum proxy proxy)
{
    struct arp_table *cur;

    /* Ordinary entries: exact match only. */
    for (cur = arp_tables[HASH(paddr)]; cur != NULL; cur = cur->next)
    {
        if (cur->ip == paddr)
            return cur;
    }

    if (proxy == PROXY_NONE)
        return NULL;

    /* it's possibly a proxy entry (with a netmask) */
    for (cur = arp_tables[PROXY_HASH]; cur != NULL; cur = cur->next)
    {
        if (proxy == PROXY_EXACT)
        {
            if (cur->ip == paddr)
                return cur;
        }
        else
        {
            /* XOR leaves a 1 wherever the two addresses differ; ANDing
               with the mask keeps only the bits the mask selects. A zero
               result means the addresses agree on every masked bit. */
            if (((cur->ip ^ paddr) & cur->mask) == 0)
                return cur;
        }
    }

    return NULL;
}


/*
 *  Set (create) an ARP cache entry.
 *
 *  req: user-space struct arpreq describing the mapping; copied in with
 *       memcpy_fromfs
 *
 *  Returns 0 on success, or
 *      -EPFNOSUPPORT  protocol family is not AF_INET, or the hardware
 *                     type is not one handled below
 *      -EINVAL        the protocol address is 0.0.0.0
 *      -ENETUNREACH   no route to the address
 *      -ENOMEM        no memory for a new entry
 */
static int arp_req_set(struct arpreq *req)
{
    struct arpreq r;
    struct arp_table *entry;
    struct sockaddr_in *si;
    int htype, hlen;
    int was_incomplete = 0;     /* existing entry was still unresolved */
    unsigned long ip;
    struct rtable *rt;

    memcpy_fromfs(&r, req, sizeof(r));

    /* We only understand about IP addresses... */
    if (r.arp_pa.sa_family != AF_INET)
        return -EPFNOSUPPORT;

    /*
     * Find out about the hardware type.
     * NOTE(review): an older comment here said a "not set" value (0) must
     * be treated as Ethernet for BSD compatibility, but there is no case
     * for 0 - such a request is rejected by the default branch.
     */

    switch (r.arp_ha.sa_family) {
        case ARPHRD_ETHER:
            htype = ARPHRD_ETHER;
            hlen = ETH_ALEN;
            break;

        case ARPHRD_ARCNET:
            htype = ARPHRD_ARCNET;
            hlen = 1;   /* length of arcnet addresses */
            break;

#ifdef CONFIG_AX25
        case ARPHRD_AX25:
            htype = ARPHRD_AX25;
            hlen = 7;
            break;
#endif
        default:
            return -EPFNOSUPPORT;
    }

    si = (struct sockaddr_in *) &r.arp_pa;
    ip = si->sin_addr.s_addr;
    if (ip == 0)
    {
        printk("ARP: SETARP: requested PA is 0.0.0.0 !\n");
        return -EINVAL;
    }

    /*
     *  Is it reachable directly ?
     *  Addresses without a route are refused.
     */
    rt = ip_rt_route(ip, NULL, NULL);
    if (rt == NULL)
        return -ENETUNREACH;

    /*
     *  Is there an existing entry for this address?
     */

    cli();

    /*
     *  Find the entry (proxy entries must match the address exactly)
     */
    entry = arp_lookup(ip, PROXY_EXACT);
    /* An entry moving between the ordinary and the proxy list (ATF_PUBL
       differs) is destroyed and recreated on the right list. */
    if (entry && (entry->flags & ATF_PUBL) != (r.arp_flags & ATF_PUBL))
    {
        sti();
        arp_destroy(ip,1);
        cli();
        entry = NULL;
    }

    /*
     *  Do we need to create a new entry
     */
    if (entry == NULL)
    {
        unsigned long hash = HASH(ip);
        /* Published (proxy) entries live on the proxy list. */
        if (r.arp_flags & ATF_PUBL)
            hash = PROXY_HASH;

        entry = (struct arp_table *) kmalloc(sizeof(struct arp_table),
                    GFP_ATOMIC);
        if (entry == NULL)
        {
            sti();
            return -ENOMEM;
        }
        entry->ip = ip;
        init_timer(&entry->timer);
        /* Insert at the head of the list. */
        entry->next = arp_tables[hash];
        arp_tables[hash] = entry;
        skb_queue_head_init(&entry->skb);
    }
    else if (!(entry->flags & ATF_COM))
    {
        /*
         * The existing entry was still waiting for a reply. It becomes
         * complete below, so stop its retransmit timer and remember to
         * send the packets queued on it - the same steps arp_rcv takes
         * when a reply completes an entry. Without this the queued
         * packets would stay stranded on the entry.
         */
        del_timer(&entry->timer);
        was_incomplete = 1;
    }

    /*
     *  We now have a pointer to an ARP entry (new or pre-existing).
     *  Update it! The hardware type and length are refreshed as well so
     *  they always describe the address stored in `ha`.
     */
    entry->hlen = hlen;
    entry->htype = htype;
    memcpy(&entry->ha, &r.arp_ha.sa_data, hlen);
    entry->last_used = jiffies;
    /* Take the caller's flags and OR in ATF_COM: an entry set by hand is
       resolved by definition. */
    entry->flags = r.arp_flags | ATF_COM;
    /* Only a published entry that also sets ATF_NETMASK uses the supplied
       netmask; every other entry gets the default mask. */
    if ((entry->flags & ATF_PUBL) && (entry->flags & ATF_NETMASK))
      {
        si = (struct sockaddr_in *) &r.arp_netmask;
        entry->mask = si->sin_addr.s_addr;
      }
    else
      entry->mask = DEF_ARP_NETMASK;
    entry->dev = rt->rt_dev;
    sti();

    /* Send out waiting packets, now that the address is known. As in
       arp_rcv, the entry could in principle be removed underneath us once
       interrupts are enabled again. */
    if (was_incomplete)
        arp_send_q(entry, entry->ha);

    return 0;
}


/*
 *  Get an ARP cache entry.
 *
 *  req: user-space struct arpreq; the protocol address is read from it
 *       and the hardware address, hardware type and flags of the matching
 *       entry are written back to it.
 *
 *  Returns 0 on success, -EPFNOSUPPORT if the protocol family is not
 *  AF_INET, or -ENXIO if no entry matches (proxy entries are matched
 *  through their netmask).
 */
static int arp_req_get(struct arpreq *req)
{
    struct arpreq r;
    struct arp_table *found;
    unsigned long wanted_ip;

    memcpy_fromfs(&r, req, sizeof(r));

    /*
     *  We only understand about IP addresses...
     */
    if (r.arp_pa.sa_family != AF_INET)
        return -EPFNOSUPPORT;

    wanted_ip = ((struct sockaddr_in *) &r.arp_pa)->sin_addr.s_addr;

    /*
     *  Is there an existing entry for this address? If so, copy it into
     *  the local structure while interrupts are still disabled.
     */
    cli();
    found = arp_lookup(wanted_ip, PROXY_ANY);
    if (found != NULL)
    {
        memcpy(r.arp_ha.sa_data, &found->ha, found->hlen);
        r.arp_ha.sa_family = found->htype;
        r.arp_flags = found->flags;
    }
    sti();

    if (found == NULL)
        return -ENXIO;

    /*
     *  Copy the information back
     */
    memcpy_tofs(req, &r, sizeof(r));
    return 0;
}


/*
 *  Handle an ARP layer I/O control request.
 *
 *  cmd selects the operation on the ARP cache and arg points at the
 *  caller's struct arpreq:
 *      SIOCGARP - look an entry up (arp_req_get)
 *      SIOCSARP - create or update an entry (arp_req_set)
 *      SIOCDARP - delete an entry (arp_destroy)
 *
 *  Returns the result of the selected operation (0 for a delete),
 *  -EPERM when suser() fails for a set/delete, the verify_area() error
 *  when arg does not check out, -EPFNOSUPPORT when a delete names a
 *  non-AF_INET protocol address, and -EINVAL for any other cmd.
 */
int arp_ioctl(unsigned int cmd, void *arg)
{
    struct arpreq del_req;
    struct sockaddr_in *sin;
    int rc;

    /* Lookup: no privilege check; arg is verified for writing since the answer is copied back into it. */
    if (cmd == SIOCGARP)
    {
        rc = verify_area(VERIFY_WRITE, arg, sizeof(struct arpreq));
        if (rc)
            return rc;
        return arp_req_get((struct arpreq *)arg);
    }

    /* Only delete and set remain; everything else is rejected up front. */
    if (cmd != SIOCDARP && cmd != SIOCSARP)
        return -EINVAL;

    /* Delete and set share the same preamble: permission check first, then verify arg for reading. */
    if (!suser())
        return -EPERM;
    rc = verify_area(VERIFY_READ, arg, sizeof(struct arpreq));
    if (rc)
        return rc;

    /* Set: add a new entry or modify an existing one. */
    if (cmd == SIOCSARP)
        return arp_req_set((struct arpreq *)arg);

    /* Delete: copy the request in and remove the entry for its IP address. */
    memcpy_fromfs(&del_req, arg, sizeof(del_req));
    if (del_req.arp_pa.sa_family != AF_INET)
        return -EPFNOSUPPORT;
    sin = (struct sockaddr_in *) &del_req.arp_pa;
    arp_destroy(sin->sin_addr.s_addr, 1);
    return 0;
}


/*
 *  Called once on startup.
 */
/*
 *  Packet-type descriptor for ARP. arp_init() fills in its type member
 *  and hands it to dev_add_pack(); arp_rcv is the function it carries.
 */
static struct packet_type arp_packet_type =
{
    0,  /* Should be: __constant_htons(ETH_P_ARP) - but this _doesn't_ come out constant! */
        /* Placeholder only: arp_init() stores htons(ETH_P_ARP) into .type at run time. */
    NULL,       /* All devices */
    arp_rcv,    /* NOTE(review): presumably the receive handler called for incoming ARP packets - confirm against struct packet_type */
    NULL,
    NULL
};
/*
 *  Notifier block that arp_init() passes to register_netdevice_notifier()
 *  ("device down reports"). Its first member is arp_device_event.
 *  NOTE(review): presumably that is the callback run on device state
 *  changes - confirm against struct notifier_block.
 */
static struct notifier_block arp_dev_notifier={
    arp_device_event,
    NULL,
    0
};
/*
 *  Initialise the ARP layer: register the ARP packet type, start the
 *  ARP timer and register the device notifier. Takes no arguments and
 *  returns nothing.
 */
void arp_init (void)
{
    /* Register the packet type */
    /* The type is set here rather than in the static initialiser (see the note on arp_packet_type's first field). */
    arp_packet_type.type=htons(ETH_P_ARP);
    dev_add_pack(&arp_packet_type);
    /* Start with the regular checks for expired arp entries. */
    add_timer(&arp_timer);
    /* Register for device down reports */
    /* ARP entries are tied to a device (struct arp_table.dev), so ARP asks to be told about device events. */
    register_netdevice_notifier(&arp_dev_notifier);
}
  • 2
    点赞
  • 13
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值