分析内核版本:Linux 2.6.36.2。 分析网卡:DM9000
一、网络设备驱动程序分析。
1、Linux 网络设备驱动程序 分层:
Linux 网络设备驱动程序从上到下可分为4层,依次为:网络协议接口层、网络设备接口层、提供实际功能的设备 驱动功能层、以及 网络设备与媒介层。这4层作用如下:
1) 网络协议接口层:网络协议接口层向网络协议提供统一的数据包收发接口,不论是上层ARP,还是IP,都通过dev_queue_xmit() 函数发送数据,并通过netif_rx() 函数
接收数据。这一层的存在使得上层协议独立于具体的设备。
2)网络设备接口层:网络设备接口层向协议接口层提供统一的用于描述具体网络设备属性 和 操作的结构体 net_device,该结构体是设备驱动功能层中各函数的容器,
实际上,网络设备接口层从宏观上规划了具体操作硬件的设备驱动功能层的结构。
3)设备驱动功能层:该层的各个函数是网络设备接口层net_device 数据结构的具体成员,是驱使网络设备硬件完成相应动作的程序,它通过hard_start_xmit() 函数启动发送操作,
并通过网络设备上的中断触发接收操作。
4)网络设备 与 媒介层: 是完成数据包发送和接收的物理实体,包括网络适配器和具体的传输媒介,网络适配器被设备驱动功能层中的函数物理上驱动,对于Linux而言,
网络设备和媒介都是可以虚拟的。
在设计具体的网络设备驱动程序时,我们需要完成的主要工作是编写设备驱动功能层 的 相关函数以填充net_device 数据结构的内容并将net_device 注册入内核。
2、网络协议接口层程序分析:
1) int dev_queue_xmit (struct sk_buff *skb);// 发送一个sk_buff 数据包
int netif_rx (struct sk_buff *skb); // 接收一个数据包
以上函数定义在 net/core/dev.c 中
sk_buff 结构体定义在 include/linux/skbuff.h 文件, 它的含义为“套接字缓冲区”,用于在Linux 网络子系统中各层之间传递数据,是Linux 网络子系统数据传递的“中枢神经”。
2) 套接字缓冲区 sk_buff 结构体
- struct sk_buff {
- /* These two members must be first. */
- struct sk_buff *next;
- struct sk_buff *prev;
- ktime_t tstamp;
- struct sock *sk;
- struct net_device *dev;
- /*
- * This is the control buffer. It is free to use for every
- * layer. Please put your private variables there. If you
- * want to keep them across layers you have to do a skb_clone()
- * first. This is owned by whoever has the skb queued ATM.
- */
- char cb[48] __aligned(8);
- unsigned long _skb_refdst;
- #ifdef CONFIG_XFRM
- struct sec_path *sp;
- #endif
- unsigned int len,
- data_len;
- __u16 mac_len,
- hdr_len;
- union {
- __wsum csum;
- struct {
- __u16 csum_start;
- __u16 csum_offset;
- };
- };
- __u32 priority;
- kmemcheck_bitfield_begin(flags1);
- __u8 local_df:1,
- cloned:1,
- ip_summed:2,
- nohdr:1,
- nfctinfo:3;
- __u8 pkt_type:3,
- fclone:2,
- ipvs_property:1,
- peeked:1,
- nf_trace:1;
- kmemcheck_bitfield_end(flags1);
- __be16 protocol;
- void (*destructor)(struct sk_buff *skb);
- #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
- struct nf_conntrack *nfct;
- struct sk_buff *nfct_reasm;
- #endif
- #ifdef CONFIG_BRIDGE_NETFILTER
- struct nf_bridge_info *nf_bridge;
- #endif
- int skb_iif;
- #ifdef CONFIG_NET_SCHED
- __u16 tc_index; /* traffic control index */
- #ifdef CONFIG_NET_CLS_ACT
- __u16 tc_verd; /* traffic control verdict */
- #endif
- #endif
- __u32 rxhash;
- kmemcheck_bitfield_begin(flags2);
- __u16 queue_mapping:16;
- #ifdef CONFIG_IPV6_NDISC_NODETYPE
- __u8 ndisc_nodetype:2,
- deliver_no_wcard:1;
- #else
- __u8 deliver_no_wcard:1;
- #endif
- kmemcheck_bitfield_end(flags2);
- /* 0/14 bit hole */
- #ifdef CONFIG_NET_DMA
- dma_cookie_t dma_cookie;
- #endif
- #ifdef CONFIG_NETWORK_SECMARK
- __u32 secmark;
- #endif
- union {
- __u32 mark;
- __u32 dropcount;
- };
- __u16 vlan_tci;
- sk_buff_data_t transport_header; // 传输层
- sk_buff_data_t network_header; // 网络层
- sk_buff_data_t mac_header; // MAC 层
- /* These elements must be at the end, see alloc_skb() for details. */
- sk_buff_data_t tail;
- sk_buff_data_t end;
- unsigned char *head, // 缓冲区头指针
- *data; // 有效数据头指针
- unsigned int truesize;
- atomic_t users;
- };
3.1) 分配
struct sk_buff *alloc_skb(unsigned int len, gfp_t priority);
struct sk_buff *dev_alloc_skb (unsigned int len);
3.2) 释放
void kfree_skb (struct sk_buff *skb);
void dev_kfree_skb (struct sk_buff *skb);
void dev_kfree_skb_irq (struct sk_buff *skb);
void dev_kfree_skb_any (struct sk_buff *skb);
3.3) 变更
unsigned char *skb_put (struct sk_buff *skb, unsigned int len);
unsigned char *skb_push ( struct sk_buff *skb, unsigned int len);
static inline void skb_reserve (struct sk_buff *skb, int len);
4、 网络设备接口层
网络设备接口层的主要功能是为各种网络设备定义了统一、抽象的数据结构 net_device 结构体。定义在 include/linux/netdevice.h 中
4.1)以下仅仅对 net_device 结构体 的一些关键成员进行标注和说明:
- struct net_device {
- /*
- * This is the first field of the "visible" part of this structure
- * (i.e. as seen by users in the "Space.c" file). It is the name
- * of the interface.
- */
- char name[IFNAMSIZ]; // 全局信息 ,网络设备名称
- struct pm_qos_request_list pm_qos_req;
- /* device name hash chain */
- struct hlist_node name_hlist;
- /* snmp alias */
- char *ifalias;
- /*
- * I/O specific fields
- * FIXME: Merge these and struct ifmap into one
- */
- unsigned long mem_end; /* shared mem end */ // 硬件信息, 共享内存结束地址
- unsigned long mem_start; /* shared mem start */ // 共享内存起始地址
- unsigned long base_addr; /* device I/O address */ // 设备I/O 基地址
- unsigned int irq; /* device IRQ number */ // 设备中断号
- /*
- * Some hardware also needs these fields, but they are not
- * part of the usual set specified in Space.c.
- */
- unsigned char if_port; /* Selectable AUI, TP,..*/ // 指定设备使用的端口
- unsigned char dma; /* DMA channel */
- unsigned long state;
- struct list_head dev_list;
- struct list_head napi_list;
- struct list_head unreg_list;
- /* Net device features */
- unsigned long features;
- #define NETIF_F_SG 1 /* Scatter/gather IO. */
- #define NETIF_F_IP_CSUM 2 /* Can checksum TCP/UDP over IPv4. */
- #define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. */
- #define NETIF_F_HW_CSUM 8 /* Can checksum all the packets. */
- #define NETIF_F_IPV6_CSUM 16 /* Can checksum TCP/UDP over IPV6 */
- #define NETIF_F_HIGHDMA 32 /* Can DMA to high memory. */
- #define NETIF_F_FRAGLIST 64 /* Scatter/gather IO. */
- #define NETIF_F_HW_VLAN_TX 128 /* Transmit VLAN hw acceleration */
- #define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */
- #define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */
- #define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */
- #define NETIF_F_GSO 2048 /* Enable software GSO. */
- #define NETIF_F_LLTX 4096 /* LockLess TX - deprecated. Please */
- /* do not use LLTX in new drivers */
- #define NETIF_F_NETNS_LOCAL 8192 /* Does not change network namespaces */
- #define NETIF_F_GRO 16384 /* Generic receive offload */
- #define NETIF_F_LRO 32768 /* large receive offload */
- /* the GSO_MASK reserves bits 16 through 23 */
- #define NETIF_F_FCOE_CRC (1 << 24) /* FCoE CRC32 */
- #define NETIF_F_SCTP_CSUM (1 << 25) /* SCTP checksum offload */
- #define NETIF_F_FCOE_MTU (1 << 26) /* Supports max FCoE MTU, 2158 bytes*/
- #define NETIF_F_NTUPLE (1 << 27) /* N-tuple filters supported */
- #define NETIF_F_RXHASH (1 << 28) /* Receive hashing offload */
- /* Segmentation offload features */
- #define NETIF_F_GSO_SHIFT 16
- #define NETIF_F_GSO_MASK 0x00ff0000
- #define NETIF_F_TSO (SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT)
- #define NETIF_F_UFO (SKB_GSO_UDP << NETIF_F_GSO_SHIFT)
- #define NETIF_F_GSO_ROBUST (SKB_GSO_DODGY << NETIF_F_GSO_SHIFT)
- #define NETIF_F_TSO_ECN (SKB_GSO_TCP_ECN << NETIF_F_GSO_SHIFT)
- #define NETIF_F_TSO6 (SKB_GSO_TCPV6 << NETIF_F_GSO_SHIFT)
- #define NETIF_F_FSO (SKB_GSO_FCOE << NETIF_F_GSO_SHIFT)
- /* List of features with software fallbacks. */
- #define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | \
- NETIF_F_TSO6 | NETIF_F_UFO)
- #define NETIF_F_GEN_CSUM (NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
- #define NETIF_F_V4_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IP_CSUM)
- #define NETIF_F_V6_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM)
- #define NETIF_F_ALL_CSUM (NETIF_F_V4_CSUM | NETIF_F_V6_CSUM)
- /*
- * If one device supports one of these features, then enable them
- * for all in netdev_increment_features.
- */
- #define NETIF_F_ONE_FOR_ALL (NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ROBUST | \
- NETIF_F_SG | NETIF_F_HIGHDMA | \
- NETIF_F_FRAGLIST)
- /* Interface index. Unique device identifier */
- int ifindex;
- int iflink;
- struct net_device_stats stats;
- #ifdef CONFIG_WIRELESS_EXT
- /* List of functions to handle Wireless Extensions (instead of ioctl).
- * See <net/iw_handler.h> for details. Jean II */
- const struct iw_handler_def * wireless_handlers;
- /* Instance data managed by the core of Wireless Extensions. */
- struct iw_public_data * wireless_data;
- #endif
- /* Management operations */
- const struct net_device_ops *netdev_ops;
- const struct ethtool_ops *ethtool_ops;
- /* Hardware header description */
- const struct header_ops *header_ops;
- unsigned int flags; /* interface flags (a la BSD) */
- unsigned short gflags;
- unsigned short priv_flags; /* Like 'flags' but invisible to userspace. */
- unsigned short padded; /* How much padding added by alloc_netdev() */
- unsigned char operstate; /* RFC2863 operstate */
- unsigned char link_mode; /* mapping policy to operstate */
- unsigned int mtu; /* interface MTU value */ // 最大传输单元
- unsigned short type; /* interface hardware type */ // 接口的硬件类型
- unsigned short hard_header_len; /* hardware hdr length */ // 硬件报文头长度
- /* extra head- and tailroom the hardware may need, but not in all cases
- * can this be guaranteed, especially tailroom. Some cases also use
- * LL_MAX_HEADER instead to allocate the skb.
- */
- unsigned short needed_headroom;
- unsigned short needed_tailroom;
- struct net_device *master; /* Pointer to master device of a group,
- * which this device is member of.
- */
- /* Interface address info. */
- unsigned char perm_addr[MAX_ADDR_LEN]; /* permanent hw address */
- unsigned char addr_assign_type; /* hw address assignment type */
- unsigned char addr_len; /* hardware address length */
- unsigned short dev_id; /* for shared network cards */
- spinlock_t addr_list_lock;
- struct netdev_hw_addr_list uc; /* Unicast mac addresses */
- struct netdev_hw_addr_list mc; /* Multicast mac addresses */
- int uc_promisc;
- unsigned int promiscuity;
- unsigned int allmulti;
- /* Protocol specific pointers */
- #ifdef CONFIG_NET_DSA
- void *dsa_ptr; /* dsa specific data */
- #endif
- void *atalk_ptr; /* AppleTalk link */
- void *ip_ptr; /* IPv4 specific data */
- void *dn_ptr; /* DECnet specific data */
- void *ip6_ptr; /* IPv6 specific data */
- void *ec_ptr; /* Econet specific data */
- void *ax25_ptr; /* AX.25 specific data */
- struct wireless_dev *ieee80211_ptr; /* IEEE 802.11 specific data,
- assign before registering */
- /*
- * Cache line mostly used on receive path (including eth_type_trans())
- */
- unsigned long last_rx; /* Time of last Rx */
- /* Interface address info used in eth_type_trans() */
- unsigned char *dev_addr; /* hw address, (before bcast
- because most packets are
- unicast) */
- struct netdev_hw_addr_list dev_addrs; /* list of device
- hw addresses */
- unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ // 广播地址
- #ifdef CONFIG_RPS
- struct kset *queues_kset;
- struct netdev_rx_queue *_rx;
- /* Number of RX queues allocated at alloc_netdev_mq() time */
- unsigned int num_rx_queues;
- #endif
- struct netdev_queue rx_queue;
- rx_handler_func_t *rx_handler;
- void *rx_handler_data;
- struct netdev_queue *_tx ____cacheline_aligned_in_smp;
- /* Number of TX queues allocated at alloc_netdev_mq() time */
- unsigned int num_tx_queues;
- /* Number of TX queues currently active in device */
- unsigned int real_num_tx_queues;
- /* root qdisc from userspace point of view */
- struct Qdisc *qdisc;
- unsigned long tx_queue_len; /* Max frames per queue allowed */
- spinlock_t tx_global_lock;
- /*
- * One part is mostly used on xmit path (device)
- */
- /* These may be needed for future network-power-down code. */
- /*
- * trans_start here is expensive for high speed devices on SMP,
- * please use netdev_queue->trans_start instead.
- */
- unsigned long trans_start; /* Time (in jiffies) of last Tx */
- int watchdog_timeo; /* used by dev_watchdog() */
- struct timer_list watchdog_timer;
- /* Number of references to this device */
- atomic_t refcnt ____cacheline_aligned_in_smp;
- /* delayed register/unregister */
- struct list_head todo_list;
- /* device index hash chain */
- struct hlist_node index_hlist;
- struct list_head link_watch_list;
- /* register/unregister state machine */
- enum { NETREG_UNINITIALIZED=0,
- NETREG_REGISTERED, /* completed register_netdevice */
- NETREG_UNREGISTERING, /* called unregister_netdevice */
- NETREG_UNREGISTERED, /* completed unregister todo */
- NETREG_RELEASED, /* called free_netdev */
- NETREG_DUMMY, /* dummy device for NAPI poll */
- } reg_state:16;
- enum {
- RTNL_LINK_INITIALIZED,
- RTNL_LINK_INITIALIZING,
- } rtnl_link_state:16;
- /* Called from unregister, can be used to call free_netdev */
- void (*destructor)(struct net_device *dev);
- #ifdef CONFIG_NETPOLL
- struct netpoll_info *npinfo;
- #endif
- #ifdef CONFIG_NET_NS
- /* Network namespace this network device is inside */
- struct net *nd_net;
- #endif
- /* mid-layer private */
- void *ml_priv;
- /* GARP */
- struct garp_port *garp_port;
- /* class/net/name entry */
- struct device dev;
- /* space for optional device, statistics, and wireless sysfs groups */
- const struct attribute_group *sysfs_groups[4];
- /* rtnetlink link ops */
- const struct rtnl_link_ops *rtnl_link_ops;
- /* VLAN feature mask */
- unsigned long vlan_features;
- /* for setting kernel sock attribute on TCP connection setup */
- #define GSO_MAX_SIZE 65536
- unsigned int gso_max_size;
- #ifdef CONFIG_DCB
- /* Data Center Bridging netlink ops */
- const struct dcbnl_rtnl_ops *dcbnl_ops;
- #endif
- #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
- /* max exchange id for FCoE LRO by ddp */
- unsigned int fcoe_ddp_xid;
- #endif
- /* n-tuple filter list attached to this device */
- struct ethtool_rx_ntuple_list ethtool_ntuple_list;
- /* phy device may attach itself for hardware timestamping */
- struct phy_device *phydev;
- };
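4.2) 设备操作函数:部分定义在 include/linux/hdlc.h 中(注意:在 2.6.36 内核中,这些函数指针已集中到 include/linux/netdevice.h 的 struct net_device_ops 中,成员名带 ndo_ 前缀,如 ndo_open、ndo_stop、ndo_start_xmit;以下沿用旧式名称进行说明)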
- int (*open) (struct net_device *dev);
- int (*stop) (struct net_device *dev);
- int (*hard_start_xmit) (struct sk_buff *skb, struct net_device *dev);
- // hard_start_xmit() 函数会启动数据包的发送。当系统调用驱动程序的 hard_start_xmit() 函数时,需要向其传入一个sk_buff 结构体指针,
- // 致使驱动程序能获得从上层传下来的数据包。
- void (*tx_timeout) (struct net_device *dev); // 发送超时处理函数
- int (*hard_header) (struct sk_buff *skb, struct net_device *dev, unsigned short type, void *daddr, void *saddr, unsigned len );
- // hard_header() 函数完成硬件帧头填充,参数为sk_buff 指针,设备指针,协议类型,目的地址,源地址 和 数据长度。
- struct net_device_stats* (*get_stats) (struct net_device *dev); // 获取网络设备状态信息
- int (*do_ioctl) (struct net_device *dev, struct ifreq *ifr, int cmd); // do_ioctl() 函数用于进行设备特定的I/O 控制
- int (*set_config) (struct net_device *dev, struct ifmap *map); // 进行配置接口, 可用于改变设备的I/O 地址和中断号
- int (*set_mac_address) (struct net_device *dev, void *addr); // 用于设置设备 MAC 地址
5、设备驱动功能层
net_device 结构的成员,(属性 和 函数指针)需要被设备驱动功能层的具体数值 和 函数赋予。 对具体的设备 xxx ,工程师应该编写设备功能层的函数,
这些函数如: xxx_open(), xxx_stop(), xxx_tx(), xxx_hard_header(), xxx_get_stats(), xxx_tx_timeout() 等。
6、 网络设备 与 媒介层
网络设备与媒介层 直接对应于实际的硬件设备,设计范例:
- /* 寄存器定义 */
- #define DATA_REG 0x0004
- #define CMD_REG 0x0008
- /* 寄存器读写函数 */
- static u16 xxx_readword( u32 base_addr, int portno)
- { ... . . . }
- static void xxx_writeword(u32 base_addr, int portno , u16 value)
- { .. . . . . }
二、 DM9000 网卡驱动程序分析:
DM9000 驱动需要完成的工作 和 其对应的函数有:
1、 对网络设备驱动进行 注册 和 注销:
int register_netdev( struct net_device *dev);
void unregister_netdev ( struct net_device *dev);
2、 对网络设备进行初始化,初始化主要完成的工作为:
2.1) 进行硬件上的准备工作,检查网络设备是否存在,如果存在,则检测设备所使用的硬件资源。
2.2) 进行软件接口上的准备工作,分配 net_device 结构体并对其数据 和 函数指针成员赋值。
2.3) 获得设备的私有信息指针 并初始化其各成员的值。
3、对相应的网络设备进行打开 和 释放
void netif_start_queue (struct net_device *dev);
void netif_stop_queue (struct net_device *dev);
4、进行数据发送, 发送数据流程如下:
(1) 网络设备驱动程序从上层协议传递过来的 sk_buff 参数获得数据包的有效数据 和 长度, 将有效数据放入临时缓冲区。
(2) 对于以太网,如果有效数据的长度小于以太网冲突检测所要求数据帧的最小长度,ETH_ZLEN,则给临时缓冲区末尾填充0.
(3) 设置硬件的寄存器, 驱使网络设备进行发送操作。
5、进行数据接收
网络设备接收数据的主要方法是由中断引发中断处理函数,中断处理函数判断中断类型,如果为接收中断,则读取接收到
的数据, 分配 sk_buff 数据结构 和 数据缓冲区,将接收到的数据复制到数据缓冲区, 并调用netif_rx() 函数将 sk_buff 传递给
上层协议。
6、检查网络状态:
网络适配器硬件电路可以检测出链路上是否有载波,载波反映了网络的连接是否正常,网络设备驱动可以通过 netif_carrier_on()
和 netif_carrier_off() 函数改变设备的连接状态, 如果驱动检测到连接状态发生了变化,也应该以 netif_carrier_on() 和 netif_carrier_off()
函数的形式来通知内核。也可以以 netif_carrier_ok() 来检测载波信号是否存在。
7、对参数进行设置,和统计数据
在网络设备的驱动程序中还提供一些方法供系统对设备的参数进行设置 或 读取设备相关的信息。
8、 DM9000 驱动源码主要函数分析:
- typedef struct board_info { // 板载信息
- void __iomem *io_addr; /* Register I/O base address */
- void __iomem *io_data; /* Data I/O address */
- u16 irq; /* IRQ */
- } board_info_t;
- static int dm9000_ioctl(struct net_device *dev, struct ifreq *req, int cmd) // ioctl 函数
- {
- board_info_t *dm = to_dm9000_board(dev);
- if (!netif_running(dev))
- return -EINVAL;
- return generic_mii_ioctl(&dm->mii, if_mii(req), cmd, NULL);
- }
- dm9000_hash_table_unlocked(struct net_device *dev) // 设置 DM9000 多播(组播)地址哈希表
- static void dm9000_timeout(struct net_device *dev) // 看门狗超时,网络层将调用该函数
- {
- board_info_t *db = netdev_priv(dev);
- u8 reg_save;
- unsigned long flags;
- /* Save previous register address */
- reg_save = readb(db->io_addr);
- spin_lock_irqsave(&db->lock, flags);
- netif_stop_queue(dev);
- dm9000_reset(db);
- dm9000_init_dm9000(dev);
- /* We can accept TX packets again */
- dev->trans_start = jiffies; /* prevent tx timeout */
- netif_wake_queue(dev);
- /* Restore previous register address */
- writeb(reg_save, db->io_addr);
- spin_unlock_irqrestore(&db->lock, flags);
- }
- dm9000_start_xmit(struct sk_buff *skb, struct net_device *dev)
- {
- /* Move data to DM9000 TX RAM */
- writeb(DM9000_MWCMD, db->io_addr); // 将发送数据移至 DM9000 的 TX RAM
- }
- static void dm9000_tx_done () // 数据包发送完成
- static void dm9000_rx(struct net_device *dev) // 接收数据并传递给上层
- static irqreturn_t dm9000_interrupt()
- { }
- static int dm9000_open (struct net_device *dev) // 打开网卡端口
- {
- netif_start_queue (dev);
- }