linux协议栈层次---
osi模型(open systems interconnection)但是linux中网络栈的介绍一般分为四层的internet模型。
要从协议层向设备发送数据,需要使用dev_queue_xmit函数,这个函数对数据进行排队,并交由底层设备驱动程序进行最终传输,报文的接收通常是使用netif_re执行的。当底层设备驱动程序接收到一个报文(包含在所分配的sk_buff中)时,就会通过调用netif_rx_chedule将sk_buff在上层协议队列中进行排队,供以后进行处理。
驱动程序---
网络体系结构的最底部是负责管理物理网络设备的设备驱动程序层。
linux网卡设备描述----
每个网络接口都由一个net_device结构来描述,该结构可使用如下内核函数动态分配:
1struct net_device *alloc_netdev
(
int sizeof_piv,私有数据区大小
const char *mask,设备名
void (*setup)(struct net_device *)初始化函数
)
2--struct net_device *alloc_echerdev(int sizeof_priv)
两个函数的关系???
struct net_device
{
/*
* This is the first field of the "visible" part of this structure
* (i.e. as seen by users in the "Space.c" file). It is the name
* the interface.
*/
char name[IFNAMSIZ];设备名如eth%d
/* device name hash chain */
struct hlist_node name_hlist;
/* snmp alias */
char *ifalias;
/*
* I/O specific fields
* FIXME: Merge these and struct ifmap into one
*/
unsigned long mem_end; /* shared mem end */
unsigned long mem_start; /* shared mem start */
unsigned long base_addr; i/o基地址/* device I/O address */
unsigned int irq; 中断号/* device IRQ number */
/*
* Some hardware also needs these fields, but they are not
* part of the usual set specified in Space.c.
*/
unsigned char if_port; /* Selectable AUI, TP,..*/
unsigned char dma; /* DMA channel */
unsigned long state;设备状态
struct list_head dev_list;
struct list_head napi_list;
/* Net device features */
unsigned long features;
#define NETIF_F_SG 1 /* Scatter/gather IO. */
#define NETIF_F_IP_CSUM 2 /* Can checksum TCP/UDP over IPv4. */
#define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. */
#define NETIF_F_HW_CSUM 8 /* Can checksum all the packets. */
#define NETIF_F_IPV6_CSUM 16 /* Can checksum TCP/UDP over IPV6 */
#define NETIF_F_HIGHDMA 32 /* Can DMA to high memory. */
#define NETIF_F_FRAGLIST 64 /* Scatter/gather IO. */
#define NETIF_F_HW_VLAN_TX 128 /* Transmit VLAN hw acceleration */
#define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */
#define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */
#define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */
#define NETIF_F_GSO 2048 /* Enable software GSO. */
#define NETIF_F_LLTX 4096 /* LockLess TX - deprecated. Please */
/* do not use LLTX in new drivers */
#define NETIF_F_NETNS_LOCAL 8192 /* Does not change network namespaces */
#define NETIF_F_GRO 16384 /* Generic receive offload */
#define NETIF_F_LRO 32768 /* large receive offload */
/* Segmentation offload features */
#define NETIF_F_GSO_SHIFT 16
#define NETIF_F_GSO_MASK 0xffff0000
#define NETIF_F_TSO (SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT)
#define NETIF_F_UFO (SKB_GSO_UDP << NETIF_F_GSO_SHIFT)
#define NETIF_F_GSO_ROBUST (SKB_GSO_DODGY << NETIF_F_GSO_SHIFT)
#define NETIF_F_TSO_ECN (SKB_GSO_TCP_ECN << NETIF_F_GSO_SHIFT)
#define NETIF_F_TSO6 (SKB_GSO_TCPV6 << NETIF_F_GSO_SHIFT)
/* List of features with software fallbacks. */
#define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6)
#define NETIF_F_GEN_CSUM (NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
#define NETIF_F_V4_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IP_CSUM)
#define NETIF_F_V6_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM)
#define NETIF_F_ALL_CSUM (NETIF_F_V4_CSUM | NETIF_F_V6_CSUM)
/*
* If one device supports one of these features, then enable them
* for all in netdev_increment_features.
*/
#define NETIF_F_ONE_FOR_ALL (NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ROBUST | \
NETIF_F_SG | NETIF_F_HIGHDMA | \
NETIF_F_FRAGLIST)
/* Interface index. Unique device identifier */
int ifindex;
int iflink;
struct net_device_stats stats;
#ifdef CONFIG_WIRELESS_EXT
/* List of functions to handle Wireless Extensions (instead of ioctl).
* See <net/iw_handler.h> for details. Jean II */
const struct iw_handler_def * wireless_handlers;
/* Instance data managed by the core of Wireless Extensions. */
struct iw_public_data * wireless_data;
#endif
/* Management operations */
const struct net_device_ops *netdev_ops;
const struct ethtool_ops *ethtool_ops;
/* Hardware header description */
const struct header_ops *header_ops;
unsigned int flags; /* interface flags (a la BSD) */
unsigned short gflags;
unsigned short priv_flags; /* Like 'flags' but invisible to userspace. */
unsigned short padded; /* How much padding added by alloc_netdev() */
unsigned char operstate; /* RFC2863 operstate */
unsigned char link_mode; /* mapping policy to operstate */
unsigned mtu; /* interface MTU value */
unsigned short type; /* interface hardware type */
unsigned short hard_header_len; /* hardware hdr length */
/* extra head- and tailroom the hardware may need, but not in all cases
* can this be guaranteed, especially tailroom. Some cases also use
* LL_MAX_HEADER instead to allocate the skb.
*/
unsigned short needed_headroom;
unsigned short needed_tailroom;
struct net_device *master; /* Pointer to master device of a group,
* which this device is member of.
*/
/* Interface address info. */
unsigned char perm_addr[MAX_ADDR_LEN]; /* permanent hw address */
unsigned char addr_len; /* hardware address length */
unsigned short dev_id; /* for shared network cards */
spinlock_t addr_list_lock;
struct dev_addr_list *uc_list; /* Secondary unicast mac addresses */
int uc_count; /* Number of installed ucasts */
int uc_promisc;
struct dev_addr_list *mc_list; /* Multicast mac addresses */
int mc_count; /* Number of installed mcasts */
unsigned int promiscuity;
unsigned int allmulti;
/* Protocol specific pointers */
#ifdef CONFIG_NET_DSA
void *dsa_ptr; /* dsa specific data */
#endif
void *atalk_ptr; /* AppleTalk link */
void *ip_ptr; /* IPv4 specific data */
void *dn_ptr; /* DECnet specific data */
void *ip6_ptr; /* IPv6 specific data */
void *ec_ptr; /* Econet specific data */
void *ax25_ptr; /* AX.25 specific data */
struct wireless_dev *ieee80211_ptr; /* IEEE 802.11 specific data,
assign before registering */
/*
* Cache line mostly used on receive path (including eth_type_trans())
*/
unsigned long last_rx; /* Time of last Rx */
/* Interface address info used in eth_type_trans() */
unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address, (before bcast
because most packets are unicast) */
unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */
struct netdev_queue rx_queue;
struct netdev_queue *_tx ____cacheline_aligned_in_smp;
/* Number of TX queues allocated at alloc_netdev_mq() time */
unsigned int num_tx_queues;
/* Number of TX queues currently active in device */
unsigned int real_num_tx_queues;
unsigned long tx_queue_len; /* Max frames per queue allowed */
spinlock_t tx_global_lock;
/*
* One part is mostly used on xmit path (device)
*/
/* These may be needed for future network-power-down code. */
unsigned long trans_start; /* Time (in jiffies) of last Tx */
int watchdog_timeo; /* used by dev_watchdog() */
struct timer_list watchdog_timer;
/* Number of references to this device */
atomic_t refcnt ____cacheline_aligned_in_smp;
/* delayed register/unregister */
struct list_head todo_list;
/* device index hash chain */
struct hlist_node index_hlist;
struct net_device *link_watch_next;
/* register/unregister state machine */
enum { NETREG_UNINITIALIZED=0,
NETREG_REGISTERED, /* completed register_netdevice */
NETREG_UNREGISTERING, /* called unregister_netdevice */
NETREG_UNREGISTERED, /* completed unregister todo */
NETREG_RELEASED, /* called free_netdev */
NETREG_DUMMY, /* dummy device for NAPI poll */
} reg_state;
/* Called from unregister, can be used to call free_netdev */
void (*destructor)(struct net_device *dev);
#ifdef CONFIG_NETPOLL
struct netpoll_info *npinfo;
#endif
#ifdef CONFIG_NET_NS
/* Network namespace this network device is inside */
struct net *nd_net;
#endif
/* mid-layer private */
void *ml_priv;
/* bridge stuff */
struct net_bridge_port *br_port;
/* macvlan */
struct macvlan_port *macvlan_port;
/* GARP */
struct garp_port *garp_port;
/* class/net/name entry */
struct device dev;
/* space for optional statistics and wireless sysfs groups */
struct attribute_group *sysfs_groups[3];
/* rtnetlink link ops */
const struct rtnl_link_ops *rtnl_link_ops;
/* VLAN feature mask */
unsigned long vlan_features;
/* for setting kernel sock attribute on TCP connection setup */
#define GSO_MAX_SIZE 65536
unsigned int gso_max_size;
#ifdef CONFIG_DCB
/* Data Center Bridging netlink ops */
struct dcbnl_rtnl_ops *dcbnl_ops;
#endif
#ifdef CONFIG_COMPAT_NET_DEV_OPS
struct {
int (*init)(struct net_device *dev);
void (*uninit)(struct net_device *dev);
int (*open)(struct net_device *dev);
int (*stop)(struct net_device *dev);
int (*hard_start_xmit) (struct sk_buff *skb,
struct net_device *dev);
u16 (*select_queue)(struct net_device *dev,
struct sk_buff *skb);
void (*change_rx_flags)(struct net_device *dev,
int flags);
void (*set_rx_mode)(struct net_device *dev);
void (*set_multicast_list)(struct net_device *dev);
int (*set_mac_address)(struct net_device *dev,
void *addr);
int (*validate_addr)(struct net_device *dev);
int (*do_ioctl)(struct net_device *dev,
struct ifreq *ifr, int cmd);
int (*set_config)(struct net_device *dev,
struct ifmap *map);
int (*change_mtu)(struct net_device *dev, int new_mtu);
int (*neigh_setup)(struct net_device *dev,
struct neigh_parms *);
void (*tx_timeout) (struct net_device *dev);
struct net_device_stats* (*get_stats)(struct net_device *dev);
void (*vlan_rx_register)(struct net_device *dev,
struct vlan_group *grp);
void (*vlan_rx_add_vid)(struct net_device *dev,
unsigned short vid);
void (*vlan_rx_kill_vid)(struct net_device *dev,
unsigned short vid);
#ifdef CONFIG_NET_POLL_CONTROLLER
void (*poll_controller)(struct net_device *dev);
#endif
};
#endif
}
初始化函数---
int (*init)(struct net_device *dev)
该函数在register_netdev时被调用,完成对net_device结构初始化。
基本方法-----
int (*open) (struct net_device *dev)
打开接口,ficonfig激活时,接口将被打开。
int (*stop) (struct net_device *dev)
停止接口
int (*hard_start_xmit) (struct sk_buff *skb,struct net_device *dev)
数据发送函数
可选操作-----
int (*do_ioctl) (struct net_device *dev,struct ifreq *ifr,int cmd)
处理特定接口的ioctl命令
int (*set_mac_address) (struct net_device *dev, void *addr)
改变mac地址的函数,需要硬件支持该功能。
设备注册----
网络接口驱动的注册方式与字符驱动不同之处在于它没有主次设备号,并使用如下函数注册。
int register_netdev(struct net_device *dev)
sk_buff
linux内核中的每个网络数据包都由一个套接字缓冲区结构struct sk_buff描述,即一个sk_buff结构就是一个包,指向sk_buff的指针通常被称作skb。
struct sk_buff {
/* These two members must be first. */
struct sk_buff *next;
struct sk_buff *prev;
struct sock *sk;
ktime_t tstamp;
struct net_device *dev;处理该包的设备
union {
struct dst_entry *dst;
struct rtable *rtable;
};
#ifdef CONFIG_XFRM
struct sec_path *sp;
#endif
/*
* This is the control buffer. It is free to use for every
* layer. Please put your private variables there. If you
* want to keep them across layers you have to do a skb_clone()
* first. This is owned by whoever has the skb queued ATM.
*/
char cb[48];
unsigned int len,
data_len;
__u16 mac_len,
hdr_len;
union {
__wsum csum;
struct {
__u16 csum_start;
__u16 csum_offset;
};
};
__u32 priority;
__u8 local_df:1,
cloned:1,
ip_summed:2,
nohdr:1,
nfctinfo:3;
__u8 pkt_type:3,
fclone:2,
ipvs_property:1,
peeked:1,
nf_trace:1;
__be16 protocol;
void (*destructor)(struct sk_buff *skb);
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
struct nf_conntrack *nfct;
struct sk_buff *nfct_reasm;
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
struct nf_bridge_info *nf_bridge;
#endif
int iif;
__u16 queue_mapping;
#ifdef CONFIG_NET_SCHED
__u16 tc_index; /* traffic control index */
#ifdef CONFIG_NET_CLS_ACT
__u16 tc_verd; /* traffic control verdict */
#endif
#endif
#ifdef CONFIG_IPV6_NDISC_NODETYPE
__u8 ndisc_nodetype:2;
#endif
#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE)
__u8 do_not_encrypt:1;
__u8 requeue:1;
#endif
/* 0/13/14 bit hole */
#ifdef CONFIG_NET_DMA
dma_cookie_t dma_cookie;
#endif
#ifdef CONFIG_NETWORK_SECMARK
__u32 secmark;
#endif
__u32 mark;
__u16 vlan_tci;
sk_buff_data_t transport_header;
sk_buff_data_t network_header;
sk_buff_data_t mac_header;
/* These elements must be at the end, see alloc_skb() for details. */
sk_buff_data_t tail;有效数据的结束
sk_buff_data_t end;分配空间的结束
unsigned char *head,分配空间的开始
*data;有效数据的开始
unsigned int truesize;
atomic_t users;
};
skb操作函数-----
struct sk_buff *alloc_skb(unsigned int len, int priority)
分配一个sk_buff结构,供协议栈代码使用
struct sk_buff *dev_alloc_skb(unsigned int len)
分配一个sk_buff结构,供驱动代码使用
unsigned char *skb_push(struct sk_buff *skb,int len)
向后移动skb的tail指针,并返回tail移动之前的值。
unsigned char *skb_put(struct sk_buf *skb,int len)
向前移动skb的head指针,并返回head移动之后的值。
kfree_skb(struct sk_buff *skb)
释放一个sk_buff结构,供协议栈代码使用
dev_kfree_skb(struct sk_buff *skb)
释放一个sk_buff结构,供驱动代码使用。
设备打开
open请求任何它需要的系统资源并且启动接口:
注册中断,dma等。
设置寄存器,启动设备。
启动发送队列。
设备打开一个列子
int net_open(struct net _device *dev)
{
申请中断}
request_irq(dev->irq,&net_interrupt,SA_SHIRQ,"dm9000",dev);
设置寄存器,启动设备
。。。。。。。。。。。
启动发送队列
netif_start_queue(dev);
数据发送---
当核心需要发送一个数据包时,它调用hard_start_transmit函数,该函数将最终调用到net_device结构中的hard_start_xmit函数指针。
数据接收---
网络接口驱动可以实现两种方式的报文接收
中断和查询,linux中驱动多采用中断方式。
数据接收流程
1分配skb
skb=dev_alloc_skb(pkt->datalen+2)
2从硬件中读取数据到skb
3调用netif_rx将数据交给协议栈
netif_rx(skb)
中断处理
网络接口通常支持3种类型的中断:
新报文到达中断,报文发送完成中断,和出错中断。中断处理程序可通过查看网卡中的中断寄存器,来分辨出中断的类型
总结------
两个重要的结构
sk_buff
net_device