1、net_device结构体
net_device (include/linux/netdevice.h )
/**
* struct net_device - The DEVICE structure.
* Actually, this whole structure is a big mistake. It mixes I/O
* data with strictly "high-level" data, and it has to know about
* almost every data structure used in the INET module.
*
* @name: This is the first field of the "visible" part of this structure
* (i.e. as seen by users in the "Space.c" file). It is the name
* of the interface.
*
* @name_hlist: Device name hash chain, please keep it close to name[]
* @ifalias: SNMP alias
* @mem_end: Shared memory end
* @mem_start: Shared memory start
* @base_addr: Device I/O address
* @irq: Device IRQ number
*
* @carrier_changes: Stats to monitor carrier on<->off transitions
*
* @state: Generic network queuing layer state, see netdev_state_t
* @dev_list: The global list of network devices
* @napi_list: List entry, that is used for polling napi devices
* @unreg_list: List entry, that is used, when we are unregistering the
* device, see the function unregister_netdev
* @close_list: List entry, that is used, when we are closing the device
*
* @adj_list: Directly linked devices, like slaves for bonding
* @all_adj_list: All linked devices, *including* neighbours
* @features: Currently active device features
* @hw_features: User-changeable features
*
* @wanted_features: User-requested features
* @vlan_features: Mask of features inheritable by VLAN devices
*
* @hw_enc_features: Mask of features inherited by encapsulating devices
* This field indicates what encapsulation
* offloads the hardware is capable of doing,
* and drivers will need to set them appropriately.
*
* @mpls_features: Mask of features inheritable by MPLS
*
* @ifindex: interface index
* @group: The group, that the device belongs to
*
* @stats: Statistics struct, which was left as a legacy, use
* rtnl_link_stats64 instead
*
* @rx_dropped: Dropped packets by core network,
* do not use this in drivers
* @tx_dropped: Dropped packets by core network,
* do not use this in drivers
*
* @wireless_handlers: List of functions to handle Wireless Extensions,
* instead of ioctl,
* see <net/iw_handler.h> for details.
* @wireless_data: Instance data managed by the core of wireless extensions
*
* @netdev_ops: Includes several pointers to callbacks,
* if one wants to override the ndo_*() functions
* @ethtool_ops: Management operations
* @header_ops: Includes callbacks for creating,parsing,caching,etc
* of Layer 2 headers.
*
* @flags: Interface flags (a la BSD)
* @priv_flags: Like 'flags' but invisible to userspace,
* see if.h for the definitions
* @gflags: Global flags ( kept as legacy )
* @padded: How much padding added by alloc_netdev()
* @operstate: RFC2863 operstate
* @link_mode: Mapping policy to operstate
* @if_port: Selectable AUI, TP, ...
* @dma: DMA channel
* @mtu: Interface MTU value
* @type: Interface hardware type
* @hard_header_len: Hardware header length
*
* @needed_headroom: Extra headroom the hardware may need, but not in all
* cases can this be guaranteed
* @needed_tailroom: Extra tailroom the hardware may need, but not in all
* cases can this be guaranteed. Some cases also use
* LL_MAX_HEADER instead to allocate the skb
*
* interface address info:
*
* @perm_addr: Permanent hw address
* @addr_assign_type: Hw address assignment type
* @addr_len: Hardware address length
* @neigh_priv_len; Used in neigh_alloc(),
* initialized only in atm/clip.c
* @dev_id: Used to differentiate devices that share
* the same link layer address
* @dev_port: Used to differentiate devices that share
* the same function
* @addr_list_lock: XXX: need comments on this one
* @uc_promisc: Counter, that indicates, that promiscuous mode
* has been enabled due to the need to listen to
* additional unicast addresses in a device that
* does not implement ndo_set_rx_mode()
* @uc: unicast mac addresses
* @mc: multicast mac addresses
* @dev_addrs: list of device hw addresses
* @queues_kset: Group of all Kobjects in the Tx and RX queues
* @promiscuity: Number of times, the NIC is told to work in
* Promiscuous mode, if it becomes 0 the NIC will
* exit from working in Promiscuous mode
* @allmulti: Counter, enables or disables allmulticast mode
*
* @vlan_info: VLAN info
* @dsa_ptr: dsa specific data
* @tipc_ptr: TIPC specific data
* @atalk_ptr: AppleTalk link
* @ip_ptr: IPv4 specific data
* @dn_ptr: DECnet specific data
* @ip6_ptr: IPv6 specific data
* @ax25_ptr: AX.25 specific data
* @ieee80211_ptr: IEEE 802.11 specific data, assign before registering
*
* @last_rx: Time of last Rx
* @dev_addr: Hw address (before bcast,
* because most packets are unicast)
*
* @_rx: Array of RX queues
* @num_rx_queues: Number of RX queues
* allocated at register_netdev() time
* @real_num_rx_queues: Number of RX queues currently active in device
*
* @rx_handler: handler for received packets
* @rx_handler_data: XXX: need comments on this one
* @ingress_queue: XXX: need comments on this one
* @broadcast: hw bcast address
*
* @rx_cpu_rmap: CPU reverse-mapping for RX completion interrupts,
* indexed by RX queue number. Assigned by driver.
* This must only be set if the ndo_rx_flow_steer
* operation is defined
* @index_hlist: Device index hash chain
*
* @_tx: Array of TX queues
* @num_tx_queues: Number of TX queues allocated at alloc_netdev_mq() time
* @real_num_tx_queues: Number of TX queues currently active in device
* @qdisc: Root qdisc from userspace point of view
* @tx_queue_len: Max frames per queue allowed
* @tx_global_lock: XXX: need comments on this one
*
* @xps_maps: XXX: need comments on this one
*
* @trans_start: Time (in jiffies) of last Tx
* @watchdog_timeo: Represents the timeout that is used by
* the watchdog ( see dev_watchdog() )
* @watchdog_timer: List of timers
*
* @pcpu_refcnt: Number of references to this device
* @todo_list: Delayed register/unregister
* @link_watch_list: XXX: need comments on this one
*
* @reg_state: Register/unregister state machine
* @dismantle: Device is going to be freed
* @rtnl_link_state: This enum represents the phases of creating
* a new link
*
* @destructor: Called from unregister,
* can be used to call free_netdev
* @npinfo: XXX: need comments on this one
* @nd_net: Network namespace this network device is inside
*
* @ml_priv: Mid-layer private
* @lstats: Loopback statistics
* @tstats: Tunnel statistics
* @dstats: Dummy statistics
* @vstats: Virtual ethernet statistics
*
* @garp_port: GARP
* @mrp_port: MRP
*
* @dev: Class/net/name entry
* @sysfs_groups: Space for optional device, statistics and wireless
* sysfs groups
*
* @sysfs_rx_queue_group: Space for optional per-rx queue attributes
* @rtnl_link_ops: Rtnl_link_ops
*
* @gso_max_size: Maximum size of generic segmentation offload
* @gso_max_segs: Maximum number of segments that can be passed to the
* NIC for GSO
* @gso_min_segs: Minimum number of segments that can be passed to the
* NIC for GSO
*
* @dcbnl_ops: Data Center Bridging netlink ops
* @num_tc: Number of traffic classes in the net device
* @tc_to_txq: XXX: need comments on this one
* @prio_tc_map XXX: need comments on this one
*
* @fcoe_ddp_xid: Max exchange id for FCoE LRO by ddp
*
* @priomap: XXX: need comments on this one
* @phydev: Physical device may attach itself
* for hardware timestamping
*
* @qdisc_tx_busylock: XXX: need comments on this one
*
* FIXME: cleanup struct net_device such that network protocol info
* moves out.
*/
struct net_device {
char name[IFNAMSIZ];
struct hlist_node name_hlist;
char *ifalias;
/*
* I/O specific fields
* FIXME: Merge these and struct ifmap into one
*/
unsigned long mem_end;
unsigned long mem_start;
unsigned long base_addr;
int irq;
atomic_t carrier_changes;
/*
* Some hardware also needs these fields (state,dev_list,
* napi_list,unreg_list,close_list) but they are not
* part of the usual set specified in Space.c.
*/
unsigned long state;
struct list_head dev_list;
struct list_head napi_list;
struct list_head unreg_list;
struct list_head close_list;
struct list_head ptype_all;
struct list_head ptype_specific;
struct {
struct list_head upper;
struct list_head lower;
} adj_list;
struct {
struct list_head upper;
struct list_head lower;
} all_adj_list;
netdev_features_t features;
netdev_features_t hw_features;
netdev_features_t wanted_features;
netdev_features_t vlan_features;
netdev_features_t hw_enc_features;
netdev_features_t mpls_features;
int ifindex;
int group;
struct net_device_stats stats;
atomic_long_t rx_dropped;
atomic_long_t tx_dropped;
#ifdef CONFIG_WIRELESS_EXT
const struct iw_handler_def * wireless_handlers;
struct iw_public_data * wireless_data;
#endif
const struct net_device_ops *netdev_ops;
const struct ethtool_ops *ethtool_ops;
#ifdef CONFIG_NET_SWITCHDEV
const struct swdev_ops *swdev_ops;
#endif
const struct header_ops *header_ops;
unsigned int flags;
unsigned int priv_flags;
unsigned short gflags;
unsigned short padded;
unsigned char operstate;
unsigned char link_mode;
unsigned char if_port;
unsigned char dma;
unsigned int mtu;
unsigned short type;
unsigned short hard_header_len;
unsigned short needed_headroom;
unsigned short needed_tailroom;
/* Interface address info. */
unsigned char perm_addr[MAX_ADDR_LEN];
unsigned char addr_assign_type;
unsigned char addr_len;
unsigned short neigh_priv_len;
unsigned short dev_id;
unsigned short dev_port;
spinlock_t addr_list_lock;
unsigned char name_assign_type;
bool uc_promisc;
struct netdev_hw_addr_list uc;
struct netdev_hw_addr_list mc;
struct netdev_hw_addr_list dev_addrs;
#ifdef CONFIG_SYSFS
struct kset *queues_kset;
#endif
unsigned int promiscuity;
unsigned int allmulti;
/* Protocol specific pointers */
#if IS_ENABLED(CONFIG_VLAN_8021Q)
struct vlan_info __rcu *vlan_info;
#endif
#if IS_ENABLED(CONFIG_NET_DSA)
struct dsa_switch_tree *dsa_ptr;
#endif
#if IS_ENABLED(CONFIG_TIPC)
struct tipc_bearer __rcu *tipc_ptr;
#endif
void *atalk_ptr;
struct in_device __rcu *ip_ptr;
struct dn_dev __rcu *dn_ptr;
struct inet6_dev __rcu *ip6_ptr;
void *ax25_ptr;
struct wireless_dev *ieee80211_ptr;
struct wpan_dev *ieee802154_ptr;
#if IS_ENABLED(CONFIG_MPLS_ROUTING)
struct mpls_dev __rcu *mpls_ptr;
#endif
/*
* Cache lines mostly used on receive path (including eth_type_trans())
*/
unsigned long last_rx;
/* Interface address info used in eth_type_trans() */
unsigned char *dev_addr;
#ifdef CONFIG_SYSFS
struct netdev_rx_queue *_rx;
unsigned int num_rx_queues;
unsigned int real_num_rx_queues;
#endif
unsigned long gro_flush_timeout;
rx_handler_func_t __rcu *rx_handler;
void __rcu *rx_handler_data;
struct netdev_queue __rcu *ingress_queue;
unsigned char broadcast[MAX_ADDR_LEN];
#ifdef CONFIG_RFS_ACCEL
struct cpu_rmap *rx_cpu_rmap;
#endif
struct hlist_node index_hlist;
/*
* Cache lines mostly used on transmit path
*/
struct netdev_queue *_tx ____cacheline_aligned_in_smp;
unsigned int num_tx_queues;
unsigned int real_num_tx_queues;
struct Qdisc *qdisc;
unsigned long tx_queue_len;
spinlock_t tx_global_lock;
int watchdog_timeo;
#ifdef CONFIG_XPS
struct xps_dev_maps __rcu *xps_maps;
#endif
/* These may be needed for future network-power-down code. */
/*
* trans_start here is expensive for high speed devices on SMP,
* please use netdev_queue->trans_start instead.
*/
unsigned long trans_start;
struct timer_list watchdog_timer;
int __percpu *pcpu_refcnt;
struct list_head todo_list;
struct list_head link_watch_list;
enum { NETREG_UNINITIALIZED=0,
NETREG_REGISTERED, /* completed register_netdevice */
NETREG_UNREGISTERING, /* called unregister_netdevice */
NETREG_UNREGISTERED, /* completed unregister todo */
NETREG_RELEASED, /* called free_netdev */
NETREG_DUMMY, /* dummy device for NAPI poll */
} reg_state:8;
bool dismantle;
enum {
RTNL_LINK_INITIALIZED,
RTNL_LINK_INITIALIZING,
} rtnl_link_state:16;
void (*destructor)(struct net_device *dev);
#ifdef CONFIG_NETPOLL
struct netpoll_info __rcu *npinfo;
#endif
possible_net_t nd_net;
/* mid-layer private */
union {
void *ml_priv;
struct pcpu_lstats __percpu *lstats;
struct pcpu_sw_netstats __percpu *tstats;
struct pcpu_dstats __percpu *dstats;
struct pcpu_vstats __percpu *vstats;
};
struct garp_port __rcu *garp_port;
struct mrp_port __rcu *mrp_port;
struct device dev;
const struct attribute_group *sysfs_groups[4];
const struct attribute_group *sysfs_rx_queue_group;
const struct rtnl_link_ops *rtnl_link_ops;
/* for setting kernel sock attribute on TCP connection setup */
#define GSO_MAX_SIZE 65536
unsigned int gso_max_size;
#define GSO_MAX_SEGS 65535
u16 gso_max_segs;
u16 gso_min_segs;
#ifdef CONFIG_DCB
const struct dcbnl_rtnl_ops *dcbnl_ops;
#endif
u8 num_tc;
struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE];
u8 prio_tc_map[TC_BITMASK + 1];
#if IS_ENABLED(CONFIG_FCOE)
unsigned int fcoe_ddp_xid;
#endif
#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
struct netprio_map __rcu *priomap;
#endif
struct phy_device *phydev;
struct lock_class_key *qdisc_tx_busylock;
};
重要成员
类型 | 类型名 | 成员名 | 作用 |
char | char | name | 网卡名,如eth0 |
struct | net_device_ops * | netdev_ops; | 网卡操作函数,如up,down,发包等 |
int | int __percpu | pcpu_refcnt | 网卡引用计数,为0时才可以在卸载时完全销毁 |
2、申请net_device
3、注册流程 register_netdev
/**
* register_netdevice - register a network device
* @dev: device to register
*
* Take a completed network device structure and add it to the kernel
* interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
* chain. 0 is returned on success. A negative errno code is returned
* on a failure to set up the device, or if the name is a duplicate.
*
* Callers must hold the rtnl semaphore. You may want
* register_netdev() instead of this.
*
* BUGS:
* The locking appears insufficient to guarantee two parallel registers
* will not get the same name.
*/
int register_netdevice(struct net_device *dev)
{
int ret;
struct net *net = dev_net(dev);
BUG_ON(dev_boot_phase);
ASSERT_RTNL();
might_sleep();
/* When net_device's are persistent, this will be fatal. */
BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
BUG_ON(!net);
spin_lock_init(&dev->addr_list_lock);
netdev_set_addr_lockdep_class(dev);
/* 网卡名合法性检查 */
ret = dev_get_valid_name(net, dev, dev->name);
if (ret < 0)
goto out;
/* Init, if this function is available */
/* 如果有初始化回调函数则调用 */
if (dev->netdev_ops->ndo_init) {
ret = dev->netdev_ops->ndo_init(dev);
if (ret) {
if (ret > 0)
ret = -EIO;
goto out;
}
}
if (((dev->hw_features | dev->features) &
NETIF_F_HW_VLAN_CTAG_FILTER) &&
(!dev->netdev_ops->ndo_vlan_rx_add_vid ||
!dev->netdev_ops->ndo_vlan_rx_kill_vid)) {
netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n");
ret = -EINVAL;
goto err_uninit;
}
ret = -EBUSY;
/* 分配一个唯一的接口索引 */
if (!dev->ifindex)
dev->ifindex = dev_new_index(net);
else if (__dev_get_by_index(net, dev->ifindex))
goto err_uninit;
/* Transfer changeable features to wanted_features and enable
* software offloads (GSO and GRO).
*/
dev->hw_features |= NETIF_F_SOFT_FEATURES;
dev->features |= NETIF_F_SOFT_FEATURES;
dev->wanted_features = dev->features & dev->hw_features;
if (!(dev->flags & IFF_LOOPBACK)) {
dev->hw_features |= NETIF_F_NOCACHE_COPY;
}
/* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
*/
dev->vlan_features |= NETIF_F_HIGHDMA;
/* Make NETIF_F_SG inheritable to tunnel devices.
*/
dev->hw_enc_features |= NETIF_F_SG;
/* Make NETIF_F_SG inheritable to MPLS.
*/
dev->mpls_features |= NETIF_F_SG;
/* 通知链:网卡初始化 */
ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
ret = notifier_to_errno(ret);
if (ret)
goto err_uninit;
/* 将网络设备注册到设备模型中,用户态的/sys/class/net目录下会有该设备目录 */
ret = netdev_register_kobject(dev);
if (ret)
goto err_uninit;
/* 设置为已注册状态 */
dev->reg_state = NETREG_REGISTERED;
__netdev_update_features(dev);
/*
* Default initial state at registry is that the
* device is present.
*/
set_bit(__LINK_STATE_PRESENT, &dev->state);
linkwatch_init_dev(dev);
dev_init_scheduler(dev);
/* 引用计数+1=1 */
dev_hold(dev);
/* 将网络设备加入到命名空间对应链表中 */
list_netdevice(dev);
add_device_randomness(dev->dev_addr, dev->addr_len);
/* If the device has permanent device address, driver should
* set dev_addr and also addr_assign_type should be set to
* NET_ADDR_PERM (default value).
*/
if (dev->addr_assign_type == NET_ADDR_PERM)
memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
/* Notify protocols, that a new device appeared. */
/* 通知其他模块NETDEV_REGISTER事件 */
ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
ret = notifier_to_errno(ret);
if (ret) {
rollback_registered(dev);
dev->reg_state = NETREG_UNREGISTERED;
}
/*
* Prevent userspace races by waiting until the network
* device is fully setup before sending notifications.
*/
if (!dev->rtnl_link_ops ||
dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
out:
return ret;
err_uninit:
if (dev->netdev_ops->ndo_uninit)
dev->netdev_ops->ndo_uninit(dev);
goto out;
}
EXPORT_SYMBOL(register_netdevice);