网卡驱动设备watchdog-dev_watchdog


网络设备watchdog用于监控网卡驱动发送数据是否异常,如果异常就报错,并调用网卡驱动提供的超时处理函数。
基本原理:网卡设备初始化时,初始化watchdog定时器;用户空间打开网卡设备时,同时开启watchdog定时器。每次watchdog定时器超时,就检查网卡设备的发送队列发送数据是否超时,如果超时则报错,并调用网卡驱动提供的超时处理函数,
然后重新启动watchdog定时器,等待下一次检查。
linux3.10.xx
netdevice.h
\net\core\dev.c
\net\sched\sch_generic.c

初始化

网卡驱动在register_netdevice注册网络设备时,调用dev_init_scheduler初始化watchdog定时器。

register_netdevice---->dev_init_scheduler
/*
 * dev_init_scheduler - set up the initial queueing state of a net device.
 * @dev: network device being registered
 *
 * Points dev->qdisc at noop_qdisc, runs dev_init_scheduler_queue() with
 * noop_qdisc for every TX queue (and for the ingress queue when the
 * device has one), and initialises the watchdog timer so that it calls
 * dev_watchdog() with the device pointer as its argument.
 */
void dev_init_scheduler(struct net_device *dev)
{
	dev->qdisc = &noop_qdisc;
	netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
	if (dev_ingress_queue(dev))
		dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);

	/* Initialise watchdog_timer with dev_watchdog() as its handler. */
	setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
}

开启watchdog

网卡up时,开启watchdog。
__dev_change_flags–》__dev_open–》dev_activate(dev);

void dev_activate(struct net_device *dev)
{
	int need_watchdog;

	/* No queueing discipline is attached to device;
	   create default one i.e. pfifo_fast for devices,
	   which need queueing and noqueue_qdisc for
	   virtual interfaces
	 */

	if (dev->qdisc == &noop_qdisc) 流控策略初始化时为noop_qdisc
		attach_default_qdiscs(dev); 改变流控测试为默认pfifo_fast

	if (!netif_carrier_ok(dev))
		/* Delay activation until next carrier-on event */
		return;

	need_watchdog = 0;
	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog); 判断是否开启watchdog,不为noqueue_qdisc,就开启watchdog
	if (dev_ingress_queue(dev))
		transition_one_qdisc(dev, dev_ingress_queue(dev), NULL);

	if (need_watchdog) {前面判断开启
		dev->trans_start = jiffies; 记录开启时间
		dev_watchdog_up(dev); 开启watchdog
	}
}

/*
 * transition_one_qdisc - make a queue's sleeping qdisc its active qdisc.
 * @dev: owning network device (not used in the body)
 * @dev_queue: queue to switch over
 * @_need_watchdog: optional pointer to an int; set to 1 when this queue
 *                  needs the TX watchdog. May be NULL, in which case no
 *                  watchdog decision is made for this queue.
 */
static void transition_one_qdisc(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_need_watchdog)
{
	struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
	int *need_watchdog_p = _need_watchdog;

	/* Qdiscs flagged TCQ_F_BUILTIN keep their state bits untouched. */
	if (!(new_qdisc->flags & TCQ_F_BUILTIN))
		clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);

	rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
	/* Any qdisc other than noqueue_qdisc requires the watchdog. */
	if (need_watchdog_p && new_qdisc != &noqueue_qdisc) {
		/* Reset the per-queue last-transmit timestamp. */
		dev_queue->trans_start = 0;
		*need_watchdog_p = 1;
	}
}

dev_watchdog_up(dev); 开启watchdog

/*
 * dev_watchdog_up - start the TX watchdog of a device.
 * @dev: network device
 *
 * Thin wrapper that forwards to __netdev_watchdog_up().
 */
static void dev_watchdog_up(struct net_device *dev)
{
	__netdev_watchdog_up(dev);
}

void __netdev_watchdog_up(struct net_device *dev)
{
	if (dev->netdev_ops->ndo_tx_timeout) {ndo_tx_timeout为网卡驱动提供的发送超时处理函数
		if (dev->watchdog_timeo <= 0)
			dev->watchdog_timeo = 5*HZ; 默认超时处理 5*HZ
		if (!mod_timer(&dev->watchdog_timer,
			       round_jiffies(jiffies + dev->watchdog_timeo))) 开启定时器
			dev_hold(dev);
	}
}

超时处理dev_watchdog

dev_watchdog为watchdog定时器超时处理函数,只有网卡还在正常运行时,才进一步检查网卡发送队列是否超时。
判断发送队列是否超时(需同时满足以下两个条件):
1,netif_xmit_stopped发送队列已经停止
2,txq->trans_start最近一次发送数据的时间已经超过watchdog_timeo时间

static void dev_watchdog(unsigned long arg)
{
	struct net_device *dev = (struct net_device *)arg;

	netif_tx_lock(dev);
	判断
	if (!qdisc_tx_is_noop(dev)) { txq->qdisc不是noop_qdisc,
		if (netif_device_present(dev) &&  网卡设备还存在
		    netif_running(dev) && 网卡设备还在运行
		    netif_carrier_ok(dev)) { 网卡设备在线
			int some_queue_timedout = 0;
			unsigned int i;
			unsigned long trans_start;

			for (i = 0; i < dev->num_tx_queues; i++) {
				struct netdev_queue *txq;

				txq = netdev_get_tx_queue(dev, i);
				/*
				 * old device drivers set dev->trans_start
				 */
				trans_start = txq->trans_start ? : dev->trans_start; 记录最近一次网卡设备发送时间
				if (netif_xmit_stopped(txq) &&  网卡发送队列在运行,
				    time_after(jiffies, (trans_start +
							 dev->watchdog_timeo))) {  当前时间已经超过了最近一次网卡设备发送时间+watchdog_timeo
					some_queue_timedout = 1;
					txq->trans_timeout++;
					break;
				}
			}

			if (some_queue_timedout) { 超时后,告警,并调用网卡驱动的ndo_tx_timeout
				WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n",
				       dev->name, netdev_drivername(dev), i);
				dev->netdev_ops->ndo_tx_timeout(dev);
			}
			if (!mod_timer(&dev->watchdog_timer,
				       round_jiffies(jiffies +
						     dev->watchdog_timeo)))重新开启定时器
				dev_hold(dev);
		}
	}
	netif_tx_unlock(dev);

	dev_put(dev);
}

网卡在open时,网卡驱动提供的ndo_open都会调用netif_start_queue (net)开启网卡发送队列,这样dev_watchdog函数中netif_xmit_stopped返回0,就不会判断是否超时。
网卡在异常或发送队列长度超过门限时,会调用netif_stop_queue停止发送队列,此时netif_xmit_stopped返回非0,dev_watchdog才会进一步判断最近一次发送数据的时间是否超时。

/*
 * Bit numbers used in the state word of a TX queue. The QUEUE_STATE_*
 * macros defined inside the enum body are bit masks built from them.
 */
enum netdev_queue_state_t {
	/* Set by netif_tx_stop_queue(), cleared by netif_tx_start_queue(). */
	__QUEUE_STATE_DRV_XOFF,
	__QUEUE_STATE_STACK_XOFF,
	__QUEUE_STATE_FROZEN,
/* Mask matching either XOFF bit (DRV_XOFF or STACK_XOFF). */
#define QUEUE_STATE_ANY_XOFF ((1 << __QUEUE_STATE_DRV_XOFF)		| \
			      (1 << __QUEUE_STATE_STACK_XOFF))
/* Mask matching either XOFF bit or the FROZEN bit. */
#define QUEUE_STATE_ANY_XOFF_OR_FROZEN (QUEUE_STATE_ANY_XOFF		| \
					(1 << __QUEUE_STATE_FROZEN))
};
ndo_open都会调用netif_start_queue (net),清除__QUEUE_STATE_DRV_XOFF
/*
 * netif_tx_start_queue - mark one TX queue as started.
 * @dev_queue: TX queue to start
 *
 * Clears the __QUEUE_STATE_DRV_XOFF bit in the queue's state word.
 */
static inline void netif_tx_start_queue(struct netdev_queue *dev_queue)
{
	clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state);
}

/*
 * netif_start_queue - start the device's first TX queue.
 * @dev: network device
 *
 * Applies netif_tx_start_queue() to TX queue index 0 only.
 */
static inline void netif_start_queue(struct net_device *dev)
{
	netif_tx_start_queue(netdev_get_tx_queue(dev, 0));
}

网卡在异常或发送队列中长度超过了门限时会停止发送队列netif_stop_queue,设置__QUEUE_STATE_DRV_XOFF
/*
 * netif_tx_stop_queue - mark one TX queue as stopped by the driver.
 * @dev_queue: TX queue to stop; a NULL pointer is tolerated
 *
 * Sets the __QUEUE_STATE_DRV_XOFF bit in the queue's state word. When
 * @dev_queue is NULL the function warns, logs a hint and changes nothing.
 */
static inline void netif_tx_stop_queue(struct netdev_queue *dev_queue)
{
	/* Normal path: a valid queue simply gets its driver-XOFF bit set. */
	if (!WARN_ON(!dev_queue)) {
		set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state);
		return;
	}

	/* NULL queue: the warning has fired, add the explanatory message. */
	pr_info("netif_stop_queue() cannot be called before register_netdev()\n");
}

/**
 *	netif_stop_queue - stop transmitted packets
 *	@dev: network device
 *
 *	Stop upper layers calling the device hard_start_xmit routine.
 *	Used for flow control when transmit resources are unavailable.
 *
 *	Only TX queue index 0 is stopped; the work is done by
 *	netif_tx_stop_queue().
 */
static inline void netif_stop_queue(struct net_device *dev)
{
	netif_tx_stop_queue(netdev_get_tx_queue(dev, 0));
}

判断发送队列是否停止:检查__QUEUE_STATE_DRV_XOFF或__QUEUE_STATE_STACK_XOFF是否置位(即QUEUE_STATE_ANY_XOFF)
/*
 * netif_xmit_stopped - test whether a TX queue is stopped.
 * @dev_queue: TX queue to examine
 *
 * Returns true when any bit of QUEUE_STATE_ANY_XOFF is set in the queue's
 * state word, false otherwise.
 */
static inline bool netif_xmit_stopped(const struct netdev_queue *dev_queue)
{
	return dev_queue->state & QUEUE_STATE_ANY_XOFF;
}

发送时间trans_start 更新

dev_hard_start_xmit调用网卡驱动接口ndo_start_xmit发送完成数据后,调用txq_trans_update更新发送时间。

dev_hard_start_xmit:
		rc = ops->ndo_start_xmit(skb, dev);
		if (rc == NETDEV_TX_OK)
			txq_trans_update(txq);

/*
 * txq_trans_update - stamp a TX queue with the current time.
 * @txq: TX queue whose last-transmit timestamp is refreshed
 *
 * trans_start is set to the current jiffies value, except when
 * xmit_lock_owner is -1, in which case the timestamp is left unchanged.
 * NOTE(review): -1 presumably means "no CPU holds the xmit lock" - confirm
 * against the kernel sources.
 */
static inline void txq_trans_update(struct netdev_queue *txq)
{
	if (txq->xmit_lock_owner == -1)
		return;

	txq->trans_start = jiffies;
}

usbnet驱动

usbnet_open–》netif_start_queue (net);

发送完成回调函数tx_complete中,在发送出现异常时调用netif_stop_queue停止发送队列

tx_complete
		case -EPROTO:
		case -ETIME:
		case -EILSEQ:
			usb_mark_last_busy(dev->udev);
			if (!timer_pending (&dev->delay)) {
				mod_timer (&dev->delay,
					jiffies + THROTTLE_JIFFIES);
				netif_dbg(dev, link, dev->net,
					  "tx throttle %d\n", urb->status); 
			}
			netif_stop_queue (dev->net); 异常停止发送队列
			break;

usbnet_start_xmit发送数据时,如果发送队列长度超过了门限,也会调用netif_stop_queue停止发送队列

usbnet_start_xmit
	case 0:
		net->trans_start = jiffies; 每次发送数据时更新发送时间
		__usbnet_queue_skb(&dev->txq, skb, tx_start);
		if (dev->txq.qlen >= TX_QLEN (dev)) 超过门限
			netif_stop_queue (net);
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值