默认网络设备流量控制


绝大多数的场景下,网络设备都是使用默认的流量控制机制在工作,这篇笔记分析了默认的流量控制机制。

数据结构

排队规则: Qdisc

/*
 * One queueing-discipline instance attached to a device queue.
 */
struct Qdisc
{
	/* Enqueue and dequeue operations of this instance. */
	int (*enqueue)(struct sk_buff *skb, struct Qdisc *dev);
	struct sk_buff *(*dequeue)(struct Qdisc *dev);
	unsigned		flags;
#define TCQ_F_BUILTIN	1
#define TCQ_F_THROTTLED	2
#define TCQ_F_INGRESS	4
	int	padded; /* The Qdisc is placed at an aligned address, so padding may precede it; this is the size of that padding. */
	struct Qdisc_ops *ops; /* Operation table of the concrete queueing discipline. */
	struct qdisc_size_table	*stab;
	u32	handle; /* Handle of this qdisc. */
	u32	parent; /* Handle of the parent; together with `handle` this allows building complex queueing setups. */
	atomic_t		refcnt;
	unsigned long		state;
	struct sk_buff		*gso_skb;
	struct sk_buff_head	q;
	struct netdev_queue	*dev_queue;
	struct Qdisc *next_sched;
	struct list_head	list;

	struct gnet_stats_basic	bstats;
	struct gnet_stats_queue	qstats;
	struct gnet_stats_rate_est	rate_est;
	int			(*reshape_fail)(struct sk_buff *skb,
					struct Qdisc *q);

	void *u32_node;

	/* This field is deprecated, but it is still used by CBQ
	 * and it will live until better solution will be invented.
	 */
	struct Qdisc *__parent;
};

默认网络设备排队规则

dev_init_scheduler()

在register_netdevice()中有调用dev_init_scheduler()对网络设备对象中的流量控制字段进行初始化。

/*
 * Per-queue callback: store the qdisc passed through the opaque argument
 * in both the `qdisc` and `qdisc_sleeping` fields of dev_queue.
 * The `dev` argument is not used.
 */
static void dev_init_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc)
{
	struct Qdisc *initial = _qdisc;

	dev_queue->qdisc_sleeping = initial;
	dev_queue->qdisc = initial;
}

/*
 * Initialise the traffic-control state of a network device: every queue
 * starts out on noop_qdisc and the watchdog timer is set up.
 */
void dev_init_scheduler(struct net_device *dev)
{
	/* Point every transmit queue at noop_qdisc. */
	netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
	/* Point the receive queue at noop_qdisc as well. */
	dev_init_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc);

	/* Set up watchdog_timer with dev_watchdog as handler and dev as its argument. */
	setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
}

可见,设备注册时,会将所有的发送队列和接收队列的排队规则都设置为noop_qdisc,顾名思义,这个队列什么都不做,仅仅是释放入队列的skb,所以使用该队列是没有办法进行数据包收发的。

dev_activate()

设备打开时,在dev_open()中会调用dev_activate()激活网络设备对象的发送队列,激活后,设备接口层的dev_queue_xmit()才能通过流控机制向网卡发送数据。

/*
 * Activate the queueing disciplines of a device.
 *
 * Default qdiscs are attached first if none has been configured; the
 * sleeping qdiscs are then made current, but only while the carrier is up.
 */
void dev_activate(struct net_device *dev)
{
	int need_watchdog = 0;

	/* No queueing discipline is attached to device;
	   create default one i.e. pfifo_fast for devices,
	   which need queueing and noqueue_qdisc for
	   virtual interfaces
	 */
	if (dev_all_qdisc_sleeping_noop(dev))
		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);

	if (netif_carrier_ok(dev)) {
		/* Carrier is up: switch every tx queue, then the rx queue. */
		netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
		transition_one_qdisc(dev, &dev->rx_queue, NULL);

		/* Set by transition_one_qdisc() for any tx queue not on noqueue_qdisc. */
		if (need_watchdog) {
			dev->trans_start = jiffies;
			dev_watchdog_up(dev);
		}
	}
	/* Otherwise: delay activation until next carrier-on event. */
}

attach_one_default_qdisc()

/*
 * Per-queue callback: choose the default sleeping qdisc for dev_queue.
 *
 * A device with a non-zero tx_queue_len gets a freshly created pfifo_fast
 * qdisc; a device with tx_queue_len == 0 gets the shared noqueue_qdisc.
 * If creation fails, a message is logged and dev_queue is left unchanged.
 */
static void attach_one_default_qdisc(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_unused)
{
	/* Fallback for devices that do not queue at all. */
	struct Qdisc *chosen = &noqueue_qdisc;

	if (dev->tx_queue_len) {
		chosen = qdisc_create_dflt(dev, dev_queue, &pfifo_fast_ops, TC_H_ROOT);
		if (!chosen) {
			printk(KERN_INFO "%s: activation failed\n", dev->name);
			return;
		}
	}

	dev_queue->qdisc_sleeping = chosen;
}

/*
 * Allocate and initialise a qdisc of the given kind for dev_queue.
 * The article uses it to create the default pfifo_fast qdisc.
 *
 * Returns the new qdisc, or NULL when allocation or ops->init() fails.
 */
struct Qdisc * qdisc_create_dflt(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 struct Qdisc_ops *ops,
				 unsigned int parentid)
{
	struct Qdisc *sch = qdisc_alloc(dev_queue, ops);

	if (IS_ERR(sch))
		return NULL;

	/* Complex queueing setups can express parent/child relations. */
	sch->parent = parentid;

	/* Run the discipline's init callback, if it provides one. */
	if (ops->init && ops->init(sch, NULL) != 0) {
		/* Initialisation failed: release the instance again. */
		qdisc_destroy(sch);
		return NULL;
	}

	return sch;
}

/*
 * Allocate a zeroed qdisc together with its private area and fill in
 * the generic fields from ops and dev_queue.
 *
 * Returns the new qdisc with refcnt set to 1, or ERR_PTR(-ENOBUFS) when
 * the allocation fails.
 */
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
			  struct Qdisc_ops *ops)
{
	struct Qdisc *sch;
	unsigned int size;
	void *raw;

	/* ensure that the Qdisc and the private data are 32-byte aligned */
	/* Total footprint: aligned struct + private data + slack for alignment. */
	size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size + (QDISC_ALIGNTO - 1);

	raw = kzalloc(size, GFP_KERNEL);
	if (!raw)
		return ERR_PTR(-ENOBUFS);

	/* Place the Qdisc at the first aligned address and remember the offset. */
	sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) raw);
	sch->padded = (char *) sch - (char *) raw;

	/* Generic member initialisation. */
	sch->ops = ops;
	sch->enqueue = ops->enqueue;
	sch->dequeue = ops->dequeue;
	INIT_LIST_HEAD(&sch->list);
	skb_queue_head_init(&sch->q);

	/* dev_queue must be set before qdisc_dev(sch) is evaluated. */
	sch->dev_queue = dev_queue;
	dev_hold(qdisc_dev(sch));
	atomic_set(&sch->refcnt, 1);

	return sch;
}

transition_one_qdisc()

让排队规则真正生效。

static void transition_one_qdisc(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_need_watchdog)
{
	struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
	int *need_watchdog_p = _need_watchdog;

	if (!(new_qdisc->flags & TCQ_F_BUILTIN))
		clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);

	rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
	if (need_watchdog_p && new_qdisc != &noqueue_qdisc)
		*need_watchdog_p = 1;
}

dev_deactivate()

设备关闭时,在dev_close()中会调用dev_deactivate()将设备的排队规则恢复为初始化时指定的noop_qdisc,这样后续设备将无法发送数据包。

/*
 * Deactivate the queueing disciplines of a device and wait until
 * in-flight transmit activity has finished.
 */
void dev_deactivate(struct net_device *dev)
{
	/* Deactivate every transmit queue, passing noop_qdisc as the default qdisc. */
	netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);
	/* Do the same for the receive queue. */
	dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc);

	/* Stop the watchdog timer. */
	dev_watchdog_down(dev);

	/* Wait for outstanding qdisc-less dev_queue_xmit calls. */
	synchronize_rcu();

	/* Wait for outstanding qdisc_run calls. */
	/* Yield the CPU repeatedly until no qdisc of this device is reported busy. */
	while (some_qdisc_is_busy(dev))
		yield();
}

发送过程中的入队出队

在dev_queue_xmit()中,入队是通过调用qdisc_enqueue_root()接口完成的;在qdisc_restart()中,出队是通过调用dequeue_skb()完成的。二者的实现如下:

qdisc_enqueue_root()

/*
 * Hand skb to the enqueue operation of sch and return its result.
 * With CONFIG_NET_SCHED, the packet length is first recalculated
 * through the qdisc's size table when one is attached.
 */
static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
#ifdef CONFIG_NET_SCHED
	struct qdisc_size_table *size_table = sch->stab;

	if (size_table)
		qdisc_calculate_pkt_len(skb, size_table);
#endif
	return sch->enqueue(skb, sch);
}

/*
 * Enqueue skb on sch: record skb->len as the packet length in the skb's
 * qdisc control block, enqueue, and return the result masked with
 * NET_XMIT_MASK.
 */
static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch)
{
	int verdict;

	qdisc_skb_cb(skb)->pkt_len = skb->len;
	verdict = qdisc_enqueue(skb, sch);

	return verdict & NET_XMIT_MASK;
}

dequeue_skb()

/*
 * Fetch the next packet to transmit from q.
 *
 * A packet parked in q->gso_skb takes priority over the queue itself.
 * It is returned only when its tx queue is neither stopped nor frozen;
 * otherwise NULL is returned and the packet stays parked.
 */
static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
{
	struct sk_buff *pending = q->gso_skb;
	struct netdev_queue *txq;

	if (unlikely(pending)) {
		/* check the reason of requeuing without tx lock first */
		txq = netdev_get_tx_queue(qdisc_dev(q),
					  skb_get_queue_mapping(pending));
		if (netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq))
			return NULL;

		q->gso_skb = NULL;
		return pending;
	}

	/* Nothing parked: ask the queueing discipline. */
	return q->dequeue(q);
}

排队规则: noop_qdisc

noop_qdisc是网络设备在注册时指定的排队规则,使用这种排队规则时无法进行数据包发送,其实现如下。

/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
   under all circumstances. It is difficult to invent anything faster or
   cheaper.
 */
/*
 * Enqueue operation of the noop qdisc: the packet is never queued.
 * It frees skb and always returns NET_XMIT_CN.
 */
static int noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
{
	kfree_skb(skb);
	return NET_XMIT_CN;
}

/*
 * Dequeue operation of the noop qdisc: there is never a packet to hand
 * out, so it always returns NULL.
 */
static struct sk_buff *noop_dequeue(struct Qdisc * qdisc)
{
	return NULL;
}

/*
 * Operation table of the "noop" discipline: no private data, enqueue
 * frees the packet, dequeue and peek both return NULL.
 */
struct Qdisc_ops noop_qdisc_ops __read_mostly = {
	.id		=	"noop",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.peek		=	noop_dequeue,
	.owner		=	THIS_MODULE,
};

/*
 * Placeholder device queue referenced by noop_qdisc.dev_queue; both of
 * its qdisc pointers refer back to noop_qdisc.
 */
static struct netdev_queue noop_netdev_queue = {
	.qdisc		=	&noop_qdisc,
	.qdisc_sleeping	=	&noop_qdisc,
};

/*
 * The statically allocated noop qdisc instance, flagged TCQ_F_BUILTIN.
 * dev_init_scheduler() installs it on every queue of a device.
 */
struct Qdisc noop_qdisc = {
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noop_qdisc_ops,
	.list		=	LIST_HEAD_INIT(noop_qdisc.list),
	.q.lock		=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
	.dev_queue	=	&noop_netdev_queue,
};

可见,如果配置成了noop_qdisc,那么设备是无法发送数据的,因为其入队操作是释放skb,出队操作返回空,表示出队失败。

排队规则:noqueue_qdisc

如上介绍,noqueue_qdisc是用来给那些不需要流量控制的网络设备使用的,之所以定义这样一个排队规则,完全是为了让设备接口层的框架部分代码能够统一处理有流量控制和无流量控制两种发送过程。

/*
 * Operation table of the "noqueue" discipline. It reuses the noop
 * enqueue/dequeue/peek helpers and has no private data.
 */
static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
	.id		=	"noqueue",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.peek		=	noop_dequeue,
	.owner		=	THIS_MODULE,
};

/* Tentative definition so that noqueue_netdev_queue can point at the qdisc defined below. */
static struct Qdisc noqueue_qdisc;
/*
 * Placeholder device queue referenced by noqueue_qdisc.dev_queue; both
 * of its qdisc pointers refer back to noqueue_qdisc.
 */
static struct netdev_queue noqueue_netdev_queue = {
	.qdisc		=	&noqueue_qdisc,
	.qdisc_sleeping	=	&noqueue_qdisc,
};

/*
 * The statically allocated noqueue qdisc instance, flagged TCQ_F_BUILTIN.
 * attach_one_default_qdisc() selects it for devices whose tx_queue_len is 0.
 */
static struct Qdisc noqueue_qdisc = {
	/* Leaving enqueue NULL makes dev_queue_xmit() take the transmit path without traffic control. */
	.enqueue	=	NULL,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noqueue_qdisc_ops,
	.list		=	LIST_HEAD_INIT(noqueue_qdisc.list),
	.q.lock		=	__SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
	.dev_queue	=	&noqueue_netdev_queue,
};

排队规则: pfifo_fast

如前面介绍,pfifo_fast是设备打开后默认使用的排队规则,下面看其入队、出队操作都是如何实现的。

#define TC_PRIO_MAX	15
/*
 * Lookup table indexed by (skb->priority & TC_PRIO_MAX); each entry is
 * the pfifo_fast band (0, 1 or 2) a packet of that priority goes to.
 */
static const u8 prio2band[TC_PRIO_MAX+1] =
	{ 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 };

/* 3-band FIFO queue: old style, but should be a bit faster than
   generic prio+fifo combination.
 */
#define PFIFO_FAST_BANDS 3

static inline struct sk_buff_head *prio2list(struct sk_buff *skb,  struct Qdisc *qdisc)
{
	// 根据skb的priority字段确定将skb放入那个发送队列
	struct sk_buff_head *list = qdisc_priv(qdisc);
	return list + prio2band[skb->priority & TC_PRIO_MAX];
}

static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
{
	// 确定要把skb放入3个skb队列中的哪一个
	struct sk_buff_head *list = prio2list(skb, qdisc);
	// 如果发送队列有剩余空间,则将skb入队列
	if (skb_queue_len(list) < qdisc->dev->tx_queue_len) {
		qdisc->q.qlen++;
		// 除了入队列,__qdisc_enqueue_tail()还会更新统计信息
		return __qdisc_enqueue_tail(skb, qdisc, list);
	}
	// 队列已满,则丢弃
	return qdisc_drop(skb, qdisc);
}

static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);
	// 从0~2的顺序遍历,找到第一个skb。从遍历顺序来看,显然这种排队规则规定
	// 0号队列的发送优先级最高、1号次之、2号最低
	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
		if (!skb_queue_empty(list + prio)) {
			qdisc->q.qlen--;
			return __qdisc_dequeue_head(qdisc, list + prio);
		}
	}
	return NULL;
}

/*
 * Requeue operation of pfifo_fast: give skb back to the band chosen by
 * its priority and count it in the qdisc's queue length again.
 */
static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
{
	struct sk_buff_head *band = prio2list(skb, qdisc);

	qdisc->q.qlen++;
	return __qdisc_requeue(skb, qdisc, band);
}

static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);
	// 初始化3个私有的skb队列
	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
		skb_queue_head_init(list + prio);
	return 0;
}

/*
 * Operation table of the "pfifo_fast" discipline, the default that
 * attach_one_default_qdisc() creates for devices with a non-zero
 * tx_queue_len.
 */
static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
	.id		=	"pfifo_fast",
	/* The private data consists of PFIFO_FAST_BANDS (3) skb lists. */
	.priv_size	=	PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
	.enqueue	=	pfifo_fast_enqueue,
	.dequeue	=	pfifo_fast_dequeue,
	.peek		=	pfifo_fast_peek,
	.init		=	pfifo_fast_init,
	.reset		=	pfifo_fast_reset,
	.dump		=	pfifo_fast_dump,
	.owner		=	THIS_MODULE,
};
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
目 录.................................................................................................................................................... 前言........................................................................................................................................................ 0.1 出厂默认设置........................................................................................................................ 0.2 管理界面登陆........................................................................................................................ 0.3 确认系统时间........................................................................................................................ 0.4 查看License许可信息........................................................................................................... 0.5 配置与测试网桥.................................................................................................................... 0.6 设备上线................................................................................................................................ 0.7 修改口令................................................................................................................................ 1.网络配置.......................................................................................................................................... 1.1 管理接口................................................................................................................................ 1.2 数据接口................................................................................................................................ 2.对象管理......................................................................................................................................... 1 2.1 自定义协议........................................................................................................................... 
1 2.2 IP群组.................................................................................................................................. 1 2.3 自定义协议组.......................................................................................................................1 2.4虚拟链路................................................................................................................................1 3.策略管理.........................................................................................................................................1 3.1 参数设置................................................................................................................................1 3.1.1 网桥带宽............................................................................................................................1 3.1.2 内网IP统计........................................................................................................................1 3.1.3 伪IP防护............................................................................................................................1 3.1.4 TOS设置.............................................................................................................................1 3.2 流量控制...............................................................................................................................1 3.2.1 数据通道............................................................................................................................1 3.2.2 策略组................................................................................................................................1 3.2.3 策略调度............................................................................................................................2 3.2.4策略生效确认.....................................................................................................................2 3.3 
连接控制...............................................................................................................................2 3.4 HTTP管控..............................................................................................................................2 4.监控统计.........................................................................................................................................2 4.1 分桥统计...............................................................................................................................2 4.2 流量趋势...............................................................................................................................2 4.2 三日对比...............................................................................................................................3 4.4 历史图表...............................................................................................................................3 4.5 TOP 应用..............................................................................................................................3 4.6 TOP IP ...................................................................................................................................3 4.6.1 IP档案-流量概况................................................................................................................3 4.6.2 IP档案-连接信息................................................................................................................3 4.6.3 IP档案-相关身份................................................................................................................3 4.6.4 IP档案-共享用户................................................................................................................3 p://www.panabit.com 第2页/共46页

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值