Linux 流量控制源码学习
队列的数据结构图
网络设备的结构体
(原文中以红色粗体斜体标注流控相关的部分;纯文本中该格式已丢失,下文以注释标明流控相关字段。)
/*
 * Network device structure (Linux kernel ~2.6.x, copied for study).
 * The traffic-control relevant fields are the qdisc pointers and
 * queue_lock in the "queue transmit path" section below.
 */
struct net_device
{
/*
 * This is the first field of the "visible" part of this structure
 * (i.e. as seen by users in the "Space.c" file). It is the name
 * of the interface.
 */
char name[IFNAMSIZ];
/* device name hash chain */
struct hlist_node name_hlist;
/*
 * I/O specific fields
 * FIXME: Merge these and struct ifmap into one
 */
unsigned long mem_end; /* shared mem end */
unsigned long mem_start; /* shared mem start */
unsigned long base_addr; /* device I/O address */
unsigned int irq; /* device IRQ number */
/*
 * Some hardware also needs these fields, but they are not
 * part of the usual set specified in Space.c.
 */
unsigned char if_port; /* Selectable AUI, TP,..*/
unsigned char dma; /* DMA channel */
unsigned long state;
struct net_device *next;
/* The device initialization function. Called only once. */
int (*init)(struct net_device *dev);
/* ------- Fields preinitialized in Space.c finish here ------- */
/* Net device features */
unsigned long features;
#define NETIF_F_SG 1 /* Scatter/gather IO. */
#define NETIF_F_IP_CSUM 2 /* Can checksum only TCP/UDP over IPv4. */
#define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopback. */
#define NETIF_F_HW_CSUM 8 /* Can checksum all the packets. */
#define NETIF_F_HIGHDMA 32 /* Can DMA to high memory. */
#define NETIF_F_FRAGLIST 64 /* Scatter/gather IO. */
#define NETIF_F_HW_VLAN_TX 128 /* Transmit VLAN hw acceleration */
#define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */
#define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */
#define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */
#define NETIF_F_GSO 2048 /* Enable software GSO. */
#define NETIF_F_LLTX 4096 /* LockLess TX */
/* Segmentation offload features */
#define NETIF_F_GSO_SHIFT 16
#define NETIF_F_GSO_MASK 0xffff0000
#define NETIF_F_TSO (SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT)
#define NETIF_F_UFO (SKB_GSO_UDP << NETIF_F_GSO_SHIFT)
#define NETIF_F_GSO_ROBUST (SKB_GSO_DODGY << NETIF_F_GSO_SHIFT)
#define NETIF_F_TSO_ECN (SKB_GSO_TCP_ECN << NETIF_F_GSO_SHIFT)
#define NETIF_F_TSO6 (SKB_GSO_TCPV6 << NETIF_F_GSO_SHIFT)
/* List of features with software fallbacks. */
#define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6)
#define NETIF_F_GEN_CSUM (NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
#define NETIF_F_ALL_CSUM (NETIF_F_IP_CSUM | NETIF_F_GEN_CSUM)
/* NOTE(review): presumably links devices queued for TX scheduling — confirm */
struct net_device *next_sched;
/* Interface index. Unique device identifier */
int ifindex;
int iflink;
struct net_device_stats* (*get_stats)(struct net_device *dev);
struct iw_statistics* (*get_wireless_stats)(struct net_device *dev);
/* List of functions to handle Wireless Extensions (instead of ioctl).
* See <net/iw_handler.h> for details. Jean II */
const struct iw_handler_def * wireless_handlers;
/* Instance data managed by the core of Wireless Extensions. */
struct iw_public_data * wireless_data;
struct ethtool_ops *ethtool_ops;
/*
 * This marks the end of the "visible" part of the structure. All
 * fields hereafter are internal to the system, and may change at
 * will (read: may be cleaned up at will).
 */
unsigned int flags; /* interface flags (a la BSD) */
unsigned short gflags;
unsigned short priv_flags; /* Like 'flags' but invisible to userspace. */
unsigned short padded; /* How much padding added by alloc_netdev() */
unsigned char operstate; /* RFC2863 operstate */
unsigned char link_mode; /* mapping policy to operstate */
unsigned mtu; /* interface MTU value */
unsigned short type; /* interface hardware type */
unsigned short hard_header_len; /* hardware hdr length */
struct net_device *master; /* Pointer to master device of a group,
* which this device is member of.
*/
/* Interface address info. */
unsigned char perm_addr[MAX_ADDR_LEN]; /* permanent hw address */
unsigned char addr_len; /* hardware address length */
unsigned short dev_id; /* for shared network cards */
struct dev_mc_list *mc_list; /* Multicast mac addresses */
int mc_count; /* Number of installed mcasts */
int promiscuity;
int allmulti;
/* Protocol specific pointers */
void *atalk_ptr; /* AppleTalk link */
void *ip_ptr; /* IPv4 specific data */
void *dn_ptr; /* DECnet specific data */
void *ip6_ptr; /* IPv6 specific data */
void *ec_ptr; /* Econet specific data */
void *ax25_ptr; /* AX.25 specific data */
/*
 * Cache line mostly used on receive path (including eth_type_trans())
 */
struct list_head poll_list ____cacheline_aligned_in_smp;
/* Link to poll list */
int (*poll) (struct net_device *dev, int *quota);
int quota;
int weight;
unsigned long last_rx; /* Time of last Rx */
/* Interface address info used in eth_type_trans() */
unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address, (before bcast
because most packets are unicast) */
unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */
/*
 * Cache line mostly used on queue transmit path (qdisc)
 * — this is the traffic-control core of the device.
 */
/* device queue lock */
spinlock_t queue_lock ____cacheline_aligned_in_smp;
struct Qdisc *qdisc; /* active root qdisc */
struct Qdisc *qdisc_sleeping; /* root qdisc kept while device is down */
struct list_head qdisc_list; /* all qdiscs attached to this device */
unsigned long tx_queue_len; /* Max frames per queue allowed */
/* Partially transmitted GSO packet. */
struct sk_buff *gso_skb;
/* ingress path synchronizer */
spinlock_t ingress_lock;
struct Qdisc *qdisc_ingress; /* qdisc for incoming traffic, if any */
/*
 * One part is mostly used on xmit path (device)
 */
/* hard_start_xmit synchronizer */
spinlock_t _xmit_lock ____cacheline_aligned_in_smp;
/* cpu id of processor entered to hard_start_xmit or -1,
if nobody entered there.
*/
int xmit_lock_owner;
void *priv; /* pointer to private data */
int (*hard_start_xmit) (struct sk_buff *skb,
struct net_device *dev);
/* These may be needed for future network-power-down code. */
unsigned long trans_start; /* Time (in jiffies) of last Tx */
int watchdog_timeo; /* used by dev_watchdog() */
struct timer_list watchdog_timer;
/*
 * refcnt is a very hot point, so align it on SMP
 */
/* Number of references to this device */
atomic_t refcnt ____cacheline_aligned_in_smp;
/* delayed register/unregister */
struct list_head todo_list;
/* device index hash chain */
struct hlist_node index_hlist;
/* register/unregister state machine */
enum { NETREG_UNINITIALIZED=0,
NETREG_REGISTERED, /* completed register_netdevice */
NETREG_UNREGISTERING, /* called unregister_netdevice */
NETREG_UNREGISTERED, /* completed unregister todo */
NETREG_RELEASED, /* called free_netdev */
} reg_state;
/* Called after device is detached from network. */
void (*uninit)(struct net_device *dev);
/* Called after last user reference disappears. */
void (*destructor)(struct net_device *dev);
/* Pointers to interface service routines. */
int (*open)(struct net_device *dev);
int (*stop)(struct net_device *dev);
#define HAVE_NETDEV_POLL
int (*hard_header) (struct sk_buff *skb,
struct net_device *dev,
unsigned short type,
void *daddr,
void *saddr,
unsigned len);
int (*rebuild_header)(struct sk_buff *skb);
#define HAVE_MULTICAST
void (*set_multicast_list)(struct net_device *dev);
#define HAVE_SET_MAC_ADDR
int (*set_mac_address)(struct net_device *dev,
void *addr);
#define HAVE_PRIVATE_IOCTL
int (*do_ioctl)(struct net_device *dev,
struct ifreq *ifr, int cmd);
#define HAVE_SET_CONFIG
int (*set_config)(struct net_device *dev,
struct ifmap *map);
#define HAVE_HEADER_CACHE
int (*hard_header_cache)(struct neighbour *neigh,
struct hh_cache *hh);
void (*header_cache_update)(struct hh_cache *hh,
struct net_device *dev,
unsigned char * haddr);
#define HAVE_CHANGE_MTU
int (*change_mtu)(struct net_device *dev, int new_mtu);
#define HAVE_TX_TIMEOUT
void (*tx_timeout) (struct net_device *dev);
void (*vlan_rx_register)(struct net_device *dev,
struct vlan_group *grp);
void (*vlan_rx_add_vid)(struct net_device *dev,
unsigned short vid);
void (*vlan_rx_kill_vid)(struct net_device *dev,
unsigned short vid);
int (*hard_header_parse)(struct sk_buff *skb,
unsigned char *haddr);
int (*neigh_setup)(struct net_device *dev, struct neigh_parms *);
#ifdef CONFIG_NETPOLL
struct netpoll_info *npinfo;
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
void (*poll_controller)(struct net_device *dev);
#endif
/* bridge stuff */
struct net_bridge_port *br_port;
#ifdef CONFIG_NET_DIVERT
/* this will get initialized at each interface type init routine */
struct divert_blk *divert;
#endif /* CONFIG_NET_DIVERT */
/* class/net/name entry */
struct class_device class_dev;
/* space for optional statistics and wireless sysfs groups */
struct attribute_group *sysfs_groups[3];
};
通用队列规则结构体
/*
 * Generic queueing discipline instance. One per qdisc attached to a
 * net_device; type-specific private data (e.g. struct htb_sched)
 * is allocated together with it (see qdisc_alloc()).
 */
struct Qdisc
{
int (*enqueue)(struct sk_buff *skb, struct Qdisc *dev);// enqueue one skb into this qdisc
struct sk_buff * (*dequeue)(struct Qdisc *dev);// dequeue the next skb to transmit
unsigned flags;
#define TCQ_F_BUILTIN 1
#define TCQ_F_THROTTLED 2
#define TCQ_F_INGRESS 4
int padded;
struct Qdisc_ops *ops;// operations table for this qdisc type
u32 handle;// identifier (handle) of this qdisc
u32 parent;// parent handle — NOTE(review): original comment here was truncated
atomic_t refcnt;
struct sk_buff_head q;
struct net_device *dev;// network device this qdisc is attached to
struct list_head list;// linked into the owning net_device's qdisc_list
struct gnet_stats_basic bstats;
struct gnet_stats_queue qstats;
struct gnet_stats_rate_est rate_est;
spinlock_t *stats_lock;// points to the owning net_device's queue_lock
struct rcu_head q_rcu;
int (*reshape_fail)(struct sk_buff *skb,
struct Qdisc *q);
/* This field is deprecated, but it is still used by CBQ
* and it will live until better solution will be invented.
*/
struct Qdisc *__parent;
};
通用队列规则操作结构体
/*
 * Operations table describing one qdisc type (e.g. htb_qdisc_ops).
 * Instances are registered on the global qdisc_base list.
 *
 * Fix: the original had a bare stray comment ("调试") after
 * dump_stats with no "//" delimiter, which is a syntax error.
 */
struct Qdisc_ops
{
struct Qdisc_ops *next;// next registered qdisc type
struct Qdisc_class_ops *cl_ops;// class operations of this qdisc type
char id[IFNAMSIZ];// type identifier, e.g. "htb"
int priv_size;// size of private data (for htb: sizeof(struct htb_sched))
int (*enqueue)(struct sk_buff *, struct Qdisc *);// enqueue one skb
struct sk_buff * (*dequeue)(struct Qdisc *);// dequeue next skb
int (*requeue)(struct sk_buff *, struct Qdisc *);// put an skb back into the queue
unsigned int (*drop)(struct Qdisc *);// drop one packet
int (*init)(struct Qdisc *, struct rtattr *arg);// initialize
void (*reset)(struct Qdisc *);// reset to initial state
void (*destroy)(struct Qdisc *);// tear down
int (*change)(struct Qdisc *, struct rtattr *arg);// reconfigure (NULL for htb)
int (*dump)(struct Qdisc *, struct sk_buff *);// dump configuration (debugging)
int (*dump_stats)(struct Qdisc *, struct gnet_dump *);// dump statistics (debugging)
struct module *owner;// owning module
};
Htb队列规则结构体
/*
 * Private scheduler state of one HTB qdisc (lives right after the
 * generic struct Qdisc in the same allocation).
 */
struct htb_sched
{
struct list_head root; /* root classes list */// all top-level classes
struct list_head hash[HTB_HSIZE]; /* hashed by classid */// class hash table
struct list_head drops[TC_HTB_NUMPRIO]; /* active leaves (for drops) */
/* self list - roots of self generating tree */
struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
int row_mask[TC_HTB_MAXDEPTH];
struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
/* self wait list - roots of wait PQs per row */
struct rb_root wait_pq[TC_HTB_MAXDEPTH];
/* time of nearest event per level (row) */
unsigned long near_ev_cache[TC_HTB_MAXDEPTH];
/* cached value of jiffies in dequeue */
unsigned long jiffies;
/* whether we hit non-work conserving class during this dequeue; we use */
int nwc_hit; /* this to disable mindelay complaint in dequeue */
int defcls; /* class where unclassified flows go to */
u32 debug; /* subsystem debug levels */
/* filters for qdisc itself */
struct tcf_proto *filter_list;// all filters attached to this qdisc
int filter_cnt;
int rate2quantum; /* quant = rate / rate2quantum */
psched_time_t now; /* cached dequeue time */
struct timer_list timer; /* send delay timer */
#ifdef HTB_RATECM
struct timer_list rttim; /* rate computer timer */
int recmp_bucket; /* which hash bucket to recompute next */
#endif
/* non shaped skbs; let them go directly thru */
struct sk_buff_head direct_queue;
int direct_qlen; /* max qlen of above */
long direct_pkts;
};
Htb队列的class结构体
/*
 * One HTB class: carries the token-bucket state (tokens/ctokens,
 * rate/ceil tables) plus its position in the class hierarchy and in
 * the per-priority feed/event trees.
 */
struct htb_class
{
#ifdef HTB_DEBUG
unsigned magic;
#endif
/* general class parameters */
u32 classid;
struct gnet_stats_basic bstats;
struct gnet_stats_queue qstats;
struct gnet_stats_rate_est rate_est;
struct tc_htb_xstats xstats;/* our special stats */
int refcnt; /* usage count of this class */
#ifdef HTB_RATECM
/* rate measurement counters */
unsigned long rate_bytes,sum_bytes;
unsigned long rate_packets,sum_packets;
#endif
/* topology */
int level; /* our level (see above) */
struct htb_class *parent; /* parent class */
struct list_head hlist; /* classid hash list item */
struct list_head sibling; /* sibling list item */
struct list_head children; /* children list */
union {
struct htb_class_leaf {
struct Qdisc *q;
int prio;
int aprio;
int quantum;
int deficit[TC_HTB_MAXDEPTH];
struct list_head drop_list;
} leaf;
struct htb_class_inner {
struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */
struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */
/* When class changes from state 1->2 and disconnects from
parent's feed then we lost ptr value and start from the
first child again. Here we store classid of the
last valid ptr (used when ptr is NULL). */
u32 last_ptr_id[TC_HTB_NUMPRIO];
} inner;
} un;
struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
struct rb_node pq_node; /* node for event queue */
unsigned long pq_key; /* the same type as jiffies global */
int prio_activity; /* for which prios are we active */
enum htb_cmode cmode; /* current mode of the class */
/* class attached filters */
struct tcf_proto *filter_list;
int filter_cnt;
int warned; /* only one warning about non work conserving .. */
/* token bucket parameters */
struct qdisc_rate_table *rate; /* rate table of the class itself */
struct qdisc_rate_table *ceil; /* ceiling rate (limits borrows too) */
long buffer,cbuffer; /* token bucket depth/rate */
psched_tdiff_t mbuffer; /* max wait time */
long tokens,ctokens; /* current number of tokens */// current rate/ceil token counts
psched_time_t t_c; /* checkpoint time */
};
Htb队列的class配置选项
/*
 * HTB class configuration passed from userspace (tc/iproute2) via
 * netlink; see htb_parse_class_opt in the iproute sources.
 */
struct tc_htb_opt
{
struct tc_ratespec rate;// guaranteed (assured) rate
struct tc_ratespec ceil;// ceiling (maximum) rate
__u32 buffer;
__u32 cbuffer;
__u32 quantum;
__u32 level; /* out only */
__u32 prio;
};
过滤器结构体
/*
 * One classifier (filter) instance attached to a qdisc. classify()
 * is the hot-path entry that maps an skb to a class.
 */
struct tcf_proto
{
/* Fast access part */
struct tcf_proto *next; /* next filter in the chain */
void *root; /* classifier-private root data */
int (*classify)(struct sk_buff*, struct tcf_proto*,
struct tcf_result *);
u32 protocol; /* ethertype this filter applies to */
/* All the rest */
u32 prio;
u32 classid;
struct Qdisc *q; /* qdisc this filter is attached to */
void *data; /* classifier-private data */
struct tcf_proto_ops *ops; /* type operations (e.g. cls_fw_ops) */
};
过滤器操作结构体
/*
 * Operations table for one classifier type (e.g. cls_fw_ops for the
 * fwmark classifier). Registered on the global tcf_proto_base list.
 */
struct tcf_proto_ops
{
struct tcf_proto_ops *next; /* next registered classifier type */
char kind[IFNAMSIZ]; /* type name, e.g. "fw" */
int (*classify)(struct sk_buff*, struct tcf_proto*,
struct tcf_result *);
int (*init)(struct tcf_proto*);
void (*destroy)(struct tcf_proto*);
unsigned long (*get)(struct tcf_proto*, u32 handle);
void (*put)(struct tcf_proto*, unsigned long);
int (*change)(struct tcf_proto*, unsigned long,
u32 handle, struct rtattr **,
unsigned long *);
int (*delete)(struct tcf_proto*, unsigned long);
void (*walk)(struct tcf_proto*, struct tcf_walker *arg);
/* rtnetlink specific */
int (*dump)(struct tcf_proto*, unsigned long,
struct sk_buff *skb, struct tcmsg*);
struct module *owner;
};
特定filter头结构体
/* Root of the fw (fwmark) classifier: a hash table of fw_filter chains. */
struct fw_head
{
struct fw_filter *ht[HTSIZE];
};
特定filter结构体
/* One fw (fwmark) filter entry: matches skbs whose mark equals id. */
struct fw_filter
{
struct fw_filter *next; /* next entry in the same hash chain */
u32 id; /* firewall mark to match */
struct tcf_result res; /* classification result (target class) */
#ifdef CONFIG_NET_CLS_IND
char indev[IFNAMSIZ]; /* optional incoming-device match */
#endif /* CONFIG_NET_CLS_IND */
struct tcf_exts exts; /* extended actions/policing */
};
过滤的数据结构图
全局变量
当前注册在系统中的所有队列规则qdisc_base
Htb队列规则htb_qdisc_ops
内核中设备索引表dev_index_head
队列规则的速率表项qdisc_rtab_list
所有已经安装的过滤器类型tcf_proto_base
Fwmark 过滤器操作全局变量cls_fw_ops
典型的操作过程
队列规则操作的注册
将全局变量Htb队列规则操作htb_qdisc_ops加入到全局变量当前注册在系统中的所有队列规则操作链表qdisc_base中。
创建队列
Linux源码对应的函数调用如下:
Qdisc_create
注意:
- Qdisc_create会一次性的将Qdisc 和htb_sched分配好见对qdisc_alloc函数的调用。
- 在这里设置同步定时器
Qdisc_graft
对于原有的class进行移植,对于我们的实现不存在这种情况。
Qdisc_notify
构造响应消息,通过netlink连接,发送响应消息到用户层。
htb_destroy
执行如下操作
- 删除同步定时器
- 删除所有tc filter
- 删除所有tc class
- 释放该队列所关联的所有skb
创建class
Linux源码中创建class的函数调用如下所示,
注意:
叶子class指向的qdisc是fifo qdisc,而不是初始化的htb qdisc。
创建filter
Fwmark过滤器操作的注册
将全局变量过滤器操作cls_fw_ops加入到全局变量当前注册在系统中的所有过滤器链表tcf_proto_base。
tc_filter_init
在全局变量rtnetlink_links中添加过滤器操作。
创建一个过滤器节点tc_ctl_tfilter
基于tc filter和tc class进行流量控制
代码结构如下所示:
入队操作htb_enqueue
入队的代码结构如下所示,
出队列操作
采用round robin color tree结构进行class管理
- 基于优先级的当前是borrow模式的管理树
- 基于优先级的当前是send模式的管理树
- 队列中等待管理树
着色树的管理
高效管理,见附录,红黑树
队列调整的计算方法(基于令牌桶的管理方法)
主要参照htb_charge_class函数的统计操作。
- cl->mbuffer值为60,000,000;在class配置函数htb_change_class中设定。
- cl->tokens和cl->ctokens初始值分别是配置结构体tc_htb_opt参数的buffer和cbuffer属性。具体参看iproute源码中的函数htb_parse_class_opt
- cl->buffer和cl->cbuffer分别是配置结构体tc_htb_opt参数的buffer和cbuffer属性。具体参看iproute源码中的函数htb_parse_class_opt
- cl->rate->rate.cell_log和cl->ceil->rate.cell_log是根据mtu计算出来的,因为mtu是固定值1600,所以,它为固定值3。是配置结构体tc_htb_opt参数的rate结构体的cell_log属性和ceil结构体的cell_log属性。具体参看iproute源码中的函数tc_calc_rtable。
- cl->rate->data[slot]和cl->ceil->data[slot]分别对应于发送报文大小和报文的速率对应要消耗的时间,对于我们的系统来说是基于微秒计算的。具体参看iproute源码中的函数tc_calc_rtable。
- 如果class是叶子节点,则cl->level为0;如果他有父亲节点,并且父亲节点的level为0,则他的父亲节点的level设置有两种情况:如果他祖父节点存在,则设置为祖父节点的level - 1;否则,设置为TC_HTB_MAXDEPTH - 1(数值为7)。
先介绍配置结构体tc_htb_opt参数:
Rate和ceil:配置输入的是bit/s,在输入内核前转换成byte/s。
Tokens和ctokens:(参看iproute源码的htb_parse_class_opt函数)
- 通过如下计算
buffer = opt.rate.rate / get_hz() + mtu;
转换成文字表达为:
Buffer = 配置的速率转换成byte/s的值/250 + mtu
其中opt.rate.rate是转换成byte/s的值,mtu默认为1600;get_hz()的值为250。
opt.buffer = tc_calc_xmittime(opt.rate.rate, buffer);
转换成文字表达为:
opt.buffer = (1000000*buffer)/配置的速率转换成byte/s的值
说明:可以看出opt.buffer随配置的速率的增加而减少,极限值为1000000/250 = 4000。
举例说明:
./tc class add dev eth0 parent 1:1 classid 1:12 htb rate 1kbit ceil 1kbit prio 0
打印信息为:
htb_parse_class_opt: <<<<get_hz() <250> rate <125> mtu <1600> buffer <1600>>>>
htb_parse_class_opt: <<<<get_hz() <250> ceil <125> mtu <1600> cbuffer <1600>>>>
htb_parse_class_opt: <<<<opt.buffer <13107200> opt.cbuffer <13107200> >>>
介绍diff的计算(PSCHED_TDIFF_SAFE函数)
有如下几种情况:
- 如果当前时间q->now与cl->t_c之间的秒时间差超过2s;则判断时间差是否超过1分钟;如果没有超过一分钟,返回值为时间差(微秒为计算单位),如果超过一分钟返回值为1分钟;
- 如果当前时间q->now与cl->t_c之间的秒时间差为2s或1s,则为1秒(微秒为计算单位)+ q->now与cl->t_c之间的微秒时间差(可能为负)
- 如果当前时间q->now与cl->t_c之间的秒时间差为0,为q->now与cl->t_c之间的微秒时间差。
计算令牌桶的方法(HTB_ACCNT宏定义函数)
令牌桶的一个令牌,指的是该tc class 发送1个byte需要的ticks。
/*
 * Token accounting for one class (used in htb_charge_class):
 *  T = token field (tokens or ctokens), B = bucket depth field
 *  (buffer or cbuffer), R = rate table field (rate or ceil).
 * Adds the elapsed time `diff`, caps at the bucket depth, subtracts
 * the transmission cost of `bytes` via L2T(), and floors near
 * -mbuffer.
 * NOTE(review): multi-statement macro without a do { } while (0)
 * guard, and it relies on `toks`, `diff`, `bytes` and `cl` existing
 * at the expansion site — kernel-copied code, kept byte-identical.
 */
#define HTB_ACCNT(T,B,R) toks = diff + cl->T; \
if (toks > cl->B) toks = cl->B; \
toks -= L2T(cl, cl->R, bytes); \
if (toks <= -cl->mbuffer) toks = 1-cl->mbuffer; \
cl->T = toks
static __inline__ long L2T(struct htb_class *cl,struct qdisc_rate_table *rate,
int size)
{
	/* Length-to-time: look up how long transmitting `size` bytes takes
	 * at this rate, using the precomputed 256-entry rate table. */
	int idx = size >> rate->rate.cell_log;

	/* Oversized packets fall off the table: clamp to the last slot
	 * and count them as "giants". */
	if (idx > 255) {
		cl->xstats.giants++;
		idx = 255;
	}
	return rate->data[idx];
}
根据
进行状态改变(htb_class_mode函数)
- 如果当前类的状态是发送状态,如果当前持有的ceil令牌数小于0,将当前的时间差设置为时间差减去令牌数,设置为状态不可发送。
- 如果当前类的状态不是发送状态,如果当前持有的ceil令牌数小于当前类cbuffer的负值,将当前的时间差设置为时间差减去令牌数,设置为状态不可发送。
上面的两个条件是对上限的限制,所以,判断条件是ceil令牌数小于某个给定的值,而不是大于某个给定的值。
- 如果当前类的状态是发送状态,如果当前持有的rate令牌数大于或等于当前类buffer的负值,设置为状态可发送。
- 如果当前类的状态不是发送状态,如果当前持有的rate令牌数大于或等于0,设置为状态可发送。
- 其它情况设置该类的状态为HTB_MAY_BORROW状态,可以从他的父亲获取带宽。
至于class如何利用父类借用带宽,可以通过其父类是否在发送模式的状态树中决定。在htb_dequeue函数中会将所有的level都遍历到,如果父亲节点有剩余带宽可以使用,则它的子节点会继续进行报文发送的。
从上面的分析,和网上下面描述的理论基本上一致的:
根据所耗用的带宽,每个HTB类可能处与3种状态之一。
绿色:类的实际带宽小于等于其limit-at带宽。在这种状态下,类被附在它所在层的相应优先级的inner slot上,并被允许满足其limit-at带宽,无论它父类的限制带宽是多少。例如:如果有一个leaf class的limit-at=512000,它父类的max-limit=limit-at=128000,该leaf class可以达到512kbps!
黄色:类的实际带宽大于其limit-at,但小于等于max-limit。在此状态下,类被附于它父类inner feed的与它优先级相应的inner slot上;而父类的inner feed可能附于“祖父类”的相应优先级的inner slot(在父类也是黄色状态时)或者它(父类)所在层相应优先级的self slot(在父类是绿色状态时)上。在转换为这种状态时,类与他所在层的self feed“断开”,与他父类的inner feed“连接”。
红色:类的实际带宽超过max-limit。这个类不能从其父类处借用带宽。
大体的流程应该是这个样子,具体很多细节没有进行分析,时间和精力有限就分析到这里了。