默认情况下,IPv6是开启状态,即PROC文件disable_ipv6的值为零。
$ cat /proc/sys/net/ipv6/conf/all/disable_ipv6
0
$ cat /proc/sys/net/ipv6/conf/default/disable_ipv6
0
$ cat /proc/sys/net/ipv6/conf/ens33/disable_ipv6
0
如下代码所示,默认情况下all和default中的disable_ipv6配置值都为0。
static struct ipv6_devconf ipv6_devconf __read_mostly = {
.disable_ipv6 = 0,
static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.disable_ipv6 = 0,
在初始化函数中,ipv6_defaults将覆盖default的disable_ipv6值,其中ipv6_defaults成员disable_ipv6的值,可在IPv6模块加载时指定。
static int __net_init addrconf_init_net(struct net *net)
{
/* these will be inherited by all namespaces */
dflt->autoconf = ipv6_defaults.autoconf;
dflt->disable_ipv6 = ipv6_defaults.disable_ipv6;
修改IPv6开关
如果修改的是默认default目录下的disable_ipv6文件值,修改完成之后返回零,不做进一步处理。否则,如果用户修改的是all目录下的disable_ipv6值,将新值更新到网络命名空间中的default配置值。
最后,对于具体网络接口下的disable_ipv6值,如果新旧值不同,由函数dev_disable_change处理。
static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf)
{
struct net *net;
net = (struct net *)table->extra2;
old = *p;
*p = newf;
if (p == &net->ipv6.devconf_dflt->disable_ipv6) {
rtnl_unlock();
return 0;
}
if (p == &net->ipv6.devconf_all->disable_ipv6) {
net->ipv6.devconf_dflt->disable_ipv6 = newf;
addrconf_disable_change(net, newf);
} else if ((!newf) ^ (!old))
dev_disable_change((struct inet6_dev *)table->extra1);
对于修改all目录下disable_ipv6配置的情况,函数for_each_netdev遍历网络命名空间中的所有接口设备,更新所有设备的disable_ipv6配置值,在新值与旧值不同时,由函数dev_disable_change进行相应处理,见以下介绍。
/*
 * Apply a new disable_ipv6 value to every network device in @net.
 *
 * @net:  network namespace whose devices are walked.
 * @newf: new disable_ipv6 value to store on each device.
 *
 * Each device's setting is overwritten unconditionally, but
 * dev_disable_change() is only invoked when the enabled/disabled state
 * actually flips.
 */
static void addrconf_disable_change(struct net *net, __s32 newf)
{
struct net_device *dev;
struct inet6_dev *idev;
for_each_netdev(net, dev) {
idev = __in6_dev_get(dev);
/* Devices that have no inet6_dev attached are skipped. */
if (idev) {
/* Compare truthiness, not raw values: going from one non-zero
 * value to another non-zero value is not a state change.
 */
int changed = (!idev->cnf.disable_ipv6) ^ (!newf);
idev->cnf.disable_ipv6 = newf;
if (changed)
dev_disable_change(idev);
}
}
}
对于修改网络接口目录下disable_ipv6配置的情况,根据接口的disable_ipv6配置发送NETDEV_DOWN或者NETDEV_UP通知。
/*
 * React to a change of the per-device disable_ipv6 setting.
 *
 * @idev: IPv6 device state whose cnf.disable_ipv6 has already been updated.
 *
 * Builds a notifier info for the underlying net_device and calls
 * addrconf_notify() directly (with a NULL notifier_block): NETDEV_DOWN when
 * disable_ipv6 is now set, NETDEV_UP when it is now clear.
 */
static void dev_disable_change(struct inet6_dev *idev)
{
struct netdev_notifier_info info;
/* Nothing to do without an inet6_dev or its backing net_device. */
if (!idev || !idev->dev) return;
netdev_notifier_info_init(&info, idev->dev);
if (idev->cnf.disable_ipv6)
addrconf_notify(NULL, NETDEV_DOWN, &info);
else
addrconf_notify(NULL, NETDEV_UP, &info);
}
通知处理
如下通知处理函数,如果在处理NETDEV_UP事件时,发现disable_ipv6配置项被打开,即IPv6又被关闭,结束处理。
static int addrconf_notify(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct inet6_dev *idev = __in6_dev_get(dev);
switch (event) {
case NETDEV_UP:
case NETDEV_CHANGE:
if (idev && idev->cnf.disable_ipv6)
break;
函数addrconf_permanent_addr遍历设备地址列表中,那些设置了IFA_F_PERMANENT标志的永久地址,如果没有设置IFA_F_NOPREFIXROUTE标志,为此地址项添加相应的前缀路由。如果地址项的状态等于INET6_IFADDR_STATE_PREDAD,开启DAD检测。
由上节函数dev_disable_change可知,idev不为空,这里不再新建idev,跳过ipv6_add_dev函数。
if (event == NETDEV_UP) {
addrconf_permanent_addr(net, dev); /* restore routes for permanent addresses */
if (!addrconf_link_ready(dev)) {
/* device is not ready yet. */
pr_debug("ADDRCONF(NETDEV_UP): %s: link is not ready\n", dev->name);
break;
}
if (!idev && dev->mtu >= IPV6_MIN_MTU)
idev = ipv6_add_dev(dev);
if (!IS_ERR_OR_NULL(idev)) {
idev->if_flags |= IF_READY;
run_pending = 1;
}
}
对于以太网类型的设备,函数addrconf_dev_config增加默认的接口本地多播地址路由(addrconf_add_mroute); 生成链路本地地址,并且增加相应的前缀路由,开启链路本地地址的DAD检测。
switch (dev->type) {
...
case ARPHRD_LOOPBACK:
init_loopback(dev);
break;
default:
addrconf_dev_config(dev);
break;
}
函数addrconf_dad_run将遍历接口设备的地址链表,对于试探性地址(tentative,标志IFA_F_TENTATIVE),并且状态为INET6_IFADDR_STATE_DAD的地址项,开启DAD检测。函数rt6_sync_up遍历网络命名空间中与当前设备相关的路由表项,清除其中的下一跳标志(此处传入的参数为RTNH_F_DEAD;在链路载波正常时,同时清除RTNH_F_LINKDOWN标志)。
另外,对于在接口DOWN期间,修改了接口MTU值的情况,如果新的MTU值小于IPv6要求的最小值(1280字节),禁止此接口的IPv6功能。参见以下addrconf_ifdown函数。
if (!IS_ERR_OR_NULL(idev)) {
if (run_pending) addrconf_dad_run(idev, false);
/* Device has an address by now */
rt6_sync_up(dev, RTNH_F_DEAD);
/* If the MTU changed during the interface down, when the interface up,
* the changed MTU must be reflected in the idev as well as routers.
*/
if (idev->cnf.mtu6 != dev->mtu && dev->mtu >= IPV6_MIN_MTU) {
rt6_mtu_change(dev, dev->mtu);
idev->cnf.mtu6 = dev->mtu;
}
idev->tstamp = jiffies;
inet6_ifinfo_notify(RTM_NEWLINK, idev);
/*If the changed mtu during down is lower than IPV6_MIN_MTU stop IPv6 on this interface.
*/
if (dev->mtu < IPV6_MIN_MTU)
addrconf_ifdown(dev, dev != net->loopback_dev);
}
break;
如果通知函数接收到的事件为NETDEV_DOWN,由函数addrconf_ifdown清除接口上的所有地址。
case NETDEV_DOWN:
case NETDEV_UNREGISTER:
/* Remove all addresses from this interface.
*/
addrconf_ifdown(dev, event != NETDEV_DOWN);
break;
函数rt6_disable_ip清空路由表项及缓存项。
static int addrconf_ifdown(struct net_device *dev, bool unregister)
{
unsigned long event = unregister ? NETDEV_UNREGISTER : NETDEV_DOWN;
bool keep_addr = false;
rt6_disable_ip(dev, event);
idev = __in6_dev_get(dev);
if (!idev) return -ENODEV;
第一步,如果disable_ipv6为0,即IPv6功能并没有关闭,如以上MTU小于IPv6要求的最小值的情况下,根据命名空间中all配置和设备配置项keep_addr_on_down两者来确定是否要保留接口的永久地址,两者中有一个为真即保留。
/* Step 1: remove reference to ipv6 device from parent device. Do not dev_put!
*/
if (unregister) { ... }
/* combine the user config with event to determine if permanent
* addresses are to be removed from address hash table
*/
if (!unregister && !idev->cnf.disable_ipv6) {
/* aggregate the system setting and interface setting */
int _keep_addr = net->ipv6.devconf_all->keep_addr_on_down;
if (!_keep_addr)
_keep_addr = idev->cnf.keep_addr_on_down;
keep_addr = (_keep_addr > 0);
}
遍历IPv6全局地址链表,停止与当前设备相关地址的DAD检查。如果keep_addr为假,或者当前遍历地址不是永久地址,或者地址为链路本地,或环回地址,从链表中移除此地址项。注意,这里的移除操作,将影响到链表的遍历,需要重新开始一个完整的遍历(restart)。
之后,删除邻居发现协议的RS定时器。清除设备的无状态地址配置(SLAAC)标记。
/* Step 2: clear hash table */
for (i = 0; i < IN6_ADDR_HSIZE; i++) {
struct hlist_head *h = &inet6_addr_lst[i];
spin_lock_bh(&addrconf_hash_lock);
restart:
hlist_for_each_entry_rcu(ifa, h, addr_lst) {
if (ifa->idev == idev) {
addrconf_del_dad_work(ifa);
/* combined flag + permanent flag decide if address is retained on a down event
*/
if (!keep_addr ||
!(ifa->flags & IFA_F_PERMANENT) ||
addr_is_local(&ifa->addr)) {
hlist_del_init_rcu(&ifa->addr_lst);
goto restart;
}
}
}
spin_unlock_bh(&addrconf_hash_lock);
}
write_lock_bh(&idev->lock);
addrconf_del_rs_timer(idev);
/* Step 2: clear flags for stateless addrconf */
if (!unregister) idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
第三步,清空设备的临时地址链表。
/* Step 3: clear tempaddr list */
while (!list_empty(&idev->tempaddr_list)) {
ifa = list_first_entry(&idev->tempaddr_list, struct inet6_ifaddr, tmp_list);
list_del(&ifa->tmp_list);
write_unlock_bh(&idev->lock);
spin_lock_bh(&ifa->lock);
if (ifa->ifpub) {
in6_ifa_put(ifa->ifpub);
ifa->ifpub = NULL;
}
spin_unlock_bh(&ifa->lock);
in6_ifa_put(ifa);
write_lock_bh(&idev->lock);
}
第四步,遍历设备的地址链表,停止DAD检查,如果需要保留永久地址,将地址项的状态修改为PREDAD,如果地址具备DAD功能,将其设置为临时地址(IFA_F_TENTATIVE)。在保留地址的情况下,需要删除地址所对应的路由表项。
list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
struct fib6_info *rt = NULL;
addrconf_del_dad_work(ifa);
keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) && !addr_is_local(&ifa->addr);
write_unlock_bh(&idev->lock);
spin_lock_bh(&ifa->lock);
if (keep) {
/* set state to skip the notifier below */
state = INET6_IFADDR_STATE_DEAD;
ifa->state = INET6_IFADDR_STATE_PREDAD;
if (!(ifa->flags & IFA_F_NODAD))
ifa->flags |= IFA_F_TENTATIVE;
rt = ifa->rt;
ifa->rt = NULL;
} else {
state = ifa->state;
ifa->state = INET6_IFADDR_STATE_DEAD;
}
spin_unlock_bh(&ifa->lock);
if (rt) ip6_del_rt(net, rt, false);
在保留地址的情况下,需要退出ANYCAST组地址,以及本地链路solicited-node组地址,避免错误的响应。否则,无需保留地址时,向上层发送RTM_DELADDR的netlink消息,并且删除接口上的地址项。
之后,在不保留地址的情况下,从地址链表中删除当前地址。
if (state != INET6_IFADDR_STATE_DEAD) {
__ipv6_ifa_notify(RTM_DELADDR, ifa);
inet6addr_notifier_call_chain(NETDEV_DOWN, ifa);
} else {
if (idev->cnf.forwarding)
addrconf_leave_anycast(ifa);
addrconf_leave_solict(ifa->idev, &ifa->addr);
}
write_lock_bh(&idev->lock);
if (!keep) {
list_del_rcu(&ifa->if_list);
in6_ifa_put(ifa);
}
}
write_unlock_bh(&idev->lock);
第五步,禁止多播地址相关配置。
/* Step 5: Discard anycast and multicast list */
if (unregister) {
ipv6_ac_destroy_dev(idev);
ipv6_mc_destroy_dev(idev);
} else {
ipv6_mc_down(idev);
}
idev->tstamp = jiffies;
disable_ipv6判断
在用户层添加IPv6地址时,如果设备(ens33)的配置项disable_ipv6为真,配置失败。
# ip address add 2010::2585 dev ens33
static struct inet6_ifaddr * ipv6_add_addr(struct inet6_dev *idev,
struct ifa6_config *cfg, bool can_block, struct netlink_ext_ack *extack)
{
int addr_type = ipv6_addr_type(cfg->pfx);
struct net *net = dev_net(idev->dev);
...
if (idev->cnf.disable_ipv6) {
err = -EACCES;
goto out;
}
在配置IPv6地址,或者生成链路本地地址时,函数addrconf_add_dev都将判断接口配置的disable_ipv6值。
static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
{
struct inet6_dev *idev;
idev = ipv6_find_idev(dev);
if (IS_ERR(idev))
return idev;
if (idev->cnf.disable_ipv6)
return ERR_PTR(-EACCES);
在DAD检查到地址冲突后,如果为基于MAC地址依据EUI64所生成的IPv6地址,这里将设备配置disable_ipv6设置为真,禁用接口的IPv6功能,并且停用接口上的地址等相关功能。
static void addrconf_dad_work(struct work_struct *w)
{
bool bump_id, disable_ipv6 = false;
...
} else if (ifp->state == INET6_IFADDR_STATE_ERRDAD) {
action = DAD_ABORT;
ifp->state = INET6_IFADDR_STATE_POSTDAD;
if ((dev_net(idev->dev)->ipv6.devconf_all->accept_dad > 1 ||
idev->cnf.accept_dad > 1) &&!idev->cnf.disable_ipv6 &&
!(ifp->flags & IFA_F_STABLE_PRIVACY)) {
addr.s6_addr32[0] = htonl(0xfe800000); addr.s6_addr32[1] = 0;
if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) &&
ipv6_addr_equal(&ifp->addr, &addr)) {
idev->cnf.disable_ipv6 = 1; /* DAD failed for link-local based on MAC */
pr_info("%s: IPv6 being disabled!\n", ifp->idev->dev->name);
disable_ipv6 = true;
....
} else if (action == DAD_ABORT) {
in6_ifa_hold(ifp);
addrconf_dad_stop(ifp, 1);
if (disable_ipv6) addrconf_ifdown(idev->dev, false);
goto out;
在用户层手动添加邻居地址时,如果disable_ipv6为真,不允许添加。
$ ip neigh add 2080::1122 lladdr ec:d6:8a:0f:2f:99 dev ens33
/*
 * Decide whether a neighbour entry may be added on @dev.
 *
 * @dev:    device the neighbour entry would be attached to.
 * @extack: netlink extended ack used to report the refusal reason.
 *
 * Returns false (and sets an extack message) when the device has no
 * inet6_dev or its disable_ipv6 setting is non-zero; true otherwise.
 */
static bool ndisc_allow_add(const struct net_device *dev, struct netlink_ext_ack *extack)
{
struct inet6_dev *idev = __in6_dev_get(dev);
if (!idev || idev->cnf.disable_ipv6) {
NL_SET_ERR_MSG(extack, "IPv6 is disabled on this device");
return false;
}
return true;
}
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack)
{
...
if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
err = -EINVAL;
goto out;
}
在用户层添加nexthop时,内核将验证下一跳设备(ens33)是否开启了IPv6,只有开启状态下,才能成功添加。
$ ip nexthop add id 3 via 2080::11ff dev ens33
$ ip -6 address add 2080::ea2e:27ff dev ens33
int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack)
{
...
if (idev->cnf.disable_ipv6) {
NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
err = -EACCES;
goto out;
}
在创建VXLAN设备时,对于IPv6地址,如果底层设备没有开启IPv6,配置将出错。
$ ip link add vxlan1 type vxlan id 43 group ff02::11ff local 3301::22 dev ens32 dstport 4789
static int vxlan_config_validate(struct net *src_net, struct vxlan_config *conf,
struct net_device **lower, struct vxlan_dev *old, struct netlink_ext_ack *extack)
{
if (conf->remote_ifindex) {
struct net_device *lowerdev;
lowerdev = __dev_get_by_index(src_net, conf->remote_ifindex);
...
#if IS_ENABLED(CONFIG_IPV6)
if (use_ipv6) {
struct inet6_dev *idev = __in6_dev_get(lowerdev);
if (idev && idev->cnf.disable_ipv6) {
NL_SET_ERR_MSG(extack, "IPv6 support disabled by administrator");
return -EPERM;
IPv6发送路径
如下函数ip6_output,如果disable_ipv6为真,不执行发送操作,释放skb。
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
skb->protocol = htons(ETH_P_IPV6);
skb->dev = dev;
if (unlikely(idev->cnf.disable_ipv6)) {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
kfree_skb(skb);
return 0;
}
IPv6接收函数
如下接收函数ip6_rcv_core,如果接收设备的配置项disable_ipv6被置位,丢弃报文。
static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, struct net *net)
{
const struct ipv6hdr *hdr;
...
idev = __in6_dev_get(skb->dev);
if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL ||
!idev || unlikely(idev->cnf.disable_ipv6)) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
goto drop;
}
IPv6模块参数
内核IPv6模块,在加载时可指定disable和disable_ipv6两个参数,后者对应于以上的disable_ipv6的默认值,如果为真,将在所有接口上禁用IPv6功能。前者disable为真,将禁用IPv6模块,如果后续要使能ipv6模块,需要重启机器。
/* Module-wide IPv6 defaults; disable_ipv6 can be overridden at load time
 * through the "disable_ipv6" module parameter declared below.
 */
struct ipv6_params ipv6_defaults = {
.disable_ipv6 = 0,
.autoconf = 1,
};
/* Backing storage for the "disable" module parameter (whole-module switch). */
static int disable_ipv6_mod;
/* "disable": exposed under a different name than its variable; perm 0444. */
module_param_named(disable, disable_ipv6_mod, int, 0444);
MODULE_PARM_DESC(disable, "Disable IPv6 module such that it is non-functional");
/* "disable_ipv6": writes straight into ipv6_defaults.disable_ipv6; perm 0444. */
module_param_named(disable_ipv6, ipv6_defaults.disable_ipv6, int, 0444);
MODULE_PARM_DESC(disable_ipv6, "Disable IPv6 on all interfaces");
/*
 * Report whether the IPv6 module is functional.
 *
 * Returns true when the "disable" module parameter (disable_ipv6_mod) is 0.
 */
bool ipv6_mod_enabled(void)
{
return disable_ipv6_mod == 0;
}
在IPv6模块初始化开始时,如果判断到disable_ipv6_mod为真,将跳过初始化代码。
static int __init inet6_init(void)
{
struct list_head *r;
sock_skb_cb_check_size(sizeof(struct inet6_skb_parm));
/* Register the socket-side information for inet6_create. */
for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r)
INIT_LIST_HEAD(r);
if (disable_ipv6_mod) {
pr_info("Loaded, but administratively disabled, reboot required to enable\n");
goto out;
}
在创建VRF模块的IPv6路由表时,如果ipv6模块没有使能,不进行创建,返回0。
static int vrf_rt6_create(struct net_device *dev)
{
int flags = DST_NOPOLICY | DST_NOXFRM;
struct net_vrf *vrf = netdev_priv(dev);
/* IPv6 can be CONFIG enabled and then disabled runtime */
if (!ipv6_mod_enabled())
return 0;
在添加IPv6单播或者组播路由策略时,也需要进行IPv6模块是否使能的判断。
static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it)
{
struct fib_rule_hdr *frh;
struct nlmsghdr *nlh;
if ((family == AF_INET6 || family == RTNL_FAMILY_IP6MR) &&
!ipv6_mod_enabled())
return 0;
在ip6隧道模块初始化时,首先判断ipv6模块是否使能,如果未使能,不进行初始化,直接返回-EOPNOTSUPP。
static int __init ip6_tunnel_init(void)
{
if (!ipv6_mod_enabled())
return -EOPNOTSUPP;
err = xfrm6_tunnel_register(&ip4ip6_handler, AF_INET);
IPv6相关的还有其它模块,并没有进行ipv6_mod_enabled相关判断。
内核版本 5.10