参考地址:http://blog.csdn.net/xiabodan/article/details/53766927
通过debug我们可以发现,用户空间总共为当前的内核设置了3个路由项如下:
android\system\netd\server\RouteController.cpp ---- modifyRoute
01-30 15:03:55.825 448 4288 E Netd : Jon interface = wlan0,destination = fe80::/64,nexthop = (null),tableType = 1022
01-30 15:03:55.827 448 4288 E Netd : Jon interface = wlan0,destination = 192.168.22.0/24,nexthop = (null),tableType = 1022
01-30 15:03:55.829 448 4288 E Netd : Jon interface = wlan0,destination = 0.0.0.0/0,nexthop = 192.168.22.1,tableType = 1022
RT_TABLE_UNSPEC 0 //未指定的路由表
RT_TABLE_DEFAULT 253 //默认的表(default table)
RT_TABLE_MAIN 254 //主表(main table)
RT_TABLE_LOCAL 255 //本地表(local table)
那么我们怎么在文件系统中查看到路由表项呢:
注意上面红色标注的内容:这是一条默认路由,也就是说,当内核无法在路由表中找到更合适的路由项时就会选中它,数据报文会通过wlan0送出,下一跳的IP地址是192.168.22.1。结合我当前的应用,目的IP地址是192.168.5.174,这个地址无法与当前路由表中的其他路由项匹配,因此内核会使用默认路由将其送出。
当然如果我们知道了路由表的表号也可以直接找到它如下:
Android下路由的详细设置过程:
android\system\netd\server\RouteController.cpp
modifyRoute
action:RTM_NEWROUTE
table: 路由表的索引,当前值为1022
interface:wlan0
destination:0.0.0.0/0 ---代表默认路由
nexthop:下一跳地址 ---当前值为192.168.22.1
modifyIpRoute(action, table, interface, destination, nexthop)
sendNetlinkRequest
这里务必注意创建socket时的最后一个参数(NETLINK_ROUTE),它决定了内核中由哪个netlink接收函数来处理消息,如下:
/*
 * Create the kernel-side NETLINK_ROUTE socket for @net and store it in
 * net->rtnl.
 *
 * The socket is configured with rtnetlink_rcv as its input handler,
 * rtnl_mutex as its callback mutex, RTNLGRP_MAX groups and the
 * NL_CFG_F_NONROOT_RECV flag.
 *
 * Returns 0 on success, or -ENOMEM when netlink_kernel_create() does not
 * hand back a socket.
 */
static int __net_init rtnetlink_net_init(struct net *net)
{
	struct netlink_kernel_cfg cfg = {
		.flags		= NL_CFG_F_NONROOT_RECV,
		.cb_mutex	= &rtnl_mutex,
		.input		= rtnetlink_rcv,
		.groups		= RTNLGRP_MAX,
	};
	struct sock *sk = netlink_kernel_create(net, NETLINK_ROUTE, &cfg);

	if (sk) {
		net->rtnl = sk;
		return 0;
	}

	return -ENOMEM;
}
int sock = socket(AF_NETLINK, SOCK_DGRAM | SOCK_CLOEXEC, NETLINK_ROUTE);
connect(sock,...)
writev(sock, iov, iovlen) != -1
ret = recv(sock, &response, sizeof(response), 0)
这里我们详细分析分析下 modifyIpRoute这个函数
/*
 * Build an rtnetlink route request (rtmsg plus RTA_* attributes) as an iovec
 * array and hand it to sendNetlinkRequest().
 *
 * This is an abridged excerpt: the parameter list and two branches are elided
 * with "...". From the uses below, the parameters include action, table,
 * interface, destination and nexthop.
 *
 * Returns -EINVAL when nexthop cannot be converted by inet_pton(); otherwise
 * returns whatever sendNetlinkRequest() returns.
 *
 * Concrete values quoted in the comments come from the author's trace of the
 * default-route request (destination 0.0.0.0/0, nexthop 192.168.22.1).
 */
WARN_UNUSED_RESULT int modifyIpRoute(uint16_t action,...)
{
/* Binary form of the destination address; in the traced request the destination IP is 0.0.0.0. */
uint8_t rawAddress[sizeof(in6_addr)];
/* Address family filled in by parsePrefix(); AF_INET for the traced IPv4 request. */
uint8_t family;
/* Prefix (netmask) length, i.e. the number after the '/' in "0.0.0.0/0"; 0 in the traced request. */
uint8_t prefixLength;
// Per the author's trace rawLength is 4 here (4 for every IPv4 prefix, 16 for IPv6).
// NOTE(review): this excerpt never checks rawLength for an error value before using it as a
// length below — confirm against the full source.
int rawLength = parsePrefix(destination, &family, rawAddress, sizeof(rawAddress),
&prefixLength);
uint8_t type = RTN_UNICAST;
// NOTE(review): ifindex is not assigned anywhere in this excerpt, yet its address is placed in
// the iovec below; presumably the assignment was dropped from the excerpt — confirm.
uint32_t ifindex;
// Binary form of the next-hop address; the traced next hop is 192.168.22.1.
uint8_t rawNexthop[sizeof(in6_addr)];
if (nexthop && !strcmp(nexthop, "unreachable")) {
...
} else if (nexthop && !strcmp(nexthop, "throw")) {
...
} else {
// If a next hop was supplied, convert its textual address to binary form.
if (nexthop && inet_pton(family, nexthop, rawNexthop) <= 0) {
return -EINVAL;
}
}
// Fill in the route message header.
// The scope is RT_SCOPE_UNIVERSE when a next hop is given and RT_SCOPE_LINK otherwise.
rtmsg route = {
.rtm_protocol = RTPROT_STATIC,
.rtm_type = type,
.rtm_family = family,
.rtm_dst_len = prefixLength,
.rtm_scope = static_cast<uint8_t>(nexthop ? RT_SCOPE_UNIVERSE : RT_SCOPE_LINK),
};
// Attribute headers for the destination and the gateway; both carry rawLength bytes of
// payload (4 bytes each for IPv4).
rtattr rtaDst = { U16_RTA_LENGTH(rawLength), RTA_DST };
rtattr rtaGateway = { U16_RTA_LENGTH(rawLength), RTA_GATEWAY };
// The pieces submitted to the kernel, in order.
// The first slot is left empty here; presumably sendNetlinkRequest() fills in the netlink
// header — confirm.
iovec iov[] = {
{ NULL, 0 },
// Route message header.
{ &route, sizeof(route) },
// Routing table attribute and its value.
{ &RTATTR_TABLE, sizeof(RTATTR_TABLE) },
{ &table, sizeof(table) },
// Destination attribute header (carries the payload length).
{ &rtaDst, sizeof(rtaDst) },
// Destination address in binary form.
{ rawAddress, static_cast<size_t>(rawLength) },
// Output interface attribute and index; zero-length when interface == OIF_NONE.
{ &RTATTR_OIF, interface != OIF_NONE ? sizeof(RTATTR_OIF) : 0 },
{ &ifindex, interface != OIF_NONE ? sizeof(ifindex) : 0 },
// Gateway attribute header; zero-length when there is no next hop.
{ &rtaGateway, nexthop ? sizeof(rtaGateway) : 0 },
// Next-hop address in binary form; zero-length when there is no next hop.
{ rawNexthop, nexthop ? static_cast<size_t>(rawLength) : 0 },
};
// When adding a route (RTM_NEWROUTE) the flags are NETLINK_CREATE_REQUEST_FLAGS.
uint16_t flags = (action == RTM_NEWROUTE) ? NETLINK_CREATE_REQUEST_FLAGS :
NETLINK_REQUEST_FLAGS;
return sendNetlinkRequest(action, flags, iov, ARRAY_SIZE(iov));
}
最后参数是通过sendNetlinkRequest->writev下发到内核的,而writev是一个系统调用,我们看下这个系统调用是怎么执行的
kernel\fs\Read_write.c
SYSCALL_DEFINE3(writev, unsigned long, fd,...)
ret = vfs_writev(f.file, vec, vlen, &pos);
do_readv_writev
do_sync_readv_writev
sock_aio_write
do_sock_write
msg->msg_name = NULL;
msg->msg_namelen = 0;
msg->msg_control = NULL;
msg->msg_controllen = 0;
msg->msg_iov= (struct iovec *)iov;
msg->msg_iovlen = nr_segs;
__sock_sendmsg(iocb, sock, msg, size)
__sock_sendmsg_nosec
sock->ops->sendmsg(iocb, sock, msg, size)
毫无疑问这里需要知道sock->ops到底指向何处
kernel\net\netlink\Af_netlink.c
/*
 * Protocol-family descriptor for PF_NETLINK: netlink_create is the hook
 * used to create sockets of this family.
 */
static const struct net_proto_family netlink_family_ops = {
	.owner	= THIS_MODULE,	/* for consistency 8) */
	.create	= netlink_create,
	.family	= PF_NETLINK,
};
netlink_proto_init
sock_register(&netlink_family_ops)
//在用户态的socket创建的时候会调用这儿的netlink_create
netlink_create
__netlink_create
sock->ops = &netlink_ops;
因此问题回到了netlink_ops中:
static const struct proto_ops netlink_ops = {
.family = PF_NETLINK,
.owner = THIS_MODULE,
.release = netlink_release,
.bind = netlink_bind,
.connect = netlink_connect,
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = netlink_getname,
.poll = netlink_poll,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = netlink_setsockopt,
.getsockopt = netlink_getsockopt,
.sendmsg = netlink_sendmsg,
.recvmsg = netlink_recvmsg,
.mmap = netlink_mmap,
.sendpage = sock_no_sendpage,
};
因此sock->ops->sendmsg指向的就是netlink_sendmsg
netlink_sendmsg
构造一个skb
if (msg->msg_namelen) {
....
}else{
这里要留意一下下面2个参数都被赋值为0了,原因是在用户态connect函数传进去的地址信息设置的如下:
const sockaddr_nl NETLINK_ADDRESS = {AF_NETLINK, 0, 0, 0};就是最后的2个参数
dst_portid = nlk->dst_portid; //这里的portid注意下:如果这个值为0,说明消息是发往内核的,而我们这儿这条消息正是发给内核的
dst_group = nlk->dst_group;
}
skb = alloc_skb(len, GFP_KERNEL);
我们知道这里的msg->msg_iov正是我们用户态发送到内核的数据,现在将这部分数据拷贝到skb的数据域中
if (memcpy_fromiovec(skb_put(skb, len),msg->msg_iov, len)) {
kfree_skb(skb);
goto out;
}
将skb发送出去
err =netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT);
netlink_unicast
sk = netlink_getsockbyportid(ssk, portid);
通过port号查找sock结构,这里的portid为0
->sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, portid)
if (netlink_is_kernel(sk))
return netlink_unicast_kernel(sk, skb, ssk);
那么既然上面要查找sock了,那肯定之前有地方插入了sock,不然这儿怎么找得到呢。我们看看
kernel\net\core\Rtnetlink.c
/*
 * Create the kernel-side NETLINK_ROUTE socket for @net and store it in
 * net->rtnl.
 *
 * Returns 0 on success, or -ENOMEM when netlink_kernel_create() does not
 * hand back a socket.
 */
static int __net_init rtnetlink_net_init(struct net *net)
{
	struct netlink_kernel_cfg cfg = {
		.flags		= NL_CFG_F_NONROOT_RECV,
		.cb_mutex	= &rtnl_mutex,
		/* Pay special attention to rtnetlink_rcv: it is the input handler. */
		.input		= rtnetlink_rcv,
		.groups		= RTNLGRP_MAX,
	};
	struct sock *sk = netlink_kernel_create(net, NETLINK_ROUTE, &cfg);

	if (sk) {
		net->rtnl = sk;
		return 0;
	}

	return -ENOMEM;
}
netlink_kernel_create
->__netlink_kernel_create
/*
 * Create a kernel-side netlink socket for protocol `unit`, install the
 * receive callback from cfg->input, mark the socket as a kernel socket and
 * insert it with port id 0.
 *
 * This is an abridged excerpt: the tail of the function (including the
 * out_sock_release* labels targeted by the gotos) is elided with "...".
 * Returns NULL when sock_create_lite() fails; other return paths are not
 * visible here.
 */
struct sock *
__netlink_kernel_create(struct net *net, int unit, struct module *module,
struct netlink_kernel_cfg *cfg)
{
struct socket *sock;
struct sock *sk;
struct netlink_sock *nlk;
struct listeners *listeners = NULL;
struct mutex *cb_mutex = cfg ? cfg->cb_mutex : NULL;
unsigned int groups;
if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
return NULL;
if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0)
goto out_sock_release_nosk;
sk = sock->sk;
sk_change_net(sk, net);
/* NOTE(review): `groups` is read here with no visible assignment in this
 * excerpt; presumably the lines that set it were dropped — confirm. */
listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
if (!listeners)
goto out_sock_release;
sk->sk_data_ready = netlink_data_ready;
// This is where the socket's receive function is installed — note it well.
if (cfg && cfg->input)
nlk_sk(sk)->netlink_rcv = cfg->input;
nlk = nlk_sk(sk);
// Set the NETLINK_KERNEL_SOCKET flag: it marks this socket as the one that receives
// user-space messages addressed to the kernel.
nlk->flags |= NETLINK_KERNEL_SOCKET;
// Here is where the socket gets inserted; the last argument (port id) is 0, which matches
// the destination port id used on the sending side.
if (netlink_insert(sk, net, 0))
goto out_sock_release;
...
}
OK,再看一个函数
/*
 * Report whether @sk carries the NETLINK_KERNEL_SOCKET flag.
 * Returns non-zero when the flag is set, 0 otherwise.
 */
static inline int netlink_is_kernel(struct sock *sk)
{
	int is_kernel = nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;

	return is_kernel;
}
所以portid为0的时候,
netlink_unicast最终会触发netlink_unicast_kernel
/*
 * Deliver @skb, sent by socket @ssk, to the kernel-side netlink socket @sk.
 *
 * When @sk has a receive callback, the skb is handed to it and then
 * consumed; the return value is the skb length recorded before the call.
 * Without a callback the skb is freed. The reference on @sk is dropped in
 * both cases.
 *
 * This is an abridged excerpt: the local declarations (including the
 * initial value of ret and the lookup of nlk) are elided with "...".
 */
static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
				  struct sock *ssk)
{
	...
	if (nlk->netlink_rcv != NULL) {
		ret = skb->len;
		netlink_skb_set_owner_r(skb, sk);
		NETLINK_CB(skb).sk = ssk;
		/*
		 * The call ends up in the netlink_rcv function pointer, i.e.
		 * cfg->input, which for the NETLINK_ROUTE socket is
		 * rtnetlink_rcv.
		 */
		nlk->netlink_rcv(skb);
		consume_skb(skb);
	} else {
		kfree_skb(skb);
	}
	sock_put(sk);
	return ret;
}
那么代码流程最终走向路由的netlink接收函数
rtnetlink_rcv,而rtnetlink_rcv会调用netlink_rcv_skb:
rtnetlink_rcv
netlink_rcv_skb(skb, & rtnetlink_rcv_msg);
/*
 * Take the netlink message header from @skb and pass it to the callback @cb.
 *
 * This is an abridged excerpt: the code before and after the callback
 * invocation (including the return statement) is elided with "...".
 */
int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
						   struct nlmsghdr *))
{
	struct nlmsghdr *nlh;
	int err;
	...
	/*
	 * Fetch the header of the skb. For the traced request it carries just
	 * three pieces of information, set by sendNetlinkRequest()
	 * (RouteController.cpp):
	 *   nlh->nlmsg_type:  RTM_NEWROUTE
	 *   nlh->nlmsg_flags: NETLINK_CREATE_REQUEST_FLAGS
	 *   nlh->nlmsg_len:   total length of the data sent from user space,
	 *                     header included
	 */
	nlh = nlmsg_hdr(skb);
	err = cb(skb, nlh);	/* cb points to rtnetlink_rcv_msg */
	...
}
ok,我们继续分析
rtnetlink_rcv_msg
/*
 * Dispatch one rtnetlink message: derive the handler from the message's
 * address family and type, call it, and return its result.
 *
 * This is an abridged excerpt: part of the body is elided with "...".
 * Concrete values quoted in the comments come from the author's trace of an
 * RTM_NEWROUTE request for an IPv4 route.
 */
static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
rtnl_doit_func doit;
int sz_idx, kind;
int family;
int type;
type = nlh->nlmsg_type;
// Author's trace: type is 8 once RTM_BASE has been subtracted.
type -= RTM_BASE;
// The message payload corresponds to the second entry of user space's iovec iov[], i.e. the
// route message header; its first byte is the address family.
// Author's trace: family is AF_INET.
family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
// Author's trace: sz_idx is 2.
sz_idx = type>>2;
// Author's trace: kind is 0.
kind = type&3;
...
// Look up the handler by family (AF_INET) and type (8).
doit = rtnl_get_doit(family, type);
// Call it; for this request doit is inet_rtm_newroute.
return doit(skb, nlh);
}
kernel\net\ipv4\Fib_frontend.c
ip_fib_init
->rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
/*
 * Handler for an IPv4 RTM_NEWROUTE request.
 *
 * Steps:
 *   1. Convert the netlink message into a struct fib_config.
 *   2. Obtain the routing table named by cfg.fc_table via fib_new_table().
 *   3. Insert the route described by cfg into that table.
 *
 * Returns the negative error from rtm_to_fib_config(), -ENOBUFS when no
 * table could be obtained, or the result of fib_table_insert().
 */
static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct fib_config cfg;		/* route configuration */
	struct fib_table *table;	/* table the route goes into */
	int rc;

	/* Copy the attributes and route parameters from user space into cfg. */
	rc = rtm_to_fib_config(net, skb, nlh, &cfg);
	if (rc < 0)
		return rc;

	table = fib_new_table(net, cfg.fc_table);
	if (!table)
		return -ENOBUFS;

	return fib_table_insert(table, &cfg);
}
rtm_to_fib_config:
/*
 * Translate a route request received over netlink into a struct fib_config.
 *
 * @net: network namespace recorded in cfg->fc_nlinfo.nl_net
 * @skb: buffer carrying the request; its netlink control block supplies
 *       the port id
 * @nlh: netlink header of the request
 * @cfg: output; zeroed and then filled from the rtmsg header and the
 *       RTA_* attributes that follow it
 *
 * Returns 0 on success, or the negative error from nlmsg_validate() when
 * the message does not pass validation. In that case @cfg is left
 * untouched.
 */
static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
			     struct nlmsghdr *nlh, struct fib_config *cfg)
{
	struct nlattr *attr;
	int err, remaining;
	struct rtmsg *rtm;

	/*
	 * Validate before parsing: the attribute loop below reads fixed-size
	 * values with nla_get_be32()/nla_get_u32(), so a message that fails
	 * validation must not reach it.
	 */
	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
	if (err < 0)
		goto errout;

	memset(cfg, 0, sizeof(*cfg));

	/* Copy the route parameters set by user space out of the rtmsg header. */
	rtm = nlmsg_data(nlh);
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_tos = rtm->rtm_tos;
	cfg->fc_table = rtm->rtm_table;
	cfg->fc_protocol = rtm->rtm_protocol;
	cfg->fc_scope = rtm->rtm_scope;
	cfg->fc_type = rtm->rtm_type;
	cfg->fc_flags = rtm->rtm_flags;
	/* Netlink header flags (NETLINK_CREATE_REQUEST_FLAGS in the traced request). */
	cfg->fc_nlflags = nlh->nlmsg_flags;

	/* Port id taken from the skb's netlink control block. */
	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = net;

	/* Walk the attributes supplied by user space and store each one in cfg. */
	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
		switch (nla_type(attr)) {
		case RTA_DST:
			cfg->fc_dst = nla_get_be32(attr);
			break;
		case RTA_OIF:
			cfg->fc_oif = nla_get_u32(attr);
			break;
		case RTA_GATEWAY:
			cfg->fc_gw = nla_get_be32(attr);
			break;
		case RTA_PRIORITY:
			cfg->fc_priority = nla_get_u32(attr);
			break;
		case RTA_PREFSRC:
			cfg->fc_prefsrc = nla_get_be32(attr);
			break;
		case RTA_METRICS:
			cfg->fc_mx = nla_data(attr);
			cfg->fc_mx_len = nla_len(attr);
			break;
		case RTA_MULTIPATH:
			cfg->fc_mp = nla_data(attr);
			cfg->fc_mp_len = nla_len(attr);
			break;
		case RTA_FLOW:
			cfg->fc_flow = nla_get_u32(attr);
			break;
		case RTA_TABLE:
			/* Overrides the table id taken from rtm->rtm_table above. */
			cfg->fc_table = nla_get_u32(attr);
			break;
		}
	}

	return 0;
errout:
	return err;
}
fib_new_table:
/*
 * Return the routing table with the given id, creating and registering it
 * when it does not exist yet.
 *
 * An id of 0 is treated as RT_TABLE_MAIN. Returns NULL when a new table
 * cannot be obtained from fib_trie_table().
 */
struct fib_table *fib_new_table(struct net *net, u32 id)
{
	struct fib_table *table;
	unsigned int bucket;

	if (!id)
		id = RT_TABLE_MAIN;

	/* Reuse the table when one with this id is already known. */
	table = fib_get_table(net, id);
	if (table)
		return table;

	/*
	 * Otherwise get a fresh table for this id (1022 in the traced request;
	 * presumably fib_trie_table() records the id in the table — confirm).
	 */
	table = fib_trie_table(id);
	if (!table)
		return NULL;

	/* The local, main and default tables are additionally cached in net->ipv4. */
	if (id == RT_TABLE_LOCAL)
		net->ipv4.fib_local = table;
	else if (id == RT_TABLE_MAIN)
		net->ipv4.fib_main = table;
	else if (id == RT_TABLE_DEFAULT)
		net->ipv4.fib_default = table;

	/*
	 * Link the new table into its hash bucket. The bucket index is only
	 * the low bits of the id, so different ids can share a bucket: e.g. if
	 * FIB_TABLE_HASHSZ is 256, id 1022 lands in bucket 254, the same
	 * bucket RT_TABLE_MAIN (254) maps to. It is still a separate table.
	 */
	bucket = id & (FIB_TABLE_HASHSZ - 1);
	hlist_add_head_rcu(&table->tb_hlist, &net->ipv4.fib_table_hash[bucket]);
	return table;
}
再之后关于路由的插入请参考:
trie路由--基于Linux3.10