1.介绍
NETLINK Kernel-user communication protocol;
Netlink Messages and Attributes Interface;
Netlink主要用于用户空间与内核之间的通信;它是一种面向数据报的服务;
在用户空间使用标准Socket接口即可,而内核空间则使用内部API接口;
2.协议
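2.1消息格式
一个netlink报文由一条或多条消息首尾相接组成:[nlmsghdr][payload][nlmsghdr][payload]...;每条消息的payload依次是family header(例如路由消息的struct rtmsg)和若干个属性(struct rtattr / struct nlattr,均为"类型-长度-值"结构),各部分按4字节对齐。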
2.2 宏、结构体定义说明
/*
 * Netlink protocol numbers, excerpted from include/uapi/linux/netlink.h.
 * One of these values is passed as the protocol argument when the netlink
 * socket is created, e.g. socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE).
 */
#define NETLINK_ROUTE 0 /* Routing/device hook */
#define NETLINK_UNUSED 1 /* Unused number */
#define NETLINK_USERSOCK 2 /* Reserved for user mode socket protocols */
#define NETLINK_FIREWALL 3 /* Unused number, formerly ip_queue */
#define NETLINK_SOCK_DIAG 4 /* socket monitoring */
#define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */
#define NETLINK_XFRM 6 /* ipsec */
#define NETLINK_SELINUX 7 /* SELinux event notifications */
#define NETLINK_ISCSI 8 /* Open-iSCSI */
#define NETLINK_AUDIT 9 /* auditing */
#define NETLINK_FIB_LOOKUP 10
#define NETLINK_CONNECTOR 11
#define NETLINK_NETFILTER 12 /* netfilter subsystem */
#define NETLINK_IP6_FW 13
#define NETLINK_DNRTMSG 14 /* DECnet routing messages */
#define NETLINK_KOBJECT_UEVENT 15 /* Kernel messages to userspace */
#define NETLINK_GENERIC 16
/* leave room for NETLINK_DM (DM Events) */
#define NETLINK_SCSITRANSPORT 18 /* SCSI Transports */
#define NETLINK_ECRYPTFS 19
#define NETLINK_RDMA 20
#define NETLINK_CRYPTO 21 /* Crypto layer */
#define NETLINK_SMC 22 /* SMC monitoring */
#define NETLINK_INET_DIAG NETLINK_SOCK_DIAG /* alias: expands to NETLINK_SOCK_DIAG */
/*
 * Netlink control message types, excerpted from include/uapi/linux/netlink.h.
 * These appear in nlmsghdr.nlmsg_type of reply messages to report status.
 */
#define NLMSG_NOOP 0x1 /* Nothing. */
#define NLMSG_ERROR 0x2 /* Error */
#define NLMSG_DONE 0x3 /* End of a dump */
#define NLMSG_OVERRUN 0x4 /* Data lost */
/*
 * Netlink message flag bits for nlmsghdr.nlmsg_flags, excerpted from
 * include/uapi/linux/netlink.h. Several flags may be OR-ed together.
 * The modifier bits (0x100 and above) are reused: the same value has a
 * different meaning for GET, NEW and DELETE requests, as grouped below.
 */
#define NLM_F_REQUEST 0x01 /* It is request message. */
#define NLM_F_MULTI 0x02 /* Multipart message, terminated by NLMSG_DONE */
#define NLM_F_ACK 0x04 /* Reply with ack, with zero or error code */
#define NLM_F_ECHO 0x08 /* Echo this request */
#define NLM_F_DUMP_INTR 0x10 /* Dump was inconsistent due to sequence change */
#define NLM_F_DUMP_FILTERED 0x20 /* Dump was filtered as requested */
/* Modifiers to GET request */
#define NLM_F_ROOT 0x100 /* specify tree root */
#define NLM_F_MATCH 0x200 /* return all matching */
#define NLM_F_ATOMIC 0x400 /* atomic GET */
#define NLM_F_DUMP (NLM_F_ROOT|NLM_F_MATCH) /* both GET modifiers combined */
/* Modifiers to NEW request */
#define NLM_F_REPLACE 0x100 /* Override existing */
#define NLM_F_EXCL 0x200 /* Do not touch, if it exists */
#define NLM_F_CREATE 0x400 /* Create, if it does not exist */
#define NLM_F_APPEND 0x800 /* Add to end of list */
/* Modifiers to DELETE request */
#define NLM_F_NONREC 0x100 /* Do not delete recursively */
...
/*
 * rtnetlink message types, excerpted from include/uapi/linux/rtnetlink.h.
 * These are the nlmsghdr.nlmsg_type values used for requests on a
 * NETLINK_ROUTE socket. Each object kind shown here (link, address, route)
 * starts its NEW/DEL/GET run at an explicitly assigned number.
 */
enum {
RTM_NEWLINK = 16,
RTM_DELLINK,
RTM_GETLINK,
RTM_SETLINK,
RTM_NEWADDR = 20,
RTM_DELADDR,
RTM_GETADDR,
RTM_NEWROUTE = 24,
RTM_DELROUTE,
RTM_GETROUTE,
...
};
Group types values for NETLINK_ROUTE
/*
 * RTnetlink multicast groups, excerpted from include/uapi/linux/rtnetlink.h.
 * The enumerators are numbered consecutively, starting with RTNLGRP_NONE = 0.
 */
enum rtnetlink_groups {
RTNLGRP_NONE,
RTNLGRP_LINK,
RTNLGRP_NOTIFY,
RTNLGRP_NEIGH,
RTNLGRP_TC,
RTNLGRP_IPV4_IFADDR,
RTNLGRP_IPV4_MROUTE,
RTNLGRP_IPV4_ROUTE,
RTNLGRP_IPV4_RULE,
RTNLGRP_IPV6_IFADDR,
RTNLGRP_IPV6_MROUTE,
RTNLGRP_IPV6_ROUTE,
RTNLGRP_IPV6_IFINFO,
...
};
注意:上面的RTM_*消息类型(Types values of NETLINK_ROUTE)和RTNLGRP_*多播组(Group values of NETLINK_ROUTE)只属于网络相关的NETLINK_ROUTE协议;SELinux、Netfilter、IPsec(XFRM)等其它子系统各有自己的一套netlink接口定义。
/*
 * Netlink socket address. The example program in this file zero-initialises
 * it, sets nl_family to AF_NETLINK and passes it to bind().
 */
struct sockaddr_nl {
__kernel_sa_family_t nl_family; /* AF_NETLINK */
unsigned short nl_pad; /* zero */
__u32 nl_pid; /* port ID */
__u32 nl_groups; /* multicast groups mask */
};
nl_family: 协议族,AF_NETLINK / PF_NETLINK
nl_pid: 端口ID。可以手动指定,也可以置0交由内核分配(一般使用内核分配),分配到的值可通过getsockname函数获取。端口ID的作用类似于网络端口号:它唯一标识一个netlink套接字,通信双方依靠它寻址。
nl_groups:如果要监控内核某些子系统的状态,需要加入到指定的组(Group values of NETLINK_ROUTE列表)。如需要关心网络方面的动向,可以加入RTNLGRP_LINK(网卡状态)、RTNLGRP_IPV4_ROUTE(路由信息)、RTNLGRP_IPV4_IFADDR(IP地址状态)组。注意nl_groups是位掩码而不是组编号:组g对应的位是 1 << (g - 1),多个组按位或后填入。
/*
 * Header that starts every netlink message, excerpted from
 * include/uapi/linux/netlink.h.
 */
struct nlmsghdr {
__u32 nlmsg_len; /* Length of message including header */
__u16 nlmsg_type; /* Message content */
__u16 nlmsg_flags; /* Additional flags */
__u32 nlmsg_seq; /* Sequence number */
__u32 nlmsg_pid; /* Sending process port ID */
};
nlmsg_len: 整个消息报文长度,包含头部长度
nlmsg_type: 请求类型/响应状态;
- 请求时作为类型,网络相关数据则对应的(Types values of NETLINK_ROUTE),另外需要别的子系统接口信息,可查询内核源文件或者文档
- 响应时作为状态,取值见上面的NLMSG_NOOP、NLMSG_ERROR、NLMSG_DONE、NLMSG_OVERRUN定义
nlmsg_flags:消息标志,可按位或设置多个选项(见上面的NLM_F_*定义),具体取值应根据所涉及的子系统和请求类型(GET/NEW/DELETE)而定
nlmsg_seq:消息序列号,由发送方自行设置。内核不解释该字段的含义,只是在回应报文中原样带回,因此可以用它校验回应报文是否属于本次请求,建议处理。
nlmsg_pid:发送者的端口ID,类似于身份证号码,对于内核的端口ID为0,而用户进程则在创建Socket时指定(bind函数会进行设置)。另外该字段可校验回应的报文是否属于本端socket
3.用户空间的使用
注意:在用户空间使用标准Socket API且创建Socket时,只能是SOCK_RAW或者SOCK_DGRAM类型(内核接口的限制);
3.1打印路由表的案例(见代码注释、报文整体是按 2.1消息格式)
/*
 * Convert the address stored in a route attribute to text.
 * Returns buf on success, or "?" when inet_ntop() cannot format the address
 * (for example an address family it does not support), so that callers
 * never hand a NULL pointer to printf("%s").
 */
static const char *route_addr_text(int family, struct rtattr *attr, char *buf, int buflen)
{
    const char *text = inet_ntop(family, RTA_DATA(attr), buf, buflen);

    return text ? text : "?";
}

/*
 * Print the route entries contained in a buffer of netlink messages.
 *
 * n   - first message of the reply buffer; the buffer is a sequence of
 *       [nlmsghdr][rtmsg][rtattr...] messages
 * len - number of valid bytes in the buffer starting at n
 *
 * One line is printed per route message. Control messages (nlmsg_type below
 * NLMSG_MIN_TYPE, e.g. NLMSG_DONE / NLMSG_ERROR) and messages too short to
 * hold a struct rtmsg are skipped.
 */
void print_route_table(struct nlmsghdr *n, int len)
{
    /* Walk the buffer message by message; NLMSG_NEXT updates len. */
    for (; NLMSG_OK(n, len); n = NLMSG_NEXT(n, len)) {
        /* Attribute table indexed by attribute type, for easy lookup. */
        struct rtattr *tb[RTA_MAX+1] = {NULL};
        struct rtmsg *familyhdr;
        struct rtattr *attrs;
        int attrs_len;
        char buf[256] = {0};
        int buflen = sizeof(buf);

        /* Control messages do not carry a struct rtmsg payload. */
        if (n->nlmsg_type < NLMSG_MIN_TYPE) {
            continue;
        }
        if (n->nlmsg_len < NLMSG_LENGTH(sizeof(*familyhdr))) {
            continue;
        }

        /* Family header: basic information about the route entry. */
        familyhdr = (struct rtmsg *)NLMSG_DATA(n);
        /* Attributes (dst, src, gateway, oif, metric, table, ...) follow it. */
        attrs = RTM_RTA(familyhdr);
        /* Bytes of attribute data = message length minus both headers. */
        attrs_len = (int)RTM_PAYLOAD(n);

        /*
         * Optional filters, not applied here: keep only AF_INET / AF_INET6
         * entries (familyhdr->rtm_family), or only the main table
         * (familyhdr->rtm_table == RT_TABLE_MAIN).
         */

        /* Collect the attributes of THIS message only; RTA_NEXT must
         * consume attrs_len, never the outer message length. */
        while (RTA_OK(attrs, attrs_len)) {
            if (attrs->rta_type <= RTA_MAX) {
                tb[attrs->rta_type] = attrs;
            }
            attrs = RTA_NEXT(attrs, attrs_len);
        }

        /* Print the commonly used attributes of the entry. */
        if (tb[RTA_DST]) {
            printf("%s/%u ", route_addr_text(familyhdr->rtm_family, tb[RTA_DST], buf, buflen),
                   familyhdr->rtm_dst_len);
        } else {
            printf("default ");
        }
        if (tb[RTA_SRC]) {
            printf("from %s/%u ", route_addr_text(familyhdr->rtm_family, tb[RTA_SRC], buf, buflen),
                   familyhdr->rtm_src_len);
        } else if (familyhdr->rtm_src_len) {
            printf("from 0/%u ", familyhdr->rtm_src_len);
        }
        if (tb[RTA_GATEWAY]) {
            printf("via %s ", route_addr_text(familyhdr->rtm_family, tb[RTA_GATEWAY], buf, buflen));
        }
        if (tb[RTA_OIF]) {
            char ifname[IF_NAMESIZE] = {0};
            unsigned int ifindex = *(unsigned int *)RTA_DATA(tb[RTA_OIF]);

            /* Fall back to the numeric index when the name lookup fails. */
            if (if_indextoname(ifindex, ifname)) {
                printf("dev %s ", ifname);
            } else {
                printf("dev if%u ", ifindex);
            }
        }
        if (tb[RTA_PRIORITY]) {
            printf("metric %u ", *(uint32_t *)RTA_DATA(tb[RTA_PRIORITY]));
        }
        if (tb[RTA_TABLE]) {
            printf("table %u ", *(uint32_t *)RTA_DATA(tb[RTA_TABLE]));
        }
        printf("\n");
    }
}
/*
 * Dump the IPv4 routing table through an rtnetlink socket and print it.
 *
 * Steps: create and bind a NETLINK_ROUTE socket, send one RTM_GETROUTE dump
 * request, then read reply datagrams until NLMSG_DONE arrives, handing every
 * RTM_NEWROUTE message to print_route_table().
 *
 * Returns 0 on success, -ENOENT if the socket cannot be created or bound,
 * and -EIO on a send/receive failure or a netlink error reply.
 */
int main(void)
{
    int seq = 100;
    int sock = -1;
    int ret = 0;
    int len = 0;
    int done = 0;
    struct sockaddr_nl sa = {0,};
    struct msghdr msg = {NULL,};
    struct iovec iov = {NULL,};
    /* Receive buffer declared as nlmsghdr elements so that it is correctly
     * aligned for the NLMSG_* macros; each datagram is processed as soon as
     * it arrives, so the buffer never has to hold the whole dump. */
    struct nlmsghdr buf[8192 / sizeof(struct nlmsghdr)] = {{0}};
    struct nlmsghdr *nh = NULL;
    /* Route dump request: netlink header followed by the family header.
     * Without special filtering the generic struct rtgenmsg (which only has
     * rtgen_family) could be used instead of struct rtmsg. */
    struct {
        struct nlmsghdr n;
        struct rtmsg r;
    } req = {
        .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)), /* netlink header + payload */
        .n.nlmsg_type = RTM_GETROUTE, /* other request types: RTM_GETLINK, RTM_GETADDR */
        .n.nlmsg_flags = NLM_F_DUMP|NLM_F_REQUEST, /* NLM_F_DUMP = NLM_F_ROOT|NLM_F_MATCH */
        .n.nlmsg_seq = ++seq, /* used below to match replies to this request */
        .r.rtm_family = AF_INET /* AF_INET, AF_INET6, or AF_UNSPEC for all families */
    };

    /* Create a netlink socket for the NETLINK_ROUTE protocol. */
    sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
    if (sock < 0) {
        printf("create netlink socket: %s\n", strerror(errno));
        return -ENOENT;
    }

    /* The family must be AF_NETLINK, otherwise the netlink module will not
     * be found. nl_pid is left at 0 so that the kernel assigns the port ID. */
    sa.nl_family = AF_NETLINK;
    if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
        printf("bind to netlink: %s\n", strerror(errno));
        close(sock);
        return -ENOENT;
    }

    /* Wrap the request in a msghdr. msg_name is left unset: with no
     * destination address the message goes to the kernel (port ID 0). */
    iov.iov_base = (void *)&req.n;
    iov.iov_len = req.n.nlmsg_len;
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;

    ret = sendmsg(sock, &msg, 0);
    if (ret < 0) {
        printf("send to netlink: %s\n", strerror(errno));
        close(sock);
        return -EIO;
    }

    /* Read reply datagrams until NLMSG_DONE, an error reply or an I/O failure.
     * Each datagram is laid out as [nlmsghdr][payload][nlmsghdr][payload]... */
    while (!done) {
        ret = recv(sock, buf, sizeof(buf), 0);
        if (ret < 0) {
            if (errno == EINTR) {
                continue; /* interrupted by a signal: retry */
            }
            /* recv() returns -1 on failure; the reason is in errno. */
            printf("recv from netlink: %s\n", strerror(errno));
            close(sock);
            return -EIO;
        }
        if (ret == 0) {
            printf("recv from netlink: unexpected end of data\n");
            close(sock);
            return -EIO;
        }

        /* Inspect every message of the datagram, not only the first one:
         * NLMSG_DONE may follow route messages in the same datagram. */
        len = ret;
        for (nh = buf; NLMSG_OK(nh, len); nh = NLMSG_NEXT(nh, len)) {
            if (nh->nlmsg_seq != req.n.nlmsg_seq) {
                continue; /* not a reply to our request */
            }
            if (NLMSG_DONE == nh->nlmsg_type) {
                done = 1;
                break;
            }
            if (NLMSG_ERROR == nh->nlmsg_type) {
                struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(nh);

                if (err->error != 0) { /* error holds a negative errno value */
                    printf("netlink error: %s\n", strerror(-err->error));
                    close(sock);
                    return -EIO;
                }
                continue;
            }
            if (RTM_NEWROUTE == nh->nlmsg_type) {
                /* Print this single route message. */
                print_route_table(nh, (int)nh->nlmsg_len);
            }
        }
    }

    /* Release the socket when finished. */
    close(sock);
    return 0;
}
3.2 输出路由表的结果
default via 192.168.47.2 dev eno16777736 metric 100 table 254
172.17.0.0/16 dev docker0 table 254
192.168.47.0/24 dev eno16777736 metric 100 table 254
192.168.122.0/24 dev virbr0 table 254
127.0.0.0/32 dev lo table 255
127.0.0.0/8 dev lo table 255
127.0.0.1/32 dev lo table 255
127.255.255.255/32 dev lo table 255
172.17.0.0/32 dev docker0 table 255
172.17.0.1/32 dev docker0 table 255
172.17.255.255/32 dev docker0 table 255
3.3 编译环境要求
linux 3.10.0(2.6以上支持NETLINK)
gcc version 4.8.5
—越简单,易接受。在折腾路上…