三唔识七的NETLINK

1.介绍

NETLINK Kernel-user communication protocol;
Netlink Messages and Attributes Interface;
Netlink主要用于用户空间与内核之间的通信;它是一种面向数据报的服务;
在用户空间使用标准Socket接口即可,而内核空间则使用内部API接口;

用户空间-内核空间图

2.协议

2.1消息格式

NetLink协议消息格式

2.2 宏、结构体定义说明

netlink支持的协议

/*
 * Netlink protocol numbers, excerpted from include/uapi/linux/netlink.h.
 * One of these is passed as the third argument when creating the socket,
 * e.g. socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE) in the demo below.
 */

#define NETLINK_ROUTE		0	/* Routing/device hook				*/
#define NETLINK_UNUSED		1	/* Unused number				*/
#define NETLINK_USERSOCK	2	/* Reserved for user mode socket protocols 	*/
#define NETLINK_FIREWALL	3	/* Unused number, formerly ip_queue		*/
#define NETLINK_SOCK_DIAG	4	/* socket monitoring				*/
#define NETLINK_NFLOG		5	/* netfilter/iptables ULOG */
#define NETLINK_XFRM		6	/* ipsec */
#define NETLINK_SELINUX		7	/* SELinux event notifications */
#define NETLINK_ISCSI		8	/* Open-iSCSI */
#define NETLINK_AUDIT		9	/* auditing */
#define NETLINK_FIB_LOOKUP	10	
#define NETLINK_CONNECTOR	11
#define NETLINK_NETFILTER	12	/* netfilter subsystem */
#define NETLINK_IP6_FW		13
#define NETLINK_DNRTMSG		14	/* DECnet routing messages */
#define NETLINK_KOBJECT_UEVENT	15	/* Kernel messages to userspace */
#define NETLINK_GENERIC		16
/* leave room for NETLINK_DM (DM Events): value 17 is intentionally skipped */
#define NETLINK_SCSITRANSPORT	18	/* SCSI Transports */
#define NETLINK_ECRYPTFS	19
#define NETLINK_RDMA		20
#define NETLINK_CRYPTO		21	/* Crypto layer */
#define NETLINK_SMC		22	/* SMC monitoring */

/* Alias: NETLINK_INET_DIAG expands to the same value as NETLINK_SOCK_DIAG. */
#define NETLINK_INET_DIAG	NETLINK_SOCK_DIAG

Types values

/*
 * Netlink response/control message types (values of nlmsghdr.nlmsg_type),
 * excerpted from include/uapi/linux/netlink.h.
 */

#define NLMSG_NOOP		0x1	/* Nothing.		*/
#define NLMSG_ERROR		0x2	/* Error		*/
#define NLMSG_DONE		0x3	/* End of a dump	*/
#define NLMSG_OVERRUN		0x4	/* Data lost		*/

Flags values

/*
 * Netlink message flags (bits OR-ed into nlmsghdr.nlmsg_flags), excerpted
 * from include/uapi/linux/netlink.h.
 *
 * The GET / NEW / DELETE modifier groups below reuse the same bit values
 * (0x100, 0x200, ...), so a modifier bit only has a defined meaning together
 * with the kind of request it is attached to.
 */

#define NLM_F_REQUEST		0x01	/* It is request message. 	*/
#define NLM_F_MULTI		0x02	/* Multipart message, terminated by NLMSG_DONE */
#define NLM_F_ACK		0x04	/* Reply with ack, with zero or error code */
#define NLM_F_ECHO		0x08	/* Echo this request 		*/
#define NLM_F_DUMP_INTR		0x10	/* Dump was inconsistent due to sequence change */
#define NLM_F_DUMP_FILTERED	0x20	/* Dump was filtered as requested */

/* Modifiers to GET request */
#define NLM_F_ROOT	0x100	/* specify tree	root	*/
#define NLM_F_MATCH	0x200	/* return all matching	*/
#define NLM_F_ATOMIC	0x400	/* atomic GET		*/
#define NLM_F_DUMP	(NLM_F_ROOT|NLM_F_MATCH)	/* both GET modifiers combined */

/* Modifiers to NEW request */
#define NLM_F_REPLACE	0x100	/* Override existing		*/
#define NLM_F_EXCL	0x200	/* Do not touch, if it exists	*/
#define NLM_F_CREATE	0x400	/* Create, if it does not exist	*/
#define NLM_F_APPEND	0x800	/* Add to end of list		*/

/* Modifiers to DELETE request */
#define NLM_F_NONREC	0x100	/* Do not delete recursively	*/
...

Types values of NETLINK_ROUTE

/*
 * Network-related (NETLINK_ROUTE) message types, excerpted from
 * include/uapi/linux/rtnetlink.h. Each object kind (link, address, route)
 * gets its own run of values starting at a multiple of 4, in the order
 * NEW, DEL, GET (and SET for links). The trailing "..." marks entries
 * elided from this excerpt.
 */
enum {
	RTM_NEWLINK	= 16,
	RTM_DELLINK,
	RTM_GETLINK,
	RTM_SETLINK,

	RTM_NEWADDR	= 20,
	RTM_DELADDR,
	RTM_GETADDR,

	RTM_NEWROUTE	= 24,
	RTM_DELROUTE,
	RTM_GETROUTE,
	...
};

Group types values for NETLINK_ROUTE

/*
 * Network-related multicast group identifiers, excerpted from
 * include/uapi/linux/rtnetlink.h. The trailing "..." marks entries elided
 * from this excerpt.
 */
/* RTnetlink multicast groups */
enum rtnetlink_groups {
	RTNLGRP_NONE,
	RTNLGRP_LINK,
	RTNLGRP_NOTIFY,
	RTNLGRP_NEIGH,
	RTNLGRP_TC,
	RTNLGRP_IPV4_IFADDR,
	RTNLGRP_IPV4_MROUTE,
	RTNLGRP_IPV4_ROUTE,
	RTNLGRP_IPV4_RULE,
	RTNLGRP_IPV6_IFADDR,
	RTNLGRP_IPV6_MROUTE,
	RTNLGRP_IPV6_ROUTE,
	RTNLGRP_IPV6_IFINFO,
	...
};

注意:(Group values of NETLINK_ROUTE)、(Types values of NETLINK_ROUTE)是实现网络相关的netlink接口。另外还有SELINUX\NETFILTER\ipsec等接口。

/*
 * Netlink socket address. The demo below zero-initializes one, sets
 * nl_family to AF_NETLINK and passes it to bind().
 */
struct sockaddr_nl {
	__kernel_sa_family_t	nl_family;	/* AF_NETLINK	*/
	unsigned short	        nl_pad;		/* zero		*/
	__u32		            nl_pid;		/* port ID	*/
    __u32		            nl_groups;	/* multicast groups mask */
};

nl_family: 协议族,AF_NETLINK / PF_NETLINK

nl_pid: 手动指定端口ID或者不设置交由系统给分配,一般使用系统分配,可通过getsockname函数获取到系统分配的nl_pid。端口ID类似于网络的端口号或者身份证号码的东东,有它才能有效的通信来往。

nl_groups:如果要监控内核某些子系统的状态,需要加入到指定的组(Group values of NETLINK_ROUTE列表)。如需要关心网络方面的动向,可以加入RTNLGRP_LINK(网卡状态)、RTNLGRP_IPV4_ROUTE(路由信息)、RTNLGRP_IPV4_IFADDR(IP地址状态)组。

/*
 * Netlink message header, excerpted from include/uapi/linux/netlink.h.
 * Every netlink message starts with this header, followed by its payload.
 */
struct nlmsghdr {
	__u32		nlmsg_len;	/* Length of message including header */
	__u16		nlmsg_type;	/* Message content */
	__u16		nlmsg_flags;	/* Additional flags */
	__u32		nlmsg_seq;	/* Sequence number */
	__u32		nlmsg_pid;	/* Sending process port ID */
}; 

nlmsg_len: 整个消息报文长度,包含头部长度

nlmsg_type: 请求类型/响应状态;

nlmsg_flags:消息标志(不是消息类型),可同时设置多个选项(见Flags values列表),建议根据所涉及的子系统和请求类型进行设置

nlmsg_seq:消息序列号,用于标识消息的先后顺序。netlink协议本身暂未处理该字段,但既然定义了这个字段就有其存在的意义,建议填写并处理;另外该字段可用于校验回应的报文是否属于本次请求

nlmsg_pid:发送者的端口ID,类似于身份证号码,对于内核的端口ID为0,而用户进程则在创建Socket时指定(bind函数会进行设置)。另外该字段可校验回应的报文是否属于本端socket

3.用户空间的使用

注意:在用户空间使用标准Socket API且创建Socket时,只能是SOCK_RAW或者SOCK_DGRAM类型(内核接口的限制);
内核空间创建netlink socket的限制条件

3.1 打印路由表的案例(见代码注释;报文整体按 2.1 消息格式组织)

/*
 * Format the address carried by a route attribute as text.
 *
 * family  address family taken from the rtmsg header (AF_INET / AF_INET6)
 * attr    attribute whose payload holds the raw address bytes
 * out     caller-provided text buffer of outlen bytes
 *
 * Returns out on success, or "?" when the family is not IPv4/IPv6, the
 * payload is too short for the family, or inet_ntop() fails. It never
 * returns NULL, so the result can be passed straight to "%s".
 */
static const char *route_addr_str(int family, struct rtattr *attr,
                                  char *out, int outlen)
{
    size_t need = 0;

    if (family == AF_INET) {
        need = sizeof(struct in_addr);
    } else if (family == AF_INET6) {
        need = sizeof(struct in6_addr);
    }
    if (need == 0 || (size_t)RTA_PAYLOAD(attr) < need) {
        return "?";
    }
    if (inet_ntop(family, RTA_DATA(attr), out, (socklen_t)outlen) == NULL) {
        return "?";
    }
    return out;
}

/*
 * Copy a 32-bit attribute payload into *value.
 * Returns 1 on success, 0 when the payload is shorter than 4 bytes.
 * memcpy is used instead of a pointer cast so the read does not depend on
 * the alignment or effective type of the receive buffer.
 */
static int route_attr_u32(struct rtattr *attr, uint32_t *value)
{
    if ((size_t)RTA_PAYLOAD(attr) < sizeof(*value)) {
        return 0;
    }
    memcpy(value, RTA_DATA(attr), sizeof(*value));
    return 1;
}

/*
 * Print the route entries found in a buffer of rtnetlink messages, one line
 * per route on stdout.
 *
 * n    first message header; the buffer is laid out as
 *      [nlmsghdr][rtmsg][rtattr ...][nlmsghdr][rtmsg][rtattr ...]...
 * len  number of valid bytes starting at n
 *
 * Control messages (type below NLMSG_MIN_TYPE, e.g. NLMSG_DONE/NLMSG_ERROR)
 * and messages too short to hold a struct rtmsg are skipped.
 */
void print_route_table(struct nlmsghdr *n, int len)
{
    /* Walk every netlink message in the buffer. */
    for (; NLMSG_OK(n, len); n = NLMSG_NEXT(n, len)) {
        /* Attribute lookup table indexed by rta_type. */
        struct rtattr *tb[RTA_MAX + 1] = {NULL};
        struct rtmsg *familyhdr;
        struct rtattr *attrs;
        int attrs_len;
        char buf[256] = {0};
        int buflen = sizeof(buf);
        uint32_t value = 0;

        /* Control messages do not carry a struct rtmsg payload. */
        if (n->nlmsg_type < NLMSG_MIN_TYPE) {
            continue;
        }
        if (n->nlmsg_len < NLMSG_LENGTH(sizeof(*familyhdr))) {
            continue;
        }

        /* Family header: basic information about the route entry. */
        familyhdr = (struct rtmsg *)NLMSG_DATA(n);
        /* Attributes (dst, src, gateway, oif, metric, table, ...) follow it. */
        attrs = RTM_RTA(familyhdr);
        /* Bytes of attribute data = message length minus both headers. */
        attrs_len = (int)RTM_PAYLOAD(n);

        /*
         * Optional filtering hook: to print only IPv4/IPv6 entries or only
         * the main table, test familyhdr->rtm_family / familyhdr->rtm_table
         * here and `continue`.
         */

        /*
         * Index the attributes by type. The walk must consume attrs_len:
         * passing the outer `len` to RTA_NEXT would corrupt the message
         * iteration and leave this loop unbounded.
         */
        while (RTA_OK(attrs, attrs_len)) {
            if (attrs->rta_type <= RTA_MAX) {
                tb[attrs->rta_type] = attrs;
            }
            attrs = RTA_NEXT(attrs, attrs_len);
        }

        /* Print the commonly used attributes of this entry. */
        if (tb[RTA_DST]) {
            printf("%s/%u ",
                   route_addr_str(familyhdr->rtm_family, tb[RTA_DST], buf, buflen),
                   (unsigned)familyhdr->rtm_dst_len);
        } else {
            printf("default ");
        }
        if (tb[RTA_SRC]) {
            printf("from %s/%u ",
                   route_addr_str(familyhdr->rtm_family, tb[RTA_SRC], buf, buflen),
                   (unsigned)familyhdr->rtm_src_len);
        } else if (familyhdr->rtm_src_len) {
            printf("from 0/%u ", (unsigned)familyhdr->rtm_src_len);
        }
        if (tb[RTA_GATEWAY]) {
            printf("via %s ",
                   route_addr_str(familyhdr->rtm_family, tb[RTA_GATEWAY], buf, buflen));
        }
        if (tb[RTA_OIF] && route_attr_u32(tb[RTA_OIF], &value)) {
            char ifname[32] = {0};

            /* Fall back to the numeric index when the name lookup fails. */
            if (if_indextoname(value, ifname) != NULL) {
                printf("dev %s ", ifname);
            } else {
                printf("dev if%u ", (unsigned)value);
            }
        }
        if (tb[RTA_PRIORITY] && route_attr_u32(tb[RTA_PRIORITY], &value)) {
            printf("metric %u ", (unsigned)value);
        }
        if (tb[RTA_TABLE] && route_attr_u32(tb[RTA_TABLE], &value)) {
            printf("table %u ", (unsigned)value);
        }
        printf("\n");
    }
}
/*
 * Demo entry point: dump the IPv4 routing table over a NETLINK_ROUTE socket.
 *
 * Steps: create and bind the socket, send one RTM_GETROUTE dump request,
 * then read reply datagrams until NLMSG_DONE arrives, handing every route
 * message to print_route_table().
 *
 * Returns 0 on success and 1 on any failure. Negative errno values are not
 * returned because they are not portable process exit statuses.
 */
int main()
{
    int seq               = 100;
    int sock              = -1;
    int rc                = 1;      /* exit status; set to 0 once the dump completes */
    int done              = 0;
    struct sockaddr_nl sa = {0,};
    struct msghdr msg     = {NULL,};
    struct iovec iov      = {NULL,};
    /* Receive buffer; the union keeps it aligned for struct nlmsghdr access. */
    union {
        struct nlmsghdr hdr;
        char bytes[8192];
    } rx;
    struct nlmsghdr *nh   = NULL;
    int len               = 0;
    int ret               = 0;

    /* Route dump request: netlink header followed by the family header. */
    struct {
        struct nlmsghdr n;
        /*
         * Request parameters. Without special filtering the generic
         * struct rtgenmsg (a single rtgen_family member) would also do.
         */
        struct rtmsg    r;
    } req = {
        /* Length covers the netlink header plus the payload. */
        .n.nlmsg_len   = NLMSG_LENGTH(sizeof(struct rtmsg)),
        /* Request type; RTM_GETLINK / RTM_GETADDR query other objects. */
        .n.nlmsg_type  = RTM_GETROUTE,
        /* NLM_F_DUMP (= NLM_F_ROOT|NLM_F_MATCH) asks for the whole table. */
        .n.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
        /* Sequence number, used below to match replies to this request. */
        .n.nlmsg_seq   = ++seq,
        /* Address family: AF_INET, AF_INET6, or AF_UNSPEC for all families. */
        .r.rtm_family  = AF_INET
    };

    /* Create a NETLINK_ROUTE socket in the AF_NETLINK family. */
    sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
    if (sock < 0) {
        printf("create netlink socket: %s\n", strerror(errno));
        return rc;
    }

    /*
     * Bind the socket. nl_family must be AF_NETLINK, otherwise the netlink
     * module that should handle it cannot be found. nl_pid is left at 0 so
     * the kernel assigns the port ID.
     */
    sa.nl_family = AF_NETLINK;
    if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
        printf("bind to netlink: %s\n", strerror(errno));
        goto out;
    }

    /*
     * Build the outgoing message. No destination address is set: a message
     * without one goes to the kernel (port ID 0).
     */
    iov.iov_base = (void *)&req.n;
    iov.iov_len = req.n.nlmsg_len;
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;

    /* Send the request to the kernel. */
    ret = sendmsg(sock, &msg, 0);
    if (ret < 0) {
        printf("send to netlink: %s\n", strerror(errno));
        goto out;
    }

    /*
     * Read reply datagrams until NLMSG_DONE or a failure. Each datagram is
     * processed as soon as it arrives, so the size of the routing table is
     * not limited by the receive buffer.
     */
    while (!done) {
        ret = recv(sock, rx.bytes, sizeof(rx.bytes), 0);
        if (ret < 0) {
            if (errno == EINTR) {
                continue;   /* interrupted by a signal: retry */
            }
            printf("recv from netlink: %s\n", strerror(errno));
            goto out;
        }
        if (ret == 0) {
            printf("recv from netlink: unexpected end of data\n");
            goto out;
        }

        /* One datagram holds [nlmsghdr][payload][nlmsghdr][payload]... */
        len = ret;
        for (nh = &rx.hdr; NLMSG_OK(nh, len); nh = NLMSG_NEXT(nh, len)) {
            /* Ignore anything that does not answer this request. */
            if (nh->nlmsg_seq != req.n.nlmsg_seq) {
                continue;
            }
            if (nh->nlmsg_type == NLMSG_DONE) {
                done = 1;
                break;
            }
            if (nh->nlmsg_type == NLMSG_ERROR) {
                struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(nh);

                if (nh->nlmsg_len < NLMSG_LENGTH(sizeof(*err))) {
                    printf("recv from netlink: truncated error message\n");
                    goto out;
                }
                if (err->error != 0) {
                    /* The kernel reports errors as negative errno values. */
                    printf("recv from netlink: %s\n", strerror(-err->error));
                    goto out;
                }
                continue;   /* error == 0 is a plain acknowledgement */
            }
            if (nh->nlmsg_type < NLMSG_MIN_TYPE) {
                continue;   /* other control messages carry no route */
            }

            /* Print this single route message. */
            print_route_table(nh, (int)nh->nlmsg_len);

            /* A reply that is not multipart will not be followed by NLMSG_DONE. */
            if (!(nh->nlmsg_flags & NLM_F_MULTI)) {
                done = 1;
                break;
            }
        }
    }
    rc = 0;

out:
    /* Release the socket on every exit path. */
    close(sock);
    return rc;
}

3.2 输出路由表的结果

default via 192.168.47.2 dev eno16777736 metric 100 table 254 
172.17.0.0/16 dev docker0 table 254 
192.168.47.0/24 dev eno16777736 metric 100 table 254 
192.168.122.0/24 dev virbr0 table 254 
127.0.0.0/32 dev lo table 255 
127.0.0.0/8 dev lo table 255 
127.0.0.1/32 dev lo table 255 
127.255.255.255/32 dev lo table 255 
172.17.0.0/32 dev docker0 table 255 
172.17.0.1/32 dev docker0 table 255 
172.17.255.255/32 dev docker0 table 255 

3.3 编译环境要求

linux 3.10.0(2.6以上支持NETLINK)
gcc version 4.8.5

—越简单,易接受。在折腾路上…

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值