Netlink
内核和用户层通信的方式
在内核态主动发起消息,而用户态的程序可以阻塞等待消息。
- 自己独立的地址编码
/* Netlink socket address: netlink uses its own addressing scheme. */
struct sockaddr_nl
{
sa_family_t nl_family; // AF_NETLINK, PF_NETLINK
unsigned short nl_pad; // must be 0
__u32 nl_pid; // communication port; usually the PID of the user process
// for the kernel's address this value must be 0 (the peer address given to sendto is 0)
__u32 nl_groups; // multicast group mask
};
- 创建socket的时候,需要指定 netlink的通信协议号,支持自定义
socket(PF_NETLINK, SOCK_DGRAM | SOCK_CLOEXEC, NETLINK_KOBJECT_UEVENT);
/* Netlink protocol numbers: the value given as the third argument of socket() when creating a PF_NETLINK socket. */
#define NETLINK_ROUTE 0 /* Routing/device hook */
#define NETLINK_UNUSED 1 /* Unused number */
#define NETLINK_USERSOCK 2 /* Reserved for user mode socket protocols */
#define NETLINK_FIREWALL 3 /* Firewalling hook */
#define NETLINK_INET_DIAG 4 /* INET socket monitoring */
#define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */
#define NETLINK_XFRM 6 /* ipsec */
#define NETLINK_SELINUX 7 /* SELinux event notifications */
#define NETLINK_ISCSI 8 /* Open-iSCSI */
#define NETLINK_AUDIT 9 /* auditing */
#define NETLINK_FIB_LOOKUP 10
#define NETLINK_CONNECTOR 11
#define NETLINK_NETFILTER 12 /* netfilter subsystem */
#define NETLINK_IP6_FW 13
#define NETLINK_DNRTMSG 14 /* DECnet routing messages */
#define NETLINK_KOBJECT_UEVENT 15 /*!!!! Kernel messages to userspace */
#define NETLINK_GENERIC 16
/* leave room for NETLINK_DM (DM Events) */
#define NETLINK_SCSITRANSPORT 18 /* SCSI Transports */
#define NETLINK_ECRYPTFS 19
- recvfrom sendto
每个消息都附带一个netlink消息头,struct nlmsghdr;
/*
 * Header that prefixes every netlink message.
 * Type and flags are 16 bits each and share one 32-bit word, so the
 * whole header is 16 bytes.
 */
struct nlmsghdr {
    __u32 nlmsg_len;   /* Length of msg including header */
    __u16 nlmsg_type;  /* Message type (the operation/command) */
    __u16 nlmsg_flags; /* various flags */
    __u32 nlmsg_seq;   /* Sequence number */
    __u32 nlmsg_pid;   /* PID of the sending process */
};
0 1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Length |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Type | Flags |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Sequence Number |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Process ID (PID) |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- recvmsg sendmsg
1.标记
2.套接口地址与长度
3.分散读/聚集写(msg_iov)
4.附属数据
返回值:成功时返回实际发送/接收的字节数(不超过msg_iov各缓冲区长度之和),失败返回-1
/* sendto() takes the data buffer and the destination address as separate arguments. */
ssize_t sendto(int sockfd, const void *buf, size_t len, int flags,
const struct sockaddr *dest_addr, socklen_t addrlen);
/* sendmsg() takes the same information bundled into a struct msghdr. */
ssize_t sendmsg(int sockfd, const struct msghdr *msg, int flags);
msg_name 和 msg_namelen 表示地址相当于dest_addr,addrlen
msg_iov,msg_iovlen表示传输的数据相当于 buf,len
msg_control 辅助数据
msg_flags只用于recvmsg
MSG_EOR 当接收到记录结尾时会设置这一位。
MSG_TRUNC 这个标记位表明数据的结尾被截短,因为接收缓冲区太小不足以接收全部的数据。
MSG_CTRUNC 这个标记位表明某些控制数据(附属数据)被截短,因为缓冲区太小。
MSG_OOB 这个标记位表明接收了带外数据。
MSG_ERRQUEUE 这个标记位表明没有接收到数据,但是返回一个扩展错误。
/* Message descriptor passed to sendmsg()/recvmsg(): address, data buffers, ancillary data and flags. */
struct msghdr {
void *msg_name; // protocol address
socklen_t msg_namelen; // size of protocol address
struct iovec *msg_iov; // scatter/gather array
int msg_iovlen; // elements in msg_iov
void *msg_control; // ancillary data (cmsghdr struct)
socklen_t msg_controllen; // length of ancillary data
int msg_flags; // flags returned by recvmsg()
};
// Read or write several non-contiguous buffers in a single call:
// scatter read and gather write.
// Each struct iovec describes one of those buffers.
struct iovec {
    void *iov_base; /* Starting address */
    size_t iov_len; /* Length in bytes */
};
// iovcnt: number of struct iovec elements to transfer from the iov array
ssize_t readv(int filedes, const struct iovec *iov, int iovcnt);
ssize_t writev(int filedes, const struct iovec *iov, int iovcnt);
附属数据
bionic/libc/include/sys/socket.h
/*
 * Header of one ancillary-data object inside a msg_control buffer.
 * The object's payload starts CMSG_ALIGN(sizeof(struct cmsghdr)) bytes
 * after the start of this header (see CMSG_DATA).
 */
struct cmsghdr {
socklen_t cmsg_len;
int cmsg_level;
int cmsg_type;
};
// Round len up to a multiple of sizeof(long); this is 4-byte alignment only where long is 4 bytes wide.
#define CMSG_ALIGN(len) ( ((len)+sizeof(long)-1) & ~(sizeof(long)-1) )
// Aligned header size plus len payload bytes; the payload itself is not padded.
#define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr)) + (len))
对齐后的cmsghdr头结构长度 + len(数据部分不含尾部填充)
/* Bytes needed for one ancillary object with a len-byte payload: aligned header plus aligned payload. */
#define CMSG_SPACE(len) (CMSG_ALIGN(sizeof(struct cmsghdr)) + CMSG_ALIGN(len))
总的空间:对齐后的cmsghdr头结构长度 + 对齐后的len(按sizeof(long)对齐,long为4字节时即4字节对齐),用于申请control缓冲区
/* Pointer to the payload of cmsg: the first byte after the header and its alignment padding. */
#define CMSG_DATA(cmsg) ((void*)((char*)(cmsg) + CMSG_ALIGN(sizeof(struct cmsghdr))))
指针指向跟随在头部以及填充字节之后的附属数据的第一个字节
/* First ancillary header in msg's control buffer, or NULL when msg_controllen is smaller than a struct cmsghdr. */
#define CMSG_FIRSTHDR(msg) \
((msg)->msg_controllen >= sizeof(struct cmsghdr) \
? (struct cmsghdr*) (msg)->msg_control : (struct cmsghdr*) NULL)
附属数据缓冲区内的第一个附属对象的struct cmsghdr
{//例子
}
/* Ancillary header that follows cmsg within mhdr; the lookup is delegated to __cmsg_nxthdr(). */
#define CMSG_NXTHDR(mhdr, cmsg) __cmsg_nxthdr((mhdr), (cmsg))
下一个struct cmsghdr
对齐的原理
(以4字节对齐为例) (len + 0b0000,0000,0000,0000,0000,0000,0000,0011) & 0b1111,1111,1111,1111,1111,1111,1111,1100
/*
 * Alignment helpers; size must be a power of two.
 * Every argument is parenthesized so that expression arguments such as
 * `1 | 2` or `1 << 2` are evaluated as a unit.
 */
/* Round a down to the nearest multiple of size. */
#define alignment_down(a, size) ((a) & (~((size)-1)))
/* Round a up to the nearest multiple of size. */
#define alignment_up(a, size) (((a)+(size)-1) & (~((size)-1)))
/* Round addr up to the nearest multiple of size. */
#define _ALIGN(addr,size) (((addr)+(size)-1)&(~((size)-1)))
/* Page size in bytes. */
#define PAGE_SIZE 4096
/* Mask that clears the offset-within-page bits. */
#define PAGE_MASK (~(PAGE_SIZE-1))
/* Round addr up to the next page boundary; the result is not negated. */
#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1) & PAGE_MASK)
uevent中的例子
char control[CMSG_SPACE(sizeof(struct ucred))]; //control申请这么多空间
struct msghdr hdr = {
&addr,
sizeof(addr),
&iov,
1,
control,
sizeof(control),
0,
};
ssize_t n = recvmsg(socket, &hdr, 0);
struct cmsghdr *cmsg = CMSG_FIRSTHDR(&hdr);
struct ucred *cred = (struct ucred *)CMSG_DATA(cmsg);