sockopt是内核与用户空间通信方法中非常简单的一种,其本质是通过copy_to_user/copy_from_user在用户与内核中传递数据,但是效率不高,常用于传递控制/状态等信息。copy_to_user/copy_from_user函数会引发阻塞,所以不能用在硬、软中断中。一般将这两个特殊拷贝函数用于类似于系统调用一类函数中,此类函数一般穿梭于“用户态”和“内核态”。此类方法的工作原理如图:
用户态进程 内核态
-------------------- -------------------
| 调用相关系统调用 | <----------> | 系统调用过程 |
-------------------- ^ -------------------
|
|
此处内核态通过参数获取用户空间内存区指针,
再由copy_from_user和copy_to_user来读写
用户空间的内存区
内核模块注册了一组 设置 套接字选项 的函数,使得用户空间进程可以调用此组函数对内核态数据进行读写。
socket编程中经常用到了两个函数:
这两个函数用来控制相关socket文件描述符的一些选项值,如设置/获取接收或发送缓冲区的大小、设置/获取接收或发送超时值、允许/禁止重用本地端口和地址等等
int setsockopt(int sockfd, int level, int optname, void *optval, socklen_t optlen);
int getsockopt(int sockfd, int level, int optname, void *optval, socklen_t *optlen);
参数说明:
sockfd:为socket的文件描述符;
level:选项所在的协议层。(sock协议, IP RAW的就用SOL_SOCKET/SOL_IP等,TCP/UDP socket的可用SOL_SOCKET/SOL_IP/SOL_TCP/SOL_UDP等,即高层的socket可以用底层的socket的命令字)
optname: 需要访问的选项名(操作命令字,由自己定义,一般用于扩充;)
optval:数据缓冲区起始位置指针,set操作时是将缓冲区数据写入内核,get的时候是将内核中的数据读入该缓冲区;
optlen:参数optval中的数据长度。
返回值说明:
0:成功
-1: 失败
errno被设置为以下的某个值
EBADF: sockfd不是有效的文件描述符
EFAULT: optval指向的内存并非有效的进程空间
EINVAL: 在调用setsockopt()时,optlen无效
ENOPROTOOPT:指定的协议层不能识别选项
ENOTSOCK:sockfd描述的不是套接字
参数详细说明:
1、level指定控制套接字的层次,可以取三种值:
1)SOL_SOCKET:通用套接字选项
2)IPPROTO_IP:IP选项
3)IPPROTO_TCP:TCP选项
2、optname指定控制的方式(选项的名称)
3、optval获得或设置套接字选项,根据选项名称的数据类型进行转换
一个标准的setsockopt调用流程
在ip_setsockopt调用时,如果发现一个没有定义的协议,并且判断这个optname是否为netfilter所在设置,如果是则调用netfilter锁设置的特殊处理函数。加入netfilter特殊处理的sokcopt的流程
套接字层次level | 选项名称 | 说明 | 对应数据类型 |
SOL_SOCKET | SO_BROADCAST | 允许发送广播数据 | int |
SO_DEBUG | 允许调试 | int | |
SO_DONTROUTE | 不查找路由 | int | |
SO_ERROR | 获取套接字错误 | int | |
SO_KEEPALIVE | 保持连接 | int | |
SO_LINGER | 延迟关闭连接 | struct linger | |
SO_OOBINLINE | 带外数据放入正常数据流 | int | |
SO_RCVBUF | 接收缓冲区大小 | int | |
SO_SNDBUF | 发送缓冲区大小 | int | |
SO_RCVLOWAT | 接收缓冲区下限 | int | |
SO_SNDLOWAT | 发送缓冲区下限 | int | |
SO_RCVTIMEO | 接收超时 | struct timeval | |
SO_SNDTIMEO | 发送超时 | struct timeval | |
SO_REUSERADDR | 允许重用本地地址和端口 | int | |
SO_TYPE | 获取套接字类型 | int | |
SO_BSDCOMPAT | 与BSD系统兼容 | int | |
IPPROTO_IP | IP_HDRINCL | 在数据包中包含IP首部 | int |
IP_OPTINOS | IP首部选项 | int | |
IP_TOS | 服务类型 | int | |
IP_TTL | 生存时间 | int | |
IPPROTO_TCP | TCP_MAXSEG | TCP最大数据段的大小 | int |
TCP_NODELAY | 不使用Nagle算法 | int |
针对int fd = socket (PF_INET, SOCK_RAW, IPPROTO_RAW),对参数的几个概念进行梳理
SOCK_RAW(原始套接字)
首先,普通的套接字无法处理ICMP、IGMP等网络报文,而SOCK_RAW可以;
其次,SOCK_RAW也可以处理特殊的IPv4报文;
此外,利用原始套接字,可以通过IP_HDRINCL套接字选项由用户构造IP头。
总体来说,SOCK_RAW可以处理普通的网络报文之外,还可以处理一些特殊协议报文以及操作IP层及其以上的数据。
/* Types of sockets. */
enum __socket_type
{
SOCK_STREAM = 1, /* Sequenced, reliable, connection-based byte streams. */
#define SOCK_STREAM SOCK_STREMA
SOCK_DGRAM = 2, /* Connectionless, unreliable datagram of fixed maximum length. */
#define SOCK_DGRAM SOCK_DGRAM
SOCK_RAW = 3, /* Raw protocol interface. */
#define SOCK_RAW SOCK_RAW
SOCK_RDM = 4, /* Reliably-delivered messages. */
#define SOCK_RDM SOCK_RDM
SOCK_SEQPACKET = 5, /* Sequenced, reliable, connection-based,
datagrams of fixed maximum length. */
#define SOCK_SEQPACKET SOCK_SEQPACKET
SOCK_DCCP = 6, /* Datagram Congestion Control Protocol. */
#define SOCK_DCCP SOCK_DCCP
SOCK_PACKET = 10, /* Linux specific way of getting packets
at the dev level. For writing rarp and
other similar things on the user level. */
#define SOCK_PACKET SOCK_PACKET
/* Flags to be ORed into the type parameter of socket and socketpair and
used for the flags parameter of paccept. */
SOCK_CLOEXEC = 02000000, /* Atomically set close-on-exec flag for the
new descriptor(s). */
#define SOCK_CLOEXEC SOCK_CLOEXEC
SOCK_NONBLOCK = 00004000 /* Atomically mark descriptor(s) as
non-blocking. */
#define SOCK_NONBLOCK SOCK_NONBLOCK
};
PF_INET(IP协议族)
/* Protocol families. */
#define PF_UNSPEC 0 /* Unspecified. */
#define PF_LOCAL 1 /* Local to host (pipes and file-domain). */
#define PF_UNIX PF_LOCAL /* POSIX name for PF_LOCAL. */
#define PF_FILE PF_LOCAL /* Another non-standard name for PF_LOCAL. */
#define PF_INET 2 /* IP protocol family. */
#define PF_AX25 3 /* Amateur Radio AX.25. */
#define PF_IPX 4 /* Novell Internet Protocol. */
#define PF_APPLETALK 5 /* Appletalk DDP. */
#define PF_NETROM 6 /* Amateur radio NetROM. */
#define PF_BRIDGE 7 /* Multiprotocol bridge. */
#define PF_ATMPVC 8 /* ATM PVCs. */
#define PF_X25 9 /* Reserved for X.25 project. */
#define PF_INET6 10 /* IP version 6. */
#define PF_ROSE 11 /* Amateur Radio X.25 PLP. */
#define PF_DECnet 12 /* Reserved for DECnet project. */
#define PF_NETBEUI 13 /* Reserved for 802.2LLC project. */
#define PF_SECURITY 14 /* Security callback pseudo AF. */
#define PF_KEY 15 /* PF_KEY key management API. */
#define PF_NETLINK 16
#define PF_ROUTE PF_NETLINK /* Alias to emulate 4.4BSD. */
#define PF_PACKET 17 /* Packet family. */
#define PF_ASH 18 /* Ash. */
#define PF_ECONET 19 /* Acorn Econet. */
#define PF_ATMSVC 20 /* ATM SVCs. */
#define PF_RDS 21 /* RDS sockets. */
#define PF_SNA 22 /* Linux SNA Project */
#define PF_IRDA 23 /* IRDA sockets. */
#define PF_PPPOX 24 /* PPPoX sockets. */
#define PF_WANPIPE 25 /* Wanpipe API sockets. */
#define PF_LLC 26 /* Linux LLC. */
#define PF_CAN 29 /* Controller Area Network. */
#define PF_TIPC 30 /* TIPC sockets. */
#define PF_BLUETOOTH 31 /* Bluetooth sockets. */
#define PF_IUCV 32 /* IUCV sockets. */
#define PF_RXRPC 33 /* RxRPC sockets. */
#define PF_ISDN 34 /* mISDN sockets. */
#define PF_PHONET 35 /* Phonet sockets. */
#define PF_IEEE802154 36 /* IEEE 802.15.4 sockets. */
#define PF_CAIF 37 /* CAIF sockets. */
#define PF_ALG 38 /* Algorithm sockets. */
#define PF_NFC 39 /* NFC sockets. */
#define PF_VSOCK 40 /* vSockets. */
#define PF_MAX 41 /* For now.. */
/* Address families. */
#define AF_UNSPEC PF_UNSPEC
#define AF_LOCAL PF_LOCAL
#define AF_UNIX PF_UNIX
#define AF_FILE PF_FILE
#define AF_INET PF_INET
#define AF_AX25 PF_AX25
#define AF_IPX PF_IPX
#define AF_APPLETALK PF_APPLETALK
#define AF_NETROM PF_NETROM
#define AF_BRIDGE PF_BRIDGE
#define AF_ATMPVC PF_ATMPVC
#define AF_X25 PF_X25
#define AF_INET6 PF_INET6
#define AF_ROSE PF_ROSE
#define AF_DECnet PF_DECnet
#define AF_NETBEUI PF_NETBEUI
#define AF_SECURITY PF_SECURITY
#define AF_KEY PF_KEY
#define AF_NETLINK PF_NETLINK
#define AF_ROUTE PF_ROUTE
#define AF_PACKET PF_PACKET
#define AF_ASH PF_ASH
#define AF_ECONET PF_ECONET
#define AF_ATMSVC PF_ATMSVC
#define AF_RDS PF_RDS
#define AF_SNA PF_SNA
#define AF_IRDA PF_IRDA
#define AF_PPPOX PF_PPPOX
#define AF_WANPIPE PF_WANPIPE
#define AF_LLC PF_LLC
#define AF_CAN PF_CAN
#define AF_TIPC PF_TIPC
#define AF_BLUETOOTH PF_BLUETOOTH
#define AF_IUCV PF_IUCV
#define AF_RXRPC PF_RXRPC
#define AF_ISDN PF_ISDN
#define AF_PHONET PF_PHONET
#define AF_IEEE802154 PF_IEEE802154
#define AF_CAIF PF_CAIF
#define AF_ALG PF_ALG
#define AF_NFC PF_NFC
#define AF_VSOCK PF_VSOCK
#define AF_MAX PF_MAX
IPPROTO_RAW(IP协议:原始的ip包)
/* Standard well-defined IP protocols. */
enum {
IPPROTO_IP = 0, /* Dummy protocol for TCP */
#define IPPROTO_IP IPPROTO_IP
IPPROTO_ICMP = 1, /* Internet Control Message Protocol */
#define IPPROTO_ICMP IPPROTO_ICMP
IPPROTO_IGMP = 2, /* Internet Group Management Protocol */
#define IPPROTO_IGMP IPPROTO_IGMP
IPPROTO_IPIP = 4, /* IPIP tunnels (older KA9Q tunnels use 94) */
#define IPPROTO_IPIP IPPROTO_IPIP
IPPROTO_TCP = 6, /* Transmission Control Protocol */
#define IPPROTO_TCP IPPROTO_TCP
IPPROTO_EGP = 8, /* Exterior Gateway Protocol */
#define IPPROTO_EGP IPPROTO_EGP
IPPROTO_PUP = 12, /* PUP protocol */
#define IPPROTO_PUP IPPROTO_PUP
IPPROTO_UDP = 17, /* User Datagram Protocol */
#define IPPROTO_UDP IPPROTO_UDP
IPPROTO_IDP = 22, /* XNS IDP protocol */
#define IPPROTO_IDP IPPROTO_IDP
IPPROTO_TP = 29, /* SO Transport Protocol Class 4 */
#define IPPROTO_TP IPPROTO_TP
IPPROTO_DCCP = 33, /* Datagram Congestion Control Protocol */
#define IPPROTO_DCCP IPPROTO_DCCP
IPPROTO_IPV6 = 41, /* IPv6-in-IPv4 tunnelling */
#define IPPROTO_IPV6 IPPROTO_IPV6
IPPROTO_RSVP = 46, /* RSVP Protocol */
#define IPPROTO_RSVP IPPROTO_RSVP
IPPROTO_GRE = 47, /* Cisco GRE tunnels (rfc 1701,1702) */
#define IPPROTO_GRE IPPROTO_GRE
IPPROTO_ESP = 50, /* Encapsulation Security Payload protocol */
#define IPPROTO_ESP IPPROTO_ESP
IPPROTO_AH = 51, /* Authentication Header protocol */
#define IPPROTO_AH IPPROTO_AH
IPPROTO_MTP = 92, /* Multicast Transport Protocol */
#define IPPROTO_MTP IPPROTO_MTP
IPPROTO_BEETPH = 94, /* IP option pseudo header for BEET */
#define IPPROTO_BEETPH IPPROTO_BEETPH
IPPROTO_ENCAP = 98, /* Encapsulation Header */
#define IPPROTO_ENCAP IPPROTO_ENCAP
IPPROTO_PIM = 103, /* Protocol Independent Multicast */
#define IPPROTO_PIM IPPROTO_PIM
IPPROTO_COMP = 108, /* Compression Header Protocol */
#define IPPROTO_COMP IPPROTO_COMP
IPPROTO_SCTP = 132, /* Stream Control Transport Protocol */
#define IPPROTO_SCTP IPPROTO_SCTP
IPPROTO_UDPLITE = 136, /* UDP-Lite (RFC 3828) */
#define IPPROTO_UDPLITE IPPROTO_UDPLITE
IPPROTO_MPLS = 137, /* MPLS in IP (RFC 4023) */
#define IPPROTO_MPLS IPPROTO_MPLS
IPPROTO_RAW = 255, /* Raw IP packets */
#define IPPROTO_RAW IPPROTO_RAW
IPPROTO_MAX
};