原始套接字提供如下功能:
1、读写ICMPv4、IGMPv4及ICMPv6分组。如ping程序,就是使用原始套接口发送ICMP回显请求,并接收ICMP回显应答
2、读写内核不处理其协议字段的IPv4数据报。大多数内核只处理协议字段值为1(ICMP)、2(IGMP)、6(TCP)和17(UDP)的数据报,而协议字段还可能取其他值,这些数据报可以通过原始套接口来读写
3、使用IP_HDRINCL套接口选项可以构造自己的IPv4头部。
原始套接口的创建
一般分为以下几步
1、调用socket函数创建一个原始套接口,其中第二个参数为SOCK_RAW,第三个参数(protocol)一般不为0,可以为IPPROTO_ICMP或者IPPROTO_IGMP等。只有超级用户才有权创建原始套接口
2、可以设置 IP_HDRINCL套接口选项,如
const int on = 1;
setsockopt(sockfd, IPPROTO_IP, IP_HDRINCL, &on, sizeof(on));
3、可以对原始套接口调用bind函数,但是不常用。这仅用来设置本地地址,端口号对原始套接口没有意义
4、可以在原始套接口上调用connect函数,也不常用。这只是设置目的地址,端口号同样没有意义。调用connect后,因为已经指定了目的地址,可以调用write或send,而不必调用sendto
原始套接口输出
输出有以下规则
1、普通输出调用 sendto或sendmsg并指定目的IP地址来完成。如果套接口已连接,可以用write,writev或send
2、如果IP_HDRINCL没有设置,写的数据起始地址为IP头部后的第一个字节,IP头部由内核构造,其中协议字段填写为socket调用时的第三个参数
3、如果IP_HDRINCL已经设置,写的数据起始地址为IP头部的第一个字节,用户提供的数据大小必须包括头部的字节数。此时除了标识字段和检验和字段外,头部的其它字段都由进程自己设置:标识字段可以置0,由内核来设置;IPv4头部检验和总是由内核计算并填充
4、对于超出外出接口MTU的分组,内核将其分片。
原始套接口输入
内核接收到的分组哪些会传递给原始套接口、哪些不会,有以下规则
1、TCP和UDP分组不会传递给原始套接口
2、当内核处理完ICMP消息后,绝大部分 ICMP分组会传递给原始套接口
3、内核处理完IGMP消息后,所有IGMP分组都将传递给原始套接口
4、内核不能识别的协议字段的IP数据报都将传递给原始套接口。内核对这些分组唯一做的就是检验IP头部的某些字段:IP版本,IPv4头部检验和,头部长度及目的IP地址
5、如果数据报以片段形式到达,则要等所有片段都到达并重组完成后,才将该数据报传给原始套接口
下面是用原始套接口写的类似ping的程序
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/* Size in bytes of the packet buffers declared below. */
#define BUFSIZE 1500
/* File-scope receive buffer. NOTE(review): readloop() declares its own local
 * recvbuf that shadows this one, so this array looks unused in this file. */
char recvbuf[BUFSIZE];
/* Buffer in which send_v4() builds the outgoing ICMP message. */
char sendbuf[BUFSIZE];
//int datalen;
/* NOTE(review): no code in this file assigns or reads host. */
char *host;
/* Counter that send_v4() copies into icmp_seq and then increments. */
int nsent;
/* Process id, set in main(); used as the ICMP identifier by send_v4()/proc_v4(). */
pid_t pid;
/* The raw socket descriptor, created in readloop(). */
int sockfd;
/* When non-zero, proc_v4() also prints ICMP messages that are not echo replies.
 * NOTE(review): nothing in this file sets it, so it stays 0. */
int verbose;
/* Handle one received IPv4 packet: buffer, byte count, receive timestamp. */
void proc_v4(char *, ssize_t, struct timeval *);
//void proc_v6(char *, ssize_t, struct timeval *);
/* Build and send one ICMPv4 echo request. */
void send_v4();
//void send_v6();
/* Create the socket, then receive and process packets in a loop. */
void readloop();
/* SIGALRM handler: sends a request and re-arms a one-second alarm. */
void sig_alrm(int);
/* In-place timeval subtraction: first argument -= second argument. */
void tv_sub(struct timeval*, struct timeval*);
/*
 * Per-protocol descriptor: the handlers and addresses the generic code
 * (readloop, sig_alrm) works through. The global pointer pr selects the
 * descriptor in use; main() points it at proto_v4.
 */
struct proto
{
/* Called by readloop() for every received packet. */
void (*fproc)(char *, ssize_t, struct timeval*);
/* Called by sig_alrm() to send one request. */
void (*fsend)(void);
/* Destination address passed to sendto(); main() sets it from getaddrinfo output. */
struct sockaddr *sasend;
/* Storage that recvfrom() fills with the sender's address; allocated in main(). */
struct sockaddr *sarecv;
/* Length in bytes of the two address structures above. */
socklen_t salen;
/* Protocol argument given to socket() in readloop(). */
int icmpproto;
}*pr;
/*
 * Convert a socket address to a printable string.
 *
 * sa  - address to format; only AF_INET is supported.
 * len - size of *sa; unused, kept so existing callers keep working.
 *
 * Returns a pointer to a static buffer holding the dotted-decimal address,
 * followed by "port=<n>" when the port is non-zero. Returns NULL when
 * inet_ntop() fails or the address family is not AF_INET. The buffer is
 * overwritten by the next call, so the function is not reentrant.
 */
char *sock_ntop(struct sockaddr *sa, socklen_t len)
{
    /* "port=" plus up to five digits plus NUL needs 11 bytes. The previous
     * 7-byte buffer made snprintf() cut every port down to its first digit. */
    char portstr[16];
    static char str[128];

    (void)len;

    switch (sa->sa_family) {
    case AF_INET: {
        struct sockaddr_in *sin = (struct sockaddr_in *)sa;
        int port = ntohs(sin->sin_port);

        if (inet_ntop(AF_INET, &sin->sin_addr, str, sizeof(str)) == NULL)
            return NULL;
        if (port != 0) {
            snprintf(portstr, sizeof(portstr), "port=%d", port);
            /* str holds at most INET_ADDRSTRLEN bytes here, so the
             * suffix always fits in the 128-byte buffer. */
            strcat(str, portstr);
        }
        return str;
    }
    default:
        /* Unsupported family: return a defined value instead of running
         * off the end of a non-void function. */
        return NULL;
    }
}
/*
 * Resolve a host and/or service name with getaddrinfo().
 *
 * host, serv - node and service names; either may be NULL.
 * family     - address family hint (AF_INET, AF_INET6, or 0 for any).
 * socktype   - socket type hint (0 for any).
 *
 * Returns the head of the result list, or NULL when the lookup fails.
 * The caller owns the list and releases it with freeaddrinfo().
 */
struct addrinfo *host_serv(const char *host, const char *serv, int family, int socktype)
{
    struct addrinfo hints;
    struct addrinfo *res = NULL;

    /* memset() from <string.h> replaces the legacy bzero(), whose header
     * <strings.h> this file never includes. */
    memset(&hints, 0, sizeof(hints));
    hints.ai_flags = AI_CANONNAME;
    hints.ai_family = family;
    hints.ai_socktype = socktype;

    if (getaddrinfo(host, serv, &hints, &res) != 0)
        return NULL;
    return res;
}
struct proto proto_v4 = {proc_v4, send_v4, NULL, NULL, 0, IPPROTO_ICMP};
int datalen = 56;
int main(int argc, char **argv)
{
int c;
struct addrinfo *ai;
pid = getpid();
signal(SIGALRM, sig_alrm);
ai = host_serv(argv[1], NULL, 0, 0);
pr = &proto_v4;
printf("ICMP_ECHO=%d\n", ICMP_ECHO);
pr->sasend = ai->ai_addr;
pr->sarecv = calloc(1, ai->ai_addrlen);
pr->salen = ai->ai_addrlen;
readloop();
exit(0);
}
/*
 * Create the raw socket, start the periodic sender, then receive packets
 * forever and hand each one to the protocol's fproc handler.
 *
 * Returns only on failure: when the socket cannot be created, when
 * privileges cannot be dropped, or when recvfrom() fails with an error
 * other than EINTR.
 */
void readloop(void)
{
    int size;
    char recvbuf[BUFSIZE];   /* local buffer; shadows the file-scope recvbuf */
    socklen_t len;
    ssize_t n;
    struct timeval tval;

    sockfd = socket(pr->sasend->sa_family, SOCK_RAW, pr->icmpproto);
    if (sockfd < 0) {
        /* Creating a raw socket needs privileges, so this is the usual
         * failure when the program is run as an ordinary user. */
        fprintf(stderr, "socket error:%s\n", strerror(errno));
        return;
    }

    /* The privileged work is done; give up any set-user-ID privileges. */
    if (setuid(getuid()) < 0) {
        fprintf(stderr, "setuid error:%s\n", strerror(errno));
        return;
    }

    /* Ask for a larger receive buffer. This is best effort: a failure is
     * not fatal, so the result is deliberately ignored. */
    size = 60 * 1024;
    (void)setsockopt(sockfd, SOL_SOCKET, SO_RCVBUF, &size, sizeof(size));

    /* Send the first request now; the handler re-arms a 1-second alarm. */
    sig_alrm(SIGALRM);

    for (;;)
    {
        len = pr->salen;
        n = recvfrom(sockfd, recvbuf, sizeof(recvbuf), 0, pr->sarecv, &len);
        if (n < 0) {
            /* Interrupted by SIGALRM: just retry. */
            if (errno == EINTR)
                continue;
            printf("recvfrom error:%s\n", strerror(errno));
            return;
        }
        gettimeofday(&tval, NULL);
        (*pr->fproc)(recvbuf, n, &tval);
    }
}
/*
 * Subtract *in from *out and store the difference back in *out.
 * A negative microsecond result borrows one second, so tv_usec ends up in
 * the range [0, 1000000) whenever both inputs are normalised.
 */
void tv_sub(struct timeval* out, struct timeval *in)
{
    out->tv_sec -= in->tv_sec;
    out->tv_usec -= in->tv_usec;
    if (out->tv_usec < 0) {
        /* Borrow one second to bring the microseconds back in range. */
        out->tv_sec -= 1;
        out->tv_usec += 1000000;
    }
}
void proc_v4(char *ptr, ssize_t len, struct timeval *tvrecv)
{
int hlen1, icmplen;
double rtt;
struct ip *ip;
struct icmp *icmp;
struct timeval *tvsend;
ip = (struct ip*)ptr;
hlen1 = ip->ip_hl << 2;
icmp = (struct icmp*)(ptr + hlen1);
if ((icmplen = len - hlen1) < 8) {
fprintf(stderr, "icmp len error\n");
return;
}
if (icmp->icmp_type == ICMP_ECHOREPLY) {
if (icmp->icmp_id != pid) return;
if (icmplen < 16) {
fprintf(stderr, "icmplen (%d) < 16\n", icmplen);
return;
}
tvsend = (struct timeval *)icmp->icmp_data;
tv_sub(tvrecv, tvsend);
rtt = tvrecv->tv_sec * 1000 + tvrecv->tv_usec / 1000;
printf("%d bytes from %s:seq=%u, ttl=%d, rtt=%.3f ms\n",
icmplen, sock_ntop(pr->sarecv, pr->salen), icmp->icmp_seq, ip->ip_ttl, rtt);
} else if (verbose) {
printf("%d bytes from %s:type=%d, code=%d\n", icmplen,
sock_ntop(pr->sarecv, pr->salen), icmp->icmp_type, icmp->icmp_code);
}
}
/*
 * SIGALRM handler, also called directly once by readloop().
 * Sends one request through the current protocol's fsend handler and
 * schedules the next SIGALRM one second from now.
 */
void sig_alrm(int signo)
{
    (void)signo;    /* the signal number is not needed */

    pr->fsend();
    alarm(1);
}
/*
 * Compute the 16-bit one's-complement Internet checksum over a buffer.
 *
 * addr - start of the data, read as 16-bit words in memory order.
 * len  - number of bytes to cover; a trailing odd byte is included.
 *
 * Returns the complement of the folded sum, ready to store in the
 * checksum field. Running it over data that already contains a correct
 * checksum gives 0.
 */
unsigned short in_cksum(unsigned short *addr, int len)
{
    const unsigned short *word = addr;
    unsigned short tail = 0;
    int acc = 0;
    int remaining;

    /* Add up all complete 16-bit words. */
    for (remaining = len; remaining > 1; remaining -= 2)
        acc += *word++;

    /* A leftover byte goes into the first byte of an otherwise zero word. */
    if (remaining == 1) {
        *(unsigned char *)&tail = *(const unsigned char *)word;
        acc += tail;
    }

    /* Fold the carries out of the upper 16 bits back into the lower 16. */
    acc = (acc >> 16) + (acc & 0xffff);
    acc += acc >> 16;

    return (unsigned short)~acc;
}
/*
 * Build one ICMPv4 echo request in sendbuf and send it to pr->sasend.
 *
 * The message carries pid as its identifier, the running nsent counter as
 * its sequence number, and the current time at the start of the data area.
 * Its total length is the 8-byte ICMP header plus datalen bytes.
 * NOTE(review): the result of sendto() is ignored, so send failures go
 * unnoticed.
 */
void send_v4()
{
    struct icmp *echo = (struct icmp *)sendbuf;
    struct timeval *stamp = (struct timeval *)echo->icmp_data;
    int pktlen = 8 + datalen;

    echo->icmp_type = ICMP_ECHO;
    echo->icmp_code = 0;
    echo->icmp_id = pid;
    echo->icmp_seq = nsent++;

    /* Store the send time at the start of the data area. */
    gettimeofday(stamp, NULL);

    /* The checksum field must be zero while the checksum is computed. */
    echo->icmp_cksum = 0;
    echo->icmp_cksum = in_cksum((unsigned short *)echo, pktlen);

    sendto(sockfd, sendbuf, pktlen, 0, pr->sasend, pr->salen);
}