RAW 协议用于在 IP 层之上实现自定义协议。使用该协议时,应用程序需要自己填充 IP 头和数据部分,内核只负责填充 MAC 头和 IP 头中的源 IP 字段;该版本还没有实现通过参数让内核填充 IP 头的功能。RAW 协议在协议栈中的位置和 TCP 类似:内核在创建一个 socket 的时候,会在底层创建一个 sock 结构体,sock 结构体中保存了一系列操作函数的指针。
示例代码(参考 https://sock-raw.org/papers/sock_raw),下面依次是发送端和接收端两个独立的程序:
#include <sys/socket.h>
#include <sys/types.h>
#include <netinet/ip.h>
#include <arpa/inet.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#ifndef DEST
#define DEST "127.0.0.1" /* destination address; override with -DDEST='"a.b.c.d"' */
#endif

/*
 * Sender example: hand-build an IPv4 header followed by a payload of 'A'
 * bytes and transmit it through an AF_INET / SOCK_RAW / IPPROTO_RAW socket.
 *
 * Returns 0 on success; exits with EXIT_FAILURE if the socket cannot be
 * created, DEST is not a valid dotted-quad address, or the send fails.
 */
int main(void)
{
	int s;
	struct sockaddr_in daddr;
	char packet[50];
	/* point the iphdr to the beginning of the packet */
	struct iphdr *ip = (struct iphdr *)packet;

	if ((s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW)) < 0) {
		perror("error:");
		exit(EXIT_FAILURE);
	}

	/* zero the whole address (including sin_zero) before filling it in */
	memset(&daddr, 0, sizeof(daddr));
	daddr.sin_family = AF_INET;
	daddr.sin_port = 0; /* not needed in SOCK_RAW */
	if (inet_pton(AF_INET, DEST, &daddr.sin_addr) != 1) {
		fprintf(stderr, "invalid destination address: %s\n", DEST);
		exit(EXIT_FAILURE);
	}

	memset(packet, 'A', sizeof(packet)); /* payload will be all As */
	ip->ihl = 5;
	ip->version = 4;
	ip->tos = 0;
	/* 16-bit field in network byte order; describes the whole buffer we send */
	ip->tot_len = htons((unsigned short)sizeof(packet));
	ip->frag_off = 0; /* no fragment */
	ip->ttl = 64; /* default value */
	ip->protocol = IPPROTO_RAW; /* protocol at L4 */
	ip->check = 0; /* not needed in iphdr */
	/* placeholder only: the kernel code discussed below overwrites saddr */
	ip->saddr = daddr.sin_addr.s_addr;
	ip->daddr = daddr.sin_addr.s_addr;

	if (sendto(s, packet, sizeof(packet), 0,
		   (struct sockaddr *)&daddr, (socklen_t)sizeof(daddr)) < 0) {
		perror("packet send error:");
		exit(EXIT_FAILURE);
	}
	return 0;
}
#include <sys/socket.h>
#include <sys/types.h>
#include <netinet/ip.h>
#include <arpa/inet.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Receiver example: read packets from a raw socket in an endless loop and
 * print everything that follows the fixed-size IP header.
 *
 * NOTE(review): Linux raw(7) documents IPPROTO_RAW sockets as send-only, so
 * on Linux this loop may never see a packet; a concrete protocol number
 * (e.g. IPPROTO_ICMP) would be needed to receive. The socket() arguments are
 * left as published.
 */
int main(void)
{
	int s;
	struct sockaddr_in saddr;
	char packet[50];

	if ((s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW)) < 0) {
		perror("error:");
		exit(EXIT_FAILURE);
	}

	while (1) {
		/* value-result argument: must be reset before every call */
		socklen_t fromlen = sizeof(saddr);
		ssize_t received;

		memset(packet, 0, sizeof(packet));
		received = recvfrom(s, packet, sizeof(packet), 0,
				    (struct sockaddr *)&saddr, &fromlen);
		if (received < 0) {
			perror("packet receive error:");
			continue; /* nothing valid in the buffer to print */
		}

		/* print the payload: only the bytes actually received */
		for (size_t i = sizeof(struct iphdr); i < (size_t)received; i++)
			fprintf(stderr, "%c", packet[i]);
		printf("\n");
	}
}
下面是 RAW 协议的实现源码。当我们对一个 RAW 协议的 socket 进行读写时,最终调用的就是以下这些函数。
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* RAW - implementation of IP "raw" sockets.
*
* Version: @(#)raw.c 1.0.4 05/25/93
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
*
* Fixes:
* Alan Cox : verify_area() fixed up
* Alan Cox : ICMP error handling
* Alan Cox : EMSGSIZE if you send too big a packet
* Alan Cox : Now uses generic datagrams and shared skbuff
* library. No more peek crashes, no more backlogs
* Alan Cox : Checks sk->broadcast.
* Alan Cox : Uses skb_free_datagram/skb_copy_datagram
* Alan Cox : Raw passes ip options too
* Alan Cox : Setsocketopt added
* Alan Cox : Fixed error return for broadcasts
* Alan Cox : Removed wake_up calls
* Alan Cox : Use ttl/tos
* Alan Cox : Cleaned up old debugging
* Alan Cox : Use new kernel side addresses
* Arnt Gulbrandsen : Fixed MSG_DONTROUTE in raw sockets.
* Alan Cox : BSD style RAW socket demultiplexing.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <asm/system.h>
#include <asm/segment.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include "ip.h"
#include "protocol.h"
#include <linux/skbuff.h>
#include "sock.h"
#include "icmp.h"
#include "udp.h"
/* Return the smaller of the two unsigned values (b when they are equal). */
static inline unsigned long min(unsigned long a, unsigned long b)
{
	return (b <= a) ? b : a;
}
/*
 * raw_err gets called by the icmp module.
 * (Annotator's note on this listing: not used.)
 *
 * err      - as read here: bits 8..15 are compared against an ICMP type,
 *            the low byte indexes icmp_err_convert[]
 * protocol - protocol entry whose ->data field holds the owning struct sock
 *
 * header, daddr and saddr are not consulted by this function.
 *
 * A source quench only halves sk->cong_window (when it is above 1); any
 * other error is stored in sk->err and reported via sk->error_report().
 */
void raw_err (int err, unsigned char *header, unsigned long daddr,
	unsigned long saddr, struct inet_protocol *protocol)
{
	struct sock *sk;

	if (protocol == NULL)
		return;
	sk = (struct sock *) protocol->data;
	if (sk == NULL)
		return;

	/* This is meaningless in raw sockets. */
	/*
	 * The mask must be parenthesised: '==' binds tighter than '&', so the
	 * unparenthesised form evaluated err & (0xff00 == ...) and this branch
	 * could never be taken.
	 */
	if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8))
	{
		if (sk->cong_window > 1)
			sk->cong_window = sk->cong_window/2;
		return;
	}

	sk->err = icmp_err_convert[err & 0xff].errno;
	sk->error_report(sk);
}
/*
 * This should be the easiest of all, all we do is
 * copy it into a buffer. All demultiplexing is done
 * in ip.c
 *
 * Called when the IP layer has a packet for this raw socket: the skb is
 * tagged with the socket, device and addresses and then queued on the
 * socket's receive queue for a later read. Always returns 0.
 */
int raw_rcv(struct sock *sk, struct sk_buff *skb, struct device *dev, long saddr, long daddr)
{
	int queued;

	/* Now we need to copy this into memory. */
	skb->sk = sk;
	/* length of IP header + data, taken from the IP header itself */
	skb->len = ntohs(skb->ip_hdr->tot_len);
	skb->h.raw = (unsigned char *) skb->ip_hdr;
	skb->dev = dev;
	/*
	 * saddr/daddr are the IP header fields (per the original annotation).
	 * They are stored crosswise: skb->saddr takes daddr and skb->daddr
	 * takes saddr.
	 */
	skb->saddr = daddr;
	skb->daddr = saddr;

	/* Charge it to the socket: hang the skb on sk's receive queue. */
	queued = sock_queue_rcv_skb(sk, skb);
	if (queued >= 0)
	{
		ip_statistics.IpInDelivers++;
		release_sock(sk);
		return 0;
	}

	/* Queueing failed: count the discard and drop the buffer. */
	ip_statistics.IpInDiscards++;
	skb->sk = NULL;
	kfree_skb(skb, FREE_READ);
	return 0;
}
/*
 * Send a RAW IP packet.
 *
 * sk       - sending socket
 * from     - source buffer, copied into the skb with memcpy_fromfs()
 * len      - number of bytes to take from 'from'
 * noblock  - handed through to sock_alloc_send_skb()
 * flags    - only MSG_DONTROUTE is accepted; MSG_OOB yields -EOPNOTSUPP,
 *            any other bit -EINVAL
 * usin     - optional destination; when NULL the socket must be in
 *            TCP_ESTABLISHED state and sk->daddr is used instead
 * addr_len - size of *usin; must be at least sizeof(struct sockaddr_in)
 *
 * Returns len on success, otherwise an error code (-EOPNOTSUPP, -EINVAL,
 * -EACCES, or whatever sock_alloc_send_skb() / build_header() reported).
 */
static int raw_sendto(struct sock *sk, unsigned char *from,
int len, int noblock, unsigned flags, struct sockaddr_in *usin, int addr_len)
{
struct sk_buff *skb;
struct device *dev=NULL;
struct sockaddr_in sin;
int tmp;
int err;
/*
* Check the flags. Only MSG_DONTROUTE is permitted.
*/
if (flags & MSG_OOB) /* Mirror BSD error message compatibility */
return -EOPNOTSUPP;
// Only MSG_DONTROUTE is supported
if (flags & ~MSG_DONTROUTE)
return(-EINVAL);
/*
* Get and verify the address.
*/
// The caller supplied a destination address
if (usin)
{
if (addr_len < sizeof(sin))
return(-EINVAL);
memcpy(&sin, usin, sizeof(sin));
// Only the AF_INET family is accepted (a zero family is let through too)
if (sin.sin_family && sin.sin_family != AF_INET)
return(-EINVAL);
}
else
{
// No address given: fall back to the address stored in the socket
if (sk->state != TCP_ESTABLISHED)
return(-EINVAL);
sin.sin_family = AF_INET;
sin.sin_port = sk->protocol;
sin.sin_addr.s_addr = sk->daddr;
}
// A zero port is replaced by the socket's protocol number
if (sin.sin_port == 0)
sin.sin_port = sk->protocol;
// INADDR_ANY as destination is replaced by ip_my_addr()
if (sin.sin_addr.s_addr == INADDR_ANY)
sin.sin_addr.s_addr = ip_my_addr();
// Broadcast destinations require sk->broadcast to be set
if (sk->broadcast == 0 && ip_chk_addr(sin.sin_addr.s_addr)==IS_BROADCAST)
return -EACCES;
// Allocate an skb for sending, sized for the data plus sk->prot->max_header
// (per the original annotation it is taken from the socket's write buffer)
skb=sock_alloc_send_skb(sk, len+sk->prot->max_header, noblock, &err);
if(skb==NULL)
return err;
skb->sk = sk;
// Per the original annotation: the skb can be freed once sent, no retransmission
skb->free = 1;
skb->localroute = sk->localroute | (flags&MSG_DONTROUTE);
// Build the MAC header; per the original annotation the IP header is built
// by the user, i.e. 'from' is expected to already contain it.
// tmp receives the length of what build_header wrote (negative on failure).
tmp = sk->prot->build_header(skb, sk->saddr,
sin.sin_addr.s_addr, &dev,
sk->protocol, sk->opt, skb->mem_len, sk->ip_tos,sk->ip_ttl);
if (tmp < 0)
{
kfree_skb(skb,FREE_WRITE);
release_sock(sk);
return(tmp);
}
// Copy the caller's IP header and data into the skb, right after the built header
memcpy_fromfs(skb->data + tmp, from, len);
/*
* If we are using IPPROTO_RAW, we need to fill in the source address in
* the IP header
*/
if(sk->protocol==IPPROTO_RAW)
{
unsigned char *buff;
struct iphdr *iph;
// Point at the IP header (it starts tmp bytes into the skb data)
buff = skb->data;
buff += tmp;
iph = (struct iphdr *)buff;
// Overwrite the source IP field with the socket's source address
iph->saddr = sk->saddr;
}
// Update the skb data length: built header length + caller's bytes (IP header + data)
skb->len = tmp + len;
// Hand the packet to the transmit routine
sk->prot->queue_xmit(sk, dev, skb, 1);
release_sock(sk);
return(len);
}
/*
 * write() entry point for raw sockets: identical to raw_sendto() with no
 * explicit destination address (NULL address, zero address length).
 */
static int raw_write(struct sock *sk, unsigned char *buff, int len, int noblock,
	unsigned flags)
{
	int result = raw_sendto(sk, buff, len, noblock, flags, NULL, 0);

	return result;
}
/*
 * Close handler for raw sockets: only marks the socket state as TCP_CLOSE.
 * The timeout argument is accepted but not consulted.
 */
static void raw_close(struct sock *sk, int timeout)
{
	sk->state = TCP_CLOSE;
}
/* Per-socket init hook for raw sockets: nothing to set up, always returns 0. */
static int raw_init(struct sock *sk)
{
	(void)sk; /* intentionally unused */
	return 0;
}
/*
 * This should be easy, if there is something there
 * we return it, otherwise we block.
 *
 * sk       - receiving socket
 * to       - destination buffer for the packet bytes
 * len      - capacity of 'to'
 * noblock  - handed through to skb_recv_datagram()
 * flags    - MSG_OOB is rejected with -EOPNOTSUPP
 * sin      - optional; receives the address recorded in the skb
 * addr_len - optional; set to sizeof(struct sockaddr_in)
 *
 * Returns the true size of the datagram (which may exceed the number of
 * bytes copied), 0 if the receive side is shut down, or the error reported
 * by skb_recv_datagram().
 */
int raw_recvfrom(struct sock *sk, unsigned char *to, int len,
	int noblock, unsigned flags, struct sockaddr_in *sin,
	int *addr_len)
{
	int copied=0;
	struct sk_buff *skb;
	int err;
	int truesize;

	if (flags & MSG_OOB)
		return -EOPNOTSUPP;
	if (sk->shutdown & RCV_SHUTDOWN)
		return(0);
	if (addr_len)
		*addr_len=sizeof(*sin);

	/* take the next skb off the socket's receive queue */
	skb=skb_recv_datagram(sk,flags,noblock,&err);
	if(skb==NULL)
		return err;

	truesize=skb->len;
	/* copy no more than the caller's buffer holds or the packet contains */
	copied = min(len, truesize);
	skb_copy_datagram(skb, 0, to, copied);
	sk->stamp=skb->stamp;

	/* Copy the address. */
	if (sin)
	{
		sin->sin_family = AF_INET;
		/* raw sockets have no port; do not hand back an indeterminate value */
		sin->sin_port = 0;
		/* raw_rcv() stored its saddr argument in skb->daddr */
		sin->sin_addr.s_addr = skb->daddr;
	}
	skb_free_datagram(skb);
	release_sock(sk);
	return (truesize); /* len not copied. BSD returns the true size of the message so you know a bit fell off! */
}
/*
 * read() entry point for raw sockets: raw_recvfrom() without asking for
 * the sender's address (NULL address and NULL address-length pointers).
 */
int raw_read (struct sock *sk, unsigned char *buff, int len, int noblock,unsigned flags)
{
	int result = raw_recvfrom(sk, buff, len, noblock, flags, NULL, NULL);

	return result;
}
/*
 * Operations table for the RAW protocol.
 *
 * This is a positional initializer: the meaning of each slot is fixed by the
 * field order of struct proto, which is declared elsewhere (sock.h is
 * included above) and is not visible here. The notes below are therefore
 * inferred from the names of the values and from how raw_sendto() uses
 * sk->prot -- confirm against the struct declaration before relying on them.
 */
struct proto raw_prot = {
sock_wmalloc,
sock_rmalloc,
sock_wfree,
sock_rfree,
sock_rspace,
sock_wspace,
raw_close,
raw_read,
raw_write,
raw_sendto,
raw_recvfrom,
ip_build_header, /* presumably the build_header hook called by raw_sendto() */
udp_connect, /* a UDP routine is reused here; presumably the connect hook -- TODO confirm */
NULL,
ip_queue_xmit, /* presumably the queue_xmit hook called by raw_sendto() */
NULL,
NULL,
NULL,
NULL,
datagram_select,
NULL,
raw_init,
NULL,
ip_setsockopt,
ip_getsockopt,
128, /* presumably max_header, which raw_sendto() adds to the skb size -- TODO confirm */
0,
{NULL,},
"RAW", /* protocol name string */
0, 0
};