一、背景
在云主机上,如果每次对外请求都要经过114.114.114.114这样的外部主机查询DNS,会非常耗时;可以考虑在内部搭建一个DNS服务器,这样就不需要频繁地查询外部DNS服务器,从而缩短响应时间。
DNS就是一个查表的过程,客户端发起请求,返回对应的IP地址,DNS没有磁盘操作,而是纯内存操作;所以能影响DNS性能的主要在网卡上(即取决于网卡的性能)。DPDK可以作为DNS服务器业务处理的底层框架。
二、实现逻辑
- 绑定53的端口。
- 接收一帧数据:recvfrom。
- 解析数据:decode。主要是解出域名。
- 查表,找到域名对应的IP。
- 打包数据:encode。
- 发送出去:sendto。
三、环境配置
(1)导出dpdk环境变量。
cd dpdk路径
# 如 dpdk/dpdk-stable-19.08.2/
# 切换root权限
sudo su
export RTE_SDK=dpdk路径
export RTE_TARGET=x86_64-native-linux-gcc
(2)配置dpdk。
./usertools/dpdk-setup.sh
依次执行:
43(加载DPDK UIO 模块,即插入driver)
44(加载VFIO模块,也是一种driver)
45(加载KNI模块,将一些数据写回内核)
46(为非NUMA系统设置巨页,可以减少频繁的页交换;页数可填512)
47(为NUMA系统设置巨页,页数同样可填512)
49(将网卡绑定到DPDK;执行之前需要先把eth0停掉,即执行sudo ifconfig eth0 down;输入的pci地址为eth0对应的地址,如0000:03:00.0)
60(退出)
四、完整代码实现
代码中实现了:
- udp协议的收发。
- KNI将不关注的协议写回内核以及从内核获取响应数据发送到网卡。
- 基于udp的dns服务器。
- 性能测试可以使用dnsperf工具。
(dns.h)
#ifndef __DPDK_DNS_H__
#define __DPDK_DNS_H__
/*
 * Response Type: values this server stores in Message.rcode
 * (resolve_query() uses Ok_ResponseType and NotImplemented_ResponseType).
 */
enum {
Ok_ResponseType = 0,
FormatError_ResponseType = 1,
ServerFailure_ResponseType = 2,
NameError_ResponseType = 3,
NotImplemented_ResponseType = 4,
Refused_ResponseType = 5
};
/*
 * Resource Record Types (the TYPE field of a question or record).
 * Only A, AAAA and TXT are answered by resolve_query(); the others are
 * listed for reference.
 */
enum {
A_Resource_RecordType = 1,
NS_Resource_RecordType = 2,
CNAME_Resource_RecordType = 5,
SOA_Resource_RecordType = 6,
PTR_Resource_RecordType = 12,
MX_Resource_RecordType = 15,
TXT_Resource_RecordType = 16,
AAAA_Resource_RecordType = 28,
SRV_Resource_RecordType = 33
};
/* Operation Code (the 4-bit OPCODE field of the header flags word) */
enum {
QUERY_OperationCode = 0, /* standard query */
IQUERY_OperationCode = 1, /* inverse query */
STATUS_OperationCode = 2, /* server status request */
NOTIFY_OperationCode = 4, /* zone change notification (RFC 1996) */
UPDATE_OperationCode = 5 /* dynamic update of resource records (RFC 2136) */
};
/*
 * Response Code: the first four values of the Response Type list above
 * under a second set of names.
 */
enum {
NoError_ResponseCode = 0,
FormatError_ResponseCode = 1,
ServerFailure_ResponseCode = 2,
NameError_ResponseCode = 3
};
/*
 * Query Type: QTYPE values that are only valid in questions.
 * NOTE(review): none of these are referenced by the code shown here.
 */
enum {
IXFR_QueryType = 251,
AXFR_QueryType = 252,
MAILB_QueryType = 253,
MAILA_QueryType = 254,
STAR_QueryType = 255
};
/*
* Types.
*/
/*
 * Question Section: one decoded question.
 * Nodes are heap-allocated by decode_msg() and released, together with
 * qName, by free_questions().
 */
struct Question {
char *qName; /* dotted, NUL-terminated name, e.g. "foo.bar.com"; heap-owned */
uint16_t qType; /* requested record type, host byte order */
uint16_t qClass; /* requested class, host byte order */
struct Question *next; // for linked list
};
/*
 * Data part of a Resource Record.
 * Which member is valid is decided by ResourceRecord.type
 * (TXT -> txt_record, A -> a_record, AAAA -> aaaa_record).
 */
union ResourceData {
struct {
uint8_t txt_data_len; /* number of text bytes emitted after the length byte */
char *txt_data; /* text bytes; never freed by free_resource_records() */
} txt_record;
struct {
uint8_t addr[4]; /* IPv4 address, emitted in index order */
} a_record;
struct {
uint8_t addr[16]; /* IPv6 address, emitted in index order */
} aaaa_record;
};
/*
 * Resource Record Section: one record of a response.
 * Nodes are created by resolve_query() and released, together with
 * name, by free_resource_records().
 */
struct ResourceRecord {
char *name; /* dotted owner name; heap-owned copy of the question name */
uint16_t type; /* record type, selects the active rd_data member */
uint16_t class; /* record class, copied from the question */
uint32_t ttl; /* time to live in seconds */
uint16_t rd_length; /* number of RDATA bytes written by the encoder */
union ResourceData rd_data;
struct ResourceRecord *next; // for linked list
};
/*
 * In-memory form of one DNS message. All header values are kept in host
 * byte order, with each flag unpacked into its own field.
 * The same object is filled by decode_msg(), turned into a response by
 * resolve_query() and serialized by encode_msg().
 */
struct Message {
uint16_t id; /* Identifier */
/* Flags */
uint16_t qr; /* Query/Response Flag */
uint16_t opcode; /* Operation Code */
uint16_t aa; /* Authoritative Answer Flag */
uint16_t tc; /* Truncation Flag */
uint16_t rd; /* Recursion Desired */
uint16_t ra; /* Recursion Available */
uint16_t rcode; /* Response Code */
uint16_t qdCount; /* Question Count */
uint16_t anCount; /* Answer Record Count */
uint16_t nsCount; /* Authority Record Count */
uint16_t arCount; /* Additional Record Count */
/* At least one question; questions are copied to the response 1:1 */
struct Question *questions;
/*
* Resource records to be sent back.
* Every resource record can be in any of the following places.
* But every place has a different semantic.
*/
struct ResourceRecord *answers;
struct ResourceRecord *authorities;
struct ResourceRecord *additionals;
};
/*
 * Parse the header and questions of a wire-format query into msg.
 * Returns 0 on success and -1 on failure.
 */
int decode_msg(struct Message *msg, const uint8_t *buffer, int size);
/*
 * Turn a decoded query into a response in place: sets the response
 * flags and prepends one answer record per question that can be answered.
 */
void resolve_query(struct Message *msg);
/*
 * Serialize msg at *buffer and advance *buffer past the written bytes.
 * Returns 0 on success and non-zero when a record could not be encoded.
 * The function takes no buffer size, so the caller must provide enough room.
 */
int encode_msg(struct Message *msg, uint8_t **buffer);
/* Free a whole question list including every qName; NULL is accepted. */
void free_questions(struct Question *qq);
/* Free a whole record list including every name; NULL is accepted. */
void free_resource_records(struct ResourceRecord *rr);
/* Print the message header, questions and all record lists to stdout. */
void print_message(struct Message *msg);
#endif
(dpdk-dns.c)
#include <stdio.h>
#include <stdlib.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <netdb.h>
#include <ifaddrs.h>
#include <errno.h>
#include <string.h>
#include <stdint.h>
#include "dns.h"
#define BUF_SIZE 1500
#define MIN(x, y) ((x) <= (y) ? (x) : (y))
/*
* This software is licensed under the CC0.
*
* This is a _basic_ DNS Server for educational use.
* It does not prevent invalid packets from crashing
* the server.
*
* To test start the program and issue a DNS request:
* dig @127.0.0.1 -p 9000 foo.bar.com
*/
/*
 * Masks and constants.
 *
 * Bit layout of the 16-bit flags word that follows the message ID
 * (most significant bit first):
 *
 *   QR(1) | OPCODE(4) | AA(1) | TC(1) | RD(1) | RA(1) | Z(3) | RCODE(4)
 *
 * Each mask selects one field; the decoder shifts the masked value down
 * by the position of the field's lowest bit.
 */
static const uint32_t QR_MASK = 0x8000;
static const uint32_t OPCODE_MASK = 0x7800;
static const uint32_t AA_MASK = 0x0400;
static const uint32_t TC_MASK = 0x0200;
static const uint32_t RD_MASK = 0x0100;
static const uint32_t RA_MASK = 0x0080; /* bit 7; 0x8000 would alias the QR bit */
static const uint32_t RCODE_MASK = 0x000F;
#undef strdup
/*
 * Local replacement for strdup(): returns a heap copy of s that the
 * caller must free(). Returns NULL when s is NULL or allocation fails.
 */
char *strdup(const char *s)
{
    if (s == NULL)
        return NULL;

    size_t bytes = strlen(s) + 1; /* include the terminator */
    char *copy = (char *)malloc(bytes);
    if (copy == NULL)
        return NULL;

    memcpy(copy, s, bytes);
    return copy;
}
/*
 * Look up the IPv4 address for domain_name.
 * The table is a single hard-coded entry: "foo.bar.com" -> 192.168.232.133.
 * Returns 0 and fills addr on a hit, -1 (addr untouched) otherwise.
 */
static int get_A_Record(uint8_t addr[4], const char domain_name[])
{
    static const uint8_t foo_bar_com_v4[4] = { 192, 168, 232, 133 };

    if (strcmp("foo.bar.com", domain_name) != 0)
        return -1;

    memcpy(addr, foo_bar_com_v4, sizeof foo_bar_com_v4);
    return 0;
}
/*
 * Look up the IPv6 address for domain_name.
 * The table is a single hard-coded entry: "foo.bar.com" -> fe80::1.
 * Returns 0 and fills all 16 bytes of addr on a hit, -1 (addr untouched)
 * otherwise.
 */
static int get_AAAA_Record(uint8_t addr[16], const char domain_name[])
{
    if (strcmp("foo.bar.com", domain_name) != 0)
        return -1;

    /* fe80:0000:...:0001 -- everything between the prefix and the last byte is zero */
    memset(addr, 0x00, 16);
    addr[0] = 0xfe;
    addr[1] = 0x80;
    addr[15] = 0x01;
    return 0;
}
/*
 * Look up the TXT data for domain_name.
 * On a hit ("foo.bar.com") *addr is pointed at a string literal, so the
 * caller must not free or modify it, and 0 is returned. On a miss *addr
 * is left untouched and -1 is returned.
 */
static int get_TXT_Record(char **addr, const char domain_name[])
{
    const int known = (strcmp("foo.bar.com", domain_name) == 0);

    if (known)
        *addr = "abcdefg";

    return known ? 0 : -1;
}
/*
* Debugging functions.
*/
/*
static void print_hex(uint8_t *buf, size_t len)
{
size_t i;
printf("%zu bytes:\n", len);
for (i = 0; i < len; ++i)
printf("%02x ", buf[i]);
printf("\n");
}
*/
static void print_resource_record(struct ResourceRecord *rr)
{
int i;
while (rr) {
printf(" ResourceRecord { name '%s', type %u, class %u, ttl %u, rd_length %u, ",
rr->name,
rr->type,
rr->class,
rr->ttl,
rr->rd_length
);
union ResourceData *rd = &rr->rd_data;
switch (rr->type) {
case A_Resource_RecordType:
printf("Address Resource Record { address ");
for(i = 0; i < 4; ++i)
printf("%s%u", (i ? "." : ""), rd->a_record.addr[i]);
printf(" }");
break;
case AAAA_Resource_RecordType:
printf("AAAA Resource Record { address ");
for(i = 0; i < 16; ++i)
printf("%s%02x", (i ? ":" : ""), rd->aaaa_record.addr[i]);
printf(" }");
break;
case TXT_Resource_RecordType:
printf("Text Resource Record { txt_data '%s' }",
rd->txt_record.txt_data
);
break;
default:
printf("Unknown Resource Record { ??? }");
}
printf("}\n");
rr = rr->next;
}
}
void print_message(struct Message *msg)
{
struct Question *q;
printf("QUERY { ID: %02x", msg->id);
printf(". FIELDS: [ QR: %u, OpCode: %u ]", msg->qr, msg->opcode);
printf(", QDcount: %u", msg->qdCount);
printf(", ANcount: %u", msg->anCount);
printf(", NScount: %u", msg->nsCount);
printf(", ARcount: %u,\n", msg->arCount);
q = msg->questions;
while (q) {
printf(" Question { qName '%s', qType %u, qClass %u }\n",
q->qName,
q->qType,
q->qClass
);
q = q->next;
}
print_resource_record(msg->answers);
print_resource_record(msg->authorities);
print_resource_record(msg->additionals);
printf("}\n");
}
/*
* Basic memory operations.
*/
/*
 * Read a big-endian (network order) 16-bit value at *buffer, advance
 * *buffer by two bytes and return the value in host order.
 * No bounds checking is done; the caller must guarantee two readable bytes.
 */
static size_t get16bits(const uint8_t **buffer)
{
    const uint8_t *src = *buffer;

    *buffer = src + 2;
    /* assemble high byte first: identical to ntohs() on the raw bytes */
    return ((size_t)src[0] << 8) | (size_t)src[1];
}
/* Store one byte at *buffer and advance *buffer past it. */
static void put8bits(uint8_t **buffer, uint8_t value)
{
    uint8_t *dst = *buffer;

    dst[0] = value;
    *buffer = dst + 1;
}
/*
 * Store a host-order 16-bit value at *buffer in big-endian (network)
 * order and advance *buffer by two bytes.
 */
static void put16bits(uint8_t **buffer, uint16_t value)
{
    uint8_t *dst = *buffer;

    dst[0] = (uint8_t)(value >> 8);   /* most significant byte first */
    dst[1] = (uint8_t)(value & 0xff);
    *buffer = dst + 2;
}
/*
 * Store a host-order 32-bit value at *buffer in big-endian (network)
 * order and advance *buffer by four bytes.
 */
static void put32bits(uint8_t **buffer, uint32_t value)
{
    uint8_t *dst = *buffer;
    int shift;

    /* emit the bytes from most to least significant */
    for (shift = 24; shift >= 0; shift -= 8)
        *dst++ = (uint8_t)((value >> shift) & 0xff);

    *buffer = dst;
}
/*
* Decoding/Encoding functions.
*/
// 3foo3bar3com0 => foo.bar.com
/*
 * Decode one uncompressed wire-format domain name.
 *
 * The name is a sequence of labels, each a length byte (1..63) followed
 * by that many bytes, ended by a zero length byte. Labels are walked by
 * their length byte, so bytes inside a label (digits, '-', ...) are
 * copied verbatim and never mistaken for separators.
 *
 * @param buf  in: start of the encoded name; out: advanced past the
 *             terminating zero byte on success, untouched on failure.
 * @param len  number of readable bytes at *buf.
 * @return heap-allocated dotted name (caller frees; "" for the root
 *         name), or NULL when the name is truncated, uses compression
 *         pointers / label lengths above 63, exceeds 255 characters, or
 *         memory cannot be allocated.
 */
static char *decode_domain_name(const uint8_t **buf, size_t len)
{
    const uint8_t *in = *buf;
    char domain[256];
    size_t in_pos = 0;  /* read offset into the wire data */
    size_t out_pos = 0; /* write offset into domain[] */

    for (;;) {
        if (in_pos >= len)
            return NULL; /* ran out of input before the terminator */

        size_t label_len = in[in_pos++];
        if (label_len == 0)
            break; /* root label: end of name */
        if (label_len > 63)
            return NULL; /* compression pointer or reserved label type */
        if (label_len > len - in_pos)
            return NULL; /* label claims more bytes than are available */

        /* room for an optional '.', the label and the final NUL */
        if (out_pos + (out_pos != 0) + label_len >= sizeof domain)
            return NULL;

        if (out_pos != 0)
            domain[out_pos++] = '.';
        memcpy(domain + out_pos, in + in_pos, label_len);
        out_pos += label_len;
        in_pos += label_len;
    }
    domain[out_pos] = '\0';

    char *name = malloc(out_pos + 1);
    if (name == NULL)
        return NULL;
    memcpy(name, domain, out_pos + 1);

    *buf = in + in_pos;
    return name;
}
// foo.bar.com => 3foo3bar3com0
/*
 * Write `domain` in wire format at *buffer and advance *buffer past it.
 * Every dot-separated piece becomes a length byte followed by its bytes;
 * a zero byte ends the name. Nothing is validated: there is no check on
 * label length or on the space available in the output buffer.
 */
static void encode_domain_name(uint8_t **buffer, const char *domain)
{
    uint8_t *out = *buffer;
    const char *label = domain;

    for (;;) {
        const char *dot = strchr(label, '.');
        /* the final label runs to the end of the string */
        int label_len = (dot != NULL) ? (int)(dot - label) : (int)strlen(label);

        *out++ = label_len;
        memcpy(out, label, label_len);
        out += label_len;

        if (dot == NULL)
            break;
        label = dot + 1;
    }

    *out++ = 0; /* root label terminates the name */
    *buffer = out;
}
/*
 * Parse the fixed DNS header at *buffer into msg and advance *buffer
 * past it (six 16-bit words, 12 bytes). Each flag is isolated with its
 * file-level mask and shifted down into its own Message field.
 * The caller must guarantee that 12 bytes are readable.
 */
static void decode_header(struct Message *msg, const uint8_t **buffer)
{
    /* Word order on the wire. */
    enum { W_ID, W_FLAGS, W_QDCOUNT, W_ANCOUNT, W_NSCOUNT, W_ARCOUNT, W_TOTAL };
    uint32_t word[W_TOTAL];
    int k;

    for (k = 0; k < W_TOTAL; ++k)
        word[k] = get16bits(buffer);

    msg->id = word[W_ID];

    msg->qr = (word[W_FLAGS] & QR_MASK) >> 15;
    msg->opcode = (word[W_FLAGS] & OPCODE_MASK) >> 11;
    msg->aa = (word[W_FLAGS] & AA_MASK) >> 10;
    msg->tc = (word[W_FLAGS] & TC_MASK) >> 9;
    msg->rd = (word[W_FLAGS] & RD_MASK) >> 8;
    msg->ra = (word[W_FLAGS] & RA_MASK) >> 7;
    msg->rcode = (word[W_FLAGS] & RCODE_MASK) >> 0;

    msg->qdCount = word[W_QDCOUNT];
    msg->anCount = word[W_ANCOUNT];
    msg->nsCount = word[W_NSCOUNT];
    msg->arCount = word[W_ARCOUNT];
}
/*
 * Serialize the DNS header of msg at *buffer (12 bytes) and advance
 * *buffer past it.
 *
 * All flag fields are packed, not only QR and RCODE, so a response
 * carries the AA bit set by the resolver and echoes the OPCODE and RD
 * bit of the query. Flags word layout, most significant bit first:
 *
 *   QR(1) | OPCODE(4) | AA(1) | TC(1) | RD(1) | RA(1) | Z(3) | RCODE(4)
 *
 * Each field is truncated to its width before shifting, so a stray
 * value in a Message field cannot spill into a neighbouring flag.
 * The reserved Z bits are always written as zero.
 */
static void encode_header(struct Message *msg, uint8_t **buffer)
{
    uint16_t flags = 0;

    flags |= (uint16_t)((msg->qr & 0x1u) << 15);
    flags |= (uint16_t)((msg->opcode & 0xFu) << 11);
    flags |= (uint16_t)((msg->aa & 0x1u) << 10);
    flags |= (uint16_t)((msg->tc & 0x1u) << 9);
    flags |= (uint16_t)((msg->rd & 0x1u) << 8);
    flags |= (uint16_t)((msg->ra & 0x1u) << 7);
    flags |= (uint16_t)(msg->rcode & 0xFu);

    put16bits(buffer, msg->id);
    put16bits(buffer, flags);
    put16bits(buffer, msg->qdCount);
    put16bits(buffer, msg->anCount);
    put16bits(buffer, msg->nsCount);
    put16bits(buffer, msg->arCount);
}
/*
 * Decode a wire-format DNS query into msg.
 *
 * Only the header and the question section are parsed. Queries that
 * announce answer or authority records are rejected. Every decoded
 * question is prepended to msg->questions; the caller releases the list
 * with free_questions(), also after a failure, because questions decoded
 * before the error stay linked.
 *
 * @param msg     zero-initialized message to fill.
 * @param buffer  start of the DNS payload.
 * @param size    number of readable bytes at buffer.
 * @return 0 on success, -1 on malformed or truncated input or when
 *         memory cannot be allocated.
 */
int decode_msg(struct Message *msg, const uint8_t *buffer, int size)
{
    enum { HEADER_LEN = 12, QUESTION_FIXED_LEN = 4 /* qType + qClass */ };
    const uint8_t *end;
    uint32_t i;

    if (msg == NULL || buffer == NULL || size < HEADER_LEN) {
        printf("Message too short!\n");
        return -1;
    }
    end = buffer + size;

    decode_header(msg, &buffer);
    if (msg->anCount != 0 || msg->nsCount != 0) {
        printf("Only questions expected!\n");
        return -1;
    }

    // parse questions
    for (i = 0; i < msg->qdCount; ++i) {
        struct Question *q = malloc(sizeof *q);
        if (q == NULL) {
            printf("Out of memory!\n");
            return -1;
        }

        /* hand the name decoder only the bytes that are actually left */
        q->qName = decode_domain_name(&buffer, (size_t)(end - buffer));
        if (q->qName == NULL) {
            printf("Failed to decode domain name!\n");
            free(q);
            return -1;
        }
        if (end - buffer < QUESTION_FIXED_LEN) {
            printf("Truncated question!\n");
            free(q->qName);
            free(q);
            return -1;
        }
        q->qType = get16bits(&buffer);
        q->qClass = get16bits(&buffer);

        // prepend question to questions list
        q->next = msg->questions;
        msg->questions = q;
    }

    // We do not expect any resource records to parse here.
    return 0;
}
// For every question in the message add a appropiate resource record
// in either section 'answers', 'authorities' or 'additionals'.
void resolve_query(struct Message *msg)
{
struct ResourceRecord *beg;
struct ResourceRecord *rr;
struct Question *q;
int rc;
// leave most values intact for response
msg->qr = 1; // this is a response
msg->aa = 1; // this server is authoritative
msg->ra = 0; // no recursion available
msg->rcode = Ok_ResponseType;
// should already be 0
msg->anCount = 0;
msg->nsCount = 0;
msg->arCount = 0;
// for every question append resource records
q = msg->questions;
while (q) {
rr = malloc(sizeof(struct ResourceRecord)); //malloc
memset(rr, 0, sizeof(struct ResourceRecord));
rr->name = strdup(q->qName);
rr->type = q->qType;
rr->class = q->qClass;
rr->ttl = 60*60; // in seconds; 0 means no caching
//printf("Query for '%s'\n", q->qName);
// We only can only answer two question types so far
// and the answer (resource records) will be all put
// into the answers list.
// This behavior is probably non-standard!
switch (q->qType) {
case A_Resource_RecordType:
rr->rd_length = 4;
rc = get_A_Record(rr->rd_data.a_record.addr, q->qName);
if (rc < 0)
{
free(rr->name);
free(rr);
goto next;
}
break;
case AAAA_Resource_RecordType:
rr->rd_length = 16;
rc = get_AAAA_Record(rr->rd_data.aaaa_record.addr, q->qName);
if (rc < 0)
{
free(rr->name);
free(rr);
goto next;
}
break;
case TXT_Resource_RecordType:
rc = get_TXT_Record(&(rr->rd_data.txt_record.txt_data), q->qName);
if (rc < 0) {
free(rr->name);
free(rr);
goto next;
}
int txt_data_len = strlen(rr->rd_data.txt_record.txt_data);
rr->rd_length = txt_data_len + 1;
rr->rd_data.txt_record.txt_data_len = txt_data_len;
break;
/*
case NS_Resource_RecordType:
case CNAME_Resource_RecordType:
case SOA_Resource_RecordType:
case PTR_Resource_RecordType:
case MX_Resource_RecordType:
case TXT_Resource_RecordType:
*/
default:
free(rr);
msg->rcode = NotImplemented_ResponseType;
printf("Cannot answer question of type %d.\n", q->qType);
goto next;
}
msg->anCount++;
// prepend resource record to answers list
beg = msg->answers;
msg->answers = rr;
rr->next = beg;
// jump here to omit question
next:
// process next question
q = q->next;
}
}
/*
 * Serialize every record of the list starting at rr at *buffer and
 * advance *buffer past the written bytes.
 *
 * The record type is checked before anything is written, so an
 * unsupported record never leaves a half-encoded entry in the output.
 * The function takes no buffer size; the caller must provide enough room.
 *
 * @return 0 upon success, 1 when a record of an unsupported type is
 *         found (encoding stops at that record).
 */
static int encode_resource_records(struct ResourceRecord *rr, uint8_t **buffer)
{
    int i;

    for (; rr != NULL; rr = rr->next) {
        switch (rr->type) {
        case A_Resource_RecordType:
        case AAAA_Resource_RecordType:
        case TXT_Resource_RecordType:
            break;
        default:
            fprintf(stderr, "Unknown type %u. => Ignore resource record.\n", rr->type);
            return 1;
        }

        // Answer questions by attaching resource sections.
        encode_domain_name(buffer, rr->name);
        put16bits(buffer, rr->type);
        put16bits(buffer, rr->class);
        put32bits(buffer, rr->ttl);
        put16bits(buffer, rr->rd_length);

        switch (rr->type) {
        case A_Resource_RecordType:
            for (i = 0; i < 4; ++i)
                put8bits(buffer, rr->rd_data.a_record.addr[i]);
            break;
        case AAAA_Resource_RecordType:
            for (i = 0; i < 16; ++i)
                put8bits(buffer, rr->rd_data.aaaa_record.addr[i]);
            break;
        case TXT_Resource_RecordType:
            /* one character-string: length byte followed by the text */
            put8bits(buffer, rr->rd_data.txt_record.txt_data_len);
            for (i = 0; i < rr->rd_data.txt_record.txt_data_len; i++)
                put8bits(buffer, rr->rd_data.txt_record.txt_data[i]);
            break;
        default:
            break; /* unreachable: filtered above */
        }
    }
    return 0;
}
/* @return 0 upon failure, 1 upon success */
int encode_msg(struct Message *msg, uint8_t **buffer)
{
struct Question *q;
int rc;
encode_header(msg, buffer);
q = msg->questions;
while (q) {
encode_domain_name(buffer, q->qName);
put16bits(buffer, q->qType);
put16bits(buffer, q->qClass);
q = q->next;
}
rc = 0;
rc |= encode_resource_records(msg->answers, buffer);
rc |= encode_resource_records(msg->authorities, buffer);
rc |= encode_resource_records(msg->additionals, buffer);
return rc;
}
/*
 * Release a whole record list: every node and its name string.
 * The record data is not freed. A NULL list is accepted.
 */
void free_resource_records(struct ResourceRecord *rr)
{
    while (rr != NULL) {
        struct ResourceRecord *doomed = rr;

        rr = rr->next; /* step forward before the node goes away */
        free(doomed->name);
        free(doomed);
    }
}
/*
 * Release a whole question list: every node and its qName string.
 * A NULL list is accepted.
 */
void free_questions(struct Question *qq)
{
    while (qq != NULL) {
        struct Question *doomed = qq;

        qq = qq->next; /* step forward before the node goes away */
        free(doomed->qName);
        free(doomed);
    }
}
(dpdk_udp.c)
#include <rte_eal.h>
#include <rte_ethdev.h>
#include <rte_mbuf.h>
#include <rte_kni.h>
#include <stdio.h>
#include <arpa/inet.h>
#include "dns.h"
/* Number of mbufs requested for the packet pool created in main(). */
#define MBUF_NUMBER 8196
/* Despite the name this is the burst size: the most packets fetched per
   rx burst call and the length of the local mbuf arrays. */
#define MBUF_SIZE 32
/* Compile-time feature switches (1 = enabled). */
#define ENABLE_SEND 1
#define ENABLE_KNI_APP 1
#define ENABLE_DNS_APP 1
#define ENABLE_PROMISCUOUS 0
/* UDP destination port that selects the DNS handler. */
#define DNS_UDP_PORT 53
/* DPDK port used for all rx/tx; also reused as the KNI name suffix and group id. */
int gDpdkPortId = 0;
#if ENABLE_KNI_APP
/* KNI handle created in main(); carries traffic to and from the kernel. */
struct rte_kni *global_kni = NULL;
#endif
/*
 * Addressing for the next outgoing packet. The receive loop fills these
 * from each handled request with source and destination swapped, and
 * alloc_udp_pkt() reads them. IPs and ports are copied byte-for-byte from
 * the received headers, i.e. they stay in network byte order.
 */
#if ENABLE_SEND
static uint8_t gSrcMac[RTE_ETHER_ADDR_LEN];
static uint8_t gDstMac[RTE_ETHER_ADDR_LEN];
// example address: 192.168.1.123
static uint32_t gSrcIp;
static uint32_t gDstIp;
static uint16_t gSrcPort;
static uint16_t gDstPort;
#endif
//int encode_udp_pkt()
#if ENABLE_KNI_APP
/*
 * KNI callback for a kernel request to bring the interface up or down
 * (registered as rte_kni_ops.config_network_if).
 *
 * Bringing the interface up restarts the port (stop, then start);
 * bringing it down only stops it.
 *
 * @param port_id  DPDK port backing the KNI interface.
 * @param if_up    non-zero to bring the port up, zero to take it down.
 * @return 0 on success, -EINVAL for an invalid port, or the negative
 *         error from rte_eth_dev_start() so the failure is reported back
 *         to the requester instead of being swallowed.
 */
static int g_config_network_if(uint16_t port_id, uint8_t if_up) {
    int ret = 0;

    if (!rte_eth_dev_is_valid_port(port_id)) {
        return -EINVAL;
    }

    rte_eth_dev_stop(port_id);
    if (if_up) {
        ret = rte_eth_dev_start(port_id);
    }

    if (ret < 0) {
        printf("Failed to start port : %d\n", port_id);
    }
    return ret;
}
#endif
#if ENABLE_SEND
static struct rte_mbuf *alloc_udp_pkt(struct rte_mempool *pool, uint8_t *data,
uint16_t length) {
// 32, 2048 + hdrsize
struct rte_mbuf *mbuf = rte_pktmbuf_alloc(pool); //
if (!mbuf) {
rte_exit(EXIT_FAILURE, "rte_pktmbuf_alloc error\n");
}
mbuf->pkt_len = length + sizeof(struct rte_ipv4_hdr) + sizeof(struct rte_ether_hdr);
mbuf->data_len = length + sizeof(struct rte_ipv4_hdr) + sizeof(struct rte_ether_hdr);
uint8_t *msg = rte_pktmbuf_mtod(mbuf, uint8_t*);
// ether
struct rte_ether_hdr *eth = (struct rte_ether_hdr *)msg;
rte_memcpy(eth->s_addr.addr_bytes, gSrcMac, RTE_ETHER_ADDR_LEN);
rte_memcpy(eth->d_addr.addr_bytes, gDstMac, RTE_ETHER_ADDR_LEN);
eth->ether_type = htons(RTE_ETHER_TYPE_IPV4);
// 6 +
/* 6 bytes 6 bytes 2 bytes
+----------+----------+------+
| src mac | dst mac | type |
+----------+----------+------+
*/
// iphdr
struct rte_ipv4_hdr *ip = (struct rte_ipv4_hdr *)(msg + sizeof(struct rte_ether_hdr));
ip->version_ihl = 0x45;
ip->type_of_service = 0;
ip->total_length = htons(length + sizeof(struct rte_ipv4_hdr));
ip->packet_id = 0;
ip->fragment_offset = 0;
ip->time_to_live = 64; // ttl = 64
ip->next_proto_id = IPPROTO_UDP;
ip->src_addr = gSrcIp;
ip->dst_addr = gDstIp;
ip->hdr_checksum = 0;
ip->hdr_checksum = rte_ipv4_cksum(ip);
// udphdr
struct rte_udp_hdr *udp = (struct rte_udp_hdr *)(msg + sizeof(struct rte_ether_hdr) + sizeof(struct rte_ipv4_hdr));
udp->src_port = gSrcPort;
udp->dst_port = gDstPort;
//uint16_t udplen = length - sizeof(struct rte_ether_hdr) - sizeof(struct rte_ipv4_hdr);
udp->dgram_len = htons(length);
rte_memcpy((uint8_t*)(udp+1), data, length-sizeof(struct rte_udp_hdr));
udp->dgram_cksum = 0;
udp->dgram_cksum = rte_ipv4_udptcp_cksum(ip, udp);
return mbuf;
}
#endif
// 192.168.1.26
// echo 1 > /sys/devices/virtual/net/vEth0/carrier
// ifconfig vEth0 192.168.1.33 up
int main(int argc, char *argv[]) {
// 4G, hugepage, bind pci
if (rte_eal_init(argc, argv) < 0) {
rte_exit(EXIT_FAILURE, "Error\n");
}
//per_lcore_socket_id;
struct rte_mempool *mbuf_pool = rte_pktmbuf_pool_create("mbufpool", MBUF_NUMBER,0,0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
if (!mbuf_pool) {
rte_exit(EXIT_FAILURE, "mbuf Error\n");
}
#if ENABLE_KNI_APP
if (-1 == rte_kni_init(gDpdkPortId)) {
rte_exit(EXIT_FAILURE, "kni init failed\n");
}
#endif
// setup
uint16_t nb_rx_queues = 1;
#if ENABLE_SEND
uint16_t nb_tx_queues = 1;
#else
uint16_t nb_tx_queues = 0;
#endif
const struct rte_eth_conf port_conf_default = {
.rxmode = {.max_rx_pkt_len = RTE_ETHER_MAX_LEN }
};
rte_eth_dev_configure(gDpdkPortId, nb_rx_queues, nb_tx_queues, &port_conf_default);
rte_eth_rx_queue_setup(gDpdkPortId, 0, 128,
rte_eth_dev_socket_id(gDpdkPortId), NULL, mbuf_pool);
#if ENABLE_SEND
rte_eth_tx_queue_setup(gDpdkPortId, 0, 1024, rte_eth_dev_socket_id(gDpdkPortId),
NULL);
#endif
rte_eth_dev_start(gDpdkPortId);
// disable
#if ENABLE_PROMISCUOUS
rte_eth_promiscuous_enable(gDpdkPortId); //
#endif
#if ENABLE_KNI_APP
struct rte_kni_conf conf;
memset(&conf, 0, sizeof(conf));
snprintf(conf.name, RTE_KNI_NAMESIZE, "vEth%d", gDpdkPortId);
conf.group_id = gDpdkPortId;
conf.mbuf_size = RTE_MBUF_DEFAULT_BUF_SIZE;
//conf.
rte_eth_macaddr_get(gDpdkPortId, (struct rte_ether_addr*)conf.mac_addr);
rte_eth_dev_get_mtu(gDpdkPortId, &conf.mtu);
struct rte_kni_ops ops;
memset(&ops, 0, sizeof(ops));
ops.port_id = gDpdkPortId;
ops.config_network_if = g_config_network_if;
global_kni = rte_kni_alloc(mbuf_pool, &conf, &ops);
#endif
#if ENABLE_DNS_APP
struct Message msg;
memset(&msg, 0, sizeof(struct Message));
#endif
while (1) {
unsigned num_recvd = 0;
unsigned i = 0;
#if ENABLE_KNI_APP
struct rte_mbuf *kni_burst[MBUF_SIZE];
num_recvd = rte_kni_rx_burst(global_kni, kni_burst, MBUF_SIZE);
if (num_recvd > MBUF_SIZE) {
rte_exit(EXIT_FAILURE, "rte_kni_rx_burst Error\n");
}
unsigned nb_tx = rte_eth_tx_burst(gDpdkPortId, 0, kni_burst, num_recvd);
if (nb_tx < num_recvd) {
for (i = nb_tx;i < num_recvd;i ++) {
rte_pktmbuf_free(kni_burst[i]);
kni_burst[i] = NULL;
}
}
#endif
struct rte_mbuf *mbufs[MBUF_SIZE];
num_recvd = rte_eth_rx_burst(gDpdkPortId, 0, mbufs, MBUF_SIZE);
if (num_recvd > MBUF_SIZE) {
rte_exit(EXIT_FAILURE, "rte_eth_rx_burst Error\n");
}
for (i = 0;i < num_recvd;i ++) {
struct rte_ether_hdr *ehdr = rte_pktmbuf_mtod(mbufs[i], struct rte_ether_hdr *);
if (ehdr->ether_type != rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
#if ENABLE_KNI_APP
rte_kni_tx_burst(global_kni, &mbufs[i], 1);
rte_kni_handle_request(global_kni);
#endif
continue;
}
struct rte_ipv4_hdr *iphdr = rte_pktmbuf_mtod_offset(mbufs[i], struct rte_ipv4_hdr *, sizeof(struct rte_ether_hdr));
if (iphdr->next_proto_id == IPPROTO_UDP) {
struct rte_udp_hdr* udphdr = (struct rte_udp_hdr*)(iphdr + 1);
#if ENABLE_DNS_APP
if (ntohs(udphdr->dst_port) == DNS_UDP_PORT) { //dns
// dns -->
printf("dns request\n");
rte_memcpy(gSrcMac, ehdr->d_addr.addr_bytes, RTE_ETHER_ADDR_LEN);
rte_memcpy(gDstMac, ehdr->s_addr.addr_bytes, RTE_ETHER_ADDR_LEN);
rte_memcpy(&gSrcIp, &iphdr->dst_addr, sizeof(uint32_t));
rte_memcpy(&gDstIp, &iphdr->src_addr, sizeof(uint32_t));
rte_memcpy(&gSrcPort, &udphdr->dst_port, sizeof(uint16_t));
rte_memcpy(&gDstPort, &udphdr->src_port, sizeof(uint16_t));
uint16_t length = ntohs(udphdr->dgram_len);
uint16_t nbytes = length - sizeof(struct rte_udp_hdr);
uint8_t *data = (uint8_t*)(udphdr + 1);
// -->
free_questions(msg.questions);
free_resource_records(msg.answers);
free_resource_records(msg.authorities);
free_resource_records(msg.additionals);
memset(&msg, 0, sizeof(struct Message));
if (decode_msg(&msg, data, nbytes) != 0) {
rte_pktmbuf_free(mbufs[i]); //
continue;
}
resolve_query(&msg);
uint8_t *p = data;
if (encode_msg(&msg, &p) != 0) {
rte_pktmbuf_free(mbufs[i]);
continue;
}
uint16_t len = p - data;
struct rte_mbuf *mbuf = alloc_udp_pkt(mbuf_pool, data, len+sizeof(struct rte_udp_hdr));
rte_eth_tx_burst(gDpdkPortId, 0, &mbuf, 1);
}
#endif
else if (ntohs(udphdr->dst_port) != 8888) {
rte_pktmbuf_free(mbufs[i]);
continue;
}
uint16_t length = ntohs(udphdr->dgram_len);
*((char*) udphdr + length) = '\0';
struct in_addr addr;
addr.s_addr = iphdr->src_addr;
printf("src: %s:%d, ", inet_ntoa(addr), ntohs(udphdr->src_port));
addr.s_addr = iphdr->dst_addr;
printf("dst: %s:%d, %s\n", inet_ntoa(addr), ntohs(udphdr->dst_port),
(char *)(udphdr+1));
#if ENABLE_SEND
rte_memcpy(gSrcMac, ehdr->d_addr.addr_bytes, RTE_ETHER_ADDR_LEN);
rte_memcpy(gDstMac, ehdr->s_addr.addr_bytes, RTE_ETHER_ADDR_LEN);
rte_memcpy(&gSrcIp, &iphdr->dst_addr, sizeof(uint32_t));
rte_memcpy(&gDstIp, &iphdr->src_addr, sizeof(uint32_t));
rte_memcpy(&gSrcPort, &udphdr->dst_port, sizeof(uint16_t));
rte_memcpy(&gDstPort, &udphdr->src_port, sizeof(uint16_t));
//
//length + sizeof(struct iphdr)
struct rte_mbuf *mbuf = alloc_udp_pkt(mbuf_pool, (uint8_t*)(udphdr+1), length);
rte_eth_tx_burst(gDpdkPortId, 0, &mbuf, 1);
#endif
} else {
#if ENABLE_KNI_APP
rte_kni_tx_burst(global_kni, &mbufs[i], 1);
#endif
}
}
#if ENABLE_KNI_APP
rte_kni_handle_request(global_kni);
#endif
}
}
Makefile
# Builds the dpdk_udp application from dpdk_udp.c and dpdk-dns.c.
# Two build paths: pkg-config (when libdpdk is registered there) or the
# legacy RTE_SDK make system.
# NOTE(review): the recipe lines below carry no leading TAB in this listing;
# make requires one in front of every recipe command -- confirm against the
# real Makefile.
# binary name
APP = dpdk_udp
# all source are stored in SRCS-y
SRCS-y := dpdk_udp.c dpdk-dns.c
# Build using pkg-config variables if possible
ifeq ($(shell pkg-config --exists libdpdk && echo 0),0)
# default target: shared-library build
all: shared
.PHONY: shared static
# build/$(APP) is a symlink to whichever variant was built last
shared: build/$(APP)-shared
ln -sf $(APP)-shared build/$(APP)
static: build/$(APP)-static
ln -sf $(APP)-static build/$(APP)
PKGCONF=pkg-config --define-prefix
PC_FILE := $(shell $(PKGCONF) --path libdpdk)
CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk)
LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk)
LDFLAGS_STATIC = -Wl,-Bstatic $(shell $(PKGCONF) --static --libs libdpdk)
# rebuild when a source, this Makefile or the libdpdk .pc file changes
build/$(APP)-shared: $(SRCS-y) Makefile $(PC_FILE) | build
$(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_SHARED)
build/$(APP)-static: $(SRCS-y) Makefile $(PC_FILE) | build
$(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_STATIC)
# output directory (order-only prerequisite of the binaries)
build:
@mkdir -p $@
.PHONY: clean
clean:
rm -f build/$(APP) build/$(APP)-static build/$(APP)-shared
test -d build && rmdir -p build || true
else
# legacy build: RTE_SDK must point at the DPDK source tree
ifeq ($(RTE_SDK),)
$(error "Please define RTE_SDK environment variable")
endif
# Default target, detect a build directory, by looking for a path with a .config
RTE_TARGET ?= $(notdir $(abspath $(dir $(firstword $(wildcard $(RTE_SDK)/*/.config)))))
include $(RTE_SDK)/mk/rte.vars.mk
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
include $(RTE_SDK)/mk/rte.extapp.mk
endif
注意:dpdk-dns.c隐含一个问题:它使用malloc来分配内存,这在DPDK程序中是不合适的,应该改用rte_malloc(…)来申请内存。
4.1、 编译和执行
编译:
make
运行:
./build/dpdk_udp
启动网卡:
ifconfig vEth0 192.168.7.26 up
将vEth0的链路状态(carrier)置为up,使内核可以通过该接口收发数据:
echo 1 > /sys/devices/virtual/net/vEth0/carrier
总结
(1)DPDK单独使用时,主要用于测试网卡性能,一般DPDK搭配协议栈使用(DPDK+协议栈+适配层+应用),这会集成协议栈,可以应用在路由器、网关、防火墙等。
(2)DPDK不仅可以处理DNS协议,也可以处理http、mqtt等协议。但是,这些协议不仅DPDK可以做,像redis、mysql等也可以做;使用DPDK来开发的场景:
- 性能瓶颈主要在网卡(网络收发)上:DNS、NTP等协议主要是查表,可以使用DPDK提高收发包性能。
- 协议比较简单。
- 没有磁盘操作,或磁盘操作较少。