====================================================
main.h
====================================================
#ifndef __MAIN_H__
#define __MAIN_H__
/*
 * Error-handling helper: forwards a printf-style message to rte_exit() with
 * EXIT_FAILURE. The macro itself appends "\n" to fmt, so a message that
 * already ends in a newline is followed by an empty line.
 */
#define FATAL_ERROR(fmt, args...) rte_exit(EXIT_FAILURE, fmt "\n", ##args)
/* Function prototypes (the definitions live in main.c) */
/* Replay loop: reads packets from the pcap trace and transmits copies on the ports. */
static int main_loop_producer(__attribute__((unused)) void * arg);
/* SIGINT handler: prints a summary, closes the trace and exits the process. */
static void sig_handler(int signo);
/* Configures one RX and one TX queue on the given port and starts it. */
static void init_port(int i);
/* Parses the application options with getopt(); returns 0 on success, -1 on bad arguments. */
static int parse_args(int argc, char **argv);
/* Prints the instantaneous and average send rate. */
void print_stats (void);
/* SIGALRM handler: prints the stats and re-arms a one second alarm. */
void alarm_routine (__attribute__((unused)) int unused);
/* Returns 1 when x is a non-zero power of two, 0 otherwise. */
int isPowerOfTwo (unsigned int x);
/*
 * Symmetric RSS hash key: the byte pair 0x6d, 0x5a repeated to fill all
 * 40 bytes (credited to Shinae Woo and KyoungSoo Park, KAIST).
 * This is the key referenced by port_conf.
 */
uint8_t rss_symmetric_seed [] =
{
    0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
    0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
    0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
    0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a
};
/*
 * Original (non-symmetric) 40-byte RSS hash key, credited in the original
 * comment to Shinae Woo and KyoungSoo Park (KAIST).
 * Not referenced by port_conf; kept as an alternative key.
 */
uint8_t rss_original_seed [] =
{
    0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
    0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
    0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
    0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
};
/*
 * 40-byte RSS key that is all zero except bytes 8, 9 and 10 (0xff).
 * According to the original author (Martino Trevisan) this key balances the
 * load on the source IP only; that claim is not verified by this file.
 */
uint8_t rss_seed_src_ip [40] =
{
    [8]  = 0xff,
    [9]  = 0xff,
    [10] = 0xff
    /* every other byte is implicitly 0x00 */
};
/*
 * 40-byte RSS key that is all zero except bytes 4, 5 and 6 (0xff).
 * According to the original author (Martino Trevisan) this key balances the
 * load on the destination IP only; that claim is not verified by this file.
 */
uint8_t rss_seed_dst_ip [40] =
{
    [4] = 0xff,
    [5] = 0xff,
    [6] = 0xff
    /* every other byte is implicitly 0x00 */
};
/*
 * Device-level configuration passed to rte_eth_dev_configure() in
 * init_port(). The constants are declared in rte_ethdev.h.
 * Fields that are not listed are zero-initialized.
 */
static const struct rte_eth_conf port_conf = {
    /* Multi-queue modes: RSS on receive, none on transmit */
    .rxmode.mq_mode = ETH_MQ_RX_RSS,
    .txmode.mq_mode = ETH_MQ_TX_NONE,

    /* RSS hash: key, key length in bytes, and the protocols it applies to */
    .rx_adv_conf.rss_conf.rss_key     = rss_symmetric_seed,
    .rx_adv_conf.rss_conf.rss_key_len = 40,
    .rx_adv_conf.rss_conf.rss_hf      = (ETH_RSS_IPV4_TCP | ETH_RSS_UDP),
};
/*
 * RX queue configuration passed to rte_eth_rx_queue_setup() in init_port().
 * The original author describes these as default values.
 */
static const struct rte_eth_rxconf rx_conf = {
    .rx_thresh.pthresh = 8,   /* ring prefetch threshold */
    .rx_thresh.hthresh = 8,   /* ring host threshold */
    .rx_thresh.wthresh = 4,   /* ring writeback threshold */
    .rx_free_thresh    = 32,  /* RX descriptor free threshold */
};
/*
 * TX queue configuration passed to rte_eth_tx_queue_setup() in init_port().
 * The original author describes these as default values; a threshold of 0 is
 * documented by the author as "use the PMD default".
 */
static const struct rte_eth_txconf tx_conf = {
    .tx_thresh.pthresh = 36,  /* ring prefetch threshold */
    .tx_thresh.hthresh = 0,   /* ring host threshold */
    .tx_thresh.wthresh = 0,   /* ring writeback threshold */
    .tx_free_thresh    = 0,   /* PMD default */
    .tx_rs_thresh      = 0,   /* PMD default */
    /* Per the original author these flags are required for vmxnet3 to work */
    .txq_flags = ETH_TXQ_FLAGS_NOOFFLOADS | ETH_TXQ_FLAGS_NOMULTSEGS,
};
/*
 * File-level pcap header record (seven fixed-width fields).
 * NOTE(review): presumably mirrors the libpcap global file header; it is not
 * referenced by the code visible in main.c.
 */
struct pcap_hdr_t {
    /* magic number */
    uint32_t magic_number;
    /* major version number */
    uint16_t version_major;
    /* minor version number */
    uint16_t version_minor;
    /* GMT to local correction */
    int32_t thiszone;
    /* accuracy of timestamps */
    uint32_t sigfigs;
    /* max length of captured packets, in octets */
    uint32_t snaplen;
    /* data link type */
    uint32_t network;
};
/*
 * Per-packet pcap record header (four 32-bit fields).
 * NOTE(review): presumably mirrors the libpcap per-record header; it is not
 * referenced by the code visible in main.c.
 */
struct pcaprec_hdr_t {
    /* timestamp seconds */
    uint32_t ts_sec;
    /* timestamp microseconds */
    uint32_t ts_usec;
    /* number of octets of packet saved in file */
    uint32_t incl_len;
    /* actual length of packet */
    uint32_t orig_len;
};
#endif
==================== separator line ====================
DPDK main.c
====================================================
#define _GNU_SOURCE
/* C standard library and POSIX headers */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <limits.h>
#include <signal.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <fcntl.h>
#include <libgen.h>
#include <sys/queue.h>
#include <sys/syscall.h>
#include <math.h>
#include <sched.h>
#include <pthread.h>
#include <unistd.h>
/* libpcap */
#include <pcap/pcap.h>
/* DPDK */
#include <rte_common.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_memzone.h>
#include <rte_tailq.h>
#include <rte_errno.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_launch.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_ethdev.h>
#include <rte_ring.h>
#include <rte_log.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
#include <rte_string_fns.h>
#include <rte_cycles.h>
#include <rte_atomic.h>
#include <rte_version.h>
/* Local header (needs the DPDK types above) */
#include "main.h"
/* Constants of the system */
#define MEMPOOL_NAME "cluster_mem_pool" /* Name given to the mbuf mempool created in main() */
#define MEMPOOL_ELEM_SZ 2048 /* Size in bytes of one mempool element: a power of two greater than 1500 */
#define MEMPOOL_CACHE_SZ 512 /* Mempool cache size; 512 is the maximum according to the original author */
#define RX_QUEUE_SZ 256 /* RX descriptors per queue. Per the original author: max is 4096 (best performance); use a lower value for Burst Bulk Alloc */
#define TX_QUEUE_SZ 4096 /* TX descriptors of the single TX queue used to replay the packets */
/*
 * NOTE: a former `#define uint64_t unsigned long long int` was removed here.
 * It shadowed the <stdint.h> typedef for the rest of the translation unit,
 * silently changing the type behind every later use of uint64_t.
 */
/* Global vars */
char * file_name = NULL;               /* -f: path of the pcap trace to replay (mandatory) */
pcap_t *pt = NULL;                     /* Handle of the opened trace; NULL until main_loop_producer opens it */
int times = 1;                         /* -t: how many times each packet is sent per port */
int64_t time_out = 0;                  /* -T: replay timeout in seconds, 0 = no timeout */
uint64_t buffer_size = 1048576;        /* -B: mempool size in packets, must be a power of two */
uint64_t max_pkt = 0;                  /* -C: stop after this many packets, 0 = no limit */
/*
 * Set from the SIGINT handler and polled by the send loops, so it must be a
 * volatile sig_atomic_t: otherwise the compiler may cache the value in a
 * register inside the busy-wait loops.
 */
volatile sig_atomic_t do_shutdown = 0;
int sum_value = 0;                     /* -s: value added to the IP addresses on each duplicate */
double rate = 0;                       /* -r: target rate in Gbps, 0 = unlimited */
pcap_t *pd;                            /* Not referenced by the code visible in this file */
int nb_sys_ports;                      /* Number of DPDK ports, set in main() */
static struct rte_mempool * pktmbuf_pool; /* Pool the packet mbufs are allocated from */
uint64_t num_pkt_good_sent = 0;        /* Packets sent so far (counted once per trace packet copy) */
uint64_t num_bytes_good_sent = 0;      /* Bytes sent so far, including 24 bytes of framing overhead per packet */
uint64_t old_num_pkt_good_sent = 0;    /* Packet counter at the previous print_stats() call */
uint64_t old_num_bytes_good_sent = 0;  /* Byte counter at the previous print_stats() call */
struct timeval start_time;             /* Time the replay started */
struct timeval last_time;              /* Time of the previous print_stats() call */
/*
 * Print the instantaneous rate (since the previous call) and the average
 * rate (since start_time) in Gbps and Mpps, then remember the current
 * counters and time for the next call.
 *
 * Elapsed times are computed in floating point so the microsecond part is
 * not truncated, and a zero elapsed time yields a rate of 0 instead of a
 * division by zero (which previously printed inf/NaN).
 *
 * NOTE(review): this function is reached from signal handlers
 * (alarm_routine, sig_handler) and uses printf(), which is not
 * async-signal-safe.
 */
void print_stats (void)
{
    struct timeval now_time;
    double delta_ms;
    double tot_ms;
    double gbps_inst = 0, gbps_tot = 0, mpps_inst = 0, mpps_tot = 0;

    /* Get actual time */
    if (gettimeofday (&now_time, NULL) != 0)
    {
        FATAL_ERROR("Error: gettimeofday failed. Quitting...\n");
    }

    /* Elapsed milliseconds since the last print and since the start */
    delta_ms = (double)(now_time.tv_sec - last_time.tv_sec) * 1000.0
             + (double)(now_time.tv_usec - last_time.tv_usec) / 1000.0;
    tot_ms = (double)(now_time.tv_sec - start_time.tv_sec) * 1000.0
           + (double)(now_time.tv_usec - start_time.tv_usec) / 1000.0;

    /* bytes/ms -> Gbps is *8/1e6; packets/ms -> Mpps is /1e3 */
    if (delta_ms > 0)
    {
        gbps_inst = (double)(num_bytes_good_sent - old_num_bytes_good_sent)/delta_ms/1000000*8;
        mpps_inst = (double)(num_pkt_good_sent - old_num_pkt_good_sent)/delta_ms/1000;
    }
    if (tot_ms > 0)
    {
        gbps_tot = (double)(num_bytes_good_sent)/tot_ms/1000000*8;
        mpps_tot = (double)(num_pkt_good_sent)/tot_ms/1000;
    }
    printf("Rate: %8.3fGbps %8.3fMpps [Average rate: %8.3fGbps %8.3fMpps]\n", gbps_inst, mpps_inst, gbps_tot, mpps_tot);

    /* Update counters */
    old_num_bytes_good_sent = num_bytes_good_sent;
    old_num_pkt_good_sent = num_pkt_good_sent;
    last_time = now_time;
}
/*
 * SIGALRM handler: while the program is not shutting down, print the stats,
 * schedule the next alarm one second from now and re-install this handler.
 */
void alarm_routine (__attribute__((unused)) int unused){
    if (!do_shutdown) {
        /* Periodic report */
        print_stats();
        /* Re-arm the timer, then register the handler again */
        alarm(1);
        signal(SIGALRM, alarm_routine);
    }
}
/* Signal handling function */
static void sig_handler(int signo)
{
uint64_t diff;
int ret;
struct timeval t_end;
/* Catch just SIGINT */
if (signo == SIGINT){
/* Signal the shutdown */
do_shutdown=1;
/* Print the per stats */
printf("\n\nQUITTING...\n");
ret = gettimeofday(&t_end, NULL);
if (ret != 0) FATAL_ERROR("Error: gettimeofday failed. Quitting...\n");
diff = t_end.tv_sec - start_time.tv_sec;
printf("The replay lasted %ld seconds. Sent %ld packets on every interface\n", diff, num_pkt_good_sent);
print_stats();
/* Close the pcap file */
pcap_close(pt);
exit(0);
}
}
/* Loop function, batch timing implemented */
static int main_loop_producer(__attribute__((unused)) void * arg){
struct rte_mbuf * m;
struct pcap_pkthdr *h;
struct timeval now;
struct ipv4_hdr * ip_h;
void * pkt;
double mult_start = 0, mult = 0, real_rate, deltaMillisec;
char ebuf[256];
int i, ix, ret;
uint64_t tick_start;
/* Open the trace */
printf("Opening file: %s\n", file_name);
printf("Replay on %d interface(s)\n", nb_sys_ports);
pt = pcap_open_offline(file_name, ebuf);
if (pt == NULL){
printf("Unable to open file: %s\n", file_name);
exit(1);
}
/* Prepare variables to rate setting if needed */
if(rate != 0){
mult_start = (double )rte_get_tsc_hz () / 1000000000L;
mult = mult_start;
ix = 0;
}
/* Init start time */
ret = gettimeofday(&start_time, NULL);
if (ret != 0) FATAL_ERROR("Error: gettimeofday failed. Quitting...\n");
last_time = start_time;
tick_start = rte_get_tsc_cycles();
/* Start stats */
alarm(1);
for (i=0;i<nb_sys_ports; i++)
rte_eth_stats_reset ( i );
/* Infinite loop */
for (;;) {
/* If the system is quitting, break the cycle */
if (do_shutdown)
break;
/* Read packet from trace */
ret = pcap_next_ex(pt, &h, (const u_char**)&pkt);
if(ret <= 0) break;
/* For each received packet. */
for (i = 0; likely( i < nb_sys_ports * times ) ; i++) {
/* Add a number to ip address if needed */
ip_h = (struct ipv4_hdr*)((char*)pkt + sizeof(struct ether_hdr));
if (sum_value > 0){
ip_h->src_addr+=sum_value*256*256*256;
ip_h->dst_addr+=sum_value*256*256*256;
}
/* Alloc the buffer */
m = rte_pktmbuf_alloc (pktmbuf_pool);
/* Compile the buffer */
m->data_len = m->pkt_len = h->caplen;
rte_memcpy ( (char*) m->buf_addr + m->data_off, pkt, h->caplen);
/* Loop untill it is not sent */
while ( rte_eth_tx_burst (i / times, 0, &m , 1) != 1)
if (unlikely(do_shutdown)) break;
}
/* Rate set */
if(rate > 0) {
/* Adjust the rate every 100 packets sent */
if (ix++%1 ==0){
/* Calculate the actual rate */
ret = gettimeofday(&now, NULL);
if (ret != 0) FATAL_ERROR("Error: gettimeofday failed. Quitting...\n");
deltaMillisec = (double)(now.tv_sec - start_time.tv_sec ) * 1000 + (double)(now.tv_usec - start_time.tv_usec ) / 1000 ;
real_rate = (double)(num_bytes_good_sent * 1000)/deltaMillisec * 8/(1000*1000*1000);
mult = mult + (real_rate - rate); // CONTROL LAW;
/* Avoid negative numbers. Avoid problems when the NICs are stuck for a while */
if (mult < 0) mult = 0;
}
/* Wait to adjust the rate*/
while(( rte_get_tsc_cycles() - tick_start) < (num_bytes_good_sent * mult / rate ))
if (unlikely(do_shutdown)) break;
}
/* Update stats */
num_pkt_good_sent+= times;
num_bytes_good_sent += (h->caplen + 24) * times; /* 8 Preamble + 4 CRC + 12 IFG*/
/* Check if time_out elapsed*/
if (time_out != 0){
ret = gettimeofday(&now, NULL);
if (ret != 0) FATAL_ERROR("Error: gettimeofday failed. Quitting...\n");
if (now.tv_sec-start_time.tv_sec >= time_out ){
printf("Timeout of %ld seconds elapsed...\n", time_out);
sig_handler(SIGINT);
}
}
/* Check if max_pkt have been sent */
if (max_pkt != 0 && num_pkt_good_sent >= max_pkt){
printf("Sent %ld packets...\n", max_pkt);
sig_handler(SIGINT);
}
}
sig_handler(SIGINT);
return 0;
}
/*
 * Tell whether x is a power of two.
 * Returns 1 when exactly one bit of x is set, 0 otherwise (including x == 0).
 */
int isPowerOfTwo (unsigned int x)
{
    if (x == 0)
        return 0;

    /* Clearing the lowest set bit leaves zero only when it was the sole bit */
    return (x & (x - 1)) == 0;
}
/*parse args*/
static int parse_args(int argc, char **argv)
{
int option;
/* Retrive arguments */
while ((option = getopt(argc, argv,"f:s:r:B:C:t:T:")) != -1)
{
switch (option)
{
case 'f' :
file_name = strdup(optarg); /* File name, mandatory */
break;
case 's':
sum_value = atol (optarg); /* Sum this value each time duplicate a packet */
break;
case 'B':
buffer_size = atol (optarg); /* Buffer size in packets. Must be a power of two . Default is 1048576 */
break;
case 'r':
rate = atof (optarg); /* Rate in Gbps */
break;
case 't':
times = atoi (optarg); /* Times to send a packet */
break;
case 'T':
time_out = atol (optarg); /* Timeout of the replay in seconds. Quit after it is reached */
break;
case 'C':
max_pkt = atof (optarg); /* Max packets before quitting */
break;
default:
return -1;
}
}
/* Returning bad value in case of wrong arguments */
if((NULL == file_name) || (1 != isPowerOfTwo (buffer_size)))
{
return -1;
}
return 0;
}
/* Init each port with the configuration contained in the structs. Every interface has nb_sys_cores queues */
static void init_port(int iPortid)
{
int iRet = 0;
uint8_t rss_key [40];
struct rte_eth_link link;
struct rte_eth_dev_info dev_info;
struct rte_eth_rss_conf rss_conf;
struct rte_eth_fdir fdir_conf;
/* Retreiving and printing device infos */
rte_eth_dev_info_get(i, &dev_info);
printf("Name:%s\n\tDriver name: %s\n\tMax rx queues: %d\n\tMax tx queues: %d\n",
dev_info.pci_dev->driver->name,dev_info.driver_name, dev_info.max_rx_queues, dev_info.max_tx_queues);
printf("\tPCI Adress: %04d:%02d:%02x:%01d\n",
dev_info.pci_dev->addr.domain, dev_info.pci_dev->addr.bus, dev_info.pci_dev->addr.devid, dev_info.pci_dev->addr.function);
/* Configure device with '1' rx queues and 1 tx queue */
iRet = rte_eth_dev_configure (iPortid, 1, 1, &port_conf);
if (0 > iRet)
{
rte_panic("Error: rte_eth_dev_configure failed. Ret = %d\n", iRet);
}
/* For each RX queue in each NIC */
/* Configure rx queue j of current device on current NUMA socket. It takes elements from the mempool */
iRet = rte_eth_rx_queue_setup (iPortid, 0, RX_QUEUE_SZ, rte_socket_id(), &rx_conf, pktmbuf_pool);
if (0 > iRet)
{
FATAL_ERROR("Error rte_eth_rx_queue_setup failed. Ret = %d\n", iRet);
}
/* Configure port mapping */
iRet = rte_eth_dev_set_rx_queue_stats_mapping (iPortid, 0, 0);
if (0 > iRet)
{
FATAL_ERROR("Error: rte_eth_dev_set_rx_queue_stats_mapping failed. Port : %d Ret= %d\n", iPortid, iRet);
}
/* Configure DPDK tx queue */
iRet = rte_eth_tx_queue_setup (iPortid, 0, TX_QUEUE_SZ, rte_socket_id(), &tx_conf);
if (0 > iRet)
{
FATAL_ERROR ("Error: rte_eth_tx_queue_setup failed. iRet: %d (%d bad arg, %d no mem)\n", iRet, EINVAL ,ENOMEM);
}
/* for DPDK to start all dev port */
iRet = rte_eth_dev_start (iPortid);
if (0 > iRet)
{
FATAL_ERROR("Error: rte_eth_dev_start failed. iRet = %d\n", iRet);
}
/* Enable receipt in promiscuous mode for an Ethernet device */
rte_eth_promiscuous_enable(iPortid);
/* DPDK check this port link status. */
rte_eth_link_get_nowait(iPortid, &link);
if (link.link_status)
{
printf("\tPort %d Link Up - speed %u Mbps - %s\n", (uint8_t)iPortid, (unsigned)link.link_speed,
(link.link_duplex == ETH_LINK_FULL_DUPLEX) ?("full-duplex") : ("half-duplex\n"));
}
else
{
printf("\tPort %d Link Down\n",(uint8_t)iPortid);
}
/* Using RSS to improve tx or rx packets. */
rss_conf.rss_key = rss_key;
iRet = rte_eth_dev_rss_hash_conf_get(iPortid, &rss_conf);
if (0 != iRet)
{
printf("\tDevice DOES NOT support RSS.\n");
}
/* DPDK Flow-Director Support or Not */
iRet = rte_eth_dev_fdir_get_infos(iPortid, &fdir_conf);
if (0 != iRet)
{
printf ("\tDevice DOES NOT Support Flow-Director.\n");
}
}
/*
 * Program entry point (modified by fangtong).
 *
 * Initialises the DPDK EAL, installs the SIGINT and SIGALRM handlers, checks
 * that exactly one lcore is in use, parses the application arguments, makes
 * sure at least one DPDK port exists, creates the mbuf pool, initialises
 * every port and finally runs the replay loop.
 */
int main(int argc, char **argv)
{
    int rc;
    int port;
    int lcore_count;

    /* Init EAL; on success it returns the number of arguments it consumed */
    rc = rte_eal_init(argc, argv);
    if (rc < 0)
    {
        FATAL_ERROR("Init EAL Failed. ret=%d \n", rc);
    }
    argc -= rc;
    argv += rc;

    /* Signal */
    signal(SIGINT, sig_handler);
    signal(SIGALRM, alarm_routine);

    /* This application runs on exactly one core */
    lcore_count = rte_lcore_count ();
    if (lcore_count != 1)
    {
        FATAL_ERROR("This application needs exactly 1 cores.\n");
    }

    /* Parse the remaining (application) arguments */
    rc = parse_args(argc, argv);
    if (rc < 0)
    {
        FATAL_ERROR("Invalid parameters.\n");
    }
#if 0
    /* Probe PCI bus for ethernet devices, mandatory only in DPDK < 1.8.0 */
#if RTE_VER_MAJOR == 1 && RTE_VER_MINOR < 8
    rc = rte_eal_pci_probe();
    if (rc < 0) FATAL_ERROR("Cannot probe PCI\n");
#endif
#endif

    /* At least one DPDK port is required */
    nb_sys_ports = rte_eth_dev_count();
    if (nb_sys_ports == 0)
    {
        FATAL_ERROR("No dpdk ports found!, nPorts=%d\n", nb_sys_ports);
    }

    /* Create a mempool with per-core cache, initializing every element to be used as mbuf, and allocating on the current NUMA node */
    pktmbuf_pool = rte_mempool_create (MEMPOOL_NAME, buffer_size-1, MEMPOOL_ELEM_SZ, MEMPOOL_CACHE_SZ,
        sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init, NULL,
        rte_pktmbuf_init, NULL, rte_socket_id(), 0);
    if (pktmbuf_pool == NULL)
    {
        FATAL_ERROR("ERROR: Create mbuf pool failed. \n");
    }

    /* Set up every port */
    port = 0;
    while (port < nb_sys_ports)
    {
        init_port(port);
        port++;
    }

    /* ... and then run the replay loop */
    main_loop_producer (NULL);
    return 0;
}