基于Intel DPDK的pkt replay(报文回放)实现

====================================================

main.h

====================================================

#ifndef __MAIN_H__

#define __MAIN_H__

/* Error-handling helper: terminate through rte_exit() with EXIT_FAILURE, appending a newline to the message */
#define FATAL_ERROR(fmt, ...)           rte_exit(EXIT_FAILURE, fmt "\n", ##__VA_ARGS__)

/* Function prototypes */
/* Replay loop: reads packets from the pcap trace and transmits them on every port */
static int main_loop_producer(__attribute__((unused)) void * arg);
/* SIGINT handler: prints the final statistics, closes the trace and exits */
static void sig_handler(int signo);
/* Configures and starts one Ethernet port (1 RX queue, 1 TX queue) */
static void init_port(int i);
/* Parses the application command line options into the global settings; returns 0 or -1 */
static int parse_args(int argc, char **argv);
/* Prints instantaneous and average throughput since the previous call / since start */
void print_stats (void);
/* SIGALRM handler: prints the statistics and re-arms a one second alarm */
void alarm_routine (__attribute__((unused)) int unused);
/* Returns non-zero when x is a non-zero power of two */
int isPowerOfTwo (unsigned int x);

/*
 * Symmetric 40-byte RSS key (attributed by the original author to Shinae Woo
 * and KyoungSoo Park, KAIST): the two bytes 0x6d, 0x5a repeated 20 times.
 * This is the key referenced by port_conf.rx_adv_conf.rss_conf.rss_key.
 */
uint8_t rss_symmetric_seed [] =
{
    0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
    0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
    0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
    0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
    0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a
};

/*
 * "Original" (non-symmetric) 40-byte RSS key, as labelled by the original
 * author (who credits Shinae Woo and KyoungSoo Park, KAIST).
 * Not referenced by the configuration visible in this header; kept as an
 * alternative to rss_symmetric_seed.
 */
uint8_t rss_original_seed [] =
{
    0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
    0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
    0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
    0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
    0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
};


/*
 * 40-byte RSS key intended, according to its author (Martino Trevisan), to
 * load balance on the source IP address only: every byte is zero except
 * bytes 8..10, which are 0xff.
 * NOTE(review): the "source IP only" property is the author's claim and is
 * not verified here. Not referenced by the configuration visible in this header.
 */
uint8_t rss_seed_src_ip [] = 
{  
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
};
/*
 * 40-byte RSS key intended, according to its author (Martino Trevisan), to
 * load balance on the destination IP address only: every byte is zero except
 * bytes 4..6, which are 0xff.
 * NOTE(review): the "destination IP only" property is the author's claim and
 * is not verified here. Not referenced by the configuration visible in this header.
 */
uint8_t rss_seed_dst_ip [] = 
{  
    0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
};


/*
 * Device-level configuration handed to rte_eth_dev_configure() for every port
 * by init_port(). The ETH_* constants are defined in rte_ethdev.h.
 * RX uses RSS with the symmetric key; TX uses no multi-queue mode.
 */
static const struct rte_eth_conf port_conf = {
    .rxmode = {
        .mq_mode = ETH_MQ_RX_RSS,   /* Enable RSS */
    },
    .txmode = {
        .mq_mode = ETH_MQ_TX_NONE,
    },
    .rx_adv_conf = {
        .rss_conf = {
            .rss_key = rss_symmetric_seed,  /* Hash key: swap in another of the seed arrays to change it */
            .rss_key_len = 40,              /* Length of the key in bytes (all seed arrays hold 40 bytes) */
            .rss_hf = (ETH_RSS_IPV4_TCP | ETH_RSS_UDP) ,    /* Mask of the protocols RSS will be applied to */
        }   
    }
};

/*
 * Per-queue RX configuration passed to rte_eth_rx_queue_setup() by init_port().
 * The original author describes these as default values.
 */
static const struct rte_eth_rxconf rx_conf = {
    .rx_thresh = {
        .pthresh = 8,   /* Ring prefetch threshold */
        .hthresh = 8,   /* Ring host threshold */
        .wthresh = 4,   /* Ring writeback threshold */
    },
    /* RX free threshold. The original note read "Immediately free RX descriptors";
     * NOTE(review): confirm the exact meaning of the value 32 against the rte_ethdev documentation. */
    .rx_free_thresh = 32,
};

/*
 * Per-queue TX configuration passed to rte_eth_tx_queue_setup() by init_port().
 * The original author describes these as default values.
 */
static const struct rte_eth_txconf tx_conf = {
    .tx_thresh = {
        .pthresh = 36,  /* Ring prefetch threshold */
        .hthresh = 0,   /* Ring host threshold */
        .wthresh = 0,   /* Ring writeback threshold */
    },
    .tx_free_thresh = 0,    /* Use PMD default values */
    /* No offloads and no multi-segment packets; per the original author this is
     * required for vmxnet3, which otherwise does not work */
    .txq_flags = ETH_TXQ_FLAGS_NOOFFLOADS | ETH_TXQ_FLAGS_NOMULTSEGS,  /* IMPORTANT for vmxnet3, otherwise it won't work */
    .tx_rs_thresh = 0,      /* Use PMD default values */
};


/*
 * Global (per-file) header of a capture file.
 * NOTE(review): the field names and order appear to mirror the classic libpcap
 * file format header; the struct is not referenced by the code visible here,
 * which reads traces through libpcap instead.
 */
struct pcap_hdr_t {
        uint32_t magic_number;   /* magic number */
        uint16_t version_major;  /* major version number */
        uint16_t version_minor;  /* minor version number */
        int32_t  thiszone;       /* GMT to local correction */
        uint32_t sigfigs;        /* accuracy of timestamps */
        uint32_t snaplen;        /* max length of captured packets, in octets */
        uint32_t network;        /* data link type */
} ;

/*
 * Per-packet record header of a capture file.
 * NOTE(review): appears to mirror the classic libpcap record header; the
 * struct is not referenced by the code visible here.
 */
struct pcaprec_hdr_t {
   uint32_t ts_sec;         /* timestamp seconds */
   uint32_t ts_usec;        /* timestamp microseconds */
   uint32_t incl_len;       /* number of octets of packet saved in file */
   uint32_t orig_len;       /* actual length of packet */
} ;

#endif

================ ==== 华丽丽的分割线============================

DPDK main.c

====================================================

#define _GNU_SOURCE
#include <pcap/pcap.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <signal.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <fcntl.h>
#include <libgen.h>
#include <sys/queue.h>
#include <sys/syscall.h>
#include <math.h>
#include <sched.h>
#include <pthread.h>
#include <unistd.h>

#include <rte_common.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_memzone.h>
#include <rte_tailq.h>
#include <rte_errno.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_launch.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_ethdev.h>
#include <rte_ring.h>
#include <rte_log.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
#include <rte_string_fns.h>
#include <rte_cycles.h>
#include <rte_atomic.h>
#include <rte_version.h>

#include "main.h"

/* Constants of the system */
#define MEMPOOL_NAME "cluster_mem_pool"          // Name given to the mbuf mempool created in main()
#define MEMPOOL_ELEM_SZ 2048                     // Power of two greater than 1500
#define MEMPOOL_CACHE_SZ 512                    // Max is 512

#define RX_QUEUE_SZ 256         // The size of rx queue. Max is 4096 and is the one you'll have best performances with. Use lower if you want to use Burst Bulk Alloc.
#define TX_QUEUE_SZ 4096            // Size of the single TX queue every port transmits the replayed packets on

/*
 * NOTE: a former `#define uint64_t unsigned long long int` was removed here.
 * uint64_t is a typedef name reserved by <stdint.h>; redefining it as a macro
 * is undefined behavior and silently changed the type of every later
 * declaration in this file.
 */

/* Global vars */
char * file_name = NULL;            /* Path of the pcap trace to replay (-f) */
pcap_t *pt;                         /* Handle of the trace opened by main_loop_producer() */
int times = 1;                      /* Number of times each packet is sent per port (-t) */
int64_t time_out = 0;               /* Replay timeout in seconds, 0 = none (-T) */
uint64_t buffer_size = 1048576;     /* Mempool size in packets, must be a power of two (-B) */
uint64_t max_pkt = 0;               /* Stop after this many packets, 0 = no limit (-C) */
/* Set by the SIGINT handler and polled by the loops: must be volatile sig_atomic_t
 * so that the asynchronous write is well defined and always re-read. */
volatile sig_atomic_t do_shutdown = 0;
int sum_value = 0;                  /* Value added to the IP addresses of each copy (-s) */
double rate = 0;                    /* Target rate in Gbps, 0 = as fast as possible (-r) */
pcap_t *pd;                         /* Not referenced by the code visible in this file */
int nb_sys_ports;                   /* Number of DPDK ports found by main() */
static struct rte_mempool * pktmbuf_pool;   /* Pool every TX/RX mbuf is taken from */

/* Totals accounted by the replay loop and the snapshot taken at the previous print_stats() */
uint64_t num_pkt_good_sent = 0;
uint64_t num_bytes_good_sent = 0;
uint64_t old_num_pkt_good_sent = 0;
uint64_t old_num_bytes_good_sent = 0;

struct timeval start_time;          /* Time the replay started */
struct timeval last_time;           /* Time of the previous print_stats() */

/*
 * Print the instantaneous rate (since the previous call) and the average rate
 * (since start_time) in Gbps and Mpps, then snapshot the counters so the next
 * call measures a fresh interval.
 *
 * Reads num_bytes_good_sent / num_pkt_good_sent and updates the old_* counters
 * and last_time. Terminates the program if gettimeofday() fails.
 */
void print_stats (void)
{
    int ret;
    struct timeval now_time;
    double delta_ms;
    double tot_ms;
    double gbps_inst = 0, gbps_tot = 0, mpps_inst = 0, mpps_tot = 0;

    /* Get actual time */
    ret = gettimeofday (&now_time, NULL);
    if (ret != 0)
    {
        FATAL_ERROR("Error: gettimeofday failed. Quitting...\n");
    }

    /* Elapsed time in floating point: integer arithmetic would drop the sub-millisecond part */
    delta_ms = (double)(now_time.tv_sec - last_time.tv_sec) * 1000.0
             + (double)(now_time.tv_usec - last_time.tv_usec) / 1000.0;
    tot_ms = (double)(now_time.tv_sec - start_time.tv_sec) * 1000.0
           + (double)(now_time.tv_usec - start_time.tv_usec) / 1000.0;

    /* Compute stats; a zero (or negative) interval would divide by zero and
     * print inf/nan, so the corresponding rates are reported as 0 instead */
    if (delta_ms > 0)
    {
        gbps_inst = (double)(num_bytes_good_sent - old_num_bytes_good_sent)/delta_ms/1000000*8;
        mpps_inst = (double)(num_pkt_good_sent - old_num_pkt_good_sent)/delta_ms/1000;
    }
    if (tot_ms > 0)
    {
        gbps_tot = (double)(num_bytes_good_sent)/tot_ms/1000000*8;
        mpps_tot = (double)(num_pkt_good_sent)/tot_ms/1000;
    }

    printf("Rate: %8.3fGbps  %8.3fMpps [Average rate: %8.3fGbps  %8.3fMpps]\n", gbps_inst, mpps_inst, gbps_tot, mpps_tot);

    /* Update counters */
    old_num_bytes_good_sent = num_bytes_good_sent;
    old_num_pkt_good_sent = num_pkt_good_sent;
    last_time = now_time;

}

/*
 * SIGALRM handler: while the program is not shutting down, print the
 * statistics, request the next alarm in one second and install this handler
 * again. Does nothing once do_shutdown is set.
 */
void alarm_routine (__attribute__((unused)) int unused){

    if (!do_shutdown) {
        /* Report the current rates */
        print_stats();

        /* Arm the next tick, then re-register ourselves for it */
        alarm(1);
        signal(SIGALRM, alarm_routine);
    }

}


/* Signal handling function */
static void sig_handler(int signo)
{
    uint64_t diff;
    int ret;
    struct timeval t_end;

    /* Catch just SIGINT */
    if (signo == SIGINT){

        /* Signal the shutdown */
        do_shutdown=1;

        /* Print the per stats  */
        printf("\n\nQUITTING...\n");
        ret = gettimeofday(&t_end, NULL);
        if (ret != 0) FATAL_ERROR("Error: gettimeofday failed. Quitting...\n");     
        diff = t_end.tv_sec - start_time.tv_sec;
        printf("The replay lasted %ld seconds. Sent %ld packets on every interface\n", diff, num_pkt_good_sent);
        print_stats();

        /* Close the pcap file */
        pcap_close(pt);
        exit(0);    
    }
}

/* Loop function, batch timing implemented */
static int main_loop_producer(__attribute__((unused)) void * arg){
    struct rte_mbuf * m;
    struct pcap_pkthdr *h;
    struct timeval now;
    struct ipv4_hdr * ip_h;
    void * pkt;
    double mult_start = 0, mult = 0, real_rate, deltaMillisec;
    char ebuf[256];
    int i, ix, ret;
    uint64_t tick_start;

    /* Open the trace */
    printf("Opening file: %s\n", file_name);
    printf("Replay on %d interface(s)\n", nb_sys_ports);
    pt = pcap_open_offline(file_name, ebuf);
    if (pt == NULL){    
        printf("Unable to open file: %s\n", file_name);
        exit(1);            
    }   

    /* Prepare variables to rate setting if needed */
    if(rate != 0){
        mult_start = (double )rte_get_tsc_hz  () / 1000000000L; 
        mult = mult_start;
        ix = 0;
    }
    
    /* Init start time */
    ret = gettimeofday(&start_time, NULL);
    if (ret != 0) FATAL_ERROR("Error: gettimeofday failed. Quitting...\n");
    last_time = start_time;
    tick_start =   rte_get_tsc_cycles();

    /* Start stats */
    alarm(1);
    for (i=0;i<nb_sys_ports; i++)
        rte_eth_stats_reset ( i );

    /* Infinite loop */
    for (;;) {

        /* If the system is quitting, break the cycle */
        if (do_shutdown)
            break;

        /* Read packet from trace */
        ret = pcap_next_ex(pt, &h, (const u_char**)&pkt);
        if(ret <= 0) break;
    
        /* For each received packet. */
        for (i = 0; likely( i < nb_sys_ports * times ) ; i++) {

            /* Add a number to ip address if needed */
            ip_h = (struct ipv4_hdr*)((char*)pkt + sizeof(struct  ether_hdr));
            if (sum_value > 0){
                ip_h->src_addr+=sum_value*256*256*256;
                ip_h->dst_addr+=sum_value*256*256*256;
            }

            /* Alloc the buffer */
            m =  rte_pktmbuf_alloc  (pktmbuf_pool);

            /* Compile the buffer */
            m->data_len = m->pkt_len = h->caplen;
            rte_memcpy ( (char*) m->buf_addr + m->data_off, pkt, h->caplen);

            /* Loop untill it is not sent */
            while ( rte_eth_tx_burst (i / times, 0, &m , 1) != 1)
                if (unlikely(do_shutdown)) break;
        }

        /* Rate set */
        if(rate > 0) {
            /* Adjust the rate every 100 packets sent */
            if (ix++%1 ==0){
                /* Calculate the actual rate */
                ret = gettimeofday(&now, NULL);
                if (ret != 0) FATAL_ERROR("Error: gettimeofday failed. Quitting...\n");

                deltaMillisec = (double)(now.tv_sec - start_time.tv_sec ) * 1000 + (double)(now.tv_usec - start_time.tv_usec ) / 1000 ;
                real_rate = (double)(num_bytes_good_sent * 1000)/deltaMillisec * 8/(1000*1000*1000);
                mult = mult + (real_rate - rate); // CONTROL LAW;

                /* Avoid negative numbers. Avoid problems when the NICs are stuck for a while */
                if (mult < 0) mult = 0;
            }
            /* Wait to adjust the rate*/
            while(( rte_get_tsc_cycles() - tick_start) < (num_bytes_good_sent * mult / rate )) 
                if (unlikely(do_shutdown)) break;
        }

        /* Update stats */
        num_pkt_good_sent+= times;
        num_bytes_good_sent += (h->caplen + 24) * times; /* 8 Preamble + 4 CRC + 12 IFG*/

        /* Check if time_out elapsed*/
        if (time_out != 0){
            ret = gettimeofday(&now, NULL);
            if (ret != 0) FATAL_ERROR("Error: gettimeofday failed. Quitting...\n");

            if (now.tv_sec-start_time.tv_sec >= time_out ){
                printf("Timeout of %ld seconds elapsed...\n", time_out);
                sig_handler(SIGINT);
            }
                
        }

        /* Check if max_pkt have been sent */
        if (max_pkt != 0 && num_pkt_good_sent >= max_pkt){
            printf("Sent %ld packets...\n", max_pkt);
            sig_handler(SIGINT);
        }

    }

    sig_handler(SIGINT);
    return 0;
}


/*
 * Tell whether x is a power of two.
 * Returns 1 when exactly one bit of x is set, 0 otherwise (including x == 0).
 */
int isPowerOfTwo (unsigned int x)
{
    if (x == 0)
        return 0;

    /* Clearing the lowest set bit leaves zero only when a single bit was set */
    return (x & (x - 1)) == 0;
}

/*parse args*/
static int parse_args(int argc, char **argv)
{
    int option;

    /* Retrive arguments */
    while ((option = getopt(argc, argv,"f:s:r:B:C:t:T:")) != -1) 
    {
        switch (option) 
        {
            case 'f' : 
                file_name = strdup(optarg); /* File name, mandatory */
                break;
            case 's': 
                sum_value = atol (optarg); /* Sum this value each time duplicate a packet */
                break;
            case 'B': 
                buffer_size = atol (optarg); /* Buffer size in packets. Must be a power of two . Default is 1048576 */
                break;
            case 'r': 
                rate = atof (optarg); /* Rate in Gbps */
                break;
            case 't': 
                times = atoi (optarg); /* Times to send a packet */
                break;
            case 'T':
                time_out = atol (optarg); /* Timeout of the replay in seconds. Quit after it is reached */
                break;
            case 'C': 
                max_pkt = atof (optarg); /* Max packets before quitting */
                break;

            default: 
                return -1; 
        }
    }

    /* Returning bad value in case of wrong arguments */
    if((NULL == file_name) || (1 != isPowerOfTwo (buffer_size)))
    {
        return -1;
    }
    
    return 0;
}

/* Init each port with the configuration contained in the structs. Every interface has nb_sys_cores queues */
static void init_port(int iPortid) 
{
    int iRet = 0;
    uint8_t rss_key [40];
    struct rte_eth_link link;
    struct rte_eth_dev_info dev_info;
    struct rte_eth_rss_conf rss_conf;
    struct rte_eth_fdir fdir_conf;

    /* Retreiving and printing device infos */
    rte_eth_dev_info_get(i, &dev_info);
    printf("Name:%s\n\tDriver name: %s\n\tMax rx queues: %d\n\tMax tx queues: %d\n", 
           dev_info.pci_dev->driver->name,dev_info.driver_name, dev_info.max_rx_queues, dev_info.max_tx_queues);
    printf("\tPCI Adress: %04d:%02d:%02x:%01d\n", 
           dev_info.pci_dev->addr.domain, dev_info.pci_dev->addr.bus, dev_info.pci_dev->addr.devid, dev_info.pci_dev->addr.function);

    /* Configure device with '1' rx queues and 1 tx queue */
    iRet = rte_eth_dev_configure (iPortid, 1, 1, &port_conf);
    if (0 > iRet)
    {
        rte_panic("Error: rte_eth_dev_configure failed. Ret = %d\n", iRet);
    }
    
    /* For each RX queue in each NIC */
    /* Configure rx queue j of current device on current NUMA socket. It takes elements from the mempool */
    iRet = rte_eth_rx_queue_setup (iPortid, 0, RX_QUEUE_SZ, rte_socket_id(), &rx_conf, pktmbuf_pool);
    if (0 > iRet) 
    {
        FATAL_ERROR("Error rte_eth_rx_queue_setup failed. Ret = %d\n", iRet);
    }
    
    /* Configure port mapping */
    iRet = rte_eth_dev_set_rx_queue_stats_mapping (iPortid, 0, 0);
    if (0 > iRet) 
    {
        FATAL_ERROR("Error: rte_eth_dev_set_rx_queue_stats_mapping failed. Port : %d Ret= %d\n", iPortid, iRet);
    }

    /* Configure DPDK tx queue */
    iRet = rte_eth_tx_queue_setup (iPortid, 0, TX_QUEUE_SZ, rte_socket_id(), &tx_conf);
    if (0 > iRet)
    {
        FATAL_ERROR ("Error: rte_eth_tx_queue_setup failed. iRet: %d (%d bad arg, %d no mem)\n", iRet, EINVAL ,ENOMEM);
    }
    
    /* for DPDK to start all dev port */        
    iRet = rte_eth_dev_start (iPortid);
    if (0 > iRet) 
    {
        FATAL_ERROR("Error: rte_eth_dev_start failed. iRet = %d\n", iRet);
    }
    
    /* Enable receipt in promiscuous mode for an Ethernet device */
    rte_eth_promiscuous_enable(iPortid);

    /* DPDK check this port link status. */
    rte_eth_link_get_nowait(iPortid, &link);
    if (link.link_status)
    {
        printf("\tPort %d Link Up - speed %u Mbps - %s\n", (uint8_t)iPortid, (unsigned)link.link_speed,
                (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?("full-duplex") : ("half-duplex\n"));
    }
    else
    {
        printf("\tPort %d Link Down\n",(uint8_t)iPortid);
    }
    
    /* Using RSS to improve tx or rx packets. */
    rss_conf.rss_key = rss_key;
    iRet = rte_eth_dev_rss_hash_conf_get(iPortid, &rss_conf);
    if (0 != iRet) 
    {
        printf("\tDevice DOES NOT support RSS.\n");
    }
    
    /* DPDK Flow-Director Support or Not */
    iRet = rte_eth_dev_fdir_get_infos(iPortid, &fdir_conf);
    if (0 != iRet)
    {
        printf ("\tDevice DOES NOT Support Flow-Director.\n"); 
    }
}


/* Modified by fangtong */
int main(int argc, char **argv)
{
    int iRet = 0;
    int iportid;
    int nb_lcores = 0;

    /* Init EAL */
    iRet = rte_eal_init(argc, argv);
    if (iRet < 0)
    {
        FATAL_ERROR("Init EAL Failed. ret=%d \n", iRet);
    }
    
    argc -= iRet;
    argv += iRet;

    /* Signal */
    signal(SIGINT, sig_handler);
    signal(SIGALRM, alarm_routine);

    /* Check if this application can only use 1 core*/
    nb_lcores = rte_lcore_count ();
    if (1 != nb_lcores) 
    {
        FATAL_ERROR("This application needs exactly 1 cores.\n");
    }
    
    /* Parse arguments */
    iRet = parse_args(argc, argv);
    if (iRet < 0)
    {
        FATAL_ERROR("Invalid parameters.\n");
    }
    
#if 0
    /* Probe PCI bus for ethernet devices, mandatory only in DPDK < 1.8.0 */
#if RTE_VER_MAJOR == 1 && RTE_VER_MINOR < 8
        iRet = rte_eal_pci_probe();
        if (iRet < 0) FATAL_ERROR("Cannot probe PCI\n");
#endif
#endif

    /* check that there is an even number of ports to send/recv on.*/
    nb_sys_ports = rte_eth_dev_count();
    if (!nb_sys_ports)
    {
        FATAL_ERROR("No dpdk ports found!, nPorts=%d\n", nb_sys_ports);
    }
    
    
    /* Create a mempool with per-core cache, initializing every element for be used as mbuf, and allocating on the current NUMA node */
    pktmbuf_pool = rte_mempool_create (MEMPOOL_NAME, buffer_size-1, MEMPOOL_ELEM_SZ, MEMPOOL_CACHE_SZ, 
                                        sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init, NULL, 
                                        rte_pktmbuf_init, NULL, rte_socket_id(), 0);
    if (NULL == pktmbuf_pool)
    {
        FATAL_ERROR("ERROR: Create mbuf pool failed. \n");
    }
    
    /* Operate all ports */         
    for(iportid = 0; iportid < nb_sys_ports; iportid++)
    {
        init_port(iportid);
    }
    
    /* ... and then loop in consumer */
    main_loop_producer (NULL);    

    return 0;
}

转载于:https://my.oschina.net/u/3253217/blog/1817595

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值