dpdk example——l3fwd

DPDK 专栏收录该内容
6 篇文章 0 订阅

https://www.cnblogs.com/ZCplayground/p/9381961.html

l3fwd example分析

编译

export RTE_KERNELDIR=/usr/src/linux-xxx
make T=x86_64-native-linuxapp-gcc O=x86_64-native-linuxapp-gcc -j16
make examples T=x86_64-native-linuxapp-gcc O=x86_64-native-linuxapp-gcc -j16

基本使用

$./build/l3fwd [EAL options] --
                -p PORTMASK [-P] [-E] [-L]
                --config (port,queue,lcore)[,(port,queue,lcore)]
                [--eth-dest=X,MM:MM:MM:MM:MM:MM]
                [--enable-jumbo [--max-pkt-len PKTLEN]]
                [--no-numa]
                [--hash-entry-num 0x0n]
                [--ipv6]
                [--parse-ptype]
-E: selects the Exact Match lookup method.
-L: selects the LPM lookup method.

example图示

  • 下图显示了L3fwd应用程序的框图,该应用程序使用两个端口转发来自流量生成器的数据包。
    1
  • 最长前缀匹配(LPM)是一种表搜索方法,通常用于查找IP转发应用程序中的最佳路由匹配。L3fwd应用程序静态地配置一组规则,并在初始化时将它们加载到LPM对象中。默认情况下,L3fwd有一个静态定义的目标LPM表,包含8条路由(L3fwd使用包的IPv4目的地址来标识下一跳;LPM表的查找结果即输出端口ID)。它还可以基于IPv6地址进行路由(从DPDK 17.05开始)。如下图:
    2
  • 精确匹配(EM)是一种基于哈希的表搜索方法,用于查找IP转发应用程序中的最佳路由匹配。在EM查找中,搜索键由五元组表示,即源IP地址、目标IP地址、源端口、目标端口和协议。应用程序使用的流集合是静态配置的,并在初始化时加载到hash对象中。默认情况下,L3fwd有一个静态定义的目标EM表,包含4条路由,如下图:
    3

代码分析

main.c

/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <sys/types.h>
#include <string.h>
#include <sys/queue.h>
#include <stdarg.h>
#include <errno.h>
#include <getopt.h>
#include <signal.h>
#include <stdbool.h>
#include <unistd.h>

#include <rte_common.h>
#include <rte_vect.h>
#include <rte_byteorder.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_memzone.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_lcore.h>
#include <rte_per_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_random.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_ring.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
#include <rte_ip.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_string_fns.h>
#include <rte_cpuflags.h>

#include <cmdline_parse.h>
#include <cmdline_parse_etheraddr.h>

#include "l3fwd.h"

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT 128
#define RTE_TEST_TX_DESC_DEFAULT 512

/* Upper bound applied to the number of TX queues configured per port. */
#define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS
/* RX queue ids given via --config must be below this value. */
#define MAX_RX_QUEUE_PER_PORT 128

/* Capacity of lcore_params_array, i.e. max (port,queue,lcore) tuples. */
#define MAX_LCORE_PARAMS 1024

/* Static global variables used within this file. */
/* Descriptor counts passed to the RX/TX queue setup calls. */
static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;

/* Promiscuous mode is off by default; the -P option turns it on. */
static int promiscuous_on;

/* Select Longest-Prefix or Exact match. */
static int l3fwd_lpm_on;
static int l3fwd_em_on;

static int numa_on = 1; /**< NUMA is enabled by default. */
static int parse_ptype; /**< Parse packet type using rx callback, and */
            /**< disabled by default */

/* Global variables. */

/* Set to true by the signal handler to request shutdown. */
volatile bool force_quit;

/* ethernet addresses of ports */
uint64_t dest_eth_addr[RTE_MAX_ETHPORTS];
struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];

/*
 * Per-port vector: the low 8 bytes are written with dest_eth_addr[port] and
 * the port's own MAC is copied in right after the first 6 bytes (see main).
 */
xmm_t val_eth[RTE_MAX_ETHPORTS];

/* mask of enabled ports */
uint32_t enabled_port_mask;

/* Used only in exact match mode. */
int ipv6; /**< ipv6 is false by default. */
uint32_t hash_entry_number = HASH_ENTRY_NUMBER_DEFAULT;

/* Per-lcore configuration, indexed by lcore id. */
struct lcore_conf lcore_conf[RTE_MAX_LCORE];

/* One RX assignment: queue `queue_id` of port `port_id` is polled by `lcore_id`. */
struct lcore_params {
    uint8_t port_id;
    uint8_t queue_id;
    uint8_t lcore_id;
} __rte_cache_aligned;

/* Storage filled by parse_config() when --config is given. */
static struct lcore_params lcore_params_array[MAX_LCORE_PARAMS];
/* Built-in {port, queue, lcore} assignments used when --config is absent. */
static struct lcore_params lcore_params_array_default[] = {
    {0, 0, 2},
    {0, 1, 2},
    {0, 2, 2},
    {1, 0, 2},
    {1, 1, 2},
    {1, 2, 2},
    {2, 0, 2},
    {3, 0, 3},
    {3, 1, 3},
};

/* Active table and its length; both are switched by parse_config(). */
static struct lcore_params * lcore_params = lcore_params_array_default;
static uint16_t nb_lcore_params = sizeof(lcore_params_array_default) /
                sizeof(lcore_params_array_default[0]);

/*
 * Ethernet device configuration handed to rte_eth_dev_configure() for every
 * enabled port. parse_args() may flip jumbo_frame and max_rx_pkt_len.
 */
static struct rte_eth_conf port_conf = {
    .rxmode = {
        .mq_mode = ETH_MQ_RX_RSS,
        .max_rx_pkt_len = ETHER_MAX_LEN,
        .split_hdr_size = 0,
        .header_split   = 0, /**< Header Split disabled */
        .hw_ip_checksum = 1, /**< IP checksum offload enabled */
        .hw_vlan_filter = 0, /**< VLAN filtering disabled */
        .jumbo_frame    = 0, /**< Jumbo Frame Support disabled */
        .hw_strip_crc   = 0, /**< Flag is 0: hardware CRC stripping not requested */
    },
    .rx_adv_conf = {
        .rss_conf = {
            .rss_key = NULL,
            .rss_hf = ETH_RSS_IP,
        },
    },
    .txmode = {
        .mq_mode = ETH_MQ_TX_NONE,
    },
};

static struct rte_mempool * pktmbuf_pool[NB_SOCKETS];

/*
 * Table of entry points for one lookup method (exact match or LPM).
 * The int argument of setup/get_*_lookup_struct is a socket id and the
 * int argument of check_ptype is a port id (see the call sites in this file).
 */
struct l3fwd_lkp_mode {
    void  (*setup)(int);                  /* build the lookup tables for a socket */
    int   (*check_ptype)(int);            /* non-zero when the port is usable without soft parsing */
    rte_rx_callback_fn cb_parse_ptype;    /* rx callback installed when --parse-ptype is set */
    int   (*main_loop)(void *);           /* per-lcore forwarding loop */
    void* (*get_ipv4_lookup_struct)(int);
    void* (*get_ipv6_lookup_struct)(int);
};

/* Active lookup method; filled in by setup_l3fwd_lookup_tables(). */
static struct l3fwd_lkp_mode l3fwd_lkp;

/* Entry points for exact-match (hash) forwarding. */
static struct l3fwd_lkp_mode l3fwd_em_lkp = {
    .setup                  = setup_hash,
    .check_ptype        = em_check_ptype,
    .cb_parse_ptype        = em_cb_parse_ptype,
    .main_loop              = em_main_loop,
    .get_ipv4_lookup_struct = em_get_ipv4_l3fwd_lookup_struct,
    .get_ipv6_lookup_struct = em_get_ipv6_l3fwd_lookup_struct,
};

/* Entry points for longest-prefix-match forwarding. */
static struct l3fwd_lkp_mode l3fwd_lpm_lkp = {
    .setup                  = setup_lpm,
    .check_ptype        = lpm_check_ptype,
    .cb_parse_ptype        = lpm_cb_parse_ptype,
    .main_loop              = lpm_main_loop,
    .get_ipv4_lookup_struct = lpm_get_ipv4_l3fwd_lookup_struct,
    .get_ipv6_lookup_struct = lpm_get_ipv6_l3fwd_lookup_struct,
};

/*
 * Setup lookup methods for forwarding.
 * Currently exact-match and longest-prefix-match
 * are supported ones.
 */
static void
setup_l3fwd_lookup_tables(void)
{
    /* Setup HASH lookup functions. */
    if (l3fwd_em_on)
        l3fwd_lkp = l3fwd_em_lkp;
    /* Setup LPM lookup functions. */
    else
        l3fwd_lkp = l3fwd_lpm_lkp;
}

/*
 * Validate every (port, queue, lcore) tuple of the active lcore_params table.
 *
 * Fails when a queue id is not below MAX_RX_QUEUE_PER_PORT or when an lcore
 * is not enabled. An lcore on a non-zero socket while NUMA awareness is
 * disabled only produces a warning.
 *
 * Returns 0 when all tuples are acceptable, -1 otherwise.
 */
static int
check_lcore_params(void)
{
    uint8_t queue, lcore;
    uint16_t i;
    int socketid;

    for (i = 0; i < nb_lcore_params; ++i) {
        queue = lcore_params[i].queue_id;
        if (queue >= MAX_RX_QUEUE_PER_PORT) {
            printf("invalid queue number: %hhu\n", queue);
            return -1;
        }
        lcore = lcore_params[i].lcore_id;
        if (!rte_lcore_is_enabled(lcore)) {
            printf("error: lcore %hhu is not enabled in lcore mask\n", lcore);
            return -1;
        }
        /*
         * Store the socket id itself before comparing: assigning the
         * result of "id != 0" would make the warning always print 1.
         */
        socketid = rte_lcore_to_socket_id(lcore);
        if ((socketid != 0) && (numa_on == 0)) {
            printf("warning: lcore %hhu is on socket %d with numa off \n",
                lcore, socketid);
        }
    }
    return 0;
}

/*
 * Check that every port referenced by the active lcore_params table is
 * both enabled in enabled_port_mask and present among the nb_ports
 * detected devices.
 *
 * Returns 0 on success, -1 (after printing the reason) on the first
 * offending entry.
 */
static int
check_port_config(const unsigned nb_ports)
{
    /* Number of ports that enabled_port_mask can describe. */
    const unsigned mask_bits = sizeof(enabled_port_mask) * 8;
    unsigned portid;
    uint16_t i;

    for (i = 0; i < nb_lcore_params; ++i) {
        portid = lcore_params[i].port_id;
        /*
         * A port id beyond the mask width can never be enabled; test it
         * first so the shift below is always well defined.
         */
        if (portid >= mask_bits ||
            (enabled_port_mask & (1u << portid)) == 0) {
            printf("port %u is not enabled in port mask\n", portid);
            return -1;
        }
        if (portid >= nb_ports) {
            printf("port %u is not present on the board\n", portid);
            return -1;
        }
    }
    return 0;
}

/*
 * Count the RX queues configured for `port` in the active lcore_params
 * table. Entries for the port must list queue ids 0, 1, 2, ... in that
 * order; any other sequence terminates the application via rte_exit().
 *
 * Returns the number of queues found (0 when the port is not listed).
 */
static uint8_t
get_port_n_rx_queues(const uint8_t port)
{
    int last_queue = -1;
    uint16_t idx;

    for (idx = 0; idx < nb_lcore_params; ++idx) {
        if (lcore_params[idx].port_id != port)
            continue;

        if (lcore_params[idx].queue_id != last_queue + 1)
            rte_exit(EXIT_FAILURE, "queue ids of the port %d must be"
                    " in sequence and must start with 0\n",
                    lcore_params[idx].port_id);

        last_queue = lcore_params[idx].queue_id;
    }

    last_queue += 1;
    return (uint8_t)last_queue;
}

static int
init_lcore_rx_queues(void)
{
    uint16_t i, j, nb_rx_queue;
    uint8_t lcore;

    for (i = 0; i < nb_lcore_params; ++i) {
        lcore = lcore_params[i].lcore_id;
        nb_rx_queue = lcore_conf[lcore].n_rx_queue;

        if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) {
            printf("error: too many queues (%u) for lcore: %u\n",
                (unsigned)nb_rx_queue + 1, (unsigned)lcore);
            return -1;
        } else {
            lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id =
                lcore_params[i].port_id;
            lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id =
                lcore_params[i].queue_id;
            lcore_conf[lcore].n_rx_queue++;
        }
    }

    // dump出每个逻辑核的收发收包队列分配
    printf("RTE_MAX_LCORE = %d\n", RTE_MAX_LCORE);
    for (i = 0; i < RTE_MAX_LCORE; ++i) {
        for (j = 0; j < lcore_conf[i].n_rx_queue; j++) {
            printf("lcore_conf[%d]: rx_q_idx = %d, queue_id = %d, rx_port_id=%d\n", i,  \
            j, lcore_conf[i].rx_queue_list[j].queue_id, lcore_conf[i].rx_queue_list[j].port_id);
        }
    }
    return 0;
}

/*
 * Print the command-line synopsis and a description of every application
 * option to stdout.
 *
 * prgname: program name shown at the start of the synopsis.
 */
static void
print_usage(const char *prgname)
{
    printf ("%s [EAL options] -- -p PORTMASK -P"
        "  [--config (port,queue,lcore)[,(port,queue,lcore)]]"
        "  [--enable-jumbo [--max-pkt-len PKTLEN]]\n"
        "  -p PORTMASK: hexadecimal bitmask of ports to configure\n"
        "  -P : enable promiscuous mode\n"
        "  -E : enable exact match\n"
        "  -L : enable longest prefix match\n"
        "  --config (port,queue,lcore): rx queues configuration\n"
        "  --eth-dest=X,MM:MM:MM:MM:MM:MM: optional, ethernet destination for port X\n"
        "  --no-numa: optional, disable numa awareness\n"
        "  --ipv6: optional, specify it if running ipv6 packets\n"
        "  --enable-jumbo: enable jumbo frame"
        " which max packet len is PKTLEN in decimal (64-9600)\n"
        "  --hash-entry-num: specify the hash entry number in hexadecimal to be setup\n"
        "  --parse-ptype: optional, parse packet type in software using an rx callback\n",
        prgname);
}

/*
 * Parse a decimal packet length.
 *
 * pktlen: NUL-terminated string that must consist of a decimal number only.
 *
 * Returns the parsed value, or -1 when the string is empty, contains
 * trailing characters, is zero, or does not fit in a positive 32-bit int
 * (so a huge value can no longer be truncated into a plausible length).
 */
static int
parse_max_pkt_len(const char *pktlen)
{
    char *end = NULL;
    unsigned long len;

    /* parse decimal string */
    errno = 0;
    len = strtoul(pktlen, &end, 10);
    if ((pktlen[0] == '\0') || (end == NULL) || (*end != '\0'))
        return -1;

    /* reject overflow instead of silently narrowing to int */
    if (errno == ERANGE || len > (unsigned long)INT32_MAX)
        return -1;

    if (len == 0)
        return -1;

    return (int)len;
}

/*
 * Parse a hexadecimal port mask.
 *
 * portmask: NUL-terminated string that must consist of a hex number only.
 *
 * Returns the mask, or 0 when the string is empty, malformed, zero, or
 * wider than 32 bits. Zero is used as the failure value because the caller
 * stores the result in the unsigned enabled_port_mask and tests it
 * against 0; a negative error code would be read as "all ports enabled".
 */
static int
parse_portmask(const char *portmask)
{
    char *end = NULL;
    unsigned long pm;

    /* parse hexadecimal string */
    errno = 0;
    pm = strtoul(portmask, &end, 16);
    if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
        return 0;

    /* the port mask is 32 bits wide; anything larger is invalid */
    if (errno == ERANGE || pm > UINT32_MAX)
        return 0;

    return (int)pm;
}

/*
 * Parse the hexadecimal number of hash entries.
 *
 * hash_entry_num: NUL-terminated string that must consist of a hex number
 * only.
 *
 * Returns the parsed value, or -1 when the string is empty, contains
 * trailing characters, is zero, or does not fit in a positive 32-bit int
 * (previously such values were truncated and could pass the range check
 * done by the caller).
 */
static int
parse_hash_entry_number(const char *hash_entry_num)
{
    char *end = NULL;
    unsigned long hash_en;

    /* parse hexadecimal string */
    errno = 0;
    hash_en = strtoul(hash_entry_num, &end, 16);
    if ((hash_entry_num[0] == '\0') || (end == NULL) || (*end != '\0'))
        return -1;

    /* reject overflow instead of silently narrowing to int */
    if (errno == ERANGE || hash_en > (unsigned long)INT32_MAX)
        return -1;

    if (hash_en == 0)
        return -1;

    return (int)hash_en;
}

static int
parse_config(const char *q_arg)
{
    char s[256];
    const char *p, *p0 = q_arg;
    char *end;
    enum fieldnames {
        FLD_PORT = 0,
        FLD_QUEUE,
        FLD_LCORE,
        _NUM_FLD
    };
    unsigned long int_fld[_NUM_FLD];
    char *str_fld[_NUM_FLD];
    int i;
    unsigned size;

    nb_lcore_params = 0;

    while ((p = strchr(p0,'(')) != NULL) {
        ++p;
        if((p0 = strchr(p,')')) == NULL)
            return -1;

        size = p0 - p;
        if(size >= sizeof(s))
            return -1;

        snprintf(s, sizeof(s), "%.*s", size, p);
        if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD)
            return -1;
        for (i = 0; i < _NUM_FLD; i++){
            errno = 0;
            int_fld[i] = strtoul(str_fld[i], &end, 0);
            if (errno != 0 || end == str_fld[i] || int_fld[i] > 255)
                return -1;
        }
        if (nb_lcore_params >= MAX_LCORE_PARAMS) {
            printf("exceeded max number of lcore params: %hu\n",
                nb_lcore_params);
            return -1;
        }
        lcore_params_array[nb_lcore_params].port_id =
            (uint8_t)int_fld[FLD_PORT];
        lcore_params_array[nb_lcore_params].queue_id =
            (uint8_t)int_fld[FLD_QUEUE];
        lcore_params_array[nb_lcore_params].lcore_id =
            (uint8_t)int_fld[FLD_LCORE];
        ++nb_lcore_params;
    }
    lcore_params = lcore_params_array;
    return 0;
}

/*
 * Parse an --eth-dest argument of the form "X,MM:MM:MM:MM:MM:MM" and
 * record the MAC as the destination address of port X, both in
 * dest_eth_addr[] and in the low 8 bytes of val_eth[].
 *
 * Any malformed input or a port id >= RTE_MAX_ETHPORTS terminates the
 * application through rte_exit().
 */
static void
parse_eth_dest(const char *optarg)
{
    unsigned long port;
    uint8_t portid;
    char *port_end;
    uint8_t c, *dest, peer_addr[6];

    errno = 0;
    port = strtoul(optarg, &port_end, 10);
    if (errno != 0 || port_end == optarg || *port_end++ != ',')
        rte_exit(EXIT_FAILURE,
        "Invalid eth-dest: %s\n", optarg);
    /*
     * Range-check the full parsed value before narrowing it, otherwise
     * e.g. 256 would wrap to port 0 and pass the check.
     */
    if (port >= RTE_MAX_ETHPORTS)
        rte_exit(EXIT_FAILURE,
        "eth-dest: port %lu >= RTE_MAX_ETHPORTS(%d)\n",
        port, RTE_MAX_ETHPORTS);
    portid = (uint8_t)port;

    if (cmdline_parse_etheraddr(NULL, port_end,
        &peer_addr, sizeof(peer_addr)) < 0)
        rte_exit(EXIT_FAILURE,
        "Invalid ethernet address: %s\n",
        port_end);
    /* only the first 6 bytes of the 64-bit slot are overwritten */
    dest = (uint8_t *)&dest_eth_addr[portid];
    for (c = 0; c < 6; c++)
        dest[c] = peer_addr[c];
    *(uint64_t *)(val_eth + portid) = dest_eth_addr[portid];
}

#define MAX_JUMBO_PKT_LEN  9600
#define MEMPOOL_CACHE_SIZE 256

#define CMD_LINE_OPT_CONFIG "config"
#define CMD_LINE_OPT_ETH_DEST "eth-dest"
#define CMD_LINE_OPT_NO_NUMA "no-numa"
#define CMD_LINE_OPT_IPV6 "ipv6"
#define CMD_LINE_OPT_ENABLE_JUMBO "enable-jumbo"
#define CMD_LINE_OPT_HASH_ENTRY_NUM "hash-entry-num"
#define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype"

/*
 * This expression is used to calculate the number of mbufs needed
 * depending on user input, taking  into account memory for rx and
 * tx hardware rings, cache per lcore and mtable per port per lcore.
 * RTE_MAX is used to ensure that NB_MBUF never goes below a minimum
 * value of 8192
 */
#define NB_MBUF RTE_MAX(    \
    (nb_ports*nb_rx_queue*RTE_TEST_RX_DESC_DEFAULT +    \
    nb_ports*nb_lcores*MAX_PKT_BURST +            \
    nb_ports*n_tx_queue*RTE_TEST_TX_DESC_DEFAULT +        \
    nb_lcores*MEMPOOL_CACHE_SIZE),                \
    (unsigned)8192)

/*
 * Parse the application arguments given on the command line (the ones
 * that follow the EAL arguments).
 *
 * Short options: -p PORTMASK, -P, -E, -L. Long options: --config,
 * --eth-dest, --no-numa, --ipv6, --enable-jumbo [--max-pkt-len N],
 * --hash-entry-num, --parse-ptype. The parsed values are stored in the
 * file-level configuration variables.
 *
 * When neither -E nor -L is given LPM is selected; giving both is an
 * error. For LPM the ipv6 flag and hash entry number are reset to their
 * defaults.
 *
 * Returns optind - 1 on success, -1 on any invalid argument.
 */
static int
parse_args(int argc, char **argv)
{
    int opt, ret;
    char **argvopt;
    int option_index;
    int len_index;
    const char *optname;
    char *prgname = argv[0];
    static struct option lgopts[] = {
        {CMD_LINE_OPT_CONFIG, 1, 0, 0},
        {CMD_LINE_OPT_ETH_DEST, 1, 0, 0},
        {CMD_LINE_OPT_NO_NUMA, 0, 0, 0},
        {CMD_LINE_OPT_IPV6, 0, 0, 0},
        {CMD_LINE_OPT_ENABLE_JUMBO, 0, 0, 0},
        {CMD_LINE_OPT_HASH_ENTRY_NUM, 1, 0, 0},
        {CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0},
        {NULL, 0, 0, 0}
    };
    /*
     * Option table for the argument that may follow --enable-jumbo.
     * getopt_long() walks the table until it finds a NULL name, so the
     * terminating entry is mandatory.
     */
    static struct option lenopts[] = {
        {"max-pkt-len", required_argument, 0, 0},
        {NULL, 0, 0, 0}
    };

    argvopt = argv;

    /* Error or normal output strings. */
    const char *str1 = "L3FWD: Invalid portmask";
    const char *str2 = "L3FWD: Promiscuous mode selected";
    const char *str3 = "L3FWD: Exact match selected";
    const char *str4 = "L3FWD: Longest-prefix match selected";
    const char *str5 = "L3FWD: Invalid config";
    const char *str6 = "L3FWD: NUMA is disabled";
    const char *str7 = "L3FWD: IPV6 is specified";
    const char *str8 =
        "L3FWD: Jumbo frame is enabled - disabling simple TX path";
    const char *str9 = "L3FWD: Invalid packet length";
    const char *str10 = "L3FWD: Set jumbo frame max packet len to ";
    const char *str11 = "L3FWD: Invalid hash entry number";
    const char *str12 =
        "L3FWD: LPM and EM are mutually exclusive, select only one";
    const char *str13 = "L3FWD: LPM or EM none selected, default LPM on";

    while ((opt = getopt_long(argc, argvopt, "p:PLE",
                lgopts, &option_index)) != EOF) {

        switch (opt) {
        /* portmask */
        case 'p':
            enabled_port_mask = parse_portmask(optarg);
            if (enabled_port_mask == 0) {
                printf("%s\n", str1);
                print_usage(prgname);
                return -1;
            }
            break;
        case 'P':
            printf("%s\n", str2);
            promiscuous_on = 1;
            break;

        case 'E':
            printf("%s\n", str3);
            l3fwd_em_on = 1;
            break;

        case 'L':
            printf("%s\n", str4);
            l3fwd_lpm_on = 1;
            break;

        /* long options */
        case 0:
            /* exactly one long option name matches per iteration */
            optname = lgopts[option_index].name;

            if (!strcmp(optname, CMD_LINE_OPT_CONFIG)) {
                ret = parse_config(optarg);
                if (ret) {
                    printf("%s\n", str5);
                    print_usage(prgname);
                    return -1;
                }
            } else if (!strcmp(optname, CMD_LINE_OPT_ETH_DEST)) {
                parse_eth_dest(optarg);
            } else if (!strcmp(optname, CMD_LINE_OPT_NO_NUMA)) {
                printf("%s\n", str6);
                numa_on = 0;
            } else if (!strcmp(optname, CMD_LINE_OPT_IPV6)) {
                printf("%s\n", str7);
                ipv6 = 1;
            } else if (!strcmp(optname, CMD_LINE_OPT_ENABLE_JUMBO)) {
                printf("%s\n", str8);
                port_conf.rxmode.jumbo_frame = 1;

                /*
                 * if no max-pkt-len set, use the default
                 * value ETHER_MAX_LEN.
                 *
                 * NOTE(review): this nested call consumes the next
                 * command-line element even when it is not
                 * --max-pkt-len; keep --max-pkt-len directly after
                 * --enable-jumbo or put --enable-jumbo last.
                 */
                if (0 == getopt_long(argc, argvopt, "",
                        lenopts, &len_index)) {
                    ret = parse_max_pkt_len(optarg);
                    if ((ret < 64) ||
                        (ret > MAX_JUMBO_PKT_LEN)) {
                        printf("%s\n", str9);
                        print_usage(prgname);
                        return -1;
                    }
                    port_conf.rxmode.max_rx_pkt_len = ret;
                }
                printf("%s %u\n", str10,
                (unsigned int)port_conf.rxmode.max_rx_pkt_len);
            } else if (!strcmp(optname, CMD_LINE_OPT_HASH_ENTRY_NUM)) {
                ret = parse_hash_entry_number(optarg);
                if ((ret > 0) && (ret <= L3FWD_HASH_ENTRIES)) {
                    hash_entry_number = ret;
                } else {
                    printf("%s\n", str11);
                    print_usage(prgname);
                    return -1;
                }
            } else if (!strcmp(optname, CMD_LINE_OPT_PARSE_PTYPE)) {
                printf("soft parse-ptype is enabled\n");
                parse_ptype = 1;
            }

            break;

        default:
            print_usage(prgname);
            return -1;
        }
    }

    /* If both LPM and EM are selected, return error. */
    if (l3fwd_lpm_on && l3fwd_em_on) {
        printf("%s\n", str12);
        return -1;
    }

    /*
     * Nothing is selected, pick longest-prefix match
     * as default match.
     */
    if (!l3fwd_lpm_on && !l3fwd_em_on) {
        l3fwd_lpm_on = 1;
        printf("%s\n", str13);
    }

    /*
     * ipv6 and hash flags are valid only for
     * exact match, reset them to default for
     * longest-prefix match.
     */
    if (l3fwd_lpm_on) {
        ipv6 = 0;
        hash_entry_number = HASH_ENTRY_NUMBER_DEFAULT;
    }

    if (optind >= 0)
        argv[optind-1] = prgname;

    ret = optind-1;
    optind = 0; /* reset getopt lib */
    return ret;
}

/*
 * Print `name` immediately followed by the textual form of `eth_addr`
 * (no separator and no trailing newline are added).
 */
static void
print_ethaddr(const char *name, const struct ether_addr *eth_addr)
{
    char text[ETHER_ADDR_FMT_SIZE];

    ether_format_addr(text, sizeof(text), eth_addr);
    printf("%s%s", name, text);
}

/*
 * For every enabled lcore make sure an mbuf pool and the lookup tables
 * exist for its socket, then store the socket's IPv4/IPv6 lookup
 * structures in the lcore's configuration.
 *
 * nb_mbuf: number of mbufs requested for each newly created pool.
 *
 * With NUMA awareness off every lcore is treated as being on socket 0.
 * A socket id >= NB_SOCKETS or a pool allocation failure ends the
 * application through rte_exit(). Always returns 0 otherwise.
 */
static int
init_mem(unsigned nb_mbuf)
{
    char pool_name[64];
    unsigned lcore_id;
    int socketid;

    for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
        struct lcore_conf *qconf;

        if (!rte_lcore_is_enabled(lcore_id))
            continue;

        socketid = numa_on ? rte_lcore_to_socket_id(lcore_id) : 0;

        if (socketid >= NB_SOCKETS)
            rte_exit(EXIT_FAILURE,
                "Socket %d of lcore %u is out of range %d\n",
                socketid, lcore_id, NB_SOCKETS);

        /* first lcore seen on this socket: create pool and tables */
        if (pktmbuf_pool[socketid] == NULL) {
            snprintf(pool_name, sizeof(pool_name), "mbuf_pool_%d",
                socketid);
            pktmbuf_pool[socketid] = rte_pktmbuf_pool_create(pool_name,
                nb_mbuf, MEMPOOL_CACHE_SIZE, 0,
                RTE_MBUF_DEFAULT_BUF_SIZE, socketid);

            if (pktmbuf_pool[socketid] == NULL)
                rte_exit(EXIT_FAILURE,
                    "Cannot init mbuf pool on socket %d\n",
                    socketid);

            printf("Allocated mbuf pool on socket %d\n", socketid);

            /* Build the LPM or EM (hash) tables for this socket. */
            l3fwd_lkp.setup(socketid);
        }

        qconf = &lcore_conf[lcore_id];
        qconf->ipv4_lookup_struct =
            l3fwd_lkp.get_ipv4_lookup_struct(socketid);
        qconf->ipv6_lookup_struct =
            l3fwd_lkp.get_ipv6_lookup_struct(socketid);
    }

    return 0;
}

/*
 * Check the link status of all ports in up to 9s, and print them finally.
 *
 * port_num:  number of ports to scan (ids 0 .. port_num - 1).
 * port_mask: bitmask of the ports that are actually checked.
 *
 * The ports are polled every CHECK_INTERVAL ms until all enabled links
 * are up or the time budget is used; a final pass then prints the state
 * of every enabled port. Returns early, without printing, when
 * force_quit becomes set.
 */
static void
check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
{
#define CHECK_INTERVAL 100 /* 100ms */
#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
    uint8_t portid, count, all_ports_up, print_flag = 0;
    struct rte_eth_link link;

    printf("\nChecking link status");
    fflush(stdout);
    for (count = 0; count <= MAX_CHECK_TIME; count++) {
        if (force_quit)
            return;
        all_ports_up = 1;
        for (portid = 0; portid < port_num; portid++) {
            if (force_quit)
                return;
            if ((port_mask & (1 << portid)) == 0)
                continue;
            memset(&link, 0, sizeof(link));
            rte_eth_link_get_nowait(portid, &link);
            /* print link status if flag set */
            if (print_flag == 1) {
                /*
                 * The format string already ends the line, so the
                 * duplex text must not carry its own newline.
                 */
                if (link.link_status)
                    printf("Port %d Link Up - speed %u "
                        "Mbps - %s\n", (uint8_t)portid,
                        (unsigned)link.link_speed,
                (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
                    ("full-duplex") : ("half-duplex"));
                else
                    printf("Port %d Link Down\n",
                        (uint8_t)portid);
                continue;
            }
            /* clear all_ports_up flag if any link down */
            if (link.link_status == ETH_LINK_DOWN) {
                all_ports_up = 0;
                break;
            }
        }
        /* after finally printing all link status, get out */
        if (print_flag == 1)
            break;

        if (all_ports_up == 0) {
            printf(".");
            fflush(stdout);
            rte_delay_ms(CHECK_INTERVAL);
        }

        /* set the print_flag if all ports up or timeout */
        if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
            print_flag = 1;
            printf("done\n");
        }
    }
}

/*
 * Handler installed for SIGINT and SIGTERM: announces the signal and
 * raises force_quit. Any other signal number is ignored.
 */
static void
signal_handler(int signum)
{
    if (signum != SIGINT && signum != SIGTERM)
        return;

    printf("\n\nSignal %d received, preparing to exit...\n",
            signum);
    force_quit = true;
}

/*
 * Make sure packet types are available for one RX queue.
 *
 * With --parse-ptype the active lookup mode's parsing callback is
 * registered on (portid, queueid). Without it, the lookup mode's
 * check_ptype() hook decides whether the port can be used as is.
 *
 * Returns 1 when the queue is ready, 0 (after printing the reason)
 * otherwise.
 */
static int
prepare_ptype_parser(uint8_t portid, uint16_t queueid)
{
    if (!parse_ptype) {
        if (!l3fwd_lkp.check_ptype(portid)) {
            printf("port %d cannot parse packet type, please add --%s\n",
                   portid, CMD_LINE_OPT_PARSE_PTYPE);
            return 0;
        }
        return 1;
    }

    printf("Port %d: softly parse packet type info\n", portid);
    if (!rte_eth_add_rx_callback(portid, queueid,
                     l3fwd_lkp.cb_parse_ptype, NULL)) {
        printf("Failed to add rx callback: port=%d\n", portid);
        return 0;
    }

    return 1;
}

/*
 * Application entry point.
 *
 * Sequence: add this process to the root cpuset cgroup (best effort),
 * initialise the EAL, install signal handlers, set default destination
 * MACs, parse the application arguments, validate the lcore/port
 * configuration, configure every enabled port with its TX queues, set up
 * the RX queues of every lcore, start the ports, prepare packet-type
 * parsing, wait for links, run the forwarding loop on all lcores and
 * finally stop and close the ports.
 *
 * Returns 0 on normal termination, -1 when waiting for an lcore fails.
 * Most setup failures end the process through rte_exit().
 */
int
main(int argc, char **argv)
{
    const char *name = "/sys/fs/cgroup/cpuset/tasks";
    FILE *globaltask = NULL;
    struct lcore_conf *qconf;
    struct rte_eth_dev_info dev_info;
    struct rte_eth_txconf *txconf;
    int ret;
    int pid;
    unsigned nb_ports;
    uint16_t queueid;
    unsigned lcore_id;
    uint32_t n_tx_queue, nb_lcores;
    uint8_t portid, nb_rx_queue, queue, socketid;

    /*
     * Append our pid to the top-level cpuset task list; silently skipped
     * when the file cannot be opened.
     */
    globaltask = fopen(name,"a");
    pid = (int)getpid();
    if(globaltask) {
        fprintf(globaltask,"%d\n",pid);
        fclose(globaltask);
    }

    /* init EAL */
    ret = rte_eal_init(argc, argv);
    if (ret < 0)
        rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
    argc -= ret;
    argv += ret;

    force_quit = false;
    signal(SIGINT, signal_handler);
    signal(SIGTERM, signal_handler);

    /* pre-init dst MACs for all ports to 02:00:00:00:00:xx */
    for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
        dest_eth_addr[portid] =
            ETHER_LOCAL_ADMIN_ADDR + ((uint64_t)portid << 40);
        *(uint64_t *)(val_eth + portid) = dest_eth_addr[portid];
    }

    /*
     * Hard-coded destination MACs for ports 0 and 1:
     *   port 0 -> 00:e0:4c:0b:01:f4
     *   port 1 -> 00:e0:4c:0a:fe:70
     * The first MAC byte sits in the lowest byte of the 64-bit value
     * (presumably relies on a little-endian host - confirm). These are
     * set before parse_args(), so --eth-dest can still replace them.
     */
    dest_eth_addr[0] = (0xf40000000000 /*<< 40*/) + (0x0100000000/* << 32*/) + (0x0b << 24) + (0x4c << 16) + (0xe0 << 8) + (0x00 << 0);
    dest_eth_addr[1] = (0x700000000000/* << 40*/) + (0xfe00000000/* << 32*/) + (0x0a << 24) + (0x4c << 16) + (0xe0 << 8) + (0x00 << 0);
    *(uint64_t *)(val_eth + 0) = dest_eth_addr[0];
    *(uint64_t *)(val_eth + 1) = dest_eth_addr[1];

    /* parse application arguments (after the EAL ones) */
    ret = parse_args(argc, argv);
    if (ret < 0)
        rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n");

    if (check_lcore_params() < 0)
        rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");

    ret = init_lcore_rx_queues();
    if (ret < 0)
        rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n");

    nb_ports = rte_eth_dev_count();
    if (nb_ports > RTE_MAX_ETHPORTS)
        nb_ports = RTE_MAX_ETHPORTS;

    if (check_port_config(nb_ports) < 0)
        rte_exit(EXIT_FAILURE, "check_port_config failed\n");

    nb_lcores = rte_lcore_count();

    /* Setup function pointers for lookup method. */
    setup_l3fwd_lookup_tables();

    /* initialize all ports */
    for (portid = 0; portid < nb_ports; portid++) {
        /* skip ports that are not enabled */
        if ((enabled_port_mask & (1 << portid)) == 0) {
            printf("\nSkipping disabled port %d\n", portid);
            continue;
        }

        /* init port */
        printf("Initializing port %d ... ", portid );
        fflush(stdout);

        nb_rx_queue = get_port_n_rx_queues(portid);
        /* one TX queue per enabled lcore, capped at the per-port maximum */
        n_tx_queue = nb_lcores;
        if (n_tx_queue > MAX_TX_QUEUE_PER_PORT)
            n_tx_queue = MAX_TX_QUEUE_PER_PORT;
        printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
            nb_rx_queue, (unsigned)n_tx_queue );
        ret = rte_eth_dev_configure(portid, nb_rx_queue,
                    (uint16_t)n_tx_queue, &port_conf);
        if (ret < 0)
            rte_exit(EXIT_FAILURE,
                "Cannot configure device: err=%d, port=%d\n",
                ret, portid);

        rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
        print_ethaddr(" Address:", &ports_eth_addr[portid]);
        printf(", ");
        print_ethaddr("Destination:",
            (const struct ether_addr *)&dest_eth_addr[portid]);
        printf(", ");

        /*
         * prepare src MACs for each port.
         */
        ether_addr_copy(&ports_eth_addr[portid],
            (struct ether_addr *)(val_eth + portid) + 1);

        /* init memory */
        ret = init_mem(NB_MBUF);
        if (ret < 0)
            rte_exit(EXIT_FAILURE, "init_mem failed\n");

        /* init one TX queue per couple (lcore,port) */
        /*
         * Every port gets one TX queue on each enabled lcore. For
         * example, as (lcore, port_id, queue_id) with one port and two
         * lcores: (0,0,0), (1,0,1).
         */
        queueid = 0;
        for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
            if (rte_lcore_is_enabled(lcore_id) == 0)
                continue;

            if (numa_on)
                socketid =
                (uint8_t)rte_lcore_to_socket_id(lcore_id);
            else
                socketid = 0;

            printf("txq=%u,%d,%d ", lcore_id, queueid, socketid);
            fflush(stdout);

            rte_eth_dev_info_get(portid, &dev_info);
            txconf = &dev_info.default_txconf;
            if (port_conf.rxmode.jumbo_frame)
                txconf->txq_flags = 0;
            ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
                             socketid, txconf);
            if (ret < 0)
                rte_exit(EXIT_FAILURE,
                    "rte_eth_tx_queue_setup: err=%d, "
                    "port=%d\n", ret, portid);

            /* remember which TX queue this lcore uses on this port */
            qconf = &lcore_conf[lcore_id];
            qconf->tx_queue_id[portid] = queueid;
            queueid++;

            qconf->tx_port_id[qconf->n_tx_port] = portid;
            qconf->n_tx_port++;
        }
        printf("\n");
    }

    for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
        if (rte_lcore_is_enabled(lcore_id) == 0)
            continue;
        qconf = &lcore_conf[lcore_id];
        printf("\nInitializing rx queues on lcore %u ... ", lcore_id );
        fflush(stdout);
        /* init RX queues */
        for(queue = 0; queue < qconf->n_rx_queue; ++queue) {
            portid = qconf->rx_queue_list[queue].port_id;
            queueid = qconf->rx_queue_list[queue].queue_id;

            if (numa_on)
                socketid =
                (uint8_t)rte_lcore_to_socket_id(lcore_id);
            else
                socketid = 0;

            printf("rxq=%d,%d,%d ", portid, queueid, socketid);
            fflush(stdout);

            ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd,
                    socketid,
                    NULL,
                    pktmbuf_pool[socketid]);
            if (ret < 0)
                rte_exit(EXIT_FAILURE,
                "rte_eth_rx_queue_setup: err=%d, port=%d\n",
                ret, portid);
        }
    }

    printf("\n");

    /* start ports */
    for (portid = 0; portid < nb_ports; portid++) {
        if ((enabled_port_mask & (1 << portid)) == 0) {
            continue;
        }
        /* Start device */
        ret = rte_eth_dev_start(portid);
        if (ret < 0)
            rte_exit(EXIT_FAILURE,
                "rte_eth_dev_start: err=%d, port=%d\n",
                ret, portid);

        /*
         * If enabled, put device in promiscuous mode.
         * This allows IO forwarding mode to forward packets
         * to itself through 2 cross-connected  ports of the
         * target machine.
         */
        if (promiscuous_on)
            rte_eth_promiscuous_enable(portid);
    }

    printf("\n");

    /* verify or install packet-type parsing for every configured RX queue */
    for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
        if (rte_lcore_is_enabled(lcore_id) == 0)
            continue;
        qconf = &lcore_conf[lcore_id];
        for (queue = 0; queue < qconf->n_rx_queue; ++queue) {
            portid = qconf->rx_queue_list[queue].port_id;
            queueid = qconf->rx_queue_list[queue].queue_id;
            if (prepare_ptype_parser(portid, queueid) == 0)
                rte_exit(EXIT_FAILURE, "ptype check fails\n");
        }
    }


    check_all_ports_link_status((uint8_t)nb_ports, enabled_port_mask);

    ret = 0;
    /* launch per-lcore init on every lcore */
    rte_eal_mp_remote_launch(l3fwd_lkp.main_loop, NULL, CALL_MASTER);
    RTE_LCORE_FOREACH_SLAVE(lcore_id) {
        if (rte_eal_wait_lcore(lcore_id) < 0) {
            ret = -1;
            break;
        }
    }

    /* stop ports */
    for (portid = 0; portid < nb_ports; portid++) {
        if ((enabled_port_mask & (1 << portid)) == 0)
            continue;
        printf("Closing port %d...", portid);
        rte_eth_dev_stop(portid);
        rte_eth_dev_close(portid);
        printf(" Done\n");
    }
    printf("Bye...\n");

    return ret;
}





l3fwd.h
/*-
 * BSD LICENSE
 *
 * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in
 * the documentation and/or other materials provided with the
 * distribution.
 * * Neither the name of Intel Corporation nor the names of its
 * contributors may be used to endorse or promote products derived
 * from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __L3_FWD_H__
#define __L3_FWD_H__

#include <rte_vect.h>

/* When defined, is_valid_ipv4_pkt() further down in this header is compiled in. */
#define DO_RFC_1812_CHECKS

/* Log type behind RTE_LOG(..., L3FWD, ...); aliased to the USER1 log type. */
#define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1

/* Define NO_HASH_MULTI_LOOKUP on NEON builds unless it was already defined. */
#if !defined(NO_HASH_MULTI_LOOKUP) && defined(RTE_MACHINE_CPUFLAG_NEON)
#define NO_HASH_MULTI_LOOKUP 1
#endif

/* Capacity of struct mbuf_table and the burst size requested on RX. */
#define MAX_PKT_BURST 32
#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */

/* Capacity of lcore_conf.rx_queue_list. */
#define MAX_RX_QUEUE_PER_LCORE 16

/*
 * Try to avoid TX buffering if we have at least MAX_TX_BURST packets to send.
 */
#define	MAX_TX_BURST	(MAX_PKT_BURST / 2)

/* Number of per-socket slots in the lookup-table arrays. */
#define NB_SOCKETS 8

/* Configure how many packets ahead to prefetch, when reading packets */
#define PREFETCH_OFFSET	3

/* Used to mark destination port as 'invalid'. */
#define	BAD_PORT ((uint16_t)-1)

/* NOTE(review): presumably the packet group size of the vector path; not used in this header. */
#define FWDSTEP	4

/* replace first 12B of the ethernet header. */
#define	MASK_ETH 0x3f

/* Hash parameters. */
#ifdef RTE_ARCH_X86_64
/* default to 4 million hash entries (approx) */
#define L3FWD_HASH_ENTRIES	(1024*1024*4)
#else
/* 32-bit has less address-space for hugepage memory, limit to 1M entries */
#define L3FWD_HASH_ENTRIES	(1024*1024*1)
#endif
/* NOTE(review): presumably the default for hash_entry_number; confirm in main.c. */
#define HASH_ENTRY_NUMBER_DEFAULT	4

/* Pending-TX buffer: up to MAX_PKT_BURST mbufs waiting to be sent on one port. */
struct mbuf_table {
 uint16_t len; /* number of valid entries in m_table */
 struct rte_mbuf *m_table[MAX_PKT_BURST]; /* queued packets, filled from index 0 */
};

/* One (port, RX queue) pair polled by an lcore. */
struct lcore_rx_queue {
 uint8_t port_id; /* port to receive from */
 uint8_t queue_id; /* RX queue index on that port */
} __rte_cache_aligned;

/* Per-lcore forwarding state: the RX queues it polls and its TX resources. */
struct lcore_conf {
 uint16_t n_rx_queue; /* number of valid entries in rx_queue_list */
 struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE]; /* RX queues polled by this lcore */
 uint16_t n_tx_port; /* number of valid entries in tx_port_id */
 uint16_t tx_port_id[RTE_MAX_ETHPORTS]; /* list of TX port ids */
 uint16_t tx_queue_id[RTE_MAX_ETHPORTS]; /* TX queue id this lcore uses on each port, indexed by port id */
 struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS]; /* pending-TX buffer for each port, indexed by port id */
 void *ipv4_lookup_struct; /* opaque IPv4 lookup table (LPM or EM) */
 void *ipv6_lookup_struct; /* opaque IPv6 lookup table (LPM or EM) */
} __rte_cache_aligned;

/* Set to make the forwarding loops (e.g. lpm_main_loop) leave their while loop. */
extern volatile bool force_quit;

/* ethernet addresses of ports */
extern uint64_t dest_eth_addr[RTE_MAX_ETHPORTS];
extern struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];

/* mask of enabled ports */
extern uint32_t enabled_port_mask;

/* Used only in exact match mode. */
extern int ipv6; /**< ipv6 is false by default. */
extern uint32_t hash_entry_number;

/* NOTE(review): presumably per-port vector copies of the Ethernet addresses; confirm in main.c. */
extern xmm_t val_eth[RTE_MAX_ETHPORTS];

/* Per-lcore configuration, indexed by lcore id. */
extern struct lcore_conf lcore_conf[RTE_MAX_LCORE];

/*
 * Hand the first n mbufs buffered for `port` to rte_eth_tx_burst() on the
 * TX queue recorded for that port in qconf, then free every mbuf the call
 * did not accept.
 *
 * The caller is responsible for resetting the buffer length afterwards.
 * Always returns 0.
 */
static inline int
send_burst(struct lcore_conf *qconf, uint16_t n, uint8_t port)
{
    const uint16_t txq = qconf->tx_queue_id[port];
    struct rte_mbuf **pkts = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table;
    int next;

    /* `next` is the index of the first packet that was not transmitted. */
    next = rte_eth_tx_burst(port, txq, pkts, n);

    /* Drop the untransmitted tail so the mbufs are not leaked. */
    while (next < n)
        rte_pktmbuf_free(pkts[next++]);

    return 0;
}

/*
 * Append mbuf `m` to the pending-TX buffer of `port`. When the buffer
 * reaches MAX_PKT_BURST entries it is flushed through send_burst() and
 * restarted from empty. Always returns 0.
 */
static inline int
send_single_packet(struct lcore_conf *qconf,
        struct rte_mbuf *m, uint8_t port)
{
    const uint16_t slot = qconf->tx_mbufs[port].len;
    const uint16_t filled = (uint16_t)(slot + 1);

    qconf->tx_mbufs[port].m_table[slot] = m;

    /* Buffer full: transmit the whole burst and start over. */
    if (unlikely(filled == MAX_PKT_BURST)) {
        send_burst(qconf, MAX_PKT_BURST, port);
        qconf->tx_mbufs[port].len = 0;
        return 0;
    }

    qconf->tx_mbufs[port].len = filled;
    return 0;
}

#ifdef DO_RFC_1812_CHECKS
/*
 * Sanity-check an IPv4 header against the rules of RFC 1812 section 5.2.2.
 *
 * pkt:      IPv4 header to inspect.
 * link_len: number of bytes the link layer reports for the IP datagram.
 *
 * Returns 0 when every evaluated rule passes, otherwise a negative value
 * identifying the failed rule:
 *   -1  link length shorter than the minimum IPv4 header
 *   -3  version field is not 4
 *   -4  header length (IHL) below 5 words
 *   -5  total length shorter than the header length given by IHL
 * Rule 2 (header checksum) is not evaluated here.
 */
static inline int
is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len)
{
    uint32_t ihl_words;

    /* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */
    /*
     * 1. The packet length reported by the Link Layer must be large
     * enough to hold the minimum length legal IP datagram (20 bytes).
     */
    if (link_len < sizeof(struct ipv4_hdr))
        return -1;

    /* 2. The IP checksum must be correct. */
    /* this is checked in H/W */

    /*
     * 3. The IP version number must be 4. If the version number is not 4
     * then the packet may be another version of IP, such as IPng or
     * ST-II.
     */
    if (((pkt->version_ihl) >> 4) != 4)
        return -3;

    /*
     * 4. The IP header length field must be large enough to hold the
     * minimum length legal IP datagram (20 bytes = 5 words).
     */
    ihl_words = pkt->version_ihl & 0xf;
    if (ihl_words < 5)
        return -4;

    /*
     * 5. The IP total length field must be large enough to hold the IP
     * datagram header, whose length is specified in the IP header length
     * field. Compare against IHL * 4 bytes rather than the fixed 20-byte
     * minimum, so a header carrying options cannot claim a total length
     * that ends inside the header itself.
     */
    if (rte_cpu_to_be_16(pkt->total_length) < ihl_words * 4)
        return -5;

    return 0;
}
#endif /* DO_RFC_1812_CHECKS */

/*
 * Prototypes for the LPM (longest prefix match) and EM (exact match)
 * lookup back ends.
 */

/* Build the LPM lookup tables for the given socket. */
void
setup_lpm(const int socketid);

/* Build the EM hash lookup tables for the given socket. */
void
setup_hash(const int socketid);

/* Packet-type capability check for a port, EM variant. */
int
em_check_ptype(int portid);

/* Packet-type capability check for a port, LPM variant. */
int
lpm_check_ptype(int portid);

/* Callback that fills in mbuf packet types in software, EM variant. */
uint16_t
em_cb_parse_ptype(uint8_t port, uint16_t queue, struct rte_mbuf *pkts[],
    uint16_t nb_pkts, uint16_t max_pkts, void *user_param);

/* Callback that fills in mbuf packet types in software, LPM variant. */
uint16_t
lpm_cb_parse_ptype(uint8_t port, uint16_t queue, struct rte_mbuf *pkts[],
     uint16_t nb_pkts, uint16_t max_pkts, void *user_param);

/* Per-lcore forwarding loop, EM variant; the argument is unused. */
int
em_main_loop(__attribute__((unused)) void *dummy);

/* Per-lcore forwarding loop, LPM variant; the argument is unused. */
int
lpm_main_loop(__attribute__((unused)) void *dummy);

/* Return ipv4/ipv6 fwd lookup struct for LPM or EM. */
void *
em_get_ipv4_l3fwd_lookup_struct(const int socketid);

void *
em_get_ipv6_l3fwd_lookup_struct(const int socketid);

void *
lpm_get_ipv4_l3fwd_lookup_struct(const int socketid);

void *
lpm_get_ipv6_l3fwd_lookup_struct(const int socketid);

#endif /* __L3_FWD_H__ */


l3fwd_lpm.c
/*-
 * BSD LICENSE
 *
 * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in
 * the documentation and/or other materials provided with the
 * distribution.
 * * Neither the name of Intel Corporation nor the names of its
 * contributors may be used to endorse or promote products derived
 * from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <sys/types.h>
#include <string.h>
#include <sys/queue.h>
#include <stdarg.h>
#include <errno.h>
#include <getopt.h>
#include <stdbool.h>

#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_ring.h>
#include <rte_mempool.h>
#include <rte_cycles.h>
#include <rte_mbuf.h>
#include <rte_ip.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_lpm.h>
#include <rte_lpm6.h>

#include "l3fwd.h"

/* One static IPv4 route loaded into the LPM table by setup_lpm(). */
struct ipv4_l3fwd_lpm_route {
 uint32_t ip; /* IPv4 prefix, passed to rte_lpm_add() as the address */
 uint8_t depth; /* prefix length in bits */
 uint8_t if_out; /* output port id, stored as the next hop */
};

/* One static IPv6 route loaded into the LPM6 table by setup_lpm(). */
struct ipv6_l3fwd_lpm_route {
 uint8_t ip[16]; /* IPv6 prefix as 16 raw bytes */
 uint8_t depth; /* prefix length in bits */
 uint8_t if_out; /* output port id, stored as the next hop */
};

/*
 * Static IPv4 routing table: { prefix, prefix length, output port }.
 * setup_lpm() skips entries whose output port is not in enabled_port_mask.
 */
static struct ipv4_l3fwd_lpm_route ipv4_l3fwd_lpm_route_array[] = {
 {IPv4(88, 88, 88, 0), 24, 0},
 {IPv4(99, 99, 99, 0), 24, 1},
 {IPv4(1, 1, 1, 0), 24, 0},
 {IPv4(2, 1, 1, 0), 24, 1},
 {IPv4(3, 1, 1, 0), 24, 2},
 {IPv4(4, 1, 1, 0), 24, 3},
 {IPv4(5, 1, 1, 0), 24, 4},
 {IPv4(6, 1, 1, 0), 24, 5},
 {IPv4(7, 1, 1, 0), 24, 6},
 {IPv4(8, 1, 1, 0), 24, 7},
};

/*
 * Static IPv6 routing table: { 16-byte prefix, prefix length, output port }.
 * setup_lpm() skips entries whose output port is not in enabled_port_mask.
 */
static struct ipv6_l3fwd_lpm_route ipv6_l3fwd_lpm_route_array[] = {
 {{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 0},
 {{2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 1},
 {{3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 2},
 {{4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 3},
 {{5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 4},
 {{6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 5},
 {{7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 6},
 {{8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 7},
};

/* Element counts of the two static route arrays above. */
#define IPV4_L3FWD_LPM_NUM_ROUTES \
 (sizeof(ipv4_l3fwd_lpm_route_array) / sizeof(ipv4_l3fwd_lpm_route_array[0]))
#define IPV6_L3FWD_LPM_NUM_ROUTES \
 (sizeof(ipv6_l3fwd_lpm_route_array) / sizeof(ipv6_l3fwd_lpm_route_array[0]))

/* Sizing values copied into the LPM / LPM6 config structs by setup_lpm(). */
#define IPV4_L3FWD_LPM_MAX_RULES 1024
#define IPV4_L3FWD_LPM_NUMBER_TBL8S (1 << 8)
#define IPV6_L3FWD_LPM_MAX_RULES 1024
#define IPV6_L3FWD_LPM_NUMBER_TBL8S (1 << 16)

/* Lookup tables created by setup_lpm(), one slot per socket id. */
struct rte_lpm *ipv4_l3fwd_lpm_lookup_struct[NB_SOCKETS];
struct rte_lpm6 *ipv6_l3fwd_lpm_lookup_struct[NB_SOCKETS];

#if defined(__SSE4_1__)
#include "l3fwd_lpm_sse.h"
#else
#include "l3fwd_lpm.h"
#endif

/*
 * Per-lcore forwarding loop for LPM mode.
 *
 * Returns immediately when the calling lcore has no RX queue assigned.
 * Otherwise it runs until force_quit becomes true, doing two things on
 * every iteration:
 *   - once more than drain_tsc cycles have elapsed since the previous
 *     drain, flush every non-empty pending-TX buffer of this lcore;
 *   - poll each assigned RX queue and pass any received burst to the
 *     LPM send routine selected at compile time.
 *
 * The argument is unused. Always returns 0.
 */
int
lpm_main_loop(__attribute__((unused)) void *dummy)
{
    const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
        US_PER_S * BURST_TX_DRAIN_US;
    struct rte_mbuf *rx_pkts[MAX_PKT_BURST];
    struct lcore_conf *conf;
    uint64_t last_drain = 0;
    uint64_t now;
    unsigned self;
    uint8_t portid, queueid;
    int idx, nb_rx;

    self = rte_lcore_id();
    conf = &lcore_conf[self];

    if (conf->n_rx_queue == 0) {
        RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", self);
        return 0;
    }

    RTE_LOG(INFO, L3FWD, "entering main loop on lcore %u\n", self);

    /* Report every (port, queue) pair this lcore is going to poll. */
    for (idx = 0; idx < conf->n_rx_queue; idx++) {
        portid = conf->rx_queue_list[idx].port_id;
        queueid = conf->rx_queue_list[idx].queue_id;
        RTE_LOG(INFO, L3FWD,
            " -- lcoreid=%u portid=%hhu rxqueueid=%hhu\n",
            self, portid, queueid);
    }

    while (!force_quit) {
        now = rte_rdtsc();

        /* Periodic TX drain of partially filled buffers. */
        if (unlikely(now - last_drain > drain_tsc)) {
            /* Each lcore owns one TX queue on every TX port. */
            for (idx = 0; idx < conf->n_tx_port; idx++) {
                portid = conf->tx_port_id[idx];
                if (conf->tx_mbufs[portid].len != 0) {
                    send_burst(conf,
                        conf->tx_mbufs[portid].len,
                        portid);
                    conf->tx_mbufs[portid].len = 0;
                }
            }

            last_drain = now;
        }

        /* Poll every RX queue assigned to this lcore. */
        for (idx = 0; idx < conf->n_rx_queue; idx++) {
            portid = conf->rx_queue_list[idx].port_id;
            queueid = conf->rx_queue_list[idx].queue_id;
            nb_rx = rte_eth_rx_burst(portid, queueid, rx_pkts,
                MAX_PKT_BURST);
            if (nb_rx != 0) {
#if defined(__SSE4_1__)
                l3fwd_lpm_send_packets(nb_rx, rx_pkts,
                    portid, conf);
#else
                l3fwd_lpm_no_opt_send_packets(nb_rx, rx_pkts,
                    portid, conf);
#endif /* __SSE4_1__ */
            }
        }
    }

    return 0;
}

/*
 * Create and populate the IPv4 LPM table and the IPv6 LPM6 table for
 * `socketid`, storing them in the per-socket lookup arrays.
 *
 * Routes come from the static route arrays; a route is skipped when its
 * output port is not set in enabled_port_mask. Any creation or insertion
 * failure terminates the application through rte_exit().
 */
void
setup_lpm(const int socketid)
{
    struct rte_lpm_config config_ipv4;
    struct rte_lpm6_config config;
    char name[64];
    unsigned i;
    int ret;

    /* ---- IPv4: create the table ---- */
    snprintf(name, sizeof(name), "IPV4_L3FWD_LPM_%d", socketid);
    config_ipv4.flags = 0;
    config_ipv4.number_tbl8s = IPV4_L3FWD_LPM_NUMBER_TBL8S;
    config_ipv4.max_rules = IPV4_L3FWD_LPM_MAX_RULES;

    ipv4_l3fwd_lpm_lookup_struct[socketid] =
        rte_lpm_create(name, socketid, &config_ipv4);
    if (ipv4_l3fwd_lpm_lookup_struct[socketid] == NULL)
        rte_exit(EXIT_FAILURE,
            "Unable to create the l3fwd LPM table on socket %d\n",
            socketid);

    /* ---- IPv4: load the static routes ---- */
    for (i = 0; i < IPV4_L3FWD_LPM_NUM_ROUTES; i++) {
        const uint32_t prefix = ipv4_l3fwd_lpm_route_array[i].ip;
        const uint8_t depth = ipv4_l3fwd_lpm_route_array[i].depth;
        const uint8_t out_port = ipv4_l3fwd_lpm_route_array[i].if_out;

        /* Routes pointing at a disabled port are not installed. */
        if (((1 << out_port) & enabled_port_mask) == 0)
            continue;

        ret = rte_lpm_add(ipv4_l3fwd_lpm_lookup_struct[socketid],
            prefix, depth, out_port);
        if (ret < 0) {
            rte_exit(EXIT_FAILURE,
                "Unable to add entry %u to the l3fwd LPM table on socket %d\n",
                i, socketid);
        }

        printf("LPM: Adding route 0x%08x / %d (%d)\n",
            (unsigned)prefix, depth, out_port);
    }

    /* ---- IPv6: create the table ---- */
    config.flags = 0;
    config.number_tbl8s = IPV6_L3FWD_LPM_NUMBER_TBL8S;
    config.max_rules = IPV6_L3FWD_LPM_MAX_RULES;
    snprintf(name, sizeof(name), "IPV6_L3FWD_LPM_%d", socketid);

    ipv6_l3fwd_lpm_lookup_struct[socketid] =
        rte_lpm6_create(name, socketid, &config);
    if (ipv6_l3fwd_lpm_lookup_struct[socketid] == NULL)
        rte_exit(EXIT_FAILURE,
            "Unable to create the l3fwd LPM table on socket %d\n",
            socketid);

    /* ---- IPv6: load the static routes ---- */
    for (i = 0; i < IPV6_L3FWD_LPM_NUM_ROUTES; i++) {
        uint8_t *prefix6 = ipv6_l3fwd_lpm_route_array[i].ip;
        const uint8_t depth = ipv6_l3fwd_lpm_route_array[i].depth;
        const uint8_t out_port = ipv6_l3fwd_lpm_route_array[i].if_out;

        /* Routes pointing at a disabled port are not installed. */
        if (((1 << out_port) & enabled_port_mask) == 0)
            continue;

        ret = rte_lpm6_add(ipv6_l3fwd_lpm_lookup_struct[socketid],
            prefix6, depth, out_port);
        if (ret < 0) {
            rte_exit(EXIT_FAILURE,
                "Unable to add entry %u to the l3fwd LPM table on socket %d\n",
                i, socketid);
        }

        /* The address itself is not printed, only a fixed "IPV6" tag. */
        printf("LPM: Adding route %s / %d (%d)\n",
            "IPV6", depth, out_port);
    }
}

/*
 * Query the L3 packet types that `portid` reports as supported and check
 * for both RTE_PTYPE_L3_IPV4 and RTE_PTYPE_L3_IPV6 bits among them.
 *
 * Prints one line for each of the two types that is missing.
 * Returns 1 when both are present; returns 0 otherwise, including when the
 * initial query reports no supported types or fails.
 */
int
lpm_check_ptype(int portid)
{
    const uint32_t l3_mask = RTE_PTYPE_L3_MASK;
    int has_ipv4 = 0;
    int has_ipv6 = 0;
    int count;
    int idx;

    /* First call only asks how many entries there are. */
    count = rte_eth_dev_get_supported_ptypes(portid, l3_mask, NULL, 0);
    if (count <= 0)
        return 0;

    uint32_t supported[count];

    count = rte_eth_dev_get_supported_ptypes(portid, l3_mask, supported,
        count);
    for (idx = 0; idx < count; idx++) {
        if ((supported[idx] & RTE_PTYPE_L3_IPV4) != 0)
            has_ipv4 = 1;
        if ((supported[idx] & RTE_PTYPE_L3_IPV6) != 0)
            has_ipv6 = 1;
    }

    if (!has_ipv4)
        printf("port %d cannot parse RTE_PTYPE_L3_IPV4\n", portid);

    if (!has_ipv6)
        printf("port %d cannot parse RTE_PTYPE_L3_IPV6\n", portid);

    return (has_ipv4 && has_ipv6) ? 1 : 0;
}

static inline void
lpm_parse_ptype(struct rte_mbuf *m)
{
 struct ether_hdr *eth_hdr;
 uint32_t packet_type = RTE_PTYPE_UNKNOWN;
 uint16_t ether_type;

 eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
 ether_type = eth_hdr->ether_type;
 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
  packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
 else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv6))
  packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;

 m->packet_type = packet_type;
}

/*
 * Run lpm_parse_ptype() on each of the nb_pkts mbufs in pkts[].
 * The port, queue, max_pkts and user_param arguments are unused.
 * Returns nb_pkts unchanged.
 */
uint16_t
lpm_cb_parse_ptype(uint8_t port __rte_unused, uint16_t queue __rte_unused,
        struct rte_mbuf *pkts[], uint16_t nb_pkts,
        uint16_t max_pkts __rte_unused,
        void *user_param __rte_unused)
{
    struct rte_mbuf **cur = pkts;
    struct rte_mbuf **const end = pkts + nb_pkts;

    while (cur != end)
        lpm_parse_ptype(*cur++);

    return nb_pkts;
}

/*
 * Accessor for the IPv4 LPM table stored for `socketid`.
 * The index is used as given; no range check is performed.
 */
void *
lpm_get_ipv4_l3fwd_lookup_struct(const int socketid)
{
    void *table = ipv4_l3fwd_lpm_lookup_struct[socketid];

    return table;
}

/*
 * Accessor for the IPv6 LPM6 table stored for `socketid`.
 * The index is used as given; no range check is performed.
 */
void *
lpm_get_ipv6_l3fwd_lookup_struct(const int socketid)
{
    void *table = ipv6_l3fwd_lpm_lookup_struct[socketid];

    return table;
}

运行

./l3fwd -c1 -n4 -- -pf0 -L --config="(4,0,0),(5,0,0)" --parse-ptype
PMD: eth_em_dev_init(): port_id 7 vendorID=0x8086 deviceID=0x150c
L3FWD: Longest-prefix match selected
soft parse-ptype is enabled
RTE_MAX_LCORE = 128
lcore_conf[0]: rx_q_idx = 0, queue_id = 0, rx_port_id=4
lcore_conf[0]: rx_q_idx = 1, queue_id = 0, rx_port_id=5

Skipping disabled port 0

Skipping disabled port 1

Skipping disabled port 2

Skipping disabled port 3
Initializing port 4 ... Creating queues: nb_rxq=1 nb_txq=1... Address:00:E0:4C:0B:95:DE, Destination:02:00:00:00:00:04, Allocated mbuf pool on socket 0
LPM: Adding route 0x05010100 / 24 (4)
LPM: Adding route 0x06010100 / 24 (5)
LPM: Adding route 0x07010100 / 24 (6)
LPM: Adding route 0x08010100 / 24 (7)
LPM: Adding route IPV6 / 48 (4)
LPM: Adding route IPV6 / 48 (5)
LPM: Adding route IPV6 / 48 (6)
LPM: Adding route IPV6 / 48 (7)
txq=0,0,0 PMD: eth_em_tx_queue_setup(): sw_ring=0x7fffbab24300 hw_ring=0x7fffbab26400 dma_addr=0x13f526400

Initializing port 5 ... Creating queues: nb_rxq=1 nb_txq=1... Address:00:E0:4C:0B:95:DF, Destination:02:00:00:00:00:05, txq=0,0,0 PMD: eth_em_tx_queue_setup(): sw_ring=0x7fffbab120c0 hw_ring=0x7fffbab141c0 dma_addr=0x13f5141c0

Initializing port 6 ... Creating queues: nb_rxq=0 nb_txq=1... Address:00:E0:4C:0B:95:E0, Destination:02:00:00:00:00:06, txq=0,0,0 PMD: eth_em_tx_queue_setup(): sw_ring=0x7fffbaafff00 hw_ring=0x7fffbab02000 dma_addr=0x13f502000

Initializing port 7 ... Creating queues: nb_rxq=0 nb_txq=1... Address:00:E0:4C:0B:95:E1, Destination:02:00:00:00:00:07, txq=0,0,0 PMD: eth_em_tx_queue_setup(): sw_ring=0x7fffbaaedd40 hw_ring=0x7fffbaaefe40 dma_addr=0x13f4efe40


Initializing rx queues on lcore 0 ... rxq=4,0,0 PMD: eth_em_rx_queue_setup(): sw_ring=0x7fffbaadd800 hw_ring=0x7fffbaaddd00 dma_addr=0x13f4ddd00
rxq=5,0,0 PMD: eth_em_rx_queue_setup(): sw_ring=0x7fffbaacd2c0 hw_ring=0x7fffbaacd7c0 dma_addr=0x13f4cd7c0

PMD: eth_em_start(): <<
PMD: eth_em_start(): <<
PMD: eth_em_start(): <<
PMD: eth_em_start(): <<

Port 4: softly parse packet type info
Port 5: softly parse packet type info

Checking link status..........................................................................................done
Port 4 Link Up - speed 1000 Mbps - full-duplex
Port 5 Link Up - speed 1000 Mbps - full-duplex
Port 6 Link Down
Port 7 Link Down
L3FWD: entering main loop on lcore 0
L3FWD: -- lcoreid=0 portid=4 rxqueueid=0
L3FWD: -- lcoreid=0 portid=5 rxqueueid=0
RTE_MAX_ETHPORTS = 32, lcore_id:0, n_tx_port = 4

转发分析

  • l3fwd可以进行多核转发,每个逻辑核可以在不同的接口的不同队列进行收包,每个逻辑核也会在每个接口的某个队列进行发包
  • l3fwd需要修改l3fwd_lpm.c中的ipv4_l3fwd_lpm_route_array来设置路由表
static struct ipv4_l3fwd_lpm_route ipv4_l3fwd_lpm_route_array[] = {
 {IPv4(88, 88, 88, 0), 24, 0},	/* route added by the article's author */
 {IPv4(99, 99, 99, 0), 24, 1},	/* route added by the article's author */
 {IPv4(1, 1, 1, 0), 24, 0},
 {IPv4(2, 1, 1, 0), 24, 1},
 {IPv4(3, 1, 1, 0), 24, 2},
 {IPv4(4, 1, 1, 0), 24, 3},
 {IPv4(5, 1, 1, 0), 24, 4},
 {IPv4(6, 1, 1, 0), 24, 5},
 {IPv4(7, 1, 1, 0), 24, 6},
 {IPv4(8, 1, 1, 0), 24, 7},
};
  • l3fwd需要设置下一跳网关的mac地址
 // Explicitly set the destination MAC address for each output port.
 // port 0 -> 00:e0:4c:0b:01:f4
 // port 1 -> 00:e0:4c:0a:fe:70
 // Each MAC is packed into a uint64_t with its first octet in the least
 // significant byte and its last octet in bits 40-47.
    dest_eth_addr[0] = (0xf40000000000 /*<< 40*/) + (0x0100000000/* << 32*/) + (0x0b << 24) + (0x4c << 16) + (0xe0 << 8) + (0x00 << 0);
    dest_eth_addr[1] = (0x700000000000/* << 40*/) + (0xfe00000000/* << 32*/) + (0x0a << 24) + (0x4c << 16) + (0xe0 << 8) + (0x00 << 0);
 // Mirror the packed address into the first 8 bytes of the port's val_eth entry.
    *(uint64_t *)(val_eth + 0) = dest_eth_addr[0];
    *(uint64_t *)(val_eth + 1) = dest_eth_addr[1];

路由测试

  • 拓扑:pc(88.88.88.88)---------l3fwd---------linux(99.99.99.99)
./l3fwd -c3 -n4 -- -p3 -L --config="(0,0,0),(1,0,1)"

4
5

  • 3
    点赞
  • 0
    评论
  • 8
    收藏
  • 一键三连
    一键三连
  • 扫一扫,分享海报

相关推荐
©️2020 CSDN 皮肤主题: 技术黑板 设计师:CSDN官方博客 返回首页
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、C币套餐、付费专栏及课程。

余额充值