DPDK基础
basicfwd
运行命令
./basicfwd -c 1 -n 4
源码分析
#include <stdint.h>
#include <inttypes.h>
#include <rte_eal.h>
#include <rte_ethdev.h>
#include <rte_cycles.h>
#include <rte_lcore.h>
#include <rte_mbuf.h>
/* Requested number of RX descriptors per ring (initial value of nb_rxd in port_init). */
#define RX_RING_SIZE 1024
/* Requested number of TX descriptors per ring (initial value of nb_txd in port_init). */
#define TX_RING_SIZE 1024
/* mbufs allocated per port; main() multiplies this by the port count when sizing the pool. */
#define NUM_MBUFS 8191
/* Cache-size argument passed to rte_pktmbuf_pool_create() in main(). */
#define MBUF_CACHE_SIZE 250
/* Capacity of the mbuf array used for each RX burst in lcore_main(). */
#define BURST_SIZE 32
// Default port configuration; port_init() copies it before applying per-device offloads.
static const struct rte_eth_conf port_conf_default = {
.rxmode = {
// Maximum length of a received packet.
.max_rx_pkt_len = RTE_ETHER_MAX_LEN,
},
};
/*
 * Configure and start one Ethernet port with a single RX queue and a single
 * TX queue, print its MAC address and switch it to promiscuous mode.
 *
 * port      - identifier of the port to bring up.
 * mbuf_pool - mempool handed to the RX queue for packet buffers.
 *
 * Returns 0 on success, -1 if the port id is not valid, otherwise the
 * error code returned by the first ethdev call that failed.
 */
static inline int
port_init(uint16_t port, struct rte_mempool *mbuf_pool)
{
	const uint16_t rx_rings = 1, tx_rings = 1;
	struct rte_eth_conf port_conf = port_conf_default;
	struct rte_eth_dev_info dev_info;
	struct rte_eth_txconf txconf;
	struct rte_ether_addr addr;
	uint16_t nb_rxd = RX_RING_SIZE;
	uint16_t nb_txd = TX_RING_SIZE;
	int rc;

	if (!rte_eth_dev_is_valid_port(port))
		return -1;

	/* Query the device so its capabilities and default queue config are known. */
	rc = rte_eth_dev_info_get(port, &dev_info);
	if (rc != 0) {
		printf("Error during getting device (port %u) info: %s\n",
		       port, strerror(-rc));
		return rc;
	}

	/* Enable fast mbuf free on TX only when the device advertises it. */
	if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
		port_conf.txmode.offloads |= DEV_TX_OFFLOAD_MBUF_FAST_FREE;

	rc = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
	if (rc != 0)
		return rc;

	/* nb_rxd / nb_txd may be rewritten by this call before the queues are set up. */
	rc = rte_eth_dev_adjust_nb_rx_tx_desc(port, &nb_rxd, &nb_txd);
	if (rc != 0)
		return rc;

	/*
	 * RX queues: port, queue index, descriptor count, socket id of the
	 * port, NULL for the default queue config, and the mbuf pool.
	 */
	for (uint16_t rxq = 0; rxq < rx_rings; rxq++) {
		rc = rte_eth_rx_queue_setup(port, rxq, nb_rxd,
					    rte_eth_dev_socket_id(port),
					    NULL, mbuf_pool);
		if (rc < 0)
			return rc;
	}

	/* TX queues start from the device defaults plus the offloads chosen above. */
	txconf = dev_info.default_txconf;
	txconf.offloads = port_conf.txmode.offloads;
	for (uint16_t txq = 0; txq < tx_rings; txq++) {
		rc = rte_eth_tx_queue_setup(port, txq, nb_txd,
					    rte_eth_dev_socket_id(port),
					    &txconf);
		if (rc < 0)
			return rc;
	}

	rc = rte_eth_dev_start(port);
	if (rc < 0)
		return rc;

	/* Report the port's MAC address. */
	rc = rte_eth_macaddr_get(port, &addr);
	if (rc != 0)
		return rc;

	printf("Port %u MAC: %02" PRIx8 " %02" PRIx8 " %02" PRIx8
	       " %02" PRIx8 " %02" PRIx8 " %02" PRIx8 "\n",
	       port,
	       addr.addr_bytes[0], addr.addr_bytes[1],
	       addr.addr_bytes[2], addr.addr_bytes[3],
	       addr.addr_bytes[4], addr.addr_bytes[5]);

	/* Promiscuous mode: accept frames regardless of their destination MAC. */
	return rte_eth_promiscuous_enable(port);
}
/*
 * Forwarding loop executed on the calling lcore.
 *
 * Every packet received on queue 0 of a port is transmitted on queue 0 of
 * the paired port (port ^ 1, i.e. 0 <-> 1, 2 <-> 3, ...). The function
 * never returns, hence the noreturn attribute.
 */
static __attribute__((noreturn)) void
lcore_main(void)
{
	uint16_t port;

	/*
	 * Warn about ports whose socket id differs from the polling thread's
	 * socket id; polling across NUMA nodes is slower.
	 */
	RTE_ETH_FOREACH_DEV(port) {
		if (rte_eth_dev_socket_id(port) >= 0 &&
		    rte_eth_dev_socket_id(port) != (int)rte_socket_id()) {
			printf("WARNING, port %u is on remote NUMA node to "
			       "polling thread.\n\tPerformance will "
			       "not be optimal.\n",
			       port);
		}
	}

	printf("\nCore %u forwarding packets. [Ctrl+C to quit]\n",
	       rte_lcore_id());

	while (1) {
		RTE_ETH_FOREACH_DEV(port) {
			struct rte_mbuf *pkts[BURST_SIZE];

			/* Pull up to BURST_SIZE packets from RX queue 0 of this port. */
			const uint16_t received = rte_eth_rx_burst(port, 0,
								   pkts, BURST_SIZE);

			/* Nothing arrived: move on to the next port. */
			if (unlikely(received == 0))
				continue;

			/* Send the burst out of the paired port's TX queue 0. */
			const uint16_t sent = rte_eth_tx_burst(port ^ 1, 0,
							       pkts, received);

			/* Free whatever the TX queue did not accept. */
			if (unlikely(sent < received)) {
				uint16_t idx = sent;

				while (idx < received) {
					rte_pktmbuf_free(pkts[idx]);
					idx++;
				}
			}
		}
	}
}
// 程序入口函数
int main(int argc, char *argv[])
{
struct rte_mempool *mbuf_pool; //使用内存池空间来容纳ring队列, 接收和发送数据包.
unsigned nb_ports; // 网卡数量
uint16_t portid; //网卡ID
// eal环境初始化成功, 前ret个参数为EAL参数, 之后为应用程序参数.
int ret = rte_eal_init(argc, argv);
if (ret < 0)
rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
argc -= ret;
argv += ret;
// 获取可用的网卡数并且判断是否为偶数
nb_ports = rte_eth_dev_count_avail();
if (nb_ports < 2 || (nb_ports & 1))
rte_exit(EXIT_FAILURE, "Error: number of ports must be even\n");
/* 创建内存池.
第一个参数
第二个参数是mbuf数量, mbuf数量为网卡数*预定义mbuf数量(最好为2^n-1).
第三个参数是每个核心的cache数量.
第四个参数是mbuf私有数据空间的大小.
第五个参数是mbuf数据报文的大小(建议使用默认值RTE_MBUF_DEFAULT_BUF_SIZE).
第六个参数是申请内存的socket(使用rte_socket_id()返回正在运行的lcore的socket id). */
mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", NUM_MBUFS * nb_ports,
MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
if (mbuf_pool == NULL)
rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
// 使用宏函数RTE_ETH_FOREACH_DEV()初始化每个网卡.
RTE_ETH_FOREACH_DEV(portid)
if (port_init(portid, mbuf_pool) != 0)
rte_exit(EXIT_FAILURE, "Cannot init port %" PRIu16 "\n",
portid);
if (rte_lcore_count() > 1)
printf("\nWARNING: Too many lcores enabled. Only 1 used.\n");
// 在主线程中调用该函数.
lcore_main();
// 清理EAL环境中的资源.
rte_eal_cleanup();
return 0;
}
l2fwd
运行命令
./l2fwd -c 3 -n 4 -- -p 3 -q 1
源码分析
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2010-2016 Intel Corporation
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <inttypes.h>
#include <sys/types.h>
#include <sys/queue.h>
#include <netinet/in.h>
#include <setjmp.h>
#include <stdarg.h>
#include <ctype.h>
#include <errno.h>
#include <getopt.h>
#include <signal.h>
#include <stdbool.h>
#include <rte_common.h>
#include <rte_log.h>
#include <rte_malloc.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_eal.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_lcore.h>
#include <rte_per_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_interrupts.h>
#include <rte_random.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
/* Set by signal_handler(); polled by the forwarding and link-check loops to stop. */
static volatile bool force_quit;
/* MAC updating enabled by default */
static int mac_updating = 1;
/* Log type used by the RTE_LOG(..., L2FWD, ...) calls in this file. */
#define RTE_LOGTYPE_L2FWD RTE_LOGTYPE_USER1
/* Maximum packets per RX burst; also the capacity of each TX buffer. */
#define MAX_PKT_BURST 32
#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
/* Per-lcore cache size passed to rte_pktmbuf_pool_create(). */
#define MEMPOOL_CACHE_SIZE 256
/*
* Configurable number of RX/TX ring descriptors
*/
#define RTE_TEST_RX_DESC_DEFAULT 1024
#define RTE_TEST_TX_DESC_DEFAULT 1024
static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
/* ethernet addresses of ports */
static struct rte_ether_addr l2fwd_ports_eth_addr[RTE_MAX_ETHPORTS];
/* mask of enabled ports */
static uint32_t l2fwd_enabled_port_mask = 0;
/* destination (TX) port for each RX port, indexed by RX port id */
static uint32_t l2fwd_dst_ports[RTE_MAX_ETHPORTS];
/* number of RX ports handled by each lcore (set with -q) */
static unsigned int l2fwd_rx_queue_per_lcore = 1;
#define MAX_RX_QUEUE_PER_LCORE 16
#define MAX_TX_QUEUE_PER_PORT 16
/* Per-lcore list of the ports that lcore polls for received packets. */
struct lcore_queue_conf
{
unsigned n_rx_port;
unsigned rx_port_list[MAX_RX_QUEUE_PER_LCORE];
} __rte_cache_aligned;
struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE];
/* One software TX buffer per port, allocated in main(). */
static struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS];
/* Base port configuration; main() copies it per port before adding offloads. */
static struct rte_eth_conf port_conf = {
.rxmode = {
.split_hdr_size = 0,
},
.txmode = {
.mq_mode = ETH_MQ_TX_NONE,
},
};
/* mbuf pool shared by all ports, created in main(). */
struct rte_mempool *l2fwd_pktmbuf_pool = NULL;
/* Per-port statistics struct */
struct l2fwd_port_statistics
{
uint64_t tx;
uint64_t rx;
uint64_t dropped;
} __rte_cache_aligned;
struct l2fwd_port_statistics port_statistics[RTE_MAX_ETHPORTS];
#define MAX_TIMER_PERIOD 86400 /* 1 day max */
/* A tsc-based timer responsible for triggering statistics printout */
/* Holds seconds until main() multiplies it by rte_get_timer_hz(). */
static uint64_t timer_period = 10; /* default period is 10 seconds */
/*
 * Clear the terminal and print the sent / received / dropped packet
 * counters of every enabled port, followed by the totals over all of them.
 */
static void
print_stats(void)
{
	uint64_t total_packets_dropped = 0;
	uint64_t total_packets_tx = 0;
	uint64_t total_packets_rx = 0;
	unsigned portid;

	/* ANSI escape sequences: erase the screen, then move the cursor to row 1, column 1. */
	const char clr[] = {27, '[', '2', 'J', '\0'};
	const char topLeft[] = {27, '[', '1', ';', '1', 'H', '\0'};

	/* Clear screen and move to top left */
	printf("%s%s", clr, topLeft);

	printf("\nPort statistics ====================================");

	for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
		/*
		 * Skip disabled ports. The mask is 32 bits wide, so ids beyond
		 * it can never be enabled; testing that first and shifting an
		 * unsigned 1 keeps the shift well defined for every portid
		 * (a signed "1 << 31" or a shift by >= 32 is undefined).
		 */
		if (portid >= 8 * sizeof(l2fwd_enabled_port_mask) ||
		    (l2fwd_enabled_port_mask & (UINT32_C(1) << portid)) == 0)
			continue;

		printf("\nStatistics for port %u ------------------------------"
		       "\nPackets sent: %24" PRIu64
		       "\nPackets received: %20" PRIu64
		       "\nPackets dropped: %21" PRIu64,
		       portid,
		       port_statistics[portid].tx,
		       port_statistics[portid].rx,
		       port_statistics[portid].dropped);

		total_packets_dropped += port_statistics[portid].dropped;
		total_packets_tx += port_statistics[portid].tx;
		total_packets_rx += port_statistics[portid].rx;
	}

	printf("\nAggregate statistics ==============================="
	       "\nTotal packets sent: %18" PRIu64
	       "\nTotal packets received: %14" PRIu64
	       "\nTotal packets dropped: %15" PRIu64,
	       total_packets_tx,
	       total_packets_rx,
	       total_packets_dropped);
	printf("\n====================================================\n");

	fflush(stdout);
}
static void
l2fwd_mac_updating(struct rte_mbuf *m, unsigned dest_portid)
{
struct rte_ether_hdr *eth;
void *tmp;
eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
/* 02:00:00:00:00:xx */
tmp = ð->d_addr.addr_bytes[0];
*((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dest_portid << 40);
/* src addr */
rte_ether_addr_copy(&l2fwd_ports_eth_addr[dest_portid], ð->s_addr);
}
static void
l2fwd_simple_forward(struct rte_mbuf *m, unsigned portid)
{
unsigned dst_port;
int sent;
struct rte_eth_dev_tx_buffer *buffer;
dst_port = l2fwd_dst_ports[portid];
if (mac_updating)
l2fwd_mac_updating(m, dst_port);
buffer = tx_buffer[dst_port];
sent = rte_eth_tx_buffer(dst_port, 0, buffer, m);
if (sent)
port_statistics[dst_port].tx += sent;
}
/* main processing loop */
/*
 * Runs on each lcore: polls the RX ports assigned to this lcore in
 * lcore_queue_conf, forwards every received packet, periodically flushes
 * the TX buffers and, on the master lcore, prints statistics. Returns
 * immediately if the lcore has no RX port, otherwise when force_quit is set.
 */
static void
l2fwd_main_loop(void)
{
struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
struct rte_mbuf *m;
int sent;
unsigned lcore_id;
uint64_t prev_tsc, diff_tsc, cur_tsc, timer_tsc;
unsigned i, j, portid, nb_rx;
struct lcore_queue_conf *qconf;
/* TSC cycles per microsecond (rounded up) times BURST_TX_DRAIN_US. */
const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S *
BURST_TX_DRAIN_US;
struct rte_eth_dev_tx_buffer *buffer;
prev_tsc = 0;
timer_tsc = 0;
lcore_id = rte_lcore_id();
qconf = &lcore_queue_conf[lcore_id];
/* No RX port was assigned to this lcore: nothing to poll. */
if (qconf->n_rx_port == 0)
{
RTE_LOG(INFO, L2FWD, "lcore %u has nothing to do\n", lcore_id);
return;
}
RTE_LOG(INFO, L2FWD, "entering main loop on lcore %u\n", lcore_id);
/* Log which ports this lcore is going to poll. */
for (i = 0; i < qconf->n_rx_port; i++)
{
portid = qconf->rx_port_list[i];
RTE_LOG(INFO, L2FWD, " -- lcoreid=%u portid=%u\n", lcore_id,
portid);
}
while (!force_quit)
{
cur_tsc = rte_rdtsc();
/*
* TX burst queue drain
*/
diff_tsc = cur_tsc - prev_tsc;
if (unlikely(diff_tsc > drain_tsc))
{
/* Flush the TX buffer of the destination port of each RX port this lcore serves. */
for (i = 0; i < qconf->n_rx_port; i++)
{
portid = l2fwd_dst_ports[qconf->rx_port_list[i]];
buffer = tx_buffer[portid];
sent = rte_eth_tx_buffer_flush(portid, 0, buffer);
if (sent)
port_statistics[portid].tx += sent;
}
/* if timer is enabled */
if (timer_period > 0)
{
/* advance the timer */
timer_tsc += diff_tsc;
/* if timer has reached its timeout */
if (unlikely(timer_tsc >= timer_period))
{
/* do this only on master core */
if (lcore_id == rte_get_master_lcore())
{
print_stats();
/* reset the timer */
timer_tsc = 0;
}
}
}
/* prev_tsc only advances when a drain happened, so diff_tsc measures time since the last drain. */
prev_tsc = cur_tsc;
}
/*
* Read packet from RX queues
*/
for (i = 0; i < qconf->n_rx_port; i++)
{
portid = qconf->rx_port_list[i];
nb_rx = rte_eth_rx_burst(portid, 0,
pkts_burst, MAX_PKT_BURST);
port_statistics[portid].rx += nb_rx;
for (j = 0; j < nb_rx; j++)
{
m = pkts_burst[j];
/* Prefetch the packet data before the header is touched in l2fwd_simple_forward(). */
rte_prefetch0(rte_pktmbuf_mtod(m, void *));
l2fwd_simple_forward(m, portid);
}
}
}
}
/*
 * Per-lcore entry point passed to rte_eal_mp_remote_launch(): runs the
 * forwarding loop and reports success once it returns.
 *
 * dummy - unused launch argument.
 *
 * Always returns 0.
 */
static int
l2fwd_launch_one_lcore(__attribute__((unused)) void *dummy)
{
	l2fwd_main_loop();

	return 0;
}
/*
 * Print the l2fwd command-line help to stdout.
 *
 * prgname - program name shown at the start of the synopsis line.
 */
static void
l2fwd_usage(const char *prgname)
{
	/* Synopsis line: the only part that needs formatting. */
	printf("%s [EAL options] -- -p PORTMASK [-q NQ]\n", prgname);

	/* Option descriptions: fixed text, emitted verbatim. */
	fputs(" -p PORTMASK: hexadecimal bitmask of ports to configure\n"
	      " -q NQ: number of queue (=ports) per lcore (default is 1)\n"
	      " -T PERIOD: statistics will be refreshed each PERIOD seconds (0 to disable, 10 default, 86400 maximum)\n"
	      " --[no-]mac-updating: Enable or disable MAC addresses updating (enabled by default)\n"
	      " When enabled:\n"
	      " - The source MAC address is replaced by the TX port MAC address\n"
	      " - The destination MAC address is replaced by 02:00:00:00:00:TX_PORT_ID\n",
	      stdout);
}
/*
 * Parse the -p argument: a hexadecimal bitmask of ports.
 *
 * portmask - NUL-terminated option text.
 *
 * Returns the mask, or 0 when the text is empty, contains anything that is
 * not part of a hexadecimal number, is zero, or does not fit in 32 bits.
 *
 * 0 is the error value because the caller stores the result in the
 * unsigned 32-bit port mask and rejects it with "== 0"; returning -1 would
 * turn into 0xFFFFFFFF there and be accepted as "all ports enabled".
 */
static int
l2fwd_parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	/*
	 * strtoul(str, &end, base): end is set to the first character that
	 * was not consumed; base 16 parses hexadecimal.
	 */
	errno = 0;
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return 0;

	/* The port mask is 32 bits wide: reject anything that would be truncated. */
	if (errno == ERANGE || pm > UINT32_MAX)
		return 0;

	/* Masks with bit 31 set come back negative here; the caller's uint32_t restores them. */
	return (int)pm;
}
/*
 * Parse the -q argument: the number of RX ports each lcore handles.
 *
 * q_arg - NUL-terminated option text, decimal.
 *
 * Returns the parsed count, or 0 when the text is empty, has trailing
 * non-digit characters, is zero, or is not below MAX_RX_QUEUE_PER_LCORE.
 */
static unsigned int
l2fwd_parse_nqueue(const char *q_arg)
{
	char *rest = NULL;
	const unsigned long count = strtoul(q_arg, &rest, 10);

	/* The whole string must have been consumed as a number. */
	if (q_arg[0] == '\0' || rest == NULL || *rest != '\0') {
		return 0;
	}

	if (count == 0 || count >= MAX_RX_QUEUE_PER_LCORE) {
		return 0;
	}

	return count;
}
/*
 * Parse the -T argument: the statistics refresh period in seconds.
 *
 * q_arg - NUL-terminated option text, decimal.
 *
 * Returns the period (0 <= n < MAX_TIMER_PERIOD), or -1 when the text is
 * empty, has trailing non-digit characters, is negative, or is out of
 * range.
 *
 * NOTE(review): the usage text advertises 86400 as the maximum, but this
 * check rejects exactly MAX_TIMER_PERIOD; left unchanged pending
 * confirmation of which one is intended.
 */
static int
l2fwd_parse_timer_period(const char *q_arg)
{
	char *end = NULL;
	long n;

	n = strtol(q_arg, &end, 10);
	if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
		return -1;

	/*
	 * Range-check while the value is still a long: narrowing to int
	 * first would let large inputs such as "4294967297" wrap to a small
	 * value and slip through.
	 */
	if (n < 0 || n >= MAX_TIMER_PERIOD)
		return -1;

	return (int)n;
}
/* Short options accepted by getopt_long(); the ':' marks an option that takes an argument. */
static const char short_options[] =
"p:" /* portmask */
"q:" /* number of queues */
"T:" /* timer period */
;
#define CMD_LINE_OPT_MAC_UPDATING "mac-updating"
#define CMD_LINE_OPT_NO_MAC_UPDATING "no-mac-updating"
enum
{
/* long options mapped to a short option */
/* first long only option value must be >= 256, so that we won't
* conflict with short options */
CMD_LINE_OPT_MIN_NUM = 256,
};
/* Fields of struct option, in order:
name: name of the long option.
has_arg: whether it takes an argument (no_argument, required_argument, optional_argument).
flag: if NULL, getopt_long() returns val; otherwise getopt_long() returns 0 and stores val in *flag.
val: value to return, or to store through flag.
Both entries below point flag at mac_updating, so the option is applied by
getopt_long() itself and the parser only sees a return value of 0.
*/
static const struct option lgopts[] = {
{CMD_LINE_OPT_MAC_UPDATING, no_argument, &mac_updating, 1},
{CMD_LINE_OPT_NO_MAC_UPDATING, no_argument, &mac_updating, 0},
{NULL, 0, 0, 0}};
/*
 * Parse the application arguments (those left after the EAL arguments).
 *
 * argc, argv - argument vector with the program name in argv[0].
 *
 * Fills l2fwd_enabled_port_mask (-p), l2fwd_rx_queue_per_lcore (-q) and
 * timer_period (-T); the --[no-]mac-updating long options are stored by
 * getopt_long() through lgopts. Prints the usage text and returns -1 on
 * any invalid or unknown option; otherwise returns optind - 1 and resets
 * optind to 1.
 */
static int
l2fwd_parse_args(int argc, char **argv)
{
	char *prgname = argv[0];
	int long_index;
	int period;
	int consumed;
	int c;

	/*
	 * getopt_long() takes the argument count and vector, the short
	 * option string, the long option table, and a place to store the
	 * index of the matched long option.
	 */
	for (;;) {
		c = getopt_long(argc, argv, short_options, lgopts, &long_index);
		if (c == EOF)
			break;

		switch (c) {
		/* port mask */
		case 'p':
			l2fwd_enabled_port_mask = l2fwd_parse_portmask(optarg);
			if (l2fwd_enabled_port_mask == 0) {
				printf("invalid portmask\n");
				l2fwd_usage(prgname);
				return -1;
			}
			break;

		/* number of RX ports per lcore */
		case 'q':
			l2fwd_rx_queue_per_lcore = l2fwd_parse_nqueue(optarg);
			if (l2fwd_rx_queue_per_lcore == 0) {
				printf("invalid queue number\n");
				l2fwd_usage(prgname);
				return -1;
			}
			break;

		/* interval between statistics printouts */
		case 'T':
			period = l2fwd_parse_timer_period(optarg);
			if (period < 0) {
				printf("invalid timer period\n");
				l2fwd_usage(prgname);
				return -1;
			}
			timer_period = period;
			break;

		/* long options: already applied through the flag pointer */
		case 0:
			break;

		default:
			l2fwd_usage(prgname);
			return -1;
		}
	}

	/* Put the program name in the slot just before the first unparsed argument. */
	if (optind >= 0)
		argv[optind - 1] = prgname;

	consumed = optind - 1;
	optind = 1; /* reset getopt lib */

	return consumed;
}
/*
 * Check the link status of all ports in up to 9s, and print them finally.
 *
 * port_mask - bitmask of the ports to check (bit n = port n).
 *
 * Polls every CHECK_INTERVAL ms until all selected ports report link up or
 * the time budget is used, then makes one extra pass that prints the
 * status of each port. Returns early, printing nothing more, if
 * force_quit is set.
 */
static void
check_all_ports_link_status(uint32_t port_mask)
{
#define CHECK_INTERVAL 100 /* 100ms */
#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
	uint16_t portid;
	uint8_t count, all_ports_up, print_flag = 0;
	struct rte_eth_link link;
	int ret;

	printf("\nChecking link status");
	fflush(stdout);
	for (count = 0; count <= MAX_CHECK_TIME; count++) {
		if (force_quit)
			return;
		all_ports_up = 1;
		RTE_ETH_FOREACH_DEV(portid) {
			if (force_quit)
				return;
			/*
			 * Skip ports outside the mask. The mask is 32 bits
			 * wide; checking the id first and shifting an
			 * unsigned 1 keeps the shift well defined (a signed
			 * "1 << 31" or a shift by >= 32 is undefined).
			 */
			if (portid >= 8 * sizeof(port_mask) ||
			    (port_mask & (UINT32_C(1) << portid)) == 0)
				continue;
			memset(&link, 0, sizeof(link));
			ret = rte_eth_link_get_nowait(portid, &link);
			if (ret < 0) {
				all_ports_up = 0;
				if (print_flag == 1)
					printf("Port %u link get failed: %s\n",
					       portid, rte_strerror(-ret));
				continue;
			}
			/* print link status if flag set */
			if (print_flag == 1) {
				if (link.link_status)
					printf(
						"Port%d Link Up. Speed %u Mbps - %s\n",
						portid, link.link_speed,
						(link.link_duplex == ETH_LINK_FULL_DUPLEX) ? ("full-duplex") : ("half-duplex"));
				else
					printf("Port %d Link Down\n", portid);
				continue;
			}
			/* clear all_ports_up flag if any link down */
			if (link.link_status == ETH_LINK_DOWN) {
				all_ports_up = 0;
				break;
			}
		}
		/* after finally printing all link status, get out */
		if (print_flag == 1)
			break;

		if (all_ports_up == 0) {
			printf(".");
			fflush(stdout);
			rte_delay_ms(CHECK_INTERVAL);
		}

		/* set the print_flag if all ports up or timeout */
		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
			print_flag = 1;
			printf("done\n");
		}
	}
}
/*
 * Handler installed for SIGINT and SIGTERM: announces the signal and sets
 * force_quit so the polling loops stop. Any other signal number is ignored.
 *
 * NOTE(review): printf() is not async-signal-safe; kept because the
 * message is part of the program's existing output.
 */
static void
signal_handler(int signum)
{
	if (signum != SIGINT && signum != SIGTERM)
		return;

	printf("\n\nSignal %d received, preparing to exit...\n", signum);
	force_quit = true;
}
/*
 * l2fwd entry point.
 *
 * Initialises the EAL, parses the application arguments, pairs the enabled
 * ports (0 <-> 1, 2 <-> 3, ...), assigns RX ports to lcores, creates the
 * mbuf pool, configures and starts every enabled port, waits for links,
 * runs the forwarding loop on all lcores and finally stops and closes the
 * ports.
 *
 * Returns 0, or -1 if waiting for a worker lcore failed.
 */
int main(int argc, char **argv)
{
	/*
	 * Width of the port mask in bits. Port ids at or beyond it can never
	 * be enabled, and testing that before shifting (with an unsigned 1)
	 * keeps every mask shift below well defined; a signed "1 << 31" or a
	 * shift by >= 32 is undefined behaviour.
	 */
	const unsigned int mask_bits = 8 * sizeof(l2fwd_enabled_port_mask);
	struct lcore_queue_conf *qconf;
	int ret;
	uint16_t nb_ports;
	uint16_t nb_ports_available = 0;
	uint16_t portid, last_port;
	unsigned lcore_id, rx_lcore_id;
	unsigned nb_ports_in_mask = 0;
	unsigned int nb_lcores = 0;
	unsigned int nb_mbufs;
	uint32_t possible_mask;

	/* Initialise the EAL; on success ret is the number of arguments it consumed. */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n");
	argc -= ret;
	argv += ret;

	force_quit = false;
	signal(SIGINT, signal_handler);
	signal(SIGTERM, signal_handler);

	/* Parse the l2fwd-specific arguments. */
	ret = l2fwd_parse_args(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid L2FWD arguments\n");

	printf("MAC updating %s\n", mac_updating ? "enabled" : "disabled");

	/* Convert the statistics period from seconds to timer cycles. */
	timer_period *= rte_get_timer_hz();

	nb_ports = rte_eth_dev_count_avail(); /* number of available ports */
	if (nb_ports == 0)
		rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n");

	/* The port mask must not select ports beyond the available count. */
	possible_mask = (nb_ports >= mask_bits) ?
		UINT32_MAX : ((UINT32_C(1) << nb_ports) - 1);
	if (l2fwd_enabled_port_mask & ~possible_mask)
		rte_exit(EXIT_FAILURE, "Invalid portmask; possible (0x%x)\n",
			 (unsigned int)possible_mask);

	/* reset l2fwd_dst_ports */
	for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++)
		l2fwd_dst_ports[portid] = 0;
	last_port = 0;

	/* Pair the enabled ports: each port forwards to its partner. */
	RTE_ETH_FOREACH_DEV(portid) {
		/* skip ports that are not enabled */
		if (portid >= mask_bits ||
		    (l2fwd_enabled_port_mask & (UINT32_C(1) << portid)) == 0)
			continue;

		/* Every second enabled port is paired with the one before it. */
		if (nb_ports_in_mask % 2) {
			l2fwd_dst_ports[portid] = last_port;
			l2fwd_dst_ports[last_port] = portid;
		} else
			last_port = portid;

		nb_ports_in_mask++;
	}
	/* With an odd number of ports the last one forwards to itself. */
	if (nb_ports_in_mask % 2) {
		printf("Notice: odd number of ports in portmask.\n");
		l2fwd_dst_ports[last_port] = last_port;
	}

	rx_lcore_id = 0;
	qconf = NULL;

	/* Assign each enabled port to an lcore as one of its RX ports. */
	RTE_ETH_FOREACH_DEV(portid) {
		/* skip ports that are not enabled */
		if (portid >= mask_bits ||
		    (l2fwd_enabled_port_mask & (UINT32_C(1) << portid)) == 0)
			continue;

		/* Find an enabled lcore that still has room for another RX port. */
		while (rte_lcore_is_enabled(rx_lcore_id) == 0 ||
		       lcore_queue_conf[rx_lcore_id].n_rx_port ==
			       l2fwd_rx_queue_per_lcore) {
			rx_lcore_id++;
			if (rx_lcore_id >= RTE_MAX_LCORE)
				rte_exit(EXIT_FAILURE, "Not enough cores\n");
		}

		if (qconf != &lcore_queue_conf[rx_lcore_id]) {
			/* First port given to this lcore: count the lcore as used. */
			qconf = &lcore_queue_conf[rx_lcore_id];
			nb_lcores++;
		}

		qconf->rx_port_list[qconf->n_rx_port] = portid;
		qconf->n_rx_port++;
		printf("Lcore %u: RX port %u\n", rx_lcore_id, portid);
	}

	/* Create the mbuf pool, sized from descriptors, burst size and lcore caches. */
	nb_mbufs = RTE_MAX(nb_ports * (nb_rxd + nb_txd + MAX_PKT_BURST +
				       nb_lcores * MEMPOOL_CACHE_SIZE),
			   8192U);
	l2fwd_pktmbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", nb_mbufs,
						     MEMPOOL_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
						     rte_socket_id());
	if (l2fwd_pktmbuf_pool == NULL)
		rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n");

	/* Initialise each enabled port. */
	RTE_ETH_FOREACH_DEV(portid) {
		struct rte_eth_rxconf rxq_conf;
		struct rte_eth_txconf txq_conf;
		struct rte_eth_conf local_port_conf = port_conf;
		struct rte_eth_dev_info dev_info;

		/* skip ports that are not enabled */
		if (portid >= mask_bits ||
		    (l2fwd_enabled_port_mask & (UINT32_C(1) << portid)) == 0) {
			printf("Skipping disabled port %u\n", portid);
			continue;
		}
		nb_ports_available++;

		/* init port */
		printf("Initializing port %u... ", portid);
		fflush(stdout);

		ret = rte_eth_dev_info_get(portid, &dev_info);
		if (ret != 0)
			rte_exit(EXIT_FAILURE,
				 "Error during getting device (port %u) info: %s\n",
				 portid, strerror(-ret));

		if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
			local_port_conf.txmode.offloads |=
				DEV_TX_OFFLOAD_MBUF_FAST_FREE;
		ret = rte_eth_dev_configure(portid, 1, 1, &local_port_conf);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%u\n",
				 ret, portid);

		ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd,
						       &nb_txd);
		if (ret < 0)
			rte_exit(EXIT_FAILURE,
				 "Cannot adjust number of descriptors: err=%d, port=%u\n",
				 ret, portid);

		ret = rte_eth_macaddr_get(portid,
					  &l2fwd_ports_eth_addr[portid]);
		if (ret < 0)
			rte_exit(EXIT_FAILURE,
				 "Cannot get MAC address: err=%d, port=%u\n",
				 ret, portid);

		/* set up the single RX queue */
		fflush(stdout);
		rxq_conf = dev_info.default_rxconf;
		rxq_conf.offloads = local_port_conf.rxmode.offloads;
		ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd,
					     rte_eth_dev_socket_id(portid),
					     &rxq_conf,
					     l2fwd_pktmbuf_pool);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup:err=%d, port=%u\n",
				 ret, portid);

		/* set up the single TX queue */
		fflush(stdout);
		txq_conf = dev_info.default_txconf;
		txq_conf.offloads = local_port_conf.txmode.offloads;
		ret = rte_eth_tx_queue_setup(portid, 0, nb_txd,
					     rte_eth_dev_socket_id(portid),
					     &txq_conf);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup:err=%d, port=%u\n",
				 ret, portid);

		/* Allocate and initialise the software TX buffer for this port. */
		tx_buffer[portid] = rte_zmalloc_socket("tx_buffer",
						       RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0,
						       rte_eth_dev_socket_id(portid));
		if (tx_buffer[portid] == NULL)
			rte_exit(EXIT_FAILURE, "Cannot allocate buffer for tx on port %u\n",
				 portid);

		rte_eth_tx_buffer_init(tx_buffer[portid], MAX_PKT_BURST);

		/* Packets the TX buffer fails to send are counted in the port's dropped counter. */
		ret = rte_eth_tx_buffer_set_err_callback(tx_buffer[portid],
							 rte_eth_tx_buffer_count_callback,
							 &port_statistics[portid].dropped);
		if (ret < 0)
			rte_exit(EXIT_FAILURE,
				 "Cannot set error callback for tx buffer on port %u\n",
				 portid);

		ret = rte_eth_dev_set_ptypes(portid, RTE_PTYPE_UNKNOWN, NULL,
					     0);
		if (ret < 0)
			printf("Port %u, Failed to disable Ptype parsing\n",
			       portid);

		/* start the device */
		ret = rte_eth_dev_start(portid);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "rte_eth_dev_start:err=%d, port=%u\n",
				 ret, portid);

		printf("done: \n");

		ret = rte_eth_promiscuous_enable(portid);
		if (ret != 0)
			rte_exit(EXIT_FAILURE,
				 "rte_eth_promiscuous_enable:err=%s, port=%u\n",
				 rte_strerror(-ret), portid);

		printf("Port %u, MAC address: %02X:%02X:%02X:%02X:%02X:%02X\n\n",
		       portid,
		       l2fwd_ports_eth_addr[portid].addr_bytes[0],
		       l2fwd_ports_eth_addr[portid].addr_bytes[1],
		       l2fwd_ports_eth_addr[portid].addr_bytes[2],
		       l2fwd_ports_eth_addr[portid].addr_bytes[3],
		       l2fwd_ports_eth_addr[portid].addr_bytes[4],
		       l2fwd_ports_eth_addr[portid].addr_bytes[5]);

		/* initialize port stats */
		memset(&port_statistics, 0, sizeof(port_statistics));
	}

	if (!nb_ports_available) {
		rte_exit(EXIT_FAILURE,
			 "All available ports are disabled. Please set portmask.\n");
	}

	check_all_ports_link_status(l2fwd_enabled_port_mask);

	ret = 0;
	/* Launch the forwarding loop on every lcore, the master included. */
	rte_eal_mp_remote_launch(l2fwd_launch_one_lcore, NULL, CALL_MASTER);
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (rte_eal_wait_lcore(lcore_id) < 0) {
			ret = -1;
			break;
		}
	}

	/* Stop and close every enabled port. */
	RTE_ETH_FOREACH_DEV(portid) {
		if (portid >= mask_bits ||
		    (l2fwd_enabled_port_mask & (UINT32_C(1) << portid)) == 0)
			continue;
		printf("Closing port %d...", portid);
		rte_eth_dev_stop(portid);
		rte_eth_dev_close(portid);
		printf(" Done\n");
	}

	/* clean up the EAL */
	rte_eal_cleanup();
	printf("Bye...\n");

	return ret;
}
Comments | NOTHING