一、dpdk之kni处理dns案例
1. kni_dns.c
cpp
#include <rte_common.h> //基础通用
#include <rte_eal.h> //环境抽象,负责初始化DPDK运行环境
#include <rte_ethdev.h> //以太网设备驱动,数据包收发api
#include <rte_cycles.h> //高性能时间/周期计数api
#include <rte_lcore.h> //核心管理
#include <rte_mbuf.h> // DPDK 的数据包缓冲区(mbuf)管理
#include <rte_kni.h> //内核网卡接口
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_byteorder.h> //字节序转换
#include <rte_errno.h> //DPDK错误码处理
#include <rte_hash.h>
#include <rte_jhash.h> // Jenkins 哈希算法实现,一种高效的非加密哈希函数,常配合rte_hash使用,用于生成数据包 / 键值的哈希值
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <inttypes.h> //整数格式化输出
#include <sys/queue.h>
#include <unistd.h>
#include <signal.h>
#include <stdbool.h>
#include <arpa/inet.h>
#include <sys/time.h>
// Ethernet port used for all RX/TX bursts in this file
#define PORT_ID 0
// Number of mbufs and per-core cache size passed to rte_pktmbuf_pool_create()
#define NUM_MBUFS 8191
#define MBUF_CACHE_SIZE 250
// Maximum number of packets fetched per RX burst
#define BURST_SIZE 32
// Value assigned to the KNI configuration's mbuf_size
#define MAX_PACKET_SIZE 2048
#define CPU_DELAY_US 100 // main-loop delay, in microseconds
// DNS-related definitions
#define DNS_PORT 53
#define DNS_HEADER_SIZE 12
// QR is the top bit (0x80) of byte 2 of the DNS header; it distinguishes a query (0) from a response (1)
#define DNS_QR_QUERY 0
#define DNS_QR_RESPONSE 1
// Global state
static struct rte_mempool *mbuf_pool;
static struct rte_kni *kni;
static volatile int force_quit = 0; // set by the signal handler to stop the main loop
// Static DNS resolution table (domain -> IPv4 address).
// ip_addr is copied byte-for-byte into the answer record, so it must be held in network byte order.
typedef struct {
char domain[256];
uint32_t ip_addr;
}dns_entry_t;
// Lookups stop at the first entry whose domain is the empty string.
static dns_entry_t dns_table[10];
/**
 * Compute the UDP checksum over the IPv4 pseudo-header, the UDP header and
 * the UDP payload.
 *
 * The caller must have filled in udp_hdr->dgram_len (network byte order) and
 * set udp_hdr->dgram_cksum to 0. Exactly dgram_len bytes starting at udp_hdr
 * are read.
 *
 * All 16-bit words are summed as they sit in memory, so the result can be
 * stored into dgram_cksum without further byte swapping.
 *
 * @param ip_hdr   IPv4 header supplying src_addr and dst_addr.
 * @param udp_hdr  UDP header, immediately followed by its payload.
 * @return Checksum to store into dgram_cksum. A computed value of 0 is
 *         returned as 0xFFFF, because RFC 768 reserves 0 for "no checksum".
 */
static uint16_t calculate_udp_checksum(const struct rte_ipv4_hdr *ip_hdr, const struct rte_udp_hdr *udp_hdr) {
    const uint8_t *bytes = (const uint8_t *)udp_hdr;
    size_t remaining = rte_be_to_cpu_16(udp_hdr->dgram_len);
    uint32_t sum = 0;
    uint16_t word;

    /* Pseudo-header: source, destination, zero + protocol, UDP length. */
    sum += (ip_hdr->src_addr >> 16) & 0xFFFF;
    sum += ip_hdr->src_addr & 0xFFFF;
    sum += (ip_hdr->dst_addr >> 16) & 0xFFFF;
    sum += ip_hdr->dst_addr & 0xFFFF;
    sum += rte_cpu_to_be_16(IPPROTO_UDP);
    sum += rte_cpu_to_be_16((uint16_t)remaining);

    /* UDP header and payload; memcpy avoids unaligned 16-bit loads. */
    while (remaining >= 2) {
        memcpy(&word, bytes, sizeof(word));
        sum += word;
        bytes += sizeof(word);
        remaining -= sizeof(word);
    }

    /*
     * Odd trailing byte: pad with a zero byte on the right. Copying the single
     * byte into the first byte of a zeroed word is correct on both little- and
     * big-endian hosts and never reads past the datagram.
     */
    if (remaining == 1) {
        word = 0;
        memcpy(&word, bytes, 1);
        sum += word;
    }

    /* Fold the carries back into the low 16 bits. */
    while (sum >> 16) {
        sum = (sum & 0xFFFF) + (sum >> 16);
    }

    const uint16_t cksum = (uint16_t)~sum;
    return cksum == 0 ? 0xFFFF : cksum;
}
/**
 * Fill the static DNS table with the predefined domain -> IPv4 mappings and
 * terminate it with an empty-domain sentinel entry.
 *
 * Addresses are stored in network byte order, because they are later copied
 * byte-for-byte into the DNS answer and printed with inet_ntoa().
 */
void init_dns_table(void) {
    static const struct {
        const char *domain;
        const char *ipv4;
    } seeds[] = {
        { "www.example.com", "93.184.216.34" },
        { "example.com",     "93.184.216.34" },
        { "google.com",      "8.8.8.8" },
        { "www.google.com",  "8.8.8.8" },
    };
    const size_t capacity = sizeof(dns_table) / sizeof(dns_table[0]);
    const size_t seed_count = sizeof(seeds) / sizeof(seeds[0]);
    size_t used = 0;

    /* Always keep one slot free for the sentinel. */
    for (size_t i = 0; i < seed_count && used + 1 < capacity; i++) {
        snprintf(dns_table[used].domain, sizeof(dns_table[used].domain), "%s", seeds[i].domain);
        /*
         * inet_addr() already returns network byte order. Swapping it again
         * would reverse the address on little-endian hosts.
         */
        dns_table[used].ip_addr = inet_addr(seeds[i].ipv4);
        used++;
    }

    /* End marker: lookups stop at the first empty domain. */
    dns_table[used].domain[0] = '\0';
    dns_table[used].ip_addr = 0;
}
/**
 * Decode a DNS wire-format name into dotted text.
 *
 * A wire-format name is a sequence of labels, each a length byte followed by
 * that many characters (e.g. 3 'w' 'w' 'w'), ended by a zero length byte.
 * A byte whose top two bits are 11 starts a two-byte compression pointer
 * whose low 14 bits are an offset from the start of the DNS message.
 *
 * Hardening against untrusted input:
 *  - a compression pointer must point strictly backwards, and only a limited
 *    number of pointers is followed, so pointer cycles cannot loop forever;
 *  - label types with top bits 01 or 10 (reserved) are rejected;
 *  - the output is never written past domain[max_len - 1].
 *
 * The function is not told how long the message is, so the caller must make
 * sure the name is terminated inside the buffer that data points to.
 *
 * @param data     Start of the DNS message (header included).
 * @param offset   Offset of the first label length byte within data.
 * @param domain   Output buffer for the NUL-terminated dotted name.
 * @param max_len  Size of domain in bytes.
 * @return domain. On malformed input the result is the empty string; a name
 *         that does not fit is truncated.
 */
char *parse_dns_domain(const uint8_t *data,int offset,char *domain,int max_len){
    enum { LABEL_LEN_MAX = 63, POINTER_JUMPS_MAX = 16 };

    if (domain == NULL || max_len <= 0) {
        return domain;
    }
    domain[0] = '\0';
    if (data == NULL || offset < 0) {
        return domain;
    }

    int pos = offset;   /* read position inside the DNS message */
    int out = 0;        /* write position inside domain */
    int jumps = 0;      /* compression pointers followed so far */

    for (;;) {
        const int len = data[pos];

        if (len == 0) {
            break;      /* root label: end of name */
        }

        if ((len & 0xC0) == 0xC0) {
            /* Compression pointer: 14-bit offset from the message start. */
            const int target = ((len & 0x3F) << 8) | data[pos + 1];
            if (target >= pos || ++jumps > POINTER_JUMPS_MAX) {
                domain[0] = '\0';
                return domain;
            }
            pos = target;
            continue;
        }

        if (len > LABEL_LEN_MAX) {
            /* Top bits 01 / 10 are reserved label types. */
            domain[0] = '\0';
            return domain;
        }

        /* Ordinary label: separate from the previous one with a dot. */
        if (out > 0) {
            if (out >= max_len - 1) {
                break;
            }
            domain[out++] = '.';
        }

        const int room = max_len - 1 - out;
        const int ncopy = len < room ? len : room;
        memcpy(domain + out, data + pos + 1, (size_t)ncopy);
        out += ncopy;
        if (ncopy < len) {
            break;      /* output buffer full: truncate */
        }
        pos += 1 + len;
    }

    domain[out] = '\0';
    return domain;
}
//构造DNS响应包
struct rte_mbuf *build_dns_response(struct rte_mbuf *request_pkt,uint32_t ip_addr){
struct rte_ether_hdr *eth_hdr = rte_pktmbuf_mtod(request_pkt, struct rte_ether_hdr *);
struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)((char *)eth_hdr + sizeof(struct rte_ether_hdr));
struct rte_udp_hdr *udp_hdr = (struct rte_udp_hdr *)((char *)ipv4_hdr + sizeof(struct rte_ipv4_hdr));
// 计算现有数据长度
uint16_t total_len = rte_pktmbuf_data_len(request_pkt);
uint16_t payload_len = total_len - sizeof(struct rte_ether_hdr) - sizeof(struct rte_ipv4_hdr) - sizeof(struct rte_udp_hdr);
uint8_t *payload = (uint8_t *)udp_hdr + sizeof(struct rte_udp_hdr);
//创建新的响应包
struct rte_mbuf *response_pkt = rte_pktmbuf_alloc(mbuf_pool);
if (response_pkt == NULL) {
return NULL;
}
// 设置响应包大小
uint16_t response_payload_len = payload_len + 16; // 额外空间用于IP地址
uint16_t response_total_len = sizeof(struct rte_ether_hdr) + sizeof(struct rte_ipv4_hdr) + sizeof(struct rte_udp_hdr) + response_payload_len;
//rte_pktmbuf_append(mbuf, len) 用于扩展 mbuf 的可用数据空间,并将 mbuf 的data_len(已用数据长度)增加len字节
if (rte_pktmbuf_append(response_pkt, response_total_len) == NULL) {
rte_pktmbuf_free(response_pkt);
return NULL;
}
// 获取新包的头部指针
struct rte_ether_hdr *resp_eth_hdr = rte_pktmbuf_mtod(response_pkt, struct rte_ether_hdr *);
struct rte_ipv4_hdr *resp_ipv4_hdr = (struct rte_ipv4_hdr *)((char *)resp_eth_hdr + sizeof(struct rte_ether_hdr));
struct rte_udp_hdr *resp_udp_hdr = (struct rte_udp_hdr *)((char *)resp_ipv4_hdr + sizeof(struct rte_ipv4_hdr));
uint8_t *resp_payload = (uint8_t *)resp_udp_hdr + sizeof(struct rte_udp_hdr);
//交换以太网地址
struct rte_ether_addr tmp_addr = eth_hdr->s_addr;
rte_ether_addr_copy(ð_hdr->d_addr, &resp_eth_hdr->s_addr);
rte_ether_addr_copy(&tmp_addr, &resp_eth_hdr->d_addr);
resp_eth_hdr->ether_type = eth_hdr->ether_type;
// 复制IPv4头部,交换源目的IP
memcpy(resp_ipv4_hdr, ipv4_hdr, sizeof(struct rte_ipv4_hdr));
uint32_t tmp_ip = resp_ipv4_hdr->src_addr;
resp_ipv4_hdr->src_addr = resp_ipv4_hdr->dst_addr;
resp_ipv4_hdr->dst_addr = tmp_ip;
resp_ipv4_hdr->total_length = rte_cpu_to_be_16(sizeof(struct rte_ipv4_hdr) + sizeof(struct rte_udp_hdr) + response_payload_len);
resp_ipv4_hdr->hdr_checksum = 0; // 重新计算校验和
// 复制UDP头部,交换源目的端口
memcpy(resp_udp_hdr, udp_hdr, sizeof(struct rte_udp_hdr));
uint16_t tmp_port = resp_udp_hdr->src_port;
resp_udp_hdr->src_port = resp_udp_hdr->dst_port;
resp_udp_hdr->dst_port = tmp_port;
resp_udp_hdr->dgram_len = rte_cpu_to_be_16(sizeof(struct rte_udp_hdr) + response_payload_len);
resp_udp_hdr->dgram_cksum = 0; // 先清零,后面计算
// 复制原始DNS请求内容
memcpy(resp_payload, payload, payload_len);
// 修改DNS头:设置为响应
resp_payload[2] |= 0x80; // QR bit = 1 (response)
resp_payload[3] &= 0xF0; // 清除rcode
resp_payload[6] = 0x00; // NSCOUNT = 0
resp_payload[7] = 0x01; // ANCOUNT = 1 (1 answer record)
resp_payload[8] = 0x00; // ARCOUNT = 0
resp_payload[9] = 0x00; // ARCOUNT = 0
// 添加DNS回答记录,payload_len是请求长度,回答记录要接在原始请求内容的后面,所以起始偏移设为payload_len
int answer_offset = payload_len;
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
//当一个字节的最高两位是11(二进制)时,这个字节不是标签长度,而是压缩指针的 "高位部分",必须和下一个字节组合成完整的 14 位偏移指针。
// Name (压缩指针指向查询名)
resp_payload[answer_offset] = 0xC0; // 指针标志
resp_payload[answer_offset+1] = 0x0C; // 指向第12字节(查询名开始处)
// Type (A记录)
resp_payload[answer_offset+2] = 0x00;
resp_payload[answer_offset+3] = 0x01;
// Class (IN)
resp_payload[answer_offset+4] = 0x00;
resp_payload[answer_offset+5] = 0x01;
// TTL (300秒)
resp_payload[answer_offset+6] = 0x00;
resp_payload[answer_offset+7] = 0x00;
resp_payload[answer_offset+8] = 0x01;
resp_payload[answer_offset+9] = 0x2C;
// Data length (4 bytes for IPv4 address)
resp_payload[answer_offset+10] = 0x00;
resp_payload[answer_offset+11] = 0x04;
// IP Address (网络字节序)
uint8_t *ip_bytes = (uint8_t *)&ip_addr;
resp_payload[answer_offset+12] = ip_bytes[0];
resp_payload[answer_offset+13] = ip_bytes[1];
resp_payload[answer_offset+14] = ip_bytes[2];
resp_payload[answer_offset+15] = ip_bytes[3];
// 重新计算IP校验和
resp_ipv4_hdr->hdr_checksum = rte_ipv4_cksum(resp_ipv4_hdr);
// 计算UDP校验和(关键修复点)
resp_udp_hdr->dgram_cksum = calculate_udp_checksum(resp_ipv4_hdr, resp_udp_hdr);
// 设置mbuf的数据包长度
rte_pktmbuf_data_len(response_pkt) = response_total_len;
rte_pktmbuf_pkt_len(response_pkt) = response_total_len;
return response_pkt;
}
//处理DNS请求
bool process_dns_request(struct rte_mbuf *pkt){
struct rte_ether_hdr *eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
struct rte_ipv4_hdr *ipv4_hdr;
struct rte_udp_hdr *udp_hdr;
uint8_t *payload;
//检查是否为IPV4
if(rte_be_to_cpu_16(eth_hdr->ether_type) != RTE_ETHER_TYPE_IPV4){
return false;
}
ipv4_hdr = (struct rte_ipv4_hdr *)((char *)eth_hdr + sizeof(struct rte_ether_hdr));
//检查是否为UDP
if(ipv4_hdr->next_proto_id != IPPROTO_UDP){
return false;
}
//检查IP头部长度
uint8_t ip_hdr_len = (ipv4_hdr->version_ihl & 0x0F) * 4;
udp_hdr = (struct rte_udp_hdr *)((char *)ipv4_hdr + ip_hdr_len);
payload = (uint8_t *)udp_hdr + sizeof(struct rte_udp_hdr);
// 检查是否为DNS请求(目标端口53)
if (rte_be_to_cpu_16(udp_hdr->dst_port) != DNS_PORT) {
return false;
}
// 检查DNS头部
uint16_t pkt_len = rte_pktmbuf_data_len(pkt);
if (pkt_len < sizeof(struct rte_ether_hdr) + ip_hdr_len + sizeof(struct rte_udp_hdr) + DNS_HEADER_SIZE) {
return false;
}
// 检查是否为查询(QR=0)
if ((payload[2] & 0x80) != 0) {
return false; // 这是响应包,不是查询
}
//解析域名
char domain[256];
parse_dns_domain(payload,DNS_HEADER_SIZE,domain,sizeof(domain));
printf("DNS Query for: %s\n", domain);
//查找DNS表
for(int i = 0;dns_table[i].domain[0] !='\0';i++){
if(strcmp(dns_table[i].domain,domain) == 0){
printf("Found DNS mapping: %s -> %s\n", domain, inet_ntoa(*(struct in_addr*)&dns_table[i].ip_addr));
//构建DNS响应
struct rte_mbuf *response_pkt = build_dns_response(pkt,dns_table[i].ip_addr);
if(response_pkt != NULL){
//发送响应包回到物理端口
unsigned nb_tx = rte_eth_tx_burst(PORT_ID,0,&response_pkt,1);
if (nb_tx == 0) {
printf("Failed to send DNS response\n");
rte_pktmbuf_free(response_pkt);
}
// 释放原请求包
rte_pktmbuf_free(pkt);
return true; // 已处理DNS请求
}else {
printf("Failed to build DNS response\n");
}
}
}
return false;
}
/**
 * Configure and start an Ethernet port with one RX and one TX queue, then
 * enable promiscuous mode.
 *
 * Failures are reported with the error code returned by the failing call and
 * signalled to the caller through the return value, so the caller decides how
 * to terminate.
 *
 * @param port  Ethernet port id.
 * @return 0 on success, -1 on failure.
 */
static int init_port(uint16_t port){
    enum { RX_RING_SIZE = 1024, TX_RING_SIZE = 1024 };

    struct rte_eth_conf port_conf = {
        .rxmode = {
            .max_rx_pkt_len = RTE_ETHER_MAX_LEN, /* maximum accepted frame length */
            .split_hdr_size = 0,                 /* header split disabled */
        },
        .txmode = {
            .mq_mode = ETH_MQ_TX_NONE,           /* no multi-queue TX mode */
        }
    };
    int ret;

    /* One RX queue, one TX queue. */
    ret = rte_eth_dev_configure(port, 1, 1, &port_conf);
    if (ret < 0) {
        printf("Cannot configure device: err=%d (%s), port=%u\n", ret, rte_strerror(-ret), port);
        return -1;
    }

    ret = rte_eth_rx_queue_setup(port, 0, RX_RING_SIZE,
                                 rte_eth_dev_socket_id(port), NULL, mbuf_pool);
    if (ret < 0) {
        printf("Setup RX queue failed: err=%d (%s)\n", ret, rte_strerror(-ret));
        return -1;
    }

    ret = rte_eth_tx_queue_setup(port, 0, TX_RING_SIZE,
                                 rte_eth_dev_socket_id(port), NULL);
    if (ret < 0) {
        printf("Setup TX queue failed: err=%d (%s)\n", ret, rte_strerror(-ret));
        return -1;
    }

    ret = rte_eth_dev_start(port);
    if (ret < 0) {
        printf("Device start failed: err=%d (%s)\n", ret, rte_strerror(-ret));
        return -1;
    }

    /* Promiscuous mode: accept frames regardless of destination MAC. */
    rte_eth_promiscuous_enable(port);

    printf("Port %d initialized successfully\n", port);
    return 0;
}
/**
 * KNI callback invoked when the interface is brought up or down.
 *
 * Logs the transition and sets the KNI link state to match.
 *
 * @param port_id  Port id the request refers to (used for logging only).
 * @param if_up    Non-zero to bring the interface up, zero to bring it down.
 * @return Always 0.
 */
static int config_network_if(uint16_t port_id,uint8_t if_up){
    if (!if_up) {
        printf("KNI Interface %u down\n", port_id);
        rte_kni_update_link(kni, 0);    /* 0 = link down */
        return 0;
    }

    printf("KNI Interface %u up\n", port_id);
    rte_kni_update_link(kni, 1);        /* 1 = link up */
    return 0;
}
//初始化kni
static struct rte_kni *init_kni(uint16_t port_id){
//kni接口配置结构体
struct rte_kni_conf conf;
//DPDK网卡设备信息结构体
struct rte_eth_dev_info dev_info;
//KNI操作回调结构体
struct rte_kni_ops ops;
memset(&conf, 0, sizeof(conf));
snprintf(conf.name, RTE_KNI_NAMESIZE, "vEth%u", port_id);
conf.group_id = port_id;
conf.mbuf_size = MAX_PACKET_SIZE;
//获取设备信息
rte_eth_dev_info_get(port_id,&dev_info);
struct rte_ether_addr mac_addr;
rte_eth_macaddr_get(port_id,&mac_addr);
//让kni网卡的dodk网卡的MAC地址一致
memcpy(conf.mac_addr,&mac_addr,RTE_ETHER_ADDR_LEN);
//获取MTU
rte_eth_dev_get_mtu(port_id,&conf.mtu);
//配置kni操作
memset(&ops,0,sizeof(ops));
ops.port_id = port_id;
ops.change_mtu = NULL;
ops.config_network_if = config_network_if;
ops.config_mac_address = NULL;
//分配创建KNI接口
struct rte_kni *kni_local = rte_kni_alloc(mbuf_pool,&conf,&ops);
if (kni_local == NULL) {
// rte_strerror + rte_errno:DPDK标准的错误信息打印方式
printf("Error creating KNI interface: %s\n", rte_strerror(rte_errno));
return NULL;
}
printf("KNI interface %s created successfully\n", conf.name);
return kni_local;
}
/**
 * Handler for SIGINT / SIGTERM: announce the signal and ask the main loop to
 * stop by setting force_quit.
 *
 * Only async-signal-safe operations are used. printf() is not on that list
 * and may deadlock or corrupt stdio state if the signal interrupts another
 * stdio call, so the message is formatted by hand and emitted with write().
 *
 * @param sig  Signal number delivered.
 */
static void signal_handler(int sig) {
    static const char head[] = "\nReceived signal ";
    static const char tail[] = ", exiting...\n";
    char msg[sizeof(head) + sizeof(tail) + 12];
    char digits[12];
    size_t len = 0;
    size_t ndigits = 0;
    int value = sig;

    if (sig != SIGINT && sig != SIGTERM) {
        return;
    }

    memcpy(msg, head, sizeof(head) - 1);
    len = sizeof(head) - 1;

    /* Decimal conversion of the (positive) signal number. */
    do {
        digits[ndigits++] = (char)('0' + value % 10);
        value /= 10;
    } while (value > 0 && ndigits < sizeof(digits));
    while (ndigits > 0) {
        msg[len++] = digits[--ndigits];
    }

    memcpy(msg + len, tail, sizeof(tail) - 1);
    len += sizeof(tail) - 1;

    /* Best effort: nothing useful can be done here if the write fails. */
    ssize_t written = write(STDOUT_FILENO, msg, len);
    (void)written;

    force_quit = 1;
}
/**
 * Sleep for roughly the given number of microseconds by calling select()
 * with no file descriptors and a timeout.
 *
 * @param us  Delay in microseconds.
 */
static void usleep_safe(uint32_t us) {
    const uint32_t usec_per_sec = 1000000;
    struct timeval timeout = {
        .tv_sec = us / usec_per_sec,
        .tv_usec = us % usec_per_sec,
    };

    select(0, NULL, NULL, NULL, &timeout);
}
/**
 * Packet loop: answer DNS queries locally, pass all other received packets to
 * the kernel through KNI, and send packets coming from the kernel out of the
 * physical port. Runs until force_quit is set.
 *
 * The loop sleeps only when neither direction had traffic, so CPU usage stays
 * low when idle without throttling throughput under load.
 */
static void main_loop(void){
    struct rte_mbuf *pkts_burst[BURST_SIZE];
    struct rte_mbuf *to_kernel[BURST_SIZE];
    unsigned i;

    printf("Starting packet processing loop... DNS requests will be handled locally, others sent to kernel\n");

    while (!force_quit) {
        /* Physical port -> DNS handler or kernel. */
        const unsigned nb_rx = rte_eth_rx_burst(PORT_ID, 0, pkts_burst, BURST_SIZE);
        unsigned nb_to_kernel = 0;

        for (i = 0; i < nb_rx; i++) {
            /* process_dns_request() frees the packet when it returns true. */
            if (!process_dns_request(pkts_burst[i])) {
                to_kernel[nb_to_kernel++] = pkts_burst[i];
            }
        }

        if (nb_to_kernel > 0) {
            /* Hand everything that was not answered to the kernel in one burst. */
            const unsigned nb_kni_tx = rte_kni_tx_burst(kni, to_kernel, nb_to_kernel);
            for (i = nb_kni_tx; i < nb_to_kernel; i++) {
                rte_pktmbuf_free(to_kernel[i]);     /* KNI queue full: drop */
            }
        }

        /* Kernel -> physical port (pkts_burst is reused). */
        const unsigned nb_kni_rx = rte_kni_rx_burst(kni, pkts_burst, BURST_SIZE);
        if (nb_kni_rx > 0) {
            const unsigned nb_eth_tx = rte_eth_tx_burst(PORT_ID, 0, pkts_burst, nb_kni_rx);
            for (i = nb_eth_tx; i < nb_kni_rx; i++) {
                rte_pktmbuf_free(pkts_burst[i]);    /* TX queue full: drop */
            }
        }

        /* Service KNI control requests; without this the callbacks never fire. */
        rte_kni_handle_request(kni);

        /* Back off only when idle. */
        if (nb_rx == 0 && nb_kni_rx == 0) {
            usleep_safe(CPU_DELAY_US);
        }
    }
}
/**
 * Program entry point: initialise EAL, KNI, the mbuf pool, the DNS table, the
 * Ethernet port and the KNI interface, run the packet loop until a
 * termination signal arrives, then release everything that was set up.
 *
 * @param argc  Argument count, passed to rte_eal_init().
 * @param argv  Argument vector, passed to rte_eal_init().
 * @return 0 on normal termination; initialisation failures exit through
 *         rte_exit().
 */
int main(int argc,char *argv[]){
    const uint16_t portid = PORT_ID;
    int ret;

    /* Termination signals only set a flag that the main loop polls. */
    signal(SIGINT, signal_handler);
    signal(SIGTERM, signal_handler);

    /* Initialise EAL. */
    ret = rte_eal_init(argc, argv);
    if (ret < 0)
        rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n");

    /* Fail early with a clear message if the port is not present. */
    if (!rte_eth_dev_is_valid_port(portid))
        rte_exit(EXIT_FAILURE, "Port %"PRIu16" is not available\n", portid);

    /* Initialise the KNI subsystem for one interface. */
    ret = rte_kni_init(1);
    if (ret < 0) {
        rte_exit(EXIT_FAILURE, "KNI initialization failed\n");
    }

    /* Create the mbuf pool. */
    mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", NUM_MBUFS,
                                        MBUF_CACHE_SIZE, 0,
                                        RTE_MBUF_DEFAULT_BUF_SIZE,
                                        rte_socket_id());
    if (mbuf_pool == NULL)
        rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");

    /* Initialise the DNS table. */
    init_dns_table();

    /* Initialise the port. */
    if (init_port(portid) != 0)
        rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu16"\n", portid);

    /* Initialise KNI. */
    kni = init_kni(portid);
    if (kni == NULL)
        rte_exit(EXIT_FAILURE, "Cannot init KNI\n");

    /* Report the KNI link as up. */
    rte_kni_update_link(kni, 1);

    printf("DPDK KNI DNS Processing Application Started\n");
    printf("Port %d, KNI interface: vEth%u\n", portid, portid);
    printf("DNS queries will be handled locally, other traffic forwarded to kernel\n");
    printf("Predefined DNS mappings:\n");
    for (int i = 0; dns_table[i].domain[0] != '\0'; i++) {
        struct in_addr mapped = { .s_addr = dns_table[i].ip_addr };
        printf(" %s -> %s\n", dns_table[i].domain, inet_ntoa(mapped));
    }
    printf("Press Ctrl+C to exit\n");

    /* Run until a termination signal sets force_quit. */
    main_loop();

    /* Tear down in reverse order of setup. */
    printf("Cleaning up...\n");
    if (kni) {
        rte_kni_release(kni);
    }
    rte_kni_close();

    /* Stop and close the port so the NIC stops DMA into our memory. */
    rte_eth_dev_stop(portid);
    rte_eth_dev_close(portid);

    /* Release hugepage memory and other EAL resources. */
    rte_eal_cleanup();
    return 0;
}
2.Makefile
makefile
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2010-2014 Intel Corporation

# NOTE: every recipe line below must start with a TAB character, not spaces;
# otherwise make stops with "missing separator".

# binary name
APP = kni_dns

# all source are stored in SRCS-y
SRCS-y := kni_dns.c

# Build using pkg-config variables if possible
ifeq ($(shell pkg-config --exists libdpdk && echo 0),0)

all: shared
.PHONY: shared static
shared: build/$(APP)-shared
	ln -sf $(APP)-shared build/$(APP)
static: build/$(APP)-static
	ln -sf $(APP)-static build/$(APP)

PKGCONF=pkg-config --define-prefix

PC_FILE := $(shell $(PKGCONF) --path libdpdk)
CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk)
CFLAGS += -DALLOW_EXPERIMENTAL_API
LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk)
LDFLAGS_STATIC = -Wl,-Bstatic $(shell $(PKGCONF) --static --libs libdpdk)

build/$(APP)-shared: $(SRCS-y) Makefile $(PC_FILE) | build
	$(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_SHARED)

build/$(APP)-static: $(SRCS-y) Makefile $(PC_FILE) | build
	$(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_STATIC)

build:
	@mkdir -p $@

.PHONY: clean
clean:
	rm -f build/$(APP) build/$(APP)-static build/$(APP)-shared
	test -d build && rmdir -p build || true

else # Build using legacy build system

ifeq ($(RTE_SDK),)
$(error "Please define RTE_SDK environment variable")
endif

# Default target, detect a build directory, by looking for a path with a .config
RTE_TARGET ?= $(notdir $(abspath $(dir $(firstword $(wildcard $(RTE_SDK)/*/.config)))))

include $(RTE_SDK)/mk/rte.vars.mk

ifneq ($(CONFIG_RTE_EXEC_ENV_LINUX),y)
$(error This application can only operate in a linux environment, \
please change the definition of the RTE_TARGET environment variable)
endif

CFLAGS += -O3
CFLAGS += -DALLOW_EXPERIMENTAL_API
CFLAGS += $(WERROR_FLAGS)

include $(RTE_SDK)/mk/rte.extapp.mk

endif
3.运行案例


此时另一台主机:

程序打印:
