【Linux驱动开发】Linux网络设备驱动底层原理与实现详解

Linux网络设备驱动底层原理与实现详解

1. Linux网络设备驱动架构

1.1 网络子系统整体架构

Linux网络子系统采用分层架构设计,以net_device结构体为核心,构建起协议栈与硬件设备之间的桥梁。

复制代码
用户空间
    ↓
系统调用接口 (socket)
    ↓
协议栈 (TCP/IP)
    ↓
网络设备接口层
    ↓
驱动程序
    ↓
硬件设备
核心组件关系

网络子系统主要包含以下关键组件:

  • Socket层:提供用户空间网络编程接口
  • 协议栈:实现TCP/IP协议族
  • 设备接口层:抽象网络设备操作
  • 驱动层:具体硬件设备控制
  • PHY层:物理层芯片管理

1.2 net_device结构体核心地位

net_device是Linux网络子系统的中枢数据结构,定义了网络设备的所有属性和操作接口:

c 复制代码
/*
 * Central descriptor for one network interface: identity, state, addresses,
 * operation tables, feature flags, statistics and driver-private data.
 * NOTE(review): this looks like an abridged, illustrative field list rather
 * than the literal kernel definition - confirm against
 * include/linux/netdevice.h for the kernel version in use.
 */
struct net_device {
    char name[IFNAMSIZ];           /* interface name */
    struct hlist_node name_hlist;  /* node in the name hash list */
    struct hlist_node index_hlist; /* node in the index hash list */
    int ifindex;                    /* interface index */
    
    /* Device state */
    unsigned long state;            /* device state bitmap */
    unsigned long priv_flags;       /* private flags */
    
    /* Network-layer information */
    struct net *nd_net;             /* network namespace */
    struct in_device __rcu *ip_ptr; /* IPv4 configuration */
    struct inet6_dev *ip6_ptr;    /* IPv6 configuration */
    
    /* Hardware address */
    unsigned char perm_addr[MAX_ADDR_LEN]; /* permanent hardware address */
    unsigned char addr_assign_type;        /* how the address was assigned */
    
    /* Operation tables supplied by the driver */
    const struct net_device_ops *netdev_ops;
    const struct ethtool_ops *ethtool_ops;
    
    /* Feature flags */
    netdev_features_t features;     /* currently active features */
    netdev_features_t hw_features;  /* features the hardware supports */
    netdev_features_t wanted_features; /* features requested */
    
    /* Statistics */
    struct net_device_stats stats;  /* basic counters */
    atomic_long_t rx_dropped;       /* dropped-on-receive counter */
    atomic_long_t tx_dropped;       /* dropped-on-transmit counter */
    
    /* NAPI */
    struct napi_struct *napi_list;  /* list of NAPI contexts */
    
    /* Private data */
    void *priv;                     /* driver-private data */
};

1.3 协议栈与驱动交互机制

NAPI模式(New API)

NAPI是Linux内核为高性能网络处理设计的接口,采用中断+轮询的混合模式:

c 复制代码
/*
 * Per-poll-context NAPI state: list linkage, state bits, budget weight,
 * the driver's poll callback and the owning device.
 * NOTE(review): abridged for illustration - confirm against the kernel headers.
 */
struct napi_struct {
    struct list_head poll_list;     /* linkage on the poll list */
    unsigned long state;            /* NAPI state bits */
    int weight;                     /* poll weight */
    int (*poll)(struct napi_struct *, int); /* poll callback */
    struct net_device *dev;         /* associated device */
    struct gro_list gro_list;       /* GRO list */
};

NAPI工作流程:

  1. 中断触发:网卡接收到数据包时触发中断
  2. 关闭中断:驱动屏蔽网卡的接收中断(并非关闭CPU中断),切换到轮询模式
  3. 轮询处理:内核调用注册的poll函数批量处理数据包
  4. 重新启用:处理完成后重新启用中断
非NAPI模式

传统的中断驱动模式,每个数据包都触发中断处理:

c 复制代码
// 中断处理函数示例
static irqreturn_t network_interrupt(int irq, void *dev_id)
{
    struct net_device *dev = dev_id;
    struct network_priv *priv = netdev_priv(dev);
    
    /* 检查中断状态 */
    status = readl(priv->base + STATUS_REG);
    
    if (status & RX_INTERRUPT) {
        /* 处理接收中断 */
        network_rx(dev);
    }
    
    if (status & TX_INTERRUPT) {
        /* 处理发送完成中断 */
        network_tx_complete(dev);
    }
    
    return IRQ_HANDLED;
}

1.4 内核网络数据流路径

发送路径(Tx)
复制代码
用户空间send() → 系统调用 → 协议栈 → dev_queue_xmit() → 
驱动发送函数 → 硬件DMA → 网卡发送 → 网络介质

详细流程:

  1. 系统调用:用户空间调用send()或sendto()
  2. 协议栈处理:TCP/UDP层添加协议头
  3. 路由选择:根据路由表选择出口设备
  4. 设备队列:调用dev_queue_xmit()进入设备队列
  5. 驱动发送:驱动程序将数据包映射到DMA缓冲区
  6. 硬件发送:网卡通过DMA读取数据并发送到网络
接收路径(Rx)
复制代码
网络介质 → 网卡接收 → DMA传输 → 中断触发 → 
驱动接收函数 → netif_rx() → 协议栈 → 用户空间

详细流程:

  1. 硬件接收:网卡检测到数据包到达
  2. DMA传输:网卡将数据通过DMA写入内存
  3. 中断通知:网卡触发中断通知CPU
  4. 驱动处理:驱动分配sk_buff并填充数据
  5. 协议栈处理:调用netif_rx()(NAPI驱动中通常为netif_receive_skb()或napi_gro_receive())进入协议栈
  6. 用户接收:数据最终到达用户空间socket缓冲区

2. 关键数据结构解析

2.1 net_device结构体字段详解

基本属性字段
c 复制代码
/*
 * Basic attribute fields of struct net_device: identity, state bits,
 * hardware addresses and per-protocol configuration pointers.
 * NOTE(review): abridged for illustration - confirm against the kernel headers.
 */
struct net_device {
    /* Device identity */
    char name[IFNAMSIZ];                    /* interface name, e.g. eth0 */
    int ifindex;                            /* unique interface index */
    
    /* Device state */
    unsigned long state;                    /* device state bits */
    #define __LINK_STATE_START     0       /* device has been started */
    #define __LINK_STATE_PRESENT   1       /* device is present */
    #define __LINK_STATE_NOCARRIER 2       /* no carrier */
    
    /* Hardware addresses */
    unsigned char perm_addr[MAX_ADDR_LEN];  /* permanent MAC address */
    unsigned char dev_addr[MAX_ADDR_LEN];   /* current MAC address */
    unsigned char broadcast[MAX_ADDR_LEN];  /* broadcast address */
    
    /* Network-layer configuration */
    struct in_device __rcu *ip_ptr;         /* IPv4 configuration */
    struct inet6_dev *ip6_ptr;              /* IPv6 configuration */
};
操作函数集
c 复制代码
/*
 * Table of driver callbacks invoked by the networking core.
 * NOTE(review): abridged for illustration; the exact set of callbacks and
 * their signatures varies between kernel versions - confirm against
 * include/linux/netdevice.h.
 */
struct net_device_ops {
    /* Device lifecycle */
    int (*ndo_init)(struct net_device *dev);
    void (*ndo_uninit)(struct net_device *dev);
    int (*ndo_open)(struct net_device *dev);
    int (*ndo_stop)(struct net_device *dev);
    
    /* Packet transmission */
    netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb,
                                   struct net_device *dev);
    
    /* Device configuration */
    int (*ndo_set_mac_address)(struct net_device *dev, void *addr);
    int (*ndo_validate_addr)(struct net_device *dev);
    int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd);
    
    /* Statistics */
    struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);
    
    /* VLAN support */
    int (*ndo_vlan_rx_add_vid)(struct net_device *dev, __be16 proto, u16 vid);
    int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, __be16 proto, u16 vid);
    
    /* Multi-queue support */
    u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb,
                             struct net_device *sb_dev);
};

2.2 sk_buff结构体内存布局

sk_buff是Linux网络子系统中最重要的数据结构,用于表示网络数据包:

c 复制代码
/*
 * Socket buffer: the descriptor used to carry one packet through the stack.
 * NOTE(review): abridged and simplified for illustration (field types and
 * membership differ in real kernels) - confirm against
 * include/linux/skbuff.h.
 */
struct sk_buff {
    /* List linkage */
    struct sk_buff *next;           /* next buffer */
    struct sk_buff *prev;           /* previous buffer */
    
    /* Buffer bookkeeping */
    struct sk_buff_head *list;      /* list head this buffer belongs to */
    unsigned char *head;            /* start of the allocated buffer */
    unsigned char *data;              /* start of the packet data */
    unsigned char *tail;              /* end of the packet data */
    unsigned char *end;             /* end of the allocated buffer */
    
    /* Length information */
    unsigned int len;                 /* total data length */
    unsigned int data_len;            /* length of the data part (presumably the
                                       * paged/non-linear portion - confirm) */
    __u16 mac_len;                    /* MAC header length */
    __u16 hdr_len;                    /* header length when cloned */
    
    /* Protocol information */
    __u16 protocol;                   /* protocol type */
    __u16 transport_header;           /* transport-layer header offset */
    __u16 network_header;             /* network-layer header offset */
    __u16 mac_header;                 /* MAC-layer header offset */
    
    /* Device information */
    struct net_device *dev;           /* associated device */
    struct net_device *input_dev;     /* input device */
    
    /* Flags */
    __u32 flags;                      /* buffer flags */
    
    /* Checksum information */
    __u32 ip_summed;                  /* IP checksum state */
    __u32 csum;                       /* checksum value */
    __u32 csum_start;                 /* where checksumming starts */
    __u32 csum_offset;                /* checksum offset */
    
    /* Timestamp */
    ktime_t tstamp;                   /* timestamp */
    
    /* Private data */
    char cb[48] __aligned(8);         /* control buffer */
    
    /* Reference counting */
    atomic_t users;                   /* reference count */
};
sk_buff内存操作API
c 复制代码
/*
 * Commonly used sk_buff helper prototypes, grouped by purpose.
 * NOTE(review): listed for illustration; exact signatures (e.g. return
 * types of the skb_put/skb_push family) differ between kernel versions.
 */

/* Allocate an sk_buff */
struct sk_buff *alloc_skb(unsigned int size, gfp_t priority);
struct sk_buff *dev_alloc_skb(unsigned int length);

/* Free an sk_buff */
void kfree_skb(struct sk_buff *skb);
void dev_kfree_skb(struct sk_buff *skb);

/* Reserve headroom and adjust the data area */
static inline void skb_reserve(struct sk_buff *skb, int len);
static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len);
static inline unsigned char *skb_pull(struct sk_buff *skb, unsigned int len);
static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len);

/* Clone and copy */
struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority);
struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t priority);

/* Protocol header accessors */
static inline struct ethhdr *eth_hdr(const struct sk_buff *skb);
static inline struct iphdr *ip_hdr(const struct sk_buff *skb);
static inline struct tcphdr *tcp_hdr(const struct sk_buff *skb);

2.3 网络设备操作集详解

基本操作实现
c 复制代码
/*
 * Example operation table wiring a driver's callbacks into the networking
 * core. The my_* handlers are expected to be defined by the driver;
 * eth_validate_addr and eth_change_mtu are referenced as ready-made helpers.
 * NOTE(review): some of these members (ndo_do_ioctl, eth_change_mtu) have
 * been changed or removed in newer kernels - confirm for the target version.
 */
static const struct net_device_ops my_netdev_ops = {
    .ndo_init = my_init,
    .ndo_uninit = my_uninit,
    .ndo_open = my_open,
    .ndo_stop = my_stop,
    .ndo_start_xmit = my_xmit,
    .ndo_set_mac_address = my_set_mac,
    .ndo_validate_addr = eth_validate_addr,
    .ndo_do_ioctl = my_ioctl,
    .ndo_get_stats = my_get_stats,
    .ndo_change_mtu = eth_change_mtu,
    .ndo_tx_timeout = my_tx_timeout,
    .ndo_set_rx_mode = my_set_multicast_list,
    .ndo_vlan_rx_add_vid = my_vlan_rx_add_vid,
    .ndo_vlan_rx_kill_vid = my_vlan_rx_kill_vid,
};

3. 驱动实现核心技术

3.1 物理层适配(PHY/MAC交互)

PHY设备管理
c 复制代码
/*
 * Descriptor for one PHY chip: identity, negotiated link parameters,
 * state-machine bookkeeping and capability masks.
 * NOTE(review): abridged for illustration - confirm against
 * include/linux/phy.h.
 */
struct phy_device {
    struct bus_type *bus;           /* bus type */
    struct device dev;              /* embedded device structure */
    
    u32 phy_id;                     /* PHY chip ID */
    const char *drv_name;           /* driver name */
    
    /* Link state */
    int speed;                      /* link speed */
    int duplex;                     /* duplex mode */
    int link;                       /* link up/down */
    int pause;                      /* flow-control state */
    
    /* State machine */
    enum phy_state state;           /* PHY state */
    struct delayed_work state_queue; /* delayed work that polls the state */
    
    /* Configuration */
    struct phy_driver *drv;         /* PHY driver */
    u32 supported;                  /* supported features */
    u32 advertising;                /* advertised features */
    u32 lp_advertising;             /* features advertised by the link partner */
};
MAC层与PHY层交互
c 复制代码
/*
 * Periodic PHY state machine, run from the phydev->state_queue delayed work.
 *
 * Under phydev->lock it advances the PHY state:
 *   PHY_UP      -> PHY_AN, and requests auto-negotiation;
 *   PHY_AN      -> PHY_RUNNING or PHY_NOLINK depending on the link bit;
 *   PHY_NOLINK / PHY_RUNNING -> toggled when the link bit changes.
 * Carrier state on the attached net_device is updated on each transition.
 * After dropping the lock it starts auto-negotiation if requested and
 * re-arms itself to run again in HZ jiffies.
 *
 * @work: the work_struct embedded in phydev->state_queue
 */
void phy_state_machine(struct work_struct *work)
{
    struct phy_device *phydev =
        container_of(work, struct phy_device, state_queue.work);
    int needs_aneg = 0, err = 0;    /* unused 'old_state' local dropped */

    mutex_lock(&phydev->lock);

    switch (phydev->state) {
    case PHY_UP:
        needs_aneg = 1;
        /* Move on to auto-negotiation */
        phydev->state = PHY_AN;
        break;

    case PHY_AN:
        err = phy_read_status(phydev);
        if (err)
            break;

        if (phydev->link) {
            phydev->state = PHY_RUNNING;
            netif_carrier_on(phydev->attached_dev);
        } else {
            phydev->state = PHY_NOLINK;
            netif_carrier_off(phydev->attached_dev);
        }
        break;

    case PHY_NOLINK:
    case PHY_RUNNING:
        err = phy_read_status(phydev);
        if (err)
            break;

        /* Only act on an actual link transition */
        if (phydev->link && phydev->state == PHY_NOLINK) {
            phydev->state = PHY_RUNNING;
            netif_carrier_on(phydev->attached_dev);
        } else if (!phydev->link && phydev->state == PHY_RUNNING) {
            phydev->state = PHY_NOLINK;
            netif_carrier_off(phydev->attached_dev);
        }
        break;

    default:
        break;
    }

    mutex_unlock(&phydev->lock);

    /* Started outside the lock, as in the original ordering */
    if (needs_aneg)
        phy_start_aneg(phydev);

    /* Schedule the next state check */
    schedule_delayed_work(&phydev->state_queue, HZ);
}

3.2 DMA环形缓冲区实现原理

环形缓冲区设计
c 复制代码
/*
 * Bookkeeping for one DMA ring: parallel arrays of DMA addresses and
 * sk_buff pointers plus producer/consumer indices.
 */
struct dma_ring {
    dma_addr_t *dma_addr;           /* array of DMA addresses */
    struct sk_buff **skb;           /* array of sk_buff pointers */
    
    u32 size;                       /* ring size */
    u32 count;                      /* number of buffers */
    
    /* Producer side */
    u32 prod;                       /* producer index */
    u32 prod_next;                  /* next producer index */
    
    /* Consumer side */
    u32 cons;                       /* consumer index */
    u32 cons_next;                  /* next consumer index */
    
    /* Synchronisation */
    spinlock_t lock;                /* spinlock */
    atomic_t pending;               /* count of pending entries */
};
DMA映射和内存管理
c 复制代码
/*
 * Allocate and DMA-map the receive buffers of a ring.
 *
 * @ring: ring to initialise; its dma_addr/skb arrays are allocated here
 * @size: number of ring entries
 * @dev:  device used for the DMA mappings
 *
 * Returns 0 on success or -ENOMEM on any allocation/mapping failure, in
 * which case everything allocated so far is released and the ring's array
 * pointers are reset to NULL.
 *
 * Changes from the original:
 *  - arrays use kcalloc() so the element-count multiplication is
 *    overflow-checked;
 *  - buffers are obtained with dev_alloc_skb() + skb_reserve(NET_IP_ALIGN)
 *    because @dev is a struct device, not the struct net_device that
 *    netdev_alloc_skb_ip_align() requires;
 *  - the loop index is unsigned like @size, and unwinding touches only the
 *    entries that were actually set up.
 */
static int dma_ring_alloc(struct dma_ring *ring, u32 size,
                         struct device *dev)
{
    u32 i;

    ring->size = size;
    ring->count = size;
    ring->prod = 0;
    ring->cons = 0;

    /* DMA address array */
    ring->dma_addr = kcalloc(size, sizeof(*ring->dma_addr), GFP_KERNEL);
    if (!ring->dma_addr)
        return -ENOMEM;

    /* sk_buff pointer array */
    ring->skb = kcalloc(size, sizeof(*ring->skb), GFP_KERNEL);
    if (!ring->skb) {
        kfree(ring->dma_addr);
        ring->dma_addr = NULL;
        return -ENOMEM;
    }

    /* Populate every slot with a mapped receive buffer */
    for (i = 0; i < size; i++) {
        struct sk_buff *skb;
        dma_addr_t dma_addr;

        skb = dev_alloc_skb(RX_BUF_SIZE + NET_IP_ALIGN);
        if (!skb)
            goto err_free;
        skb_reserve(skb, NET_IP_ALIGN);

        dma_addr = dma_map_single(dev, skb->data,
                                   RX_BUF_SIZE, DMA_FROM_DEVICE);
        if (dma_mapping_error(dev, dma_addr)) {
            dev_kfree_skb_any(skb);
            goto err_free;
        }

        ring->skb[i] = skb;
        ring->dma_addr[i] = dma_addr;
    }

    spin_lock_init(&ring->lock);
    atomic_set(&ring->pending, 0);

    return 0;

err_free:
    /* Slots [0, i) are fully set up; unwind exactly those */
    while (i--) {
        dma_unmap_single(dev, ring->dma_addr[i],
                       RX_BUF_SIZE, DMA_FROM_DEVICE);
        dev_kfree_skb_any(ring->skb[i]);
    }
    kfree(ring->skb);
    kfree(ring->dma_addr);
    ring->skb = NULL;
    ring->dma_addr = NULL;
    return -ENOMEM;
}
DMA接收流程
c 复制代码
static int dma_rx_process(struct net_device *dev, 
                         struct dma_ring *ring, int budget)
{
    struct device *dma_dev = dev->dev.parent;
    int processed = 0;
    
    while (processed < budget && ring->cons != ring->prod) {
        struct sk_buff *skb;
        u32 cons = ring->cons;
        int pkt_len;
        
        /* 获取数据包长度 */
        pkt_len = dma_get_pkt_len(dev, cons);
        if (pkt_len < 0)
            break;
            
        skb = ring->skb[cons];
        
        /* 取消DMA映射 */
        dma_unmap_single(dma_dev, ring->dma_addr[cons], 
                        RX_BUF_SIZE, DMA_FROM_DEVICE);
        
        /* 设置数据包长度 */
        skb_put(skb, pkt_len);
        
        /* 设置协议类型 */
        skb->protocol = eth_type_trans(skb, dev);
        
        /* 更新统计信息 */
        dev->stats.rx_packets++;
        dev->stats.rx_bytes += pkt_len;
        
        /* 提交给协议栈 */
        netif_receive_skb(skb);
        
        /* 分配新的sk_buff */
        skb = netdev_alloc_skb_ip_align(dev, RX_BUF_SIZE);
        if (skb) {
            dma_addr_t dma_addr;
            
            dma_addr = dma_map_single(dma_dev, skb->data,
                                      RX_BUF_SIZE, DMA_FROM_DEVICE);
            if (!dma_mapping_error(dma_dev, dma_addr)) {
                ring->skb[cons] = skb;
                ring->dma_addr[cons] = dma_addr;
                
                /* 重新提交给硬件 */
                dma_submit_rx_desc(dev, cons, dma_addr, RX_BUF_SIZE);
            } else {
                dev_kfree_skb_any(skb);
            }
        }
        
        ring->cons = (cons + 1) % ring->count;
        processed++;
    }
    
    return processed;
}

3.3 中断处理与轮询模式选择

中断处理优化
c 复制代码
/* 中断处理函数 */
static irqreturn_t network_irq_handler(int irq, void *dev_id)
{
    struct net_device *dev = dev_id;
    struct network_priv *priv = netdev_priv(dev);
    u32 status;
    
    /* 读取中断状态 */
    status = readl(priv->base + INTR_STATUS_REG);
    
    /* 清除中断 */
    writel(status, priv->base + INTR_ACK_REG);
    
    if (status & RX_INTR) {
        if (priv->use_napi) {
            /* NAPI模式:关闭中断,调度NAPI */
            writel(0, priv->base + INTR_MASK_REG);
            napi_schedule(&priv->napi);
        } else {
            /* 传统中断模式:直接处理 */
            network_rx(dev);
        }
    }
    
    if (status & TX_INTR) {
        /* 处理发送完成中断 */
        network_tx_complete(dev);
    }
    
    return IRQ_HANDLED;
}

/*
 * NAPI poll callback.
 *
 * Processes up to @budget received packets, reaps transmit completions,
 * and, when less than the full budget was used, completes NAPI and
 * unmasks device interrupts.
 *
 * napi_complete_done() replaces napi_complete(): it reports the work done
 * to the core and returns false when the poll must stay scheduled (for
 * example when it was rescheduled concurrently), in which case interrupts
 * must remain masked.
 *
 * @napi:   NAPI context embedded in struct network_priv
 * @budget: maximum number of packets to process
 *
 * Returns the number of receive packets processed.
 */
static int network_poll(struct napi_struct *napi, int budget)
{
    struct network_priv *priv = container_of(napi, struct network_priv, napi);
    struct net_device *dev = priv->dev;
    int work_done;

    /* Receive processing */
    work_done = network_rx_poll(dev, budget);

    /* Transmit completions */
    network_tx_complete(dev);

    /* Budget not exhausted: leave polling mode and unmask interrupts */
    if (work_done < budget && napi_complete_done(napi, work_done))
        writel(INTR_MASK_DEFAULT, priv->base + INTR_MASK_REG);

    return work_done;
}
自适应中断调节
c 复制代码
/*
 * Switch between NAPI polling and plain interrupt-driven receive based on
 * the packet rate observed over the last second.
 *
 * Once per HZ jiffies the number of packets received since the previous
 * sample is compared with HIGH_PKT_RATE_THRESHOLD / LOW_PKT_RATE_THRESHOLD.
 *
 * Changes from the original:
 *  - priv->use_napi is set only AFTER the NAPI context is registered and
 *    enabled, and cleared BEFORE it is torn down, so the interrupt handler
 *    never schedules a NAPI context that is not ready;
 *  - the explicit napi.weight assignment is dropped because
 *    netif_napi_add() sets the weight from its argument.
 *
 * NOTE(review): napi_disable() may sleep - confirm this function is only
 * called from process context.
 *
 * @dev: network device to tune
 */
static void network_adaptive_irq(struct net_device *dev)
{
    struct network_priv *priv = netdev_priv(dev);
    unsigned long now = jiffies;

    /* Only re-evaluate once per sampling window */
    if (time_after(now, priv->stats_time + HZ)) {
        u32 pkt_rate = priv->rx_packets - priv->last_rx_packets;

        if (pkt_rate > HIGH_PKT_RATE_THRESHOLD) {
            /* High load: switch to NAPI polling */
            if (!priv->use_napi) {
                netif_napi_add(dev, &priv->napi, network_poll, 64);
                napi_enable(&priv->napi);
                priv->use_napi = 1;
            }
        } else if (pkt_rate < LOW_PKT_RATE_THRESHOLD) {
            /* Low load: switch back to interrupt mode */
            if (priv->use_napi) {
                priv->use_napi = 0;
                napi_disable(&priv->napi);
                netif_napi_del(&priv->napi);
            }
        }

        /* Start a new sampling window */
        priv->last_rx_packets = priv->rx_packets;
        priv->stats_time = now;
    }
}

3.4 流量控制(QoS)实现机制

流量控制框架
c 复制代码
/*
 * Per-transmit-queue state: owning device, transmit lock, rate limits,
 * attached queueing disciplines and statistics.
 * NOTE(review): abridged for illustration - confirm against the kernel headers.
 */
struct netdev_queue {
    struct net_device *dev;         /* owning device */
    
    /* Queue state */
    spinlock_t _xmit_lock;          /* transmit lock */
    int xmit_lock_owner;            /* current lock holder */
    
    /* Rate management */
    unsigned long tx_maxrate;       /* maximum transmit rate */
    unsigned long tx_rate;          /* current transmit rate */
    
    /* Qdisc configuration */
    struct Qdisc *qdisc;            /* queueing discipline */
    struct Qdisc *qdisc_sleeping;  /* sleeping qdisc */
    
    /* Statistics */
    struct netdev_queue_stats *stats; /* statistics */
};
QoS队列实现
c 复制代码
/*
 * Choose the transmit queue for a packet.
 *
 * Uses skb->priority directly when it is a valid queue number, otherwise
 * spreads flows across the queues using the packet hash.
 *
 * Changes from the original:
 *  - the return type is u16 (a queue index), not netdev_tx_t;
 *  - the always-true "priority >= 0" test is removed (the kernel's
 *    skb->priority is unsigned);
 *  - a zero queue count no longer causes a modulo-by-zero.
 *
 * NOTE(review): the parameter order (skb, dev, sb_dev) is kept for existing
 * callers, but the ndo_select_queue callback shown earlier in this document
 * takes (dev, skb, sb_dev) - adapt before wiring it into net_device_ops.
 *
 * @skb:    packet being transmitted
 * @dev:    transmitting device
 * @sb_dev: subordinate device (unused)
 *
 * Returns the selected queue index.
 */
static u16 network_select_queue(struct sk_buff *skb,
                                struct net_device *dev,
                                struct net_device *sb_dev)
{
    struct network_priv *priv = netdev_priv(dev);

    if (!priv->num_tx_queues)
        return 0;

    /* Priority maps 1:1 onto a queue when it is in range */
    if (skb->priority < priv->num_tx_queues)
        return skb->priority;

    /* Otherwise pick a queue from the flow hash */
    return skb_get_hash(skb) % priv->num_tx_queues;
}

/*
 * Forward declaration: network_setup_mqprio() is defined after its first
 * use below, which would otherwise be an implicit declaration.
 */
static int network_setup_mqprio(struct net_device *dev, void *type_data);

/*
 * Traffic-control offload entry point: dispatch a TC setup request to the
 * handler for its type.
 *
 * @dev:       device being configured
 * @type:      kind of TC object being set up
 * @type_data: type-specific configuration, passed through unchanged
 *
 * Returns the handler's result, or -EOPNOTSUPP for unsupported types.
 */
static int network_setup_tc(struct net_device *dev, enum tc_setup_type type,
                         void *type_data)
{
    /* The unused 'priv' local of the original has been removed */
    switch (type) {
    case TC_SETUP_QDISC_MQPRIO:
        return network_setup_mqprio(dev, type_data);

    case TC_SETUP_CLSFLOWER:
        return network_setup_clsflower(dev, type_data);

    case TC_SETUP_QDISC_TBF:
        return network_setup_tbf(dev, type_data);

    default:
        return -EOPNOTSUPP;
    }
}

/* 多队列优先级配置 */
static int network_setup_mqprio(struct net_device *dev, void *type_data)
{
    struct tc_mqprio_qopt *qopt = type_data;
    struct network_priv *priv = netdev_priv(dev);
    int i;
    
    /* 验证队列数量 */
    if (qopt->num_tc > priv->max_tc || qopt->num_queues > priv->num_tx_queues)
        return -EINVAL;
        
    /* 配置流量类别 */
    for (i = 0; i < qopt->num_tc; i++) {
        priv->tc_to_queue[i] = qopt->offset[i];
        priv->tc_to_count[i] = qopt->count[i];
    }
    
    priv->num_tc = qopt->num_tc;
    
    /* 更新硬件寄存器 */
    network_hw_setup_tc(priv, qopt);
    
    return 0;
}

4. 性能优化要点

4.1 零拷贝技术实现

DMA零拷贝
c 复制代码
/* 零拷贝发送实现 */
static netdev_tx_t network_xmit_zero_copy(struct sk_buff *skb,
                                          struct net_device *dev)
{
    struct network_priv *priv = netdev_priv(dev);
    struct dma_tx_desc *desc;
    dma_addr_t dma_addr;
    int entry;
    
    /* 获取发送描述符 */
    entry = priv->tx_prod;
    desc = &priv->tx_ring[entry];
    
    /* 直接使用sk_buff数据,避免复制 */
    dma_addr = dma_map_single(dev->dev.parent, skb->data,
                             skb->len, DMA_TO_DEVICE);
    if (dma_mapping_error(dev->dev.parent, dma_addr)) {
        dev_kfree_skb_any(skb);
        dev->stats.tx_dropped++;
        return NETDEV_TX_OK;
    }
    
    /* 填充描述符 */
    desc->addr = dma_addr;
    desc->len = skb->len;
    desc->cmd = TX_DESC_CMD_EOP | TX_DESC_CMD_RS;
    
    /* 保存sk_buff指针用于完成处理 */
    priv->tx_skb[entry] = skb;
    
    /* 更新生产者指针 */
    priv->tx_prod = (entry + 1) % TX_RING_SIZE;
    
    /* 通知硬件 */
    writel(TX_RING_TAIL(priv->tx_prod), priv->base + TX_TAIL_REG);
    
    /* 检查是否需要停止队列 */
    if ((priv->tx_prod + 1) % TX_RING_SIZE == priv->tx_cons) {
        netif_stop_queue(dev);
    }
    
    return NETDEV_TX_OK;
}
页面零拷贝
c 复制代码
/*
 * Map part of a page for DMA and describe it to the hardware.
 *
 * @dev:       network device whose parent performs the DMA
 * @page:      page holding the data
 * @offset:    byte offset of the data within @page
 * @size:      number of bytes to map
 * @direction: DMA direction, passed straight to dma_map_page()
 *
 * Returns 0 on success or -ENOMEM if the mapping fails.
 */
static int network_map_page(struct net_device *dev, struct page *page,
                           unsigned int offset, unsigned int size,
                           int direction)
{
    struct network_priv *priv = netdev_priv(dev);
    dma_addr_t mapping = dma_map_page(dev->dev.parent, page, offset, size,
                                      direction);

    if (dma_mapping_error(dev->dev.parent, mapping))
        return -ENOMEM;

    /* Hand the mapped region to the hardware descriptor setup */
    network_setup_page_desc(priv, mapping, size, offset);
    return 0;
}

4.2 多队列网卡驱动设计

RSS(接收端缩放)
c 复制代码
/* Receive-side scaling configuration kept in the driver's private data. */
struct rss_config {
    u8 num_queues;                  /* number of RSS queues */
    u8 hash_key[40];                /* RSS hash key */
    u32 hash_types;                 /* enabled hash types */
    u16 indirection_table[128];     /* indirection (redirection) table */
};

/*
 * Update the RSS indirection table and/or hash key and push the result to
 * the hardware.
 *
 * Changes from the original:
 *  - ETH_RSS_HASH_NO_CHANGE is accepted, so a request that only changes the
 *    table or key is not rejected;
 *  - the whole indirection table is validated before any entry is written,
 *    so an invalid request no longer leaves a half-updated table;
 *  - array sizes are taken from struct rss_config instead of repeating the
 *    literals 128 and 40.
 *
 * @dev:   device being configured
 * @indir: new indirection table (ARRAY_SIZE(indirection_table) entries),
 *         or NULL to leave it unchanged
 * @key:   new hash key (sizeof(hash_key) bytes), or NULL to leave unchanged
 * @hfunc: requested hash function
 *
 * Returns 0 on success, -EOPNOTSUPP for an unsupported hash function, or
 * -EINVAL if the table references a non-existent receive queue.
 */
static int network_setup_rss(struct net_device *dev, u32 *indir,
                           u8 *key, u8 hfunc)
{
    struct network_priv *priv = netdev_priv(dev);
    struct rss_config *rss = &priv->rss;
    int i;

    /* Only the Toeplitz hash (or "no change") is supported */
    if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
        return -EOPNOTSUPP;

    if (indir) {
        /* Validate everything first ... */
        for (i = 0; i < ARRAY_SIZE(rss->indirection_table); i++) {
            if (indir[i] >= priv->num_rx_queues)
                return -EINVAL;
        }
        /* ... then commit */
        for (i = 0; i < ARRAY_SIZE(rss->indirection_table); i++)
            rss->indirection_table[i] = indir[i];
    }

    /* Hash key */
    if (key)
        memcpy(rss->hash_key, key, sizeof(rss->hash_key));

    /* Push the configuration to the hardware */
    network_hw_setup_rss(priv, rss);

    return 0;
}

/*
 * Compute the RSS hash for a packet.
 *
 * IPv4 and IPv6 packets use the protocol-specific helpers; every other
 * protocol falls back to the hash already stored in the skb.
 *
 * @dev: device the packet belongs to
 * @skb: packet to hash
 *
 * Returns the 32-bit hash value.
 */
static u32 network_rss_hash(struct net_device *dev, struct sk_buff *skb)
{
    struct network_priv *priv = netdev_priv(dev);

    if (skb->protocol == htons(ETH_P_IP))
        return network_rss_hash_ipv4(priv, skb);

    if (skb->protocol == htons(ETH_P_IPV6))
        return network_rss_hash_ipv6(priv, skb);

    return skb_get_hash_raw(skb);
}
RPS(接收包转向)
c 复制代码
/*
 * Configure receive packet steering for one receive queue.
 *
 * A non-NULL @map is installed and RPS is enabled for the queue; a NULL
 * @map disables RPS for it.
 *
 * Change from the original: a negative @queue_id is rejected. It is a
 * signed int, so it previously passed the upper-bound check and indexed
 * before the start of rx_queues[].
 *
 * @dev:      device being configured
 * @queue_id: index of the receive queue
 * @map:      CPU map to install, or NULL to disable RPS
 *
 * Returns 0 on success or -EINVAL for an out-of-range queue index.
 */
static int network_setup_rps(struct net_device *dev, int queue_id,
                           struct rps_map *map)
{
    struct network_priv *priv = netdev_priv(dev);
    struct netdev_rx_queue *rx_queue;

    if (queue_id < 0 || queue_id >= priv->num_rx_queues)
        return -EINVAL;

    rx_queue = &priv->rx_queues[queue_id];

    /* Install or remove the CPU map */
    if (map) {
        rx_queue->rps_map = map;
        rx_queue->rps_enabled = 1;
    } else {
        rx_queue->rps_enabled = 0;
    }

    return 0;
}

4.3 软中断负载均衡

软中断处理优化
c 复制代码
/*
 * Finish a NAPI poll cycle, adapting the poll weight and IRQ placement to
 * the observed load before calling napi_complete().
 *
 *  - more than 10000 received packets: raise the weight to 64;
 *  - fewer than 1000 received packets: lower the weight to 16;
 *  - more than 5000 received packets while running on a CPU other than
 *    priv->preferred_cpu: move the IRQ to the preferred CPU.
 *
 * @napi: NAPI context embedded in struct network_priv
 */
static void network_napi_complete(struct napi_struct *napi)
{
    struct network_priv *priv = container_of(napi, struct network_priv, napi);
    int this_cpu = smp_processor_id();

    /* Adapt the poll weight to the load */
    if (priv->rx_packets > 10000) {
        if (napi->weight < 64)
            napi->weight = 64;
    } else if (priv->rx_packets < 1000 && napi->weight > 16) {
        napi->weight = 16;
    }

    /* Under high load, steer the interrupt to the preferred CPU */
    if (priv->rx_packets > 5000 && this_cpu != priv->preferred_cpu)
        irq_set_affinity(priv->irq, cpumask_of(priv->preferred_cpu));

    napi_complete(napi);
}
CPU亲和性管理
c 复制代码
/* CPU亲和性配置 */
static int network_set_cpu_affinity(struct net_device *dev, 
                                   const struct cpumask *mask)
{
    struct network_priv *priv = netdev_priv(dev);
    int cpu;
    
    /* 验证CPU有效性 */
    for_each_cpu(cpu, mask) {
        if (!cpu_online(cpu))
            return -EINVAL;
    }
    
    /* 配置IRQ亲和性 */
    if (priv->irq >= 0) {
        int ret = irq_set_affinity(priv->irq, mask);
        if (ret)
            return ret;
    }
    
    /* 配置NAPI亲和性 */
    if (priv->use_napi) {
        netif_set_xps_queue(dev, mask, 0);
    }
    
    /* 保存配置 */
    cpumask_copy(&priv->cpu_affinity, mask);
    
    return 0;
}

4.4 巨型帧(Jumbo Frame)支持

巨型帧配置
c 复制代码
/* MTU变更处理 */
static int network_change_mtu(struct net_device *dev, int new_mtu)
{
    struct network_priv *priv = netdev_priv(dev);
    int max_mtu = priv->hw->max_mtu;
    int min_mtu = ETH_MIN_MTU;
    
    /* 验证MTU范围 */
    if (new_mtu < min_mtu || new_mtu > max_mtu)
        return -EINVAL;
        
    /* 检查是否需要重新分配缓冲区 */
    if (new_mtu > PAGE_SIZE) {
        int needed_pages = (new_mtu + PAGE_SIZE - 1) / PAGE_SIZE;
        
        /* 验证系统支持 */
        if (!priv->hw->support_multi_page) {
            netdev_err(dev, "Hardware doesn't support multi-page frames\n");
            return -EOPNOTSUPP;
        }
    }
    
    /* 停止设备 */
    if (netif_running(dev))
        network_close(dev);
        
    /* 更新缓冲区大小 */
    priv->rx_buf_size = new_mtu + ETH_HLEN + VLAN_HLEN + NET_IP_ALIGN;
    priv->tx_buf_size = new_mtu + ETH_HLEN + VLAN_HLEN;
    
    /* 重新分配环形缓冲区 */
    network_free_rings(dev);
    network_alloc_rings(dev);
    
    /* 重新启动设备 */
    if (priv->state == NETWORK_STATE_OPEN)
        network_open(dev);
        
    dev->mtu = new_mtu;
    
    return 0;
}
巨型帧接收处理
c 复制代码
/*
 * Forward declaration: the helper is defined after its first use below.
 */
static struct sk_buff *network_build_jumbo_skb(struct net_device *dev,
                                               u32 len, int page_cnt);

/*
 * Receive up to @budget frames, building a paged skb for frames that do
 * not fit in one page.
 *
 * Changes from the original:
 *  - when no skb can be obtained the loop stops instead of using
 *    'continue'. Nothing in the failure path consumes the pending frame or
 *    advances 'processed', so 'continue' re-read the same valid status and
 *    could spin forever under memory pressure;
 *  - a reported length of 0 is handled by the single-buffer path rather
 *    than requesting a zero-page jumbo skb.
 *
 * @dev:    receiving device
 * @budget: maximum number of frames to process
 *
 * Returns the number of frames delivered to the stack.
 */
static int network_rx_jumbo(struct net_device *dev, int budget)
{
    struct network_priv *priv = netdev_priv(dev);
    int processed = 0;

    while (processed < budget) {
        struct sk_buff *skb;
        u32 len, status;
        int page_cnt;

        /* Is a frame pending? */
        status = readl(priv->base + RX_STATUS_REG);
        if (!(status & RX_STATUS_VALID))
            break;

        len = (status >> 16) & 0x3FFF;

        /* Number of pages the frame spans */
        page_cnt = (len + PAGE_SIZE - 1) / PAGE_SIZE;

        if (page_cnt <= 1) {
            /* Fits in a single linear buffer */
            skb = netdev_alloc_skb_ip_align(dev, len);
            if (!skb) {
                dev->stats.rx_errors++;
                break;    /* retry on the next poll */
            }

            /* Copy the frame out of the receive buffer */
            memcpy_fromio(skb->data, priv->rx_buf, len);
            skb_put(skb, len);
        } else {
            /* Spans several pages */
            skb = network_build_jumbo_skb(dev, len, page_cnt);
            if (!skb) {
                dev->stats.rx_errors++;
                break;    /* retry on the next poll */
            }
        }

        /* Determine the protocol and strip the Ethernet header */
        skb->protocol = eth_type_trans(skb, dev);

        /* Statistics */
        dev->stats.rx_packets++;
        dev->stats.rx_bytes += len;

        /* Hand the frame to the stack via GRO */
        napi_gro_receive(&priv->napi, skb);

        processed++;
    }

    return processed;
}

/*
 * Build an skb for a frame spanning several pages by copying the frame
 * into freshly allocated pages attached as fragments.
 *
 * Changes from the original:
 *  - 'priv' is now actually declared;
 *  - skb_add_rx_frag() returns void, so its result is no longer tested;
 *  - skb_add_rx_frag() already accounts len, data_len and truesize for each
 *    fragment, so the manual assignments that double-counted them are gone;
 *  - page_cnt is bounded by MAX_SKB_FRAGS;
 *  - the Ethernet header is pulled into the linear area, because the caller
 *    passes the skb to eth_type_trans(), which reads and strips the header
 *    from skb->data.
 *
 * @dev:      receiving device
 * @len:      frame length in bytes
 * @page_cnt: number of pages needed to hold @len bytes
 *
 * Returns the new skb, or NULL on allocation failure or invalid arguments.
 */
static struct sk_buff *network_build_jumbo_skb(struct net_device *dev,
                                               u32 len, int page_cnt)
{
    struct network_priv *priv = netdev_priv(dev);
    struct sk_buff *skb;
    struct page *page;
    u32 offset = 0;
    int i;

    if (page_cnt <= 0 || page_cnt > MAX_SKB_FRAGS)
        return NULL;

    /* Linear area only needs room for the Ethernet header */
    skb = netdev_alloc_skb(dev, ETH_HLEN);
    if (!skb)
        return NULL;

    /* Copy the frame page by page and attach each page as a fragment */
    for (i = 0; i < page_cnt && offset < len; i++) {
        u32 copy_len = min_t(u32, len - offset, PAGE_SIZE);

        page = alloc_page(GFP_ATOMIC);
        if (!page)
            goto err_free;

        memcpy_fromio(page_address(page),
                     priv->rx_buf + offset, copy_len);

        /* Updates skb->len, skb->data_len and skb->truesize */
        skb_add_rx_frag(skb, i, page, 0, copy_len, PAGE_SIZE);

        offset += copy_len;
    }

    /* Make the Ethernet header available in the linear area */
    if (!pskb_may_pull(skb, ETH_HLEN))
        goto err_free;

    return skb;

err_free:
    /* Also releases any pages already attached as fragments */
    dev_kfree_skb_any(skb);
    return NULL;
}

5. 实际驱动开发示例

5.1 网络设备(平台驱动)注册流程

设备初始化
c 复制代码
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2024 Linux Network Driver Project */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/mii.h>
#include <linux/dma-mapping.h>
#include <linux/interrupt.h>
#include <linux/platform_device.h>

/* Driver name, used as the platform driver name */
#define DRV_NAME "example_network"
/* Driver version string reported through MODULE_VERSION */
#define DRV_VERSION "1.0.0"

/* Module metadata */
MODULE_AUTHOR("Linux Network Driver Project");
MODULE_DESCRIPTION("Example Network Driver Implementation");
MODULE_LICENSE("GPL v2");
MODULE_VERSION(DRV_VERSION);

/*
 * Driver-private state, stored in the area returned by netdev_priv().
 *
 * Compared with the original, the tx_skb and tx_timeout members are added:
 * example_xmit() and example_tx_complete() read and write both, but the
 * structure did not declare them.
 */
struct example_priv {
    struct net_device *dev;         /* owning network device */
    void __iomem *base;              /* base of the mapped register window */
    int irq;                        /* interrupt number */

    /* PHY management */
    struct phy_device *phydev;      /* attached PHY */
    struct mii_bus *mii_bus;        /* MII bus */

    /* DMA management */
    dma_addr_t rx_dma;              /* RX DMA address */
    dma_addr_t tx_dma;              /* TX DMA address */
    void *rx_buf;                   /* RX buffer */
    void *tx_buf;                   /* TX buffer */

    /* Transmit state */
    struct sk_buff *tx_skb;         /* packet currently in flight, or NULL */
    unsigned long tx_timeout;       /* jiffies deadline for that packet */

    /* NAPI */
    struct napi_struct napi;        /* NAPI context */

    /* Statistics */
    struct net_device_stats stats;  /* network statistics */

    /* Configuration */
    int rx_ring_size;               /* RX ring size */
    int tx_ring_size;               /* TX ring size */
    int msg_enable;                 /* message level bitmap */
};

/* 设备探测函数 */
static int example_probe(struct platform_device *pdev)
{
    struct net_device *dev;
    struct example_priv *priv;
    struct resource *res;
    int ret;
    
    /* 分配网络设备 */
    dev = alloc_etherdev(sizeof(struct example_priv));
    if (!dev) {
        dev_err(&pdev->dev, "Failed to allocate netdev\n");
        return -ENOMEM;
    }
    
    priv = netdev_priv(dev);
    priv->dev = dev;
    
    /* 设置设备操作函数 */
    dev->netdev_ops = &example_netdev_ops;
    dev->ethtool_ops = &example_ethtool_ops;
    
    /* 获取I/O资源 */
    res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
    priv->base = devm_ioremap_resource(&pdev->dev, res);
    if (IS_ERR(priv->base)) {
        ret = PTR_ERR(priv->base);
        goto err_free_netdev;
    }
    
    /* 获取中断资源 */
    priv->irq = platform_get_irq(pdev, 0);
    if (priv->irq < 0) {
        ret = priv->irq;
        goto err_free_netdev;
    }
    
    /* 设置默认配置 */
    priv->rx_ring_size = 256;
    priv->tx_ring_size = 256;
    priv->msg_enable = NETIF_MSG_LINK | NETIF_MSG_IFUP;
    
    /* 设置设备特性 */
    dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
    dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
    
    /* 注册网络设备 */
    ret = register_netdev(dev);
    if (ret) {
        dev_err(&pdev->dev, "Failed to register netdev\n");
        goto err_free_netdev;
    }
    
    platform_set_drvdata(pdev, dev);
    
    dev_info(&pdev->dev, "%s: Example network driver initialized\n", 
             dev->name);
    
    return 0;
    
err_free_netdev:
    free_netdev(dev);
    return ret;
}

/*
 * Platform remove: unregister the network device stored as driver data by
 * probe, then free it.
 *
 * @pdev: platform device being unbound
 *
 * Always returns 0.
 */
static int example_remove(struct platform_device *pdev)
{
    struct net_device *ndev;

    ndev = platform_get_drvdata(pdev);

    /* Detach from the networking core before releasing the memory */
    unregister_netdev(ndev);
    free_netdev(ndev);

    return 0;
}

/*
 * Platform driver description: binds example_probe/example_remove to
 * devices matched by name (DRV_NAME) or by the example_of_match table.
 * NOTE(review): example_of_match is not defined in the visible code -
 * confirm it is declared before this point.
 */
static struct platform_driver example_driver = {
    .probe = example_probe,
    .remove = example_remove,
    .driver = {
        .name = DRV_NAME,
        .of_match_table = example_of_match,
    },
};

/* Registers the platform driver for this module */
module_platform_driver(example_driver);

5.2 基本发送/接收函数实现

数据包发送函数
c 复制代码
/*
 * Transmit one packet using the single hardware transmit descriptor.
 *
 * Changes from the original:
 *  - the queue is stopped, and tx_skb / tx_timeout recorded, BEFORE the
 *    hardware is started. A completion interrupt firing right after the
 *    start write could otherwise wake the queue first and have it stopped
 *    again immediately afterwards, with nothing left to wake it;
 *  - non-linear skbs (the device advertises NETIF_F_SG) are linearised,
 *    because one descriptor maps skb->len bytes from skb->data;
 *  - the unused 'ret' local is removed and the drop paths share one exit.
 *
 * NOTE(review): the DMA address is written with a 32-bit writel(); confirm
 * the DMA mask restricts mappings to 32 bits.
 *
 * @skb: packet to send; always consumed
 * @dev: transmitting device
 *
 * Returns NETDEV_TX_OK (the packet is either queued or dropped).
 */
static netdev_tx_t example_xmit(struct sk_buff *skb, struct net_device *dev)
{
    struct example_priv *priv = netdev_priv(dev);
    unsigned int len;
    dma_addr_t dma_addr;

    /* Transmitter must be enabled */
    if (!(readl(priv->base + NET_CTRL_REG) & NET_CTRL_TX_ENABLE))
        goto drop;

    /* The single descriptor needs contiguous data */
    if (skb_linearize(skb))
        goto drop;
    len = skb->len;

    /* Map the packet for DMA */
    dma_addr = dma_map_single(dev->dev.parent, skb->data, len, DMA_TO_DEVICE);
    if (dma_mapping_error(dev->dev.parent, dma_addr))
        goto drop;

    /* Publish in-flight state before the hardware can complete */
    priv->tx_skb = skb;
    priv->tx_timeout = jiffies + TX_TIMEOUT;

    /* Only one packet can be in flight: stop until completion */
    netif_stop_queue(dev);

    /* Program the transmit descriptor */
    writel(dma_addr, priv->base + TX_DESC_ADDR_REG);
    writel(len, priv->base + TX_DESC_LEN_REG);
    writel(TX_DESC_CMD_EOP | TX_DESC_CMD_RS, priv->base + TX_DESC_CTRL_REG);

    /* Start transmission */
    writel(TX_START, priv->base + TX_START_REG);

    return NETDEV_TX_OK;

drop:
    dev->stats.tx_dropped++;
    dev_kfree_skb_any(skb);
    return NETDEV_TX_OK;
}

/*
 * Transmit-completion handling.
 *
 * On TX_STATUS_COMPLETE with a packet in flight: unmap its buffer, update
 * the counters, free the skb and wake the queue. On TX_STATUS_ERROR: count
 * the error and, for a FIFO underrun, adjust the FIFO threshold.
 *
 * NOTE(review): the DMA address to unmap is read back from a 32-bit
 * hardware register rather than kept in driver state - confirm this is
 * reliable for the target hardware and DMA mask.
 *
 * @dev: device whose transmit status is examined
 */
static void example_tx_complete(struct net_device *dev)
{
    struct example_priv *priv = netdev_priv(dev);
    struct sk_buff *done = NULL;
    u32 status = readl(priv->base + TX_STATUS_REG);

    /* The in-flight packet is only looked at on completion */
    if (status & TX_STATUS_COMPLETE)
        done = priv->tx_skb;

    if (done) {
        /* Address the hardware was given for this packet */
        dma_addr_t mapping = readl(priv->base + TX_DESC_ADDR_REG);

        dma_unmap_single(dev->dev.parent, mapping,
                       done->len, DMA_TO_DEVICE);

        /* Counters are updated while the skb is still valid */
        dev->stats.tx_packets++;
        dev->stats.tx_bytes += done->len;

        dev_consume_skb_any(done);
        priv->tx_skb = NULL;

        /* The single slot is free again */
        netif_wake_queue(dev);
    }

    if (!(status & TX_STATUS_ERROR))
        return;

    dev->stats.tx_errors++;

    /* FIFO underrun: raise the FIFO threshold */
    if (status & TX_STATUS_UNDERRUN)
        network_adjust_fifo_threshold(priv);
}
数据包接收函数
c 复制代码
/*
 * Receive up to @budget frames and hand them to the stack.
 *
 * For every frame flagged valid in RX_STATUS_REG an skb is allocated and
 * DMA-mapped, the RX descriptor registers are programmed, and the frame is
 * passed to napi_gro_receive() once network_wait_for_rx() reports success.
 *
 * Every frame taken off the hardware consumes budget, whether it is
 * delivered or dropped. This bounds the loop: the failure paths used to
 * `continue` without consuming budget, so a persistent allocation or mapping
 * failure could spin here indefinitely.
 *
 * @dev:    network device being polled
 * @budget: maximum number of frames to handle
 *
 * Returns the number of frames handled (delivered or dropped), at most
 * @budget.
 */
static int example_rx(struct net_device *dev, int budget)
{
    struct example_priv *priv = netdev_priv(dev);
    int received = 0;

    while (received < budget) {
        struct sk_buff *skb;
        dma_addr_t dma_addr;
        u32 len, status;

        /* Stop as soon as the hardware has no valid frame */
        status = readl(priv->base + RX_STATUS_REG);
        if (!(status & RX_STATUS_VALID))
            break;

        /* This frame counts against the budget whatever happens next */
        received++;

        /* Frame length is in bits 16..29 of the status word */
        len = (status >> 16) & 0x3FFF;

        /* netdev_alloc_skb_ip_align() already adds and reserves NET_IP_ALIGN */
        skb = netdev_alloc_skb_ip_align(dev, len);
        if (!skb) {
            dev->stats.rx_dropped++;
            /* Tell the hardware to discard the frame */
            writel(RX_DISCARD, priv->base + RX_CTRL_REG);
            continue;
        }

        /* Map the buffer for the device to write into */
        dma_addr = dma_map_single(dev->dev.parent, skb->data,
                                 len, DMA_FROM_DEVICE);
        if (dma_mapping_error(dev->dev.parent, dma_addr)) {
            dev_kfree_skb_any(skb);
            dev->stats.rx_dropped++;
            /* Discard the frame, same as the allocation-failure path */
            writel(RX_DISCARD, priv->base + RX_CTRL_REG);
            continue;
        }

        /* Program the RX descriptor */
        writel(dma_addr, priv->base + RX_DESC_ADDR_REG);
        writel(len, priv->base + RX_DESC_LEN_REG);
        writel(RX_DESC_CMD_OWN, priv->base + RX_DESC_CTRL_REG);

        /* Start the receive */
        writel(RX_START, priv->base + RX_START_REG);

        /* Wait for completion; on failure release the buffer */
        if (!network_wait_for_rx(priv)) {
            dma_unmap_single(dev->dev.parent, dma_addr,
                           len, DMA_FROM_DEVICE);
            dev_kfree_skb_any(skb);
            dev->stats.rx_errors++;
            continue;
        }

        /* The CPU may only touch the data after unmapping */
        dma_unmap_single(dev->dev.parent, dma_addr,
                        len, DMA_FROM_DEVICE);

        /* Set the data length, then derive the protocol */
        skb_put(skb, len);
        skb->protocol = eth_type_trans(skb, dev);

        /* Update statistics */
        dev->stats.rx_packets++;
        dev->stats.rx_bytes += len;

        /* Propagate the hardware checksum verdict */
        if (status & RX_STATUS_CSUM_OK)
            skb->ip_summed = CHECKSUM_UNNECESSARY;
        else
            skb->ip_summed = CHECKSUM_NONE;

        /* Hand the frame to the stack */
        napi_gro_receive(&priv->napi, skb);
    }

    return received;
}

/*
 * NAPI poll callback.
 *
 * Runs the RX path with the given budget, then handles TX completion.
 * When less than the full budget was used, polling is finished with
 * napi_complete_done() and the device interrupt is unmasked -- but only if
 * napi_complete_done() reports that polling really ended. Unmasking
 * unconditionally would re-enable the interrupt while NAPI is still
 * scheduled.
 *
 * @napi:   NAPI context embedded in struct example_priv
 * @budget: maximum number of RX frames to process
 *
 * Returns the amount of RX work done.
 */
static int example_poll(struct napi_struct *napi, int budget)
{
    struct example_priv *priv = container_of(napi, struct example_priv, napi);
    struct net_device *dev = priv->dev;
    int work_done;

    /* Handle received frames */
    work_done = example_rx(dev, budget);

    /* Handle TX completion */
    example_tx_complete(dev);

    /* Budget not exhausted: finish polling and unmask the interrupt */
    if (work_done < budget && napi_complete_done(napi, work_done))
        writel(INTR_ENABLE, priv->base + INTR_MASK_REG);

    return work_done;
}

5.3 统计计数器维护

硬件统计收集
c 复制代码
/*
 * Fold the hardware statistics registers into dev->stats.
 *
 * Each register value is added to the matching net_device_stats field, then
 * 0 is written to STATS_CLEAR_REG.
 *
 * NOTE(review): the RX and TX paths in this driver also bump
 * rx_packets/rx_bytes/tx_packets/tx_bytes in software; if the hardware
 * counts the same frames, these fields are counted twice -- confirm.
 * NOTE(review): no locking is visible here; confirm callers serialize
 * access to the counters.
 */
static void example_update_stats(struct net_device *dev)
{
    struct example_priv *priv = netdev_priv(dev);
    struct net_device_stats *stats = &dev->stats;
    
    /* Read the hardware statistics registers */
    stats->rx_packets += readl(priv->base + RX_PACKETS_REG);
    stats->tx_packets += readl(priv->base + TX_PACKETS_REG);
    stats->rx_bytes += readl(priv->base + RX_BYTES_REG);
    stats->tx_bytes += readl(priv->base + TX_BYTES_REG);
    stats->rx_errors += readl(priv->base + RX_ERRORS_REG);
    stats->tx_errors += readl(priv->base + TX_ERRORS_REG);
    stats->rx_dropped += readl(priv->base + RX_DROPPED_REG);
    stats->tx_dropped += readl(priv->base + TX_DROPPED_REG);
    stats->multicast += readl(priv->base + RX_MULTICAST_REG);
    stats->collisions += readl(priv->base + COLLISIONS_REG);
    
    /* Detailed RX error counters */
    stats->rx_length_errors += readl(priv->base + RX_LEN_ERRORS_REG);
    stats->rx_over_errors += readl(priv->base + RX_OVER_ERRORS_REG);
    stats->rx_crc_errors += readl(priv->base + RX_CRC_ERRORS_REG);
    stats->rx_frame_errors += readl(priv->base + RX_FRAME_ERRORS_REG);
    stats->rx_fifo_errors += readl(priv->base + RX_FIFO_ERRORS_REG);
    stats->rx_missed_errors += readl(priv->base + RX_MISSED_ERRORS_REG);
    
    /* Detailed TX error counters */
    stats->tx_aborted_errors += readl(priv->base + TX_ABORT_ERRORS_REG);
    stats->tx_carrier_errors += readl(priv->base + TX_CARRIER_ERRORS_REG);
    stats->tx_fifo_errors += readl(priv->base + TX_FIFO_ERRORS_REG);
    stats->tx_heartbeat_errors += readl(priv->base + TX_HB_ERRORS_REG);
    stats->tx_window_errors += readl(priv->base + TX_WINDOW_ERRORS_REG);
    
    /* Clear the hardware counters after they have been read */
    writel(0, priv->base + STATS_CLEAR_REG);
}

/*
 * ethtool get_ethtool_stats handler.
 *
 * Refreshes the counters via example_update_stats() and then writes one
 * value per example_ethtool_stats[] entry into @data, in table order.
 */
static void example_get_ethtool_stats(struct net_device *dev,
                                    struct ethtool_stats *stats, u64 *data)
{
    struct example_priv *priv = netdev_priv(dev);
    u64 *out = data;
    int idx = 0;

    /* Pull in the latest hardware counters first */
    example_update_stats(dev);

    /* One output slot per table entry */
    while (idx < EXAMPLE_NUM_STATS) {
        *out++ = example_ethtool_stats[idx].get_stat(priv);
        idx++;
    }
}

/*
 * Table of statistics exported through ethtool. Each entry is built by the
 * ETHTOOL_STAT() macro from a statistic name and a getter; the table is
 * indexed by the ethtool stats, strings and count handlers.
 */
static const struct example_ethtool_stats example_ethtool_stats[] = {
    ETHTOOL_STAT(rx_packets, get_rx_packets),
    ETHTOOL_STAT(tx_packets, get_tx_packets),
    ETHTOOL_STAT(rx_bytes, get_rx_bytes),
    ETHTOOL_STAT(tx_bytes, get_tx_bytes),
    ETHTOOL_STAT(rx_errors, get_rx_errors),
    ETHTOOL_STAT(tx_errors, get_tx_errors),
    ETHTOOL_STAT(rx_dropped, get_rx_dropped),
    ETHTOOL_STAT(tx_dropped, get_tx_dropped),
    /* more statistics entries... */
};

/*
 * ethtool get_sset_count handler.
 *
 * Returns EXAMPLE_NUM_STATS for the ETH_SS_STATS string set and -EOPNOTSUPP
 * for any other set.
 */
static int example_get_sset_count(struct net_device *dev, int sset)
{
    if (sset == ETH_SS_STATS)
        return EXAMPLE_NUM_STATS;

    return -EOPNOTSUPP;
}

/*
 * ethtool get_strings handler.
 *
 * For ETH_SS_STATS, copies the name of every example_ethtool_stats[] entry
 * into @data as consecutive ETH_GSTRING_LEN-byte slots. Other string sets
 * are ignored.
 *
 * NOTE(review): a full ETH_GSTRING_LEN bytes are copied per name, so each
 * .name must be backed by at least that much storage -- confirm against the
 * struct definition.
 */
static void example_get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
    int i;

    if (stringset != ETH_SS_STATS)
        return;

    for (i = 0; i < EXAMPLE_NUM_STATS; i++)
        memcpy(data + i * ETH_GSTRING_LEN,
               example_ethtool_stats[i].name, ETH_GSTRING_LEN);
}

5.4 ethtool支持实现

ethtool操作实现
c 复制代码
/*
 * ethtool operations table for this driver. Link state uses the generic
 * ethtool_op_get_link helper; every other entry points at a driver-specific
 * handler.
 */
static const struct ethtool_ops example_ethtool_ops = {
    .get_link = ethtool_op_get_link,
    .get_drvinfo = example_get_drvinfo,
    .get_msglevel = example_get_msglevel,
    .set_msglevel = example_set_msglevel,
    .get_regs_len = example_get_regs_len,
    .get_regs = example_get_regs,
    .get_wol = example_get_wol,
    .set_wol = example_set_wol,
    .get_eeprom_len = example_get_eeprom_len,
    .get_eeprom = example_get_eeprom,
    .set_eeprom = example_set_eeprom,
    .get_pauseparam = example_get_pauseparam,
    .set_pauseparam = example_set_pauseparam,
    .get_ringparam = example_get_ringparam,
    .set_ringparam = example_set_ringparam,
    .get_coalesce = example_get_coalesce,
    .set_coalesce = example_set_coalesce,
    .get_strings = example_get_strings,
    .get_ethtool_stats = example_get_ethtool_stats,
    .get_sset_count = example_get_sset_count,
};

/*
 * ethtool get_drvinfo handler.
 *
 * Fills @info with the driver name and version, the parent device name as
 * bus info, the firmware version as "major.minor", and the statistics,
 * self-test, EEPROM and register dump sizes.
 */
static void example_get_drvinfo(struct net_device *dev,
                              struct ethtool_drvinfo *info)
{
    struct example_priv *priv = netdev_priv(dev);
    const char *bus = dev_name(dev->dev.parent);

    /* Identification strings */
    strscpy(info->driver, DRV_NAME, sizeof(info->driver));
    strscpy(info->version, DRV_VERSION, sizeof(info->version));
    strscpy(info->bus_info, bus, sizeof(info->bus_info));

    /* Firmware version, formatted as major.minor */
    snprintf(info->fw_version, sizeof(info->fw_version), "%d.%d",
             priv->fw_major, priv->fw_minor);

    /* Sizes of the data sets the driver can report; no self-tests */
    info->n_stats = EXAMPLE_NUM_STATS;
    info->testinfo_len = 0;
    info->eedump_len = example_get_eeprom_len(dev);
    info->regdump_len = example_get_regs_len(dev);
}

/*
 * ethtool get_regs handler.
 *
 * Dumps regs->len / 4 consecutive 32-bit registers, starting at offset 0 of
 * the register window, into the buffer @p.
 */
static void example_get_regs(struct net_device *dev,
                           struct ethtool_regs *regs, void *p)
{
    struct example_priv *priv = netdev_priv(dev);
    u32 *out = p;
    int i = 0;

    /* Walk the register window one 32-bit word at a time */
    while (i < regs->len / 4) {
        out[i] = readl(priv->base + i * 4);
        i++;
    }
}

/*
 * ethtool get_ringparam handler.
 *
 * Reports the maximum and currently configured sizes of the RX, RX jumbo and
 * TX rings. The RX mini ring is reported as 0 for both limit and size.
 */
static void example_get_ringparam(struct net_device *dev,
                                struct ethtool_ringparam *ring)
{
    struct example_priv *priv = netdev_priv(dev);

    /* Standard RX ring */
    ring->rx_max_pending = MAX_RX_RING_SIZE;
    ring->rx_pending = priv->rx_ring_size;

    /* RX mini ring: reported as zero */
    ring->rx_mini_max_pending = 0;
    ring->rx_mini_pending = 0;

    /* RX jumbo ring */
    ring->rx_jumbo_max_pending = MAX_RX_JUMBO_RING_SIZE;
    ring->rx_jumbo_pending = priv->rx_jumbo_ring_size;

    /* TX ring */
    ring->tx_max_pending = MAX_TX_RING_SIZE;
    ring->tx_pending = priv->tx_ring_size;
}

/*
 * ethtool set_ringparam handler.
 *
 * Validates the requested RX/TX ring sizes against the MIN/MAX limits and
 * stores them. If the interface is running it is closed before the update
 * and reopened afterwards.
 *
 * Returns -EINVAL for out-of-range sizes, the result of example_open() when
 * the interface had to be restarted, and 0 otherwise.
 */
static int example_set_ringparam(struct net_device *dev,
                               struct ethtool_ringparam *ring)
{
    struct example_priv *priv = netdev_priv(dev);
    int was_running;

    /* Reject sizes outside the supported range */
    if (ring->rx_pending > MAX_RX_RING_SIZE ||
        ring->rx_pending < MIN_RX_RING_SIZE)
        return -EINVAL;
    if (ring->tx_pending > MAX_TX_RING_SIZE ||
        ring->tx_pending < MIN_TX_RING_SIZE)
        return -EINVAL;

    /* A running device is stopped while the sizes change */
    was_running = netif_running(dev);
    if (was_running)
        example_close(dev);

    priv->rx_ring_size = ring->rx_pending;
    priv->tx_ring_size = ring->tx_pending;

    if (!was_running)
        return 0;

    /* Bring the device back up with the new sizes */
    return example_open(dev);
}

6. 调试与测试方法

6.1 procfs/sysfs接口调试

procfs接口实现
c 复制代码
/*
 * seq_file show callback for the driver's procfs entry.
 *
 * Prints the device name, driver name/version, link state, speed and duplex,
 * the main RX/TX counters from dev->stats, and the control, status and
 * interrupt registers. Always returns 0.
 *
 * The counters in struct net_device_stats are unsigned long, so they are
 * printed with %lu; %llu mismatches the argument type and prints garbage on
 * 32-bit builds.
 */
static int example_proc_show(struct seq_file *seq, void *v)
{
    struct net_device *dev = seq->private;
    struct example_priv *priv = netdev_priv(dev);
    struct net_device_stats *stats = &dev->stats;

    seq_puts(seq, "Example Network Driver Statistics\n");
    seq_puts(seq, "=================================\n");

    seq_printf(seq, "Device: %s\n", dev->name);
    seq_printf(seq, "Driver: %s version %s\n", DRV_NAME, DRV_VERSION);
    seq_printf(seq, "Link: %s\n", netif_carrier_ok(dev) ? "up" : "down");
    seq_printf(seq, "Speed: %d Mbps\n", priv->link_speed);
    seq_printf(seq, "Duplex: %s\n", priv->link_duplex ? "full" : "half");

    seq_puts(seq, "\nRX Statistics:\n");
    seq_printf(seq, "  Packets: %lu\n", stats->rx_packets);
    seq_printf(seq, "  Bytes: %lu\n", stats->rx_bytes);
    seq_printf(seq, "  Errors: %lu\n", stats->rx_errors);
    seq_printf(seq, "  Dropped: %lu\n", stats->rx_dropped);
    seq_printf(seq, "  Multicast: %lu\n", stats->multicast);

    seq_puts(seq, "\nTX Statistics:\n");
    seq_printf(seq, "  Packets: %lu\n", stats->tx_packets);
    seq_printf(seq, "  Bytes: %lu\n", stats->tx_bytes);
    seq_printf(seq, "  Errors: %lu\n", stats->tx_errors);
    seq_printf(seq, "  Dropped: %lu\n", stats->tx_dropped);
    seq_printf(seq, "  Collisions: %lu\n", stats->collisions);

    /* Live register snapshot, read at the time the file is shown */
    seq_puts(seq, "\nHardware Registers:\n");
    seq_printf(seq, "  Control: 0x%08x\n", readl(priv->base + NET_CTRL_REG));
    seq_printf(seq, "  Status: 0x%08x\n", readl(priv->base + NET_STATUS_REG));
    seq_printf(seq, "  Interrupt: 0x%08x\n", readl(priv->base + INTR_STATUS_REG));

    return 0;
}

/*
 * procfs open callback: binds example_proc_show() to the file, passing along
 * the private data attached to the proc entry.
 */
static int example_proc_open(struct inode *inode, struct file *file)
{
    void *data = PDE_DATA(inode);

    return single_open(file, example_proc_show, data);
}

/*
 * procfs file operations: a single-record seq_file opened through
 * example_proc_open() and served by the generic seq_file helpers.
 */
static const struct proc_ops example_proc_ops = {
    .proc_open = example_proc_open,
    .proc_read = seq_read,
    .proc_lseek = seq_lseek,
    .proc_release = single_release,
};

/*
 * Create the read-only procfs entry "driver/<ifname>" for this device.
 *
 * The returned entry is stored in priv->proc_entry; the net_device itself is
 * attached as the entry's private data.
 */
static void example_create_proc(struct net_device *dev)
{
    struct example_priv *priv = netdev_priv(dev);
    char path[32];

    snprintf(path, sizeof(path), "driver/%s", dev->name);

    priv->proc_entry = proc_create_data(path, 0444, NULL,
                                       &example_proc_ops, dev);
}

/*
 * Remove the procfs entry created by example_create_proc(), if any.
 *
 * The entry is removed through the stored proc_dir_entry rather than by
 * rebuilding its path from dev->name: the interface name may have changed
 * since creation, in which case a name-based lookup would miss the entry
 * and leave it behind.
 */
static void example_remove_proc(struct net_device *dev)
{
    struct example_priv *priv = netdev_priv(dev);

    if (!priv->proc_entry)
        return;

    proc_remove(priv->proc_entry);
    priv->proc_entry = NULL;
}
sysfs接口实现
c 复制代码
/*
 * sysfs show handler for "link_status": emits "up" or "down" according to
 * the carrier state of the net_device.
 *
 * Uses sysfs_emit(), which bounds the write to the sysfs page buffer.
 */
static ssize_t link_status_show(struct device *dev,
                               struct device_attribute *attr, char *buf)
{
    struct net_device *netdev = to_net_dev(dev);

    return sysfs_emit(buf, "%s\n", netif_carrier_ok(netdev) ? "up" : "down");
}

/*
 * sysfs show handler for "link_speed": emits priv->link_speed as a decimal
 * number followed by a newline.
 *
 * Uses sysfs_emit(), which bounds the write to the sysfs page buffer.
 */
static ssize_t link_speed_show(struct device *dev,
                              struct device_attribute *attr, char *buf)
{
    struct net_device *netdev = to_net_dev(dev);
    struct example_priv *priv = netdev_priv(netdev);

    return sysfs_emit(buf, "%d\n", priv->link_speed);
}

/*
 * sysfs show handler for "registers": dumps the 32-bit registers at offsets
 * 0x00..0xFC as "0x<offset>: 0x<value>" lines.
 *
 * Output is accumulated with sysfs_emit_at(), which keeps every write inside
 * the sysfs page buffer instead of relying on the dump happening to fit.
 *
 * Returns the number of bytes written to @buf.
 */
static ssize_t registers_show(struct device *dev,
                             struct device_attribute *attr, char *buf)
{
    struct net_device *netdev = to_net_dev(dev);
    struct example_priv *priv = netdev_priv(netdev);
    int len = 0;
    int i;

    for (i = 0; i < 0x100; i += 4)
        len += sysfs_emit_at(buf, len, "0x%04x: 0x%08x\n",
                             i, readl(priv->base + i));

    return len;
}

/* Read-only device attributes backed by the *_show handlers */
static DEVICE_ATTR_RO(link_status);
static DEVICE_ATTR_RO(link_speed);
static DEVICE_ATTR_RO(registers);

/* NULL-terminated list of the attributes exported by this driver */
static struct attribute *example_attrs[] = {
    &dev_attr_link_status.attr,
    &dev_attr_link_speed.attr,
    &dev_attr_registers.attr,
    NULL
};

/* Attribute group; .name places the attributes in an "example" subdirectory */
static const struct attribute_group example_attr_group = {
    .name = "example",
    .attrs = example_attrs,
};

/*
 * Register the driver's sysfs attribute group on the net_device's kobject.
 * Returns the result of sysfs_create_group().
 */
static int example_sysfs_init(struct net_device *dev)
{
    int err;

    err = sysfs_create_group(&dev->dev.kobj, &example_attr_group);

    return err;
}

/* Remove the driver's sysfs attribute group from the net_device's kobject. */
static void example_sysfs_exit(struct net_device *dev)
{
    struct net_device *ndev = dev;

    sysfs_remove_group(&ndev->dev.kobj, &example_attr_group);
}

6.2 内核网络跟踪点使用

跟踪点定义
c 复制代码
/* 定义跟踪点 */
#include <linux/tracepoint.h>

/*
 * Tracepoint declarations for the RX and TX paths. The *_entry tracepoints
 * take the device and the skb; the *_exit tracepoints additionally take the
 * result code.
 *
 * NOTE(review): DECLARE_TRACE() presumably declares bare tracepoints only;
 * entries under tracing/events/ normally come from TRACE_EVENT() -- confirm
 * before relying on the events directory.
 */
DECLARE_TRACE(network_rx_entry,
    TP_PROTO(struct net_device *dev, struct sk_buff *skb),
    TP_ARGS(dev, skb));

DECLARE_TRACE(network_rx_exit,
    TP_PROTO(struct net_device *dev, struct sk_buff *skb, int ret),
    TP_ARGS(dev, skb, ret));

DECLARE_TRACE(network_tx_entry,
    TP_PROTO(struct net_device *dev, struct sk_buff *skb),
    TP_ARGS(dev, skb));

DECLARE_TRACE(network_tx_exit,
    TP_PROTO(struct net_device *dev, struct sk_buff *skb, int ret),
    TP_ARGS(dev, skb, ret));

/* 在代码中使用跟踪点 */
static int example_rx(struct net_device *dev, int budget)
{
    struct example_priv *priv = netdev_priv(dev);
    int received = 0;
    
    while (received < budget) {
        struct sk_buff *skb;
        int ret;
        
        /* 跟踪接收开始 */
        trace_network_rx_entry(dev, skb);
        
        /* 处理接收 */
        ret = example_process_rx_packet(dev, skb);
        
        /* 跟踪接收结束 */
        trace_network_rx_exit(dev, skb, ret);
        
        if (ret < 0)
            break;
            
        received++;
    }
    
    return received;
}

/* Tracepoint definitions matching the four DECLARE_TRACE() declarations */
DEFINE_TRACE(network_rx_entry);
DEFINE_TRACE(network_rx_exit);
DEFINE_TRACE(network_tx_entry);
DEFINE_TRACE(network_tx_exit);
使用ftrace调试
bash 复制代码
#!/bin/bash
# ftrace debugging script: enables the "network" trace events, clears the
# trace buffer, generates traffic with ping, prints the trace and disables
# the events again.

TRACING_DIR=/sys/kernel/debug/tracing
EVENT_SWITCH="$TRACING_DIR/events/network/enable"
TRACE_BUF="$TRACING_DIR/trace"

echo "启用网络跟踪点"
echo 1 > "$EVENT_SWITCH"

# Writing an empty line to the trace file clears the buffer
echo "开始跟踪"
echo > "$TRACE_BUF"

echo "运行测试"
ping -c 10 192.168.1.1

echo "查看跟踪结果"
cat "$TRACE_BUF"

echo "禁用跟踪点"
echo 0 > "$EVENT_SWITCH"

6.3 性能基准测试方法

吞吐量测试
c 复制代码
/* 内核模块性能测试 */
#include <linux/module.h>
#include <linux/time.h>
#include <linux/netdevice.h>

/*
 * State for the throughput test.
 *
 * NOTE(review): the field meanings below are inferred from names only; the
 * code that uses this struct is not visible here -- confirm.
 */
struct perf_test {
    struct timer_list timer;    /* presumably fires when the test ends */
    struct net_device *dev;     /* device under test */
    atomic_t packets;           /* packet counter */
    atomic_t bytes;             /* byte counter; NOTE(review): atomic_t is 32-bit and may wrap */
    u64 start_time;             /* test start timestamp (unit not shown) */
    u64 end_time;               /* test end timestamp (unit not shown) */
    int duration;               /* test duration (unit not shown) */
};
相关推荐
a123560mh4 小时前
国产信创操作系统银河麒麟常见软件适配(MongoDB、 Redis、Nginx、Tomcat)
linux·redis·nginx·mongodb·tomcat·kylin
赖small强4 小时前
【Linux驱动开发】Linux MMC子系统技术分析报告 - 第二部分:协议实现与性能优化
linux·驱动开发·mmc
guygg884 小时前
Linux服务器上安装配置GitLab
linux·运维·gitlab
百***35514 小时前
Linux(CentOS)安装 Nginx
linux·nginx·centos
tzhou644525 小时前
Linux文本处理工具:cut、sort、uniq、tr
linux·运维·服务器
顾安r6 小时前
11.19 脚本 最小web控制linux/termux
linux·服务器·css·flask
Saniffer_SH6 小时前
通过近期测试简单聊一下究竟是直接选择Nvidia Spark还是4090/5090 GPU自建环境
大数据·服务器·图像处理·人工智能·驱动开发·spark·硬件工程
程序媛_MISS_zhang_01106 小时前
vant-ui中List 组件可以与 PullRefresh 组件结合使用,实现下拉刷新的效果
java·linux·ui
dragoooon346 小时前
[Linux网络——Lesson2.socket套接字 && 简易UDP网络程序]
linux·网络·udp
大聪明-PLUS6 小时前
编程语言保证是安全软件开发的基础
linux·嵌入式·arm·smarc