phy降速自愈到100M重试流程分析

我们使用状态机模型,并在原有事件的基础上新增一个由用户通过Netlink触发的"强制设置速率"事件。

1. 定义状态、事件和Netlink消息类型

复制代码
// States of the PHY self-healing state machine.
// The explicit values equal the implicit numbering of the declaration order.
typedef enum {
    PHY_STATE_INIT      = 0, // startup value of current_state; restored by the cable plug handler
    PHY_STATE_2_5G      = 1, // 2.5G negotiation has been requested
    PHY_STATE_1G        = 2, // entered after the 2.5G link dropped and 1G was requested
    PHY_STATE_100M      = 3, // entered after the 1G link dropped and 100M was requested
    PHY_STATE_10M_RETRY = 4, // entered after the 100M link dropped; timer events retry 100M
    PHY_STATE_LINK_DOWN = 5  // entered after the 10M link dropped
} phy_healing_state_t;

// Events fed into phy_state_machine_handler().
// The explicit values equal the implicit numbering of the declaration order.
typedef enum {
    EVENT_LINK_UP        = 0, // link status bit went from clear to set
    EVENT_LINK_DOWN      = 1, // link status bit went from set to clear
    EVENT_TIMER_EXPIRED  = 2, // the retry timer fired
    EVENT_CABLE_PLUG     = 3, // a cable plug/unplug was detected
    EVENT_USER_SET_SPEED = 4  // user forces a speed via Netlink; event data points to the int speed
} phy_event_t;

// Netlink protocol number and message types shared with the user-space tools
#define NETLINK_PHY_FAMILY 31 // custom Netlink protocol number used for this channel
#define MSG_TYPE_SET_SPEED    0x14 // user requests a forced speed
#define MSG_TYPE_GET_STATUS   0x11 // user requests the current status

// Speed constants; the numeric value is the speed in Mbps
#define SPEED_10    10
#define SPEED_100   100
#define SPEED_1000  1000
#define SPEED_2500  2500

2. 状态机核心逻辑(集成Netlink速率设置)

复制代码
// Global state of the self-healing logic.
// NOTE(review): no locking around these variables is visible in this excerpt,
// although they are touched from interrupt, timer and Netlink paths — confirm.
phy_healing_state_t current_state = PHY_STATE_INIT;
// Retry timer, armed via mod_timer() while in PHY_STATE_10M_RETRY.
// NOTE(review): no timer_setup() call is shown here — confirm it is initialised elsewhere.
struct timer_list retry_timer;
int retry_interval_sec = 600; // default: 10 minutes

// PHY状态机主函数
void phy_state_machine_handler(phy_event_t event, void *data) {
    int target_speed;

    switch (current_state) {
        case PHY_STATE_INIT:
        case PHY_STATE_LINK_DOWN:
            if (event == EVENT_CABLE_PLUG || event == EVENT_LINK_UP) {
                printk(KERN_INFO "PHY Healing: Starting negotiation from 2.5G.\n");
                phy_set_speed_and_restart_autoneg(SPEED_2500);
                current_state = PHY_STATE_2_5G;
            } else if (event == EVENT_USER_SET_SPEED) {
                // 即使在Link Down状态,也尝试设置用户指定的速率
                target_speed = *(int*)data;
                printk(KERN_INFO "PHY Healing: User requests speed %d while link is down. Trying...\n", target_speed);
                phy_set_speed_and_restart_autoneg(target_speed);
                // 状态不改变,等待Link Up/Down事件来确认结果
            }
            break;

        case PHY_STATE_2_5G:
            if (event == EVENT_LINK_DOWN) {
                printk(KERN_INFO "PHY Healing: 2.5G link down, trying 1G.\n");
                phy_set_speed_and_restart_autoneg(SPEED_1000);
                current_state = PHY_STATE_1G;
            } else if (event == EVENT_USER_SET_SPEED) {
                target_speed = *(int*)data;
                printk(KERN_INFO "PHY Healing: User requests speed %d, overriding current 2.5G.\n", target_speed);
                phy_set_speed_and_restart_autoneg(target_speed);
                // 状态不改变,让自愈流程自然过渡到新速率对应的状态
            }
            break;

        case PHY_STATE_1G:
            if (event == EVENT_LINK_DOWN) {
                printk(KERN_INFO "PHY Healing: 1G link down, trying 100M.\n");
                phy_set_speed_and_restart_autoneg(SPEED_100);
                current_state = PHY_STATE_100M;
            } else if (event == EVENT_USER_SET_SPEED) {
                target_speed = *(int*)data;
                printk(KERN_INFO "PHY Healing: User requests speed %d, overriding current 1G.\n", target_speed);
                phy_set_speed_and_restart_autoneg(target_speed);
            }
            break;

        case PHY_STATE_100M:
            if (event == EVENT_LINK_DOWN) {
                printk(KERN_INFO "PHY Healing: 100M link down, trying 10M.\n");
                phy_set_speed_and_restart_autoneg(SPEED_10);
                current_state = PHY_STATE_10M_RETRY;
                on_link_established_at_10m();
            } else if (event == EVENT_USER_SET_SPEED) {
                target_speed = *(int*)data;
                printk(KERN_INFO "PHY Healing: User requests speed %d, overriding current 100M.\n", target_speed);
                phy_set_speed_and_restart_autoneg(target_speed);
            }
            break;

        case PHY_STATE_10M_RETRY:
            if (event == EVENT_LINK_DOWN) {
                printk(KERN_ERR "PHY Healing: 10M link down. Connection failed.\n");
                del_timer_sync(&retry_timer);
                current_state = PHY_STATE_LINK_DOWN;
            } else if (event == EVENT_TIMER_EXPIRED) {
                printk(KERN_INFO "PHY Healing: Timer expired, trying to upgrade from 10M to 100M.\n");
                phy_set_speed_and_restart_autoneg(SPEED_100);
            } else if (event == EVENT_USER_SET_SPEED) {
                // 用户可以随时打破10M的重试循环
                target_speed = *(int*)data;
                printk(KERN_INFO "PHY Healing: User requests speed %d, breaking 10M retry loop.\n", target_speed);
                del_timer_sync(&retry_timer); // 停止自动重试定时器
                phy_set_speed_and_restart_autoneg(target_speed);
            }
            break;
    }
}

// ... 其他辅助函数(phy_set_speed_and_restart_autoneg, retry_timer_callback等)保持不变 ...

复制代码
// Helper: program the PHY for `speed` and restart auto-negotiation.
//
// speed: requested speed, passed unchanged to speed_to_bmcr_bits().
//
// NOTE(review): with BMCR_ANENABLE set, IEEE 802.3 Clause 22 PHYs normally
// ignore the BMCR speed-select bits, and limiting the negotiated speed is
// usually done through the advertisement registers — confirm what
// speed_to_bmcr_bits() returns and that the target PHY honours it.
void phy_set_speed_and_restart_autoneg(int speed) {
    // Single MDIO write to the BMCR register: auto-negotiation enable + restart,
    // OR-ed with the speed bits for the requested speed.
    mdio_write(PHY_ADDR, MII_BMCR, BMCR_ANENABLE | BMCR_ANRESTART | speed_to_bmcr_bits(speed));
}

// 定时器到期回调函数
void retry_timer_callback(struct timer_list *t) {
    // 向状态机发送定时器到期事件
    phy_state_machine_handler(EVENT_TIMER_EXPIRED);
    // 重新启动定时器,形成循环
    mod_timer(&retry_timer, jiffies + msecs_to_jiffies(retry_interval_sec * 1000));
}

3. 事件触发与状态初始化

复制代码
// 当PHY中断发生时,在中断处理函数中调用
void phy_interrupt_handler() {
    int link_status = mdio_read(PHY_ADDR, MII_BMSR) & BMSR_LSTATUS;
    static int last_link_status = 0;

    if (link_status != last_link_status) {
        if (link_status) {
            phy_state_machine_handler(EVENT_LINK_UP);
        } else {
            phy_state_machine_handler(EVENT_LINK_DOWN);
        }
        last_link_status = link_status;
    }
}

// 当检测到拔插事件时(例如通过PHY的另一个中断引脚)
void cable_unplug_plug_handler() {
    printk(KERN_INFO "PHY Healing: Cable plug/unplug event detected. Resetting state machine.\n");
    del_timer_sync(&retry_timer); // 停止定时器
    current_state = PHY_STATE_INIT; // 重置状态
    phy_state_machine_handler(EVENT_CABLE_PLUG);
}

// Arms the retry timer for the 10M fallback.
// Does nothing unless the state machine is in PHY_STATE_10M_RETRY; otherwise
// schedules retry_timer to fire retry_interval_sec seconds from now.
void on_link_established_at_10m() {
    unsigned long expires;

    if (current_state != PHY_STATE_10M_RETRY) {
        return;
    }

    expires = jiffies + msecs_to_jiffies(retry_interval_sec * 1000);
    mod_timer(&retry_timer, expires);
}

PHY层到MAC层端口监控流程

此部分是整个自愈机制的基础,与之前描述相同。

流程图:

复制代码
+----------------+      +-----------------+      +---------------------+
|  PHY (物理层)   | <--> |  MDIO Bus (总线) | <--> |  MAC Driver (内核驱动) |
+----------------+      +-----------------+      +---------------------+
       |                         |                            |
       | 1. 物理链路变化          |                            |
       |    (插拔、信号质量变差)  |                            |
       V                         V                            V
+----------------+      +-----------------+      +---------------------+
| PHY自动协商     |----->| PHY更新内部寄存器 |----->| 驱动轮询/中断       |
| (Autonegotiation)|      | (如BMSR, Status) |      | 读取PHY寄存器       |
+----------------+      +-----------------+      +---------------------+
                                                        |
                                                        | 2. 读取Link状态和速率
                                                        V
                                               +---------------------+
                                               | 驱动解析寄存器值     |
                                               | (Link Up/Down, Speed)|
                                               +---------------------+
                                                        |
                                                        | 3. 触发状态机事件
                                                        V
                                               +---------------------+
                                               | 调用 phy_state_machine_handler() |
                                               +---------------------+

详细步骤:

  1. 物理事件:网线插入/拔除,或线路质量变化。
  2. PHY内部处理 :PHY芯片检测变化,启动自动协商,并将结果写入其标准寄存器(如BMSR的Link Status位,PHY Specific Status Register的速率/双工模式)。
  3. MAC驱动检测 :驱动通过中断 (高效)或轮询方式,得知PHY状态变化。
  4. 驱动解析与触发 :驱动读取PHY寄存器,解析出链路是Up还是Down,以及当前的速率。然后,它调用phy_state_machine_handler()函数,并传入相应的事件(EVENT_LINK_UP 或 EVENT_LINK_DOWN),从而启动自愈逻辑。

用户态到内核态数据交互 - Netlink速率设置流程

流程图(注意:下图描述的是传统的 socket + ioctl 用户态/内核态交互路径,仅作对比参考;本文实际采用的Netlink交互方式以下方的代码示例为准):

复制代码
+----------------+      +-----------------+      +---------------------+
|  用户态App     |      |  系统调用接口    |      |  内核网络驱动        |
| (e.g., ethtool)|----->|                  |----->|                     |
+----------------+      +-----------------+      +---------------------+
       | 1. open socket          | 2. ioctl()              | 3. .ndo_do_ioctl()
       |    & ioctl()            |                          |
       V                         V                          V
+----------------+      +-----------------+      +---------------------+
| 传递命令和数据   |----->| 内核拷贝数据     |----->| 驱动执行具体操作     |
| (struct ifreq)  |      | (copy_from_user) |      | (读/写驱动变量)      |
+----------------+      +-----------------+      +---------------------+
                                                        |
                                                        | 4. 返回结果
                                                        V
                                               +---------------------+
                                               | 内核拷贝数据回用户   |
                                               | (copy_to_user)       |
                                               +---------------------+

详细步骤与代码示例:

我们将创建一个Netlink通道,允许用户态程序直接命令内核驱动设置一个特定的PHY速率。

1. 内核态:创建Netlink套接字并处理"设置速率"消息

这部分代码通常在网络驱动的初始化部分。

复制代码
#include <linux/errno.h>
#include <linux/netlink.h>
#include <net/sock.h>

// Kernel-side Netlink socket; NULL until init_netlink() has created it.
struct sock *nl_sk = NULL;

// Netlink receive callback (registered as cfg.input in init_netlink()).
//
// skb: socket buffer holding one Netlink message from user space.
//
// MSG_TYPE_SET_SPEED: the payload is an int speed; supported values are
// forwarded to the state machine as EVENT_USER_SET_SPEED.
// MSG_TYPE_GET_STATUS: accepted but not answered yet (reply not implemented).
// No reply is sent back to user space for any message type.
//
// NOTE(review): no privilege check (e.g. CAP_NET_ADMIN) is performed, so any
// process able to open this Netlink protocol can change the PHY speed —
// confirm whether that is acceptable.
void nl_rcv_msg(struct sk_buff *skb) {
    struct nlmsghdr *nlh;
    int pid;
    int msg_type;
    int target_speed;

    // The message comes from user space: make sure a complete header is
    // present and the advertised length fits in the buffer before trusting it.
    nlh = (struct nlmsghdr *)skb->data;
    if (!NLMSG_OK(nlh, skb->len)) {
        printk(KERN_WARNING "PHY Healing: Dropping malformed Netlink message.\n");
        return;
    }

    pid = nlh->nlmsg_pid; // port id of the sender
    msg_type = nlh->nlmsg_type;

    printk(KERN_INFO "PHY Healing: Received Netlink message from user %d, type %d\n", pid, msg_type);

    switch (msg_type) {
        case MSG_TYPE_SET_SPEED:
            // The payload must be large enough to hold the int speed.
            if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(target_speed))) {
                printk(KERN_WARNING "PHY Healing: SET_SPEED message without speed payload.\n");
                break;
            }
            target_speed = *(int *)NLMSG_DATA(nlh);

            // Only the speeds known to the state machine are accepted.
            if (target_speed == SPEED_10 || target_speed == SPEED_100 ||
                target_speed == SPEED_1000 || target_speed == SPEED_2500) {

                printk(KERN_INFO "PHY Healing: User command to set speed to %dMbps.\n", target_speed);
                // Hand the request to the state machine as a user event.
                phy_state_machine_handler(EVENT_USER_SET_SPEED, &target_speed);
            } else {
                printk(KERN_WARNING "PHY Healing: Invalid speed %d requested by user.\n", target_speed);
            }
            // An acknowledgement could be sent back to user space here.
            break;

        case MSG_TYPE_GET_STATUS:
            // Status query: the reply path is not implemented yet.
            // send_status_to_user(pid);
            break;

        default:
            printk(KERN_WARNING "PHY Healing: Unknown Netlink message type %d\n", msg_type);
            break;
    }
}

// Creates the kernel Netlink socket; meant to be called from the driver's
// init function.
//
// Returns 0 on success, or -ENOMEM when netlink_kernel_create() fails
// (it reports failure only as a NULL pointer, without an error code).
int init_netlink(void) {
    struct netlink_kernel_cfg cfg = {
        .input = nl_rcv_msg, // invoked for every message received from user space
    };

    nl_sk = netlink_kernel_create(&init_net, NETLINK_PHY_FAMILY, &cfg);
    if (!nl_sk) {
        printk(KERN_ALERT "PHY Healing: Error creating Netlink socket.\n");
        // A named errno instead of the former bare -10 (which is -ECHILD).
        return -ENOMEM;
    }
    printk(KERN_INFO "PHY Healing: Netlink socket created for speed control.\n");
    return 0;
}

// 在驱动退出函数中销毁Netlink套接字
void exit_netlink(void) {
    if (nl_sk) {
        netlink_kernel_release(nl_sk);
        printk(KERN_INFO "PHY Healing: Netlink socket released.\n");
    }
}

2. 用户态:通过Netlink发送"设置速率"命令

这是一个简单的C程序,用于演示如何向内核驱动发送设置速率的命令。

复制代码
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>

// These two values must match the kernel-side definitions of the same names.
#define NETLINK_PHY_FAMILY 31
#define MSG_TYPE_SET_SPEED    0x14
#define MAX_PAYLOAD 256 // payload bytes allocated per message; more than enough for one int

// Command-line tool: sends a MSG_TYPE_SET_SPEED Netlink message carrying the
// speed given as argv[1] to the kernel.
//
// Returns 0 when the message was sent, -1 on a usage error, an invalid speed
// argument, or any socket/allocation failure.
int main(int argc, char **argv) {
    // Sanity bound for the argument (in Mbps); also guarantees the value fits
    // in an int. The kernel decides which speeds are actually supported.
    enum { SPEED_SANITY_MAX_MBPS = 1000000 };
    struct sockaddr_nl src_addr, dest_addr;
    struct nlmsghdr *nlh = NULL;
    char *end = NULL;
    long parsed;
    int target_speed;
    int sock_fd;
    int rc = -1;

    if (argc != 2) {
        fprintf(stderr, "Usage: %s <speed>\n", argv[0]);
        fprintf(stderr, "Example: %s 1000\n", argv[0]);
        return -1;
    }

    // strtol instead of atoi so that garbage or trailing characters are
    // rejected instead of silently becoming 0.
    parsed = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || parsed <= 0 || parsed > SPEED_SANITY_MAX_MBPS) {
        fprintf(stderr, "Invalid speed '%s'\n", argv[1]);
        return -1;
    }
    target_speed = (int)parsed;

    // 1. Create the Netlink socket.
    sock_fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_PHY_FAMILY);
    if (sock_fd < 0) {
        perror("socket");
        return -1;
    }

    // 2. Bind the source address, using our PID as the Netlink port id.
    memset(&src_addr, 0, sizeof(src_addr));
    src_addr.nl_family = AF_NETLINK;
    src_addr.nl_pid = getpid();
    if (bind(sock_fd, (struct sockaddr*)&src_addr, sizeof(src_addr)) < 0) {
        perror("bind");
        goto out;
    }

    // 3. Destination address: port id 0 addresses the kernel.
    memset(&dest_addr, 0, sizeof(dest_addr));
    dest_addr.nl_family = AF_NETLINK;
    dest_addr.nl_pid = 0;

    // 4. Build the message (zero-initialised header + payload area).
    nlh = calloc(1, NLMSG_SPACE(MAX_PAYLOAD));
    if (!nlh) {
        perror("calloc");
        goto out;
    }
    nlh->nlmsg_len = NLMSG_SPACE(MAX_PAYLOAD);
    nlh->nlmsg_pid = getpid();
    nlh->nlmsg_flags = 0;
    nlh->nlmsg_type = MSG_TYPE_SET_SPEED;

    // The payload is the requested speed as a native int.
    memcpy(NLMSG_DATA(nlh), &target_speed, sizeof(int));

    // 5. Send the message to the kernel.
    printf("Sending command to set PHY speed to %d Mbps...\n", target_speed);
    if (sendto(sock_fd, nlh, nlh->nlmsg_len, 0, (struct sockaddr*)&dest_addr, sizeof(dest_addr)) < 0) {
        perror("sendto");
    } else {
        printf("Command sent successfully.\n");
        rc = 0;
    }

out:
    // 6. Clean up (free(NULL) is a no-op).
    free(nlh);
    close(sock_fd);
    return rc;
}

定时器与重试逻辑(核心新需求)

根据你的要求,我们将创建一个独立的C程序,它内部包含定时和重试逻辑,而不是使用cron。

重要提示:让一个用户态程序24/7运行并每分钟检查一次时间,在资源消耗上不如cron高效。但为了满足你的具体要求,我们按此方式实现。

phy_scheduler.c - 完整的C程序代码

复制代码
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <time.h>
#include <sys/socket.h>
#include <linux/netlink.h>

// Protocol number and message types; must match the kernel-side definitions.
#define NETLINK_PHY_FAMILY 31
#define MSG_TYPE_SET_SPEED    0x14
#define MSG_TYPE_GET_STATUS   0x11
#define MAX_PAYLOAD 256 // payload bytes per Netlink message and size of response buffers
#define SLEEP_INTERVAL_SEC 60 // check the wall-clock time once per minute

// Sends one Netlink request to the kernel and waits for one reply.
//
// msg_type:        Netlink message type (MSG_TYPE_SET_SPEED / MSG_TYPE_GET_STATUS).
// data:            int payload placed at the start of the message data area.
// response_buffer: receives the reply payload as a NUL-terminated string; must
//                  provide at least MAX_PAYLOAD bytes. May be NULL to discard
//                  the reply.
//
// Returns 0 when a well-formed reply was received, -1 on any socket,
// allocation, send or receive failure.
//
// The recv() call blocks until the kernel sends a message to this socket.
// NOTE(review): the kernel handler shown in this document does not send any
// reply yet, so against that handler this function never returns.
int communicate_with_kernel(int msg_type, int data, char* response_buffer) {
    struct sockaddr_nl src_addr, dest_addr;
    struct nlmsghdr *nlh = NULL;
    size_t buf_len = NLMSG_SPACE(MAX_PAYLOAD);
    size_t payload_len;
    ssize_t received;
    int sock_fd;
    int rc = -1;

    sock_fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_PHY_FAMILY);
    if (sock_fd < 0) {
        perror("socket");
        return -1;
    }

    // Source address: our PID serves as the Netlink port id.
    memset(&src_addr, 0, sizeof(src_addr));
    src_addr.nl_family = AF_NETLINK;
    src_addr.nl_pid = getpid();
    if (bind(sock_fd, (struct sockaddr*)&src_addr, sizeof(src_addr)) < 0) {
        perror("bind");
        goto out;
    }

    // Destination address: port id 0 addresses the kernel.
    memset(&dest_addr, 0, sizeof(dest_addr));
    dest_addr.nl_family = AF_NETLINK;
    dest_addr.nl_pid = 0;

    nlh = calloc(1, buf_len);
    if (!nlh) {
        perror("calloc");
        goto out;
    }
    nlh->nlmsg_len = buf_len;
    nlh->nlmsg_pid = getpid();
    nlh->nlmsg_flags = 0;
    nlh->nlmsg_type = msg_type;
    memcpy(NLMSG_DATA(nlh), &data, sizeof(int));

    if (sendto(sock_fd, nlh, nlh->nlmsg_len, 0, (struct sockaddr*)&dest_addr, sizeof(dest_addr)) < 0) {
        perror("sendto");
        goto out;
    }

    // Receive the reply into the (reused) message buffer: what arrives is a
    // full Netlink message, i.e. a header followed by the payload.
    memset(nlh, 0, buf_len);
    received = recv(sock_fd, nlh, buf_len, 0);
    if (received < 0) {
        perror("recv");
        goto out;
    }
    if (!NLMSG_OK(nlh, (size_t)received)) {
        fprintf(stderr, "recv: malformed or truncated Netlink reply\n");
        goto out;
    }

    if (response_buffer) {
        // Strip the header and hand back only the payload, always terminated
        // so callers can treat it as a C string.
        payload_len = nlh->nlmsg_len - NLMSG_HDRLEN;
        if (payload_len > MAX_PAYLOAD - 1) {
            payload_len = MAX_PAYLOAD - 1;
        }
        memcpy(response_buffer, NLMSG_DATA(nlh), payload_len);
        response_buffer[payload_len] = '\0';
    }
    rc = 0;

out:
    free(nlh); // free(NULL) is a no-op
    close(sock_fd);
    return rc;
}

// Declared here because get_timestamp() is defined after this function.
char* get_timestamp();

// Returns 1 when `status` contains `needle` and the character right after the
// match is not a digit, 0 otherwise. This keeps "Speed: 10" from matching
// "Speed: 100" and "Speed: 100" from matching "Speed: 1000".
static int status_has_exact(const char *status, const char *needle) {
    size_t needle_len = strlen(needle);
    const char *hit = strstr(status, needle);

    while (hit) {
        char next = hit[needle_len];
        if (next < '0' || next > '9') {
            return 1;
        }
        hit = strstr(hit + 1, needle);
    }
    return 0;
}

// Core of the daily check: if the port is parked at 10M in the 10M_RETRY
// state, try up to five times to bring it up to 100M.
//
// Each attempt sends a set-speed command, waits for the link to settle and
// then queries the status again. Progress is printed to stdout.
//
// The status strings matched here ("State: ...", "Speed: ...") are an assumed
// reply format and must be adjusted to whatever the kernel really returns.
void perform_9am_retry(void) {
    enum {
        RETRY_TARGET_SPEED_MBPS = 100, // speed requested on every attempt
        RETRY_ATTEMPTS = 5,
        LINK_SETTLE_SEC = 5            // wait after each set-speed command
    };
    char response[MAX_PAYLOAD];

    printf("[%s] 9:00 AM check triggered.\n", get_timestamp());

    // 1. Fetch the current PHY status. The buffer is cleared first and
    //    force-terminated afterwards so it is always a valid C string.
    memset(response, 0, sizeof(response));
    if (communicate_with_kernel(MSG_TYPE_GET_STATUS, 0, response) != 0) {
        printf("[%s] Failed to get status from kernel.\n", get_timestamp());
        return;
    }
    response[sizeof(response) - 1] = '\0';

    printf("[%s] Current kernel status: %s\n", get_timestamp(), response);

    // 2. Act only when the state is "10M_RETRY" and the speed is exactly 10.
    if (!(strstr(response, "State: 10M_RETRY") && status_has_exact(response, "Speed: 10"))) {
        printf("[%s] Port is not in the required 10M_RETRY state. No action taken.\n", get_timestamp());
        return;
    }

    printf("[%s] Port is stable at 10M. Starting %d retries to 100M.\n", get_timestamp(), RETRY_ATTEMPTS);

    for (int i = 1; i <= RETRY_ATTEMPTS; i++) {
        printf("[%s] Attempt %d/%d: Setting speed to 100M.\n", get_timestamp(), i, RETRY_ATTEMPTS);

        // 3. Send the set-speed command.
        memset(response, 0, sizeof(response));
        if (communicate_with_kernel(MSG_TYPE_SET_SPEED, RETRY_TARGET_SPEED_MBPS, response) != 0) {
            printf("[%s] Failed to send set-speed command.\n", get_timestamp());
            continue;
        }

        // 4. Give the link time to settle.
        sleep(LINK_SETTLE_SEC);

        // 5. Query the status again.
        memset(response, 0, sizeof(response));
        if (communicate_with_kernel(MSG_TYPE_GET_STATUS, 0, response) != 0) {
            printf("[%s] Failed to get status after attempt %d.\n", get_timestamp(), i);
            continue;
        }
        response[sizeof(response) - 1] = '\0';

        if (status_has_exact(response, "Speed: 100")) {
            printf("[%s] Success! Port upgraded to 100M on attempt %d.\n", get_timestamp(), i);
            return; // done on the first success
        }
        printf("[%s] Attempt %d failed. Current status: %s\n", get_timestamp(), i, response);
    }
    printf("[%s] All %d retry attempts failed.\n", get_timestamp(), RETRY_ATTEMPTS);
}

// Formats the current local time as "YYYY-MM-DD HH:MM:SS".
// Returns a pointer to a static buffer that is overwritten by every call, so
// the result must be consumed (or copied) before the next call.
char* get_timestamp() {
    static char stamp[80];
    time_t now = time(NULL);

    strftime(stamp, sizeof(stamp), "%Y-%m-%d %H:%M:%S", localtime(&now));
    return stamp;
}

// Scheduler entry point: runs forever and triggers perform_9am_retry() once
// per day during the 09:00 minute (local time).
int main(void) {
    time_t rawtime;
    struct tm *timeinfo;
    int last_day = -1; // day of month of the last trigger, -1 = never
    unsigned int delay;

    printf("PHY Scheduler started. Waiting for 9:00 AM to trigger retry logic...\n");

    while (1) {
        time(&rawtime);
        timeinfo = localtime(&rawtime);

        // Fire at 09:00, at most once per calendar day.
        if (timeinfo && timeinfo->tm_hour == 9 && timeinfo->tm_min == 0 && timeinfo->tm_mday != last_day) {
            // Record the day first: localtime() returns shared static storage
            // that the get_timestamp() calls inside perform_9am_retry() overwrite.
            last_day = timeinfo->tm_mday;
            perform_9am_retry();
        }

        // Sleep until the next interval boundary instead of a fixed interval.
        // A plain sleep(SLEEP_INTERVAL_SEC) drifts by the loop overhead, so two
        // consecutive checks could land at 08:59:59 and 09:01:00 and skip the
        // 09:00 minute completely.
        time(&rawtime);
        timeinfo = localtime(&rawtime);
        delay = SLEEP_INTERVAL_SEC;
        if (timeinfo) {
            delay = SLEEP_INTERVAL_SEC - (timeinfo->tm_sec % SLEEP_INTERVAL_SEC);
        }
        sleep(delay);
    }

    return 0; // not reached
}
相关推荐
VekiSon1 小时前
Linux内核驱动——杂项设备驱动与内核模块编译
linux·c语言·arm开发·嵌入式硬件
AI+程序员在路上2 小时前
Nand Flash与EMMC区别及ARM开发板中的应用对比
arm开发
17(无规则自律)8 小时前
深入浅出 Linux 内核模块,写一个内核版的 Hello World
linux·arm开发·嵌入式硬件
梁洪飞20 小时前
内核的schedule和SMP多核处理器启动协议
linux·arm开发·嵌入式硬件·arm
代码游侠1 天前
学习笔记——Linux字符设备驱动
linux·运维·arm开发·嵌入式硬件·学习·架构
syseptember2 天前
Linux网络基础
linux·网络·arm开发
代码游侠2 天前
学习笔记——Linux字符设备驱动开发
linux·arm开发·驱动开发·单片机·嵌入式硬件·学习·算法
程序猿阿伟2 天前
《Apple Silicon与Windows on ARM:引擎原生构建与模拟层底层运作深度解析》
arm开发·windows
wkm9562 天前
在arm64 ubuntu系统安装Qt后编译时找不到Qt3DExtras头文件
开发语言·arm开发·qt
unicrom_深圳市由你创科技2 天前
基于ARM+DSP+FPGA异构计算架构的高速ADC采集卡定制方案
arm开发·fpga开发