phy降速自愈到100M重试流程分析

我们使用状态机模型,并在其中新增一个由用户通过Netlink触发的"强制设置速率"事件。

1. 定义状态、事件和Netlink消息类型

复制代码
/*
 * States of the PHY self-healing state machine.
 * The numeric values are spelled out so they stay stable if states are
 * reordered in the source.
 */
typedef enum {
    PHY_STATE_INIT      = 0, /* initial state, also re-entered on a cable plug event */
    PHY_STATE_2_5G      = 1, /* negotiating / running at 2.5G */
    PHY_STATE_1G        = 2, /* negotiating / running at 1G */
    PHY_STATE_100M      = 3, /* negotiating / running at 100M */
    PHY_STATE_10M_RETRY = 4, /* at 10M, periodically retrying 100M */
    PHY_STATE_LINK_DOWN = 5  /* every speed failed */
} phy_healing_state_t;

/*
 * Events fed into the PHY self-healing state machine.
 */
typedef enum {
    EVENT_LINK_UP        = 0, /* link status changed to up */
    EVENT_LINK_DOWN      = 1, /* link status changed to down */
    EVENT_TIMER_EXPIRED  = 2, /* retry timer fired */
    EVENT_CABLE_PLUG     = 3, /* cable plug/unplug detected */
    EVENT_USER_SET_SPEED = 4  /* new: user forces a speed through Netlink */
} phy_event_t;

// Netlink protocol number and message types shared with the user-space tools
#define NETLINK_PHY_FAMILY 31 // custom Netlink protocol number
#define MSG_TYPE_SET_SPEED    0x14 // user requests a specific speed
#define MSG_TYPE_GET_STATUS   0x11 // user queries the current status

// Link speed constants, in Mbps
#define SPEED_10    10
#define SPEED_100   100
#define SPEED_1000  1000
#define SPEED_2500  2500

2. 状态机核心逻辑(集成Netlink速率设置)

复制代码
// Global state of the self-healing logic
phy_healing_state_t current_state = PHY_STATE_INIT; // state the state machine is currently in
struct timer_list retry_timer; // timer armed while in PHY_STATE_10M_RETRY
int retry_interval_sec = 600; // retry period in seconds (default: 10 minutes)

/**
 * phy_state_machine_handler() - main entry point of the PHY self-healing
 * state machine.
 * @event: the event that occurred.
 * @data:  event payload. For EVENT_USER_SET_SPEED it must point to an int
 *         holding the requested speed in Mbps; it is ignored for every other
 *         event and may be NULL.
 *
 * On link loss the machine steps down 2.5G -> 1G -> 100M -> 10M. A user
 * request overrides the speed in any state without changing the state; the
 * following link up/down events decide where the machine goes next.
 *
 * NOTE(review): the EVENT_LINK_DOWN path in PHY_STATE_10M_RETRY calls
 * del_timer_sync(); confirm this handler is never invoked from hard-IRQ
 * context for that path.
 */
void phy_state_machine_handler(phy_event_t event, void *data) {
    int target_speed = 0;

    /* Decode the payload once, and refuse a user request that carries none. */
    if (event == EVENT_USER_SET_SPEED) {
        if (!data) {
            printk(KERN_WARNING "PHY Healing: User speed request without payload, ignoring.\n");
            return;
        }
        target_speed = *(int *)data;
    }

    switch (current_state) {
        case PHY_STATE_INIT:
        case PHY_STATE_LINK_DOWN:
            if (event == EVENT_CABLE_PLUG || event == EVENT_LINK_UP) {
                printk(KERN_INFO "PHY Healing: Starting negotiation from 2.5G.\n");
                phy_set_speed_and_restart_autoneg(SPEED_2500);
                current_state = PHY_STATE_2_5G;
            } else if (event == EVENT_USER_SET_SPEED) {
                /* Even while the link is down, try the user-requested speed. */
                printk(KERN_INFO "PHY Healing: User requests speed %d while link is down. Trying...\n", target_speed);
                phy_set_speed_and_restart_autoneg(target_speed);
                /* State unchanged: the next link up/down event confirms the result. */
            }
            break;

        case PHY_STATE_2_5G:
            if (event == EVENT_LINK_DOWN) {
                printk(KERN_INFO "PHY Healing: 2.5G link down, trying 1G.\n");
                phy_set_speed_and_restart_autoneg(SPEED_1000);
                current_state = PHY_STATE_1G;
            } else if (event == EVENT_USER_SET_SPEED) {
                printk(KERN_INFO "PHY Healing: User requests speed %d, overriding current 2.5G.\n", target_speed);
                phy_set_speed_and_restart_autoneg(target_speed);
                /* State unchanged: the healing flow transitions on later link events. */
            }
            break;

        case PHY_STATE_1G:
            if (event == EVENT_LINK_DOWN) {
                printk(KERN_INFO "PHY Healing: 1G link down, trying 100M.\n");
                phy_set_speed_and_restart_autoneg(SPEED_100);
                current_state = PHY_STATE_100M;
            } else if (event == EVENT_USER_SET_SPEED) {
                printk(KERN_INFO "PHY Healing: User requests speed %d, overriding current 1G.\n", target_speed);
                phy_set_speed_and_restart_autoneg(target_speed);
            }
            break;

        case PHY_STATE_100M:
            if (event == EVENT_LINK_DOWN) {
                printk(KERN_INFO "PHY Healing: 100M link down, trying 10M.\n");
                phy_set_speed_and_restart_autoneg(SPEED_10);
                current_state = PHY_STATE_10M_RETRY;
                /*
                 * NOTE(review): this arms the retry timer right after the
                 * 10M negotiation is started, not once the 10M link is
                 * actually up - confirm that is the intended timing.
                 */
                on_link_established_at_10m();
            } else if (event == EVENT_USER_SET_SPEED) {
                printk(KERN_INFO "PHY Healing: User requests speed %d, overriding current 100M.\n", target_speed);
                phy_set_speed_and_restart_autoneg(target_speed);
            }
            break;

        case PHY_STATE_10M_RETRY:
            if (event == EVENT_LINK_DOWN) {
                printk(KERN_ERR "PHY Healing: 10M link down. Connection failed.\n");
                del_timer_sync(&retry_timer);
                current_state = PHY_STATE_LINK_DOWN;
            } else if (event == EVENT_TIMER_EXPIRED) {
                printk(KERN_INFO "PHY Healing: Timer expired, trying to upgrade from 10M to 100M.\n");
                phy_set_speed_and_restart_autoneg(SPEED_100);
            } else if (event == EVENT_USER_SET_SPEED) {
                /* The user may break out of the 10M retry loop at any time. */
                printk(KERN_INFO "PHY Healing: User requests speed %d, breaking 10M retry loop.\n", target_speed);
                del_timer_sync(&retry_timer); /* stop the automatic retry timer */
                phy_set_speed_and_restart_autoneg(target_speed);
            }
            break;

        default:
            /* Unknown state value: ignore the event. */
            break;
    }
}

// ... 其他辅助函数(phy_set_speed_and_restart_autoneg, retry_timer_callback等)保持不变 ...

复制代码
/*
 * Helper: program the requested speed into the PHY control register over
 * MDIO and restart auto-negotiation.
 *
 * NOTE(review): the speed bits are written together with BMCR_ANENABLE;
 * confirm the PHY honours them while auto-negotiation is enabled, or whether
 * the advertisement registers have to be restricted instead.
 */
void phy_set_speed_and_restart_autoneg(int speed) {
    const int autoneg_bits = BMCR_ANENABLE | BMCR_ANRESTART;

    mdio_write(PHY_ADDR, MII_BMCR, autoneg_bits | speed_to_bmcr_bits(speed));
}

// 定时器到期回调函数
void retry_timer_callback(struct timer_list *t) {
    // 向状态机发送定时器到期事件
    phy_state_machine_handler(EVENT_TIMER_EXPIRED);
    // 重新启动定时器,形成循环
    mod_timer(&retry_timer, jiffies + msecs_to_jiffies(retry_interval_sec * 1000));
}

3. 事件触发与状态初始化

复制代码
// 当PHY中断发生时,在中断处理函数中调用
void phy_interrupt_handler() {
    int link_status = mdio_read(PHY_ADDR, MII_BMSR) & BMSR_LSTATUS;
    static int last_link_status = 0;

    if (link_status != last_link_status) {
        if (link_status) {
            phy_state_machine_handler(EVENT_LINK_UP);
        } else {
            phy_state_machine_handler(EVENT_LINK_DOWN);
        }
        last_link_status = link_status;
    }
}

// 当检测到拔插事件时(例如通过PHY的另一个中断引脚)
void cable_unplug_plug_handler() {
    printk(KERN_INFO "PHY Healing: Cable plug/unplug event detected. Resetting state machine.\n");
    del_timer_sync(&retry_timer); // 停止定时器
    current_state = PHY_STATE_INIT; // 重置状态
    phy_state_machine_handler(EVENT_CABLE_PLUG);
}

/*
 * Arm the retry timer for the 10M retry loop.
 * Does nothing unless the state machine is in PHY_STATE_10M_RETRY.
 */
void on_link_established_at_10m() {
    unsigned long expires;

    if (current_state != PHY_STATE_10M_RETRY) {
        return;
    }

    expires = jiffies + msecs_to_jiffies(retry_interval_sec * 1000);
    mod_timer(&retry_timer, expires);
}

PHY层到MAC层端口监控流程

此部分是整个自愈机制的基础,与之前描述相同。

流程图:

复制代码
+----------------+      +-----------------+      +---------------------+
|  PHY (物理层)   | <--> |  MDIO Bus (总线) | <--> |  MAC Driver (内核驱动) |
+----------------+      +-----------------+      +---------------------+
       |                         |                            |
       | 1. 物理链路变化          |                            |
       |    (插拔、信号质量变差)  |                            |
       V                         V                            V
+----------------+      +-----------------+      +---------------------+
| PHY自动协商     |----->| PHY更新内部寄存器 |----->| 驱动轮询/中断       |
| (Autonegotiation)|      | (如BMSR, Status) |      | 读取PHY寄存器       |
+----------------+      +-----------------+      +---------------------+
                                                        |
                                                        | 2. 读取Link状态和速率
                                                        V
                                               +---------------------+
                                               | 驱动解析寄存器值     |
                                               | (Link Up/Down, Speed)|
                                               +---------------------+
                                                        |
                                                        | 3. 触发状态机事件
                                                        V
                                               +---------------------+
                                               | 调用 phy_state_machine_handler() |
                                               +---------------------+

详细步骤:

  1. 物理事件:网线插入/拔除,或线路质量变化。
  2. PHY内部处理 :PHY芯片检测变化,启动自动协商,并将结果写入其标准寄存器(如BMSR的Link Status位,PHY Specific Status Register的速率/双工模式)。
  3. MAC驱动检测 :驱动通过中断 (高效)或轮询方式,得知PHY状态变化。
  4. 驱动解析与触发 :驱动读取PHY寄存器,解析出链路是Up还是Down,以及当前的速率。然后,它调用phy_state_machine_handler()函数,并传入相应的事件(EVENT_LINK_UP 或 EVENT_LINK_DOWN),从而启动自愈逻辑。

用户态到内核态数据交互 - Netlink速率设置流程

流程图:

复制代码
+----------------+      +-----------------+      +---------------------+
|  用户态App     |      |  系统调用接口    |      |  内核网络驱动        |
| (e.g., ethtool)|----->|                  |----->|                     |
+----------------+      +-----------------+      +---------------------+
       | 1. open socket          | 2. ioctl()              | 3. .ndo_do_ioctl()
       |    & ioctl()            |                          |
       V                         V                          V
+----------------+      +-----------------+      +---------------------+
| 传递命令和数据   |----->| 内核拷贝数据     |----->| 驱动执行具体操作     |
| (struct ifreq)  |      | (copy_from_user) |      | (读/写驱动变量)      |
+----------------+      +-----------------+      +---------------------+
                                                        |
                                                        | 4. 返回结果
                                                        V
                                               +---------------------+
                                               | 内核拷贝数据回用户   |
                                               | (copy_to_user)       |
                                               +---------------------+

详细步骤与代码示例:

我们将创建一个Netlink通道,允许用户态程序直接命令内核驱动设置一个特定的PHY速率。

1. 内核态:创建Netlink套接字并处理"设置速率"消息

这部分代码通常在网络驱动的初始化部分。

复制代码
#include <net/sock.h>
#include <linux/netlink.h>

// Kernel-side Netlink socket; NULL until init_netlink() creates it
struct sock *nl_sk = NULL;

// Netlink消息接收回调函数
void nl_rcv_msg(struct sk_buff *skb) {
    struct nlmsghdr *nlh;
    int pid;
    int msg_type;
    int target_speed;

    nlh = (struct nlmsghdr *)skb->data;
    pid = nlh->nlmsg_pid; // 发送消息的进程PID
    msg_type = nlh->nlmsg_type;

    printk(KERN_INFO "PHY Healing: Received Netlink message from user %d, type %d\n", pid, msg_type);

    switch (msg_type) {
        case MSG_TYPE_SET_SPEED:
            // 用户请求设置速率
            target_speed = *(int *)NLMSG_DATA(nlh);
            
            // 简单的有效性检查
            if (target_speed == SPEED_10 || target_speed == SPEED_100 || 
                target_speed == SPEED_1000 || target_speed == SPEED_2500) {
                
                printk(KERN_INFO "PHY Healing: User command to set speed to %dMbps.\n", target_speed);
                // 关键:调用状态机,传递用户设置速率事件
                phy_state_machine_handler(EVENT_USER_SET_SPEED, &target_speed);
            } else {
                printk(KERN_WARNING "PHY Healing: Invalid speed %d requested by user.\n", target_speed);
            }
            // 可以选择发送一个确认消息回用户态
            break;
        
        case MSG_TYPE_GET_STATUS:
            // 用户请求获取状态
            // send_status_to_user(pid); // 实现此函数以返回状态
            break;

        default:
            printk(KERN_WARNING "PHY Healing: Unknown Netlink message type %d\n", msg_type);
    }
}

// 在驱动初始化函数中创建Netlink套接字
int init_netlink(void) {
    struct netlink_kernel_cfg cfg = {
        .input = nl_rcv_msg,
    };

    nl_sk = netlink_kernel_create(&init_net, NETLINK_PHY_FAMILY, &cfg);
    if (!nl_sk) {
        printk(KERN_ALERT "PHY Healing: Error creating Netlink socket.\n");
        return -10;
    }
    printk(KERN_INFO "PHY Healing: Netlink socket created for speed control.\n");
    return 0;
}

/*
 * Release the kernel-side Netlink socket; call from the driver exit function.
 * Does nothing when the socket was never created.
 */
void exit_netlink(void) {
    if (!nl_sk) {
        return;
    }

    netlink_kernel_release(nl_sk);
    printk(KERN_INFO "PHY Healing: Netlink socket released.\n");
}

2. 用户态:通过Netlink发送"设置速率"命令

这是一个简单的C程序,用于演示如何向内核驱动发送设置速率的命令。

复制代码
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>

// Must match the values used by the kernel driver
#define NETLINK_PHY_FAMILY 31
#define MSG_TYPE_SET_SPEED    0x14
#define MAX_PAYLOAD 256 // more than enough to carry one int

/*
 * Send a "set speed" command to the kernel driver over Netlink.
 *
 * Usage: <program> <speed>, where speed is 10, 100, 1000 or 2500 (Mbps).
 *
 * Returns 0 when the command was sent, -1 on a usage error, invalid speed,
 * or any socket/allocation failure.
 */
int main(int argc, char **argv) {
    int sock_fd;
    int exit_code = -1;
    struct sockaddr_nl src_addr, dest_addr;
    struct nlmsghdr *nlh = NULL;
    int target_speed;
    long requested;
    char *end = NULL;

    if (argc != 2) {
        fprintf(stderr, "Usage: %s <speed>\n", argv[0]);
        fprintf(stderr, "Example: %s 1000\n", argv[0]);
        return -1;
    }

    /* strtol (unlike atoi) lets us reject non-numeric input and trailing junk. */
    requested = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' ||
        (requested != 10 && requested != 100 && requested != 1000 && requested != 2500)) {
        /* Same set of speeds the kernel handler accepts. */
        fprintf(stderr, "Invalid speed '%s' (expected 10, 100, 1000 or 2500).\n", argv[1]);
        return -1;
    }
    target_speed = (int)requested;

    /* 1. Create the Netlink socket. */
    sock_fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_PHY_FAMILY);
    if (sock_fd < 0) {
        perror("socket");
        return -1;
    }

    /* 2. Bind the source address. */
    memset(&src_addr, 0, sizeof(src_addr));
    src_addr.nl_family = AF_NETLINK;
    src_addr.nl_pid = getpid();

    if (bind(sock_fd, (struct sockaddr*)&src_addr, sizeof(src_addr)) < 0) {
        perror("bind");
        goto out;
    }

    /* 3. Prepare the destination address (the kernel). */
    memset(&dest_addr, 0, sizeof(dest_addr));
    dest_addr.nl_family = AF_NETLINK;
    dest_addr.nl_pid = 0; /* 0 addresses the kernel */

    /* 4. Build the message; calloc gives a zeroed buffer. */
    nlh = calloc(1, NLMSG_SPACE(MAX_PAYLOAD));
    if (!nlh) {
        perror("calloc");
        goto out;
    }
    nlh->nlmsg_len = NLMSG_SPACE(MAX_PAYLOAD);
    nlh->nlmsg_pid = getpid();
    nlh->nlmsg_flags = 0;
    nlh->nlmsg_type = MSG_TYPE_SET_SPEED;

    /* Copy the requested speed into the message payload. */
    memcpy(NLMSG_DATA(nlh), &target_speed, sizeof(int));

    /* 5. Send the message to the kernel. */
    printf("Sending command to set PHY speed to %d Mbps...\n", target_speed);
    if (sendto(sock_fd, nlh, nlh->nlmsg_len, 0, (struct sockaddr*)&dest_addr, sizeof(dest_addr)) < 0) {
        perror("sendto");
        goto out;
    }
    printf("Command sent successfully.\n");
    exit_code = 0;

out:
    /* 6. Clean up. */
    close(sock_fd);
    free(nlh);

    return exit_code;
}

定时器与重试逻辑(核心新需求)

根据你的要求,我们将创建一个独立的C程序,它内部包含定时和重试逻辑,而不是使用cron。

重要提示:让一个用户态程序24/7运行并每分钟检查一次时间,在资源消耗上不如cron高效。但为了满足你的具体要求,我们按此方式实现。

phy_scheduler.c - 完整的C程序代码

复制代码
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <linux/netlink.h>

// Must match the values used by the kernel driver
#define NETLINK_PHY_FAMILY 31
#define MSG_TYPE_SET_SPEED    0x14
#define MSG_TYPE_GET_STATUS   0x11
#define MAX_PAYLOAD 256
#define SLEEP_INTERVAL_SEC 60 // check the time once per minute

/*
 * Send one Netlink request to the kernel driver and collect its reply.
 *
 * msg_type:        Netlink message type (MSG_TYPE_SET_SPEED / MSG_TYPE_GET_STATUS).
 * data:            int payload sent with the request.
 * response_buffer: receives the reply payload as a NUL-terminated string.
 *                  Must hold at least MAX_PAYLOAD bytes; may be NULL when
 *                  the caller does not need the reply.
 *
 * Returns 0 when the request was sent and either a well-formed reply was
 * received or the kernel sent no reply within the timeout (the response is
 * then an empty string). Returns -1 on socket, bind, send or receive errors
 * and on malformed replies.
 */
int communicate_with_kernel(int msg_type, int data, char* response_buffer) {
    /* Unions keep the byte buffers correctly aligned for struct nlmsghdr. */
    union {
        struct nlmsghdr hdr;
        char raw[NLMSG_SPACE(MAX_PAYLOAD)];
    } request, reply;
    struct sockaddr_nl src_addr, dest_addr;
    /* Bound the wait for a reply; without it recv() blocks forever. */
    struct timeval rx_timeout = { .tv_sec = 2, .tv_usec = 0 };
    ssize_t received;
    int sock_fd;
    int ret = -1;

    /* Callers treat the response as a string, so never leave it unset. */
    if (response_buffer) {
        response_buffer[0] = '\0';
    }

    sock_fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_PHY_FAMILY);
    if (sock_fd < 0) {
        perror("socket");
        return -1;
    }

    memset(&src_addr, 0, sizeof(src_addr));
    src_addr.nl_family = AF_NETLINK;
    src_addr.nl_pid = getpid();

    if (bind(sock_fd, (struct sockaddr*)&src_addr, sizeof(src_addr)) < 0) {
        perror("bind");
        goto out;
    }

    if (setsockopt(sock_fd, SOL_SOCKET, SO_RCVTIMEO, &rx_timeout, sizeof(rx_timeout)) < 0) {
        perror("setsockopt");
        goto out;
    }

    memset(&dest_addr, 0, sizeof(dest_addr));
    dest_addr.nl_family = AF_NETLINK;
    dest_addr.nl_pid = 0; /* the kernel */

    memset(&request, 0, sizeof(request));
    request.hdr.nlmsg_len = NLMSG_SPACE(MAX_PAYLOAD);
    request.hdr.nlmsg_pid = getpid();
    request.hdr.nlmsg_flags = 0;
    request.hdr.nlmsg_type = msg_type;
    memcpy(NLMSG_DATA(&request.hdr), &data, sizeof(int));

    if (sendto(sock_fd, &request, request.hdr.nlmsg_len, 0,
               (struct sockaddr*)&dest_addr, sizeof(dest_addr)) < 0) {
        perror("sendto");
        goto out;
    }

    /* Receive the kernel's reply (header + payload). */
    memset(&reply, 0, sizeof(reply));
    received = recv(sock_fd, &reply, sizeof(reply), 0);
    if (received < 0) {
        if (errno == EAGAIN || errno == EWOULDBLOCK) {
            /* No reply in time: the request was still delivered. */
            ret = 0;
        } else {
            perror("recv");
        }
        goto out;
    }

    if (!NLMSG_OK(&reply.hdr, (unsigned int)received)) {
        fprintf(stderr, "Malformed Netlink reply from kernel.\n");
        goto out;
    }

    if (response_buffer) {
        /* Hand back only the payload, not the Netlink header, and terminate it. */
        size_t payload_len = reply.hdr.nlmsg_len - NLMSG_HDRLEN;

        if (payload_len > MAX_PAYLOAD - 1) {
            payload_len = MAX_PAYLOAD - 1;
        }
        memcpy(response_buffer, NLMSG_DATA(&reply.hdr), payload_len);
        response_buffer[payload_len] = '\0';
    }
    ret = 0;

out:
    close(sock_fd);
    return ret;
}

/* get_timestamp() is defined further down in this file. */
char *get_timestamp(void);

/* Speed value (Mbps) understood by the kernel driver; not otherwise defined in this program. */
#ifndef SPEED_100
#define SPEED_100 100
#endif

/*
 * Return non-zero when the status text contains a "Speed: <n>" field whose
 * number equals `speed` exactly. A plain substring search is not enough:
 * "Speed: 10" is also a prefix of "Speed: 100" and "Speed: 1000".
 */
static int response_reports_speed(const char *response, long speed) {
    static const char key[] = "Speed: ";
    const char *field = strstr(response, key);
    char *end = NULL;
    long value;

    if (!field) {
        return 0;
    }
    field += sizeof(key) - 1;
    value = strtol(field, &end, 10);
    return end != field && value == speed;
}

/*
 * Core logic of the daily check: if the port is parked at 10M in the
 * 10M_RETRY state, try up to 5 times to bring it up to 100M.
 *
 * The status text is expected to contain "State: ..." and "Speed: ..."
 * fields; adjust the matching to the format the kernel actually returns.
 */
void perform_9am_retry() {
    char response[MAX_PAYLOAD] = "";
    printf("[%s] 9:00 AM check triggered.\n", get_timestamp());

    /* 1. Fetch the current PHY status. */
    if (communicate_with_kernel(MSG_TYPE_GET_STATUS, 0, response) != 0) {
        printf("[%s] Failed to get status from kernel.\n", get_timestamp());
        return;
    }

    printf("[%s] Current kernel status: %s\n", get_timestamp(), response);

    /* 2. Only act when the state is 10M_RETRY and the speed is exactly 10. */
    if (strstr(response, "State: 10M_RETRY") && response_reports_speed(response, 10)) {
        printf("[%s] Port is stable at 10M. Starting 5 retries to 100M.\n", get_timestamp());

        for (int i = 1; i <= 5; i++) {
            printf("[%s] Attempt %d/5: Setting speed to 100M.\n", get_timestamp(), i);

            /* 3. Ask the kernel to switch to 100M. */
            if (communicate_with_kernel(MSG_TYPE_SET_SPEED, SPEED_100, response) != 0) {
                printf("[%s] Failed to send set-speed command.\n", get_timestamp());
                continue;
            }

            /* 4. Give the link time to settle. */
            sleep(5);

            /* 5. Check the status again. */
            if (communicate_with_kernel(MSG_TYPE_GET_STATUS, 0, response) != 0) {
                printf("[%s] Failed to get status after attempt %d.\n", get_timestamp(), i);
                continue;
            }

            if (response_reports_speed(response, SPEED_100)) {
                printf("[%s] Success! Port upgraded to 100M on attempt %d.\n", get_timestamp(), i);
                return; /* done on success */
            } else {
                printf("[%s] Attempt %d failed. Current status: %s\n", get_timestamp(), i, response);
            }
        }
        printf("[%s] All 5 retry attempts failed.\n", get_timestamp());
    } else {
        printf("[%s] Port is not in the required 10M_RETRY state. No action taken.\n", get_timestamp());
    }
}

/*
 * Format the current local time as "YYYY-MM-DD HH:MM:SS".
 * Returns a pointer to a static buffer that is overwritten by the next call.
 */
char* get_timestamp() {
    static char stamp[80];
    time_t now = time(NULL);

    strftime(stamp, sizeof(stamp), "%Y-%m-%d %H:%M:%S", localtime(&now));
    return stamp;
}

/*
 * Scheduler loop: once per day, during the 09:00 minute, run
 * perform_9am_retry(). Never returns in normal operation.
 */
int main() {
    int last_day = -1; /* day of month on which the retry last ran */

    printf("PHY Scheduler started. Waiting for 9:00 AM to trigger retry logic...\n");

    while (1) {
        time_t now = time(NULL);
        struct tm *local = localtime(&now);
        unsigned int nap = SLEEP_INTERVAL_SEC;

        /* Fire during the 09:00 minute, at most once per calendar day. */
        if (local && local->tm_hour == 9 && local->tm_min == 0 && local->tm_mday != last_day) {
            /* Copy the day first: localtime()'s static result is reused by get_timestamp(). */
            int today = local->tm_mday;

            perform_9am_retry();
            last_day = today; /* mark today as handled */
        }

        /*
         * Sleep only until the next interval boundary instead of a fixed
         * 60 s. A fixed sleep plus loop overhead slowly drifts, and a poll
         * at 08:59:59 followed by one at 09:01:00 would skip the 09:00
         * window entirely. Assumes SLEEP_INTERVAL_SEC divides 60.
         */
        now = time(NULL);
        local = localtime(&now);
        if (local) {
            nap = SLEEP_INTERVAL_SEC - (unsigned int)(local->tm_sec % SLEEP_INTERVAL_SEC);
        }
        sleep(nap);
    }

    return 0;
}
相关推荐
凉、介8 小时前
ARM 总线技术 —— APB
arm开发·笔记·学习
电鱼智能的电小鱼1 天前
基于电鱼 ARM 工控机的煤矿主控系统高可靠运行方案——让井下控制系统告别“死机与重启”
arm开发·人工智能·嵌入式硬件·深度学习·机器学习
陌上花开缓缓归以1 天前
linux系统启动失败之flash异常分析
arm开发
电鱼智能的电小鱼2 天前
基于电鱼 ARM 工控机的井下AI故障诊断方案——让煤矿远程监控更智能、更精准
网络·arm开发·人工智能·算法·边缘计算
GilgameshJSS2 天前
STM32H743-ARM例程35-DHCP
c语言·arm开发·stm32·单片机·嵌入式硬件
GilgameshJSS2 天前
STM32H743-ARM例程34-BootROM
c语言·arm开发·stm32·单片机·嵌入式硬件
robin8611092 天前
Keil(MDK-ARM)和 STM32CubeIDE对比
arm开发·stm32·嵌入式硬件
学习和思考3 天前
为什么我的vscode有的时候可以跳转,有的时候不能跳转
arm开发·ide·驱动开发·vscode·学习·1024程序员节
sunshine~~~3 天前
【笔记】macOs arm架构安装虚拟机Ubuntu环境:ROS2 + Python开发
arm开发·笔记·python·macos·ros2