我们使用状态机模型,并在其中新增一个由用户通过Netlink触发的"强制设置速率"事件。
1. 定义状态、事件和Netlink消息类型
/*
 * States of the PHY self-healing state machine.
 *
 * The machine walks down the speed ladder (2.5G -> 1G -> 100M -> 10M) each
 * time the link drops at the current rung.
 */
typedef enum {
    PHY_STATE_INIT       = 0, /* start-up value; also restored on a cable plug event */
    PHY_STATE_2_5G       = 1, /* negotiation at 2.5G has been started */
    PHY_STATE_1G         = 2, /* fell back to 1G after the 2.5G link dropped */
    PHY_STATE_100M       = 3, /* fell back to 100M after the 1G link dropped */
    PHY_STATE_10M_RETRY  = 4, /* fell back to 10M; timer-driven upgrade attempts run here */
    PHY_STATE_LINK_DOWN  = 5  /* the 10M link dropped as well */
} phy_healing_state_t;
/*
 * Events fed into phy_state_machine_handler().
 */
typedef enum {
    EVENT_LINK_UP        = 0, /* link status changed to up */
    EVENT_LINK_DOWN      = 1, /* link status changed to down */
    EVENT_TIMER_EXPIRED  = 2, /* the retry timer fired */
    EVENT_CABLE_PLUG     = 3, /* a cable plug/unplug was detected */
    EVENT_USER_SET_SPEED = 4  /* user forced a speed over Netlink; payload is an int speed */
} phy_event_t;
/* Netlink protocol number reserved for this driver (custom family). */
#define NETLINK_PHY_FAMILY   31

/* Netlink message types understood by the driver. */
#define MSG_TYPE_SET_SPEED   0x14   /* user sets the link speed */
#define MSG_TYPE_GET_STATUS  0x11   /* user queries the current status */

/* Supported link speeds, in Mbps. */
#define SPEED_10             10
#define SPEED_100            100
#define SPEED_1000           1000
#define SPEED_2500           2500
2. 状态机核心逻辑(集成Netlink速率设置)
/* Driver-wide state shared by the state machine, the timer and the handlers. */

/* State the self-healing machine is currently in. */
phy_healing_state_t current_state = PHY_STATE_INIT;

/* Timer that periodically raises EVENT_TIMER_EXPIRED. */
struct timer_list retry_timer;

/* Retry period in seconds; the default is 10 minutes. */
int retry_interval_sec = 10 * 60;
// PHY状态机主函数
void phy_state_machine_handler(phy_event_t event, void *data) {
int target_speed;
switch (current_state) {
case PHY_STATE_INIT:
case PHY_STATE_LINK_DOWN:
if (event == EVENT_CABLE_PLUG || event == EVENT_LINK_UP) {
printk(KERN_INFO "PHY Healing: Starting negotiation from 2.5G.\n");
phy_set_speed_and_restart_autoneg(SPEED_2500);
current_state = PHY_STATE_2_5G;
} else if (event == EVENT_USER_SET_SPEED) {
// 即使在Link Down状态,也尝试设置用户指定的速率
target_speed = *(int*)data;
printk(KERN_INFO "PHY Healing: User requests speed %d while link is down. Trying...\n", target_speed);
phy_set_speed_and_restart_autoneg(target_speed);
// 状态不改变,等待Link Up/Down事件来确认结果
}
break;
case PHY_STATE_2_5G:
if (event == EVENT_LINK_DOWN) {
printk(KERN_INFO "PHY Healing: 2.5G link down, trying 1G.\n");
phy_set_speed_and_restart_autoneg(SPEED_1000);
current_state = PHY_STATE_1G;
} else if (event == EVENT_USER_SET_SPEED) {
target_speed = *(int*)data;
printk(KERN_INFO "PHY Healing: User requests speed %d, overriding current 2.5G.\n", target_speed);
phy_set_speed_and_restart_autoneg(target_speed);
// 状态不改变,让自愈流程自然过渡到新速率对应的状态
}
break;
case PHY_STATE_1G:
if (event == EVENT_LINK_DOWN) {
printk(KERN_INFO "PHY Healing: 1G link down, trying 100M.\n");
phy_set_speed_and_restart_autoneg(SPEED_100);
current_state = PHY_STATE_100M;
} else if (event == EVENT_USER_SET_SPEED) {
target_speed = *(int*)data;
printk(KERN_INFO "PHY Healing: User requests speed %d, overriding current 1G.\n", target_speed);
phy_set_speed_and_restart_autoneg(target_speed);
}
break;
case PHY_STATE_100M:
if (event == EVENT_LINK_DOWN) {
printk(KERN_INFO "PHY Healing: 100M link down, trying 10M.\n");
phy_set_speed_and_restart_autoneg(SPEED_10);
current_state = PHY_STATE_10M_RETRY;
on_link_established_at_10m();
} else if (event == EVENT_USER_SET_SPEED) {
target_speed = *(int*)data;
printk(KERN_INFO "PHY Healing: User requests speed %d, overriding current 100M.\n", target_speed);
phy_set_speed_and_restart_autoneg(target_speed);
}
break;
case PHY_STATE_10M_RETRY:
if (event == EVENT_LINK_DOWN) {
printk(KERN_ERR "PHY Healing: 10M link down. Connection failed.\n");
del_timer_sync(&retry_timer);
current_state = PHY_STATE_LINK_DOWN;
} else if (event == EVENT_TIMER_EXPIRED) {
printk(KERN_INFO "PHY Healing: Timer expired, trying to upgrade from 10M to 100M.\n");
phy_set_speed_and_restart_autoneg(SPEED_100);
} else if (event == EVENT_USER_SET_SPEED) {
// 用户可以随时打破10M的重试循环
target_speed = *(int*)data;
printk(KERN_INFO "PHY Healing: User requests speed %d, breaking 10M retry loop.\n", target_speed);
del_timer_sync(&retry_timer); // 停止自动重试定时器
phy_set_speed_and_restart_autoneg(target_speed);
}
break;
}
}
// ... 其他辅助函数(phy_set_speed_and_restart_autoneg, retry_timer_callback等)保持不变 ...
/*
 * Program the PHY for the requested speed and restart auto-negotiation.
 *
 * Writes the control register (MII_BMCR) over MDIO with the auto-negotiation
 * enable and restart bits plus the bits speed_to_bmcr_bits() returns for
 * @speed.
 *
 * NOTE(review): on standard clause-22 PHYs the BMCR speed-select bits are
 * presumably ignored while BMCR_ANENABLE is set, the negotiated speed being
 * limited through the advertisement registers instead. Confirm against the
 * PHY datasheet that this write really constrains the speed.
 */
void phy_set_speed_and_restart_autoneg(int speed) {
    mdio_write(PHY_ADDR,
               MII_BMCR,
               speed_to_bmcr_bits(speed) | BMCR_ANRESTART | BMCR_ANENABLE);
}
// 定时器到期回调函数
void retry_timer_callback(struct timer_list *t) {
// 向状态机发送定时器到期事件
phy_state_machine_handler(EVENT_TIMER_EXPIRED);
// 重新启动定时器,形成循环
mod_timer(&retry_timer, jiffies + msecs_to_jiffies(retry_interval_sec * 1000));
}
3. 事件触发与状态初始化
// 当PHY中断发生时,在中断处理函数中调用
void phy_interrupt_handler() {
int link_status = mdio_read(PHY_ADDR, MII_BMSR) & BMSR_LSTATUS;
static int last_link_status = 0;
if (link_status != last_link_status) {
if (link_status) {
phy_state_machine_handler(EVENT_LINK_UP);
} else {
phy_state_machine_handler(EVENT_LINK_DOWN);
}
last_link_status = link_status;
}
}
// 当检测到拔插事件时(例如通过PHY的另一个中断引脚)
void cable_unplug_plug_handler() {
printk(KERN_INFO "PHY Healing: Cable plug/unplug event detected. Resetting state machine.\n");
del_timer_sync(&retry_timer); // 停止定时器
current_state = PHY_STATE_INIT; // 重置状态
phy_state_machine_handler(EVENT_CABLE_PLUG);
}
/*
 * Arm the retry timer for the 10M state.
 *
 * Does nothing unless the state machine is in PHY_STATE_10M_RETRY; otherwise
 * schedules retry_timer to fire retry_interval_sec seconds from now.
 */
void on_link_established_at_10m() {
    if (current_state != PHY_STATE_10M_RETRY) {
        return;
    }

    mod_timer(&retry_timer,
              jiffies + msecs_to_jiffies(retry_interval_sec * 1000));
}
PHY层到MAC层端口监控流程
此部分是整个自愈机制的基础,与之前描述相同。
流程图:
+----------------+ +-----------------+ +---------------------+
| PHY (物理层) | <--> | MDIO Bus (总线) | <--> | MAC Driver (内核驱动) |
+----------------+ +-----------------+ +---------------------+
| | |
| 1. 物理链路变化 | |
| (插拔、信号质量变差) | |
V V V
+----------------+ +-----------------+ +---------------------+
| PHY自动协商 |----->| PHY更新内部寄存器 |----->| 驱动轮询/中断 |
| (Autonegotiation)| | (如BMSR, Status) | | 读取PHY寄存器 |
+----------------+ +-----------------+ +---------------------+
|
| 2. 读取Link状态和速率
V
+---------------------+
| 驱动解析寄存器值 |
| (Link Up/Down, Speed)|
+---------------------+
|
| 3. 触发状态机事件
V
+---------------------+
| 调用 phy_state_machine_handler() |
+---------------------+
详细步骤:
- 物理事件:网线插入/拔除,或线路质量变化。
- PHY内部处理:PHY芯片检测变化,启动自动协商,并将结果写入其标准寄存器(如BMSR的Link Status位,PHY Specific Status Register的速率/双工模式)。
- MAC驱动检测:驱动通过中断(高效)或轮询方式,得知PHY状态变化。
- 驱动解析与触发:驱动读取PHY寄存器,解析出链路是Up还是Down,以及当前的速率。然后,它调用phy_state_machine_handler()函数,并传入相应的事件(EVENT_LINK_UP或EVENT_LINK_DOWN),从而启动自愈逻辑。
用户态到内核态数据交互 - Netlink速率设置流程
流程图:
+----------------+ +-----------------+ +---------------------+
| 用户态App | | 系统调用接口 | | 内核网络驱动 |
| (e.g., phy_scheduler)|----->| |----->| |
+----------------+ +-----------------+ +---------------------+
| 1. socket(AF_NETLINK) | 2. sendto() | 3. nl_rcv_msg()
| & bind() | |
V V V
+----------------+ +-----------------+ +---------------------+
| 传递命令和数据 |----->| 内核拷贝数据 |----->| 驱动执行具体操作 |
| (struct nlmsghdr) | | (拷贝到sk_buff) | | (调用状态机设置速率) |
+----------------+ +-----------------+ +---------------------+
|
| 4. 返回结果
V
+---------------------+
| 内核将响应发回用户态 |
| (Netlink单播消息) |
+---------------------+
详细步骤与代码示例:
我们将创建一个Netlink通道,允许用户态程序直接命令内核驱动设置一个特定的PHY速率。
1. 内核态:创建Netlink套接字并处理"设置速率"消息
这部分代码通常在网络驱动的初始化部分。
#include <net/sock.h>
#include <linux/netlink.h>
/* Kernel-side Netlink socket; NULL until init_netlink() has created it. */
struct sock *nl_sk = NULL;
// Netlink消息接收回调函数
void nl_rcv_msg(struct sk_buff *skb) {
struct nlmsghdr *nlh;
int pid;
int msg_type;
int target_speed;
nlh = (struct nlmsghdr *)skb->data;
pid = nlh->nlmsg_pid; // 发送消息的进程PID
msg_type = nlh->nlmsg_type;
printk(KERN_INFO "PHY Healing: Received Netlink message from user %d, type %d\n", pid, msg_type);
switch (msg_type) {
case MSG_TYPE_SET_SPEED:
// 用户请求设置速率
target_speed = *(int *)NLMSG_DATA(nlh);
// 简单的有效性检查
if (target_speed == SPEED_10 || target_speed == SPEED_100 ||
target_speed == SPEED_1000 || target_speed == SPEED_2500) {
printk(KERN_INFO "PHY Healing: User command to set speed to %dMbps.\n", target_speed);
// 关键:调用状态机,传递用户设置速率事件
phy_state_machine_handler(EVENT_USER_SET_SPEED, &target_speed);
} else {
printk(KERN_WARNING "PHY Healing: Invalid speed %d requested by user.\n", target_speed);
}
// 可以选择发送一个确认消息回用户态
break;
case MSG_TYPE_GET_STATUS:
// 用户请求获取状态
// send_status_to_user(pid); // 实现此函数以返回状态
break;
default:
printk(KERN_WARNING "PHY Healing: Unknown Netlink message type %d\n", msg_type);
}
}
// 在驱动初始化函数中创建Netlink套接字
int init_netlink(void) {
struct netlink_kernel_cfg cfg = {
.input = nl_rcv_msg,
};
nl_sk = netlink_kernel_create(&init_net, NETLINK_PHY_FAMILY, &cfg);
if (!nl_sk) {
printk(KERN_ALERT "PHY Healing: Error creating Netlink socket.\n");
return -10;
}
printk(KERN_INFO "PHY Healing: Netlink socket created for speed control.\n");
return 0;
}
// 在驱动退出函数中销毁Netlink套接字
void exit_netlink(void) {
if (nl_sk) {
netlink_kernel_release(nl_sk);
printk(KERN_INFO "PHY Healing: Netlink socket released.\n");
}
}
2. 用户态:通过Netlink发送"设置速率"命令
这是一个简单的C程序,用于演示如何向内核驱动发送设置速率的命令。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
/* Must match the values used by the kernel driver. */
#define NETLINK_PHY_FAMILY  31
#define MSG_TYPE_SET_SPEED  0x14

/* Payload bytes reserved per message; more than enough for one int. */
#define MAX_PAYLOAD         256
/* Returns 1 when speed is one of the rates the kernel driver accepts (Mbps). */
static int is_supported_speed(long speed)
{
    return speed == 10 || speed == 100 || speed == 1000 || speed == 2500;
}

/*
 * Send a "set speed" command to the kernel driver over Netlink.
 *
 * Usage: <program> <speed>, where speed is 10, 100, 1000 or 2500.
 *
 * Returns 0 when the command was handed to the kernel, -1 on a usage error
 * or when any socket operation fails.
 */
int main(int argc, char **argv) {
    struct sockaddr_nl src_addr, dest_addr;
    struct nlmsghdr *nlh = NULL;
    char *end = NULL;
    long parsed;
    int target_speed;
    int sock_fd;
    int status = -1;

    if (argc != 2) {
        fprintf(stderr, "Usage: %s <speed>\n", argv[0]);
        fprintf(stderr, "Example: %s 1000\n", argv[0]);
        return -1;
    }

    /* strtol instead of atoi so that garbage such as "abc" or "100x" is rejected. */
    parsed = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || !is_supported_speed(parsed)) {
        fprintf(stderr, "Invalid speed '%s' (expected 10, 100, 1000 or 2500).\n", argv[1]);
        return -1;
    }
    target_speed = (int)parsed;

    /* 1. Create the Netlink socket. */
    sock_fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_PHY_FAMILY);
    if (sock_fd < 0) {
        perror("socket");
        return -1;
    }

    /* 2. Bind the source address. */
    memset(&src_addr, 0, sizeof(src_addr));
    src_addr.nl_family = AF_NETLINK;
    src_addr.nl_pid = getpid();
    if (bind(sock_fd, (struct sockaddr*)&src_addr, sizeof(src_addr)) < 0) {
        perror("bind");
        goto out;
    }

    /* 3. Prepare the destination address (the kernel). */
    memset(&dest_addr, 0, sizeof(dest_addr));
    dest_addr.nl_family = AF_NETLINK;
    dest_addr.nl_pid = 0; /* 0 addresses the kernel */

    /* 4. Build the message. */
    nlh = malloc(NLMSG_SPACE(MAX_PAYLOAD));
    if (!nlh) {
        perror("malloc");
        goto out;
    }
    memset(nlh, 0, NLMSG_SPACE(MAX_PAYLOAD));
    nlh->nlmsg_len = NLMSG_SPACE(MAX_PAYLOAD);
    nlh->nlmsg_pid = getpid();
    nlh->nlmsg_flags = 0;
    nlh->nlmsg_type = MSG_TYPE_SET_SPEED;
    /* Copy the target speed into the message payload. */
    memcpy(NLMSG_DATA(nlh), &target_speed, sizeof(int));

    /* 5. Send the message to the kernel. */
    printf("Sending command to set PHY speed to %d Mbps...\n", target_speed);
    if (sendto(sock_fd, nlh, nlh->nlmsg_len, 0,
               (struct sockaddr*)&dest_addr, sizeof(dest_addr)) < 0) {
        perror("sendto");
    } else {
        printf("Command sent successfully.\n");
        status = 0;
    }

out:
    /* 6. Clean up on every path. */
    free(nlh);
    close(sock_fd);
    return status;
}
定时器与重试逻辑(核心新需求)
根据你的要求,我们将创建一个独立的C程序,它内部包含定时和重试逻辑,而不是使用cron。
重要提示:让一个用户态程序24/7运行并每分钟检查一次时间,在资源消耗上不如cron高效。但为了满足你的具体要求,我们按此方式实现。
phy_scheduler.c - 完整的C程序代码
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <linux/netlink.h>
/* Must match the values used by the kernel driver. */
#define NETLINK_PHY_FAMILY   31
#define MSG_TYPE_SET_SPEED   0x14
#define MSG_TYPE_GET_STATUS  0x11

/* Payload bytes reserved per Netlink message. */
#define MAX_PAYLOAD          256

/* Seconds between clock checks in the main loop (once per minute). */
#define SLEEP_INTERVAL_SEC   60
/*
 * Send one Netlink request to the kernel driver and wait for its reply.
 *
 * @msg_type:        Netlink message type (MSG_TYPE_SET_SPEED, MSG_TYPE_GET_STATUS).
 * @data:            int payload copied into the request.
 * @response_buffer: caller buffer of at least MAX_PAYLOAD bytes. On success it
 *                   receives the reply payload (Netlink header stripped) as a
 *                   NUL-terminated string, truncated to MAX_PAYLOAD - 1 bytes.
 *                   On failure it is set to the empty string. May be NULL if
 *                   the caller does not need the reply text.
 *
 * Returns 0 when a well-formed reply was received, -1 on any socket error,
 * on a malformed reply, or when the kernel does not answer within the
 * receive timeout.
 */
int communicate_with_kernel(int msg_type, int data, char* response_buffer) {
    enum { RESPONSE_TIMEOUT_SEC = 2 };
    struct sockaddr_nl src_addr, dest_addr;
    struct timeval timeout = { .tv_sec = RESPONSE_TIMEOUT_SEC, .tv_usec = 0 };
    struct nlmsghdr *nlh = NULL;
    const size_t msg_size = NLMSG_SPACE(MAX_PAYLOAD);
    size_t payload_len;
    ssize_t received;
    int sock_fd;
    int ret = -1;

    if (response_buffer) {
        response_buffer[0] = '\0';
    }

    sock_fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_PHY_FAMILY);
    if (sock_fd < 0) {
        perror("socket");
        return -1;
    }

    /* Bound the wait for a reply so a silent kernel cannot hang the caller forever. */
    if (setsockopt(sock_fd, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof(timeout)) < 0) {
        perror("setsockopt");
        goto out;
    }

    memset(&src_addr, 0, sizeof(src_addr));
    src_addr.nl_family = AF_NETLINK;
    src_addr.nl_pid = getpid();
    if (bind(sock_fd, (struct sockaddr*)&src_addr, sizeof(src_addr)) < 0) {
        perror("bind");
        goto out;
    }

    memset(&dest_addr, 0, sizeof(dest_addr));
    dest_addr.nl_family = AF_NETLINK;
    dest_addr.nl_pid = 0; /* the kernel */

    nlh = malloc(msg_size);
    if (!nlh) {
        goto out;
    }
    memset(nlh, 0, msg_size);
    nlh->nlmsg_len = msg_size;
    nlh->nlmsg_pid = getpid();
    nlh->nlmsg_flags = 0;
    nlh->nlmsg_type = msg_type;
    memcpy(NLMSG_DATA(nlh), &data, sizeof(int));

    if (sendto(sock_fd, nlh, nlh->nlmsg_len, 0,
               (struct sockaddr*)&dest_addr, sizeof(dest_addr)) < 0) {
        perror("sendto");
        goto out;
    }

    /* Receive the reply into the message buffer; it starts with a Netlink header. */
    memset(nlh, 0, msg_size);
    received = recv(sock_fd, nlh, msg_size, 0);
    if (received < 0) {
        perror("recv");
        goto out;
    }
    if (!NLMSG_OK(nlh, (int)received)) {
        fprintf(stderr, "recv: malformed Netlink reply\n");
        goto out;
    }

    /* Hand only the payload to the caller, as a terminated string. */
    if (response_buffer) {
        payload_len = nlh->nlmsg_len - NLMSG_HDRLEN;
        if (payload_len > MAX_PAYLOAD - 1) {
            payload_len = MAX_PAYLOAD - 1;
        }
        memcpy(response_buffer, NLMSG_DATA(nlh), payload_len);
        response_buffer[payload_len] = '\0';
    }
    ret = 0;

out:
    free(nlh);
    close(sock_fd);
    return ret;
}
/* Speed requested during the retries, in Mbps (same value as the kernel driver uses). */
#ifndef SPEED_100
#define SPEED_100 100
#endif

/* Defined later in this file. */
char *get_timestamp(void);

/*
 * Returns 1 when text contains "Speed: <speed>" with <speed> as the complete
 * number, so that 10 does not match "Speed: 100" and 100 does not match
 * "Speed: 1000". Only the first "Speed: " occurrence is examined.
 */
static int response_reports_speed(const char *text, int speed)
{
    static const char key[] = "Speed: ";
    const char *number = strstr(text, key);
    char *end = NULL;
    long value;

    if (!number) {
        return 0;
    }
    number += sizeof(key) - 1;
    value = strtol(number, &end, 10);
    return end != number && value == speed;
}

/*
 * Core of the 9:00 AM job: if the port is parked in the 10M retry state,
 * try up to 5 times to bring it up to 100M.
 *
 * Each attempt sends a set-speed command, waits 5 seconds for the link to
 * settle and then re-reads the status; the function returns as soon as the
 * status reports 100M.
 */
void perform_9am_retry(void) {
    char response[MAX_PAYLOAD] = {0};

    printf("[%s] 9:00 AM check triggered.\n", get_timestamp());

    /* 1. Fetch the current PHY status. */
    if (communicate_with_kernel(MSG_TYPE_GET_STATUS, 0, response) != 0) {
        printf("[%s] Failed to get status from kernel.\n", get_timestamp());
        return;
    }
    response[sizeof(response) - 1] = '\0'; /* never scan past the buffer */
    printf("[%s] Current kernel status: %s\n", get_timestamp(), response);

    /*
     * 2. Act only when the state is "10M_RETRY" and the speed is exactly 10.
     * The strings must be adapted to the format the kernel really returns.
     */
    if (!strstr(response, "State: 10M_RETRY") || !response_reports_speed(response, 10)) {
        printf("[%s] Port is not in the required 10M_RETRY state. No action taken.\n", get_timestamp());
        return;
    }

    printf("[%s] Port is stable at 10M. Starting 5 retries to 100M.\n", get_timestamp());
    for (int i = 1; i <= 5; i++) {
        printf("[%s] Attempt %d/5: Setting speed to 100M.\n", get_timestamp(), i);

        /* 3. Send the set-100M command. */
        if (communicate_with_kernel(MSG_TYPE_SET_SPEED, SPEED_100, response) != 0) {
            printf("[%s] Failed to send set-speed command.\n", get_timestamp());
            continue;
        }

        /* 4. Give the link time to settle. */
        sleep(5);

        /* 5. Check the status again. */
        if (communicate_with_kernel(MSG_TYPE_GET_STATUS, 0, response) != 0) {
            printf("[%s] Failed to get status after attempt %d.\n", get_timestamp(), i);
            continue;
        }
        response[sizeof(response) - 1] = '\0';

        if (response_reports_speed(response, SPEED_100)) {
            printf("[%s] Success! Port upgraded to 100M on attempt %d.\n", get_timestamp(), i);
            return; /* done on first success */
        }
        printf("[%s] Attempt %d failed. Current status: %s\n", get_timestamp(), i, response);
    }
    printf("[%s] All 5 retry attempts failed.\n", get_timestamp());
}
/*
 * Format the current local time as "YYYY-MM-DD HH:MM:SS".
 *
 * Returns a pointer to a static buffer that is overwritten by the next call,
 * so the result must be used (or copied) before calling again.
 */
char* get_timestamp() {
    static char buffer[80];
    time_t now = time(NULL);

    strftime(buffer, sizeof(buffer), "%Y-%m-%d %H:%M:%S", localtime(&now));
    return buffer;
}
/*
 * Seconds to sleep so that the next wake-up lands just after the next minute
 * boundary, capped at SLEEP_INTERVAL_SEC. Waking once per wall-clock minute
 * guarantees that the 09:00 minute is always sampled, whereas a fixed
 * 60-second sleep can drift and step over it.
 */
static unsigned int seconds_until_next_check(void)
{
    time_t now = time(NULL);
    const struct tm *local = localtime(&now);
    int remaining;

    if (!local) {
        return SLEEP_INTERVAL_SEC;
    }
    remaining = 60 - local->tm_sec;
    if (remaining < 1) {
        remaining = 1; /* tm_sec may be 60 during a leap second */
    }
    if (remaining > SLEEP_INTERVAL_SEC) {
        remaining = SLEEP_INTERVAL_SEC;
    }
    return (unsigned int)remaining;
}

/*
 * Scheduler entry point: runs forever and triggers perform_9am_retry() once
 * per day, during the 09:00 minute of local time.
 */
int main(void) {
    int last_day = -1;

    printf("PHY Scheduler started. Waiting for 9:00 AM to trigger retry logic...\n");
    while (1) {
        time_t rawtime = time(NULL);
        const struct tm *timeinfo = localtime(&rawtime);

        if (timeinfo) {
            /*
             * Copy the fields first: localtime() returns static storage that
             * perform_9am_retry() overwrites through get_timestamp().
             */
            const int hour = timeinfo->tm_hour;
            const int minute = timeinfo->tm_min;
            const int day = timeinfo->tm_mday;

            /* Fire at 9:00 sharp, and only once on any given day. */
            if (hour == 9 && minute == 0 && day != last_day) {
                perform_9am_retry();
                last_day = day; /* mark today as done */
            }
        }

        /* Sleep until the next minute instead of spinning. */
        sleep(seconds_until_next_check());
    }
    return 0;
}