zynq用普通网口在局域网同步

参考

zynq的网口和串口透传.csdn

j2b描述ethercat.csdn

目标

在普通以太网环境下,让不同从机产生同步的周期脉冲 (抖动约20us)

现象

未启动主机,从机输出的脉冲未对齐

启动主机后, 从机输出的脉冲在几秒内对齐

假设

P1: 局域网中的从机同时收到主机广播包

系统结构

text 复制代码
             UDP Broadcast SyncFrame
主站  -------------------------------->   从站 1
  |                                      从站 2
  |                                      从站 3
  v                                      ...
64 位纳秒基准时间                         本地PL里的64位纳秒计数器

BD

linux_udp_sync.tcl

irq_pulse_0和pulse_out_0 都是从机产生的同步脉冲, 只是pulse_out_0 的脉冲宽度大一些

ps_debug_out_0 是给ps测试用的引脚, 测试从机是否可以同步执行某些动作

AXI 地址映射表

Name Interface Slave Segment Master Base Address Range Master High Address
/axi_lite_ns_sync_pul_0/s_axi s_axi reg0 0x43C0_0000 64K 0x43C0_FFFF

PIN.xdc

bash 复制代码
## SPI SCLK (heading only; no SPI constraints follow in this listing)
## IIC (EMIO I2C clock/data pins)
set_property -dict {PACKAGE_PIN T11 IOSTANDARD LVCMOS33} [get_ports IIC_EMIO_scl_io]
set_property -dict {PACKAGE_PIN V5 IOSTANDARD LVCMOS33} [get_ports IIC_EMIO_sda_io]


## GPIO_EMIO (8-bit EMIO GPIO bus, bits 0..7)
set_property -dict {PACKAGE_PIN U5 IOSTANDARD LVCMOS33} [get_ports GPIO_EMIO_tri_io[0]]
set_property -dict {PACKAGE_PIN V7 IOSTANDARD LVCMOS33} [get_ports GPIO_EMIO_tri_io[1]]
set_property -dict {PACKAGE_PIN W8 IOSTANDARD LVCMOS33} [get_ports GPIO_EMIO_tri_io[2]]
set_property -dict {PACKAGE_PIN U9 IOSTANDARD LVCMOS33} [get_ports GPIO_EMIO_tri_io[3]]
set_property -dict {PACKAGE_PIN U10 IOSTANDARD LVCMOS33} [get_ports GPIO_EMIO_tri_io[4]]
set_property -dict {PACKAGE_PIN W6 IOSTANDARD LVCMOS33} [get_ports GPIO_EMIO_tri_io[5]]
set_property -dict {PACKAGE_PIN Y7 IOSTANDARD LVCMOS33} [get_ports GPIO_EMIO_tri_io[6]]
set_property -dict {PACKAGE_PIN Y9 IOSTANDARD LVCMOS33} [get_ports GPIO_EMIO_tri_io[7]]


# Interrupt event, synchronized pulse output, and the PS-driven debug pin.
# All three are 3.3 V LVCMOS outputs of the block design.
set_property -dict {PACKAGE_PIN W15 IOSTANDARD LVCMOS33} [get_ports irq_pulse_0]
set_property -dict {PACKAGE_PIN V15 IOSTANDARD LVCMOS33} [get_ports pulse_out_0]
set_property -dict {PACKAGE_PIN W14 IOSTANDARD LVCMOS33} [get_ports ps_debug_out_0]

PL代码

主要功能如下

为PS提供一个64位ns计数器

PL内部自动更新下一次脉冲产生的时刻

PS可以设置下次脉冲产生的时刻

PS可以操作ps_debug_out_0 引脚

ns_sync_pulse_core.v

verilog 复制代码
`timescale 1ns / 1ps

// ns_sync_pulse_core
//
// Free-running 64-bit nanosecond counter plus a periodic pulse generator,
// controlled through a simple 32-bit register write/read port.
//   - time_ns advances by PL_TICK_NS every clock while CONTROL bit0 is set.
//   - A pulse is started whenever time_ns reaches next_edge_ns (while both
//     CONTROL bit0 and bit1 are set); next_edge_ns is then advanced by the
//     adjusted period.
//   - Software can overwrite next_edge_ns or shift it by phase_delta_ns via
//     the COMMAND register.
//
// NOTE(review): this listing has no ps_debug_out port, while the pin
// constraints and the bare-metal test in the same document refer to a
// ps_debug_out signal - confirm which core revision is actually built.
module ns_sync_pulse_core #(
    parameter integer PL_TICK_NS              = 10,              // nanoseconds per PL clock period; 10 ns at 100 MHz
    parameter [31:0]  DEFAULT_PULSE_PERIOD_NS = 32'd1_000_000,   // reset value of the pulse period; 1_000_000 ns = 1 kHz
    parameter [31:0]  DEFAULT_PULSE_WIDTH_NS  = 32'd10_000,      // reset value of the pulse width
    parameter [31:0]  CORE_VERSION            = 32'h0001_0000    // IP version number returned by the VERSION register
)(
    input  wire        clk,           // PL working clock, 100 MHz recommended
    input  wire        rst_n,         // active-low reset, sampled synchronously on clk

    input  wire        reg_wr_en,     // register write strobe, generated by the AXI-Lite wrapper
    input  wire [7:0]  reg_wr_addr,   // register write address (byte address)
    input  wire [31:0] reg_wr_data,   // register write data

    input  wire [7:0]  reg_rd_addr,   // register read address (byte address)
    output reg  [31:0] reg_rd_data,   // register read data (combinational)

    output reg         pulse_out,     // hardware pulse output
    output reg         irq_pulse      // one-clock event asserted each time a pulse is started
);

    localparam [7:0] ADDR_CONTROL            = 8'h00; // CONTROL: bit0 counter enable, bit1 pulse output enable
    localparam [7:0] ADDR_STATUS             = 8'h04; // STATUS: bit0 pulse_out, bit1/2 enable states, bit3 time latch valid
    localparam [7:0] ADDR_TIME_LATCH_LOW     = 8'h08; // TIME_LATCH_LOW: latched time, low 32 bits, read-only
    localparam [7:0] ADDR_TIME_LATCH_HIGH    = 8'h0C; // TIME_LATCH_HIGH: latched time, high 32 bits, read-only
    localparam [7:0] ADDR_NEXT_EDGE_CFG_LOW  = 8'h10; // NEXT_EDGE_CFG_LOW: pending next edge, low 32 bits
    localparam [7:0] ADDR_NEXT_EDGE_CFG_HIGH = 8'h14; // NEXT_EDGE_CFG_HIGH: pending next edge, high 32 bits
    localparam [7:0] ADDR_PULSE_PERIOD_NS    = 8'h18; // PULSE_PERIOD_NS: pulse period in ns
    localparam [7:0] ADDR_PULSE_WIDTH_NS     = 8'h1C; // PULSE_WIDTH_NS: pulse width in ns
    localparam [7:0] ADDR_PHASE_DELTA_NS     = 8'h20; // PHASE_DELTA_NS: int32 phase correction; positive = earlier, negative = later
    localparam [7:0] ADDR_PERIOD_ADJ_NS      = 8'h24; // PERIOD_ADJ_NS: int32 period correction; positive = longer period, negative = shorter
    localparam [7:0] ADDR_COMMAND            = 8'h28; // COMMAND: write-only command register (reads back 0)
    localparam [7:0] ADDR_VERSION            = 8'h2C; // VERSION: version number, read-only
    localparam [7:0] ADDR_ACTIVE_EDGE_LOW    = 8'h30; // ACTIVE_EDGE_LOW: active next edge, low 32 bits, read-only
    localparam [7:0] ADDR_ACTIVE_EDGE_HIGH   = 8'h34; // ACTIVE_EDGE_HIGH: active next edge, high 32 bits, read-only
    localparam [7:0] ADDR_PULSE_COUNT_LOW    = 8'h38; // PULSE_COUNT_LOW: pulses generated, low 32 bits, read-only
    localparam [7:0] ADDR_PULSE_COUNT_HIGH   = 8'h3C; // PULSE_COUNT_HIGH: pulses generated, high 32 bits, read-only

    localparam CONTROL_COUNTER_ENABLE_BIT    = 0; // CONTROL bit0: counter enable
    localparam CONTROL_PULSE_ENABLE_BIT      = 1; // CONTROL bit1: pulse output enable

    localparam COMMAND_APPLY_NEXT_EDGE_BIT   = 0; // COMMAND bit0: copy NEXT_EDGE_CFG into the active next_edge_ns
    localparam COMMAND_APPLY_PHASE_BIT       = 1; // COMMAND bit1: apply phase correction, next_edge_ns -= phase_delta_ns
    localparam COMMAND_CLEAR_COUNTER_BIT     = 2; // COMMAND bit2: clear time_ns
    localparam COMMAND_LATCH_TIME_BIT        = 3; // COMMAND bit3: latch time_ns into time_latch_ns
    localparam COMMAND_CLEAR_PULSE_COUNT_BIT = 4; // COMMAND bit4: clear pulse_count

    reg [31:0] control_reg;      // CONTROL register storage; all 32 bits are kept, only bits 0 and 1 are decoded

    reg [63:0] time_ns;          // 64-bit local nanosecond counter; adds PL_TICK_NS per clock while enabled
    reg [63:0] time_latch_ns;    // 64-bit time snapshot; PS issues COMMAND_LATCH_TIME_BIT before reading it
    reg        time_latch_valid; // set by the first latch command, cleared only by reset; shown in STATUS bit3

    reg [63:0] next_edge_ns;     // active time of the next pulse edge
    reg [63:0] next_edge_cfg_ns; // shadow register for the next edge; written as low/high halves, then committed

    reg [31:0]        pulse_period_ns; // nominal output period in ns
    reg [31:0]        pulse_width_ns;  // output pulse width in ns
    reg signed [31:0] phase_delta_ns;  // phase correction; positive moves the edge earlier, negative later
    reg signed [31:0] period_adj_ns;   // frequency correction, combined as pulse_period_ns + period_adj_ns

    reg [63:0] pulse_end_ns;     // time at which the current pulse ends
    reg [63:0] pulse_count;      // number of pulses generated
    reg        pulse_active;     // high while the output pulse is in progress
    reg        pulse_enable_d;   // pulse_enable delayed by one clock, for rising-edge detection

    reg [63:0] period_step_ns;   // effective period: pulse_period_ns + period_adj_ns, clamped (combinational)
    reg [31:0] period_adj_abs;   // magnitude of period_adj_ns (combinational)

    wire counter_enable;         // counter enable decoded from control_reg
    wire pulse_enable;           // pulse enable decoded from control_reg
    wire command_wr;             // current write targets COMMAND
    wire command_update_edge;    // current COMMAND write will modify next_edge_ns

    assign counter_enable = control_reg[CONTROL_COUNTER_ENABLE_BIT];
    assign pulse_enable   = control_reg[CONTROL_PULSE_ENABLE_BIT];
    assign command_wr     = reg_wr_en && (reg_wr_addr == ADDR_COMMAND);
    assign command_update_edge = command_wr &&
                                 (reg_wr_data[COMMAND_APPLY_NEXT_EDGE_BIT] ||
                                  reg_wr_data[COMMAND_APPLY_PHASE_BIT]);

    // Convert a signed int32 to its magnitude (two's-complement negate when
    // the sign bit is set). The sequential logic then adds/subtracts unsigned
    // magnitudes, avoiding mixed signed/unsigned arithmetic.
    function [31:0] abs_s32;
        input signed [31:0] value;
        begin
            if (value[31]) begin
                abs_s32 = (~value[31:0]) + 32'd1;
            end else begin
                abs_s32 = value[31:0];
            end
        end
    endfunction

    // Zero-extend the pulse width to 64 bits, raising it to PL_TICK_NS when
    // it is smaller, so a pulse spans at least one PL clock period.
    function [63:0] normalize_width_ns;
        input [31:0] width_ns;
        begin
            if (width_ns < PL_TICK_NS) begin
                normalize_width_ns = PL_TICK_NS;
            end else begin
                normalize_width_ns = {32'd0, width_ns};
            end
        end
    endfunction

    // Compute the effective output period after frequency correction.
    // A negative period_adj_ns shortens the period, a positive one lengthens it.
    // The result is clamped to at least one PL clock period.
    always @(*) begin
        period_adj_abs = abs_s32(period_adj_ns);

        if (period_adj_ns[31]) begin
            // Negative adjustment: subtract the magnitude, guarding against underflow.
            if ({32'd0, pulse_period_ns} > {32'd0, period_adj_abs}) begin
                period_step_ns = {32'd0, pulse_period_ns} - {32'd0, period_adj_abs};
            end else begin
                period_step_ns = PL_TICK_NS;
            end
        end else begin
            period_step_ns = {32'd0, pulse_period_ns} + {32'd0, period_adj_ns[31:0]};
        end

        // Final lower bound, also covers a programmed period below one tick.
        if (period_step_ns < PL_TICK_NS) begin
            period_step_ns = PL_TICK_NS;
        end
    end

    // Main sequential logic. Later nonblocking assignments to the same
    // register override earlier ones in the same clock, so statement order
    // below defines priority.
    always @(posedge clk) begin
        if (!rst_n) begin
            control_reg       <= 32'd0;
            time_ns           <= 64'd0;
            time_latch_ns     <= 64'd0;
            time_latch_valid  <= 1'b0;
            next_edge_ns      <= 64'd0;
            next_edge_cfg_ns  <= 64'd0;
            pulse_period_ns   <= DEFAULT_PULSE_PERIOD_NS;
            pulse_width_ns    <= DEFAULT_PULSE_WIDTH_NS;
            phase_delta_ns    <= 32'sd0;
            period_adj_ns     <= 32'sd0;
            pulse_end_ns      <= 64'd0;
            pulse_count       <= 64'd0;
            pulse_active      <= 1'b0;
            pulse_enable_d    <= 1'b0;
            pulse_out         <= 1'b0;
            irq_pulse         <= 1'b0;
        end else begin
            // irq_pulse is a single-clock event; default it low every cycle.
            irq_pulse <= 1'b0;

            // Handle register writes forwarded by the external bus wrapper.
            if (reg_wr_en) begin
                case (reg_wr_addr)
                    ADDR_CONTROL: begin
                        control_reg <= reg_wr_data;
                    end

                    ADDR_NEXT_EDGE_CFG_LOW: begin
                        next_edge_cfg_ns[31:0] <= reg_wr_data;
                    end

                    ADDR_NEXT_EDGE_CFG_HIGH: begin
                        next_edge_cfg_ns[63:32] <= reg_wr_data;
                    end

                    ADDR_PULSE_PERIOD_NS: begin
                        pulse_period_ns <= reg_wr_data;
                    end

                    ADDR_PULSE_WIDTH_NS: begin
                        pulse_width_ns <= reg_wr_data;
                    end

                    ADDR_PHASE_DELTA_NS: begin
                        phase_delta_ns <= reg_wr_data;
                    end

                    ADDR_PERIOD_ADJ_NS: begin
                        period_adj_ns <= reg_wr_data;
                    end

                    default: begin
                        // Writes to read-only and undefined addresses are ignored.
                        // COMMAND writes are decoded separately below via command_wr.
                    end
                endcase
            end

            // Latch the current time. The snapshot is time_ns before this
            // cycle's increment.
            if (command_wr && reg_wr_data[COMMAND_LATCH_TIME_BIT]) begin
                time_latch_ns    <= time_ns;
                time_latch_valid <= 1'b1;
            end

            // Clear the pulse counter on command. A pulse started in the same
            // clock (further below) overrides this with pulse_count + 1.
            if (command_wr && reg_wr_data[COMMAND_CLEAR_PULSE_COUNT_BIT]) begin
                pulse_count <= 64'd0;
            end

            // The 64-bit time counter increments monotonically while enabled.
            // clear_counter takes priority over normal counting.
            if (command_wr && reg_wr_data[COMMAND_CLEAR_COUNTER_BIT]) begin
                time_ns <= 64'd0;
            end else if (counter_enable) begin
                time_ns <= time_ns + PL_TICK_NS;
            end

            // Remember last cycle's pulse_enable to detect its rising edge.
            pulse_enable_d <= pulse_enable;

            // COMMAND updates to next_edge_ns take effect at any time, so the
            // PS can program the next edge first and enable pulses afterwards.
            // APPLY_NEXT_EDGE wins when both command bits are set.
            if (command_wr && reg_wr_data[COMMAND_APPLY_NEXT_EDGE_BIT]) begin
                next_edge_ns <= next_edge_cfg_ns;
            end else if (command_wr && reg_wr_data[COMMAND_APPLY_PHASE_BIT]) begin
                if (phase_delta_ns[31]) begin
                    // Negative phase_delta_ns: delay the next edge.
                    next_edge_ns <= next_edge_ns + {32'd0, abs_s32(phase_delta_ns)};
                end else if (next_edge_ns > {32'd0, phase_delta_ns[31:0]}) begin
                    // Positive phase_delta_ns: advance the next edge.
                    next_edge_ns <= next_edge_ns - {32'd0, phase_delta_ns[31:0]};
                end else begin
                    // The subtraction would reach or pass zero: saturate at 0.
                    next_edge_ns <= 64'd0;
                end
            end

            if (!(counter_enable && pulse_enable)) begin
                // Either enable is off: force the output low.
                pulse_active <= 1'b0;
                pulse_out    <= 1'b0;
            end else begin
                // End the current pulse once the configured width has elapsed.
                if (pulse_active && (time_ns >= pulse_end_ns)) begin
                    pulse_active <= 1'b0;
                    pulse_out    <= 1'b0;
                end

                // COMMAND updates to next_edge_ns have priority over automatic scheduling.
                if (command_update_edge) begin
                    // This cycle only executes the command; no pulse is
                    // started and the edge is not auto-advanced.
                end else if (!pulse_enable_d && pulse_enable && (next_edge_ns <= time_ns)) begin
                    // On the rising edge of pulse_enable, if next_edge_ns is
                    // already in the past, reschedule it one period from now
                    // instead of emitting a burst of catch-up pulses.
                    next_edge_ns <= time_ns + period_step_ns;
                end else if (time_ns >= next_edge_ns) begin
                    // Start a pulse event and schedule the next period's edge.
                    // The edge advances from next_edge_ns (not time_ns), so
                    // this branch repeats on following clocks for as long as
                    // next_edge_ns is still not ahead of time_ns.
                    pulse_active <= 1'b1;
                    pulse_out    <= 1'b1;
                    irq_pulse    <= 1'b1;
                    pulse_end_ns <= time_ns + normalize_width_ns(pulse_width_ns);
                    next_edge_ns <= next_edge_ns + period_step_ns;
                    pulse_count  <= pulse_count + 64'd1;
                end
            end
        end
    end

    // Combinational read-data multiplexer.
    // 64-bit values are exposed to the PS as low/high 32-bit register pairs.
    always @(*) begin
        case (reg_rd_addr)
            ADDR_CONTROL: begin
                reg_rd_data = control_reg;
            end

            ADDR_STATUS: begin
                reg_rd_data = 32'd0;
                reg_rd_data[0] = pulse_out;
                reg_rd_data[1] = counter_enable;
                reg_rd_data[2] = pulse_enable;
                reg_rd_data[3] = time_latch_valid;
            end

            ADDR_TIME_LATCH_LOW: begin
                reg_rd_data = time_latch_ns[31:0];
            end

            ADDR_TIME_LATCH_HIGH: begin
                reg_rd_data = time_latch_ns[63:32];
            end

            ADDR_NEXT_EDGE_CFG_LOW: begin
                reg_rd_data = next_edge_cfg_ns[31:0];
            end

            ADDR_NEXT_EDGE_CFG_HIGH: begin
                reg_rd_data = next_edge_cfg_ns[63:32];
            end

            ADDR_PULSE_PERIOD_NS: begin
                reg_rd_data = pulse_period_ns;
            end

            ADDR_PULSE_WIDTH_NS: begin
                reg_rd_data = pulse_width_ns;
            end

            ADDR_PHASE_DELTA_NS: begin
                reg_rd_data = phase_delta_ns;
            end

            ADDR_PERIOD_ADJ_NS: begin
                reg_rd_data = period_adj_ns;
            end

            ADDR_COMMAND: begin
                // COMMAND is write-only and always reads as zero.
                reg_rd_data = 32'd0;
            end

            ADDR_VERSION: begin
                reg_rd_data = CORE_VERSION;
            end

            ADDR_ACTIVE_EDGE_LOW: begin
                reg_rd_data = next_edge_ns[31:0];
            end

            ADDR_ACTIVE_EDGE_HIGH: begin
                reg_rd_data = next_edge_ns[63:32];
            end

            ADDR_PULSE_COUNT_LOW: begin
                reg_rd_data = pulse_count[31:0];
            end

            ADDR_PULSE_COUNT_HIGH: begin
                reg_rd_data = pulse_count[63:32];
            end

            default: begin
                // Undefined addresses read as zero.
                reg_rd_data = 32'd0;
            end
        endcase
    end

endmodule

axi_lite_ns_sync_pulse.v

verilog 复制代码
`timescale 1ns / 1ps

// axi_lite_ns_sync_pulse
//
// AXI4-Lite slave wrapper around ns_sync_pulse_core. It converts AXI-Lite
// write and read transactions into the core's single-cycle register write
// strobe and combinational register read port, and passes the core's
// pulse_out / irq_pulse outputs through. The core runs on the AXI clock and
// reset.
module axi_lite_ns_sync_pulse #(
    parameter integer PL_TICK_NS              = 10,              // nanoseconds per PL clock period; 10 ns at 100 MHz
    parameter [31:0]  DEFAULT_PULSE_PERIOD_NS = 32'd1_000_000,   // reset value of the pulse period; 1_000_000 ns = 1 kHz
    parameter [31:0]  DEFAULT_PULSE_WIDTH_NS  = 32'd10_000,      // reset value of the pulse width
    parameter [31:0]  CORE_VERSION            = 32'h0001_0000    // IP version number
)(
    input  wire        s_axi_aclk,    // AXI-Lite clock, also the core's working clock
    input  wire        s_axi_aresetn, // AXI-Lite active-low reset, also the core's reset

    input  wire [7:0]  s_axi_awaddr,  // AXI-Lite write address, covers register space 0x00~0x3C
    input  wire [2:0]  s_axi_awprot,  // AXI-Lite protection attribute, unused by this module
    input  wire        s_axi_awvalid, // AXI-Lite write address valid
    output wire        s_axi_awready, // AXI-Lite write address ready

    input  wire [31:0] s_axi_wdata,   // AXI-Lite write data, fixed 32 bits
    input  wire [3:0]  s_axi_wstrb,   // AXI-Lite byte write strobes
    input  wire        s_axi_wvalid,  // AXI-Lite write data valid
    output wire        s_axi_wready,  // AXI-Lite write data ready

    output reg  [1:0]  s_axi_bresp,   // AXI-Lite write response, always OKAY
    output reg         s_axi_bvalid,  // AXI-Lite write response valid
    input  wire        s_axi_bready,  // AXI-Lite write response ready

    input  wire [7:0]  s_axi_araddr,  // AXI-Lite read address, covers register space 0x00~0x3C
    input  wire [2:0]  s_axi_arprot,  // AXI-Lite protection attribute, unused by this module
    input  wire        s_axi_arvalid, // AXI-Lite read address valid
    output wire        s_axi_arready, // AXI-Lite read address ready

    output reg  [31:0] s_axi_rdata,   // AXI-Lite read data, fixed 32 bits
    output reg  [1:0]  s_axi_rresp,   // AXI-Lite read response, always OKAY
    output reg         s_axi_rvalid,  // AXI-Lite read response valid
    input  wire        s_axi_rready,  // AXI-Lite read response ready

    output wire        pulse_out,     // hardware pulse from the core
    output wire        irq_pulse      // one-clock event from the core each time a pulse is started
);

    localparam [1:0] AXI_RESP_OKAY = 2'b00; // AXI-Lite OKAY response code

    reg        aw_holding;     // a write address has been accepted and is waiting for its data
    reg [7:0]  awaddr_reg;     // buffered write address
    reg        w_holding;      // write data has been accepted and is waiting for its address
    reg [31:0] wdata_reg;      // buffered write data
    reg [3:0]  wstrb_reg;      // buffered byte write strobes

    wire       aw_accept;      // write address handshake occurs this cycle
    wire       w_accept;       // write data handshake occurs this cycle
    wire       write_fire;     // address and data are both available this cycle; the core is written
    wire [7:0] write_addr_raw; // raw write address, from the current handshake or the buffer
    wire [7:0] write_addr;     // write address aligned to a 32-bit register
    wire [31:0] write_data;    // raw write data, from the current handshake or the buffer
    wire [3:0]  write_strb;    // write byte strobes, from the current handshake or the buffer

    wire       ar_accept;      // read address handshake occurs this cycle
    wire [7:0] read_addr;      // read address aligned to a 32-bit register

    wire [7:0]  core_reg_rd_addr; // register read address driven into the core
    wire [31:0] core_reg_rd_data; // register read data returned by the core
    wire        core_reg_wr_en;   // register write strobe driven into the core
    wire [7:0]  core_reg_wr_addr; // register write address driven into the core
    wire [31:0] core_reg_wr_data; // register write data driven into the core

    // Accept a new address/data beat only when none is buffered and no write
    // response is still pending.
    assign s_axi_awready = (!aw_holding) && (!s_axi_bvalid);
    assign s_axi_wready  = (!w_holding)  && (!s_axi_bvalid);

    assign aw_accept = s_axi_awvalid && s_axi_awready;
    assign w_accept  = s_axi_wvalid  && s_axi_wready;

    assign write_fire = (!s_axi_bvalid) &&
                        (aw_holding || aw_accept) &&
                        (w_holding  || w_accept);

    assign write_addr_raw = aw_accept ? s_axi_awaddr : awaddr_reg;
    assign write_addr     = {write_addr_raw[7:2], 2'b00};
    assign write_data     = w_accept ? s_axi_wdata : wdata_reg;
    assign write_strb     = w_accept ? s_axi_wstrb : wstrb_reg;

    // Reads are stalled in the cycle a write fires because the core's single
    // read port is borrowed for the strobe merge below.
    // NOTE(review): this makes s_axi_arready a combinational function of
    // s_axi_awvalid / s_axi_wvalid (via write_fire); check this against the
    // AXI rule on combinational paths between interface inputs and outputs.
    assign s_axi_arready = (!s_axi_rvalid) && (!write_fire);
    assign ar_accept     = s_axi_arvalid && s_axi_arready;
    assign read_addr     = {s_axi_araddr[7:2], 2'b00};

    // During a write the read port is pointed at the register being written,
    // so its current value can supply the bytes whose strobe is low.
    assign core_reg_rd_addr = write_fire ? write_addr : (ar_accept ? read_addr : 8'h00);
    assign core_reg_wr_en   = write_fire;
    assign core_reg_wr_addr = write_addr;
    assign core_reg_wr_data = apply_wstrb(core_reg_rd_data, write_data, write_strb);

    // Merge byte-wise write data according to AXI-Lite WSTRB: each byte lane
    // takes new_data when its strobe is set, otherwise keeps old_data.
    // With a full-word write (WSTRB = 4'b1111) the result equals write_data.
    function [31:0] apply_wstrb;
        input [31:0] old_data;
        input [31:0] new_data;
        input [3:0]  byte_strobe;
        integer byte_index;
        begin
            apply_wstrb = old_data;
            for (byte_index = 0; byte_index < 4; byte_index = byte_index + 1) begin
                if (byte_strobe[byte_index]) begin
                    apply_wstrb[byte_index*8 +: 8] = new_data[byte_index*8 +: 8];
                end
            end
        end
    endfunction

    // Write address / write data channels.
    // AW and W may arrive in different cycles; once both are present a
    // one-cycle write strobe is issued to the core and BVALID is raised.
    always @(posedge s_axi_aclk) begin
        if (!s_axi_aresetn) begin
            aw_holding   <= 1'b0;
            awaddr_reg   <= 8'd0;
            w_holding    <= 1'b0;
            wdata_reg    <= 32'd0;
            wstrb_reg    <= 4'd0;
            s_axi_bresp  <= AXI_RESP_OKAY;
            s_axi_bvalid <= 1'b0;
        end else begin
            if (write_fire) begin
                // Transaction complete: release both buffers and respond.
                aw_holding   <= 1'b0;
                w_holding    <= 1'b0;
                s_axi_bresp  <= AXI_RESP_OKAY;
                s_axi_bvalid <= 1'b1;
            end else begin
                // Only one half has arrived so far: buffer it.
                if (aw_accept) begin
                    aw_holding <= 1'b1;
                    awaddr_reg <= s_axi_awaddr;
                end

                if (w_accept) begin
                    w_holding <= 1'b1;
                    wdata_reg <= s_axi_wdata;
                    wstrb_reg <= s_axi_wstrb;
                end

                // Drop BVALID once the master has taken the response.
                if (s_axi_bvalid && s_axi_bready) begin
                    s_axi_bvalid <= 1'b0;
                end
            end
        end
    end

    // Read address / read data channels.
    // When AR is accepted, the core's combinational read data is captured
    // into s_axi_rdata and RVALID is raised on the next clock.
    always @(posedge s_axi_aclk) begin
        if (!s_axi_aresetn) begin
            s_axi_rdata  <= 32'd0;
            s_axi_rresp  <= AXI_RESP_OKAY;
            s_axi_rvalid <= 1'b0;
        end else begin
            if (ar_accept) begin
                s_axi_rdata  <= core_reg_rd_data;
                s_axi_rresp  <= AXI_RESP_OKAY;
                s_axi_rvalid <= 1'b1;
            end else if (s_axi_rvalid && s_axi_rready) begin
                s_axi_rvalid <= 1'b0;
            end
        end
    end

    // Core instance; shares the AXI clock and reset.
    ns_sync_pulse_core #(
        .PL_TICK_NS(PL_TICK_NS),
        .DEFAULT_PULSE_PERIOD_NS(DEFAULT_PULSE_PERIOD_NS),
        .DEFAULT_PULSE_WIDTH_NS(DEFAULT_PULSE_WIDTH_NS),
        .CORE_VERSION(CORE_VERSION)
    ) u_core (
        .clk(s_axi_aclk),
        .rst_n(s_axi_aresetn),
        .reg_wr_en(core_reg_wr_en),
        .reg_wr_addr(core_reg_wr_addr),
        .reg_wr_data(core_reg_wr_data),
        .reg_rd_addr(core_reg_rd_addr),
        .reg_rd_data(core_reg_rd_data),
        .pulse_out(pulse_out),
        .irq_pulse(irq_pulse)
    );

    // The AXI protection attributes are unused; reference them once so that
    // some tools do not warn about unused inputs.
    wire unused_axi_prot;
    assign unused_axi_prot = ^{s_axi_awprot, s_axi_arprot};

endmodule

Vitis环境测试一下IP

周期

读64位ns计数器

取反ps_debug_out_0 引脚

打印PL内部状态

main.c

c 复制代码
#include "sleep.h"
#include "xil_io.h"
#include "xil_printf.h"
#include "xil_types.h"

/*
 * axi_lite_ns_sync_pulse AXI-Lite base address.
 *
 * This address must match the Vivado Address Editor assignment.
 */
#define NS_SYNC_BASE_ADDR             0x43C00000U

/*
 * Minimal register map for the simplified ns_sync_pulse_core.
 * Values are byte offsets from NS_SYNC_BASE_ADDR.
 *
 * NOTE(review): these offsets and bit positions do not match the
 * ns_sync_pulse_core Verilog listing shown earlier in this document, which
 * places COMMAND at 0x28, VERSION at 0x2C and ACTIVE_EDGE at 0x30/0x34
 * (0x18 and 0x1C are PULSE_PERIOD_NS and PULSE_WIDTH_NS there), uses
 * COMMAND bit0 = apply next edge and bit3 = latch time, and decodes only
 * CONTROL bits 0-1 and STATUS bits 0-3. This map presumably targets a
 * different (simplified) core revision - confirm against the bitstream in
 * use before running this test.
 */
#define REG_CONTROL                   0x00U
#define REG_STATUS                    0x04U
#define REG_TIME_LATCH_LOW            0x08U
#define REG_TIME_LATCH_HIGH           0x0CU
#define REG_NEXT_EDGE_CFG_LOW         0x10U
#define REG_NEXT_EDGE_CFG_HIGH        0x14U
#define REG_COMMAND                   0x18U
#define REG_VERSION                   0x1CU
#define REG_ACTIVE_EDGE_LOW           0x20U
#define REG_ACTIVE_EDGE_HIGH          0x24U

/*
 * REG_CONTROL bits (bit masks, may be OR-ed together).
 */
#define CONTROL_COUNTER_ENABLE        0x00000001U
#define CONTROL_PULSE_ENABLE          0x00000002U
#define CONTROL_PS_DEBUG_OUT          0x00000004U

/*
 * REG_STATUS bits (bit masks).
 */
#define STATUS_PULSE_OUT              0x00000001U
#define STATUS_COUNTER_ENABLE         0x00000002U
#define STATUS_PULSE_ENABLE           0x00000004U
#define STATUS_TIME_LATCH_VALID       0x00000008U
#define STATUS_NEXT_EDGE_VALID        0x00000010U
#define STATUS_PS_DEBUG_OUT           0x00000020U

/*
 * REG_COMMAND bits (bit masks written to REG_COMMAND).
 */
#define CMD_LATCH_TIME                0x00000001U
#define CMD_APPLY_NEXT_EDGE           0x00000002U

/*
 * Write a 32-bit value to the IP register located `offset` bytes above
 * NS_SYNC_BASE_ADDR.
 */
static inline void ns_sync_write(u32 offset, u32 value)
{
    const u32 reg_address = NS_SYNC_BASE_ADDR + offset;

    Xil_Out32(reg_address, value);
}

/*
 * Read and return the 32-bit IP register located `offset` bytes above
 * NS_SYNC_BASE_ADDR.
 */
static inline u32 ns_sync_read(u32 offset)
{
    const u32 reg_address = NS_SYNC_BASE_ADDR + offset;

    return Xil_In32(reg_address);
}

/*
 * Combine two 32-bit halves into one 64-bit value:
 * `high` supplies bits 63..32, `low` supplies bits 31..0.
 */
static u64 ns_sync_make_u64(u32 high, u32 low)
{
    u64 combined = (u64)high;

    combined <<= 32;
    combined |= (u64)low;

    return combined;
}

/*
 * Print a named 64-bit value as two 32-bit hex words, followed by the value
 * divided by 1,000,000 (truncated to 32 bits) labelled as milliseconds.
 */
static void ns_sync_print_u64(const char *name, u64 value)
{
    const u32 upper_word = (u32)(value >> 32);
    const u32 lower_word = (u32)value;
    const u32 whole_ms = (u32)(value / 1000000ULL);

    xil_printf("%s = 0x%08x_%08x (%u ms)\r\n", name, upper_word, lower_word, whole_ms);
}

/*
 * Snapshot the PL time counter and return it as a 64-bit value.
 *
 * The latch command is issued first and the two halves are read from the
 * latch registers afterwards, so the halves always belong to the same
 * snapshot (no 64-bit tearing).
 */
static u64 ns_sync_read_time(void)
{
    u32 words[2]; /* [0] = low half, [1] = high half */

    ns_sync_write(REG_COMMAND, CMD_LATCH_TIME);
    words[0] = ns_sync_read(REG_TIME_LATCH_LOW);
    words[1] = ns_sync_read(REG_TIME_LATCH_HIGH);

    return ns_sync_make_u64(words[1], words[0]);
}

/*
 * Read REG_STATUS once and print the raw word followed by each decoded
 * flag as 0 or 1, all on a single output line.
 */
static void ns_sync_dump_status(void)
{
    const u32 status = ns_sync_read(REG_STATUS);

    /* Decode every flag up front so the print sequence stays uniform. */
    const int pulse_out = ((status & STATUS_PULSE_OUT) != 0U) ? 1 : 0;
    const int counter_en = ((status & STATUS_COUNTER_ENABLE) != 0U) ? 1 : 0;
    const int pulse_en = ((status & STATUS_PULSE_ENABLE) != 0U) ? 1 : 0;
    const int latch_valid = ((status & STATUS_TIME_LATCH_VALID) != 0U) ? 1 : 0;
    const int next_edge_valid = ((status & STATUS_NEXT_EDGE_VALID) != 0U) ? 1 : 0;
    const int ps_debug_out = ((status & STATUS_PS_DEBUG_OUT) != 0U) ? 1 : 0;

    xil_printf("STATUS = 0x%08x", status);
    xil_printf(" pulse_out=%d", pulse_out);
    xil_printf(" counter_en=%d", counter_en);
    xil_printf(" pulse_en=%d", pulse_en);
    xil_printf(" latch_valid=%d", latch_valid);
    xil_printf(" next_edge_valid=%d", next_edge_valid);
    xil_printf(" ps_debug_out=%d\r\n", ps_debug_out);
}

/*
 * Read and print REG_VERSION as a basic AXI connectivity check.
 *
 * Returns 0 when the value looks plausible, or -1 (after printing an error)
 * when it reads as all zeros or all ones.
 */
static int ns_sync_check_version(void)
{
    const u32 version = ns_sync_read(REG_VERSION);

    xil_printf("VERSION = 0x%08x\r\n", version);

    if ((version != 0U) && (version != 0xFFFFFFFFU)) {
        return 0;
    }

    xil_printf("ERROR: AXI read failed. Check base address 0x%08x and bitstream.\r\n",
               (u32)NS_SYNC_BASE_ADDR);
    return -1;
}

/*
 * Bare-metal smoke test for the ns_sync_pulse IP.
 *
 * Checks the version register, enables only the PL time counter, prints one
 * time/status sample, then loops forever: every 500 ms it toggles the
 * PS debug bit in REG_CONTROL and prints the latched time and status.
 *
 * Returns -1 if the version check fails; otherwise it never returns.
 */
int main(void)
{
    u32 debug_bit = 0U; /* current state of CONTROL_PS_DEBUG_OUT */

    xil_printf("\r\n=== ns_sync_pulse bare-metal minimal test ===\r\n");
    xil_printf("BASE = 0x%08x\r\n", (u32)NS_SYNC_BASE_ADDR);

    if (ns_sync_check_version() != 0) {
        return -1;
    }

    /* Start with just the 64-bit PL time counter running. */
    ns_sync_write(REG_CONTROL, CONTROL_COUNTER_ENABLE);
    usleep(1000);

    ns_sync_print_u64("time_ns", ns_sync_read_time());
    ns_sync_dump_status();

    xil_printf("Loop: toggle ps_debug_out every 500ms and print latched time_ns.\r\n");

    for (;;) {
        usleep(500000);

        /* Flip the debug bit while keeping the counter enabled. */
        debug_bit ^= CONTROL_PS_DEBUG_OUT;
        ns_sync_write(REG_CONTROL, CONTROL_COUNTER_ENABLE | debug_bit);

        ns_sync_print_u64("time_ns", ns_sync_read_time());
        ns_sync_dump_status();
    }

    return 0;
}

测试主从之间的时延

帧格式

latency_probe.hpp

c 复制代码
#pragma once

#include <array>
#include <cstddef>
#include <cstdint>

namespace latency_wire {

// Protocol identification and default settings shared by master and slave.
static const std::uint16_t kMagic = 0x4C54u;
static const std::uint16_t kVersion = 1u;
static const std::uint32_t kTypeRequest = 1u;
static const std::uint32_t kTypeResponse = 2u;
static const std::uint16_t kDefaultPort = 5006u;
static const std::uint32_t kDefaultCount = 100u;
static const std::uint32_t kDefaultIntervalMs = 30u;
static const std::uint32_t kDefaultTimeoutMs = 1000u;
// Encoded frame length in bytes: 2 + 2 + 4 + 4 + 4 + 8 + 8 + 8.
static const std::size_t kFrameWireSize = 40u;

// In-memory form of one latency-probe frame. On the wire every field is
// little-endian and packed in declaration order (see Encode / Decode).
struct LatencyFrame {
   // Fixed identifier 0x4C54, marks a latency-measurement frame.
   std::uint16_t magic;
   std::uint16_t version;
   // 1 = request, sent from master to slave.
   // 2 = response, sent from slave back to master.
   std::uint32_t msg_type;
   // Sequence number.
   std::uint32_t seq;
   // Reserved field.
   std::uint32_t reserved;
   // Master timestamp taken when the master sent the frame.
   std::uint64_t master_tx_ns;
   // Slave timestamp taken when the slave received the frame.
   std::uint64_t slave_rx_ns;
   // Slave timestamp taken when the slave sent the reply.
   std::uint64_t slave_tx_ns;
};

// Store a 16-bit value at dst[0..1], least significant byte first.
inline void PutLe16(std::uint8_t *dst, std::uint16_t value)
{
   for (unsigned i = 0u; i < 2u; ++i) {
      dst[i] = static_cast<std::uint8_t>(value & 0xffu);
      value = static_cast<std::uint16_t>(value >> 8);
   }
}

// Store a 32-bit value at dst[0..3], least significant byte first.
inline void PutLe32(std::uint8_t *dst, std::uint32_t value)
{
   for (unsigned i = 0u; i < 4u; ++i) {
      dst[i] = static_cast<std::uint8_t>(value & 0xffu);
      value >>= 8;
   }
}

// Store a 64-bit value at dst[0..7], least significant byte first.
inline void PutLe64(std::uint8_t *dst, std::uint64_t value)
{
   for (unsigned i = 0u; i < 8u; ++i) {
      dst[i] = static_cast<std::uint8_t>(value & 0xffull);
      value >>= 8;
   }
}

// Load a little-endian 16-bit value from src[0..1].
inline std::uint16_t GetLe16(const std::uint8_t *src)
{
   const unsigned low_byte = src[0];
   const unsigned high_byte = src[1];
   return static_cast<std::uint16_t>(low_byte | (high_byte << 8));
}

// Load a little-endian 32-bit value from src[0..3].
inline std::uint32_t GetLe32(const std::uint8_t *src)
{
   std::uint32_t value = 0u;
   for (int i = 3; i >= 0; --i) {
      value = (value << 8) | static_cast<std::uint32_t>(src[i]);
   }
   return value;
}

// Load a little-endian 64-bit value from src[0..7].
inline std::uint64_t GetLe64(const std::uint8_t *src)
{
   std::uint64_t value = 0u;
   for (int i = 7; i >= 0; --i) {
      value = (value << 8) | static_cast<std::uint64_t>(src[i]);
   }
   return value;
}

// Serialize a frame into its kFrameWireSize-byte wire representation.
inline std::array<std::uint8_t, kFrameWireSize> Encode(const LatencyFrame &frame)
{
   std::array<std::uint8_t, kFrameWireSize> wire = {};
   std::uint8_t *cursor = wire.data();

   PutLe16(cursor, frame.magic);
   cursor += 2;
   PutLe16(cursor, frame.version);
   cursor += 2;
   PutLe32(cursor, frame.msg_type);
   cursor += 4;
   PutLe32(cursor, frame.seq);
   cursor += 4;
   PutLe32(cursor, frame.reserved);
   cursor += 4;
   PutLe64(cursor, frame.master_tx_ns);
   cursor += 8;
   PutLe64(cursor, frame.slave_rx_ns);
   cursor += 8;
   PutLe64(cursor, frame.slave_tx_ns);

   return wire;
}

// Deserialize a frame from `in`. The function reads kFrameWireSize bytes and
// performs no length check itself, so the caller must supply at least that
// many bytes.
inline LatencyFrame Decode(const std::uint8_t *in)
{
   const std::uint8_t *cursor = in;
   LatencyFrame parsed = {};

   parsed.magic = GetLe16(cursor);
   cursor += 2;
   parsed.version = GetLe16(cursor);
   cursor += 2;
   parsed.msg_type = GetLe32(cursor);
   cursor += 4;
   parsed.seq = GetLe32(cursor);
   cursor += 4;
   parsed.reserved = GetLe32(cursor);
   cursor += 4;
   parsed.master_tx_ns = GetLe64(cursor);
   cursor += 8;
   parsed.slave_rx_ns = GetLe64(cursor);
   cursor += 8;
   parsed.slave_tx_ns = GetLe64(cursor);

   return parsed;
}

// True when the frame carries the expected magic, protocol version and the
// given message type.
inline bool IsValid(const LatencyFrame &frame, std::uint32_t msg_type)
{
   if (frame.magic != kMagic) {
      return false;
   }
   if (frame.version != kVersion) {
      return false;
   }
   return frame.msg_type == msg_type;
}

}  // namespace latency_wire

主机

latency_master.cpp

c 复制代码
#include "latency_probe.hpp"

#include <arpa/inet.h>
#include <algorithm>
#include <cerrno>
#include <csignal>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <limits>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>
#include <vector>

namespace {

// Default destination address: the IPv4 limited-broadcast address.
static const char *kDefaultDestIp = "255.255.255.255";
// Unit conversion factors.
static const std::uint64_t kNsPerMs = 1000000ull;
static const std::uint64_t kNsPerSec = 1000000000ull;
// NOTE(review): presumably upper bounds, in nanoseconds, on the round-trip
// time and one-way delay of samples accepted during calibration; their use
// is outside the visible part of the file - confirm.
static const std::uint64_t kCalibrateMaxRttNs = 300000ull;
static const std::uint64_t kCalibrateMaxDelayNs = 200000ull;

// Stop flag: set to 1 by HandleStopSignal and polled by SleepMs.
volatile sig_atomic_t g_stop_requested = 0;

// Signal handler: records the stop request in the volatile sig_atomic_t
// flag and does nothing else. The signal number is ignored.
void HandleStopSignal(int /*signal_number*/)
{
    g_stop_requested = 1;
}

// Route SIGINT and SIGTERM to HandleStopSignal. The handler is installed
// with an empty signal mask and no flags; sigaction results are not checked.
void InstallSignalHandlers()
{
    struct sigaction stop_action;
    std::memset(&stop_action, 0, sizeof(stop_action));
    sigemptyset(&stop_action.sa_mask);
    stop_action.sa_handler = HandleStopSignal;

    const int stop_signals[] = {SIGINT, SIGTERM};
    const std::size_t signal_count = sizeof(stop_signals) / sizeof(stop_signals[0]);
    for (std::size_t i = 0; i < signal_count; ++i) {
        sigaction(stop_signals[i], &stop_action, NULL);
    }
}

// Return the current CLOCK_MONOTONIC reading in nanoseconds.
// The clock_gettime result code is not checked.
std::uint64_t MonotonicNs()
{
    struct timespec stamp;
    clock_gettime(CLOCK_MONOTONIC, &stamp);

    const std::uint64_t whole_seconds = static_cast<std::uint64_t>(stamp.tv_sec);
    const std::uint64_t fraction_ns = static_cast<std::uint64_t>(stamp.tv_nsec);
    return whole_seconds * kNsPerSec + fraction_ns;
}

// Sleep for delay_ms milliseconds. If a signal interrupts the sleep, the
// remaining time is slept again unless a stop has been requested; any other
// nanosleep failure ends the wait.
void SleepMs(std::uint32_t delay_ms)
{
    struct timespec remaining;
    remaining.tv_sec = static_cast<time_t>(delay_ms / 1000u);
    remaining.tv_nsec = static_cast<long>((delay_ms % 1000u) * kNsPerMs);

    for (;;) {
        if (g_stop_requested) {
            return;
        }
        // nanosleep writes the unslept time back into `remaining`.
        if (nanosleep(&remaining, &remaining) >= 0) {
            return;
        }
        if (errno != EINTR) {
            return;
        }
    }
}

// Parse a decimal integer in the range 1..65535 (e.g. a UDP port number).
//
// Returns true and stores the result in value_out on success. Returns false,
// leaving value_out untouched, when text is NULL, empty, has trailing
// characters, contains a minus sign, is zero, or is out of range.
bool ParseU16(const char *text, std::uint16_t &value_out)
{
    // strtoul accepts a leading '-' and returns the negated value as an
    // unsigned number, so a huge negative input can wrap to a small positive
    // one. Only digits (and an optional sign) can survive the checks below,
    // so rejecting any '-' up front closes that hole.
    if (text == NULL || std::strchr(text, '-') != NULL) {
        return false;
    }

    char *end = NULL;
    errno = 0;
    const unsigned long value = std::strtoul(text, &end, 10);

    // ERANGE means the input overflowed unsigned long and was saturated.
    if (errno == ERANGE || end == text || *end != '\0') {
        return false;
    }
    if (value == 0u || value > 65535u) {
        return false;
    }

    value_out = static_cast<std::uint16_t>(value);
    return true;
}

// Parse a decimal integer in the range 1..UINT32_MAX.
//
// Returns true and stores the result in value_out on success. Returns false,
// leaving value_out untouched, when text is NULL, empty, has trailing
// characters, contains a minus sign, is zero, or is out of range.
bool ParseU32(const char *text, std::uint32_t &value_out)
{
    // strtoul accepts a leading '-' and returns the negated value as an
    // unsigned number, so a huge negative input can wrap to a small positive
    // one. Only digits (and an optional sign) can survive the checks below,
    // so rejecting any '-' up front closes that hole.
    if (text == NULL || std::strchr(text, '-') != NULL) {
        return false;
    }

    char *end = NULL;
    errno = 0;
    const unsigned long value = std::strtoul(text, &end, 10);

    // ERANGE means the input overflowed unsigned long and was saturated to
    // ULONG_MAX; where unsigned long is 32 bits wide that saturated value
    // would otherwise pass the range check below.
    if (errno == ERANGE || end == text || *end != '\0') {
        return false;
    }
    if (value == 0u || value > UINT32_MAX) {
        return false;
    }

    value_out = static_cast<std::uint32_t>(value);
    return true;
}

// Write the command-line synopsis and the default value of every optional
// argument to standard error.
void PrintUsage(const char *program_name)
{
    std::ostream &err = std::cerr;

    err << "Usage: " << program_name;
    err << " [dest_ip] [port] [count] [interval_ms] [timeout_ms] [save_path]\n";

    err << "Default: dest_ip=" << kDefaultDestIp;
    err << " port=" << latency_wire::kDefaultPort;
    err << " count=" << latency_wire::kDefaultCount;
    err << " interval_ms=" << latency_wire::kDefaultIntervalMs;
    err << " timeout_ms=" << latency_wire::kDefaultTimeoutMs;
    err << " save_path=<none>\n";
}

// Scope guard for a file descriptor: closes it on destruction.
// A negative descriptor means "nothing to close". Copying is disabled because
// two owners would close the same descriptor twice.
class SocketFd {
public:
    explicit SocketFd(int fd = -1) : fd_(fd) {}

    ~SocketFd()
    {
        if (fd_ < 0) {
            return;
        }
        close(fd_);
    }

    // Returns the wrapped descriptor without giving up ownership.
    int get() const { return fd_; }

private:
    SocketFd(const SocketFd &) = delete;
    SocketFd &operator=(const SocketFd &) = delete;

    int fd_;
};

// Applies a receive timeout (SO_RCVTIMEO) of timeout_ms milliseconds to socket_fd.
// Returns true on success; on failure logs the errno text to stderr and returns false.
bool SetReceiveTimeout(int socket_fd, std::uint32_t timeout_ms)
{
    const std::uint32_t whole_seconds = timeout_ms / 1000u;
    const std::uint32_t leftover_ms = timeout_ms % 1000u;

    struct timeval rcv_timeout;
    rcv_timeout.tv_sec = static_cast<time_t>(whole_seconds);
    rcv_timeout.tv_usec = static_cast<suseconds_t>(leftover_ms * 1000u);

    const int rc = setsockopt(socket_fd, SOL_SOCKET, SO_RCVTIMEO,
                              &rcv_timeout, sizeof(rcv_timeout));
    if (rc >= 0) {
        return true;
    }

    std::cerr << "setsockopt SO_RCVTIMEO failed: " << std::strerror(errno) << "\n";
    return false;
}

// Fills dest_addr with an IPv4 destination built from a dotted-quad string and a port.
// dest_addr is zeroed and its family/port are set before the address is parsed,
// so those fields are written even when parsing fails.
// Returns false (after logging to stderr) if dest_ip is not a valid IPv4 address.
bool MakeDestAddress(const char *dest_ip,
                     std::uint16_t dest_port,
                     struct sockaddr_in &dest_addr)
{
    std::memset(&dest_addr, 0, sizeof(dest_addr));
    dest_addr.sin_port = htons(dest_port);
    dest_addr.sin_family = AF_INET;

    const int parsed = inet_pton(AF_INET, dest_ip, &dest_addr.sin_addr);
    if (parsed == 1) {
        return true;
    }

    std::cerr << "Invalid IPv4 address: " << dest_ip << "\n";
    return false;
}

// Creates a UDP socket with SO_BROADCAST enabled and a receive timeout applied.
// Returns the descriptor on success. On any failure the failing step is logged
// to stderr, the descriptor (if created) is closed, and -1 is returned.
int OpenClientSocket(std::uint32_t timeout_ms)
{
    const int udp_fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
    if (udp_fd < 0) {
        std::cerr << "socket failed: " << std::strerror(errno) << "\n";
        return -1;
    }

    bool configured = true;
    const int broadcast_on = 1;
    if (setsockopt(udp_fd, SOL_SOCKET, SO_BROADCAST,
                   &broadcast_on, sizeof(broadcast_on)) < 0) {
        std::cerr << "setsockopt SO_BROADCAST failed: " << std::strerror(errno) << "\n";
        configured = false;
    } else if (!SetReceiveTimeout(udp_fd, timeout_ms)) {
        // SetReceiveTimeout has already logged the reason.
        configured = false;
    }

    if (!configured) {
        close(udp_fd);
        return -1;
    }

    return udp_fd;
}

bool SendRequest(int socket_fd,
                 const struct sockaddr_in &dest_addr,
                 std::uint32_t seq,
                 std::uint64_t tx_ns)
{
    latency_wire::LatencyFrame frame = {};
    frame.magic = latency_wire::kMagic;
    frame.version = latency_wire::kVersion;
    frame.msg_type = latency_wire::kTypeRequest;
    frame.seq = seq;
    frame.master_tx_ns = tx_ns;

    const std::array<std::uint8_t, latency_wire::kFrameWireSize> packet =
        latency_wire::Encode(frame);

    ssize_t sent = sendto(socket_fd,
                          packet.data(),
                          packet.size(),
                          0,
                          reinterpret_cast<const struct sockaddr *>(&dest_addr),
                          sizeof(dest_addr));

    if (sent != static_cast<ssize_t>(packet.size())) {
        std::cerr << "sendto failed: " << std::strerror(errno) << "\n";
        return false;
    }

    return true;
}

bool ReceiveResponse(int socket_fd,
                     std::uint32_t expected_seq,
                     latency_wire::LatencyFrame &frame_out,
                     std::uint64_t &rx_ns_out,
                     struct sockaddr_in &from_addr_out)
{
    std::array<std::uint8_t, latency_wire::kFrameWireSize> packet = {};

    while (!g_stop_requested) {
        struct sockaddr_in from_addr;
        socklen_t from_len = sizeof(from_addr);

        ssize_t received = recvfrom(socket_fd,
                                    packet.data(),
                                    packet.size(),
                                    0,
                                    reinterpret_cast<struct sockaddr *>(&from_addr),
                                    &from_len);
        const std::uint64_t rx_ns = MonotonicNs();

        if (received < 0) {
            return false;
        }

        if (received != static_cast<ssize_t>(packet.size())) {
            continue;
        }

        latency_wire::LatencyFrame frame = latency_wire::Decode(packet.data());
        if (!latency_wire::IsValid(frame, latency_wire::kTypeResponse)) {
            continue;
        }

        if (frame.seq != expected_seq) {
            continue;
        }

        frame_out = frame;
        rx_ns_out = rx_ns;
        from_addr_out = from_addr;
        return true;
    }

    return false;
}

// Formats the IPv4 address of `address` as dotted-quad text.
// Returns "unknown" if inet_ntop() fails.
std::string AddressToText(const struct sockaddr_in &address)
{
    char dotted[INET_ADDRSTRLEN] = {};
    if (inet_ntop(AF_INET, &address.sin_addr, dotted, sizeof(dotted)) == NULL) {
        return std::string("unknown");
    }
    return std::string(dotted);
}

// Converts a nanosecond count to microseconds as a double (for printing).
double NsToUs(std::uint64_t value_ns)
{
    const double as_double = static_cast<double>(value_ns);
    return as_double / 1000.0;
}

// One successful probe as recorded by main(): the measured round-trip time
// and the one-way delay estimate derived from it.
struct DelaySample {
    std::uint64_t rtt_ns;    // master-side round-trip time, in ns
    std::uint64_t delay_ns;  // one-way delay estimate for this probe, in ns
};

// Returns the median of `values` (taken by value, so the caller's vector is
// not reordered). An empty input yields 0. For an even number of elements the
// result is the mean of the two middle elements, rounded down.
std::uint64_t MedianNs(std::vector<std::uint64_t> values)
{
    if (values.empty()) {
        return 0;
    }

    std::sort(values.begin(), values.end());
    const std::size_t mid = values.size() / 2u;

    if ((values.size() % 2u) != 0u) {
        return values[mid];
    }

    // After sorting lower <= upper, so this form equals floor((lower+upper)/2)
    // without the 64-bit overflow that (lower + upper) can hit.
    const std::uint64_t lower = values[mid - 1u];
    const std::uint64_t upper = values[mid];
    return lower + (upper - lower) / 2u;
}

// Writes delay_ns as one decimal line to save_path.
//
// A NULL or empty save_path means "nothing to save" and counts as success.
// Returns false (after logging to stderr) if the file cannot be opened or if
// the value cannot actually be written out (e.g. disk full).
bool SaveCalibratedDelay(const char *save_path, std::uint64_t delay_ns)
{
    if (save_path == NULL || save_path[0] == '\0') {
        return true;
    }

    std::ofstream out(save_path);
    if (!out) {
        std::cerr << "save calibrated d failed: " << save_path << "\n";
        return false;
    }

    out << delay_ns << "\n";
    // Force the buffered data out now so a write error is seen here instead
    // of being silently dropped in the destructor.
    out.flush();
    if (!out) {
        std::cerr << "save calibrated d failed: " << save_path << "\n";
        return false;
    }

    return true;
}

}  // namespace

// Latency probe master.
//
// Sends `count` request frames to dest_ip:port, waits for the matching
// response to each one, prints per-probe and summary statistics, derives a
// calibrated one-way delay (median of the samples inside the calibration
// limits) and optionally saves it to save_path.
//
// Exit status: 0 if at least one response was received (and saving, when
// requested, succeeded), 1 on setup failure / no response / save failure,
// 2 on invalid command-line arguments.
int main(int argc, char **argv)
{
    const char *dest_ip = kDefaultDestIp;
    std::uint16_t dest_port = latency_wire::kDefaultPort;
    std::uint32_t probe_count = latency_wire::kDefaultCount;
    std::uint32_t interval_ms = latency_wire::kDefaultIntervalMs;
    std::uint32_t timeout_ms = latency_wire::kDefaultTimeoutMs;
    const char *save_path = NULL;

    // All arguments are positional and optional:
    //   [dest_ip] [port] [count] [interval_ms] [timeout_ms] [save_path]
    if (argc > 7) {
        PrintUsage(argv[0]);
        return 2;
    }

    if (argc >= 2) {
        dest_ip = argv[1];
    }

    if (argc >= 3 && !ParseU16(argv[2], dest_port)) {
        PrintUsage(argv[0]);
        return 2;
    }

    if (argc >= 4 && !ParseU32(argv[3], probe_count)) {
        PrintUsage(argv[0]);
        return 2;
    }

    if (argc >= 5 && !ParseU32(argv[4], interval_ms)) {
        PrintUsage(argv[0]);
        return 2;
    }

    if (argc >= 6 && !ParseU32(argv[5], timeout_ms)) {
        PrintUsage(argv[0]);
        return 2;
    }

    if (argc >= 7) {
        save_path = argv[6];
    }

    InstallSignalHandlers();

    struct sockaddr_in dest_addr;
    if (!MakeDestAddress(dest_ip, dest_port, dest_addr)) {
        return 1;
    }

    SocketFd socket_fd(OpenClientSocket(timeout_ms));
    if (socket_fd.get() < 0) {
        return 1;
    }

    std::cout << std::fixed << std::setprecision(3);
    std::cout << "Latency probe master -> " << dest_ip << ":" << dest_port
              << " count=" << probe_count
              << " interval=" << interval_ms << "ms"
              << " timeout=" << timeout_ms << "ms\n";

    std::uint32_t ok_count = 0;
    std::uint32_t lost_count = 0;
    std::uint64_t min_rtt_ns = std::numeric_limits<std::uint64_t>::max();
    std::uint64_t max_rtt_ns = 0;
    std::uint64_t sum_rtt_ns = 0;
    std::uint64_t min_delay_ns = std::numeric_limits<std::uint64_t>::max();
    std::uint64_t max_delay_ns = 0;
    std::uint64_t sum_delay_ns = 0;
    std::vector<DelaySample> samples;

    // probe_count comes from the command line and may be as large as 2^32-1;
    // reserving that much up front would fail or exhaust memory. Pre-size
    // only a bounded amount and let the vector grow if more probes succeed.
    const std::size_t kMaxReservedSamples = 4096u;
    samples.reserve(std::min<std::size_t>(probe_count, kMaxReservedSamples));

    for (std::uint32_t seq = 0; seq < probe_count && !g_stop_requested; ++seq) {
        const std::uint64_t tx_ns = MonotonicNs();

        if (!SendRequest(socket_fd.get(), dest_addr, seq, tx_ns)) {
            ++lost_count;
            SleepMs(interval_ms);
            continue;
        }

        latency_wire::LatencyFrame reply = {};
        std::uint64_t rx_ns = 0;
        struct sockaddr_in from_addr;

        if (!ReceiveResponse(socket_fd.get(), seq, reply, rx_ns, from_addr)) {
            if (g_stop_requested) {
                // Interrupted by SIGINT/SIGTERM: not a lost probe.
                break;
            }
            std::cout << "seq=" << seq << " timeout\n";
            ++lost_count;
            SleepMs(interval_ms);
            continue;
        }

        // Round trip as seen by the master clock.
        const std::uint64_t rtt_ns = rx_ns >= tx_ns ? rx_ns - tx_ns : 0;
        // Time the slave spent between receiving and replying (slave clock).
        const std::uint64_t slave_process_ns =
            reply.slave_tx_ns >= reply.slave_rx_ns ?
            reply.slave_tx_ns - reply.slave_rx_ns :
            0;
        // Remove the slave's processing time; if it is not smaller than the
        // round trip the raw round trip is kept unchanged.
        const std::uint64_t net_rtt_ns =
            rtt_ns > slave_process_ns ? rtt_ns - slave_process_ns : rtt_ns;
        // One-way estimate: half of the network round trip.
        const std::uint64_t fixed_delay_ns = net_rtt_ns / 2u;

        ++ok_count;
        sum_rtt_ns += rtt_ns;
        sum_delay_ns += fixed_delay_ns;
        samples.push_back(DelaySample{rtt_ns, fixed_delay_ns});
        min_rtt_ns = std::min(min_rtt_ns, rtt_ns);
        max_rtt_ns = std::max(max_rtt_ns, rtt_ns);
        min_delay_ns = std::min(min_delay_ns, fixed_delay_ns);
        max_delay_ns = std::max(max_delay_ns, fixed_delay_ns);

        std::cout << "seq=" << seq
                  << " from=" << AddressToText(from_addr)
                  << " rtt=" << NsToUs(rtt_ns) << " us"
                  << " slave_proc=" << NsToUs(slave_process_ns) << " us"
                  << " ETH_FIXED_DELAY_NS=" << fixed_delay_ns
                  << " (" << NsToUs(fixed_delay_ns) << " us)\n";

        // No need to wait after the final probe.
        if (seq + 1u < probe_count) {
            SleepMs(interval_ms);
        }
    }

    if (ok_count > 0u) {
        std::cout << "Summary: ok=" << ok_count
                  << " lost=" << lost_count
                  << " rtt_min/avg/max="
                  << NsToUs(min_rtt_ns) << "/"
                  << NsToUs(sum_rtt_ns / ok_count) << "/"
                  << NsToUs(max_rtt_ns) << " us"
                  << " delay_min/avg/max="
                  << NsToUs(min_delay_ns) << "/"
                  << NsToUs(sum_delay_ns / ok_count) << "/"
                  << NsToUs(max_delay_ns) << " us\n";

        // Keep only samples inside both calibration limits.
        std::vector<std::uint64_t> valid_delays;
        valid_delays.reserve(samples.size());

        for (std::size_t i = 0; i < samples.size(); ++i) {
            if (samples[i].rtt_ns <= kCalibrateMaxRttNs &&
                samples[i].delay_ns <= kCalibrateMaxDelayNs) {
                valid_delays.push_back(samples[i].delay_ns);
            }
        }

        const std::vector<std::uint64_t> *used_values = &valid_delays;
        std::vector<std::uint64_t> all_delays;

        // If every sample was filtered out, fall back to all of them rather
        // than reporting a calibrated delay of 0.
        const bool use_fallback = valid_delays.empty();
        if (use_fallback) {
            all_delays.reserve(samples.size());
            for (std::size_t i = 0; i < samples.size(); ++i) {
                all_delays.push_back(samples[i].delay_ns);
            }
            used_values = &all_delays;
            std::cerr << "Calibration: no sample within limits, using all "
                      << samples.size() << " samples\n";
        }

        const std::uint64_t calibrated_delay_ns = MedianNs(*used_values);
        // Count what was really excluded from the median, so valid + dropped
        // always equals ok (the fallback case drops nothing).
        const std::uint32_t dropped_count =
            ok_count - static_cast<std::uint32_t>(used_values->size());

        std::cout << "Calibration: valid=" << used_values->size()
                  << " dropped=" << dropped_count
                  << " rtt_limit=" << NsToUs(kCalibrateMaxRttNs) << " us"
                  << " delay_limit=" << NsToUs(kCalibrateMaxDelayNs) << " us\n"
                  << "D_CALIBRATED_NS=" << calibrated_delay_ns
                  << " (" << NsToUs(calibrated_delay_ns) << " us)\n";

        if (save_path != NULL) {
            if (SaveCalibratedDelay(save_path, calibrated_delay_ns)) {
                std::cout << "Saved D_CALIBRATED_NS to " << save_path << "\n";
            } else {
                return 1;
            }
        }
    } else {
        std::cout << "Summary: no response, check slave, firewall, port, and subnet.\n";
    }

    return ok_count > 0u ? 0 : 1;
}

从机

latency_slave_echo.cpp

c 复制代码
#include "latency_probe.hpp"

#include <arpa/inet.h>
#include <cerrno>
#include <csignal>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <netinet/in.h>
#include <sys/socket.h>
#include <time.h>
#include <unistd.h>

namespace {

// Nanoseconds per second; MonotonicNs() uses it to scale timespec::tv_sec.
static const std::uint64_t kNsPerSec = 1000000000ull;

// Stop flag: written only by the signal handler, polled by the receive loop.
volatile sig_atomic_t g_stop_requested = 0;

// Signal handler; it does nothing except raise the stop flag.
void HandleStopSignal(int /*signal_number*/)
{
    g_stop_requested = 1;
}

// Routes SIGINT and SIGTERM to HandleStopSignal.
// The sigaction structure is fully zeroed first, so no sa_flags are set.
void InstallSignalHandlers()
{
    struct sigaction stop_action;
    std::memset(&stop_action, 0, sizeof(stop_action));
    sigemptyset(&stop_action.sa_mask);
    stop_action.sa_handler = HandleStopSignal;

    const int stop_signals[] = {SIGINT, SIGTERM};
    for (int signal_number : stop_signals) {
        sigaction(signal_number, &stop_action, NULL);
    }
}

// Reads CLOCK_MONOTONIC and folds seconds + nanoseconds into one ns count.
// The return value of clock_gettime() is not checked.
std::uint64_t MonotonicNs()
{
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);

    const std::uint64_t whole_seconds = static_cast<std::uint64_t>(ts.tv_sec);
    const std::uint64_t fraction_ns = static_cast<std::uint64_t>(ts.tv_nsec);
    return whole_seconds * kNsPerSec + fraction_ns;
}

// Parses a decimal number in the range 1..65535 (the listen port).
//
// text:      NUL-terminated string; must consist of decimal digits only.
// value_out: receives the parsed value; left untouched on failure.
// Returns true on success, false for empty/non-numeric/zero/out-of-range text.
bool ParseU16(const char *text, std::uint16_t &value_out)
{
    // strtoul() skips leading whitespace and accepts a sign; with a '-' it
    // negates in unsigned arithmetic, so "-18446744073709551536" would come
    // back as 80 on LP64. Insist that the text starts with a digit.
    if (text == NULL || text[0] < '0' || text[0] > '9') {
        return false;
    }

    char *end = NULL;
    const unsigned long value = std::strtoul(text, &end, 10);
    // Overflow saturates to ULONG_MAX, which the range check rejects.
    if (end == text || *end != '\0' || value == 0u || value > 65535u) {
        return false;
    }

    value_out = static_cast<std::uint16_t>(value);
    return true;
}

// Writes the command-line synopsis and the default listen port to stderr.
void PrintUsage(const char *program_name)
{
    std::cerr << "Usage: " << program_name << " [listen_port]\n";
    std::cerr << "Default: listen_port=" << latency_wire::kDefaultPort << "\n";
}

// Scope guard for a file descriptor: closes it on destruction.
// A negative descriptor means "nothing to close". Copying is disabled because
// two owners would close the same descriptor twice.
class SocketFd {
public:
    explicit SocketFd(int fd = -1) : fd_(fd) {}

    ~SocketFd()
    {
        if (fd_ < 0) {
            return;
        }
        close(fd_);
    }

    // Returns the wrapped descriptor without giving up ownership.
    int get() const { return fd_; }

private:
    SocketFd(const SocketFd &) = delete;
    SocketFd &operator=(const SocketFd &) = delete;

    int fd_;
};

// Creates a UDP socket with SO_REUSEADDR set and binds it to INADDR_ANY on
// listen_port. Returns the descriptor, or -1 after logging the failing step
// to stderr and closing the descriptor.
int OpenListenSocket(std::uint16_t listen_port)
{
    const int udp_fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
    if (udp_fd < 0) {
        std::cerr << "socket failed: " << std::strerror(errno) << "\n";
        return -1;
    }

    const int reuse_on = 1;
    if (setsockopt(udp_fd, SOL_SOCKET, SO_REUSEADDR, &reuse_on, sizeof(reuse_on)) < 0) {
        std::cerr << "setsockopt SO_REUSEADDR failed: " << std::strerror(errno) << "\n";
        close(udp_fd);
        return -1;
    }

    struct sockaddr_in any_addr;
    std::memset(&any_addr, 0, sizeof(any_addr));
    any_addr.sin_family = AF_INET;
    any_addr.sin_port = htons(listen_port);
    any_addr.sin_addr.s_addr = htonl(INADDR_ANY);

    const struct sockaddr *bind_target =
        reinterpret_cast<const struct sockaddr *>(&any_addr);
    if (bind(udp_fd, bind_target, sizeof(any_addr)) < 0) {
        std::cerr << "bind port " << listen_port << " failed: " << std::strerror(errno) << "\n";
        close(udp_fd);
        return -1;
    }

    return udp_fd;
}

// Echoes `request` back to master_addr as a response frame.
// The reply is a copy of the request with the message type switched to
// "response", the slave receive time filled in, and the slave transmit time
// sampled last, just before the frame is encoded.
// Returns true only if the whole frame was handed to sendto(); otherwise logs
// the errno text to stderr and returns false.
bool SendResponse(int socket_fd,
                  const struct sockaddr_in &master_addr,
                  const latency_wire::LatencyFrame &request,
                  std::uint64_t slave_rx_ns)
{
    latency_wire::LatencyFrame reply = request;
    reply.msg_type = latency_wire::kTypeResponse;
    reply.slave_rx_ns = slave_rx_ns;
    reply.slave_tx_ns = MonotonicNs();

    const std::array<std::uint8_t, latency_wire::kFrameWireSize> wire =
        latency_wire::Encode(reply);

    const struct sockaddr *target =
        reinterpret_cast<const struct sockaddr *>(&master_addr);
    const ssize_t written =
        sendto(socket_fd, wire.data(), wire.size(), 0, target, sizeof(master_addr));

    if (written == static_cast<ssize_t>(wire.size())) {
        return true;
    }

    std::cerr << "sendto failed: " << std::strerror(errno) << "\n";
    return false;
}

// Formats the IPv4 address of `address` as dotted-quad text.
// Returns "unknown" if inet_ntop() fails.
std::string AddressToText(const struct sockaddr_in &address)
{
    char dotted[INET_ADDRSTRLEN] = {};
    if (inet_ntop(AF_INET, &address.sin_addr, dotted, sizeof(dotted)) == NULL) {
        return std::string("unknown");
    }
    return std::string(dotted);
}

}  // namespace

// Latency probe slave: listens on a UDP port and echoes every valid request
// frame back to its sender, stamped with the slave receive/transmit times.
//
// Exit status: 0 after a clean stop (SIGINT/SIGTERM), 1 if the socket could
// not be opened or recvfrom() failed fatally, 2 on invalid arguments.
int main(int argc, char **argv)
{
    std::uint16_t listen_port = latency_wire::kDefaultPort;

    if (argc > 2) {
        PrintUsage(argv[0]);
        return 2;
    }

    if (argc == 2 && !ParseU16(argv[1], listen_port)) {
        PrintUsage(argv[0]);
        return 2;
    }

    InstallSignalHandlers();

    SocketFd socket_fd(OpenListenSocket(listen_port));
    if (socket_fd.get() < 0) {
        return 1;
    }

    std::cout << "Latency probe slave echo listening on 0.0.0.0:" << listen_port << "\n"
              << "Press Ctrl+C to stop.\n";

    std::uint64_t echo_count = 0;
    int exit_code = 0;

    while (!g_stop_requested) {
        std::array<std::uint8_t, latency_wire::kFrameWireSize> packet = {};
        struct sockaddr_in master_addr;
        socklen_t master_len = sizeof(master_addr);

        ssize_t received = recvfrom(socket_fd.get(),
                                    packet.data(),
                                    packet.size(),
                                    0,
                                    reinterpret_cast<struct sockaddr *>(&master_addr),
                                    &master_len);
        // Save errno before the clock call below can overwrite it.
        const int recv_errno = errno;
        // Receive timestamp, taken before any validation work.
        const std::uint64_t slave_rx_ns = MonotonicNs();

        if (received < 0) {
            if (recv_errno == EINTR) {
                // Interrupted by a signal: re-check the stop flag.
                continue;
            }
            std::cerr << "recvfrom failed: " << std::strerror(recv_errno) << "\n";
            // A fatal receive error must not be reported as a clean exit.
            exit_code = 1;
            break;
        }

        // Ignore datagrams that are not exactly one frame long.
        if (received != static_cast<ssize_t>(packet.size())) {
            continue;
        }

        const latency_wire::LatencyFrame request = latency_wire::Decode(packet.data());
        if (!latency_wire::IsValid(request, latency_wire::kTypeRequest)) {
            continue;
        }

        if (SendResponse(socket_fd.get(), master_addr, request, slave_rx_ns)) {
            ++echo_count;
            // Log the first echo and then every 100th to keep output sparse.
            if (echo_count == 1u || echo_count % 100u == 0u) {
                std::cout << "echo=" << echo_count
                          << " from=" << AddressToText(master_addr)
                          << ":" << ntohs(master_addr.sin_port)
                          << " seq=" << request.seq << "\n";
            }
        }
    }

    std::cout << "Stopped.\n";
    return exit_code;
}

主机打印

bash 复制代码
Latency probe master -> 255.255.255.255:5006 count=100 interval=30ms timeout=1000ms
seq=0 from=192.168.3.211 rtt=434.757 us slave_proc=4.374 us ETH_FIXED_DELAY_NS=215191 (215.191 us)
seq=1 from=192.168.3.211 rtt=228.317 us slave_proc=3.934 us ETH_FIXED_DELAY_NS=112191 (112.191 us)
seq=2 from=192.168.3.211 rtt=238.787 us slave_proc=3.550 us ETH_FIXED_DELAY_NS=117618 (117.618 us)
seq=3 from=192.168.3.211 rtt=231.195 us slave_proc=3.117 us ETH_FIXED_DELAY_NS=114039 (114.039 us)
seq=4 from=192.168.3.211 rtt=208.211 us slave_proc=3.323 us ETH_FIXED_DELAY_NS=102444 (102.444 us)
seq=5 from=192.168.3.211 rtt=170.040 us slave_proc=3.185 us ETH_FIXED_DELAY_NS=83427 (83.427 us)
seq=6 from=192.168.3.211 rtt=177.130 us slave_proc=3.206 us ETH_FIXED_DELAY_NS=86962 (86.962 us)
seq=7 from=192.168.3.211 rtt=182.046 us slave_proc=2.940 us ETH_FIXED_DELAY_NS=89553 (89.553 us)

从机打印

bash 复制代码
root@ant:~# ./ming_net_sync_petalinux
Latency probe slave echo listening on 0.0.0.0:5006
Press Ctrl+C to stop.
echo=1 from=192.168.3.4:40495 seq=0
echo=100 from=192.168.3.4:40495 seq=99

主从正式工作代码

主机周期广播同步帧

从机调整下次产生脉冲的时刻

从机统计过去100次主从时差的中位数(整体减去了首次时差)

实测PC做主机和zynq做主机区别不大

PC主机的时间基准来自 clock_gettime(CLOCK_MONOTONIC)

ZYNQ主机定时器是从PL读的

帧格式

c 复制代码
#pragma once

#include <array>
#include <cstddef>
#include <cstdint>

namespace sync_wire {

// Fixed magic number of a sync frame; receivers use it to filter out UDP
// packets that do not belong to this protocol.
static const std::uint16_t kMagic = 0x4A42u;
// Current protocol version; lets a receiver reject frames from a master
// running a different version.
static const std::uint16_t kVersion = 1u;
// Period at which the master sends sync frames, in ns (30 ms).
static const std::uint64_t kSendPeriodNs = 30000000ull;
// Target period of pulse_out, in ns (1 ms).
static const std::uint64_t kPulsePeriodNs = 1000000ull;
// Fixed on-wire length of a sync frame: 2+2+4+8+8+4+4 = 32 bytes.
static const std::size_t kFrameWireSize = 32u;

// Logical fields of a UDP sync frame. The wire format is a fixed 32-byte
// little-endian layout produced by Encode().
struct SyncFrame {
    // Fixed magic number identifying a frame of this protocol.
    std::uint16_t magic;

    // Protocol version, for master/slave compatibility checks.
    std::uint16_t version;

    // Frame sequence number; lets a slave detect old or reordered frames.
    std::uint32_t seq;

    // Master time at which this frame was sent, in ns.
    std::uint64_t master_tx_ns;

    // Next pulse edge time the master wants all slaves aligned to, in ns.
    std::uint64_t next_edge_ns;

    // Period at which the master sends sync frames, in ns.
    std::uint32_t sync_period_ns;

    // Period of the pulse_out pulse, in ns.
    std::uint32_t pulse_period_ns;
};

// Rounds `value` up to the next multiple of `period` (a value already on a
// multiple is returned unchanged). A period of 0 returns `value` as is
// instead of dividing by zero. The remainder form avoids the intermediate
// overflow of `value + period - 1`; the result can still wrap if the next
// multiple itself does not fit in 64 bits.
inline std::uint64_t AlignUpNs(std::uint64_t value, std::uint64_t period)
{
    if (period == 0u) {
        return value;
    }

    const std::uint64_t remainder = value % period;
    return remainder == 0u ? value : value + (period - remainder);
}

// Stores a 16-bit value at dst[0..1], least significant byte first.
inline void PutLe16(std::uint8_t *dst, std::uint16_t value)
{
    dst[0] = static_cast<std::uint8_t>(value & 0xffu);
    dst[1] = static_cast<std::uint8_t>((value >> 8) & 0xffu);
}

// Stores a 32-bit value at dst[0..3], least significant byte first.
inline void PutLe32(std::uint8_t *dst, std::uint32_t value)
{
    dst[0] = static_cast<std::uint8_t>(value & 0xffu);
    dst[1] = static_cast<std::uint8_t>((value >> 8) & 0xffu);
    dst[2] = static_cast<std::uint8_t>((value >> 16) & 0xffu);
    dst[3] = static_cast<std::uint8_t>((value >> 24) & 0xffu);
}

// Stores a 64-bit value at dst[0..7] as two little-endian 32-bit halves,
// low half first.
inline void PutLe64(std::uint8_t *dst, std::uint64_t value)
{
    PutLe32(dst, static_cast<std::uint32_t>(value & 0xffffffffull));
    PutLe32(dst + 4, static_cast<std::uint32_t>(value >> 32));
}

// Serializes `frame` into the 32-byte wire layout:
//   offset 0  magic (2)          offset 2  version (2)       offset 4  seq (4)
//   offset 8  master_tx_ns (8)   offset 16 next_edge_ns (8)
//   offset 24 sync_period_ns (4) offset 28 pulse_period_ns (4)
inline std::array<std::uint8_t, kFrameWireSize> Encode(const SyncFrame &frame)
{
    std::array<std::uint8_t, kFrameWireSize> out = {};
    PutLe16(out.data() + 0, frame.magic);
    PutLe16(out.data() + 2, frame.version);
    PutLe32(out.data() + 4, frame.seq);
    PutLe64(out.data() + 8, frame.master_tx_ns);
    PutLe64(out.data() + 16, frame.next_edge_ns);
    PutLe32(out.data() + 24, frame.sync_period_ns);
    PutLe32(out.data() + 28, frame.pulse_period_ns);
    return out;
}

}  // namespace sync_wire

PC主机

c 复制代码
#include "sync_frame.hpp"

#include <arpa/inet.h>
#include <cerrno>
#include <csignal>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <netinet/in.h>
#include <sys/socket.h>
#include <time.h>
#include <unistd.h>

namespace {

// Default destination: the limited broadcast address.
static const char *kDefaultDestIp = "255.255.255.255";
// Default destination UDP port.
static const std::uint16_t kDefaultDestPort = 5005u;
// Milliseconds-to-nanoseconds factor.
static const std::uint64_t kNsPerMs = 1000000ull;
// Seconds-to-nanoseconds factor.
static const std::uint64_t kNsPerSec = 1000000000ull;

// Stop flag: written only by the signal handler, polled by the send loop.
volatile sig_atomic_t g_stop_requested = 0;

// Signal handler; it does nothing except raise the stop flag.
void HandleStopSignal(int /*signal_number*/)
{
    g_stop_requested = 1;
}

// Routes SIGINT and SIGTERM to HandleStopSignal.
// The sigaction structure is fully zeroed first, so no sa_flags are set.
void InstallSignalHandlers()
{
    struct sigaction stop_action;
    std::memset(&stop_action, 0, sizeof(stop_action));
    sigemptyset(&stop_action.sa_mask);
    stop_action.sa_handler = HandleStopSignal;

    const int stop_signals[] = {SIGINT, SIGTERM};
    for (int signal_number : stop_signals) {
        sigaction(signal_number, &stop_action, NULL);
    }
}

// Reads CLOCK_MONOTONIC and folds seconds + nanoseconds into one ns count.
// The return value of clock_gettime() is not checked.
std::uint64_t MonotonicNs()
{
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);

    const std::uint64_t whole_seconds = static_cast<std::uint64_t>(ts.tv_sec);
    const std::uint64_t fraction_ns = static_cast<std::uint64_t>(ts.tv_nsec);
    return whole_seconds * kNsPerSec + fraction_ns;
}

// Sleeps until the absolute CLOCK_MONOTONIC time target_ns.
// An interrupted sleep (EINTR) is restarted against the same absolute
// deadline unless a stop has been requested; any other result ends the wait.
void SleepUntilNs(std::uint64_t target_ns)
{
    struct timespec wake_at;
    wake_at.tv_sec = static_cast<time_t>(target_ns / kNsPerSec);
    wake_at.tv_nsec = static_cast<long>(target_ns % kNsPerSec);

    // The result of clock_nanosleep() is compared with EINTR directly.
    int rc = EINTR;
    while (rc == EINTR && !g_stop_requested) {
        rc = clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &wake_at, NULL);
    }
}

// Parses a decimal number in the range 1..65535 (the destination UDP port).
//
// text:      NUL-terminated string; must consist of decimal digits only.
// value_out: receives the parsed value; left untouched on failure.
// Returns true on success, false for empty/non-numeric/zero/out-of-range text.
bool ParseU16(const char *text, std::uint16_t &value_out)
{
    // strtoul() skips leading whitespace and accepts a sign; with a '-' it
    // negates in unsigned arithmetic, so "-18446744073709551536" would come
    // back as 80 on LP64. Insist that the text starts with a digit.
    if (text == NULL || text[0] < '0' || text[0] > '9') {
        return false;
    }

    char *end = NULL;
    const unsigned long value = std::strtoul(text, &end, 10);
    // Overflow saturates to ULONG_MAX, which the range check rejects.
    if (end == text || *end != '\0' || value == 0u || value > 65535u) {
        return false;
    }

    value_out = static_cast<std::uint16_t>(value);
    return true;
}

// Parses a sync period given in whole milliseconds and converts it to ns.
//
// text:          NUL-terminated string; must consist of decimal digits only.
// period_ns_out: receives the period in ns; left untouched on failure.
// Returns true only for a non-zero period whose ns value fits in 32 bits,
// because the period is later narrowed to a 32-bit field of the sync frame.
bool ParsePeriodMs(const char *text, std::uint64_t &period_ns_out)
{
    // Reject signs and leading whitespace up front: the strto* family negates
    // "-N" in unsigned arithmetic instead of failing.
    if (text == NULL || text[0] < '0' || text[0] > '9') {
        return false;
    }

    char *end = NULL;
    errno = 0;
    const unsigned long long value_ms = std::strtoull(text, &end, 10);
    if (errno == ERANGE || end == text || *end != '\0' || value_ms == 0ull) {
        return false;
    }

    // Range-check before multiplying: a huge millisecond count would wrap the
    // 64-bit product and could slip back under the 32-bit limit.
    if (value_ms > UINT32_MAX / kNsPerMs) {
        return false;
    }

    period_ns_out = static_cast<std::uint64_t>(value_ms) * kNsPerMs;
    return true;
}

// Parses a pulse period given in nanoseconds.
//
// text:          NUL-terminated string; must consist of decimal digits only.
// period_ns_out: receives the period in ns; left untouched on failure.
// Returns true only for values in the range 1..UINT32_MAX.
bool ParsePeriodNs(const char *text, std::uint64_t &period_ns_out)
{
    // strtoull() accepts a sign and negates "-N" in unsigned arithmetic, so
    // "-18446744073709550616" would be read as 1000. Require a leading digit.
    if (text == NULL || text[0] < '0' || text[0] > '9') {
        return false;
    }

    char *end = NULL;
    const unsigned long long value_ns = std::strtoull(text, &end, 10);
    // Overflow saturates to ULLONG_MAX, which the range check rejects.
    if (end == text || *end != '\0' || value_ns == 0ull || value_ns > UINT32_MAX) {
        return false;
    }

    period_ns_out = static_cast<std::uint64_t>(value_ns);
    return true;
}

// Writes the command-line synopsis and an example with the defaults to stderr.
void PrintUsage(const char *program_name)
{
    std::cerr << "Usage: " << program_name
              << " [broadcast_ip] [port] [sync_period_ms] [pulse_period_ns]\n";
    std::cerr << "Default: " << program_name << " " << kDefaultDestIp << " "
              << kDefaultDestPort << " 30 " << sync_wire::kPulsePeriodNs << "\n";
}

// Scope guard for a file descriptor: closes it on destruction.
// A negative descriptor means "nothing to close". Copying is disabled because
// two owners would close the same descriptor twice.
class SocketFd {
public:
    explicit SocketFd(int fd = -1) : fd_(fd) {}

    ~SocketFd()
    {
        if (fd_ < 0) {
            return;
        }
        close(fd_);
    }

    // Returns the wrapped descriptor without giving up ownership.
    int get() const { return fd_; }

private:
    SocketFd(const SocketFd &) = delete;
    SocketFd &operator=(const SocketFd &) = delete;

    int fd_;
};

// Fills dest_addr with an IPv4 destination built from a dotted-quad string and a port.
// dest_addr is zeroed and its family/port are set before the address is parsed,
// so those fields are written even when parsing fails.
// Returns false (after logging to stderr) if dest_ip is not a valid IPv4 address.
bool MakeDestAddress(const char *dest_ip,
                     std::uint16_t dest_port,
                     struct sockaddr_in &dest_addr)
{
    std::memset(&dest_addr, 0, sizeof(dest_addr));
    dest_addr.sin_port = htons(dest_port);
    dest_addr.sin_family = AF_INET;

    const int parsed = inet_pton(AF_INET, dest_ip, &dest_addr.sin_addr);
    if (parsed == 1) {
        return true;
    }

    std::cerr << "Invalid IPv4 address: " << dest_ip << "\n";
    return false;
}

// Creates a UDP socket with SO_BROADCAST enabled.
// Returns the descriptor, or -1 after logging the failing step to stderr
// (the descriptor is closed if the option could not be set).
int OpenBroadcastSocket()
{
    const int udp_fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
    if (udp_fd < 0) {
        std::cerr << "socket failed: " << std::strerror(errno) << "\n";
        return -1;
    }

    const int broadcast_on = 1;
    const int rc = setsockopt(udp_fd, SOL_SOCKET, SO_BROADCAST,
                              &broadcast_on, sizeof(broadcast_on));
    if (rc >= 0) {
        return udp_fd;
    }

    std::cerr << "setsockopt SO_BROADCAST failed: " << std::strerror(errno) << "\n";
    close(udp_fd);
    return -1;
}

// Encodes `frame` and sends it as one datagram to dest_addr.
// Returns true only if the whole encoded frame was handed to sendto();
// otherwise logs the errno text to stderr and returns false.
bool SendSyncFrame(int socket_fd,
                   const struct sockaddr_in &dest_addr,
                   const sync_wire::SyncFrame &frame)
{
    const std::array<std::uint8_t, sync_wire::kFrameWireSize> wire =
        sync_wire::Encode(frame);

    const struct sockaddr *target =
        reinterpret_cast<const struct sockaddr *>(&dest_addr);
    const ssize_t written =
        sendto(socket_fd, wire.data(), wire.size(), 0, target, sizeof(dest_addr));

    if (written == static_cast<ssize_t>(wire.size())) {
        return true;
    }

    std::cerr << "sendto failed: " << std::strerror(errno) << "\n";
    return false;
}

}  // namespace

// PC sync master: periodically sends a SyncFrame to dest_ip:port, using
// CLOCK_MONOTONIC as the master time base.
//
// Arguments (all optional, positional):
//   [broadcast_ip] [port] [sync_period_ms] [pulse_period_ns]
// Exit status: 0 after a stop request, 1 on setup failure, 2 on bad arguments.
int main(int argc, char **argv)
{
    const char *dest_ip = kDefaultDestIp;
    std::uint16_t dest_port = kDefaultDestPort;
    std::uint64_t send_period_ns = sync_wire::kSendPeriodNs;
    std::uint64_t pulse_period_ns = sync_wire::kPulsePeriodNs;

    if (argc > 5) {
        PrintUsage(argv[0]);
        return 2;
    }

    if (argc >= 2) {
        dest_ip = argv[1];
    }

    if (argc >= 3 && !ParseU16(argv[2], dest_port)) {
        PrintUsage(argv[0]);
        return 2;
    }

    if (argc >= 4 && !ParsePeriodMs(argv[3], send_period_ns)) {
        PrintUsage(argv[0]);
        return 2;
    }

    if (argc >= 5 && !ParsePeriodNs(argv[4], pulse_period_ns)) {
        PrintUsage(argv[0]);
        return 2;
    }

    InstallSignalHandlers();

    struct sockaddr_in dest_addr;
    if (!MakeDestAddress(dest_ip, dest_port, dest_addr)) {
        return 1;
    }

    SocketFd socket_fd(OpenBroadcastSocket());
    if (socket_fd.get() < 0) {
        return 1;
    }

    std::cout << "UDP sync master sending to " << dest_ip << ":" << dest_port
              << " every " << (send_period_ns / kNsPerMs) << " ms\n"
              << "Pulse period: " << pulse_period_ns << " ns ("
              << (kNsPerSec / pulse_period_ns) << " Hz if period divides 1s)\n"
              << "Frame: magic=0x" << std::hex << sync_wire::kMagic << std::dec
              << " version=" << sync_wire::kVersion
              << " size=" << sync_wire::kFrameWireSize << " bytes\n"
              << "Master clock: PC Linux CLOCK_MONOTONIC\n"
              << "Press Ctrl+C to stop.\n";

    std::uint32_t seq = 0;
    std::uint64_t sent_count = 0;
    std::uint64_t last_log_ns = 0;
    // Absolute deadline of the next transmission; the first one is "now".
    std::uint64_t next_send_ns = MonotonicNs();

    while (!g_stop_requested) {
        SleepUntilNs(next_send_ns);
        if (g_stop_requested) {
            break;
        }

        // Sample the master time right before the frame is built and sent.
        const std::uint64_t tx_ns = MonotonicNs();
        sync_wire::SyncFrame frame = {};
        frame.magic = sync_wire::kMagic;
        frame.version = sync_wire::kVersion;
        frame.seq = seq++;
        frame.master_tx_ns = tx_ns;

        // The master sends a reference edge on its own 1 kHz time grid, not an
        // edge pushed a fixed number of milliseconds into the future.
        // On reception each slave shifts this reference edge by whole periods,
        // based on its own PL time_ns, to a safe moment in the future.
        // This keeps all slaves on the same phase grid while avoiding that a
        // run of 1 kHz pulses is skipped at every sync.
        frame.next_edge_ns = sync_wire::AlignUpNs(tx_ns, pulse_period_ns);
        // Both periods were validated to fit in 32 bits when they were parsed.
        frame.sync_period_ns = static_cast<std::uint32_t>(send_period_ns);
        frame.pulse_period_ns = static_cast<std::uint32_t>(pulse_period_ns);

        if (SendSyncFrame(socket_fd.get(), dest_addr, frame)) {
            sent_count++;
        }

        // Log while sent_count is exactly 1, and otherwise at most once per second.
        if (sent_count == 1u || tx_ns - last_log_ns >= kNsPerSec) {
            std::cout << "sent=" << sent_count
                      << " seq=" << frame.seq
                      << " master_tx_ns=" << frame.master_tx_ns
                      << " next_edge_ns=" << frame.next_edge_ns
                      << " pulse_period_ns=" << frame.pulse_period_ns << "\n";
            last_log_ns = tx_ns;
        }

        // Advance on the fixed grid; if the loop fell behind, skip the missed
        // slots so the next deadline is always in the future.
        next_send_ns += send_period_ns;
        const std::uint64_t after_send_ns = MonotonicNs();
        while (next_send_ns <= after_send_ns) {
            next_send_ns += send_period_ns;
        }
    }

    std::cout << "Stopped.\n";
    return 0;
}

ZYNQ主机

c 复制代码
#include "sync_frame.hpp"

#include <arpa/inet.h>
#include <cerrno>
#include <csignal>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <fcntl.h>
#include <iostream>
#include <netinet/in.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <unistd.h>

namespace {

// Default destination: the limited broadcast address and UDP port.
static const char *kDefaultDestIp = "255.255.255.255";
static const std::uint16_t kDefaultDestPort = 5005u;
// Physical base address of the PL sync block; matches the AXI address map
// entry for axi_lite_ns_sync_pul_0 (0x43C0_0000).
static const std::uintptr_t kDefaultNsSyncBaseAddr = 0x43C00000u;
// Number of register bytes to map starting at the base address.
static const std::size_t kRegisterSpanBytes = 0x100u;
// Unit conversion factors.
static const std::uint64_t kNsPerMs = 1000000ull;
static const std::uint64_t kNsPerSec = 1000000000ull;
// NOTE(review): presumably how far ahead of "now" the master's first pulse
// edge is scheduled (1 ms) - usage is outside this excerpt, confirm.
static const std::uint64_t kMasterPulseStartAheadNs = 1000000ull;
// NOTE(review): presumably the lead time added to the edge announced in a
// sync frame (10 ms) - usage is outside this excerpt, confirm.
static const std::uint64_t kSyncEdgeLeadNs = 10000000ull;

// Register byte offsets from the mapped base. Meanings are inferred from the
// names only; confirm against the PL register map.
static const std::uint32_t kRegControl = 0x00u;
static const std::uint32_t kRegTimeLatchLow = 0x08u;
static const std::uint32_t kRegTimeLatchHigh = 0x0cu;
static const std::uint32_t kRegNextEdgeCfgLow = 0x10u;
static const std::uint32_t kRegNextEdgeCfgHigh = 0x14u;
static const std::uint32_t kRegCommand = 0x18u;
static const std::uint32_t kRegVersion = 0x1cu;
static const std::uint32_t kRegActiveEdgeLow = 0x20u;
static const std::uint32_t kRegActiveEdgeHigh = 0x24u;

// Bit masks, presumably for the control register (kControl*) and the command
// register (kCmd*) - TODO confirm against the PL register map.
static const std::uint32_t kControlCounterEnable = 0x00000001u;
static const std::uint32_t kControlPulseEnable = 0x00000002u;
static const std::uint32_t kCmdLatchTime = 0x00000001u;
static const std::uint32_t kCmdApplyNextEdge = 0x00000002u;

// Stop flag: written only by the signal handler.
volatile sig_atomic_t g_stop_requested = 0;

// Signal handler; it does nothing except raise the stop flag.
void HandleStopSignal(int /*signal_number*/)
{
    g_stop_requested = 1;
}

// Routes SIGINT and SIGTERM to HandleStopSignal.
// The sigaction structure is fully zeroed first, so no sa_flags are set.
void InstallSignalHandlers()
{
    struct sigaction stop_action;
    std::memset(&stop_action, 0, sizeof(stop_action));
    sigemptyset(&stop_action.sa_mask);
    stop_action.sa_handler = HandleStopSignal;

    const int stop_signals[] = {SIGINT, SIGTERM};
    for (int signal_number : stop_signals) {
        sigaction(signal_number, &stop_action, NULL);
    }
}

// Maps a window of physical address space through /dev/mem and exposes
// 32-bit register accessors relative to the requested base address.
// The mapping and the file descriptor are released in Close() / the destructor.
class DevMemMap {
public:
    DevMemMap() :
        fd_(-1),
        mapped_base_(MAP_FAILED),
        mapped_length_(0),
        regs_(NULL)
    {
    }

    ~DevMemMap()
    {
        Close();
    }

    // Maps span_bytes starting at physical address base_addr.
    // Returns true on success; on failure prints the reason to std::cerr,
    // leaves the object closed and returns false.
    bool Open(std::uintptr_t base_addr, std::size_t span_bytes)
    {
        // Release anything a previous successful Open() still holds; without
        // this, fd_ and mapped_base_ would be overwritten and leaked.
        Close();

        const long page_size_long = sysconf(_SC_PAGESIZE);
        if (page_size_long <= 0) {
            std::cerr << "sysconf(_SC_PAGESIZE) failed: " << std::strerror(errno) << "\n";
            return false;
        }

        // mmap offsets must be page aligned, so map from the containing page
        // and remember where base_addr sits inside it.
        const std::uintptr_t page_size = static_cast<std::uintptr_t>(page_size_long);
        const std::uintptr_t page_mask = ~(page_size - 1u);
        const std::uintptr_t page_base = base_addr & page_mask;
        const std::uintptr_t page_offset = base_addr - page_base;
        const std::size_t length = RoundUp(page_offset + span_bytes, page_size);

        fd_ = open("/dev/mem", O_RDWR | O_SYNC);
        if (fd_ < 0) {
            std::cerr << "open /dev/mem failed: " << std::strerror(errno) << "\n";
            return false;
        }

        void *const mapping = mmap(NULL,
                                   length,
                                   PROT_READ | PROT_WRITE,
                                   MAP_SHARED,
                                   fd_,
                                   static_cast<off_t>(page_base));
        if (mapping == MAP_FAILED) {
            std::cerr << "mmap 0x" << std::hex << page_base << std::dec
                      << " failed: " << std::strerror(errno) << "\n";
            close(fd_);
            fd_ = -1;
            return false;
        }

        // Commit state only once every step has succeeded.
        mapped_base_ = mapping;
        mapped_length_ = length;
        regs_ = reinterpret_cast<volatile std::uint32_t *>(
            static_cast<char *>(mapped_base_) + page_offset);
        return true;
    }

    // Unmaps the window and closes /dev/mem. Safe to call repeatedly.
    void Close()
    {
        if (mapped_base_ != MAP_FAILED) {
            munmap(mapped_base_, mapped_length_);
            mapped_base_ = MAP_FAILED;
            mapped_length_ = 0;
        }

        if (fd_ >= 0) {
            close(fd_);
            fd_ = -1;
        }

        regs_ = NULL;
    }

    // Writes a 32-bit value at byte offset `offset` from the base address.
    // Must only be called after a successful Open().
    void Write32(std::uint32_t offset, std::uint32_t value)
    {
        regs_[offset / sizeof(std::uint32_t)] = value;
    }

    // Reads the 32-bit value at byte offset `offset` from the base address.
    // Must only be called after a successful Open().
    std::uint32_t Read32(std::uint32_t offset) const
    {
        return regs_[offset / sizeof(std::uint32_t)];
    }

private:
    // Rounds value up to a multiple of alignment (alignment must be a power of two).
    static std::size_t RoundUp(std::size_t value, std::size_t alignment)
    {
        return (value + alignment - 1u) & ~(alignment - 1u);
    }

    // Non-copyable: two owners would unmap/close the same resources twice.
    DevMemMap(const DevMemMap &);
    DevMemMap &operator=(const DevMemMap &);

    int fd_;
    void *mapped_base_;
    std::size_t mapped_length_;
    volatile std::uint32_t *regs_;
};

// Owns a file descriptor and closes it on destruction.
// A negative value means "no descriptor" and is never closed.
class SocketFd {
public:
    explicit SocketFd(int fd = -1) : fd_(fd) {}

    ~SocketFd()
    {
        if (fd_ < 0) {
            return;
        }
        close(fd_);
    }

    // Returns the wrapped descriptor without giving up ownership.
    int get() const { return fd_; }

private:
    // Non-copyable so that a descriptor is never closed twice.
    SocketFd(const SocketFd &);
    SocketFd &operator=(const SocketFd &);

    int fd_;
};

// Combines two 32-bit halves into one 64-bit value (high goes to bits 63..32).
std::uint64_t MakeU64(std::uint32_t high, std::uint32_t low)
{
    std::uint64_t combined = high;
    combined <<= 32;
    combined |= low;
    return combined;
}

// Issues the latch-time command and then reads the latched low and high words,
// returning them as one 64-bit PL time value.
std::uint64_t ReadPlTimeNs(DevMemMap *registers)
{
    DevMemMap &pl = *registers;

    // The command must precede the reads: the latch registers are read right after it.
    pl.Write32(kRegCommand, kCmdLatchTime);

    const std::uint32_t latched_low = pl.Read32(kRegTimeLatchLow);
    const std::uint32_t latched_high = pl.Read32(kRegTimeLatchHigh);
    return MakeU64(latched_high, latched_low);
}

// Reads the 64-bit ACTIVE_EDGE value as a consistent pair of 32-bit words.
//
// The PL advances the active edge on its own, so a plain low-then-high read
// can straddle a carry out of the low word and return a value that is wrong
// by 2^32 ns. To avoid that, the high word is read before and after the low
// word and the read is repeated until both high reads agree.
// NOTE(review): assumes ACTIVE_EDGE reads have no side effects and are not
// hardware-latched — confirm against the PL register implementation.
std::uint64_t ReadActiveEdgeNs(const DevMemMap &registers)
{
    std::uint32_t high = registers.Read32(kRegActiveEdgeHigh);
    for (;;) {
        const std::uint32_t low = registers.Read32(kRegActiveEdgeLow);
        const std::uint32_t high_again = registers.Read32(kRegActiveEdgeHigh);
        if (high_again == high) {
            return MakeU64(high, low);
        }
        // The high word changed while reading: retry with the new value.
        high = high_again;
    }
}

// Writes next_edge_ns into the NEXT_EDGE_CFG low/high registers and then
// issues the apply command. The command is written last.
void WriteNextEdgeNs(DevMemMap *registers, std::uint64_t next_edge_ns)
{
    const std::uint32_t cfg_low = static_cast<std::uint32_t>(next_edge_ns & 0xffffffffull);
    const std::uint32_t cfg_high = static_cast<std::uint32_t>(next_edge_ns >> 32);

    DevMemMap &pl = *registers;
    pl.Write32(kRegNextEdgeCfgLow, cfg_low);
    pl.Write32(kRegNextEdgeCfgHigh, cfg_high);
    pl.Write32(kRegCommand, kCmdApplyNextEdge);
}

// Prints the VERSION register and treats 0 or 0xffffffff as "register block
// not reachable". Returns true when the value looks plausible.
bool CheckPlVersion(const DevMemMap &registers, std::uintptr_t base_addr)
{
    const std::uint32_t pl_version = registers.Read32(kRegVersion);
    std::cout << "PL VERSION = 0x" << std::hex << pl_version << std::dec << "\n";

    const bool looks_valid = (pl_version != 0u) && (pl_version != 0xffffffffu);
    if (looks_valid) {
        return true;
    }

    std::cerr << "ERROR: AXI read failed. Check PL base address 0x"
              << std::hex << base_addr << std::dec << " and bitstream.\n";
    return false;
}

// Starts the PL counter, schedules the first local pulse edge on the pulse
// period grid at least kMasterPulseStartAheadNs after "now", then enables
// pulse output. Returns the scheduled first edge.
std::uint64_t ScheduleMasterPulse(DevMemMap *registers, std::uint64_t pulse_period_ns)
{
    DevMemMap &pl = *registers;

    // Counter only at first; pulse output is enabled once an edge is configured.
    pl.Write32(kRegControl, kControlCounterEnable);
    usleep(1000);

    const std::uint64_t earliest_ns = ReadPlTimeNs(registers) + kMasterPulseStartAheadNs;
    const std::uint64_t first_edge_ns = sync_wire::AlignUpNs(earliest_ns, pulse_period_ns);
    WriteNextEdgeNs(registers, first_edge_ns);

    const std::uint32_t run_bits = kControlCounterEnable | kControlPulseEnable;
    pl.Write32(kRegControl, run_bits);
    return first_edge_ns;
}

// Picks the edge time to advertise in a sync frame: the PL's active edge,
// advanced by whole pulse periods until it is at least kSyncEdgeLeadNs after
// tx_ns. If the active edge reads as 0, the grid-aligned earliest time is
// used as the starting point instead.
std::uint64_t PickFutureMasterEdgeNs(const DevMemMap &registers,
                                     std::uint64_t tx_ns,
                                     std::uint64_t pulse_period_ns)
{
    const std::uint64_t active_ns = ReadActiveEdgeNs(registers);
    const std::uint64_t earliest_ns = tx_ns + kSyncEdgeLeadNs;

    std::uint64_t candidate_ns = active_ns;
    if (candidate_ns == 0u) {
        candidate_ns = sync_wire::AlignUpNs(earliest_ns, pulse_period_ns);
    }

    // Step forward one period at a time so the result stays on the same grid.
    for (; candidate_ns < earliest_ns; candidate_ns += pulse_period_ns) {
    }

    return candidate_ns;
}

// Blocks until the PL time reaches target_ns or a stop is requested.
// Sleeps 1 ms while more than 2 ms remain, 50 us while more than 200 us
// remain, and polls without sleeping for the final stretch.
void WaitUntilPlNs(DevMemMap *registers, std::uint64_t target_ns)
{
    const std::uint64_t coarse_threshold_ns = 2000000ull;
    const std::uint64_t fine_threshold_ns = 200000ull;

    while (!g_stop_requested) {
        const std::uint64_t pl_now_ns = ReadPlTimeNs(registers);
        if (pl_now_ns >= target_ns) {
            break;
        }

        const std::uint64_t left_ns = target_ns - pl_now_ns;
        if (left_ns > coarse_threshold_ns) {
            usleep(1000);
            continue;
        }
        if (left_ns > fine_threshold_ns) {
            usleep(50);
        }
    }
}

// Fills dest_addr with an AF_INET address built from a dotted-quad string and
// a host-order port. Returns false (after printing to std::cerr) when dest_ip
// is not a valid IPv4 address.
bool MakeDestAddress(const char *dest_ip,
                     std::uint16_t dest_port,
                     struct sockaddr_in &dest_addr)
{
    std::memset(&dest_addr, 0, sizeof(dest_addr));
    dest_addr.sin_port = htons(dest_port);
    dest_addr.sin_family = AF_INET;

    const int parse_result = inet_pton(AF_INET, dest_ip, &dest_addr.sin_addr);
    if (parse_result == 1) {
        return true;
    }

    std::cerr << "Invalid IPv4 address: " << dest_ip << "\n";
    return false;
}

// Creates a UDP socket with SO_BROADCAST enabled.
// Returns the descriptor, or -1 after printing the failure to std::cerr.
int OpenBroadcastSocket()
{
    const int udp_fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
    if (udp_fd < 0) {
        std::cerr << "socket failed: " << std::strerror(errno) << "\n";
        return -1;
    }

    int broadcast_on = 1;
    const int option_result = setsockopt(udp_fd,
                                         SOL_SOCKET,
                                         SO_BROADCAST,
                                         &broadcast_on,
                                         sizeof(broadcast_on));
    if (option_result >= 0) {
        return udp_fd;
    }

    std::cerr << "setsockopt SO_BROADCAST failed: " << std::strerror(errno) << "\n";
    close(udp_fd);
    return -1;
}

// Encodes the frame with sync_wire::Encode() and sends it as one datagram.
// Returns true only when sendto() reports the full packet size.
bool SendSyncFrame(int socket_fd,
                   const struct sockaddr_in &dest_addr,
                   const sync_wire::SyncFrame &frame)
{
    const std::array<std::uint8_t, sync_wire::kFrameWireSize> wire_bytes =
        sync_wire::Encode(frame);
    const struct sockaddr *target =
        reinterpret_cast<const struct sockaddr *>(&dest_addr);

    const ssize_t bytes_sent = sendto(socket_fd,
                                      wire_bytes.data(),
                                      wire_bytes.size(),
                                      0,
                                      target,
                                      sizeof(dest_addr));

    if (bytes_sent == static_cast<ssize_t>(wire_bytes.size())) {
        return true;
    }

    std::cerr << "sendto failed: " << std::strerror(errno) << "\n";
    return false;
}

}  // namespace

// Sync master entry point: maps the PL registers, starts the local pulse,
// then broadcasts one SyncFrame per send period (timed against the PL clock)
// until SIGINT/SIGTERM is received.
int main()
{
    const char *dest_ip = kDefaultDestIp;
    const std::uint16_t dest_port = kDefaultDestPort;
    const std::uint64_t send_period_ns = sync_wire::kSendPeriodNs;
    const std::uint64_t pulse_period_ns = sync_wire::kPulsePeriodNs;
    const std::uintptr_t base_addr = kDefaultNsSyncBaseAddr;

    InstallSignalHandlers();

    DevMemMap registers;
    if (!registers.Open(base_addr, kRegisterSpanBytes)) {
        return 1;
    }

    if (!CheckPlVersion(registers, base_addr)) {
        return 1;
    }

    // The master's own pulse output is started before any frame is sent.
    const std::uint64_t master_first_edge_ns =
        ScheduleMasterPulse(&registers, pulse_period_ns);

    struct sockaddr_in dest_addr;
    if (!MakeDestAddress(dest_ip, dest_port, dest_addr)) {
        return 1;
    }

    SocketFd socket_fd(OpenBroadcastSocket());
    if (socket_fd.get() < 0) {
        return 1;
    }

    std::cout << "UDP sync master broadcasting to " << dest_ip << ":" << dest_port
              << " every " << (send_period_ns / kNsPerMs) << " ms\n"
              << "Pulse period: " << pulse_period_ns << " ns ("
              << (kNsPerSec / pulse_period_ns) << " Hz if period divides 1s)\n"
              << "Frame: magic=0x" << std::hex << sync_wire::kMagic << std::dec
              << " version=" << sync_wire::kVersion
              << " size=" << sync_wire::kFrameWireSize << " bytes\n"
              << "Master clock: PL 64-bit time_ns at base 0x"
              << std::hex << base_addr << std::dec << "\n"
              << "Master local pulse_out is enabled, first_edge_ns="
              << master_first_edge_ns << "\n"
              << "UDP next_edge_ns comes from PL active_edge grid with "
              << (kSyncEdgeLeadNs / kNsPerMs) << " ms lead\n"
              << "Press Ctrl+C to stop.\n";

    std::uint32_t seq = 0;
    std::uint64_t sent_count = 0;
    std::uint64_t last_log_ns = 0;
    // The first frame is due immediately; later deadlines advance by send_period_ns.
    std::uint64_t next_send_ns = ReadPlTimeNs(&registers);

    while (!g_stop_requested) {
        WaitUntilPlNs(&registers, next_send_ns);
        if (g_stop_requested) {
            break;
        }

        // TX timestamp is taken from the PL clock just before the frame is built.
        const std::uint64_t tx_ns = ReadPlTimeNs(&registers);
        sync_wire::SyncFrame frame = {};
        frame.magic = sync_wire::kMagic;
        frame.version = sync_wire::kVersion;
        frame.seq = seq++;
        frame.master_tx_ns = tx_ns;

        // The master sends a reference edge on its own 1 kHz time grid, not an
        // edge pushed a fixed number of milliseconds into the future.
        // Each slave shifts that reference edge by whole periods, using its own
        // PL time_ns, to a safe moment in the future.
        // That keeps all slaves on the same phase grid without skipping a run of
        // 1 kHz pulses on every sync.
        frame.next_edge_ns = PickFutureMasterEdgeNs(registers,
                                                    tx_ns,
                                                    pulse_period_ns);
        frame.sync_period_ns = static_cast<std::uint32_t>(send_period_ns);
        frame.pulse_period_ns = static_cast<std::uint32_t>(pulse_period_ns);

        if (SendSyncFrame(socket_fd.get(), dest_addr, frame)) {
            sent_count++;
        }

        // Log the first successful frame and then at most once per second of PL time.
        if (sent_count == 1u || tx_ns - last_log_ns >= kNsPerSec) {
            std::cout << "sent=" << sent_count
                      << " seq=" << frame.seq
                      << " master_tx_ns=" << frame.master_tx_ns
                      << " next_edge_ns=" << frame.next_edge_ns
                      << " pulse_period_ns=" << frame.pulse_period_ns << "\n";
            last_log_ns = tx_ns;
        }

        // Advance the deadline; if sending overran, skip the missed slots
        // instead of bursting to catch up.
        next_send_ns += send_period_ns;
        const std::uint64_t after_send_ns = ReadPlTimeNs(&registers);
        while (next_send_ns <= after_send_ns) {
            next_send_ns += send_period_ns;
        }
    }

    std::cout << "Stopped.\n";
    return 0;
}

zynq从机

在计算主从时差时,使用了固定的时延 kDefaultEthFixedDelayNs = 95000ll;(单位 ns,即 95 us)

这个数不影响各从机之间的脉冲边沿对齐:所有从机使用同一个固定值,各自的边沿被平移相同的量

slave_master_diff_median_us 计算过去主从时差中位数

因为主从时钟不在同一个时钟域

测得 slave_master_diff_median_us 基本在均匀单调变化

main.cpp

c 复制代码
#include <arpa/inet.h>
#include <algorithm>
#include <cerrno>
#include <csignal>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fcntl.h>
#include <netinet/in.h>
#include <poll.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <unistd.h>

// ============================================================
// Linux UDP 从机程序
// ------------------------------------------------------------
// 作用:
//   1. 通过 UDP 接收主机发来的同步包 SyncFrame。
//   2. 通过 /dev/mem 访问 PL 里的 AXI-Lite 寄存器。
//   3. 收到 UDP 包后,立即读取 PL 的 64 位 time_ns 作为从机接收时刻。
//   4. 根据主机时间、固定链路延时 d、从机 PL 时间,计算 PL 下次 pulse_out 时刻。
//   5. 把计算出的 next_edge_ns 写入 PL,由 PL 在精确时刻输出 pulse_out。
//
// 重要原则:
//   - 从机同步时间基准只使用 PL 的 64 位 time_ns。
//   - Linux 不提供同步时间,只负责收 UDP、算数、写寄存器。
//   - Linux 调度会影响"recvfrom 返回到读 PL 时间"的延迟,所以极限精度
//     还需要硬件 RX timestamp;但当前版本先用软件方式验证链路和逻辑。
// ============================================================

// Default PL AXI-Lite base address. If the address is changed in the Vivado
// Address Editor, the run-time argument must be changed to match.
static const std::uintptr_t kDefaultNsSyncBaseAddr = 0x43C00000u;

// Size of the mapped AXI-Lite register window. Registers currently reach
// offset 0x24, so 0x100 is sufficient.
static const std::size_t kRegisterSpanBytes = 0x100u;

// UDP sync port. The master sends sync frames to this port; the slave binds
// 0.0.0.0:5005 to receive them.
static const std::uint16_t kDefaultSyncPort = 5005u;

// Default fixed one-way Ethernet delay d, in ns.
// The value is a rough estimate from a master RTT test; a real deployment
// should measure it at start-up and pass it in.
static const std::int64_t kDefaultEthFixedDelayNs = 95000ll;

// Default pulse period in ns. 1,000,000 ns = 1 ms.
// Used when pulse_period_ns in the master's sync frame is 0.
static const std::uint64_t kDefaultPulsePeriodNs = 1000000ull;

// To avoid "next_edge was already missed by the time it was written", the next
// pulse must be at least 1 ms after the current PL time.
static const std::uint64_t kMinScheduleAheadNs = 1000000ull;

// The PL counter is designed with a 10 ns tick, so edge times written to the
// PL are aligned up to 10 ns.
static const std::uint64_t kPlTickNs = 10ull;

// Toggle period of the PS debug output. It is also timed with PL time_ns, not
// the Linux clock.
static const std::uint64_t kDebugTogglePeriodNs = 500000000ull;

// Log throttling period, so that not every UDP packet is printed.
static const std::uint64_t kLogPeriodNs = 1000000000ull;

// Number of "slave PL RX timestamp - master TX timestamp" samples kept for the
// median. 100 samples at a 30 ms sync period cover roughly the last 3 seconds.
static const std::size_t kTimestampDiffHistorySize = 100u;

// poll() timeout while waiting for UDP. Even without packets the loop wakes up
// periodically to toggle ps_debug_out based on PL time.
static const int kIdlePollTimeoutMs = 10;

// Magic value in the sync frame header, so unrelated UDP data is not mistaken
// for a sync frame.
static const std::uint16_t kSyncMagic = 0x4A42u;

// Sync protocol version. Frames with a different version are rejected by
// IsValidSyncFrame().
static const std::uint16_t kSyncVersion = 1u;

// Fixed on-wire SyncFrame length: 2+2+4+8+8+4+4 = 32 bytes.
static const std::size_t kSyncFrameWireSize = 32u;

// ============================================================
// PL AXI-Lite register offsets
// ------------------------------------------------------------
// These offsets must match the register table in
// verilog/axi_lite_ns_sync_pulse.v.
// ============================================================

// CONTROL:
//   bit0 = counter_enable, lets the PL 64-bit time_ns counter run
//   bit1 = pulse_enable, lets the PL emit pulse_out when the edge time is reached
//   bit2 = ps_debug_out, debug IO driven by PS software
static const std::uint32_t kRegControl = 0x00u;

// STATUS: read-only status register with pulse_out, counter_en, pulse_en, etc.
static const std::uint32_t kRegStatus = 0x04u;

// TIME_LATCH_LOW/HIGH:
//   After the PS writes COMMAND.bit0, the PL latches the current 64-bit
//   time_ns into these two registers.
//   Reading both gives a stable 64-bit time from a single instant.
static const std::uint32_t kRegTimeLatchLow = 0x08u;
static const std::uint32_t kRegTimeLatchHigh = 0x0cu;

// NEXT_EDGE_CFG_LOW/HIGH:
//   The PS first writes the "next pulse time" into these two config registers,
//   then writes COMMAND.bit1; only then does the PL apply the configured value
//   to the active next_edge_ns.
static const std::uint32_t kRegNextEdgeCfgLow = 0x10u;
static const std::uint32_t kRegNextEdgeCfgHigh = 0x14u;

// COMMAND:
//   bit0 = latch time_ns
//   bit1 = apply next_edge_cfg_ns
// Write-1-to-trigger command register; the bits do not need to be held.
static const std::uint32_t kRegCommand = 0x18u;

// VERSION: used as a rough check that the AXI-Lite address and bitstream are right.
static const std::uint32_t kRegVersion = 0x1cu;

// ACTIVE_EDGE_LOW/HIGH:
//   The next pulse time the PL is currently using; can be read back to confirm
//   that a PS write took effect.
static const std::uint32_t kRegActiveEdgeLow = 0x20u;
static const std::uint32_t kRegActiveEdgeHigh = 0x24u;

// CONTROL bit definitions.
static const std::uint32_t kControlCounterEnable = 0x00000001u;
static const std::uint32_t kControlPulseEnable = 0x00000002u;
static const std::uint32_t kControlPsDebugOut = 0x00000004u;

// STATUS bit definitions.
static const std::uint32_t kStatusPulseOut = 0x00000001u;
static const std::uint32_t kStatusCounterEnable = 0x00000002u;
static const std::uint32_t kStatusPulseEnable = 0x00000004u;
static const std::uint32_t kStatusTimeLatchValid = 0x00000008u;
static const std::uint32_t kStatusNextEdgeValid = 0x00000010u;
static const std::uint32_t kStatusPsDebugOut = 0x00000020u;

// COMMAND bit definitions.
static const std::uint32_t kCmdLatchTime = 0x00000001u;
static const std::uint32_t kCmdApplyNextEdge = 0x00000002u;

// Exit flag for Ctrl+C / kill. The signal handler only sets this simple
// variable and avoids any complex work.
static volatile std::sig_atomic_t g_stop_requested = 0;

// Ring buffer of 64-bit time-difference samples:
//   timestamp_diff_ns = local_rx_ns - master_tx_ns
// local_rx_ns comes from the slave's PL 64-bit time_ns; master_tx_ns comes
// from the master's UDP frame.
static std::int64_t g_timestamp_diff_history[kTimestampDiffHistorySize];
// Scratch copy of the history that GetSlaveMasterDiffMedianNs() sorts.
static std::int64_t g_timestamp_diff_sort_buffer[kTimestampDiffHistorySize];
// Next slot to write in the ring buffer.
static std::size_t g_timestamp_diff_write_index = 0u;
// Number of valid samples, capped at kTimestampDiffHistorySize.
static std::size_t g_timestamp_diff_count = 0u;
// First raw difference seen; later samples are reported relative to it.
static bool g_timestamp_diff_base_valid = false;
static std::int64_t g_timestamp_diff_base_ns = 0;

// In-memory form of the UDP sync frame.
// Note: packets are not reinterpret_cast into this struct, because alignment
// and endianness can differ between CPUs/compilers.
// DecodeSyncFrame() parses the 32 on-wire bytes manually as little-endian.
struct SyncFrame {
    // Fixed magic value used to recognise our sync frames.
    std::uint16_t magic;

    // Protocol version, guarding against mismatched master/slave builds.
    std::uint16_t version;

    // Packet sequence number.
    // NOTE(review): the original comment says the slave only accepts increasing
    // sequence numbers and drops old/reordered packets — that check is not in
    // the part of the file visible here; confirm in main().
    std::uint32_t seq;

    // Master time at which this frame was sent, in ns.
    std::uint64_t master_tx_ns;

    // The "next pulse time" the master wants all slaves to share, in ns, in the
    // master's time domain.
    std::uint64_t next_edge_ns;

    // Master's sync frame send period, in ns. Parsed and kept for debugging/extension.
    std::uint32_t sync_period_ns;

    // pulse_out period, in ns. The slave uses it to push edges that are too
    // close or already past forward by whole periods.
    std::uint32_t pulse_period_ns;
};

// Stop-signal handler: raises the stop flag and nothing more, so that the main
// loop can notice it and shut down in an orderly way.
static void HandleStopSignal(int /*signal_number*/)
{
    g_stop_requested = 1;
}

// Parses the PL AXI-Lite base address from a command-line string.
// Accepts decimal, 0x-prefixed hexadecimal and 0-prefixed octal (strtoull base 0).
//
// Returns true and stores the value in *base_addr_out on success.
// Returns false and leaves *base_addr_out untouched when the text is empty,
// has trailing characters, contains a minus sign, overflows unsigned long long,
// or does not fit in std::uintptr_t on this platform.
static bool ParseBaseAddress(const char *text, std::uintptr_t *base_addr_out)
{
    if (text == NULL || base_addr_out == NULL) {
        return false;
    }

    // strtoull accepts a leading '-' and returns the negated value as unsigned,
    // so "-1" would otherwise become an all-ones address.
    if (std::strchr(text, '-') != NULL) {
        return false;
    }

    char *end = NULL;
    errno = 0;
    const unsigned long long value = std::strtoull(text, &end, 0);

    if (end == text || *end != '\0') {
        return false;
    }

    // ERANGE: the text was larger than ULLONG_MAX and strtoull saturated.
    // The second test rejects values that would be truncated by the cast on
    // targets where uintptr_t is 32 bits wide.
    if (errno == ERANGE || value > UINTPTR_MAX) {
        return false;
    }

    *base_addr_out = static_cast<std::uintptr_t>(value);
    return true;
}

// Parses a UDP port number in the range 1..65535 (strtoul base 0: decimal,
// 0x hex or 0 octal).
//
// Returns true and stores the value in *value_out on success.
// Returns false and leaves *value_out untouched for empty text, trailing
// characters, a minus sign, 0, or anything above 65535.
static bool ParseU16(const char *text, std::uint16_t *value_out)
{
    if (text == NULL || value_out == NULL) {
        return false;
    }

    // strtoul negates "-N" modulo 2^width, so a large negative number could
    // wrap around into the valid port range. Reject the sign outright.
    if (std::strchr(text, '-') != NULL) {
        return false;
    }

    char *end = NULL;
    errno = 0;
    const unsigned long value = std::strtoul(text, &end, 0);

    if (end == text || *end != '\0') {
        return false;
    }

    if (errno == ERANGE || value == 0u || value > 65535u) {
        return false;
    }

    *value_out = static_cast<std::uint16_t>(value);
    return true;
}

// Parses a signed 64-bit integer argument, e.g. the fixed link delay in ns
// (strtoll base 0: decimal, 0x hex or 0 octal; negative values allowed).
//
// Returns true and stores the value in *value_out on success.
// Returns false and leaves *value_out untouched for empty text, trailing
// characters, or a magnitude that does not fit in long long.
static bool ParseI64(const char *text, std::int64_t *value_out)
{
    if (text == NULL || value_out == NULL) {
        return false;
    }

    char *end = NULL;
    errno = 0;
    const long long value = std::strtoll(text, &end, 0);

    if (end == text || *end != '\0') {
        return false;
    }

    // On overflow strtoll saturates to LLONG_MIN/LLONG_MAX and sets ERANGE;
    // treat that as a parse error instead of silently using the clamped value.
    if (errno == ERANGE) {
        return false;
    }

    *value_out = static_cast<std::int64_t>(value);
    return true;
}

// Scope-bound owner of a file descriptor: the descriptor is closed when the
// object is destroyed. A negative value means "nothing to close".
class Fd {
public:
    explicit Fd(int fd = -1) : fd_(fd) {}

    ~Fd()
    {
        if (fd_ < 0) {
            return;
        }
        close(fd_);
    }

    // Returns the wrapped descriptor; ownership stays with this object.
    int get() const { return fd_; }

private:
    // Copying is disabled so two Fd objects never close the same descriptor.
    Fd(const Fd &);
    Fd &operator=(const Fd &);

    int fd_;
};

// /dev/mem mapping helper:
//   - Open() maps the PL AXI-Lite physical address into user-space virtual memory.
//   - Read32()/Write32() access registers as 32-bit words.
//   - The destructor unmaps and closes automatically.
class DevMemMap {
public:
    DevMemMap() :
        fd_(-1),
        mapped_base_(MAP_FAILED),
        mapped_length_(0),
        regs_(NULL)
    {
    }

    ~DevMemMap()
    {
        Close();
    }

    // Maps span_bytes starting at physical address base_addr.
    // Returns true on success; on failure prints the reason to stderr,
    // leaves the object closed and returns false.
    bool Open(std::uintptr_t base_addr, std::size_t span_bytes)
    {
        // Release anything a previous successful Open() still holds; without
        // this, fd_ and mapped_base_ would be overwritten and leaked.
        Close();

        // mmap needs a page-aligned offset. The AXI base address is not
        // necessarily page aligned, so compute the offset inside the page.
        const long page_size_long = sysconf(_SC_PAGESIZE);
        if (page_size_long <= 0) {
            std::fprintf(stderr, "sysconf(_SC_PAGESIZE) failed: %s\n", std::strerror(errno));
            return false;
        }

        const std::uintptr_t page_size = static_cast<std::uintptr_t>(page_size_long);
        const std::uintptr_t page_mask = ~(page_size - 1u);
        const std::uintptr_t page_base = base_addr & page_mask;
        const std::uintptr_t page_offset = base_addr - page_base;
        const std::size_t length = RoundUp(page_offset + span_bytes, page_size);

        fd_ = open("/dev/mem", O_RDWR | O_SYNC);
        if (fd_ < 0) {
            std::fprintf(stderr, "open /dev/mem failed: %s\n", std::strerror(errno));
            return false;
        }

        void *const mapping = mmap(NULL,
                                   length,
                                   PROT_READ | PROT_WRITE,
                                   MAP_SHARED,
                                   fd_,
                                   static_cast<off_t>(page_base));
        if (mapping == MAP_FAILED) {
            std::fprintf(stderr, "mmap 0x%llx failed: %s\n",
                         static_cast<unsigned long long>(page_base),
                         std::strerror(errno));
            close(fd_);
            fd_ = -1;
            return false;
        }

        // Commit state only once every step has succeeded.
        mapped_base_ = mapping;
        mapped_length_ = length;
        regs_ = reinterpret_cast<volatile std::uint32_t *>(
            static_cast<char *>(mapped_base_) + page_offset);
        return true;
    }

    // Unmaps the window and closes /dev/mem. Safe to call repeatedly.
    void Close()
    {
        if (mapped_base_ != MAP_FAILED) {
            munmap(mapped_base_, mapped_length_);
            mapped_base_ = MAP_FAILED;
            mapped_length_ = 0;
        }

        if (fd_ >= 0) {
            close(fd_);
            fd_ = -1;
        }

        regs_ = NULL;
    }

    // Writes a 32-bit value at byte offset `offset` from the base address.
    // Must only be called after a successful Open().
    void Write32(std::uint32_t offset, std::uint32_t value)
    {
        // volatile keeps the compiler from optimising the register access away.
        regs_[offset / sizeof(std::uint32_t)] = value;
    }

    // Reads the 32-bit value at byte offset `offset` from the base address.
    // Must only be called after a successful Open().
    std::uint32_t Read32(std::uint32_t offset) const
    {
        return regs_[offset / sizeof(std::uint32_t)];
    }

private:
    // Rounds value up to a multiple of alignment (a power of two), so the
    // mapping covers the whole register window.
    static std::size_t RoundUp(std::size_t value, std::size_t alignment)
    {
        return (value + alignment - 1u) & ~(alignment - 1u);
    }

    // Non-copyable, so that one mapping is never managed by several objects.
    DevMemMap(const DevMemMap &);
    DevMemMap &operator=(const DevMemMap &);

    int fd_;
    void *mapped_base_;
    std::size_t mapped_length_;
    volatile std::uint32_t *regs_;
};

// Reads a 16-bit little-endian integer: src[0] is the low byte, src[1] the high byte.
static std::uint16_t GetLe16(const std::uint8_t *src)
{
    const unsigned int low_byte = src[0];
    const unsigned int high_byte = src[1];
    return static_cast<std::uint16_t>((high_byte << 8) | low_byte);
}

// Reads a 32-bit little-endian integer from four consecutive bytes.
static std::uint32_t GetLe32(const std::uint8_t *src)
{
    std::uint32_t value = 0u;
    // Fold from the most significant byte (src[3]) down to the least (src[0]).
    for (int index = 3; index >= 0; --index) {
        value = (value << 8) | src[index];
    }
    return value;
}

// Reads a 64-bit little-endian integer: the first four bytes form the low
// word, the next four the high word.
static std::uint64_t GetLe64(const std::uint8_t *src)
{
    const std::uint64_t low_word = GetLe32(src);
    const std::uint64_t high_word = GetLe32(src + 4);
    return (high_word << 32) | low_word;
}

// Decodes the 32-byte on-wire sync packet into a SyncFrame.
// Fields are read at fixed byte offsets as little-endian values, so struct
// padding and host endianness do not matter.
static SyncFrame DecodeSyncFrame(const std::uint8_t *packet)
{
    // Byte offsets of each field inside the packet.
    enum {
        kOffMagic = 0,
        kOffVersion = 2,
        kOffSeq = 4,
        kOffMasterTx = 8,
        kOffNextEdge = 16,
        kOffSyncPeriod = 24,
        kOffPulsePeriod = 28
    };

    SyncFrame decoded;
    decoded.magic = GetLe16(packet + kOffMagic);
    decoded.version = GetLe16(packet + kOffVersion);
    decoded.seq = GetLe32(packet + kOffSeq);
    decoded.master_tx_ns = GetLe64(packet + kOffMasterTx);
    decoded.next_edge_ns = GetLe64(packet + kOffNextEdge);
    decoded.sync_period_ns = GetLe32(packet + kOffSyncPeriod);
    decoded.pulse_period_ns = GetLe32(packet + kOffPulsePeriod);
    return decoded;
}

// Returns true when both the magic value and the protocol version match.
// Only these two fields are checked here; the packet length is not.
static bool IsValidSyncFrame(const SyncFrame &frame)
{
    if (frame.magic != kSyncMagic) {
        return false;
    }
    return frame.version == kSyncVersion;
}

// Joins the high and low 32-bit register values into one 64-bit time value.
static std::uint64_t MakeU64(std::uint32_t high, std::uint32_t low)
{
    std::uint64_t combined = high;
    combined <<= 32;
    combined |= low;
    return combined;
}

// Triggers a PL time latch and returns the latched 64-bit time_ns.
static std::uint64_t ReadTimeNs(DevMemMap *registers)
{
    DevMemMap &pl = *registers;

    // Ask the PL to latch the current 64-bit time_ns first, so the two 32-bit
    // reads below cannot be torn by a carry out of the low word.
    pl.Write32(kRegCommand, kCmdLatchTime);

    // Both halves now come from the same latched instant.
    const std::uint32_t latched_low = pl.Read32(kRegTimeLatchLow);
    const std::uint32_t latched_high = pl.Read32(kRegTimeLatchHigh);
    return MakeU64(latched_high, latched_low);
}

// Reads the next pulse edge time the PL is actually using (ACTIVE_EDGE).
//
// The PL advances this value on its own, so a plain low-then-high read can
// straddle a carry out of the low word and return a value that is wrong by
// 2^32 ns. To avoid that, the high word is read before and after the low word
// and the read is repeated until both high reads agree.
// NOTE(review): assumes ACTIVE_EDGE reads have no side effects and are not
// hardware-latched — confirm against the PL register implementation.
static std::uint64_t ReadActiveEdgeNs(const DevMemMap &registers)
{
    std::uint32_t high = registers.Read32(kRegActiveEdgeHigh);
    for (;;) {
        const std::uint32_t low = registers.Read32(kRegActiveEdgeLow);
        const std::uint32_t high_again = registers.Read32(kRegActiveEdgeHigh);
        if (high_again == high) {
            return MakeU64(high, low);
        }
        // The high word changed while reading: retry with the new value.
        high = high_again;
    }
}

// Writes a new next-pulse edge time and commits it to the PL.
static void WriteNextEdgeNs(DevMemMap *registers, std::uint64_t next_edge_ns)
{
    const std::uint32_t cfg_low = static_cast<std::uint32_t>(next_edge_ns & 0xffffffffull);
    const std::uint32_t cfg_high = static_cast<std::uint32_t>(next_edge_ns >> 32);

    // Both config halves go in first; the apply command is written last, and
    // only then does the PL switch its active next_edge_ns.
    DevMemMap &pl = *registers;
    pl.Write32(kRegNextEdgeCfgLow, cfg_low);
    pl.Write32(kRegNextEdgeCfgHigh, cfg_high);
    pl.Write32(kRegCommand, kCmdApplyNextEdge);
}

// Prints a 64-bit time as two 32-bit hex halves plus its value in whole milliseconds.
static void PrintU64(const char *name, std::uint64_t value)
{
    const unsigned int upper_half = static_cast<unsigned int>(value >> 32);
    const unsigned int lower_half = static_cast<unsigned int>(value);
    const unsigned long long whole_ms =
        static_cast<unsigned long long>(value / 1000000ull);

    std::printf("%s = 0x%08x_%08x (%llu ms)\n", name, upper_half, lower_half, whole_ms);
}

// Prints the raw STATUS register followed by each decoded status bit as 0/1.
static void DumpStatus(const DevMemMap &registers)
{
    const std::uint32_t status = registers.Read32(kRegStatus);

    // Decode every flag up front, then print them in register-bit order.
    const int pulse_out = (status & kStatusPulseOut) != 0u ? 1 : 0;
    const int counter_en = (status & kStatusCounterEnable) != 0u ? 1 : 0;
    const int pulse_en = (status & kStatusPulseEnable) != 0u ? 1 : 0;
    const int latch_valid = (status & kStatusTimeLatchValid) != 0u ? 1 : 0;
    const int next_edge_valid = (status & kStatusNextEdgeValid) != 0u ? 1 : 0;
    const int ps_debug_out = (status & kStatusPsDebugOut) != 0u ? 1 : 0;

    std::printf("STATUS = 0x%08x", status);
    std::printf(" pulse_out=%d", pulse_out);
    std::printf(" counter_en=%d", counter_en);
    std::printf(" pulse_en=%d", pulse_en);
    std::printf(" latch_valid=%d", latch_valid);
    std::printf(" next_edge_valid=%d", next_edge_valid);
    std::printf(" ps_debug_out=%d\n", ps_debug_out);
}

// Reads the VERSION register to confirm the AXI-Lite mapping and bitstream are
// reachable. A value of 0 or 0xffffffff usually means a wrong base address, an
// unloaded PL, or a broken AXI path; in that case an error is printed and
// false is returned.
static bool CheckVersion(const DevMemMap &registers, std::uintptr_t base_addr)
{
    const std::uint32_t pl_version = registers.Read32(kRegVersion);
    std::printf("VERSION = 0x%08x\n", pl_version);

    const bool looks_valid = (pl_version != 0u) && (pl_version != 0xffffffffu);
    if (looks_valid) {
        return true;
    }

    std::fprintf(stderr,
                 "ERROR: AXI read failed. Check base address 0x%08llx and bitstream.\n",
                 static_cast<unsigned long long>(base_addr));
    return false;
}

// Rounds value up to the next multiple of step (values already on a multiple
// are returned unchanged). Used to keep edge times on whole PL ticks.
static std::uint64_t AlignUp(std::uint64_t value, std::uint64_t step)
{
    const std::uint64_t bumped = value + (step - 1u);
    return bumped - (bumped % step);
}

// Returns (local_rx_ns - master_tx_ns) relative to the first such difference
// ever computed, so the first call yields 0.
//
// The master's clock and the slave's PL time_ns have different zero points, so
// the absolute difference carries no physical meaning; its median and jitter
// are what indicate whether the sync link is stable.
static std::int64_t CalculateSlaveMasterDiffNs(std::uint64_t local_rx_ns,
                                               std::uint64_t master_tx_ns)
{
    const std::int64_t rx_signed = static_cast<std::int64_t>(local_rx_ns);
    const std::int64_t tx_signed = static_cast<std::int64_t>(master_tx_ns);
    const std::int64_t raw_diff_ns = rx_signed - tx_signed;

    if (g_timestamp_diff_base_valid) {
        return raw_diff_ns - g_timestamp_diff_base_ns;
    }

    // First sample: remember it as the baseline.
    g_timestamp_diff_base_ns = raw_diff_ns;
    g_timestamp_diff_base_valid = true;
    return raw_diff_ns - g_timestamp_diff_base_ns;
}

// Converts nanoseconds to a floating-point microsecond value for log output.
static double NsToUs(std::int64_t value_ns)
{
    const double as_double = static_cast<double>(value_ns);
    return as_double / 1000.0;
}

// Stores a new slave/master time-difference sample in the ring buffer,
// overwriting the oldest sample once the buffer is full.
static void PushSlaveMasterDiffNs(std::int64_t diff_ns)
{
    const std::size_t slot = g_timestamp_diff_write_index;
    g_timestamp_diff_history[slot] = diff_ns;

    // Advance the write position, wrapping at the end of the buffer.
    g_timestamp_diff_write_index = (slot + 1u) % kTimestampDiffHistorySize;

    // The sample count grows until the buffer is full and then stays there.
    const bool has_room = g_timestamp_diff_count < kTimestampDiffHistorySize;
    if (has_room) {
        g_timestamp_diff_count += 1u;
    }
}

// Returns the median of the stored time-difference samples, or 0 when there
// are none. For an even sample count the two middle values are averaged.
static std::int64_t GetSlaveMasterDiffMedianNs()
{
    const std::size_t sample_count = g_timestamp_diff_count;
    if (sample_count == 0u) {
        return 0;
    }

    // Sort a scratch copy so the ring buffer itself keeps its order.
    std::int64_t *const sorted_begin = g_timestamp_diff_sort_buffer;
    std::int64_t *const sorted_end = sorted_begin + sample_count;
    std::copy(g_timestamp_diff_history,
              g_timestamp_diff_history + sample_count,
              sorted_begin);
    std::sort(sorted_begin, sorted_end);

    const std::size_t upper_index = sample_count / 2u;
    const bool odd_count = (sample_count % 2u) == 1u;
    if (odd_count) {
        return sorted_begin[upper_index];
    }

    const std::int64_t lower_value = sorted_begin[upper_index - 1u];
    const std::int64_t upper_value = sorted_begin[upper_index];
    return lower_value + (upper_value - lower_value) / 2;
}

// Converts the master-domain next_edge_ns from a sync frame into the slave's
// PL time domain and returns it aligned up to the PL tick.
//
// Parameters:
//   frame              - decoded sync frame (fields come from the network and
//                        are therefore untrusted).
//   local_rx_ns        - slave PL time_ns at which the frame was received.
//   eth_fixed_delay_ns - assumed one-way link delay d, in ns (may be negative).
//
// Derivation:
//   The frame left the master at master_tx_ns and is assumed to arrive at
//   master time master_tx_ns + d, which the slave observed as local_rx_ns.
//   So offset = master_tx_ns + d - local_rx_ns (master domain - slave domain),
//   and local_next_edge = next_edge_ns - offset.
//
// If that edge is earlier than local_rx_ns + kMinScheduleAheadNs, it is moved
// forward by the smallest whole number of pulse periods that reaches that
// minimum, leaving the PS time to write the registers.
//
// All arithmetic is unsigned and modular. The number of periods to add is
// computed in closed form rather than by a loop, so a malformed frame (for
// example pulse_period_ns = 1 with an edge far in the past) can neither cause
// signed overflow nor keep the slave spinning.
static std::uint64_t CalculateLocalNextEdgeNs(const SyncFrame &frame,
                                              std::uint64_t local_rx_ns,
                                              std::int64_t eth_fixed_delay_ns)
{
    // The pulse period from the frame wins; 0 falls back to the 1 ms default.
    const std::uint64_t pulse_period_ns =
        frame.pulse_period_ns != 0u ? frame.pulse_period_ns : kDefaultPulsePeriodNs;

    // local_next_edge = next_edge - (master_tx + d - local_rx), modulo 2^64.
    std::uint64_t local_next_edge_ns =
        frame.next_edge_ns -
        frame.master_tx_ns -
        static_cast<std::uint64_t>(eth_fixed_delay_ns) +
        local_rx_ns;

    const std::uint64_t min_next_edge_ns = local_rx_ns + kMinScheduleAheadNs;

    // Signed distance from the candidate edge up to the minimum allowed edge.
    // Positive means the candidate is too early (or already in the past).
    const std::int64_t shortfall_ns =
        static_cast<std::int64_t>(min_next_edge_ns - local_next_edge_ns);

    if (shortfall_ns > 0) {
        // ceil(shortfall / period) whole periods bring the edge to or past the minimum.
        const std::uint64_t shortfall = static_cast<std::uint64_t>(shortfall_ns);
        const std::uint64_t periods_to_add =
            (shortfall + pulse_period_ns - 1u) / pulse_period_ns;
        local_next_edge_ns += periods_to_add * pulse_period_ns;
    }

    return AlignUp(local_next_edge_ns, kPlTickNs);
}

// Opens a UDP socket with SO_REUSEADDR and binds it to INADDR_ANY:listen_port,
// so frames are accepted from any sender without configuring the master's IP.
// Returns the descriptor, or -1 after printing the failure to stderr.
static int OpenSyncSocket(std::uint16_t listen_port)
{
    const int udp_fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
    if (udp_fd < 0) {
        std::fprintf(stderr, "socket failed: %s\n", std::strerror(errno));
        return -1;
    }

    int reuse_on = 1;
    const int option_result =
        setsockopt(udp_fd, SOL_SOCKET, SO_REUSEADDR, &reuse_on, sizeof(reuse_on));
    if (option_result < 0) {
        std::fprintf(stderr, "setsockopt SO_REUSEADDR failed: %s\n", std::strerror(errno));
        close(udp_fd);
        return -1;
    }

    struct sockaddr_in local_addr;
    std::memset(&local_addr, 0, sizeof(local_addr));
    local_addr.sin_family = AF_INET;
    local_addr.sin_port = htons(listen_port);
    local_addr.sin_addr.s_addr = htonl(INADDR_ANY);

    const struct sockaddr *bind_target =
        reinterpret_cast<const struct sockaddr *>(&local_addr);
    if (bind(udp_fd, bind_target, sizeof(local_addr)) >= 0) {
        return udp_fd;
    }

    std::fprintf(stderr, "bind port %u failed: %s\n", listen_port, std::strerror(errno));
    close(udp_fd);
    return -1;
}

// Formats the IPv4 address into buffer and returns it; returns the literal
// "unknown" when inet_ntop() fails (for example when the buffer is too small).
static const char *AddressToText(const struct sockaddr_in &address, char *buffer, std::size_t size)
{
    if (inet_ntop(AF_INET, &address.sin_addr, buffer, size) == NULL) {
        return "unknown";
    }
    return buffer;
}

// Prints the command-line synopsis and the built-in default values to stderr.
static void PrintUsage(const char *program_name)
{
    const unsigned long long default_base =
        static_cast<unsigned long long>(kDefaultNsSyncBaseAddr);
    const long long default_delay = static_cast<long long>(kDefaultEthFixedDelayNs);

    std::fprintf(stderr,
                 "Usage: %s [base_addr] [sync_port] [eth_fixed_delay_ns]\n",
                 program_name);
    std::fprintf(stderr,
                 "Default: base_addr=0x%08llx sync_port=%u eth_fixed_delay_ns=%lld\n",
                 default_base,
                 kDefaultSyncPort,
                 default_delay);
}

// Program entry point of the UDP sync slave: initialise the PL block, listen
// for UDP sync frames, and keep re-aligning pulse_out to the master.
//
// Command line (all arguments optional, at most three):
//   argv[1] = PL AXI-Lite base address, e.g. 0x43C00000
//   argv[2] = UDP listen port, e.g. 5005
//   argv[3] = fixed one-way link delay d in ns, e.g. 95000
//
// Exit status:
//   0  the loop ended because g_stop_requested was set
//   1  register mapping, version check or socket setup failed, or poll()
//      reported an unrecoverable error
//   2  invalid command-line arguments
int main(int argc, char **argv)
{
    std::uintptr_t base_addr = kDefaultNsSyncBaseAddr;
    std::uint16_t sync_port = kDefaultSyncPort;
    std::int64_t eth_fixed_delay_ns = kDefaultEthFixedDelayNs;

    if (argc > 4) {
        PrintUsage(argv[0]);
        return 2;
    }

    if (argc >= 2 && !ParseBaseAddress(argv[1], &base_addr)) {
        std::fprintf(stderr, "Invalid base address: %s\n", argv[1]);
        return 2;
    }

    if (argc >= 3 && !ParseU16(argv[2], &sync_port)) {
        std::fprintf(stderr, "Invalid sync port: %s\n", argv[2]);
        return 2;
    }

    if (argc >= 4 && !ParseI64(argv[3], &eth_fixed_delay_ns)) {
        std::fprintf(stderr, "Invalid eth_fixed_delay_ns: %s\n", argv[3]);
        return 2;
    }

    std::signal(SIGINT, HandleStopSignal);
    std::signal(SIGTERM, HandleStopSignal);

    std::printf("\n=== ns_sync_pulse Linux UDP sync slave ===\n");
    std::printf("BASE = 0x%08llx\n", static_cast<unsigned long long>(base_addr));
    std::printf("sync_port = %u\n", sync_port);
    std::printf("eth_fixed_delay_ns = %lld\n", static_cast<long long>(eth_fixed_delay_ns));

    DevMemMap registers;
    if (!registers.Open(base_addr, kRegisterSpanBytes)) {
        return 1;
    }

    // Confirm that AXI-Lite reads work first; otherwise we could wait for UDP
    // forever while the PL base address is actually wrong.
    if (!CheckVersion(registers, base_addr)) {
        return 1;
    }

    // Open the UDP listen socket. The slave does not need the master's IP.
    Fd sync_socket(OpenSyncSocket(sync_port));
    if (sync_socket.get() < 0) {
        return 1;
    }

    std::uint32_t debug_state = 0u;

    // Enable the PL counter first so the 64-bit time_ns starts running.
    registers.Write32(kRegControl, kControlCounterEnable);
    usleep(1000);

    // Even without a master the slave must output its local 1 kHz pulse_out:
    //   1. Read the current PL time_ns.
    //   2. Schedule the first pulse at least kMinScheduleAheadNs in the
    //      future so it has not already expired when it is written.
    //   3. Write NEXT_EDGE_CFG_LOW/HIGH and commit it to the PL.
    //   4. Set pulse_enable; per the PL design the hardware then advances
    //      next_edge_ns by one period on its own.
    //
    // When a master sync frame arrives later, the PS recomputes
    // local_next_edge_ns and writes it to the PL, pulling the local pulse
    // train onto the master's schedule.
    const std::uint64_t startup_time_ns = ReadTimeNs(&registers);
    const std::uint64_t startup_first_edge_ns =
        AlignUp(startup_time_ns + kMinScheduleAheadNs, kPlTickNs);
    WriteNextEdgeNs(&registers, startup_first_edge_ns);
    registers.Write32(kRegControl,
                      kControlCounterEnable |
                      kControlPulseEnable |
                      debug_state);

    PrintU64("time_ns", startup_time_ns);
    PrintU64("free_run_first_edge_ns", startup_first_edge_ns);
    DumpStatus(registers);

    std::printf("Local 1kHz pulse_out is running before master sync.\n");
    std::printf("Waiting for UDP SyncFrame on 0.0.0.0:%u.\n", sync_port);
    std::printf("Press Ctrl+C to stop.\n");

    bool seq_inited = false;
    std::uint32_t last_seq = 0;
    std::uint64_t accepted_count = 0;
    std::uint64_t last_log_ns = 0;

    // Becomes non-zero when the loop is left because of an error rather than
    // a stop request, so the failure is visible in the process exit status.
    int exit_code = 0;

    // Next toggle instant of the debug IO, expressed in PL time_ns rather
    // than Linux time.
    std::uint64_t next_debug_toggle_ns = ReadTimeNs(&registers) + kDebugTogglePeriodNs;

    while (!g_stop_requested) {
        // Wait for a UDP frame with poll(), but wake up every
        // kIdlePollTimeoutMs to service the ps_debug_out toggle.
        struct pollfd poll_fd;
        poll_fd.fd = sync_socket.get();
        poll_fd.events = POLLIN;
        poll_fd.revents = 0;

        const int poll_result = poll(&poll_fd, 1, kIdlePollTimeoutMs);
        if (poll_result < 0) {
            if (errno == EINTR) {
                // Interrupted by a signal: re-evaluate the stop flag.
                continue;
            }
            std::fprintf(stderr, "poll failed: %s\n", std::strerror(errno));
            exit_code = 1;
            break;
        }

        if (poll_result > 0 && (poll_fd.revents & POLLIN)) {
            std::uint8_t packet[kSyncFrameWireSize];
            struct sockaddr_in master_addr;
            socklen_t master_len = sizeof(master_addr);

            // Receive the datagram. MSG_TRUNC makes recvfrom() return the
            // real datagram length even when it exceeds the buffer, so an
            // oversized datagram fails the exact-size check below instead of
            // being silently truncated to a "valid" length.
            const ssize_t received = recvfrom(sync_socket.get(),
                                              packet,
                                              sizeof(packet),
                                              MSG_TRUNC,
                                              reinterpret_cast<struct sockaddr *>(&master_addr),
                                              &master_len);

            // The most important slave timestamp of the sync computation:
            // the PL 64-bit time_ns, read immediately after recvfrom()
            // returns to keep software latency uncertainty small.
            const std::uint64_t local_rx_ns = ReadTimeNs(&registers);

            // errno is sampled after the timestamp so the critical path is
            // unchanged. NOTE(review): assumes ReadTimeNs() does not modify
            // errno - confirm.
            const int recv_errno = errno;

            if (received < 0) {
                if (recv_errno != EINTR && recv_errno != EAGAIN) {
                    std::fprintf(stderr, "recvfrom failed: %s\n", std::strerror(recv_errno));
                }
            } else if (received == static_cast<ssize_t>(sizeof(packet))) {
                const SyncFrame frame = DecodeSyncFrame(packet);

                // Drop invalid, stale and out-of-order frames; UDP does not
                // guarantee ordering.
                // NOTE(review): a master that restarts its sequence from 0 is
                // ignored until its seq exceeds last_seq - confirm this is
                // the intended policy.
                if (IsValidSyncFrame(frame) &&
                    (!seq_inited || frame.seq > last_seq)) {
                    // Convert next_edge_ns from the master time domain into
                    // the slave PL time domain.
                    const std::uint64_t local_next_edge_ns =
                        CalculateLocalNextEdgeNs(frame, local_rx_ns, eth_fixed_delay_ns);

                    // Hand the new edge time to the PL, which will emit
                    // pulse_out at local_next_edge_ns.
                    WriteNextEdgeNs(&registers, local_next_edge_ns);

                    // Keep counter and pulse enabled and preserve the current
                    // ps_debug_out state.
                    registers.Write32(kRegControl,
                                      kControlCounterEnable |
                                      kControlPulseEnable |
                                      debug_state);

                    seq_inited = true;
                    last_seq = frame.seq;
                    ++accepted_count;

                    const std::int64_t slave_master_diff_ns =
                        CalculateSlaveMasterDiffNs(local_rx_ns, frame.master_tx_ns);
                    PushSlaveMasterDiffNs(slave_master_diff_ns);

                    // Do not log every frame: print the first accepted frame
                    // and then once per kLogPeriodNs of PL time.
                    if (accepted_count == 1u || local_rx_ns - last_log_ns >= kLogPeriodNs) {
                        char ip_text[INET_ADDRSTRLEN];

                        // offset_ns is recomputed here for display only; the
                        // real computation lives in CalculateLocalNextEdgeNs().
                        const std::int64_t offset_ns =
                            static_cast<std::int64_t>(frame.master_tx_ns) +
                            eth_fixed_delay_ns -
                            static_cast<std::int64_t>(local_rx_ns);

                        const std::int64_t slave_master_diff_median_ns =
                            GetSlaveMasterDiffMedianNs();

                        std::printf("sync=%llu from=%s seq=%u local_rx_ns=%llu master_tx_ns=%llu slave_master_diff_us=%.3f slave_master_diff_median_us=%.3f diff_samples=%u offset_ns=%lld local_next_edge_ns=%llu active_edge_ns=%llu\n",
                                    static_cast<unsigned long long>(accepted_count),
                                    AddressToText(master_addr, ip_text, sizeof(ip_text)),
                                    frame.seq,
                                    static_cast<unsigned long long>(local_rx_ns),
                                    static_cast<unsigned long long>(frame.master_tx_ns),
                                    NsToUs(slave_master_diff_ns),
                                    NsToUs(slave_master_diff_median_ns),
                                    static_cast<unsigned int>(g_timestamp_diff_count),
                                    static_cast<long long>(offset_ns),
                                    static_cast<unsigned long long>(local_next_edge_ns),
                                    static_cast<unsigned long long>(ReadActiveEdgeNs(registers)));
                        last_log_ns = local_rx_ns;
                    }
                }
            }
        }

        // Drive ps_debug_out from PL time: toggle once per
        // kDebugTogglePeriodNs so a scope/ILA can see that the PS is alive.
        // The inner loop catches up if more than one period has elapsed.
        const std::uint64_t now_pl_ns = ReadTimeNs(&registers);
        while (now_pl_ns >= next_debug_toggle_ns) {
            debug_state ^= kControlPsDebugOut;
            registers.Write32(kRegControl,
                              kControlCounterEnable |
                              kControlPulseEnable |
                              debug_state);
            next_debug_toggle_ns += kDebugTogglePeriodNs;
        }
    }

    // On exit disable pulse_out and clear ps_debug_out, but leave
    // counter_enable set so the PL time can still be read afterwards.
    registers.Write32(kRegControl, kControlCounterEnable);
    std::printf("Stopped. pulse_out disabled, ps_debug_out cleared, counter left enabled.\n");
    return exit_code;
}

Makefile

bash 复制代码
# Petalinux cross compile config.
# Change PETA_PATH if Petalinux is installed somewhere else.
PETA_PATH ?= /opt/petalinux/2020.2
PETA_SYSROOT_X86 := $(PETA_PATH)/sysroots/x86_64-petalinux-linux
PETA_SYSROOT_ARM := $(PETA_PATH)/sysroots/cortexa9t2hf-neon-xilinx-linux-gnueabi

# Toolchain prefix; CC/CXX are forced so that values exported by the
# environment cannot silently select the host compiler.
CROSS_COMPILE ?= $(PETA_SYSROOT_X86)/usr/bin/arm-xilinx-linux-gnueabi/arm-xilinx-linux-gnueabi-
override CC := $(CROSS_COMPILE)gcc
override CXX := $(CROSS_COMPILE)g++

COMMON_FLAGS := -Wall -g -Isrc/include \
                -mthumb -mfpu=neon -mfloat-abi=hard -mcpu=cortex-a9 \
                --sysroot=$(PETA_SYSROOT_ARM)

# Emit a .d file next to every object so that header edits trigger rebuilds.
# Kept out of COMMON_FLAGS because it is only meaningful when compiling.
DEPFLAGS := -MMD -MP

# Petalinux/Yocto environments often export CFLAGS/CXXFLAGS in advance.
# Force-append sysroot and CPU flags, otherwise target headers such as arpa/inet.h may be missing.
override CFLAGS += $(COMMON_FLAGS)
override CXXFLAGS += $(COMMON_FLAGS) -std=c++11
override LDFLAGS += --sysroot=$(PETA_SYSROOT_ARM) -lm -lpthread -ldl -lrt

TARGET ?= ns_sync_test
BUILD_DIR ?= build
OBJ_DIR ?= $(BUILD_DIR)/obj

# Sources are picked up from src/ and one directory level below it.
SRCS := $(wildcard src/*.c src/*/*.c)
CXX_SRCS := $(wildcard src/*.cpp src/*/*.cpp)

OBJS := $(patsubst %.c,$(OBJ_DIR)/%.o,$(SRCS))
CXX_OBJS := $(patsubst %.cpp,$(OBJ_DIR)/%.o,$(CXX_SRCS))
DEPS := $(OBJS:.o=.d) $(CXX_OBJS:.o=.d)

# Only targets that actually have a rule are declared phony.
.PHONY: all clean run

all: $(BUILD_DIR)/$(TARGET)

$(BUILD_DIR) $(OBJ_DIR):
	mkdir -p $@

# Link with the C++ driver; LDFLAGS (which carries the libraries) must stay
# after the objects so the linker can resolve their symbols.
$(BUILD_DIR)/$(TARGET): $(OBJS) $(CXX_OBJS) | $(BUILD_DIR)
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)

$(OBJ_DIR)/%.o: %.c | $(OBJ_DIR)
	mkdir -p $(dir $@)
	$(CC) $(CFLAGS) $(DEPFLAGS) -c $< -o $@

$(OBJ_DIR)/%.o: %.cpp | $(OBJ_DIR)
	mkdir -p $(dir $@)
	$(CXX) $(CXXFLAGS) $(DEPFLAGS) -c $< -o $@

# NOTE(review): the binary is built for ARM, so this target presumably only
# works when make is run on the target board itself - confirm.
run: $(BUILD_DIR)/$(TARGET)
	sudo ./$(BUILD_DIR)/$(TARGET)

clean:
	rm -rf $(BUILD_DIR)

# Pull in the generated header dependencies; silently absent on a clean tree.
-include $(DEPS)

主机打印

bash 复制代码
root@ant:~# ./udp_sync_master_petalinux
UDP sync master sending to 255.255.255.255:5005 every 30 ms
Pulse period: 1000000 ns (1000 Hz if period divides 1s)
Frame: magic=0x4a42 version=1 size=32 bytes
Master clock: PC Linux CLOCK_MONOTONIC
Press Ctrl+C to stop.
sent=1 seq=0 master_tx_ns=1880106897508 next_edge_ns=1880107000000 pulse_period_ns=1000000
sent=35 seq=34 master_tx_ns=1881126886690 next_edge_ns=1881127000000 pulse_period_ns=1000000
sent=69 seq=68 master_tx_ns=1882146886753 next_edge_ns=1882147000000 pulse_period_ns=1000000
sent=103 seq=102 master_tx_ns=1883166886743 next_edge_ns=1883167000000 pulse_period_ns=1000000
sent=137 seq=136 master_tx_ns=1884186886273 next_edge_ns=1884187000000 pulse_period_ns=1000000
sent=171 seq=170 master_tx_ns=1885206887006 next_edge_ns=1885207000000 pulse_period_ns=1000000
sent=205 seq=204 master_tx_ns=1886226886568 next_edge_ns=1886227000000 pulse_period_ns=1000000
sent=239 seq=238 master_tx_ns=1887246897120 next_edge_ns=1887247000000 pulse_period_ns=1000000

从机打印

slave_master_diff_median_us 是过去100个包的主从时差中位数

因为

bash 复制代码
root@ant:~# ./ns_sync_test

=== ns_sync_pulse Linux UDP sync slave ===
BASE = 0x43c00000
sync_port = 5005
eth_fixed_delay_ns = 95000
VERSION = 0x00020000
time_ns = 0x00000000_00108ed4 (1 ms)
free_run_first_edge_ns = 0x00000000_001fd114 (2 ms)
STATUS = 0x0000001e pulse_out=0 counter_en=1 pulse_en=1 latch_valid=1 next_edge_valid=1 ps_debug_out=0
Local 1kHz pulse_out is running before master sync.
Waiting for UDP SyncFrame on 0.0.0.0:5005.
Press Ctrl+C to stop.
sync=1 from=192.168.3.211 seq=0 local_rx_ns=9174208210 master_tx_ns=1880106897508 slave_master_diff_us=0.000 slave_master_diff_median_us=0.000 diff_samples=1 odge_ns=9175215710 active_edge_ns=9175215710
sync=35 from=192.168.3.211 seq=34 local_rx_ns=10194070710 master_tx_ns=1881126886690 slave_master_diff_us=-126.682 slave_master_diff_median_us=-145.235 diff_sacal_next_edge_ns=10195089020 active_edge_ns=10195089020
sync=69 from=192.168.3.211 seq=68 local_rx_ns=11214074510 master_tx_ns=1882146886753 slave_master_diff_us=-122.945 slave_master_diff_median_us=-140.124 diff_sacal_next_edge_ns=11215092760 active_edge_ns=11215092760
sync=103 from=192.168.3.211 seq=102 local_rx_ns=12234083420 master_tx_ns=1883166886743 slave_master_diff_us=-114.025 slave_master_diff_median_us=-134.330 diff_ local_next_edge_ns=12235101680 active_edge_ns=12235101680
sync=137 from=192.168.3.211 seq=136 local_rx_ns=13254092390 master_tx_ns=1884186886273 slave_master_diff_us=-104.585 slave_master_diff_median_us=-127.966 diff_ local_next_edge_ns=13255111120 active_edge_ns=13255111120
sync=171 from=192.168.3.211 seq=170 local_rx_ns=14274105430 master_tx_ns=1885206887006 slave_master_diff_us=-92.278 slave_master_diff_median_us=-117.062 diff_slocal_next_edge_ns=14275123430 active_edge_ns=14275123430
sync=205 from=192.168.3.211 seq=204 local_rx_ns=15294107260 master_tx_ns=1886226886568 slave_master_diff_us=-90.010 slave_master_diff_median_us=-108.252 diff_slocal_next_edge_ns=15295125700 active_edge_ns=15295125700
sync=239 from=192.168.3.211 seq=238 local_rx_ns=16314165500 master_tx_ns=1887246897120 slave_master_diff_us=-42.322 slave_master_diff_median_us=-99.796 diff_saocal_next_edge_ns=16315173380 active_edge_ns=16315173380
^CStopped. pulse_out disabled, ps_debug_out cleared, counter left enabled.
相关推荐
weixin_467182281 小时前
Arduino进阶二|自定义类库保姆级教程(从零手写属于自己的传感器类库+完整源码)
c语言·c++·单片机·嵌入式硬件·arduino·c++面向对象·diy库文件
清风6666661 小时前
基于单片机的64位多模式流水灯控制系统设计
单片机·毕业设计·课程设计·期末大作业
进击的横打2 小时前
【车载开发系列】热敏电阻与上下拉电阻
单片机·嵌入式硬件
崇山峻岭之间2 小时前
单片机USB虚拟串口实验
单片机·嵌入式硬件
崇山峻岭之间2 小时前
单片机USB U盘实验
单片机·嵌入式硬件
点灯小铭3 小时前
基于单片机的锅炉压力与温度监测报警系统设计
数据库·单片机·嵌入式硬件·毕业设计·课程设计·期末大作业
环境倒逼我学习3 小时前
无人机地面站之第13章 Mission Planner 入门与界面总览
单片机·嵌入式硬件·无人机
大阳1233 小时前
ARM.8(ADC,SPI)
单片机·嵌入式硬件·adc·spi
杨连江4 小时前
一种三模式可调气隙式双侧定子滑移可变磁通轴向永磁电机
单片机·嵌入式硬件