1、概述
根据实际项目需求,设计和开发一个基于 FPGA 的接口:将来自 MIPI CSI 摄像头 VB1943 的数据封装成标准的 UDP 数据包,通过 10G/25G 以太网 SFP+ 链路发送到主机计算平台。主机计算平台通过硬件加速和 GPU RDMA(远程直接内存访问),使数据绕过 CPU 直接进入 GPU 内存,将延迟降至最低,实现端到端超低延迟。
2、hsb camera 逻辑视图
把系统功能分解,模块划分,梳理出数据流和控制流如下:

FPGA中SFP接口的流框图:

核心模块:
sensor:通过 I2C 配置 sensor 出流,输出 MIPI CSI-2 摄像头数据。
mipi rx : 接收 1~2 路 MIPI CSI-2 摄像头数据(通过硬核 D-PHY)。
udp打包模块:将视频流数据按照自定义协议(如 UDP/RoCE)打包成 AXI-Stream 流,并支持 PTP(IEEE 1588)时钟同步,为每帧图像打上精确时间戳。
10GbE MAC+PHY:将以太网帧发送到 SFP+ 光模块。
3、10GbE 光口网络控制器
eth_10gb_top 是 10GbE 光口链路的顶层模块,用于将完成精确时间对齐的图像数据通过 10GbE 网络发送,主要完成以下任务:
- 用户通过 AXI-Stream 收发以太网帧
- MAC 完成帧封装与解封装
- PCS 完成物理层编码与同步
- SERDES 完成高速差分信号驱动
- 最终通过 SFP+ 光口实现 10Gbps 全双工通信
// SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//应用层 → UDP/IP → 帧发送 → AXI-Stream → 10G MAC → XGMII → 10G PCS → SERDES → SFP 光口
// eth_10gb_top
//
// Top-level wrapper that chains a 10GbE MAC instance (eth_10gb_mac) and a
// PCS/SERDES instance (eth_10gb_pcs_0 or eth_10gb_pcs_1, chosen by ID):
//
//   AXI-Stream user side <-> MAC <-> XGMII (64-bit data + 8-bit control) <-> PCS <-> SERDES pads
//
// The user TX AXI-Stream handshake is gated by an internal `sys_ready` flag so
// that no frame is accepted until the PCS reports RX valid, TX ready and RX
// block lock (always ready when SIMULATION is defined).
//
// Parameters:
//   ID - selects the PCS instance: 0 -> eth_10gb_pcs_0 ("Tile 2"),
//        any other value -> eth_10gb_pcs_1 ("Tile 3").
module eth_10gb_top
#(
  parameter ID = 0
)(
  // clock and reset
  // NOTE(review): the original comment gives 161.18MHz; the 10GBASE-R line
  // rate / 64 is 161.1328125 MHz - confirm against the board oscillator.
  input         i_refclk_p,        // external SERDES reference clock (differential, P)
  input         i_refclk_n,        // external SERDES reference clock (differential, N)
  // SERDES IO: RX and TX differential pairs (4 pins)
  input         i_pad_rx_n,        // high-speed differential RX data from the SFP module
  input         i_pad_rx_p,
  output        o_pad_tx_n,        // high-speed differential TX data to the SFP module
  output        o_pad_tx_p,
  // PCS Serdes clock
  input         i_pcs_clk,         // 100MHz - 300MHz SERDES calibration clock
  input         i_pcs_rst_n,       // active low; resets PCS XGMII side, MAC (with i_sys_rst_n) and sys_ready
  input         i_sys_rst_n,       // active low; ANDed with i_pcs_rst_n to form the MAC reset
  // PCS output user clock
  output        o_usr_clk,         // PCS TX clock output, 322.2656 MHz
  output        o_usr_clk_rdy,     // driven from the PCS TX-ready indication
  // MAC APB Interface, APB clk domain
  input         i_aclk,
  input         i_arst_n,
  input         i_mac_apb_psel,
  input         i_mac_apb_penable,
  input  [31:0] i_mac_apb_paddr,
  input  [31:0] i_mac_apb_pwdata,
  input         i_mac_apb_pwrite,
  output        o_mac_apb_pready,
  output [31:0] o_mac_apb_prdata,
  output        o_mac_apb_pserr,
  // PCS APB Interface, APB clk domain
  // Only the low 16 bits of address / write data / read data reach the PCS.
  input         i_pcs_apb_psel,
  input         i_pcs_apb_penable,
  input  [31:0] i_pcs_apb_paddr,
  input  [31:0] i_pcs_apb_pwdata,
  input         i_pcs_apb_pwrite,
  output        o_pcs_apb_pready,
  output [31:0] o_pcs_apb_prdata,  // [31:16] are tied to zero
  output        o_pcs_apb_pserr,   // tied to zero
  // Ethernet XGMII MAC, pclk domain
  // XGMII processing clock
  input         i_pclk,            // 156.25MHz XGMII processing clock
  input         i_prst_n,          // active low reset
  // AXIS Tx
  input         i_axis_tx_tvalid,
  input         i_axis_tx_tlast,
  input  [ 7:0] i_axis_tx_tkeep,
  input  [63:0] i_axis_tx_tdata,
  input         i_axis_tx_tuser,
  output        o_axis_tx_tready,
  // AXIS Rx
  output        o_axis_rx_tvalid,
  output        o_axis_rx_tlast,
  output [ 7:0] o_axis_rx_tkeep,
  output [63:0] o_axis_rx_tdata,
  output        o_axis_rx_tuser,
  input         i_axis_rx_tready,  // not connected inside this module
  // Debug Status
  output        o_mac_interrupt,
  output        o_mac_tx_staten,
  output [25:0] o_mac_tx_statvec,
  output [25:0] o_mac_rx_statvec,
  output        o_mac_rx_staten,
  output        o_mac_crc_err,     // currently tied to zero (see CRC tracking notes below)
  output        o_pcs_rxval,
  output        o_pcs_txrdy
);

//------------------------------------------------------------------------------
// MAC Layer
//------------------------------------------------------------------------------

// 10GbE MAC signals
// XGMII is the MAC <-> PCS parallel interface: 64-bit data + 8-bit control.
logic        xgmii_rxval;        // receive valid
logic [63:0] xgmii_rxd;          // receive data
logic [ 7:0] xgmii_rxc;          // receive control
logic [63:0] xgmii_txd;          // transmit data
logic [ 7:0] xgmii_txc;          // transmit control
logic        xgmii_txrdy;        // transmit ready
logic        xgmii_rx_hi_ber;    // high bit-error-rate indication
logic        xgmii_rx_blk_lock;  // block synchronization lock

logic        mac_rst_n;
logic        sys_ready;
logic        w_axis_tx_tvalid;
logic        w_axis_tx_tready;

assign mac_rst_n = i_pcs_rst_n & i_sys_rst_n;

// 10GbE MAC: framing, de-framing, CRC, flow control (vendor IP)
eth_10gb_mac u_10gbe_mac (
  .reset_n_i         ( mac_rst_n         ), // XGMII reset needs to be free from pclk or doesn't work
  .rxmac_clk_i       ( i_pclk            ),
  .txmac_clk_i       ( i_pclk            ),
  // PCS interface
  .xgmii_rxd_i       ( xgmii_rxd         ),
  .xgmii_rxc_i       ( xgmii_rxc         ),
  .xgmii_txd_o       ( xgmii_txd         ),
  .xgmii_txc_o       ( xgmii_txc         ),
  // axis user interface
  .axis_tx_tvalid_i  ( w_axis_tx_tvalid  ),
  .axis_tx_tlast_i   ( i_axis_tx_tlast   ),
  .axis_tx_tkeep_i   ( i_axis_tx_tkeep   ),
  .axis_tx_tdata_i   ( i_axis_tx_tdata   ),
  .axis_tx_tuser_i   ( i_axis_tx_tuser   ),
  .axis_tx_tready_o  ( w_axis_tx_tready  ),
  .axis_rx_tvalid_o  ( o_axis_rx_tvalid  ),
  .axis_rx_tlast_o   ( o_axis_rx_tlast   ),
  .axis_rx_tkeep_o   ( o_axis_rx_tkeep   ),
  .axis_rx_tdata_o   ( o_axis_rx_tdata   ),
  .axis_rx_tuser_o   ( o_axis_rx_tuser   ),
  // status
  .tx_statvec_o      ( o_mac_tx_statvec  ),
  .tx_staten_o       ( o_mac_tx_staten   ),
  .rx_statvec_o      ( o_mac_rx_statvec  ),
  .rx_staten_o       ( o_mac_rx_staten   ),
  // apb register interface
  .apb_clk_i         ( i_aclk            ),
  .apb_psel_i        ( i_mac_apb_psel    ),
  .apb_paddr_i       ( i_mac_apb_paddr   ),
  .apb_pwdata_i      ( i_mac_apb_pwdata  ),
  .apb_pwrite_i      ( i_mac_apb_pwrite  ),
  .apb_penable_i     ( i_mac_apb_penable ),
  .apb_pready_o      ( o_mac_apb_pready  ),
  .apb_prdata_o      ( o_mac_apb_prdata  ),
  .apb_pslverr_o     ( o_mac_apb_pserr   ), // ReLingo_waive_line:vendor_lscc:RL02
  // interrupt
  .int_o             ( o_mac_interrupt   )
);

logic curr_mac_crc_err;
logic curr_mac_crc_known;
logic next_mac_crc_err;
logic next_mac_crc_known;
logic rx_staten_reg;

// The below code keeps track of a packet's FCS status at the output of the MAC. The indication of the FCS
// error via the staten and statvec signals can take place before a packet egresses or while a packet is
// egressing. In back-to-back packet scenarios, the indication for the second packet can take place prior
// to the first packet starting egress or while the first packet is egressing. So this code keeps track of
// the current packet's FCS status as well as the next packet's status. In this way, back-to-back packets
// can be handled. Otherwise, the FCS error condition could be applied to the wrong packet going into the
// rx_parser.
// 11/16/2023 - This code is not needed because the tuser signal indicates the FCS error (among others) at
// tlast, which is what is required at the rx_parser module. Leaving the code in for future ref, if needed.
always_ff @(posedge i_pclk) begin
  if (!i_prst_n) begin
    curr_mac_crc_err   <= 1'b0;
    curr_mac_crc_known <= 1'b0;
    next_mac_crc_err   <= 1'b0;
    next_mac_crc_known <= 1'b0;
    rx_staten_reg      <= 1'b0;
  end else begin
    rx_staten_reg <= o_mac_rx_staten;
    // Rising edge of rx_staten: latch statvec[17] into the first free slot.
    if (o_mac_rx_staten && !rx_staten_reg) begin
      if (curr_mac_crc_known) begin
        next_mac_crc_known <= 1'b1;
        next_mac_crc_err   <= o_mac_rx_statvec[17];
      end else begin
        curr_mac_crc_known <= 1'b1;
        curr_mac_crc_err   <= o_mac_rx_statvec[17];
      end
    // End of the egressing packet: promote the "next" slot or clear "current".
    end else if (o_axis_rx_tvalid && o_axis_rx_tlast) begin
      if (next_mac_crc_known) begin
        curr_mac_crc_err   <= next_mac_crc_err;
        next_mac_crc_err   <= 1'b0;
        next_mac_crc_known <= 1'b0;
      end else begin
        curr_mac_crc_known <= 1'b0;
      end
    end
  end
end

// 11/16/2023 - o_mac_crc_err commented out because it is not needed (see above comments).
//assign o_mac_crc_err = curr_mac_crc_known ? curr_mac_crc_err : 1'b0;
assign o_mac_crc_err = 1'b0;

//------------------------------------------------------------------------------
// PCS PHY Layer (vendor IP): line coding, clock recovery, equalization, alignment
// NOTE(review): the original comment said "8b/10b"; a 10GBASE-R PCS uses
// 64b/66b block coding (IEEE 802.3 Clause 49), which matches the block-lock /
// hi-BER status outputs wired below - confirm against the IP configuration.
//------------------------------------------------------------------------------

// Declared in the original source but not connected anywhere in this module.
logic [3:0] xg_rx_fifo_st;
logic [3:0] xg_tx_fifo_st;

generate
  if (ID == 0) begin : PCS_0
    // Ethernet PCS Tile 2
    eth_10gb_pcs_0 u_10gbe_pcs (
      // Reference clock select. Use external PAD refclk
      .pad_refclkn_i     ( i_refclk_n               ),
      .pad_refclkp_i     ( i_refclk_p               ),
      .refclkp0_ext_i    ( 1'b0                     ),
      .refclkn0_ext_i    ( 1'b1                     ),
      .refclkp1_ext_i    ( 1'b0                     ),
      .refclkn1_ext_i    ( 1'b1                     ),
      .pll_0_refclk_i    ( 1'b0                     ),
      .pll_1_refclk_i    ( 1'b0                     ),
      .sd_pll_refclk_i   ( 1'b0                     ),
      .use_refmux_i      ( 1'b0                     ),
      .diffioclksel_i    ( 1'b0                     ),
      .clksel_i          ( 2'b0                     ),
      //
      // PAD SERDES
      .pad_rxn_i         ( i_pad_rx_n               ),
      .pad_rxp_i         ( i_pad_rx_p               ),
      .pad_txn_o         ( o_pad_tx_n               ),
      .pad_txp_o         ( o_pad_tx_p               ),
      //
      // XGMII Interface
      .xg_tx_clk_i       ( i_pclk                   ),
      .xg_tx_rst_n_i     ( i_pcs_rst_n              ),
      .xg_rx_clk_i       ( i_pclk                   ),
      .xg_rx_rst_n_i     ( i_pcs_rst_n              ),
      .xg_pcs_clkin_i    ( i_pcs_clk                ),
      .xg_tx_clk_o       ( o_usr_clk                ),
      .xg_rx_clk_o       (                          ),
      .xg_txc_i          ( xgmii_txc                ),
      .xg_txd_i          ( xgmii_txd                ),
      .xg_rxc_o          ( xgmii_rxc                ),
      .xg_rxd_o          ( xgmii_rxd                ),
      .xg_rxval_o        ( xgmii_rxval              ),
      .xg_txval_i        ( 1'b1                     ),
      .xg_txrdy_o        ( xgmii_txrdy              ),
      .xg_rx_hi_ber_o    ( xgmii_rx_hi_ber          ),
      .xg_rx_blk_lock_o  ( xgmii_rx_blk_lock        ),
      // apb register interface
      .apb_pclk_i        ( i_aclk                   ),
      .apb_preset_n_i    ( i_arst_n                 ),
      .apb_psel_i        ( i_pcs_apb_psel           ),
      .apb_penable_i     ( i_pcs_apb_penable        ),
      .apb_paddr_i       ( i_pcs_apb_paddr  [15:0]  ),
      .apb_pwdata_i      ( i_pcs_apb_pwdata [15:0]  ),
      .apb_pwrite_i      ( i_pcs_apb_pwrite         ),
      .apb_prdata_o      ( o_pcs_apb_prdata [15:0]  ),
      .apb_pready_o      ( o_pcs_apb_pready         )
    );
  end else begin : PCS_1
    // Ethernet PCS Tile 3
    eth_10gb_pcs_1 u_10gbe_pcs (
      // Reference clock select. Use external PAD refclk
      .pad_refclkn_i     ( i_refclk_n               ),
      .pad_refclkp_i     ( i_refclk_p               ),
      .refclkp0_ext_i    ( 1'b0                     ),
      .refclkn0_ext_i    ( 1'b1                     ),
      .refclkp1_ext_i    ( 1'b0                     ),
      .refclkn1_ext_i    ( 1'b1                     ),
      .pll_0_refclk_i    ( 1'b0                     ),
      .pll_1_refclk_i    ( 1'b0                     ),
      .sd_pll_refclk_i   ( 1'b0                     ),
      .use_refmux_i      ( 1'b0                     ),
      .diffioclksel_i    ( 1'b0                     ),
      .clksel_i          ( 2'b0                     ),
      //
      // PAD SERDES
      .pad_rxn_i         ( i_pad_rx_n               ),
      .pad_rxp_i         ( i_pad_rx_p               ),
      .pad_txn_o         ( o_pad_tx_n               ),
      .pad_txp_o         ( o_pad_tx_p               ),
      //
      // XGMII Interface
      .xg_tx_clk_i       ( i_pclk                   ),
      .xg_tx_rst_n_i     ( i_pcs_rst_n              ),
      .xg_rx_clk_i       ( i_pclk                   ),
      .xg_rx_rst_n_i     ( i_pcs_rst_n              ),
      .xg_pcs_clkin_i    ( i_pcs_clk                ),
      .xg_tx_clk_o       ( o_usr_clk                ),
      .xg_rx_clk_o       (                          ),
      .xg_txc_i          ( xgmii_txc                ),
      .xg_txd_i          ( xgmii_txd                ),
      .xg_rxc_o          ( xgmii_rxc                ),
      .xg_rxd_o          ( xgmii_rxd                ),
      .xg_rxval_o        ( xgmii_rxval              ),
      .xg_txval_i        ( 1'b1                     ),
      .xg_txrdy_o        ( xgmii_txrdy              ),
      .xg_rx_hi_ber_o    ( xgmii_rx_hi_ber          ),
      .xg_rx_blk_lock_o  ( xgmii_rx_blk_lock        ),
      // apb register interface
      .apb_pclk_i        ( i_aclk                   ),
      .apb_preset_n_i    ( i_arst_n                 ),
      .apb_psel_i        ( i_pcs_apb_psel           ),
      .apb_penable_i     ( i_pcs_apb_penable        ),
      .apb_paddr_i       ( i_pcs_apb_paddr  [15:0]  ),
      .apb_pwdata_i      ( i_pcs_apb_pwdata [15:0]  ),
      .apb_pwrite_i      ( i_pcs_apb_pwrite         ),
      .apb_prdata_o      ( o_pcs_apb_prdata [15:0]  ),
      .apb_pready_o      ( o_pcs_apb_pready         )
    );
  end
endgenerate

// The PCS only drives the low 16 bits of the read data bus; tie the upper half
// to zero so the 32-bit output never carries undriven (Z/X) bits.
assign o_pcs_apb_prdata[31:16] = 16'h0;
assign o_pcs_apb_pserr         = 1'b0;
assign o_pcs_rxval             = xgmii_rxval;
assign o_pcs_txrdy             = xgmii_txrdy;
assign o_usr_clk_rdy           = xgmii_txrdy;

// sys_ready: link-up qualifier for the user TX path.
// In simulation it is forced high; otherwise it is registered in the pclk
// domain and asserted only while the PCS reports RX valid, TX ready and RX
// block lock.
`ifdef SIMULATION
  assign sys_ready = 1'b1;
`else
  always_ff @(posedge i_pclk) begin
    if (!i_pcs_rst_n) begin
      sys_ready <= 1'b0;
    end else begin
      sys_ready <= xgmii_rxval && xgmii_txrdy && xgmii_rx_blk_lock;
    end
  end
`endif

// Gate both sides of the TX handshake so no beat is transferred to the MAC
// while the link is not ready.
assign o_axis_tx_tready = w_axis_tx_tready && sys_ready;
assign w_axis_tx_tvalid = i_axis_tx_tvalid && sys_ready;

endmodule
4、总结
eth_10gb_top 是 FPGA 实现 10G 以太网 SFP+ 光口通信 的完整顶层模块。它解决了三个关键问题:
- **解决了 "FPGA 直驱 SFP+ 光口" 难题:**不用外接 10G PHY 芯片,FPGA 内部 MAC + PCS + SERDES 直接驱动 SFP。
- **解决了 "高速数据远距离传输" 问题:**10Gbps 带宽 ≈ 每秒最多传输 1.25GB 数据(理论峰值,未计以太网帧开销)
- 低延迟(微秒级)。