【Verilog】基于Verilog的DDR控制器的简单实现(一)——初始化

在FPGA中,大规模数据的存储常常会用到DDR。为了方便用户使用,Xilinx提供了DDR MIG IP核,用户能够通过AXI接口进行DDR的读写访问,然而MIG内部自动实现了许多环节,不利于用户深入理解DDR的底层逻辑。

本文以美光(Micron)公司生产的DDR3芯片MT41J512M8RH-093为例,说明DDR芯片的操作过程。

该芯片的datasheet可以从厂商官网下载得到:(https://www.micron.com/-/media/client/global/documents/products/data-sheet/dram/ddr3/4gb_ddr3l.pdf?rev=305217e2f9bd4ef48d7c6f353dfc064c),这个datasheet包含了Micron公司多款DDR芯片,这里MT41J512M8RH-093芯片对应数据位宽×8,总容量4G(512M×8),频率2133(-093)的产品,在表格中需要注意区分,不同产品在时序参数上会有所区别。

DDR芯片的使用关键在于令接口的信号变化满足时序要求,在初始化过程中主要关注下面几个时序参数(来自P33 Table 9: Timing Parameters Used for IDD Measurements -- Clock Units与P96 Table 59: Electrical Characteristics and AC Operating Conditions for Speed Extensions (Continued))

复制代码
* CK(MIN)   0.938 ns
* CL        14 CK
* RCD(MIN)  14 CK
* RC(MIN)   50 CK
* RAS(MIN)  36 CK
* RP(MIN)   14 CK
* FAW       27 CK
* RRD       6  CK
* RFC       279CK
* XPR       > max(5CK, RFC+10ns)
* MRD       > 4 CK
* MOD       > max(12 CK, 15ns)
* ZQinit    < max(512nCK, 640ns)
* DLLK      > 512 CK

从datasheet的P12页的Fig2. Simplified State Diagram可以看到,DDR3芯片在上电(Power applied)后需要经过一系列的初始化步骤(主要包含三个部分Reset Procedure、Initialization、ZQ Calibration),之后进入正常工作状态(idle)。

上图中Command由多个信号的变化构成,在初始化过程主要用到以下几个指令。(来自P118 Table 70: Truth Table -- Command)

复制代码
* COMMAND     | NOP | MRS_1 | MRS_2 | MRS_3 | MRS_4 | ZQCL             
* ddr_cke     | 1 1 |  1 1  |  1 1  |  1 1  |  1 1  |  1 1   
* ddr_dqs_en  |  0  |   0   |   0   |   0   |   0   |   0    
* ddr_dq_en   |  0  |   0   |   0   |   0   |   0   |   0     
* ddr_cs_n    |  0  |   0   |   0   |   0   |   0   |   0     
* ddr_ras_n   |  1  |   0   |   0   |   0   |   0   |   1    
* ddr_cas_n   |  1  |   0   |   0   |   0   |   0   |   1    
* ddr_we_n    |  1  |   0   |   0   |   0   |   0   |   0    
* ddr_ba      | vvv |  010  |  011  |  001  |  000  |  000   
* ddr_addr    | vvv |   28  |   00  |   44  |  124  | a[10]=1      
* ddr_odt     |  0  |   0   |   0   |   0   |   0   |   0   

这里将时钟周期取为最小时钟周期0.938ns,对应时钟频率1066.099MHz。经过计算,Initialization阶段每条指令执行后的等待时间均不超过1us,因此这里将Initialization阶段每条指令执行后的等待时间均简化1us,最终得到的DDR3初始化代码如下:

c 复制代码
`timescale 1ns / 1ps
//
// Company: 
// Engineer: wjh776a68
// 
// Create Date: 01/05/2024 09:45:15 AM
// Design Name: micron_ddr
// Module Name: micron_ddr_init
// Project Name: micron_ddr
// Target Devices: vu9p
// Tool Versions: 2017.4
// Description: 
// 
// Dependencies: 
// 
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
// 
//

// ddr3 x8 4Gb MT41J512M8RH-093
/****************************
*  DDR3L-2133 https://www.micron.com/-/media/client/global/documents/products/data-sheet/dram/ddr3/4gb_ddr3l.pdf?rev=305217e2f9bd4ef48d7c6f353dfc064c
* CK(MIN) 0.938 ns
* CL        14 CK
* RCD(MIN)  14 CK
* RC(MIN)   50 CK
* RAS(MIN)  36 CK
* RP(MIN)   14 CK
* FAW       27 CK
* RRD       6  CK
* RFC       279CK
* XPR       >max(5CK, RFC+10ns)
* MRD       >4CK
* MOD       >max(12CK, 15ns)
* ZQinit    <max(512nCK, 640ns)
* DLLK      >512CK
* command all p118
* initial waveform p137
*
* COMMAND     | NOP | MRS_1 | MRS_2 | MRS_3 | MRS_4 | ZQCL             
* ddr_cke     | 1 1 |  1 1  |  1 1  |  1 1  |  1 1  |  1 1   
* ddr_dqs_en  |  0  |       |       |       |       |        
* ddr_dq_en   |  0  |       |       |       |       |         
* ddr_cs_n    |  0  |   0   |   0   |   0   |   0   |   0     
* ddr_ras_n   |  1  |   0   |   0   |   0   |   0   |   1    
* ddr_cas_n   |  1  |   0   |   0   |   0   |   0   |   1    
* ddr_we_n    |  1  |   0   |   0   |   0   |   0   |   0    
* ddr_ba      | vvv |  010  |  011  |  001  |  000  |         
* ddr_addr    | vvv |   28  |   00  |   44  |  124  |  a[10]      
* ddr_odt     |  0  |       |       |       |       |        
***************************************************************/
module micron_ddr_init #(
    parameter CLK_FREQ = 1066.099, // MHz
    parameter _1MS_CYCLE = 10.0**-3 / (1.0 / (CLK_FREQ * 10**6)),
    parameter _1US_CYCLE = 10.0**-6 / (1.0 / (CLK_FREQ * 10**6)),
    parameter integer INITIAL_CYCLE = 200 * _1US_CYCLE,
    parameter integer INITIAL_STABLE_CYCLE = 500 * _1US_CYCLE,
    parameter integer FREE_CYCLE = _1US_CYCLE
) (
    output  reg [15:0]  ddr_addr,
    output  reg [2:0]   ddr_ba,
    output  reg         ddr_cas_n,
    output  reg [0:0]   ddr_ck_n,
    output  reg [0:0]   ddr_ck_p,
    output  reg [0:0]   ddr_cke,
    output  reg [0:0]   ddr_cs_n,
    output  reg [0:0]   ddr_dm,
    inout       [7:0]   ddr_dq,
    inout       [0:0]   ddr_dqs_n,
    inout       [0:0]   ddr_dqs_p,
    output  reg [0:0]   ddr_odt,
    output  reg         ddr_ras_n,
    output  reg         ddr_reset_n,
    output  reg         ddr_we_n,

    input clk

);

localparam [27:0] NOP_CMD =  {11'b11000111000, 16'h0000, 1'b0};
localparam [27:0] MRS1_CMD = {11'b11000000010, 16'h0028, 1'b0};
localparam [27:0] MRS2_CMD = {11'b11000000011, 16'h0000, 1'b0};
localparam [27:0] MRS3_CMD = {11'b11000000001, 16'h0044, 1'b0};
localparam [27:0] MRS4_CMD = {11'b11000000000, 16'h0124, 1'b0};
localparam [27:0] ZQCL_CMD = {11'b11000110000, 16'h0400, 1'b0};

reg ddr_cke_p1, ddr_cke_p2;
reg ddr_dqs_i, ddr_dqs_o, ddr_dqs_en;
reg ddr_dq_i, ddr_dq_o, ddr_dq_en;

    OBUFDS OBUFDS_ck (
      .O(ddr_ck_p),   // 1-bit output: Diff_p output (connect directly to top-level port)
      .OB(ddr_ck_n), // 1-bit output: Diff_n output (connect directly to top-level port)
      .I(clk)    // 1-bit input: Buffer input
   );
   
   IOBUFDS #(
      .DQS_BIAS("FALSE")  // (FALSE, TRUE)
   )
   IOBUFDS_dqs_inst (
      .O(ddr_dqs_o),     // 1-bit output: Buffer output
      .I(ddr_dqs_i),     // 1-bit input: Buffer input
      .IO(ddr_dqs_p),   // 1-bit inout: Diff_p inout (connect directly to top-level port)
      .IOB(ddr_dqs_n), // 1-bit inout: Diff_n inout (connect directly to top-level port)
      .T(ddr_dqs_en)      // 1-bit input: 3-state enable input
   );
   
   IOBUF IOBUF_dq_inst (
     .O(ddr_dq_o),   // 1-bit output: Buffer output
     .I(ddr_dq_i),   // 1-bit input: Buffer input
     .IO(ddr_dq), // 1-bit inout: Buffer inout (connect directly to top-level port)
     .T(ddr_dq_en)    // 1-bit input: 3-state enable input
  );
  
  ODDRE1 #(
      .IS_C_INVERTED(1'b1),  // Optional inversion for C
      .IS_D1_INVERTED(1'b0), // Unsupported, do not use
      .IS_D2_INVERTED(1'b0), // Unsupported, do not use
      .SRVAL(1'b0)           // Initializes the ODDRE1 Flip-Flops to the specified value (1'b0, 1'b1)
   )
   ODDRE1_cke_inst (
      .Q(ddr_cke),   // 1-bit output: Data output to IOB
      .C(clk),   // 1-bit input: High-speed clock input
      .D1(ddr_cke_p1), // 1-bit input: Parallel data input 1
      .D2(ddr_cke_p2), // 1-bit input: Parallel data input 2
      .SR(1'b0)  // 1-bit input: Active High Async Reset
   );

//OBUFDS OBUFDS_dqs (
//      .O(ddr_dqs_p),   // 1-bit output: Diff_p output (connect directly to top-level port)
//      .OB(ddr_dqs_n), // 1-bit output: Diff_n output (connect directly to top-level port)
//      .I(ddr_dqs)    // 1-bit input: Buffer input
//   );

    reg [15:0]  ddr_addr_r;
    reg [2:0]   ddr_ba_r;
    reg         ddr_cas_n_r;
    reg [0:0]   ddr_cs_n_r;
    reg [0:0]   ddr_dm_r; // no ref
    reg [0:0]   ddr_odt_r;
    reg         ddr_ras_n_r;
    reg         ddr_we_n_r;
    reg         ddr_dq_en_r; 
    reg         ddr_dqs_en_r; 
    
   initial begin
       {ddr_cke_p2, ddr_cke_p1, ddr_dqs_en, ddr_dq_en, ddr_cs_n, ddr_ras_n, ddr_cas_n, ddr_we_n, ddr_ba, ddr_addr, ddr_odt} <= NOP_CMD;
       {ddr_cke_p2, ddr_cke_p1, ddr_dqs_en_r, ddr_dq_en_r, ddr_cs_n_r, ddr_ras_n_r, ddr_cas_n_r, ddr_we_n_r, ddr_ba_r, ddr_addr_r, ddr_odt_r} <= NOP_CMD;
   end

    always @(negedge clk) begin
        ddr_addr  <=  ddr_addr_r  ;
        ddr_ba      <=  ddr_ba_r    ;
        ddr_cas_n  <=  ddr_cas_n_r ;
        ddr_cs_n  <=  ddr_cs_n_r  ;
        ddr_dm      <=  ddr_dm_r    ;
        ddr_odt      <=  ddr_odt_r   ;
        ddr_ras_n  <=  ddr_ras_n_r ;
        ddr_we_n  <=  ddr_we_n_r  ;
        ddr_dq_en   <=  ddr_dq_en_r;
        ddr_dqs_en  <=  ddr_dqs_en_r;

    end


   reg [5:0] cs = 0, ns;
   reg [31:0]  initial_cnt = 0;
   reg [31:0]  initial_stable_cnt = 0;
   reg [31:0]  freerun_cnt = 0;
   reg [3:0]   initial_cmd_ptr = 0;
   reg [27:0] initial_cmd_seq[0:5] = '{NOP_CMD, MRS1_CMD, MRS2_CMD, MRS3_CMD, MRS4_CMD, ZQCL_CMD};
   reg initial_finish = 0;

   always @(negedge clk) begin
       cs <= ns;
   end

   always @(*) begin
       case (cs)
       0: begin
           if (initial_cnt == INITIAL_CYCLE) begin // wait 200us
               ns = 1;
           end else begin
               ns = 0;
           end
       end
       1: begin // wait 500us 
           if (initial_stable_cnt == INITIAL_STABLE_CYCLE) begin // wait 500us
               ns = 2;
           end else begin
               ns = 1;
           end
       end
       2: begin
           ns = 3;
       end
       3: begin
           if (freerun_cnt == FREE_CYCLE) begin
               if (initial_finish) begin
                   ns = 4;
               end else begin
                   ns = 2;
               end
           end else begin
               ns = 3;
           end
       end
       4: begin
           // finish initial, enter idle state
       end
       default: begin
           ns = 0;
       end
       endcase
   end

   always @(negedge clk) begin
       case (ns)
       0: begin
           initial_cnt <= initial_cnt + 1;
       end
       default: begin
           initial_cnt <= 0;
       end
       endcase
   end

   always @(negedge clk) begin
       case (ns)
       1: begin
           initial_stable_cnt <= initial_stable_cnt + 1;
       end
       default: begin
           initial_stable_cnt <= 0;
       end
       endcase
   end

   always @(negedge clk) begin
       case (ns)
       3: begin
           freerun_cnt <= freerun_cnt + 1;
       end
       default: begin
           freerun_cnt <= 0;
       end
       endcase
   end

   always @(negedge clk) begin
       case (ns)
       2: begin
           if (initial_cmd_ptr == 6 - 1) begin
               initial_finish <= 1;
           end else begin
               initial_finish <= 0;
           end
           initial_cmd_ptr <= initial_cmd_ptr + 1;
       end
       endcase
   end


   always @(negedge clk) begin
       case (ns)
       0: begin
           ddr_reset_n <= 1'b0;
           {ddr_cke_p2, ddr_cke_p1}    <= 2'b0;
           ddr_dqs_en_r <= 1'b0;
           ddr_dq_en_r  <= 1'b0;
       end
       1: begin
           ddr_reset_n <= 1'b1;
           {ddr_cke_p2, ddr_cke_p1}    <= 2'b0;
           ddr_dqs_en_r <= 1'b0;
           ddr_dq_en_r <= 1'b0;
       end
       2: begin
           {ddr_cke_p2, ddr_cke_p1, ddr_dqs_en_r, ddr_dq_en_r, ddr_cs_n_r, ddr_ras_n_r, ddr_cas_n_r, ddr_we_n_r, ddr_ba_r, ddr_addr_r, ddr_odt_r} <= initial_cmd_seq[initial_cmd_ptr];
       end
       3: begin
           {ddr_cke_p2, ddr_cke_p1, ddr_dqs_en_r, ddr_dq_en_r, ddr_cs_n_r, ddr_ras_n_r, ddr_cas_n_r, ddr_we_n_r, ddr_ba_r, ddr_addr_r, ddr_odt_r} <= NOP_CMD;
       end
       default: begin
           ddr_reset_n <= 1'b1;
       end
       endcase
   end


endmodule

上述代码已在Vivado 2017.4中进行了仿真测试,可替换ddr示例工程中的example_top自行仿真。

相关推荐
黄埔数据分析1 小时前
RecoNIC 入门:SmartNIC 上支持 RDMA 的计算卸载-FPGA-智能网卡-AMD-Xilinx
fpga开发
nanxl13 小时前
FPGA-数字时钟
fpga开发·verilog·vivado
尤老师FPGA16 小时前
LVDS系列9:Xilinx 7系可编程输入延迟(二)
单片机·嵌入式硬件·fpga开发
内有小猪卖20 小时前
时序约束 记录
fpga开发
Cao1234567893211 天前
FPGA时钟设计
fpga开发
JNTeresa1 天前
锁存器知识点详解
fpga开发
Cao1234567893211 天前
FPGA基础之基础语法
fpga开发
一大Cpp1 天前
通过Quartus II实现Nios II编程
fpga开发
7yewh1 天前
Verilog 语法 (二)
fpga开发
边缘计算社区2 天前
FPGA与边缘AI:计算革命的前沿力量
人工智能·fpga开发