【存算芯片】存算阵列模型和wavedrom

文章目录

在北京大学老师(https://bonany.cc/)的网页课堂上学习了模拟阵列verilog模型(https://bonany.gitlab.io/pis/)。

此模型结合了传统存储器功能和存内计算(Computing-in-Memory, CIM)功能。

阵列模型

核心代码
verilog 复制代码
//==================================================================================================
//  Filename      : Basic_GeMM_CIM.v
//  Created On    : 2022-10-07 08:17:17
//  Last Modified : 2022-12-27 11:05:50
//  Revision      : 
//  Author        : Bonan Yan
//  Company       : Peking University
//  Email         : bonanyan@pku.edu.cn
//
//  Description   : 
//
//
//==================================================================================================
// Behavioral model of a single-port synchronous memory that can also act as a
// compute-in-memory (CIM) macro.
//
// Memory mode (cimeb = 1):
//   write : cs = 1, web = 0 -> mem[a] is updated with d on the rising clock edge
//   read  : cs = 1, web = 1 -> q is updated with mem[a] on the rising clock edge
// CIM mode (cimeb = 0, cs = 1, web = 1):
//   on the rising clock edge, eight dot products (one per output column) of the
//   four cim_in* inputs with four stored words are computed; the top
//   ADC_PRECISION bits of each full-precision sum are driven on cim_out0..7.
//   Only a[7:5] and a[1:0] take part in CIM addressing.
module Basic_GeMM_CIM (
//output
      q,
      cim_out0,
      cim_out1,
      cim_out2,
      cim_out3,
      cim_out4,
      cim_out5,
      cim_out6,
      cim_out7,
//input
      clk,
      a,
      cs,
      web,
      cimeb,
      d,
      cim_in0,
      cim_in1,
      cim_in2,
      cim_in3
);

//Need to define the address map first and 
//change all of the following parameter accordingly
parameter 
	DATA_WIDTH = 8, // unit: bit
	ADDR_WIDTH = 10, // unit: bit

	ADC_PRECISION = 6, // unit: bit
	CIM_INPUT_PRECISION = 4, // unit: bit
	CIM_INPUT_PARALLELISM = 4, // unit: 1 (quantity)
	CIM_OUTPUT_PARALLELISM = 8; // unit: 1 (quantity)

//--------------Generated Parameters----------------------- 
parameter 
	RAM_DEPTH = 1 << ADDR_WIDTH;

// Width of the full-precision dot product. One product needs
// CIM_INPUT_PRECISION + DATA_WIDTH bits; adding the four products
// (CIM_INPUT_PARALLELISM = 4) needs 2 more bits. 14 bits with the defaults.
localparam
	ACC_WIDTH = CIM_INPUT_PRECISION + DATA_WIDTH + 2;


//--------------Input Ports----------------------- 
input                  clk; //clk input
input [ADDR_WIDTH-1:0] a; //address, both effective in memory mode & CIM mode
input                  cs; //overall enable, chip select
input                  web; //write enable, low active
input                  cimeb; //CIM enable, low active
input [DATA_WIDTH-1:0] d; //memory mode input data
input [CIM_INPUT_PRECISION-1:0] cim_in0, cim_in1, cim_in2, cim_in3;


//--------------Output Ports----------------------- 
output reg [DATA_WIDTH-1:0] q; //memory mode output data
output [ADC_PRECISION-1:0] cim_out0, cim_out1, cim_out2, cim_out3, cim_out4, cim_out5, cim_out6, cim_out7;

// Full-precision accumulators. They must be ACC_WIDTH wide: a register only
// ADC_PRECISION bits wide would truncate the sum and make the high-bit slice
// taken below fall outside the register.
reg [ACC_WIDTH-1:0] cim_out0_tmp, cim_out1_tmp, cim_out2_tmp, cim_out3_tmp, cim_out4_tmp, cim_out5_tmp, cim_out6_tmp, cim_out7_tmp;


//--------------Internal variables---------------- 

reg [DATA_WIDTH-1:0] mem [0:RAM_DEPTH-1];

//--------------Code Starts Here------------------ 

// Dot product for one output column.
// The word for input channel i is read from
//   {i[1:0], a[7:5], col[2:0], a[1:0]}
// (a 10-bit address, matching the default ADDR_WIDTH).
// The function result is ACC_WIDTH wide, so the products and the sum are
// evaluated at full precision.
function [ACC_WIDTH-1:0] cim_dot;
  input [2:0] col;
  begin
    cim_dot = cim_in0 * mem[{2'b00, a[7:5], col, a[1:0]}]
            + cim_in1 * mem[{2'b01, a[7:5], col, a[1:0]}]
            + cim_in2 * mem[{2'b10, a[7:5], col, a[1:0]}]
            + cim_in3 * mem[{2'b11, a[7:5], col, a[1:0]}];
  end
endfunction

// Memory Write Block 
// Write Operation : When cs = 1 and web = 0 (web is low active)
always @ (posedge clk)
begin : MEM_WRITE
   if ( cs && !web ) begin
       mem[a] <= d;
   end
end

// Memory Read / CIM Block 
// Read Operation : When cs = 1, web = 1, cimeb = 1
// CIM Operation  : When cs = 1, web = 1, cimeb = 0
// Non-blocking assignments keep the registered outputs free of
// simulation races with logic that samples them on the same clock edge.
always @ (posedge clk)
begin : MEM_READ
  if (cs && web) begin
    if (cimeb) begin
      q <= mem[a];
    end else begin
      // enter cim mode
      cim_out0_tmp <= cim_dot(3'd0);
      cim_out1_tmp <= cim_dot(3'd1);
      cim_out2_tmp <= cim_dot(3'd2);
      cim_out3_tmp <= cim_dot(3'd3);
      cim_out4_tmp <= cim_dot(3'd4);
      cim_out5_tmp <= cim_dot(3'd5);
      cim_out6_tmp <= cim_dot(3'd6);
      cim_out7_tmp <= cim_dot(3'd7);
    end
  end
end

// Keep only the ADC_PRECISION most significant bits of each accumulator
// ([13:8] with the default parameters).
assign cim_out0 = cim_out0_tmp[ACC_WIDTH-1:ACC_WIDTH-ADC_PRECISION];
assign cim_out1 = cim_out1_tmp[ACC_WIDTH-1:ACC_WIDTH-ADC_PRECISION];
assign cim_out2 = cim_out2_tmp[ACC_WIDTH-1:ACC_WIDTH-ADC_PRECISION];
assign cim_out3 = cim_out3_tmp[ACC_WIDTH-1:ACC_WIDTH-ADC_PRECISION];
assign cim_out4 = cim_out4_tmp[ACC_WIDTH-1:ACC_WIDTH-ADC_PRECISION];
assign cim_out5 = cim_out5_tmp[ACC_WIDTH-1:ACC_WIDTH-ADC_PRECISION];
assign cim_out6 = cim_out6_tmp[ACC_WIDTH-1:ACC_WIDTH-ADC_PRECISION];
assign cim_out7 = cim_out7_tmp[ACC_WIDTH-1:ACC_WIDTH-ADC_PRECISION];

endmodule // Basic_GeMM_CIM
工作模式
  1. 传统存储模式
    当 cimeb = 1(高电平)时:

写操作:cs=1 & web=0,将数据d写入地址a

读操作:cs=1 & web=1,从地址a读出数据到q

  2. CIM模式
    当 cimeb = 0(低电平)且 cs=1 & web=1 时:执行向量-矩阵乘法,即 1×4 输入向量(cim_in0~cim_in3)乘以 4×8 权重矩阵,得到 8 个输出(cim_out0~cim_out7)
存储布局

CIM 模式下实际访问的地址 = {bank_select[1:0], a[7:5], column[2:0], a[1:0]}(即读取 mem[该地址];输入地址 a 的 a[9:8] 与 a[4:2] 在 CIM 模式下不参与寻址)

  • bank_select[1:0] → 选择输入通道(0~3对应4个输入)
  • a[7:5] → 选择行(共8行)
  • a[1:0] → 选择权重组内的偏移(offset)
  • column[2:0] → 选择输出列(0~7对应8个输出)
输出精度

乘法:4位输入 × 8位权重 = 12位中间结果

累加:4个12位数相加 → 最大14位结果

精度截取:从14位中取高6位作为最终输出([13:8])

权值格式

写入权重时,地址 a = {in_idx[1:0], row[2:0], col[2:0], offset[1:0]}(in_idx 取 0~3,分别对应 cim_in0~cim_in3):

  • 权重矩阵按列存储,每列4个权重

  • 8个输出通道对应8列

  • 每个权重是8位精度

Wavedrom波形图

方法一(本人执行时存在问题):参考链接:https://zhuanlan.zhihu.com/p/6522936430

VScode + drawio Integration插件+drawio wavedrom插件

但是最后无法拖动?

方法二:使用Waveform render插件也可画图

使用说明

p - 时钟脉冲(上升沿为有效沿,每个周期先高后低)

n - 反相时钟脉冲(下降沿为有效沿,每个周期先低后高)

0 - 低电平

1 - 高电平

. - 保持之前状态

x - 不定态

z - 高阻态

= - 数据总线值(标签按出现顺序依次取自 "data" 数组;x、0、1 等其他符号不占用 data 中的项)

示例

给出json文件的代码如下:

javascript 复制代码
{
  "signal": [
    { "name": "clk", "wave": "p...p...p...p..." },
    { "name": "cs",  "wave": "0.1..1...1...1.." },
    { "name": "web", "wave": "1.0..1...1...0.." },
    { "name": "cimeb", "wave": "1.......1......." },
    { "name": "a",   "wave": "=.=...=...=...=.", "data": ["A0", "A1", "A2", "A3"] },
    { "name": "d",   "wave": "x.=...x...x...=.", "data": ["D0", "D3"] },
    { "name": "q",   "wave": "x...=.x...=...x.", "data": ["Q0", "Q2"] }
  ],
  "head": {
    "text": "传统存储模式 - 读写操作时序",
    "tick": 0
  }
}
javascript 复制代码
{
  "signal": [
    { "name": "clk", "wave": "p...p...p...p...p...p..." },
    { "name": "cs",  "wave": "0.1..1...1...1...1...1.." },
    { "name": "web", "wave": "1.0..1...1...1...0..1..." },
    { "name": "cimeb", "wave": "1...........0..........." },
    { "name": "a",   "wave": "=.=...=...=...=...=...=.", 
      "data": ["WrAddr", "RdAddr", "CIMAddr1", "CIMAddr2", "WrAddr2", "RdAddr2"] },
    { "name": "d",   "wave": "x.=...x...x...x...=...x.", 
      "data": ["WriteData", "WriteData2"] },
    { "name": "q",   "wave": "x...=.x...x...x...x...=.", 
      "data": ["ReadData", "ReadData2"] },
    {},
    { "name": "cim_out", "wave": "x.......=...=...x.......", 
      "data": ["CIM_Result1", "CIM_Result2"] }
  ],
  "config": { "hscale": 2 },
  "head": {
    "text": "模式切换:存储模式 ↔ CIM模式",
    "tick": 0
  }
}
javascript 复制代码
{
  "signal": [
    { "name": "clk", "wave": "p..p..p..p.." },
    { "name": "cimeb", "wave": "1...0.....1." },
    { "name": "CIM计算", "wave": "0..1........", "period": 2 }
  ],
  "foot": {
    "text": "Tsetup: 2ns, Thold: 1ns, Tcalc: 3周期",
    "tick": 0
  }
}
javascript 复制代码
{
  "signal": [
    { "name": "Control", "wave": "1..0..", "node": ".a..b." },
    {},
    { "name": "Input Vector", "wave": "x..=..", 
      "data": ["[X0,X1,X2,X3]"], "node": "...c" },
    { "name": "Weight Address", "wave": "x.=...", 
      "data": ["Row_Select"], "node": "..d" },
    { "name": "Output Vector", "wave": "x...=.", 
      "data": ["[O0...O7]"], "node": "....e" }
  ],
  "edge": [
    "a~>d 地址解码",
    "c~>d 输入锁存",
    "d~>e 计算延迟"
  ]
}
相关推荐
求真求知的糖葫芦14 小时前
巴伦学习(一)一种新型补偿传输线巴伦论文学习笔记(自用)
笔记·学习·射频工程
GLDbalala15 小时前
GPU PRO 4 - 5.3 A Pipeline for Authored Structural Damage 笔记
笔记
三伏52216 小时前
Cortex-M3重启流程——笔记
笔记·cortex-m3
方见华Richard17 小时前
方见华:在递归的暗夜里,把自己活成一束光
人工智能·经验分享·笔记·学习方法·空间计算
zzcufo17 小时前
多邻国学习笔记第五阶段第10-11部分
笔记·学习·c#
BlackWolfSky17 小时前
鸿蒙中级课程笔记2—状态管理V2—@ObservedV2装饰器和@Trace装饰器:类属性变化观测
笔记·华为·harmonyos
航Hang*17 小时前
计算机等级考试(二级WPS)---第1章:综合应用基础---第2节:PDF文件应用
笔记·学习·pdf·wps·计算机二级·计算机等级考试
zhangrelay17 小时前
Linux(ubuntu)如何锁定cpu频率工作在最低能耗模式下
linux·笔记·学习
三伏52217 小时前
Cortex-M3权威指南Cn第四、五章——笔记
笔记·cortex-m3
轴测君18 小时前
MobileNet V1
人工智能·pytorch·笔记