【存算芯片】存算阵列模型和wavedrom

文章目录

在北京大学老师(https://bonany.cc/)的网页课堂上学习了模拟阵列verilog模型(https://bonany.gitlab.io/pis/)。

此模型结合了传统存储器功能和存内计算(Computing-in-Memory, CIM)功能。

阵列模型

核心代码
verilog 复制代码
//==================================================================================================
//  Filename      : Basic_GeMM_CIM.v
//  Created On    : 2022-10-07 08:17:17
//  Last Modified : 2022-12-27 11:05:50
//  Revision      : 
//  Author        : Bonan Yan
//  Company       : Peking University
//  Email         : bonanyan@pku.edu.cn
//
//  Description   : 
//
//
//==================================================================================================
// Behavioral model of a memory array with a compute-in-memory (CIM) mode.
//
// Memory mode (cs = 1, cimeb = 1):
//   web = 0 : synchronous write, mem[a] <= d
//   web = 1 : synchronous read,  q <= mem[a]
// CIM mode (cs = 1, web = 1, cimeb = 0):
//   for each output column n in 0..7
//     sum_n = cim_in0 * mem[{2'b00, a[7:5], n, a[1:0]}]
//           + cim_in1 * mem[{2'b01, a[7:5], n, a[1:0]}]
//           + cim_in2 * mem[{2'b10, a[7:5], n, a[1:0]}]
//           + cim_in3 * mem[{2'b11, a[7:5], n, a[1:0]}]
//     cim_outN = the ADC_PRECISION most significant bits of sum_n
//   Address bits a[9:8] and a[4:2] are ignored in CIM mode.
//
// q keeps its previous value during CIM operations, and the cim_out* outputs
// keep their previous values during memory operations.
module Basic_GeMM_CIM (
//output
      q,
      cim_out0,
      cim_out1,
      cim_out2,
      cim_out3,
      cim_out4,
      cim_out5,
      cim_out6,
      cim_out7,
//input
      clk,
      a,
      cs,
      web,
      cimeb,
      d,
      cim_in0,
      cim_in1,
      cim_in2,
      cim_in3
);

// Need to define the address map first and
// change all of the following parameters accordingly.
// NOTE: the CIM address map below is written out for ADDR_WIDTH = 10
// ({2-bit bank, a[7:5], 3-bit column, a[1:0]}), and the port list is written
// out for 4 CIM inputs and 8 CIM outputs; CIM_INPUT_PARALLELISM and
// CIM_OUTPUT_PARALLELISM are declared for documentation but are not used
// by the logic.
parameter
	DATA_WIDTH = 8, // unit: bit
	ADDR_WIDTH = 10, // unit: bit

	ADC_PRECISION = 6, // unit: bit
	CIM_INPUT_PRECISION = 4, // unit: bit
	CIM_INPUT_PARALLELISM = 4, // unit: 1 (quantity)
	CIM_OUTPUT_PARALLELISM = 8; // unit: 1 (quantity)

//--------------Generated Parameters-----------------------
parameter
	RAM_DEPTH = 1 << ADDR_WIDTH;

// Width of the full-precision multiply-accumulate result:
// (CIM_INPUT_PRECISION-bit input) x (DATA_WIDTH-bit weight) needs
// CIM_INPUT_PRECISION + DATA_WIDTH bits, and summing the 4 products adds
// 2 more bits. With the default parameters this is 14 bits.
localparam
	CIM_SUM_WIDTH = CIM_INPUT_PRECISION + DATA_WIDTH + 2;


//--------------Input Ports-----------------------
input                  clk; //clk input
input [ADDR_WIDTH-1:0] a; //address, both effective in memory mode & CIM mode
input                  cs; //overall enable, chip select
input                  web; //write enable, low active
input                  cimeb; //CIM enable, low active
input [DATA_WIDTH-1:0] d; //memory mode input data
input [CIM_INPUT_PRECISION-1:0] cim_in0, cim_in1, cim_in2, cim_in3; //CIM mode input vector


//--------------Output Ports-----------------------
output reg [DATA_WIDTH-1:0] q; //memory mode output data
output [ADC_PRECISION-1:0] cim_out0, cim_out1, cim_out2, cim_out3, cim_out4, cim_out5, cim_out6, cim_out7; //CIM mode output vector

// Full-precision accumulators. They must be CIM_SUM_WIDTH bits wide: the
// outputs below slice the top ADC_PRECISION bits out of them, and the width
// of the assignment target also sets the width in which the products and the
// sum are evaluated, so a narrower register would truncate the result.
reg [CIM_SUM_WIDTH-1:0] cim_out0_tmp, cim_out1_tmp, cim_out2_tmp, cim_out3_tmp, cim_out4_tmp, cim_out5_tmp, cim_out6_tmp, cim_out7_tmp;


//--------------Internal variables----------------

reg [DATA_WIDTH-1:0] mem [0:RAM_DEPTH-1];

//--------------Code Starts Here------------------

// Memory Write Block
// Write Operation : When cs = 1, web = 0 (web is active low)
always @ (posedge clk)
begin : MEM_WRITE
   if ( cs && !web ) begin
       mem[a] <= d;
   end
end

// Memory Read / CIM Block
// Read Operation : When cs = 1, web = 1, cimeb = 1
// CIM Operation  : When cs = 1, web = 1, cimeb = 0
// Non-blocking assignments are used so that other logic clocked on the same
// edge samples the previous values of q and cim_out*.
always @ (posedge clk)
begin : MEM_READ
  if (cs && web) begin
    if(cimeb) begin
      q <= mem[a];
    end else begin
      // enter cim mode
      cim_out0_tmp <= cim_in0 * mem[{2'b00,a[7:5],3'd0,a[1:0]}]
                    + cim_in1 * mem[{2'b01,a[7:5],3'd0,a[1:0]}]
                    + cim_in2 * mem[{2'b10,a[7:5],3'd0,a[1:0]}]
                    + cim_in3 * mem[{2'b11,a[7:5],3'd0,a[1:0]}];

      cim_out1_tmp <= cim_in0 * mem[{2'b00,a[7:5],3'd1,a[1:0]}]
                    + cim_in1 * mem[{2'b01,a[7:5],3'd1,a[1:0]}]
                    + cim_in2 * mem[{2'b10,a[7:5],3'd1,a[1:0]}]
                    + cim_in3 * mem[{2'b11,a[7:5],3'd1,a[1:0]}];

      cim_out2_tmp <= cim_in0 * mem[{2'b00,a[7:5],3'd2,a[1:0]}]
                    + cim_in1 * mem[{2'b01,a[7:5],3'd2,a[1:0]}]
                    + cim_in2 * mem[{2'b10,a[7:5],3'd2,a[1:0]}]
                    + cim_in3 * mem[{2'b11,a[7:5],3'd2,a[1:0]}];

      cim_out3_tmp <= cim_in0 * mem[{2'b00,a[7:5],3'd3,a[1:0]}]
                    + cim_in1 * mem[{2'b01,a[7:5],3'd3,a[1:0]}]
                    + cim_in2 * mem[{2'b10,a[7:5],3'd3,a[1:0]}]
                    + cim_in3 * mem[{2'b11,a[7:5],3'd3,a[1:0]}];

      cim_out4_tmp <= cim_in0 * mem[{2'b00,a[7:5],3'd4,a[1:0]}]
                    + cim_in1 * mem[{2'b01,a[7:5],3'd4,a[1:0]}]
                    + cim_in2 * mem[{2'b10,a[7:5],3'd4,a[1:0]}]
                    + cim_in3 * mem[{2'b11,a[7:5],3'd4,a[1:0]}];

      cim_out5_tmp <= cim_in0 * mem[{2'b00,a[7:5],3'd5,a[1:0]}]
                    + cim_in1 * mem[{2'b01,a[7:5],3'd5,a[1:0]}]
                    + cim_in2 * mem[{2'b10,a[7:5],3'd5,a[1:0]}]
                    + cim_in3 * mem[{2'b11,a[7:5],3'd5,a[1:0]}];

      cim_out6_tmp <= cim_in0 * mem[{2'b00,a[7:5],3'd6,a[1:0]}]
                    + cim_in1 * mem[{2'b01,a[7:5],3'd6,a[1:0]}]
                    + cim_in2 * mem[{2'b10,a[7:5],3'd6,a[1:0]}]
                    + cim_in3 * mem[{2'b11,a[7:5],3'd6,a[1:0]}];

      cim_out7_tmp <= cim_in0 * mem[{2'b00,a[7:5],3'd7,a[1:0]}]
                    + cim_in1 * mem[{2'b01,a[7:5],3'd7,a[1:0]}]
                    + cim_in2 * mem[{2'b10,a[7:5],3'd7,a[1:0]}]
                    + cim_in3 * mem[{2'b11,a[7:5],3'd7,a[1:0]}];
    end
  end
end

// ADC quantization: keep only the ADC_PRECISION most significant bits of each
// full-precision sum ([13:8] with the default parameters).
assign cim_out0 = cim_out0_tmp[CIM_SUM_WIDTH-1:CIM_SUM_WIDTH-ADC_PRECISION];
assign cim_out1 = cim_out1_tmp[CIM_SUM_WIDTH-1:CIM_SUM_WIDTH-ADC_PRECISION];
assign cim_out2 = cim_out2_tmp[CIM_SUM_WIDTH-1:CIM_SUM_WIDTH-ADC_PRECISION];
assign cim_out3 = cim_out3_tmp[CIM_SUM_WIDTH-1:CIM_SUM_WIDTH-ADC_PRECISION];
assign cim_out4 = cim_out4_tmp[CIM_SUM_WIDTH-1:CIM_SUM_WIDTH-ADC_PRECISION];
assign cim_out5 = cim_out5_tmp[CIM_SUM_WIDTH-1:CIM_SUM_WIDTH-ADC_PRECISION];
assign cim_out6 = cim_out6_tmp[CIM_SUM_WIDTH-1:CIM_SUM_WIDTH-ADC_PRECISION];
assign cim_out7 = cim_out7_tmp[CIM_SUM_WIDTH-1:CIM_SUM_WIDTH-ADC_PRECISION];

endmodule // Basic_GeMM_CIM
工作模式
  1. 传统存储模式
    当 cimeb = 1(高电平)时:

写操作:cs=1 & web=0,将数据d写入地址a

读操作:cs=1 & web=1,从地址a读出数据到q

  2. CIM模式
    当 cimeb = 0(低电平)且 cs=1 & web=1 时:执行向量-矩阵乘法(4 个输入 × 4×8 权重矩阵 → 8 个输出)
存储布局

CIM 模式下访问的存储地址 = {bank_select[1:0], a[7:5], column[2:0], a[1:0]}

  • bank_select[1:0] → 选择输入通道(0~3对应4个输入)
  • a[7:5] → 选择行(共8行)
  • a[1:0] → 选择权重组内的偏移(offset)
  • column[2:0] → 选择输出列(0~7对应8个输出)
输出精度

乘法:4位输入 × 8位权重 = 12位中间结果

累加:4个12位数相加 → 最大14位结果

精度截取:从14位中取高6位作为最终输出([13:8])

权值格式

对于地址 a = {bank[1:0], row[2:0], col[2:0], offset[1:0]}(bank 对应 4 个输入通道,例如 2'b00 对应 cim_in0):

  • 权重矩阵按列存储,每列4个权重

  • 8个输出通道对应8列

  • 每个权重是8位精度

Wavedrom波形图

方法一(本人执行时存在问题):参考链接:https://zhuanlan.zhihu.com/p/6522936430

VScode + drawio Integration插件+drawio wavedrom插件

但是最后无法拖动?

方法二:使用Waveform render插件也可画图

使用说明

p - 正沿时钟(每个周期先高后低)

n - 负沿时钟(每个周期先低后高)

0 - 低电平

1 - 高电平

. - 保持之前状态

x - 不定态

z - 高阻态

= - 数据总线值

示例

给出json文件的代码如下:

javascript 复制代码
{
  "signal": [
    { "name": "clk", "wave": "p...p...p...p..." },
    { "name": "cs",  "wave": "0.1..1...1...1.." },
    { "name": "web", "wave": "1.0..1...1...0.." },
    { "name": "cimeb", "wave": "1.......1......." },
    { "name": "a",   "wave": "x.=...=...=...=.", "data": ["A0", "A1", "A2", "A3"] },
    { "name": "d",   "wave": "x.=...x...x...=.", "data": ["D0", "D3"] },
    { "name": "q",   "wave": "x...=.x...=...x.", "data": ["Q0", "Q2"] }
  ],
  "head": {
    "text": "传统存储模式 - 读写操作时序",
    "tick": 0
  }
}
javascript 复制代码
{
  "signal": [
    { "name": "clk", "wave": "p...p...p...p...p...p..." },
    { "name": "cs",  "wave": "0.1..1...1...1...1...1.." },
    { "name": "web", "wave": "1.0..1...1...1...0...1.." },
    { "name": "cimeb", "wave": "1...........0..........." },
    { "name": "a",   "wave": "x.=...=...=...=...=...=.", 
      "data": ["WrAddr", "RdAddr", "CIMAddr1", "CIMAddr2", "WrAddr2", "RdAddr2"] },
    { "name": "d",   "wave": "x.=...x...x...x...=...x.", 
      "data": ["WriteData", "WriteData2"] },
    { "name": "q",   "wave": "x...=.x...x...x...x...=.", 
      "data": ["ReadData", "ReadData2"] },
    {},
    { "name": "cim_out", "wave": "x.......=...=...x.......", 
      "data": ["CIM_Result1", "CIM_Result2"] }
  ],
  "config": { "hscale": 2 },
  "head": {
    "text": "模式切换:存储模式 ↔ CIM模式",
    "tick": 0
  }
}
javascript 复制代码
{
  "signal": [
    { "name": "clk", "wave": "p..p..p..p.." },
    { "name": "cimeb", "wave": "1...0.....1." },
    { "name": "CIM计算", "wave": "0..1........", "period": 2 }
  ],
  "foot": {
    "text": "Tsetup: 2ns, Thold: 1ns, Tcalc: 3周期",
    "tick": 0
  }
}
javascript 复制代码
{
  "signal": [
    { "name": "Control", "wave": "1..0..", "node": ".a..b." },
    {},
    { "name": "Input Vector", "wave": "x..=..", 
      "data": ["[X0,X1,X2,X3]"], "node": "...c" },
    { "name": "Weight Address", "wave": "x.=...", 
      "data": ["Row_Select"], "node": "..d" },
    { "name": "Output Vector", "wave": "x...=.", 
      "data": ["[O0...O7]"], "node": "....e" }
  ],
  "edge": [
    "a~>d 地址解码",
    "c~>d 输入锁存",
    "d~>e 计算延迟"
  ]
}
相关推荐
tq10867 分钟前
先探索,后设计
笔记
hnult13 分钟前
2026 在线培训考试系统选型指南:核心功能拆解与选型逻辑
人工智能·笔记·课程设计
AI视觉网奇25 分钟前
ue 角色驱动衣服 绑定衣服
笔记·学习·ue5
三水不滴1 小时前
计网ping原理
经验分享·笔记·计算机网络
prog_61031 小时前
【笔记】思路分享:各种大模型免费当agent后台
笔记·大语言模型·agent·cursor
凯尔萨厮2 小时前
Maven(Windows下载安装)
笔记·maven
wdfk_prog2 小时前
[Linux]学习笔记系列 -- [drivers][input]serio
linux·笔记·学习
菩提小狗3 小时前
小迪安全2023-2024|第5天:基础入门-反弹SHELL&不回显带外&正反向连接&防火墙出入站&文件下载_笔记|web安全|渗透测试|
笔记·安全·web安全
Wentao Sun3 小时前
致敬软件创业者2026
笔记·程序人生
深蓝海拓4 小时前
PySide6,QCoreApplication::aboutToQuit与QtQore.qAddPostRoutine:退出前后的清理工作
笔记·python·qt·学习·pyqt