SystemC 示例

main.cpp

#include <memory>

using namespace std;

#include "top.h"

int sc_main(int i, char *av[])

{

// 关闭关于 IEEE 1666 标准中过时特性的警告

sc_report_handler::set_actions("/IEEE_Std_1666/deprecated", SC_DO_NOTHING);

cout << "sc main start at " << sc_time_stamp() << endl;

std::shared_ptr<Top> top = make_shared<Top>("top");

sc_start(200, SC_NS);

cout << "sc main end at " << sc_time_stamp() << endl;

return 0;

}

top.h

#include <systemc.h>

#include "dcu_interface.h"

/**
 * @brief Top-level stimulus module.
 *
 * Owns the Dcu_Interface instance and registers one thread (DcuDataHandle)
 * that programs it and then raises the start signals.
 */
SC_MODULE(Top)
{
public:
    // DCU subsystem driven by this module; allocated in the constructor.
    std::shared_ptr<Dcu_Interface> dcu_interface_;

    SC_CTOR(Top)
    {
        dcu_interface_ = std::make_shared<Dcu_Interface>("dcu_Interface");

        SC_THREAD(DcuDataHandle);
    }

    // Thread body: configure registers and RAM, pulse the feature-map flag, start processing.
    void DcuDataHandle();

    // Writes the DCU configuration registers through the interface.
    void SetDcuRegValue();

    // Fills the weight, bias and scale regions of the HBUS RAM.
    void SetDcuHbusRam();

    // Drives Array::feature_map_write_flag_ with the given level.
    void SendFeatureMapStartSignal(uint32_t start_flag);

    // Notifies Dcu::dcu_start_event_.
    void SendDataHandleStartSignal();
};

top.cpp

#include "top.h"

void Top::SetDcuRegValue()

{

uint64_t fin_step_h = 1;

uint64_t fin_parallel_h = (uint64_t)1 << 32;

uint64_t reg_data = fin_step_h + fin_parallel_h;

dcu_interface_->DcuRegWrite(0x0, (uint8_t *)&reg_data, 8);

uint64_t fin_h = 4;

uint64_t fin_w = (uint64_t)4 << 32;

reg_data = fin_h + fin_w;

dcu_interface_->DcuRegWrite(0x8, (uint8_t *)&reg_data, 8);

uint64_t fin_c = 4;

uint64_t kernel_num = (uint64_t)4 << 32;

reg_data = fin_c + kernel_num;

dcu_interface_->DcuRegWrite(0x10, (uint8_t *)&reg_data, 8);

uint64_t kernel_size = 64;

uint64_t weight_buf_load_en = (uint64_t)1 << 32;

reg_data = kernel_size + weight_buf_load_en;

dcu_interface_->DcuRegWrite(0x18, (uint8_t *)&reg_data, 8);

uint64_t bias_buf_load_en = 1;

uint64_t scale_buf_load_en = (uint64_t)1 << 32;

reg_data = bias_buf_load_en + scale_buf_load_en;

dcu_interface_->DcuRegWrite(0x20, (uint8_t *)&reg_data, 8);

uint64_t weight_addr = 0x10000;

uint64_t bias_addr = (uint64_t)0x20000 << 32;

reg_data = weight_addr + bias_addr;

dcu_interface_->DcuRegWrite(0x28, (uint8_t *)&reg_data, 8);

uint64_t scale_addr = 0x30000;

uint64_t fin_bit_mode = (uint64_t)1 << 32;

reg_data = scale_addr + fin_bit_mode;

dcu_interface_->DcuRegWrite(0x30, (uint8_t *)&reg_data, 8);

uint64_t fin_sign = 1;

uint64_t dcu_en = (uint64_t)1 << 32;

reg_data = fin_sign + dcu_en;

dcu_interface_->DcuRegWrite(0x38, (uint8_t *)&reg_data, 8, 1);

}

/**
 * @brief Loads constant weight, bias and scale data into the HBUS RAM.
 *
 * The target addresses (0x10000, 0x20000, 0x30000) are the same values that
 * SetDcuRegValue writes as weight_addr, bias_addr and scale_addr. The last
 * write passes end_flag = 1 so HbusMemWrite notifies the HBUS-load event.
 */
void Top::SetDcuHbusRam()
{
    // 256 weight bytes, all set to 1.
    std::array<uint8_t, 256> weight;
    weight.fill(1);
    dcu_interface_->HbusMemWrite(0x10000, weight.data(), 256);

    // Four 16-bit bias values of 50, written as 8 raw bytes.
    std::array<int16_t, 4> bias;
    bias.fill(50);
    dcu_interface_->HbusMemWrite(0x20000, reinterpret_cast<uint8_t *>(bias.data()), 8);

    // Four scale bytes of 2; this write ends the load sequence.
    std::array<uint8_t, 4> scale;
    scale.fill(2);
    dcu_interface_->HbusMemWrite(0x30000, scale.data(), 4, 1);
}

/**
 * @brief Stimulus thread: configure the DCU, pulse the feature-map flag three
 *        times, trigger processing, then idle before repeating.
 */
void Top::DcuDataHandle()
{
    for (;;)
    {
        SetDcuRegValue();
        SetDcuHbusRam();
        wait(30, SC_NS);

        // Three high/low pulses, 5 ns high each. The low phase is 5 ns after
        // the first two pulses and 10 ns after the last one.
        for (int pulse = 0; pulse < 3; ++pulse)
        {
            SendFeatureMapStartSignal(1);
            wait(5, SC_NS);

            SendFeatureMapStartSignal(0);
            wait((pulse < 2) ? 5 : 10, SC_NS);
        }

        SendDataHandleStartSignal();
        wait(90, SC_NS);
    }
}

/**
 * @brief Drives the array module's feature_map_write_flag_ signal.
 * @param start_flag Level to write; any non-zero value is written as true.
 */
void Top::SendFeatureMapStartSignal(uint32_t start_flag)
{
    const bool level = (start_flag != 0);
    auto &array_module = *dcu_interface_->array_;
    array_module.feature_map_write_flag_.write(level);
}

void Top::SendDataHandleStartSignal()

{

dcu_interface_->dcu_->dcu_start_event_.notify();

}

dcu_interface.h

#pragma once

#include <systemc.h>

#include <map>

#include <vector>

#include "array.h"

#include "dcu.h"

#include "mem_interface.h"

#include "post.h"

#include "reg_read_interface.h"

#include "reg_write_interface.h"

using namespace sc_core;

/**
 * @brief Wrapper module around the Dcu, Array and Post sub-modules.
 *
 * Implements the register read/write and HBUS memory interfaces, and owns the
 * FIFOs and signals that connect the sub-modules.
 */
class Dcu_Interface : public RegWrite_Interface, public RegRead_Interface, public Mem_Interface, public sc_module
{
    SC_HAS_PROCESS(Dcu_Interface);  // Declares that this module supports processes.

public:
    Dcu_Interface(sc_module_name name);
    ~Dcu_Interface();

    /* Register access, forwarded to the Dcu sub-module. */
    virtual void DcuRegWrite(uint32_t addr, uint8_t* val, uint32_t size, uint8_t end_flag = 0);
    virtual void DcuRegRead(uint32_t addr, uint8_t* val, uint32_t size);

    /* HBUS RAM access; Write/Read return false when the range check fails. */
    virtual bool HbusMemWrite(uint32_t addr, uint8_t* val, uint32_t size, uint8_t end_flag = 0);
    virtual bool HbusMemRead(uint32_t addr, uint8_t* val, uint32_t size);
    virtual void HbusMemReset();

    /* Sub-modules, created in the constructor. */
    std::shared_ptr<Dcu> dcu_;
    std::shared_ptr<Array> array_;
    std::shared_ptr<Post> post_;

    /* Notified by Array after it has pushed a feature map into the FIFO. */
    sc_event feature_map_event_;

    /* Notified by Dcu after it has pushed its results into the FIFO. */
    sc_event dcu_result_event_;

private:
    /* Signals bound to the Dcu register-write input ports. */
    sc_signal<uint32_t> dcu_reg_addr_;
    sc_signal<uint64_t> dcu_reg_data_;

    /* Array -> Dcu feature-map channel. */
    sc_fifo<uint8_t> feature_map_fifo_;

    /* Dcu -> Post result channel. */
    sc_fifo<int16_t> dcu_result_fifo_;
};

dcu_interface.cpp

#include "dcu_interface.h"

#include <chrono>

#include <thread>

/**
 * @brief Creates the Dcu, Array and Post sub-modules and wires them together.
 *
 * The feature-map FIFO is sized to 64 entries; the result FIFO keeps its
 * default size.
 *
 * @param name SystemC module name.
 */
Dcu_Interface::Dcu_Interface(sc_module_name name) : feature_map_fifo_(64)
{
    // Sub-modules are instantiated in the order dcu, array, post.
    dcu_ = std::make_shared<Dcu>("dcu", this);
    array_ = std::make_shared<Array>("array", this);
    post_ = std::make_shared<Post>("post", this);

    // Array -> Dcu feature-map path.
    array_->feature_map_out_(feature_map_fifo_);
    dcu_->feature_map_in_(feature_map_fifo_);

    // Dcu -> Post result path.
    dcu_->dcu_result_out_(dcu_result_fifo_);
    post_->dcu_result_in_(dcu_result_fifo_);

    // Register-write address and data signals into the Dcu.
    dcu_->reg_write_addr_in_(dcu_reg_addr_);
    dcu_->reg_write_data_in_(dcu_reg_data_);
}

/* Nothing to release by hand: sub-modules are held by shared_ptr members. */
Dcu_Interface::~Dcu_Interface() = default;

void Dcu_Interface::DcuRegRead(uint32_t addr, uint8_t* val, uint32_t size)

{

dcu_->Read(addr, val, size);

wait(5, SC_NS);

}

void Dcu_Interface::DcuRegWrite(uint32_t addr, uint8_t* val, uint32_t size, uint8_t end_flag)

{

dcu_->Write(addr, val, size);

wait(5, SC_NS);

if (1 == end_flag)

{

dcu_->reg_write_end_event_.notify();

}

}

/**
 * @brief Copies bytes out of the HBUS RAM and then consumes 5 ns.
 *
 * @param addr Byte offset into hbus_ram_.
 * @param val  Destination buffer; must hold at least size bytes.
 * @param size Number of bytes to copy.
 * @return true on success; false (with a message on cout) when
 *         [addr, addr + size) does not fit inside RAM_SIZE.
 */
bool Dcu_Interface::HbusMemRead(uint32_t addr, uint8_t* val, uint32_t size)
{
    // Written without computing addr + size in 32 bits: that sum can wrap
    // around and make an out-of-range request look valid.
    const bool in_range = (size <= RAM_SIZE) && (addr <= RAM_SIZE - size);
    if (!in_range)
    {
        cout << "addr + size = " << (static_cast<uint64_t>(addr) + size) << " over range ram size" << endl;
        return false;
    }

    memcpy(val, this->hbus_ram_ + addr, size);
    wait(5, SC_NS);
    return true;
}

/**
 * @brief Copies bytes into the HBUS RAM, consumes 5 ns, and optionally signals
 *        the end of a load sequence.
 *
 * @param addr     Byte offset into hbus_ram_.
 * @param val      Source buffer holding at least size bytes.
 * @param size     Number of bytes to copy.
 * @param end_flag When equal to 1, the Dcu's hbus_load_event_ is notified after the delay.
 * @return true on success; false (with a message on cout) when
 *         [addr, addr + size) does not fit inside RAM_SIZE.
 */
bool Dcu_Interface::HbusMemWrite(uint32_t addr, uint8_t* val, uint32_t size, uint8_t end_flag)
{
    // Written without computing addr + size in 32 bits: that sum can wrap
    // around and make an out-of-range request look valid.
    const bool in_range = (size <= RAM_SIZE) && (addr <= RAM_SIZE - size);
    if (!in_range)
    {
        cout << "addr + size = " << (static_cast<uint64_t>(addr) + size) << " over range ram size" << endl;
        return false;
    }

    memcpy(this->hbus_ram_ + addr, val, size);
    wait(5, SC_NS);

    if (1 == end_flag)
    {
        dcu_->hbus_load_event_.notify();
    }
    return true;
}

void Dcu_Interface::HbusMemReset()

{

memset(hbus_ram_, 0, RAM_SIZE);

}

dcu.h

#pragma once

#include "dcu_param.h"

class Dcu_Interface;

/**
 * @brief DCU compute model.
 *
 * Holds the register table, the local weight/bias/scale memories and the
 * staged processing pipeline (parse -> PE calculation -> bias -> shift ->
 * send -> done) that DcuDataProcessStart steps through.
 *
 * All fixed-size buffers and work_step_ are value-initialized so that no step
 * can observe indeterminate data, for example when a load-enable flag is 0 and
 * the corresponding memory is never loaded.
 */
class Dcu : public sc_module
{
    SC_HAS_PROCESS(Dcu);  // Declares that this module supports processes.

public:
    Dcu(sc_module_name name, Dcu_Interface* dcu_interface);

    sc_in<uint32_t> reg_write_addr_in_;
    sc_in<uint64_t> reg_write_data_in_;

    /* Incoming feature-map bytes and the buffer they are accumulated in. */
    sc_fifo_in<uint8_t> feature_map_in_;
    std::vector<uint8_t> feature_map_;

    /* Results waiting to be sent and the FIFO port they are sent on. */
    std::vector<int16_t> dcu_result_;
    sc_fifo_out<int16_t> dcu_result_out_;

    /* Notified by Dcu_Interface::DcuRegWrite when end_flag is 1. */
    sc_event reg_write_end_event_;

    /* Notified by Dcu_Interface::HbusMemWrite when end_flag is 1. */
    sc_event hbus_load_event_;

    /* Notified by Top to start data processing. */
    sc_event dcu_start_event_;

    /* Write DCU registers. */
    uint32_t Write(uint32_t addr, uint8_t* val, uint32_t size);

    /* Read DCU registers. */
    uint32_t Read(uint32_t addr, uint8_t* val, uint32_t size);

private:
    Dcu_Interface* dcu_interface_;

    /* DCU register table. */
    DcuTableUnion dcu_table_union_ = {};

    /* Feature-map data after the h/w layout conversion. */
    std::array<int8_t, DCU_MAX_FEATURE_MAP_NUM * DCU_MAX_PARALLEL_H> feature_map_ex_{};

    /* Feature-map data after it has been split per parallel lane. */
    std::array<std::array<int8_t, DCU_MAX_FEATURE_MAP_NUM>, DCU_MAX_PARALLEL_H> feature_map_after_parallel_parse_{};

    /* Intermediate accumulators. */
    std::array<int32_t, DCU_OUTPUT_CHANNEL_MAX_NUM> middle_result_{};

    /* Final saturated results. */
    std::array<int16_t, DCU_OUTPUT_CHANNEL_MAX_NUM> final_result_{};

    /* Weight data. */
    std::array<int8_t, DCU_WEIGHT_RAM_SIZE> dcu_pe_weight_mem_{};

    /* Bias data. */
    std::array<int16_t, DCU_BIAS_RAM_SIZE> dcu_bias_mem_{};

    /* Scale (shift) values. */
    std::array<uint8_t, DCU_SCALE_RAM_SIZE> dcu_scale_mem_{};

    /* Current pipeline step used by DcuDataProcessStart. */
    uint32_t work_step_ = 0;

    /* Register address validity check. */
    uint32_t AddrValid(uint32_t addr);

    /* Receive the feature map. */
    void RecvFeatureMap();

    /* Load data from the HBUS RAM. */
    void LoadHbusMem();

    /* Start DCU data processing. */
    void DcuDataProcessStart();

    /* Initialize run parameters. */
    uint32_t DcuParamsInit();

    /* Print and check register parameters. */
    uint32_t DcuParamsPrint();

    /* Feature-map h/w layout conversion. */
    uint32_t DcuFeatureMapEx();

    /* Split the feature map per parallel lane. */
    uint32_t ParallelFeatureMapParse();

    /* PE array calculation. */
    uint32_t DcuPECalc();

    /* Add bias to the results. */
    uint32_t DcuPEResultAddBias();

    /* Shift the results. */
    uint32_t DcuAddBiasResultMovebits();

    /* Send the DCU results. */
    uint32_t SendDcuResult();

    /* One sliding-window pass is finished; reset state. */
    uint32_t WorkDone();
};

dcu.cpp

#include "dcu.h"

#include "dcu_interface.h"

/**
 * Constructor: stores the owning interface and registers the module's processes.
 *
 * @param name          SystemC module name.
 * @param dcu_interface Owning Dcu_Interface; its feature_map_event_ is used for
 *                      sensitivity below, so it must be non-null here.
 */

Dcu::Dcu(sc_module_name name, Dcu_Interface *dcu_interface) : dcu_interface_(dcu_interface)

{

/* Thread that loads weight/bias/scale data from the HBUS RAM. */

SC_THREAD(LoadHbusMem);

/* Method that drains the feature-map FIFO. The sensitive/dont_initialize
   lines below apply to this most recently declared process, so the statement
   order matters. */

SC_METHOD(RecvFeatureMap);

sensitive << dcu_interface->feature_map_event_;

dont_initialize();

/* Thread that runs the processing pipeline after the start event. */

SC_THREAD(DcuDataProcessStart);

}

/**
 * @description: Register address validity check.
 * @param {uint32_t} addr Register address.
 * @return {uint32_t} RT_OK when DCU_REG_START <= addr <= DCU_REG_END,
 *         otherwise RT_ERROR {RT_ERROR = 0, RT_OK = 1}.
 * @author: duyu
 * @Date: 2023-04-07 17:17:19
 */
uint32_t Dcu::AddrValid(uint32_t addr)
{
    const bool below_window = (addr < DCU_REG_START);
    const bool above_window = (addr > DCU_REG_END);

    if (below_window || above_window)
    {
        return RT_ERROR;
    }
    return RT_OK;
}

/**
 * @description: Write DCU registers byte by byte into the register table.
 * @param {uint32_t} addr Register address of the first byte.
 * @param {uint8_t*} val  Source bytes.
 * @param {uint32_t} size Number of bytes to write.
 * @return {uint32_t} RT_OK when the bytes were stored; RT_ERROR when val is
 *         null, addr is outside the register window, or addr + size would run
 *         past the end of the register table.
 * @author: duyu
 * @Date: 2024-01-26 17:17:19
 */
uint32_t Dcu::Write(uint32_t addr, uint8_t *val, uint32_t size)
{
    uint32_t ret = RT_ERROR;

    if ((val != nullptr) && AddrValid(addr))
    {
        const uint32_t offset = addr - DCU_REG_START;

        // AddrValid only checks the first byte; the whole span must also fit
        // in the byte view of the table (assumes reg_8 is an array member).
        const uint64_t table_bytes = sizeof(dcu_table_union_.reg_8) / sizeof(dcu_table_union_.reg_8[0]);
        if (static_cast<uint64_t>(offset) + size <= table_bytes)
        {
            for (uint32_t i = 0; i < size; i++)
            {
                dcu_table_union_.reg_8[offset + i] = val[i];
            }
            ret = RT_OK;
        }
    }

    // Restore the stream flags afterwards so `hex` does not leak into later
    // prints that expect decimal.
    const std::ios_base::fmtflags saved_flags = cout.flags();
    cout << "dcu reg write addr = 0x" << hex << addr << " at " << sc_time_stamp() << endl;
    cout.flags(saved_flags);

    return ret;
}

/**
 * @description: Read DCU registers byte by byte from the register table.
 * @param {uint32_t} addr Register address of the first byte.
 * @param {uint8_t*} val  Destination buffer for the bytes read.
 * @param {uint32_t} size Number of bytes to read.
 * @return {uint32_t} RT_OK when the bytes were copied; RT_ERROR when val is
 *         null, addr is outside the register window, or addr + size would run
 *         past the end of the register table.
 * @author: duyu
 * @Date: 2024-01-26 17:17:19
 */
uint32_t Dcu::Read(uint32_t addr, uint8_t *val, uint32_t size)
{
    uint32_t ret = RT_ERROR;

    /* Address validity check. */
    if ((val != nullptr) && AddrValid(addr))
    {
        const uint32_t offset = addr - DCU_REG_START;

        // AddrValid only checks the first byte; the whole span must also fit
        // in the byte view of the table (assumes reg_8 is an array member).
        const uint64_t table_bytes = sizeof(dcu_table_union_.reg_8) / sizeof(dcu_table_union_.reg_8[0]);
        if (static_cast<uint64_t>(offset) + size <= table_bytes)
        {
            for (uint32_t i = 0; i < size; i++)
            {
                val[i] = dcu_table_union_.reg_8[offset + i];
            }
            ret = RT_OK;
        }
    }

    // Restore the stream flags afterwards so `hex` does not leak into later
    // prints that expect decimal.
    const std::ios_base::fmtflags saved_flags = cout.flags();
    cout << "dcu reg read addr = 0x" << hex << addr << " at " << sc_time_stamp() << endl;
    cout.flags(saved_flags);

    return ret;
}

void Dcu::LoadHbusMem()

{

while (1)

{

wait(hbus_load_event_ & reg_write_end_event_);

/* weight */

if (dcu_table_union_.dcu_table.weight_buf_load_en)

{

cout << "weight load start at " << sc_time_stamp() << endl;

dcu_interface_->HbusMemRead(dcu_table_union_.dcu_table.weight_addr, (uint8_t *)dcu_pe_weight_mem_.data(),

dcu_table_union_.dcu_table.kernel_size * dcu_table_union_.dcu_table.kernel_num);

cout << "weight load end at " << sc_time_stamp() << endl;

}

/* bias */

if (dcu_table_union_.dcu_table.bias_buf_load_en)

{

cout << "bias load start at " << sc_time_stamp() << endl;

dcu_interface_->HbusMemRead(dcu_table_union_.dcu_table.bias_addr, (uint8_t *)dcu_bias_mem_.data(),

dcu_table_union_.dcu_table.kernel_num * sizeof(int16_t));

cout << "bias load end at " << sc_time_stamp() << endl;

}

/* scale */

if (dcu_table_union_.dcu_table.scale_buf_load_en)

{

cout << "scale load start at " << sc_time_stamp() << endl;

dcu_interface_->HbusMemRead(dcu_table_union_.dcu_table.scale_addr, dcu_scale_mem_.data(), dcu_table_union_.dcu_table.kernel_num);

cout << "scale load end at " << sc_time_stamp() << endl;

}

}

}

void Dcu::RecvFeatureMap()

{

uint8_t feature_map = 0;

static uint32_t recv_feature_map_num = 0;

while (feature_map_in_.num_available() > 0)

{

feature_map = feature_map_in_.read();

// cout << "feature map: " << dec << (int)feature_map << endl;

feature_map_.push_back(feature_map);

}

recv_feature_map_num++;

cout << "num:" << recv_feature_map_num << " recv feature map at " << sc_time_stamp() << endl;

}

/**
 * @brief Thread: after dcu_start_event_, run the processing pipeline one step
 *        per 10 ns (parse -> PE calc -> add bias -> shift -> send -> done).
 *
 * A step whose function does not return RT_OK is retried after another 10 ns.
 * The thread returns once the final step succeeds.
 *
 * When the DCU is disabled (dcu_en == 0), or work_step_ holds an unexpected
 * value, the loop now yields for 10 ns per iteration. Without that wait the
 * loop never returns control to the kernel and the simulation stops advancing.
 */
void Dcu::DcuDataProcessStart()
{
    uint32_t ret = 0;

    wait(dcu_start_event_);

    DcuParamsInit();

    cout << "data process start at " << sc_time_stamp() << endl;

    while (1)
    {
        if (!dcu_table_union_.dcu_table.dcu_en)
        {
            /* Disabled: poll again later instead of spinning. */
            wait(10, SC_NS);
            continue;
        }

        switch (work_step_)
        {
            case 0: /* split feature map per parallel lane */
                wait(10, SC_NS);
                ret = ParallelFeatureMapParse();
                if (RT_OK == ret)
                {
                    work_step_ = 1;
                    cout << "feature map parse end at " << sc_time_stamp() << endl;
                }
                break;

            case 1: /* PE array calculation */
                wait(10, SC_NS);
                ret = DcuPECalc();
                if (RT_OK == ret)
                {
                    work_step_ = 2;
                    cout << "pe cal end at " << sc_time_stamp() << endl;
                }
                break;

            case 2: /* add bias */
                wait(10, SC_NS);
                ret = DcuPEResultAddBias();
                if (RT_OK == ret)
                {
                    work_step_ = 3;
                    cout << "add bias end at " << sc_time_stamp() << endl;
                }
                break;

            case 3: /* shift */
                wait(10, SC_NS);
                ret = DcuAddBiasResultMovebits();
                if (RT_OK == ret)
                {
                    work_step_ = 4;
                    cout << "movebit end at " << sc_time_stamp() << endl;
                }
                break;

            case 4: /* send results */
                wait(10, SC_NS);
                ret = SendDcuResult();
                if (RT_OK == ret)
                {
                    work_step_ = 5;
                    cout << "dcu send result at " << sc_time_stamp() << endl;
                }
                break;

            case 5: /* reset state */
                wait(10, SC_NS);
                ret = WorkDone();
                if (RT_OK == ret)
                {
                    work_step_ = 6;
                    cout << "data process end at " << sc_time_stamp() << endl;
                    return;
                }
                break;

            default:
                /* Unknown step: yield so simulated time can still advance. */
                wait(10, SC_NS);
                break;
        }
    }
}

/**
 * @description: Initialize run parameters: reset the pipeline step to 0, zero
 *               the working buffers, then call DcuParamsPrint.
 * @return {uint32_t} Always RT_OK.
 * @author: duyu
 * @Date: 2023-01-26 09:17:16
 */
uint32_t Dcu::DcuParamsInit()
{
    work_step_ = 0;

    /* Clear working memories. */
    feature_map_ex_.fill(0);
    for (auto &lane : feature_map_after_parallel_parse_)
    {
        lane.fill(0);
    }
    middle_result_.fill(0);
    final_result_.fill(0);

    /* Register parameter check and print. */
    DcuParamsPrint();

    return RT_OK;
}

/**
 * @brief Placeholder for dumping the register table; currently prints nothing.
 * @return Always RT_OK.
 */
uint32_t Dcu::DcuParamsPrint()
{
    /* Debug dump of every register field, kept disabled. */
    // cout << "fin_step_h = 0x" << std::hex << dcu_table_union_.dcu_table.fin_step_h << endl;
    // cout << "fin_parallel_h = 0x" << std::hex << dcu_table_union_.dcu_table.fin_parallel_h << endl;
    // cout << "fin_h = 0x" << std::hex << dcu_table_union_.dcu_table.fin_h << endl;
    // cout << "fin_w = 0x" << std::hex << dcu_table_union_.dcu_table.fin_w << endl;
    // cout << "fin_c = 0x" << std::hex << dcu_table_union_.dcu_table.fin_c << endl;
    // cout << "kernel_num = 0x" << std::hex << dcu_table_union_.dcu_table.kernel_num << endl;
    // cout << "kernel_size = 0x" << std::hex << dcu_table_union_.dcu_table.kernel_size << endl;
    // cout << "weight_buf_load_en = 0x" << std::hex << dcu_table_union_.dcu_table.weight_buf_load_en << endl;
    // cout << "bias_buf_load_en = 0x" << std::hex << dcu_table_union_.dcu_table.bias_buf_load_en << endl;
    // cout << "scale_buf_load_en = 0x" << std::hex << dcu_table_union_.dcu_table.scale_buf_load_en << endl;
    // cout << "weight_addr = 0x" << std::hex << dcu_table_union_.dcu_table.weight_addr << endl;
    // cout << "bias_addr = 0x" << std::hex << dcu_table_union_.dcu_table.bias_addr << endl;
    // cout << "scale_addr = 0x" << std::hex << dcu_table_union_.dcu_table.scale_addr << endl;
    // cout << "fin_bit_mode = 0x" << std::hex << dcu_table_union_.dcu_table.fin_bit_mode << endl;
    // cout << "fin_sign = 0x" << std::hex << dcu_table_union_.dcu_table.fin_sign << endl;
    // cout << "dcu_en = 0x" << std::hex << dcu_table_union_.dcu_table.dcu_en << endl;

    return RT_OK;
}

/**

* @description:并行模式下feature map数据解析

* @return 执行流程是否异常 {RT_ERROR = 0, RT_OK = 1}

* @author: duyu

* @Date: 2023-04-07 17:17:19

*/

uint32_t Dcu::ParallelFeatureMapParse()

{

uint32_t ret = RT_OK;

/* hw转化*/

DcuFeatureMapEx();

/* feature map并行度解析*/

if (1 == dcu_table_union_.dcu_table.fin_parallel_h)

{

memcpy(feature_map_after_parallel_parse_[0].data(), feature_map_ex_.data(), dcu_table_union_.dcu_table.kernel_size);

}

else if ((dcu_table_union_.dcu_table.fin_parallel_h > 1) && (dcu_table_union_.dcu_table.fin_parallel_h <= DCU_MAX_PARALLEL_H))

{

for (uint32_t i = 0; i < dcu_table_union_.dcu_table.fin_parallel_h; i++)

{

memcpy(feature_map_after_parallel_parse_[i].data(),

feature_map_ex_.data() +

i * dcu_table_union_.dcu_table.fin_step_h * dcu_table_union_.dcu_table.fin_w * dcu_table_union_.dcu_table.fin_c,

dcu_table_union_.dcu_table.kernel_size);

}

}

return ret;

}

/**
 * @description: Convert one buffered feature map between h-major and w-major
 *               order, writing into feature_map_ex_.
 *
 * The group of fin_c bytes at source position (fin_h + H * fin_w) is copied to
 * destination position (fin_h * W + fin_w). The converted fin_h * fin_w * fin_c
 * bytes are then removed from the front of feature_map_.
 *
 * @return {uint32_t} RT_OK on success; RT_ERROR when feature_map_ is empty,
 *         holds fewer than fin_h * fin_w * fin_c bytes, or when that size
 *         exceeds the capacity of feature_map_ex_ {RT_ERROR = 0, RT_OK = 1}.
 * @author: duyu
 * @Date: 2023-04-07 17:17:19
 */
uint32_t Dcu::DcuFeatureMapEx()
{
    if (feature_map_.empty())
    {
        return RT_ERROR;
    }

    const uint64_t height = dcu_table_union_.dcu_table.fin_h;
    const uint64_t width = dcu_table_union_.dcu_table.fin_w;
    const uint64_t channels = dcu_table_union_.dcu_table.fin_c;

    /* Nothing to move when any dimension is zero. */
    if ((height == 0) || (width == 0) || (channels == 0))
    {
        return RT_OK;
    }

    /* The copy must fit in both the source vector and the destination array. */
    const uint64_t available = feature_map_.size();
    const uint64_t capacity = feature_map_ex_.size();
    const uint64_t limit = (available < capacity) ? available : capacity;

    /* Assumes register fields are at most 32 bits wide, so height * width
       fits in 64 bits — TODO confirm against DcuTableUnion. */
    const uint64_t plane = height * width;
    if (plane > limit / channels)
    {
        return RT_ERROR;
    }
    const uint64_t total_bytes = plane * channels;

    /* h/w direction conversion. */
    for (uint64_t w = 0; w < width; ++w)
    {
        for (uint64_t h = 0; h < height; ++h)
        {
            memcpy(feature_map_ex_.data() + (h * width + w) * channels,
                   feature_map_.data() + (h + height * w) * channels,
                   channels);
        }
    }

    /* Drop the consumed feature map from the receive buffer. */
    feature_map_.erase(feature_map_.begin(),
                       feature_map_.begin() + static_cast<std::vector<uint8_t>::difference_type>(total_bytes));

    return RT_OK;
}

/**

* @description:PE阵列计算

* @param 无

* @return {uint32_t} 执行流程是否异常 {RT_ERROR = 0, RT_OK = 1}

* @author: duyu

* @Date: 2023-04-07 17:17:19

*/

uint32_t Dcu::DcuPECalc()

{

uint32_t ret = RT_OK;

int32_t mul_result = 0;

for (uint32_t parallel_index = 0; parallel_index < dcu_table_union_.dcu_table.fin_parallel_h; parallel_index++)

{

for (uint32_t output_index = 0; output_index < dcu_table_union_.dcu_table.kernel_num; output_index++)

{

for (uint32_t feature_map_index = 0; feature_map_index < dcu_table_union_.dcu_table.kernel_size; feature_map_index++)

{

mul_result = static_cast<int32_t>(feature_map_after_parallel_parse_[parallel_index][feature_map_index] *

dcu_pe_weight_mem_[output_index * dcu_table_union_.dcu_table.kernel_size + feature_map_index]);

middle_result_[parallel_index * dcu_table_union_.dcu_table.kernel_num + output_index] += mul_result;

}

// cout << "pe cal result: " << dec << middle_result_[parallel_index * dcu_table_union_.dcu_table.kernel_num + output_index] << endl;

}

}

return ret;

}

/**

* @description:输出结果加bias

* @param 无

* @return {uint32_t} 执行流程是否异常 {RT_ERROR = 0, RT_OK = 1}

* @author: duyu

* @Date: 2023-04-07 17:17:19

*/

uint32_t Dcu::DcuPEResultAddBias()

{

uint32_t ret = RT_OK;

/* 加bias*/

for (uint32_t parallel_index = 0; parallel_index < dcu_table_union_.dcu_table.fin_parallel_h; parallel_index++)

{

for (uint32_t middle_result_index = 0; middle_result_index < dcu_table_union_.dcu_table.kernel_num; middle_result_index++)

{

middle_result_[parallel_index * dcu_table_union_.dcu_table.kernel_num + middle_result_index] =

middle_result_[parallel_index * dcu_table_union_.dcu_table.kernel_num + middle_result_index] + dcu_bias_mem_[middle_result_index];

// cout << "add bias result: " << dec << middle_result_[parallel_index * dcu_table_union_.dcu_table.kernel_num + middle_result_index]

// << endl;

}

}

return ret;

}

/**

* @description:输出结果移位

* @param 无

* @return {uint32_t} 执行流程是否异常 {RT_ERROR = 0, RT_OK = 1}

* @author: duyu

* @Date: 2023-04-07 17:17:19

*/

uint32_t Dcu::DcuAddBiasResultMovebits()

{

uint32_t ret = RT_OK;

/* 移位*/

for (uint32_t parallel_index = 0; parallel_index < dcu_table_union_.dcu_table.fin_parallel_h; parallel_index++)

{

for (uint32_t middle_result_index = 0; middle_result_index < dcu_table_union_.dcu_table.kernel_num; middle_result_index++)

{

middle_result_[parallel_index * dcu_table_union_.dcu_table.kernel_num + middle_result_index] =

middle_result_[parallel_index * dcu_table_union_.dcu_table.kernel_num + middle_result_index] >>

dcu_scale_mem_[middle_result_index];

/* 饱和截断*/

if (middle_result_[parallel_index * dcu_table_union_.dcu_table.kernel_num + middle_result_index] >= 0)

{

if (middle_result_[parallel_index * dcu_table_union_.dcu_table.kernel_num + middle_result_index] > 0x7fff)

{

final_result_[parallel_index * dcu_table_union_.dcu_table.kernel_num + middle_result_index] = 0x7fff;

}

else

{

final_result_[parallel_index * dcu_table_union_.dcu_table.kernel_num + middle_result_index] =

middle_result_[parallel_index * dcu_table_union_.dcu_table.kernel_num + middle_result_index];

}

}

else

{

if (middle_result_[parallel_index * dcu_table_union_.dcu_table.kernel_num + middle_result_index] < 0xffff8000)

{

final_result_[parallel_index * dcu_table_union_.dcu_table.kernel_num + middle_result_index] = 0x8000;

}

else

{

final_result_[parallel_index * dcu_table_union_.dcu_table.kernel_num + middle_result_index] =

middle_result_[parallel_index * dcu_table_union_.dcu_table.kernel_num + middle_result_index];

}

}

dcu_result_.push_back(final_result_[parallel_index * dcu_table_union_.dcu_table.kernel_num + middle_result_index]);

}

}

return ret;

}

/**
 * @description: End of one processing pass: zero the working buffers.
 * @return {uint32_t} Always RT_OK.
 * @author: duyu
 * @Date: 2023-04-07 17:17:19
 */
uint32_t Dcu::WorkDone()
{
    /* Clear working memories. */
    feature_map_ex_.fill(0);
    for (auto &lane : feature_map_after_parallel_parse_)
    {
        lane.fill(0);
    }
    middle_result_.fill(0);
    final_result_.fill(0);

    return RT_OK;
}

uint32_t Dcu::SendDcuResult()

{

uint32_t ret = RT_OK;

sc_time time(10, SC_NS);

while (!dcu_result_.empty())

{

dcu_result_out_.write(dcu_result_.front());

dcu_result_.erase(dcu_result_.begin());

}

dcu_interface_->dcu_result_event_.notify(time);

return ret;

}

array.h

#pragma once

#include <systemc.h>

class Dcu_Interface;

/**
 * @brief Feature-map source module.
 *
 * Each rising edge of feature_map_write_flag_ runs SendFeatureMap, which
 * pushes a feature map into the output FIFO.
 */
class Array : public sc_module
{
public:
    SC_HAS_PROCESS(Array);  // Declares that this module supports processes and threads.

    sc_fifo_out<uint8_t> feature_map_out_;

    /* Written by Top; its rising edge starts a feature-map transfer. */
    sc_signal<bool> feature_map_write_flag_;

    void SendFeatureMap();

    /**
     * @param name          SystemC module name.
     * @param dcu_interface Owning interface, used by SendFeatureMap to reach feature_map_event_.
     */
    Array(sc_module_name name, Dcu_Interface* dcu_interface) : dcu_interface_(dcu_interface)
    {
        SC_METHOD(SendFeatureMap);

        /* Rising-edge sensitivity via posedge_event(), in place of the
           deprecated sensitive_pos. */
        sensitive << feature_map_write_flag_.posedge_event();

        dont_initialize();
    }

private:
    Dcu_Interface* dcu_interface_;
};

array.cpp

#include "array.h"

#include "dcu_interface.h"

void Array::SendFeatureMap()

{

static uint32_t send_feature_map_num = 0;

sc_time time(10, SC_NS);

for (uint i = 0; i < 64; i++)

{

feature_map_out_.write(i);

}

dcu_interface_->feature_map_event_.notify(time);

send_feature_map_num++;

cout << "num:" << send_feature_map_num << " send feature map at " << sc_time_stamp() << endl;

}

post.h

#pragma once

#include <systemc.h>

class Dcu_Interface;

/**
 * @brief Result sink module: collects DCU results from the result FIFO.
 */
class Post : public sc_module
{
public:
    SC_HAS_PROCESS(Post);  // Declares that this module supports processes and threads.

    /**
     * @param name          SystemC module name.
     * @param dcu_interface Owning interface; its dcu_result_event_ triggers RecvDcuResult.
     */
    Post(sc_module_name name, Dcu_Interface* dcu_interface);

    /* Receive the DCU calculation results. */
    void RecvDcuResult();

    /* Input side of the Dcu -> Post result FIFO. */
    sc_fifo_in<int16_t> dcu_result_in_;

    /* Every result received so far, in arrival order. */
    std::vector<int16_t> dcu_result_;

private:
    Dcu_Interface* dcu_interface_;
};

post.cpp

#include "post.h"

#include "dcu_interface.h"

/**
 * Constructor: stores the owning interface and registers RecvDcuResult as a
 * method process triggered by the interface's dcu_result_event_.
 *
 * @param name          SystemC module name.
 * @param dcu_interface Owning Dcu_Interface; dereferenced here, so it must be non-null.
 */
Post::Post(sc_module_name name, Dcu_Interface* dcu_interface) : dcu_interface_(dcu_interface)

{

/* The sensitive/dont_initialize lines apply to the process declared just
   above them, so the statement order matters. */

SC_METHOD(RecvDcuResult);

sensitive << dcu_interface_->dcu_result_event_;

dont_initialize();

}

/**
 * @brief Method process: drains the result FIFO, printing and storing each
 *        value, then logs the time of reception.
 */
void Post::RecvDcuResult()
{
    // read() is only called while data is available.
    for (; dcu_result_in_.num_available() > 0;)
    {
        const int16_t sample = dcu_result_in_.read();

        cout << "recv dcu data: " << dec << (int)sample << endl;

        dcu_result_.push_back(sample);
    }

    cout << "post recv dcu data at " << sc_time_stamp() << endl;
}

feature_map_interface.h

#include <systemc.h>

#include <vector>

using namespace std;

/**
 * @brief Interface for pushing feature-map data.
 *
 * The method is declared public so it can be called through the interface;
 * with class-default (private) access it could only be overridden, never
 * invoked via a Feature_Map_Interface reference.
 */
class Feature_Map_Interface : virtual public sc_interface
{
public:
    /* Buffer-write interface on the non-instruction path (original note also
       mentions tia_scale/comp data). */
    virtual void FeatureMapWrite(uint8_t* data, uint32_t size) = 0;
};

mem_interface.h

#include <systemc.h>

#include <vector>

using namespace std;

/* Size of the HBUS RAM model in bytes (256 KiB). */
constexpr uint32_t RAM_SIZE = 256 * 1024;

/**
 * @brief Interface to the HBUS RAM model; also holds the backing storage.
 *
 * hbus_ram_ is zero-initialized so that a read issued before any write or
 * reset returns defined data.
 */
class Mem_Interface : virtual public sc_interface
{
public:
    /* Buffer-write interface on the non-instruction path (original note also
       mentions tia_scale/comp data). */
    virtual bool HbusMemWrite(uint32_t addr, uint8_t* val, uint32_t size, uint8_t end_flag = 0) = 0;

    virtual bool HbusMemRead(uint32_t addr, uint8_t* val, uint32_t size) = 0;

    virtual void HbusMemReset() = 0;

    /* Backing store, addressed by byte offset. */
    uint8_t hbus_ram_[RAM_SIZE] = {};
};

ahb_port_slave.h

#pragma once

#include <systemc.h>

#include "string.h"

#include "tlm_utils/simple_initiator_socket.h"

#include "tlm_utils/simple_target_socket.h"

using namespace std;

class Ahb_Port_Master;

class Ahb_Port_Slave {

public:

tlm_utils::simple_target_socket<Ahb_Port_Slave> t_ahb_socket_;

Ahb_Port_Slave(std::string name, sc_time latency = sc_core::SC_ZERO_TIME) {

t_ahb_socket_.register_b_transport(this, &Ahb_Port_Slave::Ahb_b_transport);

}

~Ahb_Port_Slave() {

}

/* ahb slave */

virtual void Ahb_b_transport(tlm::tlm_generic_payload& trans, sc_time& delay){};

int32_t Ahb_target_bind(tlm_utils::simple_initiator_socket<Ahb_Port_Master>& socket);

private:

const sc_time LATENCY;

};

ahb_port_master.h

#pragma once

#include <systemc.h>

#include "ahb_port_slave.h"

#include "string.h"

#include "tlm_utils/simple_initiator_socket.h"

#include "tlm_utils/simple_target_socket.h"

using namespace std;

/**
 * @brief Mix-in that gives a class a TLM initiator socket for AHB transactions.
 */
class Ahb_Port_Master {
public:
    /**
     * @param name    Port name (currently unused).
     * @param latency Stored in LATENCY.
     */
    Ahb_Port_Master(std::string name, sc_time latency = sc_core::SC_ZERO_TIME)
        : i_ahb_socket("i_ahb_socket")
        , LATENCY(latency)
    {
    }

    ~Ahb_Port_Master() {}

    /* Initiator socket, named "i_ahb_socket". */
    tlm_utils::simple_initiator_socket<Ahb_Port_Master> i_ahb_socket;

    /* Issues a transaction with the given command, buffer, address and length. */
    int32_t Ahb_Master_Send(tlm::tlm_command cmd, unsigned char* buf, sc_dt::uint64 addr, sc_dt::uint64 len);

    /* Binds the initiator socket to a slave's target socket. */
    int32_t Ahb_master_bind(tlm_utils::simple_target_socket<Ahb_Port_Slave>& socket);

private:
    const sc_time LATENCY;
};

element.cpp

#include <ittypes.h>

#include <systemc>

#include "tlm_utils/simple_initiator_socket.h"

#include "tlm_utils/simple_target_socket.h"

#include "tlm_utils/tlm_quantumkeeper.h"

using namespace sc_core;

using namespace std;

#define MIN(a, b) ((a) > (b) ? (b) : (a))

#include "element.h"

/**
 * Constructor: initializes the base ports, the ops members and the two
 * 8-entry FIFOs, then registers four threads that are all sensitive to the
 * rising edge of the clock and are not run during initialization.
 *
 * @param name  SystemC module name.
 * @param clock Clock whose positive edge triggers every thread.
 */
element::element(sc_core::sc_module_name name, sc_clock& clock)

: sc_module(name)

// NOTE(review): the AHB slave port is given the name "apb" — confirm this is intended.
, Ahb_Port_Slave("apb")

, Bus_Port_Master("bus")

, Instr_Port_Slave("instr")

, clock_(clock)

, fp16_ops_("fp16_ops")

, int_ops_("int_ops")

, fifo_left(8)

, fifo_right(8) {

// Each sensitive/dont_initialize pair applies to the SC_THREAD declared
// immediately before it, so the statement order matters.

SC_THREAD(ele_test_task);

sensitive << clock_.pos();

dont_initialize();

SC_THREAD(ele_left_read_task);

sensitive << clock_.pos();

dont_initialize();

SC_THREAD(ele_right_read_task);

sensitive << clock_.pos();

dont_initialize();

SC_THREAD(element_task);

sensitive << clock_.pos();

dont_initialize();

}

/**
 * @brief Test thread: issues a 512-byte bus read at address 0 every 100 ns.
 *
 * The function's closing brace was missing, which left the following
 * definition nested inside this one.
 */
void element::ele_test_task() {
    while (true) {
        // NOTE(review): `read` is not declared in the visible code; presumably
        // a member buffer of element — confirm.
        Bus_Master_Trans(tlm::tlm_command::TLM_READ_COMMAND, read, 0, 512);

        wait(100, SC_NS);
    }
}

/**
 * AHB blocking-transport callback. The implementation is currently disabled:
 * the body contains only commented-out code, so the call returns without
 * touching the transaction or the delay.
 *
 * NOTE(review): the response status is therefore never set here — confirm that
 * initiators do not depend on it.
 *
 * @param trans Generic payload of the transaction (unused).
 * @param delay Annotated delay (unused).
 */
void element::Ahb_b_transport(tlm::tlm_generic_payload& trans, sc_time& delay) {

// tlm::tlm_command cmd = trans.get_command();

// sc_dt::uint64 addr = trans.get_address();

// unsigned char* ptr = trans.get_data_ptr();

// unsigned int len = trans.get_data_length();

// unsigned int streaming_width = trans.get_streaming_width();

// unsigned char* be = trans.get_byte_enable_ptr();

// unsigned int be_len = trans.get_byte_enable_length();

// /* Add AHB register read/write handling here. */

// delay += LATENCY;

// trans.set_dmi_allowed(true);

// trans.set_response_status(tlm::TLM_OK_RESPONSE);

}

相关推荐
<但凡.8 分钟前
题海拾贝:力扣 138.随机链表的复制
数据结构·算法·leetcode
장숙혜8 分钟前
JavaScript正则表达式解析:模式、方法与实战案例
开发语言·javascript·正则表达式
安大小万25 分钟前
C++ 学习:深入理解 Linux 系统中的冯诺依曼架构
linux·开发语言·c++
随心Coding29 分钟前
【零基础入门Go语言】错误处理:如何更优雅地处理程序异常和错误
开发语言·后端·golang
T.Ree.33 分钟前
C语言_自定义类型(结构体,枚举,联合)
c语言·开发语言
Channing Lewis35 分钟前
python生成随机字符串
服务器·开发语言·python
田梓燊39 分钟前
图论 八字码
c++·算法·图论
小熊科研路(同名GZH)1 小时前
【Matlab高端绘图SCI绘图模板】第002期 绘制面积图
开发语言·matlab
鱼是一只鱼啊1 小时前
.netframeworke4.6.2升级.net8问题处理
开发语言·.net·.net8