提示:文章写完后,目录可以自动生成,如何生成可参考右边的帮助文档
前言
因为课题需要,调研了几个快速排序方法,并手写或者改进了若干待测试对象,包括记分板型冒泡排序(这个是别人的)、插入排序(这个是我写的)、双调排序(这个我改了又改,可能还会接着改进)、堆排序(这个是别人的)。以上都在7035开发板上测试了资源。除了堆排序截了时序图之外,其他几个我就直接给代码,外加资源占用情况和延迟(这个延迟是给定例子的延迟)。不介绍原理了,默认都懂!
这个领域比较小众,能查到的文献相比时髦的深度学习硬件加速器要少得多得多,但还是有存在价值。外加上开源的排序器硬件加速方案,按照作者的说法要么就是纯粹从原理介绍到电路设计,要么就额外添加几句自己的设计很牛,总之缺乏统一的对比,说服力不够。
我在这篇博客做的工作就是:
1、改进双调排序;
2、纯手撸一个4状态的插入排序;
3、合理比较。
书写、修改、调试不易,请大家多多惠存~
一、记分板型冒泡排序
以下是**.v**
// reference: https://mp.weixin.qq.com/s/BesXJzfle_ZvW__C6DBNrA
module sort_bubble #(parameter BITWIDTH = 8, parameter ELEMENTS = 32)(
input clk ,
input rst ,
input [BITWIDTH*ELEMENTS-1:0] data_in ,
input data_in_valid ,
output [BITWIDTH*ELEMENTS-1:0] data_out ,
output data_out_valid
);
reg [ 5:0] data_in_valid_ff;
reg [BITWIDTH*ELEMENTS-1:0] data_in_ff[5:0] ;
reg v[ELEMENTS-1:0][ELEMENTS-1:0] ;
reg [ 1:0] sum_1[31:0][15:0] ;
reg [ 2:0] sum_2[31:0][7:0] ;
reg [ 3:0] sum_3[31:0][3:0] ;
reg [ 4:0] sum_4[31:0][1:0] ;
reg [ 5:0] sum_5[31:0] ;
reg [BITWIDTH-1:0] data_out_temp[ELEMENTS-1:0] ;
reg data_out_valid_temp;
genvar i;
genvar j;
always @(posedge clk ) begin
if(rst == 1'b1)begin
data_in_valid_ff <= 6'b0;
end
else begin
data_in_valid_ff <= {data_in_valid_ff[4:0], data_in_valid};
end
end
always @(posedge clk ) begin
data_in_ff[0] <= data_in;
end
generate
for ( i = 0; i < 5 ; i = i + 1 ) begin : LOOP_DATA_IN
always @(posedge clk ) begin
data_in_ff[i+1] <= data_in_ff[i];
end
end
endgenerate
generate
for ( i = 0 ; i < 32 ; i = i + 1 ) begin : LOOP_V_I
for ( j = i ; j < 32 ; j = j + 1) begin : LOOP_V_J
always @(posedge clk ) begin
if(data_in_valid == 1'b1)begin
v[i][j] <= data_in[i*8 +: 8] >= data_in[j*8 +: 8]; // 2D Parallel
v[j][i] <= data_in[i*8 +: 8] < data_in[j*8 +: 8]; // 2D Parallel
end
end
end
end
endgenerate
generate
for ( i = 0 ; i < 32 ; i = i + 1 ) begin : LOOP_SUM_1_I
for ( j = 0 ; j < 16 ; j = j + 1) begin : LOOP_SUM_1_J
always @(posedge clk ) begin
if(data_in_valid_ff[0] == 1'b1)begin
sum_1[i][j] <= v[i][j*2] + v[i][j*2 + 1];
end
end
end
end
endgenerate
generate
for ( i = 0 ; i < 32 ; i = i + 1 ) begin : LOOP_SUM_2_I
for ( j = 0 ; j < 8 ; j = j + 1) begin : LOOP_SUM_2_J
always @(posedge clk ) begin
if(data_in_valid_ff[1] == 1'b1)begin
sum_2[i][j] <= sum_1[i][j*2] + sum_1[i][j*2 + 1];
end
end
end
end
endgenerate
generate
for ( i = 0 ; i < 32 ; i = i + 1 ) begin : LOOP_SUM_3_I
for ( j = 0 ; j < 4 ; j = j + 1) begin : LOOP_SUM_3_J
always @(posedge clk ) begin
if(data_in_valid_ff[2] == 1'b1)begin
sum_3[i][j] <= sum_2[i][j*2] + sum_2[i][j*2 + 1];
end
end
end
end
endgenerate
generate
for ( i = 0 ; i < 32 ; i = i + 1 ) begin : LOOP_SUM_4_I
for ( j = 0 ; j < 2 ; j = j + 1) begin : LOOP_SUM_4_J
always @(posedge clk ) begin
if(data_in_valid_ff[3] == 1'b1)begin
sum_4[i][j] <= sum_3[i][j*2] + sum_3[i][j*2 + 1];
end
end
end
end
endgenerate
generate
for ( i = 0 ; i < 32 ; i = i + 1 ) begin : LOOP_SUM_5_I
always @(posedge clk ) begin
if(data_in_valid_ff[4] == 1'b1)begin
sum_5[i] <= sum_4[i][0] + sum_4[i][1];
end
end
end
endgenerate
always @(posedge clk ) begin : LOOP_DATA_OUT_TEMP_CLK
integer k;
for ( k = 0; k < 32; k = k + 1) begin : LOOP_DATA_OUT_TEMP
if(data_in_valid_ff[5] == 1'b1)begin
data_out_temp[sum_5[k]] <= data_in_ff[5][k*8 +: 8];
data_out_valid_temp <= 1'b1;
end
else begin
data_out_temp[sum_5[k]] <= 8'd0;
data_out_valid_temp <= 1'b0;
end
end
end
generate
for ( i = 0 ; i < 32 ; i = i + 1) begin : LOOP_DATA_OUT
assign data_out[i*8 +: 8] = data_out_temp[i] ;
assign data_out_valid = data_out_valid_temp;
end
endgenerate
endmodule
以下是testbench
`timescale 1ns / 1ps
module tb_bubble(
);
reg clk;
reg rst;
reg [7:0] data_in_0;
reg [7:0] data_in_1;
reg [7:0] data_in_2;
reg [7:0] data_in_3;
reg [7:0] data_in_4;
reg [7:0] data_in_5;
reg [7:0] data_in_6;
reg [7:0] data_in_7;
reg [7:0] data_in_8;
reg [7:0] data_in_9;
reg [7:0] data_in_10;
reg [7:0] data_in_11;
reg [7:0] data_in_12;
reg [7:0] data_in_13;
reg [7:0] data_in_14;
reg [7:0] data_in_15;
reg [7:0] data_in_16;
reg [7:0] data_in_17;
reg [7:0] data_in_18;
reg [7:0] data_in_19;
reg [7:0] data_in_20;
reg [7:0] data_in_21;
reg [7:0] data_in_22;
reg [7:0] data_in_23;
reg [7:0] data_in_24;
reg [7:0] data_in_25;
reg [7:0] data_in_26;
reg [7:0] data_in_27;
reg [7:0] data_in_28;
reg [7:0] data_in_29;
reg [7:0] data_in_30;
reg [7:0] data_in_31;
wire [255:0] data_in;
reg data_in_valid;
wire [255:0] data_out;
wire data_out_valid;
initial begin
clk = 1'b0;
rst = 1'b1;
#50
rst = 1'b0;
end
always #5 clk = !clk;
always @(posedge clk ) begin
if(rst == 1'b1)begin
data_in_0 <= 8'd0;
data_in_1 <= 8'd0;
data_in_2 <= 8'd0;
data_in_3 <= 8'd0;
data_in_4 <= 8'd0;
data_in_5 <= 8'd0;
data_in_6 <= 8'd0;
data_in_7 <= 8'd0;
data_in_8 <= 8'd0;
data_in_9 <= 8'd0;
data_in_10 <= 8'd0;
data_in_11 <= 8'd0;
data_in_12 <= 8'd0;
data_in_13 <= 8'd0;
data_in_14 <= 8'd0;
data_in_15 <= 8'd0;
data_in_16 <= 8'd0;
data_in_17 <= 8'd0;
data_in_18 <= 8'd0;
data_in_19 <= 8'd0;
data_in_20 <= 8'd0;
data_in_21 <= 8'd0;
data_in_22 <= 8'd0;
data_in_23 <= 8'd0;
data_in_24 <= 8'd0;
data_in_25 <= 8'd0;
data_in_26 <= 8'd0;
data_in_27 <= 8'd0;
data_in_28 <= 8'd0;
data_in_29 <= 8'd0;
data_in_30 <= 8'd0;
data_in_31 <= 8'd0;
data_in_valid <= 1'b0;
end
else begin
data_in_0 <= {$random} % 255;
data_in_1 <= {$random} % 255;
data_in_2 <= {$random} % 255;
data_in_3 <= {$random} % 255;
data_in_4 <= {$random} % 255;
data_in_5 <= {$random} % 255;
data_in_6 <= {$random} % 255;
data_in_7 <= {$random} % 255;
data_in_8 <= {$random} % 255;
data_in_9 <= {$random} % 255;
data_in_10 <= {$random} % 255;
data_in_11 <= {$random} % 255;
data_in_12 <= {$random} % 255;
data_in_13 <= {$random} % 255;
data_in_14 <= {$random} % 255;
data_in_15 <= {$random} % 255;
data_in_16 <= {$random} % 255;
data_in_17 <= {$random} % 255;
data_in_18 <= {$random} % 255;
data_in_19 <= {$random} % 255;
data_in_20 <= {$random} % 255;
data_in_21 <= {$random} % 255;
data_in_22 <= {$random} % 255;
data_in_23 <= {$random} % 255;
data_in_24 <= {$random} % 255;
data_in_25 <= {$random} % 255;
data_in_26 <= {$random} % 255;
data_in_27 <= {$random} % 255;
data_in_28 <= {$random} % 255;
data_in_29 <= {$random} % 255;
data_in_30 <= {$random} % 255;
data_in_31 <= {$random} % 255;
data_in_valid <= 1'b1;
end
end
assign data_in = {data_in_0, data_in_1, data_in_2, data_in_3, data_in_4, data_in_5, data_in_6, data_in_7, data_in_8, data_in_9, data_in_10, data_in_11, data_in_12, data_in_13, data_in_14, data_in_15,data_in_16, data_in_17, data_in_18, data_in_19, data_in_20, data_in_21, data_in_22, data_in_23,data_in_24, data_in_25, data_in_26, data_in_27, data_in_28, data_in_29, data_in_30, data_in_31};
sort_bubble sort_bubble_u(
.clk (clk ),
.rst (rst ),
.data_in (data_in ),
.data_in_valid (data_in_valid ),
.data_out (data_out ),
.data_out_valid (data_out_valid)
);
endmodule
二、插入排序
以下是**.v** :
module sort_insertion #(parameter BITWIDTH = 8, parameter ELEMENTS = 32)(
input clk,
input rst_n,
input [BITWIDTH*ELEMENTS-1:0] data_in,
output reg [BITWIDTH*ELEMENTS-1:0] data_out
);
reg [7:0] array [0:31];
reg [5:0] i, j;
reg [7:0] key;
// State machine states
reg [2:0] state_c;
reg [2:0] state_n;
parameter IDLE = 3'b000,
SORT = 3'b001,
BREAK= 3'b010,
DONE = 3'b011;
genvar p;
generate
for (p=0; p<32; p=p+1) begin: ASSIGN_FOR_DATA_OUT
always @(posedge clk or negedge rst_n) begin
data_out[p*8 +: 8] <= array[p];
end
end
endgenerate
// State Transfer - First
always @(posedge clk or negedge rst_n) begin
if (!rst_n) begin
state_c <= IDLE;
end
else begin
state_c <= state_n;
end
end
// State Transfer - Second
always @(*) begin
case (state_c)
IDLE: begin
state_n = BREAK;
end
SORT: begin
if (j == 0 || array[j] <= key) state_n = BREAK;
else state_n = state_c;
end
BREAK: begin
if (i > 0 && i < 31) state_n = SORT;
else if (i == 31) state_n = DONE;
else state_n = state_c;
end
default: state_n = DONE;
endcase
end
// State Transfer - Third
always @(posedge clk or negedge rst_n) begin
if (!rst_n) begin i <= 0; j <= 0; key <= 8'd0; end
else if (state_c == SORT) begin
if (j == 0 || array[j] <= key) begin
i <= i; j <= j - 1; key <= array[i]; // 同时跳转到break
end
else begin
i <= i; j <= j - 1; key <= key; // 接着比较
end
end
else if (state_c == BREAK) begin
if ( i <= 30) begin
i <= i + 1; j <= i; key <= array[i+1];
end
else begin
i <= i + 1; j <= 0; key <= 8'd0;
end
end
else begin i <= 0; j <= 0; key <= 8'd0; end
end
always @(posedge clk or negedge rst_n) begin
if (!rst_n) begin
array[0] <= 0;
array[1] <= 0;
array[2] <= 0;
array[3] <= 0;
array[4] <= 0;
array[5] <= 0;
array[6] <= 0;
array[7] <= 0;
array[8] <= 0;
array[9] <= 0;
array[10] <= 0;
array[11] <= 0;
array[12] <= 0;
array[13] <= 0;
array[14] <= 0;
array[15] <= 0;
array[16] <= 0;
array[17] <= 0;
array[18] <= 0;
array[19] <= 0;
array[20] <= 0;
array[21] <= 0;
array[22] <= 0;
array[23] <= 0;
array[24] <= 0;
array[25] <= 0;
array[26] <= 0;
array[27] <= 0;
array[28] <= 0;
array[29] <= 0;
array[30] <= 0;
array[31] <= 0;
end
else begin
if (state_c == IDLE) begin
array[0] <= data_in[0*8 +: 8];
array[1] <= data_in[1*8 +: 8];
array[2] <= data_in[2*8 +: 8];
array[3] <= data_in[3*8 +: 8];
array[4] <= data_in[4*8 +: 8];
array[5] <= data_in[5*8 +: 8];
array[6] <= data_in[6*8 +: 8];
array[7] <= data_in[7*8 +: 8];
array[8] <= data_in[8*8 +: 8];
array[9] <= data_in[9*8 +: 8];
array[10] <= data_in[10*8 +: 8];
array[11] <= data_in[11*8 +: 8];
array[12] <= data_in[12*8 +: 8];
array[13] <= data_in[13*8 +: 8];
array[14] <= data_in[14*8 +: 8];
array[15] <= data_in[15*8 +: 8];
array[16] <= data_in[16*8 +: 8];
array[17] <= data_in[17*8 +: 8];
array[18] <= data_in[18*8 +: 8];
array[19] <= data_in[19*8 +: 8];
array[20] <= data_in[20*8 +: 8];
array[21] <= data_in[21*8 +: 8];
array[22] <= data_in[22*8 +: 8];
array[23] <= data_in[23*8 +: 8];
array[24] <= data_in[24*8 +: 8];
array[25] <= data_in[25*8 +: 8];
array[26] <= data_in[26*8 +: 8];
array[27] <= data_in[27*8 +: 8];
array[28] <= data_in[28*8 +: 8];
array[29] <= data_in[29*8 +: 8];
array[30] <= data_in[30*8 +: 8];
array[31] <= data_in[31*8 +: 8];
end
else if (state_c == BREAK && i == 1 && array[j] > key) begin
array[j + 1] <= array[j];
array[j] <= key;
end
else if (state_c == SORT && array[j] > key) begin
array[j + 1] <= array[j];
array[j] <= key;
end
end
end
endmodule
以下是testbench:
module sort_insertion_tb #(parameter BITWIDTH = 8, parameter ELEMENTS = 32)();
reg clk;
reg rst_n;
reg [BITWIDTH*ELEMENTS-1:0] data_in;
wire [BITWIDTH*ELEMENTS-1:0] data_out;
sort_insertion #(BITWIDTH, ELEMENTS) uut (
.clk(clk),
.rst_n(rst_n),
.data_in(data_in),
.data_out(data_out)
);
// Clock generation
always #5 clk = ~clk;
initial begin
// Initialize inputs
clk = 0;
rst_n = 0;
// Apply reset
#10;
rst_n = 1;
// Load test data
data_in[BITWIDTH*0 +: BITWIDTH] = 8'd12;
data_in[BITWIDTH*1 +: BITWIDTH] = 8'd3;
data_in[BITWIDTH*2 +: BITWIDTH] = 8'd25;
data_in[BITWIDTH*3 +: BITWIDTH] = 8'd8;
data_in[BITWIDTH*4 +: BITWIDTH] = 8'd15;
data_in[BITWIDTH*5 +: BITWIDTH] = 8'd18;
data_in[BITWIDTH*6 +: BITWIDTH] = 8'd7;
data_in[BITWIDTH*7 +: BITWIDTH] = 8'd1;
data_in[BITWIDTH*8 +: BITWIDTH] = 8'd31;
data_in[BITWIDTH*9 +: BITWIDTH] = 8'd14;
data_in[BITWIDTH*10 +: BITWIDTH] = 8'd6;
data_in[BITWIDTH*11 +: BITWIDTH] = 8'd22;
data_in[BITWIDTH*12 +: BITWIDTH] = 8'd27;
data_in[BITWIDTH*13 +: BITWIDTH] = 8'd20;
data_in[BITWIDTH*14 +: BITWIDTH] = 8'd5;
data_in[BITWIDTH*15 +: BITWIDTH] = 8'd9;
data_in[BITWIDTH*16 +: BITWIDTH] = 8'd4;
data_in[BITWIDTH*17 +: BITWIDTH] = 8'd17;
data_in[BITWIDTH*18 +: BITWIDTH] = 8'd2;
data_in[BITWIDTH*19 +: BITWIDTH] = 8'd10;
data_in[BITWIDTH*20 +: BITWIDTH] = 8'd11;
data_in[BITWIDTH*21 +: BITWIDTH] = 8'd13;
data_in[BITWIDTH*22 +: BITWIDTH] = 8'd24;
data_in[BITWIDTH*23 +: BITWIDTH] = 8'd28;
data_in[BITWIDTH*24 +: BITWIDTH] = 8'd19;
data_in[BITWIDTH*25 +: BITWIDTH] = 8'd26;
data_in[BITWIDTH*26 +: BITWIDTH] = 8'd23;
data_in[BITWIDTH*27 +: BITWIDTH] = 8'd30;
data_in[BITWIDTH*28 +: BITWIDTH] = 8'd29;
data_in[BITWIDTH*29 +: BITWIDTH] = 8'd21;
data_in[BITWIDTH*30 +: BITWIDTH] = 8'd16;
data_in[BITWIDTH*31 +: BITWIDTH] = 8'd0;
// Wait for the sorting to complete
#3000;
// Display sorted data
$display("Sorted data:");
$display("data_out[0] = %d", data_out[BITWIDTH*0 +: BITWIDTH]);
$display("data_out[1] = %d", data_out[BITWIDTH*1 +: BITWIDTH]);
$display("data_out[2] = %d", data_out[BITWIDTH*2 +: BITWIDTH]);
$display("data_out[3] = %d", data_out[BITWIDTH*3 +: BITWIDTH]);
$display("data_out[4] = %d", data_out[BITWIDTH*4 +: BITWIDTH]);
$display("data_out[5] = %d", data_out[BITWIDTH*5 +: BITWIDTH]);
$display("data_out[6] = %d", data_out[BITWIDTH*6 +: BITWIDTH]);
$display("data_out[7] = %d", data_out[BITWIDTH*7 +: BITWIDTH]);
$display("data_out[8] = %d", data_out[BITWIDTH*8 +: BITWIDTH]);
$display("data_out[9] = %d", data_out[BITWIDTH*9 +: BITWIDTH]);
$display("data_out[10] = %d", data_out[BITWIDTH*10 +: BITWIDTH]);
$display("data_out[11] = %d", data_out[BITWIDTH*11 +: BITWIDTH]);
$display("data_out[12] = %d", data_out[BITWIDTH*12 +: BITWIDTH]);
$display("data_out[13] = %d", data_out[BITWIDTH*13 +: BITWIDTH]);
$display("data_out[14] = %d", data_out[BITWIDTH*14 +: BITWIDTH]);
$display("data_out[15] = %d", data_out[BITWIDTH*15 +: BITWIDTH]);
$display("data_out[16] = %d", data_out[BITWIDTH*16 +: BITWIDTH]);
$display("data_out[17] = %d", data_out[BITWIDTH*17 +: BITWIDTH]);
$display("data_out[18] = %d", data_out[BITWIDTH*18 +: BITWIDTH]);
$display("data_out[19] = %d", data_out[BITWIDTH*19 +: BITWIDTH]);
$display("data_out[20] = %d", data_out[BITWIDTH*20 +: BITWIDTH]);
$display("data_out[21] = %d", data_out[BITWIDTH*21 +: BITWIDTH]);
$display("data_out[22] = %d", data_out[BITWIDTH*22 +: BITWIDTH]);
$display("data_out[23] = %d", data_out[BITWIDTH*23 +: BITWIDTH]);
$display("data_out[24] = %d", data_out[BITWIDTH*24 +: BITWIDTH]);
$display("data_out[25] = %d", data_out[BITWIDTH*25 +: BITWIDTH]);
$display("data_out[26] = %d", data_out[BITWIDTH*26 +: BITWIDTH]);
$display("data_out[27] = %d", data_out[BITWIDTH*27 +: BITWIDTH]);
$display("data_out[28] = %d", data_out[BITWIDTH*28 +: BITWIDTH]);
$display("data_out[29] = %d", data_out[BITWIDTH*29 +: BITWIDTH]);
$display("data_out[30] = %d", data_out[BITWIDTH*30 +: BITWIDTH]);
$display("data_out[31] = %d", data_out[BITWIDTH*31 +: BITWIDTH]);
$finish;
end
endmodule
看一张调了很久的波形:
三、双调排序
以下是双调排序.v(目前来看,这段代码还有很大改进空间):
//
//
// Create Date: 22/03/2022
// Author: Bala Dhinesh
// Module Name: BitonicSortScalable
// Project Name: Bitonic Sorting in Verilog
//
//
// This code can work with any number of elements in the powers of 2. There are three primary states in the code, namely SORT, MERGE_SETUP, MERGE.
// **SORT**: Sort the array for every eight elements.
// **MERGE_SETUP**: This will make a bitonic sequence for the entire array.
// **MERGE**: Do the bitonic sort from the bitonic sequence array obtained from MERGE_SETUP.
// This code uses eight elements as a base for hardware and scaled from it.
// reference: https://github.com/BalaDhinesh/Bitonic-Sorting-In-Verilog
// another reference: https://github.com/john9636/SortingNetwork
module sort_bitonic #(parameter BITWIDTH = 8, parameter ELEMENTS = 32)(
input clk,
input rst_n,
input en_i,
input [ELEMENTS*BITWIDTH-1:0] in,
// add signal for max
// 32-max1
input enable_SORT32_max1,
// 16-max1
input enable_SORT16_max1,
// The reason of multipling 8 is 8 group of 4-max1
// 4 group of 8-max1, 2 group of 16-max1, 1 group of 32-max1
output reg [BITWIDTH*8-1:0] sort_max,
output reg done_o,
output reg [ELEMENTS*BITWIDTH-1:0] out
);
// FSM states
localparam START = 3'b000, // 0
SETUP = 3'b001, // 1
SORT = 3'b010, // 2
DONE = 3'b011, // 3
MERGE_SETUP = 3'b100, // 4
MERGE = 3'b101, // 5
IDLE = 3'b111; // 7
reg positive; // sort ascending or descending for intermediate sub-arrays
reg [2:0] state; // state of FSM
reg [$clog2(ELEMENTS)-1:0] stage;
reg [7:0] d[0:7]; // temporary register array
reg [2:0] step;
// Register variables for Bitonic merge
reg [$clog2(ELEMENTS):0] compare;
reg [$clog2(ELEMENTS)-1:0] i_MERGE;
reg [$clog2(ELEMENTS)-1:0] sum;
reg [$clog2(ELEMENTS)-1:0] sum_max;
reg [$clog2(ELEMENTS):0] STAGES = ELEMENTS/16;
reg [$clog2(ELEMENTS):0] STAGES_FIXED = ELEMENTS/16;
always @(posedge clk or negedge rst_n)
if (!rst_n) begin
out <= 0;
step <= 4'd0;
done_o <= 1'd0;
state <= START;
end else begin
case(state)
START:
begin
step <= 0;
done_o <= 1'd0;
compare <= ELEMENTS;
i_MERGE <= 0;
positive <= 1;
sum <= 8;
sum_max <= 8;
out <= in;
if(en_i) begin
state <= SETUP;
stage <= 0;
end
end
SETUP:
begin
if(stage <= (ELEMENTS/8)) begin
d[0] <= in[stage*8*BITWIDTH + 0*BITWIDTH +: 8];
d[1] <= in[stage*8*BITWIDTH + 1*BITWIDTH +: 8];
d[2] <= in[stage*8*BITWIDTH + 2*BITWIDTH +: 8];
d[3] <= in[stage*8*BITWIDTH + 3*BITWIDTH +: 8];
d[4] <= in[stage*8*BITWIDTH + 4*BITWIDTH +: 8];
d[5] <= in[stage*8*BITWIDTH + 5*BITWIDTH +: 8];
d[6] <= in[stage*8*BITWIDTH + 6*BITWIDTH +: 8];
d[7] <= in[stage*8*BITWIDTH + 7*BITWIDTH +: 8];
state <= SORT;
end
else begin
state <= START;
end
end
SORT:
begin
case(step)
0: begin
if(d[0] > d[1]) begin
d[0] <= d[1];
d[1] <= d[0];
end
if(d[2] < d[3]) begin
d[2] <= d[3];
d[3] <= d[2];
end
if(d[4] > d[5]) begin
d[4] <= d[5];
d[5] <= d[4];
end
if(d[6] < d[7]) begin
d[6] <= d[7];
d[7] <= d[6];
end
step <= step + 1;
end
1: begin
if(d[0] > d[2]) begin
d[0] <= d[2];
d[2] <= d[0];
end
if(d[1] > d[3]) begin
d[1] <= d[3];
d[3] <= d[1];
end
if(d[4] < d[6]) begin
d[4] <= d[6];
d[6] <= d[4];
end
if(d[5] < d[7]) begin
d[5] <= d[7];
d[7] <= d[5];
end
step <= step + 1;
end
2: begin
if(d[0] > d[1]) begin
d[0] <= d[1];
d[1] <= d[0];
end
if(d[2] > d[3]) begin
d[2] <= d[3];
d[3] <= d[2];
end
if(d[4] < d[5]) begin
d[4] <= d[5];
d[5] <= d[4];
end
if(d[6] < d[7]) begin
d[6] <= d[7];
d[7] <= d[6];
end
step <= step + 1;
end
3: begin
if(stage%2 ==0) begin
if(d[0] > d[4]) begin
d[0] <= d[4];
d[4] <= d[0];
end
if(d[1] > d[5]) begin
d[1] <= d[5];
d[5] <= d[1];
end
if(d[2] > d[6]) begin
d[2] <= d[6];
d[6] <= d[2];
end
if(d[3] > d[7]) begin
d[3] <= d[7];
d[7] <= d[3];
end
end
else begin
if(d[0] < d[4]) begin
d[0] <= d[4];
d[4] <= d[0];
end
if(d[1] < d[5]) begin
d[1] <= d[5];
d[5] <= d[1];
end
if(d[2] < d[6]) begin
d[2] <= d[6];
d[6] <= d[2];
end
if(d[3] < d[7]) begin
d[3] <= d[7];
d[7] <= d[3];
end
end
step <= step + 1;
end
4: begin
if(stage%2 ==0) begin
if(d[0] > d[2]) begin
d[0] <= d[2];
d[2] <= d[0];
end
if(d[1] > d[3]) begin
d[1] <= d[3];
d[3] <= d[1];
end
if(d[4] > d[6]) begin
d[4] <= d[6];
d[6] <= d[4];
end
if(d[5] > d[7]) begin
d[5] <= d[7];
d[7] <= d[5];
end
end
else begin
if(d[0] < d[2]) begin
d[0] <= d[2];
d[2] <= d[0];
end
if(d[1] < d[3]) begin
d[1] <= d[3];
d[3] <= d[1];
end
if(d[4] < d[6]) begin
d[4] <= d[6];
d[6] <= d[4];
end
if(d[5] < d[7]) begin
d[5] <= d[7];
d[7] <= d[5];
end
end
step <= step + 1;
end
5: begin
if(stage%2 ==0) begin
if(d[0] > d[1]) begin
d[0] <= d[1];
d[1] <= d[0];
end
if(d[2] > d[3]) begin
d[2] <= d[3];
d[3] <= d[2];
end
if(d[4] > d[5]) begin
d[4] <= d[5];
d[5] <= d[4];
end
if(d[6] > d[7]) begin
d[6] <= d[7];
d[7] <= d[6];
end
end
else begin
if(d[0] < d[1]) begin
d[0] <= d[1];
d[1] <= d[0];
end
if(d[2] < d[3]) begin
d[2] <= d[3];
d[3] <= d[2];
end
if(d[4] < d[5]) begin
d[4] <= d[5];
d[5] <= d[4];
end
if(d[6] < d[7]) begin
d[6] <= d[7];
d[7] <= d[6];
end
end
step <= 4'd0;
state <= DONE;
end
default: step <= 4'd0;
endcase
end
DONE: begin
if(stage == (ELEMENTS/8 - 1)) begin
out[stage*8*BITWIDTH + 0*BITWIDTH +: 8] <= d[0];
out[stage*8*BITWIDTH + 1*BITWIDTH +: 8] <= d[1];
out[stage*8*BITWIDTH + 2*BITWIDTH +: 8] <= d[2];
out[stage*8*BITWIDTH + 3*BITWIDTH +: 8] <= d[3];
out[stage*8*BITWIDTH + 4*BITWIDTH +: 8] <= d[4];
out[stage*8*BITWIDTH + 5*BITWIDTH +: 8] <= d[5];
out[stage*8*BITWIDTH + 6*BITWIDTH +: 8] <= d[6];
out[stage*8*BITWIDTH + 7*BITWIDTH +: 8] <= d[7];
if(ELEMENTS == 8) state <= IDLE;
// add code by dention 20240514 start
// add 2 group of 16-max1
else begin
if (enable_SORT16_max1) begin
if (out[0*8*BITWIDTH + 7*BITWIDTH +: 8] > out[1*8*BITWIDTH + 0*BITWIDTH +: 8]) begin
sort_max[BITWIDTH-1:0] <= out[0*8*BITWIDTH + 7*BITWIDTH +: 8]; // 16-max1
end
else begin
sort_max[BITWIDTH-1:0] <= out[1*8*BITWIDTH + 0*BITWIDTH +: 8];
end
if (out[2*8*BITWIDTH + 7*BITWIDTH +: 8] > out[3*8*BITWIDTH + 0*BITWIDTH +: 8]) begin
sort_max[2*BITWIDTH-1:BITWIDTH] <= out[2*8*BITWIDTH + 7*BITWIDTH +: 8]; // 16-max1
end
else begin
sort_max[2*BITWIDTH-1:BITWIDTH] <= out[3*8*BITWIDTH + 0*BITWIDTH +: 8];
end
sort_max[8*BITWIDTH-1:2*BITWIDTH] <= 0;
state <= IDLE;
done_o <= 1;
end
else begin
sort_max[8*BITWIDTH-1:0] <= 0;
state <= MERGE_SETUP;
end
end
stage <= 0;
sum <= 8;
i_MERGE <= 0;
compare <= 16;
end
else if(stage < (ELEMENTS/8)) begin
out[stage*8*BITWIDTH + 0*BITWIDTH +: 8] <= d[0];
out[stage*8*BITWIDTH + 1*BITWIDTH +: 8] <= d[1];
out[stage*8*BITWIDTH + 2*BITWIDTH +: 8] <= d[2];
out[stage*8*BITWIDTH + 3*BITWIDTH +: 8] <= d[3];
out[stage*8*BITWIDTH + 4*BITWIDTH +: 8] <= d[4];
out[stage*8*BITWIDTH + 5*BITWIDTH +: 8] <= d[5];
out[stage*8*BITWIDTH + 6*BITWIDTH +: 8] <= d[6];
out[stage*8*BITWIDTH + 7*BITWIDTH +: 8] <= d[7];
state <= SETUP;
stage <= stage + 1;
end
else begin
out <= 110;
state <= IDLE;
end
end
MERGE_SETUP:
begin
if(STAGES == ELEMENTS | STAGES_FIXED == 1) begin
if(sum == ELEMENTS/2) begin
// add code by dention 20240514 start
// add 32-max1
if (enable_SORT32_max1) begin
// choose double sort bitonic 16 max1 and compare two max1 to gain the max in 32
if (out[1*8*BITWIDTH + 7*BITWIDTH +: 8] > out[2*8*BITWIDTH + 0*BITWIDTH +: 8]) begin
sort_max[BITWIDTH-1:0] <= out[1*8*BITWIDTH + 7*BITWIDTH +: 8]; // 32-max1
end
else begin
sort_max[BITWIDTH-1:0] <= out[2*8*BITWIDTH + 0*BITWIDTH +: 8]; // 32-max1
end
sort_max[BITWIDTH*8-1:BITWIDTH] <= 0;
state <= IDLE;
done_o <= 1;
end
// add code by dention 20240514 end
else begin
sort_max[BITWIDTH*8-1:0] <= 0;
state <= MERGE;
end
end
else begin
sum <= sum_max * 2; //16
sum_max <= sum_max * 2; //16
state <= MERGE_SETUP;
i_MERGE <= 0;
compare <= sum_max*4; //64
positive <= 1;
stage <= 0;
STAGES <= STAGES_FIXED / 2; // 2
STAGES_FIXED <= STAGES_FIXED / 2; // 1
end
end
// across-0 min-max-min-max 1 period
// across-1 min-max-min-max 2 period
// across-3 min-max-min-max 3 period
// across-7 min-max-min-max 4 period
else begin
if((sum + i_MERGE) < compare && (compare <= ELEMENTS) && (stage < STAGES)) begin
if(positive) begin
if(out[i_MERGE*BITWIDTH +: 8] > out[(i_MERGE+sum)*BITWIDTH +: 8]) begin
out[i_MERGE*BITWIDTH +: 8] <= out[(i_MERGE+sum)*BITWIDTH +: 8];
out[(i_MERGE+sum)*BITWIDTH +: 8] <= out[i_MERGE*BITWIDTH +: 8];
end
end
else begin
if(out[i_MERGE*BITWIDTH +: 8] < out[(i_MERGE+sum)*BITWIDTH +: 8]) begin
out[i_MERGE*BITWIDTH +: 8] <= out[(i_MERGE+sum)*BITWIDTH +: 8];
out[(i_MERGE+sum)*BITWIDTH +: 8] <= out[i_MERGE*BITWIDTH +: 8];
end
end
if ((sum + i_MERGE) >= (compare - 1)) begin
i_MERGE <= compare;
compare <= compare + 2*sum;
stage = stage + 1;
if(STAGES == 2) begin
if(stage == 0) positive <= 1;
else positive <= 0;
end
else begin
if((stage%(STAGES*2/STAGES_FIXED)) < STAGES/STAGES_FIXED) positive <= 1;
else positive <= 0;
end
state <= MERGE_SETUP;
end
else begin
i_MERGE = i_MERGE + 1;
state <= MERGE_SETUP;
end
end
else begin
state <= MERGE_SETUP;
i_MERGE <= 0;
positive <= 1;
sum <= sum / 2;
compare <= sum;
stage <= 0;
STAGES <= STAGES * 2;
end
end
end
MERGE:
begin
if(sum == 1) begin
state <= IDLE;
done_o <= 1;
end
else begin
if((sum + i_MERGE) < ELEMENTS) begin
if(out[i_MERGE*BITWIDTH +: 8] > out[(i_MERGE+sum)*BITWIDTH +: 8]) begin
out[i_MERGE*BITWIDTH +: 8] <= out[(i_MERGE+sum)*BITWIDTH +: 8];
out[(i_MERGE+sum)*BITWIDTH +: 8] <= out[i_MERGE*BITWIDTH +: 8];
end
if ((sum + i_MERGE) >= (compare - 1)) begin
i_MERGE <= compare;
compare <= compare * 2;
end
else begin
i_MERGE = i_MERGE + 1;
state <= MERGE;
end
end
else begin
state <= MERGE;
i_MERGE <= 0;
sum <= sum / 2;
compare <= sum;
end
end
end
IDLE: state <= IDLE;
default: state <= START;
endcase
end
endmodule
以下是testbench:
`timescale 1ns/1ps
`define clk_period 20
module BitonicSortScalable_tb #(
parameter BITWIDTH = 8, // Bitwidth of each element
parameter ELEMENTS = 32 // Number of elements to be sorted. This value must be a powers of two
)
();
reg clk, rst_n, en_i;
reg [ELEMENTS*BITWIDTH-1:0] in;
wire done_o;
wire [ELEMENTS*BITWIDTH-1:0] out;
reg enable_SORT32_max1;
reg enable_SORT16_max1;
wire [BITWIDTH*8-1:0] sort_max;
sort_bitonic bitonic_SORT_u(
clk,
rst_n,
en_i,
in,
enable_SORT32_max1,
enable_SORT16_max1,
sort_max,
done_o,
out
);
integer i;
initial begin
clk = 1'b1;
end
always #(`clk_period/2) begin
clk = ~clk;
end
initial begin
rst_n = 0;
en_i = 0;
in = 0;
enable_SORT16_max1 = 0;
enable_SORT32_max1 = 0;
#(`clk_period);
rst_n = 1;
en_i = 1;
enable_SORT16_max1 = 1;
// Input array vector to sort.
// Increase the number of elements based on ELEMENTS parameter
in ={ 8'd28, 8'd23, 8'd24, 8'd16, 8'd11, 8'd25, 8'd29, 8'd1,
8'd10, 8'd32, 8'd21, 8'd2, 8'd27, 8'd31, 8'd3, 8'd30,
8'd15, 8'd13, 8'd0, 8'd8, 8'd5, 8'd18, 8'd22, 8'd26,
8'd4, 8'd6, 8'd9, 8'd19, 8'd20, 8'd7, 8'd14, 8'd17
};
#(`clk_period);
en_i = 0;
#(`clk_period*10000);
$display("Input array:");
for(i=0;i<ELEMENTS;i=i+1)
$write(in[i*BITWIDTH +: 8]);
$display("\nOutput sorted array:");
for(i=0;i<ELEMENTS;i=i+1)
$write(out[i*BITWIDTH +: 8]);
$display("\ndone %d", done_o);
$finish;
end
endmodule
四、堆排序
以下是堆排序的.v代码:
// reference: https://zhuanlan.zhihu.com/p/32166363
`timescale 1ns / 1ps
module sort_heap
#(
parameter addr_width = 5,
parameter data_width = 8
)
(
input clk,
input rst_n,
input en,
input clr,
output reg done,
input [addr_width - 1:0] parent,
input [addr_width - 1:0] length,
output reg wea,
output reg [ addr_width - 1:0 ] addra,
output reg [ data_width - 1:0 ] data_we,
input [ data_width - 1:0 ] data_re
);
reg [data_width - 1:0] temp;
reg [addr_width :0] parent_r;//attention: For recognize the parent, we must expand data width of it
reg [addr_width :0] child_r;
reg [addr_width :0] length_r;
parameter IDLE = 6'b000001;
parameter BEGINA = 6'b000010;
parameter GET = 6'b000100;
parameter COMPARE = 6'b001000;
parameter WRITE = 6'b010000;
parameter COMPLETE= 6'b100000;
reg [5:0] state;
reg [5:0] next_state;
reg [7:0] cnt;
reg [data_width - 1:0] child_compare;
always@(posedge clk or negedge rst_n)
begin
if(!rst_n) begin state <= IDLE; end
else begin state <= next_state; end
end
always@(*)
begin
case(state)
IDLE: begin
if(en) begin next_state = BEGINA; end
else begin next_state = IDLE; end
end
BEGINA:begin
if(cnt == 8'd2) begin next_state = GET; end
else begin next_state = BEGINA; end
end
GET: begin
if(child_r >= length_r) begin next_state = COMPLETE; end
else if(cnt == 8'd4) begin next_state = COMPARE; end
else begin next_state = GET; end
end
COMPARE: begin
if(temp >= child_compare) begin next_state = COMPLETE; end
else begin next_state = WRITE; end
end
WRITE: begin
if(cnt == 8'd1) begin next_state = GET; end
else begin next_state = WRITE; end
end
COMPLETE:begin
if(clr) begin next_state = IDLE; end
else begin next_state = COMPLETE; end
end
endcase
end
reg [data_width - 1:0] child_R;
reg [data_width - 1:0] child_L;
always@(posedge clk or negedge rst_n)
begin
if(!rst_n) begin done <= 1'b0; end
else
begin
case(state)
IDLE: begin
parent_r <= {1'b0, parent};
length_r <= {1'b0, length};
child_r <= 2*parent + 1'b1;
cnt <= 8'd0; child_R <= 0; child_L <= 0;
done <= 1'b0;
end
BEGINA:begin
if(cnt == 8'd0) begin addra <= parent_r; cnt <= cnt + 1'b1; end
else if(cnt == 8'd2) begin temp <= data_re; cnt <= 1'b0; end
else begin cnt <= cnt + 1'b1; end
end
GET: begin
if(child_r >= length_r) begin addra <= addra; end
else
begin
if(cnt == 8'd0) begin addra <= child_r; cnt <= cnt + 1'b1; end
else if(cnt == 8'd1) begin addra <= child_r + 1'b1; cnt <= cnt + 1'b1; end
else if(cnt == 8'd2) begin child_L <= data_re; cnt <= cnt + 1'b1; end
else if(cnt == 8'd3) begin child_R <= data_re; cnt <= cnt + 1'b1; end
else if(cnt == 8'd4)
begin
if( (child_r + 1'b1 < length_r) && (child_R > child_L) )
begin
child_r <= child_r + 1'b1;
child_compare <= child_R;
end
else
begin
child_r <= child_r;
child_compare <= child_L;
end
cnt <= 8'd0;
end
else begin cnt <= cnt + 1'b1; end
end
end
COMPARE: begin end
WRITE: begin
if(cnt == 8'd0) begin
addra <= parent_r; wea <= 1'b1;
data_we <= child_compare; cnt <= cnt + 1'b1;
end
else if(cnt == 8'd1) begin
wea <= 1'b0; cnt <= 8'd0;
parent_r <= child_r;
child_r <= child_r*2 + 1'b1;
end
else begin cnt <= cnt; end
end
COMPLETE: begin
if(cnt == 8'd0) begin
wea <= 1'b1; addra <= parent_r;
data_we <= temp; cnt <= cnt + 1'b1;
end
else if(cnt == 8'd1)
begin
wea <= 1'b0;
cnt <= cnt + 1'b1;
done <= 1'b1;
end
else if(cnt == 8'd2)
begin
done <= 1'b0;
cnt <= 8'd2;
end
end
endcase
end
end
endmodule
以下是堆排序的top文件(注意事先在vivado的库内例化ROM,并载入coe,网上有大量关于coe的语法,我这边也贴上)
`timescale 1ns / 1ps
module sort_heap_top
#(
parameter addr_width = 5, //stack address width
parameter data_width = 8, //stack data width
parameter stack_deepth = 31 //stack deepth
)
(
input clk,
input rst_n
);
reg en; //initial module input: Enable initial process
reg clr; //initial module input: Reset initial process
wire done; //initial module output: One initial process have done
reg [addr_width - 1:0] parent; //initial module input: Parent
reg [addr_width - 1:0] length; //initial module input: Length of list
wire wea; //RAM module input: write enable
wire [addr_width - 1:0] addra; //RAM module input: write/read address
wire [data_width - 1:0] data_we; //RAM module input: write data
wire [data_width - 1:0] data_re; //RAM module output: read data
parameter BEGINA = 9'b0_0000_0001;//stage 1: stack initial
parameter RANK = 9'b0_0000_0010;
parameter FINISH = 9'b0_0000_0100;
parameter DONE = 9'b0_0000_1000;
parameter READ = 9'b0_0001_0000;//stage 2: rank of stack
parameter WRITE = 9'b0_0010_0000;
parameter RANK_2 = 9'b0_0100_0000;
parameter FINISH_2= 9'b0_1000_0000;
parameter DONE_2 = 9'b1_0000_0000;
reg [addr_width - 1:0] cnt; //counter in FSM stage 1/2
reg [addr_width - 1:0] cnt2; //counter in FSM stage 2
reg [8:0] state; //FSM state
reg [8:0] next_state; //FSM next state
reg [addr_width - 1:0] addr; //stack inital process read RAM address
reg initial_done; //stack initial done
reg [data_width - 1:0] list_i; //RANK process reg
reg [data_width - 1:0] list_0; //RANK process reg
reg wea_FSM; //wea signal from FSM
reg [data_width - 1:0] data_we_FSM; //write data form FSM
//FSM stage 1: state transform
always@(posedge clk or negedge rst_n)
begin
if(!rst_n) begin state <= BEGINA; end
else begin state <= next_state; end
end
//FSM stage 2: state change
always@(*)
begin
case(state)
BEGINA: begin next_state = RANK; end //stack initial process begin
RANK: begin
if(done) begin next_state = FINISH; end
else begin next_state = RANK; end
end
FINISH:begin
if(addr == stack_deepth - 1 & cnt != {addr_width{1'b1}}) begin next_state = BEGINA; end
else if(addr == stack_deepth - 1 & cnt == {addr_width{1'b1}} ) begin next_state = DONE; end
else begin next_state = FINISH; end
end
DONE: begin next_state = READ; end //stack initial process have done
READ: begin //stack rank process begin
if(cnt == 3) begin next_state = WRITE; end
else begin next_state = READ; end
end
WRITE:begin
if(cnt == 2) begin next_state = RANK_2; end
else begin next_state = WRITE; end
end
RANK_2:begin
if(done) begin next_state = FINISH_2; end
else begin next_state = RANK_2; end
end
FINISH_2:begin
if(addr == stack_deepth - 1 & cnt2 != 0) begin next_state = READ; end
else if(addr == stack_deepth - 1 & cnt2 == 0) begin next_state = DONE_2; end
else begin next_state = FINISH_2; end
end
DONE_2:begin next_state = DONE_2; end//stack rank process done
endcase
end
//FSM stage 3: state output
always@(posedge clk or negedge rst_n)
begin
if(!rst_n) begin cnt <= stack_deepth/2; addr <= {addr_width{1'b1}}; initial_done <= 1'b0; wea_FSM <= 1'b0; end
else
begin
case(state)
BEGINA: begin //stack initial begin
en <= 1'b1;
clr <= 1'b0;
parent <= cnt;
length <= stack_deepth;
end
RANK: begin
clr <= 1'b0;
if(done) begin cnt <= cnt - 1'b1; clr <= 1'b1; en <= 1'b0; addr <= 4'd0; end
end
FINISH:begin clr <= 1'b0; addr <= addr + 1'b1; end
DONE: begin
initial_done <= 1'b1; //stack initial have done
cnt2 <= stack_deepth - 1;
cnt <= 0;
end
READ: begin //stack rank process begin
if(cnt == 0) begin addr <= 0; cnt <= cnt + 1'b1; end
else if(cnt == 1) begin addr <= cnt2; cnt <= cnt + 1'b1; end
else if(cnt == 2) begin list_0 <= data_re; cnt <= cnt + 1'b1; end
else if(cnt == 3) begin list_i <= data_re; cnt <= 0; end
else begin cnt <= cnt; end
end
WRITE:begin
if(cnt == 0) begin
wea_FSM <= 1'b1;
addr <= 0; data_we_FSM <= list_i;
cnt <= cnt + 1'b1;
end
else if(cnt == 1) begin
wea_FSM <= 1'b1;
addr <= cnt2; data_we_FSM <= list_0;
cnt <= cnt + 1'b1;
end
else if(cnt == 2) begin wea_FSM <= 1'b0; cnt <= 0; parent <= 0; length <= cnt2; en <= 1'b1; end
else begin cnt <= cnt; end
end
RANK_2:begin
if(done) begin cnt2 <= cnt2 - 1'b1; clr <= 1'b1; en <= 1'b0; addr <= 0; end
end
FINISH_2:begin
clr <= 1'b0; addr <= addr + 1'b1;
end
endcase
end
end
wire wea_initial;
wire [data_width - 1:0] data_we_initial;
//stack initial process
sort_heap U1
(
.clk(clk),
.rst_n(rst_n),
.en(en),
.clr(clr),
.done(done),
.parent(parent),
.length(length),
.wea(wea_initial),
.addra(addra),
.data_we(data_we_initial),
.data_re(data_re)
);
wire [addr_width - 1:0] RAM_addr;
assign wea = (state == WRITE) ? wea_FSM:wea_initial;
assign RAM_addr = (state == FINISH || state == READ || state == WRITE || state == FINISH_2) ? addr:addra;
assign data_we = (state == WRITE) ? data_we_FSM:data_we_initial;
//RAM module
blk_mem_gen_0 RAM1
(
.clka(clk),
.wea(wea),
.addra(RAM_addr),
.dina(data_we),
.douta(data_re)
);
endmodule
对应的coe文件如下:
;分号后的代码都被认为是注释内容
memory_initialization_radix=10;
memory_initialization_vector=
1,
3,
4,
5,
2,
6,
9,
7,
8,
0,
11,
15,
13,
19,
20,
16,
12,
10,
14,
11,
25,
21,
23,
22,
18,
27,
36,
29,
17,
24,
26,
30;
以下为对应的testbench:
`define clk_period 20
module sort_hep_top_tb
();
reg clk;
reg rst_n;
initial begin
clk = 1'b1;
end
always #(`clk_period/2) begin
clk = ~clk;
end
initial begin
rst_n = 0;
#(`clk_period);
rst_n = 1;
#(`clk_period*100000);
$finish;
end
sort_heap_top sort_heap_top_u(
clk,
rst_n
);
endmodule
五、资源占用和延时对比
直接贴图表示,不多废话!