FPGA实现流水式排序算法

该算法采用双调排序算法,是一种可流水的递推算法,且算法的消耗时长可算,具体细节参考视频:

https://www.bilibili.com/video/BV1S3thzWEnh/?spm_id_from=333.1387.homepage.video_card.click&vd_source=69fb997b62efa60ae1add8b53b6a5923

复制代码
module bitonic_sort(
	input						i_clk	,
	input						i_rst	,
	input		[16 * 16 - 1:0]	i_buff	,
	output		[16 * 16 - 1:0]	o_buff
    );
	localparam WIN = 16;
	wire	signed	[15:0]	w_data_buff	[0:15];
	
	genvar i;
	genvar j;
	
	generate
		for(i = 0; i < 16; i = i + 1) begin
			assign w_data_buff[i] = i_buff[i * 16 + 15:i * 16];
		end
	endgenerate
	/*
	双调排序:Bitonic 
		首先将相邻两个元素作为一个数组:
			(0,1)(2,3)(4,5)(6,7)(8,9)(10,11)(12,13)(14,15)
		将其按照顺序、逆序交替排序,并两个相邻数组组成一个四元素的双调数组
			(0,1,2,3)(4,5,6,7)(8,9,10,11)(12,13,14,15)
		通过对每个四元素数组进行2、1间隔排序,使四元素数组按照顺序、逆序交替排序,并两个相邻数组组成一个8元素数组
			(0,1,2,3,4,5,6,7)(8,9,10,11,12,13,14,15)
		对每个8元素数组进行4、2、1间隔排序,使其按照顺序、逆序交替排序,最终组成16元素的回调数组
			(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15)
		对回调数组按照8、4、2、1间隔排序,完成顺序或逆序排列
	*/
	reg	signed	[15:0]	r_bitonic_pipe0	[0:WIN - 1];//间隔1
	generate
		for(i = 0; i < WIN / 4; i = i + 1) begin
		
			always@(posedge i_clk or posedge i_rst) begin
				if(i_rst) begin
					r_bitonic_pipe0[i * 4] 	<= 'd0;
					r_bitonic_pipe0[i * 4 + 1]<= 'd0;
				end
				else begin
					if(w_data_buff[i * 4] > w_data_buff[i * 4 + 1]) begin//升序
						r_bitonic_pipe0[i * 4] 		<= w_data_buff[i * 4 + 1];
						r_bitonic_pipe0[i * 4 + 1]	<= w_data_buff[i * 4];
					end
					else begin
						r_bitonic_pipe0[i * 4] 		<= w_data_buff[i * 4];
						r_bitonic_pipe0[i * 4 + 1]	<= w_data_buff[i * 4 + 1];
					end
				end
			end
			
			always@(posedge i_clk or posedge i_rst) begin
				if(i_rst) begin
					r_bitonic_pipe0[i * 4 + 2]<= 'd0;
					r_bitonic_pipe0[i * 4 + 3]<= 'd0;
				end
				else begin
					if(w_data_buff[i * 4 + 2] < w_data_buff[i * 4 + 3]) begin//降序
						r_bitonic_pipe0[i * 4 + 2]<= w_data_buff[i * 4 + 3];
						r_bitonic_pipe0[i * 4 + 3]<= w_data_buff[i * 4 + 2];
					end
					else begin
						r_bitonic_pipe0[i * 4 + 2]<= w_data_buff[i * 4 + 2];
						r_bitonic_pipe0[i * 4 + 3]<= w_data_buff[i * 4 + 3];
					end
				end
			end
			
		end
	endgenerate
	
	reg	signed	[15:0]	r_bitonic_pipe1	[0:WIN - 1];//间隔2
	generate
		for(i = 0; i < WIN / 8; i = i + 1) begin
			for(j = 0; j < WIN / 8; j = j + 1) begin
			
				always@(posedge i_clk or posedge i_rst) begin
					if(i_rst) begin
						r_bitonic_pipe1[i * 8 + j] 	<= 'd0;
						r_bitonic_pipe1[i * 8 + j + 2]<= 'd0;
					end
					else begin
						if(r_bitonic_pipe0[i * 8 + j] > r_bitonic_pipe0[i * 8 + j + 2]) begin//升序
							r_bitonic_pipe1[i * 8 + j]		<= r_bitonic_pipe0[i * 8 + j  + 2];
							r_bitonic_pipe1[i * 8 + j + 2]	<= r_bitonic_pipe0[i * 8 + j];
						end
						else begin
							r_bitonic_pipe1[i * 8 + j]		<= r_bitonic_pipe0[i * 8 + j];
							r_bitonic_pipe1[i * 8 + j + 2]	<= r_bitonic_pipe0[i * 8 + j + 2];				
						end
					end
				end

				always@(posedge i_clk or posedge i_rst) begin
					if(i_rst) begin
						r_bitonic_pipe1[i * 8 + j + 4]<= 'd0;
						r_bitonic_pipe1[i * 8 + j + 6]<= 'd0;
					end
					else begin
						if(r_bitonic_pipe0[i * 8 + j + 4] < r_bitonic_pipe0[i * 8 + j + 6]) begin//降序
							r_bitonic_pipe1[i * 8 + j + 4]<= r_bitonic_pipe0[i * 8 + j + 6];
							r_bitonic_pipe1[i * 8 + j + 6]<= r_bitonic_pipe0[i * 8 + j + 4];
						end
						else begin
							r_bitonic_pipe1[i * 8 + j + 4]<= r_bitonic_pipe0[i * 8 + j + 4];
							r_bitonic_pipe1[i * 8 + j + 6]<= r_bitonic_pipe0[i * 8 + j + 6];				
						end
					end
				end
				
			end
		end
	endgenerate
	
	reg	signed	[15:0]	r_bitonic_pipe2	[0:WIN - 1];
	generate
		for(i = 0; i < WIN / 8; i = i + 1) begin
			for(j = 0; j < WIN / 8; j = j + 1) begin
				
				always@(posedge i_clk or posedge i_rst) begin
					if(i_rst) begin
						r_bitonic_pipe2[i * 8 + j * 2] 		<= 'd0;
						r_bitonic_pipe2[i * 8 + j * 2 + 1] 	<= 'd0;
					end
					else begin
						if(r_bitonic_pipe1[i * 8 + j * 2] > r_bitonic_pipe1[i * 8 + j * 2 + 1]) begin//升序
							r_bitonic_pipe2[i * 8 + j * 2]		<= r_bitonic_pipe1[i * 8 + j * 2+ 1];
							r_bitonic_pipe2[i * 8 + j * 2 + 1]	<= r_bitonic_pipe1[i * 8 + j * 2];
						end
						else begin
							r_bitonic_pipe2[i * 8 + j * 2]		<= r_bitonic_pipe1[i * 8 + j * 2];
							r_bitonic_pipe2[i * 8 + j * 2 + 1]	<= r_bitonic_pipe1[i * 8 + j * 2 + 1];
						end
					end
				end
				
				always@(posedge i_clk or posedge i_rst) begin
					if(i_rst) begin
						r_bitonic_pipe2[i * 8 + j * 2 + 4]	<= 'd0;
						r_bitonic_pipe2[i * 8 + j * 2 + 5]	<= 'd0;
					end
					else begin
						if(r_bitonic_pipe1[i * 8 + j * 2 + 4] < r_bitonic_pipe1[i * 8 + j * 2 + 5]) begin//降序
							r_bitonic_pipe2[i * 8 + j * 2 + 4]	<= r_bitonic_pipe1[i * 8 + j * 2 + 5];
							r_bitonic_pipe2[i * 8 + j * 2 + 5] 	<= r_bitonic_pipe1[i * 8 + j * 2 + 4];
						end
						else begin
							r_bitonic_pipe2[i * 8 + j * 2 + 4]	<= r_bitonic_pipe1[i * 8 + j * 2 + 4];
						    r_bitonic_pipe2[i * 8 + j * 2 + 5]	<= r_bitonic_pipe1[i * 8 + j * 2 + 5];
						end
					end
				end
				
			end
		end
	endgenerate
	
	reg	signed	[15:0]	r_bitonic_pipe3	[0:WIN - 1];//间隔4
	generate
		for(i = 0; i < WIN / 4; i = i + 1) begin
		
			always@(posedge i_clk or posedge i_rst) begin
				if(i_rst) begin
					r_bitonic_pipe3[i]	<= 'd0;
					r_bitonic_pipe3[i + 4]<= 'd0;
				end
				else begin
					if(r_bitonic_pipe2[i] > r_bitonic_pipe2[i + 4]) begin//升序
						r_bitonic_pipe3[i]		<= r_bitonic_pipe2[i + 4];
						r_bitonic_pipe3[i + 4]	<= r_bitonic_pipe2[i];
					end
					else begin
						r_bitonic_pipe3[i]		<= r_bitonic_pipe2[i];
						r_bitonic_pipe3[i + 4]	<= r_bitonic_pipe2[i + 4];
					end
				end	
			end
	
			always@(posedge i_clk or posedge i_rst) begin
				if(i_rst) begin
					r_bitonic_pipe3[i + 8] <= 'd0;
					r_bitonic_pipe3[i + 12]<= 'd0;
				end
				else begin
					if(r_bitonic_pipe2[i + 8] < r_bitonic_pipe2[i + 12]) begin//降序
						r_bitonic_pipe3[i + 8] <= r_bitonic_pipe2[i + 12];
						r_bitonic_pipe3[i + 12]<= r_bitonic_pipe2[i + 8];
					end
					else begin
						r_bitonic_pipe3[i + 8] <= r_bitonic_pipe2[i + 8];
						r_bitonic_pipe3[i + 12]<= r_bitonic_pipe2[i + 12];
					end
				end
			end
	
		end
	endgenerate
	
	reg	signed	[15:0]	r_bitonic_pipe4	[0:WIN - 1];//间隔2
	generate
		for(i = 0; i < WIN / 8; i = i + 1) begin
			for(j = 0; j < WIN / 8; j = j + 1) begin
			
				always@(posedge i_clk or posedge i_rst) begin
					if(i_rst) begin
						r_bitonic_pipe4[i * 4 + j] 		<= 'd0;
						r_bitonic_pipe4[i * 4 + j + 2]	<= 'd0;
					end
					else begin
						if(r_bitonic_pipe3[i * 4 + j] > r_bitonic_pipe3[i * 4 + j + 2]) begin
							r_bitonic_pipe4[i * 4 + j] 		<= r_bitonic_pipe3[i * 4 + j + 2];
							r_bitonic_pipe4[i * 4 + j + 2]	<= r_bitonic_pipe3[i * 4 + j];
						end
						else begin
							r_bitonic_pipe4[i * 4 + j] 		<= r_bitonic_pipe3[i * 4 + j];
						    r_bitonic_pipe4[i * 4 + j + 2]	<= r_bitonic_pipe3[i * 4 + j + 2];
						end
					end
				end

				always@(posedge i_clk or posedge i_rst) begin
					if(i_rst) begin
						r_bitonic_pipe4[i * 4 + j + 8]	<= 'd0;
						r_bitonic_pipe4[i * 4 + j + 10]	<= 'd0;
					end
					else begin
						if(r_bitonic_pipe3[i * 4 + j + 8] < r_bitonic_pipe3[i * 4 + j + 10]) begin
							r_bitonic_pipe4[i * 4 + j + 8]	<= r_bitonic_pipe3[i * 4 + j + 10];
							r_bitonic_pipe4[i * 4 + j + 10]	<= r_bitonic_pipe3[i * 4 + j + 8];
						end
						else begin
							r_bitonic_pipe4[i * 4 + j + 8] 	<= r_bitonic_pipe3[i * 4 + j + 8];
						    r_bitonic_pipe4[i * 4 + j + 10]	<= r_bitonic_pipe3[i * 4 + j + 10];
						end
					end
				end
			
			end		
		end
	endgenerate
	
	reg	signed	[15:0]	r_bitonic_pipe5	[0:WIN - 1];//间隔1
	generate
		for(i = 0; i < WIN / 4; i = i + 1) begin
			
			always@(posedge i_clk or posedge i_rst) begin
				if(i_rst) begin
					r_bitonic_pipe5[i * 2]		<= 'd0;
					r_bitonic_pipe5[i * 2 + 1]	<= 'd0;
				end
				else begin
					if(r_bitonic_pipe4[i * 2] > r_bitonic_pipe4[i * 2 + 1]) begin
						r_bitonic_pipe5[i * 2]		<= r_bitonic_pipe4[i * 2 + 1];
						r_bitonic_pipe5[i * 2 + 1]	<= r_bitonic_pipe4[i * 2];
					end
					else begin
						r_bitonic_pipe5[i * 2]		<= r_bitonic_pipe4[i * 2];
						r_bitonic_pipe5[i * 2 + 1]	<= r_bitonic_pipe4[i * 2 + 1];
					end
				end
			end
			
			always@(posedge i_clk or posedge i_rst) begin
				if(i_rst) begin
					r_bitonic_pipe5[i * 2 + 8]	<= 'd0;
					r_bitonic_pipe5[i * 2 + 9]	<= 'd0;
				end
				else begin
					if(r_bitonic_pipe4[i * 2 + 8] < r_bitonic_pipe4[i * 2 + 9]) begin
						r_bitonic_pipe5[i * 2 + 8]	<= r_bitonic_pipe4[i * 2 + 9];
						r_bitonic_pipe5[i * 2 + 9]	<= r_bitonic_pipe4[i * 2 + 8];
					end
					else begin
						r_bitonic_pipe5[i * 2 + 8]	<= r_bitonic_pipe4[i * 2 + 8];
						r_bitonic_pipe5[i * 2 + 9]	<= r_bitonic_pipe4[i * 2 + 9];
					end
				end
			end
			
		end
	endgenerate
	
	reg	signed	[15:0]	r_bitonic_pipe6	[0:WIN - 1];//间隔8
	generate
		for(i = 0; i < WIN / 2; i = i + 1) begin
			
			always@(posedge i_clk or posedge i_rst) begin
				if(i_rst) begin
					r_bitonic_pipe6[i]		<= 'd0;
					r_bitonic_pipe6[i + 8]	<= 'd0;
				end
				else begin
					if(r_bitonic_pipe5[i] > r_bitonic_pipe5[i + 8]) begin
						r_bitonic_pipe6[i]		<= r_bitonic_pipe5[i + 8];
						r_bitonic_pipe6[i + 8]	<= r_bitonic_pipe5[i];
					end
					else begin
						r_bitonic_pipe6[i]		<= r_bitonic_pipe5[i];
						r_bitonic_pipe6[i + 8]	<= r_bitonic_pipe5[i + 8];
					end
				end
			end
			
		end
	endgenerate
	
	reg	signed	[15:0]	r_bitonic_pipe7	[0:WIN - 1];//间隔4
	generate
		for(i = 0; i < WIN / 8; i = i + 1) begin
			for(j = 0; j < WIN / 4; j = j + 1) begin
			
				always@(posedge i_clk or posedge i_rst) begin
					if(i_rst) begin
						r_bitonic_pipe7[i * 8 + j]		<= 'd0;
						r_bitonic_pipe7[i * 8 + j + 4]	<= 'd0;
					end
					else begin
						if(r_bitonic_pipe6[i * 8 + j] > r_bitonic_pipe6[i * 8 + j + 4]) begin
							r_bitonic_pipe7[i * 8 + j]		<= r_bitonic_pipe6[i * 8 + j + 4];
							r_bitonic_pipe7[i * 8 + j + 4]	<= r_bitonic_pipe6[i * 8 + j];
						end
						else begin
							r_bitonic_pipe7[i * 8 + j]		<= r_bitonic_pipe6[i * 8 + j]	;
							r_bitonic_pipe7[i * 8 + j + 4]	<= r_bitonic_pipe6[i * 8 + j + 4];
						end
					end
				end
			
			end
		end
	endgenerate

	reg	signed	[15:0]	r_bitonic_pipe8	[0:WIN - 1];//间隔2
	generate
		for(i = 0; i < WIN / 4; i = i + 1) begin
			for(j = 0; j < WIN / 8; j = j + 1) begin
			
				always@(posedge i_clk or posedge i_rst) begin
					if(i_rst) begin
						r_bitonic_pipe8[i * 4 + j]		<= 'd0;
						r_bitonic_pipe8[i * 4 + j + 2]	<= 'd0;
					end
					else begin
						if(r_bitonic_pipe7[i * 4 + j] > r_bitonic_pipe7[i * 4 + j + 2]) begin
							r_bitonic_pipe8[i * 4 + j]		<= r_bitonic_pipe7[i * 4 + j + 2];
							r_bitonic_pipe8[i * 4 + j + 2]	<= r_bitonic_pipe7[i * 4 + j];
						end
						else begin
							r_bitonic_pipe8[i * 4 + j]		<= r_bitonic_pipe7[i * 4 + j]	;
							r_bitonic_pipe8[i * 4 + j + 2]	<= r_bitonic_pipe7[i * 4 + j + 2];
						end
					end
				end
			
			end
		end
	endgenerate
		
	reg	signed	[15:0]	r_bitonic_pipe9	[0:WIN - 1];//间隔1
	generate
		for(i = 0; i < WIN / 2; i = i + 1) begin
			
			always@(posedge i_clk or posedge i_rst) begin
				if(i_rst) begin
					r_bitonic_pipe9[i * 2]		<= 'd0;
					r_bitonic_pipe9[i * 2 + 1]	<= 'd0;
				end
				else begin
					if(r_bitonic_pipe8[i * 2] > r_bitonic_pipe8[i * 2 + 1]) begin
						r_bitonic_pipe9[i * 2]		<= r_bitonic_pipe8[i * 2 + 1];
						r_bitonic_pipe9[i * 2 + 1]	<= r_bitonic_pipe8[i * 2];
					end
					else begin
						r_bitonic_pipe9[i * 2]		<= r_bitonic_pipe8[i * 2]	;
						r_bitonic_pipe9[i * 2 + 1]	<= r_bitonic_pipe8[i * 2 + 1];
					end
				end
			end
			
		end
	endgenerate	
	
	generate
		for(i = 0; i < 16; i = i + 1) begin
			assign	o_buff[i * 16 + 15:i * 16] = r_bitonic_pipe9[i];
		end
	endgenerate
	
endmodule
相关推荐
患得患失9491 分钟前
【前端WebSocket】心跳功能,心跳重置策略、双向确认(Ping-Pong) 以及 指数退避算法(Exponential Backoff)
前端·websocket·算法
海砥装备HardAus4 分钟前
飞控算法中双环串级PID深度解析:角度环与角速度环的协同机制
stm32·算法·无人机·飞控·串级pid
宵时待雨5 分钟前
优选算法专题1:双指针
数据结构·c++·笔记·算法·leetcode
zsc_1187 分钟前
pvz3解码小游戏求解算法
算法
汀、人工智能9 分钟前
[特殊字符] 第107课:LRU缓存(最后一课[特殊字符])
数据结构·算法·链表·数据库架构·哈希表·lru缓存
数据知道14 分钟前
claw-code 源码分析:结构化输出与重试——`structured_output` 一类开关如何改变「可解析性」与失败语义?
算法·ai·claude code·claw code
LCMICRO-1331084774615 分钟前
长芯微LPC556D1完全P2P替代DAC8830,是引脚兼容的16位数模转换器,该系列产品为单通道、低功耗、缓冲电压输出型DAC
stm32·单片机·嵌入式硬件·fpga开发·硬件工程·电压输出型dac
tankeven15 分钟前
HJ172 小红的矩阵染色
c++·算法
2301_8227032019 分钟前
Flutter 框架跨平台鸿蒙开发 - 智能植物生长记录应用
算法·flutter·华为·harmonyos·鸿蒙
每日任务(希望进OD版)24 分钟前
线性DP、区间DP
开发语言·数据结构·c++·算法·动态规划