ISP算法之坏点校正DPC(二):Verilog硬件实现与仿真

DPC的算法讲解和MATLAB仿真参考上一节:

ISP算法之坏点校正DPC(一):MATLAB仿真验证-CSDN博客

本节讲解Verilog的硬件实现与仿真

行缓存设计

DPC算法是基于窗口邻域的像素级别算法,因此需要对实时到来的视频流进行行缓存,行缓存的设计参考:Verilog实现图像处理的行缓存Line Buffer_verilog行缓冲-CSDN博客

行缓存会并行输出N行同一列位置的数据,在算法模块实例化行缓存模块并对其并行输出打拍寄存就可以得到一定大小的窗口数据。

Pipeline设计

硬件设计的难点是拆分算法逻辑并对齐时序,将算法实现的步骤按照Pipeline的方式进行分解,在每个时钟周期完成一个步骤并寄存,经过一定延时后,每个时钟周期完成处理并输出一个像素。

例如在下列pipeline框图中,一个实现被拆分成了3个时钟周期ABC,每个时钟周期实现一部分组合逻辑(A、B、C),延时一段时间后(3个时钟周期),每个时钟周期都能处理完ABC三个过程,这样的设计其实是面积换速度策略,通过添加寄存器存储中间值来实现Pipeline。

DPC硬件设计框架

按照Pipeline的设计方式,DPC的数据流与算法硬件pipeline设计框架图如下所示:

每个时钟周期已经用T1~T10表示。

1、通过行缓存得到并行的四行相同列的数据,同时输入第5行数据,此过程需要对行缓存和输入的像素进行打拍得到窗口大小的像素区域。

2、得到5x5窗口区域后,根据当前Bayer像素的格式R/B或G得到邻域区域的9个像素点(包含中心像素),详细查看上一节中的MATLAB仿真验证。

3、接下来充分利用硬件的并行性,同时进行中值的求取和坏点的检测过程,最后根据是否是坏点来输出中值或原值。每个过程按照Pipeline拆分为子过程。

计算中值

这里中值的获取采用三分法的原理,可以参考:

3x3开窗中值滤波器的FPGA硬件实现 - olivermahout - 博客园

(该方法的证明可以参考网络上的博客,这里会使用即可)

通过此方法无需对9个像素进行复杂的排序操作,只需要三三比较即可。

坏点检测

由于坏点检测当中涉及到了减法,因此需要对参与计算的数据扩展一位位宽用于符号判断(0正1负),如上述算法框图中过程6。计算得到的差值通过最高位来判断正负。

如果全为正或全为负且差值的绝对值超过一定阈值则判断为坏点,绝对值的计算使用有符号的0减去为负的数值。详细查看代码中的注释

Verilog 代码:
t4_diff1 <= t3_diff1[BITS] ? 1'sd0 - t3_diff1 : t3_diff1; 

对齐控制时序

随数据输入的还有HREF和VSYNC控制信号,需要对这些信号进行时序对齐,由于整个算法Pipeline过程延时为10个时钟周期,所以对这些信号也统一打拍10个时钟周期。

verilog硬件设计源码

Verilog 代码:
`timescale 1 ns / 1 ns

/*
 * ISP - Defective Pixel Correction
 */

/*
 * bayer 5x5邻域内同一颜色通道相对于中心像素都有8个临近像素。校正按以下步骤操作:
 * 计算中心像素与周围八个像素值的差;
 * 判断八个差值是否都为正值或者都为负值;
 * 如果有的为正有的为负,那么就为正常值,否则进行下一步;
 * 设置一个阈值,如果八个差值的绝对值都超过阈值,那么就判断为坏点;
 * 判断为坏点后就用八个临近的像素值的中位值来替换当前的像素值;
*/

// ISP defective pixel correction (DPC) on a Bayer raw stream.
//
// For every pixel the module gathers the centre pixel and its 8 same-colour
// neighbours from a 5x5 window, then in parallel
//   * computes the median of those 9 samples (three-stage min/med/max network), and
//   * checks whether the centre differs from all 8 neighbours in the same
//     direction by more than `threshold`.
// A pixel that passes the check is replaced by the median; otherwise the
// original value is forwarded. href/vsync are delayed by DLY_CLK cycles so
// they line up with the data pipeline.
module isp_dpc
#(
	parameter BITS = 8,      // pixel bit depth
	parameter WIDTH = 1280,  // line length in pixels, forwarded to the line buffer
	parameter HEIGHT = 960,  // frame height; not referenced inside this module
	parameter BAYER = 0      // 0:RGGB 1:GRBG 2:GBRG 3:BGGR
)
(
	input pclk,
	input rst_n,             // asynchronous reset, active low

	input [BITS-1:0] threshold,    // smaller threshold => looser criterion => more pixels flagged as defective

	input in_href,
	input in_vsync,
	input [BITS-1:0] in_raw,

	output out_href,
	output out_vsync,
	output [BITS-1:0] out_raw
);

	wire [BITS-1:0] shiftout;
	wire [BITS-1:0] tap3x, tap2x, tap1x, tap0x;

	// Line buffer holding 4 lines; together with the live input this yields 5 rows.
	// NOTE(review): shift_register is defined outside this file; tap ordering
	// (tap3x = oldest line) and its output latency are assumed from how the
	// taps are wired into the window below -- confirm against that module.
	shift_register #(BITS, WIDTH, 4) linebuffer(pclk, in_href, in_raw, shiftout, {tap3x, tap2x, tap1x, tap0x});

	reg [BITS-1:0] in_raw_r;
	reg [BITS-1:0] p11,p12,p13,p14,p15;         // 5x5 window, pRC = row R, column C
	reg [BITS-1:0] p21,p22,p23,p24,p25;
	reg [BITS-1:0] p31,p32,p33,p34,p35;
	reg [BITS-1:0] p41,p42,p43,p44,p45;
	reg [BITS-1:0] p51,p52,p53,p54,p55;
	always @ (posedge pclk or negedge rst_n) begin
		if (!rst_n) begin
			in_raw_r <= 0;
			p11 <= 0; p12 <= 0; p13 <= 0; p14 <= 0; p15 <= 0;
			p21 <= 0; p22 <= 0; p23 <= 0; p24 <= 0; p25 <= 0;
			p31 <= 0; p32 <= 0; p33 <= 0; p34 <= 0; p35 <= 0;
			p41 <= 0; p42 <= 0; p43 <= 0; p44 <= 0; p45 <= 0;
			p51 <= 0; p52 <= 0; p53 <= 0; p54 <= 0; p55 <= 0;
		end
		else begin
			// Each row is a 5-deep shift chain; new samples enter at column 5.
			in_raw_r <= in_raw;
			p11 <= p12; p12 <= p13; p13 <= p14; p14 <= p15; p15 <= tap3x;
			p21 <= p22; p22 <= p23; p23 <= p24; p24 <= p25; p25 <= tap2x;
			p31 <= p32; p32 <= p33; p33 <= p34; p34 <= p35; p35 <= tap1x;
			p41 <= p42; p42 <= p43; p43 <= p44; p44 <= p45; p45 <= tap0x;
			p51 <= p52; p52 <= p53; p53 <= p54; p54 <= p55; p55 <= in_raw_r;
		end
	end

	// Column parity: cleared while href is low, toggles on every valid pixel.
	reg odd_pix;
	always @ (posedge pclk or negedge rst_n) begin
		if (!rst_n)
			odd_pix <= 0;
		else if (!in_href)
			odd_pix <= 0;
		else
			odd_pix <= ~odd_pix;
	end
	wire odd_pix_sync_shift = odd_pix;

	// href delayed by one cycle, used to detect its falling edge.
	reg prev_href;
	always @ (posedge pclk or negedge rst_n) begin
		if (!rst_n)
			prev_href <= 0;
		else
			prev_href <= in_href;
	end

	// Row parity: cleared while vsync is high, toggles at the end of every line.
	reg odd_line;
	always @ (posedge pclk or negedge rst_n) begin
		if (!rst_n)
			odd_line <= 0;
		else if (in_vsync)
			odd_line <= 0;
		else if (prev_href & (~in_href))     // falling edge of href
			odd_line <= ~odd_line;
		else
			odd_line <= odd_line;
	end
	wire odd_line_sync_shift = odd_line;

	// Colour of the window centre, derived from the Bayer layout and row/column parity.
	// pixel format 0:[R]GGB 1:R[G]GB 2:RG[G]B 3:RGG[B]
	wire [1:0] p33_fmt = BAYER[1:0] ^ {odd_line_sync_shift, odd_pix_sync_shift};

	// T1: pick the centre pixel and its 8 same-colour neighbours (3x3 set).
	reg [BITS-1:0] t1_p1, t1_p2, t1_p3;
	reg [BITS-1:0] t1_p4, t1_p5, t1_p6;
	reg [BITS-1:0] t1_p7, t1_p8, t1_p9;
	always @ (posedge pclk or negedge rst_n) begin
		if (!rst_n) begin
			t1_p1 <= 0; t1_p2 <= 0; t1_p3 <= 0;
			t1_p4 <= 0; t1_p5 <= 0; t1_p6 <= 0;
			t1_p7 <= 0; t1_p8 <= 0; t1_p9 <= 0;
		end
		else begin
			case (p33_fmt)
				2'd0,2'd3: begin // R or B centre: same-colour neighbours sit two pixels away in every direction
					t1_p1 <= p11; t1_p2 <= p13; t1_p3 <= p15;
					t1_p4 <= p31; t1_p5 <= p33; t1_p6 <= p35;
					t1_p7 <= p51; t1_p8 <= p53; t1_p9 <= p55;
				end
				2'd1,2'd2: begin // Gr or Gb centre: 4 diagonal greens plus 4 greens two pixels away on the axes
					t1_p1 <= p22; t1_p2 <= p13; t1_p3 <= p24;
					t1_p4 <= p31; t1_p5 <= p33; t1_p6 <= p35;
					t1_p7 <= p42; t1_p8 <= p53; t1_p9 <= p44;
				end
				default: begin   // only reachable when p33_fmt contains X/Z in simulation
					t1_p1 <= 0; t1_p2 <= 0; t1_p3 <= 0;
					t1_p4 <= 0; t1_p5 <= 0; t1_p6 <= 0;
					t1_p7 <= 0; t1_p8 <= 0; t1_p9 <= 0;
				end
			endcase
		end
	end

	// Median step 1 (T2): min / med / max of each row of three.
	reg [BITS-1:0] t2_min1, t2_med1, t2_max1;
	reg [BITS-1:0] t2_min2, t2_med2, t2_max2;
	reg [BITS-1:0] t2_min3, t2_med3, t2_max3;
	always @ (posedge pclk or negedge rst_n) begin
		if (!rst_n) begin
			t2_min1 <= 0; t2_med1 <= 0; t2_max1 <= 0;
			t2_min2 <= 0; t2_med2 <= 0; t2_max2 <= 0;
			t2_min3 <= 0; t2_med3 <= 0; t2_max3 <= 0;
		end
		else begin
			t2_min1 <= min(t1_p1, t1_p2, t1_p3);
			t2_med1 <= med(t1_p1, t1_p2, t1_p3);
			t2_max1 <= max(t1_p1, t1_p2, t1_p3);
			t2_min2 <= min(t1_p4, t1_p5, t1_p6);
			t2_med2 <= med(t1_p4, t1_p5, t1_p6);
			t2_max2 <= max(t1_p4, t1_p5, t1_p6);
			t2_min3 <= min(t1_p7, t1_p8, t1_p9);
			t2_med3 <= med(t1_p7, t1_p8, t1_p9);
			t2_max3 <= max(t1_p7, t1_p8, t1_p9);
		end
	end

	// Median step 2 (T3): max of the minima, median of the medians, min of the maxima.
	reg [BITS-1:0] t3_max_of_min, t3_med_of_med, t3_min_of_max;
	always @ (posedge pclk or negedge rst_n) begin
		if (!rst_n) begin
			t3_max_of_min <= 0; t3_med_of_med <= 0; t3_min_of_max <= 0;
		end
		else begin
			t3_max_of_min <= max(t2_min1, t2_min2, t2_min3);
			t3_med_of_med <= med(t2_med1, t2_med2, t2_med3);
			t3_min_of_max <= min(t2_max1, t2_max2, t2_max3);
		end
	end

	// Median step 3 (T4): the median of the three step-2 results is the median of all 9 samples.
	reg [BITS-1:0] t4_medium;
	always @ (posedge pclk or negedge rst_n) begin
		if (!rst_n) begin
			t4_medium <= 0;
		end
		else begin
			t4_medium <= med(t3_max_of_min, t3_med_of_med, t3_min_of_max);
		end
	end

	// T5: delay the median one more cycle so it is aligned with t5_defective_pix.
	reg [BITS-1:0] t5_medium;
	always @ (posedge pclk or negedge rst_n) begin
		if (!rst_n) begin
			t5_medium <= 0;
		end
		else begin
			t5_medium <= t4_medium;
		end
	end

	// Detection step 1 (T2): zero-extend by one bit so the samples can be treated as signed.
	reg signed [BITS:0] t2_p1, t2_p2, t2_p3;
	reg signed [BITS:0] t2_p4, t2_p5, t2_p6;
	reg signed [BITS:0] t2_p7, t2_p8, t2_p9;
	always @ (posedge pclk or negedge rst_n) begin
		if (!rst_n) begin
			t2_p1 <= 0; t2_p2 <= 0; t2_p3 <= 0;
			t2_p4 <= 0; t2_p5 <= 0; t2_p6 <= 0;
			t2_p7 <= 0; t2_p8 <= 0; t2_p9 <= 0;
		end
		else begin
			t2_p1 <= {1'b0,t1_p1}; t2_p2 <= {1'b0,t1_p2}; t2_p3 <= {1'b0,t1_p3};
			t2_p4 <= {1'b0,t1_p4}; t2_p5 <= {1'b0,t1_p5}; t2_p6 <= {1'b0,t1_p6};
			t2_p7 <= {1'b0,t1_p7}; t2_p8 <= {1'b0,t1_p8}; t2_p9 <= {1'b0,t1_p9};
		end
	end

	// Detection step 2 (T3): signed difference between the centre and each of the 8 neighbours.
	reg [BITS-1:0] t3_center;   // BITS wide: only the unsigned pixel value is carried forward
	reg signed [BITS:0] t3_diff1, t3_diff2, t3_diff3, t3_diff4, t3_diff5, t3_diff6, t3_diff7, t3_diff8;
	always @ (posedge pclk or negedge rst_n) begin
		if (!rst_n) begin
			t3_center <= 0;
			t3_diff1 <= 0; t3_diff2 <= 0;
			t3_diff3 <= 0; t3_diff4 <= 0;
			t3_diff5 <= 0; t3_diff6 <= 0;
			t3_diff7 <= 0; t3_diff8 <= 0;
		end
		else begin
			t3_center <= t2_p5[BITS-1:0];
			// Differences are two's complement; bit [BITS] is the sign.
			t3_diff1 <= t2_p5 - t2_p1;
			t3_diff2 <= t2_p5 - t2_p2;
			t3_diff3 <= t2_p5 - t2_p3;
			t3_diff4 <= t2_p5 - t2_p4;
			t3_diff5 <= t2_p5 - t2_p6;
			t3_diff6 <= t2_p5 - t2_p7;
			t3_diff7 <= t2_p5 - t2_p8;
			t3_diff8 <= t2_p5 - t2_p9;
		end
	end

	// Detection step 3 (T4): are all differences of the same sign? Also take their absolute values.
	reg t4_defective_pix;
	reg [BITS-1:0] t4_center;
	reg [BITS-1:0] t4_diff1, t4_diff2, t4_diff3, t4_diff4, t4_diff5, t4_diff6, t4_diff7, t4_diff8;
	always @ (posedge pclk or negedge rst_n) begin
		if (!rst_n) begin
			t4_defective_pix <= 0;
			t4_center <= 0;
			t4_diff1 <= 0; t4_diff2 <= 0;
			t4_diff3 <= 0; t4_diff4 <= 0;
			t4_diff5 <= 0; t4_diff6 <= 0;
			t4_diff7 <= 0; t4_diff8 <= 0;
		end
		else begin
			t4_center <= t3_center;
			// Candidate only if the 8 sign bits are all 0 (non-negative) or all 1 (negative).
			t4_defective_pix <= (8'b0000_0000 == {t3_diff1[BITS],t3_diff2[BITS],t3_diff3[BITS],t3_diff4[BITS],t3_diff5[BITS],t3_diff6[BITS],t3_diff7[BITS],t3_diff8[BITS]})
							 || (8'b1111_1111 == {t3_diff1[BITS],t3_diff2[BITS],t3_diff3[BITS],t3_diff4[BITS],t3_diff5[BITS],t3_diff6[BITS],t3_diff7[BITS],t3_diff8[BITS]});
			// Absolute value: negate (0 - x) when the sign bit is set.
			t4_diff1 <= t3_diff1[BITS] ? 1'sd0 - t3_diff1 : t3_diff1;
			t4_diff2 <= t3_diff2[BITS] ? 1'sd0 - t3_diff2 : t3_diff2;
			t4_diff3 <= t3_diff3[BITS] ? 1'sd0 - t3_diff3 : t3_diff3;
			t4_diff4 <= t3_diff4[BITS] ? 1'sd0 - t3_diff4 : t3_diff4;
			t4_diff5 <= t3_diff5[BITS] ? 1'sd0 - t3_diff5 : t3_diff5;
			t4_diff6 <= t3_diff6[BITS] ? 1'sd0 - t3_diff6 : t3_diff6;
			t4_diff7 <= t3_diff7[BITS] ? 1'sd0 - t3_diff7 : t3_diff7;
			t4_diff8 <= t3_diff8[BITS] ? 1'sd0 - t3_diff8 : t3_diff8;
		end
	end

	// Detection step 4 (T5): defective if same-sign AND every absolute difference exceeds the threshold.
	reg t5_defective_pix;
	reg [BITS-1:0] t5_center;
	always @ (posedge pclk or negedge rst_n) begin
		if (!rst_n) begin
			t5_defective_pix <= 0;
			t5_center <= 0;
		end
		else begin
			t5_center <= t4_center;
			t5_defective_pix <= t4_defective_pix && t4_diff1 > threshold && t4_diff2 > threshold && t4_diff3 > threshold && t4_diff4 > threshold &&
													t4_diff5 > threshold && t4_diff6 > threshold && t4_diff7 > threshold && t4_diff8 > threshold;
		end
	end

	// Detection step 5 (T6): output the median for a defective pixel, the original value otherwise.
	reg [BITS-1:0] t6_center;
	always @ (posedge pclk or negedge rst_n) begin
		if (!rst_n) begin
			t6_center <= 0;
		end
		else begin
			t6_center <= t5_defective_pix ? t5_medium : t5_center;
		end
	end

	// Delay the control signals by DLY_CLK cycles to match the data path
	// (in_raw_r, three window shifts to the centre column, then stages T1..T6).
	localparam DLY_CLK = 10;
	reg [DLY_CLK-1:0] href_dly;
	reg [DLY_CLK-1:0] vsync_dly;
	always @ (posedge pclk or negedge rst_n) begin
		if (!rst_n) begin
			href_dly <= 0;
			vsync_dly <= 0;
		end
		else begin
			href_dly <= {href_dly[DLY_CLK-2:0], in_href};
			vsync_dly <= {vsync_dly[DLY_CLK-2:0], in_vsync};
		end
	end

	// Aligned outputs; data is forced to zero outside the active line.
	assign out_href = href_dly[DLY_CLK-1];
	assign out_vsync = vsync_dly[DLY_CLK-1];
	assign out_raw = out_href ? t6_center : {BITS{1'b0}};

	// Smallest of three unsigned values.
	function [BITS-1:0] min;
		input [BITS-1:0] a, b, c;
		begin
			min = (a < b) ? ((a < c) ? a : c) : ((b < c) ? b : c);
		end
	endfunction
	// Middle (median) of three unsigned values.
	function [BITS-1:0] med;
		input [BITS-1:0] a, b, c;
		begin
			med = (a < b) ? ((b < c) ? b : (a < c ? c : a)) : ((b > c) ? b : (a > c ? c : a));
		end
	endfunction
	// Largest of three unsigned values.
	function [BITS-1:0] max;
		input [BITS-1:0] a, b, c;
		begin
			max = (a > b) ? ((a > c) ? a : c) : ((b > c) ? b : c);
		end
	endfunction
endmodule

Verilog仿真验证

(验证框架的搭建查看往期博客)

编写testbench,实例化算法模块,代码如下:

Verilog 代码:
`timescale 1ns / 1ns

// Testbench for isp_dpc: streams a raw image file through the DPC module
// over a DVP-style interface and writes the processed stream to a file.
// FILE_TO_DVP and DVP_TO_FILE are helper modules defined outside this file.
module tb_dpc;

reg xclk = 0;
always #5 xclk <= ~xclk;     // source clock, toggles every 5 time units

reg rst_n = 0;               // active-low reset, released after 100 time units
initial begin
    rst_n <= 0;
    #100 rst_n <= 1;
end


localparam BAYER = 3;        // Bayer layout passed to isp_dpc
localparam BITS     = 10;    // pixel bit depth
localparam WIDTH    = 2592;
localparam HEIGHT   = 1944;
localparam IN_FILE  = "E:/ISP/tb_dpc_2592x1944_16.raw";
localparam OUT_FILE = "E:/ISP/tb_dpc_2592x1944_16_verilogout.raw";


reg [BITS-1:0] dpc_thresh=100;    // DPC threshold


// ---------------- FILE TO DVP ----------------
wire pclk_in, href_in, vsyn_in,hsync_in;
wire [BITS-1:0] data_in;

FILE_TO_DVP #(
    .FILE(IN_FILE),
    .BITS(BITS),
    .H_DISP(WIDTH),
    .V_DISP(HEIGHT)
)
file_to_dvp_inst
(
    .xclk(xclk),
    .rst_n(rst_n),
    .pclk(pclk_in),
    .href(href_in),
    .hsync(hsync_in),
    .vsync(vsyn_in),
    .data(data_in)
);

// ---------------- ISP algorithm (DPC) ----------------
wire [BITS-1:0] data_o;
wire href_o,vsyn_o;


isp_dpc #(
    .BITS(BITS),
    .WIDTH(WIDTH),
    .HEIGHT(HEIGHT),
    .BAYER(BAYER)
) dpc_inst(
    .pclk(pclk_in),
    .rst_n(rst_n),
    .threshold(dpc_thresh),
    .in_href(href_in),
    .in_vsync(vsyn_in),
    .in_raw(data_in),
    .out_href(href_o),
    .out_vsync(vsyn_o),
    .out_raw(data_o)
);


// ---------------- DVP TO FILE ----------------
DVP_TO_FILE #(
    .FILE(OUT_FILE),
    .BITS(BITS)
)
dvp_to_file_inst
(
    .pclk(pclk_in),
    .rst_n(rst_n),
    .href(href_o),
    .vsync(vsyn_o),
    .data(data_o)
);

endmodule

输入raw图像为:

(测试图像的生成参考往期博客)

得到的raw仅进行demosaic后得到如下:

可以看到坏点基本被消除,与MATLAB仿真得到的结果基本一致。

相关推荐
Tiandaren8 小时前
从Python到C++的转变之路——如何高效复现C++开源项目 || Windows || Visual Studio || 持续更新
开发语言·c++·图像处理·人工智能·python·深度学习·开源
思通数科大数据舆情13 小时前
4大应用场景揭秘:AI视频监控在养老院中的智能化管理与安全保障
大数据·图像处理·人工智能·目标检测·机器学习·计算机视觉·数据挖掘
灵封~13 小时前
python图像处理
图像处理·人工智能·计算机视觉
liuming19921 天前
Halcon中dots_image(Operator)算子原理及应用详解
图像处理·人工智能·深度学习·计算机视觉·视觉检测
豆本-豆豆奶2 天前
Python 图像处理领域的十一个基础操作
开发语言·图像处理·python
哦布莱克斯2 天前
数字图像处理技术期末复习
图像处理·人工智能·计算机视觉
一只励志翻身的咸鱼哥2 天前
基于MATLAB 的数字图像处理技术总结
图像处理·学习·计算机视觉·matlab
ballball~~2 天前
相机(Camera)成像原理详解
图像处理·相机成像原理
时空自由民.2 天前
(Image Signal Processor)ISP简介
isp