原理
都是线性滤波器。
均值滤波
该点处的像素值=滤波核范围的像素值取平均
降噪
高斯滤波
平滑
该点处的像素值=滤波核范围的像素值乘上高斯系数的和
eg
sobel算子
边缘检测。
计算方式与高斯滤波相同:滤波核范围内的像素值乘以对应的核系数后求和,只是核系数不同。
fpga实现均值滤波
直接实现
流水
步骤
- 一维求和
- 二维求和
- 除法转换
- 行缓存并实现行列像素对齐
一维求和
连续打四拍,两两并行相加,注意时序
增量更新法
增量更新是指在进行更新操作时,只更新需要改变的地方,不需要更新或者已经更新过的地方则不会重复更新。
假定要计算连续5个数据流的和,在上一个时刻t0,这5个待计算的数值是a0,a1,a2,a3,a4, sum=a0+(a1+a2+a3+a4),
在本时刻t1 待计算的数值是a1,a2,a3,a4,a5。sum’=(a1+a2+a3+a4)+a5
中间有4个值是相同元素。
如此如果每次计算都将5个数重新相加,就有点浪费资源。正确的做法是加上一个新值,再减去一个最老的值。
则 sum’=sum+(a5-a0)(新进入窗口的是a5,移出窗口的最老值是a0)
`timescale 1ps/1ps
// sum_1d: running (sliding-window) sum over the last KSZ samples of a stream,
// computed incrementally: every cycle the newest sample is added and the
// sample that left the window is subtracted.
//
// Parameters
//   DW  : width of one input sample in bits.
//   KSZ : window length in samples.
//
// Ports
//   clk        : clock; all registers update on the rising edge.
//   din        : input sample, DW bits.
//   din_valid  : high while din carries valid samples.
//   dout_valid : din_valid delayed by one clock.
//   dout       : the accumulator register, 2*DW bits.
//
// Behaviour
//   * On the first cycle of a valid burst (din_valid high while its one-cycle
//     delayed copy is still low) the accumulator is cleared.
//   * While the delayed valid is high, the accumulator adds the sample
//     registered one clock ago (reg_din[0]) and, once the burst has been
//     running for more than KSZ samples, subtracts the sample registered
//     KSZ+1 clocks ago (reg_din[KSZ]).
module sum_1d(
    clk,
    din,
    din_valid,
    dout_valid,
    dout
);
    parameter DW  = 14;
    parameter KSZ = 3;

    input               clk;
    input  [DW-1:0]     din;
    input               din_valid;
    output              dout_valid;
    output [2*DW-1:0]   dout;

    // din_valid_r[k] is din_valid delayed by k+1 clocks;
    // reg_din[k] is din delayed by k+1 clocks, so the two stay index-aligned.
    reg  [KSZ:0]        din_valid_r;
    reg  [DW-1:0]       reg_din[0:KSZ];
    reg  [2*DW-1:0]     sum;
    wire [2*DW-1:0]     sub_out;
    wire [2*DW-1:0]     diff;
    integer             j;

    // Delay lines for the valid flag and the data.
    always @(posedge clk)
    begin
        din_valid_r <= #1 {din_valid_r[KSZ-1:0], din_valid};
        reg_din[0]  <= #1 din;
        for (j = 1; j <= KSZ; j = j + 1)
            reg_din[j] <= #1 reg_din[j-1];
    end

    // Accumulator: cleared at the start of a burst, updated incrementally after.
    always @(posedge clk)
    begin
        if ((din_valid == 1'b1) & (din_valid_r[0] == 1'b0))
            sum <= #1 {(2*DW){1'b0}};
        else if (din_valid_r[0] == 1'b1)
            sum <= #1 sum + diff;
    end

    // Oldest sample to remove; zero until the window has filled.
    assign sub_out = ((din_valid_r[0] == 1'b1) & (din_valid_r[KSZ] == 1'b1)) ?
                     {{DW{1'b0}}, reg_din[KSZ]} : {(2*DW){1'b0}};

    // Newest sample minus oldest sample.
    // The newest sample must be reg_din[0]: it is the one aligned with
    // din_valid_r[0], which gates the accumulate above. Using reg_din[1]
    // would add a sample from before the burst started and shrink the
    // effective window to KSZ-1 samples.
    assign diff = {{DW{1'b0}}, reg_din[0]} - sub_out;

    assign dout_valid = din_valid_r[0];
    assign dout       = sum;
endmodule
2维求和
以5×5窗口为例:先对每一行做一维求和,再把5行的行求和结果按方法1(一维求和)的做法在列方向上累加,得到二维窗口的和。
//--------------------------------------------------------------------------------------------
//
// Generated by X-HDL VHDL Translator - Version 2.0.0 Feb. 1, 2011
// Wed Mar 16 2016 10:59:46
//
// Input file :
// Component name : sum_2d
// Author :
// Company :
//
// Description :
//
//
//--------------------------------------------------------------------------------------------
`timescale 1ps/1ps
// sum_2d: two-dimensional window sum built from a horizontal running sum
// (sum_1d) followed by line buffers that bring the row sums of neighbouring
// image lines together so they can be added vertically.
//
// Parameters
//   DW  : input pixel width in bits.
//   KSZ : window size (KSZ x KSZ). Only the KSZ == 3 vertical adder is
//         present in this text.
//   IH  : image height (not referenced by the logic visible here).
//   IW  : image width; a line buffer starts being read once its fill count
//         equals IW.
//   radius, num_all, fifo_num, med_idx : derived constants kept as parameters.
//
// Ports
//   rst_n      : asynchronous active-low reset.
//   clk        : clock.
//   din_valid  : high while din carries valid pixels.
//   din        : input pixel.
//   vsync      : when high, forces the internal reset rst_all.
//   dout, dout_valid, is_boarder, vsync_out : outputs.
//
// NOTE(review): in the text visible here dout, dout_valid, is_boarder and
// vsync_out are never driven, flush_line is never assigned (so `valid` is
// unknown whenever din_valid is low), and no vertical adder exists for the
// default KSZ = 5. This looks like an excerpt of a larger module - confirm
// against the complete source before use.
module sum_2d(
    rst_n,
    clk,
    din_valid,
    din,
    dout,
    vsync,
    vsync_out,
    is_boarder,
    dout_valid
);
    parameter DW  = 8;
    parameter KSZ = 5;
    parameter IH  = 512;
    parameter IW  = 640;

    input               rst_n;
    input               clk;
    input               din_valid;
    input  [DW-1:0]     din;
    output [2*DW-1:0]   dout;
    input               vsync;
    output              vsync_out;
    reg                 vsync_out;
    output              is_boarder;
    output              dout_valid;

    parameter radius   = ((KSZ >> 1));
    parameter num_all  = KSZ * KSZ;
    parameter fifo_num = KSZ - 1;
    parameter med_idx  = ((num_all >> 1));

    reg                 rst_all;

    // Each 2*DW-bit row sum is split into a low and a high DW-bit half,
    // and each half travels through its own chain of line buffers.
    reg  [DW-1:0]       line_dinl[0:KSZ-2];
    wire [DW-1:0]       line_doutl[0:KSZ-2];
    reg  [DW-1:0]       line_dinh[0:KSZ-2];
    wire [DW-1:0]       line_douth[0:KSZ-2];
    wire [KSZ-2:0]      line_emptyl;
    wire [KSZ-2:0]      line_fulll;
    wire [KSZ-2:0]      line_rdenl;
    wire [KSZ-2:0]      line_wrenl;
    wire [KSZ-2:0]      line_emptyh;
    wire [KSZ-2:0]      line_fullh;
    wire [KSZ-2:0]      line_rdenh;
    wire [KSZ-2:0]      line_wrenh;
    wire [9:0]          line_countl[0:KSZ-2];
    wire [9:0]          line_counth[0:KSZ-2];
    wire [9:0]          line_count[0:KSZ-2];

    wire                din_valid_r;
    wire [2*DW-1:0]     sum;
    wire [2*DW-1:0]     sum_col;
    reg  [KSZ-2:0]      buf_pop_en;
    reg                 valid_r;
    reg  [10:0]         in_line_cnt;
    reg  [15:0]         flush_cnt;
    reg                 flush_line;
    reg  [15:0]         out_pixel_cnt;
    reg  [10:0]         out_line_cnt;
    reg  [2*DW-1:0]     dout_temp_r;
    reg                 dout_valid_temp_r;
    wire [2*DW-1:0]     dout_temp;
    wire                dout_valid_temp;
    wire [2*DW-1:0]     sum_row1;
    wire [2*DW-1:0]     sum_row2;
    reg  [2*DW-1:0]     sum_1_2;
    reg  [2*DW-1:0]     sum_col_r;
    reg  [2*DW-1:0]     sum_all;
    wire                is_boarder_tmp;
    reg                 is_boarder_r;
    wire                valid;
    wire                row_valid;
    wire [31:0]         j;
    wire [31:0]         k;
    reg  [10:0]         line_valid_r;
    wire [2*DW-1:0]     sum_row3;
    wire [2*DW-1:0]     sum_row4;
    reg  [2*DW-1:0]     sum_3_4;
    reg  [2*DW-1:0]     sum_0_1_2;
    reg  [2*DW-1:0]     sum_3_4_r;
    wire [2*DW-1:0]     sum_row5;
    wire [2*DW-1:0]     sum_row6;
    reg  [2*DW-1:0]     sum_5_6;
    reg  [2*DW-1:0]     sum_3_4_5_6;
    wire [2*DW-1:0]     sum_row7;
    wire [2*DW-1:0]     sum_row8;
    wire [2*DW-1:0]     sum_row9;
    wire [2*DW-1:0]     sum_row10;
    wire [2*DW-1:0]     sum_row11;
    wire [2*DW-1:0]     sum_row12;
    wire [2*DW-1:0]     sum_row13;
    wire [2*DW-1:0]     sum_row14;
    reg  [2*DW-1:0]     sum_7_8;
    reg  [2*DW-1:0]     sum_9_10;
    reg  [2*DW-1:0]     sum_11_12;
    reg  [2*DW-1:0]     sum_13_14;
    reg  [2*DW-1:0]     sum_7_8_9_10;
    reg  [2*DW-1:0]     sum_11_12_13_14;
    reg  [2*DW-1:0]     sum_0_to_6;
    reg  [2*DW-1:0]     sum_7_to_14;
    reg                 row_valid_r;
    wire                vsync_out_tmp;
    reg  [DW-1:0]       xhdl4[0:KSZ-2];
    reg  [DW-1:0]       xhdl5[0:KSZ-2];
    wire [DW*2-1:0]     xhdl13;

    // A pixel slot is processed when real data arrives or a flush is active.
    assign valid = din_valid | flush_line;

    // Internal reset: asserted by the external reset or while vsync is high.
    always @(posedge clk or negedge rst_n)
    begin
        if (rst_n == 1'b0)
            rst_all <= #1 1'b1;
        else
        begin
            if (vsync == 1'b1)
                rst_all <= #1 1'b1;
            else
                rst_all <= #1 1'b0;
        end
    end

    always @(posedge clk)
    begin
        if (rst_all == 1'b1)
            row_valid_r <= #1 1'b1;
        else
            row_valid_r <= #1 row_valid;
    end

    // Horizontal running sum of the incoming pixel stream.
    sum_1d #(DW, KSZ)
        row_sum(
            .clk(clk),
            .din(din),
            .din_valid(valid),
            .dout(sum_col),
            .dout_valid(row_valid)
        );

    // Chain of KSZ-1 line-buffer pairs (low half / high half of the row sum).
    // Buffer 0 is fed by the current row sum; buffer i is fed by whatever is
    // read out of buffer i-1.
    generate
    begin : line_buffer_inst
        genvar i;
        for (i = 0; i <= KSZ - 2; i = i + 1)
        begin : line_buf
            if (i == 0)
            begin : row_1st
                // Blocking assignments: these are purely combinational.
                always @(*) line_dinl[i] = sum_col[DW - 1:0];
                always @(*) line_dinh[i] = sum_col[2 * DW - 1:DW];
                assign line_wrenl[i] = row_valid;
                assign line_wrenh[i] = row_valid;
            end
            else
            begin : row_others
                always @(*) line_dinl[i] = line_doutl[i - 1];
                always @(*) line_dinh[i] = line_douth[i - 1];
                assign line_wrenh[i] = line_rdenh[i - 1];
                assign line_wrenl[i] = line_rdenl[i - 1];
            end

            // Reading starts once the buffer has reported a count of IW
            // and continues for every subsequent valid row-sum sample.
            assign line_rdenl[i] = buf_pop_en[i] & row_valid;
            assign line_rdenh[i] = buf_pop_en[i] & row_valid;

            always @(posedge clk)
            begin
                if (rst_all == 1'b1)
                    buf_pop_en[i] <= #1 1'b0;
                else if (line_countl[i] == IW)
                    buf_pop_en[i] <= #1 1'b1;
            end

            always @(*) xhdl4[i] = line_dinl[i];

            line_buffer #(DW, IW)
                line_buf_l(
                    .rst(rst_all),
                    .clk(clk),
                    .din(xhdl4[i]),
                    .dout(line_doutl[i]),
                    .wr_en(line_wrenl[i]),
                    .rd_en(line_rdenl[i]),
                    .empty(line_emptyl[i]),
                    .full(line_fulll[i]),
                    .count(line_countl[i])
                );

            always @(*) xhdl5[i] = line_dinh[i];

            line_buffer #(DW, IW)
                line_buf_h(
                    .rst(rst_all),
                    .clk(clk),
                    .din(xhdl5[i]),
                    .dout(line_douth[i]),
                    .wr_en(line_wrenh[i]),
                    .rd_en(line_rdenh[i]),
                    .empty(line_emptyh[i]),
                    .full(line_fullh[i]),
                    .count(line_counth[i])
                );
        end
    end
    endgenerate

    // Vertical adder for a 3x3 window: two buffered row sums plus the
    // current row sum, added in two registered stages.
    generate
        if (KSZ == 3)
        begin : sum_ksz_3
            assign sum_row1 = ({line_douth[0][DW - 1:0], line_doutl[0][DW - 1:0]});
            // The second buffered row contributes zero until its buffer is being read.
            assign sum_row2 = (((buf_pop_en[1]) == 1'b1)) ?
                              ({line_douth[1][DW - 1:0], line_doutl[1][DW - 1:0]}) :
                              {2*DW-1+1{1'b0}};
            assign dout_valid_temp = line_valid_r[1 + 1];

            always @(posedge clk)
            begin
                // Valid flag delayed alongside the adder stages.
                line_valid_r[2:0] <= ({line_valid_r[1:0], line_rdenl[0]});
                if ((line_rdenl[0]) == 1'b1)
                    sum_col_r <= #1 sum_col;
                if ((line_rdenl[0]) == 1'b1)
                    sum_1_2 <= #1 sum_row2 + sum_row1;
                if ((line_valid_r[0]) == 1'b1)
                    sum_all <= #1 sum_1_2 + sum_col_r;
            end
        end
    endgenerate
endmodule
除法
除以常数(窗口内的像素个数)换成移位和加法操作:把常数的倒数近似为若干个2的负幂之和,用右移后相加代替除法器。
// Mean_2D: windowed mean filter. Image lines are held in a chain of line
// buffers, a horizontal running sum (Sum_1D) is taken on every line, the
// per-line sums are added by an adder tree, and the total is scaled by a
// constant using shifts and adds instead of a divider.
//
// Parameters
//   DW  : input pixel width in bits.
//   KSZ : window size. The row gating and the concatenation fed to the
//         adder tree reference indices 0..14 explicitly, so the visible
//         logic is written for KSZ = 15.
//   IH  : image height (not referenced by the logic visible here).
//   IW  : image width; a line buffer starts being read once its count is IW.
//   radius, num_all, fifo_num, med_idx, DW_ADD : derived constants.
//
// Ports
//   rst_n           : asynchronous active-low reset.
//   clk             : clock.
//   din_valid, din  : input pixel stream.
//   din_delay,
//   din_valid_delay : data/read-enable of line buffer radius-1, delayed by
//                     radius+10 clocks.
//   dout, dout_valid: filtered output and its valid flag.
//   vsync           : when high, forces the internal reset rst_all.
//   vsync_out       : high when line_wren[radius-1] is set and
//                     line_wren[radius] is not.
//   is_boarder      : registered copy of is_boarder_tmp.
//
// NOTE(review): flush_line and is_boarder_tmp are never driven in the text
// visible here, so `valid`, data_tmp2 and is_boarder are unknown in
// simulation unless they are driven elsewhere - confirm against the
// complete source.
// NOTE(review): Sum_1D, AddTree and line_buffer are not defined in this
// block. The running-sum module shown alongside it is named sum_1d
// (Verilog is case-sensitive) - confirm which name the project uses.
// line_buffer is instantiated here with its default parameters - confirm
// they match DW and IW.
module Mean_2D(rst_n, clk, din_valid, din, din_delay, din_valid_delay, dout, vsync, vsync_out, is_boarder, dout_valid);
    parameter DW  = 14;
    parameter KSZ = 15;
    parameter IH  = 512;
    parameter IW  = 640;

    input               rst_n;
    input               clk;
    input               din_valid;
    input  [DW-1:0]     din;
    output [DW-1:0]     din_delay;
    output              din_valid_delay;
    output [2*DW-1:0]   dout;
    input               vsync;
    output              vsync_out;
    output              is_boarder;
    output              dout_valid;

    parameter radius   = ((KSZ >> 1));
    parameter num_all  = KSZ * KSZ;
    parameter fifo_num = KSZ - 1;
    parameter med_idx  = ((num_all >> 1));
    parameter DW_ADD   = DW * 2;

    reg                 rst_all;
    reg  [DW-1:0]       line_din[0:KSZ-2];
    wire [DW-1:0]       line_dout[0:KSZ-2];
    wire [KSZ-2:0]      line_empty;
    wire [KSZ-2:0]      line_full;
    wire [KSZ-2:0]      line_rden;
    wire [KSZ-2:0]      line_wren;
    wire [9:0]          line_count[0:KSZ-2];
    wire                din_valid_r;
    reg  [KSZ-2:0]      buf_pop_en;
    reg                 valid_r;
    reg  [10:0]         in_line_cnt;
    reg  [15:0]         flush_cnt;
    reg                 flush_line;
    reg  [15:0]         out_pixel_cnt;
    reg  [10:0]         out_line_cnt;
    reg  [2*DW-1:0]     dout_temp_r;
    reg  [10:0]         dout_valid_temp_r;
    wire [2*DW-1:0]     dout_temp;
    wire                dout_valid_temp;
    wire [2*DW-1:0]     sum_row7;
    wire [2*DW-1:0]     sum_row8;
    wire [2*DW-1:0]     sum_row9;
    wire [2*DW-1:0]     sum_row10;
    wire [2*DW-1:0]     sum_row11;
    wire [2*DW-1:0]     sum_row12;
    wire [2*DW-1:0]     sum_row13;
    wire [2*DW-1:0]     sum_row14;
    wire [2*DW-1:0]     sum_1_2;
    wire [2*DW-1:0]     sum_col_r;
    wire [2*DW-1:0]     sum_col[0:KSZ-1];
    wire [2*DW-1:0]     sum_all;
    wire                is_boarder_tmp;
    reg                 is_boarder_r;
    wire [KSZ-1:0]      sum_valid;
    reg  [radius-1+10:0]        din_valid_tmp;
    reg  [(radius+10)*DW-1:0]   din_tmp;
    reg  [2*DW-1:0]     sum_tmp1;
    reg  [2*DW-1:0]     sum_tmp2;
    reg  [2*DW-1:0]     sum_tmp6;
    reg  [2*DW+1:0]     sum_tmp3;
    reg  [2*DW+1:0]     sum_tmp4;
    reg  [2*DW+1:0]     sum_tmp5;
    wire                valid;
    wire [31:0]         j;
    wire                sum_all_valid;
    wire [DW_ADD*2-1:0] add_all;
    wire [DW*2*KSZ-1:0] data_tmp1;
    wire [DW*2-1:0]     data_tmp2;
    wire                dout_valid_tmp;

    assign dout_valid = dout_valid_tmp;
    assign valid = din_valid | flush_line;

    // Internal reset: asserted by the external reset or while vsync is high.
    // rst_n is active low, so the asynchronous event is its falling edge.
    always @(posedge clk or negedge rst_n)
        if (rst_n == 1'b0)
            rst_all <= #1 1'b1;
        else
        begin
            if (vsync == 1'b1)
                rst_all <= #1 1'b1;
            else
                rst_all <= #1 1'b0;
        end

    // Chain of KSZ-1 line buffers, each followed by a horizontal running sum.
    // Buffer 0 is fed by din; buffer i is fed by what is read from buffer i-1.
    generate
    begin : xhdl0
        genvar i;
        for (i = 0; i <= KSZ - 2; i = i + 1)
        begin : buf_inst
            if (i == 0)
            begin : xhdl4
                // Blocking assignment: purely combinational.
                always @(*) line_din[i] = din;
                assign line_wren[i] = valid;
            end
            else
            begin : xhdl5
                always @(*) line_din[i] = line_dout[i - 1];
                assign line_wren[i] = line_rden[i - 1];
            end

            line_buffer line_buf_inst(.rst(rst_all), .clk(clk), .din(line_din[i]), .dout(line_dout[i]), .wr_en(line_wren[i]), .rd_en(line_rden[i]), .empty(line_empty[i]), .full(line_full[i]), .count(line_count[i]));

            // Reading starts once the buffer has reported a count of IW.
            assign line_rden[i] = buf_pop_en[i] & valid;

            always @(posedge clk)
            begin
                if (rst_all == 1'b1)
                    buf_pop_en[i] <= #1 1'b0;
                else if (line_count[i] == IW)
                    buf_pop_en[i] <= #1 1'b1;
            end

            // Horizontal running sum of the line leaving buffer i.
            Sum_1D #(DW, KSZ) row_sum(.clk(clk), .din(line_dout[i]), .din_valid(line_rden[i]), .dout(sum_col[i + 1]), .dout_valid(sum_valid[i + 1]));
        end
    end
    endgenerate

    // Horizontal running sum of the line currently arriving.
    Sum_1D #(DW, KSZ) row_sum_din(.clk(clk), .din(din), .din_valid(din_valid), .dout(sum_col[0]), .dout_valid(sum_valid[0]));

    // Rows 7..14 contribute zero while their gating flag is low.
    // NOTE(review): each sum_col[n] here is gated by sum_valid[n-1], not
    // sum_valid[n] - confirm this offset is intended.
    assign sum_row7  = (((sum_valid[6])  == 1'b1)) ? sum_col[7]  : {2*DW-1+1{1'b0}};
    assign sum_row8  = (((sum_valid[7])  == 1'b1)) ? sum_col[8]  : {2*DW-1+1{1'b0}};
    assign sum_row9  = (((sum_valid[8])  == 1'b1)) ? sum_col[9]  : {2*DW-1+1{1'b0}};
    assign sum_row10 = (((sum_valid[9])  == 1'b1)) ? sum_col[10] : {2*DW-1+1{1'b0}};
    assign sum_row11 = (((sum_valid[10]) == 1'b1)) ? sum_col[11] : {2*DW-1+1{1'b0}};
    assign sum_row12 = (((sum_valid[11]) == 1'b1)) ? sum_col[12] : {2*DW-1+1{1'b0}};
    assign sum_row13 = (((sum_valid[12]) == 1'b1)) ? sum_col[13] : {2*DW-1+1{1'b0}};
    assign sum_row14 = (((sum_valid[13]) == 1'b1)) ? sum_col[14] : {2*DW-1+1{1'b0}};

    // All 15 row sums packed side by side for the adder tree.
    assign data_tmp1 = ({sum_col[0], sum_col[1], sum_col[2], sum_col[3], sum_col[4], sum_col[5], sum_col[6], sum_row7, sum_row8, sum_row9, sum_row10, sum_row11, sum_row12, sum_row13, sum_row14});

    AddTree #(DW_ADD, KSZ) sum_total(.rst_n(rst_n), .clk(clk), .din_valid(sum_valid[7]), .din(data_tmp1), .dout(add_all), .dout_valid(sum_all_valid));

    assign sum_all = add_all[DW_ADD - 1:0];

    // Constant scaling by shift-and-add, in three registered stages:
    //   stage 1: sum_tmp1 = s>>5 + s>>8, sum_tmp2 = s>>1 + s>>6,
    //            sum_tmp3 = s<<2 + s>>11
    //   stage 2: sum_tmp4 = sum_tmp3 + sum_tmp1 (sum_tmp2 carried in sum_tmp6)
    //   stage 3: sum_tmp5 = sum_tmp4 + sum_tmp6
    // so sum_tmp5 ~= s * (4 + 1/2 + 1/32 + 1/64 + 1/256 + 1/2048).
    // NOTE(review): followed by the >>7 below this is about s * 8/225, not
    // s / 225 - confirm the intended output scale.
    always @(posedge clk)
    begin
        if (sum_all_valid == 1'b1)
        begin
            sum_tmp1 <= ({5'b00000, sum_all[2 * DW - 1:5]} + ({3'b000, sum_all[2 * DW - 1:8]}));
            sum_tmp2 <= ({1'b0, sum_all[2 * DW - 1:1]} + ({5'b00000, sum_all[2 * DW - 1:6]}));
            sum_tmp3 <= ({sum_all[2 * DW - 1:0], 2'b00}) + ({13'b0000000000000, sum_all[2 * DW - 1:11]});
        end
        if ((dout_valid_temp_r[0]) == 1'b1)
        begin
            sum_tmp4 <= #1 sum_tmp3 + ({2'b00, sum_tmp1});
            sum_tmp6 <= #1 sum_tmp2;
        end
        if ((dout_valid_temp_r[1]) == 1'b1)
            sum_tmp5 <= sum_tmp4 + ({2'b00, sum_tmp6});
    end

    // Drop the low 7 bits, rounding up when the top discarded bit is set.
    assign dout_temp = (((sum_tmp5[6]) == 1'b1)) ? ({5'b00000, sum_tmp5[2 * DW + 1:7]} + 1'b1) :
                       ({5'b00000, sum_tmp5[2 * DW + 1:7]});
    assign dout_valid_temp = dout_valid_temp_r[radius + 2];

    // Output is forced to zero on border pixels and while not yet valid.
    assign data_tmp2 = (((is_boarder_tmp | (~(dout_valid_temp_r[radius + 2]))) == 1'b1)) ? {DW+1{1'b0}} :
                       dout_temp;

    // Output registers and the delay line for the adder-tree valid flag.
    always @(posedge clk)
    begin
        if (rst_all == 1'b1)
        begin
            dout_temp_r       <= #1 {2*DW-1+1{1'b0}};
            dout_valid_temp_r <= #1 11'b00000000000;
            valid_r           <= #1 1'b0;
            is_boarder_r      <= 1'b0;
        end
        else
        begin
            dout_temp_r       <= #1 data_tmp2;
            dout_valid_temp_r <= #1 ({dout_valid_temp_r[9:0], sum_all_valid});
            valid_r           <= #1 valid;
            is_boarder_r      <= is_boarder_tmp;
        end
    end

    assign dout = dout_temp_r;
    assign dout_valid_tmp = dout_valid_temp_r[radius + 3];
    assign is_boarder = is_boarder_r;

    // Shift registers (radius+10 deep) delaying the data and read-enable
    // of line buffer radius-1.
    always @(posedge clk)
    begin
        if (rst_all == 1'b1)
        begin
            din_valid_tmp <= {(radius+10){1'b0}};
            // Parenthesised so the reset value has the full (radius+10)*DW width.
            din_tmp       <= {((radius+10)*DW){1'b0}};
        end
        else
        begin
            din_valid_tmp <= #1 ({din_valid_tmp[radius - 2 + 10:0], line_rden[radius - 1]});
            din_tmp       <= #1 ({din_tmp[(radius + 10) * DW - 1 - DW:0], line_dout[radius - 1]});
        end
    end

    assign din_valid_delay = din_valid_tmp[radius - 1 + 10];
    assign din_delay = din_tmp[(radius + 10) * DW - 1:(radius + 9) * DW];
    assign vsync_out = line_wren[radius - 1] & (~(line_wren[radius]));
endmodule