设计一个最简单的RGB ISP需要多少个模块呢?答案可能是4个,分别是CFA、CCM、GAMMA和RGB2YUV。有了这4个模块,我们就可以将Sensor输出的RAW data转化为在显示器上可以观看的画面了。
CFA(Color Filter Array):
也叫做去马赛克(Demosaic),目的是将Bayer pattern(如上图示)转换为RGB像素。CFA的算法有很多,这里介绍一种简单直接的算法——双线性插值。所谓双线性,就是在两个方向上(X, Y)分别做一次线性插值。
根据Bayer pattern的特点,我们最少需要一个3x3的滑窗来实现双线性插值。如何将像素装进这个滑窗呢?这里用到一种图像处理的经典方法——首尾相接的双口RAM,如下图示。
有了滑窗,接下来就是怎样计算的问题了。我们将Bayer pattern中R/G/B三种颜色的排列情况提取出来,如下图:
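可以看出,一共存在5种计算方式(记为C1~C5,均以3x3滑窗的中心像素为基准):C1直接取中心像素;C2取上、下、左、右四个相邻像素的平均值;C3取左、右两个相邻像素的平均值;C4取四个对角像素的平均值;C5取上、下两个相邻像素的平均值。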
在RTL设计中,滑窗每次移动都会把这5种计算执行一次,然后再根据Bayer pattern phase和当前所处的像素位置,选出合适的结果作为R/G/B输出值。这样说还是太抽象,我们看下面具体的例子。
(A)计算第一行第一个像素时,左边和上边是空白,当做0处理;这时候R是C4的结果,实际上等于RAW(2,2);G是C2的结果,实际上等于[RAW(1,2) + RAW(2,1)]/2;B是C1的结果,就等于滑窗中心像素RAW(1,1);
(B)第一行最后一个像素的计算跟第一个像素类似,但是有一点特殊操作,我们会把倒数第二列从RAM中多读出一遍,这是因为第一个像素是从读出第二列开始计算的,这里多读一次就能保证时序整体上跟图像行宽是一致的;此外,这样做还有一个好处是,除了第一个像素的C1~C5需要走单独的代码分支,剩余所有像素的C1~C5都可以走共同的代码分支;
(C)R是C5的结果;G是C1的结果;B是C3的结果;
(D)R是C4的结果;G是C2的结果;B是C1的结果;
(E)R是C1的结果;G是C2的结果;B是C4的结果;
(F)为了给下边留白,最后一行的滑窗跟别的行有点不一样,会往上顶一行,这样操作也是为了时序在整体上的一致性;
(G)跟第一行最后一个像素类似,倒数第二列会多读出一遍。
//////////////////////////////////////////////////////////////////////
////  Simple RGB ISP - Verilog
////  Author:
////      benyuee@foxmail.com
//////////////////////////////////////////////////////////////////////
////  Copyright (C) 2011 -- 20xx
////      benyuee@foxmail.com
////  This source file may be used and distributed without
////  restriction provided that this copyright statement is not
////  removed from the file and that any derivative work contains
////  the original copyright notice and the associated disclaimer.
////  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY
////  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
////  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
////  FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR
////  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
////  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
////  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
////  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
////  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
////  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
////  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
////  OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
////  POSSIBILITY OF SUCH DAMAGE.
//////////////////////////////////////////////////////////////////////
//-------------------------------------------------------------------
// Description
// CFA (Color Filter Array) is also known as Demosaic.
// It reconstructs sub-sampled color data for images captured by a
// bayer color filter array image sensor.
// Bilinear interpolation is implemented in this design.
//
//-------------------------------------------------------------------
// Change History
//
//
//
//
//
//-------------------------------------------------------------------
//-------------------------------------------------------------------
// cfa : bilinear demosaic of an 8-bit Bayer stream into 24-bit RGB.
//
// Interfaces
//   in_*   : Bayer pixel stream with valid/ready handshake; in_sof marks
//            the first pixel of a frame, in_eol the last pixel of a line.
//   out_*  : RGB stream ({R,G,B} in out_data[23:0]) with valid/ready
//            handshake and regenerated sof/eol markers.
//   creg_* : image width/height in pixels/lines and the Bayer phase
//            (2'b00 RG, 2'b01 GR, 2'b10 GB, 2'b11 BG).
//
// Structure
//   * A dual-port RAM (cfa_ram) is used as a line buffer. Every
//     accepted pixel triggers a read of its column; one cycle later the
//     word is written back as {ram_dout[23:0], pixel}, i.e. the history
//     of that column is shifted up by one byte and the new pixel is
//     placed in the low byte.
//   * The bytes read back feed a 3x3 sliding window, from which five
//     interpolation candidates (cal_1..cal_5) are computed each step.
//   * The Bayer phase plus line/column parity select which candidate
//     drives R, G and B. Results are queued in cfa_rvh_fifo.
//
// Fix relative to the previous revision
//   The RAM write is performed one cycle after the input handshake
//   (ram_wren / in_data_pipe1 are registered), but the write address
//   used to be taken directly from in_x_cntr, which has already been
//   advanced (or wrapped to 0 on EOL) by that time. The pixel of column
//   x was therefore merged with the history of column x and stored at
//   column x+1. The write address is now captured together with the
//   pixel so that the read-modify-write stays within one column.
//-------------------------------------------------------------------
module cfa (
    // clock and reset
    input               clk,
    input               rstn,
    // Input bayer data
    input      [ 7: 0]  in_data,
    input               in_valid,
    output              in_ready,
    input               in_eol,
    input               in_sof,
    // Output RGB data
    output     [23: 0]  out_data,
    output              out_valid,
    input               out_ready,
    output              out_eol,
    output              out_sof,
    // Control registers
    input      [15: 0]  creg_image_width,
    input      [15: 0]  creg_image_height,
    input      [ 1: 0]  creg_bayer_phase
);

    wire            good_frame;
    reg             sof_ok;
    reg  [15: 0]    in_x_cntr;
    reg  [ 7: 0]    in_data_pipe1;
    reg             ram_wren;
    reg  [10: 0]    ram_wr_addr_pipe1;   // write column, captured at the handshake
    wire [10: 0]    ram_wr_addr;
    wire [31: 0]    ram_din;
    wire            ram_rden;
    wire [10: 0]    ram_rd_addr;
    wire [31: 0]    ram_dout;
    wire            fifo_wr_push;
    wire [25: 0]    fifo_wr_data;
    wire            fifo_almost_full;
    wire            fifo_rd_pop;
    wire [25: 0]    fifo_rd_data;
    wire            fifo_rd_empty;
    reg  [15: 0]    rd_cntr, rd_cntr_pipe1, rd_cntr_pipe2, rd_cntr_pipe3;
    reg  [15: 0]    rd_line_cntr, rd_line_cntr_pipe1, rd_line_cntr_pipe2, rd_line_cntr_pipe3;
    reg  [10: 0]    rd_addr;
    reg             ram_dout_valid;
    wire            out_buffer_ready;
    reg  [ 7: 0]    window_3x3_buffer [2:0][2:0];
    reg             window_3x3_buffer_vld;
    reg  [ 9: 0]    cal_1_temp;
    reg  [ 9: 0]    cal_2_temp;
    reg  [ 9: 0]    cal_3_temp;
    reg  [ 9: 0]    cal_4_temp;
    reg  [ 9: 0]    cal_5_temp;
    reg             window_3x3_buffer_vld_pipe1;
    wire            cal_temp_valid;
    reg             cal_temp_valid_pipe1;
    reg  [ 7: 0]    red_out, green_out, blue_out;
    reg             sof_out, eol_out;
    reg             process_ready;
    reg             out_good_frame;

    //-------------------------------------------------------------------------//
    // Control Path
    //-------------------------------------------------------------------------//

    // make sure it starts from the SOF of one frame
    assign good_frame = sof_ok | (in_valid & in_ready & in_sof);

    always @(posedge clk or negedge rstn)
    begin
        if(!rstn)
            sof_ok <= 1'b0;
        else if(in_valid & in_ready & in_sof)
            sof_ok <= 1'b1;
    end

    // x is pixel counter: while a pixel is being accepted it holds the
    // column index of that pixel (0 for the first pixel of a line).
    always @(posedge clk or negedge rstn)
    begin
        if(!rstn)
            in_x_cntr <= 0;
        else if(good_frame & in_valid & in_ready)
        begin
            if(in_sof)
                in_x_cntr <= 1;
            else if(in_eol)
                in_x_cntr <= 0;
            else
                in_x_cntr <= in_x_cntr + 1'b1;
        end
    end

    // backpressure from out_buffer
    assign in_ready = out_buffer_ready & process_ready;

    // data in handshake
    always @(posedge clk or negedge rstn)
    begin
        if(!rstn)
        begin
            process_ready <= 1'b1;
        end
        else if(good_frame)
        begin
            // Read line needs one more cycle than write line (at the last
            // pixel), so in_ready should be held low for one clock cycle.
            if(process_ready && in_valid && in_ready && (in_x_cntr == creg_image_width-1)) // one new line has been buffered
                process_ready <= 1'b0;
            else if(~process_ready && ram_rden && (rd_cntr == creg_image_width)) // the last pixel in rd_line has been read out
                process_ready <= 1'b1;
        end
    end

    // RAM rd counter, read one more time than creg_image_width, the last read
    // is ram_dout[creg_image_width-2]
    always @(posedge clk or negedge rstn)
    begin
        if(!rstn)
            rd_cntr <= 0;
        else if(ram_rden)
            rd_cntr <= (rd_cntr == creg_image_width) ? 0 : (rd_cntr + 1'b1);
    end

    // rd line counter
    always @(posedge clk or negedge rstn)
    begin
        if(!rstn)
            rd_line_cntr <= 0;
        else if(ram_rden && (rd_cntr == creg_image_width))
            rd_line_cntr <= (rd_line_cntr == creg_image_height-1) ? 0 : (rd_line_cntr + 1'b1);
    end

    // rd counter mapping to RAM rd_addr
    always @(*)
    begin
        if(rd_cntr == creg_image_width) // the last read out
            rd_addr = creg_image_width - 2;
        else
            rd_addr = rd_cntr;
    end

    // Pipe rden & out_buffer_ready to generate ram_dout_valid
    // Pipe rd_cntr to sync up with ram_dout_valid
    // Pipe rd_line_cntr to sync up with ram_dout_valid
    always @(posedge clk or negedge rstn)
    begin
        if(!rstn)
        begin
            ram_dout_valid      <= 1'b0;
            rd_cntr_pipe1       <= 0;
            rd_line_cntr_pipe1  <= 0;
        end
        else
        begin
            ram_dout_valid      <= ram_rden;
            rd_cntr_pipe1       <= rd_cntr;
            rd_line_cntr_pipe1  <= rd_line_cntr;
        end
    end

    // Pipe rd_cntr_pipe1 to sync up with window_3x3_buffer
    // Pipe rd_line_cntr_pipe1 to sync up with window_3x3_buffer
    // Pipe ram_dout_valid to generate window_3x3_buffer_vld
    always @(posedge clk or negedge rstn)
    begin
        if(!rstn)
        begin
            rd_cntr_pipe2           <= 0;
            rd_line_cntr_pipe2      <= 0;
            window_3x3_buffer_vld   <= 1'b0;
        end
        else
        begin
            rd_cntr_pipe2           <= rd_cntr_pipe1;
            rd_line_cntr_pipe2      <= rd_line_cntr_pipe1;
            window_3x3_buffer_vld   <= ram_dout_valid;
        end
    end

    // Pipe rd_cntr_pipe2 to sync up with cal_temp_valid
    // Pipe rd_line_cntr_pipe2 to sync up with cal_temp_valid
    // Pipe window_3x3_buffer_vld to sync up with cal_temp_valid
    always @(posedge clk or negedge rstn)
    begin
        if(!rstn)
        begin
            rd_cntr_pipe3               <= 0;
            rd_line_cntr_pipe3          <= 0;
            window_3x3_buffer_vld_pipe1 <= 1'b0;
        end
        else
        begin
            rd_cntr_pipe3               <= rd_cntr_pipe2;
            rd_line_cntr_pipe3          <= rd_line_cntr_pipe2;
            window_3x3_buffer_vld_pipe1 <= window_3x3_buffer_vld;
        end
    end

    assign cal_temp_valid = window_3x3_buffer_vld_pipe1 & ~(rd_cntr_pipe3 == 0); // mask off the first invalid cal_temp pixel

    // Generate SOF out. The flag is only updated on valid results, so it
    // keeps its value until the next valid pixel arrives.
    always @(posedge clk or negedge rstn)
    begin
        if(!rstn)
            sof_out <= 1'b0;
        else if(cal_temp_valid)
        begin
            if((rd_line_cntr_pipe3 == 2) && (rd_cntr_pipe3 == 1)) // First pixel of the first line
                sof_out <= 1'b1;
            else
                sof_out <= 1'b0;
        end
    end

    // Generate EOL out
    always @(posedge clk or negedge rstn)
    begin
        if(!rstn)
            eol_out <= 1'b0;
        else if(cal_temp_valid && (rd_cntr_pipe3 == creg_image_width)) // Last pixel in one line
            eol_out <= 1'b1;
        else
            eol_out <= 1'b0;
    end

    // Generate out_good_frame: results are only pushed to the FIFO once the
    // first pixel of a first line has been seen.
    always @(posedge clk or negedge rstn)
    begin
        if(!rstn)
            out_good_frame <= 1'b0;
        else if(cal_temp_valid && (rd_line_cntr_pipe3 == 2) && (rd_cntr_pipe3 == 1))
            out_good_frame <= 1'b1;
    end

    // Pipe cal_temp_valid to sync up with red/green/blue_out
    always @(posedge clk or negedge rstn)
    begin
        if(!rstn)
            cal_temp_valid_pipe1 <= 1'b0;
        else
            cal_temp_valid_pipe1 <= cal_temp_valid;
    end

    //-------------------------------------------------------------------------//
    // Data Path
    //-------------------------------------------------------------------------//

    // pipe in_data, and capture the column it belongs to. in_x_cntr is
    // updated by this very handshake, so it must be sampled here rather than
    // in the following cycle when the RAM write actually takes place.
    // SOF is treated as column 0, mirroring the resync of in_x_cntr.
    always @(posedge clk)
    begin
        if(good_frame & in_valid & in_ready)
        begin
            in_data_pipe1     <= in_data[7:0];
            ram_wr_addr_pipe1 <= in_sof ? 11'd0 : in_x_cntr[10:0];
        end
    end

    // Fill 3x3 buffer with ram_dout data
    always @(posedge clk)
    begin
        if(ram_dout_valid)
        begin
            if(rd_cntr_pipe1 == creg_image_width) // The last pixel
            begin
                // The extra read re-fetches column (width-2) after it has been
                // rewritten with the current line, hence the byte lanes are
                // taken one position higher than for normal pixels.
                window_3x3_buffer[0][2] <= ram_dout[31:24]; //Upper line
                window_3x3_buffer[0][1] <= window_3x3_buffer[0][2];
                window_3x3_buffer[0][0] <= window_3x3_buffer[0][1];
                window_3x3_buffer[1][2] <= ram_dout[23:16]; //Middle line
                window_3x3_buffer[1][1] <= window_3x3_buffer[1][2];
                window_3x3_buffer[1][0] <= window_3x3_buffer[1][1];
                window_3x3_buffer[2][2] <= ram_dout[15:8];  //Lower line
                window_3x3_buffer[2][1] <= window_3x3_buffer[2][2];
                window_3x3_buffer[2][0] <= window_3x3_buffer[2][1];
            end
            else // normal pixels
            begin
                window_3x3_buffer[0][2] <= ram_dout[23:16]; //Upper line
                window_3x3_buffer[0][1] <= window_3x3_buffer[0][2];
                window_3x3_buffer[0][0] <= window_3x3_buffer[0][1];
                window_3x3_buffer[1][2] <= ram_dout[15:8];  //Middle line
                window_3x3_buffer[1][1] <= window_3x3_buffer[1][2];
                window_3x3_buffer[1][0] <= window_3x3_buffer[1][1];
                window_3x3_buffer[2][2] <= ram_dout[7:0];   //Lower line
                window_3x3_buffer[2][1] <= window_3x3_buffer[2][2];
                window_3x3_buffer[2][0] <= window_3x3_buffer[2][1];
            end
        end
    end

    // Bilinear Interpolation Calculation
    //   cal_1 : centre pixel
    //   cal_2 : average of the up/down/left/right neighbours
    //   cal_3 : average of the left/right neighbours
    //   cal_4 : average of the diagonal neighbours
    //   cal_5 : average of the up/down neighbours
    // On the first/last line and on the first pixel of a line the missing
    // neighbours are replaced by the ones that do exist.
    always @(posedge clk)
    begin
        if(window_3x3_buffer_vld)
        begin
            if(rd_line_cntr_pipe2 == 2) // Start calculating the first line
            begin
                if(rd_cntr_pipe2 == 1) // The first pixel
                begin
                    cal_1_temp <= window_3x3_buffer[1][1];
                    cal_2_temp <= (window_3x3_buffer[1][2] + window_3x3_buffer[2][1]) >> 1;
                    cal_3_temp <= window_3x3_buffer[1][2];
                    cal_4_temp <= window_3x3_buffer[2][2];
                    cal_5_temp <= window_3x3_buffer[2][1];
                end
                else // normal pixels
                begin
                    cal_1_temp <= window_3x3_buffer[1][1];
                    cal_2_temp <= (window_3x3_buffer[1][2] + window_3x3_buffer[1][0] + (window_3x3_buffer[2][1] << 1)) >> 2;
                    cal_3_temp <= (window_3x3_buffer[1][2] + window_3x3_buffer[1][0]) >> 1;
                    cal_4_temp <= (window_3x3_buffer[2][2] + window_3x3_buffer[2][0]) >> 1;
                    cal_5_temp <= window_3x3_buffer[2][1];
                end
            end
            else if(rd_line_cntr_pipe2 == 1) // Start calculating the last line in previous frame
            begin
                if(rd_cntr_pipe2 == 1) // The first pixel
                begin
                    cal_1_temp <= window_3x3_buffer[1][1];
                    cal_2_temp <= (window_3x3_buffer[1][2] + window_3x3_buffer[0][1]) >> 1;
                    cal_3_temp <= window_3x3_buffer[1][2];
                    cal_4_temp <= window_3x3_buffer[0][2];
                    cal_5_temp <= window_3x3_buffer[0][1];
                end
                else // normal pixels
                begin
                    cal_1_temp <= window_3x3_buffer[1][1];
                    cal_2_temp <= (window_3x3_buffer[1][2] + window_3x3_buffer[1][0] + (window_3x3_buffer[0][1] << 1)) >> 2;
                    cal_3_temp <= (window_3x3_buffer[1][2] + window_3x3_buffer[1][0]) >> 1;
                    cal_4_temp <= (window_3x3_buffer[0][2] + window_3x3_buffer[0][0]) >> 1;
                    cal_5_temp <= window_3x3_buffer[0][1];
                end
            end
            else // normal lines
            begin
                if(rd_cntr_pipe2 == 1) // The first pixel
                begin
                    cal_1_temp <= window_3x3_buffer[1][1];
                    cal_2_temp <= (window_3x3_buffer[0][1] + window_3x3_buffer[2][1] + (window_3x3_buffer[1][2] << 1)) >> 2;
                    cal_3_temp <= window_3x3_buffer[1][2];
                    cal_4_temp <= (window_3x3_buffer[0][2] + window_3x3_buffer[2][2]) >> 1;
                    cal_5_temp <= (window_3x3_buffer[0][1] + window_3x3_buffer[2][1]) >> 1;
                end
                else // normal pixels
                begin
                    cal_1_temp <= window_3x3_buffer[1][1];
                    cal_2_temp <= (window_3x3_buffer[0][1] + window_3x3_buffer[2][1] + window_3x3_buffer[1][2] + window_3x3_buffer[1][0]) >> 2;
                    cal_3_temp <= (window_3x3_buffer[1][2] + window_3x3_buffer[1][0]) >> 1;
                    cal_4_temp <= (window_3x3_buffer[0][2] + window_3x3_buffer[2][2] + window_3x3_buffer[0][0] + window_3x3_buffer[2][0]) >> 2;
                    cal_5_temp <= (window_3x3_buffer[0][1] + window_3x3_buffer[2][1]) >> 1;
                end
            end
        end
    end

    // Route the right calculation result to output
    always @(posedge clk)
    begin
        if(cal_temp_valid)
        begin
            if(rd_line_cntr_pipe3[0] == 1'b0) // Odd lines
            begin
                case(creg_bayer_phase[1:0])
                    2'b00: // RG
                    begin
                        red_out   <= (rd_cntr_pipe3[0] == 1'b1) ? cal_1_temp : cal_3_temp;
                        green_out <= (rd_cntr_pipe3[0] == 1'b1) ? cal_2_temp : cal_1_temp;
                        blue_out  <= (rd_cntr_pipe3[0] == 1'b1) ? cal_4_temp : cal_5_temp;
                    end
                    2'b01: // GR
                    begin
                        red_out   <= (rd_cntr_pipe3[0] == 1'b1) ? cal_3_temp : cal_1_temp;
                        green_out <= (rd_cntr_pipe3[0] == 1'b1) ? cal_1_temp : cal_2_temp;
                        blue_out  <= (rd_cntr_pipe3[0] == 1'b1) ? cal_5_temp : cal_4_temp;
                    end
                    2'b10: // GB
                    begin
                        red_out   <= (rd_cntr_pipe3[0] == 1'b1) ? cal_5_temp : cal_4_temp;
                        green_out <= (rd_cntr_pipe3[0] == 1'b1) ? cal_1_temp : cal_2_temp;
                        blue_out  <= (rd_cntr_pipe3[0] == 1'b1) ? cal_3_temp : cal_1_temp;
                    end
                    2'b11: // BG
                    begin
                        red_out   <= (rd_cntr_pipe3[0] == 1'b1) ? cal_4_temp : cal_5_temp;
                        green_out <= (rd_cntr_pipe3[0] == 1'b1) ? cal_2_temp : cal_1_temp;
                        blue_out  <= (rd_cntr_pipe3[0] == 1'b1) ? cal_1_temp : cal_3_temp;
                    end
                endcase
            end
            else // Even lines
            begin
                case(creg_bayer_phase[1:0])
                    2'b00: // RG
                    begin
                        red_out   <= (rd_cntr_pipe3[0] == 1'b1) ? cal_5_temp : cal_4_temp;
                        green_out <= (rd_cntr_pipe3[0] == 1'b1) ? cal_1_temp : cal_2_temp;
                        blue_out  <= (rd_cntr_pipe3[0] == 1'b1) ? cal_3_temp : cal_1_temp;
                    end
                    2'b01: // GR
                    begin
                        red_out   <= (rd_cntr_pipe3[0] == 1'b1) ? cal_4_temp : cal_5_temp;
                        green_out <= (rd_cntr_pipe3[0] == 1'b1) ? cal_2_temp : cal_1_temp;
                        blue_out  <= (rd_cntr_pipe3[0] == 1'b1) ? cal_1_temp : cal_3_temp;
                    end
                    2'b10: // GB
                    begin
                        red_out   <= (rd_cntr_pipe3[0] == 1'b1) ? cal_1_temp : cal_3_temp;
                        green_out <= (rd_cntr_pipe3[0] == 1'b1) ? cal_2_temp : cal_1_temp;
                        blue_out  <= (rd_cntr_pipe3[0] == 1'b1) ? cal_4_temp : cal_5_temp;
                    end
                    2'b11: // BG
                    begin
                        red_out   <= (rd_cntr_pipe3[0] == 1'b1) ? cal_3_temp : cal_1_temp;
                        green_out <= (rd_cntr_pipe3[0] == 1'b1) ? cal_1_temp : cal_2_temp;
                        blue_out  <= (rd_cntr_pipe3[0] == 1'b1) ? cal_5_temp : cal_4_temp;
                    end
                endcase
            end
        end
    end

    //****************************************************************************************
    // read data has 1 cycle latency relative to rden
    //****************************************************************************************
    // New word = previous three lines of this column shifted up + new pixel.
    assign ram_din     = {ram_dout[23:0], in_data_pipe1[7:0]};
    // Column captured at the handshake, so that the delayed write goes back
    // to the same address that ram_dout was read from.
    assign ram_wr_addr = ram_wr_addr_pipe1;
    // While process_ready is low the extra end-of-line read is issued
    // unconditionally.
    assign ram_rden    = process_ready ? (good_frame & in_valid & in_ready) : 1'b1;
    assign ram_rd_addr = rd_addr;

    // Pipe ram wren to be aligned with ram din
    always @(posedge clk or negedge rstn)
    begin
        if(!rstn)
            ram_wren <= 1'b0;
        else
            ram_wren <= good_frame & in_valid & in_ready;
    end

    cfa_ram cfa_ram_inst // 32bit x 2048depth
    (
        .clka   (clk),
        .ena    (1'b1),
        .wea    (ram_wren),
        .addra  (ram_wr_addr[10:0]),
        .dina   (ram_din[31:0]),
        .clkb   (clk),
        .enb    (ram_rden),
        .addrb  (ram_rd_addr),
        .doutb  (ram_dout[31:0])
    );

    // Output buffering: {sof, eol, R, G, B}
    assign fifo_wr_push     = out_good_frame & cal_temp_valid_pipe1;
    assign fifo_wr_data     = {sof_out, eol_out, red_out[7:0], green_out[7:0], blue_out[7:0]};
    assign out_buffer_ready = ~fifo_almost_full;
    assign fifo_rd_pop      = out_valid & out_ready;
    assign out_valid        = ~fifo_rd_empty;
    assign out_data[23:0]   = fifo_rd_data[23:0];
    assign out_eol          = fifo_rd_data[24];
    assign out_sof          = fifo_rd_data[25];

    cfa_rvh_fifo cfa_rvh_fifo_inst // 26bit x 16depth
    (
        .clk                ( clk ) ,
        .srst               ( ~rstn ) ,
        .din                ( fifo_wr_data[25:0] ) ,
        .wr_en              ( fifo_wr_push ) ,
        .rd_en              ( fifo_rd_pop ) ,
        .prog_full_thresh   ( 4'd10 ) ,
        .dout               ( fifo_rd_data[25:0] ) ,
        .full               ( ) ,
        .almost_full        ( ) ,
        .empty              ( fifo_rd_empty ) ,
        .prog_full          ( fifo_almost_full ) ,
        .wr_rst_busy        ( ) ,
        .rd_rst_busy        ( )
    );

endmodule
CCM(Color Correction Matrix):
经CFA后,像素从Bayer域转换为RGB域,但是各颜色之间存在互相渗透,会导致颜色误差,而CCM就是用来校正这种误差的。
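CCM的实现就是一组矩阵运算(包括9个系数和3个偏置):
$$\begin{bmatrix} R_o \\ G_o \\ B_o \end{bmatrix} = \begin{bmatrix} K_{11} & K_{12} & K_{13} \\ K_{21} & K_{22} & K_{23} \\ K_{31} & K_{32} & K_{33} \end{bmatrix} \begin{bmatrix} R_i \\ G_i \\ B_i \end{bmatrix} + \begin{bmatrix} O_r \\ O_g \\ O_b \end{bmatrix}$$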
那么怎么得到这些参数呢?一般是用该传感器拍摄一张特殊图像,然后与标准图像相比较,从而计算得出CCM参数。
//////////////////////////////////////////////////////////////////////
////  Simple RGB ISP - Verilog
////  Author:
////      benyuee@foxmail.com
//////////////////////////////////////////////////////////////////////
////  Copyright (C) 2011 -- 20xx
////      benyuee@foxmail.com
////  This source file may be used and distributed without
////  restriction provided that this copyright statement is not
////  removed from the file and that any derivative work contains
////  the original copyright notice and the associated disclaimer.
////  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY
////  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
////  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
////  FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR
////  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
////  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
////  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
////  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
////  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
////  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
////  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
////  OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
////  POSSIBILITY OF SUCH DAMAGE.
//////////////////////////////////////////////////////////////////////
//-------------------------------------------------------------------
// Description
// CCM (Color Correction Matrix)
// _ _ _ _ _ _ _ _
// | | | || | | |
// | Ro | | K11 K12 K13 || Ri | | Or |
// | Go | = | K21 K22 K23 || Gi | + | Og |
// | Bo | | K31 K32 K33 || Bi | | Ob |
// |_ _| |_ _||_ _| |_ _|
//
// K = [-8, 8) << 14
//
//-------------------------------------------------------------------
// Change History
//
//
//
//
//
//-------------------------------------------------------------------
module ccm
(
    // Clock and reset
    input                   clk,
    input                   rstn,
    // RGB input
    input                   in_valid,
    output                  in_ready,
    input           [23: 0] in_data,
    input                   in_eol,
    input                   in_sof,
    // RGB output
    output                  out_valid,
    input                   out_ready,
    output          [23: 0] out_data,
    output                  out_eol,
    output                  out_sof,
    // Registers
    input   signed  [17:0]  creg_coeff_k11,
    input   signed  [17:0]  creg_coeff_k12,
    input   signed  [17:0]  creg_coeff_k13,
    input   signed  [17:0]  creg_coeff_k21,
    input   signed  [17:0]  creg_coeff_k22,
    input   signed  [17:0]  creg_coeff_k23,
    input   signed  [17:0]  creg_coeff_k31,
    input   signed  [17:0]  creg_coeff_k32,
    input   signed  [17:0]  creg_coeff_k33,
    input   signed  [ 8:0]  creg_roffset,
    input   signed  [ 8:0]  creg_goffset,
    input   signed  [ 8:0]  creg_boffset,
    input   signed  [ 8:0]  creg_clip,
    input   signed  [ 8:0]  creg_clamp
);

    //==========================================================================
    // 3x3 colour correction:
    //
    //      [Ro]   [K11 K12 K13] [Ri]   [Or]
    //      [Go] = [K21 K22 K23] [Gi] + [Og]
    //      [Bo]   [K31 K32 K33] [Bi]   [Ob]
    //
    // Coefficients are signed fixed point, K = [-8, 8) << 14.
    //
    // Pipeline (PIPE_STAGES = 7 shift-register taps):
    //   tap 0     : input pixel registered
    //   tap 4     : multiplier products available (ccm_mult latency = 4)
    //   tap 5     : products + offset summed
    //   tap 6     : clip / clamp / round done, result pushed into the FIFO
    //==========================================================================
    localparam PIPE_STAGES = 7;

    // Handshake strobe: one pixel is accepted in this cycle.
    wire                        in_fire = in_valid & in_ready;

    // Registered input components, zero-extended to signed 9 bit.
    reg  signed [ 8: 0]         in_red;
    reg  signed [ 8: 0]         in_green;
    reg  signed [ 8: 0]         in_blue;

    // Side-band pipelines and their clock enables.
    wire                        in_valid_cken;
    wire                        in_sof_cken;
    wire                        in_eol_cken;
    reg  [PIPE_STAGES-1: 0]     in_valid_pipe;
    reg  [PIPE_STAGES-1: 0]     in_sof_pipe;
    reg  [PIPE_STAGES-1: 0]     in_eol_pipe;

    // Multiplier outputs.
    wire signed [26: 0]         red_mul_k11,   red_mul_k21,   red_mul_k31;
    wire signed [26: 0]         green_mul_k12, green_mul_k22, green_mul_k32;
    wire signed [26: 0]         blue_mul_k13,  blue_mul_k23,  blue_mul_k33;

    // Accumulated results, still carrying 14 fractional bits.
    reg  signed [27: 0]         red_shift_14;
    reg  signed [27: 0]         green_shift_14;
    reg  signed [27: 0]         blue_shift_14;

    // Integer part of the accumulated results.
    wire signed [13: 0]         red_cast, green_cast, blue_cast;

    // Final 8-bit components.
    reg  [ 7: 0]                red_out, green_out, blue_out;

    // Output FIFO plumbing.
    wire                        fifo_wr_push;
    wire [25: 0]                fifo_wr_data;
    wire                        fifo_almost_full;
    wire                        fifo_rd_pop;
    wire [25: 0]                fifo_rd_data;
    wire                        fifo_rd_empty;

    //--------------------------------------------------------------------------
    // Limit the integer part to [lower, upper]; inside that range add one
    // when the most significant fractional bit is set (round half up).
    //--------------------------------------------------------------------------
    function [7:0] limit_and_round;
        input signed [13:0] value;
        input               round_up;
        input signed [ 8:0] upper;
        input signed [ 8:0] lower;
        begin
            if (value >= upper)
                limit_and_round = upper[7:0];
            else if (value <= lower)
                limit_and_round = lower[7:0];
            else if (round_up == 1'b1)
                limit_and_round = value[7:0] + 1'b1;
            else
                limit_and_round = value[7:0];
        end
    endfunction

    //--------------------------------------------------------------------------
    // Control path: valid / sof / eol travel through shift registers that are
    // only clocked while something is entering or still in flight.
    //--------------------------------------------------------------------------
    assign in_valid_cken = in_fire            | (|in_valid_pipe[PIPE_STAGES-1:0]);
    assign in_sof_cken   = (in_fire & in_sof) | (|in_sof_pipe[PIPE_STAGES-1:0]);
    assign in_eol_cken   = (in_fire & in_eol) | (|in_eol_pipe[PIPE_STAGES-1:0]);

    always @(posedge clk or negedge rstn)
    begin
        if (!rstn)
        begin
            in_valid_pipe <= {PIPE_STAGES{1'b0}};
            in_sof_pipe   <= {PIPE_STAGES{1'b0}};
            in_eol_pipe   <= {PIPE_STAGES{1'b0}};
        end
        else
        begin
            if (in_valid_cken)
                in_valid_pipe <= {in_valid_pipe[PIPE_STAGES-2:0], in_fire};
            if (in_sof_cken)
                in_sof_pipe   <= {in_sof_pipe[PIPE_STAGES-2:0], (in_fire & in_sof)};
            if (in_eol_cken)
                in_eol_pipe   <= {in_eol_pipe[PIPE_STAGES-2:0], (in_fire & in_eol)};
        end
    end

    //--------------------------------------------------------------------------
    // Data path
    //--------------------------------------------------------------------------

    // Stage 0: latch the accepted pixel.
    always @(posedge clk)
    begin
        if (in_fire)
        begin
            in_blue  <= {1'b0, in_data[ 7: 0]};
            in_green <= {1'b0, in_data[15: 8]};
            in_red   <= {1'b0, in_data[23:16]};
        end
    end

    // Stage 5: add the three products of each row and the offset, which is
    // aligned to the 14 fractional bits of the products.
    always @(posedge clk)
    begin
        if (in_valid_pipe[4])
        begin
            red_shift_14   <= red_mul_k11 + green_mul_k12 + blue_mul_k13 + (creg_roffset << 14);
            green_shift_14 <= red_mul_k21 + green_mul_k22 + blue_mul_k23 + (creg_goffset << 14);
            blue_shift_14  <= red_mul_k31 + green_mul_k32 + blue_mul_k33 + (creg_boffset << 14);
        end
    end

    assign red_cast   = red_shift_14[27:14];
    assign green_cast = green_shift_14[27:14];
    assign blue_cast  = blue_shift_14[27:14];

    // Stage 6: saturate against creg_clip / creg_clamp and round.
    always @(posedge clk)
    begin
        if (in_valid_pipe[5])
        begin
            red_out   <= limit_and_round(red_cast,   red_shift_14[13],   creg_clip, creg_clamp);
            green_out <= limit_and_round(green_cast, green_shift_14[13], creg_clip, creg_clamp);
            blue_out  <= limit_and_round(blue_cast,  blue_shift_14[13],  creg_clip, creg_clamp);
        end
    end

    //--------------------------------------------------------------------------
    // Output FIFO: {sof, eol, R, G, B}. The programmable-full flag is used as
    // back-pressure towards the input.
    //--------------------------------------------------------------------------
    assign fifo_wr_push = in_valid_pipe[PIPE_STAGES-1];
    assign fifo_wr_data = {in_sof_pipe[PIPE_STAGES-1], in_eol_pipe[PIPE_STAGES-1],
                           red_out[7:0], green_out[7:0], blue_out[7:0]};

    ccm_rvh_fifo ccm_rvh_fifo_inst // 26bit x 16depth
    (
        .clk                ( clk                   ),
        .srst               ( ~rstn                 ),
        .din                ( fifo_wr_data[25:0]    ),
        .wr_en              ( fifo_wr_push          ),
        .rd_en              ( fifo_rd_pop           ),
        .prog_full_thresh   ( 4'd8                  ),
        .dout               ( fifo_rd_data[25:0]    ),
        .full               (                       ),
        .almost_full        (                       ),
        .empty              ( fifo_rd_empty         ),
        .prog_full          ( fifo_almost_full      ),
        .wr_rst_busy        (                       ),
        .rd_rst_busy        (                       )
    );

    assign in_ready    = ~fifo_almost_full;
    assign out_valid   = ~fifo_rd_empty;
    assign fifo_rd_pop = out_valid & out_ready;
    assign out_sof     = fifo_rd_data[25];
    assign out_eol     = fifo_rd_data[24];
    assign out_data    = fifo_rd_data[23:0];

    //--------------------------------------------------------------------------
    // Multipliers, 9bit x 18bit signed. Per the original timing diagram the
    // product appears 4 clock cycles after the operands (latency = 4).
    //--------------------------------------------------------------------------

    // Row 1 : Ro
    ccm_mult ccm_mult_inst1 ( .CLK(clk), .A(in_red),   .B(creg_coeff_k11), .P(red_mul_k11)   );
    ccm_mult ccm_mult_inst2 ( .CLK(clk), .A(in_green), .B(creg_coeff_k12), .P(green_mul_k12) );
    ccm_mult ccm_mult_inst3 ( .CLK(clk), .A(in_blue),  .B(creg_coeff_k13), .P(blue_mul_k13)  );

    // Row 2 : Go
    ccm_mult ccm_mult_inst4 ( .CLK(clk), .A(in_red),   .B(creg_coeff_k21), .P(red_mul_k21)   );
    ccm_mult ccm_mult_inst5 ( .CLK(clk), .A(in_green), .B(creg_coeff_k22), .P(green_mul_k22) );
    ccm_mult ccm_mult_inst6 ( .CLK(clk), .A(in_blue),  .B(creg_coeff_k23), .P(blue_mul_k23)  );

    // Row 3 : Bo
    ccm_mult ccm_mult_inst7 ( .CLK(clk), .A(in_red),   .B(creg_coeff_k31), .P(red_mul_k31)   );
    ccm_mult ccm_mult_inst8 ( .CLK(clk), .A(in_green), .B(creg_coeff_k32), .P(green_mul_k32) );
    ccm_mult ccm_mult_inst9 ( .CLK(clk), .A(in_blue),  .B(creg_coeff_k33), .P(blue_mul_k33)  );

endmodule
GAMMA:
Gamma校正也是RGB ISP不可或缺的一环,因为人眼其实是一个非线性感知系统,而图像传感器是线性系统,如果图像传感器的数据不经处理就给人看,会让人觉得“不自然”,而Gamma校正就是通过一种非线性映射,让图像传感器的数据在显示终端看起来更接近自然界的“真实”景象。
这里使用一种简单的查找表(LUT)方式来实现Gamma映射,对于不同的Gamma曲线,只要改变LUT中的映射值即可。
//////////////////////////////////////////////////////////////////////
////  Simple RGB ISP - Verilog
////  Author:
////      benyuee@foxmail.com
//////////////////////////////////////////////////////////////////////
////  Copyright (C) 2011 -- 20xx
////      benyuee@foxmail.com
////  This source file may be used and distributed without
////  restriction provided that this copyright statement is not
////  removed from the file and that any derivative work contains
////  the original copyright notice and the associated disclaimer.
////  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY
////  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
////  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
////  FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR
////  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
////  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
////  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
////  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
////  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
////  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
////  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
////  OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
////  POSSIBILITY OF SUCH DAMAGE.
//////////////////////////////////////////////////////////////////////
//-------------------------------------------------------------------
// Description
// Gamma correction
// LUT is implemented in this design.
//
//-------------------------------------------------------------------
// Change History
//
//
//
//
//
//-------------------------------------------------------------------
module gamma
(
    // Clock and reset
    input               clk,
    input               rstn,
    // RGB input
    input               in_valid,
    output              in_ready,
    input   [23: 0]     in_data,
    input               in_sof,
    input               in_eol,
    // RGB output
    output              out_valid,
    input               out_ready,
    output  [23: 0]     out_data,
    output              out_sof,
    output              out_eol,
    // Registers
    input               axil_aclk,
    input               axil_aclken,
    input               axil_rstn,
    input               creg_wr_gamma_lut_en,
    input   [ 7: 0]     creg_gamma_wraddr,
    input   [ 7: 0]     creg_gamma_wrvalue
);

    //==========================================================================
    // Gamma correction by table look-up.
    //
    // A 256-entry x 8-bit table is written from the axil_aclk domain and read
    // from the clk domain, once per colour component. Two register stages sit
    // between the input handshake and the output FIFO:
    //   stage 1 : pixel and sof/eol captured
    //   stage 2 : table outputs and sof/eol registered, FIFO write strobe set
    //==========================================================================

    // Look-up table
    reg  [ 7: 0]    gamma_lut_mem [255:0];
    integer         i;                      // reset loop index

    // Handshake strobe: one pixel accepted this cycle
    wire            in_fire = in_valid & in_ready;

    // Stage 1 registers
    reg             in_valid_pipe1;
    reg             in_sof_pipe1;
    reg             in_eol_pipe1;
    reg  [ 7: 0]    in_red;
    reg  [ 7: 0]    in_green;
    reg  [ 7: 0]    in_blue;

    // Stage 2 registers
    reg             fifo_wr_push;
    reg             out_sof_buf;
    reg             out_eol_buf;
    reg  [ 7: 0]    out_red_buf;
    reg  [ 7: 0]    out_green_buf;
    reg  [ 7: 0]    out_blue_buf;

    // Output FIFO plumbing
    wire [25: 0]    fifo_wr_data;
    wire            fifo_almost_full;
    wire            fifo_rd_pop;
    wire [25: 0]    fifo_rd_data;
    wire            fifo_rd_empty;

    //--------------------------------------------------------------------------
    // Table maintenance (axil_aclk domain). Reset loads the identity mapping
    // (GAMMA = 1); afterwards single entries are overwritten on request.
    //--------------------------------------------------------------------------
    always @(posedge axil_aclk or negedge axil_rstn)
    begin
        if (!axil_rstn)
        begin
            for (i = 0; i < 256; i = i + 1)
                gamma_lut_mem[i] <= i[7:0];
        end
        else if (axil_aclken & creg_wr_gamma_lut_en)
        begin
            gamma_lut_mem[creg_gamma_wraddr] <= creg_gamma_wrvalue;
        end
    end

    //--------------------------------------------------------------------------
    // Control pipeline (clk domain)
    //--------------------------------------------------------------------------
    always @(posedge clk or negedge rstn)
    begin
        if (!rstn)
        begin
            in_valid_pipe1 <= 1'b0;
            in_sof_pipe1   <= 1'b0;
            in_eol_pipe1   <= 1'b0;
            fifo_wr_push   <= 1'b0;
            out_sof_buf    <= 1'b0;
            out_eol_buf    <= 1'b0;
        end
        else
        begin
            // Stage 1: valid follows the handshake, markers only move with data
            in_valid_pipe1 <= in_fire;
            if (in_fire)
            begin
                in_sof_pipe1 <= in_sof;
                in_eol_pipe1 <= in_eol;
            end

            // Stage 2: write strobe follows stage-1 valid
            fifo_wr_push <= in_valid_pipe1;
            if (in_valid_pipe1)
            begin
                out_sof_buf <= in_sof_pipe1;
                out_eol_buf <= in_eol_pipe1;
            end
        end
    end

    //--------------------------------------------------------------------------
    // Data pipeline (clk domain, no reset)
    //--------------------------------------------------------------------------
    always @(posedge clk)
    begin
        // Stage 1: split the accepted pixel into its components
        if (in_fire)
        begin
            in_blue  <= in_data[ 7: 0];
            in_green <= in_data[15: 8];
            in_red   <= in_data[23:16];
        end

        // Stage 2: map every component through the table
        if (in_valid_pipe1)
        begin
            out_blue_buf  <= gamma_lut_mem[in_blue];
            out_green_buf <= gamma_lut_mem[in_green];
            out_red_buf   <= gamma_lut_mem[in_red];
        end
    end

    //--------------------------------------------------------------------------
    // Output FIFO: {sof, eol, R, G, B}. The programmable-full flag is used as
    // back-pressure towards the input.
    //--------------------------------------------------------------------------
    assign fifo_wr_data = {out_sof_buf, out_eol_buf,
                           out_red_buf[7:0], out_green_buf[7:0], out_blue_buf[7:0]};

    gamma_rvh_fifo gamma_rvh_fifo_inst // 26bit x 8depth
    (
        .clk                ( clk                   ),
        .srst               ( ~rstn                 ),
        .din                ( fifo_wr_data[25:0]    ),
        .wr_en              ( fifo_wr_push          ),
        .rd_en              ( fifo_rd_pop           ),
        .prog_full_thresh   ( 3'd4                  ),
        .dout               ( fifo_rd_data[25:0]    ),
        .full               (                       ),
        .almost_full        (                       ),
        .empty              ( fifo_rd_empty         ),
        .prog_full          ( fifo_almost_full      ),
        .wr_rst_busy        (                       ),
        .rd_rst_busy        (                       )
    );

    assign in_ready    = ~fifo_almost_full;
    assign out_valid   = ~fifo_rd_empty;
    assign fifo_rd_pop = out_valid & out_ready;
    assign out_sof     = fifo_rd_data[25];
    assign out_eol     = fifo_rd_data[24];
    assign out_data    = fifo_rd_data[23:0];

endmodule
RGB2YUV:
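鉴于大部分显示系统以及图像压缩引擎都是基于YUV域的,所以在RGB ISP的最后引入RGB888 to YUV444转换模块。它的转换是基于一组公式:
$$Y = 0.299R + 0.587G + 0.114B + Y_{offset} = G + 0.299(R-G) + 0.114(B-G) + Y_{offset}$$
$$U = -0.1687R - 0.3313G + 0.5B + U_{offset} = 0.564(B-Y) + U_{offset}$$
$$V = 0.5R - 0.4187G - 0.0813B + V_{offset} = 0.713(R-Y) + V_{offset}$$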
经简化后,公式里面包含4个乘系数和3个偏置,在RTL实现中会把它们做定点化处理变成整数。
需要注意的是,有三种RGB和YUV相互转换的标准,分别是BT.601(SDTV),BT.470(Analog TV),BT.709(HDTV)。使用最多的是BT.601和BT.470,BT.601实际就是RGB2YCbCr,而BT.470实际就是RGB2YUV。最后一种BT.709则较少用到。对于每一种标准,又有Full Swing和Studio Swing两种模式。Studio Swing模式下,YUV输出的范围较小,目的是为了兼容信号因为滤波而引起的过冲。
选择哪一种转换标准,取决于后级的模块需求(比如送到压缩模块还是显示系统),可以参考这篇帖子:mjpeg vs. webRTC color range inconsistency · Issue #157 · pikvm/ustreamer · GitHub
//////////////////////////////////////////////////////////////////////
////  Simple RGB ISP - Verilog
////  Author:
////      benyuee@foxmail.com
//////////////////////////////////////////////////////////////////////
////  Copyright (C) 2011 -- 20xx
////      benyuee@foxmail.com
////  This source file may be used and distributed without
////  restriction provided that this copyright statement is not
////  removed from the file and that any derivative work contains
////  the original copyright notice and the associated disclaimer.
////  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY
////  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
////  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
////  FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR
////  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
////  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
////  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
////  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
////  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
////  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
////  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
////  OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
////  POSSIBILITY OF SUCH DAMAGE.
//////////////////////////////////////////////////////////////////////
//-------------------------------------------------------------------
// Description
// RGB888 to YUV444 conversion
//
//-------------------------------------------------------------------
// Change History
//
//
//
//
//
//-------------------------------------------------------------------
//------------------------------------------------------------------------------
// rgb2yuv : streaming RGB888 -> YUV444 colour-space converter.
//
// Input  : in_data  = {R[23:16], G[15:8], B[7:0]}, transferred when
//          in_valid & in_ready are both high; in_sof / in_eol are side-band
//          flags carried along with the pixel.
// Output : out_data = {Y[23:16], U[15:8], V[7:0]}, read from an output FIFO
//          when out_valid & out_ready are both high; out_sof / out_eol are the
//          delayed copies of the input flags.
//
// The arithmetic is a 14-stage pipeline (PIPE_STAGES) built around four
// external multipliers (rgb2yuv_mult, 4 clocks of latency according to the
// timing note next to their instances).  The pipeline itself never stalls:
// back-pressure is applied only at the input (in_ready = ~FIFO prog_full) and
// samples already in flight always drain into the output FIFO.
//
// Coefficients (creg_*_coef_value) are signed fixed-point numbers with 16
// fractional bits; offsets (creg_*_offset_value) are signed integers; the
// clamping registers are unsigned 8-bit limits applied to each result.
//------------------------------------------------------------------------------
module rgb2yuv
(
    // Clock and reset
    input                       clk,
    input                       rstn,                               // asynchronous, active low
    // RGB input
    input                       in_valid,
    output                      in_ready,
    input           [23: 0]     in_data,                            // {R, G, B}
    input                       in_sof,
    input                       in_eol,
    // YUV output
    output                      out_valid,
    input                       out_ready,
    output          [23: 0]     out_data,                           // {Y, U, V}
    output                      out_sof,
    output                      out_eol,
    // Registers
    input           [ 7: 0]     creg_luma_max_clamping_value,
    input           [ 7: 0]     creg_luma_min_clamping_value,
    input           [ 7: 0]     creg_chroma_u_max_clamping_value,
    input           [ 7: 0]     creg_chroma_u_min_clamping_value,
    input           [ 7: 0]     creg_chroma_v_max_clamping_value,
    input           [ 7: 0]     creg_chroma_v_min_clamping_value,
    input  signed   [ 8: 0]     creg_y_offset_value,
    input  signed   [ 8: 0]     creg_u_offset_value,
    input  signed   [ 8: 0]     creg_v_offset_value,
    input  signed   [17: 0]     creg_a_coef_value,                  // ac
    input  signed   [17: 0]     creg_b_coef_value,                  // bc
    input  signed   [17: 0]     creg_c_coef_value,                  // cc
    input  signed   [17: 0]     creg_d_coef_value                   // dc
);

//******************************************************************************
// RGB to YUV Conversion Formulas:
//
// Y = 0.299R + 0.587G + 0.114B + Y_offset
//   = 0.299R + (1-0.299-0.114)G + 0.114B + Y_offset
//   = G + 0.299(R-G) + 0.114(B-G) + Y_offset
//
// (Y << 16) = 65536G + 19595(R-G) + 7471(B-G) + (Y_offset << 16)
//                      ac           bc
//
// U = -0.1687R - 0.3313G + 0.5B + U_offset
//   = 0.564(B-Y) + U_offset
//
// (U << 16) = 36962(B-Y) + (U_offset << 16)
//             dc
//
// V = 0.5R - 0.4187G - 0.0813B + V_offset
//   = 0.713(R-Y) + V_offset
//
// (V << 16) = 46727(R-Y) + (V_offset << 16)
//             cc
//******************************************************************************

localparam                      PIPE_STAGES = 14;

// Control pipes (one bit per pipeline stage)
wire                            in_valid_cken;
reg     [PIPE_STAGES-1: 0]      in_valid_pipe;
wire                            in_sof_cken;
reg     [PIPE_STAGES-1: 0]      in_sof_pipe;
wire                            in_eol_cken;
reg     [PIPE_STAGES-1: 0]      in_eol_pipe;

// Circular delay buffers for operands that are needed again later
reg             [ 2: 0]         in_red_wcntr;
reg             [ 7: 0]         in_red_buf[5:0];
reg             [ 2: 0]         in_red_rcntr;
reg             [ 7: 0]         in_red_s7;
reg             [ 1: 0]         in_green_wcntr;
reg             [ 7: 0]         in_green_buf[3:0];
reg             [ 1: 0]         in_green_rcntr;
reg             [ 7: 0]         in_green_s5;
reg             [ 2: 0]         in_blue_wcntr;
reg             [ 7: 0]         in_blue_buf[5:0];
reg             [ 2: 0]         in_blue_rcntr;
reg             [ 7: 0]         in_blue_s7;
reg             [ 2: 0]         y_result_wcntr;
reg             [ 7: 0]         y_result_buf[5:0];
reg             [ 2: 0]         y_result_rcntr;
reg             [ 7: 0]         y_result_s14;

// Arithmetic
wire            [ 7: 0]         in_red;
wire            [ 7: 0]         in_green;
wire            [ 7: 0]         in_blue;
reg  signed     [ 8: 0]         r_minus_g;
reg  signed     [ 8: 0]         b_minus_g;
reg  signed     [26: 0]         y_shift_16;
wire signed     [26: 0]         r_minus_g_mul_ac;
wire signed     [26: 0]         b_minus_g_mul_bc;
wire signed     [10: 0]         y_int_s6;       // signed integer part of y_shift_16
reg  signed     [ 8: 0]         y_result;
reg  signed     [ 8: 0]         r_minus_y;
reg  signed     [ 8: 0]         b_minus_y;
reg  signed     [25: 0]         v_shift_16;
reg  signed     [25: 0]         u_shift_16;
wire signed     [26: 0]         r_minus_y_mul_cc;
wire signed     [26: 0]         b_minus_y_mul_dc;
wire signed     [ 9: 0]         v_int_s13;      // signed integer part of v_shift_16
wire signed     [ 9: 0]         u_int_s13;      // signed integer part of u_shift_16
reg             [ 7: 0]         v_result_s14;
reg             [ 7: 0]         u_result_s14;

// Output FIFO
wire                            fifo_rd_pop;
wire            [25: 0]         fifo_rd_data;
wire                            fifo_rd_empty;
wire                            fifo_wr_push;
wire            [25: 0]         fifo_wr_data;
wire                            fifo_almost_full;

//-------------------------------------------------------------------------//
//                              Control Path
//-------------------------------------------------------------------------//
// Each control pipe is clocked only while it has something to move: either a
// new flag is entering or at least one bit is still travelling through it.

// make data valid aligned with data pipes
assign in_valid_cken = (in_valid & in_ready) | (|in_valid_pipe[PIPE_STAGES-1:0]);

always @(posedge clk or negedge rstn)
begin
    if(!rstn)
        in_valid_pipe <= {PIPE_STAGES{1'b0}};
    else if(in_valid_cken)
        in_valid_pipe <= {in_valid_pipe[PIPE_STAGES-2:0], (in_valid & in_ready)};
end

// make sof aligned with data pipes
assign in_sof_cken = (in_valid & in_ready & in_sof) | (|in_sof_pipe[PIPE_STAGES-1:0]);

always @(posedge clk or negedge rstn)
begin
    if(!rstn)
        in_sof_pipe <= {PIPE_STAGES{1'b0}};
    else if(in_sof_cken)
        in_sof_pipe <= {in_sof_pipe[PIPE_STAGES-2:0], (in_valid & in_ready & in_sof)};
end

// make eol aligned with data pipes
assign in_eol_cken = (in_valid & in_ready & in_eol) | (|in_eol_pipe[PIPE_STAGES-1:0]);

always @(posedge clk or negedge rstn)
begin
    if(!rstn)
        in_eol_pipe <= {PIPE_STAGES{1'b0}};
    else if(in_eol_cken)
        in_eol_pipe <= {in_eol_pipe[PIPE_STAGES-2:0], (in_valid & in_ready & in_eol)};
end

//-------------------------------------------------------------------------//
//                               Data Path
//-------------------------------------------------------------------------//
//---------------- Below are data pipes with low power design ----------------//
// Operands that are needed again several stages later are not shifted through
// a register chain.  Each one is written once into a small circular buffer and
// read once when the consuming stage is reached, so only one buffer entry
// toggles per sample.  The buffer depth equals the write-to-read distance; a
// slot may be read and rewritten on the same clock edge, in which case the
// non-blocking read still returns the old content.

//-------- in_red pipe buffer --------//
// write counter (0~5)
always @(posedge clk or negedge rstn)
begin
    if(!rstn)
        in_red_wcntr <= 3'd0;
    else if(in_valid & in_ready)
        in_red_wcntr <= (in_red_wcntr == 3'd5) ? 3'd0 : (in_red_wcntr + 1'b1);
end

// in_red buffer
genvar i;
generate
    for(i=0;i<6;i=i+1) begin: IN_RED_PIPE
        always @(posedge clk)
        begin
            if((in_valid & in_ready) && (in_red_wcntr == i))
                in_red_buf[i] <= in_red;
        end
    end
endgenerate

// read counter (0~5)
always @(posedge clk or negedge rstn)
begin
    if(!rstn)
        in_red_rcntr <= 3'd0;
    else if(in_valid_pipe[5])
        in_red_rcntr <= (in_red_rcntr == 3'd5) ? 3'd0 : (in_red_rcntr + 1'b1);
end

// in_red of stage-7
always @(posedge clk)
begin
    if(in_valid_pipe[5])
        in_red_s7 <= in_red_buf[in_red_rcntr];
end

//-------- in_green pipe buffer --------//
// write counter
always @(posedge clk or negedge rstn)
begin
    if(!rstn)
        in_green_wcntr <= 2'd0;
    else if(in_valid & in_ready)
        in_green_wcntr <= in_green_wcntr + 1'b1;  // 0~3, wraps naturally
end

// in_green buffer
genvar j;
generate
    for(j=0;j<4;j=j+1) begin: IN_GREEN_PIPE
        always @(posedge clk)
        begin
            if((in_valid & in_ready) && (in_green_wcntr == j))
                in_green_buf[j] <= in_green;
        end
    end
endgenerate

// read counter
always @(posedge clk or negedge rstn)
begin
    if(!rstn)
        in_green_rcntr <= 2'd0;
    else if(in_valid_pipe[3])
        in_green_rcntr <= in_green_rcntr + 1'b1;  // 0~3, wraps naturally
end

// in_green of stage-5
always @(posedge clk)
begin
    if(in_valid_pipe[3])
        in_green_s5 <= in_green_buf[in_green_rcntr];
end

//-------- in_blue pipe buffer --------//
// write counter (0~5)
always @(posedge clk or negedge rstn)
begin
    if(!rstn)
        in_blue_wcntr <= 3'd0;
    else if(in_valid & in_ready)
        in_blue_wcntr <= (in_blue_wcntr == 3'd5) ? 3'd0 : (in_blue_wcntr + 1'b1);
end

// in_blue buffer
genvar k;
generate
    for(k=0;k<6;k=k+1) begin: IN_BLUE_PIPE
        always @(posedge clk)
        begin
            if((in_valid & in_ready) && (in_blue_wcntr == k))
                in_blue_buf[k] <= in_blue;
        end
    end
endgenerate

// read counter (0~5)
always @(posedge clk or negedge rstn)
begin
    if(!rstn)
        in_blue_rcntr <= 3'd0;
    else if(in_valid_pipe[5])
        in_blue_rcntr <= (in_blue_rcntr == 3'd5) ? 3'd0 : (in_blue_rcntr + 1'b1);
end

// in_blue of stage-7
always @(posedge clk)
begin
    if(in_valid_pipe[5])
        in_blue_s7 <= in_blue_buf[in_blue_rcntr];
end

//-------- y_result pipe buffer --------//
// write counter (0~5)
always @(posedge clk or negedge rstn)
begin
    if(!rstn)
        y_result_wcntr <= 3'd0;
    else if(in_valid_pipe[6])
        y_result_wcntr <= (y_result_wcntr == 3'd5) ? 3'd0 : (y_result_wcntr + 1'b1);
end

// y_result buffer
genvar m;
generate
    for(m=0;m<6;m=m+1) begin: Y_RESULT_PIPE
        always @(posedge clk)
        begin
            // y_result is already clamped to an 8-bit limit, so bit 8 is always 0
            if((in_valid_pipe[6] == 1'b1) && (y_result_wcntr == m))
                y_result_buf[m] <= y_result[7:0];
        end
    end
endgenerate

// read counter (0~5)
always @(posedge clk or negedge rstn)
begin
    if(!rstn)
        y_result_rcntr <= 3'd0;
    else if(in_valid_pipe[12])
        y_result_rcntr <= (y_result_rcntr == 3'd5) ? 3'd0 : (y_result_rcntr + 1'b1);
end

// y_result of stage-14
always @(posedge clk)
begin
    if(in_valid_pipe[12])
        y_result_s14 <= y_result_buf[y_result_rcntr];
end

//---------------------- Below are calculations ----------------------//
// stage-0
assign in_red   = in_data[23:16];
assign in_green = in_data[15:8];
assign in_blue  = in_data[7:0];

// stage-1, r - g, b - g
// 8-bit unsigned operands are widened to the 9-bit destination before the
// subtraction, so the result is the correct two's-complement difference.
always @(posedge clk)
begin
    if(in_valid & in_ready)
    begin
        r_minus_g <= in_red - in_green;
        b_minus_g <= in_blue - in_green;
    end
end

// stage-6
// Every operand is made signed so that the whole sum is evaluated as a signed
// expression.  With an unsigned operand in the expression the 9-bit signed
// offset would be zero-extended instead of sign-extended, which corrupts the
// upper bits of y_shift_16 whenever the offset is negative.
always @(posedge clk)
begin
    if(in_valid_pipe[4])
        y_shift_16 <= r_minus_g_mul_ac
                    + b_minus_g_mul_bc
                    + ($signed({1'b0, in_green_s5}) <<< 16)
                    + (creg_y_offset_value <<< 16);
end

// Signed integer part of the Q16 luma sum (before rounding).
assign y_int_s6 = y_shift_16[26:16];

// stage-7
// Clamp on the signed integer part: a negative sum must saturate to the MIN
// limit (an unsigned part-select would see it as a large value and saturate
// to MAX).  Values strictly between the limits are rounded to nearest using
// the top fractional bit.
always @(posedge clk)
begin
    if(in_valid_pipe[5])
    begin
        if(y_int_s6 >= $signed({3'b000, creg_luma_max_clamping_value}))
            y_result <= {1'b0, creg_luma_max_clamping_value};
        else if(y_int_s6 <= $signed({3'b000, creg_luma_min_clamping_value}))
            y_result <= {1'b0, creg_luma_min_clamping_value};
        else
            y_result <= (y_shift_16[15] == 1'b1) ? (y_int_s6[8:0] + 1'b1) : y_int_s6[8:0];
    end
end

// stage-8
always @(posedge clk)
begin
    if(in_valid_pipe[6])
    begin
        r_minus_y <= in_red_s7  - y_result;  // -255~255 (9bit)
        b_minus_y <= in_blue_s7 - y_result;  // -255~255 (9bit)
    end
end

// stage-13
// All operands are signed here, so the offset is sign-extended correctly.
always @(posedge clk)
begin
    if(in_valid_pipe[11])
    begin
        v_shift_16 <= r_minus_y_mul_cc + (creg_v_offset_value<<16);
        u_shift_16 <= b_minus_y_mul_dc + (creg_u_offset_value<<16);
    end
end

// Signed integer parts of the Q16 chroma sums (before rounding).
assign v_int_s13 = v_shift_16[25:16];
assign u_int_s13 = u_shift_16[25:16];

// stage-14
// Same signed clamp + round-to-nearest as the luma path.
always @(posedge clk)
begin
    if(in_valid_pipe[12])
    begin
        if(v_int_s13 >= $signed({2'b00, creg_chroma_v_max_clamping_value}))
            v_result_s14 <= creg_chroma_v_max_clamping_value;
        else if(v_int_s13 <= $signed({2'b00, creg_chroma_v_min_clamping_value}))
            v_result_s14 <= creg_chroma_v_min_clamping_value;
        else
            v_result_s14 <= (v_shift_16[15] == 1'b1) ? (v_shift_16[23:16] + 1'b1) : v_shift_16[23:16];

        if(u_int_s13 >= $signed({2'b00, creg_chroma_u_max_clamping_value}))
            u_result_s14 <= creg_chroma_u_max_clamping_value;
        else if(u_int_s13 <= $signed({2'b00, creg_chroma_u_min_clamping_value}))
            u_result_s14 <= creg_chroma_u_min_clamping_value;
        else
            u_result_s14 <= (u_shift_16[15] == 1'b1) ? (u_shift_16[23:16] + 1'b1) : u_shift_16[23:16];
    end
end

//-------------------------------------------------------------------------//
//                              Output FIFO
//-------------------------------------------------------------------------//
// FIFO word layout: {sof[25], eol[24], Y[23:16], U[15:8], V[7:0]}
assign fifo_wr_push = in_valid_pipe[13];
assign fifo_wr_data = {in_sof_pipe[13], in_eol_pipe[13], y_result_s14[7:0], u_result_s14[7:0], v_result_s14[7:0]};

// Input is throttled by the programmable-full flag (threshold 16), presumably
// to leave headroom for the samples still travelling through the pipeline.
assign in_ready     = ~fifo_almost_full;

assign out_valid    = ~fifo_rd_empty;
assign fifo_rd_pop  = out_valid & out_ready;
assign out_data     = fifo_rd_data[23:0];
assign out_eol      = fifo_rd_data[24];
assign out_sof      = fifo_rd_data[25];

rgb2yuv_rvh_fifo rgb2yuv_rvh_fifo_inst  // 26bit x 32depth
(
    .clk                ( clk                   ) ,
    .srst               ( ~rstn                 ) ,
    .din                ( fifo_wr_data[25:0]    ) ,
    .wr_en              ( fifo_wr_push          ) ,
    .rd_en              ( fifo_rd_pop           ) ,
    .prog_full_thresh   ( 5'd16                 ) ,
    .dout               ( fifo_rd_data[25:0]    ) ,
    .full               (                       ) ,
    .almost_full        (                       ) ,
    .empty              ( fifo_rd_empty         ) ,
    .prog_full          ( fifo_almost_full      ) ,
    .wr_rst_busy        (                       ) ,
    .rd_rst_busy        (                       )
);

//**********************************************************************************************
// Timing Sequence of Multiplier
//             __    __    __    __    __    __    __    __
// clk      __|  |__|  |__|  |__|  |__|  |__|  |__|  |__|  |__
//             _____ _____
// in         X_I-0_X_I-1_X
//                                     _____ _____
// out                                X_O-0_X_O-1_X
//
//            |<--------------------->| latency = 4
//
//**********************************************************************************************
rgb2yuv_mult rgb2yuv_mult_inst1  // 9bit x 18bit, signed
(
    .CLK    ( clk               ),
    .A      ( r_minus_g         ),
    .B      ( creg_a_coef_value ),
    .P      ( r_minus_g_mul_ac  )
);

rgb2yuv_mult rgb2yuv_mult_inst2  // 9bit x 18bit, signed
(
    .CLK    ( clk               ),
    .A      ( b_minus_g         ),
    .B      ( creg_b_coef_value ),
    .P      ( b_minus_g_mul_bc  )
);

rgb2yuv_mult rgb2yuv_mult_inst3  // 9bit x 18bit, signed
(
    .CLK    ( clk               ),
    .A      ( r_minus_y         ),
    .B      ( creg_c_coef_value ),
    .P      ( r_minus_y_mul_cc  )
);

rgb2yuv_mult rgb2yuv_mult_inst4  // 9bit x 18bit, signed
(
    .CLK    ( clk               ),
    .A      ( b_minus_y         ),
    .B      ( creg_d_coef_value ),
    .P      ( b_minus_y_mul_dc  )
);

endmodule
P.S.上面代码中使用了一些低功耗设计方法,另贴再言~