基于小梅哥的FPGA串口传图到TFT屏幕做一个RGB转Ycbcr模块
我使用的是小梅哥的AC620开发板,它自带了一个例程:从PC通过UART串口把图片传到FPGA的SDRAM中,再从SDRAM读出图片显示到TFT屏幕上。因为图片是以RGB格式输出的,所以我想在这个工程里添加一个用于RGB转YCbCr的模块。图中红色选中的就是我添加的模块
首先是RGB转Ycbcr算法: (这是一种转换格式)
计算公式: Y = 0.299R + 0.587G + 0.114B
Cb = -0.169R - 0.331G + 0.500B + 128
Cr = 0.500R - 0.419G - 0.081B + 128
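但是在FPGA中浮点数计算不方便,耗费资源多。为了加快速度,去掉浮点运算:把各个系数扩大256倍并舍去小数部分,得到 Y0 ≈ 76R + 150G + 29B;同理 Cb0 ≈ -43R - 84G + 128B + 32768,Cr0 ≈ 128R - 107G - 20B + 32768(偏移量128同样扩大256倍变为32768)。因为数值扩大了256倍,所以再把Y0、Cb0、Cr0右移8位(即取高8位)就得到正确结果,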
// rgb2Ycbcr: converts an 8-bit RGB pixel stream to 8-bit Y/Cb/Cr using a
// 3-stage pipeline (multiply -> add/subtract -> divide by 256).
//
// Fixed-point form (coefficients scaled by 256, fractional part truncated):
//   Y  = ( 76*R + 150*G +  29*B        ) >> 8
//   Cb = (-43*R -  84*G + 128*B + 32768) >> 8
//   Cr = (128*R - 107*G -  20*B + 32768) >> 8
//
// Ports:
//   clk                       pixel clock; every register is clocked on its rising edge
//   reset                     asynchronous, active-low reset of the sync/enable delay line
//   idata_en/ihsync/ivsync    input data-enable and sync signals
//   ired/igreen/iblue         input pixel components
//   odata_en/ohsync/ovsync    the input control signals delayed by 3 clocks
//   Y_data/cb_data/cr_data    converted pixel; forced to 0 while odata_en is low
module rgb2Ycbcr(
input clk,
input reset,
input idata_en,
input ihsync,
input ivsync,
input [7:0]ired,
input [7:0]igreen,
input [7:0]iblue,
output odata_en,
output ohsync,
output ovsync,
output [7:0]Y_data,
output [7:0]cb_data,
output [7:0]cr_data
);

    // Conversion coefficients, pre-multiplied by 256.
    localparam [7:0] K_Y_R  = 8'd76;
    localparam [7:0] K_Y_G  = 8'd150;
    localparam [7:0] K_Y_B  = 8'd29;
    localparam [7:0] K_CB_R = 8'd43;
    localparam [7:0] K_CB_G = 8'd84;
    localparam [7:0] K_CB_B = 8'd128;
    localparam [7:0] K_CR_R = 8'd128;
    localparam [7:0] K_CR_G = 8'd107;
    localparam [7:0] K_CR_B = 8'd20;
    // The +128 chroma offset, also scaled by 256.
    localparam [15:0] CHROMA_OFFSET = 16'd32768;

    // Stage 1 (1 clk): the nine 8x8 multiplications.
    // The 16-bit destination makes each product evaluate at 16 bits.
    reg [15:0] red0, red1, red2;
    reg [15:0] green0, green1, green2;
    reg [15:0] blue0, blue1, blue2;
    always @(posedge clk) begin
        red0   <= ired   * K_Y_R;
        green0 <= igreen * K_Y_G;
        blue0  <= iblue  * K_Y_B;

        red1   <= ired   * K_CB_R;
        green1 <= igreen * K_CB_G;
        blue1  <= iblue  * K_CB_B;

        red2   <= ired   * K_CR_R;
        green2 <= igreen * K_CR_G;
        blue2  <= iblue  * K_CR_B;
    end

    // Stage 2 (1 clk): sums and differences.
    // The arithmetic is modulo 2^16, so an intermediate underflow in the
    // subtractions is harmless: the final values fit in 16 bits for all
    // 8-bit inputs (max 255*255 for Y, range 383..65408 for Cb/Cr).
    reg [15:0] Y0, cb0, cr0;
    always @(posedge clk) begin
        Y0  <= red0 + green0 + blue0;
        cb0 <= blue1 - green1 - red1 + CHROMA_OFFSET;
        cr0 <= red2 - green2 - blue2 + CHROMA_OFFSET;
    end

    // Stage 3 (1 clk): divide by 256 by keeping only the upper byte.
    reg [7:0] Y1, cb1, cr1;
    always @(posedge clk) begin
        Y1  <= Y0[15:8];
        cb1 <= cb0[15:8];
        cr1 <= cr0[15:8];
    end

    // The data path takes 3 clocks, so the sync and data-enable signals are
    // delayed by 3 register stages to stay aligned with the pixel data.
    // reset is asynchronous and active low, matching the negedge in the
    // sensitivity list.
    reg hsync1, hsync2, hsync3;
    reg vsync1, vsync2, vsync3;
    reg data_en1, data_en2, data_en3;
    always @(posedge clk or negedge reset) begin
        if (!reset) begin
            hsync1   <= 1'b0;
            hsync2   <= 1'b0;
            hsync3   <= 1'b0;
            vsync1   <= 1'b0;
            vsync2   <= 1'b0;
            vsync3   <= 1'b0;
            data_en1 <= 1'b0;
            data_en2 <= 1'b0;
            data_en3 <= 1'b0;
        end else begin
            hsync1   <= ihsync;
            hsync2   <= hsync1;
            hsync3   <= hsync2;
            vsync1   <= ivsync;
            vsync2   <= vsync1;
            vsync3   <= vsync2;
            data_en1 <= idata_en;
            data_en2 <= data_en1;
            data_en3 <= data_en2;
        end
    end

    assign ohsync   = hsync3;
    assign ovsync   = vsync3;
    assign odata_en = data_en3;

    // Pixel outputs are gated by the delayed data-enable, so they are valid
    // exactly when odata_en is high, independent of the hsync polarity.
    assign Y_data  = odata_en ? Y1  : 8'd0;
    assign cb_data = odata_en ? cb1 : 8'd0;
    assign cr_data = odata_en ? cr1 : 8'd0;

endmodule
因为乘法、加减、移位运算一共耗费三个时钟周期,所以图片的行同步、场同步和数据使能信号也要延迟三个时钟周期,才能与数据保持同步。代码参考了《基于FPGA和MATLAB的图像处理》,原作者是使用向量的方法做延时(下图),但是在Quartus II中会报错,所以我还是用打拍子的方式延时。最后在小梅哥的工程里把本模块例化就行,这样就可以在TFT屏幕上显示YCbCr的各个分量