基于FPGA的矩阵乘法

最近在学 SystemVerilog,于是打算用它实现一下矩阵乘法,体验一把 SystemVerilog 相对于 Verilog 的方便之处(例如 SystemVerilog 中数组可以直接作为模块端口)。
以下是矩阵乘法的代码:

`timescale 1ns / 1ps
//
// Company: 
// Engineer: 
// 
// Create Date: 2020/11/13 16:04:32
// Design Name: 
// Module Name: block_mm
// Project Name: 
// Target Devices: 
// Tool Versions: 
// Description: 
// 
// Dependencies: 
// 
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
// 
//


// block_mm: sequential Tn x Tn matrix multiplier, O = A * B.
//
// One multiply-accumulate is issued per clock while busy, so a complete
// product takes Tn*Tn*Tn cycles after `start`. Elements are 16 bits wide and
// every product/sum is truncated to 16 bits (no saturation).
//
// Parameters:
//   Tn    - matrix dimension (default 4).
// Ports:
//   clk   - clock; all state updates on the rising edge.
//   rst   - asynchronous, active-high reset of the control state.
//           The result array O is not reset.
//   start - pulse high for one cycle to begin. A start while busy restarts
//           the computation from element (0,0).
//   A, B  - operands; they are read on every busy cycle, so they must be held
//           stable from `start` until `done`.
//   O     - result matrix; complete when `done` is high.
//   done  - high for exactly one cycle after the last element of O is written.
module block_mm
#(parameter Tn=4)
(
input logic clk,
input logic rst,
input logic start,                 // pulse high for one cycle to start
input logic [15:0] A[0:Tn-1][0:Tn-1],
input logic [15:0] B[0:Tn-1][0:Tn-1],
output logic [15:0] O[0:Tn-1][0:Tn-1],
output logic done                              // high for one cycle when finished
    );
int row;        // output row currently being accumulated
int col;        // output column currently being accumulated
int k;          // index along the shared (inner) dimension
logic busy;     // high while a multiplication is in progress
logic last_mac; // counters point at the final multiply-accumulate

assign last_mac = (row==Tn-1) && (col==Tn-1) && (k==Tn-1);

// busy: set by start, cleared together with the final multiply-accumulate.
always_ff@(posedge clk,posedge rst)
if(rst)
    busy<=1'b0;
else if(start)
    busy<=1'b1;
else if(last_mac)
    busy<=1'b0;

// k / col / row: nested counters, k fastest and row slowest.
// They only advance while busy, so they rest at (0,0,0) between runs and
// cannot wander into the "last" position on their own.
always_ff@(posedge clk,posedge rst)
if(rst)
begin
    k<=0;
    col<=0;
    row<=0;
end
else if(start)
begin
    k<=0;
    col<=0;
    row<=0;
end
else if(busy)
begin
    if(k!=Tn-1)
        k<=k+1;
    else
    begin
        k<=0;
        if(col!=Tn-1)
            col<=col+1;
        else
        begin
            col<=0;
            row<=(row==Tn-1)?0:row+1;
        end
    end
end

// done: qualified by busy so that it can only fire at the end of a run that
// was actually started; busy drops on the same edge, making this a
// single-cycle pulse.
always_ff@(posedge clk,posedge rst)
if(rst)
    done<=1'b0;
else
    done<=busy&&last_mac;

// Multiply-accumulate. No reset is needed: busy is held low during reset and
// the k==0 step overwrites each element before anything is accumulated on it.
always_ff@(posedge clk)
if(busy)
begin
    if(k==0)
        O[row][col]<=A[row][k]*B[k][col];
    else
        O[row][col]<=O[row][col]+A[row][k]*B[k][col];
end

endmodule

以下是testbench:

`timescale 1ns / 1ps
//
// Company: 
// Engineer: 
// 
// Create Date: 2020/11/13 16:25:11
// Design Name: 
// Module Name: test
// Project Name: 
// Target Devices: 
// Tool Versions: 
// Description: 
// 
// Dependencies: 
// 
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
// 
//


// Self-checking testbench for block_mm.
// Fills A and B with random values in 0..9, computes the reference product OO
// behaviourally, pulses `start` once and compares O against OO when the DUT
// raises `done`.
module test(

    );
parameter Tn = 4;

logic [15:0]A[0:Tn-1][0:Tn-1];
logic [15:0]B[0:Tn-1][0:Tn-1];
logic [15:0]O[0:Tn-1][0:Tn-1];   // DUT result
logic [15:0]OO[0:Tn-1][0:Tn-1];  // reference result
logic start;
logic done;
logic clk;
logic rst;

// Reset: asserted for the first 10 ns.
initial 
begin
    rst=1;
    #10 rst=0;
end
// Clock: 10 ns period.
initial 
begin
    clk=0;
    forever 
    #5 clk=~clk;    
end
// One start pulse covering the rising clock edge at 25 ns.
initial 
begin
    start=0;
    #20
    start=1;
    #10;
    start=0;    
end
// Check the result when the DUT reports completion.
// Plain `always` is used because this is testbench code with blocking
// assignments, not a flip-flop description.
always@(posedge clk)
if(done)
begin
    int count;
    count=0;                     // restart the tally on every done pulse
    for(int i=0;i<Tn;i++)
    begin
        for(int j=0;j<Tn;j++)
        begin
           // Case inequality: an X/Z result must be reported as an error
           // instead of silently falling into the "correct" branch.
           if(O[i][j]!==OO[i][j])
               $display("error");
           else
               count=count+1; 
        end
    end
    $display("correct count is %d",count);
end
// Initialise the operands and compute the reference product.
initial 
begin
for(int i=0;i<Tn;i++)
    for(int j=0;j<Tn;j++)
    begin
        A[i][j]={$random}%10;
        B[i][j]={$random}%10;
    end
for(int i=0;i<Tn;i++)
    for(int j=0;j<Tn;j++)
    begin
        OO[i][j]=0;
        for(int k=0;k<Tn;k++)
            OO[i][j]=OO[i][j]+A[i][k]*B[k][j];
    end
end

// Forward Tn explicitly so the DUT size always matches the testbench arrays.
block_mm #(.Tn(Tn)) U
(.*);

endmodule

仿真无误:
在这里插入图片描述

分块矩阵加载的代码:

`timescale 1ns / 1ps
//
// Company: 
// Engineer: 
// 
// Create Date: 2020/11/13 18:10:01
// Design Name: 
// Module Name: load_block
// Project Name: 
// Target Devices: 
// Tool Versions: 
// Description: 
// 
// Dependencies: 
// 
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
// 
//


// load_block: copies a Tn x Tn sub-block of an N-column, row-major matrix
// held in an external memory into the register array `block_mat`.
//
// The read address is driven combinationally from the row/col counters and
// the returned word is captured two clocks later (busy/row/col are delayed by
// two stages), i.e. the memory is expected to deliver `din` two cycles after
// `addr` is presented.
//
// Parameters:
//   Tn - block dimension (default 4).
//   N  - number of columns of the full matrix (default 16). `addr` is 8 bits
//        wide, so row*N+col must fit in 8 bits.
// Ports:
//   start      - pulse high for one cycle to begin a load.
//   clk, rst   - clock and asynchronous active-high reset.
//   din        - read data from the memory.
//   addr       - read address to the memory (row*N+col).
//   block_row,
//   block_col  - top-left corner of the block; reads
//                M[block_row +: Tn][block_col +: Tn]. They are used for
//                comparisons and index offsets throughout the load, so they
//                must be held stable from `start` until `done`.
//   block_mat  - loaded block.
//   done       - one-cycle pulse once the last word has been stored.
module load_block
#(parameter Tn = 4,
  parameter N = 16)
(
input logic start,
input logic clk,
input logic rst,
input logic [15:0] din,
output logic [7:0] addr,
input logic [7:0]block_row,
input logic [7:0]block_col,         // reads M[block_row:block_row+Tn,block_col:block_col+Tn]
output logic [15:0]block_mat[0:Tn-1][0:Tn-1],
output logic done
     );

logic [7:0]row;
logic [7:0]col;
logic [7:0]row_ff1;
logic [7:0]row_ff2;
logic [7:0]col_ff1;
logic [7:0]col_ff2;
logic busy;
logic busy_ff1;
logic busy_ff2;
logic done_ff0;
logic done_ff1;
logic done_ff2;
logic last_col;   // col points at the last column of the block
logic last_elem;  // row/col point at the last element of the block

assign last_col =(col==block_col+Tn-1);
assign last_elem=(row==block_row+Tn-1)&&last_col;

assign done=done_ff2;
// done_ff0: raised for one cycle after the last address has been issued.
// Qualified by busy so it cannot fire while idle if block_row/block_col are
// changed to values that happen to match the stale counters.
always_ff@(posedge clk,posedge rst)
if(rst)
    done_ff0<=1'b0;
else
    done_ff0<=busy&&last_elem;
// done_ff1, done_ff2: delay done to line up with the delayed data capture.
always_ff@(posedge clk,posedge rst)
if(rst)
begin
    done_ff1<=1'b0;
    done_ff2<=1'b0;
end
else
begin
    done_ff1<=done_ff0;
    done_ff2<=done_ff1;
end
// busy: high while addresses are being issued.
always_ff@(posedge clk,posedge rst)
if(rst)
    busy<=1'b0;
else if(start)
    busy<=1'b1;
else if(last_elem)
    busy<=1'b0;
// busy_ff1, busy_ff2: write enable for block_mat, two cycles behind busy.
always_ff@(posedge clk,posedge rst)
if(rst)
begin
    busy_ff1<=1'b0;
    busy_ff2<=1'b0;
end
else
begin
    busy_ff1<=busy;
    busy_ff2<=busy_ff1;
end
// row: advances at the end of every block row, only while busy.
always_ff@(posedge clk,posedge rst)
if(rst)
    row<=8'd0;
else if(start)
    row<=block_row;
else if(busy&&last_col)
    row<=row+1;
// col: sweeps block_col .. block_col+Tn-1 while busy.
always_ff@(posedge clk,posedge rst)
if(rst)
    col<=8'd0;
else if(start)
    col<=block_col;
else if(busy)
begin
    if(last_col)
        col<=block_col;
    else 
        col<=col+1;
end
// Two-stage delay of the coordinates, matching the read data delay.
always_ff@(posedge clk,posedge rst)
if(rst)
begin
    row_ff1<=8'd0;
    row_ff2<=8'd0;
    col_ff1<=8'd0;
    col_ff2<=8'd0;
end
else
begin
    row_ff1<=row;
    row_ff2<=row_ff1;
    col_ff1<=col;
    col_ff2<=col_ff1;
end
// addr: row-major address of the current element.
assign addr=(row*N+col);
// Capture the returned word. block_mat has no reset; busy_ff2 is held low
// during reset, so nothing is written then.
always_ff@(posedge clk)
if(busy_ff2)
    block_mat[row_ff2-block_row][col_ff2-block_col]<=din;    

endmodule

testbench:

`timescale 1ns / 1ps
//
// Company: 
// Engineer: 
// 
// Create Date: 2020/11/13 18:51:33
// Design Name: 
// Module Name: load_test
// Project Name: 
// Target Devices: 
// Tool Versions: 
// Description: 
// 
// Dependencies: 
// 
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
// 
//


// Testbench for load_block.
// Phase 1: writes the values 0..255 to addresses 0..255 of the memory `Matrix`.
// Phase 2: at count==300 hands the address bus to load_block and asks it to
//          load the Tn x Tn block whose top-left corner is (12,12).
// The full reference matrix is printed at time 0 and the loaded block is
// printed when `done` fires, so the two can be compared by eye.
// NOTE(review): `Matrix` is defined outside this file; from the port names it
// looks like a vendor-generated single-port RAM - confirm that its read
// latency is the two cycles load_block expects.
module load_test(

    );
parameter Tn = 4;
parameter N = 16;

logic clk;
logic rst;
logic [15:0]read_data;
logic [7:0]addr;          // read address from load_block
logic [7:0]addra;         // address actually presented to the memory
logic [7:0]write_addr;    // address used while initialising the memory
logic [15:0]block_mat[0:Tn-1][0:Tn-1];
logic start;
logic done;
logic [7:0]block_row;
logic [7:0]block_col;
logic [15:0]count;        // free-running cycle counter
logic flag;               // 0: testbench writes memory, 1: load_block reads it

logic we;
logic [15:0]write_data;
logic init_done;          // all 256 words have been written
// Print the reference matrix (element value == row-major index).
initial 
begin
    for(int i=0;i<N;i++)
    begin
        for(int j=0;j<N;j++)
        begin
            $write("%d,",i*N+j);
        end
        $write("\n");
    end
end
//clk
initial 
begin
    clk=0;
    forever 
    #5 clk=~clk;
end
//rst
initial
begin
    rst=1;
    #10
    rst=0;
end
// Initialise the memory: mem[a] = a for a = 0..255.
// The first active cycle only raises `we` while address/data are still 0, so
// that word 0 is written explicitly instead of relying on the memory's
// power-up contents.
always_ff@(posedge clk,posedge rst)
if(rst)
begin
    we<=1'b0;
    write_addr<=8'd0;
    write_data<=16'd0;
    init_done<=1'b0;
end
else if(init_done)
begin
    we<=1'b0;
end
else if(!we)
begin
    we<=1'b1;                 // first beat: write word 0
end
else if(write_addr==255)
begin
    we<=1'b0;                 // last word is being written on this edge
    init_done<=1'b1;
end
else
begin
    write_addr<=write_addr+8'd1;
    write_data<=write_data+16'd1;
end
//count
always_ff@(posedge clk,posedge rst)
if(rst)
    count<=16'd0;
else 
    count<=count+16'd1;
// flag: switch the memory address over to load_block once initialisation is
// safely finished.
always_ff@(posedge clk,posedge rst)
if(rst)
    flag<=1'b0;
else if(count==300)
    flag<=1'b1;
//block_row and block_col
always_ff@(posedge clk,posedge rst)
if(rst)
begin
    block_row<=8'd0;
    block_col<=8'd0;
    start<=1'b0;
end
else if(count==300)
begin
    block_row<=8'd12;
    block_col<=8'd12;
    start<=1'b1;
end
else
    start<=1'b0;
// Print the loaded block when load_block reports completion.
always@(posedge clk,posedge rst)
if(rst)
 ;
else if(done)
begin
    for(int i=0;i<Tn;i++)
    begin
        for(int j=0;j<Tn;j++)
        begin
            $write("%d,",block_mat[i][j]);
        end
        $write("\n");
    end
end

assign addra=(flag==1'b1)?addr:write_addr;

Matrix M(
  .clka(clk),    // input wire clka
  .ena(1'b1),      // input wire ena
  .wea(we),      // input wire [0 : 0] wea
  .addra(addra),  // input wire [7 : 0] addra
  .dina(write_data),    // input wire [15 : 0] dina
  .douta(read_data)  // output wire [15 : 0] douta
);

// Forward Tn/N explicitly so the DUT always matches the testbench arrays.
load_block #(.Tn(Tn),.N(N)) V(
.start(start),
.clk(clk),
.rst(rst),
.din(read_data),
.addr(addr),
.block_row(block_row),
.block_col(block_col),         // reads M[block_row:block_row+Tn,block_col:block_col+Tn]
.block_mat(block_mat),
.done(done)
     );
endmodule

仿真结果
在这里插入图片描述

分块矩阵存储的代码

`timescale 1ns / 1ps
//
// Company: 
// Engineer: 
// 
// Create Date: 2020/11/17 19:12:44
// Design Name: 
// Module Name: store_block
// Project Name: 
// Target Devices: 
// Tool Versions: 
// Description: 
// 
// Dependencies: 
// 
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
// 
//


// store_block: writes a Tn x Tn register block into an N-column, row-major
// matrix held in an external memory, one element per clock.
//
// Parameters:
//   Tn - block dimension (default 4).
//   N  - number of columns of the full matrix (default 16). `addr` is 8 bits
//        wide, so row*N+col must fit in 8 bits.
// Ports:
//   block_row,
//   block_col - top-left corner of the destination block; must be held stable
//               for the whole transfer because they are used for comparisons
//               and index offsets on every cycle.
//   block_mat - data to store.
//   clk, rst  - clock and asynchronous active-high reset.
//   start     - pulse high for one cycle to begin.
//   we, addr,
//   dout      - registered write enable / address / data towards the memory.
//   done      - combinational; high during the final write beat (busy has
//               dropped while `we` is still asserted), so the last word is
//               committed by the memory on the clock edge ending that cycle.
module store_block
#(parameter Tn=4,
  parameter N=16)
(
input logic [7:0]block_row,
input logic [7:0]block_col,
input logic [15:0]block_mat[0:Tn-1][0:Tn-1],
input logic clk,
input logic rst,
input logic start,
output logic we,
output logic [7:0]addr,
output logic [15:0]dout,
output logic done
    );
logic [7:0]row;
logic [7:0]col;
logic busy;
logic last_col;   // col points at the last column of the block
logic last_row;   // row points at the last row of the block

assign last_col=(col==block_col+Tn-1);
assign last_row=(row==block_row+Tn-1);

// busy: high while elements are being fetched from block_mat.
always_ff@(posedge clk,posedge rst)
if(rst)
   busy<=1'b0;
else if(start)
   busy<=1'b1;
else if(last_row&&last_col)
   busy<=1'b0;
// row: advances at the end of each block row; only moves while busy so the
// counter rests between transfers.
always_ff@(posedge clk,posedge rst)
if(rst)
   row<=8'd0;
else if(start)
   row<=block_row;
else if(busy&&last_col)
begin
   if(last_row)
       row<=8'd0;
   else
       row<=row+8'd1;
end
// col: sweeps block_col .. block_col+Tn-1 while busy.
always_ff@(posedge clk,posedge rst)
if(rst)
   col<=8'd0;
else if(start)
   col<=block_col;
else if(busy)
begin
   if(last_col)
       col<=block_col;
   else
       col<=col+8'd1;
end
// we: registered copy of busy, aligned with addr/dout.
always_ff@(posedge clk,posedge rst)
if(rst)
   we<=1'b0;
else
   we<=busy;
// addr: row-major destination address, forced to 0 when idle.
always_ff@(posedge clk,posedge rst)
if(rst)
   addr<=8'd0;
else if(busy)
   addr<=row*N+col;
else
   addr<=8'd0;
// dout: element selected by the counters, forced to 0 when idle.
always_ff@(posedge clk,posedge rst)
if(rst)
   dout<=16'd0;
else if(busy)
   dout<=block_mat[row-block_row][col-block_col];
else 
   dout<=16'd0;
// done
assign done=(~busy&&we)?1'b1:1'b0;
endmodule

testbench:

`timescale 1ns / 1ps
//
// Company: 
// Engineer: 
// 
// Create Date: 2020/11/17 19:35:10
// Design Name: 
// Module Name: store_block_test
// Project Name: 
// Target Devices: 
// Tool Versions: 
// Description: 
// 
// Dependencies: 
// 
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
// 
//


// Self-checking testbench for store_block.
// Fills block_mat with random values in 0..99, requests a store of the block
// to position (8,12) of an N-column matrix, and verifies every write beat
// (address inside the block, data equal to the matching block_mat element).
module store_block_test;
parameter Tn = 4;
parameter N  = 16;

logic clk;
logic rst;
logic [7:0]block_row;
logic [7:0]block_col;
logic [15:0]block_mat[0:Tn-1][0:Tn-1];
logic start;
logic done;
logic we;
logic [7:0]addr;
logic [15:0]dout;

int good_writes = 0;   // write beats that passed the check in this transfer
int r;                 // block-relative row decoded from addr
int c;                 // block-relative column decoded from addr
//clk
initial 
begin
   clk=0;
   forever
    #5 clk=~clk;
end
//rst
initial
begin
    rst=1;
    #10
    rst=0;
end
// Random block contents.
initial 
begin
    for(int i=0;i<Tn;i++)
       for(int j=0;j<Tn;j++)
          block_mat[i][j]<={$random}%100;
end
// Destination of the block inside the full matrix.
initial 
begin
   block_row=8;
   block_col=12;    
end
// One start pulse.
initial
begin
    start=0;
    #100
    start=1;
    #10
    start=0;
end
// Write monitor: checks each beat as the memory would see it on the rising
// edge, then reports the tally on the beat where done is asserted (done is
// high during the final write beat, so that beat is counted first).
always@(posedge clk)
begin
    if(start)
        good_writes=0;
    if(we===1'b1)
    begin
        r=addr/N-block_row;
        c=addr%N-block_col;
        if(r<0||r>=Tn||c<0||c>=Tn)
            $display("error: write outside block, addr=%0d",addr);
        else if(dout!==block_mat[r][c])
            $display("error: addr=%0d dout=%0d expected=%0d",addr,dout,block_mat[r][c]);
        else
            good_writes=good_writes+1;
    end
    if(done===1'b1)
        $display("correct writes: %0d of %0d",good_writes,Tn*Tn);
end
// Forward Tn/N explicitly so the DUT always matches the testbench arrays.
store_block #(.Tn(Tn),.N(N)) U(.*);
endmodule

仿真结果正确!

  • 5
    点赞
  • 55
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 3
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

FPGA硅农

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值