最近在学 SystemVerilog,于是打算用它写一下矩阵乘法,体验一下 SystemVerilog 相对于 Verilog 的方便之处(SV 中数组可以直接作为模块端口)。
以下是矩阵乘法的代码:
`timescale 1ns / 1ps
//
// Company:
// Engineer:
//
// Create Date: 2020/11/13 16:04:32
// Design Name:
// Module Name: block_mm
// Project Name:
// Target Devices:
// Tool Versions:
// Description:
//
// Dependencies:
//
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
//
//
// block_mm: sequential Tn x Tn matrix multiplier, O = A * B.
//
// One multiply-accumulate is performed per clock, so a full product takes
// Tn*Tn*Tn cycles after `start`. Products and sums are truncated to 16 bits
// because they are assigned to 16-bit elements of O.
//
// Parameters:
//   Tn    - matrix dimension (rows == columns).
// Ports:
//   clk   - clock, all state updates on the rising edge.
//   rst   - asynchronous, active-high reset of the control state.
//   start - pulse high for one cycle to begin a multiplication.
//   A, B  - input matrices; read while the computation is running.
//   O     - result matrix; complete in the cycle where `done` is high.
//   done  - high for exactly one cycle when the last element has been written.
module block_mm
#(parameter Tn=4)
(
    input  logic        clk,
    input  logic        rst,
    input  logic        start,   // pulse high for one cycle to start
    input  logic [15:0] A[0:Tn-1][0:Tn-1],
    input  logic [15:0] B[0:Tn-1][0:Tn-1],
    output logic [15:0] O[0:Tn-1][0:Tn-1],
    output logic        done     // pulses high for one cycle on completion
);
    int   row;
    int   col;
    int   k;
    logic busy;

    // Position flags. Everything below is qualified with `busy` so that the
    // counters hold their value while idle and `done` can only follow a real
    // computation (previously the counters free-ran and `done` pulsed
    // periodically even when `start` had never been asserted).
    logic last_k;
    logic last_col;
    logic last_mac;

    assign last_k   = (k   == Tn-1);
    assign last_col = (col == Tn-1);
    assign last_mac = busy && (row == Tn-1) && last_col && last_k;

    // busy: set by start, cleared after the final multiply-accumulate.
    always_ff @(posedge clk, posedge rst)
        if (rst)
            busy <= 1'b0;
        else if (start)
            busy <= 1'b1;
        else if (last_mac)
            busy <= 1'b0;

    // k: innermost (dot-product) index, wraps every Tn cycles while busy.
    always_ff @(posedge clk, posedge rst)
        if (rst)
            k <= 0;
        else if (start)
            k <= 0;
        else if (busy) begin
            if (last_k)
                k <= 0;
            else
                k <= k + 1;
        end

    // col: advances each time a dot product finishes.
    always_ff @(posedge clk, posedge rst)
        if (rst)
            col <= 0;
        else if (start)
            col <= 0;
        else if (busy && last_k) begin
            if (last_col)
                col <= 0;
            else
                col <= col + 1;
        end

    // row: advances each time a full output row finishes; returns to 0 at
    // the end so the counter never runs past the matrix bounds.
    always_ff @(posedge clk, posedge rst)
        if (rst)
            row <= 0;
        else if (start)
            row <= 0;
        else if (busy && last_k && last_col) begin
            if (row == Tn-1)
                row <= 0;
            else
                row <= row + 1;
        end

    // done: registered one-cycle pulse, aligned with the edge that writes
    // the final accumulation into O.
    always_ff @(posedge clk, posedge rst)
        if (rst)
            done <= 1'b0;
        else
            done <= last_mac;

    // Multiply-accumulate. O is a data register and is intentionally not
    // reset; the k==0 term overwrites the stale value of each element.
    always_ff @(posedge clk)
        if (busy) begin
            if (k == 0)
                O[row][col] <= A[row][k] * B[k][col];
            else
                O[row][col] <= O[row][col] + A[row][k] * B[k][col];
        end
endmodule
以下是testbench:
`timescale 1ns / 1ps
//
// Company:
// Engineer:
//
// Create Date: 2020/11/13 16:25:11
// Design Name:
// Module Name: test
// Project Name:
// Target Devices:
// Tool Versions:
// Description:
//
// Dependencies:
//
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
//
//
// Testbench for block_mm: multiplies two random Tn x Tn matrices and compares
// the hardware result O against a software reference OO when `done` pulses.
module test(
);
    parameter Tn = 4;
    logic [15:0] A[0:Tn-1][0:Tn-1];
    logic [15:0] B[0:Tn-1][0:Tn-1];
    logic [15:0] O[0:Tn-1][0:Tn-1];   // result produced by the DUT
    logic [15:0] OO[0:Tn-1][0:Tn-1];  // reference result computed below
    logic start;
    logic done;
    logic clk;
    logic rst;

    // Reset: asserted for the first 10 ns.
    initial
    begin
        rst = 1;
        #10 rst = 0;
    end

    // 100 MHz clock (10 ns period).
    initial
    begin
        clk = 0;
        forever
            #5 clk = ~clk;
    end

    // Single one-cycle start pulse at 20 ns.
    initial
    begin
        start = 0;
        #20
        start = 1;
        #10;
        start = 0;
    end

    // Display the result.
    // A plain `always` is used because this is a checker with blocking
    // assignments and $display, not a flip-flop. `count` is cleared by an
    // ordinary assignment on every check: an initialiser on the declaration
    // would be a static initialiser that runs only once (and requires an
    // explicit lifetime keyword), so matches would accumulate across runs.
    always @(posedge clk)
        if (done)
        begin
            int count;
            count = 0;
            for (int i = 0; i < Tn; i++)
            begin
                for (int j = 0; j < Tn; j++)
                begin
                    if (O[i][j] != OO[i][j])
                        $display("error");
                    else
                        count = count + 1;
                end
            end
            $display("correct count is %d", count);
        end

    // Initialise the input matrices and compute the reference product.
    initial
    begin
        for (int i = 0; i < Tn; i++)
            for (int j = 0; j < Tn; j++)
            begin
                A[i][j] = {$random} % 10;
                B[i][j] = {$random} % 10;
            end
        for (int i = 0; i < Tn; i++)
            for (int j = 0; j < Tn; j++)
            begin
                OO[i][j] = 0;
                for (int k = 0; k < Tn; k++)
                    OO[i][j] = OO[i][j] + A[i][k] * B[k][j];
            end
    end

    // Device under test; ports are connected by name via .*
    block_mm U
    (.*);
endmodule
仿真无误:
更
分块矩阵加载的代码:
`timescale 1ns / 1ps
//
// Company:
// Engineer:
//
// Create Date: 2020/11/13 18:10:01
// Design Name:
// Module Name: load_block
// Project Name:
// Target Devices:
// Tool Versions:
// Description:
//
// Dependencies:
//
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
//
//
// load_block: reads a Tn x Tn tile out of a row-major N-column memory into
// the block_mat array.
//
// The read address is row*N + col and is driven combinationally from the
// row/col counters. The data for an address is expected on `din` two clock
// cycles after that address is presented; the row/col/busy values are delayed
// by two stages to line up with it.
//
// Parameters:
//   Tn        - tile dimension.
//   N         - number of columns of the matrix stored in memory.
// Ports:
//   start     - pulse high for one cycle to begin loading a tile.
//   clk, rst  - clock and asynchronous active-high reset.
//   din       - memory read data.
//   addr      - memory read address.
//   block_row - first row of the tile; must stay stable during the load.
//   block_col - first column of the tile; must stay stable during the load.
//   block_mat - loaded tile, i.e. M[block_row +: Tn][block_col +: Tn].
//   done      - one-cycle pulse; block_mat is complete when it is high.
//
// Tn and N are declared in a parameter port list (same order as before) so
// that Tn is declared before the port list uses it.
module load_block
#(
    parameter Tn = 4,
    parameter N  = 16
)
(
    input  logic        start,
    input  logic        clk,
    input  logic        rst,
    input  logic [15:0] din,
    output logic [7:0]  addr,
    input  logic [7:0]  block_row,
    input  logic [7:0]  block_col,  // reads M[block_row:block_row+Tn, block_col:block_col+Tn]
    output logic [15:0] block_mat[0:Tn-1][0:Tn-1],
    output logic        done
);
    logic [7:0] row;
    logic [7:0] col;
    logic [7:0] row_ff1;
    logic [7:0] row_ff2;
    logic [7:0] col_ff1;
    logic [7:0] col_ff2;
    logic       busy;
    logic       busy_ff1;
    logic       busy_ff2;
    logic       done_ff0;
    logic       done_ff1;
    logic       done_ff2;

    // Position flags, qualified with busy. Without the qualification a
    // change of block_row/block_col while idle could match the stale
    // row/col values, producing a spurious done pulse and bumping row.
    logic last_col;
    logic last_elem;

    assign last_col  = busy && (col == block_col+Tn-1);
    assign last_elem = last_col && (row == block_row+Tn-1);

    assign done = done_ff2;

    // done_ff0: raised for one cycle after the last address was issued.
    always_ff @(posedge clk, posedge rst)
        if (rst)
            done_ff0 <= 1'b0;
        else
            done_ff0 <= last_elem;

    // done_ff1, done_ff2: delay done by the read latency so it lines up
    // with the final write into block_mat.
    always_ff @(posedge clk, posedge rst)
        if (rst)
        begin
            done_ff1 <= 1'b0;
            done_ff2 <= 1'b0;
        end
        else
        begin
            done_ff1 <= done_ff0;
            done_ff2 <= done_ff1;
        end

    // busy: high while addresses are being issued.
    always_ff @(posedge clk, posedge rst)
        if (rst)
            busy <= 1'b0;
        else if (start)
            busy <= 1'b1;
        else if (last_elem)
            busy <= 1'b0;

    // busy_ff1, busy_ff2: busy delayed to match the returning data.
    always_ff @(posedge clk, posedge rst)
        if (rst)
        begin
            busy_ff1 <= 1'b0;
            busy_ff2 <= 1'b0;
        end
        else
        begin
            busy_ff1 <= busy;
            busy_ff2 <= busy_ff1;
        end

    // row: absolute row index, advances at the end of each tile row.
    always_ff @(posedge clk, posedge rst)
        if (rst)
            row <= 8'd0;
        else if (start)
            row <= block_row;
        else if (last_col)
            row <= row + 8'd1;

    // col: absolute column index, wraps back to block_col after Tn columns.
    always_ff @(posedge clk, posedge rst)
        if (rst)
            col <= 8'd0;
        else if (start)
            col <= block_col;
        else if (busy)
        begin
            if (last_col)
                col <= block_col;
            else
                col <= col + 8'd1;
        end

    // Delayed copies of row/col identify which element `din` belongs to.
    always_ff @(posedge clk, posedge rst)
        if (rst)
        begin
            row_ff1 <= 8'd0;
            row_ff2 <= 8'd0;
            col_ff1 <= 8'd0;
            col_ff2 <= 8'd0;
        end
        else
        begin
            row_ff1 <= row;
            row_ff2 <= row_ff1;
            col_ff1 <= col;
            col_ff2 <= col_ff1;
        end

    // addr: row-major address of the current element.
    assign addr = (row*N + col);

    // Capture returning data. block_mat is a data register and is not reset.
    always_ff @(posedge clk)
        if (busy_ff2)
            block_mat[row_ff2-block_row][col_ff2-block_col] <= din;
endmodule
testbench:
`timescale 1ns / 1ps
//
// Company:
// Engineer:
//
// Create Date: 2020/11/13 18:51:33
// Design Name:
// Module Name: load_test
// Project Name:
// Target Devices:
// Tool Versions:
// Description:
//
// Dependencies:
//
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
//
//
// Testbench for load_block.
//
// Phase 1: a write port sweeps write_addr upwards with write_data
//          incrementing in step, filling the memory.
// Phase 2: when `count` reaches 300, `flag` hands the memory address over to
//          load_block and a one-cycle `start` requests the tile at (12, 12).
// When `done` pulses, the loaded tile is printed.
//
// NOTE(review): `Matrix` is not defined in this file - presumably a generated
// single-port block-memory IP; confirm its read latency matches the two-stage
// delay pipeline inside load_block.
module load_test(
);
    parameter Tn = 4;
    parameter N  = 16;

    logic        clk;
    logic        rst;
    logic [15:0] read_data;
    logic [7:0]  addr;
    logic [7:0]  addra;
    logic [7:0]  write_addr;
    logic [15:0] block_mat[0:Tn-1][0:Tn-1];
    logic        start;
    logic        done;
    logic [7:0]  block_row;
    logic [7:0]  block_col;
    logic [15:0] count;
    logic        flag;
    logic        we;
    logic [15:0] write_data;

    // Print the N x N grid of linear indices for visual comparison.
    initial
    begin
        for (int r = 0; r < N; r++)
        begin
            for (int c = 0; c < N; c++)
                $write("%d,", r*N+c);
            $write("\n");
        end
    end

    // Clock: 10 ns period.
    initial
    begin
        clk = 0;
        forever
            #5 clk = ~clk;
    end

    // Reset: released after 10 ns.
    initial
    begin
        rst = 1;
        #10
        rst = 0;
    end

    // Memory fill: step address and data together until the address
    // reaches 255, then drop the write enable.
    always @(posedge clk, posedge rst)
        if (rst)
        begin
            we         <= 1'b0;
            write_addr <= 8'd0;
            write_data <= 16'd0;
        end
        else if (write_addr == 255)
            we <= 1'b0;
        else
        begin
            we         <= 1'b1;
            write_addr <= write_addr + 8'd1;
            write_data <= write_data + 16'd1;
        end

    // Cycle counter plus everything keyed off it: at count == 300 the
    // address mux is switched (flag), the block origin is set, and start
    // is pulsed for one cycle.
    always_ff @(posedge clk, posedge rst)
        if (rst)
        begin
            count     <= 16'd0;
            flag      <= 1'b0;
            block_row <= 8'd0;
            block_col <= 8'd0;
            start     <= 1'b0;
        end
        else
        begin
            count <= count + 16'd1;
            if (count == 300)
            begin
                flag      <= 1'b1;
                block_row <= 8'd12;
                block_col <= 8'd12;
                start     <= 1'b1;
            end
            else
                start <= 1'b0;
        end

    // Print the loaded tile whenever done is high.
    always @(posedge clk, posedge rst)
        if (rst)
        begin
        end
        else if (done)
        begin
            for (int r = 0; r < Tn; r++)
            begin
                for (int c = 0; c < Tn; c++)
                    $write("%d,", block_mat[r][c]);
                $write("\n");
            end
        end

    // Address mux: fill address before the hand-over, load_block afterwards.
    assign addra = flag ? addr : write_addr;

    Matrix M(
        .clka  (clk),         // input wire clka
        .ena   (1'b1),        // input wire ena
        .wea   (we),          // input wire [0 : 0] wea
        .addra (addra),       // input wire [7 : 0] addra
        .dina  (write_data),  // input wire [15 : 0] dina
        .douta (read_data)    // output wire [15 : 0] douta
    );

    load_block V(
        .start     (start),
        .clk       (clk),
        .rst       (rst),
        .din       (read_data),
        .addr      (addr),
        .block_row (block_row),
        .block_col (block_col),  // reads M[block_row:block_row+Tn, block_col:block_col+Tn]
        .block_mat (block_mat),
        .done      (done)
    );
endmodule
仿真结果
分块矩阵存储的代码
`timescale 1ns / 1ps
//
// Company:
// Engineer:
//
// Create Date: 2020/11/17 19:12:44
// Design Name:
// Module Name: store_block
// Project Name:
// Target Devices:
// Tool Versions:
// Description:
//
// Dependencies:
//
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
//
//
// store_block: writes a Tn x Tn tile (block_mat) into a row-major N-column
// memory at position (block_row, block_col).
//
// After `start`, one element is issued per clock on the registered
// we/addr/dout outputs, in row-major order.
//
// Parameters:
//   Tn        - tile dimension.
//   N         - number of columns of the matrix stored in memory.
// Ports:
//   block_row - first destination row; must stay stable during the store.
//   block_col - first destination column; must stay stable during the store.
//   block_mat - tile to be written.
//   clk, rst  - clock and asynchronous active-high reset.
//   start     - pulse high for one cycle to begin storing.
//   we        - memory write enable.
//   addr      - memory write address (row*N + col).
//   dout      - memory write data.
//   done      - high during the cycle in which the last element is written.
//
// Tn and N are declared in a parameter port list (same order as before) so
// that Tn is declared before the port list uses it.
module store_block
#(
    parameter Tn = 4,
    parameter N  = 16
)
(
    input  logic [7:0]  block_row,
    input  logic [7:0]  block_col,
    input  logic [15:0] block_mat[0:Tn-1][0:Tn-1],
    input  logic        clk,
    input  logic        rst,
    input  logic        start,
    output logic        we,
    output logic [7:0]  addr,
    output logic [15:0] dout,
    output logic        done
);
    logic [7:0] row;
    logic [7:0] col;
    logic       busy;

    // Position flags, qualified with busy so the counters hold their value
    // while idle instead of free-running.
    logic last_col;
    logic last_elem;

    assign last_col  = busy && (col == block_col+Tn-1);
    assign last_elem = last_col && (row == block_row+Tn-1);

    // busy: high while elements are being issued.
    always_ff @(posedge clk, posedge rst)
        if (rst)
            busy <= 1'b0;
        else if (start)
            busy <= 1'b1;
        else if (last_elem)
            busy <= 1'b0;

    // row: absolute destination row; cleared after the last element.
    always_ff @(posedge clk, posedge rst)
        if (rst)
            row <= 8'd0;
        else if (start)
            row <= block_row;
        else if (last_col)
        begin
            if (last_elem)
                row <= 8'd0;
            else
                row <= row + 8'd1;
        end

    // col: absolute destination column; wraps to block_col after Tn columns.
    always_ff @(posedge clk, posedge rst)
        if (rst)
            col <= 8'd0;
        else if (start)
            col <= block_col;
        else if (busy)
        begin
            if (last_col)
                col <= block_col;
            else
                col <= col + 8'd1;
        end

    // we: busy delayed by one cycle, aligned with addr and dout.
    always_ff @(posedge clk, posedge rst)
        if (rst)
            we <= 1'b0;
        else
            we <= busy;

    // addr: registered row-major address, forced to 0 while idle.
    always_ff @(posedge clk, posedge rst)
        if (rst)
            addr <= 8'd0;
        else if (busy)
            addr <= row*N + col;
        else
            addr <= 8'd0;

    // dout: registered tile element, forced to 0 while idle.
    always_ff @(posedge clk, posedge rst)
        if (rst)
            dout <= 16'd0;
        else if (busy)
            dout <= block_mat[row-block_row][col-block_col];
        else
            dout <= 16'd0;

    // done: we is still high for the final write after busy has dropped.
    assign done = (~busy && we) ? 1'b1 : 1'b0;
endmodule
testbench:
`timescale 1ns / 1ps
//
// Company:
// Engineer:
//
// Create Date: 2020/11/17 19:35:10
// Design Name:
// Module Name: store_block_test
// Project Name:
// Target Devices:
// Tool Versions:
// Description:
//
// Dependencies:
//
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
//
//
// Testbench for store_block: fills a tile with random values, selects the
// destination (row 8, column 12) and issues a single start pulse at 100 ns.
// The we/addr/dout outputs are inspected in the waveform; there is no
// self-checking logic.
module store_block_test;
    parameter Tn = 4;
    parameter N  = 16;

    logic        clk;
    logic        rst;
    logic [7:0]  block_row;
    logic [7:0]  block_col;
    logic [15:0] block_mat[0:Tn-1][0:Tn-1];
    logic        start;
    logic        done;
    logic        we;
    logic [7:0]  addr;
    logic [15:0] dout;

    // Clock: 10 ns period.
    initial
    begin
        clk = 0;
        forever
            #5 clk = ~clk;
    end

    // Reset (released at 10 ns) and start pulse (high from 100 ns to 110 ns),
    // sequenced from a single process.
    initial
    begin
        rst   = 1;
        start = 0;
        #10
        rst   = 0;
        #90
        start = 1;
        #10
        start = 0;
    end

    // Stimulus data: destination block position and random tile contents.
    initial
    begin
        block_row = 8;
        block_col = 12;
        for (int r = 0; r < Tn; r++)
            for (int c = 0; c < Tn; c++)
                block_mat[r][c] <= {$random} % 100;
    end

    // Device under test, connected explicitly by name.
    store_block U(
        .block_row (block_row),
        .block_col (block_col),
        .block_mat (block_mat),
        .clk       (clk),
        .rst       (rst),
        .start     (start),
        .we        (we),
        .addr      (addr),
        .dout      (dout),
        .done      (done)
    );
endmodule
仿真结果正确!