自动生成的RTL代码,大家疑虑最多的就是效率问题。如果自动代码的电路面积,功耗和时序性能做不到与手工代码相当,其它方面的优点再多也很难让大家接受,特别是成本、功耗和性能敏感的ASIC设计。
自动生成RTL代码有两大方法:HLS和MBD。MBD一定要在代码效率这个问题上和HLS划清界限。HLS“骗取”了设计师对电路的精细控制权,那它就必须要对电路效率负起完全的责任,将电路效率优化到与手工代码相当是HLS必须要兑现的承诺,不管这个承诺有多难达成。MBD完全没有必要蹚“电路效率优化”这趟浑水,把自己陷入到HLS那样的困境中。
MBD就应该老老实实按照模型设计生成RTL代码:
每个模型只对应一个RTL代码段;
每个RTL代码段只对应一个模型;
RTL代码段之间的拓扑连接关系与模型完全相同;
代码生成功能不在RTL代码上对模型之间的连接顺序和关系做任何修改,不做美其名曰的优化处理。
至于RTL代码效率,不应该是MBD的责任,MBD只要忠实“翻译”出模型的代码就好了,代码效率完全交由模型设计师来负责掌控。这样MBD就可以大声宣布说:我的自动代码效率完全媲美手工代码。而事实上也确实可以做到。
就以DDS这个设计为例来谈谈MBD的电路优化策略。
初始的设计中,LUT存储了一个圆周的正弦波数据,存储空间上有很多浪费,其实只要存储第一象限的数据就够了,其它象限的数据可以根据与第一象限的关系计算得到。这个存储空间的优化处理不应该在RTL代码自动生成时实施,而应该由设计师在源头的模型设计中改进,结果如下图所示,原来的8比特的LUT地址,高两位表示象限,低六位用于新的正弦波数据LUT地址,这样LUT的存储空间就减少到了原来的四分之一。
如果有手工代码宣称效率比MBD的自动代码高,MBD就会说你哪一点效率比我高,我改还不行嘛,让我的模型设计师按照你的实现方式改!
改进模型设计后,MBD生成的完整RTL代码如下,LUT的存储空间从之前的256项减少到了64项。
//-----------------------------------------------------------------------------
// FILE : rtlgen_demo_dds_opt.v
// AUTHOR : myName
// DATE : 2020-08-18
// ABSTRACT :
// test simu-rtl_gen
//
//=====================================================================
// Created with SIMU2RTL(v20200814)
// @ 2020-08-18 18:45:15
// Simulink System:
// tb_demo_dds_opt
//=====================================================================
// TESTBENCH FILE : rtlgen_demo_dds_opt_tb.v
// TEST STIMULUS FILE : ./test_data/*.vec
//
// @Copyright 2020 MyCorp
// All rights reserved.
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
// rtlgen_demo_dds_opt : direct digital synthesizer with a quarter-wave sine LUT.
//
// A 24-bit phase accumulator adds FREQ on every rising CLK edge (wrapping
// modulo 2^24). The two phase MSBs select the quadrant, the next six bits
// address a 64-entry first-quadrant sine table, and the table output is
// mirrored / negated according to the quadrant before being registered.
//
// Ports:
//   CLK    clock, registers update on the rising edge
//   RST_N  asynchronous reset, active low; clears the phase and the output
//   FREQ   24-bit phase increment per clock
//   DOUT   10-bit registered sine sample (signed; negative values are formed
//          as ~x+1, i.e. two's complement)
//
// Fixed-point annotations <W,I,s> are copied from the generator; judging from
// the constants below W is the word length and I the number of integer bits
// (s = u/t presumably unsigned / two's complement - confirm with the tool).
//
// NOTE(review): in the mirrored quadrants the table index is 64 - addr, which
// for addr == 0 is entry 64 (the sine peak) and does not exist in a 64-entry
// table. The original code let that index wrap to entry 0, so the output
// dropped to 0 exactly at the positive and negative peaks. The LUT stage
// below now returns full scale for that case. The source model (and its test
// vectors) should receive the same correction.
//-----------------------------------------------------------------------------
module rtlgen_demo_dds_opt (
    CLK   ,
    RST_N ,
    FREQ  , // <24,0,u>
    DOUT    // <10,0,t>
);
    // ports declaration
    input          CLK   ;
    input          RST_N ;
    input  [23:0]  FREQ  ; // <24,0,u>
    output [ 9:0]  DOUT  ; // <10,0,t>
    //-------------------------------------------------------------------------
    // internal signals declaration
    reg  [ 9:0] Reg17_REG ; // <10,0,t>  output register
    reg  [23:0] Reg3_REG  ; // <24,0,u>  phase accumulator
    wire [24:0] Add1_CMB  ; // <25,1,u>
    wire [ 1:0] Cst5_CMB  ; // <2,0,u>
    wire [23:0] Fmt2_CMB  ; // <24,0,u>
    wire [ 5:0] Fmt4_CMB  ; // <6,-2,u>
    wire [ 8:0] Sub6_CMB  ; // <9,0,t>
    wire [23:0] Fsh7_CMB  ; // <24,2,u>
    wire [ 1:0] Fmt8_CMB  ; // <2>
    wire        Bsl14_CMB ; // <1>
    wire        Bsl9_CMB  ; // <1>
    wire [ 8:0] Swt10_CMB ; // <9,0,t>
    wire [ 5:0] Fmt11_CMB ; // <6,-2,u>
    reg  [ 9:0] Lut12_CMB ; // <10,0,t>
    reg  [ 9:0] Neg13_CMB ; // <10,0,t>
    wire [ 9:0] Swt15_CMB ; // <10,0,t>
    wire [ 9:0] Fmt16_CMB ; // <10,0,t>

    //-------------------------------------------------------------------------
    // hilite_system('tb_demo_dds_opt/DDS/Adder')
    // Adder #1 : <24,0,u> + <24,0,u> --> <25,1,u>
    // Both operands are zero-extended so the carry lands in bit 24.
    assign Add1_CMB = {1'B0,Reg3_REG}+{1'B0,FREQ};

    //-------------------------------------------------------------------------
    // hilite_system('tb_demo_dds_opt/DDS/Format')
    // Format #2 : wrap & truncate <25,1,u> to <24,0,u>
    // Dropping the carry makes the phase wrap modulo 2^24.
    assign Fmt2_CMB = Add1_CMB[23:0];

    //-------------------------------------------------------------------------
    // hilite_system('tb_demo_dds_opt/DDS/UnitDelay')
    // Register #3 : <24,0,u>  (phase accumulator state)
    always @(posedge CLK or negedge RST_N)
    begin : proc_Reg3_REG
        if(RST_N==1'B0)
            Reg3_REG <= 24'H0; // <24,0,u> 0.000000
        else
            Reg3_REG <= Fmt2_CMB;
    end

    //-------------------------------------------------------------------------
    // hilite_system('tb_demo_dds_opt/DDS/Format2')
    // Format #4 : wrap & truncate <24,0,u> to <6,-2,u>
    // Six phase bits directly below the two quadrant bits: in-quadrant address.
    assign Fmt4_CMB = Reg3_REG[21:16];

    //-------------------------------------------------------------------------
    // hilite_system('tb_demo_dds_opt/DDS/Const')
    // Constant #5 : <2,0,u> 0.500000
    assign Cst5_CMB = 2'D2;

    //-------------------------------------------------------------------------
    // hilite_system('tb_demo_dds_opt/DDS/Subtractor')
    // Subtractor #6 : <2,0,u> - <6,-2,u> --> <9,0,t>
    // After binary-point alignment this is 128 - addr; its low six bits are
    // (64 - addr) mod 64, the mirrored table address.
    assign Sub6_CMB = {1'B0,Cst5_CMB,6'D0}-{3'D0,Fmt4_CMB};

    //-------------------------------------------------------------------------
    // hilite_system('tb_demo_dds_opt/DDS/Shifter')
    // Fixed Shifter #7 : <24,0,u> << 2 --> <24,2,u>
    // Only the binary point moves; the bit pattern is unchanged.
    assign Fsh7_CMB = Reg3_REG;

    //-------------------------------------------------------------------------
    // hilite_system('tb_demo_dds_opt/DDS/Format3')
    // Format #8 : wrap & truncate <24,2,u> to <2>
    // Two phase MSBs = quadrant number.
    assign Fmt8_CMB = Fsh7_CMB[23:22];

    //-------------------------------------------------------------------------
    // hilite_system('tb_demo_dds_opt/DDS/WordSplit')
    // Bits-Selection #9 : Bit start: 0, length: 1 from <2>
    // Quadrant LSB: 1 selects the mirrored (descending) table address.
    assign Bsl9_CMB = Fmt8_CMB[0];

    //-------------------------------------------------------------------------
    // hilite_system('tb_demo_dds_opt/DDS/Switch')
    // Switch #10 : <6,-2,u> , <9,0,t> --> <9,0,t>
    assign Swt10_CMB = (Bsl9_CMB==1'B0)? {3'D0,Fmt4_CMB}:Sub6_CMB;

    //-------------------------------------------------------------------------
    // hilite_system('tb_demo_dds_opt/DDS/Format4')
    // Format #11 : wrap & truncate <9,0,t> to <6,-2,u>
    assign Fmt11_CMB = Swt10_CMB[5:0];

    //-------------------------------------------------------------------------
    // hilite_system('tb_demo_dds_opt/DDS/LUT')
    // LUT #12 : <6,-2,u> --> <10,0,t>
    // First-quadrant sine table, 64 entries.
    // Peak fix: when the address is mirrored (Bsl9_CMB==1) and the in-quadrant
    // address is 0, the wanted index is 64, which Fmt11_CMB cannot express (it
    // wraps to 0). Return positive full scale for that case instead of
    // entry 0.
    always @(Fmt11_CMB or Bsl9_CMB or Fmt4_CMB)
    begin : proc_Lut12_CMB
        if((Bsl9_CMB==1'B1) && (Fmt4_CMB==6'D0))
            Lut12_CMB = 10'D511;
        else
            case(Fmt11_CMB)
                6'D0    : Lut12_CMB = 10'D0;
                6'D1    : Lut12_CMB = 10'D13;
                6'D2    : Lut12_CMB = 10'D25;
                6'D3    : Lut12_CMB = 10'D38;
                6'D4    : Lut12_CMB = 10'D50;
                6'D5    : Lut12_CMB = 10'D63;
                6'D6    : Lut12_CMB = 10'D75;
                6'D7    : Lut12_CMB = 10'D88;
                6'D8    : Lut12_CMB = 10'D100;
                6'D9    : Lut12_CMB = 10'D112;
                6'D10   : Lut12_CMB = 10'D124;
                6'D11   : Lut12_CMB = 10'D137;
                6'D12   : Lut12_CMB = 10'D149;
                6'D13   : Lut12_CMB = 10'D161;
                6'D14   : Lut12_CMB = 10'D172;
                6'D15   : Lut12_CMB = 10'D184;
                6'D16   : Lut12_CMB = 10'D196;
                6'D17   : Lut12_CMB = 10'D207;
                6'D18   : Lut12_CMB = 10'D219;
                6'D19   : Lut12_CMB = 10'D230;
                6'D20   : Lut12_CMB = 10'D241;
                6'D21   : Lut12_CMB = 10'D252;
                6'D22   : Lut12_CMB = 10'D263;
                6'D23   : Lut12_CMB = 10'D274;
                6'D24   : Lut12_CMB = 10'D284;
                6'D25   : Lut12_CMB = 10'D295;
                6'D26   : Lut12_CMB = 10'D305;
                6'D27   : Lut12_CMB = 10'D315;
                6'D28   : Lut12_CMB = 10'D325;
                6'D29   : Lut12_CMB = 10'D334;
                6'D30   : Lut12_CMB = 10'D344;
                6'D31   : Lut12_CMB = 10'D353;
                6'D32   : Lut12_CMB = 10'D362;
                6'D33   : Lut12_CMB = 10'D371;
                6'D34   : Lut12_CMB = 10'D379;
                6'D35   : Lut12_CMB = 10'D388;
                6'D36   : Lut12_CMB = 10'D396;
                6'D37   : Lut12_CMB = 10'D404;
                6'D38   : Lut12_CMB = 10'D411;
                6'D39   : Lut12_CMB = 10'D419;
                6'D40   : Lut12_CMB = 10'D426;
                6'D41   : Lut12_CMB = 10'D433;
                6'D42   : Lut12_CMB = 10'D439;
                6'D43   : Lut12_CMB = 10'D445;
                6'D44   : Lut12_CMB = 10'D452;
                6'D45   : Lut12_CMB = 10'D457;
                6'D46   : Lut12_CMB = 10'D463;
                6'D47   : Lut12_CMB = 10'D468;
                6'D48   : Lut12_CMB = 10'D473;
                6'D49   : Lut12_CMB = 10'D478;
                6'D50   : Lut12_CMB = 10'D482;
                6'D51   : Lut12_CMB = 10'D486;
                6'D52   : Lut12_CMB = 10'D490;
                6'D53   : Lut12_CMB = 10'D493;
                6'D54   : Lut12_CMB = 10'D497;
                6'D55   : Lut12_CMB = 10'D500;
                6'D56   : Lut12_CMB = 10'D502;
                6'D57   : Lut12_CMB = 10'D504;
                6'D58   : Lut12_CMB = 10'D506;
                6'D59   : Lut12_CMB = 10'D508;
                6'D60   : Lut12_CMB = 10'D510;
                6'D61   : Lut12_CMB = 10'D511;
                6'D62   : Lut12_CMB = 10'D511;
                // 6'D63
                default : Lut12_CMB = 10'D511;
            endcase
    end

    //-------------------------------------------------------------------------
    // hilite_system('tb_demo_dds_opt/DDS/Negative')
    // Negative #13 : <10,0,t> --> <10,0,t>
    // Negation as ~x+1; the most negative code 10'H200 has no positive
    // counterpart in 10 bits, so it saturates to 10'H1FF.
    always @(Lut12_CMB)
    begin : proc_Neg13_CMB
        if(Lut12_CMB==10'H200)
            Neg13_CMB = 10'H1FF;
        else
            Neg13_CMB = ~Lut12_CMB+1'B1;
    end

    //-------------------------------------------------------------------------
    // hilite_system('tb_demo_dds_opt/DDS/WordSplit')
    // Bits-Selection #14 : Bit start: 1, length: 1 from <2>
    // Quadrant MSB: 1 selects the negated sample (second half of the period).
    assign Bsl14_CMB = Fmt8_CMB[1];

    //-------------------------------------------------------------------------
    // hilite_system('tb_demo_dds_opt/DDS/Switch1')
    // Switch #15 : <10,0,t> , <10,0,t> --> <10,0,t>
    assign Swt15_CMB = (Bsl14_CMB==1'B0)? Lut12_CMB:Neg13_CMB;

    //-------------------------------------------------------------------------
    // hilite_system('tb_demo_dds_opt/DDS/Format1')
    // Format #16 : clip & truncate <10,0,t> to <10,0,t>
    // Source and target formats are identical, so this is a plain wire.
    assign Fmt16_CMB = Swt15_CMB;

    //-------------------------------------------------------------------------
    // hilite_system('tb_demo_dds_opt/DDS/UnitDelay2')
    // Register #17 : <10,0,t>  (output register)
    always @(posedge CLK or negedge RST_N)
    begin : proc_Reg17_REG
        if(RST_N==1'B0)
            Reg17_REG <= 10'H0; // <10,0,t> 0.000000
        else
            Reg17_REG <= Fmt16_CMB;
    end

    //-------------------------------------------------------------------------
    // Output
    assign DOUT = Reg17_REG; // <10,0,t> hilite_system('tb_demo_dds_opt/DDS/OUP')
    //-------------------------------------------------------------------------
endmodule // rtlgen_demo_dds_opt
//-----------------------------------------------------------------------------
// Statistics:
// Input Port(non-memory): 1
// Output Port(non-memory): 1
// Instances(non-memory): 19
// Internal Signal: 17
// Total Register: 34 bits
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
// Internal Comb. Signals Top 50 (Vector Excluded)
// Index NAME FXP
// # 1 Add1_CMB <25,1,u>
// # 2 Fsh7_CMB <24,2,u>
// # 3 Fmt2_CMB <24,0,u>
// # 4 Lut12_CMB <10,0,t>
// # 5 Neg13_CMB <10,0,t>
// # 6 Swt15_CMB <10,0,t>
// # 7 Sub6_CMB <9,0,t>
// # 8 Swt10_CMB <9,0,t>
// # 9 Fmt4_CMB <6,-2,u>
// # 10 Fmt11_CMB <6,-2,u>
// # 11 Fmt8_CMB <2>
// # 12 Cst5_CMB <2,0,u>
// # 13 Bsl9_CMB <1>
// # 14 Bsl14_CMB <1>
//-----------------------------------------------------------------------------
// Internal Registers Top 50 (Vector Excluded)
// Index NAME FXP
// # 1 Reg3_REG <24,0,u>
// # 2 Reg17_REG <10,0,t>
//-----------------------------------------------------------------------------
// Multiplier Count: 0 Total bits: 0
// Multiplier Top 50 of 0
// Index NAME FXP
//-----------------------------------------------------------------------------
// Adder/Subtractor Count: 2 Total bits: 0
// Adder/Subtractor Top 50 of 2
// Index NAME FXP
// # 1 Add1_CMB <25,1,u>
// # 2 Sub6_CMB <9,0,t>
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
// End of the file.
//-----------------------------------------------------------------------------