SM4算法是一种分组密码算法,分组长度和密钥长度均为128 bit,主要包括密钥扩展算法、加密算法和解密算法。算法采用32轮非线性迭代结构,加密和解密算法结构相同,只是轮密钥的使用顺序相反。SM4密码算法的硬件设计分为循环架构和流水线架构两种:循环架构面向资源节约优化,使SM4密码算法能够部署在资源受限的硬件设备上;流水线架构面向加密性能优化,使SM4密码算法能够部署在对吞吐量要求较高的场景中。这里采用循环架构,即不对32轮迭代进行展开,而是每个时钟周期复用同一个轮函数电路完成一轮运算,因此每加密一个消息分组需要32个时钟周期完成轮运算(不含数据载入和结果输出所占用的周期)。
具体Verilog代码如下:
// Iterative SM4 core: a single round-function instance is reused for all
// 32 rounds, one round per clock cycle.
//
// Commands on `cmd` are sampled only while the FSM is in IDLE:
//   2'b00 : no operation
//   2'b01 : expand the 128-bit key on sm4_din into the 32 round keys
//   2'b10 : encrypt the 128-bit block on sm4_din
//   2'b11 : decrypt the 128-bit block on sm4_din
//
// res_vld pulses for one cycle when key expansion has finished, and is held
// high for four cycles after an encrypt/decrypt while the 128-bit result is
// shifted out on sm4_dout, most-significant 32-bit word first.
module sm4_top(
    input          clk, rst_n,
    input  [1:0]   cmd,       // 00:no-op 01:key_exp 10:encrypt 11:decrypt
    input  [127:0] sm4_din,   // key (cmd=01) or data block (cmd=10/11)
    output [31:0]  sm4_dout,  // result word, meaningful while res_vld is high after encrypt/decrypt
    output reg     res_vld
);
    localparam IDLE    = 3'd0;
    localparam KEYEXP  = 3'd1;
    localparam ENCRYPT = 3'd2;
    localparam DECRYPT = 3'd3;
    localparam STRES   = 3'd4;  // last round + word-order reversal
    localparam OUTPUT  = 3'd5;  // shift the result out, 32 bits per cycle

    reg  [2:0]   state_r;
    wire         state_is_idle    = (state_r == IDLE);
    wire         state_is_decrypt = (state_r == DECRYPT);
    reg  [1:0]   counter;            // counts the four output words
    reg  [4:0]   ikey_en, ikey_de;   // round-key read index: ascending / descending
    reg  [127:0] sm4_round_din;      // working state, also used as output shift register
    wire [127:0] sm4_round_dout;
    wire [31:0]  ikey;
    wire         key_exp_done;
    wire         key_exp_start = state_is_idle & (cmd == 2'b01);

    // Round-key index presented to the key RAM. The RAM registers its read
    // address, so the index selected in this cycle is the key used in the
    // next one; that is why the decrypt index must already be selected in
    // the IDLE cycle in which cmd==11 is accepted.
    // The cmd term is gated with state_is_idle: cmd is ignored by the FSM
    // outside IDLE, so it must not be allowed to switch the key index in
    // the middle of an encryption either.
    wire [4:0]   ikey_n = (state_is_decrypt | (state_is_idle & (cmd == 2'b11))) ? ikey_de : ikey_en;

    // The most-significant word of the working register is the output port.
    assign sm4_dout = sm4_round_din[127:96];

    always @(posedge clk, negedge rst_n) begin
        if (~rst_n) begin
            state_r       <= IDLE;
            counter       <= 2'd0;
            ikey_en       <= 5'd0;
            ikey_de       <= 5'd31;
            sm4_round_din <= 128'd0;
            res_vld       <= 1'b0;
        end
        else begin
            case (state_r)
                IDLE: begin
                    res_vld       <= 1'b0;
                    // Keep loading the input so the block is already in
                    // place in the cycle a command is accepted.
                    sm4_round_din <= sm4_din;
                    case (cmd)
                        2'b00: state_r <= IDLE;
                        2'b01: state_r <= KEYEXP;
                        2'b10: begin
                            ikey_en <= ikey_en + 1'b1;
                            state_r <= ENCRYPT;
                        end
                        2'b11: begin
                            ikey_de <= ikey_de - 1'b1;
                            state_r <= DECRYPT;
                        end
                        default: state_r <= IDLE;
                    endcase
                end
                KEYEXP: begin
                    // Wait for the key schedule to finish, then flag completion.
                    if (key_exp_done) begin
                        res_vld <= 1'b1;
                        state_r <= IDLE;
                    end
                end
                ENCRYPT: begin
                    sm4_round_din <= sm4_round_dout;
                    // Wraps from 31 back to 0 on the last increment, which
                    // leaves the index ready for the next encryption.
                    ikey_en       <= ikey_en + 1'b1;
                    if (ikey_en == 5'd31)
                        state_r <= STRES;
                end
                DECRYPT: begin
                    sm4_round_din <= sm4_round_dout;
                    // Wraps from 0 back to 31 on the last decrement.
                    ikey_de       <= ikey_de - 1'b1;
                    if (ikey_de == 5'd0)
                        state_r <= STRES;
                end
                STRES: begin
                    // The final round is computed in this cycle; store its
                    // result with the four 32-bit words in reversed order.
                    sm4_round_din <= {sm4_round_dout[31:0],  sm4_round_dout[63:32],
                                      sm4_round_dout[95:64], sm4_round_dout[127:96]};
                    res_vld       <= 1'b1;
                    state_r       <= OUTPUT;
                end
                OUTPUT: begin
                    counter       <= counter + 1'b1;   // wraps to 0 after the fourth word
                    sm4_round_din <= {sm4_round_din[95:0], 32'd0};
                    if (counter == 2'd3) begin
                        res_vld <= 1'b0;
                        state_r <= IDLE;
                    end
                end
                default: state_r <= IDLE;
            endcase
        end
    end

    // Shared combinational round function for encryption and decryption.
    sm4_encdec_round u_sm4_round(
        .round_din (sm4_round_din),
        .round_key (ikey),
        .round_dout(sm4_round_dout)
    );

    // Key schedule and round-key storage.
    key_expansion u_key_exp(
        .clk          (clk),
        .rst_n        (rst_n),
        .mkey         (sm4_din),
        .key_exp_start(key_exp_start),
        .ikey_n       (ikey_n),
        .ikey         (ikey),
        .key_exp_done (key_exp_done)
    );
endmodule
// SM4 key schedule with round-key storage.
//
// A pulse on key_exp_start while the unit is idle latches mkey XOR FK and
// then produces one round key per clock, writing it to the round-key RAM at
// the address given by exp_counter. key_exp_done is high during the cycle
// in which the last round key (index 31) is written. While idle, the RAM is
// read at address ikey_n and the selected round key appears on ikey.
module key_expansion(
    input          clk, rst_n,
    input  [127:0] mkey,
    input          key_exp_start,
    input  [4:0]   ikey_n,
    output [31:0]  ikey,
    output         key_exp_done
);
    // System parameter FK = {FK0, FK1, FK2, FK3}.
    localparam [127:0] FK = {32'ha3b1bac6, 32'h56aa3350, 32'h677d9197, 32'hb27022dc};

    reg          state_is_idle;
    reg          ram_ikey_wea;
    reg  [4:0]   exp_counter;
    reg  [127:0] round_din_r;
    wire [31:0]  round_key_r;
    wire [4:0]   ram_ikey_addr;
    wire [127:0] round_dout;

    wire key_exp_trigger = state_is_idle & key_exp_start;

    assign key_exp_done  = (~state_is_idle) & (exp_counter == 5'd31);
    // The RAM is addressed by the reader while idle and by the expansion
    // counter while round keys are being generated.
    assign ram_ikey_addr = state_is_idle ? ikey_n : exp_counter;

    always @(posedge clk, negedge rst_n) begin
        if (!rst_n) begin
            state_is_idle <= 1'b1;
            ram_ikey_wea  <= 1'b0;
            exp_counter   <= 5'd0;
            round_din_r   <= 128'd0;
        end
        else begin
            // Busy flag and RAM write enable are set and cleared together.
            if (key_exp_trigger) begin
                state_is_idle <= 1'b0;
                ram_ikey_wea  <= 1'b1;
            end
            else if (key_exp_done) begin
                state_is_idle <= 1'b1;
                ram_ikey_wea  <= 1'b0;
            end

            // Free-running while busy; the 5-bit counter wraps back to 0
            // after the 32nd round key.
            if (!state_is_idle)
                exp_counter <= exp_counter + 1'b1;

            // Load the whitened key on start, then iterate the key round.
            if (key_exp_trigger)
                round_din_r <= mkey ^ FK;
            else if (!state_is_idle)
                round_din_r <= round_dout;
        end
    end

    // Constant CK for the current expansion round.
    get_cki u_cki(
        .round_cnt(exp_counter),
        .cki      (round_key_r)
    );

    // Combinational key-schedule round.
    sm4_key_round u_key_round(
        .round_din (round_din_r),
        .round_ckey(round_key_r),
        .round_dout(round_dout)
    );

    // Round-key storage; the newly produced word is the low 32 bits of round_dout.
    ram_ikey #(
        .DP(32),
        .AW(5),
        .DW(32)
    ) u_ram_ikey(
        .clk (clk),
        .din (round_dout[31:0]),
        .addr(ram_ikey_addr),
        .wea (ram_ikey_wea),
        .dout(ikey)
    );
endmodule
// One combinational SM4 encryption/decryption round.
//
// With round_din = {X0, X1, X2, X3} (X0 in the most-significant word):
//   round_dout = {X1, X2, X3, X0 ^ L(tau(X1 ^ X2 ^ X3 ^ round_key))}
// where tau applies the S-box to each byte and
//   L(B) = B ^ (B <<< 2) ^ (B <<< 10) ^ (B <<< 18) ^ (B <<< 24).
module sm4_encdec_round(
    input  [127:0] round_din,
    input  [31:0]  round_key,
    output [127:0] round_dout
);
    // 32-bit rotate left by a constant amount.
    function [31:0] rotl32;
        input [31:0] value;
        input [4:0]  amount;
        begin
            rotl32 = (value << amount) | (value >> (6'd32 - amount));
        end
    endfunction

    wire [31:0] x0 = round_din[127:96];
    wire [31:0] x1 = round_din[95:64];
    wire [31:0] x2 = round_din[63:32];
    wire [31:0] x3 = round_din[31:0];

    wire [31:0] tau_in = x1 ^ x2 ^ x3 ^ round_key;
    wire [31:0] tau_out;

    // Linear diffusion L.
    wire [31:0] lin_out = tau_out
                        ^ rotl32(tau_out, 5'd2)
                        ^ rotl32(tau_out, 5'd10)
                        ^ rotl32(tau_out, 5'd18)
                        ^ rotl32(tau_out, 5'd24);

    assign round_dout = {x1, x2, x3, lin_out ^ x0};

    // Non-linear layer tau: one S-box per byte, most-significant byte first.
    s_box sbox0(
        .s_in (tau_in[31:24]),
        .s_out(tau_out[31:24])
    );
    s_box sbox1(
        .s_in (tau_in[23:16]),
        .s_out(tau_out[23:16])
    );
    s_box sbox2(
        .s_in (tau_in[15:8]),
        .s_out(tau_out[15:8])
    );
    s_box sbox3(
        .s_in (tau_in[7:0]),
        .s_out(tau_out[7:0])
    );
endmodule
// One combinational SM4 key-schedule round.
//
// With round_din = {K0, K1, K2, K3} (K0 in the most-significant word):
//   round_dout = {K1, K2, K3, K0 ^ L'(tau(K1 ^ K2 ^ K3 ^ round_ckey))}
// where tau applies the S-box to each byte and
//   L'(B) = B ^ (B <<< 13) ^ (B <<< 23).
// The low 32 bits of round_dout are the newly generated word.
module sm4_key_round(
    input  [127:0] round_din,
    input  [31:0]  round_ckey,
    output [127:0] round_dout
);
    // 32-bit rotate left by a constant amount.
    function [31:0] rotl32;
        input [31:0] value;
        input [4:0]  amount;
        begin
            rotl32 = (value << amount) | (value >> (6'd32 - amount));
        end
    endfunction

    wire [31:0] k0 = round_din[127:96];
    wire [31:0] k1 = round_din[95:64];
    wire [31:0] k2 = round_din[63:32];
    wire [31:0] k3 = round_din[31:0];

    wire [31:0] tau_in = k1 ^ k2 ^ k3 ^ round_ckey;
    wire [31:0] tau_out;

    // Linear diffusion L' used only by the key schedule.
    wire [31:0] lin_out = tau_out
                        ^ rotl32(tau_out, 5'd13)
                        ^ rotl32(tau_out, 5'd23);

    assign round_dout = {k1, k2, k3, lin_out ^ k0};

    // Non-linear layer tau: one S-box per byte, most-significant byte first.
    s_box sbox0(
        .s_in (tau_in[31:24]),
        .s_out(tau_out[31:24])
    );
    s_box sbox1(
        .s_in (tau_in[23:16]),
        .s_out(tau_out[23:16])
    );
    s_box sbox2(
        .s_in (tau_in[15:8]),
        .s_out(tau_out[15:8])
    );
    s_box sbox3(
        .s_in (tau_in[7:0]),
        .s_out(tau_out[7:0])
    );
endmodule
// Combinational look-up of the SM4 key-schedule constant CK[round_cnt].
// Byte j (j = 0 is the most-significant byte) of entry i equals
// ((4*i + j) * 7) mod 256.
module get_cki(
    input      [4:0]  round_cnt,
    output reg [31:0] cki
);
    // Blocking assignments are used because this is purely combinational
    // logic; non-blocking assignments belong in clocked blocks only.
    always @(*)
        case (round_cnt)
            5'h00: cki = 32'h00070e15;
            5'h01: cki = 32'h1c232a31;
            5'h02: cki = 32'h383f464d;
            5'h03: cki = 32'h545b6269;
            5'h04: cki = 32'h70777e85;
            5'h05: cki = 32'h8c939aa1;
            5'h06: cki = 32'ha8afb6bd;
            5'h07: cki = 32'hc4cbd2d9;
            5'h08: cki = 32'he0e7eef5;
            5'h09: cki = 32'hfc030a11;
            5'h0a: cki = 32'h181f262d;
            5'h0b: cki = 32'h343b4249;
            5'h0c: cki = 32'h50575e65;
            5'h0d: cki = 32'h6c737a81;
            5'h0e: cki = 32'h888f969d;
            5'h0f: cki = 32'ha4abb2b9;
            5'h10: cki = 32'hc0c7ced5;
            5'h11: cki = 32'hdce3eaf1;
            5'h12: cki = 32'hf8ff060d;
            5'h13: cki = 32'h141b2229;
            5'h14: cki = 32'h30373e45;
            5'h15: cki = 32'h4c535a61;
            5'h16: cki = 32'h686f767d;
            5'h17: cki = 32'h848b9299;
            5'h18: cki = 32'ha0a7aeb5;
            5'h19: cki = 32'hbcc3cad1;
            5'h1a: cki = 32'hd8dfe6ed;
            5'h1b: cki = 32'hf4fb0209;
            5'h1c: cki = 32'h10171e25;
            5'h1d: cki = 32'h2c333a41;
            5'h1e: cki = 32'h484f565d;
            5'h1f: cki = 32'h646b7279;
            // Only reached when round_cnt contains X/Z bits in simulation.
            default: cki = 32'h0;
        endcase
endmodule
// Combinational SM4 S-box: maps the 8-bit input s_in to its 8-bit
// substitution value s_out through a 256-entry look-up table.
module s_box(
    input      [7:0] s_in,
    output reg [7:0] s_out
);
    // Blocking assignments are used because this is purely combinational
    // logic; non-blocking assignments belong in clocked blocks only.
    always @(*)
        case (s_in)
            8'h00: s_out = 8'hd6;
            8'h01: s_out = 8'h90;
            8'h02: s_out = 8'he9;
            8'h03: s_out = 8'hfe;
            8'h04: s_out = 8'hcc;
            8'h05: s_out = 8'he1;
            8'h06: s_out = 8'h3d;
            8'h07: s_out = 8'hb7;
            8'h08: s_out = 8'h16;
            8'h09: s_out = 8'hb6;
            8'h0a: s_out = 8'h14;
            8'h0b: s_out = 8'hc2;
            8'h0c: s_out = 8'h28;
            8'h0d: s_out = 8'hfb;
            8'h0e: s_out = 8'h2c;
            8'h0f: s_out = 8'h05;
            8'h10: s_out = 8'h2b;
            8'h11: s_out = 8'h67;
            8'h12: s_out = 8'h9a;
            8'h13: s_out = 8'h76;
            8'h14: s_out = 8'h2a;
            8'h15: s_out = 8'hbe;
            8'h16: s_out = 8'h04;
            8'h17: s_out = 8'hc3;
            8'h18: s_out = 8'haa;
            8'h19: s_out = 8'h44;
            8'h1a: s_out = 8'h13;
            8'h1b: s_out = 8'h26;
            8'h1c: s_out = 8'h49;
            8'h1d: s_out = 8'h86;
            8'h1e: s_out = 8'h06;
            8'h1f: s_out = 8'h99;
            8'h20: s_out = 8'h9c;
            8'h21: s_out = 8'h42;
            8'h22: s_out = 8'h50;
            8'h23: s_out = 8'hf4;
            8'h24: s_out = 8'h91;
            8'h25: s_out = 8'hef;
            8'h26: s_out = 8'h98;
            8'h27: s_out = 8'h7a;
            8'h28: s_out = 8'h33;
            8'h29: s_out = 8'h54;
            8'h2a: s_out = 8'h0b;
            8'h2b: s_out = 8'h43;
            8'h2c: s_out = 8'hed;
            8'h2d: s_out = 8'hcf;
            8'h2e: s_out = 8'hac;
            8'h2f: s_out = 8'h62;
            8'h30: s_out = 8'he4;
            8'h31: s_out = 8'hb3;
            8'h32: s_out = 8'h1c;
            8'h33: s_out = 8'ha9;
            8'h34: s_out = 8'hc9;
            8'h35: s_out = 8'h08;
            8'h36: s_out = 8'he8;
            8'h37: s_out = 8'h95;
            8'h38: s_out = 8'h80;
            8'h39: s_out = 8'hdf;
            8'h3a: s_out = 8'h94;
            8'h3b: s_out = 8'hfa;
            8'h3c: s_out = 8'h75;
            8'h3d: s_out = 8'h8f;
            8'h3e: s_out = 8'h3f;
            8'h3f: s_out = 8'ha6;
            8'h40: s_out = 8'h47;
            8'h41: s_out = 8'h07;
            8'h42: s_out = 8'ha7;
            8'h43: s_out = 8'hfc;
            8'h44: s_out = 8'hf3;
            8'h45: s_out = 8'h73;
            8'h46: s_out = 8'h17;
            8'h47: s_out = 8'hba;
            8'h48: s_out = 8'h83;
            8'h49: s_out = 8'h59;
            8'h4a: s_out = 8'h3c;
            8'h4b: s_out = 8'h19;
            8'h4c: s_out = 8'he6;
            8'h4d: s_out = 8'h85;
            8'h4e: s_out = 8'h4f;
            8'h4f: s_out = 8'ha8;
            8'h50: s_out = 8'h68;
            8'h51: s_out = 8'h6b;
            8'h52: s_out = 8'h81;
            8'h53: s_out = 8'hb2;
            8'h54: s_out = 8'h71;
            8'h55: s_out = 8'h64;
            8'h56: s_out = 8'hda;
            8'h57: s_out = 8'h8b;
            8'h58: s_out = 8'hf8;
            8'h59: s_out = 8'heb;
            8'h5a: s_out = 8'h0f;
            8'h5b: s_out = 8'h4b;
            8'h5c: s_out = 8'h70;
            8'h5d: s_out = 8'h56;
            8'h5e: s_out = 8'h9d;
            8'h5f: s_out = 8'h35;
            8'h60: s_out = 8'h1e;
            8'h61: s_out = 8'h24;
            8'h62: s_out = 8'h0e;
            8'h63: s_out = 8'h5e;
            8'h64: s_out = 8'h63;
            8'h65: s_out = 8'h58;
            8'h66: s_out = 8'hd1;
            8'h67: s_out = 8'ha2;
            8'h68: s_out = 8'h25;
            8'h69: s_out = 8'h22;
            8'h6a: s_out = 8'h7c;
            8'h6b: s_out = 8'h3b;
            8'h6c: s_out = 8'h01;
            8'h6d: s_out = 8'h21;
            8'h6e: s_out = 8'h78;
            8'h6f: s_out = 8'h87;
            8'h70: s_out = 8'hd4;
            8'h71: s_out = 8'h00;
            8'h72: s_out = 8'h46;
            8'h73: s_out = 8'h57;
            8'h74: s_out = 8'h9f;
            8'h75: s_out = 8'hd3;
            8'h76: s_out = 8'h27;
            8'h77: s_out = 8'h52;
            8'h78: s_out = 8'h4c;
            8'h79: s_out = 8'h36;
            8'h7a: s_out = 8'h02;
            8'h7b: s_out = 8'he7;
            8'h7c: s_out = 8'ha0;
            8'h7d: s_out = 8'hc4;
            8'h7e: s_out = 8'hc8;
            8'h7f: s_out = 8'h9e;
            8'h80: s_out = 8'hea;
            8'h81: s_out = 8'hbf;
            8'h82: s_out = 8'h8a;
            8'h83: s_out = 8'hd2;
            8'h84: s_out = 8'h40;
            8'h85: s_out = 8'hc7;
            8'h86: s_out = 8'h38;
            8'h87: s_out = 8'hb5;
            8'h88: s_out = 8'ha3;
            8'h89: s_out = 8'hf7;
            8'h8a: s_out = 8'hf2;
            8'h8b: s_out = 8'hce;
            8'h8c: s_out = 8'hf9;
            8'h8d: s_out = 8'h61;
            8'h8e: s_out = 8'h15;
            8'h8f: s_out = 8'ha1;
            8'h90: s_out = 8'he0;
            8'h91: s_out = 8'hae;
            8'h92: s_out = 8'h5d;
            8'h93: s_out = 8'ha4;
            8'h94: s_out = 8'h9b;
            8'h95: s_out = 8'h34;
            8'h96: s_out = 8'h1a;
            8'h97: s_out = 8'h55;
            8'h98: s_out = 8'had;
            8'h99: s_out = 8'h93;
            8'h9a: s_out = 8'h32;
            8'h9b: s_out = 8'h30;
            8'h9c: s_out = 8'hf5;
            8'h9d: s_out = 8'h8c;
            8'h9e: s_out = 8'hb1;
            8'h9f: s_out = 8'he3;
            8'ha0: s_out = 8'h1d;
            8'ha1: s_out = 8'hf6;
            8'ha2: s_out = 8'he2;
            8'ha3: s_out = 8'h2e;
            8'ha4: s_out = 8'h82;
            8'ha5: s_out = 8'h66;
            8'ha6: s_out = 8'hca;
            8'ha7: s_out = 8'h60;
            8'ha8: s_out = 8'hc0;
            8'ha9: s_out = 8'h29;
            8'haa: s_out = 8'h23;
            8'hab: s_out = 8'hab;
            8'hac: s_out = 8'h0d;
            8'had: s_out = 8'h53;
            8'hae: s_out = 8'h4e;
            8'haf: s_out = 8'h6f;
            8'hb0: s_out = 8'hd5;
            8'hb1: s_out = 8'hdb;
            8'hb2: s_out = 8'h37;
            8'hb3: s_out = 8'h45;
            8'hb4: s_out = 8'hde;
            8'hb5: s_out = 8'hfd;
            8'hb6: s_out = 8'h8e;
            8'hb7: s_out = 8'h2f;
            8'hb8: s_out = 8'h03;
            8'hb9: s_out = 8'hff;
            8'hba: s_out = 8'h6a;
            8'hbb: s_out = 8'h72;
            8'hbc: s_out = 8'h6d;
            8'hbd: s_out = 8'h6c;
            8'hbe: s_out = 8'h5b;
            8'hbf: s_out = 8'h51;
            8'hc0: s_out = 8'h8d;
            8'hc1: s_out = 8'h1b;
            8'hc2: s_out = 8'haf;
            8'hc3: s_out = 8'h92;
            8'hc4: s_out = 8'hbb;
            8'hc5: s_out = 8'hdd;
            8'hc6: s_out = 8'hbc;
            8'hc7: s_out = 8'h7f;
            8'hc8: s_out = 8'h11;
            8'hc9: s_out = 8'hd9;
            8'hca: s_out = 8'h5c;
            8'hcb: s_out = 8'h41;
            8'hcc: s_out = 8'h1f;
            8'hcd: s_out = 8'h10;
            8'hce: s_out = 8'h5a;
            8'hcf: s_out = 8'hd8;
            8'hd0: s_out = 8'h0a;
            8'hd1: s_out = 8'hc1;
            8'hd2: s_out = 8'h31;
            8'hd3: s_out = 8'h88;
            8'hd4: s_out = 8'ha5;
            8'hd5: s_out = 8'hcd;
            8'hd6: s_out = 8'h7b;
            8'hd7: s_out = 8'hbd;
            8'hd8: s_out = 8'h2d;
            8'hd9: s_out = 8'h74;
            8'hda: s_out = 8'hd0;
            8'hdb: s_out = 8'h12;
            8'hdc: s_out = 8'hb8;
            8'hdd: s_out = 8'he5;
            8'hde: s_out = 8'hb4;
            8'hdf: s_out = 8'hb0;
            8'he0: s_out = 8'h89;
            8'he1: s_out = 8'h69;
            8'he2: s_out = 8'h97;
            8'he3: s_out = 8'h4a;
            8'he4: s_out = 8'h0c;
            8'he5: s_out = 8'h96;
            8'he6: s_out = 8'h77;
            8'he7: s_out = 8'h7e;
            8'he8: s_out = 8'h65;
            8'he9: s_out = 8'hb9;
            8'hea: s_out = 8'hf1;
            8'heb: s_out = 8'h09;
            8'hec: s_out = 8'hc5;
            8'hed: s_out = 8'h6e;
            8'hee: s_out = 8'hc6;
            8'hef: s_out = 8'h84;
            8'hf0: s_out = 8'h18;
            8'hf1: s_out = 8'hf0;
            8'hf2: s_out = 8'h7d;
            8'hf3: s_out = 8'hec;
            8'hf4: s_out = 8'h3a;
            8'hf5: s_out = 8'hdc;
            8'hf6: s_out = 8'h4d;
            8'hf7: s_out = 8'h20;
            8'hf8: s_out = 8'h79;
            8'hf9: s_out = 8'hee;
            8'hfa: s_out = 8'h5f;
            8'hfb: s_out = 8'h3e;
            8'hfc: s_out = 8'hd7;
            8'hfd: s_out = 8'hcb;
            8'hfe: s_out = 8'h39;
            8'hff: s_out = 8'h48;
            // Only reached when s_in contains X/Z bits in simulation.
            default: s_out = 8'h00;
        endcase
endmodule
// Single-port synchronous RAM used to hold the round keys.
//
// Parameters: DP = number of words, AW = address width, DW = word width.
// When wea is high, din is written to mem_r[addr] on the rising clock edge
// and the read address register keeps its previous value. When wea is low,
// addr is captured on the rising clock edge and dout shows the addressed
// word from then on (one-cycle read latency).
module ram_ikey #(
    parameter DP = 32,
    parameter AW = 8,
    parameter DW = 32
)(
    input           clk,
    input  [DW-1:0] din,
    input  [AW-1:0] addr,
    input           wea,
    output [DW-1:0] dout
);
    reg [DW-1:0] mem_r [0:DP-1];
    reg [AW-1:0] addr_r;   // registered read address

    always @(posedge clk) begin
        if (wea)
            mem_r[addr] <= din;
        else if (wea == 1'b0)   // explicit test so an unknown wea does nothing
            addr_r <= addr;
    end

    assign dout = mem_r[addr_r];
endmodule
// Self-checking testbench for sm4_top.
//
// Uses the standard SM4 example vector, in which the key and the plaintext
// are both 0123456789abcdeffedcba9876543210 and the ciphertext is
// 681edf34d206965e86b3e94f536e4246. The sequence is: key expansion,
// encrypt the plaintext, decrypt the ciphertext.
//
// All stimulus changes on the falling clock edge so that it never races
// with the design, which samples on the rising edge.
module tb;
    reg          clk, rst_n;
    reg  [127:0] sm4_din;
    reg  [1:0]   cmd;
    wire [31:0]  sm4_dout;
    wire         res_vld;

    localparam [127:0] KEY        = 128'h0123456789abcdeffedcba9876543210;
    localparam [127:0] PLAINTEXT  = 128'h0123456789abcdeffedcba9876543210;
    localparam [127:0] CIPHERTEXT = 128'h681edf34d206965e86b3e94f536e4246;

    reg  [127:0] result;   // four output words, first word in the top bits
    integer      errors;
    integer      i;

    // Present a command and its data for exactly one rising clock edge.
    task send_cmd;
        input [1:0]   c;
        input [127:0] data;
        begin
            @(negedge clk);
            sm4_din = data;
            cmd     = c;
            @(negedge clk);
            cmd     = 2'b00;
        end
    endtask

    // Block until res_vld is seen high on a falling clock edge.
    task wait_res_vld;
        begin
            while (res_vld !== 1'b1)
                @(negedge clk);
        end
    endtask

    // Collect the four 32-bit result words that follow an encrypt/decrypt.
    task read_result;
        begin
            wait_res_vld;
            for (i = 0; i < 4; i = i + 1) begin
                result = {result[95:0], sm4_dout};
                @(negedge clk);
            end
        end
    endtask

    // Compare the collected result with the expected block.
    task check_result;
        input [127:0] expected;
        begin
            if (result !== expected) begin
                errors = errors + 1;
                $display("FAIL: got %h, expected %h", result, expected);
            end
            else
                $display("OK  : %h", result);
        end
    endtask

    initial begin
        clk     = 0;
        rst_n   = 0;
        sm4_din = KEY;
        cmd     = 0;
        errors  = 0;
        repeat (2) @(negedge clk);
        rst_n   = 1;

        // Key expansion; completion is signalled by a one-cycle res_vld pulse.
        send_cmd(2'b01, KEY);
        wait_res_vld;

        // Encrypt the plaintext.
        send_cmd(2'b10, PLAINTEXT);
        read_result;
        check_result(CIPHERTEXT);

        // Decrypt the ciphertext.
        send_cmd(2'b11, CIPHERTEXT);
        read_result;
        check_result(PLAINTEXT);

        if (errors == 0)
            $display("PASS");
        else
            $display("FAILED with %0d error(s)", errors);
        $finish;
    end

    // Watchdog so that a hung design cannot stall the simulation forever.
    initial begin
        #100000;
        $display("FAIL: timeout");
        $finish;
    end

    always #5 clk = ~clk;

    sm4_top u_sm4_top(
        .clk     (clk),
        .rst_n   (rst_n),
        .cmd     (cmd),      // 00:no-op 01:key_exp 10:encrypt 11:decrypt
        .sm4_din (sm4_din),
        .sm4_dout(sm4_dout),
        .res_vld (res_vld)
    );
endmodule