model.embed_tokens.weight: torch.Size([128256, 4096])
model.layers.0.input_layernorm.weight: torch.Size([4096])
model.layers.0.self_attn.q_proj.weight: torch.Size([4096, 4096])
model.layers.0.self_attn.k_proj.weight: torch.Size([1024, 4096])
model.layers.0.self_attn.v_proj.weight: torch.Size([1024, 4096])
model.layers.0.self_attn.o_proj.weight: torch.Size([4096, 4096])
model.layers.0.post_attention_layernorm.weight: torch.Size([4096])
model.layers.0.mlp.gate_proj.weight: torch.Size([14336, 4096])
model.layers.0.mlp.up_proj.weight: torch.Size([14336, 4096])
model.layers.0.mlp.down_proj.weight: torch.Size([4096, 14336])
model.norm.weight: torch.Size([4096])
lm_head.weight: torch.Size([128256, 4096])
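A dump like the one above can be reproduced from any checkpoint in this Hugging Face Llama layout; the model name below is an assumption (any checkpoint with hidden size 4096, 32 query heads, 8 KV heads of dimension 128, and a 128256-token vocabulary, e.g. Llama-3-8B, gives exactly these shapes). The grouped-query attention explains the asymmetry: q_proj has 32 × 128 = 4096 output rows, while k_proj and v_proj have only 8 × 128 = 1024.

```python
import torch
from transformers import AutoModelForCausalLM

# Checkpoint name is an assumption; any Llama-3-8B-class model matches the shapes above.
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Meta-Llama-3-8B", torch_dtype=torch.bfloat16
)

for name, param in model.state_dict().items():
    # Print layer 0 only, so the per-layer keys are not repeated 32 times.
    if ".layers." in name and ".layers.0." not in name:
        continue
    print(f"{name}: {param.shape}")
```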
embedding.word_embeddings.weight: torch.Size([128256, 4096])
decoder.layers.0.input_layernorm.weight: torch.Size([4096])
decoder.layers.0.self_attention.linear_qkv.weight: torch.Size([6144, 4096])
decoder.layers.0.self_attention.linear_proj.weight: torch.Size([4096, 4096])
decoder.layers.0.pre_mlp_layernorm.weight: torch.Size([4096])
decoder.layers.0.mlp.linear_fc1.weight: torch.Size([28672, 4096])
decoder.layers.0.mlp.linear_fc2.weight: torch.Size([4096, 14336])
decoder.final_layernorm.weight: torch.Size([4096])
output_layer.weight: torch.Size([128256, 4096])
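The Megatron-Core layout above holds the same parameters but fuses them: linear_qkv stacks the query, key and value projections (4096 + 1024 + 1024 = 6144 rows), and linear_fc1 stacks the gate and up projections (2 × 14336 = 28672 rows). The sketch below shows only this shape bookkeeping; the exact head interleaving Megatron-Core uses inside linear_qkv (q/k/v arranged per query group) is glossed over, so the plain concatenation is illustrative rather than actual conversion code.

```python
import torch

hidden, n_q_heads, n_kv_heads, head_dim, ffn = 4096, 32, 8, 128, 14336

q = torch.empty(n_q_heads * head_dim, hidden)    # q_proj:  [4096, 4096]
k = torch.empty(n_kv_heads * head_dim, hidden)   # k_proj:  [1024, 4096]
v = torch.empty(n_kv_heads * head_dim, hidden)   # v_proj:  [1024, 4096]
linear_qkv = torch.cat([q, k, v], dim=0)         # fused:   [6144, 4096]

gate = torch.empty(ffn, hidden)                  # gate_proj: [14336, 4096]
up = torch.empty(ffn, hidden)                    # up_proj:   [14336, 4096]
linear_fc1 = torch.cat([gate, up], dim=0)        # fused:     [28672, 4096]

linear_fc2 = torch.empty(hidden, ffn)            # down_proj unchanged: [4096, 14336]
```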
embedding.word_embeddings.weight: torch.Size([64128, 4096])
decoder.layers.0.self_attention.linear_qkv.weight: torch.Size([3072, 4096])
decoder.layers.0.self_attention.linear_proj.weight: torch.Size([4096, 2048])
decoder.layers.0.mlp.linear_fc1.weight: torch.Size([14336, 4096])
decoder.layers.0.mlp.linear_fc2.weight: torch.Size([4096, 7168])
decoder.layers.0.input_layernorm.weight: torch.Size([4096])
decoder.layers.0.pre_mlp_layernorm.weight: torch.Size([4096])
*16 layers
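The last listing is what a single rank sees under tensor parallelism of degree 2: column-parallel weights (word embeddings, linear_qkv, linear_fc1, output_layer) are split along dim 0, row-parallel weights (linear_proj, linear_fc2) along dim 1, and layernorm weights are replicated on every rank. The following is a minimal sketch of that shape arithmetic, not Megatron's actual sharding code; in particular, for the fused linear_qkv and linear_fc1 tensors Megatron shards each fused component separately (per query group, and gate/up each in half), which the naive chunk below ignores even though the resulting per-rank shapes are the same.

```python
import torch

TP = 2  # tensor-parallel degree assumed from the halved shapes above

def column_shard(weight: torch.Tensor, rank: int) -> torch.Tensor:
    # Column-parallel layers split the output dimension (dim 0) across ranks.
    return torch.chunk(weight, TP, dim=0)[rank]

def row_shard(weight: torch.Tensor, rank: int) -> torch.Tensor:
    # Row-parallel layers split the input dimension (dim 1) across ranks.
    return torch.chunk(weight, TP, dim=1)[rank]

full = {
    "embedding.word_embeddings.weight": torch.empty(128256, 4096),                  # column
    "decoder.layers.0.self_attention.linear_qkv.weight": torch.empty(6144, 4096),   # column
    "decoder.layers.0.self_attention.linear_proj.weight": torch.empty(4096, 4096),  # row
    "decoder.layers.0.mlp.linear_fc1.weight": torch.empty(28672, 4096),             # column
    "decoder.layers.0.mlp.linear_fc2.weight": torch.empty(4096, 14336),             # row
}

rank0 = {
    name: (row_shard(w, 0)
           if name.endswith(("linear_proj.weight", "linear_fc2.weight"))
           else column_shard(w, 0))
    for name, w in full.items()
}

for name, w in rank0.items():
    print(f"{name}: {tuple(w.shape)}")
# word_embeddings -> (64128, 4096), linear_qkv -> (3072, 4096),
# linear_proj -> (4096, 2048), linear_fc1 -> (14336, 4096), linear_fc2 -> (4096, 7168)
```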