Chapter 4 Exercise solutions
In [1]:
from importlib.metadata import version
import torch
print("torch version:", version("torch"))
torch version: 2.4.0
Exercise 4.1: Parameters in the feed forward versus attention module
In [2]:
from gpt import TransformerBlock
GPT_CONFIG_124M = {
    "vocab_size": 50257,
    "context_length": 1024,
    "emb_dim": 768,
    "n_heads": 12,
    "n_layers": 12,
    "drop_rate": 0.1,
    "qkv_bias": False
}

block = TransformerBlock(GPT_CONFIG_124M)
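The exercise asks to compare the number of parameters in the feed forward module with the number in the multi-head attention module. A minimal counting sketch, assuming the chapter's TransformerBlock exposes these submodules as block.ff and block.att:

In [3]:
# Sum up all trainable parameters per submodule
# (the .ff / .att attribute names are assumed from the chapter's TransformerBlock)
total_params = sum(p.numel() for p in block.ff.parameters())
print(f"Total number of parameters in feed forward module: {total_params:,}")

total_params = sum(p.numel() for p in block.att.parameters())
print(f"Total number of parameters in attention module: {total_params:,}")

For the 124M configuration this works out to 4,722,432 feed forward parameters (two Linear layers, 768×3072 and 3072×768, each with a bias) versus 2,360,064 attention parameters (three bias-free 768×768 query/key/value projections plus one 768×768 output projection with bias).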

- The results above are for a single transformer block
- Optionally, multiply by 12 to cover all the transformer blocks in the 124M-parameter GPT model (see the sketch below)
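A quick sketch of that scaling, reusing block and the config from the cell above:

In [4]:
# Parameters in one transformer block, scaled by the number of layers
block_params = sum(p.numel() for p in block.parameters())
print(f"Parameters in one transformer block: {block_params:,}")
print(f"Parameters in {GPT_CONFIG_124M['n_layers']} blocks: "
      f"{GPT_CONFIG_124M['n_layers'] * block_params:,}")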
Exercise 4.2: Initialize larger GPT models
- GPT2-small (the 124M configuration we already implemented):
  - "emb_dim" = 768
  - "n_layers" = 12
  - "n_heads" = 12
- GPT2-medium:
  - "emb_dim" = 1024
  - "n_layers" = 24
  - "n_heads" = 16
- GPT2-large:
  - "emb_dim" = 1280
  - "n_layers" = 36
  - "n_heads" = 20
- GPT2-XL:
  - "emb_dim" = 1600
  - "n_layers" = 48
  - "n_heads" = 25
In [5]:
GPT_CONFIG_124M = {
    "vocab_size": 50257,
    "context_length": 1024,
    "emb_dim": 768,
    "n_heads": 12,
    "n_layers": 12,
    "drop_rate": 0.1,
    "qkv_bias": False
}
def get_config(base_config, model_name="gpt2-small"):
    GPT_CONFIG = base_config.copy()
    if model_name == "gpt2-small":
        GPT_CONFIG["emb_dim"] = 768
        GPT_CONFIG["n_layers"] = 12
        GPT_CONFIG["n_heads"] = 12
    elif model_name == "gpt2-medium":
        GPT_CONFIG["emb_dim"] = 1024
        GPT_CONFIG["n_layers"] = 24
        GPT_CONFIG["n_heads"] = 16
    elif model_name == "gpt2-large":
        GPT_CONFIG["emb_dim"] = 1280
        GPT_CONFIG["n_layers"] = 36
        GPT_CONFIG["n_heads"] = 20
    elif model_name == "gpt2-xl":
        GPT_CONFIG["emb_dim"] = 1600
        GPT_CONFIG["n_layers"] = 48
        GPT_CONFIG["n_heads"] = 25
    else:
        raise ValueError(f"Incorrect model name {model_name}")

    return GPT_CONFIG
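With the remaining configurations filled in, each variant can be instantiated and inspected. A usage sketch, assuming gpt.py also provides the chapter's GPTModel class (show_model_stats is a hypothetical helper):

In [6]:
from gpt import GPTModel

def show_model_stats(model_name):
    # Hypothetical helper: build the variant and report its size,
    # assuming float32 weights, i.e., 4 bytes per parameter
    config = get_config(GPT_CONFIG_124M, model_name=model_name)
    model = GPTModel(config)
    total_params = sum(p.numel() for p in model.parameters())
    total_size_mb = total_params * 4 / (1024 ** 2)
    print(f"{model_name}: {total_params:,} parameters, ~{total_size_mb:,.0f} MB in float32")

for name in ["gpt2-small", "gpt2-medium", "gpt2-large", "gpt2-xl"]:
    show_model_stats(name)

Note that gpt2-xl alone allocates roughly 1.6 billion parameters, so running the full loop needs several gigabytes of RAM.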