# encoding: utf-8
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
class GPTConfig:
    """Hyper-parameter container for a small GPT-style model.

    Plain class-level defaults; code elsewhere reads these as attributes
    (e.g. ``config.d_model``) and may override them per instance.
    """
    vocab_size: int = 16000  # number of tokens in the vocabulary
    seq_len: int = 128       # maximum sequence length (context window)
    d_model: int = 128       # embedding / hidden width — NOTE: source text had the typo 'd_mode'
    n_layer: int = 4         # number of transformer blocks
    n_head: int = 4          # attention heads — NOTE(review): name was missing in the source text; 'n_head' assumed
    bias: bool = True        # whether Linear/LayerNorm layers carry bias terms
    dropout: float = 0.0     # dropout probability (0.0 = disabled)
class SinusoidPE(nn.Module):
def __init__(self,config):
super().__init__()
self.config = config
pe = torch.zeros(config.seq_len, config.d_model)
pos = torch.arange(0, config.seq_len, dtype=torch.float).unsqueeze(1)
emb = torch.exp(torch.arange(0, config.d_model, 2).float() * (-mat
# gpt_model
# First published 2024-09-03 23:12:14 (scraped-article metadata; not part of the code)