# encoding: utf-8
from gpt_model import GPTConfig, GPTModel
import numpy as np
import sys
import torch
from data_set import load_tokenizer
# Select GPU when available, otherwise fall back to CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Training hyperparameters.
learning_rate = 1e-3
max_iters = 500
# Read the tokenized corpora with numpy memmap so the (potentially large)
# files are paged in lazily instead of loaded fully into RAM.
# NOTE(review): tokens are assumed to be stored as little-endian int32 —
# must match whatever data_set wrote into train.dat/test.dat.
train_data = np.memmap('train.dat', dtype=np.int32, mode='r')
test_data = np.memmap('test.dat', dtype=np.int32, mode='r')
def get_batch(split, config):
    """Sample a random batch of (input, target) token windows.

    Args:
        split: "train" selects train_data; any other value selects test_data.
        config: object exposing ``seq_len`` and ``batch_size`` attributes
            (e.g. GPTConfig).

    Returns:
        Tuple ``(x, y)`` of int64 tensors with shape
        ``(batch_size, seq_len)``; ``y`` is ``x`` shifted one position to
        the right (next-token prediction targets).
    """
    data = train_data if split == "train" else test_data
    # Random window starts; the extra -1 leaves room for the +1 shift of
    # the targets so data[i + seq_len] is always a valid read.
    ix = torch.randint(0, len(data) - config.seq_len - 1, (config.batch_size,))
    # astype(np.int64): embedding lookups require torch.long indices, and
    # astype also copies the read-only memmap slice into a writable array
    # (torch.from_numpy cannot wrap a read-only buffer).
    x = torch.stack(
        [torch.from_numpy(data[i:i + config.seq_len].astype(np.int64)) for i in ix]
    )
    y = torch.stack(
        [torch.from_numpy(data[i + 1:i + 1 + config.seq_len].astype(np.int64)) for i in ix]
    )
    return x, y
# TODO: add a loss-curve chart to the training loop.
# (page-scrape residue: "latest recommended article published 2024-10-07 06:31:57")