1. Code module writing conventions
1.1 Importing packages
Group imports into sections: standard library, third-party, and local imports.
import os
os.environ['MKL_NUM_THREADS'] = '1'   # must be set before numpy/torch are imported

# Standard library
import collections
import random
import sys
from functools import partial

# Third-party
import wandb

# Local imports
from data_loaders.data_manager import DataManager
from utils.utils import *
Note on os.environ['MKL_NUM_THREADS'] = '1':
The thread counts for PyTorch and TensorFlow data-input pipelines are often set too high; roughly 4 worker threads for heavy data streams and 2 for light ones is a sensible default. Treat this case by case: raise the count only when data input is actually the bottleneck of training speed.
Multithreaded operations in numpy or opencv, and CPU ops in TensorFlow and PyTorch, use OMP or MKL for acceleration. These default to about half of all CPU threads, which wastes compute; capping them at around 4 threads is recommended, as in the sketch below.
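A minimal sketch of capping the thread pools, assuming the 4-thread recommendation above (the environment variables must be set before numpy/torch are imported):
import os
os.environ['MKL_NUM_THREADS'] = '4'
os.environ['OMP_NUM_THREADS'] = '4'

import torch
torch.set_num_threads(4)   # also cap PyTorch's own intra-op CPU thread pool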
1.2 Simplifying configs
Merging related configs and matching model-name prefixes with startswith reduces the number of separate config branches.
if config['MODEL_NAME'].lower().startswith('stare'):
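A minimal sketch of the pattern; the model name is a hypothetical example:
config = {'MODEL_NAME': 'StarE_Transformer'}   # hypothetical config
# One branch covers every 'stare*' variant instead of one branch per model name.
if config['MODEL_NAME'].lower().startswith('stare'):
    print('use the shared StarE configuration')
else:
    print('fall back to a model-specific configuration')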
1.3 @staticmethod
Commonly used in data-loading code; the method can be called without first instantiating the class.
class DataManager(object):
    """ Give me your args I'll give you a path to load the dataset with my superawesome AI """

    @staticmethod
    def load(config: Union[dict, FancyDict]) -> Callable:
        ...
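Usage sketch (the config dict is a hypothetical placeholder): because load is a @staticmethod, it is called on the class itself.
loader = DataManager.load(config)   # no DataManager() instance is created first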
1.4 Function declaration conventions
A function's docstring should generally contain a brief description of the function (here the 'Decisions' block), a :param x: entry for each parameter, and a :return: entry; both the parameter and return descriptions must state the data type.
@staticmethod
def get_alternative_graph_repr(raw: Union[List[List[int]], np.ndarray], config: dict) \
        -> Dict[str, np.ndarray]:
    """
    Decisions:
        Quals are represented differently here, i.e., more as a coo matrix
            s1 p1 o1 qr1 qe1 qr2 qe2    [edge index column 0]
            s2 p2 o2 qr3 qe3            [edge index column 1]
        edge index:
            [ [s1, s2],
              [o1, o2] ]
        edge type:
            [ p1, p2 ]
        quals will look like
            [ [qr1, qr2, qr3],
              [qe1, qe2, qe3],
              [0,   0,   1 ] ]    <- obtained from the edge index columns

    :param raw: [[s, p, o, qr1, qe1, qr2, qe2, ...], ..., [...]]
        (already have a max qualifier length padded data)
    :param config: the config dict
    :return: output dict
    """
1.5 Declaring code blocks
Before each new logical block in a file, declare what the block does.
"""
Make the model.
"""
1.6 Command-line arguments
import argparse

def parse_config():
    parser = argparse.ArgumentParser()
    parser.add_argument('--max_len', type=int, default=128)
    parser.add_argument('--ckpt_path', type=str)
    parser.add_argument('--test_data', type=str)
    parser.add_argument('--out_path', type=str)
    parser.add_argument('--gpu_id', type=int, default=0)
    return parser.parse_args()
if __name__ == '__main__':
    args = parse_config()
    ckpt_path = args.ckpt_path
    test_data = args.test_data
    out_path = args.out_path
    gpu_id = args.gpu_id
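An invocation sketch (the script name and all paths are hypothetical placeholders):
# python evaluate.py --ckpt_path ./ckpt/model.pt --test_data ./data/test.txt --out_path ./out/pred.txt --gpu_id 0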
2. Common PyTorch functions
2.1 Declaring the optimizer
if config['OPTIMIZER'] == 'sgd':
    optimizer = torch.optim.SGD(model.parameters(), lr=config['LEARNING_RATE'])
elif config['OPTIMIZER'] == 'adam':
    optimizer = torch.optim.Adam(model.parameters(), lr=config['LEARNING_RATE'])
else:
    raise NotImplementedError(f"Unknown optimizer: {config['OPTIMIZER']}")
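An equivalent sketch using dict dispatch, which scales better as optimizers are added (an alternative pattern, not the project's actual code):
optimizers = {'sgd': torch.optim.SGD, 'adam': torch.optim.Adam}
optimizer = optimizers[config['OPTIMIZER']](model.parameters(), lr=config['LEARNING_RATE'])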
2.2 Training-loop conventions
In the training loop, use tqdm to display progress and torch.nn.utils.clip_grad_norm_ to guard against exploding gradients.
from tqdm.autonotebook import tqdm

def training_loop_gcn(epochs: int):
    train_loss = []
    train_acc = []
    for e in range(epochs):
        per_epoch_loss = []
        per_epoch_tr_acc = []
        with Timer() as timer:
            trn_dl = data_fn(data['train'])
            train_fn.train()
            for batch in tqdm(trn_dl, desc='Training'):
                opt.zero_grad()
                triples, labels = batch
                sub, rel = triples[:, 0], triples[:, 1]
                if qualifier_aware:
                    quals = triples[:, 2:]
                    _quals = torch.tensor(quals, dtype=torch.long, device=device)
                _sub = torch.tensor(sub, dtype=torch.long, device=device)
                _rel = torch.tensor(rel, dtype=torch.long, device=device)
                _labels = torch.tensor(labels, dtype=torch.float, device=device)
                pred = train_fn(_sub, _rel, _quals)
                loss = train_fn.loss(pred, _labels)
                per_epoch_loss.append(loss.item())
                loss.backward()
                if grad_clipping:
                    # Clip the global gradient norm to 1.0 to prevent exploding gradients.
                    torch.nn.utils.clip_grad_norm_(train_fn.parameters(), 1.0)
                opt.step()
        print(f"[Epoch: {e} ] Loss: {np.mean(per_epoch_loss)}")
        train_loss.append(np.mean(per_epoch_loss))
        if e % eval_every == 0 and e >= 1:
            # Evaluate without building the computation graph.
            with torch.no_grad():
                summary_val = val_testbench()
                per_epoch_vl_acc = summary_val['metrics']['hits_at 1']
2.3 Model-writing conventions
2.3.1 Model wrapping
Package the model as a class and make proper use of inheritance.
class StarE_Transformer(StarEEncoder):
    def __init__(self, kg_graph_repr: Dict[str, np.ndarray], config: dict, id2e: tuple = None):
        if id2e is not None:
            super(self.__class__, self).__init__(kg_graph_repr, config, id2e[1])
        else:
            super(self.__class__, self).__init__(kg_graph_repr, config)

        self.model_name = 'StarE_Transformer_Statement'
        self.hid_drop2 = config['STAREARGS']['HID_DROP2']
2.3.2 Commonly used functions in models
2.3.2.1 torch.view()
Reshapes a tensor; the result is a view over the same underlying data.
rel_embed = rel_embed.view(-1, 1, self.emb_dim)
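A small sketch: -1 lets PyTorch infer that dimension from the others (the sizes here are hypothetical):
rel_embed = torch.randn(8, 200)       # hypothetical batch of relation embeddings
out = rel_embed.view(-1, 1, 200)      # out.shape == (8, 1, 200); -1 infers the batch dim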
2.3.2.2 torch.transpose()
Swaps two dimensions of a tensor.
Note: transpose() exchanges only two dimensions per call.
x = torch.randn(2, 3, 4, 5)   # example tensor
# Swap dims 0 and 1, then dims 3 and 2, then dims 1 and 3.
y = x.transpose(0, 1).transpose(3, 2).transpose(1, 3)   # y.shape == (3, 4, 5, 2)
2.3.2.3 torch.reshape()
Changes the shape of a tensor; both the input and the result are tensors.
Note: reshape fills the new shape in row-major order.
c = torch.randn((2, 5))
# tensor([[ 1.0559, -0.3533,  0.5194,  0.9526, -0.2483],
#         [-0.1293,  0.4809, -0.5268, -0.3673,  0.0666]])
d = torch.reshape(c, (5, 2))
# tensor([[ 1.0559, -0.3533],
#         [ 0.5194,  0.9526],
#         [-0.2483, -0.1293],
#         [ 0.4809, -0.5268],
#         [-0.3673,  0.0666]])
2.3.2.4 torch.mean(x, dim=0, keepdim=True)
Computes the mean along the given dimension.
x = torch.arange(15).view(5, 3).float()   # torch.mean requires a floating-point tensor
#  0  1  2
#  3  4  5
#  6  7  8
#  9 10 11
# 12 13 14
x_mean = torch.mean(x, dim=0, keepdim=True)
# 6 7 8
x_mean0 = torch.mean(x, dim=1, keepdim=True)
#  1
#  4
#  7
# 10
# 13
2.3.2.5 torch.min(x, dim=0, keepdim=True)
Takes the minimum along the given dimension; with dim specified it returns both the values and their indices.
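A small sketch, reusing x from 2.3.2.4:
values, indices = torch.min(x, dim=0, keepdim=True)
# values:  tensor([[0., 1., 2.]])
# indices: tensor([[0, 0, 0]])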
2.3.2.6 torch.mm vs. torch.mul
torch.mul(a, b) multiplies a and b element-wise; their shapes must match, e.g. if a is (1, 2) and b is (1, 2), the result is again a (1, 2) matrix.
torch.mm(a, b) is the matrix product of a and b, e.g. if a is (1, 2) and b is (2, 3), the result is a (1, 3) matrix.
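A minimal sketch of the difference:
a = torch.ones(1, 2) * 2    # tensor([[2., 2.]])
b = torch.ones(1, 2) * 3    # tensor([[3., 3.]])
torch.mul(a, b)             # tensor([[6., 6.]])  element-wise, shape (1, 2)
torch.mm(a, b.t())          # tensor([[12.]])     (1, 2) x (2, 1) -> (1, 1)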
2.3.2.7 torch.sigmoid(x) vs. torch.nn.Sigmoid()
torch.sigmoid(x) is a function; torch.nn.Sigmoid is a module class that must be instantiated (typically in __init__) before use.
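A sketch of both forms:
x = torch.randn(4)
y1 = torch.sigmoid(x)           # function: call directly
sigmoid = torch.nn.Sigmoid()    # module: instantiate first (usually in __init__)
y2 = sigmoid(x)                 # then apply; y2 equals y1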
2.3.2.8 torch.nn.Linear(self.emb_dim, self.emb_dim)
Declares a linear (fully connected) layer.
self.fc = torch.nn.Linear(self.emb_dim, self.emb_dim)
2.3.2.9 torch.nn.Dropout(self.hid_drop)
Declares a dropout layer.
self.hidden_drop = torch.nn.Dropout(self.hid_drop)
2.3.2.10 nn.BatchNorm1d/2d/3d(num_features)
Applies Batch Normalization over a 2d or 3d input (a mini-batch of 1d inputs, optionally with a width dimension).
num_features is the feature count of the expected input, whose shape is 'batch_size x num_features [x width]':
nn.BatchNorm1d(num_features)
Applies Batch Normalization over a 4d input (a mini-batch of 3d inputs).
The expected input has shape 'batch_size x num_features x height x width':
nn.BatchNorm2d(num_features)
Applies Batch Normalization over a 5d input (a mini-batch of 4d inputs).
The expected input has shape 'batch_size x num_features x depth x height x width':
nn.BatchNorm3d(num_features)
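A small sketch for the 1d case (sizes are hypothetical):
bn = torch.nn.BatchNorm1d(200)   # num_features = 200
h = torch.randn(32, 200)         # batch_size x num_features
h = bn(h)                        # each feature normalized over the batch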
2.3.2.11 Activation layers
import torch.nn.functional as F
x = F.relu(x)
3. BERT (PyTorch)
The pattern below restores a sequence-classification model from a checkpoint: extract the saved arguments and parameters, build empty models, then load the state dict.
import torch
from google_bert import BasicTokenizer
# BERTLM, TreeLSTM, and myModel are project-local classes assumed to be importable.
def extract_parameters(ckpt_path):
    model_ckpt = torch.load(ckpt_path)
    bert_args = model_ckpt['bert_args']
    model_args = model_ckpt['args']
    bert_vocab = model_ckpt['bert_vocab']
    model_parameters = model_ckpt['model']
    tree_args = model_ckpt['tree_args']
    tree_vocab = model_ckpt['tree_vocab']
    return bert_args, model_args, bert_vocab, model_parameters, tree_args, tree_vocab
def init_empty_bert_model(bert_args, bert_vocab, gpu_id, approx='none'):
    bert_model = BERTLM(gpu_id, bert_vocab, bert_args.embed_dim, bert_args.ff_embed_dim,
                        bert_args.num_heads, bert_args.dropout, bert_args.layers, approx)
    return bert_model
def init_empty_tree_model(t_args, tree_vocab, gpuid):
    tree_model = TreeLSTM(tree_vocab.size(), t_args.input_dim, t_args.mem_dim,
                          t_args.hidden_dim, t_args.num_classes, t_args.freeze_embed)
    tree_model = tree_model.cuda(gpuid)
    return tree_model
def init_sequence_classification_model(empty_bert_model, args, bert_args, gpu_id, bert_vocab,
                                       model_parameters, empty_tree_model, tree_args):
    number_class = args.number_class
    number_category = 3
    embedding_size = bert_args.embed_dim
    batch_size = args.batch_size
    dropout = args.dropout
    tree_hidden_dim = tree_args.hidden_dim
    device = gpu_id
    vocab = bert_vocab
    seq_tagging_model = myModel(empty_bert_model, number_class, number_category, embedding_size,
                                batch_size, dropout, device, vocab, empty_tree_model, tree_hidden_dim)
    seq_tagging_model.load_state_dict(model_parameters)
    return seq_tagging_model
bert_args, model_args, bert_vocab, model_parameters, tree_args, tree_vocab = extract_parameters(ckpt_path)
empty_bert_model = init_empty_bert_model(bert_args, bert_vocab, gpu_id, approx='none')
empty_tree_model = init_empty_tree_model(tree_args, tree_vocab, gpu_id)
seq_classification_model = init_sequence_classification_model(empty_bert_model, model_args, bert_args, gpu_id, bert_vocab, model_parameters, empty_tree_model, tree_args)
seq_classification_model.cuda(gpu_id)
tokenizer = BasicTokenizer()
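A usage sketch for the tokenizer (the sentence is a hypothetical example; BasicTokenizer.tokenize comes from Google's BERT code):
tokens = tokenizer.tokenize('A hypothetical example sentence.')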