2021SC@SDUSC
The readme.md lists a step to run
python divide_inhouse_data.py
but this file does not exist anywhere in the project's directory structure, so it had to be skipped.
Following readme.md, the next step is to run main.py, which involves two files: main.py and train.py. train.py defines an important graph-based class, GraphC_QAModel, which provides several methods.
First, the initialization:
class GraphC_QAModel(object):
    def __init__(self, args, local_rank):
        super(GraphC_QAModel, self).__init__()
        self.save_args = args
        # Pair up the flat cnn_filters list into (kernel_width, num_filters)
        # tuples, e.g. [3, 256, 5, 512] -> [(3, 256), (5, 512)]
        args['cnn_filters'] = list(zip(args['cnn_filters'][:-1:2], args['cnn_filters'][1::2]))
        # Freeze the hyperparameters into an immutable namedtuple
        args = collections.namedtuple("HParams", sorted(args.keys()))(**args)
        # Create the checkpoint directory if it does not exist yet
        if not os.path.exists(args.ckpt):
            os.mkdir(args.ckpt)
        self.args = args
        self.local_rank = local_rank
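The two argument transformations are easy to check in isolation. Here is a minimal standalone sketch; the hyperparameter names and values are made up for illustration:
import collections

# Hypothetical hyperparameters, for illustration only
args = {'ckpt': './ckpt', 'cnn_filters': [3, 256, 5, 512], 'dropout': 0.2}

# Pair consecutive entries: widths at even indices, filter counts at odd ones
args['cnn_filters'] = list(zip(args['cnn_filters'][:-1:2], args['cnn_filters'][1::2]))
print(args['cnn_filters'])  # [(3, 256), (5, 512)]

# The namedtuple turns the mutable dict into read-only attribute access
HParams = collections.namedtuple("HParams", sorted(args.keys()))
hp = HParams(**args)
print(hp.dropout)  # 0.2; assigning hp.dropout = 0.3 would raise AttributeError
The namedtuple makes the hyperparameters immutable for the rest of training, which prevents accidental modification of the configuration.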
This function builds the graph model from the AMR representation. A GPU is preferred here: a GPU can train on many batches of data in parallel, which improves performance.
    def _build_model(self):
        print(self.args, 'final')
        # Prefer the first configured GPU; fall back to the CPU
        self.device = torch.device("cuda:" + str(self.args.gpus[0]) if torch.cuda.is_available() else "cpu")
        print(self.device, 'here')
        vocabs, lexical_mapping = [], []  # singular name, so the final return never hits an undefined variable
        # Look up the (config, model, tokenizer) classes for the chosen encoder type
        config_class, model_class, tokenizer_class = MODEL_CLASSES[self.args.encoder_type]
        self.bert_config = config_class.from_pretrained(
            self.args.lm_model,
        )
        self.bert_tokenizer = tokenizer_class.from_pretrained(
            self.args.lm_model
        )
        if self.args.bert_pretrained_file is None:
            # No local checkpoint given: load the pretrained LM by name
            # (config accepts a name/path, from which transformers loads it)
            self.bert_model = model_class.from_pretrained(
                self.args.lm_model,
                config=self.args.lm_model
            ).to(self.device)
        else:
            # Load the encoder weights from a local checkpoint file
            self.bert_model = model_class.from_pretrained(
                self.args.bert_pretrained_file,
            ).to(self.device)
            print('bert_pretrained')
        # self.device = torch.device('cuda', self.args.gpus[0])
        if self.args.encoder_type in ['ACB_dual']:
            vocabs, lexical_mapping = self._prepare_data()
            self.model = Reasoning_AMR_CN_DUAL(vocabs,
                                               self.args.concept_char_dim, self.args.concept_dim,
                                               self.args.cnn_filters, self.args.char2concept_dim,
                                               self.args.rel_dim, self.args.rnn_hidden_size, self.args.rnn_num_layers,
                                               self.args.embed_dim, self.args.bert_embed_dim, self.args.ff_embed_dim,
                                               self.args.num_heads,
                                               self.args.dropout,
                                               self.args.snt_layer,
                                               self.args.graph_layers,
                                               self.args.pretrained_file, self.device, self.args.batch_size,
                                               self.args.lm_model, self.bert_config, self.bert_model, self.bert_tokenizer, self.args.bert_max_length,
                                               self.args.n_answers,
                                               self.args.encoder_type,
                                               self.args.max_conceptnet_length,
                                               self.args.conceptnet_path,
                                               )
        else:
            pass  # other encoder types are not handled in this excerpt
        self.model.to(self.device)
        self.criterion = nn.CrossEntropyLoss()
        return vocabs, lexical_mapping
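MODEL_CLASSES is defined elsewhere in the project; judging from how it is unpacked here, it plausibly maps each encoder type to a (config, model, tokenizer) triple, which keeps _build_model model-agnostic. A sketch of that shape, under my assumption, using the transformers library's BERT classes:
from transformers import BertConfig, BertModel, BertTokenizer

# Assumed shape of MODEL_CLASSES -- the real mapping lives elsewhere
# in the project; 'ACB_dual' is the encoder type used in this excerpt
MODEL_CLASSES = {
    'ACB_dual': (BertConfig, BertModel, BertTokenizer),
}

config_class, model_class, tokenizer_class = MODEL_CLASSES['ACB_dual']
config = config_class.from_pretrained('bert-base-uncased')
tokenizer = tokenizer_class.from_pretrained('bert-base-uncased')
model = model_class.from_pretrained('bert-base-uncased', config=config)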
Next, the function _average_gradients, which computes the average gradient across all distributed workers:
    def _average_gradients(self, model):
        size = float(dist.get_world_size())
        for param in model.parameters():
            if param.grad is not None:
                # Sum the gradient over all workers, then divide by the
                # number of workers to obtain the average
                dist.all_reduce(param.grad.data, op=dist.ReduceOp.SUM)
                param.grad.data /= size
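The arithmetic is easy to verify in isolation. A minimal, self-contained sketch (my own setup, not from the project) that spawns two CPU workers with the gloo backend and averages a fake "gradient" exactly as above:
import os
import torch
import torch.distributed as dist
import torch.multiprocessing as mp

def worker(rank, world_size):
    os.environ['MASTER_ADDR'] = '127.0.0.1'
    os.environ['MASTER_PORT'] = '29500'
    dist.init_process_group('gloo', rank=rank, world_size=world_size)
    # Each worker holds a different "gradient": 1.0 on rank 0, 2.0 on rank 1
    grad = torch.tensor([float(rank + 1)])
    dist.all_reduce(grad, op=dist.ReduceOp.SUM)  # grad is now 3.0 on every worker
    grad /= dist.get_world_size()                # averaged: 1.5 on every worker
    print(f'rank {rank}: averaged grad = {grad.item()}')
    dist.destroy_process_group()

if __name__ == '__main__':
    mp.spawn(worker, args=(2,), nprocs=2)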
The data for each node in the graph is split across five vocabularies (concept, token, token_char, concept_char, relation):
    def _prepare_data(self):
        vocabs = dict()
        vocabs['concept'] = Vocab(self.args.concept_vocab, 5, [CLS])
        vocabs['token'] = Vocab(self.args.token_vocab, 5, [STR, END])
        vocabs['token_char'] = Vocab(self.args.token_char_vocab, 100, [STR, END])
        vocabs['concept_char'] = Vocab(self.args.concept_char_vocab, 100, [STR, END])
        vocabs['relation'] = Vocab(self.args.relation_vocab, 5, [CLS, rCLS, SEL, TL])
        lexical_mapping = LexicalMap()
        # _build_model unpacks two values from this call, so both are returned
        return vocabs, lexical_mapping
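The Vocab class itself is defined elsewhere in the project. Judging only by the call sites, the second argument looks like a minimum-frequency cutoff (5 for word-level vocabularies, 100 for character-level ones) and the third a list of special tokens. A minimal sketch under those assumptions, not the project's actual implementation:
class Vocab:
    def __init__(self, filename, min_occur_cnt, specials=None):
        # Assumed file format: one "token<TAB>count" pair per line
        idx2token = list(specials or [])
        with open(filename, encoding='utf8') as f:
            for line in f:
                token, cnt = line.rstrip('\n').rsplit('\t', 1)
                if int(cnt) >= min_occur_cnt:
                    idx2token.append(token)
        self._idx2token = idx2token
        self._token2idx = {t: i for i, t in enumerate(idx2token)}

    def __len__(self):
        return len(self._idx2token)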
The train function defines the training module. For the three kinds of runs (training, development, and testing), it calls the same data-loading program with different arguments:
    def train(self):
        from datetime import datetime
        current_time = datetime.now().strftime('%b%d_%H-%M-%S')
        task = self.args.task
        # TensorBoard writer; the log directory is derived from the task name, timestamp, and prefix
        tb_writer = SummaryWriter(log_dir='./runs/' + task + "/" + current_time + self.args.prefix, comment=self.args.prefix)
        vocabs, lexical_mapping = self._build_model()
        # The same DataLoader class serves all three splits; only the data
        # path and the for_train flag differ
        train_data = DataLoader(self.args, vocabs, lexical_mapping, self.args.train_data, self.args.batch_size,
                                for_train=True)
        dev_data = DataLoader(self.args, vocabs, lexical_mapping, self.args.dev_data, self.args.batch_size,
                              for_train=False)
        test_data = DataLoader(self.args, vocabs, lexical_mapping, self.args.test_data, self.args.batch_size,
                               for_train='Eval')
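The excerpt ends before the actual training loop, but the SummaryWriter created above is the standard TensorBoard writer; its typical use looks like this (an illustrative sketch with made-up values, not the project's loop):
from torch.utils.tensorboard import SummaryWriter

tb_writer = SummaryWriter(log_dir='./runs/demo')
for step in range(100):
    fake_loss = 1.0 / (step + 1)  # placeholder value, for illustration only
    tb_writer.add_scalar('train/loss', fake_loss, step)
tb_writer.close()
# The curves can then be inspected with: tensorboard --logdir ./runs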
Next, we need to analyze the data-loading program.