在上文介绍了互注意力机制之后,本文展示第一种实现方法的完整过程。
首先给出互注意力机制的实现。
def create_vector_k(index):
    """Build the cross-attention feature vector for student answer *index*.

    Loads the reference-answer vector (row 0 of ``ref.csv``) and the student
    answer vector (row *index* of ``vec.csv``), forms the similarity matrix
    ``M = A . X^T``, min-max normalises it in both directions (``alpha`` over
    M, ``beta`` over M transposed), weights the rows of A by the resulting
    attention scores, and returns the flattened weighted matrix.

    :param index: row index of the student answer in ``vec.csv``.
    :return: 1-D numpy array — the attention-weighted, flattened A.
    """
    saved_vector = load_data('./vec.csv')
    ref_vector = load_data('./ref.csv')
    A = np.array(ref_vector[0])
    X = np.array(saved_vector[index])
    mat_M = np.dot(A, X.T)
    # NOTE(review): keep this call order — maxminnorm may modify its input
    # in place for single-column matrices, so alpha must be computed before
    # beta, exactly as in the original implementation.
    alpha = maxminnorm(mat_M)
    beta = maxminnorm(mat_M.T).T
    # Expectation over the rows of beta. The original built this with a
    # copy/add/subtract loop (total = beta[0]; += every row; -= beta[0]);
    # summing the rows directly yields the identical value.
    beta_avg = beta.sum(axis=0) / mat_M.shape[0]
    # Attention weight per row of A.
    att = np.dot(alpha, beta_avg)
    Ax = A.copy()
    for i in range(A.shape[0]):
        # In-place assignment preserves A's original dtype, as before.
        Ax[i] = att[i] * Ax[i]
    return Ax.flatten()
其中使用的归一化方法是最大—最小极值归一化,具体实现如下:
def maxminnorm(array):
    """Column-wise max-min normalisation with additive smoothing.

    Each column ``c`` is mapped to ``(c - min(c) + 0.01) / (max(c) - min(c)
    + 0.05)``; the 0.01 / 0.05 terms keep the denominator non-zero when a
    column is constant. A single-column input (answers of length 1) is the
    degenerate case described in the surrounding text: every entry becomes
    the neutral value 0.5.

    :param array: 2-D numpy array to normalise.
    :return: new float array of the same shape; the input is never modified.
    """
    if array.shape[1] == 1:
        # Fix: return a fresh float array instead of writing 0.5 into the
        # caller's array. The original mutated `array` in place (corrupting
        # mat_M for the subsequent transposed call in create_vector_k) and,
        # for integer input, 0.5 silently truncated to 0.
        out = np.full(array.shape, 0.5)
        return out
    maxcols = array.max(axis=0)
    mincols = array.min(axis=0)
    # One vectorized broadcast replaces the original per-column Python loop;
    # 0.05 is the original 0.01*5 smoothing term.
    return ((array - mincols) + 0.01) / ((maxcols - mincols) + 0.05)
由于输入的考生答案长短不一,归一化函数中会出现分母为零的情况。对这种情况的处理方法是将相应结果全部置为0.5,虽然存在一定误差,但操作简便,且具有较好的语义可解释性。
最后,将数据进行转化,对模型进行参数的训练:
# coding: utf-8
import sys
sys.path.append('..') # 为了引入父目录的文件而进行的设定
from common.optimizer import SGD,Adam
from common.trainer import Trainer
from common.base_model import BaseModel
# from dataset import spiral
from two_layer_net import TwoLayerNet
import numpy as np
from readCsv import get_attention_vector_and_label
from yigexiangfa import softmax, softmax1, softmax2, getDim
# Hyper-parameter settings
max_epoch = 30
batch_size = 64
hidden_size = 1024 # 256 | 512 | 768 | 1024
learning_rate = 0.005
TRAINNUM = 768
TESTNUM = 32
nest_degree = 4 # 2 | 3 | 4 | 6
outputSize = 3
file_name_input_data = 'input1-1-mode.csv'
file_name_label = 'label.csv'
FILE_NAME = 'two_layer_net.pkl'
# Reference (model) answer text; its feature dimension fixes the network
# input size below. NOTE(review): the name `referencce` carries a typo, but
# renaming it would be a code change, so it is kept as-is in this pass.
referencce = '无权。根据公司法律制度的规定,名义股东处分股权造成实际出资人损失,实际出资人请求名义股东承担赔偿责任的,人民法院应予以支持'
# Input dimension — presumably twice the reference's feature dimension at the
# chosen nesting degree; confirm against getDim's definition.
inputDim = int(2*getDim(referencce, nest_degree))
# Load attention vectors and labels for all TRAINNUM + TESTNUM samples; the
# first TRAINNUM are used for training, the rest are held out for the test
# section below.
x0, t0 = get_attention_vector_and_label(TRAINNUM + TESTNUM, outputSize,
file_name_input_data, file_name_label, nest_degree)
x = x0[0:TRAINNUM]
t = t0[0:TRAINNUM]
model = TwoLayerNet(input_size=inputDim, hidden_size=hidden_size,
output_size=outputSize, file_name=FILE_NAME)
optimizer = SGD(lr=learning_rate)
# optimizer = Adam(lr=learning_rate)
trainer = Trainer(model, optimizer)
trainer.fit(x, t, max_epoch, batch_size, eval_interval=10)
# Save the trained parameters so they can be reloaded later
BaseModel.save_params(model, 'two_layer_net.pkl')
print('|| TEST || -------------------------------------')
# Held-out slice: the last TESTNUM samples, excluded from training above.
text_data = x0[TRAINNUM:TESTNUM+TRAINNUM]
text_result = model.predict(text_data).tolist()
text_label = t0[TRAINNUM:TESTNUM+TRAINNUM].tolist()
print(text_result)
print(text_label)


def _argmax_index(row):
    """Return the index of the largest value in *row* (first one on ties)."""
    return max(range(len(row)), key=row.__getitem__)


# ACR: the prediction's argmax must hit the one-hot label exactly.
# BCR: additionally counts a prediction as correct when the label and the
# prediction agree that class 0 is NOT the answer.
# (The original computed the argmax with two identical hand-written loops,
# re-deriving ACR inside the BCR loop; one pass computes both.)
ACR = 0
BCR = 0
for text_i in range(0, len(text_result)):
    maxindex = _argmax_index(text_result[text_i])
    if text_label[text_i][maxindex] == 1:
        ACR += 1
        BCR += 1
    elif text_label[text_i][0] != 1 and maxindex != 0:
        BCR += 1
print('absolute correct rate (ACR):')
print(str(100*ACR/len(text_data))+'%')
print('basic correct rate (BCR):')
print(str(100*BCR/len(text_data))+'%')
至此,第一种实现方式全部完成。在完成全部实现之后,将对三种实现方式的结果进行比对。