项目进度:
对考生答案矩阵和参考答案矩阵采用了互注意力机制,并进行归一化处理,得到了考生答案的参考答案表示,提升了语义抽取的能力。
完成过程:
使用BERT预训练模型对考生答案和参考答案进行向量化后,进一步筛选向量,选择具有代表性的向量进行训练。
归一化方法:
def maxminnorm(array):
    """Return a smoothed, column-wise min-max normalisation of a 2-D array.

    Every column ``c`` is mapped to
    ``(c - min(c) + 0.01) / (max(c) - min(c) + 0.05)``.
    The additive constants keep the denominator non-zero when a column is
    constant (such a column becomes 0.01 / 0.05 everywhere).

    A single-column input is a special case: every entry of the result
    is 0.5.

    Args:
        array: 2-D array-like of numbers. It is never modified.

    Returns:
        A new float64 ``numpy.ndarray`` with the same shape as ``array``.
    """
    data = np.asarray(array, dtype=np.float64)
    if data.shape[1] == 1:
        # Build a fresh float array rather than writing 0.5 into the caller's
        # buffer: an in-place write both mutates the argument and truncates
        # 0.5 to 0 when the input has an integer dtype.
        return np.full(data.shape, 0.5)
    col_min = data.min(axis=0)
    col_max = data.max(axis=0)
    # Broadcasting applies the per-column min/max to all rows at once.
    return ((data - col_min) + 0.01) / ((col_max - col_min) + 0.01 * 5)
考生答案的参考答案表示过程:
def createVk(reference, answer, bc):
    """Build the reference-side representation of an examinee answer.

    Both texts are split into clauses on "。" and ",". The first clause of
    the reference is discarded; every clause of the answer is kept. Each
    clause is encoded separately with ``bc.encode`` and the clause vectors
    are stacked into matrix A (reference) and matrix X (answer). The
    similarity matrix ``A @ X.T`` is normalised in both directions with
    ``maxminnorm``, the two normalised matrices are combined into one weight
    per reference clause, and each row of A is scaled by its weight.

    Args:
        reference: Reference-answer text.
        answer: Examinee answer text. It is printed to stdout unchanged.
        bc: Encoder object exposing ``encode(list_of_str)``; element 0 of the
            returned value is used as the clause vector. Presumably a BERT
            sentence-encoding client -- confirm against the caller.

    Returns:
        A 1-D array holding the weighted reference clause vectors, flattened
        row by row, in the dtype of the encoded vectors.

    Raises:
        ValueError: If the reference has no clause after the first one.
    """
    # Drop the leading clause of the reference; only the rest is encoded.
    ref_clauses = re.split("。|,", reference)[1:]
    print(answer)
    ans_clauses = re.split("。|,", answer)
    if not ref_clauses:
        raise ValueError(
            "reference must contain at least one clause after the first separator"
        )

    # NOTE(review): text ending in a separator leaves an empty string in the
    # clause list, and it is passed to bc.encode as-is -- confirm the encoder
    # accepts empty strings.
    A = np.array([bc.encode([clause])[0] for clause in ref_clauses])
    X = np.array([bc.encode([clause])[0] for clause in ans_clauses])

    similarity = np.dot(A, X.T)
    # alpha: normalised down each column (over reference clauses).
    alpha = maxminnorm(similarity)
    # beta: normalised along each row (over answer clauses).
    beta = maxminnorm(similarity.T).T

    # Expectation: mean of beta over the reference clauses.
    beta_avg = beta.mean(axis=0)

    # Attention vector: one weight per reference clause.
    weights = np.dot(alpha, beta_avg)

    # Scale every reference clause vector by its weight, keeping A's dtype.
    weighted = (A * weights[:, np.newaxis]).astype(A.dtype)
    return weighted.flatten()