# 项目进度:
# 根据stacking集成学习算法的思想,划分数据集进行训练。
# (Project note: split the dataset for training following the stacking
#  ensemble-learning idea.)
# coding: utf-8
import sys
sys.path.append('..') # 为了引入父目录的文件而进行的设定
from common.optimizer import SGD, Adam
from common.trainer import Trainer
from common.base_model import BaseModel
# from dataset import spiral
from two_layer_net import TwoLayerNet
import numpy as np
from readCsv import get_attention_vector_and_label
from yigexiangfa import softmax, softmax1, softmax2, getDim
import torch
# ---- Hyperparameters ----
max_epoch = 1
batch_size = 64
hidden_size = 1024     # tried: 256 | 512 | 768 | 1024
learning_rate = 0.005
TRAINNUM = 10000       # training samples: 5 folds x 2000 (see fold split below)
TESTNUM = 2000         # held-out test samples appended after the folds
nest_degree = 4        # tried: 2 | 3 | 4 | 6
outputSize = 3         # number of output classes (one-hot labels)
file_name_input_data = 'input1-1-mode.csv'
file_name_label = 'vec.csv'
FILE_NAME = 'two_layer_net.pkl'
# Reference sentence used only to derive the input dimensionality below.
reference = '无权。根据公司法律制度的规定,名义股东处分股权造成实际出资人损失,实际出资人请求名义股东承担赔偿责任的,人民法院应予以支持'
referencce = reference  # backward-compat alias for the original (typo'd) name
# Input vector size: twice the embedding dimension of the reference text.
# NOTE(review): assumes getDim returns the per-text feature dimension — confirm.
inputDim = int(2 * getDim(reference, nest_degree))
# Load all feature vectors and one-hot labels, then carve the first 10000
# samples into five equal folds for stacking-style cross training.
x0, t0 = get_attention_vector_and_label(TRAINNUM + TESTNUM, outputSize,
                                        file_name_input_data, file_name_label,
                                        nest_degree)
_FOLD = 2000  # samples per fold
_xs = [x0[k * _FOLD:(k + 1) * _FOLD] for k in range(5)]
_ts = [t0[k * _FOLD:(k + 1) * _FOLD] for k in range(5)]
x1, x2, x3, x4, x5 = _xs
t1, t2, t3, t4, t5 = _ts
# e_k / f_k: training inputs / labels with fold k held out.
_es = [np.vstack([_xs[j] for j in range(5) if j != k]) for k in range(5)]
_fs = [np.vstack([_ts[j] for j in range(5) if j != k]) for k in range(5)]
e1, e2, e3, e4, e5 = _es
f1, f2, f3, f4, f5 = _fs
mat = []                       # collected fold predictions (currently unused)
ceshi = np.zeros((2000, 3))    # running sum of test-split predictions
def _argmax_row(row):
    """Index of the first maximal entry of a prediction row."""
    best = 0
    for idx, val in enumerate(row):
        if val > row[best]:
            best = idx
    return best


def _hard_accuracy(preds, labels):
    """Percentage of rows whose argmax position holds a 1 in the one-hot label."""
    hits = sum(1 for p, l in zip(preds, labels) if l[_argmax_row(p)] == 1)
    return 100 * hits / len(preds)


# Per-fold data: training split with fold i held out, plus the held-out fold
# itself (inputs AND its matching labels) for validation.
_train_xs = [e1, e2, e3, e4, e5]
_train_ts = [f1, f2, f3, f4, f5]
_fold_xs = [x1, x2, x3, x4, x5]
_fold_ts = [t1, t2, t3, t4, t5]
for i in range(5):
    model = TwoLayerNet(input_size=inputDim, hidden_size=hidden_size,
                        output_size=outputSize, file_name=FILE_NAME)
    optimizer = SGD(lr=learning_rate)
    # optimizer = Adam(lr=learning_rate)  # alternative optimizer
    x = _train_xs[i]
    t = _train_ts[i]
    yuce = _fold_xs[i]        # held-out fold inputs
    yuce_label = _fold_ts[i]  # held-out fold labels, row-aligned with `yuce`
    trainer = Trainer(model, optimizer)
    trainer.fit(x, t, max_epoch, batch_size, eval_interval=10)
    print('|| TEST || -------------------------------------')
    # Accumulate this fold-model's predictions on the shared test split,
    # so `ceshi` can be averaged into the stacked prediction after the loop.
    text_data = x0[10000:12000]
    text_result = model.predict(text_data).tolist()
    ceshi = ceshi + text_result
    text_label = t0[10000:12000].tolist()
    # Validate on the held-out fold.
    text_result2 = model.predict(yuce)
    # BUG FIX: this accuracy previously compared the held-out fold's
    # predictions against the test split's labels (t0[10000:12000]), whose
    # rows do not correspond to `yuce`; compare with the fold's own labels.
    print('absolute correct rate (ACR):')
    print(str(_hard_accuracy(text_result2, yuce_label)) + '%')
    # Test-split accuracy of this individual fold model.
    print('absolute correct rate (ACR):')
    print(str(_hard_accuracy(text_result, text_label)) + '%')
# Average the accumulated test-split predictions over the 5 fold models.
# (Replaces the original scalar division via a throwaway np.zeros+5 array;
# the elementwise result is identical.)
ceshi = ceshi / 5
print("ceshi:================")
print(ceshi)
print("mat:==================")
print(mat)
# Basic correct rate (BCR) on the LAST fold model's test predictions:
# a row counts as correct when its argmax hits the one-hot label, or when
# label and prediction merely agree the class is not class 0.
BCR = 0
for row, label in zip(text_result, text_label):
    maxindex = int(np.argmax(row))  # first maximum, same tie-break as before
    if label[maxindex] == 1:
        BCR += 1
    elif label[0] != 1 and maxindex != 0:
        BCR += 1
print('basic correct rate (BCR):')
print(str(100 * BCR / len(text_data)) + '%')