项目实训 --主观题阅卷评分网站(十一)

项目进度:

为了集成各个模型的训练效果、获得更高的正确率,我从集成学习的思路出发,开始设计集成算法。
首先设计了对弱分类器的集成,采用了一种投票的机制,在总数据集上进行训练,得到了11个弱分类器,保存模型,对同一个数据集进行预测。

# coding: utf-8
import sys

sys.path.append('..')  # 为了引入父目录的文件而进行的设定
from common.optimizer import SGD, Adam
from common.trainer import Trainer
from common.trainer import RnnlmTrainer
from common.base_model import BaseModel
# from dataset import spiral
from two_layer_net import TwoLayerNet
from common.time_layers import TimeLSTM
from common.time_layers import LSTM
from common.rnnlm import Rnnlm
import numpy as np
from readCsv import get_attention_vector_and_label
from yigexiangfa import softmax, softmax1, softmax2, getDim
from common.seq2seq import Decoder

# Hyperparameter settings.
# max_epoch and batch_size are forwarded to Trainer.fit in the training loop;
# hidden_size and outputSize configure TwoLayerNet; learning_rate goes to SGD.
max_epoch = 10
batch_size = 64
hidden_size = 1024  # alternatives noted by the author: 256 | 512 | 768 | 1024
learning_rate = 0.005
# begin/end are only referenced by commented-out lines in the training loop
# (a sliding 1500-row window, presumably one window per model -- TODO confirm).
begin = 0
end = 1500
# TRAINNUM = 90
# TESTNUM = 10
# 5*100=500
nest_degree = 4  # alternatives noted by the author: 2 | 3 | 4 | 6
outputSize = 3  # number of output classes; also the width of each label row read below

# Input files handed to get_attention_vector_and_label.
file_name_input_data = 'input1-1-mode.csv'
file_name_label = 'vec.csv'
# NOTE(review): FILE_NAME is not referenced anywhere in the visible script;
# the models are given 'two_layer_net{}.pkl' names instead.
FILE_NAME = 'two_layer_net.pkl'

# Presumably the reference answer that student answers are scored against
# (verify against readCsv / yigexiangfa).  The identifier is misspelled
# ("referencce") but is kept because it is used on the next statement.
referencce = '无权。根据公司法律制度的规定,名义股东处分股权造成实际出资人损失,实际出资人请求名义股东承担赔偿责任的,人民法院应予以支持'

# Network input width: twice whatever getDim reports for the reference text
# at the chosen nest_degree.
inputDim = int(2 * getDim(referencce, nest_degree))

# Load feature vectors (x0) and labels (t0).  The first argument (15000) is
# presumably the number of samples to read -- TODO confirm in readCsv.
x0, t0 = get_attention_vector_and_label(15000, outputSize, file_name_input_data, file_name_label, nest_degree)

# Evaluation slice: every model below is both trained and evaluated on the
# rows of x0/t0 from index 14500 onward.  The slice does not depend on the
# loop variable, so it is taken once up front.
x = x0[14500:]
t = t0[14500:]

# Vote table: resu[sample][cls] counts how many models ranked `cls` highest
# for `sample`.  It is sized from the evaluation slice and from outputSize
# instead of a fixed (500, 3), so the script keeps working when the number of
# loaded rows or the number of classes changes.
resu = np.zeros((len(x), outputSize))
for i in range(9):
    # begin += 1500
    # end += 1500

    # One two-layer network per round; each gets its own parameter file name.
    model = TwoLayerNet(input_size=inputDim, hidden_size=hidden_size,
                        output_size=outputSize, file_name='two_layer_net{}.pkl'.format(i + 1))
    optimizer = SGD(lr=learning_rate)
    trainer = Trainer(model, optimizer)
    # Disabled alternatives left by the author: Adam(lr=learning_rate) as the
    # optimizer, and an Rnnlm model trained with RnnlmTrainer
    # (time_size=35, max_grad=None, eval_interval=10).

    trainer.fit(x, t, max_epoch, batch_size, eval_interval=10)

    # Saving the parameters explicitly is currently disabled:
    # BaseModel.save_params(model, 'two_layer_net{}.pkl'.format(i+1))

    print('||  TEST  || -------------------------------------')
    text_data = x
    text_result = model.predict(text_data).tolist()
    text_label = t

    print(text_result)
    print(text_label)

    # Each model casts one vote per sample for its highest-scoring class.
    # max() returns the first maximal index, so ties go to the lowest class
    # index, exactly like a manual "strictly greater" scan.
    for text_i, scores in enumerate(text_result):
        winner = max(range(len(scores)), key=scores.__getitem__)
        resu[text_i][winner] += 1

# Dump the labels and the accumulated votes.  text_label / text_result are
# the values left over from the last trained model.
for j in range(0, len(text_label)):
    print(text_label[j])
for text_i in range(0, len(text_result)):
    print(resu[text_i])

# Ensemble accuracy (ACR): turn the per-sample vote counts in `resu` into one
# predicted class and compare it with the one-hot label.
#
# Decision rule, in priority order:
#   1. any vote at all for class 0      -> predict class 0
#   2. more than 3 votes for class 1    -> predict class 1
#   3. otherwise                        -> class with the most votes
#                                          (lowest index wins a tie)
# The threshold 3 is hard-coded; its rationale is not stated in this file.
ACR = 0
for text_i in range(0, len(text_result)):
    votes = resu[text_i]
    if votes[0] != 0:
        chosen = 0
    elif votes[1] > 3:
        chosen = 1
    else:
        chosen = max(range(len(votes)), key=lambda k: votes[k])
    if text_label[text_i][chosen] == 1:
        ACR += 1

print(str(100 * ACR / len(text_data)) + '%')

# Basic correct rate (BCR): scored on `text_result`, i.e. the raw outputs of
# the last model trained above, not on the ensemble votes.
# A sample counts as correct when the top-scoring class matches its one-hot
# label, and also whenever the label marks class 0 regardless of the
# prediction.
BCR = 0

for row_idx, scores in enumerate(text_result):
    # First index holding the highest score (ties go to the lowest index).
    top_class = max(range(len(scores)), key=scores.__getitem__)
    label_row = text_label[row_idx]
    if label_row[top_class] == 1 or label_row[0] == 1:
        BCR += 1
print('basic correct rate (BCR):')
print(str(100 * BCR / len(text_data)) + '%')

#
# GTR = 0
# for text_i in range(0, len(text_result)):
#     maxindex = 0
#     maxvalue = resu[text_i][0]
#     for index, value in enumerate(resu[text_i]):
#         if resu[text_i][1] > 3:
#             maxindex = 1
#             continue
#         elif value > maxvalue:
#             maxindex = index
#             maxvalue = value
#     if text_label[text_i][maxindex] == 1:
#         GTR += 1
#
# print(str(100 * GTR / len(text_data)) + '%')
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值