项目实训 --主观题阅卷评分网站(十)

项目进度:

对考生答案尝试分句分词处理,拼接成二维矩阵,准备对考生的答案做二维卷积处理,提取考生答案特征向量。
数据处理过程:

import pandas as pd
import jieba
import torch
import numpy as np

# Load the cleaned answer sheet. Every column is cast to str so the tokenizer
# always receives text (missing values therefore become the string 'nan').
df = pd.read_csv('clean_input.csv', encoding='GBK').astype(str)
df.drop(['id'], axis=1, inplace=True)

x = df['answer'].values  # answer texts
y = df['1p'].values  # column '1p' -- presumably the score label; unused in the visible code

# Tokenize at most the first 15000 answers. Slicing (instead of indexing with
# range(15000)) keeps the same cap but no longer raises IndexError when the
# file holds fewer rows.
ysk = [w for answer in x[:15000] for w in jieba.cut(answer)]

# yskl is the full dictionary: the set of distinct tokens seen above.
yskl = set(ysk)
print(len(yskl))
print(yskl)

# Vocabulary: the distinct tokens collected in `yskl`.
vocab = set(yskl)

# Enumerate in sorted order: iteration order of a set of str changes between
# interpreter runs (hash randomization), which would give every word a
# different id -- and therefore a different embedding row -- on each run.
word_to_ix = {word: i for i, word in enumerate(sorted(vocab))}

print(word_to_ix)
# Size the lookup table from the vocabulary instead of a hard-coded 2982, so
# every id produced by word_to_ix is guaranteed to be a valid row.
embedding = torch.nn.Embedding(len(word_to_ix), 30)

# Demo lookup of a single word; skipped when the word is not in this corpus
# rather than aborting the script with a KeyError.
if '李某' in word_to_ix:
    hello_idx = torch.LongTensor([word_to_ix['李某']])
    hello_embed = embedding(hello_idx)
    # print(hello_embed)

# Split each of the first (up to) 10 answers into clauses.
# NOTE(review): the split is on the ASCII comma only; if the answers use the
# full-width comma, no split happens -- confirm against the data.
res = [answer.split(',') for answer in x[:10]]

# all[i][j] is the mean word embedding of clause j of answer i, stored as a
# nested 1 x embedding_dim list.
# NOTE(review): `all` shadows the builtin; the name is kept because code
# outside this section may read it.
all = []
for i, sentences in enumerate(res):
    print(i)
    ssml = []
    for sentence in sentences:
        # The accumulator and counter are reset once per clause. Resetting
        # them per word left only the last word's embedding in the result.
        sml = np.zeros((1, embedding.embedding_dim))
        k = 0
        for w in jieba.cut(sentence):
            idx = word_to_ix.get(w)
            if idx is None:
                # Token not in the dictionary: skip it instead of raising.
                continue
            hello_idx = torch.LongTensor([idx])
            hello_embed = embedding(hello_idx)
            sml += hello_embed.detach().cpu().numpy()
            k += 1
        if k:
            sml /= k
        # A clause with no known token keeps the all-zero vector.
        ssml.append(sml.tolist())
    all.append(ssml)
print(all)

定义卷积层(代码中实际使用的是一维卷积 Conv1d)、池化层和全连接层:

# 1-D convolution over the sentence axis: 30 input channels (the embedding
# size), 10 output channels, window of 3.
conv1 = torch.nn.Conv1d(in_channels=30, out_channels=10, kernel_size=3)

# NOTE(review): `temp` is not defined in the visible code; presumably it is
# the padded batch_size x max_sent_len x embedding_size matrix -- confirm.
# as_tensor accepts a list, ndarray or tensor, and float32 matches the
# dtype of the layer weights.
conv_input = torch.as_tensor(temp, dtype=torch.float32)
# batch_size x max_sent_len x embedding_size -> batch_size x embedding_size x max_sent_len
# The permute is applied to the converted tensor; previously the converted
# tensor was discarded and permute was called on the raw `temp`.
conv_input = conv_input.permute(0, 2, 1)
# print("input:", conv_input.size())
output = conv1(conv_input)
print("output:", output.size())

# Max pooling.
# NOTE(review): the view below only yields one row per sample when the
# pooled length is 1, i.e. when the conv output length is 6..11 -- confirm.
pool1d = torch.nn.MaxPool1d(kernel_size=6)
pool1d_value = pool1d(output)
print("最大池化输出:", pool1d_value.size())

# Fully connected layer: 10 pooled channels -> 3 outputs.
fc = torch.nn.Linear(in_features=10, out_features=3)
fc_inp = pool1d_value.view(-1, pool1d_value.size(1))
print("全连接输入:", fc_inp.size())
fc_outp = fc(fc_inp)
print("全连接输出:", fc_outp.size())

# Softmax over the output dimension. dim=1 is what the implicit choice
# resolved to for a 2-D input; stating it avoids the deprecation warning.
m = torch.nn.Softmax(dim=1)
out = m(fc_outp)
print("输出结果值:", out)

输出结果示例:(原文此处为一张图片,未能保留)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值