项目实训 --主观题阅卷评分网站（十）

最新推荐文章于 2024-09-15 22:31:42 发布

素衣白裳一隅安康

最新推荐文章于 2024-09-15 22:31:42 发布

阅读量139

点赞数

文章标签： python 卷积

本文链接：https://blog.csdn.net/yskysky/article/details/117793370

版权

项目进度：

对考生答案先分句、再分词：将每个分句内所有词的词向量取平均得到该句的句向量，再把各句向量按顺序拼接成二维矩阵，为后续对考生答案做卷积处理、提取答案特征向量做准备。
数据处理过程：

import pandas as pd
import jieba
import torch
import numpy as np

# Load the cleaned answer sheet (GBK-encoded CSV); every column is cast to
# str and the 'id' column is dropped.
df = pd.read_csv('clean_input.csv', encoding='GBK').astype(str)
df.drop(['id'], axis=1, inplace=True)

x = df['answer'].values
y = df['1p'].values

# Number of leading answers used to build the vocabulary.
VOCAB_ROWS = 15000

# Collect every token jieba produces for those answers. Slicing instead of
# indexing with range(15000) avoids an IndexError when the file holds fewer
# rows than VOCAB_ROWS.
ysk = []
for answer in x[:VOCAB_ROWS]:
    ysk.extend(w for w in jieba.cut(answer) if isinstance(w, str))

# yskl is the full vocabulary (unique tokens).
yskl = set(ysk)
print(len(yskl))
print(yskl)

vocab = set(yskl)

# Sort before enumerating so the token -> index mapping is the same on every
# run; iterating a set of str directly gives an order that can change between
# interpreter sessions.
word_to_ix = {word: i for i, word in enumerate(sorted(vocab))}

print(word_to_ix)
# Size the embedding table from the vocabulary actually built instead of a
# hard-coded row count, so every index in word_to_ix is a valid row.
embedding = torch.nn.Embedding(len(word_to_ix), 30)
# Sample lookup of a single token; raises KeyError if '李某' is not in the
# vocabulary.
hello_idx = torch.LongTensor([word_to_ix['李某']])
hello_embed = embedding(hello_idx)

# Split the first ten answers into clauses on the full-width comma.
# Slicing tolerates inputs with fewer than ten answers.
res = [answer.split('，') for answer in x[:10]]

# all[i][j] is the 1 x embedding_dim mean word vector of clause j of answer i,
# stored as a nested list.
# NOTE(review): this name shadows the builtin all(); it is left unchanged in
# case code outside this excerpt reads it -- confirm before renaming.
all = []
for i, clauses in enumerate(res):
    print(i)
    ssml = []
    for clause in clauses:
        # Look up all tokens of the clause first, then average them together.
        # The accumulator must not be reset per token, otherwise the "mean"
        # is just the last token's embedding.
        indices = [word_to_ix[w] for w in jieba.cut(clause) if isinstance(w, str)]
        if indices:
            vectors = embedding(torch.LongTensor(indices)).detach().cpu().numpy()
            sml = vectors.mean(axis=0, keepdims=True, dtype=np.float64)
        else:
            # A clause with no tokens (e.g. an empty string between two
            # commas) gets a zero vector instead of dividing by zero or
            # reusing the previous clause's vector.
            sml = np.zeros((1, embedding.embedding_dim))
        ssml.append(sml.tolist())
    all.append(ssml)
print(all)

定义卷积层（对二维的句向量矩阵沿句子方向做一维卷积 Conv1d）、最大池化层和全连接层：

# 1-D convolution over the sentence axis: 30 input channels (embedding size),
# 10 output channels, window of 3 positions.
conv1 = torch.nn.Conv1d(in_channels=30, out_channels=10, kernel_size=3)
# NOTE(review): `temp` is not defined in the visible code; presumably it is
# the padded batch of clause vectors with layout
# batch_size x max_sent_len x embedding_size -- confirm against its producer.
# as_tensor accepts both nested lists and existing tensors/arrays, and the
# explicit float32 matches the dtype of the Conv1d weights.
input = torch.as_tensor(temp, dtype=torch.float32)
# batch_size x max_sent_len x embedding_size -> batch_size x embedding_size x max_sent_len
# (permute the converted tensor, not the raw `temp` object).
input = input.permute(0, 2, 1)
output = conv1(input)
print("output:", output.size())
# Max pooling
pool1d = torch.nn.MaxPool1d(kernel_size=6)
pool1d_value = pool1d(output)
print("最大池化输出：", pool1d_value.size())
# Fully connected layer
fc = torch.nn.Linear(in_features=10, out_features=3)
# Flatten everything after the batch axis. When the pooled length is 1 this is
# batch_size x 10, same as before; when it is not, the Linear layer raises a
# shape error instead of silently mixing positions into the batch axis.
fc_inp = pool1d_value.flatten(start_dim=1)
print("全连接输入：", fc_inp.size())
fc_outp = fc(fc_inp)
print("全连接输出：", fc_outp.size())
# Softmax over the class axis; naming dim explicitly avoids the deprecated
# implicit-dimension behaviour.
m = torch.nn.Softmax(dim=1)
out = m(fc_outp)
print("输出结果值：", out)