# encoding:utf-8
import numpy as np
# Load the vocabulary array from disk.
wordsList = np.load('wordsList.npy')
print('载入word列表')
# Turn the array into a plain Python list and decode every entry as UTF-8
# (presumably the entries are stored as byte strings -- confirm against the file).
wordsList = [raw_word.decode('UTF-8') for raw_word in wordsList.tolist()]

# Load the word-vector matrix that accompanies the vocabulary.
wordVectors = np.load('wordVectors.npy')
print('载入文本向量')

# Report how many words were loaded.
print(len(wordsList))
# print("wordsList", wordsList)
# Sample entries: 'muguti', 'boidin', 'madueke', 'smikle', 'uteritz', 'gusin', ...
# (about 400,000 words in total)

# Report the dimensions of the vector matrix.
print(wordVectors.shape)
# print("wordVectors", wordVectors)
# Sample output:
# wordVectors [[ 0. 0. 0. ... 0. 0. 0. ]
# [ 0.013441 0.23682 -0.16899 ... -0.56657 0.044691 0.30392 ]
# [ 0.15164 0.30177 -0.16763 ... -0.35652 0.016413 0.10216 ]
# ...
# [-0.51181 0.058706 1.0913 ... -0.25003 -1.125 1.5863 ]
# [-0.75898 -0.47426 0.4737 ... 0.78954 -0.014116 0.6448 ]
# [-0.79149 0.86617 0.11998 ... -0.29996 -0.0063003 0.3954 ]]
# i.e. 400,000 vectors of 50 dimensions each
import os
from os.path import isfile, join
# Collect the relative paths of the regular files directly inside 'pos/';
# sub-directories are filtered out by the isfile() check.
pos_files = [
    'pos/' + entry
    for entry in os.listdir('pos/')
    if isfile(join('pos/', entry))
]
# Same listing for the 'neg/' directory.
neg_files = [
    'neg/' + entry
    for entry in os.listdir('neg/')
    if isfile(join('neg/', entry))
]
num_words = []
for pf in pos_files:
with open(pf, "r", encoding='utf-8') as f:
line = f.readline()
counter =
LSTM实战电影评价情感分析
最新推荐文章于 2023-05-02 15:35:15 发布
本文详细介绍了如何运用LSTM（长短期记忆网络）进行电影评论的情感分析。通过训练和验证模型，展示了LSTM在理解和捕捉文本序列情感特征方面的强大能力，为自然语言处理中的情感理解提供了一种有效方法。
摘要由CSDN通过智能技术生成