import torch
import torchtext
from torchtext import data
from torchtext import datasets
from torchtext.vocab import GloVe
import spacy
from spacy.lang.en import English
import random
import torch.nn as nn
import torch.nn.functional as F
# One fixed seed is shared by every generator seeded below so that repeated
# runs of the script are reproducible.
SEED = 1234

# Ask cuDNN to restrict itself to deterministic algorithms.
torch.backends.cudnn.deterministic = True

# Seed PyTorch's default generator and the CUDA generator with the same value.
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
# Report the number of examples in the training, validation and test sets.
print(f'Number of training examples: {len(train_data)}')
print(f'Number of validation examples: {len(valid_data)}')
print(f'Number of testing examples: {len(test_data)}')
Number of training examples: 17500
Number of validation examples: 7500
Number of testing examples: 25000
{'text': This movie has got to be one of the worst I have ever seen make it to DVD!!! The story line might have clicked if the film had more funding and writers that would have cut the nonsense and sickly scenes that I highly caution parents on.... But the story line is like a loose cannon. If there was such a thing as a drive thru movie maker-this one would have sprung from that.It reminded me a lot of the quickie films that were put out in the 1960's, poor script writing and filming. <br /><br />The only sensible characters in the whole movie was the bartender and beaver. The rest of the film, could have easily been made by middle school children. I give this film a rating of 1 as it is truly awful and left my entire family with a sense of being cheated. My advice-Don't Watch It!!!, 'label': 'neg'}
173
This
<class 'spacy.tokens.token.Token'>
# As the output above shows, the words produced by the English() tokenizer are
# not plain strings: each one is a spaCy Token object, so every token is
# converted to str here.
# The loop variable is called `dataset` (not `data`) so that it does not rebind
# the `data` module imported from torchtext at the top of the file.
# NOTE(review): assumes iterating a dataset yields the same example objects as
# indexing it with 0..len-1 — confirm for the dataset type in use.
for dataset in [train_data, valid_data, test_data]:
    for example in dataset:
        example.text = [str(token) for token in example.text]

# Build the vocabulary.
# Both vocabularies are derived from the training data only.
# For TEXT: max_size caps the vocabulary at 25000 entries, vectors names the
# pretrained 'glove.6B.100d' embeddings, and unk_init is the initializer
# (an in-place normal fill) handed to the vocabulary for words that have no
# pretrained vector.
TEXT.build_vocab(
    train_data,
    unk_init=torch.Tensor.normal_,
    vectors='glove.6B.100d',
    max_size=25000,
)
LABEL.build_vocab(train_data)
def predict_sentiment(sentence):
    """Return the model's positive-sentiment probability for ``sentence``.

    The sentence is tokenized with ``TEXT.tokenize``, every token is mapped to
    its vocabulary index through ``TEXT.vocab.stoi``, the indices are moved to
    ``device`` and passed to ``best_model``, and the model output is squashed
    with a sigmoid.

    Args:
        sentence: Raw text to classify (the calling script prompts for
            English input).

    Returns:
        The sigmoid of the model output as a Python float.

    Side effects:
        Prints the list of tokens extracted from ``sentence``.
    """
    tokens = [str(tok) for tok in TEXT.tokenize(sentence)]
    print(tokens)

    # unsqueeze(1) turns the 1-D index vector into shape (len(tokens), 1);
    # presumably the model expects (seq_len, batch) input — TODO confirm.
    indices = [TEXT.vocab.stoi[tok] for tok in tokens]
    indexed = torch.LongTensor(indices).to(device).unsqueeze(1)

    # Pure inference: skip autograd bookkeeping for the forward pass.
    # NOTE(review): this does not switch best_model to eval mode; confirm the
    # caller has done so if the model contains dropout or batch-norm layers.
    with torch.no_grad():
        pred = torch.sigmoid(best_model(indexed))
    return pred.item()
# Prompt for a sentence, then print the predicted probability that it
# expresses positive sentiment.
sentence = input('please input the sentence you want to predict(in English):')
print('输入语句表达正向情感的概率为:{}'.format(predict_sentiment(sentence)))
please input the sentence you want to predict(in English): this is a good movie
['this', 'is', 'a', 'good', 'movie']
输入语句表达正向情感的概率为:1.0
# Prompt for a sentence, then print the predicted probability that it
# expresses positive sentiment.
sentence = input('please input the sentence you want to predict(in English):')
print('输入语句表达正向情感的概率为:{}'.format(predict_sentiment(sentence)))
please input the sentence you want to predict(in English): the film is great while the stars are awful
['the', 'film', 'is', 'great', 'while', 'the', 'stars', 'are', 'awful']
输入语句表达正向情感的概率为:3.232804579589299e-10
# Prompt for a sentence, then print the predicted probability that it
# expresses positive sentiment.
sentence = input('please input the sentence you want to predict(in English):')
print('输入语句表达正向情感的概率为:{}'.format(predict_sentiment(sentence)))
please input the sentence you want to predict(in English): the film is great and the stars are good
[' ', 'the', 'film', 'is', 'great', 'and', 'the', 'stars', 'are', 'good']
输入语句表达正向情感的概率为:1.0