import tensorflow_hub as hub
import tensorflow as tf
import re
import numpy as np
import pickle
import pandas as pd
# Fixed: the class is WordNetLemmatizer; the previous misspelling
# ("WordNeatLemmatizer") raised ImportError as soon as the script was loaded.
from nltk import WordNetLemmatizer,word_tokenize
from nltk.corpus import stopwords
from sklearn.model_selection import train_test_split
# English stop words; clean() drops these from every phrase.
stoplist = stopwords.words('english')
# ELMo v2 module from TF-Hub, instantiated once and reused by elmo_vector().
elmo = hub.Module("https://tfhub.dev/google/elmo/2", trainable=True)
# Usage example:
# x = ["Roasted ants are a popular snack in Columbia"]
# embeddings = elmo(x, signature="default", as_dict=True)["elmo"]
from nltk import WordNetLemmatizer,word_tokenize
from keras import Model,Input,layers
from keras.utils.np_utils import to_categorical
# Show full column contents when printing DataFrames.
# NOTE(review): -1 is the legacy spelling of "no limit"; newer pandas releases
# expect None instead - confirm against the installed pandas version.
pd.set_option('display.max_colwidth', -1)
# Tab-separated train/test sets plus the sample submission file.
data_train=pd.read_csv(r'/content/sample_data/train.tsv',sep='\t')
data_test=pd.read_csv(r'/content/sample_data/test.tsv',sep='\t')
sub=pd.read_csv(r'/content/sample_data/sampleSubmission.csv')
# Raw phrases and their sentiment labels.
data_train_X = data_train.Phrase.values
X_test = list(data_test.Phrase.values)
data_train_Y = list(data_train.Sentiment.values)
# Shared lemmatizer instance used by clean().
lemmat = WordNetLemmatizer()
def clean(data):
    """Normalise raw phrases into lower-cased, lemmatised, space-joined text.

    For every phrase: replace each non-letter character with a space,
    tokenise, drop English stop words, then lower-case and lemmatise the
    remaining tokens.

    A phrase that consists only of stop words keeps all of its tokens, so it
    is not reduced to an empty string.

    Args:
        data: Iterable of phrase strings.

    Returns:
        A list with one cleaned string per input phrase, in input order.
    """
    stop_words = set(stoplist)  # set gives O(1) membership tests
    cleaned = []
    for phrase in data:
        # Replace everything that is not an English letter with a space.
        letters_only = re.sub('[^a-zA-Z]', ' ', phrase)
        tokens = word_tokenize(letters_only)  # split into words
        # Build a new list instead of calling remove() on the list being
        # iterated: removing in place skipped the token after each stop word.
        # The stop list is lower-case, so compare case-insensitively.
        kept = [tok for tok in tokens if tok.lower() not in stop_words]
        if not kept:
            # Nothing but stop words: fall back to the unfiltered tokens.
            kept = tokens
        # Lower-case and lemmatise each token.
        lemmas = [lemmat.lemmatize(tok.lower()) for tok in kept]
        cleaned.append(' '.join(lemmas))
    return cleaned
def elmo_vector(x):
    """Embed a batch of phrases with ELMo and average over axis 1.

    Args:
        x: Batch of phrase strings passed straight to the module-level
            ``elmo`` TF-Hub module (the caller supplies list slices).

    Returns:
        The evaluated ``tf.reduce_mean`` of the module's "elmo" output taken
        over axis 1 (presumably the token axis, giving one vector per
        phrase - confirm against the module documentation).
    """
    token_vectors = elmo(x, signature='default', as_dict=True)["elmo"]
    with tf.Session() as session:
        # A fresh session needs its variables and lookup tables initialised
        # before the embedding tensor can be evaluated.
        session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        phrase_vectors = session.run(tf.reduce_mean(token_vectors, 1))
    return phrase_vectors
# Clean the training phrases and convert the integer sentiment labels into a
# categorical (one-hot) matrix.
data_train_X = clean(data_train_X)
data_train_Y = to_categorical(data_train_Y)
# Embed in slices of 250 phrases (presumably to bound memory per session
# run), then stitch the per-slice results back together along axis 0.
data_train_X_list = [data_train_X[i:i+250] for i in range(0,len(data_train_X),250)]
elmo_train = [elmo_vector(x) for x in data_train_X_list]
elmo_train_new = np.concatenate(elmo_train,axis=0)
# Cache the embeddings on disk. The context manager closes the file even if
# pickle.dump raises, which the manual open()/close() pair did not.
with open('elmo_train.pickle','wb') as pickle_out:
    pickle.dump(elmo_train_new,pickle_out)
# ELMo code study notes
# (Source page footer: "latest recommended article published 2022-01-08 00:10:12")