# Read the whole training corpus from the file 'flare'; the context manager
# closes the handle as soon as the text is in memory.
with open('flare') as corpus_file:
    data = corpus_file.read()
# Remove line breaks
data = data.replace("\n", '').replace('\r', '')
# Distinct characters of the corpus. Sorted so that the character <-> index
# assignment is the same on every run (plain set iteration order is not).
letters = sorted(set(data))
# Vocabulary size
num_letters = len(letters)
# Build the lookup dictionaries: index -> character and character -> index
int_to_char = dict(enumerate(letters))
char_to_int = {char: index for index, char in enumerate(letters)}
# Number of characters in each sliding window fed to the model
time_step = 20
import numpy as np
from keras.utils import to_categorical
# Extract samples with a sliding window
def extract_data(data, slide):
    """Cut *data* into overlapping windows and the item following each window.

    Args:
        data: Sequence to slice (the script passes a string).
        slide: Window length.

    Returns:
        A tuple ``(x, y)``. ``x[i]`` is the list of the ``slide`` items starting
        at position ``i`` and ``y[i]`` is the item right after that window.
        Both lists are empty when ``len(data) <= slide``.
    """
    x = []
    y = []
    for start in range(len(data) - slide):
        stop = start + slide
        x.append(list(data[start:stop]))
        y.append(data[stop])
    return x, y
def char_to_int_data(x, y, char_to_int):
    """Translate character windows and targets into integer indices.

    Args:
        x: Iterable of windows, each an iterable of characters.
        y: Iterable of targets; each target is iterated character by
            character, so a single-character string becomes a one-element list.
        char_to_int: Mapping from character to integer index.

    Returns:
        A tuple ``(x_to_int, y_to_int)`` of nested lists of indices, with the
        same nesting as the inputs.

    Raises:
        KeyError: If a character is missing from ``char_to_int``.
    """
    x_to_int = [[char_to_int[char] for char in window] for window in x]
    y_to_int = [[char_to_int[char] for char in target] for target in y]
    return x_to_int, y_to_int
def data_preprocessing(data, slide, num_letters, char_to_int):
    """Turn raw text into one-hot encoded windows and integer targets.

    Args:
        data: Text to process.
        slide: Window length passed to ``extract_data``.
        num_letters: Size of the one-hot axis (number of distinct characters).
        char_to_int: Mapping from character to integer index.

    Returns:
        A tuple ``(new, Output)``. ``new`` is a NumPy array of shape
        ``(number_of_windows, slide, num_letters)`` with a single 1 per
        position; ``Output`` is a flat list holding the index of the character
        that follows each window. With ``len(data) <= slide`` there are no
        windows, so ``new`` has a first dimension of 0 and ``Output`` is empty.
    """
    char_data = extract_data(data, slide)
    int_data = char_to_int_data(char_data[0], char_data[1], char_to_int)
    Input = int_data[0]
    # Targets arrive as one-element lists; flatten them to one index each.
    Output = list(np.array(int_data[1], dtype=int).flatten())
    # dtype=int keeps the array usable as an index even when it is empty.
    Input_reshaped = np.array(Input, dtype=int).reshape(len(Input), slide)
    # Row k of the identity matrix is the one-hot vector of class k, so
    # indexing with the (windows, slide) index array encodes every cell at once.
    new = np.eye(num_letters)[Input_reshaped]
    return new, Output
# One-hot encode the corpus windows and collect the next-character labels.
x, y = data_preprocessing(data, time_step, num_letters, char_to_int)
from sklearn.model_selection import train_test_split
# Two input arrays give four outputs, in the order train/test per input.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=10)
# One-hot encode the training labels.
y_train_category = to_categorical(y_train, num_letters)
# Build the model
from keras.models import Sequential
from keras.layers import Dense, LSTM
model = Sequential()
# Input shape is (window length, one-hot size), taken from the training array.
model.add(LSTM(units=20, input_shape=(x_train.shape[1], x_train.shape[2]), activation='relu'))
# One softmax output per distinct character.
model.add(Dense(units=num_letters, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()
model.fit(x_train, y_train_category, batch_size=1000, epochs=5)
# Predict: the class index is the position of the largest softmax output.
# np.argmax over model.predict is used instead of predict_classes, which
# current Keras releases no longer provide.
y_train_predict = np.argmax(model.predict(x_train), axis=-1)
y_train_predict_char = [int_to_char[i] for i in y_train_predict]
y_test_predict = np.argmax(model.predict(x_test), axis=-1)
y_test_predict_char = [int_to_char[i] for i in y_test_predict]
# Score
from sklearn.metrics import accuracy_score
accuracy_train = accuracy_score(y_train, y_train_predict)
accuracy_test = accuracy_score(y_test, y_test_predict)
# Predict the next character for every window of a new piece of text.
# NOTE: the text must be longer than time_step to produce any window, and
# every character must already be a key of char_to_int.
new_letters = 'ggdagdgasdasgd'
x_new, y_new = data_preprocessing(new_letters, time_step, num_letters, char_to_int)
if x_new.shape[0] > 0:
    y_new_predict = np.argmax(model.predict(x_new), axis=-1)
else:
    # No window could be extracted, so there is nothing to predict.
    y_new_predict = []
y_new_predict_char = [int_to_char[i] for i in y_new_predict]
# One prediction per window: print each window next to its predicted character.
for i in range(x_new.shape[0]):
    print(new_letters[i:i + time_step], 'fghjkddtyuyth', y_new_predict_char[i])
# LSTM text input generation
# (Source page note: latest recommended article published 2024-07-13 21:18:24)