LSTM文本输入生成

# Read the whole training corpus; the context manager closes the file handle.
with open('flare') as corpus_file:
    data = corpus_file.read()
# Remove line breaks so the text becomes one continuous character stream.
data = data.replace("\n", '').replace('\r', '')
# Distinct characters of the corpus. Sorting makes the index assignment
# reproducible between runs (plain set iteration order is not stable).
letters = sorted(set(data))
# Size of the character vocabulary.
num_letters = len(letters)
# Build the lookup tables: index -> character and character -> index.
int_to_char = {a: b for a, b in enumerate(letters)}
char_to_int = {b: a for a, b in enumerate(letters)}
# Number of characters in each input window.
time_step = 20

import numpy as np
from keras.utils import to_categorical
# Extract training samples with a sliding window
def extract_data(data, slide):
    """Cut *data* into overlapping windows and their follow-up elements.

    Args:
        data: Sliceable sequence (for example a string) to sample from.
        slide: Number of elements in each window.

    Returns:
        A tuple ``(x, y)`` where ``x[k]`` is the list of the ``slide``
        elements starting at position ``k`` and ``y[k]`` is the element
        that directly follows that window. Both lists are empty when
        ``data`` has no more than ``slide`` elements.
    """
    window_count = len(data) - slide
    x = [list(data[start:start + slide]) for start in range(window_count)]
    y = [data[start + slide] for start in range(window_count)]
    return x, y
def char_to_int_data(x, y, char_to_int):
    """Translate character windows and targets into integer indices.

    Args:
        x: Sequence of windows, each an iterable of characters.
        y: Sequence of targets; ``y[k]`` is iterated character by
            character, so a one-character string yields a one-element list.
        char_to_int: Mapping from character to integer index.

    Returns:
        A tuple ``(x_to_int, y_to_int)`` of lists of integer lists, one
        entry per window in ``x``.

    Raises:
        KeyError: If a character is missing from ``char_to_int``.
    """
    def encode(chars):
        # Look up every character of one window/target in the mapping.
        return [char_to_int[symbol] for symbol in chars]

    x_to_int, y_to_int = [], []
    for position, window in enumerate(x):
        x_to_int.append(encode(window))
        y_to_int.append(encode(y[position]))
    return x_to_int, y_to_int
def data_preprocessing(data, slide, num_letters, char_to_int):
    """Turn raw text into one-hot encoded windows and integer targets.

    Args:
        data: Source text to sample windows from.
        slide: Window length, i.e. the number of characters per sample.
        num_letters: Vocabulary size; the length of each one-hot vector.
        char_to_int: Mapping from character to integer index.

    Returns:
        A tuple ``(new, Output)``. ``new`` is an integer array of shape
        ``(samples, slide, num_letters)`` holding the one-hot encoded
        windows; ``Output`` is a flat list with the index of the character
        that follows each window.

    Raises:
        KeyError: If ``data`` contains a character missing from
            ``char_to_int``.
        IndexError: If a mapped index is not smaller than ``num_letters``.
    """
    char_data = extract_data(data, slide)
    int_data = char_to_int_data(char_data[0], char_data[1], char_to_int)
    Input = int_data[0]
    # Every target arrives as a one-element list; flatten to one index per sample.
    Output = list(np.array(int_data[1], dtype=int).flatten())
    # Explicit integer dtype keeps the array usable as an index even when
    # there are no samples (an empty list would otherwise become float64).
    Input_reshapeo = np.array(Input, dtype=int).reshape(len(Input), slide)
    # Row k of the identity matrix is the one-hot vector for index k, so
    # indexing it with the integer windows encodes every position at once.
    new = np.eye(num_letters, dtype=int)[Input_reshapeo]
    return new, Output

# Encode the whole corpus into one-hot windows (x) and next-character indices (y).
x, y = data_preprocessing(data, time_step, num_letters, char_to_int)
from sklearn.model_selection import train_test_split
# train_test_split returns a train/test pair for every input array, so two
# inputs yield four outputs; 10% of the samples are held out for testing.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=10)
# One-hot encode the training targets to match the softmax output layer.
y_train_category = to_categorical(y_train, num_letters)

# Build and train the model
from keras.models import Sequential

from keras.layers import Dense, LSTM
model = Sequential()
# One LSTM layer reading windows of shape (window length, vocabulary size).
model.add(LSTM(units=20, input_shape=(x_train.shape[1], x_train.shape[2]), activation='relu'))
# Softmax over the vocabulary: one probability per candidate next character.
model.add(Dense(units=num_letters, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()
model.fit(x_train, y_train_category, batch_size=1000, epochs=5)

# Predict
# The most probable class is the argmax over the softmax output; this works
# on Keras versions with and without Sequential.predict_classes.
y_train_predict = np.argmax(model.predict(x_train), axis=-1)
y_train_predict_char = [int_to_char[i] for i in y_train_predict]

y_test_predict = np.argmax(model.predict(x_test), axis=-1)
y_test_predict_char = [int_to_char[i] for i in y_test_predict]

# Score
from sklearn.metrics import accuracy_score
accuracy_train = accuracy_score(y_train, y_train_predict)
accuracy_test = accuracy_score(y_test, y_test_predict)

# Predict the next character for every window of a new piece of text.
# NOTE(review): the text must be longer than time_step to produce any window,
# and every character must exist in char_to_int (otherwise KeyError).
new_letters = 'ggdagdgasdasgd'
x_new, y_new = data_preprocessing(new_letters, time_step, num_letters, char_to_int)
if x_new.shape[0] == 0:
    # No complete window fits into the text, so there is nothing to predict.
    y_new_predict = np.array([], dtype=int)
else:
    y_new_predict = np.argmax(model.predict(x_new), axis=-1)

y_new_predict_char = [int_to_char[i] for i in y_new_predict]
# x_new already holds exactly one row per window, so iterate over all rows.
for i in range(x_new.shape[0]):
    print(new_letters[i:i + time_step], 'fghjkddtyuyth', y_new_predict_char[i])
  • 11
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值