古诗生成—用LSTM
太懒了,数据集明天再传
整体流程
完整代码:
import numpy as np
#from collections import Counter
from tensorflow import keras
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.python.ops import summary_ops_v2
import time
import os
# Preprocess the dataset: build vocab_to_int, int_to_vocab and the integer
# sequence that encodes the whole poem corpus.
def create_lookup_tables(path=r'newtxt.txt', encoding=None):
    """Read a text corpus and build character-level lookup tables.

    Args:
        path: Location of the corpus file. Defaults to ``newtxt.txt`` in the
            current working directory.
        encoding: Text encoding passed to ``open``. ``None`` uses the
            platform default; pass e.g. ``"utf-8"`` explicitly when the
            corpus encoding differs from the platform default.

    Returns:
        A tuple ``(vocab_to_int, int_to_vocab, int_text)`` where
        ``vocab_to_int`` maps each distinct character to its index in the
        sorted vocabulary, ``int_to_vocab`` is the inverse mapping, and
        ``int_text`` is a NumPy array holding the index of every character
        of the corpus except newlines.

    Raises:
        OSError: If the corpus file cannot be opened.
        UnicodeDecodeError: If the file does not match ``encoding``.
    """
    with open(path, 'r', encoding=encoding) as f:
        text = f.read()
    print(len(text))

    # The vocabulary is built from the raw text, so '\n' receives an index
    # whenever the file contains line breaks, even though newlines are
    # dropped from the encoded sequence below.
    vocab = sorted(set(text))
    print(len(vocab))

    vocab_to_int = {ch: idx for idx, ch in enumerate(vocab)}
    int_to_vocab = dict(enumerate(vocab))

    # Encode the corpus character by character, skipping line breaks.
    int_text = np.array([vocab_to_int[ch] for ch in text if ch != '\n'])
    print(len(int_text))
    return vocab_to_int, int_to_vocab, int_text
# Build the lookup tables and the encoded corpus once, at module level.
# create_lookup_tables() reads 'newtxt.txt' via a relative path, so that file
# must be present in the current working directory when this line runs.
vocab_to_int,int_to_vocab,int_text = create_lookup_tables()
#这个是获取下一batch内容的函数,要好好看看
def get_batches(int_text, batch_size, seq_length):
batchCnt = len(int_text) // (batch_size * seq_length)
#y取x的下一个
int_text_inputs = int_text[:batchCnt * (batch_size * seq_length)]
int_text_targets = int_text[1:batchCnt * (batch_size * seq_length)+1]
result_list = []
x = np.array(int_text_inputs).reshape(1, batch_size, -1)
y = np.array(int_text_targets).reshape(1, batch_size, -1)
'''
split(ary, indices_or_sections, axis=0)
把一个数组从左到右按顺序切分
参数:
ary: 要切分的数组
indices_or_sections: 如果是一个整数,就用该数平均切分,如果是一个数组,
为沿轴切分的位置(左开右闭)
axis: 沿着哪个维度进行切向,默认为0,横向切分。为1时,纵向切分
参考网站:https://blog.csdn.net/lthirdonel/article/details/88690923
'''
x_new = np.dsplit(x, batchCnt)
y_new