
Due to space constraints, only part of the code is shown below.


import collections
import os
import sys

import numpy as np
import tensorflow as tf
import tensorflow.contrib.rnn as rnn
import tensorflow.contrib.legacy_seq2seq as seq2seq

# Note: tensorflow.contrib exists only in TensorFlow 1.x; this script will not run under TF 2.x.

BEGIN_CHAR = '^'
END_CHAR = '$'
UNKNOWN_CHAR = '*'
MAX_LENGTH = 100
MIN_LENGTH = 10
max_words = 3000
epochs = 50

# Corpus file
poetry_file = 'story.txt'

# Directory where model checkpoints are saved
save_dir = 'model'

class Data:
    def __init__(self):
        self.batch_size = 64
        self.poetry_file = poetry_file
        self.load()
        self.create_batches()

    def load(self):
        def handle(line):
            # Truncate over-long lines at the last '。' that falls within MAX_LENGTH
            if len(line) > MAX_LENGTH:
                index_end = line.rfind('。', 0, MAX_LENGTH)
                index_end = index_end if index_end > 0 else MAX_LENGTH
                line = line[:index_end + 1]
            return BEGIN_CHAR + line + END_CHAR

        # Assumes each corpus line looks like "title:content"; keep the text after the ':'
        self.poetrys = [line.strip().replace(' ', '').split(':')[1] for line in
                        open(self.poetry_file, encoding='utf-8')]
        self.poetrys = [handle(line) for line in self.poetrys if len(line) > MIN_LENGTH]

        # Collect every character that appears in the corpus
        words = []
        for poetry in self.poetrys:
            words += [word for word in poetry]
        counter = collections.Counter(words)
        count_pairs = sorted(counter.items(), key=lambda x: -x[1])
        words, _ = zip(*count_pairs)

        # Keep only the most frequent characters as the vocabulary;
        # any character outside the vocabulary is replaced by '*'
        words_size = min(max_words, len(words))
        self.words = words[:words_size] + (UNKNOWN_CHAR,)
        self.words_size = len(self.words)

        # Map characters to ids and back
        self.char2id_dict = {w: i for i, w in enumerate(self.words)}
        self.id2char_dict = {i: w for i, w in enumerate(self.words)}
        self.unknow_char = self.char2id_dict.get(UNKNOWN_CHAR)
        self.char2id = lambda char: self.char2id_dict.get(char, self.unknow_char)
        self.id2char = lambda num: self.id2char_dict.get(num)
        self.poetrys = sorted(self.poetrys, key=lambda line: len(line))
        self.poetrys_vector = [list(map(self.char2id, poetry)) for poetry in self.poetrys]

    def create_batches(self):
        self.n_size = len(self.poetrys_vector) // self.batch_size
        self.poetrys_vector = self.poetrys_vector[:self.n_size * self.batch_size]
        self.x_batches = []
        self.y_batches = []
        for i in range(self.n_size):
            batches = self.poetrys_vector[i * self.batch_size: (i + 1) * self.batch_size]
            length = max(map(len, batches))
            # Pad every sequence in the batch to the same length with the unknown-char id
            for row in range(self.batch_size):
                if len(batches[row]) < length:
                    r = length - len(batches[row])
                    batches[row][len(batches[row]): length] = [self.unknow_char] * r
            xdata = np.array(batches)
            ydata = np.copy(xdata)
            # Targets are the inputs shifted left by one character
            ydata[:, :-1] = xdata[:, 1:]
            self.x_batches.append(xdata)
            self.y_batches.append(ydata)
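The input/target relationship above is the standard next-character language-modelling setup: within every batch, the target sequence is the input sequence shifted left by one position. A small hypothetical sanity-check helper (not part of the original listing) makes this explicit:

def _check_batches(data):
    # Hypothetical helper, for illustration only: verify that each target batch
    # is its input batch shifted left by one character.
    for x, y in zip(data.x_batches, data.y_batches):
        assert x.shape == y.shape == (data.batch_size, x.shape[1])
        assert (y[:, :-1] == x[:, 1:]).all()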

class Model:
    def __init__(self, data, model='lstm', infer=False):
        self.rnn_size = 128
        self.n_layers = 2

        # When sampling, feed one sequence at a time
        if infer:
            self.batch_size = 1
        else:
            self.batch_size = data.batch_size

        if model == 'rnn':
            cell_rnn = rnn.BasicRNNCell
        elif model == 'gru':
            cell_rnn = rnn.GRUCell
        elif model == 'lstm':
            cell_rnn = rnn.BasicLSTMCell
        cell = cell_rnn(self.rnn_size, state_is_tuple=False)
        self.cell = rnn.MultiRNNCell([cell] * self.n_layers, state_is_tuple=False)

        self.x_tf = tf.placeholder(tf.int32, [self.batch_size, None])
        self.y_tf = tf.placeholder(tf.int32, [self.batch_size, None])
        self.initial_state = self.cell.zero_state(self.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w", [self.rnn_size, data.words_size])
            softmax_b = tf.get_variable("softmax_b", [data.words_size])
            with tf.device("/cpu:0"):
                embedding = tf.get_variable(
                    "embedding", [data.words_size, self.rnn_size])
                inputs = tf.nn.embedding_lookup(embedding, self.x_tf)

        outputs, final_state = tf.nn.dynamic_rnn(
            self.cell, inputs, initial_state=self.initial_state, scope='rnnlm')
        self.output = tf.reshape(outputs, [-1, self.rnn_size])
        self.logits = tf.matmul(self.output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        self.final_state = final_state
        pred = tf.reshape(self.y_tf, [-1])

        # seq2seq: per-character cross-entropy with uniform weights
        loss = seq2seq.sequence_loss_by_example([self.logits],
                                                [pred],
                                                [tf.ones_like(pred, dtype=tf.float32)])
        self.cost = tf.reduce_mean(loss)
        self.learning_rate = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        # Clip gradients to a global norm of 5 before applying them
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), 5)
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
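With per-position weights of all ones, the legacy sequence_loss_by_example call amounts to an averaged sparse softmax cross-entropy over the flattened logits. A hedged equivalent written against the plain TF 1.x API (an illustration, not the author's code) would be:

def cross_entropy_cost(logits, targets):
    # Illustrative alternative to seq2seq.sequence_loss_by_example with unit weights:
    # logits has shape [batch * time, vocab], targets has shape [batch * time].
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=targets, logits=logits)
    return tf.reduce_mean(loss)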

def train(data, model):
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        n = 0
        for epoch in range(epochs):
            # Decay the learning rate by 3% every epoch
            sess.run(tf.assign(model.learning_rate, 0.002 * (0.97 ** epoch)))
            pointer = 0
            for batche in range(data.n_size):
                n += 1
                feed_dict = {model.x_tf: data.x_batches[pointer], model.y_tf: data.y_batches[pointer]}
                pointer += 1
                train_loss, _, _ = sess.run([model.cost, model.final_state, model.train_op],
                                            feed_dict=feed_dict)
                sys.stdout.write('\r')
                info = "{}/{} (epoch {}) | train_loss {:.3f}" \
                    .format(epoch * data.n_size + batche,
                            epochs * data.n_size, epoch, train_loss)
                sys.stdout.write(info)
                sys.stdout.flush()

                # Save a checkpoint every 1000 steps and at the end of training
                if (epoch * data.n_size + batche) % 1000 == 0 \
                        or (epoch == epochs - 1 and batche == data.n_size - 1):
                    checkpoint_path = os.path.join(save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=n)
                    sys.stdout.write('\n')
                    print("model saved to {}".format(checkpoint_path))
        sys.stdout.write('\n')
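train() always starts from freshly initialized variables. To pick up from the checkpoints it writes to save_dir, a small hypothetical helper (not in the original listing) built on the standard TF 1.x Saver API could be called before the training loop:

def restore_latest(sess, saver):
    # Hypothetical helper: restore the most recent checkpoint from save_dir, if one exists.
    ckpt = tf.train.latest_checkpoint(save_dir)
    if ckpt is not None:
        saver.restore(sess, ckpt)
    return ckpt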

def sample(data, model, head=u''):
    def to_word(weights):
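The listing breaks off at this point. For orientation only: in char-RNN samplers of this kind, to_word typically draws a character id from the predicted probability row and maps it back to a character. The sketch below is an assumption about what such a helper usually looks like, not the author's omitted code (it uses the id2char mapping from the Data class above):

def to_word_sketch(weights, data):
    # Hypothetical stand-in for the omitted to_word: sample an id from the
    # probability distribution 'weights' and convert it back to a character.
    t = np.cumsum(weights)
    s = np.sum(weights)
    idx = int(np.searchsorted(t, np.random.rand() * s))
    return data.id2char(idx)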
