1、Dropout to combat overfitting
Dropout fights overfitting by randomly dropping a fraction of the neural connections at each training step, so the network cannot rely too heavily on any single unit. Pooling can be seen in a similar spirit: it keeps only part of the activations and discards the rest.
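To make the idea concrete, here is a minimal NumPy sketch of the standard "inverted dropout" trick that tf.nn.dropout implements: surviving units are rescaled by 1/keep_prob so the expected activation is unchanged, which is why keep_prob can simply be set to 1 at test time (the helper name and example values below are mine):

import numpy as np

def inverted_dropout(x, keep_prob):
    # each unit survives independently with probability keep_prob
    mask = np.random.uniform(size=x.shape) < keep_prob
    # rescale the survivors so the expected output matches the input
    return x * mask / keep_prob

x = np.ones((2, 4))
print(inverted_dropout(x, keep_prob=0.5))  # roughly half zeros, the rest 2.0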
from __future__ import print_function
import numpy as np
import tensorflow as tf
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

# keep probability for dropout: feed 0.5 while training, 1.0 when evaluating
with tf.name_scope("keep_prob"):
    keep_prob = tf.placeholder(tf.float32)
digits = load_digits()
x = digits.data                          # (1797, 64): 8x8 images, flattened
print(x.shape)
y = digits.target                        # (1797,): digit labels 0-9
print(y.shape)
y = LabelBinarizer().fit_transform(y)    # one-hot encode the labels
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)
def add_layer(inputs, in_size, out_size, n_layer, activation=None):
    layer_name = "layer%s" % n_layer
    with tf.name_scope("weights"):
        w = tf.Variable(tf.random_normal([in_size, out_size]))
        tf.summary.histogram(layer_name + "/weights", w)
    with tf.name_scope("bias"):
        b = tf.Variable(tf.zeros([1, out_size]) + 0.1)
        tf.summary.histogram(layer_name + "/bias", b)
    w_b = tf.matmul(inputs, w) + b
    # apply dropout before the activation; keep_prob is fed at run time
    w_b = tf.nn.dropout(w_b, keep_prob)
    if activation is None:
        out_put = w_b
    else:
        out_put = activation(w_b)
    tf.summary.histogram(layer_name + "/output", out_put)
    return out_put
xs = tf.placeholder(tf.float32, [None, 64])   # 8x8 digit images, flattened
ys = tf.placeholder(tf.float32, [None, 10])   # one-hot labels
l1 = add_layer(xs, 64, 20, 0, activation=tf.nn.tanh)
predict = add_layer(l1, 20, 10, 1, activation=tf.nn.softmax)
cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys * tf.log(predict), reduction_indices=[1]))
optimizer = tf.train.GradientDescentOptimizer(0.1)
train = optimizer.minimize(cross_entropy)
sess = tf.Session()
merged = tf.summary.merge_all()
init = tf.global_variables_initializer()
sess.run(init)
for step in range(100):
    # train with dropout switched on (keep half the units)...
    sess.run(train, feed_dict={xs: x_train, ys: y_train, keep_prob: 0.5})
    # ...but report the loss with dropout switched off
    print(sess.run(cross_entropy, feed_dict={xs: x_train, ys: y_train, keep_prob: 1}))
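To actually see the overfitting gap that dropout closes, write the merged summaries for the train and test splits to separate log directories and compare the curves in TensorBoard. A minimal sketch, assuming tf.summary.scalar("loss", cross_entropy) was registered before tf.summary.merge_all() and the directory names logs/train and logs/test (both assumptions are mine, not from the original code):

train_writer = tf.summary.FileWriter("logs/train", sess.graph)
test_writer = tf.summary.FileWriter("logs/test", sess.graph)
for step in range(500):
    sess.run(train, feed_dict={xs: x_train, ys: y_train, keep_prob: 0.5})
    if step % 10 == 0:
        # record both curves with dropout disabled so they are comparable
        train_result = sess.run(merged, feed_dict={xs: x_train, ys: y_train, keep_prob: 1})
        test_result = sess.run(merged, feed_dict={xs: x_test, ys: y_test, keep_prob: 1})
        train_writer.add_summary(train_result, step)
        test_writer.add_summary(test_result, step)
# then inspect the curves with: tensorboard --logdir=logs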
2、DQN
This section does not cover the environment, only the core DQN algorithm; for the environment you can pair it with something like a maze game (a sketch of the surrounding training loop follows the class below).
class DQN():
    def __init__(self):
        # hyper-parameters
        self.lr = 0.1                # learning rate
        self.greedy = 0.9            # epsilon for epsilon-greedy action selection
        self.actions = 4             # number of discrete actions
        self.features = 2            # dimensionality of a state observation
        self.memory_size = 2000      # capacity of the replay buffer
        self.batch_size = 32
        self.gamma = 0.9             # reward discount factor
        self.replace_inter = 100     # how often to sync the target net with the eval net
        # each row of the replay buffer stores [s, action, reward, s_]
        self.memory = np.zeros((self.memory_size, self.features * 2 + 2))
        print(self.memory.shape)
        self.build_net()
        self.sess = tf.Session()
        init_op = tf.global_variables_initializer()
        self.sess.run(init_op)
        # Registering the variables in named collections when they are defined
        # lets us fetch them here and build the ops that copy the eval-net
        # parameters into the target net.
        t_params = tf.get_collection("target_net_paras")
        e_params = tf.get_collection("eval_net_paras")
        self.replace_target_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]
    def add_layer(self, xs, paras_name):
        # register every variable both globally and in a per-network
        # collection, so the target/eval parameters can be swapped later
        c_names = [paras_name, tf.GraphKeys.GLOBAL_VARIABLES]
        w1 = tf.Variable(tf.random_uniform([self.features, 10]), collections=c_names)
        b1 = tf.Variable(tf.zeros([10]) + 0.1, collections=c_names)
        y1 = tf.nn.relu(tf.matmul(xs, w1) + b1)
        w2 = tf.Variable(tf.random_uniform([10, self.actions]), collections=c_names)
        b2 = tf.Variable(tf.zeros([self.actions]) + 0.1, collections=c_names)
        y2 = tf.matmul(y1, w2) + b2
        return y2
    def build_net(self):
        # eval net: estimates the action-value function Q(s, a) for the current state
        self.s = tf.placeholder(tf.float32, [None, self.features])
        self.q_eval = self.add_layer(self.s, "eval_net_paras")
        # target net: estimates Q for the next state; its parameters are only
        # updated by copying from the eval net every replace_inter steps
        self.s_ = tf.placeholder(tf.float32, [None, self.features])
        self.q_next = self.add_layer(self.s_, "target_net_paras")
        self.q_target = tf.placeholder(tf.float32, [None, self.actions])
        self.loss = tf.reduce_mean(tf.squared_difference(self.q_target, self.q_eval))
        optimizer = tf.train.GradientDescentOptimizer(self.lr)
        self.train = optimizer.minimize(self.loss)
    def choose_action(self, observation):
        # observation is a 1-D vector of length self.features; add a batch axis
        observation = observation[np.newaxis, :]
        action_value = self.sess.run(self.q_eval, feed_dict={self.s: observation})
        # epsilon-greedy: exploit the best known action with probability
        # self.greedy, otherwise explore a random one
        if np.random.uniform() < self.greedy:
            action = np.argmax(action_value)
        else:
            action = np.random.randint(0, self.actions)
        return action
    def store_transition(self, s, action, reward, s_):
        if not hasattr(self, "memory_counter"):
            self.memory_counter = 0
        # row layout matches the replay buffer: [s, action, reward, s_]
        transition = np.hstack((s, [action, reward], s_))
        # overwrite the oldest transition once the buffer is full
        index = self.memory_counter % self.memory_size
        self.memory[index, :] = transition
        self.memory_counter += 1
    def learn(self):
        if not hasattr(self, "learn_step"):
            self.learn_step = 0
        # periodically copy the eval-net parameters into the target net
        if self.learn_step % self.replace_inter == 0:
            self.sess.run(self.replace_target_op)
        # sample a batch of transitions from the replay buffer
        if self.memory_counter > self.memory_size:
            sample_index = np.random.choice(self.memory_size, size=self.batch_size)
        else:
            sample_index = np.random.choice(self.memory_counter, size=self.batch_size)
        batch_memory = self.memory[sample_index, :]
        q_next, q_eval = self.sess.run(
            [self.q_next, self.q_eval],
            feed_dict={
                self.s_: batch_memory[:, -self.features:],  # next states -> target net
                self.s: batch_memory[:, :self.features]})   # current states -> eval net
        # start from q_eval so the loss is zero for the actions that were not
        # taken; only the taken action's Q-value receives a learning signal
        q_target = q_eval.copy()
        batch_index = np.arange(self.batch_size)
        eval_act_index = batch_memory[:, self.features].astype(int)  # actions taken
        reward = batch_memory[:, self.features + 1]
        # Bellman update: Q(s, a) <- r + gamma * max_a' Q_target(s', a')
        q_target[batch_index, eval_act_index] = reward + self.gamma * np.max(q_next, axis=1)
        _, self.cost = self.sess.run(
            [self.train, self.loss],
            feed_dict={self.s: batch_memory[:, :self.features], self.q_target: q_target})
        self.learn_step += 1
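For completeness, here is a minimal sketch of the training loop that would drive this class. The MazeEnv object and its reset()/step(action) interface are hypothetical stand-ins for whatever maze game you pair the agent with, and the 200-step warm-up before learning starts is my choice, not part of the original code:

Rl = DQN()
env = MazeEnv()  # hypothetical environment exposing reset() and step(action)
total_steps = 0
for episode in range(300):
    s = env.reset()                          # s is a length-2 feature vector
    while True:
        action = Rl.choose_action(s)
        s_, reward, done = env.step(action)
        Rl.store_transition(s, action, reward, s_)
        if total_steps > 200:                # let the replay buffer fill first
            Rl.learn()
        s = s_
        total_steps += 1
        if done:
            break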
Saving and restoring parameters:
To save the parameters after the whole program has finished running:
saver = tf.train.Saver()
save_path = saver.save(Rl.sess, "my_net/save_net.ckpt")
To restore the parameters, do it right after build_net has constructed the graph:
saver = tf.train.Saver()
saver.restore(self.sess, "my_net/save_net.ckpt")
After restoring, there is no need to run the learn process again.
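Putting the two snippets together, a restore-only run might look like the sketch below. MazeEnv is the same hypothetical environment as above; note that choose_action is still epsilon-greedy with greedy = 0.9, so for pure exploitation you could set Rl.greedy = 1.0 (that tweak is my suggestion, not from the original):

Rl = DQN()
saver = tf.train.Saver()
saver.restore(Rl.sess, "my_net/save_net.ckpt")  # overwrite the fresh initialization
Rl.greedy = 1.0        # always take the best known action
env = MazeEnv()
s = env.reset()
while True:
    action = Rl.choose_action(s)
    s, reward, done = env.step(action)
    if done:
        break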