# Chapter 7: Pong
import time
import gym
import numpy as np
import tensorflow as tf
import tensorlayer as tl
from tensorlayer.layers import *
def prepro(I):
    """Turn a raw game frame into a flat binary float vector.

    The frame is cropped to rows 35..194, down-sampled by keeping every
    second row and column of channel 0, and binarised: pixels whose value
    is 0, 144 or 109 become 0.0 and every other pixel becomes 1.0.

    Args:
        I: Array-like image indexed as ``[row, column, channel]``.
            Assumes a (210, 160, 3) Atari frame, which yields the
            80 * 80 = 6400 values the network input expects -- TODO confirm
            against the environment's observation shape.

    Returns:
        A new 1-D ``float64`` array of zeros and ones. The input frame is
        not modified.
    """
    frame = np.asarray(I)
    # Crop the playing field, then keep every second pixel of channel 0.
    frame = frame[35:195]
    frame = frame[::2, ::2, 0]
    # 144 and 109 are presumably the two background colours; together with
    # pixels that are already 0 they map to 0, everything else maps to 1.
    # Building a mask (rather than assigning into the slice, which is a view
    # of the caller's array) keeps the original observation intact.
    foreground = (frame != 0) & (frame != 144) & (frame != 109)
    # np.float was removed in NumPy 1.24; float64 is the dtype it aliased.
    return foreground.astype(np.float64).ravel()
# Side length of the square image the network consumes.
image_size = 80
# Length of one flattened image_size x image_size input vector (6400).
D = image_size * image_size
# Batch of flattened input vectors; the batch dimension is left unspecified.
t_states = tf.placeholder(tf.float32, shape=[None, D])
# Fully connected network: input -> 200 ReLU units -> 3 output units.
network = InputLayer(t_states, name='input')
network = DenseLayer(network, n_units=200, act=tf.nn.relu, name='hidden')
# NOTE(review): the 3 outputs presumably correspond to the actions the agent
# can choose -- confirm against the action mapping used by the training loop.
network = DenseLayer(network, n_units=3, name='output')
# Raw outputs of the 'output' layer. NOTE(review): despite the name these look
# like unnormalised scores (no `act` is passed above and softmax is applied
# separately on the next line) -- confirm the layer's default activation.
probs = network.outputs
# Softmax over the output-layer values.
sampling_prob = tf.nn.softmax(probs)
# Training settings. Only learning_rate and decay_rate are used in the visible
# part of the file (both are passed to the RMSProp optimizer); the notes on the
# other names are assumptions to be checked against the training loop.
# presumably the number of episodes gathered per parameter update -- TODO confirm
batch_size = 10
learning_rate = 1e-4
# presumably the discount factor applied to future rewards -- TODO confirm
gamma = 0.99
# Passed as the second positional argument of RMSPropOptimizer.
decay_rate = 0.99
# presumably toggles on-screen rendering of the environment -- TODO confirm
render = False
# resume = True
# presumably the base name used when saving/loading the model -- TODO confirm
model_file_name = "model_pong72"
# Integer action ids, one per entry in the batch.
t_actions = tf.placeholder(tf.int32, shape=[None])
# One float reward value per entry in the batch (discounted, going by the name).
t_discount_rewards = tf.placeholder(tf.float32, shape=[None])
# TensorLayer's reward-weighted cross-entropy loss, fed the raw output-layer
# values (`probs`), the actions and the rewards.
loss = tl.rein.cross_entropy_reward_loss(probs, t_actions, t_discount_rewards)
# RMSProp step that minimises the loss.
train_op = tf.train.RMSPropOptimizer(learning_rate, decay_rate).minimize(loss)
# Disabled debugging aid, kept as found:
# np.set_printoptions(threshold=np.nan)
# Create the Gym environment registered as "Pong-v0".
env = gym.make("Pong-v0")