python版本的flapy bird_Python平台上用tensorflow训练flappy bird出现问题,求大神点播...

该楼层疑似违规已被系统折叠 隐藏此楼查看此楼

训练时长已经7个小时了,开始时还能明显观察到小鸟运动的变化是越来越好的,后来好像就停留在某个阶段了,过了几个小时还是那样,总是撞到上面的柱子,代码在下面,求大神点播,小弟感激不尽。

import tensorflow as tf

import cv2

import sys

sys.path.append("d:/study/Keras_FlappyBird-master/game")

import wrapped_flappy_bird as game

import numpy as np

t = 0

i = 1

def weight_variable(shape):

initial = tf.truncated_normal(shape, stddev=0.01)

return tf.Variable(initial)

def bias_variable(shape):

initial = tf.constant(0.01, shape=shape)

return tf.Variable(initial)

def conv2d(x, W, stride):

return tf.nn.conv2d(x, W, strides=[1, stride, stride, 1], padding="SAME")

def max_pool_2x2(x):

return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

W_conv1 = weight_variable([8, 8, 4, 32])

b_conv1 = bias_variable([32])

W_conv2 = weight_variable([4, 4, 32, 64])

b_conv2 = bias_variable([64])

W_conv3 = weight_variable([3, 3, 64, 64])

b_conv3 = bias_variable([64])

W_fc1 = weight_variable([1600, 512])

b_fc1 = bias_variable([512])

W_fc2 = weight_variable([512, 2])

b_fc2 = bias_variable([2])

s = tf.placeholder("float", [1, 80, 80, 4])

h_conv1 = tf.nn.relu(conv2d(s, W_conv1, 4) + b_conv1)#出32,80*80

h_pool1 = max_pool_2x2(h_conv1)#出32,40*40

h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2, 2) + b_conv2)#出64,40*40

h_conv3 = tf.nn.relu(conv2d(h_conv2, W_conv3, 1) + b_conv3)#出64,40*40

h_conv3_flat = tf.reshape(h_conv3, [-1, 1600])

h_fc1 = tf.nn.relu(tf.matmul(h_conv3_flat, W_fc1) + b_fc1)#出512维,64个

readout = tf.matmul(h_fc1,W_fc2) + b_fc2#出2维,64个

a = tf.placeholder("float", [1, 2])#占位输入2维,1行,2列

y = tf.placeholder("float", [1])#输入一维,赋值r_t

readout_action = tf.reduce_sum(tf.multiply(readout, a))

#sum,得到数字一个

cost = tf.reduce_mean(tf.square(y - readout_action))

train_step = tf.train.AdamOptimizer(1e-6).minimize(cost)

game_state = game.GameState()

x_t, r_0, terminal = game_state.frame_step([1,0])

x_t = cv2.cvtColor(cv2.resize(x_t, (80, 80)), cv2.COLOR_BGR2GRAY)

ret, x_t = cv2.threshold(x_t, 1, 255, cv2.THRESH_BINARY)

s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)

sess = tf.InteractiveSession()

sess.run(tf.global_variables_initializer())

while "flappy bird" != "angry bird":

readout_t = sess.run(readout,feed_dict={s:[s_t]})[0]#1行,2列

a_t = np.zeros([2])

action_index = np.argmax(readout_t)

a_t[action_index] = 1

x_t1_colored, r_t, terminal = game_state.frame_step(a_t)#输入a_t

#return image_data, reward, terminal

x_t1 = cv2.cvtColor(cv2.resize(x_t1_colored, (80, 80)), cv2.COLOR_BGR2GRAY)

ret, x_t1 = cv2.threshold(x_t1, 1, 255, cv2.THRESH_BINARY)

x_t1 = np.reshape(x_t1, (80, 80, 1))

s_t1 = np.append(x_t1, s_t[:, :, :3], axis=2)

train_step.run(feed_dict={y:[r_t],a:[a_t],s:[s_t]})

s_t = s_t1

t += 1

if t > 8000*i:

saver = tf.train.Saver()

saver.save(sess,'d:/study/dataset/flappy.ckpt', global_step=t)

i += 1

print('saved')

if t == 1:

saver = tf.train.Saver()

ckpt = tf.train.get_checkpoint_state('d:/study/dataset')

saver.restore(sess, ckpt.model_checkpoint_path)

print('loaded')

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值