# DQN deep reinforcement learning: process screen-capture data with TensorFlow

import tensorflow as tf
import tflearn 
import numpy as np
import cv2
from PIL import ImageGrab

# ---------------------------------------------------------------------------
# Q-network definition
# ---------------------------------------------------------------------------
ACTIONS = 2  # number of valid actions

# Input: a stack of 4 consecutive 80x80 grayscale frames.
s = tf.placeholder("float", [None, 80, 80, 4])


def _weight_init():
    """Return a fresh truncated-normal initializer (stddev 0.01) for a layer."""
    return tflearn.initializations.truncated_normal(stddev=0.01)


# Convolutional feature extractor.
h_conv1 = tflearn.conv_2d(s, 32, 8, strides=4, activation='relu',
                          weights_init=_weight_init())
h_pool1 = tflearn.max_pool_2d(h_conv1, 2, 2)

h_conv2 = tflearn.conv_2d(h_pool1, 64, 4, strides=2, activation='relu',
                          weights_init=_weight_init())

h_conv3 = tflearn.conv_2d(h_conv2, 64, 3, strides=1, activation='relu',
                          weights_init=_weight_init())

# Flatten the final 5x5x64 feature map (80 -> 20 -> 10 -> 5 -> 5) to 1600 units.
h_conv3_flat = tflearn.reshape(h_conv3, [-1, 1600])
h_fc1 = tflearn.fully_connected(incoming=h_conv3_flat, n_units=512,
                                activation='relu',
                                weights_init=_weight_init())

# Linear output layer: one Q-value per action.
readout = tflearn.fully_connected(incoming=h_fc1, n_units=ACTIONS)

# ---------------------------------------------------------------------------
# Training ops
# ---------------------------------------------------------------------------
# Snapshot of the online network's variables (everything created above).
network_params = tf.trainable_variables()
q_values = readout

a = tf.placeholder("float", [None, ACTIONS])  # one-hot encoding of chosen action
y = tf.placeholder("float", [None])           # TD target Q-value ("Q reality")

# Q-value of the action actually taken: the one-hot mask zeroes the rest.
# `axis` replaces the deprecated `reduction_indices` argument (same meaning).
readout_action = tf.reduce_sum(tf.multiply(q_values, a), axis=1)
cost = tf.reduce_mean(tf.square(y - readout_action))
train_step = tf.train.AdamOptimizer(1e-6).minimize(cost)

# NOTE(review): no separate target network is ever built in this file, so
# this slice is empty and the "target network" sync below is a no-op.
# A second copy of the Q-network must be constructed before this line for
# the target-network machinery to do anything.
target_network_params = tf.trainable_variables()[len(network_params):]

# ---------------------------------------------------------------------------
# Session setup and target-network sync
# ---------------------------------------------------------------------------
saver = tf.train.Saver()
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

# Ops that copy each online-network variable into the corresponding
# target-network variable (a hard update).
reset_target_network_params = [
    target_var.assign(online_var)
    for target_var, online_var in zip(target_network_params, network_params)
]
sess.run(reset_target_network_params)

# ---------------------------------------------------------------------------
# Grab one screen frame and run a forward pass through the Q-network
# ---------------------------------------------------------------------------
im = ImageGrab.grab()
img = np.array(im)  # PIL delivers pixels in RGB channel order
# Bug fix: the array comes from PIL (RGB), so the correct conversion code is
# COLOR_RGB2GRAY — COLOR_BGR2GRAY would apply the red/blue luminance weights
# to the wrong channels.
x_t = cv2.cvtColor(cv2.resize(img, (80, 80)), cv2.COLOR_RGB2GRAY)
# Binarize: any pixel above 1 becomes 255, everything else 0.
ret, x_t = cv2.threshold(x_t, 1, 255, cv2.THRESH_BINARY)
# Initial state: the same frame repeated 4 times along the channel axis
# to fill the (80, 80, 4) input stack.
s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)
readout_t = q_values.eval(feed_dict={s: [s_t]})[0]

print(readout_t)
# Dump the graph for TensorBoard inspection.
tf.summary.FileWriter("./logs", sess.graph)




  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值