python - Neural network can't learn (loss stays constant)

My project partner and I have run into a problem with our latest university project.

Our task is to implement a neural network that plays Pong. We feed the ball's velocity and the paddle positions into the network, and it has three outputs: UP, DOWN, DO_NOTHING. Once a player reaches 11 points, we train the network on all states, the decisions it made, and the rewards for those decisions (see reward_cal()). The problem we are facing is that the loss always settles at one particular value that depends only on the learning rate, so the network always makes the same decision, even though we penalize that decision as a serious mistake.
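
To make that concrete, here is a minimal, self-contained sketch of the interface we mean (the names and stand-in values here are only illustrative, not the actual game code, which follows below):

import numpy as np

# Stand-in values; in the game these come from the pygame rects.
ball_x, ball_y = 200, 300
speed_x, speed_y = 6, 6
player_x, player_y = 200, 550
com_x, com_y = 200, 50

# The 8 network inputs: ball position, ball speed, and both paddle centers.
state = np.array([ball_x, ball_y, speed_x, speed_y,
                  player_x, player_y, com_x, com_y], dtype=float)

# Stand-in for the network's softmax output over the 3 actions.
logits = np.zeros(3)
action_prob = np.exp(logits) / np.exp(logits).sum()

# Sample one of DO_NOTHING / UP / DOWN from the predicted probabilities.
action = np.random.choice(3, p=action_prob)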

Please help us figure out what we are doing wrong; we appreciate every suggestion! Our code is below, so feel free to ask if anything is unclear. We are quite new to this topic, so please don't be rude if something in there is completely stupid :D

Here is our code:

import sys, pygame, time
import numpy as np
import random
from os.path import isfile

import keras
from keras.optimizers import SGD
from keras.layers import Dense
from keras.layers.core import Flatten

pygame.init()
pygame.mixer.init()

# surface of the game
width = 400
height = 600
black = 0, 0, 0  # RGB value
screen = pygame.display.set_mode((width, height), 32)
# (resolution (x, y), flags, colour depth)
font = pygame.font.SysFont('arial', 36, bold=True)
pygame.display.set_caption('PyPong')  # title of the window

# consts for the game
acceleration = 0.0025  # the ball becomes faster during the game
mousematch = 1
delay_time = 0
paddleP = pygame.image.load("schlaeger.gif")
playerRect = paddleP.get_rect(center=(200, 550))
paddleC = pygame.image.load("schlaeger.gif")
comRect = paddleC.get_rect(center=(200, 50))
ball = pygame.image.load("ball.gif")
ballRect = ball.get_rect(center=(200, 300))

# variables for the game
pointsPlayer = [0]
pointsCom = [0]
playermove = [0, 0]
speedbar = [0, 0]
speed = [6, 6]
hitX = 0

# neural consts
learning_rate = 0.01
number_of_actions = 3
filehandler = open('logfile.log', 'a')
filename = sys.argv[1]

# neural variables
states, action_prob_grads, rewards, action_probs = [], [], [], []
reward_sum = 0
episode_number = 0
reward_sums = []

pygame.display.flip()

def pointcontrol():  # check the points in the game and call restart()
    if pointsPlayer[0] >= 11:
        print('Player Won ', pointsPlayer[0], '/', pointsCom[0])
        restart(1)
        return 1
    if pointsCom[0] >= 11:
        print('Computer Won ', pointsPlayer[0], '/', pointsCom[0])
        restart(1)
        return 1
    elif pointsCom[0] < 11 and pointsPlayer[0] < 11:
        restart(0)
        return 0

def restart(finished):  # reset the positions, the ball speed and
                        # (if the point limit was reached) the points
    ballRect.center = 200, 300
    comRect.center = 200, 50
    playerRect.center = 200, 550
    speed[0] = 6
    speed[1] = 6
    screen.blit(paddleC, comRect)
    screen.blit(paddleP, playerRect)
    pygame.display.flip()
    if finished:
        pointsPlayer[0] = 0
        pointsCom[0] = 0

def reward_cal(r, gamma=0.99):  # reward every move
    discounted_r = np.zeros_like(r)  # zero array with the size of the reward array
    running_add = 0
    for t in range(r.size - 1, -1):  # iterate beginning at the end
        if r[t] != 0:  # if reward is -1 or 1 (point made or lost)
            running_add = 0
        running_add = running_add * gamma + r[t]  # give every move before the
        # point the same reward, but a little bit smaller
        discounted_r[t] = running_add  # put the value into the new reward array
    # e.g. r = 0 0 0 0 0 1 0 0 0 -1 -> discounted_r = 0.5 0.6 0.7 0.8 0.9 1 -0.7 -0.8 -0.9 -1
    # (values are not really correct, just to make it clear)
    return discounted_r
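
# A worked example of what reward_cal is meant to produce (our own numbers, not
# from the original post, assuming gamma = 0.99 and the loop actually walking
# from the back of the array to the front):
# r            = [0,      0,    1,  0,       0,     -1]
# discounted_r = [0.9801, 0.99, 1, -0.9801, -0.99,  -1]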

# neural net
model = keras.models.Sequential()
model.add(Dense(16, input_dim=8, kernel_initializer='glorot_normal', activation='relu'))
model.add(Dense(32, kernel_initializer='glorot_normal', activation='relu'))
model.add(Dense(number_of_actions, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')
model.summary()

if isfile(filename):
    model.load_weights(filename)

# one ball movement before the AI gets to make a decision
ballRect = ballRect.move(speed)
reward_temp = 0.0
if ballRect.left < 0 or ballRect.right > width:
    speed[0] = -speed[0]
if ballRect.top < 0:
    pointsPlayer[0] += 1
    reward_temp = 1.0
    done = pointcontrol()
if ballRect.bottom > height:
    pointsCom[0] += 1
    done = pointcontrol()
    reward_temp = -1.0
if ballRect.colliderect(playerRect):
    speed[1] = -speed[1]
if ballRect.colliderect(comRect):
    speed[1] = -speed[1]
if speed[0] < 0:
    speed[0] -= acceleration
if speed[0] > 0:
    speed[0] += acceleration
if speed[1] < 0:
    speed[1] -= acceleration
if speed[1] > 0:
    speed[1] += acceleration

while True:  # game loop
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            pygame.quit()
            sys.exit()

    state = np.array([ballRect.center[0], ballRect.center[1],
                      speed[0], speed[1],
                      playerRect.center[0], playerRect.center[1],
                      comRect.center[0], comRect.center[1]])
    states.append(state)

    action_prob = model.predict_on_batch(state.reshape(1, 8))[0, :]
    action_probs.append(action_prob)
    action = np.random.choice(number_of_actions, p=action_prob)

    if action == 0:
        playermove = [0, 0]
    elif action == 1:
        playermove = [5, 0]
    elif action == 2:
        playermove = [-5, 0]
    playerRect = playerRect.move(playermove)

    y = np.array([-1, -1, -1])
    y[action] = 1
    action_prob_grads.append(y - action_prob)

    # enemy move
    comRect = comRect.move(speedbar)
    ballY = ballRect.left + 5
    comRectY = comRect.left + 30
    if comRect.top <= (height / 1.5):
        if comRectY - ballY > 0:
            speedbar[0] = -7
        elif comRectY - ballY < 0:
            speedbar[0] = 7
    if comRect.top > (height / 1.5):
        speedbar[0] = 0

    if mousematch == 1:
        done = 0
        reward_temp = 0.0
        ballRect = ballRect.move(speed)
        if ballRect.left < 0 or ballRect.right > width:
            speed[0] = -speed[0]
        if ballRect.top < 0:
            pointsPlayer[0] += 1
            done = pointcontrol()
            reward_temp = 1.0
        if ballRect.bottom > height:
            pointsCom[0] += 1
            done = pointcontrol()
            reward_temp = -1.0
        if ballRect.colliderect(playerRect):
            speed[1] = -speed[1]
        if ballRect.colliderect(comRect):
            speed[1] = -speed[1]
        if speed[0] < 0:
            speed[0] -= acceleration
        if speed[0] > 0:
            speed[0] += acceleration
        if speed[1] < 0:
            speed[1] -= acceleration
        if speed[1] > 0:
            speed[1] += acceleration
        rewards.append(reward_temp)

        if done:
            episode_number += 1
            reward_sums.append(np.sum(rewards))
            if len(reward_sums) > 40:
                reward_sums.pop(0)
            s = 'Episode %d Total Episode Reward: %f, Mean %f' % (
                episode_number, np.sum(rewards), np.mean(reward_sums))
            print(s)
            filehandler.write(s + '\n')
            filehandler.flush()

            # Propagate the rewards back to actions where no reward was given.
            # Rewards for earlier actions are attenuated.
            rewards = np.vstack(rewards)
            action_prob_grads = np.vstack(action_prob_grads)
            rewards = reward_cal(rewards)
            X = np.vstack(states).reshape(-1, 8)
            Y = action_probs + learning_rate * rewards * y
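            # Shapes here (our annotation, not in the original code): X is the
            # (N, 8) stack of states, rewards is the (N, 1) discounted return,
            # action_probs is a list of N (3,)-arrays, and y still holds only
            # the target of the LAST action, so Y broadcasts to an (N, 3) batch
            # target.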

            print('loss: ', model.train_on_batch(X, Y))
            model.save_weights(filename)
            states, action_prob_grads, rewards, action_probs = [], [], [], []
            reward_sum = 0

    screen.fill(black)
    screen.blit(paddleP, playerRect)
    screen.blit(ball, ballRect)
    screen.blit(paddleC, comRect)
    pygame.display.flip()
    pygame.time.delay(delay_time)

Here is our output:

pygame 1.9.4
Hello from the pygame community. https://www.pygame.org/contribute.html
Using TensorFlow backend.

_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
dense_1 (Dense)              (None, 16)                144
_________________________________________________________________
dense_2 (Dense)              (None, 32)                544
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 99
=================================================================
Total params: 787
Trainable params: 787
Non-trainable params: 0

_________________________________________________________________
2019-02-14 11:18:10.543401: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 AVX512F FMA
2019-02-14 11:18:10.666634: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1432] Found device 0 with properties: name: GeForce GTX 1080 Ti major: 6 minor: 1 memoryClockRate(GHz): 1.6705 pciBusID: 0000:17:00.0 totalMemory: 10.92GiB freeMemory: 10.76GiB
2019-02-14 11:18:10.775144: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1432] Found device 1 with properties: name: GeForce GTX 1080 Ti major: 6 minor: 1 memoryClockRate(GHz): 1.6705 pciBusID: 0000:65:00.0 totalMemory: 10.91GiB freeMemory: 10.73GiB
2019-02-14 11:18:10.776037: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1511] Adding visible gpu devices: 0, 1
2019-02-14 11:18:11.176560: I tensorflow/core/common_runtime/gpu/gpu_device.cc:982] Device interconnect StreamExecutor with strength 1 edge matrix:
2019-02-14 11:18:11.176590: I tensorflow/core/common_runtime/gpu/gpu_device.cc:988] 0 1
2019-02-14 11:18:11.176596: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1001] 0: N Y
2019-02-14 11:18:11.176600: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1001] 1: Y N
2019-02-14 11:18:11.176914: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10403 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:17:00.0, compute capability: 6.1)
2019-02-14 11:18:11.177216: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 10382 MB memory) -> physical GPU (device: 1, name: GeForce GTX 1080 Ti, pci bus id: 0000:65:00.0, compute capability: 6.1)

Computer Won 0 / 11
Episode 1 Total Episode Reward: -11.000000, Mean -11.000000
loss: 0.254405
Computer Won 0 / 11
Episode 2 Total Episode Reward: -11.000000, Mean -11.000000
loss: 0.254304
Computer Won 0 / 11
Episode 3 Total Episode Reward: -11.000000, Mean -11.000000
loss: 0.254304
Computer Won 0 / 11
Episode 4 Total Episode Reward: -11.000000, Mean -11.000000
loss: 0.254304
Computer Won 0 / 11
Episode 5 Total Episode Reward: -11.000000, Mean -11.000000
loss: 0.254304
Computer Won 0 / 11
Episode 6 Total Episode Reward: -11.000000, Mean -11.000000
loss: 0.254304
