# 5分钟构建一个自己的无人驾驶车

#### 接下来让我们训练一下自己的无人驾驶车

from flat_game import carmunk
import numpy as np
import random
import csv
from nn import neural_net, LossHistory
import os.path
import timeit

NUM_INPUT = 3
GAMMA = 0.9  # Forgetting.
TUNING = False  # If False, just use arbitrary, pre-selected params.

# 训练一个神经网络，有一些hyper parameters
def train_net(model, params):
    """Train ``model`` with Q-learning and experience replay in the car game.

    The agent first plays ``observe`` frames with random actions to fill the
    replay memory, then trains on one random minibatch per frame while the
    exploration rate ``epsilon`` is decreased step by step towards 0.1.

    Args:
        model: Network object exposing ``predict``, ``fit`` and
            ``save_weights`` (as called below).
        params: Mapping with at least the keys ``'batchSize'`` (minibatch
            size sampled from the replay memory) and ``'buffer'`` (maximum
            replay-memory length enforced during training). It is also passed
            to ``params_to_filename`` to build the name used for saved
            weights and logs.

    Returns:
        None. Weights are written to ``saved-models/`` every 25,000 frames
        and the collected distances/losses are handed to ``log_results``
        once all frames have been played.
    """
    filename = params_to_filename(params)

    # Training schedule.
    observe = 1000  # Number of frames to observe before training.
    epsilon = 1
    train_frames = 1000000  # Number of frames to play.
    batchSize = params['batchSize']
    buffer = params['buffer']

    # Bookkeeping used below.
    max_car_distance = 0
    car_distance = 0
    t = 0
    data_collect = []
    replay = []  # Stores tuples of (S, A, R, S').

    loss_log = []

    # Create a new game instance.
    game_state = carmunk.GameState()

    # Get the initial state by doing nothing (action 2) and reading the state.
    _, state = game_state.frame_step(2)

    # Timer used to report frames per second for each run of the car.
    start_time = timeit.default_timer()

    # Run the frames.
    while t < train_frames:

        t += 1
        car_distance += 1

        # Choose an action: random while observing or with probability
        # epsilon, otherwise the action with the highest predicted Q value.
        if random.random() < epsilon or t < observe:
            action = np.random.randint(0, 3)  # random
        else:
            # Get Q values for each action.
            qval = model.predict(state, batch_size=1)
            action = np.argmax(qval)  # best

        # Take the action, observe the new state and collect the reward.
        reward, new_state = game_state.frame_step(action)

        # Experience replay storage.
        replay.append((state, action, reward, new_state))

        # If we're done observing, start training.
        if t > observe:

            # If the memory exceeds the buffer size, drop the oldest entry.
            if len(replay) > buffer:
                replay.pop(0)

            # Randomly sample our experience replay memory.
            minibatch = random.sample(replay, batchSize)

            # Get training values.
            X_train, y_train = process_minibatch(minibatch, model)

            # Train the model on this batch and record its losses.
            history = LossHistory()
            model.fit(
                X_train, y_train, batch_size=batchSize,
                nb_epoch=1, verbose=0, callbacks=[history]
            )
            loss_log.append(history.losses)

        # Update the starting state with S'.
        state = new_state

        # Decrement epsilon over time. The float literal keeps this a true
        # division even on interpreters where int / int truncates to 0.
        if epsilon > 0.1 and t > observe:
            epsilon -= 1.0 / train_frames

        # A reward of -500 means the car died, so update the statistics.
        if reward == -500:
            # Log the car's distance at this T.
            data_collect.append([t, car_distance])

            # Update max.
            if car_distance > max_car_distance:
                max_car_distance = car_distance

            # Time it.
            tot_time = timeit.default_timer() - start_time
            fps = car_distance / tot_time

            # Output some stuff so we can watch.
            print("Max: %d at %d\tepsilon %f\t(%d)\t%f fps" %
                  (max_car_distance, t, epsilon, car_distance, fps))

            # Reset the per-run distance and timer.
            car_distance = 0
            start_time = timeit.default_timer()

        # Save the model weights every 25,000 frames.
        if t % 25000 == 0:
            model.save_weights('saved-models/' + filename + '-' +
                               str(t) + '.h5',
                               overwrite=True)
            print("Saving model %s - %d" % (filename, t))

    # Log results after we're done with all frames.
    log_results(filename, data_collect, loss_log)

The code in the video is here:
https://github.com/llSourcell/Self-Dr…

Paper 1: Long-term Planning by Short-term Prediction
[http://arxiv.org/pdf/1602.01580v1.pdf](http://arxiv.org/pdf/1602.01580v1.pdf)

Paper 2: End-to-End Learning for Self-Driving Cars
[https://arxiv.org/pdf/1604.07316v1.pdf](https://arxiv.org/pdf/1604.07316v1.pdf)

More on Reinforcement Learning:
http://www2.hawaii.edu/~chenx/ics699r…
https://www.quora.com/Artificial-Inte…
http://www2.econ.iastate.edu/tesfatsi…

#### 无人驾驶学习笔记

2016-12-17 23:42:48

#### 只有Python基础竟成为无人驾驶工程师，她是怎么做到的？

2017-10-31 00:00:00

#### 【自动驾驶】如何利用深度学习搭建一个最简单的无人驾驶系统

2017-11-02 00:00:00

#### 无人驾驶车核心算法 — SLAM

2016-09-23 15:10:32

#### 用树莓派制作一辆无人驾驶小车——前序

2018-03-27 20:23:31

#### 无人驾驶实战课（Python+机器学习+无人驾驶50讲）

2018年05月24日 18:11

#### TensorFlow学习笔记(一)

2017-05-12 23:15:49

#### NVIDIA 自动驾驶算法的keras（TensorFlow）实现

2018-02-17 17:25:34

#### tensorflow+入门笔记︱基本张量tensor理解与tensorflow运行结构

2017-01-22 11:57:34

#### 深度学习数据库

2017-01-03 12:00:26