import chainer
import chainerrl
# Strategy
from datetime import datetime
import backtrader
import random
# Integrate Model
import sys
import warnings
import numpy
import pandas
warnings.filterwarnings('ignore')
# Build instance and draw a single plot
# To display GUI plots inside the Jupyter notebook: %matplotlib inline
from mpl_toolkits import mplot3d
from mpl_toolkits.mplot3d import Axes3D
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot
obs_size = 1500
n_actions = 3
# Instance of the value function Q from chainerrl.q_functions.FCStateQFunctionWithDiscreteAction
I_Am_Q_Function = chainerrl.q_functions.FCStateQFunctionWithDiscreteAction(
    obs_size, n_actions, n_hidden_layers=7, n_hidden_channels=512)
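# Sanity-check sketch (the dummy observation is an assumption, not part of
# the original walkthrough): one forward pass maps a batch of observations
# to one Q-value per action.
dummy_obs = numpy.zeros((1, obs_size), dtype=numpy.float32)
action_value = I_Am_Q_Function(dummy_obs)  # a DiscreteActionValue
print(action_value.q_values.shape)         # -> (1, n_actions)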
# Instance of training optimizer from chainer.optimizers
optimizer = chainer.optimizers.Adam(eps=1e-2)
# Set up the optimizer on the Q-function's parameters
optimizer.setup(I_Am_Q_Function)
# Set the discount factor that discounts future rewards.
gamma = 0.95

# Use epsilon-greedy for exploration
def Im_RandomInterger_Function(Interger_Range_Start=0, Interger_Range_End=2):
    return random.randint(Interger_Range_Start, Interger_Range_End)
explorer = chainerrl.explorers.ConstantEpsilonGreedy(
    epsilon=0.3, random_action_func=Im_RandomInterger_Function)
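# Quick illustration (demo values are an assumption, not from the original):
# with epsilon=0.3, select_action returns the greedy action ~70% of the time
# and a result of Im_RandomInterger_Function otherwise, so action 1 below
# should appear roughly 800 times out of 1000 (0.7 + 0.3 * 1/3).
sample_actions = [explorer.select_action(t, greedy_action_func=lambda: 1)
                  for t in range(1000)]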
# DQN uses Experience Replay.
# Specify a replay buffer and its capacity.
replay_buffer = chainerrl.replay_buffer.ReplayBuffer(capacity=10**6)
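# Note (an assumption based on the ChainerRL API, not stated in the original):
# the agent fills this buffer automatically as it acts; learning updates begin
# once the buffer holds replay_start_size transitions (set on the agent below).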
# Since observations from the environment are numpy.float64 while
# Chainer only accepts numpy.float32 by default, specify
# a converter as a feature extractor function phi.
phi = lambda x: x.astype(numpy.float32, copy=False)
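# Quick check (the dummy data is an assumption): phi downcasts float64
# observations to the float32 dtype Chainer expects.
raw_obs = numpy.random.rand(obs_size)  # numpy.float64 by default
assert phi(raw_obs).dtype == numpy.float32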
# Now create an agent that will interact with the environment.
I_am_DQN_Agent = chainerrl.agents.DoubleDQN(
    I_Am_Q_Function, optimizer, replay_buffer, gamma, explorer,
    replay_start_size=500, update_interval=1,
    target_update_interval=100, phi=phi)
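
# Minimal training-loop sketch (an illustrative assumption: `env` stands in
# for any Gym-style environment with reset()/step(), e.g. a backtrader-backed
# wrapper defined elsewhere; it is not created in this snippet).
def train_agent(env, agent, n_episodes=50):
    for episode in range(n_episodes):
        obs = env.reset()
        reward, done = 0.0, False
        while not done:
            # act_and_train selects an action and performs a learning update
            action = agent.act_and_train(obs, reward)
            obs, reward, done, _ = env.step(action)
        # feed the terminal transition of the episode into the replay buffer
        agent.stop_episode_and_train(obs, reward, done)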