1
Parameters:
hidden_dim = 128
batch_size = 32
replay_buffer_size = 2160
learn_rate = 0.001
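For context, replay_buffer_size = 2160 corresponds to 90 days of hourly steps. A minimal sketch of the experience replay these numbers imply; the ReplayBuffer class below is an illustrative stand-in, not the project's actual implementation:

import random
from collections import deque

class ReplayBuffer:
    # Fixed-size FIFO store of (state, action, reward, next_state, done) tuples;
    # once full, the oldest transitions are evicted.
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def push(self, transition):
        self.buffer.append(transition)

    def sample(self, batch_size):
        # Uniform random minibatch for one gradient update
        return random.sample(self.buffer, batch_size)

buffer = ReplayBuffer(capacity=2160)  # 90 days x 24 hourly steps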
Reward function:
if env.time_step % 24 == 0:
    #print('net_electric_consumption in one day ', sum(env.net_electric_consumption[-24:-1]))
    # Daytime window (indices 7..22; note [7:-1] drops the last hour):
    # penalize charging (mean action above 2.0), reward otherwise.
    if np.array(action_day[7:-1]).mean() > 2.0:
        reward_day = -300
    else:
        reward_day = 100
    # Night window (indices 0..6): reward charging (mean above 2.1), penalize otherwise.
    if np.array(action_day[0:7]).mean() > 2.1:
        reward_night = 400
    else:
        reward_night = -300
    reward += reward_day + reward_night
    #print('reward: ', reward)
    reward_epoch += reward
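The block above assumes action_day already holds the 24 hourly actions of the day that just ended. A minimal sketch of how it could be maintained in the step loop; the agent interface and the gym-style env.step signature are assumptions, since the surrounding training code is not shown:

def run_epoch(env, agent):
    # One pass over the environment; action_day collects the hourly actions
    # of the current day so the daily shaping terms above can inspect them.
    state = env.reset()
    action_day, done = [], False
    while not done:
        action = agent.select_action(state)
        next_state, reward, done, info = env.step(action)
        action_day.append(action)
        if env.time_step % 24 == 0:   # a full day has just completed
            # ... the daily shaping block above runs here, then:
            action_day = []           # start collecting the next day
        state = next_state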
Result:
The performance is far worse than the previous DQN.
2
The curve from the previous step looks overfit and fluctuates heavily, so reduce the learning rate.
Parameters:
hidden_dim = 128
batch_size = 32
replay_buffer_size = 2160
learn_rate = 0.0001
The actions take values in [-1/c, 1/c], so their mean should be around 0; the 2.0/2.1 thresholds from step 1 were therefore effectively unreachable and are lowered here.
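A quick numeric check of this (c = 2.0 is a placeholder; the real capacity constant comes from the environment):

import numpy as np

c = 2.0                                          # placeholder capacity constant
actions = np.random.uniform(-1/c, 1/c, size=24)  # one day of admissible actions
print(actions.mean())         # close to 0
print(np.abs(actions).max())  # at most 1/c, far below the old 2.0/2.1 thresholds

The thresholds are lowered to match: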
if env.time_step % 24 == 0:
    #print('net_electric_consumption in one day ', sum(env.net_electric_consumption[-24:-1]))
    # Daytime window (indices 7..22): penalize a positive mean action (charging),
    # reward a non-positive one (discharging or idle).
    if np.array(action_day[7:-1]).mean() > 0.0:
        reward_day = -300
    else:
        reward_day = 100
    # Night window (indices 0..6): reward charging (mean above 0.1), penalize otherwise.
    if np.array(action_day[0:7]).mean() > 0.1:
        reward_night = 1000
    else:
        reward_night = -300
    reward += reward_day + reward_night
Result:
Clearly, the reward curve has no correlation with the cost curve.
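One way to quantify "no correlation" instead of eyeballing the curves, assuming per-epoch logs of reward and cost are kept (the argument names are hypothetical):

import numpy as np

def reward_cost_correlation(reward_log, cost_log):
    # reward_log: accumulated reward_epoch per epoch;
    # cost_log: total electricity cost (or net consumption) per epoch.
    # A Pearson coefficient near 0 confirms the two curves are unrelated.
    return np.corrcoef(np.asarray(reward_log, dtype=float),
                       np.asarray(cost_log, dtype=float))[0, 1]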
3
Parameters:
hidden_dim = 128
batch_size = 32
replay_buffer_size = 7200
learn_rate = 0.0003
Adjusted the day/night time windows, following the paper:
# Encourage charging at night and discharging during the day
if env.time_step % 24 == 0:
    #print('net_electric_consumption in one day ', sum(env.net_electric_consumption[-24:-1]))
    # Daytime window (indices 0..19): penalize a positive mean action (charging),
    # reward a non-positive one.
    if np.array(action_day[0:20]).mean() > 0.0:
        reward_day = -300
    else:
        reward_day = 300
    # Night window ([22:-1]; note the -1 drops the last hour of the day):
    # reward charging, penalize discharging, neutral otherwise.
    if np.array(action_day[22:-1]).mean() > 0.1:
        reward_night = 1000
    elif np.array(action_day[22:-1]).mean() < 0.0:
        reward_night = -500
    else:
        reward_night = 0
    reward += reward_day + reward_night
Result:
Not yet optimal; the number of training epochs needs to be increased.
4
Only the reward function changed (relative to step 3):
# Encourage charging at night and discharging during the day
if env.time_step % 24 == 0:
    #print('net_electric_consumption in one day ', sum(env.net_electric_consumption[-24:-1]))
    # Daytime window (indices 0..19): penalize a positive mean action (charging).
    if np.array(action_day[0:20]).mean() > 0.0:
        reward_day = -1000
    else:
        reward_day = 0
    # Night window ([22:-1]): reward charging, penalize discharging, neutral otherwise.
    if np.array(action_day[22:-1]).mean() > 0.1:
        reward_night = 1000
    elif np.array(action_day[22:-1]).mean() < 0.0:
        reward_night = -1000
    else:
        reward_night = 0
    reward += reward_day + reward_night
    #print('reward: ', reward)
    reward_epoch += reward
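Steps 3 and 4 share one structure: slice the day's actions into two windows and map each window's mean to a fixed bonus or penalty. A consolidated sketch that makes the windows and reward levels explicit parameters (the function name and arguments are my own, not from the original code):

import numpy as np

def daily_shaping(action_day, day_slice, night_slice,
                  day_penalty, day_bonus, night_bonus, night_penalty,
                  night_threshold=0.1):
    # Mean action over the day window: positive means net charging.
    day_mean = np.array(action_day[day_slice]).mean()
    reward_day = day_penalty if day_mean > 0.0 else day_bonus
    # Mean action over the night window: reward charging, penalize discharging.
    night_mean = np.array(action_day[night_slice]).mean()
    if night_mean > night_threshold:
        reward_night = night_bonus
    elif night_mean < 0.0:
        reward_night = night_penalty
    else:
        reward_night = 0
    return reward_day + reward_night

# Step 4 above corresponds to (slice(22, None) keeps the final hour,
# unlike the original [22:-1]):
# daily_shaping(action_day, slice(0, 20), slice(22, None),
#               day_penalty=-1000, day_bonus=0,
#               night_bonus=1000, night_penalty=-1000)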