# Data from 100 independent simulations with 100 loops each (limited computing power)
# -*- coding: utf-8 -*-
"""
Created on Fri May 1 14:47:35 2020
@author: Ziz
"""
import numpy as np
import random
import time
import matplotlib.pyplot as plt
import pickle
# Example: saving and reloading the results with pickle
# fr = open('dataFile.txt','wb')
# pickle.dump([x,y],fr,-1)
# fr.close()
# fr = open('dataFile.txt','rb')
# pickle.load(fr)
# fr.close()
# Epsilon-greedy learning on randomly generated multi-armed bandits.
#
# For each training length (20, 40, ..., 1000 steps) the script averages the
# per-step reward over `outer_loop` random bandit problems and `inner_loop`
# independent learners per problem, then plots average reward against the
# number of training steps.
#
# NOTE(review): the original paste had lost its indentation; the nesting below
# was reconstructed from the syntax. The progress report is kept inside the
# sweep loop because the timer is restarted on every sweep point -- confirm.

x = []  # training-step counts, one entry per sweep point
y = []  # average per-step reward for the matching entry of x
inner_loop = 5  # independent learners run on each bandit problem
outer_loop = 5  # random bandit problems generated per sweep point
n_arms = 10  # number of actions of each bandit
alpha = 0.1  # constant step size of the action-value update
epsilon = 0.1  # probability of picking a uniformly random arm


def _run_learner(q_values, train_steps, alpha=alpha, epsilon=epsilon):
    """Run one epsilon-greedy learner and return its mean reward per step.

    Parameters
    ----------
    q_values : 1-D array
        True mean reward of every arm.
    train_steps : int
        Number of arm pulls; must be positive.
    alpha : float
        Constant step size used to move the estimate toward each reward.
    epsilon : float
        Probability of exploring (choosing a uniformly random arm).

    Returns
    -------
    float
        Sum of the received rewards divided by ``train_steps``.

    Raises
    ------
    ValueError
        If ``train_steps`` is not positive.
    """
    if train_steps <= 0:
        raise ValueError("train_steps must be positive")

    arm_count = len(q_values)
    estimates = np.zeros(arm_count)
    total_reward = 0.0
    for step in range(train_steps):
        # The first pull is always random: every estimate is still zero, so a
        # greedy choice would otherwise always pick arm 0.
        if step == 0 or np.random.rand() < epsilon:
            arm = np.random.randint(0, arm_count)
        else:
            # argmax returns the first maximal index, i.e. ties go to the
            # lowest-numbered arm.
            arm = int(np.argmax(estimates))
        # Reward = true arm value plus unit-variance Gaussian noise.
        reward = np.random.randn() + q_values[arm]
        total_reward += reward
        estimates[arm] += alpha * (reward - estimates[arm])
    return float(total_reward / train_steps)


def _average_reward(train_steps, n_problems, n_runs):
    """Average ``_run_learner`` over random bandits and repeated learners.

    Draws ``n_problems`` bandits whose arm values are standard-normal, runs
    ``n_runs`` fresh learners on each, and returns the mean of the per-problem
    mean rewards as a float.
    """
    problem_sum = 0.0
    for _ in range(n_problems):
        q_values = np.random.randn(n_arms)
        run_sum = 0.0
        for _ in range(n_runs):
            run_sum += _run_learner(q_values, train_steps)
        problem_sum += run_sum / n_runs
    return problem_sum / n_problems


for loop in range(1, 51):
    # perf_counter is monotonic, so the difference is a reliable duration.
    start_time = time.perf_counter()
    train_steps = loop * 20
    average_expect_out = _average_reward(train_steps, outer_loop, inner_loop)
    x.append(train_steps)
    y.append(average_expect_out)
    end_time = time.perf_counter()
    # Progress report: results so far and the time spent on this sweep point.
    print(x)
    print(y)
    print('time consume = ', end_time - start_time)
plt.plot(x, y)