进货问题:每存放一件货物需要支付存储成本 h,每次进货需要支付固定的总运费 k,每件货物的进价为 c,每卖出一件货物得到收入 p。买家的需求服从参数为 lamb 的泊松分布。
采用值迭代(对 V* 进行迭代)求解最优策略。不过经测试,得到的策略似乎并不是最优的,但至少能够收敛,并且表现不错;策略给出的进货数也与 lamb 相呼应:存货越多,进货越少,具有一定的合理性。纯属练手作品。
import Poisson
import copy
# Largest stock level the store can hold; stock after ordering is capped here.
Max = 20
# Policy table: A[s] is the order quantity chosen for stock level s.
# valueEva overwrites the entries in place.
A = 21 * [0]
# Zero-filled by the start-up code; not read anywhere in the visible code.
P = list()
# Cost model:
#   c - purchase cost per item ordered
#   k - flat shipping fee charged once per order
#   h - holding cost per item in stock
#   p - revenue per item sold
c = 5
k = 2
h = 2
p = 8
# State-value estimates, one per stock level; zero-filled by the start-up
# code and rebound to a fresh list by every valueEva sweep.
V = list()
# Parameter of the Poisson distribution that customer demand follows.
lamb = 5
def carUpdate():
    """Do nothing and return None; an empty placeholder."""
    return None
def reward(x,a):
    """Return the expected one-period reward of ordering ``a`` items at stock ``x``.

    The stock available for sale is ``min(x + a, Max)``. The result is the
    expected sales revenue minus the flat shipping fee ``k`` (charged only
    when ``a`` is at least 1), the purchase cost ``c * a`` and the holding
    cost ``h * x`` on the stock held before ordering.

    Reads the module-level ``c``, ``k``, ``h``, ``p``, ``Max`` and ``lamb``.

    NOTE(review): ``Poisson.poisson(n, lamb)`` is assumed to return the
    probability that demand is exactly ``n`` - confirm against the Poisson
    module.
    """
    stock = min(x + a, Max)
    revenue = 0
    leftover = 1
    # Demands 1..stock can be served in full: `demand` items are sold.
    for demand in range(stock, 0, -1):
        prob = Poisson.poisson(demand, lamb)
        revenue = revenue + prob * demand * p
        leftover = leftover - prob
    # `leftover` now holds P(demand = 0) + P(demand > stock). Zero demand
    # sells nothing, so it must not earn revenue; only the sell-out case
    # (demand above the available stock) sells all `stock` items. valueEva
    # separates these two cases in the same way.
    sell_out = leftover - Poisson.poisson(0, lamb)
    revenue = revenue + sell_out * stock * p
    return -k * min(a, 1) - c * a - h * x + revenue
def valueEva():
    """Perform one value-iteration sweep and return the new value list.

    For each of the 21 stock levels, every order quantity 0..20 is scored as
    the expected immediate reward (from ``reward``) plus 0.9 times the
    expected value of the next stock level under the current ``V``. The best
    score becomes the new value of that stock level, and the first order
    quantity reaching it is stored in ``A``. The global ``V`` is rebound to
    the new list only after all stock levels have been processed.

    NOTE(review): ``Poisson.poisson(n, lamb)`` is assumed to be a pure
    function giving the probability of a demand of exactly ``n`` - confirm.
    """
    global V
    updated = []
    for stock in range(21):
        action_values = []
        for order in range(21):
            on_hand = min(stock + order, Max)
            expected = 0
            remaining = 1
            # A demand of on_hand - next_stock leaves next_stock items.
            for next_stock in range(on_hand):
                prob = Poisson.poisson(on_hand - next_stock, lamb)
                expected = expected + prob * (0.9 * V[next_stock])
                remaining = remaining - prob
            no_demand = Poisson.poisson(0, lamb)
            # What is left besides zero demand is demand above on_hand,
            # which empties the store.
            expected = expected + (remaining - no_demand) * (0.9 * V[0])
            # Zero demand leaves the stock at on_hand.
            expected = expected + no_demand * (0.9 * V[on_hand])
            expected = expected + reward(stock, order)
            action_values.append(expected)
        best = max(action_values)
        updated.append(best)
        A[stock] = action_values.index(best)
    V = updated
    return V
def valueConv():
    """Repeat value-iteration sweeps until the value estimates settle.

    Calls ``valueEva`` at most 30 times. After each sweep the largest
    absolute change over the 21 entries of ``V`` is compared with a
    tolerance of 1; as soon as the change is smaller, the zero-based sweep
    index is printed and the loop stops. Nothing is printed if the tolerance
    is never met. Returns None.
    """
    tolerance = 1
    for sweep in range(30):
        # Snapshot the estimates before valueEva rebinds the global V.
        before = copy.copy(V)
        valueEva()
        largest_change = max(abs(V[s] - before[s]) for s in range(21))
        if largest_change < tolerance:
            print(sweep)
            break
# Try a policy out by simulation.
def policy(A):
    """Simulate 1000 periods under policy ``A`` and return the summed reward.

    ``A[s]`` is the quantity ordered when the stock is ``s``. The run starts
    with a stock of 15. In each period the reward is the sales revenue minus
    the flat shipping fee (charged only when something is ordered), the
    purchase cost and the holding cost on the stock held before ordering.
    Stock after ordering is capped at ``Max``, and stock after sales never
    drops below 0.

    NOTE(review): ``Poisson.Poisson(Max, lamb)`` is presumably a random
    demand draw - confirm against the Poisson module.
    """
    stock = 15
    total = 0
    for _ in range(1000):
        demand = Poisson.Poisson(Max, lamb)
        order = A[stock]
        available = min(Max, stock + order)
        gain = -k * min(order, 1) - c * order - h * stock + p * min(demand, available)
        stock = max(available - demand, 0)
        total = total + gain
    return total
# Start-up: give P and V one zero entry for each of the 21 stock levels.
P.extend([0] * 21)
V.extend([0] * 21)
valueConv()
print(policy(A))
# For comparison, simulate policies that order 1, 2 and 3 more items than A
# at every stock level.
A1 = [qty + 1 for qty in A]
A2 = [qty + 2 for qty in A]
A3 = [qty + 3 for qty in A]
print(policy(A1))
print(policy(A2))
print(policy(A3))
A 是策略,V 是各存货数的估值。A 的取值是上面公式中使当前存货数估值最大的行为(即进货数量)。