python 动态规划供应链_动态经济学的python实现|（一）动态规划问题

最新推荐文章于 2025-03-24 11:52:57 发布

LKEG

最新推荐文章于 2025-03-24 11:52:57 发布

阅读量782

点赞数

文章标签： python 动态规划供应链

本文链接：https://blog.csdn.net/weixin_33345090/article/details/112931575

版权

本文介绍了使用Python解决确定性和不确定性动态规划问题，以蛋糕消费模型为例，展示如何通过迭代法求解贝尔曼方程。讨论了CRRA效用函数，并提供了代码实现，包括确定性问题的值函数和策略函数迭代，以及不确定性问题的二维转移矩阵求解。文章最后给出了小练习，引导读者进一步探索动态经济学中的投资问题。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

一、确定性的动态规划问题

考虑这样一个简单的吃蛋糕的问题：(此模型其实可以看作是RCK最优增长模型的家庭效用最大化的决策问题) 你有一个蛋糕大小是

,每期消费量

,当期蛋糕剩余量为

,蛋糕效用的“折现率”为

,蛋糕每期的持有回报率为

,现在考虑吃蛋糕的人的效用最大化问题，为使问题更加具体化，不妨设效用函数

(CRRA效用函数),考虑如下一个最优化问题：

求其贝尔曼方程为：

,过程如下：定义其值函数

：

则原问题转换为对贝尔曼方程的动态规划(本题特指无限期确定性动态规划)问题：

程序主要通过迭代法求近似解(数理方法亦可解出准确解：值函数：

,策略函数:

)

%matplotlib inline

from __future__ import division

from scipy.interpolate import InterpolatedUnivariateSpline #一维插值函数

from scipy.optimize import fminbound #求其最小值的函数，返回其最小值点

import matplotlib.pyplot as plt #绘图

import numpy as np

class CakeProblem(object):

def __init__(self, beta=0.96, r = 1.0, gamma = 1.5,

grid_max=2.5, grid_min = 1e-3, grid_size=120):#grid_max代表蛋糕的尺寸,size代表点集的数量

self.r, self.beta, self.gamma = r, beta, gamma #变量初始化，具体说明见题意

if gamma != 1.0:

self.u = lambda c: (c**(1 - gamma))/(1 - gamma)

else:

self.u = np.log

self.gamma = 1

self.grid = np.linspace(grid_min, grid_max, grid_size)#生成插值点

def bellman_operator(self, w):#定义贝尔曼方程，用于后面的迭代求解

Vx = InterpolatedUnivariateSpline(self.grid, w)#对W进行关于grid的一维插值

Tw = np.empty(len(w))

c = np.empty(len(w))

for i, x in enumerate(self.grid):

value = lambda c: - self.u(c) - self.beta * Vx(self.r*(x - c))#定义贝尔曼方程

c_star = fminbound(value, 1e-6, x-1e-6)#最优化

Tw[i] = - value(c_star)

c[i] = c_star

return Tw,c #返回值包含两个，一个是迭代得到的的值的集合，一个是对应的最优c的值

cp = CakeProblem()

v_zero = np.zeros(len(cp.grid)) #初始化v的值

v_one = cp.bellman_operator(v_zero)[0] #第一次迭代

v_two = cp.bellman_operator(v_one)[0] #第二次迭代

v_three = cp.bellman_operator(v_two)[0]#第三次迭代

v_four = cp.bellman_operator(v_three)[0]#第四次迭代

fig, ax = plt.subplots(figsize =(8,6))

ax.plot(cp.grid, v_zero, 'y-', lw = 2, alpha = .8, label='Initial Guess')

ax.plot(cp.grid, v_one, 'g-', lw = 2, alpha = .8, label='First ')

ax.plot(cp.grid, v_two, 'r-', lw = 2, alpha = .8, label='Second ')

ax.plot(cp.grid, v_three, 'c-', lw = 2, alpha = .8, label='Third ')

ax.plot(cp.grid, v_four, 'b-', lw = 2, alpha = .8, label='Fourth ')

ax.legend()#loc = 'lower right', fontsize='x-large')

ax.set_ylabel('$V(x)$', fontsize=12)

ax.set_xlabel('$x$', fontsize=12)

t = r'$Nonstochastic \ DP:\beta = {0:.2f}$'.format(cp.beta)

ax.set_title(t)

plt.show()

def compute_fixed_point(T, v, error_tol=1e-3, max_iter=500,#进行迭代的函数，T代表传入的值函数(贝尔曼方程)，v代表上次迭代的值，error_tol代表误差范围，max_iter代表最大迭代次数，

print_skip=25,verbose=True):#print_skip代表打印结果的间隔，verbose代表是否打印结果

i = 0

error = error_tol + 1

while i < max_iter and error > error_tol:

new_v = T(v)[0]

i += 1

error = np.max(np.abs(new_v - v))

if verbose and i % print_skip == 0:

print(f"第 {i}步收敛结果是 {error}.")

v = new_v

if i == max_iter:

print("收敛失败")

if verbose and i < max_iter:

print(f"\n在第 {i} 步收敛")

return v

v = np.zeros(len(cp.grid))

v_star = compute_fixed_point(cp.bellman_operator, v, max_iter = 500)#对值函数进行迭代第 25步收敛结果是 32.61290746816758.

第 50步收敛结果是 11.9808393483504.

第 75步收敛结果是 4.322996516406192.

第 100步收敛结果是 1.5584699088216212.

第 125步收敛结果是 0.5616670706775722.

第 150步收敛结果是 0.20242254985498676.

第 175步收敛结果是 0.07295252479207193.

第 200步收敛结果是 0.026146320327711692.

第 225步收敛结果是 0.009306197514433734.

第 250步收敛结果是 0.0033539231632744304.

第 275步收敛结果是 0.0012087429016105489.

在第 280 步收敛

fig, ax = plt.subplots()

ax.plot(cp.grid, v_star, label='Approximate value function')#由于是迭代得到的结果，因此结果不能认为使精确的

ax.set_ylabel('$V(W_{t})$', fontsize=12)

ax.set_xlabel('$W_{t}$', fontsize=12)

ax.set_title('Value function')

ax.legend()

plt.show()

#由值函数迭代的最优解，代入贝尔曼方程求其策略

fig, ax = plt.subplots()

c_star = cp.bellman_operator(v_star)[1]

def piecewise_linear(x, k):

# x

# x>=x0 ⇒ lambda x: k2*x + y0 - k2*x0

return np.piecewise(x, [x < x0, x >= x0], [lambda x:k1*x + y0-k1*x0,

lambda x:k2*x + y0-k2*x0])

ax.plot(cp.grid, c_star, label='Approximate policy function')

ax.set_ylabel(r'$c_{t}(W_{t})$')

ax.set_xlabel('$W_{t}$')

ax.legend()

ax.set_title('Policy function')

plt.show()

小练习：

效用函数

(CRRA效用函数),在t期，

表示资本存量，

表示产出，

表示消费量，

表示投资，

表示资本的损失率，其中

考虑如下一个最优化问题：

(提示：求其贝尔曼方程为：

,等价于

，对其进行迭代求解即可。)

二、不确定性的动态规划问题

在简单问题的基础之上，我们考虑taste shock,这就是一个不确定性的问题，具体定义可以见教科书，其实只是简单问题的推广，原先的一维转移方程变成了二维的转移矩阵。在此给出贝尔曼方程：

作代换

得：

则可以进行迭代法求解了，以书上给的样例作如下求解

%matplotlib inline

from __future__ import division

from scipy.interpolate import InterpolatedUnivariateSpline

from scipy.optimize import fminbound

import matplotlib.pyplot as plt

import numpy as np

class CakeProblem(object):

def __init__(self, beta=0.95, r = 1/0.95, gamma = 2, shocks = [.9, 1.1],

transition = [[.3,.7],[.5,.5]], deterministic=False, grid_max=2,

grid_min = .4, grid_size=150):

self.r, self.beta, self.gamma = r, beta, gamma

self.shocks = shocks

self.transition = np.asarray(transition)

self.deterministic = deterministic

if deterministic == True:#确定性

self.transition = np.asarray([[1]])

self.shocks = np.asarray([0])

if self.gamma != 1:

self.u = lambda c: (c**(1 - gamma))/(1 - gamma)

else:

self.u = np.log

self.gamma = 1

self.grid = np.linspace(grid_min, grid_max, grid_size)

def bellman_operator(self, w):# 定义贝尔曼方程，输出函数和最优值

Aw = [InterpolatedUnivariateSpline(self.grid, w[i]) for i in range(len(self.shocks))]

Awx = lambda y : [Aw[i](y[i]) for i in range(len(y))]

Tw = np.asarray([np.empty(len(w[0]))]*len(self.shocks))

c = np.asarray([np.empty(len(w[0]))]*len(self.shocks))

for i,x in enumerate(self.grid):

for j in range(len(self.shocks)):

objective = lambda c: - self.u(c) - self.beta*(np.dot(self.transition[j],Awx(np.add(self.r*(x - c), self.shocks))))

c_star = fminbound(objective, 1e-6, x-1e-6)

Tw[j][i] = - objective(c_star)

c[j][i] = c_star

return Tw,c

def compute_fixed_point(T, v, error_tol=1e-3, max_iter=500,

print_skip=25,verbose=True):#迭代计算

i = 0

error = error_tol + 1

while i < max_iter and error > error_tol:

new_v = T(v)[0]

i += 1

error = np.max(np.abs(new_v - v))

if verbose and i % print_skip == 0:

print(f"第 {i}步收敛结果是 {error}.")

v = new_v

if i == max_iter:

print("收敛失败")

if verbose and i < max_iter:

print(f"\n在第 {i} 步收敛")

return v

scp2 = CakeProblem()

v = np.asarray([np.zeros(scp2.grid.size)]*len(scp2.shocks))

v_star = compute_fixed_point(scp2.bellman_operator, v, max_iter = 500, verbose = 1)第 25步收敛结果是 0.28708564466581876.

第 50步收敛结果是 0.07955836901511759.

第 75步收敛结果是 0.022055027459529697.

第 100步收敛结果是 0.006114529179431116.

第 125步收敛结果是 0.0016951696693112694.

在第 136 步收敛

fig = plt.figure(figsize=(16,6))

ax1 = plt.subplot(121)

ax2 = plt.subplot(122)

labels = [r'Shock {0:.2g}'.format(scp2.shocks[k]) for k in range(len(scp2.shocks))]

ax1.plot(scp2.grid, v_star[0], label=labels[0])

ax1.plot(scp2.grid, v_star[1], label=labels[1])

ax1.set_ylabel('$V(W_{t})$', fontsize=12)

ax1.set_xlabel('$W_{t}$', fontsize=12)

ax1.set_title('Value function')

ax1.legend()

c_star = scp2.bellman_operator(v_star)[1]

ax2.plot(scp2.grid, c_star[0], label=labels[0])

ax2.plot(scp2.grid, c_star[1], label=labels[0])

ax2.set_ylabel(r'$c_{t}(W_{t})$')

ax2.set_xlabel('$W_{t}$')

ax2.legend()

ax2.set_title('Policy function')

plt.show()

小练习：

参考Dynamic Economics一书中8.4.4的问题，求带有Borrowing Restriction的投资问题：值函数如下：

参考文献：Dynamic economics: quantitative methods and applications J Adda, R Cooper, RW Cooper - 2003

python 动态规划 供应链_动态经济学的python实现|（一）动态规划问题

python 动态规划供应链_动态经济学的python实现|（一）动态规划问题