一、确定性的动态规划问题
考虑这样一个简单的吃蛋糕的问题:(此模型其实可以看作是RCK最优增长模型的家庭效用最大化的决策问题) 你有一个蛋糕大小是
,每期消费量
,当期蛋糕剩余量为
,蛋糕效用的“折现率”为
,蛋糕每期的持有回报率为
,现在考虑吃蛋糕的人的效用最大化问题,为使问题更加具体化,不妨设效用函数
(CRRA效用函数),考虑如下一个最优化问题:
求其贝尔曼方程为:
,过程如下: 定义其值函数
:
则原问题转换为对贝尔曼方程的动态规划(本题特指无限期确定性动态规划)问题:
程序主要通过迭代法求近似解(数理方法亦可解出准确解:值函数:
,策略函数:
)
%matplotlib inline
from __future__ import division
from scipy.interpolate import InterpolatedUnivariateSpline #一维插值函数
from scipy.optimize import fminbound #求其最小值的函数,返回其最小值点
import matplotlib.pyplot as plt #绘图
import numpy as np
class CakeProblem(object):
def __init__(self, beta=0.96, r = 1.0, gamma = 1.5,
grid_max=2.5, grid_min = 1e-3, grid_size=120):#grid_max代表蛋糕的尺寸,size代表点集的数量
self.r, self.beta, self.gamma = r, beta, gamma #变量初始化,具体说明见题意
if gamma != 1.0:
self.u = lambda c: (c**(1 - gamma))/(1 - gamma)
else:
self.u = np.log
self.gamma = 1
self.grid = np.linspace(grid_min, grid_max, grid_size)#生成插值点
def bellman_operator(self, w):#定义贝尔曼方程,用于后面的迭代求解
Vx = InterpolatedUnivariateSpline(self.grid, w)#对W进行关于grid的一维插值
Tw = np.empty(len(w))
c = np.empty(len(w))
for i, x in enumerate(self.grid):
value = lambda c: - self.u(c) - self.beta * Vx(self.r*(x - c))#定义贝尔曼方程
c_star = fminbound(value, 1e-6, x-1e-6)#最优化
Tw[i] = - value(c_star)
c[i] = c_star
return Tw,c #返回值包含两个,一个是迭代得到的的值的集合,一个是对应的最优c的值
cp = CakeProblem()
v_zero = np.zeros(len(cp.grid)) #初始化v的值
v_one = cp.bellman_operator(v_zero)[0] #第一次迭代
v_two = cp.bellman_operator(v_one)[0] #第二次迭代
v_three = cp.bellman_operator(v_two)[0]#第三次迭代
v_four = cp.bellman_operator(v_three)[0]#第四次迭代
fig, ax = plt.subplots(figsize =(8,6))
ax.plot(cp.grid, v_zero, 'y-', lw = 2, alpha = .8, label='Initial Guess')
ax.plot(cp.grid, v_one, 'g-', lw = 2, alpha = .8, label='First ')
ax.plot(cp.grid, v_two, 'r-', lw = 2, alpha = .8, label='Second ')
ax.plot(cp.grid, v_three, 'c-', lw = 2, alpha = .8, label='Third ')
ax.plot(cp.grid, v_four, 'b-', lw = 2, alpha = .8, label='Fourth ')
ax.legend()#loc = 'lower right', fontsize='x-large')
ax.set_ylabel('$V(x)$', fontsize=12)
ax.set_xlabel('$x$', fontsize=12)
t = r'$Nonstochastic \ DP:\beta = {0:.2f}$'.format(cp.beta)
ax.set_title(t)
plt.show()
def compute_fixed_point(T, v, error_tol=1e-3, max_iter=500,#进行迭代的函数,T代表传入的值函数(贝尔曼方程),v代表上次迭代的值,error_tol代表误差范围,max_iter代表最大迭代次数,
print_skip=25,verbose=True):#print_skip代表打印结果的间隔,verbose代表是否打印结果
i = 0
error = error_tol + 1
while i < max_iter and error > error_tol:
new_v = T(v)[0]
i += 1
error = np.max(np.abs(new_v - v))
if verbose and i % print_skip == 0:
print(f"第 {i}步收敛结果是 {error}.")
v = new_v
if i == max_iter:
print("收敛失败")
if verbose and i < max_iter:
print(f"\n在第 {i} 步收敛")
return v
v = np.zeros(len(cp.grid))
v_star = compute_fixed_point(cp.bellman_operator, v, max_iter = 500)#对值函数进行迭代第 25步收敛结果是 32.61290746816758.
第 50步收敛结果是 11.9808393483504.
第 75步收敛结果是 4.322996516406192.
第 100步收敛结果是 1.5584699088216212.
第 125步收敛结果是 0.5616670706775722.
第 150步收敛结果是 0.20242254985498676.
第 175步收敛结果是 0.07295252479207193.
第 200步收敛结果是 0.026146320327711692.
第 225步收敛结果是 0.009306197514433734.
第 250步收敛结果是 0.0033539231632744304.
第 275步收敛结果是 0.0012087429016105489.
在第 280 步收敛
fig, ax = plt.subplots()
ax.plot(cp.grid, v_star, label='Approximate value function')#由于是迭代得到的结果,因此结果不能认为使精确的
ax.set_ylabel('$V(W_{t})$', fontsize=12)
ax.set_xlabel('$W_{t}$', fontsize=12)
ax.set_title('Value function')
ax.legend()
plt.show()
#由值函数迭代的最优解,代入贝尔曼方程求其策略
fig, ax = plt.subplots()
c_star = cp.bellman_operator(v_star)[1]
def piecewise_linear(x, k):
# x
# x>=x0 ⇒ lambda x: k2*x + y0 - k2*x0
return np.piecewise(x, [x < x0, x >= x0], [lambda x:k1*x + y0-k1*x0,
lambda x:k2*x + y0-k2*x0])
ax.plot(cp.grid, c_star, label='Approximate policy function')
ax.set_ylabel(r'$c_{t}(W_{t})$')
ax.set_xlabel('$W_{t}$')
ax.legend()
ax.set_title('Policy function')
plt.show()
小练习:
效用函数
(CRRA效用函数),在t期,
表示资本存量,
表示产出,
表示消费量,
表示投资,
表示资本的损失率,其中
考虑如下一个最优化问题:
(提示:求其贝尔曼方程为:
,等价于
,对其进行迭代求解即可。)
二、不确定性的动态规划问题
在简单问题的基础之上,我们考虑taste shock,这就是一个不确定性的问题,具体定义可以见教科书,其实只是简单问题的推广,原先的一维转移方程变成了二维的转移矩阵。在此给出贝尔曼方程:
作代换
得:
则可以进行迭代法求解了,以书上给的样例作如下求解
%matplotlib inline
from __future__ import division
from scipy.interpolate import InterpolatedUnivariateSpline
from scipy.optimize import fminbound
import matplotlib.pyplot as plt
import numpy as np
class CakeProblem(object):
def __init__(self, beta=0.95, r = 1/0.95, gamma = 2, shocks = [.9, 1.1],
transition = [[.3,.7],[.5,.5]], deterministic=False, grid_max=2,
grid_min = .4, grid_size=150):
self.r, self.beta, self.gamma = r, beta, gamma
self.shocks = shocks
self.transition = np.asarray(transition)
self.deterministic = deterministic
if deterministic == True:#确定性
self.transition = np.asarray([[1]])
self.shocks = np.asarray([0])
if self.gamma != 1:
self.u = lambda c: (c**(1 - gamma))/(1 - gamma)
else:
self.u = np.log
self.gamma = 1
self.grid = np.linspace(grid_min, grid_max, grid_size)
def bellman_operator(self, w):# 定义贝尔曼方程,输出函数和最优值
Aw = [InterpolatedUnivariateSpline(self.grid, w[i]) for i in range(len(self.shocks))]
Awx = lambda y : [Aw[i](y[i]) for i in range(len(y))]
Tw = np.asarray([np.empty(len(w[0]))]*len(self.shocks))
c = np.asarray([np.empty(len(w[0]))]*len(self.shocks))
for i,x in enumerate(self.grid):
for j in range(len(self.shocks)):
objective = lambda c: - self.u(c) - self.beta*(np.dot(self.transition[j],Awx(np.add(self.r*(x - c), self.shocks))))
c_star = fminbound(objective, 1e-6, x-1e-6)
Tw[j][i] = - objective(c_star)
c[j][i] = c_star
return Tw,c
def compute_fixed_point(T, v, error_tol=1e-3, max_iter=500,
print_skip=25,verbose=True):#迭代计算
i = 0
error = error_tol + 1
while i < max_iter and error > error_tol:
new_v = T(v)[0]
i += 1
error = np.max(np.abs(new_v - v))
if verbose and i % print_skip == 0:
print(f"第 {i}步收敛结果是 {error}.")
v = new_v
if i == max_iter:
print("收敛失败")
if verbose and i < max_iter:
print(f"\n在第 {i} 步收敛")
return v
scp2 = CakeProblem()
v = np.asarray([np.zeros(scp2.grid.size)]*len(scp2.shocks))
v_star = compute_fixed_point(scp2.bellman_operator, v, max_iter = 500, verbose = 1)第 25步收敛结果是 0.28708564466581876.
第 50步收敛结果是 0.07955836901511759.
第 75步收敛结果是 0.022055027459529697.
第 100步收敛结果是 0.006114529179431116.
第 125步收敛结果是 0.0016951696693112694.
在第 136 步收敛
fig = plt.figure(figsize=(16,6))
ax1 = plt.subplot(121)
ax2 = plt.subplot(122)
labels = [r'Shock {0:.2g}'.format(scp2.shocks[k]) for k in range(len(scp2.shocks))]
ax1.plot(scp2.grid, v_star[0], label=labels[0])
ax1.plot(scp2.grid, v_star[1], label=labels[1])
ax1.set_ylabel('$V(W_{t})$', fontsize=12)
ax1.set_xlabel('$W_{t}$', fontsize=12)
ax1.set_title('Value function')
ax1.legend()
c_star = scp2.bellman_operator(v_star)[1]
ax2.plot(scp2.grid, c_star[0], label=labels[0])
ax2.plot(scp2.grid, c_star[1], label=labels[0])
ax2.set_ylabel(r'$c_{t}(W_{t})$')
ax2.set_xlabel('$W_{t}$')
ax2.legend()
ax2.set_title('Policy function')
plt.show()
小练习:
参考Dynamic Economics一书中8.4.4的问题,求带有Borrowing Restriction的投资问题:值函数如下:
参考文献:Dynamic economics: quantitative methods and applications J Adda, R Cooper, RW Cooper - 2003