function [p_opt, fval] = dynprog(x,DecisFun, SubObjFun, TransFun, ObjFun)
% 动态规划逆序算法
% x是状态变量,一列代表一个阶段状态;
% M-函数DecisFun(k,x)由阶段k的状态变量x求出相应的允许决策变量;
% M-函数SubObjFun(k,x,u)是阶段指标函数;
% M-函数TransFun(k,x,u)是状态转移函数,其中x是阶段k的某状态变量,u是相应的决策变量;
% M-函数ObjFun(v,f)是第k阶段至最后阶段指标函数,当ObjFun(v,f) = v+f时,输入的ObjFun可以省略;
% fval是一个列向量,各元素分别表示p_opt各最优策略组对应始端状态x的最优函数值
k =length(x(1,:));
x_isnan = ~isnan(x);
t_vub = inf;
t_vubm = inf*ones(size(x));
f_opt = nan*ones(size(x));
d_opt =f_opt;
%计算终端(最后一阶段)相关值
tmp1 = find(x_isnan(:,k));
tmp2 = length(tmp1);
for i = 1:tmp2
u = feval(DecisFun, k,x(i,k));
tmp3 = length(u);
for j = 1:tmp3
tmp = feval(SubObjFun,k,x(tmp1(i),k),u(j));
if tmp <= t_vub
f_opt(i,k) = tmp;
d_opt(i,k) = u(j);
t_vub =tmp;
end
end
end
for ii = k-1:-1:1
tmp10 = find(x_isnan(:,ii));
tmp20 = length(tmp10);
for i = 1:tmp20
u = feval(DecisFun,ii,x(i,ii));
tmp30 = length(u);
for j = 1:tmp30
tmp00 = feval(SubObjFun, ii, x(tmp10(i),ii),u(j));
tmp40 = feval(TransFun,ii,x(tmp10(i),ii),u(j));
tmp50 = x(:,ii+1) -tmp40;
tmp60 = find(tmp50 ==0);
if ~isempty(tmp60)
if nargin < 5
tmp00 =tmp00 + f_opt(tmp60(1),ii+1);
else
tmp00 = feval(ObjFun,tmp00,f_opt(tmp60(1),ii+1));
end
if tmp00 <= t_vubm(i,ii)
f_opt(i,ii) = tmp00;
d_opt(i,ii) = u(j);
t_vubm(i,ii) = tmp00;
end
end
end
end
end
fval = f_opt(tmp1,1);
fval = fval(find(~isnan(fval)),1);
%记录最优决策、最优轨迹线和相应指标函数值。
p_opt = [];
tmpx =[];
tmpd =[];
tmpf =[];
tmp0 = find(x_isnan(:,1));
tmp01 = length(tmp0);
% disp(tmp0);
for i = 1:tmp01
tmpd(i) = d_opt(tmp0(i),1);
tmpx(i) = x(tmp0(i),1);
tmpf(i) = feval(SubObjFun,1,tmpx(i),tmpd(i));
p_opt(k*(i-1) +1 ,[1,2,3,4]) = [1,tmpx(i),tmpd(i),tmpf(i)];
for ii = 2:k
tmpx(i) = feval(TransFun, ii-1,tmpx(i),tmpd(i));
tmp1 = x(:,ii) -tmpx(i);
tmp2 = find(tmp1==0);
if ~isempty(tmp2)
tmpd(i) = d_opt(tmp2(1),ii);
end
tmpf(i) = feval(SubObjFun,ii,tmpx(i),tmpd(i));
p_opt(k*(i-1) +ii,[1,2,3,4]) = [ii,tmpx(i),tmpd(i),tmpf(i)];
end
end