bootstrap是统计学中非常经典的一种重采样方法(resampling)。一般用于样本空间有限、变异系数估计较大的场合。土壤环境质量调查过程中很容易因为土壤的各向异性导致采集到的样品不具备代表性,因此需要用对应的工具进行计算。核心代码如下,改进自matlab自带的bootstrap。
function [bootstat, bootsam] = bootstrp(nboot,bootfun,varargin)
% 本代码为采用拔靴法(bootstrapping)进行土壤调查采样点位数据处理的核心函数
% 代码。
% 检查输入参数是否溢出
if nargin<2
error(message('stats:bootstrp:TooFewInputs'));
end
if nboot<=0 || nboot~=round(nboot)
error(message('stats:bootstrp:BadNboot'))
end
% 使用extractNameValuePairs函数提取非数值型参数
[weights, options, bootargs] = extractNameValuePairs(varargin{:});
% 传递必须形参
[n,booteval] = bootEvalCommand(bootfun,bootargs{:});
% 传递可选形参
[useParallel, RNGscheme, poolsz] = ...
internal.stats.parallel.processParallelAndStreamOptions(options,true);
usePool = useParallel && poolsz>0;
% 函数功能初始化
[myrand,randargs] = defineRNGcall(RNGscheme, usePool, n, weights);
% 开始函数迭代
% 根据形参进行并行化计算迭代(n=>2情况下适用)
haveallsamples = (nargout>=2);
% 对于bootfun函数没有给定形参情况下,自主生成抽样矩阵
% 根据需要产生重整化的全零矩阵
if isempty(bootfun)
bootstat = zeros(nboot,0);
if haveallsamples
bootsam = internal.stats.parallel.smartForSliceout( ...
nboot, @loopBodyEmptyBootfun, useParallel, RNGscheme);
end
return
end
% 抽样
try
% 获取每次抽样结果
bootstat = feval(bootfun,bootargs{:});
bootstat = bootstat(:)';
catch ME
m = message('stats:bootstrp:BadBootFun');
MEboot = MException(m.Identifier,'%s',getString(m));
ME = addCause(ME,MEboot);
rethrow(ME);
end
% 初始化抽样结果
% 计算抽样并保留前一次迭代结果
bootstat(nboot,1:numel(bootstat)) = bootstat;
% 迭代n次
if haveallsamples
[bootstat,bootsam] = internal.stats.parallel.smartForSliceout( ...
nboot, @loopBody, useParallel, RNGscheme);
else
bootstat = internal.stats.parallel.smartForSliceout( ...
nboot, @loopBody, useParallel, RNGscheme);
end
% 网格化函数
function onesample = loopBodyEmptyBootfun(~,~)
onesample = myrand(randargs{:});
end
function [onebootstat,onebootsam] = loopBody(~,~)
onesample = myrand(randargs{:});
tmp = booteval(onesample);
onebootstat = (tmp(:))';
if nargout > 1
onebootsam = onesample;
end
end
end
function [myrand,randargs] = defineRNGcall(RNGscheme,usePool,n,weights)
uuid = RNGscheme.uuid;
streams = RNGscheme.streams;
useSubstreams = RNGscheme.useSubstreams;
if isempty(streams)
if isempty(weights)
myrand = @randi;
randargs = {n,n,1};
else
myrand = @randsample;
randargs = {n,n,true,weights};
end
else
% 计算过程以文本流形式输出
S = streams{1};
if isempty(weights)
myrand = @randi;
if ~usePool || useSubstreams
randargs = {S,n,n,1};
else
myrand = @(streams,useSubstreams,usePool) ...
randi(internal.stats.parallel.workerGetValue(uuid),n,n,1);
randargs = {streams,useSubstreams,usePool};
end
else
if ~usePool || useSubstreams
myrand = @randsample;
randargs = {S,n,n,true,weights};
else
myrand = @(streams,useSubstreams,usePool) ...
randsample(internal.stats.parallel.workerGetValue(uuid),n,n,true,weights);
randargs = {streams,useSubstreams,usePool};
end
end
end
end
function tmp = generalEval(onesample,bootfun,la,scalard,varargin)
db = cell(la,1);
for k = 1:la
if scalard(k) == 0
db{k} = varargin{k}(onesample,:);
else
db{k} = varargin{k};
end
end
tmp = feval(bootfun,db{:});
end
function [n,anonfun] = bootEvalCommand(bootfun,varargin)
la = length(varargin);
if la == 0
error(message('stats:bootstrp:NotEnoughBootfunArgs'));
end
scalard = zeros(la,1);
n = 1;
for k = 1:la
[row,col] = size(varargin{k});
if max(row,col) == 1
scalard(k) = 1;
end
if row == 1 && col ~= 1
row = col;
varargin{k} = varargin{k}(:);
end
if n>1 && row>1 && row~=n
error(message('stats:bootstrp:SizeMismatchBootfunArgs'));
end
n = max(n,row);
end
if n<2
error(message('stats:bootstrp:OnlyScalarBootfunArgs'));
end
if la==1 && ~any(scalard)
X1 = varargin{1};
anonfun = @(onesample) feval(bootfun,X1(onesample,:));
elseif la==2 && ~any(scalard)
X1 = varargin{1};
X2 = varargin{2};
anonfun = @(onesample) feval(bootfun,X1(onesample,:),X2(onesample,:));
else
anonfun = @(onesample) generalEval(onesample,bootfun,la,scalard,varargin{:});
end
end
function [weights,options,bootargs] = extractNameValuePairs(varargin)
weights = [];
options = statset('bootstrp');
defSpecialArgs = {'weights' 'options'};
defSpecialValues = {weights options};
if length(varargin)>1
screen = @(arg) (ischar(arg) && size(arg,1)==1);
isspecial = cellfun(screen,varargin);
newOptions = [];
if any(isspecial)
firstspecial = find(isspecial,1,'first');
specialArgs = varargin(firstspecial:end);
varargin = varargin(1:firstspecial-1);
[weights,newOptions] ...
= internal.stats.parseArgs(defSpecialArgs,defSpecialValues,specialArgs{:});
end
if ~isempty(newOptions)
if ~isstruct(newOptions)
error(message('stats:bootstrp:BadOption'))
end
try
options = statset(options, newOptions);
catch ME
m = message('stats:bootstrp:BadOption');
newME = MException(m.Identifier,'%s',getString(m));
newME = addCause(newME,ME);
throw(newME);
end
end
end
bootargs = varargin;
end