POTENTIAL潜力值是由一组与地点适应性因子相关的系数计算得到的,表达的是某个地块被开发的可能性。这些系数通过R语言中的Ime4包进行多级逻辑回归得到,系数随着区域的变化而变化,最佳的模型由MuMln包的dredge函数自动选择得到。
模块r.futures.potential 有两种运行模式,如果没有-d标志,它将使用所有给定的预测器来构建模型,而如果使用-d标志,它评估所有预测器的不同组合,并根据AIC选择最佳组合。
-
输入数据
-
输出数据
输出文件的格式是CSV文件(以制表符作为分隔符)。表头包含因子的名称,第一列是表示子区域的标识符。列的顺序很重要,第二列表示截距,第三列表示开发压力,然后是预测因子。因此,开发压力列必须指定为选项列中的第一列。
-
源码解析
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
##############################################################################
#
# MODULE: r.futures.potential
#
# AUTHOR(S): Anna Petrasova (kratochanna gmail.com)
#
# PURPOSE: FUTURES Potential submodel
#
# COPYRIGHT: (C) 2016 by the GRASS Development Team
#
# This program is free software under the GNU General Public
# License (>=v2). Read the file COPYING that comes with GRASS
# for details.
#
##############################################################################
#%module
#% description: Module for computing development potential as input to r.futures.pga
#% keyword: raster
#% keyword: statistics
#%end
#%option G_OPT_V_INPUT
#%end
#%option G_OPT_F_OUTPUT
#% description: Output Potential file
#%end
#%option G_OPT_DB_COLUMNS
#% description: Names of attribute columns representing sampled predictors
#% required: yes
#%end
#%option G_OPT_DB_COLUMN
#% key: developed_column
#% description: Name of attribute column representing development
#% required: yes
#%end
#%option G_OPT_DB_COLUMN
#% key: subregions_column
#% description: Name of attribute column representing subregions
#% required: yes
#%end
#%option
#% type: integer
#% key: min_variables
#% description: Minimum number of predictors considered
#% required: no
#% answer: 1
#% options: 1-20
#%end
#%option
#% type: integer
#% key: max_variables
#% description: Maximum number of predictors considered
#% required: no
#% options: 1-20
#%end
#%flag
#% key: d
#% description: Use dredge fuction to find best model
#%end
import sys
import atexit
import subprocess
import grass.script as gscript
#R 语言脚本
rscript = """
# load required libraries
for (package in c("MuMIn", "lme4", "optparse")) {
if (!(is.element(package, installed.packages()[,1]) ))
stop(paste("Package", package, " not found"))
}
suppressPackageStartupMessages(library(MuMIn))
suppressPackageStartupMessages(library(lme4))
suppressPackageStartupMessages(library(optparse))
option_list = list(
make_option(c("-i","--input"), action="store", default=NA, type='character', help="input CSV file"),
make_option(c("-o","--output"), action="store", default=NA, type='character', help="output CSV file"),
make_option(c("-l","--level"), action="store", default=NA, type='character', help="level variable name"),
make_option(c("-r","--response"), action="store", default=NA, type='character', help="binary response variable name"),
make_option(c("-d","--usedredge"), action="store", default=NA, type='logical', help="use dredge to find best model"),
make_option(c("-m","--minimum"), action="store", default=NA, type='integer', help="minimum number of variables for dredge"),
make_option(c("-x","--maximum"), action="store", default=NA, type='integer', help="maximum number of variables for dredge")
)
opt = parse_args(OptionParser(option_list=option_list))
# 模型调用
# import data
input_data = read.csv(opt$input)
# create global model with all variables
predictors <- names(input_data)
if (!is.na(opt$level)) {
predictors <- predictors[predictors != opt$level]
}
predictors <- predictors[predictors != opt$response]
if (is.na(opt$level)) {
fmla <- as.formula(paste(opt$response, " ~ ", paste(c(predictors), collapse= "+")))
model = glm(formula=fmla, family = binomial, data=input_data, na.action = "na.fail")
} else {
interc <- paste("(1|", opt$level, ")")
fmla <- as.formula(paste(opt$response, " ~ ", paste(c(predictors, interc), collapse= "+")))
model = glmer(formula=fmla, family = binomial, data=input_data, na.action = "na.fail")
}
if(opt$usedredge) {
#create all possible models, always include county as the level
if (is.na(opt$level)) {
select.model <- dredge(model, evaluate=TRUE, rank="AIC", m.lim=c(opt$minimum, opt$maximum), trace=FALSE)
} else {
select.model <- dredge(model, evaluate=TRUE, rank="AIC", fixed=~(1|opt$level), m.lim=c(opt$minimum, opt$maximum), trace=FALSE)
}
# save the best model
model.best <- get.models(select.model, 1)
if (is.na(opt$level)) {
model = glm(formula(model.best[[1]]), family = binomial, data=input_data, na.action = "na.fail")
} else {
model = glmer(formula(model.best[[1]]), family = binomial, data=input_data, na.action = "na.fail")
}
}
print(summary(model))
if (is.na(opt$level)) {
coefs <- t(as.data.frame(coef(model)))
} else {
coefs <- as.data.frame(coef(model)[[1]])
}
write.table(cbind(rownames(coefs), coefs), opt$output, row.names=FALSE, sep="\t")
"""
TMP_CSV = None
TMP_POT = None
TMP_RSCRIPT = None
def cleanup():
gscript.try_remove(TMP_CSV)
gscript.try_remove(TMP_POT)
gscript.try_remove(TMP_RSCRIPT)
def main():
vinput = options['input']
columns = options['columns'].split(',')
binary = options['developed_column']
level = options['subregions_column']
minim = int(options['min_variables'])
dredge = flags['d']
# 相关因子的数量判断
if options['max_variables']:
maxv = (options['max_variables'])
else:
maxv = len(columns)
if dredge and minim > maxv:
gscript.fatal(_("Minimum number of predictor variables is larger than maximum number"))
global TMP_CSV, TMP_RSCRIPT, TMP_POT
# 临时csv文件
TMP_CSV = gscript.tempfile(create=False) + '.csv'
#
TMP_RSCRIPT = gscript.tempfile()
include_level = True
#
distinct = gscript.read_command('v.db.select', flags='c', map=vinput,
columns="distinct {l}".format(l=level)).strip()
#组织输入文件
if len(distinct.splitlines()) <= 1:
include_level = False
single_level = distinct.splitlines()[0]
with open(TMP_RSCRIPT, 'w') as f:
f.write(rscript)
TMP_POT = gscript.tempfile(create=False) + '_potential.csv'
columns += [binary]
if include_level:
columns += [level]
where = "{c} IS NOT NULL".format(c=columns[0])
for c in columns[1:]:
where += " AND {c} IS NOT NULL".format(c=c)
gscript.run_command('v.db.select', map=vinput, columns=columns, separator='comma', where=where, file=TMP_CSV)
if dredge:
gscript.info(_("Running automatic model selection ..."))
else:
gscript.info(_("Computing model..."))
#调用R包
cmd = ['Rscript', TMP_RSCRIPT, '-i', TMP_CSV, '-r', binary,
'-m', str(minim), '-x', str(maxv), '-o', TMP_POT, '-d', 'TRUE' if dredge else 'FALSE']
if include_level:
cmd += [ '-l', level]
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = p.communicate()
print(stderr)
if p.returncode != 0:
print(stderr)
gscript.fatal(_("Running R script failed, check messages above"))
gscript.info(_("Best model summary:"))
gscript.info("-------------------------")
print(stdout)
# 输出潜力文件
with open(TMP_POT, 'r') as fin, open(options['output'], 'w') as fout:
i = 0
for line in fin.readlines():
row = line.strip().split('\t')
row = [each.strip('"') for each in row]
if i == 0:
row[0] = "ID"
row[1] = "Intercept"
if i == 1 and not include_level:
row[0] = single_level
fout.write('\t'.join(row))
fout.write('\n')
i += 1
if __name__ == "__main__":
options, flags = gscript.parser()
atexit.register(cleanup)
sys.exit(main())