FUTURES模型 | 5. Potential 潜力子模块

最新推荐文章于 2024-10-25 11:17:32 发布

酱肉包-

最新推荐文章于 2024-10-25 11:17:32 发布

阅读量329

点赞数

分类专栏： FUTURES模型文章标签： r语言 python 开发语言

本文链接：https://blog.csdn.net/weixin_40960487/article/details/126557368

版权

FUTURES模型专栏收录该内容

9 篇文章 0 订阅

订阅专栏

POTENTIAL潜力值是由一组与地点适应性因子相关的系数计算得到的，表达的是某个地块被开发的可能性。这些系数通过R语言中的Ime4包进行多级逻辑回归得到，系数随着区域的变化而变化，最佳的模型由MuMln包的dredge函数自动选择得到。

模块r.futures.potential 有两种运行模式，如果没有-d标志，它将使用所有给定的预测器来构建模型，而如果使用-d标志，它评估所有预测器的不同组合，并根据AIC选择最佳组合。

输入数据
输出数据

输出文件的格式是CSV文件(以制表符作为分隔符)。表头包含因子的名称，第一列是表示子区域的标识符。列的顺序很重要，第二列表示截距，第三列表示开发压力，然后是预测因子。因此，开发压力列必须指定为选项列中的第一列。

源码解析

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
##############################################################################
#
# MODULE:       r.futures.potential
#
# AUTHOR(S):    Anna Petrasova (kratochanna gmail.com)
#
# PURPOSE:      FUTURES Potential submodel
#
# COPYRIGHT:    (C) 2016 by the GRASS Development Team
#
#               This program is free software under the GNU General Public
#               License (>=v2). Read the file COPYING that comes with GRASS
#               for details.
#
##############################################################################

#%module
#% description: Module for computing development potential as input to r.futures.pga
#% keyword: raster
#% keyword: statistics
#%end
#%option G_OPT_V_INPUT
#%end
#%option G_OPT_F_OUTPUT
#% description: Output Potential file
#%end
#%option G_OPT_DB_COLUMNS
#% description: Names of attribute columns representing sampled predictors
#% required: yes
#%end
#%option G_OPT_DB_COLUMN
#% key: developed_column
#% description: Name of attribute column representing development
#% required: yes
#%end
#%option G_OPT_DB_COLUMN
#% key: subregions_column
#% description: Name of attribute column representing subregions
#% required: yes
#%end
#%option
#% type: integer
#% key: min_variables
#% description: Minimum number of predictors considered
#% required: no
#% answer: 1
#% options: 1-20
#%end
#%option
#% type: integer
#% key: max_variables
#% description: Maximum number of predictors considered
#% required: no
#% options: 1-20
#%end
#%flag
#% key: d
#% description: Use dredge fuction to find best model
#%end

import sys
import atexit
import subprocess
import grass.script as gscript

#R 语言脚本
rscript = """
# load required libraries
for (package in c("MuMIn", "lme4", "optparse")) {
    if (!(is.element(package, installed.packages()[,1]) ))
        stop(paste("Package", package, " not found"))
    }
suppressPackageStartupMessages(library(MuMIn))
suppressPackageStartupMessages(library(lme4))
suppressPackageStartupMessages(library(optparse))
option_list = list(
  make_option(c("-i","--input"), action="store", default=NA, type='character', help="input CSV file"),
  make_option(c("-o","--output"), action="store", default=NA, type='character', help="output CSV file"),
  make_option(c("-l","--level"), action="store", default=NA, type='character', help="level variable name"),
  make_option(c("-r","--response"), action="store", default=NA, type='character', help="binary response variable name"),
  make_option(c("-d","--usedredge"), action="store", default=NA, type='logical', help="use dredge to find best model"),
  make_option(c("-m","--minimum"), action="store", default=NA, type='integer', help="minimum number of variables for dredge"),
  make_option(c("-x","--maximum"), action="store", default=NA, type='integer', help="maximum number of variables for dredge")
)

opt = parse_args(OptionParser(option_list=option_list))

# 模型调用
# import data
input_data = read.csv(opt$input)
# create global model with all variables
predictors <- names(input_data)
if (!is.na(opt$level)) {
    predictors <- predictors[predictors != opt$level]
}
predictors <- predictors[predictors != opt$response]

if (is.na(opt$level)) {
    fmla <- as.formula(paste(opt$response, " ~ ", paste(c(predictors), collapse= "+")))
    model = glm(formula=fmla, family = binomial, data=input_data, na.action = "na.fail")
} else {
    interc <- paste("(1|", opt$level, ")")
    fmla <- as.formula(paste(opt$response, " ~ ", paste(c(predictors, interc), collapse= "+")))
    model = glmer(formula=fmla, family = binomial, data=input_data, na.action = "na.fail")
}

if(opt$usedredge) {
    #create all possible models, always include county as the level
    if (is.na(opt$level)) {
        select.model <- dredge(model, evaluate=TRUE, rank="AIC", m.lim=c(opt$minimum, opt$maximum), trace=FALSE)
    } else {
        select.model <- dredge(model, evaluate=TRUE, rank="AIC", fixed=~(1|opt$level), m.lim=c(opt$minimum, opt$maximum), trace=FALSE)
    }
    # save the best model
    model.best <- get.models(select.model, 1)
    if (is.na(opt$level)) {
        model = glm(formula(model.best[[1]]), family = binomial, data=input_data, na.action = "na.fail")
    } else {
        model = glmer(formula(model.best[[1]]), family = binomial, data=input_data, na.action = "na.fail")
    }
}
print(summary(model))
if (is.na(opt$level)) {
    coefs <- t(as.data.frame(coef(model)))
} else {
    coefs <- as.data.frame(coef(model)[[1]])
}
write.table(cbind(rownames(coefs), coefs), opt$output, row.names=FALSE, sep="\t")
"""

TMP_CSV = None
TMP_POT = None
TMP_RSCRIPT = None


def cleanup():
    gscript.try_remove(TMP_CSV)
    gscript.try_remove(TMP_POT)
    gscript.try_remove(TMP_RSCRIPT)


def main():
    vinput = options['input']
    columns = options['columns'].split(',')
    binary = options['developed_column']
    level = options['subregions_column']
    minim = int(options['min_variables'])
    dredge = flags['d']

    # 相关因子的数量判断
    if options['max_variables']:
        maxv = (options['max_variables'])
    else:
        maxv = len(columns)
    if dredge and minim > maxv:
        gscript.fatal(_("Minimum number of predictor variables is larger than maximum number"))

    global TMP_CSV, TMP_RSCRIPT, TMP_POT
    # 临时csv文件
    TMP_CSV = gscript.tempfile(create=False) + '.csv'
    # 
    TMP_RSCRIPT = gscript.tempfile()
    include_level = True
    #
    distinct = gscript.read_command('v.db.select', flags='c', map=vinput,
                                    columns="distinct {l}".format(l=level)).strip()
    #组织输入文件
    if len(distinct.splitlines()) <= 1:
        include_level = False
        single_level = distinct.splitlines()[0]
    with open(TMP_RSCRIPT, 'w') as f:
        f.write(rscript)
    TMP_POT = gscript.tempfile(create=False) + '_potential.csv'
    columns += [binary]
    if include_level:
        columns += [level]
    where = "{c} IS NOT NULL".format(c=columns[0])
    for c in columns[1:]:
        where += " AND {c} IS NOT NULL".format(c=c)
    gscript.run_command('v.db.select', map=vinput, columns=columns, separator='comma', where=where, file=TMP_CSV)

    if dredge:
        gscript.info(_("Running automatic model selection ..."))
    else:
        gscript.info(_("Computing model..."))
    #调用R包
    cmd = ['Rscript', TMP_RSCRIPT, '-i', TMP_CSV,  '-r', binary,
               '-m', str(minim), '-x', str(maxv), '-o', TMP_POT, '-d', 'TRUE' if dredge else 'FALSE']
    if include_level:
        cmd += [ '-l', level]
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    print(stderr)
    if p.returncode != 0:
        print(stderr)
        gscript.fatal(_("Running R script failed, check messages above"))

    gscript.info(_("Best model summary:"))
    gscript.info("-------------------------")
    print(stdout)

    # 输出潜力文件
    with open(TMP_POT, 'r') as fin, open(options['output'], 'w') as fout:
        i = 0
        for line in fin.readlines():
            row = line.strip().split('\t')
            row = [each.strip('"') for each in row]
            if i == 0:
                row[0] = "ID"
                row[1] = "Intercept"
            if i == 1 and not include_level:
                row[0] = single_level
            fout.write('\t'.join(row))
            fout.write('\n')
            i += 1


if __name__ == "__main__":
    options, flags = gscript.parser()
    atexit.register(cleanup)
    sys.exit(main())