# R语言批处理中国地面气候资料日值数据集(V3.0)
# 代码按所需的数据项(要素)分段进行处理,每一段可以单独运行。
# 如果脚本无法运行,请先删除这些备注行。
# ---- Daily air temperature (TEM-12001) ----
# Combines every TEM daily file found under `rawpath`, writes the combined raw
# table to cmd_raw.csv and the normalised table to cmd_norm.csv, then writes
# the rows of station 56969 to yunnanmengla_tem.csv. All paths are relative to
# the working directory set below.
# NOTE: cmd_raw.csv and cmd_norm.csv are overwritten each time this runs.
gc()
library(data.table)
setwd("C:\\Users\\D\\Desktop\\SURF_CLI_CHN_MUL_DAY_V3.0") # working directory
rawpath <- "datasets" # folder that holds the raw TXT files
# The dot is escaped and the end is anchored so only names ending in ".TXT"
# match (a bare "." matches any character).
temfiles <- list.files(
  path = rawpath, full.names = TRUE,
  pattern = "^SURF_CLI_CHN_MUL_DAY-TEM-12001-.*\\.TXT$"
)
# Stop with a clear message rather than failing later on the column selection.
if (length(temfiles) == 0L) {
  stop("No SURF_CLI_CHN_MUL_DAY-TEM-12001 files found in '", rawpath, "'",
       call. = FALSE)
}
temdata <- rbindlist(lapply(temfiles, fread))[, 1:10] # keep columns 1-10
setnames(
  temdata,
  c("sid", "lat", "long", "elev", "year", "month", "day",
    "tmean", "tmax", "tmin")
)
# Sort once; the normalisation below never touches the sort columns.
setorder(temdata, sid, year, month, day)
fwrite(temdata, file = "cmd_raw.csv", quote = TRUE) # raw snapshot

# Normalise a copy so `temdata` keeps the raw values (`:=` modifies by
# reference). This replaces re-reading cmd_raw.csv from disk.
data <- copy(temdata)
tem_cols <- c("tmean", "tmax", "tmin")
data[, (tem_cols) := lapply(.SD, function(x) {
  x[x %in% 32766] <- NA # 32766 is treated as the missing-value code
  x * 0.1 # rescale by 0.1 (presumably stored in tenths - confirm)
}), .SDcols = tem_cols]
fwrite(data, file = "cmd_norm.csv", quote = TRUE) # normalised table

# Filter in memory instead of re-reading cmd_norm.csv with read.csv(); the
# subset keeps the sorted row order.
saihanba <- data[sid == 56969] # rows of station 56969
fwrite(saihanba, file = "yunnanmengla_tem.csv", quote = TRUE)
# ---- Daily relative humidity (RHU-13003) ----
# Combines every RHU daily file found under `rawpath`, writes the combined raw
# table to cmd_raw.csv and the normalised table to cmd_norm.csv, then writes
# the rows of station 56969 to yunnanmengla_rhu.csv. All paths are relative to
# the working directory set below.
# NOTE: cmd_raw.csv and cmd_norm.csv are overwritten each time this runs.
gc()
library(data.table)
setwd("C:\\Users\\D\\Desktop\\SURF_CLI_CHN_MUL_DAY_V3.0") # working directory
rawpath <- "datasets" # folder that holds the raw TXT files
# The dot is escaped and the end is anchored so only names ending in ".TXT"
# match (a bare "." matches any character).
rhufiles <- list.files(
  path = rawpath, full.names = TRUE,
  pattern = "^SURF_CLI_CHN_MUL_DAY-RHU-13003-.*\\.TXT$"
)
# Stop with a clear message rather than failing later on the column selection.
if (length(rhufiles) == 0L) {
  stop("No SURF_CLI_CHN_MUL_DAY-RHU-13003 files found in '", rawpath, "'",
       call. = FALSE)
}
rhudata <- rbindlist(lapply(rhufiles, fread))[, 1:9] # keep columns 1-9
setnames(
  rhudata,
  c("sid", "lat", "long", "elev", "year", "month", "day", "rhmean", "rhmin")
)
# Sort once; the normalisation below never touches the sort columns.
setorder(rhudata, sid, year, month, day)
fwrite(rhudata, file = "cmd_raw.csv", quote = TRUE) # raw snapshot

# Normalise a copy so `rhudata` keeps the raw values (`:=` modifies by
# reference). This replaces re-reading cmd_raw.csv from disk.
data <- copy(rhudata)
# 32766 is treated as the missing-value code; it must be cleared before the
# offset step below, which would otherwise shift it.
data[rhmean == 32766, rhmean := NA]
data[rhmin == 32766, rhmin := NA]
# Values >= 300 have 300 removed (presumably a flag offset - confirm against
# the dataset format document).
data[rhmin >= 300, rhmin := rhmin - 300]
data[, c("rhmean", "rhmin") := list(as.numeric(rhmean), as.numeric(rhmin))]
fwrite(data, file = "cmd_norm.csv", quote = TRUE) # normalised table

# Filter in memory instead of re-reading cmd_norm.csv with read.csv(); the
# subset keeps the sorted row order.
saihanba <- data[sid == 56969] # rows of station 56969
fwrite(saihanba, file = "yunnanmengla_rhu.csv", quote = TRUE)
# ---- Daily wind speed (WIN-11002) ----
# Combines every WIN daily file found under `rawpath`, writes the combined raw
# table to cmd_raw.csv and the normalised table to cmd_norm.csv, then writes
# the rows of station 56969 to yunnanmengla_win.csv. All paths are relative to
# the working directory set below.
# NOTE: cmd_raw.csv and cmd_norm.csv are overwritten each time this runs.
gc()
library(data.table)
setwd("C:\\Users\\D\\Desktop\\SURF_CLI_CHN_MUL_DAY_V3.0") # working directory
rawpath <- "datasets" # folder that holds the raw TXT files
# The dot is escaped and the end is anchored so only names ending in ".TXT"
# match (a bare "." matches any character).
winfiles <- list.files(
  path = rawpath, full.names = TRUE,
  pattern = "^SURF_CLI_CHN_MUL_DAY-WIN-11002-.*\\.TXT$"
)
# Stop with a clear message rather than failing later on the column selection.
if (length(winfiles) == 0L) {
  stop("No SURF_CLI_CHN_MUL_DAY-WIN-11002 files found in '", rawpath, "'",
       call. = FALSE)
}
windata <- rbindlist(lapply(winfiles, fread))[, 1:9] # keep columns 1-9
setnames(
  windata,
  c("sid", "lat", "long", "elev", "year", "month", "day", "wsmean", "wsmax")
)
# Sort once; the normalisation below never touches the sort columns.
setorder(windata, sid, year, month, day)
fwrite(windata, file = "cmd_raw.csv", quote = TRUE) # raw snapshot

# Normalise a copy so `windata` keeps the raw values (`:=` modifies by
# reference). This replaces re-reading cmd_raw.csv from disk.
data <- copy(windata)
win_cols <- c("wsmean", "wsmax")
data[, (win_cols) := lapply(.SD, function(x) {
  # 32766 is treated as the missing-value code; clear it first so the offset
  # step below does not shift it.
  x[x %in% 32766] <- NA
  # Values >= 1000 have 1000 removed (presumably a flag offset - confirm
  # against the dataset format document).
  flagged <- which(x >= 1000)
  x[flagged] <- x[flagged] - 1000
  x * 0.1 # rescale by 0.1 (presumably stored in tenths - confirm)
}), .SDcols = win_cols]
fwrite(data, file = "cmd_norm.csv", quote = TRUE) # normalised table

# Filter in memory instead of re-reading cmd_norm.csv with read.csv(); the
# subset keeps the sorted row order.
saihanba <- data[sid == 56969] # rows of station 56969
fwrite(saihanba, file = "yunnanmengla_win.csv", quote = TRUE)
# ---- Daily sunshine duration (SSD-14032) ----
# Combines every SSD daily file found under `rawpath`, writes the combined raw
# table to cmd_raw.csv and the normalised table to cmd_norm.csv, then writes
# the rows of station 56969 to yunnanmengla_ssd.csv. All paths are relative to
# the working directory set below.
# NOTE: cmd_raw.csv and cmd_norm.csv are overwritten each time this runs.
gc()
library(data.table)
setwd("C:\\Users\\D\\Desktop\\SURF_CLI_CHN_MUL_DAY_V3.0") # working directory
rawpath <- "datasets" # folder that holds the raw TXT files
# The dot is escaped and the end is anchored so only names ending in ".TXT"
# match (a bare "." matches any character).
ssdfiles <- list.files(
  path = rawpath, full.names = TRUE,
  pattern = "^SURF_CLI_CHN_MUL_DAY-SSD-14032-.*\\.TXT$"
)
# Stop with a clear message rather than failing later on the column selection.
if (length(ssdfiles) == 0L) {
  stop("No SURF_CLI_CHN_MUL_DAY-SSD-14032 files found in '", rawpath, "'",
       call. = FALSE)
}
ssddata <- rbindlist(lapply(ssdfiles, fread))[, 1:8] # keep columns 1-8
setnames(
  ssddata,
  c("sid", "lat", "long", "elev", "year", "month", "day", "dh")
)
# Sort once; the normalisation below never touches the sort columns.
setorder(ssddata, sid, year, month, day)
fwrite(ssddata, file = "cmd_raw.csv", quote = TRUE) # raw snapshot

# Normalise a copy so `ssddata` keeps the raw values (`:=` modifies by
# reference). This replaces re-reading cmd_raw.csv from disk.
data <- copy(ssddata)
data[dh == 32766, dh := NA] # 32766 is treated as the missing-value code
data[, dh := dh * 0.1] # rescale by 0.1 (presumably stored in tenths - confirm)
fwrite(data, file = "cmd_norm.csv", quote = TRUE) # normalised table

# Filter in memory instead of re-reading cmd_norm.csv with read.csv(); the
# subset keeps the sorted row order.
saihanba <- data[sid == 56969] # rows of station 56969
fwrite(saihanba, file = "yunnanmengla_ssd.csv", quote = TRUE)
# ---- Daily precipitation (PRE-13011) ----
# Combines every PRE daily file found under `rawpath`, writes the combined raw
# table to cmd_raw.csv and the normalised table to cmd_norm.csv, then writes
# the rows of station 56969 to yunnanmengla_pre.csv. All paths are relative to
# the working directory set below.
# NOTE: cmd_raw.csv and cmd_norm.csv are overwritten each time this runs.
gc()
library(data.table)
setwd("C:\\Users\\D\\Desktop\\SURF_CLI_CHN_MUL_DAY_V3.0") # working directory
rawpath <- "datasets" # folder that holds the raw TXT files
# The dot is escaped and the end is anchored so only names ending in ".TXT"
# match (a bare "." matches any character).
prefiles <- list.files(
  path = rawpath, full.names = TRUE,
  pattern = "^SURF_CLI_CHN_MUL_DAY-PRE-13011-.*\\.TXT$"
)
# Stop with a clear message rather than failing later on the column selection.
if (length(prefiles) == 0L) {
  stop("No SURF_CLI_CHN_MUL_DAY-PRE-13011 files found in '", rawpath, "'",
       call. = FALSE)
}
# Keep the seven station/date columns plus column 10 as the precipitation value.
predata <- rbindlist(lapply(prefiles, fread))[, c(1:7, 10)]
setnames(
  predata,
  c("sid", "lat", "long", "elev", "year", "month", "day", "pcp")
)
# Sort once; the normalisation below never touches the sort columns.
setorder(predata, sid, year, month, day)
fwrite(predata, file = "cmd_raw.csv", quote = TRUE) # raw snapshot

# Normalise a copy so `predata` keeps the raw values (`:=` modifies by
# reference). This replaces re-reading cmd_raw.csv from disk.
data <- copy(predata)
# The two special codes are handled first so the range step below cannot
# shift them: 32766 becomes missing and 32700 becomes zero.
data[pcp == 32766, pcp := NA]
data[pcp == 32700, pcp := 0]
# Values in [30000, 31000), [31000, 32000) and [32000, 33000) have their
# thousand-offset removed (presumably phenomenon flags - confirm against the
# dataset format document).
for (flag_offset in c(30000, 31000, 32000)) {
  data[
    (pcp >= flag_offset) & (pcp < flag_offset + 1000),
    pcp := pcp - flag_offset
  ]
}
data[, pcp := pcp * 0.1] # rescale by 0.1 (presumably stored in tenths - confirm)
fwrite(data, file = "cmd_norm.csv", quote = TRUE) # normalised table

# Filter in memory instead of re-reading cmd_norm.csv with read.csv(); the
# subset keeps the sorted row order.
saihanba <- data[sid == 56969] # rows of station 56969
fwrite(saihanba, file = "yunnanmengla_pre.csv", quote = TRUE)
# ---- Daily evaporation (EVP-13240) ----
# Combines every EVP daily file found under `rawpath`, writes the combined raw
# table to cmd_raw.csv and the normalised table to cmd_norm.csv, then writes
# the rows of station 56969 to yunnanmengla_evp.csv. All paths are relative to
# the working directory set below.
# NOTE: cmd_raw.csv and cmd_norm.csv are overwritten each time this runs.
gc()
library(data.table)
setwd("C:\\Users\\D\\Desktop\\SURF_CLI_CHN_MUL_DAY_V3.0") # working directory
rawpath <- "datasets" # folder that holds the raw TXT files
# The dot is escaped and the end is anchored so only names ending in ".TXT"
# match (a bare "." matches any character).
evpfiles <- list.files(
  path = rawpath, full.names = TRUE,
  pattern = "^SURF_CLI_CHN_MUL_DAY-EVP-13240-.*\\.TXT$"
)
# Stop with a clear message rather than failing later on the column selection.
if (length(evpfiles) == 0L) {
  stop("No SURF_CLI_CHN_MUL_DAY-EVP-13240 files found in '", rawpath, "'",
       call. = FALSE)
}
evpdata <- rbindlist(lapply(evpfiles, fread))[, 1:9] # keep columns 1-9
setnames(
  evpdata,
  c("sid", "lat", "long", "elev", "year", "month", "day", "evp1", "evp2")
)
# Sort once; the normalisation below never touches the sort columns.
setorder(evpdata, sid, year, month, day)
fwrite(evpdata, file = "cmd_raw.csv", quote = TRUE) # raw snapshot

# Normalise a copy so `evpdata` keeps the raw values (`:=` modifies by
# reference). This replaces re-reading cmd_raw.csv from disk.
data <- copy(evpdata)
evp_cols <- c("evp1", "evp2")
data[, (evp_cols) := lapply(.SD, function(x) {
  # 32766 is treated as the missing-value code; clear it first so the offset
  # step below does not shift it.
  x[x %in% 32766] <- NA
  # Values >= 1000 have 1000 removed (presumably a flag offset - confirm
  # against the dataset format document).
  flagged <- which(x >= 1000)
  x[flagged] <- x[flagged] - 1000
  x * 0.1 # rescale by 0.1 (presumably stored in tenths - confirm)
}), .SDcols = evp_cols]
fwrite(data, file = "cmd_norm.csv", quote = TRUE) # normalised table

# Filter in memory instead of re-reading cmd_norm.csv with read.csv(); the
# subset keeps the sorted row order.
saihanba <- data[sid == 56969] # rows of station 56969
fwrite(saihanba, file = "yunnanmengla_evp.csv", quote = TRUE)
# ---- Daily station pressure (PRS-10004) ----
# Combines every PRS daily file found under `rawpath`, writes the combined raw
# table to cmd_raw.csv and the normalised table to cmd_norm.csv, then writes
# the rows of station 56969 to yunnanmengla_prs.csv. All paths are relative to
# the working directory set below.
# NOTE: cmd_raw.csv and cmd_norm.csv are overwritten each time this runs.
gc()
library(data.table)
setwd("C:\\Users\\D\\Desktop\\SURF_CLI_CHN_MUL_DAY_V3.0") # working directory
rawpath <- "datasets" # folder that holds the raw TXT files
# The dot is escaped and the end is anchored so only names ending in ".TXT"
# match (a bare "." matches any character).
prsfiles <- list.files(
  path = rawpath, full.names = TRUE,
  pattern = "^SURF_CLI_CHN_MUL_DAY-PRS-10004-.*\\.TXT$"
)
# Stop with a clear message rather than failing later on the column selection.
if (length(prsfiles) == 0L) {
  stop("No SURF_CLI_CHN_MUL_DAY-PRS-10004 files found in '", rawpath, "'",
       call. = FALSE)
}
prsdata <- rbindlist(lapply(prsfiles, fread))[, 1:10] # keep columns 1-10
setnames(
  prsdata,
  c("sid", "lat", "long", "elev", "year", "month", "day",
    "pmean", "pmax", "pmin")
)
# Sort once; the normalisation below never touches the sort columns.
setorder(prsdata, sid, year, month, day)
fwrite(prsdata, file = "cmd_raw.csv", quote = TRUE) # raw snapshot

# Normalise a copy so `prsdata` keeps the raw values (`:=` modifies by
# reference). This replaces re-reading cmd_raw.csv from disk.
data <- copy(prsdata)
prs_cols <- c("pmean", "pmax", "pmin")
data[, (prs_cols) := lapply(.SD, function(x) {
  # 32766 is treated as the missing-value code; clear it first so the offset
  # step below does not shift it.
  x[x %in% 32766] <- NA
  # Values >= 20000 have 20000 removed (presumably a flag offset - confirm
  # against the dataset format document).
  flagged <- which(x >= 20000)
  x[flagged] <- x[flagged] - 20000
  x * 10 # rescale by 10 (presumably 0.1 hPa -> Pa - confirm)
}), .SDcols = prs_cols]
fwrite(data, file = "cmd_norm.csv", quote = TRUE) # normalised table

# Filter in memory instead of re-reading cmd_norm.csv with read.csv(); the
# subset keeps the sorted row order.
saihanba <- data[sid == 56969] # rows of station 56969
fwrite(saihanba, file = "yunnanmengla_prs.csv", quote = TRUE)
# ---- Daily 0 cm ground surface temperature (GST-12030-0cm) ----
# Combines every GST 0cm daily file found under `rawpath`, writes the combined
# raw table to cmd_raw.csv and the normalised table to cmd_norm.csv, then
# writes the rows of station 56969 to yunnanmengla_gst.csv. All paths are
# relative to the working directory set below.
# NOTE: cmd_raw.csv and cmd_norm.csv are overwritten each time this runs.
gc()
library(data.table)
setwd("C:\\Users\\D\\Desktop\\SURF_CLI_CHN_MUL_DAY_V3.0") # working directory
rawpath <- "datasets" # folder that holds the raw TXT files
# The dot is escaped and the end is anchored so only names ending in ".TXT"
# match (a bare "." matches any character).
gstfiles <- list.files(
  path = rawpath, full.names = TRUE,
  pattern = "^SURF_CLI_CHN_MUL_DAY-GST-12030-0cm-.*\\.TXT$"
)
# Stop with a clear message rather than failing later on the column selection.
if (length(gstfiles) == 0L) {
  stop("No SURF_CLI_CHN_MUL_DAY-GST-12030-0cm files found in '", rawpath, "'",
       call. = FALSE)
}
gstdata <- rbindlist(lapply(gstfiles, fread))[, 1:10] # keep columns 1-10
setnames(
  gstdata,
  c("sid", "lat", "long", "elev", "year", "month", "day",
    "stmean", "stmax", "stmin")
)
# Sort once; the normalisation below never touches the sort columns.
setorder(gstdata, sid, year, month, day)
fwrite(gstdata, file = "cmd_raw.csv", quote = TRUE) # raw snapshot

# Normalise a copy so `gstdata` keeps the raw values (`:=` modifies by
# reference). This replaces re-reading cmd_raw.csv from disk.
data <- copy(gstdata)
gst_cols <- c("stmean", "stmax", "stmin")
data[, (gst_cols) := lapply(.SD, function(x) {
  # 32766 is treated as the missing-value code; clear it first so the offset
  # steps below do not shift it.
  x[x %in% 32766] <- NA
  # Values >= 10000 have 10000 removed and values <= -10000 have 10000 added
  # (presumably out-of-range flags - confirm against the dataset format
  # document).
  above <- which(x >= 10000)
  x[above] <- x[above] - 10000
  below <- which(x <= -10000)
  x[below] <- x[below] + 10000
  x * 0.1 # rescale by 0.1 (presumably stored in tenths - confirm)
}), .SDcols = gst_cols]
fwrite(data, file = "cmd_norm.csv", quote = TRUE) # normalised table

# Filter in memory instead of re-reading cmd_norm.csv with read.csv(); the
# subset keeps the sorted row order.
saihanba <- data[sid == 56969] # rows of station 56969
fwrite(saihanba, file = "yunnanmengla_gst.csv", quote = TRUE)

# NOTE(review): this conversion runs after every file above has been written,
# so it only changes the in-memory `data`; confirm whether it was meant to
# precede the writes.
# lat/long: integer part of value / 100 plus (remainder / 60) - presumably
# degrees-and-minutes to decimal degrees. elev: values >= 100000 have 100000
# removed before the 0.1 rescale.
data[, c("lat", "long", "elev") :=
  list(
    lat %/% 100 + (lat %% 100) / 60,
    long %/% 100 + (long %% 100) / 60,
    ifelse(elev < 100000, elev * 0.1, (elev - 100000) * 0.1)
  )]