批量读取文件并取并集
这里介绍的算是笨办法,适用于文件名很有规律的情况。另外,Linux 下似乎也可以取并集,而且比 R 语言更方便。
代码如下:
# 加载一个我常用的数据处理R包
library(dplyr)
library(data.table) #读大文件很快
# Read the input files.
# The ten tab-separated inputs differ only by a running index, so build all
# file names in one vectorised call instead of ten copy-pasted fread() lines.
# Adjust `chunk_ids` if the number of files changes.
chunk_ids <- 1:10
egwas_files <- paste0("type4_ColB-B73v4.", chunk_ids, ".egwas")

# Fail early, naming every missing file, before any (large) file is read.
missing_files <- egwas_files[!file.exists(egwas_files)]
if (length(missing_files) > 0) {
  stop(
    "Missing input file(s): ", paste(missing_files, collapse = ", "),
    call. = FALSE
  )
}

# Combine.
# Stack all tables row-wise. `use.names = TRUE` mirrors what rbind() does
# when it is given data.tables.
a <- rbindlist(lapply(egwas_files, fread, sep = "\t"), use.names = TRUE)

# Assign the 17 column names by reference (no copy of the table).
setnames(
  a,
  c(
    "SNP", "CHR", "BP", "RefAllele", "AltAllele", "freq", "Beta", "SE",
    "Chi", "P", "PGC", "n1", "freq1", "n2", "freq2", "Fst", "DD"
  )
)

# Keep only the first 16 columns, i.e. drop the trailing `DD` column.
a[, DD := NULL]

# dplyr step.
# For every SNP keep the row(s) whose PGC equals the smallest PGC of that SNP.
# Rows tied for the minimum are all kept. If any PGC within a SNP is NA,
# min() returns NA and filter() drops every row of that SNP.
# The final ungroup() is required: the original author notes that later
# processing misbehaves if the result is left grouped.
type2 <- a %>%
  group_by(SNP) %>%
  filter(PGC == min(PGC)) %>%
  ungroup()