library(stringr)
# Stage 1: for every gzipped input table, add a "chromosome:start" key column
# derived from MarkerName and write the result to "<base>_v2.txt".
#
# Select the files to process: names that start with an uppercase letter and
# end in "txt.gz". The dot is escaped and the pattern is end-anchored so that
# companion files (e.g. "*.txt.gz.tbi", "*.txt.gz.md5") are not read as tables.
mab <- list.files(pattern = "^[A-Z].*txt\\.gz$")
for (i in mab) {
  tmp <- read.table(i, header = TRUE)

  # Exact column lookup: `$` would partial-match a longer column name, and a
  # missing column would otherwise surface as an opaque assignment error.
  if (!"MarkerName" %in% names(tmp)) {
    stop("Column 'MarkerName' not found in ", i, call. = FALSE)
  }

  # Build the key by deleting the first ":" that is followed by non-digit
  # characters, together with that whole non-digit run.
  # NOTE(review): presumably MarkerName looks like "chr:pos:<alleles>" so the
  # result is "chr:pos" -- confirm against the input files.
  tmp[["chromosome:start"]] <- str_replace(tmp[["MarkerName"]], ":\\D+", "")

  # Output name = text before the first "." of the input name + "_v2.txt".
  indexs <- strsplit(i, ".", fixed = TRUE)
  filename <- paste0(indexs[[1]][1], "_v2.txt")

  # Save as a tab-separated file with a header row and no quoting.
  write.table(
    tmp,
    file = filename,
    sep = "\t",
    row.names = FALSE,
    col.names = TRUE,
    quote = FALSE
  )
}
library(dplyr)
# Stage 2: left-join every "<base>_v2.txt" table with the reference table
# "snp150_hg19.txt.gz" on the "chromosome:start" column and write the result
# to "<base>_v3.txt".
#
# Escaped dot + end anchor: only files that really end in "_v2.txt" are taken.
files <- list.files(pattern = "^[A-Z].*_v2\\.txt$")

# Reference table; check.names = FALSE keeps the "chromosome:start" header
# intact (the default would rewrite the ":" in the column name).
# NOTE(review): this variable shares its name with base::match(); the name is
# kept as-is in case later code in the script refers to it.
match <- read.table(
  "snp150_hg19.txt.gz",
  header = TRUE,
  check.names = FALSE,
  sep = "\t"
)

for (i in files) {
  tes <- read.table(i, header = TRUE, check.names = FALSE, sep = "\t")

  # Left join: every row of the input table is kept. Where the reference has
  # a row with the same key, its columns (presumably the RS number) are
  # attached; where it has none, those columns are NA.
  need <- dplyr::left_join(tes, match, by = "chromosome:start")

  # Replace only the "_v2.txt" suffix. Splitting on the first "_" would map
  # "A_x_v2.txt" and "A_y_v2.txt" to the same "A_v3.txt" and overwrite output.
  filename <- sub("_v2\\.txt$", "_v3.txt", i)

  write.table(
    need,
    file = filename,
    sep = "\t",
    row.names = FALSE,
    col.names = TRUE,
    quote = FALSE
  )
}