library(vroom)
library(data.table)
library(stringr)
# Tidy GWAS summary statistics from 'pm2_5.txt':
#   1. split the colon-separated `variant` ID into CHR / Pos / A1 / A2,
#   2. keep variants not flagged low-confidence with minor_AF > 0.01,
#   3. attach identifiers from the lookup table via the "CHR:Pos" key `aa`,
#   4. keep rows whose identifier starts with "rs", rename/drop columns,
#   5. write the result to 'pm25snp.txt' (tab-separated).

# Lookup table. It is expected to supply the join key column `aa` and
# (presumably) the identifier column `name` used below -- confirm against
# the file header. Named `snp_map` to avoid masking base::match().
snp_map <- vroom("snp150_hg19.txt")
gwas <- fread("pm2_5.txt")

# Split `variant` once and pull fields by position. vapply() guarantees a
# character vector even for empty input; a missing field becomes NA.
variant_parts <- strsplit(gwas$variant, ":", fixed = TRUE)
variant_field <- function(i) {
  vapply(variant_parts, function(x) x[i], character(1))
}

# Field 4 is stored as A1 and field 3 as A2 (presumably the variant ID is
# chr:pos:ref:alt, making A1 the alternate allele -- TODO confirm).
gwas[, `:=`(
  CHR = variant_field(1L),
  Pos = variant_field(2L),
  A1 = variant_field(4L),
  A2 = variant_field(3L)
)]

# Join key in the form "CHR:Pos".
gwas[, aa := paste0(CHR, ":", Pos)]

# QC filters. The flag is compared case-insensitively as text so that it
# works whether it was parsed as logical or kept as "false"/"FALSE" strings;
# which() drops rows where either condition is NA.
not_low_confidence <-
  toupper(as.character(gwas$low_confidence_variant)) == "FALSE"
common_variant <- gwas$minor_AF > 0.01
gwas_qc <- gwas[which(not_low_confidence & common_variant), ]

annotated <- dplyr::left_join(gwas_qc, snp_map, by = "aa")

# Keep only rows with an identifier starting with "rs". which() discards
# unmatched rows (NA `name`) regardless of the class of `annotated`.
rs_rows <- annotated[which(str_detect(annotated$name, "^rs")), ]

setnames(
  rs_rows,
  old = c("name", "minor_AF", "n_complete_samples"),
  new = c("SNP", "maf", "N")
)

# Drop helper/unused columns in a single pass.
rs_rows <- subset(
  rs_rows,
  select = -c(variant, minor_allele, low_confidence_variant, aa)
)

write.table(
  rs_rows,
  file = "pm25snp.txt",
  quote = FALSE,
  sep = "\t",
  row.names = FALSE
)
# UKB GWAS data tidying
# (Web-page footer: latest recommended article published 2024-01-15 22:34:26)