# One-click figure generation
# Install the required packages (run once; uncomment as needed)
#install.packages("devtools")
#devtools::install_github("mrcieu/gwasglue", force = TRUE)
#if (!require("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#BiocManager::install("VariantAnnotation")
#install.packages("remotes")
#remotes::install_github("MRCIEU/TwoSampleMR")
#install.packages("dplyr")
#install.packages("tidyr")
#install.packages("CMplot")
road<-getwd()
setwd(road)    # set the working directory
# Load the required packages
library(VariantAnnotation)
library(gwasglue)
library(dplyr)
library(tidyr)
library(CMplot)
library(TwoSampleMR)
exposureName="BMI"    # exposure name shown in the figures
outcomeName="Coronary heart disease"    # outcome name shown in the figures
inputFile="bbj-a-57.vcf.gz"    # exposure GWAS input file (change to the downloaded exposure file name)
outcomeFile="ukb-a-561.vcf.gz"    # outcome GWAS input file (change to the downloaded outcome file name)
# Read the exposure VCF file and convert it to TwoSampleMR format
vcfRT1 <- readVcf(inputFile)
exposuredata=gwasvcf_to_TwoSampleMR(vcf=vcfRT1, type="exposure")
# Read the outcome VCF file and convert it to TwoSampleMR format
vcfRT2 <- readVcf(outcomeFile)
outcomeData=gwasvcf_to_TwoSampleMR(vcf=vcfRT2, type="outcome")
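# Optional sanity check (not part of the original pipeline): glance at the
# converted data frames; gwasvcf_to_TwoSampleMR() returns TwoSampleMR-style columns.
#head(exposuredata)
#head(outcomeData)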
# Filter the exposure data by p-value < 5e-08
outTab<-subset(exposuredata, pval.exposure<5e-08)
write.csv(outTab, file="exposure.pvalue.csv", row.names=F)
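# Optional (not in the original script): report how many genome-wide
# significant SNPs (p < 5e-08) were retained as candidate instruments.
#cat("Genome-wide significant SNPs:", nrow(outTab), "\n")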
# Prepare the data for the exposure Manhattan plots
exposuredata=exposuredata[,c("SNP", "chr.exposure", "pos.exposure", "pval.exposure")]
colnames(exposuredata)=c("SNP","CHR","BP","pvalue")
# Draw the rectangular Manhattan plot
CMplot(exposuredata, plot.type="m",
LOG10=TRUE, threshold=5e-08, threshold.lwd=3, threshold.lty=1, signal.cex=0.2,
chr.den.col=NULL, cex=0.2, bin.size=1e5, ylim=c(0,50),
file="pdf", file.output=TRUE, width=15, height=9, verbose=TRUE)
# Draw the circular Manhattan plot
CMplot(exposuredata, plot.type="c",
LOG10=TRUE, threshold=5e-08, threshold.lwd=3, threshold.lty=1, signal.cex=0.2,
chr.den.col=NULL, cex=0.2, bin.size=1e5, ylim=c(0,100),
file="pdf", file.output=TRUE, width=7, height=7, verbose=TRUE)
exposureFile="exposure.pvalue.csv"    # input file
#setwd(road)    # optionally set the working directory for the MR results
# Read the input file
exposure_dat<-read_exposure_data(filename=exposureFile,
                                 sep = ",",
                                 snp_col = "SNP",
                                 beta_col = "beta.exposure",
                                 se_col = "se.exposure",
                                 pval_col = "pval.exposure",
                                 effect_allele_col = "effect_allele.exposure",
                                 other_allele_col = "other_allele.exposure",
                                 eaf_col = "eaf.exposure",
                                 samplesize_col = "samplesize.exposure",
                                 clump = F)
# Remove SNPs in linkage disequilibrium (common thresholds: kb=10000, r2=0.001)
exposure_dat_clumped <- clump_data(exposure_dat, clump_kb=10000, clump_r2=0.001)
write.csv(exposure_dat_clumped, file="exposure.LD.csv", row.names=F)
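# Hedged alternative (not part of the original pipeline): clump_data() sends the
# SNP list to the IEU OpenGWAS API. If the API is unreachable, ieugwasr::ld_clump()
# can clump against a local PLINK LD reference; the paths below are placeholders.
#library(ieugwasr)
#local_clumped <- ld_clump(
#    dat = data.frame(rsid=exposure_dat$SNP, pval=exposure_dat$pval.exposure),
#    clump_kb = 10000, clump_r2 = 0.001,
#    plink_bin = "plink",           # placeholder: path to a plink executable
#    bfile = "EUR_1kg_ref")         # placeholder: prefix of local .bed/.bim/.fam files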
inputFile="exposure.LD.csv"    # input file
#setwd(road)    # optionally set the working directory
# Read the input file
dat<-read.csv(inputFile, header=T, sep=",", check.names=F)
# Calculate the F statistic for each instrument
N=dat[1,"samplesize.exposure"]    # sample size
dat=transform(dat, R2=2*((beta.exposure)^2)*eaf.exposure*(1-eaf.exposure))    # variance explained (R2)
dat=transform(dat, F=(N-2)*R2/(1-R2))    # F statistic
# Keep instruments with F > 10 (remove weak instruments)
Ffilter=10    # F-statistic threshold
outTab=dat[dat$F>Ffilter,]
write.csv(outTab, "exposure.F.csv", row.names=F)
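# Optional check (not in the original script): summarise instrument strength
# and report how many weak instruments were dropped; a minimal sketch.
#summary(outTab$F)
#cat("Weak instruments removed (F<=10):", nrow(dat)-nrow(outTab), "\n")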
library(MendelianRandomization)    # load package
#library(phenoscanner)    # if phenoscanner() is not found, install the phenoscanner
#                         # package from GitHub (phenoscanner/phenoscanner)
inputFile="exposure.F.csv"    # input file
#setwd(road)    # optionally set a dedicated working directory for these results
# Read the exposure file (LD-pruned, weak instruments already removed)
dat=read.csv(inputFile, header=T, sep=",", check.names=F)
# Split the SNPs into chunks
snpId=dat$SNP
y=seq_along(snpId)
chunks <- split(snpId, ceiling(y/100))
# Loop over the chunks (PhenoScanner accepts a limited number of SNPs per query, hence chunks of 100)
outTab=data.frame()
for(i in names(chunks)){
    # Confounder screening with PhenoScanner
    confounder=phenoscanner(
        snpquery = chunks[[i]],
        catalogue = "GWAS",
        pvalue = 1e-05,
        proxies = "None",
        r2 = 0.8,
        build = 37)
    outTab=rbind(outTab, confounder$results)
}
# Output the table of traits associated with the SNPs
write.csv(outTab, "confounder.result.csv", row.names=F)
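# Hedged sketch (not in the original script): screen the PhenoScanner results for
# traits that look related to the outcome; the keywords are illustrative only and
# flagged SNPs should be reviewed manually before listing them in delSnp below.
#candidateSnp=unique(outTab$snp[grepl("coronary|heart", outTab$trait, ignore.case=TRUE)])
#print(candidateSnp)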
# Remove confounder SNPs and output the result
delSnp=c("rs13078960", "rs2030323")    # names of confounder SNPs (modify as needed)
dat=dat[!dat$SNP %in% delSnp,]
write.csv(dat, "exposure.confounder.csv", row.names=F)
exposureFile="exposure.confounder.csv"    # exposure data file after all filtering steps
# Read the exposure data
exposure_dat<-read_expos