傻瓜式孟德尔随机化

最新推荐文章于 2025-04-29 23:02:09 发布

小小赤脚！！！

最新推荐文章于 2025-04-29 23:02:09 发布

阅读量4.4k

点赞数 13

文章标签：最小二乘法

本文链接：https://blog.csdn.net/2303_78448036/article/details/133980282

版权

我要一键出图

#install.packages("devtools")
#devtools::install_github("mrcieu/gwasglue", force = TRUE)

#if (!require("BiocManager", quietly = TRUE))
#    install.packages("BiocManager")
#BiocManager::install("VariantAnnotation")

#install.packages("remotes")
#remotes::install_github("MRCIEU/TwoSampleMR")

#install.packages("dplyr")
#install.packages("tidyr")
#install.packages("CMplot")

road<-getwd()
setwd(road)      #设置工作目录

#引用包
library(VariantAnnotation)
library(gwasglue)
library(dplyr)
library(tidyr)
library(CMplot)
library(TwoSampleMR)

exposureName="BMI"                         #图形中展示暴露数据的名称
outcomeName="Coronary heart disease"       #图形中展示结局数据的名称

inputFile="bbj-a-57.vcf.gz"     #暴露数据输入文件(根据下载暴露数据的文件名称进行修改)
outcomeFile="ukb-a-561.vcf.gz"   #结局数据输入文件(改成已下载的结局数据文件)


#读取输入文件, 并对输入文件进行格式转换
vcfRT1 <- readVcf(inputFile)
exposuredata=gwasvcf_to_TwoSampleMR(vcf=vcfRT1, type="exposure")
#读取结局数据的vcf文件,并对数据进行格式转换
vcfRT2 <- readVcf(outcomeFile)
outcomeData=gwasvcf_to_TwoSampleMR(vcf=vcfRT2, type="outcome")
#根据pvalue<5e-08对结果进行过滤
outTab<-subset(exposuredata, pval.exposure<5e-08)
write.csv(outTab, file="exposure.pvalue.csv", row.names=F)

#准备绘制暴露变量的曼哈顿图的数据
exposuredata=exposuredata[,c("SNP", "chr.exposure", "pos.exposure", "pval.exposure")]
colnames(exposuredata)=c("SNP","CHR","BP","pvalue")

#绘制线性的曼哈顿图
CMplot(exposuredata,  plot.type="m",
       LOG10=TRUE, threshold=5e-08, threshold.lwd=3, threshold.lty=1, signal.cex=0.2,
       chr.den.col=NULL, cex=0.2, bin.size=1e5, ylim=c(0,50),
       file="pdf", file.output=TRUE, width=15, height=9, verbose=TRUE)

#绘制圈图
CMplot(exposuredata,  plot.type="c",
       LOG10=TRUE, threshold=5e-08, threshold.lwd=3, threshold.lty=1, signal.cex=0.2,
       chr.den.col=NULL, cex=0.2, bin.size=1e5, ylim=c(0,100),
       file="pdf", file.output=TRUE, width=7, height=7, verbose=TRUE)



exposureFile="exposure.pvalue.csv"     #输入文件
#setwd(road)      #设置孟德尔结果数据保存的工作目录

#读取输入文件
exposure_dat<-read_exposure_data(filename=exposureFile,
                                 sep = ",",
                                 snp_col = "SNP",
                                 beta_col = "beta.exposure",
                                 se_col = "se.exposure",
                                 effect_allele_col = "effect_allele.exposure",
                                 other_allele_col = "other_allele.exposure",
                                 eaf_col = "eaf.exposure",
                                 samplesize_col = "samplesize.exposure",
                                 clump = F)

#去除连锁不平衡的SNP，一般标准为kb=10000,r2=0.001
exposure_dat_clumped <- clump_data(exposure_dat, clump_kb=10000, clump_r2=0.001)
write.csv(exposure_dat_clumped, file="exposure.LD.csv", row.names=F)

inputFile="exposure.LD.csv"      #输入文件
#setwd(road)      #设置工作目录

#读取输入文件
dat<-read.csv(inputFile, header=T, sep=",", check.names=F)

#计算F检验值
N=dat[1,"samplesize.exposure"]     #获取样品的数目
dat=transform(dat,R2=2*((beta.exposure)^2)*eaf.exposure*(1-eaf.exposure))     #计算R2
dat=transform(dat,F=(N-2)*R2/(1-R2))      #计算F检验值

#根据F值>10进行过滤, 删除弱工具变量
Ffilter=10        #F值过滤条件，
outTab=dat[dat$F>Ffilter,]
write.csv(dat, "exposure.F.csv", row.names=F)

library(MendelianRandomization)     #引用包
inputFile="exposure.F.csv"          #输入文件
#setwd(road)     #可设置专属文件的独有工作目录

#读取已经去除连锁不平衡、低F值的暴露因素文件
dat=read.csv(inputFile, header=T, sep=",", check.names=F)

#对SNP分组
snpId=dat$SNP
y=seq_along(snpId)
chunks <- split(snpId, ceiling(y/100))

#对分组进行循环,每次得到一个分组
outTab=data.frame()
for(i in names(chunks)){
  #混杂因素分析
  confounder=phenoscanner(
    snpquery = chunks[[i]],
    catalogue = "GWAS",
    pvalue = 1e-05,
    proxies = "None",
    r2 = 0.8,
    build = 37)
  outTab=rbind(outTab, confounder$results)
}
#输出SNP相关性状的表格
write.csv(outTab, "confounder.result.csv", row.names=F)

#输出去除混杂因素的结果
delSnp=c("rs13078960", "rs2030323")     #混杂SNP的名称(需修改)
dat=dat[!dat$SNP %in% delSnp,]
write.csv(dat, "exposure.confounder.csv", row.names=F)

exposureFile="exposure.confounder.csv"     #输入经各种过滤的暴露数据文件

#读取暴露数据
exposure_dat<-read_expos