孟德尔随机化分析（包括TwosampleMR和MRlap包）

最新推荐文章于 2025-03-25 23:10:59 发布

onlooker_zsq

最新推荐文章于 2025-03-25 23:10:59 发布

阅读量2.9k

点赞数 14

文章标签： r语言

本文链接：https://blog.csdn.net/onlookerq/article/details/138290800

版权

文章介绍了如何使用MRlapR包对单个暴露变量与多个结局进行分析，特别关注了处理样本重叠问题的方法，包括设置工作目录、数据预处理、调用相关包并执行for循环进行GWAS数据的处理和分析。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

MRlap包可以很好的校正暴露和结局之间样本重叠问题：所以当先用TwosampleMR包分析完成后，可以用MRlap作为补充：MR包具体介绍为：GitHub - n-mounier/MRlap: R package to perform two-sample Mendelian Randomisation (MR) analyses using (potentially) overlapping samples

MRlap包使用记录：

这里因为是单个暴露对多个结局的影响，所以简单写了个for循环，相对便捷一些：

setwd('H:\\Puberty_SEM\\MR_GWAS\\Outcome1')#设置工作目录

#调用用到的包
library(MRlap)
library(data.table)
library(dplyr)
user_SNP = exposure_dat#这个地方是对暴露数据进行clump之后的数据，为了获得clump之后的SNP
lf <-list.files(pattern = ".txt$") #以report.tsv 结尾的
files <- gsub("\\.txt", "", lf) #切掉后缀.tsv，获得这些名称，为循环准备
files[1]
lf[1]
for (i in seq_along(files)){
outcome_dat = fread(lf[i],header = T)
#MRlap要求必须要有chr和pos列,所以这里将原数据里面的BP和CHR列名更改为了chr和pos

colnames(outcome_dat)[colnames(outcome_dat)=='BP'] = 'pos'  
colnames(outcome_dat)[colnames(outcome_dat)=='CHR'] = 'chr'

#MRlap要求输入的GWAS summary data的A1和A2列必须为均为大写，所以需要转换一下，这个官网没介绍，一直出现报错后，才发现这个问题；另外，还有MRlap和LDSC一样，beta，和or值只能有一个，所以这里对数据中是否有OR值进行简单判断，如果有的话，将它改为其他的列名，就不会被识别了；由于MRlap内置的clump功能与twosampleMR一样，如果不使用本地，很难进行，虽然它官网提供了本地clump的功能，但是感觉应该是还没调试好，目前还是会报错，无法进行；所以就只能先clump好之后，用user_SNPsToKeep参数指定SNP；

  outcome_dat$A1 = toupper(outcome_dat$A1)
  outcome_dat$A2 = toupper(outcome_dat$A2)
  column_to_check <- "OR"
  if(column_to_check %in% colnames(outcome_dat)){
    colnames(outcome_dat)[colnames(outcome_dat)=='OR'] = 'id'
  } else {
    print(paste(column_to_check, "does not exist in the dataframe"))
  }
  exposure_dat = fread('./../PSY_factor.txt',header = T)
  colnames(exposure_dat)[2] = 'pos'
  colnames(exposure_dat)[colnames(exposure_dat)=='CHR'] = 'chr'
  user_SNP = as.vector(user_SNP)
  head(exposure_dat)
  A = MRlap(exposure = exposure_dat,
            exposure_name = "mvpuberty",
            outcome = outcome_dat,
            outcome_name = files[i],
            ld = "./../../eur_w_ld_chr",
            hm3 = "./../../eur_w_ld_chr/w_hm3.snplist",
            do_pruning = FALSE,
            user_SNPsToKeep = user_SNP)
  filename1 <- paste0("MRlap_",files[i], ".RData")
  save(A,file = filename1)
}

#下面是对自己使用TwosampleMR的简单记录，依然是单个暴露与多个结局之间MR分析的for循环。

setwd('H:\\Puberty_SEM\\MR_GWAS\\Outcome1')
library(TwoSampleMR)
library(dplyr)
library(ieugwasr)
exposure_dat = read_exposure_data('./../PSY_factor_5E-8.txt',
                                  sep = "\t",
                                  snp_col = "SNP",
                                  beta_col = "BETA",
                                  se_col = "SE",
                                  effect_allele_col = "A1",
                                  other_allele_col = "A2",
                                  pval_col = "P",
                                  samplesize_col = "N",
                                  phenotype_col ='Phenotype')
exposure_dat = mutate(exposure_dat,R=get_r_from_bsen(exposure_dat$beta.exposure,
                                                     exposure_dat$se.exposure,
                                                     exposure_dat$samplesize.exposure))
exposure_dat = mutate(exposure_dat,F=(exposure_dat$samplesize.exposure-2)*((R*R)/(1-R*R)))
exposure_dat = exposure_dat %>% filter(F>10)
colnames(exposure_dat)[1] ='rsid'
colnames(exposure_dat)[6] = 'pval'
colnames(exposure_dat)[11] = 'id'
exposure_dat=ld_clump(dat = exposure_dat,
                      plink_bin = 'C:/Users/DELL/Documents/R/win-library/4.1/plinkbinr/bin/plink_Windows.exe',
                      bfile = './../EUR/EUR',#链接：http://fileserve.mrcieu.ac.uk/ld/1kg.v3.tgz
                      clump_kb = 10000,
                      clump_r2 = 0.001
)
colnames(exposure_dat)[1] ='SNP'
colnames(exposure_dat)[6] = 'pval.exposure'
colnames(exposure_dat)[11] = 'id.exposure'
lf <-list.files(pattern = ".txt$") #以report.tsv 结尾的
files <- gsub("\\.txt", "", lf)   #切掉后缀.tsv，获得这些名称，为循环准备
files[1]
lf[1]
for (i in seq_along(files)){
  outcome_dat <- read_outcome_data(
    snps = exposure_dat$SNP,
    filename = lf[i],
    sep = "\t",
    snp_col = "SNP",
    beta_col = "BETA",
    se_col = "SE",
    effect_allele_col = "A1",
    other_allele_col = "A2",
    pval_col = "P",
    samplesize_col = "N",
    phenotype_col = 'Phenotype'
  )
  dat <- harmonise_data(
    exposure_dat = exposure_dat, 
    outcome_dat = outcome_dat
  )
  filename1 <- paste0("dat_",files[i], ".txt")
  write.table(dat,file=filename1,row.names = F,sep = '\t',quote = F)
  res <- mr(dat)
  res = generate_odds_ratios(res)
  filename2 <- paste0("res_",files[i], ".txt")
  write.table(res,file=filename2,row.names = F,sep = '\t',quote = F)
  het <- mr_heterogeneity(dat)
  filename3 <- paste0("het_",files[i], ".txt")
  write.table(het,file=filename3,row.names = F,sep = '\t',quote = F)
  plt <- mr_pleiotropy_test(dat)
  filename4 <- paste0("plt_",files[i], ".txt")
  write.table(plt,file=filename4,row.names = F,sep = '\t',quote = F)
  out <- directionality_test(dat)
  filename5 <- paste0("out_",files[i], ".txt")
  write.table(out,file=filename5,row.names = F,sep = '\t',quote = F)
}