MRlap包可以很好的校正暴露和结局之间样本重叠问题:所以当先用TwosampleMR包分析完成后,可以用MRlap作为补充:MR包具体介绍为:GitHub - n-mounier/MRlap: R package to perform two-sample Mendelian Randomisation (MR) analyses using (potentially) overlapping samples
MRlap包使用记录:
这里因为是单个暴露对多个结局的影响,所以简单写了个for循环,相对便捷一些:
setwd('H:\\Puberty_SEM\\MR_GWAS\\Outcome1')#设置工作目录
#调用用到的包
library(MRlap)
library(data.table)
library(dplyr)
user_SNP = exposure_dat#这个地方是对暴露数据进行clump之后的数据,为了获得clump之后的SNP
lf <-list.files(pattern = ".txt$") #以report.tsv 结尾的
files <- gsub("\\.txt", "", lf) #切掉后缀.tsv,获得这些名称,为循环准备
files[1]
lf[1]
for (i in seq_along(files)){
outcome_dat = fread(lf[i],header = T)
#MRlap要求必须要有chr和pos列,所以这里将原数据里面的BP和CHR列名更改为了chr和pos
colnames(outcome_dat)[colnames(outcome_dat)=='BP'] = 'pos'
colnames(outcome_dat)[colnames(outcome_dat)=='CHR'] = 'chr'
#MRlap要求输入的GWAS summary data的A1和A2列必须为均为大写,所以需要转换一下,这个官网没介绍,一直出现报错后,才发现这个问题;另外,还有MRlap和LDSC一样,beta,和or值只能有一个,所以这里对数据中是否有OR值进行简单判断,如果有的话,将它改为其他的列名,就不会被识别了;由于MRlap内置的clump功能与twosampleMR一样,如果不使用本地,很难进行,虽然它官网提供了本地clump的功能,但是感觉应该是还没调试好,目前还是会报错,无法进行;所以就只能先clump好之后,用user_SNPsToKeep参数指定SNP;
outcome_dat$A1 = toupper(outcome_dat$A1)
outcome_dat$A2 = toupper(outcome_dat$A2)
column_to_check <- "OR"
if(column_to_check %in% colnames(outcome_dat)){
colnames(outcome_dat)[colnames(outcome_dat)=='OR'] = 'id'
} else {
print(paste(column_to_check, "does not exist in the dataframe"))
}
exposure_dat = fread('./../PSY_factor.txt',header = T)
colnames(exposure_dat)[2] = 'pos'
colnames(exposure_dat)[colnames(exposure_dat)=='CHR'] = 'chr'
user_SNP = as.vector(user_SNP)
head(exposure_dat)
A = MRlap(exposure = exposure_dat,
exposure_name = "mvpuberty",
outcome = outcome_dat,
outcome_name = files[i],
ld = "./../../eur_w_ld_chr",
hm3 = "./../../eur_w_ld_chr/w_hm3.snplist",
do_pruning = FALSE,
user_SNPsToKeep = user_SNP)
filename1 <- paste0("MRlap_",files[i], ".RData")
save(A,file = filename1)
}
#下面是对自己使用TwosampleMR的简单记录,依然是单个暴露与多个结局之间MR分析的for循环。
setwd('H:\\Puberty_SEM\\MR_GWAS\\Outcome1')
library(TwoSampleMR)
library(dplyr)
library(ieugwasr)
exposure_dat = read_exposure_data('./../PSY_factor_5E-8.txt',
sep = "\t",
snp_col = "SNP",
beta_col = "BETA",
se_col = "SE",
effect_allele_col = "A1",
other_allele_col = "A2",
pval_col = "P",
samplesize_col = "N",
phenotype_col ='Phenotype')
exposure_dat = mutate(exposure_dat,R=get_r_from_bsen(exposure_dat$beta.exposure,
exposure_dat$se.exposure,
exposure_dat$samplesize.exposure))
exposure_dat = mutate(exposure_dat,F=(exposure_dat$samplesize.exposure-2)*((R*R)/(1-R*R)))
exposure_dat = exposure_dat %>% filter(F>10)
colnames(exposure_dat)[1] ='rsid'
colnames(exposure_dat)[6] = 'pval'
colnames(exposure_dat)[11] = 'id'
exposure_dat=ld_clump(dat = exposure_dat,
plink_bin = 'C:/Users/DELL/Documents/R/win-library/4.1/plinkbinr/bin/plink_Windows.exe',
bfile = './../EUR/EUR',#链接:http://fileserve.mrcieu.ac.uk/ld/1kg.v3.tgz
clump_kb = 10000,
clump_r2 = 0.001
)
colnames(exposure_dat)[1] ='SNP'
colnames(exposure_dat)[6] = 'pval.exposure'
colnames(exposure_dat)[11] = 'id.exposure'
lf <-list.files(pattern = ".txt$") #以report.tsv 结尾的
files <- gsub("\\.txt", "", lf) #切掉后缀.tsv,获得这些名称,为循环准备
files[1]
lf[1]
for (i in seq_along(files)){
outcome_dat <- read_outcome_data(
snps = exposure_dat$SNP,
filename = lf[i],
sep = "\t",
snp_col = "SNP",
beta_col = "BETA",
se_col = "SE",
effect_allele_col = "A1",
other_allele_col = "A2",
pval_col = "P",
samplesize_col = "N",
phenotype_col = 'Phenotype'
)
dat <- harmonise_data(
exposure_dat = exposure_dat,
outcome_dat = outcome_dat
)
filename1 <- paste0("dat_",files[i], ".txt")
write.table(dat,file=filename1,row.names = F,sep = '\t',quote = F)
res <- mr(dat)
res = generate_odds_ratios(res)
filename2 <- paste0("res_",files[i], ".txt")
write.table(res,file=filename2,row.names = F,sep = '\t',quote = F)
het <- mr_heterogeneity(dat)
filename3 <- paste0("het_",files[i], ".txt")
write.table(het,file=filename3,row.names = F,sep = '\t',quote = F)
plt <- mr_pleiotropy_test(dat)
filename4 <- paste0("plt_",files[i], ".txt")
write.table(plt,file=filename4,row.names = F,sep = '\t',quote = F)
out <- directionality_test(dat)
filename5 <- paste0("out_",files[i], ".txt")
write.table(out,file=filename5,row.names = F,sep = '\t',quote = F)
}