setwd('H:\\Puberty_SEM\\MR_GWAS')
library(TwoSampleMR)
library(dplyr)
files = c('ukb-a-25')
files[1]
这里用计算继续F值,但是由于要用到samplesize,所以先用if语句判断一下数据中是否含有samplesize值,如果是NA值,就跳过,这样就减少了观看每个文档的时间;这条命令也适用于其他的很多种场景中;所以简单记录一下;
for (i in seq_along(files)){
exposure_dat <- extract_instruments(outcomes = files[i])
if(any(is.na(exposure_dat$samplesize.exposure))){
print(paste("列", "column_name", "包含NA值,跳过这个步骤。"))
}else{
exposure_dat = mutate(exposure_dat,R=get_r_from_bsen(exposure_dat$beta.exposure,
exposure_dat$se.exposure,
exposure_dat$samplesize.exposure))
exposure_dat = mutate(exposure_dat,F=(exposure_dat$samplesize.exposure-2)*((R*R)/(1-R*R)))
exposure_dat = exposure_dat %>% filter(F>10)
}
outcome_dat <- read_outcome_data(
snps = exposure_dat$SNP,
filename = "PSY_factor.txt",
sep = "\t",
snp_col = "rsID",
beta_col = "BETA",
se_col = "SE",
effect_allele_col = "EA",
other_allele_col = "NEA",
pval_col = "P",
samplesize_col = "N"
)
dat <- harmonise_data(
exposure_dat = exposure_dat,
outcome_dat = outcome_dat
)
filename1 <- paste0("dat_",files[i], ".txt")#这里给输出文件命名一下
write.table(dat,file=filename1,row.names = F,sep = '\t',quote = F)
res <- mr(dat)
res = generate_odds_ratios(res)
filename2 <- paste0("res_",files[i], ".txt")
write.table(res,file=filename2,row.names = F,sep = '\t',quote = F)
het <- mr_heterogeneity(dat)
filename3 <- paste0("het_",files[i], ".txt")
write.table(res,file=filename3,row.names = F,sep = '\t',quote = F)
plt <- mr_pleiotropy_test(dat)
filename4 <- paste0("plt_",files[i], ".txt")
write.table(res,file=filename4,row.names = F,sep = '\t',quote = F)
#这里一样用了一个if循环判断一下
if(any(is.na(exposure_dat$samplesize.exposure))){
print(paste("列", "column_name", "包含NA值,跳过这个步骤。"))
}else{
out <- directionality_test(dat)
filename5 <- paste0("out_",files[i], ".txt")
}
write.table(res,file=filename5,row.names = F,sep = '\t',quote = F)
}
##批量提取某GWAS数据中多个snp 1Mb范围内p值小于5e-8的所有snplibrary(data.table)
library(dplyr)
df = fread('age of menarche.txt',header = T)
files = c(##多个snp名,进行循环
)
files[1]
for (i in seq_along(files)){
df1 = df[(df$RSID==files[i]),] #df 数据框中RSID列等于files的那一行
a = df1$CHR #df 数据框中RSID列等于files的那一行,的CHR
a = as.numeric(a)
b = df1$POS
b = as.numeric(b)
subset_df = subset(df,CHR==a & POS<(b+500000) & POS>(b-500000) & P<5e-8) #提取df数据框中CHR==a,POS 1Mb范围内P值小于5e-8的SNP
filename1 <- paste0("voice_",files[i], ".txt")
write.table(subset_df,file = filename1,row.names = F,sep = '\t',quote = F)
}
#RSID获取其chr和bp
library(dplyr)
library(data.table)
library(tidyr)
match = fread("snp150_hg19.txt",header=T,check.names=F,sep="\t")#用hg19作为reference 进行转换
files = c(#多个GWAS数据
)
files[1]
for (i in seq_along(files)){
tes = fread(files[i],header=T,check.names=F,sep="\t")
colnames(tes)[1] = 'name'#因为snp150_hg19.txt数据中列名为name,所以转换以下
need=dplyr::left_join(tes,match,by="name")
need = na.omit(need)
need=temp%>%separate(`chromosome:start`,c('CHR','BP'),'[:]')
colnames(need)=c('SNP','A1','A2','Freq','BETA','SE','P','N','CHR','BP')
need = select(need,c('SNP','CHR','BP','A1','A2','Freq','BETA','SE','P','N'))
write.table(need,file = filest[i],append = F,quote = F ,sep = "\t",row.names=F)
}
#批量添加表型列和按不同的阈值筛选IVs
setwd('H:\\Puberty_SEM\\MR_GWAS\\Exposure')
library(dplyr)
library(data.table)
library(tidyr)
filest <- list.files(pattern = "*.txt")
filelent <- length(filest)
filest[2]
lf <-list.files(pattern = ".txt$") #以report.tsv 结尾的
files <- gsub("\\.txt", "", lf)
files[2]
for(i in 1:length(filest)){
temp <-fread(filest[i],header = T,sep = "\t")
column_to_check <- "Phenotype"#检查是否有表型列,如果没有添加文件名作为表型列
if(column_to_check %in% colnames(temp)){
print(paste(column_to_check, "exist in the dataframe"))
} else {
temp = mutate(temp,Phenotype = files[i])
}
less_than_5e_8_count <- sum(temp$P < 5e-8)#检查GWAS summary data数据是否用P值小于5e-8的值,如果有用5e-8作为阈值,如果没有用1e-5作为阈值;
if (less_than_5e_8_count < 1000) {
temp1e_5 = temp %>% filter(P<1e-5)
temp1e_5 =
select(temp1e_5,c('SNP','CHR','BP','A1','A2','BETA','SE','P','N','Phenotype'))
filename1 <- paste(files[i],'.1E_5')
write.table(temp1e_5,file = filename1,append = F,quote = F ,sep = "\t",row.names=F)
} else{
temp5e_8 = temp %>% filter(P<5e-8)
temp5e_8 =
select(temp5e_8,c('SNP','CHR','BP','A1','A2','BETA','SE','P','N','Phenotype'))#为了合并时列数和列名均一致
filename2 <- paste(files[i],'.5E_8')
write.table(temp5e_8,file = filename2,append = F,quote = F ,sep = "\t",row.names=F)
}
}
#将1e-5和5e-8的数据分别合并
filest <- list.files(pattern = "*.1E_5")
filelent <- length(filest)
newdatat <- c()
for(i in 1:length(filest)){
temp <-read.table(filest[i],header = T,sep = "\t")
newdatat = rbind(newdatat,temp)
}
write.table(x = newdatat,file = "Exposure_1e_5.txt",append = F,quote = F ,sep = "\t",row.names=F)
filest <- list.files(pattern = "*.5E_8")
filelent <- length(filest)
newdatat <- c()
for(i in 1:length(filest)){
temp <-read.table(filest[i],header = T,sep = "\t")
newdatat = rbind(newdatat,temp)
}
write.table(x = newdatat,file = "Exposure_5E_8.txt",append = F,quote = F ,sep = "\t",row.names=F)
##ieu opengwas 数据(.vcf.gz)转换为twosampleMR的格式
setwd('H:\\Puberty_SEM\\MR_GWAS\\Biomarkers\\biomarkers')
library(VariantAnnotation)
library(gwasglue)
library(TwoSampleMR)
library(ieugwasr)
library(dplyr)
lf <-list.files(pattern = ".gz$")#以report.tsv 结尾的
files <- gsub("\\.gz", "", lf)
files[1]
for(i in 1:length(lf)){
exposure_vcf <- readVcf(lf[i])
exposure_data <- gwasvcf_to_TwoSampleMR(exposure_vcf)
write.table(exposure_data,file = files[i],row.names = F,sep = '\t',quote = F)
}
library(data.table)
library(dplyr)
lf <-list.files(pattern = ".txt$")
for(i in 1:length(lf)){
temp <-fread(lf[i],header = T,sep = "\t")
colnames(temp)[colnames(temp)=='chr.exposure'] = 'CHR'
colnames(temp)[colnames(temp)=='pos.exposure'] = 'POS'
colnames(temp)[colnames(temp)=='other_allele.exposure'] = 'A2'
colnames(temp)[colnames(temp)=='effect_allele.exposure'] = 'A1'
colnames(temp)[colnames(temp)=='beta.exposure'] = 'BETA'
colnames(temp)[colnames(temp)=='se.exposure'] = 'SE'
colnames(temp)[colnames(temp)=='pval.exposure'] = 'P'
colnames(temp)[colnames(temp)=='eaf.exposure'] = 'freq'
colnames(temp)[colnames(temp)=='samplesize.exposure'] = 'N'
colnames(temp)[colnames(temp)=='SNP'] = 'SNP'
temp = select(temp,c('SNP','CHR','POS','A1','A2','freq','BETA','SE','P','N'))
write.table(temp,file = lf[i],append = F,quote = F ,sep = "\t",row.names=F)
}
setwd('H:\\Puberty_SEM\\MR_GWAS\\Biomarkers\\biomarkers')
library(TwoSampleMR)
filest <- list.files(pattern = "*.vcf")
filelent <- length(filest)
filest[2]
files <- gsub("\\.vcf", "", filest)
files[1]
exposure_dat = read.table('./../PSY_factor_clump.txt',header = T,sep = '\t')
for (i in 1:length(filest )){
outcome_dat <- read_outcome_data(
snps = exposure_dat$SNP,
filename = filest[i],
sep = "\t",
snp_col = "SNP",
beta_col = "BETA",
se_col = "SE",
effect_allele_col = "A1",
other_allele_col = "A2",
pval_col = "P",
samplesize_col = "N",
phenotype_col = 'Phenotype'
)
dat <- harmonise_data(
exposure_dat = exposure_dat,
outcome_dat = outcome_dat
)
filename1 <- paste0("dat_",files[i], ".txt")
write.table(dat,file=filename1,row.names = F,sep = '\t',quote = F)
res <- mr(dat)
res = generate_odds_ratios(res)
filename2 <- paste0("res_",files[i], ".txt")
write.table(res,file=filename2,row.names = F,sep = '\t',quote = F)
het <- mr_heterogeneity(dat)
filename3 <- paste0("het_",files[i], ".txt")
write.table(het,file=filename3,row.names = F,sep = '\t',quote = F)
plt <- mr_pleiotropy_test(dat)
filename4 <- paste0("plt_",files[i], ".txt")
write.table(plt,file=filename4,row.names = F,sep = '\t',quote = F)
if(any(is.na(outcome_dat$samplesize.outcome))){
print(paste("列", "column_name", "包含NA值,跳过这个步骤。"))
}else{
out <- directionality_test(dat)
filename5 <- paste0("out_",files[i], ".txt")
}
write.table(out,file=filename5,row.names = F,sep = '\t',quote = F)
}
setwd('H:\\Puberty_SEM\\MR_GWAS\\Biomarkers\\biomarkers\\res')
filest <- list.files(pattern = "*.txt")
filelent <- length(filest)
newdatat <- c()
for(i in 1:length(filest)){
temp <-read.table(filest[i],header = T,sep = '\t')
newdatat = rbind(newdatat,temp)
}
write.table(x = newdatat,file = "res.txt",append = F,quote = F ,sep = "\t",row.names=F)
library(MRlap)
library(data.table)
library(dplyr)
setwd('H:\\Puberty_SEM\\MR_GWAS\\Biomarkers\\biomarkers')
filest <- list.files(pattern = "*.vcf")
filelent <- length(filest)
filest[2]
files <- gsub("\\.vcf", "", filest)
files[1]
outcome_dat = fread('PSY_factor.txt',header = T)
colnames(outcome_dat)[colnames(outcome_dat)=='BP'] = 'pos'
colnames(outcome_dat)[colnames(outcome_dat)=='CHR'] = 'chr'
user_SNP_input = read.table('PSY_factor_clump.txt',header = T)
user_SNP_input = user_SNP_input$SNP
for (i in seq_along(filest)){
exposure_dat = fread(filest[i],header = T)
colnames(exposure_dat)[colnames(exposure_dat)=='BP'] = 'pos'
colnames(exposure_dat)[colnames(exposure_dat)=='CHR'] = 'chr'
exposure_dat$A1 = toupper(exposure_dat$A1)
exposure_dat$A2 = toupper(exposure_dat$A2)
A = MRlap(exposure = outcome_dat,
exposure_name = 'mvPuberty',
outcome = exposure_dat,
outcome_name = files[i],
ld = "./../../../eur_w_ld_chr",
hm3 = "./../../../eur_w_ld_chr/w_hm3.snplist",
do_pruning = FALSE,
user_SNPsToKeep = user_SNP_input)
filename1 <- paste0(files[i],"_MRlap", ".RData")
save(A,file = filename1)
}
}
setwd('H:\\Puberty_SEM\\MR_GWAS\\Biomarkers\\biomarkers\\MRlap')
library(dplyr)
filest <- list.files(pattern = ".RData")
files <- gsub("\\.RData", "", filest)
filelent <- length(filest)
for(i in 1:length(filest)){
load(filest[i])
df1 = unlist(A["MRcorrection"])
df1 = t(df1)
df1 = as.data.frame(df1)
df1 = select(df1,c('MRcorrection.observed_effect',
'MRcorrection.observed_effect_se',
'MRcorrection.m_IVs',
'MRcorrection.observed_effect_p',
'MRcorrection.corrected_effect',
'MRcorrection.corrected_effect_se',
'MRcorrection.corrected_effect_p',
'MRcorrection.test_difference',
'MRcorrection.p_difference'))
df2 = unlist(A["LDSC"])
df2 = t(df2)
df2 = as.data.frame(df2)
filename1 <- paste(files[i],'.txt')
filename2 <- paste(files[i],'_ldsc','.txt')
write.table(df1,file = filename1,append = F,quote = F ,sep = "\t",row.names=F)
write.table(df2,file = filename2,append = F,quote = F ,sep = "\t",row.names=F)
}
setwd('H:\\Puberty_SEM\\MR_GWAS\\Biomarkers\\biomarkers\\MRlap\\MRlap')
filest <- list.files(pattern = "*.txt")
filelent <- length(filest)
files <- gsub("\\_MRlap.txt", "", filest)
merge.data <- read.table(file = filest[1],header=T,sep="\t")
merge.data <- cbind(files[1], merge.data )#左侧增加列/cbind( merge.data, dir[1])#右边增加列
colnames(merge.data)[1]='ID'
for(i in 2:40){
new.data <- read.table(file = filest[i], header=T, sep="\t")
new.data <- cbind(files[i], new.data)
colnames(new.data)[1]='ID'
merge.data <- rbind(merge.data,new.data)
}
write.table(x = merge.data,file = "MRlap.txt",append = F,quote = F ,sep = "\t",row.names=F)
setwd('G:\\materal_prengancy\\New_analysis')
library(TwoSampleMR)
library(dplyr)
library(ieugwasr)
library(data.table)
exposure_dat = read_exposure_data('./BW.txt',
sep = "\t",
snp_col = "SNP",
beta_col = "BETA",
se_col = "SE",
effect_allele_col = "A1",
other_allele_col = "A2",
pval_col = "P",
samplesize_col = 'N',
phenotype_col = 'Phenotype')
colnames(exposure_dat)[1] ='rsid'
colnames(exposure_dat)[6] = 'pval'
colnames(exposure_dat)[11] = 'id'
exposure_dat=ld_clump(dat = exposure_dat,
plink_bin = 'C:/Users/DELL/Documents/R/win-library/4.1/plinkbinr/bin/plink_Windows.exe',
bfile = "H:\\Puberty_SEM\\MR_GWAS\\EUR\\EUR",#链接:http://fileserve.mrcieu.ac.uk/ld/1kg.v3.tgz
clump_kb = 1000,
clump_r2 = 0.01
)
colnames(exposure_dat)[1] ='SNP'
colnames(exposure_dat)[6] = 'pval.exposure'
colnames(exposure_dat)[11] = 'id.exposure'
write.table(exposure_dat,file = './../Offspring_BW.txt',row.names = F,sep = '\t',quote = F)
exposure_dat = mutate(exposure_dat,R=get_r_from_bsen(exposure_dat$beta.exposure,
exposure_dat$se.exposure,
exposure_dat$samplesize.exposure))
exposure_dat = mutate(exposure_dat,F=(exposure_dat$samplesize.exposure-2)*((R*R)/(1-R*R)))
exposure_dat = exposure_dat %>% filter(F>10)
exposure_dat = read.table('BW.txt',header = T,sep = '\t')
outcome_path = 'G:\\materal_prengancy\\New_analysis\\outcome/'
lf <-list.files(path = outcome_path,pattern = ".txt$")
files <- gsub("\\.txt", "", lf)
filest = paste('./outcome/',lf,sep = '')
files[1]
for (i in 1:length(filest)){
outcome_dat <- read_outcome_data(
snps = exposure_dat$SNP,
filename = filest[i],
sep = "\t",
snp_col = "SNP",
beta_col = "BETA",
se_col = "SE",
effect_allele_col = "A1",
other_allele_col = "A2",
pval_col = "P",
samplesize_col = "N",
phenotype_col = 'Phenotype'
)
dat <- harmonise_data(
exposure_dat = exposure_dat,
outcome_dat = outcome_dat
)
filename1 <- paste('BW_',files[i],'_dat','.txt')
write.table(dat,file=filename1,row.names = F,sep = '\t',quote = F)
res <- mr(dat)
res = generate_odds_ratios(res)
filename2 <- paste('BW_',files[i],'_res','.txt')
write.table(res,file=filename2,row.names = F,sep = '\t',quote = F)
het <- mr_heterogeneity(dat)
filename3 <- paste('BW_',files[i],'_het','.txt')
write.table(het,filename3,row.names = F,sep = '\t',quote = F)
plt <- mr_pleiotropy_test(dat)
filename4 <- paste('BW_',files[i],'_plt','.txt')
write.table(plt,file=filename4,row.names = F,sep = '\t',quote = F)
if(any(is.na(outcome_dat$samplesize.outcome))){
print(paste("列", "column_name", "包含NA值,跳过这个步骤。"))
}else{
out <- directionality_test(dat)
filename5 <- paste('BW_',files[i],'_out','.txt')
}
write.table(out,file=filename5,row.names = F,sep = '\t',quote = F)
}
#######################################################################MR-Lasso
setwd('G:\\materal_prengancy\\New_analysis\\BW_Puberty_MR/dat/')
library(MendelianRandomization)
library(dplyr)
filest <- list.files(pattern = "*.txt")
filelent <- length(filest)
files <- gsub("\\.txt", "", filest)
for(i in 1:length(filest)){
df = read.table(filest[i],header = T,sep = '\t')
df = split(df,df$exposure)
df1 = df[['Offspring_BW']]
filename1 <- paste('Offspring_BW_',files[i],'.txt')
write.table(df1,file = filename1,row.names = F,sep = '\t',quote = F)
df2 = df[['Fetal_BW']]
filename2 <- paste('Fetal_BW_',files[i],'.txt')
write.table(df2,file = filename2,row.names = F,sep = '\t',quote = F)
df3 = df[['Maternal_BW']]
filename3 <- paste('Maternal_BW_',files[i],'.txt')
write.table(df3,file = filename3,row.names = F,sep = '\t',quote = F)
}
setwd('G:\\materal_prengancy\\New_analysis\\BW_Puberty_MR/dat/MR-LASSO/')
filest <- list.files(pattern = "*.txt")
filelent <- length(filest)
files <- gsub("\\.txt", "", filest)
for(i in 1:length(filest)){
data = read.table(filest[i],header = T,sep = '\t')
data = select(data,c('SNP','beta.exposure','se.exposure','beta.outcome','se.outcome','exposure','outcome'))
colnames(data) = c('snps','bx','bxse','by','byse','exposure','outcome')
data$by = as.numeric(data$by)
data$byse = as.numeric(data$byse)
data$bx = as.numeric(data$bx)
data$bxse = as.numeric(data$bxse)
data =mr_input(bx = data$bx,bxse = data$bxse,by = data$by,byse=data$byse)
df = mr_lasso(data,distribution = "normal", alpha = 0.05)
filename4 <- paste(files[i],'.RData')
save(df,file = filename4)
}
filest <- list.files(pattern = "*.RData")
filelent <- length(filest)
files <- gsub("\\.RData", "", filest)
for(i in 1:length(filest)){
load(filest[i])
df1 = c(df@Valid,df@Estimate,df@StdError,df@Pvalue,df@Lambda)
df1 = as.data.frame(df1)
df1 = t(df1)
df1 = as.data.frame(df1)
colnames(df1)=c('SNPS','beta','se','p','lambda')
df1 = mutate(df1,ID=files[i])
filename5 <- paste('MR_Lasso_',files[i],'.txt')
write.table(df1,file =filename5,row.names = F,sep = '\t',quote = F)
}
setwd('G:\\materal_prengancy\\New_analysis\\BW_Puberty_MR\\dat/MR-LASSO/')
filest <- list.files(pattern = "*.txt")
filelent <- length(filest)
newdatat <- c()
for(i in 1:length(filest)){
temp <-read.table(filest[i],header = T,sep = '\t')
newdatat = rbind(newdatat,temp)
}
write.table(x = newdatat,file = "BW_Puberty_MR-LASSO.txt",append = F,quote = F ,sep = "\t",row.names=F)