#数据准备——Z.matrix,P.matrix
library(base)
library(dplyr)
setwd('D:\\8.28\\MDD_gastrointestinal tract disorders\\GWAS_data\\PGM_MDD')
df1 = read.table('MDD_2019.txt',header = T,sep = '\t')
df1=mutate(df1,Z=BETA/SE)
df1=select(df1,c('SNP','P','Z'))
df2=read.table('3_PGM.txt',header = T,sep = '\t')
df2=mutate(df2,Z=BETA/SE)
df2=select(df2,c('SNP','P','Z'))
df_fianl=merge(df1,df2,by='SNP')
df_fianl=na.omit(df_fianl)
Z.matrix=select(df_fianl,c('SNP','Z.x','Z.y'))
row.names(Z.matrix)<-make.names(Z.matrix[,1],TRUE)
Z.matrix=Z.matrix[,-1]
Z.matrix=as.matrix(Z.matrix)
P.matrix=select(df_fianl,c('SNP','P.x','P.y'))
row.names(P.matrix)<-make.names(P.matrix[,1],TRUE)
P.matrix=P.matrix[,-1]
P.matrix=as.matrix(P.matrix)
#PLACO分析
require(devtools)
source_url("https://github.com/RayDebashree/PLACO/blob/master/PLACO_v0.1.1.R?raw=TRUE")
set.seed(1)
VarZ=var.placo(Z.matrix = Z.matrix,P.matrix = P.matrix,p.threshold = 1e-04)
out=sapply(1:nrow(Z.matrix), function(i) placo(Z=Z.matrix[i,],VarZ=VarZ))
out1=out[,1:5]
write.table(Z.matrix,file='Z.matrix',sep='\t',quote = F)
write.table(out,file='PGM_PTSD.txt',sep='\t',quote = F)
如果source_url 报错:直接在将PLACO的原函数输入运行,再进行var.placo分析。
.pdfx<-function(x) besselK(x=abs(x),nu=0)/pi
.p.bessel<-function(z, varz, AbsTol=1e-13){
p1<-2*as.double(integrate(Vectorize(.pdfx), abs(z[1]*z[2]/sqrt(varz[1])),Inf, abs.tol=AbsTol)$value)
p2<-2*as.double(integrate(Vectorize(.pdfx), abs(z[1]*z[2]/sqrt(varz[2])),Inf, abs.tol=AbsTol)$value)
p0<-2*as.double(integrate(Vectorize(.pdfx), abs(z[1]*z[2]),Inf, abs.tol=AbsTol)$value)
pval.compnull<-p1+p2-p0
return(pval.compnull)
}
var.placo<-function(Z.matrix, P.matrix, p.threshold=1e-4){
# Here Z.matrix is the pxk matrix of Z-scores where p is total no. of variants in the dataset, and k is the no. of traits
# Similarly, P.matrix is the corresponding pxk matrix of p-values where p is total no. of variants in the dataset, and k is the no. of traits
# p.threshold determines which variants are deemed to have no association marginally (default: 1e-4)
# checks
k<-ncol(Z.matrix)
if(k!=2) stop("This method is meant for 2 traits only. Columns correspond to traits.")
ZP<-cbind(Z.matrix,P.matrix)
ZP<-na.omit(ZP)
rows.alt<-which(ZP[,3]<p.threshold & ZP[,4]<p.threshold)
if(length(rows.alt)>0){
ZP<-ZP[-rows.alt,]
if(nrow(ZP)==0) stop(paste("No 'null' variant left at p-value threshold",p.threshold))
if(nrow(ZP)<30) warning(paste("Too few 'null' variants at p-value threshold",p.threshold))
}
varz<-diag(var(ZP[,c(1,2)]))
return(varz)
}
#---------------- Function for estimating correlation matrix of the Z's
cor.pearson<-function(Z.matrix, P.matrix, p.threshold=1e-4){
# Here Z.matrix is the pxk matrix of Z-scores where p is total no. of variants in the dataset, and k is the no. of traits
# Similarly, P.matrix is the corresponding pxk matrix of p-values where p is total no. of variants in the dataset, and k is the no. of traits
# p.threshold determines which variants are deemed to have no association marginally (default: 1e-4)
# checks
k<-ncol(Z.matrix)
if(k!=2) stop("This method is meant for 2 traits only.")
# estimating correlation
row.exclude<-which( apply(P.matrix, MARGIN = 1, function(x) any(x < p.threshold)) == TRUE )
if(length(row.exclude)>0) Z.matrix<-Z.matrix[-row.exclude,]
R<-cor(Z.matrix)
return(R)
}
############################################
placo<-function(Z, VarZ, AbsTol=.Machine$double.eps^0.8){
# Z: vector of Z-scores of size k=2 (i.e., collection of Z-scores of a particular SNP for k=2 traits)
# VarZ: vector of variances of Z-scores (covariance assumed 0; so need to be independent traits)
# AbsTol: absolute tolerance (accuracy paramater) for numerical integration.
# checks
k<-length(Z)
if(k!=2) stop("This method is meant for 2 traits only.")
if(length(VarZ)!=k) stop("Provide variance estimates for 2 traits as obtained using var.placo() function.")
# test of pleiotropy: PLACO
pvalue.b=.p.bessel(z=Z, varz=VarZ, AbsTol=AbsTol)
return(list(T.placo=prod(Z), p.placo=pvalue.b))
}
输出的out文件为列数过多,可以使用fread函数读取,然后行列准换。
newdata <- t(mydata)#R语言t函数进行行列转换