pleiotropic analysis__PLACO

#数据准备——Z.matrix,P.matrix
library(base)
library(dplyr)
setwd('D:\\8.28\\MDD_gastrointestinal tract disorders\\GWAS_data\\PGM_MDD')
df1 = read.table('MDD_2019.txt',header = T,sep = '\t')
df1=mutate(df1,Z=BETA/SE)
df1=select(df1,c('SNP','P','Z'))
df2=read.table('3_PGM.txt',header = T,sep = '\t')
df2=mutate(df2,Z=BETA/SE)
df2=select(df2,c('SNP','P','Z'))
df_fianl=merge(df1,df2,by='SNP')
df_fianl=na.omit(df_fianl)
Z.matrix=select(df_fianl,c('SNP','Z.x','Z.y'))
row.names(Z.matrix)<-make.names(Z.matrix[,1],TRUE)
Z.matrix=Z.matrix[,-1]
Z.matrix=as.matrix(Z.matrix)
P.matrix=select(df_fianl,c('SNP','P.x','P.y'))
row.names(P.matrix)<-make.names(P.matrix[,1],TRUE)
P.matrix=P.matrix[,-1]
P.matrix=as.matrix(P.matrix)
#PLACO分析
require(devtools)
source_url("https://github.com/RayDebashree/PLACO/blob/master/PLACO_v0.1.1.R?raw=TRUE")
set.seed(1)
VarZ=var.placo(Z.matrix = Z.matrix,P.matrix = P.matrix,p.threshold = 1e-04)
out=sapply(1:nrow(Z.matrix), function(i) placo(Z=Z.matrix[i,],VarZ=VarZ))
out1=out[,1:5]
write.table(Z.matrix,file='Z.matrix',sep='\t',quote = F)
write.table(out,file='PGM_PTSD.txt',sep='\t',quote = F)

如果source_url 报错:直接在将PLACO的原函数输入运行,再进行var.placo分析。

.pdfx<-function(x) besselK(x=abs(x),nu=0)/pi
.p.bessel<-function(z, varz, AbsTol=1e-13){
  p1<-2*as.double(integrate(Vectorize(.pdfx), abs(z[1]*z[2]/sqrt(varz[1])),Inf, abs.tol=AbsTol)$value)
  p2<-2*as.double(integrate(Vectorize(.pdfx), abs(z[1]*z[2]/sqrt(varz[2])),Inf, abs.tol=AbsTol)$value)
  p0<-2*as.double(integrate(Vectorize(.pdfx), abs(z[1]*z[2]),Inf, abs.tol=AbsTol)$value)
  pval.compnull<-p1+p2-p0
  return(pval.compnull)
}
var.placo<-function(Z.matrix, P.matrix, p.threshold=1e-4){
  # Here Z.matrix is the pxk matrix of Z-scores where p is total no. of variants in the dataset, and k is the no. of traits 
  # Similarly, P.matrix is the corresponding pxk matrix of p-values where p is total no. of variants in the dataset, and k is the no. of traits
  # p.threshold determines which variants are deemed to have no association marginally (default: 1e-4)
  # checks
  k<-ncol(Z.matrix)
  if(k!=2) stop("This method is meant for 2 traits only. Columns correspond to traits.")
  ZP<-cbind(Z.matrix,P.matrix)
  ZP<-na.omit(ZP)
  
  rows.alt<-which(ZP[,3]<p.threshold & ZP[,4]<p.threshold)
  if(length(rows.alt)>0){
    ZP<-ZP[-rows.alt,]
    if(nrow(ZP)==0) stop(paste("No 'null' variant left at p-value threshold",p.threshold))
    if(nrow(ZP)<30) warning(paste("Too few 'null' variants at p-value threshold",p.threshold))
  }
  varz<-diag(var(ZP[,c(1,2)]))
  return(varz)
}

#---------------- Function for estimating correlation matrix of the Z's
cor.pearson<-function(Z.matrix, P.matrix, p.threshold=1e-4){
  # Here Z.matrix is the pxk matrix of Z-scores where p is total no. of variants in the dataset, and k is the no. of traits 
  # Similarly, P.matrix is the corresponding pxk matrix of p-values where p is total no. of variants in the dataset, and k is the no. of traits
  # p.threshold determines which variants are deemed to have no association marginally (default: 1e-4)
  # checks
  k<-ncol(Z.matrix)
  if(k!=2) stop("This method is meant for 2 traits only.")
  # estimating correlation
  row.exclude<-which( apply(P.matrix, MARGIN = 1, function(x) any(x < p.threshold)) == TRUE )
  if(length(row.exclude)>0) Z.matrix<-Z.matrix[-row.exclude,]
  R<-cor(Z.matrix)
  return(R)
}

############################################
placo<-function(Z, VarZ, AbsTol=.Machine$double.eps^0.8){
  # Z: vector of Z-scores of size k=2 (i.e., collection of Z-scores of a particular SNP for k=2 traits)
  # VarZ: vector of variances of Z-scores (covariance assumed 0; so need to be independent traits)
  # AbsTol: absolute tolerance (accuracy paramater) for numerical integration.
  # checks				
  k<-length(Z)		
  if(k!=2) stop("This method is meant for 2 traits only.")
  if(length(VarZ)!=k) stop("Provide variance estimates for 2 traits as obtained using var.placo() function.")
  
  # test of pleiotropy: PLACO
  pvalue.b=.p.bessel(z=Z, varz=VarZ, AbsTol=AbsTol)
  return(list(T.placo=prod(Z), p.placo=pvalue.b))
}

PLACO详细介绍:GitHub - RayDebashree/PLACO: A statistical test of pleiotropic effect of a genetic variant on two traits using GWAS summary statistics

输出的out文件为列数过多,可以使用fread函数读取,然后行列准换。

newdata <- t(mydata)#R语言t函数进行行列转换

评论 9
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值