1. cca demo 1
(1) calc CCA ====
dim(LifeCycleSavings)
head(LifeCycleSavings)
plot(LifeCycleSavings, gap=F)
pop=LifeCycleSavings[, 2:3]; dim(pop) #50 2
oec=LifeCycleSavings[, -c(2:3)]; dim(oec) #50 3
colnames(LifeCycleSavings) # "sr","pop15","pop75","dpi","ddpi"
if(0){
cca.rs = CCA::cc(X=pop, Y=oec) #use R pkg
CCA::plt.cc(cca.rs)
CCA::plt.indiv(cca.rs, d1=1, d2=2)
}
cca.rs2=cancor(x=pop, y=oec) #n*p1, n*p2;
cca.rs2
#
# check 1
as.matrix(pop) %*% cca.rs2$xcoef #as U
as.matrix(oec) %*% cca.rs2$ycoef #as V
# 几乎都是0,说明正交
cor(as.matrix(pop) %*% cca.rs2$xcoef,
as.matrix(oec) %*% cca.rs2$ycoef)
# 几乎都是0,说明正交
cor(as.matrix(pop) %*% cca.rs2$xcoef,
as.matrix(oec) %*% cca.rs2$ycoef)[,1:2] - diag(cca.rs2$cor)
# T
all(abs(cor(as.matrix(pop) %*% cca.rs2$xcoef,
as.matrix(oec) %*% cca.rs2$ycoef)[,1:2] - diag(cca.rs2$cor))<1e-15)
# check 2: 除对角线是1,其他位置都是0附近
cor(as.matrix(pop) %*% cca.rs2$xcoef)
cor(as.matrix(pop) %*% cca.rs2$xcoef)-diag(2)
cor(as.matrix(pop) %*% cca.rs2$xcoef)-diag(2) < 1e-10
# check 3
cor(as.matrix(oec) %*% cca.rs2$ycoef)
cor(as.matrix(oec) %*% cca.rs2$ycoef) - diag(3)
cor(as.matrix(oec) %*% cca.rs2$ycoef) - diag(3) < 1e-10
(2) plot cor ====
u_vector=(as.matrix(pop) %*% cca.rs2$xcoef)
v_vector=(as.matrix(oec) %*% cca.rs2$ycoef)
cor(u_vector[,1], v_vector[,1]) #0.82
cor(u_vector[,2], v_vector[,2]) #0.36
cca.rs2$cor #[1] 0.8247966 0.3652762
plot(u_vector[,1], v_vector[,1], pch=19, main="Fig2")
#abline(0.4, 1)
abline(lm(y~x, data.frame(x=u_vector[,1], y=v_vector[,1])), col="red", lty=2, lwd=2)
#
plot(u_vector[,2], v_vector[,2], pch=19, cex=0.5, main="Fig3")
abline(lm(y~x, data.frame(x=u_vector[,2], y=v_vector[,2])), col="red", lty=2, lwd=2)
(3) plot sample ====
head(LifeCycleSavings)
plot(u_vector[,1], u_vector[,2], pch=19)
plot(v_vector[,1], v_vector[,2], pch=19)
dat=LifeCycleSavings
dat=cbind(dat, u_vector, v_vector)
colnames(dat)[6:10]=c( paste0("U_", 1:2), paste0("V_", 1:3) )
head(dat)
library(ggplot2)
library(patchwork)
{
p1=ggplot(dat, aes(U_1, U_2, color=dpi) )+
geom_point()+ theme_bw()+ggtitle("U vector")
p2=ggplot(dat, aes(V_1, V_2, color=dpi) )+
geom_point()+ theme_bw()+ggtitle("V vector")
p1+p2
}
(2) UMAP based on CCA ----
# use U map
library(uwot)
umap_results = umap(u_vector, n_neighbors = 15, n_components = 2)
colnames(umap_results)=paste0("UMAP_", 1:2)
head(umap_results)
dat=cbind(dat, umap_results)
head(dat)
#
ggplot(dat, aes(UMAP_1, UMAP_2, color=pop15) )+
geom_point()+ theme_bw()+ggtitle("u vector")+
#scale_color_gradient(low = "navy", high = "red")
scale_color_gradient2(low = "navy", mid = "white", high = "red", midpoint = 35)
# use v map
vmap_results = umap(v_vector, n_neighbors = 10, n_components = 3)
colnames(vmap_results)=paste0("VMAP_", 1:3)
head(vmap_results)
dat=cbind(dat[,1:12], vmap_results)
head(dat)
#
ggplot(dat, aes(VMAP_1, VMAP_2, color= ddpi ) )+
geom_point()+ theme_bw()+ggtitle("v vector")+
scale_color_gradient2(low = "navy", mid = "white", high = "red", midpoint = 5)
#
Ref
-
典型关联分析(CCA)原理总结 https://www.cnblogs.com/pinard/p/6288716.html
-
Seurat CCA? It’s just a simple extension of PCA! https://xinmingtu.cn/blog/2022/CCA_dual_PCA/
该作者认为Seurat CCA就是 PCA的简单扩展:X.XT 修改为 XYT.