给大佬磕头!!!
【机器学习】降维——PCA(非常详细) - 知乎
-
Proportion of variance explained
- choose dimension
R
#correlation plot
library(GGally)
ggscatmat(track[,1:7])
track_pca <- prcomp(track[,1:7], center=TRUE, scale=TRUE)#center后第一个eigenvalu和intercept就无关了
#照抄的表格 选k dim.
library(kableExtra)
library(knitr)
track_pca_smry <- tibble(evl=track_pca$sdev^2) %>%
mutate(p = evl/sum(evl), cum_p = cumsum(evl/sum(evl))) %>% t()
colnames(track_pca_smry) <- colnames(track_pca$rotation)
rownames(track_pca_smry) <- c("Variance", "Proportion", "Cum. prop")
kable(track_pca_smry, digits=2, align="r") %>%
kable_styling(full_width = T) %>%
row_spec(0, color="white", background = "#7570b3") %>%
column_spec(1, width = "2.5em", color="white", background = "#7570b3") %>%
column_spec(1:8, width = "2.5em") %>%
row_spec(3, color="white", background = "#CA6627")
#eigenvalue visualization
track_pca_var <- tibble(
n=1:length(track_pca$sdev),
evl=track_pca$sdev^2)
ggplot(track_pca_var, aes(x=n, y=evl)) +
geom_line() +
xlab("Number of PCs") + ylab("Eigenvalue") +
theme_bw()
track_pca_pcs <- as_tibble(track_pca$x[,1:2]) %>%
mutate(cnt=track$country)
track_pca_evc <- as_tibble(track_pca$rotation[,1:2]) %>%
mutate(origin=rep(0, 7),
variable=colnames(track)[1:7],
varname=rownames(track_pca$rotation)) %>%
mutate(PC1s = PC1*(track_pca_var$evl[1]*2.5),
PC2s = PC2*(track_pca_var$evl[2]*2.5))
#Plot the principal component scores, and also the contribution of the original variables to the principal component.
library(ggrepel)
g <- ggplot() +
geom_segment(data=track_pca_evc,
aes(x=origin, xend=PC1s,
y=origin, yend=PC2s), colour="orange") +
geom_text_repel(data=track_pca_evc,
aes(x=PC1s, y=PC2s,
label=variable),
colour="orange", nudge_x=0.1) +
geom_point(data=track_pca_pcs, aes(x=PC1, y=PC2)) +
geom_text(data=filter(track_pca_pcs, abs(PC1)>5),
aes(x=PC1, y=PC2, label=cnt), nudge_y=0.1, nudge_x=-0.1) +
geom_text(data=filter(track_pca_pcs, abs(PC2)>1.5),
aes(x=PC1, y=PC2, label=cnt), nudge_y=0.1, nudge_x=-0.1) +
xlab("PC1") + ylab("PC2") +
theme(aspect.ratio=1) +
theme_bw()