R语言手册(第四站 降维方法)
标签: R语言
1.读入房屋数据集,准备数据
# Read the houses data set. The path is a placeholder -- point it at the
# real location of houses.csv. The file has no header row, so column names
# are assigned explicitly afterwards.
houses <- read.csv(file = "C:/.../houses.csv",
                   stringsAsFactors = FALSE,
                   header = FALSE)
names(houses) <- c("MVAL", "MINC", "HAGE", "ROOMS", "BEDRMS",
                   "POPN", "HHLDS", "LAT", "LONG")

# Standardize the variables (z-scores). Every predictor is standardized, in
# this order, so that the new *_Z columns occupy positions 10:17, which the
# principal component analysis later selects by index.
predictors <- c("MINC", "HAGE", "ROOMS", "BEDRMS",
                "POPN", "HHLDS", "LAT", "LONG")
for (v in predictors) {
  houses[[paste0(v, "_Z")]] <- (houses[[v]] - mean(houses[[v]])) / sd(houses[[v]])
}

# Randomly hold out roughly 10% of the rows as the test set; the remaining
# rows (about 90%) form the training set. The two sets must not overlap,
# hence `< .1` versus `>= .1`.
choose <- runif(dim(houses)[1], 0, 1)
test.house <- houses[which(choose < .1), ]
train.house <- houses[which(choose >= .1), ]
2.主成分分析
# Requires the "psych" package
library(psych)
# Principal component analysis on columns 10:17 of the training set,
# extracting 8 components without rotation and keeping component scores.
pcal <- principal(train.house[, c(10:17)],
                  nfactors = 8,
                  rotate = "none",
                  scores = TRUE)
3. 主成分分析结果
# Eigenvalues of the fitted principal component solution:
pcal[["values"]]
# Loading matrix
# Variance explained
pcal[["loadings"]]
输出:
4.坡度图
# Scree plot: eigenvalues of the principal component solution in component
# order, drawn as points joined by lines (type = "b").
plot(pcal$values,
     type = "b",
     main = "Scree Plot for Houses Data")
输出:
5.绘制因子得分图
# Scatterplot matrix of median income, housing median age and the scores of
# the third principal component (column 3 of the score matrix).
pairs(~ train.house$MINC + train.house$HAGE + pcal$scores[, 3],
      labels = c("Median Income",
                 "Housing Median Age",
                 "Component 3 Scores"))
输出:
6. 计算共性
# Communality of the variable in row 2 of the loading matrix: the sum of its
# squared loadings over the retained components.
# comm3 keeps the first three components, comm4 the first four.
comm3 <- sum(loadings(pcal)[2, 1:3]^2)
comm4 <- sum(loadings(pcal)[2, 1:4]^2)
comm3; comm4
输出:
7.主成分验证
# Validation: repeat the principal component analysis on the held-out test
# set (same columns 10:17), extracting 4 unrotated components, and print
# the loadings.
pca2 <- principal(test.house[, c(10:17)],
                  nfactors = 4,
                  rotate = "none",
                  scores = TRUE)
pca2$loadings
输出:
以下的是因子分析:
8.读入,准备数据用于因子分析
# Read the adult data set. The path is a placeholder -- point it at the
# real location of adult.txt.
adult <- read.csv(file = "C:/.../adult.txt",
                  stringsAsFactors = FALSE)
# Net capital = capital gain minus capital loss.
adult$capnet <- adult$capital.gain - adult$capital.loss
# Keep five columns by position; presumably age, demogweight,
# education.num, hours.per.week and capnet (the columns standardized
# below) -- verify against the file's column order.
adult.s <- adult[, c(1, 3, 5, 13, 16)]
# Standardize the data (z-scores):
adult.s$AGE_Z <- (adult.s$age - mean(adult.s$age)) / sd(adult.s$age)
adult.s$DEM_Z <- (adult.s$demogweight - mean(adult.s$demogweight)) /
  sd(adult.s$demogweight)
adult.s$EDUC_Z <- (adult.s$education.num - mean(adult.s$education.num)) /
  sd(adult.s$education.num)
adult.s$CAPNET_Z <- (adult.s$capnet - mean(adult.s$capnet)) /
  sd(adult.s$capnet)
adult.s$HOURS_Z <- (adult.s$hours.per.week - mean(adult.s$hours.per.week)) /
  sd(adult.s$hours.per.week)
# Randomly choose the test set: rows whose uniform draw is below 0.1 go to
# the test set, all others to the training set. Only the five standardized
# columns (positions 6:10) are kept.
choose <- runif(dim(adult.s)[1], 0, 1)
test.adult <- adult.s[which(choose < .1), c(6:10)]
train.adult <- adult.s[which(choose >= .1), c(6:10)]
9.Bartlett球度检验
# Requires the psych package
library(psych)
# Pearson correlation matrix of the training data, then Bartlett's test of
# sphericity on it with the number of training rows as the sample size.
corrmatl <- cor(train.adult, method = "pearson")
cortest.bartlett(corrmatl, n = nrow(train.adult))
输出:
10.带有5个主成分的因子分析
# Requires psych and GPArotation
library(GPArotation)
# Factor analysis of the training data with 5 factors, factoring method
# "pa", no rotation.
fal <- fa(train.adult, nfactors = 5, fm = "pa", rotate = "none")
fal$values   # eigenvalues
fal$loadings # loadings
输出:
11.带有两个主成分的因子分析
# Factor analysis of the training data with 2 factors, factoring method
# "pa", no rotation, allowing up to 200 iterations.
fa2 <- fa(train.adult,
          nfactors = 2,
          fm = "pa",
          max.iter = 200,
          rotate = "none")
fa2$values      # eigenvalues
fa2$loadings    # loadings
fa2$communality # communalities
输出:
12.最大方差旋转法
# Two-factor solution on the training data, this time with varimax rotation
# (factoring method "pa", at most 200 iterations).
fa2v <- fa(train.adult, nfactors = 2, rotate = "varimax",
           max.iter = 200, fm = "pa")
# Rotated loadings, then the communalities.
fa2v[["loadings"]]
fa2v[["communality"]]
输出: