R语言手册(第三站 探索性数据分析)

R语言手册(第四站 降维方法)

标签: R语言

1.读入房屋数据集,准备数据

# Read the houses data set. The file has no header row, so the column names
# are assigned explicitly right after loading.
# NOTE(review): the path is a placeholder ("...") -- point it at the real file.
houses <- read.csv(file = "C:/.../houses.csv",
                   stringsAsFactors = FALSE,
                   header = FALSE)
names(houses) <- c("MVAL", "MINC", "HAGE", "ROOMS", "BEDRMS",
                   "POPN", "HHLDS", "LAT", "LONG")

# Standardize (z-score) every variable except MVAL (presumably the response
# variable -- confirm). The new <NAME>_Z columns are appended in the original
# column order, so they occupy columns 10:17, which the PCA code selects.
predictors <- setdiff(names(houses), "MVAL")
houses[paste0(predictors, "_Z")] <- lapply(
  houses[predictors],
  function(x) (x - mean(x)) / sd(x)
)

# Randomly hold out roughly 10% of the rows as the test set; the remaining
# ~90% form the training set. The two subsets are disjoint.
choose <- runif(nrow(houses), 0, 1)
test.house <- houses[which(choose < .1), ]
train.house <- houses[which(choose >= .1), ]

2.主成分分析

# Requires the "psych" package.
library(psych)
# Unrotated principal components analysis on columns 10:17 of the training
# data (the standardized *_Z columns appended during data preparation),
# extracting eight components and keeping the component scores.
pcal <- principal(train.house[, c(10:17)],
                  nfactors = 8,
                  rotate = "none",
                  scores = TRUE)

3. 主成分分析结果

# Eigenvalues of the fitted components.
pcal[["values"]]
# Loadings matrix; its printout also reports the variance explained.
pcal[["loadings"]]

输出:image_1d96p7hhj1oh51fpb95gnl66bc9.png-61.3kB

4.坡度图

# Scree plot: eigenvalues against component number, drawn as points joined
# by lines (type = "b").
plot(pcal$values,
     type = "b",
     main = "Scree Plot for Houses Data")

输出:image_1d96p92q61t5mrv18jfh3q1kf4m.png-43.3kB

5.绘制因子得分图

# Scatterplot matrix of median income, housing median age and the scores on
# the third principal component (column 3 of the score matrix).
pairs(~train.house$MINC + train.house$HAGE + pcal$scores[, 3],
      labels = c("Median Income",
                 "Housing Median Age",
                 "Component 3 Scores"))

输出:image_1d96pc56217hr13aofe61lfn12ov13.png-90.4kB

6. 计算共性

# Communality of the variable in row 2 of the loadings matrix (presumably
# HAGE_Z, the second standardized column -- confirm): the sum of its squared
# loadings over the retained components.
comm3 <- sum(loadings(pcal)[2, 1:3]^2)  # first three components
comm4 <- sum(loadings(pcal)[2, 1:4]^2)  # first four components
comm3
comm4

输出:image_1d96pgq2i1kjt1fsjs01ddk14lr1g.png-6.2kB

7.主成分验证

# Validation: repeat the unrotated PCA on the held-out test set, this time
# extracting four components, and inspect the loadings.
pca2 <- principal(test.house[, c(10:17)],
                  nfactors = 4,
                  rotate = "none",
                  scores = TRUE)
pca2$loadings

输出:image_1d96pije31dq7ucu1a2fkulsek1t.png-53.9kB


以下的是因子分析:

8.读入,准备数据用于因子分析

# Read the adult data set.
# NOTE(review): the path is a placeholder ("...") -- point it at the real file.
adult <- read.csv(file = "C:/.../adult.txt",
                  stringsAsFactors = FALSE)
# Net capital = capital gain minus capital loss.
adult$capnet <- adult$capital.gain - adult$capital.loss
# Keep only the columns used below (selected by position).
# NOTE(review): assumes position 16 is the capnet column just added -- confirm.
adult.s <- adult[, c(1, 3, 5, 13, 16)]

# Standardize the data: z-score each variable. The *_Z columns are appended
# as columns 6:10 of adult.s.
zscore <- function(x) (x - mean(x)) / sd(x)
adult.s$AGE_Z <- zscore(adult.s$age)
adult.s$DEM_Z <- zscore(adult.s$demogweight)
adult.s$EDUC_Z <- zscore(adult.s$education.num)
adult.s$CAPNET_Z <- zscore(adult.s$capnet)
adult.s$HOURS_Z <- zscore(adult.s$hours.per.week)

# Randomly hold out roughly 10% of the rows as the test set; keep only the
# standardized columns (6:10) in both subsets.
choose <- runif(nrow(adult.s), 0, 1)
test.adult <- adult.s[which(choose < .1), c(6:10)]
train.adult <- adult.s[which(choose >= .1), c(6:10)]

9.Bartlett球度检验

# Requires the psych package.
library(psych)
# Pearson correlation matrix of the training variables, then Bartlett's test
# of sphericity with the number of training rows as the sample size.
corrmatl <- cor(x = train.adult, method = "pearson")
cortest.bartlett(corrmatl, n = nrow(train.adult))

输出:image_1d96q327tqvlv3e1a7nhit1m3i2a.png-15.6kB

10.带有5个主成分的因子分析

# Requires psych and GPArotation.
library(GPArotation)
# Unrotated factor analysis with five factors, using fm = "pa".
fal <- fa(train.adult, nfactors = 5, fm = "pa", rotate = "none")
fal$values     # eigenvalues
fal$loadings   # loadings

输出:image_1d96q676o1l3185dmh97uic8a2n.png-54.3kB

11.带有两个主成分的因子分析

# Unrotated factor analysis with two factors, using fm = "pa" and allowing
# up to 200 iterations.
fa2 <- fa(train.adult,
          nfactors = 2,
          fm = "pa",
          max.iter = 200,
          rotate = "none")
fa2$values        # eigenvalues
fa2$loadings      # loadings
fa2$communality   # communalities

输出:image_1d96qbs9555i1e5h1htug7017d13h.png-66.5kB

12.最大方差旋转法

# Two-factor solution again (fm = "pa", up to 200 iterations), this time
# with varimax rotation.
fa2v <- fa(r = train.adult, nfactors = 2, rotate = "varimax",
           max.iter = 200, fm = "pa")
# Rotated loadings, followed by the communalities.
fa2v[["loadings"]]
fa2v[["communality"]]

输出:image_1d96qbeoohgk1h4t1gpa1l4hcfv34.png-31.2kB

  • 0
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值