R语言实战:机器学习与数据分析源代码6(最终弹)

本文辑录了《R语言实战——机器学习与数据分析》(电子工业出版社2016年出版)一书第7章后半部分(137页~145页)至第8章之代码。本书引言请见如下链接:
http://blog.csdn.net/baimafujinji/article/details/51596171





内容简介:本书系统地介绍了统计分析和机器学习领域中最为重要和流行的多种技术及它们的基本原理,在详解有关算法的基础上,结合大量R语言实例演示了这些理论在实践中的使用方法。具体内容被分成三个部分,即R语言编程基础、基于统计的数据分析方法以及机器学习理论。统计分析与机器学习部分又具体介绍了包括参数估计、假设检验、极大似然估计、非参数检验方法(包括列联分析、符号检验、符号秩检验等)、方差分析、线性回归(包括岭回归和Lasso方法)、逻辑回归、支持向量机、聚类分析(包括K均值算法和EM算法)和人工神经网络等内容。同时,统计理论的介绍也为深化读者对于后续机器学习部分的理解提供了很大助益。知识结构和阅读进度的安排上既兼顾了循序渐进的学习规律,亦统筹考虑了夯实基础的必要性

网上书店地址:

电子工业出版社官网
中国互动出版网China-pub
京东商城(1)
京东商城(2)


Chapter 7 (From P137)

P137

assessment <- c("weak","good","limited","fair")
assessment1 <- factor(assessment)
assessment1

str(assessment1)

assessment1 <- factor(assessment, order=TRUE,
+ levels=c("good","fair","limited","weak"))

assessment1
str(assessment1)

sample <- c(12,15,7,10)
fsample <- factor(sample,levels=c(7,10,12,15,100))
fsample

length(fsample)

P138~139


fsample[5]<-100
fsample

fsample[6]<-99

wt <- c(46,39,35,42,43,43)
group <- c("A","B","C","A","B","C")
tapply(wt,as.factor(group),mean)

wt <- c(46,39,35,42,43,43,42,44,36,40,39,38)
diet <- c("A","B","C","A","B","C","A","B","C","A","B","C")
gender <- c("M","M","M","M","M","M","F","F","F","F","F","F")
tapply(wt,list(as.factor(diet),as.factor(gender)),mean)

split(wt,list(diet,gender))

P140~141

myopia

by(myopia,myopia$degree,function(frame) frame[,2]+frame[,3])

diet
gender
wt
table(list(diet,gender))

artery <- read.csv("C:/data/graft_arteries.csv")
artery

P142

table(list(artery$Diabetes,artery$Hypertension))

table(artery$Diabetes)

table(D=artery$Diabetes,H=artery$Hypertension,S=artery$Ever_smoked)

P143~144

dh_tab <- table(list(D=artery$Diabetes,H=artery$Hypertension))
dh_tab

dh_tab[1,1]
dh_tab[1,]
dh_tab[,2]

dh_tab[2,2]*4
dh_tab[1,]*2
dh_tab/3

dh_tab

apply(dh_tab,1,sum)
apply(dh_tab,2,sum)

addmargins(dh_tab)

dhs_tab <- table(D=artery$Diabetes,
+ H=artery$Hypertension,S=artery$Ever_smoked)
dhs_tab

addmargins(dhs_tab)

P145


apply(dhs_tab,"S",sum)
apply(dhs_tab,"D",sum)
apply(dhs_tab,"H",sum)

Chapter 8

P147~148

57/200745

pnorm(1)-pnorm(-1)
pnorm(2)-pnorm(-2)
pnorm(3)-pnorm(-3)

P150~151

n <- 200745
(p.hat <- 57/n)
p.hat + c(-1.96, 1.96) * sqrt(p.hat * (1 - p.hat)/n)

binom.test(57,200745)

conf.int<-function(x,n,sigma,alpha){
options(digits=5)
mean<-mean(x)
c(mean-sigma*qnorm(1-alpha/2,mean=0, sd=1,
lower.tail = TRUE)/sqrt(n),
mean+sigma*qnorm(1-alpha/2,mean=0, sd=1,
lower.tail = TRUE)/sqrt(n))
}

x<-c(112.5, 101.0, 103.0, 102.0, 100.5,
+ 102.6, 107.5, 95.00, 108.8, 115.6,
+ 100.0, 123.5, 102.0, 101.6, 102.2,
+ 116.6, 95.40, 97.80, 108.6, 105.0,
+ 136.8, 102.8, 101.5, 98.40, 93.30)

n <- 25
alpha <- 0.05
sigma <- 10
result <- conf.int(x, n, sigma, alpha)
result

P152~153

curve(dnorm(x), from = -5, to = 5, ylim = c(0, 0.45),
+ ylab ="", col = "blue")
par(new=TRUE)
curve(dt(x, 1), from = -5, to = 5, ylim = c(0, 0.45),
+ ylab ="", lty = 2, col = "red")
par(new=TRUE)
curve(dt(x, 3), from = -5, to = 5, ylim = c(0, 0.45),
+ ylab ="", lty = 3)
text.legend = c("dnorm","dt(1)", "dt(3)")
legend("topright", legend = text.legend, lty=c(1,2,3),
+ col = c("blue", "red", "black"))

pH <- c(6, 5.7, 6.2, 6.3, 6.5, 6.4, 6.9, 6.6,
+ 6.8, 6.7, 6.8, 7.1, 6.8, 7.1, 7.1, 7.5, 7)
mean(pH); sd(pH)
mean(pH)+ qt(c(0.025,0.975),length(pH)-1)*sd(pH)/sqrt(length(pH))

t.test(pH, mu=7)

P155

chisq.var.test <- function (x, alpha){
options(digits=4)
result<-list( )
n<-length(x)
v<-var(x)
result$conf.int.var <- c(
(n-1)*v/qchisq(alpha/2, df=n-1, lower.tail=F),
(n-1)*v/qchisq(alpha/2, df=n-1, lower.tail=T))
result$conf.int.se <- sqrt(result$conf.int.var)
result
}

chisq.var.test(x, 0.05)

P157

chicks <- data.frame(feed = rep(c(1,2), times=c(3,6)),
+ weight_gain = c(
+ 42, 68, 85,
+ 42, 97, 81, 95, 61, 103))
tapply(chicks$weight_gain, chicks$feed, mean)
tapply(chicks$weight_gain, chicks$feed, sd)

t.test(weight_gain ~ feed, data = chicks, var.equal = TRUE)

P159~160

t.test(weight_gain ~ feed, data = chicks)

Feed.1 <- c(44, 55, 68, 85, 90, 97)
Feed.2 <- c(42, 61, 81, 95, 97, 103)
t.test(Feed.2, Feed.1, paired = T)

diff = Feed.2-Feed.1
t.test(diff)

P161~164

Feed <- c(Feed.1, Feed.2)
group <- c(rep(1, 6), rep(2, 6))
t.test(Feed ~ group)

prop.test(x=c(225,128),n=c(500,400), correct=F)
prop.test(x=c(225,128),n=c(500,400))

1 - pbinom(5, size = 8, prob = 0.5)

pbinom(8, 100, 0.26)

P165~166

prop.test(8,100,p=0.26,alternative="less")

binom.test(8,100,p=0.26,alternative="less")

P168~169

2*pt(-2.9326, 16, lower.tail = T)

qt(0.025, 16); qt(0.975, 16)

pt(-2.9326, 16)

t.test(pH, mu = 7, alternative = "less")

P170~172

qt(0.025, 7); qt(0.975, 7)

pt(-0.9019, 7, lower.tail = T)*2

qt(0.025, 4.503); qt(0.975, 4.503)

pt(-0.9357, 4.503, lower.tail = T)*2

qt(0.025, 5); qt(0.975, 5)

2*(pt(3.2359, 5, lower.tail = F))

P177~178

f <- function(lamda){
logL = n*log(lamda) - lamda*sum(x)
return (logL)
}

x = c(518,612,713,388,434)
n = length(x)
duration <- optimize(f, c(0,1), maximum = TRUE)
duration

1/duration$maximum

library(MASS)
attach(geyser)

hist(waiting, freq = FALSE, col = "wheat")
lines(density(waiting), col = 'red', lwd = 2)

P179

LL<-function(params,data){
t1<-suppressWarnings(dnorm(data,params[2],params[3]))
t2<-suppressWarnings(dnorm(data,params[4],params[5]))
ll<-sum(log(params[1]*t1+(1-params[1])*t2))
return(ll)
}

library("maxLik")
mle <- maxLik(logLik = LL, start = c(0.5,50,10,80,10), data=waiting)
mle

a <- mle$estimate[1]
mu1<-mle$estimate[2]; s1<- mle$estimate[3]
mu2<-mle$estimate[4]; s2<- mle$estimate[5]
X<-seq(40,120,length=100)
f<-a*dnorm(X,mu1,s1)+(1-a)*dnorm(X,mu2,s2)
hist(waiting, freq = FALSE, col = “wheat”)
lines(density(waiting), col = ‘red’, lty = 2)
lines(X, f, col = “blue”)
text.legend = c(“Density Line”,”Max Likelihood”)
legend(“topright”, legend = text.legend, lty=c(2,1),
+ col = c(“red”,”blue”))

至此,全书代码已发布完毕。

  • 6
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

白马负金羁

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值