An Introduction to Statistical Learning, with Applications in R

### Lab: Deep Learning

## A Single Layer Network on the Hitters Data

### Keras version: keras is an R deep learning package that depends on Python

library(ISLR2)

Gitters <- na.omit(Hitters)

n <- nrow(Gitters)

set.seed(13)

ntest <- trunc(n / 3)  # trunc() rounds toward zero

testid <- sample(1:n, ntest)

###

lfit <- lm(Salary ~ ., data = Gitters[-testid, ])

lpred <- predict(lfit, Gitters[testid, ])

with(Gitters[testid, ], mean(abs(lpred - Salary)))

###

library(glmnet)

# x and y are used below but were never defined above; as in the lab,
# build a scaled model matrix of predictors and the response vector first:
x <- scale(model.matrix(Salary ~ . - 1, data = Gitters))
y <- Gitters$Salary

cvfit <- cv.glmnet(x[-testid, ], y[-testid],
                   type.measure = "mae")
cpred <- predict(cvfit, x[testid, ], s = "lambda.min")
mean(abs(y[testid] - cpred))

keras depends on Python, so the package must be installed before first use:

install.packages("keras")

library(keras)

# A single hidden layer of 50 ReLU units with 40% dropout, then a linear output unit
modnn <- keras_model_sequential() %>%
  layer_dense(units = 50, activation = "relu",
              input_shape = ncol(x)) %>%
  layer_dropout(rate = 0.4) %>%
  layer_dense(units = 1)
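
The lab goes on to compile and fit this network and to report its test-set mean absolute error; a sketch following the book's listing (loss, optimizer, epoch count and batch size as printed there):

modnn %>% compile(loss = "mse",
                  optimizer = optimizer_rmsprop(),
                  metrics = list("mean_absolute_error"))
history <- modnn %>% fit(
  x[-testid, ], y[-testid], epochs = 1500, batch_size = 32,
  validation_data = list(x[testid, ], y[testid])
)
npred <- predict(modnn, x[testid, ])
mean(abs(y[testid] - npred))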

### Convolutional Neural Networks

###

cifar100 <- dataset_cifar100()

names(cifar100)

x_train <- cifar100$train$x

g_train <- cifar100$train$y

x_test <- cifar100$test$x

g_test <- cifar100$test$y

dim(x_train)

range(x_train[1,,, 1])

###

x_train <- x_train / 255

x_test <- x_test / 255

y_train <- to_categorical(g_train, 100)

dim(y_train)

###

install.packages("jpeg")

library(jpeg)

par(mar = c(0, 0, 0, 0), mfrow = c(5, 5))

index <- sample(seq(50000), 25)

for (i in index) plot(as.raster(x_train[i,,, ]))

###

model <- keras_model_sequential() %>%
  layer_conv_2d(filters = 32, kernel_size = c(3, 3),
                padding = "same", activation = "relu",
                input_shape = c(32, 32, 3)) %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(filters = 64, kernel_size = c(3, 3),
                padding = "same", activation = "relu") %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(filters = 128, kernel_size = c(3, 3),
                padding = "same", activation = "relu") %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(filters = 256, kernel_size = c(3, 3),
                padding = "same", activation = "relu") %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_flatten() %>%
  layer_dropout(rate = 0.5) %>%
  layer_dense(units = 512, activation = "relu") %>%
  layer_dense(units = 100, activation = "softmax")

summary(model)

###

model %>% compile(loss = "categorical_crossentropy",
                  optimizer = optimizer_rmsprop(),
                  metrics = c("accuracy"))
history <- model %>% fit(x_train, y_train, epochs = 30,
                         batch_size = 128, validation_split = 0.2)
# accuracy() is used below but was never defined; as in the lab, it
# compares predicted class indices with the true labels:
accuracy <- function(pred, truth)
  mean(drop(as.numeric(pred)) == drop(truth))
model %>% predict(x_test) %>% k_argmax() %>% accuracy(g_test)

### Using Pretrained CNN Models

###

img_dir <- "book_images"

image_names <- list.files(img_dir)

num_images <- length(image_names)

x <- array(dim = c(num_images, 224, 224, 3))

for (i in 1:num_images) {
  img_path <- paste(img_dir, image_names[i], sep = "/")
  img <- image_load(img_path, target_size = c(224, 224))
  x[i,,, ] <- image_to_array(img)
}

x <- imagenet_preprocess_input(x)

###

model <- application_resnet50(weights = "imagenet")

summary(model)

###

pred6 <- model %>% predict(x) %>%
  imagenet_decode_predictions(top = 3)

names(pred6) <- image_names

print(pred6)

### IMDb Document Classification

###

max_features <- 10000

imdb <- dataset_imdb(num_words = max_features)

c(c(x_train, y_train), c(x_test, y_test)) %<-% imdb

###

x_train[[1]][1:12]

###

word_index <- dataset_imdb_word_index()

decode_review <- function(text, word_index) {
  word <- names(word_index)
  idx <- unlist(word_index, use.names = FALSE)
  # indices 0-3 are reserved for padding/start/unknown/unused tokens,
  # so the real word indices are shifted up by 3
  word <- c("<PAD>", "<START>", "<UNK>", "<UNUSED>", word)
  idx <- c(0:3, idx + 3)
  words <- word[match(text, idx, 2)]
  paste(words, collapse = " ")
}

decode_review(x_train[[1]][1:12], word_index)

###

library(Matrix)

one_hot <- function(sequences, dimension) {
  seqlen <- sapply(sequences, length)
  n <- length(seqlen)
  rowind <- rep(1:n, seqlen)   # one row per document
  colind <- unlist(sequences)  # one column per word index
  sparseMatrix(i = rowind, j = colind,
               dims = c(n, dimension))
}
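
As a quick illustration (a toy input of mine, not from the lab), each document becomes one sparse row with entries in the columns of the word indices it contains:

one_hot(list(c(1, 3), c(2, 2, 5)), 5)
# 2 x 5 sparse pattern matrix; row 1 marks columns 1 and 3, row 2 marks 2 and 5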

###

x_train_1h <- one_hot(x_train, 10000)

x_test_1h <- one_hot(x_test, 10000)

dim(x_train_1h)

nnzero(x_train_1h) / (25000 * 10000)

###

set.seed(3)

ival <- sample(seq(along = y_train), 2000)

###

library(glmnet)

fitlm <- glmnet(x_train_1h[-ival, ], y_train[-ival],
                family = "binomial", standardize = FALSE)
classlmv <- predict(fitlm, x_train_1h[ival, ]) > 0
acclmv <- apply(classlmv, 2, accuracy, y_train[ival] > 0)

###

par(mar = c(4, 4, 4, 4), mfrow = c(1, 1))

plot(-log(fitlm$lambda), acclmv)

###

model <- keras_model_sequential() %>%
  layer_dense(units = 16, activation = "relu",
              input_shape = c(10000)) %>%
  layer_dense(units = 16, activation = "relu") %>%
  layer_dense(units = 1, activation = "sigmoid")
model %>% compile(optimizer = "rmsprop",
                  loss = "binary_crossentropy", metrics = c("accuracy"))
history <- model %>% fit(x_train_1h[-ival, ], y_train[-ival],
                         epochs = 20, batch_size = 512,
                         validation_data = list(x_train_1h[ival, ], y_train[ival]))

###

history <- model %>% fit(
  x_train_1h[-ival, ], y_train[-ival], epochs = 20,
  batch_size = 512, validation_data = list(x_test_1h, y_test)
)
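
To get a single test-set figure rather than reading it off the fitting history, evaluate() can be called directly (a small addition, not in the lab listing):

model %>% evaluate(x_test_1h, y_test)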

### Recurrent Neural Networks

### Sequential Models for Document Classification

###

wc <- sapply(x_train, length)

median(wc)

sum(wc <= 500) / length(wc)

###

maxlen <- 500

x_train <- pad_sequences(x_train, maxlen = maxlen)

x_test <- pad_sequences(x_test, maxlen = maxlen)

dim(x_train)

dim(x_test)

x_train[1, 490:500]

###

model <- keras_model_sequential() %>%
  layer_embedding(input_dim = 10000, output_dim = 32) %>%
  layer_lstm(units = 32) %>%
  layer_dense(units = 1, activation = "sigmoid")

###

model %>% compile(optimizer = "rmsprop",
                  loss = "binary_crossentropy", metrics = c("acc"))
history <- model %>% fit(x_train, y_train, epochs = 10,
                         batch_size = 128, validation_data = list(x_test, y_test))

plot(history)

predy <- predict(model, x_test) > 0.5

mean(abs(y_test == as.numeric(predy)))

### Time Series Prediction

###

library(ISLR2)

xdata <- data.matrix(
  NYSE[, c("DJ_return", "log_volume", "log_volatility")]
)

istrain <- NYSE[, "train"]

xdata <- scale(xdata)

###

# Lag a matrix by k rows: pad the top with NAs and drop the last k rows
lagm <- function(x, k = 1) {
  n <- nrow(x)
  pad <- matrix(NA, k, ncol(x))
  rbind(pad, x[1:(n - k), ])
}
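
A quick look at what lagm() produces (an illustrative call, not in the lab):

head(lagm(xdata, 2), 3)  # rows 1-2 are NA padding; row 3 holds row 1 of xdata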

###

arframe <- data.frame(log_volume = xdata[, "log_volume"],
                      L1 = lagm(xdata, 1), L2 = lagm(xdata, 2),
                      L3 = lagm(xdata, 3), L4 = lagm(xdata, 4),
                      L5 = lagm(xdata, 5))

###

arframe <- arframe[-(1:5), ]

istrain <- istrain[-(1:5)]

###

arfit <- lm(log_volume ~ ., data = arframe[istrain, ])

arpred <- predict(arfit, arframe[!istrain, ])

V0 <- var(arframe[!istrain, "log_volume"])

1 - mean((arpred - arframe[!istrain, "log_volume"])^2) / V0

###

arframed <- data.frame(day = NYSE[-(1:5), "day_of_week"], arframe)

arfitd <- lm(log_volume ~ ., data = arframed[istrain, ])

arpredd <- predict(arfitd, arframed[!istrain, ])

1 - mean((arpredd - arframe[!istrain, "log_volume"])^2) / V0

###

n <- nrow(arframe)
xrnn <- data.matrix(arframe[, -1])  # drop the response column
xrnn <- array(xrnn, c(n, 3, 5))     # 3 variables per lag, 5 lags
xrnn <- xrnn[,, 5:1]                # reorder lags oldest-first
xrnn <- aperm(xrnn, c(1, 3, 2))     # to (observation, lag, variable)
dim(xrnn)

###

model <- keras_model_sequential() %>%
  layer_simple_rnn(units = 12,
                   input_shape = list(5, 3),
                   dropout = 0.1, recurrent_dropout = 0.1) %>%  # rates as in the ISLR2 lab
  layer_dense(units = 1)
model %>% compile(optimizer = optimizer_rmsprop(), loss = "mse")
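
The lab then fits the RNN over the training period and computes the test-period R-squared; a sketch following the book's listing (epoch count and batch size as printed there):

history <- model %>% fit(
  xrnn[istrain,, ], arframe[istrain, "log_volume"],
  batch_size = 64, epochs = 200,
  validation_data =
    list(xrnn[!istrain,, ], arframe[!istrain, "log_volume"])
)
kpred <- predict(model, xrnn[!istrain,, ])
1 - mean((kpred - arframe[!istrain, "log_volume"])^2) / V0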