R语言glmnet包拟合广义线性模型

最新推荐文章于 2024-07-14 19:34:42 发布

qq_27390023

最新推荐文章于 2024-07-14 19:34:42 发布

阅读量7.5k

点赞数 8

文章标签： r语言开发语言

本文链接：https://blog.csdn.net/qq_27390023/article/details/122656396

版权

R包glmnet是通过惩罚最大似然法拟合广义线性和类似模型的软件包。

1. 加载包和数据

#install.packages("glmnet", repos = "https://cran.us.r-project.org")

library(glmnet)
ls("package:glmnet")

# x = matrix(rnorm(100 * 20), 100, 20)*10
# y = rnorm(100)

### 1. 加载包和数据
data(QuickStartExample)
x <- QuickStartExample$x
y <- QuickStartExample$y

2. 拟合模型

### 2. 拟合模型
fit = glmnet(x,y,alpha = 1,family = "gaussian",standardize=TRUE) # lasso回归
##glmnet主要参数说明：
# alpha： elasticnet mixing parameter,0≤α≤ 1，
# alpha=1(默认值) is the lasso penalty, and alpha=0 the ridge penalty.
# nlambda: 默认100
# lambda: 用户提供的lambda序列。默认是让程序根据nlambda和lambda计算的lambda序列
# standardize：数据是否要标准化处理，默认为TRUE
# intercept，线性模型的截距，默认为TRUE
# family:模型中使用的误差分布和链接函数,取值范围:"gaussian", "binomial", 
# "poisson", "multinomial","cox", "mgaussian",默认为gaussian。

#fit = glmnet(x,y,alpha = 0,family = "gaussian",standardize=TRUE) # Ridge回归
#fit = glmnet(x,y,alpha = 0.3,family = "gaussian",standardize=TRUE) # elasticnet

# # 自定义lambdas
# lambdas <- 10^seq(2, -2, by = -.1) # 100和0.01之间
# fit = glmnet(x,y,alpha = 1,family = "gaussian",
#              lambda=lambdas,standardize=TRUE)

class(fit) # [1] "elnet"  "glmnet"
print(fit)
# glmnet返回一系列模型供用户选择（不同lambda下训练好的模型）
## 输出结果解读:
# Df:the number of nonzero coefficients
# %dev:the percent deviance explained
# Lambda: the corresponding value of λ，可能小于指定的lambda数，由于算法停止规则

# help(glmnet.control) # internal glmnet parameters

3.查看模型参数

### 3.查看模型参数
any(fit$lambda == 0.5)# 是否有此lambda值的模型（训练好的）

# 所有lambda值下的模型参数
coef(fit) 
# 指定lambda值下的模型参数
coef(fit,s = 0.01)
coef(fit,s = c(10,1,0.1))

4. 预测

### 4. 预测
# 不同lambda下模型的预测结果。注：一个lambda对应一个训练好的模型
predict(fit, newx = x[1:5,],type = "response")
# type:“link”, “response”, “coefficients”, “nonzero”其中的一个

# 指定参数lambda，模型的预测结果
predict(fit, newx = x[1:5,], type = "response", s = 0.00351) 
predict(fit, newx = x[1:5,], type = "response", s = 0.5)

5. 模型拟合过程作图

### 5. 模型拟合过程作图
plot(fit,label = TRUE)
plot(fit, xvar = "lambda", label = TRUE)
## 参数 xvar: “norm” for the ℓ1-norm of the coefficients (default), 
# “lambda” for the log-lambda value and “dev” for %deviance explained
## 参数label: 是否显示变量名

6. 交叉验证确定模型的最佳lambda（超参数）

### 6. 交叉验证确定模型的最佳lambda（超参数）
# glmnet返回一系列模型供用户选择
cvfit <- cv.glmnet(x, y) # 默认10倍交叉验证（nfolds = 10）。
# 返回cv.glmnet对象，包含交叉验证的所有fits要素统计。
# lambda.min:is the value of λ that gives minimum mean cross-validated error
# lambda.1se: is the value of λ that gives the most regularized model such that
# the cross-validated error is within one standard error of the minimum

plot(cvfit)

cvfit$lambda.min # 交叉验证平均误差最小时模型的lambda
cvfit$lambda.1se 
# 得到交叉验证平均误差最小的lambda时模型的系数（coefficients）
coef(cvfit, s = "lambda.min")
coef(cvfit) # 默认s = "lambda.1se"
coef(cvfit, s = "lambda.1se") 

predict(cvfit, newx = x[1:5,], s = "lambda.min")
predict(cvfit, newx = x[1:5,])

7. Cox 回归

### 7. Cox 回归
# Cox回归
data(CoxExample)
fit = glmnet(CoxExample$x,CoxExample$y,
              alpha = 1,family = "cox") 
## alpha = 1，用于变量选择，去除系数为零的变量

cvfit <- cv.glmnet(CoxExample$x,CoxExample$y,
                   alpha = 1,family = "cox")
# type.measure :deviance

# cvfit <- cv.glmnet(CoxExample$x,CoxExample$y,
#                    alpha = 1,family = "cox",
#                    type.measure= "C")

plot(cvfit)

# type.measure="C" is Harrel's concordance measure
cvfit$lambda.min
coef(cvfit,s = cvfit$lambda.min)
coef(cvfit,s = cvfit$lambda.1se)

# 预测生存时间
predict(cvfit, newx = CoxExample$x[1:10, ], 
        s = cvfit$lambda.min,
        type = "response")
# type:“link”, “response”, “coefficients”, “nonzero”其中的一个

## 和survival包coxph比较
library(survival)
set.seed(1)
nobs <- 100; nvars <- 15
x <- matrix(rnorm(nobs * nvars), nrow = nobs)

# create response
ty <- rep(rexp(nobs / 5), each = 5)
tcens <- rbinom(n = nobs, prob = 0.3, size = 1)
y <- Surv(ty, tcens)

glmnet_fit <- glmnet(x, y, family = "cox", lambda = 0)
# lambda =0, 没有正则项，等于coxph_fit
coxph_fit <- coxph(y ~ x)
# glmnet函数，当lambda =0, 没有正则项，等于coxph_fit
plot(coef(glmnet_fit), coef(coxph_fit))
abline(0, 1)

8. 其他回归实例

###8. 其他回归实例

## 当y仅包含两种分类时可以使用二分类模型，
data(BinomialExample)
x <- BinomialExample$x
y <- BinomialExample$y
fit = glmnet(x,y,alpha = 1,family = "binomial")
plot(fit,label = TRUE) 
#指定lambda在0.05和0.01时预测新样本的类别，type = "class"指定输出值为类别
predict(fit, newx = x[1:5,], type = "class", s = c(0.05, 0.01))
#交叉验证
#cvfit = cv.glmnet(x, y, family = "binomial", type.measure = "class")
cvfit = cv.glmnet(x, y, family = "binomial", type.measure = "auc")
plot(cvfit)

## 当y为多分类时
data(MultinomialExample)
x <- MultinomialExample$x
y <- MultinomialExample$y
#拟合模型
fit = glmnet(x, y, family = "multinomial", type.multinomial = "grouped")
predict(fit, newx = x[1:10,], type = "class")
#交叉验证
cvfit=cv.glmnet(x, y, family="multinomial", type.multinomial = "grouped", 
                parallel = TRUE)
#cvfit = cv.glmnet(x, y, family = "multinomial", type.measure = "class")
# Only deviance, class, mse, mae available as type.measure for Multinomial models;
#绘图展示
plot(fit, xvar = "lambda", label = TRUE, type.coef = "2norm")
plot(cvfit)

#预测模型
predict(cvfit, newx = x[1:10,], s = "lambda.min", type = "class")