R语言glmnet包拟合广义线性模型

R包glmnet是通过惩罚最大似然法拟合广义线性和类似模型的软件包。

1. 加载包和数据

#install.packages("glmnet", repos = "https://cran.us.r-project.org")

library(glmnet)
ls("package:glmnet")

# x = matrix(rnorm(100 * 20), 100, 20)*10
# y = rnorm(100)

### 1. 加载包和数据
data(QuickStartExample)
x <- QuickStartExample$x
y <- QuickStartExample$y

 2. 拟合模型

### 2. 拟合模型
fit = glmnet(x,y,alpha = 1,family = "gaussian",standardize=TRUE) # lasso回归
##glmnet主要参数说明:
# alpha: elasticnet mixing parameter,0≤α≤ 1,
# alpha=1(默认值) is the lasso penalty, and alpha=0 the ridge penalty.
# nlambda: 默认100
# lambda: 用户提供的lambda序列。默认是让程序根据nlambda和lambda计算的lambda序列
# standardize:数据是否要标准化处理,默认为TRUE
# intercept,线性模型的截距,默认为TRUE
# family:模型中使用的误差分布和链接函数,取值范围:"gaussian", "binomial", 
# "poisson", "multinomial","cox", "mgaussian",默认为gaussian。

#fit = glmnet(x,y,alpha = 0,family = "gaussian",standardize=TRUE) # Ridge回归
#fit = glmnet(x,y,alpha = 0.3,family = "gaussian",standardize=TRUE) # elasticnet

# # 自定义lambdas
# lambdas <- 10^seq(2, -2, by = -.1) # 100和0.01之间
# fit = glmnet(x,y,alpha = 1,family = "gaussian",
#              lambda=lambdas,standardize=TRUE)

class(fit) # [1] "elnet"  "glmnet"
print(fit)
# glmnet返回一系列模型供用户选择(不同lambda下训练好的模型)
## 输出结果解读:
# Df:the number of nonzero coefficients
# %dev:the percent deviance explained
# Lambda: the corresponding value of λ,可能小于指定的lambda数,由于算法停止规则

# help(glmnet.control) # internal glmnet parameters

3.查看模型参数

### 3.查看模型参数
any(fit$lambda == 0.5)# 是否有此lambda值的模型(训练好的)

# 所有lambda值下的模型参数
coef(fit) 
# 指定lambda值下的模型参数
coef(fit,s = 0.01)
coef(fit,s = c(10,1,0.1))

4. 预测

### 4. 预测
# 不同lambda下模型的预测结果。注:一个lambda对应一个训练好的模型
predict(fit, newx = x[1:5,],type = "response")
# type:“link”, “response”, “coefficients”, “nonzero”其中的一个

# 指定参数lambda,模型的预测结果
predict(fit, newx = x[1:5,], type = "response", s = 0.00351) 
predict(fit, newx = x[1:5,], type = "response", s = 0.5)

5. 模型拟合过程作图

### 5. 模型拟合过程作图
plot(fit,label = TRUE)
plot(fit, xvar = "lambda", label = TRUE)
## 参数 xvar: “norm” for the ℓ1-norm of the coefficients (default), 
# “lambda” for the log-lambda value and “dev” for %deviance explained
## 参数label: 是否显示变量名

6. 交叉验证确定模型的最佳lambda(超参数)

### 6. 交叉验证确定模型的最佳lambda(超参数)
# glmnet返回一系列模型供用户选择
cvfit <- cv.glmnet(x, y) # 默认10倍交叉验证(nfolds = 10)。
# 返回cv.glmnet对象,包含交叉验证的所有fits要素统计。
# lambda.min:is the value of λ that gives minimum mean cross-validated error
# lambda.1se: is the value of λ that gives the most regularized model such that
# the cross-validated error is within one standard error of the minimum

plot(cvfit)

cvfit$lambda.min # 交叉验证平均误差最小时模型的lambda
cvfit$lambda.1se 
# 得到交叉验证平均误差最小的lambda时模型的系数(coefficients)
coef(cvfit, s = "lambda.min")
coef(cvfit) # 默认s = "lambda.1se"
coef(cvfit, s = "lambda.1se") 

predict(cvfit, newx = x[1:5,], s = "lambda.min")
predict(cvfit, newx = x[1:5,])

7. Cox 回归

### 7. Cox 回归
# Cox回归
data(CoxExample)
fit = glmnet(CoxExample$x,CoxExample$y,
              alpha = 1,family = "cox") 
## alpha = 1,用于变量选择,去除系数为零的变量

cvfit <- cv.glmnet(CoxExample$x,CoxExample$y,
                   alpha = 1,family = "cox")
# type.measure :deviance

# cvfit <- cv.glmnet(CoxExample$x,CoxExample$y,
#                    alpha = 1,family = "cox",
#                    type.measure= "C")

plot(cvfit)

# type.measure="C" is Harrel's concordance measure
cvfit$lambda.min
coef(cvfit,s = cvfit$lambda.min)
coef(cvfit,s = cvfit$lambda.1se)

# 预测生存时间
predict(cvfit, newx = CoxExample$x[1:10, ], 
        s = cvfit$lambda.min,
        type = "response")
# type:“link”, “response”, “coefficients”, “nonzero”其中的一个

## 和survival包coxph比较
library(survival)
set.seed(1)
nobs <- 100; nvars <- 15
x <- matrix(rnorm(nobs * nvars), nrow = nobs)

# create response
ty <- rep(rexp(nobs / 5), each = 5)
tcens <- rbinom(n = nobs, prob = 0.3, size = 1)
y <- Surv(ty, tcens)

glmnet_fit <- glmnet(x, y, family = "cox", lambda = 0)
# lambda =0, 没有正则项,等于coxph_fit
coxph_fit <- coxph(y ~ x)
# glmnet函数,当lambda =0, 没有正则项,等于coxph_fit
plot(coef(glmnet_fit), coef(coxph_fit))
abline(0, 1)

8. 其他回归实例

###8. 其他回归实例

## 当y仅包含两种分类时可以使用二分类模型,
data(BinomialExample)
x <- BinomialExample$x
y <- BinomialExample$y
fit = glmnet(x,y,alpha = 1,family = "binomial")
plot(fit,label = TRUE) 
#指定lambda在0.05和0.01时预测新样本的类别,type = "class"指定输出值为类别
predict(fit, newx = x[1:5,], type = "class", s = c(0.05, 0.01))
#交叉验证
#cvfit = cv.glmnet(x, y, family = "binomial", type.measure = "class")
cvfit = cv.glmnet(x, y, family = "binomial", type.measure = "auc")
plot(cvfit)

## 当y为多分类时
data(MultinomialExample)
x <- MultinomialExample$x
y <- MultinomialExample$y
#拟合模型
fit = glmnet(x, y, family = "multinomial", type.multinomial = "grouped")
predict(fit, newx = x[1:10,], type = "class")
#交叉验证
cvfit=cv.glmnet(x, y, family="multinomial", type.multinomial = "grouped", 
                parallel = TRUE)
#cvfit = cv.glmnet(x, y, family = "multinomial", type.measure = "class")
# Only deviance, class, mse, mae available as type.measure for Multinomial models;
#绘图展示
plot(fit, xvar = "lambda", label = TRUE, type.coef = "2norm")
plot(cvfit)

#预测模型
predict(cvfit, newx = x[1:10,], s = "lambda.min", type = "class")

  • 7
    点赞
  • 64
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值