Python的lasso回归分析

78 篇文章 9 订阅
5 篇文章 0 订阅

加载调用函数包

import numpy as np # 快速操作结构数组的工具
import pandas
import matplotlib.pyplot as plt  # 可视化绘制
from sklearn.linear_model import Lasso,LassoCV,LassoLarsCV   

数据读取

# Load the raw dataset. The file imports `pandas` without the `pd` alias, so
# the module is referenced by its full name here to avoid a NameError.
data = pandas.read_csv('C://Users//TD//Desktop//data0629.csv')
相关系数查看
# Pairwise correlation matrix of the columns in `data` (Pearson, the default).
data.corr(method="pearson")

选择与目标变量相关系数较高的变量,作为后续建模分析的参考。

归一化处理

利用均值和标准差对数据进行标准化(z-score)处理,以消除量纲的影响。

import numpy as np
# Z-score standardisation, computed column by column.
# The pandas reductions are called directly with an explicit axis: going
# through np.mean / np.std forwards axis=None to pandas, which newer pandas
# releases treat (or warn they will treat) as a reduction over the whole
# frame rather than per column.
arr_mean = data.mean(axis=0)  # per-column mean
arr_std = data.std(axis=0, ddof=1)  # per-column sample standard deviation (ddof=1)
newdata = (data - arr_mean) / arr_std

分组抽样划分训练集和测试集

# Predictor matrix: the 14 explanatory columns taken from `newdata`.
X = newdata.loc[:, [
    "年龄",
    "性别",
    "重点病种名称编码",
    "住院总费用",
    "科室名称编码",
    "确诊天数",
    "门诊与入院诊断符合情况",
    "住院是否超30天",
    "术前住院日",
    "手术季度",
    "手术日期与出院日时差天数",
    "手术级别程度编码",
    "是否重返医院",
    "检验时长",
]]
# Response: kept as a single-column DataFrame rather than a Series.
Y = newdata.loc[:, ['住院天数']]
from sklearn.model_selection import train_test_split
# Split into train/test partitions with a fixed random_state.
X_train, X_test, y_train, y_test = train_test_split(X, Y, random_state=42)

通过设置不同的alpha值建立5个lasso实例

# Build five Lasso models that differ only in alpha, then fit each one on the
# training split. `lasso` keeps the estimator's default alpha.
lasso = Lasso()
lasso.fit(X_train, y_train)

lasso001 = Lasso(alpha=0.01)
lasso001.fit(X_train, y_train)

lasso005 = Lasso(alpha=0.05)
lasso005.fit(X_train, y_train)

lasso00001 = Lasso(alpha=0.0001)
lasso00001.fit(X_train, y_train)

lasso05 = Lasso(alpha=0.5)
lasso05.fit(X_train, y_train)
方法一
# Report for the default-alpha model: score on both splits and the number of
# non-zero coefficients.
print('**********************************')
print("Lasso alpha=1")
print(f"training set score:{lasso.score(X_train, y_train):.2f}")
print(f"test set score:{lasso.score(X_test, y_test):.2f}")
print(f"Number of features used:{np.count_nonzero(lasso.coef_)}")
方法二
# Report for the alpha=0.01 model: score on both splits and the number of
# non-zero coefficients.
print('**********************************')
print("Lasso alpha=0.01")
print(f"training set score:{lasso001.score(X_train, y_train):.2f}")
print(f"test set score:{lasso001.score(X_test, y_test):.2f}")
print(f"Number of features used:{np.count_nonzero(lasso001.coef_)}")
方法三
# Report for the alpha=0.0001 model: score on both splits and the number of
# non-zero coefficients.
print('**********************************')
print("Lasso alpha=0.0001")
print(f"training set score:{lasso00001.score(X_train, y_train):.2f}")
print(f"test set score:{lasso00001.score(X_test, y_test):.2f}")
print(f"Number of features used:{np.count_nonzero(lasso00001.coef_)}")
方法四
# Report for the alpha=0.05 model: score on both splits and the number of
# non-zero coefficients.
print('**********************************')
print("Lasso alpha=0.05")
print(f"training set score:{lasso005.score(X_train, y_train):.2f}")
print(f"test set score:{lasso005.score(X_test, y_test):.2f}")
print(f"Number of features used:{np.count_nonzero(lasso005.coef_)}")
方法五
# Report for the alpha=0.5 model: score on both splits and the number of
# non-zero coefficients.
print('**********************************')
print("Lasso alpha=0.5")
print(f"training set score:{lasso05.score(X_train, y_train):.2f}")
print(f"test set score:{lasso05.score(X_test, y_test):.2f}")
print(f"Number of features used:{np.count_nonzero(lasso05.coef_)}")

 ### 建立岭回归实例
# `Ridge` is not among the names imported from sklearn.linear_model at the top
# of the file, so it is imported here before the model is built and fitted.
from sklearn.linear_model import Ridge
ridge01 = Ridge(alpha=0.01).fit(X_train, y_train)
计算R方
# Score of the ridge model on the training split (result is not stored).
ridge01.score(X=X_train, y=y_train)
打印整个模型
# Print the fitted intercept and the first coefficient as an equation.
# `ridge01` is the ridge model fitted in this script; the name `a` was never
# defined. The [0] / [0][0] indexing assumes the target was passed as a
# single-column 2-D frame, which is how Y is built.
# NOTE(review): the label "CRIM" does not match any column of X (the first
# selected column is "年龄"), and only the first coefficient is shown —
# confirm the intended label.
print("住院天数 = " + str(ridge01.intercept_[0]) + " + " + str(ridge01.coef_[0][0]) + " * CRIM")
住院天数 = 0.17721718179269352 + 0.005671216373405902 * CRIM

进行预测和比较

# Predict on the held-out split and export predictions next to the actual values.
output = ridge01.predict(X_test)
from pandas.core.frame import DataFrame
# Give the predictions the same row labels as y_test; a fresh 0..n-1 index
# would not line up with y_test's row labels when the frames are joined.
output1 = DataFrame(
    output,
    index=y_test.index,
    columns=["预测_" + str(col) for col in y_test.columns],
)
import pandas
# axis=1 puts prediction and actual value in the same row. The default axis=0
# would stack the two frames vertically and pad the unmatched columns with NaN.
outputdata = pandas.concat([output1, y_test], axis=1)
outputdata.to_csv('C:\\Users\\TD\\Desktop\\some_csv.csv', index=False)

最后比较预测值与实际值是否一致,并计算MSE等评估指标。

评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值