import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, linear_model, metrics
import statsmodels.api as sm
# Module-level dataset shared by skl_func() and sm_func().
# NOTE(review): load_boston was removed in scikit-learn 1.2 — confirm the
# installed version still provides it.
boston = datasets.load_boston() # this dataset is a dictionary-like object
def skl_func():
    """Fit scikit-learn's LinearRegression on a 70/30 split of the Boston data.

    Reads the module-level ``boston`` dataset, holds out 30% of the rows
    (``random_state=1`` makes the split reproducible), fits the model on the
    remaining rows and prints the fitted coefficients.

    Returns:
        None. Results are written to stdout.
    """
    # ``sklearn.cross_validation`` was removed in scikit-learn 0.20;
    # ``train_test_split`` now lives in ``sklearn.model_selection``. The old
    # location is kept as a fallback only for very old installations.
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        from sklearn.cross_validation import train_test_split

    X = boston.data  # ``data`` is one of the dataset's keys
    y = boston.target

    # Only the training portion is used below; the held-out portion is
    # discarded because this function only reports coefficients.
    x_train, _x_test, y_train, _y_test = train_test_split(
        X, y, test_size=0.3, random_state=1
    )

    lm = linear_model.LinearRegression()
    lm.fit(x_train, y_train)

    print("Scikit-learn's Results")
    print(lm.coef_, '\n')
def sm_func():
    """Fit a statsmodels OLS regression on the Boston data and print its summary.

    Reads the module-level ``boston`` dataset, builds labelled DataFrames for
    the features and the ``MEDV`` target, and fits on all rows (no train/test
    split is made here).

    Returns:
        None. The regression summary is written to stdout.
    """
    target = pd.DataFrame(boston.target, columns=['MEDV'])
    features = pd.DataFrame(boston.data, columns=boston.feature_names)

    # statsmodels' linear regression has no intercept term by default, so add
    # a constant column of ones to the feature matrix.
    design = sm.add_constant(features)

    fitted = sm.OLS(target, design).fit()

    print("Statsmodels's Results")
    print(fitted.summary())
# Run both fits in turn; each one prints its own results to stdout.
skl_func()
sm_func()
# Boston房价数据集线性回归——sklearn & statsmodels 比较
# 最新推荐文章于 2024-06-23 10:33:23 发布