# Boston housing price prediction
from sklearn import linear_model
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
import statsmodels.api as sm
# Load the Boston housing dataset once and reuse it below.
# NOTE(review): `datasets.load_boston` was deprecated in scikit-learn 1.0 and
# removed in 1.2, so this script requires scikit-learn < 1.2 (or a different
# data source) -- confirm the target environment.
boston = datasets.load_boston()
print(boston.data.shape)
print(boston.DESCR)  # textual description of the housing attributes

# Wrap the raw arrays in DataFrames so columns can be addressed by name.
x = pd.DataFrame(boston.data, columns=boston.feature_names)
y = pd.DataFrame(boston.target, columns=['MEDV'])

# Optional exploratory plots; they need the DataFrame above for column access.
# plt.scatter(x['RM'], y, color='blue')
# plt.scatter(x['LSTAT'], y, color='blue')

# Exclude AGE and INDUS from the regressors.
x = x.drop(columns=['AGE', 'INDUS'])

# Add the intercept column, then fit an ordinary least squares regression.
x_add1 = sm.add_constant(x)
model = sm.OLS(y, x_add1).fit()
print(model.summary())

# One observation to predict: a leading 1 for the intercept column followed by
# one value per remaining feature. The values must follow the column order of
# x_add1 -- presumably boston.feature_names without AGE and INDUS; verify
# against x_add1.columns before changing the feature set.
x_test = np.array(
    [[1, 0.006, 18.0, 0.0, 0.52, 6.6, 4.87, 1.0, 290.0, 15.2, 396.2, 5]]
)
print(model.predict(x_test))