代码如下:
# encoding: utf-8
"""Fit and evaluate an ordinary-least-squares model on ``ccpp1.csv``.

The script reads the CSV, uses the columns AT, V, AP and RH as features and
PE as the target, then:

1. splits the data into training and test sets and fits ``LinearRegression``;
2. prints the fitted intercept and coefficients;
3. prints MSE and RMSE on the held-out test set;
4. prints MSE and RMSE of 10-fold cross-validated predictions on all data;
5. plots measured against cross-validated predicted values.
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets, linear_model
from sklearn import metrics
from sklearn.linear_model import LinearRegression
# ``sklearn.cross_validation`` was removed in scikit-learn 0.20;
# ``train_test_split`` now lives in ``sklearn.model_selection``.
from sklearn.model_selection import cross_val_predict, train_test_split

# Load the data set and report its dimensions.
data = pd.read_csv('ccpp1.csv')
print(data.shape)

# Feature matrix: show the first five rows.
X = data[['AT', 'V', 'AP', 'RH']]
print(X.head())

# Target column (kept as a one-column DataFrame): show the first five rows.
y = data[['PE']]
print(y.head())

# Split into training and test sets; random_state fixes the shuffle so the
# split is reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

# Dimensions of the training and test sets.
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

# Train the model with ordinary least squares.
linreg = LinearRegression()
linreg.fit(X_train, y_train)

# Fitted model parameters.
print(linreg.intercept_)
print(linreg.coef_)

# Evaluate on the held-out test set.
y_pred = linreg.predict(X_test)
test_mse = metrics.mean_squared_error(y_test, y_pred)
print("MSE:", test_mse)
print("RMSE:", np.sqrt(test_mse))

# Evaluate again with 10-fold cross-validation over the full data set
# (X and y defined above are reused unchanged).
predicted = cross_val_predict(linreg, X, y, cv=10)
cv_mse = metrics.mean_squared_error(y, predicted)
print("MSE:", cv_mse)
print("RMSE:", np.sqrt(cv_mse))

# Scatter measured vs. predicted values; the dashed diagonal marks where
# a prediction would equal the measurement.
fig, ax = plt.subplots()
ax.scatter(y, predicted)
ax.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.show()
运行效果图如下: