#coding:utf-8
# Single-variable (simple) linear regression example.
#
# Workflow: read "x,y" samples from a text file, use the first 80% for
# training and the remaining 20% for testing, fit a scikit-learn
# LinearRegression model, print several error metrics, then save the model
# with pickle and load it back.
import sys

import numpy as np
from sklearn import linear_model
import matplotlib.pyplot as plt
import sklearn.metrics as sm  # module used to compute the error metrics

try:
    import cPickle as pickle  # Python 2: C implementation of pickle
except ImportError:
    import pickle  # Python 3: cPickle no longer exists as a separate module

# Default locations (same values the script originally hard-coded).
DATA_FILE = "D:\\develop\\python_workspace\\test\\datas\\data_singlevar.txt"
MODEL_FILE = "D:\\develop\\python_workspace\\test\\datas\\saved_model.pkl"

# Fraction of the samples (taken from the front) used for training.
TRAIN_FRACTION = 0.8


def _num_training(total):
    """Return how many of ``total`` samples belong to the training split."""
    return int(TRAIN_FRACTION * total)


# Load the data
def getXY(filename=DATA_FILE):
    """Read comma-separated ``x,y`` pairs from ``filename``.

    Args:
        filename: path of the text file; each non-blank line must hold
            exactly two comma-separated numbers.

    Returns:
        Tuple ``(X, Y)`` of two lists of floats, in file order.

    Raises:
        ValueError: if a non-blank line does not contain exactly two numbers.
    """
    X = []
    Y = []
    with open(filename, 'r') as f:
        for line in f:
            # Skip blank lines (e.g. a trailing newline at the end of the
            # file) instead of failing on float('').
            if not line.strip():
                continue
            xt, yt = [float(i) for i in line.split(',')]
            X.append(xt)
            Y.append(yt)
    return X, Y


# Use the first 80% of the loaded data as training data
def getTrainData(X, Y):
    """Return the training split of the samples.

    Args:
        X: sequence of x values.
        Y: sequence of y values, parallel to ``X``.

    Returns:
        Tuple ``(X_training, Y_training)``: ``X_training`` is an array of
        shape ``(num_training, 1)``, ``Y_training`` is a 1-D array.
    """
    num_training = _num_training(len(X))
    X_training = np.array(X[:num_training]).reshape((num_training, 1))
    Y_training = np.array(Y[:num_training])
    return X_training, Y_training


# Use the remaining 20% of the loaded data as test data
def getTestData(X, Y):
    """Return the test split of the samples (everything after the training part).

    Args:
        X: sequence of x values.
        Y: sequence of y values, parallel to ``X``.

    Returns:
        Tuple ``(X_test, Y_test)``: ``X_test`` is an array of shape
        ``(num_test, 1)``, ``Y_test`` is a 1-D array.
    """
    num_training = _num_training(len(X))
    num_test = len(X) - num_training
    X_test = np.array(X[num_training:]).reshape((num_test, 1))
    Y_test = np.array(Y[num_training:])
    return X_test, Y_test


# Build the model
def getPredict(X_train, Y_train):
    """Fit a ``LinearRegression`` on the training data.

    Args:
        X_train: training inputs, shape ``(n, 1)``.
        Y_train: training targets, length ``n``.

    Returns:
        Tuple ``(linear_regressor, Y_predict)``: the fitted model and its
        predictions for ``X_train`` itself.
    """
    linear_regressor = linear_model.LinearRegression()
    linear_regressor.fit(X_train, Y_train)
    Y_predict = linear_regressor.predict(X_train)
    return linear_regressor, Y_predict


# Plot the regression line over the training data
def showLR(X_train, Y_train, Y_train_predict):
    """Show the training samples (green dots) and the fitted line (red)."""
    plt.figure()
    plt.scatter(X_train, Y_train, color="green")
    plt.plot(X_train, Y_train_predict, color="red", linewidth=4)
    plt.title("Training data")
    plt.show()


# Run the model on the test data
def getTestPredict(linear_regressor, X_test):
    """Return the predictions of ``linear_regressor`` for ``X_test``."""
    Y_test_predict = linear_regressor.predict(X_test)
    return Y_test_predict


# Plot the regression line over the test data
def showTestLR(X_test, Y_test, Y_test_predict):
    """Show the test samples (green dots) and the predicted line (red)."""
    # Open a dedicated figure, as showLR does, so this plot never draws on
    # top of a figure that is still current.
    plt.figure()
    plt.scatter(X_test, Y_test, color="green")
    plt.plot(X_test, Y_test_predict, color="red", linewidth=4)
    plt.title("Test Data")
    plt.show()


# Measure regression accuracy: compute the size of the error
def getError(Y_test, Y_test_predict):
    """Print regression metrics for the test predictions.

    Each metric is rounded to two decimals before it is printed.

    Args:
        Y_test: true target values.
        Y_test_predict: predicted target values.

    Returns:
        Dict mapping each metric name to its rounded value.
    """
    # Mean absolute error
    mean_absolute_error = round(sm.mean_absolute_error(Y_test, Y_test_predict), 2)
    print("mean_absolute_error : ", mean_absolute_error)
    # Mean squared error
    mean_squared_error = round(sm.mean_squared_error(Y_test, Y_test_predict), 2)
    print("mean_squared_error : ", mean_squared_error)
    # Median absolute error
    median_absolute_error = round(sm.median_absolute_error(Y_test, Y_test_predict), 2)
    print("median_absolute_error : ", median_absolute_error)
    # Explained variance
    explained_variance_score = round(sm.explained_variance_score(Y_test, Y_test_predict), 2)
    print("explained_variance_score : ", explained_variance_score)
    # R squared
    R2_score = round(sm.r2_score(Y_test, Y_test_predict), 2)
    print("R2_score : ", R2_score)
    return {
        "mean_absolute_error": mean_absolute_error,
        "mean_squared_error": mean_squared_error,
        "median_absolute_error": median_absolute_error,
        "explained_variance_score": explained_variance_score,
        "R2_score": R2_score,
    }


# Save the model
def saveLR(linear_regressor, out_model_file=MODEL_FILE):
    """Pickle ``linear_regressor`` to ``out_model_file``.

    Args:
        linear_regressor: the object to serialize.
        out_model_file: destination path; overwritten if it exists.
    """
    # Pickle data is bytes: the file must be opened in binary mode. Text
    # mode fails on Python 3 and can corrupt the stream on Windows.
    with open(out_model_file, 'wb') as f:
        pickle.dump(linear_regressor, f)


# Load the model
def loadLR(model_file=MODEL_FILE):
    """Load and return the object pickled in ``model_file``.

    Args:
        model_file: path of a file written by ``saveLR``.

    Returns:
        The unpickled object.
    """
    # NOTE(security): unpickling can execute arbitrary code; only load
    # model files that come from a trusted source.
    with open(model_file, 'rb') as f:
        model_linregr = pickle.load(f)
    return model_linregr


def main():
    """Run the whole example: load, split, fit, evaluate, save and reload."""
    X, Y = getXY()
    X_train, Y_train = getTrainData(X, Y)
    X_test, Y_test = getTestData(X, Y)
    linear_regressor, Y_train_predict = getPredict(X_train, Y_train)
    # Plotting is disabled by default; uncomment to display the figures.
    #showLR(X_train,Y_train,Y_train_predict)
    Y_test_predict = getTestPredict(linear_regressor, X_test)
    #showTestLR(X_test,Y_test,Y_test_predict)
    getError(Y_test, Y_test_predict)
    saveLR(linear_regressor)
    model_linear = loadLR()


if __name__ == "__main__":
    main()
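# 一元线性回归实例  (article title: "Simple linear regression example")
# 最新推荐文章于 2024-01-18 14:40:46 发布  (web-page footer: "latest recommended article published 2024-01-18 14:40:46")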