MachineLearningHomework1: LinearRegression (python)_machine learning homework set 2: linear models-CSDN博客

本文链接：https://blog.csdn.net/qq_41704837/article/details/88034960

算是自己整个完成的，没有参考别人的，留念。

import numpy as np
import pandas as pd
import math
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import pickle

# Load the dataset: a headerless CSV whose two columns are named here
# 'Population' and 'Profit'.
data = pd.read_csv(
    r'G:\Code\Python\untitled\ML_wu_homework\machine-learning-ex1\ex1\ex1data1.txt',
    header=None,
    names=['Population', 'Profit'],
)

# Preprocessing: x becomes a NumPy array, while y stays a pandas Series
# (y_raw holds an array copy of it).
# Feature scaling via preprocessing.scale(x) is left disabled.
x = np.array(data['Population'])
y = data['Profit']
y_raw = np.array(y)

# Hold out the last 20% of the rows (rounded up) as the test set; the
# leading rows that remain are rebound to x / y for the later split.
test_num = int(math.ceil(len(x) * 0.2))
x_test, x = x[-test_num:], x[:-test_num]
y_test, y = y[-test_num:], y[:-test_num]

# Randomly split the remaining samples: 25% go to validation, the rest to
# training.
x_train, x_validation, y_train, y_validation = train_test_split(
    x, y, test_size=0.25
)

# Linear regression. The 1-D x array is reshaped into a single-column 2-D
# matrix before fitting; fit() returns the estimator itself, so the call can
# be chained onto the constructor.
clf = LinearRegression(n_jobs=-1).fit(x_train.reshape(-1, 1), y_train)

# Save the fitted model to disk and immediately load it back, so that `clf`
# from here on is the unpickled copy.
model_path = 'homework1.pickle'
with open(model_path, 'wb') as wf:
    pickle.dump(clf, wf)
with open(model_path, 'rb') as rf:
    clf = pickle.load(rf)

# Print the R^2 score of the fitted model on the training, validation and
# held-out test subsets. Each 1-D x array is reshaped into a single-column
# matrix before being passed to score().
r2_train = clf.score(x_train.reshape(-1, 1), y_train)
print('train r2 is: ' + str(r2_train))

r2_validation = clf.score(x_validation.reshape(-1, 1), y_validation)
print('validation r2 is: ' + str(r2_validation))

# Report the test score itself here, not the validation score.
r2_test = clf.score(x_test.reshape(-1, 1), y_test)
print('test r2 is: ' + str(r2_test))

# Plot the results to inspect the fit visually.

# Figure 1: training samples (scatter) with the model's predictions for the
# same x values drawn as a green line.
plt.figure(num=1)
y_train_pre = clf.predict(x_train.reshape(-1, 1))
plt.plot(x_train, y_train_pre, 'g')
plt.scatter(x_train, y_train)

# Figure 2: validation samples (scatter) with the model's predictions for the
# same x values drawn as a red line.
plt.figure(num=2)
y_validition_pre = clf.predict(x_validation.reshape(-1, 1))
plt.plot(x_validation, y_validition_pre, 'r')
plt.scatter(x_validation, y_validation)

# Display both figures. No argument-less plt.plot() calls are needed: they
# add nothing to the axes.
plt.show()

# Print the fitted parameters: each label on its own line, followed by the
# corresponding attribute of the model (coefficients, then intercept).
for label, value in (
    ('Estimated coefficients for the linear regression problem is: ', clf.coef_),
    ('Independent term in the linear model is: ', clf.intercept_),
):
    print(label)
    print(value)