# Completed entirely on my own, without consulting anyone else's work; kept as a keepsake.
import numpy as np
import pandas as pd
import math
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import pickle
# Load the dataset: the file has no header row, so the two columns are
# named 'Population' and 'Profit' here.
data = pd.read_csv(
    r'G:\Code\Python\untitled\ML_wu_homework\machine-learning-ex1\ex1\ex1data1.txt',
    header=None,
    names=['Population', 'Profit'],
)

# Preprocessing: the feature becomes a NumPy array; the target stays a
# pandas Series, with an array copy of it kept separately in y_raw.
x = np.array(data['Population'])
# Feature scaling is currently switched off:
# x = preprocessing.scale(x)
y = data['Profit']
y_raw = np.array(y)

# Hold out the trailing 20% of the rows (rounded up) as the test set.
test_num = int(math.ceil(0.2 * len(x)))
x_test, y_test = x[-test_num:], y[-test_num:]
x, y = x[:-test_num], y[:-test_num]

# Split the remaining rows into training (75%) and validation (25%) subsets.
x_train, x_validation, y_train, y_validation = train_test_split(
    x,
    y,
    test_size=0.25,
)
# Linear regression: fit the model on the training subset.
# reshape(-1, 1) turns the 1-D feature vector into a single-column matrix,
# the 2-D layout that fit() expects.
clf = LinearRegression(n_jobs=-1)
clf.fit(x_train.reshape(-1, 1), y_train)

# Save the fitted model to disk, then read it back.
# NOTE(review): the fit above runs on every execution and the file is
# overwritten each time, so this round-trip does not skip any computation
# yet; reuse of a saved model would need a load-if-present check.
with open('homework1.pickle', 'wb') as wf:
    pickle.dump(clf, wf)
# The file was written by this same script just above; pickle must not be
# used to load files from untrusted sources.
with open('homework1.pickle', 'rb') as rf:
    clf = pickle.load(rf)
# Report the R^2 score of the fitted model on each of the three subsets.
r2_train = clf.score(x_train.reshape(-1, 1), y_train)
print('train r2 is: ' + str(r2_train))
r2_validation = clf.score(x_validation.reshape(-1, 1), y_validation)
print('validation r2 is: ' + str(r2_validation))
r2_test = clf.score(x_test.reshape(-1, 1), y_test)
# Print the test score itself; the validation score was previously printed
# a second time under the 'test' label.
print('test r2 is: ' + str(r2_test))
# Plot the fitted line against the data to inspect the fit visually.
# Figure 1: training points with the model's predictions drawn in green.
plt.figure(num=1)
y_train_pre = clf.predict(x_train.reshape(-1, 1))
plt.plot(x_train, y_train_pre, 'g')
plt.scatter(x_train, y_train)

# Figure 2: validation points with the model's predictions drawn in red.
plt.figure(num=2)
y_validition_pre = clf.predict(x_validation.reshape(-1, 1))
plt.plot(x_validation, y_validition_pre, 'r')
plt.scatter(x_validation, y_validation)

# Display both figures. The argument-less plt.plot() calls that used to
# follow each scatter were removed: with no data they add nothing to a figure.
plt.show()
# Print the fitted slope coefficient(s) and the intercept of the model.
# A stray bare `i` expression that followed these prints was removed: the
# name is never defined, so evaluating it raised NameError at the end of
# the run.
print('Estimated coefficients for the linear regression problem is: ')
print(clf.coef_)
print('Independent term in the linear model is: ')
print(clf.intercept_)