# Part 1: __init__.py (main script)
import numpy as np
import scipy.io as sio
import scipy.optimize as opt
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import function as f
# Load the dataset: training set (X, y), cross-validation set (Xval, yval),
# and test set (Xtest, ytest). Arrays are assumed 1-D here -- TODO confirm
# against f.load_data().
X, y, Xval, yval, Xtest, ytest = f.load_data()

# Visualize the training data as a 2-D scatter plot.
df = pd.DataFrame({'water_level': X, 'flow': y})  # two-column table for seaborn
# FIX: seaborn deprecated positional x/y in 0.11 and removed them in 0.12,
# so they must be passed by keyword; values are unchanged.
sns.lmplot(x='water_level', y='flow', data=df, fit_reg=False, height=7)
# plt.show()

# Prepend a column of ones (the bias/intercept term x0) to each design matrix,
# turning each 1-D vector into an (m, 2) matrix [1, x].
X, Xval, Xtest = [np.insert(x.reshape(x.shape[0], 1), 0, np.ones(x.shape[0]), axis=1)
                  for x in (X, Xval, Xtest)]

# With theta = 1, inspect the cost and the gradient.
# theta = np.ones(X.shape[1])
# print(f.cost(theta, X, y))
# print(f.gradient(theta, X, y))
# print(f.regularized_gradient(theta, X, y))

# Optimize theta.
# theta = np.ones(X.shape[1])  # FIX: was X.shape[0]; theta needs one entry per feature
# final_theta = f.linear_regression_np(X, y, l = 0).get('x')

# Plot the fitted line.
# b = final_theta[0] # intercept
# m = final_theta[1] # slope
# plt.scatter(X[:,1], y, label="Training data")
# plt.plot(X[:, 1], X[:, 1]*m + b, label="Prediction")
# plt.legend(loc=2)
# plt.show()

# Accumulators for the learning-curve errors on the training and CV sets.
training_cost, cv_cost = [], []
# 1. Fit the model on subsets of the training set.
# 2. Compute training cost and cross-validation cost WITHOUT regularization.
# 3. Remember to use the same training-set subset for both costs.