Python implementation of Andrew Ng's Machine Learning Exercise 5 (Bias vs. Variance)

Regularized Linear Regression and Bias vs. Variance

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.io as io
import scipy.optimize as opt
data = io.loadmat('D:/python/practise/sample/machine-learning-ex5/ex5/ex5data1.mat')
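A quick optional sanity check right after loading: list the variables stored in the .mat file and their shapes (the rest of the write-up relies on 12 training and 21 validation examples).

# list the non-metadata variables in ex5data1.mat and their shapes
for key, val in data.items():
    if not key.startswith('__'):
        print(key, val.shape)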

1 Regularized Linear Regression

1.1 visualizing the dataset

X, y, Xval, yval, Xtest, ytest = data['X'], data['y'], data['Xval'], data['yval'], data['Xtest'], data['ytest']
# flatten y, yval, ytest into 1-D arrays
y = y.reshape(-1) 
yval = yval.reshape(-1)
ytest = ytest.reshape(-1)
fig, ax = plt.subplots(1, 3, figsize=(16, 5))
ax[0].scatter(x = X, y = y, marker = 'x', color = 'r')
ax[1].scatter(x = Xval, y = yval, marker = 'x', color = 'b')
ax[2].scatter(x = Xtest, y = ytest, marker = 'x', color = 'g')
ax[0].set_title('train')
ax[1].set_title('validation')
ax[2].set_title('test')

[Figure: scatter plots of the training, validation, and test sets]

1.2 regularized linear regression cost function

$$J(\theta)=\frac{1}{2m}\left(\sum^m_{i=1}\left(h_{\theta}(x^{(i)})-y^{(i)}\right)^2\right)+\frac{\lambda}{2m}\left(\sum^n_{j=1}\theta^2_j\right)$$

Tips: $h_{\theta}(x)=\theta^T x$

# add the x0 (bias) column
X_1 = np.insert(X, 0, 1, axis = 1)
Xval_1 = np.insert(Xval, 0, 1, axis = 1)
Xtest_1 = np.insert(Xtest, 0, 1, axis = 1)
def J_func(theta, x, y):
    # unregularized cost: half the mean squared error
    cost = np.square(x.dot(theta.T) - y).mean() / 2
    return cost
def J_func_reg(theta, x, y, c=1):
    # regularized cost: theta_0 is excluded from the penalty term
    theta_ = theta[1:]
    reg = c * theta_.dot(theta_.T) / (2 * len(x))
    return J_func(theta, x, y) + reg

Following the assignment PDF, set theta to all ones and evaluate the cost function:

theta = np.ones(2)  # theta set to all ones, as in the assignment PDF
J_func_reg(theta, X_1, y, c=1)
303.9931922202643

1.3 regularized linear regression gradient

$$\frac{\partial J(\theta)}{\partial \theta_0}=\frac{1}{m}\sum^m_{i=1}\left(h_{\theta}(x^{(i)})-y^{(i)}\right)x^{(i)}_0 \qquad \text{for } j=0$$
$$\frac{\partial J(\theta)}{\partial \theta_j}=\left(\frac{1}{m}\sum^m_{i=1}\left(h_{\theta}(x^{(i)})-y^{(i)}\right)x^{(i)}_j\right)+\frac{\lambda}{m}\theta_j \qquad \text{for } j\geq 1$$

def gradient_reg(theta, x, y, c=1):
    # zero out theta_0 so the bias term is not regularized
    theta_ = theta.copy()
    theta_[0] = 0
    gradient = ((x.dot(theta.T) - y).T.dot(x)) / len(x)
    reg = (c / len(x)) * theta_
    return gradient + reg
gradient_reg(theta, X_1, y, c=1)
array([-15.30301567, 598.25074417])
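As an optional sanity check (not part of the assignment), the analytic gradient can be compared with a finite-difference estimate using scipy.optimize.check_grad; the returned difference should be close to zero.

# |analytic - numerical| gradient at theta; a small value means
# gradient_reg is consistent with J_func_reg
print(opt.check_grad(J_func_reg, gradient_reg, np.ones(2), X_1, y, 1))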

1.4 fitting linear regression

def optimize_tnc(x, y, c):
    # minimize the regularized cost with the truncated Newton (TNC) optimizer,
    # starting from theta = 0
    theta = np.zeros(x.shape[1])
    result = opt.fmin_tnc(J_func_reg, x0 = theta, fprime = gradient_reg, args = (x, y, c))
    return result
res = optimize_tnc(X_1, y, c=0)
res
(array([13.08790351,  0.36777923]), 9, 0)
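fmin_tnc is the legacy interface; the same fit can also be written with the newer scipy.optimize.minimize API. A sketch equivalent to optimize_tnc above:

# same minimization via the unified minimize() interface, method='TNC'
res_alt = opt.minimize(J_func_reg, x0=np.zeros(X_1.shape[1]), jac=gradient_reg,
                       args=(X_1, y, 0), method='TNC')
print(res_alt.x)  # should be close to res[0] above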

plot the best fit line

plt.figure(figsize = (7, 7))
plt.plot(X_1[:, 1].reshape(-1), X_1.dot(res[0]))
plt.scatter(X_1[:, 1], y, marker = 'x', color = 'r')
plt.grid(True)

[Figure: best-fit line over the training data]

2 Bias-Variance

2.1 learning curves

Implement code to generate the learning curves.
For a training set size of i, you should use the first i examples of X and y.
After learning the $\theta$ parameters, you should compute the error on the training and cross-validation sets. Recall that the training and CV error for a dataset is defined as:
$$J_{train}(\theta)=\frac{1}{2m}\left[\sum^{m}_{i=1}\left(h_{\theta}(x^{(i)})-y^{(i)}\right)^2\right]$$

$$J_{cv}(\theta)=\frac{1}{2m_{cv}}\left[\sum^{m_{cv}}_{i=1}\left(h_{\theta}(x_{cv}^{(i)})-y_{cv}^{(i)}\right)^2\right]$$
When computing the training set error, make sure you compute it on the training subset (i.e., X[:i, :] and y[:i]) rather than on the entire training set.

def learing_curve_points(Xtrain, ytrain, Xval, yval, c):
    # for each training-set size, fit on the first i+1 examples and record the
    # unregularized error on that subset and on the full validation set
    jtrain = []
    jcv = []
    for i in range(len(Xtrain)):
        res = optimize_tnc(Xtrain[:(i+1), :], ytrain[:(i+1)], c)
        jtrain.append(J_func(res[0], Xtrain[:(i+1), :], ytrain[:(i+1)]))
        jcv.append(J_func(res[0], Xval, yval))
    return np.array(jtrain), np.array(jcv)
jtrain_learn, jcv_learn = learing_curve_points(X_1, y, Xval_1, yval, c = 0)
plt.figure(figsize = (9, 6))
plt.plot(np.arange(1, 13), jtrain_learn, label='J_train')
plt.plot(np.arange(1, 13), jcv_learn, label='J_cv')
plt.xlim([0,12])
plt.ylim([0,225])
plt.xticks(np.arange(13))
plt.yticks(np.arange(225, step=25))
plt.grid(True)
plt.legend(loc='best')
plt.ylabel('error')
plt.xlabel('number of training examples')
plt.title('Learning curve for linear regression', fontsize=18)

[Figure: learning curve for linear regression (training vs. cross-validation error)]

3 Polynomial regression

# this time, generate the univariate polynomial features with plain numpy
def generate_polynomial(x, power):
    x_ = x.copy()
    for i in range(2, power+1):
        x_ = np.c_[x_, x_[:, 1]**i]
    return x_
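A tiny illustration of what generate_polynomial produces (hypothetical input; the bias column is assumed to already be present, and powers 2..power of the first feature are appended as new columns):

demo = np.array([[1., 2.],
                 [1., 3.]])
print(generate_polynomial(demo, 3))
# [[ 1.  2.  4.  8.]
#  [ 1.  3.  9. 27.]]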

3.1 learning polynomial regression

Standardize the polynomial-expanded dataset (see lecture 30: feature scaling)
– feature scaling helps gradient descent run quite a lot faster (do not apply it to x0)

def normalize_scaling(x):
    x_ = x.copy()
    x_ = x_[:, 1:]
    normal = (x_ - x_.mean(axis=0)) / x_.std(axis=0, ddof=1) 
    # ddof means Delta Degrees of Freedom: the divisor used in the calculation is
    # N - ddof, where N is the number of elements; by default ddof is zero.
    # ddof=0 gives the population standard deviation, ddof=1 the sample standard deviation.
    return np.insert(normal, 0, 1, axis=1), x_.mean(axis=0), x_.std(axis=0, ddof=1)
def like_normalize(x, mean, std):
    x_ = x.copy()
    x_ = x_[:, 1:]
    thing = (x_ - mean) / std
    return np.insert(thing, 0, 1, axis = 1)
# apply polynomial expansion and feature scaling to the training data
X_1_poly = generate_polynomial(X_1, 6)
X_1_poly_norm, train_mean, train_std = normalize_scaling(X_1_poly)
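A quick optional check that the scaling behaves as intended: after normalize_scaling, every polynomial feature of the training set should have mean close to 0 and sample standard deviation close to 1.

# means should be ~0 and sample stds ~1 for all non-bias columns
print(X_1_poly_norm[:, 1:].mean(axis=0))
print(X_1_poly_norm[:, 1:].std(axis=0, ddof=1))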

The Andrew Ng assignment PDF uses an 8th-degree polynomial, but with that setting the resulting plots do not match the figures in the PDF (several other write-ups report the same issue and attribute it to differences between the MATLAB and Python/scipy optimizers), so a 6th-degree polynomial is used here, which reproduces the figures.
Note: when plotting the fitted curve, the points along the extended x-axis must also be normalized before computing $h_\theta(x)$, i.e. $X^*=\frac{X-\mu}{\sigma}$, where $\mu$ and $\sigma$ are the training-set statistics for each corresponding power.

def poly_lambda(c):
    res2 = optimize_tnc(X_1_poly_norm, y, c)
    print('theta=', res2[0])
    xs = np.linspace(-75, 55, 60).reshape((-1, 1))
    xs_1 = np.insert(xs, 0, 1, axis = 1)
    # use degree 6
    xs_1_poly = generate_polynomial(xs_1, 6)
    # scale with the training-set mean and std
    thing = (xs_1_poly[:, 1:] - train_mean) / train_std
    xs_1_poly_norm = np.insert(thing, 0, 1, axis = 1)

    plt.figure(figsize = (7, 5))
    plt.plot(xs.reshape(-1), xs_1_poly_norm.dot(res2[0]), 'b--')
    plt.scatter(X_1[:, 1], y, marker = 'x', color = 'r')
    plt.grid(True)
    plt.title(f'Polynomial Regression(lambda={c})')
poly_lambda(c=0)
theta= [ 11.21758931  11.37072     13.43377357  10.74317307  -4.38682157
 -11.9225753   -5.12273063]

[Figure: polynomial regression fit, lambda = 0]

Xval_1_poly_likenorm = like_normalize(generate_polynomial(Xval_1, 6), train_mean, train_std)
Xval_1_poly_likenorm.shape
(21, 7)
jtrain_learn, jcv_learn = learing_curve_points(X_1_poly_norm, y, Xval_1_poly_likenorm, yval, c = 0)
def plot_learnCurve_lambda(c):
    jtrain_learn, jcv_learn = learing_curve_points(X_1_poly_norm, y, Xval_1_poly_likenorm, yval, c)
    plt.figure(figsize = (9, 6))
    plt.plot(np.arange(1, 13), jtrain_learn, label='J_train')
    plt.plot(np.arange(1, 13), jcv_learn, label='J_cv')
    plt.xlim([0,12])
    plt.xticks(np.arange(13))
    plt.grid(True)
    plt.legend(loc='best')
    plt.ylabel('error')
    plt.xlabel('number of training examples')
    plt.title(f'Polynomial Regression Learning Curve(lambda={c})', fontsize=15)
plot_learnCurve_lambda(c=0)

[Figure: polynomial regression learning curve, lambda = 0]

3.2 adjusting the regularization parameter

Try $\lambda=1$ and $\lambda=100$:

poly_lambda(1)
theta= [11.21758834  8.60998012  5.33437646  3.84079294  2.24113677  2.10180568
  0.88936458]

[Figure: polynomial regression fit, lambda = 1]

poly_lambda(100)
theta= [11.21758934  0.98876222  0.30025153  0.77917852  0.11521524  0.59036753
 -0.01185144]

[Figure: polynomial regression fit, lambda = 100]

plot_learnCurve_lambda(c=1)

[Figure: polynomial regression learning curve, lambda = 1]

plot_learnCurve_lambda(c=100)

[Figure: polynomial regression learning curve, lambda = 100]

3.3 selecting $\lambda$ using a cross validation set

lambda_vec = [0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10]
def single_lambda_fit(c):
    res = optimize_tnc(X_1_poly_norm, y, c)
    J_train = J_func(res[0], X_1_poly_norm, y)
    J_cv = J_func(res[0], Xval_1_poly_likenorm, yval)
    return J_train, J_cv
def more_lambda_fit(lambda_vec):
    zipped = []
    for c in lambda_vec:
        element = single_lambda_fit(c)
        zipped.append(element)
    J_train, J_cv = zip(* zipped)
    J_train = np.array(J_train)
    J_cv = np.array(J_cv)
    return J_train, J_cv
def lambda_curve(lambda_vec):
    J_train, J_cv = more_lambda_fit(lambda_vec)
    plt.figure(figsize = (9,6))
    plt.plot(lambda_vec, J_train, color = 'b', label = 'J_train')
    plt.plot(lambda_vec, J_cv, color = 'g', label = 'J_cv')
    plt.xticks(np.arange(11))
    plt.xlabel('lambda')
    plt.ylabel('error')
    plt.grid(True)
    plt.title('Lambda Curve', fontsize = 14)
    plt.legend()
lambda_curve(lambda_vec)

[Figure: training and cross-validation error as a function of lambda]

def gen_2times_lambdaVec(start):
    vec = [0, start]
    for i in range(1,11):
        vec.append(vec[-1]*2)
    return vec    
# as required in the course, double lambda repeatedly up to roughly 10
times_lambdaVec = gen_2times_lambdaVec(0.01)
times_lambdaVec
[0, 0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28, 2.56, 5.12, 10.24]
lambda_curve(times_lambdaVec)

[Figure: lambda curve for the doubling lambda vector]
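The test split loaded at the top (Xtest, ytest) has not been used so far. As an optional follow-up (section 3.4 of the original assignment), the lambda picked from the cross-validation curve can be evaluated once on the test set. A sketch, where best_c = 3 is an assumed example value read off the curve above:

# expand and scale the test set exactly like the validation set,
# reusing the training-set mean/std
Xtest_1_poly_likenorm = like_normalize(generate_polynomial(Xtest_1, 6), train_mean, train_std)
best_c = 3  # assumed value; use whichever lambda minimizes J_cv above
res_best = optimize_tnc(X_1_poly_norm, y, best_c)
print(J_func(res_best[0], Xtest_1_poly_likenorm, ytest))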
