吴恩达机器学习# bias vs variance (偏差和方差)

` starmultiple `

已于 2022-08-20 12:32:37 修改

阅读量544

点赞数

分类专栏：笔记文章标签： python 机器学习

于 2022-08-15 17:18:25 首次发布

本文为博主原创文章，未经本人允许不得转载。

本文链接：https://blog.csdn.net/starmultiple/article/details/126281408

版权

笔记专栏收录该内容

48 篇文章 1 订阅

订阅专栏

在这里插入图片描述

评估假设

在这里插入图片描述

模型选择

在这里插入图片描述
高偏差和高方差

正则化和偏差、方差

在这里插入图片描述

学习曲线

训练误差随着训练集样本数 m 增大而增大
验证误差和测试集误差随着训练集样本数 m 增大而减小
在这里插入图片描述
在高偏差条件下，增加数据数量是不能减小误差的
·1 调试学习算法
假设你已经实现了正则化线性回归来预测房价。然而，当你在一组新的房屋数据上测试假设函数时，发现预测结果出现了不可接受的大误差。下一步我们可以尝试：

增加训练数据集	解决高方差问题
使用更少的特征	解决高方差问题
使用更多特征	解决高偏差问题
增加多项式特征	解决高偏差问题
lambda增大	解决高方差问题
lambda减少	解决高偏差问题

例子

# 前半部分实现正则化线性回归，利用水库水位的变化来预测大坝的出水量；后半部分通过调试学习算法来诊断偏差和方差。
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.io as sio
import scipy.optimize as opt
import seaborn as sns



def load_data(path):
    """Load the six data arrays from a MATLAB ``.mat`` file as flat vectors.

    Args:
        path: Location of the ``.mat`` file, passed straight to
            ``scipy.io.loadmat``.

    Returns:
        A lazy ``map`` iterator yielding, in order, the raveled (1-D)
        arrays stored under the keys ``X``, ``y``, ``Xval``, ``yval``,
        ``Xtest`` and ``ytest``.  It can be consumed only once, which is
        enough for tuple unpacking at the call site.

    Raises:
        KeyError: If any of the six keys is missing from the file.
    """
    contents = sio.loadmat(path)
    wanted_keys = ('X', 'y', 'Xval', 'yval', 'Xtest', 'ytest')
    # Look every key up eagerly so a missing one fails here, not on iteration.
    arrays = [contents[key] for key in wanted_keys]
    return map(np.ravel, arrays)

# Load the training / validation / test splits as flat 1-D arrays.
# NOTE(review): the path is machine-specific; point it at your own copy of
# ex5data1.mat before running.
X, y, Xval, yval, Xtest, ytest = load_data(r'C:\\Users\\仲以昕\\Desktop\\ex5data1.mat')

# Notebook-style inspection of the split sizes (no visible effect in a script).
X.shape, Xval.shape, Xtest.shape

# Scatter plot of the training set only (fit_reg=False disables the fitted line).
df = pd.DataFrame({'Water level': X, 'Flowing out': y})
# x and y must be passed by keyword: recent seaborn releases (0.12+) no
# longer accept them positionally and raise a TypeError instead.
sns.lmplot(x='Water level', y='Flowing out', data=df, fit_reg=False, height=7)
plt.show()

在这里插入图片描述

# Scatter plot of water level against outflow via the pandas plotting API.
df.plot(
    kind='scatter',
    x='Water level',
    y='Flowing out',
    figsize=(12, 8),
)

在这里插入图片描述

# Turn each 1-D feature vector into a two-column design matrix whose first
# column is all ones (the intercept term).
augmented = []
for column in (X, Xval, Xtest):
    as_matrix = column.reshape(-1, 1)
    bias = np.ones(column.shape[0])
    augmented.append(np.insert(as_matrix, 0, bias, axis=1))
X, Xval, Xtest = augmented
print(X.shape, Xval.shape, Xtest.shape)


#cost function


def regularized_cost(theta, X, y, l=1):
    """Return the regularized squared-error cost of a linear model.

    The cost is ``sum((X @ theta - y)**2) / (2m)`` plus the penalty
    ``l * sum(theta[1:]**2) / (2m)``; the first coefficient (intercept)
    is excluded from the penalty.

    Args:
        theta: Parameter vector of length n (NumPy array).
        X: Design matrix of shape (m, n).
        y: Target vector of length m.
        l: Regularization strength (lambda).

    Returns:
        The scalar cost.  ``theta`` itself is not modified.
    """
    n_samples = X.shape[0]
    scale = 2 * n_samples

    residual = X @ theta.T - y
    data_term = residual @ residual.T / scale

    # Work on a copy so zeroing the intercept does not touch the caller's theta.
    penalized = np.array(theta)
    penalized[0] = 0
    penalty_term = penalized @ penalized.T * l / scale

    return data_term + penalty_term

# Sanity check: evaluate the cost at an all-ones parameter vector with the
# default l=1.  The value is discarded when run as a script (notebook residue).
theta = np.ones(X.shape[1])
regularized_cost(theta, X, y)

#gradient

def regularized_gradient(theta, X, y, l=1):
    """Return the gradient of the regularized squared-error cost.

    Args:
        theta: Parameter vector of length n (NumPy array).
        X: Design matrix of shape (m, n).
        y: Target vector of length m.
        l: Regularization strength (lambda).

    Returns:
        Array of length n: ``(X @ theta - y) @ X / m`` plus ``l / m`` times
        ``theta`` with its first entry (the intercept) zeroed.  ``theta``
        itself is not modified.
    """
    n_samples = X.shape[0]

    residual = X @ theta.T - y
    data_grad = residual @ X / n_samples

    # Copy before zeroing so the intercept is left out of the penalty
    # without mutating the caller's array.
    shrink = np.array(theta)
    shrink[0] = 0

    return data_grad + l / n_samples * shrink

# Fitting linear regression
# Author's note: the script calls this with l=0 because lambda matters
# little for the plain linear model.
def linear_regression(X, y, l=1):
    """Fit regularized linear regression by numerical minimization.

    Minimizes ``regularized_cost`` with SciPy's TNC method, starting from
    an all-ones parameter vector and using ``regularized_gradient`` as the
    Jacobian.

    Args:
        X: Design matrix of shape (m, n).
        y: Target vector of length m.
        l: Regularization strength (lambda) forwarded to the cost and
            gradient functions.

    Returns:
        The result object of ``scipy.optimize.minimize``; the fitted
        parameters are in its ``x`` field.
    """
    initial_theta = np.ones(X.shape[1])
    return opt.minimize(
        fun=regularized_cost,
        x0=initial_theta,
        args=(X, y, l),
        method='TNC',
        jac=regularized_gradient,
        options={'disp': True},
    )

# Fit an unregularized straight line to the training data.
theta = linear_regression(X, y, l=0).get('x')
print(theta)

# The model is a*x + b: theta[0] is the intercept, theta[1] the slope.
b, a = theta[0], theta[1]
x = X[:, 1:]  # feature column only, without the leading bias column

# Overlay the fitted line on the training scatter.
fig, ax = plt.subplots(figsize=(12, 8))
ax.scatter(x, y, label='Training data')
ax.plot(x, a * x + b, label='Prediction', c='r')
ax.legend(loc='best')
plt.show()

在这里插入图片描述


# Learning curves
# Plot the training and cross-validation error as the training subset grows.

m = X.shape[0]
training_cost, cv_cost = [], []

for subset_size in range(1, m + 1):
    X_sub, y_sub = X[:subset_size, :], y[:subset_size]
    fit = linear_regression(X_sub, y_sub, l=0)

    # Training error is measured on the subset used for fitting,
    # validation error always on the full validation set.
    training_cost.append(regularized_cost(fit.x, X_sub, y_sub, l=0))
    cv_cost.append(regularized_cost(fit.x, Xval, yval, l=0))

subset_sizes = np.arange(1, m + 1)
fig, ax = plt.subplots(figsize=(12, 8))
for costs, curve_label in ((training_cost, 'Training cost'), (cv_cost, 'Cv cost')):
    ax.plot(subset_sizes, costs, label=curve_label)
ax.legend(loc='best')
plt.show()

# Author's conclusion from the learning curve: this is a high-bias
# (underfitting) model; as more data is added, the training and validation
# errors both stay high and close to each other.

在这里插入图片描述

#Polynomial regression
def normalize_feature(df):
    """Standardize every column of a DataFrame independently.

    Args:
        df: pandas DataFrame with numeric columns.

    Returns:
        A new DataFrame in which each column has had its own mean
        subtracted and been divided by its own standard deviation
        (pandas ``Series.std``).
    """
    def _standardize(column):
        return (column - column.mean()) / column.std()

    return df.apply(_standardize)


def poly_features(x, power, as_ndarray=False):
    """Build the polynomial feature table x, x**2, ..., x**power.

    Args:
        x: 1-D array-like of raw feature values.
        power: Highest exponent to generate.
        as_ndarray: When true, return a NumPy array instead of a DataFrame.

    Returns:
        A DataFrame with columns ``f1`` .. ``f<power>`` where column ``fi``
        holds ``np.power(x, i)``, or its underlying array when
        ``as_ndarray`` is true.
    """
    columns = {}
    for degree in range(1, power + 1):
        columns['f' + str(degree)] = np.power(x, degree)

    frame = pd.DataFrame(columns)
    if as_ndarray:
        return frame.values
    return frame


def prepare_poly_data(*args, power):
    """Expand inputs into normalized polynomial features plus a bias column.

    Each array is expanded with ``poly_features`` into the columns
    x, x**2, ..., x**power.  All sets are then standardized with the column
    means and standard deviations of the FIRST array, which is the training
    set in the expected call order (X, Xval, Xtest).  Validation and test
    data therefore go through exactly the transformation the model is
    fitted on.  Scaling each set by its own statistics, as this function
    used to do, maps the same raw value to different features in different
    sets and distorts the validation and test costs.

    Args:
        *args: 1-D arrays in the order X, Xval, Xtest; the first one
            supplies the normalization statistics.
        power: Highest polynomial degree (keyword-only).

    Returns:
        A list with one array per input, in the same order, each of shape
        (len(x), power + 1) with a leading column of ones.  An empty call
        returns an empty list.
    """
    if not args:
        return []

    frames = [poly_features(x, power) for x in args]

    # Statistics come from the training set only.
    train_mean = frames[0].mean()
    train_std = frames[0].std()

    def finish(frame):
        scaled = ((frame - train_mean) / train_std).values
        return np.insert(scaled, 0, np.ones(scaled.shape[0]), axis=1)

    return [finish(frame) for frame in frames]

# Reload the raw 1-D splits; X, Xval and Xtest were replaced earlier by
# design matrices with a bias column.
X, y, Xval, yval, Xtest, ytest = load_data(r'C:\\Users\\仲以昕\\Desktop\\ex5data1.mat')

# Notebook-style preview of the cubic feature table (discarded in a script).
poly_features(X, power=3)

# Degree-8 polynomial features for all three splits.
poly_sets = prepare_poly_data(X, Xval, Xtest, power=8)
X_p, Xval_p, Xtest_p = poly_sets
print(X_p[:3, :])

#Learning curves

def plot_learning_curve(X, y, Xval, yval, l=0):
    """Plot training and cross-validation cost against training-set size.

    For every prefix of the training data (1 .. m rows) a model is fitted
    with regularization strength ``l``; its cost on that prefix and on the
    whole validation set is recorded and both curves are drawn.

    Args:
        X: Training design matrix of shape (m, n).
        y: Training targets of length m.
        Xval: Validation design matrix.
        yval: Validation targets.
        l: Regularization strength used for fitting only.

    Returns:
        None.  The figure is displayed with ``plt.show()``.
    """
    m = X.shape[0]
    training_cost = []
    cv_cost = []

    for n_used in range(1, m + 1):
        X_head, y_head = X[:n_used, :], y[:n_used]
        theta = linear_regression(X_head, y_head, l=l).x

        # Costs are reported without the penalty term (l=0); the penalty
        # is only used while fitting.
        training_cost.append(regularized_cost(theta, X_head, y_head, l=0))
        cv_cost.append(regularized_cost(theta, Xval, yval, l=0))

    sizes = np.arange(1, m + 1)
    fig, ax = plt.subplots(figsize=(12, 8))
    for costs, curve_label in ((training_cost, 'Training cost'), (cv_cost, 'Cv cost')):
        ax.plot(sizes, costs, label=curve_label)
    ax.legend(loc='best', title=r'$\lambda={}$'.format(l))
    plt.show()
# Author's note: with lambda = 0 the training cost is too low (the original
# remark is cut off here; presumably it points at overfitting).
# Draw the learning curve for lambda = 0 and then for lambda = 1.
for strength in (0, 1):
    plot_learning_curve(X_p, y, Xval_p, yval, l=strength)

在这里插入图片描述

# Learning curve for the polynomial features with a much larger
# regularization strength (l=100).
plot_learning_curve(X_p, y, Xval_p, yval, l=100)

在这里插入图片描述

# Selecting lambda
# Fit once per candidate strength and record the unpenalized cost on the
# training, cross-validation and test sets.

candidate_l = [0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10]
training_cost, cv_cost, test_cost = [], [], []

evaluation_sets = (
    (training_cost, X_p, y),
    (cv_cost, Xval_p, yval),
    (test_cost, Xtest_p, ytest),
)

for l in candidate_l:
    fitted_theta = linear_regression(X_p, y, l).x
    for costs, features, targets in evaluation_sets:
        costs.append(regularized_cost(fitted_theta, features, targets, l=0))

fig, ax = plt.subplots(figsize=(12, 8))
curves = (
    (training_cost, 'Training'),
    (cv_cost, 'Cross validation'),
    (test_cost, 'Testing'),
)
for costs, curve_label in curves:
    ax.plot(candidate_l, costs, label=curve_label)
ax.legend(loc='best')
ax.set_xlabel(r'$\lambda$')
ax.set_ylabel('cost')
plt.show()

在这里插入图片描述

# Lambda with the lowest cross-validation cost (the author's run reported 1).
# The value is only displayed in a notebook; a script discards it.
candidate_l[np.argmin(cv_cost)]
# Lambda with the lowest test cost (the author's run reported 0.3).
candidate_l[np.argmin(test_cost)]

在这里插入图片描述

` starmultiple `

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
打赏
0
评论
吴恩达机器学习# bias vs variance (偏差和方差)

吴恩达机器学习# bias vs variance (偏差和方差)
复制链接

扫一扫

专栏目录