CS229 吴恩达机器学习习题答案 problem sets 01 （代码题目部分，欢迎各位前辈指教）

最新推荐文章于 2022-03-03 15:34:55 发布

ML--小小白

最新推荐文章于 2022-03-03 15:34:55 发布

阅读量1.7k

点赞数 2

分类专栏： CS229 文章标签： 机器学习 人工智能 深度学习

本文链接：https://blog.csdn.net/qq_26928055/article/details/122177240

版权

CS229 专栏收录该内容

8 篇文章 7 订阅

订阅专栏

p01b_logreg

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os


plt.rcdefaults()
# import util
# from linear_model import LinearModel

def main(train_path, eval_path, pred_path):
    """Problem 1(b): Logistic regression with Newton's Method.

    Fits the classifier on the training CSV, scores the evaluation CSV,
    writes the predicted probabilities next to ``pred_path`` and draws the
    fitted decision boundary over both datasets.

    Args:
        train_path: Path to CSV file containing dataset for training.
        eval_path: Path to CSV file containing dataset for evaluation.
        pred_path: Prefix (directory with trailing separator) under which the
            prediction CSV and the figure are saved.
    """
    feature_cols = ['x_1', 'x_2']
    train_df = pd.read_csv(train_path)
    valid_df = pd.read_csv(eval_path)
    x_train = train_df.loc[:, feature_cols].values
    y_train = train_df.loc[:, 'y'].values.reshape((-1, 1))
    x_valid = valid_df.loc[:, feature_cols].values
    y_valid = valid_df.loc[:, 'y'].values.reshape((-1, 1))

    # *** START CODE HERE ***
    clf = LogisticRegression()
    clf.fit(x_train, y_train)
    clf.predict(x_valid)
    probabilities = clf.predict_result

    # Mean absolute difference between predicted probability and label.
    error_rate = np.abs(probabilities - y_valid).sum() / y_valid.shape[0]
    eval_name = os.path.basename(eval_path)
    df_predict = pd.DataFrame(probabilities, index=None, columns=['y_predict'])
    df_predict.to_csv(pred_path + eval_name)
    print(f'error rate on validation set is: {error_rate}')

    # One panel per dataset, each with the same fitted boundary in red.
    figure = plt.figure()
    panels = (
        (121, train_df, 'trainning set'),
        (122, valid_df, 'validation set'),
    )
    classes = ((0, 'b', '-'), (1, 'g', '+'))
    for position, frame, title in panels:
        axis = figure.add_subplot(position)
        for label_value, colour, legend_text in classes:
            axis.scatter(
                frame.loc[frame['y'] == label_value, 'x_1'],
                frame.loc[frame['y'] == label_value, 'x_2'],
                c=colour, s=5, label=legend_text,
            )
        axis.legend()
        x1 = frame.loc[:, 'x_1']
        # Boundary: theta0 + theta1 * x1 + theta2 * x2 = 0, solved for x2.
        axis.plot(x1, (-clf.weight[0]-clf.weight[1]*x1)/clf.weight[2], c='r')
        axis.set_title(title, fontsize=15, weight='black')

    plt.savefig(pred_path + eval_name.split('_')[-2] + ".png")
    plt.show()
    # *** END CODE HERE ***


class LogisticRegression:
    """Logistic regression with Newton's Method as the solver.

    An intercept column of ones is prepended to the inputs inside ``fit`` and
    ``predict``, so callers pass the raw feature matrix.

    Example usage:
        > clf = LogisticRegression()
        > clf.fit(x_train, y_train)
        > clf.predict(x_eval)
    """
    def __init__(self):
        # Convergence threshold on the norm of one Newton step.
        self.error = 1e-5
        # Multiplier applied to the Newton step (1.0 = plain Newton).
        self.learning_rate = 1.0

    def sigmoid(self, x, theta):
        """Return the logistic function of ``x @ theta`` element-wise.

        Uses the two algebraically equal forms of the sigmoid so that
        ``np.exp`` is only ever called on non-positive arguments, which
        avoids overflow for large-magnitude scores.

        Args:
            x: Design matrix of shape (m, n).
            theta: Parameters of shape (n, 1).

        Returns:
            Float array with the same shape as ``x @ theta``.
        """
        scores = np.asarray(np.matmul(x, theta), dtype=float)
        result = np.empty_like(scores)
        positive = scores > 0
        result[positive] = 1.0 / (1.0 + np.exp(-scores[positive]))
        exp_scores = np.exp(scores[~positive])
        result[~positive] = exp_scores / (1.0 + exp_scores)
        return result

    def fit(self, x, y):
        """Run Newton's Method to minimize J(theta) for logistic regression.

        The fitted parameters are stored in ``self.weight`` whether or not
        the iteration converged within the iteration cap.

        Args:
            x: Training example inputs. Shape (m, n).
            y: Training example labels. Shape (m,) or (m, 1).

        Returns:
            The fitted parameters, shape (n + 1, 1), intercept first.
        """
        # *** START CODE HERE ***
        x = np.column_stack((np.ones(x.shape[0]), x))
        # Force a column vector so (m,) labels do not broadcast to (m, m).
        y = np.asarray(y, dtype=float).reshape((-1, 1))
        m, n = x.shape
        theta = np.zeros((n, 1))
        max_cycle = 500
        converged = False
        for i in range(max_cycle):
            prob = self.sigmoid(x, theta)
            gradient = x.T @ (prob - y) / m
            # Hessian = X^T diag(p * (1 - p)) X / m, without building diag.
            hessian = (x.T * (prob * (1.0 - prob)).flatten()) @ x / m
            step = self.learning_rate * np.linalg.solve(hessian, gradient)
            theta = theta - step
            if np.linalg.norm(step) < self.error:
                print('迭代次数为{}'.format(i + 1))
                converged = True
                break
        if not converged:
            print(f'达到最大迭代次数限制{max_cycle}')
        print(f'最终权重:\n{theta}')
        # Always publish the result so predict() works after a capped run.
        self.weight = theta
        return theta
        # *** END CODE HERE ***

    def predict(self, x):
        """Make a prediction given new inputs x.

        Args:
            x: Inputs of shape (m, n).

        Returns:
            Predicted probabilities of the positive class, shape (m, 1).
            The same array is stored in ``self.predict_result``.
        """
        # *** START CODE HERE ***
        x = np.column_stack((np.ones(x.shape[0]), x))
        self.predict_result = self.sigmoid(x, self.weight)
        return self.predict_result
        # *** END CODE HERE ***
        # *** END CODE HERE ***

# Driver for problem 1(b): run logistic regression on datasets 1 and 2.
pred_path = './output/'
for i in (1, 2):
    train_path = f'/Users/xiaobai/Documents/CS/CS229/cs229-2018-autumn-main/problem-sets/PS1/data/ds{i}_train.csv'
    eval_path = f'/Users/xiaobai/Documents/CS/CS229/cs229-2018-autumn-main/problem-sets-solutions/PS1/code/data/ds{i}_valid.csv'
    main(train_path, eval_path, pred_path)

迭代次数为7
最终权重:
[[-6.26018491]
 [ 2.47707251]
 [-0.0299125 ]]
error rate on validation set is: 0.18132893797905528

在这里插入图片描述

迭代次数为8
最终权重:
[[ 2.38425454]
 [ 3.6371206 ]
 [-3.81234337]]
error rate on validation set is: 0.13388695090321312

在这里插入图片描述

p01e_gda

import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt


def main2(train_path, eval_path, pred_path):
    """Problem 1(e): Gaussian discriminant analysis (GDA)

    Fits GDA on the training CSV, scores the evaluation CSV, saves the hard
    predictions and plots the linear decision boundary over both datasets.

    Args:
        train_path: Path to CSV file containing dataset for training.
        eval_path: Path to CSV file containing dataset for evaluation.
        pred_path: Prefix (directory with trailing separator) under which the
            prediction CSV and the figure are saved.
    """
    feature_cols = ['x_1', 'x_2']

    # *** START CODE HERE ***
    # Load the training set and fit the model.
    train_df = pd.read_csv(train_path)
    x_train = train_df.loc[:, feature_cols].values
    y_train = train_df.loc[:, 'y'].values.reshape((-1, 1))
    clf = GDA()
    clf.fit(x_train, y_train)

    # Score the validation set.
    valid_df = pd.read_csv(eval_path)
    x_valid = valid_df.loc[:, feature_cols].values
    y_valid = valid_df.loc[:, 'y'].values.reshape((-1, 1))
    clf.predict(x_valid)
    y_pred = clf.pred_result
    error_rate = np.abs(y_pred - y_valid).sum() / y_pred.shape[0]
    print(f'error rate on validation set is: {error_rate:.2f}')
    eval_name = os.path.basename(eval_path)
    pd.DataFrame(y_pred, index=None, columns=['y_predict']).to_csv(pred_path + eval_name)

    # The positive-class marker differs between the two panels.
    figure = plt.figure()
    panels = (
        (121, train_df, '*', 'training set'),
        (122, valid_df, 'x', 'validation set'),
    )
    for position, frame, positive_marker, title in panels:
        axis = figure.add_subplot(position)
        axis.scatter(frame.loc[frame['y'] == 0, 'x_1'], frame.loc[frame['y'] == 0, 'x_2'],
                     c='b', s=5, marker='o', label='-')
        axis.scatter(frame.loc[frame['y'] == 1, 'x_1'], frame.loc[frame['y'] == 1, 'x_2'],
                     c='g', s=5, marker=positive_marker, label='+')
        x1 = frame.loc[:, 'x_1'].values
        # Boundary: theta[0] * x1 + theta[1] * x2 + theta0 = 0, solved for x2.
        boundary = -(x1*clf.theta[0] + clf.theta0)/clf.theta[1]
        axis.plot(x1, boundary.flatten(), c='r')
        axis.legend()
        axis.set_title(title, fontsize=15, weight='black')

    plt.savefig(pred_path + eval_name.split('_')[-2] + ".png")
    plt.show()
    # *** END CODE HERE ***


class GDA():
    """Gaussian Discriminant Analysis with a shared covariance matrix.

    After ``fit`` the linear decision function is
    ``x @ self.theta + self.theta0``; a positive value predicts class 1.

    Example usage:
        > clf = GDA()
        > clf.fit(x_train, y_train)
        > clf.predict(x_eval)
    """

    def fit(self, x, y):
        """Fit a GDA model to training set given by x and y.

        Stores ``self.theta`` (shape (n, 1)) and ``self.theta0``
        (shape (1, 1)).

        Args:
            x: Training example inputs. Shape (m, n).
            y: Training example labels in {0, 1}. Shape (m,) or (m, 1).

        Returns:
            theta: GDA model parameters stacked as ``[theta0; theta]``,
                shape (n + 1, 1).

        Raises:
            ValueError: If either class has no training examples.
        """
        # *** START CODE HERE ***
        x = np.asarray(x, dtype=float)
        labels = np.asarray(y).reshape(-1)
        m = x.shape[0]
        is_neg = labels == 0
        is_pos = labels == 1
        if not is_neg.any() or not is_pos.any():
            raise ValueError('GDA needs at least one example of each class')

        phi = labels.sum() / np.float64(m)
        mu0 = x[is_neg].mean(axis=0).reshape((-1, 1))
        mu1 = x[is_pos].mean(axis=0).reshape((-1, 1))

        # Pooled covariance: centre every sample on its own class mean.
        centered = x - np.where(is_pos[:, None], mu1.T, mu0.T)
        sigma = centered.T @ centered / m
        sigma_inv = np.linalg.inv(sigma)

        self.theta = sigma_inv @ (mu1 - mu0)
        self.theta0 = 0.5 * (mu1 + mu0).T @ sigma_inv @ (mu0 - mu1) - np.log((1 - phi) / phi)
        return np.vstack((self.theta0, self.theta))
        # *** END CODE HERE ***

    def predict(self, x):
        """Make a prediction given new inputs x.

        Args:
            x: Inputs of shape (m, n).

        Returns:
            Hard labels (0.0 or 1.0) of shape (m, 1). The same array is
            stored in ``self.pred_result``.
        """
        # *** START CODE HERE ***
        scores = np.asarray(x, dtype=float) @ self.theta + self.theta0
        # sigmoid(score) > 0.5 is equivalent to score > 0.
        self.pred_result = (scores > 0).astype(float)
        return self.pred_result
        # *** END CODE HERE ***

# Driver for problem 1(e): run GDA on datasets 1 and 2.
pred_path = './output/'
for i in (1, 2):
    train_path = f'/Users/xiaobai/Documents/CS/CS229/cs229-2018-autumn-main/problem-sets/PS1/data/ds{i}_train.csv'
    eval_path = f'/Users/xiaobai/Documents/CS/CS229/cs229-2018-autumn-main/problem-sets-solutions/PS1/code/data/ds{i}_valid.csv'
    main2(train_path, eval_path, pred_path)

error rate on validation set is: 0.17

在这里插入图片描述

error rate on validation set is: 0.09

在这里插入图片描述

p01fg

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

# Problem 1(f)/(g): fit logistic regression and GDA on the same data and draw
# both decision boundaries on the training and validation sets.
for i in range(1, 3):
    train_path = f'/Users/xiaobai/Documents/CS/CS229/cs229-2018-autumn-main/problem-sets/PS1/data/ds{i}_train.csv'
    eval_path = f'/Users/xiaobai/Documents/CS/CS229/cs229-2018-autumn-main/problem-sets-solutions/PS1/code/data/ds{i}_valid.csv'
    pred_path = './output/'
    df = pd.read_csv(train_path)
    df2 = pd.read_csv(eval_path)
    x_train, y_train = df.loc[:, ['x_1', 'x_2']].values, df.loc[:, 'y'].values.reshape((-1, 1))
    x_valid, y_valid = df2.loc[:, ['x_1', 'x_2']].values, df2.loc[:, 'y'].values.reshape((-1, 1))

    # *** START CODE HERE ***
    clf1 = LogisticRegression()
    clf1.fit(x_train, y_train)
    clf1.predict(x_valid)
    error_rate = np.abs(clf1.predict_result - y_valid).sum() / y_valid.shape[0]
    print(f'error rate on validation set with Logistic is: {error_rate}')

    # The validation arrays loaded above are reused; the CSV is not read again.
    clf2 = GDA()
    clf2.fit(x_train, y_train)
    clf2.predict(x_valid)
    y_pred = clf2.pred_result
    error_rate = np.abs(y_pred - y_valid).sum() / y_pred.shape[0]
    print(f'error rate on validation set with GDA is: {error_rate:.2f}')

    fig1 = plt.figure()
    for position, frame, title in ((121, df, 'trainning set'), (122, df2, 'validation set')):
        ax = fig1.add_subplot(position)
        ax.scatter(frame.loc[frame['y']==0, 'x_1'], frame.loc[frame['y']==0, 'x_2'], c='b', s=5, label='-')
        ax.scatter(frame.loc[frame['y']==1, 'x_1'], frame.loc[frame['y']==1, 'x_2'], c='g', s=5, label='+')
        # Logistic boundary: w0 + w1 * x1 + w2 * x2 = 0, solved for x2.
        ax.plot(frame.loc[:, 'x_1'], (-clf1.weight[0]-clf1.weight[1]*frame.loc[:, 'x_1'])/clf1.weight[2], c='r', label='Log')
        # GDA boundary: theta[0] * x1 + theta[1] * x2 + theta0 = 0, solved for x2.
        ax.plot(frame.loc[:, 'x_1'].values, (-(frame.loc[:, 'x_1'].values*clf2.theta[0] + clf2.theta0)/clf2.theta[1]).flatten(), c='orange', label='GDA')
        ax.legend()
        ax.set_title(title, **dict(fontsize=15, weight='black'))

    plt.show()

迭代次数为7
最终权重:
[[-6.26018491]
 [ 2.47707251]
 [-0.0299125 ]]
error rate on validation set with Logistic is: 0.18132893797905528
error rate on validation set with GDA is: 0.17

在这里插入图片描述

迭代次数为8
最终权重:
[[ 2.38425454]
 [ 3.6371206 ]
 [-3.81234337]]
error rate on validation set with Logistic is: 0.13388695090321312
error rate on validation set with GDA is: 0.09

在这里插入图片描述

p02cde_posonly

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

# from p01b_logreg import LogisticRegression

# Character to replace with sub-problem letter in plot_path/pred_path
WILDCARD = 'X'


def main3(train_path, valid_path, test_path, pred_path):
    """Problem 2: Logistic regression for incomplete, positive-only labels.

    Run under the following conditions:
        1. on the true t-labels,
        2. on the partial y-labels,
        3. on the partial y-labels with correction factor alpha.

    Args:
        train_path: Path to CSV file containing training set.
        valid_path: Path to CSV file containing validation set.
        test_path: Path to CSV file containing test set.
        pred_path: Path to save predictions; WILDCARD in it is replaced by
            the sub-problem letter.
    """
    out_c, out_d, out_e = (pred_path.replace(WILDCARD, letter) for letter in 'cde')

    # *** START CODE HERE ***
    feature_cols = ['x_1', 'x_2']
    train_df = pd.read_csv(train_path)
    valid_df = pd.read_csv(valid_path)
    test_df = pd.read_csv(test_path)

    def as_column(frame, name):
        # Values of one CSV column as an (m, 1) array.
        return frame.loc[:, name].values.reshape((-1, 1))

    x_train = train_df.loc[:, feature_cols].values
    x_test = test_df.loc[:, feature_cols].values
    t_test = as_column(test_df, 't')
    n_test = t_test.shape[0]

    # Part (c): Train and test on true labels
    clf_c = LogisticRegression()
    clf_c.fit(x_train, as_column(train_df, 't'))
    clf_c.predict(x_test)
    error_rate_c = np.abs(clf_c.predict_result - t_test).sum() / n_test
    print(f'error rate on test set with Logistic t_label is: {error_rate_c}')
    pd.DataFrame(clf_c.predict_result, index=None, columns=['y_predict']).to_csv(out_c)

    # Part (d): Train on y-labels and test on true labels
    clf_d = LogisticRegression()
    clf_d.fit(x_train, as_column(train_df, 'y'))
    clf_d.predict(x_test)
    error_rate_d = np.abs(clf_d.predict_result - t_test).sum() / n_test
    print(f'error rate on test set with Logistic y_label is: {error_rate_d}')
    pred_d = pd.DataFrame(clf_d.predict_result, index=None, columns=['y_predict'])
    pred_d.to_csv(out_d)

    # Part (e): Apply correction factor using validation set and test on true labels.
    # alpha is the mean prediction of the part (d) model on labelled-positive
    # validation examples.
    x_valid_pos = valid_df.loc[valid_df['y'] == 1, feature_cols].values
    clf_d.predict(x_valid_pos)
    alpha = clf_d.predict_result.sum() / x_valid_pos.shape[0]
    print('alpha', alpha)
    pred_e = pred_d / alpha
    error_rate_e = np.abs(pred_e.values - t_test).sum() / n_test
    print(f'error rate on test set with Logistic fixed_y_label is: {error_rate_e}')
    pred_e.to_csv(out_e)

    # Test points coloured by true label, with all three decision boundaries.
    axis = plt.figure().add_subplot(111)
    for label_value, colour, legend_text in ((0, 'b', '-'), (1, 'g', '+')):
        axis.scatter(
            test_df.loc[test_df['t'] == label_value, 'x_1'],
            test_df.loc[test_df['t'] == label_value, 'x_2'],
            c=colour, s=5, label=legend_text,
        )
    x1 = test_df.loc[:, 'x_1']
    axis.plot(x1, (-clf_c.weight[0]-clf_c.weight[1]*x1)/clf_c.weight[2], c='r', label='real label')
    axis.plot(x1, (-clf_d.weight[0]-clf_d.weight[1]*x1)/clf_d.weight[2], c='y', label='partial label')
    # Corrected boundary: h(x) / alpha = 0.5  <=>  theta^T x = log(alpha / (2 - alpha)).
    axis.plot(x1, (np.log(alpha/(2 - alpha))-clf_d.weight[1]*x1-clf_d.weight[0])/clf_d.weight[2], c='orange', label='modified partial label')
    axis.legend()
    axis.set_ylim(-10, 10)
    plt.show()
    # *** END CODE HERE ***

# Driver for problem 2: the 'X' in pred_path is replaced per sub-problem.
pred_path = './output_p02/p02X_pred.csv'
train_path, valid_path, test_path = (
    '/Users/xiaobai/Documents/CS/CS229/cs229-2018-autumn-main/problem-sets/PS1/data/ds3_train.csv',
    '/Users/xiaobai/Documents/CS/CS229/cs229-2018-autumn-main/problem-sets/PS1/data/ds3_valid.csv',
    '/Users/xiaobai/Documents/CS/CS229/cs229-2018-autumn-main/problem-sets/PS1/data/ds3_test.csv',
)
main3(train_path, valid_path, test_path, pred_path)

迭代次数为9
最终权重:
[[-0.9711418 ]
 [ 2.73208784]
 [ 1.07403478]]
error rate on test set with Logistic t_label is: 0.05269645255369174
迭代次数为8
最终权重:
[[-2.91607454]
 [ 0.78596651]
 [ 0.09282768]]
error rate on test set with Logistic y_label is: 0.42577117460690234
alpha 0.17174527144749674
error rate on test set with Logistic fixed_y_label is: 0.20683454541319718

在这里插入图片描述

p03d_poisson

method_01

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

plt.rcdefaults()
%matplotlib inline

def main4(lr, train_path, eval_path, pred_path):
    """Problem 3(d): Poisson regression with gradient ascent.

    Fits the model, reports the mean absolute error on both datasets, saves
    the validation predictions and plots predictions against true counts.

    Args:
        lr: Learning rate for gradient ascent.
        train_path: Path to CSV file containing dataset for training.
        eval_path: Path to CSV file containing dataset for evaluation.
        pred_path: Prefix (directory with trailing separator) under which the
            validation predictions are saved.
    """
    feature_cols = ['x_1', 'x_2', 'x_3', 'x_4']
    title_style = {'fontsize': 15, 'weight': 'black'}

    # *** START CODE HERE ***
    # Load both datasets.
    train_df = pd.read_csv(train_path)
    x_train = train_df.loc[:, feature_cols].values
    y_train = train_df.loc[:, 'y'].values.reshape((-1, 1))
    valid_df = pd.read_csv(eval_path)
    x_valid = valid_df.loc[:, feature_cols].values
    y_valid = valid_df.loc[:, 'y'].values.reshape((-1, 1))

    # Fit a Poisson Regression model
    clf = PoissonRegression(lr)
    clf.fit(x_train, y_train)

    # Score the training set.
    clf.predict(x_train)
    train_pred = clf.pred_result
    train_pred_df = pd.DataFrame(train_pred, index=None, columns=['y_predict'])
    train_error = np.abs(train_pred - y_train).sum() / y_train.shape[0]
    print(f'error rate on training set is: {train_error}')

    # Run on the validation set and save outputs under pred_path.
    clf.predict(x_valid)
    valid_pred = clf.pred_result
    valid_error = np.abs(valid_pred - y_valid).sum() / y_valid.shape[0]
    valid_pred_df = pd.DataFrame(valid_pred, index=None, columns=['y_predict'])
    valid_pred_df.to_csv(pred_path + os.path.basename(eval_path))
    print(f'error rate on validation set is: {valid_error}')

    # Figure 1: predicted vs. true count on the validation set.
    scatter_axis = plt.figure().add_subplot(111)
    scatter_axis.scatter(valid_df.loc[:, 'y'].values, valid_pred_df.loc[:, 'y_predict'], marker='o', c='b', s=5)
    scatter_axis.set_title('validation set', **title_style)

    # Figure 2: true and predicted counts by sample index, one panel per dataset.
    compare_fig = plt.figure()
    panels = (
        (121, train_df, train_pred_df, 'trainning set'),
        (122, valid_df, valid_pred_df, 'validation set'),
    )
    for position, frame, pred_frame, title in panels:
        axis = compare_fig.add_subplot(position)
        axis.scatter(np.arange(len(frame)), frame.loc[:, 'y'].values, c='b', s=10, marker='o', label='real')
        axis.scatter(np.arange(len(frame)), pred_frame.loc[:, 'y_predict'].values, marker='x', c='r', s=10, alpha=0.5, label='predict')
        axis.legend()
        axis.set_title(title, **title_style)

    plt.show()
    # *** END CODE HERE ***


class PoissonRegression:
    """Poisson Regression fitted by stochastic gradient ascent.

    Example usage:
        > clf = PoissonRegression(lr)
        > clf.fit(x_train, y_train)
        > clf.predict(x_eval)
    """

    def __init__(self, lr, max_cycle=100, tor=1e-6):
        """Store the optimiser settings.

        Args:
            lr: Base learning rate; it is decayed during training.
            max_cycle: Maximum number of passes over the training set.
            tor: Tolerance on the 1-norm of the scaled full gradient used as
                the stopping test after each pass.
        """
        self.lr = lr
        self.max_cycle = max_cycle
        self.tor = tor

    def hypothesis(self, x, theta):
        """Return the Poisson mean ``exp(x @ theta)``."""
        result = x @ theta
        return np.exp(result)

    def fit(self, x, y):
        """Run gradient ascent to maximize likelihood for Poisson regression.

        Each pass visits every sample once in random order and applies a
        single-sample update with step size ``lr / (1 + epoch + step) + 1e-8``.
        The fitted parameters are stored in ``self.theta``.

        Args:
            x: Training example inputs. Shape (m, n).
            y: Training example labels. Shape (m,) or (m, 1).

        Raises:
            ValueError: If the training set is empty.
        """
        # *** START CODE HERE ***
        x = np.asarray(x, dtype=float)
        # Force a column vector so (m,) labels do not broadcast to (m, m).
        y = np.asarray(y, dtype=float).reshape((-1, 1))
        m, n = x.shape
        if m == 0:
            raise ValueError('cannot fit PoissonRegression on an empty training set')
        self.theta = np.random.randn(n, 1)

        epoch = -1  # reported as -1 when max_cycle allows no pass at all
        for epoch in range(self.max_cycle):
            # A random permutation visits every sample exactly once.
            for step, idx in enumerate(np.random.permutation(m)):
                alpha = self.lr / (1.0 + epoch + step) + 1e-8
                # Only the visited sample's residual is needed for the update.
                residual = y[idx] - np.exp(x[idx] @ self.theta)
                self.theta += alpha * residual * x[idx].reshape((-1, 1))
            # Stopping test on the full gradient, evaluated once per pass.
            error = y - self.hypothesis(x, self.theta)
            if np.linalg.norm(alpha / m * x.T @ error, 1) < self.tor:
                print(f'convergence at {epoch}')
                break
        print(self.theta, epoch)
        # *** END CODE HERE ***

    def predict(self, x):
        """Make a prediction given inputs x.

        Args:
            x: Inputs of shape (m, n).

        Returns:
            Floating-point prediction for each input, shape (m, 1). The same
            array is stored in ``self.pred_result``.
        """
        # *** START CODE HERE ***
        self.pred_result = self.hypothesis(x, self.theta)
        return self.pred_result
        # *** END CODE HERE ***
        # *** END CODE HERE ***

# Driver for problem 3(d), stochastic variant, on dataset 4.
pred_path = './output_p03/'
train_path, eval_path = (
    '/Users/xiaobai/Documents/CS/CS229/cs229-2018-autumn-main/problem-sets/PS1/data/ds4_train.csv',
    '/Users/xiaobai/Documents/CS/CS229/cs229-2018-autumn-main/problem-sets/PS1/data/ds4_valid.csv',
)
main4(3e-7, train_path, eval_path, pred_path)

convergence at 19
[[11.29997057]
 [10.79991346]
 [ 2.00011476]
 [ 4.39996554]] 19
error rate on training set is: 1215.1900628240774
error rate on validation set is: 1239.7526943716625

在这里插入图片描述

method_02

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os


def main4(lr, train_path, eval_path, pred_path):
    """Problem 3(d): Poisson regression with gradient ascent.

    Fits the model, reports the mean absolute error on both datasets, saves
    the validation predictions and plots predictions against true counts.

    Args:
        lr: Learning rate for gradient ascent.
        train_path: Path to CSV file containing dataset for training.
        eval_path: Path to CSV file containing dataset for evaluation.
        pred_path: Prefix (directory with trailing separator) under which the
            validation predictions are saved.
    """
    feature_cols = ['x_1', 'x_2', 'x_3', 'x_4']
    title_style = {'fontsize': 15, 'weight': 'black'}

    # *** START CODE HERE ***
    # Load both datasets.
    train_df = pd.read_csv(train_path)
    x_train = train_df.loc[:, feature_cols].values
    y_train = train_df.loc[:, 'y'].values.reshape((-1, 1))
    valid_df = pd.read_csv(eval_path)
    x_valid = valid_df.loc[:, feature_cols].values
    y_valid = valid_df.loc[:, 'y'].values.reshape((-1, 1))

    # Fit a Poisson Regression model
    clf = PoissonRegression(lr)
    clf.fit(x_train, y_train)

    # Score the training set.
    clf.predict(x_train)
    train_pred = clf.pred_result
    train_pred_df = pd.DataFrame(train_pred, index=None, columns=['y_predict'])
    train_error = np.abs(train_pred - y_train).sum() / y_train.shape[0]
    print(f'error rate on training set is: {train_error}')

    # Run on the validation set and save outputs under pred_path.
    clf.predict(x_valid)
    valid_pred = clf.pred_result
    valid_error = np.abs(valid_pred - y_valid).sum() / y_valid.shape[0]
    valid_pred_df = pd.DataFrame(valid_pred, index=None, columns=['y_predict'])
    valid_pred_df.to_csv(pred_path + os.path.basename(eval_path))
    print(f'error rate on validation set is: {valid_error}')

    # Figure 1: predicted vs. true count on the validation set.
    scatter_axis = plt.figure().add_subplot(111)
    scatter_axis.scatter(valid_df.loc[:, 'y'].values, valid_pred_df.loc[:, 'y_predict'], marker='o', c='b', s=5)
    scatter_axis.set_title('validation set', **title_style)

    # Figure 2: true and predicted counts by sample index, one panel per dataset.
    compare_fig = plt.figure()
    panels = (
        (121, train_df, train_pred_df, 'trainning set'),
        (122, valid_df, valid_pred_df, 'validation set'),
    )
    for position, frame, pred_frame, title in panels:
        axis = compare_fig.add_subplot(position)
        axis.scatter(np.arange(len(frame)), frame.loc[:, 'y'].values, c='b', s=10, marker='o', label='real')
        axis.scatter(np.arange(len(frame)), pred_frame.loc[:, 'y_predict'].values, marker='x', c='r', s=10, alpha=0.5, label='predict')
        axis.legend()
        axis.set_title(title, **title_style)

    plt.show()
    # *** END CODE HERE ***


class PoissonRegression:
    """Poisson Regression fitted by full-batch gradient ascent.

    Example usage:
        > clf = PoissonRegression(lr)
        > clf.fit(x_train, y_train)
        > clf.predict(x_eval)
    """

    def __init__(self, lr, max_cycle=50, tor=1e-5):
        """Store the optimiser settings.

        Args:
            lr: Learning rate for gradient ascent.
            max_cycle: Stored for interface compatibility. NOTE(review): fit
                does not enforce it; it iterates until the step is below
                ``tor``.
            tor: Tolerance on the 1-norm of one update step.
        """
        self.lr = lr
        self.max_cycle = max_cycle
        self.tor = tor

    def hypothesis(self, x, theta):
        """Return the Poisson mean ``exp(x @ theta)``."""
        result = x @ theta
        return np.exp(result)

    def fit(self, x, y):
        """Run gradient ascent to maximize likelihood for Poisson regression.

        Repeats ``theta += lr / m * x.T @ (y - exp(x @ theta))`` until the
        1-norm of the step is no longer >= ``tor``. A NaN step also ends the
        loop, since the comparison is then false. The fitted parameters are
        stored in ``self.theta``.

        Args:
            x: Training example inputs. Shape (m, n).
            y: Training example labels. Shape (m,) or (m, 1).
        """
        # *** START CODE HERE ***
        x = np.asarray(x, dtype=float)
        # Force a column vector so (m,) labels do not broadcast to (m, m).
        y = np.asarray(y, dtype=float).reshape((-1, 1))
        m, n = x.shape
        self.theta = np.random.randn(n, 1)

        ii = 0
        while True:
            next_step = self.lr / m * x.T @ (y - self.hypothesis(x, self.theta))
            if not np.linalg.norm(next_step, 1) >= self.tor:
                break
            self.theta += next_step
            ii += 1

        print(ii, self.theta)
        # *** END CODE HERE ***

    def predict(self, x):
        """Make a prediction given inputs x.

        Args:
            x: Inputs of shape (m, n).

        Returns:
            Floating-point prediction for each input, shape (m, 1). The same
            array is stored in ``self.pred_result``.
        """
        # *** START CODE HERE ***
        self.pred_result = self.hypothesis(x, self.theta)
        return self.pred_result
        # *** END CODE HERE ***
        # *** END CODE HERE ***

# Driver for problem 3(d), full-batch variant, on dataset 4.
pred_path = './output_p03/'
train_path, eval_path = (
    '/Users/xiaobai/Documents/CS/CS229/cs229-2018-autumn-main/problem-sets/PS1/data/ds4_train.csv',
    '/Users/xiaobai/Documents/CS/CS229/cs229-2018-autumn-main/problem-sets/PS1/data/ds4_valid.csv',
)
main4(3e-7, train_path, eval_path, pred_path)

731 [[11.29975098]
 [10.79972481]
 [ 2.00012705]
 [ 4.40020079]]
error rate on training set is: 1219.0224238863411
error rate on validation set is: 1244.19357576215

在这里插入图片描述

p05b_lwr

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os


def main5(tau, train_path, eval_path):
    """Problem 5(b): Locally weighted regression (LWR)

    Predicts on both datasets, prints the mean squared errors, saves the
    predictions and plots them against the true targets.

    NOTE: the output prefix is read from the module-level name ``pred_path``,
    which must be defined before this function is called.

    Args:
        tau: Bandwidth parameter for LWR.
        train_path: Path to CSV file containing dataset for training.
        eval_path: Path to CSV file containing dataset for evaluation.
    """
    def load(path):
        # Return the frame, the inputs with an intercept column, and the (m, 1) targets.
        frame = pd.read_csv(path)
        inputs = frame.loc[:, 'x_1'].values.reshape((-1, 1))
        targets = frame.loc[:, 'y'].values.reshape((-1, 1))
        inputs = np.column_stack((np.ones((inputs.shape[0], 1)), inputs))
        return frame, inputs, targets

    train_df, x_train, y_train = load(train_path)
    valid_df, x_eval, y_eval = load(eval_path)

    # *** START CODE HERE ***
    # Fit a LWR model
    clf = LocallyWeightedLinearRegression(tau=tau)
    clf.fit(x_train, y_train)

    # Get MSE value on the training set
    train_pred = clf.predict(x_train)
    train_pred_df = pd.DataFrame(train_pred, index=None, columns=['y_predict'])
    train_pred_df.to_csv(pred_path + os.path.basename(train_path))
    mse_train = ((train_pred - y_train)**2 / y_train.shape[0]).sum()
    print(f'MSE on training set is: {mse_train}')

    # Get MSE value on the validation set
    valid_pred = clf.predict(x_eval)
    valid_pred_df = pd.DataFrame(valid_pred, index=None, columns=['y_predict'])
    valid_pred_df.to_csv(pred_path + os.path.basename(eval_path))
    mse_valid = ((valid_pred - y_eval)**2 / y_eval.shape[0]).sum()
    print(f'MSE on validation set is: {mse_valid}')

    # One figure per dataset: true targets as crosses, predictions as dots.
    figures = (
        (train_df, train_pred_df, 'trainning set with '),
        (valid_df, valid_pred_df, 'validation set with '),
    )
    for frame, pred_frame, heading in figures:
        axis = plt.figure().add_subplot(111)
        inputs = frame.loc[:, 'x_1'].values
        axis.scatter(inputs, frame.loc[:, 'y'].values, c='b', s=10, marker='x', label='real')
        axis.scatter(inputs, pred_frame.loc[:, 'y_predict'].values, c='r', s=10, marker='o', label='predict')
        axis.legend()
        axis.set_title(heading + r'$\tau=$' + f'{tau}', fontsize=15, weight='black')

    plt.show()
    # *** END CODE HERE ***


class LocallyWeightedLinearRegression():
    """Locally Weighted Regression (LWR).

    Non-parametric: ``fit`` only memorises the training set, and ``predict``
    solves one weighted least-squares problem per query point.

    Example usage:
        > clf = LocallyWeightedLinearRegression(tau)
        > clf.fit(x_train, y_train)
        > clf.predict(x_eval)
    """

    def __init__(self, tau):
        """Store the bandwidth.

        Args:
            tau: Bandwidth of the Gaussian weighting kernel.
        """
        self.tau = tau
        self.x = None
        self.y = None

    def fit(self, x, y):
        """Fit LWR by saving the training set.

        Args:
            x: Training example inputs. Shape (l, n).
            y: Training example targets. Shape (l,) or (l, 1).
        """
        # *** START CODE HERE ***
        self.x = x
        self.y = y
        # *** END CODE HERE ***

    def predict(self, x):
        """Make predictions given inputs x.

        For each query row q the weights are
        ``w_j = exp(-||x_j - q||^2 / (2 * tau^2))`` and the local parameters
        solve ``(X^T W X) theta = X^T W y``.

        Args:
            x: Inputs of shape (m, n).

        Returns:
            Outputs of shape (m, 1).
        """
        # *** START CODE HERE ***
        queries = np.asarray(x, dtype=float)
        train_x = np.asarray(self.x, dtype=float)
        train_y = np.asarray(self.y, dtype=float).reshape((-1, 1))

        # Squared distance from every query to every training point: (m, l).
        diff = queries[:, None, :] - train_x[None, :, :]
        sq_dist = np.einsum('mld,mld->ml', diff, diff)
        weights = np.exp(-sq_dist / (2 * self.tau ** 2))

        # X^T W per query via broadcasting, shape (m, n, l); this avoids
        # building an (m, l, l) stack of dense diagonal matrices.
        weighted_xt = train_x.T[None, :, :] * weights[:, None, :]
        lhs = weighted_xt @ train_x   # (m, n, n)
        rhs = weighted_xt @ train_y   # (m, n, 1)
        theta = np.linalg.solve(lhs, rhs)

        # Row-wise dot product of each query with its own theta.
        return np.einsum('ij,ijk->i', queries, theta).reshape((-1, 1))
        # *** END CODE HERE ***

# Driver for problem 5: the first 0.5 is the part (b) run, the rest is the
# bandwidth sweep. main5 reads the module-level pred_path defined here.
pred_path = './output_p05/'
train_path, eval_path = (
    '/Users/xiaobai/Documents/CS/CS229/cs229-2018-autumn-main/problem-sets/PS1/data/ds5_train.csv',
    '/Users/xiaobai/Documents/CS/CS229/cs229-2018-autumn-main/problem-sets/PS1/data/ds5_valid.csv',
)
tau_list = [0.5, 0.03, 0.05, 0.1, 0.5, 1.0, 10.0]
for tau in tau_list:
    main5(tau, train_path, eval_path)

MSE on training set is: 0.32535235950212116
MSE on validation set is: 0.33053126821375234

在这里插入图片描述

MSE on training set is: 0.005254817960252144
MSE on validation set is: 0.018096163123883825

在这里插入图片描述

MSE on training set is: 0.008170355916900955
MSE on validation set is: 0.012400076150464765

在这里插入图片描述

MSE on training set is: 0.02175834826721926
MSE on validation set is: 0.024224589379809737

在这里插入图片描述

MSE on training set is: 0.32535235950212116
MSE on validation set is: 0.33053126821375234

在这里插入图片描述

MSE on training set is: 0.404981498608221
MSE on validation set is: 0.4000959480180861

在这里插入图片描述

MSE on training set is: 0.45596601295078054
MSE on validation set is: 0.43374392272323625

在这里插入图片描述

ML--小小白

关注

2
点赞
踩
3

收藏

觉得还不错? 一键收藏
打赏
0
评论
CS229 吴恩达机器学习习题答案 problem sets 01 （代码题目部分，欢迎各位前辈指教）

p01b_logreg: import numpy as np; import pandas as pd; import matplotlib.pyplot as plt; import os; plt.rcdefaults(); # import util; # from linear_model import LinearModel; def main(train_path, eval_path, pred_path): """Problem 1(b): Logistic regression with …
复制链接

扫一扫