# Note: numpy's and Octave's standard-deviation routines use different default
# normalizations; pass ddof=1, as in numpy.std(axis=0, ddof=1), to make numpy
# match Octave.
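# A minimal sketch of the difference (toy numbers, not from the exercise):
# numpy.std defaults to the population formula (divide by N), while Octave's
# std defaults to the sample formula (divide by N - 1), i.e. ddof=1 in numpy.
#   import numpy as np
#   a = np.array([1.0, 2.0, 3.0, 4.0])
#   print(np.std(a))          # 1.1180..., population std (numpy default)
#   print(np.std(a, ddof=1))  # 1.2909..., sample std (Octave default)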
# Machine Learning Online Class
# Exercise 1: Linear regression with multiple variables
#
# For this part of the exercise, you will need to change some
# parts of the code below for various experiments (e.g., changing
# learning rates).

# Initialization
import numpy as np
import matplotlib.pyplot as plt
# ================ Part 1: Feature Normalization ================
def loadData(filename):
    """Read comma-separated rows of (size, bedrooms, price) into x and y."""
    with open(filename) as fr:
        arrayLines = fr.readlines()
    numberOfLines = len(arrayLines)
    x = np.zeros((numberOfLines, 2))  # features: size, number of bedrooms
    y = np.zeros((numberOfLines, 1))  # target: price
    for index, line in enumerate(arrayLines):
        listFromLine = line.strip().split(',')
        x[index, :] = listFromLine[:2]
        y[index] = listFromLine[-1]
    return x, y, numberOfLines
# ================ Part 1.1: Scale features and set them to zero mean ================
def featureNormalize(X, lenOfData):
    """Scale each feature to zero mean and unit sample standard deviation."""
    X_norm = X.copy()
    # Column-wise mean of X.
    mu = np.mean(X_norm, axis=0).reshape((1, -1))
    # Column-wise standard deviation of X. numpy needs ddof=1 here to match
    # Octave's std (or use the statistics module); see
    # https://stackoverflow.com/questions/15389768/standard-deviation-of-a-list
    sigma = X_norm.std(axis=0, ddof=1).reshape((1, -1))
    # Tiling replicates mu and sigma across all rows (broadcasting would too).
    X_norm = (X_norm - np.tile(mu, (lenOfData, 1))) / np.tile(sigma, (lenOfData, 1))
    return X_norm, mu, sigma
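# A quick sanity check of featureNormalize on toy data (not from the
# exercise): each column of the result should have mean ~0 and sample std ~1.
#   Xn, mu0, sigma0 = featureNormalize(np.array([[1., 10.], [2., 20.], [3., 30.]]), 3)
#   print(Xn.mean(axis=0))         # -> [0. 0.]
#   print(Xn.std(axis=0, ddof=1))  # -> [1. 1.]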
# ================ Part 2: Gradient Descent ================
def gradientDescentMulti(X, y, theta, alpha, num_iters):
    """Vectorized gradient descent: theta := theta - (alpha / m) * X.T * (X * theta - y)."""
    XMatrix = np.mat(X)
    yMatrix = np.mat(y)
    thetaMatrix = np.mat(theta)
    m = XMatrix.shape[0]
    J_history = np.zeros((num_iters, 1))
    for i in range(num_iters):
        # Update all parameters simultaneously, then record the cost.
        thetaMatrix = thetaMatrix - XMatrix.T * (XMatrix * thetaMatrix - yMatrix) * alpha / m
        J_history[i, :] = computeCostMulti(X, y, thetaMatrix, m)
    return thetaMatrix, J_history
def computeCostMulti(X, y, thetaMatrix, m):
    """Compute the cost J(theta) = sum((X * theta - y) ** 2) / (2 * m)."""
    XMatrix = np.mat(X)
    yMatrix = np.mat(y)
    J = np.sum(np.array(XMatrix * thetaMatrix - yMatrix) ** 2) / (2 * m)
    return J
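# Toy check of computeCostMulti (hypothetical numbers): with theta = 0 the
# cost reduces to sum(y ** 2) / (2 * m), e.g. (1 + 4) / 4 = 1.25 below.
#   print(computeCostMulti(np.array([[1., 0.], [1., 1.]]),
#                          np.array([[1.], [2.]]), np.mat(np.zeros((2, 1))), 2))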
def Plotting(num_iters, J_history):
    """Plot the convergence of the cost function over the iterations."""
    plt.figure(2)
    plt.plot(range(num_iters), J_history, color='r', label='J_history')
    plt.xlabel("Number of iterations")
    plt.ylabel("Cost J")
    plt.legend(loc='upper right')
    plt.show()
# ================ Part 3: Normal Equations ================
def normalEqn(X, y):
    """Closed-form solution: theta = pinv(X.T * X) * X.T * y."""
    XMatrix = np.mat(X)
    yMatrix = np.mat(y)
    # pinv keeps the solve well-defined even when X.T * X is singular.
    thetaNormEqn = np.linalg.pinv(XMatrix.T * XMatrix) * XMatrix.T * yMatrix
    return thetaNormEqn
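# Unlike gradient descent, the normal equation needs no learning rate,
# iteration count, or feature scaling. A toy check (hypothetical numbers,
# identity design matrix) where it should recover y exactly:
#   print(normalEqn(np.eye(2), np.array([[3.], [4.]])))  # -> [[3.], [4.]]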
if __name__ == '__main__':
    print("Loading data ...\n")
    x, y, numberOfLines = loadData('ex1data2.txt')
    # Print out some data points.
    print('First 10 examples from the dataset:')
    for i in range(10):
        print(' x = [%.0f %.0f], y = %.0f' % (x[i, 0], x[i, 1], y[i, 0]))
    print('Program paused. Press enter to continue.\n')
    # Scale features and set them to zero mean.
    print('Normalizing Features ...\n')
    x, mu, sigma = featureNormalize(x, numberOfLines)
    # Add a column of ones to x (the intercept term).
    columnOne = np.ones((numberOfLines, 1))
    X = np.column_stack((columnOne, x))
    # ================ Part 2: Gradient Descent ================
    # Instructions: We have provided you with the following starter
    #               code that runs gradient descent with a particular
    #               learning rate (alpha).
    #
    #               Your task is to first make sure that your functions -
    #               computeCost and gradientDescent - already work with
    #               this starter code and support multiple variables.
    #
    #               After that, try running gradient descent with
    #               different values of alpha and see which one gives
    #               you the best result (see the commented sketch after
    #               this block).
    #
    #               Finally, you should complete the code at the end
    #               to predict the price of a 1650 sq-ft, 3 br house.
    #
    # Hint: Calling plt.plot repeatedly before plt.show plots multiple
    #       graphs on the same figure (matplotlib's equivalent of
    #       Octave's 'hold on').
    #
    # Hint: At prediction, make sure you do the same feature normalization.
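    # A minimal sketch (not part of the original exercise code) of comparing
    # learning rates; the alpha values are just illustrative guesses:
    #   for alphaTry in (0.3, 0.1, 0.03, 0.01):
    #       _, J = gradientDescentMulti(X, y, np.zeros((3, 1)), alphaTry, 50)
    #       plt.plot(range(50), J, label='alpha = %g' % alphaTry)
    #   plt.xlabel('Number of iterations')
    #   plt.ylabel('Cost J')
    #   plt.legend()
    #   plt.show()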
    print('Running gradient descent ...\n')
    # Choose some alpha value.
    alpha = 0.01
    num_iters = 8500
    # Init theta and run gradient descent.
    theta = np.zeros((3, 1))
    theta, J_history = gradientDescentMulti(X, y, theta, alpha, num_iters)
    # Visualize the cost function (uncomment to show the convergence plot).
    # Plotting(num_iters, J_history)
    # Display gradient descent's result.
    print('Theta computed from gradient descent: ', theta)
    # Predict the price of a 1650 sq-ft, 3-bedroom house, applying the same
    # feature normalization that was used on the training data.
    predictHouse = np.array([1650, 3]).reshape(1, 2)
    priceGD = np.column_stack((1, (predictHouse - mu) / sigma)) * theta
    print('Predicted price of a 1650 sq-ft, 3 br house (gradient descent):', priceGD)
    # ================ Part 3: Normal Equations ================
    print('Solving with normal equations...')
    xNE, yNE, numberOfLines = loadData('ex1data2.txt')
    # The normal equation needs no feature scaling; just add the intercept column.
    columnOne = np.ones((numberOfLines, 1))
    XNE = np.column_stack((columnOne, xNE))
    thetaNormEqn = normalEqn(XNE, yNE)
    print('Theta computed from the normal equations: ', thetaNormEqn)
    price = np.mat([1, 1650, 3]) * thetaNormEqn
    print('Predicted price of a 1650 sq-ft, 3 br house (normal equations):', price)
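    # Sanity check (an assumption, not from the source): with enough gradient
    # descent iterations, the two predicted prices above should roughly agree.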