1. While reading someone else's gradient-descent implementation (https://stackoverflow.com/questions/18801002/fminunc-alternate-in-numpy), I picked up a useful NumPy habit: there is no need to convert every array with np.mat(). np.dot() already performs matrix multiplication on plain ndarrays, and element-wise operations on individual entries remain convenient.
2. The minimize function from the scipy library is used for the cost minimization step (as a stand-in for Octave's fminunc); a short sketch of both points follows these notes.
Introduction to the Jacobian matrix: http://jacoxu.com/jacobian%E7%9F%A9%E9%98%B5%E5%92%8Chessian%E7%9F%A9%E9%98%B5/
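A minimal standalone sketch of both points, assuming nothing beyond NumPy and SciPy themselves; the quadratic objective below is a made-up toy, not the exercise's cost function:
import numpy as np
from scipy.optimize import minimize
a = np.array([[1.0, 2.0], [3.0, 4.0]])
b = np.array([5.0, 6.0])
print(np.dot(a, b))    # matrix-vector product on plain ndarrays, no np.mat() needed
print(a * a)           # element-wise product is still just the * operator
fun = lambda t: np.sum((t - b) ** 2)   # toy objective with its minimum at t = b
jac = lambda t: 2.0 * (t - b)          # its gradient, supplied through jac=
res = minimize(fun=fun, x0=np.zeros(2), method='TNC', jac=jac)
print(res.x)           # converges to approximately [5. 6.]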
# Machine Learning Online class: Logistic Regression
# Load data
import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as scop
# ==================== Part 1: Loading and Visualizing Data ====================
def loadData(filename):
    # Read the comma-separated data: two exam-score columns, then the label column.
    fr = open(filename)
    arrayLines = fr.readlines()
    fr.close()
    numberOfLines = len(arrayLines)
    x = np.zeros((numberOfLines, 2))
    y = np.zeros((numberOfLines, 1))
    index = 0
    for line in arrayLines:
        listFormLine = line.strip().split(',')
        x[index, :] = listFormLine[:2]
        y[index] = listFormLine[-1]
        index += 1
    return x, y, numberOfLines
def plotData(x, y):
    # Scatter the two exam scores, distinguishing admitted from not-admitted students.
    f2 = plt.figure(2)
    idx_1 = np.where(y == 0)[0]
    p1 = plt.scatter(x[idx_1, 0], x[idx_1, 1], marker='x', color='m', label='Not admitted', s=30)
    idx_2 = np.where(y == 1)[0]
    p2 = plt.scatter(x[idx_2, 0], x[idx_2, 1], marker='+', color='c', label='Admitted', s=50)
    plt.xlabel('Exam 1 Score')
    plt.ylabel('Exam 2 Score')
    plt.legend(loc='upper right')
    # plt.show()
    return plt
# ============ Part 2: Compute Cost and Gradient ============
def sigmod(z):
    # In Python, math.log()/math.exp() cannot operate on arrays directly,
    # so the vectorized np.exp() is used instead; see:
    # http://blog.csdn.net/u013634684/article/details/49305655
    g = 1 / (1 + np.exp(-z))
    return g
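# Illustration of the note above (kept as comments so the script stays linear;
# the sample array is made up): math.exp(np.array([0.0, 1.0])) raises
# "TypeError: only size-1 arrays can be converted to Python scalars", while
# np.exp(np.array([0.0, 1.0])) returns array([1., 2.71828183]) element-wise.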
def costFunction(theta, X, y):
    # Vectorized logistic-regression cost:
    #   J(theta) = -(1/m) * sum( y*log(h) + (1-y)*log(1-h) ),  h = sigmod(X.dot(theta))
    m, n = X.shape
    theta = theta.reshape((n, 1))
    y = y.reshape((m, 1))
    h = sigmod(np.dot(X, theta))
    s = y * np.log(h) + (1 - y) * np.log(1 - h)
    J = -(np.sum(s)) / m
    return J
def Gradient(theta, X, y):
    # Vectorized gradient: dJ/dtheta = (1/m) * X.T.dot(h - y); returned flat
    # because scipy.optimize.minimize expects a 1-D jac.
    m, n = X.shape
    theta = theta.reshape((n, 1))
    y = y.reshape((m, 1))
    grad = ((X.T).dot(sigmod(np.dot(X, theta)) - y)) / m
    return grad.flatten()
def mapFeature(x1, x2):
    # Map a pair of feature values onto all polynomial terms of x1 and x2 up to
    # the sixth power: 1, x1, x2, x1^2, x1*x2, x2^2, ... (28 terms in total).
    degree = 6
    out = np.ones(1)
    for i in range(1, degree + 1):
        for j in range(i + 1):
            newColumn = np.array([(x1 ** (i - j)) * (x2 ** j)])
            out = np.hstack((out, newColumn))
    return out
def plotDecisionBoundary(theta, X, y):
    f2 = plotData(X[:, 1:], y)
    # print(X[:, 1:])
    m, n = X.shape
    if n <= 3:
        # Only need 2 points to define a line, so choose two endpoints
        minVals = X[:, 1].min(0) - 2
        maxVals = X[:, 1].max(0) + 2
        plot_x = np.array([minVals, maxVals])
        # Boundary: theta0 + theta1*x1 + theta2*x2 = 0  =>  x2 = -(theta0 + theta1*x1)/theta2
        plot_y = (-1 / theta[2]) * (plot_x * theta[1] + theta[0])
        f2.plot(plot_x, plot_y, label='Decision Boundary', color='b')
        plt.show()
    else:
        # Here is the grid range
        u = np.linspace(-1, 1.5, 50)
        v = np.linspace(-1, 1.5, 50)
        z = np.zeros((len(u), len(v)))
        for i in range(len(u)):
            for j in range(len(v)):
                z[i, j] = mapFeature(u[i], v[j]).dot(theta)
        # The boundary is the z = 0 contour; transpose z so u runs along the x-axis.
        plt.contour(u, v, z.T, levels=[0])
        plt.show()
# ============== Part 4: Predict and Accuracies ==============
def predict(theta, X):
    # Predict 1 (admitted) when the modeled probability is at least 0.5.
    return (sigmod(X.dot(theta)) >= 0.5).astype(int)
if __name__ == '__main__':
    # ==================== Part 1: Loading and Visualizing Data ====================
    x, y, numberOfLines = loadData('ex2data1.txt')
    # plotData(x, y)
    # ============ Part 2: Compute Cost and Gradient ============
    # Related code: https://stackoverflow.com/questions/18801002/fminunc-alternate-in-numpy
    # Gradient descent: http://www.cnblogs.com/LeftNotEasy/archive/2010/12/05/mathmatic_in_machine_learning_1_regression_and_gradient_descent.html
    # Jacobian matrix: http://jacoxu.com/jacobian%E7%9F%A9%E9%98%B5%E5%92%8Chessian%E7%9F%A9%E9%98%B5/
    columnOne = np.ones((numberOfLines, 1))
    X = np.column_stack((columnOne, x))   # prepend the intercept column
    m, n = X.shape
    # initialTheta = np.zeros((X.shape[1], 1))
    initialTheta = np.zeros(n)
    cost = costFunction(initialTheta, X, y)
    grad = Gradient(initialTheta, X, y)
    # With theta = 0 every hypothesis value is 0.5, so the cost is log(2) ~ 0.693
    # regardless of the data.
    print('Cost at initial theta (zeros):\n', cost)
    print('Gradient at initial theta (zeros): \n', grad)
    print('\nProgram paused. Press enter to continue.\n')
    Result = scop.minimize(fun=costFunction, x0=initialTheta, args=(X, y), method='TNC', jac=Gradient)
    optimalTheta = Result.x
    # Print theta to screen
    print('Cost at theta found by minimize (fminunc equivalent):', Result.fun)
    print('theta: \n', optimalTheta)
    # Plot Boundary
    plotDecisionBoundary(optimalTheta, X, y)
    prob = sigmod(np.array([1, 45, 85]).dot(optimalTheta))
    print('For a student with scores 45 and 85, we predict an admission probability of', prob)
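    # One possible way to finish Part 4 (an assumed closing check, reusing the
    # predict helper defined above): measure accuracy on the training set itself,
    # where y is the (m, 1) label vector loaded earlier.
    p = predict(optimalTheta, X)
    print('Train Accuracy:', np.mean(p == y.flatten()) * 100)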