吴恩达《机器学习》第二次作业——逻辑回归

最新推荐文章于 2024-03-06 02:25:04 发布

216549856

最新推荐文章于 2024-03-06 02:25:04 发布

阅读量1.1k

点赞数

分类专栏： ML

本文链接：https://blog.csdn.net/Cherish_x/article/details/90053827

版权

ML 专栏收录该内容

9 篇文章 0 订阅

订阅专栏

逻辑回归
给的数据集有两个特征变量。
设想你是大学相关部门的管理者，想根据申请学生两次测试的成绩来决定他们是否被录取。现在你拥有以往申请学生的数据，可以用作训练逻辑回归的训练样本集。对于每一个训练样本，你有该学生两次测试的成绩和最终是否被录取的结果。由此建立逻辑回归分类器。

注： https://github.com/fengdu78/Coursera-ML-AndrewNg-Notes/tree/master/code
github上的参考代码并没有使用梯度下降来优化参数，应该是使用了其他的优化算法（SciPy 的截断牛顿法，truncated Newton，TNC）。
与其他优化算法相比，用梯度下降法优化参数并不能很好地降低代价函数。可能是因为对于这个数据集来说，梯度下降需要迭代相当多的次数才能收敛。
在这里插入图片描述

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.optimize as opt

# Load the training set: two test scores per applicant plus the admission label.
path = 'ex2data1.txt'   # relative path
data = pd.read_csv(path, header=None, names=['First', 'Second', 'Admission'])

# --- Optional: inspect the data (uncomment to plot) ---
#positive = data[data['Admission'].isin([1])]
#negative = data[data['Admission'].isin([0])]
#fig, ax = plt.subplots(figsize=(12, 8))
#ax.scatter(positive['First'], positive['Second'], s=50, c='b', marker='o', label='Admission')
#ax.scatter(negative['First'], negative['Second'], s=50, c='r', marker='x', label='Not Admission')
#ax.legend()
#ax.set_xlabel('First Score')
#ax.set_ylabel('Second Score')
#plt.show()


# --- Optional: feature scaling (standardise both score columns) ---
#data['First'] = (data['First'] - data['First'].mean())/data['First'].std()
#data['Second'] = (data['Second'] - data['Second'].mean())/data['Second'].std()

# Prepare the data for the steps below: prepend a column of ones (intercept
# term), then split the frame into the feature matrix and the label column.
data.insert(0, 'Ones', 1)
cols = data.shape[1]                # number of columns in data
X = data.iloc[:, 0:cols-1]          # feature variables (including the ones column)
y = data.iloc[:, cols-1:cols]       # target variable
X = np.matrix(X.values)             # np.matrix, so `*` below is a matrix product
y = np.matrix(y.values)
theta = np.matrix(np.zeros(X.shape[1]))   # initial theta: a 1 x n row of zeros

def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, evaluated element-wise by NumPy."""
    exp_neg = np.exp(-x)
    return 1 / (1 + exp_neg)

def computeCost(X, y, theta):
    """Return the mean logistic-regression (cross-entropy) cost.

    Parameters
    ----------
    X : (m, n) matrix or array
        One row per sample, one column per parameter.
    y : (m, 1) or (m,) matrix or array
        Labels, 0 or 1.
    theta : (1, n) or (n,) matrix or array
        Model parameters.

    Returns
    -------
    numpy.float64
        ``-(1/m) * sum(y*log(h) + (1-y)*log(1-h))`` with ``h = sigmoid(X.theta)``.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).reshape(-1)
    theta = np.asarray(theta, dtype=float).reshape(-1)

    z = np.dot(X, theta)
    # With h = sigmoid(z), the per-sample loss -[y*log(h) + (1-y)*log(1-h)]
    # equals log(1 + e^z) - y*z. Evaluating it in this form never takes
    # log(0), so a saturated sigmoid gives a finite cost instead of NaN.
    return np.sum(np.logaddexp(0, z) - y * z) / X.shape[0]

# Settings for the gradient-descent run below.
iters = 150000      # number of gradient-descent iterations
alpha = 1e-3        # step size applied to each update

def gradienDescent(X, y, theta, alpha, iters):
    """Run ``iters`` steps of batch gradient descent for logistic regression.

    Each step moves ``theta`` against the gradient ``X.T * (sigmoid(X*theta.T) - y)``
    scaled by ``alpha / len(X)``, then records the cost at the updated ``theta``.
    The inputs are used as ``np.matrix`` objects (``*`` is a matrix product).

    Returns the final ``theta`` and an array holding the cost after every step.
    """
    history = np.zeros(iters)
    step = alpha / len(X)
    for k in range(iters):
        residual = sigmoid(X * theta.T) - y      # prediction error per sample
        theta = theta - step * (X.T * residual).T
        history[k] = computeCost(X, y, theta)
    return theta, history

theta, cost = gradienDescent(X, y, theta, alpha, iters)
#print(cost[-1])

# --- Optional: plot the cost over the iterations (uncomment to plot) ---
#fig, ax = plt.subplots(figsize=(12, 8))
#ax.plot(np.arange(iters), cost, 'r')
#ax.set_xlabel('Iterations')
#ax.set_ylabel('Cost')
#ax.set_title('Error vs. Training Epoch')
#plt.show()

# Measure the accuracy of the logistic-regression classifier on the training data.
probability = sigmoid(X * theta.T)
# Flatten the column matrices so every element is a plain scalar.
predictions = [1 if p >= 0.5 else 0 for p in np.asarray(probability).ravel()]
correct = [int(p == t) for p, t in zip(predictions, np.asarray(y).ravel())]
# Use a true ratio (not a modulo) so the value is a percentage for any
# number of samples.
accuracy = 100.0 * sum(correct) / len(correct)
print('accuracy = {0}%'.format(accuracy))

逻辑回归的正则化

import numpy as np
import pandas as pd
import scipy.optimize as opt
import matplotlib.pyplot as plt

# Load the second data set: two test results and the accepted/rejected label.
path = 'ex2data2.txt'
data2 = pd.read_csv(path, header=None, names=['Test1', 'Test2', 'Accepted'])

x1, x2 = data2['Test1'], data2['Test2']
degree = 5

# Column of ones for the intercept term, placed after the three loaded columns.
data2.insert(loc=3, column='Ones', value=1)

# Polynomial features x1^(i-j) * x2^j for 1 <= i < degree and 0 <= j < i.
for i in range(1, degree):
    for j in range(i):
        data2['F' + str(i) + str(j)] = np.power(x1, i - j) * np.power(x2, j)

# The raw score columns are removed; only the label, the ones column and the
# generated F.. columns remain.
data2.drop(['Test1', 'Test2'], axis=1, inplace=True)


def sigmoid(z):
    """Return the logistic sigmoid of ``z``, i.e. ``1 / (1 + exp(-z))``."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def cost(theta, X, y, learningRate):
    """Return the L2-regularised logistic-regression cost.

    Parameters
    ----------
    theta : (n,) or (1, n) array-like
        Model parameters; ``theta[0]`` is excluded from the penalty.
    X : (m, n) array-like
        One row per sample, one column per parameter.
    y : (m, 1) or (m,) array-like
        Labels, 0 or 1. Both layouts give the same result.
    learningRate : float
        Despite the name, this is the regularisation strength: it scales
        the sum of squared parameters.

    Returns
    -------
    numpy.float64
        Mean cross-entropy plus
        ``learningRate / (2*m) * sum(theta[1:] ** 2)``.
    """
    theta = np.asarray(theta, dtype=float).reshape(-1)
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).reshape(-1)
    m = X.shape[0]

    z = np.dot(X, theta)
    # With h = sigmoid(z), -[y*log(h) + (1-y)*log(1-h)] == log(1 + e^z) - y*z.
    # This form never evaluates log(0), so the optimiser is not handed NaN
    # when the sigmoid saturates.
    data_term = np.sum(np.logaddexp(0, z) - y * z) / m
    reg = (learningRate / (2 * m)) * np.sum(np.power(theta[1:], 2))
    return data_term + reg


def gradientReg(theta, X, y, learningRate):
    """Return the gradient of the L2-regularised logistic-regression cost.

    Parameters
    ----------
    theta : (n,) or (1, n) array-like
        Model parameters; ``theta[0]`` is not regularised.
    X : (m, n) array-like
        One row per sample, one column per parameter.
    y : (m, 1) or (m,) array-like
        Labels, 0 or 1.
    learningRate : float
        Despite the name, this is the regularisation strength.

    Returns
    -------
    numpy.ndarray
        1-D float array of length n:
        ``X.T . (sigmoid(X.theta) - y) / m``, plus
        ``(learningRate / m) * theta[i]`` for every ``i >= 1``.
    """
    # expit is SciPy's logistic sigmoid; it does not overflow for large |z|.
    from scipy.special import expit

    theta = np.asarray(theta, dtype=float).reshape(-1)
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).reshape(-1)
    m = X.shape[0]

    error = expit(np.dot(X, theta)) - y
    # One matrix product replaces the per-parameter loop and always yields
    # plain floats (no 1x1 matrix is stored into a scalar slot).
    grad = np.dot(X.T, error) / m
    grad[1:] += (learningRate / m) * theta[1:]
    return grad


# Set X and y: in data2 the label 'Accepted' is column 0 and every remaining
# column (the ones column and the generated features) belongs to X.
cols = data2.shape[1]
X2 = data2.iloc[:,1:cols]
y2 = data2.iloc[:,0:1]

# Convert to numpy arrays and initialize the parameter array theta.
X2 = np.array(X2.values)
y2 = np.array(y2.values)
theta2 = np.zeros(X2.shape[1])      # one parameter per column of X2


# NOTE: despite its name, this value is used by cost/gradientReg as the
# regularisation strength, not as an optimiser step size.
learningRate = 1

result2 = opt.fmin_tnc(func=cost, x0=theta2, fprime=gradientReg, args=(X2, y2, learningRate))

# Accuracy of the fitted parameters on the training data.
theta = np.matrix(result2[0])
probability = sigmoid(X2 * theta.T)
# Flatten the column vectors so every element is a plain scalar.
predictions = [1 if p >= 0.5 else 0 for p in np.asarray(probability).ravel()]
correct = [int(p == t) for p, t in zip(predictions, np.asarray(y2).ravel())]
# Use a true ratio (not a modulo) so the value is a percentage for any
# number of samples.
accuracy = 100.0 * sum(correct) / len(correct)
print('accuracy = {0}%'.format(accuracy))