ex1
Dataset: https://www.kesci.com/home/project/5da16a37037db3002d441810/dataset
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# https://www.kesci.com/home/project/5da16a37037db3002d441810/code
# Andrew Ng's Machine Learning: univariate linear regression via gradient descent, in Python
path = r'C:\Users\shinelon\Desktop\MLDataset\ex1\ex1data1.txt'
# Plot the data
data = pd.read_csv(path,header=None, names=['Population', 'Profit'])
print(data.head())  # inspect the first few rows
data.plot(kind='scatter', x='Population', y='Profit', figsize=(12,8))
plt.show()
Figure: scatter plot of the raw data
# Cost function for univariate linear regression
def computeCost(X, y, theta):
    # theta.T is the transpose of theta; np.power raises element-wise
    inner = np.power(((X * theta.T) - y), 2)
    return np.sum(inner) / (2 * len(X))
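For reference, this is the cost being minimized:

$$J(\theta) = \frac{1}{2m}\sum_{i=1}^{m}\big(h_\theta(x^{(i)}) - y^{(i)}\big)^2,\qquad h_\theta(x) = \theta^T x$$

where m = len(X) is the number of training examples.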
# Insert a column named 'Ones' (all values 1) at position 0, for the intercept term
data.insert(0, 'Ones', 1)
# Initialize X and y
cols = data.shape[1]
x = data.iloc[:,:-1]   # x: all rows, every column except the last
y = data.iloc[:,cols-1:cols]   # y: the last column
x = np.asmatrix(x.values)   # convert x to a matrix to simplify the computations
y = np.asmatrix(y.values)
theta = np.asmatrix(np.array([0,0]))
print("computeCost(x,y,theta)= ", computeCost(x,y,theta))
# Gradient descent for univariate linear regression
# alpha is the learning rate, iters the number of iterations to run
def gradientDescent(x, y, theta, alpha, iters):
    # zero matrix to hold the updated parameters
    temp = np.asmatrix(np.zeros(theta.shape))
    parameters = int(theta.ravel().shape[1])
    cost = np.zeros(iters)
    for i in range(iters):
        error = (x * theta.T) - y
        for j in range(parameters):
            term = np.multiply(error, x[:,j])
            temp[0,j] = theta[0,j] - ((alpha / len(x)) * np.sum(term))
        theta = temp
        cost[i] = computeCost(x, y, theta)
    return theta, cost
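Each iteration applies the batch update rule to all parameters simultaneously:

$$\theta_j := \theta_j - \frac{\alpha}{m}\sum_{i=1}^{m}\big(h_\theta(x^{(i)}) - y^{(i)}\big)\, x_j^{(i)}$$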
# Set the hyperparameters
alpha = 0.01
iters = 1500
g, cost = gradientDescent(x, y, theta,alpha,iters)
print(g)
print(cost)
# Output: g = [[-3.88858772  1.19231101]]
# Output: cost = [6.73719046 5.93159357 5.90115471 ... 4.47697612 4.4769761  4.47697609]
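The inner loop over the parameters can also be vectorized away. A minimal sketch of an equivalent update (my own variant, not from the source notebook; it assumes the same matrix shapes as above: x is m x n, y is m x 1, theta is 1 x n):

def gradientDescentVec(x, y, theta, alpha, iters):
    cost = np.zeros(iters)
    for i in range(iters):
        error = (x * theta.T) - y                         # m x 1 residuals
        theta = theta - (alpha / len(x)) * (error.T * x)  # update every theta_j at once
        cost[i] = computeCost(x, y, theta)
    return theta, cost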
# Plot the cost over the iterations
fig, ax = plt.subplots(figsize=(10,8))
ax.plot(np.arange(iters), cost, 'r')
ax.set_xlabel('Iterations')
ax.set_ylabel('Cost')
ax.set_title('Error vs. Training Epoch')
plt.show()
Figure: cost vs. iterations
# Predict snack-stand profit for city populations of 35,000 and 70,000
# g is the fitted θ (population is measured in units of 10,000)
predict1 = [1,3.5]*g.T
print("predict1:",predict1)
predict2 = [1,7]*g.T
print("predict2:",predict2)
# Output: predict1: [[0.28450083]]
# Output: predict2: [[4.45758937]]
# x spans the range of the Population column; generate 100 evenly spaced points
x = np.linspace(data.Population.min(), data.Population.max(), 100)
f = g[0, 0] + (g[0, 1] * x)
# Plot the fitted regression line together with the training data
fig, ax = plt.subplots(figsize=(12,8))
ax.plot(x, f, 'r', label='Prediction')
ax.scatter(data.Population, data.Profit, label='Training Data')
ax.legend(loc=2)
ax.set_xlabel('Population')
ax.set_ylabel('Profit')
ax.set_title('Predicted Profit vs. Population Size')
plt.show()
# Done!
Figure: the fitted regression line over the training data
Multivariate linear regression
# Multivariate linear regression
path = r'C:\Users\shinelon\Desktop\MLDataset\ex1\ex1data2.txt'
# Read the data
data2 = pd.read_csv(path, header=None, names=['size','bedrooms','price'])
# Inspect the data
# print(data2.head())
# Feature normalization: subtract each column's mean and divide by its standard deviation
data2 = (data2 - data2.mean()) / data2.std()
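That is, each feature column is standardized:

$$x_j' = \frac{x_j - \mu_j}{\sigma_j}$$

where $\mu_j$ and $\sigma_j$ are the mean and standard deviation of column j. This keeps the features on comparable scales so gradient descent converges faster.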
# Add a column of ones
data2.insert(0,'Ones',1)
# Initialize x and y
cols2 = data2.shape[1]
x2 = data2.iloc[:,:-1]
y2 = data2.iloc[:,cols2-1:cols2]
# Convert to matrices
x2 = np.asmatrix(x2.values)
y2 = np.asmatrix(y2.values)
theta2 = np.asmatrix(np.array([0,0,0]))
g2,cost2 = gradientDescent(x2,y2,theta2,alpha,iters)
# g2=matrix([[-1.10898288e-16, 8.84042349e-01, -5.24551809e-02]])
Normal equation
# Normal equation
def normal(X, y):
    # np.linalg.inv(): matrix inverse
    # X.T@X is equivalent to X.T.dot(X): matrix multiplication
    theta = np.linalg.inv(X.T@X)@X.T@y
    return theta
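The normal equation solves the least-squares problem in closed form, with no learning rate and no iterations:

$$\theta = (X^T X)^{-1} X^T y$$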
# Applied to the ex1data1 matrices (x, y) from above, for comparison with gradient descent
final_theta2 = normal(x,y)
# final_theta2 = matrix([[-3.89578088],
# [ 1.19303364]])
ex2
Reference: https://www.kesci.com/home/project/5da1829c037db3002d445baa
Dataset download: https://www.kesci.com/home/project/5da1829c037db3002d445baa/dataset
Task:
ex2data1.txt: predict whether a student is admitted based on two exam scores. The first column is the first exam score and the second column is the second exam score.
ex2data2.txt: predict whether a microchip is accepted or rejected based on two test results. The first column is the first test score and the second column is the second test score.
See ex2.pdf in the dataset for the full problem statement.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Logistic regression  https://www.kesci.com/home/project/5da1829c037db3002d445baa
path = r'C:\Users\Administrator\Desktop\MLDataSet\ex2\ex2data1.txt'
# ex2data1 columns: exam score 1, exam score 2, admitted or not
data = pd.read_csv(path, header=None, names=('Exam 1','Exam 2','Admitted'))
# print(data)
# Split the data into admitted and not-admitted samples
# isin() filters rows: df[df[col].isin(values)]
positive = data[data['Admitted'].isin([1])]
negative = data[data['Admitted'].isin([0])]
# Scatter plot
fig, ax = plt.subplots(figsize=(12,8))
ax.scatter(positive['Exam 1'],positive['Exam 2'],s=50,c='b',marker='o',label='Admitted')
ax.scatter(negative['Exam 1'],negative['Exam 2'],s=50,c='r',marker='x',label='Not Admitted')
ax.legend()
ax.set_xlabel('Exam 1 Score')
ax.set_ylabel('Exam 2 Score')
plt.show()
Figure: scatter plot of admitted vs. not-admitted students
# Sigmoid function
# Formula: https://www.jianshu.com/p/506595ec4b58
# https://www.kesci.com/home/project/5da1829c037db3002d445baa
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
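The sigmoid squashes any real input into (0, 1), which lets us read the hypothesis as a probability:

$$g(z) = \frac{1}{1 + e^{-z}},\qquad h_\theta(x) = g(\theta^T x)$$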
# Plot the sigmoid function (optional)
# x1 = np.linspace(-100, 100, 100)
# y1 = sigmoid(x1)
# fig, ax = plt.subplots(figsize=(12,8))
# ax.plot(x1,y1,'r',label='Prediction')
# plt.show()
# Cost function and gradient
# Formula: cnblogs.com/qkloveslife/p/9851598.html
def cost(theta, X, y):
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)
    # np.multiply is element-wise multiplication; np.log is the natural logarithm
    first = np.multiply(-y, np.log(sigmoid(X * theta.T)))
    second = np.multiply((1-y), np.log(1 - sigmoid(X * theta.T)))
    return np.sum(first - second) / (len(X))
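This implements the logistic regression cost:

$$J(\theta) = \frac{1}{m}\sum_{i=1}^{m}\Big[-y^{(i)}\log\big(h_\theta(x^{(i)})\big) - (1-y^{(i)})\log\big(1-h_\theta(x^{(i)})\big)\Big]$$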
# Initialize the data
# Add a constant column of ones as the first column of data
data.insert(0,'Ones',1)
# Initialize X, y, θ from the columns of data
cols = data.shape[1]
X = data.iloc[:,:cols-1]
y = data.iloc[:,cols-1:]
# theta has three entries: one for the constant column and one for each exam score
theta = np.zeros(3)
# Convert the DataFrames to matrices
X = np.matrix(X.values)
y = np.matrix(y.values)
# theta stays a 1-D array: cost() converts it internally, and fmin_tnc below expects a 1-D x0
# Compute the cost with the initial θ
print('cost(theta, X, y)= ', cost(theta, X, y))
# Output: cost(theta, X, y) = 0.6931471805599453
# Compute the gradient of the cost
def gradient(theta, X, y):
    # convert types
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)
    # one entry per parameter in theta
    parameters = int(theta.ravel().shape[1])
    grad = np.zeros(parameters)
    # prediction error
    error = sigmoid(X * theta.T) - y
    for i in range(parameters):
        term = np.multiply(error, X[:,i])
        grad[i] = np.sum(term) / len(X)
    return grad
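The gradient has the same form as in linear regression, only with the sigmoid hypothesis:

$$\frac{\partial J(\theta)}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\big(h_\theta(x^{(i)}) - y^{(i)}\big)\, x_j^{(i)}$$

Note that gradient() only computes the gradient; it never performs a descent step. The update itself is left to the scipy optimizer below.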
# Use scipy's optimizer to find θ
import scipy.optimize as opt
result = opt.fmin_tnc(func=cost, x0=theta, fprime=gradient, args=(X,y))
print('result = ',result)
# Output: result = (array([-25.1613186 , 0.20623159, 0.20147149]), 36, 0)
# Cost at the optimal θ
print('cost at the optimal θ: ', cost(result[0], X, y))
# Output: cost at the optimal θ: 0.20349770158947464
# Plot the decision boundary
plotting_x1 = np.linspace(30,100,100)
plotting_h1 = ( - result[0][0] - result[0][1] * plotting_x1) / result[0][2]
fig, ax = plt.subplots(figsize=(12,8))
ax.plot(plotting_x1, plotting_h1, 'y', label='Prediction')
ax.scatter(positive['Exam 1'], positive['Exam 2'], s=50, c='b', marker='o', label='Admitted')
ax.scatter(negative['Exam 1'], negative['Exam 2'], s=50, c='r', marker='x', label='Not Admitted')
ax.legend()
ax.set_xlabel('Exam 1 Score')
ax.set_ylabel('Exam 2 Score')
plt.show()
Figure: decision boundary over the scatter plot
From the figure, roughly nine data points fall on the wrong side of the decision boundary. Next we evaluate how well the θ found by the model actually classifies the data.
# Evaluate the logistic regression model
# Implement the hypothesis hθ(x)
def hfunc1(theta, X):
    # dot product of θ and x, passed through the sigmoid
    return sigmoid(np.dot(theta.T, X))
# Predict the admission probability for a student with exam scores 45 and 85
print('admission probability for exam scores 45 and 85: ', hfunc1(result[0], [1,45,85]))
# Output: admission probability for exam scores 45 and 85: 0.7762906232076073
# Define the prediction function
def predict(theta, X):
    probability = sigmoid(X * theta.T)
    # predict 1 (admitted) if the probability is at least 0.5, otherwise 0
    return [1 if x >= 0.5 else 0 for x in probability]
# Measure the prediction accuracy
# theta_min is the θ found by the optimizer
theta_min = np.matrix(result[0])
predictions = predict(theta_min, X)
# 1 for every correct prediction, 0 for every incorrect one
correct = [1 if (a == 1 and b == 1) or (a == 0 and b == 0) else 0 for (a, b) in zip(predictions, y)]
# Accuracy: the fraction of correctly classified samples
accuracy = 100 * sum(map(int, correct)) // len(correct)
print('accuracy = {0}%'.format(accuracy))
# The accuracy is 89%
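The list comprehension can be replaced by a one-line numpy computation. A sketch (assuming predictions and y as defined above):

acc = 100 * np.mean(np.array(predictions) == np.asarray(y).ravel())  # element-wise comparison of predicted and true labels
print('accuracy = {0}%'.format(acc))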
ex2-data2
In the second part of the exercise we add a regularization term to improve the logistic regression. Suppose you are the product manager of a factory: you have the results of two tests for a batch of microchips, and the test results determine whether each chip is accepted or rejected.
Regularization helps avoid overfitting the data.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Suppose you are a factory's product manager deciding, from two test results, whether each chip is accepted or rejected
# https://www.kesci.com/home/project/5da1829c037db3002d445baa/code
path = r'C:\Users\Administrator\Desktop\MLDataSet\ex2\ex2data2.txt'
data = pd.read_csv(path, header=None, names=['Test 1','Test 2','Accepted'])
positive = data[data['Accepted'].isin([1])]
negative = data[data['Accepted'].isin([0])]
fig, ax = plt.subplots(figsize=(12,8))
ax.scatter(positive['Test 1'], positive['Test 2'], s=50, c='b', marker='o', label='Accepted')
ax.scatter(negative['Test 1'], negative['Test 2'], s=50, c='r', marker='x', label='Not Accepted')
ax.legend()
ax.set_xlabel('Test 1 Score')
ax.set_ylabel('Test 2 Score')
plt.show()
Unlike the previous dataset, this one has no linear decision boundary: no straight line can separate the two classes. Logistic regression on the raw features can only produce a linear boundary, so it cannot be applied to this dataset directly.
A better approach is to create more features from each sample, so we map x1 and x2 to every polynomial term up to the sixth power
(27 new feature columns in total).
# Feature mapping
degree = 6
x1 = data['Test 1']
x2 = data['Test 2']
data.insert(3, 'Ones', 1)
# Add the polynomial feature columns
for i in range(1, degree+1):
    for j in range(0, i+1):
        data['F' + str(i-j) + str(j)] = np.power(x1, i-j) * np.power(x2, j)
# Drop the original Test 1 / Test 2 columns
# axis: 1 = columns, 0 = rows; inplace: True modifies data in place, False returns a new DataFrame
data.drop('Test 1', axis=1, inplace=True)
data.drop('Test 2', axis=1, inplace=True)
print(data)
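The mapping produces every monomial of x1 and x2 up to total degree six:

$$\text{mapFeature}(x_1, x_2) = \big(1,\; x_1,\; x_2,\; x_1^2,\; x_1 x_2,\; x_2^2,\; x_1^3,\; \ldots,\; x_1 x_2^5,\; x_2^6\big)$$

that is, 27 polynomial terms plus the column of ones.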
# Sigmoid function
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
We need to modify the cost function and the gradient to include the regularization term. The regularized formulas are shown below, followed by their implementations.
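The regularized cost adds a penalty on every parameter except the bias θ₀:

$$J(\theta) = \frac{1}{m}\sum_{i=1}^{m}\Big[-y^{(i)}\log\big(h_\theta(x^{(i)})\big) - (1-y^{(i)})\log\big(1-h_\theta(x^{(i)})\big)\Big] + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^2$$

and the gradient becomes, for j ≥ 1 (θ₀ keeps its unregularized gradient):

$$\frac{\partial J(\theta)}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\big(h_\theta(x^{(i)}) - y^{(i)}\big)\, x_j^{(i)} + \frac{\lambda}{m}\theta_j$$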
# Regularized cost function (learningRate here is the regularization parameter λ)
def costReg(theta, X, y, learningRate):
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)
    first = np.multiply(-y, np.log(sigmoid(X * theta.T)))
    second = np.multiply((1-y), np.log(1 - sigmoid(X * theta.T)))
    # penalty term; theta_0 (the first column) is excluded
    reg = (learningRate / (2 * len(X))) * np.sum(np.power(theta[:,1:theta.shape[1]], 2))
    return np.sum(first - second) / len(X) + reg
# Regularized gradient function
def gradientReg(theta, X, y, learningRate):
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)
    parameters = int(theta.ravel().shape[1])
    grad = np.zeros(parameters)
    error = sigmoid(X * theta.T) - y
    for i in range(parameters):
        term = np.multiply(error, X[:,i])
        if i == 0:
            # theta_0 is not regularized
            grad[i] = np.sum(term) / len(X)
        else:
            grad[i] = (np.sum(term) / len(X)) + ((learningRate / len(X)) * theta[:,i])
    return grad
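The parameter loop can again be vectorized. A minimal sketch of an equivalent computation (my own variant, not from the source notebook; it assumes theta converts to a 1 x n row matrix and that the optimizer wants a 1-D gradient back):

def gradientRegVec(theta, X, y, learningRate):
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)
    error = sigmoid(X * theta.T) - y        # m x 1 residuals
    grad = (error.T * X) / len(X)           # 1 x n unregularized gradient
    reg = (learningRate / len(X)) * theta   # 1 x n penalty term
    reg[0, 0] = 0                           # do not regularize theta_0
    return np.asarray(grad + reg).ravel()   # 1-D array for the optimizer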
Next, initialize the variables.
# Initialize X, y, θ
cols = data.shape[1]
X = data.iloc[:,1:cols]
y = data.iloc[:,0:1]
theta = np.zeros(cols-1)
# Type conversion
X = np.matrix(X.values)
y = np.matrix(y.values)
# set λ to 1
learningRate = 1
print('costReg(theta, X, y, learningRate) = ',costReg(theta, X, y, learningRate))
# costReg(theta, X, y, learningRate) = 0.6931471805599454
Use scipy's optimizer to compute θ.
# Use the optimizer to compute θ
import scipy.optimize as opt
result = opt.fmin_tnc(func=costReg, x0=theta, fprime=gradientReg, args=(X, y, learningRate))
print(result)
# The computed parameters:
# result = (array([ 1.27271027, 0.62529965, 1.18111686, -2.01987399, -0.9174319 ,
# -1.43166929, 0.12393227, -0.36553118, -0.35725403, -0.17516291,
# -1.45817009, -0.05098418, -0.61558554, -0.27469165, -1.19271298,
# -0.2421784 , -0.20603299, -0.04466178, -0.2777895 , -0.29539514,
# -0.45645981, -1.04319154, 0.02779373, -0.2924487 , 0.0155576 ,
# -0.32742405, -0.1438915 , -0.92467487]),
# 32,
# 1)
Verify the parameters.
# Verification
# Define the prediction function
def predict(theta, X):
    probability = sigmoid(X * theta.T)
    # predict 1 (accepted) if the probability is at least 0.5, otherwise 0
    return [1 if x >= 0.5 else 0 for x in probability]
theta_min = np.matrix(result[0])
predictions = predict(theta_min, X)
# Count the correct predictions
correct = [1 if (a == 1 and b == 1) or (a == 0 and b == 0) else 0 for (a, b) in zip(predictions, y)]
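The notebook stops after building correct; finishing the check the same way as for ex2data1 might look like this (a sketch):

accuracy = 100 * sum(map(int, correct)) // len(correct)  # percentage of correctly classified chips
print('accuracy = {0}%'.format(accuracy))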
Plot the decision boundary.
# Plot the decision boundary
def hfunc2(theta, x1, x2):
    temp = theta[0][0]
    place = 0
    for i in range(1, degree+1):
        for j in range(0, i+1):
            temp += np.power(x1, i-j) * np.power(x2, j) * theta[0][place+1]
            place += 1
    return temp
def find_decision_boundary(theta):
    t1 = np.linspace(-1, 1.5, 1000)
    t2 = np.linspace(-1, 1.5, 1000)
    coordinates = [(x, y) for x in t1 for y in t2]
    x_cord, y_cord = zip(*coordinates)
    h_val = pd.DataFrame({'x1': x_cord, 'x2': y_cord})
    h_val['hval'] = hfunc2(theta, h_val['x1'], h_val['x2'])
    # keep the grid points where hθ is approximately zero
    decision = h_val[np.abs(h_val['hval']) < 2 * 10**-3]
    return decision.x1, decision.x2
# fig, ax = plt.subplots(figsize=(12,8))
# ax.scatter(positive['Test 1'], positive['Test 2'], s=50, c='b', marker='o', label='Accepted')
# ax.scatter(negative['Test 1'], negative['Test 2'], s=50, c='r', marker='x', label='Rejected')
# ax.set_xlabel('Test 1 Score')
# ax.set_ylabel('Test 2 Score')
# x, y = find_decision_boundary(result)
# plt.scatter(x, y, c='y', s=10, label='Prediction')
# ax.legend()
# plt.show()
# Change λ and observe the decision boundary (rerun with learningRate2 = 0 or 100 for the plots below)
learningRate2 = 1
result3 = opt.fmin_tnc(func=costReg, x0=theta, fprime=gradientReg, args=(X, y, learningRate2))
fig1, ax1 = plt.subplots(figsize=(12,8))
ax1.scatter(positive['Test 1'], positive['Test 2'], s=50, c='b', marker='o', label='Accepted')
ax1.scatter(negative['Test 1'], negative['Test 2'], s=50, c='r', marker='x', label='Rejected')
ax1.set_xlabel('Test 1 Score')
ax1.set_ylabel('Test 2 Score')
x1, y1 = find_decision_boundary(result3)
plt.scatter(x1, y1, c='y', s=10, label='Prediction')
ax1.legend()
plt.show()
With the regularization parameter λ = 1:
With λ = 0, the model overfits:
With λ = 100, the model underfits:
The scipy.optimize optimizer
The code above uses scipy.optimize.fmin_tnc().
Common input parameters:
func: the objective function to minimize
x0: the initial guess
fprime: the gradient of func; if it is not supplied, func must return both the function value and the gradient, or approx_grad=True must be set
approx_grad: if True, the gradient is approximated numerically
args: a tuple of extra arguments passed to the objective function
Return values:
x: array, the solution of the optimization problem
nfeval: int, the number of function evaluations
Every call to the objective function during optimization counts as one function evaluation. A single iteration can involve several function evaluations, so this number is not the iteration count and is usually larger.
rc: int, the return code (the optimizer's termination status)
Example:
result = (array([ 1.27271027, 0.62529965, 1.18111686, -2.01987399, -0.9174319 ,
-1.43166929, 0.12393227, -0.36553118, -0.35725403, -0.17516291,
-1.45817009, -0.05098418, -0.61558554, -0.27469165, -1.19271298,
-0.2421784 , -0.20603299, -0.04466178, -0.2777895 , -0.29539514,
-0.45645981, -1.04319154, 0.02779373, -0.2924487 , 0.0155576 ,
-0.32742405, -0.1438915 , -0.92467487]),
32,
1)