李宏毅机器学习Task6

最新推荐文章于 2022-11-17 17:53:46 发布

孤客...

最新推荐文章于 2022-11-17 17:53:46 发布

阅读量214

点赞数

分类专栏：机器学习打卡学习

本文链接：https://blog.csdn.net/qq_38364952/article/details/90744714

版权

机器学习同时被 2 个专栏收录

4 篇文章 0 订阅

订阅专栏

打卡学习

2 篇文章 0 订阅

订阅专栏

自己创建数据，实现分类任务。
要求：
只需实现 LR（逻辑回归）算法的核心代码
写出详细的注释说明
注意矩阵维度

参考负责人

#coding=utf-8
#Version:python3.6.0
#Tools:Pycharm 2017.3.2
__date__ = '2019/6/2 21:43'
__author__ = 'ranchunfu'

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Seed NumPy's global RNG at import time. Note that get_data() seeds the
# global RNG again with its own argument before drawing any samples.
np.random.seed(1)

# Build the synthetic data set
def get_data(seed):
    """Generate a shuffled, two-class, two-feature toy data set.

    Class 0 has 300 samples drawn from N(5, 1) and N(4, 1) for the two
    features; class 1 has 400 samples drawn from N(10, 2) and N(8, 2).

    Args:
        seed: Value passed to ``np.random.seed``; this re-seeds NumPy's
            global RNG as a side effect.

    Returns:
        Tuple ``(x, y)`` where ``x`` has shape (700, 2) and ``y`` has shape
        (700,) with integer labels 0/1, both shuffled with the same
        permutation.
    """
    np.random.seed(seed)

    # Draw order matters for reproducibility: class-0 features first,
    # then class-1 features, then the shuffle permutation.
    n_neg = 300
    neg_f1 = np.random.normal(loc=5.0, scale=1.0, size=n_neg)
    neg_f2 = np.random.normal(loc=4.0, scale=1.0, size=n_neg)

    n_pos = 400
    pos_f1 = np.random.normal(loc=10.0, scale=2.0, size=n_pos)
    pos_f2 = np.random.normal(loc=8.0, scale=2.0, size=n_pos)

    # One row per sample, one column per feature.
    features = np.column_stack((
        np.concatenate((neg_f1, pos_f1), axis=0),
        np.concatenate((neg_f2, pos_f2), axis=0),
    ))
    labels = np.concatenate(([0] * n_neg, [1] * n_pos), axis=0)

    # Shuffle samples and labels together.
    order = np.random.permutation(n_neg + n_pos)
    return features[order], labels[order]

# Module-level data set used by the splitting / normalisation code below.
x, y =get_data(1)

# Split into training and test sets
def train_test_split(x, y, train_ratio=0.7):
    """Split samples and labels into a leading train part and a trailing test part.

    No shuffling is done here; the first ``int(len(y) * train_ratio)``
    entries become the training set and the rest the test set.

    Args:
        x: Sliceable sequence/array of samples (sliced along its first axis).
        y: Sliceable sequence/array of labels, same length as ``x``.
        train_ratio: Fraction of the data used for training, in [0, 1].
            Defaults to 0.7, the value that was previously hard-coded.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)``.

    Raises:
        ValueError: If ``train_ratio`` is outside [0, 1] or ``x`` and ``y``
            have different lengths.
    """
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError("train_ratio must be between 0 and 1")
    if len(x) != len(y):
        raise ValueError("x and y must contain the same number of samples")

    # int() truncates, so the training part is rounded down.
    split_index = int(len(y) * train_ratio)
    return x[:split_index], y[:split_index], x[split_index:], y[split_index:]

# Normalise the features and arrange the matrix dimensions.
# After this section x has shape (2, m) and y has shape (1, m).
x_train, y_train, x_test, y_test = train_test_split(x, y )

# Min-max scale both splits with the TRAINING split's statistics only.
# Scaling the test split with its own min/max would put it in a different
# feature space from the one the model is trained on.
feature_min = np.min(x_train, axis=0)
feature_range = np.max(x_train, axis=0) - feature_min
x_train = (x_train - feature_min) / feature_range
x_test = (x_test - feature_min) / feature_range

# Column-per-sample layout: features (2, m), labels (1, m).
x_train = x_train.T
y_train = y_train.reshape(1,-1)
x_test = x_test.T
y_test = y_test.reshape(1,-1)


class LR:
    """Binary logistic regression trained with batch gradient descent.

    Data layout is one column per sample: features have shape
    (n_features, m) and labels have shape (1, m).

    Usage: call ``fit(x, y)`` to attach the data and initialise the
    parameters, ``grad()`` to run the gradient-descent iterations, then
    ``predict(x)`` to obtain 0/1 labels.
    """

    def __init__(self, learning_rate=0.1, iter=100):
        """Store the hyper-parameters.

        Args:
            learning_rate: Gradient-descent step size.
            iter: Number of gradient-descent iterations run by ``grad()``.
                (The name shadows the builtin but is kept because callers
                pass it by keyword.)
        """
        self.learning_rate = learning_rate
        self.iter = iter

    def _sigmoid(self, Z):
        """Element-wise logistic function 1 / (1 + exp(-Z))."""
        return 1 / (1 + np.exp(-Z))

    # Forward pass
    def _forward(self, x):
        """Return predicted probabilities, shape (1, m), for x of shape (n_features, m)."""
        # w is (1, n_features), x is (n_features, m) -> Z is (1, m); b broadcasts.
        Z = np.dot(self.w, x) + self.b
        return self._sigmoid(Z)

    def fit(self, x, y):
        """Attach the training data and (re-)initialise the parameters.

        This does not train the model; call ``grad()`` afterwards.

        Args:
            x: Feature matrix of shape (n_features, m).
            y: Label matrix of shape (1, m) with values 0/1.
        """
        self.x = x
        self.y = y
        # Small random weights, shape (1, n_features); bias starts at zero.
        self.w = np.random.randn(1, x.shape[0]) * 0.01
        self.b = np.zeros(1)

    def cost(self):
        """Return the mean binary cross-entropy on the attached training data."""
        # Clip probabilities away from 0 and 1 so log() never sees 0.
        eps = 1e-15
        A = np.clip(self._forward(self.x), eps, 1 - eps)
        losses = -self.y * np.log(A) - (1 - self.y) * np.log(1 - A)
        return np.mean(losses)

    # Backward pass
    def grad(self):
        """Run ``self.iter`` steps of batch gradient descent, updating w and b in place."""
        # Number of samples of the data given to fit(), not of any global array.
        m = self.x.shape[1]
        for _ in range(self.iter):
            A = self._forward(self.x)
            dz = A - self.y                       # (1, m)
            dw = 1 / m * np.dot(dz, self.x.T)     # (1, m) @ (m, n_features) -> (1, n_features)
            db = 1 / m * np.sum(dz)
            self.w = self.w - self.learning_rate * dw
            self.b = self.b - self.learning_rate * db

    def predict(self, x):
        """Return integer 0/1 predictions of shape (1, m) for x of shape (n_features, m).

        A probability of at most 0.5 maps to class 0, anything else to class 1.
        """
        A = self._forward(x)
        return np.where(A <= 0.5, 0, 1).astype(int)



if __name__ == '__main__':
    # Train once, on the training split only.
    lr = LR(learning_rate=0.1, iter=1500)
    lr.fit(x_train, y_train)
    lr.grad()

    # Training accuracy in percent (labels are 0/1, so mean |pred - y| is the error rate).
    y_train_pred = lr.predict(x_train)
    print(100 - np.mean(np.abs(y_train_pred - y_train)) * 100)

    # Test accuracy in percent, evaluated with the model trained above.
    # The model must not be re-fitted on the test split, otherwise this
    # number is just another training accuracy.
    y_test_pred = lr.predict(x_test)
    print(100 - np.mean(np.abs(y_test_pred - y_test)) * 100)

    # Visualise the test data and the decision boundary
    cValue = ['g', 'b']
    plt.scatter(x_test[0, :], x_test[1, :], c=[cValue[i] for i in np.squeeze(y_test)], marker='o')
    # Boundary is where w0*x1 + w1*x2 + b = 0, i.e. x2 = -(b + w0*x1) / w1.
    boundary_x1 = np.arange(0.1, 1.0, 0.1)
    boundary_x2 = (-lr.b - lr.w[0, 0] * boundary_x1) / lr.w[0, 1]
    plt.plot(boundary_x1, boundary_x2, 'r')
    plt.show()