Machine Learning: LogisticRegression (a Python class implementation)

import numpy as np
import matplotlib.pyplot as plt


class LogisticRegression:
    """
    sigmoid function: 1/(1+exp(-z))

    y_hat = sigmoid(w*x+b)
    loss: -y*log(y_hat)-(1-y)*log(1-y_hat)
    dw = (y_hat-y)*x
    db = y_hat-y
    w = w-learning_rate*dw
    b = b-learning_rate*db
    """
    def __init__(self, feature_size, learn_rate=0.001, iter_num=1000, batch_size=50):
        self.feature_size = feature_size
        self.w = np.random.rand(self.feature_size, 1)  # weights, shape (feature_size, 1), random init in [0, 1)
        self.b = 0.0                                    # bias, scalar
        self.lr = learn_rate
        self.iter_num = iter_num
        self.batch_size = batch_size
        # training history, used by plot_train_log()
        self.w_log = []
        self.b_log = []
        self.loss_log = []

    @staticmethod
    def sigmoid(z):
        return 1 / (1 + np.exp(-1 * z))

    def calculate(self, x):
        """
        calculate y_hat for a single sample
        :param x: input sample, shape (1, n)
        :return: y_hat, a scalar probability in (0, 1)
        """
        z = np.dot(x, self.w)[0][0] + self.b  # w·x + b as a scalar
        y_hat = self.sigmoid(z)
        return y_hat

    @staticmethod
    def gradient(y_hat, y, x):
        """
        :param y_hat: predicted probability, scalar
        :param y: true label, 0 or 1
        :param x: input sample, shape (1, n)
        :return: dw, shape (n, 1); db, scalar
        """
        diff = y_hat - y
        dw = np.multiply(np.transpose(x), diff)  # (y_hat - y) * x, transposed to (n, 1)
        db = diff
        return dw, db

    def avg_loss(self, x_batch, y_batch):
        """average cross-entropy loss over one batch"""
        avg_l = 0.0
        for i in range(self.batch_size):
            y_hat = self.calculate(x_batch[i])
            avg_l += y_batch[i]*np.log(y_hat)+(1-y_batch[i])*np.log(1-y_hat)
        avg_l /= self.batch_size
        return -1 * avg_l

    def cal_gradient(self, x_batch, y_batch):
        """average the gradients over one batch, then take a gradient-descent step on w and b"""
        avg_dw, avg_db = np.zeros((self.feature_size, 1)), 0.0
        for i in range(self.batch_size):
            y_hat = self.calculate(x_batch[i])
            dw, db = self.gradient(y_hat, y_batch[i], x_batch[i])
            avg_dw = np.add(avg_dw, dw)
            avg_db += db
        avg_dw = np.divide(avg_dw, self.batch_size)
        avg_db /= self.batch_size
        # update: w = w - lr * dw, b = b - lr * db
        self.w = np.add(self.w, np.multiply(avg_dw, -1 * self.lr))
        self.b -= self.lr * avg_db

    def train(self, x_list, y_list):
        sample_count = len(x_list)
        for i in range(self.iter_num):
            # draw a random mini-batch (indices sampled with replacement)
            batch_index = np.random.choice(sample_count, self.batch_size)
            x_batch = [x_list[index] for index in batch_index]
            y_batch = [y_list[index] for index in batch_index]
            self.cal_gradient(x_batch, y_batch)
            if i % 50 == 0:
                # log parameters and loss every 50 iterations
                loss = self.avg_loss(x_batch, y_batch)
                self.w_log.append(self.w)
                self.b_log.append(self.b)
                self.loss_log.append(loss)
                print('w: {0}, b: {1}, loss: {2}'.format(self.w, self.b, loss))
                print('--------------------------------------------------------------------------')

    def plot_train_log(self):
        x = range(len(self.loss_log))
        plt.title('loss')
        plt.plot(x, self.loss_log)
        plt.show()


def get_samples(sample_count=500):
    """generate a linearly separable toy set: the first half is class 0 with integer
    features in [0, 5), the second half is class 1 with integer features in [5, 9)"""
    x_list = []
    y_list = []
    for i in range(sample_count):
        if i < sample_count//2:
            x1 = np.random.randint(0, 5)
            x2 = np.random.randint(0, 5)
            x3 = np.random.randint(0, 5)
            x_list.append([[x1, x2, x3]])
            y_list.append(0)
        else:
            x1 = np.random.randint(5, 9)
            x2 = np.random.randint(5, 9)
            x3 = np.random.randint(5, 9)
            x_list.append([[x1, x2, x3]])
            y_list.append(1)
    return x_list, y_list


if __name__ == '__main__':
    x_sample, y_sample = get_samples(sample_count=500)
    logistic_reg = LogisticRegression(feature_size=3, learn_rate=0.001, iter_num=10000, batch_size=100)
    # print(x_sample[245:255])  # peek at a few samples around the class boundary
    # print(y_sample[245:255])
    logistic_reg.train(x_sample, y_sample)
    print(logistic_reg.calculate([[1, 3, 2]]))  # features in the class-0 range, expect a probability near 0
    print(logistic_reg.calculate([[8, 7, 8]]))  # features in the class-1 range, expect a probability near 1
    logistic_reg.plot_train_log()
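
As a side note, the per-sample loop in cal_gradient can be collapsed into a single vectorized NumPy update. The sketch below only illustrates that same average-gradient step; the helper name vectorized_step and the reshaping of the get_samples output are illustrative assumptions, not part of the class above.

import numpy as np

def vectorized_step(w, b, X, y, lr=0.001):
    """one batch gradient-descent step; X has shape (m, n), y has shape (m, 1)"""
    z = X @ w + b                      # shape (m, 1)
    y_hat = 1.0 / (1.0 + np.exp(-z))   # sigmoid
    diff = y_hat - y                   # (y_hat - y) for every sample in the batch
    dw = X.T @ diff / X.shape[0]       # average gradient w.r.t. w, shape (n, 1)
    db = float(np.mean(diff))          # average gradient w.r.t. b
    return w - lr * dw, b - lr * db

# usage with the toy data above (each x_sample entry is [[x1, x2, x3]]):
# X = np.array(x_sample, dtype=float).reshape(-1, 3)
# y = np.array(y_sample, dtype=float).reshape(-1, 1)
# w, b = np.random.rand(3, 1), 0.0
# for _ in range(10000):
#     w, b = vectorized_step(w, b, X, y, lr=0.001)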