机器学习/深度学习入门：Python 实现线性回归

最新推荐文章于 2024-08-05 22:24:28 发布

M_Z_G_Y

最新推荐文章于 2024-08-05 22:24:28 发布

阅读量887

点赞数 1

分类专栏：机器学习/深度学习

本文链接：https://blog.csdn.net/m_z_g_y/article/details/79776138

版权

机器学习/深度学习专栏收录该内容

26 篇文章 2 订阅

订阅专栏

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2018/4/1 9:36
# @Author  : HJH
# @Site    : 
# @File    : linear2.py
# @Software: PyCharm

import numpy as np
from sklearn.datasets import load_diabetes
import matplotlib.pyplot as plt
from numpy import mat
from numpy import random
from numpy.linalg import det


class linear(object):
    """Single-output linear regression with two fitting strategies.

    ``train`` fits ``W`` and ``b`` by batch gradient descent on the
    half mean-squared-error loss.  ``ridge_regression`` computes a
    closed-form ridge solution and stores it separately in ``weight``;
    it does not touch ``W`` or ``b``.
    """

    def __init__(self):
        # Gradient-descent parameters; created by ``train``.
        self.W = None
        self.b = None
        # Closed-form ridge solution; set by ``ridge_regression``.
        self.weight = None

    def loss(self, X, y):
        """Return the loss and its gradients at the current parameters.

        Args:
            X: array of shape (num_train, num_feature).
            y: targets as a column of shape (num_train, 1), so that they
                line up with ``X.dot(self.W)``.

        Returns:
            Tuple ``(loss, dW, db)``: the half mean squared error, its
            gradient with respect to ``W`` and its gradient with respect
            to the bias (a scalar).
        """
        num_train = X.shape[0]

        residual = X.dot(self.W) + self.b - y
        loss = 0.5 * np.sum(np.square(residual)) / num_train

        dW = X.T.dot(residual) / num_train
        db = np.sum(residual) / num_train

        return loss, dW, db

    def train(self, X, y, learn_rate=0.001, iters=10000):
        """Fit ``W`` and ``b`` by batch gradient descent.

        Args:
            X: array of shape (num_train, num_feature).
            y: targets of shape (num_train, 1).
            learn_rate: step size applied to both gradients.
            iters: number of full-batch update steps.

        Returns:
            List with the loss recorded before each update step.
        """
        num_feature = X.shape[1]
        self.W = np.random.rand(num_feature, 1)
        # One shared intercept.  A (num_feature, 1) bias only broadcasts
        # against the (num_train, 1) prediction when num_feature == 1.
        self.b = np.zeros((1, 1))
        loss_list = []

        for i in range(iters):
            loss, dW, db = self.loss(X, y)
            loss_list.append(loss)
            self.W -= learn_rate * dW
            self.b -= learn_rate * db

            if i % 500 == 0:
                print('iters = %d,loss = %f' % (i, loss))
        return loss_list

    def predict(self, X_test):
        """Return ``X_test.dot(W) + b`` using the parameters from ``train``."""
        return X_test.dot(self.W) + self.b

    def ridge_regression(self, X, y, lam=0.2):
        """Solve ridge regression in closed form.

        Compared with plain least squares, ridge regression gives up
        unbiasedness in exchange for better numerical stability.

        A column of ones is appended to ``X``, so the returned vector holds
        the feature weights followed by the intercept.  The penalty
        ``lam`` is applied to every entry of that vector, intercept
        included.

        Args:
            X: array of shape (m, n).
            y: targets of shape (m, 1).
            lam: ridge penalty added to the diagonal of the Gram matrix.

        Returns:
            ``np.matrix`` of shape (n + 1, 1), also stored in
            ``self.weight``; ``None`` (with ``self.weight`` left untouched)
            when the regularised Gram matrix has a zero determinant.
        """
        m, n = np.shape(X)
        x_mat = np.asmatrix(np.hstack((np.asarray(X), np.ones((m, 1)))))
        # The identity must match the augmented dimension n + 1.  With
        # eye(n) the sum either fails (n > 1) or, for n == 1, broadcasts
        # and adds lam to every entry instead of only the diagonal.
        xTx = x_mat.T * x_mat + lam * np.asmatrix(np.eye(n + 1))
        if np.linalg.det(xTx) == 0.0:
            print("the det of xTx is zero!")
            return
        self.weight = xTx.I * x_mat.T * y
        return self.weight


def load_batasets():
    """Load the scikit-learn diabetes data, keeping only the first feature.

    The last 20 samples form the test split and everything before them the
    training split.  Targets are reshaped into column vectors.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.
    """
    dataset = load_diabetes()
    # Original author's note: feature values lie roughly in (-0.2, 0.2)
    # and targets are integers roughly in (25, 346).
    features = dataset.data[:, :1]  # 2-D slice: a single feature column
    labels = dataset.target

    holdout = 20
    X_train, X_test = features[:-holdout], features[-holdout:]
    y_train = labels[:-holdout].reshape((-1, 1))
    y_test = labels[-holdout:].reshape((-1, 1))
    return X_train, X_test, y_train, y_test


def plot(X_train, y_train, X_test, y_test, linear, loss_list, ridge_weight):
    """Show the fitted lines with the data, and the training loss curve.

    Top panel: training points (black), test points (blue), the model's
    test predictions (red), the gradient-descent line and the ridge line.
    Bottom panel: ``loss_list`` against the iteration index.

    Args:
        X_train, y_train: training inputs and targets.
        X_test, y_test: held-out inputs and targets.
        linear: fitted model exposing ``W``, ``b`` and ``predict``.
        loss_list: losses recorded during training.
        ridge_weight: ridge solution; entry 0 is used as the slope and
            entry 1 as the intercept.
    """
    gradient_line = X_train.dot(linear.W) + linear.b
    ridge_line = X_train.dot(ridge_weight[0]) + ridge_weight[1]

    plt.figure()

    # Upper panel: data points plus both fitted lines.
    plt.subplot(211)
    scatter_layers = (
        (X_train, y_train, 'black'),
        (X_test, y_test, 'blue'),
        (X_test, linear.predict(X_test), 'red'),
    )
    for xs, ys, colour in scatter_layers:
        plt.scatter(xs, ys, color=colour)
    plt.plot(X_train, gradient_line, 'r', label='gradient')
    plt.plot(X_train, ridge_line, 'y', label='ridge')
    plt.legend()
    plt.xlabel('X')
    plt.ylabel('y')

    # Lower panel: loss history.
    plt.subplot(212)
    plt.plot(loss_list, color='blue')
    plt.xlabel('epochs')
    plt.ylabel('errors')

    plt.show()




if __name__ == '__main__':
    # Fit the single-feature data by gradient descent and by closed-form
    # ridge regression, then draw both fits together with the loss curve.
    train_X, test_X, train_y, test_y = load_batasets()
    model = linear()
    history = model.train(train_X, train_y)
    ridge_solution = model.ridge_regression(train_X, train_y)
    plot(train_X, train_y, test_X, test_y, model, history, ridge_solution)

多特征线性回归（在特征矩阵末尾追加一列全 1，把偏置并入权重 W）:

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2018/4/5 11:04
# @Author  : HJH
# @Site    : 
# @File    : temp.py
# @Software: PyCharm


import numpy as np
from sklearn.datasets import load_diabetes
import matplotlib.pyplot as plt
from numpy import mat
from numpy import random
from numpy.linalg import det


class linear(object):
    """Linear regression fitted by batch gradient descent, without a bias.

    The model is just ``X.dot(W)``.  If an intercept is wanted, the caller
    has to supply it as a constant column of ``X``.
    """

    def __init__(self):
        # Weight column of shape (num_feature, 1); created by ``train``.
        self.W = None

    def loss(self, X, y):
        """Return the half mean squared error and its gradient for ``W``.

        Args:
            X: inputs of shape (num_train, num_feature).
            y: targets of shape (num_train, 1).

        Returns:
            Tuple ``(loss, dW)``.
        """
        sample_count = X.shape[0]
        residual = X.dot(self.W) - y

        squared_error = np.sum(np.square(residual))
        cost = 0.5 * squared_error / sample_count
        gradient = X.T.dot(residual) / sample_count
        return cost, gradient

    def train(self, X, y, learn_rate=0.001, iters=10000):
        """Run ``iters`` full-batch gradient steps from a random start.

        Args:
            X: inputs of shape (num_train, num_feature).
            y: targets of shape (num_train, 1).
            learn_rate: step size of each update.
            iters: number of update steps.

        Returns:
            List with the loss measured before each update.
        """
        self.W = np.random.rand(X.shape[1], 1)
        history = []

        for step in range(iters):
            cost, gradient = self.loss(X, y)
            history.append(cost)
            # Progress report every 500 steps, starting with step 0.
            if not step % 500:
                print('iters = %d,loss = %f' % (step, cost))
            self.W -= learn_rate * gradient

        return history

    def predict(self, X_test):
        """Return the model output ``X_test.dot(W)``."""
        return X_test.dot(self.W)



def load_batasets():
    """Load the scikit-learn diabetes data with every feature column.

    The last 20 samples form the test split and everything before them the
    training split.  A trailing column of ones is appended to both input
    splits so the final weight of the model can act as the intercept.
    Targets are reshaped into column vectors.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.  Both ``X`` splits
        are built by stacking with an ``np.matrix`` column.
    """
    def with_ones_column(block):
        # Stack a matrix column of ones to the right of the feature block.
        ones = mat(np.ones((block.shape[0], 1)))
        return np.hstack((block, ones))

    dataset = load_diabetes()
    # Original author's note: feature values lie roughly in (-0.2, 0.2)
    # and targets are integers roughly in (25, 346).
    features = dataset.data[:, :]
    labels = dataset.target

    holdout = 20
    X_train = with_ones_column(features[:-holdout, :])
    X_test = with_ones_column(features[-holdout:, :])
    y_train = labels[:-holdout].reshape((-1, 1))
    y_test = labels[-holdout:].reshape((-1, 1))
    return X_train, X_test, y_train, y_test


def plot(X_train, y_train, X_test, y_test, linear, loss_list):
    """Show the fit along the first feature, and the training loss curve.

    Top panel: training points (black), test points (blue) and the model's
    test predictions (red), all plotted against the first input column,
    plus a line built from the first and last entries of ``linear.W``.
    Bottom panel: ``loss_list`` against the iteration index.

    Args:
        X_train, y_train: training inputs and targets.
        X_test, y_test: held-out inputs and targets.
        linear: fitted model exposing ``W`` and ``predict``.
        loss_list: losses recorded during training.
    """
    first_train = X_train[:, :1]
    first_test = X_test[:, :1]
    # The line uses only the first weight as slope and the last weight as
    # offset.  The transpose mirrors the original code, which transposes
    # the result before plotting.
    fitted = (first_train.dot(linear.W[0]) + linear.W[-1]).T

    plt.figure()

    # Upper panel: data points and the fitted line.
    plt.subplot(211)
    plt.scatter(first_train, y_train, color='black')
    plt.scatter(first_test, y_test[:, :1], color='blue')
    plt.scatter(first_test, linear.predict(X_test)[:, :1], color='red')
    plt.plot(first_train, fitted, 'r', label='gradient')
    plt.legend()
    plt.xlabel('X')
    plt.ylabel('y')

    # Lower panel: loss history.
    plt.subplot(212)
    plt.plot(loss_list, color='blue')
    plt.xlabel('epochs')
    plt.ylabel('errors')

    plt.show()


if __name__ == '__main__':
    # Fit the multi-feature model by gradient descent and plot the result.
    train_X, test_X, train_y, test_y = load_batasets()
    model = linear()
    history = model.train(train_X, train_y)
    plot(train_X, train_y, test_X, test_y, model, history)