线性回归原理比较简单:一维特征时的方程我们在高中阶段就学习过了,多维特征的线性回归只是在其基础上的扩展。寻找合适参数的过程可以使用梯度下降的方法来进行,但对于线性回归而言,其实存在解析解(闭式解):$w = (X^T X)^{-1} X^T y$。
其相关代码如下:
import numpy as np
import matplotlib.pyplot as plt
def loaddata(filename=r'E:\学习资料\AI+CS\01 个人\《机器学习实战》-Peter Harriton\MLiA_SourceCode\machinelearninginaction\Ch08\ex0.txt'):
    """Load a whitespace-separated two-feature dataset.

    Each line holds two feature columns followed by the target value.

    Parameters
    ----------
    filename : str
        Path to the data file. Defaults to the original hard-coded path
        so existing ``loaddata()`` calls keep working.

    Returns
    -------
    (data, label) : (list[list[float]], list[list[float]])
        data  -- m rows of [col0, col1] feature values.
        label -- m rows of [y], one single-element list per sample.
    """
    data = []
    label = []
    # 'with' guarantees the handle is closed even if parsing raises
    # (the original left the file open on a parse error).
    with open(filename) as file:
        for line in file:
            parts = line.strip().split()
            data.append([float(parts[0]), float(parts[1])])
            label.append([float(parts[-1])])
    return data, label
#标准线性回归:用全部样本通过正规方程直接求闭式解(并非梯度下降)
def calw(data, label):
    """Solve ordinary least squares via the normal equation.

        w = (X^T X)^{-1} X^T y

    Parameters
    ----------
    data  : array-like, shape (m, n) -- feature rows.
    label : array-like, shape (m, 1) -- target column.

    Returns
    -------
    np.matrix of shape (n, 1) with the fitted weights, or None when
    X^T X is singular (no unique least-squares solution).
    """
    x = np.mat(data)
    y = np.mat(label)
    xTx = np.dot(x.T, x)
    # A zero determinant means xTx cannot be inverted.
    if np.linalg.det(xTx) == 0:
        print('the result is wrong!')
        return None
    w1 = np.dot(xTx.I, x.T)
    w = np.dot(w1, y)
    return w
def plotdata(data, label, w):
    """Scatter the raw points and overlay the fitted regression line.

    Parameters
    ----------
    data  : array-like, shape (m, 2) -- feature rows; column 1 is the x axis.
    label : array-like, shape (m, 1) -- target values (y axis).
    w     : np.matrix, shape (2, 1)  -- weights from calw.
    """
    data = np.array(data)
    label = np.array(label)
    x = data[:, 1]
    fig = plt.figure()
    ax1 = fig.add_subplot(1, 1, 1)
    ax1.scatter(x, label, s=5, color='blue', marker='o')
    yhat = np.dot(data, w)
    # Bug fix: sort by x before drawing; plotting unsorted points makes
    # matplotlib connect them in file order and draw a zigzag line.
    order = x.argsort()
    ax1.plot(x[order], np.asarray(yhat).ravel()[order], color='red')
    plt.title('data')
    plt.xlabel('X')
    plt.show()
#局部加权线性回归(Locally Weighted Linear Regression)
def lwlr(testpoint, data, label, k=1.0):
    """Locally weighted linear regression prediction for one query point.

    Each training sample i gets a Gaussian kernel weight
        w_i = exp(-||testpoint - x_i||^2 / (2 k^2))
    so nearby samples dominate the local fit; k is the bandwidth.

    Parameters
    ----------
    testpoint : array-like, shape (1, n) -- query feature row.
    data      : array-like, shape (m, n) -- training feature rows.
    label     : array-like, shape (m, 1) -- training target column.
    k         : float -- kernel bandwidth; large k approaches plain OLS.

    Returns
    -------
    1x1 np.matrix prediction, or None when the weighted normal matrix
    is singular.
    """
    x = np.mat(data)
    label = np.mat(label)
    # Robustness: ensure matrix semantics for '*' even when the caller
    # passes a plain list/ndarray instead of a matrix row.
    testpoint = np.mat(testpoint)
    m = x.shape[0]
    weight = np.eye(m)
    for i in range(m):
        error = testpoint - x[i, :]
        # Extract the scalar explicitly; assigning a 1x1 matrix into a
        # float slot relies on deprecated NumPy size-1 coercion.
        weight[i, i] = np.exp((error * error.T)[0, 0] / (-2 * k ** 2))
    xTx = x.T * weight * x
    if np.linalg.det(xTx) == 0:
        print('it is wrong')
        return None
    ws = xTx.I * x.T * weight * label
    return testpoint * ws
def lwlrtest(testarr, data, label, k=1.0):
    """Predict every row of testarr with locally weighted linear regression.

    Parameters
    ----------
    testarr : array-like, shape (p, n) -- query feature rows.
    data    : array-like, shape (m, n) -- training feature rows.
    label   : array-like -- training targets, one per training row.
    k       : float -- kernel bandwidth forwarded to lwlr.

    Returns
    -------
    np.ndarray of shape (p,) with one prediction per query row.
    """
    testmat = np.mat(testarr)
    # Bug fix: the loop must run over the *test* rows; the original used
    # the training-set length, which only worked because every caller
    # passed test and training sets of equal size.
    p = testmat.shape[0]
    yhat = np.zeros(p)
    for i in range(p):
        yhat[i] = lwlr(testmat[i], data, label, k)[0, 0]
    return yhat
def plotlwlr(data, label, k=1.0):
    """Compare LWLR fits at three bandwidths (1.0, 0.01, 0.003) in stacked panels.

    NOTE(review): the k argument is accepted but never used -- the three
    bandwidths below are hard-coded; confirm whether k should drive one panel.
    """
    data = np.mat(data)
    label = np.mat(label)
    fig, axes = plt.subplots(3, 1, sharex=False, sharey=False, figsize=(10, 6))
    xs = data[:, 1].flatten().A[0]
    ys = label.flatten().A[0]
    # Same raw scatter in every panel; only the red fit line differs.
    for panel in axes:
        panel.scatter(xs, ys, c='blue', s=5)
    # One full prediction pass per bandwidth; smaller k hugs the data.
    fits = [lwlrtest(data, data, label, k=bw) for bw in (1.0, 0.01, 0.003)]
    order = data[:, 1].argsort(axis=0)
    sorted_x = data[order][:, 0, :]
    for panel, fit in zip(axes, fits):
        panel.plot(sorted_x[:, 1], fit[order], c='red')
    plt.show()
# Script entry: load the sample data, show the three LWLR fits, then run
# one full prediction pass over the training set.
data, label = loaddata()
plotlwlr(data, label, k=1.0)
yhat = lwlrtest(data, data, label, k=1.0)
yhat = np.mat(yhat)
# The original ended with a bare `yhat.shape` expression -- a no-op
# outside a notebook; print it so the script reports the same value.
print(yhat.shape)
鲍鱼年龄预测:
#示例:鲍鱼年龄预测—数据读取
def loaddata_0(filename):
    """Load a whitespace-separated dataset with an arbitrary feature count.

    Every column except the last is a feature; the last column is the label.

    Parameters
    ----------
    filename : str -- path to the data file.

    Returns
    -------
    (data, label) : (list[list[float]], list[float])
        data  -- m rows of feature values.
        label -- m target values as a flat list.
    """
    data = []
    label = []
    # Bug fix: the original opened the file a second time just to count
    # columns on the first line and never closed that handle. Slicing each
    # parsed line gives the same result with a single, safely-closed file.
    with open(filename) as f:
        for row in f:
            parts = row.strip().split()
            data.append([float(v) for v in parts[:-1]])
            label.append(float(parts[-1]))
    return data, label
def lwlr(testpoint, data, label, k=1.0):
    """Locally weighted linear regression for one query point (flat labels).

    Variant used for the abalone data: label is a flat list of floats, so
    it is transposed into a column after conversion to a matrix.

    Parameters
    ----------
    testpoint : array-like, shape (1, n) -- query feature row.
    data      : array-like, shape (m, n) -- training feature rows.
    label     : array-like, length m -- flat list of training targets.
    k         : float -- Gaussian kernel bandwidth.

    Returns
    -------
    1x1 np.matrix prediction, or None when the weighted normal matrix
    is singular.
    """
    x = np.mat(data)
    label = np.mat(label).T  # flat list -> (1, m) matrix -> (m, 1) column
    # Robustness: ensure matrix semantics for '*' for any input type.
    testpoint = np.mat(testpoint)
    m = x.shape[0]
    weight = np.eye(m)
    for i in range(m):
        error = testpoint - x[i, :]
        # Extract the scalar explicitly; assigning a 1x1 matrix into a
        # float slot relies on deprecated NumPy size-1 coercion.
        weight[i, i] = np.exp((error * error.T)[0, 0] / (-2 * k ** 2))
    xTx = x.T * weight * x
    if np.linalg.det(xTx) == 0:
        print('it is wrong')
        return None
    ws = xTx.I * x.T * weight * label
    return testpoint * ws
def lwlrtest(testarr, data, label, k=1.0):
    """Predict every row of testarr via lwlr (flat-label variant).

    Parameters
    ----------
    testarr : array-like, shape (p, n) -- query feature rows.
    data    : array-like, shape (m, n) -- training feature rows.
    label   : array-like, length m -- flat list of training targets.
    k       : float -- kernel bandwidth forwarded to lwlr.

    Returns
    -------
    np.ndarray of shape (p,) with one prediction per query row.
    """
    testmat = np.mat(testarr)
    # Bug fix: iterate over the *test* rows, not the training-set length;
    # the original only worked because callers passed equal-sized slices.
    p = testmat.shape[0]
    yhat = np.zeros(p)
    for i in range(p):
        yhat[i] = lwlr(testmat[i], data, label, k)[0, 0]
    return yhat
def calw_0(data, label):
    """Ordinary least squares via the normal equation (flat-label variant).

    label is a flat list of floats and is transposed into a column.
    Returns the (n, 1) weight matrix, or None when X^T X is singular.
    """
    x = np.mat(data)
    y = np.mat(label).T
    xTx = x.T * x
    # det == 0 means the normal matrix has no inverse.
    if np.linalg.det(xTx) == 0:
        print('the result is wrong!')
        return
    return xTx.I * x.T * y
def error(yhat, label):
    """Sum of squared differences between predictions and targets."""
    diff = np.array(yhat) - np.array(label)
    return (diff ** 2).sum()
# Abalone-age experiment: compare LWLR error at several bandwidths on the
# training slice vs. a held-out slice, then against plain linear regression.
data, label = loaddata_0(r'E:\学习资料\AI+CS\01 个人\《机器学习实战》-Peter Harriton\MLiA_SourceCode\machinelearninginaction\Ch08\abalone.txt')
train_x, train_y = data[0:99], label[0:99]
test_x, test_y = data[100:199], label[100:199]
print('训练集与测试集相同时,查看各误差结果:')
yhat_0 = lwlrtest(train_x, train_x, train_y, k=1.0)
yhat_1 = lwlrtest(train_x, train_x, train_y, k=0.1)
yhat_2 = lwlrtest(train_x, train_x, train_y, k=10)
print('当k=1时,误差为:%f' % error(yhat_0, train_y))
print('当k=0.1时,误差为:%f' % error(yhat_1, train_y))
print('当k=10时,误差为:%f' % error(yhat_2, train_y))
print('')
print('训练集与测试集不同的情况下,查看结果:')
yhat_0 = lwlrtest(test_x, train_x, train_y, k=1.0)
yhat_1 = lwlrtest(test_x, train_x, train_y, k=0.1)
yhat_2 = lwlrtest(test_x, train_x, train_y, k=10)
print('当k=1时,误差为:%f' % error(yhat_0, test_y))
print('当k=0.1时,误差为:%f' % error(yhat_1, test_y))
print('当k=10时,误差为:%f' % error(yhat_2, test_y))
print('')
print('比较简单线性回归和加权线性回归,k = 1.0时的误差大小:')
print('加权线性回归,k=1时,误差为:%f' % error(yhat_0, test_y))
w = calw_0(train_x, train_y)
yhat = test_x * w
print('简单线性回归,误差为:%f' % error(yhat.flatten().A[0], test_y))
岭回归:
import numpy as np
import matplotlib.pyplot as plt
#示例:鲍鱼年龄预测—数据读取
def loaddata_0(filename):
    """Load a whitespace-separated dataset with an arbitrary feature count.

    Every column except the last is a feature; the last column is the label.

    Parameters
    ----------
    filename : str -- path to the data file.

    Returns
    -------
    (data, label) : (list[list[float]], list[float])
        data  -- m rows of feature values.
        label -- m target values as a flat list.
    """
    data = []
    label = []
    # Bug fix: the original opened the file a second time just to count
    # columns on the first line and never closed that handle. Slicing each
    # parsed line gives the same result with a single, safely-closed file.
    with open(filename) as f:
        for row in f:
            parts = row.strip().split()
            data.append([float(v) for v in parts[:-1]])
            label.append(float(parts[-1]))
    return data, label
# Load the abalone dataset for the ridge-regression sweep (machine-specific path).
data,label = loaddata_0(r'E:\学习资料\AI+CS\01 个人\《机器学习实战》-Peter Harriton\MLiA_SourceCode\machinelearninginaction\Ch08\abalone.txt')
def ridgeRegres(data, label, lam):
    """Ridge regression: w = (X^T X + lam*I)^{-1} X^T y.

    Parameters
    ----------
    data  : array-like, shape (m, n) -- feature rows.
    label : array-like, shape (m, 1) -- target column.
    lam   : float -- L2 regularization strength.

    Returns
    -------
    np.matrix of shape (n, 1), or None when the penalized normal matrix
    is still singular.
    """
    x = np.mat(data)
    y = np.mat(label)
    n_features = x.shape[1]
    # Add lam on the diagonal before inverting.
    penalized = x.T * x + lam * np.mat(np.eye(n_features))
    if np.linalg.det(penalized) == 0:
        print('该矩阵为奇异矩阵,不能计算逆矩阵')
        return
    return penalized.I * x.T * y
def normdata(data, label, num=30):
    """Standardize the data and sweep ridge regression over a lambda grid.

    Features are centered and divided by their variance (the original
    code's convention -- note: variance, not standard deviation); labels
    are centered. Ridge weights are computed for lambda = exp(i - 10)
    with i = 0 .. num-1.

    Parameters
    ----------
    data  : array-like, shape (m, n) -- feature rows.
    label : array-like, length m    -- flat list of targets.
    num   : int -- number of lambda values to evaluate (default 30,
            matching the original hard-coded sweep length).

    Returns
    -------
    np.ndarray of shape (num, n): one row of ridge weights per lambda.
    """
    x = np.mat(data)     # shape (m, n)
    y = np.mat(label).T  # shape (m, 1) -- the original comment said (n, 1), which was wrong
    n = x.shape[1]
    x_mean = np.mean(x, axis=0)
    y_mean = np.mean(y, axis=0)
    y_new = y - y_mean
    x_var = np.var(x, axis=0)
    # NOTE(review): a constant feature column makes x_var zero and this
    # divides by zero; confirm the input has no constant column before
    # reusing on other datasets.
    x_new = (x - x_mean) / x_var
    wmat = np.zeros((num, n))
    for i in range(num):
        w_lam = ridgeRegres(x_new, y_new, np.exp(i - 10))
        wmat[i, :] = w_lam.T
    return wmat
# Plot every ridge coefficient's trajectory across the lambda sweep.
wmat = normdata(data, label)
fig = plt.figure()
axis = fig.add_subplot(1, 1, 1)
axis.plot(wmat)
plt.show()