《机器学习实战》——线性回归

线性回归原理比较简单,其在一维特征时候的方程我们在高中阶段就学习过了,对于多维特征的线性回归,只是在其基础上进行扩展,对于寻找合适参数的过程,可以使用梯度下降的方法来进行,但对于线性回归而言,其实是有数值解的:

$$\hat{w} = (X^{T}X)^{-1}X^{T}y$$

其相关代码如下:

import numpy as np
import  matplotlib.pyplot as plt
def loaddata(filename=r'E:\学习资料\AI+CS\01 个人\《机器学习实战》-Peter Harriton\MLiA_SourceCode\machinelearninginaction\Ch08\ex0.txt'):
    """Load the ex0 dataset: whitespace-separated rows, first two columns
    are features, last column is the target.

    Parameters
    ----------
    filename : str
        Path to the data file. Defaults to the original hard-coded path so
        existing zero-argument calls keep working.

    Returns
    -------
    (data, label) : data is a list of [x0, x1] float pairs; label is a list
        of one-element [y] float lists (column-vector shape for np.mat).
    """
    data = []
    label = []
    # 'with' guarantees the handle is closed even if a parse error occurs;
    # the original relied on an explicit close() that a raise would skip.
    with open(filename) as file:
        for raw in file:
            line = raw.strip().split()
            data.append([float(line[0]), float(line[1])])
            label.append([float(line[-1])])
    return data, label

#整个样本的梯度下降
def calw(data, label):
    """Ordinary least squares via the normal equation w = (X^T X)^{-1} X^T y.

    data  : feature rows, shape (m, n)
    label : targets as one-element rows, shape (m, 1)
    Returns the (n, 1) coefficient matrix, or None (with a message) when
    X^T X is singular and cannot be inverted.
    """
    x_mat = np.mat(data)
    y_mat = np.mat(label)
    gram = x_mat.T * x_mat  # Gram matrix X^T X
    # A zero determinant means the normal equation has no unique solution.
    if np.linalg.det(gram) == 0:
        print('the result is wrong!')
        return
    return gram.I * x_mat.T * y_mat


def plotdata(data, label, w):
    """Scatter the raw samples and overlay the fitted regression line.

    data  : rows of [bias, x] features; column 1 is plotted on the x-axis
    label : target values (y-axis)
    w     : fitted coefficient vector; predictions come from data @ w
    """
    arr = np.array(data)
    targets = np.array(label)
    # Second feature column is the actual x-coordinate (column 0 is the bias).
    xs = [row[1] for row in arr]
    fig = plt.figure()
    axis = fig.add_subplot(1, 1, 1)
    axis.scatter(xs, targets, s=5, color='blue', marker='o')
    fitted = np.dot(arr, w)
    axis.plot(arr[:, 1], fitted, color='red')
    plt.title('data')
    plt.xlabel('X')
    plt.show()

#局部加权线性回归(Locally Weighted Linear Regression)
def lwlr(testpoint, data, label, k=1.0):
    """Locally Weighted Linear Regression prediction for one query point.

    testpoint : (1, n) row (np.mat-compatible) to predict at
    data      : training feature rows, shape (m, n)
    label     : training targets as one-element rows, shape (m, 1)
    k         : Gaussian kernel bandwidth; smaller k weights near neighbours
                more heavily
    Returns the (1, 1) prediction testpoint * ws, or None (with a message)
    when the weighted Gram matrix is singular.
    """
    x_mat = np.mat(data)
    y_mat = np.mat(label)
    m = x_mat.shape[0]
    weights = np.eye(m)
    # Gaussian kernel: weight decays with squared distance from testpoint.
    for idx in range(m):
        diff = testpoint - x_mat[idx, :]
        weights[idx, idx] = np.exp(diff * diff.T / (-2 * k ** 2))
    gram = x_mat.T * weights * x_mat
    if np.linalg.det(gram) == 0:
        print('it is wrong')
        return
    ws = gram.I * x_mat.T * weights * y_mat
    return testpoint * ws
def lwlrtest(testarr, data, label, k=1.0):
    """Run LWLR at every query point in testarr.

    testarr : sequence of query rows to predict at
    data    : training feature rows
    label   : training targets
    k       : kernel bandwidth passed through to lwlr
    Returns a 1-D array with one prediction per row of testarr.

    Fix: the original sized the output and loop by the number of *training*
    rows, which only worked when testarr and data happened to have the same
    length; we now iterate over the test points themselves.
    """
    data = np.mat(data)
    m = np.shape(testarr)[0]  # number of query points (was data.shape[0])
    yhat = np.zeros(m)
    for i in range(m):
        yhat[i] = lwlr(testarr[i], data, label, k)
    return yhat
def plotlwlr(data,label,k=1.0):
    """Plot the raw data three times and overlay LWLR fits at bandwidths
    1.0, 0.01 and 0.003 (one subplot per bandwidth).

    NOTE(review): the k parameter is never used — the three bandwidths are
    hard-coded in the lwlrtest calls below.
    """
    data = np.mat(data)
    label = np.mat(label)
    # Three stacked axes, one per kernel width.
    f,ax = plt.subplots(3,1,sharex = False,sharey = False,figsize = (10,6))
    # .flatten().A[0] turns an (m,1)/(1,m) matrix into a flat 1-D array.
    ax[0].scatter(data[:,1].flatten().A[0],label.flatten().A[0],c = 'blue',s = 5)
    ax[1].scatter(data[:,1].flatten().A[0],label.flatten().A[0],c = 'blue',s = 5)
    ax[2].scatter(data[:,1].flatten().A[0],label.flatten().A[0],c = 'blue',s = 5)
    # Predict at the training points themselves for each bandwidth.
    yhat_0 = lwlrtest(data,data,label,k = 1.0)
    yhat_1 = lwlrtest(data,data,label,k = 0.01)
    yhat_2 = lwlrtest(data,data,label,k = 0.003)
    # Sort by the x feature so the fitted curve is drawn left-to-right;
    # index is an (m,1) matrix of row indices.
    index = data[:,1].argsort(axis = 0)
    # data[index] has shape (m,1,n); [:,0,:] squeezes it back to (m,n).
    xmat = data[index][:,0,:]
    ax[0].plot(xmat[:,1],yhat_0[index],c = 'red')
    ax[1].plot(xmat[:,1],yhat_1[index],c = 'red')
    ax[2].plot(xmat[:,1],yhat_2[index],c = 'red')
    
  
    plt.show()
# --- Script: load the ex0 dataset and visualize the LWLR fits ---
data,label = loaddata()
plotlwlr(data,label,k=1.0)
# Recompute predictions at k = 1.0 (plotlwlr already did this internally).
yhat = lwlrtest(data,data,label,k = 1.0)
yhat = np.mat(yhat)
# NOTE(review): bare expression below has no effect in a script — it looks
# like a leftover notebook cell used to inspect the shape interactively.
yhat.shape

 

 鲍鱼年龄预测:

#示例:鲍鱼年龄预测—数据读取
def loaddata_0(filename):
    f = open(filename)
    num = len(open(filename).readline().strip().split()) - 1   #数据特征的数目---更改这个部位,将f.readline() 改为 open(filename).readline()
    data = []
    label = []
    for i in f.readlines():
        line = i.strip().split()          #将字符串分割返还的是列表
        temp = []
        for j in range(num):
            temp.append(float(line[j]))
        data.append(temp)
        label.append(float(line[-1]))
    f.close()
    return data,label
def lwlr(testpoint, data, label, k=1.0):
    """Locally weighted linear regression for a single query point.

    Unlike the earlier lwlr, this variant expects label as a FLAT sequence
    of m targets (it transposes np.mat(label) into a column vector).

    testpoint : (1, n) query row
    data      : (m, n) training features
    label     : flat sequence of m targets
    k         : Gaussian kernel bandwidth
    Returns the (1, 1) prediction, or None (with a message) when the
    weighted Gram matrix is singular.
    """
    features = np.mat(data)
    targets = np.mat(label).T  # flat list -> (m, 1) column
    n_rows = features.shape[0]
    kernel = np.eye(n_rows)
    for row in range(n_rows):
        delta = testpoint - features[row, :]
        # Weight shrinks exponentially with squared distance from the query.
        kernel[row, row] = np.exp(delta * delta.T / (-2 * k ** 2))
    weighted_gram = features.T * kernel * features
    if np.linalg.det(weighted_gram) == 0:
        print('it is wrong')
        return
    coeffs = weighted_gram.I * features.T * kernel * targets
    return testpoint * coeffs
def lwlrtest(testarr, data, label, k=1.0):
    """Run LWLR at every query point in testarr.

    testarr : sequence of query rows to predict at
    data    : training feature rows
    label   : flat sequence of training targets (this file's second lwlr
              variant transposes it internally)
    k       : kernel bandwidth passed through to lwlr
    Returns a 1-D array with one prediction per row of testarr.

    Fix: the original sized the output and loop by the number of *training*
    rows, which only worked when testarr and data happened to have the same
    length; we now iterate over the test points themselves.
    """
    data = np.mat(data)
    m = np.shape(testarr)[0]  # number of query points (was data.shape[0])
    yhat = np.zeros(m)
    for i in range(m):
        yhat[i] = lwlr(testarr[i], data, label, k)
    return yhat
def calw_0(data, label):
    """OLS via the normal equation, for FLAT label sequences.

    data  : feature rows, shape (m, n)
    label : flat sequence of m targets (transposed into a column internally)
    Returns the (n, 1) coefficient matrix, or None (with a message) when
    X^T X is singular.
    """
    features = np.mat(data)
    targets = np.mat(label).T  # flat list -> (m, 1) column
    gram = np.dot(features.T, features)
    # Zero determinant => X^T X is not invertible, no unique solution.
    if np.linalg.det(gram) == 0:
        print('the result is wrong!')
        return
    pseudo = np.dot(gram.I, features.T)
    return np.dot(pseudo, targets)
def error(yhat, label):
    """Residual sum of squares between predictions and true targets.

    Both arguments are coerced to arrays, so lists, arrays and matrices of
    matching shape all work. Returns a scalar.
    """
    residuals = np.array(yhat) - np.array(label)
    return (residuals ** 2).sum()

# --- Script: abalone age prediction experiments ---
data,label= loaddata_0(r'E:\学习资料\AI+CS\01 个人\《机器学习实战》-Peter Harriton\MLiA_SourceCode\machinelearninginaction\Ch08\abalone.txt')

# Training error: predict the first 99 training rows themselves, at several
# kernel widths (smaller k fits the training data more tightly).
print('训练集与测试集相同时,查看各误差结果:')
yhat_0 = lwlrtest(data[0:99],data[0:99],label[0:99],k = 1.0)
yhat_1 = lwlrtest(data[0:99],data[0:99],label[0:99],k = 0.1)
yhat_2 = lwlrtest(data[0:99],data[0:99],label[0:99],k = 10)
print('当k=1时,误差为:%f' %(error(yhat_0,label[0:99])) )
print('当k=0.1时,误差为:%f' %(error(yhat_1,label[0:99])) )
print('当k=10时,误差为:%f' %(error(yhat_2,label[0:99])) )
print('')
# Generalization error: train on rows 0-98, evaluate on held-out rows 100-198
# (both slices are 99 rows long, which the original lwlrtest relied on).
print('训练集与测试集不同的情况下,查看结果:')
yhat_0 = lwlrtest(data[100:199],data[0:99],label[0:99],k = 1.0)
yhat_1 = lwlrtest(data[100:199],data[0:99],label[0:99],k = 0.1)
yhat_2 = lwlrtest(data[100:199],data[0:99],label[0:99],k = 10)
print('当k=1时,误差为:%f' %(error(yhat_0,label[100:199])) )
print('当k=0.1时,误差为:%f' %(error(yhat_1,label[100:199])) )
print('当k=10时,误差为:%f' %(error(yhat_2,label[100:199])) )
print('')
# Compare plain OLS against LWLR (k = 1.0) on the same held-out slice.
print('比较简单线性回归和加权线性回归,k = 1.0时的误差大小:')
print('加权线性回归,k=1时,误差为:%f' %(error(yhat_0,label[100:199])) )
w = calw_0(data[0:99],label[0:99])
# list * np.matrix dispatches to matrix.__rmul__, i.e. np.dot(np.mat(...), w).
yhat = data[100:199] * w

print('简单线性回归,误差为:%f' %(error(yhat.flatten().A[0],label[100:199])) )

 

岭回归:

import numpy as np
import matplotlib.pyplot as plt
#示例:鲍鱼年龄预测—数据读取
def loaddata_0(filename):
    f = open(filename)
    num = len(open(filename).readline().strip().split()) - 1   #数据特征的数目---更改这个部位,将f.readline() 改为 open(filename).readline()
    data = []
    label = []
    for i in f.readlines():
        line = i.strip().split()          #将字符串分割返还的是列表
        temp = []
        for j in range(num):
            temp.append(float(line[j]))
        data.append(temp)
        label.append(float(line[-1]))
    f.close()
    return data,label
# Load the abalone dataset (absolute local path; adjust to your environment).
data,label = loaddata_0(r'E:\学习资料\AI+CS\01 个人\《机器学习实战》-Peter Harriton\MLiA_SourceCode\machinelearninginaction\Ch08\abalone.txt')
def ridgeRegres(data, label, lam):
    """Ridge regression coefficients: w = (X^T X + lam * I)^{-1} X^T y.

    data  : feature rows, shape (m, n)
    label : targets in column shape (m, 1) (callers pass an np.mat column)
    lam   : L2 penalty strength; lam = 0 reduces to plain OLS
    Returns the (n, 1) coefficient matrix, or None (with a message) when the
    penalized Gram matrix is still singular.
    """
    x_mat = np.mat(data)
    y_mat = np.mat(label)
    n_features = x_mat.shape[1]
    # The lam * I term regularizes and usually makes the matrix invertible.
    penalized = x_mat.T * x_mat + lam * np.mat(np.eye(n_features))
    if np.linalg.det(penalized) == 0:
        print('该矩阵为奇异矩阵,不能计算逆矩阵')
        return
    return penalized.I * x_mat.T * y_mat
def normdata(data, label, num=30):
    """Standardize the features, center the labels, and sweep ridge
    regression over a log-spaced range of lambda values.

    Parameters
    ----------
    data  : feature rows, shape (m, n)
    label : flat sequence of m targets (transposed into a column internally)
    num   : number of lambda values to try; lambda_i = exp(i - 10) for
            i in range(num). Default 30 reproduces the original behavior
            (previously a hard-coded constant).

    Returns
    -------
    (num, n) array whose row i holds the coefficients for lambda = exp(i-10).
    """
    x = np.mat(data)         # (m, n)
    y = np.mat(label).T      # (m, 1) column
    n = x.shape[1]
    y_new = y - np.mean(y, axis=0)
    # NOTE: divides by the *variance*, not the standard deviation — this
    # follows the book's code and is kept for identical results. A constant
    # (zero-variance) column would divide by zero here.
    x_var = np.var(x, axis=0)
    x_new = (x - np.mean(x, axis=0)) / x_var
    wmat = np.zeros((num, n))
    for i in range(num):
        w_lam = ridgeRegres(x_new, y_new, np.exp(i - 10))
        wmat[i, :] = w_lam.T
    return wmat
# --- Script: plot each ridge coefficient as a function of log(lambda) ---
wmat =  normdata(data,label)
f = plt.figure()
ax = f.add_subplot(1,1,1)
# Each column of wmat is one feature's coefficient path across the 30 lambdas.
ax.plot(wmat)
plt.show()

 

 

 

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值