1.简单的线性回归算法

最新推荐文章于 2024-01-13 22:14:54 发布

yokan_de_s

最新推荐文章于 2024-01-13 22:14:54 发布

阅读量637

点赞数

分类专栏：机器学习基础算法

本文链接：https://blog.csdn.net/yokan_de_s/article/details/80335930

版权

机器学习基础算法专栏收录该内容

18 篇文章 1 订阅

订阅专栏

'''
    @author yokan
    @date 2018/5/15
'''
#简单的线性回归算法
import math
import numpy as np
import matplotlib.pyplot as pl

#暂时无用,这是sklearn框架做线性回归的算法,框架学习在后面,我准备先学算法
from sklearn.linear_model import LinearRegression
from mpl_toolkits.mplot3d import axes3d

# 1. Load the data set from linear_regression_data1.txt.
data = np.loadtxt("linear_regression_data1.txt",delimiter=",")   # comma-separated text parsed into an array
print(data)
# 2. Split the data into a design matrix x and a target column y.
x = np.c_[np.ones(data.shape[0]),data[:,0]]                    # column of ones followed by the first data column
y = np.c_[data[:,1]]                                           # second data column as an (n, 1) column
# Why prepend a column of ones (rather than 1, 2, 3, ...)? The model is
#     y = theta0 * 1 + theta1 * x1 (+ theta2 * x2 + ...),
# so the constant 1 is the value multiplied by the intercept theta0 in x.dot(theta).

# 3. Cost function: J(theta) = 1/(2*m) * sum((h(x_i) - y_i)^2), with h = x.dot(theta).
def getlossMethod(x,y,theta = [[0],[0]]):      # theta defaults to all zeros
    """Return the halved mean squared error of the linear model ``x.dot(theta)``.

    Args:
        x: design matrix with one row per sample (including the column of ones).
        y: target values, one row per sample, shaped like ``x.dot(theta)``.
        theta: parameter column vector; defaults to zeros for two parameters.

    Returns:
        The scalar cost ``1/(2*m) * sum((x.dot(theta) - y) ** 2)`` where
        ``m = len(y)``. It approaches 0 as the predictions approach ``y``.

    Raises:
        ValueError: if ``y`` is empty, because the mean is undefined.
    """
    m = len(y)
    if m == 0:
        raise ValueError("y must contain at least one sample")
    # Prediction error for every sample.
    residual = np.dot(x, theta) - np.asarray(y)
    # Square (not square-root) the errors, and divide by 2*m (not multiply by m/2).
    loss_v = 1/(2*m)*np.sum(np.square(residual))
    return loss_v

# 4. Solve for theta with batch gradient descent.
def gettheta(x,y,theta = [[0],[0]],r_count = 1500,alpha = 0.01):
    """Run ``r_count`` gradient-descent updates and return the resulting theta.

    Args:
        x: design matrix, one row per sample.
        y: target column, one row per sample.
        theta: starting parameters (zeros by default); it is never modified,
            each update builds a new array.
        r_count: number of update steps to perform.
        alpha: learning rate.

    Returns:
        The parameters after the last update. When ``r_count`` is 0 the
        ``theta`` argument is returned unchanged.
    """
    n_samples = len(y)
    for _ in range(r_count):
        # Prediction error under the current parameters.
        residual = x.dot(theta) - y
        # x.T.dot(residual) / n_samples is the gradient of the cost w.r.t. theta.
        gradient = x.T.dot(residual)
        theta = theta - alpha*(1/n_samples)*gradient
    return theta
# 5. Plot the data and the fitted line.
theta = gettheta(x,y)
xx = np.arange(5, 23)
yy = theta[0] + theta[1]*xx                                         # fitted line: intercept theta[0], slope theta[1]
pl.scatter(x[:, 1], y, s=30, c='r', marker='x', linewidths=1)       # scatter plot of the raw data
pl.plot(xx, yy)       # draw the line found by gradient descent
# pl.show()                                                           # display the figure
# 6. Try out the fitted model; from a learning point of view this completes the exercise.
print(theta.T.dot([1, 3.5])*10000)                                  # prediction for an input of 3.5, scaled by 10000 (the author notes this follows the "July" material's approach)
print(theta.T.dot([1, 7])*10000)                                    # prediction for an input of 7, scaled by 10000
# The printed values can be read as the expected outcome for those inputs.



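# The commented-out code below fits the same linear regression with scikit-learn; to be revisited once the framework has been studied.
# NOTE: the snippet refers to `X` and `plt`, whereas the script above names them `x` and `pl`.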
# regr = LinearRegression()
# regr.fit(X[:,1].reshape(-1,1), y.ravel())
# plt.plot(xx, regr.intercept_+regr.coef_*xx, label='Linear regression (Scikit-learn GLM)')
#
# plt.xlim(4,24)
# plt.xlabel('Population of City in 10,000s')
# plt.ylabel('Profit in $10,000s')
# plt.legend(loc=4);

"""
 @Time    : 2018/6/26
 @Author  : yokan
"""
#以下是用吴恩达的逻辑写出来的
import numpy as np
import matplotlib.pyplot as plt
alpha = 0.01  # learning rate
gradient_descent_count = 2000 # number of gradient-descent iterations
data = np.loadtxt('l_data.txt',delimiter=',')  # comma-separated data file
x = np.c_[np.ones((data.shape[0],1)),data[:,0]]  # design matrix: column of ones, then the first data column
y = data[:,1].reshape((-1,1)) # second data column reshaped into an (m, 1) column vector
m = data.shape[0]  # number of samples (rows of data)
# Cost function of the linear model.
def lossfunction(theta):
    """Return ``1/(2*m)`` times the sum of squared prediction errors.

    Reads the module-level design matrix ``x``, targets ``y`` and sample
    count ``m``; ``theta`` is the parameter column vector to evaluate.
    """
    residual = x.dot(theta) - y  # matrix product gives the predictions
    return 1/(2*m)*np.square(residual).sum()

def gradient_descent(theta = np.array([[0.0],[0.0]]),loss_values = []):
    """Fit theta by batch gradient descent and record the loss after each step.

    Uses the module-level ``x`` (design matrix), ``y`` (target column), ``m``
    (sample count), ``alpha`` (learning rate), ``gradient_descent_count``
    (number of iterations) and ``lossfunction``.

    Args:
        theta: starting parameters as a column vector. It is copied, so neither
            the caller's array nor the shared default is modified.
        loss_values: existing ``[loss, iteration]`` records to extend. It is
            copied, so repeated calls do not accumulate into the default list.

    Returns:
        A tuple ``(theta, history)``: the fitted parameter column vector and an
        array with one ``[loss, iteration]`` row per recorded step.
    """
    # Private copies: default arguments are created once at definition time,
    # so mutating them would leak state from one call into the next.
    theta = np.array(theta, dtype=float).reshape(-1, 1)
    history = list(loss_values)
    for i in range(gradient_descent_count):
        residual = x.dot(theta) - y
        # x.T.dot(residual) holds one gradient component per parameter:
        # sum(residual) for the intercept and sum(residual * feature) for the
        # slope. Both are computed from the same theta (simultaneous update).
        theta = theta - alpha / m * x.T.dot(residual)
        history.append([lossfunction(theta),i])
    return theta,np.array(history)

# Fit the model, then draw the data points and the fitted line.
theta , loss_values = gradient_descent()
# plt.plot(loss_values[:,1],loss_values[:,0],c = 'blue')
# plt.show()            # uncomment both lines to inspect how the loss evolves per iteration
plt.scatter(x[:,1],y,c='r',s = 10)  # raw data points
x_arr = np.arange(0,23,0.1)
y_arr = x_arr * theta[1,:][0] + theta[0,:][0]   # y = a*x + b with slope theta[1] and intercept theta[0]
plt.plot(x_arr,y_arr,c='black')  # fitted regression line
plt.show()