Linear Regression Algorithms

Linear regression is generally used for prediction. For background on loss functions, gradient descent, and related topics, see: 吴恩达机器学习总结 (a summary of Andrew Ng's machine learning course).

Python code:

# Linear regression with LinearRegression
# House price prediction
import numpy as np
import pandas as pd
from io import StringIO
from sklearn import linear_model
import matplotlib.pyplot as plt

# Alternative: load the same data from a CSV string with pandas
# csv_data = 'square_feet,price\n150,6450\n200,7450\n250,8450\n300,9450\n350,11450\n400,15450\n600,18450\n'
# df = pd.read_csv(StringIO(csv_data))
# print(df)
# x = np.array(df['square_feet']).reshape(-1, 1)
# y = df['price']
x = np.array([150, 200, 250, 300, 350, 400, 600])
x_ = x.reshape(-1, 1)               # sklearn expects a 2-D feature matrix
y = np.array([6450, 7450, 8450, 9450, 11450, 15450, 18450])
regr = linear_model.LinearRegression()
regr.fit(x_, y)
a, b = regr.coef_, regr.intercept_
# Data point to predict
area = 238.5
# Option 1: compute the price from the fitted line equation
# print(a * area + b)
# Option 2: predict the price with the predict method (needs 2-D input)
# print(regr.predict([[area]]))
plt.scatter(x, y, color='blue', marker='X')
plt.plot(x, regr.predict(x_), color='red', linewidth=4)
plt.show()
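
Not part of the original post, but a quick way to judge the fit: LinearRegression.score returns the R² on the data you pass in, and predict accepts any 2-D array of areas.

# Quick check of the fit (my addition): R^2 score and a sample prediction
print(regr.score(x_, y))          # coefficient of determination R^2 on the training data
print(regr.predict([[area]]))     # predicted price for area = 238.5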

Instead of the closed-form fit, we can train with gradient descent. Gradient descent comes in three variants: batch gradient descent, stochastic gradient descent, and mini-batch gradient descent. The formulas the code implements are written out below.
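
All three variants minimize the same mean squared error loss; this is exactly what the loss and dJ functions in the code compute:

$$
J(\theta) = \frac{1}{m}\sum_{i=1}^{m}\left(y^{(i)} - x_b^{(i)}\theta\right)^2,
\qquad
\nabla J(\theta) = \frac{2}{m}\,X_b^{T}\left(X_b\,\theta - y\right),
\qquad
\theta \leftarrow \theta - \alpha\,\nabla J(\theta)
$$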

import numpy as np

# Batch gradient descent
# Synthetic data: 10000 samples, 2 features, true model y = 2*x1 + 3*x2 + 3 + noise
X = 2 * np.random.random(size=20000).reshape(-1, 2)
y = X[:, 0] * 2. + X[:, 1] * 3. + 3. + np.random.normal(size=10000)
# X_b = np.array([[1,150],[1,200],[1,250],[1,300],[1,350],[1,400],[1,600]])
# y = np.array([[6450],[7450],[8450],[9450],[11450],[15450],[18450]])
temp = np.ones((len(y), 1))
X_b = np.hstack((temp, X))          # prepend a column of ones for the intercept
theta = np.zeros(X_b.shape[1])
alpha = 0.01                        # learning rate
epsilon = 1e-8                      # convergence threshold on the loss

def loss(X_b, y, theta):
    # Mean squared error
    return np.sum((y - np.dot(X_b, theta)) ** 2) / len(y)

def dJ(X_b, y, theta):
    # Gradient of the MSE loss over the full batch
    gradient = X_b.T.dot(X_b.dot(theta) - y) * 2. / len(y)
    return gradient

def BGD(X_b, y, theta):
    # Iterate until the loss stops improving by more than epsilon
    while True:
        last_theta = theta
        theta = theta - alpha * dJ(X_b, y, theta)
        if abs(loss(X_b, y, theta) - loss(X_b, y, last_theta)) <= epsilon:
            break
    return theta

rst = BGD(X_b, y, theta)
print(rst)                          # should approach the true parameters [3, 2, 3]
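
As a sanity check (my addition, not in the original post), the closed-form normal equation θ = (X_bᵀX_b)⁻¹X_bᵀy should recover nearly the same coefficients as BGD:

# Sanity check via the normal equation (not in the original post);
# the result should be close to the BGD output and to the true [3, 2, 3].
theta_exact = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)
print(theta_exact)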



# Stochastic gradient descent
X = 2 * np.random.random(size=20000).reshape(-1, 2)
y = X[:, 0] * 2. + X[:, 1] * 3. + 3. + np.random.normal(size=10000)
temp = np.ones((len(y), 1))
X_b = np.hstack((temp, X))
theta = np.zeros(X_b.shape[1])

def dJ_sgd(theta, X_b_i, y_i):
    # Gradient estimated from a single sample (no 1/m averaging)
    return X_b_i.T.dot(X_b_i.dot(theta) - y_i) * 2

def sgd(X_b, y, theta, n_iters):
    # Decaying learning rate: eta = t0 / (t + t1)
    t0 = 5
    t1 = 50

    def learn_rate(t):
        return t0 / (t + t1)

    for cur_iter in range(n_iters):
        rand_i = np.random.randint(len(X_b))    # pick one sample at random
        gradient = dJ_sgd(theta, X_b[rand_i], y[rand_i])
        theta = theta - learn_rate(cur_iter) * gradient

    return theta

print(sgd(X_b, y, theta, n_iters=len(X_b) // 3))
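
One caveat with the version above: drawing a random index at every step can visit some samples many times and others never. A common variant, sketched here as my own addition (the function name sgd_epochs and the n_epochs parameter are hypothetical, not from the original), shuffles the indices once per epoch so every sample is seen exactly once per pass:

def sgd_epochs(X_b, y, theta, n_epochs=5, t0=5, t1=50):
    # Shuffle-based SGD sketch (my addition): each epoch visits every sample once.
    t = 0
    for epoch in range(n_epochs):
        indexes = np.random.permutation(len(X_b))
        for i in indexes:
            gradient = dJ_sgd(theta, X_b[i], y[i])
            theta = theta - t0 / (t + t1) * gradient
            t += 1
    return theta

print(sgd_epochs(X_b, y, np.zeros(X_b.shape[1])))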




# Mini-batch gradient descent
def dJ_mbgd(theta, X_b_n, y_n, num):
    # Gradient averaged over a mini-batch of `num` samples
    return X_b_n.T.dot(X_b_n.dot(theta) - y_n) * 2 / num

def mbgd(theta, X_b, y, num, n_iters):
    # Same decaying learning rate schedule as SGD
    t0 = 5
    t1 = 50

    def learn_rate(t):
        return t0 / (t + t1)

    for cur_iter in range(n_iters):
        x_index = np.random.randint(0, len(y), num)   # sample a mini-batch of indices
        gradient = dJ_mbgd(theta, X_b[x_index], y[x_index], num)
        theta = theta - learn_rate(cur_iter) * gradient

    return theta

print(mbgd(theta, X_b, y, num=20, n_iters=len(X_b) // 3))
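
For comparison (my addition, not in the original post), scikit-learn ships an SGD-based linear regressor that implements the same idea with more engineering (adaptive learning rates, early stopping). A minimal sketch on the same synthetic data:

from sklearn.linear_model import SGDRegressor

# SGDRegressor fits its own intercept, so pass the raw features X rather than X_b.
sgd_reg = SGDRegressor(max_iter=1000, tol=1e-6)
sgd_reg.fit(X, y)
print(sgd_reg.intercept_, sgd_reg.coef_)   # should be near 3 and [2, 3]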

 
