线性回归实现
由上第一个公式,我们可以假设所有 $x_0$ 都等于1,即可得到第二个公式
class LinearRegression():
    """Ordinary least-squares linear regression solved with the normal equation.

    A bias column of ones (x0 = 1) is prepended to X, so the learned weight
    vector is w = pinv(X'X) X'y with w[0] acting as the intercept.
    """

    def __init__(self):
        # Learned weight vector (bias first); None until fit() is called.
        self.w = None

    def fit(self, X, y):
        """Estimate weights from X of shape (n_samples, n_features) and targets y.

        Uses the pseudo-inverse instead of a plain inverse so fitting still
        works when X'X is singular (e.g. collinear features). The stray
        debug print of X.shape has been removed.
        """
        X = np.insert(X, 0, 1, axis=1)  # prepend the all-ones bias column x0
        self.w = np.linalg.pinv(X.T.dot(X)).dot(X.T).dot(y)

    def predict(self, X):
        """Return predictions X.dot(w) for X of shape (n_samples, n_features)."""
        X = np.insert(X, 0, 1, axis=1)  # same bias column as in fit()
        return X.dot(self.w)
1. 梯度下降实现
import pandas
import numpy as np
import matplotlib.pyplot as plt

# Load the PGA driving data and z-score both columns so they share one scale.
pga = pandas.read_csv("pga.csv")
for col in ("distance", "accuracy"):
    pga[col] = (pga[col] - pga[col].mean()) / pga[col].std()
print(pga.head())

# Quick visual check of the normalized distance-vs-accuracy relationship.
plt.scatter(pga.distance, pga.accuracy)
plt.xlabel('normalized distance')
plt.ylabel('normalized accuracy')
plt.show()
def cost(theta0, theta1, x, y):
    """Mean-squared-error cost J = (1/2m) * sum((theta0 + theta1*x_i - y_i)^2).

    Vectorized: the original per-element loop indexed ``x[i]``, which on a
    pandas Series is a *label* lookup and breaks for non-default indexes.
    Converting through np.asarray handles lists, arrays, and Series alike.

    Args:
        theta0: intercept of the candidate line.
        theta1: slope of the candidate line.
        x, y: equal-length sequences of inputs and targets.
    Returns:
        The scalar cost J as a float.
    """
    xa = np.asarray(x, dtype=float)
    ya = np.asarray(y, dtype=float)
    residuals = theta0 + theta1 * xa - ya
    return float((residuals ** 2).sum() / (2 * len(xa)))
# Sanity check: report the cost of the line theta0=0, theta1=1 on the normalized data.
print(cost(0,1,pga.distance,pga.accuracy))
#求J对theta0和theta1的偏导
def partial_cost_theta1(theta0, theta1, x, y):
    """Partial derivative of the cost w.r.t. theta1: mean((h - y) * x).

    Generalized: the original read ``x.shape[0]``, which fails for plain
    Python lists; np.asarray + .mean() accepts lists, arrays, and Series.

    Args:
        theta0, theta1: current intercept and slope.
        x, y: equal-length sequences of inputs and targets.
    Returns:
        The gradient component for theta1 as a float.
    """
    xa = np.asarray(x, dtype=float)
    ya = np.asarray(y, dtype=float)
    return float(((theta0 + theta1 * xa - ya) * xa).mean())
# Evaluate the theta1 partial derivative at (theta0=0, theta1=5) on the normalized data.
partial1=partial_cost_theta1(0,5,pga.distance,pga.accuracy)
print("partial1=",partial1)
def partial_cost_theta0(theta0, theta1, x, y):
    """Partial derivative of the cost w.r.t. theta0: mean(h - y).

    Generalized: the original read ``x.shape[0]``, which fails for plain
    Python lists; np.asarray + .mean() accepts lists, arrays, and Series.

    Args:
        theta0, theta1: current intercept and slope.
        x, y: equal-length sequences of inputs and targets.
    Returns:
        The gradient component for theta0 as a float.
    """
    xa = np.asarray(x, dtype=float)
    ya = np.asarray(y, dtype=float)
    return float((theta0 + theta1 * xa - ya).mean())
# Evaluate the theta0 partial derivative at (1, 1); computed but not printed here.
partial0=partial_cost_theta0(1,1,pga.distance,pga.accuracy)
def gradient_descent(x, y, alpha=0.1, theta0=0, theta1=0):
    """Fit y ~ theta0 + theta1*x by batch gradient descent.

    Bug fixes vs. the original:
      * the initial cost call passed (theta1, theta0) in swapped order, which
        gave a wrong starting cost whenever a nonzero theta was supplied;
      * both cost calls read the global ``pga.distance``/``pga.accuracy``
        instead of the ``x``/``y`` arguments, so the cost track ignored the
        function's own inputs — they now use x and y;
      * the unused theta0s/theta1s history lists were removed.

    Args:
        x, y: training inputs and targets.
        alpha: learning rate (default 0.1).
        theta0, theta1: starting intercept and slope (default 0).
    Returns:
        dict with final "theta0", "theta1", and the per-epoch "costs" list.
    """
    max_epochs = 1000
    convergence_thres = 0.000001  # stop when the cost change falls below this
    counter = 0
    c = cost(theta0, theta1, x, y)
    costs = [c]
    cprev = c + 10  # force at least one iteration of the loop
    while (np.abs(cprev - c) > convergence_thres) and (counter < max_epochs):
        cprev = c
        # Both partials are computed from the *current* thetas before either
        # parameter is updated (simultaneous update).
        update0 = alpha * partial_cost_theta0(theta0, theta1, x, y)
        update1 = alpha * partial_cost_theta1(theta0, theta1, x, y)
        theta0 -= update0
        theta1 -= update1
        c = cost(theta0, theta1, x, y)
        costs.append(c)
        counter += 1
    return {"theta0": theta0, "theta1": theta1, "costs": costs}
# Report the slope fitted with the default learning rate.
print("Theta1=", gradient_descent(pga.distance, pga.accuracy)['theta1'])
# Re-run with a smaller learning rate and plot how the cost falls per epoch.
descend = gradient_descent(pga.distance, pga.accuracy, alpha=.01)
cost_history = descend["costs"]
plt.scatter(range(len(cost_history)), cost_history)
plt.show()
结果:
逻辑回归