线性回归实现
由上第一个公式,我们可以假设所有 $x_0$ 都等于1,即可得到第二个公式
class LinearRegression():
    """Ordinary least-squares linear regression solved with the normal equation.

    A bias column of ones (x0 = 1) is prepended to X, so the learned weight
    vector is w = pinv(X'X) X'y with w[0] acting as the intercept.
    """

    def __init__(self):
        # Learned weight vector (bias first); None until fit() is called.
        self.w = None

    def fit(self, X, y):
        """Estimate weights from X of shape (n_samples, n_features) and targets y.

        Uses the pseudo-inverse instead of a plain inverse so fitting still
        works when X'X is singular (e.g. collinear features). The stray
        debug print of X.shape has been removed.
        """
        X = np.insert(X, 0, 1, axis=1)  # prepend the all-ones bias column x0
        self.w = np.linalg.pinv(X.T.dot(X)).dot(X.T).dot(y)

    def predict(self, X):
        """Return predictions X.dot(w) for X of shape (n_samples, n_features)."""
        X = np.insert(X, 0, 1, axis=1)  # same bias column as in fit()
        return X.dot(self.w)
1. 梯度下降实现
import pandas
import numpy as np
import matplotlib.pyplot as plt

# Load the PGA driving data and z-score both columns so they share one scale.
pga = pandas.read_csv("pga.csv")
for col in ("distance", "accuracy"):
    pga[col] = (pga[col] - pga[col].mean()) / pga[col].std()
print(pga.head())

# Quick visual check of the normalized distance-vs-accuracy relationship.
plt.scatter(pga.distance, pga.accuracy)
plt.xlabel('normalized distance')
plt.ylabel('normalized accuracy')
plt.show()
def cost(theta0, theta1, x, y):
    """Mean-squared-error cost J = (1/2m) * sum((theta0 + theta1*x_i - y_i)^2).

    Vectorized: the original per-element loop indexed ``x[i]``, which on a
    pandas Series is a *label* lookup and breaks for non-default indexes.
    Converting through np.asarray handles lists, arrays, and Series alike.

    Args:
        theta0: intercept of the candidate line.
        theta1: slope of the candidate line.
        x, y: equal-length sequences of inputs and targets.
    Returns:
        The scalar cost J as a float.
    """
    xa = np.asarray(x, dtype=float)
    ya = np.asarray(y, dtype=float)
    residuals = theta0 + theta1 * xa - ya
    return float((residuals ** 2).sum() / (2 * len(xa)))
# Sanity check: report the cost of the line theta0=0, theta1=1 on the normalized data.
print(cost(0,1,pga.distance,pga.accuracy))
#求J对theta0和theta1的偏导
def partial_cost_theta1(theta0, theta1, x, y):
    """Partial derivative of the cost w.r.t. theta1: mean((h - y) * x).

    Generalized: the original read ``x.shape[0]``, which fails for plain
    Python lists; np.asarray + .mean() accepts lists, arrays, and Series.

    Args:
        theta0, theta1: current intercept and slope.
        x, y: equal-length sequences of inputs and targets.
    Returns:
        The gradient component for theta1 as a float.
    """
    xa = np.asarray(x, dtype=float)
    ya = np.asarray(y, dtype=float)
    return float(((theta0 + theta1 * xa - ya) * xa).mean())
# Evaluate the theta1 partial derivative at (theta0=0, theta1=5) on the normalized data.
partial1=partial_cost_theta1(0,5,pga.distance,pga.accuracy)
print("partial1=",partial1)
def partial_cost_theta0(theta0, theta1, x, y):
    """Partial derivative of the cost w.r.t. theta0: mean(h - y).

    Generalized: the original read ``x.shape[0]``, which fails for plain
    Python lists; np.asarray + .mean() accepts lists, arrays, and Series.

    Args:
        theta0, theta1: current intercept and slope.
        x, y: equal-length sequences of inputs and targets.
    Returns:
        The gradient component for theta0 as a float.
    """
    xa = np.asarray(x, dtype=float)
    ya = np.asarray(y, dtype=float)
    return float((theta0 + theta1 * xa - ya).mean())
# Evaluate the theta0 partial derivative at (1, 1); computed but not printed here.
partial0=partial_cost_theta0(1,1,pga.distance,pga.accuracy)
def gradient_descent(x, y, alpha=0.1, theta0=0, theta1=0):
    """Fit y ~ theta0 + theta1*x by batch gradient descent.

    Bug fixes vs. the original:
      * the initial cost call passed (theta1, theta0) in swapped order, which
        gave a wrong starting cost whenever a nonzero theta was supplied;
      * both cost calls read the global ``pga.distance``/``pga.accuracy``
        instead of the ``x``/``y`` arguments, so the cost track ignored the
        function's own inputs — they now use x and y;
      * the unused theta0s/theta1s history lists were removed.

    Args:
        x, y: training inputs and targets.
        alpha: learning rate (default 0.1).
        theta0, theta1: starting intercept and slope (default 0).
    Returns:
        dict with final "theta0", "theta1", and the per-epoch "costs" list.
    """
    max_epochs = 1000
    convergence_thres = 0.000001  # stop when the cost change falls below this
    counter = 0
    c = cost(theta0, theta1, x, y)
    costs = [c]
    cprev = c + 10  # force at least one iteration of the loop
    while (np.abs(cprev - c) > convergence_thres) and (counter < max_epochs):
        cprev = c
        # Both partials are computed from the *current* thetas before either
        # parameter is updated (simultaneous update).
        update0 = alpha * partial_cost_theta0(theta0, theta1, x, y)
        update1 = alpha * partial_cost_theta1(theta0, theta1, x, y)
        theta0 -= update0
        theta1 -= update1
        c = cost(theta0, theta1, x, y)
        costs.append(c)
        counter += 1
    return {"theta0": theta0, "theta1": theta1, "costs": costs}
# Report the slope fitted with the default learning rate.
print("Theta1=", gradient_descent(pga.distance, pga.accuracy)['theta1'])
# Re-run with a smaller learning rate and plot how the cost falls per epoch.
descend = gradient_descent(pga.distance, pga.accuracy, alpha=.01)
cost_history = descend["costs"]
plt.scatter(range(len(cost_history)), cost_history)
plt.show()
结果:
逻辑回归