import numpy as np
import matplotlib.pyplot as plt
def load_data(path):
    # Load the CSV, skipping the header row; returns an (m, n) float array.
    data = np.loadtxt(path, delimiter=',', skiprows=1)
    return data
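# A hypothetical helper (mine, not from the original post): load_data expects a
# two-column CSV with one header row. If the real diabetes.csv is unavailable,
# a synthetic stand-in can be generated; the column names and values here are
# made up purely for testing.
def make_synthetic_csv(path='data/diabetes.csv', n=50):
    import os
    os.makedirs(os.path.dirname(path), exist_ok=True)
    rng = np.random.default_rng(42)
    x = rng.normal(size=n)
    y = 3.0 * x + rng.normal(scale=0.5, size=n)
    np.savetxt(path, np.column_stack((x, y)),
               delimiter=',', header='x,y', comments='')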
def get_gradient(theta, x, y):
    # Cost and gradient of least-squares linear regression.
    m = x.shape[0]
    y_estimate = np.dot(x, theta)  # predictions, shape (m,)
    assert y_estimate.shape == (m,)
    error = y_estimate - y
    assert error.shape == (m,)
    cost = 1.0 / (2 * m) * np.sum(error ** 2)
    # np.dot(error, x) equals np.dot(x.T, error) here; result has shape (2,).
    grad = (1.0 / m) * np.dot(error, x)
    return grad, cost
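# For reference (notation mine, not from the original post), the function
# above computes the standard least-squares cost and its gradient:
#     J(theta)      = 1/(2m) * sum_i (x_i . theta - y_i)^2
#     grad J(theta) = (1/m) * X^T (X theta - y)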
def gradient_descent(x, y, iterations, alpha):
    theta = np.random.randn(2)  # random start for [bias, slope]
    costs = []
    for i in range(iterations):
        grad, cost = get_gradient(theta, x, y)
        new_theta = theta - alpha * grad
        if i % 100 == 0:
            print('iteration {}: cost = {}'.format(i, cost))
            costs.append(cost)
        theta = new_theta
    return costs, theta
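# A sanity check (mine, not from the original post): on synthetic data,
# gradient descent should land close to the closed-form least-squares fit.
def check_against_lstsq():
    rng = np.random.default_rng(0)
    x1 = rng.normal(size=100)
    X = np.column_stack((np.ones_like(x1), x1))  # bias column + feature, (100, 2)
    y = 1.5 + 2.0 * x1 + 0.1 * rng.normal(size=100)
    _, theta_gd = gradient_descent(X, y, iterations=2000, alpha=0.1)
    theta_exact, *_ = np.linalg.lstsq(X, y, rcond=None)
    print('gradient descent:', theta_gd)
    print('closed form:     ', theta_exact)  # the two should nearly match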
if __name__ == '__main__':
    path = 'data/diabetes.csv'
    data = load_data(path)
    print(data.shape)
    data_x = data[:, 0].reshape(-1, 1)  # (m, 1)
    m = data_x.shape[0]
    # Standardise data_x (zero mean, unit standard deviation).
    average_data_x = np.mean(data_x)
    # Population standard deviation, sqrt(mean squared deviation);
    # equivalent to np.std(data_x).
    std_x = np.sqrt(np.sum(np.square(data_x - average_data_x)) / m)
    datax_bn = (data_x - average_data_x) / std_x
    print(datax_bn.shape)
    # Prepend a column of ones so that theta[0] acts as the bias b.
    data_x = np.hstack((np.ones_like(datax_bn), datax_bn))  # (m, 2)
    print(data_x.shape)
    data_y = data[:, 1]
    # Standardise data_y the same way.
    average_data_y = np.mean(data_y)
    std_y = np.sqrt(np.sum(np.square(data_y - average_data_y)) / m)
    datay_bn = (data_y - average_data_y) / std_y
    print(datay_bn.shape)
    costs, theta = gradient_descent(data_x, datay_bn, iterations=50000, alpha=0.002)
    y_predict = np.dot(data_x, theta)  # same as theta[0] + theta[1] * data_x[:, 1]
    print(y_predict.shape)
    plt.figure()
    # Scatter the standardised samples and overlay the fitted line.
    plt.scatter(data_x[:, 1], datay_bn)
    plt.plot(data_x[:, 1], y_predict)
    plt.show()
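    # One step the script leaves implicit (my addition): theta was fit on the
    # standardised targets, so predictions must be mapped back to the original
    # y scale before they can be compared with data_y.
    y_predict_original = y_predict * std_y + average_data_y
    print(y_predict_original[:5])
    print(data_y[:5])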