In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
读取数据:ex1data1.txt 每行是以逗号分隔的两列,第一列为输入 x,第二列为标签 y
In [2]:
# Load the comma-separated file; it has no header row, so name the two columns x and y.
data = pd.read_table('ex1data1.txt', header=None, names=['x', 'y'], delimiter=',')
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(data.head())
data.describe()  # automatically computes mean, std, min/max, etc. for x and y
Out[2]:
提取训练数据,输入和标签
In [3]:
# Split the frame into the input column and the label column.
X = data.loc[:, 'x']
Y = data.loc[:, 'y']
# Stack a row of ones above x so that X1 has shape (2, m): row 0 is the
# constant (intercept) term and row 1 is x. A (theta1, theta2) pair dotted
# with X1 therefore yields theta1 + theta2 * x for every sample.
one = np.ones(X.shape)
X1 = np.array([one, X])
print(X1.shape)
print(Y.shape)
迭代计算theta(线性方程参数),假设函数(模型)为 y = theta1 + theta2*x
In [4]:
def theta_cal(X, Y, a, num):
    """Fit ``y = theta1 + theta2 * x`` with batch gradient descent.

    Both parameters start at 0 and are updated simultaneously ``num`` times
    using the mean residual (for ``theta1``) and the mean residual weighted
    by ``x`` (for ``theta2``), each scaled by the learning rate ``a``.

    Parameters
    ----------
    X, Y : 1-D array-like of equal length
        Inputs and labels. They are converted with ``np.asarray`` and read
        positionally, so a pandas Series with a non-default index works too.
    a : float
        Learning rate.
    num : int
        Number of gradient-descent iterations.

    Returns
    -------
    numpy.ndarray of shape ``(num + 1, 2)``
        Row 0 is the initial ``(0, 0)``; row ``k`` is ``(theta1, theta2)``
        after ``k`` updates.

    Raises
    ------
    ValueError
        If ``X`` and ``Y`` are not 1-D with the same length, or are empty.
    """
    x = np.asarray(X, dtype=float)
    y = np.asarray(Y, dtype=float)
    if x.ndim != 1 or x.shape != y.shape:
        raise ValueError("X and Y must be 1-D sequences of the same length")
    m = x.shape[0]
    if m == 0:
        raise ValueError("X and Y must contain at least one sample")

    theta1 = 0.0
    theta2 = 0.0
    # Collect the history in a list and build the array once at the end;
    # stacking onto an array every iteration copies the whole history each time.
    history = [(theta1, theta2)]
    for _ in range(num):
        # Residuals are computed once from the current parameters, so both
        # updates below use the same (old) theta values.
        err = theta1 + theta2 * x - y
        step1 = a * err.sum() / m
        step2 = a * (err * x).sum() / m
        theta1 = theta1 - step1
        theta2 = theta2 - step2
        history.append((theta1, theta2))
    return np.array(history, dtype=float)
根据得到的theta,计算损失值
In [7]:
def lost_fun(theta, X, Y):
    """Return one squared-error loss value per row of ``theta``.

    For each row ``theta[k]`` the residual ``theta[k].T.dot(X) - Y.T`` is
    squared, summed, and divided by ``X.shape[0]``. The results are returned
    as a plain list, in row order.

    NOTE: the divisor is the length of X's first axis. When X is laid out as
    (parameters, samples) that is the number of parameters, not the number
    of samples, so the value is a scaled sum of squares rather than a
    per-sample mean.
    """
    def _loss_for(params):
        residual = (params.T).dot(X) - Y.T
        squared = residual ** 2
        return squared.sum() / X.shape[0]

    return [_loss_for(theta[k]) for k in range(theta.shape[0])]
In [13]:
# Run 3000 gradient-descent iterations with learning rate 0.01. Row 0 of `a`
# is the initial (0, 0); each later row is the (theta1, theta2) pair after
# one more update.
a = theta_cal(X, Y, 0.01, 3000)
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(a)
计算梯度下降的迭代损失值
In [14]:
# One loss value per row of `a`, i.e. per gradient-descent step (row 0 is the
# initial theta).
lost = np.array(lost_fun(a, X1, Y))
x = np.arange(0, lost.shape[0], 1)
# Fixed y-range 430..600 zooms in on the tail of the curve; values above 600
# fall outside the visible area.
plt.axis([0, x.shape[0], 430, 600])
plt.plot(x, lost)
plt.xlabel('iteration')
plt.ylabel('loss')
plt.title('Loss per gradient-descent iteration')
plt.show()
绘制散点图和拟合曲线逼近图
In [39]:
plt.scatter(X, Y)
x = np.linspace(0, 25, 25)
# The first ~10 gradient-descent updates already give a reasonably good fit.
# Use exact integer row indices 0..10: truncating np.linspace(0, 10, 10) to
# int produces 0..8 and 10, which silently skips row 9.
for i in range(11):
    plt.plot(x, a[i, 0] + a[i, 1] * x)
plt.xlabel('x')
plt.ylabel('y')
plt.title('Data and fitted lines for the first gradient-descent steps')
plt.show()
In [ ]:
In [ ]: