# Python linear regression code -- machine learning algorithm notes (1): linear regression and gradient descent, implemented in Python

#coding: utf-8


# Notebook setup: render matplotlib figures inline (only valid inside IPython).
get_ipython().run_line_magic('matplotlib', 'inline')

import pandas as pd
import matplotlib.pylab as plt

# Read data from csv
pga = pd.read_csv("pga.csv")

# Normalize the data: (x - mean) / std
pga.distance = (pga.distance - pga.distance.mean()) / pga.distance.std()
pga.accuracy = (pga.accuracy - pga.accuracy.mean()) / pga.accuracy.std()
print(pga.head())

plt.scatter(pga.distance, pga.accuracy)
plt.xlabel('normalized distance')
plt.ylabel('normalized accuracy')

# Model: accuracy_i = theta_1 * distance_i + theta_0 + alpha
# - theta_0 is the bias

from sklearn.linear_model import LinearRegression
import numpy as np

# We can add a dimension to an array by using np.newaxis.
# Index the underlying ndarray rather than the Series: multi-dimensional
# indexing directly on a pandas Series is not supported by current pandas.
distance_values = pga.distance.to_numpy()
print("Shape of the series:", pga.distance.shape)
print("Shape with newaxis:", distance_values[:, np.newaxis].shape)

# The X variable passed to LinearRegression.fit() must have 2 dimensions
lm = LinearRegression()
lm.fit(distance_values[:, np.newaxis], pga.accuracy)
theta1 = lm.coef_[0]
print(theta1)

# Model: accuracy_i = theta_1 * distance_i + theta_0 + alpha
# - theta_0 is the bias
# - No gradient descent has been used on the cost function so far.


# The cost function of a single variable linear model
def cost(theta0, theta1, x, y):
    """Return the halved mean squared error of the line theta1 * x + theta0.

    Computes J = sum((theta1 * x_i + theta0 - y_i) ** 2) / (2 * m), where m is
    the number of observations.

    Args:
        theta0: Intercept (bias) of the linear model.
        theta1: Slope of the linear model.
        x: Sized iterable of feature values.
        y: Sized iterable of observed target values, same length as ``x``.

    Returns:
        The cost J for the given parameters.

    Raises:
        ValueError: If ``x`` and ``y`` differ in length.
        ZeroDivisionError: If ``x`` is empty.
    """
    # The number of observations
    m = len(x)
    if len(y) != m:
        raise ValueError("x and y must have the same length")

    # Pair the observations positionally instead of indexing x[i] / y[i]:
    # on a pandas Series, [i] is a label lookup and breaks for any index
    # that is not the default 0..m-1 range.
    J = sum((theta1 * xi + theta0 - yi) ** 2 for xi, yi in zip(x, y))

    # Average and normalize cost
    return J / (2 * m)


# The cost for theta0=0 and theta1=1

print(cost(0, 1, pga.distance, pga.accuracy))

# Hold theta0 fixed and sweep theta1 to see how the cost changes with it.
theta0 = 100
theta1s = np.linspace(-3, 2, 100)

# A distinct loop variable inside a comprehension keeps the sweep from
# overwriting the module-level theta1 obtained from the sklearn fit.
costs = [cost(theta0, t1, pga.distance, pga.accuracy) for t1 in theta1s]

plt.plot(theta1s, costs)

import numpy as np
from mpl_toolkits.mplot3d import Axes3D

# Example of a Surface Plot using Matplotlib
# Create x and y variables
x = np.linspace(-10, 10, 100)
y = np.linspace(-10, 10, 100)

# We must create variables to represent each possible pair of points in x
# and y, i.e. (-10, -10), ..., (0, 0), ..., (10, 10).
# x and y need to be transformed to 100x100 matrices to represent these
# coordinates; np.meshgrid builds the coordinate matrices of x and y.
X, Y = np.meshgrid(x, y)
# print(X[:5,:5],"\n",Y[:5,:5])

# Compute a 3D parabola
Z = X**2 + Y**2

# Open a figure to place the plot on
fig = plt.figure()

# Initialize 3D plot. Figure.gca() no longer accepts keyword arguments
# (removed in Matplotlib 3.6), so create the 3D axes with add_subplot().
ax = fig.add_subplot(111, projection='3d')

# Plot the surface
ax.plot_surface(X=X, Y=Y, Z=Z)

# (Notebook heading, truncated in the source: "... these for your exercise")

theta0s = np.linspace(-2, 2, 100)
theta1s = np.linspace(-2, 2, 100)
COST = np.empty(shape=(100, 100))

# Meshgrid for parameters
T0S, T1S = np.meshgrid(theta0s, theta1s)

# For each parameter combination compute the cost.
# With np.meshgrid's default indexing, grid point [i, j] holds
# (theta0s[j], theta1s[i]); evaluating the cost at T0S[i, j], T1S[i, j]
# keeps COST aligned with the coordinate matrices passed to plot_surface.
# (Filling COST[i, j] from T0S[0, i], T1S[j, 0] would transpose the surface.)
n_rows, n_cols = COST.shape
for i in range(n_rows):
    for j in range(n_cols):
        COST[i, j] = cost(T0S[i, j], T1S[i, j], pga.distance, pga.accuracy)

# Make 3d plot. Figure.gca() no longer accepts keyword arguments
# (removed in Matplotlib 3.6), so create the 3D axes with add_subplot().
fig2 = plt.figure()
ax = fig2.add_subplot(111, projection='3d')
ax.plot_surface(X=T0S, Y=T1S, Z=COST)

# Next section: taking derivatives of the cost function.


# Partial derivative of the cost with respect to theta1
def partial_cost_theta1(theta0, theta1, x, y):
    """Return the partial derivative of the cost with respect to theta1.

    Computes sum((theta0 + theta1 * x - y) * x) / m, where m = x.shape[0].

    Args:
        theta0: Intercept (bias) of the linear model.
        theta1: Slope of the linear model.
        x: Feature values; must support elementwise arithmetic and expose
            ``.shape`` (e.g. a numpy array or pandas Series).
        y: Observed target values, compatible with ``x`` for elementwise
            arithmetic.

    Returns:
        The derivative of the cost along theta1 at (theta0, theta1).
    """
    # Hypothesis
    h = theta0 + theta1 * x

    # Hypothesis minus observed, times x
    diff = (h - y) * x

    # Average to compute the partial derivative
    partial = diff.sum() / (x.shape[0])
    return partial

partial1 = partial_cost_theta1(0, 5, pga.distance, pga.accuracy)
print("partial1 =", partial1)

# Partial derivative of the cost with respect to theta0

def partial_cost_theta0(theta0, theta1, x, y):
    """Return the partial derivative of the cost with respect to theta0.

    Computes sum(theta0 + theta1 * x - y) / m, where m = x.shape[0].

    Args:
        theta0: Intercept (bias) of the linear model.
        theta1: Slope of the linear model.
        x: Feature values; must support elementwise arithmetic and expose
            ``.shape`` (e.g. a numpy array or pandas Series).
        y: Observed target values, compatible with ``x`` for elementwise
            arithmetic.

    Returns:
        The derivative of the cost along theta0 at (theta0, theta1).
    """
    # Hypothesis
    h = theta0 + theta1 * x

    # Difference between hypothesis and observation
    diff = (h - y)

    # Average to compute the partial derivative
    partial = diff.sum() / (x.shape[0])
    return partial

partial0 = partial_cost_theta0(1, 1, pga.distance, pga.accuracy)
print("partial0 =", partial0)

# Update the parameters with gradient descent


# x is our feature vector -- distance
# y is our target variable -- accuracy
# alpha is the learning rate
# theta0 is the initial theta0
# theta1 is the initial theta1
def gradient_descent(x, y, alpha=0.1, theta0=0, theta1=0):
    """Fit theta0 and theta1 of a single-variable linear model by gradient descent.

    Iterates until the cost changes by no more than 1e-6 between two
    consecutive steps, or until 1000 iterations have run.

    Args:
        x: Feature values, passed through to ``cost`` and the partial
            derivative helpers.
        y: Observed target values.
        alpha: Learning rate that scales each partial derivative.
        theta0: Initial intercept.
        theta1: Initial slope.

    Returns:
        A dict with the final ``'theta0'`` and ``'theta1'`` and ``'costs'``,
        the list of cost values (the initial cost followed by the cost after
        every update).
    """
    max_epochs = 1000  # Maximum number of iterations
    counter = 0  # Number of updates performed so far

    # Initial cost, evaluated on the data that was passed in (not on the
    # global frame) and with the parameters in cost()'s (theta0, theta1) order.
    c = cost(theta0, theta1, x, y)
    costs = [c]  # Cost after each update

    # Convergence threshold: once the difference between the previous cost
    # and the current cost is no larger than this, the parameters are
    # considered converged.
    convergence_thres = 0.000001
    cprev = c + 10  # Guarantees the loop is entered at least once

    # Stop when the costs converge or the iteration limit is reached
    while (np.abs(cprev - c) > convergence_thres) and (counter < max_epochs):
        cprev = c

        # Alpha times the partial derivative is the step for each parameter.
        # Both slopes are computed at the same (theta0, theta1) before either
        # parameter changes, so the update is simultaneous.
        update0 = alpha * partial_cost_theta0(theta0, theta1, x, y)
        update1 = alpha * partial_cost_theta1(theta0, theta1, x, y)

        # -= descends the gradient (+= would ascend it)
        theta0 -= update0
        theta1 -= update1

        # Compute and record the new cost
        c = cost(theta0, theta1, x, y)
        costs.append(c)

        counter += 1

    # Return the final theta0, theta1 and the cost history
    return {'theta0': theta0, 'theta1': theta1, "costs": costs}


# Run the descent once and report all three results from that single run.
result = gradient_descent(pga.distance, pga.accuracy)
print("Theta0 =", result['theta0'])
print("Theta1 =", result['theta1'])
print("costs =", result['costs'])

# Re-run with a smaller learning rate and plot the cost at each iteration.
descend = gradient_descent(pga.distance, pga.accuracy, alpha=.01)
plt.scatter(range(len(descend["costs"])), descend["costs"])





