# 多元线性回归分析-Python&SPSS

## 1.观察数据

import numpy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

data.head() 

data = (data - data.mean())/data.std()

$PE = \theta_0 + \theta_1 \cdot AT + \theta_2 \cdot V + \theta_3 \cdot AP + \theta_4 \cdot RH$

$PE = h_\theta(x) = \theta^{T} x$（其中 θ 需转置为列向量）

# 2.线性回归

def CostFunction(X,y,theta):
inner = np.power((X*theta.T)-y,2)
return np.sum(inner)/(2*len(X))

col = data.shape[1]
X = data.iloc[:,0:col-1]
y = data.iloc[:,col-1:col]
X = np.matrix(X.values)
y = np.matrix(y.values)
theta = np.matrix(np.array([0,0,0,0,0]))
temp = np.matrix(np.zeros(theta.shape))
CostFunction(X,y,theta)


j：特征编号

m：样本编号

def gradientDescent(X,y,theta,alpha,iters):
temp = np.matrix(np.zeros(theta.shape))
parameters = int(theta.ravel().shape[1])
cost = np.zeros(iters)
for i in range(iters):
error = (X*theta.T)-y

for j in range(parameters):
term = np.multiply(error,X[:,j])
temp[0,j] = theta[0,j] - (alpha/len(X))*np.sum(term)

theta = temp
cost[i] = CostFunction(X,y,theta)

return theta,cost

梯度下降得到的参数 θ 为：(-5.22080706e-14, -8.63485491e-01, -1.74182863e-01, 2.16058120e-02, -1.35205248e-01)

predicted = X*g.T
predicted = predicted.flatten().A[0]
y_f= y.flatten().A[0]
fig, ax = plt.subplots()
ax.scatter(y_f,predicted)
ax.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.show()

# 3.scikit-learn

from sklearn import linear_model
model = linear_model.LinearRegression()
model.fit(X, y)  

scikit-learn 得到的系数为：(0, -0.86350078, -0.17417154, 0.02160293, -0.13521023)

# 4.SPSS

然后进行线性回归分析得到结果：

• 广告
• 抄袭
• 版权
• 政治
• 色情
• 无意义
• 其他

120