print("-----------------多变量线性回归-----------------")
# Multivariate linear regression.
# ex1data2.txt is read as three columns: house size, number of bedrooms,
# and the target (house price).
# Raw string: the backslashes of the Windows path stay literal. The plain
# literal only worked because "\p", "\m" and "\e" are unrecognised escapes,
# which newer Python versions warn about.
path=r'D:\python_test\machine_learning\ex1data2.txt'
data2=pd.read_csv(path,header=None,names=['Size','Bedrooms','Price'])
print(data2.head())
# Preprocessing step: feature normalization (feature scaling).
print('===============================')
# Standardize every column, the Price target included: subtract the column
# mean and divide by the column standard deviation.
data2=(data2-data2.mean())/data2.std()
print(data2.head())
# Prepend a constant column of ones (presumably the intercept feature).
data2.insert(0,'Ones',1)
# Set X (training data) and y (target variable): every column except the
# last one goes to X2, the last column goes to y2.
cols=data2.shape[1]
print('===============================')
X2=data2.iloc[:,0:cols-1]
y2=data2.iloc[:,cols-1:cols]
# Convert to matrices and initialize theta.
X2=np.matrix(X2.values)
y2=np.matrix(y2.values)
# One zero per column of X2, as a 1 x n row matrix. Sizing it from X2 keeps
# theta in sync with the feature count; float zeros avoid integer truncation
# should the optimizer ever write into theta in place.
theta2=np.matrix(np.zeros((1,X2.shape[1])))
# Perform linear regression on the data set. gradientDescent, alpha and iters
# are defined elsewhere in the file; the call returns the fitted parameters
# and a cost history that is plotted against np.arange(iters) below.
g2,cost2=gradientDescent(X2,y2,theta2,alpha,iters)
# Get the cost (error) of the model for the fitted parameters.
print(computeCost(X2, y2, g2))
# Plot the recorded cost against the iteration index.
fig,ax=plt.subplots(figsize=(12,8))
ax.plot(np.arange(iters),cost2,'r')
ax.set_xlabel('Iterations')
ax.set_ylabel('Cost')
ax.set_title('Error vs. Training Epoch')
plt.show()
代码块运行截图一:
代码块二:
print("===============================")
# Instead of implementing the algorithm from scratch, use scikit-learn's
# linear regression and apply it to the data of the first part. X, y and data
# are defined earlier in the file, outside this section.
from sklearn import linear_model
model=linear_model.LinearRegression()
# NOTE(review): X and y are presumably np.matrix objects, like X2/y2 in the
# multivariate section - confirm. Recent scikit-learn releases reject
# np.matrix input with a TypeError; np.asarray converts a matrix to a plain
# 2-D array and leaves an ndarray unchanged.
model.fit(np.asarray(X), np.asarray(y))
# Predictions from the fitted scikit-learn model.
# Column 1 of X is used as the x-axis values of the prediction line.
x = np.array(X[:, 1])
print(x)
# flatten() collapses the predictions to 1-D (row-major order by default).
f=model.predict(np.asarray(X)).flatten()
print(f)
# Prediction line drawn over a scatter plot of the raw data.
fig,ax=plt.subplots(figsize=(12,8))
ax.plot(x,f,'r',label='Prediction')
# Legend label spelling corrected (was 'Traning Data').
ax.scatter(data.Population,data.Profit,label='Training Data')
ax.legend(loc=2)
ax.set_xlabel('Population')
ax.set_ylabel('Profit')
ax.set_title('Predicted Profit vs. Population Size')
plt.show()