import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Simple (single-feature) linear regression: predict median_house_value from
# the population column, report fit quality, and plot the fitted line over
# the training and test points.

# NOTE(review): hard-coded absolute Windows path; the script only runs on a
# machine that has the dataset at exactly this location.
data = pd.read_csv('F:\\机器学习100天\\007-010 线性回归\\008 简单线性回归-实战\\code\\Data.csv')

# Feature/target selection by column name.
# (Positional alternative: X = data.iloc[:, :-1]; y = data.iloc[:, 1].)
#
# The original key 'polulation' looks like a misspelling of 'population'
# (the plot labels below use 'population'). Keep honouring the misspelled
# name when the CSV really contains it, otherwise fall back to the correctly
# spelled column instead of failing with a KeyError.
feature_column = 'polulation' if 'polulation' in data.columns else 'population'
# scikit-learn expects a 2-D feature matrix, hence the reshape to one column.
X = np.array(data[feature_column]).reshape(-1, 1)
y = data['median_house_value']

# Hold out 20% of the rows for testing; random_state fixes the shuffle so the
# split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Fit the model and report the score on both splits.
lr = LinearRegression().fit(x_train, y_train)
print('训练集', lr.score(x_train, y_train))
print('测试集', lr.score(x_test, y_test))

# Predict on the held-out test set.
y_pred = lr.predict(x_test)
print('预测', y_pred)

# Loss: mean of the squared residuals over the training set.
train_residuals = lr.predict(x_train) - y_train
J = 1 / x_train.shape[0] * np.sum(train_residuals ** 2)
print('损失函数的值', J)

# Learned slope (single feature, so one coefficient) and intercept.
w = lr.coef_[0]
b = lr.intercept_
print('权重', w)
print('截距', b)


def _plot_fit(features, targets, model):
    """Scatter the observed points in red and overlay the model's line in blue.

    Args:
        features: 2-D array of feature values (one column) to plot on x.
        targets: Observed target values matching ``features`` row for row.
        model: Fitted estimator exposing ``predict``.
    """
    plt.scatter(features, targets, color='red')
    plt.plot(features, model.predict(features), color='blue')
    plt.title('population VS median_house_value')
    plt.xlabel('population')
    plt.ylabel('median_house_value')
    plt.show()


# Visualise the training set, then the test set, against the same fitted line.
_plot_fit(x_train, y_train, lr)
_plot_fit(x_test, y_test, lr)
# Machine Learning, Part 2: Simple Linear Regression
# (web-page residue) Latest recommended article published on 2024-03-05 17:39:43