数据:
房屋面积和价格:
数据获取:链接:https://pan.baidu.com/s/1KQT9U21M707hB2u0x5D0tA
提取码:4567
import numpy as np
import matplotlib.pyplot as plt
# Read the data file: one sample per line, tab-separated, with the area in
# the first column and the price in the second.
with open("线性回归第2题数据.txt", "r", encoding="utf-8") as f:
    lines = f.readlines()
# The `with` statement has already closed the file here, so no explicit
# f.close() is needed.
# Split each line once and skip blank lines (such as a trailing newline at
# the end of the file), which would otherwise make float("") raise ValueError.
rows = [line.strip().split("\t") for line in lines if line.strip()]
area = [float(row[0]) for row in rows]
price = [float(row[1]) for row in rows]
# Split the samples into a training group (the first 80%) and a test group
# (the remainder). The split follows the order of the input lists; nothing is
# shuffled.
train_size = int(len(area) * 0.8)
head, tail = slice(None, train_size), slice(train_size, None)
train_area = np.array(area[head])
train_price = np.array(price[head])
test_area = np.array(area[tail])
test_price = np.array(price[tail])
# Fit price = slope * area + intercept on the training group using the
# closed-form least-squares solution:
#   slope     = sum((x - mean_x) * (y - mean_y)) / sum((x - mean_x) ** 2)
#   intercept = mean_y - slope * mean_x
# where x is the area and y is the price.
train_area_mean = np.mean(train_area)
train_price_mean = np.mean(train_price)
numerator = np.sum((train_area - train_area_mean) * (train_price - train_price_mean))
# NOTE: the denominator is zero when every training area is identical (or the
# training group is empty); the slope is then undefined.
denominator = np.sum((train_area - train_area_mean) ** 2)
slope = numerator / denominator
intercept = train_price_mean - slope * train_area_mean
# The independent variable is the area (y is the price), so the printed
# formula names it "area" rather than "price".
print("公式:y={}*area+{}".format(slope, intercept))
# Plotting: for the training group and then the test group, draw the samples
# as blue points with the fitted line in red on top, and show each one with
# its own plt.show() call.
for subset_title, xs, ys in (
    ('train Data', train_area, train_price),
    ('Test Data', test_area, test_price),
):
    fitted = slope * xs + intercept
    plt.scatter(xs, ys, color='blue')
    plt.plot(xs, fitted, color='red')
    plt.title(subset_title)
    plt.xlabel("Area")
    plt.ylabel("Price")
    plt.show()
运行图片: