import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import matplotlib.pyplot as plt
import random
# A1,当地销售价格,以数百美元计;
# A2,卫生间数量;
# A3,场地面积以千平方英尺计;
# A4,居住面积以千平方英尺为单位;
# A5 车库数量;
# A6,房间数量;
# A7,卧室数量;
# A8,以年为单位的年龄;
# A9,1=砖,2=砖/木,3=铝/木,4=木。
# A10,1=两层,2=分层,3=牧场
# A11,起火点数量。
# B,售价。
# 构建线性回归模型,这个是自己实现的,一定要把前向传播和反向传播的流程走一遍
class Network(object):
    """Hand-rolled linear regression trained by per-sample gradient descent (SGD).

    Forward pass: z = w·x + b.  Loss: squared error (z - y)**2.  Gradients are
    derived analytically in gradient().

    Features listed in ``masked_indices`` are excluded from the model: their
    weights are forced to zero in the forward pass and their gradients are
    zeroed, so they never influence a prediction and are never updated.
    (The original code hard-coded indices 2, 7, 9 in forward() but still
    updated those weights — wasted work with no effect on predictions.)
    """

    def __init__(self, num_of_weight, masked_indices=(2, 7, 9)):
        """Randomly initialise ``num_of_weight`` weights and one bias.

        Args:
            num_of_weight: number of input features.
            masked_indices: feature positions excluded from the model
                (default keeps the original hard-coded 2, 7, 9).
        """
        self.w = [random.random() for _ in range(num_of_weight)]  # weights
        self.b = random.random()                                  # bias
        self.eta = 0.0610                                         # learning rate
        # Drop out-of-range indices so small models don't crash; frozenset
        # because the mask never changes after construction.
        self.masked = frozenset(i for i in masked_indices if 0 <= i < num_of_weight)

    def forward(self, x):
        """Return the prediction z = w·x + b with masked weights zeroed."""
        w = np.array(self.w, dtype=float)
        if self.masked:
            w[list(self.masked)] = 0.0  # excluded features contribute nothing
        return np.dot(np.asarray(x), w) + self.b

    def loss(self, z, y):
        """Squared-error loss between prediction z and target y."""
        return (z - y) ** 2

    def gradient(self, x, y, z):
        """Analytic gradients of (z - y)**2 w.r.t. each weight and the bias.

        d(loss)/dw_j = 2*(z - y)*x_j,  d(loss)/db = 2*(z - y).
        Masked weights get zero gradient: forward() zeroes them anyway, so
        predictions are identical, but their stored values no longer drift.
        """
        tmp = 2 * (z - y)
        # Vectorised: avoids the original per-element loop, which produced an
        # (n, 1) array and silently turned self.w entries into 1-element arrays.
        gradient_w = tmp * np.asarray(x, dtype=float)
        if self.masked:
            gradient_w[list(self.masked)] = 0.0
        gradient_b = np.array(tmp)
        return gradient_w, gradient_b

    def update(self, gradient_w, gradient_b):
        """One gradient-descent step: parameter <- parameter - eta * gradient."""
        for i in range(len(gradient_w)):
            self.w[i] = self.w[i] - self.eta * gradient_w[i]
        self.b = self.b - self.eta * gradient_b

    def train(self, tr_x, tr_y):
        """Run one pass of per-sample SGD over the training set.

        Args:
            tr_x: iterable of feature vectors.
            tr_y: iterable of targets, aligned with tr_x.

        Returns:
            (losses, ys, zs): per-sample loss values, true targets and
            predictions, in visiting order.
        """
        losses = []
        ys = []
        zs = []
        for x, y in zip(tr_x, tr_y):
            z = self.forward(x)
            loss = self.loss(z, y)
            gradient_w, gradient_b = self.gradient(x, y, z)
            self.update(gradient_w, gradient_b)
            print(f"损失值: {loss}")
            losses.append(loss)
            ys.append(y)
            zs.append(z)
        return losses, ys, zs
if __name__ == '__main__':
    # Load the housing data (GBK-encoded CSV) and convert to a numpy array.
    data = pd.read_csv("原始房价预测表.csv", encoding='gbk')
    data = data.values
    # Standardise features and target separately (zero mean, unit variance —
    # NOT a (0,1) range; that would be MinMaxScaler).  Two scalers so the
    # target can be mapped back later with sc2.inverse_transform.
    sc1 = StandardScaler()
    sc2 = StandardScaler()
    tr_x = sc1.fit_transform(data[:, :-1])                # the 11 feature columns
    tr_y = sc2.fit_transform(data[:, 11].reshape(-1, 1))  # price column as a column vector
    net = Network(11)
    # Train: one SGD pass over the data.
    losses, real, predict = net.train(tr_x, tr_y)
    # Plot the per-sample loss curve.
    plt.plot(range(len(losses)), losses)
    plt.show()
    # Plot true vs predicted (standardised) prices; legend was missing even
    # though labels were supplied.
    plt.plot(range(len(real)), real, 'r', label="real")
    plt.plot(range(len(predict)), predict, 'b', label='predict')
    plt.legend()
    plt.show()
    # Predict two new houses.  BUG FIX: new samples must be scaled with the
    # scaler FITTED ON THE TRAINING DATA (sc1.transform on a (1, 11) row).
    # The original called sc1.fit_transform on a single sample reshaped to
    # (11, 1), which standardises across that sample's 11 feature values —
    # meaningless, and it clobbered the fitted training statistics.
    x1 = np.array([11.5, 2.0, 7.25, 1.75, 1, 8, 5, 15, 2, 1, 1]).reshape(1, -1)
    x2 = np.array([8.9, 1.0, 5.7, 1.11, 0, 5, 3, 35, 4, 3, 0]).reshape(1, -1)
    x1 = sc1.transform(x1)
    x2 = sc1.transform(x2)
    y1 = net.forward(x1)
    y2 = net.forward(x2)
    # Map standardised predictions back to price units; inverse_transform
    # requires a 2-D array, so reshape the 1-D outputs to column vectors.
    y1 = sc2.inverse_transform(np.asarray(y1).reshape(-1, 1))
    y2 = sc2.inverse_transform(np.asarray(y2).reshape(-1, 1))
    print("\n")
    print(f"销售价格:{y1}")
    print(f"销售价格:{y2}")
# python实现线性回归模型
# (blog footer, converted to comments so the file parses: 最新推荐文章于 2024-05-10 03:05:19 发布)