使用自己的数据利用pytorch搭建全连接神经网络进行回归预测

本文详细介绍了如何使用Python的PyTorch库构建一个全连接神经网络,对加州房价数据进行回归预测。步骤包括数据预处理、模型设计、训练过程以及性能评估。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

1、导入库

引入必要的库,包括PyTorch、Pandas等。

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.datasets import fetch_california_housing

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import SGD
import torch.utils.data as Data
import matplotlib.pyplot as plt
import seaborn as sns

2、数据准备

这里使用sklearn自带的加利福尼亚房价数据,首次运行会下载数据集,建议下载之后,处理成csv格式单独保存,再重新读取。

后续完整代码中,数据也是采用先下载,单独保存之后,再重新读取的方式。

# Load the California housing dataset and cache it as a local CSV.
housedata = fetch_california_housing()  # downloads the dataset on first run
data_x, data_y = housedata.data, housedata.target  # feature matrix and target vector
data_df = pd.DataFrame(data=data_x, columns=housedata.feature_names)  # wrap features in a DataFrame
data_df['target'] = data_y  # append the label column
# index=False keeps the row index out of the CSV; otherwise re-reading the file
# adds an extra "Unnamed: 0" column that would later be treated as a 9th feature
# and break the 8-input network.
data_df.to_csv("california_housing.csv", index=False)
housedata_df = pd.read_csv("california_housing.csv")  # reload from disk

3、数据拆分

# Split into train/test sets (70/30, fixed seed for reproducibility).
# NOTE: `housedata` is still the sklearn Bunch returned by
# fetch_california_housing() here and cannot be sliced like an array;
# take the numeric values from the reloaded DataFrame instead.
housedata = housedata_df.values
X_train, X_test, y_train, y_test = train_test_split(
    housedata[:, :-1], housedata[:, -1], test_size=0.3, random_state=42)

4、数据标准化

# Standardize features: fit the scaler on the training split only,
# then apply the same transform to the held-out test split.
scale = StandardScaler().fit(X_train)
x_train_std = scale.transform(X_train)
x_test_std = scale.transform(X_test)

5、数据转换

# Convert the standardized numpy arrays and labels to float32 tensors.
X_train_t = torch.from_numpy(x_train_std.astype(np.float32))
y_train_t = torch.from_numpy(y_train.astype(np.float32))
X_test_t = torch.from_numpy(x_test_std.astype(np.float32))
y_test_t = torch.from_numpy(y_test.astype(np.float32))

# Wrap the training/test tensors in datasets; only the training set
# gets a DataLoader (shuffled 64-sample mini-batches).
train_data = Data.TensorDataset(X_train_t, y_train_t)
test_data = Data.TensorDataset(X_test_t, y_test_t)
train_loader = Data.DataLoader(dataset=train_data, batch_size=64, shuffle=True, num_workers=1)

6、模型搭建

# Fully-connected feed-forward regression network (8 -> 100 -> 100 -> 50 -> 1).
class FNN_Regression(nn.Module):
    def __init__(self):
        super(FNN_Regression, self).__init__()
        # Three hidden layers; bias terms enabled (nn.Linear default).
        self.hidden1 = nn.Linear(in_features=8, out_features=100, bias=True)
        self.hidden2 = nn.Linear(100, 100)
        self.hidden3 = nn.Linear(100, 50)
        # Single-unit head producing the regression value.
        self.predict = nn.Linear(50, 1)

    def forward(self, x):
        """Map a (batch, 8) input to a 1-D tensor of shape (batch,)."""
        for hidden in (self.hidden1, self.hidden2, self.hidden3):
            x = F.relu(hidden(x))
        # Drop the trailing size-1 dimension so callers get a flat vector.
        return self.predict(x)[:, 0]

7、模型训练

# Plain SGD over all network parameters; mean squared error objective
# (nn.MSELoss is MSE, not RMSE).
optimizer = torch.optim.SGD(testnet.parameters(), lr=0.01)
loss_func = nn.MSELoss()
train_loss_all = []

# Train for 30 epochs, recording the sample-weighted mean loss per epoch.
for epoch in range(30):
    epoch_loss = 0.0
    epoch_count = 0
    for b_x, b_y in train_loader:
        pred = testnet(b_x)             # network output on this mini-batch
        loss = loss_func(pred, b_y)
        optimizer.zero_grad()           # reset gradients from the previous step
        loss.backward()                 # back-propagate
        optimizer.step()                # apply the parameter update
        epoch_loss += loss.item() * b_x.size(0)
        epoch_count += b_x.size(0)
    train_loss_all.append(epoch_loss / epoch_count)

8、模型预测

# Evaluate on the held-out test set and report mean absolute error.
with torch.no_grad():
    y_pre = testnet(X_test_t).numpy()
mae = mean_absolute_error(y_test, y_pre)
print('在测试集上的绝对值误差为:', mae)

9、完整代码

# -*- coding: utf-8 -*-
# @Time : 2023/8/11 15:58
# @Author : huangjian
# @Email : huangjian013@126.com
# @File : FNN_demo.py

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.datasets import fetch_california_housing

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import SGD
import torch.utils.data as Data
from torchsummary import summary
from torchviz import make_dot
import matplotlib.pyplot as plt
import seaborn as sns


# Multi-layer perceptron for scalar regression on the 8 housing features.
class FNN_Regression(nn.Module):
    def __init__(self):
        super(FNN_Regression, self).__init__()
        self.hidden1 = nn.Linear(in_features=8, out_features=100, bias=True)  # input -> 100 units
        self.hidden2 = nn.Linear(100, 100)  # second hidden layer
        self.hidden3 = nn.Linear(100, 50)   # third hidden layer
        self.predict = nn.Linear(50, 1)     # scalar output head

    def forward(self, x):
        """Forward pass: ReLU after each hidden layer, linear output."""
        h = F.relu(self.hidden1(x))
        h = F.relu(self.hidden2(h))
        h = F.relu(self.hidden3(h))
        # Index away the unit output dimension so callers get shape (batch,).
        return self.predict(h)[:, 0]


# Load the cached CSV. index_col=0 drops the unnamed row-index column that
# DataFrame.to_csv wrote earlier; without it the index is picked up as an
# extra (9th) feature and the 8-input network rejects the data.
housedata_df = pd.read_csv("california_housing.csv", index_col=0)
housedata = housedata_df.values
# 70/30 train/test split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    housedata[:, :-1], housedata[:, -1], test_size=0.3, random_state=42)

# Fit a standard scaler on the training features and reuse the fitted
# statistics to transform the test features (no test-set leakage).
scale = StandardScaler()
scale.fit(X_train)
x_train_std = scale.transform(X_train)
x_test_std = scale.transform(X_test)

# Correlation heatmap over all columns (features plus target).
datacor = pd.DataFrame(
    data=np.corrcoef(housedata_df.values, rowvar=0),
    columns=housedata_df.columns,
    index=housedata_df.columns,
)
plt.figure(figsize=(8, 6))
ax = sns.heatmap(datacor, square=True, annot=True, fmt='.3f', linewidths=.5,
                 cmap='YlGnBu', cbar_kws={'fraction': 0.046, 'pad': 0.03})
plt.show()

# Turn the numpy splits into float32 tensors for PyTorch.
X_train_t = torch.as_tensor(x_train_std, dtype=torch.float32)
y_train_t = torch.as_tensor(y_train, dtype=torch.float32)
X_test_t = torch.as_tensor(x_test_std, dtype=torch.float32)
y_test_t = torch.as_tensor(y_test, dtype=torch.float32)

# Dataset wrappers; the training set additionally gets a shuffled
# 64-sample mini-batch loader.
train_data = Data.TensorDataset(X_train_t, y_train_t)
test_data = Data.TensorDataset(X_test_t, y_test_t)
train_loader = Data.DataLoader(dataset=train_data, batch_size=64, shuffle=True, num_workers=1)

# Instantiate the network ONCE and inspect it. The original code created a
# second FNN_Regression() after summary(), so the summarized model was
# discarded and a differently-initialized one was visualized and trained.
testnet = FNN_Regression()
summary(testnet, input_size=(1, 8))  # one sample with 8 features

# Render the computation graph for a dummy input.
x = torch.randn(1, 8).requires_grad_(True)
y = testnet(x)
myMLP_vis = make_dot(y, params=dict(list(testnet.named_parameters()) + [('x', x)]))

# SGD optimizer over the network parameters; MSE objective
# (nn.MSELoss computes mean squared error, not its square root).
optimizer = torch.optim.SGD(testnet.parameters(), lr=0.01)
loss_func = nn.MSELoss()
train_loss_all = []

# 30 training epochs; track the per-sample average loss of each epoch.
for epoch in range(30):
    running = 0.0
    seen = 0
    for batch_x, batch_y in train_loader:
        prediction = testnet(batch_x)        # forward pass on the mini-batch
        batch_loss = loss_func(prediction, batch_y)
        optimizer.zero_grad()                # clear accumulated gradients
        batch_loss.backward()                # compute gradients
        optimizer.step()                     # update parameters
        running += batch_loss.item() * batch_x.size(0)
        seen += batch_x.size(0)
    train_loss_all.append(running / seen)

# Training-loss curve over epochs.
plt.figure(figsize=(8, 6))
plt.plot(train_loss_all, 'ro-', label='Train loss')
plt.grid()
plt.legend()
plt.xlabel('epoch')
plt.ylabel('Loss')
plt.show()

# Report mean absolute error on the held-out test set.
with torch.no_grad():
    y_pre = testnet(X_test_t).numpy()
mae = mean_absolute_error(y_test, y_pre)
print('在测试集上的绝对值误差为:', mae)

# Compare predictions with ground truth, ordered by ascending true value.
index = np.argsort(y_test)
plt.figure(figsize=(8, 6))
plt.plot(np.arange(len(y_test)), y_test[index], 'r', label='Original Y')
plt.scatter(np.arange(len(y_pre)), y_pre[index], s=3, c='b', label='Prediction')
plt.grid()
plt.legend(loc='upper left')
plt.xlabel('Index')
plt.ylabel('Y')
plt.show()

### 使用PyTorch构建和训练MLP神经网络以执行回归预测

#### 定义模型结构

为了创建一个多层感知机(MLP),可以继承`torch.nn.Module`类来定义自定义的神经网络架构。对于回归任务,通常最后一层不采用激活函数以便直接输出连续值。

```python
import torch
from torch import nn, optim


class MLPRegression(nn.Module):
    def __init__(self, input_dim=784, hidden_dims=[128], output_dim=1):
        # 对于MNIST数据集input_dim应为图像像素数即28*28=784;output_dim设置成1因为是单变量回归[^1]
        super().__init__()
        layers = []
        current_dim = input_dim
        for hdim in hidden_dims:
            layers.append(nn.Linear(current_dim, hdim))
            layers.append(nn.ReLU())  # 添加ReLU作为激活函数
            current_dim = hdim
        layers.append(nn.Linear(current_dim, output_dim))  # 输出层不需要激活函数
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x).squeeze(-1)  # 去除多余的维度适应回归输出
```

#### 准备数据集

针对特定的应用场景准备相应的数据加载器。这里假设有一个名为`CustomDataset`的数据集类已经实现了必要的接口方法如`__len__()`, `__getitem__()`等。

```python
train_loader = DataLoader(CustomDataset(train=True), batch_size=64, shuffle=True)
test_loader = DataLoader(CustomDataset(train=False), batch_size=64, shuffle=False)
```

#### 训练过程

设定损失函数以及优化算法之后,在循环体内完成参数更新操作。

```python
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = MLPRegression().to(device)
criterion = nn.MSELoss()  # 回归问题常用均方误差(MSE)作为目标函数
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs.to(device))
        loss = criterion(outputs, targets.float().to(device))
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    avg_train_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_train_loss:.4f}")
```

#### 测试/验证阶段

在测试集中评估模型的表现情况,并计算平均绝对误差或其他评价指标。

```python
def evaluate_regression(model, test_loader):
    model.eval()
    predictions, actuals = [], []
    with torch.no_grad():
        for inputs, targets in test_loader:
            outputs = model(inputs.to(device)).detach().cpu().numpy()
            targets = targets.cpu().numpy()
            predictions.extend(outputs.tolist())
            actuals.extend(targets.tolist())
    mae = mean_absolute_error(actuals, predictions)
    mse = mean_squared_error(actuals, predictions)
    r2 = r2_score(actuals, predictions)
    return {'mae': mae, 'mse': mse, 'r2': r2}
```
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值