import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.manifold import TSNE
import torch
import torch.nn as nn
from torch.optim import SGD, Adam
import torch.utils.data as Data
import matplotlib.pyplot as plt
import seaborn as sns
import hiddenlayer as hl
from torchviz import make_dot
spam = pd.read_csv("./spambase.csv")
spam.head()
|   | word_freq_make | word_freq_address | word_freq_all | word_freq_3d | word_freq_our | word_freq_over | word_freq_remove | word_freq_internet | word_freq_order | word_freq_mail | ... | char_freq_; | char_freq_( | char_freq_[ | char_freq_! | char_freq_$ | char_freq_# | capital_run_length_average | capital_run_length_longest | capital_run_length_total | Class |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0.64 | 0.64 | 0.0 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ... | 0.00 | 0.000 | 0.0 | 0.778 | 0.000 | 0.000 | 3.756 | 61.0 | 278.0 | 1.0 |
| 1 | 0.21 | 0.28 | 0.50 | 0.0 | 0.14 | 0.28 | 0.21 | 0.07 | 0.00 | 0.94 | ... | 0.00 | 0.132 | 0.0 | 0.372 | 0.180 | 0.048 | 5.114 | 101.0 | 1028.0 | 1.0 |
| 2 | 0.06 | 0.00 | 0.71 | 0.0 | 1.23 | 0.19 | 0.19 | 0.12 | 0.64 | 0.25 | ... | 0.01 | 0.143 | 0.0 | 0.276 | 0.184 | 0.010 | 9.821 | 485.0 | 2259.0 | 1.0 |
| 3 | 0 | 0.00 | 0.00 | 0.0 | 0.63 | 0.00 | 0.31 | 0.63 | 0.31 | 0.63 | ... | 0.00 | 0.137 | 0.0 | 0.137 | 0.000 | 0.000 | 3.537 | 40.0 | 191.0 | 1.0 |
| 4 | 0 | 0.00 | 0.00 | 0.0 | 0.63 | 0.00 | 0.31 | 0.63 | 0.31 | 0.63 | ... | 0.00 | 0.135 | 0.0 | 0.135 | 0.000 | 0.000 | 3.537 | 40.0 | 191.0 | 1.0 |

5 rows × 58 columns
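## Optional sanity check (not in the original notebook): a quick look at column dtypes
## and missing values shows whether the LabelEncoder step below is actually needed.
print(spam.dtypes.value_counts())
print("missing values:", spam.isna().sum().sum())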
spam.Class.value_counts()
Class
0.0 2788
1.0 1813
Name: count, dtype: int64
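## Optional (not in the original notebook): normalize=True returns class proportions
## instead of counts; from the counts above this is roughly 61% ham (0) vs 39% spam (1).
spam.Class.value_counts(normalize=True)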
# Encode any non-numeric feature columns as integers
label_encoders = {}
for column in spam.columns:
    if spam[column].dtype == 'object':
        le = LabelEncoder()
        spam[column] = le.fit_transform(spam[column])
        label_encoders[column] = le
# Select features and label
X = spam.iloc[:, 0:57].values
y = spam.Class.values
# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=123)
# Scale each feature to the [0, 1] range (min-max normalization)
scalers = MinMaxScaler(feature_range=(0, 1))
X_train_s = scalers.fit_transform(X_train)
X_test_s = scalers.transform(X_test)
print("Train and test sets are ready; features scaled to [0, 1].")
Train and test sets are ready; features scaled to [0, 1].
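## Optional check (not in the original notebook): the scaled training features should lie
## in [0, 1]; the test set can fall slightly outside because it is transformed with the
## training-set minima and maxima.
print(X_train_s.min(), X_train_s.max())
print(X_test_s.min(), X_test_s.max())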
# Box plots of each scaled feature, grouped by class, to compare the two distributions
colname = spam.columns.values[:-1]
plt.figure(figsize=(20, 14))
for ii in range(len(colname)):
    plt.subplot(7, 9, ii + 1)
    sns.boxplot(x=y_train, y=X_train_s[:, ii])
    plt.title(colname[ii])
plt.subplots_adjust(hspace=0.4)
plt.show()
## Fully connected neural network
class MLPclassifica(nn.Module):
    def __init__(self):
        super(MLPclassifica, self).__init__()
        ## First hidden layer
        self.hidden1 = nn.Sequential(
            nn.Linear(
                in_features=57,
                out_features=30,
                bias=True
            ),
            nn.ReLU()
        )
        ## Second hidden layer
        self.hidden2 = nn.Sequential(
            nn.Linear(30, 10),
            nn.ReLU()
        )
        ## Classification layer
        self.classifica = nn.Sequential(
            nn.Linear(10, 2),
            nn.Sigmoid()
        )
    ## Forward pass
    def forward(self, x):
        fc1 = self.hidden1(x)
        fc2 = self.hidden2(fc1)
        output = self.classifica(fc2)
        ## Return both hidden-layer activations and the output layer
        return fc1, fc2, output
## Instantiate the network
mlpc = MLPclassifica()
# Visualize the computation graph with make_dot
x = torch.randn(1, 57).requires_grad_(True)
y = mlpc(x)
Mymlpcvis = make_dot(y, params=dict(list(mlpc.named_parameters()), **{'x': x}))
Mymlpcvis
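## Optional (not in the original notebook): make_dot returns a graphviz Digraph, so the
## graph can also be saved to disk. This assumes the Graphviz binaries are installed;
## the file name "mlpc_graph" is only an example.
Mymlpcvis.render("mlpc_graph", format="png", cleanup=True)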
## Convert the (unscaled) arrays to tensors
X_train_nots = torch.from_numpy(X_train.astype(np.float32))
y_train_t = torch.from_numpy(y_train.astype(np.int64))
X_test_nots = torch.from_numpy(X_test.astype(np.float32))
y_test_t = torch.from_numpy(y_test.astype(np.int64))
## Use TensorDataset to bundle X and y together
train_data_nots = Data.TensorDataset(X_train_nots, y_train_t)
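## Optional check (not in the original notebook): confirm tensor shapes and dtypes
## before building the DataLoader (57 feature columns, float32 features, int64 labels).
print(X_train_nots.shape, X_train_nots.dtype)
print(y_train_t.shape, y_train_t.dtype)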
## Define a DataLoader to batch the training data
train_nots_loader = Data.DataLoader(
    dataset=train_data_nots,
    batch_size=64,
    shuffle=True,
    num_workers=4
)
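## Optional check (not in the original notebook): pull a single batch to verify the batch
## shapes. A separate loader with num_workers=0 keeps this quick interactive check simple.
check_loader = Data.DataLoader(train_data_nots, batch_size=64, shuffle=True, num_workers=0)
b_x, b_y = next(iter(check_loader))
print(b_x.shape, b_y.shape)  # expected: torch.Size([64, 57]) and torch.Size([64])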
## Define the optimizer and loss
optimizer = torch.optim.Adam(mlpc.parameters(), lr=0.01)
# CrossEntropyLoss takes the 2-dimensional network output and integer class labels
loss_func = nn.CrossEntropyLoss()
## Record training metrics with hiddenlayer
history1 = hl.History()
## Use a Canvas for live visualization
canvas1 = hl.Canvas()
print_step = 25
## Train the model iteratively
for epoch in range(15):
    for step, (b_x, b_y) in enumerate(train_nots_loader):
        ## Forward pass on one mini-batch; only the final output is used for the loss
        _, _, output = mlpc(b_x)
        train_loss = loss_func(output, b_y)
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        niter = epoch * len(train_nots_loader) + step + 1
        ## Every print_step iterations, evaluate on the test set
        if niter % print_step == 0:
            _, _, output = mlpc(X_test_nots)
            _, pre_lab = torch.max(output, 1)
            test_accuracy = accuracy_score(y_test_t, pre_lab)
            ## Log iteration number, training loss and test accuracy to history
            history1.log(niter, train_loss=train_loss, test_accuracy=test_accuracy)
            with canvas1:
                canvas1.draw_plot(history1["train_loss"])
                canvas1.draw_plot(history1["test_accuracy"])
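## Optional final evaluation (not in the original notebook): predict on the whole test set
## and summarize the result with the metrics imported at the top of the notebook.
with torch.no_grad():
    _, _, test_output = mlpc(X_test_nots)
_, pre_lab = torch.max(test_output, 1)
print("test accuracy:", accuracy_score(y_test_t, pre_lab))
print(confusion_matrix(y_test_t, pre_lab))
print(classification_report(y_test_t, pre_lab))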