第五章 全连接神经网络邮件分类

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.manifold import TSNE

import torch
import torch.nn as nn
from torch.optim import SGD, Adam
import torch.utils.data as Data

import matplotlib.pyplot as plt
import seaborn as sns
import hiddenlayer as hl
from torchviz import make_dot
C:\Anaconda3\envs\DL_01\lib\site-packages\tqdm\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm
# Load the spam dataset: word/char frequency feature columns plus a final
# "Class" label column (0.0 = ham, 1.0 = spam — see the preview below).
spam = pd.read_csv("./spambase.csv")
# Peek at the first rows to sanity-check the load.
spam.head()
word_freq_makeword_freq_addressword_freq_allword_freq_3dword_freq_ourword_freq_overword_freq_removeword_freq_internetword_freq_orderword_freq_mail...char_freq_;char_freq_(char_freq_[char_freq_!char_freq_$char_freq_#capital_run_length_averagecapital_run_length_longestcapital_run_length_totalClass
000.640.640.00.320.000.000.000.000.00...0.000.0000.00.7780.0000.0003.75661.0278.01.0
10.210.280.500.00.140.280.210.070.000.94...0.000.1320.00.3720.1800.0485.114101.01028.01.0
20.060.000.710.01.230.190.190.120.640.25...0.010.1430.00.2760.1840.0109.821485.02259.01.0
300.000.000.00.630.000.310.630.310.63...0.000.1370.00.1370.0000.0003.53740.0191.01.0
400.000.000.00.630.000.310.630.310.63...0.000.1350.00.1350.0000.0003.53740.0191.01.0

5 rows × 58 columns

# Class distribution (ham 0.0 vs spam 1.0). The module-level
# pd.value_counts is deprecated (see the FutureWarning emitted above);
# the Series method gives the identical result.
spam.Class.value_counts()
C:\Users\14557\AppData\Local\Temp\ipykernel_23956\795502889.py:1: FutureWarning: pandas.value_counts is deprecated and will be removed in a future version. Use pd.Series(obj).value_counts() instead.
  pd.value_counts(spam.Class)





Class
0.0    2788
1.0    1813
Name: count, dtype: int64
# Encode every non-numeric (object-dtype) column into integer codes,
# keeping the fitted encoders around for later inverse transforms.
label_encoders = {}
for col in spam.select_dtypes(include="object").columns:
    encoder = LabelEncoder()
    spam[col] = encoder.fit_transform(spam[col])
    label_encoders[col] = encoder

# Features are the first 57 columns; the label is the Class column.
X = spam.iloc[:, 0:57].values
y = spam.Class.values

# Hold out 25% of the rows as a test set (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=123
)

# Scale each feature into [0, 1]; fit on the training split only, then
# apply the same transform to the test split.
scalers = MinMaxScaler(feature_range=(0, 1))
X_train_s = scalers.fit_transform(X_train)
X_test_s = scalers.transform(X_test)

print("训练集和测试集已准备好,数据已标准化。")
训练集和测试集已准备好,数据已标准化。
# One boxplot per feature (all columns except the Class label),
# split by training label, laid out on a 7x9 grid.
colname = spam.columns.values[:-1]
plt.figure(figsize=(20, 14))
for idx, feature in enumerate(colname):
    plt.subplot(7, 9, idx + 1)
    sns.boxplot(x=y_train, y=X_train_s[:, idx])
    plt.title(feature)
plt.subplots_adjust(hspace=0.4)
plt.show()
        

在这里插入图片描述

## Fully connected neural network for spam classification.
class MLPclassifica(nn.Module):
    """Two-hidden-layer MLP: 57 input features -> 30 -> 10 -> 2 class scores.

    Fix: the classification head now returns raw logits (no activation).
    The original applied nn.Sigmoid() here, but this model is trained
    with nn.CrossEntropyLoss, which expects unnormalized logits (it
    applies log-softmax internally); squashing through a sigmoid first
    shrinks gradients and distorts the loss.
    """

    def __init__(self):
        super(MLPclassifica, self).__init__()
        ## First hidden layer: 57 inputs -> 30 units, ReLU.
        self.hidden1 = nn.Sequential(
            nn.Linear(
                in_features = 57,
                out_features = 30,
                bias = True
            ),
            nn.ReLU()
        )
        ## Second hidden layer: 30 -> 10 units, ReLU.
        self.hidden2 = nn.Sequential(
            nn.Linear(30,10),
            nn.ReLU()
        )
        ## Classification head: 10 -> 2 raw logits (see class docstring).
        self.classifica = nn.Sequential(
            nn.Linear(10,2)
        )

    ## Forward pass.
    def forward(self, x):
        """Run the network on x (float tensor, shape (batch, 57)).

        Returns (fc1, fc2, output): both hidden activations and the
        2-class logits, so the hidden layers can be inspected/visualized.
        """
        fc1 = self.hidden1(x)
        fc2 = self.hidden2(fc1)
        output = self.classifica(fc2)
        return fc1,fc2,output

## Instantiate the network and render its computation graph.
mlpc = MLPclassifica()
# A dummy input that requires grad so make_dot can trace the full graph.
x = torch.randn(1, 57).requires_grad_(True)
y = mlpc(x)
# Pass the model parameters (plus the input, keyed as 'x') for labeling.
graph_params = {**dict(mlpc.named_parameters()), 'x': x}
Mymlpcvis = make_dot(y, params=graph_params)
Mymlpcvis

在这里插入图片描述

## Convert the UNSCALED numpy splits to PyTorch tensors
## (note: X_train, not X_train_s — the scaled arrays are not used here).
# NOTE(review): the notebook output shows "RuntimeWarning: invalid value
# encountered in cast" on the y_train line — y_train apparently contains
# NaN, which astype(np.int64) turns into garbage labels; verify the Class
# column upstream before training.
X_train_nots = torch.from_numpy(X_train.astype(np.float32))
y_train_t = torch.from_numpy(y_train.astype(np.int64))
X_test_nots = torch.from_numpy(X_test.astype(np.float32))
y_test_t = torch.from_numpy(y_test.astype(np.int64))
## Bundle features and labels together with TensorDataset.
train_data_nots = Data.TensorDataset(X_train_nots,y_train_t)
C:\Users\14557\AppData\Local\Temp\ipykernel_23956\3774708658.py:3: RuntimeWarning: invalid value encountered in cast
  y_train_t = torch.from_numpy(y_train.astype(np.int64))
## Batch the training data with a DataLoader.
train_nots_loader = Data.DataLoader(
    dataset=train_data_nots,
    batch_size=64,
    shuffle=True,
    num_workers=4,
)
## Optimizer and loss: Adam over all parameters, cross-entropy on the
## 2-class output.
optimizer = Adam(mlpc.parameters(), lr=0.01)
loss_func = nn.CrossEntropyLoss()
## hiddenlayer History/Canvas pair for live training curves.
history1 = hl.History()
canvas1 = hl.Canvas()
# Evaluate on the test set every `print_step` iterations.
print_step = 25
##对模型进行迭代训练
for epoch in range(15):
    for step,(b_x, b_y) in enumerate (train_nots_loader):
        _,_,output = mlpc(b_x)
        train_loss = loss_func(output, b_y)
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        niter = epoch*len(train_nots_loader)+step+1
        ##计算每次经过print_step次迭代后的输出
        if niter % print_step == 0:
            _,_,output = mlpc(X_test_nots)
            _,pre_lab = torch.max(output,1)
            test_accuracy = accuracy_score(y_test_t, pre_lab)
            ##为history添加epoch,损失和精度
            history1.log(niter, train_loss=train_loss,test_accuracy=test_accuracy)
            with canvas1:
                canvas1.draw_plot(history1["train_loss"])
                canvas1.draw_plot(history1["test_accuracy"])

在这里插入图片描述


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值