Network Traffic Classification Based on CNN and LSTM

I recently built a CNN- and LSTM-based network traffic classification system. Download links for the code and the dataset are at the end of this article.

First, a video demo of the system:

Watch on Bilibili: video link

Requirements:
tensorflow==1.13.1
No version requirements for the other libraries; install the latest versions.
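
Note that this code depends on tf.contrib and tf.flags, both of which were removed in TensorFlow 2.x, so a 1.x install is required. A minimal sanity check before running anything (a sketch, assuming a pip-installed TensorFlow):

# Verify a TensorFlow 1.x build is installed (the project was tested on 1.13.1);
# tf.contrib and tf.flags no longer exist in TensorFlow 2.x.
import tensorflow as tf

assert tf.__version__.startswith('1.'), 'TensorFlow 1.x required, found ' + tf.__version__
print('TensorFlow', tf.__version__, 'OK')
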
Training code: train.py

# -*- coding: utf-8 -*-
import os
import sys
import csv
import time
import json
import shutil
import datetime
import pickle as pkl
import tensorflow as tf
from tensorflow.contrib import learn
from tensorflow.python.framework import graph_util
import data_helper
from rnn_classifier import rnn_clf
from cnn_classifier import cnn_clf
from clstm_classifier import clstm_clf

try:
    from sklearn.model_selection import train_test_split
except ImportError as e:
    error = "Please install scikit-learn."
    print(str(e) + ': ' + error)
    sys.exit()

# Show warnings and errors only
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Parameters
# =============================================================================

# Model choice
tf.flags.DEFINE_string('clf', 'clstm', "Type of classifier. Default: clstm. You have four choices: [cnn, lstm, blstm, clstm]")

# Data parameters
tf.flags.DEFINE_string('data_file', './data/data.csv', 'Data file path')
tf.flags.DEFINE_string('stop_word_file', None, 'Stop word file path')
tf.flags.DEFINE_string('language', 'en', "Language of the data file. You have two choices: [ch, en]")
tf.flags.DEFINE_integer('min_frequency', 0, 'Minimal word frequency')
tf.flags.DEFINE_integer('num_classes', 3, 'Number of classes')
tf.flags.DEFINE_integer('max_length', 0, 'Max document length')
tf.flags.DEFINE_integer('vocab_size', 0, 'Vocabulary size')
tf.flags.DEFINE_float('test_size', 0.1, 'Cross validation test size')

# Model hyperparameters
tf.flags.DEFINE_integer('embedding_size', 256, 'Word embedding size. For CNN, C-LSTM.')
tf.flags.DEFINE_string('filter_sizes', '3, 4, 5', 'CNN filter sizes. For CNN, C-LSTM.')
tf.flags.DEFINE_integer('num_filters', 128, 'Number of filters per filter size. For CNN, C-LSTM.')
tf.flags.DEFINE_integer('hidden_size', 128, 'Number of hidden units in the LSTM cell. For LSTM, Bi-LSTM')
tf.flags.DEFINE_integer('num_layers', 2, 'Number of the LSTM cells. For LSTM, Bi-LSTM, C-LSTM')
tf.flags.DEFINE_float('keep_prob', 0.5, 'Dropout keep probability')  # All
tf.flags.DEFINE_float('learning_rate', 1e-3, 'Learning rate')  # All
tf.flags.DEFINE_float('l2_reg_lambda', 0.001, 'L2 regularization lambda')  # All

# Training parameters
tf.flags.DEFINE_integer('batch_size', 32, 'Batch size')
tf.flags.DEFINE_integer('num_epochs', 10, 'Number of epochs')
tf.flags.DEFINE_float('decay_rate', 1, 'Learning rate decay rate. Range: (0, 1]')
tf.flags.DEFINE_integer('decay_steps', 100000, 'Learning rate decay steps')
tf.flags.DEFINE_integer('evaluate_every_steps', 100, 'Evaluate the model on validation set after this many steps')
tf.flags.DEFINE_integer('save_every_steps', 1000, 'Save the model after this many steps')
tf.flags.DEFINE_integer('num_checkpoint', 2, 'Number of models to store')

FLAGS = tf.app.flags.FLAGS

def main_train(aa, data_path='./data/data.csv', llog=False):
    """Train the selected classifier. `aa` is a per-line logging callback
    used by the GUI (see main.py); it is only called when `llog` is True."""

    if FLAGS.clf == 'lstm':
        FLAGS.embedding_size = FLAGS.hidden_size
    elif FLAGS.clf == 'clstm':
        FLAGS.hidden_size = len(FLAGS.filter_sizes.split(",")) * FLAGS.num_filters

    # Output files directory
    timestamp = str(int(time.time()))
    model_dir = os.path.join(os.path.curdir,'model')
    params_dir = os.path.join(os.path.curdir,'params')
    if not os.path.exists(params_dir):
        os.makedirs(params_dir)

    # Load and save data
    # =============================================================================

    data, labels, lengths, vocab_processor = data_helper.load_data(file_path=data_path,
                                                                   sw_path=FLAGS.stop_word_file,
                                                                   min_frequency=FLAGS.min_frequency,
                                                                   max_length=FLAGS.max_length,
                                                                   language=FLAGS.language,
                                                                   shuffle=True)

    # Save the vocabulary processor
    vocab_processor.save(os.path.join(params_dir, 'vocab'))

    FLAGS.vocab_size = len(vocab_processor.vocabulary_._mapping)

    FLAGS.max_length = vocab_processor.max_document_length

    params = FLAGS.flag_values_dict()
    # Print parameters
    model = params['clf']
    if model == 'cnn':
        del params['hidden_size']
        del params['num_layers']
    elif model == 'lstm' or model == 'blstm':
        del params['num_filters']
        del params['filter_sizes']
        params['embedding_size'] = params['hidden_size']
    elif model == 'clstm':
        params['hidden_size'] = len(list(map(int, params['filter_sizes'].split(",")))) * params['num_filters']

    params_dict = sorted(params.items(), key=lambda x: x[0])
    print('Parameters:')
    for item in params_dict:
        print('{}: {}'.format(item[0], item[1]))
    print('')

    # Save parameters to file
    with open(os.path.join(params_dir, 'params.pkl'), 'wb') as params_file:
        pkl.dump(params, params_file, True)


    # Simple cross-validation split
    x_train, x_valid, y_train, y_valid, train_lengths, valid_lengths = train_test_split(data,
                                                                                        labels,
                                                                                        lengths,
                                                                                        test_size=FLAGS.test_size,
                                                                                        random_state=22)
    # Batch iterator
    train_data = data_helper.batch_iter(x_train, y_train, train_lengths, FLAGS.batch_size, FLAGS.num_epochs)

    # Train
    # =============================================================================

    with tf.Graph().as_default():
        with tf.Session() as sess:
            if FLAGS.clf == 'cnn':
                classifier = cnn_clf(FLAGS)
            elif FLAGS.clf == 'lstm' or FLAGS.clf == 'blstm':
                classifier = rnn_clf(FLAGS)
            elif FLAGS.clf == 'clstm':
                classifier = clstm_clf(FLAGS)
            else:
                raise ValueError('clf should be one of [cnn, lstm, blstm, clstm]')

            # Training procedure
            global_step = tf.Variable(0, name='global_step', trainable=False)
            # Learning rate decay
            starter_learning_rate = FLAGS.learning_rate
            learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                       global_step,
                                                       FLAGS.decay_steps,
                                                       FLAGS.decay_rate,
                                                       staircase=True)
            optimizer = tf.train.AdamOptimizer(learning_rate)
            grads_and_vars = optimizer.compute_gradients(classifier.cost)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step,name='op_to_store')

            # Summaries
            loss_summary = tf.summary.scalar('Loss', classifier.cost)
            accuracy_summary = tf.summary.scalar('Accuracy', classifier.accuracy)

            # Train summary
            train_summary_op = tf.summary.merge_all()
            train_summary_dir = os.path.join(os.path.curdir, 'summaries', 'train')
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # Validation summary
            valid_summary_op = tf.summary.merge_all()
            valid_summary_dir = os.path.join(os.path.curdir, 'summaries', 'valid')
            valid_summary_writer = tf.summary.FileWriter(valid_summary_dir, sess.graph)

            saver = tf.train.Saver(max_to_keep=FLAGS.num_checkpoint)

            sess.run(tf.global_variables_initializer())


            def run_step(input_data, is_training=True):
                """Run one training or validation step."""
                input_x, input_y, sequence_length = input_data

                fetches = {'step': global_step,
                           'cost': classifier.cost,
                           'accuracy': classifier.accuracy,
                           'learning_rate': learning_rate}
                feed_dict = {classifier.input_x: input_x,
                             classifier.input_y: input_y}

                if FLAGS.clf != 'cnn':
                    fetches['final_state'] = classifier.final_state
                    feed_dict[classifier.batch_size] = len(input_x)
                    feed_dict[classifier.sequence_length] = sequence_length

                if is_training:
                    fetches['train_op'] = train_op
                    fetches['summaries'] = train_summary_op
                    feed_dict[classifier.keep_prob] = FLAGS.keep_prob
                else:
                    fetches['summaries'] = valid_summary_op
                    feed_dict[classifier.keep_prob] = 1.0

                results = sess.run(fetches, feed_dict)
                step = results['step']
                cost = results['cost']
                accuracy = results['accuracy']
                summaries = results['summaries']

                # Write summaries to file
                if is_training:
                    train_summary_writer.add_summary(summaries, step)
                else:
                    valid_summary_writer.add_summary(summaries, step)

                time_str = datetime.datetime.now().isoformat()

                ll="{}: step: {}, loss: {:g}, accuracy: {:g}".format(time_str, step, cost, accuracy)
                print(ll)

                return accuracy,ll


            print('Start training ...')

            for train_input in train_data:
                _, ll = run_step(train_input, is_training=True)

                if llog:
                    aa(ll)

                current_step = tf.train.global_step(sess, global_step)

                if current_step % FLAGS.evaluate_every_steps == 0:
                    print('\nValidation')
                    if llog:
                        aa('\nValidation')
                    run_step((x_valid, y_valid, valid_lengths), is_training=False)
                    if llog:
                        aa('')

                if current_step % FLAGS.save_every_steps == 0:
                    # Remove the previous export so simple_save can write a
                    # fresh one (shutil.rmtree is portable, unlike `rm -rf`)
                    if os.path.exists(model_dir):
                        shutil.rmtree(model_dir)
                    tf.saved_model.simple_save(sess,
                                               os.path.join(model_dir, timestamp),
                                               inputs={"input_x": classifier.input_x},
                                               outputs={"input_y": classifier.input_y})

            print('\nAll the files have been saved to {}\n'.format(model_dir))
            if llog:
                aa('\nAll the files have been saved to {}\n'.format(model_dir))
if __name__ == "__main__":
    main_train(aa=None)  # no logging callback needed outside the GUI
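
Because train.py exposes main_train() as a plain function, training can also be driven from other Python code; this is exactly how the GUI in main.py calls it. A minimal sketch, using print as a stand-in for the GUI's logging callback:

# Run training from another script. `aa` is the per-line logging callback
# that main_train invokes when llog=True; any one-argument callable works.
from train import main_train

main_train(aa=print, data_path='./data/data.csv', llog=True)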

Test code: test.py

# -*- coding: utf-8 -*-
import warnings
warnings.filterwarnings('ignore')
import os
import csv
import numpy as np
import pickle as pkl
import tensorflow as tf
from tensorflow.contrib import learn


import data_helper

# Show warnings and errors only
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
def mains(data_path=r'./data/data.csv', output_data_file='./out/pre.csv', model_dir='./model/1600693479', batch_size2=64):

    # Restore the training parameters saved by train.py
    # (train.py writes them to ./params/params.pkl)
    with open('./params/params.pkl', 'rb') as f:
        params = pkl.load(f, encoding='bytes')

    # Load test data

    data, labels, lengths, _ = data_helper.load_data(file_path=data_path,
                                                     sw_path=params['stop_word_file'],
                                                     min_frequency=params['min_frequency'],
                                                     max_length=params['max_length'],
                                                     language=params['language'],
                                                     shuffle=False)
    # Restore the SavedModel exported by train.py
    graph = tf.Graph()
    with tf.Session(graph=graph) as sess:
        tf.saved_model.loader.load(sess, ['serve'], model_dir)

        # Get tensors
        input_x = graph.get_tensor_by_name('input_x:0')
        input_y = graph.get_tensor_by_name('input_y:0')
        keep_prob = graph.get_tensor_by_name('keep_prob:0')
        predictions = graph.get_tensor_by_name('softmax/predictions:0')
        accuracy = graph.get_tensor_by_name('accuracy/accuracy:0')

        # Generate batches (single epoch, no shuffling)
        batches = data_helper.batch_iter(data, labels, lengths, batch_size2, 1)

        # Assumes len(data) is a multiple of the batch size; a final partial
        # batch would make the batch_size fed below incorrect
        num_batches = int(len(data) / batch_size2)
        all_predictions = []
        sum_accuracy = 0
        # Test
        for batch in batches:
            x_test, y_test, x_lengths = batch
            if params['clf'] == 'cnn':
                feed_dict = {input_x: x_test, input_y: y_test, keep_prob: 1.0}
                batch_predictions, batch_accuracy = sess.run([predictions, accuracy], feed_dict)
            else:
                batch_size = graph.get_tensor_by_name('batch_size:0')
                sequence_length = graph.get_tensor_by_name('sequence_length:0')
                feed_dict = {input_x: x_test, input_y: y_test, batch_size: batch_size2, sequence_length: x_lengths, keep_prob: 1.0}

                batch_predictions, batch_accuracy = sess.run([predictions, accuracy], feed_dict)
            sum_accuracy += batch_accuracy
            all_predictions = np.concatenate([all_predictions, batch_predictions])

        final_accuracy = sum_accuracy / num_batches

    # Print test accuracy
    print('Test accuracy: {}'.format(final_accuracy))

    # Make sure the output directory exists, then overwrite any old predictions
    out_dir = os.path.dirname(output_data_file)
    if out_dir and not os.path.exists(out_dir):
        os.makedirs(out_dir)
    if os.path.exists(output_data_file):
        os.remove(output_data_file)

    # Save all predictions
    with open(output_data_file, 'w', encoding='utf-8', newline='') as f:
        csvwriter = csv.writer(f)
        csvwriter.writerow(['True class', 'Prediction'])
        for i in range(len(all_predictions)):
            csvwriter.writerow([labels[i], all_predictions[i]])
        print('Predictions saved to {}'.format(output_data_file))
    return final_accuracy
if __name__ == "__main__":
    mains()
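
mains() returns the final test accuracy and writes the per-sample predictions to the output CSV, so it can be called from other code as well (the GUI does exactly this). A minimal sketch; the paths are the signature defaults above and must match what train.py actually produced:

# Evaluate a trained model on a CSV file. model_dir must point at the
# timestamped SavedModel directory exported by train.py.
from test import mains

acc = mains(data_path='./data/data.csv',
            output_data_file='./out/pre.csv',
            model_dir='./model/1600693479',
            batch_size2=64)
print('Test accuracy:', acc)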

GUI code: main.py

import numpy as np
from PyQt5.QtWidgets import *
from PyQt5 import QtWidgets
from PyQt5.QtGui import QPixmap,QImage
from PyQt5 import QtGui
from untitled import Ui_Form
from PyQt5.QtWidgets import QFileDialog
import sys
import os
from test import mains
from train import main_train

class My(QtWidgets.QWidget, Ui_Form):
    def __init__(self):
        super(My, self).__init__()
        self.setupUi(self)
        self.use_palette()
        self.pushButton.clicked.connect(self.get_csv)
        self.pushButton_2.clicked.connect(self.verify)
        self.pushButton_3.clicked.connect(self.train)

    def use_palette(self):
        self.setWindowTitle("Network Traffic Classification System")
        window_palette = QtGui.QPalette()
        window_palette.setBrush(self.backgroundRole(), QtGui.QBrush(QtGui.QPixmap("background.jpg")))
        self.setPalette(window_palette)


    def get_csv(self):
        self.filePath, imgType = QFileDialog.getOpenFileName(self,
                                                             "Select a file",
                                                             "",
                                                             "*.csv;;*.png;;*.jpeg;;*.bmp;;All Files (*)")

        if not os.path.exists(self.filePath):
            self.warning("Please select a valid file!")
            self.label.setText("NONE")
            return
        self.label.setText(self.filePath)

    def verify(self):
        path = self.label.text()
        if not os.path.exists(path):
            msg_box = QMessageBox(QMessageBox.Warning, 'Warning', 'Please select the data first')
            msg_box.exec_()
            return
        self.printf('***** Hyperparameters: *****')
        self.printf('Data_dir: ' + self.filePath)
        self.printf('model_dir: ' + './model/1600693479')
        self.printf('batch_size: ' + '64')

        self.printf('\nProcessing ...')
        Acc = mains(data_path=self.filePath)
        self.printf('\nDone!')

        self.printf('\n\nTest accuracy: ' + str(Acc))
        self.printf('Predictions saved to ./out/pre.csv')


    def train(self):
        path = self.label.text()
        if not os.path.exists(path):
            msg_box = QMessageBox(QMessageBox.Warning, 'Warning', 'Please select the data first')
            msg_box.exec_()
            return

        main_train(data_path=self.filePath, aa=self.printf, llog=True)


    def warning(self, cause):
        msg_box = QMessageBox(QMessageBox.Warning, 'Warning', cause)
        msg_box.exec_()


if __name__ == '__main__':
    app = QtWidgets.QApplication(sys.argv)

    a = My()
    a.show()
    sys.exit(app.exec_())
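
One caveat when wiring everything together: test.py and the GUI default to model_dir='./model/1600693479', the timestamp of one particular training run, so after retraining the new timestamped directory under ./model has to be passed in. A small sketch for picking up the latest export, assuming ./model contains only the timestamped run directories written by train.py:

import os

# train.py names each export str(int(time.time())), so the lexicographically
# largest directory name under ./model is the most recent run.
model_root = './model'
latest = os.path.join(model_root, max(os.listdir(model_root)))

# from test import mains
# mains(data_path='./data/data.csv', model_dir=latest)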

Download: download list
