Network traffic classification with the Moore dataset

The Moore dataset is used here for network traffic classification. Download:
https://www.cl.cam.ac.uk/research/srg/netos/projects/archive/nprobe/data/papers/sigmetrics/index.html

Six classifiers are compared: a BP (fully connected) neural network, a CNN, Naive Bayes, a decision tree, KNN, and SVM. The imported modules are:

import tensorflow as tf
import numpy as np
import os
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from sklearn import metrics, neighbors
from tensorflow import keras
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, roc_curve
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import time
from sklearn.ensemble import RandomForestClassifier

Data preprocessing

list_y = ['WWW','MAIL','FTP-CONTROL','FTP-PASV','ATTACK','P2P','DATABASE','FTP-DATA','MULTIMEDIA','SERVICES','INTERACTIVE','GAMES']
#read each .arff file, map 'Y'/'N' flags to 1/0, handle missing '?' values, and build feature/label lists
def data_prepross(filename):
    X, Y = [], []
    for f in filename:
        print(f)
        with open(os.getcwd() + '/' + f, 'r') as file:
            # skip the ARFF header / attribute declarations
            for n, i in enumerate(file.readlines()[253:]):
                # boolean flags: Y -> 1, N -> 0 (this also corrupts class names containing 'N'; repaired below)
                i = i.replace('Y', '1')
                i = i.replace('N', '0')
                spl = i.split(',')
                # drop rows with too many missing values
                if spl.count('?') > 8:
                    continue
                i = i.replace('\n', '')
                # mean of the non-missing numeric features
                fz = [float(f) for f in i.split(',')[:-1] if f != '?']
                meana = sum(fz) / len(fz)
                # remaining missing values are set to 0
                i = i.replace('?', str(0))
                # pad with 8 copies of the mean to reach 256 features (16x16),
                # then add Gaussian white noise (list + ndarray gives element-wise addition)
                x = [float(j) for j in i.split(',')[:-1]] + [meana] * 8 + np.random.normal(0, 1, 256)
                #x = [float(j) for j in i.split(',')[:-1]] + [0] * 8
                # repair class names corrupted by the 'N' -> '0' replacement above
                y = i.split(',')[-1].replace('FTP-CO0TROL', 'FTP-CONTROL')
                y = y.replace('I0TERACTIVE', 'INTERACTIVE')
                y = list_y.index(y)
                X.append(x)
                Y.append(y)
    return X, Y


#data normalization
#train_x,train_y = data_prepross(['entry01.weka.allclass.arff',])
total_x,total_y = data_prepross(['entry01.weka.allclass.arff','entry02.weka.allclass.arff','entry03.weka.allclass.arff','entry04.weka.allclass.arff',
                                 'entry05.weka.allclass.arff','entry09.weka.allclass.arff', 'entry10.weka.allclass.arff','entry07.weka.allclass.arff','entry08.weka.allclass.arff','entry06.weka.allclass.arff'])

train_x,test_x,train_y,test_y = train_test_split(total_x,total_y,test_size=0.25, random_state=0)
train_x = tf.convert_to_tensor(train_x, dtype=tf.float32)
train_y= tf.convert_to_tensor(train_y,dtype=tf.int32)
test_x = tf.convert_to_tensor(test_x, dtype=tf.float32)
test_y = tf.convert_to_tensor(test_y,dtype= tf.int32)
train_x = tf.keras.utils.normalize(train_x, axis=1)
test_x = tf.keras.utils.normalize(test_x, axis=1)
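
The commented-out ",class_weight=class_weight" fragments next to the model definitions below suggest class weighting was also tried for the imbalanced classes. A minimal sketch for building such a dictionary (the 'balanced' inverse-frequency scheme here is an assumption, not the original weights):

from sklearn.utils.class_weight import compute_class_weight

#hypothetical class_weight dict for the commented-out fit(..., class_weight=class_weight) calls;
#'balanced' weighting is an assumption, and this assumes all 12 classes appear in train_y
y_np = train_y.numpy()
class_weight = dict(enumerate(compute_class_weight(class_weight='balanced', classes=np.unique(y_np), y=y_np)))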

BP neural network

num_classes = 12
num_pixels = 256
batchsize = 128   # batch size for the BP model (value assumed; not given in the original)

def baseline_model():
    model = Sequential()
    model.add(layers.Dense(num_pixels, input_dim=num_pixels, activation='relu'))
    #layers.Dropout(0.5)
    model.add(layers.Dense(num_classes, activation='softmax'))
    model.summary()
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
#,class_weight=class_weight
def baseline():
    t1 = time.time()
    model= baseline_model()
    X_train = tf.reshape(train_x,[-1,256])
    X_test =  tf.reshape(test_x, [-1,256])
    history = model.fit(X_train, train_y, validation_data=(X_test, test_y), epochs=20, batch_size=batchsize, verbose=2)
    scores = model.evaluate(X_test, test_y, verbose=0)
    predict_y = model.predict(X_test)
    t2 = time.time()
    print("Baseline Error: %.2f%%" % (100-scores[1]*100),t2-t1)
    print(history.history)
    return scores,predict_y
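
A minimal usage sketch (variable names are illustrative): run the BP model and keep the returned test-set predictions so they can later be fed to the confusion-matrix plot defined at the end of this post.

bp_scores, bp_pred = baseline()
#plot_confusion_matrix('BP network', bp_pred)   # once plot_confusion_matrix (defined below) is in scope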

CNN

def simple_CNNmodel():
    model = keras.models.Sequential([
        layers.Conv2D(filters=8, kernel_size=(3, 3),padding='same',input_shape=(16, 16, 1), activation='relu'),
        layers.MaxPooling2D(pool_size=(2, 2), padding = 'same' ),
        layers.Conv2D(filters=16, kernel_size=(3, 3), padding='same',activation='relu'),
        layers.MaxPooling2D(pool_size=(2, 2), padding = 'same' ),
        #layers.Dropout(0.25),
        #(5,5,16) > 400
        layers.Flatten(),
        layers.Dense(256, activation='relu'),
        #layers.Dropout(0.5),
        #layers.Dense(84, activation='relu'),
        layers.Dense(128, activation='relu'),
        #layers.Dropout(0.5),
        layers.Dense(12, activation='softmax')
    ])
    # Compile model
    model.compile(loss="sparse_categorical_crossentropy", optimizer='adam', metrics=['accuracy'])
    return model
#,class_weight=class_weight
def simple_CNN():
    t1 = time.time()
    model = simple_CNNmodel()
    X_train = tf.reshape(train_x,[-1,16,16,1])
    X_test =  tf.reshape(test_x, [-1,16,16,1])
    model.summary()
    history = model.fit(X_train, train_y, validation_data=(X_test, test_y), epochs=25, batch_size=128, verbose=2)
    scores = model.evaluate(X_test, test_y, verbose=0)
    t2 = time.time()
    pred_y = model.predict(X_test)
    print(scores)
    print("Baseline Error: %.2f%%" % (100 - scores[1] * 100),t2-t1)
    print(history.history)
    return scores,pred_y

Naive Bayes

def Bayes(trainData,trainLable,testData,testLable):
    t1 = time.time()
    mnb = GaussianNB()
    mnb.fit(trainData, trainLable)
    y_predict = mnb.predict(testData)
    t2 =time.time()
    print(t2-t1)
    print(confusion_matrix(testLable,y_predict))
    print('The Accuracy of Naive Bayes Classifier is:', mnb.score(testData,testLable ))

Decision tree

def DecisionTr(trainData,trainLable,testData,testLable):
    t1 = time.time()
    model = DecisionTreeClassifier()
    model.fit(trainData, trainLable)
    predicted = model.predict(testData)
    score = metrics.accuracy_score(testLable, predicted)
    t2 = time.time()
    print(t2-t1,score)
    print(confusion_matrix(testLable,predicted))

SVM (support vector machine)

def SVM(trainData,trainLable,testData,testLable):
    t1=time.time()
    clf = SVC()
    clf.fit(trainData, trainLable)
    svmPredict=clf.predict(testData)
    svmScore=metrics.accuracy_score(testLable, svmPredict)
    t2=time.time()
    print(t2-t1,svmScore)
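
The scikit-learn classifiers above are invoked the same way as the KNN call shown after the next section, e.g. (illustrative calls on the tensors prepared earlier):

Bayes(train_x.numpy(), train_y.numpy(), test_x.numpy(), test_y.numpy())
DecisionTr(train_x.numpy(), train_y.numpy(), test_x.numpy(), test_y.numpy())
SVM(train_x.numpy(), train_y.numpy(), test_x.numpy(), test_y.numpy())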

KNN

def Knn(trainData,trainLable,testData,testLable):
    t1=time.time()
    knn = KNeighborsClassifier()
    knn.fit(trainData, trainLable)
    knnPredict = knn.predict(testData)
    knnscore=metrics.accuracy_score(testLable, knnPredict)
    t2=time.time()
    print(t2-t1,knnscore)
    print(confusion_matrix(testLable, knnPredict))
Knn(train_x.numpy(),train_y.numpy(),test_x.numpy(),test_y.numpy())

Plotting grayscale images

def plt_image(trainx, trainy):
    # show one 16x16 grayscale "image" per traffic class,
    # using the first sample of each class in the training set
    plt.figure(num='classification', figsize=(6, 12))
    for k, name in enumerate(list_y):
        idx = np.where(trainy == k)[0][0]
        plt.subplot(3, 4, k + 1)
        plt.title(name)
        plt.imshow(np.reshape(trainx[idx], (16, 16)))
    plt.show()
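
A usage sketch (the tensors are converted to NumPy arrays so that np.where works on the labels):

plt_image(train_x.numpy(), train_y.numpy())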

Plotting the confusion matrix

def plot_confusion_matrix(title, pred_y):
    cm = confusion_matrix(test_y, np.argmax(pred_y, 1))
    #cm = confusion_matrix(test_y, pred_y > 0.5)
    labels_name = list_y
    cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]    # normalize each row
    plt.imshow(cm, interpolation='nearest')    # draw the matrix as an image
    plt.title(title)
    plt.colorbar()
    num_local = np.array(range(len(labels_name)))
    plt.xticks(num_local, labels_name, rotation=90)    # class labels on the x axis
    plt.yticks(num_local, labels_name)    # class labels on the y axis
    plt.ylabel('True')
    plt.xlabel('Predicted')
    plt.show()
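
An illustrative call, using the predictions returned by simple_CNN above:

cnn_scores, cnn_pred = simple_CNN()
plot_confusion_matrix('CNN confusion matrix', cnn_pred)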