基于TensorFlow2实现MalConv恶意软件检测

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Embedding, Conv1D, multiply, GlobalMaxPool1D, Input, Activation
import tensorflow as tf
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import datetime
%matplotlib inline
import csv
import os
import struct
from sklearn.model_selection import train_test_split

定义一维卷积神经网络

def Malconv(max_len=2000000, win_size=500, vocab_size=256):
    """Build the gated 1-D convolutional MalConv classifier.

    Args:
        max_len: Length of every input sequence fed to the network.
        win_size: Used as both the kernel size and the stride of the two
            parallel convolutions.
        vocab_size: Number of distinct token ids the embedding accepts.

    Returns:
        An uncompiled Keras ``Model`` mapping a ``(max_len,)`` input to a
        single sigmoid output.
    """
    byte_ids = Input((max_len,))
    embedded = Embedding(vocab_size, 8)(byte_ids)

    # Both convolutions use identical hyper-parameters; one provides the
    # features, the other (after a sigmoid) provides the gate.
    conv_args = dict(kernel_size=win_size, filters=128, strides=win_size, padding='same')
    features = Conv1D(**conv_args)(embedded)
    gate_logits = Conv1D(**conv_args)(embedded)
    gate = Activation('sigmoid', name='sigmoid')(gate_logits)

    gated = multiply([features, gate])
    activated = Activation('relu', name='relu')(gated)
    pooled = GlobalMaxPool1D()(activated)
    hidden = Dense(64)(pooled)
    prediction = Dense(1, activation='sigmoid')(hidden)

    return Model(byte_ids, prediction)

定义变量

# Directories holding the malicious and benign PE samples. Raw strings keep
# the Windows backslashes literal instead of relying on invalid escape
# sequences such as "\m" and "\P".
mal_file_dir = r'E:\malware1500'
ben_file_dir = r'E:\PeBenFileForEXE'
# Maximum number of benign / malicious samples to put into the index.
ben_max_sum = 1000
mal_max_sum = 1000

遍历PE文件夹中的恶意与良性软件,做好标签存入CSV

def _collect_small_files(directory, label, limit, max_size):
    """Return up to *limit* ``(filename, label)`` rows for regular files in
    *directory* whose size is strictly below *max_size* bytes."""
    rows = []
    for name in os.listdir(directory):
        if len(rows) >= limit:
            break
        path = os.path.join(directory, name)
        # Skip sub-directories: they report a size too, but cannot be read
        # back as a sample later on.
        if os.path.isfile(path) and os.path.getsize(path) < max_size:
            rows.append((name, label))
    return rows


def gen_csv(mal_dir=None, ben_dir=None, mal_limit=None, ben_limit=None,
            max_size=2000000, csv_path='MalAndBenFile.csv'):
    """Write a CSV index of malicious (label '1') and benign (label '0') files.

    Args:
        mal_dir: Directory of malicious samples; defaults to ``mal_file_dir``.
        ben_dir: Directory of benign samples; defaults to ``ben_file_dir``.
        mal_limit: Maximum malicious rows; defaults to ``mal_max_sum``.
        ben_limit: Maximum benign rows; defaults to ``ben_max_sum``.
        max_size: Only files strictly smaller than this many bytes are listed.
        csv_path: Destination CSV file (overwritten if it exists).

    The CSV has a ``filename,label`` header, followed by the malicious rows
    and then the benign rows. The benign and malicious counts are printed.
    """
    mal_dir = mal_file_dir if mal_dir is None else mal_dir
    ben_dir = ben_file_dir if ben_dir is None else ben_dir
    mal_limit = mal_max_sum if mal_limit is None else mal_limit
    ben_limit = ben_max_sum if ben_limit is None else ben_limit

    mal_rows = _collect_small_files(mal_dir, '1', mal_limit, max_size)
    ben_rows = _collect_small_files(ben_dir, '0', ben_limit, max_size)
    print(len(ben_rows), len(mal_rows))

    headers = ['filename', 'label']
    with open(csv_path, 'w', encoding='utf8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(headers)
        writer.writerows(mal_rows + ben_rows)

执行获取CSV

# Build the labelled file index (written to MalAndBenFile.csv).
gen_csv()

字节填充

def fill_list(my_list: list, length, fill=100):
    """Right-pad *my_list* with *fill* until it holds *length* items.

    A list that already has at least *length* items is returned unchanged
    (the very same object, not truncated); otherwise a new, padded list is
    returned.
    """
    missing = length - len(my_list)
    if missing <= 0:
        return my_list
    padding = [fill] * missing
    return my_list + padding

获取数据

# Load the filename/label index from MalAndBenFile.csv.
data = pd.read_csv('./MalAndBenFile.csv')

在这里插入图片描述
PE文件转字节列表

def read_bin(file_name):
    """Read a binary file and return its content as a list of byte values.

    Args:
        file_name: Path of the file to read.

    Returns:
        A list of ints in the range 0-255, one per byte of the file
        (an empty list for an empty file).
    """
    with open(file_name, "rb") as f:
        # Iterating a bytes object already yields ints, so no struct format
        # string as long as the file is needed; `with` closes the file.
        return list(f.read())

构造训练与测试数据集

# Turn the CSV index into padded byte sequences plus integer labels.
data = data.to_numpy()
new_data = []
new_label = []
padded_len = 2000000

for row in data:
    # Column 0 is the file name, column 1 the label (0 = benign, 1 = malicious).
    if row[1] == 0:
        source_dir, target = ben_file_dir, 0
    elif row[1] == 1:
        source_dir, target = mal_file_dir, 1
    else:
        continue  # rows with any other label are ignored
    sample_path = source_dir + os.sep + row[0]
    new_data.append(fill_list(read_bin(sample_path), padded_len))
    new_label.append(target)

new_data = np.array(new_data, dtype='int32')
new_label = np.array(new_label, dtype='int32')

# Hold out a quarter of the samples for validation.
x_train, x_test, y_train, y_test = train_test_split(
    new_data, new_label, test_size=0.25
)

x_train, x_test, y_train, y_test = (
    tf.cast(x_train, tf.int32),
    tf.cast(x_test, tf.int32),
    tf.cast(y_train, tf.int32),
    tf.cast(y_test, tf.int32),
)

# Optional one-hot encoding of the labels (left disabled):
# train_labels = tf.keras.utils.to_categorical(train_labels,num_classes=10)
# test_labels = tf.keras.utils.to_categorical(test_labels,num_classes=10)

# Wrap the tensors in tf.data pipelines; only the training set is shuffled.
train_slices = tf.data.Dataset.from_tensor_slices((x_train, y_train))
dataset = train_slices.shuffle(2000).batch(32)
test_slices = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_dataset = test_slices.batch(32)

模型训练

# Reset Keras' global state before building a fresh model.
tf.keras.backend.clear_session()
model = Malconv()
# Binary classification: one sigmoid output trained with binary cross-entropy.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['acc'],
)
# Train for 10 epochs, evaluating on the held-out set after each epoch.
model.fit(dataset, epochs=10, validation_data=test_dataset)
Epoch 1/10
47/47 [==============================] - 321s 7s/step - loss: 0.4718 - acc: 0.7947 - val_loss: 0.2674 - val_acc: 0.8700
Epoch 2/10
47/47 [==============================] - 320s 7s/step - loss: 0.1739 - acc: 0.9353 - val_loss: 0.1817 - val_acc: 0.9260
Epoch 3/10
47/47 [==============================] - 321s 7s/step - loss: 0.0577 - acc: 0.9833 - val_loss: 0.1847 - val_acc: 0.9220
Epoch 4/10
47/47 [==============================] - 322s 7s/step - loss: 0.0191 - acc: 0.9973 - val_loss: 0.2146 - val_acc: 0.9340
Epoch 5/10
47/47 [==============================] - 321s 7s/step - loss: 0.0035 - acc: 1.0000 - val_loss: 0.2066 - val_acc: 0.9280
Epoch 6/10
47/47 [==============================] - 322s 7s/step - loss: 0.0015 - acc: 1.0000 - val_loss: 0.2175 - val_acc: 0.9320
Epoch 7/10
47/47 [==============================] - 320s 7s/step - loss: 9.3740e-04 - acc: 1.0000 - val_loss: 0.2352 - val_acc: 0.9360
Epoch 8/10
47/47 [==============================] - 320s 7s/step - loss: 6.8522e-04 - acc: 1.0000 - val_loss: 0.2359 - val_acc: 0.9360
Epoch 9/10
47/47 [==============================] - 321s 7s/step - loss: 5.1924e-04 - acc: 1.0000 - val_loss: 0.2400 - val_acc: 0.9360
Epoch 10/10
47/47 [==============================] - 321s 7s/step - loss: 4.1028e-04 - acc: 1.0000 - val_loss: 0.2508 - val_acc: 0.9360

模型保存

# Persist the trained model to ./MalConvTF2.h5.
model.save("./MalConvTF2.h5")
  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值