学习使用决策树和随机森林算法检测暴力破解

1.使用决策树算法检测POP3暴力破解

# -*- coding:utf-8 -*-

import re
import matplotlib.pyplot as plt
import os
from sklearn.feature_extraction.text import CountVectorizer
from sklearn import cross_validation
import os
from sklearn.datasets import load_iris
from sklearn import tree
import pydotplus

# Load the records of a KDD99 data file.
def load_kdd99(filename):
    """Read ``filename`` and return its lines split into comma-separated fields.

    Every line has its newline characters stripped and is then split on
    ``,``. The result is a list containing one list of string fields per
    line of the file.
    """
    with open(filename) as handle:
        return [record.strip('\n').split(',') for record in handle]

def get_guess_passwdandNormal(x):
    """Build feature vectors and labels from POP3 records.

    ``x`` is a list of records (lists of string fields). A record is kept
    when field 41 is ``'guess_passwd.'`` or ``'normal.'`` and field 2 is
    ``'pop_3'``.

    Returns a tuple ``(w, y)``: ``w`` holds, per kept record, fields 0, 4-7
    and 22-29 converted to ``float``; ``y`` holds ``1`` for
    ``'guess_passwd.'`` records and ``0`` for ``'normal.'`` records.
    """
    wanted_labels = ['guess_passwd.', 'normal.']
    selected = []
    y = []
    # Keep only the POP3 records labelled guess_passwd. or normal.
    for record in x:
        label = record[41]
        if label not in wanted_labels or record[2] != 'pop_3':
            continue
        y.append(1 if label == 'guess_passwd.' else 0)
        # Pick the network features related to POP3 password guessing plus
        # the TCP-level features as the sample features.
        selected.append([record[0]] + record[4:8] + record[22:30])

    # Convert the selected string fields to floats in a second pass.
    w = [[float(value) for value in raw] for raw in selected]
    return w, y

if __name__ == '__main__':
    # Load the KDD99 records and keep the POP3 guess-password / normal samples.
    v = load_kdd99("/Users/zhanglipeng/Data/kdd99/corrected")
    x, y = get_guess_passwdandNormal(v)
    # Instantiate the decision tree classifier.
    clf = tree.DecisionTreeClassifier()
    # Ten-fold cross-validation. print(...) with a single argument emits the
    # same text on Python 2 and also parses on Python 3.
    print(cross_validation.cross_val_score(clf, x, y, n_jobs=-1, cv=10))

    # Fit on the full sample set and render the resulting tree to a PDF.
    clf = clf.fit(x, y)
    dot_data = tree.export_graphviz(clf, out_file=None)
    graph = pydotplus.graph_from_dot_data(dot_data)
    graph.write_pdf("/Users/zhanglipeng/Data/iris-dt.pdf")

运行结果:

(base) zhanglipengdeMacBook-Pro:WSaL zhanglipeng$ python TreeFTP.py

/anaconda2/lib/python2.7/site-packages/sklearn/cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.

  "This module will be removed in 0.20.", DeprecationWarning)

[0.90463215 1.         1.         1.         1.         1.

1.         1.         1.         1.        ]

从输出可知:十折交叉验证中第一折的准确率约为90.5%,其余九折均为100%,十折平均准确率约为99.0%。

2.使用决策树算法检测FTP暴力破解

原代码:

# -*- coding:utf-8 -*-

import re
import matplotlib.pyplot as plt
import os
from sklearn.feature_extraction.text import CountVectorizer
from sklearn import cross_validation
import os
from sklearn.datasets import load_iris
from sklearn import tree
import pydotplus


def load_one_flle(filename):
    """Return the first line of ``filename`` with newline characters stripped."""
    with open(filename) as handle:
        return handle.readline().strip('\n')

def load_adfa_training_files(rootdir):
    """Load every regular file directly inside ``rootdir`` as a label-0 sample.

    Returns a tuple ``(x, y)``: ``x`` holds what ``load_one_flle`` returned
    for each file and ``y`` holds a ``0`` label for each of them.
    Sub-directories are ignored (no recursion).
    """
    x = []
    y = []
    for entry in os.listdir(rootdir):
        path = os.path.join(rootdir, entry)
        if not os.path.isfile(path):
            continue
        x.append(load_one_flle(path))
        y.append(0)
    return x, y

def dirlist(path, allfile):
    """Recursively collect the paths of all non-directory entries under ``path``.

    Found paths are appended to ``allfile`` in place, and the same list
    object is returned.
    """
    for entry in os.listdir(path):
        full = os.path.join(path, entry)
        if not os.path.isdir(full):
            allfile.append(full)
            continue
        # Directories are descended into; their files land in the same list.
        dirlist(full, allfile)
    return allfile

def load_adfa_hydra_ftp_files(rootdir):
    """Load the Hydra FTP attack traces found below ``rootdir``.

    Every file collected by ``dirlist(rootdir, [])`` whose path contains a
    ``Hydra_FTP_<number>`` directory directly holding a file whose name
    starts with ``UAD-Hydra-FTP`` is read with ``load_one_flle``.

    Returns a tuple ``(x, y)``: ``x`` holds what ``load_one_flle`` returned
    for each matching file and ``y`` holds a ``1`` label for each of them.
    """
    # Match on the trailing path components instead of a fixed
    # "../data/ADFA-LD/Attack_Data_Master/" prefix, so the filter works for
    # whatever rootdir the caller passes (relative or absolute).
    hydra_ftp = re.compile(r"(?:^|/)Hydra_FTP_\d+/UAD-Hydra-FTP")
    x = []
    y = []
    for path in dirlist(rootdir, []):
        # Normalise separators so the pattern also applies where os.sep != "/".
        if hydra_ftp.search(path.replace(os.sep, "/")):
            x.append(load_one_flle(path))
            y.append(1)
    return x, y



if __name__ == '__main__':
    # Normal traces get label 0, Hydra FTP attack traces get label 1.
    x1, y1 = load_adfa_training_files("/Users/zhanglipeng/Data/ADFA-LD/Training_Data_Master/")
    x2, y2 = load_adfa_hydra_ftp_files("/Users/zhanglipeng/Data/ADFA-LD/Attack_Data_Master/")

    x = x1 + x2
    y = y1 + y2
    # Turn each trace line into a token-count vector.
    vectorizer = CountVectorizer(min_df=1)
    x = vectorizer.fit_transform(x)
    x = x.toarray()
    clf = tree.DecisionTreeClassifier()
    # Ten-fold cross-validation. print(...) with a single argument emits the
    # same text on Python 2 and also parses on Python 3.
    print(cross_validation.cross_val_score(clf, x, y, n_jobs=-1, cv=10))

    # Fit on the full sample set and render the resulting tree to a PDF.
    clf = clf.fit(x, y)
    dot_data = tree.export_graphviz(clf, out_file=None)
    graph = pydotplus.graph_from_dot_data(dot_data)
    graph.write_pdf("/Users/zhanglipeng/Data/photo/6/ftp.pdf")

报错:

Traceback (most recent call last):

  File "TreeFTP2.py", line 4, in <module>

    import matplotlib.pyplot as plt

  File "/anaconda2/envs/python27/lib/python2.7/site-packages/matplotlib/pyplot.py", line 115, in <module>

    _backend_mod, new_figure_manager, draw_if_interactive, _show = pylab_setup()

  File "/anaconda2/envs/python27/lib/python2.7/site-packages/matplotlib/backends/__init__.py", line 62, in pylab_setup

    [backend_name], 0)

  File "/anaconda2/envs/python27/lib/python2.7/site-packages/matplotlib/backends/backend_macosx.py", line 17, in <module>

    from matplotlib.backends import _macosx

RuntimeError: Python is not installed as a framework. The Mac OS X backend will not be able to function correctly if Python is not installed as a framework. See the Python documentation for more information on installing Python as a framework on Mac OS X. Please either reinstall Python as a framework, or try one of the other backends. If you are using (Ana)Conda please install python.app and replace the use of 'python' with 'pythonw'. See 'Working with Matplotlib on OSX' in the Matplotlib FAQ for more information.

解决方法:把 import matplotlib.pyplot as plt 改成下面三行,即在导入 pyplot 之前先指定 TkAgg 后端:

import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt

出现新的报错:

(python27) zhanglipengdeMacBook-Pro:WSaL zhanglipeng$ python TreeFTP2.py

Traceback (most recent call last):

  File "TreeFTP2.py", line 9, in <module>

    from sklearn import cross_validation

ImportError: cannot import name cross_validation

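原因是 sklearn 的 cross_validation 模块自 0.18 版起被弃用、0.20 版起被移除,相关函数已移到 sklearn.model_selection 中,改为从 model_selection 导入即可(例如 from sklearn.model_selection import cross_val_score)。详细解决方法可以看:https://blog.csdn.net/sinat_17697111/article/details/84835873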

出现新的报错:

(python27) zhanglipengdeMacBook-Pro:WSaL zhanglipeng$ python TreeFTP2.py

Traceback (most recent call last):

  File "TreeFTP2.py", line 13, in <module>

    import pydotplus

ImportError: No module named pydotplus

用 conda(或 pip)安装 pydotplus 即可,例如 pip install pydotplus;另外,生成 PDF 还需要系统中装有 Graphviz。

 

最后的代码为:

# -*- coding:utf-8 -*-

import re
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import os
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split

import os
from sklearn.datasets import load_iris
from sklearn import tree
import pydotplus


def load_one_flle(filename):
    """Read only the first line of ``filename``.

    Returns that line as a string with newline characters stripped; an
    empty file yields an empty string.
    """
    with open(filename) as source:
        first_line = source.readline()
    return first_line.strip('\n')

def load_adfa_training_files(rootdir):
    """Load the regular files directly inside ``rootdir`` as label-0 samples.

    Returns ``(x, y)`` where ``x`` is the list of values returned by
    ``load_one_flle`` for each file and ``y`` is a list of ``0`` labels of
    the same length. Sub-directories are skipped.
    """
    candidates = [os.path.join(rootdir, name) for name in os.listdir(rootdir)]
    x = [load_one_flle(path) for path in candidates if os.path.isfile(path)]
    # One 0 label per loaded sample.
    y = [0] * len(x)
    return x, y

def dirlist(path, allfile):
    """Append every non-directory path found under ``path`` to ``allfile``.

    Directories are traversed recursively. ``allfile`` is modified in place
    and also returned.
    """
    children = [os.path.join(path, name) for name in os.listdir(path)]
    for child in children:
        if os.path.isdir(child):
            dirlist(child, allfile)
        else:
            allfile.append(child)
    return allfile

def load_adfa_hydra_ftp_files(rootdir):
    """Load the Hydra FTP attack traces found below ``rootdir``.

    Every file collected by ``dirlist(rootdir, [])`` whose path contains a
    ``Hydra_FTP_<number>`` directory directly holding a file whose name
    starts with ``UAD-Hydra-FTP`` is read with ``load_one_flle``.

    Returns a tuple ``(x, y)``: ``x`` holds what ``load_one_flle`` returned
    for each matching file and ``y`` holds a ``1`` label for each of them.
    """
    # Filter on the trailing path components rather than on a literal
    # "../data/ADFA-LD/Attack_Data_Master/" prefix: with re.match that prefix
    # never matches an absolute rootdir, which would leave the attack class
    # empty.
    hydra_ftp = re.compile(r"(?:^|/)Hydra_FTP_\d+/UAD-Hydra-FTP")
    x = []
    y = []
    for path in dirlist(rootdir, []):
        # Normalise separators so the pattern also applies where os.sep != "/".
        if hydra_ftp.search(path.replace(os.sep, "/")):
            x.append(load_one_flle(path))
            y.append(1)
    return x, y



if __name__ == '__main__':
    # Normal traces get label 0, Hydra FTP attack traces get label 1.
    x1, y1 = load_adfa_training_files("/Users/zhanglipeng/Data/ADFA-LD/Training_Data_Master/")
    x2, y2 = load_adfa_hydra_ftp_files("/Users/zhanglipeng/Data/ADFA-LD/Attack_Data_Master/")

    x = x1 + x2
    y = y1 + y2
    # Turn each trace line into a token-count vector.
    vectorizer = CountVectorizer(min_df=1)
    x = vectorizer.fit_transform(x)
    x = x.toarray()
    clf = tree.DecisionTreeClassifier()
    # Ten-fold cross-validation. print(...) with a single argument emits the
    # same text on Python 2 and also parses on Python 3.
    print(cross_val_score(clf, x, y, n_jobs=-1, cv=10))

    # Fit on the full sample set and render the resulting tree to a PDF.
    clf = clf.fit(x, y)
    dot_data = tree.export_graphviz(clf, out_file=None)
    graph = pydotplus.graph_from_dot_data(dot_data)
    graph.write_pdf("/Users/zhanglipeng/Data/ftp.pdf")

3.使用随机森林算法检测FTP暴力破解

# -*- coding:utf-8 -*-

import re
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import os
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
import os
from sklearn.datasets import load_iris
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
import numpy as np


def load_one_flle(filename):
    """Open ``filename`` and return its first line without newline characters.

    Only the first line is read; the rest of the file is ignored.
    """
    with open(filename) as trace_file:
        head = trace_file.readline()
        return head.strip('\n')

def load_adfa_training_files(rootdir):
    """Collect label-0 samples from the regular files directly in ``rootdir``.

    Returns ``(x, y)``: ``x`` contains the ``load_one_flle`` result for each
    file, ``y`` contains a matching ``0`` for each entry of ``x``.
    Directory entries are not descended into.
    """
    x = []
    y = []
    entries = (os.path.join(rootdir, name) for name in os.listdir(rootdir))
    for path in entries:
        if os.path.isfile(path):
            x += [load_one_flle(path)]
            y += [0]
    return x, y

def dirlist(path, allfile):
    """Walk ``path`` recursively, gathering file paths into ``allfile``.

    Each entry that is not a directory is appended to ``allfile``; each
    directory is processed by a recursive call. Returns ``allfile`` itself.
    """
    for name in os.listdir(path):
        target = os.path.join(path, name)
        is_directory = os.path.isdir(target)
        if is_directory:
            dirlist(target, allfile)
        if not is_directory:
            allfile.append(target)
    return allfile

def load_adfa_hydra_ftp_files(rootdir):
    """Load the Hydra FTP attack traces found below ``rootdir``.

    Every file collected by ``dirlist(rootdir, [])`` whose path contains a
    ``Hydra_FTP_<number>`` directory directly holding a file whose name
    starts with ``UAD-Hydra-FTP`` is read with ``load_one_flle``.

    Returns a tuple ``(x, y)``: ``x`` holds what ``load_one_flle`` returned
    for each matching file and ``y`` holds a ``1`` label for each of them.
    """
    # Filter on the trailing path components rather than on one hard-coded
    # absolute directory, so the rootdir argument alone decides where the
    # data set lives.
    hydra_ftp = re.compile(r"(?:^|/)Hydra_FTP_\d+/UAD-Hydra-FTP")
    x = []
    y = []
    for path in dirlist(rootdir, []):
        # Normalise separators so the pattern also applies where os.sep != "/".
        if hydra_ftp.search(path.replace(os.sep, "/")):
            x.append(load_one_flle(path))
            y.append(1)
    return x, y



if __name__ == '__main__':

    # Normal traces get label 0, Hydra FTP attack traces get label 1.
    x1, y1 = load_adfa_training_files("/Users/zhanglipeng/Data/ADFA-LD/Training_Data_Master/")
    x2, y2 = load_adfa_hydra_ftp_files("/Users/zhanglipeng/Data/ADFA-LD/Attack_Data_Master/")

    x = x1 + x2
    y = y1 + y2
    # Turn each trace line into a token-count vector.
    vectorizer = CountVectorizer(min_df=1)
    x = vectorizer.fit_transform(x)
    x = x.toarray()
    # Mean ten-fold cross-validation score of a single decision tree.
    # print(...) with a single argument emits the same text on Python 2 and
    # also parses on Python 3.
    clf1 = tree.DecisionTreeClassifier()
    score = cross_val_score(clf1, x, y, n_jobs=-1, cv=10)
    print(np.mean(score))
    # Same evaluation for a 10-tree random forest, for comparison.
    clf2 = RandomForestClassifier(n_estimators=10, max_depth=None,min_samples_split=2, random_state=0)
    score = cross_val_score(clf2, x, y, n_jobs=-1, cv=10)
    print(np.mean(score))

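和上面的程序一样,程序在输出结果(下面的 0.9678470847084707 和 0.9838684868486848 两行,分别是决策树和随机森林十折交叉验证的平均准确率)的同时也打印了警告(RuntimeWarning,并非报错,程序仍正常输出结果):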

/anaconda2/envs/python27/lib/python2.7/site-packages/sklearn/externals/joblib/externals/loky/backend/semlock.py:217: RuntimeWarning: semaphore are broken on OSX, release might increase its maximal value

  "increase its maximal value", RuntimeWarning)

/anaconda2/envs/python27/lib/python2.7/site-packages/sklearn/externals/joblib/externals/loky/backend/semlock.py:217: RuntimeWarning: semaphore are broken on OSX, release might increase its maximal value

  "increase its maximal value", RuntimeWarning)

/anaconda2/envs/python27/lib/python2.7/site-packages/sklearn/externals/joblib/externals/loky/backend/semlock.py:217: RuntimeWarning: semaphore are broken on OSX, release might increase its maximal value

  "increase its maximal value", RuntimeWarning)

/anaconda2/envs/python27/lib/python2.7/site-packages/sklearn/externals/joblib/externals/loky/backend/semlock.py:217: RuntimeWarning: semaphore are broken on OSX, release might increase its maximal value

  "increase its maximal value", RuntimeWarning)

/anaconda2/envs/python27/lib/python2.7/site-packages/sklearn/externals/joblib/externals/loky/backend/semlock.py:217: RuntimeWarning: semaphore are broken on OSX, release might increase its maximal value

  "increase its maximal value", RuntimeWarning)

/anaconda2/envs/python27/lib/python2.7/site-packages/sklearn/externals/joblib/externals/loky/backend/semlock.py:217: RuntimeWarning: semaphore are broken on OSX, release might increase its maximal value

  "increase its maximal value", RuntimeWarning)

0.9678470847084707

/anaconda2/envs/python27/lib/python2.7/site-packages/sklearn/externals/joblib/externals/loky/backend/semlock.py:217: RuntimeWarning: semaphore are broken on OSX, release might increase its maximal value

  "increase its maximal value", RuntimeWarning)

/anaconda2/envs/python27/lib/python2.7/site-packages/sklearn/externals/joblib/externals/loky/backend/semlock.py:217: RuntimeWarning: semaphore are broken on OSX, release might increase its maximal value

  "increase its maximal value", RuntimeWarning)

/anaconda2/envs/python27/lib/python2.7/site-packages/sklearn/externals/joblib/externals/loky/backend/semlock.py:217: RuntimeWarning: semaphore are broken on OSX, release might increase its maximal value

  "increase its maximal value", RuntimeWarning)

0.9838684868486848

/anaconda2/envs/python27/lib/python2.7/site-packages/sklearn/externals/joblib/externals/loky/backend/semlock.py:217: RuntimeWarning: semaphore are broken on OSX, release might increase its maximal value

  "increase its maximal value", RuntimeWarning)

/anaconda2/envs/python27/lib/python2.7/site-packages/sklearn/externals/joblib/externals/loky/backend/semlock.py:217: RuntimeWarning: semaphore are broken on OSX, release might increase its maximal value

  "increase its maximal value", RuntimeWarning)

/anaconda2/envs/python27/lib/python2.7/site-packages/sklearn/externals/joblib/externals/loky/backend/semlock.py:217: RuntimeWarning: semaphore are broken on OSX, release might increase its maximal value

  "increase its maximal value", RuntimeWarning)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

东方隐侠-千里

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值