1.数据搜集
加载ADFA-LD中正常样本数据:
def load_adfa_training_files(rootdir):
    """Load every regular file directly under ``rootdir`` as a normal sample.

    Sub-directories are skipped; this function does not recurse. Each file
    is read with ``load_one_file`` and paired with the label ``0``.

    Args:
        rootdir: Directory holding the normal ADFA-LD training traces.

    Returns:
        A tuple ``(x, y)`` of parallel lists: ``x`` holds whatever
        ``load_one_file`` returns for each file, ``y`` holds one ``0`` per
        loaded file.
    """
    x = []
    y = []
    # Iterate the directory entries directly instead of indexing a local
    # named ``list`` (which shadowed the builtin).
    for name in os.listdir(rootdir):
        path = os.path.join(rootdir, name)
        if os.path.isfile(path):
            x.append(load_one_file(path))
            # Label is appended together with the sample so x and y stay aligned.
            y.append(0)
    return x, y
定义遍历目录下文件的函数:
def dirlist(path, allfile):
    """Recursively collect the paths of all non-directory entries under ``path``.

    Entries are visited in the order ``os.listdir`` yields them; a
    sub-directory is descended into at the point where it is encountered.

    Args:
        path: Directory to walk.
        allfile: List that found paths are appended to. It is mutated in
            place, so callers normally pass a fresh ``[]``.

    Returns:
        The same ``allfile`` list object, after appending.
    """
    for entry in os.listdir(path):
        child = os.path.join(path, entry)
        if os.path.isdir(child):
            # The recursive call appends into the shared accumulator, so its
            # return value does not need to be captured.
            dirlist(child, allfile)
        else:
            allfile.append(child)
    return allfile
从攻击数据集中筛选出和WebShell相关的数据:
def load_adfa_webshell_files(rootdir, pattern=r" .."):
    """Load the attack traces under ``rootdir`` whose path matches ``pattern``.

    All files below ``rootdir`` are enumerated with ``dirlist``; every path
    that matches ``pattern`` (anchored at the start of the path, as with
    ``re.match``) is read with ``load_one_file`` and labelled ``1``.

    Args:
        rootdir: Root directory of the attack data set.
        pattern: Regular expression selecting the WebShell-related files.
            NOTE(review): the default ``r" .."`` is kept for backward
            compatibility but looks like an elided placeholder; it only
            matches paths that begin with a space. Pass the real
            WebShell path pattern for your data layout.

    Returns:
        A tuple ``(x, y)`` of parallel lists: ``x`` holds whatever
        ``load_one_file`` returns for each selected file, ``y`` holds one
        ``1`` per selected file.
    """
    x = []
    y = []
    # Compile once; the pattern is invariant across the loop.
    matcher = re.compile(pattern)
    for filepath in dirlist(rootdir, []):
        if matcher.match(filepath):
            x.append(load_one_file(filepath))
            y.append(1)
    return x, y
2.特征化
# Build the labelled corpus: normal traces (label 0) followed by WebShell
# attack traces (label 1). The "..." paths are placeholders for the data
# directories.
x1, y1 = load_adfa_training_files("...")  # was misspelled load_adfa_training_file (NameError)
x2, y2 = load_adfa_webshell_files("...")
x = x1 + x2
y = y1 + y2
# Turn each sample into a token-count vector.
# NOTE(review): assumes load_one_file returns one text string per file,
# which is what CountVectorizer expects - confirm against its definition.
vectorizer = CountVectorizer(min_df=1)
x = vectorizer.fit_transform(x)
# Convert the vectorizer's output to a dense array for the later training step.
x = x.toarray()
3.训练样本与效果验证:与(三)一样