最近老师的项目需要做一个安全评估模型,想到以后可能会用到,所以在此也记录一下。主要是根据已知标签的数据来预测后面数据的标签值。这里的标签是0或1,相当于二分类问题,所以我这里使用神经网络来训练模型,用的是python中的sklearn这个包。数据如下:
3,4,7,4,5,2,0,1,1
1,3,7,4,5,2,0,0.3,0
6,4,5,4,5,2,0,1,1
5,3,5,4,5,2,0,0.3,0
5,4,7,7,5,2,0,0.5,1
6,4,5,7,5,2,0,0.5,0
4,8,7,4,7,2,75,0.5,1
3,8,6,4,2,1,0,1,1
3,8,6,4,2,1,0,1,1
3,8,6,4,2,1,0,1,0
8,4,7,7,5,1,0,0.2,1
9,3,5,9,3,1,0,0.01,0
9,8,6,4,2,1,0,1,1
9,3,5,9,3,1,0,0.01,0
9,8,6,4,2,1,0,1,1
9,8,6,8,4,2,0,0.11,0
9,8,5,8,4,2,0,0.11,1
5,6,7,8,5,2,0,0.2,1
6,6,7,8,5,2,0,0.2,0
4,4,5,4,5,1,0,0.11,0
3,8,6,4,2,1,0,1,1
3,8,6,4,2,1,0,1,1
3,8,7,4,7,2,75,0.5,1
当然,这里数据每个属性的值都是有具体物理意义的,实际使用中的数据非常多。代码如下:
# coding: utf-8
# 导入需要的包
from sklearn.neural_network import MLPClassifier
import pandas as pd
import numpy as np
# Data loading: returns the list of feature rows and the list of labels.
def read_data(path='rfid_data.csv', header='infer'):
    """Load the CSV data set and return standardized features and labels.

    The first eight columns are used as features and the ninth column as
    the label. Every feature column is standardized to zero mean and unit
    (population) standard deviation, computed over all rows of the file.

    Args:
        path: Location of the CSV file. Defaults to 'rfid_data.csv',
            resolved against the current working directory.
        header: Forwarded to ``pandas.read_csv``. With the default
            ``'infer'`` the first line of the file is consumed as column
            names; pass ``None`` for a file without a header line so that
            its first sample is not dropped.

    Returns:
        A tuple ``(x_list, y_list)``. ``x_list`` holds one list of eight
        standardized feature values per row; ``y_list`` holds the
        corresponding label values in the same order.
    """
    # The context manager closes the file even when parsing fails.
    with open(path) as f:
        df = pd.read_csv(f, header=header)
    data_x = df.iloc[:, :8].values
    data_y = df.iloc[:, 8].values
    std = np.std(data_x, axis=0)
    # A constant column has zero spread; dividing by 1 instead maps it to
    # all zeros rather than NaN, which the classifier could not consume.
    std = np.where(std == 0, 1.0, std)
    normalized_data = (data_x - np.mean(data_x, axis=0)) / std
    x_list = [list(row) for row in normalized_data]
    y_list = list(data_y)
    return x_list, y_list
# Result evaluation: turns predicted labels into a printed security rating.
def evaluate(array):
    """Print a security rating based on the mean of ``array``.

    The mean of the given values is used as the system score and mapped to
    one message:

        score <= 0.1        -> 'the system is safe.'
        0.1 < score <= 0.2  -> 'the system is safer.'
        0.2 < score <= 0.5  -> 'the system is medium security.'
        0.5 < score <= 0.8  -> 'the system is low security.'
        score > 0.8         -> 'the system is insecurity.'

    Nothing is printed when the mean is NaN, because every comparison is
    then false.

    Args:
        array: Sequence or array of numeric values, e.g. predicted labels.

    Returns:
        None. The rating is written to standard output.
    """
    system_score = np.mean(array)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    # Each branch is only reached when the previous upper bound was
    # exceeded, so the lower bounds need not be repeated.
    if system_score <= 0.1:
        print('the system is safe.')
    elif system_score <= 0.2:
        print('the system is safer.')
    elif system_score <= 0.5:
        print('the system is medium security.')
    elif system_score <= 0.8:
        print('the system is low security.')
    elif system_score > 0.8:
        print('the system is insecurity.')
if __name__ == '__main__':
    features, labels = read_data()
    # The first 20 samples train the model; the remaining ones are the
    # test set.
    split = 20
    train_x, test_x = features[:split], features[split:]
    train_y, test_y = labels[:split], labels[split:]
    # Fit the neural-network (multi-layer perceptron) classifier.
    model = MLPClassifier(
        solver='lbfgs',
        alpha=1e-5,
        hidden_layer_sizes=(5, 1),
        random_state=1,
    )
    model.fit(train_x, train_y)
    # Rate the system from the labels predicted for the test samples.
    evaluate(model.predict(test_x))