基于决策树模型(随机森林)实现分类。
import numpy as np
import sklearn.ensemble as se
import sklearn.preprocessing as sp
# Read the sample data: each non-blank line of data.txt is split on commas
# into one record, and the records are stacked into a 2-D string array.
data = []
with open('data.txt', 'r') as f:
    for line in f:
        # Strip only the line terminator. Slicing with line[:-1] would also
        # remove the last character of a final line that lacks a newline.
        record = line.rstrip('\n')
        if not record.strip():
            # A blank line would add a one-field row and make the array ragged.
            continue
        data.append(record.split(","))
data = np.array(data)
train_x, train_y, encoders = [], [], []
# Label-encode the data column by column. Every column gets its own
# LabelEncoder; all columns but the last become features, the last column
# becomes the target. Encoders are stored in column order so the same
# mapping can be reused later on new input.
columns = data.T
label_index = len(columns) - 1  # position of the target (last) column
for index, row in enumerate(columns):
    encoder = sp.LabelEncoder()
    codes = encoder.fit_transform(row)
    if index < label_index:
        train_x.append(codes)
    else:
        train_y = codes
    encoders.append(encoder)
# Assemble the dataset: transpose the per-column feature codes back to one
# row per sample, and cast the target codes to float64.
# NOTE(review): integer codes are presumably needed if predictions are later
# mapped back through encoders[-1].inverse_transform -- confirm the float
# cast is intended.
train_x, train_y = np.array(train_x).T, np.array(train_y).astype('f8')
# Train a random forest classifier on the label-encoded samples.
model = se.RandomForestClassifier(
    n_estimators=150,
    max_depth=4,
    random_state=6,
)
model.fit(train_x, train_y)
# Interactive prediction: repeatedly read one comma-separated sample from
# the user, encode each field with the encoder stored at the same position,
# and run the trained model on it.
while True:
    x = input("[测试集输入:]").strip()
    x = np.array(x.split(","))
    # One row per field, so that each field reaches its encoder as a
    # one-element 1-D array.
    x = x.reshape(-1, 1)
    test_x = []
    for i, row in enumerate(x):
        encoder = encoders[i]
        test_x.append(encoder.transform(row))
    # Transpose the per-field codes into a single sample row for predict().
    test_x = np.array(test_x).T
    # NOTE(review): pred_y is not used within the lines visible here;
    # presumably it is reported by code that follows -- confirm.
    pred_y = model.predict(test_x)