# 下载文件地址:
# 链接: https://pan.baidu.com/s/1VdjCSw9MfKZ1WAcaiDyi8w 提取码: wtck
"""
使用随机森林分类器 对汽车进行分类
"""
import numpy as np
import sklearn.preprocessing as sp
import sklearn.ensemble as se
import sklearn.model_selection as ms
# Load the raw samples: comma-separated fields, every value kept as a string.
# The file is looked up in the current working directory. A bare file name is
# used instead of a ".\"-prefixed one because the backslash is only a path
# separator on Windows; elsewhere it would be treated as part of the name.
lines = np.loadtxt("car.txt", delimiter=',', dtype='str')
print(lines.shape)
# Label-encode the data column by column. Each column gets its own encoder and
# the encoders are stored in column order, so the same mappings can later be
# re-applied to new samples and predictions can be decoded back to labels.
train_x, train_y = [], []
encoders = []  # one fitted LabelEncoder per column; the last one is the target's
columns = lines.T
last_index = len(columns) - 1  # position of the target column
for index, row in enumerate(columns):
    encoder = sp.LabelEncoder()
    if index < last_index:  # feature column
        train_x.append(encoder.fit_transform(row))
    else:  # final column holds the class label
        train_y = encoder.fit_transform(row)
    encoders.append(encoder)
train_x = np.array(train_x).T  # encoded columns were collected as rows; flip back
train_y = np.array(train_y)
print(train_x.shape, train_y.shape)
print(train_x[0], train_y[0])
# Random-forest classifier: 200 trees, depth capped at 6, fixed random seed.
model = se.RandomForestClassifier(
    n_estimators=200,
    max_depth=6,
    random_state=7,
)
# 5-fold cross-validation scored with weighted F1; print the mean over folds.
score = ms.cross_val_score(
    model, train_x, train_y, scoring="f1_weighted", cv=5)
print(score.mean())
# Fit on the complete training set so the model can be used for prediction.
model.fit(train_x, train_y)
# Hand-written samples used to try out the trained model: each row lists the
# feature values followed by the expected class label in the last position.
data = np.array([
    ['high', 'med', '5more', '4', 'big', 'low', 'unacc'],
    ['high', 'high', '4', '4', 'med', 'med', 'acc'],
    ['low', 'low', '2', '4', 'small', 'high', 'good'],
    ['low', 'med', '3', '4', 'med', 'high', 'vgood'],
])
# Encode column by column, reusing the encoder stored at the same column
# position in `encoders` so the codes match the ones seen during training.
test_columns = data.T
label_pos = len(test_columns) - 1  # the last column is the expected label
test_x = np.array([
    encoders[pos].transform(column)
    for pos, column in enumerate(test_columns[:label_pos])
]).T
test_y = np.array(encoders[label_pos].transform(test_columns[label_pos]))
print("----------------->测试数据\n", test_x.shape, test_y.shape)
print(test_x[0], test_y[0])
# Predict class codes for the hand-written samples.
prd_test_y = model.predict(test_x)
# Show the expected codes first, then the predicted ones.
print("----------------->预测结果\n", test_y)
print(prd_test_y)
# Decode both code arrays back to class names with the target column's encoder
# (the last entry of `encoders`): expected labels first, predictions second.
label_encoder = encoders[-1]
for codes in (test_y, prd_test_y):
    print(label_encoder.inverse_transform(codes))