"""
酒的分类
"""
from sklearn.datasets import load_wine
import numpy as np
# Load the wine dataset that ships with scikit-learn's datasets module.
# load_wine returns a Bunch: a dictionary-like object of key/value pairs.
wine_dataset = load_wine()

# Show which keys the dataset exposes.
dataset_keys = wine_dataset.keys()
print("红酒数据集中的键:\n{}".format(dataset_keys))

# Show the shape of the data array as a quick overview.
data_shape = wine_dataset['data'].shape
print("数据概况:{}".format(data_shape))

# The DESCR entry holds the longer, human-readable dataset description.
dataset_description = wine_dataset['DESCR']
print(dataset_description)
红酒数据集中的键:
dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names'])
数据概况:(178, 13)
.. _wine_dataset:
Wine recognition dataset
Attribute Information:
- Alcohol
- Malic acid
- Ash
- Alcalinity of ash
- Magnesium
- Total phenols
- Flavanoids
- Nonflavanoid phenols
- Proanthocyanins
- Color intensity
- Hue
- OD280/OD315 of diluted wines
- Proline
class:
- class_0
- class_1
- class_2
省略一部分描述
# Build the training and test sets.
# train_test_split shuffles the samples and then splits them; by default
# 75% of the samples go to the training set and 25% to the test set.
from sklearn.model_selection import train_test_split

# random_state seeds the pseudo-random shuffle used for the split.
# Passing a fixed integer (0 here) makes the split identical on every run;
# the split only differs between runs when random_state is left at its
# default of None.
X_train, X_test, y_train, y_test = train_test_split(
    wine_dataset['data'], wine_dataset['target'], random_state=0
)

# Report the shape of each feature matrix and target vector.
for split_name, split_array in (
    ('X_train', X_train),
    ('X_test', X_test),
    ('y_train', y_train),
    ('y_test', y_test),
):
    print('{} shape:{}'.format(split_name, split_array.shape))
X_train shape:(133, 13)
X_test shape:(45, 13)
y_train shape:(133,)
y_test shape:(45,)
# Model the data with the k-nearest-neighbours algorithm.
from sklearn.neighbors import KNeighborsClassifier

# Build a classifier that looks at a single neighbour (n_neighbors=1) and
# fit it on the training split; fit() returns the estimator itself.
knn = KNeighborsClassifier(n_neighbors=1).fit(X_train, y_train)
print(knn)

# Score the fitted model on the held-out test split.
test_score = knn.score(X_test, y_test)
print("测试数据集得分:{:.2f}".format(test_score))

# Classify a new, unseen wine described by its 13 feature values.
new_wine_features = [
    13.2, 2.77, 2.51, 18.5, 96.6, 1.04, 2.55,
    0.57, 1.47, 6.2, 1.05, 3.33, 820,
]
# predict() expects a 2-D array (one row per sample), hence the extra list.
X_new = np.array([new_wine_features])
prediction = knn.predict(X_new)

# Map the predicted class index back to its class name.
predicted_names = wine_dataset['target_names'][prediction]
print('预测新红酒的分类为:{}'.format(predicted_names))
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
metric_params=None, n_jobs=None, n_neighbors=1, p=2,
weights='uniform')
测试数据集得分:0.76
预测新红酒的分类为:['class_2']