1 数据集介绍:
虹膜
150个实例
萼片长度,萼片宽度,花瓣长度,花瓣宽度
(sepal length, sepal width, petal length and petal width)
三个类别:
Iris setosa, Iris versicolor, Iris virginica.
2. 利用Python的机器学习库sklearn: SkLearnExample.py
from sklearn import neighbors
from sklearn import datasets
#实例化一个KNN分类器
knn = neighbors.KNeighborsClassifier()
#返回一个数据集(iris花类的数据集)
iris = datasets.load_iris()
print(iris)
#fit函数,一般代表建立模型。
knn.fit(iris.data, iris.target)
predictedLabel = knn.predict([[0.1, 0.2, 0.3, 0.4]])
print(predictedLabel)
#数据集由字典构成,key是data,value是一个特征值矩阵。150行代表150个花的实例,每个数组代表花的属性(长度等。。),第二个矩阵代表花的类别矩阵。
{'data': array([[ 5.1, 3.5, 1.4, 0.2],
[ 4.9, 3. , 1.4, 0.2],
[ 4.7, 3.2, 1.3, 0.2],
[ 4.6, 3.1, 1.5, 0.2],
[ 5. , 3.6, 1.4, 0.2],
[ 5.4, 3.9, 1.7, 0.4],
........................
[ 6.2, 3.4, 5.4, 2.3],
[ 5.9, 3. , 5.1, 1.8]]),
'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]),
'feature_names': ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)'],
'target_names': array(['setosa', 'versicolor', 'virginica'],dtype='<U10'), 'DESCR': 'Iris Plants Database\n====================\n\nNotes\n-----\nData Set Characteristics:\n :Number of Instances: 150 (50 in each of three classes)\n
...............................................................................
data.\n - Many, many more ...\n'}