多层感知机+聚类算法

最新推荐文章于 2023-06-28 01:36:54 发布

超级小菜鸟+无敌小透明

最新推荐文章于 2023-06-28 01:36:54 发布

阅读量569

点赞数 2

分类专栏：多层感知器和聚类；文章标签：算法、聚类、sklearn、人工智能

本文链接：https://blog.csdn.net/m0_65523585/article/details/128227422

版权

多层感知器和聚类专栏收录该内容

1 篇文章 0 订阅

订阅专栏

多层感知器

from sklearn.neural_network import MLPClassifier

from sklearn.datasets import load_wine
# Load the wine dataset bundled with scikit-learn
wine=load_wine()

from sklearn.model_selection import train_test_split

X=wine.data[:,:2]  # keep only the first two feature columns for modelling

y=wine.target  # class label of every sample

print(y)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]

# Data visualisation: scatter the two selected features, coloured by class
import matplotlib.pyplot as plt
# IPython magic (only valid inside Jupyter/IPython): draw figures inline
%matplotlib inline
plt.figure(dpi=300)
plt.scatter(X[:,0],X[:,1], c=y,cmap=plt.cm.spring, edgecolor='k')

<matplotlib.collections.PathCollection at 0x1e16e0ccbe0>

在这里插入图片描述

# Hold out 30% of the samples as a test set (fixed seed for repeatability)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=8
)
# The usual three steps with MLPClassifier: instantiate, fit, score
mlp = MLPClassifier(random_state=8).fit(X_train, y_train)
train_acc = mlp.score(X_train, y_train)
test_acc = mlp.score(X_test, y_test)
print(train_acc, test_acc)

0.3870967741935484 0.42592592592592593

# Three hidden layers of 100 units each.  The recorded run emitted a
# ConvergenceWarning (the 200-iteration limit was reached).
mlp = MLPClassifier(
    hidden_layer_sizes=(100, 100, 100),
    random_state=8,
).fit(X_train, y_train)
train_acc = mlp.score(X_train, y_train)
test_acc = mlp.score(X_test, y_test)
print(train_acc, test_acc)

0.8064516129032258 0.7777777777777778


C:\Users\yangxiuyun\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:614: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.
  warnings.warn(

# Two hidden layers of 100 units each.  The recorded run emitted a
# ConvergenceWarning (the 200-iteration limit was reached).
mlp = MLPClassifier(
    hidden_layer_sizes=(100, 100),
    random_state=8,
).fit(X_train, y_train)
train_acc = mlp.score(X_train, y_train)
test_acc = mlp.score(X_test, y_test)
print(train_acc, test_acc)

0.7741935483870968 0.7777777777777778


C:\Users\yangxiuyun\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:614: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.
  warnings.warn(

# Same two-layer architecture, but with tanh activations.  The recorded run
# emitted a ConvergenceWarning (the 200-iteration limit was reached).
mlp = MLPClassifier(
    hidden_layer_sizes=(100, 100),
    activation='tanh',
    random_state=8,
).fit(X_train, y_train)
train_acc = mlp.score(X_train, y_train)
test_acc = mlp.score(X_test, y_test)
print(train_acc, test_acc)

0.8145161290322581 0.7592592592592593


C:\Users\yangxiuyun\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:614: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.
  warnings.warn(

# Two-layer ReLU network trained with plain stochastic gradient descent.
# The recorded run emitted a ConvergenceWarning (200 iterations reached).
mlp_options = dict(
    hidden_layer_sizes=(100, 100),
    activation='relu',
    solver='sgd',
    random_state=8,
)
mlp = MLPClassifier(**mlp_options).fit(X_train, y_train)
train_acc = mlp.score(X_train, y_train)
test_acc = mlp.score(X_test, y_test)
print(train_acc, test_acc)

0.5806451612903226 0.5


C:\Users\yangxiuyun\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:614: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.
  warnings.warn(

MNIST手写数字识别

from sklearn.datasets import fetch_openml
# Fetch the 'mnist_784' dataset from OpenML
mnist=fetch_openml('mnist_784')

# List the fields exposed by the returned object
mnist.keys()

dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])

print(mnist.target[:100])

0     5
1     0
2     4
3     1
4     9
     ..
95    0
96    7
97    8
98    3
99    1
Name: class, Length: 100, dtype: category
Categories (10, object): ['0', '1', '2', '3', ..., '6', '7', '8', '9']

# Define the features (X) and the target (y)
X=mnist.data/255.  # divide every pixel value by 255 (a rescaling, not z-score standardisation)
y=mnist.target

# Train/test split with the default test fraction and a fixed seed
X_train,X_test,y_train,y_test=train_test_split(X,y,random_state=8)

# Two hidden layers of 100 ReLU units, optimised with L-BFGS
mlp = MLPClassifier (hidden_layer_sizes=(100,100),
                     activation='relu',
                     solver='lbfgs',random_state=8)
mlp.fit(X_train, y_train)

MLPClassifier(hidden_layer_sizes=(100, 100), random_state=8, solver='lbfgs')

# Report the accuracy on both splits, formatted as percentages
train_acc = mlp.score(X_train, y_train)
test_acc = mlp.score(X_test, y_test)
report = "训练集准确率为{:.2%}，测试集准确率为{:.2%}"
print(report.format(train_acc, test_acc))

训练集准确率为100.00%，测试集准确率为97.56%

from PIL import Image

# Load the hand-written digit photo.
# Converting to RGB guarantees that getpixel() returns a tuple, so the `[0]`
# (first channel) lookups on `num` work for grayscale or palette images too.
# The context manager closes the underlying file once convert() has produced
# an independent in-memory image.
with Image.open(r"C:/Users/yang'xiu'yun/Desktop/8.jpg") as img:
    # Shrink to 28x28, i.e. 784 pixels like the 'mnist_784' samples
    num = img.convert("RGB").resize((28, 28))

# Collect the first-channel value of every pixel, row by row
# (getpixel takes (x, y), hence the (j, i) order)
arr = [num.getpixel((j, i))[0] for i in range(28) for j in range(28)]

import numpy as np

np.array(arr).reshape((28,28))

array([[255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 247, 223, 223,
        239, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255],
       [255, 255, 255, 255, 253, 255, 255, 255, 255, 247, 206, 231, 233,
        210, 216, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 221, 238, 255, 255,
        255, 220, 234, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 216, 250, 255, 255,
        255, 235, 230, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 215, 252, 255, 255,
        255, 224, 239, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 216, 248, 255, 255,
        255, 214, 251, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 234, 222, 255, 255,
        250, 215, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 212, 234, 255,
        239, 228, 227, 239, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 253, 210, 233,
        227, 216, 213, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 254, 213,
        173, 215, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 236,
        165, 236, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 247, 208,
        249, 208, 236, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 254, 210, 243,
        255, 251, 208, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 234, 222, 255,
        255, 255, 220, 244, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 221, 245, 255,
        255, 255, 228, 234, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 221, 247, 255,
        255, 255, 236, 229, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 221, 244, 255,
        255, 255, 227, 234, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 232, 227, 255,
        255, 253, 208, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 252, 206, 251,
        248, 210, 234, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 238, 203,
        208, 235, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 253,
        255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255]])

# Rebuild the pixel list, this time scaled and inverted:
# 1 - value/255, so a pixel value of 255 becomes 0 and 0 becomes 1
arr = [
    1 - float(num.getpixel((col, row))[0]) / 255.
    for row in range(28)
    for col in range(28)
]
# Flatten the pixels into a single-row feature matrix
arr1 = np.array(arr).reshape(1, -1)

print("该图片的数字识别为:{}".format(mlp.predict(arr1)))

该图片的数字识别为:['8']

MLPClassifier拟合IRIS数据

from sklearn.datasets import load_iris
iris=load_iris()

# Standardise the features with StandardScaler.
# Fit the `scaler` instance itself (instead of a second, throw-away
# StandardScaler) so the fitted scaler stays available for transforming
# new samples later.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
irisZX = scaler.fit_transform(iris.data)

输入层：4神经元==>隐含层：5神经元==>输出层：3神经元

from sklearn.neural_network import MLPClassifier
# One hidden layer with 5 logistic units, optimised with L-BFGS.
# `(5,)` is a one-element tuple; without the trailing comma `(5)` is just
# the integer 5 in parentheses.
clf = MLPClassifier(activation = 'logistic',hidden_layer_sizes = (5,),
                    solver = 'lbfgs',random_state = 1)
clf.fit(irisZX,iris.target)
# NOTE: scored on the same data the model was fitted on (training accuracy)
clf.score(irisZX,iris.target)

0.9933333333333333

层次聚类

import pandas as pd

# Read the seeds sample from a CSV file given by an absolute local path
seeds_df=pd.read_csv("C:/Users/yang'xiu'yun/Documents/Tencent Files/2905041081/FileRecv/seeds-less-rows.csv")
# Preview the first rows
seeds_df.head()

	area	perimeter	compactness	length	width	asymmetry_coefficient	groove_length	grain_variety
0	14.88	14.57	0.8811	5.554	3.333	1.018	4.956	Kama wheat
1	14.69	14.49	0.8799	5.563	3.259	3.586	5.219	Kama wheat
2	14.03	14.16	0.8796	5.438	3.201	1.717	5.001	Kama wheat
3	13.99	13.83	0.9183	5.119	3.383	5.234	4.781	Kama wheat
4	14.11	14.26	0.8722	5.520	3.168	2.688	5.219	Kama wheat

seeds_df.grain_variety.value_counts()

Rosa wheat        14
Kama wheat        14
Canadian wheat    14
Name: grain_variety, dtype: int64

# Detach the label column (pop removes it from seeds_df in place) and keep
# the remaining columns as a plain array.
# The name `varities` is kept because later cells refer to it.
varities = seeds_df.pop('grain_variety').tolist()
samples = seeds_df.to_numpy()

samples

array([[14.88  , 14.57  ,  0.8811,  5.554 ,  3.333 ,  1.018 ,  4.956 ],
       [14.69  , 14.49  ,  0.8799,  5.563 ,  3.259 ,  3.586 ,  5.219 ],
       [14.03  , 14.16  ,  0.8796,  5.438 ,  3.201 ,  1.717 ,  5.001 ],
       [13.99  , 13.83  ,  0.9183,  5.119 ,  3.383 ,  5.234 ,  4.781 ],
       [14.11  , 14.26  ,  0.8722,  5.52  ,  3.168 ,  2.688 ,  5.219 ],
       [13.02  , 13.76  ,  0.8641,  5.395 ,  3.026 ,  3.373 ,  4.825 ],
       [15.49  , 14.94  ,  0.8724,  5.757 ,  3.371 ,  3.412 ,  5.228 ],
       [16.2   , 15.27  ,  0.8734,  5.826 ,  3.464 ,  2.823 ,  5.527 ],
       [13.5   , 13.85  ,  0.8852,  5.351 ,  3.158 ,  2.249 ,  5.176 ],
       [15.36  , 14.76  ,  0.8861,  5.701 ,  3.393 ,  1.367 ,  5.132 ],
       [15.78  , 14.91  ,  0.8923,  5.674 ,  3.434 ,  5.593 ,  5.136 ],
       [14.46  , 14.35  ,  0.8818,  5.388 ,  3.377 ,  2.802 ,  5.044 ],
       [11.23  , 12.63  ,  0.884 ,  4.902 ,  2.879 ,  2.269 ,  4.703 ],
       [14.34  , 14.37  ,  0.8726,  5.63  ,  3.19  ,  1.313 ,  5.15  ],
       [16.84  , 15.67  ,  0.8623,  5.998 ,  3.484 ,  4.675 ,  5.877 ],
       [17.32  , 15.91  ,  0.8599,  6.064 ,  3.403 ,  3.824 ,  5.922 ],
       [18.72  , 16.19  ,  0.8977,  6.006 ,  3.857 ,  5.324 ,  5.879 ],
       [18.88  , 16.26  ,  0.8969,  6.084 ,  3.764 ,  1.649 ,  6.109 ],
       [18.76  , 16.2   ,  0.8984,  6.172 ,  3.796 ,  3.12  ,  6.053 ],
       [19.31  , 16.59  ,  0.8815,  6.341 ,  3.81  ,  3.477 ,  6.238 ],
       [17.99  , 15.86  ,  0.8992,  5.89  ,  3.694 ,  2.068 ,  5.837 ],
       [18.85  , 16.17  ,  0.9056,  6.152 ,  3.806 ,  2.843 ,  6.2   ],
       [19.38  , 16.72  ,  0.8716,  6.303 ,  3.791 ,  3.678 ,  5.965 ],
       [18.96  , 16.2   ,  0.9077,  6.051 ,  3.897 ,  4.334 ,  5.75  ],
       [18.14  , 16.12  ,  0.8772,  6.059 ,  3.563 ,  3.619 ,  6.011 ],
       [18.65  , 16.41  ,  0.8698,  6.285 ,  3.594 ,  4.391 ,  6.102 ],
       [18.94  , 16.32  ,  0.8942,  6.144 ,  3.825 ,  2.908 ,  5.949 ],
       [17.36  , 15.76  ,  0.8785,  6.145 ,  3.574 ,  3.526 ,  5.971 ],
       [13.32  , 13.94  ,  0.8613,  5.541 ,  3.073 ,  7.035 ,  5.44  ],
       [11.43  , 13.13  ,  0.8335,  5.176 ,  2.719 ,  2.221 ,  5.132 ],
       [12.01  , 13.52  ,  0.8249,  5.405 ,  2.776 ,  6.992 ,  5.27  ],
       [11.34  , 12.87  ,  0.8596,  5.053 ,  2.849 ,  3.347 ,  5.003 ],
       [12.02  , 13.33  ,  0.8503,  5.35  ,  2.81  ,  4.271 ,  5.308 ],
       [12.44  , 13.59  ,  0.8462,  5.319 ,  2.897 ,  4.924 ,  5.27  ],
       [11.55  , 13.1   ,  0.8455,  5.167 ,  2.845 ,  6.715 ,  4.956 ],
       [11.26  , 13.01  ,  0.8355,  5.186 ,  2.71  ,  5.335 ,  5.092 ],
       [12.46  , 13.41  ,  0.8706,  5.236 ,  3.017 ,  4.987 ,  5.147 ],
       [11.81  , 13.45  ,  0.8198,  5.413 ,  2.716 ,  4.898 ,  5.352 ],
       [11.27  , 12.86  ,  0.8563,  5.091 ,  2.804 ,  3.985 ,  5.001 ],
       [12.79  , 13.53  ,  0.8786,  5.224 ,  3.054 ,  5.483 ,  4.958 ],
       [12.67  , 13.32  ,  0.8977,  4.984 ,  3.135 ,  2.3   ,  4.745 ],
       [11.23  , 12.88  ,  0.8511,  5.14  ,  2.795 ,  4.325 ,  5.003 ]])

# IPython magic (only valid inside Jupyter/IPython): draw figures inline
%matplotlib inline
from scipy.cluster.hierarchy import linkage,dendrogram
import matplotlib.pyplot as plt
# Run hierarchical clustering on the samples using complete linkage
mergings = linkage (samples,method='complete')

mergings

array([[33.        , 36.        ,  0.27162909,  2.        ],
       [21.        , 26.        ,  0.31365739,  2.        ],
       [18.        , 43.        ,  0.32846589,  3.        ],
       [38.        , 41.        ,  0.34657328,  2.        ],
       [19.        , 22.        ,  0.37233454,  2.        ],
       [15.        , 27.        ,  0.38916958,  2.        ],
       [ 4.        , 11.        ,  0.48519909,  2.        ],
       [ 2.        , 13.        ,  0.60220511,  2.        ],
       [23.        , 25.        ,  0.64447995,  2.        ],
       [ 0.        ,  9.        ,  0.66671658,  2.        ],
       [32.        , 37.        ,  0.68359363,  2.        ],
       [39.        , 42.        ,  0.75541297,  3.        ],
       [12.        , 29.        ,  0.76129577,  2.        ],
       [30.        , 34.        ,  0.79066703,  2.        ],
       [24.        , 47.        ,  0.89015184,  3.        ],
       [ 1.        ,  6.        ,  0.96077742,  2.        ],
       [31.        , 45.        ,  0.98956619,  3.        ],
       [16.        , 50.        ,  1.05891757,  3.        ],
       [17.        , 20.        ,  1.11543099,  2.        ],
       [ 8.        , 40.        ,  1.13733735,  2.        ],
       [44.        , 46.        ,  1.1662041 ,  5.        ],
       [ 5.        , 61.        ,  1.28676337,  3.        ],
       [35.        , 52.        ,  1.37690488,  3.        ],
       [48.        , 49.        ,  1.52865125,  4.        ],
       [53.        , 64.        ,  1.66517195,  6.        ],
       [14.        , 56.        ,  1.74234784,  4.        ],
       [51.        , 65.        ,  1.91015424,  6.        ],
       [ 7.        , 57.        ,  1.91749035,  3.        ],
       [28.        , 55.        ,  2.08980038,  3.        ],
       [54.        , 58.        ,  2.13385537,  5.        ],
       [ 3.        , 10.        ,  2.22187038,  2.        ],
       [59.        , 67.        ,  2.31852251,  7.        ],
       [60.        , 62.        ,  2.33686195,  7.        ],
       [68.        , 69.        ,  2.76779035,  9.        ],
       [66.        , 70.        ,  3.13448417,  9.        ],
       [63.        , 71.        ,  3.25744652,  8.        ],
       [73.        , 74.        ,  3.71580316, 14.        ],
       [72.        , 75.        ,  4.68116988, 11.        ],
       [76.        , 77.        ,  5.45789312, 17.        ],
       [78.        , 79.        ,  6.74608427, 25.        ],
       [80.        , 81.        ,  9.61230238, 42.        ]])

# Dendrogram of the merge history, one leaf per sample labelled by variety
fig = plt.figure(figsize=(15, 8))
leaf_style = dict(labels=varities, leaf_rotation=90, leaf_font_size=6)
dendrogram(mergings, **leaf_style)
plt.show()

在这里插入图片描述

# Same dendrogram on a taller (15 x 20) canvas
fig = plt.figure(figsize=(15, 20))
leaf_style = dict(labels=varities, leaf_rotation=90, leaf_font_size=6)
dendrogram(mergings, **leaf_style)
plt.show()

在这里插入图片描述

# Dendrogram drawn with orientation='bottom'
fig = plt.figure(figsize=(15, 8))
leaf_style = dict(labels=varities, leaf_rotation=90, leaf_font_size=6)
dendrogram(mergings, orientation='bottom', **leaf_style)
plt.show()

在这里插入图片描述

# Derive flat cluster labels from the merge tree
from scipy.cluster.hierarchy import fcluster
# Cut using the 'distance' criterion with threshold 6
labels = fcluster(mergings,6,criterion='distance')
# Pair every cluster label with the true variety of the same sample
df = pd.DataFrame ({'labels':labels, 'varities': varities})
# Contingency table: cluster label (rows) versus variety (columns)
ct = pd.crosstab(df['labels'],df['varities'])
ct

varities	Canadian wheat	Kama wheat	Rosa wheat
labels
1	14	3	0
2	0	0	14
3	0	11	0

DBSCAN聚类

# Generate a synthetic two-moons dataset (200 samples, noise 0.05, fixed seed)
from sklearn. datasets import make_moons
X,y = make_moons (n_samples = 200,
                  noise = 0.05,
                  random_state=8)

import matplotlib.pyplot as plt

# Data visualisation: scatter the samples, coloured by their generated label
plt.scatter(X[:,0],X[:,1],c=y,cmap=plt.cm.spring,edgecolors = 'k')

<matplotlib.collections.PathCollection at 0x22e84005b50>

在这里插入图片描述

# Same generator with noise = 0; note the label variable is `yl` (letter l)
X1,yl = make_moons(n_samples = 200,
                    noise = 0,
                    random_state=8)
# Data visualisation
plt.figure(dpi=300)
plt.scatter(X1[:,0],X1[:,1],c = yl,cmap = plt.cm.spring,edgecolors='k')

<matplotlib.collections.PathCollection at 0x22e84ff1e20>

在这里插入图片描述

from sklearn.cluster import KMeans

# Cluster the moons data with K-Means: two clusters, random initial centroids
kmeans = KMeans(n_clusters=2, init='random')
# Fit the model and get the cluster index of every sample
y_km = kmeans.fit_predict(X)

# Draw each cluster with its own colour and marker
for cluster_id, (colour, marker) in enumerate((('red', 'o'), ('green', 's'))):
    members = y_km == cluster_id
    plt.scatter(X[members, 0],
                X[members, 1],
                c=colour,
                marker=marker,
                s=40,
                label='cluster {}'.format(cluster_id + 1),
                edgecolor='k')
plt.title(' K-means clustering')
plt.legend()

<matplotlib.legend.Legend at 0x1f554bc9550>

在这里插入图片描述

# Cluster the same moons data with DBSCAN
from sklearn.cluster import DBSCAN
# eps=0.2 and min_samples=5 are the neighbourhood settings passed to DBSCAN
db = DBSCAN(eps=0.2,min_samples=5)
# Fit the model and get the cluster label of every sample
y_db = db.fit_predict(X)

# Visualise the DBSCAN result.  Only labels 0 and 1 are drawn; scikit-learn
# labels noise samples -1, so any such points are not shown.
plt.scatter(X[y_db==0,0],
            X[y_db==0,1],
            c='red',
            marker='o',
            s=40,
            label='cluster 1',
            edgecolor='k')
plt.scatter(X[y_db==1,0],
            X[y_db==1,1],
            c='green',
            marker='s',
            s=40,
            label='cluster 2',
            edgecolor='k')
# The title names the algorithm actually plotted here
plt.title('DBSCAN clustering')
plt.legend()

<matplotlib.legend.Legend at 0x1f556b0df10>

在这里插入图片描述

# DBSCAN on a dataset read from a local text file
# IPython magic (only valid inside Jupyter/IPython): draw figures inline
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
dbscan_data=pd.read_csv("C:/Users/yang'xiu'yun/Documents/Tencent Files/2905041081/FileRecv/dbscan_data.txt")
# Not the last expression of the cell, so this preview is not displayed
dbscan_data.head()
# Scatter the two coordinate columns
plt.scatter(dbscan_data['x1'],dbscan_data['x2'])

<matplotlib.collections.PathCollection at 0x1f557e45940>

在这里插入图片描述

import numpy as np
from sklearn.cluster import DBSCAN
# First attempt: eps=0.5, min_samples=100
db=DBSCAN(eps=0.5,min_samples=100)
# Fit on every column currently present in dbscan_data
db.fit(dbscan_data)

DBSCAN(min_samples=100)

# Store the cluster label of every row next to its coordinates.
# NOTE(review): this adds a 'cluster_db' column to dbscan_data, so a later
# fit on the whole frame would also treat that column as a feature.
labels6=db.labels_
dbscan_data['cluster_db']=labels6
#colors=np. random.random(100)
# Colour palette indexed by cluster label.
# NOTE(review): a label of -1 would select the last entry ('tomato') via
# negative indexing — confirm that is the intended colour for such points.
colors=np.array(['red','green','blue','yellow','teal','orange','cyan','black','goldenrod','tomato'])
plt.figure(figsize = (15,8))
plt.scatter(dbscan_data['x1'],dbscan_data['x2' ],c=colors[dbscan_data['cluster_db']])

<matplotlib.collections.PathCollection at 0x1f557864580>

在这里插入图片描述

# Re-fit with a smaller eps.  The 'cluster_db' label column written by the
# earlier plotting cell is excluded so previous labels are not clustered as
# if they were a feature (errors='ignore' keeps this safe if it is absent).
db=DBSCAN(eps=0.2,min_samples=100).fit(dbscan_data.drop(columns=['cluster_db'],errors='ignore'))

db

DBSCAN(eps=0.2, min_samples=100)

import numpy as np
from sklearn.cluster import DBSCAN
# eps=0.2 with a lower density requirement (min_samples=30)
db=DBSCAN(eps=0.2,min_samples=30)
# Fit without the 'cluster_db' label column written by the earlier plotting
# cell, so previous labels are not clustered as if they were a feature
# (errors='ignore' keeps this safe if the column is absent).
db.fit(dbscan_data.drop(columns=['cluster_db'],errors='ignore'))

DBSCAN(eps=0.2, min_samples=30)

# Overwrite the stored labels with those of the latest fit
labels6=db.labels_
dbscan_data['cluster_db']=labels6
#colors=np. random.random(100)
# Colour palette indexed by cluster label.
# NOTE(review): a label of -1 would select the last entry ('tomato') via
# negative indexing — confirm that is the intended colour for such points.
colors=np.array(['red','green','blue','yellow','teal','orange','cyan','black','goldenrod','tomato'])
plt.scatter(dbscan_data['x1'],dbscan_data['x2' ],c=colors[dbscan_data['cluster_db']])

<matplotlib.collections.PathCollection at 0x1f5578733d0>

在这里插入图片描述

KMEANS聚类

from sklearn.datasets import make_blobs

from sklearn.cluster import KMeans

# Generate a random dataset of 150 samples around 3 centres (fixed seed)
X,y = make_blobs(n_samples=150,centers=3,random_state=8)

import matplotlib.pyplot as plt
# IPython magic (only valid inside Jupyter/IPython): draw figures inline
%matplotlib inline
plt.figure(dpi=300)
# Scatter the samples, coloured by their generated label
plt.scatter(X[:,0],X[:,1],c = y,cmap = plt.cm.spring,edgecolors='k')

<matplotlib.collections.PathCollection at 0x292fd30cca0>

在这里插入图片描述

# K-Means with three clusters; no random_state is set here
kmeans=KMeans(n_clusters=3)

kmeans.fit(X)

KMeans(n_clusters=3)

kmeans.cluster_centers_  # coordinates of the cluster centres

array([[ 7.51338019,  9.44881625],
       [-5.43790266, -9.83963795],
       [ 7.21711781,  0.68887741]])

kmeans.labels_  # cluster label assigned to every data point

array([2, 1, 0, 2, 0, 0, 1, 0, 1, 0, 0, 1, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2,
       1, 2, 0, 1, 2, 0, 0, 0, 2, 0, 1, 1, 2, 1, 2, 0, 0, 1, 1, 0, 0, 0,
       1, 1, 2, 2, 2, 0, 1, 1, 0, 0, 2, 1, 0, 1, 0, 2, 2, 1, 1, 2, 2, 1,
       2, 1, 1, 2, 2, 2, 2, 0, 2, 1, 1, 0, 2, 2, 2, 0, 0, 0, 1, 1, 0, 2,
       0, 1, 1, 0, 0, 1, 2, 0, 2, 0, 1, 1, 2, 2, 2, 1, 0, 0, 1, 0, 2, 1,
       0, 2, 2, 0, 1, 2, 0, 1, 1, 0, 0, 0, 0, 2, 2, 2, 0, 1, 1, 1, 2, 0,
       2, 0, 1, 2, 2, 2, 2, 2, 2, 0, 1, 0, 0, 1, 1, 0, 0, 1])

kmeans.n_iter_  # number of iterations the run took

kmeans.inertia_  # within-cluster sum of squared errors

329.1821351189805

# Elbow method: fit K-Means for k = 1..19 and record each fit's
# within-cluster sum of squared errors (inertia_).  `distortion` has to be
# built here because the plot below reads it and nothing else defines it.
distortion = [
    KMeans(n_clusters=k, n_init=10, random_state=8).fit(X).inertia_
    for k in range(1, 20)
]

# Plot distortion against the number of clusters
plt.plot(range(1,20),distortion,marker='o')
plt.xticks(range(1,20))
plt.grid(linestyle='--',)
plt.xlabel('Number of clusters')
plt.ylabel("Distortion")

Text(0, 0.5, 'Distortion')

在这里插入图片描述

轮廓系数

# Compute and plot the silhouette coefficients for k = 3
km = KMeans(n_clusters=3)
y_km = km.fit_predict(X)
import numpy as np
from matplotlib import cm
from sklearn.metrics import silhouette_samples
cluster_labels = np.unique(y_km)
n_clusters = cluster_labels.shape[0]
# Silhouette coefficient of every sample; the loop and the average below
# both read this array, so it must be computed before they run.
silhouette_vals = silhouette_samples(X, y_km, metric='euclidean')

# y_ax_lower / y_ax_upper delimit the band of bars drawn for each cluster
y_ax_lower, y_ax_upper = 0,0
yticks = []
for i, c in enumerate(cluster_labels):
    # Boolean-mask indexing returns a copy, so sorting it in place is safe
    c_silhouette_vals = silhouette_vals[y_km == c]
    c_silhouette_vals.sort()
    y_ax_upper += len(c_silhouette_vals)
    color = cm.jet(i/n_clusters)
    plt.barh(range(y_ax_lower,y_ax_upper),
             c_silhouette_vals,
             height=1.0,
             edgecolor='none',
             color=color)
    # Remember the middle of the band so the cluster can be labelled there
    yticks.append((y_ax_lower+y_ax_upper)/2)
    y_ax_lower += len(c_silhouette_vals)
# Dashed red line marks the mean silhouette coefficient over all samples
silhouette_avg = np.mean(silhouette_vals)
plt.axvline(silhouette_avg,
            color='red',
            linestyle='--')
# Label each band with its 1-based cluster number
plt.yticks(yticks, cluster_labels + 1)
plt.ylabel("Cluster")
plt.xlabel("Silhouette coefficient")

Text(0.5, 0, 'Silhouette coefficient')

在这里插入图片描述

# Compute and plot the silhouette coefficients for k = 2
km = KMeans(n_clusters=2)
y_km = km.fit_predict(X)
import numpy as np
from matplotlib import cm
from sklearn.metrics import silhouette_samples
cluster_labels = np.unique(y_km)
n_clusters = cluster_labels.shape[0]
# Silhouette coefficient of every sample; the loop and the average below
# both read this array, so it must be computed before they run.
silhouette_vals = silhouette_samples(X, y_km, metric='euclidean')
# y_ax_lower / y_ax_upper delimit the band of bars drawn for each cluster
y_ax_lower, y_ax_upper = 0,0
yticks = []
for i, c in enumerate(cluster_labels):
    # Boolean-mask indexing returns a copy, so sorting it in place is safe
    c_silhouette_vals = silhouette_vals[y_km == c]
    c_silhouette_vals.sort()
    y_ax_upper += len(c_silhouette_vals)
    color = cm.jet(i/n_clusters)
    plt.barh(range(y_ax_lower,y_ax_upper),
             c_silhouette_vals,
             height=1.0,
             edgecolor='none',
             color=color)
    # Remember the middle of the band so the cluster can be labelled there
    yticks.append((y_ax_lower+y_ax_upper)/2)
    y_ax_lower += len(c_silhouette_vals)
# Dashed red line marks the mean silhouette coefficient over all samples
silhouette_avg = np.mean(silhouette_vals)
plt.axvline(silhouette_avg,
            color='red',
            linestyle='--')
# Label each band with its 1-based cluster number
plt.yticks(yticks, cluster_labels + 1)
plt.ylabel("Cluster")
plt.xlabel("Silhouette coefficient")