sklearn KMeans wheat seed clustering

GitHub repo: github.com/yangjinghit…

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
data1 = pd.DataFrame({'X':np.random.randint(1,50,100), 'Y':np.random.randint(1,50, 100)})
data = pd.concat([data1 + 50, data1])
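Shifting one copy of data1 by +50 before concatenating gives 200 points in two well-separated blobs, so a 2-cluster structure is built in by construction. A quick sanity check (a sketch added here, not part of the original notebook):

print(data.shape)               # (200, 2)
print(data.iloc[:100].mean())   # roughly X=75, Y=75 - the shifted copy
print(data.iloc[100:].mean())   # roughly X=25, Y=25 - the original points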
plt.style.use('ggplot')
plt.scatter(data['X'], data['Y'])
<matplotlib.collections.PathCollection at 0x11084cf60>

from sklearn.cluster import KMeans

y_pred = KMeans(n_clusters=2).fit_predict(data)
y_pred
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1], dtype=int32)
plt.scatter(data.X, data.Y, c=y_pred)
<matplotlib.collections.PathCollection at 0x1a15f51b70>
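Keeping the fitted estimator instead of calling fit_predict directly makes the cluster centers available, so they can be drawn on top of the scatter. A minimal sketch (not from the original run; km and labels are names used only here, and random_state is added for reproducibility):

km = KMeans(n_clusters=2, random_state=0)
labels = km.fit_predict(data)
plt.scatter(data['X'], data['Y'], c=labels)
plt.scatter(km.cluster_centers_[:, 0], km.cluster_centers_[:, 1],
            marker='x', s=200, c='black')   # cluster centers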

from sklearn import metrics
metrics.calinski_harabaz_score(data, y_pred)  # note: renamed to calinski_harabasz_score in newer scikit-learn
651.578135407151
y_pred_three = KMeans(n_clusters=3).fit_predict(data)
metrics.calinski_harabaz_score(data, y_pred_three)
440.2030577799353
y_pred_four = KMeans(n_clusters=4).fit_predict(data)
metrics.calinski_harabaz_score(data, y_pred_four)
422.21475785975883
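The Calinski-Harabasz score is the ratio of between-cluster dispersion to within-cluster dispersion, so higher is better; as expected from how the data was built, k=2 scores highest. A small loop (an added sketch, not part of the original notebook) makes the comparison in one place:

for k in range(2, 7):
    labels = KMeans(n_clusters=k).fit_predict(data)
    print(k, metrics.calinski_harabaz_score(data, labels))   # calinski_harabasz_score on newer scikit-learn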
# seeds_dataset.txt: the UCI seeds data - seven geometric measurements of wheat kernels (x1-x7) plus the variety label y (1, 2 or 3)
data = pd.read_csv('seeds_dataset.txt', header=None, delim_whitespace=True, names=['x1','x2','x3','x4','x5','x6','x7','y'])
data
        x1     x2      x3     x4     x5      x6     x7  y
0    15.26  14.84  0.8710  5.763  3.312  2.2210  5.220  1
1    14.88  14.57  0.8811  5.554  3.333  1.0180  4.956  1
2    14.29  14.09  0.9050  5.291  3.337  2.6990  4.825  1
3    13.84  13.94  0.8955  5.324  3.379  2.2590  4.805  1
4    16.14  14.99  0.9034  5.658  3.562  1.3550  5.175  1
5    14.38  14.21  0.8951  5.386  3.312  2.4620  4.956  1
6    14.69  14.49  0.8799  5.563  3.259  3.5860  5.219  1
7    14.11  14.10  0.8911  5.420  3.302  2.7000  5.000  1
8    16.63  15.46  0.8747  6.053  3.465  2.0400  5.877  1
9    16.44  15.25  0.8880  5.884  3.505  1.9690  5.533  1
10   15.26  14.85  0.8696  5.714  3.242  4.5430  5.314  1
11   14.03  14.16  0.8796  5.438  3.201  1.7170  5.001  1
12   13.89  14.02  0.8880  5.439  3.199  3.9860  4.738  1
13   13.78  14.06  0.8759  5.479  3.156  3.1360  4.872  1
14   13.74  14.05  0.8744  5.482  3.114  2.9320  4.825  1
15   14.59  14.28  0.8993  5.351  3.333  4.1850  4.781  1
16   13.99  13.83  0.9183  5.119  3.383  5.2340  4.781  1
17   15.69  14.75  0.9058  5.527  3.514  1.5990  5.046  1
18   14.70  14.21  0.9153  5.205  3.466  1.7670  4.649  1
19   12.72  13.57  0.8686  5.226  3.049  4.1020  4.914  1
20   14.16  14.40  0.8584  5.658  3.129  3.0720  5.176  1
21   14.11  14.26  0.8722  5.520  3.168  2.6880  5.219  1
22   15.88  14.90  0.8988  5.618  3.507  0.7651  5.091  1
23   12.08  13.23  0.8664  5.099  2.936  1.4150  4.961  1
24   15.01  14.76  0.8657  5.789  3.245  1.7910  5.001  1
25   16.19  15.16  0.8849  5.833  3.421  0.9030  5.307  1
26   13.02  13.76  0.8641  5.395  3.026  3.3730  4.825  1
27   12.74  13.67  0.8564  5.395  2.956  2.5040  4.869  1
28   14.11  14.18  0.8820  5.541  3.221  2.7540  5.038  1
29   13.45  14.02  0.8604  5.516  3.065  3.5310  5.097  1
..     ...    ...     ...    ...    ...     ...    ...  ..
180  11.41  12.95  0.8560  5.090  2.775  4.9570  4.825  3
181  12.46  13.41  0.8706  5.236  3.017  4.9870  5.147  3
182  12.19  13.36  0.8579  5.240  2.909  4.8570  5.158  3
183  11.65  13.07  0.8575  5.108  2.850  5.2090  5.135  3
184  12.89  13.77  0.8541  5.495  3.026  6.1850  5.316  3
185  11.56  13.31  0.8198  5.363  2.683  4.0620  5.182  3
186  11.81  13.45  0.8198  5.413  2.716  4.8980  5.352  3
187  10.91  12.80  0.8372  5.088  2.675  4.1790  4.956  3
188  11.23  12.82  0.8594  5.089  2.821  7.5240  4.957  3
189  10.59  12.41  0.8648  4.899  2.787  4.9750  4.794  3
190  10.93  12.80  0.8390  5.046  2.717  5.3980  5.045  3
191  11.27  12.86  0.8563  5.091  2.804  3.9850  5.001  3
192  11.87  13.02  0.8795  5.132  2.953  3.5970  5.132  3
193  10.82  12.83  0.8256  5.180  2.630  4.8530  5.089  3
194  12.11  13.27  0.8639  5.236  2.975  4.1320  5.012  3
195  12.80  13.47  0.8860  5.160  3.126  4.8730  4.914  3
196  12.79  13.53  0.8786  5.224  3.054  5.4830  4.958  3
197  13.37  13.78  0.8849  5.320  3.128  4.6700  5.091  3
198  12.62  13.67  0.8481  5.410  2.911  3.3060  5.231  3
199  12.76  13.38  0.8964  5.073  3.155  2.8280  4.830  3
200  12.38  13.44  0.8609  5.219  2.989  5.4720  5.045  3
201  12.67  13.32  0.8977  4.984  3.135  2.3000  4.745  3
202  11.18  12.72  0.8680  5.009  2.810  4.0510  4.828  3
203  12.70  13.41  0.8874  5.183  3.091  8.4560  5.000  3
204  12.37  13.47  0.8567  5.204  2.960  3.9190  5.001  3
205  12.19  13.20  0.8783  5.137  2.981  3.6310  4.870  3
206  11.23  12.88  0.8511  5.140  2.795  4.3250  5.003  3
207  13.20  13.66  0.8883  5.236  3.232  8.3150  5.056  3
208  11.84  13.21  0.8521  5.175  2.836  3.5980  5.044  3
209  12.30  13.34  0.8684  5.243  2.974  5.6370  5.063  3

210 rows × 8 columns

data.columns
Index(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'y'], dtype='object')
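The seven features sit on quite different scales (x1 is roughly 10-21 while x3 stays around 0.8-0.9), and KMeans is distance-based, so standardizing the columns first is often worth trying. A sketch of that variant (an addition, not what the original notebook does; X, X_scaled and y_pred_scaled are names used only here):

from sklearn.preprocessing import StandardScaler

X = data[['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7']]
X_scaled = StandardScaler().fit_transform(X)              # zero mean, unit variance per column
y_pred_scaled = KMeans(n_clusters=3).fit_predict(X_scaled)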
y_pred = KMeans(n_clusters=3).fit_predict(data[['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7']])
y_pred
array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 2, 2,
       2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 0, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2,
       2, 2, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1,
       2, 2, 2, 2, 1, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)
y_pre = pd.Series(y_pred)
y_pre
0      2
1      2
2      2
3      2
4      2
5      2
6      2
7      2
8      2
9      2
10     2
11     2
12     2
13     2
14     2
15     2
16     0
17     2
18     2
19     0
20     2
21     2
22     2
23     2
24     2
25     2
26     0
27     2
28     2
29     2
      ..
180    0
181    0
182    0
183    0
184    0
185    0
186    0
187    0
188    0
189    0
190    0
191    0
192    0
193    0
194    0
195    0
196    0
197    0
198    0
199    0
200    0
201    2
202    0
203    0
204    0
205    0
206    0
207    0
208    0
209    0
Length: 210, dtype: int32
y_pre.unique()
array([2, 0, 1])
y_pre = y_pre.map({2:1, 1:2, 0:3})  # relabel the arbitrary cluster ids to match the dataset's 1/2/3 coding (read off by inspection)
y_pre
0      1
1      1
2      1
3      1
4      1
5      1
6      1
7      1
8      1
9      1
10     1
11     1
12     1
13     1
14     1
15     1
16     3
17     1
18     1
19     3
20     1
21     1
22     1
23     1
24     1
25     1
26     3
27     1
28     1
29     1
      ..
180    3
181    3
182    3
183    3
184    3
185    3
186    3
187    3
188    3
189    3
190    3
191    3
192    3
193    3
194    3
195    3
196    3
197    3
198    3
199    3
200    3
201    1
202    3
203    3
204    3
205    3
206    3
207    3
208    3
209    3
Length: 210, dtype: int64
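The {2:1, 1:2, 0:3} mapping above was read off by inspecting the output. It can also be derived automatically by giving each cluster the most common true class among its members; a sketch of that idea (an addition, not in the original; mapping and y_pre_auto are names used only here):

mapping = pd.crosstab(pd.Series(y_pred), data.y).idxmax(axis=1).to_dict()
print(mapping)                                # for this run: {0: 3, 1: 2, 2: 1}
y_pre_auto = pd.Series(y_pred).map(mapping)   # same relabeling as the manual map above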
data_p = pd.DataFrame({'y_pre':y_pre, 'y':data.y})
data_p['acc'] = data_p.y_pre == data_p.y
data_p
     y  y_pre    acc
0    1      1   True
1    1      1   True
2    1      1   True
3    1      1   True
4    1      1   True
5    1      1   True
6    1      1   True
7    1      1   True
8    1      1   True
9    1      1   True
10   1      1   True
11   1      1   True
12   1      1   True
13   1      1   True
14   1      1   True
15   1      1   True
16   1      3  False
17   1      1   True
18   1      1   True
19   1      3  False
20   1      1   True
21   1      1   True
22   1      1   True
23   1      1   True
24   1      1   True
25   1      1   True
26   1      3  False
27   1      1   True
28   1      1   True
29   1      1   True
..  ..     ..    ...
180  3      3   True
181  3      3   True
182  3      3   True
183  3      3   True
184  3      3   True
185  3      3   True
186  3      3   True
187  3      3   True
188  3      3   True
189  3      3   True
190  3      3   True
191  3      3   True
192  3      3   True
193  3      3   True
194  3      3   True
195  3      3   True
196  3      3   True
197  3      3   True
198  3      3   True
199  3      3   True
200  3      3   True
201  3      1  False
202  3      3   True
203  3      3   True
204  3      3   True
205  3      3   True
206  3      3   True
207  3      3   True
208  3      3   True
209  3      3   True

210 rows × 3 columns

data_p.acc.sum()/len(data_p)  # fraction of rows where the mapped cluster matches the true class
0.8952380952380953
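The accuracy above depends on choosing the right label mapping. A permutation-invariant metric such as the adjusted Rand index compares the clustering to the true labels directly and is a useful cross-check (an added sketch, not part of the original notebook):

from sklearn.metrics import adjusted_rand_score
print(adjusted_rand_score(data.y, y_pred))    # 1.0 would mean a perfect match up to relabeling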