from sklearn.cluster import KMeans
# Build a 2-cluster k-means estimator, fit it on X, then ask the fitted
# model for the cluster index of each sample in X.
kmeans = KMeans(n_clusters=2)
kmeans.fit(X)  # fit() returns the estimator itself, so `kmeans` is the fitted model
kmeans.predict(X)
n_clusters :
The number of clusters to form as well as the number of centroids to generate.#分为几类
n_init :
Number of times the k-means algorithm will be run with different centroid seeds. The final results will be the best output of n_init consecutive runs in terms of inertia.#初始次数
max_iter :
Maximum number of iterations of the k-means algorithm for a single run.#迭代次数
## Cluster using 3 features
feature_1 = "salary"
feature_2 = "exercised_stock_options"
feature_3 = "total_payments"
poi = "poi"
features_list = [poi, feature_1, feature_2, feature_3]
data = featureFormat(data_dict, features_list)
# NOTE: `poi` is rebound here. It held the feature name above and now holds
# the first value returned by targetFeatureSplit (presumably the POI labels;
# confirm against that helper).
poi, finance_features = targetFeatureSplit(data)
from sklearn.cluster import KMeans
# Fit k-means with 2 clusters on the three features and keep the cluster
# index assigned to each sample.
pred = KMeans(n_clusters=2).fit_predict(finance_features)
# Each row now carries three features, but the plot is 2-D: draw the first
# two and ignore the third. Passing the third feature as the third positional
# argument of plt.scatter would set the marker size (`s`), not a third axis.
for f1, f2, _ in finance_features:
    plt.scatter(f1, f2)
plt.show()
## Range of exercised stock options
import numpy as np
# Collect the 'exercised_stock_options' value of every entry in data_dict,
# skipping entries whose value is the string 'NaN'.
stocklist = []
for item in data_dict:
    stock = data_dict[item]['exercised_stock_options']
    if stock != 'NaN':
        stocklist.append(stock)
stocklist = np.array(stocklist)
# A single pre-formatted string prints the same text under Python 2 and
# Python 3, unlike the Python-2-only `print "max:", value` statement.
print("max: {}".format(np.max(stocklist)))
print("min: {}".format(np.min(stocklist)))
## Salary range
# Collect the 'salary' value of every entry in data_dict, skipping entries
# whose value is the string 'NaN'.
salarylist = []
for item in data_dict:
    salary = data_dict[item]['salary']
    if salary != 'NaN':
        salarylist.append(salary)
salarylist = np.array(salarylist)
# A single pre-formatted string prints the same text under Python 2 and
# Python 3, unlike the Python-2-only `print "max:", value` statement.
print("max: {}".format(np.max(salarylist)))
print("min: {}".format(np.min(salarylist)))