Classifier
Generate sample data
from sklearn.datasets import make_blobs

# Draw 500 three-dimensional points around 4 randomly placed centers, each
# cluster with standard deviation 2. center_box is a (min, max) pair bounding
# the coordinates of the random centers, so it takes exactly two values.
X, y = make_blobs(
    n_samples=500,
    n_features=3,
    centers=4,
    cluster_std=2,
    center_box=(-10, 10),
    shuffle=True,
    random_state=1,
)
KNeighborsClassifier
class sklearn.neighbors.KNeighborsClassifier(n_neighbors=5, weights='uniform', algorithm='auto', leaf_size=30, p=2, metric='minkowski', metric_params=None, n_jobs=1, **kwargs)
from sklearn.neighbors import KNeighborsClassifier as KNN

# Fit a 3-nearest-neighbour classifier on the generated samples.
knn = KNN(n_neighbors=3)
knn.fit(X, y)
# Per-class membership probabilities for every sample; note that this is
# evaluated on the same data the classifier was fitted on.
p = knn.predict_proba(X)
KMeans
from sklearn.cluster import KMeans

# Number of clusters to fit; 4 matches the `centers=4` used to generate X.
n_clusters = 4
# A fixed random_state keeps the centroid initialisation reproducible.
clusterer = KMeans(n_clusters=n_clusters, random_state=10)
# Fit the model and get the cluster index assigned to each sample.
cluster_labels = clusterer.fit_predict(X)
Determine the number of clusters
silhouette
from sklearn.metrics import silhouette_samples, silhouette_score
# Mean silhouette coefficient over all samples: a single number summarising
# how dense and how well separated the formed clusters are.
silhouette_avg = silhouette_score(X, labels=cluster_labels)
# Silhouette coefficient of each individual sample.
sample_silhouette_values = silhouette_samples(X, labels=cluster_labels)
Finding the K in K-Means Clustering
Using BIC to estimate the number of clusters k in K-Means