K-means图片处理及降维代码

K-means图片处理

from pylab import imread,imshow,figure,show,subplot
from numpy import reshape,flipud
from sklearn.cluster import KMeans
from copy import deepcopy,copy

# Load the image; the unpacking below expects a (height, width, channels) array.
img = imread('./data/sample2.jpeg')

# Flatten to one row per pixel so that each pixel is one K-means sample.
height, width, n_channels = img.shape
pixel = reshape(img, (height * width, n_channels))
print(img.shape)

# Cluster the pixel colours; the 10 cluster centres serve as the colour palette.
model = KMeans(n_clusters=10)
labels = model.fit_predict(pixel)
palette = model.cluster_centers_

# Replace every pixel by the centre of its cluster with a single vectorised
# lookup instead of a Python loop over all pixels.  The cast back to the
# source dtype reproduces what element-wise assignment into a copy of
# `pixel` would do.
pixel_new = palette[labels].astype(pixel.dtype)
imshow(reshape(pixel_new, img.shape))

在这里插入图片描述

基于K-means的用户分层

import pandas as pd
import sklearn
import matplotlib.pyplot as plt
import seaborn as sns

数据

  • 数据里包含了产品的信息,以及用户购买产品的记录。
# Read the first sheet of the workbook and relabel its seven columns.
data_offer = pd.read_excel(
    "./data/WineKMC.xlsx",
    sheet_name=0,
).set_axis(
    [
        "offer_id",
        "campaign",
        "varietal",
        "min_qty",
        "discount",
        "origin",
        "past_peak",
    ],
    axis=1,
)
data_offer.head()

在这里插入图片描述

# Read the second sheet, relabel its two columns, and add a constant
# column `n` equal to 1 on every row.
data_transactions = (
    pd.read_excel("./data/WineKMC.xlsx", sheet_name=1)
    .set_axis(["customer_name", "offer_id"], axis=1)
    .assign(n=1)
)
data_transactions.head()

在这里插入图片描述

import numpy as np

# Merge the two dataframes on the shared offer id.
cust_compare = data_transactions.merge(data_offer, on='offer_id')

# Drop the offer attributes that are not needed for the pivot.
cust_compare = cust_compare.drop(
    columns=['campaign', 'varietal', 'min_qty', 'discount', 'origin', 'past_peak']
)

# Create the pivot table: one row per customer, one column per offer,
# summing the remaining value column(s) and filling missing pairs with 0.
# The string 'sum' is used instead of np.sum, which newer pandas releases
# deprecate as an aggfunc.
table = pd.pivot_table(
    cust_compare,
    index='customer_name',
    columns='offer_id',
    aggfunc='sum',
    fill_value=0,
)
table

在这里插入图片描述

from sklearn.cluster import KMeans
# choosing K=5
kmeans_5 = KMeans(n_clusters=5).fit_predict(table)

# Count the samples per cluster label.  bincount returns the counts ordered
# by label (0..4), so each bar lines up with its cluster id, and minlength
# keeps five bars even when a cluster received no samples.
heights = np.bincount(kmeans_5, minlength=5)
plt.bar(range(5), heights)
plt.xlabel('Cluster')
# The count belongs on the y axis; a second xlabel call would overwrite
# the 'Cluster' label set above.
plt.ylabel('# of sample')

在这里插入图片描述

利用降维的方法来可视化样本

from sklearn.decomposition import PCA
# Project the pivot table onto two principal components for plotting.
pca = PCA(n_components=2)
data_new = pca.fit_transform(table)

print(table.shape)
print(data_new.shape)

colors = ['r', 'g', 'b', 'y', 'c', 'm']
fig, ax = plt.subplots()

# Draw one scatter series per cluster label (K=5, matching kmeans_5).
for i in range(5):
    # A boolean mask selects the rows of this cluster and keeps the result
    # two-dimensional even when the cluster is empty, so the column
    # indexing below cannot fail.
    points = data_new[kmeans_5 == i]
    ax.scatter(points[:, 0], points[:, 1], s=7, c=colors[i])

在这里插入图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值