1.导入相应包
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin
from sklearn.datasets import load_sample_image
from sklearn.utils import shuffle
import pandas as pd
2.加载数据集
# Load the "china.jpg" sample photo provided by sklearn.datasets.
china = load_sample_image("china.jpg")
# Bare expression: presumably echoes the pixel array when run as a notebook cell.
china
3.维度转换并查看颜色种类
# Flatten the image into one row per pixel with 3 colour channels per row.
# -1 lets NumPy infer the pixel count, so the code no longer depends on this
# particular image's 427 x 640 resolution.
newimage = china.reshape(-1, 3)
newimage
# Every distinct row is a distinct colour, so the first entry of the
# resulting shape is the number of unique colours in the image.
pd.DataFrame(newimage).drop_duplicates().shape
输出显示该图片的颜色种类为96615种。
4.绘制原图像
# Draw the image exactly as loaded, before any quantization is applied.
plt.imshow(china)
输出图像如下:
5.建立模型进行聚类(64类)
# Size of the colour palette the image will be reduced to.
n_cluster = 64

# Convert to float64 and divide by the largest channel value so that all
# pixel values are at most 1.
china = china.astype(np.float64) / china.max()

# w, h and d are simply the three axes of the image array, in order.
w, h, d = china.shape
# One row per pixel, one column per colour channel.
image_array = china.reshape(w * h, d)

# Fit on 1000 shuffled pixels instead of the full image to keep training
# cheap; random_state pins both the sample and the K-Means initialisation.
image_array_sample = shuffle(image_array, random_state=0)[:1000]
kmeans = KMeans(n_clusters=n_cluster, random_state=0)
kmeans.fit(image_array_sample)
# The learned centroids are the n_cluster palette colours.
kmeans.cluster_centers_
6.将原图像中每个像素的颜色替换为其所属簇的质心颜色(共64种)
# Assign every pixel of the full image to its nearest learned centroid.
# (The original code indexed an undefined `labels`; this is the missing step.)
labels = kmeans.predict(image_array)
# Replace each pixel by its centroid colour. Indexing the centroid table
# with the label vector does this for all pixels at once and returns a new
# (w * h, d) array, so image_array itself is left untouched.
image_kmeans = kmeans.cluster_centers_[labels]
image_kmeans
7.将矩阵转为原始三维图像格式
# Fold the flat per-pixel rows back into the original three-axis image layout.
image_kmeans = np.reshape(image_kmeans, (w, h, d))
8.随机矢量量化(随机抽取64个像素的颜色作为质心,作为对照)
# Baseline codebook: take n_cluster pixels at random (fixed seed) and use
# their colours as the palette, with no clustering involved.
centroid_random = shuffle(image_array, random_state=0)[:n_cluster]
# axis=0 takes the argmin over the centroids, so the result holds one
# nearest-centroid index per pixel of image_array.
labels_random = pairwise_distances_argmin(centroid_random, image_array, axis=0)
labels_random.shape
# Number of palette entries that are actually used by at least one pixel.
len(set(labels_random))
# Replace each pixel by its nearest random centroid for all pixels at once,
# then fold the flat rows back into the image layout.
image_random = centroid_random[labels_random].reshape(w, h, d)
image_random.shape
9.可视化
# Draw the original and both quantized versions, each in its own 10 x 10
# figure with the axes hidden, then display all of them together.
figures = (
    ('Original image (96,615 colors)', china),
    ('Quantized image (64 colors, K-Means)', image_kmeans),
    ('Quantized image (64 colors, Random)', image_random),
)
for caption, picture in figures:
    plt.figure(figsize=(10, 10))
    plt.axis('off')
    plt.title(caption)
    plt.imshow(picture)
plt.show()
最终绘制的原图像为:
K-Means聚类量化(压缩为64种颜色)后的图像为:
随机矢量量化的图像为:
从图像对比中可以看出:同样只使用64种颜色,K-Means量化后的图像比随机选取质心量化的图像更接近原图,压缩效果较好。