问题描述
收集图片,分成N簇。
本人使用3簇
!!! 图片的尺寸要一致 !!!
解决方案
- 下载PCV
- 将里面的 PCV 文件夹复制粘贴到以下代码所在的同一文件夹
- 聚类
IPython代码
# -*- coding: utf-8 -*-
import pickle
from pylab import *
from PIL import Image
from scipy.cluster.vq import *
from PCV.tools import pca, imtools
# File the PCA result (mean image and principal components) is pickled to.
PCA_MODE = 'pca_mode.pkl'
# Directory of images used to compute the PCA.
IMG_TRAIN_LIST = './train/'
# Directory of images to be clustered.
IMG_CLUSTER_LIST = './train/'
# Number of clusters.
CLUSTER = 3
# Image size in pixels; every image must have exactly this size.
IMG_WIDTH = 1280
IMG_HEIGHT = 720
# --- Training: compute the PCA of the training images and save it ---------

# Get the list of training images.
imlist = imtools.get_imlist(IMG_TRAIN_LIST)
im = array(Image.open(imlist[0]))  # open one image as a sample
m, n = im.shape[:2]  # image dimensions (all images must share the same size)
imnbr = len(imlist)  # number of images
print('The number of training images is {}'.format(imnbr))

# Stack all images into one float matrix, one flattened image per row.
immatrix = array([array(Image.open(imname)).flatten() for imname in imlist], 'f')

# Compute and save the mean and the principal components.
print('Training...')
V, S, immean = pca.pca(immatrix)  # PCA dimensionality reduction

# Use a context manager so the file is closed even if pickling fails.
# The dump order (immean, then V) must match the load order used later.
with open(PCA_MODE, 'wb') as f:
    pickle.dump(immean, f)
    pickle.dump(V, f)
print('{} had saved'.format(PCA_MODE))
The number of training images is 60
Training...
pca_mode.pkl had saved
# --- Clustering: project the images onto the saved PCA basis, then k-means ---

# Get the list of images to cluster.
imlist = imtools.get_imlist(IMG_CLUSTER_LIST)
imnbr = len(imlist)
print('The number of clustering images is {}'.format(imnbr))

# Load the mean and the principal components, in the order they were dumped.
with open(PCA_MODE, 'rb') as f:
    immean = pickle.load(f)
    V = pickle.load(f)
print('{} had loaded'.format(PCA_MODE))

# Stack all images into one float matrix, one flattened image per row.
immatrix = array([array(Image.open(path)).flatten() for path in imlist], 'f')
immean = immean.flatten()

# Project every mean-centred image onto the first 10 x CLUSTER principal components.
basis = V[:CLUSTER * 10]
projected = array([dot(basis, row - immean) for row in immatrix])

# Whiten the projected features, run k-means, then assign each image
# to its nearest centroid (`code` holds the cluster index per image).
projected = whiten(projected)
centroids, distortion = kmeans(projected, CLUSTER)
code, distance = vq(projected, centroids)
The number of clustering images is 60
pca_mode.pkl had loaded
# --- Display: one figure per cluster, one subplot per member image ---------

# Number of grid columns: ceil(imnbr / CLUSTER) computed with integers.
# subplot() requires integer grid dimensions (plain `/` yields a float on
# Python 3), and rounding up guarantees CLUSTER * ncols >= imnbr, so even a
# cluster containing every image fits in the grid.
ncols = max(1, (imnbr + CLUSTER - 1) // CLUSTER)

for k in range(CLUSTER):
    ind = where(code == k)[0]  # indices of the images assigned to cluster k
    figure(figsize=(20, 20), dpi=200)
    gray()
    for i, idx in enumerate(ind):
        subplot(CLUSTER, ncols, i + 1)
        # immatrix stores pixels as floats (0..255 for ordinary 8-bit images),
        # but imshow() interprets float RGB data as 0..1 and clips the rest.
        # Cast back to uint8 so the images are rendered with correct colours.
        imshow(immatrix[idx].reshape((IMG_HEIGHT, IMG_WIDTH, 3)).astype('uint8'))
        axis('off')
show()