# 图像基础8 图像分类——PCA 图像特征提取算法

PCA（Principal Component Analysis，主成分分析）是一种掌握事物主要矛盾的统计分析方法，它可以从多元事物中解析出主要影响因素，揭示事物的本质，简化复杂的问题。

——摘自百度百科

下面的代码使用 mlpy 库进行 PCA 计算，可先通过以下命令安装：

pip install -i https://pypi.douban.com/simple mlpy
# -*- coding: utf-8 -*-
import cv2
import numpy as np

# Progress banner printed once when the script starts.
print(u'正在处理中')

# Grid resolution used when an image is cut into cells:
# w_fg cells across the width, h_fg cells down the height.
w_fg, h_fg = 20, 15

# Number of image classes; class ids run from 1 to picflag.
picflag = 3

# Return the feature code of one image.
# NOTE(review): this is the body of a function (it ends in `return`), but the
# `def` line is not present in the visible source, and `fnimg` is never
# assigned here -- presumably it is the image loaded from a file name; confirm.
# Every image is resized to the same fixed size before features are taken.
img = cv2.resize(fnimg,(800,600))
w = img.shape[1]
h = img.shape[0]
# Pixel size of one grid cell: the image is split into w_fg x h_fg cells.
w_interval =int( w/w_fg)
h_interval =int( h/h_fg)
# One [mean0, mean1, mean2] entry per grid cell, in row-major cell order.
alltz = []
for now_h in range(0,h,h_interval):
for now_w in range(0,w,w_interval):
# Slice the current cell out of channels 0, 1 and 2 (named b/g/r here,
# presumably following OpenCV's BGR channel order -- confirm).
b = img[now_h:now_h + h_interval, now_w:now_w+w_interval,0]
g = img[now_h:now_h + h_interval, now_w:now_w+w_interval,1]
r = img[now_h:now_h + h_interval, now_w:now_w+w_interval,2]
# The feature of a cell is the mean intensity of each channel.
btz = np.mean(b)
gtz = np.mean(g)
rtz = np.mean(r)
alltz.append([btz,gtz,rtz])
# Transpose so there is one row per channel and one column per cell.
result_alltz = np.array(alltz).T
# NOTE(review): `mlpy` is not imported in the visible source.
pca = mlpy.PCA()
pca.learn(result_alltz)
# NOTE(review): len(result_alltz) is the row count (3), so `/2` gives 1 under
# Python 2 integer division but 1.5 under Python 3 -- confirm which is intended.
result_alltz = pca.transform(result_alltz, k=len(result_alltz)/2)
# Flatten to a 1-D vector with one value per row; this reshape only succeeds
# if the transform output has a single column (assumption -- confirm).
result_alltz = result_alltz.reshape(len(result_alltz))
return result_alltz

def get_cossimi(x,y):
    """Return the cosine similarity between two vectors.

    Both inputs are converted to float64 and flattened, so plain lists,
    nested lists and NumPy arrays of any numeric dtype are accepted.

    Args:
        x: Sequence or array of numbers.
        y: Sequence or array of numbers with the same number of elements.

    Returns:
        dot(x, y) / (|x| * |y|) as a Python float. If either vector has
        zero norm (including empty input) the similarity is undefined and
        0.0 is returned instead of nan.

    Raises:
        ValueError: If the flattened inputs differ in length.
    """
    # Compute in float64: with small integer dtypes (e.g. uint8 pixel data)
    # the element-wise products would silently wrap around.
    vec_x = np.asarray(x, dtype=np.float64).ravel()
    vec_y = np.asarray(y, dtype=np.float64).ravel()

    norm_product = np.sqrt(np.dot(vec_x, vec_x)) * np.sqrt(np.dot(vec_y, vec_y))
    if norm_product == 0:
        # Avoid a division by zero; callers compare the result with `>`.
        return 0.0
    return float(np.dot(vec_x, vec_y) / norm_product)

# Initialise the x and d sample containers.
# NOTE(review): `d` is not used anywhere in the visible source.
train_x = []
d = []

# Read the images and extract the features of each class.
# The class feature code is the average of the region features over all
# sample images of that class.
# NOTE(review): indentation was lost in this listing, so the exact nesting of
# the statements below cannot be determined from the visible source.
for ii in range(1,picflag+1):
# NOTE(review): smp_x, b_tz, g_tz and r_tz are assigned but never read in the
# visible source.
smp_x = []
b_tz = np.array([0,0,0])
g_tz = np.array([0,0,0])
r_tz = np.array([0,0,0])
# Accumulator for the class: 3 rows, one column per grid cell.
mytz = np.zeros((3,w_fg*h_fg))
# NOTE(review): range(1,3) yields jj = 1 and 2 (two samples), yet the sum is
# divided by 3 below -- confirm the intended number of samples per class.
for jj in range(1,3):
# Sample file name pattern: p<class>-<sample>.png
fn = 'p'+str(ii)+'-' + str(jj) + '.png'
# NOTE(review): `tmptz` is not assigned in the visible source; presumably it
# is the feature code computed for `fn` -- confirm.
mytz += np.array(tmptz)
mytz /=3
# Store the class feature as one flat list: row 0, then row 1, then row 2.
train_x.append(mytz[0].tolist()+mytz[1].tolist() + mytz[2].tolist())

# Classify ptest3.
# Compute the cosine similarity between the feature code of the image to be
# classified and the feature code of every class; the class with the largest
# value is taken as the image's class.
fn = 'ptest3.png'
# NOTE(review): `testtz` is not assigned in the visible source; presumably it
# is the feature code computed for `fn` -- confirm.
simtz = testtz[0].tolist() + testtz[1].tolist() + testtz[2].tolist()
# Best similarity so far and the index of the class that produced it.
# Starting from 0 means class index 0 is reported when no similarity is > 0.
maxtz = 0
nowi = 0
for i in range(0,picflag):
nowsim = get_cossimi(train_x[i], simtz)
if nowsim>maxtz:
maxtz = nowsim
nowi = i
# Class numbers shown to the user are 1-based.
print(u'%s 属于第 %d 类' % (fn,nowi+1))

# Classify ptest1: pick the class whose feature code has the largest cosine
# similarity to the feature code of the test image.
fn = 'ptest1.png'
# NOTE(review): `testtz` is not assigned in the visible source; presumably it
# is the feature code computed for `fn` -- confirm.
simtz = testtz[0].tolist() + testtz[1].tolist() + testtz[2].tolist()
# Best similarity so far and the index of the class that produced it.
# Starting from 0 means class index 0 is reported when no similarity is > 0.
maxtz=0
nowi=0
for i in range(0,picflag):
nowsim = get_cossimi(train_x[i],simtz)
if nowsim>maxtz:
maxtz = nowsim
nowi = i
# Class numbers shown to the user are 1-based.
print(u'%s 属于第 %d 类' % (fn,nowi+1))

# Classify ptest2: pick the class whose feature code has the largest cosine
# similarity to the feature code of the test image.
fn = 'ptest2.png'
# NOTE(review): `testtz` is not assigned in the visible source; presumably it
# is the feature code computed for `fn` -- confirm.
simtz = testtz[0].tolist() + testtz[1].tolist() + testtz[2].tolist()
# Best similarity so far and the index of the class that produced it.
# Starting from 0 means class index 0 is reported when no similarity is > 0.
maxtz = 0
nowi = 0
for i in range(0,picflag):
nowsim = get_cossimi(train_x[i],simtz)
if nowsim>maxtz:
maxtz = nowsim
nowi = i
# Class numbers shown to the user are 1-based.
print(u'%s 属于第%d类' % (fn,nowi+1))