LBP(Local Binary Pattern,局部二值模式) 算法详解
一幅彩色图像需先转换为灰度图像.
原始的LBP算子定义为在3*3的窗口内,以窗口中心像素为阈值,将相邻的8个像素的灰度值与其进行比较,若周围像素值大于中心像素值,则该像素点的位置被标记为1,否则为0。这样,3*3邻域内的8个点经比较可产生8位二进制数(通常转换为十进制数即LBP码,共256种),即得到该窗口中心像素点的LBP值,并用这个值来反映该区域的纹理信息。如下图所示:
其中https://blog.csdn.net/qq_26898461/article/details/46875517这篇博客中说到:
对于8个采样点,灰度不变性LBP将产生256种输出,旋转不变性LBP将产生36种输出,而基于uniform的旋转不变LBP将只有9种输出。【uniform形式有58种输出】
对于8个采样点,共有2^8=256种模式,直方图长度就是256,即统计0-255每个取值出现的次数,而且其中很多bin是稀疏的。但是如果采样点增加到10个或者20个,模式数就是2^10、2^20了,直方图会非常长,计算量就上来了,而且数据更加稀疏,所以uniform(等价模式)就来了。
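以上这段话很关键。普通LBP的256(2^8)种和uniform LBP的58(P*(P-1)+2)种很好理解;而旋转不变的uniform LBP只有9(即P+1)种,原因是:等价模式的二进制串中,1是连续排列的,旋转之后彼此只能靠1的个数(0到P个)来区分,所以一共P+1种;所有非等价模式再统一归为1类,因此直方图总长度为P+2(P=8时为10)。
而且我的代码中用的也是这种uniform模式(method="uniform"):P+1种等价模式,再加上1个非等价模式的类别,直方图共P+2个bin。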
对LBP特征向量进行提取的步骤 (这是copy别人的)https://blog.csdn.net/shenjianhua005/article/details/110009765
(1)首先将检测窗口划分为16×16的小区域(cell);
(2)对于每个cell中的一个像素,将相邻的8个像素的灰度值与其进行比较,若周围像素值大于中心像素值,则该像素点的位置被标记为1,否则为0。这样,3*3邻域内的8个点经比较可产生8位二进制数,即得到该窗口中心像素点的LBP值;
(3)然后计算每个cell的直方图,即每个数字(假定是十进制数LBP值)出现的频率;然后对该直方图进行归一化处理。
(4)最后将得到的每个cell的统计直方图进行连接成为一个特征向量,也就是整幅图的LBP纹理特征向量;
针对第(4)点,补充一下我个人的理解:如果直方图的长度为256,那么16*16个区域最终连接起来的特征长度为16*16*256,这还好;但如果采样点是20个,又没有uniform(即等价模式)的话,特征长度就是2^20*16*16,那将是一个很大的数,不利于计算。
然后便可利用SVM或者其他机器学习算法进行分类了。
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from skimage import feature
from sklearn.svm import LinearSVC
import joblib
import numpy as np
from imutils import paths
import cv2
from xml.dom.minidom import parse
from glob2 import glob
from tqdm import tqdm
import os
class LocalBinaryPatterns():
    """Texture descriptor built on scikit-image's "uniform" LBP operator.

    ``numPoints`` and ``radius`` are stored as given and forwarded unchanged
    to ``feature.local_binary_pattern`` whenever ``describe`` is called.
    """

    def __init__(self, numPoints, radius):
        # Sampling configuration of the circular LBP neighbourhood.
        self.radius = radius
        self.numPoints = numPoints

    def describe(self, image, eps=1e-7):
        """Return the normalised histogram of LBP codes found in ``image``.

        Args:
            image: Array handed straight to ``feature.local_binary_pattern``
                (the callers in this file pass a grayscale crop).
            eps: Small constant added to the histogram total so the
                normalisation never divides by zero.

        Returns:
            A 1-D float array with ``numPoints + 2`` entries, each being the
            count of one LBP code divided by ``(total count + eps)``.
        """
        codes = feature.local_binary_pattern(
            image, self.numPoints, self.radius, method="uniform"
        )
        # numPoints + 3 integer edges -> numPoints + 2 unit-wide bins.
        bin_edges = np.arange(0, self.numPoints + 3)
        counts, _ = np.histogram(
            codes.ravel(), bins=bin_edges, range=(0, self.numPoints + 2)
        )
        counts = counts.astype("float")
        counts /= counts.sum() + eps
        return counts
def train(train_path, desc):
    """Train a LinearSVC on LBP histograms of annotated regions and save it.

    Every ``*.xml`` annotation file found in ``train_path`` (plus the extra
    hard-coded folders below) is paired with the ``.jpg`` of the same base
    name. Each object listed in the annotation is cropped from the image,
    converted to grayscale and described with ``desc.describe``; the object
    name is used as the class label. The fitted model is written to
    ``train_model.m`` in the current working directory and its accuracy on
    the training data is printed.

    Args:
        train_path: Folder containing the annotation/image pairs.
        desc: Descriptor object exposing ``describe(gray_image)``.

    Raises:
        ValueError: If no usable training sample could be collected.
    """
    # Extra annotation folders hard-coded in the original script.
    extra_dirs = (r"D:\work\data", r"D:\work\data")

    candidates = glob(os.path.join(train_path, "*.xml"))
    for extra_dir in extra_dirs:
        candidates += glob(os.path.join(extra_dir, "*.xml"))

    # The folders may overlap (or be identical); keep each annotation file
    # once so that samples are not silently duplicated in the training set.
    seen = set()
    xml_list = []
    for candidate in candidates:
        key = os.path.normcase(os.path.abspath(candidate))
        if key not in seen:
            seen.add(key)
            xml_list.append(candidate)

    data_array = []
    labels = []
    for xml in tqdm(xml_list):
        # splitext swaps only the real extension, whatever its case.
        img_path = os.path.splitext(xml)[0] + ".jpg"
        if not os.path.exists(img_path):
            continue

        # Decode the image once per file rather than once per object.
        rgb_image = cv2.imread(img_path)
        if rgb_image is None:
            print("skip unreadable image:", img_path)
            continue

        for loc in readXml(xml):
            label = loc[0]
            xmin, ymin, xmax, ymax = (int(value) for value in loc[1:])
            # NOTE(review): rows include ymax (+1) while columns stop before
            # xmax; kept unchanged so features match the ones used by predict.
            roi_rgb_img = rgb_image[ymin:ymax + 1, xmin:xmax, :]
            if roi_rgb_img.size == 0:
                # An empty crop would make cvtColor raise.
                continue
            roi_gray_img = cv2.cvtColor(roi_rgb_img, cv2.COLOR_BGR2GRAY)
            data_array.append(desc.describe(roi_gray_img))
            labels.append(label)

    if not data_array:
        raise ValueError("no training samples found")

    model = LinearSVC(C=100.0, random_state=42, max_iter=100000)
    model.fit(data_array, labels)
    print(model.score(data_array, labels))
    joblib.dump(model, "train_model.m")
def predict(test_patn, desc):
    """Classify every annotated region under ``test_patn`` and display it.

    Loads the model saved as ``train_model.m``, then for each ``*.xml``
    annotation with a matching ``.jpg`` crops every listed object, describes
    it with ``desc.describe`` and predicts its label. For each object the
    label is drawn at the box's top-left corner on a fresh copy of the image,
    which is shown in the ``img`` window until a key is pressed.

    Args:
        test_patn: Folder containing the annotation/image pairs.
        desc: Descriptor object exposing ``describe(gray_image)``.
    """
    # NOTE: joblib.load unpickles the file; only load models you trust.
    model_predict = joblib.load("train_model.m")

    for xml in glob(os.path.join(test_patn, "*.xml")):
        # splitext swaps only the real extension, whatever its case.
        img_path = os.path.splitext(xml)[0] + ".jpg"
        if not os.path.exists(img_path):
            continue

        # Decode the image once per file rather than once per object.
        source_image = cv2.imread(img_path)
        if source_image is None:
            print("skip unreadable image:", img_path)
            continue

        for loc in readXml(xml):
            xmin, ymin, xmax, ymax = (int(value) for value in loc[1:])
            roi_rgb_img = source_image[ymin:ymax + 1, xmin:xmax, :]
            if roi_rgb_img.size == 0:
                # An empty crop would make cvtColor raise.
                continue
            roi_gray_img = cv2.cvtColor(roi_rgb_img, cv2.COLOR_BGR2GRAY)
            hist = desc.describe(roi_gray_img)
            prediction = model_predict.predict(hist.reshape(1, -1))

            # Draw on a copy so later crops never contain rendered text and
            # each window shows only the current object's label.
            rgb_image = source_image.copy()
            cv2.putText(rgb_image, str(prediction[0]), (xmin, ymin),
                        cv2.FONT_HERSHEY_COMPLEX, 5, (0, 255, 0), 12)
            cv2.namedWindow('img', cv2.WINDOW_KEEPRATIO)
            cv2.imshow('img', rgb_image)
            cv2.waitKey(0)
def readXml(xml_path):
    """Collect the name and bounding box of every ``object`` in an XML file.

    Prints the path and the parsed root element, then reads the first
    ``name``, ``xmin``, ``ymin``, ``xmax`` and ``ymax`` descendant of each
    ``object`` element.

    Args:
        xml_path: Path of the annotation file to parse.

    Returns:
        A list of ``(name, xmin, ymin, xmax, ymax)`` tuples in document
        order; every field is the raw text of the element (a string).
    """
    print(xml_path)
    document = parse(xml_path)
    root = document.documentElement
    print(root)

    def first_text(node, tag):
        # Text of the first child node of the first <tag> below ``node``.
        return node.getElementsByTagName(tag)[0].childNodes[0].nodeValue

    fields = ('name', 'xmin', 'ymin', 'xmax', 'ymax')
    return [
        tuple(first_text(obj, tag) for tag in fields)
        for obj in root.getElementsByTagName('object')
    ]
if __name__ == "__main__":
    # Training and test data live in the same folder in this script.
    xml_path = test_path = r"D:\work\data"

    # 24 sampling points on a circle of radius 8.
    desc = LocalBinaryPatterns(numPoints=24, radius=8)

    # Fit and save the classifier, then run it over the test folder.
    train(xml_path, desc)
    predict(test_path, desc)
参考:
https://blog.csdn.net/zouxy09/article/details/7929531
https://www.pyimagesearch.com/2015/12/07/local-binary-patterns-with-python-opencv/