# -*- coding: utf-8 -*-
import os

import cv2 as cv
import numpy
import skimage.color
import skimage.feature
import skimage.io
import skimage.segmentation
import skimage.transform
import skimage.util
# "Selective Search for Object Recognition" by J.R.R. Uijlings et al.
#
#  - Modified version with LBP extractor for texture vectorization


def _generate_segments(im_orig, scale, sigma, min_size):
    """Attach a Felzenszwalb-Huttenlocher label map to the image.

    The image is segmented with ``skimage.segmentation.felzenszwalb`` and the
    resulting label map is stored as an extra channel at index 3.

    Parameters
    ----------
    im_orig : ndarray
        Input image; the caller in this file asserts it has 3 channels.
    scale, sigma, min_size
        Forwarded unchanged to ``skimage.segmentation.felzenszwalb``
        (``min_size`` bounds the size of the produced segments).

    Returns
    -------
    ndarray
        The image with one more channel; ``[:, :, 3]`` holds the region label
        of each pixel.  Because a float64 zero channel is concatenated, the
        result has the promoted dtype of the image and float64.
    """
    label_map = skimage.segmentation.felzenszwalb(
        skimage.util.img_as_float(im_orig),
        scale=scale,
        sigma=sigma,
        min_size=min_size,
    )

    # Add an empty channel, then fill it with the segment labels.
    empty_channel = numpy.zeros(im_orig.shape[:2] + (1,))
    labelled = numpy.concatenate((im_orig, empty_channel), axis=2)
    labelled[:, :, 3] = label_map
    return labelled
def _sim_colour(r1, r2):
    """Return the colour similarity of two regions.

    This is the histogram intersection of the regions' ``"hist_c"`` entries,
    i.e. the sum of the bin-wise minima.
    """
    bin_pairs = zip(r1["hist_c"], r2["hist_c"])
    return sum(min(left, right) for left, right in bin_pairs)


def _sim_texture(r1, r2):
    """Return the texture similarity of two regions.

    This is the histogram intersection of the regions' ``"hist_t"`` entries,
    i.e. the sum of the bin-wise minima.
    """
    bin_pairs = zip(r1["hist_t"], r2["hist_t"])
    return sum(min(left, right) for left, right in bin_pairs)


def _sim_size(r1, r2, imsize):
    """Return the size similarity of two regions.

    Computed as one minus the fraction of ``imsize`` that the two regions'
    ``"size"`` values add up to, so smaller pairs score higher.
    """
    combined = r1["size"] + r2["size"]
    return 1.0 - combined / imsize
def _sim_fill(r1, r2, imsize):
    """Return the fill similarity of two regions.

    The joint bounding box of both regions is formed from their
    ``min_x/min_y/max_x/max_y`` entries; the part of that box not accounted
    for by the two ``"size"`` values, as a fraction of ``imsize``, is
    subtracted from one.
    """
    left = min(r1["min_x"], r2["min_x"])
    right = max(r1["max_x"], r2["max_x"])
    top = min(r1["min_y"], r2["min_y"])
    bottom = max(r1["max_y"], r2["max_y"])

    bbsize = (right - left) * (bottom - top)
    uncovered = bbsize - r1["size"] - r2["size"]
    return 1.0 - uncovered / imsize
def _calc_sim(r1, r2, imsize):
    """Return the overall similarity of two regions.

    The result is the sum of the colour, texture, size and fill similarities.
    """
    colour = _sim_colour(r1, r2)
    texture = _sim_texture(r1, r2)
    size = _sim_size(r1, r2, imsize)
    fill = _sim_fill(r1, r2, imsize)
    return colour + texture + size + fill


def _calc_colour_hist(img):
    """Return the concatenated per-channel colour histogram of a region.

    ``img`` is indexed as ``img[:, channel]`` for channels 0, 1 and 2, so it
    is a 2-D array with one row per pixel (the original note says the
    channels are HSV).  Each channel is binned into 25 bins over the range
    (0.0, 255.0); values outside that range are ignored by
    ``numpy.histogram``.  The number of bins follows
    [uijlings_ijcv2013_draft.pdf].

    The output has BINS * 3 entries and is divided by the number of rows in
    ``img``.
    """
    BINS = 25

    channel_counts = [
        numpy.histogram(img[:, channel], BINS, (0.0, 255.0))[0]
        for channel in range(3)
    ]
    # The leading empty float array keeps the result in floating point.
    hist = numpy.concatenate([numpy.array([])] + channel_counts)

    # normalise by the pixel count of the region
    return hist / len(img)
def _calc_texture_gradient(img):
    """Return a per-channel texture map for the whole image.

    The original Selective Search algorithm proposed Gaussian derivatives for
    8 orientations; this version uses LBP instead:
    ``skimage.feature.local_binary_pattern`` is called with arguments
    ``8, 1.0`` on each of the first three channels.

    The output has the same first three dimensions as ``img``; any channel
    beyond the first three is left at zero.
    """
    texture = numpy.zeros(img.shape[:3])

    for channel in range(3):
        texture[:, :, channel] = skimage.feature.local_binary_pattern(
            img[:, :, channel], 8, 1.0)

    return texture
def _calc_texture_hist(img):
    """Return the concatenated per-channel texture histogram of a region.

    ``img`` is indexed as ``img[:, channel]`` for channels 0, 1 and 2, so it
    is a 2-D array with one row per pixel.  Each channel is binned into 10
    bins over the range (0.0, 1.0); values outside that range are ignored by
    ``numpy.histogram``.

    The output has BINS * 3 entries and is divided by the number of rows in
    ``img``.
    """
    BINS = 10

    channel_counts = [
        numpy.histogram(img[:, channel], BINS, (0.0, 1.0))[0]
        for channel in range(3)
    ]
    # The leading empty float array keeps the result in floating point.
    hist = numpy.concatenate([numpy.array([])] + channel_counts)

    # normalise by the pixel count of the region
    return hist / len(img)
def _extract_regions(img):
    """Collect bounding box, size and histograms for every labelled region.

    Parameters
    ----------
    img : ndarray
        Image whose channel 3 holds the region label of each pixel and whose
        first three channels hold the colour data.

    Returns
    -------
    dict
        Maps each region label to a dict with the keys ``min_x``, ``min_y``,
        ``max_x``, ``max_y`` (bounding box of the region's pixels),
        ``labels`` (initialised to 1), ``size`` (number of pixels carrying the
        label, i.e. the area of the region itself, not of its bounding box),
        ``hist_c`` (colour histogram) and ``hist_t`` (texture histogram).
        Labels appear in the order in which they are first met when the image
        is scanned row by row.
    """
    label_map = img[:, :, 3]

    # HSV view of the colour channels, used for the colour histograms.
    hsv = skimage.color.rgb2hsv(img[:, :, :3])

    # Texture map of the whole image, used for the texture histograms.
    tex_grad = _calc_texture_gradient(img)

    # NOTE(review): the histogram helpers bin over fixed ranges ((0, 255) for
    # colour, (0, 1) for texture); confirm those match the value ranges that
    # rgb2hsv and local_binary_pattern actually produce for this input.

    # Unique labels, re-ordered by first occurrence in row-major order so the
    # resulting dict keeps the ordering of a pixel-by-pixel scan.
    labels, first_seen = numpy.unique(label_map, return_index=True)

    R = {}
    for label in labels[numpy.argsort(first_seen)]:
        # One boolean mask per region serves the bounding box and both
        # histograms.
        mask = label_map == label
        ys, xs = numpy.nonzero(mask)

        # Pixels of this region as a 2-D array (one row per pixel).
        masked_pixels = hsv[mask]

        R[label] = {
            "min_x": int(xs.min()),
            "min_y": int(ys.min()),
            "max_x": int(xs.max()),
            "max_y": int(ys.max()),
            "labels": 1,
            "size": len(masked_pixels),
            "hist_c": _calc_colour_hist(masked_pixels),
            "hist_t": _calc_texture_hist(tex_grad[mask]),
        }

    return R
def _extract_neighbours(regions):
    """Return every pair of regions whose bounding boxes overlap.

    Parameters
    ----------
    regions : dict
        Maps a region key to a dict holding ``min_x``, ``min_y``, ``max_x``
        and ``max_y``.

    Returns
    -------
    list
        ``((key_a, region_a), (key_b, region_b))`` tuples, one per
        overlapping pair; ``a`` always precedes ``b`` in the iteration order
        of ``regions``.
    """

    def intersect(a, b):
        # Two boxes overlap exactly when their x-intervals and y-intervals
        # both overlap (touching edges count).  Testing only whether a corner
        # of ``b`` lies inside ``a`` would miss the cases where ``a`` sits
        # entirely inside ``b`` or where the boxes cross without containing
        # each other's corners.
        return (a["min_x"] <= b["max_x"] and b["min_x"] <= a["max_x"]
                and a["min_y"] <= b["max_y"] and b["min_y"] <= a["max_y"])

    items = list(regions.items())

    neighbours = []
    for index, a in enumerate(items[:-1]):
        for b in items[index + 1:]:
            if intersect(a[1], b[1]):
                neighbours.append((a, b))

    return neighbours
def _merge_regions(r1, r2):
    """Return a new region describing the union of ``r1`` and ``r2``.

    The bounding box is the joint box of both inputs, ``size`` and ``labels``
    are added, and each histogram is the size-weighted average of the two
    input histograms.  Neither input is modified.
    """
    size_1, size_2 = r1["size"], r2["size"]
    new_size = size_1 + size_2

    merged = {
        "min_x": min(r1["min_x"], r2["min_x"]),
        "min_y": min(r1["min_y"], r2["min_y"]),
        "max_x": max(r1["max_x"], r2["max_x"]),
        "max_y": max(r1["max_y"], r2["max_y"]),
        "size": new_size,
    }

    # Size-weighted mean of the two histograms, for colour and texture alike.
    for hist_key in ("hist_c", "hist_t"):
        weighted = r1[hist_key] * size_1 + r2[hist_key] * size_2
        merged[hist_key] = weighted / new_size

    # Running count contributed by both inputs.
    merged["labels"] = r1["labels"] + r2["labels"]
    return merged
def selective_search(im_orig, scale=1.0, sigma=0.8, min_size=50):
    """Selective Search

    Parameters
    ----------
    im_orig : ndarray
        Input image; must have exactly 3 channels.
    scale : int
        Free parameter. Higher means larger clusters in felzenszwalb
        segmentation.
    sigma : float
        Width of Gaussian kernel for felzenszwalb segmentation.
    min_size : int
        Minimum component size for felzenszwalb segmentation.

    Returns
    -------
    img : ndarray
        Image with region label; the label is stored in the 4th value of
        each pixel [r, g, b, (region)].
    regions : list of dict
        One entry per region, covering the initial segments as well as every
        merged region created during the hierarchical grouping:
        [
            {
                'rect': (min_x, min_y, max_x - min_x, max_y - min_y),
                'size': region size,
                'labels': label count of the region
            },
            ...
        ]
    """
    # The input image is required to have 3 channels.
    assert im_orig.shape[2] == 3, "3channels image is expected"

    # Step 1: fine-grained segmentation.  The result has the same height and
    # width as the input plus the region label as 4th channel.
    img = _generate_segments(im_orig, scale, sigma, min_size)
    if img is None:
        return None, {}

    # Number of pixels in the image.
    imsize = img.shape[0] * img.shape[1]

    # Step 2: bounding box, size and histograms of every initial region.
    R = _extract_regions(img)

    # Pairs of regions treated as neighbours.
    neighbours = _extract_neighbours(R)

    # Step 3: initial similarity of every neighbouring pair.
    S = {}
    for (key_a, region_a), (key_b, region_b) in neighbours:
        S[(key_a, key_b)] = _calc_sim(region_a, region_b, imsize)

    # Step 4: hierarchical grouping - keep merging the most similar pair.
    while S:
        # Pair with the highest similarity.
        (i, j), _ = sorted(S.items(), key=lambda entry: entry[1])[-1]

        # Register the merged region under a fresh key; regions i and j stay
        # in R.
        t = max(R) + 1.0
        R[t] = _merge_regions(R[i], R[j])

        # Similarities that involve i or j are out of date now.
        stale_pairs = [pair for pair in S if (i in pair) or (j in pair)]
        for pair in stale_pairs:
            del S[pair]

        # Recompute similarities between the new region and the former
        # neighbours of i and j.
        for pair in stale_pairs:
            if pair == (i, j):
                continue
            n = pair[1] if pair[0] in (i, j) else pair[0]
            S[(t, n)] = _calc_sim(R[t], R[n], imsize)

    # Convert the region table into the output format.
    regions = [
        {
            'rect': (
                r['min_x'], r['min_y'],
                r['max_x'] - r['min_x'], r['max_y'] - r['min_y']),
            'size': r['size'],
            'labels': r['labels'],
        }
        for r in R.values()
    ]

    return img, regions
if __name__ == '__main__':
    # Demo: run selective search on one image, save every proposal as a crop
    # and display the proposals drawn on the image.
    img_path = './images/11.png'
    output_dir = './output'

    img = cv.imread(img_path)
    # cv.imread signals an unreadable or missing file by returning None
    # instead of raising, so fail early with a clear message.
    if img is None:
        raise SystemExit('failed to read image: {}'.format(img_path))

    # cv.imwrite does not create missing directories.
    os.makedirs(output_dir, exist_ok=True)

    print("开始ss候选框获取....")
    img_lbl, regions = selective_search(img, scale=1000, sigma=0.9, min_size=100)
    print(regions)
    print("完成候选框的获取....")
    print(img_lbl.shape)

    show_image = img.copy()
    for k, region in enumerate(regions):
        # Top-left corner plus width and height of the proposal.
        x, y, w, h = (int(value) for value in region['rect'])
        show_image = cv.rectangle(
            show_image, pt1=(x, y), pt2=(w + x, h + y), color=[0, 255, 0])

        # Crop the proposal; a rect with zero width or height yields an empty
        # crop, which cannot be written as an image, so it is skipped.
        tmp_img = img[y:y + h, x:x + w, :]
        if tmp_img.size == 0:
            continue
        cv.imwrite(os.path.join(output_dir, 'img_{}.jpg'.format(k)), tmp_img)

    cv.imshow('image', img)
    cv.imshow('show_image', show_image)
    cv.imshow('im_mask', img_lbl[:, :, 3])
    cv.waitKey(0)
    cv.destroyAllWindows()