Efficient Graph-Based Image Segmentation
论文:Efficient Graph-Based Image Segmentation
作者:Pedro F. Felzenszwalb, Daniel P. Huttenlocher
时间:2004
地址:https://cs.brown.edu/people/pfelzens/segment

一、完整代码
作者在文章开头地址中使用C++实现了这一过程,为了便于python学者理解,这里我们使用python代码进行实现
1.1 Python 完整程序
import cv2
import matplotlib.pyplot as plt
import numpy as np
class UnionFindSet:
"""
创建一个交并集
"""
def __init__(self, datalist):
self.father = {}
self.size = {}
for data in datalist:
self.father[data] = data
self.size[data] = 1
def find(self, node):
father = self.father[node]
if father != node:
if father != self.father[father]:
self.size[father] -= 1
father = self.find(father)
self.father[node] = father
return father
def union(self, node_a, node_b):
if node_a is None or node_b is None:
return
a_head = self.find(node_a)
b_head = self.find(node_b)
if a_head != b_head:
a_size = self.size[a_head]
b_size = self.size[b_head]
if a_size >= b_size:
self.father[b_head] = a_head
self.size[a_head] += b_size
else:
self.father[a_head] = b_head
self.size[b_head] += a_size
def the_same(self, node_a, node_b):
return self.find(node_a) == self.find(node_b)
class Edge:
def __init__(self, vi, vj, weight):
self.vi = vi
self.vj = vj
self.weight = weight
def gaussian_blur(img, kernel_size, sigma):
"""
img: the pic
kernel_size: size
sigma: Gaussian sigma
output: img
"""
img = cv2.GaussianBlur(np.array(img),(kernel_size,kernel_size),sigma,sigma)
return img
def calculate_weight(img, x1, y1, x2, y2):
r = img[x1][y1][0] - img[x2][y2][0]
g = img[x1][y1][1] - img[x2][y2][1]
b = img[x1][y1][2] - img[x2][y2][2]
return np.sqrt(np.sum(np.square(np.array([r, g, b]))))
def calculate_weights(img, h, w):
lst = []
for i in range(h):
for j in range(w):
# 右上角
if i != 0 and j < w - 1:
weight = calculate_weight(img, i, j, i - 1, j + 1)
edge = Edge(i * w + j, (i - 1) * w + j + 1, weight)
lst.append(edge)
# 右
if j < w - 1:
weight = calculate_weight(img, i, j, i, j + 1)
edge = Edge(i * w + j, i * w + j + 1, weight)
lst.append(edge)
# 右下角
if i != h - 1 and j < w - 1:
weight = calculate_weight(img, i, j, i + 1, j + 1)
edge = Edge(i * w + j, (i + 1) * w + j + 1, weight)
lst.append(edge)
# 下
if i != h - 1:
weight = calculate_weight(img, i, j, i + 1, j)
edge = Edge(i * w + j, (i + 1) * w + j, weight)
lst.append(edge)
lst = sorted(lst, key=lambda x: x.weight)
return lst
def get_size(S, node):
return S.size[S.find(node)]
def main(img, k, min_size):
h, w, chanles = img.shape
lst = calculate_weights(img.astype(int), h, w)
S = UnionFindSet(np.arange(h * w))
Int = np.zeros(shape=h * w)
# 因为从小到大排列 diff 就是weight
for edge in lst:
vi = edge.vi
vj = edge.vj
weight = edge.weight
if not S.the_same(vi, vj) and weight <= min(Int[vi] + k / get_size(S, vi), Int[vj] + k / get_size(S, vj)):
S.union(vi, vj)
Int[vi] = Int[vj] = max(Int[vi], Int[vj], weight)
for edge in lst:
vi = edge.vi
vj = edge.vj
if not S.the_same(vi, vj) and (get_size(S, vi) <= min_size or get_size(S, vj) <= min_size):
S.union(vi, vj)
Int[vi] = Int[vj] = max(Int[vi], Int[vj], weight)
trade = dict(zip(set(S.father.values()), range(len(set(S.father.values())))))
img_ = np.array([trade[item] for item in list(S.father.values())]).reshape(h, w)
return img_
def felzenszwalb(img, k, min_size):
img = gaussian_blur(img, 5,5)
img_ = main(img, k, min_size)
return img_
def get_color(origin_img, img):
"""
orgin_img: origin img
img: after process img
"""
new_img = np.zeros_like(origin_img)
for i in range(np.max(img)):
new_img[:,:,0][img == i] = np.random.randint(255)
new_img[:,:,1][img == i] = np.random.randint(255)
new_img[:,:,2][img == i] = np.random.randint(255)
return new_img
if __name__ == '__main__':
img = plt.imread('../data/cat-dog.jpg')
img_ = felzenszwalb(img, 5000, 500)
new_img = get_color(img, img_)
plt.imshow(new_img)
plt.show()
1.2 skimage 导包
import numpy as np
from skimage import segmentation
import matplotlib.pyplot as plt
def get_color(origin_img, img):
"""
orgin_img: origin img
img: after process img
"""
new_img = np.zeros_like(origin_img)
for i in range(np.max(img)):
new_img[:,:,0][img == i] = np.random.randint(255)
new_img[:,:,1][img == i] = np.random.randint(255)
new_img[:,:,2][img == i] = np.random.randint(255)
return new_img
if __name__ == '__main__':
img = plt.imread('../data/cat-dog.jpg')
img_ = segmentation.felzenszwalb(
img,
scale=500,
sigma=0.95
)
new_img = get_color(img, img_)
plt.imshow(new_img)
plt.show()
二、论文解读
2.1 目的及其意义
目的:将图像根据颜色RGB特征按区域进行分割
意义:将分割后得到的区域进行有效筛选后,可以进行例如区域推荐,目标检测等等高级操作,当然其本质还是图像分割
特点:该方法的一个重要特点是,它能够在低变异性图像区域保留细节,而忽略了高变异性图像区域的细节。也就是说在变化不大的区域可以对细节进行保留,在变化很大的区域对细节进行剔除,这个效果根据后面合并小区域得到的。
2.2 框架以及指标
正如论文题目所说,这个方法是基于Graph的图像分割技术,是用图 G = ( V , E ) G=(V,E) G=(V,E) 来表达问题并对问题进行分析的,在此问题中, G G G是无向图,其 V V V中每一个节点对应于图中每一个像素点的位置, E E E中的每一条边表示相邻的两个节点的信息以及其差异值即权重。
权重定义如下: w ( v i , v j ) = ∣ I ( p i ) − I ( p j ) ∣ = ( r i − r j ) 2 + ( g i − g j ) 2 + ( b i − b j ) 2 w(v_i, v_j ) = |I(p_i) − I(p_j )|= \sqrt{(r_i-r_j)^2 + (g_i-g_j)^2 + (b_i-b_j)^2} w(vi,vj)=∣I(pi)−I(pj)∣=(ri−rj)2+(gi−gj)2+(bi−bj)2 其中 r i , g i , b i r_i, g_i,b_i ri,gi,bi是图像在三个通道上的值的大小;
现在问题已经表示完毕,接下来我们需要对图像进行分割,其本质就是把 V V V中的节点分成不同的集合 C 1 , C 2 , … , C m . C_1,C_2, \dots,C_m. C1,C2,…,Cm. 为了表示集合中节点的整体关系,我们定义一个指标: I n t ( C ) = max e ∈ M S T ( C , E ) w ( v i , v j ) Int(C) = \max_{e\in MST(C,E)}w(v_i, v_j ) Int(C)=e∈MST(C,E)maxw(vi,vj) 这个指标可以表示集合内部的最大差异,其中MST是最小生成树,论文的算法主要是围绕MST的Kruskal算法展开,在这里我们可以通过对 E E E排序的方式进行构造,并不需要对MST的生成有要求,感兴趣可以自行了解;
上面对集合内部节点整体关系定义了一个指标,我们还需要对集合之间的关系定义一个指标,指标如下: D i f f ( C 1 , C 2 ) = min v i ∈ C 1 , v j ∈ C 2 , ( v i , v j ) ∈ E w ( v i , v j ) Diff(C_1,C_2)=\min_{v_i \in C_1, v_j \in C_2, (v_i, v_j) \in E}w(v_i, v_j) Diff(C1,C2)=vi∈C1,vj∈C2,(vi,vj)∈Eminw(vi,vj) 该指标可以表示集合之间的最小差异;
我们如何对集合之间进行合并呢?定义以下指标: D ( C 1 , C 2 ) = { T r u e D i f f ( C 1 , C 2 ) > M I n t ( C 1 , C 2 ) F a l s e o t h e r w i s e D(C_1,C_2)= \begin{cases} True & \quad Diff(C_1, C_2) > MInt(C_1, C_2)\\ False & \quad otherwise \end{cases} D(C1,C2)={TrueFalseDiff(C1,C2)>MInt(C1,C2)otherwise 其中True表示合并,False表示不合并,其中 M I n t ( C 1 , C 2 ) = m i n ( I n t ( C 1 ) + τ ( C 1 ) , I n t ( C 2 ) + τ ( C 2 ) ) MInt(C_1, C_2)=min(Int(C1) + τ (C1), Int(C2) + τ (C2)) MInt(C1,C2)=min(Int(C1)+τ(C1),Int(C2)+τ(C2)) τ ( C ) = k ∣ C ∣ τ (C)=\frac{k}{|C|} τ(C)=∣C∣k 这里 k k k是一个常数, ∣ C ∣ |C| ∣C∣表示集合 C C C的大小;
指标定义到此完毕!
2.3 论文算法流程
算法原文如下图所示:
- 初始化,设 V V V中有n个节点, E E E中有m条边, S 0 = ( C 1 , C 2 , … , C n ) S^0=(C_1, C_2, \dots, C_n) S0=(C1,C2,…,Cn) 即每一个节点单独成一个集合
- 对 E E E根据权重进行由小到大的排序
- 遍历 E E E中每个元素,得到每个元素中包含的两个节点 v i , v j v_i,v_j vi,vj,找到 v i , v j v_i,v_j vi,vj属于的集合 C i , C j C_i,C_j Ci,Cj,如果 C i = C j C_i=C_j Ci=Cj,则跳过,否则计算 D ( C i , C j ) D(C_i,C_j) D(Ci,Cj)判断是否合并,得到 S 0 , S 1 , … , S m S^0,S^1,\dots,S^m S0,S1,…,Sm
- S = S m S = S^m S=Sm
三、过程实现
为了适应实际情况,实现分为三步走:高斯模糊 -> 主要算法 -> 随机上色
3.1 导包
import cv2
import numpy as np
import matplotlib.pyplot as plt
3.2 高斯模糊
由于是对图像进行分割,所以不需要图片过于细节,把图像进行高斯平滑处理,去掉细节
def gaussian_blur(img, kernel_size, sigma):
"""
img: the pic
kernel_size: size
sigma: Gaussian sigma
output: img
"""
img = cv2.GaussianBlur(np.array(img),(kernel_size,kernel_size),sigma,sigma)
return img
在kernel_size = 5, sigma = 5 得到结果如下:

左边是原图, 右边是平滑过后的图像;
3.3 主要算法
首先创建一个并查集类
class UnionFindSet:
"""
创建一个并查集
"""
def __init__(self, datalist):
self.father = {}
self.size = {}
for data in datalist:
self.father[data] = data
self.size[data] = 1
def find(self, node):
father = self.father[node]
if father != node:
if father != self.father[father]:
self.size[father] -= 1
father = self.find(father)
self.father[node] = father
return father
def union(self, node_a, node_b):
if node_a is None or node_b is None:
return
a_head = self.find(node_a)
b_head = self.find(node_b)
if a_head != b_head:
a_size = self.size[a_head]
b_size = self.size[b_head]
if a_size >= b_size:
self.father[b_head] = a_head
self.size[a_head] += b_size
else:
self.father[a_head] = b_head
self.size[b_head] += a_size
def the_same(self, node_a, node_b):
return self.find(node_a) == self.find(node_b)
再进行主要算法计算
class Edge:
def __init__(self, vi, vj, weight):
self.vi = vi
self.vj = vj
self.weight = weight
def calculate_weight(img, x1, y1, x2, y2):
r = img[x1][y1][0] - img[x2][y2][0]
g = img[x1][y1][1] - img[x2][y2][1]
b = img[x1][y1][2] - img[x2][y2][2]
return np.sqrt(np.sum(np.square(np.array([r,g,b]))))
def calculate_weights(img, h, w):
lst = []
for i in range(h):
for j in range(w):
# 右上角
if i != 0 and j < w-1:
weight = calculate_weight(img, i, j, i-1,j+1)
edge = Edge(i*w+j, (i-1)*w+j+1, weight)
lst.append(edge)
# 右
if j < w-1:
weight = calculate_weight(img, i, j, i, j+1)
edge = Edge(i*w+j, i*w+j+1, weight)
lst.append(edge)
# 右下角
if i != h-1 and j < w-1:
weight = calculate_weight(img, i, j, i+1, j+1)
edge = Edge(i*w+j, (i+1)*w+j+1, weight)
lst.append(edge)
# 下
if i != h-1:
weight = calculate_weight(img, i, j, i+1, j)
edge = Edge(i*w+j, (i+1)*w+j, weight)
lst.append(edge)
lst = sorted(lst, key=lambda x:x.weight)
return lst
def get_size(S, node):
return S.size[S.find(node)]
def main(k, min_size):
h, w, chanles = img.shape
lst = calculate_weights(img.astype(int), h, w)
S = UnionFindSet(np.arange(h*w))
Int = np.zeros(shape=h*w)
# 因为从小到大排列 diff 就是weight
for edge in lst:
vi = edge.vi
vj = edge.vj
weight = edge.weight
if not S.the_same(vi, vj) and weight <= min(Int[vi] + k/get_size(S, vi), Int[vj] + k/get_size(S, vj)):
S.union(vi, vj)
Int[vi] = Int[vj] = max(Int[vi], Int[vj], weight)
# 合并小集合
for edge in lst:
vi = edge.vi
vj = edge.vj
if not S.the_same(vi, vj) and (get_size(S, vi) <= min_size or get_size(S, vj) <= min_size):
S.union(vi, vj)
Int[vi] = Int[vj] = max(Int[vi], Int[vj], weight)
trade = dict(zip(set(S.father.values()),range(len(set(S.father.values())))))
img_ = np.array([trade[item] for item in list(S.father.values())]).reshape(h,w)
return img_
main(5000,500)
3.4 随机上色
相比于之前的步骤,这一步很简单
def get_color(origin_img, img):
"""
orgin_img: the original img
img: the img after process
"""
new_img = np.zeros_like(origin_img)
for i in range(np.max(img)):
new_img[:,:,0][img == i] = np.random.randint(255)
new_img[:,:,1][img == i] = np.random.randint(255)
new_img[:,:,2][img == i] = np.random.randint(255)
return new_img
得到结果如下:
四、整体总结
没什么好总结的,干就完了!