grab-cut 算法（附代码可复现）

原创已于 2025-05-22 20:51:26 修改

· 319 阅读

1 ·

版权

文章标签：

#算法 #计算机视觉 #人工智能

于 2025-05-22 20:50:01 首次发布

GrabCut是由Rother等人在2004年提出的交互式前景/背景分割算法，是对Graph Cut算法的重要改进。它的核心思想是通过最少的用户交互（通常只需要画一个矩形框）来实现精确的目标分割。

核心原理

1. 基本思想

将图像分割问题转化为图论中的最小割问题
使用高斯混合模型(GMM)来建模前景和背景的颜色分布
通过迭代优化不断改善分割结果

2. 算法流程

1. 用户输入矩形框标注目标区域
2. 初始化：矩形外=背景，矩形内=可能前景
3. 建立前景和背景的高斯混合模型
4. 构建图结构，计算最小割
5. 更新GMM参数
6. 重复步骤4-5直到收敛

技术细节

图结构构建

节点: 每个像素作为一个节点
边权重:
- N-links: 相邻像素间的平滑项
- T-links: 像素到源点/汇点的数据项

能量函数

E = E_data + λ × E_smooth

E_data: 数据项（基于GMM的似然）
E_smooth: 平滑项（相邻像素一致性）

高斯混合模型

前景和背景各用K个高斯分量建模（通常K=5）
通过EM算法迭代更新GMM参数

python 代码

import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import maximum_flow
import networkx as nx
from PIL import Image
import argparse
import os

class GrabCut:
    """
    GrabCut算法的完整实现
    基于论文: "GrabCut" Interactive Foreground Extraction using Iterated Graph Cuts
    """
    
    def __init__(self, n_components=5, gamma=50, lambda_val=9*50, n_iter=5):
        """
        初始化GrabCut算法参数
        
        Args:
            n_components: GMM组件数量
            gamma: 边缘权重参数
            lambda_val: 平滑项权重
            n_iter: 迭代次数
        """
        self.n_components = n_components
        self.gamma = gamma
        self.lambda_val = lambda_val
        self.n_iter = n_iter
        
        # 标签定义
        self.GC_BGD = 0      # 背景
        self.GC_FGD = 1      # 前景
        self.GC_PR_BGD = 2   # 可能背景
        self.GC_PR_FGD = 3   # 可能前景
        
    def init_mask_with_rect(self, img_shape, rect):
        """
        用矩形框初始化mask
        
        Args:
            img_shape: 图像形状 (height, width)
            rect: 矩形框 (x, y, width, height)
        
        Returns:
            mask: 初始化的mask
        """
        mask = np.zeros(img_shape[:2], dtype=np.uint8)
        mask.fill(self.GC_BGD)  # 全部设为背景
        
        x, y, w, h = rect
        # 矩形内部设为可能前景
        mask[y:y+h, x:x+w] = self.GC_PR_FGD
        
        return mask
    
    def init_gmm(self, img, mask):
        """
        初始化前景和背景的高斯混合模型
        
        Args:
            img: 输入图像
            mask: 分割mask
            
        Returns:
            bgd_gmm, fgd_gmm: 背景和前景的GMM模型
        """
        # 提取背景和前景像素
        bgd_pixels = img[(mask == self.GC_BGD) | (mask == self.GC_PR_BGD)]
        fgd_pixels = img[(mask == self.GC_FGD) | (mask == self.GC_PR_FGD)]
        
        # 创建GMM模型
        bgd_gmm = GaussianMixture(n_components=self.n_components, random_state=42)
        fgd_gmm = GaussianMixture(n_components=self.n_components, random_state=42)
        
        # 训练GMM
        if len(bgd_pixels) > 0:
            bgd_gmm.fit(bgd_pixels.reshape(-1, 3))
        if len(fgd_pixels) > 0:
            fgd_gmm.fit(fgd_pixels.reshape(-1, 3))
            
        return bgd_gmm, fgd_gmm
    
    def calculate_beta(self, img):
        """
        计算beta参数用于边缘权重
        """
        h, w = img.shape[:2]
        beta = 0
        count = 0
        
        # 水平相邻像素
        for i in range(h):
            for j in range(w-1):
                diff = img[i, j] - img[i, j+1]
                beta += np.sum(diff**2)
                count += 1
        
        # 垂直相邻像素
        for i in range(h-1):
            for j in range(w):
                diff = img[i, j] - img[i+1, j]
                beta += np.sum(diff**2)
                count += 1
        
        # 对角相邻像素
        for i in range(h-1):
            for j in range(w-1):
                diff = img[i, j] - img[i+1, j+1]
                beta += np.sum(diff**2)
                count += 1
                
                diff = img[i, j+1] - img[i+1, j]
                beta += np.sum(diff**2)
                count += 1
        
        beta = 1.0 / (2 * beta / count) if beta > 0 else 0
        return beta
    
    def construct_graph(self, img, mask, bgd_gmm, fgd_gmm):
        """
        构建图结构用于最小割
        """
        h, w = img.shape[:2]
        n_pixels = h * w
        
        # 计算beta参数
        beta = self.calculate_beta(img)
        
        # 创建图的容量矩阵
        # 节点编号: 0 - source, 1 to n_pixels - 像素节点, n_pixels+1 - sink
        n_nodes = n_pixels + 2
        source = 0
        sink = n_pixels + 1
        
        # 初始化容量矩阵
        capacity = np.zeros((n_nodes, n_nodes))
        
        # 计算数据项(T-links)
        for i in range(h):
            for j in range(w):
                pixel_idx = i * w + j + 1  # 像素节点编号(从1开始)
                pixel_color = img[i, j].reshape(1, -1)
                
                # 计算前景和背景的似然
                if len(bgd_gmm.means_) > 0:
                    bgd_prob = -bgd_gmm.score(pixel_color)
                else:
                    bgd_prob = 0
                    
                if len(fgd_gmm.means_) > 0:
                    fgd_prob = -fgd_gmm.score(pixel_color)  
                else:
                    fgd_prob = 0
                
                # 根据mask状态设置T-links
                if mask[i, j] == self.GC_BGD:
                    # 确定背景
                    capacity[source, pixel_idx] = 0
                    capacity[pixel_idx, sink] = self.lambda_val
                elif mask[i, j] == self.GC_FGD:
                    # 确定前景
                    capacity[source, pixel_idx] = self.lambda_val
                    capacity[pixel_idx, sink] = 0
                else:
                    # 可能前景/背景
                    capacity[source, pixel_idx] = bgd_prob
                    capacity[pixel_idx, sink] = fgd_prob
        
        # 计算平滑项(N-links)
        for i in range(h):
            for j in range(w):
                pixel_idx = i * w + j + 1
                
                # 右邻居
                if j < w - 1:
                    neighbor_idx = i * w + (j + 1) + 1
                    diff = img[i, j] - img[i, j+1]
                    weight = self.gamma * np.exp(-beta * np.sum(diff**2))
                    capacity[pixel_idx, neighbor_idx] = weight
                    capacity[neighbor_idx, pixel_idx] = weight
                
                # 下邻居
                if i < h - 1:
                    neighbor_idx = (i + 1) * w + j + 1
                    diff = img[i, j] - img[i+1, j]
                    weight = self.gamma * np.exp(-beta * np.sum(diff**2))
                    capacity[pixel_idx, neighbor_idx] = weight
                    capacity[neighbor_idx, pixel_idx] = weight
                
                # 对角邻居
                if i < h - 1 and j < w - 1:
                    neighbor_idx = (i + 1) * w + (j + 1) + 1
                    diff = img[i, j] - img[i+1, j+1]
                    weight = self.gamma * np.exp(-beta * np.sum(diff**2)) / np.sqrt(2)
                    capacity[pixel_idx, neighbor_idx] = weight
                    capacity[neighbor_idx, pixel_idx] = weight
                
                if i < h - 1 and j > 0:
                    neighbor_idx = (i + 1) * w + (j - 1) + 1
                    diff = img[i, j] - img[i+1, j-1]
                    weight = self.gamma * np.exp(-beta * np.sum(diff**2)) / np.sqrt(2)
                    capacity[pixel_idx, neighbor_idx] = weight
                    capacity[neighbor_idx, pixel_idx] = weight
        
        return capacity, source, sink
    
    def solve_min_cut(self, capacity, source, sink):
        """
        求解最小割问题
        """
        # 转换为稀疏矩阵
        capacity_sparse = csr_matrix(capacity)
        
        # 计算最大流(最小割)
        flow_value, flow_dict = maximum_flow(capacity_sparse, source, sink)
        
        return flow_dict
    
    def update_mask(self, flow_dict, img_shape, source, sink):
        """
        根据最小割结果更新mask
        """
        h, w = img_shape[:2]
        mask = np.zeros((h, w), dtype=np.uint8)
        
        for i in range(h):
            for j in range(w):
                pixel_idx = i * w + j + 1
                
                # 检查是否与source连通(前景)
                if flow_dict.residual[source, pixel_idx] > 0:
                    mask[i, j] = self.GC_PR_FGD
                else:
                    mask[i, j] = self.GC_PR_BGD
        
        return mask
    
    def grabcut(self, img, rect, n_iter=None):
        """
        主要的GrabCut算法流程
        
        Args:
            img: 输入图像
            rect: 初始矩形框 (x, y, width, height)
            n_iter: 迭代次数
            
        Returns:
            mask: 分割结果mask
        """
        if n_iter is None:
            n_iter = self.n_iter
            
        # 初始化mask
        mask = self.init_mask_with_rect(img.shape, rect)
        
        # 迭代优化
        for iteration in range(n_iter):
            print(f"GrabCut迭代 {iteration + 1}/{n_iter}")
            
            # 学习GMM模型
            bgd_gmm, fgd_gmm = self.init_gmm(img, mask)
            
            # 构建图
            capacity, source, sink = self.construct_graph(img, mask, bgd_gmm, fgd_gmm)
            
            # 求解最小割
            flow_dict = self.solve_min_cut(capacity, source, sink)
            
            # 更新mask
            new_mask = self.update_mask(flow_dict, img.shape, source, sink)
            
            # 检查收敛
            if np.array_equal(mask, new_mask):
                print(f"收敛于第 {iteration + 1} 次迭代")
                break
                
            mask = new_mask
            
        return mask


class GrabCutOpenCV:
    """
    使用OpenCV实现的GrabCut算法封装
    """
    
    def __init__(self):
        pass
    
    def grabcut(self, img, rect, n_iter=5):
        """
        使用OpenCV的GrabCut实现
        
        Args:
            img: 输入图像
            rect: 初始矩形框 (x, y, width, height)  
            n_iter: 迭代次数
            
        Returns:
            mask: 分割结果
        """
        # 初始化mask和模型
        mask = np.zeros(img.shape[:2], np.uint8)
        bgd_model = np.zeros((1, 65), np.float64)
        fgd_model = np.zeros((1, 65), np.float64)
        
        # 执行GrabCut
        cv2.grabCut(img, mask, rect, bgd_model, fgd_model, n_iter, cv2.GC_INIT_WITH_RECT)
        
        # 创建输出mask
        mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')
        
        return mask2


def demo_grabcut():
    """
    GrabCut算法演示
    """
    # 创建测试图像
    img = create_test_image()
    
    # 定义矩形框 (x, y, width, height)
    rect = (50, 50, 200, 150)
    
    # 方法1: 自定义实现
    print("使用自定义GrabCut实现...")
    grabcut_custom = GrabCut(n_iter=3)
    mask_custom = grabcut_custom.grabcut(img, rect)
    
    # 方法2: OpenCV实现
    print("使用OpenCV GrabCut实现...")
    grabcut_opencv = GrabCutOpenCV()
    mask_opencv = grabcut_opencv.grabcut(img, rect)
    
    # 可视化结果
    visualize_results(img, rect, mask_custom, mask_opencv)


def create_test_image():
    """
    创建测试图像
    """
    # 创建一个简单的测试图像
    img = np.zeros((300, 400, 3), dtype=np.uint8)
    
    # 背景
    img[:, :] = [100, 150, 200]  # 蓝色背景
    
    # 前景对象(红色圆形)
    center = (200, 150)
    radius = 80
    cv2.circle(img, center, radius, (50, 50, 200), -1)  # 红色圆
    
    # 添加一些噪声
    noise = np.random.randint(-20, 20, img.shape, dtype=np.int16)
    img = np.clip(img.astype(np.int16) + noise, 0, 255).astype(np.uint8)
    
    return img


def visualize_results(img, rect, mask_custom, mask_opencv):
    """
    可视化GrabCut结果
    """
    plt.figure(figsize=(15, 10))
    
    # 原始图像
    plt.subplot(2, 3, 1)
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.title('原始图像')
    plt.axis('off')
    
    # 矩形框
    plt.subplot(2, 3, 2)
    img_with_rect = img.copy()
    x, y, w, h = rect
    cv2.rectangle(img_with_rect, (x, y), (x+w, y+h), (0, 255, 0), 2)
    plt.imshow(cv2.cvtColor(img_with_rect, cv2.COLOR_BGR2RGB))
    plt.title('初始矩形框')
    plt.axis('off')
    
    # 自定义实现结果
    plt.subplot(2, 3, 3)
    result_custom = img.copy()
    result_custom[mask_custom == 0] = [0, 0, 0]  # 背景变黑
    plt.imshow(cv2.cvtColor(result_custom, cv2.COLOR_BGR2RGB))
    plt.title('自定义GrabCut结果')
    plt.axis('off')
    
    # OpenCV实现结果
    plt.subplot(2, 3, 4)
    result_opencv = img.copy()
    result_opencv[mask_opencv == 0] = [0, 0, 0]  # 背景变黑
    plt.imshow(cv2.cvtColor(result_opencv, cv2.COLOR_BGR2RGB))
    plt.title('OpenCV GrabCut结果')
    plt.axis('off')
    
    # 自定义mask
    plt.subplot(2, 3, 5)
    plt.imshow(mask_custom, cmap='gray')
    plt.title('自定义实现Mask')
    plt.axis('off')
    
    # OpenCV mask
    plt.subplot(2, 3, 6) 
    plt.imshow(mask_opencv, cmap='gray')
    plt.title('OpenCV实现Mask')
    plt.axis('off')
    
    plt.tight_layout()
    plt.show()


def batch_grabcut(image_dir, output_dir, rects_file):
    """
    批量处理图像的GrabCut分割
    
    Args:
        image_dir: 图像目录
        output_dir: 输出目录
        rects_file: 矩形框文件(每行格式: filename x y w h)
    """
    grabcut = GrabCutOpenCV()
    
    # 读取矩形框信息
    rects = {}
    with open(rects_file, 'r') as f:
        for line in f:
            parts = line.strip().split()
            filename = parts[0]
            rect = tuple(map(int, parts[1:5]))
            rects[filename] = rect
    
    # 处理每张图像
    for filename, rect in rects.items():
        img_path = os.path.join(image_dir, filename)
        if not os.path.exists(img_path):
            continue
            
        # 读取图像
        img = cv2.imread(img_path)
        if img is None:
            continue
            
        print(f"处理图像: {filename}")
        
        # 执行GrabCut
        mask = grabcut.grabcut(img, rect)
        
        # 保存结果
        result = img.copy()
        result[mask == 0] = [0, 0, 0]  # 背景变黑
        
        output_path = os.path.join(output_dir, f"grabcut_{filename}")
        cv2.imwrite(output_path, result)
        
        # 保存mask
        mask_path = os.path.join(output_dir, f"mask_{filename}")
        cv2.imwrite(mask_path, mask * 255)


def main():
    """
    主函数
    """
    parser = argparse.ArgumentParser(description='GrabCut算法演示')
    parser.add_argument('--mode', choices=['demo', 'batch'], default='demo',
                       help='运行模式')
    parser.add_argument('--image_dir', help='图像目录(批量模式)')
    parser.add_argument('--output_dir', help='输出目录(批量模式)')
    parser.add_argument('--rects_file', help='矩形框文件(批量模式)')
    
    args = parser.parse_args()
    
    if args.mode == 'demo':
        demo_grabcut()
    elif args.mode == 'batch':
        if not all([args.image_dir, args.output_dir, args.rects_file]):
            print("批量模式需要指定 --image_dir, --output_dir, --rects_file")
            return
        batch_grabcut(args.image_dir, args.output_dir, args.rects_file)


if __name__ == "__main__":
    main()

算法缺陷：

核心算法缺陷

1. 颜色假设过强

# 问题示例：前景背景颜色相似时失效
前景：绿色苹果
背景：绿色叶子
结果：无法有效区分，分割边界模糊

根本原因：仅基于RGB颜色建模，忽略纹理、形状等信息
失效场景：迷彩服、绿草中的绿色物体、雪中的白色物体

2. 高斯混合模型局限

假设过简单：认为前景/背景颜色服从高斯分布
组件数固定：通常K=5，无法自适应调整
多模态处理差：复杂纹理和渐变色彩建模不准确

3. 图割方法限制

局部最优：容易陷入局部最优解
边界不准确：在细节边缘（如毛发、半透明区域）表现差
拓扑约束：难处理复杂拓扑结构（如网状物体）