Python机器视觉（4）——图像分割

之剑%

已于 2024-03-30 14:48:48 修改

阅读量160

点赞数 3

分类专栏：机器视觉文章标签：计算机视觉人工智能

于 2024-03-26 13:57:03 首次发布

本文链接：https://blog.csdn.net/qq_64474928/article/details/137044240

版权

机器视觉专栏收录该内容

4 篇文章 0 订阅

订阅专栏

基于像素的分割

阈值法

全局阈值法

import numpy as np 
import cv2 
from matplotlib import pyplot as plt 

#读取图片
img = cv2.imread('segmentation3.png')
# 灰度化
gray_img = 0.2126*img[:,:,2] + 0.7152*img[:,:,1] + 0.0722*img[:,:,0]
gray_img = gray_img.astype(np.uint8)
#初始化阈值
threshold = []
threshold.append(128)
for gray in range(0,256):
    v0 = gray_img[np.where(gray_img < threshold[-1])]
    v1 = gray_img[np.where(gray_img >= threshold[-1])]
    if len(v0) != 0:
        u0 = np.mean(v0)
    else:
        u0 = 0
    if len(v1) != 0:
        u1 = np.mean(v1)
    else:
        u1 = 0
    threshold.append((u0 + u1)/2)
    if threshold[-1] == threshold[-2]:
        break
print('threshold = ',threshold[-1])
#二值化，但用更新后的阈值threshold[-1]
gray_img[gray_img>=threshold[-1]] = 255
gray_img[gray_img<threshold[-1]] = 0
img[:,:,0] = gray_img
img[:,:,1] = gray_img
img[:,:,2] = gray_img
plt.imshow(img)
plt.show()

如果想以灰度图为掩膜，求出分隔后的图像，可以改成以下代码：

img[gray_img != 255,:] = 0
cv2.imshow('image',img)
cv2.waitKey(0)
cv2.destroyAllWindows()

局部阈值法

下面是局部的全局阈值法

import numpy as np 
import cv2 
from matplotlib import pyplot as plt 
import copy
 
#读取图片
img = cv2.imread('segmentation3.png')
# 灰度化
gray_img = 0.2126*img[:,:,2] + 0.7152*img[:,:,1] + 0.0722*img[:,:,0]
gray_img = gray_img.astype(np.uint8)

for row in range(0,gray_img.shape[0],16):
    for col in range(0,gray_img.shape[1],16):
        threshold = []
        threshold.append(128)
        for gray in range(0,256):
            v0 = gray_img[np.where(gray_img[row:row+16,col:col+16] < threshold[-1])]
            v1 = gray_img[np.where(gray_img[row:row+16,col:col+16] >= threshold[-1])]
            if len(v0) != 0:
                u0 = np.mean(v0)
            else:
                u0 = 0
            if len(v1) != 0:
                u1 = np.mean(v1)
            else:
                u1 = 0
            threshold.append((u0 + u1)/2)
            if threshold[-1] == threshold[-2]:
                break
        #print('threshold = ',threshold[-1])
        #二值化，但用更新后的阈值threshold[-1]
        gray_img_new = copy.copy(gray_img)
        gray_img_new[gray_img>=threshold[-1]] = 255
        gray_img_new[gray_img<threshold[-1]] = 0

        img[row:row+16,col:col+16,0] = gray_img_new[row:row+16,col:col+16]
        img[row:row+16,col:col+16,1] = gray_img_new[row:row+16,col:col+16]
        img[row:row+16,col:col+16,2] = gray_img_new[row:row+16,col:col+16]

plt.imshow(img)
plt.show()

下面是局部的大津算法，但运行起来太慢了

import numpy as np 
import cv2 
from matplotlib import pyplot as plt 
import copy
 
#读取图片
img = cv2.imread('segmentation3.png')
# 灰度化
gray_img = 0.2126*img[:,:,2] + 0.7152*img[:,:,1] + 0.0722*img[:,:,0]
gray_img = copy.copy(gray_img.astype(np.uint8))
ksize = 30
for row in range(0,gray_img.shape[0],ksize):
    for col in range(0,gray_img.shape[1],ksize):
        kesai = 0
        for gray in range(0,256):
            v0 = gray_img[np.where(gray_img < gray)]
            v1 = gray_img[np.where(gray_img >= gray)]
            if len(v0) != 0:
                u0 = np.mean(v0)
                w0 = len(v0)/(gray_img.shape[0]*gray_img.shape[1])
            else:
                u0 = 0
                w0 = 0
            if len(v1) != 0:
                u1 = np.mean(v1)
                w1 = len(v1)/(gray_img.shape[0]*gray_img.shape[1])
            else:
                u1 = 0
                w1 = 0
            kesai_new = w0*w1*(u0-u1)**2
            if kesai_new > kesai:
                kesai = kesai_new
                threshold = gray
        #print('threshold = ',threshold[-1])
        #二值化，但用更新后的阈值threshold[-1]
        gray_img_new = copy.copy(gray_img)
        gray_img_new[gray_img>=threshold] = 255
        gray_img_new[gray_img<threshold] = 0
 
        img[row:row+ksize,col:col+ksize,0] = gray_img_new[row:row+ksize,col:col+ksize]
        img[row:row+ksize,col:col+ksize,1] = gray_img_new[row:row+ksize,col:col+ksize]
        img[row:row+ksize,col:col+ksize,2] = gray_img_new[row:row+ksize,col:col+ksize]
    print(row)
plt.imshow(img)
plt.show()

大津算法

import numpy as np 
import cv2 
from matplotlib import pyplot as plt 

#读取图片
img = cv2.imread('segmentation3.png')
# 灰度化
gray_img = 0.2126*img[:,:,2] + 0.7152*img[:,:,1] + 0.0722*img[:,:,0]
gray_img = gray_img.astype(np.uint8)
#初始化方差
kesai = 0
for gray in range(0,256):
    v0 = gray_img[np.where(gray_img < gray)]
    v1 = gray_img[np.where(gray_img >= gray)]
    if len(v0) != 0:
        u0 = np.mean(v0)
        w0 = len(v0)/(gray_img.shape[0]*gray_img.shape[1])
    else:
        u0 = 0
        w0 = 0
    if len(v1) != 0:
        u1 = np.mean(v1)
        w1 = len(v1)/(gray_img.shape[0]*gray_img.shape[1])
    else:
        u1 = 0
        w1 = 0
    kesai_new = w0*w1*(u0-u1)**2
    if kesai_new > kesai:
        kesai = kesai_new
        threshold = gray
print('kesai = ',kesai)
print('threshold = ',threshold)
#二值化，但用更新后的阈值threshold
gray_img[gray_img>=threshold] = 255
gray_img[gray_img<threshold] = 0
img[:,:,0] = gray_img
img[:,:,1] = gray_img
img[:,:,2] = gray_img
plt.imshow(img)
plt.show()

分水岭算法

基于聚类的分割

由两个大步骤组成：图像特征提取和k聚类算法。k聚类算法的实现很简单，首先初始化簇心，计算每个点到簇心的距离，以距簇心越近，就属于该簇心，然后更新每个点的簇心值，进行下一步计算直到不能更新为止。图像特征提取，由于是单张图片，所以直接用像素值即可，将其转化为列数据。

import numpy as np
import cv2
def kMeans(data, iter, k):
    data = data.reshape(-1, 3)
    data = np.column_stack((data, np.ones(img.shape[0]*img.shape[1])))
    # 1.随机产生初始簇心
    cluster_center = data[np.random.choice(img.shape[0]*img.shape[1], k),:]
    # 2.分类
    distance = [[] for j in range(k)]
    for i in range(iter):
        #print("迭代次数：", i)
        # 2.1距离计算
        for j in range(k):
            distance[j] = np.sqrt(np.sum((data - cluster_center[j])**2, axis=1))#每一行单独左右计算
        # 2.2归类
        data[:, 3] = np.argmin(distance, axis=0)    #每一列单独上下计算
        # 3.计算新簇心
        for j in range(k):
            cluster_center[j] = np.mean(data[data[:, 3] == j], axis=0)
    return data[:, 3]

img = cv2.imread('image.png')
image_show = kMeans(img, 100, 2)
image_show = image_show.reshape(img.shape[0], img.shape[1])
cv2.imshow('image_show', image_show)
cv2.waitKey(0)
cv2.destroyAllWindows()

活动轮廓

蛇形模型

创建内部能量和外部能量，初始化轮廓形状，然后一步步迭代，计算平均误差，如果平均误差到达收敛条件，则输出。

$Ax_{t}+f_{x}(x_{t-1},y_{t-1})=-\gamma (x_{t}-x_{t-1})$

$Ay_{t}+f_{y}(x_{t-1},y_{t-1})=-\gamma (y_{t}-y_{t-1})$

说一说实现中遇到的一些困惑与解决办法：

1.自己实现的sobel和gaussian与OpenCV中的库函数输出结果不一样，经过查证，是几方面的不同：filter数组存储数据类型不一样，opencv用有符号的16位或32位记录；

2.填充方式不一样，OpenCV默认是反射填充，如果要求输入图像与滤波后输出图像大小一样，填充方式不同会导致输出图像边框数值不同，对结果影响很大，这里我sobel滤波删去了边框，而gaussian滤波希望与原图像大小保持一致，故采取与OpenCV一致的反射填充；

3.效果不好，需要不断的改gaussian核与方差大小、初始数据点、alpha与beta取值；

4.还没有实现鼠标自由圈点进行初始化。

import cv2
import numpy as np
import copy
import math
import matplotlib.pyplot as plt

def Sobel(img,direction):   #sobel算子
    core_size = 3
    # 零填充
    pad = core_size//2
    filter = img.copy().astype(np.int32)
    filter = np.pad(filter,((pad,pad),(pad,pad)),'constant',constant_values=0)
    #滤波计算
    tmp = filter.copy()
    if direction == 'x':
        core = np.array([[1,0,-1],[2,0,-2],[1,0,-1]])
    elif direction == 'y':
        core = np.array([[1,2,1],[0,0,0],[-1,-2,-1]])
    else:
        print('error: please input the right string of direction')
    for y in range(img.shape[0]):
        for x in range(img.shape[1]):
            filter[pad+y,pad+x] = np.sum(tmp[y:y+core_size,x:x+core_size]*core)
    return filter[pad+1:img.shape[0],pad+1:img.shape[1]]

def gaussian(img):          #高斯算子
    core_size = 5
    pad = core_size//2
    filter = img.copy().astype(np.int32)
    filter = np.pad(filter,((pad,pad),(pad,pad)),'reflect')
    #滤波计算
    tmp = filter.copy()
    core = np.zeros((5,5))
    kesai = 5
    for dx in range(-2,3):
        for dy in range(-2,3):
            core[dx+2,dy+2] = (1/(2*kesai*math.pi))*math.exp(-((dx**2+dy**2)/(2*kesai**2)))
    core = core/np.sum(core)
    for y in range(img.shape[0]):
        for x in range(img.shape[1]):
            filter[pad+y,pad+x] = np.sum(tmp[y:y+core_size,x:x+core_size]*core)
    return filter[pad:pad+img.shape[0],pad:pad+img.shape[1]]

def getdata(centre, radius, N):     #初始化数据
    t = np.linspace(0, 2 * np.pi, N)
    x = centre[0] + radius * np.cos(t)
    y = centre[1] + radius * np.sin(t)
    return np.array([x, y])

def mark_points(img):               #鼠标标记点位的坐标
    Points = []                               #初始化
    def on_mouse(event, x, y, flags, param):        #opencv调用函数的格式
        if event == cv2.EVENT_LBUTTONDOWN:          #如果是鼠标左键按下
            Points.append([x,y])              #添加点位坐标
    cv2.namedWindow('img', cv2.WINDOW_AUTOSIZE)
    cv2.setMouseCallback('img', on_mouse)           #调用鼠标函数
    while True:
        cv2.imshow('img', img)
        # 按 q 键退出
        if cv2.waitKey(1) & 0xFF == ord('q'):
            cv2.destroyAllWindows()
            break
    print(Points)
    return np.array(Points)                 #转为numpy数组格式，方便科学计算

def extEnergy(img):          #计算外部能量
    dx = Sobel(img,'x')
    dy = Sobel(img,'y')
    E = -(dx**2 + dy**2)
    return E

def getA(alpha, beta, n):    #初始化A矩阵
    a = 2 * alpha + 6 * beta
    b = -(alpha + 4 * beta)
    c = beta
    A = a * np.eye(n)
    A += b * np.roll(np.eye(n), 1, 0) + b * np.roll(np.eye(n), -1, 0)
    A += c * np.roll(np.eye(n), 2, 0) + c * np.roll(np.eye(n), -2, 0)
    return A

#蛇形模型
def snakeModel(img, data, alpha, beta, step, max_iter, convergence):
    x, y, errs, n = data[0].copy(), data[1].copy(), [], len(data[0])
    # 计算5对角循环矩阵A，及其逆
    A = getA(alpha,beta,n)
    inv = np.linalg.inv(A+step*np.eye(n))
    # 计算外部能量，及其梯度
    extE = extEnergy(img)
    fx = Sobel(extE,'x')
    fy = Sobel(extE,'y')
    fx, fy = fx / np.max([abs(fx)]), fy / np.max([abs(fy)])#主要是fx太大了，归一化可以避免它在下面求解时出现主导地位
    for iter in range(max_iter):
        x_pre, y_pre = x.copy(), y.copy()
        row, col = np.uint8(y), np.uint8(x)
        xn = inv @ (step * x + fx[row, col])
        yn = inv @ (step * y + fy[row, col])

        x,y = xn, yn  #更新x和y
        err = np.mean(np.abs(x_pre - x) + np.abs(y_pre - y))    #求平均误差
        errs.append(err)
        if err < convergence:
            break
    return x, y, errs

img = cv2.imread("2.png",0)
gaussian = gaussian(img)
#img = cv2.GaussianBlur(img, (5, 5), 5)
# 构造初始轮廓线
#init = mark_points(img)
#lens = len(init)
init = getdata((230, 222), 240, 200)
x, y, errs = snakeModel(gaussian,data=init,alpha=0.1,beta=1,step=0.1,max_iter=2500,convergence=0.01)

plt.figure() # 绘制轮廓图
plt.imshow(img,cmap='gray')
plt.plot(init[0], init[1], '--r', lw=1)
plt.plot(x, y, 'g', lw=1)
plt.xticks([]), plt.yticks([]), plt.axis("off")
plt.figure() # 绘制收敛趋势图
plt.plot(range(len(errs)),errs)
plt.show()

水平集

图割

形态学运算

形态学运算主要分为腐蚀和膨胀，针对二值化的图像。腐蚀为算子扫过原图像，如果算子内的值均为255，就赋值255，否则为0，这样图像就被腐蚀了（变瘦）；膨胀是算子内只要有一个值为255，就赋值255，全0则0，这样图像就被膨胀了（变胖）。以下为代码，先采用局部的全局阈值法转为二值化图像，再对图像进行腐蚀和膨胀。

import numpy as np 
import cv2 
from matplotlib import pyplot as plt 
import copy

def Eroding(img):   #sobel算子
    core_size = 3
    # 零填充
    pad = core_size//2
    filter = copy.copy(img.astype(np.uint8))
    filter = np.pad(filter,((pad,pad),(pad,pad)),'reflect')
    #滤波计算
    tmp = filter.copy()
    core = np.array([[1,1,1],[1,1,1],[1,1,1]])
    for y in range(img.shape[0]):
        for x in range(img.shape[1]):
            if np.sum(tmp[y:y+core_size,x:x+core_size]*core)/9 == 255:
                filter[pad+y,pad+x] = 255
            else:
                filter[pad+y,pad+x] = 0
    return filter[pad:pad+img.shape[0],pad:pad+img.shape[1]]

def Dilating(img):   #sobel算子
    core_size = 3
    # 零填充
    pad = core_size//2
    filter = copy.copy(img.astype(np.uint8))
    filter = np.pad(filter,((pad,pad),(pad,pad)),'reflect')
    #滤波计算
    tmp = filter.copy()
    core = np.array([[1,1,1],[1,1,1],[1,1,1]])
    for y in range(img.shape[0]):
        for x in range(img.shape[1]):
            if np.sum(tmp[y:y+core_size,x:x+core_size]*core) >= 255:
                filter[pad+y,pad+x] = 255
            else:
                filter[pad+y,pad+x] = 0
    return filter[pad:pad+img.shape[0],pad:pad+img.shape[1]]

def local_globel_threshold(img,ksize):
    gray_img = copy.copy(img.astype(np.uint8))
    for row in range(0,gray_img.shape[0],ksize):
        for col in range(0,gray_img.shape[1],ksize):
            threshold = []
            threshold.append(128)
            for gray in range(0,256):
                v0 = gray_img[np.where(gray_img[row:row+ksize,col:col+ksize] < threshold[-1])]
                v1 = gray_img[np.where(gray_img[row:row+ksize,col:col+ksize] >= threshold[-1])]
                if len(v0) != 0:
                    u0 = np.mean(v0)
                else:
                    u0 = 0
                if len(v1) != 0:
                    u1 = np.mean(v1)
                else:
                    u1 = 0
                threshold.append((u0 + u1)/2)
                if threshold[-1] == threshold[-2]:
                    break
            #print('threshold = ',threshold[-1])
            #二值化，但用更新后的阈值threshold[-1]
            gray_img_new = copy.copy(gray_img)
            gray_img_new[gray_img>=threshold[-1]] = 255
            gray_img_new[gray_img<threshold[-1]] = 0
    
            img[row:row+ksize,col:col+ksize] = gray_img_new[row:row+ksize,col:col+ksize]
    return img


img = cv2.imread('segmentation3.png',0)
img = local_globel_threshold(img,30)
img = Dilating(img)
img = Eroding(img)
cv2.imshow('img',img)
cv2.waitKey(0)
cv2.destroyAllWindows()