Label Recognition and Monocular Measurement of Underwater Abalone (an outline of the approach)

1. Abalone label recognition (method and results only)

(1) Main goal: accurately read the character content of each label from its image. Each 4-character code corresponds to one abalone, which makes it easy to record data for individual abalones.

(2) Main problem: image clarity. Underwater label images are blurred by surface refraction, shooting distance, and lighting conditions, so a general-purpose character recognition model has a very high error rate. A dedicated recognition model therefore has to be trained on a large dataset.

(3) Main work and results: The first attempt was a chain of underwater image-enhancement steps (contrast enhancement, Gaussian filtering, and so on), followed by character segmentation on the binarized image and per-character recognition with EasyOCR. In practice, though, adjacent characters were mostly stuck together, so segmentation worked poorly and dragged down the whole pipeline. The final approach uses YOLOv5 to recognize the label characters directly: after building a dataset covering all the required characters, several training runs produced a character recognition model with fairly high accuracy. (A minimal sketch of the abandoned EasyOCR pipeline follows the figures below.)

Figure 1: Training metrics over the course of training
Figure 2: Recognition results on part of the test set
Figure 3: Predicted probabilities for actual vs. predicted characters
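
For reference, here is a minimal sketch of that first, abandoned pipeline (enhance, binarize, recognize), assuming EasyOCR is installed. It hands the whole binarized label to EasyOCR instead of segmenting characters first; the CLAHE parameters and the character allowlist are illustrative assumptions, not the project's originals:

import cv2
import easyocr

reader = easyocr.Reader(['en'], gpu=False)

def ocr_label(path):
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    # Contrast enhancement with CLAHE, then Gaussian smoothing to suppress noise
    img = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)).apply(img)
    img = cv2.GaussianBlur(img, (3, 3), 0)
    # Otsu binarization before recognition
    _, binary = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    # Restrict recognition to the character set used on the tags (assumed here)
    results = reader.readtext(binary, allowlist='0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    return "".join(text for _, text, _ in results)

On clear images this reads out the 4-character code, but on blurry underwater shots the stuck-together characters made both segmentation and recognition unreliable, which is what motivated the switch to YOLOv5.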

(4) A small optimization: since a label can be photographed right-side up or upside down, the image is fed through in both orientations and the one with the higher mean confidence is kept, which gives the best accuracy. (A sketch of this two-pass trick follows Figure 4.)

In theory, the mean confidence for the reversed input in Figure 4 will be far lower than the mean confidence for the forward orientation.

Figure 4: A label image fed in reversed orientation
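
A minimal sketch of the two-orientation trick, assuming a hypothetical detect(img) wrapper around the trained YOLOv5 model that returns (character, confidence, x_center) tuples for one image:

import cv2
import numpy as np

def read_label_two_pass(img, detect):
    # Run recognition on the image and on its 180-degree rotation,
    # then keep the orientation with the higher mean confidence.
    flipped = cv2.rotate(img, cv2.ROTATE_180)
    candidates = []
    for candidate in (img, flipped):
        dets = detect(candidate)  # hypothetical: [(char, conf, x_center), ...]
        if not dets:
            continue
        mean_conf = float(np.mean([conf for _, conf, _ in dets]))
        # Sort detected characters left to right to form the 4-character code
        code = "".join(ch for ch, _, _ in sorted(dets, key=lambda d: d[2]))
        candidates.append((mean_conf, code))
    if not candidates:
        return None
    return max(candidates)[1]  # code from the more confident orientation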

2. Abalone growth-data measurement (approach and results only)

(1) Main goal: to make collecting abalone growth data convenient and flexible, the aim is to obtain reasonably accurate growth data from a single image taken at an arbitrary height, given a known farm water depth.

(2) Main problem: underwater abalone photos are distorted by refraction at the water surface and deviate substantially from in-air photos, so they need refraction correction. The correction requires the camera intrinsics and the viewing depth, which turns the correction problem into estimating the viewing depth monocularly. (The short sketch below gives the intuition.)
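
For intuition: under the flat-surface refraction model viewed near-vertically, an object at true depth d below the water appears raised to roughly d/n, with n ≈ 1.33 for water, which is why the viewing depth anchors how strongly the image must be corrected. A one-function sketch of this paraxial relationship (the per-pixel correction itself is handled later by the project's RDC module):

N_WATER = 1.33  # refractive index of water

def apparent_depth(true_depth_cm: float, n: float = N_WATER) -> float:
    # Paraxial (small-angle) approximation: refraction makes underwater
    # objects appear shallower by a factor of n
    return true_depth_cm / n

print(apparent_depth(30.0))  # ~22.6: a 30 cm deep object appears ~7.4 cm shallower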

(3) Main work and results: The first attempt rectified the images via feature-point matching and a homography, but the recovered point correspondences were inaccurate, so the rectified images came out poorly. The adopted method instead corrects using the viewing depth, the camera intrinsics, and the lens-to-water-surface distance. The viewing depth is obtained as follows: with the water depth known, an instance segmentation model measures the label's pixel size in the shot, and least-squares-fitted curves (one per water depth) map label size to viewing depth. After the underwater image is corrected, an instance segmentation model extracts the abalone's contour points, giving its length and width in the image; the real-world label dimensions and the label's in-image dimensions then scale these to the abalone's real-world length and width. (A sketch of the curve fitting follows.)
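
A minimal sketch of fitting one such area-to-depth curve by least squares, assuming (label pixel area, measured viewing depth) calibration pairs have been collected at a fixed water depth; the sample values are placeholders, not the project's data:

import numpy as np

# Placeholder calibration samples at one fixed water depth:
# label pixel areas and the viewing depths (cm) measured alongside them
areas = np.array([5200.0, 6100.0, 7300.0, 8900.0, 10400.0])
depths = np.array([160.0, 150.0, 138.0, 124.0, 112.0])

# Least-squares quadratic fit: depth = a*s**2 + b*s + c,
# the same functional form used by function_water_depth() in the code below
a, b, c = np.polyfit(areas, depths, deg=2)
print(f"depth(s) = {a:.4g}*s**2 + {b:.4g}*s + {c:.4g}")

def viewing_depth(area: float) -> float:
    return a * area ** 2 + b * area + c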

(4) Main code (for reference only!!! The weight files are not provided: they were trained for this specific scene and would not generalize well.)

The figure below shows the Python files for this measurement part. They include camera calibration code, curve-fitting code, refraction-correction code, instance-segmentation code, Mask R-CNN training code, labelme-to-COCO conversion code, and so on.

Files required for the monocular measurement part

Main script:

import os
import time
import glob
import json
from refraction import RDC
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import torch
from torchvision import transforms
from network_files import MaskRCNN
from backbone import resnet50_fpn_backbone
from draw_box_utils import draw_objs
import cv2
import mydetect2

# Utility: crop the contents of a rotated rectangle into an upright ROI via an affine warp.
def extractROI(img, rotate_rect):
    def getAffineMat(rect):
        (xc, yc), (w, h), angle = rect
        angle = angle * np.pi / 180.0
        alpha = np.cos(angle)
        beta = np.sin(angle)

        R = np.array([[alpha, beta], [-beta, alpha]]).reshape((2, 2))
        Pc = np.array((xc, yc)).reshape((2, 1))
        t = np.array((w / 2, h / 2)).reshape((2, 1))
        T = t - np.matmul(R, Pc)

        M = np.hstack((R, T))
        return M

    _, (w, h), _ = rotate_rect
    M = getAffineMat(rotate_rect)
    ROI = cv2.warpAffine(img, M, (round(w + 1), round(h + 1)))

    return ROI


class SegmentNet:

    def __init__(self, num_classes=91, box_thresh=0.5, device='cpu'):
        backbone = resnet50_fpn_backbone()
        self.model = MaskRCNN(backbone,
                              num_classes=num_classes,
                              rpn_score_thresh=box_thresh,
                              box_score_thresh=box_thresh)
        self.device = device
        self.data_transform = transforms.Compose([transforms.ToTensor()])

    def load_weights(self, weights_path):
        assert os.path.exists(weights_path), "{} file does not exist.".format(weights_path)
        weights_dict = torch.load(weights_path, map_location='cpu')
        weights_dict = weights_dict["model"] if "model" in weights_dict else weights_dict
        self.model.load_state_dict(weights_dict)
        self.model.to(self.device)

    def predict(self, img):
        img = self.data_transform(img)
        # expand batch dimension
        img = torch.unsqueeze(img, dim=0)
        self.model.eval()  # switch to inference mode
        with torch.no_grad():
            # init
            img_height, img_width = img.shape[-2:]
            init_img = torch.zeros((1, 3, img_height, img_width), device=self.device)
            self.model(init_img)
            # t_start = time_synchronized()
            # print("start detecting......")
            predictions = self.model(img.to(self.device))[0]
            # print("finish detecting!")
            # t_end = time_synchronized()
            # print("inference+NMS time: {}".format(t_end - t_start))
            predict_boxes = predictions["boxes"].to("cpu").numpy()
            predict_classes = predictions["labels"].to("cpu").numpy()
            predict_scores = predictions["scores"].to("cpu").numpy()
            predict_mask = predictions["masks"].to("cpu").numpy()
            predict_mask = np.squeeze(predict_mask, axis=1)  # [batch, 1, h, w] -> [batch, h, w]

            if len(predict_boxes) == 0:
                print("没有检测到任何目标!")
                return

            return predict_boxes, predict_classes, predict_scores, predict_mask

    @staticmethod
    def get_mask(class_id: int = 1,
                 classes: np.ndarray = None,
                 scores: np.ndarray = None,
                 masks: np.ndarray = None,
                 box_thresh: float = 0.1,
                 mask_thresh: float = 0.5, ):
        idxs_confidence = np.greater(scores, box_thresh)
        idxs_class = (classes == class_id)
        idxs = np.bitwise_and(idxs_confidence, idxs_class)

        class_mask = None
        if masks is not None:
            masks = masks[idxs]
            masks = np.where(masks > mask_thresh, True, False)
            class_mask = np.sum(masks, axis=0, dtype=np.int32)
            class_mask = np.array(class_mask, dtype='bool')

        return class_mask


def point_line_distance(point, a, b, c):
    return abs(a*point[0] + b*point[1] + c) / np.sqrt(a**2 + b**2)


def closest_value(h):
    values = [60, 65, 70, 75, 80]
    closest = min(values, key=lambda x: abs(x - h))
    return closest


def remove_border(image):
    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Binarize with a low threshold so only the black border is excluded
    _, thresh = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)
    # Find the external contours of the non-black region
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # Take the largest contour's bounding box (the valid image area)
    x, y, w, h = cv2.boundingRect(max(contours, key=cv2.contourArea))
    # Crop the image to that bounding box
    cropped_image = image[y:y + h, x:x + w]
    return cropped_image

def main():
    num_classes = 1  # excluding background
    box_thresh = 0.5
    # All parameters that need to be set:
    # Abalone weight file
    weights_path_abalone = "./save_weights/model_abalone.pth"
    # Label weight file used for refraction correction
    weights_path_label_small = "./save_weights/model_label_both_25.pth"
    # Label weight file used on the enlarged abalone crop
    weights_path_label_big = "./save_weights/model_25_label_big.pth"
    # Input image directory
    img_folder = r'./image/my_test'
    # Convert non-jpg files to jpg
    for filename in os.listdir(img_folder):
        # Build the full file path
        filepath = os.path.join(img_folder, filename)
        # Check whether the file is in a non-jpg format
        if filename.lower().endswith(('.png', '.jpeg', '.gif', '.bmp')):
            # Open the image file
            with Image.open(filepath) as img:
                # Build the new file name, replacing the original extension with .jpg
                new_filepath = os.path.splitext(filepath)[0] + '.jpg'
                # Save the image in JPEG format
                img.convert('RGB').save(new_filepath, 'JPEG')
                print(f"Converted {filename} to jpg.")
    img_path_list = glob.glob(os.path.join(img_folder, '*.jpg'))
    # Water depth at which the images were shot (cm)
    water_depth = 30
    # Real-world length and width of the label
    label_d1 = 0.8
    label_d2 = 0.45
    # Fitted functions relating label pixel area to viewing depth for this device at each water depth
    def function_water_depth(s, water_depth):
        depth = None
        if water_depth == 30:
            depth = 6.066e-07 * s ** 2 - 0.01589 * s + 164.1
        elif water_depth == 40:
            depth = 5.83e-08 * s ** 2 - 0.003548 * s + 97.9
        elif water_depth == 50:
            depth = s ** s + s + 1  # placeholder; curve not fitted yet for this depth
        elif water_depth == 55:
            depth = s ** s + s + 1  # placeholder; curve not fitted yet for this depth
        elif water_depth == 60:
            depth = s ** s + s + 1  # placeholder; curve not fitted yet for this depth
        return depth
    # Camera intrinsic matrix and distortion coefficients
    # (for monocular correction, new_camera_matrix should equal camera_matrix)
    new_camera_matrix = np.array([[1.10585377e+04, 0.00000000e+00, 1.52895074e+03],
                                  [0.00000000e+00, 1.10051668e+04, 2.14093612e+03],
                                  [0.00000000e+00, 0.00000000e+00, 1.00000000e+00]])
    dist_coeffs = np.array([[5.25431775e-01, -2.59524359e+01, 1.08860490e-02, 8.91495020e-04, 3.24353684e+02]])
    camera_matrix = np.array([[1.10585377e+04, 0.00000000e+00, 1.52895074e+03],
                              [0.00000000e+00, 1.10051668e+04, 2.14093612e+03],
                              [0.00000000e+00, 0.00000000e+00, 1.00000000e+00]])

    # get devices
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # create model
    model_abalone = SegmentNet(num_classes=num_classes + 1, box_thresh=box_thresh, device=device)
    model_label_small = SegmentNet(num_classes=num_classes + 1, box_thresh=box_thresh, device=device)
    model_label_big = SegmentNet(num_classes=num_classes + 1, box_thresh=box_thresh, device=device)
    # load train weights
    model_abalone.load_weights(weights_path=weights_path_abalone)
    model_label_small.load_weights(weights_path=weights_path_label_small)
    model_label_big.load_weights(weights_path=weights_path_label_big)

    for img_path in img_path_list:
        print("*----------------------------------------New image-----------------------------------------*")
        print("this is {} !".format(img_path))

        # Load the image
        assert os.path.exists(img_path), f"{img_path} does not exist."
        original_img_1 = Image.open(img_path).convert('RGB')
        # Segment the label here to correct refraction; it is segmented again later on the abalone crop
        result = model_label_small.predict(original_img_1)
        if result is None:
            print("No label detected")
            continue
        mask = SegmentNet.get_mask(classes=result[1], scores=result[2], masks=result[3])
        if mask is None:
            return
        else:
            mask = mask.astype("uint8") * 255
        contour, h = cv2.findContours(mask, mode=cv2.RETR_EXTERNAL, method=cv2.CHAIN_APPROX_SIMPLE)
        # Read the image with OpenCV
        original_img = cv2.imread(img_path)
        # Correct refraction
        area = cv2.contourArea(contour[0])                 # for now just use the first label contour; TODO: improve later
        depth = function_water_depth(area, water_depth)
        original_img = RDC.undistort(original_img, camera_matrix, dist_coeffs, new_camera_matrix, depth - water_depth, depth)
        # Remove the black border left around the corrected image
        refraction_img = original_img
        original_img = remove_border(original_img)
        # Show the corrected image with its black border
        # cv2.namedWindow("Refraction Image", cv2.WINDOW_NORMAL)
        # cv2.imshow("Refraction Image", refraction_img)
        # cv2.waitKey(0)
        # cv2.destroyAllWindows()
        # Convert the NumPy array to a PIL image
        # (note: original_img is in OpenCV BGR order; strictly it should be converted to RGB first)
        image_PIL = Image.fromarray(original_img)
        # Then convert it to RGB mode
        image_RGB = image_PIL.convert('RGB')
        result_abalone = model_abalone.predict(image_RGB)
        if result_abalone is None:
            print("No abalone detected")
            continue
        mask_abalone = SegmentNet.get_mask(classes=result_abalone[1], scores=result_abalone[2], masks=result_abalone[3])
        if mask_abalone is None:
            return
        else:
            mask_abalone = mask_abalone.astype("uint8") * 255
        # Get the abalone contours
        contour_abalone, h_abalone = cv2.findContours(mask_abalone, mode=cv2.RETR_EXTERNAL, method=cv2.CHAIN_APPROX_SIMPLE)
        # Draw the contours
        cv2.drawContours(original_img, contour_abalone, -1, (0, 255, 0), 2)
        # Get the input image's height and width
        image_height, image_width, _ = original_img.shape
        # Iterate over all contours; i counts valid abalones
        i = 1
        for c in contour_abalone:
            close_edge = 0
            # Discard any contour too close to the image border, to avoid truncated contours
            # (threshold set to 30 px here; adjust to your setup)
            for contour_point in c:
                x, y = contour_point[0]
                if x < 30 or x > (image_width - 30) or y < 30 or y > (image_height - 30):
                    close_edge = 1
                    break
            if close_edge == 1:
                continue
            print(f"-------------------- Abalone #{i} --------------------------")
            # Get the bounding rectangle
            x, y, w, h = cv2.boundingRect(c)
            # Crop the image patch with a 30 px margin
            abalone_image = original_img[y - 30:y + h + 30, x - 30:x + w + 30]
            abalone_image_height = h + 60
            abalone_image_width = w + 60
            # Segment the label on the abalone crop:
            # Convert the NumPy array to a PIL image
            abalone_image_PIL = Image.fromarray(abalone_image)
            # Then convert it to RGB mode
            abalone_image_RGB = abalone_image_PIL.convert('RGB')
            result_label = model_label_big.predict(abalone_image_RGB)
            if result_label is None:
                print("No label detected")
                continue
            mask_label = SegmentNet.get_mask(classes=result_label[1], scores=result_label[2], masks=result_label[3])
            if mask_label is None:
                return
            else:
                mask_label = mask_label.astype("uint8") * 255
            contour_label, h_label = cv2.findContours(mask_label, mode=cv2.RETR_EXTERNAL, method=cv2.CHAIN_APPROX_SIMPLE)
            # Draw the label contours
            cv2.drawContours(abalone_image, contour_label, -1, (0, 255, 0), 2)
            # Iterate over the label contours; j counts valid labels
            j = 0
            Overlapping_abalone_situation = 0
            # Compute and report each label contour's area
            for k in contour_label:
                close_edge2 = 0
                # Discard any contour too close to the crop border, to avoid truncated contours
                # (threshold set to 20 px here; adjust to your setup)
                for contour_point2 in k:
                    x, y = contour_point2[0]
                    # x is the column index, so compare it against the width; y against the height
                    if x < 20 or x > (abalone_image_width - 20) or y < 20 or y > (abalone_image_height - 20):
                        close_edge2 = 1
                        break
                if close_edge2 == 1:
                    continue
                else:
                    area_label = cv2.contourArea(k)
                    j = j + 1
                if j == 3:
                    print("Warning: more than 2 labels detected on one abalone; abalones may be stacked, so this abalone contour is discarded")
                    Overlapping_abalone_situation = 1
                    break
                print(f"Label contour area: {area_label}")
                # Get the bounding rectangle
                x1, y1, w1, h1 = cv2.boundingRect(k)
                # Crop the label patch with a 10 px margin
                label_image = abalone_image[y1 - 10:y1 + h1 + 10, x1 - 10:x1 + w1 + 10]
                # Convert the NumPy array to a Pillow image
                image_to_save = Image.fromarray(label_image)
                # Save the image
                image_to_save.save("./temporary_label/label_image.jpg")
                # Recognize the label characters
                mydetect2.run()
                # Delete the temporary image
                os.remove("./temporary_label/label_image.jpg")
                # Compute the contour's minimum-area bounding rectangle
                rect_label = cv2.minAreaRect(k)
                # Draw the minimum-area rectangle
                box = cv2.boxPoints(rect_label)
                box = box.astype(np.int32)
                cv2.drawContours(abalone_image, [box], 0, (0, 255, 0), 2)
                # Get the label's length and width in pixels
                if j == 1:
                    label_length, label_width = rect_label[1]
                    if label_length < label_width:
                        label_length, label_width = label_width, label_length  # make sure length is the long side
                elif j == 2:
                    label_length1, label_width1 = rect_label[1]
                    if label_length1 < label_width1:
                        label_length1, label_width1 = label_width1, label_length1  # make sure length is the long side
                    # Average the measurements of the two labels
                    label_length = (label_length + label_length1) / 2
                    label_width = (label_width + label_width1) / 2
                box_center = tuple(np.array(rect_label[0], dtype=int))  # rectangle center, for placing text
                # If the aspect ratio exceeds 1.78, the width is too small, so set it to length/1.78;
                # otherwise the length is too small, so set it to width*1.78
                # if label_length/label_width > 1.78:
                #     label_width = label_length/1.78
                # else:
                #     label_length = label_width * 1.78
                print(f"Label length: {label_length}")
                print(f"Label width: {label_width}")
                # cv2.namedWindow("Abalone Image", cv2.WINDOW_NORMAL)
                # cv2.imshow("Abalone Image", abalone_image)
                # cv2.waitKey(0)
                # cv2.destroyAllWindows()
            # Check whether abalones were stacked
            if Overlapping_abalone_situation == 1:
                continue
            # Skip this abalone if no valid label was found (label_length would be undefined below)
            if j == 0:
                print("No usable label on this abalone; skipping")
                continue
            # Compute and report the abalone contour's area
            area_abalone = cv2.contourArea(c)
            print(f"Abalone contour area: {area_abalone}")
            cv2.putText(original_img, f"{area_abalone:.2f}", tuple(c[0][0]), cv2.FONT_HERSHEY_SIMPLEX, 4, (255, 255, 255), 4)
            # Approximate the contour with a polygon
            epsilon = 0.01 * cv2.arcLength(c, True)
            approx = cv2.approxPolyDP(c, epsilon, True)
            # Compute the minimum enclosing circle of the approximated contour
            (x, y), radius = cv2.minEnclosingCircle(approx)
            center = (int(x), int(y))
            radius = int(radius)
            # Find the points where the contour touches the enclosing circle
            intersection_points = []
            for point in approx:
                distance = np.sqrt((point[0][0] - x) ** 2 + (point[0][1] - y) ** 2)
                if abs(distance - radius) < 1:  # points within 1 px of the circle count as touching
                    intersection_points.append((point[0][0], point[0][1]))
            # Draw the intersection points
            for point in intersection_points:
                cv2.circle(original_img, point, 5, (0, 255, 0), -1)
            # Exactly two intersection points are needed to define the length axis
            if len(intersection_points) != 2:
                print("Did not find exactly two intersection points; skipping this abalone")
                continue
            # Connect the two intersection points
            cv2.line(original_img, intersection_points[0], intersection_points[1], (0, 255, 0), 2)
            # Distance between the two intersection points = abalone length in pixels
            abalone_length = pow(((intersection_points[0][0] - intersection_points[1][0]) ** 2 + (intersection_points[0][1] - intersection_points[1][1]) ** 2), 0.5)
            print(f"Abalone length: {abalone_length}")
            # Draw the minimum enclosing circle
            cv2.circle(original_img, center, radius, (255, 0, 0), 2)
            # Line through the two intersection points: A*x + B*y + C = 0
            p1, p2 = intersection_points[0], intersection_points[1]
            A = p2[1] - p1[1]
            B = p1[0] - p2[0]
            C = p2[0] * p1[1] - p1[0] * p2[1]
            # Split contour points by which side of the line they fall on
            d1, d2 = [], []
            for pt in c:
                x, y = pt[0][0], pt[0][1]
                pt = (x, y)
                distance = point_line_distance(pt, A, B, C)
                if A * pt[0] + B * pt[1] + C > 0:
                    d1.append(distance)
                else:
                    d2.append(distance)
            # The width is the sum of the maximum distances on either side
            max_distance_d1 = max(d1)
            max_distance_d2 = max(d2)
            abalone_width = max_distance_d1 + max_distance_d2
            print(f"Abalone width: {abalone_width}")
            # Convert to real-world abalone dimensions via the label's known size
            print("Real-world abalone measurements:")
            print(f"Length: {label_d1 * abalone_length / label_length + 0.3}")  # +0.3 is a fixed empirical offset
            print(f"Width: {label_d2 * abalone_width / label_width + 0.3}")
            # Valid-abalone counter +1
            i = i + 1
        # Show the result image
        cv2.namedWindow("Original Image", cv2.WINDOW_NORMAL)
        cv2.imshow("Original Image", original_img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()


if __name__ == '__main__':
    main()

The overall flow: first segment the label to correct refraction, then segment the abalone, then segment the label again on the abalone crop, and finally compute the length and width. Since the farm glues 2 labels onto each abalone, up to 2 labels are allowed per abalone; anything beyond that and the abalone is discarded. (A simplified sketch of the refraction-correction geometry follows.)
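
The RDC module itself is not included here. For readers who want the gist, below is a simplified sketch of a flat-surface refraction correction under a small-angle (paraxial) assumption: the refracted view is magnified about the principal point by (h + d) / (h + d/n), where h is the lens height above the water and d the object depth below it, so the correction resamples each pixel radially by that factor. This is my own approximation for illustration, not the project's actual RDC.undistort:

import cv2
import numpy as np

def undistort_flat_refraction(img, camera_matrix, cam_height_cm, depth_cm, n=1.33):
    # Paraxial sketch: only the principal point (cx, cy) from the intrinsics
    # matters, since the magnification is radius-independent in the small-angle limit
    cx, cy = camera_matrix[0, 2], camera_matrix[1, 2]
    h, w = img.shape[:2]
    # The refracted view is magnified relative to the in-air view by this factor
    scale = (cam_height_cm + depth_cm) / (cam_height_cm + depth_cm / n)
    xs, ys = np.meshgrid(np.arange(w, dtype=np.float32),
                         np.arange(h, dtype=np.float32))
    # For each corrected pixel, sample the observed image further from the center
    map_x = cx + (xs - cx) * scale
    map_y = cy + (ys - cy) * scale
    return cv2.remap(img, map_x, map_y, cv2.INTER_LINEAR)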

(5) Results:

Hmm, how to put it: the phone camera (a Mate 10) is just too weak, and a better device would do much better. Also, at nearly 1 m the label characters simply cannot be photographed clearly. That is purely a hardware problem, so it is what it is.

Measurement results on the 45-image dataset

The next step is to package this into a desktop application~
