Standalone implementation of the darknet-ocr text-detection stage (darknet_ocr)

While running the standalone detection part (darknet_detect) of chineseocr (yolov3+crnn), I hit a CUDA version problem:
OSError: libcudart.so.9.2: cannot open shared object file: No such file or directory
So I switched to the standalone detection part of darknet_ocr instead; source code link: [link placeholder]
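
Before switching, it is worth checking whether that exact CUDA runtime is loadable at all; a quick diagnostic sketch (not part of the repo):

import ctypes

try:
    ctypes.CDLL('libcudart.so.9.2')
    print('libcudart.so.9.2 is available')
except OSError as e:
    print('CUDA runtime missing:', e)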

That repo includes a text-detection script for the darknet framework, text.py, under the dnn directory. Because I needed extra output and ran into a few problems, I made some modifications; the complete code is included below for reference.

import cv2
import numpy as np
import time
from config import textPath, anchors, scale, maxScale, TEXT_LINE_SCORE
from helper.image import resize_img, get_origin_box, soft_max, reshape
from helper.detectors import TextDetector
from dnn.image import rotate_cut_img, sort_box
from PIL import Image
import matplotlib
matplotlib.use('Agg')  # headless backend: no X server available in the container
import matplotlib.pyplot as plt


# load the Darknet text-detection model through OpenCV's DNN module;
# textPath points to the .weights file, and the matching .cfg is assumed
# to sit alongside it with the same basename
textNet = cv2.dnn.readNetFromDarknet(textPath.replace('weights', 'cfg'), textPath)

def detect_box(image, scale=600, maxScale=900):
    H, W = image.shape[:2]
    image, rate = resize_img(image, scale, maxScale=maxScale)
    h, w = image.shape[:2]
    # build a 1 x 3 x h x w blob; keep the BGR channel order (swapRB=False), no cropping
    inputBlob = cv2.dnn.blobFromImage(image, scalefactor=1.0, size=(w, h), swapRB=False, crop=False)
    outputName = textNet.getUnconnectedOutLayersNames()
    textNet.setInput(inputBlob)
    out = textNet.forward(outputName)[0]
    # CTPN-style head: the first 20 output channels are treated as class logits
    # (10 anchors x text/non-text) and the remaining channels as box regressions
    clsOut = reshape(out[:, :20, ...])
    boxOut = reshape(out[:, 20:, ...])
    # map anchor-relative regressions back to pixel boxes on the resized image
    boxes = get_origin_box((w, h), anchors, boxOut[0])
    scores = soft_max(clsOut[0])
    # clip all box coordinates to the resized image bounds
    boxes[:, 0:4][boxes[:, 0:4] < 0] = 0
    boxes[:, 0][boxes[:, 0] >= w] = w - 1
    boxes[:, 1][boxes[:, 1] >= h] = h - 1
    boxes[:, 2][boxes[:, 2] >= w] = w - 1
    boxes[:, 3][boxes[:, 3] >= h] = h - 1
    print('scores:', scores)
    print('boxes:', boxes)
    print('rate:', rate)
    print('w:', w)
    print('h:', h)
    return scores, boxes, rate, w, h
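
# note: the original script started a timer here at module level, which only
# measured the interval between two import-time statements; a small wrapper
# like this sketch (my addition) times an actual call instead
def timed(fn):
    def wrapper(*args, **kwargs):
        start = time.time()
        result = fn(*args, **kwargs)
        print('{} took {:.3f}s'.format(fn.__name__, time.time() - start))
        return result
    return wrapper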

# group raw proposals into text lines (CTPN-style clustering) and filter by score
def detect_lines(image, scale=600,
                 maxScale=900,
                 MAX_HORIZONTAL_GAP=30,
                 MIN_V_OVERLAPS=0.6,
                 MIN_SIZE_SIM=0.6,
                 TEXT_PROPOSALS_MIN_SCORE=0.7,
                 TEXT_PROPOSALS_NMS_THRESH=0.3,
                 TEXT_LINE_NMS_THRESH=0.9,
                 TEXT_LINE_SCORE=0.9
                 ):
    MAX_HORIZONTAL_GAP = max(16, MAX_HORIZONTAL_GAP)
    detectors = TextDetector(MAX_HORIZONTAL_GAP, MIN_V_OVERLAPS, MIN_SIZE_SIM)
    scores, boxes, rate, w, h = detect_box(image, scale, maxScale)
    size = (h, w)
    text_lines, scores = detectors.detect(boxes, scores, size,
                                          TEXT_PROPOSALS_MIN_SCORE, TEXT_PROPOSALS_NMS_THRESH,
                                          TEXT_LINE_NMS_THRESH, TEXT_LINE_SCORE)
    if len(text_lines) > 0:
        # map line coordinates back from the resized image to the original scale
        text_lines = text_lines / rate
    print('text_lines:', text_lines)
    print('scores:', scores)
    return text_lines, scores



def detect(img):
    # swap the channel order for the network input; a B<->R swap is its own
    # inverse, so COLOR_RGB2BGR works here even though cv2.imread returns BGR
    image = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    boxes, scores = detect_lines(image, scale=scale, maxScale=maxScale)
    data = []
    n = len(boxes)
    for i in range(n):
        box = boxes[i]
        box = [int(x) for x in box]
        if scores[i] > TEXT_LINE_SCORE:
            data.append({'box': box, 'prob': round(float(scores[i]), 2), 'text': None})
    res = {'data': data, 'errCode': 0}
    return res
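
# for reference, the returned structure looks like this (values illustrative):
# {'data': [{'box': [x1, y1, x2, y2, x3, y3, x4, y4], 'prob': 0.97, 'text': None}],
#  'errCode': 0}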

def ocr_batch(img, boxes, leftAdjustAlph=0.01, rightAdjustAlph=0.01):
    """
    crop and deskew each detected box in preparation for recognition (batch)
    """
    im = Image.fromarray(img)
    newBoxes = []
    for index, box in enumerate(boxes):
        partImg, box = rotate_cut_img(im, box, leftAdjustAlph, rightAdjustAlph)
        box['img'] = partImg.convert('L')
        newBoxes.append(box)
        # cv2.imshow('part', cv2.cvtColor(np.array(partImg), cv2.COLOR_RGB2BGR))
        # cv2.waitKey(0)  # disabled: no X server under headless Linux
    return newBoxes
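
# imshow is unavailable headlessly; a small helper like this (my addition, not
# in the original repo) writes the crops to disk for inspection instead
def save_crops(boxes, outDir='./crops'):
    import os
    os.makedirs(outDir, exist_ok=True)
    for i, box in enumerate(boxes):
        box['img'].save(os.path.join(outDir, 'crop_{}.png'.format(i)))  # PIL crop from ocr_batch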

def drawDetectBox(img, resJson):
    # draw each quadrilateral text box in green, edge by edge
    for idx in range(len(resJson['data'])):
        box = resJson['data'][idx]['box']
        [x1, y1, x2, y2, x3, y3, x4, y4] = box
        p1 = (int(x1), int(y1))
        p2 = (int(x2), int(y2))
        p3 = (int(x3), int(y3))
        p4 = (int(x4), int(y4))
        cv2.line(img, p1, p2, (0, 255, 0))
        cv2.line(img, p2, p3, (0, 255, 0))
        cv2.line(img, p3, p4, (0, 255, 0))
        cv2.line(img, p4, p1, (0, 255, 0))
    # cv2.imshow('detect', img); cv2.waitKey(0)  # disabled: no X server under headless Linux

def show_img(imgs: np.ndarray, color=True):
    # accept either a single image or a batch of images
    if (len(imgs.shape) == 3 and color) or (len(imgs.shape) == 2 and not color):
        imgs = np.expand_dims(imgs, axis=0)
    for img in imgs:
        plt.figure()
        plt.imshow(img, cmap=None if color else 'gray')

imgDir = './test/'
img = cv2.imread(imgDir + 'img.jpeg')

# time the actual detection call (the original timer at module level measured
# nothing useful)
timeTake = time.time()
res = detect(img)
timeTake = time.time() - timeTake
print('It takes: {}s'.format(timeTake))
print(res)

boxes = []
for idx in range(len(res['data'])):
    boxes.append(res['data'][idx]['box'])
ocr_batch(img, boxes)
drawDetectBox(img, res)
cv2.imwrite('detect7.jpg', img)
show_img(cv2.cvtColor(img, cv2.COLOR_BGR2RGB), color=True)  # matplotlib expects RGB
plt.show()


Because the original cv2.imshow() calls fail under Linux with "cannot connect to X server", I commented them out and replaced them with plt.show().
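
Note that with the Agg backend plt.show() is effectively a no-op, so in a headless container it is more reliable to save the figure to a file; a minimal sketch (the output filename is my choice):

plt.figure()
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))  # OpenCV images are BGR; matplotlib expects RGB
plt.savefig('detect_plt.png')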
After running it, I get the results (detection output screenshots omitted here).
Also, everything runs in the same Docker image as in the previous article, so the command to run text.py is:

docker run -v /.../OCR-DARKNET/darknet-ocr:/chineseocr/darknet-ocr -w /chineseocr/darknet-ocr chineseocr:v2 python text.py
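
If you suspect an OpenCV problem, it can help to first confirm the version inside the container (cv2.dnn.readNetFromDarknet needs a reasonably recent OpenCV), for example:

docker run chineseocr:v2 python -c "import cv2; print(cv2.__version__)"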

I have made several versions of text.py to meet different needs; feel free to reach out if you want them. Environments differ and OpenCV library issues are common, so if you run into differences or have updates, discussion is always welcome!
