ubuntu20.04搭建paddleOCR

pip install paddlepaddle
git clone https://github.com/PaddlePaddle/PaddleOCR
cd PaddleOCR
#查看需求文件,安装需求
pip3 install -r requirements.txt

在安装过程中,会出现gcc的问题

Building wheels for collected packages: python-Levenshtein, bce-python-sdk, future
  Building wheel for python-Levenshtein (setup.py) ... error
  ERROR: Command errored out with exit status 1:
   command: /root/anaconda3/envs/ocr/bin/python -u -c 'import io, os, sys, setuptools, tokenize; sys.argv[0] = '"'"'/tmp/pip-install-1ewi0ck6/                                                python-levenshtein_cb2b479bc8744e4fb56871534ccbfca5/setup.py'"'"'; __file__='"'"'/tmp/pip-install-1ewi0ck6/python-levenshtein_cb2b479bc8744e4f                                                b56871534ccbfca5/setup.py'"'"';f = getattr(tokenize, '"'"'open'"'"', open)(__file__) if os.path.exists(__file__) else io.StringIO('"'"'from se                                                tuptools import setup; setup()'"'"');code = f.read().replace('"'"'\r\n'"'"', '"'"'\n'"'"');f.close();exec(compile(code, __file__, '"'"'exec'"'                                                "'))' bdist_wheel -d /tmp/pip-wheel-knx85gss
       cwd: /tmp/pip-install-1ewi0ck6/python-levenshtein_cb2b479bc8744e4fb56871534ccbfca5/
  Complete output (31 lines):
  running bdist_wheel
  running build
  running build_py
  creating build
  creating build/lib.linux-x86_64-3.8
  creating build/lib.linux-x86_64-3.8/Levenshtein
  copying Levenshtein/StringMatcher.py -> build/lib.linux-x86_64-3.8/Levenshtein
  copying Levenshtein/__init__.py -> build/lib.linux-x86_64-3.8/Levenshtein
  running egg_info
  writing python_Levenshtein.egg-info/PKG-INFO
  writing dependency_links to python_Levenshtein.egg-info/dependency_links.txt
  deleting python_Levenshtein.egg-info/entry_points.txt
  writing namespace_packages to python_Levenshtein.egg-info/namespace_packages.txt
  writing requirements to python_Levenshtein.egg-info/requires.txt
  writing top-level names to python_Levenshtein.egg-info/top_level.txt
  reading manifest file 'python_Levenshtein.egg-info/SOURCES.txt'
  reading manifest template 'MANIFEST.in'
  warning: no previously-included files matching '*pyc' found anywhere in distribution
  warning: no previously-included files matching '*so' found anywhere in distribution
  warning: no previously-included files matching '.project' found anywhere in distribution
  warning: no previously-included files matching '.pydevproject' found anywhere in distribution
  adding license file 'COPYING'
  writing manifest file 'python_Levenshtein.egg-info/SOURCES.txt'
  copying Levenshtein/_levenshtein.c -> build/lib.linux-x86_64-3.8/Levenshtein
  copying Levenshtein/_levenshtein.h -> build/lib.linux-x86_64-3.8/Levenshtein
  running build_ext
  building 'Levenshtein._levenshtein' extension
  creating build/temp.linux-x86_64-3.8
  creating build/temp.linux-x86_64-3.8/Levenshtein
  gcc -pthread -B /root/anaconda3/envs/ocr/compiler_compat -Wl,--sysroot=/ -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -f                                                PIC -I/root/anaconda3/envs/ocr/include/python3.8 -c Levenshtein/_levenshtein.c -o build/temp.linux-x86_64-3.8/Levenshtein/_levenshtein.o
  error: command 'gcc' failed: No such file or directory
  ----------------------------------------
  ERROR: Failed building wheel for python-Levenshtein
  Running setup.py clean for python-Levenshtein
  Building wheel for bce-python-sdk (setup.py) ... done
  Created wheel for bce-python-sdk: filename=bce_python_sdk-0.8.64-py3-none-any.whl size=202973 sha256=4c692a466b1f9b9edcb8d0d615bc81164604616                                                3889fbb3d83a15f08c2d1ecfc
  Stored in directory: /root/.cache/pip/wheels/88/12/83/e1691769d9552209d668e0db7ee723e110af3eda7e5a7a3a5c
  Building wheel for future (setup.py) ... done
  Created wheel for future: filename=future-0.18.2-py3-none-any.whl size=491070 sha256=066cfa308e6947f08415f3e40c604f7ce166266c06ab81079f50f6c                                                d2d2ebde3
  Stored in directory: /root/.cache/pip/wheels/1b/3e/31/72653079400d50aff1c3492982a6965994629072cad3b97720
Successfully built bce-python-sdk future
Failed to build python-Levenshtein
Installing collected packages: pytz, pyparsing, platformdirs, filelock, distlib, virtualenv, toml, tifffile, scipy, pyyaml, PyWavelets, python                                                -dateutil, pyflakes, pycryptodome, pycodestyle, packaging, nodeenv, networkx, mccabe, kiwisolver, imageio, identify, future, fonttools, cycler                                                , cfgv, Babel, shellcheck-py, shapely, scikit-image, pre-commit, pandas, opencv-python, matplotlib, lxml, Flask-Babel, flake8, et-xmlfile, css                                                utils, cssselect, cachetools, bce-python-sdk, visualdl, tqdm, python-Levenshtein, pyclipper, premailer, openpyxl, opencv-contrib-python, lmdb,                                                 imgaug, cython, attrdict
    Running setup.py install for python-Levenshtein ... error
    ERROR: Command errored out with exit status 1:
     command: /root/anaconda3/envs/ocr/bin/python -u -c 'import io, os, sys, setuptools, tokenize; sys.argv[0] = '"'"'/tmp/pip-install-1ewi0ck                                                6/python-levenshtein_cb2b479bc8744e4fb56871534ccbfca5/setup.py'"'"'; __file__='"'"'/tmp/pip-install-1ewi0ck6/python-levenshtein_cb2b479bc8744e                                                4fb56871534ccbfca5/setup.py'"'"';f = getattr(tokenize, '"'"'open'"'"', open)(__file__) if os.path.exists(__file__) else io.StringIO('"'"'from                                                 setuptools import setup; setup()'"'"');code = f.read().replace('"'"'\r\n'"'"', '"'"'\n'"'"');f.close();exec(compile(code, __file__, '"'"'exec'                                                "'"'))' install --record /tmp/pip-record-ywrynebe/install-record.txt --single-version-externally-managed --compile --install-headers /root/ana                                                conda3/envs/ocr/include/python3.8/python-Levenshtein
         cwd: /tmp/pip-install-1ewi0ck6/python-levenshtein_cb2b479bc8744e4fb56871534ccbfca5/
    Complete output (32 lines):
    running install
    /root/anaconda3/envs/ocr/lib/python3.8/site-packages/setuptools/command/install.py:34: SetuptoolsDeprecationWarning: setup.py install is d                                                eprecated. Use build and pip and other standards-based tools.
      warnings.warn(
    running build
    running build_py
    creating build
    creating build/lib.linux-x86_64-3.8
    creating build/lib.linux-x86_64-3.8/Levenshtein
    copying Levenshtein/StringMatcher.py -> build/lib.linux-x86_64-3.8/Levenshtein
    copying Levenshtein/__init__.py -> build/lib.linux-x86_64-3.8/Levenshtein
    running egg_info
    writing python_Levenshtein.egg-info/PKG-INFO
    writing dependency_links to python_Levenshtein.egg-info/dependency_links.txt
    writing namespace_packages to python_Levenshtein.egg-info/namespace_packages.txt
    writing requirements to python_Levenshtein.egg-info/requires.txt
    writing top-level names to python_Levenshtein.egg-info/top_level.txt
    reading manifest file 'python_Levenshtein.egg-info/SOURCES.txt'
    reading manifest template 'MANIFEST.in'
    warning: no previously-included files matching '*pyc' found anywhere in distribution
    warning: no previously-included files matching '*so' found anywhere in distribution
    warning: no previously-included files matching '.project' found anywhere in distribution
    warning: no previously-included files matching '.pydevproject' found anywhere in distribution
    adding license file 'COPYING'
    writing manifest file 'python_Levenshtein.egg-info/SOURCES.txt'
    copying Levenshtein/_levenshtein.c -> build/lib.linux-x86_64-3.8/Levenshtein
    copying Levenshtein/_levenshtein.h -> build/lib.linux-x86_64-3.8/Levenshtein
    running build_ext
    building 'Levenshtein._levenshtein' extension
    creating build/temp.linux-x86_64-3.8
    creating build/temp.linux-x86_64-3.8/Levenshtein
    gcc -pthread -B /root/anaconda3/envs/ocr/compiler_compat -Wl,--sysroot=/ -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes                                                 -fPIC -I/root/anaconda3/envs/ocr/include/python3.8 -c Levenshtein/_levenshtein.c -o build/temp.linux-x86_64-3.8/Levenshtein/_levenshtein.o
    error: command 'gcc' failed: No such file or directory
    ----------------------------------------
ERROR: Command errored out with exit status 1: /root/anaconda3/envs/ocr/bin/python -u -c 'import io, os, sys, setuptools, tokenize; sys.argv[0                                                ] = '"'"'/tmp/pip-install-1ewi0ck6/python-levenshtein_cb2b479bc8744e4fb56871534ccbfca5/setup.py'"'"'; __file__='"'"'/tmp/pip-install-1ewi0ck6/                                                python-levenshtein_cb2b479bc8744e4fb56871534ccbfca5/setup.py'"'"';f = getattr(tokenize, '"'"'open'"'"', open)(__file__) if os.path.exists(__fi                                                le__) else io.StringIO('"'"'from setuptools import setup; setup()'"'"');code = f.read().replace('"'"'\r\n'"'"', '"'"'\n'"'"');f.close();exec(c                                                ompile(code, __file__, '"'"'exec'"'"'))' install --record /tmp/pip-record-ywrynebe/install-record.txt --single-version-externally-managed --co                                                mpile --install-headers /root/anaconda3/envs/ocr/include/python3.8/python-Levenshtein Check the logs for full command output.

然后安装gcc的过程中,又发现如下问题:

(ocr) root@spider:~/apps/ocr/PaddleOCR# apt-get  build-dep  gcc
Reading package lists... Done
Picking 'gcc-defaults' as source package instead of 'gcc'
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
Some packages could not be installed. This may mean that you have
requested an impossible situation or if you are using the unstable
distribution that some required packages have not yet been created
or been moved out of Incoming.
The following information may help to resolve the situation:

The following packages have unmet dependencies:
 g++ : Depends: cpp (= 4:9.3.0-1ubuntu2) but 4:11.2.0-1ubuntu1 is to be installed
 gcc : Depends: cpp (= 4:9.3.0-1ubuntu2) but 4:11.2.0-1ubuntu1 is to be installed
 libc6-dev : Depends: libc6 (= 2.31-0ubuntu9.9) but 2.35-0ubuntu3 is to be installed
             Depends: libc-dev-bin (= 2.31-0ubuntu9.9)
             Depends: libcrypt-dev but it is not going to be installed
E: Unable to correct problems, you have held broken packages.

使用lsb_release -a查看系统代号:

(base) root@spider:~/apps# lsb_release -a
No LSB modules are available.
Distributor ID: Ubuntu
Description:    Ubuntu 22.04 LTS
Release:        22.04
Codename:       jammy
(base) root@spider:~/apps# vim /etc/apt/sources.list
deb http://mirrors.aliyun.com/ubuntu/ focal main restricted universe multiverse
deb-src http://mirrors.aliyun.com/ubuntu/ focal main restricted universe multiverse

deb http://mirrors.aliyun.com/ubuntu/ focal-security main restricted universe multiverse
deb-src http://mirrors.aliyun.com/ubuntu/ focal-security main restricted universe multiverse

deb http://mirrors.aliyun.com/ubuntu/ focal-updates main restricted universe multiverse
deb-src http://mirrors.aliyun.com/ubuntu/ focal-updates main restricted universe multiverse

deb http://mirrors.aliyun.com/ubuntu/ focal-proposed main restricted universe multiverse
deb-src http://mirrors.aliyun.com/ubuntu/ focal-proposed main restricted universe multiverse

deb http://mirrors.aliyun.com/ubuntu/ focal-backports main restricted universe multiverse
deb-src http://mirrors.aliyun.com/ubuntu/ focal-backports main restricted universe multiverse

参照:https://blog.csdn.net/weixin_43894075/article/details/115141599
将focal改为jammy,然后apt update && apt upgrade
然后再次安装gcc即可
然后出现各种问题:
直到把paddle版本降低到2.0.0rc1,问题就全部解决了
python -m pip install paddlepaddle==2.0.0rc1

另外,gcc版本可能有问题,需要安装低版本的gcc环境
PaddlePaddle最高支持gcc8,而Ubuntu22.04的gcc版本是11.2.0,可能存在不兼容问题;而Ubuntu20.04的gcc版本为9.4.0,已验证可以正常安装paddlepaddle-gpu。
apt install gcc-9 g++-9
apt install gcc-11 g++-11
参见:https://blog.csdn.net/zhqh100/article/details/124410399

(ocr) root@spider:~/apps/ocr# dpkg -l | grep gcc
ii  gcc                                        4:11.2.0-1ubuntu1                       amd64        GNU C compiler
ii  gcc-10-base:amd64                          10.3.0-15ubuntu1                        amd64        GCC, the GNU Compiler Collection (base package)
ii  gcc-11                                     11.2.0-19ubuntu1                        amd64        GNU C compiler
ii  gcc-11-base:amd64                          11.2.0-19ubuntu1                        amd64        GCC, the GNU Compiler Collection (base package)
ii  gcc-12-base:amd64                          12-20220319-1ubuntu1                    amd64        GCC, the GNU Compiler Collection (base package)
ii  gcc-9                                      9.4.0-5ubuntu1                          amd64        GNU C compiler
ii  gcc-9-base:amd64                           9.4.0-5ubuntu1                          amd64        GCC, the GNU Compiler Collection (base package)
ii  libgcc-11-dev:amd64                        11.2.0-19ubuntu1                        amd64        GCC support library (development files)
ii  libgcc-9-dev:amd64                         9.4.0-5ubuntu1                          amd64        GCC support library (development files)
ii  libgcc-s1:amd64                            12-20220319-1ubuntu1                    amd64        GCC support library
ii  libuno-cppuhelpergcc3-3                    1:7.3.3-0ubuntu0.22.04.1                amd64        LibreOffice UNO runtime environment -- CPPU helper library
ii  libuno-purpenvhelpergcc3-3                 1:7.3.3-0ubuntu0.22.04.1                amd64        LibreOffice UNO runtime environment -- "purpose environment" helper
ii  libuno-salhelpergcc3-3                     1:7.3.3-0ubuntu0.22.04.1                amd64        LibreOffice UNO runtime environment -- SAL helpers for C++ library


 update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 30 --slave /usr/bin/g++ g++ /usr/bin/g++-9
 update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 20 --slave /usr/bin/g++ g++ /usr/bin/g++-11
 #然后手工切换下gcc
 (base) root@spider:~/apps/ocr# update-alternatives --config gcc
There are 2 choices for the alternative gcc (providing /usr/bin/gcc).

  Selection    Path             Priority   Status
------------------------------------------------------------
  0            /usr/bin/gcc-9    30        auto mode
* 1            /usr/bin/gcc-11   20        manual mode
  2            /usr/bin/gcc-9    30        manual mode

Press <enter> to keep the current choice[*], or type selection number: 0
update-alternatives: using /usr/bin/gcc-9 to provide /usr/bin/gcc (gcc) in auto mode
(base) root@spider:~/apps/ocr# gcc -v
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/9/lto-wrapper
OFFLOAD_TARGET_NAMES=nvptx-none:hsa
OFFLOAD_TARGET_DEFAULT=1
Target: x86_64-linux-gnu
Configured with: ../src/configure -v --with-pkgversion='Ubuntu 9.4.0-5ubuntu1' --with-bugurl=file:///usr/share/doc/gcc-9/README.Bugs --enable-languages=c,ada,c++,go,brig,d,fortran,objc,obj-c++,gm2 --prefix=/usr --with-gcc-major-version-only --program-suffix=-9 --program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-plugin --enable-default-pie --with-system-zlib --with-target-system-zlib=auto --enable-objc-gc=auto --enable-multiarch --disable-werror --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-offload-targets=nvptx-none=/build/gcc-9-bVKGhJ/gcc-9-9.4.0/debian/tmp-nvptx/usr,hsa --without-cuda-driver --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu
Thread model: posix
gcc version 9.4.0 (Ubuntu 9.4.0-5ubuntu1)

然后启动python服务

(base) root@spider:~/apps/ocr# cat server.py
 #!/usr/bin/python
import base64
from flask import Flask,jsonify,request,abort
from ocr_utils import *
import json
import numpy as np
import logging
from logging.handlers import RotatingFileHandler
from threading import Thread
import time

app = Flask(__name__)

@app.route('/', methods=['GET', 'POST'])
def home():
    """Serve the landing page: a static HTML heading for GET and POST on '/'."""
    landing_html = '<h1>Home</h1>'
    return landing_html

@app.route('/ocr', methods=['POST'])
def ocr():
    """Run OCR on the image named in the JSON request body.

    The body must be a JSON object with two keys:

    * ``path``  - directory that contains the image
    * ``image`` - file name of the image inside ``path``

    Returns:
        On success, the JSON string produced by ``parseOcrResult`` with
        status 200 and a ``application/json`` content type.
        If the body cannot be read as JSON or a key is missing, a short HTML
        error snippet (default status 200, unchanged so that existing clients
        keep working).
    """
    try:
        app.logger.info(request.headers)
        # Parse the body once instead of re-evaluating request.json per use.
        payload = request.json
        app.logger.info(type(payload))
        app.logger.info(payload)
        path = payload['path']
        app.logger.info(path)
        image = payload['image']
        app.logger.info(image)
        app.logger.info(request.data)
    except Exception:
        # Exception (not BaseException) so KeyboardInterrupt / SystemExit
        # still propagate; .exception() keeps the traceback in the log.
        app.logger.exception("发生了异常")
        return '<h3>Bad request param .</h3>'

    # NOTE(review): `path` and `image` are client-controlled and are used to
    # build file-system paths in image_ocr(); consider restricting them to a
    # known directory before exposing this endpoint to untrusted callers.
    ocrResult = image_ocr(path, image)
    app.logger.info("ocrResult....................................................")
    app.logger.info(ocrResult)
    # parseOcrResult already returns a JSON string, so send it as-is.
    response = parseOcrResult(ocrResult)
    return response, 200, {"Content-Type": "application/json"}

def parseOcrResult(ocrResult):
    """Serialize the first recognized line of an OCR result as a JSON string.

    Args:
        ocrResult: Sequence of detections where each entry is indexed as
            ``entry[1] == (text, score)``. Only the first entry is used.
            ``None`` or an empty sequence yields the defaults.

    Returns:
        A JSON string ``{"text": <str>, "score": <float>}``. Non-ASCII text is
        emitted verbatim (``ensure_ascii=False``). With no detections the
        result is ``{"text": "", "score": 0.0}``.
    """
    text = ""
    score = 0.0
    if ocrResult is not None and len(ocrResult) > 0:
        extractResult = ocrResult[0][1]
        text = extractResult[0]
        score = extractResult[1]
    data = {
        "text": text,
        # Builtin float() replaces np.float, which was only an alias for it
        # and was removed in NumPy 1.24; it also turns NumPy scalar scores
        # into something json can serialize.
        "score": float(score),
    }
    return json.dumps(data, ensure_ascii=False)

if __name__ == '__main__':
    import os

    # The Werkzeug debugger lets anyone who can reach the port execute
    # arbitrary Python, so it must not be switched on unconditionally for a
    # server bound to all interfaces. Opt in explicitly with OCR_DEBUG=1.
    debug_enabled = os.environ.get("OCR_DEBUG", "") == "1"
    # Outside debug mode Flask's logger would drop the INFO records the
    # request handlers emit; keep them visible.
    app.logger.setLevel(logging.INFO)
    app.run(host='0.0.0.0', port=30003, debug=debug_enabled)

(base) root@spider:~/apps/ocr# cat ocr_utils.py
# !usr/bin/env python
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
   File Name:     g_ocr
   Description :
   Author :       yangst
   date:          2022/1/25
-------------------------------------------------
   Change Activity:
                   2022/1/25:
-------------------------------------------------
"""

from PIL import Image
from paddleocr import PaddleOCR

# Load the OCR model once at import time; image_ocr() below reuses this instance.
ocr = PaddleOCR(use_angle_cls=True, lang="ch", cls_thresh=0.1, det_db_box_thresh=0.1)

def image_ocr(path,imageName):
    """
    Pre-process an image and run OCR on the result.

    1. Open ``path/imageName`` and convert it to RGBA.
    2. Replace every pixel that is not fully opaque with opaque white
       (white text on a transparent image would need separate handling).
    3. Paste the image at offset (50, 30) onto the background ``bg2.png``
       (resolved relative to the current working directory).
    4. Scale the composite to a height of 500 px, keeping the aspect ratio,
       save it under ``/root/apps/ocr/bak/`` and run OCR on the saved file.

    :param path: directory that contains the image
    :param imageName: file name of the image inside ``path``; also used as
        the file name of the pre-processed copy
    :return: whatever ``ocr.ocr`` returns for the pre-processed copy
    """
    # NOTE(review): imageName is concatenated into both the input and the
    # output path unchecked; callers passing untrusted names should validate
    # them first (e.g. reject path separators and "..").
    bakImg = "/root/apps/ocr/bak/" + imageName
    baseheight = 500

    # convert() returns an independent in-memory copy, so the source file can
    # be closed right away; it also guarantees an alpha band exists, which the
    # transparency handling below relies on (RGB / palette inputs used to crash).
    with Image.open(path + "/" + imageName) as src:
        img = src.convert("RGBA")

    # Mask is 255 where alpha < 255 and 0 elsewhere, so the paste overwrites
    # exactly the not-fully-opaque pixels with opaque white.
    not_opaque = img.getchannel("A").point(lambda a: 255 if a < 255 else 0)
    img.paste((255, 255, 255, 255), mask=not_opaque)

    with Image.open("bg2.png") as bg:
        bg.paste(img, (50, 30))  # copy onto the background image
        w, h = bg.size
        print('img_size:', h, w)
        hpercent = baseheight / float(h)
        wsize = int(float(w) * float(hpercent))
        # Image.ANTIALIAS was an alias of LANCZOS and was removed in Pillow 10.
        resized = bg.resize((wsize, baseheight), Image.LANCZOS)

    resized.save(bakImg)
    return ocr.ocr(bakImg)

调用服务

(base) root@spider:~/apps/ocr# curl --location --request POST 'http://127.0.0.1:30003/ocr' --header 'Content-Type: application/json' --data '{
    "path": "/root/apps/ocr",
    "image": "xxxx.png"
}'
{"text": "一个文字的图片", "score": 0.9016667604446411}(base) root@spider:~/apps/ocr#

  • 0
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值