tesseract-ocr-4.X.X安装部署
Maven安装依赖包
cd D:\jzdev2\infra\components\tesseract-ocr\lib
mvn install:install-file -Dpackaging=jar -DgeneratePom=true -DcreateChecksum=true -DgroupId=com.sun.media -DartifactId=jai_imageio -Dversion=1.1 -Dfile=jai_imageio-1.1.jar
Windows下部署
- 安装chrome
- 安装jdk-8
- 创建目录:
  d:/work/tesseract-ocr-4.1.1/
  d:/work/tesseract-ocr-4.1.1/temp/
  d:/data/log/tesseract-ocr-4.1.1/
- 编写启动脚本 start.bat
- 设置定时任务,故障自启动
Tess4j注意点
- 不同版本的tess4j使用不同的tessdata,否则会报错;
- 不同版本的tess4j适用于不同的Windows版本,例如4.4.0可以运行在Windows Server 2016上,而4.1.1则不行。
Linux下部署
1. 准备Linux环境
Tesseract 4.0.beta-1
Leptonica 1.75.3 (via Lept4J) 32- and 64-bit DLLs
The Linux shared object library (libtesseract.so) equivalent to the DLL is available in Tesseract 4.0.beta-1
wget http://www.leptonica.org/source/leptonica-1.75.3.tar.gz
wget https://github.com/tesseract-ocr/tesseract/archive/4.0.0-beta.1.tar.gz
tar -zxf leptonica-1.75.3.tar.gz
cd leptonica-1.75.3
./autobuild
./configure
make -j
make install
cd /usr/local/lib
[root@localhost lib]# ll
total 34408
-rw-r--r--. 1 root root 15737144 Jul 18 09:00 liblept.a
-rwxr-xr-x. 1 root root 947 Jul 18 09:00 liblept.la
lrwxrwxrwx. 1 root root 16 Jul 18 09:00 liblept.so -> liblept.so.5.0.2
lrwxrwxrwx. 1 root root 16 Jul 18 09:00 liblept.so.5 -> liblept.so.5.0.3
-rwxr-xr-x. 1 root root 9613208 Jul 18 09:00 liblept.so.5.0.2
-rwxr-xr-x. 1 root root 9872024 Jul 18 07:38 liblept.so.5.0.3
drwxr-xr-x. 2 root root 21 Jul 18 09:00 pkgconfig
tar -zxf 4.0.0-beta.1.tar.gz
cd tesseract-4.0.0-beta.1/
./autogen.sh
PKG_CONFIG_PATH=/usr/local/lib/pkgconfig LIBLEPT_HEADERSDIR=/usr/local/include ./configure --with-extra-includes=/usr/local/include --with-extra-libraries=/usr/local/lib
LDFLAGS="-L/usr/local/lib" CFLAGS="-I/usr/local/include" make -j
make install
ldconfig
[root@localhost lib]# ll
total 181208
-rw-r--r--. 1 root root 15737144 Jul 18 09:00 liblept.a
-rwxr-xr-x. 1 root root 947 Jul 18 09:00 liblept.la
lrwxrwxrwx. 1 root root 16 Jul 18 09:00 liblept.so -> liblept.so.5.0.2
lrwxrwxrwx. 1 root root 16 Jul 18 09:00 liblept.so.5 -> liblept.so.5.0.3
-rwxr-xr-x. 1 root root 9613208 Jul 18 09:00 liblept.so.5.0.2
-rwxr-xr-x. 1 root root 9872024 Jul 18 07:38 liblept.so.5.0.3
-rw-r--r--. 1 root root 111330880 Jul 18 09:26 libtesseract.a
-rwxr-xr-x. 1 root root 1040 Jul 18 09:26 libtesseract.la
lrwxrwxrwx. 1 root root 21 Jul 18 09:26 libtesseract.so -> libtesseract.so.4.0.0
lrwxrwxrwx. 1 root root 21 Jul 18 09:26 libtesseract.so.4 -> libtesseract.so.4.0.0
-rwxr-xr-x. 1 root root 38982880 Jul 18 09:26 libtesseract.so.4.0.0
drwxr-xr-x. 2 root root 41 Jul 18 09:26 pkgconfig
[root@localhost lib]#
tesseract -v
2. Docker打包 CentOS 7依赖环境
docker tag centos7_tesseract-ocr:4.0.0-beta.1 172.18.1.82:5000/centos7_tesseract-ocr:4.0.0-beta.1
docker push 172.18.1.82:5000/centos7_tesseract-ocr:4.0.0-beta.1
docker pull 172.18.1.82:5000/centos7_tesseract-ocr:4.0.0-beta.1
docker tag 172.18.1.82:5000/centos7_tesseract-ocr:4.0.0-beta.1 prod-registry.ys.jzdev.info:5000/centos7_tesseract-ocr:4.0.0-beta.1
docker push prod-registry.ys.jzdev.info:5000/centos7_tesseract-ocr:4.0.0-beta.1
3. Docker打包 tesseract-ocr-prod
docker build -t tesseract-ocr-prod:4.1.1 ./
docker images
docker tag tesseract-ocr-prod:4.1.1 172.18.1.82:5000/tesseract-ocr-prod:4.1.1
docker push 172.18.1.82:5000/tesseract-ocr-prod:4.1.1
docker pull 172.18.1.82:5000/tesseract-ocr-prod:4.1.1
docker tag 172.18.1.82:5000/tesseract-ocr-prod:4.1.1 prod-registry.ys.jzdev.info:5000/tesseract-ocr-prod:4.1.1
docker push prod-registry.ys.jzdev.info:5000/tesseract-ocr-prod:4.1.1
docker run -d -v /data/log/:/data/log/ -p 16080:16080 tesseract-ocr-prod:4.1.1 /bin/bash
docker ps
docker exec -it xxxxxxx /bin/bash
4. 运行Docker
[root@win2k8-051 tesseract-ocr-4.1.1]# vi target/classes/application.properties
改为test或者prod
docker build -t tesseract-ocr:4.1.1 ./
docker images
docker run -d -v /data/log/:/data/log/ -p 16080:16080 tesseract-ocr:4.1.1 /bin/bash
docker ps
docker exec -it xxxxxxx /bin/bash