文档抽取问答系统
https://github.com/PaddlePaddle/PaddleNLP/tree/develop/pipelines/examples/document-intelligence
Dockerfile（镜像构建文件）
# Base image: CUDA 11.6 / cuDNN 8 development image on Ubuntu 20.04.
FROM nvidia/cuda:11.6.0-cudnn8-devel-ubuntu20.04
# Install OS tools and build dependencies, then compile CPython 3.7.5 from
# source into /usr/local. Everything runs in a single layer so that the apt
# package lists, the downloaded tarball and the extracted source tree are
# deleted in the same layer that created them (removing them in a later layer
# would not shrink the image).
RUN echo "==> Install common tools" && apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        vim git ssh wget curl libgomp1 gcc g++ \
        tar mc libbz2-dev libssl-dev \
        libncursesw5-dev libdb-dev libexpat1-dev zlib1g-dev liblzma-dev libgdbm-dev \
        libffi-dev openssl libreadline6-dev libsqlite3-dev \
        sqlite3 ffmpeg libsm6 libxext6 && \
    rm -rf /var/lib/apt/lists/* && \
    wget https://www.python.org/ftp/python/3.7.5/Python-3.7.5.tgz && \
    tar -zxf Python-3.7.5.tgz && \
    cd Python-3.7.5 && \
    ./configure --prefix=/usr/local && \
    make && make install && \
    cd .. && rm -rf Python-3.7.5 Python-3.7.5.tgz
# Use /root as the working directory; the relative paths in the following
# instructions resolve against it.
WORKDIR /root
# Copy the service launcher script from the build context into the working directory.
COPY entrypoint.sh .
# Disabled: would copy local "paddleocr" / "paddlenlp" directories from the build
# context to .paddleocr / .paddlenlp in the working directory (presumably
# pre-downloaded model caches — confirm before enabling).
#COPY paddleocr .paddleocr
#COPY paddlenlp .paddlenlp
# Fetch the PaddleNLP sources and install the Python dependencies.
# The clone lands in the current working directory; the requirements path below
# expects that to be /root.
# lanms and paddlenlp are installed by ONE pip command: a bare
# "paddlenlp==2.4.1" between "&&" operators would be executed as a shell
# command and abort the build.
# NOTE(review): the paddlepaddle-gpu wheel is the ".post111" build while the
# base image ships CUDA 11.6 — confirm this combination is intended.
RUN echo "==> Install PaddlePaddle" && \
    git clone --depth 1 https://github.com/PaddlePaddle/PaddleNLP.git && \
    pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \
    pip3 install --no-cache-dir --upgrade pip && \
    pip3 install --no-cache-dir lanms==1.0.2 paddlenlp==2.4.1 && \
    python3 -m pip install --no-cache-dir paddlepaddle-gpu==2.3.2.post111 -f \
        https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html && \
    pip3 install --no-cache-dir -r /root/PaddleNLP/requirements.txt && \
    pip3 install --no-cache-dir paddle-pipelines==0.3 && \
    ln -s python3 /usr/local/bin/python
# Default command (exec form): run entrypoint.sh with bash. The script path is
# relative, so it is resolved against the image's working directory.
CMD ["/bin/bash","entrypoint.sh"]
entrypoint.sh
#!/bin/bash
# Container entry script: launches the document-intelligence service from the
# PaddleNLP pipelines checkout.

# Stop immediately if any step fails (e.g. the checkout directory is missing),
# instead of starting the server script from the wrong directory.
set -e

echo "Run document-intelligence service"
cd /root/PaddleNLP/pipelines

# Pipeline definition file and query pipeline name, exported into the
# environment of the launch script.
export PIPELINE_YAML_PATH=rest_api/pipeline/docprompt.yaml
export QUERY_PIPELINE_NAME=query_documents

# exec replaces this wrapper shell with the server script, so container stop
# signals are delivered to it rather than to an idle parent shell.
exec bash examples/document-intelligence/run_docprompt_server.sh
构建镜像
# Build the image from the Dockerfile in the current directory.
docker build --tag padlepadle:02 .
启动容器
# Start the container detached, publishing container port 8891 on host port 8890.
# Nothing may follow the image name: any trailing argument (such as ".") is taken
# as the container command and overrides the image's CMD.
docker run -d -p 8890:8891 --runtime=nvidia padlepadle:02
测试
# POST a document image URL and two prompts to the query_documents endpoint on host port 8890.
curl -X POST 'http://127.0.0.1:8890/query_documents' \
  -H "Content-Type: application/json" \
  -d '{"meta": {"doc": "https://bj.bcebos.com/paddlenlp/taskflow/document_intelligence/images/invoice.jpg", "prompt": ["发票号码是多少?", "校验码是多少?"]}}'
返回结果
{"meta":{"doc":"https://bj.bcebos.com/paddlenlp/taskflow/document_intelligence/images/invoice.jpg","prompt":["发票号码是多少?","校验码是多少?"]},"results":[[{"prompt":"发票号码是多少?","result":[{"value":"No44527206","prob":0.74,"start":2,"end":2}]}