录数据的正确方式

丘比特爱睡觉

已于 2022-03-20 17:59:21 修改

阅读量895

点赞数 2

文章标签： python go

于 2022-03-19 22:41:44 首次发布

本文链接：https://blog.csdn.net/weixin_45744832/article/details/123604286

版权

这篇博客介绍了如何利用Go语言和Python代码自动化处理眼科检查报告，如IOLMaster和Pentacam。通过读取PDF内容并应用正则表达式提取关键数据，将IOLMaster报告快速转换为CSV。对于包含图片的Pentacam报告，首先使用图像处理技术提取图片，然后借助OCR API识别文本，最后整理并保存到CSV文件。整个流程大大提高了数据录入效率。

摘要由CSDN通过智能技术生成

当你手上有很多份IOL Master、Pentacam之类的检查报告，需要把其中的数据全部录入Excel时，你会一个字一个字地敲进去吗？我反正不会，那怎么搞？

去奴役电脑！！！

话不多说，咱来点直接的，上代码~~~

一、录入IOL Master数据：以下为Go代码，可直接编译成exe

package main
import (
    "strconv"
    "fmt"
    "io/ioutil"
    "github.com/ledongthuc/pdf"
    "encoding/csv"
    "io"
    "log"
    "os"
    "bufio"
    "strings"
    "regexp"

)

// Regular expressions for the labelled measurements in the text of an
// IOLMaster report. Every labelled pattern has the shape
// `(LABEL:)((-{3})|(VALUE))`, i.e. the label followed either by the "---"
// placeholder or by a formatted value.
//
// NOTE(review): the decimal point is written as an unescaped `.`, so it matches
// any single character, not only ".". Left as is because tightening it could
// reject reports that use a different decimal separator — confirm before changing.
var (
	// Biometry values in millimetres.
	reAL  = `(AL:)((-{3})|(\d{2}.\d{2}mm))`
	reACD = `(ACD:)((-{3})|(\d.\d{2}mm))`
	reLT  = `(LT:)((-{3})|(\d.\d{2}mm))`
	reWTW = `(WTW:)((-{3})|(\d{2}.\dmm))`

	// NOTE(review): not referenced anywhere in the visible code. As written it
	// matches one to three '@' characters followed by '°'; `@\d{1,3}°` was
	// presumably intended.
	reAngle = `@{1,3}°`

	// Keratometry values in dioptres with an axis ("43.25D@90°"). The two ΔK
	// patterns only match values written with a leading minus sign.
	reK1  = `(K1:)((-{3})|(\d{2}.\d{2}D@\d{1,3}°))`
	reΔK  = `(ΔK:)((-{3})|(-\d{1,2}.\d{2}D@\d{1,3}°))`
	reK2  = `(K2:)((-{3})|(\d{2}.\d{2}D@\d{1,3}°))`
	reTSE = `(TSE:)((-{3})|(\d{2}.\d{2}D))`
	reTK1 = `(TK1:)((-{3})|(\d{2}.\d{2}D@\d{1,3}°))`
	reΔTK = `(ΔTK:)((-{3})|(-\d{1,2}.\d{2}D@\d{1,3}°))`
	reTK2 = `(TK2:)((-{3})|(\d{2}.\d{2}D@\d{1,3}°))`
)
// Eye holds the values extracted from an IOLMaster report for one eye. Every
// value is stored as the text found in the report (or "---" when the report
// shows no reading). The field order matters: the file builds Eye values with
// positional composite literals.
type Eye struct {
	AL        string
	ACD       string
	K1        string
	K1_angle  string
	K2        string
	K2_angle  string
	ΔK        string
	ΔK_angle  string
	WTW       string
	LT        string
	TSE       string
	TK1       string
	TK1_angle string
	ΔTK       string
	ΔTK_angle string
	TK2       string
	TK2_angle string
}


// ReadFile returns the lines of fileName with surrounding whitespace trimmed.
//
// The final read is always appended, so a file that ends with a newline yields
// a trailing empty string and an empty file yields one empty string; callers
// in this file treat empty entries as blank rows. If the file cannot be opened
// a message is printed and nil is returned. If a read fails part-way, the
// lines collected so far are returned.
func ReadFile(fileName string) (res []string) {
	// The file is only read, so open it read-only: requesting write access
	// needlessly fails on read-only files.
	file, err := os.Open(fileName)
	if err != nil {
		fmt.Println("Open file error!", err)
		return nil
	}
	defer file.Close()

	// The size is informational only; failing to stat must not abort the read.
	if stat, err := file.Stat(); err != nil {
		fmt.Println("Stat file error!", err)
	} else {
		fmt.Println("file size=", stat.Size())
	}

	buf := bufio.NewReader(file)
	for {
		line, err := buf.ReadString('\n')
		// ReadString returns whatever it read before the error, so the line
		// is kept even when err is non-nil (e.g. a last line without '\n').
		res = append(res, strings.TrimSpace(line))
		if err == io.EOF {
			fmt.Println("File read ok!")
			return res
		}
		if err != nil {
			fmt.Println("Read file error!", err)
			return res
		}
	}
}

// ScanFiles lists the regular entries (non-directories) directly inside
// fileDir and returns each name prefixed with fileDir. The prefix is plain
// string concatenation, so fileDir must already end with a path separator.
// A directory that cannot be read yields an empty result.
func ScanFiles(fileDir string) []string {
	// The error is deliberately dropped: an unreadable or missing directory
	// is reported to the caller as "no files".
	entries, _ := ioutil.ReadDir(fileDir)

	var paths []string
	for i := range entries {
		if entries[i].IsDir() {
			continue
		}
		paths = append(paths, fileDir+entries[i].Name())
	}
	return paths
}


// SaveFile appends one CSV record to ./Data.csv: Name, ID, then the values of
// the right eye (OD) followed by the values of the left eye (OS). The file is
// created if it does not exist. Any failure to open or write the file ends the
// program via log.Fatalf, so a record is never silently lost.
//
// NOTE(review): in each eye's section the column after ΔK repeats K2_angle
// rather than ΔK_angle (the title row labels that column as the astigmatism
// axis). This is kept unchanged; it looks intentional but should be confirmed.
func SaveFile(Name string, ID string, OD Eye, OS Eye) {
	// O_APPEND makes every write land at the end of the file, replacing the
	// former unchecked Seek to the end.
	nfs, err := os.OpenFile("./Data.csv", os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0666)
	if err != nil {
		log.Fatalf("can not create file, err is %+v", err)
	}
	defer nfs.Close()

	w := csv.NewWriter(nfs)
	w.Comma = ','
	w.UseCRLF = true

	record := []string{
		Name, ID,
		OD.AL, OD.ACD, OD.K1, OD.K1_angle, OD.K2, OD.K2_angle, OD.ΔK, OD.K2_angle, OD.WTW, OD.LT,
		OD.TSE, OD.TK1, OD.TK1_angle, OD.ΔTK, OD.ΔTK_angle, OD.TK2, OD.TK2_angle,
		OS.AL, OS.ACD, OS.K1, OS.K1_angle, OS.K2, OS.K2_angle, OS.ΔK, OS.K2_angle, OS.WTW, OS.LT,
		OS.TSE, OS.TK1, OS.TK1_angle, OS.ΔTK, OS.ΔTK_angle, OS.TK2, OS.TK2_angle,
	}
	if err := w.Write(record); err != nil {
		log.Fatalf("can not write, err is %+v", err)
	}
	// The writer is buffered: Flush pushes the record to the file and Error
	// reports anything that went wrong while doing so.
	w.Flush()
	if err := w.Error(); err != nil {
		log.Fatalf("can not write, err is %+v", err)
	}
}

// SaveFile_new appends one CSV record to ./Data_Wanted.csv holding Name, ID
// and the basic measurements of a single eye. The file is created if it does
// not exist. Any failure to open or write the file ends the program via
// log.Fatalf, so a record is never silently lost.
//
// NOTE(review): the column after ΔK repeats K2_angle rather than ΔK_angle.
// This is kept unchanged; it looks intentional but should be confirmed.
func SaveFile_new(Name string, ID string, eye Eye) {
	// O_APPEND makes every write land at the end of the file, replacing the
	// former unchecked Seek to the end.
	nfs, err := os.OpenFile("./Data_Wanted.csv", os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0666)
	if err != nil {
		log.Fatalf("can not create file, err is %+v", err)
	}
	defer nfs.Close()

	w := csv.NewWriter(nfs)
	w.Comma = ','
	w.UseCRLF = true

	record := []string{
		Name, ID,
		eye.AL, eye.ACD, eye.K1, eye.K1_angle, eye.K2, eye.K2_angle, eye.ΔK, eye.K2_angle, eye.WTW, eye.LT,
	}
	if err := w.Write(record); err != nil {
		log.Fatalf("can not write, err is %+v", err)
	}
	// The writer is buffered: Flush pushes the record to the file and Error
	// reports anything that went wrong while doing so.
	w.Flush()
	if err := w.Error(); err != nil {
		log.Fatalf("can not write, err is %+v", err)
	}
}

// FindIt returns every non-overlapping match of the regular expression re in
// content, in order of appearance, or nil when nothing matches. It panics if
// re is not a valid expression.
func FindIt(content string, re string) []string {
	return regexp.MustCompile(re).FindAllString(content, -1)
}

// matchEyePair runs pattern over content and returns its first two matches
// with the leading labelLen bytes (the "AL:"-style label) removed. It fails
// when fewer than two matches exist instead of indexing out of range.
func matchEyePair(content, pattern string, labelLen int) (first, second string, err error) {
	m := FindIt(content, pattern)
	if len(m) < 2 {
		return "", "", fmt.Errorf("pattern %q matched %d time(s), need one value per eye", pattern, len(m))
	}
	return m[0][labelLen:], m[1][labelLen:], nil
}

// splitValueAngle splits a reading such as "43.25D@90°" into the value before
// the '@' and the angle after it. The "---" placeholder yields "---" for both.
func splitValueAngle(raw string) (value, angle string) {
	if raw == "---" {
		return "---", "---"
	}
	parts := strings.SplitN(raw, "@", 2)
	if len(parts) < 2 {
		return parts[0], ""
	}
	return parts[0], parts[1]
}

// stripSign returns the value part of a ΔK reading with every '-' removed,
// leaving the "---" placeholder untouched.
func stripSign(raw string) string {
	value, _ := splitValueAngle(raw)
	if value == "---" {
		return value
	}
	return strings.Replace(value, "-", "", -1)
}

// ReadPdfGroup extracts the measurements of both eyes from page 3 of the
// IOLMaster report at path. For every pattern the first match is stored in the
// first returned Eye (OD) and the second match in the second (OS).
// NOTE(review): this assumes the report text always lists the right eye
// before the left eye — confirm against real reports.
// NOTE(review): the K1/K2 patterns can also match inside "TK1:"/"TK2:" text;
// only the first two matches are used, which relies on K1/K2 appearing before
// TK1/TK2 in the page text.
//
// ΔK is stored without its sign and ΔK_angle is left empty; ΔTK keeps its
// sign. On any failure (file cannot be opened, page 3 missing, text cannot be
// read, a measurement is not found for both eyes) two zero Eye values and a
// non-nil error are returned.
func ReadPdfGroup(path string) (Eye, Eye, error) {
	f, r, err := pdf.Open(path)
	if err != nil {
		return Eye{}, Eye{}, fmt.Errorf("open %s: %w", path, err)
	}
	defer f.Close()

	if n := r.NumPage(); n < 3 {
		return Eye{}, Eye{}, fmt.Errorf("%s: has %d page(s), the data is expected on page 3", path, n)
	}
	rows, err := r.Page(3).GetTextByRow()
	if err != nil {
		return Eye{}, Eye{}, fmt.Errorf("%s: read text of page 3: %w", path, err)
	}

	// Join all words without separators; the patterns expect "AL:23.45mm" as
	// one run of text.
	var sb strings.Builder
	for _, row := range rows {
		for _, word := range row.Content {
			sb.WriteString(word.S)
		}
	}
	content := sb.String()

	var OD, OS Eye

	// Readings stored exactly as matched (label removed).
	plain := []struct {
		pattern     string
		labelLen    int
		right, left *string
	}{
		{reAL, len("AL:"), &OD.AL, &OS.AL},
		{reACD, len("ACD:"), &OD.ACD, &OS.ACD},
		{reLT, len("LT:"), &OD.LT, &OS.LT},
		{reWTW, len("WTW:"), &OD.WTW, &OS.WTW},
		{reTSE, len("TSE:"), &OD.TSE, &OS.TSE},
	}
	for _, p := range plain {
		right, left, err := matchEyePair(content, p.pattern, p.labelLen)
		if err != nil {
			return Eye{}, Eye{}, fmt.Errorf("%s: %w", path, err)
		}
		*p.right, *p.left = right, left
	}

	// Readings of the form value@angle, split into two fields.
	angled := []struct {
		pattern                string
		labelLen               int
		rightValue, rightAngle *string
		leftValue, leftAngle   *string
	}{
		{reK1, len("K1:"), &OD.K1, &OD.K1_angle, &OS.K1, &OS.K1_angle},
		{reK2, len("K2:"), &OD.K2, &OD.K2_angle, &OS.K2, &OS.K2_angle},
		{reTK1, len("TK1:"), &OD.TK1, &OD.TK1_angle, &OS.TK1, &OS.TK1_angle},
		{reΔTK, len("ΔTK:"), &OD.ΔTK, &OD.ΔTK_angle, &OS.ΔTK, &OS.ΔTK_angle},
		{reTK2, len("TK2:"), &OD.TK2, &OD.TK2_angle, &OS.TK2, &OS.TK2_angle},
	}
	for _, a := range angled {
		right, left, err := matchEyePair(content, a.pattern, a.labelLen)
		if err != nil {
			return Eye{}, Eye{}, fmt.Errorf("%s: %w", path, err)
		}
		*a.rightValue, *a.rightAngle = splitValueAngle(right)
		*a.leftValue, *a.leftAngle = splitValueAngle(left)
	}

	// ΔK: only the magnitude is kept; its angle is intentionally not stored.
	right, left, err := matchEyePair(content, reΔK, len("ΔK:"))
	if err != nil {
		return Eye{}, Eye{}, fmt.Errorf("%s: %w", path, err)
	}
	OD.ΔK, OS.ΔK = stripSign(right), stripSign(left)

	return OD, OS, nil
}

// main writes a title row to the output CSV files and then one row per entry
// of ./Names.txt. Entries are processed in file order; entry j belongs to
// folder number j/8, and its report is looked up under
// ./Exams/<j/8>/<name><id>/IOLMaster/. An empty name, or a folder without
// files, produces a blank row in both outputs so rows stay aligned with the
// input list. For every report found, both eyes go to SaveFile and the eye
// selected by ./WhichEye.txt ("右" selects OD, anything else OS) goes to
// SaveFile_new.
func main() {
	noData := Eye{}
	OD_title := Eye{"眼轴（IOL master）", "ACD（IOLmaster）", "K1（IOLmaster）", "K1（IOLmaster）角度", "K2（IOLmaster）", "K2（IOLmaster）角度", "角膜散光（IOLmaster）", "角膜散光轴位", "WTW（IOLmaster）", "LT(IOLmaster)", "OD_TSE", "OD_TK1", "OD_TK1_angle", "OD_ΔTK", "OD_ΔTK_angle", "OD_TK2", "OD_TK2_angle"}
	OS_title := Eye{"眼轴（IOL master）", "ACD（IOLmaster）", "K1（IOLmaster）", "K1（IOLmaster）角度", "K2（IOLmaster）", "K2（IOLmaster）角度", "角膜散光（IOLmaster）", "角膜散光轴位", "WTW（IOLmaster）", "LT(IOLmaster)", "OS_TSE", "OS_TK1", "OS_TK1_angle", "OS_ΔTK", "OS_ΔTK_angle", "OS_TK2", "OS_TK2_angle"}
	SaveFile("Name", "ID", OD_title, OS_title)

	Names := ReadFile("./Names.txt")
	IDs := ReadFile("./IDs.txt")
	WhichEye := ReadFile("./WhichEye.txt")

	// saveBlank keeps the output rows aligned with the input list.
	saveBlank := func() {
		SaveFile("", "", noData, noData)
		SaveFile_new("", "", noData)
	}

	for j, name := range Names {
		if name == "" {
			saveBlank()
			continue
		}
		if j >= len(IDs) {
			log.Fatalf("entry %d (%s): IDs.txt has only %d lines", j+1, name, len(IDs))
		}

		// Patients are stored eight per numbered folder.
		dir := "./Exams/" + strconv.Itoa(j/8) + "/" + name + IDs[j] + "/IOLMaster/"
		files := ScanFiles(dir)
		if len(files) == 0 {
			saveBlank()
			continue
		}

		// Only the first file of the folder is read.
		OD, OS, err := ReadPdfGroup(files[0])
		if err != nil {
			panic(err)
		}
		if j >= len(WhichEye) {
			log.Fatalf("entry %d (%s): WhichEye.txt has only %d lines", j+1, name, len(WhichEye))
		}
		fmt.Println(OD, OS)
		SaveFile(name, IDs[j], OD, OS)
		if WhichEye[j] == "右" {
			SaveFile_new(name, IDs[j], OD)
		} else {
			SaveFile_new(name, IDs[j], OS)
		}
	}
}

因为IOL Master报告PDF内本身就是文本内容，读取准确，几乎不可能出错，上述代码运行速度也很快，大概几分钟就能录入将近一千份数据。

二、Pentacam怎么搞？

用上面的方法直接从PDF读取文本将会得到什么呢？什么都没有，Pentacam报告的PDF文档里面是图片，文字不能直接录入，咱只能求助第三方OCR了！

1.处理图片

在调用第三方OCR接口前，请务必将个人隐私信息从报告中去除！！！并且为了省钱，咱们最好一次调用就能获得一份报告的信息，我们先对图片进行处理！

PicManager.py代码如下

import fitz
import os
import cv2
import numpy as np
def ExtractPic(dir):
    """Extract every embedded image of each PDF in ``dir`` into ``dir/temp``.

    For a file ``<name>.pdf`` the images are written to
    ``dir/temp/<name>/<page>-<xref>.jpg``, where ``page`` is 1-based and
    ``xref`` is the image's cross-reference number reported by PyMuPDF.
    ``dir/temp`` and the per-PDF folders are created when missing.

    Only entries whose extension is ``.pdf`` (case-insensitive) are processed,
    and the folder name is the file name without that extension, so names that
    merely contain "pdf" are skipped and dotted names such as ``a.1.pdf`` and
    ``a.2.pdf`` no longer share one output folder.

    :param dir: directory containing the PDF reports
    :return: None
    """
    temp_dir = dir + '/temp'
    files = os.listdir(dir)
    if "temp" not in files:
        os.mkdir(temp_dir)
    for file in files:
        stem, ext = os.path.splitext(file)
        if ext.lower() != ".pdf":
            continue
        out_dir = temp_dir + '/' + stem
        if not os.path.isdir(out_dir):
            os.mkdir(out_dir)
        pdf_document = fitz.open(dir + '/' + file)
        try:
            for current_page in range(len(pdf_document)):
                for image in pdf_document.get_page_images(current_page):
                    xref = image[0]
                    target = out_dir + '/%s-%s.jpg' % (current_page + 1, xref)
                    pix = fitz.Pixmap(pdf_document, xref)
                    if pix.n < 5:
                        pix.save(target)
                    else:
                        # Pixmaps with five or more components are converted
                        # to RGB before saving.
                        rgb = fitz.Pixmap(fitz.csRGB, pix)
                        rgb.save(target)
                        rgb = None
                    # Drop the reference so the pixmap memory is released.
                    pix = None
        finally:
            # Release the file handle even if an image fails to extract.
            pdf_document.close()

def del_temp(dir):
    """Delete ``dir`` together with everything inside it.

    The tree is walked bottom-up so that each directory is already empty when
    it is removed; ``dir`` itself is removed last.

    :param dir: path of the directory to delete
    """
    for parent, subdirs, filenames in os.walk(dir, topdown=False):
        removals = [(os.remove, entry) for entry in filenames]
        removals += [(os.rmdir, entry) for entry in subdirs]
        for remove, entry in removals:
            remove(os.path.join(parent, entry))
    os.rmdir(dir)

def check(_dir) :
    """Keep only the extracted-image folders that look like pre-op reports.

    Each sub-folder of ``_dir`` is classified by its number of files and by the
    presence of a marker image:

    * 6 files: kept when "1-9.jpg" or "1-13.jpg" is present;
    * 3 files: kept when "1-7.jpg" is present;
    * anything else: never kept.

    Folders that are not kept are deleted with ``del_temp``.

    :param _dir: directory holding one sub-folder per PDF
    :return: True when at least one sub-folder was kept, otherwise False
    """
    # file count -> marker images, any one of which identifies a pre-op report
    preop_markers = {6: ("1-9.jpg", "1-13.jpg"), 3: ("1-7.jpg",)}
    found_any = False
    for entry in os.listdir(_dir):
        sub_dir = _dir + '/' + entry
        names = os.listdir(sub_dir)
        if len(names) == 6:
            print("Both eyes' data in one PDF!")
        markers = preop_markers.get(len(names), ())
        if any(marker in names for marker in markers):
            print("Pre-OP")
            found_any = True
        else:
            del_temp(sub_dir)
            print("Not Pre-OP, directory deleted")
    if not found_any:
        # Only the message is printed here; ``_dir`` itself is not removed.
        print("No data here, temp directory deleted")
    return found_any

def join_image(image1, image2):
    """Stack two OpenCV image arrays on top of each other.

    Before stacking, the shorter image is padded with black rows, split as
    evenly as possible between top and bottom (the extra row goes to the
    bottom), until both images have the same height. The images are then
    concatenated along axis 0, ``image1`` above ``image2``.

    :param image1: upper image
    :param image2: lower image
    :return: the combined image array
    """
    h1, _ = image1.shape[0:2]
    h2, _ = image2.shape[0:2]

    gap = abs(h1 - h2)
    pad_top = gap // 2
    pad_bottom = gap - pad_top  # one row more than pad_top when gap is odd

    if h1 > h2:
        image2 = cv2.copyMakeBorder(image2, pad_top, pad_bottom, 0, 0, cv2.BORDER_CONSTANT, value=[0, 0, 0])
    elif h2 > h1:
        image1 = cv2.copyMakeBorder(image1, pad_top, pad_bottom, 0, 0, cv2.BORDER_CONSTANT, value=[0, 0, 0])
    return np.concatenate((image1, image2), axis=0)


def _load_image(path):
    """Read an image file into an OpenCV array, unchanged (flag -1).

    ``np.fromfile`` + ``cv2.imdecode`` is used instead of ``cv2.imread``;
    presumably so that paths with non-ASCII characters work — TODO confirm.
    """
    return cv2.imdecode(np.fromfile(path, dtype=np.uint8), -1)


def _build_ocr_image(image0, image1):
    """Crop the data regions of two report images and stack them for OCR.

    From the right-most 400 pixel columns of each image (the offset is taken
    from the width of ``image0``) two strips are cut: rows 110-150 and rows
    430-800. In the first strip, x ranges 0-330 and 380-400 are painted black
    (presumably to hide personal information; note that this also paints the
    source image, because the strip is a view of it). The strips of each image
    are joined with ``join_image``, both results are stacked vertically and the
    outcome is enlarged by a factor of 2 with nearest-neighbour interpolation.
    """
    _rows, width, _channels = image0.shape
    mask_left = np.array([(0, 40), (0, 0), (330, 0), (330, 40)])
    mask_right = np.array([(380, 40), (380, 0), (400, 0), (400, 40)])

    joined = []
    for image in (image0, image1):
        header = image[110:150, width - 400:]
        header = cv2.fillPoly(header, [mask_left], [0, 0, 0])
        header = cv2.fillPoly(header, [mask_right], [0, 0, 0])
        table = image[430:800, width - 400:]
        joined.append(join_image(header, table))

    stacked = np.vstack(joined)
    return cv2.resize(stacked, (0, 0), fx=2.0, fy=2.0, interpolation=cv2.INTER_NEAREST)


def ProcessPic(dir):
    """Build the single image that is sent to OCR for the reports in ``dir``.

    The images are extracted with ``ExtractPic`` and filtered with ``check``.
    Two layouts are then tried in order:

    1. one PDF per eye: "1-6.jpg" from the last two folders of ``dir/temp``;
    2. both eyes in one PDF: "1-10.jpg" and "2-14.jpg" from the last folder,
       or, if those cannot be read, "1-14.jpg" and "2-10.jpg".

    :param dir: directory containing the Pentacam PDF reports
    :return: the combined image array, or None when ``check`` found no usable
        report
    """
    ExtractPic(dir)
    _dir = dir + "/temp"
    if not check(_dir):
        return None

    dirs = os.listdir(_dir)
    print("Ready to work!")
    try:
        image0 = _load_image(_dir + '/' + dirs[-2] + "/1-6.jpg")
        image1 = _load_image(_dir + '/' + dirs[-1] + "/1-6.jpg")
        return _build_ocr_image(image0, image1)
    except Exception:
        # Any failure of the first layout (a single folder, missing or
        # unreadable images) switches to the one-PDF layout. Catching
        # Exception rather than everything lets Ctrl-C still stop the run.
        last_dir = _dir + '/' + dirs[-1]
        try:
            image0 = _load_image(last_dir + "/1-10.jpg")
            image1 = _load_image(last_dir + "/2-14.jpg")
        except Exception:
            image0 = _load_image(last_dir + "/1-14.jpg")
            image1 = _load_image(last_dir + "/2-10.jpg")
        return _build_ocr_image(image0, image1)

# Manual run: builds the OCR input image for the reports under the Windows-style
# relative path .\Pentacam. The returned image is discarded here.
if __name__ == '__main__':
    # Earlier manual checks, kept for reference:
    #ExtractPic(r"./","test_bizzare.pdf")
    #check("./")
    ProcessPic(r".\Pentacam")

下图是处理后的图片示例

2.调用第三方OCR API

在处理完图片后就可以调用第三方OCR API了

OCR.py代码如下。请将代码中 get_token() 函数里的
AK = "bD**************vSue"
CK = "n**********************oO9"
替换为你自己申请到的 API Key（AK）和 Secret Key（即代码中的 CK）。
此处用的是百度的OCR API，每天大约有两百次免费调用额度，超出后购买也还算实惠。

#coding=utf-8
import re
import csv
import urllib
import json
import base64
import requests
from PicManager import *

# client_id 为官网获取的AK（API Key）， client_secret 为官网获取的SK（Secret Key，即 get_token() 中的变量 CK）

class Pentacam (object):
	# Plain record of the readings of one eye. Every attribute is a class-level
	# default of "" (empty string); assigning on an instance overrides it for
	# that instance only.
	# NOTE(review): for paired names such as K1 / K1_ the meaning of the
	# underscore variant is not visible in this class — confirm against the
	# column headers used when the data is saved.

	# Which eye the record belongs to.
	whichEye = ""
	# Keratometry-style readings.
	K1 = ""
	K1_ = ""
	Axis = ""
	K2 = ""
	K2_ = ""
	Km = ""
	Km_ = ""
	Km_D = ""
	Astig = ""
	Astig_ = ""
	Astig_D = ""
	# Readings prefixed "Apex".
	Apex_C = ""
	Apex_1 = ""
	Apex_3 = ""
	Apex_min = ""
	Apex_max = ""
	# Readings prefixed "Pupil".
	Pupil_C = ""
	Pupil_1 = ""
	Pupil_3 = ""
	Pupil_min = ""
	Pupil_max = ""
	# Remaining single readings.
	TCA = ""
	TCSA = ""
	TCIA = ""
	ACD_Int = ""
	ACD_Ext = ""
	# Disabled field, kept for reference.
	#A_S = ""
	Cornea = ""
	Pupil_Dia = ""
	Pachy_Apex = ""
	Pachy_Thinnest = ""
	Pachy_D = ""

class Eyes (object):
	# Holder for the per-eye result lists (OD and OS).
	# Both defaults are class-level lists, i.e. one list object shared by all
	# instances until an instance attribute of the same name is assigned;
	# assign a new list rather than appending to these defaults.
	OD = []
	OS = []

def get_token():
	"""Request an access token from the Baidu OAuth endpoint.

	The request is built from the API key (AK) and the secret key (CK) below,
	which must be replaced with your own credentials.

	:return: the value of ``access_token`` in the JSON response
	:raises urllib.error.URLError: when the request fails
	:raises KeyError: when the response contains no ``access_token``
	"""
	# ``import urllib`` alone does not load the ``request`` submodule; import
	# it explicitly so this function does not depend on another library having
	# imported it first.
	import urllib.request

	# host = 'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=【AK from the console】&client_secret=【SK from the console】'
	# NOTE(review): credentials are hard-coded; prefer loading them from the
	# environment or a config file kept out of version control.
	AK = "b*********************ue"
	CK = "n***********************oO9"
	host = 'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id='+AK+'&client_secret='+CK
	request = urllib.request.Request(host)
	request.add_header('Content-Type', 'application/json; charset=UTF-8')
	# The context manager closes the HTTP response once it has been read.
	with urllib.request.urlopen(request) as response:
		content = response.read()
	return json.loads(content)['access_token']

def cv2_base64(img):
	"""Encode an OpenCV image as JPEG and return the Base64 form of the bytes.

	:param img: image array accepted by ``cv2.imencode``
	:return: Base64-encoded JPEG data as ``bytes``
	"""
	jpeg_buffer = cv2.imencode('.jpg',img)[1]
	return base64.b64encode(jpeg_buffer.tobytes())

def recognize(image):
	"""Send ``image`` to the Baidu general OCR endpoint and split the text by eye.

	The recognised strings are collected in order. The string 'OD' marks the
	start of the right-eye data and 'OS' (or 'os') the start of the left-eye
	data; each returned list starts with its marker and runs up to the other
	marker or to the end of the text, whichever applies.

	:param image: OpenCV image array
	:return: ``(Data_OD, Data_OS)``; two empty lists when the service reports
		an error or when the markers are not found (a message is printed)
	"""
	access_token = get_token()
	url = 'https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic' + '?access_token=' + access_token
	headers = {
	   'Content-Type':'application/x-www-form-urlencoded'
	}
	data = {'access_token': access_token, 'image': cv2_base64(image)}
	req = requests.post(url=url,headers=headers,data=data).json()

	Data_OD, Data_OS = [], []
	if 'words_result' not in req:
		# The service answered without a result; show its message if it sent
		# one instead of failing on a missing key.
		print(req.get('error_msg', 'OCR response contains no words_result'))
		return Data_OD,Data_OS

	Data = [result['words'] for result in req['words_result']]
	try:
		index_OD = Data.index('OD')
		# The left-eye marker is sometimes recognised in lower case.
		index_OS = Data.index('OS') if 'OS' in Data else Data.index('os')
	except ValueError:
		print("OCR result does not contain both the 'OD' and the 'OS' marker")
		return Data_OD,Data_OS

	if index_OD < index_OS:
		Data_OD = Data[index_OD:index_OS]
		Data_OS = Data[index_OS:]
	else:
		Data_OD = Data[index_OD:]
		Data_OS = Data[index_OS:index_OD]
	return Data_OD,Data_OS

def search(results,word):
	"""Return the entry that directly follows ``word`` in ``results``.

	:param results: list of recognised strings
	:param word: the label to look for (first occurrence is used)
	:return: the element after ``word``
	:raises ValueError: when ``word`` is not in ``results``
	:raises IndexError: when ``word`` is the last element
	"""
	# A list cannot be indexed by a string; look up the position first.
	index = results.index(word)
	return results[index+1]

def parse_results(results):
	"""Turn the recognised strings of one eye into an output row.

	``results[0]`` is the eye marker ('os' is normalised to 'OS'). The data
	values are the strings that start with a digit or '-', or that contain a
	digit and end with 'D'.

	When exactly 31 values are found they are returned after the marker in this
	order, with a few values cleaned up (see the inline comments):

	    0 K1, 1 K1_, 2 Axis, 3 K2, 4 K2_, 5 Km, 6 Km_, 7 Km_D,
	    8 Astig, 9 Astig_, 10 Astig_D,
	    11-15 Apex_C/1/3/min/max, 16-20 Pupil_C/1/3/min/max,
	    21 TCA, 22 TCSA, 23 TCIA, 24 ACD_Int, 25 ACD_Ext, 26 Cornea,
	    27 Pupil_Dia, 28 Pachy_Apex, 29 Pachy_Thinnest, 30 Pachy_D

	With any other count the values are returned unmodified after the marker.

	:param results: list of strings for one eye, marker first
	:return: list starting with the eye marker followed by the values
	:raises IndexError: when ``results`` is empty
	"""
	# Raw string: same pattern as before, without invalid-escape warnings.
	reData = re.compile(r"(((^\d)|(^-))+.*)|(.*\d.*D$)")
	Data = list(filter(reData.match,results))
	eye = results[0].replace('os','OS')

	if len(Data) != 31:
		return [eye]+Data

	fields = list(Data)
	# K1_: keep only the part before the first "？".
	# NOTE(review): "？" and the '褕' -> '衸' replacement below look like
	# mis-encoded characters (possibly a degree sign and a unit suffix); they
	# are kept exactly as found — confirm the intended characters.
	fields[1] = fields[1].split("？")[0]
	# Astig_D: drop a label in front of ':' when present.
	if ":" in fields[10]:
		fields[10] = fields[10].split(":")[1]
	# Pachy_Apex: only the first four characters are kept.
	fields[28] = fields[28][:4]
	# TCSA, TCIA, Pachy_Apex, Pachy_Thinnest, Pachy_D
	for position in (22, 23, 28, 29, 30):
		fields[position] = fields[position].replace('褕','衸')
	return [eye]+fields



def GetData(dir):
	"""Run the whole pipeline for the reports in ``dir``.

	The images are prepared with ``ProcessPic``, recognised with ``recognize``
	and converted with ``parse_results``; both rows are printed.

	``dir/temp`` is removed afterwards, also when a step fails, so that images
	left over from a failed run cannot be picked up by the next one.

	:param dir: directory containing the Pentacam PDF reports
	:return: ``(OD, OS)``, the parsed rows of the right and the left eye
	"""
	temp_dir = dir + "/temp"
	try:
		img = ProcessPic(dir)
		Data_OD, Data_OS = recognize(img)
		OD = parse_results(Data_OD)
		OS = parse_results(Data_OS)
		print(OD)
		print(OS)
	finally:
		# The directory may not exist if extraction failed very early.
		if os.path.isdir(temp_dir):
			del_temp(temp_dir)
	return OD,OS


def del_temp(dir):
	"""Delete ``dir`` together with everything inside it.

	The tree is walked bottom-up so that each directory is already empty when
	it is removed; ``dir`` itself is removed last.

	:param dir: path of the directory to delete
	"""
	for parent, subdirs, filenames in os.walk(dir, topdown=False):
		removals = [(os.remove, entry) for entry in filenames]
		removals += [(os.rmdir, entry) for entry in subdirs]
		for remove, entry in removals:
			remove(os.path.join(parent, entry))
	os.rmdir(dir)


# Manual run: processes the reports under the Windows-style relative path
# .\Pentacam and prints the parsed rows of both eyes.
if __name__ == '__main__':
	# The string below is a disabled earlier variant that ran the image and OCR
	# steps separately; it has no effect.
	'''
	img = ProcessPic("./Pentacam1")
	results_OD,results_OS = recognize(img)
	'''
	GetData(r".\Pentacam")

3.循环完成对全部报告的处理和识别

如下

CrackPentacam.py

import csv
import OCR
from OCR import *
# The 31 measurement columns written for each eye, in output order.
_PER_EYE_COLUMNS = [
    'K1_SimK', 'K1_TCRP', 'Axis',
    'K2_SimK', 'K2_TCRP',
    'Km_SimK', 'Km_TCRP', 'Km_D',
    'Astig_SimK', 'Astig_TCRP', 'Astig_D',
    'Apex_C', 'Apex_1', 'Apex_3', 'Apex_min', 'Apex_max',
    'Pupil_C', 'Pupil_1', 'Pupil_3', 'Pupil_min', 'Pupil_max',
    'TCA', 'TCSA', 'TCIA', 'ACD_Int', 'ACD_Ext', 'Cornea',
    'Pupil_Dia', 'Pachy_Apex', 'Pachy_Thinnest', 'Pachy_D',
]

# Title row: Name, ID, then the eye marker and the measurement columns of the
# right eye (OD) followed by the same for the left eye (OS) - 66 columns.
headers0 = ['Name', 'ID', 'OD'] + _PER_EYE_COLUMNS + ['OS'] + _PER_EYE_COLUMNS

def SavaData(data):
    """Append ``data`` as one row to Data.csv in the current directory.

    :param data: sequence of cell values for the row
    """
    with open('Data.csv', 'a+',newline='') as out:
        csv.writer(out).writerow(data)
def SavaData_Wanted(data):
    """Append ``data`` as one row to Data_Wanted.csv in the current directory.

    :param data: sequence of cell values for the row
    """
    with open('Data_Wanted.csv', 'a+',newline='') as out:
        csv.writer(out).writerow(data)

def CrackPentacam(Name,ID,dir):
    """Process the Pentacam reports of one patient and store the combined row.

    The rows of both eyes are obtained with ``GetData``; the combined row
    (Name, ID, right eye, left eye) is appended to Data.csv.

    :param Name: patient name written to the first column
    :param ID: patient ID written to the second column
    :param dir: directory containing the patient's Pentacam PDF reports
    :return: ``(Data, Data_OD, Data_OS)`` - the combined row and the rows of
        the right and the left eye, each starting with Name and ID
    """
    import os

    OD,OS = GetData(dir)
    Data = [Name,ID] +OD +OS
    Data_OD = [Name,ID] +OD
    Data_OS = [Name,ID] +OS
    SavaData(Data)
    _dir = dir + '/temp'
    # GetData normally removes the temp directory itself; only clean up, and
    # only report a failure, when something was actually left behind.
    if os.path.isdir(_dir):
        try:
            del_temp(_dir)
        except OSError:
            print("Failed to delete temp directory")
    return Data,Data_OD,Data_OS


# Batch run: writes the title rows, then one row per entry of Names.txt.
# Entries are grouped eight per numbered folder (entry j lives in folder j // 8).
# The start group is asked for so that an interrupted run can be resumed.
if __name__ == '__main__':
    index = input("please input from where to start, default input is 0:")
    # An empty answer means "start from the beginning", as the prompt promises.
    start = int(index) if index.strip() else 0

    # Placeholder rows must be as wide as the rows they stand in for:
    # the full row matches the title row, the single-eye row is
    # Name + ID + eye marker + 31 values.
    full_width = len(headers0)
    wanted_width = 34

    SavaData(headers0)
    headers0[2] = '眼别'
    SavaData_Wanted(headers0[:wanted_width])
    with open('./Names.txt', 'r',encoding='UTF-8') as f:
        Names = f.read().splitlines()
    with open("./IDs.txt",'r',encoding='UTF-8') as f_:
        IDs = f_.read().splitlines()
    with open("./WhichEye.txt",'r',encoding='UTF-8') as _f:
        WhichEye = _f.read().splitlines()

    last_group = len(Names) // 8
    for n in range(start, last_group + 1):
        print(n)
        # The last group may hold fewer than eight entries.
        m = 8 if n != last_group else len(Names) - n*8
        for i in range(m):
            j=n*8+i
            if Names[j]=='':
                # Blank input line: keep the output rows aligned with the list.
                SavaData(['']*full_width)
                SavaData_Wanted(['']*wanted_width)
                continue
            dir = r'../Exams/{}/{}{}/Pentacam'.format(n,Names[j],IDs[j])
            try:
                Data, Data_OD, Data_OS = CrackPentacam(Names[j], IDs[j], dir)
                if WhichEye[j] == '右':
                    SavaData_Wanted(Data_OD)
                else:
                    SavaData_Wanted(Data_OS)
            except Exception:
                # A report that cannot be processed gets a row with only the
                # name and ID filled in. Exception (not a bare except) is
                # caught so that Ctrl-C still stops the batch.
                placeholder = [Names[j], IDs[j]] + ['']*(full_width - 2)
                SavaData(placeholder)
                SavaData_Wanted(placeholder[:wanted_width])

4.完事，睡瞌睡去！