PDFCrackers Plus版


2/04/2022 11:34


1. CrackMaster_without_units.go

1.1 文件夹目录

  • CrackMaster_without_units.go
  • Master.xlsx 为输入,示例如下:
| Name | ID | WhichEye |
| ---- | -- | -------- |
| xxx  | xx | xx       |

  Plus版采用多线程解析,速度超乎想象。输出结果为两个工作表,Data_Wanted, Data_All,均为时间倒序排列的IOLMaster检查的各项生物学参数,示例如下:

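| Name | ID | WhichEye | 检查日期 | 眼轴 | … | TK2_angle | 检查日期 | 眼轴 | … | TK2_angle |
| ---- | -- | -------- | -------- | ---- | - | --------- | -------- | ---- | - | --------- |
| xxx  | xx | xx       | 20220308 | 22.88 | … | …        | 20210407 | 22.88 | … | …        |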
  若想获取按时间正序排列的结果,或只保留最新一次检查的结果,可在 SaveResults 函数中将对应的被注释代码取消注释(同时注释掉默认的时间倒序代码)。

1.2 源码

package main
import (
    "fmt"
    "io/ioutil"
    "os"
    "strconv"
    "strings"
    "regexp"
    "sync"
    "github.com/360EntSecGroup-Skylar/excelize"
    "github.com/ledongthuc/pdf"

)

// Pattern sources used to locate each IOLMaster reading in the text extracted
// from a report, followed by the state shared between goroutines.
var (
  // Each measurement pattern matches its label followed by either the
  // placeholder "---" or a numeric reading with its unit suffix.
  // NOTE(review): the "." inside the numeric parts is unescaped, so it matches
  // any single character rather than only a decimal point — confirm intended.
  reAL =`(AL:)((-{3})|(\d{2}.\d{2}mm))`
  reACD = `(ACD:)((-{3})|(\d.\d{2}mm))`
  reLT = `(LT:)((-{3})|(\d.\d{2}mm))`
  reWTW =`(WTW:)((-{3})|(\d{2}.\dmm))`
  // Matches one to three "@" characters followed by "°".
  // NOTE(review): not referenced by any function visible in this file.
  reAngle = `@{1,3}°`
  // The K/TK patterns match a value ending in "D", then "@", then one to three
  // digits and "°". The ΔK and ΔTK patterns only match numeric readings that
  // start with "-". TSE has a value ending in "D" and no "@" part.
  reK1 =`(K1:)((-{3})|(\d{2}.\d{2}D@\d{1,3}°))`  
  reΔK =`(ΔK:)((-{3})|(-\d{1,2}.\d{2}D@\d{1,3}°))` 
  reK2 =`(K2:)((-{3})|(\d{2}.\d{2}D@\d{1,3}°))` 
  reTSE = `(TSE:)((-{3})|(\d{2}.\d{2}D))`                                         
  reTK1 = `(TK1:)((-{3})|(\d{2}.\d{2}D@\d{1,3}°))`
  reΔTK = `(ΔTK:)((-{3})|(-\d{1,2}.\d{2}D@\d{1,3}°))`
  reTK2 =`(TK2:)((-{3})|(\d{2}.\d{2}D@\d{1,3}°))`

  // Queue of per-row work items; created and filled in main, drained by the
  // worker goroutines started there.
  chanInfoMap         chan map[string]string
  // Counts the producer and worker goroutines so main can wait for them.
  waitGroup           sync.WaitGroup
  // Held by SaveResults while it writes one exam into the workbook.
  lock                sync.Mutex
)

// Eye holds the readings extracted for one eye from a single report, all kept
// as text. The field order is significant: it determines how an Eye prints and
// how positional literals are interpreted.
type Eye struct {
	// Exam date and eye label ("OD" or "OS").
	date, whichEye string

	AL, ACD                    string
	K1, K1_angle, K2, K2_angle string
	ΔK, ΔK_angle               string
	WTW, LT                    string

	TSE                       string
	TK1, TK1_angle            string
	ΔTK, ΔTK_angle            string
	TK2, TK2_angle            string
}


// PathExists reports whether path exists on disk.
//
// It returns (true, nil) when the path can be stat'ed, (false, nil) when the
// path does not exist, and (false, err) for any other stat failure.
func PathExists(path string) (bool, error) {
	switch _, statErr := os.Stat(path); {
	case statErr == nil:
		return true, nil
	case os.IsNotExist(statErr):
		return false, nil
	default:
		return false, statErr
	}
}

// ScanFiles lists the regular (non-directory) entries directly inside fileDir.
//
// Each returned path is fileDir concatenated with the entry name, so fileDir is
// expected to end with a path separator. Entries come back in the file-name
// order produced by ioutil.ReadDir. A missing directory yields an empty result
// silently; any other failure to read the directory is printed and also yields
// an empty result.
func ScanFiles(fileDir string) []string {
	entries, err := ioutil.ReadDir(fileDir)
	if err != nil {
		// A patient without this exam folder is normal; everything else
		// (permissions, not a directory, ...) is worth reporting.
		if !os.IsNotExist(err) {
			fmt.Printf("get dir error![%v]\n", err)
		}
		return nil
	}
	var fileNameList []string
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		// Plain concatenation is kept on purpose: callers split the result
		// on "/" to recover the file name.
		fileNameList = append(fileNameList, fileDir+entry.Name())
	}
	return fileNameList
}

// FindIt compiles the pattern re and returns every non-overlapping match of it
// in content, in order of appearance. It panics if re is not a valid pattern.
func FindIt(content string, re string) []string {
	return regexp.MustCompile(re).FindAllString(content, -1)
}

// ParsePDF reads page 3 of the IOLMaster report at path and returns the
// readings found for the right eye (OD) and the left eye (OS) with their unit
// suffixes ("mm", "D", "°") removed.
//
// For every measurement the first match in the page text is taken as OD and
// the second as OS. A measurement with fewer than two matches is left empty
// for both eyes rather than being attributed to a possibly wrong eye. A
// reading printed as "---" is stored as "---". ΔK is stored without its sign
// and without an axis; ΔTK keeps its sign.
//
// An error is returned when the file cannot be opened as a PDF or the page
// text cannot be extracted; both returned Eye values are then zero.
func ParsePDF(path string) (Eye, Eye, error) {
	f, r, err := pdf.Open(path)
	if err != nil {
		return Eye{}, Eye{}, fmt.Errorf("open %q: %w", path, err)
	}
	defer f.Close()

	rows, err := r.Page(3).GetTextByRow()
	if err != nil {
		return Eye{}, Eye{}, fmt.Errorf("extract text from page 3 of %q: %w", path, err)
	}
	// Join every word of the page into one string so that labels and values
	// split across words still match the patterns.
	var text strings.Builder
	for _, row := range rows {
		for _, word := range row.Content {
			text.WriteString(word.S)
		}
	}
	content := text.String()

	var OD, OS Eye
	OD.whichEye = "OD"
	OS.whichEye = "OS"

	// Readings made of a single value with a unit.
	plain := []struct {
		re, label, unit string
		right, left     *string
	}{
		{reAL, "AL:", "mm", &OD.AL, &OS.AL},
		{reACD, "ACD:", "mm", &OD.ACD, &OS.ACD},
		{reLT, "LT:", "mm", &OD.LT, &OS.LT},
		{reWTW, "WTW:", "mm", &OD.WTW, &OS.WTW},
		{reTSE, "TSE:", "D", &OD.TSE, &OS.TSE},
	}
	for _, p := range plain {
		if a, b, ok := iolPair(content, p.re); ok {
			*p.right = iolReading(a, p.label, p.unit)
			*p.left = iolReading(b, p.label, p.unit)
		}
	}

	// Readings of the form "<value>D@<axis>°".
	withAxis := []struct {
		re, label           string
		rightVal, rightAxis *string
		leftVal, leftAxis   *string
	}{
		{reK1, "K1:", &OD.K1, &OD.K1_angle, &OS.K1, &OS.K1_angle},
		{reK2, "K2:", &OD.K2, &OD.K2_angle, &OS.K2, &OS.K2_angle},
		{reTK1, "TK1:", &OD.TK1, &OD.TK1_angle, &OS.TK1, &OS.TK1_angle},
		{reΔTK, "ΔTK:", &OD.ΔTK, &OD.ΔTK_angle, &OS.ΔTK, &OS.ΔTK_angle},
		{reTK2, "TK2:", &OD.TK2, &OD.TK2_angle, &OS.TK2, &OS.TK2_angle},
	}
	for _, k := range withAxis {
		if a, b, ok := iolPair(content, k.re); ok {
			*k.rightVal, *k.rightAxis = iolKerato(a, k.label)
			*k.leftVal, *k.leftAxis = iolKerato(b, k.label)
		}
	}

	// ΔK keeps only its magnitude; its axis is intentionally not stored.
	unsigned := func(match string) string {
		v, _ := iolKerato(match, "ΔK:")
		if v == "---" {
			return v
		}
		return strings.Replace(v, "-", "", -1)
	}
	if a, b, ok := iolPair(content, reΔK); ok {
		OD.ΔK = unsigned(a)
		OS.ΔK = unsigned(b)
	}

	return OD, OS, nil
}

// iolPair returns the first two matches of the pattern re in content. ok is
// false when fewer than two matches exist.
func iolPair(content, re string) (first, second string, ok bool) {
	m := FindIt(content, re)
	if len(m) < 2 {
		return "", "", false
	}
	return m[0], m[1], true
}

// iolReading removes the label prefix and the unit suffix from one match. The
// placeholder "---" is returned unchanged.
func iolReading(match, label, unit string) string {
	v := strings.TrimPrefix(match, label)
	if v == "---" {
		return v
	}
	return strings.TrimSuffix(v, unit)
}

// iolKerato splits a "<label><value>D@<axis>°" match into the value without
// "D" and the axis without "°". A "---" reading yields "---" for both.
func iolKerato(match, label string) (value, axis string) {
	v := strings.TrimPrefix(match, label)
	if v == "---" {
		return "---", "---"
	}
	at := strings.Index(v, "@")
	if at < 0 {
		return strings.TrimSuffix(v, "D"), ""
	}
	return strings.TrimSuffix(v[:at], "D"), strings.TrimSuffix(v[at+1:], "°")
}

// NumCov converts a column code into a spreadsheet column name.
//
// Num is the character code of 'A' (65) plus a zero-based column offset, so 65
// gives "A", 90 gives "Z", 91 gives "AA" and so on. Num == 64 gives "".
func NumCov(Num int) string {
	letters := []string{"", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O",
		"P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"}
	n := Num - 64
	if n <= 26 {
		return letters[n]
	}
	// Peel off letters from the right; there is no zero digit, so a remainder
	// of 0 stands for "Z" (26).
	name := ""
	for n > 26 {
		digit := n % 26
		if digit == 0 {
			digit = 26
		}
		name = letters[digit] + name
		n = (n - digit) / 26
	}
	return letters[n] + name
}
 

// WriteXlsx writes one exam's values into row num of both result sheets and
// saves the workbook.
//
// The first 11 entries of eye go to sheet "Data_Wanted" and the first 37
// entries of OU go to sheet "Data_All"; both slices must be at least that
// long. Every exam occupies its own group of columns starting at column D,
// and numFile selects the group (0 is the first). Existing cell contents are
// overwritten. A failure to save is printed, because the function has no
// error return.
func WriteXlsx(f *excelize.File, num string, eye []string, OU []string, numFile int) {
	const (
		wantedWidth = 11 // columns per exam in Data_Wanted
		allWidth    = 37 // columns per exam in Data_All
	)
	for i := 0; i < wantedWidth; i++ {
		col := NumCov('D' + i + wantedWidth*numFile)
		f.SetCellValue("Data_Wanted", col+num, eye[i])
	}
	for i := 0; i < allWidth; i++ {
		col := NumCov('C' + i + 1 + allWidth*numFile)
		f.SetCellValue("Data_All", col+num, OU[i])
	}
	// Without this check a failed save would lose the exam silently.
	if err := f.Save(); err != nil {
		fmt.Println(err)
	}
}
// SaveResult opens Master.xlsx and writes one exam into row num.
//
// WichEye selects which eye's readings go to the Data_Wanted sheet: "OD" or
// "右" picks OD, "OS" or "左" picks OS; any other value writes nothing at all.
// The Data_All row always carries both eyes. numFile is the column group
// passed through to WriteXlsx. An error opening the workbook is printed and
// nothing is written.
func SaveResult(num string, WichEye string, OD Eye, OS Eye, numFile int) {
	book, openErr := excelize.OpenFile("Master.xlsx")
	if openErr != nil {
		fmt.Println(openErr)
		return
	}

	var chosen Eye
	if WichEye == "OD" || WichEye == "右" {
		chosen = OD
	} else if WichEye == "OS" || WichEye == "左" {
		chosen = OS
	} else {
		return
	}

	// readings lists one eye's values in output column order.
	// NOTE(review): the slot after ΔK is filled with K2_angle rather than
	// ΔK_angle — confirm this is intended.
	readings := func(e Eye) []string {
		return []string{e.AL, e.ACD, e.K1, e.K1_angle, e.K2, e.K2_angle, e.ΔK, e.K2_angle, e.WTW, e.LT,
			e.TSE, e.TK1, e.TK1_angle, e.ΔTK, e.ΔTK_angle, e.TK2, e.TK2_angle}
	}

	// Data_Wanted gets the date plus the first ten readings (up to LT).
	wanted := append([]string{chosen.date}, readings(chosen)[:10]...)

	both := append([]string{OD.date, OD.whichEye}, readings(OD)...)
	both = append(both, OS.whichEye)
	both = append(both, readings(OS)...)

	WriteXlsx(book, num, wanted, both, numFile)
}

// SaveResults writes every IOLMaster exam found for one worksheet row into
// Master.xlsx.
//
// InfoMap carries "mark" (the folder name under ./Exams), "num" (the worksheet
// row to fill) and "WhichEye". Reports are read from ./Exams/<mark>/IOLMaster/
// and processed in reverse file-name order; the first 8 characters of each
// file name are used as the exam date. Each exam that parses successfully gets
// the next group of columns, with the column titles written to row 1.
//
// Reports that cannot be parsed and workbooks that cannot be opened are
// reported and skipped instead of aborting the program.
func SaveResults(InfoMap map[string]string) {
	Dir := "./Exams/" + InfoMap["mark"] + "/IOLMaster/"
	files := ScanFiles(Dir)
	eye_title := []string{"检查日期", "眼轴(IOL master)", "ACD(IOLmaster)", "K1(IOLmaster)", "K1(IOLmaster)角度", "K2(IOLmaster)", "K2(IOLmaster)角度", "角膜散光(IOLmaster)", "角膜散光轴位", "WTW(IOLmaster)", "LT(IOLmaster)", "TSE", "TK1", "TK1_angle", "ΔTK", "ΔTK_angle", "TK2", "TK2_angle"}
	OU_title := []string{"检查日期", "眼别", "眼轴(IOL master)", "ACD(IOLmaster)", "K1(IOLmaster)", "K1(IOLmaster)角度", "K2(IOLmaster)", "K2(IOLmaster)角度", "角膜散光(IOLmaster)", "角膜散光轴位", "WTW(IOLmaster)", "LT(IOLmaster)", "TSE", "TK1", "TK1_angle", "ΔTK", "ΔTK_angle", "TK2", "TK2_angle",

		"眼别", "眼轴(IOL master)", "ACD(IOLmaster)", "K1(IOLmaster)", "K1(IOLmaster)角度", "K2(IOLmaster)", "K2(IOLmaster)角度", "角膜散光(IOLmaster)", "角膜散光轴位", "WTW(IOLmaster)", "LT(IOLmaster)", "TSE", "TK1", "TK1_angle", "ΔTK", "ΔTK_angle", "TK2", "TK2_angle"}
	l := len(files)

	// slot is the column group the next successfully parsed exam is written to.
	slot := 0

	// saveExam parses one report and, on success, writes its titles and values
	// into the next free column group.
	saveExam := func(file string) {
		OD, OS, err := ParsePDF(file)
		if err != nil {
			fmt.Printf("skip %s: %v\n", file, err)
			return
		}
		_date := strings.Split(file, "/")
		date := _date[len(_date)-1]
		if len(date) > 8 {
			// File names are expected to start with an 8-character date.
			date = date[:8]
		}
		OD.date, OS.date = date, date

		// Parsing runs in parallel; only the workbook access is serialized.
		lock.Lock()
		defer lock.Unlock()
		fmt.Println(InfoMap["mark"])
		fmt.Println(OD, OS)
		f, err := excelize.OpenFile("Master.xlsx")
		if err != nil {
			fmt.Println(err)
			return
		}
		WriteXlsx(f, "1", eye_title, OU_title, slot)
		SaveResult(InfoMap["num"], InfoMap["WhichEye"], OD, OS, slot)
		slot++
	}

	//If you want to arrange data of exams in time order, use code as follows:
	/*
		for i := 0; i < l; i++ {
			saveExam(files[i])
		}
	*/
	//Data arranged in reversed time order
	for i := l - 1; i >= 0; i-- {
		saveExam(files[i])
	}
	//If you want to get only data of the latest exam, use code as follows:
	/*
		if l != 0 {
			saveExam(files[l-1])
		}
	*/
}



// formatID left-pads ID with "0" characters until it is 12 bytes long. An ID
// that is already 12 bytes or longer is returned unchanged.
func formatID(ID string) string {
	missing := 12 - len(ID)
	if missing <= 0 {
		return ID
	}
	padded := make([]byte, 0, 12)
	for ; missing > 0; missing-- {
		padded = append(padded, '0')
	}
	return string(append(padded, ID...))
}

// main prepares Master.xlsx and fills it with the exams of every listed row.
//
// The first sheet is renamed to "Data_Wanted" if necessary and copied into a
// sheet named "Data_All". Every row after the header must provide three cells
// (Name, ID, WhichEye); the row is turned into a work item whose "mark" is the
// name followed by the ID zero-padded to 12 characters. Twenty workers drain
// the queue and call SaveResults for each item.
func main() {
	chanInfoMap = make(chan map[string]string, 200)

	f, err := excelize.OpenFile("Master.xlsx")
	if err != nil {
		fmt.Println(err)
		return
	}
	Sheetname := f.GetSheetName(0)
	if Sheetname != "Data_Wanted" {
		f.SetSheetName(Sheetname, "Data_Wanted")
	}
	index := f.NewSheet("Data_All")
	if err := f.CopySheet(0, index); err != nil {
		fmt.Println(err)
	}
	// The workers reopen the workbook from disk, so they depend on this save.
	if err := f.Save(); err != nil {
		fmt.Println(err)
		return
	}
	rows, err := f.GetRows("Data_Wanted")
	if err != nil {
		fmt.Println(err)
		return
	}

	// Producer: one work item per data row.
	waitGroup.Add(1)
	go func() {
		defer waitGroup.Done()
		defer close(chanInfoMap)
		for i, row := range rows {
			if i == 0 || len(row) == 0 {
				// Header row or blank row.
				continue
			}
			if len(row) < 3 {
				// Indexing row[1] or row[2] here would panic.
				fmt.Printf("row %d skipped: expected Name, ID and WhichEye\n", i+1)
				continue
			}
			chanInfoMap <- map[string]string{
				"num":      strconv.Itoa(i + 1),
				"mark":     row[0] + formatID(row[1]),
				"WhichEye": row[2],
			}
		}
	}()

	// Consumers.
	for i := 0; i < 20; i++ {
		waitGroup.Add(1)
		go func() {
			defer waitGroup.Done()
			for InfoMap := range chanInfoMap {
				SaveResults(InfoMap)
			}
		}()
	}
	waitGroup.Wait()
}

2. CrackMaster_with_units.go

2.1 文件夹目录

  • CrackMaster_with_units.go
  • Master.xlsx 为输入,同上:

2.2 源码

package main
import (
    "fmt"
    "io/ioutil"
    "os"
    "strconv"
    "strings"
    "regexp"
    "sync"
    "github.com/360EntSecGroup-Skylar/excelize"
    "github.com/ledongthuc/pdf"

)

// Pattern sources used to locate each IOLMaster reading in the text extracted
// from a report, followed by the state shared between goroutines.
var (
  // Each measurement pattern matches its label followed by either the
  // placeholder "---" or a numeric reading with its unit suffix.
  // NOTE(review): the "." inside the numeric parts is unescaped, so it matches
  // any single character rather than only a decimal point — confirm intended.
  reAL =`(AL:)((-{3})|(\d{2}.\d{2}mm))`
  reACD = `(ACD:)((-{3})|(\d.\d{2}mm))`
  reLT = `(LT:)((-{3})|(\d.\d{2}mm))`
  reWTW =`(WTW:)((-{3})|(\d{2}.\dmm))`
  // Matches one to three "@" characters followed by "°".
  // NOTE(review): not referenced by any function visible in this file.
  reAngle = `@{1,3}°`
  // The K/TK patterns match a value ending in "D", then "@", then one to three
  // digits and "°". The ΔK and ΔTK patterns only match numeric readings that
  // start with "-". TSE has a value ending in "D" and no "@" part.
  reK1 =`(K1:)((-{3})|(\d{2}.\d{2}D@\d{1,3}°))`  
  reΔK =`(ΔK:)((-{3})|(-\d{1,2}.\d{2}D@\d{1,3}°))` 
  reK2 =`(K2:)((-{3})|(\d{2}.\d{2}D@\d{1,3}°))` 
  reTSE = `(TSE:)((-{3})|(\d{2}.\d{2}D))`                                         
  reTK1 = `(TK1:)((-{3})|(\d{2}.\d{2}D@\d{1,3}°))`
  reΔTK = `(ΔTK:)((-{3})|(-\d{1,2}.\d{2}D@\d{1,3}°))`
  reTK2 =`(TK2:)((-{3})|(\d{2}.\d{2}D@\d{1,3}°))`

  // Queue of per-row work items; created and filled in main, drained by the
  // worker goroutines started there.
  chanInfoMap         chan map[string]string
  // Counts the producer and worker goroutines so main can wait for them.
  waitGroup           sync.WaitGroup
  // Held by SaveResults while it writes one exam into the workbook.
  lock                sync.Mutex
)

// Eye holds the readings extracted for one eye from a single report, all kept
// as text. The field order is significant: it determines how an Eye prints and
// how positional literals are interpreted.
type Eye struct {
	// Exam date and eye label ("OD" or "OS").
	date, whichEye string

	AL, ACD                    string
	K1, K1_angle, K2, K2_angle string
	ΔK, ΔK_angle               string
	WTW, LT                    string

	TSE                       string
	TK1, TK1_angle            string
	ΔTK, ΔTK_angle            string
	TK2, TK2_angle            string
}


// PathExists reports whether path exists on disk.
//
// It returns (true, nil) when the path can be stat'ed, (false, nil) when the
// path does not exist, and (false, err) for any other stat failure.
func PathExists(path string) (bool, error) {
	switch _, statErr := os.Stat(path); {
	case statErr == nil:
		return true, nil
	case os.IsNotExist(statErr):
		return false, nil
	default:
		return false, statErr
	}
}

// ScanFiles lists the regular (non-directory) entries directly inside fileDir.
//
// Each returned path is fileDir concatenated with the entry name, so fileDir is
// expected to end with a path separator. Entries come back in the file-name
// order produced by ioutil.ReadDir. A missing directory yields an empty result
// silently; any other failure to read the directory is printed and also yields
// an empty result.
func ScanFiles(fileDir string) []string {
	entries, err := ioutil.ReadDir(fileDir)
	if err != nil {
		// A patient without this exam folder is normal; everything else
		// (permissions, not a directory, ...) is worth reporting.
		if !os.IsNotExist(err) {
			fmt.Printf("get dir error![%v]\n", err)
		}
		return nil
	}
	var fileNameList []string
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		// Plain concatenation is kept on purpose: callers split the result
		// on "/" to recover the file name.
		fileNameList = append(fileNameList, fileDir+entry.Name())
	}
	return fileNameList
}

// FindIt compiles the pattern re and returns every non-overlapping match of it
// in content, in order of appearance. It panics if re is not a valid pattern.
func FindIt(content string, re string) []string {
	return regexp.MustCompile(re).FindAllString(content, -1)
}

// ParsePDF reads page 3 of the IOLMaster report at path and returns the
// readings found for the right eye (OD) and the left eye (OS), keeping their
// unit suffixes ("mm", "D", "°").
//
// For every measurement the first match in the page text is taken as OD and
// the second as OS. A measurement with fewer than two matches is left empty
// for both eyes rather than being attributed to a possibly wrong eye. A
// reading printed as "---" is stored as "---". ΔK is stored without its sign
// and without an axis; ΔTK keeps its sign.
//
// An error is returned when the file cannot be opened as a PDF or the page
// text cannot be extracted; both returned Eye values are then zero.
func ParsePDF(path string) (Eye, Eye, error) {
	f, r, err := pdf.Open(path)
	if err != nil {
		return Eye{}, Eye{}, fmt.Errorf("open %q: %w", path, err)
	}
	defer f.Close()

	rows, err := r.Page(3).GetTextByRow()
	if err != nil {
		return Eye{}, Eye{}, fmt.Errorf("extract text from page 3 of %q: %w", path, err)
	}
	// Join every word of the page into one string so that labels and values
	// split across words still match the patterns.
	var text strings.Builder
	for _, row := range rows {
		for _, word := range row.Content {
			text.WriteString(word.S)
		}
	}
	content := text.String()

	var OD, OS Eye
	OD.whichEye = "OD"
	OS.whichEye = "OS"

	// Readings made of a single value with a unit; everything after the label
	// is stored as-is.
	plain := []struct {
		re, label   string
		right, left *string
	}{
		{reAL, "AL:", &OD.AL, &OS.AL},
		{reACD, "ACD:", &OD.ACD, &OS.ACD},
		{reLT, "LT:", &OD.LT, &OS.LT},
		{reWTW, "WTW:", &OD.WTW, &OS.WTW},
		{reTSE, "TSE:", &OD.TSE, &OS.TSE},
	}
	for _, p := range plain {
		if a, b, ok := iolPair(content, p.re); ok {
			*p.right = strings.TrimPrefix(a, p.label)
			*p.left = strings.TrimPrefix(b, p.label)
		}
	}

	// Readings of the form "<value>D@<axis>°".
	withAxis := []struct {
		re, label           string
		rightVal, rightAxis *string
		leftVal, leftAxis   *string
	}{
		{reK1, "K1:", &OD.K1, &OD.K1_angle, &OS.K1, &OS.K1_angle},
		{reK2, "K2:", &OD.K2, &OD.K2_angle, &OS.K2, &OS.K2_angle},
		{reTK1, "TK1:", &OD.TK1, &OD.TK1_angle, &OS.TK1, &OS.TK1_angle},
		{reΔTK, "ΔTK:", &OD.ΔTK, &OD.ΔTK_angle, &OS.ΔTK, &OS.ΔTK_angle},
		{reTK2, "TK2:", &OD.TK2, &OD.TK2_angle, &OS.TK2, &OS.TK2_angle},
	}
	for _, k := range withAxis {
		if a, b, ok := iolPair(content, k.re); ok {
			*k.rightVal, *k.rightAxis = iolKerato(a, k.label)
			*k.leftVal, *k.leftAxis = iolKerato(b, k.label)
		}
	}

	// ΔK keeps only its magnitude; its axis is intentionally not stored.
	unsigned := func(match string) string {
		v, _ := iolKerato(match, "ΔK:")
		if v == "---" {
			return v
		}
		return strings.Replace(v, "-", "", -1)
	}
	if a, b, ok := iolPair(content, reΔK); ok {
		OD.ΔK = unsigned(a)
		OS.ΔK = unsigned(b)
	}

	return OD, OS, nil
}

// iolPair returns the first two matches of the pattern re in content. ok is
// false when fewer than two matches exist.
func iolPair(content, re string) (first, second string, ok bool) {
	m := FindIt(content, re)
	if len(m) < 2 {
		return "", "", false
	}
	return m[0], m[1], true
}

// iolKerato splits a "<label><value>@<axis>" match at the "@" and returns both
// halves with their units intact. A "---" reading yields "---" for both.
func iolKerato(match, label string) (value, axis string) {
	v := strings.TrimPrefix(match, label)
	if v == "---" {
		return "---", "---"
	}
	at := strings.Index(v, "@")
	if at < 0 {
		return v, ""
	}
	return v[:at], v[at+1:]
}

// NumCov converts a column code into a spreadsheet column name.
//
// Num is the character code of 'A' (65) plus a zero-based column offset, so 65
// gives "A", 90 gives "Z", 91 gives "AA" and so on. Num == 64 gives "".
func NumCov(Num int) string {
	letters := []string{"", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O",
		"P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"}
	n := Num - 64
	if n <= 26 {
		return letters[n]
	}
	// Peel off letters from the right; there is no zero digit, so a remainder
	// of 0 stands for "Z" (26).
	name := ""
	for n > 26 {
		digit := n % 26
		if digit == 0 {
			digit = 26
		}
		name = letters[digit] + name
		n = (n - digit) / 26
	}
	return letters[n] + name
}
 

// WriteXlsx writes one exam's values into row num of both result sheets and
// saves the workbook.
//
// The first 11 entries of eye go to sheet "Data_Wanted" and the first 37
// entries of OU go to sheet "Data_All"; both slices must be at least that
// long. Every exam occupies its own group of columns starting at column D,
// and numFile selects the group (0 is the first). Existing cell contents are
// overwritten. A failure to save is printed, because the function has no
// error return.
func WriteXlsx(f *excelize.File, num string, eye []string, OU []string, numFile int) {
	const (
		wantedWidth = 11 // columns per exam in Data_Wanted
		allWidth    = 37 // columns per exam in Data_All
	)
	for i := 0; i < wantedWidth; i++ {
		col := NumCov('D' + i + wantedWidth*numFile)
		f.SetCellValue("Data_Wanted", col+num, eye[i])
	}
	for i := 0; i < allWidth; i++ {
		col := NumCov('C' + i + 1 + allWidth*numFile)
		f.SetCellValue("Data_All", col+num, OU[i])
	}
	// Without this check a failed save would lose the exam silently.
	if err := f.Save(); err != nil {
		fmt.Println(err)
	}
}

// SaveResult opens Master.xlsx and writes one exam into row num.
//
// WichEye selects which eye's readings go to the Data_Wanted sheet: "OD" or
// "右" picks OD, "OS" or "左" picks OS; any other value writes nothing at all.
// The Data_All row always carries both eyes. numFile is the column group
// passed through to WriteXlsx. An error opening the workbook is printed and
// nothing is written.
func SaveResult(num string, WichEye string, OD Eye, OS Eye, numFile int) {
	book, openErr := excelize.OpenFile("Master.xlsx")
	if openErr != nil {
		fmt.Println(openErr)
		return
	}

	var chosen Eye
	if WichEye == "OD" || WichEye == "右" {
		chosen = OD
	} else if WichEye == "OS" || WichEye == "左" {
		chosen = OS
	} else {
		return
	}

	// readings lists one eye's values in output column order.
	// NOTE(review): the slot after ΔK is filled with K2_angle rather than
	// ΔK_angle — confirm this is intended.
	readings := func(e Eye) []string {
		return []string{e.AL, e.ACD, e.K1, e.K1_angle, e.K2, e.K2_angle, e.ΔK, e.K2_angle, e.WTW, e.LT,
			e.TSE, e.TK1, e.TK1_angle, e.ΔTK, e.ΔTK_angle, e.TK2, e.TK2_angle}
	}

	// Data_Wanted gets the date plus the first ten readings (up to LT).
	wanted := append([]string{chosen.date}, readings(chosen)[:10]...)

	both := append([]string{OD.date, OD.whichEye}, readings(OD)...)
	both = append(both, OS.whichEye)
	both = append(both, readings(OS)...)

	WriteXlsx(book, num, wanted, both, numFile)
}

// SaveResults writes every IOLMaster exam found for one worksheet row into
// Master.xlsx.
//
// InfoMap carries "mark" (the folder name under ./Exams), "num" (the worksheet
// row to fill) and "WhichEye". Reports are read from ./Exams/<mark>/IOLMaster/
// and processed in reverse file-name order; the first 8 characters of each
// file name are used as the exam date. Each exam that parses successfully gets
// the next group of columns, with the column titles written to row 1.
//
// Reports that cannot be parsed and workbooks that cannot be opened are
// reported and skipped instead of aborting the program.
func SaveResults(InfoMap map[string]string) {
	Dir := "./Exams/" + InfoMap["mark"] + "/IOLMaster/"
	files := ScanFiles(Dir)
	eye_title := []string{"检查日期", "眼轴(IOL master)", "ACD(IOLmaster)", "K1(IOLmaster)", "K1(IOLmaster)角度", "K2(IOLmaster)", "K2(IOLmaster)角度", "角膜散光(IOLmaster)", "角膜散光轴位", "WTW(IOLmaster)", "LT(IOLmaster)", "TSE", "TK1", "TK1_angle", "ΔTK", "ΔTK_angle", "TK2", "TK2_angle"}
	OU_title := []string{"检查日期", "眼别", "眼轴(IOL master)", "ACD(IOLmaster)", "K1(IOLmaster)", "K1(IOLmaster)角度", "K2(IOLmaster)", "K2(IOLmaster)角度", "角膜散光(IOLmaster)", "角膜散光轴位", "WTW(IOLmaster)", "LT(IOLmaster)", "TSE", "TK1", "TK1_angle", "ΔTK", "ΔTK_angle", "TK2", "TK2_angle",

		"眼别", "眼轴(IOL master)", "ACD(IOLmaster)", "K1(IOLmaster)", "K1(IOLmaster)角度", "K2(IOLmaster)", "K2(IOLmaster)角度", "角膜散光(IOLmaster)", "角膜散光轴位", "WTW(IOLmaster)", "LT(IOLmaster)", "TSE", "TK1", "TK1_angle", "ΔTK", "ΔTK_angle", "TK2", "TK2_angle"}
	l := len(files)

	// slot is the column group the next successfully parsed exam is written to.
	slot := 0

	// saveExam parses one report and, on success, writes its titles and values
	// into the next free column group.
	saveExam := func(file string) {
		OD, OS, err := ParsePDF(file)
		if err != nil {
			fmt.Printf("skip %s: %v\n", file, err)
			return
		}
		_date := strings.Split(file, "/")
		date := _date[len(_date)-1]
		if len(date) > 8 {
			// File names are expected to start with an 8-character date.
			date = date[:8]
		}
		OD.date, OS.date = date, date

		// Parsing runs in parallel; only the workbook access is serialized.
		lock.Lock()
		defer lock.Unlock()
		fmt.Println(InfoMap["mark"])
		fmt.Println(OD, OS)
		f, err := excelize.OpenFile("Master.xlsx")
		if err != nil {
			fmt.Println(err)
			return
		}
		WriteXlsx(f, "1", eye_title, OU_title, slot)
		SaveResult(InfoMap["num"], InfoMap["WhichEye"], OD, OS, slot)
		slot++
	}

	//If you want to arrange data of exams in time order, use code as follows:
	/*
		for i := 0; i < l; i++ {
			saveExam(files[i])
		}
	*/
	//Data arranged in reversed time order
	for i := l - 1; i >= 0; i-- {
		saveExam(files[i])
	}
	//If you want to get only data of the latest exam, use code as follows:
	/*
		if l != 0 {
			saveExam(files[l-1])
		}
	*/
}


// formatID left-pads ID with "0" characters until it is 12 bytes long. An ID
// that is already 12 bytes or longer is returned unchanged.
func formatID(ID string) string {
	missing := 12 - len(ID)
	if missing <= 0 {
		return ID
	}
	padded := make([]byte, 0, 12)
	for ; missing > 0; missing-- {
		padded = append(padded, '0')
	}
	return string(append(padded, ID...))
}

// main prepares Master.xlsx and fills it with the exams of every listed row.
//
// The first sheet is renamed to "Data_Wanted" if necessary and copied into a
// sheet named "Data_All". Every row after the header must provide three cells
// (Name, ID, WhichEye); the row is turned into a work item whose "mark" is the
// name followed by the ID zero-padded to 12 characters. Twenty workers drain
// the queue and call SaveResults for each item.
func main() {
	chanInfoMap = make(chan map[string]string, 200)

	f, err := excelize.OpenFile("Master.xlsx")
	if err != nil {
		fmt.Println(err)
		return
	}
	Sheetname := f.GetSheetName(0)
	if Sheetname != "Data_Wanted" {
		f.SetSheetName(Sheetname, "Data_Wanted")
	}
	index := f.NewSheet("Data_All")
	if err := f.CopySheet(0, index); err != nil {
		fmt.Println(err)
	}
	// The workers reopen the workbook from disk, so they depend on this save.
	if err := f.Save(); err != nil {
		fmt.Println(err)
		return
	}
	rows, err := f.GetRows("Data_Wanted")
	if err != nil {
		fmt.Println(err)
		return
	}

	// Producer: one work item per data row.
	waitGroup.Add(1)
	go func() {
		defer waitGroup.Done()
		defer close(chanInfoMap)
		for i, row := range rows {
			if i == 0 || len(row) == 0 {
				// Header row or blank row.
				continue
			}
			if len(row) < 3 {
				// Indexing row[1] or row[2] here would panic.
				fmt.Printf("row %d skipped: expected Name, ID and WhichEye\n", i+1)
				continue
			}
			chanInfoMap <- map[string]string{
				"num":      strconv.Itoa(i + 1),
				"mark":     row[0] + formatID(row[1]),
				"WhichEye": row[2],
			}
		}
	}()

	// Consumers.
	for i := 0; i < 20; i++ {
		waitGroup.Add(1)
		go func() {
			defer waitGroup.Done()
			for InfoMap := range chanInfoMap {
				SaveResults(InfoMap)
			}
		}()
	}
	waitGroup.Wait()
}

3. CrackHumphrey.go

3.1 文件夹目录

| Name | ID   |
| ---- | ---- |
| xxx  | xxxx |

3.2 源码

package main

import (
    "io/ioutil"
    "os/exec"
    "context"
    "fmt"
    "sync"
    "strconv"
    "github.com/google/go-tika/tika"
    "github.com/PuerkitoBio/goquery"
    "github.com/360EntSecGroup-Skylar/excelize"
    "strings"
    "regexp"
    "log"
    "os"
    "time"
)



// Compiled patterns and the state shared between goroutines.
var (
    // Each pattern matches from its label ("GHT:", "VFI:", "MD30-2:",
    // "PSD30-2:") through the rest of the line that contains it.
    // NOTE(review): presumably applied by ParsePDF, which is not fully visible
    // here — confirm.
    reg0 = regexp.MustCompile(`GHT:.+`)
    reg1 = regexp.MustCompile(`VFI:.+`)
    reg2 = regexp.MustCompile(`MD30-2:.+`)
    reg3 = regexp.MustCompile(`PSD30-2:.+`)
    // Queue of per-row work items (map of string keys to string values).
    chanInfoMap         chan map[string]string
    // Wait group for goroutines; its users are outside the visible code.
    waitGroup           sync.WaitGroup
    // Held by SaveResults while it writes one exam into the workbook.
    lock                sync.Mutex
)


// Eye holds what was extracted for one eye from a single report: the exam
// date, the eye label and the result text. Field order is significant for
// printing and positional literals.
type Eye struct {
	date     string
	whichEye string
	result   string
}



// PathExists reports whether path exists on disk.
//
// It returns (true, nil) when the path can be stat'ed, (false, nil) when the
// path does not exist, and (false, err) for any other stat failure.
func PathExists(path string) (bool, error) {
	switch _, statErr := os.Stat(path); {
	case statErr == nil:
		return true, nil
	case os.IsNotExist(statErr):
		return false, nil
	default:
		return false, statErr
	}
}

// ScanFiles lists the regular (non-directory) entries directly inside fileDir.
//
// Each returned path is fileDir concatenated with the entry name, so fileDir is
// expected to end with a path separator. Entries come back in the file-name
// order produced by ioutil.ReadDir. A missing directory yields an empty result
// silently; any other failure to read the directory is printed and also yields
// an empty result.
func ScanFiles(fileDir string) []string {
	entries, err := ioutil.ReadDir(fileDir)
	if err != nil {
		// A patient without this exam folder is normal; everything else
		// (permissions, not a directory, ...) is worth reporting.
		if !os.IsNotExist(err) {
			fmt.Printf("get dir error![%v]\n", err)
		}
		return nil
	}
	var fileNameList []string
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		// Plain concatenation is kept on purpose: callers split the result
		// on "/" to recover the file name.
		fileNameList = append(fileNameList, fileDir+entry.Name())
	}
	return fileNameList
}

// NumCov converts a column code into a spreadsheet column name.
//
// Num is the character code of 'A' (65) plus a zero-based column offset, so 65
// gives "A", 90 gives "Z", 91 gives "AA" and so on. Num == 64 gives "".
func NumCov(Num int) string {
	letters := []string{"", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O",
		"P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"}
	n := Num - 64
	if n <= 26 {
		return letters[n]
	}
	// Peel off letters from the right; there is no zero digit, so a remainder
	// of 0 stands for "Z" (26).
	name := ""
	for n > 26 {
		digit := n % 26
		if digit == 0 {
			digit = 26
		}
		name = letters[digit] + name
		n = (n - digit) / 26
	}
	return letters[n] + name
}
 

// WriteXlsx writes the first 5 entries of OU into row num of sheet "Data_All"
// and saves the workbook.
//
// Every exam occupies its own group of 5 columns starting at column C, and
// numFile selects the group (0 is the first). OU must hold at least 5 entries.
// Existing cell contents are overwritten. A failure to save is printed,
// because the function has no error return.
func WriteXlsx(f *excelize.File, num string, OU []string, numFile int) {
	const width = 5 // columns per exam
	for i := 0; i < width; i++ {
		col := NumCov('C' + i + width*numFile)
		f.SetCellValue("Data_All", col+num, OU[i])
	}
	// Without this check a failed save would lose the exam silently.
	if err := f.Save(); err != nil {
		fmt.Println(err)
	}
}
// SaveResult opens Humphrey.xlsx and writes one exam (date, then label and
// result for OD, then label and result for OS) into row num. numFile is the
// column group passed through to WriteXlsx. An error opening the workbook is
// printed and nothing is written.
func SaveResult(num string, OD Eye, OS Eye, numFile int) {
	book, openErr := excelize.OpenFile("Humphrey.xlsx")
	if openErr != nil {
		fmt.Println(openErr)
		return
	}
	cells := make([]string, 0, 5)
	cells = append(cells, OD.date)
	cells = append(cells, OD.whichEye, OD.result)
	cells = append(cells, OS.whichEye, OS.result)
	WriteXlsx(book, num, cells, numFile)
}

// SaveResults parses every exam file of one patient and writes the results
// into that patient's row of Humphrey.xlsx.
//
// InfoMap["mark"] names the patient's folder below ./Exams/ and InfoMap["num"]
// is the spreadsheet row to fill. The first eight bytes of each file name are
// used as the exam date (the whole name when it is shorter). Printing and
// workbook access happen while the package-level lock is held; if the
// workbook cannot be opened the error is printed and that exam is skipped.
func SaveResults(InfoMap map[string]string) {
	Dir := "./Exams/" + InfoMap["mark"] + "/Humphrey/"
	eye_title := []string{"检查日期", "眼别", "Result", "眼别", "Result"}

	files := ScanFiles(Dir)
	//If you want to arrange data of exams in time order, use code as follows:
	/*
	   if l:=len(files); l !=0{
	       for i, _ := range files{
	           OD, OS:= ParsePDF(files[i])
	           _date := strings.Split(files[i],"/")
	           date:= _date[len(_date)-1][:8]
	           OD.date, OS.date  = date,date
	           lock.Lock()
	           fmt.Println(InfoMap["mark"])
	           fmt.Println(OD, OS)
	           f, _ := excelize.OpenFile("Humphrey.xlsx")
	           WriteXlsx(f,"1",eye_title,i)
	           SaveResult(InfoMap["num"],OD, OS,i)
	           lock.Unlock()
	       }
	   }
	*/
	//Data arranged in reversed time order
	for i := len(files) - 1; i >= 0; i-- {
		// The last listed file goes into the first column group.
		slot := len(files) - 1 - i

		OD, OS := ParsePDF(files[i])

		// Take the date from the file name; guard the slice so a short name
		// does not panic.
		parts := strings.Split(files[i], "/")
		date := parts[len(parts)-1]
		if len(date) > 8 {
			date = date[:8]
		}
		OD.date, OS.date = date, date

		// The closure lets the unlock be deferred, so the lock is released on
		// every path out of the critical section.
		func() {
			lock.Lock()
			defer lock.Unlock()

			fmt.Println(InfoMap["mark"])
			fmt.Println(OD, OS)
			f, err := excelize.OpenFile("Humphrey.xlsx")
			if err != nil {
				fmt.Println(err)
				return
			}
			// Row 1 receives the column titles for this column group.
			WriteXlsx(f, "1", eye_title, slot)
			SaveResult(InfoMap["num"], OD, OS, slot)
		}()
	}
	//If you want to get only data of the latest exam, use code as follows:
	/*
	   if l:=len(files); l !=0{
	       OD, OS:= ParsePDF(files[l-1])
	       _date := strings.Split(files[l-1],"/")
	       date:= _date[len(_date)-1][:8]
	       OD.date, OS.date  = date,date
	       lock.Lock()
	       fmt.Println(InfoMap["mark"])
	       fmt.Println(OD, OS)
	       f, _ := excelize.OpenFile("Humphrey.xlsx")
	       WriteXlsx(f,"1",eye_title,0)
	       SaveResult(InfoMap["num"],OD, OS,0)
	       lock.Unlock()
	   }
	*/
}

// ParsePDF sends the file at path to the Tika server on localhost:9998 and
// splits the extracted text into one result per eye.
//
// HtmlParser produces one line per <div> of the Tika output. The first line
// is stored as the OS result and the second as the OD result; a missing line
// leaves that result empty. The returned values have whichEye set to "OD" and
// "OS"; date is left for the caller to fill in.
//
// Failing to open the file ends the program via log.Fatal. A Tika failure is
// printed and yields empty results.
func ParsePDF(path string) (OD Eye, OS Eye) {
	OD.whichEye, OS.whichEye = "OD", "OS"

	f, err := os.Open(path)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	client := tika.NewClient(nil, "http://localhost:9998")
	body, err := client.Parse(context.Background(), f)
	if err != nil {
		// Without a body there is nothing to extract; return the labelled but
		// empty results rather than indexing into an empty split below.
		fmt.Println(err)
		return OD, OS
	}

	result := strings.Split(HtmlParser(body), "\n")
	// NOTE(review): the first line is taken as OS and the second as OD, while
	// the CrackOctopus listing later in this article assigns them the other
	// way round — confirm against the report layout.
	OS.result = result[0] // Split always returns at least one element
	if len(result) > 1 {
		OD.result = result[1]
	}
	return OD, OS
}

// HtmlParser extracts one summary line per <div> element of html.
//
// For every <div> the combined text of its <p> descendants is searched with
// reg0, reg1, reg2 and reg3. The four matches (each empty when nothing
// matched) are joined with single spaces and terminated by a newline. A
// document that cannot be parsed ends the program via log.Fatal.
func HtmlParser(html string) (result string) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		log.Fatal(err)
	}

	var lines strings.Builder
	doc.Find("div").Each(func(_ int, div *goquery.Selection) {
		text := div.Find("p").Text()
		matches := []string{
			reg0.FindString(text),
			reg1.FindString(text),
			reg2.FindString(text),
			reg3.FindString(text),
		}
		lines.WriteString(strings.Join(matches, " "))
		lines.WriteString("\n")
	})
	return lines.String()
}

// startServer launches the Tika server jar through the Windows command
// interpreter ("cmd /c") and blocks until that process exits, then prints
// what it wrote to standard output. If the command cannot be run or exits
// with an error, the program ends via log.Fatal.
func startServer() {
	server := exec.Command("cmd", "/c", "java -jar tika-server-standard-2.3.0.jar")
	output, err := server.Output()
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%s", output)
}

// formatID left-pads ID with '0' characters until it is 12 bytes long.
// IDs that are already 12 bytes or longer are returned unchanged.
func formatID(ID string) string {
	if missing := 12 - len(ID); missing > 0 {
		return strings.Repeat("0", missing) + ID
	}
	return ID
}

// main prepares Humphrey.xlsx, starts the Tika server and processes every
// patient row of the "Data_All" sheet with 20 worker goroutines.
//
// One goroutine reads the sheet and sends a map with "num" (row number) and
// "mark" (name followed by the zero-padded ID) per row over chanInfoMap; the
// workers call SaveResults for each map. Errors are printed; a workbook that
// cannot be opened or saved ends the run before any work is started.
func main() {
	chanInfoMap = make(chan map[string]string, 200)
	f, err := excelize.OpenFile("Humphrey.xlsx")
	if err != nil {
		fmt.Println(err)
		return
	}
	// All later reads and writes address the sheet as "Data_All", so rename
	// the sheet at index 0 when it has a different name.
	Sheetname := f.GetSheetName(0)
	if Sheetname != "Data_All" {
		f.SetSheetName(Sheetname, "Data_All")
	}
	if err := f.Save(); err != nil {
		fmt.Println(err)
		return
	}

	go startServer()
	// NOTE(review): a fixed two-second pause may be shorter than the server's
	// start-up time — confirm, or poll the server instead.
	time.Sleep(2 * time.Second)

	waitGroup.Add(1)
	go func() {
		defer waitGroup.Done()
		// Closing the channel lets the workers' range loops finish, also when
		// the sheet cannot be read.
		defer close(chanInfoMap)

		rows, err := f.GetRows("Data_All")
		if err != nil {
			fmt.Println(err)
			return
		}
		for i, row := range rows {
			// The first row is skipped (row 1 receives the column titles), and
			// so is any row without both a name and an ID cell, which would
			// otherwise panic on row[1].
			if i == 0 || len(row) < 2 {
				continue
			}
			chanInfoMap <- map[string]string{
				"num":  strconv.Itoa(i + 1),
				"mark": row[0] + formatID(row[1]),
			}
		}
	}()

	for i := 0; i < 20; i++ {
		waitGroup.Add(1)
		go func() {
			defer waitGroup.Done()
			for InfoMap := range chanInfoMap {
				SaveResults(InfoMap)
			}
		}()
	}
	waitGroup.Wait()
}

4. CrackOctopus.go

4.1 文件夹目录

  • tika-server-standard-2.3.0.jar
  • CrackOctopus.go
  • Octopus.xlsx 为输入,格式同 Humphrey.xlsx

4.2 源码

package main

import (
    "io/ioutil"
    "os/exec"
    "context"
    "fmt"
    "sync"
    "strconv"
    "github.com/google/go-tika/tika"
    "github.com/PuerkitoBio/goquery"
    "github.com/360EntSecGroup-Skylar/excelize"
    "strings"
    "regexp"
    "log"
    "os"
    "time"
)


// Package-level state shared by the functions of this program.
var (
    // reg0, reg1 and reg2 each match from their marker ("MS", "MD", "sLV") to
    // the end of the line; HtmlParser applies them to the text of every <div>.
    // NOTE(review): presumably the MS / MD / sLV indices of the Octopus
    // report — confirm.
    reg0 = regexp.MustCompile(`MS.+`)
    reg1 = regexp.MustCompile(`MD.+`)
    reg2 = regexp.MustCompile(`sLV.+`)
    // chanInfoMap carries one map per spreadsheet row (keys "num" and "mark")
    // from the reader goroutine in main to the workers.
    chanInfoMap         chan map[string]string
    // waitGroup tracks the reader goroutine and the workers started in main.
    waitGroup           sync.WaitGroup
    // lock is held by SaveResults while it prints and writes to the workbook.
    lock                sync.Mutex
)


// Eye holds what is recorded for one eye in a single exam.
type Eye struct {
	date     string // exam date; SaveResults fills it from the file name
	whichEye string // "OD" or "OS", set by ParsePDF
	result   string // summary line taken from HtmlParser's output
}



// PathExists reports whether path exists.
//
// It returns (true, nil) when os.Stat succeeds, (false, nil) when the path
// does not exist, and (false, err) for any other Stat failure.
func PathExists(path string) (bool, error) {
	switch _, statErr := os.Stat(path); {
	case statErr == nil:
		return true, nil
	case os.IsNotExist(statErr):
		return false, nil
	default:
		return false, statErr
	}
}

// ScanFiles lists the non-directory entries directly inside fileDir.
//
// Each returned element is fileDir concatenated with the entry name, so
// fileDir has to end with a path separator. Entries come back in the order
// ioutil.ReadDir delivers them, which is sorted by file name. A missing
// directory yields nil; errors from checking or reading the directory are
// printed and also yield nil.
func ScanFiles(fileDir string) []string {
	exist, err := PathExists(fileDir)
	if err != nil {
		fmt.Printf("get dir error![%v]\n", err)
	}
	if !exist {
		return nil
	}

	// Read the directory; report a failure instead of silently returning an
	// empty list.
	files, err := ioutil.ReadDir(fileDir)
	if err != nil {
		fmt.Printf("get dir error![%v]\n", err)
		return nil
	}

	var fileNameList []string
	for _, onefile := range files {
		if onefile.IsDir() {
			continue // only files are of interest
		}
		fileNameList = append(fileNameList, fileDir+onefile.Name())
	}
	return fileNameList
}

// NumCov converts a column code into spreadsheet column letters.
//
// Num is the ASCII code of 'A' plus a zero-based column offset: 65 ('A')
// gives "A", 90 ('Z') gives "Z", 91 gives "AA", and so on. 64 gives the empty
// string; anything smaller indexes the lookup table with a negative number
// and panics.
func NumCov(Num int) string {
	letters := []string{"", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",
		"N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"}

	// 64 is one below 'A', which turns the code into a one-based column index.
	index := Num - 64
	if index <= 26 {
		return letters[index]
	}

	// Peel off base-26 digits in the range 1..26 (there is no zero digit),
	// least significant first.
	var digits []int
	for index > 26 {
		digit := index % 26
		if digit == 0 {
			digit = 26
		}
		digits = append(digits, digit)
		index = (index - digit) / 26
	}
	digits = append(digits, index)

	// The digits are stored least significant first, so each letter is prepended.
	name := ""
	for _, digit := range digits {
		name = letters[digit] + name
	}
	return name
}



// WriteXlsx writes up to five values from OU into row num of the "Data_All"
// sheet and then saves the workbook.
//
// Every exam occupies a group of five consecutive columns starting at column
// C: numFile selects the group (0 -> C..G, 1 -> H..L, ...) and the position
// inside OU selects the column within the group. num is the row number as
// text. Failures are printed rather than returned because the signature has
// no error result.
func WriteXlsx(f *excelize.File, num string, OU []string, numFile int) {
	const valuesPerExam = 5

	// Stop at len(OU) as well so a short slice cannot index out of range.
	for i := 0; i < valuesPerExam && i < len(OU); i++ {
		// 'C' plus the offset is the ASCII-based column code NumCov expects.
		cell := NumCov('C'+i+valuesPerExam*numFile) + num
		if err := f.SetCellValue("Data_All", cell, OU[i]); err != nil {
			fmt.Println(err)
		}
	}
	// A failed save means the values above never reach the file, so report it.
	if err := f.Save(); err != nil {
		fmt.Println(err)
	}
}

// SaveResult opens Octopus.xlsx and writes one exam for both eyes into row
// num.
//
// The values are handed to WriteXlsx in the order: OD.date, OD label, OD
// result, OS label, OS result (OS.date is not written). numFile is passed
// through unchanged. If the workbook cannot be opened the error is printed
// and nothing is written.
func SaveResult(num string, OD Eye, OS Eye, numFile int) {
	workbook, openErr := excelize.OpenFile("Octopus.xlsx")
	if openErr != nil {
		fmt.Println(openErr)
		return
	}
	WriteXlsx(workbook, num, []string{
		OD.date,
		OD.whichEye,
		OD.result,
		OS.whichEye,
		OS.result,
	}, numFile)
}

// SaveResults parses every exam file of one patient and writes the results
// into that patient's row of Octopus.xlsx.
//
// InfoMap["mark"] names the patient's folder below ./Exams/ and InfoMap["num"]
// is the spreadsheet row to fill. The first eight bytes of each file name are
// used as the exam date (the whole name when it is shorter). Printing and
// workbook access happen while the package-level lock is held; if the
// workbook cannot be opened the error is printed and that exam is skipped.
func SaveResults(InfoMap map[string]string) {
	Dir := "./Exams/" + InfoMap["mark"] + "/Octopus/"
	eye_title := []string{"检查日期", "眼别", "Result", "眼别", "Result"}

	files := ScanFiles(Dir)
	//If you want to arrange data of exams in time order, use code as follows:
	/*
	   if l:=len(files); l !=0{
	       for i, _ := range files{
	           OD, OS:= ParsePDF(files[i])
	           _date := strings.Split(files[i],"/")
	           date:= _date[len(_date)-1][:8]
	           OD.date, OS.date  = date,date
	           lock.Lock()
	           fmt.Println(InfoMap["mark"])
	           fmt.Println(OD, OS)
	           f, _ := excelize.OpenFile("Octopus.xlsx")
	           WriteXlsx(f,"1",eye_title,i)
	           SaveResult(InfoMap["num"],OD, OS,i)
	           lock.Unlock()
	       }
	   }
	*/
	//Data arranged in reversed time order
	for i := len(files) - 1; i >= 0; i-- {
		// The last listed file goes into the first column group.
		slot := len(files) - 1 - i

		OD, OS := ParsePDF(files[i])

		// Take the date from the file name; guard the slice so a short name
		// does not panic.
		parts := strings.Split(files[i], "/")
		date := parts[len(parts)-1]
		if len(date) > 8 {
			date = date[:8]
		}
		OD.date, OS.date = date, date

		// The closure lets the unlock be deferred, so the lock is released on
		// every path out of the critical section.
		func() {
			lock.Lock()
			defer lock.Unlock()

			fmt.Println(InfoMap["mark"])
			fmt.Println(OD, OS)
			f, err := excelize.OpenFile("Octopus.xlsx")
			if err != nil {
				fmt.Println(err)
				return
			}
			// Row 1 receives the column titles for this column group.
			WriteXlsx(f, "1", eye_title, slot)
			SaveResult(InfoMap["num"], OD, OS, slot)
		}()
	}
	//If you want to get only data of the latest exam, use code as follows:
	/*
	   if l:=len(files); l !=0{
	       OD, OS:= ParsePDF(files[l-1])
	       _date := strings.Split(files[l-1],"/")
	       date:= _date[len(_date)-1][:8]
	       OD.date, OS.date  = date,date
	       lock.Lock()
	       fmt.Println(InfoMap["mark"])
	       fmt.Println(OD, OS)
	       f, _ := excelize.OpenFile("Octopus.xlsx")
	       WriteXlsx(f,"1",eye_title,0)
	       SaveResult(InfoMap["num"],OD, OS,0)
	       lock.Unlock()
	   }
	*/
}

// ParsePDF sends the file at path to the Tika server on localhost:9998 and
// splits the extracted text into one result per eye.
//
// HtmlParser produces one line per <div> of the Tika output. The first line
// is stored as the OD result and the second as the OS result; a missing line
// leaves that result empty. The returned values have whichEye set to "OD" and
// "OS"; date is left for the caller to fill in.
//
// Failing to open the file ends the program via log.Fatal. A Tika failure is
// printed and yields empty results.
func ParsePDF(path string) (OD Eye, OS Eye) {
	// Disabled alternative that started the Tika server from here:
	/*
	   cmd := exec.Command(`java -jar tika-server-standard-2.3.0.jar`)
	   if runtime.GOOS == "windows" {
	        cmd.SysProcAttr = &syscall.SysProcAttr{HideWindow: true}
	   }
	   err := cmd.Run()
	   if err != nil {
	       fmt.Println(err)
	       return
	   }
	*/
	OD.whichEye, OS.whichEye = "OD", "OS"

	f, err := os.Open(path)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	client := tika.NewClient(nil, "http://localhost:9998")
	body, err := client.Parse(context.Background(), f)
	if err != nil {
		// Without a body there is nothing to extract; return the labelled but
		// empty results rather than indexing into an empty split below.
		fmt.Println(err)
		return OD, OS
	}
	// Disabled debugging aid that dumped the Tika output to a file:
	/*
	   file, err := os.Create("body.html")
	   if err != nil {
	       fmt.Println(err)
	       return
	   }
	   defer file.Close()
	   file.WriteString(body)
	*/

	result := strings.Split(HtmlParser(body), "\n")
	OD.result = result[0] // Split always returns at least one element
	if len(result) > 1 {
		OS.result = result[1]
	}
	return OD, OS
}

// HtmlParser extracts one summary line per <div> element of html.
//
// For every <div> the combined text of its <p> descendants is searched with
// reg0, reg1 and reg2. The three matches (each empty when nothing matched)
// are joined with single spaces and terminated by a newline. A document that
// cannot be parsed ends the program via log.Fatal.
func HtmlParser(html string) (result string) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		log.Fatal(err)
	}

	var lines strings.Builder
	doc.Find("div").Each(func(_ int, div *goquery.Selection) {
		text := div.Find("p").Text()
		matches := []string{
			reg0.FindString(text),
			reg1.FindString(text),
			reg2.FindString(text),
		}
		lines.WriteString(strings.Join(matches, " "))
		lines.WriteString("\n")
	})
	return lines.String()
}

// startServer launches the Tika server jar through the Windows command
// interpreter ("cmd /c") and blocks until that process exits, then prints
// what it wrote to standard output. If the command cannot be run or exits
// with an error, the program ends via log.Fatal.
func startServer() {
	server := exec.Command("cmd", "/c", "java -jar tika-server-standard-2.3.0.jar")
	output, err := server.Output()
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%s", output)
}

// formatID left-pads ID with '0' characters until it is 12 bytes long.
// IDs that are already 12 bytes or longer are returned unchanged.
func formatID(ID string) string {
	if missing := 12 - len(ID); missing > 0 {
		return strings.Repeat("0", missing) + ID
	}
	return ID
}

// main prepares Octopus.xlsx, starts the Tika server and processes every
// patient row of the "Data_All" sheet with 20 worker goroutines.
//
// One goroutine reads the sheet and sends a map with "num" (row number) and
// "mark" (name followed by the zero-padded ID) per row over chanInfoMap; the
// workers call SaveResults for each map. Errors are printed; a workbook that
// cannot be opened or saved ends the run before any work is started.
func main() {
	chanInfoMap = make(chan map[string]string, 200)
	f, err := excelize.OpenFile("Octopus.xlsx")
	if err != nil {
		fmt.Println(err)
		return
	}
	// All later reads and writes address the sheet as "Data_All", so rename
	// the sheet at index 0 when it has a different name.
	Sheetname := f.GetSheetName(0)
	if Sheetname != "Data_All" {
		f.SetSheetName(Sheetname, "Data_All")
	}
	if err := f.Save(); err != nil {
		fmt.Println(err)
		return
	}

	go startServer()
	// NOTE(review): a fixed two-second pause may be shorter than the server's
	// start-up time — confirm, or poll the server instead.
	time.Sleep(2 * time.Second)

	waitGroup.Add(1)
	go func() {
		defer waitGroup.Done()
		// Closing the channel lets the workers' range loops finish, also when
		// the sheet cannot be read.
		defer close(chanInfoMap)

		rows, err := f.GetRows("Data_All")
		if err != nil {
			fmt.Println(err)
			return
		}
		for i, row := range rows {
			// The first row is skipped (row 1 receives the column titles), and
			// so is any row without both a name and an ID cell, which would
			// otherwise panic on row[1].
			if i == 0 || len(row) < 2 {
				continue
			}
			chanInfoMap <- map[string]string{
				"num":  strconv.Itoa(i + 1),
				"mark": row[0] + formatID(row[1]),
			}
		}
	}()

	for i := 0; i < 20; i++ {
		waitGroup.Add(1)
		go func() {
			defer waitGroup.Done()
			for InfoMap := range chanInfoMap {
				SaveResults(InfoMap)
			}
		}()
	}
	waitGroup.Wait()
}

  1. 需要自己配置java运行环境 ↩︎

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值