pdf转图片
package main
//sudo apt install libmagic-dev libmagickwand-dev
import (
	"fmt"
	"strconv"

	"gopkg.in/gographics/imagick.v2/imagick"
)
// ConvertPdfToJpg reads the PDF at pdfName and writes its first pdfSize pages
// as JPEG files named "<imageName>_<index>.jpg", where index is zero-based.
//
// pdfSize is clamped to the number of images actually loaded from the PDF, so
// asking for more pages than exist no longer writes duplicate files. A pdfSize
// of zero or less writes nothing and returns nil.
//
// The first error reported by the ImageMagick bindings is returned unchanged.
func ConvertPdfToJpg(pdfName string, imageName string, pdfSize int) error {
	imagick.Initialize()
	defer imagick.Terminate()

	mw := imagick.NewMagickWand()
	defer mw.Destroy()

	// Kept ahead of ReadImage, as in the original ordering.
	// NOTE(review): the density is presumably consumed while the PDF is
	// rasterized — confirm against the ImageMagick documentation.
	if err := mw.SetResolution(200, 200); err != nil {
		return err
	}
	if err := mw.ReadImage(pdfName); err != nil {
		return err
	}
	if err := mw.SetCompressionQuality(90); err != nil {
		return err
	}
	if err := mw.SetFormat("jpg"); err != nil {
		return err
	}

	// Never iterate past the pages that were actually loaded.
	if pages := int(mw.GetNumberImages()); pdfSize > pages {
		pdfSize = pages
	}

	for i := 0; i < pdfSize; i++ {
		if !mw.SetIteratorIndex(i) {
			return fmt.Errorf("selecting page %d of %q", i, pdfName)
		}
		// The alpha operation applies to the currently selected image only,
		// so it has to be repeated for every page rather than done once.
		if err := mw.SetImageAlphaChannel(imagick.ALPHA_CHANNEL_FLATTEN); err != nil {
			return err
		}
		if err := mw.WriteImage(imageName + "_" + strconv.Itoa(i) + ".jpg"); err != nil {
			return err
		}
	}
	return nil
}
图片识别
原理可参考:图像识别
tesseract-ocr
tesseract test_0.jpg output --oem 1 -l chi_sim+eng
You can use the command-line tool or the developer API; you can also try training your own model.
百度云OCR
个人有需求:偶尔需要识别pdf图片中的文字形成文本文档,为方便使用最好能生成可执行文件点击就可用,尽量减少依赖,故这里使用golang调用。
百度AI平台QuickStart文档配套DEMO程序的python实现
阿里,腾讯等云平台也有相关服务,替换下面代码中的请求api即可。
- 登录云平台创建授权应用(一般是oauth2机制);
- 打开对应平台的OCR接口说明,百度通用文字识别(标准版)
- 实现代码如下:
package main
import (
"bufio"
"encoding/base64"
"encoding/json"
"fmt"
"io/ioutil"
"log"
"net/http"
"net/url"
"os"
"strings"
"time"
)
// Package-level settings shared by the OCR helpers in this file.
var (
	// maxLen is the size in bytes (5 MiB) of the buffer used when reading an
	// image for base64 encoding.
	maxLen = 5 * 1024 * 1024

	// API_KEY and SECRET_KEY are sent as client_id and client_secret when
	// requesting an access token. They are empty here and must be filled in.
	API_KEY    = ""
	SECRET_KEY = ""

	// OCR_URL is the recognition endpoint; the access token is appended to it.
	OCR_URL = "https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic?access_token="

	// TOKEN_URL is the token endpoint; the credentials are appended to it.
	TOKEN_URL = "https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials"
)
// GeneralBasic submits the image file name to the general text recognition
// (standard edition) endpoint and writes each recognized line to
// "<name>.txt".
// API reference: https://ai.baidu.com/ai-doc/OCR/zk3h7xz52
//
// The process exits through log.Fatal when access_token is empty, when the
// input file cannot be opened, when the HTTP request or body read fails, or
// when the output file cannot be created or flushed. A response that lacks a
// usable words_result_num / words_result, or that reports zero results, is
// printed to stdout and the function returns without writing a file.
func GeneralBasic(name string, access_token string) {
	if access_token == "" {
		log.Fatal("access_token is empty")
	}
	fmt.Println("OCR File[" + name + "]")

	file, err := os.Open(name)
	if err != nil {
		// A nil *os.File would panic later on file.Name().
		log.Fatal(err)
	}
	defer file.Close()

	time.Sleep(2 * time.Second) //qps limit 1

	// Build the request URL locally. Appending to the package-level OCR_URL
	// would accumulate one token per call and break every call after the first.
	endpoint := OCR_URL + url.QueryEscape(access_token)
	form := "image=" + url.QueryEscape(base64e(file))
	res, err := http.Post(endpoint, "application/x-www-form-urlencoded", strings.NewReader(form))
	if err != nil {
		log.Fatal(err)
	}
	defer res.Body.Close()

	// Read the body once so the raw payload is still available for the
	// failure message; a JSON decoder would have consumed the stream.
	raw, err := ioutil.ReadAll(res.Body)
	if err != nil {
		log.Fatal(err)
	}

	var mapdata map[string]interface{}
	decodeErr := json.Unmarshal(raw, &mapdata)
	count, ok := mapdata["words_result_num"].(float64)
	if decodeErr != nil || !ok {
		fmt.Printf("%s identification failed:%s\n", file.Name(), raw)
		return
	}
	if int(count) == 0 {
		fmt.Println(file.Name() + " identification content is empty...")
		return
	}
	words_result, ok := mapdata["words_result"].([]interface{})
	if !ok {
		fmt.Printf("%s identification failed:%s\n", file.Name(), raw)
		return
	}

	// O_TRUNC prevents leftovers from a previous, longer result.
	f, err := os.OpenFile(file.Name()+".txt", os.O_RDWR|os.O_CREATE|os.O_TRUNC, os.ModePerm)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	w := bufio.NewWriter(f)
	for _, obj := range words_result {
		m, ok := obj.(map[string]interface{})
		if !ok {
			// Skip entries that are not JSON objects instead of panicking.
			continue
		}
		fmt.Fprintln(w, m["words"])
	}
	if err := w.Flush(); err != nil {
		log.Fatal(err)
	}
}
// Fetch_token requests an access token from TOKEN_URL, sending API_KEY and
// SECRET_KEY as client_id and client_secret.
// Authentication reference: http://ai.baidu.com/ai-doc/REFERENCE/Ck3dwjhhu
//
// The request uses GET; per the original author's note, POST was rejected
// with "unsupported_grant_type".
//
// It returns the access_token field of the response. The process exits
// through log.Fatal when the request fails, when the response is not valid
// JSON of the expected shape, or when the response carries an
// error_description and no token. It returns "" when neither field is present.
func Fetch_token() string {
	// Escape the credentials so reserved characters cannot corrupt the query.
	endpoint := TOKEN_URL +
		"&client_id=" + url.QueryEscape(API_KEY) +
		"&client_secret=" + url.QueryEscape(SECRET_KEY)

	res, err := http.Get(endpoint)
	if err != nil {
		log.Fatal(err)
	}
	defer res.Body.Close()

	// Decode only the two fields of interest; direct field access replaces
	// the order-dependent scan over a map.
	var reply struct {
		AccessToken      string `json:"access_token"`
		ErrorDescription string `json:"error_description"`
	}
	if err := json.NewDecoder(res.Body).Decode(&reply); err != nil {
		log.Fatal(err)
	}

	if reply.AccessToken != "" {
		return reply.AccessToken
	}
	if reply.ErrorDescription != "" {
		log.Fatal(reply.ErrorDescription)
	}
	return ""
}
// base64e reads file from its current offset to the end and returns the
// content encoded with standard base64.
//
// At most maxLen bytes are encoded. Longer input is truncated, as before, but
// the truncation is now logged because a cut-off image is unlikely to be
// usable. A read error is logged and whatever was read before the error is
// still encoded, which keeps the original best-effort contract.
func base64e(file *os.File) string {
	// ReadAll keeps reading until EOF; a single Read call is allowed to
	// return fewer bytes than the file holds.
	data, err := ioutil.ReadAll(file)
	if err != nil {
		log.Printf("base64e: read error: %v", err)
	}
	if len(data) > maxLen {
		log.Printf("base64e: input is %d bytes, truncating to %d", len(data), maxLen)
		data = data[:maxLen]
	}
	return base64.StdEncoding.EncodeToString(data)
}
// base64d decodes a standard base64 string and returns the decoded bytes
// together with any error reported by the decoder.
func base64d(str string) ([]byte, error) {
	decoded, err := base64.StdEncoding.DecodeString(str)
	return decoded, err
}
github上有测试样例,替换地址及添加自己的api_key和secret_key即可