最近做了个在线预览的需求
步骤 office->pdf->jpg
工具 : office 转 pdf https://github.com/gotenberg/gotenberg
pdf 转 jpg https://imagemagick.org/
office 转 pdf:直接启动一个 gotenberg 的 docker 容器来完成转换,并通过 channel 控制并发数量;并发数量太多会导致内存不足,从而转换失败。
docker run --rm -p 3000:3000 gotenberg/gotenberg:7
pdf 转 jpg:直接在 go 中通过 exec.Command 执行 ImageMagick 的 convert 命令进行转换:
convert -density 100 -quality 100 -background white -alpha remove "./test.pdf" ./test/test.jpg
参数说明:density 为图片密度(数值越大,所需内存越多,图片效果越好);quality 为图片质量(取值范围 0-100)。
为了提升图片转换效率,先获取 pdf 的总页数,然后通过 go channel 控制并发数量,每页单独执行一次 convert,多页并发转换:
// getPdfPages returns the number of pages in the PDF at filePath.
//
// It runs the external "identify" command on the file and counts the
// non-blank lines written to standard output, relying on identify printing
// one line per page. If the command cannot be started or exits with a
// non-zero status, the error from exec is returned and the count is 0.
// Empty output is reported as 0 pages.
func getPdfPages(filePath string) (int, error) {
	out, err := exec.Command("identify", filePath).Output()
	if err != nil {
		return 0, err
	}

	// Count only lines with content: splitting an empty string still yields
	// one (empty) element, which must not be mistaken for a page.
	pages := 0
	for _, line := range strings.Split(string(out), "\n") {
		if strings.TrimSpace(line) != "" {
			pages++
		}
	}
	return pages, nil
}
convertPageLimitChan := make(chan struct{}, 15) defer close(convertPageLimitChan) var eg errgroup.Group for i := 0; i < pages; i++ { convertPageLimitChan <- struct{}{} page := i eg.Go(func() error { defer func() { <-convertPageLimitChan }() cmd := exec.Command("convert", "-density", "100", "-quality", "100", "-background", "white", "-alpha", "remove", fmt.Sprintf("%s[%d]", filePath, page), fmt.Sprintf("/tmp/%s/%s-%d.jpg", fileName, fileName, page)) logger.Info(cmd.Args) cr := cmd.Run() return cr }) } if er := eg.Wait(); er != nil { logger.Error("convert err: ", er) return }