ABBYY OCR客开demo

1. 在 Java 中集成 ABBYY OCR 需要 Java 开发环境,并引入厂商提供的 jar 包:com.abbyy.FREngine.jar

2. Java 集成代码示例(实现了以下功能:生成的 PDF 每页大小一致;支持汉字识别;生成的 PDF 为双层 PDF,文字可以复制):

package com.iboyaa.ocr;

import com.abbyy.FREngine.*;
import com.iboyaa.model.Material;
import com.iboyaa.ocr.BatchProcessing.ImageSourceImpl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.text.SimpleDateFormat;
import java.util.Date;

/**
 * Demo utility that converts a folder of scanned images into one PDF file
 * using ABBYY FineReader Engine.
 *
 * <p>Folder layout (relative to {@code SamplesConfig.GetSamplesFolder()}):
 * <pre>
 *   saomiaoImg/&lt;yyyyMMdd&gt;/&lt;folder&gt;/img        source images
 *   saomiaoImg/&lt;yyyyMMdd&gt;/&lt;folder&gt;/&lt;folder&gt;.pdf  generated PDF
 * </pre>
 * where {@code <folder>} is a millisecond timestamp string and {@code <yyyyMMdd>}
 * is the date derived from it.
 *
 * <p>Processing failures are logged and swallowed; they are not propagated to the
 * caller of {@code Run}. Only engine load/unload failures propagate.
 */
public class OcrUtil {
    private static final Logger logger = LoggerFactory.getLogger(OcrUtil.class);

    /** Root folder (below the samples folder) that holds all scan jobs. */
    private static final String SCAN_ROOT = "saomiaoImg";

    /** Engine instance; non-null only between loadEngine() and unloadEngine(). */
    private IEngine engine = null;

    /**
     * Demo entry point: runs the conversion for one hard-coded folder and prints
     * the elapsed time in milliseconds.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        long start = System.currentTimeMillis();
        try {
            Material material = new Material();
            material.setFolderOnly("1535702091255");
            OcrUtil ocrUtil = new OcrUtil();
            ocrUtil.Run(material);
        } catch (Exception ex) {
            displayError(ex);
        }
        System.out.println(System.currentTimeMillis() - start);
    }

    /** Writes a progress message to standard output and to the logger. */
    private static void displayMessage(String message) {
        System.out.println(message);
        logger.info(message);
    }

    /**
     * Reports a swallowed failure: the message goes to standard output (as before)
     * and the full exception, including its stack trace, goes to the logger.
     */
    private static void displayError(Exception ex) {
        System.out.println(ex.getMessage());
        logger.error(String.valueOf(ex.getMessage()), ex);
    }

    /**
     * Loads the engine, converts the images of the given folder to a PDF using
     * skew correction and the "balanced" export scenario, then unloads the engine.
     * Page sizes in the resulting PDF are not normalized.
     *
     * @param folderdOly job folder name (millisecond timestamp string)
     * @throws Exception if the engine cannot be loaded or unloaded
     */
    public void Run(String folderdOly) throws Exception {
        loadEngine();
        try {
            processWithEngine(folderdOly);
        } finally {
            // Always release the engine, even if processing failed.
            unloadEngine();
        }
    }

    /**
     * Loads the engine, converts the images of {@code material.getFolderOnly()} to
     * a PDF whose pages keep the image size, stores the page count and the PDF
     * address on {@code material}, then unloads the engine.
     *
     * @param material job descriptor; its page count and address are set on success
     * @throws Exception if the engine cannot be loaded or unloaded
     */
    public void Run(Material material) throws Exception {
        loadEngine();
        try {
            processWithEngine(material);
        } finally {
            // Always release the engine, even if processing failed.
            unloadEngine();
        }
    }

    /** Loads the profile and runs the Material-based conversion; failures are logged, not thrown. */
    private void processWithEngine(Material material) {
        try {
            setupFREngine();
            batchProcessing(material);
        } catch (Exception ex) {
            displayError(ex);
        }
    }

    /** Loads the profile and runs the folder-name-based conversion; failures are logged, not thrown. */
    private void processWithEngine(String folderdOly) {
        try {
            setupFREngine();
            batchProcessing(folderdOly);
        } catch (Exception ex) {
            displayError(ex);
        }
    }

    /**
     * Converts the images of the material's folder to a PDF in which every page
     * keeps the size of its source image.
     *
     * <p>NOTE(review): if {@code material.getFolderOnly()} is null the path is built
     * by string concatenation and therefore contains the literal segment "null".
     *
     * @param material job descriptor; page count and PDF address are set after export
     * @throws Exception if the source folder is missing or contains no images
     */
    private void batchProcessing(Material material) throws Exception {
        System.out.println("-------------");
        String folderdOly = material.getFolderOnly();
        String dateName = resolveDateName(folderdOly);
        String relativeFolder = SCAN_ROOT + "/" + dateName + "/" + folderdOly;
        // Folder that holds the source images.
        String sourceFolder = CombinePaths(SamplesConfig.GetSamplesFolder(), relativeFolder + "/img");
        // Folder that receives the generated PDF.
        String resultFolder = CombinePaths(SamplesConfig.GetSamplesFolder(), relativeFolder);

        ImageSourceImpl imageSource = openImageSource(sourceFolder, resultFolder);
        IBatchProcessor batchProcessor = engine.CreateBatchProcessor();
        IFRDocument document = engine.CreateFRDocument();
        try {
            // Same preparation mode for the batch pass and for the document, so that
            // all images are loaded with identical resolution handling.
            IPrepareImageMode pim = engine.CreatePrepareImageMode();
            pim.setCorrectSkew(false);
            pim.setAutoOverwriteResolution(false);
            pim.setOverwriteResolution(true);

            int num = addBatchPagesToDocument(batchProcessor, imageSource, pim, document, pim);

            IDocumentProcessingParams dpp = createDocumentProcessingParams();
            displayMessage("Process...");
            document.Process(dpp);

            IPDFExportParams pdfParams = engine.CreatePDFExportParams();
            // Fastest export scenario; PES_Balanced is the alternative used by the
            // folder-name-based overload.
            pdfParams.setScenario(PDFExportScenarioEnum.PES_MaxSpeed);
            // Page size follows the image size. PSM_UserDefined together with
            // setPaperWidth/setPaperHeight would force a fixed paper size instead.
            pdfParams.getPDFFeatures().getPaperSizeParams().setPaperSizeMode(PaperSizeModeEnum.PSM_ImageSize);

            String pdfExportPath = CombinePaths(resultFolder, folderdOly + ".pdf");
            document.Export(pdfExportPath, FileExportFormatEnum.FEF_PDF, pdfParams);
            material.setPage(num);
            material.setAddress("/saomiaoImg/" + dateName + "/" + folderdOly + "/" + folderdOly + ".pdf");
        } catch (Exception ex) {
            displayError(ex);
        } finally {
            // Close document
            document.Close();
        }
    }

    /** Loads the predefined engine profile used for all conversions. */
    private void setupFREngine() {
        displayMessage("Loading predefined profile...");
        engine.LoadPredefinedProfile("DocumentConversion_Accuracy");
        // Possible profile names are:
        //   "DocumentConversion_Accuracy", "DocumentConversion_Speed",
        //   "DocumentArchiving_Accuracy", "DocumentArchiving_Speed",
        //   "BookArchiving_Accuracy", "BookArchiving_Speed",
        //   "TextExtraction_Accuracy", "TextExtraction_Speed",
        //   "FieldLevelRecognition",
        //   "BarcodeRecognition_Accuracy", "BarcodeRecognition_Speed",
        //   "HighCompressedImageOnlyPdf",
        //   "BusinessCardsProcessing",
        //   "EngineeringDrawingsProcessing",
        //   "Version9Compatibility",
        //   "Default"
    }

    /**
     * Joins a parent path and a child path using the platform's separator rules.
     *
     * @param path1 parent path
     * @param path2 child path, resolved against {@code path1}
     * @return the combined path string
     */
    public static String CombinePaths(String path1, String path2) {
        File parent = new File(path1);
        return new File(parent, path2).getPath();
    }

    /** Returns true only if {@code path} exists and is a directory. */
    private static boolean isDirectoryExist(String path) {
        return new File(path).isDirectory();
    }

    /** Creates {@code path} (including missing parents) if it is not already a directory. */
    private static void createDirectory(String path) {
        File dir = new File(path);
        if (!dir.isDirectory() && !dir.mkdirs()) {
            displayMessage("Cannot create " + path);
        }
    }

    /**
     * Derives the {@code yyyyMMdd} date folder name from a millisecond timestamp
     * string, falling back to the current time when the string is null or empty.
     * The date is formatted in the JVM's default time zone.
     *
     * @throws NumberFormatException if {@code folderdOly} is non-empty but not a long
     */
    private static String resolveDateName(String folderdOly) {
        SimpleDateFormat df = new SimpleDateFormat("yyyyMMdd");
        boolean hasTimestamp = folderdOly != null && !"".equals(folderdOly);
        long millis = hasTimestamp ? Long.parseLong(folderdOly) : System.currentTimeMillis();
        return df.format(new Date(millis));
    }

    /**
     * Verifies the source folder, makes sure the result folder exists and opens an
     * image source over the source folder.
     *
     * @throws Exception if the source folder is missing or contains no images
     */
    private ImageSourceImpl openImageSource(String sourceFolder, String resultFolder) throws Exception {
        if (!isDirectoryExist(sourceFolder)) {
            throw new Exception("Cannot find " + sourceFolder);
        }
        createDirectory(resultFolder);

        ImageSourceImpl imageSource = new ImageSourceImpl(sourceFolder);
        if (imageSource.IsEmpty()) {
            throw new Exception("No images in specified folder.");
        }
        return imageSource;
    }

    /**
     * Runs the batch processor over the image source and adds the source image of
     * every processed page to the document.
     *
     * <p>NOTE(review): the pages returned by the batch processor are synthesized but
     * not exported themselves; only their image paths are re-added to the document,
     * which is processed again afterwards. Confirm whether the batch pass is needed.
     *
     * @param batchMode    image preparation mode for the batch processor
     * @param documentMode image preparation mode for {@code AddImageFile}; may be null
     * @return number of pages added to the document
     */
    private int addBatchPagesToDocument(IBatchProcessor batchProcessor, ImageSourceImpl imageSource,
                                        IPrepareImageMode batchMode, IFRDocument document,
                                        IPrepareImageMode documentMode) throws Exception {
        batchProcessor.Start(imageSource, null, batchMode, null, null);
        int pageCount = 0;
        IFRPage page = batchProcessor.GetNextProcessedPage();
        while (page != null) {
            pageCount++;
            page.Synthesize(null);
            String imagePath = page.getSourceImagePath();
            displayMessage("Process..." + imagePath + "-----");
            document.AddImageFile(imagePath, documentMode, null);
            page = batchProcessor.GetNextProcessedPage();
        }
        return pageCount;
    }

    /**
     * Builds the document processing parameters shared by both conversions:
     * English + Simplified Chinese recognition in low-resolution mode, aggressive
     * text extraction (including text on pictures) and orientation correction.
     */
    private IDocumentProcessingParams createDocumentProcessingParams() throws Exception {
        // Recognition languages (includes Chinese characters).
        IRecognizerParams rp = engine.CreateRecognizerParams();
        rp.setLowResolutionMode(true);
        rp.SetPredefinedTextLanguage("English, ChinesePRC");

        IObjectsExtractionParams oep = engine.CreateObjectsExtractionParams();
        oep.setDetectTextOnPictures(true);
        oep.setEnableAggressiveTextExtraction(true);

        IPageAnalysisParams pap = engine.CreatePageAnalysisParams();
        pap.setEnableTextExtractionMode(true);

        IPagePreprocessingParams ppp = engine.CreatePagePreprocessingParams();
        ppp.setCorrectOrientation(true);

        IDocumentProcessingParams dpp = engine.CreateDocumentProcessingParams();
        dpp.getPageProcessingParams().setRecognizerParams(rp);
        dpp.getPageProcessingParams().setObjectsExtractionParams(oep);
        dpp.getPageProcessingParams().setPageAnalysisParams(pap);
        dpp.getPageProcessingParams().setPagePreprocessingParams(ppp);
        return dpp;
    }

    /**
     * Converts the images of the given folder to a PDF. Unlike the Material-based
     * overload, images are added to the document without a preparation mode, so a
     * uniform page size in the PDF is not guaranteed.
     *
     * <p>NOTE(review): if {@code folderdOly} is null the path is built by string
     * concatenation and therefore contains the literal segment "null".
     *
     * @param folderdOly job folder name (millisecond timestamp string)
     * @throws Exception if the source folder is missing or contains no images
     */
    private void batchProcessing(String folderdOly) throws Exception {
        String dateName = resolveDateName(folderdOly);
        String relativeFolder = SCAN_ROOT + "/" + dateName + "/" + folderdOly;
        // Folder that holds the source images.
        String sourceFolder = CombinePaths(SamplesConfig.GetSamplesFolder(), relativeFolder + "/img");
        // Folder that receives the generated PDF.
        String resultFolder = CombinePaths(SamplesConfig.GetSamplesFolder(), relativeFolder);

        ImageSourceImpl imageSource = openImageSource(sourceFolder, resultFolder);
        IBatchProcessor batchProcessor = engine.CreateBatchProcessor();
        IFRDocument document = engine.CreateFRDocument();
        try {
            // Skew correction applies to the batch pass only; the document itself
            // receives the images with default preparation (null mode).
            IPrepareImageMode pim = engine.CreatePrepareImageMode();
            pim.setCorrectSkew(true);
            pim.setCorrectSkewMode(CorrectSkewModeEnum.CSM_CorrectSkewByHorizontalLines.getValue() + CorrectSkewModeEnum.CSM_CorrectSkewByVerticalText.getValue());
            pim.setAutoOverwriteResolution(true);

            addBatchPagesToDocument(batchProcessor, imageSource, pim, document, null);

            IDocumentProcessingParams dpp = createDocumentProcessingParams();
            displayMessage("Process...");
            document.Process(dpp);

            displayMessage("Saving results...");

            // Save results to pdf using 'balanced' scenario
            IPDFExportParams pdfParams = engine.CreatePDFExportParams();
            pdfParams.setScenario(PDFExportScenarioEnum.PES_Balanced);

            // Use CombinePaths instead of a hard-coded "\\" so the path is valid on
            // every platform.
            String pdfExportPath = CombinePaths(resultFolder, folderdOly + ".pdf");
            document.Export(pdfExportPath, FileExportFormatEnum.FEF_PDF, pdfParams);
        } catch (Exception ex) {
            displayError(ex);
        } finally {
            // Close document
            document.Close();
        }
    }

    /**
     * Obtains the engine object using the DLL folder and developer serial number
     * from {@code SamplesConfig}.
     */
    private void loadEngine() throws Exception {
        displayMessage("Initializing Engine...");
        engine = Engine.GetEngineObject(SamplesConfig.GetDllFolder(), SamplesConfig.GetDeveloperSN());
    }

    /** Drops the engine reference and deinitializes the engine. */
    private void unloadEngine() throws Exception {
        displayMessage("Deinitializing Engine...");
        engine = null;
        Engine.DeinitializeEngine();
    }
}
  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 4
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值