TESS4J

需要导入的类(import 语句):
import java.awt.Color;
import java.awt.image.BufferedImage;
import javax.imageio.ImageIO;
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import java.io.IOException;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileDescriptor;
import java.io.PrintStream;
import java.io.FileNotFoundException;
import java.awt.Color;
import java.awt.image.BufferedImage;
import java.io.BufferedOutputStream;
import java.lang.RuntimeException;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.util.CellRangeAddress;

程序主体代码:
/**
 * Batch OCR tool: cleans up a directory of images, runs Tesseract (via Tess4J)
 * over the cleaned images, and writes the recognised company name and
 * registration number of each image into an Excel (.xls) sheet.
 *
 * <p>Processing happens in two phases:
 * <ol>
 *   <li>Every image in {@link #RAW_IMAGE_DIR} has its pure-black pixels turned
 *       white (the original author's watermark-removal step), is cropped to its
 *       top-left region (half the width, a quarter of the height), enlarged by
 *       {@link #RESIZE_FACTOR}, and saved as {@code <n>.png} in
 *       {@link #CLEAN_IMAGE_DIR}.</li>
 *   <li>Every file in {@link #CLEAN_IMAGE_DIR} is OCR'd twice, once with the
 *       English model (for the registration number) and once with the
 *       simplified-Chinese model (for the company name); one sheet row is
 *       produced per file and the workbook is saved to {@link #RESULT_FILE}.</li>
 * </ol>
 */
public class aa
{
	/** Directory holding the original (watermarked) images. */
	private static final String RAW_IMAGE_DIR = "D:\\new\\tmall2";

	/** Directory receiving the cleaned images; also the OCR input directory. */
	private static final String CLEAN_IMAGE_DIR = "D:\\new\\tmall";

	/** Data path handed to Tesseract. */
	private static final String TESSDATA_PATH = "F:\\MyDownloads\\Tess4J";

	/** Destination of the generated workbook. */
	private static final String RESULT_FILE = "D:\\new\\result.xls";

	/** Number of characters a registration number must have to be accepted. */
	private static final int REGISTRATION_NUMBER_LENGTH = 18;

	/** Enlargement factor applied to the cropped image before it is saved. */
	private static final float RESIZE_FACTOR = 2.0f;

	/**
	 * Runs both phases: image clean-up, then OCR and Excel export.
	 *
	 * @param args ignored
	 * @throws IOException if a directory cannot be listed or created, an image
	 *                     cannot be read or written, or the workbook cannot be saved
	 */
	public static void main(String[] args) throws IOException
	{
		preprocessImages(new File(RAW_IMAGE_DIR), new File(CLEAN_IMAGE_DIR));

		// Build the Excel sheet: a header row with two merged three-column cells.
		HSSFWorkbook wkb = new HSSFWorkbook();
		try
		{
			HSSFSheet sheet = wkb.createSheet("企业表");
			HSSFRow header = sheet.createRow(0);
			header.createCell(0).setCellValue("企业名称");
			header.createCell(3).setCellValue("企业注册号");
			sheet.addMergedRegion(new CellRangeAddress(0, 0, 0, 2));
			sheet.addMergedRegion(new CellRangeAddress(0, 0, 3, 5));

			try
			{
				fillSheetFromImages(sheet, new File(CLEAN_IMAGE_DIR));
			}
			catch (TesseractException e)
			{
				// As before, the first OCR failure stops the run; rows gathered
				// so far are still saved below.
				System.err.println("OCR failed, saving the rows recognised so far: " + e.getMessage());
				e.printStackTrace();
			}

			// Write once, after all rows exist, and always close the stream.
			// A failure to save is reported to the caller instead of being swallowed.
			try (FileOutputStream output = new FileOutputStream(RESULT_FILE))
			{
				wkb.write(output);
			}
		}
		finally
		{
			wkb.close();
		}
	}

	/**
	 * Cleans every image found in {@code sourceDir} and stores the result as
	 * {@code <position>.png} in {@code targetDir}, where position is the 1-based
	 * index of the entry in the directory listing.
	 *
	 * <p>Entries that are not regular files, cannot be decoded as an image, or
	 * are too small to crop are skipped with a message on {@code System.err};
	 * they still consume a position so the numbering of the other images does
	 * not depend on them.
	 *
	 * @throws IOException if a directory cannot be listed/created or an image
	 *                     cannot be read or written
	 */
	private static void preprocessImages(File sourceDir, File targetDir) throws IOException
	{
		if (!targetDir.isDirectory() && !targetDir.mkdirs())
		{
			throw new IOException("Cannot create output directory: " + targetDir);
		}

		int position = 0;
		for (File source : listDirectory(sourceDir))
		{
			position++;
			if (!source.isFile())
			{
				System.err.println("Skipping non-file entry: " + source);
				continue;
			}

			// ImageIO.read returns null when no registered reader understands the file.
			BufferedImage image = ImageIO.read(source);
			if (image == null)
			{
				System.err.println("Skipping unreadable image: " + source);
				continue;
			}

			BufferedImage cleaned = cleanImage(image);
			if (cleaned == null)
			{
				System.err.println("Skipping image too small to crop: " + source);
				continue;
			}

			ImageIO.write(cleaned, "png", new File(targetDir, position + ".png"));
		}
	}

	/**
	 * Whitens pure-black pixels, crops the top-left region (width/2 by height/4)
	 * and enlarges it by {@link #RESIZE_FACTOR}.
	 *
	 * @param image source image; its black pixels are modified in place
	 * @return the cropped and enlarged RGB image, or {@code null} when the crop
	 *         region would be empty
	 */
	private static BufferedImage cleanImage(BufferedImage image)
	{
		final int white = Color.WHITE.getRGB();
		for (int x = 0; x < image.getWidth(); x++)
		{
			for (int y = 0; y < image.getHeight(); y++)
			{
				// Compare the colour channels only; alpha is ignored.
				if ((image.getRGB(x, y) & 0x00FFFFFF) == 0)
				{
					image.setRGB(x, y, white);
				}
			}
		}

		int cropWidth = image.getWidth() / 2;
		int cropHeight = image.getHeight() / 4;
		if (cropWidth == 0 || cropHeight == 0)
		{
			// getSubimage rejects empty regions.
			return null;
		}
		BufferedImage cropped = image.getSubimage(0, 0, cropWidth, cropHeight);

		int scaledWidth = (int) (cropWidth * RESIZE_FACTOR);
		int scaledHeight = (int) (cropHeight * RESIZE_FACTOR);
		BufferedImage scaled = new BufferedImage(scaledWidth, scaledHeight, BufferedImage.TYPE_INT_RGB);
		java.awt.Graphics graphics = scaled.getGraphics();
		try
		{
			graphics.drawImage(
					cropped.getScaledInstance(scaledWidth, scaledHeight, java.awt.Image.SCALE_SMOOTH),
					0, 0, null);
		}
		finally
		{
			graphics.dispose();
		}
		return scaled;
	}

	/**
	 * OCRs every entry of {@code imageDir} and appends one row per entry to
	 * {@code sheet}: columns 0-2 (merged) receive the company name, columns 3-5
	 * (merged) the registration number. A cell is left empty when the
	 * corresponding value cannot be extracted from the OCR text.
	 *
	 * @throws IOException        if {@code imageDir} cannot be listed
	 * @throws TesseractException if OCR fails for a file; rows added before the
	 *                            failure remain in the sheet
	 */
	private static void fillSheetFromImages(HSSFSheet sheet, File imageDir)
			throws IOException, TesseractException
	{
		ITesseract instance = new Tesseract();
		instance.setDatapath(TESSDATA_PATH);

		int rowIndex = 0;
		for (File file : listDirectory(imageDir))
		{
			rowIndex++;
			HSSFRow row = sheet.createRow(rowIndex);
			row.createCell(0);
			row.createCell(3);
			sheet.addMergedRegion(new CellRangeAddress(rowIndex, rowIndex, 0, 2));
			sheet.addMergedRegion(new CellRangeAddress(rowIndex, rowIndex, 3, 5));

			// English model for the registration number.
			instance.setLanguage("eng");
			String numberText = instance.doOCR(file);

			// Simplified-Chinese model for the company name.
			instance.setLanguage("chi_sim");
			String nameText = instance.doOCR(file);

			String number = extractRegistrationNumber(numberText);
			if (number != null)
			{
				row.getCell(3).setCellValue(number);
			}

			String name = extractCompanyName(nameText);
			if (name != null)
			{
				row.getCell(0).setCellValue(name);
			}
		}
	}

	/**
	 * Extracts the registration number from English OCR text.
	 *
	 * <p>Only the text between the first and the second {@code ':'} is examined;
	 * digits and upper-case ASCII letters are collected and every other
	 * character is dropped.
	 *
	 * @return the first {@link #REGISTRATION_NUMBER_LENGTH} collected characters,
	 *         or {@code null} if fewer are found in that segment
	 */
	private static String extractRegistrationNumber(String ocrText)
	{
		if (ocrText == null)
		{
			return null;
		}
		StringBuilder digits = new StringBuilder(REGISTRATION_NUMBER_LENGTH);
		int colons = 0;
		for (int i = 0; i < ocrText.length(); i++)
		{
			char ch = ocrText.charAt(i);
			if (ch == ':')
			{
				colons++;
			}
			else if (colons == 1)
			{
				if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z'))
				{
					digits.append(ch);
					if (digits.length() == REGISTRATION_NUMBER_LENGTH)
					{
						return digits.toString();
					}
				}
			}
			else if (colons > 1)
			{
				break;
			}
		}
		return null;
	}

	/**
	 * Extracts the company name from Chinese OCR text.
	 *
	 * <p>The name is everything after the second {@code ':'} up to and including
	 * the first {@code '司'}, with all characters (whitespace included) kept
	 * verbatim.
	 *
	 * <p>NOTE(review): only the ASCII colon is counted; if the Chinese model
	 * emits a full-width colon the segment boundaries will differ - confirm
	 * against real OCR output.
	 *
	 * @return the name, or {@code null} if no {@code '司'} occurs before the
	 *         third {@code ':'} or the end of the text
	 */
	private static String extractCompanyName(String ocrText)
	{
		if (ocrText == null)
		{
			return null;
		}
		StringBuilder name = new StringBuilder();
		int colons = 0;
		for (int i = 0; i < ocrText.length(); i++)
		{
			char ch = ocrText.charAt(i);
			if (ch == ':')
			{
				colons++;
			}
			else if (colons == 2)
			{
				name.append(ch);
				if (ch == '司')
				{
					return name.toString();
				}
			}
			else if (colons > 2)
			{
				break;
			}
		}
		return null;
	}

	/**
	 * Lists the entries of {@code dir}.
	 *
	 * @throws IOException if {@code dir} is not a directory or cannot be read
	 *                     (the cases in which {@link File#listFiles()} returns null)
	 */
	private static File[] listDirectory(File dir) throws IOException
	{
		File[] entries = dir.listFiles();
		if (entries == null)
		{
			throw new IOException("Cannot list directory: " + dir);
		}
		return entries;
	}
}

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值