0. 准备工作:引入依赖包(tess4j),并下载文字库(tessdata 语言包)
<!-- Maven dependency for Tess4J 4.6.0; presumably the source of the Tesseract / ImageHelper classes used by the OCR step (verify against the file's imports). -->
<dependency>
<groupId>net.sourceforge.tess4j</groupId>
<artifactId>tess4j</artifactId>
<version>4.6.0</version>
</dependency>
文字库 链接:https://pan.baidu.com/s/11u3j20V0nU4mGRy60_sTWw
提取码:estx
1.获取屏幕指定区域
/**
 * Captures a fixed region of the screen: 500x500 pixels with its top-left corner at (100, 100).
 *
 * <p>The region is clipped to the screen size reported by the default toolkit, so on a small
 * screen the returned image may be smaller than 500x500 instead of the capture failing.
 *
 * @return the captured region, or {@code null} if the robot cannot be created
 *         or the region lies entirely outside the screen
 */
public static BufferedImage getScreen() {
    try {
        Robot robot = new Robot();
        Dimension screenSize = Toolkit.getDefaultToolkit().getScreenSize();
        Rectangle screenRect = new Rectangle(screenSize.width, screenSize.height);
        // Capture only the region of interest rather than grabbing the whole screen and cropping.
        Rectangle region = new Rectangle(100, 100, 500, 500).intersection(screenRect);
        if (region.isEmpty()) {
            // createScreenCapture rejects zero-sized rectangles.
            return null;
        }
        return robot.createScreenCapture(region);
    } catch (AWTException e) {
        e.printStackTrace();
    }
    return null;
}
2. 为了提高图片文字识别率,图片灰度化
/**
 * Converts an image to grayscale in place by copying one colour channel into the red, green
 * and blue components of every pixel. The alpha component of each pixel is preserved.
 *
 * <p>Works for any alpha value, including fully or partially transparent pixels.
 *
 * @param image   the image to modify in place
 * @param channel the channel to keep: 1 = red, 2 = green, 3 = blue;
 *                any other value leaves the image untouched
 */
public static void changeAllARGB(BufferedImage image, int channel) {
    final int shift;
    switch (channel) {
        case 1:
            shift = 16;
            break;
        case 2:
            shift = 8;
            break;
        case 3:
            shift = 0;
            break;
        default:
            // No channel selected: every pixel would be written back unchanged.
            return;
    }
    int width = image.getWidth();
    int height = image.getHeight();
    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            int argb = image.getRGB(x, y);
            int value = (argb >>> shift) & 0xFF;
            // Keep alpha, replicate the selected channel into R, G and B.
            int gray = (argb & 0xFF000000) | (value << 16) | (value << 8) | value;
            image.setRGB(x, y, gray);
        }
    }
}
/**
 * Splits a packed ARGB pixel into its four 8-bit components, optionally replacing the colour
 * components with a single selected channel.
 *
 * <p>The components are extracted with shifts and masks, so every {@code int} is accepted,
 * including pixels whose alpha is below 128 (top bit clear).
 *
 * @param rgb     packed ARGB value, e.g. as returned by {@code BufferedImage.getRGB}
 * @param channel 1 = red, 2 = green, 3 = blue; any other value keeps all channels as they are
 * @return a new array {@code [alpha, red, green, blue]}, each value in 0-255; when a channel
 *         is selected, red, green and blue all hold that channel's value
 */
public static int[] transARGB(int rgb, int channel) {
    int alpha = (rgb >>> 24) & 0xFF;
    int red = (rgb >>> 16) & 0xFF;
    int green = (rgb >>> 8) & 0xFF;
    int blue = rgb & 0xFF;
    switch (channel) {
        case 1:
            return new int[] {alpha, red, red, red};
        case 2:
            return new int[] {alpha, green, green, green};
        case 3:
            return new int[] {alpha, blue, blue, blue};
        default:
            return new int[] {alpha, red, green, blue};
    }
}
/**
 * Packs an {@code [alpha, red, green, blue]} array back into a single ARGB {@code int}.
 *
 * <p>Alpha ends up in the highest byte and blue in the lowest. An alpha of 128 or more sets
 * the sign bit, so the result is negative in that case.
 *
 * @param arr array whose first four elements are the alpha, red, green and blue values,
 *            each in the range 0-255; further elements are ignored
 * @return the packed ARGB value
 * @throws IllegalArgumentException if any of the four values is outside 0-255
 */
public static int array2Integer(int[] arr) {
    int packed = 0;
    for (int i = 0; i < 4; i++) {
        int value = arr[i];
        if (value < 0 || value > 0xFF) {
            throw new IllegalArgumentException(
                    "channel value out of range 0-255 at index " + i + ": " + value);
        }
        // Shift what has been collected so far up one byte and append the next channel.
        packed = (packed << 8) | value;
    }
    return packed;
}
/**
 * Left-pads a binary string with zeros until it is eight characters long.
 *
 * @param s the binary digits to pad
 * @return {@code s} prefixed with as many {@code '0'} characters as needed to reach a length
 *         of eight; strings that already have eight or more characters come back unchanged
 */
public static String get8BitBinaryString(String s) {
    int missing = 8 - s.length();
    if (missing <= 0) {
        return s;
    }
    StringBuilder padded = new StringBuilder(8);
    while (missing-- > 0) {
        padded.append('0');
    }
    return padded.append(s).toString();
}
3. 同样是为了提高图片文字识别率,二值化
/**
 * Produces a black-and-white (binarized) copy of an image.
 *
 * <p>Each pixel's brightness is the integer average of its red, green and blue components.
 * Pixels with brightness below 100 become white; all others become black. The source image
 * is not modified.
 *
 * @param image the image to binarize
 * @return a new {@code TYPE_3BYTE_BGR} image with the same dimensions as {@code image}
 */
public static BufferedImage binaryization(BufferedImage image) {
    final int white = 0xFFFFFFFF;
    final int black = 0xFF000000;
    int cols = image.getWidth();
    int rows = image.getHeight();
    BufferedImage result = new BufferedImage(cols, rows, BufferedImage.TYPE_3BYTE_BGR);
    for (int row = 0; row < rows; row++) {
        for (int col = 0; col < cols; col++) {
            int pixel = image.getRGB(col, row);
            int sum = ((pixel >> 16) & 0xFF) + ((pixel >> 8) & 0xFF) + (pixel & 0xFF);
            int brightness = sum / 3;
            // Threshold: dark pixels turn white, everything else turns black.
            result.setRGB(col, row, brightness < 100 ? white : black);
        }
    }
    return result;
}
4. 识别文字
/**
 * Runs OCR on an image and prints the recognized text to standard output.
 *
 * <p>The image is pre-processed before recognition: grayscale from the red channel,
 * binarization, then a 3x upscale. The grayscale step modifies {@code bi} in place.
 *
 * <p>When {@code bi} is {@code null}, the file {@code 00.png} in the working directory is
 * used instead. Failures are printed rather than propagated.
 *
 * @param bi the image to recognize, or {@code null} to read {@code 00.png}
 */
public static void getScreenCard(BufferedImage bi) {
    Tesseract tesseract = new Tesseract();
    // Location of the tessdata language files (adjust to the local installation).
    tesseract.setDatapath("E:\\Java\\untitled\\src\\main\\resources\\static\\tessdata");
    // Language of the text to recognize.
    tesseract.setLanguage("chi_sim");
    try {
        if (bi == null) {
            // No image supplied: fall back to the sample file.
            bi = ImageIO.read(new File("00.png"));
        }
        if (bi == null) {
            // ImageIO.read returns null when no registered reader understands the file.
            System.err.println("No image available for OCR");
            return;
        }
        // Grayscale using the red channel.
        changeAllARGB(bi, 1);
        // Binarize.
        bi = binaryization(bi);
        // Upscale 3x before recognition.
        bi = ImageHelper.getScaledInstance(bi, bi.getWidth() * 3, bi.getHeight() * 3);
        String result = tesseract.doOCR(bi);
        System.out.println(result);
    } catch (Exception e) {
        // Broad catch kept from the original: covers file-read and OCR failures alike.
        e.printStackTrace();
    }
}
5.测试类:
/**
 * Demo entry point: grabs the screen region and passes it to the OCR step.
 *
 * @param args command-line arguments (unused)
 * @throws IOException declared by the signature; nothing in this method throws it directly
 */
public static void main(String[] args) throws IOException {
    BufferedImage screenshot = ScreenCapture.getScreen();
    ScreenCapture.getScreenCard(screenshot);
}