简单的图片验证

在编写爬虫爬取一个航空公司官网的时候,发现航班信息都是图片,比如航班号、舱位信息、价格、时间等。相对而言,这些图片比较简单:没有干扰线条,文字也是端端正正的,所以可以直接处理。

package com.weixuan;

import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.*;

/**
 * Recognises the text in simple, noise-free images by comparing the dot matrix of
 * every glyph against a feature library loaded from a properties file.
 *
 * <p>A pixel counts as foreground when its red component is below {@code GRAYCVALUE}.
 *
 * Create by fengtang
 * 2015/8/25 0025
 * ImageToString
 */
public class ImageToString {

    /**
     * Feature library: each key is the recognised text, each value is the glyph's
     * dot-matrix string ('1' = foreground pixel, '0' = background, row by row).
     */
    public static Properties prop = new Properties();
    /** Red-channel threshold separating foreground (below) from background. */
    private static final int GRAYCVALUE = 128;

    static {
        FileInputStream in = null;
        try {
            String fileName = "E:\\IDEA\\ImageToString\\src\\main\\resource\\config\\FeatureLibrary.properties";
            File myFile = new File(fileName);
            in = new FileInputStream(myFile);
            prop.load(new InputStreamReader(in, "UTF-8"));
        } catch (Exception e) {
            // Best effort: without the library every lookup in match() simply fails.
            e.printStackTrace();
        } finally {
            // Always release the file handle, even when loading failed half-way.
            if (in != null) {
                try {
                    in.close();
                } catch (Exception ignored) {
                    // Nothing useful can be done when closing a read-only stream fails.
                }
            }
        }
    }

    /**
     * Returns the text shown in the given image.
     *
     * @param imag the source image
     * @return the recognised text; an empty string when the image contains no
     *         foreground pixel at all
     * @throws IllegalArgumentException if {@code imag} is {@code null}
     * @throws RuntimeException         if a glyph has no entry in the feature library
     * @throws Exception                declared for backward compatibility with existing callers
     */
    public static String getString(BufferedImage imag) throws Exception {
        if (imag == null) {
            throw new IllegalArgumentException("imag must not be null");
        }

        // 1. Crop to the bounding box of the foreground pixels.
        BufferedImage valid = getPicValidByValue(imag, GRAYCVALUE);
        if (valid == null) {
            // Nothing is drawn on the image, so there is nothing to recognise.
            return "";
        }

        // 2. Cut the image into one piece per glyph.
        BufferedImage[] imagList = getSplitPics(valid);
        StringBuilder validateCode = new StringBuilder();
        for (int i = 0; i < imagList.length; i++) {
            // 3. Crop every piece again (removes the rows above/below a short glyph).
            BufferedImage glyph = getPicValidByValue(imagList[i], GRAYCVALUE);
            // 4. Describe the glyph as a dot-matrix string.
            String picCode = getSingleBmpCode(glyph, GRAYCVALUE);
            // 5. Look the dot matrix up in the feature library.
            String code = match(picCode);
            if (code == null) {
                throw new RuntimeException("匹配出错");
            }
            // 6. Assemble the result; the library stores ':' under the key "semicolon".
            validateCode.append("semicolon".equals(code) ? ":" : code);
        }
        return validateCode.toString();
    }

    /**
     * Returns the dot-matrix description of an image.
     *
     * @param singlepic the (already cropped) glyph image
     * @param grayValue red-channel threshold between foreground and background
     * @return one character per pixel, row by row ('1' = foreground, '0' = background),
     *         or {@code null} when {@code singlepic} is {@code null}
     */
    private static String getSingleBmpCode(BufferedImage singlepic, int grayValue) {
        if (singlepic == null) {
            return null;
        }
        int width = singlepic.getWidth();
        int height = singlepic.getHeight();
        StringBuilder code = new StringBuilder(width * height);
        for (int y = 0; y < height; y++) {
            for (int x = 0; x < width; x++) {
                code.append(getPixel(singlepic, x, y)[0] < grayValue ? '1' : '0');
            }
        }
        return code.toString();
    }

    /**
     * Crops an image to the bounding box of its foreground pixels.
     *
     * @param imag     the image to crop
     * @param grayVale red-channel threshold between foreground and background
     * @return the cropped sub-image, or {@code null} when the image has no foreground pixel
     */
    private static BufferedImage getPicValidByValue(BufferedImage imag, int grayVale) {
        int width = imag.getWidth();
        int height = imag.getHeight();
        int posX1 = width;
        int posY1 = height;
        // -1 marks "no foreground pixel seen yet".
        int posX2 = -1;
        int posY2 = -1;
        for (int y = 0; y < height; y++) {
            for (int x = 0; x < width; x++) {
                if (getPixel(imag, x, y)[0] < grayVale) {
                    posX1 = Math.min(posX1, x);
                    posY1 = Math.min(posY1, y);
                    posX2 = Math.max(posX2, x);
                    posY2 = Math.max(posY2, y);
                }
            }
        }
        if (posX2 < 0) {
            // A blank image would otherwise ask getSubimage for a negative size.
            return null;
        }
        return imag.getSubimage(posX1, posY1, posX2 - posX1 + 1, posY2 - posY1 + 1);
    }

    /**
     * Reads the rgb components of one pixel.
     *
     * @param imag the source image
     * @param j    x coordinate of the pixel
     * @param i    y coordinate of the pixel
     * @return an array holding the r, g and b components at index 0, 1 and 2
     */
    private static int[] getPixel(BufferedImage imag, int j, int i) {
        int[] rgb = new int[3];
        int pixel = imag.getRGB(j, i);
        rgb[0] = (pixel & 0xff0000) >> 16;
        rgb[1] = (pixel & 0xff00) >> 8;
        rgb[2] = pixel & 0xff;
        return rgb;
    }

    /**
     * Looks a dot-matrix string up in the feature library.
     *
     * @param picCode dot-matrix string of one glyph
     * @return the key of the first library entry whose value equals {@code picCode},
     *         or {@code null} when there is none
     */
    private static String match(String picCode) {
        if (picCode == null) {
            return null;
        }
        for (Map.Entry<Object, Object> entry : prop.entrySet()) {
            if (picCode.equals(entry.getValue().toString())) {
                return entry.getKey().toString();
            }
        }
        return null;
    }

    /**
     * Cuts an image into glyphs: every maximal run of columns that contain at least
     * one foreground pixel becomes one full-height sub-image.
     *
     * @param img the image to cut
     * @return the glyph images from left to right; empty when the image has no
     *         foreground pixel
     */
    private static BufferedImage[] getSplitPics(BufferedImage img) {
        int width = img.getWidth();
        int height = img.getHeight();
        List<BufferedImage> pieces = new ArrayList<BufferedImage>();
        // Start column of the glyph currently being scanned, or -1 between glyphs.
        int runStart = -1;
        // x == width acts as a virtual blank column that closes a glyph touching the right edge.
        for (int x = 0; x <= width; x++) {
            boolean blank = (x == width) || isBlankColumn(img, x);
            if (!blank) {
                if (runStart < 0) {
                    runStart = x;
                }
            } else if (runStart >= 0) {
                pieces.add(img.getSubimage(runStart, 0, x - runStart, height));
                runStart = -1;
            }
        }
        return pieces.toArray(new BufferedImage[pieces.size()]);
    }

    /**
     * Tells whether column {@code x} of {@code img} contains no foreground pixel.
     */
    private static boolean isBlankColumn(BufferedImage img, int x) {
        for (int y = 0; y < img.getHeight(); y++) {
            if (getPixel(img, x, y)[0] < GRAYCVALUE) {
                return false;
            }
        }
        return true;
    }
}

FeatureLibrary.properties

#################图片识别特征库###################
semicolon=111100001111
A=000100000010000010100001010000101000011100010001001000101110111
B=111110010001010001011110010011010001010001010001111110
C=001111010001100000100000100000100000100000010001001110
D=111100010010010001010001010001010001010001010010111100
E=111111010001010000010010011110010010010000010001111111
F=111111010001010000010010011110010010010000010000111000
G=001110010010100000100000100000100111100010010010001100
H=111011101000100100010010001001111100100010010001001000101110111
I=111110010000100001000010000100001000010011111
J=011111000100000100000100000100000100000100000100100100111000
K=111011010010010100011000011100010100010010010010111011
L=111000010000010000010000010000010000010000010001111111
M=111011101101100110110011011001010100101010010101001010101101011
N=111011101100100110010010101001010100101010010011001001101110010
O=001100010010100001100001100001100001100001010010001100
P=111110010001010001010001011110010000010000010000111000
Q=001100010010100001100001100001100001101101010010001110000011
R=111110001000100100010011110001010000100100010010001000101110011
S=011111000110000010000011000001000011000111110
T=111111010100100001000010000100001000010001110
U=111011101000100100010010001001000100100010010001001000100011100
V=111011101000100100010001010000101000010100001010000010000001000
W=110101101010100101010010101001010100110110001010000101000010100
X=110111010010010100001100001000001100010010010010111011
Y=111011101000100010100001010000010000001000000100000010000011100
Z=111111001000010001000010000100010000100111111
0=011101000110001100011000110001100011000101110
1=010110010010010010010010111
2=011101000110001000010001000100010001000011111
3=011101000100001001100000100001000011001101110
4=000010000110001010010010010010100010011110000010000111
5=111111000010000111101000100001100011000101110
6=001110100110000101101100110001100011000101110
7=111111001000010000100010000100001000010000100
8=011101000110001010100111010001100011000101110
9=011101000110001100011001101101000011001011100
舱=001000010000011110011000010010100100011011000010010010000001111110111100010010100100011010100100010110101100010010100010100010100010100110011110
¥=111000111010000010001000100000101000000010000011111110000010000000010000000010000000111000

测试

package com.weixuan;

import org.junit.Test;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;

/**
 * Manual smoke test: reads one image from a fixed local path and prints the text
 * that {@link ImageToString#getString} recognises in it.
 *
 * Create by fengtang
 * 2015/8/25 0025
 * ImageToString
 */
public class TestProcessImage {

    /** Absolute path of the sample image; adjust it to the local machine before running. */
    public static final String fileName = "C:\\Users\\fengtang\\Desktop\\img\\flightNo.png";

    /**
     * Recognises the sample image and prints the result.
     *
     * @throws Exception if the image cannot be read or recognised; the exception is
     *                   propagated so that the test fails instead of silently passing
     */
    @Test
    public void processImageTest() throws Exception {
        BufferedImage imag = ImageIO.read(new File(fileName));
        if (imag == null) {
            // ImageIO.read returns null when no registered reader can decode the file.
            throw new IllegalStateException("Unsupported or unreadable image: " + fileName);
        }
        System.out.println(ImageToString.getString(imag));
    }
}

测试的航班号和价格

航班号

价格

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值