Java实现从正方教务系统抓取数据(二)--验证码识别

声明:此验证码识别仅供学习使用,不得用于其他用途。转载请注明出处。

参考资料:http://blog.csdn.net/problc/article/details/5794460
http://www.cnblogs.com/nayitian/p/3282862.html
http://www.geekso.com/Valite2/

原理:
多下载些验证码样本,因为教务网验证码不是特别复杂,字体颜色是纯蓝色,所以就去除背景干扰点(我试了试去不去噪点效果差不多),二值化得到黑白图,然后分割(虽然有少量粘连,但图方便直接平分成四份)取出0-9a-z的单个字模。破解时将验证码分割、二值化后与二值化的字模逐像素对比,差异低于某个阈值即认为匹配上了。
二值化后我设字体为黑色,对应像素位置为1,背景为白色,对应像素位置为0,用一个二维数组表示。这里有兴趣的可以用三元组(稀疏矩阵)存储来优化,在此就不再多说。总之参照上面的参考资料,理解起来很容易。下面就直接贴代码了。

/**
 * Template-matching recognizer for a four-character captcha.
 *
 * <p>Pipeline: crop the captcha and binarize it (glyph pixels become black,
 * everything else white), cut it into four equal-width slices, then compare
 * each slice pixel by pixel against a directory of labelled template glyphs
 * and pick the template with the fewest differing pixels.
 *
 * <p>The class keeps mutable static state (the cached template map and the
 * running template index) without any synchronization.
 */
public class ImagePreProcess {

    /** Lazily loaded templates: glyph image -> the character it represents. */
    private static Map<BufferedImage, String> trainMap = null;
    /** Running counter used to give generated template files unique names within one run. */
    private static int index = 0;

    /** Number of captcha images fetched by {@link #downloadImage} and recognized by {@link #main}. */
    private static final int SAMPLE_COUNT = 10;
    /** Number of characters in one captcha. */
    private static final int CHAR_COUNT = 4;
    /** Left margin (pixels) cropped off the raw captcha. */
    private static final int CROP_LEFT = 5;
    /** Top margin (pixels) cropped off the raw captcha; the same amount is dropped at the bottom. */
    private static final int CROP_TOP = 1;
    /** Maximum width (pixels) kept after cropping the left margin. */
    private static final int CROP_WIDTH = 50;
    /** Directory that receives the binarized image of every recognized captcha. */
    private static final String RESULT_PATH = "result" + File.separator;

    // Captcha URL.
    public static String getImageUrl = "http://jw1.hustwenhua.net/(miprmm3loyyf2kbrpifo4run)/CheckCode.aspx";
    // Directory prefixes (each ends with the platform separator so a file name can be appended).
    // File.separator yields the original "img\\" etc. on Windows and a valid path elsewhere.
    public static String srcPath = "img" + File.separator;
    public static String trainPath = "train" + File.separator;
    public static String tempPath = "temp" + File.separator;

    /** Returns the sum of the red, green and blue channels of a packed RGB value. */
    private static int channelSum(int colorInt) {
        Color color = new Color(colorInt);
        return color.getRed() + color.getGreen() + color.getBlue();
    }

    /**
     * Tests whether a pixel has the captcha's glyph colour.
     *
     * <p>The test is on the channel sum only (exactly 153), so any colour with
     * that sum is accepted, not just a single blue shade.
     *
     * @param colorInt packed RGB value
     * @return 1 if the channel sum equals 153, otherwise 0
     */
    public static int isBlue(int colorInt) {
        return channelSum(colorInt) == 153 ? 1 : 0;
    }

    /**
     * Tests whether a pixel is dark enough to count as black.
     *
     * @param colorInt packed RGB value
     * @return 1 if the channel sum is at most 100, otherwise 0
     */
    public static int isBlack(int colorInt) {
        return channelSum(colorInt) <= 100 ? 1 : 0;
    }

    /**
     * Tests whether a pixel is bright enough to count as white.
     *
     * @param colorInt packed RGB value
     * @return 1 if the channel sum is greater than 600, otherwise 0
     */
    public static int isWhite(int colorInt) {
        return channelSum(colorInt) > 600 ? 1 : 0;
    }

    /**
     * Removes the background and binarizes a captcha image.
     *
     * <p>The image is cropped (5 px on the left, 1 px top and bottom, at most
     * 50 px wide), then every pixel accepted by {@link #isBlue} is painted
     * black and every other pixel white.
     *
     * @param picFile path of the captcha image on disk
     * @return the cropped, binarized image
     * @throws Exception if the file cannot be read, is not a decodable image,
     *                   or is too small to crop
     */
    public static BufferedImage removeBackgroud(String picFile)
            throws Exception {
        BufferedImage img = ImageIO.read(new File(picFile));
        if (img == null) {
            // ImageIO.read returns null when no registered reader understands the file.
            throw new IOException("Not a decodable image: " + picFile);
        }
        if (img.getWidth() <= CROP_LEFT || img.getHeight() <= 2 * CROP_TOP) {
            throw new IOException("Image too small to crop ("
                    + img.getWidth() + "x" + img.getHeight() + "): " + picFile);
        }
        img = img.getSubimage(CROP_LEFT, CROP_TOP,
                img.getWidth() - CROP_LEFT, img.getHeight() - 2 * CROP_TOP);
        // Clamp instead of assuming the remaining width is at least CROP_WIDTH.
        img = img.getSubimage(0, 0, Math.min(CROP_WIDTH, img.getWidth()), img.getHeight());

        int width = img.getWidth();
        int height = img.getHeight();
        int black = Color.BLACK.getRGB();
        int white = Color.WHITE.getRGB();
        for (int x = 0; x < width; x++) {
            for (int y = 0; y < height; y++) {
                img.setRGB(x, y, isBlue(img.getRGB(x, y)) == 1 ? black : white);
            }
        }
        return img;
    }

    /**
     * Splits a captcha into four equal-width slices, left to right.
     *
     * <p>Any remainder columns on the right (width not divisible by four) are
     * dropped. Touching glyphs are not separated; the cut is purely positional.
     *
     * @param img binarized captcha
     * @return the four slices, sharing pixel data with {@code img}
     * @throws Exception declared for interface compatibility
     */
    public static List<BufferedImage> splitImage(BufferedImage img)
            throws Exception {
        List<BufferedImage> subImgs = new ArrayList<BufferedImage>();
        int width = img.getWidth() / CHAR_COUNT;
        int height = img.getHeight();
        for (int i = 0; i < CHAR_COUNT; i++) {
            subImgs.add(img.getSubimage(width * i, 0, width, height));
        }
        return subImgs;
    }

    /**
     * Loads the template glyphs, caching them after the first call.
     *
     * <p>Every regular file in {@link #trainPath} that decodes as an image
     * becomes a template; the first character of its file name is the label.
     * Files are processed in sorted order and kept in that order, so ties
     * between equally good templates are broken the same way on every run.
     *
     * @return map from template image to its one-character label
     * @throws Exception if the template directory cannot be listed or a file cannot be read
     */
    public static Map<BufferedImage, String> loadTrainData() throws Exception {
        if (trainMap == null) {
            File dir = new File(trainPath);
            File[] files = dir.listFiles();
            if (files == null) {
                throw new IOException("Template directory not found: " + dir.getAbsolutePath());
            }
            java.util.Arrays.sort(files);
            Map<BufferedImage, String> map = new java.util.LinkedHashMap<BufferedImage, String>();
            for (File file : files) {
                if (!file.isFile()) {
                    continue;
                }
                BufferedImage glyph = ImageIO.read(file);
                if (glyph == null) {
                    // Not an image (e.g. a thumbnail cache file); a null key would break matching.
                    continue;
                }
                map.put(glyph, String.valueOf(file.getName().charAt(0)));
            }
            trainMap = map;
        }
        return trainMap;
    }

    /**
     * Recognizes a single glyph slice.
     *
     * <p>Each template whose width is within 2 px of the slice is compared over
     * the overlapping area; the template with the fewest differing
     * black/non-black pixels wins.
     *
     * @param img binarized glyph slice
     * @param map templates as returned by {@link #loadTrainData}
     * @return the label of the best template, or {@code "#"} if none qualified
     */
    public static String getSingleCharOcr(BufferedImage img,
            Map<BufferedImage, String> map) {
        String result = "#";
        int width = img.getWidth();
        int height = img.getHeight();
        // Best (lowest) mismatch count so far; starts at the pixel count of the slice.
        int min = width * height;
        for (Map.Entry<BufferedImage, String> entry : map.entrySet()) {
            BufferedImage bi = entry.getKey();
            if (bi == null || Math.abs(bi.getWidth() - width) > 2) {
                continue;
            }
            int widthmin = Math.min(width, bi.getWidth());
            int heightmin = Math.min(height, bi.getHeight());
            int count = 0;
            Label1: for (int x = 0; x < widthmin; ++x) {
                for (int y = 0; y < heightmin; ++y) {
                    if (isBlack(img.getRGB(x, y)) != isBlack(bi.getRGB(x, y))) {
                        count++;
                        // Already no better than the current best: stop comparing this template.
                        if (count >= min) {
                            break Label1;
                        }
                    }
                }
            }
            if (count < min) {
                min = count;
                result = entry.getValue();
            }
        }
        return result;
    }

    /**
     * Recognizes a whole captcha.
     *
     * <p>As a side effect the binarized image is written to the
     * {@code result} directory as {@code <recognized text>.png}.
     *
     * @param file local path of the captcha image
     * @return the recognized text, one character per slice ({@code '#'} for unmatched slices)
     * @throws Exception if the image or the templates cannot be read, or the result image cannot be written
     */
    public static String getAllOcr(String file) throws Exception {
        BufferedImage img = removeBackgroud(file);
        List<BufferedImage> listImg = splitImage(img);
        Map<BufferedImage, String> map = loadTrainData();
        StringBuilder text = new StringBuilder();
        for (BufferedImage bi : listImg) {
            text.append(getSingleCharOcr(bi, map));
        }
        String result = text.toString();
        File out = new File(RESULT_PATH + result + ".png");
        ensureParentDir(out);
        ImageIO.write(img, "PNG", out);
        return result;
    }

    /**
     * Creates the parent directory of {@code target} if it is missing.
     * A failure is not reported here; it surfaces when the file is opened.
     */
    private static void ensureParentDir(File target) {
        File parent = target.getParentFile();
        if (parent != null) {
            parent.mkdirs();
        }
    }

    /** Copies {@code in} to {@code target}, closing both streams even on failure. */
    private static void copyToFile(InputStream in, File target) throws IOException {
        try {
            OutputStream out = new FileOutputStream(target);
            try {
                byte[] buffer = new byte[1024];
                int length;
                while ((length = in.read(buffer)) != -1) {
                    out.write(buffer, 0, length);
                }
            } finally {
                out.close();
            }
        } finally {
            in.close();
        }
    }

    /**
     * Downloads captcha images into {@link #srcPath} as {@code 0.png} .. {@code 9.png}.
     *
     * <p>Best effort: a failed request is reported on stderr and the loop
     * moves on to the next image.
     *
     * @param url captcha URL
     */
    public static void downloadImage(String url) {
        CloseableHttpClient httpClient = JwUtils.getHttpClient();
        for (int i = 0; i < SAMPLE_COUNT; i++) {
            HttpGet getMethod = new HttpGet(url);
            try {
                HttpResponse response = httpClient.execute(getMethod);
                HttpEntity entity = response.getEntity();
                // Check the numeric code: the protocol version and reason phrase may vary.
                if (response.getStatusLine().getStatusCode() == 200 && entity != null) {
                    File target = new File(srcPath + i + ".png");
                    ensureParentDir(target);
                    copyToFile(entity.getContent(), target);
                }
            } catch (IOException e) {
                // Also covers ClientProtocolException, which is an IOException.
                e.printStackTrace();
            } finally {
                // Give the connection back even when the body was not (fully) read.
                getMethod.releaseConnection();
            }
        }
    }

    /**
     * Builds template glyphs from labelled samples.
     *
     * <p>Every regular file in {@link #tempPath} is binarized and split; slice
     * {@code j} is saved to {@link #trainPath} as
     * {@code <j-th character of the sample's file name>-<counter>.png}, so the
     * sample file names must start with the captcha's text. Files whose name
     * is shorter than four characters are skipped.
     *
     * @throws Exception if the sample directory cannot be listed or an image cannot be read or written
     */
    public static void trainData() throws Exception {
        File dir = new File(tempPath);
        File[] files = dir.listFiles();
        if (files == null) {
            throw new IOException("Sample directory not found: " + dir.getAbsolutePath());
        }
        for (File file : files) {
            String name = file.getName();
            if (!file.isFile() || name.length() < CHAR_COUNT) {
                continue;
            }
            BufferedImage img = removeBackgroud(file.getPath());
            List<BufferedImage> listImg = splitImage(img);
            if (listImg.size() == CHAR_COUNT) {
                for (int j = 0; j < listImg.size(); ++j) {
                    File out = new File(trainPath + name.charAt(j) + "-" + (index++) + ".png");
                    ensureParentDir(out);
                    ImageIO.write(listImg.get(j), "PNG", out);
                }
            }
        }
    }

    /**
     * Downloads a batch of captchas and prints the recognized text of each.
     *
     * @param args unused
     * @throws Exception if recognition of any image fails
     */
    public static void main(String[] args) throws Exception {
        downloadImage(getImageUrl);
        //trainData();
        for (int i = 0; i < SAMPLE_COUNT; ++i) {
            String text = getAllOcr(srcPath + i + ".png");
            System.out.println(i + ".png = " + text);
        }
    }
}

这是识别效果图:
这里写图片描述

这里写图片描述

验证码识别的源码:http://download.csdn.net/detail/sinat_18127633/9210335

  • 2
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 3
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值