(java)
http://www.bubuko.com/infodetail-1007293.html
id="cproIframe_u2385951_1" width="680" height="250" src="http://pos.baidu.com/acom?adn=0&adp=1&at=0&aurl=&c01=1&cad=1&ccd=24&cec=UTF-8&cfv=0&ch=0&col=zh-CN&conBW=1&conOP=1&cpa=1&cpro_lu=1%2C%23dfe4f9%2C%23000000%2C%E5%AE%8B%E4%BD%93&dai=1&dis=0<r=https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DGwyMa3rNDW5ZzeoO1fOeBxTBGL0lHzGevZEspmulpZfetnKRvhocuLqlDNxPyeN0UNRcd_cDvQgtUT4PQlTuoK%26wd%3D%26eqid%3Ddfdcafe100011a2600000005563bac6a<u=http%3A%2F%2Fwww.bubuko.com%2Finfodetail-1007293.html&lu_161=0&lunum=6&n=65035100_cpr&pat=6&pcs=1280x675&pih=0&pis=10000x10000&piw=0&ps=152x140&psr=1280x800&pss=1280x675&ptbg=90&ptp=0&ptt=0&qn=c5a2d2ba6879747c&rad=&rsi0=680&rsi1=250&rsi5=4&rss0=%23FFFFFF&rss1=%23FFFFFF&rss2=%23000000&rss3=&rss4=&rss5=&rss6=%23e10900&rss7=&scale=&skin=tabcloud_skin_5&stid=5&td_id=2385951&titFF=%25E5%25BE%25AE%25E8%25BD%25AF%25E9%259B%2585%25E9%25BB%2591&titFS=14&titSU=0&titTA=left&tn=baiduCustNativeAD&tpr=1446750794660&ts=1&version=2.0&xuanting=0&dtm=BAIDU_DUP2_SETJSONADSLOT&dc=2&di=u2385951&ti=Webdriver%E9%85%8D%E5%90%88Tesseract-OCR%20%E8%87%AA%E5%8A%A8%E8%AF%86%E5%88%AB%E7%AE%80%E5%8D%95%E7%9A%84%E9%AA%8C%E8%AF%81%E7%A0%81&tt=1446750794620.42.233.238" align="center,center" marginwidth="0" marginheight="0" scrolling="no" frameborder="0" allowtransparency="true" style="padding: 0px;">
Webdriver配合Tesseract-OCR 自动识别简单的验证码
标签:des class style log com http 使用 代码 src
验证码: 如下,在进行自动化测试,遇到验证码的问题,一般有两种方式
1.找开发去掉验证码或者使用万能验证码
2.使用OCR自动识别
使用OCR自动化识别,一般识别率不是太高,处理一般简单验证码还是没问题
这里使用的是Tesseract-OCR,下载地址:https://github.com/A9T9/Free-Ocr-Windows-Desktop/releases
怎么使用呢?
进入安装后的目录:
tesseract.exe test.png test -1
准备一份网页,上面使用该验证码
<!-- Minimal local test page that embeds the captcha image to be recognised. -->
<html>
<head>
<title>Table test by Young</title>
</head>
<body>
<br>
<h1> Test </h1>
<img src="http://csujwc.its.csu.edu.cn/sys/ValidateCode.aspx?t=1">
<br>
</body>
</html>
要识别验证码,首先得取得验证码图片。这里采取对页面元素局部截图的方式:首先获取整个页面的截图,
然后找到页面元素坐标进行截取
/**
 * Saves a PNG screenshot of a single page element.
 *
 * <p>Takes a screenshot through the driver, crops it to the element's
 * reported location and size, and copies the cropped image to {@code path}.
 * The crop rectangle is clamped to the screenshot bounds, so an element that
 * is only partly inside the captured image no longer makes the crop fail.
 *
 * <p>An {@link IOException} while reading, writing or copying the image is
 * reported with {@code printStackTrace()} and the method then returns
 * normally, as before.
 *
 * <p>NOTE(review): assumes element coordinates and screenshot pixels share
 * the same origin and scale - confirm for scrolled pages and HiDPI displays.
 *
 * @param driver  driver used to take the screenshot; it is cast to
 *                {@code TakesScreenshot}
 * @param element element whose area is cut out of the screenshot
 * @param path    destination file path of the cropped PNG
 * @throws InterruptedException not thrown by this implementation; kept in
 *         the signature so that existing callers still compile
 * @throws java.awt.image.RasterFormatException if the element lies completely
 *         outside the captured screenshot
 */
public static void screenShotForElement(WebDriver driver,
        WebElement element, String path) throws InterruptedException {
    File scrFile = ((TakesScreenshot) driver).getScreenshotAs(OutputType.FILE);
    try {
        BufferedImage img = ImageIO.read(scrFile);
        if (img == null) {
            // ImageIO.read returns null when no registered reader can decode the file.
            throw new IOException("Cannot decode screenshot file: " + scrFile);
        }

        Point p = element.getLocation();
        Rectangle wanted = new Rectangle(p.getX(), p.getY(),
                element.getSize().getWidth(), element.getSize().getHeight());
        // Clamp to the image so getSubimage is never asked for pixels that do not exist.
        Rectangle rect = wanted.intersection(
                new Rectangle(0, 0, img.getWidth(), img.getHeight()));
        if (rect.isEmpty()) {
            throw new java.awt.image.RasterFormatException(
                    "Element area " + wanted + " is outside the " + img.getWidth()
                            + "x" + img.getHeight() + " screenshot");
        }

        BufferedImage dest = img.getSubimage(rect.x, rect.y, rect.width, rect.height);
        // Overwrite the temporary screenshot with the cropped image, then copy it
        // to the destination path.
        ImageIO.write(dest, "png", scrFile);
        FileUtils.copyFile(scrFile, new File(path));
    } catch (IOException e) {
        e.printStackTrace();
    }
}
截取完元素,就可以调用Tesseract-OCR生成text
// use Tesseract to get strings Runtime rt = Runtime.getRuntime(); rt.exec("cmd.exe /C tesseract.exe D:\\Tesseract-OCR\\test.png D:\\Tesseract-OCR\\test -1 ");
接下来通过java读取txt
/** * This method for read TXT file * * @param filePath */ public static void readTextFile(String filePath) { try { String encoding = "GBK"; File file = new File(filePath); if (file.isFile() && file.exists()) { // 判断文件是否存在 InputStreamReader read = new InputStreamReader( new FileInputStream(file), encoding);// 考虑到编码格式 BufferedReader bufferedReader = new BufferedReader(read); String lineTxt = null; while ((lineTxt = bufferedReader.readLine()) != null) { System.out.println(lineTxt); } read.close(); } else { System.out.println("找不到指定的文件"); } } catch (Exception e) { System.out.println("读取文件内容出错"); e.printStackTrace(); } }
整体代码如下:
1 package com.dbyl.tests; 2 3 import java.awt.Rectangle; 4 import java.awt.image.BufferedImage; 5 import java.io.BufferedReader; 6 import java.io.File; 7 import java.io.FileInputStream; 8 import java.io.IOException; 9 import java.io.InputStreamReader; 10 import java.io.Reader; 11 import java.util.concurrent.TimeUnit; 12 13 import javax.imageio.ImageIO; 14 15 import org.apache.commons.io.FileUtils; 16 import org.openqa.selenium.By; 17 import org.openqa.selenium.OutputType; 18 import org.openqa.selenium.Point; 19 import org.openqa.selenium.TakesScreenshot; 20 import org.openqa.selenium.WebDriver; 21 import org.openqa.selenium.WebElement; 22 23 import com.dbyl.libarary.utils.DriverFactory; 24 25 public class TesseractTest { 26 27 public static void main(String[] args) throws IOException, 28 InterruptedException { 29 30 WebDriver driver = DriverFactory.getChromeDriver(); 31 driver.get("file:///C:/Users/validation.html"); 32 driver.manage().timeouts().pageLoadTimeout(30, TimeUnit.SECONDS); 33 WebElement element = driver.findElement(By.xpath("//img")); 34 35 // take screen shot for element 36 screenShotForElement(driver, element, "D:\\Tesseract-OCR\\test.png"); 37 38 driver.quit(); 39 40 // use Tesseract to get strings 41 Runtime rt = Runtime.getRuntime(); 42 rt.exec("cmd.exe /C tesseract.exe D:\\Tesseract-OCR\\test.png D:\\Tesseract-OCR\\test -1 "); 43 44 Thread.sleep(1000); 45 // Read text 46 readTextFile("D:\\Tesseract-OCR\\test.txt"); 47 } 48 49 /** 50 * This method for read TXT file 51 * 52 * @param filePath 53 */ 54 public static void readTextFile(String filePath) { 55 try { 56 String encoding = "GBK"; 57 File file = new File(filePath); 58 if (file.isFile() && file.exists()) { // 判断文件是否存在 59 InputStreamReader read = new InputStreamReader( 60 new FileInputStream(file), encoding);// 考虑到编码格式 61 BufferedReader bufferedReader = new BufferedReader(read); 62 String lineTxt = null; 63 while ((lineTxt = bufferedReader.readLine()) != null) { 64 System.out.println(lineTxt); 
65 } 66 read.close(); 67 } else { 68 System.out.println("找不到指定的文件"); 69 } 70 } catch (Exception e) { 71 System.out.println("读取文件内容出错"); 72 e.printStackTrace(); 73 } 74 } 75 76 /** 77 * This method for screen shot element 78 * 79 * @param driver 80 * @param element 81 * @param path 82 * @throws InterruptedException 83 */ 84 public static void screenShotForElement(WebDriver driver, 85 WebElement element, String path) throws InterruptedException { 86 File scrFile = ((TakesScreenshot) driver) 87 .getScreenshotAs(OutputType.FILE); 88 try { 89 Point p = element.getLocation(); 90 int width = element.getSize().getWidth(); 91 int height = element.getSize().getHeight(); 92 Rectangle rect = new Rectangle(width, height); 93 BufferedImage img = ImageIO.read(scrFile); 94 BufferedImage dest = img.getSubimage(p.getX(), p.getY(), 95 rect.width, rect.height); 96 ImageIO.write(dest, "png", scrFile); 97 Thread.sleep(1000); 98 FileUtils.copyFile(scrFile, new File(path)); 99 } catch (IOException e) { 100 e.printStackTrace(); 101 } 102 } 103 104 }
踩
(
0)
赞
(
0)
举报
width="336" height="280" frameborder="0" marginwidth="0" marginheight="0" vspace="0" hspace="0" allowtransparency="true" scrolling="no" allowfullscreen="true" id="aswift_0" name="aswift_0" style="padding: 0px; left: 0px; position: absolute; top: 0px;">
width="336" height="280" frameborder="0" marginwidth="0" marginheight="0" vspace="0" hspace="0" allowtransparency="true" scrolling="no" allowfullscreen="true" id="aswift_1" name="aswift_1" style="padding: 0px; left: 0px; position: absolute; top: 0px;">
评论
一句话评论(0)
共0条
width="300" height="600" frameborder="0" marginwidth="0" marginheight="0" vspace="0" hspace="0" allowtransparency="true" scrolling="no" allowfullscreen="true" id="aswift_2" name="aswift_2" style="padding: 0px; left: 0px; position: absolute; top: 0px;">
分享档案
文章周排行
- 45种攻入网站后台的方法 2014-09-21
- chrome jsonView插件安装 2015-03-26
- 将Web项目War包部署到Tomcat服务器基本步骤 2015-01-17
- dubbox开发rest+json指南【转】 2014-11-23
- HTML中的<select>标签如何设置默认选中的选项 2014-11-12
- 论文笔记:Faster R-CNN:Towards Real-Time Object Detection with Region Proposal Networks 2015-06-17
- CSS实现响应式全屏背景图 2015-03-14
- WebStorm+Node.js开发环境的配置 2015-03-13
- CSS属性 2015-03-14
- 312个免费高速HTTP代理IP(能隐藏自己真实IP地址) 2015-04-14
最新新闻
- 揭秘谷歌AMP项目:它是如何提高网页加载速度的? 2015-11-05
- 百度通过跟踪用户发现鬼城 2015-11-05
- 国产大飞机C919换上航空公司涂装效果图 2015-11-05
- 入室小偷竟替户主代签收快递 案发后被快递员指认 2015-11-05
- 三季度空气净化器品牌口碑排行榜 2015-11-05
- BBC商店上线 可付费下载数字内容 2015-11-05
- 苹果新专利:指纹进入应急模式,iPhone 可能救你一命 2015-11-05
- 研究显示Android和iOS应用都会大量收集用户数据 2015-11-05
- 北京移动VoLTE试商启动 推4G高清语音业务 2015-11-05
- 保险业规定使用国内科技产品引国外团体不满 2015-11-05
width="300" height="250" frameborder="0" marginwidth="0" marginheight="0" vspace="0" hspace="0" allowtransparency="true" scrolling="no" allowfullscreen="true" id="aswift_3" name="aswift_3" style="padding: 0px; left: 0px; position: absolute; top: 0px;">
id="cproIframe_u2385978" src="http://pos.baidu.com/acom?adn=3&adp=1&at=0&aurl=&c01=1&cad=1&ccd=24&cec=UTF-8&cfv=0&ch=0&col=zh-CN&conBW=1&conOP=0&cpa=1&dai=2&dis=0<r=https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DGwyMa3rNDW5ZzeoO1fOeBxTBGL0lHzGevZEspmulpZfetnKRvhocuLqlDNxPyeN0UNRcd_cDvQgtUT4PQlTuoK%26wd%3D%26eqid%3Ddfdcafe100011a2600000005563bac6a<u=http%3A%2F%2Fwww.bubuko.com%2Finfodetail-1007293.html&lunum=6&n=65035100_cpr&pat=17&pcs=1263x675&pis=10000x10000&ps=2504x831&psr=1280x800&pss=1263x8311&ptt=0&qn=df2cd68e9a9a5524&rad=&rsi0=300&rsi1=300&rsi5=4&rss0=%23FFFFFF&rss1=%23FFFFFF&rss2=%23000000&rss3=%23444444&rss4=%23008000&rss5=&rss6=%23e10900&rss7=&scale=&skin=&td_id=2385978&titFF=%25E5%25BE%25AE%25E8%25BD%25AF%25E9%259B%2585%25E9%25BB%2591&titFS=14&titSU=0&tn=baiduCustNativeAD&tpr=1446750794660&ts=1&xuanting=1&dtm=BAIDU_DUP2_SETJSONADSLOT&dc=2&di=u2385978&ti=Webdriver%E9%85%8D%E5%90%88Tesseract-OCR%20%E8%87%AA%E5%8A%A8%E8%AF%86%E5%88%AB%E7%AE%80%E5%8D%95%E7%9A%84%E9%AA%8C%E8%AF%81%E7%A0%81&tt=1446750794620.1746.1951.1954" width="300" height="300" align="center,center" marginwidth="0" marginheight="0" scrolling="no" frameborder="0" allowtransparency="true" style="padding: 0px;">
id="cproIframe_u1738566_3" width="680" height="250" src="http://pos.baidu.com/acom?adn=4&adp=1&at=0&aurl=&c01=1&cad=1&ccd=24&cec=UTF-8&cfv=0&ch=0&col=zh-CN&conBW=1&conOP=1&cpa=1&dai=3&dis=0<r=https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DGwyMa3rNDW5ZzeoO1fOeBxTBGL0lHzGevZEspmulpZfetnKRvhocuLqlDNxPyeN0UNRcd_cDvQgtUT4PQlTuoK%26wd%3D%26eqid%3Ddfdcafe100011a2600000005563bac6a<u=http%3A%2F%2Fwww.bubuko.com%2Finfodetail-1007293.html&lu_161=0&lunum=6&n=65035100_cpr&pat=6&pcs=1263x675&pih=0&pis=10000x10000&piw=0&ps=8330x131&psr=1280x800&pss=1263x8351&ptbg=90&ptp=0&ptt=0&qn=8764113a7b5840fc&rad=&rsi0=680&rsi1=250&rsi5=4&rss0=%23FFFFFF&rss1=%23FFFFFF&rss2=%23000000&rss3=%23444444&rss4=%23008000&rss5=&rss6=%23e10900&rss7=&scale=&skin=tabcloud_skin_3&stid=5&td_id=1738566&titFF=%25E5%25BE%25AE%25E8%25BD%25AF%25E9%259B%2585%25E9%25BB%2591&titFS=14&titSU=0&titTA=left&tn=baiduCustNativeAD&tpr=1446750794660&ts=1&version=2.0&xuanting=0&dtm=BAIDU_DUP2_SETJSONADSLOT&dc=2&di=u1738566&ti=Webdriver%E9%85%8D%E5%90%88Tesseract-OCR%20%E8%87%AA%E5%8A%A8%E8%AF%86%E5%88%AB%E7%AE%80%E5%8D%95%E7%9A%84%E9%AA%8C%E8%AF%81%E7%A0%81&tt=1446750794620.1978.2076.2076" align="center,center" marginwidth="0" marginheight="0" scrolling="no" frameborder="0" allowtransparency="true" style="padding: 0px;">
width="300" height="250" frameborder="0" marginwidth="0" marginheight="0" vspace="0" hspace="0" allowtransparency="true" scrolling="no" allowfullscreen="true" id="aswift_4" name="aswift_4" style="padding: 0px; left: 0px; position: absolute; top: 0px;">
关闭