selenium WebDriver 截取网站的验证码

最新推荐文章于 2024-07-01 23:56:52 发布

jshuanghua

最新推荐文章于 2024-07-01 23:56:52 发布

阅读量1.6k

点赞数 1

分类专栏： java开发

java开发专栏收录该内容

4 篇文章 0 订阅

订阅专栏

selenium WebDriver 截取网站的验证码

在做爬虫项目的时候，有时会遇到验证码的问题。某些网站的验证码是动态生成的，即使是同一个链接，在不同的时间访问也可能产生不同的验证码。

一开始的思路是：打开验证码的链接，通过 Java 代码发送 GET 请求把验证码图片保存到本地，再用打码工具解析验证码，最后将结果自动输入验证框，这样就可以解决验证码的问题了。

但是问题来了：每次请求同一个地址，产生的验证码图片都不一样，也就是说下载到的图片和页面上显示的并不是同一张，所以这种方法行不通。

因此只能先用 selenium WebDriver 把页面上显示的验证码图片截取到本地，再用打码工具解析，然后自动填写验证码，这样就很好地解决了验证码的问题。

 
           package 
           com.entrym.main; 
          
           import 
           java.awt.image.BufferedImage; 
          
           import 
           java.io.File; 
          
           import 
           java.io.IOException; 
          
           import 
           java.util.ArrayList; 
          
           import 
           java.util.Date; 
          
           import 
           java.util.HashMap; 
          
           import 
           java.util.List; 
          
           import 
           java.util.Set; 
          
           import 
           javax.imageio.ImageIO; 
          
           import 
           org.apache.commons.io.FileUtils; 
          
           import 
           org.apache.commons.lang3.StringUtils; 
          
           import 
           org.json.JSONObject; 
          
           import 
           org.jsoup.Jsoup; 
          
           import 
           org.jsoup.nodes.Document; 
          
           import 
           org.jsoup.nodes.Element; 
          
           import 
           org.openqa.selenium.By; 
          
           import 
           org.openqa.selenium.Cookie; 
          
           import 
           org.openqa.selenium.OutputType; 
          
           import 
           org.openqa.selenium.Point; 
          
           import 
           org.openqa.selenium.TakesScreenshot; 
          
           import 
           org.openqa.selenium.WebDriver; 
          
           import 
           org.openqa.selenium.WebElement; 
          
           import 
           org.openqa.selenium.chrome.ChromeDriver; 
          
           import 
           org.openqa.selenium.support.ui.ExpectedCondition; 
          
           import 
           org.openqa.selenium.support.ui.WebDriverWait; 
          
           import 
           com.entrym.crawler.util.verifyCode.Captcha; 
          
           import 
           com.entrym.crawler.util.verifyCode.DamaUtil; 
          
           import 
           com.entrym.domain.SogouInfo; 
          
           import 
           com.entrym.domain.Wxinfo; 
          
           import 
           com.entrym.util.ConfigUtil; 
          
           import 
           com.entrym.util.DateUtil; 
          
           import 
           com.entrym.util.HttpUtils; 
          
           import 
           com.google.gson.Gson; 
          
           import 
           com.vdurmont.emoji.EmojiParser; 
          
           public 
           class 
           WebTest { 
          
           private 
           static 
           final 
           String GET_TITLE= 
           "/titles/getxiaoshuo" 
           ; 
          
           private 
           static 
           final 
           String PATH= 
           new 
           File( 
           "config/config.properties" 
           ).getAbsolutePath(); 
          
           private 
           static 
           final 
           String CHROME_HOME= 
           new 
           File( 
           "config/chromedriver.exe" 
           ).getAbsolutePath(); 
          
           private 
           static 
           final 
           String CHROME_HOME_LINUX= 
           new 
           File( 
           "config/chromedriver" 
           ).getAbsolutePath(); 
          
           private 
           static 
           final 
           String BASEURL=ConfigUtil.reads(PATH,  
           "baseurl" 
           ); 
          
           public 
           static 
           void 
           main(String[] args)  
           throws 
           IOException { 
          
           WebDriver driver= 
           null 
           ; 
          
           //          System.setProperty("webdriver.gecko.driver", FIREFOX_HOME); 
          
           System.out.println(PATH); 
          
           String osname=System.getProperty( 
           "os.name" 
           ).toLowerCase(); 
          
           if 
           (osname.indexOf( 
           "linux" 
           )>= 
           0 
           ){ 
          
           System.setProperty( 
           "webdriver.chrome.driver" 
           , CHROME_HOME_LINUX); 
          
           //              driver = new MarionetteDriver(); 
          
           } 
           else 
           { 
          
           System.setProperty( 
           "webdriver.chrome.driver" 
           , CHROME_HOME); 
          
           //              driver = new MarionetteDriver(); 
          
           } 
          
           driver= 
           new 
           ChromeDriver(); 
          
           driver.get( 
           "http://weixin.sogou.com/antispider/?from=%2fweixin%3Ftype%3d2%26query%3dz+%26ie%3dutf8%26s_from%3dinput%26_sug_%3dy%26_sug_type_%3d" 
           ); 
          
           WebElement ele = driver.findElement(By.id( 
           "seccodeImage" 
           )); 
          
           // Get entire page screenshot 
          
           File screenshot = ((TakesScreenshot)driver).getScreenshotAs(OutputType.FILE); 
          
           BufferedImage  fullImg = ImageIO.read(screenshot); 
          
           // Get the location of element on the page 
          
           Point point = ele.getLocation(); 
          
           // Get width and height of the element 
          
           int 
           eleWidth = ele.getSize().getWidth(); 
          
           int 
           eleHeight = ele.getSize().getHeight(); 
          
           // Crop the entire page screenshot to get only element screenshot 
          
           BufferedImage eleScreenshot= fullImg.getSubimage(point.getX(), point.getY(), 
          
           eleWidth, eleHeight); 
          
           ImageIO.write(eleScreenshot,  
           "png" 
           , screenshot); 
          
           // Copy the element screenshot to disk 
          
           File screenshotLocation =  
           new 
           File( 
           "D:/captcha/test.png" 
           ); 
          
           FileUtils.copyFile(screenshot, screenshotLocation); 
          
           WebElement classelement = driver.findElement(By.className( 
           "p2" 
           )); 
          
           String errorText=classelement.getText(); 
          
           System.out.println( 
           "输出的内容是" 
           +classelement.getText()); 
          
           if 
           (errorText.indexOf( 
           "用户您好，您的访问过于频繁，为确认本次访问为正常用户行为" 
           )>= 
           0 
           ){ 
          
           System.out.println( 
           "*********************" 
           ); 
          
           DamaUtil util= 
           new 
           DamaUtil(); 
          
           System.out.println( 
           "===================" 
           ); 
          
           String code= 
           "" 
           ;            
           //验证码 
          
           Captcha captcha= 
           new 
           Captcha(); 
          
           captcha.setFilePath( 
           "test.png" 
           ); 
          
           code = DamaUtil.getCaptchaResult(captcha); 
          
           System.out.println( 
           "打码处理出来的验证码是" 
           +code); 
          
           WebElement elementsumbit = driver.findElement(By.id( 
           "seccodeInput" 
           )); 
          
           // 输入关键字 
          
           elementsumbit.sendKeys(code); 
          
           try 
           { 
          
           Thread.sleep( 
           1000 
           ); 
          
           }  
           catch 
           (InterruptedException e) { 
          
           // TODO Auto-generated catch block 
          
           e.printStackTrace(); 
          
           } 
          
           // 提交 input 所在的  form 
          
           elementsumbit.submit(); 
          
           System.out.println( 
           "成功" 
           ); 
          
           } 
          
           } 
          
           }

以上就是完整的代码，其中关键的截图代码是在 Stack Overflow 上找到的，不得不说谷歌还是很强大的。

// Open the page and look up the element to capture.
driver.get("http://www.google.com");
WebElement target = driver.findElement(By.id("hplogo"));

// Take a screenshot as a file and decode it into an image.
File shotFile = ((TakesScreenshot) driver).getScreenshotAs(OutputType.FILE);
BufferedImage pageImage = ImageIO.read(shotFile);

// Top-left corner of the element.
Point origin = target.getLocation();

// Cut the element's rectangle (position plus width/height) out of the
// screenshot and write it back over the screenshot file as PNG.
BufferedImage cropped = pageImage.getSubimage(
    origin.getX(),
    origin.getY(),
    target.getSize().getWidth(),
    target.getSize().getHeight());
ImageIO.write(cropped, "png", shotFile);

// Copy the cropped image to its final location on disk.
File destination = new File("C:\\images\\GoogleLogo_screenshot.png");
FileUtils.copyFile(shotFile, destination);

以上就是关键的截图代码，出自 Stack Overflow 上的这个问题：http://stackoverflow.com/questions/13832322/how-to-capture-the-screenshot-of-a-specific-element-rather-than-entire-page-usin
感兴趣的小伙伴可以研究一下。