Selenium的使用

1.前提工作。

一、引入对应的依赖jar包
https://mvnrepository.com/
可以从以上网站找到对应的依赖,然后将其添加到项目的pom.xml中(Maven会自动把jar包下载到本地仓库)。

 <!-- Selenium -->
        <!-- https://mvnrepository.com/artifact/org.seleniumhq.selenium/selenium-java -->
        <dependency>
            <groupId>org.seleniumhq.selenium</groupId>
            <artifactId>selenium-java</artifactId>
            <version>3.141.59</version>
        </dependency>

二、准备浏览器的驱动器。
http://chromedriver.storage.googleapis.com/index.html

若版本太新找不到则参考该网址
https://chromedriver.com/
https://chromedriver.com/download
可以在以上网址下载与浏览器版本对应的驱动器。我使用的是谷歌浏览器,所以下载与谷歌浏览器版本对应的chromedriver。
谷歌版本:
在这里插入图片描述
驱动器:(此处需要下载与浏览器版本对应的驱动器,Windows系统统一使用win32版本)
在这里插入图片描述
下载完成后解压驱动器到自定义路径。
我的是:
E:\selenium\chromedriver.exe

2.操作案例:

①打开某个网页并让网页滚动,抓取相对应的元素

package com.xp.climb.selenium;
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.firefox.FirefoxDriver;
public class SinaNewsRolling {

	/**
	 * Opens a Sina news article in Chrome, scrolls the page in several steps
	 * (pausing between steps so that more content can load), then parses the
	 * rendered HTML with Jsoup and prints the title and link of every card
	 * found under the element with id {@code tycard_list}.
	 *
	 * <p>The browser is always closed when this method exits, whether it
	 * finishes normally or fails part-way through.
	 *
	 * @param args unused
	 * @throws IOException declared for interface compatibility
	 * @throws InterruptedException if the thread is interrupted while pausing between scrolls
	 */
	public static void main(String[] args) throws IOException, InterruptedException {
		// Tell Selenium where the chromedriver executable is located.
		System.setProperty("webdriver.chrome.driver", "E:\\selenium\\chromedriver.exe");
		// Start a Chrome browser session.
		ChromeDriver driver = new ChromeDriver();
		try {
			// Open the news article in Chrome.
			driver.get("http://k.sina.com.cn/article_6436034945_17f9e198100100krct.html?from=home");

			JavascriptExecutor js = (JavascriptExecutor) driver;
			// NOTE(review): the first argument of scrollTo is the horizontal offset;
			// the non-zero x values are kept exactly as in the original — confirm
			// whether 0 was intended.
			String[] scrollScripts = {
				"scrollTo(0, 5000)",
				"scrollTo(5000, 10000)",
				"scrollTo(10000, 30000)",
				"scrollTo(10000, 50000)"
			};
			try {
				for (int step = 0; step < scrollScripts.length; step++) {
					js.executeScript(scrollScripts[step]);
					System.out.println(String.valueOf(step + 1));
					if (step < scrollScripts.length - 1) {
						// Adjusting this pause lets more content load before the next scroll.
						Thread.sleep(5000);
					}
				}
			} catch (RuntimeException e) {
				// Scrolling failed: report it and stop here instead of reading the
				// page source from a session that is about to be closed.
				System.out.println("Error at loading the page ...");
				System.err.println(e);
				return;
			}

			// Parse the rendered page.
			String html = driver.getPageSource();
			Document doc = Jsoup.parse(html);
			Elements cards = doc.select("[id=tycard_list]")
					.select("div[class=ty-card ty-card-type1 clearfix]");
			for (Element card : cards) {
				Elements titleLink = card.select("h3[class=ty-card-tt]").select("a");
				String newsTitle = titleLink.text();
				String newsUrl = titleLink.attr("href");
				System.out.println(newsTitle + "\t" + newsUrl);
			}
		} finally {
			// Close the browser on every exit path.
			driver.quit();
		}
	}
}

②打开qq邮箱并自动登录,点击收件箱。抓取相对应的元素

package com.xp.climb.selenium;
import java.io.IOException;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.openqa.selenium.By;
import org.openqa.selenium.Cookie;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.firefox.FirefoxDriver;

public class LoginQqEmail {

	public static void main(String[] args) throws IOException, InterruptedException {
		//chromedriver配置
		System.setProperty("webdriver.chrome.driver", "E:\\selenium\\chromedriver.exe");
		//声明使用的是谷歌浏览器
		ChromeDriver driver = new ChromeDriver();
		//使用谷歌浏览器打开QQ邮箱网页
		driver.get("https://mail.qq.com/");
		//元素定位,提交用户名以及密码

		driver.manage().timeouts().implicitlyWait(5,TimeUnit.SECONDS);
		//快捷登录。
//		driver.switchTo().frame("login_frame").findElement(By.id("img_out_995536807")).click();
        driver.switchTo().frame("login_frame").findElement(By.id("switcher_plogin")).click();
        Thread.sleep(5000);
		driver.findElement(By.id("u")).clear();  //清空后输入
		driver.findElement(By.id("u")).sendKeys("换成你的账号");
		driver.manage().timeouts().implicitlyWait(5, TimeUnit.SECONDS);
		driver.findElement(By.id("p")).clear(); //清空后输入
		driver.findElement(By.id("p")).sendKeys("换成你的密码");
//		//元素定位,点击登陆按钮
		driver.findElement(By.id("login_button")).click();
		Thread.sleep(10*1000);  //休息一段时间,使得网页充分加载。注意这里非常有必要
        driver.findElement(By.id("folder_1")).click();
        Thread.sleep(5*1000);
		Set<Cookie> cookies = driver.manage().getCookies();
		//获取登陆的cookies
		String cookieStr = "";
		for (Cookie cookie : cookies) {
			cookieStr += cookie.getName() + "=" + cookie.getValue() + "; ";
		}
		System.out.println(cookieStr);
		//基于Jsoup,使用cookies请求个人信息页面
		Response orderResp = Jsoup   //添加一些header信息
				.connect("https://mail.qq.com/cgi-bin/frame_html?sid=Se7bEL8SNGKkATpa&r=36b065894a56b95efa607a2b42377adb")
//				.header("Host", "www.renren.com")
				.header("Connection", "keep-alive")
				.header("Cache-Control", "max-age=0")
				.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*;q=0.8")
//				.header("Origin", "http://www.renren.com")
				.header("Referer", "https://mail.qq.com/")
				.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0")
				.header("Content-Type", "application/x-www-form-urlencoded")
				.header("Accept-Encoding", "gzip, deflate, br")
				.header("Upgrade-Insecure-Requests", "1")
				.cookie("Cookie", cookieStr)
				.execute();
		//解析数据
		Document doc = orderResp.parse();
//		System.out.println(doc);
		org.jsoup.select.Elements elements = doc.select("iframe[id=mainFrame]")
				.select("div[class=tf no]");
		for (Element element : elements) {
			if (element.text().contains("博客园登录用户名")) {
				System.out.println(element.text());
			}
		}
		driver.quit();  // 关闭浏览器
	}
}

③打开网页,将验证码图片读取,存入本地,在控制台中输入对应的验证码,通过验证。抓取相对应的元素

package com.xp.climb.selenium;

import java.awt.image.BufferedImage;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.concurrent.TimeUnit;
import javax.imageio.ImageIO;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.openqa.selenium.By;
import org.openqa.selenium.OutputType;
import org.openqa.selenium.Point;
import org.openqa.selenium.TakesScreenshot;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.firefox.FirefoxBinary;
import org.openqa.selenium.firefox.FirefoxDriver;
import org.openqa.selenium.firefox.FirefoxOptions;
public class ScreenshotTest {

	/**
	 * Opens the Sogou WeChat anti-spider page in Chrome, saves the captcha image
	 * to a local file, reads the captcha answer typed on the console, submits
	 * it, and prints the title and link of each result on the page that follows.
	 *
	 * <p>The browser is always closed when this method exits, whether it
	 * finishes normally or fails part-way through.
	 *
	 * @param args unused
	 * @throws IOException if the screenshot cannot be read or written, or if no
	 *         captcha answer can be read from standard input
	 * @throws InterruptedException if the thread is interrupted while waiting for the result page
	 */
	public static void main(String[] args) throws IOException, InterruptedException {
//		FirefoxBinary firefoxBinary = new FirefoxBinary();
//		firefoxBinary.addCommandLineOptions("--headless");
		// Tell Selenium where the chromedriver executable is located.
		System.setProperty("webdriver.chrome.driver", "E:\\selenium\\chromedriver.exe");
//		FirefoxOptions firefoxOptions = new FirefoxOptions();
//		firefoxOptions.setBinary(firefoxBinary);
//		FirefoxDriver driver = new FirefoxDriver(firefoxOptions);
		ChromeDriver driver = new ChromeDriver();
		try {
			// Keep retrying until the page loads; every failed attempt restarts the
			// browser with a 10-second page-load timeout.
			while (true) {
				try {
					driver.get("http://weixin.sogou.com/antispider/?"
							+ "from=%2fweixin%3Ftype%3d2%26query"
							+ "%3dcomputer+%26ie%3dutf8%26s_from%"
							+ "3dinput%26_sug_%3dy%26_sug_type_%3d");
					break;
				} catch (RuntimeException e) {
					driver.quit();
					driver = new ChromeDriver();
					driver.manage().timeouts()
							.pageLoadTimeout(10, TimeUnit.SECONDS);
				}
			}

			// Locate the captcha image on the page.
			WebElement captchaImage = driver.findElement(By.id("seccodeImage"));
			// Take a screenshot of the whole page and load it as an image.
			File screenshot = ((TakesScreenshot) driver)
					.getScreenshotAs(OutputType.FILE);
			BufferedImage fullImg = ImageIO.read(screenshot);
			if (fullImg == null) {
				throw new IOException("Could not decode screenshot file: " + screenshot);
			}

			// Cut the captcha out of the full screenshot and save it locally.
			Point location = captchaImage.getLocation();
			BufferedImage captchaCrop = cropToBounds(fullImg,
					location.getX(), location.getY(),
					captchaImage.getSize().getWidth(), captchaImage.getSize().getHeight());
			ImageIO.write(captchaCrop, "png", new File("E:/selenium/test.png"));

			// Ask the user to type the captcha shown in the saved image.
			System.out.println("请输入验证码:");
			BufferedReader console = new BufferedReader(new InputStreamReader(System.in));
			String captchaSolution = console.readLine();
			if (captchaSolution == null) {
				// End of input: there is nothing to submit.
				throw new IOException("No captcha answer could be read from standard input");
			}

			driver.findElement(By.name("c")).sendKeys(captchaSolution);
			driver.findElement(By.id("submit")).click();
			Thread.sleep(10 * 1000);  // Give the page time to load fully; this pause is necessary.

			// Parse the page that follows and print each result.
			String html = driver.getPageSource();
			Document doc = Jsoup.parse(html);
			Elements results = doc.select("div[class=txt-box]");
			for (Element result : results) {
				Elements titleLink = result.select("h3").select("a");
				String newsTitle = titleLink.text();
				String newsUrl = titleLink.attr("href");
				System.out.println(newsTitle + "\t" + newsUrl);
			}
		} finally {
			// Close the browser on every exit path.
			driver.quit();
		}
	}

	/**
	 * Crops the given rectangle out of an image, clamping the rectangle to the
	 * image bounds so that a region partly outside the image does not make
	 * {@code getSubimage} fail.
	 *
	 * @param image source image
	 * @param x left edge of the requested region
	 * @param y top edge of the requested region
	 * @param width requested width
	 * @param height requested height
	 * @return the part of the requested region that lies inside {@code image}
	 * @throws IllegalStateException if the region lies entirely outside the image
	 */
	private static BufferedImage cropToBounds(BufferedImage image, int x, int y, int width, int height) {
		int left = Math.max(0, x);
		int top = Math.max(0, y);
		int right = Math.min(image.getWidth(), x + width);
		int bottom = Math.min(image.getHeight(), y + height);
		if (right <= left || bottom <= top) {
			throw new IllegalStateException("Element region (" + x + ", " + y + ", " + width + "x" + height
					+ ") lies outside the " + image.getWidth() + "x" + image.getHeight() + " screenshot");
		}
		return image.getSubimage(left, top, right - left, bottom - top);
	}
}

  • 1
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值