selenium 是一款比较好用的、模拟人工操作浏览器的工具。它本是用来做自动化测试的,但由于项目中需要爬取数据,且项目的复杂度较高,本人采用 selenium 进行自动化获取数据。
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.TimeUnit;
/**
 * Scrapes price tables from https://www.yt1998.com for a fixed list of
 * search keywords by driving Chrome through Selenium, and prints every table
 * row to standard output as tab-separated values.
 */
public class PPs {

    /** System property Selenium reads to locate the chromedriver binary. */
    private static final String DRIVER_PROPERTY = "webdriver.chrome.driver";

    /** Fallback chromedriver location used when the property is not already set. */
    private static final String DEFAULT_DRIVER_PATH =
            "C:\\Program Files\\Google\\Chrome\\Application\\chromedriver.exe";

    /** Number of leading cells read from each row of the price table. */
    private static final int COLUMN_COUNT = 11;

    /** XPath of the {@code <dd>} element that holds the market links on the result page. */
    private static final String MARKET_LINKS_XPATH = "/html/body/div[3]/div/div[2]/dl[1]/dd";

    /**
     * Starts a headless Chrome session, runs one search per keyword and
     * always shuts the browser down afterwards.
     *
     * @param args unused
     * @throws InterruptedException declared for compatibility with {@link #getData}
     */
    public static void main(String[] args) throws InterruptedException {
        String[] data = {"党参", "三七", "黄连"};

        ChromeOptions chromeOptions = new ChromeOptions();
        // chromeOptions.addArguments("--headless","--no-sandbox","--disable-gpu","--window-size=1290,1080");
        chromeOptions.addArguments("--headless");

        // Respect a driver path supplied via -Dwebdriver.chrome.driver=...; only
        // fall back to the hard-coded location when nothing was configured.
        if (System.getProperty(DRIVER_PROPERTY) == null) {
            System.setProperty(DRIVER_PROPERTY, DEFAULT_DRIVER_PATH);
        }

        // The options must be handed to the driver, otherwise "--headless" is ignored.
        WebDriver driver = new ChromeDriver(chromeOptions);
        try {
            driver.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS);
            for (String keyword : data) {
                getData(driver, keyword);
            }
        } finally {
            // quit() ends the whole session (all windows and the chromedriver
            // process); close() would only close the current window.
            driver.quit();
        }
    }

    /**
     * Opens the site, searches for {@code data} and prints the price table of
     * every market link on the result page except the first one.
     *
     * @param driver active WebDriver session
     * @param data   search keyword typed into the search box
     * @throws InterruptedException never thrown by the current implementation;
     *                              kept so the signature stays unchanged
     */
    private static void getData(WebDriver driver, String data) throws InterruptedException {
        driver.get("https://www.yt1998.com");

        // Make sure the search category shows "今日价格"; if not, open the
        // category drop-down and pick its first entry.
        WebElement searchCategory = driver.findElement(By.id("fltext"));
        if (!"今日价格".equals(searchCategory.getText())) {
            searchCategory.click();
            driver.findElement(By.xpath("//*[@id=\"fltype\"]/ul/li[1]/a")).click();
        }

        driver.findElement(By.xpath("//*[@id=\"kw\"]")).sendKeys(data);
        driver.findElement(By.xpath("//*[@id=\"su\"]")).click();

        WebElement marketContainer = driver.findElement(By.xpath(MARKET_LINKS_XPATH));
        int linkCount = marketContainer.findElements(By.tagName("a")).size();

        // XPath positions are 1-based. The first link is skipped on purpose
        // (NOTE(review): presumably it is the already-selected/default entry -- confirm).
        // Each link is looked up again right before clicking instead of reusing
        // the elements found above.
        for (int i = 2; i <= linkCount; i++) {
            WebElement marketLink = driver.findElement(By.xpath(MARKET_LINKS_XPATH + "/a[" + i + "]"));
            // System.out.println(marketLink.getText());
            marketLink.click();
            extracted(driver);
            // Thread.sleep(1000);
        }
    }

    /**
     * Fetches the data for the currently displayed market: reads the element
     * with id {@code priceList} and prints the text of the {@code <font>}
     * element in each of the first {@value #COLUMN_COUNT} cells of every row,
     * each value followed by a tab, one row per line.
     *
     * <p>Rows with fewer than {@value #COLUMN_COUNT} {@code <td>} cells are
     * skipped instead of failing with an {@link IndexOutOfBoundsException}.
     *
     * @param driver active WebDriver session positioned on a price page
     */
    private static void extracted(WebDriver driver) {
        WebElement priceList = driver.findElement(By.id("priceList"));
        List<WebElement> rows = priceList.findElements(By.tagName("tr"));
        for (WebElement row : rows) {
            List<WebElement> cells = row.findElements(By.tagName("td"));
            if (cells.size() < COLUMN_COUNT) {
                // Not a full data row (e.g. a header built from <th>); nothing to print.
                continue;
            }
            StringBuilder line = new StringBuilder();
            for (int col = 0; col < COLUMN_COUNT; col++) {
                String text = cells.get(col).findElement(By.tagName("font")).getText();
                line.append(text).append("\t");
            }
            System.out.println(line);
        }
    }
}