最近需要爬取亚马逊的商品评论信息。爬虫的工具和方法很多,刚好看了几篇关于Selenium的文章,就用它来练习一下。
准备工作:
需要chromedriver
驱动的下载地址如下:
http://chromedriver.storage.googleapis.com/index.html
请下载与本机Chrome浏览器版本相匹配的chromedriver(版本不匹配时驱动无法启动浏览器)。
开始
新建一个Spring Boot项目,这里不再赘述。
在pom.xml中引入以下Maven依赖:
<dependency>
<groupId>org.seleniumhq.selenium</groupId>
<artifactId>selenium-java</artifactId>
<version>3.9.0</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>27.1-jre</version>
</dependency>
写个测试类吧
(注意评论页的地址格式:URL中 product-reviews/ 后面的那一段(例如 B07QFPZRN6)就是商品的ASIN,直接替换成其他ASIN即可抓取对应商品的评论)
F12走起,找页面规律
可以看到页面评论div列表
先找到这个评论列表的父级div,再遍历其中的子元素,取出对应标签里的内容即可。
/**
 * Prints every review found on the review page currently loaded in the driver.
 *
 * <p>For each review block the method prints the reviewer name, the {@code title}
 * attribute of the first link in the first {@code a-row}, the text of the
 * review-title link, the review date text and the review body text, followed by
 * a separator line. A field whose element is missing from a review is printed as
 * an empty string instead of aborting the whole listing.
 *
 * @param webDriver driver that has already navigated to the review page
 * @throws org.openqa.selenium.NoSuchElementException if the page has no element
 *         with id {@code cm_cr-review_list}
 */
private static void getSource(WebDriver webDriver) {
    // Container that holds the whole review list; missing container means this
    // is not a review page, so let findElement fail fast.
    WebElement reviewList = webDriver.findElement(By.id("cm_cr-review_list"));

    for (WebElement review : reviewList.findElements(By.tagName("div"))) {
        // Keep only the divs that are review entries. Constant-first comparison
        // is null-safe for divs that carry no class attribute.
        if (!"a-section review aok-relative".equalsIgnoreCase(review.getAttribute("class"))) {
            continue;
        }

        String userName = "";
        List<WebElement> profileNames = review.findElements(By.className("a-profile-name"));
        if (!profileNames.isEmpty()) {
            userName = profileNames.get(0).getText();
        }

        String starTitle = "";
        String reviewTitle = "";
        List<WebElement> rows = review.findElements(By.className("a-row"));
        if (!rows.isEmpty()) {
            // Star rating link and review-title link are both looked up inside
            // the first a-row of the review.
            WebElement firstRow = rows.get(0);
            List<WebElement> links = firstRow.findElements(By.className("a-link-normal"));
            if (!links.isEmpty()) {
                starTitle = links.get(0).getAttribute("title");
            }
            reviewTitle = textOfFirstWithClass(firstRow, "a",
                    "a-size-base a-link-normal review-title a-color-base review-title-content a-text-bold");
        }

        String reviewDate = textOfFirstWithClass(review, "span",
                "a-size-base a-color-secondary review-date");
        String reviewBody = textOfFirstWithClass(review, "div",
                "a-row a-spacing-small review-data");

        System.out.println("用户:" + userName);
        System.out.println("星数:" + starTitle);
        // NOTE(review): this value comes from the "review-title" link, so the
        // label may be intended to be the review title — confirm.
        System.out.println("星级:" + reviewTitle);
        System.out.println("评论时间:" + reviewDate);
        System.out.println("评论:" + reviewBody);
        System.out.println("--------------------------------------");
    }
}

/**
 * Returns the text of the first descendant of {@code parent} with the given tag
 * whose class attribute equals {@code exactClass} (case-insensitive), or an
 * empty string when no such descendant exists.
 */
private static String textOfFirstWithClass(WebElement parent, String tagName, String exactClass) {
    for (WebElement candidate : parent.findElements(By.tagName(tagName))) {
        // getAttribute may return null; equalsIgnoreCase(null) is simply false.
        if (exactClass.equalsIgnoreCase(candidate.getAttribute("class"))) {
            return candidate.getText();
        }
    }
    return "";
}
// Tell Selenium where the chromedriver executable is (the path below is a
// placeholder and must be replaced with the real location).
System.setProperty("webdriver.chrome.driver","chromedriver软件位置\\chromedriver.exe");
// Start the WebDriver session
WebDriver webDriver = new ChromeDriver();
try {
    webDriver.get("https://www.amazon.com/-/zh/Sliding-Unfinished-Environmental-Installation-K-Frame/product-reviews/B07QFPZRN6/ref=cm_cr_arp_d_viewopt_srt?ie=UTF8&reviewerType=all_reviews&sortBy=recent&pageNumber=1");
    getSource(webDriver);
} finally {
    // Always end the session, even when loading or parsing fails, so no
    // browser/driver process is left behind. quit() closes every window, so a
    // separate close() beforehand is unnecessary.
    webDriver.quit();
}
效果展示一下