/*** Project Name:padwebcollector
* File Name:DiscussService.java
* Package Name:com.pad.service
* Date: 2018-07-25 16:59:44
* Copyright (c) 2018 All Rights Reserved.
**/
package com.pad.service;

import java.util.ArrayList;
import java.util.List;

import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.phantomjs.PhantomJSDriver;

import cn.edu.hfut.dmic.webcollector.crawler.DeepCrawler;
import cn.edu.hfut.dmic.webcollector.model.Links;
import cn.edu.hfut.dmic.webcollector.model.Page;

import com.pad.entity.DiscussInfo;
import com.pad.impl.DiscussInfoImpl;

/**
 * Crawler that renders a page in PhantomJS, reads every element with the CSS
 * class {@code content}, runs each element's text through {@link Analysis},
 * and hands the resulting {@link DiscussInfo} rows to {@link DiscussInfoImpl}.
 */
public class DiscussService extends DeepCrawler {

    /**
     * Creates the crawler.
     *
     * @param crawlPath value forwarded unchanged to the {@link DeepCrawler} constructor
     */
    public DiscussService(String crawlPath) {
        super(crawlPath);
    }

    /**
     * Loads the visited page in a browser, builds one {@link DiscussInfo} per
     * {@code content} element (numbered from 1 in document order), prints each
     * row to standard output and saves the collected rows.
     *
     * @param page the page handed over by the crawler; only its URL is used
     * @return always {@code null}; this crawler never produces follow-up links
     */
    @Override
    public Links visitAndGetNextLinks(Page page) {
        List<DiscussInfo> discussList = new ArrayList<DiscussInfo>();
        WebDriver driver = getWebDriver(page);
        try {
            Analysis analysis = new Analysis();
            List<WebElement> elements = driver.findElements(By.className("content"));

            // NOTE(review): the verdict is deliberately kept outside the loop, as in
            // the original code, so an element with blank text inherits the verdict
            // of the previous non-blank element (or the initial default). Confirm
            // whether a per-element default was intended instead.
            String resultMsg = "观望";
            int lineNo = 1;
            for (WebElement element : elements) {
                // Read the text once: every getText() call goes through the driver.
                String text = element.getText();
                if (!"".equals(text.trim())) {
                    resultMsg = analysis.analysis(text);
                }

                DiscussInfo info = new DiscussInfo();
                info.setLine_no(String.valueOf(lineNo));
                info.setResult_msg(resultMsg);
                info.setContent_msg(text);
                discussList.add(info);

                System.out.println(lineNo + " " + text);
                lineNo++;
            }
        } finally {
            // Always end the browser session, even when scraping fails, so the
            // PhantomJS process is not left running. quit() also closes the
            // windows, so a separate close() call is not needed.
            driver.quit();
        }

        // Persist only after the browser has been shut down, as before.
        DiscussInfoImpl impl = new DiscussInfoImpl();
        impl.saveData(discussList);
        return null;
    }

    /**
     * Starts a new PhantomJS driver and navigates it to the URL of the given page.
     * The caller owns the returned driver and must call {@code quit()} on it.
     *
     * @param page page whose URL is opened
     * @return a driver that has loaded {@code page.getUrl()}
     */
    public static WebDriver getWebDriver(Page page) {
        System.setProperty("phantomjs.binary.path", "D:\\******\\phantomjs.exe");
        WebDriver driver = new PhantomJSDriver();
        try {
            driver.get(page.getUrl());
        } catch (RuntimeException e) {
            // Navigation failed: the caller never receives the driver, so release
            // it here before propagating the original failure.
            driver.quit();
            throw e;
        }
        return driver;
    }

    /**
     * Runs the crawler against a single seed URL with a depth argument of 1.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        DiscussService dis = new DiscussService("discuss");
        dis.addSeed("https://*******/index/0000012");
        try {
            dis.start(1);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}