准备工作:
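1. 需要先安装 PhantomJS(无头浏览器,并不是 js 插件)
将 phantomjs-2.1.1-windows 解压到代码中 phantomjs.binary.path 指向的位置(代码里写的是 D:\phantomjs-2.1.1-windows\bin\phantomjs.exe)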
2. 在 pom.xml 中加入以下 Maven 依赖
<dependency>
<groupId>org.seleniumhq.selenium</groupId>
<artifactId>selenium-java</artifactId>
<version>3.5.2</version>
</dependency>
import com.demo13.pojo.Result;
import com.demo13.reptile.pojo.AContentsEntity;
import com.demo13.reptile.pojo.AppStore;
import com.demo13.service.AppService;
import com.demo13.service.AppStoreService;
import org.openqa.selenium.By;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.os.ExecutableFinder;
import org.openqa.selenium.phantomjs.PhantomJSDriver;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
@Controller
@RequestMapping("/simple")
public class Simpl4jReptileController {
@Autowired
AppStoreService appStoreService;
static final String URL="http://app.mi.com/topList";
/**
 * Crawls every category linked from {@link #URL}, page by page.
 *
 * <p>A PhantomJS driver is started for the request and is always shut down in the
 * {@code finally} block, so a failed crawl does not leave a phantomjs process behind.
 *
 * <p>NOTE(review): the crawled apps are NOT persisted by this endpoint. The call to
 * {@code appStoreService.insertAppStore(list)} was commented out in the original and
 * is left disabled here; confirm whether that is intentional.
 *
 * @param a required request parameter; it is not read by this method
 * @return a success result when all categories were visited, or a failure result when
 *         any exception escaped the crawl
 */
@ResponseBody
@RequestMapping("/reptile")
public Result addApp(@RequestParam(value = "a") Integer a){
    PhantomJSDriver webDriver = null;
    try {
        String path = "D:\\phantomjs-2.1.1-windows\\bin\\phantomjs.exe";
        String page="#page=";
        // Tell the driver where the PhantomJS executable lives.
        System.setProperty("phantomjs.binary.path", path);
        webDriver = new PhantomJSDriver();
        // Collect the category links first; each category is then paged through separately.
        List<String> categoryList=getCategoryList(webDriver,URL);
        // De-duplicate, in case the same category link appears more than once.
        Set<String> set=new HashSet<>(categoryList);
        List<String> distinctList=new ArrayList<>(set);
        for(String categoryUrl:distinctList){
            List<AppStore> list = getAList(webDriver, categoryUrl, page);
            System.out.println(categoryUrl+"该cateGory已经爬取完成-------");
            /*appStoreService.insertAppStore(list);*/
        }
    }catch (Exception e){
        e.printStackTrace();
        return Result.buildFailure("服务器异常");
    }finally {
        if(webDriver!=null){
            try {
                // quit() ends the session and terminates the phantomjs process.
                webDriver.quit();
            }catch (Exception quitFailure){
                // A failed shutdown must not replace the result of the crawl itself.
                quitFailure.printStackTrace();
            }
        }
    }
    return Result.buildSuccess("爬取完成");
}
/**
 * Crawls the paged top list at {@link #URL} and stores the collected apps through
 * {@code appStoreService.insertAppStore}.
 *
 * <p>A PhantomJS driver is started for the request and is always shut down in the
 * {@code finally} block, so a failed crawl does not leave a phantomjs process behind.
 *
 * @param a required request parameter; it is not read by this method
 * @return a success result when the crawl and the insert completed, or a failure
 *         result when any exception escaped
 */
@ResponseBody
@RequestMapping("/hostReptile")
public Result addHostApp(@RequestParam(value = "a") Integer a){
    PhantomJSDriver webDriver = null;
    try {
        String path = "D:\\phantomjs-2.1.1-windows\\bin\\phantomjs.exe";
        String page="?page=";
        // Tell the driver where the PhantomJS executable lives.
        System.setProperty("phantomjs.binary.path", path);
        webDriver = new PhantomJSDriver();
        List<AppStore> hostAppList=getHostAppList(webDriver,page);
        appStoreService.insertAppStore(hostAppList);
    }catch (Exception e){
        e.printStackTrace();
        return Result.buildFailure("服务器异常");
    }finally {
        if(webDriver!=null){
            try {
                // quit() ends the session and terminates the phantomjs process.
                webDriver.quit();
            }catch (Exception quitFailure){
                // A failed shutdown must not replace the result of the crawl itself.
                quitFailure.printStackTrace();
            }
        }
    }
    return Result.buildSuccess("爬取完成");
}
/**
 * Pages through the top list at {@link #URL}, starting at page 1, and collects one
 * {@link AppStore} per {@code <li>} found inside elements with class {@code applist}.
 *
 * <p>Paging stops at the first page that yields no list items at all. This includes
 * the case where no {@code applist} container is present, which previously caused an
 * endless loop on the same page. The page number advances exactly once per page, no
 * matter how many containers the page holds.
 *
 * <p>Any exception ends the crawl; whatever was collected up to that point is
 * returned. If the thread is interrupted while waiting, its interrupt flag is restored.
 *
 * @param webDriver driver used to load the pages
 * @param page      text placed between the base URL and the page number
 * @return the apps collected so far; never {@code null}
 */
public static List<AppStore> getHostAppList(PhantomJSDriver webDriver,String page){
    List<AppStore> aContentsEntities=new ArrayList<>();
    int num=1;
    try {
        boolean lookNext=true;
        while(lookNext){
            String pageUrl=URL+page+num;
            System.out.println(pageUrl+"-*-*-*-*--*-*--*-*");
            webDriver.get(pageUrl);
            // Scroll to the bottom of the page before reading the list.
            webDriver.executePhantomJS("window.scrollTo(0, document.body.scrollHeight); var lenOfPage=document.body.scrollHeight; return lenOfPage;");
            Thread.sleep(2000);
            boolean foundAny=false;
            for(WebElement el:webDriver.findElements(By.className("applist"))){
                for(WebElement al:el.findElements(By.tagName("li"))){
                    aContentsEntities.add(getPacageAndAppName(al));
                    foundAny=true;
                }
            }
            if(foundAny){
                ++num;
            }else{
                // Nothing on this page: the list is exhausted.
                lookNext=false;
            }
        }
    }catch (InterruptedException e){
        // Keep the interrupt visible to the caller instead of swallowing it.
        Thread.currentThread().interrupt();
        e.printStackTrace();
    }catch (Exception e){
        e.printStackTrace();
    }
    return aContentsEntities;
}
/**
 * Pages through one category, starting at page 0, and collects one {@link AppStore}
 * per {@code <li>} found inside elements with id {@code all-applist}.
 *
 * <p>Paging stops at the first page that yields no list items at all. This includes
 * the case where no {@code all-applist} element is present, which previously caused
 * an endless loop on the same page. The page number advances exactly once per page.
 *
 * <p>Any exception ends the crawl; whatever was collected up to that point is
 * returned. If the thread is interrupted while waiting, its interrupt flag is restored.
 *
 * @param webDriver driver used to load the pages
 * @param category  URL of the category to crawl
 * @param page      text placed between the category URL and the page number
 * @return the apps collected so far; never {@code null}
 */
public static List<AppStore> getAList(PhantomJSDriver webDriver, String category, String page){
    List<AppStore> aContentsEntities=new ArrayList<>();
    int num=0;
    try {
        boolean lookNext=true;
        while(lookNext){
            String pageUrl=category+page+num;
            System.out.println(pageUrl+"-*-*-*-*--*-*--*-*");
            webDriver.get(pageUrl);
            // Fixed wait before the list is read from the page.
            Thread.sleep(3000);
            boolean foundAny=false;
            for(WebElement el:webDriver.findElements(By.id("all-applist"))){
                for(WebElement al:el.findElements(By.tagName("li"))){
                    aContentsEntities.add(getPacageAndAppName(al));
                    foundAny=true;
                }
            }
            if(foundAny){
                ++num;
            }else{
                // Nothing on this page: the category is exhausted.
                lookNext=false;
            }
        }
    }catch (InterruptedException e){
        // Keep the interrupt visible to the caller instead of swallowing it.
        Thread.currentThread().interrupt();
        e.printStackTrace();
    }catch (Exception e){
        e.printStackTrace();
    }
    return aContentsEntities;
}
/**
 * Builds an {@link AppStore} from one list item: the package name comes from the
 * {@code href} of the item's first {@code <a>}, the app name from the {@code alt} of
 * its first {@code <img>}, and the icon address from that image's {@code src}.
 * The status is set to 1.
 *
 * <p>When the icon address equals the lazy-load placeholder image, the package name,
 * address and app name are printed so such entries can be spotted. The comparison is
 * done on content; the earlier length-only comparison could match unrelated URLs of
 * the same length and failed with a NullPointerException when {@code src} was absent.
 *
 * <p>NOTE(review): the placeholder address is still stored as the icon address in
 * that case; where the real icon URL lives is not visible from this code.
 *
 * @param a the list item element to read
 * @return the populated entity
 */
public static AppStore getPacageAndAppName(WebElement a){
    AppStore aContentsEntity=new AppStore();
    // Package name: taken from the link target.
    WebElement webElement = a.findElement(By.tagName("a"));
    String hrefValue=webElement.getAttribute("href");
    String packageName=getHrefValue(hrefValue);
    aContentsEntity.setAppPackage(packageName);
    // App name: taken from the image's alt text.
    WebElement img = a.findElement(By.tagName("img"));
    String imgAltValue=img.getAttribute("alt");
    aContentsEntity.setAppName(imgAltValue);
    String src=img.getAttribute("src");
    String targetSrc="http://resource.xiaomi.net/miuimarket/app/lazyload.gif";
    // Constant-first equals: safe when src is null.
    if(targetSrc.equals(src)){
        System.out.println(packageName);
        System.out.println(src);
        System.out.println(imgAltValue);
    }
    aContentsEntity.setAppIconAddress(src);
    aContentsEntity.setAppStatus(1);
    return aContentsEntity;
}
/**
 * Extracts the app package name from a link, i.e. the value of the first
 * {@code key=value} pair after the {@code ?}.
 *
 * <p>The value ends at the next {@code &} or {@code #}, so additional query
 * parameters or a fragment are not included in the result. If the link contains no
 * {@code =} at or after that position, the link is returned unchanged.
 *
 * @param href the link to parse; must not be {@code null}
 * @return the extracted value, or {@code href} itself when there is no {@code =}
 * @throws NullPointerException if {@code href} is {@code null}
 */
public static String getHrefValue(String href){
    int queryStart=href.indexOf('?');
    int equalsAt=href.indexOf('=',queryStart+1);
    if(equalsAt<0){
        return href;
    }
    int valueStart=equalsAt+1;
    int valueEnd=href.length();
    int nextParam=href.indexOf('&',valueStart);
    if(nextParam>=0){
        valueEnd=nextParam;
    }
    int fragment=href.indexOf('#',valueStart);
    if(fragment>=0&&fragment<valueEnd){
        valueEnd=fragment;
    }
    return href.substring(valueStart,valueEnd);
}
/**
 * Loads {@code url} and gathers the link targets of the category list.
 *
 * <p>For every element with class {@code category-list}, each {@code <li>} is visited
 * and the {@code href} of its first {@code <a>} is appended to the result, in
 * document order. Duplicates are not removed here.
 *
 * @param p   driver used to load the page
 * @param url address of the page that carries the category list
 * @return the collected {@code href} values; empty when nothing matched
 */
public static List<String> getCategoryList(PhantomJSDriver p, String url){
    List<String> hrefs=new ArrayList<>();
    p.get(url);
    for(WebElement container:p.findElements(By.className("category-list"))){
        for(WebElement item:container.findElements(By.tagName("li"))){
            WebElement anchor=item.findElement(By.tagName("a"));
            String target=anchor.getAttribute("href");
            hrefs.add(target);
        }
    }
    return hrefs;
}
}
---------------------------------------------------------------------------------------------
package com.demo13.reptile.pojo;
/**
 * App store entity (table holding all known package names).
 *
 * <p>Created 2019/6/4 15:04, version 1.1.
 *
 * <p>Plain mutable bean: every property starts out {@code null} and is exposed through
 * a getter/setter pair without validation.
 */
public class AppStore {

    /** Identifier of the record. */
    private Integer appId;

    /** Display name of the app. */
    private String appName;

    /** Package name of the app. */
    private String appPackage;

    /** Id of the creating user (presumably; the meaning is not defined in this class). */
    private Integer createUserId;

    /** Status value of the record; its meaning is not defined in this class. */
    private Integer appStatus;

    /** Address of the app's icon. */
    private String appIconAddress;

    /** @return the record identifier, or {@code null} if unset */
    public Integer getAppId() { return this.appId; }

    /** @param appId the record identifier */
    public void setAppId(Integer appId) { this.appId = appId; }

    /** @return the app's display name, or {@code null} if unset */
    public String getAppName() { return this.appName; }

    /** @param appName the app's display name */
    public void setAppName(String appName) { this.appName = appName; }

    /** @return the app's package name, or {@code null} if unset */
    public String getAppPackage() { return this.appPackage; }

    /** @param appPackage the app's package name */
    public void setAppPackage(String appPackage) { this.appPackage = appPackage; }

    /** @return the creating user's id, or {@code null} if unset */
    public Integer getCreateUserId() { return this.createUserId; }

    /** @param createUserId the creating user's id */
    public void setCreateUserId(Integer createUserId) { this.createUserId = createUserId; }

    /** @return the status value, or {@code null} if unset */
    public Integer getAppStatus() { return this.appStatus; }

    /** @param appStatus the status value */
    public void setAppStatus(Integer appStatus) { this.appStatus = appStatus; }

    /** @return the icon address, or {@code null} if unset */
    public String getAppIconAddress() { return this.appIconAddress; }

    /** @param appIconAddress the icon address */
    public void setAppIconAddress(String appIconAddress) { this.appIconAddress = appIconAddress; }
}
-------------------------------------------------------------------------------