package com.zyt.creenshot.service.crawlerData.impl;

import com.zyt.creenshot.entity.CarBaseData;
import com.zyt.creenshot.mapper.CarBaseDataMapper;
import com.zyt.creenshot.service.crawlerData.ICrawlerData;
import com.zyt.creenshot.util.DocumentHelper;
import com.zyt.creenshot.util.HttpConnectionManager;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import java.util.ArrayList;
import java.util.List;

/**
 * {@link ICrawlerData} implementation that downloads one HTML page, walks its
 * brand / sub-brand / car-type markup with jsoup, and stores every car it finds
 * through {@link CarBaseDataMapper#insertBatch}.
 *
 * <p>Created 2019/7/8 17:48.
 *
 * @author zhaiyutao
 * @version v1.0
 */
@Component
@Slf4j
public class CrawlerDataImpl implements ICrawlerData {

    /** Number of buffered records at which the buffer is written out and cleared. */
    private static final int BATCH_SIZE = 500;

    /** Connection manager handed to {@link DocumentHelper#getProxyHttp} for the page download. */
    @Autowired
    private HttpConnectionManager connectionManager;

    /**
     * Mapper used to persist the collected records. Injection is optional
     * ({@code required = false}), so this field is null when no such bean exists.
     */
    @Autowired(required = false)
    private CarBaseDataMapper carBaseDataMapper;

    /**
     * Downloads the configured page, extracts all cars grouped by brand and
     * sub-brand, and inserts them in batches.
     *
     * <p>If the download yields no content, an error is logged and nothing is parsed
     * or stored.
     */
    @Override
    public void crawlerCarBaseData() {
        String url = "***********要爬取的网址*************";
        // "GBK" is presumably the charset used to decode the response -- confirm against DocumentHelper.
        String resultHtml = DocumentHelper.getProxyHttp(url, null, 0, "GBK", connectionManager);
        if (StringUtils.isEmpty(resultHtml)) {
            log.error("没有爬到网站数据");
            // Nothing to parse; previously execution fell through to Jsoup.parse with an empty/null string.
            return;
        }

        Document html = Jsoup.parse(resultHtml);
        // Each "braRow" div holds one top-level brand.
        Elements brandList = html.select("div[class=braRow]");
        if (brandList.isEmpty()) {
            return;
        }

        List<CarBaseData> listCar = new ArrayList<>();
        for (Element brand : brandList) {
            // Top-level brand name and logo.
            Elements brandBig = brand.select("div[class=braRow-icon]");
            // NOTE(review): replaces '?' with a middle dot -- presumably the page's separator
            // is decoded as '?'; confirm against real page data.
            String brandName = brandBig.select("p").text().replace("?", "·");
            // NOTE(review): reads the attribute literally named "#src" (presumably a lazy-load
            // attribute on the page) -- confirm this is intended rather than "src".
            String brandPic = brandBig.select("img[src]").attr("#src");

            Elements smallBrandList = brand.select("div[class=modA noBorder]");
            for (Element sb : smallBrandList) {
                // Sub-brand name.
                String brandSmallName = sb.select("div[class=thA]").select("a[href]").text();

                // Car-type sections appear under two different class values; handle both.
                for (Element section : sb.select("div[class=tbA ]")) {
                    dealCarData(listCar, brandName, brandPic, brandSmallName, section);
                }
                for (Element section : sb.select("div[class=tbA mt10 noBorder]")) {
                    dealCarData(listCar, brandName, brandPic, brandSmallName, section);
                }
            }
        }

        // Write out whatever is left after the last full batch.
        if (CollectionUtils.isNotEmpty(listCar)) {
            carBaseDataMapper.insertBatch(listCar);
        }
    }

    /**
     * Extracts every car listed in one car-type section and appends it to the buffer,
     * writing the buffer out and clearing it whenever it reaches {@link #BATCH_SIZE}.
     *
     * @param listCar        buffer of records waiting to be inserted; modified in place
     * @param brandName      top-level brand name copied onto each record
     * @param brandPic       brand logo value copied onto each record
     * @param brandSmallName sub-brand name copied onto each record
     * @param in             the section element containing a "stit" heading and {@code <li>} items
     */
    private void dealCarData(List<CarBaseData> listCar, String brandName, String brandPic,
                             String brandSmallName, Element in) {
        // Keep only the heading text before the first "(". String.split("(") cannot be used
        // here: "(" alone is an invalid regular expression and throws PatternSyntaxException.
        String carTypeName = StringUtils.substringBefore(in.select("p[class=stit]").text(), "(");

        for (Element item : in.select("li")) {
            Element tit = item.select("p[class=tit]").first();
            if (tit == null) {
                // Without a title paragraph there is no link to derive an id from.
                continue;
            }
            Elements carHref = tit.select("a[href]");
            String href = carHref.attr("href");
            if (StringUtils.isEmpty(href)) {
                continue;
            }

            // A missing price paragraph is recorded as an empty price instead of failing.
            Element price = item.select("p[class=price]").first();
            String priceStr = price == null ? StringUtils.EMPTY : price.text();

            CarBaseData carBaseData = new CarBaseData();
            // The id is the href with its first and last characters removed.
            carBaseData.setCarId(StringUtils.substring(href, 1, href.length() - 1));
            carBaseData.setCarName(carHref.attr("title"));
            carBaseData.setBrandName(brandName);
            carBaseData.setBrandPic(brandPic);
            carBaseData.setSubBrandName(brandSmallName);
            carBaseData.setCarType(carTypeName);
            carBaseData.setCarPrice(priceStr);
            listCar.add(carBaseData);

            if (listCar.size() >= BATCH_SIZE) {
                carBaseDataMapper.insertBatch(listCar);
                listCar.clear();
            }
        }
    }
}