<!-- jsoup: HTML parsing library used by the example below to fetch a page and query it with CSS selectors -->
<!-- NOTE(review): 1.10.3 is an old jsoup release; check whether a newer version should be pinned (TODO confirm) -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.3</version>
</dependency>
示例：爬取该网站的品牌。通过 select 选择器先选中 div 块，再读取该 div 中 a 标签的内容。
/**
 * Scrapes the brand index page and stores every brand found on it.
 *
 * <p>The page is fetched and parsed with jsoup. Inside the {@code div.brand_cen} container,
 * each letter {@code A}..{@code Z} except {@code U} is looked up as
 * {@code div.brand_box<letter>}. The text of every {@code a} element in that box becomes one
 * {@code MachineBrand} whose type is the letter and whose sort value is the element's
 * zero-based position within the box. All collected brands are passed to
 * {@code brandService.saveBatch} in a single call.
 *
 * @return a success result carrying the string {@code "1"}
 * @throws IllegalStateException if the page cannot be fetched, or if it does not contain the
 *         {@code div.brand_cen} container
 */
@GetMapping("/reptile")
public CommonResult<?> reptile() {
    // Fetch and parse the remote page (1000 ms timeout).
    final Document document;
    try {
        document = Jsoup.parse(new URL("http://o2**.****.com/search?"), 1000);
    } catch (IOException e) {
        // Fail loudly with the cause attached instead of continuing with a null document.
        throw new IllegalStateException("Failed to fetch the brand page", e);
    }

    // Container that holds one box per initial letter.
    final Element brandContainer = document.select("div.brand_cen").first();
    if (brandContainer == null) {
        throw new IllegalStateException("Brand page has no div.brand_cen element");
    }

    final List<MachineBrand> brandList = new ArrayList<>();
    for (char c = 'A'; c <= 'Z'; c++) {
        // 'U' was explicitly excluded by the original code; presumably the site has no such box.
        if (c == 'U') {
            continue;
        }
        final Element letterBox = brandContainer.select("div.brand_box" + c).first();
        if (letterBox == null) {
            // No box for this letter on the page: nothing to collect.
            continue;
        }
        int sort = 0;
        for (Element anchor : letterBox.select("a")) {
            final MachineBrand machineBrand = new MachineBrand();
            machineBrand.setName(anchor.text());
            machineBrand.setSort(sort);
            machineBrand.setType(String.valueOf(c));
            brandList.add(machineBrand);
            sort++;
        }
    }
    brandService.saveBatch(brandList);
    return CommonResult.success("1");
}