参考:https://www.cnblogs.com/sam-uncle/p/10922366.html
爬虫初体验-XX商城商品信息
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.13.1</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.75</version>
</dependency>
/**
 * Fetches one JD.com search-result page for a fixed keyword, scrapes each product
 * card, and prints the collected products as JSON to standard output.
 *
 * <p>For every element with class {@code gl-i-wrap} inside the {@code J_goodsList}
 * container, the link {@code href}, the image {@code data-lazy-img} attribute, the
 * name text, and the price text are copied into a {@code JDProduct}.
 *
 * @param args command-line arguments (unused)
 * @throws IllegalStateException if the fetched page has no {@code J_goodsList} element
 * @throws Exception if the page cannot be fetched or parsed
 */
public static void main(String[] args) throws Exception {
    String keyword = "理肤泉";
    // Percent-encode the keyword explicitly so the request URL is plain ASCII and does
    // not depend on how the HTTP client treats raw non-ASCII characters.
    String address = "https://search.jd.com/Search?keyword="
            + java.net.URLEncoder.encode(keyword, "UTF-8") + "&enc=utf-8";
    Document document = Jsoup.connect(address).get();

    // Container that holds the product list.
    Element productElement = document.getElementById("J_goodsList");
    if (productElement == null) {
        // getElementById returns null when no element carries the id; fail with a
        // descriptive message instead of a bare NullPointerException on the next line.
        throw new IllegalStateException(
                "Element with id 'J_goodsList' not found in response from " + address);
    }

    // Individual product cards inside the list.
    Elements productElements = productElement.getElementsByClass("gl-i-wrap");
    List<JDProduct> jdProductList = new ArrayList<>(productElements.size());

    for (Element productE : productElements) {
        JDProduct jdProduct = new JDProduct();

        // Product URL: the anchor inside the "p-img" wrapper.
        Elements imgElements = productE.getElementsByClass("p-img").select("a");
        jdProduct.setProductUrl(imgElements.attr("href"));

        // Product image address, read from the data-lazy-img attribute of the <img>.
        jdProduct.setProductImg(imgElements.select("img").attr("data-lazy-img"));

        // Product name. getElementsByClass expects a single class name, so use a
        // compound CSS selector to require both classes regardless of their order
        // or of any additional classes on the element.
        Elements productNameElements = productE.select(".p-name.p-name-type-2");
        String productName = "【" + productNameElements.select("span").text() + "】"
                + productNameElements.select("font").text()
                + productNameElements.select("em").text();
        jdProduct.setProductName(productName);

        // Product price: text of the <i> element(s) inside the "p-price" wrapper.
        Elements productPriceElements = productE.getElementsByClass("p-price").select("i");
        jdProduct.setProductPrice(productPriceElements.text());

        jdProductList.add(jdProduct);
    }

    System.out.println(JSON.toJSON(jdProductList));
}