目的:
利用Java实现爬取淘宝、京东、天猫某商品信息。
导入jsoup包
jsoup 是一款Java 的HTML解析器,可直接解析某个URL地址、HTML文本内容。它提供了一套非常省力的API,可通过DOM,CSS以及类似于jQuery的操作方法来取出和操作数据,我本人使用的是1.11.3版本。
jsoup API文档(注意:该链接对应的是1.6.3版本的文档,与本文使用的1.11.3版本不完全一致,仅供参考;最新版本的文档请见 https://jsoup.org/apidocs/ ):https://tool.oschina.net/apidocs/apidoc?api=jsoup-1.6.3
<!-- Maven coordinates for the jsoup HTML parser; 1.11.3 is the version this article uses. -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.11.3</version>
</dependency>
自定义常量
/**
 * URL prefixes of the product-search endpoints for Taobao, JD and Tmall.
 *
 * <p>Every constant ends with an empty query parameter ({@code keyword=} or {@code q=}).
 * NOTE(review): presumably the caller appends the (URL-encoded) search keyword to the
 * constant before issuing the request — confirm against the calling code.
 *
 * <p>The class holds constants only, so it is {@code final} and not instantiable.
 */
public final class UrlConst {

    /**
     * Taobao product-information endpoint, ending in {@code keyword=}.
     *
     * <p>The literal carries a number of fixed query parameters (for example {@code ip},
     * {@code pid}, {@code clk1} and a URL-encoded {@code ua} string). They are kept exactly
     * as written; NOTE(review): they look like values captured from one browser session and
     * may need refreshing — verify before relying on them.
     */
    public static final String PRODUCT_TAOBAO_GET = "https://odin.re.taobao.com/search_tbuad?_noSEC=true&catid=&frcatid=&ac=hU/XF8+10BsCAXGMTsIzFMOF&ip=113.140.78.194&wangwangid=&offset=&count=10&pid=430672_1006&refpid=mm_26632258_3504122_32538762&buckid=&clk1=79e0aebf83b1a06d7ab4e329dc3c96c6&elemtid=1&propertyid=&loc=&gprice=&ismall=&page=&creativeid=&feature_names=spGoldMedal%2CspIsNew%2CpromoPrice%2CfeedbackContent%2CfeedbackNick%2Ctags%2CfeedbackCount%2CdsrDescribe%2CdsrDescribeGap%2CdsrService%2CdsrServiceGap%2CdsrDeliver%2CdsrDeliverGap&reqFields=eurl%2CimgUrl%2Cismall%2CitemId%2Cloc%2Cprice%2CsellCount%2CpromoPrice%2CpromoName%2CsellerPayPostfee%2Ctitle%2CdsrDeliver%2CdsrDescribe%2CdsrService%2CdsrDescribeGap%2CdsrServiceGap%2CdsrDeliverGap%2CspGoldMedal%2Cisju%2CpriceDiscount%2CwangwangId%2Credkeys&sbid=&ua=Mozilla%2F5.0%20(Windows%20NT%2010.0%3B%20Win64%3B%20x64)%20AppleWebKit%2F537.36%20(KHTML%2C%20like%20Gecko)%20Chrome%2F79.0.3945.117%20Safari%2F537.36&pvoff=&X-Client-Scheme=https&keyword=";

    /**
     * JD (Jingdong) product-information endpoint, ending in {@code keyword=}.
     */
    public static final String PRODUCT_JINGDONG_GET = "https://search.jd.com/Search?enc=utf-8&keyword=";

    /**
     * Tmall (Tianmao) product-information endpoint, ending in {@code q=}.
     */
    public static final String PRODUCT_TIANMAO_GET = "https://list.tmall.com/search_product.htm?type=p&from=.list.pc_1_searchbutton&q=";

    /** Not instantiable: this type exists only to namespace the constants above. */
    private UrlConst() {
    }
}
商品pojo
public class Product {
private String price; //价格
private String title; //标题
private String url; //链接
private String photo; //图片
public Product() {
}
public Product(String price, String title, String url, String photo) {
this.price = price;
this.title = title;
this.url = url;
this.photo = photo;
}
public String getPhoto() {
return photo;
}
public void setPhoto(String photo) {
this.photo = photo;
}
public String getUrl() {
return url;