- Java 爬虫来了!不要天天说 Python、Python、Python,Java 同样也可以写爬虫!
- 我这个是 Maven 工程。
- 首先在 pom.xml 中导入 jsoup 依赖:
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.14.2</version>
</dependency>
编写代码
package com.nx;
import java.io.IOException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Minimal jsoup-based scraper: fetches a JD.com search result page for a keyword and
 * extracts the image URL, price text and title text of every listed item.
 */
public class JSOUPTEST {

    /** Search endpoint; the percent-encoded keyword is appended to this prefix. */
    private static final String SEARCH_URL = "https://search.jd.com/Search?keyword=";

    /** Timeout in milliseconds handed to jsoup when fetching the page. */
    private static final int TIMEOUT_MILLIS = 30000;

    /**
     * Runs a sample search and prints every extracted item to standard output.
     *
     * @param args unused
     * @throws IOException if the search page cannot be fetched
     */
    public static void main(String[] args) throws IOException {
        List<JD> jds = new JSOUPTEST().goodlist("电脑");
        for (JD info : jds) {
            System.out.println(info);
        }
    }

    /**
     * Fetches the search result page for {@code key} and converts each {@code <li>} inside the
     * {@code J_goodsList} element into a {@link JD}.
     *
     * @param key the search keyword; must not be {@code null}
     * @return the extracted items, in document order; empty when the page contains no
     *     {@code J_goodsList} element
     * @throws IOException if the page cannot be fetched or parsed
     */
    public List<JD> goodlist(String key) throws IOException {
        // Percent-encode the keyword: raw non-ASCII text, spaces, '&' or '#' would otherwise
        // yield a malformed URL or a truncated/misparsed query string.
        String url = SEARCH_URL + URLEncoder.encode(key, "UTF-8");
        Document document = Jsoup.parse(new URL(url), TIMEOUT_MILLIS);

        List<JD> list = new ArrayList<>();
        Element container = document.getElementById("J_goodsList");
        if (container == null) {
            // getElementById returns null when the id is absent from the fetched page;
            // report "no items" instead of failing with a NullPointerException.
            return list;
        }

        for (Element item : container.getElementsByTag("li")) {
            JD jd = new JD();
            jd.setImgurl(item.getElementsByTag("img").eq(0).attr("data-lazy-img"));
            jd.setPrice(item.getElementsByClass("p-price").eq(0).text());
            jd.setTitle(item.getElementsByClass("p-name").eq(0).text());
            list.add(jd);
        }
        return list;
    }
}
实体类(用来封装数据)
package com.nx;
/**
 * Mutable data holder with three string properties: image URL, price and title.
 * All properties start out as {@code null} until set.
 */
public class JD {

    /** Image URL of the item. */
    private String imgurl;

    /** Price of the item, kept as text. */
    private String price;

    /** Title of the item. */
    private String title;

    /** @return the stored image URL, or {@code null} if none was set */
    public String getImgurl() {
        return this.imgurl;
    }

    /** @return the stored price text, or {@code null} if none was set */
    public String getPrice() {
        return this.price;
    }

    /** @return the stored title, or {@code null} if none was set */
    public String getTitle() {
        return this.title;
    }

    /** @param imgurl the image URL to store */
    public void setImgurl(String imgurl) {
        this.imgurl = imgurl;
    }

    /** @param price the price text to store */
    public void setPrice(String price) {
        this.price = price;
    }

    /** @param title the title to store */
    public void setTitle(String title) {
        this.title = title;
    }

    /**
     * Renders all three properties in the form
     * {@code JD{imgurl='...', price='...', title='...'}}; unset properties appear as
     * {@code null}.
     */
    @Override
    public String toString() {
        return String.format("JD{imgurl='%s', price='%s', title='%s'}", imgurl, price, title);
    }
}
效果图