Jsoup爬虫之Java爬虫工具类
正文
1.导入依赖
<!-- jsoup: HTML fetching/parsing library used by the crawler utility class -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.2</version>
</dependency>
<!-- Lombok: supplies the @Data annotation used on the Comments entity class -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.18.22</version>
</dependency>
2.编写工具类
import com.example.entry.Comments;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
/**
 * Crawler utility that scrapes product entries from a JD.com search result page using jsoup.
 */
public class JsoupUtil {

    /** Timeout, in milliseconds, passed to jsoup when fetching the search page. */
    private static final int TIMEOUT_MILLIS = 30000;

    /**
     * Fetches the JD search page for the given keyword and extracts one {@link Comments}
     * entry (image source, price text, name text) per {@code <li>} found inside the
     * element with id {@code J_goodsList}.
     *
     * @param keyWords search keyword; it is URL-encoded (UTF-8) before being placed in the query
     * @return the extracted entries in page order; an empty list when the page contains no
     *         {@code J_goodsList} element
     * @throws IllegalArgumentException if {@code keyWords} is null
     * @throws Exception if the URL cannot be built or the page cannot be fetched or parsed
     */
    public List<Comments> spaderJD(String keyWords) throws Exception {
        if (keyWords == null) {
            throw new IllegalArgumentException("keyWords must not be null");
        }
        // Encode the keyword so that spaces, '&', '#' and non-ASCII text (e.g. Chinese)
        // cannot break or alter the query string.
        String encodedKeyword = URLEncoder.encode(keyWords, "UTF-8");
        String url = "https://search.jd.com/Search?keyword=" + encodedKeyword
                + "&pvid=9045b0472b9f4c17bdf98ec6037a8afc";
        Document document = Jsoup.parse(new URL(url), TIMEOUT_MILLIS);

        List<Comments> commentsList = new ArrayList<>();
        Element list = document.getElementById("J_goodsList");
        if (list == null) {
            // getElementById returns null when the id is absent; report "no results"
            // instead of failing with a NullPointerException.
            // NOTE(review): presumably happens when the site serves a different layout
            // or a verification page - confirm against live responses.
            return commentsList;
        }

        for (Element element : list.getElementsByTag("li")) {
            String src = element.getElementsByTag("img").attr("src");
            String price = element.getElementsByClass("p-price").eq(0).text();
            String name = element.getElementsByClass("p-name").eq(0).text();

            Comments comments = new Comments();
            comments.setImg(src);
            comments.setPrice(price);
            comments.setName(name);
            commentsList.add(comments);
        }
        return commentsList;
    }
}
3.编写工具类对应实体类
import lombok.Data;
/**
 * Entity holding one product entry scraped by JsoupUtil: image source, price text and name text.
 * Accessors such as setImg/setPrice/setName are not written by hand; they are generated by
 * Lombok's {@code @Data} annotation.
 */
@Data
public class Comments {
// Value of the "src" attribute taken from the entry's img tag.
private String img;
// Text of the entry's first element with class "p-price"; kept as raw text, not parsed to a number.
private String price;
// Text of the entry's first element with class "p-name".
private String name;
}
4.编写测试类进行测试
import com.example.Util.JsoupUtil;
/**
 * Manual smoke test: runs the JD crawler for the keyword "java" and prints each result.
 */
public class text {

    /**
     * Entry point; prints every scraped entry on its own line to standard output.
     *
     * @param args unused
     * @throws Exception if the crawler fails to fetch or parse the page
     */
    public static void main(String[] args) throws Exception {
        JsoupUtil crawler = new JsoupUtil();
        for (Object product : crawler.spaderJD("java")) {
            System.out.println(product);
        }
    }
}