导入依赖
<!-- Maven dependency for jsoup; the crawler code imports its classes from org.jsoup.* -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.12.1</version>
</dependency>
代码
package com.xsh.crawler;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
/**
 * Minimal jsoup-based crawler that fetches a page and lists the images
 * found in its list container.
 */
public class Crawler {

    /** CSS selector matching the {@code <img>} elements inside the list container. */
    private static final String IMAGE_SELECTOR =
            "div#sy_load > ul > li > div.syl_pic > a > img";

    /**
     * Fetches the page at {@code url} and prints one line per matched image to
     * standard output, containing the image URL and its {@code alt} text.
     *
     * <p>An {@link IOException} raised while fetching is reported on standard
     * error (with the failing URL) and is not propagated to the caller.
     *
     * @param url address of the page to fetch
     */
    public void jsoupList(String url) {
        try {
            Document document = Jsoup.connect(url).get();
            Elements images = document.select(IMAGE_SELECTOR);
            for (Element image : images) {
                String imageUrl = resolveImageUrl(image);
                String title = image.attr("alt");
                System.out.println("链接" + imageUrl + " 标题" + title);
            }
        } catch (IOException e) {
            // Say which URL failed; the bare stack trace alone does not include it.
            System.err.println("Failed to fetch " + url);
            e.printStackTrace();
        }
    }

    /**
     * Returns the image's {@code src} resolved against the document's base URI,
     * so relative and protocol-relative values become usable absolute URLs.
     * Falls back to the raw attribute value when resolution yields nothing.
     */
    private static String resolveImageUrl(Element image) {
        String absolute = image.absUrl("src");
        return absolute.isEmpty() ? image.attr("src") : absolute;
    }

    /**
     * Demo entry point: lists the images on the hard-coded start page.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String url = "https://www.ivsky.com/";
        Crawler crawler = new Crawler();
        crawler.jsoupList(url);
    }
}