Java 爬虫技术
/*
爬虫demo
*/
/**
 * Fetches the page at {@code url} with jsoup and prints, for every news-list
 * anchor matched by the CSS selector below, its {@code href} attribute and
 * its own text.
 *
 * <p>An {@link IOException} raised while fetching the page is not propagated;
 * its stack trace is printed and the method returns normally.
 *
 * @param url address of the list page to fetch
 */
public static void jsoupList(String url) {
    try {
        Document document = Jsoup.connect(url).get();
        // Use a CSS selector to pick the <a> tag of each news-list entry, e.g.
        // <a href="https://voice.hupu.com/nba/2484553.html" target="_blank">headline text</a>
        // (selector written for the Hupu news list page).
        Elements elements = document.select("div.news-list > ul > li > div.list-hd > h4 > a");
        for (Element element : elements) {
            // Link to the detail page.
            String detailUrl = element.attr("href");
            // Headline: the text directly owned by the anchor.
            String title = element.ownText();
            System.out.println("详情页链接:"+detailUrl+" ,详情页标题:"+title);
        }
    } catch (IOException e) {
        // Best-effort demo: report the failure and carry on instead of aborting.
        e.printStackTrace();
    }
}
/**
 * Entry point: runs {@code jsoupList} against the Hupu NBA news list page.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    // The URL must be declared here; jsoupList(url) has nothing else to resolve it from.
    String url = "https://voice.hupu.com/nba";
    jsoupList(url);
}
实体类
/**
 * Simple mutable entity holding a detail-page URL and a title.
 */
public class CrawlerBase {

    /** Detail-page URL; {@code null} until set. */
    private String detailUrl;

    /** Title text; {@code null} until set. */
    private String headline;

    /**
     * @return the stored detail-page URL, or {@code null} if none was set
     */
    public String getD_url() {
        return this.detailUrl;
    }

    /**
     * @param d_url the detail-page URL to store
     */
    public void setD_url(String d_url) {
        this.detailUrl = d_url;
    }

    /**
     * @return the stored title, or {@code null} if none was set
     */
    public String getTitle() {
        return this.headline;
    }

    /**
     * @param title the title to store
     */
    public void setTitle(String title) {
        this.headline = title;
    }

    /**
     * Placeholder with no implementation yet; calling it has no effect.
     *
     * @param url currently ignored
     */
    public void jsoupList(String url) {
        // TODO Auto-generated method stub
    }
}