使用 jsoup 爬取虎牙直播的信息,实时获取主播名字、直播间链接、推荐位、标题和人气等数据。
主要代码如下:
package main;
import java.util.ArrayList;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import Entity.HuyaDetails;
import Service.HuyaImpl;
import Service.IHuya;
/**
 * Crawls one game section of huya.com with jsoup, prints every live room found,
 * and hands each one to {@link IHuya#SaveDetails} for storage.
 *
 * <p>For every {@code li} under {@code #js-live-list} the streamer name, room link,
 * recommendation tag, title and popularity text are extracted via CSS selectors.
 * After all rooms are processed, the collected room links are printed as a numbered list.
 */
public class QueryHuya {

    /** Base URL of the per-game listing pages; the section key is appended to it. */
    private static final String BASE_URL = "http://www.huya.com/g/";

    /**
     * Section crawled when no command-line argument is supplied.
     * "wzry" is the Honor of Kings section; "lol" is League of Legends.
     */
    private static final String DEFAULT_SECTION = "wzry";

    /** CSS selector that matches one list item per live room. */
    private static final String ROOM_SELECTOR = "#js-live-list > li";

    /**
     * Program entry point.
     *
     * @param args optional; {@code args[0]}, when present and non-empty, is the section key
     *             appended to the base URL (for example {@code lol}). Without it the
     *             default section {@code wzry} is crawled, as before.
     */
    public static void main(String[] args) {
        System.out.println("小乌龟开始爬了。。。。");
        IHuya ihuya = new HuyaImpl();

        String section = DEFAULT_SECTION;
        if (args != null && args.length > 0 && args[0] != null && !args[0].isEmpty()) {
            section = args[0];
        }

        try {
            // Connection.get() already returns a Document, so no cast is required.
            Document document = Jsoup.connect(BASE_URL + section).get();
            Elements rooms = document.select(ROOM_SELECTOR);
            ArrayList<String> links = new ArrayList<String>();

            for (Element room : rooms) {
                String name = room.select("span > span.avatar.fl > i").text();         // streamer name
                String link = room.select("a.video-info.new-clickstat").attr("href");  // room link
                String tag = room.select("a.video-info.new-clickstat > em").text();    // recommendation tag
                String title = room.select("a.title.new-clickstat").text();            // title
                String jsNum = room.select("span > span.num > i.js-num").text();       // popularity

                // Build a fresh object per room. Reusing a single mutable instance would make
                // every saved room alias the same object if the storage layer keeps a reference
                // to what it is given (not visible from here); a new instance is safe either way.
                HuyaDetails huyaDetails = new HuyaDetails();
                huyaDetails.setName(name);
                huyaDetails.setTag(tag);
                huyaDetails.setTitle(title);
                huyaDetails.setJs_num(jsNum);
                huyaDetails.setLink(link);
                links.add(link);

                System.out.println(huyaDetails.toString());
                // Persist the room; comment this call out if storage is not wanted.
                ihuya.SaveDetails(huyaDetails);
            }

            System.out.println("小乌龟爬完了。。。。");

            // Print the collected room links as a 1-based numbered list.
            for (int i = 0; i < links.size(); i++) {
                System.out.println("第" + (i + 1) + "个 : " + links.get(i));
            }
        } catch (Exception e) {
            // Top-level boundary: report any fetch/parse/storage failure and exit normally.
            e.printStackTrace();
        }
    }
}