package com.spider;
import java.io.IOException;
import org.apache.http.impl.client.CloseableHttpClient;
import org.jsoup.Jsoup;
/**
 * Command-line scratch tool that downloads one chapter of a web novel and prints its text
 * to standard output. Three sources are handled, each by its own private helper:
 * Baidu Tieba ({@code getTieBa}), Qidian ({@code getQiDian}) and Zongheng
 * ({@code getZongHeng}).
 *
 * <p>All HTTP requests go through {@code HttpClientTestNew}; HTML is parsed with jsoup.
 *
 * @author nidayu
 * @date 2015/10/22
 */
public class TieBa {

    /**
     * Entry point. To fetch a chapter, uncomment one URL/chapter assignment below together
     * with the matching {@code get...} call. As committed, every call is commented out, so
     * running the program only obtains the HTTP client and closes it again.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // try-with-resources: the client is closed when main finishes, even on failure.
        try (CloseableHttpClient httpClient = HttpClientTestNew.getInstance()) {
            // Baidu Tieba
            String tiebaUrl = null, tiebaChapter = null;
            // Novel: Ling Yu
            // tiebaUrl = "http://tieba.baidu.com/f?kw=%E7%81%B5%E5%9F%9F&ie=gbk&tab=good&cid=0&pn=300";tiebaChapter = "一千四百三十四章";
            // Novel: Mo Tian Ji
            // tiebaUrl = "http://tieba.baidu.com/f?kw=%E9%AD%94%E5%A4%A9%E8%AE%B0&ie=utf-8&tab=good&cid=2";tiebaChapter = "1411";
            // getTieBa(httpClient, tiebaUrl, tiebaChapter);

            // Qidian
            String url = null, chapter = null;
            // Novel: Yi Jian Fei Xian
            // url = "http://read.qidian.com/BookReader/Rph5iVEas1Q1.aspx"; chapter = "二十二、";
            // Novel: Zhen Wu Shi Jie
            // url = "http://read.qidian.com/BookReader/SsH0QR3uBSU1.aspx";chapter = "第一章";
            // Novel: Sheng Tang Jue Qi
            // url = "http://read.qidian.com/BookReader/PHJRvEIGX-Y1.aspx";chapter = "第九章";
            // Novel: Wu Shen Ji
            // url = "http://read.qidian.com/BookReader/wjGb4uJndg01.aspx";chapter = "第一百零八章";
            // getQiDian(httpClient, url, chapter);

            // Zongheng
            String zonghengUrl = null, zonghengChapter = null;
            // Novel: Zhong Ji Jiao Shi
            // zonghengUrl = "http://book.zongheng.com/showchapter/347511.html"; zonghengChapter = "第一章";
            // getZongHeng(httpClient, zonghengUrl, zonghengChapter);
        } catch (IOException e) {
            System.err.println("Failed to close the HTTP client: " + e);
        }
    }

    /**
     * Prints a chapter that was posted as a thread on a Baidu Tieba board.
     *
     * <p>Downloads the listing page at {@code url}, takes the first link whose
     * {@code title} attribute contains {@code chapter}, prints that title, downloads the
     * linked page and prints the combined text of its {@code div} elements, one
     * space-separated token per line. When no such link exists, a message is written to
     * standard error and nothing further is fetched.
     *
     * @param httpClient client used for both requests
     * @param url        address of the board listing page
     * @param chapter    text that the thread link's {@code title} must contain
     */
    private static void getTieBa(CloseableHttpClient httpClient, String url, String chapter) {
        String[][] listingHeaders = {{"Host", "tieba.baidu.com"}};
        String listingHtml = HttpClientTestNew.getUrl(httpClient, url, listingHeaders);

        // Parse the listing once and read both attributes from the same anchor, so the
        // printed title always belongs to the link that is followed.
        String[] titleAndHref = Jsoup.parse(listingHtml)
                .select("a[title*=" + chapter + "][href]").stream()
                .findFirst()
                .map(a -> new String[] {a.attr("title"), a.attr("href")})
                .orElse(null);
        if (titleAndHref == null || titleAndHref[1].isEmpty()) {
            System.err.println("No Tieba thread found for chapter \"" + chapter + "\" at " + url);
            return;
        }
        System.out.println(titleAndHref[0]);

        // Listing links that do not start with "http" are prefixed with the site root.
        String threadUrl = titleAndHref[1];
        if (!threadUrl.startsWith("http")) {
            threadUrl = "http://tieba.baidu.com" + threadUrl;
        }

        String[][] threadHeaders = {{"Referer", url}, {"Host", "tieba.baidu.com"}};
        String threadHtml = HttpClientTestNew.getUrl(httpClient, threadUrl, threadHeaders);
        for (String token : Jsoup.parse(threadHtml).select("div").text().split(" ")) {
            System.out.println(token);
        }
    }

    /**
     * Prints a chapter hosted on Qidian.
     *
     * <p>Downloads the page at {@code url}, follows the first link whose text contains
     * {@code chapter} (printing the link target), then locates the first script on that
     * page whose {@code src} starts with {@code http://files}, downloads it and prints the
     * text of every {@code p} element in it. When either the chapter link or the script is
     * missing, a message is written to standard error and the method returns.
     *
     * @param httpClient client used for all requests
     * @param url        address of the book's chapter index page
     * @param chapter    text that the chapter link must contain
     */
    private static void getQiDian(CloseableHttpClient httpClient, String url, String chapter) {
        String indexHtml = HttpClientTestNew.getUrl(httpClient, url, null);
        String chapterUrl = Jsoup.parse(indexHtml).select("a").stream()
                .filter(a -> a.text().contains(chapter))
                .map(a -> a.attr("href"))
                .findFirst()
                .orElse("");
        if (chapterUrl.isEmpty()) {
            System.err.println("No Qidian link found for chapter \"" + chapter + "\" at " + url);
            return;
        }
        System.out.println(chapterUrl);

        String chapterHtml = HttpClientTestNew.getUrl(httpClient, chapterUrl, null);
        String scriptUrl = Jsoup.parse(chapterHtml).select("script[src^=http://files]").attr("src");
        if (scriptUrl.isEmpty()) {
            System.err.println("No chapter content script found on " + chapterUrl);
            return;
        }

        // NOTE(review): Content-Type, Accept-Ranges and Content-Encoding are normally
        // response headers; they are sent unchanged here to keep the request identical.
        String[][] scriptHeaders = {{"Host", "files.qidian.com"}, {"Referer", chapterUrl}, {"Accept-Encoding", "deflate, sdch"}, {"Content-Type", "text/plain"}, {"Accept-Ranges", "bytes"}, {"Content-Encoding", "gzip"}};
        // The last argument is presumably the charset used to decode the response -- confirm
        // against HttpClientTestNew.getUrl.
        String contentHtml = HttpClientTestNew.getUrl(httpClient, scriptUrl, scriptHeaders, "gbk");
        Jsoup.parse(contentHtml).select("p").forEach(p -> System.out.println(p.text()));
    }

    /**
     * Prints a chapter hosted on Zongheng.
     *
     * <p>Downloads the catalog page at {@code url}, reads the {@code chapterId} attribute
     * of the first {@code td} whose {@code chapterName} attribute starts with
     * {@code chapter}, derives the chapter page address from {@code url} and that id,
     * downloads it, and prints first the raw HTML and then the text of every {@code p}
     * element. When no matching cell exists, a message is written to standard error and
     * the method returns.
     *
     * @param httpClient client used for both requests
     * @param url        address of the catalog page; must end with {@code .html}
     * @param chapter    prefix that the cell's {@code chapterName} must start with
     * @throws IllegalArgumentException if {@code url} does not end with {@code .html}
     */
    private static void getZongHeng(CloseableHttpClient httpClient, String url, String chapter) {
        String htmlSuffix = ".html";
        // The chapter address is built by cutting this suffix off, so insist it is there.
        if (!url.endsWith(htmlSuffix)) {
            throw new IllegalArgumentException("Zongheng catalog URL must end with .html: " + url);
        }

        String catalogHtml = HttpClientTestNew.getUrl(httpClient, url, null);
        String chapterId = Jsoup.parse(catalogHtml).select("td[chapterName^=" + chapter + "]").attr("chapterId");
        if (chapterId.isEmpty()) {
            System.err.println("No Zongheng chapter found for \"" + chapter + "\" at " + url);
            return;
        }

        // .../showchapter/BOOK.html  ->  .../chapter/BOOK/CHAPTER_ID.html
        String catalogBase = url.substring(0, url.length() - htmlSuffix.length());
        String chapterUrl = (catalogBase + "/" + chapterId + htmlSuffix).replace("showchapter", "chapter");

        String[][] chapterHeaders = {{"Referer", url}, {"Host", "book.zongheng.com"}};
        String chapterHtml = HttpClientTestNew.getUrl(httpClient, chapterUrl, chapterHeaders);
        // The raw page is printed before the extracted paragraphs.
        System.out.println(chapterHtml);
        Jsoup.parse(chapterHtml).select("p").forEach(p -> System.out.println(p.text()));
    }
}
// Scraping data from Baidu Tieba
// Latest recommended article published on 2024-04-16 11:33:19