/** Logger obtained for the runtime class of this instance. */
protected Logger logger = LogManager.getLogger(getClass());

/** Id of the HTML element whose children are read as the article rows. */
private static final String articleListBox = "lstBox";

/** CSS class of the HTML element that holds the pagination information. */
private static final String pageBox = "page_nav";
/**
 * Requests the post list from write.blog.csdn.net page by page and prints,
 * for every collected entry, its title immediately followed by its number
 * to standard output.
 *
 * <p>The first page is requested as "postlist". The pagination element of
 * that page is then inspected for the total page count, and pages 2..N are
 * requested as "postlist/0/0/enabled/N". A missing pagination element, or
 * text without a recognizable number, is treated as "one page" instead of
 * failing with a NullPointerException or NumberFormatException.
 *
 * <p>URISyntaxException and IOException are logged through {@code logger}
 * and end the run; they are not rethrown.
 */
public void getHtml() {
    try {
        // NOTE(review): the proxy host/port are hard-coded; adjust for your network.
        HttpRequestTool.setProxy("10.37.84.117", "8080");
        Header[] headerList = {
                new BasicHeader("Host", "write.blog.csdn.net"),
                new BasicHeader("User-Agent",
                        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0"),
                new BasicHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"),
                new BasicHeader("Accept-Language", "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3"),
                new BasicHeader("Accept-Encoding", "gzip, deflate"),
                // Placeholder value: replace it with the cookie of your CSDN blog
                // home page, captured with a packet-capture tool.
                new BasicHeader(
                        "Cookie",
                        "/*用抓包工具获得你的CSDN博客主页的cookie*/"),
                new BasicHeader("Connection", "keep-alive") };

        // Accumulates the entries of every page.
        List<Title_Num> itemlist = new LinkedList<Title_Num>();

        // First page.
        String html = HttpRequestTool.getMethod("http", "write.blog.csdn.net", "80", "postlist", headerList, null);
        Source source = new Source(html);
        getArticlesOnePage(source, itemlist);

        // Remaining pages; the loop body never runs when there is only one page.
        int totalPages = readTotalPageCount(source);
        for (int page = 2; page <= totalPages; page++) {
            String pageSuffix = String.format("postlist/0/0/enabled/%d", page);
            html = HttpRequestTool.getMethod("http", "write.blog.csdn.net", "80", pageSuffix, headerList, null);
            source = new Source(html);
            getArticlesOnePage(source, itemlist);
        }

        // Output.
        for (Title_Num item : itemlist) {
            System.out.println(item.getTitle() + item.getNumber());
        }
    } catch (URISyntaxException e) {
        logger.error("Invalid URI while requesting the post list", e);
    } catch (IOException e) {
        logger.error("I/O failure while requesting the post list", e);
    }
}

/**
 * Reads the total page count from the first "span" inside the element
 * carrying the pagination class.
 *
 * @param source parsed first page of the post list
 * @return the page count, or 1 when the pagination element or its span is absent
 */
private static int readTotalPageCount(Source source) {
    Element pageNav = source.getFirstElementByClass(pageBox);
    if (pageNav == null) {
        return 1;
    }
    Element span = pageNav.getFirstElement("span");
    if (span == null) {
        return 1;
    }
    return parseTotalPageCount(span.getTextExtractor().toString());
}

/**
 * Extracts the first run of digits that has a non-digit character on both
 * sides and interprets it as the total page count.
 *
 * @param pageInfo text of the pagination element; may be null
 * @return the parsed count, or 1 when no such run exists or it is not a valid int
 */
private static int parseTotalPageCount(String pageInfo) {
    if (pageInfo == null) {
        return 1;
    }
    Matcher matcher = Pattern.compile("[^\\d](\\d{1,})[^\\d]").matcher(pageInfo);
    if (!matcher.find()) {
        return 1;
    }
    try {
        return Integer.parseInt(matcher.group(1));
    } catch (NumberFormatException e) {
        // Digit run too large for an int: fall back to a single page.
        return 1;
    }
}
/**
 * Reads the article rows of one parsed page and appends one Title_Num per
 * row to {@code itemlist}.
 *
 * <p>Rows are the children of the element whose id is {@code articleListBox}.
 * The first child is skipped (presumably a header row; confirm against the
 * page markup). For each remaining row the text of child 0 becomes the title
 * and the text of child 2, parsed as an int, becomes the number. A row with
 * fewer children is still added, with the corresponding values left unset.
 *
 * <p>When the list element is absent, a warning is logged and nothing is added.
 *
 * @param source   parsed HTML of one post-list page
 * @param itemlist list that receives the entries found on this page
 * @throws NumberFormatException if the text of child 2 of a row is not an int
 */
private void getArticlesOnePage(Source source, List<Title_Num> itemlist) {
    Element listBox = source.getElementById(articleListBox);
    if (listBox == null) {
        logger.warn("Element with id '" + articleListBox + "' not found; no articles read from this page");
        return;
    }

    List<Element> rows = listBox.getChildElements();
    // Start at index 1 rather than calling remove(0): the returned list may be
    // the parser's own list (TODO confirm), and remove(0) fails on an empty one.
    for (int i = 1; i < rows.size(); i++) {
        List<Element> columns = rows.get(i).getChildElements();
        Title_Num item = new Title_Num();
        if (columns.size() > 0) {
            item.setTitle(columns.get(0).getTextExtractor().toString());
        }
        if (columns.size() > 2) {
            item.setNumber(Integer.parseInt(columns.get(2).getTextExtractor().toString()));
        }
        itemlist.add(item);
    }
}
/**
 * Program entry point: creates a CsdnGet instance and runs its getHtml().
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {
    CsdnGet crawler = new CsdnGet();
    crawler.getHtml();
}