package collect;
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Walks the oschina.net news list page by page and prints the title and link
 * of every entry found.
 *
 * <p>Pages are requested from the site's "get more news" AJAX endpoint,
 * starting at page 1, until a page comes back with no items.
 */
public class FenYeXiala {

    /** Site root prepended to links that are not already absolute. */
    private static final String HOST = "https://www.oschina.net";

    /** Browser-like User-Agent header sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36";

    /**
     * Entry point: fetches consecutive pages and stops at the first empty one.
     *
     * @param args unused
     * @throws IOException if a page request fails
     */
    public static void main(String[] args) throws IOException {
        for (int pageNumber = 1; ; pageNumber++) {
            int dataRows = getPageNum(pageNumber);
            System.out.println("当前执行:" + pageNumber + "页,数据数:" + dataRows);
            // An empty page means there is nothing more to read: stop.
            if (dataRows == 0) {
                break;
            }
        }
    }

    /**
     * Prints the title and absolute URL of each news item, skipping
     * advertisements and items that contain no link.
     *
     * @param items the item elements selected from one result page; a
     *              {@code null} value is treated as an empty page
     */
    public static void forEachData(Elements items) {
        if (items == null) {
            return;
        }
        for (Element element : items) {
            // Filter advertisements. The attribute must be read from the current
            // element: Elements.attr() answers for the first element in the whole
            // collection that has the attribute, so checking the collection would
            // drop every item on a page that contains a single ad.
            if (!element.attr("data-tracepid").isEmpty()) {
                continue;
            }
            // Look the anchor up once; an item without one has nothing to print.
            Element link = element.select("a").first();
            if (link == null) {
                continue;
            }
            // Title text
            String title = link.text();
            // Title link
            String url = toAbsoluteUrl(link.attr("href"));
            System.out.println(title);
            System.out.println(url);
            System.out.println("-----------------------------------------------------");
        }
    }

    /**
     * Fetches one page of the news list, prints its items, and reports how
     * many item elements the page contained.
     *
     * <p>Despite the name, this does not return a total page count; the value
     * returned is the number of {@code div[class=item box]} elements on the
     * requested page (advertisements included).
     *
     * @param pageNum 1-based page index passed as the {@code p} query parameter
     * @return number of item elements found on that page; 0 ends the crawl
     * @throws IOException if the HTTP request fails
     */
    public static int getPageNum(int pageNum) throws IOException {
        String pageUrl = "https://www.oschina.net/action/ajax/get_more_news_list?newsType=&p=" + pageNum;
        Document document = Jsoup.connect(pageUrl)
                .userAgent(USER_AGENT)
                .post();
        Elements items = document.select("div[class=item box]");
        forEachData(items);
        return items.size();
    }

    /** Turns an href into an absolute URL, leaving already-absolute links untouched. */
    private static String toAbsoluteUrl(String href) {
        if (href.startsWith("http://") || href.startsWith("https://")) {
            return href;
        }
        // Protocol-relative link: only the scheme is missing.
        if (href.startsWith("//")) {
            return "https:" + href;
        }
        return HOST + href;
    }
}
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Walks the oschina.net news list page by page and prints the title and link
 * of every entry found.
 *
 * <p>Pages are requested from the site's "get more news" AJAX endpoint,
 * starting at page 1, until a page comes back with no items.
 *
 * <p>NOTE(review): a class with this same name is also declared earlier in
 * this file; one of the two copies should be removed for the file to compile.
 */
public class FenYeXiala {

    /** Site root prepended to links that are not already absolute. */
    private static final String HOST = "https://www.oschina.net";

    /** Browser-like User-Agent header sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36";

    /**
     * Entry point: fetches consecutive pages and stops at the first empty one.
     *
     * @param args unused
     * @throws IOException if a page request fails
     */
    public static void main(String[] args) throws IOException {
        for (int pageNumber = 1; ; pageNumber++) {
            int dataRows = getPageNum(pageNumber);
            System.out.println("当前执行:" + pageNumber + "页,数据数:" + dataRows);
            // An empty page means there is nothing more to read: stop.
            if (dataRows == 0) {
                break;
            }
        }
    }

    /**
     * Prints the title and absolute URL of each news item, skipping
     * advertisements and items that contain no link.
     *
     * @param items the item elements selected from one result page; a
     *              {@code null} value is treated as an empty page
     */
    public static void forEachData(Elements items) {
        if (items == null) {
            return;
        }
        for (Element element : items) {
            // Filter advertisements. The attribute must be read from the current
            // element: Elements.attr() answers for the first element in the whole
            // collection that has the attribute, so checking the collection would
            // drop every item on a page that contains a single ad.
            if (!element.attr("data-tracepid").isEmpty()) {
                continue;
            }
            // Look the anchor up once; an item without one has nothing to print.
            Element link = element.select("a").first();
            if (link == null) {
                continue;
            }
            // Title text
            String title = link.text();
            // Title link
            String url = toAbsoluteUrl(link.attr("href"));
            System.out.println(title);
            System.out.println(url);
            System.out.println("-----------------------------------------------------");
        }
    }

    /**
     * Fetches one page of the news list, prints its items, and reports how
     * many item elements the page contained.
     *
     * <p>Despite the name, this does not return a total page count; the value
     * returned is the number of {@code div[class=item box]} elements on the
     * requested page (advertisements included).
     *
     * @param pageNum 1-based page index passed as the {@code p} query parameter
     * @return number of item elements found on that page; 0 ends the crawl
     * @throws IOException if the HTTP request fails
     */
    public static int getPageNum(int pageNum) throws IOException {
        String pageUrl = "https://www.oschina.net/action/ajax/get_more_news_list?newsType=&p=" + pageNum;
        Document document = Jsoup.connect(pageUrl)
                .userAgent(USER_AGENT)
                .post();
        Elements items = document.select("div[class=item box]");
        forEachData(items);
        return items.size();
    }

    /** Turns an href into an absolute URL, leaving already-absolute links untouched. */
    private static String toAbsoluteUrl(String href) {
        if (href.startsWith("http://") || href.startsWith("https://")) {
            return href;
        }
        // Protocol-relative link: only the scheme is missing.
        if (href.startsWith("//")) {
            return "https:" + href;
        }
        return HOST + href;
    }
}