附件:jsoup-1.8.1.jar
import org.jsoup.Jsoup;
import org.jsoup.helper.Validate;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
/**
* Example program to list links from a URL.
*/
public class ListLinksFromURL {
    /** Category page fetched when no URL is supplied on the command line. */
    private static final String DEFAULT_URL =
            "http://blog.csdn.net/fei20121106/article/category/2924169";

    /** Browser-like User-Agent header sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.31 (KHTML, like Gecko) "
                    + "Chrome/26.0.1410.64 Safari/537.31";

    /**
     * Fetches a page and prints the links found in its article list.
     *
     * @param args optional; when a first argument is present it is used as the URL to fetch,
     *             otherwise the built-in default category page is used
     * @throws IOException if the page cannot be fetched
     */
    public static void main(String[] args) throws IOException {
        String url = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
        print("Fetching %s...", url);
        reslove(url);
    }

    /**
     * Prints, as Markdown list items, every link inside the element with id
     * {@code article_list} of the given page. Links are printed from last to first, and
     * links whose text is exactly "阅读" or "评论" are skipped.
     *
     * @param url address of the page to fetch
     * @throws IOException if the page cannot be fetched
     */
    public static void reslove(String url) throws IOException {
        Elements links = linksWithin(fetch(url), "article_list");
        print("\nLinks: (%d)", links.size());

        // Walk backwards so the last link on the page is printed first.
        for (int i = links.size() - 1; i >= 0; i--) {
            Element link = links.get(i);
            String text = link.text();
            if (!text.equals("阅读") && !text.equals("评论")) {
                print("- [%s](%s)", text, link.attr("abs:href"));
            }
        }
    }

    /**
     * Prints, as Markdown list items, every link inside the element with id
     * {@code article_content} of the given page. Links whose text is exactly "copy" or
     * "view plain" are skipped, and link texts are shortened to 35 characters.
     *
     * @param url address of the page to fetch
     * @throws IOException if the page cannot be fetched
     */
    public static void reslovepage(String url) throws IOException {
        Elements links = linksWithin(fetch(url), "article_content");
        print("\nLinks: (%d)", links.size());

        for (Element link : links) {
            String text = link.text();
            if (!text.equals("copy") && !text.equals("view plain")) {
                print(" - [%s](%s)", trim(text, 35), link.attr("abs:href"));
            }
        }
    }

    /** Downloads and parses the page at {@code url} using the shared User-Agent. */
    private static Document fetch(String url) throws IOException {
        return Jsoup.connect(url).userAgent(USER_AGENT).get();
    }

    /**
     * Returns all {@code a[href]} elements below the element with the given id, or an empty
     * collection when the page has no such element.
     */
    private static Elements linksWithin(Document doc, String containerId) {
        Element container = doc.getElementById(containerId);
        // getElementById yields null for a missing id; treat that as "no links" instead of
        // failing with a NullPointerException.
        return container == null ? new Elements() : container.select("a[href]");
    }

    /** Formats {@code msg} with {@code args} and writes it to standard output as one line. */
    private static void print(String msg, Object... args) {
        System.out.println(String.format(msg, args));
    }

    /**
     * Shortens {@code s} to at most {@code width} characters; a shortened string ends in ".".
     * A width below 1 yields the empty string.
     */
    private static String trim(String s, int width) {
        if (s.length() <= width) {
            return s;
        }
        if (width < 1) {
            return "";
        }
        return s.substring(0, width - 1) + ".";
    }
}
package demo.com.csdnjsoup;
import android.text.TextUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Example program to list links from a URL.
*/
public class ListLinksFromURL {
    /**
     * Markdown list entries collected by {@link #reslove(String)}. The list is shared and is
     * only ever appended to, so entries accumulate across calls.
     */
    public static List<String> sumSource = new ArrayList<>();

    /** Common prefix of the paginated article-list URLs; the page number is appended. */
    private static final String LIST_URL_PREFIX =
            "http://blog.csdn.net/fei20121106/article/list/";

    /** Number of the last list page fetched by {@link #getAll()}. */
    private static final int LAST_PAGE = 43;

    /** Browser-like User-Agent header sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.31 (KHTML, like Gecko) "
                    + "Chrome/26.0.1410.64 Safari/537.31";

    /**
     * Matches "[", one non-digit character, then a dotted run of numbers such as "4.1.10.8";
     * group 1 is the dotted run.
     */
    private static final Pattern INDEX_PATTERN = Pattern.compile("\\[\\D(\\d+(?:\\.\\d+)*)");

    /**
     * Orders entries by their leading dotted index, component by component as integers; when
     * one index is a prefix of the other, the shorter one comes first. Entries without a
     * parsable index sort after all indexed entries and are ordered among themselves as
     * plain strings, which keeps the ordering transitive.
     */
    private static final Comparator<String> BY_INDEX = new Comparator<String>() {
        @Override
        public int compare(String left, String right) {
            int[] a = parseIndex(left);
            int[] b = parseIndex(right);
            if (a == null && b == null) {
                return left.compareTo(right);
            }
            if (a == null) {
                return 1;
            }
            if (b == null) {
                return -1;
            }
            int shared = Math.min(a.length, b.length);
            for (int i = 0; i < shared; i++) {
                int byComponent = Integer.compare(a[i], b[i]);
                if (byComponent != 0) {
                    return byComponent;
                }
            }
            return Integer.compare(a.length, b.length);
        }
    };

    /**
     * Fetches list pages 1 through 43, sorts every collected entry by its leading dotted
     * index, then prints the number of entries followed by the entries, one per line.
     *
     * @throws IOException if any page cannot be fetched
     */
    public static void getAll() throws IOException {
        for (int page = 1; page <= LAST_PAGE; page++) {
            main(String.valueOf(page));
        }
        Collections.sort(sumSource, BY_INDEX);

        print("共有文章 %s...", sumSource.size() + "");
        StringBuilder listing = new StringBuilder();
        for (String item : sumSource) {
            listing.append("\n").append(item);
        }
        print("%s...", listing);
    }

    /**
     * Returns the part of {@code s} that starts two characters after the first "[" and stops
     * one character before the end of the string.
     *
     * @param s text to inspect; may be {@code null}
     * @return the extracted part, or the empty string when {@code s} is {@code null}, has no
     *         "[", or is too short to contain such a part
     */
    public static String getIndex(String s) {
        if (s == null) {
            return "";
        }
        int bracket = s.indexOf('[');
        if (bracket < 0) {
            return "";
        }
        int from = bracket + 2;
        int to = s.length() - 1;
        // Fewer than two characters after the bracket would make from > to; report "nothing
        // found" rather than throwing.
        return from <= to ? s.substring(from, to) : "";
    }

    /**
     * Tells whether {@code str} consists solely of digits.
     *
     * @param str text to check; may be {@code null}
     * @return {@code true} when {@code str} is non-empty and every character is a digit
     */
    public static boolean isNumeric(String str) {
        if (str == null || str.isEmpty()) {
            return false;
        }
        for (int i = 0; i < str.length(); i++) {
            if (!Character.isDigit(str.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Fetches one article-list page and collects its links.
     *
     * @param i page number, appended to the list URL
     * @throws IOException if the page cannot be fetched
     */
    public static void main(String i) throws IOException {
        String url = LIST_URL_PREFIX + i;
        print("Fetching %s...", url);
        reslove(url);
    }

    /**
     * Prints every link inside the element with id {@code article_list} of the given page as
     * a Markdown list item and appends the same entries to {@link #sumSource}. Links are
     * processed from last to first, and links whose text is exactly "阅读" or "评论" are
     * skipped. Finally prints how many links were kept.
     *
     * @param url address of the page to fetch
     * @throws IOException if the page cannot be fetched
     */
    public static void reslove(String url) throws IOException {
        Elements links = linksWithin(fetch(url), "article_list");
        print("\nLinks: (%d)", links.size());

        int sum = 0;
        // Walk backwards so the last link on the page is handled first.
        for (int i = links.size() - 1; i >= 0; i--) {
            Element link = links.get(i);
            String text = link.text();
            if (!text.equals("阅读") && !text.equals("评论")) {
                String href = link.attr("abs:href");
                print("- [%s](%s)", text, href);
                sum++;
                sumSource.add("- [" + text + "]( " + href + ")");
            }
        }
        print("共%s", sum + "");
    }

    /**
     * Prints, as Markdown list items, every link inside the element with id
     * {@code article_content} of the given page. Links whose text is exactly "copy" or
     * "view plain" are skipped, and link texts are shortened to 35 characters.
     *
     * @param url address of the page to fetch
     * @throws IOException if the page cannot be fetched
     */
    public static void reslovepage(String url) throws IOException {
        Elements links = linksWithin(fetch(url), "article_content");
        print("\nLinks: (%d)", links.size());

        for (Element link : links) {
            String text = link.text();
            if (!text.equals("copy") && !text.equals("view plain")) {
                print(" - [%s](%s)", trim(text, 35), link.attr("abs:href"));
            }
        }
    }

    /**
     * Reads the dotted index that starts two characters after the first "[" of an entry.
     *
     * @return the index components, or {@code null} when the entry has no such index
     */
    private static int[] parseIndex(String entry) {
        if (entry == null) {
            return null;
        }
        int bracket = entry.indexOf('[');
        if (bracket < 0) {
            return null;
        }
        Matcher matcher = INDEX_PATTERN.matcher(entry);
        // Only the first "[" counts; a later one inside the title must not be picked up.
        matcher.region(bracket, entry.length());
        if (!matcher.lookingAt()) {
            return null;
        }
        String[] parts = matcher.group(1).split("\\.");
        int[] index = new int[parts.length];
        try {
            for (int i = 0; i < parts.length; i++) {
                index[i] = Integer.parseInt(parts[i]);
            }
        } catch (NumberFormatException ignored) {
            // A component too large for an int: treat the entry as having no index.
            return null;
        }
        return index;
    }

    /** Downloads and parses the page at {@code url} using the shared User-Agent. */
    private static Document fetch(String url) throws IOException {
        return Jsoup.connect(url).userAgent(USER_AGENT).get();
    }

    /**
     * Returns all {@code a[href]} elements below the element with the given id, or an empty
     * collection when the page has no such element.
     */
    private static Elements linksWithin(Document doc, String containerId) {
        Element container = doc.getElementById(containerId);
        // getElementById yields null for a missing id; treat that as "no links" instead of
        // failing with a NullPointerException.
        return container == null ? new Elements() : container.select("a[href]");
    }

    /** Formats {@code msg} with {@code args} and writes it to standard output as one line. */
    private static void print(String msg, Object... args) {
        System.out.println(String.format(msg, args));
    }

    /**
     * Shortens {@code s} to at most {@code width} characters; a shortened string ends in ".".
     * A width below 1 yields the empty string.
     */
    private static String trim(String s, int width) {
        if (s.length() <= width) {
            return s;
        }
        if (width < 1) {
            return "";
        }
        return s.substring(0, width - 1) + ".";
    }
}