import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.util.*;
/**
 * Recursively walks an HTML directory listing with jsoup, printing every
 * hyperlink it encounters and collecting the absolute link targets.
 */
public class testDOM {

    /** Separator printed between a link's absolute URL and its visible text. */
    private static final String SEPARATOR = "............................";

    public static void main(String[] args) {
        String url = "https://mirrors.edge.kernel.org/pub/software/libs/glibc/hjl/";
        getAllUrl(url);
    }

    /**
     * Collects the absolute targets of all {@code a[href]} links on the page at
     * {@code url} and, recursively, on every sub-directory page it links to.
     * Each link is also printed to standard output as it is discovered.
     *
     * <p>A link is treated as a sub-directory when its visible text ends with
     * {@code "/"} but not with {@code "../"} (the parent-directory entry).
     *
     * @param url address of the page to start from
     * @return every link target found, in discovery order; pages that fail to
     *         load are reported on standard error and contribute no links
     */
    public static List<String> getAllUrl(String url) {
        List<String> found = new ArrayList<String>();
        collectLinks(url, new HashSet<String>(), found);
        return found;
    }

    /** Fetches one page, appends its links to {@code found}, and descends into sub-directories. */
    private static void collectLinks(String url, Set<String> visited, List<String> found) {
        // Skip pages already fetched so cyclic links cannot cause endless recursion.
        if (!visited.add(url)) {
            return;
        }
        try {
            Document doc = Jsoup.connect(url).get();
            Elements links = doc.select("a[href]");
            for (Element link : links) {
                String linkHref = link.attr("abs:href");
                String linkText = link.text();
                System.out.println(linkHref + SEPARATOR + linkText);
                found.add(linkHref);
                boolean isSubDirectory = linkText.endsWith("/") && !linkText.endsWith("../");
                // An empty abs:href means the URL could not be resolved; do not try to fetch it.
                if (isSubDirectory && !linkHref.isEmpty()) {
                    // Results of the nested crawl go into the same list, so they
                    // are part of what the caller receives.
                    collectLinks(linkHref, visited, found);
                }
            }
        } catch (IOException e) {
            // Best-effort crawl: report the failing page and keep going.
            System.err.println("Failed to fetch " + url);
            e.printStackTrace();
        }
    }
}
改进版如下(递归得到的子目录链接也会一并收集到返回的列表中,并在 main 中统一打印):
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.util.*;
/**
 * Recursively walks an HTML directory listing with jsoup and prints the
 * absolute target of every hyperlink found, including those on sub-directory
 * pages.
 */
public class testDOM {

    public static void main(String[] args) {
        String url = "https://mirrors.edge.kernel.org/pub/software/libs/glibc/hjl/";
        List<String> allLinks = getAllUrl(url);
        for (String link : allLinks) {
            System.out.println(link);
        }
    }

    /**
     * Collects the absolute targets of all {@code a[href]} links on the page at
     * {@code url} and, recursively, on every sub-directory page it links to.
     *
     * <p>A link is treated as a sub-directory when its visible text ends with
     * {@code "/"} but not with {@code "../"} (the parent-directory entry). The
     * links found inside a sub-directory are listed before the link to that
     * sub-directory itself.
     *
     * <p>Each call returns a fresh list, so the method can be used without any
     * prior setup and repeated calls do not accumulate into one another.
     *
     * @param url address of the page to start from
     * @return every link target found; pages that fail to load are reported on
     *         standard error and contribute no links
     */
    public static List<String> getAllUrl(String url) {
        List<String> collected = new ArrayList<String>();
        crawl(url, new HashSet<String>(), collected);
        return collected;
    }

    /** Fetches one page, descends into its sub-directories, and appends its links to {@code collected}. */
    private static void crawl(String url, Set<String> visited, List<String> collected) {
        // Skip pages already fetched so cyclic links cannot cause endless recursion.
        if (!visited.add(url)) {
            return;
        }
        try {
            Document doc = Jsoup.connect(url).get();
            Elements links = doc.select("a[href]");
            for (Element link : links) {
                String linkHref = link.attr("abs:href");
                String linkText = link.text();
                boolean isSubDirectory = linkText.endsWith("/") && !linkText.endsWith("../");
                // An empty abs:href means the URL could not be resolved; do not try to fetch it.
                if (isSubDirectory && !linkHref.isEmpty()) {
                    crawl(linkHref, visited, collected);
                }
                // Added after the descent, so a directory's contents precede the directory link.
                collected.add(linkHref);
            }
        } catch (IOException e) {
            // Best-effort crawl: report the failing page and keep going.
            System.err.println("Failed to fetch " + url);
            e.printStackTrace();
        }
    }
}
所需jar包链接:密码: 9ir5