1、已知网页文件的本地路径时,使用 Jsoup 简单解析网页的方法
// Parse a local HTML file. The third argument is the base URI that Jsoup
// uses to turn relative links found in the document into absolute ones.
File input = new File("E:\\Jsoup.html");
Document doc = Jsoup.parse(input, "UTF-8", "http://www.iie.ac.cn/");

// Print the absolute URL of every anchor that carries an href attribute.
// The attribute selector replaces the nested select + getElementsByAttribute
// loops; only the <a> elements themselves are visited.
for (Element link : doc.select("a[href]")) {
    // The "abs:" prefix asks Jsoup for the absolute form of the attribute value.
    String absHref = link.attr("abs:href");
    System.out.println(absHref);
}

// Print the absolute URL of every image that carries a src attribute.
for (Element image : doc.select("img[src]")) {
    String absSrc = image.attr("abs:src");
    System.out.println(absSrc);
}