使用jsoup操作读取html标签
添加需要的jar包
创建ReadHtml类
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.File;
import java.util.*;
/**
 * Counts HTML tags in a document that has already been parsed by jsoup.
 *
 * <p>The class only reads from the wrapped {@link Document}; it never modifies it.
 */
public class ReadHtml {
    /** File parsed by {@link #main(String[])} when no path is given on the command line. */
    private static final String DEFAULT_HTML_PATH =
            "I:\\novel\\htmlStudy\\src\\cn\\ruiruo\\zuoye1\\bdqn.html";

    private final Document document;

    /**
     * Wraps a parsed document.
     *
     * @param document the parsed HTML document to inspect; must not be {@code null}
     * @throws NullPointerException if {@code document} is {@code null}
     */
    public ReadHtml(Document document) {
        // Fail here with a clear message instead of with a bare NPE in a later query.
        this.document = Objects.requireNonNull(document, "document");
    }

    /**
     * Counts the {@code div} elements in the document.
     *
     * @return the number of {@code div} elements
     */
    public int getDiv() {
        return document.getElementsByTag("div").size();
    }

    /**
     * Counts the {@code li} elements in the document.
     *
     * @return the number of {@code li} elements
     */
    public int getLi() {
        return document.getElementsByTag("li").size();
    }

    /**
     * Counts every element returned by {@link Document#getAllElements()}.
     *
     * <p>Per the jsoup documentation that collection includes the element it is called on,
     * so the document root itself is part of the total.
     *
     * @return the total number of elements
     */
    public int getAll() {
        return document.getAllElements().size();
    }

    /**
     * Builds a tag-name to occurrence-count table for every element matched by {@code "*"}.
     *
     * <p>The tally is done in a single pass over the selection. Tag names are never fed back
     * into {@code select(...)}: a name is not necessarily a valid CSS type selector (the
     * document root reports {@code "#root"}, which would be read as an id selector, and names
     * containing {@code ':'} would be read as pseudo-selectors), so re-querying by name gives
     * wrong counts or fails to parse.
     *
     * @return a new mutable map from tag name to the number of elements with that name;
     *     the values add up to the size of the {@code "*"} selection
     */
    public Map<String, Integer> findAll() {
        Map<String, Integer> counts = new HashMap<>();
        for (Element element : document.select("*")) {
            String tagName = element.tagName();
            Integer seen = counts.get(tagName);
            // First sighting starts at 1; every later sighting adds one.
            counts.put(tagName, seen == null ? 1 : seen + 1);
        }
        return counts;
    }

    /**
     * Parses an HTML file and prints how often each tag occurs.
     *
     * <p>Exercise: parse bdqn.html with Java and report the number of occurrences of each tag,
     * e.g. "div 13" and "li 20", by storing the tags in a map (add a tag on first sight,
     * otherwise increment its count).
     *
     * @param args optional; {@code args[0]} is the path of the HTML file to parse. When absent,
     *     {@link #DEFAULT_HTML_PATH} is used.
     * @throws Exception if the file cannot be read or parsed
     */
    public static void main(String[] args) throws Exception {
        String htmlPath = args.length > 0 ? args[0] : DEFAULT_HTML_PATH;
        // A live page could be analysed instead with Jsoup.connect(url).get().
        Document document = Jsoup.parse(new File(htmlPath), "utf-8");
        ReadHtml reader = new ReadHtml(document);

        System.out.println("div:" + reader.getDiv() + "次");
        System.out.println("li:" + reader.getLi() + "次");
        System.out.println("共有标签" + reader.getAll() + "个");
        System.out.println("---------------------------------------");

        Map<String, Integer> counts = reader.findAll();
        // Iterate entries directly rather than looking each key up again.
        for (Map.Entry<String, Integer> entry : counts.entrySet()) {
            System.out.println(entry.getKey() + "标签有:" + entry.getValue() + "个\n");
        }
        System.out.println("一共有:" + counts.size() + "个标签");
        System.out.println(Arrays.toString(counts.values().toArray()));
        System.out.println(Arrays.toString(counts.keySet().toArray()));
    }
}