因为项目需求,我需要从网站上爬取中国地区编号信息,用于创建用户的运费模板。数据格式为:key 为省份(例如:湖南省),value 为该省下的城市及其编号(例如:长沙市,430100)。
被爬取的网站链接:https://blog.csdn.net/a497785609/article/details/7656330
try {
    // Download the article page that lists the China region codes.
    Document regionPageDoc = Jsoup.connect("https://blog.csdn.net/a497785609/article/details/7656330")
            .timeout(20000)
            .userAgent("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36")
            .get();
    // The data is read from the second <p> inside div.htmledit_views; rows are separated by <br>.
    Elements elements = regionPageDoc.select("div.htmledit_views").select("p").eq(1);

    // Outer key: "<token>;<token>" built from the first two tokens of a group
    // (presumably province name and province code -- confirm against the page).
    // Inner map: third token -> fourth token of the group (e.g. city name -> city code).
    // LinkedHashMap keeps the outer keys in the order they appear on the page.
    Map<String, Map<String, String>> regionsByProvince = new LinkedHashMap<>();

    for (String row : elements.html().split("<br>")) {
        List<String> rowTokens = Arrays.stream(row.split(" "))
                .filter(token -> !token.isEmpty())
                .collect(Collectors.toList());

        // The first 2 and last 3 tokens of a row are discarded. A row with fewer than
        // 5 tokens (e.g. a blank fragment after the last <br>) has nothing to discard
        // and would make subList throw, so it is skipped.
        if (rowTokens.size() < 5) {
            continue;
        }
        List<String> tokens = rowTokens.subList(2, rowTokens.size() - 3);

        // Tokens come in groups of four; an incomplete trailing group is ignored
        // instead of raising IndexOutOfBoundsException.
        for (int i = 0; i + 3 < tokens.size(); i += 4) {
            String key = tokens.get(i) + ";" + tokens.get(i + 1);
            // Each outer key gets its own inner map, so two different keys found in
            // the same row never end up sharing one map instance.
            regionsByProvince.computeIfAbsent(key, k -> new HashMap<>())
                    .put(tokens.get(i + 2), tokens.get(i + 3));
        }
    }
} catch (IOException e) {
    // Fetching the page failed; the error is only reported, not rethrown.
    e.printStackTrace();
}