1、添加jar包
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.8.3</version>
</dependency>
2、代码
package com.fy.microservice.government.service.impl;
import com.fy.microservice.government.utils.StringUtils;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Command-line crawler that downloads HTML pages located under {@code BASE_URL},
 * extracts the rows of the {@code citytable} / {@code countytable} /
 * {@code towntable} / {@code villagetable} element on each page, and prints one
 * {@code INSERT INTO `sys_org`} statement per row to standard output. Every row
 * that carries a link is followed recursively.
 *
 * <p>Known limitations (kept for backward compatibility):
 * <ul>
 *   <li>The directory a relative link is resolved against is hard-coded to
 *       {@code 33/} and {@code 33/01/} (see {@link #baseUrlFor(String)}), so only
 *       pages below {@code 33/3301.html} resolve correctly.
 *       NOTE(review): resolving links against the URL of the page they were found
 *       on would remove this restriction.</li>
 *   <li>The {@code pid} column receives the relative page path (for example
 *       {@code 33/3301.html}), not a numeric parent code.
 *       NOTE(review): confirm this is what the importing side expects.</li>
 * </ul>
 */
public class AreaUtils {

    /** Root directory that all crawled pages live under. */
    private static final String BASE_URL = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2021/";

    /** Compiled once; matches strings made only of ASCII digits, including the empty string. */
    private static final Pattern DIGITS = Pattern.compile("[0-9]*");

    /**
     * Starts the crawl at the page {@code 33/3301.html}.
     *
     * @param args ignored
     * @throws IOException if any page cannot be downloaded
     */
    public static void main(String[] args) throws IOException {
        doGet("33/3301.html");
    }

    /**
     * Downloads one page and hands its HTML to {@link #getData(String, String)}.
     *
     * <p>The whole response body is read and the connection is released
     * <em>before</em> parsing starts. Parsing recurses into child pages, so this
     * avoids keeping one open connection per nesting level, and it lets the page
     * be parsed as a single document instead of line by line.
     *
     * @param keywords relative page path as it appears in a link, for example
     *                 {@code 33/3301.html}; its length selects the directory it
     *                 is resolved against
     * @return always the empty string; results are printed, not returned
     * @throws IOException if the connection fails or the body cannot be read
     */
    public static String doGet(String keywords) throws IOException {
        URL url = new URL(baseUrlFor(keywords) + keywords);
        HttpURLConnection httpConnection = (HttpURLConnection) url.openConnection();
        String html = null;
        try {
            if (httpConnection.getResponseCode() == HttpURLConnection.HTTP_OK) {
                System.err.println("成功");
                html = readBody(httpConnection);
            } else {
                System.err.println("失败");
            }
        } finally {
            httpConnection.disconnect();
        }
        if (html != null) {
            getData(html, keywords);
        }
        return "";
    }

    /**
     * Parses one page, prints an INSERT statement for every usable table row and
     * crawls every row link via {@link #doGet(String)}.
     *
     * <p>The length of {@code parentId} selects the CSS class prefix, the
     * {@code level} value and how many leading characters of the code are kept:
     * 12 gives county / 4 / 6 characters, 14 gives town / 5 / 9 characters,
     * 17 gives village / 6 / full code, anything else gives city / 3 / full code.
     *
     * <p>Input without the expected table yields no output instead of a
     * {@code NullPointerException}; rows with fewer than two cells are reported on
     * standard error and skipped.
     *
     * @param html     HTML source of the page
     * @param parentId relative path of the page the HTML came from; also written
     *                 to the {@code pid} column
     * @return always a new, empty map; results are printed, not returned
     * @throws IOException if a linked child page cannot be downloaded
     */
    public static Map<String, String> getData(String html, String parentId) throws IOException {
        String classPrefix = "city";
        String level = "3";
        int codeLength = Integer.MAX_VALUE;
        int pathLength = parentId.length();
        if (pathLength == 12) {
            classPrefix = "county";
            level = "4";
            codeLength = 6;
        } else if (pathLength == 14) {
            classPrefix = "town";
            level = "5";
            codeLength = 9;
        } else if (pathLength == 17) {
            classPrefix = "village";
            level = "6";
        }

        Document doc = Jsoup.parse(html);
        // Elements.first() returns null when nothing matched.
        Element table = doc.body().getElementsByClass(classPrefix + "table").first();
        if (table == null || table.children().isEmpty()) {
            return new HashMap<>();
        }

        // Rows are looked up below the table's first child, as the original code did.
        Elements rows = table.children().get(0).getElementsByClass(classPrefix + "tr");
        for (int i = 0; i < rows.size(); i++) {
            Elements cells = rows.get(i).getElementsByTag("td");
            if (cells.size() < 2) {
                System.err.println("skipping row " + (i + 1) + " of " + parentId
                        + ": expected at least 2 cells, found " + cells.size());
                continue;
            }

            String code = prefixOf(cells.get(0).text(), codeLength);
            String href = cells.get(0).getElementsByTag("a").attr("href");

            String name = cells.get(1).text();
            // An all-digit second cell means the name sits in the third cell
            // (presumably an extra numeric column on village pages - TODO confirm).
            if (cells.size() > 2 && xx(name)) {
                name = cells.get(2).text();
            }

            // The id value is emitted unquoted; every quoted value is escaped.
            String quotedCode = sqlEscape(code);
            System.out.println("INSERT INTO `sys_org` (`id`, `name`, `pid`, `sort`, `level`, `longcode`, `code`) VALUES ("
                    + code + ",'" + sqlEscape(name) + "','" + sqlEscape(parentId) + "'," + (i + 1) + "," + level
                    + ",'" + quotedCode + "','" + quotedCode + "');");

            if (StringUtils.isNotBlank(href)) {
                doGet(href);
            }
        }
        return new HashMap<>();
    }

    /**
     * Tells whether a string consists only of ASCII digits.
     *
     * @param str text to test; must not be null
     * @return {@code true} if every character is {@code 0}-{@code 9}; the empty
     *         string also yields {@code true}
     */
    public static boolean xx(String str) {
        return DIGITS.matcher(str).matches();
    }

    /** Returns the directory URL a relative page path of this length is resolved against. */
    private static String baseUrlFor(String keywords) {
        switch (keywords.length()) {
            case 14:
                return BASE_URL + "33/";
            case 17:
                return BASE_URL + "33/01/";
            default:
                return BASE_URL;
        }
    }

    /** Reads the complete response body as text and closes the stream. */
    private static String readBody(HttpURLConnection connection) throws IOException {
        StringBuilder body = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(connection.getInputStream(), charsetOf(connection)))) {
            String line;
            while ((line = reader.readLine()) != null) {
                body.append(line).append('\n');
            }
        }
        return body.toString();
    }

    /**
     * Picks the charset named in the Content-Type response header, falling back to
     * UTF-8 so decoding no longer depends on the platform default.
     * NOTE(review): UTF-8 is assumed to be the encoding of these pages - confirm.
     */
    private static Charset charsetOf(HttpURLConnection connection) {
        String contentType = connection.getContentType();
        if (contentType != null) {
            for (String part : contentType.split(";")) {
                String token = part.trim();
                if (token.regionMatches(true, 0, "charset=", 0, 8)) {
                    try {
                        return Charset.forName(token.substring(8).trim().replace("\"", ""));
                    } catch (IllegalArgumentException ignored) {
                        // Unknown or malformed charset name: use the fallback below.
                    }
                }
            }
        }
        return StandardCharsets.UTF_8;
    }

    /** Returns at most the first {@code maxLength} characters of {@code value}. */
    private static String prefixOf(String value, int maxLength) {
        return value.length() > maxLength ? value.substring(0, maxLength) : value;
    }

    /** Doubles single quotes so scraped text cannot terminate a quoted SQL literal. */
    private static String sqlEscape(String value) {
        return value.replace("'", "''");
    }
}
3、输出结果