1、导入依赖(下面的代码还使用了 com.alibaba.fastjson,需要另外引入 fastjson 依赖)
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.14.3</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.5</version>
</dependency>
2、代码
package com.lxq.excel;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.lxq.excel.util.HttpClientUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.net.URI;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
/**
 * Crawls a tree of HTML pages starting at {@link #INDEX_URL} and appends one CSV line per
 * leaf row to a per-province file.
 *
 * <p>The crawl descends through five levels of table rows, identified by the CSS classes
 * {@code provincetr}, {@code citytr}, {@code countytr}, {@code towntr} and {@code villagetr}.
 * Each level adds its code/name pair to a JSON object that is handed to the next level as a
 * JSON string. A row that carries no link is treated as a leaf and written immediately.
 *
 * <p>The class keeps the current output file name in the static field {@link #fileName}, so it
 * is not safe to run several crawls concurrently.
 *
 * @author lixiaoqiang
 * @since 2023/2/22 16:18
 */
public class GetCityCode {

    /** CSS class names of the table rows at each level, ordered from province down to village. */
    private static final String[] ROW_CLASSES = new String[] {
        "provincetr", "citytr", "countytr", "towntr", "villagetr"
    };

    /** Entry page that lists the province links. */
    private static final String INDEX_URL =
            "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2022/index.html";

    /** Directory prefix of the CSV files; the per-province file name is appended to it. */
    private static final String OUTPUT_DIR = "E:\\md\\tmp\\";

    /** JSON keys written to each CSV line, in column order. They also form the header line. */
    private static final String[] CSV_COLUMNS = new String[] {
        "cityCode", "cityName", "countryCode", "countryName",
        "townCode", "townName", "villageTrCode", "villageTrName"
    };

    /**
     * Index of the first province link that is actually crawled; earlier links are only printed.
     * NOTE(review): this keeps the original hard-coded {@code i > 12} check. It looks like a
     * resume point left over from an interrupted run - confirm, and set to 0 to crawl everything.
     */
    private static final int FIRST_PROVINCE_INDEX = 13;

    /** Maximum number of download attempts made by {@link #fetchWithRetry(String)}. */
    private static final int MAX_FETCH_ATTEMPTS = 5;

    /** Base pause between download attempts; multiplied by the attempt number. */
    private static final long RETRY_DELAY_MILLIS = 1000L;

    /** Name of the CSV file rows are appended to; set for every province by testProvince. */
    static String fileName;

    /**
     * Runs the crawl from the province index and prints the stack trace of any failure.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            testProvince();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Scratch method: divides by zero inside a try block, so it always prints "catch" and
     * returns before "last" can be printed.
     */
    static void testCatch() {
        try {
            int a = 10 / 0;
        } catch (Exception e) {
            System.out.println("catch");
            return;
        }
        System.out.println("last");
    }

    /**
     * Province level: reads the index page, prints every province link and crawls the cities of
     * each province from {@link #FIRST_PROVINCE_INDEX} onwards.
     *
     * <p>{@link #fileName} is set to {@code <province name>.csv} for every link, including the
     * ones that are skipped.
     *
     * @throws Exception if a page cannot be downloaded or a row has an unexpected layout
     */
    public static void testProvince() throws Exception {
        Document doc = Jsoup.connect(INDEX_URL).get();
        // Select the anchors straight from the rows; no need to serialise and re-parse them.
        Elements links = doc.getElementsByClass("provincetr").select("a");
        for (int i = 0; i < links.size(); i++) {
            Element link = links.get(i);
            String pH = link.attr("href");
            String pName = link.text();
            System.out.println(pH);
            System.out.println(pName);
            fileName = pName + ".csv";
            if (i >= FIRST_PROVINCE_INDEX) {
                testCity(resolveSibling(INDEX_URL, pH));
            }
        }
    }

    /**
     * Returns the row class that follows {@code curSrc} in the province-to-village order.
     *
     * @param curSrc a row class name such as {@code "citytr"}
     * @return the next row class, or {@code null} if {@code curSrc} is the last level or unknown
     */
    public static String selectNextClassSrcByCurSrc(String curSrc) {
        System.out.println("aaaaaaaaa==" + curSrc);
        // The last entry has no successor, so it is never a candidate.
        for (int i = 0; i < ROW_CLASSES.length - 1; i++) {
            if (ROW_CLASSES[i].equals(curSrc)) {
                return ROW_CLASSES[i + 1];
            }
        }
        return null;
    }

    /**
     * City level: visits every {@code citytr} row of the page and descends into its county page.
     * A row without a link is written as a leaf, the same way the county and town levels do.
     *
     * @param url address of the page that lists the cities of one province
     * @throws Exception if a page cannot be downloaded or a row has an unexpected layout
     */
    public static void testCity(String url) throws Exception {
        Document doc = Jsoup.connect(url).get();
        for (Element row : doc.getElementsByClass("citytr")) {
            Elements links = row.select("a");
            JSONObject json = new JSONObject();
            if (links.isEmpty()) {
                Elements cells = row.select("td");
                json.put("cityCode", cells.get(0).text());
                json.put("cityName", cells.get(1).text());
                write(json);
            } else {
                // First link text is the code, second is the name; both share one href.
                json.put("cityCode", links.get(0).text());
                json.put("cityName", links.get(1).text());
                testCountry(resolveSibling(url, links.get(0).attr("href")), json.toJSONString());
            }
        }
    }

    /**
     * County level: visits every {@code countytr} row of the page. Linked rows descend into the
     * town page; unlinked rows are written as leaves.
     *
     * <p>The values are stored under the keys {@code countryCode} / {@code countryName}; the
     * spelling is kept because the CSV header uses the same names.
     *
     * @param url address of the page that lists the counties of one city
     * @param j   JSON string holding the fields collected by the levels above
     * @throws Exception if a page cannot be downloaded or a row has an unexpected layout
     */
    public static void testCountry(String url, String j) throws Exception {
        Document doc = Jsoup.connect(url).get();
        for (Element row : doc.getElementsByClass("countytr")) {
            Elements links = row.select("a");
            if (links.isEmpty()) {
                Elements cells = row.select("td");
                String code = cells.get(0).text();
                String name = cells.get(1).text();
                System.out.println("code===" + code);
                System.out.println("name===" + name);
                JSONObject json = JSON.parseObject(j);
                json.put("countryCode", code);
                json.put("countryName", name);
                write(json);
            } else {
                JSONObject json = JSON.parseObject(j);
                json.put("countryCode", links.get(0).text());
                json.put("countryName", links.get(1).text());
                testTown(resolveSibling(url, links.get(0).attr("href")), json.toJSONString());
            }
        }
    }

    /**
     * Town level with retry: downloads the page with {@link #fetchWithRetry(String)} and then
     * processes its {@code towntr} rows.
     *
     * <p>The number of attempts is bounded; the previous unbounded self-recursion ended in a
     * {@code StackOverflowError} when a page kept failing.
     *
     * @param url address of the page that lists the towns of one county
     * @param j   JSON string holding the fields collected by the levels above
     * @throws Exception if the page still cannot be downloaded after all attempts, or a row has
     *                   an unexpected layout
     */
    public static void testTown(String url, String j) throws Exception {
        crawlTowns(fetchWithRetry(url), url, j);
    }

    /**
     * Town level without retry: same processing as {@link #testTown(String, String)} but the
     * page is downloaded with a single attempt.
     *
     * @param url address of the page that lists the towns of one county
     * @param j   JSON string holding the fields collected by the levels above
     * @throws Exception if the page cannot be downloaded or a row has an unexpected layout
     */
    public static void testTown2(String url, String j) throws Exception {
        crawlTowns(Jsoup.connect(url).get(), url, j);
    }

    /**
     * Village level with retry: downloads the page with {@link #fetchWithRetry(String)}, parses
     * all {@code villagetr} rows and only then writes them.
     *
     * <p>Only the download is retried. A row with an unexpected layout now surfaces as an
     * unchecked exception instead of triggering an endless retry of the same page.
     *
     * @param url address of the page that lists the villages of one town
     * @param j   JSON string holding the fields collected by the levels above
     * @throws UncheckedIOException if the page still cannot be downloaded after all attempts
     */
    public static void testVillageTr(String url, String j) {
        Document doc;
        try {
            doc = fetchWithRetry(url);
        } catch (IOException e) {
            throw new UncheckedIOException("giving up on " + url, e);
        }
        Elements rows = doc.getElementsByClass("villagetr");
        System.err.println(rows.size());
        // Collect first so that a malformed row does not leave a half-written page behind.
        List<JSONObject> parsed = new ArrayList<>(rows.size());
        for (Element row : rows) {
            parsed.add(villageRow(row, j));
        }
        for (JSONObject json : parsed) {
            write(json);
        }
    }

    /**
     * Village level without retry: downloads the page once and writes each {@code villagetr}
     * row as soon as it has been parsed.
     *
     * @param url address of the page that lists the villages of one town
     * @param j   JSON string holding the fields collected by the levels above
     * @throws Exception if the page cannot be downloaded or a row has an unexpected layout
     */
    public static void testVillageTr2(String url, String j) throws Exception {
        Document doc = Jsoup.connect(url).get();
        Elements rows = doc.getElementsByClass("villagetr");
        System.out.println(rows.size());
        for (Element row : rows) {
            write(villageRow(row, j));
        }
    }

    /** Processes the {@code towntr} rows of an already downloaded town page. */
    private static void crawlTowns(Document doc, String url, String j) {
        for (Element row : doc.getElementsByClass("towntr")) {
            Elements links = row.select("a");
            if (links.isEmpty()) {
                Elements cells = row.select("td");
                String code = cells.get(0).text();
                String name = cells.get(1).text();
                System.out.println("code===" + code);
                System.out.println("name===" + name);
                JSONObject json = JSON.parseObject(j);
                json.put("townCode", code);
                json.put("townName", name);
                write(json);
            } else {
                JSONObject json = JSON.parseObject(j);
                json.put("townCode", links.get(0).text());
                json.put("townName", links.get(1).text());
                testVillageTr(resolveSibling(url, links.get(0).attr("href")), json.toJSONString());
            }
        }
    }

    /**
     * Builds the JSON object for one {@code villagetr} row: parent fields from {@code j} plus
     * the text of the first cell as code and of the third cell as name.
     */
    private static JSONObject villageRow(Element row, String j) {
        Elements cells = row.select("td");
        if (row.select("a").isEmpty()) {
            System.out.println(cells.size());
        }
        // Cell text covers both plain and linked cells, so one code path serves both layouts.
        JSONObject json = JSON.parseObject(j);
        json.put("villageTrCode", cells.get(0).text());
        json.put("villageTrName", cells.get(2).text());
        return json;
    }

    /** Resolves {@code href} against the directory part of {@code pageUrl}. */
    private static String resolveSibling(String pageUrl, String href) {
        return pageUrl.substring(0, pageUrl.lastIndexOf("/") + 1) + href;
    }

    /**
     * Downloads a page, retrying on {@link IOException} up to {@link #MAX_FETCH_ATTEMPTS} times
     * with a growing pause between attempts.
     *
     * @throws IOException the failure of the last attempt, or of the current attempt if the
     *                     thread is interrupted while waiting
     */
    private static Document fetchWithRetry(String url) throws IOException {
        for (int attempt = 1; ; attempt++) {
            try {
                return Jsoup.connect(url).get();
            } catch (IOException e) {
                System.err.println("fetch failed (attempt " + attempt + "/" + MAX_FETCH_ATTEMPTS
                        + "): " + url + " - " + e);
                if (attempt >= MAX_FETCH_ATTEMPTS) {
                    throw e;
                }
                try {
                    Thread.sleep(RETRY_DELAY_MILLIS * attempt);
                } catch (InterruptedException interrupted) {
                    // Keep the interrupt visible to callers and stop retrying.
                    Thread.currentThread().interrupt();
                    throw e;
                }
            }
        }
    }

    /**
     * Prints the JSON object and appends it as one CSV line to {@code OUTPUT_DIR + fileName}.
     * The header line is written first when the file does not exist yet.
     *
     * <p>A key that was never set is written as the text {@code null}. A write failure is
     * reported on standard error instead of being dropped silently; the crawl continues.
     *
     * <p>NOTE(review): bytes are produced with the platform default charset, as before. Confirm
     * which encoding the consumers of the CSV expect before pinning one.
     *
     * @param json fields of the row; read by the keys listed in {@link #CSV_COLUMNS}
     */
    private static void write(JSONObject json) {
        System.out.println(json.toJSONString());

        File file = new File(OUTPUT_DIR + fileName);
        File dir = file.getParentFile();
        if (dir != null) {
            // Without the directory every append would fail and lose the row.
            dir.mkdirs();
        }
        boolean newFile = !file.exists();

        String newLine = System.getProperty("line.separator");
        StringBuilder out = new StringBuilder();
        if (newFile) {
            out.append(String.join(",", CSV_COLUMNS)).append(newLine);
        }
        for (int c = 0; c < CSV_COLUMNS.length; c++) {
            if (c > 0) {
                out.append(',');
            }
            out.append(json.getString(CSV_COLUMNS[c]));
        }
        out.append(newLine);

        // try-with-resources closes the stream even when the write fails.
        try (FileOutputStream os = new FileOutputStream(file, true)) {
            os.write(out.toString().getBytes());
        } catch (IOException e) {
            System.err.println("failed to append to " + file + ": " + e);
        }
    }
}