因为想重新初始化行政区划数据,但之前写的工具类用不了,所以对其进行升级。
注意:这里的工具类只能一个省份一个省份地分析。
写这篇文章也仅仅是个人记录,仅供参考。
1. 引入jar
<!-- jsoup: HTML fetching and parsing library; the scraping service in section 3 imports org.jsoup.* -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.16.1</version>
</dependency>
<!-- Apache HttpClient. No <version> is declared here; presumably it is inherited from a parent POM or BOM (TODO confirm).
     NOTE(review): none of the Java code shown in this post imports httpclient directly; verify it is still needed. -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
</dependency>
2. 实体类
/**
 * Entity for one row of the {@code sys_area} table: a single administrative
 * region (province, city or county) together with a reference to its parent.
 */
@ApiModel(value = "行政区划信息表",description = "")
@TableName("sys_area")
public class SysArea implements Serializable {

    /**
     * Explicit serialization version so the serialized form does not depend on
     * the compiler-generated default, which changes whenever the class changes.
     */
    private static final long serialVersionUID = 1L;

    /** Region code; primary key of the table. */
    @ApiModelProperty("区域编码")
    @TableId
    private String code;

    /** Region name. */
    @ApiModelProperty("区域名称")
    private String name;

    /** Code of the parent region. */
    @ApiModelProperty("父级区域编码")
    private String pcode;

    /** Region level: 1 = province, 2 = city, 3 = county/district. */
    @ApiModelProperty("地区级别:1-省份,2-城市,3-区县")
    private Integer level;

    /** Full location name (ancestor names followed by this region's name). */
    @ApiModelProperty("地理位置全称")
    private String address;

    /** @return the region code */
    public String getCode() {
        return code;
    }

    /** @param code the region code */
    public void setCode(String code) {
        this.code = code;
    }

    /** @return the region name */
    public String getName() {
        return name;
    }

    /** @param name the region name */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the parent region code */
    public String getPcode() {
        return pcode;
    }

    /** @param pcode the parent region code */
    public void setPcode(String pcode) {
        this.pcode = pcode;
    }

    /** @return the region level (1 = province, 2 = city, 3 = county/district) */
    public Integer getLevel() {
        return level;
    }

    /** @param level the region level (1 = province, 2 = city, 3 = county/district) */
    public void setLevel(Integer level) {
        this.level = level;
    }

    /** @return the full location name */
    public String getAddress() {
        return address;
    }

    /** @param address the full location name */
    public void setAddress(String address) {
        this.address = address;
    }
}
3. 实现数据分析的工具类
package com.xhy.swmall.system.service.impl;
import com.xhy.swmall.common.core.utils.StringUtils;
import com.xhy.swmall.system.domain.SysArea;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
/**
 * Scrapes the administrative regions of a single province page and converts
 * them into {@link SysArea} records.
 *
 * <p>The parser relies on the page structure expressed by the selectors below:
 * a {@code table.info_table} whose rows with class {@code shi_nub} describe
 * cities, and whose rows carrying a {@code parent} attribute describe the
 * counties of the city named by that attribute. One call handles one province.
 */
@Service
public class JavaJsoupService {

    /** Request timeout handed to jsoup, in milliseconds (same value as the original {@code 200 * 2000}). */
    private static final int TIMEOUT_MILLIS = 200 * 2000;

    /** Value of the {@code class} attribute that marks a city row. */
    private static final String CITY_ROW_CLASS = "shi_nub";

    /** Index of the table cell holding the region name. */
    private static final int NAME_COLUMN = 0;

    /** Index of the table cell holding the region code. */
    private static final int CODE_COLUMN = 4;

    /**
     * Placeholder code given to the "directly administered county-level units"
     * pseudo-city; the real code is meant to be corrected in the database afterwards.
     */
    private static final String PLACEHOLDER_CODE = "000000";

    /**
     * Fetches and parses the page at the given address.
     *
     * @param url address of the page to download
     * @return the parsed document, never {@code null}
     * @throws IllegalArgumentException if {@code url} is {@code null} or empty
     * @throws UncheckedIOException if the page cannot be fetched; previously this
     *         case printed to stdout and returned {@code null}, which surfaced
     *         later as a NullPointerException in the caller
     */
    private static Document connect(String url) {
        if (url == null || url.isEmpty()) {
            throw new IllegalArgumentException("无效的url");
        }
        try {
            return Jsoup.connect(url).timeout(TIMEOUT_MILLIS).get();
        } catch (IOException e) {
            throw new UncheckedIOException("请求地址失败: " + url, e);
        }
    }

    /**
     * Reads one province page and returns the province (optional) followed by
     * its cities and counties.
     *
     * <p>NOTE(review): the original comment cited the stats.gov.cn index page as
     * the request address, while the controller documentation in this file points
     * to a query page such as
     * {@code http://xzqh.mca.gov.cn/defaultQuery?shengji=...&diji=-1&xianji=-1};
     * confirm which source is intended.
     *
     * @param url  address of the province page
     * @param code province code; a province record is emitted only when both
     *             {@code code} and {@code name} are non-empty
     * @param name province name, see {@code code}
     * @return the province record (level 1, parent code {@code "0"}) when supplied,
     *         followed by every region parsed from the page
     * @throws IllegalArgumentException if {@code url} is {@code null} or empty
     * @throws UncheckedIOException if the page cannot be fetched
     */
    public List<SysArea> getProvinces(String url, String code, String name) {
        Document page = connect(url);
        Elements infoTables = page.select("table.info_table");

        List<SysArea> result = new ArrayList<>();
        // Stays empty (all fields null) when no province code/name was supplied;
        // the city parser treats that as "no parent".
        SysArea province = new SysArea();
        if (StringUtils.isNotEmpty(code) && StringUtils.isNotEmpty(name)) {
            province.setCode(code);
            province.setName(name);
            province.setPcode("0");
            province.setLevel(1);
            province.setAddress(name);
            result.add(province);
        }
        result.addAll(getCityAreaCode(infoTables, province));
        return result;
    }

    /**
     * Parses the city rows and, for each row carrying a {@code parent} attribute,
     * the county belonging to the city of that name.
     *
     * @param province the {@code table.info_table} elements of the page
     * @param pArea    the parent (province) record; may have all fields unset
     * @return cities and counties in page order
     * @throws IllegalStateException if a county row names a parent that has not
     *         been parsed yet (previously an IndexOutOfBoundsException)
     */
    public List<SysArea> getCityAreaCode(Elements province, SysArea pArea) {
        List<SysArea> areas = new ArrayList<>();
        for (Element table : province) {
            for (Element row : table.select("tr")) {
                if (CITY_ROW_CLASS.equals(row.attr("class"))) {
                    SysArea city = parseCity(row, pArea);
                    if (city != null) {
                        areas.add(city);
                    }
                    continue;
                }

                String parentName = row.attr("parent");
                if (!StringUtils.isNotEmpty(parentName)) {
                    // Neither a city row nor a county row (e.g. a header).
                    continue;
                }
                SysArea parent = areas.stream()
                        .filter(area -> parentName.equals(area.getName()))
                        .findFirst()
                        .orElse(null);
                if (parent == null) {
                    throw new IllegalStateException("未找到上级区域: " + parentName);
                }
                SysArea county = getDownAreaCode(row, parent);
                // Same guard as for cities: a record without code or name cannot be stored.
                if (StringUtils.isNotEmpty(county.getName()) && StringUtils.isNotEmpty(county.getCode())) {
                    areas.add(county);
                }
            }
        }
        return areas;
    }

    /**
     * Builds a county record from one table row.
     *
     * @param content the {@code tr} element of the county; name is read from the
     *                first cell and code from the fifth
     * @param pArea   the parent region; its level must be non-null
     * @return the county record; name and/or code are empty strings when the row
     *         does not contain the corresponding cell
     */
    public SysArea getDownAreaCode(Element content, SysArea pArea) {
        Elements cells = content.select("td");
        String areaName = cells.size() > NAME_COLUMN ? cells.get(NAME_COLUMN).text() : "";
        String areaCode = cells.size() > CODE_COLUMN ? cells.get(CODE_COLUMN).text() : "";

        SysArea area = new SysArea();
        area.setCode(areaCode);
        area.setName(areaName);
        area.setPcode(pArea.getCode());
        // One level below the parent: child of a level-2 city is 3, otherwise 2.
        area.setLevel(pArea.getLevel() == 2 ? 3 : 2);
        area.setAddress(StringUtils.isNotEmpty(pArea.getAddress()) ? pArea.getAddress() + areaName : areaName);
        return area;
    }

    /**
     * Builds a city record from a {@code shi_nub} row.
     *
     * @param row   the city row; the name comes from the {@code input} inside the
     *              first cell, the code from the text of the fifth cell
     * @param pArea the parent (province) record; may have all fields unset
     * @return the city record, or {@code null} when name or code is missing
     */
    private SysArea parseCity(Element row, SysArea pArea) {
        Elements cells = row.select("td");
        String areaName = cells.size() > NAME_COLUMN ? cells.get(NAME_COLUMN).select("input").val() : "";
        String areaCode = "";
        if (cells.size() > CODE_COLUMN) {
            areaCode = isDirectlyAdministeredUnit(areaName) ? PLACEHOLDER_CODE : cells.get(CODE_COLUMN).text();
        }
        if (!StringUtils.isNotEmpty(areaName) || !StringUtils.isNotEmpty(areaCode)) {
            return null;
        }

        SysArea city = new SysArea();
        city.setCode(areaCode);
        city.setName(areaName);
        city.setPcode(StringUtils.isNotEmpty(pArea.getCode()) ? pArea.getCode() : "0");
        // Without a parent level the row itself is treated as top level.
        city.setLevel(StringUtils.isNull(pArea.getLevel()) ? 1 : 2);
        city.setAddress(StringUtils.isNotEmpty(pArea.getAddress()) ? pArea.getAddress() + areaName : areaName);
        return city;
    }

    /** Tells whether the name is one of the two pseudo-cities that receive the placeholder code. */
    private static boolean isDirectlyAdministeredUnit(String areaName) {
        return "省直辖县级行政单位".equals(areaName) || "自治区直辖县级行政单位".equals(areaName);
    }
}
4. Controller
package com.xhy.swmall.system.controller.api;
import com.alibaba.fastjson.JSONObject;
import com.xhy.swmall.common.core.utils.poi.ExcelUtil;
import com.xhy.swmall.common.core.web.controller.BaseController;
import com.xhy.swmall.common.core.web.domain.AjaxResult;
import com.xhy.swmall.common.core.web.page.TableDataInfo;
import com.xhy.swmall.common.log.annotation.Log;
import com.xhy.swmall.common.log.enums.BusinessType;
import com.xhy.swmall.common.security.annotation.Anonymous;
import com.xhy.swmall.common.security.annotation.RequiresPermissions;
import com.xhy.swmall.system.domain.SysArea;
import com.xhy.swmall.system.service.ISysAreaService;
import com.xhy.swmall.system.service.impl.JavaJsoupService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.*;
import javax.servlet.http.HttpServletResponse;
import java.util.List;
/**
 * REST controller for administrative-region data.
 *
 * @author hhl
 * @date 2023-07-11
 */
@RestController
@RequestMapping("/area")
public class SysAreaController extends BaseController {

    /** The only host the import endpoint is allowed to fetch from (the documented data source). */
    private static final String ALLOWED_SOURCE_HOST = "xzqh.mca.gov.cn";

    @Autowired
    private ISysAreaService sysAreaService;
    @Autowired
    private JavaJsoupService javaJsoupService;

    /**
     * Downloads one province page, parses its regions and stores them.
     *
     * <p>NOTE(review): this endpoint is annotated {@code @Anonymous} (presumably
     * reachable without authentication; confirm) and writes to the database on a
     * GET request. Because the server fetches the caller-supplied address, the
     * address is restricted to {@link #ALLOWED_SOURCE_HOST} to prevent it from
     * being used to reach arbitrary hosts.
     *
     * @param url   province query page published by the Ministry of Civil Affairs,
     *              e.g. for Beijing:
     *              http://xzqh.mca.gov.cn/defaultQuery?shengji=%B1%B1%BE%A9%CA%D0%A3%A8%BE%A9%A3%A9&diji=-1&xianji=-1
     * @param pcode province code, intended for municipalities; e.g. 110000 for Beijing
     * @param pname province name, intended for municipalities; e.g. 北京市 for Beijing
     * @return the parsed regions as a JSON array
     * @throws IllegalArgumentException if {@code url} is missing, malformed, or
     *         does not point to the allowed host over http/https
     */
    @Anonymous
    @GetMapping
    public AjaxResult init(String url, String pcode, String pname) {
        requireAllowedSource(url);
        // Read the administrative-region data from the page.
        List<SysArea> sysAreas = javaJsoupService.getProvinces(url, pcode, pname);
        // Persist it (uses the batch insert built into MyBatis-Plus).
        sysAreaService.saveBatch(sysAreas);
        return AjaxResult.success(JSONObject.parseArray(JSONObject.toJSONString(sysAreas)));
    }

    /**
     * Rejects any address that is not an http/https URL on the allowed host.
     * Fully-qualified {@code java.net} names are used so the import block stays untouched.
     */
    private static void requireAllowedSource(String url) {
        if (url == null || url.isEmpty()) {
            throw new IllegalArgumentException("无效的url");
        }
        final java.net.URL parsed;
        try {
            parsed = new java.net.URL(url);
        } catch (java.net.MalformedURLException e) {
            throw new IllegalArgumentException("无效的url: " + url, e);
        }
        String protocol = parsed.getProtocol();
        boolean isHttp = "http".equalsIgnoreCase(protocol) || "https".equalsIgnoreCase(protocol);
        if (!isHttp || !ALLOWED_SOURCE_HOST.equalsIgnoreCase(parsed.getHost())) {
            throw new IllegalArgumentException("不支持的数据来源: " + url);
        }
    }
}
特别说明:controller的url参数来源:http://xzqh.mca.gov.cn/map