1.概述
因为项目是从无到有的开发所以很多数据都需要自己想办法、如项目需要全国地址信息而数据没有就需要自己去网站拉去了
2.拉取中华人民共和国民政部发布的最新全国各地区域信息
网站地址:http://www.mca.gov.cn/article/sj/xzqh/2020/
找到对应的数据标签
3.创建数据库表存储地区信息
CREATE TABLE [dbo].[S_SYSAREA] (
[id] int IDENTITY(1,1) NOT NULL,
[area_code] nvarchar(50) COLLATE Chinese_PRC_CI_AS NULL,
[area_name] nvarchar(50) COLLATE Chinese_PRC_CI_AS NULL,
[area_level] int NULL,
[province_code] nvarchar(50) COLLATE Chinese_PRC_CI_AS NULL,
[city_code] nvarchar(50) COLLATE Chinese_PRC_CI_AS NULL,
[creator_name] nvarchar(50) COLLATE Chinese_PRC_CI_AS NULL,
[modifier_name] nvarchar(50) COLLATE Chinese_PRC_CI_AS NULL,
[gmt_created] datetime2(7) NULL,
[gmt_modified] datetime2(7) NULL
)
GO
ALTER TABLE [dbo].[S_SYSAREA] SET (LOCK_ESCALATION = TABLE)
GO
EXEC sp_addextendedproperty
'MS_Description', N'主键ID',
'SCHEMA', N'dbo',
'TABLE', N'S_SYSAREA',
'COLUMN', N'id'
GO
EXEC sp_addextendedproperty
'MS_Description', N'地区编码',
'SCHEMA', N'dbo',
'TABLE', N'S_SYSAREA',
'COLUMN', N'area_code'
GO
EXEC sp_addextendedproperty
'MS_Description', N'地区名称',
'SCHEMA', N'dbo',
'TABLE', N'S_SYSAREA',
'COLUMN', N'area_name'
GO
EXEC sp_addextendedproperty
'MS_Description', N'地区级别(1、省,2、市,3、区/县)',
'SCHEMA', N'dbo',
'TABLE', N'S_SYSAREA',
'COLUMN', N'area_level'
GO
EXEC sp_addextendedproperty
'MS_Description', N'省编码',
'SCHEMA', N'dbo',
'TABLE', N'S_SYSAREA',
'COLUMN', N'province_code'
GO
EXEC sp_addextendedproperty
'MS_Description', N'市编码',
'SCHEMA', N'dbo',
'TABLE', N'S_SYSAREA',
'COLUMN', N'city_code'
GO
EXEC sp_addextendedproperty
'MS_Description', N'创建人',
'SCHEMA', N'dbo',
'TABLE', N'S_SYSAREA',
'COLUMN', N'creator_name'
GO
EXEC sp_addextendedproperty
'MS_Description', N'修改人',
'SCHEMA', N'dbo',
'TABLE', N'S_SYSAREA',
'COLUMN', N'modifier_name'
GO
EXEC sp_addextendedproperty
'MS_Description', N'创建日期',
'SCHEMA', N'dbo',
'TABLE', N'S_SYSAREA',
'COLUMN', N'gmt_created'
GO
EXEC sp_addextendedproperty
'MS_Description', N'主键ID',
'SCHEMA', N'dbo',
'TABLE', N'S_SYSAREA',
'COLUMN', N'gmt_modified'
GO
4.项目pom.xml拉取maven依赖
<!--jsoup-->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.2</version>
</dependency>
5.准备实体接收来取到的数据存入数据库
(一)项目中创建实体Area.java类
/**
* 实体
* 区,县
* */
public class Area {
private String code;
private String name;
public String getCode() {
return code;
}
public void setCode(String code) {
this.code = code;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
}
(二)项目中创建实体City .java类
/**
* 实体
* 市
* */
import java.util.List;
public class City {
private String code;
private String name;
private List<Area> areaList;
public String getCode() {
return code;
}
public void setCode(String code) {
this.code = code;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public List<Area> getAreaList() {
return areaList;
}
public void setAreaList(List<Area> areaList) {
this.areaList = areaList;
}
}
(三)项目中创建实体Province .java类
/**
* 实体
* 省份
* */
import java.util.List;
public class Province {
private String code;
private String name;
private List<City> cityList;
public String getCode() {
return code;
}
public void setCode(String code) {
this.code = code;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public List<City> getCityList() {
return cityList;
}
public void setCityList(List<City> cityList) {
this.cityList = cityList;
}
}
6.创建数据库表实体
/**
* <p>
* 数据库地区信息实体
* </p>
*
* @author zhangpeng
* @since 2021-01-06
*/
import com.baomidou.mybatisplus.annotation.IdType;
import java.io.Serializable;
import java.util.Date;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import com.baomidou.mybatisplus.annotation.TableField;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.experimental.Accessors;
@Data
@EqualsAndHashCode(callSuper = false)
@Accessors(chain = true)
@TableName("S_SYSAREA")
public class SSysarea implements Serializable {
private static final long serialVersionUID = 1L;
/**
* 主键ID
*/
@TableId(value = "id", type = IdType.AUTO)
private Integer id;
/**
* 地区编码
*/
@TableField("area_code")
private String areaCode;
/**
* 地区名称
*/
@TableField("area_name")
private String areaName;
/**
* 地区级别(1、省,2、市,3、区/县)
*/
@TableField("area_level")
private Integer areaLevel;
/**
* 省编码
*/
@TableField("province_code")
private String provinceCode;
/**
* 市是编码
*/
@TableField("city_code")
private String cityCode;
/**
* 创建人
*/
private String creatorName = "zhangpeng";
/**
* 修改人
*/
private String modifierName;
/**
* 创建日期
*/
private Date gmtCreated=new Date();
/**
* 修改日期
*/
private Date gmtModified=new Date();
}
7.创建mapper数据批量插入接口
/**
* <p>
* Mapper 接口
* 批量新增
* </p>
*
* @author zhangpeng
* @since 2021-01-06
*/
import org.apache.ibatis.annotations.InsertProvider;
import org.springframework.data.repository.query.Param;
import org.springframework.stereotype.Repository;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import java.util.List;
@Repository
public interface SSysareaMapper extends BaseMapper<SSysarea> {
@InsertProvider(type = addAddress.class,method = "addProvince")
int addProvince(@Param("list") List<SSysarea> list);
}
8.编写sql语句存入数据
/**
* 动态sql
* */
import org.springframework.data.repository.query.Param;
import java.util.List;
public class addAddress {
public String addProvince(@Param("list") List<SSysarea> list){
StringBuffer sb = new StringBuffer(128);
sb.append("insert into S_SYSAREA(area_code,area_name,area_level,province_code,city_code,creator_name,modifier_name,gmt_created,gmt_modified) values");
for (int i = 0; i < list.size(); i++) {
sb.append("(#{list[").append(i).append("].areaCode}")
.append(",#{list[").append(i).append("].areaName}")
.append(",#{list[").append(i).append("].areaLevel}")
.append(",#{list[").append(i).append("].provinceCode}")
.append(",#{list[").append(i).append("].cityCode}")
.append(",#{list[").append(i).append("].creatorName}")
.append(",#{list[").append(i).append("].modifierName}")
.append(",#{list[").append(i).append("].gmtCreated}")
.append(",#{list[").append(i).append("].gmtModified})");
if (i < list.size() - 1) {
sb.append(",");
}
}
return sb.toString();
}
}
9.在测试类建测试拉取数据
import com.szylt.projects.project.address.Area;
import com.szylt.projects.project.address.City;
import com.szylt.projects.project.address.Province;
import com.szylt.projects.project.entity.SSysarea;
import com.szylt.projects.project.mapper.SSysareaMapper;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit4.SpringRunner;
/**
* 测试
*/
@Test
public void test() throws IOException {
String url = "http://preview.www.mca.gov.cn/article/sj/xzqh/2020/2020/202101041104.html";
Document document = Jsoup.connect(url).timeout(100000).maxBodySize(0).get();
Elements element = document.getElementsByClass("xl7032423");
Elements element2 = document.getElementsByClass("xl7132423");
//System.out.println(element.html());
List<String> stringList = new ArrayList<>();
List<String> stringName = new ArrayList<>();
List<String> stringCode = new ArrayList<>();
for (Element el :element){
String sa =el.getElementsByClass("xl7032423").eq(0).text();
//System.out.println(sa);
stringList.add(sa);
}
for (Element el :element2){
String sa =el.getElementsByClass("xl7132423").eq(0).text();
//System.out.println(sa);
stringList.add(sa);
}
for (int i = 0; i < stringList.size(); i++) {
if (i % 2 == 0) {
//地区代码
stringCode.add(stringList.get(i));
} else {
//地区名字
stringName.add(stringList.get(i));
}
}
System.out.println("stringName size= " + stringName.size() + " stringCode size= " + stringCode.size());
if (stringName.size() != stringCode.size()) {
throw new RuntimeException("数据错误");
}
List<SSysarea> list = new ArrayList<>();
SSysarea sysarea = null;
List<Province> provinceList = processData(stringName, stringCode);
for (Province p:provinceList) {
//System.out.println(p.getCode() + "\t" + p.getName());
sysarea = new SSysarea();
sysarea.setAreaCode(p.getCode());
sysarea.setAreaName(p.getName());
sysarea.setAreaLevel(1);
list.add(sysarea);
for (City c:p.getCityList()) {
//System.out.println(c.getCode() + "\t" + c.getName());
sysarea = new SSysarea();
sysarea.setAreaCode(c.getCode());
sysarea.setAreaName(c.getName());
sysarea.setProvinceCode(p.getCode());
sysarea.setAreaLevel(2);
list.add(sysarea);
for (Area a:c.getAreaList()) {
//System.out.println(a.getCode() + "\t" + a.getName());
sysarea = new SSysarea();
sysarea.setAreaCode(a.getCode());
sysarea.setAreaName(a.getName());
sysarea.setProvinceCode(p.getCode());
sysarea.setCityCode(c.getCode());
sysarea.setAreaLevel(3);
list.add(sysarea);
}
}
}
System.out.println("===================放入数据库的数据=================");
for (SSysarea s:list) {
System.out.println(s.getId() + "\t" + s.getAreaCode() + "\t" + s.getAreaName() + "\t" + s.getAreaLevel() + "\t" + s.getProvinceCode() +
"\t" + s.getCityCode() + "\t" + s.getCreatorName() + "\t" + s.getGmtCreated() );
}
System.out.println(list.size());
//限制条数
int pointsDataLimit = 1000;
Integer size = list.size();
//判断是否有必要分批
if(pointsDataLimit<size){
//分批数
int part = size/pointsDataLimit;
for (int i = 0; i < part; i++) {
//1500条插入一次
List<SSysarea> listPage = list.subList(0, pointsDataLimit);
sSysareaMapper.addProvince(listPage);
//剔除已经插入的
list.subList(0, pointsDataLimit).clear();
}
if (list.size()>0){
//新增最后剩下的
sSysareaMapper.addProvince(list);
}
}else{
sSysareaMapper.addProvince(list);
}
}
/**
* 生成省份列表数据
*
* @param stringName 名称
* @param stringCode 编号
* @return
*/
private static List<Province> processData(List<String> stringName, List<String> stringCode) {
List<Province> provinceList = new ArrayList<>();
for (int i = 0; i < stringCode.size(); i++) {
String provinceName = stringName.get(i);
String provinceCode = stringCode.get(i);
if (provinceCode.endsWith("0000")) {
Province province = new Province();
province.setCode(provinceCode);
province.setName(provinceName);
List<City> cities = new ArrayList<>();
province.setCityList(cities);
provinceList.add(province);
//香港,澳门,台湾,没有市级行政单位划分,城市 地区 和省份保持一致
if (provinceName.contains("香港") || provinceName.contains("澳门") || provinceName.contains("台湾")) {
City city = new City();
List<Area> areas = new ArrayList<>();
city.setName(provinceName);
city.setCode(provinceCode);
city.setAreaList(areas);
cities.add(city);
Area area = new Area();
area.setName(provinceName);
area.setCode(provinceCode);
areas.add(area);
}
//直辖市 城市和省份名称一样
if (provinceName.contains("北京") || provinceName.contains("上海") || provinceName.contains("天津") || provinceName.contains("重庆")) {
City city = new City();
List<Area> areas = new ArrayList<>();
city.setName(provinceName);
city.setCode(provinceCode);
city.setAreaList(areas);
cities.add(city);
//县区
for (int k = 0; k < stringCode.size(); k++) {
String areaName = stringName.get(k);
String areaCode = stringCode.get(k);
if (!provinceCode.equals(areaCode) && areaCode.startsWith(provinceCode.substring(0, 2))) {
Area area = new Area();
area.setName(areaName);
area.setCode(areaCode);
areas.add(area);
}
}
}
for (int j = 0; j < stringCode.size(); j++) {
String cityName = stringName.get(j);
String cityCode = stringCode.get(j);
//遍历获取地级市
if (!cityCode.equals(provinceCode) && cityCode.startsWith(provinceCode.substring(0, 2)) && cityCode.endsWith("00")) {
City city = new City();
List<Area> areas = new ArrayList<Area>();
city.setName(cityName);
city.setCode(cityCode);
city.setAreaList(areas);
cities.add(city);
//遍历获取县区
for (int k = 0; k < stringCode.size(); k++) {
String areaName = stringName.get(k);
String areaCode = stringCode.get(k);
if (!areaCode.equals(cityCode) && areaCode.startsWith(cityCode.substring(0,4))) {
Area area = new Area();
area.setName(areaName);
area.setCode(areaCode);
areas.add(area);
}
}
}
}
}
}
return provinceList;
}
这些步骤走完全国的地区信息便以被存入到数据库拉-同理也可拉取其他信息