使用 Java 程序获取最新的省市区数据。数据来源于**官方**网站 (http://www.mca.gov.cn/), 官方每隔一段时间会发布更新后的数据。废话不多说, 本人是 Java 程序员, 因此采用 Java 抓取数据, 代码和步骤如下。如有不足之处, 欢迎指正。微信号: WeiJunHu_1020
1, 导入pom.xml依赖
<!-- jsoup: HTML parser used by AreaData to download the source page and read its table rows -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.11.3</version>
</dependency>
2, 写java代码
地区信息类: Area.java
package com.junlenet.cpca;
/**
* describe: 地区信息对象, 使用parentCode关联code
* created by hwj on 2019/7/9 23:13
*/
public class Area {
private Long id;
private Long code;
private Long parentCode;
private String name;
public Long getId() {
return id;
}
public void setId(Long id) {
this.id = id;
}
public Long getCode() {
return code;
}
public void setCode(Long code) {
this.code = code;
}
public Long getParentCode() {
return parentCode;
}
public void setParentCode(Long parentCode) {
this.parentCode = parentCode;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
}
入口类 AreaData.java:
package com.junlenet.cpca;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * Downloads the latest "administrative division codes, county level and above"
 * table from the source web page and prints one {@code INSERT} statement per
 * area to standard output.
 *
 * <p>Created by hwj on 2019/7/9 23:15.
 */
public class AreaData {

    /**
     * Page that holds the code table. It is reached from http://www.mca.gov.cn/
     * via "Civil affairs data" -> "Administrative division codes" -> the May 2019
     * list of division codes at county level and above.
     */
    private static final String SOURCE_URL =
            "http://www.mca.gov.cn/article/sj/xzqh/2019/201901-06/201906211421.html";

    /** Index of the first table row that is read; rows before it are skipped. */
    private static final int FIRST_DATA_ROW = 3;

    /**
     * Entry point: fetch the page, parse the table, derive parent codes and
     * print the generated SQL.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            // maxBodySize(0) lifts jsoup's response-size cap so the table is not truncated.
            Document doc = Jsoup.connect(SOURCE_URL).maxBodySize(0).get();
            Elements tbodies = doc.getElementsByTag("tbody");
            if (tbodies.isEmpty()) {
                System.err.println("No <tbody> found at " + SOURCE_URL
                        + "; the page layout may have changed.");
                return;
            }
            Element tbody = tbodies.get(0);
            List<Area> areaList = parseAreas(tbody.getElementsByTag("tr"));

            // Parent resolution needs to know which codes actually exist in the table.
            Set<Long> knownCodes = new HashSet<>();
            for (Area area : areaList) {
                knownCodes.add(area.getCode());
            }
            for (Area area : areaList) {
                area.setParentCode(resolveParentCode(area.getCode().toString(), knownCodes));
            }

            System.out.println(buildInsertSql(areaList));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Turns table rows into areas, reading the code from the second cell and
     * the name from the third. Rows with three cells or fewer, or with an empty
     * code cell, are skipped silently; rows whose code is not a six-digit number
     * are skipped with a warning instead of aborting the run.
     */
    private static List<Area> parseAreas(Elements trs) {
        List<Area> areaList = new ArrayList<>();
        for (int i = FIRST_DATA_ROW; i < trs.size(); i++) {
            Elements tds = trs.get(i).getElementsByTag("td");
            if (tds.size() <= 3) {
                continue;
            }
            String code = tds.get(1).text().trim();
            if (code.isEmpty()) {
                continue;
            }
            if (!isDivisionCode(code)) {
                System.err.println("Skipping row " + i + ": unexpected code '" + code + "'");
                continue;
            }
            Area area = new Area();
            area.setCode(Long.valueOf(code));
            area.setName(tds.get(2).text().trim());
            areaList.add(area);
        }
        return areaList;
    }

    /**
     * Returns true for exactly six ASCII digits with a non-zero first digit, so
     * the value survives the round trip through {@code Long} without losing
     * digits and the substring arithmetic in {@link #resolveParentCode} is safe.
     */
    private static boolean isDivisionCode(String code) {
        if (code.length() != 6 || code.charAt(0) == '0') {
            return false;
        }
        for (int i = 0; i < code.length(); i++) {
            char c = code.charAt(i);
            if (c < '0' || c > '9') {
                return false;
            }
        }
        return true;
    }

    /**
     * Derives the parent code of a six-digit code.
     *
     * <ul>
     *   <li>codes ending in {@code 0000} are top level and get parent {@code 0};</li>
     *   <li>codes under the prefixes 11, 12, 31 and 50, and codes ending in
     *       {@code 00}, hang directly under their {@code xx0000} code;</li>
     *   <li>any other code hangs under its {@code xxxx00} code when that code is
     *       present in the table, otherwise under its {@code xx0000} code, so the
     *       parent never points at a row that was not scraped.</li>
     * </ul>
     */
    private static Long resolveParentCode(String code, Set<Long> knownCodes) {
        if (code.endsWith("0000")) {
            return 0L;
        }
        Long provinceCode = Long.valueOf(code.substring(0, 2) + "0000");
        if (hasNoPrefectureLevel(code) || code.endsWith("00")) {
            return provinceCode;
        }
        Long prefectureCode = Long.valueOf(code.substring(0, 4) + "00");
        return knownCodes.contains(prefectureCode) ? prefectureCode : provinceCode;
    }

    /** True for the prefixes whose codes are attached directly to the {@code xx0000} code. */
    private static boolean hasNoPrefectureLevel(String code) {
        return code.startsWith("11") || code.startsWith("12")
                || code.startsWith("31") || code.startsWith("50");
    }

    /** Builds the INSERT script, one statement per area, each followed by a newline. */
    private static String buildInsertSql(List<Area> areaList) {
        StringBuilder sql = new StringBuilder();
        for (Area area : areaList) {
            sql.append("INSERT INTO `ft-home`.`sys_area` (`id`, `code`, `parent_code`, `name`, `create_time`, `create_by`, `update_time`, `update_by`, `is_del`)");
            sql.append("VALUES (null,'").append(area.getCode())
                    .append("', '").append(area.getParentCode())
                    .append("', '").append(escapeSql(area.getName()))
                    .append("', '2019-07-11 00:00:00', '1000', '2019-07-11 00:00:00', '1000', '0');");
            sql.append("\n");
        }
        return sql.toString();
    }

    /**
     * Escapes backslashes and single quotes for use inside a single-quoted MySQL
     * string literal. The name comes from a web page, so it must not be able to
     * terminate the literal early.
     */
    private static String escapeSql(String value) {
        return value.replace("\\", "\\\\").replace("'", "''");
    }
}
3, 建表sql
-- Recreates sys_area from scratch: an existing table and its rows are dropped first.
-- NOTE(review): the generated INSERT statements target `ft-home`.`sys_area`,
-- while this script creates sys_area in the current schema - confirm they match.
drop table if exists sys_area;
/*==============================================================*/
/* Table: sys_area */
/*==============================================================*/
create table sys_area
(
-- Primary key; the generated INSERTs pass null so MySQL assigns the value.
id bigint not null auto_increment comment '主键ID',
-- Area code as scraped from the source table.
code bigint comment '地区code',
-- Code of the parent area; the generator writes 0 for codes ending in 0000.
parent_code bigint comment '父code',
-- Area name.
name varchar(200) comment '名称',
-- Audit columns: creation time / creator, last update time / updater.
create_time datetime comment '创建时间',
create_by bigint comment '创建人',
update_time datetime comment '修改时间',
update_by bigint comment '修改人',
-- Soft-delete flag: 1 = deleted, 0 = not deleted (default).
is_del int(2) default 0 comment '是否删除1:是;0:否',
primary key (id)
)
-- Generated ids start at 1000.
auto_increment = 1000
ENGINE = InnoDB
DEFAULT CHARACTER SET = utf8
COLLATE = utf8_bin;
-- Table-level comment.
alter table sys_area comment '地区表';
4, 数据如下:
![数据库数据](https://img-blog.csdnimg.cn/20190711230526520.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L2h1d2VpanVuXzIwMTI=,size_16,color_FFFFFF,t_70)
多一个朋友,多一条路, 欢迎加我微信: