起因
准备做一个省市区三级联动的功能,在网上查找了很多资源,但符合要求的很少(缺少港澳台数据,也不便于判断直辖市、特别行政区)。下面的代码复制后可以直接运行。
数据来源:国家统计局《统计用区划代码和城乡划分代码》 http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2021/
运行环境:jdk8;
代码
jar 包
<!-- 依赖包 -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.11.3</version>
</dependency>
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-all</artifactId>
<version>4.5.11</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
</dependency>
<dependency>
<groupId>com.baomidou</groupId>
<artifactId>mybatis-plus-boot-starter</artifactId>
<version>3.3.0</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.35</version>
</dependency>
<dependency>
<groupId>com.baomidou</groupId>
<artifactId>mybatis-plus-generator</artifactId>
<version>3.3.0</version>
</dependency>
<dependency>
<groupId>org.apache.velocity</groupId>
<artifactId>velocity-engine-core</artifactId>
<version>2.1</version>
</dependency>
服务实现类 ChinasServiceImpl
package com.hn.yuan.city.service.impl;
import cn.hutool.core.date.DateUtil;
import cn.hutool.core.util.IdUtil;
import cn.hutool.http.HttpRequest;
import cn.hutool.http.HttpUtil;
import com.hn.yuan.city.entity.Chinas;
import com.hn.yuan.city.mapper.ChinasMapper;
import com.hn.yuan.city.service.ChinasService;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.hn.yuan.reptileCity.demoTest.Test;
import lombok.var;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.util.Date;
import java.util.concurrent.atomic.AtomicInteger;
/**
* <p>
* 服务实现类
* </p>
*
* @author XIAOCAO
*
*/
@Service
public class ChinasServiceImpl extends ServiceImpl<ChinasMapper, Chinas> implements ChinasService {
@Autowired
private ChinasMapper chinasMapper;
/*什么情况下使用AtomicInteger
1、作为多个线程同时使用的原子计数器。
2、在比较和交换操作中实现非阻塞算法。
https://www.jianshu.com/p/073096a729f6
*/
/**
 * Inserts one region row through {@code chinasMapper}.
 *
 * @param chinas the region record to insert
 * @return the fixed text {@code "成功"}; the mapper's result is not inspected
 */
public String addChinas(Chinas chinas) {
    final String successMessage = "成功";
    chinasMapper.insert(chinas);
    return successMessage;
}
// Logger named after this class so crawl output is attributed to the service, not to an unrelated class.
private static final Logger log = LoggerFactory.getLogger(ChinasServiceImpl.class);
// Number of page requests issued through getHtml.
private static final AtomicInteger atomicInteger = new AtomicInteger(0);
// Number of failed requests counted by getHtml.
private static final AtomicInteger atomicIntegerErrorNum = new AtomicInteger(0);
/**
 * Crawls the division-code index page and stores the matching provinces, cities and counties.
 *
 * <p>Province rows are processed one after another on the calling thread. A failure inside one
 * row is logged and the crawl moves on to the next row; an interrupt stops the crawl.
 *
 * @param appendProvide name of the only province to crawl; blank or {@code null} crawls all
 * @param appendCity    name of the only city to crawl inside {@code appendProvide};
 *                      blank or {@code null} crawls all of its cities
 */
@Override
public void test(String appendProvide, String appendCity) {
    System.out.println("===============开始抓取数据=================");
    // Derive the index URL from baseUrl so the index and the relative links share one root.
    var url = baseUrl + "index.html";
    String html = HttpUtil.get(url);
    var htmlDoc = Jsoup.parse(html);
    var selectClasses = htmlDoc.getElementsByClass("provincetr");
    var startDateStr = DateUtil.formatDateTime(new Date());
    long startDate = System.currentTimeMillis();
    for (int i = 0; i < selectClasses.size(); i++) {
        try {
            startProvide(selectClasses, i, baseUrl, appendProvide, appendCity);
        } catch (InterruptedException e) {
            // The retry sleeps in getHtml were interrupted: restore the flag and stop crawling.
            Thread.currentThread().interrupt();
            log.error("抓取被中断,停止于第 {} 行省份数据", i, e);
            break;
        } catch (Exception e) {
            // Best effort: one broken row must not abort the remaining provinces.
            log.error("抓取第 {} 行省份数据失败", i, e);
        }
    }
    System.out.println("耗时 =》");
    System.out.println((System.currentTimeMillis() - startDate) / 1000);
    System.out.println("开始时间 ==> " + startDateStr);
    System.out.println("结束时间 ==>" + DateUtil.formatDateTime(new Date()));
}
/**
 * Downloads a page, retrying twice before giving up.
 *
 * <p>After the first failure the thread sleeps 5 seconds and retries; after the second failure
 * it sleeps 12 minutes and retries once more. A third failure is propagated to the caller.
 *
 * @param url absolute URL of the page to download
 * @return the response body
 * @throws Exception the failure of the last attempt, or {@link InterruptedException} if a
 *                   back-off sleep is interrupted
 */
private String getHtml(String url) throws Exception {
    // Log the value returned by the increment so the number shown belongs to this call.
    int requestNo = atomicInteger.incrementAndGet();
    log.info("调用接口次数 :{}", requestNo);
    log.info("请求开始时间 ==>{}", DateUtil.formatDateTime(new Date()));
    log.info(url);
    String html;
    try {
        html = getHtml2(url);
    } catch (Exception firstFailure) {
        log.warn("请求失败: {}", url, firstFailure);
        log.info("error num = {}", atomicIntegerErrorNum.incrementAndGet());
        log.info(Thread.currentThread().getName() + " = 超时。。睡5秒再重试");
        Thread.sleep(1000 * 5);
        log.info(Thread.currentThread().getName() + " = 超时重试");
        try {
            // Retry #1
            html = getHtml2(url);
        } catch (Exception secondFailure) {
            log.warn("重试失败: {}", url, secondFailure);
            // Count this failure as well so the error counter reflects every failed request.
            log.info("error num = {}", atomicIntegerErrorNum.incrementAndGet());
            log.info(Thread.currentThread().getName() + " = 第二次超时。。睡12分钟再重试");
            Thread.sleep(1000 * 60 * 12);
            log.info(Thread.currentThread().getName() + " 第二次超时重试");
            // Retry #2: a failure here is not caught and reaches the caller.
            html = getHtml2(url);
        }
    }
    log.info("请求结束时间 ==>{}", DateUtil.formatDateTime(new Date()));
    System.out.println("");
    System.out.println("");
    return html;
}
/**
 * Performs a single GET request with browser-like headers and a 15 second timeout.
 *
 * <p>No {@code If-None-Match} header is sent: a fixed ETag would turn every request into a
 * conditional one, and a matching page would come back as 304 with an empty body.
 *
 * @param url absolute URL to fetch
 * @return the response body of a 2xx response
 * @throws Exception if the request fails or the server answers with a non-2xx status, so that
 *                   the caller's retry logic is triggered instead of parsing an error page
 */
private String getHtml2(String url) throws Exception {
    var response = HttpRequest.get(url)
            .header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36")
            .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9")
            // A fresh random cookie pair is generated for every request.
            .header("Cookie", "_trs_uv=" + IdUtil.simpleUUID() + "; SF_cookie_1=" + IdUtil.fastUUID())
            .timeout(15000) // milliseconds
            .execute();
    int status = response.getStatus();
    if (status < 200 || status >= 300) {
        throw new java.io.IOException("Unexpected HTTP status " + status + " for " + url);
    }
    return response.body();
}
// Root URL of the National Bureau of Statistics 2021 division-code pages.
// The relative hrefs scraped from those pages are appended to it when fetching sub-pages.
public static String baseUrl = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2021/";
/**
 * Walks the cells of one province row of the index page and crawls each linked province.
 *
 * @param selectClasses all province rows found on the index page
 * @param i             index of the row to process
 * @param baseUrl       not used by this method
 * @param appendProvide name of the only province to crawl; blank or {@code null} crawls all
 * @param appendCity    city filter, forwarded only when a province filter is given
 * @throws Exception if crawling a province fails
 */
private void startProvide(Elements selectClasses, Integer i, String baseUrl, String appendProvide, String appendCity) throws Exception {
    final boolean crawlEveryProvince = StringUtils.isBlank(appendProvide);
    var cells = selectClasses.get(i).children();
    for (int cellIndex = 0; cellIndex < cells.size(); cellIndex++) {
        var link = cells.get(cellIndex).select("a");
        var href = link.attr("href");
        var name = link.text();
        // Cells without a link carry no province.
        if (StringUtils.isBlank(href)) {
            continue;
        }
        if (crawlEveryProvince) {
            provide(href, name, null);
            continue;
        }
        if (appendProvide.equals(name)) {
            System.out.println("要的省份 = " + name);
            provide(href, name, appendCity);
        }
    }
}
/**
 * Fetches a province page and crawls its cities; the province row itself is stored only
 * when no city filter is given.
 *
 * @param provideCodeUrl relative href of the province page, e.g. {@code "41.html"}
 * @param provideName    province name shown on the index page
 * @param appendCity     name of the only city to crawl; blank or {@code null} crawls all
 * @throws Exception if the page cannot be downloaded or a nested step fails
 */
public void provide(String provideCodeUrl, String provideName, String appendCity) throws Exception {
    // The part of the href before the first dot is the province code prefix.
    var codePrefix = provideCodeUrl.split("\\.")[0];
    String provincePage = getHtml(baseUrl + provideCodeUrl);
    Elements cityRows = Jsoup.parse(provincePage).select(".citytr");
    // Right-pad with zeros to six digits; longer codes are left unchanged.
    var paddedCode = StringUtils.rightPad(codePrefix, 6, '0');

    if (!StringUtils.isBlank(appendCity)) {
        city(cityRows, provideName, codePrefix, appendCity);
        return;
    }

    System.out.println("保存数据库的id:" + paddedCode + "省名称:" + provideName + "code码:" + paddedCode + "level:" + 1);
    Chinas province = new Chinas()
            .setCode(Long.valueOf(paddedCode))
            .setName(provideName)
            .setPid(Long.valueOf(0L))
            .setLevel("1");
    this.addChinas(province);
    city(cityRows, provideName, codePrefix, null);
}
/**
 * Iterates the city rows of a province page and crawls every city that passes the filter.
 *
 * @param selectCityClass city rows ({@code .citytr}) of the province page
 * @param provideName     name of the enclosing province
 * @param provideCode     unpadded code of the enclosing province
 * @param appendCity      name of the only city to crawl; blank or {@code null} crawls all
 * @throws Exception if crawling a city fails
 */
private void city(Elements selectCityClass, String provideName, String provideCode, String appendCity) throws Exception {
    final boolean crawlEveryCity = StringUtils.isBlank(appendCity);
    for (int row = 0; row < selectCityClass.size(); row++) {
        // The second cell of a row holds the linked city name.
        var nameLink = selectCityClass.get(row).select("td").get(1).select("a");
        var countyPageHref = nameLink.attr("href");
        var cityName = nameLink.text();
        if (StringUtils.isBlank(countyPageHref)) {
            continue;
        }
        if (crawlEveryCity || appendCity.equals(cityName)) {
            this.appendCity(countyPageHref, provideName, provideCode, cityName);
        }
    }
}
/**
 * Stores one city (level 2) and, when its page lists county rows, the counties below it.
 *
 * <p>The city row is always stored. Pages without {@code .countytr} rows no longer cause the
 * city to be skipped; only the county step is omitted for them.
 *
 * @param gotoCountyUrl relative href of the city page, e.g. {@code "41/4101.html"}
 * @param provideName   name of the enclosing province; replaces the placeholder name "市辖区"
 * @param provideCode   unpadded code of the enclosing province
 * @param cityName      city name as shown on the province page
 * @throws Exception if the page cannot be downloaded or storing fails
 */
private void appendCity(String gotoCountyUrl, String provideName, String provideCode, String cityName) throws Exception {
    String cityPage = getHtml(baseUrl + gotoCountyUrl);
    Elements countyClass = Jsoup.parse(cityPage).select(".countytr");

    // The city code is the file name of the href without its extension, zero-padded to six digits.
    var pathParts = gotoCountyUrl.split("/");
    var cityCode = StringUtils.rightPad(pathParts[pathParts.length - 1].split("\\.")[0], 6, '0');
    var provideCode2 = StringUtils.rightPad(provideCode, 6, '0');

    // "市辖区" is a placeholder entry; store it under the province's own name.
    if ("市辖区".equals(cityName)) {
        cityName = provideName;
    }

    System.out.println("保存数据库的市id:" + cityCode + " 市名称:" + cityName + " code码:" + provideCode2 + "level:" + 2);
    Chinas chinas = new Chinas()
            .setCode(Long.valueOf(cityCode))
            .setName(cityName)
            .setPid(Long.valueOf(provideCode2))
            .setLevel("2");
    this.addChinas(chinas);

    if (countyClass.isEmpty()) {
        // NOTE(review): presumably a city whose page lists towns directly, with no county
        // level in between; the message text below is kept from the original code.
        System.out.println("直辖市,进入此方法");
        return;
    }
    county(countyClass, provideName, provideCode, cityCode);
}
/**
 * Stores every linked county row (level 3) of a city page.
 *
 * @param countyClass county rows ({@code .countytr}) of the city page
 * @param provideName name of the enclosing province (not used here)
 * @param provideCode code of the enclosing province (not used here)
 * @param cityCode    six-digit code of the enclosing city, stored as the parent id
 * @throws Exception if a row cannot be parsed or stored
 */
private void county(Elements countyClass, String provideName, String provideCode, String cityCode) throws Exception {
    for (int row = 0; row < countyClass.size(); row++) {
        // The second cell of a row holds the linked county name.
        var nameLink = countyClass.get(row).select("td").get(1).select("a");
        var townPageHref = nameLink.attr("href");
        var countyName = nameLink.text();
        // Rows without a link are skipped.
        if (StringUtils.isBlank(townPageHref)) {
            continue;
        }
        // The county code sits between the first "/" and the following "." of the href.
        String countyCode = StringUtils.substringBetween(townPageHref, "/", ".");
        System.out.println("保存数据库的县 区数据id:" + countyCode + " 县区名称:" + countyName + " code码:" + cityCode + "level:" + 3);
        Chinas countyRow = new Chinas()
                .setCode(Long.valueOf(countyCode))
                .setName(countyName)
                .setPid(Long.valueOf(cityCode))
                .setLevel("3");
        this.addChinas(countyRow);
    }
}
}
服务类 ChinasService
package com.hn.yuan.city.service;
import com.hn.yuan.city.entity.Chinas;
import com.baomidou.mybatisplus.extension.service.IService;
/**
 * Service contract for the {@link Chinas} region table.
 *
 * @author XIAOCAO
 */
public interface ChinasService extends IService<Chinas> {

    /**
     * Crawls the division-code pages and stores the result.
     *
     * @param appendProvide name of the only province to crawl; {@code null} crawls all
     * @param appendCity    name of the only city to crawl; {@code null} crawls all
     */
    void test(String appendProvide, String appendCity);
}
测试类 testDemo
package com.hn.yuan.city;
import com.hn.yuan.city.service.ChinasService;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit4.SpringRunner;
// Spring Boot test that starts the application context and runs the crawler once.
@RunWith(SpringRunner.class)
@SpringBootTest
public class testDemo {
@Autowired
private ChinasService chinasService;
// Runs the crawl through the injected service.
@Test
public void tests() {
// Crawl everything: no province filter and no city filter.
chinasService.test(null, null);
// Alternative: crawl a single city of a single province.
//chinasService.test("河南省", "郑州市");
}
}
注意:controller、service、mapper、entity 层均通过 mybatis-plus 代码生成器自动生成;本文只贴出 service 层的代码,controller、mapper、entity 的代码未列出。
全面省市区县居委5级详情看转载:https://blog.csdn.net/qq_15421685/article/details/124754314
Mysql数据表数据包含港澳台
https://pan.baidu.com/s/1CwnmUGSPFzhZdwIWNTwd_Q?pwd=1102
提取码:1102
各位看官:创作不易,点个赞!!!
诸君共勉:万事开头难,只愿不放弃。
免责声明:本文章仅用于学习参考
实战模式-Vue+Java后台实现省市区三级联动
实战模式-微信小程序java后台+mysql实现省市区三级联动
微信小程序组件简易实现省市区三级联动
免责声明:本文章仅用于学习参考