【精品】爬取 国家统计局 2024年 省市县乡村 数据

  • @param provinceCode 省份编号

  • @return

*/

public List getCitiesByProvince(String provinceCode) {

List res = new ArrayList<>();

Document connect = connect(“http://localhost:8080/2020/” + provinceCode + “.html”);

Elements rowCity = connect.select(“tr.citytr”);

for (Element cityElement : rowCity) {// 遍历每一行的省份城市

String name = cityElement.select(“td”).text();

String[] split = name.split(" ");

res.add(split[0].substring(0, 4) + “*” + split[1]);

}

return res;

}

@Test

public void testGetCitiesByProvince() {

getCitiesByProvince(“41”).forEach(System.out::println);

}

// Mapper for City rows: insertCities batch-inserts through it and insertCountry looks cities up by code.
@Resource

private CityMapper cityMapper;

@Test

void insertCities() {

List pList = getProvinces();

for (String p : pList) {

List list = new ArrayList<>();

String[] split = p.split(“\*”);

List cList = getCitiesByProvince(split[0]);

Province pp = provinceMapper.selectOne(new QueryWrapper().eq(“code”, split[0]));

for (String c : cList) {

String[] tmp = c.split(“\*”);

City city = City.builder().name(tmp[1]).code(tmp[0]).provinceId(pp.getId()).build();

//System.out.println(city);

list.add(city);

}

//一个省一个省的添加

int res = cityMapper.batchInsert(list);

System.out.println(res);

}

}

/**

  • 根据省市编号获取该省份下所有的县

  • @param cityCode 市编号

  • @return

*/

public List getCountriesByCity(String cityCode) {

List res = new ArrayList<>();

Document connect = connect(“http://localhost:8080/2020/” + cityCode + “.html”);

Elements rowCountry = connect.select(“tr.countytr”);

if (rowCountry.size() == 0) {

Elements townCountry = connect.select(“tr.towntr”);

for (Element townElement : townCountry) {

String txt = townElement.select(“td”).text();

String[] split = txt.split(" ");

res.add(split[0].substring(0, 9) + “*” + split[1]);

//比如海南省下的儋州市,只有4级目录,没有country

}

} else {

for (Element countryElement : rowCountry) {// 遍历每一行的省份城市

String txt = countryElement.select(“td”).text();

String[] split = txt.split(" ");

res.add(split[0].substring(0, 6) + “*” + split[1]);

}

}

return res;

}

@Test

void testGetCountiesByProvince() {

getCountriesByCity(“46/4604”).forEach(System.out::println);

}

// Mapper for Country rows: insertCountry batch-inserts through it and insertTown looks countries up by code.
@Resource

private CountryMapper countryMapper;

@Test

void insertCountry() {

List pList = getProvinces();

for (int i = 0; i < pList.size(); i++) {

String p = pList.get(i);

String[] split = p.split(“\*”);

//System.out.println(split[0] +" "+split[1]); // 13 河北省

List cList = getCitiesByProvince(split[0]);

for (String c : cList) {

String[] split2 = c.split(“\*”);

//System.out.println(split2[0] +" * "+split2[1]);//1301 * 石家庄市

List c2List = getCountriesByCity(split2[0].substring(0, 2) + “/” + split2[0]);

City city = cityMapper.selectOne(new QueryWrapper().eq(“code”, split2[0]));

List list = new ArrayList<>();

for (String c2 : c2List) {

String[] split3 = c2.split(“\*”);

System.out.println(split3[0] + " * " + split3[1]);

Country country = Country.builder().name(split3[1]).code(split3[0]).cityId(city.getId()).build();

list.add(country);

}

int res = countryMapper.batchInsert(list);

System.out.println(res);

}

}

}

/**

  • 根据县编号获取乡

  • @param countryCode

  • @return

*/

public List getTownsByCountry(String countryCode) {

List res = new ArrayList<>();

Document connect = connect(“http://localhost:8080/2020/” + countryCode + “.html”);

if (connect != null) {

Elements rowTown = connect.select(“tr.towntr”);

if (rowTown.size() == 0) {

Elements rowVillage = connect.select(“tr.villagetr”);

for (Element villageElement : rowVillage) {

String txt = villageElement.select(“td”).text();

String[] split = txt.split(" ");

res.add(split[1] + “*” + split[2]);

//比如海南省下的儋州市,只有4级目录,没有country

}

} else {

for (Element townElement : rowTown) {// 遍历每一行的省份城市

String txt = townElement.select(“td”).text();

String[] split = txt.split(" ");

res.add(split[0].substring(0, 9) + “*” + split[1]);

}

}

}

return res;

}

@Test

void testGetTownsByCountry() {

getTownsByCountry(“41/01/410122”).forEach(System.out::println);

}

// Mapper for Town rows: insertTown batch-inserts through it and insertVillage looks towns up by code.
@Resource

private TownMapper townMapper;

@Test

void insertTown() {

List pList = getProvinces();

for (int i = 0; i < pList.size(); i++) {

String p = pList.get(i);

String[] split = p.split(“\*”);

//System.out.println(split[0] +" "+split[1]); // 13 河北省

List cList = getCitiesByProvince(split[0]);

for (String c : cList) {

String[] split2 = c.split(“\*”);

//System.out.println(split2[0] +" * "+split2[1]);//1301 * 石家庄市

List c2List = getCountriesByCity(split2[0].substring(0, 2) + “/” + split2[0]);

for (String c2 : c2List) {

String[] split3 = c2.split(“\*”);

//System.out.println(split3[0] + " * " + split3[1]); //130324 * 卢龙县

List tList = getTownsByCountry(split3[0].substring(0, 2) + “/” + split3[0].substring(2, 4) + “/” + split3[0]);

List list = new ArrayList<>();

Country country = countryMapper.selectOne(new QueryWrapper().eq(“code”, split3[0]));

for (String t : tList) {

String[] split4 = t.split(“\*”);

//System.out.println(split4[0] + " * " + split4[1]);

Town town = Town.builder().name(split4[1]).code(split4[0]).countryId(country.getId()).build();

//System.out.println(town);

list.add(town);

}

if (list.size() != 0) {

System.out.println(list);

int res = townMapper.batchInsert(list);

System.out.println(res);

}

}

}

}

}

/**

  • 根据乡编号获取村

  • @param townCode

  • @return

*/

public List getVillagesByCountry(String townCode) {

List res = new ArrayList<>();

Document connect = connect(“http://localhost:8080/2020/” + townCode + “.html”);

Elements rowVillage = connect.select(“tr.villagetr”);

for (Element villageElement : rowVillage) {// 遍历每一行的省份城市

String txt = villageElement.select(“td”).text();

String t = txt.substring(13);

res.add(t);

}

return res;

}

@Test

void testGetVillagesByCountry() {

getVillagesByCountry(“41/01/22/410122104”).forEach(System.out::println);

}

// Mapper for Village rows. Not referenced in the visible part of this file; presumably used by insertVillage (confirm).
@Resource

private VillageMapper villageMapper;

@Test

void insertVillage() {

List pList = getProvinces();

for (int i = 25; i < pList.size(); i++) {

String p = pList.get(i);

String[] split = p.split(“\*”);

//System.out.println(split[0] +" "+split[1]); // 13 河北省

List cList = getCitiesByProvince(split[0]);

for (String c : cList) {

String[] split2 = c.split(“\*”);

//System.out.println(split2[0] +" * "+split2[1]);//1301 * 石家庄市

List c2List = getCountriesByCity(split2[0].substring(0, 2) + “/” + split2[0]);

for (String c2 : c2List) {

String[] split3 = c2.split(“\*”);

//System.out.println(split3[0] + " * " + split3[1]); //130324 * 卢龙县

List tList = getTownsByCountry(split3[0].substring(0, 2) + “/” + split3[0].substring(2, 4) + “/” + split3[0]);

for (String t : tList) {

String[] split4 = t.split(“\*”);

if(split4[0].length()!=3){

//System.out.println(split4[0] + " * " + split4[1]); // 140802204 * 上郭乡

List vList = getVillagesByCountry(split4[0].substring(0, 2) + “/” + split4[0].substring(2, 4) + “/” + split4[0].substring(4, 6) + “/” + split4[0]);

Town town = townMapper.selectOne(new QueryWrapper().eq(“code”, split4[0]));
自我介绍一下,小编13年上海交大毕业,曾经在小公司待过,也去过华为、OPPO等大厂,18年进入阿里一直到现在。

深知大多数前端工程师,想要提升技能,往往是自己摸索成长或者是报班学习,但对于培训机构动则几千的学费,着实压力不小。自己不成体系的自学效果低效又漫长,而且极易碰到天花板技术停滞不前!

因此收集整理了一份《2024年Web前端开发全套学习资料》,初衷也很简单,就是希望能够帮助到想自学提升又不知道该从何学起的朋友,同时减轻大家的负担。

img

既有适合小白学习的零基础资料,也有适合3年以上经验的小伙伴深入学习提升的进阶课程,基本涵盖了95%以上前端开发知识点,真正体系化!

由于文件比较大,这里只是将部分目录截图出来,每个节点里面都包含大厂面经、学习笔记、源码讲义、实战项目、讲解视频,并且会持续更新!

如果你觉得这些内容对你有帮助,可以扫码获取!!(备注:前端)

最后

本人分享一下这次字节跳动、美团、头条等大厂的面试真题涉及到的知识点,以及我个人的学习方法、学习路线等,当然也整理了一些学习文档资料出来是附赠给大家的。知识点涉及比较全面,包括但不限于前端基础,HTML,CSS,JavaScript,Vue,ES6,HTTP,浏览器,算法等等

详细大厂面试题答案、学习笔记、学习视频等资料领取,点击资料领取直通车

前端视频资料:
62825077)]

由于文件比较大,这里只是将部分目录截图出来,每个节点里面都包含大厂面经、学习笔记、源码讲义、实战项目、讲解视频,并且会持续更新!

如果你觉得这些内容对你有帮助,可以扫码获取!!(备注:前端)

最后

本人分享一下这次字节跳动、美团、头条等大厂的面试真题涉及到的知识点,以及我个人的学习方法、学习路线等,当然也整理了一些学习文档资料出来是附赠给大家的。知识点涉及比较全面,包括但不限于前端基础,HTML,CSS,JavaScript,Vue,ES6,HTTP,浏览器,算法等等

详细大厂面试题答案、学习笔记、学习视频等资料领取,点击资料领取直通车

[外链图片转存中…(img-VaEoxK3r-1712562825077)]

前端视频资料:

  • 27
    点赞
  • 23
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
可以使用Python中的requests和BeautifulSoup库来实现爬取国家统计局省市区信息。以下是示例代码:

```python
import requests
from bs4 import BeautifulSoup

url = 'http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2019/index.html'
response = requests.get(url)
response.encoding = 'gbk'
soup = BeautifulSoup(response.text, 'html.parser')
province_list = soup.select('.provincetr a')
for province in province_list:
    province_name = province.text.strip()
    province_code = province['href'][:2]
    print(province_code, province_name)
    city_url = f'http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2019/{province_code}.html'
    city_response = requests.get(city_url)
    city_response.encoding = 'gbk'
    city_soup = BeautifulSoup(city_response.text, 'html.parser')
    city_list = city_soup.select('.citytr a')
    for city in city_list:
        city_name = city.text.strip()
        city_code = city['href'][3:5]
        print('\t', city_code, city_name)
        county_url = f'http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2019/{province_code}/{city_code}.html'
        county_response = requests.get(county_url)
        county_response.encoding = 'gbk'
        county_soup = BeautifulSoup(county_response.text, 'html.parser')
        county_list = county_soup.select('.countytr a') if len(county_soup.select('.countytr a')) > 0 else county_soup.select('.towntr a')
        for county in county_list:
            county_name = county.text.strip()
            county_code = county['href'][5:9]
            print('\t\t', county_code, county_name)
```

以上代码可以输出各个省市区的名称和代码。注意需要指定网页编码为`gbk`。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值