获取省市区行政区域编码

注:数据源于国家统计局 

脚本抓取的是2020年度的统计数据

# -*- coding: GB2312 -*-

import requests
import bs4
from bs4 import BeautifulSoup
import re

# Scrape province / city / county administrative division codes and write one
# CSV-style line per county to result.txt:
#   county_code,county_name,city_code,city_name,province_name

# Directory holding the 2020 edition of the code pages. Every href read from
# the province and city pages is resolved by prefixing it with this URL.
base_url = 'http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2020/'
url = 'index.html'

# Seconds to wait for the server before giving up. Without a timeout a single
# stalled connection would block the whole crawl indefinitely.
REQUEST_TIMEOUT = 30


def _fetch_soup(page_url):
    """Download *page_url* and return it parsed as a BeautifulSoup tree.

    The response body is decoded as GBK before parsing.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx HTTP status, so an error page is never parsed as data.
    """
    page = requests.get(page_url, timeout=REQUEST_TIMEOUT)
    page.raise_for_status()
    page.encoding = 'gbk'
    return BeautifulSoup(page.text, 'html.parser')


soup = _fetch_soup(base_url + url)
provincetr = soup.find('table', {"class": "provincetable"})
if provincetr is None:
    raise RuntimeError("province table not found at " + base_url + url)

# The context manager guarantees the file is flushed and closed even when a
# request fails part-way through. The encoding is explicit so that writing
# Chinese names does not depend on the platform's locale encoding.
with open(r"result.txt", "w", encoding="utf-8") as f:
    for i in provincetr.find_all('a'):
        province_name = i.get_text()
        print("省:" + province_name)

        soupShi = _fetch_soup(base_url + i['href'])
        citytr = soupShi.find('table', {"class": "citytable"})
        if citytr is None:
            # Province page without a city table: report it and move on.
            print(province_name + ":没有市")
            continue

        for c in citytr.find_all('tr', {"class": "citytr"}):
            ca = c.find_all("td")
            city_code = ca[0].get_text()
            city_name = ca[1].get_text()

            # A city row without a link has no county page to follow, so it
            # is reported the same way as a page without a county table.
            city_link = ca[0].a
            countytr = None
            if city_link is not None:
                soupQu = _fetch_soup(base_url + city_link['href'])
                countytr = soupQu.find('table', {"class": "countytable"})
            if countytr is None:
                print(city_code + "," + city_name + "," + province_name + ":没有区")
                continue

            for cq in countytr.find_all('tr', {"class": "countytr"}):
                cqa = cq.find_all("td")
                rec = ",".join([
                    cqa[0].get_text(),
                    cqa[1].get_text(),
                    city_code,
                    city_name,
                    province_name,
                ])
                print(rec)
                f.write(rec + "\n")

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值