Python 实现自动获取国家统计局省、市、县三级行政区划名称及代码

import requests
import time
from lxml import etree


def analysis_html(address, pattern, timeout=10):
    """Download a page and store the result of an XPath query in ``trs``.

    Args:
        address: URL of the page to fetch.
        pattern: XPath expression evaluated against the parsed page.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``
            argument, so a stalled connection raises instead of blocking
            the whole crawl forever.

    Returns:
        The XPath result. It is also assigned to the module-level name
        ``trs``, which the existing callers read after this call.

    Raises:
        requests.exceptions.RequestException: if the request fails or
            times out.
    """
    global trs
    response = requests.get(address, headers=headers, timeout=timeout)
    # Force UTF-8 decoding of the body instead of relying on detection.
    response.encoding = 'utf-8'
    html = etree.HTML(response.text)
    trs = html.xpath(pattern)
    return trs


def make_url(url_before, pattern, node=None):
    """Build a URL by appending the first XPath match to a prefix.

    Args:
        url_before: Prefix the matched value is appended to.
        pattern: XPath expression evaluated on ``node``; its first result
            is used.
        node: Element to query. When omitted, the module-level ``tr`` is
            used, which is what the original two-argument form relied on.
            Passing the element explicitly lets callers whose loop
            variable is local use this helper too.

    Returns:
        ``url_before`` concatenated with the first match.

    Raises:
        IndexError: if ``pattern`` matches nothing on the element.
    """
    if node is None:
        # Backward-compatible fallback: read the element that the caller
        # published through the module-level name ``tr``.
        node = tr
    return url_before + node.xpath(pattern)[0]


def province():
    """Crawl the index page and descend into every province it links to.

    For each province cell, the name is printed and ``city()`` is called
    with the province page URL and name, followed by a one-second pause.
    """
    # make_url() reads the current cell through the module-level name ``tr``,
    # so the loop variable below has to be that global.
    global tr
    analysis_html(main_address + 'index.html', '//tr[@class="provincetr"]/td')
    # The last matched cell is deliberately left out of the crawl.
    cells = trs[:-1]
    for tr in cells:
        name = tr.xpath('./a/text()')[0]
        link = make_url(main_address, './a/@href')
        print(name)
        city(link, name)
        time.sleep(1)


def city(province_url, province):
    """Crawl one province page and descend into each city row.

    Args:
        province_url: URL of the province page to fetch.
        province: Province name, passed through to ``country()``.
    """
    analysis_html(province_url, '//tr[@class="citytr"]')
    for row in trs:
        city_name = row.xpath('./td[2]/a/text()')[0]
        # make_url() is not used here: it reads the module-level ``tr``,
        # which this function's own loop variable does not update.
        href = row.xpath('./td[1]/a/@href')[0]
        city_code = row.xpath('./td[1]/a/text()')[0]

        country(main_address + href, city_name, city_code, province, province_url)
        time.sleep(1)


def country(city_url, city, city_id, province, province_url):
    """Crawl one city page and write a CSV line for each county row.

    Each line is written to the module-level file object ``f`` in the form
    ``county,county_code<TAB>,city,city_code<TAB>,province``.

    Args:
        city_url: URL of the city page to fetch.
        city: City name written into every line.
        city_id: City code written into every line.
        province: Province name written into every line.
        province_url: Unused; kept so existing callers keep working.
    """
    analysis_html(city_url, '//tr[@class="countytr"]')
    for tr in trs:
        try:
            name = tr.xpath('./td[2]/a/text()')[0]
            code = tr.xpath('./td[1]/a/text()')[0]
        except IndexError:
            # Rows whose cells contain no <a> element yield empty results;
            # they are skipped, as before. Only this case is caught, so
            # Ctrl-C and file-write errors are no longer swallowed.
            continue

        f.write(f'{name},{code}\t,{city},{city_id}\t,{province}\n')
        time.sleep(1)


# Base URL of the 2021 pages; page links found while crawling are appended to it.
main_address = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2021/"

# HTTP headers sent with every request made by analysis_html().
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/64.0.3282.204 Safari/537.36'
    ),
    'Cookie': 'AD_RS_COOKIE=20080918; _trs_uv=kahvgie3_6_fc6v',
}

# ``f`` must stay a module-level name: country() writes its rows through it.
# The file is opened in append mode, so repeated runs add to existing content.
with open('省市区.csv', mode='a', encoding='utf-8') as f:
    province()

运行后,结果会以追加方式写入 CSV 文件“省市区.csv”;示例抓取的是 2021 年的数据。

 

 

  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值