用 Python 爬取行政区划代码表
爬取宁夏的行政区划代码表。
import requests
import re
import xlsxwriter
import time
# Timestamp (seconds since the epoch) captured when the script starts.
time_start = time.time()

# HTTP header mapping carrying a desktop Chrome User-Agent string.
# NOTE(review): presumably sent with every request; the call site is outside
# this section - confirm.
agent = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/87.0.4280.88 Safari/537.36'
    ),
}

# One integer per level 1..5, evaluating to [2, 4, 6, 9, 12]:
# twice the level for levels 1-3, then 3 * (level - 1) for levels 4 and 5.
# NOTE(review): presumably the count of significant leading digits of a
# division code at each level - confirm against the code that consumes it.
choose_ls = [
    2 * level if level < 4 else 3 * level - 3
    for level in range(1, 6)
]

# Five marker strings, ordered from province down to village.
# NOTE(review): presumably the row class names matched in the fetched HTML -
# confirm against the parsing code.
match_level = [
    'provincetr',
    'citytr',
    'countytr',
    'towntr',
    'villagetr',
]

# Page the crawl starts from.
initurl = 'http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2020/64.html'

# Result container; starts empty.
total_dict = dict()

# Current level counter and its exclusive upper bound for the main loop.
depth = 0
max_depth = 5

# URLs to visit at the current level, each mapped to a pair of strings that
# the main loop reads as (code prefix, zone-name prefix). Both are empty for
# the starting page.
each_root = {}
each_root[initurl] = ('', '')
while depth<max_depth:
total_count=0
next_root={
}
for url in each_root:
code_join=each_root[url][0]+'-' if depth!=0 else each_root[url][0]
zone_join=each_root[url][1]+'-' if depth!=0 else each_root[url][1]
change_root='/'.join(url.split('/')[:-1])+'/'
while True:
try:
req