原数据为: http://www.mca.gov.cn/article/sj/xzqh/2018/201804-12/201804-06041553.html
手动替换了一下格式。注意:部分省直辖县级行政区划(代码第三位为 9)需要手动处理一下,然后使用下面的代码处理。
# Input format: one "code:name" record per line, listed in the order
# province -> city -> county.
s = """
110000:北京市
110101:东城区
110102:西城区
110105:朝阳区
110106:丰台区
110107:石景山区
110108:海淀区
110109:门头沟区
110111:房山区
110112:通州区
110113:顺义区
110114:昌平区
110115:大兴区
110116:怀柔区
110117:平谷区
110118:密云区
110119:延庆区
"""


def build_region_tree(lines, verbose=False):
    """Build a nested province -> city -> county mapping from "code:name" lines.

    Every node is a dict whose 'code' key holds the division code and whose
    remaining keys are the names of its child units.

    Classification uses only the code layout:
      * a code ending in "0000" whose first two digits differ from the
        current province opens a new province-level unit;
      * any other code ending in "00" whose digits 3-4 differ from the
        current city opens a city inside the current province;
      * everything else is treated as a county-level unit.

    A county is nested under the current city only when digits 3-4 of its
    code equal that city's. Otherwise (a province with no city level, or a
    province-administered county such as "469001" following city "460400")
    it is attached directly to the province.

    Args:
        lines: Iterable of "code:name" strings. Surrounding whitespace is
            ignored and blank lines are skipped.
        verbose: When true, print "province city county" for every county
            line as it is processed.

    Returns:
        The nested dict described above.

    Raises:
        IndexError: If a non-blank line contains no ":" separator.
        KeyError: If a city or county line appears before any province line.
    """
    tree = {}
    province_code = ""  # first two digits of the current province
    city_code = ""  # digits 3-4 of the current city, "" when there is none
    province_name = ""
    city_name = ""

    for raw_line in lines:
        record = raw_line.strip()
        if not record:
            continue
        fields = record.split(":")
        code = fields[0]
        name = fields[1]

        # Province-level unit: code ends with "0000".
        if code[0:2] != province_code and code.endswith("0000"):
            province_code = code[0:2]
            province_name = name
            # A new province starts with no current city.
            city_code = ""
            city_name = ""
            tree[name] = {'code': code}
            continue

        # City-level unit: code ends with "00".
        unit_city_code = code[2:4]
        if unit_city_code != city_code and code.endswith("00"):
            city_code = unit_city_code
            city_name = name
            tree[province_name][city_name] = {'code': code}
            continue

        # County-level unit. Only trust the current city when the county's
        # own city digits match it; a mismatch means the county does not
        # belong to that city and hangs off the province instead.
        parent_city = city_name if unit_city_code == city_code else ""
        if verbose:
            print(province_name, parent_city, name)
        if parent_city:
            tree[province_name][parent_city][name] = {'code': code}
        elif name:
            tree[province_name][name] = {'code': code}

    return tree


slist = s.split("\n")
res = build_region_tree(slist, verbose=True)  # the resulting nested mapping
处理结果为:
{
"北京市": {
"code": "110000",
"东城区": {
"code": "110101"
},
"西城区": {
"code": "110102"
},
"朝阳区": {
"code": "110105"
},
"丰台区": {
"code": "110106"
},
"石景山区": {
"code": "110107"
},
"海淀区": {
"code": "110108"
},
"门头沟区": {
"code": "110109"
},
"房山区": {
"code": "110111"
},
"通州区": {
"code": "110112"
},
"顺义区": {
"code": "110113"
},
"昌平区": {
"code": "110114"
},
"大兴区": {
"code": "110115"
},
"怀柔区": {
"code": "110116"
},
"平谷区": {
"code": "110117"
},
"密云区": {
"code": "110118"
},
"延庆区": {
"code": "110119"
}
}
}