一、简介
利用百度地图开放平台提供的行政区划查询接口,使用 Python 获取全国的行政区划数据,并将数据转换成所需的格式。
- 202402191052更新内容,发现香港澳门的level格式和其他地区的格式不同,特根据JSON的结构重新生成level来统一格式。同时,将导出的excel加上时间戳,以保证每次导出都有一份记录。
二、需求格式
1.省市区的对应关系格式
结果展示:
代码如下:
import datetime
import json

import pandas as pd
import requests
# Endpoint of the Baidu Maps administrative-region search API.
url = "https://api.map.baidu.com/api_region_search/v1/"
# Fill in the AK obtained from Console -> Application management -> Create application.
ak = "xxxxxxxxxxxxx"
params = {
    "keyword": "全国",
    # NOTE(review): presumably the number of sub-levels to return below the
    # keyword; the code further down walks three levels of children.
    "sub_admin": "3",
    "ak": ak,
    "extensions_code": "1",
}
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url=url, params=params, timeout=30)
# Stop on HTTP errors instead of trying to parse an error page as JSON.
response.raise_for_status()
data = response.json()
# Everything below reads data['districts']; if the key is missing, show the
# payload the API actually returned rather than failing later with a KeyError.
if 'districts' not in data:
    raise RuntimeError(f"Unexpected response from the region search API: {data}")
# The API reports different 'level' values for Hong Kong and Macau than for the
# other regions, so the levels are rebuilt from the JSON nesting itself. This
# guarantees a province -> city -> county ... hierarchy everywhere.
def set_level(districts, level=0):
    """Overwrite ``'level'`` on every node with its nesting depth, in place.

    Args:
        districts: List of district dicts; children, when present, live
            under the ``'districts'`` key.
        level: Depth assigned to the nodes of ``districts``; each nested
            list of children gets one more than its parent.
    """
    # Explicit stack of (sibling list, depth) pairs instead of recursion.
    pending = [(districts, level)]
    while pending:
        siblings, depth = pending.pop()
        for node in siblings:
            node['level'] = depth
            pending.append((node.get('districts', []), depth + 1))
# Normalise the levels first so that a node's level equals its nesting depth
# (Hong Kong and Macau come back from the API with different level values).
set_level(data['districts'])

# Flatten the country -> province -> city -> county tree into one row per county.
# After set_level() the depth of a node is its level, so walking three levels
# down is sufficient and no per-node level check is needed. ``.get`` is used,
# as in set_level(), so a node without a 'districts' key contributes no rows
# instead of raising KeyError.
provinces_cities = []
for country in data['districts']:
    for province in country.get('districts', []):
        for city in province.get('districts', []):
            for county in city.get('districts', []):
                provinces_cities.append((
                    province['code'], province['name'],
                    city['code'], city['name'],
                    county['code'], county['name'],
                ))
# Echo every province / city / county row to the terminal.
for record in provinces_cities:
    province_code, province, city_code, city, county_code, county = record
    print(f"省份编码: {province_code}, 省份名称: {province}, 城市编码: {city_code}, 城市名称: {city},区县编码: {county_code}, 区县名称: {county}")
# Stamp the file name with the current local time (to the second) so that every
# export is kept as its own file. strftime() already drops sub-second
# precision, so no round trip through an integer epoch timestamp is needed.
formatted_timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
df = pd.DataFrame(
    provinces_cities,
    columns=['省份编码', '省份名称', '城市编码', '城市名称', '区县编码', '区县名称'],
)
# index=False keeps the DataFrame's row index out of the spreadsheet.
df.to_excel(f'省份城市对应关系_{formatted_timestamp}.xlsx', index=False)
2.上下级的展示格式
结果展示:
代码如下(示例):
import requests, json, openpyxl
import pandas as pd
import datetime
# Endpoint of the Baidu Maps administrative-region search API.
url = "https://api.map.baidu.com/api_region_search/v1/"
# Fill in the AK obtained from Console -> Application management -> Create application.
ak = "xxxxxx"
params = {
    "keyword": "全国",
    # NOTE(review): presumably the number of sub-levels to return below the
    # keyword; baidu_addr() further down walks four levels of children.
    "sub_admin": "4",
    "ak": ak,
    "extensions_code": "1",
}
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url=url, params=params, timeout=30)
# Stop on HTTP errors instead of trying to parse an error page as JSON.
response.raise_for_status()
# Parsed JSON payload returned by the Baidu Maps API.
data = response.json()
# Everything below reads data['districts']; if the key is missing, show the
# payload the API actually returned rather than failing later with a KeyError.
if 'districts' not in data:
    raise RuntimeError(f"Unexpected response from the region search API: {data}")
# The API reports different 'level' values for Hong Kong and Macau than for the
# other regions, so the levels are rebuilt from the JSON nesting itself. This
# guarantees a province -> city -> county ... hierarchy everywhere.
def set_level(districts, level=0):
    """Rewrite ``'level'`` on each node so that it equals the nesting depth.

    The nodes are modified in place and nothing is returned.

    Args:
        districts: List of district dicts; children, when present, live
            under the ``'districts'`` key.
        level: Depth given to the nodes of ``districts``; every generation
            of children gets one more than its parents.
    """
    # Sweep the tree one generation at a time.
    generation = list(districts)
    depth = level
    while generation:
        children = []
        for node in generation:
            node['level'] = depth
            children.extend(node.get('districts', []))
        generation = children
        depth += 1
# Overwrite every node's 'level' with its nesting depth before the tree is flattened.
set_level(data['districts'])
def baidu_addr(addr):
    """Flatten a nested district tree into a list of parent/child rows.

    The tree is walked depth-first, parents before their children, down to
    at most five levels (0 to 4). A node is emitted only when its
    ``'level'`` equals its nesting depth; a node that does not match is
    skipped together with its subtree.

    Args:
        addr: List of top-level district dicts. Each dict needs ``'name'``
            and ``'level'``; nodes below the top level also need ``'code'``,
            and a top-level node needs it once it has children. Children,
            when present, live under ``'districts'``.

    Returns:
        A list of dicts with the keys ``addrcode``, ``addrname``,
        ``addrpcode`` (the parent's code, ``None`` for the top level),
        ``addrtype``, ``addrlevel`` and ``status`` (always ``'Y'``).
    """
    # addrtype label per nesting depth: level 0 is the country, level 3 the
    # county/district.
    addr_types = ('Country', 'State', 'City', 'County', 'Town')
    results = []

    def walk(nodes, depth, parent):
        # Nothing is emitted below the town level.
        if depth >= len(addr_types):
            return
        for node in nodes:
            if node['level'] != depth:
                continue
            results.append({
                # NOTE(review): the top-level row is emitted with the fixed
                # code '0', while its children point at the node's own
                # 'code' as their parent -- confirm the two agree.
                'addrcode': '0' if depth == 0 else node['code'],
                'addrname': node['name'],
                'addrpcode': None if parent is None else parent['code'],
                'addrtype': addr_types[depth],
                'addrlevel': node['level'],
                'status': 'Y',
            })
            # Leaf nodes may lack the 'districts' key; treat that as no children.
            walk(node.get('districts', []), depth + 1, node)

    walk(addr, 0, None)
    return results
# Flatten the district tree into a list of row dicts.
processed_data = baidu_addr(data['districts'])
# Turn each row dict into a list whose order matches the spreadsheet columns.
provinces_cities = [
    [
        entry['addrcode'],
        entry['addrname'],
        entry['addrpcode'],
        entry['addrtype'],
        entry['addrlevel'],
        entry['status'],
    ]
    for entry in processed_data
]
# Echo every generated address row (code, name, parent code, type, level,
# status) to the terminal.
for record in provinces_cities:
    addrcode, addrname, addrpcode, addrtype, addrlevel, status = record
    print(f"地址编码: {addrcode}, 地址名称: {addrname}, 上级编码: {addrpcode}, 地址类型: {addrtype},地址等级: {addrlevel}, 状态: {status}")
# Stamp the file name with the current local time (to the second) so that every
# export is kept as its own file. strftime() already drops sub-second
# precision, so no round trip through an integer epoch timestamp is needed.
formatted_timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
# Export the rows to Excel.
df = pd.DataFrame(
    provinces_cities,
    columns=['地址编码', '地址名称', '上级编码', '地址类型', '地址等级', '状态'],
)
# index=False keeps the DataFrame's row index out of the spreadsheet.
df.to_excel(f'省份城市对应关系_{formatted_timestamp}.xlsx', index=False)