使用百度地图API读取EXCEL(xlsx)并返回行政区信息
1.读excel(使用openpyxl)
2.使用百度地图API进行加载并查询信息,查询得的xml文件保存在临时路径下
3. 使用xml.dom.mini进行parse临时文件
4.得到的结果写入Excel文件中(读写Excel可以定义一个类,或者函数)
下面是结果啦,网页版markdown我真的不太好用。。。
二话不说,上代码
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 2 19:40:09 2019
@author: Joey
"""
#coding=utf-8
import xml.dom.minidom
from openpyxl import load_workbook
import openpyxl
import requests
import os
if not os.path.exists('./temp_file/'):
os.makedirs('./temp_file/')
def get_infor(hospit_name,region="全国"):#return list
response= requests.get('http://api.map.baidu.com/place/v2/search?query=%s&tag=医院®ion=%s&output=xml&ak=XarWIFAtSzT5BstuRUnuOqawzNn1PLlz'%(hospit_name,region))
with open('./temp_file/%s.xml'%hospit_name,'w') as f:
f.write(response.text)
dom = xml.dom.minidom.parse('./temp_file/%s.xml'%hospit_name)
root = dom.documentElement
bb = root.getElementsByTagName('result')
#define a container
rsts=[]
count=0
n_region=''
try:
for i in range(len(bb)):
#rst containts one spot's information
rst=[]
#rst_infor=['possible_name','full_adre',"省",'市','县区']
#Notes:1.子节点也可以使用.getElementsByTagName()
#2.parantNode.getElementByTagName()返回值是list类型
# try:
# num=bb[i].getElementsByTagName('num')[0].firstChild.data ###################
# except:
name_key=bb[i].getElementsByTagName('name')[0].firstChild.data
n_region=name_key
rst.append(name_key)
f_address=bb[i].getElementsByTagName('address')[0].firstChild.data
rst.append(f_address)
a1=bb[i].getElementsByTagName('province')[0].firstChild.data
rst.append(a1)
a2=bb[i].getElementsByTagName('city')[0].firstChild.data
rst.append(a2)
a3=bb[i].getElementsByTagName('area')[0].firstChild.data
rst.append(a3)
rsts.append(rst)
#only find 3 results
count+=1
if count==3:
break
except:
print(n_region)
return get_infor(hospit_name,n_region)
else:
return rsts
#print(get_infor('中国人民解放军总医院第六医学中心'))
class Write_excel(object):
'''修改excel数据'''
def __init__(self, filename):
self.filename = filename
self.wb = load_workbook(self.filename)
self.ws = self.wb.active # 激活正在工作的sheet#######可能有bug
def write(self, row_n, col_n, value):
'''写入数据,如(2,3,"hello"),第二行第三列写入数据"hello"'''
self.ws.cell(row_n, col_n,value )
self.wb.save(self.filename)
def read_xlsx(path):
#return hospital_name list
excel = load_workbook(path)
#读表操作:
table = excel['射频套管_xx家']
#求总有效行数
rows=0
i=1
while table.cell(i,1).value:
rows+=1
i+=1
if i==200:
break
rows-=1
print('一共需查找',rows,'条数据')
hospit_name=[]
for i in range(rows):
hospit_name.append(table.cell(2+i,3).value)
return hospit_name
#测试代码
# for i in read_xlsx('/Users/Eveline/Desktop/数据清洗/智杰的医院清单_OUTPUT.xlsx'):
# print(i)
if __name__=='__main__':
path="/Users/Eveline/Desktop/数据清洗/智杰的医院清单_OUTPUT.xlsx"
#input file path and read hospital list
hospital_name=read_xlsx(path)
#begian write
we = Write_excel(path)#调用class
#写表头:
rst_infor=['possible_name','full_adre',"省",'市','县区']
for i in range(5): #填写表头
we.write(1,8+i,rst_infor[i])
#写所有数据
n=-1#行数计数变量
#迭代每一个医院名
for hos in hospital_name:
print(hos)#test code
n+=1
rsts=get_infor(hos)#获取信息表【3*【5】】
#写这一行
write_times=5*len(rsts)
for i in range(write_times):
we.write(2+n,8+i,rsts[i//5][i%5])
print('done,',len(hospital_name)-1-n,'left')
#we.write(2+i,8+i,rsts[i//5][i%5])
'''python'''
![在这里插入图片描述](https://img-blog.csdnimg.cn/20191203230307813.jpg?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzQwNDU0NzY0,size_16,color_FFFFFF,t_70)