# Excel format (this script reads its input from, and writes its output to, .xlsx files)
# Data scraping
import requests
from fake_useragent import UserAgent
import pandas as pd
#import xlrd
import numpy as np
from urllib.parse import quote
import re
from time import sleep
from random import randint
import random
# Load the input workbook once at import time; the classes below read
# column index 1 of this table as the place name to look up.
place_name = pd.read_excel('企业信息获取.xlsx')
# Uncomment to restrict a trial run to the first ten rows:
# place_name = place_name.iloc[0:10, :]
# hangshu = number of rows, leishu = number of columns of the input table.
hangshu, leishu = place_name.shape
class Url_Mnger:
    """Builds Baidu geocoder request URLs, one per place name."""

    # Request template; the URL-encoded place name replaces ``{}``.
    GEOCODER_URL = 'http://api.map.baidu.com/geocoder?address={}'

    def Url_join(self, hangshu, names=None):
        """Return the geocoder URLs for the first ``hangshu`` place names.

        Args:
            hangshu: Number of names (rows) to build URLs for.
            names: Optional sequence of place names. When omitted, column
                index 1 of the module-level ``place_name`` table is used,
                which is the original behaviour.

        Returns:
            A list of ``hangshu`` URL strings, in input order.

        Raises:
            IndexError: if ``hangshu`` exceeds the number of names available.
        """
        if names is None:
            names = place_name.iloc[:, 1]
        # Materialise once so rows are addressed by position, not by label.
        names = list(names)

        all_url = []
        for i in range(hangshu):
            village_name = names[i]
            # Percent-encode so non-ASCII names are valid in a query string.
            place_encode = quote(village_name)
            url = self.GEOCODER_URL.format(place_encode)
            print(village_name, url)
            all_url.append(url)
        return all_url
# Request-sending class
class Response_Cast(object):
    """Sends HTTP GET requests using a Chrome User-Agent from fake_useragent."""

    def __init__(self):
        # Create the User-Agent provider once instead of once per request;
        # a fresh Chrome string is still read from it on every call.
        self._user_agent = UserAgent()

    def Get_response(self, url, timeout=10):
        """Fetch ``url`` and return the response body as text.

        Args:
            url: Address to request.
            timeout: Seconds to wait for the server before giving up.
                Without a timeout, ``requests`` waits indefinitely and a
                single stalled connection would hang the whole run.

        Returns:
            The response body decoded to ``str`` (``response.text``).

        Raises:
            requests.exceptions.RequestException: if the connection fails or
                the timeout expires.
        """
        headers = {
            'User-Agent': self._user_agent.chrome
        }
        response = requests.get(url=url, headers=headers, timeout=timeout)
        return response.text
# Data-management class
class Info_Manger:
    """Parses geocoder responses and assembles/saves the result table."""

    def Parse_html(self, info_text):
        """Extract the first latitude and longitude from a response body.

        Args:
            info_text: Response text containing ``<lat>...</lat>`` and
                ``<lng>...</lng>`` elements.

        Returns:
            A ``(latitude, longitude)`` tuple of strings. A value is ``None``
            when its tag is absent, so one failed lookup does not abort the
            whole run with an ``IndexError``.
        """
        # Non-greedy groups: if several tags share a line, stop at the first
        # closing tag instead of swallowing everything up to the last one.
        latitudes = re.findall(r'<lat>(.+?)</lat>', info_text)
        longitudes = re.findall(r'<lng>(.+?)</lng>', info_text)
        latitude = latitudes[0] if latitudes else None
        longitude = longitudes[0] if longitudes else None
        print(latitude, longitude)
        return latitude, longitude

    def Make_dataform(self, lat, longi):
        """Combine the place names with their coordinates into a DataFrame.

        Args:
            lat: Latitudes, one per row of the module-level ``place_name``.
            longi: Longitudes, one per row of ``place_name``.

        Returns:
            A DataFrame with columns ``a_point`` (column index 1 of
            ``place_name``), ``a_lat`` and ``a_longi``.
        """
        df = pd.DataFrame({
            'a_point': place_name.iloc[:, 1],
            'a_lat': lat,
            'a_longi': longi,
        })
        return df

    def Savedata(self, df, path='geo_data_gaode_quchong.xlsx'):
        """Write ``df`` to an Excel workbook.

        Args:
            df: DataFrame to save.
            path: Destination file; defaults to the original output name.
        """
        df.to_excel(path)
class Run_Scrapy:
    """Runs the whole pipeline: build URLs, fetch, parse, save.

    All work happens in the constructor, so instantiating the class performs
    the scrape and writes the output file.
    """

    def __init__(self):
        url_manger = Url_Mnger()
        url_list = url_manger.Url_join(hangshu)
        url_list_length = len(url_list)
        response_cast = Response_Cast()
        info_manger = Info_Manger()
        lat = []
        longi = []
        for j, url in enumerate(url_list):
            # Progress indicator: zero-based index / total.
            print(j, '/', url_list_length)
            # The original had an ``if (j % 100) == 0`` / ``else`` split whose
            # two branches were identical (its sleep was commented out), so
            # every URL is handled by this single path.
            response_info = response_cast.Get_response(url)
            info_latitude, info_longitude = info_manger.Parse_html(response_info)
            lat.append(info_latitude)
            longi.append(info_longitude)
        # Build and save the table once, after every URL has been processed.
        make_dataform = info_manger.Make_dataform(lat, longi)
        info_manger.Savedata(make_dataform)
# Start the scrape only when this file is executed directly as a script.
if __name__ == '__main__':
    Run_Scrapy()
# Related link (Baidu Maps coordinate lookup page): http://api.map.baidu.com/lbsapi/getpoint/index.html