使用百度地图 API 批量获取经纬度(注意:百度地图返回的经纬度是经过偏移加密的坐标)

#!/usr/bin/env python
# coding: utf-8
#数据爬取
import requests
from fake_useragent import UserAgent
import pandas as pd
#import xlrd
import numpy as np
from urllib.parse import quote
import re
from time import sleep
from random import randint
import random


# In[2]:


# Load the spreadsheet of places to geocode; the classes below read these
# module-level names.
place_name = pd.read_excel('企业信息获取.xlsx')
# hangshu = number of rows, leishu = number of columns of the loaded table.
hangshu, leishu = place_name.shape
# Bare expression: shows the table in a notebook cell, has no effect in a script.
place_name


# In[ ]:



class Url_Mnger:
    """Builds geocoder request URLs from the module-level ``place_name`` table."""

    def Url_join(self, hangshu):
        """Return one geocoder URL for each of the first ``hangshu`` rows.

        The address text is taken from the second column (position 1) of
        ``place_name`` and percent-encoded before being placed in the URL.
        Every address/URL pair is printed as it is built.

        Args:
            hangshu: Number of rows of ``place_name`` to process.

        Returns:
            List of URL strings, in row order.
        """
        url_template = 'http://api.map.baidu.com/geocoder?address={}'
        urls = []
        for row in range(hangshu):
            address = place_name.iloc[row, 1]
            request_url = url_template.format(quote(address))
            print(address, request_url)
            urls.append(request_url)
        return urls
# Request-sending class
class Response_Cast(object):
    """Fetches a URL over HTTP and returns the response body as text."""

    def Get_response(self, url, timeout=10):
        """GET ``url`` with a Chrome User-Agent header and return the body text.

        Args:
            url: Address to request.
            timeout: Seconds to wait for the server before giving up; passed
                straight to ``requests.get``.

        Returns:
            The response body decoded as text (``response.text``).

        Raises:
            requests.exceptions.RequestException: If the request fails or
                the timeout expires.
        """
        headers = {'User-Agent': UserAgent().chrome}
        # Without a timeout requests waits indefinitely, so a single stalled
        # connection would hang the whole crawl.
        response = requests.get(url=url, headers=headers, timeout=timeout)
        return response.text
# Data management class
class Info_Manger:
    """Parses geocoder responses and builds/saves the result table."""

    def Parse_html(self, info_text):
        """Extract the first latitude/longitude pair from a response body.

        Args:
            info_text: Response text expected to contain ``<lat>...</lat>``
                and ``<lng>...</lng>`` elements.

        Returns:
            ``(latitude, longitude)`` as strings, or ``(None, None)`` when
            either element is missing, so a failed lookup yields a
            placeholder instead of an ``IndexError``.
        """
        # Non-greedy groups: stop at the first closing tag even when several
        # results share one line.
        lat_match = re.search(r'<lat>(.+?)</lat>', info_text)
        lng_match = re.search(r'<lng>(.+?)</lng>', info_text)
        if lat_match is None or lng_match is None:
            latitude = longitude = None
        else:
            latitude = lat_match.group(1)
            longitude = lng_match.group(1)
        print(latitude, longitude)
        return latitude, longitude

    def Make_dataform(self, lat, longi):
        """Combine the place names with the collected coordinates.

        Args:
            lat: Latitude values, one per row of ``place_name``.
            longi: Longitude values, one per row of ``place_name``.

        Returns:
            DataFrame with columns ``a_point`` (second column of the
            module-level ``place_name``), ``a_lat`` and ``a_longi``.
        """
        df = pd.DataFrame({'a_point': place_name.iloc[:, 1], 'a_lat': lat, 'a_longi': longi})
        return df

    # Data saving
    def Savedata(self, df):
        """Write ``df`` to ``geo_data_gaode_quchong.csv`` using GBK encoding."""
        df.to_csv('geo_data_gaode_quchong.csv', encoding='GBK')

class Run_Scrapy:
    """Runs the whole crawl: build URLs, fetch each one, parse, save."""

    def __init__(self):
        """Execute the crawl as a side effect of construction.

        Builds the URL list from the module-level ``hangshu`` row count,
        requests each URL with a short random pause in between, and writes
        the collected coordinates to CSV once at the end. A row whose
        request or parse fails is recorded as ``None`` so the coordinate
        lists stay aligned with the input rows and earlier results are
        not lost.
        """
        url_list = Url_Mnger().Url_join(hangshu)
        url_list_length = len(url_list)
        response_cast = Response_Cast()
        info_manger = Info_Manger()
        lat = []
        longi = []
        for j, url in enumerate(url_list):
            print(j, '/', url_list_length)
            # Pause 1-1.5 s between requests.
            sleep(random.uniform(1, 1.5))
            try:
                response_info = response_cast.Get_response(url)
                info_latitude, info_longitude = info_manger.Parse_html(response_info)
            except (requests.RequestException, IndexError) as err:
                # Results are only saved after the loop, so an uncaught error
                # here would discard everything collected so far.
                print('failed:', url, err)
                info_latitude = info_longitude = None
            lat.append(info_latitude)
            longi.append(info_longitude)
        make_dataform = info_manger.Make_dataform(lat, longi)
        info_manger.Savedata(make_dataform)

if __name__ == '__main__':
    # The crawl runs inside Run_Scrapy.__init__, so the class has to be
    # called; a bare reference to it does nothing.
    Run_Scrapy()





评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

小蜗笔记

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值