本文的重点和难点是把某个地址转换为经纬度后再发起请求;另外,相关加密参数即使不携带(也就是无需解密)也可以请求成功。本文主要是为了分享地址转换为经纬度的相关知识,无任何恶意行为。
网站
aHR0cHM6Ly9oNS53YWltYWkubWVpdHVhbi5jb20vd2FpbWFpL21pbmRleC9ob21lP3R5cGU9bWFpbl9wYWdlJnV0bV9zb3VyY2U9NjAwMzAmY2hhbm5lbD1tdGliJnN0aWRfYj0xJmNldmVudD1pbXQlMkZob21lcGFnZSUyRmNhdGVnb3J5MSUyRjM5NA==
数据链接
相关加密参数
请求头的参数(不带也可以请求成功)
用下面的代码把某一个地址转换为经度纬度
import requests
def gd_map(addr, city='广州', timeout=10):
    """Geocode an address with the Amap geocoding API.

    Args:
        addr: Free-text address or place name to look up.
        city: City used to narrow the search. Defaults to the city that was
            previously hard-coded in the request URL.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The ``location`` field of the first geocode match (the coordinate
        string produced by the API).

    Raises:
        LookupError: If the response contains no geocode match.
        requests.RequestException: On a network failure or HTTP error status.
    """
    url = 'https://restapi.amap.com/v3/geocode/geo'  # Amap geocoding endpoint
    para = {
        'key': '你的key,从高德开发平台获取',  # API key from the Amap developer console
        'address': addr,
        'city': city,
    }
    response = requests.get(url, params=para, timeout=timeout)
    response.raise_for_status()
    result = response.json()
    # An unknown address or a rejected key yields no usable "geocodes" entry;
    # report that explicitly instead of failing with a bare IndexError/KeyError.
    geocodes = result.get('geocodes') or []
    if not geocodes:
        raise LookupError(f'no geocode result for {addr!r}: {result!r}')
    return geocodes[0]['location']
# Demo call: look up a sample place name and print what gd_map returns.
print(gd_map(addr="珠江新城"))
我创建了下面这两个函数,目的是在爬取数据时如果中途中断,下一次可以从上次保存的进度继续,而不必从头开始爬取,这样能够节省时间和资源。
def save_progress(page):
    """Write ``page`` to the progress file '列表.txt', replacing its content."""
    with open('列表.txt', mode='w') as progress_file:
        progress_file.write(str(page))
def load_progress():
    """Return the page number stored in '列表.txt', or 0 if none is usable.

    A missing, empty, or non-numeric progress file means "no saved progress",
    so the crawl starts from page 0 instead of crashing.

    Returns:
        The saved page number as an int, or 0.
    """
    try:
        with open('列表.txt', 'r') as file:
            saved = file.read().strip()
    except FileNotFoundError:
        # No progress was ever saved.
        return 0
    try:
        return int(saved)
    except ValueError:
        # Empty or corrupt content: fall back to the beginning.
        return 0
这是获取到的数据。我把它保存到了Oracle数据库中,你们也可以根据自己的需求保存到csv、excel中。
完整代码(把你们的cookie和数据库的信息补全,还有一个某德地图的key值补好就可以运行了)
详细的解释都写在代码注释中了。
import json
import os
import time
import random
import requests
import cx_Oracle as cx
# HTTP headers sent with every shop-list request.
headers = {
    'Accept': 'application/json',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded',
    # Replace with the Cookie header of your own logged-in session. The value
    # must be one single-line string (a literal broken across lines is a
    # SyntaxError), and real session tokens should not be committed to source.
    'Cookie': '你的cookie',
    'Origin': 'https://h5.waimai.meituan.com',
    'Referer': 'https://h5.waimai.meituan.com/',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-site',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36 MicroMessenger/7.0.20.1781(0x6700143B) NetType/WIFI MiniProgramEnv/Windows WindowsWechat/WMPF XWEB/6945',
    # The signed 'mtgsig' header is deliberately not sent: the author reports
    # that the request succeeds without this encrypted parameter.
    'sec-ch-ua': '""',
    'sec-ch-ua-mobile': '?1',
    'sec-ch-ua-platform': '""',
}
# Unix timestamp in whole seconds, taken once when the module is loaded.
sjc = round(time.time())
# Query-string parameters attached to every shop-list request.
params = dict(
    set_name='waimai-east',
    region_id='2100000011',
    _=sjc,  # timestamp; the author notes this parameter can be dropped
    yodaReady='h5',
    csecplatform='4',
    csecversion='2.3.0',
)
def save_progress(page):
    """Write ``page`` to the progress file '列表.txt', replacing its content."""
    with open('列表.txt', mode='w') as progress_file:
        progress_file.write(str(page))
def load_progress():
    """Return the page number stored in '列表.txt', or 0 if none is usable.

    The file read here is the same one save_progress() writes. Previously the
    existence check and the open() call used two different file names, so a
    saved checkpoint could never be read back.

    A missing, empty, or non-numeric progress file means "no saved progress",
    so the crawl starts from page 0 instead of crashing.

    Returns:
        The saved page number as an int, or 0.
    """
    try:
        with open('列表.txt', 'r') as file:
            saved = file.read().strip()
    except FileNotFoundError:
        # No progress was ever saved.
        return 0
    try:
        return int(saved)
    except ValueError:
        # Empty or corrupt content: fall back to the beginning.
        return 0
def page_index(page, wm_lat, wm_long, timeout=30):
    """Request one page of the shop list and return the decoded JSON body.

    Args:
        page: Zero-based page number; also used to derive ``offset``.
        wm_lat: Latitude value sent as ``wm_latitude``.
        wm_long: Longitude value sent as ``wm_longitude``.
        timeout: Seconds to wait for the HTTP response, so that a stalled
            connection cannot hang the crawl forever.

    Returns:
        The response body parsed from JSON.

    Raises:
        requests.RequestException: On a network failure or HTTP error status.
    """
    data = {
        'optimus_code': '10',
        'optimus_risk_level': '71',
        'pageSize': '20',
        'page_index': page,
        'offset': page * 20,
        'content_personalized_switch': '0',
        'sort_type': '',
        'slider_select_data': '',
        'activity_filter_codes': '',
        'wm_latitude': wm_lat,  # derived from the Amap coordinates
        'wm_longitude': wm_long,  # derived from the Amap coordinates
        'wmUuidDeregistration': '0',
        'wmUserIdDeregistration': '0',
        # Taken from the cookie; the author reports the request still works
        # when this value is kept as-is.
        'openh5_uuid': 'F1C375A54CF4425572FA49017DEB3A33C851197349ECE31DCED6C8FC542CE4A7',
        'uuid': 'F1C375A54CF4425572FA49017DEB3A33C851197349ECE31DCED6C8FC542CE4A7',
    }
    response = requests.post(
        'https://i.waimai.meituan.com/tsp/open/openh5/home/shopList',
        params=params,
        headers=headers,
        data=data,
        timeout=timeout,
    )
    # Fail with a clear HTTP error rather than a confusing JSON decode error.
    response.raise_for_status()
    # Random pause after each request to throttle the crawl.
    num = random.uniform(15, 17)
    time.sleep(num)
    print(f"第{page + 1}页,耗时{num}秒钟")
    return response.json()
def load_index(a, cursor, con):
    """Insert every shop of one shop-list response into table ``mtwm_149``.

    Args:
        a: Decoded response. ``a['data']`` is a JSON string whose first
            ``module_list`` entry holds the per-shop modules, each carrying
            a JSON string under ``string_data``.
        cursor: Database cursor whose ``execute`` accepts named bind values.
        con: Connection that owns ``cursor``.

    The page is committed once, after all rows were inserted. A failure in the
    middle of a page therefore stores nothing from it, so re-fetching that page
    on resume does not duplicate rows. Fields missing from a shop record are
    stored as ``None`` instead of aborting the crawl with a KeyError.
    """
    # Order matches the bind variables :a1 .. :a9 of the INSERT statement.
    fields = (
        'scheme',
        'poi_name',
        'wm_poi_score',
        'avg_price_tip',
        'month_sales_tip',
        'min_price_tip',
        'shipping_fee_tip',
        'delivery_time_tip',
        'distance',
    )
    insert_sql = "INSERT INTO mtwm_149 (scheme_A,poi_name_A,wm_poi_score_A,avg_price_tip_A,month_sales_tip_A,min_price_tip_A,shipping_fee_tip_A,delivery_time_tip_A,distance_A) VALUES (:a1, :a2,:a3,:a4,:a5,:a6,:a7,:a8,:a9)"
    payload = json.loads(a['data'])
    shops = payload['module_list'][0]['module_list']
    for shop in shops:
        info = json.loads(shop['string_data'])
        values = [info.get(name) for name in fields]
        print(*values)
        binds = {f'a{pos}': value for pos, value in enumerate(values, start=1)}
        cursor.execute(insert_sql, **binds)
    con.commit()
def gd_map(addr, city='广州', timeout=10):
    """Geocode an address with the Amap geocoding API.

    Args:
        addr: Free-text address or place name to look up.
        city: City used to narrow the search. Defaults to the city that was
            previously hard-coded in the request URL; pass another one to
            crawl a different area.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The ``location`` field of the first geocode match, a comma-separated
        coordinate string.

    Raises:
        LookupError: If the response contains no geocode match.
        requests.RequestException: On a network failure or HTTP error status.
    """
    url = 'https://restapi.amap.com/v3/geocode/geo'  # Amap geocoding endpoint
    para = {
        'key': '你的key',  # your Amap API key
        'address': addr,
        'city': city,
    }
    response = requests.get(url, params=para, timeout=timeout)
    response.raise_for_status()
    result = response.json()
    # An unknown address or a rejected key yields no usable "geocodes" entry;
    # report that explicitly instead of failing with a bare IndexError/KeyError.
    geocodes = result.get('geocodes') or []
    if not geocodes:
        raise LookupError(f'no geocode result for {addr!r}: {result!r}')
    return geocodes[0]['location']
if __name__ == '__main__':
    # Script entry point: geocode the requested address, crawl the shop list
    # page by page into Oracle, and checkpoint after every page so that an
    # interrupted run can resume where it stopped.
    address = input("请输入你需要爬取地址周围的外卖地址:秘制鸭货(萧岗店): ")  # e.g. 秘制鸭货(萧岗店)
    addr = gd_map(address)
    wm_long, wm_lat = addr.split(',')
    # The decimal point is removed before the coordinates are sent.
    # NOTE(review): presumably the API expects degrees scaled to an integer,
    # which only holds if the geocoder always returns the same number of
    # decimals - confirm.
    wm_lat = wm_lat.replace('.', '')
    wm_long = wm_long.replace('.', '')
    con = cx.connect('你的Oracle的用户名', '密码', '数据库名称')
    try:
        cursor = con.cursor()
        try:
            start_page = load_progress()
            for page in range(start_page, 101):
                data = page_index(page, wm_lat, wm_long)
                load_index(data, cursor, con)
                save_progress(page + 1)
        finally:
            cursor.close()  # always release the cursor, even on failure
    finally:
        con.close()  # always release the database connection
    print("已经采集完成")
    # Remove the checkpoint written by save_progress() so the next run starts
    # from the first page. It must be the same file name, otherwise a finished
    # crawl would leave a stale checkpoint and the next run would do nothing.
    if os.path.exists('列表.txt'):
        os.remove('列表.txt')
如涉及侵犯到相关利益,请联系删除。
注意:如果不清楚经纬度与地址之间的转换,或者不清楚高德地图开发者key值的获取方式,可以查看上一篇文章。