需要注意的地方是添加请求头；Scrapy 通过携带表单数据的 POST 请求来访问接口，spider 文件如下：
# -*- coding: utf-8 -*-
import scrapy
import csv
import json
class MobickSpider(scrapy.Spider):
    """Spider that fetches nearby Mobike bike data via the app's form-POST API.

    Reads coordinate rows from a local CSV file (longitude in column 2,
    latitude in column 3 — produced by the AMap open API, see the previous
    post) and issues one FormRequest per coordinate to the
    nearbyBikesInfo endpoint.
    """
    name = 'mobick'
    allowed_domains = ['mobike.com']
    start_urls = ['https://mobike.com']

    def parse(self, response):
        """Yield one FormRequest per coordinate row read from the local CSV.

        Note: the original code was missing the colon on this ``def`` line
        (SyntaxError) — fixed here.
        """
        # Mobike data endpoint, found by sniffing the mini-program traffic
        # with a packet-capture tool.
        url = "https://mwx.mobike.com/mobike-api/rent/nearbyBikesInfo.do"
        # Headers mimic the WeChat mini-program client; presumably the
        # endpoint rejects requests without them — TODO confirm.
        headers = {
            'charset': "utf-8",
            'platform': "4",
            "referer": "https://servicewechat.com/wx40f112341ae33edb/1/",
            'content-type': "application/x-www-form-urlencoded",
            'user-agent': "MicroMessenger/6.5.4.1000 NetType/WIFI Language/zh_CN",
            'host': "mwx.mobike.com",
            'connection': "Keep-Alive",
            'accept-encoding': "gzip",
            'cache-control': "no-cache",
        }
        # Use a context manager so the file is closed even if a row raises
        # (the original manual open()/close() pair leaked on error).
        with open(r'file_path', 'r') as f:
            for row in csv.reader(f):
                longitude = row[2]
                latitude = row[3]
                # Form payload expected by the endpoint.
                payload = {
                    "latitude": str(latitude),
                    "longitude": str(longitude),
                    "errMsg": "getMapCenterLocation",
                }
                yield scrapy.FormRequest(
                    url=url,
                    headers=headers,
                    formdata=payload,
                    meta={'item': row},
                    callback=self.parse_data,
                )

    def parse_data(self, response):
        """Parse the JSON bike-info response and yield it alongside its row.

        The originating CSV row travels in ``response.meta['item']``.
        """
        item = response.meta.get('item')
        car_data = json.loads(response.text)
        if car_data is not None:
            print(item)
            yield car_data