在使用requests模块和BeautifulSoup登录之后,可以使用我在注释中建议的链接来解析json中可用的所需数据。下面的脚本应该得到您的名称,数量,价格和相关产品的链接。你只能得到21个产品使用下面的脚本。在这个json内容中有一个分页选项。您可以通过使用分页来获得所有产品。在import json
import requests
from bs4 import BeautifulSoup
baseurl = 'https://www.instacart.com/store/'
data_url = "https://www.instacart.com/v3/retailers/159/module_data/dynamic_item_lists/cart_starters/storefront_canonical?origin_source_type=store_root_department&tracking.page_view_id=b974d56d-eaa4-4ce2-9474-ada4723fc7dc&source=web&cache_key=df535d-6863-f-1cd&per=30"
data = {"user": {"email": "alexanderjbusch@gmail.com", "password": "password"},
"authenticity_token": ""}
headers = {
'user-agent':'Mozilla/5.0',
'x-requested-with': 'XMLHttpRequest'
}
with requests.Session() as s:
res = s.get('https://www.instacart.com/',headers={'user-agent':'Mozilla/5.0'})
soup = BeautifulSoup(res.text, 'lxml')
token = soup.select_one("[name='csrf-token']").get('content')
data["authenticity_token"] = token
s.post("https://www.instacart.com/accounts/login",json=data,headers=headers)
resp = s.get(data_url, headers=headers)
for item in resp.json()['module_data']['items']:
name = item['name']
quantity = item['size']
price = item['pricing']['price']
product_page = baseurl + item['click_action']['data']['container']['path']
print(f'{name}\n{quantity}\n{price}\n{product_page}\n')
部分输出:
^{pr2}$