本文借用爬虫练习平台 https://scrape.center/ ,以其中的示例站点 https://spa1.scrape.center/ 为例,演示如何通过其 Ajax 接口抓取电影数据。
直接上代码:
import requests
# Movie-list API endpoint of the spa1.scrape.center demo site. It is passed to
# getpage(), where requests appends the query parameters.
baseurl = 'https://spa1.scrape.center/api/movie/?'
def getpage(url, limit=100, offset=0, timeout=10):
    """Request the movie list from the API and return the decoded JSON body.

    Args:
        url: Endpoint to query; the query string is built from ``limit``
            and ``offset`` and appended by requests.
        limit: Value sent as the ``limit`` query parameter (defaults to 100,
            the value the original script hard-coded).
        offset: Value sent as the ``offset`` query parameter (defaults to 0).
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The parsed JSON document returned by the server.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
        ValueError: If the response body is not valid JSON.
    """
    params = {
        'limit': limit,
        'offset': offset,
    }
    # Request headers copied from the browser's developer tools (F12).
    # The HTTP/2 pseudo-headers shown there (authority, method, path, scheme)
    # are intentionally left out: they are not regular header fields and the
    # HTTP client derives them from the URL itself. 'accept-encoding' is also
    # omitted so that requests only advertises encodings it can decode
    # ('br' needs an optional brotli package).
    headers = {
        'accept': 'application/json, text/plain, */*',
        'accept-language': 'zh-CN,zh;q=0.9',
        'referer': 'https://spa1.scrape.center/page/2',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-origin',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36',
    }
    # requests URL-encodes params and appends them to url before sending.
    res = requests.get(url, params=params, headers=headers, timeout=timeout)
    # Fail loudly on an error status instead of trying to parse an error page.
    res.raise_for_status()
    res.encoding = 'utf-8'
    return res.json()
def parse_page(json):
    """Yield one flat record per movie found in a decoded API response.

    Args:
        json: Decoded response body (a dict) holding the movie entries under
            the ``results`` key. An empty or ``None`` value, or a body whose
            ``results`` is missing or null, produces no records.

    Yields:
        dict: One record per entry in ``results``. Keys are the Chinese
        labels used by the script; each value is read from the entry with
        ``.get`` and is therefore ``None`` when the field is absent.
    """
    if not json:
        return
    # 'results' may be absent or null; treat both as "no movies" rather than
    # failing with a TypeError while iterating None.
    for item in json.get('results') or []:
        yield {
            'id': item.get('id'),
            '名字': item.get('name'),
            '别名': item.get('alias'),
            '封面': item.get('cover'),
            '类别': item.get('categories'),
            '上映时间': item.get('published_at'),
            '时间': item.get('minute'),
            '评分': item.get('score'),
            '上映区域': item.get('regions'),
        }
# Fetch the movie list from the API, then print each parsed record.
json = getpage(baseurl)
results = parse_page(json)
for result in results:
    print(result)
其中 headers 的获取方式为:在浏览器中按 F12 打开开发者工具,查看该请求的请求头内容:
运行后的效果: