抓取加载页面,分析url
每加载一页,url 中的 page_start 参数依次为 0, 20, 40, …(每页递增 20)
import requests
from fake_useragent import UserAgent
def getInfo(page, timeout=10):
    """Fetch one page of Douban's recommended "hot" movie listing.

    Args:
        page: Value appended to the ``page_start`` query parameter, i.e. the
            offset of the first movie to return. The URL fixes
            ``page_limit=20``, so successive pages are 0, 20, 40, ...
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The ``requests.Response`` object. The HTTP status is not checked
        here; callers decide how to handle non-200 responses.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    baseurl = r'https://movie.douban.com/j/search_subjects?type=movie&tag=%E7%83%AD%E9%97%A8&sort=recommend&page_limit=20&page_start='
    headers = {
        # A fresh random User-Agent string is generated on every call.
        'User-Agent': UserAgent().random,
    }
    response = requests.get(baseurl + str(page), headers=headers, timeout=timeout)
    return response
if __name__ == '__main__':
    # Request the first ten pages: page_start = 0, 20, ..., 180.
    # The responses are not used yet at this stage of the walkthrough.
    for offset in range(0, 200, 20):
        respones = getInfo(offset)
利用json解析出电影名称
完整代码
import requests
from fake_useragent import UserAgent
import json
def getInfo(page, timeout=10):
    """Fetch one page of Douban's recommended "hot" movie listing.

    Args:
        page: Value appended to the ``page_start`` query parameter, i.e. the
            offset of the first movie to return. The URL fixes
            ``page_limit=20``, so successive pages are 0, 20, 40, ...
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The ``requests.Response`` object. The HTTP status is not checked
        here; callers decide how to handle non-200 responses.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    baseurl = r'https://movie.douban.com/j/search_subjects?type=movie&tag=%E7%83%AD%E9%97%A8&sort=recommend&page_limit=20&page_start='
    headers = {
        # A fresh random User-Agent string is generated on every call.
        'User-Agent': UserAgent().random,
    }
    response = requests.get(baseurl + str(page), headers=headers, timeout=timeout)
    return response
if __name__ == '__main__':
    # Request the first ten pages (page_start = 0, 20, ..., 180) and print
    # the title of every movie found in each page's JSON payload.
    for i in range(10):
        respones = getInfo(i * 20)
        data = json.loads(respones.text)
        # Iterate over the entries actually returned rather than assuming
        # exactly 20 per page; a short page would otherwise raise IndexError.
        for subject in data['subjects']:
            print(subject['title'])