# Tested and working; for learning purposes only.
import json
import re
from urllib import request
# Browser-like User-Agent header sent with every request.
header = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"}

# The listing is paged through the ``page_start`` query parameter, which
# advances by ``page_limit`` (20) for each page:
#   ...&page_limit=20&page_start=0
#   ...&page_limit=20&page_start=20
#   ...&page_limit=20&page_start=40
PAGE_SIZE = 20
BASE_URL = (
    "https://movie.douban.com/j/search_subjects?type=movie"
    "&tag=%E8%B1%86%E7%93%A3%E9%AB%98%E5%88%86&sort=recommend"
    "&page_limit=20&page_start="
)


def parse_movies(payload):
    """Extract ``(title, rate)`` pairs from one JSON response body.

    The body is decoded with ``json`` rather than scanned with regular
    expressions, so JSON escapes (``\\"``, ``\\uXXXX``) are decoded and every
    title stays paired with the rating from the same object.

    Args:
        payload: The response body as a JSON string. Movie objects look like
            ``{"rate": "9.3", "cover_x": 1100, "title": "..."}``.

    Returns:
        A list of ``(title, rate)`` tuples in document order. Every JSON
        object that has both a ``"title"`` and a ``"rate"`` key contributes
        one tuple, wherever it is nested in the payload.

    Raises:
        json.JSONDecodeError: If ``payload`` is not valid JSON.
    """
    found = []

    def walk(node):
        # Depth-first search so the result does not depend on the name of
        # the container that wraps the movie objects.
        if isinstance(node, dict):
            if "title" in node and "rate" in node:
                found.append((node["title"], node["rate"]))
                return
            for child in node.values():
                walk(child)
        elif isinstance(node, list):
            for child in node:
                walk(child)

    walk(json.loads(payload))
    return found


def fetch_page(page_index, timeout=10):
    """Download one result page and return its body as text.

    Args:
        page_index: Zero-based page number; converted to a ``page_start``
            offset of ``page_index * PAGE_SIZE``.
        timeout: Socket timeout in seconds, so a stalled connection cannot
            hang the script forever.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        urllib.error.URLError: If the request fails.
    """
    url = BASE_URL + str(page_index * PAGE_SIZE)
    req = request.Request(url, headers=header)
    # The context manager closes the connection even if reading fails.
    with request.urlopen(req, timeout=timeout) as resp:
        return resp.read().decode("utf-8")


def main():
    """Ask for a page count, fetch that many pages, and print the ranking."""
    page = int(input("请输入需要爬取的页数"))

    movies = []
    for i in range(page):
        movies.extend(parse_movies(fetch_page(i)))

    # Rank is the 1-based position in the order the server returned.
    for rank, (title, rate) in enumerate(movies, start=1):
        print("热度排名第", rank, " 电影名称:", title, ",电影评分:", rate)


if __name__ == "__main__":
    main()