一、使用条件:1. 会安装 Python 3、beautifulsoup4(bs4)和 Sublime Text。2. 会修改参数。
二、使用方法:1. 第五行的参数 Num1 是页码上限(实际抓取第 1 页到第 Num1-1 页),Num2 是分类索引页编号。例如想知道科幻片的评分排序,则 Num2 = 4;网站上该分类共有 37 页,则 Num1 = 38。Num2 的取值即网站各分类页网址中的数字(https://www.dy2018.com/ 后面的编号)。
2.倒数第三行的7表示只显示评分大于7分的电影名。
# -*- coding:utf-8 -*-
import urllib.request
from bs4 import BeautifulSoup
import re
def getpage(Num1=5, Num2=20):
    """Build the list of listing-page URLs for one category of the site.

    Args:
        Num1: Exclusive upper bound on the page number; pages 1 through
            ``Num1 - 1`` are generated (e.g. 38 for a category with 37 pages).
        Num2: Category index that appears in the site URL.

    Returns:
        A list of URL strings. The first entry is the bare category URL
        (page 1); the remaining entries are ``index_<i>.html`` for
        ``i`` in ``2 .. Num1 - 1``. For ``Num1 <= 2`` only page 1 is returned.
    """
    base = 'https://www.dy2018.com/' + str(Num2)
    # Page 1 has no "index_N.html" suffix, so it is added separately.
    q = [base]
    q.extend(base + '/index_' + str(i) + '.html' for i in range(2, Num1))
    return q
def name_score(websites):
    """Download each listing page and collect (film name, score) pairs.

    Args:
        websites: Iterable of page URLs to fetch.

    Returns:
        A list of ``(name, score)`` tuples of strings, in page order. Each
        value is the text following the first ':' in a ``◎片名…`` /
        ``◎评分…`` match found in the page's plain text.
    """
    # Compile once: the patterns are identical for every page.
    score = re.compile(r"◎评分.*")
    film = re.compile(r"◎片名.*\b")
    film_all = []
    for i in websites:
        # The context manager closes the HTTP response even if reading fails.
        with urllib.request.urlopen(i) as response:
            # Pages are decoded as GBK; undecodable bytes are replaced so a
            # single bad byte does not abort the whole crawl.
            result = response.read().decode('GBK', errors='replace')
        soup = BeautifulSoup(result, "html.parser").get_text()
        # partition() yields '' when a match has no ':' (instead of raising
        # IndexError), which keeps the two lists the same length as the
        # number of matches.
        score_result = [y.partition(':')[2] for y in score.findall(soup)]
        film_result = [x.partition(':')[2] for x in film.findall(soup)]
        # NOTE(review): names and scores are paired purely by position; a
        # film that lacks a score line on the page would shift every later
        # pairing. Confirm against the real page layout.
        film_all.extend(zip(film_result, score_result))
    return film_all
# Print every crawled film whose score exceeds the threshold, best first.
def _score_value(entry):
    """Return the numeric score of a (name, score) pair, or -inf if unparsable."""
    try:
        return float(entry[1])
    except ValueError:
        # Unparsable scores sort last and never pass the threshold below.
        return float('-inf')


# Sort numerically rather than as strings, so that e.g. "10" ranks above "9.5".
a = sorted(name_score(getpage()), key=_score_value, reverse=True)
for i in a:
    if _score_value(i) > 7:
        print(i)
运行示例:爱情片,前20页,评分大于8;纪录片,评分大于8.5。