import requests
from re import findall
def get_url(timeout=10.0):
    """Download the first page of the Douban Top 250 listing.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The response body decoded as text.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
    """
    url = 'https://movie.douban.com/top250?start=0&filter='
    # A browser-like user agent is sent instead of the default one used by
    # requests.
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36'
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on an error status rather than returning an error page
    # that callers would then try to parse as the listing.
    response.raise_for_status()
    return response.text
def massage():
    """Extract movie rows from the page returned by ``get_url()``.

    Five regular expressions are run over the whole page independently, and
    their results are paired up by position.

    Returns:
        A list of at most 25 rows. Each row is a list of seven strings:
        ``[name, relate_0, relate_1, relate_2, score, assessment, introduce]``
        where

        * ``name`` is the word-character content of a ``<span class="title">``,
        * ``relate_0..2`` are the three groups of a ``digits / text / text``
          line (presumably year / country / genre -- confirm against the
          page markup),
        * ``score`` is the content of the ``rating_num`` span,
        * ``assessment`` is the content of a bare ``<span>`` starting with
          digits,
        * ``introduce`` is the content of the ``inq`` span.
    """
    url_str = get_url()
    name = findall(r'<span class="title">(\w*)</span>', url_str)
    relate = findall(r'(\d*) / ([\w\s]*) / ([\w\s]*)\n\s*', url_str)
    score = findall(r'<span class="rating_num" property="v:average">([\d.]*)</span>', url_str)
    assessment = findall(r'<span>(\d+\w*)</span>', url_str)
    introduce = findall(r'<span class="inq">(.*)</span>', url_str)

    # zip() stops at the shortest list, so a page on which one pattern
    # matches fewer times yields fewer rows instead of raising IndexError.
    # NOTE(review): each field is matched independently of the others, so a
    # missing match for one entry shifts that column for every later row.
    paired = zip(name, relate, score, assessment, introduce)
    movie = [
        [title, *details, rating, votes, quote]
        for title, details, rating, votes, quote in paired
    ]
    # Keep the original cap of 25 rows.
    return movie[:25]
# Script entry: fetch and parse the page, then print the resulting rows.
# NOTE(review): this is a top-level statement, so it also runs (including
# the HTTP request made via massage() -> get_url()) when the module is
# imported.
print(massage())