# Douban Movie Top 250 (豆瓣电影TOP250)
import requests
from bs4 import BeautifulSoup
# Browser-style User-Agent sent with every request.
# NOTE(review): presumably needed so the site does not reject the default
# python-requests agent -- confirm.
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36'}

# The list is paged 25 entries at a time via the `start` query parameter,
# so 250 entries span exactly 10 pages: start = 0, 25, ..., 225.
for n in range(10):
    # Fetch one page of the ranking.
    url_page = 'https://movie.douban.com/top250?start=' + str(n * 25) + '&filter='
    # A timeout keeps the script from hanging forever on a stalled connection.
    res_moves = requests.get(url_page, headers=headers, timeout=10)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    res_moves.raise_for_status()

    # Parse the page and locate the ordered list that holds the entries.
    bs_moves = BeautifulSoup(res_moves.text, 'html.parser')
    top_moves = bs_moves.find('ol', class_='grid_view')
    if top_moves is None:
        raise RuntimeError('no <ol class="grid_view"> found in ' + url_page)

    for tops in top_moves.find_all('li'):
        num = tops.find('em').text
        title = tops.find('span', class_='title').text
        # The one-line quote is optional: fall back to an empty string
        # when the entry has no <span class="inq">.
        inq = tops.find('span', class_='inq')
        comment = inq.text if inq is not None else ''
        score = tops.find('span', class_='rating_num').text
        url_move = tops.find('a')['href']
        print(num + '.' + title + '--' + comment + '--' + score + '--' + url_move)