# Scrape the Douban Top 250 movie listing, save each entry's image as a .jpg
# file in the working directory, and print a summary of the collected entries.
import re

import requests
from bs4 import BeautifulSoup
# Accumulates one dict per scraped movie with the keys
# 'name', 'score', 'number', 'introduction' and 'img'.
movie_total = []
url = 'https://movie.douban.com/top250'

# The listing is fetched as PAGE_COUNT pages of PAGE_SIZE entries each,
# selected through the 'start' query parameter.
PAGE_COUNT = 10
PAGE_SIZE = 25

# Seconds to wait for the server before giving up; without an explicit
# timeout a stalled connection would block the script indefinitely.
REQUEST_TIMEOUT = 10

# NOTE(review): the site is believed to reject the default python-requests
# User-Agent -- confirm this header is still needed and sufficient.
REQUEST_HEADERS = {'User-Agent': 'Mozilla/5.0'}

# Text stored when a listing entry has no quote paragraph.
NO_INTRODUCTION = '---这部电影没有简评---'

# Path separators and other characters that are invalid in file names on
# common platforms; replaced before a movie name is used as a file name.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')


def _parse_movie_item(div_item):
    """Extract the fields of one ``<div class="item">`` listing entry.

    Args:
        div_item: The BeautifulSoup tag for a single listing entry.

    Returns:
        A dict with the keys 'name', 'score', 'number', 'introduction'
        and 'img', in that insertion order.
    """
    # The second <div> inside the item holds the title link and the quote.
    info_div = div_item.find_all('div')[1]
    star_spans = div_item.find('div', {'class': 'star'}).find_all('span')

    # The quote paragraph is optional, so test for it explicitly instead of
    # hiding every possible error behind a bare ``except``.
    quote = info_div.find('p', {'class': 'quote'})
    if quote is not None and quote.span is not None:
        introduction = quote.span.get_text()
    else:
        introduction = NO_INTRODUCTION

    return {
        'name': info_div.div.a.span.get_text(),
        'score': star_spans[1].get_text(),
        # Drops the last three characters of the final star <span>
        # (presumably a unit suffix after the vote count -- TODO confirm).
        'number': star_spans[-1].get_text()[:-3],
        'introduction': introduction,
        'img': div_item.div.a.img['src'],
    }


def _download_poster(name, img_url):
    """Download *img_url* and save it as ``<name>.jpg`` in the working directory.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    pic_response = requests.get(
        img_url, headers=REQUEST_HEADERS, timeout=REQUEST_TIMEOUT
    )
    # Fail loudly rather than writing an error page into a .jpg file.
    pic_response.raise_for_status()
    safe_name = _UNSAFE_FILENAME_CHARS.sub('_', name)
    with open(safe_name + '.jpg', 'wb') as pic_file:
        pic_file.write(pic_response.content)


for page_index in range(PAGE_COUNT):
    query = {
        'start': page_index * PAGE_SIZE,
        'filter': 0,
    }
    res = requests.get(
        url, params=query, headers=REQUEST_HEADERS, timeout=REQUEST_TIMEOUT
    )
    # A rejected request would otherwise be parsed as a page with no items.
    res.raise_for_status()
    soup = BeautifulSoup(res.text, "html.parser")
    for div_item in soup.find_all('div', {'class': 'item'}):
        movie = _parse_movie_item(div_item)
        _download_poster(movie['name'], movie['img'])
        movie_total.append(movie)

print(movie_total)
print(len(movie_total))
for movie_one in movie_total:
    print(
        '电影名称:%s\t电影评分:%s\n评价人数:%s\t电影简评:%s\n图片地址:%s'
        % (
            movie_one['name'],
            movie_one['score'],
            movie_one['number'],
            movie_one['introduction'],
            movie_one['img'],
        )
    )