输出的电影信息包括:电影名、导演名、主演名、上映年份、所属国家、电影分类和评分。
import requests
from bs4 import BeautifulSoup
def _split_credits(line):
    """Split the credits line of one entry into ``(director, stars)``.

    The line is cut on ':' the same way the original parser did: the piece
    after the first colon holds the director followed by the two-character
    label of the next field, and the piece after the last colon holds the
    cast.  Pieces that are missing are reported as 'null' instead of raising.
    """
    pieces = line.split(':')
    if len(pieces) < 2:
        # No labelled field on this line at all.
        return 'null', 'null'
    if len(pieces) == 2:
        # Only one labelled field: treat it as the director and do not chop
        # two characters off it, because no second label follows.
        return pieces[1].strip(), 'null'
    # NOTE(review): the trailing two characters are presumably the cast
    # label that precedes the last colon -- confirm against the live page.
    director = pieces[1][:-2].strip()
    stars = pieces[-1].strip()
    return director, stars


def _split_release_info(line):
    """Split the '/'-separated release line into ``(year, country, category)``.

    Fields that are missing are reported as 'null' instead of raising.
    """
    if not line.strip():
        return 'null', 'null', 'null'
    fields = [field.strip() for field in line.split('/')]
    if len(fields) < 3:
        fields += ['null'] * (3 - len(fields))
        return fields[0], fields[1], fields[2]
    # NOTE(review): when there are more than three fields, the extra ones are
    # assumed to be additional release years, so country and category are
    # taken from the end of the line -- confirm against the live page.
    return fields[0], fields[-2], fields[-1]


def get_movies():
    """Fetch the Douban Top 250 list pages and print the details of each movie.

    Ten pages are requested; the ``start`` query parameter advances by 25 per
    page.  For every page the title, director, cast, release year, country,
    category and score are collected into parallel lists, which are printed
    in order once all pages have been processed.

    Returns:
        None.  The results are written to standard output.

    Raises:
        requests.HTTPError: if a page request returns an error status.
        requests.RequestException: if the request itself fails (for example
            on a connection error or timeout).
    """
    name_list = []
    eName_list = []  # second title of each entry; collected but not printed
    dirName_list = []
    starName_list = []
    year_list = []
    contr_list = []
    cate_list = []
    score_list = []
    headers = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/74.0.3729.169 Mobile Safari/537.3',
        'Host': 'movie.douban.com'}

    for page in range(10):
        link = 'https://movie.douban.com/top250?start=' + str(page * 25)
        r = requests.get(link, headers=headers, timeout=10)
        # Fail loudly on an error response instead of parsing an error page
        # and silently ending up with empty result lists.
        r.raise_for_status()
        soup = BeautifulSoup(r.text, "lxml")

        for title_div in soup.find_all('div', class_='hd'):
            titles = title_div.find_all('span', class_='title')
            if not titles:
                # Nothing to record for a header block without title spans.
                continue
            name_list.append(titles[0].text)
            # Store text (not the Tag object) so the list holds only strings.
            eName_list.append(titles[1].text if len(titles) > 1 else 'null')

        for body_div in soup.find_all('div', class_='bd'):
            paragraph = body_div.p
            if paragraph is None:
                # A 'bd' block without a <p> carries no movie description.
                continue
            info = paragraph.text
            if len(info) < 3:
                continue
            lines = info.split('\n')
            # Pad so that a short description yields 'null' placeholders
            # rather than an IndexError; one entry is still appended to every
            # list so the lists stay aligned with each other.
            lines += [''] * (3 - len(lines))
            director, stars = _split_credits(lines[1])
            year, country, category = _split_release_info(lines[2])
            dirName_list.append(director)
            starName_list.append(stars)
            year_list.append(year)
            contr_list.append(country)
            cate_list.append(category)

        for star_div in soup.find_all('div', class_='star'):
            # The score is the first three characters of the stripped text.
            score_list.append(star_div.text.strip()[0:3])

    # zip() stops at the shortest list, so a partially failed scrape prints
    # what was collected instead of raising IndexError at a fixed count.
    records = zip(name_list, dirName_list, starName_list, year_list,
                  contr_list, cate_list, score_list)
    for rank, record in enumerate(records, start=1):
        name, director, stars, year, country, category, score = record
        print('TOP%d' % rank)
        print("影片名:%s" % name)
        print("导演:%s" % director)
        print("主演名:%s" % stars)
        print("上映年份:%s" % year)
        print("国家:%s" % country)
        print("电影分类:%s" % category)
        print("评分:%s" % score)
# Run the scraper only when this file is executed directly as a script.
if __name__ == "__main__":
    get_movies()
输出结果展示:
下一节将介绍如何把这些信息导入 Excel 文件……