# Python爬虫抓取豆瓣top250电影 — a Python crawler that scrapes the Douban Top 250 movie titles.
import requests
from bs4 import BeautifulSoup
# Scrape stage: request ten listing pages from Douban's Top 250 and collect
# the text of the first <span> inside each title link into `movie_list`.

# Base URL of the listing; the `start` query value is appended per request.
link = "https://movie.douban.com/top250?start="
# Browser-like headers sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
    'Host': 'movie.douban.com',
}
movie_list = []
# Request start offsets 0, 25, ..., 225.
# NOTE(review): assumes the site serves 25 entries per page -- confirm.
for i in range(0, 10):
    tempLink = link + str(i * 25)
    # A timeout keeps an unresponsive server from hanging the script forever.
    r = requests.get(tempLink, headers=headers, timeout=10)
    # Raise on 4xx/5xx instead of silently parsing an error page into
    # zero titles.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "lxml")
    # Every <div class="hd"> is treated as one movie entry.
    div_list = soup.find_all('div', 'hd')
    for each in div_list:
        # First <span> under the entry's first <a>, whitespace-trimmed.
        movie = each.a.span.text.strip()
        movie_list.append(movie)
# Report how many titles were collected in total.
print(str(len(movie_list)))
# Output stage: echo every collected title to stdout, then append all of
# them to a text file in the current working directory.
for mo in movie_list:
    print(mo)
# Each title is preceded by a newline, so the written text starts with "\n"
# and has no trailing newline. join() avoids repeated string concatenation.
liststr = "".join("\n" + mo for mo in movie_list)
# Explicit UTF-8: the titles contain non-ASCII text, and the platform default
# encoding may be unable to represent it. The `with` statement closes the
# file, so no explicit close() is needed.
with open("top250电影.txt", "a+", encoding="utf-8") as file:
    file.write(liststr)