# Scrape the movie poster images from the Douban Movie Top 250 page
# (https://movie.douban.com/top250) and save them into the "picture" folder.
import requests
from bs4 import BeautifulSoup
import lxml
def get(url):
    """Fetch ``url`` and return the response body as text.

    The request is sent with a browser-like ``User-Agent`` header and a
    30-second timeout. The body is decoded with the encoding that requests
    detects from the content (``apparent_encoding``) rather than the one
    declared in the HTTP headers.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page text, or the literal string "产生错误" when the
        request fails (connection problem, timeout, invalid URL, or an HTTP
        error status reported by ``raise_for_status``).
    """
    user_agent = "Mozilla/5.0(compatible;MSIE 9.0;Windows NT 6.1;Trident/5.0;)"
    headers = {"User-Agent": user_agent}
    try:
        r = requests.get(url, headers=headers, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request failures map to the sentinel string. A bare ``except``
        # would also swallow KeyboardInterrupt/SystemExit and hide
        # programming errors behind the same return value.
        return "产生错误"
# Download every image referenced by the Douban Top 250 page and store the
# files as 1.jpg, 2.jpg, ... in /home/aistudio/picture (the directory must
# already exist).
url = "https://movie.douban.com/top250"
soup = BeautifulSoup(get(url), "lxml")
movie = soup.find_all("img")  # every <img> element in the page

# Keep only tags that actually carry a src attribute; an <img> without one
# would otherwise hand None to requests.get and abort the whole run.
image_urls = [tag.get("src") for tag in movie if tag.get("src")]

# Files are numbered from 1 in the order the images appear in the page.
for x, imgsrc in enumerate(image_urls, start=1):
    response = requests.get(imgsrc)
    # The context manager closes each file right after its bytes are written,
    # so no handles stay open while the remaining images download.
    with open("/home/aistudio/picture/%s.jpg" % x, "wb") as file:
        file.write(response.content)

print("下载完成")