1 需求
豆瓣电影Top 250排行榜海报图片下载，并实现持久化存储。
2 代码实现
import os
import re

import requests
# Browser-like User-Agent sent with every request.
# NOTE(review): presumably needed because the site rejects the default
# python-requests client -- confirm.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0'}
url = 'https://movie.douban.com/top250'

IMAGE_DIR = 'images'
# NOTE(review): assumes the ranking is paginated 25 movies per page through
# the `start` query parameter -- confirm against the live site.
PAGE_SIZE = 25
TOTAL = 250
TIMEOUT = 10  # seconds; without it a stalled connection hangs forever

# Title (alt) and poster URL (src) are captured by ONE pattern so that the two
# values always belong to the same <img> tag and can never get out of step.
POSTER_RE = re.compile(r'<img width="100" alt="([^"]*)" src="([^"]*)" class="">')
# Characters that are not allowed in file names on Windows and/or POSIX.
UNSAFE_RE = re.compile(r'[\\/:*?"<>|]')


def parse_posters(html):
    """Return a list of ``(title, poster_url)`` tuples found in *html*.

    An empty list is returned when the page contains no poster tags.
    """
    return POSTER_RE.findall(html)


def safe_filename(title):
    """Return *title* with characters that are illegal in file names replaced by ``_``."""
    return UNSAFE_RE.sub('_', title).strip()


def fetch_page(start):
    """Download the ranking page that begins at offset *start* and return its HTML.

    Raises ``requests.RequestException`` on network errors or a non-2xx status.
    """
    response = requests.get(url=url, headers=headers,
                            params={'start': start}, timeout=TIMEOUT)
    response.raise_for_status()
    return response.text


def download_poster(title, poster_url):
    """Download *poster_url* and store it as ``images/<title>.jpg``.

    Raises ``requests.RequestException`` if the download fails and ``OSError``
    if the file cannot be written.
    """
    response = requests.get(poster_url, headers=headers, timeout=TIMEOUT)
    # Fail loudly instead of saving an HTML error page under a .jpg name.
    response.raise_for_status()
    path = os.path.join(IMAGE_DIR, safe_filename(title) + '.jpg')
    with open(path, 'wb') as file:
        file.write(response.content)


def main():
    """Walk the ranking page by page and save every poster to ``IMAGE_DIR``."""
    os.makedirs(IMAGE_DIR, exist_ok=True)
    rank = 0
    for start in range(0, TOTAL, PAGE_SIZE):
        posters = parse_posters(fetch_page(start))
        if not posters:
            # Nothing matched: the list ended early or the markup changed.
            break
        for title, poster_url in posters:
            rank += 1
            print(str(rank) + "." + title)
            print(poster_url)
            try:
                download_poster(title, poster_url)
            except (requests.RequestException, OSError) as err:
                # One broken poster must not abort the remaining downloads.
                print('download failed: ' + str(err))


if __name__ == "__main__":
    main()