1.准备工作
2.开始爬取
- 目录结构
- 代码
import os
import re

import requests
def get_stable_image(url, save_dir='images/01.爬取斗鱼图片/', timeout=10):
    """Download every lazily-loaded image referenced by a web page.

    The page at ``url`` is fetched, every ``data-original="..."`` attribute
    that is immediately followed by `` src=`` is treated as an image URL, and
    each image is written into ``save_dir`` as ``<last URL segment>.jpg``.
    A running total is printed after each image that was saved.

    Args:
        url: Address of the page to scan for images.
        save_dir: Directory that receives the downloaded files. It is
            created when it does not exist yet.
        timeout: Seconds to wait on each HTTP request before giving up.

    Raises:
        requests.RequestException: If the page itself cannot be fetched.
            Failures on individual images are printed and skipped instead.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36",
    }

    # Without a timeout, requests waits forever on a stalled connection.
    page = requests.get(url=url, headers=headers, timeout=timeout)
    html = page.content.decode()
    img_url_list = re.findall(r'data-original="(.*?)" src=', html)

    # open() does not create missing directories, so make sure it is there.
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    count = 0
    for img_url in img_url_list:
        img_name = img_url.split('/')[-1] + ".jpg"
        try:
            # Send the same browser User-Agent as for the page request.
            img = requests.get(url=img_url, headers=headers, timeout=timeout)
            # Reject 4xx/5xx answers so an error page is never saved as .jpg.
            img.raise_for_status()
        except requests.RequestException as e:
            # One broken image must not abort the remaining downloads.
            print(e)
            continue

        with open(os.path.join(save_dir, img_name), 'wb') as f:
            f.write(img.content)
        count += 1
        print("已爬取成功%d张图片" % count)
if __name__ == '__main__':
    # Script entry point: download the images found on this Douyu page.
    # The function returns nothing, so its result is not bound to a name.
    get_stable_image("https://www.douyu.com/g_yz")
爬取成功