import requests
# Browser-like request headers (the User-Agent mimics Chrome 77 on Windows 10)
# sent with the page request so it looks like an ordinary browser visit.
# "Host" is deliberately not set here: a hard-coded value would be wrong for
# any site other than the one it names, and the HTTP client derives the Host
# header from the request URL on its own.
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36",
}
# Page whose HTML is downloaded and scanned for image links.
url = 'https://www.mzitu.com/'
# requests waits indefinitely by default, so an explicit timeout keeps the
# script from hanging on an unresponsive server. raise_for_status() turns an
# HTTP error status (4xx/5xx) into an exception instead of silently handing
# an error page on as if it were the real content.
_response = requests.get(url, headers=headers, timeout=10)
_response.raise_for_status()
# Decoded text body of the response.
resp = _response.text
# print(resp)
# <img class='lazy' src='https:/xx'/>
# Scrape the image links from the page
import re
# Non-greedy pattern for a self-closing <img ... src='...' ... /> tag; the
# single capture group is the single-quoted src value. "." does not match a
# newline here, so a match never spans more than one line of the HTML.
p = re.compile(r"<img.*?src='(.*?)'.*?/>")
# With one capture group, findall returns the list of captured src strings.
res = p.findall(resp)
# Print every extracted link, one per line, followed by the total count.
for i in res:
    print(i)
print(len(res))
# Key point: use .*? for non-greedy matching, then extract the src link.