import os
import re

import requests
# Request headers: spoof a desktop Chrome user-agent so the site serves
# the normal article page instead of blocking the scraper.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36'
}
# Article page whose embedded images we want to download.
url = "https://segmentfault.com/a/1190000022760744?utm_source=sf-related"
# timeout keeps the script from hanging forever on a stalled connection.
html = requests.get(url=url, headers=headers, timeout=10)
# Image paths extracted below are site-relative; this prefix completes them.
next_url = "https://segmentfault.com"
# Grab every lazy-loaded image path from the article body (the data-src
# attribute holds the real image URL on this site).
content = re.findall(pattern=r'<p><span class=".*?"><img referrerpolicy=".*?" data-src="(.*?)" src', string=html.text)
# Running counter used by save() to produce sequential file names.
temp = 0
print(content)
def save(url_sure):
    """Download the image at *url_sure* and store it as a numbered .jpg.

    Increments the module-level ``temp`` counter to build a unique file
    name per call, so successive downloads do not overwrite each other.
    (Original comment complained "python creates at most 1 file" — the
    likely cause was the target directory not existing; it is now created
    on demand.)
    """
    global temp
    temp += 1
    # timeout prevents a single dead image URL from hanging the whole run.
    response = requests.get(url_sure, headers=headers, timeout=10)
    save_dir = "D:\\爬虫文件\\图片"  # hard-coded Windows target folder from the original script
    # open() raises FileNotFoundError if the directory is missing; create it first.
    os.makedirs(save_dir, exist_ok=True)
    with open(os.path.join(save_dir, str(temp) + '.jpg'), 'wb') as f:
        f.write(response.content)
if __name__ == '__main__':
    # Walk the relative image paths scraped above and download each one,
    # completing every path with the site's base URL.
    for image_path in content:
        full_url = next_url + image_path
        save(full_url)
# Approach: first scrape the image paths from the page, then download the images.