requests 爬取图片
爬取一张图片
import requests
# Download a single image and save it in the current working directory
# as '图片.jpg'.
url = 'https://pic.qiushibaike.com/system/pictures/12416/124164031/medium/6OVZQ8EAZPYTDGZO.jpg'

# NOTE(review): verify=False turns off TLS certificate verification. It is kept
# from the original script -- confirm it is still needed for this host.
# The timeout keeps the script from hanging forever on an unresponsive server.
response = requests.get(url=url, verify=False, timeout=10)

# Fail loudly on a 4xx/5xx answer instead of saving an error page as a .jpg.
response.raise_for_status()

img_data = response.content
with open('图片.jpg', 'wb') as f:
    f.write(img_data)
爬取第一页的图片
import requests
import re
import os
# Download every thumbnail image referenced on the first page of the image
# ranking and store the files in the `file_name` directory.

# Output directory. makedirs(exist_ok=True) creates missing parent directories
# and does not fail when the directory already exists, so no separate
# existence check is needed.
file_name = r'C:\Users\Administrator\Desktop\1000篇技术博客\python模块整理 20210320\piclibs'
os.makedirs(file_name, exist_ok=True)

# Send a browser-like User-Agent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
}

url = 'https://www.qiushibaike.com/imgrank/'
# NOTE(review): verify=False turns off TLS certificate verification. It is kept
# from the original script -- confirm it is still needed for this host.
# The timeout keeps the script from hanging forever on an unresponsive server.
response = requests.get(url=url, headers=headers, verify=False, timeout=10)
# Without the listing page there is nothing to download, so stop on 4xx/5xx.
response.raise_for_status()
page_text = response.text
print(page_text)

# Sample of the markup the pattern is written against; used below to
# demonstrate the regular expression on a known input.
content = '''
<div class="thumb">
<a href="/article/124179961" target="_blank">
<img src="//pic.qiushibaike.com/system/pictures/12417/124179961/medium/4P9MQYYXOSPAB0P0.jpg" alt="糗事#124179961" class="illustration" width="100%" height="auto">
</a>
</div>
'''

# Capture the scheme-less image address inside each "thumb" div. DOTALL lets
# '.' match newlines because the div spans several lines.
pattern = re.compile(r'<div class="thumb">.*?<img src="//(.*?)".*?</div>', flags=re.DOTALL)
res = pattern.findall(content)[0]
print(res)

res_all = pattern.findall(page_text)
print(res_all)

for img_path in res_all:
    # The captured address has no scheme, so prepend one.
    img_url = 'https://' + img_path
    img_response = requests.get(url=img_url, headers=headers, verify=False, timeout=10)
    if not img_response.ok:
        # Skip this image rather than writing an error body into a .jpg file.
        print(f'skipped {img_url}: HTTP {img_response.status_code}')
        continue
    img_data = img_response.content
    # The file is named after the last path component of the image URL.
    with open(os.path.join(file_name, os.path.basename(img_url)), 'wb') as f:
        f.write(img_data)
爬取N页
import requests
import re
import os
# Download every thumbnail image referenced on pages 1 to 4 of the image
# ranking and store the files in the `file_name` directory.

# Output directory. makedirs(exist_ok=True) creates missing parent directories
# and does not fail when the directory already exists, so no separate
# existence check is needed.
file_name = r'C:\Users\Administrator\Desktop\1000篇技术博客\python模块整理 20210320\piclibs'
os.makedirs(file_name, exist_ok=True)

# Send a browser-like User-Agent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
}

# Capture the scheme-less image address inside each "thumb" div. DOTALL lets
# '.' match newlines because the div spans several lines.
pattern = re.compile(r'<div class="thumb">.*?<img src="//(.*?)".*?</div>', flags=re.DOTALL)

# range(1, 5) yields the page numbers 1, 2, 3 and 4.
for page in range(1, 5):
    url = f'https://www.qiushibaike.com/imgrank/page/{page}'
    # NOTE(review): verify=False turns off TLS certificate verification. It is
    # kept from the original script -- confirm it is still needed for this host.
    # The timeout keeps the script from hanging on an unresponsive server.
    response = requests.get(url=url, headers=headers, verify=False, timeout=10)
    if not response.ok:
        # One failed listing page should not abort the remaining pages.
        print(f'skipped {url}: HTTP {response.status_code}')
        continue
    page_text = response.text
    print(page_text)

    res_all = pattern.findall(page_text)
    print(res_all)

    # A distinct loop name keeps the page counter from being shadowed.
    for img_path in res_all:
        # The captured address has no scheme, so prepend one.
        img_url = 'https://' + img_path
        img_response = requests.get(url=img_url, headers=headers, verify=False, timeout=10)
        if not img_response.ok:
            # Skip this image rather than writing an error body into a .jpg file.
            print(f'skipped {img_url}: HTTP {img_response.status_code}')
            continue
        img_data = img_response.content
        # The file is named after the last path component of the image URL.
        with open(os.path.join(file_name, os.path.basename(img_url)), 'wb') as f:
            f.write(img_data)

print('over')