# Crawl images from the web by calling an HTTP API endpoint.
import json
import os
import urllib.request
from _md5 import md5
import requests
from requests import RequestException
# 通过requests.post访问接口获取内容
def get_one_page(timeout=10):
    """Fetch the earmark list from the remote API with a POST request.

    Args:
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The response body as text when the server answers with HTTP 200,
        otherwise ``None`` (non-200 status, or any ``RequestException``
        such as a connection error or timeout).
    """
    url = 'http://hb.****.cn/Server/PutOnEarmarkListData'
    payload = {
        'applyid': "c8cda453-d2ff-****-a496-09c0ef6cc9a4",
        'earmark': "",
        'farm': "",
    }
    # Keep the try body limited to the call that can actually raise.
    try:
        response = requests.post(url, data=payload, timeout=timeout)
    except RequestException:
        return None
    if response.status_code == 200:
        return response.text
    return None
# 解析数据并且下载的图片
def parse_page_detail(html, save_dir=r'D:\photos'):
    """Parse the API response and download every image it references.

    ``html`` must be a JSON array of objects. Each object's ``'PHOTO'``
    value is read as a comma-separated string of image URLs. Every URL is
    downloaded with ``urllib.request.urlretrieve`` and stored as
    ``<save_dir>/<n>.jpg`` where ``n`` counts downloaded images from 0.

    Args:
        html: JSON text returned by the API.
        save_dir: Directory that receives the images; created when missing.
            The default is a raw string so the backslash is not read as an
            (invalid) escape sequence.

    Returns:
        ``{'PHOTO': photos}`` where ``photos`` is the list of raw
        ``'PHOTO'`` values, one per record (``None`` when the key is absent).
    """
    records = json.loads(html)
    photos = [record.get('PHOTO') for record in records]
    os.makedirs(save_dir, exist_ok=True)
    downloaded = 0  # doubles as the file-name counter
    for group in photos:
        # A record without photos yields None or ''; nothing to download.
        if not group:
            continue
        for photo_url in group.split(','):
            photo_url = photo_url.strip()
            if not photo_url:
                # Trailing or doubled commas produce empty fragments.
                continue
            target = os.path.join(save_dir, '%s.jpg' % downloaded)
            urllib.request.urlretrieve(photo_url, target)
            downloaded += 1
            print('第%s张图片下载完成' % downloaded)
    return {
        'PHOTO': photos
    }
#主函数
def main():
    """Fetch the listing, download its images and print the parsed result."""
    html = get_one_page()
    if not html:
        # get_one_page() returns None on a failed request; passing that to
        # json.loads() would raise TypeError (and '' raises JSONDecodeError).
        print('接口数据获取失败')
        return
    result = parse_page_detail(html)
    print(result)
# Script entry point: run the crawl only when executed directly.
# NOTE(review): this guard sits above save_image() and download_image(), so
# main() runs before those two functions are defined. That is harmless while
# nothing on the main() path calls them, but the guard should move to the end
# of the file if that changes.
if __name__ == "__main__":
    main()
# 保存图片到本地
def save_image(content):
    """Write image bytes to ``<cwd>/<md5-of-content>.jpg`` unless it exists.

    The file name is the MD5 hex digest of ``content``, so identical images
    map to the same file and are stored only once.

    Args:
        content: Raw image bytes.

    Returns:
        None.
    """
    # Function-scope import of the public hashing API keeps this block
    # self-contained instead of relying on the private ``_md5`` module.
    import hashlib

    file_name = '{0}.{1}'.format(hashlib.md5(content).hexdigest(), 'jpg')
    file_path = os.path.join(os.getcwd(), file_name)
    try:
        # 'x' creates the file exclusively, so the "already saved" check and
        # the creation are a single atomic step (no exists()/open() race).
        with open(file_path, 'xb') as f:
            f.write(content)
    except FileExistsError:
        # Same content was saved earlier; skipping is the intended behaviour.
        pass
# 下载图片
def download_image(url, timeout=10):
    """Download one image over HTTP and store it with ``save_image``.

    Args:
        url: Address of the image.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        Always ``None``. Progress and failures are reported with ``print``.
    """
    print('正在下载', url)
    try:
        response = requests.get(url, timeout=timeout)
    except RequestException:
        print('图片下载失败', url)
        return None
    if response.status_code == 200:
        save_image(response.content)
    else:
        # A non-200 answer is also a failed download; report it rather than
        # returning silently.
        print('图片下载失败', url)
    return None