需求:读取数据接口,根据返回的产品信息下载产品图片到本地
代码:
import requests,json,re,os,time
from threading import Thread
class DownloadImg(object):
    """Fetch product records from a JSON API and download each product image.

    Constructing an instance performs the whole job: it POSTs the query to the
    API and, when the response reports ``code == 1``, starts one download
    thread per returned record. Images are stored as
    ``./<folder>/<goods_sn>/<goods_sn>_<id>.jpg``.
    """

    # Seconds to wait on the API / image server before giving up. Without a
    # timeout, requests waits indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 10

    def __init__(self, token, n, a, b):
        """Query the API and download the images of the returned records.

        Args:
            token: API token, sent as a string in the request body.
            n: Local folder (relative to the working directory) for images.
            a: Value sent as the ``start`` field of the query.
            b: Value sent as the ``end`` field of the query.
        """
        self.folder = n
        self.start = a
        self.limit = b
        # NOTE(review): the field is named 'end' but is filled from the limit
        # argument, which the caller describes as a record count - confirm
        # against the API contract.
        postData = {
            'token': str(token),
            'start': int(self.start),
            'end': int(self.limit),
        }
        postUrl = ''  # API endpoint URL (must be filled in before running)
        rsJson = self.postApi(postData, postUrl)
        # Example of the JSON returned by the API:
        # {'code': 1, 'message': '操作成功', 'data': [
        #     {'id': 1, 'goods_sn': 'xxxx', 'images': 'http://xxxxx.com/xxx/xxxx.jpg'}
        # ]}
        if rsJson.get('code') == 1:
            # A missing or null 'data' field is treated as "no records".
            self.threadDownload(rsJson.get('data') or [])
        else:
            # Fall back to the whole payload when no message is supplied.
            print(rsJson.get('message', rsJson))

    def postApi(self, data, url):
        """POST ``data`` as a JSON body to ``url`` and return the decoded reply.

        Args:
            data: JSON-serialisable request payload.
            url: Endpoint to post to.

        Returns:
            The response body parsed from JSON.

        Raises:
            requests.RequestException: On connection failure or timeout.
            json.JSONDecodeError: If the response body is not valid JSON.
        """
        headers = {'Content-Type': 'application/json'}
        response = requests.post(
            url=url,
            data=json.dumps(data),
            headers=headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        return json.loads(response.text)

    def cleanStr(self, string):
        """Return ``string`` reduced to characters that are safe in a file name.

        Slashes become underscores; afterwards everything except ASCII
        letters, digits, ``_`` and ``-`` is removed.
        """
        string = string.replace("/", "_")
        return re.sub(r'[^a-zA-Z0-9_-]', "", string)

    @staticmethod
    def _discardPartial(path):
        """Remove a leftover partial download, ignoring filesystem errors."""
        try:
            if os.path.exists(path):
                os.remove(path)
        except OSError:
            pass

    def imgDownload(self, imgUrl, saveUrl):
        """Download ``imgUrl`` to ``saveUrl`` unless that file already exists.

        The body is streamed into ``<saveUrl>.part`` and renamed only after
        the transfer completed, so an interrupted download never leaves a
        truncated file that the "already downloaded" check would later skip.
        Failures are reported on stdout and never raised, because this runs
        inside worker threads.

        Args:
            imgUrl: Remote image URL.
            saveUrl: Local destination path.
        """
        # Already downloaded: nothing to do.
        if os.path.exists(saveUrl):
            return

        partPath = f'{saveUrl}.part'
        header = {"Authorization": "ycn "}  # HTTP header sent with the request
        try:
            with requests.get(imgUrl, headers=header, stream=True,
                              timeout=self.REQUEST_TIMEOUT) as r:
                if r.status_code != 200:
                    print(f'图片下载失败(状态码{r.status_code}):{imgUrl}')
                    return
                with open(partPath, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=65536):
                        f.write(chunk)
            # Publish the file only once it is complete.
            os.replace(partPath, saveUrl)
            print(f'下载成功{saveUrl}')
        except requests.RequestException:
            # Must precede OSError: RequestException derives from IOError.
            print(f'图片链接错误:{saveUrl}')
        except OSError as err:
            print(f'图片保存失败:{saveUrl}({err})')
        finally:
            self._discardPartial(partPath)

    def threadDownload(self, rows):
        """Start one download thread per record in ``rows``.

        Args:
            rows: Iterable of dicts with the keys ``id``, ``images`` (image
                URL) and ``goods_sn``; a missing key raises ``KeyError``.
        """
        for row in rows:
            imgId = row['id']
            imgUrl = row['images']
            goodsSn = self.cleanStr(row['goods_sn'])
            targetDir = f'./{self.folder}/{goodsSn}'
            saveUrl = f'{targetDir}/{goodsSn}_{imgId}.jpg'
            try:
                os.makedirs(targetDir, exist_ok=True)
                Thread(target=self.imgDownload, args=(imgUrl, saveUrl)).start()
            except (OSError, RuntimeError):
                # OSError: directory could not be created;
                # RuntimeError: the thread could not be started.
                print(f'当前图片无法下载{imgUrl}')
                continue
            # Throttle thread start-up so the server does not reject the
            # requests for arriving too frequently.
            time.sleep(0.02)
if __name__ == '__main__':
    # Script entry point: constructing DownloadImg runs the whole download.
    token = ''  # API token (fill in before running)
    # n: local target folder, a: query start offset, b: number of records.
    DownloadImg(token, n='产品图片', a=0, b=10)
运行后截图