近日有运营同事找上门说:“她找不到产品详情页的图片了,现在急需导出图片,重新发布产品到其它电商平台。”于是有了现在这篇稿子——【处理本地已有 URL 图片链接,进行下载并保存——工作实践】。
闲话不多说,直接上代码。
# 导入基础包
# -*- coding: utf-8 -*-
import requests,os,time
from bs4 import BeautifulSoup
from helloGao.other.WB_AutoApi.Common import Re_TestData
class downImg():
    """Download product images from URLs read from a text file or the database."""

    # Base directory where downloaded images are saved.
    SAVE_DIR = 'E:\\Git_wbiao\\helloGao\\other\\Reptile\\image\\'

    @staticmethod
    def get_str_btw(s, f, b):
        """Return the substring of *s* between the first *f* and the next *b*.

        Returns '' when either delimiter is missing.
        (Was a bare function inside the class — no self — now a staticmethod;
        existing `downImg.get_str_btw(...)` call sites are unaffected.)
        """
        par = s.partition(f)
        return par[2].partition(b)[0]

    def comTT(self, ImgUrl):
        """Download the image at *ImgUrl* and save it under SAVE_DIR.

        The file name is a timestamp plus the id extracted from the URL's
        'shop/...' or 'detail/...' segment. Success/failure is printed,
        never raised to the caller.
        """
        # Strip newlines/spaces FIRST, so the extracted name is clean too
        # (the original cleaned the URL only after building the file name).
        url = ''.join(str(ImgUrl).split())
        segment = downImg.get_str_btw(url, 'co/', '/')
        if segment == 'shop':
            image_name = downImg.get_str_btw(url, 'shop/', '.')
        elif segment == 'detail':
            image_name = downImg.get_str_btw(url, 'detail/', '.')
        else:
            # BUGFIX: the original left Image_Name unbound here and crashed
            # with NameError on any other URL layout; fall back gracefully.
            image_name = 'unknown'
        os.makedirs(downImg.SAVE_DIR, exist_ok=True)
        img_path = (downImg.SAVE_DIR + time.strftime("%Y%m%d%H%M%S")
                    + '_' + image_name + '.jpg')
        # Timeout added so one dead host cannot hang the whole batch.
        r = requests.get(url, timeout=30)
        time.sleep(0.5)  # be polite to the image server between requests
        if r.status_code == 200:
            with open(img_path, 'wb') as f:
                f.write(r.content)
            print('下载成功')
            print(r.url)
            # NOTE(review): dropped the original's stray os.makedirs('./image/')
            # here — it created an unused directory relative to the CWD.
        else:
            print('下载失败')
            print(ImgUrl)
            print(r.url)

    def img_select(self):
        """Read goods detail-page HTML from the DB and pull out <img> src URLs.

        Returns a list of [goods_code, url1, url2, ...] rows.
        """
        rows = Re_TestData.rig_fastDB().goods_img()
        data_all = []
        for goods_code, html in rows:
            soup = BeautifulSoup(html, features="lxml")
            record = [goods_code]
            for tag in soup.find_all('img'):
                # Extract the src attribute from the tag's string form,
                # exactly as the original did.
                record.append(downImg.get_str_btw(str(tag), 'src="', '"'))
            data_all.append(record)
        return data_all

    def img_file(self):
        """Read protocol-relative URLs from a text file and download each one."""
        # Lines in this file lack the leading 'http:' prefix.
        fi_dir = r"E:\Git_wbiao\helloGao\other\Reptile\222.txt"
        with open(fi_dir, "r") as f:
            lines = f.readlines()
        for line in lines:
            self.comTT("http:" + line)

    def img_DB(self):
        """Download goods detail-page images, then product images, from the DB."""
        # Detail-page URLs lack the 'http:' prefix.
        for record in self.img_select():
            for url in record[1:]:
                self.comTT("http:" + url)
                time.sleep(3)  # throttle between downloads
        # Product image paths lack the domain.
        http_dir = "https://image8.wbiao.co/"
        for row in Re_TestData.rig_fastDB().product_img():
            print(http_dir + row[1])
            self.comTT(http_dir + row[1])
下面是读数据库的代码(即上面引用的 Re_TestData 模块):
import pymysql,time
class rig_fastDB():
    """Read image-related records from the test MySQL database."""

    def _query(self, sql):
        """Open a connection, run *sql*, and return all rows.

        Cursor and connection are always closed, even when the query fails
        (the original leaked both on error).
        """
        coon = pymysql.connect(user='xxxxx',
                               passwd='xxxxx',
                               db='test-mall-saas',
                               host='xxxxxxx',
                               charset='utf8mb4')
        try:
            cursor = coon.cursor()
            try:
                cursor.execute(sql)
                return cursor.fetchall()
            finally:
                cursor.close()
        finally:
            # BUGFIX: the original called self.coon.commit()/close(), but the
            # connection was only ever a local `coon` — self.coon was never
            # set, so every call raised AttributeError after fetching.
            # A plain SELECT needs no commit; just close the connection.
            coon.close()

    def goods_img(self):
        """Return up to 5 (goods_code, pc_intro) rows — goods detail HTML."""
        return self._query(
            "SELECT goods_code,pc_intro from test_seller.sj_goods_extend LIMIT 5;")

    def product_img(self):
        """Return (product_code, file_url) rows for product 57's images."""
        return self._query(
            "SELECT T1.product_code,T2.file_url from test_seller.sj_product_images AS T1 "
            "LEFT JOIN test_seller.sj_file AS T2 "
            "ON T1.image_code = T2.file_code "
            "WHERE T1.product_code = 57;")