需求:一个目录下多个文件夹,每个文件夹里有多个商品图片,如何对这些数据可视化?
解决方案:建立 Excel 表格,每个文件夹取体积最大的一张图片,按文件夹名逐行展示。
import os
import xlsxwriter
from operator import itemgetter
# Root directory that is scanned below; each entry in it is treated as one
# SKU folder holding that product's .jpg images.
folder = '/data2/library_2403/'
def find_img_path(sku_folder):
    """Return the path of the largest ``.jpg`` file directly inside *sku_folder*.

    Only the top level of the folder is scanned (no recursion) and only names
    ending in lowercase ``.jpg`` are considered.

    Args:
        sku_folder: Path of the directory to scan.

    Returns:
        ``os.path.join(sku_folder, name)`` for the ``.jpg`` file with the
        greatest size in bytes. On a size tie, the file listed first by
        ``os.listdir`` wins.

    Raises:
        IndexError: If the folder contains no ``.jpg`` file.
        OSError: Propagated from ``os.listdir`` / ``os.path.getsize``.
    """
    images = []
    for file in os.listdir(sku_folder):
        if file.endswith('.jpg'):
            path = os.path.join(sku_folder, file)
            images.append((os.path.getsize(path), path))
    if not images:
        # Same exception type as indexing an empty list, but with a message
        # that says which folder is the problem.
        raise IndexError(f"no .jpg image found in {sku_folder!r}")
    # Compare on size only; max() keeps the first of several equal maxima,
    # so the result matches a stable descending sort followed by [0].
    return max(images, key=itemgetter(0))[1]
# Collect one image path (the largest .jpg) for every SKU folder under `folder`.
img_path_all = []
# os.listdir() returns entries in arbitrary order; sort so the sheet rows are
# reproducible between runs.
for filename in sorted(os.listdir(folder)):
    # Data files that live next to the SKU folders and are not SKU folders.
    if filename in ("facebank.pth", "names.npy"):
        continue
    sku_folder = os.path.join(folder, filename)
    # Any other stray file would make os.listdir() inside find_img_path fail.
    if not os.path.isdir(sku_folder):
        continue
    img_path_all.append(find_img_path(sku_folder))
# Write one row per SKU: column A = folder name, column B = dict1 entry,
# column C = the picture itself.
# NOTE(review): XlsxWriter always writes the xlsx format and its docs say the
# file name should end in ".xlsx", otherwise Excel warns when opening it. The
# ".xls" path is left unchanged in case something else expects this name.
# The `with` block closes (and thereby saves) the workbook even if a row fails.
with xlsxwriter.Workbook('/data2/2403sku_all_0201_20230201_NEW.xls') as wb:
    pictureSheet = wb.add_worksheet("Sheet2")
    pictureSheet.set_column('C:C', 16)  # width of the image column
    # NOTE(review): `dict1` is not defined in the visible source; presumably a
    # mapping from SKU folder name to a label -- confirm it is defined before
    # this point.
    for i, img_path in enumerate(img_path_all):
        # The parent directory name of the image is the SKU folder name.
        sku_name = img_path.split("/")[-2]
        print(sku_name, dict1[sku_name])
        pictureSheet.set_row(i, 95)  # row height
        # Use zero-based (row, col) indices. The previous 'A%d' % i strings
        # started at "A0", which is not a valid A1-style reference (A1 rows
        # are 1-based), so the first SKU was never written.
        pictureSheet.write(i, 0, sku_name)
        pictureSheet.write(i, 1, dict1[sku_name])
        # Offsets are in pixels; the scale factors shrink the image to 50 %.
        pictureSheet.insert_image(
            i, 2, img_path,
            {'x_offset': 5, 'y_offset': 1, 'x_scale': 0.5, 'y_scale': 0.5},
        )
本来使用openpyxl模块
但运行时报错zipfile.BadZipFile: File is not a zip file
查看网上解决方案:
1. 有可能是 openpyxl 与 pandas 的版本不兼容,解决方法是将 pd.ExcelWriter(path, engine='openpyxl') 改成 pd.ExcelWriter(path, engine='openpyxl', mode='a')
2.其他解决方案
https://www.cnblogs.com/qingyuu/p/10642249.html
太麻烦了,最终选择 xlsxwriter 库来解决问题。
最终效果图: