问题场景:
有一个很大的 CSV 文件,存储了不同类别的图片编号;这些编号对应的图片数据都存放在 Linux 上。现在需要从每个类别中取一小部分编号,并把这部分编号对应的图片从 Linux 上打包下载下来。
具体解决方案:
import os
import datetime
import tarfile
import pandas as pd
from tqdm import tqdm
import cv2
# CSV file that lists the images to pack; it is read when the module is loaded.
filename = '../resource/data_train_100.csv'
df = pd.read_csv(filename)
# Values of the first CSV column; find_file() later appends '.jpg' to each
# one to build the image path.
img_ls = df.iloc[:, 0].tolist()
def re_Iamge_reader(name, search_dirs=None):
    """Look for ``<name>.jpg`` in a list of fallback image directories.

    Each candidate path is probed with ``cv2.imread``; a ``None`` result is
    treated as "not found here" and the next directory is tried.

    Args:
        name: Image identifier without the ``.jpg`` extension. Non-string
            values (for example integer ids read from a CSV) are formatted
            into the file name instead of raising ``TypeError``.
        search_dirs: Optional iterable of directories to probe, in order.
            When omitted, the five built-in fallback directories are used.

    Returns:
        The first candidate path that ``cv2.imread`` could load, or ``None``
        when no directory yields a loadable image (a message is printed in
        that case).
    """
    if search_dirs is None:
        search_dirs = (
            '/e/images/shortImage/',
            '/e/images/1758/',
            '/e/images/1762/',
            '/e/images/Image/',
            '/e/images/picture/',
        )
    img_name = '{}.jpg'.format(name)
    # Iterating the directories directly replaces the hand-maintained
    # ``i > 4`` bound, which had to be kept in sync with the list length.
    for directory in search_dirs:
        # os.path.join yields the same string as plain concatenation for the
        # built-in directories (they end with '/'), and also copes with
        # caller-supplied directories that lack the trailing separator.
        img_path = os.path.join(directory, img_name)
        if cv2.imread(img_path) is not None:
            return img_path
    print('所有路径都没这个{}图片,拉倒吧你'.format(img_name))
    return None
def Is_img(url, name):
    """Return a usable path for an image, falling back to other directories.

    Args:
        url: Preferred path of the image file.
        name: Image identifier handed to ``re_Iamge_reader`` when ``url``
            cannot be loaded.

    Returns:
        ``url`` when ``cv2.imread`` can load it; otherwise whatever
        ``re_Iamge_reader(name)`` returns (which may be ``None``).
    """
    # Guard clause: the preferred location is loadable, nothing more to do.
    if cv2.imread(url) is not None:
        return url
    return re_Iamge_reader(name)
def find_file(root, patterns=('*',)):
    """Yield a loadable file path for each image id in *root*.

    For every id the primary location ``/d/images_Lib/image/<id>.jpg`` is
    checked through ``Is_img``, which falls back to other directories when
    the primary file cannot be loaded.

    Args:
        root: Iterable of image ids (file names without ``.jpg``). Each id
            is converted with ``str()`` so numeric ids work as well.
        patterns: Accepted for backward compatibility; it is not used by
            this function. The default is an immutable tuple so it cannot
            be shared and mutated across calls.

    Yields:
        The path returned by ``Is_img`` for each id that could be located.
        Ids for which ``Is_img`` returns ``None`` are skipped; previously
        such an id raised ``TypeError`` from ``os.path.join(None)`` and
        aborted the whole run.
    """
    path = "/d/images_Lib/image/"
    for i in tqdm(root):
        img_id = str(i)
        img_path = Is_img(path + img_id + '.jpg', img_id)
        if img_path is None:
            # Not found in any known directory; carry on with the next id.
            continue
        yield img_path
def cre_tarfile():
    """Pack the images listed in ``img_ls`` into a timestamped ``.tar.gz``.

    The archive is named ``all<YYYY_mm_dd_HH_MM_SS>.tar.gz`` and, because the
    name is relative, is created in the current working directory. Every
    path produced by ``find_file(img_ls, ...)`` is added with
    ``TarFile.add`` using its default member name.
    """
    # "*.jepg" was a misspelling of the JPEG extension pattern.
    args = ["*.jpg", "*.jpeg"]
    now = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
    # A distinct local name avoids shadowing the module-level ``filename``
    # (the CSV path).
    archive_name = "all{0}.tar.gz".format(now)
    with tarfile.open(archive_name, mode='w:gz') as f:
        for item in find_file(img_ls, args):
            f.add(item)
# Script entry point: build the archive only when run directly, not on import.
if __name__ == "__main__":
    cre_tarfile()