# data-eda.ipynb
from glob import glob
import pandas as pd
import numpy as np
import os
import cv2
from PIL import Image
from matplotlib import pyplot as plt
from tqdm import tqdm
# Training-set exploration.
# The dataset root is expected to contain one sub-directory per class, with
# the image files of that class directly inside it.
TRAIN_DATASET_PATH = '/data/nextcloud/dbc2017/files/jupyter/train_data'

# Every file exactly one directory level below the root whose name contains
# a dot (pattern: <root>/<class dir>/<name>.<ext>).
image_fns = glob(os.path.join(TRAIN_DATASET_PATH, '*', '*.*'))

# The label of a file is the name of its parent directory. os.path is used
# instead of splitting on '/' so this also works with other path separators.
label_names = [os.path.basename(os.path.dirname(s)) for s in image_fns]

# Distinct labels, sorted so the order is reproducible between runs.
unique_labels = sorted(set(label_names))

# Number of classes
print(len(unique_labels))
# Total number of images
print(len(image_fns))
# Number of entries in each class directory.
# number_lst gets one count per sub-directory of TRAIN_DATASET_PATH, in the
# order os.listdir returns them; non-directory entries in the root are skipped.
dir_lst = os.listdir(TRAIN_DATASET_PATH)
number_lst = []
for i in dir_lst:
    path = os.path.join(TRAIN_DATASET_PATH, i)
    if os.path.isdir(path):
        # '*' matches every non-hidden entry in the class directory.
        num = len(glob(os.path.join(path, '*')))
        number_lst.append(num)
        if num == 0:
            # Report the name of any empty class directory.
            print(i)