# Script source: https://github.com/tensorflow/models/tree/master/research/slim/datasets/download_and_convert_imagenet.sh
# Abort the script as soon as any command exits with a non-zero status.
set -e
# The data directory is the single required argument. When it is missing,
# print the usage line on stderr and exit with a failure status so callers
# can detect the misuse. A bare `exit` would return the status of the
# preceding echo, i.e. success.
if [ -z "$1" ]; then
  echo "usage download_and_convert_imagenet.sh [data dir]" >&2
  exit 1
fi
# Directory layout: the first argument, with one trailing slash stripped, is
# the output directory; raw-data/ beneath it serves as the scratch area.
DATA_DIR="${1%/}"
SCRATCH_DIR="${DATA_DIR}/raw-data/"
# Directory named after this script with a .runfiles/__main__ suffix; the
# helper scripts and label files are looked up beneath it.
WORK_DIR="$0.runfiles/__main__"
# Create the output and scratch directories (no error if they already exist).
mkdir -p "${DATA_DIR}"
mkdir -p "${SCRATCH_DIR}"
# Download the ImageNet data: run the download helper, handing it the scratch
# directory and the labels file.
DOWNLOAD_SCRIPT="${WORK_DIR}/datasets/download_imagenet.sh"
LABELS_FILE="${WORK_DIR}/datasets/imagenet_lsvrc_2015_synsets.txt"
"${DOWNLOAD_SCRIPT}" "${SCRATCH_DIR}" "${LABELS_FILE}"
# Record where the validation and training data live inside the scratch
# directory.
VALIDATION_DIRECTORY="${SCRATCH_DIR}validation/"
TRAIN_DIRECTORY="${SCRATCH_DIR}train/"
# Preprocess the validation data: the helper script moves each image into the
# sub-directory that matches its label (synset). It is given the validation
# directory and the validation labels file.
VAL_LABELS_FILE="${WORK_DIR}/datasets/imagenet_2012_validation_synset_labels.txt"
PREPROCESS_VAL_SCRIPT="${WORK_DIR}/datasets/preprocess_imagenet_validation_data.py"
printf '%s\n' "Organizing the validation data into sub-directories."
"${PREPROCESS_VAL_SCRIPT}" "${VALIDATION_DIRECTORY}" "${VAL_LABELS_FILE}"
# Convert the XML files for bounding box annotations into a single CSV.
# 将边界框注释的XML文件转换为单个CSV。
echo "Extracting bounding box information from XML.