CBIS_DDSM数据下载方法---无需安装NBIA软件

伙夫而已

于 2024-06-20 15:25:14 发布

阅读量609

点赞数 2

文章标签： python 计算机视觉

本文链接：https://blog.csdn.net/m0_61789789/article/details/139833797

版权

本方法无需安装NBIA软件，直接运行python代码，即可直接下载CBIS_DDSM数据。

首先下载 TCIA 清单（manifest）文件：如下图所示，点击 Download 即可下载 tcia 文件。

然后，将代码中 `--manifest` 参数的默认值改为所下载 tcia 文件的路径（也可以在命令行用 `-m` 指定），再运行代码即可。

CBIS-DDSM - The Cancer Imaging Archive (TCIA)

import concurrent.futures
import requests
import zipfile
from io import BytesIO
import os
import json
from tqdm import tqdm
import argparse

# URL templates for the NBIA API hosted at services.cancerimagingarchive.net.
# The single `{}` placeholder in each template is filled with a series UID
# via str.format().
BASE_URL_IMAGE = (
    'https://services.cancerimagingarchive.net/nbia-api/services/v1/'
    'getImage?SeriesInstanceUID={}'
)
BASE_URL_METADATA = (
    'https://services.cancerimagingarchive.net/nbia-api/services/v1/'
    'getSeriesMetaData?SeriesInstanceUID={}'
)


class CBISDDSMDownloader:
    """Download every image series listed in a ``.tcia`` manifest file.

    The manifest is scanned for the ``ListOfSeriesToDownload=`` marker; each
    non-blank line after it is treated as one series UID. For every UID the
    series metadata is fetched, then the series ZIP archive is downloaded and
    extracted to ``<download_path>/<Subject ID>/<Study UID>/<Series UID>``.

    Args:
        manifest_path: Path to the manifest file.
        download_path: Root folder that receives the extracted series.
        skip_existing: When true, a series whose target folder already holds
            the number of ``.dcm`` files reported by its metadata is not
            downloaded again.
    """

    # (connect, read) timeouts in seconds passed to every HTTP request so a
    # stalled connection cannot block a worker thread forever.
    _REQUEST_TIMEOUT = (30, 600)

    def __init__(self, manifest_path, download_path, skip_existing=True):
        self.__skip_existing = skip_existing
        self.__download_path = download_path
        self.__manifest_file_path = manifest_path
        self.__image_series_UID = []

    def __parse_manifest(self):
        """Read the series UIDs from the manifest into the instance list.

        Lines up to and including the one containing
        ``ListOfSeriesToDownload=`` are ignored. Blank lines after the marker
        are skipped so they do not become empty UIDs. The list is rebuilt on
        every call, so parsing twice does not duplicate entries.
        """
        series_uids = []
        found_starting_line_flag = False
        with open(self.__manifest_file_path) as file:
            for line in file:
                if not found_starting_line_flag:
                    if 'ListOfSeriesToDownload=' in line:
                        found_starting_line_flag = True
                    continue
                uid = line.strip()
                if uid:
                    series_uids.append(uid)

        self.__image_series_UID = series_uids

        if found_starting_line_flag:
            print("Found {} items to download.".format(len(self.__image_series_UID)))
        else:
            print("Incorrect format of the manifest file provided!")

    @staticmethod
    def __get_metadata(series_uid):
        """Fetch the metadata record of one series.

        Args:
            series_uid: Series UID inserted into the metadata URL template.

        Returns:
            The first element of the JSON document returned by the service.

        Raises:
            requests.HTTPError: If the service answers with an error status.
            ValueError: If the response is not valid JSON or holds no record.
        """
        response = requests.get(
            BASE_URL_METADATA.format(series_uid),
            timeout=CBISDDSMDownloader._REQUEST_TIMEOUT,
        )
        # Fail with the HTTP status instead of a confusing JSON decode error.
        response.raise_for_status()
        records = json.loads(response.content.decode("utf-8"))
        if not records:
            raise ValueError(f"No metadata returned for series {series_uid}")
        return records[0]

    @staticmethod
    def __exists(download_path, num_imgs):
        """Tell whether a series folder already looks complete.

        Args:
            download_path: Folder the series would be extracted to.
            num_imgs: Expected number of ``.dcm`` files.

        Returns:
            True only if the folder exists and directly contains exactly
            ``num_imgs`` entries whose name ends with ``.dcm``.
        """
        if not os.path.exists(download_path):
            return False
        num_dcm = sum(1 for item in os.listdir(download_path) if item.endswith('.dcm'))
        return num_dcm == num_imgs

    @staticmethod
    def __download_extract_image(series_uid, path):
        """Download the ZIP archive of one series and extract it into ``path``.

        The whole archive is held in memory before extraction.

        Raises:
            requests.HTTPError: If the service answers with an error status.
            zipfile.BadZipFile: If the response body is not a ZIP archive.
        """
        response = requests.get(
            BASE_URL_IMAGE.format(series_uid),
            timeout=CBISDDSMDownloader._REQUEST_TIMEOUT,
        )
        # Fail with the HTTP status instead of a BadZipFile on an error page.
        response.raise_for_status()
        with zipfile.ZipFile(BytesIO(response.content)) as z:
            z.extractall(path)

    def __payload(self, seriesUID):
        """Download one series unless it is already present on disk.

        This is the unit of work submitted to the thread pool by ``start``.
        """
        metadata = self.__get_metadata(seriesUID)
        folder_name = metadata['Subject ID']
        series_uid = metadata['Series UID']
        study_uid = metadata['Study UID']
        num_imgs = int(metadata['Number of Images'])
        download_path = os.path.join(self.__download_path, folder_name, study_uid, series_uid)

        if self.__skip_existing and self.__exists(download_path, num_imgs):
            return

        self.__download_extract_image(seriesUID, download_path)

    def start(self):
        """Parse the manifest and download all listed series concurrently.

        A failure of one series is printed and does not stop the others.
        """
        self.__parse_manifest()

        with concurrent.futures.ThreadPoolExecutor() as executor:
            # Remember which UID each future belongs to for error reporting.
            future_to_uid = {
                executor.submit(self.__payload, uid): uid
                for uid in self.__image_series_UID
            }
            completed = concurrent.futures.as_completed(future_to_uid)
            for future in tqdm(completed, total=len(future_to_uid), unit="file"):
                uid = future_to_uid[future]
                try:
                    future.result()
                except Exception as exc:
                    # Top-level boundary: report and keep going with the rest.
                    print(f"{uid} generated an exception: {exc}")


if __name__ == "__main__":
    # Command-line entry point: read the two optional paths, then download.
    cli = argparse.ArgumentParser(prog='CBIS DDSM Downloader')
    # Set this default to the path of your tcia manifest file.
    cli.add_argument(
        '-m', '--manifest',
        default=r'manifest-1718860687510.tcia',
        help='Path to the manifest file.',
    )
    # Set this default to the folder where the downloaded data is saved.
    cli.add_argument(
        '-p', '--path',
        default='./CBIS_DDSM',
        help='Path to the download folder. It will be created if not existing.',
    )
    options = cli.parse_args()
    CBISDDSMDownloader(
        manifest_path=options.manifest,
        download_path=options.path,
    ).start()