欧空局ODATA api使用

机器人411号

已于 2023-12-08 21:33:58 修改

阅读量499

点赞数 10

文章标签：数据库

于 2023-12-06 20:20:02 首次发布

本文链接：https://blog.csdn.net/qq_37897375/article/details/134426742

版权

欧空局在2023年10月底关闭了原有的数据下载网站（https://scihub.copernicus.eu/dhus/#/home），该网站已经不可使用，数据下载已转移到新的网站：https://dataspace.copernicus.eu

本文章主要分享一下新网站提供的众多API中的OData (Open Data Protocol)的调用。官方教程：Documentation - OData (copernicus.eu)

使用Python调用requests库，查询并下载数据

代码如下：

2023/12/8更新

import requests
import os
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
import time


def get_access_token(username: str, password: str) -> str:
    """Request an access token from the Copernicus Data Space identity service.

    Sends a password-grant request for the public ``cdse-public`` client.

    Args:
        username: Account user name.
        password: Account password.

    Returns:
        The ``access_token`` string from the JSON response.

    Raises:
        Exception: If the request cannot be sent or the server answers with
            an HTTP error status. The underlying ``requests`` exception is
            chained as the cause.
    """
    data = {
        "client_id": "cdse-public",
        "username": username,
        "password": password,
        "grant_type": "password",
    }
    try:
        r = requests.post("https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token",
                          data=data,
                          )
        r.raise_for_status()
    except requests.RequestException as e:
        # When the request never reached the server (DNS failure, timeout,
        # refused connection) there is no response object to inspect, so
        # fall back to the exception text instead of touching an unbound name.
        failed_response = getattr(e, "response", None)
        if failed_response is None:
            detail = str(e)
        else:
            try:
                detail = failed_response.json()
            except ValueError:
                # Error pages are not guaranteed to be JSON.
                detail = failed_response.text
        raise Exception(
            f"Access token creation failed. Reponse from the server was: {detail}"
        ) from e
    return r.json()["access_token"]

# Includes the $top parameter so that one query can return up to 999 products
def get_product_url(pro_time, relative_orbit_number, product_type, collection, product_time_type, orbit_direction, top=999):
    """Build the OData catalogue query URL for the given product filters.

    All filters are combined with ``and``. No spatial (area) filter is
    applied. The returned URL is not percent-encoded.

    Args:
        pro_time: Two date strings ``[start, end]`` in ``YYYY-MM-DD`` form,
            e.g. ``['2016-12-01', '2016-12-31']``. Products are selected
            whose ``ContentDate/Start`` is strictly between midnight
            (``T00:00:00.000Z``) of the two dates.
        relative_orbit_number: Value of the ``relativeOrbitNumber``
            attribute, e.g. ``360``.
        product_type: Value of the ``productType`` attribute, upper case,
            e.g. ``'SR_2_LAN___'``.
        collection: Substring the product name must contain, e.g. ``'S3A'``.
        product_time_type: Substring the product name must contain,
            e.g. ``'NT'``.
        orbit_direction: Value of the ``orbitDirection`` attribute,
            e.g. ``'DESCENDING'``.
        top: Value of the ``$top`` query option (maximum number of results
            requested). Defaults to 999.

    Returns:
        The complete query URL as a string.
    """
    base_url = "https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter="
    filters = [
        f"Attributes/OData.CSC.IntegerAttribute/any(att:att/Name eq 'relativeOrbitNumber' and att/OData.CSC.IntegerAttribute/Value eq {relative_orbit_number})",
        f"Attributes/OData.CSC.StringAttribute/any(att:att/Name eq 'productType' and att/OData.CSC.StringAttribute/Value eq '{product_type}')",
        f"ContentDate/Start gt {pro_time[0]}T00:00:00.000Z and ContentDate/Start lt {pro_time[1]}T00:00:00.000Z",
        f"contains(Name,'{collection}')",
        f"contains(Name,'{product_time_type}')",
        f"Attributes/OData.CSC.StringAttribute/any(att:att/Name eq 'orbitDirection' and att/OData.CSC.StringAttribute/Value eq '{orbit_direction}')",
    ]
    # $top is a separate query option, so it is appended with '&' rather
    # than joined into the $filter expression.
    return base_url + " and ".join(filters) + f"&$top={top}"


def _strip_suffix(name, suffix):
    """Return *name* without a trailing *suffix*; unchanged if it does not end with it."""
    if suffix and name.endswith(suffix):
        return name[:-len(suffix)]
    return name


def get_new_product(folder, product):
    """List the catalogue products that are not yet present in *folder*.

    A product counts as present when a file in *folder* has the same base
    name once the extensions ``.zip``, ``.nc`` and ``.SEN3`` are removed.

    Args:
        folder: Directory that holds the already downloaded files.
        product: Iterable of catalogue entries, each a mapping with at
            least the keys ``'Id'`` and ``'Name'``.

    Returns:
        A list of ``[download_url, name, date_number]`` lists, one per
        missing product, sorted ascending by ``date_number``.
        ``date_number`` is ``int(name[16:24])``.

    Raises:
        OSError: If *folder* cannot be listed.
        ValueError: If characters 16-24 of a product name are not digits.
    """
    # Extensions are removed as exact suffixes, in this order. str.rstrip
    # must not be used here: it strips a *set of characters*, so a name
    # ending in "_003.SEN3" would be cut down to "_00".
    existing = set()
    for file in os.listdir(folder):
        base_name = file
        for suffix in ('.zip', '.nc', '.SEN3'):
            base_name = _strip_suffix(base_name, suffix)
        existing.add(base_name)

    new_products = []
    for item in product:
        name = item['Name']
        if _strip_suffix(name, '.SEN3') in existing:
            continue
        file_id = f"https://zipper.dataspace.copernicus.eu/odata/v1/Products({item['Id']})/$value"
        new_products.append([file_id, name, int(name[16:24])])
    new_products.sort(key=lambda entry: entry[2])
    return new_products


def download_file(url, file_name, folder="E:/S3Achina/", max_retries=5, retry_delay=5):
    """Download one product to ``<folder>/<file_name>.zip``, retrying on failure.

    Uses the module-level ``session`` and ``headers`` objects for the HTTP
    request. An attempt succeeds only when the server reports a
    ``content-length`` larger than 1024 bytes and the file written to disk
    has exactly that size.

    Args:
        url: Download URL of the product.
        file_name: Product name; ``.zip`` is appended to form the file name.
        folder: Target directory. Defaults to ``"E:/S3Achina/"``.
        max_retries: Maximum number of download attempts.
        retry_delay: Seconds to wait between two attempts.

    Returns:
        None, whether or not the download succeeded. A message is printed
        in both cases.
    """
    fname = os.path.join(folder, file_name + ".zip")
    for attempt in range(1, max_retries + 1):
        try:
            # The request is issued inside the loop: a streamed response body
            # can only be read once, so every attempt needs a fresh response.
            with session.get(url, headers=headers, stream=True) as response:
                response.raise_for_status()
                product_size = int(response.headers.get('content-length', 0))
                if product_size <= 1024:
                    # Tiny or missing bodies are treated as failed downloads.
                    raise ValueError(
                        f"unexpected content-length {product_size} for {file_name}")
                with open(fname, 'wb') as file, tqdm(
                    desc=fname,
                    total=product_size,
                    unit='iB',
                    unit_scale=True,
                    unit_divisor=1024,
                ) as bar:
                    for data in response.iter_content(chunk_size=2048):
                        bar.update(file.write(data))
            real_size = os.path.getsize(fname)
            if real_size != product_size:
                raise ValueError(
                    f"size mismatch for {file_name}: expected {product_size}, got {real_size}")
        except (requests.RequestException, OSError, ValueError):
            # Remove any partial file so that a later directory scan does not
            # mistake it for a finished download.
            try:
                if os.path.exists(fname):
                    os.remove(fname)
            except OSError:
                pass
            if attempt < max_retries:
                time.sleep(retry_delay)
        else:
            print(f"有一个下载完成")
            return
    print(f"下载失败: {file_name}")


# Script entry point ----------------------------------------------------------------------------
# Query the catalogue for the matching products, then download every product
# that is not yet in the target folder, repeating until none is missing.
# Altimetry data is used as the example; the query inputs are: time range,
# relative orbit, product type, satellite, timeliness and orbit direction.
# NOTE: the loop only ends when every product is on disk, so a product that
# can never be downloaded keeps it running.
username = "你的账号"
password = "你的密码"
url = get_product_url(['2018-01-01', '2023-12-01'], 310,
                      'SR_2_LAN___', 'S3A', 'NT', 'ASCENDING')
access_token = get_access_token(username, password)
# `session` and `headers` are read as globals by download_file.
headers = {"Authorization": f"Bearer {access_token}"}
session = requests.Session()
session.headers.update(headers)
response_all = session.get(url, headers=headers)
# Fail with the HTTP error itself instead of a KeyError on 'value' below.
response_all.raise_for_status()
data = response_all.json()
product = data['value']
print(f'共有{len(product)}个数据要下载')
pass_count = 0
while True:
    new_products = get_new_product('E:/S3Achina', product)
    print(f'剩余{len(new_products)}个产品')
    if not new_products:
        break
    pass_count += 1
    if pass_count > 1:
        # Access tokens expire; request a new one before every further pass
        # so that retries are not sent with a stale token.
        access_token = get_access_token(username, password)
        headers["Authorization"] = f"Bearer {access_token}"
        session.headers.update(headers)
    # Download the missing products in parallel.
    with ThreadPoolExecutor() as executor:
        # Submit one download task per missing product to the thread pool.
        futures = [executor.submit(
            download_file, download_url[0], download_url[1]) for download_url in new_products]
        # Wait for all tasks to finish.
        for future in futures:
            future.result()

机器人411号

关注

10
点赞
踩
10

收藏

觉得还不错? 一键收藏
5
评论
欧空局ODATA api使用

本文章主要分享一下新网站提供的众多API中的OData (Open Data Protocol)的调用。使用Python调用requests库，查询并下载数据。欧空局在2023年10月底关闭了网站，
复制链接

扫一扫