欧空局在2023年10月底关闭了旧的数据网站 https://scihub.copernicus.eu/dhus/#/home,该网站已经不可使用,数据下载已转移到新的网站:https://dataspace.copernicus.eu
本文主要分享新网站提供的众多API中 OData (Open Data Protocol) 接口的调用方法。官方教程:Documentation - OData (https://documentation.dataspace.copernicus.eu/APIs/OData.html)
使用Python调用requests库,查询并下载数据
代码如下:
2023/12/8更新
import requests
import os
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
import time
def get_access_token(username: str, password: str) -> str:
    """Request an access token from the Copernicus Data Space identity service.

    Posts a password-grant request for the public ``cdse-public`` client and
    returns the ``access_token`` field of the JSON response.

    Args:
        username: Account name used to log in.
        password: Password for that account.

    Returns:
        The bearer token string.

    Raises:
        Exception: If the server cannot be reached or answers with an HTTP
            error status. The original error is chained as ``__cause__``.
    """
    token_url = "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token"
    data = {
        "client_id": "cdse-public",
        "username": username,
        "password": password,
        "grant_type": "password",
    }
    # The request and the status check are handled separately: when the
    # request itself fails there is no response object to report on.
    try:
        r = requests.post(token_url, data=data, timeout=30)
    except requests.RequestException as e:
        raise Exception(
            f"Access token creation failed. Could not reach the server: {e}"
        ) from e
    try:
        r.raise_for_status()
    except requests.HTTPError as e:
        # Error bodies are not guaranteed to be JSON; fall back to raw text.
        try:
            detail = r.json()
        except ValueError:
            detail = r.text
        raise Exception(
            f"Access token creation failed. Reponse from the server was: {detail}"
        ) from e
    return r.json()["access_token"]
# The query carries a $top parameter that caps the number of returned products.
def get_product_url(pro_time, relative_orbit_number, product_type, collection,
                    product_time_type, orbit_direction, top=999):
    """Build the OData catalogue query URL for a set of product filters.

    All filters are combined with ``and``. No spatial (area) filter is applied.

    Args:
        pro_time: Two dates as 'YYYY-MM-DD' strings, e.g.
            ['2016-12-01', '2016-12-31']. Products whose ContentDate/Start
            lies strictly between the two dates (at 00:00:00.000Z) match.
        relative_orbit_number: Relative orbit number as an int, e.g. 360.
        product_type: Product type string in upper case, e.g. 'SR_2_LAN___'.
        collection: Substring that the product name must contain, e.g. 'S3A'.
        product_time_type: Substring that the product name must contain,
            e.g. 'NT'.
        orbit_direction: Orbit direction string, e.g. 'DESCENDING'.
        top: Maximum number of products requested via ``$top``. Defaults to
            999, the value that was previously hard-coded.

    Returns:
        The complete query URL as a string (not percent-encoded).
    """
    base_url = "https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter="
    # The order of the clauses is kept stable so the generated URL is
    # reproducible for the same arguments.
    clauses = [
        f"Attributes/OData.CSC.IntegerAttribute/any(att:att/Name eq 'relativeOrbitNumber' and att/OData.CSC.IntegerAttribute/Value eq {relative_orbit_number})",
        f"Attributes/OData.CSC.StringAttribute/any(att:att/Name eq 'productType' and att/OData.CSC.StringAttribute/Value eq '{product_type}')",
        f"ContentDate/Start gt {pro_time[0]}T00:00:00.000Z and ContentDate/Start lt {pro_time[1]}T00:00:00.000Z",
        f"contains(Name,'{collection}')",
        f"contains(Name,'{product_time_type}')",
        f"Attributes/OData.CSC.StringAttribute/any(att:att/Name eq 'orbitDirection' and att/OData.CSC.StringAttribute/Value eq '{orbit_direction}')",
    ]
    return base_url + " and ".join(clauses) + f"&$top={top}"
def _strip_suffix(name, suffix):
    """Return ``name`` without one trailing ``suffix``; unchanged if absent."""
    if suffix and name.endswith(suffix):
        return name[:-len(suffix)]
    return name


def _product_stem(file_name):
    """Reduce a file name on disk to the bare product name used for matching."""
    # Applied in this order so that 'X.SEN3.zip' becomes 'X'.
    for suffix in ('.zip', '.nc', '.SEN3'):
        file_name = _strip_suffix(file_name, suffix)
    return file_name


def get_new_product(folder, product):
    """List the catalogue products that are not yet present in ``folder``.

    A product counts as present when a directory entry, after removing a
    trailing '.zip', '.nc' and '.SEN3', equals the product name without its
    trailing '.SEN3'. Suffixes are removed exactly; ``str.rstrip`` is not
    used because it strips a set of characters and would also eat trailing
    characters of the name itself (e.g. the '3' of '_003').

    Args:
        folder: Directory that holds the files downloaded so far.
        product: Iterable of catalogue entries, each a mapping with at least
            the keys 'Id' and 'Name'.

    Returns:
        A list of ``[download_url, name, date]`` lists, sorted ascending by
        ``date``, where ``date`` is ``int(name[16:24])``.

    Raises:
        OSError: If ``folder`` cannot be listed.
        ValueError: If characters 16-23 of a product name are not an integer.
    """
    downloaded = {_product_stem(entry) for entry in os.listdir(folder)}
    new_products = [
        [
            f"https://zipper.dataspace.copernicus.eu/odata/v1/Products({item['Id']})/$value",
            item['Name'],
            int(item['Name'][16:24]),
        ]
        for item in product
        if _strip_suffix(item['Name'], '.SEN3') not in downloaded
    ]
    new_products.sort(key=lambda entry: entry[2])
    return new_products
def download_file(url, file_name, out_dir="E:/S3Achina/", max_retries=5, retry_delay=5):
    """Download one product to ``out_dir + file_name + ".zip"`` with retries.

    Uses the module-level ``session`` and ``headers`` for the request. Every
    attempt issues a fresh request, because a streamed response body can only
    be read once. Data is written to a temporary ``.part`` file and renamed
    to the final name only after the size on disk matches the announced
    content-length, so an interrupted download never leaves a file behind
    under the final name.

    Args:
        url: Download URL of the product.
        file_name: Product name; '.zip' is appended to form the file name.
        out_dir: Target directory prefix, including the trailing separator.
        max_retries: Maximum number of attempts.
        retry_delay: Seconds to wait between attempts.

    Returns:
        None. After ``max_retries`` failed attempts a message is printed and
        the function returns without raising, so the caller can try again
        later.
    """
    fname = out_dir + file_name + ".zip"
    part_name = fname + ".part"
    for attempt in range(1, max_retries + 1):
        try:
            with session.get(url, headers=headers, stream=True, timeout=60) as response:
                response.raise_for_status()
                product_size = int(response.headers.get('content-length', 0))
                # Responses of at most 1 KiB are treated as failed attempts,
                # not as product data.
                if product_size <= 1024:
                    raise ValueError(f"unexpected content-length {product_size}")
                with open(part_name, 'wb') as file, tqdm(
                    desc=fname,
                    total=product_size,
                    unit='iB',
                    unit_scale=True,
                    unit_divisor=1024,
                ) as bar:
                    for data in response.iter_content(chunk_size=2048):
                        bar.update(file.write(data))
            real_size = os.path.getsize(part_name)
            if real_size != product_size:
                raise ValueError(
                    f"size mismatch: expected {product_size}, got {real_size}"
                )
            os.replace(part_name, fname)
            print("有一个下载完成")
            return
        except (requests.RequestException, OSError, ValueError) as err:
            # Drop whatever was written so the next attempt starts clean.
            if os.path.exists(part_name):
                os.remove(part_name)
            if attempt < max_retries:
                time.sleep(retry_delay)
            else:
                print(f"{file_name} 下载失败: {err}")
# Script entry point ---------------------------------------------------------------------------
# Query the catalogue with requests and download every product that is not
# yet on disk. Altimetry data is used as the example: the inputs are the time
# range, relative orbit, product type, satellite, timeliness and orbit direction.
url = get_product_url(['2018-01-01', '2023-12-01'], 310,
                      'SR_2_LAN___', 'S3A', 'NT', 'ASCENDING')
username = "你的账号"
password = "你的密码"
access_token = get_access_token(username, password)
headers = {"Authorization": f"Bearer {access_token}"}
session = requests.Session()
session.headers.update(headers)
response_all = session.get(url, headers=headers, timeout=60)
# Fail early with a clear HTTP error instead of a KeyError on 'value'.
response_all.raise_for_status()
data = response_all.json()
product = data['value']
print(f'共有{len(product)}个数据要下载')

# Stop after this many consecutive rounds in which nothing new was
# downloaded, so a permanently failing product cannot loop forever.
max_stalled_rounds = 3
stalled_rounds = 0
previous_count = None
while True:
    new_products = get_new_product('E:/S3Achina', product)
    print(f'剩余{len(new_products)}个产品')
    if not new_products:
        break
    if previous_count is not None:
        if len(new_products) >= previous_count:
            stalled_rounds += 1
            if stalled_rounds >= max_stalled_rounds:
                print(f'连续{stalled_rounds}轮没有新的下载完成,停止重试')
                break
        else:
            stalled_rounds = 0
        # Access tokens are short-lived, so a new one is requested before
        # every retry round. download_file reads the module-level `headers`.
        access_token = get_access_token(username, password)
        headers = {"Authorization": f"Bearer {access_token}"}
        session.headers.update(headers)
    previous_count = len(new_products)
    # NOTE(review): the service limits concurrent downloads per account;
    # 4 is believed to be the general-user limit -- confirm against the
    # current quota documentation.
    with ThreadPoolExecutor(max_workers=4) as executor:
        # Submit the download tasks to the thread pool.
        futures = [executor.submit(
            download_file, download_url[0], download_url[1]) for download_url in new_products]
        # Wait for all tasks to finish.
        for future in futures:
            future.result()