功能:自动下载文件夹下哨兵数据对应的轨道数据
以下载哨兵的精轨数据为例:
网址:https://qc.sentinel1.eo.esa.int/aux_poeorb/
url搜索参数:
key | value | 例子 |
---|---|---|
mission | 哨兵1A:S1A 哨兵1B:S1B | 查询1A的精轨数据: https://qc.sentinel1.eo.esa.int/aux_poeorb/?mission=S1A |
validity_start_time | 年:2014 月:2014-01 日:2017-04-07…2017-04-10 | 查询2017-03-16到2017-03-19的精轨数据: https://qc.sentinel1.eo.esa.int/aux_poeorb/?validity_start_time=2017&validity_start_time=2017-03&validity_start_time=2017-03-16..2017-03-19 |
搜索参数的问题:
Python代码:
# -*- coding:utf-8 -*-
# Author:PasserQi
# Time:2019-4-5
# 下载文件夹下哨兵数据的精轨数据
# 须知:文件夹下的哨兵数据需解压。不想解压可以将下方 FILE_TYPE 参数由 .SAFE 改为 .zip
import urllib
from bs4 import BeautifulSoup
import re
import os
import datetime
import time
# --- Parameters to edit before running ---
dir_path = r'G:\Sentinel-original data\Orbit40-path40\Frame75-11\added_20180105\SourceData' # directory containing the Sentinel-1 scenes
out_path = r'C:\Users\PasserQi\Desktop' # directory the precise-orbit (.EOF) files are saved to
FILE_TYPE = ".SAFE" # scene suffix to match: ".SAFE" (unpacked) or ".zip"
IsDownload = True # True: download now; False: only write the links to a text file
download_urls = [] # collected .EOF links when IsDownload is False
error_url = [] # URLs whose download failed; retried at the end of the script
url_prefix = 'https://qc.sentinel1.eo.esa.int/aux_poeorb/' # base URL of the orbit archive
def download(dest_dir, url):
print "正在下载:{}\n\t至{}\n".format(url, dest_dir)
try:
urllib.urlretrieve(url, dest_dir, callbackfunc)
except:
error_url.append(url)
print '\tError retrieving the URL:', dest_dir
else: # 没有异常
print "\t[done]"
if url in error_url: #在错误列表里
error_url.remove(url)
def callbackfunc(blocknum, blocksize, totalsize):
    '''Progress hook for urllib.urlretrieve: prints the percent downloaded.

    @blocknum:  number of blocks transferred so far
    @blocksize: size of each block in bytes
    @totalsize: total size of the remote file in bytes; urlretrieve passes
                -1 (and some servers 0) when no Content-Length is reported
    '''
    if totalsize <= 0:
        # Unknown total size: a percentage would be meaningless (or a
        # ZeroDivisionError), so report the byte count instead.
        print("%d bytes" % (blocknum * blocksize))
        return
    percent = 100.0 * blocknum * blocksize / totalsize
    # The last block usually overshoots the file size; clamp to 100%.
    if percent > 100:
        percent = 100
    print("%.2f%%" % percent)
def get_yestoday(mytime):
    """Return the day before `mytime`; both are 'YYYYMMDD' strings."""
    current = datetime.datetime.strptime(mytime, '%Y%m%d')
    previous = current - datetime.timedelta(days=1)
    return previous.strftime('%Y%m%d')
if __name__ == '__main__':
# 获得files
files = os.listdir(dir_path)
#files = [
# "S1A_IW_SLC__1SDV_20180201T101712_20180201T101742_020412_022E1C_43FD.SAFE",
# "S1A_IW_SLC__1SDV_20180213T101712_20180213T101742_020587_0233BB_CA75.SAFE",
# "S1A_IW_SLC__1SDV_20180309T101712_20180309T101742_020937_023ED6_693E.SAFE",
# ]
for file in files:
if not file.endswith(FILE_TYPE):
continue
# ###########################
# 按文件名上的信息查找EOF
# 拼接URL
url_param_json = {}
url_param_json['sentinel1__mission'] = file[0:3]
date = re.findall(r"\d{8}",file)[0]
# 若参数为20170316,则搜索的是20170317的数据
# 所以参数应该提前一天
# 求date的前一天
date = get_yestoday(date)
# 在字符串指定位置插入指定字符
# 例:20170101 --> 2017-01-01
tmp = list(date)
tmp.insert(4,'-');tmp.insert(7,'-')
date = "".join(tmp)
url_param_json['validity_start'] = date
# 获得EOF下载网址
url_param = urllib.urlencode(url_param_json) #url参数
url = 'https://qc.sentinel1.eo.esa.int/aux_poeorb/?%s' % url_param #拼接
print "url:{}".format(url)
html = urllib.urlopen(url) # 获取html
dom = BeautifulSoup(html) # 解析html文档
a_list = dom.findAll("a") # 找出<a>
eof_lists = [a['href'] for a in a_list if a['href'].endswith('.EOF')] # 找出EOF
for eof in eof_lists:
if IsDownload:
eof_name = eof.split('/')[-1] #名字
savefile = os.path.join(out_path, eof_name) #保存路径
download(savefile, eof)
else:
download_urls.append(eof)
if IsDownload: #下载
print "------------------------------------"
print "开始下载出错的数据"
# 下载出错的数据重新下载
while len(error_url)!=0:
print "出错的数据有"
print error_url
for eof in error_url:
savefile = os.path.join(out_path, eof)
download(savefile, url_prefix + eof)
print "全部下载成功,无出错文件"
else: #不下载
with open(os.path.join(out_path, u"下载链接.txt"), "w+") as f:
for eof in download_urls:
f.write(eof)
f.write("\n")
f.close()
喜欢的帮忙赞一下哦:https://github.com/PasserQi/python_crawler
相关参考: