Building on the code by 苦学RS的小龙, this version adds: reading the acquisition dates of the SLC images straight from their file names (readSLCFileName, which saves you from listing the Sentinel image dates by hand), plus checking the size of every downloaded orbit file and re-downloading the broken ones (getFileSize and downloadWrongOribits).
For the detailed workflow see:
https://blog.csdn.net/qq_44932630/article/details/124202144
FBI WARNING:
In this example, you must fill in the complete Cookie in the headers of the download function from the original base code;
in the headers of that download function, replace the Cookie with your own information (P.S. you also need to replace it whenever you switch machines).
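Because the Cookie expires and differs between machines, a small helper like the sketch below can keep it out of the source file. This is only a minimal sketch of mine (readCookie is not part of the original script, which hard-codes the string); it assumes you paste your browser's Cookie string into the cookie.txt file that the main block already points to.

# A minimal sketch (my addition, not in the original script): read the browser
# Cookie string from cookie.txt instead of hard-coding it in download()'s headers.
import os

def readCookie(cookie_txt_file_path):
    # Assumes cookie.txt holds the raw Cookie string on a single line,
    # copied from the browser's developer tools; returns "" if the file is missing.
    if not os.path.exists(cookie_txt_file_path):
        return ""
    with open(cookie_txt_file_path, "r", encoding="utf-8") as f:
        return f.read().strip()

# Usage inside download(): headers["Cookie"] = readCookie(cookie_path)

The complete script is below.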
# coding:utf-8
from urllib.parse import urlparse
import urllib.request
from bs4 import BeautifulSoup
import re
import datetime
from dateutil.parser import parse
import tkinter as tk
import sys
import os
import requests
timestart = datetime.datetime.now()
def downloadWrongOribits(wrongList, outPath):
    # Re-download every orbit file whose first attempt failed or whose size is abnormal.
    # wrongList holds the full download URLs; outPath is the local orbits folder.
    while len(wrongList) != 0:
        fileIndex = 1
        print("------------------------------------")
        print("Start downloading the data with the error.")
        print("The data that went wrong is:")
        print(wrongList)
        for eof in list(wrongList):
            print("(" + str(fileIndex) + "/" + str(len(wrongList)) + ")" + " Start downloading the Precision track files: " + eof)
            saveFile = os.path.join(outPath, os.path.basename(eof))
            download(saveFile, eof)
            fileIndex += 1
        # download() removes a URL from the global error_url once it succeeds,
        # so only the URLs still listed there need another pass.
        wrongList = [eof for eof in wrongList if eof in error_url]
    print("The data download for the error is complete.")
    print("------------------------------------")
def getFileSize(filePath):
    # Return the size of filePath as a (value, unit) tuple, e.g. (4.32, 'M').
    fsize = os.path.getsize(filePath)
    if fsize < 1024:
        return round(fsize, 2), 'Byte'
    else:
        KBX = fsize / 1024
        if KBX < 1024:
            return round(KBX, 2), 'K'
        else:
            MBX = KBX / 1024
            if MBX < 1024:
                return round(MBX, 2), 'M'
            else:
                return round(MBX / 1024, 2), 'G'
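# Example of what getFileSize() returns (the path and value are hypothetical):
#   getFileSize(r"D:\data\orbits\S1A_OPER_AUX_POEORB_xxx.EOF")  ->  (4.32, 'M')
# The main block below treats any orbit file that is not in the MB range, or that
# is smaller than 4 MB, as an incomplete download and schedules it for re-download.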
def readSLCFileName(parentPath, path):
    # Collect the acquisition dates (YYYYMMDD) of all SLC zip files under parentPath + path,
    # writing the zip names to zip_file_list.txt as a record and returning the unique dates.
    orbitsDateListContainsDuplicates = []
    fileList = os.listdir(parentPath + path)
    zip_file_list = []
    for file in fileList:
        if file[-3:] == "zip":
            zip_file_list.append(file)
    with open("zip_file_list.txt", "w") as logFile:
        for zip_file in zip_file_list:
            logFile.write(zip_file + "\n")
    with open("zip_file_list.txt", 'r') as logFile:
        lines = logFile.read().splitlines()
    for line in lines:
        # The 8-digit acquisition date starts at character 17 of a standard SLC name.
        orbitsDateListContainsDuplicates.append(line[17:25])
    orbitsDateList = []
    for date in orbitsDateListContainsDuplicates:
        if date not in orbitsDateList:
            orbitsDateList.append(date)
    return orbitsDateList
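# Example (hypothetical SLC name): for
#   S1A_IW_SLC__1SDV_20200105T095646_20200105T095713_030698_038401_D2F8.zip
# the slice file[17:25] is "20200105", so the function returns the list of unique
# acquisition dates found under the slc folder, one entry per date.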
def download(dest_dir, url):
    # Download one precise orbit (.EOF) file from ASF and save it to dest_dir.
    print(url)
    print(dest_dir)
    headers = {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Encoding": "gzip, deflate, br",
"Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
"Connection": "keep-alive",
"Cookie": "_ga_XCPHL9DW7E=GS1.1.1696731884.3.1.1696732536.0.0.0; _ga=GA1.2.1813036397.1695265545; "
"_ce.s=v~998cce5ddc7dea52e816b862b1f59664c97255b0~lcw~1696732434540~vpv~2~v11.fhb~1696731892368~v11"
".lhb~1696732494573~lcw~1696732494574; "
"asf-urs=eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9"
".eyJmaXJzdF9uYW1lIjoiQ2hlbnpoaWhhbyIsImxhc3RfbmFtZSI6IlFpYW4iLCJ1cnMtdXNlci1pZCI6Imdpc2VyY2huIiwidXJzLWFjY2Vzcy10b2tlbiI6ImV5SjBlWEFpT2lKS1YxUWlMQ0p2Y21sbmFXNGlPaUpGWVhKMGFHUmhkR0VnVEc5bmFXNGlMQ0p6YVdjaU9pSmxaR3hxZDNSd2RXSnJaWGxmYjNCeklpd2lZV3huSWpvaVVsTXlOVFlpZlEuZXlKMGVYQmxJam9pVDBGMWRHZ2lMQ0pqYkdsbGJuUmZhV1FpT2lKQ1QxOXVOMjVVU1d4TmJHcGtkbFUyYTFKU1FqTm5JaXdpWlhod0lqb3hOams1TlRnNE5UY3pMQ0pwWVhRaU9qRTJPVFk1T1RZMU56TXNJbWx6Y3lJNklrVmhjblJvWkdGMFlTQk1iMmRwYmlJc0luVnBaQ0k2SW1kcGMyVnlZMmh1SW4wLnF6V1hnQmhJblBQWkZuXy00Vkd0dW5abEZCVUZNVkxwMHM1cE0tMXlRZ0VGR2piM21MekQtV1hpaWFHbzlxWkFJN3lRSVFHWUwwRS1Ja2VUNEhGU1ZRZl9QWU1SdG5hN3JYZU5PQmNWTWlGWFhucmk5Ynk4bGI2NWhZNC10U2VpUGs3WVhWREtZWkpKMkJzRWNVWG5XSXZzS05oanQtcjRubmJ2RVRDVnBFUGExUlZJbm1RTEpLS3ZpVFl1R0ItMFIxM2d6enlRc0xLV1BCWmFVQ2JVM3VNTUdRYlFZS1dxWkJRZXF0bGtDN2FMMVpKbnQ1aW8tcmtoMjNSa0pUbHR3NGk0RkNKOE1HVWp5eDdxUXMxSGNfX2pDUDV6eHBJQ1RBZ1VUVnZIbUhjaUI0MWpDcFlZYkplNGttb1kwb2pRY0wzSG83Q2hoY3VNYkp6QUZpQXhvQSIsInVycy1ncm91cHMiOltdLCJpYXQiOjE2OTY5OTY1NzQsImV4cCI6MTY5NzYwMTM3NH0.T3w_cX26656amley8u6brpP3-QM8hEfsbXO1ZVvHHYSluKkiLnPBbHjFFdm0l7pbTgpN8hoafq0ZImb_oTBNCb5RNaSCMY8OqV6Yw7AeRT8lFE1ZH9IewGfHt_LbMkfrQYeR0KrQRObEjT8xUdyESWJDPSOvNywqXYlncnjKxceIK0fxaHtBsBJmKK4gZOiYJVUc6WXmGPxZAtb7wPmPb7YjcqFFnxfJbdkEUBqXh_uwDU000thixMuDN2_tfuxT4tDML5v6CsYMNhq4CxSq5FYzfyxftPhLTqS2yGQsG-xGs-9PuGCSLwrQjxxh89Birk0PlVsbExGVS9HLikEyFAl23dh-3J5lOE-T7ZdKcm3G5a0J5oGuul8SGZTe91S4X7RELSomkEMhH5mHk1zTQ_DW7UC1p5RUKD0H7uLr6IrAMHEsPbovl11VsaKtLl5SIuTqpuEyXolx5GR6Us-DUEu9F-HD3HCbYL5AMyrKPI0NI29SU_YhNZ6jlxOmaSujzWH3nAI7p5wbZib6tf1PU6M_ih3LJSter7bD3lmj43yttA7MNVGsZGZ8z3xKJdNikA0CBrYotBd3dUSKSBhuN69HVpIxQ1_X9fq_QdVGQSo--vnbI9llLO4lcMDac3LiBD7Ky7aU64FwumYuvDtpVhO2j5AxMwMMKLFfvsIBcmY; urs-user-id=giserchn; urs-access-token=eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIjoiT0F1dGgiLCJjbGllbnRfaWQiOiJCT19uN25USWxNbGpkdlU2a1JSQjNnIiwiZXhwIjoxNjk5NTg4NTczLCJpYXQiOjE2OTY5OTY1NzMsImlzcyI6IkVhcnRoZGF0YSBMb2dpbiIsInVpZCI6Imdpc2VyY2huIn0.qzWXgBhInPPZFn_-4VGtunZlFBUFMVLp0s5pM-1yQgEFGjb3mLzD-WXiiaGo9qZAI7yQIQGYL0E-IkeT4HFSVQf_PYMRtna7rXeNOBcVMiFXXnri9by8lb65hY4-tSeiPk7YXVDKYZJJ2BsEcUXnWIvsKNhjt-r4nnbvETCVpEPa1RVInmQLJKKviTYuGB-0R13gzzyQsLKWPBZaUCbU3uMMGQbQYKWqZBQeqtlkC7aL1ZJnt5io-rkh23RkJTltw4i4FCJ8MGUjyx7qQs1Hc__jCP5zxpICTAgUTVvHmHciB41jCpYYbJe4kmoY0ojQcL3Ho7ChhcuMbJzAFiAxoA",
"Host": "s1qc.asf.alaska.edu",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "none",
"Sec-Fetch-User": "?1",
"Upgrade-Insecure-Requests": "1",
"User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0",
}
    try:
        request = urllib.request.Request(url, headers=headers)
        response = urllib.request.urlopen(request)
        # The .EOF orbit files are plain XML text, so they are written out line by line.
        with open(dest_dir, "w") as f:
            for line in response.readlines():
                f.write(line.decode())
    except Exception:
        # Remember the failed URL so downloadWrongOribits() can retry it later.
        if url not in error_url:
            error_url.append(url)
        print("\tError retrieving the URL:", dest_dir)
    else:
        if url in error_url:
            error_url.remove(url)
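# Note: download() appends a URL to the global error_url list when the request
# fails and removes it again once a later attempt succeeds; downloadWrongOribits()
# relies on this to decide which orbit files still need another try.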
if __name__ == '__main__':
    current_path = sys.path[0]
    slc_path = os.sep + "slc"
    orbits_path = os.sep + "orbits"
    cookie_txt_file_path = os.path.join(sys.path[0], "cookie.txt")
    orbitsList = []
    wrongDownloadOrbitsList = []
    root = tk.Tk()
    root.withdraw()
    error_url = []
    cookie_path = cookie_txt_file_path
    out_path = current_path + orbits_path
    # Query the ASF precise orbit (aux_poeorb) listing for Sentinel-1A files.
    url_param_json = {}
    url_param_json['sentinel1__mission'] = 'S1A'
    date = '2015-01-01'
    url_param_json['validity_start'] = date
    url_param = urllib.parse.urlencode(url_param_json)
    url = 'https://s1qc.asf.alaska.edu/aux_poeorb/?%s' % url_param
    html = requests.get(url).content
    dom = BeautifulSoup(html, "lxml")
    a_list = dom.findAll("a")
    eof_lists = [a['href'] for a in a_list if a['href'].endswith('.EOF')]
    TimeArray = []
    # Acquisition dates of the local SLC images; each one needs a precise orbit file.
    NeedTimeArray = readSLCFileName(current_path, slc_path)
    fileIndex = 1
    for eof in eof_lists:
        if os.path.splitext(eof)[1] == ".EOF" and os.path.basename(eof)[0:3] == 'S1A':
            # The second-to-last field of the EOF name is the validity stop time;
            # the acquisition date of the matching SLC image is one day earlier.
            SplitEOF = re.split(r'[_.\s]+', eof)
            SplitTime = SplitEOF[-2]
            Time = parse(SplitTime)
            NeedTime = Time + datetime.timedelta(days=-1)
            NeedTimeNum = (re.sub('[-: ]', '', str(NeedTime)))[0:8]
            if NeedTimeNum in NeedTimeArray:
                TimeArray.append(NeedTimeNum)
                savefile = os.path.join(out_path, eof)
                print("------------------------------------")
                print("(" + str(fileIndex) + "/" + str(len(NeedTimeArray)) + ")" + " Start downloading the Precision track files: " + eof)
                download(savefile, 'https://s1qc.asf.alaska.edu/aux_poeorb/' + eof)
                print("The precision track data: " + eof + " download is complete.")
                print("------------------------------------")
                fileIndex += 1
                if len(TimeArray) == len(NeedTimeArray):
                    print("The required precision track data is downloaded, totaling %d files" % (len(TimeArray)))
                    print("------------------------------------")
                    break
    # First retry everything that failed outright during the download loop above.
    downloadWrongOribits(error_url, out_path)
    # Then check the size of every downloaded orbit file: anything that is not in
    # the MB range, or that is under 4 MB, is treated as a broken download and
    # queued again by its full URL.
    orbitsList = os.listdir(current_path + orbits_path)
    for orbitsFile in orbitsList:
        size = getFileSize(os.path.join(current_path + orbits_path, orbitsFile))
        if size[1] != 'M' or float(size[0]) < 4.0:
            wrongDownloadOrbitsList.append('https://s1qc.asf.alaska.edu/aux_poeorb/' + orbitsFile)
    downloadWrongOribits(wrongDownloadOrbitsList, out_path)
    timeend = datetime.datetime.now()
    print('Running time: %s' % (timeend - timestart))
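One assumption worth stating: the script writes every orbit file into current_path + orbits_path but never creates that folder (nor checks for the slc folder), so every download fails and keeps being retried if the folders are missing. A small guard like the sketch below, placed right after out_path is defined, avoids that; it is my own addition, not part of the original code.

    # My addition (not in the original script): make sure the slc and orbits
    # folders exist before any download is attempted.
    if not os.path.exists(current_path + slc_path):
        raise SystemExit("Put the Sentinel-1 SLC zip files into the slc folder first.")
    if not os.path.exists(out_path):
        os.makedirs(out_path)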