Batch-downloading Sentinel-1 precise orbit files (.EOF) with Python

Building on the code shared by 苦学RS的小龙, this version adds readSLCFileName, which reads the acquisition dates directly from the SLC image file names (so you no longer have to list the dates of your downloaded Sentinel scenes by hand), plus getFileSize and downloadWrongOribits, which check the size of each downloaded orbit file and re-download any that came out wrong.
For the detailed workflow see:
https://blog.csdn.net/qq_44932630/article/details/124202144
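
As a quick illustration of how readSLCFileName picks up the dates: it simply slices the acquisition date out of the standard Sentinel-1 SLC file name. A minimal sketch of that idea (the file name below is a made-up example that follows the standard naming):

# Sketch of the date extraction used by readSLCFileName; the file name is a hypothetical example
slc_name = "S1A_IW_SLC__1SDV_20200103T102040_20200103T102107_030650_038342_ABCD.zip"
acq_date = slc_name[17:25]   # characters 17-24 hold the acquisition start date
print(acq_date)              # -> 20200103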

WARNING:
In this example you must fill in the complete Cookie in the headers of the download function from the original code;
replace that value with your own information (note: you also have to replace it when you switch to a different computer).
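
If you would rather not paste the Cookie into the source every time, one option (not wired up in the original script, which defines cookie.txt but never reads it) is to keep it in a cookie.txt file next to the script and load it before building the headers. A minimal sketch, assuming cookie.txt holds the single Cookie value copied from your browser after logging in to https://s1qc.asf.alaska.edu with your Earthdata account:

# Hypothetical helper: read your own Cookie string from cookie.txt instead of hard-coding it
import os
import sys

cookie_file = os.path.join(sys.path[0], "cookie.txt")
with open(cookie_file, "r") as f:
    my_cookie = f.read().strip()
# then inside download(): headers["Cookie"] = my_cookie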

# coding:utf-8
from urllib.parse import urlparse
import urllib.request
from bs4 import BeautifulSoup
import re
import datetime
from dateutil.parser import parse
import tkinter as tk
import sys
import os
import requests

timestart = datetime.datetime.now()


def downloadWrongOribits(wrongList, outPath):
    # Re-download the files in wrongList until none of them fails any more.
    # Entries may be full URLs (from error_url) or bare .EOF file names.
    base_url = 'https://s1qc.asf.alaska.edu/aux_poeorb/'
    while len(wrongList) != 0:
        print("------------------------------------")
        print("Start re-downloading the data that went wrong:")
        print(wrongList)
        retryList = list(wrongList)  # iterate over a copy, download() edits error_url
        error_url.clear()
        fileIndex = 1
        for eof in retryList:
            url = eof if eof.startswith('http') else base_url + eof
            print("(" + str(fileIndex) + "/" + str(len(retryList)) + ")" + " Start downloading the precise orbit file: " + os.path.basename(eof))
            saveFile = os.path.join(outPath, os.path.basename(eof))
            download(saveFile, url)
            fileIndex += 1
        wrongList = list(error_url)  # anything that failed again goes into the next pass
        print("The re-download of the failed data is complete.")
        print("------------------------------------")


def getFileSize(filePath):
    # Return the file size as a (value, unit) tuple; unit is one of 'Byte', 'K', 'M', 'G'
    fsize = os.path.getsize(filePath)

    if fsize < 1024:
        return round(fsize, 2), 'Byte'
    else:
        KBX = fsize / 1024
        if KBX < 1024:
            return round(KBX, 2), 'K'
        else:
            MBX = KBX / 1024
            if MBX < 1024:
                return round(MBX, 2), 'M'
            else:
                return round(MBX / 1024, 2), 'G'


def readSLCFileName(parentPath, path):
    # Collect the acquisition dates (YYYYMMDD) of all SLC zip files in parentPath + path
    orbitsDateListContainsDuplicates = []
    zip_file_list = []
    for file in os.listdir(parentPath + path):
        if file.endswith("zip"):
            zip_file_list.append(file)

    # Log the zip file names, one per line
    with open("zip_file_list.txt", "w") as logFile:
        for zip_file in zip_file_list:
            logFile.write(zip_file + "\n")

    with open("zip_file_list.txt", 'r') as logFile:
        lines = logFile.read().splitlines()

    # Characters 17-24 of a standard SLC file name hold the acquisition start date
    for line in lines:
        orbitsDateListContainsDuplicates.append(line[17:25])

    # Drop duplicate dates (several slices of the same acquisition share one date)
    orbitsDateList = []
    for date in orbitsDateListContainsDuplicates:
        if date not in orbitsDateList:
            orbitsDateList.append(date)

    return orbitsDateList


def download(dest_dir, url):
    print(url)
    print(dest_dir)
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
        "Connection": "keep-alive",
        "Cookie": "_ga_XCPHL9DW7E=GS1.1.1696731884.3.1.1696732536.0.0.0; _ga=GA1.2.1813036397.1695265545; "
                  "_ce.s=v~998cce5ddc7dea52e816b862b1f59664c97255b0~lcw~1696732434540~vpv~2~v11.fhb~1696731892368~v11"
                  ".lhb~1696732494573~lcw~1696732494574; "
                  "asf-urs=eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9"
                  ".eyJmaXJzdF9uYW1lIjoiQ2hlbnpoaWhhbyIsImxhc3RfbmFtZSI6IlFpYW4iLCJ1cnMtdXNlci1pZCI6Imdpc2VyY2huIiwidXJzLWFjY2Vzcy10b2tlbiI6ImV5SjBlWEFpT2lKS1YxUWlMQ0p2Y21sbmFXNGlPaUpGWVhKMGFHUmhkR0VnVEc5bmFXNGlMQ0p6YVdjaU9pSmxaR3hxZDNSd2RXSnJaWGxmYjNCeklpd2lZV3huSWpvaVVsTXlOVFlpZlEuZXlKMGVYQmxJam9pVDBGMWRHZ2lMQ0pqYkdsbGJuUmZhV1FpT2lKQ1QxOXVOMjVVU1d4TmJHcGtkbFUyYTFKU1FqTm5JaXdpWlhod0lqb3hOams1TlRnNE5UY3pMQ0pwWVhRaU9qRTJPVFk1T1RZMU56TXNJbWx6Y3lJNklrVmhjblJvWkdGMFlTQk1iMmRwYmlJc0luVnBaQ0k2SW1kcGMyVnlZMmh1SW4wLnF6V1hnQmhJblBQWkZuXy00Vkd0dW5abEZCVUZNVkxwMHM1cE0tMXlRZ0VGR2piM21MekQtV1hpaWFHbzlxWkFJN3lRSVFHWUwwRS1Ja2VUNEhGU1ZRZl9QWU1SdG5hN3JYZU5PQmNWTWlGWFhucmk5Ynk4bGI2NWhZNC10U2VpUGs3WVhWREtZWkpKMkJzRWNVWG5XSXZzS05oanQtcjRubmJ2RVRDVnBFUGExUlZJbm1RTEpLS3ZpVFl1R0ItMFIxM2d6enlRc0xLV1BCWmFVQ2JVM3VNTUdRYlFZS1dxWkJRZXF0bGtDN2FMMVpKbnQ1aW8tcmtoMjNSa0pUbHR3NGk0RkNKOE1HVWp5eDdxUXMxSGNfX2pDUDV6eHBJQ1RBZ1VUVnZIbUhjaUI0MWpDcFlZYkplNGttb1kwb2pRY0wzSG83Q2hoY3VNYkp6QUZpQXhvQSIsInVycy1ncm91cHMiOltdLCJpYXQiOjE2OTY5OTY1NzQsImV4cCI6MTY5NzYwMTM3NH0.T3w_cX26656amley8u6brpP3-QM8hEfsbXO1ZVvHHYSluKkiLnPBbHjFFdm0l7pbTgpN8hoafq0ZImb_oTBNCb5RNaSCMY8OqV6Yw7AeRT8lFE1ZH9IewGfHt_LbMkfrQYeR0KrQRObEjT8xUdyESWJDPSOvNywqXYlncnjKxceIK0fxaHtBsBJmKK4gZOiYJVUc6WXmGPxZAtb7wPmPb7YjcqFFnxfJbdkEUBqXh_uwDU000thixMuDN2_tfuxT4tDML5v6CsYMNhq4CxSq5FYzfyxftPhLTqS2yGQsG-xGs-9PuGCSLwrQjxxh89Birk0PlVsbExGVS9HLikEyFAl23dh-3J5lOE-T7ZdKcm3G5a0J5oGuul8SGZTe91S4X7RELSomkEMhH5mHk1zTQ_DW7UC1p5RUKD0H7uLr6IrAMHEsPbovl11VsaKtLl5SIuTqpuEyXolx5GR6Us-DUEu9F-HD3HCbYL5AMyrKPI0NI29SU_YhNZ6jlxOmaSujzWH3nAI7p5wbZib6tf1PU6M_ih3LJSter7bD3lmj43yttA7MNVGsZGZ8z3xKJdNikA0CBrYotBd3dUSKSBhuN69HVpIxQ1_X9fq_QdVGQSo--vnbI9llLO4lcMDac3LiBD7Ky7aU64FwumYuvDtpVhO2j5AxMwMMKLFfvsIBcmY; urs-user-id=giserchn; urs-access-token=eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIjoiT0F1dGgiLCJjbGllbnRfaWQiOiJCT19uN25USWxNbGpkdlU2a1JSQjNnIiwiZXhwIjoxNjk5NTg4NTczLCJpYXQiOjE2OTY5OTY1NzMsImlzcyI6IkVhcnRoZGF0YSBMb2dpbiIsInVpZCI6Imdpc2VyY2huIn0.qzWXgBhInPPZFn_-4VGtunZlFBUFMVLp0s5pM-1yQgEFGjb3mLzD-WXiiaGo9qZAI7yQIQGYL0E-IkeT4HFSVQf_PYMRtna7rXeNOBcVMiFXXnri9by8lb65hY4-tSeiPk7YXVDKYZJJ2BsEcUXnWIvsKNhjt-r4nnbvETCVpEPa1RVInmQLJKKviTYuGB-0R13gzzyQsLKWPBZaUCbU3uMMGQbQYKWqZBQeqtlkC7aL1ZJnt5io-rkh23RkJTltw4i4FCJ8MGUjyx7qQs1Hc__jCP5zxpICTAgUTVvHmHciB41jCpYYbJe4kmoY0ojQcL3Ho7ChhcuMbJzAFiAxoA",
        "Host": "s1qc.asf.alaska.edu",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "none",
        "Sec-Fetch-User": "?1",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0",

    }
    try:
        request = urllib.request.Request(url, headers=headers)
        response = urllib.request.urlopen(request)
        # Save the orbit file to disk
        with open(dest_dir, "w") as f:
            for line in response.readlines():
                f.write(line.decode())
    except Exception:
        error_url.append(url)
        print("\tError retrieving the URL:", dest_dir)
    else:
        if url in error_url:
            error_url.remove(url)


if __name__ == '__main__':
    current_path = sys.path[0]
    slc_path = "\\slc"
    orbits_path = "\\orbits"
    cookie_txt_file_path = sys.path[0] + "\\cookie.txt"

    orbitsList = []
    wrongDownloadOrbitsList = []

    root = tk.Tk()
    root.withdraw()
    error_url = []

    cookie_path = cookie_txt_file_path
    out_path = current_path + orbits_path

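    # Build the query for the ASF aux_poeorb listing: S1A precise orbit files valid from the chosen start date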
    url_param_json = {}
    url_param_json['sentinel1__mission'] = 'S1A'

    date = '2015-01-01'
    url_param_json['validity_start'] = date

    url_param = urllib.parse.urlencode(url_param_json)
    url = 'https://s1qc.asf.alaska.edu/aux_poeorb/?%s' % url_param
    html = requests.get(url).content
    dom = BeautifulSoup(html, "lxml")
    a_list = dom.findAll("a")
    eof_lists = [a['href'] for a in a_list if a['href'].endswith('.EOF')]
    TimeArray = []
    NeedTimeArray = readSLCFileName(current_path, slc_path)
    fileIndex = 1
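    # The last timestamp in a POEORB file name is its validity stop, one day after the
    # acquisition it covers, so shift it back by one day and match it against the SLC dates.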
    for eof in eof_lists:
        if os.path.splitext(eof)[1] == ".EOF" and os.path.basename(eof)[0:3] == 'S1A':
            SplitEOF = re.split(r'[_,.,\s ]\s*', eof)
            SplitTime = SplitEOF[-2]
            Time = parse(SplitTime)
            NeedTime = Time + datetime.timedelta(days=-1)
            NeedTimeNum = (re.sub('[-,:, ]', '', str(NeedTime)))[0:8]
            if NeedTimeNum in NeedTimeArray:
                TimeArray.append(NeedTimeNum)
                savefile = os.path.join(out_path, eof)
                print("------------------------------------")
                print("(" + str(fileIndex) + "/" + str(len(NeedTimeArray)) + ")" + " Start downloading the precise orbit file: " + eof)
                download(savefile, 'https://s1qc.asf.alaska.edu/aux_poeorb/' + eof)
                print("The precise orbit file " + eof + " download is complete.")
                print("------------------------------------")
                fileIndex += 1
                if len(TimeArray) == len(NeedTimeArray):
                    print("The required precision track data is downloaded, totaling %d files" % (len(TimeArray)))
                    print("------------------------------------")
                    break
            else:
                continue

    downloadWrongOribits(error_url, out_path)

    # Check every downloaded orbit file: anything smaller than about 4 MB is treated as broken
    orbitsList = os.listdir(current_path + orbits_path)
    for orbitsFile in orbitsList:
        size = getFileSize(os.path.join(out_path, orbitsFile))
        if size[1] != 'M':
            wrongDownloadOrbitsList.append(orbitsFile)
        elif float(size[0]) < 4.0:
            wrongDownloadOrbitsList.append(orbitsFile)

    downloadWrongOribits(wrongDownloadOrbitsList, out_path)

    timeend = datetime.datetime.now()
    print('Running time: %.1f seconds' % (timeend - timestart).total_seconds())
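
Usage note (inferred from the paths hard-coded in the script): run it from its own folder on Windows, with the SLC zip files placed in a slc subfolder; the downloaded .EOF files are written to an orbits subfolder, which should already exist, roughly like this:

working_dir\
├── download_orbits.py   (this script; the file name is just an example)
├── cookie.txt           (optional, see the note above)
├── slc\                 (your S1A SLC .zip files)
└── orbits\              (the precise orbit .EOF files are saved here)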
