import os
import requests
import json
import time
import re
from urllib.parse import urljoin, urlparse
import concurrent.futures
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")  # silences ALL warnings, incl. urllib3 InsecureRequestWarning (session uses verify=False)
class TilesetDownloader:
    """Recursively download a Cesium 3D Tiles dataset.

    Starting from a root ``tileset.json`` this resolves every ``uri`` entry,
    recurses depth-first into external tilesets (``*.json``) and fetches the
    remaining resources (b3dm/pnts/... tiles) concurrently, mirroring the
    remote directory layout under ``output_dir``.
    """

    # NOTE(review): a hard-coded ion bearer token is a leaked, short-lived
    # credential (see the `exp` claim in the JWT). It is kept only as a
    # fallback so existing behavior is unchanged; set the CESIUM_ION_TOKEN
    # environment variable to supply a fresh token.
    _DEFAULT_AUTH = 'Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJqdGkiOiJlZWI1M2I1ZS00OTY3LTQ4N2YtOTRkMy00OTI5MGY0NDViMWIiLCJpZCI6MjU5LCJhc3NldElkIjo0MDg2NiwiYXNzZXRzIjp7IjQwODY2Ijp7InR5cGUiOiIzRFRJTEVTIn19LCJzcmMiOiI3ODZkMDQzOS03ZGJjLTQzZWUtYjlmYy04ZmM5Y2UwNzNhMmYiLCJpYXQiOjE3NDczMjMxNzUsImV4cCI6MTc0NzMyNjc3NX0.CHaBLjc4cspTYeZvocdkaiNKILaqbJcX5xMDDaaACyE'

    def __init__(self, base_url, output_dir, max_workers=5):
        """
        Args:
            base_url: root URL of the tileset (trailing slash optional).
            output_dir: local directory the remote tree is mirrored into
                (created if missing).
            max_workers: thread-pool size for concurrent resource downloads.
        """
        self.base_url = base_url.rstrip('/') + '/'
        self.output_dir = output_dir
        self.max_workers = max_workers
        self.session = requests.Session()
        # TLS verification is disabled on purpose; pairs with the
        # module-level warnings filter.
        self.session.verify = False
        self.session.headers.update({
            'authorization': os.environ.get('CESIUM_ION_TOKEN', self._DEFAULT_AUTH),
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36',
        })
        # URLs already fetched / permanently failed. Shared across worker
        # threads; set.add is atomic under CPython's GIL.
        self.downloaded_files = set()
        self.failed_files = set()
        os.makedirs(self.output_dir, exist_ok=True)

    def resolve_relative_url(self, base_url, relative_path):
        """Resolve *relative_path* against *base_url* (a directory URL).

        Absolute http(s) URLs pass through untouched; everything else,
        including ``../``-prefixed paths, is resolved per RFC 3986 with
        :func:`urllib.parse.urljoin`.

        Fix: the previous hand-rolled ``../`` branch applied
        ``os.path.dirname`` once before the loop and once more per ``../``;
        with a slash-terminated base (the only kind this class passes in)
        that climbed one directory level too many. ``urljoin`` already
        handles ``../`` correctly.
        """
        if relative_path.startswith(('http://', 'https://')):
            return relative_path
        return urljoin(base_url, relative_path)

    def safe_local_path(self, url):
        """Map *url* to a local file path, preserving the remote directory
        structure under ``output_dir``.

        The query string is dropped (only ``urlparse(url).path`` is used) and
        characters invalid on Windows filesystems are replaced with ``_``.
        """
        parsed = urlparse(url)
        path = parsed.path.lstrip('/')
        # Sanitize special characters but keep the path structure ('/' intact).
        safe_path = re.sub(r'[<>:"|?*]', '_', path)
        return os.path.normpath(os.path.join(self.output_dir, safe_path))

    def download_file(self, url, local_path=None, max_retries=3):
        """Stream *url* to *local_path* with retries and exponential backoff.

        Returns:
            True on success (or if the URL was already downloaded),
            False after *max_retries* failed attempts; the URL is then
            recorded in ``self.failed_files`` and any partially written
            file is removed so a rerun will retry it.
        """
        if url in self.downloaded_files:
            return True
        if not local_path:
            local_path = self.safe_local_path(url)
        for attempt in range(max_retries):
            try:
                # (connect, read) timeouts; stream to avoid buffering large tiles.
                response = self.session.get(url, stream=True, timeout=(30, 60))
                response.raise_for_status()
                os.makedirs(os.path.dirname(local_path), exist_ok=True)
                # ValueError here (non-numeric content-length) is caught below.
                total_size = int(response.headers.get('content-length', 0))
                with open(local_path, 'wb') as f, tqdm(
                    desc=os.path.basename(local_path)[:30],
                    total=total_size,
                    unit='B',
                    unit_scale=True,
                    leave=False,
                ) as pbar:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:  # skip keep-alive chunks
                            f.write(chunk)
                            pbar.update(len(chunk))
                self.downloaded_files.add(url)
                return True
            except (requests.RequestException, OSError, ValueError):
                if attempt == max_retries - 1:
                    print(f"\nFailed after {max_retries} attempts: {url}")
                    self.failed_files.add(url)
                    # Remove a possibly partial file so reruns retry cleanly.
                    try:
                        if os.path.exists(local_path):
                            os.remove(local_path)
                    except OSError:
                        pass
                    return False
                time.sleep(2 ** attempt)  # exponential backoff
        return False  # defensive: loop always returns above

    def process_tileset(self, tileset_url, parent_dir=None):
        """Download *tileset_url*, then recursively fetch everything it references.

        External tilesets (``*.json`` uris) are processed depth-first so their
        own resources are discovered; all other resources are downloaded
        through a thread pool of ``max_workers`` threads.
        """
        print(f"\nProcessing: {tileset_url}")
        # Directory URL used to resolve relative uris inside this tileset.
        current_base = parent_dir if parent_dir else os.path.dirname(tileset_url) + '/'
        local_path = self.safe_local_path(tileset_url)
        if not self.download_file(tileset_url, local_path):
            return
        try:
            with open(local_path, 'r', encoding='utf-8') as f:
                # rstrip(',') tolerates a stray trailing comma after the JSON body.
                content = f.read().strip().rstrip(',')
            tileset = json.loads(content)
        except (OSError, ValueError) as e:  # JSONDecodeError/UnicodeDecodeError are ValueErrors
            print(f"JSON解析失败: {local_path} - {str(e)}")
            return

        # Collect every uri referenced anywhere in the tileset JSON tree.
        files_to_download = set()

        def collect_uris(obj, base):
            # Walk the structure; any dict entry 'uri' holding a non-empty
            # string is a reference. NOTE(review): absolute http(s) uris are
            # deliberately skipped (original behavior) — presumably
            # cross-origin content that should not be mirrored; confirm.
            if isinstance(obj, dict):
                for k, v in obj.items():
                    if k == 'uri' and isinstance(v, str) and v:
                        if not v.startswith(('http://', 'https://')):
                            files_to_download.add(self.resolve_relative_url(base, v))
                    else:
                        collect_uris(v, base)
            elif isinstance(obj, list):
                for item in obj:
                    collect_uris(item, base)

        collect_uris(tileset, current_base)

        # External tilesets recurse; everything else is a plain resource.
        external_tilesets = {f for f in files_to_download if f.endswith('.json')}
        resource_files = files_to_download - external_tilesets

        # Depth-first recursion into external tilesets.
        for ext_url in external_tilesets:
            if ext_url not in self.downloaded_files:
                self.process_tileset(ext_url, os.path.dirname(ext_url) + '/')

        # Concurrent download of the remaining resources.
        with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            futures = [
                executor.submit(self.download_file, file_url, self.safe_local_path(file_url))
                for file_url in resource_files
                if file_url not in self.downloaded_files
            ]
            print(f"Downloading {len(futures)} resources...")
            for _ in tqdm(concurrent.futures.as_completed(futures),
                          total=len(futures),
                          desc="Progress"):
                pass
def main():
    """CLI entry point: download the configured Cesium ion asset and print a summary."""
    # Target dataset configuration.
    config = {
        "index_url": "https://assets.ion.cesium.com/ap-northeast-1/40866/tileset.json?v=2",
        "base_url": "https://assets.ion.cesium.com/ap-northeast-1/40866/",
        "output_dir": "./cesium_data",
        "max_workers": 8,
    }

    banner = "=" * 60
    print(banner)
    print("3DTiles Downloader - 专业版".center(60))
    print(banner)
    print(f"索引文件: {config['index_url']}")
    print(f"输出目录: {os.path.abspath(config['output_dir'])}")
    print(f"并发数: {config['max_workers']}")
    print(banner + "\n")

    dl = TilesetDownloader(
        config["base_url"],
        config["output_dir"],
        config["max_workers"],
    )
    try:
        t0 = time.time()
        dl.process_tileset(config["index_url"])

        # Summary report.
        print("\n" + banner)
        print("下载摘要".center(60))
        print(banner)
        print(f"成功下载: {len(dl.downloaded_files)} 个文件")
        print(f"失败文件: {len(dl.failed_files)} 个")
        print(f"耗时: {time.time() - t0:.2f} 秒")

        # Persist failed URLs for a later retry pass.
        if dl.failed_files:
            failed_file = "failed_downloads.txt"
            with open(failed_file, 'w') as f:
                f.write("\n".join(sorted(dl.failed_files)))
            print(f"\n失败URL已保存到: {failed_file}")
    except KeyboardInterrupt:
        print("\n用户中断下载!已下载文件保留。")
    except Exception as e:  # top-level boundary: report and exit cleanly
        print(f"\n发生未预期错误: {str(e)}")
# Script entry point guard: run the downloader only when executed directly.
if __name__ == '__main__':
    main()