以下代码实现:
import asyncio
import time
from collections import Counter
from enum import Enum
from http import HTTPStatus
from pathlib import Path
from typing import Callable

import httpx
import tqdm
# Number of concurrent requests used by the script entry point.
# NOTE: the name is misspelled ("COUCUR"), but it is kept as-is because the
# entry point at the bottom of the file refers to it.
DEFAULT_COUCUR_REQ = 5
# NOTE(review): presumably the upper bound on concurrent requests — confirm.
MAX_CONCUR_REQ = 1000
# Country codes whose flags are downloaded.
# NOTE(review): "CH" is Switzerland; if China was intended the code is "CN".
POP20_CC = ("CH IN US ID BR").split()
# Flags are fetched from f"{BASE_URL}/{cc}/{cc}.gif" (lower-cased).
BASE_URL = "http://mp.ituring.com.cn/files/flags"
# Directory (relative to the working directory) where images are saved.
DEST_DIR = Path("download")


class DownloadStatus(str, Enum):
    """Outcome of a single flag download.

    The rest of the file accesses ``DownloadStatus.OK``,
    ``DownloadStatus.NOT_FOUND`` and ``DownloadStatus.ERROR`` as attributes,
    which a plain dict cannot provide.  Mixing in ``str`` keeps
    ``DownloadStatus["NOT_FOUND"] == "not found"`` true, as it was for the
    former dict.
    """

    OK = "ok"
    NOT_FOUND = "not found"
    ERROR = "error"
def save_flag(img: bytes, filename: str) -> None:
    """Store the raw image bytes ``img`` as ``filename`` inside ``DEST_DIR``.

    The directory itself is not created here; it must already exist.
    """
    destination = DEST_DIR / filename
    destination.write_bytes(img)
async def get_flag(client: httpx.AsyncClient, basr_url: str, cc: str) -> bytes:
    """Download the flag image for country code ``cc`` and return its bytes.

    Args:
        client: The HTTP client used to issue the GET request.
        basr_url: Base URL under which the flag files live.
        cc: Country code; it names both the sub-directory and the file.

    Returns:
        The body of the HTTP response.

    Raises:
        Whatever ``response.raise_for_status()`` raises for an error status.
    """
    # The complete URL, base part included, is lower-cased before use.
    raw_url = f"{basr_url}/{cc}/{cc}.gif"
    flag_url = raw_url.lower()
    response = await client.get(flag_url, timeout=3.1, follow_redirects=True)
    response.raise_for_status()
    return response.content
async def down_one(
    client: httpx.AsyncClient,
    cc: str,
    base_url: str,
    semaphore: asyncio.Semaphore,
    verbose: bool,
) -> DownloadStatus:
    """Download and save the flag for one country code.

    Args:
        client: HTTP client handed on to ``get_flag``.
        cc: Country code of the flag to fetch.
        base_url: Base URL handed on to ``get_flag``.
        semaphore: Held while the request is in flight, so the number of
            simultaneous requests is bounded by the semaphore's value.
        verbose: When true, print a one-line result for this country code.

    Returns:
        ``DownloadStatus.OK`` when the image was saved, or
        ``DownloadStatus.NOT_FOUND`` when the server answered 404.

    Raises:
        httpx.HTTPStatusError: For any error status other than 404.
    """
    try:
        async with semaphore:
            # get_flag is a coroutine function: without ``await`` the
            # coroutine object itself would be passed on as the image.
            image = await get_flag(client, base_url, cc)
    except httpx.HTTPStatusError as exc:
        res = exc.response
        if res.status_code == HTTPStatus.NOT_FOUND:
            status = DownloadStatus.NOT_FOUND
            msg = f"not found: {res.url}"
        else:
            # Other HTTP errors are left for the caller to handle.
            raise
    else:
        # Writing the file blocks, so run it in a worker thread and keep
        # the event loop free.
        await asyncio.to_thread(save_flag, image, f"{cc}.gif")
        status = DownloadStatus.OK
        msg = "ok"
    if verbose and msg:
        print(cc, msg)
    return status
async def supervisor(
    cc_list: list[str], base_url: str, verbose: bool, concur_req: int
) -> Counter[DownloadStatus]:
    """Download all flags in ``cc_list`` concurrently and tally the outcomes.

    Args:
        cc_list: Country codes to download; they are scheduled in sorted order.
        base_url: Base URL handed on to ``down_one``.
        verbose: When true, print per-download error lines; when false, show
            a tqdm progress bar instead.
        concur_req: Initial value of the semaphore that limits how many
            requests run at once.

    Returns:
        A counter mapping each download status to the number of downloads
        that finished with it.
    """
    # The tally must not be named ``Counter``: that would shadow the class
    # inside this function and make the ``Counter()`` call fail.
    counter: Counter[DownloadStatus] = Counter()
    semaphore = asyncio.Semaphore(concur_req)
    async with httpx.AsyncClient() as client:
        to_do = [
            down_one(client, cc, base_url, semaphore, verbose)
            for cc in sorted(cc_list)
        ]
        to_do_iter = asyncio.as_completed(to_do)
        if not verbose:
            to_do_iter = tqdm.tqdm(to_do_iter, total=len(cc_list))
        for coro in to_do_iter:
            # Reset for every download; otherwise a single failure would
            # cause every later success to be counted as an error too.
            error: httpx.HTTPError | None = None
            error_msg = ""
            try:
                status = await coro
            except httpx.HTTPStatusError as exc:
                error_msg = "HTTP error {resp.status_code} - {resp.reason_phrase}"
                error_msg = error_msg.format(resp=exc.response)
                error = exc
            except httpx.RequestError as exc:
                error_msg = f"{exc} {type(exc)}".strip()
                error = exc
            except KeyboardInterrupt:
                break
            if error:
                status = DownloadStatus.ERROR
                if verbose:
                    # Recover the country code from the requested file name.
                    url = str(error.request.url)
                    cc = Path(url).stem.upper()
                    print(f"{cc} error: {error_msg}")
            counter[status] += 1
    return counter
def download_many(
    cc_list: list[str], base_url: str, verbose: bool, concur_req: int
) -> Counter[DownloadStatus]:
    """Run ``supervisor`` to completion and return its tally.

    Args:
        cc_list: Country codes to download.
        base_url: Base URL of the flag files.
        verbose: Passed through to ``supervisor``.
        concur_req: Passed through to ``supervisor``.

    Returns:
        The counter produced by ``supervisor``.
    """
    coro = supervisor(cc_list, base_url, verbose, concur_req)
    # Return the result instead of dropping it, so callers get the tally
    # that the return annotation promises.
    counts = asyncio.run(coro)
    return counts
def main(downloader: Callable[[list[str]], int]) -> None:
    """Run ``downloader`` on ``POP20_CC`` and report the result and elapsed time.

    Args:
        downloader: Called with the list of country codes; its return value
            is printed in the final report line.
    """
    # Make sure the destination directory exists before anything is saved.
    DEST_DIR.mkdir(exist_ok=True)
    t0 = time.perf_counter()
    count = downloader(POP20_CC)
    elapsed = time.perf_counter() - t0
    print(f"\n{count} downloads in {elapsed:.2f}s")
if __name__ == "__main__":
    # main() takes only a downloader and calls it with the country-code list
    # alone, while download_many() needs four arguments. Bind the remaining
    # settings here instead of passing them to main().
    concur_req = min(DEFAULT_COUCUR_REQ, MAX_CONCUR_REQ)
    main(lambda cc_list: download_many(cc_list, BASE_URL, False, concur_req))