# 1. Check which URLs are reachable and save the valid ones locally
import requests
import pandas as pd
def check_url_status(base_url, start, end):
    """Probe numbered .swf URLs with HEAD requests and collect the reachable ones.

    Builds ``{base_url}{i}.swf`` for every i in [start, end] (inclusive) and
    keeps the URL when the server answers HTTP 200.

    Args:
        base_url: URL prefix the numeric file name is appended to.
        start: First file number to probe (inclusive).
        end: Last file number to probe (inclusive).

    Returns:
        list[str]: URLs that responded with status code 200.
    """
    valid_urls = []
    for i in range(start, end + 1):
        url = f"{base_url}{i}.swf"
        try:
            # HEAD avoids downloading the body; 5 s timeout keeps the scan moving.
            response = requests.head(url, timeout=5)
            if response.status_code == 200:
                valid_urls.append(url)
                print(f"URL {url} is valid.")
            # Non-200 responses are silently skipped (the original had a dead
            # `else: pass` branch here, now removed).
        except requests.RequestException as e:
            # Network failures are reported but do not abort the scan.
            print(f"Failed to reach {url}: {e}")
    return valid_urls
def save_to_csv(urls, filename):
    """Write the list of valid URLs to a one-column CSV file.

    Args:
        urls: Iterable of URL strings to persist.
        filename: Destination CSV path (column header is "Valid URLs").
    """
    df = pd.DataFrame(urls, columns=["Valid URLs"])
    df.to_csv(filename, index=False)
    # Bug fix: the original printed the literal text "(unknown)" instead of
    # interpolating the destination path into the f-string.
    print(f"Saved valid URLs to {filename}.")
# --- Script configuration for part 1: probe map resources 101-200 ---
base_url = "http://mole.61.com/resource/map/"
start = 101
end = 200
# NOTE(review): this writes valid_urls2.csv, but part 2 below reads
# valid_urls.csv — confirm the filenames are intentionally different.
filename = r"F:\临时文件夹\valid_urls2.csv"
valid_urls = check_url_status(base_url, start, end)
save_to_csv(valid_urls, filename)
# 2. Download the swf files behind the saved URLs to the local disk
import os
import requests
import pandas as pd
def read_urls_from_csv(filename):
    """Load the "Valid URLs" column of *filename* as a plain Python list."""
    frame = pd.read_csv(filename)
    return list(frame["Valid URLs"])
def download_swf(url, save_path):
    """Stream one swf file from *url* into directory *save_path*.

    The local file keeps the last path segment of the URL as its name.
    Network errors are reported and swallowed so a batch run continues.
    """
    target = os.path.join(save_path, url.split("/")[-1])
    try:
        with requests.get(url, stream=True) as resp:
            resp.raise_for_status()
            with open(target, 'wb') as out:
                # 8 KiB chunks keep memory usage flat for large files.
                for piece in resp.iter_content(chunk_size=8192):
                    out.write(piece)
            print(f"Downloaded {url} to {target}")
    except requests.RequestException as e:
        print(f"Failed to download {url}: {e}")
def main(csv_filename, save_path):
    """Ensure the target directory exists, then download every listed URL."""
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    for target_url in read_urls_from_csv(csv_filename):
        download_swf(target_url, save_path)
# --- Script entry for part 2: read the saved URL list and download each swf ---
# NOTE(review): reads valid_urls.csv while part 1 above writes valid_urls2.csv —
# confirm which file is the intended input.
csv_filename = r"F:/临时文件夹/valid_urls.csv"
save_path = r"F:/临时文件夹/素材"
main(csv_filename, save_path)