批量下载 SWF 文件

1. 判断哪些 URL 可用,并将可用的 URL 列表保存为本地 CSV 文件

# Probe a numbered range of .swf URLs and record which ones exist.
import requests
import pandas as pd

def check_url_status(base_url, start, end):
    """Return the URLs in a numbered range that answer a HEAD request with 200.

    Candidate URLs are built as ``f"{base_url}{i}.swf"`` for every integer
    ``i`` from ``start`` to ``end`` inclusive.

    Args:
        base_url: Prefix that the number and the ``.swf`` suffix are appended to.
        start: First number to probe.
        end: Last number to probe (inclusive).

    Returns:
        A list of the candidate URLs whose response status code was exactly
        200, in ascending numeric order. Empty when ``start > end``.
    """
    reachable = []
    candidates = (f"{base_url}{index}.swf" for index in range(start, end + 1))

    for candidate in candidates:
        try:
            # HEAD is enough to learn the status without fetching the body.
            status = requests.head(candidate, timeout=5).status_code
        except requests.RequestException as error:
            # A network failure is reported and the scan moves on.
            print(f"Failed to reach {candidate}: {error}")
            continue

        # Any status other than 200 is skipped without output.
        if status != 200:
            continue

        reachable.append(candidate)
        print(f"URL {candidate} is valid.")

    return reachable

def save_to_csv(urls, filename):
    """Write ``urls`` to ``filename`` as a one-column CSV headed "Valid URLs".

    Args:
        urls: Sequence of URL strings, one per output row.
        filename: Destination path of the CSV file.

    The row index is not written; a confirmation line is printed afterwards.
    """
    column_names = ["Valid URLs"]
    table = pd.DataFrame(data=urls, columns=column_names)
    table.to_csv(path_or_buf=filename, index=False)
    print(f"Saved valid URLs to {filename}.")

# Scan settings: candidate URLs are base_url + number + ".swf",
# probed for every number from start to end inclusive.
base_url = "http://mole.61.com/resource/map/"
start, end = 101, 200
filename = r"F:\临时文件夹\valid_urls2.csv"

# Probe the range, then persist the URLs that answered with status 200.
valid_urls = check_url_status(base_url=base_url, start=start, end=end)
save_to_csv(urls=valid_urls, filename=filename)

2. 将 CSV 中每个 URL 对应的 SWF 文件下载到本地

# Download every valid URL listed in the CSV produced by the previous cell.
import os
import requests
import pandas as pd

# Load the URL list from a CSV file.
def read_urls_from_csv(filename):
    """Return the values of the "Valid URLs" column of ``filename`` as a list.

    Args:
        filename: Path of a CSV file that has a "Valid URLs" header column.

    Returns:
        The column's values as a plain Python list, in file order.
    """
    url_column = pd.read_csv(filename)["Valid URLs"]
    return url_column.to_list()

# Download a single SWF file.
def download_swf(url, save_path, timeout=10):
    """Stream ``url`` into ``save_path``, naming the file after the URL's last segment.

    Args:
        url: Address of the file to fetch.
        save_path: Existing directory the file is written into.
        timeout: Seconds to wait for the server to connect or to send data
            before giving up. Without it a stalled server would block the
            whole batch indefinitely.

    Request failures (including timeouts and non-2xx statuses) are printed
    and swallowed so that a batch run continues with the next URL. Errors
    raised while opening or writing the local file are not caught.
    """
    local_filename = os.path.join(save_path, url.split("/")[-1])
    # Stream into a sibling ".part" file and rename only on success, so an
    # interrupted transfer never leaves a truncated file under the final name.
    partial_filename = local_filename + ".part"
    try:
        with requests.get(url, stream=True, timeout=timeout) as r:
            r.raise_for_status()
            with open(partial_filename, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        os.replace(partial_filename, local_filename)
    except requests.RequestException as e:
        print(f"Failed to download {url}: {e}")
        # Discard whatever was written before the request failed.
        if os.path.exists(partial_filename):
            os.remove(partial_filename)
    else:
        print(f"Downloaded {url} to {local_filename}")

# Entry point of the download step.
def main(csv_filename, save_path):
    """Download every URL listed in ``csv_filename`` into ``save_path``.

    Args:
        csv_filename: CSV file whose URL column is read via
            ``read_urls_from_csv``.
        save_path: Target directory; created (with parents) if missing.
    """
    # exist_ok=True makes this a no-op for an existing directory and avoids
    # the race between a separate existence check and the creation call.
    os.makedirs(save_path, exist_ok=True)

    # Fetch the files one at a time, in the order they appear in the CSV.
    for url in read_urls_from_csv(csv_filename):
        download_swf(url, save_path)

# Settings.
# The input must be the CSV written by the URL-check step; that step saves
# its result as valid_urls2.csv, so the same file name is read here.
csv_filename = r"F:/临时文件夹/valid_urls2.csv"  # path of the CSV with the valid URLs
save_path = r"F:/临时文件夹/素材"  # directory the downloaded files are saved to

# Run the download.
main(csv_filename, save_path)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值