编写python脚本：给定GitHub网址，查询网址下所有的repo，全部Git clone下来。如果中间有失败，则重试

llrraa2010

已于 2024-08-30 00:14:25 修改

阅读量278

点赞数 2

分类专栏： github 大模型　文章标签： python github git

于 2024-08-25 23:56:31 首次发布

本文链接：https://blog.csdn.net/llrraa2010/article/details/141537364

版权

github 同时被 2 个专栏收录

2 篇文章 0 订阅

订阅专栏

大模型

2 篇文章 0 订阅

订阅专栏

下面的Python脚本根据GitHub用户名或组织名（即GitHub网址中的名称），查询并克隆该用户或组织下的所有仓库；如果克隆失败，则自动重试。脚本使用requests库调用GitHub API获取仓库列表，再通过git命令行工具逐个克隆仓库。以下是一个示例脚本：

import requests
import os
import subprocess
import requests
import time

# NOTE: leftover remark from the original post ("the rest of the code stays unchanged").

# Replace with your GitHub username or organization name.
GITHUB_USERNAME = "iExecBlockchainComputing"

# Helper: send a GET request to the GitHub API, retrying on failure.
def get_github_data(url, params=None, max_retries=222, retry_delay=222):
    """Fetch ``url`` with GET and return the decoded JSON body.

    A request counts as failed when the response status is not 200 or when
    ``requests`` raises a ``RequestException`` (connection error, timeout,
    ...). Failed requests are retried after ``retry_delay`` seconds.

    Args:
        url: Address to request.
        params: Optional query parameters forwarded to ``requests.get``.
        max_retries: Maximum number of attempts before giving up.
        retry_delay: Seconds to wait between two attempts.

    Returns:
        The value produced by ``response.json()`` for the first 200 response.

    Raises:
        Exception: If no attempt succeeded within ``max_retries`` attempts.
    """
    retries = 0
    while retries < max_retries:
        try:
            # Without a timeout a stalled connection would block forever and
            # the retry logic below would never get a chance to run.
            response = requests.get(url, params=params, timeout=30)
            if response.status_code == 200:
                return response.json()
            print(f"GitHub API request failed with status code {response.status_code}")
        except requests.RequestException as e:
            print(f"An error occurred: {e}")
        retries += 1
        # Only wait when another attempt follows; sleeping before raising
        # would just delay the error report.
        if retries < max_retries:
            time.sleep(retry_delay)
    raise Exception(f"Max retries reached for URL: {url}")


# Base URL of the GitHub REST API; endpoint paths are appended to it.
GITHUB_API_BASE_URL = "https://api.github.com"


# Helper: collect the clone URLs of every repository of GITHUB_USERNAME.
def get_repositories():
    """Return the ``clone_url`` of each repository listed for GITHUB_USERNAME.

    Pages of 100 entries are requested through ``get_github_data`` until a
    page comes back with a different number of entries, which is treated as
    the last page. The accumulated list is printed after every page.
    """
    endpoint = f"{GITHUB_API_BASE_URL}/users/{GITHUB_USERNAME}/repos"
    clone_urls = []
    page = 1

    while True:
        query = {'type': 'all', 'sort': 'updated', 'per_page': 100, 'page': page}
        payload = get_github_data(endpoint, query)

        if payload is None:
            # No data for this page: wait, then request the same page again.
            print(f"time.sleep(220)  # 等待1分钟")
            time.sleep(220)
            continue

        for entry in payload:
            clone_urls.append(entry['clone_url'])
        print(clone_urls)

        # Anything other than a full page means there is nothing left to fetch.
        if len(payload) != 100:
            break
        page += 1

    return clone_urls

# Helper: clone one repository, retrying when git reports a failure.
def clone_repository(repo_url, max_retries=None, retry_delay=211):
    """Clone ``repo_url`` into the current working directory via ``git clone``.

    If the target directory already contains a ``.git`` directory the clone is
    skipped, because ``git clone`` would otherwise fail on every attempt and
    the retry loop would never end. Retries are done in a loop rather than by
    recursion so that a long run of failures cannot exhaust the call stack.

    Args:
        repo_url: Clone URL of the repository.
        max_retries: Number of retries allowed after the first failed attempt;
            ``None`` (the default) retries indefinitely.
        retry_delay: Seconds to wait before each retry.

    Raises:
        subprocess.CalledProcessError: If ``max_retries`` is given and the
            clone still fails after that many retries.
    """
    # Same default directory name git uses for ordinary URLs: the last path
    # component without a trailing ".git".
    target = repo_url.rstrip('/').rsplit('/', 1)[-1]
    if target.endswith('.git'):
        target = target[:-len('.git')]
    if target and os.path.isdir(os.path.join(target, '.git')):
        print(f"Skipping {repo_url}: {target} already exists")
        return

    failures = 0
    while True:
        try:
            subprocess.check_call(['git', 'clone', repo_url])
        except subprocess.CalledProcessError:
            failures += 1
            if max_retries is not None and failures > max_retries:
                raise
            print(f"Failed to clone {repo_url}, retrying...")
            time.sleep(retry_delay)  # back off before the next attempt
        else:
            print(f"Successfully cloned {repo_url}")
            return

# Main logic: list the repositories, record their URLs, then clone them.
if __name__ == "__main__":
    # Fetch the clone URL of every repository up front.
    repos = get_repositories()

    # Append the URLs, one per line, to the output file.
    output_file = 'repositories.txt'
    with open(output_file, 'a') as f:
        f.writelines(f"{repo_url}\n" for repo_url in repos)

    # Clone the repositories one after another.
    for repo_url in repos:
        clone_repository(repo_url)