最近公司需要从Gitlab迁移回Bitbucket, 几百个项目, 手工操作太费劲, 在我的gpt4帮助下,编写了自动化同步脚本,一键完成迁移, AI提升效率.
具体流程如下,供参考:
1-从Gitlab读取有权限的项目信息
前提条件:python环境,安装相关包 pip install requests pandas loguru -i https://pypi.tuna.tsinghua.edu.cn/simple
申请gitlab ACCESS_TOKEN,并添加相关仓库权限, 修改下面代码的参数 ACCESS_TOKEN = ""
https://gitlab.xx.xyz/-/profile/personal_access_tokens
2-将Gitlab项目同步到Bitbucket仓库,包括根据Gitlab仓库名称自动创建Bitbucket新的仓库以及同步Gitlab项目上所有的分支和tag到Bitbucket
前提条件:python环境
代码运行的服务器的ssh key添加到gitlab和bitbucket
服务器生成ssh key:参考https://blog.csdn.net/qq_58838490/article/details/132834327
import requests
import pandas as pd
from datetime import datetime
# Personal access token; it is sent as the PRIVATE-TOKEN header on every GitLab request below.
ACCESS_TOKEN = "" # Fill in your access token here, and replace the placeholder domain in the URL below with your GitLab domain
# Projects endpoint of the GitLab v4 REST API; the paginated project listing is read from this URL.
GITLAB_API_URL = "https://gitlab.xxx.xyz/api/v4/projects"
def get_project_details(token):
    """Return every project the GitLab projects endpoint lists for *token*.

    Pages of 100 entries are requested from ``GITLAB_API_URL`` one after
    another until the API answers with an empty page.

    Args:
        token: GitLab access token, sent as the ``PRIVATE-TOKEN`` header.

    Returns:
        A list with the decoded JSON entry of every project, in API order.

    Raises:
        requests.HTTPError: if any page request returns an error status
            (via ``raise_for_status``).
    """
    auth_headers = {"PRIVATE-TOKEN": token}
    query = {
        "per_page": "100",
        # Ask for custom attributes too (original author's note: e.g. storage size).
        "with_custom_attributes": "true",
    }
    collected = []
    page_number = 1
    while True:
        query['page'] = page_number
        reply = requests.get(GITLAB_API_URL, headers=auth_headers, params=query)
        reply.raise_for_status()
        batch = reply.json()
        if not batch:
            # An empty page means we have walked past the last project.
            return collected
        collected.extend(batch)
        page_number += 1
def get_project_members(project_id, token):
    """Return the names of all members of a project as one comma-separated string.

    The members endpoint is paginated, so a single request only returns the
    first page. This walks every page (100 entries each) until an empty page
    comes back, the same scheme ``get_project_details`` uses.

    Args:
        project_id: GitLab project ID used to build the members URL.
        token: GitLab access token, sent as the ``PRIVATE-TOKEN`` header.

    Returns:
        The ``name`` field of every member joined with ``", "``; an empty
        string when the project has no members.

    Raises:
        requests.HTTPError: if any page request returns an error status.
    """
    # Derive the URL from GITLAB_API_URL so the GitLab host only has to be
    # configured in one place.
    project_members_url = f"{GITLAB_API_URL}/{project_id}/members/all"
    headers = {"PRIVATE-TOKEN": token}
    member_names = []
    page = 1
    while True:
        response = requests.get(
            project_members_url,
            headers=headers,
            params={"per_page": 100, "page": page},
        )
        response.raise_for_status()
        members = response.json()
        if not members:
            break
        member_names.extend(member['name'] for member in members)
        page += 1
    return ', '.join(member_names)
def create_dataframe(projects, token):
    """Build a DataFrame with one row per GitLab project.

    Args:
        projects: iterable of project dicts as returned by
            ``get_project_details``; each must provide ``id``,
            ``ssh_url_to_repo``, ``creator_id``, ``created_at`` and
            ``last_activity_at``.
        token: GitLab access token, forwarded to ``get_project_members``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``git_url``, ``member``,
        ``project``, ``creator``, ``create_time`` and ``last_update``. The
        columns are present even when *projects* is empty, so the exported
        CSV always has a header row.
    """
    columns = ["git_url", "member", "project", "creator", "create_time", "last_update"]
    projects_data = []
    for project in projects:
        members = get_project_members(project['id'], token)
        ssh_url = project['ssh_url_to_repo']
        projects_data.append({
            "git_url": ssh_url,
            "member": members,
            # Text between the first ':' and the first '/' after it,
            # e.g. "group" in "git@host:group/repo.git".
            "project": ssh_url.split(':', 1)[1].split('/', 1)[0],
            "creator": project['creator_id'],  # You may want to map this ID to a user name
            "create_time": project['created_at'],
            "last_update": project['last_activity_at'],
            # NOTE(review): the source had a truncated, unparsable fragment here
            # that looks like the tail of an optional, commented-out
            # storage-size column; it is left out because its full expression
            # is not recoverable.
        })
    return pd.DataFrame(projects_data, columns=columns)
if __name__ == "__main__":
    # Step 1: list every project the token can see, then attach member names.
    projects = get_project_details(ACCESS_TOKEN)
    df = create_dataframe(projects, ACCESS_TOKEN)

    # Step 2: export to a CSV stamped with today's date (YYYYMMDD), without
    # the DataFrame index column.
    date_suffix = datetime.now().strftime("%Y%m%d")
    csv_filename = f"gitlab_projects_{date_suffix}.csv"
    df.to_csv(csv_filename, index=False)
2-将Gitlab项目同步到Bitbucket仓库,包括根据Gitlab仓库名称自动创建Bitbucket新的仓库以及同步Gitlab项目上所有的分支和tag到Bitbucket
前提条件:python环境
代码运行的服务器的ssh key添加到gitlab和bitbucket
服务器生成ssh key:参考https://blog.csdn.net/qq_58838490/article/details/132834327
git config --global user.name "登录账户名"
git config --global user.email "邮箱"
ssh-keygen -t rsa -C "邮箱"
cat ~/.ssh/id_rsa.pub
Gitlab
https://gitlab.xx.xyz/-/profile/keys
Bitbucket
https://bitbucket.org/account/settings/ssh-keys/
新建一个你有权限的项目: 如bitbucket_project = "PROJECT"
申请Bitbucket的app应用密码 https://bitbucket.org/account/settings/app-passwords/
修改下面代码的如下参数:
bitbucket_workspace = "xx"
bitbucket_project = "xx" ## 填写你的项目名
username = "xxx" ## 填写你的用户名
app_password = "xxx" # 请在这里填写你的应用密码
import os
import subprocess
import requests
from urllib.parse import urljoin
import pandas as pd
from datetime import datetime
from loguru import logger
# Module-level configuration for the sync script. Everything below runs at
# import time: it installs a loguru file sink, reads the CSV and builds the
# list of repositories to sync.
# The CSV name embeds today's date, so it only matches an export that was
# written with the same date suffix.
date_suffix = datetime.now().strftime("%Y%m%d")
csv_filename = f"gitlab_projects_{date_suffix}.csv" ## CSV with the GitLab repository info saved by the previous step
DOWNLOAD_DIR="./"
# loguru log file placed in DOWNLOAD_DIR and named after the CSV file.
log_file = os.path.join(DOWNLOAD_DIR, f'{csv_filename}.log')
logger.add(log_file, rotation="10 MB", encoding='utf-8')
# Read the CSV file
df = pd.read_csv(csv_filename)
# Select the rows whose 'project' column equals a single value
# df_xic_repo = df[df['project'] == 'xx']
# Configuration parameters
df_xic_repo=df[df['project'].isin([ 'project_name'])] ## Fill in the GitLab project name(s) you want to sync
# Log how many repositories were selected per project.
grouped_counts = df_xic_repo.groupby('project').size()
logger.info(grouped_counts)
# SSH clone URLs of the selected repositories; rows without a URL are dropped.
gitlab_repos= df_xic_repo['git_url'].dropna().tolist()
logger.info(f"totoal {len(gitlab_repos)} repos need to be synced!")
logger.info(gitlab_repos)
# gitlab_repos = [
# # "git@gitlab.xx.xyz:xic/xic-xx.git",
# "git@gitlab.xx.xyz:algorithm/xx.git",
# ] # Alternatively, fill in the list of repositories to sync by hand
bitbucket_workspace = "xtalpi"
bitbucket_project = "XIC" ## Fill in your Bitbucket project key
username = "username" ## Fill in your user name
app_password = "xxx" # Fill in your app password here
base_dir = "/data/project/gitlab" ## Temporary directory where repositories are cloned and pushed from
# Base URL of the Bitbucket repositories API; "<workspace>/<slug>" is joined onto it.
bitbucket_api_url = "https://api.bitbucket.org/2.0/repositories/"
# Logging helper
def log_message(message, log_file="sync.log"):
    """Append *message* to *log_file* and echo it on stdout.

    Args:
        message: text to record; a trailing newline is added in the file.
        log_file: path of the log file, opened in append mode.

    The file is written as UTF-8 explicitly so that non-ASCII text (for
    example repository names) does not depend on the platform's default
    encoding.
    """
    with open(log_file, "a", encoding="utf-8") as f:
        f.write(message + "\n")
    print(message)
# Create or update the local repository
def clone_or_pull(repo_url):
    """Clone *repo_url* into ``base_dir``, or ``git pull`` if it is already there.

    Args:
        repo_url: clone URL; the last four characters are dropped to get the
            directory name, so the URL is assumed to end in ".git".

    A non-zero git exit status is logged instead of being silently ignored.
    No exception is raised for it, so the caller's loop keeps going.
    """
    repo_name = os.path.basename(repo_url[:-4])
    repo_path = os.path.join(base_dir, repo_name)
    if os.path.isdir(repo_path):
        log_message(f"Updating existing repository: {repo_name}")
        action = "pull"
        result = subprocess.run(["git", "pull"], cwd=repo_path)
    else:
        log_message(f"Cloning new repository: {repo_name}")
        action = "clone"
        result = subprocess.run(["git", "clone", repo_url], cwd=base_dir)
    if result.returncode != 0:
        log_message(f"git {action} failed for '{repo_name}' (exit code {result.returncode}).")
# Check whether the Bitbucket repository exists
def check_bitbucket_repo(repo_slug):
    """Return True when a GET on the repository's API URL answers with status 200.

    Args:
        repo_slug: repository slug inside ``bitbucket_workspace``.

    Any other status code yields False.
    """
    repo_endpoint = urljoin(bitbucket_api_url, f"{bitbucket_workspace}/{repo_slug}")
    credentials = (username, app_password)
    reply = requests.get(repo_endpoint, auth=credentials)
    return reply.status_code == 200
# Create the Bitbucket repository
def create_bitbucket_repo(repo_name, repo_slug):
    """Create a private git repository in ``bitbucket_workspace`` via the REST API.

    Args:
        repo_name: original repository name, used only in the description.
        repo_slug: slug appended to the API URL for the new repository.

    The outcome is logged through ``log_message``; nothing is returned and a
    failed creation does not raise.
    """
    data = {
        "scm": "git",
        "description": f"Description for {repo_name}",
        "is_private": True,
        "project": {"key": bitbucket_project},
    }
    response = requests.post(
        urljoin(bitbucket_api_url, f"{bitbucket_workspace}/{repo_slug}"),
        json=data,
        auth=(username, app_password),
    )
    if response.status_code in [200, 201]:
        log_message(f"Repository '{repo_slug}' successfully created.")
        return
    # An error response is not guaranteed to carry a JSON body; fall back to
    # the raw text so that reporting the failure cannot itself raise.
    try:
        detail = response.json()
    except ValueError:
        detail = response.text
    log_message(f"Failed to create repository: {detail}")
def push_to_bitbucket(repo_name, repo_slug, base_dir, bitbucket_workspace):
    """Push all branches and tags of a local clone to its Bitbucket repository.

    Args:
        repo_name: directory name of the clone inside *base_dir*.
        repo_slug: Bitbucket repository slug to push to.
        base_dir: directory that contains the local clones.
        bitbucket_workspace: Bitbucket workspace that owns the repository.

    Steps: (re)create the ``bitbucket`` remote, fetch all remotes, make sure
    every non-Bitbucket remote branch has a matching local branch, then run
    ``git push --all`` followed by ``git push --tags``. Push failures are
    logged; tags are not pushed when the branch push failed.

    Raises:
        subprocess.CalledProcessError: if one of the git queries run through
            ``check_output`` exits with a non-zero status.
    """
    repo_path = os.path.join(base_dir, repo_name)

    # Re-create the 'bitbucket' remote so its URL always matches repo_slug.
    remotes = subprocess.check_output(["git", "remote"], cwd=repo_path).decode().splitlines()
    if 'bitbucket' in remotes:
        subprocess.run(["git", "remote", "remove", "bitbucket"], cwd=repo_path)
    subprocess.run(
        ["git", "remote", "add", "bitbucket",
         f"git@bitbucket.org:{bitbucket_workspace}/{repo_slug}.git"],
        cwd=repo_path,
    )

    # Fetch the latest branches and tags.
    subprocess.run(["git", "fetch", "--all"], cwd=repo_path)

    remote_branches = subprocess.check_output(["git", "branch", "-r"], cwd=repo_path).decode().splitlines()

    # `git branch` prefixes each name with a two-character marker ("* " for
    # the checked-out branch), so strip it before comparing names. This is
    # queried once; branches created below are added to the set by hand.
    local_branches = set()
    current_branch = None
    for line in subprocess.check_output(["git", "branch"], cwd=repo_path).decode().splitlines():
        name = line[2:].strip()
        if not name:
            continue
        local_branches.add(name)
        if line.startswith("* "):
            current_branch = name

    log_message("GitLab remote branches:")
    for line in remote_branches:
        remote_ref = line.strip()
        # Skip symbolic refs such as "origin/HEAD -> origin/master" and the
        # refs of the Bitbucket remote itself (present after a previous sync).
        if not remote_ref or '->' in remote_ref or remote_ref.startswith("bitbucket/"):
            continue
        # Split on the first '/' only so "origin/feature/x" maps to "feature/x".
        branch_name = remote_ref.split('/', 1)[1]
        log_message(branch_name)
        if branch_name not in local_branches:
            # Create a local branch that tracks the remote branch.
            subprocess.run(["git", "branch", "--track", branch_name, remote_ref], cwd=repo_path)
            local_branches.add(branch_name)
        elif branch_name != current_branch:
            # An existing local branch is not moved by `git fetch`; reset it to
            # the fetched tip so `push --all` does not publish a stale head.
            # The checked-out branch cannot be force-updated and is skipped.
            subprocess.run(["git", "branch", "-f", branch_name, remote_ref], cwd=repo_path)

    # List all tags known to the local clone.
    remote_tags = subprocess.check_output(["git", "tag", "-l"], cwd=repo_path).decode().splitlines()
    log_message("GitLab remote tags:")
    for tag in remote_tags:
        log_message(tag)

    # Push every local branch to Bitbucket.
    push_branches_result = subprocess.run(["git", "push", "bitbucket", "--all"], cwd=repo_path)
    if push_branches_result.returncode != 0:
        log_message(f"Failed to push branches of GitLab repository '{repo_name}' to Bitbucket.")
        return  # Do not attempt the tag push when the branch push failed.

    # Push every tag to Bitbucket.
    push_tags_result = subprocess.run(["git", "push", "bitbucket", "--tags"], cwd=repo_path)
    if push_tags_result.returncode != 0:
        log_message(f"Failed to push tags of GitLab repository '{repo_name}' to Bitbucket.")
        return

    # Both pushes succeeded.
    log_message(f"GitLab repository '{repo_name}' successfully pushed to Bitbucket.")
# Main workflow
def main():
    """Sync every repository in ``gitlab_repos`` to Bitbucket.

    For each URL: derive the repository name and slug, check whether the
    Bitbucket repository already exists, clone or update the local copy,
    create the Bitbucket repository when it was missing, and push.
    """
    os.makedirs(base_dir, exist_ok=True)
    # Progress numbering starts at 1.
    for index, repo_url in enumerate(gitlab_repos, start=1):
        logger.info(f"sync repository {index}:{repo_url}")
        # Drop the last four characters (".git") to get the repository name.
        repo_name = os.path.basename(repo_url[:-4])
        repo_slug = repo_name.replace("_", "-").lower()

        already_exists = check_bitbucket_repo(repo_slug)
        if already_exists:
            log_message(f"{repo_slug} exists!")
        clone_or_pull(repo_url)
        if not already_exists:
            create_bitbucket_repo(repo_name, repo_slug)
        push_to_bitbucket(repo_name, repo_slug, base_dir, bitbucket_workspace)

    log_message(f"All {len(gitlab_repos)} repositories have been synchronized.")


if __name__ == "__main__":
    main()
#