python 获取gitlab commit 代码提交信息

pip依赖

PyMySQL==1.0.3
python_gitlab==3.14.0
Requests==2.31.0

思路

根据提供的git库(project id)和需要统计的分支(branch_name),通过python-gitlab 的API接口获取所有的commit id信息,进而通过commit id获取每次提交的详细内容

踩坑

1、判断分支是否存在&&获取所有分支信息

def check_branch_exist(pid,branch_name):    # Check whether a branch exists
    """Return True if ``branch_name`` exists in project ``pid``, else False.

    Branches are requested page by page (100 per page) through the
    module-level ``gl`` client until a match is found or an empty page is
    returned. Any exception raised while fetching the project or its
    branches is printed and reported as False.
    """
    per_page = 100
    try:
        # The project does not change between pages, so fetch it once.
        project = gl.projects.get(pid)
        page = 0
        while True:
            page += 1
            branches = project.branches.list(page=page, per_page=per_page)
            if not branches:
                # An empty page means every branch has been seen.
                return False
            if any(branch.name == branch_name for branch in branches):
                return True
    except gitlab.GitlabGetError as e:
        print(f"无法获取项目或分支信息:{e}")
        return False
    except Exception as e:
        print(f"发生错误:{e}")
        return False

如果 git 项目的分支数量较多,仅通过 all=True 不能完整获取(GitLab API 中有限制),必须使用分页获取;所以这里使用一个死循环逐页获取,直到返回为空时跳出。

2、移除接口返回时间中的时区信息

def time_without_timezone(date):    # Strip the timezone offset from a timestamp
    """Return ``date`` with every ``+HH:MM`` / ``-HH:MM`` substring removed.

    Text that contains no such offset is returned unchanged.
    """
    offset_pattern = re.compile(r'[+-]\d{2}:\d{2}')
    return offset_pattern.sub('', date)

由于 MySQL 的 datetime 类型不能存入时区信息,所以使用正则匹配并删除了时间字符串中的时区信息(例如 +08:00)。

完整代码

import gitlab
from datetime import datetime
import calendar
import pymysql
import re

# Reporting period: the calendar month before the current one.
now = datetime.now()
if now.month == 1:
    # In January the previous month is December of the previous year;
    # a plain "month - 1" would give month 0 and make monthrange() raise.
    year, month = now.year - 1, 12
else:
    year, month = now.year, now.month - 1
# year, month = now.year, now.month    # report the current month instead
# Last day of the reporting month.
day = calendar.monthrange(year, month)[1]
# yesterday = calendar.monthrange(year, month)[1] - 1

def git_address_to_number(git_address):     # Map a git address to its numeric code
    """Return the code string ('1', '2' or '3') for ``git_address``.

    The two recognised hosts map to '1' and '2' (bare host name or
    ``http://`` URL with trailing slash); anything else maps to '3'.
    """
    if git_address in ('git.gitlab.com', 'http://git.gitlab.com/'):
        return '1'
    if git_address in ('git2.gitlab.com', 'http://git2.gitlab.com/'):
        return '2'
    return '3'

def time_without_timezone(date):    # Strip the timezone offset from a timestamp
    """Return ``date`` with every ``+HH:MM`` / ``-HH:MM`` substring removed.

    Text that contains no such offset is returned unchanged.
    """
    offset_pattern = re.compile(r'[+-]\d{2}:\d{2}')
    return offset_pattern.sub('', date)

def check_pid_exist(pid): # Check whether the project id exists
    """Return True if project ``pid`` can be fetched, else False.

    The lookup goes through the module-level ``gl`` client. On success the
    project object is printed; on ``GitlabGetError`` the id and the error
    message are printed instead.
    """
    try:
        project = gl.projects.get(pid)
    except gitlab.exceptions.GitlabGetError as e:
        print(f"{pid},{e.error_message}")
        return False
    print(project)
    return True

def check_branch_exist(pid,branch_name):    # Check whether a branch exists
    """Return True if ``branch_name`` exists in project ``pid``, else False.

    Branches are requested page by page (100 per page) through the
    module-level ``gl`` client until a match is found or an empty page is
    returned. Any exception raised while fetching the project or its
    branches is printed and reported as False.
    """
    per_page = 100
    try:
        # The project does not change between pages, so fetch it once.
        project = gl.projects.get(pid)
        page = 0
        while True:
            page += 1
            branches = project.branches.list(page=page, per_page=per_page)
            if not branches:
                # An empty page means every branch has been seen.
                return False
            if any(branch.name == branch_name for branch in branches):
                return True
    except gitlab.GitlabGetError as e:
        print(f"无法获取项目或分支信息:{e}")
        return False
    except Exception as e:
        print(f"发生错误:{e}")
        return False

def get_commits(pid,branch_name):   # Collect commit details for one branch
    """Return one dict per commit on ``branch_name`` of project ``pid``.

    Commits are listed for the window built from the module-level ``year``,
    ``month`` and ``day`` values (first day 00:00:00 to last day 23:59:59),
    then each commit is fetched individually to read its full details.
    The dict keys are the ``gc_*`` names used by ``insert_to_mysql``.
    """
    print(pid,branch_name)
    records = []
    project = gl.projects.get(pid)
    # Monthly window.
    window_start = datetime(year, month, 1).strftime("%Y-%m-%dT00:00:00Z")
    window_end = datetime(year, month, day).strftime("%Y-%m-%dT23:59:59Z")
    # Previous-day window (alternative):
    # window_start = datetime(year, month, yesterday).strftime("%Y-%m-%dT00:00:00Z")
    # window_end = datetime(year, month, yesterday).strftime("%Y-%m-%dT23:59:59Z")
    listed = project.commits.list(
        ref_name=branch_name,
        since=window_start,
        until=window_end,
        get_all=True,
    )
    for listed_commit in listed:
        # Fetch the single commit; its stats, status and last_pipeline
        # fields are read below.
        detail = project.commits.get(listed_commit.id)
        record = {
            'gc_commitid': detail.id,
            'gc_commit_short_id': detail.short_id,
            'gc_created_at': time_without_timezone(detail.created_at),
            'gc_parent_ids': f"{detail.parent_ids}",
            'gc_title': detail.title,
            'gc_message': detail.message,
            'gc_author_name': detail.author_name,
            'gc_author_email': detail.author_email,
            'gc_authored_date': time_without_timezone(detail.authored_date),
            'gc_committer_name': detail.committer_name,
            'gc_committer_email': detail.committer_email,
            'gc_committed_date': time_without_timezone(detail.committed_date),
            'gc_web_url': detail.web_url,
            'gc_additions': detail.stats['additions'],
            'gc_deletions': detail.stats['deletions'],
            'gc_total': detail.stats['total'],
            'gc_status': detail.status,
            'gc_project_id': detail.project_id,
            'gc_last_pipeline': f"{detail.last_pipeline}",
            'gc_gittype': git_address_to_number(gitlab_url),
            'gc_branch_name': branch_name,
        }
        records.append(record)
    return records

def insert_to_mysql(data): # Store commit records in MySQL
    """Insert every commit dict in ``data`` into the ``git_commit`` table.

    Uses the module-level ``mysql_connect`` connection. All rows are
    written in one transaction: it is committed after the last row, and
    rolled back (then the exception re-raised) if any insert fails. The
    cursor is closed in both cases.

    Each dict must contain every ``gc_*`` key listed in ``columns``;
    a missing key raises ``KeyError``.
    """
    columns = (
        'gc_commitid',
        'gc_commit_short_id',
        'gc_created_at',
        'gc_parent_ids',
        'gc_title',
        'gc_message',
        'gc_author_name',
        'gc_author_email',
        'gc_authored_date',
        'gc_committer_name',
        'gc_committer_email',
        'gc_committed_date',
        'gc_web_url',
        'gc_additions',
        'gc_deletions',
        'gc_total',
        'gc_status',
        'gc_project_id',
        'gc_last_pipeline',
        'gc_gittype',
        'gc_branch_name',
    )
    # Column names are fixed constants; row values go through placeholders.
    insert_query = (
        f"INSERT INTO git_commit ({','.join(columns)})"
        f"VALUES ({','.join(['%s'] * len(columns))})"
    )
    try:
        with mysql_connect.cursor() as cursor:
            for commit in data:
                values = tuple(commit[column] for column in columns)
                print(values)
                cursor.execute(insert_query, values)
        mysql_connect.commit()
    except Exception:
        # Do not leave a half-written batch pending on the connection.
        mysql_connect.rollback()
        raise

def run_process(project_id,branch_name):
    """Collect and store the commits of one branch of one project.

    Prints a message and returns without doing anything when the project
    or the branch does not exist. Otherwise the commit records are
    fetched, printed and inserted into MySQL.
    """
    if not check_pid_exist(project_id):
        print(f"项目'{project_id}'不存在")
        return
    if not check_branch_exist(project_id,branch_name):
        print(f"项目'{project_id}'分支'{branch_name}'不存在")
        return
    commit_data = get_commits(project_id,branch_name)
    print(commit_data)
    insert_to_mysql(commit_data)

if __name__ == "__main__":
    # Script configuration. gitlab_url, gl and mysql_connect are read as
    # module-level globals by the functions above.
    gitlab_url = "http://git.gitlab.com/"       # GitLab base URL
    private_token = ''      # GitLab account token
    project_id = ['182']    # project ids to process
    branch_name = ['master']    # branch names to process
    gl = gitlab.Gitlab(gitlab_url, private_token=private_token)
    mysql_connect = pymysql.connect(    # MySQL settings
        host = '127.0.0.1',
        user = 'root',
        password = '123123',
        database = 'test'
    )
    try:
        # run_process expects one project id and one branch name, so
        # iterate the configured lists instead of passing them whole.
        # NOTE(review): every branch is tried against every project;
        # confirm this pairing is the intended one.
        for pid in project_id:
            for branch in branch_name:
                run_process(pid, branch)
    finally:
        mysql_connect.close()  # always close the MySQL connection

对应MySQL建表语句

-- One row per commit collected by the script.
-- utf8mb4 is used so that titles and messages containing 4-byte
-- characters (e.g. emoji) can be stored; MySQL's 3-byte utf8 cannot hold them.
CREATE TABLE `git_commit` (
  `gc_id` int(11) NOT NULL AUTO_INCREMENT COMMENT '自增id',
  `gc_commitid` varchar(50) DEFAULT NULL COMMENT '提交id',
  `gc_commit_short_id` varchar(10) DEFAULT NULL COMMENT '提交短id',
  `gc_created_at` datetime DEFAULT NULL COMMENT '提交时间',
  `gc_parent_ids` varchar(150) DEFAULT NULL COMMENT '上级ID',
  `gc_title` text COMMENT '提交title',
  `gc_message` text COMMENT '提交信息',
  `gc_author_name` varchar(50) DEFAULT NULL COMMENT '作者名字',
  `gc_author_email` varchar(50) DEFAULT NULL COMMENT '作者邮箱',
  `gc_authored_date` datetime DEFAULT NULL,
  `gc_committer_name` varchar(50) DEFAULT NULL COMMENT '提交名字',
  `gc_committer_email` varchar(50) DEFAULT NULL,
  `gc_committed_date` datetime DEFAULT NULL,
  `gc_web_url` varchar(150) DEFAULT NULL,
  `gc_additions` int(11) DEFAULT NULL COMMENT '代码增加行',
  `gc_deletions` int(11) DEFAULT NULL COMMENT '代码删除行',
  `gc_total` int(11) DEFAULT NULL COMMENT '影响总行数',
  `gc_status` varchar(50) DEFAULT NULL COMMENT '状态',
  `gc_project_id` int(11) DEFAULT NULL COMMENT '项目id',
  `gc_last_pipeline` text,
  `gc_gittype` int(11) DEFAULT NULL,
  `gc_branch_name` varchar(50) DEFAULT NULL,
  PRIMARY KEY (`gc_id`)
) ENGINE=InnoDB AUTO_INCREMENT=10 DEFAULT CHARSET=utf8mb4 COMMENT='GIT提交记录表';
  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值