pip依赖
PyMySQL==1.0.3
python_gitlab==3.14.0
Requests==2.31.0
思路
根据提供的git库(project id)和需要统计的分支(branch_name),通过python-gitlab 的API接口获取所有的commit id信息,进而通过commit id获取每次提交的详细内容
踩坑
1、判断分支是否存在&&获取所有分支信息
def check_branch_exist(pid, branch_name):  # Check whether a branch exists
    """Return True if ``branch_name`` exists in GitLab project ``pid``.

    Branches are requested page by page through the module-level ``gl``
    client. The scan stops as soon as the branch is found, or when an empty
    page signals that every branch has been seen.

    Args:
        pid: GitLab project id passed to ``gl.projects.get``.
        branch_name: Exact branch name to look for.

    Returns:
        True when a branch with that name is found; False when it is not
        found or when the lookup fails (the error is printed).
    """
    per_page = 100
    try:
        # Fetch the project once; it does not change between pages.
        project = gl.projects.get(pid)
        page = 1
        while True:
            branches = project.branches.list(page=page, per_page=per_page)
            if not branches:
                # An empty page means all branches were inspected.
                return False
            if any(branch.name == branch_name for branch in branches):
                return True
            page += 1
    except gitlab.exceptions.GitlabGetError as e:
        print(f"无法获取项目或分支信息:{e}")
        return False
    except Exception as e:
        # Best-effort lookup: any other failure is reported as "not found".
        print(f"发生错误:{e}")
        return False
如果 git 项目的分支数量较多,仅通过 all=True 无法获取全部分支(GitLab API 对单次返回数量有限制),必须分页获取。因此这里使用一个无限循环逐页请求,直到返回结果为空时跳出。
2、移除接口返回时间中的时区信息
def time_without_timezone(date):  # Strip the timezone offset from a timestamp
    """Return ``date`` with every ``+HH:MM`` / ``-HH:MM`` offset removed.

    Args:
        date: Timestamp string, e.g. ``2023-05-01T12:34:56.000+08:00``.

    Returns:
        The same string without the offset part; text that contains no
        such offset is returned unchanged.
    """
    offset_pattern = re.compile(r'[+-]\d{2}:\d{2}')
    return offset_pattern.sub('', date)
由于 MySQL 的 datetime 类型不能存入时区信息,所以使用正则表达式匹配并删除了时间字符串中的时区偏移部分。
完整代码
import gitlab
from datetime import datetime
import calendar
import pymysql
import re
# Reporting period: the calendar month before the current one.
now = datetime.now()
if now.month == 1:
    # In January the previous month is December of the previous year;
    # a plain "month - 1" would yield the invalid month 0.
    year, month = now.year - 1, 12
else:
    year, month = now.year, now.month - 1
# To report on the current month instead, use:
#     year, month = now.year, now.month
# Last day of the reporting month (28-31).
day = calendar.monthrange(year, month)[1]
def git_address_to_number(git_address):  # Map a git address to its numeric code
    """Return the numeric code (as a string) assigned to a git server address.

    Args:
        git_address: Server address, either the bare host name or the
            ``http://.../`` form.

    Returns:
        ``'1'`` for git.gitlab.com, ``'2'`` for git2.gitlab.com and ``'3'``
        for any other value.
    """
    if git_address in ('git.gitlab.com', 'http://git.gitlab.com/'):
        return '1'
    if git_address in ('git2.gitlab.com', 'http://git2.gitlab.com/'):
        return '2'
    return '3'
def time_without_timezone(date):  # Remove the timezone offset from a timestamp
    """Drop any ``+HH:MM`` / ``-HH:MM`` offset from a timestamp string.

    Args:
        date: Timestamp string, e.g. ``2023-05-01T12:34:56.000+08:00``.

    Returns:
        ``date`` with every such offset removed; input without an offset
        comes back unchanged.
    """
    tz_offset = re.compile(r'[+-]\d{2}:\d{2}')
    return tz_offset.sub('', date)
def check_pid_exist(pid):  # Check whether the project id exists
    """Return True if GitLab project ``pid`` can be fetched.

    The project is requested through the module-level ``gl`` client and
    printed on success. On ``GitlabGetError`` the project id and the error
    message are printed instead.

    Args:
        pid: GitLab project id passed to ``gl.projects.get``.

    Returns:
        True when the project was fetched, False when the fetch raised
        ``GitlabGetError``.
    """
    try:
        found = gl.projects.get(pid)
    except gitlab.exceptions.GitlabGetError as err:
        print(f"{pid},{err.error_message}")
        return False
    else:
        print(found)
        return True
def check_branch_exist(pid, branch_name):  # Check whether a branch exists
    """Return True if ``branch_name`` exists in GitLab project ``pid``.

    Branches are requested page by page through the module-level ``gl``
    client. The scan stops as soon as the branch is found, or when an empty
    page signals that every branch has been seen.

    Args:
        pid: GitLab project id passed to ``gl.projects.get``.
        branch_name: Exact branch name to look for.

    Returns:
        True when a branch with that name is found; False when it is not
        found or when the lookup fails (the error is printed).
    """
    per_page = 100
    try:
        # Fetch the project once; it does not change between pages.
        project = gl.projects.get(pid)
        page = 1
        while True:
            branches = project.branches.list(page=page, per_page=per_page)
            if not branches:
                # An empty page means all branches were inspected.
                return False
            if any(branch.name == branch_name for branch in branches):
                return True
            page += 1
    except gitlab.exceptions.GitlabGetError as e:
        print(f"无法获取项目或分支信息:{e}")
        return False
    except Exception as e:
        # Best-effort lookup: any other failure is reported as "not found".
        print(f"发生错误:{e}")
        return False
def get_commits(pid, branch_name):  # Collect commit records for one branch
    """Return the commits made on ``branch_name`` during the reporting month.

    The reporting window is built from the module-level ``year``, ``month``
    and ``day`` values (first day 00:00:00 to last day 23:59:59). Every
    commit returned by the listing is fetched again individually, and the
    fields of that detailed record are copied into a dict.

    Args:
        pid: GitLab project id passed to ``gl.projects.get``.
        branch_name: Branch whose commits are listed (``ref_name``).

    Returns:
        A list of dicts keyed by the ``gc_*`` names used for insertion,
        one per commit.
    """
    print(pid, branch_name)
    project = gl.projects.get(pid)

    # Bounds of the reporting window, formatted as the API expects.
    window_start = datetime(year, month, 1).strftime("%Y-%m-%dT00:00:00Z")
    window_end = datetime(year, month, day).strftime("%Y-%m-%dT23:59:59Z")

    def _to_row(detail):
        """Copy the fields of one detailed commit into a plain dict."""
        return {
            'gc_commitid': detail.id,
            'gc_commit_short_id': detail.short_id,
            'gc_created_at': time_without_timezone(detail.created_at),
            'gc_parent_ids': format(detail.parent_ids),
            'gc_title': detail.title,
            'gc_message': detail.message,
            'gc_author_name': detail.author_name,
            'gc_author_email': detail.author_email,
            'gc_authored_date': time_without_timezone(detail.authored_date),
            'gc_committer_name': detail.committer_name,
            'gc_committer_email': detail.committer_email,
            'gc_committed_date': time_without_timezone(detail.committed_date),
            'gc_web_url': detail.web_url,
            'gc_additions': detail.stats['additions'],
            'gc_deletions': detail.stats['deletions'],
            'gc_total': detail.stats['total'],
            'gc_status': detail.status,
            'gc_project_id': detail.project_id,
            'gc_last_pipeline': format(detail.last_pipeline),
            'gc_gittype': git_address_to_number(gitlab_url),
            'gc_branch_name': branch_name,
        }

    listed = project.commits.list(
        ref_name=branch_name,
        since=window_start,
        until=window_end,
        get_all=True,
    )
    # Each listed commit is re-fetched by id to obtain its detailed record.
    return [_to_row(project.commits.get(entry.id)) for entry in listed]
def insert_to_mysql(data):  # Store commit records in MySQL
    """Insert commit records into the ``git_commit`` table.

    Rows are written through the module-level ``mysql_connect`` connection
    in a single transaction, committed after the last row. If any insert
    fails, the transaction is rolled back and the exception is re-raised.
    The cursor is closed in every case.

    Args:
        data: Iterable of dicts holding one value for each ``gc_*`` column
            listed below.

    Raises:
        KeyError: If a record lacks one of the expected keys.
        Exception: Any database error raised while inserting or committing.
    """
    # Single source of truth for both the SQL column list and value order.
    columns = (
        'gc_commitid',
        'gc_commit_short_id',
        'gc_created_at',
        'gc_parent_ids',
        'gc_title',
        'gc_message',
        'gc_author_name',
        'gc_author_email',
        'gc_authored_date',
        'gc_committer_name',
        'gc_committer_email',
        'gc_committed_date',
        'gc_web_url',
        'gc_additions',
        'gc_deletions',
        'gc_total',
        'gc_status',
        'gc_project_id',
        'gc_last_pipeline',
        'gc_gittype',
        'gc_branch_name',
    )
    placeholders = ','.join(['%s'] * len(columns))
    insert_query = (
        f"INSERT INTO git_commit ({','.join(columns)})"
        f"VALUES ({placeholders})"
    )
    # The context manager closes the cursor even when an insert fails.
    with mysql_connect.cursor() as cursor:
        try:
            for commit in data:
                values = tuple(commit[column] for column in columns)
                print(values)
                cursor.execute(insert_query, values)
            mysql_connect.commit()
        except Exception:
            # Do not leave a half-written batch pending on the connection.
            mysql_connect.rollback()
            raise
def run_process(project_id, branch_name):
    """Collect and store the commits of one project branch.

    The project and the branch are checked first; when either check fails
    a message is printed and nothing is collected. Otherwise the commits
    are fetched, printed and inserted into MySQL.

    Args:
        project_id: GitLab project id.
        branch_name: Name of the branch to collect commits from.
    """
    if not check_pid_exist(project_id):
        print(f"项目'{project_id}'不存在")
        return
    if not check_branch_exist(project_id, branch_name):
        print(f"项目'{project_id}'分支'{branch_name}'不存在")
        return

    collected = get_commits(project_id, branch_name)
    print(collected)
    insert_to_mysql(collected)
if __name__ == "__main__":
    gitlab_url = "http://git.gitlab.com/"  # GitLab server address
    private_token = ''  # GitLab account access token
    # Parallel lists: project_id[i] is processed on branch_name[i].
    project_id = ['182']
    branch_name = ['master']
    gl = gitlab.Gitlab(gitlab_url, private_token=private_token)
    # NOTE: credentials are hard-coded for the example; load them from the
    # environment or a config file in real deployments.
    mysql_connect = pymysql.connect(  # MySQL connection settings
        host='127.0.0.1',
        user='root',
        password='123123',
        database='test'
    )
    try:
        # run_process expects a single project id and branch name, so the
        # lists are walked pairwise instead of being passed as a whole.
        for pid, branch in zip(project_id, branch_name):
            run_process(pid, branch)
    finally:
        mysql_connect.close()  # Close the MySQL connection even on failure
对应MySQL建表语句
-- One row per commit collected by the statistics script.
-- utf8mb4 is used instead of the 3-byte utf8 charset so that commit titles
-- and messages containing 4-byte characters (e.g. emoji) can be stored.
-- The AUTO_INCREMENT start value left over from a dump has been dropped,
-- so ids start from the default.
CREATE TABLE `git_commit` (
`gc_id` int(11) NOT NULL AUTO_INCREMENT COMMENT '自增id',
`gc_commitid` varchar(50) DEFAULT NULL COMMENT '提交id',
`gc_commit_short_id` varchar(10) DEFAULT NULL COMMENT '提交短id',
`gc_created_at` datetime DEFAULT NULL COMMENT '提交时间',
`gc_parent_ids` varchar(150) DEFAULT NULL COMMENT '上级ID',
`gc_title` text COMMENT '提交title',
`gc_message` text COMMENT '提交信息',
`gc_author_name` varchar(50) DEFAULT NULL COMMENT '作者名字',
`gc_author_email` varchar(50) DEFAULT NULL COMMENT '作者邮箱',
`gc_authored_date` datetime DEFAULT NULL,
`gc_committer_name` varchar(50) DEFAULT NULL COMMENT '提交名字',
`gc_committer_email` varchar(50) DEFAULT NULL,
`gc_committed_date` datetime DEFAULT NULL,
`gc_web_url` varchar(150) DEFAULT NULL,
`gc_additions` int(11) DEFAULT NULL COMMENT '代码增加行',
`gc_deletions` int(11) DEFAULT NULL COMMENT '代码删除行',
`gc_total` int(11) DEFAULT NULL COMMENT '影响总行数',
`gc_status` varchar(50) DEFAULT NULL COMMENT '状态',
`gc_project_id` int(11) DEFAULT NULL COMMENT '项目id',
`gc_last_pipeline` text,
`gc_gittype` int(11) DEFAULT NULL,
`gc_branch_name` varchar(50) DEFAULT NULL,
PRIMARY KEY (`gc_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='GIT提交记录表';