1.环境
Python 运行环境:使用 python-3.10.8-amd64.exe 安装的 Python 3.10.8(64 位)
代码调试环境使用PyCharm专业版
2.准备
2.1PyCharm依赖引入配置
2.2 设置 - AccessTokens
登录 GitLab,选择 设置 - Access Tokens,创建个人访问令牌。
复制生成的个人访问令牌到脚本的 private_token 中,再将 url(demo1 中为 gitlab_url)改为你的 GitLab 地址,最后修改起-止时间,执行脚本即可。
3.demo
思路如下:
- 首先遍历所有项目
- 然后遍历所有项目下拥有的所有分支
- 遍历所有分支下每个用户提交的代码量
- 时间区间限制
- 数据去重相加,格式化输出
3.1 demo1
# This is a sample Python script.
# !/usr/bin/python3
# coding=utf8
# Press Shift+F10 to execute it or replace it with your code.
# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.
import json
import requests
from dateutil.parser import parse
def print_hi(name):
    """Print a one-line greeting for ``name`` to standard output."""
    greeting = "Hi, {}".format(name)
    print(greeting)
# Address of the GitLab instance. The request functions append "api/v4/..."
# directly to this value, so it has to end with "/".
gitlab_url = "xxxxxx"
# GitLab access token (administrator permissions).
private_token = "xxxxx"
# Only commits authored under this Git user name are counted.
currentName = "xxxx"
# Per-commit statistics records; commit_code() appends to this list.
info = []
# Extra HTTP headers sent with every request.
headers = {"Connection": "close"}
# Convert a date/time string to a Unix timestamp
def utc_time(time):
    """Parse ``time`` with ``dateutil`` and return its timestamp in whole seconds.

    Args:
        time: Date/time string understood by ``dateutil.parser.parse``.

    Returns:
        The POSIX timestamp of the parsed value, truncated to ``int``.

    NOTE(review): a string without a UTC offset parses to a naive datetime,
    and ``datetime.timestamp()`` interprets naive values in the machine's
    local time zone -- confirm that this is what callers expect.
    """
    return int(parse(time).timestamp())
# Column padding for the formatted report
def str_format(txt, width=20):
    """Return how many spaces to print after ``txt`` to fill a report column.

    The display width of ``txt`` is estimated from the difference between its
    UTF-8 byte length and its character count, so that a 3-byte character
    (e.g. a CJK ideograph) counts as two columns.

    Args:
        txt: Text of the cell that was just printed.
        width: Total column width in display columns (default 20).

    Returns:
        The number of padding spaces, never less than 1 so that a cell wider
        than the column is still separated from the next one.
    """
    char_count = len(txt)
    byte_count = len(txt.encode('utf-8'))
    display_width = (byte_count - char_count) // 2 + char_count
    # Without the lower bound an over-long cell would get zero (or negative)
    # padding and run straight into the following column.
    return max(1, width - display_width)
# Fetch all projects on GitLab
def gitlab_projects():
    """Return the ids of all projects listed by ``GET api/v4/projects/``.

    The listing is read 20 projects per page until an empty page comes back.

    Returns:
        A list with the ``id`` of every project, in listing order.

    Raises:
        requests.RequestException: A page could not be fetched after
            ``max_attempts`` tries.
        RuntimeError: GitLab answered with something other than a JSON list
            (for example an authentication error object).
    """
    max_attempts = 5
    endpoint = gitlab_url + "api/v4/projects/"
    project_ids = []
    page = 1
    while True:
        query = {"private_token": private_token, "page": page, "per_page": 20}
        for attempt in range(1, max_attempts + 1):
            try:
                res = requests.get(endpoint, params=query, headers=headers, timeout=10)
                break
            except requests.RequestException as e:
                print(e)
                # A permanent failure (e.g. a malformed address) would
                # otherwise be retried forever, so give up eventually.
                if attempt == max_attempts:
                    raise
        projects = res.json()
        if not isinstance(projects, list):
            # Error responses are JSON objects; iterating one would fail
            # later with a confusing TypeError.
            raise RuntimeError(
                "unexpected response from " + endpoint
                + " (HTTP " + str(res.status_code) + "): " + str(projects))
        if not projects:
            break
        project_ids.extend(project["id"] for project in projects)
        page += 1
    print("===project_ids====" + str(len(project_ids)))
    return project_ids
# Fetch the branches of one GitLab project
def project_branches(project_id):
    """Return the names of all branches of project ``project_id``.

    Reads ``GET api/v4/projects/<id>/repository/branches`` 20 entries per
    page until an empty page comes back.

    Args:
        project_id: Numeric id of the project.

    Returns:
        A list of branch names. If GitLab answers with something other than
        a JSON list, a message is printed and the branches collected so far
        are returned, so one inaccessible project does not abort the run.

    Raises:
        requests.RequestException: A page could not be fetched after
            ``max_attempts`` tries.
    """
    max_attempts = 5
    endpoint = gitlab_url + "api/v4/projects/" + str(project_id) + "/repository/branches"
    branch_names = []
    page = 1
    while True:
        query = {"private_token": private_token, "page": page, "per_page": 20}
        for attempt in range(1, max_attempts + 1):
            try:
                res = requests.get(endpoint, params=query, headers=headers, timeout=10)
                break
            except requests.RequestException as e:
                print(e)
                # Bounded retry: a permanent failure must not loop forever.
                if attempt == max_attempts:
                    raise
        branches = res.json()
        if not isinstance(branches, list):
            # An error object instead of a branch list: skip this project.
            print("===branches skipped=== project " + str(project_id)
                  + ", HTTP " + str(res.status_code) + ": " + str(branches))
            break
        if not branches:
            break
        branch_names.extend(branch["name"] for branch in branches)
        page += 1
    print("===branch_names====" + str(len(branch_names)))
    return branch_names
# Fetch the commits of one project branch; commits whose title or message
# contains "Merge" or "合并" are treated as merges and left out
def project_commits(project_id, branch, start_time, end_time):
    """Return the ids of non-merge commits on ``branch`` authored in a time window.

    Reads ``GET api/v4/projects/<id>/repository/commits`` for ``branch``
    20 entries per page until an empty page comes back, and filters the
    commits on the client side.

    Args:
        project_id: Numeric id of the project.
        branch: Branch name; sent as the ``ref_name`` query parameter.
        start_time: Start of the window, a string accepted by ``utc_time``.
        end_time: End of the window (inclusive), same format.

    Returns:
        A list of commit ids. If GitLab answers with something other than a
        JSON list, a message is printed and the ids collected so far are
        returned.

    Raises:
        requests.RequestException: A page could not be fetched after
            ``max_attempts`` tries.

    NOTE(review): the merge filter is a substring test, so an ordinary
    commit that merely mentions "Merge" is excluded as well -- confirm
    whether only a leading "Merge" was intended.
    """
    max_attempts = 5
    endpoint = gitlab_url + "api/v4/projects/" + str(project_id) + "/repository/commits"
    merge_markers = ("Merge", "合并")
    # The window bounds are the same for every commit, so parse them once.
    window_start = utc_time(start_time)
    window_end = utc_time(end_time)
    commit_ids = []
    page = 1
    while True:
        # Passing the branch through ``params`` URL-encodes it; names that
        # contain '#', '&' or '+' would otherwise corrupt the query string.
        query = {"ref_name": branch, "private_token": private_token,
                 "page": page, "per_page": 20}
        for attempt in range(1, max_attempts + 1):
            try:
                res = requests.get(endpoint, params=query, headers=headers, timeout=10)
                break
            except requests.RequestException as e:
                print(e)
                # Bounded retry: a permanent failure must not loop forever.
                if attempt == max_attempts:
                    raise
        commits = res.json()
        if not isinstance(commits, list):
            # An error object instead of a commit list: skip this branch.
            print("===commits skipped=== project " + str(project_id) + ", branch " + branch
                  + ", HTTP " + str(res.status_code) + ": " + str(commits))
            break
        if not commits:
            break
        for commit in commits:
            if any(marker in commit["title"] or marker in commit["message"]
                   for marker in merge_markers):  # merge operations are not counted
                continue
            authored = utc_time(commit["authored_date"])
            if authored < window_start or authored > window_end:  # outside the time window
                continue
            print("===project_id====" + str(project_id) + ",===branch===" + branch + ",====start_time===" + start_time + ",====end_time===" + end_time)
            commit_ids.append(commit["id"])
        page += 1
    print("===commit_ids====" + str(len(commit_ids)))
    return commit_ids
# Look up the line statistics of one commit by its id
def commit_code(project_id, commit_id):
    """Record the line statistics of one commit if ``currentName`` authored it.

    Fetches ``GET api/v4/projects/<id>/repository/commits/<sha>`` and, when
    the commit's ``author_name`` equals ``currentName``, appends a dict with
    the keys ``name``, ``additions``, ``deletions`` and ``total`` to the
    module-level ``info`` list.

    Args:
        project_id: Numeric id of the project.
        commit_id: Id of the commit.

    Raises:
        requests.RequestException: The commit could not be fetched after
            ``max_attempts`` tries.
    """
    max_attempts = 5
    endpoint = (gitlab_url + "api/v4/projects/" + str(project_id)
                + "/repository/commits/" + commit_id)
    query = {"private_token": private_token}
    for attempt in range(1, max_attempts + 1):
        try:
            res = requests.get(endpoint, params=query, headers=headers, timeout=10)
            break
        except requests.RequestException as e:
            print(e)
            # Bounded retry: a permanent failure must not loop forever.
            if attempt == max_attempts:
                raise
    data = res.json()
    if not isinstance(data, dict) or "stats" not in data or "author_name" not in data:
        # Error responses carry no statistics; skip instead of raising KeyError.
        print("===commit skipped=== project " + str(project_id) + ", commit " + commit_id
              + ", HTTP " + str(res.status_code))
        return
    if currentName == data["author_name"]:
        stats = data["stats"]
        # Git user name, added lines, deleted lines, total changed lines
        info.append({"name": data["author_name"], "additions": stats["additions"],
                     "deletions": stats["deletions"], "total": stats["total"]})
# Query GitLab and collect the statistics
def gitlab_info(start_time, end_time):
    """Collect commit statistics for every project, branch and commit.

    Walks all projects, all branches of each project and all commits of each
    branch inside the time window, and calls ``commit_code`` for each commit.

    A commit that is reachable from several branches of the same project is
    returned once per branch by ``project_commits``; it is processed only
    the first time so its lines are not counted repeatedly.

    Args:
        start_time: Start of the window, passed on to ``project_commits``.
        end_time: End of the window, passed on to ``project_commits``.
    """
    for project_id in gitlab_projects():  # every project id
        seen_commits = set()  # commit ids already counted for this project
        for branch_name in project_branches(project_id):  # every branch of the project
            for commit_id in project_commits(project_id, branch_name, start_time, end_time):
                if commit_id in seen_commits:
                    continue
                seen_commits.add(commit_id)
                commit_code(project_id, commit_id)  # record the commit's line counts
if __name__ == "__main__":
    print("正在统计数据,请耐心等待,这将花费不少时间~")
    gitlab_info('2022-10-01 00:00:00', '2022-10-28 23:59:59')  # start / end of the reporting window

    # Sum the per-commit records by Git user name, keeping first-seen order.
    # Each value is [added lines, deleted lines, total changed lines].
    res = {}
    for record in info:
        sums = res.setdefault(record["name"], [0, 0, 0])
        sums[0] += record["additions"]
        sums[1] += record["deletions"]
        sums[2] += record["total"]

    # Print the report: every cell except the last is followed by the
    # padding computed by str_format
    print("Git用户名 新增代码数 删除代码数 总计代码数")
    for user, (added, removed, changed) in res.items():
        padded_cells = [user, str(added), str(removed)]
        print("".join(cell + " " * str_format(cell) for cell in padded_cells) + str(changed))
3.2 demo2
# !/usr/bin/python3
# coding=utf8
import gitlab
import time
from main import str_format
url = 'xxxxxx' # Address of the GitLab installation
private_token = 'xxxxx' # GitLab access token (the one obtained in the setup step above)
currentName = "xxxxx" # Only commits by this Git user name are counted
start_time = '2021-09-01T00:00:00Z' # Passed as 'since' when listing commits
end_time = '2022-10-28T00:00:00Z' # Passed as 'until' when listing commits
info = [] # Per-commit statistics records collected by the main loop
# Log in: create the GitLab client object gl
gl = gitlab.Gitlab(url, private_token)
if __name__ == "__main__":
    print("正在统计数据,请耐心等待,这将花费不少时间~")
    startTime = time.time()
    projects = gl.projects.list(get_all=True)  # fetch every project first
    for project in projects:
        # A commit reachable from several branches is listed once per branch;
        # remember what was counted so its lines are added only once.
        seen_commits = set()
        # get_all=True: a plain list() call returns only the first page of branches
        branches = project.branches.list(get_all=True)
        for branch in branches:
            # Commits of this branch inside the time window
            commits = project.commits.list(get_all=True,
                                           query_parameters={'since': start_time, 'until': end_time,
                                                             'ref_name': branch.name})
            for commit in commits:
                if commit.id in seen_commits:
                    continue
                seen_commits.add(commit.id)
                # The single-commit lookup is where 'stats' is read from
                com = project.commits.get(commit.id)
                if not (hasattr(com, "author_name") and hasattr(com, "stats")):
                    continue
                if com.author_name != currentName:
                    continue
                # Git user name, added lines, deleted lines, total changed lines
                info.append({"name": com.author_name,
                             "additions": com.stats['additions'],
                             "deletions": com.stats['deletions'],
                             "total": com.stats['total']})

    # Sum the per-commit records by Git user name, keeping first-seen order.
    # Each value is [added lines, deleted lines, total changed lines].
    res = {}
    for record in info:
        sums = res.setdefault(record["name"], [0, 0, 0])
        sums[0] += record["additions"]
        sums[1] += record["deletions"]
        sums[2] += record["total"]

    # Print the report
    print("Git用户名 新增代码数 删除代码数 总计代码数")
    for user, (added, removed, changed) in res.items():
        padded_cells = [user, str(added), str(removed)]
        print("".join(cell + " " * str_format(cell) for cell in padded_cells) + str(changed))
    endTime = time.time()
    print("运行时间为:", endTime - startTime)
3.3 demo3
该 demo 优化了多层 for 循环,并使用多线程统计提交的代码量。它采用生产者-消费者模型:多个生产者线程生产统计数据,主线程消费各个生产者线程产出的数据,最后汇总、去重累加并输出到控制台。
# !/usr/bin/python3
# coding=utf8
import itertools
import math
import threading
import time
from concurrent.futures import ThreadPoolExecutor
import gitlab
from main import str_format
url = 'xxxxxx' # Address of the GitLab installation
private_token = 'xxxxx' # GitLab access token (the one obtained in the setup step above)
currentName = "xxxxx" # Git user name to report on when flag is True
flag = True # True: count only currentName; False: count every author
start_time = '2021-06-01T00:00:00Z' # Passed as 'since' when listing commits
end_time = '2022-10-28T00:00:00Z' # Passed as 'until' when listing commits
info = [] # Per-commit statistics records appended by detail()
# Log in: create the GitLab client object gl
gl = gitlab.Gitlab(url, private_token)
# Thread pool with up to 100 worker threads
threadpool = ThreadPoolExecutor(100)
pageSize = 2 # Number of projects in each slice handed to a producer
# Queue object (currently disabled)
# q = queue.Queue(100000)
A_lock = threading.Lock() # Held while appending to info
def detail(projects1):
    """Collect per-commit line statistics for the given projects into ``info``.

    For every project, every branch and every commit of that branch between
    ``start_time`` and ``end_time``, the full commit is fetched and a dict
    with the keys ``name``, ``additions``, ``deletions`` and ``total`` is
    appended to the module-level ``info`` list while holding ``A_lock``.
    When ``flag`` is true only commits whose author is ``currentName`` are
    recorded; otherwise every author is recorded.

    A commit that is reachable from several branches of one project is
    listed once per branch; it is recorded only the first time so its lines
    are not counted repeatedly.

    Args:
        projects1: Iterable of python-gitlab project objects.
    """
    for project in projects1:
        print("1")
        seen_commits = set()  # commit ids already handled for this project
        for branch in project.branches.list(get_all=True):
            # Commits of this branch inside the time window
            commits = project.commits.list(get_all=True,
                                           query_parameters={'since': start_time, 'until': end_time,
                                                             'ref_name': branch.name})
            for commit in commits:
                if commit.id in seen_commits:
                    continue
                seen_commits.add(commit.id)
                # The single-commit lookup is where 'stats' is read from
                com = project.commits.get(commit.id)
                if not (hasattr(com, "author_name") and hasattr(com, "stats")):
                    continue
                author_name = com.author_name
                print("authorName:", author_name)
                if flag and author_name != currentName:
                    continue
                # Git user name, added lines, deleted lines, total changed lines
                record = {"name": author_name,
                          "additions": com.stats['additions'],
                          "deletions": com.stats['deletions'],
                          "total": com.stats['total']}
                # ``with`` releases the lock even if the append raises
                with A_lock:
                    info.append(record)
                print("current人执行中" if flag else "other人执行中")
            print("2")
        print("3")
"""for project in projects: # 遍历每一个项目
branches = project.branches.list(get_all=True) # 把每个项目下面的所有分支查出来
for branch in branches: # 然后再遍历每一个分支
commits = project.commits.list(get_all=True, query_parameters={'since': start_time, 'until': end_time,
'ref_name': branch.name}) # 根据时间、分支名遍历该分支下面所有的提交记录
for commit in commits: # 然后再遍历每个提交记录,查询每个提交记录的人和量
com = project.commits.get(commit.id)
if hasattr(com, "author_name") and hasattr(com, "stats"):
name = com.author_name
if name == currentName and flag:
name = com.author_name
additions = com.stats['additions']
deletions = com.stats['deletions']
total = com.stats['total']
temp = {"name": name, "additions": additions,
"deletions": deletions,
"total": total} # Git工具用户名,新增代码数,删除代码数,总计代码数
p.put(temp)
print("current人执行中")
elif not flag:
name = com.author_name
additions = com.stats['additions']
deletions = com.stats['deletions']
total = com.stats['total']
temp = {"name": name, "additions": additions,
"deletions": deletions,
"total": total} # Git工具用户名,新增代码数,删除代码数,总计代码数
p.put(temp)
print("other人执行中")"""
# Producer
def producer(projects2):
    """Run ``detail`` on one slice of projects, then report completion."""
    detail(projects2)
    print("生产者生产完毕")
# Consumer
def consumer(q3=None):
    """Take one record from ``q3`` and append it to the module-level ``info`` list.

    Args:
        q3: Queue-like object with a ``get()`` method. When it is ``None``
            there is nothing to consume and the call returns immediately.
    """
    if q3 is None:
        return
    # Fetch exactly one item, and do it before taking the lock: a blocking
    # get() while holding A_lock would stall every other thread that wants
    # to append to info.
    item = q3.get()
    with A_lock:
        info.append(item)
    # Print the item that was consumed; calling get() again here would
    # remove (or wait for) a second item.
    print("消费者消费完毕", item)
# def task(video_url):
# print("开始执行任务", video_url)
# time.sleep(1)
# return 1
def done(response):
    """Completion callback: print the result carried by the finished Future.

    Args:
        response: The Future of the finished task; ``result()`` yields the
            task's return value.
    """
    outcome = response.result()
    print("任务执行完后,回调的函数", outcome)
# futuer = threadpool.submit(task, url) # futuer是由task返回的一个Future对象,里面有记录task的返回值
# futuer.add_done_callback(done) # 回调done函数,执行者依然是子线程
# pages = (total + pageSize - 1) / pageSize;
# start = (page - 1) * per_page_num
# end = page * per_page_num
# page_data_list = data_list[start:end]
def producerData(pages2):
    """Split the global ``projects`` list into slices and process them in threads.

    Slice ``i`` covers ``projects[i * pageSize:(i + 1) * pageSize]``. One
    thread running ``producer`` is started per slice, and the function
    returns only after all of them have finished, so ``info`` is complete
    when the caller reads it.

    Args:
        pages2: Number of slices to process.
    """
    workers = []
    for page_index in range(pages2):
        # range() starts at 0, so slice i begins at i * pageSize. Starting at
        # (i - 1) * pageSize makes the first slice empty and never reaches
        # the last page.
        start = page_index * pageSize
        pg = projects[start:start + pageSize]
        # Pass the callable and its argument separately; target=producer(pg)
        # would run the work right here and start a thread that does nothing.
        t = threading.Thread(target=producer, args=(pg,))
        t.start()
        workers.append(t)
    for t in workers:
        t.join()
    print("生产完毕······")
def consumerData(pages3, q3=None):
    """Submit ``pages3`` consumer tasks to the thread pool.

    Args:
        pages3: Number of ``consumer`` tasks to submit.
        q3: Queue handed to every ``consumer`` call. ``consumer`` reads from
            its queue argument, so a queue must be supplied for the tasks to
            have anything to consume.
    """
    for _ in range(pages3):
        threadpool.submit(consumer, q3)
    # Printed after submission; the tasks themselves may still be running.
    print("消费完毕······")
if __name__ == "__main__":
    try:
        print("正在统计数据,请耐心等待,这将花费不少时间~")
        startTime = time.time()
        projects = gl.projects.list(get_all=True)  # fetch every project first
        # Number of pageSize-sized slices needed to cover all projects
        pages = (len(projects) + pageSize - 1) // pageSize
        producerData(pages)

        print("infor.size:", len(info))
        # Sum the per-commit records by Git user name, keeping first-seen
        # order. Each value is [added lines, deleted lines, total changed lines].
        res = {}
        for record in info:
            sums = res.setdefault(record["name"], [0, 0, 0])
            sums[0] += record["additions"]
            sums[1] += record["deletions"]
            sums[2] += record["total"]

        # Print the report
        print("Git用户名 新增代码数 删除代码数 总计代码数")
        for user, (added, removed, changed) in res.items():
            padded_cells = [user, str(added), str(removed)]
            print("".join(cell + " " * str_format(cell) for cell in padded_cells) + str(changed))
        endTime = time.time()
        print("运行时间为:", endTime - startTime)
    except Exception as e:
        print("发生了异常", e)
4.总结
之前跟领导聊天,得知他搞了一个统计开发人员一段时间内代码量的方法。他说他统计了一个前端小姐姐来公司一年提交的代码量是 300 行,然后这个小姐姐就被谈话了,之后她就离职了。所以我就很好奇这种统计要咋搞,自己在网上看了一些教程之后写了这个 demo,或许还有其它效率更高的统计方式。由于我公司 GitLab 代码仓库的项目太多,版本分支和提交记录也太多,所以运行就非常慢;再加上 demo 中嵌套的 for 循环层数实在有点多,导致代码的时间复杂度增大,数据量一大运行就很慢。还有一个优化思路是把统计限定到指定的 project 项目上,而不是统计 GitLab 上所有项目、所有人一段时间内的提交记录。上面的 demo1 和 demo2 在项目多、提交次数多的情况下是很慢的,demo3 的效率优化了一点点,这几个 demo 都还有优化的空间。记得优化好了 at 我哈,请一键三连,么么哒。