国庆期间接了大佬一个需求,需要来统计不同群组的项目代码行数
如果只是查看单个项目,建议使用gitstats
下载
git clone https://github.com/hoxu/gitstats.git
使用
./gitstats projectdir inputdir
#projectdir 为项目目录 inputdir为输出目录
完成后查看inputdir下的index.html文件,即可看到web页面展示
因为需要的是查看所有群组的项目,这里就使用gitlab的python api接口来针对数据进行整理-代码如下
这里是用多线程跑的,结果发现有很多接口开始报502,
后面也没有解决这个问题。
也可以不用线程跑,耗时取决于项目数量和提交次数,
不过项目太多的话估计要跑几个小时
pip3 install python-gitlab
pip3 install xlwt
import threading
import gitlab
import xlwt
def getAllProjects():
    """Return the full project list from the configured GitLab server.

    The client is built from the module-level ``private_host`` and
    ``private_token`` settings; ``all=True`` requests the complete listing.
    """
    return gitlab.Gitlab(private_host, private_token=private_token).projects.list(all=True)
def getAllBranchByProject(project):
    """Return every branch of *project*, or ``""`` when listing fails.

    Args:
        project: object exposing ``branches.list(...)``.

    Returns:
        The branch list, or the empty string ``""`` as a failure marker
        (callers compare against ``""``), so one bad project does not abort
        the whole run.
    """
    try:
        # all=True fetches every page; without it only the first page is
        # returned and "master" could be missed on projects with many branches.
        return project.branches.list(all=True)
    except Exception as exc:
        # Best-effort by design, but report the failure instead of hiding it.
        print("failed to list branches: %s" % exc)
        return ""
def getCommitByBranch(project, branch):
    """Return all commits of *branch* in *project* as a list.

    Args:
        project: object exposing ``commits.list(...)``.
        branch: object whose ``name`` selects the ref to list.

    Returns:
        A new list with every commit returned for that ref (merge commits
        included; no filtering is applied).
    """
    commits = project.commits.list(all=True, ref_name=branch.name)
    return list(commits)
def getCodeByCommit(commit, project):
    """Look up *commit* by id in *project* and return its ``stats`` attribute."""
    return project.commits.get(commit.id).stats
def getAuthorCode(project):
    """Record commit and line statistics for the ``master`` branch of *project*.

    When the project has a ``master`` branch, one dict is appended to the
    module-level ``data`` list with the keys ``group``, ``projectname``,
    ``branchename``, ``commitcount`` and ``codecount``.

    Args:
        project: GitLab project object.

    Returns:
        The module-level ``data`` list (unchanged when branch listing failed
        or there is no ``master`` branch).
    """
    print("project:%s" % project)
    branches = getAllBranchByProject(project)
    if branches == "":
        # Branch listing failed for this project; nothing to record.
        return data

    for branch in branches:
        if branch.name != "master":
            continue

        author_commits = getCommitByBranch(project, branch)
        codes = []
        for commit in author_commits:
            code = getCodeByCommit(commit, project)
            print(commit, code)
            codes.append(code)

        data.append({
            # name_with_namespace is rendered as "group / project", so the
            # first segment carries a trailing space unless stripped.
            'group': project.name_with_namespace.split("/")[0].strip(),
            'projectname': project.name,
            # Key spelling is intentional: writeExcel reads 'branchename'.
            'branchename': branch.name,
            'commitcount': len(author_commits),
            'codecount': calculate(codes),
        })
        # Only master is counted, so stop scanning once it is handled.
        break
    return data
def writeExcel(excelPath, data):
    """Write the per-project statistics in *data* to a workbook at *excelPath*.

    Row 0 holds the header, each record fills one following row, and a final
    row holds the column totals for commit count, additions, deletions and
    total lines (columns 3-6).
    """
    book = xlwt.Workbook()
    # Single worksheet holding the whole report
    sheet = book.add_sheet('git')

    header = ['项目组', '工程名称', '分支名称', '提交次数', '新增代码', '删除代码', '总计代码']
    for col, title in enumerate(header):
        sheet.write(0, col, title)

    commit_total = 0
    add_total = 0
    del_total = 0
    line_total = 0
    for row, record in enumerate(data, start=1):
        stats = record['codecount']
        cells = (
            record['group'],
            record['projectname'],
            record['branchename'],
            record['commitcount'],
            stats['additions'],
            stats['deletions'],
            stats['total'],
        )
        for col, value in enumerate(cells):
            sheet.write(row, col, value)
        commit_total += int(record['commitcount'])
        add_total += int(stats['additions'])
        del_total += int(stats['deletions'])
        line_total += int(stats['total'])

    # Totals go in the row right after the last record
    summary_row = len(data) + 1
    for col, value in enumerate((commit_total, add_total, del_total, line_total), start=3):
        sheet.write(summary_row, col, value)
    book.save(excelPath)
def calculate(data):
    """Sum per-commit line statistics.

    Args:
        data: iterable of mappings, each with ``additions``, ``deletions``
            and ``total`` values convertible with ``int()``.

    Returns:
        A dict with the keys ``additions``, ``deletions`` and ``total``
        holding the integer sums (all zero for empty input).

    Raises:
        KeyError: if an entry lacks one of the three keys.
        ValueError, TypeError: if a value cannot be converted to ``int``.
    """
    record = {'additions': 0, 'deletions': 0, 'total': 0}
    # Single pass so that one-shot iterables (generators) are supported too.
    for stats in data:
        for key in record:
            record[key] += int(stats[key])
    return record
if __name__ == '__main__':
    from concurrent.futures import ThreadPoolExecutor, as_completed

    # Access token of the GitLab account
    private_token = ''
    # GitLab server address
    private_host = 'https://gitlab.x.cn/'
    # Shared result list filled by getAuthorCode
    data = []

    projects = getAllProjects()
    print(projects)

    # A bounded pool replaces one-thread-per-project: starting a thread for
    # every project at once floods the server with concurrent requests.
    with ThreadPoolExecutor(max_workers=8) as pool:
        futures = {pool.submit(getAuthorCode, project): project for project in projects}
        for future in as_completed(futures):
            try:
                future.result()
            except Exception as exc:
                # Report and continue so one failing project does not lose
                # the statistics already collected for the others.
                print("project %s failed: %s" % (futures[future].name, exc))

    print(data)
    writeExcel('d:/code_count.xls', data)
通过gitlab-api获取每个项目、计算出每次提交的新增和删除行数后发现,最后的统计还是有些问题:有些上传的文件行数也被统计了,但那些不算代码。后面想了下,用了稍微笨一点的方法:先下载所有项目,然后再通过cloc工具来统计
下载所有项目
import requests,json,uuid
import subprocess
#获取当前账号有权限的全部工程列表
# Collect every project the current account can access
def gitlab_projects():
    """Append one entry per accessible project to the module-level ``lists``.

    Uses the module-level ``session`` and ``git_url``. Each entry is a dict
    with the keys ``group`` (first segment of ``path_with_namespace``),
    ``project_id``, ``name`` and ``project_http_url_to_repo``.

    Raises:
        requests.HTTPError: if a page request returns an error status.
    """
    # rstrip avoids a double slash when git_url already ends with "/".
    projects_api = git_url.rstrip('/') + '/api/v4/projects?simple=yes&per_page=20'

    # Follow the X-Next-Page header rather than X-Total-Pages: the total
    # headers may be absent on large result sets, while the next-page header
    # is simply empty on the last page.
    page = '1'
    while page:
        response = session.get(projects_api + '&page=' + page)
        response.raise_for_status()
        for project_json in json.loads(response.text):
            lists.append({
                "group": project_json['path_with_namespace'].split("/")[0],
                "project_id": project_json['id'],
                "name": project_json['name'],
                "project_http_url_to_repo": project_json["http_url_to_repo"],
            })
        page = response.headers.get('X-Next-Page', '')
def mkdir(path, base_dir="/data/count-git"):
    """Create ``<base_dir>/<path>`` if it does not exist yet.

    Args:
        path: directory name relative to *base_dir*; surrounding whitespace
            and trailing backslashes are removed first.
        base_dir: root under which the directory is created.

    Returns:
        ``True`` when the directory was created, ``None`` when it was
        already there.
    """
    import os

    path = path.strip().rstrip("\\")
    target = os.path.join(base_dir, path)
    # Test the directory that is actually created; checking the bare relative
    # name made the second project of a group fail with FileExistsError.
    if os.path.isdir(target):
        return None
    os.makedirs(target, exist_ok=True)
    print(path + ' 创建成功')
    return True
def download_git(path, url, branch="master"):
    """Clone one branch of a repository into ``/data/count-git/<path>``.

    Args:
        path: group directory (under ``/data/count-git``) to clone into.
        url: repository address without the ``https://`` scheme.
        branch: branch to check out.

    The captured stdout and stderr of ``git clone`` are printed as bytes.
    Failure to start git (missing binary or target directory) is printed
    rather than raised, so one bad repository does not stop the batch.
    """
    # NOTE(review): the credentials are hard-coded in the clone URL; move
    # them to configuration or a git credential helper.
    command = ["git", "clone", "-b", branch, "https://root:git@%s" % url]
    try:
        # Argument list + cwd instead of a shell string: names are not
        # interpreted by a shell, and run() drains both pipes together so a
        # full stderr pipe cannot stall the clone.
        result = subprocess.run(
            command,
            cwd="/data/count-git/%s" % path,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except OSError as exc:
        print(exc)
        return
    print(result.stdout)
    print(result.stderr)
if __name__ == "__main__":
    # GitLab server address
    git_url = 'https://gitlab.x.cn/'
    # GitLab access token
    git_token = ''

    # Request headers: browser-like user agent plus the API token
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
        'PRIVATE-TOKEN': git_token,
    }
    session = requests.Session()
    session.headers = headers
    git_login = session.get(git_url, headers=headers)

    # Fetch the project list, then clone each project into its group directory
    lists = []
    gitlab_projects()
    for entry in lists:
        group = entry["group"]
        mkdir(group)
        # Drop the scheme; download_git prepends its own "https://user:pass@"
        download_git(group, entry["project_http_url_to_repo"].split("//")[1])
使用cloc来统计
import os,subprocess
import xlwt
def getALLDir(path="/data/count-git/"):
    """Return the names of the entries directly inside *path*, sorted.

    Args:
        path: directory to list.

    Returns:
        A sorted list of entry names. ``os.listdir`` gives no ordering
        guarantee, so sorting keeps the report rows in a stable order.

    Raises:
        OSError: if *path* cannot be listed.
    """
    return sorted(os.listdir(path))
def get_alldir(lists):
    """Append every ``/data/count-git/<group>/<project>`` path to ``dir_lists``.

    Args:
        lists: names of the entries found directly under ``/data/count-git``.

    Entries that are not directories (for example script files kept next to
    the clones) are skipped. Results go to the module-level ``dir_lists``.
    """
    root = "/data/count-git"
    for group in lists:
        group_dir = "%s/%s" % (root, group)
        # Checking for a directory replaces the old '".py" in name' test,
        # which crashed on any other stray file and wrongly skipped groups
        # whose name merely contains ".py".
        if not os.path.isdir(group_dir):
            continue
        for project in sorted(os.listdir(group_dir)):
            dir_lists.append("%s/%s" % (group_dir, project))
def _cloc_code_lines(path):
    """Return the code-line count cloc reports for *path*, or 0 on failure."""
    import json

    try:
        # Argument list (no shell) so directory names are never interpreted
        # by a shell; JSON output avoids parsing the text table by column.
        result = subprocess.run(
            ["cloc", "--json", "--quiet", path],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except OSError as exc:
        print("cloc could not be run for %s: %s" % (path, exc))
        return 0

    try:
        report = json.loads(result.stdout.decode())
    except ValueError:
        # No output (e.g. nothing countable in the directory) or not JSON.
        return 0
    if not isinstance(report, dict):
        return 0

    summary = report.get("SUM")
    if isinstance(summary, dict) and "code" in summary:
        return int(summary["code"])
    # Fallback when no SUM entry is present: add up the per-language entries.
    return sum(
        int(entry.get("code", 0))
        for name, entry in report.items()
        if name != "header" and isinstance(entry, dict)
    )


def clos_get_dir(lists):
    """Count code lines for each project directory and record the result.

    Args:
        lists: paths shaped like ``/data/count-git/<group>/<project>``.

    For every path one dict with the keys ``group``, ``project``, ``branch``
    (always ``"master"``) and ``count`` (an int, 0 when cloc yields nothing)
    is appended to the module-level ``dir_lists_count``.
    """
    for repo_dir in lists:
        count = _cloc_code_lines(repo_dir)
        # Leading "/" gives an empty first segment, so the group and project
        # are path segments 3 and 4.
        parts = repo_dir.split("/")
        dir_lists_count.append({
            "group": parts[3],
            "project": parts[4],
            "branch": "master",
            "count": count,
        })
def writeExcel(excelPath, data):
    """Write the per-project code-line counts in *data* to *excelPath*.

    Args:
        excelPath: destination path of the workbook.
        data: sequence of dicts with the keys ``group``, ``project``,
            ``branch`` and ``count``.

    Row 0 holds the header, each record fills one following row, and the row
    after the last record holds the grand total in column 3. A ``count`` that
    cannot be converted to ``int`` is reported on stdout and left out of the
    total, but is still written to its cell.
    """
    workbook = xlwt.Workbook()
    # Single worksheet holding the whole report
    sheet = workbook.add_sheet('git')

    header = ['项目组', '工程名称', '分支名称', '总计代码']
    for col, title in enumerate(header):
        sheet.write(0, col, title)

    code_total = 0
    for row, record in enumerate(data, start=1):
        sheet.write(row, 0, record['group'])
        sheet.write(row, 1, record['project'])
        sheet.write(row, 2, record['branch'])
        try:
            code_total += int(record['count'])
        except (TypeError, ValueError):
            # Non-numeric count: show it and keep going.
            print(code_total, record['count'])
        sheet.write(row, 3, record['count'])

    sheet.write(len(data) + 1, 3, code_total)
    workbook.save(excelPath)
if __name__ == '__main__':
    # Shared accumulators filled by get_alldir and clos_get_dir respectively
    dir_lists = []
    dir_lists_count = []

    # Group directories -> project directories -> per-project line counts
    group_names = getALLDir(path="/data/count-git/")
    get_alldir(group_names)
    clos_get_dir(dir_lists)

    print(dir_lists_count)
    writeExcel("/data/count-code.xls", dir_lists_count)