python统计gitlab各群组及项目的代码量

国庆期间接了大佬一个需求,需要来统计不同群组的项目代码行数

如果只是查看单个项目的统计,建议直接使用 gitstats

下载

git clone https://github.com/hoxu/gitstats.git

使用

./gitstats projectdir outputdir
#projectdir 为项目目录,outputdir 为输出目录

完成后打开 outputdir 下的 index.html 文件,即可看到 web 页面展示

因为需要查看所有群组下的项目,这里使用 GitLab 的 Python API(python-gitlab)来获取并整理数据,代码如下:

这里是用多线程跑的,结果发现很多接口开始报 502,
这个问题后来也没有解决。
也可以不用线程、改为串行执行,耗时取决于项目数量和提交次数,
不过项目太多的话,估计要跑好几个小时。

pip3 install python-gitlab
pip3 install xlwt
import threading
import gitlab
import xlwt

def getAllProjects():
    """Fetch the projects visible to the configured GitLab account.

    Builds a python-gitlab client from the module-level ``private_host`` and
    ``private_token`` settings and lists projects with ``all=True``.

    Returns:
        Whatever ``client.projects.list(all=True)`` returns (the project
        objects consumed by ``getAuthorCode``).
    """
    gl = gitlab.Gitlab(private_host, private_token=private_token)
    return gl.projects.list(all=True)


def getAllBranchByProject(project):
    """Return the branches of *project*, or ``""`` when they cannot be listed.

    Args:
        project: A python-gitlab project object exposing ``branches.list``.

    Returns:
        The list of branch objects, or the empty string ``""`` if the listing
        failed (callers compare against ``""`` to detect the failure).
    """
    try:
        # all=True matches the other list calls in this script; without it
        # only the first page of branches comes back and "master" can be
        # missed on projects with many branches.
        return project.branches.list(all=True)
    except Exception as err:
        # Best effort: report the failure instead of hiding it, but keep the
        # historical "" sentinel. Unlike a bare ``except:``, this no longer
        # swallows KeyboardInterrupt / SystemExit.
        print("failed to list branches for %s: %s"
              % (getattr(project, "name", project), err))
        return ""

def getCommitByBranch(project, branch):
    """Return every commit reachable from *branch* in *project*.

    Merge commits are included; no filtering is applied.

    Args:
        project: A python-gitlab project object exposing ``commits.list``.
        branch: A branch object; only its ``name`` attribute is used.

    Returns:
        A new list with the commit objects returned by
        ``project.commits.list(all=True, ref_name=branch.name)``.
    """
    # list(...) keeps the original contract of returning a fresh list.
    return list(project.commits.list(all=True, ref_name=branch.name))

def getCodeByCommit(commit, project):
    """Look up the line statistics of a single commit.

    Args:
        commit: A commit object; only its ``id`` attribute is used.
        project: A project object exposing ``commits.get``.

    Returns:
        The ``stats`` attribute of the commit fetched by id (consumed by
        ``calculate`` as a mapping with additions/deletions/total).
    """
    return project.commits.get(commit.id).stats

def getAuthorCode(project):
    """Collect commit and line statistics for the ``master`` branch of *project*.

    For the branch named "master", every commit is fetched, its stats are
    summed with ``calculate`` and one row is appended to the module-level
    ``data`` list.

    Args:
        project: A python-gitlab project object.

    Returns:
        The module-level ``data`` list (also mutated in place), whose rows
        are dicts with the keys ``group``, ``projectname``, ``branchename``,
        ``commitcount`` and ``codecount``.
    """
    print("project:%s" % project)
    branches = getAllBranchByProject(project)
    if not branches:
        # "" means the branch listing failed; an empty list means there is
        # nothing to count. Either way no row is produced.
        return data

    for branch in branches:
        if branch.name != "master":
            continue

        author_commits = getCommitByBranch(project, branch)
        codes = []
        for commit in author_commits:
            code = getCodeByCommit(commit, project)
            print(commit, code)
            codes.append(code)

        data.append({
            # GitLab formats name_with_namespace as "Group / Project", so the
            # first segment carries padding around the slash; strip it to
            # get a clean group name.
            'group': project.name_with_namespace.split("/")[0].strip(),
            'projectname': project.name,
            'branchename': branch.name,
            'commitcount': len(author_commits),
            'codecount': calculate(codes),
        })
    return data

def writeExcel(excelPath, data):
    """Write the per-project statistics and a totals row to an .xls file.

    Args:
        excelPath: Destination path handed to ``workbook.save``.
        data: Sequence of row dicts with the keys ``group``, ``projectname``,
            ``branchename``, ``commitcount`` and ``codecount`` (itself a
            mapping with ``additions``, ``deletions`` and ``total``).
    """
    book = xlwt.Workbook()
    # Create the single worksheet that holds the report.
    sheet = book.add_sheet('git')

    headers = ['项目组', '工程名称', '分支名称', '提交次数', '新增代码', '删除代码', '总计代码']
    for col, title in enumerate(headers):
        sheet.write(0, col, title)

    commit_sum = 0
    added_sum = 0
    deleted_sum = 0
    total_sum = 0
    # Data rows start at row 1, directly below the header row.
    for row, record in enumerate(data, start=1):
        sheet.write(row, 0, record['group'])
        sheet.write(row, 1, record['projectname'])
        sheet.write(row, 2, record['branchename'])

        commit_sum += int(record['commitcount'])
        sheet.write(row, 3, record['commitcount'])

        stats = record['codecount']
        added_sum += int(stats['additions'])
        sheet.write(row, 4, stats['additions'])
        deleted_sum += int(stats['deletions'])
        sheet.write(row, 5, stats['deletions'])
        total_sum += int(stats['total'])
        sheet.write(row, 6, stats['total'])

    # Totals go on the row right after the last data row.
    summary_row = len(data) + 1
    for col, value in zip((3, 4, 5, 6), (commit_sum, added_sum, deleted_sum, total_sum)):
        sheet.write(summary_row, col, value)
    book.save(excelPath)

def calculate(data):
    """Sum the per-commit line statistics.

    Args:
        data: Iterable of mappings, each with ``additions``, ``deletions``
            and ``total`` values convertible by ``int``.

    Returns:
        A dict with the summed ``additions``, ``deletions`` and ``total``
        (all zero for empty input).
    """
    totals = {'additions': 0, 'deletions': 0, 'total': 0}
    for stats in data:
        print(stats)
        for key in totals:
            totals[key] += int(stats[key])
    return totals


if __name__ == '__main__':
    # Entry point: collect statistics for every project, one thread per
    # project, then write the report.
    # The settings below are indented into this block; at column 0 they left
    # the ``if`` without a body and the script failed with an
    # IndentationError. They are still module-level globals, as the helper
    # functions expect.

    # Access token of the GitLab account used for the API calls.
    private_token = ''
    # Base URL of the GitLab instance.
    private_host = 'https://gitlab.x.cn/'

    # Shared result list; getAuthorCode appends one row per project to it.
    data = []
    thread_list = []
    projects = getAllProjects()
    print(projects)

    for i in projects:
        t = threading.Thread(target=getAuthorCode, args=(i,))
        thread_list.append(t)

    # NOTE(review): all threads are started at once, one per project; the
    # surrounding article reports HTTP 502 responses in this mode.
    for threadname in thread_list:
        threadname.start()
    for threadname in thread_list:
        threadname.join()
    print(data)
    writeExcel('d:/code_count.xls', data)

通过 gitlab-api 获取每个项目、并累加每次提交的新增和删除行数之后发现,最后的统计还是有些问题:有些上传的文件的行数也被统计进去了,但那些并不算代码。后面想了下,用了稍微笨一点的方法:先下载所有项目,然后再通过 cloc 工具来统计

下载所有项目

import requests,json,uuid
import subprocess

#获取当前账号有权限的全部工程列表
# Collect every project the current account has access to.
def gitlab_projects():
    """Append one entry per accessible GitLab project to the global ``lists``.

    Uses the module-level ``session`` and ``git_url``. Pages of the
    ``/api/v4/projects`` endpoint are requested one after another until an
    empty page comes back, so the function does not depend on the
    ``X-Total-Pages`` response header being present.

    Each appended dict has the keys ``group`` (first segment of
    ``path_with_namespace``), ``project_id``, ``name`` and
    ``project_http_url_to_repo``.

    Raises:
        requests.HTTPError: If a page request returns an error status.
    """
    # git_url may end with "/"; strip it so the URL does not contain "//api".
    projects_api = git_url.rstrip('/') + '/api/v4/projects?simple=yes&per_page=20'
    page = 1
    while True:
        response = session.get(projects_api + '&page=' + str(page))
        # Fail loudly on 401/5xx instead of iterating over an error payload.
        response.raise_for_status()
        projects_json = response.json()
        if not projects_json:
            break
        for project_json in projects_json:
            lists.append({
                "group": project_json['path_with_namespace'].split("/")[0],
                "project_id": project_json['id'],
                "name": project_json['name'],
                "project_http_url_to_repo": project_json["http_url_to_repo"],
            })
        page += 1

def mkdir(path, base="/data/count-git"):
    """Create the directory ``<base>/<path>`` unless it already exists.

    The existence check is made against the same location that gets created.
    Checking the bare relative *path* instead made a second call for the same
    group raise ``FileExistsError``.

    Args:
        path: Directory name relative to *base*; surrounding whitespace and
            trailing backslashes are removed first.
        base: Parent directory; defaults to the clone root used by this
            script.

    Returns:
        True if the directory was created, False if it already existed.
    """
    import os
    path = path.strip().rstrip("\\")
    target = os.path.join(base, path)
    if os.path.isdir(target):
        return False
    # exist_ok guards against the directory appearing between check and create.
    os.makedirs(target, exist_ok=True)
    print(path+' 创建成功')
    return True

def download_git(path,url):
    """Clone the ``master`` branch of a repository into the group directory.

    Runs ``git clone -b master`` inside ``/data/count-git/<path>`` and prints
    git's captured stdout and stderr (as bytes).

    Args:
        path: Group directory name below ``/data/count-git``.
        url: Repository URL without the scheme (``host/group/project.git``).
    """
    # NOTE(review): the credentials "root:git" are hard-coded in the clone
    # URL; consider reading them from configuration instead.
    command = ["git", "clone", "-b", "master", "https://root:git@%s" % url]
    try:
        # An argument list without a shell keeps group/repository names from
        # being interpreted as shell syntax, and run() drains both pipes
        # together so a chatty stderr cannot block the child.
        result = subprocess.run(
            command,
            cwd="/data/count-git/%s" % path,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except OSError as err:
        # Missing directory or missing git binary: report it and carry on,
        # as the shell-based version did.
        print(err)
        return
    print(result.stdout)
    print(result.stderr)

if __name__ == "__main__":
    # Entry point: list every accessible project and clone each one into a
    # directory named after its group.

    # Base URL of the GitLab instance.
    git_url = 'https://gitlab.x.cn/'
    # GitLab access token.
    git_token = ''
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
        'PRIVATE-TOKEN': git_token,
    }
    session = requests.Session()
    session.headers = headers
    git_login = session.get(git_url, headers=headers)

    # Filled by gitlab_projects() with one dict per project.
    lists = []
    gitlab_projects()
    for entry in lists:
        group = entry["group"]
        mkdir(group)
        # Drop the "https://" scheme; download_git prepends its own.
        repo_location = entry["project_http_url_to_repo"].split("//")[1]
        download_git(group, repo_location)

使用cloc来统计

import os,subprocess
import xlwt

def getALLDir(path = "/data/count-git/"):
    """Return the names of the entries directly inside *path*.

    Args:
        path: Directory to list; defaults to the clone root.

    Returns:
        The list produced by ``os.listdir(path)``.
    """
    return os.listdir(path)

def get_alldir(lists):
    """Append the path of every project directory to the global ``dir_lists``.

    Each name in *lists* is treated as a group directory below
    ``/data/count-git``; every entry inside it is recorded as
    ``/data/count-git/<group>/<entry>``.

    Entries that are not directories are skipped. This replaces the old
    ``".py" in name`` substring filter, which crashed on any other stray
    file and wrongly skipped directories whose name contained ".py".

    Args:
        lists: Iterable of entry names found in ``/data/count-git``.
    """
    for name in lists:
        group_dir = "/data/count-git/%s" % name
        if not os.path.isdir(group_dir):
            continue
        for project in os.listdir(group_dir):
            dir_lists.append("%s/%s" % (group_dir, project))


def clos_get_dir(lists):
    """Run ``cloc`` on every project directory and record its code line count.

    For each path, one dict with the keys ``group``, ``project``, ``branch``
    (always "master") and ``count`` is appended to the global
    ``dir_lists_count``. ``count`` is the fifth whitespace-separated field of
    the second-to-last line of cloc's output (what the former
    ``tail -2 | awk '{print $5}'`` pipeline extracted), or ``0`` when that
    field is missing.

    Args:
        lists: Iterable of paths shaped like ``/data/count-git/<group>/<project>``.
    """
    for i in lists:
        count = 0
        try:
            # Argument list, no shell: directory names with spaces or shell
            # metacharacters are passed through verbatim, and run() drains
            # stdout and stderr together.
            result = subprocess.run(
                ["cloc", i],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
        except OSError as err:
            # e.g. cloc is not installed; keep count at 0 for this row.
            print("cloc failed for %s: %s" % (i, err))
        else:
            last_two = result.stdout.decode().splitlines()[-2:]
            fields = last_two[0].split() if last_two else []
            if len(fields) >= 5:
                count = fields[4]
        parts = i.split("/")
        dir_lists_count.append({
            "group": parts[3],
            "project": parts[4],
            "branch": "master",
            "count": count,
        })

def writeExcel(excelPath, data):
    """Write the cloc results and a grand total to an .xls file.

    Args:
        excelPath: Destination path handed to ``workbook.save``.
        data: Sequence of row dicts with the keys ``group``, ``project``,
            ``branch`` and ``count``.
    """
    workbook = xlwt.Workbook()
    # Create the single worksheet that holds the report.
    sheet = workbook.add_sheet('git')
    # This line was over-indented before, which made the whole script fail
    # with an IndentationError.
    row0 = ['项目组', '工程名称', '分支名称', '总计代码']
    for col, title in enumerate(row0):
        sheet.write(0, col, title)

    total = 0
    # Data rows start at row 1, directly below the header row.
    for row, recode in enumerate(data, start=1):
        sheet.write(row, 0, recode['group'])
        sheet.write(row, 1, recode['project'])
        sheet.write(row, 2, recode['branch'])
        try:
            total += int(recode['count'])
        except (TypeError, ValueError):
            # Non-numeric count: leave it out of the total but show it.
            print(total, recode['count'])
        sheet.write(row, 3, recode['count'])

    # Grand total goes on the row right after the last data row.
    sheet.write(len(data) + 1, 3, total)
    workbook.save(excelPath)


if __name__ == '__main__':
    # Entry point: find every cloned project, count its code with cloc and
    # write the report.

    # Module-level accumulators filled by get_alldir / clos_get_dir.
    dir_lists, dir_lists_count = [], []

    group_names = getALLDir(path = "/data/count-git/")
    get_alldir(group_names)
    clos_get_dir(dir_lists)
    print(dir_lists_count)
    writeExcel("/data/count-code.xls", dir_lists_count)
  • 0
    点赞
  • 12
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值