Python精确统计GitLab代码贡献量指标【作为开发KPI参考没毛病.....】

一.需求分析与解决思路

**1.需求:**
		需求是公司大领导想要了解每月研发提交的代码量,虽然本人也认为代码量不代表质量。可是现实总是如此的无奈,用量来衡量质量如此不可取的方法只会导致更加内卷。
		

**2.解决思路:**
		工具: 
			Gitstats :仓库代码统计工具之一,可以按git提交人、提交次数、修改文件数、代码行数、注释量在时间维度上进行统计,亦可按各文件类型进行简单的统计,非常方便,适合小团队代码统计分析。
			当然还有其他优秀仓库代码统计工具,个人觉得不太友好的地方是需要clone下代码配合分析,不适合项目非常多的情况。
		
		开发:Python3.x
				如果项目、分支、用户很多,先按照每个项目分析生成报告,再合并到一个总的excel报告中。

二.实现代码部分

**1.方法一:  先按项目分析生成单个csv报告,再汇总为一个csv**
	#!/usr/bin/env python
# coding=utf-8
import requests
import os
import json
import threading
import datetime

# Root URL of the GitLab server; API paths are appended as "<root>/api/v4/...".
git_root_url = "http://blog.csdn.net/"
# Access token sent with every API request.
git_token = "blog.csdn.net"
# Directory the CSV reports are written to.
export_path = "./dist"
# Reporting window - start date (YYYY-MM-DD).
t_from = "2021-06-01"
# Reporting window - end date (YYYY-MM-DD).
t_end = "2021-07-01"
# Reporting window - start date as a datetime object.
date_from = datetime.datetime.strptime(t_from, '%Y-%m-%d')
# Reporting window - end date as a datetime object.
date_end = datetime.datetime.strptime(t_end, '%Y-%m-%d')

# Re-entrant lock guarding the state shared between worker threads.
lock = threading.RLock()

# E-mails to report when seen in a commit (used to track down non-standard accounts).
user_unknown = {}
# Maps an alias e-mail to the canonical e-mail it should be counted under.
user_email_alias_mapping = {}
# Maps a (canonical) e-mail to the display name to use in the report.
user_email_name_mapping = {}


class GitlabApiCountTrueLeTrue:
    """
    Worker class: walks the projects of the GitLab server, collects the
    commits of every branch inside the configured time window and writes
    per-project and total CSV reports aggregated by author e-mail.
    """

    # Ids of all commits already counted, used for de-duplication.
    # The same commit can show up on several branches (e.g. after a merge).
    # Class-level, so it is shared by every instance and worker thread.
    total_commit_map = {}

    # Final result set: project id -> ProjectInfo with commit_map filled in.
    totalMap = {}

    # GitLab caps per_page at 100; asking for more does not return more,
    # so every list endpoint has to be read page by page.
    _PER_PAGE = 100

    def _get_all_pages(self, url, params=None):
        """
        Read every page of a paginated GitLab list endpoint.
        :param url: endpoint URL without query string
        :param params: optional dict of extra query parameters
        :return: list with the items of all pages (empty if the endpoint
                 returned nothing, or an error object instead of a list)
        """
        query = dict(params or {})
        query['private_token'] = git_token
        query['per_page'] = self._PER_PAGE
        items = []
        page = 1
        while True:
            query['page'] = page
            payload = requests.get(url, params=query).json()
            if not isinstance(payload, list):
                # Error responses are JSON objects, not lists.
                print("unexpected response from %s: %s" % (url, payload))
                break
            items.extend(payload)
            if len(payload) < self._PER_PAGE:
                # A short page is the last page.
                break
            page += 1
        return items

    @staticmethod
    def _accumulate(summary, detail):
        """
        Add the counters of ``detail`` to the entry of its author in
        ``summary`` (a dict keyed by author e-mail). The first detail seen
        for an author becomes the entry itself and is updated in place.
        """
        existing = summary.get(detail.author_email)
        if existing is None:
            summary[detail.author_email] = detail
        else:
            existing.total += detail.total
            existing.additions += detail.additions
            existing.deletions += detail.deletions

    def get_projects(self):
        """
        Fetch all projects, analyse each one in its own thread and write
        the total report.
        :return:
        """
        threads = []
        url = '%s/api/v4/projects' % git_root_url
        projects = self._get_all_pages(url, {'order_by': 'last_activity_at'})
        for project in projects:
            # Projects without a default branch have nothing to analyse.
            if project.get('default_branch') is None:
                continue
            idle = date_from - datetime.datetime.strptime(
                project['last_activity_at'], '%Y-%m-%dT%H:%M:%S.%fZ')
            # Skip projects whose last activity is older than the start date.
            if idle.days > 1:
                continue
            project_info = ProjectInfo()
            project_info.project_id = project['id']
            project_info.name = project['name']
            project_info.project_desc = project['description']
            project_info.project_url = project['web_url']
            project_info.path = project['path']
            # One worker thread per project.
            threads.append(threading.Thread(
                target=self.get_branches, args=(project['id'], project_info)))
        # Start every thread, then wait for all of them.
        for t in threads:
            t.start()
        for t in threads:
            t.join()
        # Merge the per-project results into one map keyed by author e-mail.
        final_commit_map = {}
        for project in self.totalMap.values():
            for detail in project.commit_map.values():
                self._accumulate(final_commit_map, detail)
        write_to_csv("%s/GitStatic_%s/%s_%s.csv" % (export_path, t_from, 'total', t_from), final_commit_map,
                     "extra")
        return

    def get_branches(self, project_id, project_info):
        """
        Collect the commits of every unmerged branch of one project,
        aggregate them by author e-mail and write the project report.
        :param project_id:
        :param project_info:
        :return:
        """
        print("进入线程:%d,项目id%d,%s" %
              (threading.get_ident(), project_id, project_info.project_url))
        url = '%s/api/v4/projects/%s/repository/branches' % (
            git_root_url, project_id)

        print("start get branch list %d,url=%s" % (project_id, url))

        branches = self._get_all_pages(url)
        if not branches:
            return
        # The default branch is processed first. It is taken from the branch
        # list ("default" flag) and falls back to 'master' if none is flagged.
        default_branch = next(
            (b.get('name') for b in branches if b.get('default')), 'master')

        # branch name -> {author e-mail -> CommitDetails}
        branch_map = {}
        detail_map = self.get_commits(
            project_id, project_info.project_url, default_branch)
        print("get commits finish project_id=%d branch %s" %
              (project_id, default_branch))

        if detail_map:
            branch_map[default_branch] = detail_map
        for branch in branches:
            branch_name = branch.get('name')
            # The default branch has already been handled above.
            if branch_name is None or branch_name == default_branch:
                continue
            # Branches that are already merged are not processed again.
            if branch.get('merged'):
                continue
            detail_map = self.get_commits(
                project_id, project_info.project_url, branch_name)
            if not detail_map:
                continue
            branch_map[branch_name] = detail_map
            print("get commits finish project_id=%d branch %s" %
                  (project_id, branch_name))

        print("all branch commits finish %d " % project_id)

        # Merge all branches into one map keyed by author e-mail.
        final_commit_map = {}
        for value_map in branch_map.values():
            for detail in value_map.values():
                self._accumulate(final_commit_map, detail)

        if not final_commit_map:
            return

        project_info.commit_map = final_commit_map
        # totalMap is written by every worker thread.
        with lock:
            self.totalMap[project_info.project_id] = project_info

        # Write the project result to a "<path>_<project id>.csv" file.
        write_to_csv(
            "%s/GitStatic_%s/project/%s_%d.csv" % (
                export_path, t_from, project_info.path, project_info.project_id),
            final_commit_map, project_info.project_url)

    def get_commits(self, project_id, project_url, branch_name):
        """
        Fetch all commits of one branch inside the time window and sum up
        their statistics per author e-mail.
        :param project_id:
        :param project_url:
        :param branch_name:
        :return: dict author e-mail -> CommitDetails, or None when the
                 branch has no commits in the time window
        """
        since_date = date_from.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
        until_date = date_end.strftime('%Y-%m-%dT%H:%M:%S.%fZ')

        url = '%s/api/v4/projects/%s/repository/commits' % (
            git_root_url, project_id)
        commits = self._get_all_pages(url, {
            'ref_name': branch_name,
            'since': since_date,
            'until': until_date,
        })
        if not commits:
            return
        print('start get_commits,projectID=%d,branch=%s,url=%s' %
              (project_id, branch_name, url))

        detail_map = {}

        for commit in commits:
            commit_id = commit['id']
            if commit_id is None:
                continue
            # De-duplicate: check and mark under the lock, because the map
            # is shared by all worker threads.
            with lock:
                if commit_id in self.total_commit_map:
                    continue
                self.total_commit_map[commit_id] = commit_id
            # Fetch the statistics of this single commit.
            detail = get_commit_detail(project_id, commit_id)
            if detail is None:
                continue
            if detail.total > 5000:
                # A commit with more than 5000 changed lines is probably
                # generated code (scaffolding etc.) and is ignored.
                continue
            # Not part of the main flow: reports commits made with an
            # e-mail listed in user_unknown (non-standard accounts).
            if detail.author_email in user_unknown:
                print("email %s projectid= %d,branchname,%s,url=%s" % (
                    detail.author_email, project_id, branch_name, project_url))

            # Sum up the statistics per author e-mail.
            self._accumulate(detail_map, detail)
        return detail_map


def get_commit_detail(project_id, commit_id):
    """
    Fetch the statistics of a single commit.
    :param project_id: project id
    :param commit_id: commit id
    :return: a CommitDetails object, or None when the commit is a merge
             commit (title contains 'Merge branch'), has no stats, or the
             server answered with something that is not a commit object
    """
    url = '%s/api/v4/projects/%s/repository/commits/%s' \
          % (git_root_url, project_id, commit_id)
    payload = requests.get(url, params={'private_token': git_token}).json()
    # An error answer may lack the commit fields; skip it instead of
    # raising inside the worker thread and losing the whole project.
    if not isinstance(payload, dict):
        return None
    stats = payload.get('stats')
    if stats is None:
        return None
    if 'Merge branch' in (payload.get('title') or ''):
        return None

    author_name = payload.get('author_name')
    author_email = payload.get('author_email')
    # Count alias e-mails under their canonical e-mail.
    alias_email = user_email_alias_mapping.get(author_email)
    if alias_email is not None:
        author_email = alias_email
    # Prefer the configured display name for the (canonical) e-mail.
    mapped_name = user_email_name_mapping.get(author_email)
    if mapped_name is not None:
        author_name = mapped_name

    details = CommitDetails()
    details.additions = stats['additions']
    details.deletions = stats['deletions']
    details.total = stats['total']
    details.author_email = author_email
    details.author_name = author_name
    return details


def make_dir_safe(file_path):
    """
    Utility: make sure the directory needed for writing ``file_path`` exists.
    :param file_path: path of a file, or of a directory when it ends with "/"
    :return:
    """
    # dirname() gives the parent directory of a file path and the directory
    # itself for a path ending with a separator.
    folder_path = os.path.dirname(file_path)
    if not folder_path:
        # Bare file name: it lives in the current directory, nothing to create.
        return
    # exist_ok avoids the check-then-create race when several threads
    # prepare the same directory at the same time.
    os.makedirs(folder_path, exist_ok=True)

def write_to_csv(file_path, final_commit_map, extra):
    """
    Utility: write the aggregated statistics to a CSV file.
    :param file_path: path of the CSV file (missing directories are created)
    :param final_commit_map: dict whose values carry author_email,
                             author_name, total, additions and deletions
    :param extra: text placed in an additional header column
    :return:
    """
    import csv

    make_dir_safe(file_path)
    # utf-8-sig: explicit encoding for the Chinese headers; the BOM lets
    # spreadsheet tools detect it. newline='' is required by the csv module.
    with open(file_path, 'w', encoding='utf-8-sig', newline='') as out:
        # csv.writer quotes fields that contain commas or quotes.
        writer = csv.writer(out, lineterminator='\n')
        writer.writerow(
            ["提交人邮箱", "提交人姓名", "总行数", "增加行数", "删除行数", extra])
        for value in final_commit_map.values():
            writer.writerow([
                value.author_email, value.author_name, value.total,
                value.additions, value.deletions])


class CommitDetails(json.JSONEncoder):
    """
    Record type for commit statistics: who authored the change and how
    many lines were added, deleted and changed in total.
    """
    # Line counters, zero until filled in.
    additions = deletions = total = 0
    # Author identity, unset until filled in.
    author_name = author_email = None


class ProjectInfo(json.JSONEncoder):
    """
    Record type describing one project; every field is unset (None)
    until the caller fills it in.
    """
    # Identification of the project.
    project_id = name = path = None
    # Descriptive fields.
    project_desc = project_url = None
    # Aggregated commit statistics attached to the project.
    commit_map = None


if __name__ == '__main__':
    # Script entry point: create the worker and generate all reports.
    GitlabApiCountTrueLeTrue().get_projects()

2.方法二: 在代码中分析每个项目,直接汇总为一个csv。

#!/usr/bin/env python
# coding=utf-8

import time
import gitlab
import collections
import pandas as pd

# python-gitlab client shared by the functions below.
gl = gitlab.Gitlab(
    'http://blog.csdn.net/',
    private_token='blog.csdn.net',
    timeout=60,
    api_version='4',
)

# Reporting window passed as since/until to the commits API.
# ISO 8601 timestamps need a zero-padded day ("01", not "1").
start_time = '2021-06-01T00:00:00Z'
end_time = '2021-07-01T23:00:00Z'


def get_gitlab():
    """
    Collect one record per commit and branch from the GitLab API.

    Walks every owned project and every branch of it, and reads the stats
    of each commit made between start_time and end_time.
    :return: list of dicts with the keys projectName, authorName, branch,
             additions, deletions and commitNum
    """
    list2 = []
    projects = gl.projects.list(owned=True, all=True)
    for num, project in enumerate(projects, start=1):
        print("查看了%d个项目" % num)
        # all=True: without it only the first page of branches is returned.
        for branch in project.branches.list(all=True):
            commits = project.commits.list(all=True, query_parameters={'since': start_time, 'until': end_time,
                                                                       'ref_name': branch.name})

            for commit in commits:
                try:
                    # The commit list carries no stats; fetch the full commit.
                    com = project.commits.get(commit.id)
                    pro = {
                        "projectName": project.path_with_namespace,
                        "authorName": com.author_name,
                        "branch": branch.name,
                        "additions": com.stats["additions"],
                        "deletions": com.stats["deletions"],
                        "commitNum": com.stats["total"],
                    }
                except (gitlab.exceptions.GitlabError, AttributeError, KeyError, TypeError) as err:
                    # Best effort: report the failing commit and carry on.
                    print("有错误, 请检查: %s %s %s" % (project.path_with_namespace, commit.id, err))
                else:
                    list2.append(pro)

    return list2


def data(records=None):
    """
    Aggregate per-commit records into one row per project/author/branch.

    :param records: iterable of dicts with the keys projectName, authorName,
                    branch, additions, deletions and commitNum; when omitted
                    the records are fetched with get_gitlab()
    :return: list of dicts using the Chinese report column names
    """
    if records is None:
        records = get_gitlab()

    ret = {}
    for ele in records:
        # A tuple key keeps the three parts apart; plain concatenation
        # would merge e.g. ("g/p", "ann") with ("g/", "pann").
        key = (ele["projectName"], ele["authorName"], ele["branch"])
        entry = ret.get(key)
        if entry is None:
            # Copy so the caller's record is not modified.
            entry = dict(ele)
            entry["commitTotal"] = 1
            ret[key] = entry
        else:
            entry["additions"] += ele["additions"]
            entry["deletions"] += ele["deletions"]
            entry["commitNum"] += ele["commitNum"]
            entry["commitTotal"] += 1

    list1 = []
    for v in ret.values():
        # Rename the keys to the report column names.
        v["项目名"] = v.pop("projectName")
        v["开发者"] = v.pop("authorName")
        v["分支"] = v.pop("branch")
        v["添加代码行数"] = v.pop("additions")
        v["删除代码行数"] = v.pop("deletions")
        v["提交总行数"] = v.pop("commitNum")
        # "commitTotal" stays in the row, as before, for existing callers.
        v["提交次数"] = v["commitTotal"]
        list1.append(v)
    print(list1)
    return list1


def csv(csvName):
    """
    Write the aggregated report rows from data() to the CSV file csvName.
    """
    rows = data()
    # Column order of the report.
    headers = ["项目名", "开发者", "分支", "添加代码行数", "删除代码行数", "提交总行数", "提交次数"]
    report = pd.DataFrame(rows, columns=headers)
    report.to_csv(csvName, index=False, encoding="utf_8_sig")


if __name__ == "__main__":
    # Script entry point: write the report next to the script.
    report_path = "./gitlab.csv"
    csv(report_path)

三.效果展示

1.方法一效果:
生成报告为project目录和total汇总csv

单个项目的统计样式

由多个单项目csv汇总为一个csv报告

2.方法二效果:

按照每个项目的不同分支来汇总为一个csv报告

##也可以加上发送邮件功能,具体参考我博客其他文章把该模块自行加进去。

  • 12
    点赞
  • 37
    收藏
    觉得还不错? 一键收藏
  • 13
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 13
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值