Python实现一个代码行数统计工具（易拓展到其他语言版）

-undefined--

已于 2022-06-21 02:51:48 修改

阅读量612

点赞数 4

文章标签： python c++ ruby

于 2022-06-21 02:50:36 首次发布

本文链接：https://blog.csdn.net/cwk240987054/article/details/125383682

版权

前情提要及单种语言的代码行统计思路，详见我另一篇文章：Python实现一个代码行数统计工具（以C/C++为例）
关于如何让这个工具容易拓展到其他语言，我的做法是用列表把不同语言的续行符、注释符、字符串符存起来，再根据文件后缀确定该用哪一组符号。这只是我自己的一种思路，感觉比较一般，有想法的读者可以进一步优化。
具体代码如下：

from queue import Empty
import sys
import os
from enum import Enum
import time
from unittest.mock import patch
import threading

class Counter:
    """Count total, blank, code and comment lines of source files.

    Every ``CodeCounter`` call adds its per-file result to the class
    attributes ``Line_numbers``, ``Code``, ``total_comment_numbers`` and
    ``Blanks``.

    To support another language, append its file extension to
    ``extendList`` and add an entry at the same index to each symbol table
    inside ``CodeCounter``.
    """

    Line_numbers = 0            # lines over all counted files
    Code = 0                    # lines containing effective code
    total_comment_numbers = 0   # lines containing a comment
    Blanks = 0                  # blank lines
    # -------------------------------------------------------------------------
    # Maintain this block when extending the tool to another language.
    extendList = [".h", ".cpp", ".hpp", ".rb"]  # supported file extensions
    # -------------------------------------------------------------------------

    # Guards the running totals: CodeCounter may run in several threads.
    _totals_lock = threading.Lock()

    class _State(Enum):
        """Scanner state while walking the characters of a line."""

        COMMON = 1          # ordinary code
        STRING = 2          # inside a string literal
        BLOCK_COMMENT = 3   # inside a block comment (may span lines)

    @staticmethod
    def get_filelist(dir, Filelist):
        """Recursively collect the supported source files under ``dir``.

        Args:
            dir: Path of a file or of a directory to walk.
            Filelist: List the matching file paths are appended to
                (mutated in place).

        Returns:
            The same ``Filelist`` object. Directory entries are visited in
            sorted order so the result is deterministic.
        """
        if os.path.isfile(dir):
            # Compare case-insensitively so e.g. "A.CPP" is picked up too.
            if os.path.splitext(dir)[1].lower() in Counter.extendList:
                Filelist.append(dir)
        elif os.path.isdir(dir):
            for entry in sorted(os.listdir(dir)):
                Counter.get_filelist(os.path.join(dir, entry), Filelist)
        return Filelist

    @staticmethod
    def CodeCounter(filename, path):
        """Count the lines of one file, print them and add them to the totals.

        Counting rules:

        * Physical lines joined by a trailing continuation symbol are
          scanned as one logical line; the result is credited once per
          physical line involved.
        * A line counts as code when it has a non-blank character outside
          comments, and as comment when any part of it lies in a comment.
          A line can be both; it is never counted twice as comment.
        * A whitespace-only line counts as blank, even inside a block
          comment.
        * A file ending with a newline gets one extra (blank) line, which
          matches the line number an editor shows after the last newline.

        Args:
            filename: Path of the file; its extension must be listed in
                ``Counter.extendList``.
            path: Root path given by the user; only used to shorten the
                file name in the printed report.

        Returns:
            Tuple ``(total, empty, code, comment)`` for this file.

        Raises:
            ValueError: If the file extension is not in ``extendList``.
            OSError: If the file cannot be opened.
        """
        extendIdx = Counter.extendList.index(
            os.path.splitext(filename)[1].lower())

        # ---------------------------------------------------------------------
        # Maintain this block when extending the tool to another language.
        # Each table is indexed like Counter.extendList.
        LineCommentSymbol = ["//", "//", "//", "#"]  # starts a line comment
        BlockCommentSymbol = ["/*", "/*", "/*", "=begin"]  # opens a block comment
        ExitBlockCommentSymbol = ["*/", "*/", "*/", "=end"]  # closes a block comment
        CharStringSymbol = [["\""], ["\""], ["\""], ["\"", "\'"]]  # string delimiters
        CombinationSymbol = ["\\", "\\", "\\", "\\"]  # line-continuation symbol
        # ---------------------------------------------------------------------

        line_comment = LineCommentSymbol[extendIdx]
        block_open = BlockCommentSymbol[extendIdx]
        block_close = ExitBlockCommentSymbol[extendIdx]
        quotes = CharStringSymbol[extendIdx]
        continuation = CombinationSymbol[extendIdx]

        # The context manager closes the file even if reading fails.
        with open(filename, encoding='gbk', errors='ignore') as fp:
            lines = fp.readlines()

        State = Counter._State
        codes_numbers = 0
        empty = 0
        comment_numbers = 0

        state = State.COMMON
        open_quote = ""   # delimiter that opened the current string
        pending = ""      # text carried over from continued physical lines
        span = 1          # physical lines forming the current logical line
        last_index = len(lines) - 1

        for index, raw in enumerate(lines):
            text = (pending + raw).strip("\r\t ")
            body = text[:-1] if text.endswith("\n") else text

            # A trailing continuation symbol splices this line with the next
            # one. The newline is removed before the check, otherwise the
            # symbol is never the last character. The last line of the file
            # has nothing to be joined with and is scanned as it is.
            # NOTE: this also continues line comments (C/C++ semantics).
            if index < last_index and body.endswith(continuation):
                pending = body[:-len(continuation)]
                span += 1
                continue
            pending = ""
            weight, span = span, 1

            if not body.strip():
                empty += weight
                continue

            has_code = False
            # A non-blank line that starts inside a block comment is a
            # comment line whether or not the comment ends on it.
            has_comment = state is State.BLOCK_COMMENT

            i = 0
            length = len(body)
            while i < length:
                if state is State.COMMON:
                    quote = next(
                        (q for q in quotes if body.startswith(q, i)), None)
                    if quote is not None:
                        has_code = True
                        state = State.STRING
                        open_quote = quote
                        i += len(quote)
                    elif body.startswith(line_comment, i):
                        # The rest of the line is comment; nothing to scan.
                        has_comment = True
                        break
                    elif body.startswith(block_open, i):
                        has_comment = True
                        state = State.BLOCK_COMMENT
                        i += len(block_open)
                    else:
                        if not body[i].isspace():
                            has_code = True
                        i += 1
                elif state is State.STRING:
                    if body[i] == "\\":
                        # Skip the escaped character so that \" or \' does
                        # not terminate the string.
                        i += 2
                    elif body.startswith(open_quote, i):
                        state = State.COMMON
                        i += len(open_quote)
                    else:
                        i += 1
                else:
                    end = body.find(block_close, i)
                    if end < 0:
                        break  # the comment continues on the next line
                    state = State.COMMON
                    i = end + len(block_close)

            if has_code:
                codes_numbers += weight
            if has_comment:
                comment_numbers += weight

            # Only block comments survive a line break; an unterminated
            # string is dropped at the end of the line.
            if state is not State.BLOCK_COMMENT:
                state = State.COMMON

        total = len(lines)
        # Guard against empty files, which have no last line to inspect.
        if lines and lines[-1].endswith("\n"):
            total += 1
            empty += 1

        with Counter._totals_lock:
            # Printing under the lock keeps report lines of concurrent
            # workers from interleaving.
            print("file:{0} total:{1} empty:{2} effective:{3} comment:{4}               effective:{3}".format(filename.replace(path + os.sep, ""), total, empty, codes_numbers, comment_numbers))

            Counter.Line_numbers += total
            Counter.Blanks += empty
            Counter.Code += codes_numbers
            Counter.total_comment_numbers += comment_numbers

        return total, empty, codes_numbers, comment_numbers
        


if __name__ == "__main__":
    # Command-line entry point: count the lines of the file or directory
    # given as the first argument and print a summary table.
    if len(sys.argv) < 2:
        sys.exit("usage: python {0} <file-or-directory>".format(sys.argv[0]))

    path = os.path.abspath(sys.argv[1])  # absolute path of the target
    files = Counter.get_filelist(path, [])

    # Enumeration of the scanner states:
    # Common: ordinary code
    # CharString: inside a string literal
    # LineComment: inside a line comment
    # BlockComments: inside a block comment
    # Bound at module level before any worker thread starts, so code that
    # looks up `Status` as a global can resolve it.
    Status = Enum('Status', 'Init Common CharString LineComment BlockComments')

    # One worker thread per file.
    threads = [
        threading.Thread(target=Counter.CodeCounter, args=(file, path))
        for file in files
    ]

    for thr in threads:
        thr.start()

    # Wait for every worker so the totals below are complete.
    for thr in threads:
        thr.join()

    print("-"*56)
    print("- {0:<10} {1:<10} {2:<10} {3:<10} {4:<10}".format("Files", "Lines", "Code", "Comments", "Blanks"))
    print("-"*56)
    print("  {0:<10} {1:<10} {2:<10} {3:<10} {4:<10}".format(len(files), Counter.Line_numbers, Counter.Code, Counter.total_comment_numbers, Counter.Blanks))
    print("-"*56)

若文中有错误，还请大佬在评论区指正，我会好好学习和改进，谢谢大佬们🙇

-undefined--

关注

4
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Python实现一个代码行数统计工具（易拓展到其他语言版）

关于如何让这个工具容易拓展到其他语言，我的做法是用列表把不同语言的续行符、注释符、字符串符存起来，再根据文件后缀确定该用哪一组符号。这只是我自己的一种思路，感觉比较一般，有想法的读者可以进一步优化。若文中有错误，还请大佬在评论区指正，我会好好学习和改进，谢谢大佬们🙇......
复制链接

扫一扫