前情提要及单种语言的代码行统计思路,详见我另一篇文章:Python实现一个代码行数统计工具(以C/C++为例)
关于如何达到容易拓展到其他语言的效果,我想到的方法是用列表将不同语言的续行符、注释符、字符串符存起来,靠文件后缀来确定该用哪种符号。这只是我自己的一种思路,感觉还比较一般,有想法的读者可以进一步优化。
具体代码如下:
from queue import Empty
import sys
import os
from enum import Enum
import time
from unittest.mock import patch
import threading
class _Status(Enum):
    """States of the per-character scanner used by Counter.CodeCounter.

    Kept private to this block so that CodeCounter does not depend on a
    global that is only created when the file runs as a script.
    """

    Common = 1         # ordinary code, outside strings and comments
    CharString = 2     # inside a string literal
    LineComment = 3    # after a line-comment marker
    BlockComments = 4  # inside a block comment (may span several lines)


class Counter:
    """Count total, code, comment and blank lines of source files.

    The class is used as a namespace: the methods are static and the
    results are accumulated in the class attributes below, so they sum up
    over every file passed to CodeCounter.

    Counting rules (per physical line):
      * a line that contains only whitespace is a blank line, even inside
        a block comment;
      * a line is a code line if it has a non-blank character outside
        comments (string literals count as code);
      * a line is a comment line if any part of it is a comment; a line
        with both code and a comment is counted in both categories, but
        never more than once per category;
      * when the file ends with a newline, one extra (blank) line is added
        to the total, matching what a text editor displays.
    """

    # Running totals over every file processed so far.
    Line_numbers = 0
    Code = 0
    total_comment_numbers = 0
    Blanks = 0

    # ---------------------------------------------------------------------
    # Tables to maintain when adding support for another language.
    # All lists are parallel: index i of every symbol list describes the
    # language whose (lower-case) file extension is extendList[i].
    extendList = [".h", ".cpp", ".hpp", ".rb"]           # file extensions
    LineCommentSymbol = ["//", "//", "//", "#"]          # starts a line comment
    BlockCommentSymbol = ["/*", "/*", "/*", "=begin"]    # starts a block comment
    ExitBlockCommentSymbol = ["*/", "*/", "*/", "=end"]  # ends a block comment
    CharStringSymbol = [["\""], ["\""], ["\""], ["\"", "\'"]]  # string delimiters
    CombinationSymbol = ["\\", "\\", "\\", "\\"]         # line continuation
    # ---------------------------------------------------------------------

    # CodeCounter may run in several threads at once; this lock protects
    # the read-modify-write updates of the running totals.
    _totals_lock = threading.Lock()

    @staticmethod
    def get_filelist(dir, Filelist):
        """Collect every supported source file at or below ``dir``.

        Args:
            dir: Path of a file or a directory. Directories are walked
                recursively.
            Filelist: List that matching file paths are appended to.

        Returns:
            The same ``Filelist`` object, for convenience.
        """
        if os.path.isfile(dir):
            # Compare the extension case-insensitively so that e.g. "A.CPP"
            # is picked up as well.
            if os.path.splitext(dir)[1].lower() in Counter.extendList:
                Filelist.append(dir)
        elif os.path.isdir(dir):
            # sorted() only makes the traversal order reproducible.
            for entry in sorted(os.listdir(dir)):
                Counter.get_filelist(os.path.join(dir, entry), Filelist)
        return Filelist

    @staticmethod
    def _scan_line(text, status, quote, extendIdx):
        """Scan one non-blank line and classify its content.

        Args:
            text: The line with surrounding whitespace removed.
            status: Scanner state at the start of the line.
            quote: Delimiter of the string literal that is currently open
                (only meaningful when ``status`` is CharString).
            extendIdx: Index into the per-language symbol tables.

        Returns:
            A tuple ``(has_code, has_comment, status, quote)`` where the
            last two items describe the scanner state at the end of the
            line, before any end-of-line reset.
        """
        line_comment = Counter.LineCommentSymbol[extendIdx]
        block_open = Counter.BlockCommentSymbol[extendIdx]
        block_close = Counter.ExitBlockCommentSymbol[extendIdx]
        quotes = Counter.CharStringSymbol[extendIdx]

        # A line that starts inside a string or a comment already belongs
        # to that category.
        has_code = status is _Status.CharString
        has_comment = status in (_Status.LineComment, _Status.BlockComments)
        if status is _Status.LineComment:
            # A continued line comment swallows the whole line.
            return has_code, has_comment, status, quote

        i = 0
        end = len(text)
        while i < end:
            if status is _Status.Common:
                opener = next(
                    (q for q in quotes if text.startswith(q, i)), None)
                if opener is not None:
                    has_code = True
                    status = _Status.CharString
                    quote = opener  # remember which delimiter closes it
                    i += len(opener)
                elif text.startswith(line_comment, i):
                    has_comment = True
                    status = _Status.LineComment
                    break  # the rest of the line is comment text
                elif text.startswith(block_open, i):
                    has_comment = True
                    status = _Status.BlockComments
                    # Skip the whole marker so "/*/" is not read as an
                    # opening immediately followed by a closing marker.
                    i += len(block_open)
                else:
                    if not text[i].isspace():
                        has_code = True
                    i += 1
            elif status is _Status.CharString:
                if text[i] == "\\":
                    i += 2  # an escaped character cannot end the string
                elif text.startswith(quote, i):
                    status = _Status.Common
                    i += len(quote)
                else:
                    i += 1
            else:  # _Status.BlockComments
                if text.startswith(block_close, i):
                    status = _Status.Common
                    i += len(block_close)
                else:
                    i += 1
        return has_code, has_comment, status, quote

    @staticmethod
    def CodeCounter(filename, path):
        """Count the lines of one file, print them and add them to the totals.

        Args:
            filename: Path of the source file. Its extension must be listed
                in ``Counter.extendList`` (compared case-insensitively).
            path: The root that was scanned; when it is a directory the
                file is reported relative to it.

        Raises:
            ValueError: If the file extension is not in ``extendList``.
            OSError: If the file cannot be opened.
        """
        extendIdx = Counter.extendList.index(
            os.path.splitext(filename)[1].lower())
        continuation = Counter.CombinationSymbol[extendIdx]

        # The file is decoded as GBK and undecodable bytes are dropped.
        with open(filename, encoding='gbk', errors='ignore') as fp:
            lines = fp.readlines()

        codes_numbers = 0
        empty = 0
        comment_numbers = 0
        status = _Status.Common
        quote = ""
        for raw in lines:
            text = raw.strip()
            if not text:
                empty += 1
                # Only a block comment survives a blank line.
                if status is not _Status.BlockComments:
                    status = _Status.Common
                continue

            has_code, has_comment, status, quote = Counter._scan_line(
                text, status, quote, extendIdx)
            if has_code:
                codes_numbers += 1
            if has_comment:
                comment_numbers += 1

            # A line ending with the continuation symbol carries an open
            # string or line comment over to the next physical line; in all
            # other cases only a block comment spans lines.
            continued = bool(continuation) and \
                raw.rstrip("\r\n").endswith(continuation)
            if status is not _Status.BlockComments and not continued:
                status = _Status.Common

        total = len(lines)
        # A trailing newline is shown by editors as one more, empty line.
        # The ``lines and`` guard keeps an empty file from raising.
        if lines and lines[-1].endswith("\n"):
            total += 1
            empty += 1

        display = filename
        if os.path.isdir(path):
            try:
                display = os.path.relpath(filename, path)
            except ValueError:
                # No relative path exists (e.g. different drives on
                # Windows); fall back to the full file name.
                display = filename

        with Counter._totals_lock:
            print("file:{0} total:{1} empty:{2} effective:{3} comment:{4}".format(
                display, total, empty, codes_numbers, comment_numbers))
            Counter.Line_numbers += total
            Counter.Blanks += empty
            Counter.Code += codes_numbers
            Counter.total_comment_numbers += comment_numbers
if __name__ == "__main__":
    # Script entry point: count the lines of every supported source file at
    # or below the path given on the command line and print a summary.
    if len(sys.argv) < 2:
        sys.exit("usage: python {0} <file-or-directory>".format(sys.argv[0]))
    path = os.path.abspath(sys.argv[1])  # absolute path of the scan root
    files = Counter.get_filelist(path, [])

    # Scanner states:
    #   Common:        ordinary code
    #   CharString:    inside a string literal
    #   LineComment:   inside a line comment
    #   BlockComments: inside a block comment
    # Defined at module level, before any worker starts, so that code
    # looking up the global name `Status` can resolve it.
    Status = Enum('Status','Init Common CharString LineComment BlockComments')

    # One worker thread per file.
    threads = [
        threading.Thread(target=Counter.CodeCounter, args=(file, path))
        for file in files
    ]
    for thr in threads:
        thr.start()
    # Wait for every worker; the totals are only complete after all of them
    # have finished.
    for thr in threads:
        thr.join()

    print("-"*56)
    print("- {0:<10} {1:<10} {2:<10} {3:<10} {4:<10}".format("Files", "Lines", "Code", "Comments", "Blanks"))
    print("-"*56)
    print(" {0:<10} {1:<10} {2:<10} {3:<10} {4:<10}".format(len(files), Counter.Line_numbers, Counter.Code, Counter.total_comment_numbers, Counter.Blanks))
    print("-"*56)
若文中有错误,还请大佬在评论区指正,我会好好学习和改进,谢谢大佬们🙇