背景: 解决目录下多文本字数统计难题
代码如下:
import pathlib
#获取脚本路径
script_path = pathlib.PurePath(__file__).parent
#定义标点符号
punctuations = [",","。","!","?","”",","]
#获取文件列表
file_lists = [x for x in pathlib.Path(script_path).iterdir() if pathlib.PurePath(x).match("*.txt")]
def content_count(filename, punctuation_marks=None):
    """Return the number of countable characters in a UTF-8 text file.

    Whitespace (spaces, tabs, line breaks) and punctuation marks are not
    counted; every other character counts as one.

    Args:
        filename: Path of the file to count, as a ``str`` or a path object.
        punctuation_marks: Optional iterable of single-character strings to
            leave out of the count. When omitted, the module-level
            ``punctuations`` collection is used.

    Returns:
        The character count as an ``int``, or ``0`` when ``filename`` is not
        an existing regular file.

    Raises:
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    path = pathlib.Path(filename)
    # The argument must be a file. Return 0 instead of falling through with
    # an implicit None, so callers can always add the result to a total.
    if not path.is_file():
        return 0

    if punctuation_marks is None:
        punctuation_marks = punctuations
    # A set gives constant-time membership tests for the per-character check.
    ignored = frozenset(punctuation_marks)

    content = path.read_text(encoding="utf-8")
    # Skip whitespace everywhere, not only at the end of the text; otherwise
    # each line break inside the file would be counted as a character.
    counted = [ch for ch in content if not ch.isspace() and ch not in ignored]
    return len(counted)
# Add up the character counts of all collected files. The accumulator is
# named `total` so the builtin `sum` is not shadowed at module level.
total = 0
for file in file_lists:
    total += content_count(file)
# Report once, after every file has been counted.
print(f"章节字数:{total}")