import sys, os, re
def count_words(text):
    """Return the number of words in *text*.

    A word is a maximal run of regex word characters that contains no
    ASCII digit. Punctuation, whitespace and the digits 0-9 all act as
    separators, so ``"abc123def"`` counts as two words.

    Args:
        text: The string to scan.

    Returns:
        The number of words found; 0 for empty or separator-only input.
    """
    # Count the words themselves, not the gaps between them: counting
    # separators is off by one whenever the text does not both start and
    # end with a separator (e.g. "hello world" has one space but two words).
    # The raw string keeps the backslash escape for the regex engine.
    return len(re.findall(r"[^\W0-9]+", text))
def getfiletext(file):
    """Read the file at path *file* and return its contents as a str.

    The raw bytes are decoded as GBK; byte sequences that are not valid
    GBK are silently dropped instead of raising.

    Args:
        file: Path of the file to read.

    Returns:
        The decoded text.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the handle even if read() raises.
    with open(file, 'rb') as handle:
        raw = handle.read()
    # Decoding straight to str is sufficient: re-encoding the result as
    # UTF-8 and decoding it again would yield the very same string.
    return raw.decode('gbk', 'ignore')
def main():
    """Print the word count of each file named on the command line.

    Every argument after the script name is treated as a file path; one
    count is printed per file, in argument order.
    """
    paths = sys.argv[1:]
    for path in paths:
        content = getfiletext(path)
        print(count_words(content))
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
最近开始用 SSS 阅读法,想知道自己已经读了多少英文单词,于是心血来潮用 Python 写了个小脚本。
主要利用正则替换来统计,不是很准确,特别是遇到乱码的时候,精度大概 99.9% 吧。
反正只是个大概,无所谓啦。