def getText(text):
    """Return *text* lower-cased with basic punctuation removed.

    The characters ``, . ; ? - '`` are deleted outright (they are not
    replaced by a space), so "don't" becomes "dont" and "well-known"
    becomes "wellknown".

    Args:
        text: The raw string to normalise.

    Returns:
        The lower-cased string without the punctuation listed above.
    """
    # A single translate() pass deletes every listed character, instead of
    # rescanning the whole string once per character with replace().
    return text.lower().translate(str.maketrans("", "", ",.;?-'"))
def wordFreg(text, topn):
    """Return the *topn* most frequent words in *text*.

    Words are obtained by splitting *text* on whitespace. A fixed set of
    common English stop-words is ignored.

    Args:
        text: The string to analyse.
        topn: Maximum number of (word, count) pairs to return.

    Returns:
        A list of ``(word, count)`` tuples ordered by descending count.
        Words with equal counts keep their order of first appearance.
    """
    excludes = {'the', 'and', 'to', 'of', 'be', 'a', 'it', 'is', 'not', 'but'}
    counts = {}
    for word in text.split():
        # Skipping stop-words while counting avoids deleting keys afterwards,
        # which raised KeyError when a stop-word never occurred in the text.
        if word not in excludes:
            counts[word] = counts.get(word, 0) + 1
    # Rank by frequency so the slice really is the "top n"; sorted() is
    # stable, so ties stay in first-appearance order.
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    return ranked[:topn]
# Main program: call the functions defined above.
# Read the input text, compute the ten most frequent words, save them to
# wordcounts.txt as tab-separated lines, and print them as a table.
try:
    # 'utf-8-sig' decodes UTF-8 and drops a leading byte-order mark if present.
    with open(r"C:\Users\Dieter.DING\Desktop\新建文件夹\ne.txt", 'r', encoding='utf-8-sig') as f:
        text = f.read()
except IOError:
    print("文本不存在,请先创建!\n")
else:
    # Only the file read is guarded above; processing runs on the success
    # path so `freqs` is never referenced when the input could not be read.
    text = getText(text)
    freqs = wordFreg(text, 10)
    try:
        with open(r"wordcounts.txt", 'w', encoding='utf-8') as fileFreq:
            # One "word<TAB>count" line per entry.
            items = [f"{word}\t{freq}\n" for word, freq in freqs]
            fileFreq.writelines(items)
    except IOError:
        print("写入文件错误")
    # The results are printed even if writing the output file failed.
    for word, freq in freqs:
        print("{:<10}{:>}".format(word, freq))