# Walden, a text of a little over 110,000 words
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# -*- coding: utf-8 -*-
# @Author : ZhuJD
# @FILE : TestData_3.py
# @Time : 2019/12/29 10:43
# @Software: PyCharm
# File handling: load Walden.txt both as one string and as a list of lines.
# NOTE(review): no explicit encoding is passed, so the platform default is
# used, exactly as before — confirm it matches the file's encoding.
with open('Walden.txt', 'r') as f:  # closed automatically, even on error
    text = f.read()
    # To cap how much is read, use e.g. f.read(100) instead of f.read().
    # read() leaves the cursor at end-of-file, so a following readlines()
    # would always return []; rewind before reading the lines.
    f.seek(0)
    text_lines = f.readlines()
print(text)
# Uncomment to inspect the per-line view as well:
# print(text_lines)
# Word-frequency count: normalise the text and split it into words.
import re

with open('Walden.txt', 'r') as f:  # closed automatically, even on error
    text = f.read()
txt = text.lower()  # fold case so "The" and "the" become the same token
# Strip the listed punctuation characters (, . ? / ! " : ' -) from the text.
txt = re.sub('[,.?/!":\'-]', '', txt)
words = txt.split()  # split on any run of whitespace
# Dict keyed by word, filled in by the loop that follows.
# NOTE(review): presumably word -> occurrence count — confirm against the loop.
word_sq = {}
for i in words:
if i not in word_sq.keys():
wo