# 词频统计和词云概述 (Overview: word-frequency statistics and word clouds)
import jieba
# Tokenize: segment the Chinese text of one chapter into a list of words.
# NOTE(review): `chapter` is defined elsewhere — presumably a DataFrame whose
# `txt` column holds chapter texts, so `chapter.txt[1]` is one chapter; confirm
# against the loading code.
word_list = jieba.lcut(chapter.txt[1])
# Bare expression: displays the first 10 tokens when run as a notebook cell.
word_list[:10]
import pandas as pd

# Wrap the token list in a one-column DataFrame so pandas aggregation applies.
df = pd.DataFrame(word_list, columns=['word'])
# Bare expression: displays the first 20 rows when run as a notebook cell.
df.head(20)

# Count occurrences of each distinct token. A single grouping key does not
# need a list wrapper. (The whole groupby/size/sort pipeline below is
# equivalent to the one-liner df['word'].value_counts(); it is spelled out
# step by step here.)
result = df.groupby('word').size()
print(type(result))  # a pandas Series indexed by the distinct words

# Rank tokens from most to least frequent.
freqlist = result.sort_values(ascending=False)
freqlist.head(20)
import nltk
# Bare expression: re-display the first 10 tokens for comparison.
word_list[:10]
fdist = nltk.FreqDist(word_list)  # build the full word-frequency dictionary
# Bare expression: displays the FreqDist summary when run as a notebook cell.
fdist