# Get the top twenty high-frequency words of a text corpus.
import csv
import jieba
import sklearn
from sklearn.feature_extraction.text import CountVectorizer
# Read the comments CSV, segment the first column of every row with jieba,
# and print the 20-term vocabulary that CountVectorizer keeps for the corpus.

# newline="" is what the csv module asks for so that newlines embedded in
# quoted fields are parsed correctly; the with-block closes the file handle.
# The encoding is pinned so decoding does not depend on the platform locale.
# NOTE(review): assumes the CSV is UTF-8 encoded -- confirm.
with open(
    "/Users/yin/Desktop/词频/comment.csv", encoding="utf-8", newline=""
) as file:
    reader = csv.reader(file)
    data = list(reader)

# Build one ";"-joined string of jieba tokens per non-empty row.
# csv.reader yields [] for blank lines, so those rows are skipped instead of
# raising IndexError on row[0].
word = []
for row in data:
    if not row:
        continue
    text = row[0]
    ret = jieba.lcut(text)
    ret = ";".join(ret)
    word.append(ret)

# max_features=20 restricts the vocabulary to the 20 terms with the highest
# corpus-wide counts.
# NOTE(review): CountVectorizer's default token_pattern only keeps tokens of
# two or more word characters, so single-character words are not counted --
# confirm that this is intended.
vect = CountVectorizer(max_features=20)
X = vect.fit_transform(word)

# get_feature_names() was removed in scikit-learn 1.2; prefer its replacement
# and fall back for older releases. tolist() keeps `keywords` a plain list of
# str, as it was before.
if hasattr(vect, "get_feature_names_out"):
    keywords = vect.get_feature_names_out().tolist()
else:
    keywords = vect.get_feature_names()

# The names are listed in feature-index order, not ranked by frequency.
print(keywords)