# Echo the corpus (notebook-style bare expression; a no-op when run as a script).
# NOTE(review): docList is not defined in this snippet — presumably built in an
# earlier cell/section; confirm it is an iterable of document strings.
docList
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
# Step 1: convert the documents into a TF-IDF feature matrix.
# NOTE(review): with default settings TfidfVectorizer tokenizes on word
# boundaries, so this presumably expects docList entries to be already
# segmented, whitespace-separated text — confirm against where docList is built.
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(docList)
# Inspect the vectors: fit_transform returns a sparse matrix, so densify it
# for display (one row per document, one column per vocabulary term).
tfidf_matrix.toarray()
# Inspect the vocabulary terms, in column order of the matrix.
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# prefer get_feature_names_out() (returns an ndarray of str) and fall back to
# the legacy method (returns a list) only on older installations. Kept as a
# single expression so a notebook still displays the result.
(
    tfidf_vectorizer.get_feature_names_out()
    if hasattr(tfidf_vectorizer, "get_feature_names_out")
    else tfidf_vectorizer.get_feature_names()
)