# encoding=utf-8
from sklearn.feature_extraction.text import TfidfVectorizer, TfidfTransformer
# Toy corpus for the demo. Some words are deliberately repeated inside a
# document ("This This", "second second") so that the influence of term
# frequency on the resulting TF-IDF weights is easy to see.
corpus = [
    'This This is the first document.',
    'This This is the second second document.',
    'And the third one.',
    'Is this the first document?',
]

# Learn the vocabulary from the corpus and compute the TF-IDF weights in a
# single step; the result is a sparse document-term matrix.
tfidf_model = TfidfVectorizer()
tfidf_matrix = tfidf_model.fit_transform(corpus)

# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2,
# so prefer get_feature_names_out() when it exists. It returns a NumPy array;
# .tolist() turns it back into a plain list of str so word_dict keeps the
# same type and prints the same way as before. Older installs fall back to
# the legacy method.
if hasattr(tfidf_model, 'get_feature_names_out'):
    word_dict = tfidf_model.get_feature_names_out().tolist()
else:
    word_dict = tfidf_model.get_feature_names()

# word_dict[j] is the term that column j of tfidf_matrix refers to.
print(word_dict)
# Printing the sparse matrix lists its stored entries as "(row, col) weight".
print(tfidf_matrix)
Experimental results (console output of the script above):
"C:\Program Files\Anaconda3\python.exe" D:/pycharmprogram/csgwork/find_classification_keys/test_tfidfVectorizer.py
['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third', 'this']
(0, 8) 0.6986804246371375
(0, 3) 0.34934021231856877
(0, 6) 0.2856085141790751
(0, 2) 0.43150466158747897
(0, 1) 0.