- sklearn CountVectorizer函数详解
from sklearn.feature_extraction.text import CountVectorizer
texts=["dog cat fish","dog cat cat","fish bird", 'bird']
cv = CountVectorizer()
cv_fit=cv.fit_transform(texts)
print(cv.get_feature_names())
print(cv_fit.toarray())
print(cv_fit)
- 返回的结果为稀疏矩阵
['bird', 'cat', 'dog', 'fish']
[[0 1 1 1]
[0 2 1 0]
[1 0 0 1]
[1 0 0 0]]
(0,