import sklearn
from sklearn.feature_extraction.text import CountVectorizer
# Bag-of-words demo on two English sentences: learn the vocabulary, then count
# how often each vocabulary term occurs in each sentence.
vector = CountVectorizer()
# fit_transform learns the vocabulary and returns the document-term count
# matrix (one row per input sentence, one column per vocabulary term).
res = vector.fit_transform(["life is is short,I like python","life is long,I dislike python"])
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# prefer get_feature_names_out() and fall back only on older releases.
if hasattr(vector, "get_feature_names_out"):
    # .tolist() converts the returned array to plain Python strings so the
    # printed result keeps the same list form as the old API.
    feature_names = vector.get_feature_names_out().tolist()
else:
    feature_names = vector.get_feature_names()
print(feature_names)
# Convert the count matrix to a dense array so it prints as a 2-D grid.
print(res.toarray())
# C:\Python38\python.exe D:/Project/Study/python/machine/test.py
# ['dislike', 'is', 'life', 'like', 'long', 'python', 'short']
# [[0 2 1 1 0 1 1]
#  [1 1 1 0 1 1 0]]
import sklearn
from sklearn.feature_extraction.text import CountVectorizer
# Bag-of-words demo on two Chinese sentences that were segmented by hand with
# spaces. The default tokenizer treats whitespace and punctuation as separators
# and keeps only tokens of two or more word characters, so text that is not
# space-separated stays glued together as a single term and single-character
# words are dropped from the vocabulary.
vector = CountVectorizer()
# fit_transform learns the vocabulary and returns the document-term count
# matrix (one row per input sentence, one column per vocabulary term).
res = vector.fit_transform(["人生 苦 短,我喜欢 python","人生 漫长,不 喜欢python"])
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# prefer get_feature_names_out() and fall back only on older releases.
if hasattr(vector, "get_feature_names_out"):
    # .tolist() converts the returned array to plain Python strings so the
    # printed result keeps the same list form as the old API.
    feature_names = vector.get_feature_names_out().tolist()
else:
    feature_names = vector.get_feature_names()
print(feature_names)
# Convert the count matrix to a dense array so it prints as a 2-D grid.
print(res.toarray())
# C:\Python38\python.exe D:/Project/Study/python/machine/test.py
# ['python', '人生', '喜欢python', '我喜欢', '漫长']
# [[1 1 0 1 0]
#  [0 1 1 0 1]]