首先通过一小段代码了解一下 TF-IDF:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
# Four short sentences used as the toy corpus for the TF-IDF demo.
corpus = [
    "This is the first document.",
    "This document is the second document.",
    "And this is the third one.",
    "Is this the first document?",
]

# Learn the vocabulary and IDF weights from the corpus and encode it in one step.
vectorizer = TfidfVectorizer()
x = vectorizer.fit_transform(corpus)

# Convert the fit_transform result into an array via its toarray() method.
a = x.toarray()

# Show the type of each representation, one per line.
print(type(x), type(a), sep="\n")
输出结果:
<class 'scipy.sparse.csr.csr_matrix'>
<class 'numpy.ndarray'>
import numpy as np
import pandas as pd
import jieba
import re
np.random.seed(1337) # for reproducibility
from keras.datasets import mnist
from keras.utils import np_utils
from keras.utils import to_categorical
from kera