def TF_IDF(docList):
    """Compute a TF-IDF weight matrix for a collection of documents.

    The vocabulary is the set of distinct items produced by
    ``drop_2_to_1_dim(docList)``. For each document, the term frequency of
    a word is its occurrence count divided by the document length. The
    inverse document frequency is ``log(m / (df + 1))``, where ``m`` is the
    number of documents and ``df`` is the number of documents that contain
    the word.

    Args:
        docList: Sequence of documents. Each document must support ``len``,
            the ``in`` operator, and an element-wise ``doc == word``
            comparison whose result has ``.sum()``.
            NOTE(review): presumably each document is a 1-D NumPy array of
            tokens -- confirm against the callers.

    Returns:
        A NumPy array with one row per document and one column per
        vocabulary word, holding ``TF * IDF``. Columns follow the iteration
        order of the vocabulary set, which is not sorted.
    """
    doc_count = len(docList)
    # Freeze the vocabulary once so the TF and presence rows of every
    # document share exactly the same column order.
    vocabulary = list(set(drop_2_to_1_dim(docList)))

    tf_rows, presence_rows = [], []
    for doc in docList:
        doc_len = len(doc)
        if doc_len:
            tf_row = np.array(
                [(doc == word).sum() / doc_len for word in vocabulary]
            )
        else:
            # An empty document has no terms; emit zeros instead of
            # dividing by zero (which would fill the row with NaN).
            tf_row = np.zeros(len(vocabulary))
        tf_rows.append(tf_row)
        presence_rows.append(
            np.array([word in doc for word in vocabulary]).astype(int)
        )

    TF = np.array(tf_rows)
    # Document frequency: number of documents that contain each word.
    count_word = np.array(presence_rows).sum(axis=0)
    # The +1 in the denominator smooths the ratio; a word present in every
    # document therefore gets a slightly negative IDF.
    IDF = np.log(doc_count / (count_word + 1))
    return TF * IDF
TF-IDF 词袋
最新推荐文章于 2024-07-18 18:59:32 发布