import math
class TFIDf:
    """Compute corpus-level TF, IDF and TF-IDF weights for a tokenized corpus.

    Term frequency here is the raw number of occurrences of a word across
    the whole corpus (not per document).  Inverse document frequency is
    ``ln(N / df) + 1`` where ``N`` is the number of documents and ``df`` the
    number of documents containing the word.

    Attributes:
        doc: List of documents, each a list of word tokens.
        vocab: Distinct words in first-seen order (filled by ``_getvocab``).
        dict: Mapping word -> total occurrence count over the corpus.
    """

    def __init__(self, doc=None):
        """Create the analyser.

        Args:
            doc: Optional corpus as a list of token lists.  When omitted,
                a small built-in sample corpus is used.
        """
        if doc is None:
            doc = [
                ['I', 'love', 'you', 'my', 'dear'],
                ['my', 'god', 'I', 'like', 'you'],
                ['good', 'morning'],
                ['good', 'afternoon'],
            ]
        self.doc = doc
        self.vocab = []
        self.dict = {}
        self._tf = []
        self._idf = []
        self._tfidf = []

    def _getvocab(self):
        """Rebuild the word-count table and the vocabulary list."""
        # Start from an empty table so repeated calls do not inflate counts.
        self.dict = {}
        for item in self.doc:
            for word in item:
                self.dict[word] = self.dict.get(word, 0) + 1
        self.vocab = list(self.dict)

    def tf(self):
        """Fill ``self._tf`` with the corpus-wide count of each vocab word."""
        self._getvocab()
        self._tf = [self.dict[word] for word in self.vocab]

    def idf(self):
        """Fill ``self._idf`` with ``ln(N / df) + 1`` for each vocab word."""
        self.tf()
        n_docs = len(self.doc)
        # One set per document makes each membership test O(1).
        doc_sets = [set(item) for item in self.doc]
        # Rebuild from scratch so repeated calls do not append duplicates.
        self._idf = []
        for word in self.vocab:
            # Every vocab word comes from some document, so cnt >= 1.
            cnt = sum(1 for words in doc_sets if word in words)
            self._idf.append(math.log(n_docs / cnt) + 1)

    def tfidf(self):
        """Compute TF-IDF weights, print per-word tf/idf, and return the weights.

        The weights are stored in ``self._tfidf`` rather than in an attribute
        named ``tfidf``, so this method is not overwritten and can be called
        more than once.

        Returns:
            List of ``tf * idf`` values aligned with ``self.vocab``.
        """
        self.idf()
        self._tfidf = [tf * idf for tf, idf in zip(self._tf, self._idf)]
        for word, tf, idf in zip(self.vocab, self._tf, self._idf):
            print('word', word, 'tf', tf, 'idf', idf)
        return self._tfidf
# Script driver: build an analyser and print the tf and idf of every
# vocabulary word. Runs at module level, so it also executes on import.
x = TFIDf()
x.tfidf()