# -*- coding: utf-8 -*-
import math
class BM25(object):
def __init__(self, docs, k1=1.5, b=0.75):
    """Store the corpus, compute its size statistics and build the index.

    :param docs: list of documents that have already been split into
        words; each document must support ``len()`` and iteration
    :param k1: value stored as ``self.k1`` (default 1.5) -- presumably the
        BM25 term-frequency saturation parameter; its use lies outside
        this method
    :param b: value stored as ``self.b`` (default 0.75) -- presumably the
        BM25 document-length normalisation parameter; its use lies
        outside this method
    """
    self.D = len(docs)
    # Average document length. An empty corpus has no meaningful average,
    # so fall back to 0.0 instead of raising ZeroDivisionError.
    total_len = sum(len(doc) for doc in docs)
    # float() keeps true division even under Python 2 integer semantics.
    self.avgdl = float(total_len) / self.D if self.D else 0.0
    self.docs = docs
    # One dict per document mapping word -> occurrence count; filled by init().
    self.f = []
    # Starts empty; presumably word -> document frequency (filled later).
    self.df = {}
    # Starts empty; presumably word -> inverse document frequency (filled later).
    self.idf = {}
    self.k1 = k1
    self.b = b
    # Run the indexing pass over self.docs.
    self.init()
def init(self):
for doc in self.docs:
tmp = {}
for word in doc:
if not word in tmp:
tmp[word] = 0
tmp[word] += 1
self.f.append(tmp)
for k