维特比(Viterbi)算法是一种动态规划算法,也可以用递归的方式来实现。
本质上来说,它是在所有候选路径中寻找最优(最短)路径的一种算法。
在 NLP 里常常用来分词,并在切分时尽量保留原有的语义。
这里的实现做了修改:尽可能用循环替代了递归部分。
# Snapshot of whatever iterating `word_dict` yields, kept as a list for the
# membership test in `check_dict`.
# NOTE(review): presumably `word_dict` maps word -> score, making this the
# vocabulary -- confirm where `word_dict` is built.
word_vector = [*word_dict]
def search_prob(word, word_dict=word_dict):
    """Return the score stored for ``word``, or a tiny fallback if absent.

    Args:
        word: Key to look up.
        word_dict: Table indexed by word. Defaults to the module-level
            ``word_dict`` object bound at the time this function is defined.

    Returns:
        ``word_dict[word]`` when the key is present, otherwise ``10**(-8)``.
    """
    try:
        return word_dict[word]
    except KeyError:
        # Unknown words get a small non-zero score instead of failing. Only
        # the missing-key case is handled here; unrelated errors (for example
        # a table that is not subscriptable) surface instead of being masked.
        return 10**(-8)
def check_dict(word):
    """Report whether ``word`` is an element of the module-level ``word_vector``.

    Args:
        word: Candidate to test.

    Returns:
        ``True`` if ``word`` is contained in ``word_vector``, else ``False``.
    """
    is_known = word in word_vector
    return is_known
def slide_word(strings, start, end):
    """Find the longest dictionary word in ``strings`` that begins at ``start``.

    Candidates are the slices ``strings[start:i]`` for every ``i`` from
    ``start`` to ``end`` inclusive; each one is tested with ``check_dict``.

    Args:
        strings: Text being scanned.
        start: Index at which every candidate slice begins.
        end: Largest slice end index that is tried.

    Returns:
        ``(start, i)`` for the largest ``i`` whose slice passes
        ``check_dict``, or ``False`` when no candidate passes (which includes
        the case ``start > end``, where nothing is tried).
    """
    # Walk from the longest candidate down so the first hit is the answer and
    # shorter prefixes need not be checked. The empty slice (i == start) is
    # still tried last, so the result matches a full forward scan.
    for i in range(end, start - 1, -1):
        if check_dict(strings[start:i]):
            return (start, i)
    return False
# Demo call: look for the longest known word in `example` that starts at
# index 3, trying slice ends up to index 7.
slide_word(strings=example, start=3, end=7)
def Viterbi(string, word_dict, opcode=None, word_split=None, prob=0):
    """Segment ``string`` into dictionary words over repeated passes.

    One pass scans ``string`` left to right and asks ``slide_word`` for the
    longest dictionary word starting at each index. The first match of the
    pass, and any match whose start index equals the end index of a match
    found in the same pass, is accepted: its score from ``search_prob`` is
    added to ``prob`` and every occurrence of the word in the working copy is
    replaced by "/". The function then calls itself on the working copy and
    stops once a pass leaves the number of distinct characters unchanged.

    Args:
        string: Text to segment. "/" serves as the placeholder for words
            that have already been consumed.
        word_dict: Word table forwarded to ``search_prob`` for scoring.
            NOTE(review): candidate words still come from ``slide_word``,
            which does not receive this argument -- confirm both use the
            same vocabulary.
        opcode: History of distinct-character counts, one entry appended per
            pass. Leave as ``None`` on the initial call to start a fresh
            history; a list passed explicitly is appended to in place.
        word_split: Words accepted by earlier passes. ``None`` means none.
        prob: Score accumulated by earlier passes.

    Returns:
        A tuple ``(score, words)``. ``score`` is the accumulated ``prob``
        plus ``10**(-8)`` for every character left unconsumed. ``words`` is
        the list of accepted words followed by the non-empty leftover
        fragments found between "/" placeholders.
    """
    # Fresh containers per top-level call: a shared default list would keep
    # the pass history of previous calls and could end a later call early.
    if opcode is None:
        opcode = [10**-8]
    if word_split is None:
        word_split = []

    length = len(string)
    match_ends = set()  # end indices of every match seen in this pass
    accepted = []
    remaining = string
    matches_seen = 0

    for i in range(length + 1):
        span = slide_word(string, i, length)
        if not span:
            continue
        matches_seen += 1
        l, r = span
        match_ends.add(r)
        # Accept the first match of the pass, or a match that starts at an
        # index where a match of this pass ended.
        if matches_seen == 1 or l in match_ends:
            word = string[l:r]
            prob += search_prob(word, word_dict)
            remaining = remaining.replace(word, "/")
            accepted.append(word)

    opcode.append(len(set(remaining)))
    word_split = word_split + accepted

    # Converged: this pass did not change the number of distinct characters.
    if opcode[-1] == opcode[-2]:
        leftovers = [piece for piece in remaining.split("/") if piece != ""]
        score = prob + 10**(-8) * len(remaining.replace("/", ""))
        return score, word_split + leftovers
    return Viterbi(remaining, word_dict, opcode, word_split, prob)
# Demo call: segment `example`, passing the module-level `word_dict`.
Viterbi(string=example, word_dict=word_dict)