def fully_segment(text, dic):
    """Return every substring of ``text`` that is an entry of ``dic``.

    Substrings are enumerated by start position, and for each start by
    increasing length; matches are collected in that order. A word that
    occurs several times in ``text`` is reported once per occurrence.

    Args:
        text: The string to scan.
        dic: Any container supporting ``in`` for strings (list, set, dict).

    Returns:
        A list of the matching substrings; empty when ``text`` is empty
        or nothing matches.
    """
    word_list = []
    length = len(text)
    for start in range(length):
        for end in range(start + 1, length + 1):
            word = text[start:end]
            if word in dic:
                word_list.append(word)
    return word_list
def forward_segment(text, dic):
    """Segment ``text`` left to right, always taking the longest match.

    At each position the longest substring starting there that is an
    entry of ``dic`` is emitted. If no multi-character entry starts at
    that position, the single character is emitted on its own, whether
    or not it is in ``dic``.

    Args:
        text: The string to segment.
        dic: Any container supporting ``in`` for strings (list, set, dict).

    Returns:
        A list of words whose concatenation equals ``text``; empty when
        ``text`` is empty.
    """
    word_list = []
    length = len(text)
    i = 0
    while i < length:
        # Fallback: the single character at the current position.
        longest_word = text[i]
        # Try candidates from longest to shortest (length >= 2) so the
        # first hit is the longest match and the scan can stop early.
        for j in range(length, i + 1, -1):
            word = text[i:j]
            if word in dic:
                longest_word = word
                break
        word_list.append(longest_word)
        i += len(longest_word)
    return word_list
def backward_segment(text, dic):
    """Segment ``text`` right to left, always taking the longest match.

    Starting from the end of ``text``, the longest substring ending at
    the current position that is an entry of ``dic`` is taken. If no
    multi-character entry ends there, the single character is taken on
    its own, whether or not it is in ``dic``.

    Args:
        text: The string to segment.
        dic: Any container supporting ``in`` for strings (list, set, dict).

    Returns:
        A list of words in reading order whose concatenation equals
        ``text``; empty when ``text`` is empty.
    """
    # Words are found last-to-first; collect them with O(1) appends and
    # reverse once at the end instead of inserting at the front each time.
    reversed_words = []
    end = len(text)  # exclusive end of the part still to be segmented
    while end > 0:
        # Fallback: the single character just before ``end``.
        longest_word = text[end - 1]
        # Start positions grow from 0, so candidates shrink; the first
        # dictionary hit is therefore the longest match.
        for start in range(end - 1):
            word = text[start:end]
            if word in dic:
                longest_word = word
                break
        reversed_words.append(longest_word)
        end -= len(longest_word)
    reversed_words.reverse()
    return reversed_words
def count_single_char(word_list: list) -> int:
    """Return how many entries of ``word_list`` have length exactly 1."""
    return sum(1 for word in word_list if len(word) == 1)
def bidirectional_segment(text, dic):
    """Pick between the forward and backward longest-match segmentations.

    Both segmentations of ``text`` are computed, then one is chosen:

    1. The one with fewer words wins.
    2. On a tie in word count, the forward result wins only if it has
       strictly fewer single-character words.
    3. Otherwise the backward result is returned.

    Args:
        text: The string to segment.
        dic: Any container supporting ``in`` for strings (list, set, dict).

    Returns:
        The chosen list of words.
    """
    forward = forward_segment(text, dic)
    backward = backward_segment(text, dic)
    if len(forward) != len(backward):
        return forward if len(forward) < len(backward) else backward
    if count_single_char(forward) < count_single_char(backward):
        return forward
    return backward
# Demo dictionary used by the interactive loop below. Only membership
# tests are performed on it; '研究' is listed twice, which is harmless.
dic = ['商', '商品', '品', '和', '和服', '服', '服务', '务',
       '就', '就读', '读', '北', '北京', '北京大学', '京', '大', '大学', '学',
       '研究', '研究生', '生', '生命', '起源',
       '欢', '欢迎', '新', '迎新', '老', '老师', '师生', '生前', '前来', '来', '就餐',
       '项', '项目', '目的', '的', '研究',
       '当', '当下', '下雨天', '雨天', '地面', '积水',
       '结婚', '和尚', '尚未', '未']


def main():
    """Read sentences from stdin and print all four segmentations.

    Loops until stdin is exhausted (EOF) or the user interrupts with
    Ctrl-C, then returns instead of dying with a traceback.
    """
    while True:
        print('请输入句子:')
        try:
            text = input()
        except (EOFError, KeyboardInterrupt):
            break
        print('完全切分:\n', fully_segment(text, dic))
        print('最长匹配:\n', forward_segment(text, dic))
        print('逆向最长匹配:\n', backward_segment(text, dic))
        print('双向最长匹配:\n', bidirectional_segment(text, dic))


# Run the interactive demo only when executed as a script, so importing
# this module does not block on input().
if __name__ == "__main__":
    main()
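# Segmentation algorithms (切分算法)
# Web-page residue: "latest recommended article published 2023-08-06 15:47:31" (最新推荐文章于 2023-08-06 15:47:31 发布)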