# Level 1 (第一关): forward maximum matching
def cutA(sentence, dictA):
    """Segment *sentence* by forward maximum matching and print the words.

    Scanning from the left, the longest prefix of the remaining text that is
    found in ``dictA`` (capped at the length of the longest dictionary entry)
    is taken as the next word. When no prefix matches, the single leading
    character is emitted as a word of its own.

    Args:
        sentence: The string to segment.
        dictA: Machine dictionary; any collection of words that supports
            iteration and the ``in`` operator. Passing a ``set`` makes each
            lookup O(1).

    Returns:
        None. The resulting list of words is written with ``print``.
    """
    result = []  # segmented words, in left-to-right order

    # The longest dictionary entry bounds the match window. Clamp it to at
    # least 1 so an empty dictionary (or one holding only "") neither raises
    # in max() nor leaves the scan unable to advance.
    max_word_len = max(1, max((len(word) for word in dictA), default=0))

    total = len(sentence)
    start = 0  # index of the first character not yet segmented
    while start < total:
        # Near the end of the sentence the window shrinks to what is left.
        cut_len = min(max_word_len, total - start)
        # Drop the last character until the candidate is a dictionary word
        # or only one character remains (unknown characters pass through).
        while cut_len > 1 and sentence[start:start + cut_len] not in dictA:
            cut_len -= 1
        result.append(sentence[start:start + cut_len])
        start += cut_len

    print(result)  # output the segmentation result
# Level 2 (第二): backward (reverse) maximum matching
def cutB(sentence, dictB):
    """Segment *sentence* by backward maximum matching and print the words.

    Scanning from the right, the longest suffix of the remaining text that is
    found in ``dictB`` (capped at the length of the longest dictionary entry)
    is taken as the next word. When no suffix matches, the single trailing
    character is emitted as a word of its own.

    Args:
        sentence: The string to segment.
        dictB: Machine dictionary; any collection of words that supports
            iteration and the ``in`` operator.

    Returns:
        None. The words are printed in reading order, without a trailing
        newline.
    """
    result = []  # words are collected right-to-left and reversed on output

    # Clamp the window to at least 1 so an empty dictionary (or one holding
    # only "") neither raises in max() nor leaves the scan unable to advance.
    max_word_len = max(1, max((len(word) for word in dictB), default=0))

    end = len(sentence)  # one past the last character not yet segmented
    while end > 0:
        # Near the start of the sentence the window shrinks to what is left.
        cut_len = min(max_word_len, end)
        # Drop the first character until the candidate is a dictionary word
        # or only one character remains (unknown characters pass through).
        while cut_len > 1 and sentence[end - cut_len:end] not in dictB:
            cut_len -= 1
        result.append(sentence[end - cut_len:end])
        end -= cut_len

    print(result[::-1], end="")
# Level 3 (第三): bidirectional maximum matching
class BiMM():
    """Bidirectional maximum-matching word segmenter.

    ``MMseg`` and ``RMMseg`` segment a text from the left and from the right
    respectively; ``main`` compares two such segmentations and prints the
    preferred one.
    """

    def __init__(self):
        self.window_size = 3  # maximum number of characters tried per match

    def MMseg(self, text, dict):  # forward maximum matching
        """Segment *text* left to right, preferring the longest match.

        Args:
            text: The string to segment.
            dict: Collection of known words supporting ``in``. (The name
                shadows the builtin; it is kept so keyword callers still work.)

        Returns:
            The list of words in reading order. Characters with no matching
            dictionary entry are returned as single-character words.
        """
        result = []
        index = 0
        text_length = len(text)
        while index < text_length:
            # Never look past the end of the text.
            longest = min(index + self.window_size, text_length)
            # Try multi-character candidates from longest to shortest.
            for size in range(longest, index + 1, -1):
                if text[index:size] in dict:
                    break
            else:
                # No multi-character match: take one character as is.
                size = index + 1
            result.append(text[index:size])
            index = size
        return result

    def RMMseg(self, text, dict):  # backward (reverse) maximum matching
        """Segment *text* right to left, preferring the longest match.

        Args:
            text: The string to segment.
            dict: Collection of known words supporting ``in``. (The name
                shadows the builtin; it is kept so keyword callers still work.)

        Returns:
            The list of words in reading order. Characters with no matching
            dictionary entry are returned as single-character words.
        """
        result = []
        index = len(text)
        while index > 0:
            # Clamp at 0: a negative slice start would wrap around to the end
            # of the text and could match (and then skip over) the wrong span.
            earliest = max(index - self.window_size, 0)
            # Try multi-character candidates from longest to shortest.
            for size in range(earliest, index - 1):
                if text[size:index] in dict:
                    break
            else:
                # No multi-character match: take one character as is.
                size = index - 1
            result.append(text[size:index])
            index = size
        result.reverse()  # collected right-to-left; restore reading order
        return result

    # r1 and r2 are the segmentations produced by forward and backward
    # maximum matching respectively.
    def main(self, text, r1, r2):
        """Print the preferred segmentation out of *r1* and *r2*.

        The segmentation with fewer words is preferred. On a tie, the one
        with fewer single-character words is preferred, and if that also
        ties, ``r1`` is chosen.

        Args:
            text: Unused; kept for interface compatibility.
            r1: Forward maximum-matching result (list of words).
            r2: Backward maximum-matching result (list of words).

        Returns:
            None. The chosen list is printed without a trailing newline.
        """
        if len(r1) != len(r2):
            chosen = r1 if len(r1) < len(r2) else r2
        else:
            # Count the single-character words in each segmentation.
            singles1 = sum(1 for s in r1 if isinstance(s, str) and len(s) == 1)
            singles2 = sum(1 for s in r2 if isinstance(s, str) and len(s) == 1)
            chosen = r2 if singles1 > singles2 else r1
        print(chosen, end="")