python最大分词_python正向最大匹配分词和逆向最大匹配分词的实例

weixin_39626586

于 2020-11-24 04:35:14 发布

阅读量138

点赞数

文章标签： python最大分词

正向最大匹配

# -*- coding:utf-8 -*-

CODEC = 'utf-8'


def u(s, encoding=CODEC):
    """Return ``s`` as a text (unicode) string.

    Args:
        s: Either a text string, which is returned unchanged, or an encoded
            byte string, which is decoded.
        encoding: Name of the codec used to decode ``s`` when it is not
            already text. Defaults to ``CODEC``.

    Returns:
        The text-string form of ``s``.
    """
    # ``unicode`` only exists on Python 2; on Python 3 the text type is ``str``.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    if isinstance(s, text_type):
        return s
    # Both ``unicode(bytes, enc)`` (Py2) and ``str(bytes, enc)`` (Py3) decode.
    return text_type(s, encoding)

def fwd_mm_seg(wordDict, maxLen, str):
    """Segment text with forward maximum matching.

    Scanning from the left end of the text, the longest prefix of at most
    ``maxLen`` characters that is found in ``wordDict`` becomes the next
    word. When no prefix of two or more characters matches, the single
    leading character is emitted as a word on its own.

    Args:
        wordDict: Container of known words; only membership tests (``in``)
            are performed on it.
        maxLen: Maximum candidate word length; must be at least 1.
        str: Text to segment. The name shadows the builtin but is kept so
            that existing keyword callers keep working.
            NOTE(review): presumably a unicode string so that slicing is
            per character rather than per byte -- confirm with callers.

    Returns:
        A list of the words in text order; concatenating them reproduces
        the input text.

    Raises:
        ValueError: If ``maxLen`` is smaller than 1.
    """
    if maxLen < 1:
        # A non-positive window never consumes any input, so the scan below
        # would never terminate.
        raise ValueError('maxLen must be at least 1, got %r' % (maxLen,))

    wordList = []
    segStr = str
    while segStr:
        wordLen = min(maxLen, len(segStr))
        # Shrink the candidate from the right until it is a known word; a
        # single character is always accepted as a fallback.
        while wordLen > 1 and segStr[:wordLen] not in wordDict:
            wordLen -= 1
        wordList.append(segStr[:wordLen])
        segStr = segStr[wordLen:]
    return wordList

def main():
    """Segment a sample sentence using the dictionary in ``words.dic``.

    Reads one word per line from ``words.dic`` (opened relative to the
    current working directory), runs forward maximum matching with a
    maximum word length of 10 over a fixed sample sentence, and prints the
    sentence followed by the resulting words joined with ``==``.
    """
    wordDict = {}
    # Read raw bytes and decode explicitly so the result does not depend on
    # the platform's default text encoding; ``with`` closes the file.
    with open('words.dic', 'rb') as fp_dict:
        for eachWord in fp_dict:
            word = u(eachWord.strip(), CODEC)
            if word:  # ignore blank lines in the dictionary file
                wordDict[word] = 1

    segStr = u'你好世界hello world'
    # Single-argument ``print(...)`` behaves the same on Python 2 and 3.
    print(segStr)
    wordList = fwd_mm_seg(wordDict, 10, segStr)
    print("==".join(wordList))


if __name__ == '__main__':
    main()

逆向最大匹配

# -*- coding:utf-8 -*-

CODEC = 'utf-8'


def u(s, encoding=CODEC):
    """Return ``s`` as a text (unicode) string.

    Args:
        s: Either a text string, which is returned unchanged, or an encoded
            byte string, which is decoded.
        encoding: Name of the codec used to decode ``s`` when it is not
            already text. Defaults to ``CODEC``.

    Returns:
        The text-string form of ``s``.
    """
    # ``unicode`` only exists on Python 2; on Python 3 the text type is ``str``.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    if isinstance(s, text_type):
        return s
    # Both ``unicode(bytes, enc)`` (Py2) and ``str(bytes, enc)`` (Py3) decode.
    return text_type(s, encoding)

def bwd_mm_seg(wordDict, maxLen, str):
    """Segment text with backward (reverse) maximum matching.

    Scanning from the right end of the text, the longest suffix of at most
    ``maxLen`` characters that is found in ``wordDict`` becomes the next
    word. When no suffix of two or more characters matches, the single
    trailing character is emitted as a word on its own.

    Args:
        wordDict: Container of known words; only membership tests (``in``)
            are performed on it.
        maxLen: Maximum candidate word length; must be at least 1.
        str: Text to segment. The name shadows the builtin but is kept so
            that existing keyword callers keep working.
            NOTE(review): presumably a unicode string so that slicing is
            per character rather than per byte -- confirm with callers.

    Returns:
        A list of the words in text order (left to right); concatenating
        them reproduces the input text.

    Raises:
        ValueError: If ``maxLen`` is smaller than 1.
    """
    if maxLen < 1:
        # A non-positive window never consumes any input, so the scan below
        # would never terminate.
        raise ValueError('maxLen must be at least 1, got %r' % (maxLen,))

    wordList = []
    segStr = str
    while segStr:
        wordLen = min(maxLen, len(segStr))
        # Shrink the candidate from the left until it is a known word; a
        # single character is always accepted as a fallback.
        while wordLen > 1 and segStr[-wordLen:] not in wordDict:
            wordLen -= 1
        wordList.append(segStr[-wordLen:])
        segStr = segStr[:-wordLen]

    # Words were collected right-to-left; restore reading order.
    wordList.reverse()
    return wordList

def main():
    """Segment a sample sentence using the dictionary in ``words.dic``.

    Reads one word per line from ``words.dic`` (opened relative to the
    current working directory), runs backward maximum matching with a
    maximum word length of 10 over a fixed sample sentence, and prints the
    sentence followed by the resulting words joined with ``==``.
    """
    wordDict = {}
    # Read raw bytes and decode explicitly so the result does not depend on
    # the platform's default text encoding; ``with`` closes the file.
    with open('words.dic', 'rb') as fp_dict:
        for eachWord in fp_dict:
            word = u(eachWord.strip(), CODEC)
            if word:  # ignore blank lines in the dictionary file
                wordDict[word] = 1

    # Plain ``u''`` prefix: the ``ur''`` form is a syntax error on Python 3
    # and the literal contains no backslashes, so the value is the same.
    segStr = u'你好世界hello world'
    # Single-argument ``print(...)`` behaves the same on Python 2 and 3.
    print(segStr)
    wordList = bwd_mm_seg(wordDict, 10, segStr)
    print("==".join(wordList))


if __name__ == '__main__':
    main()

以上这篇python正向最大匹配分词和逆向最大匹配分词的实例就是小编分享给大家的全部内容了，希望能给大家一个参考，也希望大家多多支持我们。

本文标题: python正向最大匹配分词和逆向最大匹配分词的实例

本文地址: http://www.cppcns.com/jiaoben/python/244616.html

weixin_39626586

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
python最大分词_python正向最大匹配分词和逆向最大匹配分词的实例

正向最大匹配# -*- coding:utf-8 -*-CODEC='utf-8'def u(s, encoding):'converted other encoding to unicode encoding'if isinstance(s, unicode):return selse:return unicode(s, encoding)def fwd_mm_seg(wordDict, max...
复制链接

扫一扫

评论

被折叠的条评论为什么被折叠?

到【灌水乐园】发言

查看更多评论

添加红包

成就一亿技术人!

hope_wisdom

发出的红包

实付元

使用余额支付

点击重新获取

扫码支付

钱包余额 0

抵扣说明：

1.余额是钱包充值的虚拟货币，按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载，可以购买VIP、付费专栏及课程。