# -*- coding: utf-8 -*-
"""Toy spelling corrector driven by word frequencies.

A frequency model is built from a text corpus.  A word is corrected to the
most frequent known word at edit distance 0, then 1, then 2; if nothing is
found the word is returned unchanged.
"""
import collections
import re
import sys

# Location of the training corpus (machine-specific absolute path).
CORPUS_PATH = r'E:\python人工智能\tb机器学习\01丶机器学习\唐宇迪-机器学习课程\机器学习算法配套案例实战\贝叶斯-拼写检查器\big.txt'

# Letters used when generating replacement and insertion candidates.
alphabet = 'abcdefghijklmnopqrstuvwxyz'


def words(text):
    """Return every run of the letters a-z found in *text*, lower-cased."""
    return re.findall('[a-z]+', text.lower())


def train(features):
    """Build the frequency model from an iterable of words.

    The result is a ``defaultdict`` whose default value is 1, so indexing
    an unseen word yields 1 and a word seen ``k`` times is stored as
    ``k + 1``.
    """
    model = collections.defaultdict(lambda: 1)
    for feature in features:
        model[feature] += 1
    return model


def _read_corpus(path):
    """Return the text stored at *path*, or ``''`` when it cannot be read.

    A warning is written to stderr in the failure case so that the module
    stays importable on machines that lack the corpus file.
    """
    try:
        # Only a-z runs are kept by words(), so bytes that are not valid
        # UTF-8 can safely be replaced instead of aborting the load.
        with open(path, encoding='utf-8', errors='replace') as handle:
            return handle.read()
    except OSError as exc:
        print('warning: cannot read corpus %r (%s); using an empty model'
              % (path, exc), file=sys.stderr)
        return ''


# Word -> count model consulted by known() and corret().
nw = train(words(_read_corpus(CORPUS_PATH)))


def editsl(word):
    """Return the set of strings exactly one edit away from *word*.

    An edit is a deletion, a transposition of two adjacent characters, a
    replacement of one character, or an insertion of one character, using
    the letters in ``alphabet``.
    """
    splits = [(word[:i], word[i:]) for i in range(len(word) + 1)]
    deletions = [head + tail[1:] for head, tail in splits if tail]
    transpositions = [head + tail[1] + tail[0] + tail[2:]
                      for head, tail in splits if len(tail) > 1]
    alterations = [head + c + tail[1:]
                   for head, tail in splits if tail for c in alphabet]
    insertions = [head + c + tail for head, tail in splits for c in alphabet]
    return set(deletions + transpositions + alterations + insertions)


def edits2(word):
    """Return every string reachable from *word* by two successive edits.

    The result is not filtered against the model; callers that want real
    words must pass it through known().
    """
    return set(e2 for e1 in editsl(word) for e2 in editsl(e1))


def known(words):
    """Return the subset of *words* that is present in the model ``nw``."""
    return set(w for w in words if w in nw)


def corret(word):
    """Return the most frequent known word closest to *word*.

    Candidates are tried in order of increasing edit distance (0, 1, 2);
    *word* itself is returned when no known word is found.
    """
    # The distance-2 candidates must be filtered with known() as well:
    # the raw edits2() set is never empty, so without the filter the final
    # fallback could never be reached and arbitrary strings were returned.
    candidates = (known([word])
                  or known(editsl(word))
                  or known(edits2(word))
                  or [word])
    # Use .get so that scoring a candidate never inserts it into the
    # defaultdict model (indexing would add it with the default count).
    return max(candidates, key=lambda w: nw.get(w, 1))


c = corret("lovv")  # script entry point
print(c)
错误单词的纠正实例：朴素贝叶斯算法
最新推荐文章于 2023-03-27 18:51:48 发布