import re
def match_sxz(noun):
    """Return a match object if *noun* ends in ``s``, ``x`` or ``z``, else ``None``."""
    return re.search(r'[sxz]$', noun)
def apply_sxz(noun):
    """Return *noun* with ``es`` appended (plural for nouns ending in s, x or z)."""
    # Substituting at the end-of-string anchor appends the suffix.
    return re.sub(r'$', 'es', noun)
def match_h(noun):
    """Return a match object if *noun* ends in an ``h`` that takes ``es``.

    The final ``h`` must be preceded by a character other than a vowel or
    one of ``d g k p r t`` (e.g. 'coach' matches, 'cheetah' does not).
    Returns ``None`` otherwise.
    """
    # The trailing `$` is essential: without it an `h` anywhere in the word
    # (e.g. the 'sh' in 'shoe') would be treated as a word-final `h`.
    return re.search(r'[^aeioudgkprt]h$', noun)
def apply_h(noun):
    """Return *noun* with ``es`` appended (plural for nouns matched by ``match_h``)."""
    # Substituting at the end-of-string anchor appends the suffix.
    return re.sub(r'$', 'es', noun)
def match_y(noun):
    """Return a match object if *noun* ends in a non-vowel followed by ``y``, else ``None``."""
    return re.search(r'[^aeiou]y$', noun)
def apply_y(noun):
    """Return *noun* with its final ``y`` replaced by ``ies``."""
    return re.sub(r'y$', 'ies', noun)
def match_default(noun):
    """Catch-all matcher: accept every noun by always returning ``True``."""
    return True
def apply_default(noun):
    """Return *noun* with ``s`` appended (the fallback plural form)."""
    # Substituting at the end-of-string anchor appends the suffix.
    return re.sub(r'$', 's', noun)
# Ordered (matcher, applier) pairs. plural() applies the first pair whose
# matcher accepts the noun, so the catch-all default pair must stay last.
rules = (
    (match_sxz, apply_sxz),
    (match_h, apply_h),
    (match_y, apply_y),
    (match_default, apply_default),
)
def plural(noun):
    """Return the plural of *noun* using the first matching rule.

    Walks the module-level ``rules`` in order and applies the applier of the
    first pair whose matcher returns a truthy value. Returns ``None`` if no
    matcher accepts the noun.
    """
    for match_rule, apply_rule in rules:
        if match_rule(noun):
            return apply_rule(noun)
# Demo call: 'body' ends in consonant + y, so the y-rule turns it into 'bodies'.
print(plural('body'))
改成动态创建函数的方式
import re
def build_match_apply_function(pattern, search, replace):
    """Build a (matcher, applier) pair of closures for one pluralization rule.

    Args:
        pattern: regex deciding whether the rule applies to a word.
        search: regex locating the part of the word to rewrite.
        replace: replacement text substituted for every ``search`` match.

    Returns:
        A tuple ``(match_rule, apply_rule)``: ``match_rule(word)`` returns the
        result of ``re.search(pattern, word)`` and ``apply_rule(word)``
        returns ``re.sub(search, replace, word)``.
    """
    def match_rule(word):
        return re.search(pattern, word)

    def apply_rule(word):
        return re.sub(search, replace, word)

    return (match_rule, apply_rule)
# One (match pattern, search pattern, replacement) triple per rule, in
# priority order; the final triple matches every word and appends 's'.
patterns = (
    (r'[sxz]$', r'$', 'es'),
    (r'[^aeioudgkprt]h$', r'$', 'es'),
    # The group covers only the character(s) before the final 'y'. With the
    # parenthesis closed after 'y$' the 'qu' alternative matched anywhere in
    # the word, so e.g. 'queen' hit this rule and came back unpluralized.
    (r'(qu|[^aeiou])y$', r'y$', 'ies'),
    (r'$', r'$', 's'),
)
# Turn every pattern triple into its (matcher, applier) pair, keeping the order.
rules = [
    build_match_apply_function(match_re, target_re, replacement)
    for match_re, target_re, replacement in patterns
]
def plural(noun):
    """Return the plural of *noun* using the first matching rule.

    Iterates the module-level ``rules`` list in order and applies the first
    pair whose matcher returns a truthy value. Returns ``None`` if no matcher
    accepts the noun.
    """
    for match_rule, apply_rule in rules:
        if match_rule(noun):
            return apply_rule(noun)
# Demo call: pluralize 'body' with the dynamically built rules.
print(plural('body'))
下面把规则存放到一个文件中，程序通过读取该文件的内容来加载规则。
先把规则写到 plural.txt 中（每行一条规则，三个字段之间用空白分隔）：
[sxz]$               $    es
[^aeioudgkprt]h$     $    es
[^aeiou]y$           y$   ies
$                    $    s
程序代码
import re
def build_match_apply_function(pattern, search, replace):
    """Create the matcher/applier closures for a single rule.

    Args:
        pattern: regex that a word must match for the rule to apply.
        search: regex selecting the text to be replaced in the word.
        replace: text substituted for each ``search`` match.

    Returns:
        ``(match_rule, apply_rule)`` where ``match_rule(word)`` is
        ``re.search(pattern, word)`` and ``apply_rule(word)`` is
        ``re.sub(search, replace, word)``.
    """
    def match_rule(word):
        return re.search(pattern, word)

    def apply_rule(word):
        return re.sub(search, replace, word)

    return (match_rule, apply_rule)
# Load the rules from plural.txt: one rule per line, written as three
# whitespace-separated fields (match pattern, search pattern, replacement).
rules = []
with open('plural.txt', encoding='gb2312') as pattern_file:
    for line in pattern_file:
        if not line.strip():
            # A blank line carries no rule; unpacking it would raise ValueError.
            continue
        # maxsplit=3 (not 2) lets split() also drop the trailing newline
        # instead of leaving it attached to the replacement field.
        pattern, search, replace = line.split(None, 3)
        rules.append(build_match_apply_function(pattern, search, replace))
def plural(noun):
    """Return the plural of *noun* using the rules loaded into ``rules``.

    Applies the first (matcher, applier) pair whose matcher returns a truthy
    value. Returns ``None`` if no matcher accepts the noun.
    """
    for match_rule, apply_rule in rules:
        if match_rule(noun):
            return apply_rule(noun)
# Demo call: pluralize 'body' with the rules read from plural.txt.
print(plural('body'))
继续改进,用生成器
import re
def build_match_apply_function(pattern, search, replace):
    """Return a ``(match_rule, apply_rule)`` closure pair for one rule.

    ``match_rule(word)`` returns ``re.search(pattern, word)`` (a match object
    or ``None``); ``apply_rule(word)`` returns ``re.sub(search, replace, word)``.
    """
    def match_rule(word):
        return re.search(pattern, word)

    def apply_rule(word):
        return re.sub(search, replace, word)

    return (match_rule, apply_rule)
def rules(rule_filename):
    """Lazily yield one (matcher, applier) pair per rule line of a file.

    Args:
        rule_filename: path of a gb2312-encoded text file holding one rule
            per line as three whitespace-separated fields (match pattern,
            search pattern, replacement).

    Yields:
        The pair built by ``build_match_apply_function`` for each rule line,
        in file order. The file is read only as far as the consumer iterates.
    """
    with open(rule_filename, encoding='gb2312') as pattern_file:
        for line in pattern_file:
            if not line.strip():
                # A blank line carries no rule; unpacking it would raise ValueError.
                continue
            # maxsplit=3 (not 2) lets split() also drop the trailing newline.
            pattern, search, replace = line.split(None, 3)
            yield build_match_apply_function(pattern, search, replace)
def plural(noun, rule_filename='plural.txt'):
    """Return the plural of *noun* using rules streamed from a file.

    Args:
        noun: the word to pluralize.
        rule_filename: rules file passed to ``rules()``; it is consulted
            anew on every call.

    Returns:
        The result of the first rule whose matcher accepts *noun*.

    Raises:
        ValueError: if no rule in the file matches *noun*.
    """
    for match_rule, apply_rule in rules(rule_filename):
        if match_rule(noun):
            return apply_rule(noun)
    raise ValueError('no matching rule for {0}'.format(noun))
# Demo call: pluralize 'body' with the default rules file.
print(plural('body'))
改用迭代器读取文件,并缓存
import re
def build_match_and_apply_functions(pattern, search, replace):
    """Build the matcher and applier closures for one pluralization rule.

    Args:
        pattern: regex deciding whether the rule applies to a word.
        search: regex locating the part of the word to rewrite.
        replace: replacement text substituted for every ``search`` match.

    Returns:
        A two-item list ``[matches_rule, apply_rule]``: ``matches_rule(word)``
        returns ``re.search(pattern, word)`` and ``apply_rule(word)`` returns
        ``re.sub(search, replace, word)``.
    """
    def matches_rule(word):
        return re.search(pattern, word)

    def apply_rule(word):
        return re.sub(search, replace, word)

    return [matches_rule, apply_rule]
class LazyRules:
    """Iterator over pluralization rules, read lazily from a file and cached.

    Each item is the ``[matcher, applier]`` list produced by
    ``build_match_and_apply_functions`` for one line of ``rules_filename``.
    Lines are read only when iteration runs past the rules already cached,
    and the file is closed once its end is reached. The iteration position
    lives on the instance (``cache_index``), so every ``iter()`` call restarts
    it and overlapping iterations of the same instance share that position.
    """

    # Path of the rules file: one rule per line, three whitespace-separated
    # fields (match pattern, search pattern, replacement).
    rules_filename = 'plural.txt'

    def __init__(self):
        """Open the rules file and start with an empty cache."""
        self.pattern_file = open(self.rules_filename, encoding='gb2312')
        self.cache = []
        # Defined up front so next() also works before the first iter() call.
        self.cache_index = 0

    def __iter__(self):
        """Restart iteration from the first rule and return self."""
        self.cache_index = 0
        return self

    def __next__(self):
        """Return the next rule, from the cache if possible, else from the file.

        Raises:
            StopIteration: when the cache is exhausted and the file has no
                further rule lines (or was already closed).
        """
        self.cache_index += 1
        if len(self.cache) >= self.cache_index:
            return self.cache[self.cache_index - 1]

        if self.pattern_file.closed:
            raise StopIteration

        line = self.pattern_file.readline()
        # Skip blank lines: they carry no rule and would break the unpacking.
        while line and not line.strip():
            line = self.pattern_file.readline()
        if not line:
            # readline() returns '' only at end of file.
            self.pattern_file.close()
            raise StopIteration

        # maxsplit=3 (not 2) lets split() also drop the trailing newline.
        pattern, search, replace = line.split(None, 3)
        funcs = build_match_and_apply_functions(pattern, search, replace)
        self.cache.append(funcs)
        return funcs
# Single shared rule source; constructing it opens the rules file.
rules = LazyRules()
def plural(noun):
    """Return the plural of *noun* using the shared ``rules`` iterable.

    Applies the first rule whose matcher returns a truthy value. Returns
    ``None`` if no rule accepts the noun.
    """
    for matches_rule, apply_rule in rules:
        if matches_rule(noun):
            return apply_rule(noun)
# Demo call: pluralize 'bony' with the lazily loaded rules.
print(plural('bony'))