“即时标记”

项目需求和原型设计来自于: 《Python基础教程》第20章

类图:


相对于原版代码,主要改动是把“Filter”的功能从 Handler 中独立出来,使各功能模块的职责更加清晰。

代码如下:

#encoding=utf-8
#by panda

import re

class filter():
    """Wrap inline text fragments in decorative HTML tags.

    Each instance is bound to one entry of ``patterns``. ``action`` rewrites
    every match of that pattern through the ``sub_<name>`` method.
    """

    # Regular expression registered for each supported filter name.
    patterns = {
        'emphasis': r'\*(.+?)\*',
        'url': r'(http://[\.a-zA-Z/]+)',
        'mail': r'([\.a-zA-Z]+@[\.a-zA-Z]+[a-zA-Z]+)',
    }

    def __init__(self, name):
        """Bind the filter to the pattern stored under ``name``.

        Raises KeyError when ``name`` is not a key of ``patterns``.
        """
        self.name = name
        self.pattern = self.patterns[self.name]

        def replace(matchobj):
            # Keep the matched text when there is no sub_<name> method or
            # when that method returns a falsy value.
            replacement = self.callback('sub_', name, matchobj)
            return replacement or matchobj.group(0)

        self.actionFunc = replace

    def action(self, block):
        """Return ``block`` with every match of the pattern substituted."""
        return re.sub(self.pattern, self.actionFunc, block)

    def callback(self, prefix, name, *args):
        """Call the method named ``prefix + name`` with ``args``.

        Returns None when no such callable attribute exists.
        """
        target = getattr(self, prefix + name, None)
        if not callable(target):
            return None
        return target(*args)

    def sub_emphasis(self, match):
        """Render the text between asterisks as emphasised."""
        inner = match.group(1)
        return '<em>%s</em>' % inner

    def sub_url(self, match):
        """Render a matched URL as a hyperlink to itself."""
        address = match.group(1)
        return '<a href="%s">%s</a>' % (address, address)

    def sub_mail(self, match):
        """Render a matched e-mail address as a mailto link."""
        address = match.group(1)
        return '<a href="mailto:%s">%s</a>' % (address, address)
    
class Rule():
    """Wrap a block of text in the tags of one block type.

    ``action`` reads ``self.type`` to pick the handler's start/end tags, so
    a usable rule needs a ``type`` attribute; this base class defines none.
    """

    def __init__(self, handler):
        """Store the handler used to render the block."""
        self.handler = handler

    def conditon(self, block):
        """Remember ``block`` for a later ``action`` call and return False.

        The misspelled name is kept for backward compatibility with code
        that calls ``Rule.conditon`` directly.
        """
        self.block = block
        return False

    # Correctly spelled entry point, so a plain Rule also supports the
    # ``condition`` call that parsers make on every rule.
    condition = conditon

    def action(self):
        """Return the remembered block wrapped in start and end tags.

        The result is also printed, as in the original implementation.
        Pieces for which the handler returns None or an empty string are
        skipped instead of raising TypeError.
        """
        pieces = (
            self.handler.start(self.type),
            self.handler.feed(self.block),
            self.handler.end(self.type),
        )
        result = ''.join(piece for piece in pieces if piece)
        # Parenthesised form prints the same on Python 2 and Python 3.
        print(result)
        return result
    
class TitleRule(Rule):
    """
    The title is the first block in the document, provided that it is
    a heading.
    """
    type = 'title'
    # True until this rule has examined its first block.
    first = True

    def condition(self, block):
        """Return True only if the first block seen looks like a title.

        A title is a non-empty single line of at most 70 characters that
        does not end with a colon. Every later call returns False.
        """
        Rule.conditon(self, block)
        if not self.first:
            return False
        # Only the very first block is considered, whether or not it
        # qualifies as a title.
        self.first = False
        if not block:
            # An empty block has no last character and is never a title.
            return False
        return '\n' not in block and len(block) <= 70 and block[-1] != ':'

class ParagraphRule(Rule):
    """
    Catch-all rule: any block is accepted as a paragraph.
    """
    type = 'paragraph'

    def condition(self, block):
        """Record ``block`` through the base class and always return True."""
        # The base call stores the block for a later action().
        Rule.conditon(self, block)
        return True
    
class Handler():
    """Dispatch ``start``/``end`` requests to ``start_<name>``/``end_<name>``."""

    def callback(self, prefix, name, *args):
        """Call the method named ``prefix + name`` with ``args``.

        Returns None when no such callable attribute exists.
        """
        target = getattr(self, prefix + name, None)
        if not callable(target):
            return None
        return target(*args)

    def start(self, name):
        """Return the result of ``start_<name>()``, or None if undefined."""
        return self.callback('start_', name)

    def end(self, name):
        """Return the result of ``end_<name>()``, or None if undefined."""
        return self.callback('end_', name)
    
class HTMLRenderer(Handler):
    """Handler that produces HTML tags for each supported block type."""

    def start_document(self):
        """Return the opening markup of the HTML document.

        On Python 2 the result is a GBK-encoded byte string, as before.
        On Python 3 it is returned as text.
        """
        opening = '<html><head><title>文本标记结果</title></head><body>'
        # A byte-string literal means Python 2: transcode it from UTF-8 to
        # GBK. The `unicode` builtin used previously is absent on Python 3.
        # Assumes the source file is saved as UTF-8 - TODO confirm.
        if isinstance(opening, bytes):
            opening = opening.decode('utf-8').encode('gbk')
        return opening

    def end_document(self):
        """Return the closing markup of the HTML document."""
        return '</body></html>'

    def start_paragraph(self):
        return '<p>'

    def end_paragraph(self):
        return '</p>'

    def start_heading(self):
        return '<h2>'

    def end_heading(self):
        return '</h2>'

    def start_list(self):
        return '<ul>'

    def end_list(self):
        return '</ul>'

    def start_listitem(self):
        return '<li>'

    def end_listitem(self):
        return '</li>'

    def start_title(self):
        return '<h1>'

    def end_title(self):
        return '</h1>'

    def feed(self, block):
        """Return the block content unchanged."""
        return block
        
class Parser():
    """Text parser: runs filters, then rules, over each block of content."""

    def __init__(self):
        self.filters = []
        self.rules = []

    def parse(self, content):
        """Return the concatenated markup for every non-empty block.

        Each item of ``content`` is stripped, and blank items are skipped.
        All filters are applied in order. The first rule whose condition
        holds and whose action returns a truthy value replaces the block.
        """
        rendered = []
        for raw in content:
            text = raw.strip()
            if not text:
                continue
            for inline in self.filters:
                text = inline.action(text)

            for rule in self.rules:
                if not rule.condition(text):
                    continue
                markup = rule.action()
                if markup:
                    text = markup
                    break
            rendered.append(text)
        return ''.join(rendered)

    def addRule(self, rule):
        """Append ``rule`` to the list of rules tried for each block."""
        self.rules.append(rule)

    def addFilter(self, name):
        """Create the filter registered under ``name`` and append it."""
        self.filters.append(filter(name))
    
class BasicTextParser(Parser):
    """Parser preloaded with the standard filters and rules.

    The parsed content is wrapped in the handler's document start and end.
    """

    def __init__(self, handler):
        Parser.__init__(self)
        self.handler = handler
        # Filters run in this order on every block.
        for filter_name in ('emphasis', 'url', 'mail'):
            self.addFilter(filter_name)

        # Rules are tried in this order; the title rule goes first.
        for rule_class in (TitleRule, ParagraphRule):
            self.addRule(rule_class(handler))

    def parse(self, content):
        """Return the full document markup for ``content``."""
        return ''.join([
            self.handler.start('document'),
            Parser.parse(self, content),
            self.handler.end('document'),
        ])
        
    
# Script entry point: convert test_input.txt to test.html and echo the result.
# To read from standard input instead, pass sys.stdin to parser.parse().
if __name__ == '__main__':
    handler = HTMLRenderer()
    parser = BasicTextParser(handler)
    # Context managers close both files even when parsing or writing fails.
    with open('test_input.txt') as source:
        output = parser.parse(source)
    # Parenthesised form prints the same on Python 2 and Python 3.
    print(output)
    # Opened only after parsing succeeds, so a failed run does not truncate
    # an existing test.html.
    with open('test.html', 'w') as target:
        target.write(output)


评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值