Markup, mark down_python markuppy-CSDN博客

本文链接：https://blog.csdn.net/u011410413/article/details/54319038

关于用python做文本文件的html markup项目。终于凭反复阅读体会实验代码，了解到了其中的一些实质。

话说，初学编程就学python真的好么？太多东西都被抽象化了，只能说用起来容易，做小改动容易吧。如果要用python从零开始搭建（build from scratch），还真挺想哭。如果初学的是那种面向过程编程的语言，对底层的了解就会比较实在。

不过也可能因为我个人被初学C给蒙蔽了双眼。
2017-04-03: 在Django里面看到一句话：“You should know basic math before you start using a calculator.”

下面介绍下入门版markup project。

#markup.py
import sys,re
from handlers import *
from util import *
from rules import *

class Parser:
    """Feed the blocks of a text file through filters and rules.

    Every block is first rewritten by each registered filter, then offered
    to the registered rules in the order they were added.  All output is
    delegated to the handler given at construction time.
    """

    def __init__(self, handler):
        self.handler = handler
        self.rules = []
        self.filters = []

    def addRule(self, rule):
        """Append *rule* to the rules tried for each block."""
        self.rules.append(rule)

    def addFilter(self, pattern, name):
        """Register a regex filter.

        Each match of *pattern* in a block is replaced by whatever the
        substitution function returned by ``handler.sub(name)`` produces.
        """

        def apply_filter(block, handler):
            replacement = handler.sub(name)
            return re.sub(pattern, replacement, block)

        self.filters.append(apply_filter)

    def parse(self, file):
        """Render *file* block by block between document start/end events."""
        renderer = self.handler
        renderer.start('document')

        for block in blocks(file):
            # Inline markup first: each filter rewrites the block text.
            for apply_filter in self.filters:
                block = apply_filter(block, renderer)

            # Then try the rules in order; a rule whose action returns a
            # true value ends the processing of this block.
            for rule in self.rules:
                if not rule.condition(block):
                    continue
                if rule.action(block, renderer):
                    break

        renderer.end('document')

class BasicTextParser(Parser):
    """Parser preloaded with the rules and filters used for plain text."""

    def __init__(self, handler):
        # The base initializer creates the rule/filter lists that
        # addRule and addFilter append to.
        Parser.__init__(self, handler)

        # Rules are tried in exactly this order for every block.
        rule_classes = (
            ListRule,
            ListItemRule,
            TitleRule,
            HeadingRule,
            ParagraphRule,
        )
        for rule_class in rule_classes:
            self.addRule(rule_class())

        # (pattern, handler substitution name) pairs, applied in this order.
        inline_filters = (
            (r'\*(.+?)\*', 'emphasis'),
            # Words written entirely in uppercase (two or more letters).
            (r'\b[A-Z]{2,}\b', 'emphasisAlpha'),
            (r'(http://[\.a-zA-Z/]+)', 'url'),
            (r'([\.a-zA-Z]+@[\.a-zA-Z]+[a-zA-Z]+)', 'mail'),
        )
        for pattern, name in inline_filters:
            self.addFilter(pattern, name)

handler = HTMLRenderer()
parser = BasicTextParser(handler) # 2. Constructing BasicTextParser is what registers its rules and filters.

parser.parse(sys.stdin) # 3. parse() reads the text from standard input and drives the handler.

#handlers.py
class Handler:
    """Dispatch parser events to optional methods named ``<prefix><name>``.

    Subclasses implement methods such as ``start_paragraph``,
    ``end_paragraph`` or ``sub_emphasis``; events without a matching
    method are ignored.
    """

    def callback(self, prefix, name, *args):
        """Call ``self.<prefix><name>(*args)`` if such a callable exists.

        Returns the method's result, or None when no callable attribute
        with that name is found.
        """
        method = getattr(self, prefix + name, None)
        if callable(method):
            return method(*args)
        return None

    def start(self, name):
        """Signal the start of the element *name*."""
        self.callback('start_', name)

    def end(self, name):
        """Signal the end of the element *name*."""
        self.callback('end_', name)

    def sub(self, name):
        """Return a replacement function suitable for ``re.sub``.

        The returned function receives a regex match object and returns
        the result of ``sub_<name>(match)``.  When no such method exists
        (or it returns None) the matched text is returned unchanged.
        """
        def substitution(match):
            result = self.callback('sub_', name, match)
            if result is None:
                # Fall back to the original text; returning None here
                # would make re.sub drop the matched text.
                return match.group(0)
            return result
        return substitution

class HTMLRenderer(Handler):
    """Handler that writes HTML markup to standard output.

    The ``start_*``/``end_*`` methods print opening and closing tags, the
    ``sub_*`` methods return replacement strings for inline markup, and
    ``feed`` prints the block text itself.

    Output uses single-argument ``print(...)`` calls, which behave the
    same under the Python 2 print statement and the Python 3 print
    function.
    """

    def start_document(self):
        print('<html><head><title>...</title></head><body>')

    def end_document(self):
        print('</body></html>')

    def start_paragraph(self):
        print('<p>')

    def end_paragraph(self):
        print('</p>')

    def start_heading(self):
        print('<h2>')

    def end_heading(self):
        print('</h2>')

    def start_list(self):
        print('<ul>')

    def end_list(self):
        print('</ul>')

    def start_listitem(self):
        print('<li>')

    def end_listitem(self):
        print('</li>')

    def start_title(self):
        print('<h1>')

    def end_title(self):
        print('</h1>')

    def sub_emphasis(self, match):
        """Wrap the first captured group in ``<em>`` tags."""
        return '<em>%s</em>' % match.group(1)

    def sub_emphasisAlpha(self, match):
        """Wrap the whole match in ``<em>`` tags."""
        return '<em>%s</em>' % match.group(0)

    def sub_url(self, match):
        """Turn the captured URL into a link whose text is the URL."""
        return '<a href="%s">%s</a>' % (match.group(1), match.group(1))

    def sub_mail(self, match):
        """Turn the captured address into a ``mailto:`` link."""
        return '<a href="mailto:%s">%s</a>' % (match.group(1), match.group(1))

    def feed(self, data):
        """Print *data* as is."""
        print(data)

#rules.py
class Rule:
    """Base class for rules; subclasses supply ``type`` and ``condition``."""

    def action(self, block, handler):
        """Emit *block* wrapped in start/end events for this rule's type.

        Returns True, which tells the parser to stop trying further rules
        for this block.
        """
        kind = self.type
        handler.start(kind)
        handler.feed(block)
        handler.end(kind)
        return True

class HeadingRule(Rule):
    """A heading is a single line of at most 70 characters not ending in ':'."""

    type = 'heading'

    def condition(self, block):
        """Return True if *block* looks like a heading.

        An empty block is never a heading; using ``endswith`` instead of
        indexing the last character avoids an IndexError in that case.
        """
        return (
            bool(block)
            and '\n' not in block
            and len(block) <= 70
            and not block.endswith(':')
        )

class TitleRule(HeadingRule):
    """Heading rule that can only match on its first ``condition`` call."""

    type = 'title'
    # True until condition() has been called once on this instance.
    first = True

    def condition(self, block):
        """Apply the heading test to the first block seen; False afterwards."""
        if self.first:
            # Consume the one chance, whether or not the block qualifies.
            self.first = False
            return HeadingRule.condition(self, block)
        return False

class ListItemRule(Rule):
    """A list item is a block that begins with a hyphen."""

    type = 'listitem'

    def condition(self, block):
        """Return True if *block* starts with '-'.

        ``startswith`` also handles an empty block, where indexing the
        first character would raise IndexError.
        """
        return block.startswith('-')

    def action(self, block, handler):
        """Emit the block as a list item, without the leading hyphen."""
        handler.start(self.type)
        # Drop the '-' marker and surrounding whitespace; keep the rest.
        handler.feed(block[1:].strip())
        handler.end(self.type)
        return True

class ListRule(ListItemRule):
    """Open and close the list element around runs of list items."""

    type = 'list'
    # True while the most recently seen block was a list item.
    inside = False

    def condition(self, block):
        """Always applies, so action() gets to inspect every block."""
        return True

    def action(self, block, handler):
        """Emit a list start or end when entering or leaving a run of items.

        Always returns False so the remaining rules still process the block.
        """
        is_item = ListItemRule.condition(self, block)

        if is_item and not self.inside:
            # First item of a new list.
            handler.start(self.type)
            self.inside = True
        elif self.inside and not is_item:
            # First block after the last item.
            handler.end(self.type)
            self.inside = False

        return False


class ParagraphRule(Rule):
    """Rule whose condition accepts every block."""

    type = 'paragraph'

    def condition(self, block):
        """Accept any block."""
        return True

#util.py
def lines(file):
    """Yield every line of *file*, followed by one extra '\\n'.

    The trailing blank line lets a consumer that flushes on blank lines
    also flush the final block of the file.
    """
    for text in file:
        yield text
    yield '\n'

def blocks(file):
    """Yield the blocks of *file*: runs of non-blank lines joined and stripped.

    Blank lines separate blocks and are never yielded themselves.
    """
    pending = []
    for line in lines(file):
        if not line.strip():
            # Blank line: flush whatever has been collected so far.
            if pending:
                yield ''.join(pending).strip()
                pending = []
            continue
        pending.append(line)

def words(file):
    """Return a list of all whitespace-separated words in *file*."""
    collected = []
    for line in file:
        collected.extend(line.split())
    return collected

实验文件：

Welcome to World Wide Spam, Inc.

These are the corporate web pages of World Wide Spam, Inc. We hope
you find your stay enjoyable, and that you will sample many of our
products.

A short history of the company

World Wide Spam was started in the summer of 2000. The business
concept was to ride the dot-com wave and to make money both through
bulk email and by selling canned meat online.
After receiving several complaints from customers who weren’t
satisfied by their bulk email, World Wide Spam altered their profile,
and focused 100% on canned goods. Today, they rank as the world’s
13,892nd online supplier of SPAM.

wwspam@wwspam.fu

From this page you may visit several of our interesting web pages:

What is SPAM? (http://wwspam.fu/whatisspam)
How do they make it? (http://wwspam.fu/howtomakeit)
Why should I eat it? (http://wwspam.fu/whyeatit)

How to get in touch with us

You can GET in touch with us in many ways: By phone (555-1234), by
email (wwspam@wwspam.fu) or by visiting our customer feedback page
(http://wwspam.fu/feedback).

这里写图片描述