# Code
'内建模块'
import itertools
from xml.parsers.expat import ParserCreate
from HTMLParser import HTMLParser
from htmlentitydefs import name2codepoint
class DefaultSaxHandler(object):
    """Event callbacks that write every SAX-style notification to stdout."""

    def start_element(self, name, attrs):
        """Print the name of an opening element along with its attributes."""
        attr_text = str(attrs)
        print('sax:start_element: %s, attrs: %s' % (name, attr_text))

    def end_element(self, name):
        """Print the name of a closing element."""
        message = 'sax:end_element: %s' % name
        print(message)

    def char_data(self, text):
        """Print a chunk of character data."""
        message = 'sax:char_data: %s' % text
        print(message)
# Sample document for the SAX demo below (raw string, so backslashes would
# be taken literally).
xml = r'''<?xml version="1.0"?>
<ol>
<li><a href="/python">Python</a></li>
<li><a href="/ruby">Ruby</a></li>
</ol>
'''

# Wire the printing callbacks into a fresh expat parser.
handler = DefaultSaxHandler()
parser = ParserCreate()
# Python 2 expat attribute: hand unicode strings to the callbacks.
parser.returns_unicode = True
parser.StartElementHandler = handler.start_element
parser.EndElementHandler = handler.end_element
parser.CharacterDataHandler = handler.char_data
# The whole document is supplied in one call, so mark it as the final chunk;
# otherwise expat keeps waiting for more input and a truncated document
# would never be reported as an error.
parser.Parse(xml, True)
# repeat() yields the same object again and again; the second argument caps
# the number of repetitions, so this loop prints 'A' ten times and stops.
ns = itertools.repeat('A', 10)
for n in ns:
    print(n)
# NOTE: the triple-quoted string below is a bare expression statement, so the
# sample code inside it never runs. It holds three further itertools demos:
#   * cycle('ABC') - the inline note says a string is also a kind of sequence;
#   * count(1)     - counts upward from 1;
#   * chain(...)   - the inline note says chain() joins a group of iterables
#                    into one larger iterator.
# Both the cycle() and count() loops iterate over infinite iterators and have
# no break, so they would never finish if enabled. The last loop calls
# `chain` unqualified; only `import itertools` is visible in this file, so it
# would presumably need to be `itertools.chain` - confirm before enabling.
'''
cs = itertools.cycle('ABC') # 注意字符串也是序列的一种
for c in cs:
print c
natuals = itertools.count(1)
for n in natuals:
print n
# chain()可以把一组迭代对象串联起来,形成一个更大的迭代器
for c in chain('ABC','XYZ'):
print c
'''
# groupby() bundles runs of adjacent equal items; each group is a one-shot
# iterator, so it is turned into a list before being printed.
for key, group in itertools.groupby('AAABBBCCAAA'):
    print('%s %s' % (key, list(group)))

# Same grouping, but items are compared by their upper-cased form so that
# 'A' and 'a' land in the same run.
for key, group in itertools.groupby('AaaBBbcCAAa', str.upper):
    print('%s %s' % (key, list(group)))
for x in itertools.imap(lambda x, y: x * y, [10, 20, 30], itertools.count(1)):
print x
r = itertools.imap(lambda x: x*x, itertools.count(1))
for n in itertools.takewhile(lambda x: x<100, r):
print n
class MyHTMLParser(HTMLParser):
    """Trace parser events by printing a short marker for each one.

    Tag and reference events echo the name they receive; text and comment
    events print a fixed placeholder rather than the payload itself.
    """

    def handle_starttag(self, tag, attrs):
        """Print an opening tag as ``<tag>``; the attributes are not shown."""
        opening = '<%s>' % tag
        print(opening)

    def handle_endtag(self, tag):
        """Print a closing tag as ``</tag>``."""
        closing = '</%s>' % tag
        print(closing)

    def handle_startendtag(self, tag, attrs):
        """Print a self-closing tag as ``<tag/>``; the attributes are not shown."""
        self_closing = '<%s/>' % tag
        print(self_closing)

    def handle_data(self, data):
        """Print the fixed word ``data``; the text content itself is not shown."""
        print('data')

    def handle_comment(self, data):
        """Print a fixed empty-comment marker; the comment body is not shown."""
        print('<!-- -->')

    def handle_entityref(self, name):
        """Print a named entity reference as ``&name;``."""
        entity = '&%s;' % name
        print(entity)

    def handle_charref(self, name):
        """Print a numeric character reference as ``&#name;``."""
        char_ref = '&#%s;' % name
        print(char_ref)
# Run the tracing HTML parser over a small sample page. Note that this
# rebinds the module-level name `parser`.
parser = MyHTMLParser()
parser.feed('<html><head></head><body><p>Some <a href=\"#\">html</a> tutorial...<br>END</p></body></html>')
# feed() only processes complete constructs and buffers anything unfinished;
# close() signals end-of-input so that buffered data is not silently dropped.
parser.close()