我使用了一个更简单的库来解决类似的问题:
import re
try:
    from html.parser import HTMLParser  # Python 3 location
except ImportError:
    from HTMLParser import HTMLParser  # Python 2 location
class MyParser(HTMLParser):
    """Collect ``addToCart(a,b)`` arguments for links inside market spans.

    While the parser is inside a ``<span>`` whose ``class`` attribute
    contains ``market-name``, the ``href`` of the most recent ``<a>`` tag is
    remembered, and every ``<button>`` whose ``onclick`` starts with
    ``addToCart(<digits>,<digits>)`` stores its two numbers (as strings)
    in ``markets`` under that ``href``.

    Attributes:
        in_market: nesting depth of ``<span>`` tags counted from the
            enclosing market span; 0 means "not inside a market span".
        markets: dict mapping an ``href`` to ``[first_arg, second_arg]``.
        market: ``href`` of the last ``<a>`` seen inside a market span, or
            ``None`` if none has been seen yet.
    """

    # Compiled once for the class instead of on every <button> tag.
    _ADD_TO_CART_RE = re.compile(r'addToCart\((\d+),(\d+)\)')

    def __init__(self):
        # Explicit base-class call (not super()) keeps this working when
        # HTMLParser is an old-style class, as in Python 2.
        HTMLParser.__init__(self)
        self.in_market = 0
        self.markets = {}
        self.market = None

    def handle_starttag(self, tag, attrs):
        """Track market spans and record links/buttons found inside them.

        Args:
            tag: lower-cased tag name supplied by the parser.
            attrs: list of ``(name, value)`` pairs supplied by the parser;
                ``value`` is ``None`` for attributes written without one.
        """
        # The parser hands over a list of pairs; a dict gives name lookup.
        attrs = dict(attrs)
        if tag == 'span':
            # ``or ''`` covers both a missing and a value-less class attribute.
            if 'market-name' in (attrs.get('class') or ''):
                self.in_market = 1
            elif self.in_market:
                # Nested span: count it so its end tag does not close the
                # market span prematurely.
                self.in_market += 1
        elif self.in_market:
            if tag == 'a' and 'href' in attrs:
                self.market = attrs['href']
            elif tag == 'button' and 'onclick' in attrs:
                match = self._ADD_TO_CART_RE.match(attrs['onclick'] or '')
                # Buttons with an unrelated handler are ignored rather than
                # crashing on ``None.group``.
                if match:
                    # Keyed by the last link seen; the key is None if no
                    # link preceded the button inside this market span.
                    self.markets[self.market] = [match.group(1),
                                                 match.group(2)]

    def handle_endtag(self, tag):
        """Leave one level of span nesting when a ``</span>`` is seen."""
        if tag == 'span' and self.in_market:
            self.in_market -= 1

    def handle_data(self, data):
        """Ignore text content; only tags and attributes are of interest."""
        pass
如果你对代码有不清楚的地方,可以向我提问。