# 学习网址 (reference: regular-expression syntax tutorial):
# http://www.runoob.com/regexp/regexp-syntax.html
import re
# Sample markup: a fragment of a hero-list page. Each <li> holds a link
# (href), a thumbnail (src) and the hero's display name as the link text.
page_hero = '''<ul class="herolist clearfix"><li><a href="herodetail/194.shtml" target="_blank"><img src="http://game.gtimg.cn/images/yxzj/img201606/heroimg/194/194.jpg" width="91px" alt="苏烈">苏烈</a></li><li><a href="herodetail/195.shtml" target="_blank"><img src="http://game.gtimg.cn/images/yxzj/img201606/heroimg/195/195.jpg" width="91px" alt="百里玄策">百里玄策</a></li><li><a href="herodetail/196.shtml" target="_blank"><img src="http://game.gtimg.cn/images/yxzj/img201606/heroimg/196/196.jpg" width="91px" alt="百里守约">百里守约</a></li><li><a href="herodetail/193.shtml" target="_blank"><img src="http://game.gtimg.cn/images/yxzj/img201606/heroimg/193/193.jpg" width="91px" alt="铠">铠</a></li></ul>
'''

# --- href attribute values ---------------------------------------------
# Anchor on the surrounding double quotes so group(1) is the bare attribute
# value. Delimiting by "the next space" would keep the quotes inside the
# capture and would run on into the following markup whenever the attribute
# is the last one in its tag.
href_pattern = r'\bhref="([^"]*)"'
href_regex = re.compile(href_pattern, re.IGNORECASE)
for match in href_regex.finditer(page_hero):
    # match.start() is the offset of the "href=" token in page_hero.
    print("index=%s,href:%s\n" % (match.start(), match.group(1)))

# --- link text (hero names) --------------------------------------------
# A run of word characters sitting directly between '>' and '<'.
# No leading '.*?' here: a lazy prefix makes every match begin where the
# previous one ended, so the reported index would not point at the name.
name_pattern = r'>(\w+)<'
name_regex = re.compile(name_pattern, re.IGNORECASE)
for match in name_regex.finditer(page_hero):
    # start(1) is the offset of the name itself, not of the preceding '>'.
    print("index=%s,name:%s\n" % (match.start(1), match.group(1)))

# --- img src attribute values ------------------------------------------
# Same quote-anchored form as the href pattern above.
jpg_pattern = r'\bsrc="([^"]*)"'
jpg_regex = re.compile(jpg_pattern, re.IGNORECASE)
for match in jpg_regex.finditer(page_hero):
    # Label is "src": these are image URLs, not names.
    print("index=%s,src:%s\n" % (match.start(), match.group(1)))
# ? 匹配前面的子表达式零次或一次,或指明一个非贪婪限定符。要匹配 ? 字符,请使用 \?。
# + 匹配前面的子表达式一次或多次。要匹配 + 字符,请使用 \+。