# Parse a downloaded web page using CSS selectors.
# -*- coding: utf-8 -*-
from pyquery import PyQuery
def is_tab_link(href):
    """Return True when *href* is a string containing ``'tab'``.

    ``None`` (an anchor with no usable href) is treated as a non-match
    instead of raising ``AttributeError``.  A match at index 0 counts too;
    the earlier ``href.find('tab') > 0`` test silently skipped that case.
    """
    return href is not None and 'tab' in href


def main(path='resource/v2ex.html'):
    """Parse the saved HTML page at *path* and print selected pieces of it.

    Output, in order:
      * the text of the ``<title>`` element;
      * ``1 <href>`` for every ``div.inner > a`` link whose href contains
        ``'tab'``;
      * ``2 <href>`` for every link whose href starts with ``/member/``.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(path) as page:
        q = PyQuery(page.read())

    # Text of the <title></title> element.
    print(q('title').text())

    # Selector shape: <tag name>.<class name> > <child tag name>.
    for each in q('div.inner>a').items():
        href = each.attr.href
        if is_tab_link(href):
            # A single pre-formatted argument prints identically whether
            # ``print`` is the Python 2 statement or the Python 3 function.
            print('%s %s' % (1, href))

    # Attribute selector: href value begins with "/member/".
    for each in q('a[href^="/member/"]').items():
        print('%s %s' % (2, each.attr.href))


if __name__ == '__main__':
    main()