# -*- coding: cp936 -*-
"""Fetch the weibo.cn public page and print the link targets found in it.

The script downloads the page, parses it with lxml and prints, one per line,
every ``href`` attribute value matched by the XPath query used below.
"""
import requests
from lxml import etree

url = 'https://weibo.cn/pub/'

# Without a timeout requests waits indefinitely on a stalled connection.
response = requests.get(url, timeout=10)
# Fail loudly on an HTTP error instead of scraping links out of an error page.
response.raise_for_status()

# Hand the raw bytes (.content) to etree.HTML; per the original author's
# note, parsing fails otherwise.
html = response.content
selector = etree.HTML(html)

# Alternative query kept from earlier experiments: '//a/text()' yields the
# visible text of every link instead of its target.

# The query selects the href attribute of <a> elements that are direct
# children of a <div>, searching below each child of the root element.
# NOTE(review): etree.HTML may yield None when the body has no parsable
# content (behaviour varies by lxml version), so guard before querying.
titles = selector.xpath('*//div/a/@href') if selector is not None else []
for href in titles:
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(href)