requests--etree--xpath

weixin_30823833

于 2018-05-25 21:05:00 发布

阅读量99

点赞数

原文链接：http://www.cnblogs.com/chillytao-suiyuan/p/9090684.html

版权

# -*- coding: cp936 -*-
import requests
from lxml import etree

url = 'https://weibo.cn/pub/'

# Fetch the page and keep the raw bytes (.content) for the parser.
# The original author's note: use .content before calling etree.HTML(html),
# otherwise an error is raised.
# NOTE(review): presumably this is lxml rejecting an already-decoded string
# that carries its own encoding declaration -- confirm.
# The timeout keeps the script from blocking forever if the server never
# answers; requests applies no timeout by default.
html = requests.get(url, timeout=10).content

# Parse the bytes into an element tree that can be queried with XPath.
selector = etree.HTML(html)

# Other queries that are handy when exploring the page:
#   selector.xpath('//a/text()')  -> the text of every <a> element
#   selector.xpath('//a/@href')   -> the href attribute of every <a> element
# An earlier draft of this script used '//ahref/text()', which looks for
# elements literally named "ahref" rather than reading the href attribute.
# xpath() returns a list, so the results can be iterated directly.

# Collect the href attribute of every <a> that is a direct child of a <div>.
titles = selector.xpath('*//div/a/@href')
for href in titles:
    # print(...) with a single argument behaves the same on Python 2 and 3,
    # whereas the bare `print x` statement is a SyntaxError on Python 3.
    print(href)