# 去年的工作,发第一篇试试
# coding= utf-8
import requests
from lxml import etree # path = './web/new_index.html'
# Load a locally saved HTML file, parse it with lxml, and print the text found
# by one XPath query, echoing each intermediate value along the way.
#
# The read happens inside a `with` block so the file handle is closed as soon
# as the bytes have been read, rather than staying open for the rest of the
# script.
with open('E:\\0爬虫监控平台\\错误码视图\\应用监控平台.html','rb') as fp:
    # The file is read as raw bytes and decoded explicitly as UTF-8
    # (the original author noted 'gbk' as an alternative decoding).
    html = fp.read().decode('utf-8')
print(html)

# etree.HTML(source) turns the markup into an object that XPath can query.
selector = etree.HTML(html)
print(selector)

# Text nodes of the <a> inside the third <li> of the <ul> directly under the
# element whose id is "tabs".
infos = selector.xpath('//*[@id="tabs"]/ul/li[3]/a/text()')
print(type(infos))  # output recorded by the author: <class 'list'>
print(len(infos))  # output recorded by the author: 11
print(infos)