# -*- coding: utf-8 -*-
"""Dump the text of the CSDN rank list page, then list items in session mode.

The script attaches to a Chromium browser through DrissionPage, loads the
page, prints every non-empty text node found by lxml, switches the tab's
mode, and prints the heading and description of each matched item.
"""
__author__ = '李元豪 from https://www.zhilu.space'

from DrissionPage import Chromium
from lxml import etree

# Connect to the browser and get a MixTab object.
tab = Chromium().latest_tab

# Visit the target URL.
tab.get('https://blog.csdn.net/rank/list')
t = tab.html

# Parse the page source with lxml's HTML parser.
parser = etree.HTMLParser()
tree = etree.fromstring(t, parser)

# Extract all text with XPath: '.' is the current node, '//' selects every
# descendant, and text() returns the text content of a node.
all_text = tree.xpath('.//text()')

# Drop strings that are empty or whitespace-only, stripping the rest.
all_text = [text.strip() for text in all_text if text.strip()]

# Print the extracted text, one node per line.
for text in all_text:
    print(text)

# Switch the tab to packet send/receive mode.
tab.change_mode()

# Get all row elements.
# NOTE(review): confirm that these class selectors actually exist on the page
# loaded above; they could not be verified from this file.
items = tab.ele('.ui relaxed divided items explore-repo__list').eles('.item')

# Walk the matched elements and print each one's heading and description,
# followed by a blank line as a separator.
for item in items:
    print(item('t:h3').text)
    print(item('.project-desc mb-1').text)
    print()