1、登录博客 https://blog.csdn.net/m0_37607730 ,按 F12 查看源码,搜索任意博客标题
2、右键,拷贝xpath地址
3、代码如下
# -*- coding: utf-8 -*-
"""lxml应用:获取博客列表"""
import requests
from lxml import etree
def get_list(url_tr, timeout=10):
    """Download a blog page and return the text of its article-title links.

    Args:
        url_tr: URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up. It is
            forwarded to ``requests.get``; without a timeout the request
            can block indefinitely on an unresponsive server.

    Returns:
        A list of strings, one per text node matched by the XPath
        expression below. The strings are returned unmodified, so callers
        should expect to strip layout whitespace themselves. The list is
        empty when the expression matches nothing.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    res = requests.get(url_tr, timeout=timeout)
    # Fail loudly on an error status instead of parsing an error page.
    res.raise_for_status()
    html = etree.HTML(res.text)
    # NOTE: this path is tied to the page's current markup; if the site
    # layout changes it will silently match nothing.
    return html.xpath('//*[@id="mainBox"]/main/div[2]/div/h4/a/text()')
if __name__ == '__main__':
    # Blog home page whose article titles are printed, one per line.
    url = 'https://blog.csdn.net/m0_37607730'
    # Remove every newline and space character from each title text node
    # before printing it.
    titles = [
        raw.replace('\n', '').replace(' ', '')
        for raw in get_list(url)
    ]
    for title in titles:
        print(title)
4、结果