import requests
import os
import time
from lxml import etree
'''
Element objects returned by an xpath query expose .tag (tag name), .attrib (dict-like tag attributes) and .text (tag text).
'''
def get_xpath_text(url, headers):
    """Fetch *url* and return its body parsed into an lxml HTML tree.

    Args:
        url: Address of the page to download.
        headers: Mapping of HTTP request headers (e.g. ``User-Agent``).

    Returns:
        The root element produced by ``etree.HTML`` for the response text,
        ready for ``.xpath(...)`` queries.
    """
    # The second positional parameter of requests.get() is ``params``, so
    # the headers must be passed by keyword; otherwise they end up in the
    # query string and are never sent as HTTP headers.
    response = requests.get(url, headers=headers)
    # Force UTF-8 decoding of the body before reading ``response.text``.
    response.encoding = 'utf-8'
    return etree.HTML(response.text)
def xpath_get_urls(html):
    """Parse an HTML string and return the link elements of each item.

    Args:
        html: HTML markup as text.

    Returns:
        The result of the xpath query: the ``<a>`` elements located under
        ``div.item > div.p``. These are element objects, not URL strings.
    """
    link_query = "//div[@class='item']/div[@class='p']/a"
    document = etree.HTML(html)
    return document.xpath(link_query)
def get_group_pageinfo(page_url):
grouppage_urls=[]
xpath_html = get_xpath_text(page_url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.162 Safari/537.36'})
pagenumstr &