应用背景:
你找到了一个网页,其中日期写在 button 元素的文本里。你想获取相邻两个日期按钮之间的内容(下面的代码提取的是这两个按钮之间带有 link-light 类的 a 链接)。
import requests
from bs4 import BeautifulSoup
# Script: collect every <a class="link-light"> that sits between the first two
# "date" buttons of a page and print its href and text.
url = 'xxxxxxxxxx'
# Nothing in this section writes to links_data; the name is kept in case
# other code relies on it.
links_data = {}

# Class string shared by both date buttons (previously duplicated inline).
BUTTON_CLASS = 'position-absolute top-0 start-50 translate-middle btn btn-sm btn-danger rounded-pill'

# Send the request. A timeout keeps the script from hanging forever on an
# unresponsive server, and raise_for_status() surfaces HTTP errors instead of
# silently parsing an error page.
response = requests.get(url, timeout=10)
response.raise_for_status()

# Parse the page content.
soup = BeautifulSoup(response.content, 'html.parser')

# Locate the first date button; soup.find() returns None when nothing matches.
first_button_element = soup.find('button', class_=BUTTON_CLASS)

# Locate the next date button after the first one. It may be None, in which
# case the walk below runs to the end of the document.
second_button_element = None
# Cursor used to walk the tags that follow the first button.
current_element = None
if first_button_element is not None:
    second_button_element = first_button_element.find_next('button', class_=BUTTON_CLASS)
    current_element = first_button_element.find_next()

# Collect all matching <a> elements between the two buttons. When the first
# button is missing, the cursor is None and the list stays empty.
a_elements = []
# Stop on identity: the second button is the very same object the walk will
# reach, so `is not` is the precise test (Tag `!=` compares by value).
while current_element is not None and current_element is not second_button_element:
    if current_element.name == 'a' and 'link-light' in current_element.get('class', []):
        a_elements.append(current_element)
    current_element = current_element.find_next()

# Process each collected <a> element.
for a_element in a_elements:
    # Extract the link target and the visible text.
    href = a_element.get('href', '')
    text_content = a_element.text.strip()
    # Print the href and text.
    print(f"href: {href}, text: {text_content}")