先安装所需的依赖库:
pip install requests
pip install beautifulsoup4
# -*- coding: UTF-8 -*-
import requests
from bs4 import BeautifulSoup
def scrape_links(url, output_path='links_output.txt', timeout=10):
    """Fetch *url*, extract every <a> hyperlink, and write them to a file.

    Parameters:
        url: Web page URL to scrape.
        output_path: Path of the UTF-8 text file results are written to
            (defaults to the original hard-coded 'links_output.txt').
        timeout: Seconds to wait for the HTTP response before giving up.

    Raises:
        requests.HTTPError: if the server responds with a 4xx/5xx status.
        requests.RequestException: on network failure or timeout.
    """
    # Send the HTTP request; a timeout prevents hanging forever on a dead host.
    response = requests.get(url, timeout=timeout)
    # Fail fast on error responses instead of silently parsing an error page.
    response.raise_for_status()
    # Parse the page content with Beautiful Soup's built-in HTML parser.
    soup = BeautifulSoup(response.text, 'html.parser')
    # Extract all anchor tags.
    links = soup.find_all('a')
    # Write one "Text: ..., URL: ..." line per link.
    with open(output_path, 'w', encoding='utf-8') as file:
        for link in links:
            link_text = link.text.strip()
            link_url = link.get('href')
            file.write(f"Text: {link_text}, URL: {link_url}\n")
if __name__ == "__main__":
# 指定要爬取的网页 URL
target_url = "https://example.com"
# 调用函数进行爬取并输出到文件
scrape_links(target_url)
本代码仅供学习交流使用,请遵守相关的法律法规与网站规则。