# 方法一 (Method 1): search a webpage for keywords entered by the user
import requests
from bs4 import BeautifulSoup
def get_webpage_content(url):
    """Fetch the raw HTML of *url*.

    Args:
        url: Address of the page to download.

    Returns:
        The response body as text on HTTP 200, otherwise ``None``
        (a diagnostic message is printed on failure).
    """
    try:
        # timeout keeps the script from hanging forever on an unresponsive host
        response = requests.get(url, timeout=10)
        if response.status_code == 200:
            return response.text
        print("Failed to retrieve webpage. Status code:", response.status_code)
        return None
    # RequestException is the base class for all requests failures
    # (connection errors, timeouts, invalid URLs, ...)
    except requests.RequestException as e:
        print("Error:", e)
        return None
def extract_keywords(content, keywords):
    """Collect every text node in the HTML *content* containing each keyword.

    Args:
        content: HTML document as a string.
        keywords: Iterable of keyword strings to look for.

    Returns:
        Dict mapping each keyword to a list of stripped text nodes that
        contain it (a node containing several keywords appears under each).
    """
    keyword_info = {keyword: [] for keyword in keywords}
    soup = BeautifulSoup(content, 'html.parser')
    # Walk all text nodes once instead of re-scanning the tree per keyword.
    # `string=True` matches every text node; the old `text=` argument is
    # deprecated since BeautifulSoup 4.4.
    for node in soup.find_all(string=True):
        for keyword in keywords:
            if keyword in node:
                keyword_info[keyword].append(node.strip())
    return keyword_info
def main():
    """Prompt for a URL and keywords, then print every matching text snippet."""
    target_url = input("请输入要搜索的网址:")
    search_terms = input("请输入要搜索的关键词,以空格分隔:").split()
    html = get_webpage_content(target_url)
    # Guard clause: bail out early when the download failed.
    if not html:
        print("未能获取网页内容,请检查输入的网址是否正确。")
        return
    for keyword, snippets in extract_keywords(html, search_terms).items():
        print(f"关键词 '{keyword}' 出现的内容:")
        for snippet in snippets:
            print(snippet)
if __name__ == "__main__":
    main()
# 方法二 (Method 2): same flow, phrased in terms of user-defined ("custom") keywords
import requests
from bs4 import BeautifulSoup
def get_webpage_content(url):
    """Fetch the raw HTML of *url*.

    Args:
        url: Address of the page to download.

    Returns:
        The response body as text on HTTP 200, otherwise ``None``
        (a diagnostic message is printed on failure).
    """
    try:
        # timeout keeps the script from hanging forever on an unresponsive host
        response = requests.get(url, timeout=10)
        if response.status_code == 200:
            return response.text
        print("Failed to retrieve webpage. Status code:", response.status_code)
        return None
    # RequestException is the base class for all requests failures
    # (connection errors, timeouts, invalid URLs, ...)
    except requests.RequestException as e:
        print("Error:", e)
        return None
def extract_custom_keywords(content, custom_keywords):
    """Collect every text node in the HTML *content* containing each keyword.

    Args:
        content: HTML document as a string.
        custom_keywords: Iterable of keyword strings to look for.

    Returns:
        Dict mapping each keyword to a list of stripped text nodes that
        contain it (a node containing several keywords appears under each).
    """
    keyword_info = {keyword: [] for keyword in custom_keywords}
    soup = BeautifulSoup(content, 'html.parser')
    # Walk all text nodes once instead of re-scanning the tree per keyword.
    # `string=True` matches every text node; the old `text=` argument is
    # deprecated since BeautifulSoup 4.4.
    for node in soup.find_all(string=True):
        for keyword in custom_keywords:
            if keyword in node:
                keyword_info[keyword].append(node.strip())
    return keyword_info
def main():
    """Prompt for a URL and custom keywords, then print every matching snippet."""
    target_url = input("请输入要搜索的网址:")
    terms = input("请输入要搜索的自定义关键词,以空格分隔:").split()
    html = get_webpage_content(target_url)
    # Guard clause: bail out early when the download failed.
    if not html:
        print("未能获取网页内容,请检查输入的网址是否正确。")
        return
    for keyword, snippets in extract_custom_keywords(html, terms).items():
        print(f"关键词 '{keyword}' 出现的内容:")
        for snippet in snippets:
            print(snippet)
if __name__ == "__main__":
    main()