import smtplib
import time
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from selenium import webdriver
from selenium.webdriver.common.by import By
import requests
from bs4 import BeautifulSoup
# Step 1: 访问网页并获取响应内容
def get_html_content(url, timeout=10):
    """Fetch *url* with a browser-like User-Agent and return the page text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        The response body decoded with the encoding detected from its
        content, or ``None`` if the request failed (connection error,
        timeout, or non-2xx status). The failure is printed, not raised.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"网络请求异常:{e}")
        return None
    # Use the encoding guessed from the body rather than the HTTP header.
    response.encoding = response.apparent_encoding
    return response.text
# Step 2: 解析网页并提取目标数据
def parse_html(html_content):
    """Return the text of the fourth ``<span>`` element in *html_content*.

    The caller compares this text against a "更新时间:..." string, so the
    fourth span is presumably the page's "last updated" line — verify
    against the live page if its layout changes.

    Args:
        html_content: HTML document as a string.

    Returns:
        The ``.text`` of the fourth ``<span>`` in document order.

    Raises:
        IndexError: If the document contains fewer than four ``<span>``
            elements.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    # find_all is the current name; findAll is the deprecated BS3 spelling.
    span_list = soup.find_all("span")
    update_span_index = 3
    if len(span_list) <= update_span_index:
        raise IndexError(
            f"expected at least {update_span_index + 1} <span> elements, "
            f"found {len(span_list)}"
        )
    # TODO: write the remaining parsing code as required and save the
    # results into a suitable data structure.
    return span_list[update_span_index].text
# Step 3: 存储数据到本地或其他持久化存储服务器中
def store_data(result_list):
    """Placeholder for persisting scraped results; currently does nothing.

    TODO: implement the storage code that saves the results locally or on
    another server.

    Args:
        result_list: Results to store (unused until this is implemented).

    Returns:
        None.
    """
    return None
def send_email():
    """Send the fixed notification email through QQ Mail's SMTP-over-SSL server.

    Builds a multipart message with a plain-text UTF-8 body, logs in with the
    sender address and SMTP authorization code, and delivers it to every
    address in ``to_peoples``. Prints a confirmation once the message has
    been handed to the server.

    Raises:
        smtplib.SMTPException: If login or delivery fails. The connection is
            closed in either case.
    """
    # NOTE(review): credentials are hard-coded placeholders; consider loading
    # them from the environment or a config file instead of the source.
    msg_from = 'xxxx@qq.com'  # sender address
    passwd = 'xxxxxxxxxxxx'  # SMTP authorization code issued by the mail provider
    to_peoples = ['xxxx@qq.com', 'xxxx@qq.com']  # recipient addresses

    # MIMEMultipart can carry parts of any content type.
    msg = MIMEMultipart()
    content = "msg 进行查看!!!"
    msg.attach(MIMEText(content, 'plain', 'utf-8'))
    msg['Subject'] = "九价疫苗信息"
    msg['From'] = msg_from
    # Without a To header the recipients are only in the SMTP envelope and
    # mail clients show the message with no visible addressee.
    msg['To'] = ', '.join(to_peoples)

    # The context manager issues QUIT and closes the socket even when
    # login() or sendmail() raises.
    with smtplib.SMTP_SSL("smtp.qq.com", 465) as s:
        s.login(msg_from, passwd)
        s.sendmail(msg_from, to_peoples, msg.as_string())
        print("邮件发送成功")
def get_hpv_notes():
    """Open the target page in Chrome and print its "更新时间:" line.

    Selenium-based alternative to the requests/BeautifulSoup path: it loads
    the page in a real browser, locates the first element whose text
    contains "更新时间:", and prints that element's text.

    Raises:
        IndexError: If no element containing "更新时间:" is found.
    """
    driver = webdriver.Chrome()
    try:
        driver.get("http://xa.bendibao.com/live/2019111/66311.shtm")
        # Applies to the element lookup below: wait up to 2 s for a match.
        driver.implicitly_wait(2)
        driver.maximize_window()
        has_hpv_date = driver.find_elements(By.XPATH, "//*[contains(text(),'更新时间:')]")[0].text
        print(has_hpv_date)
        # Presumably keeps the browser window visible briefly before closing.
        time.sleep(5)
    finally:
        # Always shut the browser down so the Chrome/driver processes are
        # not left running, even when the lookup above fails.
        driver.quit()
# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    # Script entry point: download the page, and send the notification email
    # when its update line differs from the last known value.
    # get_hpv_notes() is the Selenium-based alternative and is not invoked here.
    target_url = "http://xa.bendibao.com/live/2019111/66311.shtm"
    html_content = get_html_content(target_url)
    if not html_content:
        # Download failed or returned an empty body.
        print("网页访问失败")
    elif parse_html(html_content) != "更新时间:2023年8月14日":
        send_email()