思路:
- 设定一个时间
- 循环判断:现在的时间(时、分)是否与设定时间相等
- 真:selenium自动爬取信息,email构建邮件骨架,smtplib发送邮件,更新设定的时间
- 无论是真是假,都睡眠一段时间。
坑:
- 刚开始做的时候,网易邮箱报550错误(用户没有权限)。解决办法:到邮箱设置里开启客户端授权密码,并在代码中把邮箱密码换成授权密码。
- 然后又报554错误,邮件被当成了垃圾信息。网上说这种情况是没加email骨架中的Subject、From、To,但这些我都有;又说sender要用<>括起来并在前面加上你的昵称,试了还是不行;还有说发送的时候得抄送一份给自己,有点不合理,但我还是试了,也不行。最后自己找到的原因是邮件内容不正规:不要瞎写test、asdf之类的内容,正规地写就能通过。
- 邮件发送成功,但对方还是收不到,多发几次也一样。那么恭喜你,你的邮件被当成垃圾邮件放进了垃圾邮箱里,点开设置一下白名单即可。
- 自己琢磨后才知道,sender_name随便写都可以,它只是一个称呼,不一定要是你的昵称。
实践:
# coding=utf-8
import smtplib
import time
from datetime import datetime, timedelta
from email.header import Header
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import re
# 测试框架中浏览器驱动
from selenium import webdriver
# Subject line of the report e-mail.
subject = 'python各个岗位数量情况'
# Display name placed in front of the address in the From header.
sender_name = '老弟'
# Mailbox used for the SMTP login; also the sender address.
sender = '18844631601@163.com'
# Recipient addresses; sendmail() joins them into the To header.
receiver = ['1165101043@qq.com']
# Module-level message text; not read by any code visible in this file.
message = ''
# SMTP server host and port handed to smtp.connect().
mail_addr = 'smtp.163.com'
port = 25
# Placeholder for the login secret passed to smtp.login(); replace it with
# the mailbox's client authorization code rather than the account password.
password = '客户端授权密码'
# Compiled once at import time; raw strings keep the backslashes literal.
_RT_DIV_RE = re.compile(r'<div class="rt">([\s\S]*?)</div>', re.IGNORECASE)
_DIGITS_RE = re.compile(r'(\d+)')


def _extract_first_number(page_source):
    """Return the first run of digits in the first ``<div class="rt">``.

    Returns None when the page has no such div or the div has no digits.
    """
    blocks = _RT_DIV_RE.findall(page_source)
    if not blocks:
        return None
    digits = _DIGITS_RE.findall(blocks[0].strip())
    return digits[0] if digits else None


def get_data(search_name):
    """Look up one keyword on the 51job search page and summarise the result.

    Opens the search URL in Chrome through selenium, reads the rendered page
    source and pulls the first number out of the first ``<div class="rt">``
    element (presumably the total number of matching postings -- confirm
    against the live page).

    :param search_name: keyword to search for; a text string or UTF-8 bytes.
    :return: ``"<keyword>--<number>"`` on success, ``"<keyword>--失败"`` when
        no number could be extracted. Always a text (unicode) string, so the
        results of several calls can be joined safely.
    """
    # Normalise to text up front so both return paths yield the same type
    # (on Python 2 a plain str is bytes; on Python 3 this is a no-op).
    if isinstance(search_name, bytes):
        search_name = search_name.decode('utf-8')

    # "&degreefrom" had been mangled into the degree sign by an HTML-entity
    # conversion ("&deg" -> "°"); the parameter name is restored here.
    url = (
        "https://search.51job.com/list/040000,000000,0000,00,9,99,"
        + search_name
        + ",2,1.html?lang=c&stype=&postchannel=0000&workyear=99&cotype=99"
        "&degreefrom=99&jobterm=99&companysize=99&providesalary=99"
        "&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType="
        "&dibiaoid=0&address=&line=&specialarea=00&from=&welfare="
    )

    chrome = webdriver.Chrome()
    try:
        chrome.get(url)
        page_source = chrome.page_source
    finally:
        # quit() ends the whole driver session; close() only closes the
        # window and would leave a driver process behind on every call.
        chrome.quit()

    number = _extract_first_number(page_source)
    if number is None:
        return search_name + u'--失败'
    return search_name + u'--' + number
def sendmail(text):
    """Send ``text`` as a UTF-8 plain-text e-mail using the module settings.

    Builds a multipart message from the module-level ``subject``,
    ``sender_name``, ``sender`` and ``receiver``, then logs in to
    ``mail_addr``:``port`` with ``sender`` / ``password`` and sends it.

    :param text: body of the e-mail.
    :raises smtplib.SMTPException: if connecting, logging in or sending fails.
    """
    from email.utils import formataddr

    msg = MIMEMultipart('mixed')
    msg['Subject'] = Header(subject, 'utf-8')
    # Encode only the display name (RFC 2047); the address itself must stay
    # plain ASCII inside the angle brackets.
    msg['From'] = formataddr((Header(sender_name, 'utf-8').encode(), sender))
    # Address lists in headers are comma-separated, not semicolon-separated.
    msg['To'] = ', '.join(receiver)
    msg.attach(MIMEText(text, 'plain', 'utf-8'))

    # NOTE(review): the login goes over an unencrypted connection here;
    # consider SMTP_SSL/STARTTLS if the server supports it.
    smtp = smtplib.SMTP()
    try:
        smtp.connect(mail_addr, port)
        smtp.login(sender, password)
        # The SMTP envelope needs bare addresses: the sender address and the
        # list of recipients, not the formatted header strings.
        smtp.sendmail(sender, receiver, msg.as_string())
        print('邮件发送成功 -- ' + datetime.now().strftime('%Y-%m-%d %H-%M-%S'))
        smtp.quit()
    finally:
        # Harmless after a successful quit(); releases the socket on errors.
        smtp.close()
def charge_time(start_datetime, now_datetime, search_names=None,
                interval=timedelta(minutes=2)):
    """Run the scrape-and-mail job if ``now_datetime`` hits the scheduled minute.

    Only the hour and minute of the two datetimes are compared; the date and
    the seconds are ignored. If the caller polls too rarely and skips the
    scheduled minute, the job will not fire until that hour:minute comes
    round again.

    :param start_datetime: the currently scheduled time.
    :param now_datetime: the current time.
    :param search_names: keywords to look up; defaults to the module-level
        ``names`` list, which the script entry point defines.
    :param interval: how far to push the schedule forward after a run.
    :return: ``start_datetime`` unchanged when it is not yet time, otherwise
        ``start_datetime + interval``.
    """
    scheduled = (start_datetime.hour, start_datetime.minute)
    current = (now_datetime.hour, now_datetime.minute)
    if scheduled != current:
        return start_datetime

    if search_names is None:
        # Fall back to the global defined under ``if __name__ == '__main__'``.
        search_names = names

    sendmail(';'.join([get_data(name) for name in search_names]))
    return start_datetime + interval
if __name__ == '__main__':
    # Keywords to look up; charge_time() reads this module-level list.
    names = ['python', 'python web', 'python 数据', 'python AI', 'python爬虫']
    # First scheduled run; charge_time() only compares its hour and minute.
    start_datetime = datetime(2018, 12, 16, 14, 45)

    # Poll once a minute forever: show the current schedule, let
    # charge_time() decide whether to run the job, then sleep.
    while True:
        print(start_datetime)
        current = datetime.now()
        start_datetime = charge_time(start_datetime, current)
        time.sleep(60)