获取Outlook未读邮件,解析出html格式正文内容
1、web方式登录outlook个人账号,查看IMAP服务器信息
Note: 如果邮箱账号从未通过 web 方式登录过,登录时可能会出现如下图所示的失败提示;建议先按以上步骤配置时区,并通过 web 方式成功登录一次邮箱
2、编写 Python 代码
import imaplib
import email
from email.utils import parseaddr
from email.header import decode_header
from bs4 import BeautifulSoup
def get_mail(email_address, email_password, server):
    """Fetch one unread message from the INBOX of an IMAP-over-SSL account.

    The message chosen is the last id returned by the ``UNSEEN`` search.

    Args:
        email_address: Login name of the mailbox.
        email_password: Password of the mailbox.
        server: Host name of the IMAP SSL server.

    Returns:
        The selected message parsed into an ``email.message.Message``, or
        ``None`` (after printing a notice) when the search finds nothing.

    Raises:
        imaplib.IMAP4.error: If the server rejects the login, or answers the
            search/fetch request with a non-OK status or an empty payload.
    """
    # The context manager logs out when the block exits, so the connection
    # is released on every path (including exceptions).
    with imaplib.IMAP4_SSL(server) as client:
        client.login(user=email_address, password=email_password)
        client.select('INBOX')  # work on the inbox

        # Search criteria: ALL -> every message; UNSEEN -> unread only.
        status, data = client.search(None, 'UNSEEN')
        if status != 'OK':
            raise imaplib.IMAP4.error('UNSEEN search failed: %r' % (data,))

        # data[0] is a space-separated list of message ids (may be empty).
        email_ids = data[0].split() if data and data[0] else []
        if not email_ids:
            print("暂无未读邮件")
            return None

        status, datas = client.fetch(email_ids[-1], '(RFC822)')
        # A successful fetch yields a (envelope, raw_bytes) tuple first.
        if status != 'OK' or not datas or not isinstance(datas[0], tuple):
            raise imaplib.IMAP4.error('fetch failed: %r' % (datas,))

        # Convert the raw RFC 822 bytes into an email.message object.
        return email.message_from_bytes(datas[0][1])
# Turn raw (possibly RFC 2047 encoded) header text into readable text.
def decode_str(s):
    """Decode a mail header value into a plain ``str``.

    Every chunk reported by ``decode_header`` is decoded and the results are
    concatenated, so headers mixing plain text and encoded-words are returned
    in full rather than cut off after the first chunk.

    Args:
        s: Header value, e.g. ``"=?utf-8?b?5L2g5aW9?="``.

    Returns:
        The decoded header text. Plain, unencoded input is returned unchanged.
    """
    parts = []
    for value, charset in decode_header(s):
        if isinstance(value, bytes):
            try:
                # Chunks without a charset label are treated as ASCII.
                value = value.decode(charset or 'ascii', errors='replace')
            except LookupError:
                # Unknown charset label: degrade gracefully instead of
                # failing the whole header.
                value = value.decode('utf-8', errors='replace')
        parts.append(value)
    return ''.join(parts)
def print_info(msg, indent=0):
    """Print the main headers of a message and the text of its HTML parts.

    Args:
        msg: An ``email.message.Message`` (a whole message or one part of it).
        indent: Nesting depth. ``0`` means the top-level message, for which
            the From/To/Subject headers are printed; recursive calls for
            sub-parts pass a larger value and therefore skip the headers.

    Returns:
        None. All output goes to stdout.
    """
    if indent == 0:
        for header in ['From', 'To', 'Subject']:
            value = msg.get(header, '')
            if value:
                if header == 'Subject':
                    value = decode_str(value)
                else:
                    # Split "Display Name <addr>" so only the display name
                    # is run through the header decoder.
                    hdr, addr = parseaddr(value)
                    name = decode_str(hdr)
                    value = u'%s <%s>' % (name, addr)
            print('%s%s: %s' % (' ' * indent, header, value))

    if msg.is_multipart():
        # Container parts carry no body of their own; descend into children
        # so HTML nested in e.g. multipart/alternative is still found.
        for part in msg.get_payload():
            print_info(part, indent + 1)
        return

    if msg.get_content_type() == 'text/html':
        html = msg.get_payload(decode=True)
        # Pass the charset declared on the part (None lets bs4 auto-detect).
        soup = BeautifulSoup(html, 'html.parser',
                             from_encoding=msg.get_content_charset())
        # HTML fragments may have no <body>; fall back to the whole document.
        root = soup.body if soup.body is not None else soup
        print(root.get_text())
if __name__ == "__main__":
    # Placeholder connection settings; replace with real values before running.
    imap_server = "imap mail server address"
    username = "your_username"
    password = "your_password"

    # Fetch an unread message and, if one came back, print its details.
    unread_message = get_mail(username, password, imap_server)
    if unread_message:
        print_info(unread_message)
效果图