eml文件解析
由mailgun开源的Flanker - email address and MIME parsing for Python是一个解析高效、容错率不错的python第三方扩展库。python 3也可以正常使用,安装通过pip install flanker即可。该库包含了邮件地址解析和邮件mime格式解析。
前言
eml文件必须用邮箱软件才能打开查看,为了脱离邮箱软件,所以研究了下flanker。话不多说,上代码
提示:以下是本篇文章正文内容,下面案例可供参考
代码如下(示例):
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from datetime import datetime
from flanker import mime
def CreateHtml(title, addresser, addressee, sendTime, content, annex, path):
'''
创建html文件
:param title: 标题
:param addresser: 发件人
:param addressee: 收件人
:param sendTime: 发送时间
:param content: 内容
:param annex: 附件
:param path: 保存路径
:return:
'''
# HTML模板
template = '''
<!DOCTYPE html>
<html>
<head lang="en">
<meta charset="UTF-8">
<title>test1</title>
<link rel="stylesheet" type="text/css" href="../caseView/css/index.css" />
<link rel="stylesheet" href="../caseView/dist/layui-v2.5.6/layui/css/layui.css">
<script src="../caseView/dist/jquery.min.js"></script>
<script src="../caseView/dist/layui-v2.5.6/layui/layui.js" charset="utf-8"></script>
</head>
<body>
<div id="email" class="noOneTop">
<div id="contentInfo">
<div class="email">
<div class="email_title">
<p>%(title)s</p>
</div>
<div style="clear: both;"></div>
<div class="email_info">
<p><span>发件人:</span>%(addresser)s</p>
<p><span>收件人:</span>%(addressee)s</p>
<p><span>发送时间:</span>%(sendTime)s</p>
</div>
<div class="email_content" id="email_content">
%(content)s
</div>
<div class="email_file">
%(annex)s
</div>
</div>
</div>
</div>
</body>
<script>
$("#email_content").html("<meta" + $("#email_content").html().split('<meta')[1])
</script>
</html>'''
if addressee is None:
addressee = ''
if sendTime is None:
sendTime = ''
data = {'title': title, 'addresser': addresser, 'addressee': addressee, 'sendTime': sendTime,
'content': content, 'annex': annex}
html = template % data
with open(path, 'wb') as file:
file.write(html.encode())
file.close()
# 邮件正文
def contentEml(eml):
# 判断是否为单部分
if eml.content_type.is_singlepart():
eml_body = eml.body
else:
eml_body = ''
for part in eml.parts:
# 判断是否是多部分
if part.content_type.is_multipart():
eml_body, charset = contentEml(part)
else:
if part.content_type.main == 'text':
eml_body = part.body
return eml_body
def ParseEml(raw_email, path):
with open(raw_email, 'rb') as fhdl:
raw_email = fhdl.read()
eml = mime.from_string(raw_email)
# 主题
subjece = eml.subject
# 获取发信人地址及昵称
eml_header_from = eml.headers.get('From')
# 昵称
# eml_from = address.parse(eml_header_from)
# from_display_name = eml_from.display_name
# 发送时间
eml_time = eml.headers.get('Date')
# if eml_time is not None:
# eml_time = eml_time.replace(',', '')
# dates = datetime.strptime(eml_time, '%a %d %b %Y %H:%M:%S %z')
# eml_time = dates.strftime("%Y-%m-%d %H:%M:%S")
# 解析附件列表
eml_attachs = ';'.join(x.detected_file_name for x in eml.parts if x.detected_file_name)
if eml_attachs is not None and eml_attachs != '':
attachs = eml_attachs.split(';')
eml_attachs = '<p>附件</p>'
for ats in attachs:
eml_attachs += '<dl><dt><img src="../caseView/img/file_mail.png"></dt>' \
' <dd title="OPPO_BR_20200707.log">' + ats + '</dd></dl>'
# 内容
eml_body = contentEml(eml)
# 收件人信息
eml_to = eml.headers.get('To')
if eml_to is not None:
eml_to = eml_to.replace('<', '<').replace('>', '>')
# eml_to_addr = address.parse_list(eml_to)
# 多个地址以;拼接显示
# if not type(eml_to_addr) == str:
# to_display_name = ';'.join(x.display_name for x in eml_to_addr if x.display_name)
# to_address = ';'.join(x.address for x in eml_to_addr if x.address)
CreateHtml(subjece, eml_header_from, eml_to, eml_time,
eml_body, eml_attachs, path)
if __name__ == "__main__":
ParseEml('aa.eml', 'D:\\new1.html')
希望对你们有帮助!谢谢