#!/usr/bin/python2.7
# _*_ coding: utf-8 _*_
"""
@Author: MarkLiu
"""
import poplib
import email
from email.parser import Parser
from email.header import decode_header
from email.utils import parseaddr
def decode_str(s):
    """Decode an RFC 2047 encoded header value into text.

    Every fragment returned by ``decode_header`` is decoded and the results
    are concatenated, so headers made of several encoded words (or a mix of
    encoded and plain text) are returned in full rather than truncated to
    their first fragment.

    :param s: raw header value, e.g. ``'=?utf-8?b?5rWL6K+V?='``.
    :returns: the decoded header as a unicode string.
    """
    fragments = []
    for value, charset in decode_header(s):
        # Plain fragments may already be text on Python 3; only byte
        # strings need decoding.
        if isinstance(value, bytes):
            try:
                value = value.decode(charset or 'ascii', 'replace')
            except LookupError:
                # Unknown/bogus charset label: fall back to lenient ASCII
                # instead of failing on the whole header.
                value = value.decode('ascii', 'replace')
        fragments.append(value)
    return u''.join(fragments)
def guess_charset(msg):
    """Return the character set declared for a message (part), if any.

    The charset attached to the message object is preferred. When none is
    set, the ``charset`` parameter of the Content-Type header is used; it is
    parsed by the email package, so surrounding quotes and any following
    parameters (e.g. ``; format=flowed``) are not included in the result.

    :param msg: an ``email.message.Message`` instance.
    :returns: whatever ``msg.get_charset()`` returns when it is not None,
        otherwise the lower-cased charset name from Content-Type, or None
        when no charset is declared.
    """
    charset = msg.get_charset()
    if charset is None:
        # Fall back to the Content-Type header's charset parameter.
        charset = msg.get_content_charset()
    return charset
def get_email_headers(msg):
    """Collect the From, To, Subject and Date headers of a message.

    From, To, Subject and Date live on the root message object. Headers that
    are missing or empty are left out of the result.

    :param msg: the root ``email.message.Message`` object.
    :returns: dict with any of the keys ``'from'``, ``'to'``, ``'subject'``
        and ``'date'``. Address headers are formatted as ``Name <address>``
        (only the address parsed by ``parseaddr`` is kept), the subject is
        decoded with ``decode_str`` and the date is the raw header value.
    """
    headers = {}
    for header in ('From', 'To', 'Subject', 'Date'):
        value = msg.get(header, '')
        if not value:
            continue
        key = header.lower()
        if header == 'Date':
            # Stored verbatim; it must not fall through to the address
            # branch, which would overwrite the 'to' entry.
            headers[key] = value
        elif header == 'Subject':
            # The subject may be RFC 2047 encoded and needs decoding.
            headers[key] = decode_str(value)
        else:
            # Address header: decode the display name, keep the address.
            real_name, addr = parseaddr(value)
            headers[key] = u'%s <%s>' % (decode_str(real_name), addr)
    content_type = msg.get_content_type()
    # Single-argument form prints the same text on Python 2 and Python 3.
    print('head content_type:  %s' % content_type)
    return headers
# NOTE: leftover comment ("indent is used for indented display"); the function below takes no indent argument.
def get_email_cntent(message, base_save_path):
j = 0
content = ''
attachment_files = [] for part in message.walk():
j = j + 1
file_name = part.get_filename()
contentType = part.get_content_type()
# 保存附件
if file_name: # Attachment
# Decode filename
h = email.Header.Header(file_name)
dh = email.Header.decode_header(h)
filename = dh[0][0] if dh[0][1]: # 如果包含编码的格式,则按照该格式解码
filename = unicode(filename, dh[0][1])
filename = filename.encode("utf-8")
data = part.get_payload(decode=True)
att_file = open(base_save_path + filename, 'wb')
attachment_files.append(filename)
att_file.write(data)
att_file.close()
elif contentType == 'text/plain' or contentType == 'text/html':
# 保存正文
data = part.get_payload(decode=True)
charset = guess_charset(part)