# -*- coding: utf-8 -*-
import email
from email.parser import Parser
from email.header import decode_header
from email.utils import parseaddr
import poplib
import util as u
import do_email_list as do_list
import datetime
import change_pdf_to_png as cptp
# Mailbox address, password and POP3 server address.
# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or a config file kept out of version control.
username = 'xxx@aliyun.com'
password = 'xxx'
pop3_server = 'pop3.mail.aliyun.com'
# Date stamp (YYYYMMDD) computed once, when the module is imported.  The name
# `time` is kept because get_email_content() reads it as a module global.
time = datetime.datetime.now().strftime("%Y%m%d")
# Raw string: the previous literal relied on the invalid escapes "\e" and
# "\%", which newer Python versions warn about.  The resulting value is
# unchanged: C:\img\email\<YYYYMMDD>.
mkpath = r"C:\img\email\%s" % time
# Create the per-day download directory at import time.
u.mkdir(mkpath)
def guess_charset(msg):
    """Return the character set declared for *msg*, or None if none is found.

    The charset attached to the message object (``msg.get_charset()``) wins.
    Otherwise the ``Content-Type`` header is searched for a ``charset=``
    parameter.  Only the parameter value itself is returned: anything after
    the next ``;`` (further parameters) and any surrounding quotes are
    removed, so the result can be passed straight to ``bytes.decode``.

    Args:
        msg: a message object exposing ``get_charset()`` and ``get()``.

    Returns:
        The value of ``msg.get_charset()`` when it is not None, else the
        lower-cased charset name from the header, else None.
    """
    charset = msg.get_charset()
    if charset is None:
        content_type = msg.get('Content-Type', '').lower()
        marker = 'charset='
        pos = content_type.find(marker)
        if pos >= 0:
            raw = content_type[pos + len(marker):]
            # e.g. '"utf-8"; format=flowed' -> 'utf-8'
            charset = raw.split(';', 1)[0].strip().strip('"\'').strip()
    return charset
def decode_str(s):
    """Decode an RFC 2047 encoded header value into a single ``str``.

    Every fragment returned by ``decode_header`` is decoded and the results
    are concatenated; previously only the first fragment was used, which
    truncated headers made of several encoded words or of mixed plain and
    encoded text.

    Args:
        s: the raw header value.

    Returns:
        The decoded text.  Undecodable bytes are replaced rather than raising.
    """
    pieces = []
    for value, charset in decode_header(s):
        if isinstance(value, bytes):
            try:
                # Fragments without a declared charset are decoded as latin-1,
                # which maps every byte and therefore cannot fail.
                value = value.decode(charset or 'latin-1', errors='replace')
            except LookupError:
                # The header names a codec Python does not know.
                value = value.decode('utf-8', errors='replace')
        pieces.append(value)
    return ''.join(pieces)
def print_info(msg, indent=0):
    """Recursively print the structure of *msg* to stdout.

    At the top level (``indent == 0``) the From, To and Subject headers are
    printed first.  Multipart messages are then walked part by part with the
    indent increased by one per nesting level; leaf parts are printed either
    as text (for text/plain and text/html) or as an attachment content type.

    Args:
        msg: the message (or sub-part) to describe.
        indent: current nesting depth; one leading space is printed per level.
    """
    pad = ' ' * indent
    if indent == 0:
        for header in ['From', 'To', 'Subject']:
            value = msg.get(header, '')
            if value:
                if header == 'Subject':
                    value = decode_str(value)
                else:
                    hdr, addr = parseaddr(value)
                    name = decode_str(hdr)
                    # The format string needs two placeholders; with only one
                    # the two-element tuple raised TypeError.
                    value = u'%s <%s>' % (name, addr)
            print('%s%s: %s' % (pad, header, value))
    if msg.is_multipart():
        parts = msg.get_payload()
        for n, part in enumerate(parts):
            print('%spart %s' % (pad, n))
            print('%s--------------------' % pad)
            print_info(part, indent + 1)
    else:
        content_type = msg.get_content_type()
        if content_type == 'text/plain' or content_type == 'text/html':
            content = msg.get_payload(decode=True)
            charset = guess_charset(msg)
            # Always end up with str: concatenating the undecoded bytes
            # payload with '...' raised TypeError when no charset was found.
            try:
                content = content.decode(charset or 'utf-8', errors='replace')
            except LookupError:
                content = content.decode('utf-8', errors='replace')
            print('%sText: %s' % (pad, content + '...'))
        else:
            print('%sAttachment: %s' % (pad, content_type))
def get_email_headers(msg):
    """Collect the From, To, Subject and Date headers of *msg*.

    These headers live on the root message object.

    Args:
        msg: the parsed root message.

    Returns:
        A dict with the keys ``'from'``, ``'to'``, ``'subject'`` and
        ``'date'``.  All four keys are always present; a header that is
        missing or empty maps to ``''`` so callers can index the dict safely.
        Addresses are formatted as ``name <addr>``; the subject is decoded;
        the date is returned as the raw header text.
    """
    headers = {'from': '', 'to': '', 'subject': '', 'date': ''}
    for header in ('From', 'To', 'Subject', 'Date'):
        value = msg.get(header, '')
        if not value:
            continue
        if header == 'Date':
            # Handled exclusively here: previously Date also fell through to
            # the address branch below and overwrote headers['to'].
            headers['date'] = value
        elif header == 'Subject':
            # The subject may be RFC 2047 encoded.
            headers['subject'] = decode_str(value)
        else:
            # From / To: split into display name and address, decode the name.
            hdr, addr = parseaddr(value)
            name = decode_str(hdr)
            headers[header.lower()] = u'%s <%s>' % (name, addr)
    content_type = msg.get_content_type()
    print('head content_type: ', content_type)
    return headers
def get_email_content(message, base_save_path, id_card):
    """Save the first attachment of *message* and return its text body.

    Every non-multipart part is visited in ``message.walk()`` order:

    * The first part that carries a filename is written to
      ``base_save_path + "QQ.pdf"`` (the original filename is ignored), then
      handed to ``cptp.run_convert``.  If the conversion returns a truthy
      result and ``do_list.get_now_exist_email`` reports no existing row for
      *id_card*, a new row is inserted through ``do_list.insert_email_list``.
      Failures in this conversion/bookkeeping step are reported and skipped.
    * Any other text/plain or text/html part is decoded and becomes the
      returned body; a later text part replaces an earlier one.

    Args:
        message: the parsed root message.
        base_save_path: directory prefix (including the trailing separator)
            under which the attachment is written.
        id_card: identifier passed through to the converter and to do_list.

    Returns:
        ``(content, attachment_files)``: the body text (``''`` if there is
        none) and the list of saved attachment names.
    """
    attachment_saved = False
    content = ''
    attachment_files = []
    for part in message.walk():
        if part.is_multipart():
            continue
        file_name = part.get_filename()
        content_type = part.get_content_type()
        if file_name and not attachment_saved:
            # Attachment: only the first one is stored.
            attachment_saved = True
            data = part.get_payload(decode=True)
            url = base_save_path + "QQ.pdf"
            # `with` closes the file even if the write fails.
            with open(url, 'wb') as att_file:
                att_file.write(data)
            attachment_files.append("QQ.pdf")
            try:
                img_info = cptp.run_convert(url, time, id_card)
                if img_info:
                    if do_list.get_now_exist_email(id_card)['count_sfzhm'] <= 0:
                        do_list.insert_email_list(id_card, img_info["pagenum"], "%s/%s" % (time, id_card))
            except Exception as exc:
                # Best effort: a broken attachment must not abort the run, but
                # the reason is printed, and KeyboardInterrupt / SystemExit
                # are no longer swallowed.
                print("这个有问题 跳过", exc)
        elif content_type == 'text/plain' or content_type == 'text/html':
            # Message body.
            data = part.get_payload(decode=True)
            charset = guess_charset(part)
            if charset:
                charset = charset.strip().split(';')[0]
                print('charset:', charset)
            # Always return str; fall back to UTF-8 when no (or an unknown)
            # charset is declared instead of leaking bytes or raising.
            try:
                content = data.decode(charset or 'utf-8', errors='replace')
            except LookupError:
                content = data.decode('utf-8', errors='replace')
    return content, attachment_files
def get_email():
    """Fetch and process mails that arrived after the last recorded one.

    Steps:

    1. Log in to the POP3 server and list all messages.
    2. Ask ``do_list.get_last_email_list()`` for the last recorded marker;
       ``-1`` means there is none and nothing is processed.
    3. Locate that marker in the listing with ``u.get_list`` and iterate over
       the entries after it.
    4. For each new entry: record it with ``do_list.insert_email_no``,
       retrieve and parse the message, and, when the subject is either
       ``<id>`` or ``<prefix>_<id>`` with an ``<id>`` accepted by
       ``u.check_id_card``, extract body and attachment through
       ``get_email_content``.

    Original author's note (translated): each run takes the latest mail ID,
    finds its index, slices the list and loops over the newest mails, in
    order to (1) update the latest email_no for the next run and (2) download
    the attachment, convert it to images and store them in the database.

    The connection is always closed, even when processing raises.
    """
    # NOTE(review): plain POP3 transmits the password unencrypted; consider
    # poplib.POP3_SSL if the server supports it.
    server = poplib.POP3(pop3_server)
    try:
        # Debug output can be switched on or off here.
        server.set_debuglevel(1)
        # Optional: print the server's welcome banner.
        print(server.getwelcome().decode('utf-8'))
        # Authenticate.
        server.user(username)
        server.pass_(password)
        # stat() returns the message count and the mailbox size.
        print('Messages: %s. Size: %s' % server.stat())
        # list() returns one entry per message, e.g. [b'1 82923', b'2 2184', ...].
        resp, mails, octets = server.list()
        print('------ resp ------')
        print(resp)
        print('------ mails ------')
        print(mails)
        print('------ octets ------')
        print(octets)

        last_num = do_list.get_last_email_list()
        if last_num == -1:
            print('-------------------- 未获取到最后数字 --------------------')
            return

        last_email_no = u.get_list(mails, last_num)
        if len(mails) - 1 >= last_email_no:
            # new_mails[0] is the already-processed mail; everything after it
            # is new.
            new_mails = mails[last_email_no:]
            base_save_path = '%s/' % mkpath
            for offset, entry in enumerate(new_mails[1:]):
                no_info = str(entry, encoding="utf-8").split(' ', 1)
                do_list.insert_email_no(no_info[1], no_info[0])
                # POP3 message numbers start at 1, hence the +2 relative to
                # the index of the last processed entry.
                resp, lines, octets = server.retr(last_email_no + offset + 2)
                # `lines` holds every raw line of the message.  Undecodable
                # bytes are replaced so a non-UTF-8 mail cannot abort the run.
                msg_content = b'\n'.join(lines).decode('utf-8', errors='replace')
                msg = Parser().parsestr(msg_content)
                print('-------------------- 邮件信息开始 --------------------')
                msg_headers = get_email_headers(msg)
                # .get(): a header may be absent from the returned dict.
                subject = msg_headers.get('subject')
                if not subject:
                    print('-------------------- 邮件没有标题 --------------------')
                    continue
                subject_parts = subject.split("_")
                if len(subject_parts) > 2:
                    print('-------------------- 邮件标题不以_规格 --------------------')
                    continue
                # "<id>" or "<prefix>_<id>": the id is the last component.
                id_card = subject_parts[-1]
                if not u.check_id_card(id_card):
                    print('-------------------- 邮件标题不符合规则结束 --------------------')
                    continue
                content, attachment_files = get_email_content(msg, base_save_path, id_card)
                print('subject:', subject)
                print('from_address:', msg_headers.get('from', ''))
                print('to_address:', msg_headers.get('to', ''))
                print('date:', msg_headers.get('date', ''))
                print('content:', content)
                print('attachment_files: ', attachment_files)
                print('-------------------- 邮件信息结束 --------------------')
    finally:
        # Close the connection.
        server.quit()
# get_email()