# -*- coding:utf-8 -*-
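'''
References:
http://www.pythonclub.org/python-network-application/email-format
http://blog.sina.com.cn/s/blog_4deeda2501016eyf.html
todo:
1. Log in to the mailbox
2. Scan the messages for one that matches the filter (substring match on the mail subject)
3. Parse the message body and download the attachment (substring match on the attachment name); several attachments per mail may be supported later
4. Reading the attachment and loading it into the database is NOT done here: this module stops once the Excel file is downloaded, loading is handled by the common main flow
'''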
import sys
import os
import locale
import poplib
poplib._MAXLINE = 20480
from email.parser import Parser
from email.header import decode_header
from email.utils import parseaddr # 专门处理地址的模块
import platform
import imaplib, string, email
# Make the directory one level above this file's directory, plus the legacy
# code directory, importable.
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, parent_dir)
sys.path.append("/home/sd_history_code/")
# from config.config import lbj_username,lbj_password
# SECURITY: mailbox credentials should not live in source control. They can be
# supplied through the LBJ_USERNAME / LBJ_PASSWORD environment variables; the
# literals below are kept only as backward-compatible defaults and should be
# rotated and removed.
lbj_username = os.environ.get('LBJ_USERNAME', 'wenzilong@sunlands.com')  # mailbox login account
lbj_password = os.environ.get('LBJ_PASSWORD', '66Ab5C7fb1eC0db0')  # mailbox password
# Determine the text encoding of the runtime environment.
# NOTE(review): locale.getdefaultlocale() is deprecated since Python 3.11, and
# __g_codeset is not read anywhere in the visible part of this file.
__g_codeset = sys.getdefaultencoding()
if "ascii" == __g_codeset:
    __g_codeset = locale.getdefaultlocale()[1]
# Decode an RFC 2047 encoded header (e.g. Subject) into readable text
def decode_str(s):
    """Decode a MIME-encoded header value into a single ``str``.

    Every fragment returned by ``decode_header`` is decoded and the pieces
    are concatenated, so headers that mix plain text with encoded words
    (``Re: =?utf-8?b?...?=``) or use several charsets are returned in full.

    Args:
        s: Raw header value as found in the message.

    Returns:
        The decoded header text. Fragments without a declared charset are
        treated as ASCII; undecodable bytes are replaced rather than raising.
    """
    fragments = []
    for value, charset in decode_header(s):
        if isinstance(value, bytes):
            try:
                value = value.decode(charset or 'ascii', 'replace')
            except LookupError:
                # Charset label Python does not know: fall back to UTF-8
                # instead of failing the whole header.
                value = value.decode('utf-8', 'replace')
        fragments.append(value)
    return ''.join(fragments)
# Check the message headers
def get_header(msg, email_name):
    """Tell whether the message subject contains ``email_name``.

    Args:
        msg: Parsed message; only its ``Subject`` header is read.
        email_name: Text that must occur somewhere in the decoded subject.

    Returns:
        ``True`` when the decoded subject contains ``email_name``; ``None``
        when the subject is missing/empty or does not contain it.
    """
    raw_subject = msg.get('Subject', '')
    if not raw_subject:
        return None
    # The subject may be MIME-encoded, so decode it before matching.
    subject = decode_str(raw_subject)
    # if subject == email_name:
    if email_name in subject:
        return True
    return None
# Default folder where downloaded attachments are saved
if platform.system() == 'Windows':
    floder = 'd:\\ETL\\'
elif platform.system() == 'Linux':
    floder = r'/home/鹰眼_数据源/'
else:
    # Other platforms (e.g. macOS) previously left ``floder`` undefined, which
    # made the module fail on import; fall back to ~/ETL/ (trailing separator
    # kept so that plain string concatenation with a file name still works).
    floder = os.path.join(os.path.expanduser('~'), 'ETL', '')
# Parse the message body and save the wanted attachment
def get_file(msg, file_name, floder_input):
    """Save the first attachment whose name contains ``file_name``.

    Walks every MIME part of ``msg``; the first part that carries a file name
    containing ``file_name`` is written into ``floder_input`` (an existing
    file of the same name is overwritten).

    Args:
        msg: Parsed e-mail message.
        file_name: Text that must occur in the decoded attachment name.
        floder_input: Target directory; it is created when missing.

    Returns:
        Path of the saved file, or ``None`` when no attachment matched.
    """
    for part in msg.walk():
        raw_name = part.get_filename()
        if not raw_name:
            continue  # this MIME part is not an attachment
        # Attachment names are usually MIME-encoded; decode before matching.
        attachment_name = decode_str(raw_name)
        # if attachment_name.find(file_name) == -1:
        if file_name not in attachment_name:
            print('如果附件不是需要的, 则跳过')
            continue
        data = part.get_payload(decode=True)
        if data is None:
            continue  # multipart container: nothing that can be written
        # The name comes from the e-mail and is untrusted: keep only its last
        # path component so it cannot escape the target directory.
        safe_name = os.path.basename(attachment_name.replace('\\', '/'))
        if not safe_name:
            continue
        if floder_input:
            os.makedirs(floder_input, exist_ok=True)
        # Write via the full path instead of os.chdir(), so the working
        # directory of the calling process is left untouched.
        target = os.path.join(floder_input, safe_name)
        with open(target, 'wb') as f:
            f.write(data)
        print('下载完成: ', safe_name)
        return target
    return None
# Locate the target message in the inbox
def get_name(email_name, username, password, max_scan=1):
    """Find a recent inbox message whose subject contains ``email_name``.

    Logs in to ``imap.263.net`` over IMAP/SSL, selects the default mailbox and
    inspects the headers of the newest ``max_scan`` messages, newest first.

    Args:
        email_name: Text that must occur in the decoded subject.
        username: Mailbox login.
        password: Mailbox password.
        max_scan: How many of the newest messages to inspect. The default of
            1 keeps the historical behaviour (only the newest message was
            checked, although the old comment spoke of the last 20).

    Returns:
        The IMAP sequence number (``int``) of the first matching message, or
        ``None`` when login fails, the mailbox cannot be read, or nothing
        matches.
    """
    imap = imaplib.IMAP4_SSL("imap.263.net")
    try:
        try:
            imap.login(username, password)
        except (imaplib.IMAP4.error, OSError) as e:
            print('login error: %s' % e)
            return None
        status, _ = imap.select()
        if status != 'OK':
            return None
        status, data = imap.search(None, 'ALL')
        if status != 'OK' or not data or not data[0]:
            return None
        num = data[0].split()
        # Newest messages have the highest sequence numbers.
        newest_first = num[::-1][:max(int(max_scan), 0)]
        for i in newest_first:
            seq = i.decode()
            try:
                status, fetched = imap.fetch(i, '(RFC822.HEADER)')
                msg = email.message_from_string(
                    fetched[0][1].decode('utf-8', 'replace'))
                # Shared helper: honours the declared charset and also copes
                # with subjects that are not MIME-encoded at all.
                subject = decode_str(msg.get('subject', ''))
                print(seq + ': ' + subject)
                # if subject == email_name:
                if email_name in subject:
                    print("目标文件在邮箱的第{}封".format(len(num) - int(seq) + 1))
                    return int(seq)
            except Exception as e:
                # Best effort: one unreadable message must not abort the
                # scan, but report it instead of hiding it.
                print('skip message %s: %s' % (seq, e))
                continue
        return None
    finally:
        # Always release the IMAP session, including on early returns.
        try:
            imap.logout()
        except (imaplib.IMAP4.error, OSError):
            pass
# Main entry point
def getMailFile(email_name, file_name, username=lbj_username, password=lbj_password, floder_input=floder):
    """Download the attachment of the mail whose subject contains ``email_name``.

    The message is located with ``get_name`` (IMAP), then retrieved from
    ``popcom.263xmail.com`` over POP3/SSL, its subject is re-checked with
    ``get_header`` and the attachment is saved with ``get_file``.

    Args:
        email_name: Text that must occur in the mail subject.
        file_name: Text that must occur in the attachment name.
        username: Mailbox login.
        password: Mailbox password.
        floder_input: Directory the attachment is saved into.

    Returns:
        Whatever ``get_file`` returns for the saved attachment, or ``None``
        when no message was found or its subject does not match.
    """
    x = get_name(email_name, username, password)
    print(x)
    if x is None:
        # No message located: asking POP3 for message ``None`` would only fail.
        return None
    host = "popcom.263xmail.com"
    server = poplib.POP3_SSL(host)
    try:
        # Log in
        server.user(username)
        server.pass_(password)
        # NOTE(review): ``x`` is an IMAP sequence number reused as a POP3
        # message number; this presumes both protocols list the same mailbox
        # in the same order - confirm. The subject re-check below guards
        # against a mismatch.
        resp, lines, octets = server.retr(x)  # fetch the message
        msg_content = b'\r\n'.join(lines).decode('utf-8', 'ignore')
        msg = Parser().parsestr(msg_content)
        ps = get_header(msg, email_name)
        print('ps: ', ps)
        if not ps:
            return None
        print('解析邮件/信体')
        return get_file(msg, file_name, floder_input)  # parse body / save attachment
    finally:
        # Close the POP3 session on every path, including errors.
        try:
            server.quit()
        except (poplib.error_proto, OSError):
            server.close()
# index = len(mails) # 邮件的总数
# # 此处的循环是取最近的几封邮件
# for i in range(index, index - 50, -1): # 倒叙遍历 2021年7月22日18:42:19 改为50
# resp, lines, octets = server.retr(i) # 取邮件
# msg_content = b'\r\n'.join(lines).decode('utf-8', 'ignore')
# msg = Parser().parsestr(msg_content)
# ps = get_header(msg, email_name)
# if ps:
# pb = get_file(msg, file_name) # 解析邮件/信体
# print(pb)
# server.quit()
# return pb
if __name__ == '__main__':
    # Manual test run: download one attachment into the default folder.
    # host = "popcom.263xmail.com"
    username, password = lbj_username, lbj_password
    # Example mail subject: 费边店铺3月账单明细
    # 2. Scan the messages for one matching the filter (substring match on the subject)
    # 3. Parse the body and download the attachment (substring match on the attachment name)
    email_name = file_name = '费边店铺'
    # Example
    # email_name = '【销售提成方案导出】'
    # file_name = '完成' # a single "." would download the first attachment
    file_path = getMailFile(email_name, file_name, floder_input=floder)
    # The file ends up in ``floder`` (d:\\ETL\\ on Windows)
    print('################################ 已完成-测试 ################################')
# # 实例
# # 调用邮件数据
# from shangde.本地py.email.download_email_file import getMailFile
# import warnings
# warnings.filterwarnings("ignore")
#
# # 2. 遍历邮件寻找符合筛选条件的邮件(模糊匹配邮件标题)
# email_name = '费边店铺3月账单明细'
# # 3. 解析信体内容并下载邮件附件(模糊匹配附件名称)
# file_name = '费边店铺3月账单明细'
#
# # 下载文件
# file_path = getMailFile(email_name, file_name)
# if file_path == None:
# print('2.下载文件_失败')
# print('2.下载文件_成功')