# 邮件解析入库 (mail parsing and ingestion)

# -*- coding:utf-8 -*-

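'''

References:

   http://www.pythonclub.org/python-network-application/email-format

   http://blog.sina.com.cn/s/blog_4deeda2501016eyf.html

Steps:

1. Log in to the mailbox.

2. Scan the mails for one that matches the filter (the subject must contain the given text).

3. Parse the message body and download the attachment (the attachment name must contain the given text). Several attachments per mail may be supported later.

4. Read the attachment and load it into storage.  ### This module only goes as far as downloading the Excel file; loading is done by the main flow in common.

'''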

import sys

import os

import locale

import poplib

poplib._MAXLINE = 20480

from email.parser import Parser

from email.header import decode_header

from email.utils import parseaddr  # 专门处理地址的模块

import platform

import imaplib, string, email

parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

sys.path.insert(0, parent_dir)

sys.path.append("/home/sd_history_code/")

# from config.config import lbj_username,lbj_password

# SECURITY: a real account and password are embedded in this file. They are
# kept only as backward-compatible defaults; set LBJ_USERNAME / LBJ_PASSWORD
# in the environment to override them, and move the secret out of source
# control as soon as possible.
lbj_username = os.environ.get('LBJ_USERNAME', 'wenzilong@sunlands.com')  # mailbox login account
lbj_password = os.environ.get('LBJ_PASSWORD', '66Ab5C7fb1eC0db0')  # mailbox password

# Work out the text encoding of the runtime environment: take the
# interpreter default, unless that is plain ASCII, in which case use the
# encoding reported by the locale instead.
# NOTE(review): __g_codeset is not referenced anywhere else in the visible code.
__g_codeset = (
    locale.getdefaultlocale()[1]
    if sys.getdefaultencoding() == "ascii"
    else sys.getdefaultencoding()
)

# Decode an RFC 2047 encoded header value (e.g. a Subject) into readable text
def decode_str(s):
    """Return the fully decoded text of a mail header value.

    Every fragment reported by ``decode_header`` is decoded and the results
    are concatenated, so headers that mix plain text with encoded words, or
    that use several encoded words, are returned complete.

    Args:
        s: raw header value as found in the message.

    Returns:
        The decoded header as ``str``.
    """
    pieces = []
    for value, charset in decode_header(s):
        if isinstance(value, bytes):
            try:
                # Fragments without a declared charset are treated as ASCII.
                value = value.decode(charset or 'ascii', 'replace')
            except LookupError:
                # The sender declared a charset Python does not know;
                # fall back to UTF-8 rather than failing the whole header.
                value = value.decode('utf-8', 'replace')
        pieces.append(value)
    return ''.join(pieces)

# Check the mail headers
def get_header(msg, email_name):
    """Tell whether the decoded Subject of ``msg`` contains ``email_name``.

    Args:
        msg: object offering ``get(name, default)`` for header lookup.
        email_name: text that must appear in the decoded subject.

    Returns:
        ``True`` on a match; ``None`` when the subject is missing, empty or
        does not contain ``email_name``.
    """
    subject = msg.get('Subject', '')
    # The subject needs decoding before it can be compared.
    # if decode_str(subject) == email_name:   (exact-match variant)
    if subject and email_name in decode_str(subject):
        return True
    return None

# Directory where downloaded attachments are saved. The trailing separator is
# kept so the value can be concatenated directly with a file name.
if platform.system() == 'Windows':
    floder = 'd:\\ETL\\'
elif platform.system() == 'Linux':
    floder = r'/home/鹰眼_数据源/'
else:
    # Any other platform (e.g. macOS): fall back to the current working
    # directory so that `floder` is always defined.
    floder = os.path.join(os.getcwd(), '')

# Parse the message body and save the wanted attachment
def get_file(msg, file_name, floder_input):
    """Save the first attachment whose name contains ``file_name``.

    Every MIME part of ``msg`` is visited. Parts without a filename are
    ignored and attachments whose decoded name does not contain
    ``file_name`` are skipped. The first matching attachment is written into
    ``floder_input`` (replacing an existing file of the same name) and the
    search stops.

    Args:
        msg: parsed message offering ``walk()``.
        file_name: text the decoded attachment name must contain.
        floder_input: directory the attachment is written to.

    Returns:
        Path of the saved file, or ``None`` when no attachment matched.
    """
    for part in msg.walk():
        raw_name = part.get_filename()
        if not raw_name:
            continue  # not an attachment

        # Attachment names arrive header-encoded; decode before matching.
        attachment_name = decode_str(raw_name)
        if file_name not in attachment_name:
            print('如果附件不是需要的, 则跳过')
            continue

        data = part.get_payload(decode=True)
        if data is None:
            continue  # nothing to write for this part

        # The name comes from the mail itself: keep only its last path
        # component so it cannot point outside the destination directory.
        safe_name = os.path.basename(attachment_name)
        if not safe_name:
            continue

        target = os.path.join(floder_input, safe_name)
        if os.path.exists(target):
            os.remove(target)  # replace a previous download
        with open(target, 'wb') as f:
            f.write(data)

        print('下载完成: ', safe_name)
        return target

    return None

# Locate the target mail in the inbox
def get_name(email_name, username, password, max_scan=1):
    """Find the newest mail whose subject contains ``email_name``.

    Logs in to the IMAP server, selects the default mailbox and inspects the
    headers of the most recent mails, newest first.

    Args:
        email_name: text the decoded subject must contain.
        username: IMAP login name.
        password: IMAP password.
        max_scan: how many of the newest mails to inspect. The default of 1
            checks only the latest mail; ``None`` inspects every mail.

    Returns:
        The IMAP sequence number (``int``) of the first matching mail, or
        ``None`` when login/select fails or nothing matches.
    """
    M = imaplib.IMAP4_SSL("imap.263.net")
    try:
        try:
            M.login(username, password)
        except Exception as e:
            print('login error: %s' % e)
            return None

        result, message = M.select()
        if result != 'OK':
            print('select error: %s' % message)
            return None

        typ, data = M.search(None, 'ALL')
        num = data[0].split() if data and data[0] else []

        # Newest mails have the highest sequence numbers, so walk backwards.
        for i in num[::-1][:max_scan]:
            try:
                typ, data = M.fetch(i, '(RFC822.HEADER)')
                # 'replace' keeps headers with stray non-UTF-8 bytes parseable.
                msg = email.message_from_string(data[0][1].decode('utf-8', 'replace'))
                sub = msg.get('subject')
                if not sub:
                    continue
                a = decode_str(sub)
                print(str(i.decode()) + ': ' + a)
                # if a == email_name:   (exact-match variant)
                if email_name in a:
                    x = int(i.decode())
                    print("目标文件在邮箱的第{}封".format(len(num) - x + 1))
                    return x
            except Exception as e:
                # Best effort: one unreadable mail must not stop the scan.
                print('skip mail %s: %s' % (i.decode(), e))
                continue
        return None
    finally:
        # Always release the connection; a failure while logging out is not
        # interesting to the caller.
        try:
            M.logout()
        except Exception:
            pass

# Main entry point
def getMailFile(email_name, file_name, username=lbj_username, password=lbj_password, floder_input=floder):
    """Download the wanted attachment of the mail whose subject matches.

    The mail is located with ``get_name``, retrieved over POP3, checked
    again with ``get_header`` and handed to ``get_file``.

    NOTE(review): the number returned by ``get_name`` is an IMAP sequence
    number and is used here as the POP3 message number; this assumes both
    protocols number the mailbox identically - confirm for this server.

    Args:
        email_name: text the mail subject must contain.
        file_name: text the attachment name must contain.
        username: mailbox login name.
        password: mailbox password.
        floder_input: directory the attachment is saved to.

    Returns:
        Whatever ``get_file`` returns for the matching mail, or ``None``
        when no mail was found or the retrieved mail's subject does not match.
    """
    x = get_name(email_name, username, password)
    print(x)
    if x is None:
        # Nothing to retrieve; do not open a POP3 session at all.
        print('no matching mail found')
        return None

    host = "popcom.263xmail.com"
    server = poplib.POP3_SSL(host)
    try:
        # Log in
        server.user(username)
        server.pass_(password)

        resp, lines, octets = server.retr(x)  # fetch the mail
        msg_content = b'\r\n'.join(lines).decode('utf-8', 'ignore')
        msg = Parser().parsestr(msg_content)

        ps = get_header(msg, email_name)
        print('ps: ', ps)
        if not ps:
            return None

        print('解析邮件/信体')
        return get_file(msg, file_name, floder_input)  # parse the body / save the attachment
    finally:
        # Close the session on every path, including errors.
        try:
            server.quit()
        except Exception:
            server.close()

    # index = len(mails)  # 邮件的总数

    # # 此处的循环是取最近的几封邮件

    # for i in range(index, index - 50, -1): # 倒叙遍历 2021年7月22日18:42:19 改为50

    #     resp, lines, octets = server.retr(i)  # 取邮件

    #     msg_content = b'\r\n'.join(lines).decode('utf-8', 'ignore')

    #     msg = Parser().parsestr(msg_content)

    #     ps = get_header(msg, email_name)

    #     if ps:

    #         pb = get_file(msg, file_name)  # 解析邮件/信体

    #         print(pb)

    #         server.quit()

    #         return pb

if __name__ == '__main__':
    # Manual test run using the default account and the default folder.

    # Example mail subject: 费边店铺3月账单明细
    # Step 2: the mail subject must contain this text
    email_name = '费边店铺'

    # Step 3: the attachment name must contain this text
    file_name = '费边店铺'

    # Another example:
    # email_name = '【销售提成方案导出】'
    # file_name = '完成'  # a single '.' would simply pick the first attachment

    file_path = getMailFile(email_name, file_name, floder_input=floder)

    # Report the outcome instead of assuming the download worked.
    if file_path is None:
        print('download failed')
    else:
        print('downloaded to: ', file_path)

    print('################################ 已完成-测试 ################################')

# # 实例

# # 调用邮件数据

# from shangde.本地py.email.download_email_file import getMailFile

# import warnings

# warnings.filterwarnings("ignore")

#

# # 2. 遍历邮件寻找符合筛选条件的邮件(模糊匹配邮件标题)

# email_name = '费边店铺3月账单明细'

# # 3. 解析信体内容并下载邮件附件(模糊匹配附件名称)

# file_name = '费边店铺3月账单明细'

#

# # 下载文件

# file_path = getMailFile(email_name, file_name)

# if file_path is None:

#     print('2.下载文件_失败')

# else:

#     print('2.下载文件_成功')

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值