邮件拉取方法

 
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
'''
@File    :   receiver_email.py
@Time    :   2021/09/09
@Author  :   csniu
@Desc    :   邮件接收
'''

import email
import email.header
import email.utils
import logging
import os
import re
import time
import traceback

import html2text
from imapclient import IMAPClient
from imapclient.exceptions import IMAPClientAbortError, IMAPClientError
from retrying import retry

logger = logging.getLogger(__name__)


class ParseMessage(object):
    '''Decode the headers, bodies and attachments of one e-mail message.

    Attributes populated by ``get_header`` (called from the constructor):
        Subject: decoded subject (``str``, all header fragments joined).
        From, To, Cc, Reply_to: ``{display name: address}`` dicts.
        date: ``datetime`` parsed from the Date header, or ``None`` when the
            header is missing or cannot be parsed.
        message_id: value of the Message-ID header, or ``None``.
        uid: the identifier passed to the constructor.
    '''

    def __init__(self, email_message, uid=None):
        '''
        email_message: a parsed ``email.message.Message``.
        uid: optional identifier stored unchanged on the instance.
        '''
        self.message = email_message
        self.get_header()
        self.uid = uid

    def get_header(self):
        '''Decode the common headers and store them as instance attributes.'''
        self.Subject = self._header_text(self.message.get('Subject', ''))
        # NOTE: these dicts are keyed by display name (kept for backward
        # compatibility), so addresses sharing a name overwrite each other.
        self.From = dict(self.decode_addr(self.message.get('From')))
        self.To = dict(self.decode_addr(self.message.get('To')))
        self.Cc = dict(self.decode_addr(self.message.get('Cc')))
        self.Reply_to = dict(self.decode_addr(self.message.get('Reply-to')))

        raw_date = self.message.get('Date')
        try:
            self.date = (email.utils.parsedate_to_datetime(str(raw_date))
                         if raw_date else None)
        except (TypeError, ValueError):
            # A malformed Date header must not abort parsing of the message.
            self.date = None
        self.message_id = self.message.get('message-id')

    def get_body(self):
        '''
        Return the non-attachment parts as ``{content type: content}``.

        Parts that declare a charset, and all ``text/*`` parts, are decoded to
        ``str``; anything else is left as the raw decoded payload. When
        several parts share a content type the last one wins.
        '''
        data = {}
        for part in self.message.walk():
            if part.is_multipart() or part.get_filename():
                continue
            content = part.get_payload(decode=True)
            charset = self.guess_charset(part)
            is_text = part.get_content_maintype() == 'text'
            if content is not None and (charset or is_text):
                # Text without a declared charset is decoded as UTF-8, which
                # is a superset of the us-ascii default.
                content = self._decode_bytes(content, charset or 'utf-8')
            data[part.get_content_type()] = content
        return data

    def get_text(self):
        '''
        Return the message text, or ``None`` when there is no text part.

        The plain-text part is used when it is longer than 20 characters
        (shorter ones are sometimes just control characters); otherwise the
        HTML part is converted with ``html2text``.
        '''
        bodys = self.get_body()
        if 'text/plain' in bodys and len(bodys['text/plain']) > 20:
            return bodys['text/plain']
        elif 'text/html' in bodys:
            return html2text.html2text(bodys['text/html'])
        else:
            return None

    def get_files(self, to_dir):
        '''
        Save every attachment into ``to_dir`` and return the written paths.

        to_dir: existing local directory to save into.

        Raises FileNotFoundError when ``to_dir`` is not a directory.
        '''
        if not os.path.isdir(to_dir):
            raise FileNotFoundError(str(to_dir))
        files = []
        for part in self.message.walk():
            if part.is_multipart():
                continue
            raw_name = part.get_filename()
            if not raw_name:
                continue
            decoded = self._header_text(raw_name)
            # The filename comes from the sender: keep only its last path
            # component so it cannot escape to_dir (e.g. "../../x").
            filename = os.path.basename(decoded.replace('\\', '/'))
            if filename in ('', '.', '..'):
                continue
            file_path = os.path.join(to_dir, filename)
            with open(file_path, 'wb') as fl:
                fl.write(part.get_payload(decode=True))
            files.append(file_path)
        return files

    def decode_to_str(self, s):
        '''
        Yield each fragment of the (possibly RFC 2047 encoded) header ``s``
        as ``str``.
        '''
        for value, charset in email.header.decode_header(s):
            if isinstance(value, str):
                yield value
            else:
                yield self._decode_bytes(value, charset)

    def decode_addr(self, addrs):
        '''
        Yield ``(decoded display name, address)`` for every address in the
        header value ``addrs``. A missing or empty header yields nothing.
        '''
        if not addrs:
            return
        # Unfold continuation lines so the whole header is parsed at once.
        flat = ' '.join(str(addrs).splitlines())
        for name, addr in email.utils.getaddresses([flat]):
            if not name and not addr:
                # getaddresses reports unparsable input as ('', '').
                continue
            yield (self._header_text(name), addr)

    def guess_charset(self, msg):
        '''Return the charset name declared by ``msg``, or ``None``.'''
        charset = msg.get_charset()
        if charset is not None:
            return str(charset)
        # Parses the Content-Type parameters properly, so trailing parameters
        # such as "; format=flowed" are not mistaken for part of the name.
        return msg.get_content_charset()

    def _header_text(self, s):
        '''Decode header value ``s`` and join all its fragments into one str.'''
        if s is None:
            return ''
        return ''.join(self.decode_to_str(s))

    @staticmethod
    def _decode_bytes(raw, charset):
        '''
        Decode ``raw`` bytes with ``charset``, never raising on bad data.

        gb2312/gbk fall back to their superset GB18030; "unknown-8bit" is
        treated as GB18030. If every attempt fails, undecodable bytes are
        kept as backslash escapes.
        '''
        name = str(charset).lower() if charset else 'ascii'
        if name == 'unknown-8bit':
            name = 'gb18030'
        candidates = [name]
        if name in ('gb2312', 'gbk'):
            candidates.append('gb18030')
        for codec in candidates:
            try:
                return raw.decode(codec)
            except (UnicodeDecodeError, LookupError):
                continue
        try:
            return raw.decode(candidates[-1], 'backslashreplace')
        except LookupError:
            # Unknown codec name: fall back to ASCII with escapes.
            return raw.decode('ascii', 'backslashreplace')


def retry_if_io_error(exception):
    '''Retry predicate: True when ``exception`` is an IMAP client error.'''
    retryable = (IMAPClientAbortError, IMAPClientError)
    return isinstance(exception, retryable)


class IMAPConnection(object):
    '''Thin wrapper around ``IMAPClient`` that logs in on construction.'''

    def __init__(self, host, port, name, password, ssl=True, timeout=30):
        '''
        host, port: IMAP server address.
        name, password: login credentials.
        ssl: passed to ``IMAPClient`` as its ``ssl`` argument.
        timeout: passed to ``IMAPClient`` as its ``timeout`` argument.
        '''
        self.host = host
        self.port = port
        self.ssl = ssl
        self.timeout = timeout
        self.name = name
        self.password = password
        self.imap = self.client()

    def client(self):
        '''Open a new connection, log in and return the ``IMAPClient``.'''
        imap = IMAPClient(host=self.host, port=self.port, ssl=self.ssl, timeout=self.timeout)
        imap.login(self.name, self.password)
        return imap

    def folders(self):
        '''Return the server's folder list.'''
        return self.imap.list_folders()

    def search(self, values, folder='INBOX'):
        '''
        Select ``folder`` read-only and return the result of searching it
        with ``values``.

        Raises ValueError when the folder does not exist.
        '''
        if not self.imap.folder_exists(folder):
            raise ValueError('{} not exists! '.format(str(folder)))
        self.imap.select_folder(folder=folder, readonly=True)
        return self.imap.search(values)

    @retry(stop_max_attempt_number=5, stop_max_delay=10000, wait_fixed=2000, retry_on_exception=retry_if_io_error)
    def getUid(self, uids, identity):
        '''
        Fetch ``identity`` for ``uids``.

        On an IMAP client error the connection is re-established and the
        error re-raised so that the ``retry`` decorator can try again.
        '''
        try:
            return self.imap.fetch(uids, identity)
        except (IMAPClientAbortError, IMAPClientError):
            # Log first so the failure is recorded even if reconnecting fails.
            logger.error('IMAPClient 错误,重新连接。')
            self.imap = self.client()
            raise

    def download_email(self, uid, identity):
        '''
        Download one message and return it as a ``ParseMessage``.

        identity: 'BODY[HEADER]' downloads only the headers, 'RFC822' the
            whole message.

        Returns None (after logging) when the download fails or the response
        does not contain the requested data.
        '''
        try:
            message_data = self.getUid(uid, identity)
            raw_message = message_data[uid][bytes(identity, encoding='utf-8')]
        except Exception:
            logger.error('Download Error: UID-{}, {}'.format(str(uid), traceback.format_exc()))
            return None
        message_header_data = email.message_from_bytes(raw_message)
        return ParseMessage(message_header_data, uid=uid)

    def move(self, uid, folder, set_seen=False):
        '''
        Move message ``uid`` to ``folder``, creating the folder if needed.

        set_seen: when True, flag the message as read before moving it.
        '''
        # NOTE(review): search() selects the folder with readonly=True; a
        # server that enforces read-only selection may reject the flag change
        # and the move below — confirm against the target server.
        if not self.imap.folder_exists(folder):
            self.imap.create_folder(folder)

        if set_seen:
            self.imap.add_flags(uid, b'\\Seen')  # mark as read
        # Mail services differ in which of copy/move they support.
        self.imap.move(messages=uid, folder=folder)
        logger.info('Move: {}'.format(str(uid)))
 

 

# Usage example: poll the inbox and create a task for each matching mail.

# Connection settings passed to IMAPConnection by receiver().
IMAP_HOST = 'imap.exmail.qq.com'
IMAP_PORT = 993
# NOTE(review): `exmail` and `password` are not defined anywhere in the visible
# source, so the next two lines raise NameError as written. They look like
# placeholders for the real account name and password — confirm and supply
# actual values before running.
USERNAME = exmail
PASSWD = password

def _split_subject(subject):
    '''Split "<ship name> <voyage>" into (ship_name, voyage), or return None.

    Runs of whitespace are collapsed first; the voyage is the part of the
    last word after its final '.'.
    '''
    words = subject.split()
    if len(words) < 2:
        return None
    return ' '.join(words[:-1]), words[-1].split('.')[-1]


def receiver():
    '''
    Poll the INBOX forever, creating one task per mail with a matching subject.

    Every cycle opens a connection, processes all mails in INBOX, logs out
    and sleeps for ten minutes. Mails whose subject does not match the
    "<ship name> <voyage>" rule are left in place; processed mails are moved
    to the 'SUCCESS' folder, or to 'ERROR' when processing raised.
    '''
    while True:
        client = IMAPConnection(host=IMAP_HOST, port=IMAP_PORT, ssl=True,
                                name=USERNAME, password=PASSWD)
        try:
            uids = client.search(values='ALL', folder='INBOX')
            my_logger.info('Search UIDS: {}'.format('、'.join(str(i) for i in uids)))
            for uid in uids:
                my_logger.info('开始处理邮件: {}'.format(uid))
                mail = client.download_email(uid, 'BODY[HEADER]')
                if mail is None:  # download failed
                    my_logger.error('邮件下载失败:{}'.format(uid))
                    continue

                # Decide from the subject whether this mail should be handled.
                # (The original also carried a disabled keyword pre-filter
                # that moved non-matching mails to an 'OTHER' folder.)
                subject = mail.Subject
                parsed = _split_subject(subject)
                if parsed is None:
                    my_logger.info(f'邮件主题规则不匹配:{subject}')
                    continue

                try:
                    ship_name, voyage = parsed
                    print(ship_name)
                    print(voyage)
                    from_addr = list(mail.From.values())[0]
                    print(from_addr)
                    rr = rpa.create_task(payload={'ship_name': ship_name, 'voyage': voyage, 'from_addr': from_addr}, computer=None)
                    print(rr)
                except Exception:
                    # Record why the mail failed before filing it away.
                    my_logger.error('邮件处理失败:{}, {}'.format(uid, traceback.format_exc()))
                    client.move(uid, folder='ERROR')
                else:
                    client.move(uid, folder='SUCCESS')
        finally:
            # A new connection is opened every cycle, so always release this
            # one; a failing logout must not mask an error raised above.
            try:
                client.imap.logout()
            except Exception:
                my_logger.error('关闭 IMAP 连接失败: {}'.format(traceback.format_exc()))
        print('等待10分钟')
        time.sleep(600)
 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值