用python和Java实现提取指定目录下所有eml文件邮件数据,并存入mysql数据库

提取指定目录下所有eml文件邮件数据,并存入mysql数据库

<1>python

#!/usr/bin/env python3
# -*- coding:utf8 -*-
# @TIME  :2020/9
# @Author:ywl
# @File  :main_eml.py
import re
import email    #电子邮件包是一个用于管理电子邮件消息的库
import os
import shutil   #shutil模块主要作用与拷贝文件用的
import pymysql
import datetime
from email.parser import Parser
from email.header import decode_header
from email.utils import parseaddr
import poplib

# 任务:提取指定目录下全部eml文件里的收件人、发件人、邮件标题、邮件内容等数据,
# 并把数据写入到mysql数据库里。
# 1.打开eml读出data 2.提取分离data 3.将data放入mysql 4.打开mysql读出data

# Module-level state shared by the functions below.
# PATHS / UNPATHS are pre-sized slot tables; a slot holding 0 means "empty".
PATHS = [0] * 100
UNPATHS = [0] * 100
FILE_NUM = 0      # number of slots of PATHS that have been filled so far
UNFILE_NUM = 0    # number of files rejected by clean_uneml()
CONN = None       # database connection, assigned elsewhere before use
#=====================函数集====================#
def open_file(path):
    """Open *path* for reading in text mode.

    Returns the open file object, or None (after printing a notice) when the
    path does not exist. The caller is responsible for closing the file.
    """
    if not os.path.exists(path):
        print('文件不存在!')
        return None
    return open(path, 'r')

def get_message(path):
    """Parse the file at *path* into an email message object.

    Returns the parsed message, or None (after printing a notice) when the
    path does not exist.

    The file is opened in text mode with the platform default encoding and is
    always closed before returning, so no handle is leaked per parsed file.
    """
    if not os.path.exists(path):
        print('文件不存在!')
        return None
    # The context manager guarantees the handle is released even if parsing
    # raises.
    with open(path, 'r') as fp:
        return email.message_from_file(fp)

def decode_str(s):
    """Decode an RFC 2047 encoded header value (Subject, display name) to str.

    Args:
        s: Raw header value. None and '' are accepted.

    Returns:
        The fully decoded text. A missing or empty header yields ''.

    Every chunk returned by decode_header() is decoded and joined, so headers
    that mix plain text with encoded words (or use several encoded words) are
    returned complete and always as str, never as bytes.
    """
    if not s:
        return ''
    pieces = []
    for value, charset in decode_header(s):
        if isinstance(value, bytes):
            try:
                value = value.decode(charset or 'ascii', errors='replace')
            except LookupError:
                # Charset name is unknown to Python: best-effort fallback
                # instead of aborting the whole import.
                value = value.decode('utf-8', errors='replace')
        pieces.append(value)
    return ''.join(pieces)

def get_mime_version(msg):
    """Return the value of the message's MIME-Version header.

    The header is passed through email.utils.parseaddr and the address part
    of the result is returned. Prints a notice and returns None when *msg*
    is None.
    """
    if msg is None:
        print('msg is empty!')
        return None
    header_value = msg.get('mime-version')
    _realname, version = email.utils.parseaddr(header_value)
    return version

def get_content_type(msg):
    """Return the message's content type, e.g. 'text/html'.

    Args:
        msg: A parsed email message, or None.

    Returns:
        The lower-cased 'maintype/subtype' string, '' when the message has no
        Content-Type header, or None (after printing a notice) when *msg* is
        None.

    Content-Type is not an address header, so it is read with the message's
    own accessor instead of parseaddr(). Strict parseaddr() rejects values
    such as 'text/html; charset="utf-8"' and returns '' for them.
    """
    if msg is None:
        print('msg is empty!')
        return None
    if msg.get('content-type') is None:
        # Keep reporting "no header" as '' rather than the RFC default type.
        return ''
    return msg.get_content_type()

def get_content_transfer_encoding(msg):
    """Return the value of the Content-Transfer-Encoding header.

    The header is passed through email.utils.parseaddr and the address part
    of the result is returned. Prints a notice and returns None when *msg*
    is None.
    """
    if msg is None:
        print('msg is empty!')
        return None
    header_value = msg.get('content-transfer-encoding')
    _realname, encoding = email.utils.parseaddr(header_value)
    return encoding

def guess_charset(msg):
    """Determine the character set of a message part.

    Returns whatever msg.get_charset() reports when it is set; otherwise the
    lower-cased ``charset`` parameter of the Content-Type header, or None
    when the header declares no charset.

    The Content-Type parameter is read with get_content_charset(), which
    strips quotes and ignores any following parameters. A value such as
    'charset="UTF-8"; format=flowed' therefore yields 'utf-8', not the raw
    tail of the header.
    """
    charset = msg.get_charset()
    if charset is None:
        charset = msg.get_content_charset()
    return charset

def get_date(msg):
    """Return the decoded body text of a message part.

    For 'text/plain' and 'text/html' parts the payload is transfer-decoded
    and converted to str. For any other content type (treated by this script
    as an attachment) the content type string itself is returned.

    When no charset is declared the body is decoded as UTF-8 instead of being
    dropped. When the declared charset is unknown or does not match the
    bytes, the body is decoded as UTF-8 with replacement characters instead
    of raising.
    """
    content_type = msg.get_content_type()
    if content_type not in ('text/plain', 'text/html'):
        return content_type  # attachment / container part

    content = msg.get_payload(decode=True)
    if content is None:
        return ''

    charset = guess_charset(msg)
    # str() also covers the case where guess_charset returns a Charset object.
    codec = str(charset) if charset else 'utf-8'
    try:
        return content.decode(codec)
    except (LookupError, UnicodeDecodeError):
        return content.decode('utf-8', errors='replace')
    
#=====================函数集====================#

def List_FilePATHS(target_path):
    """Collect every .eml file under *target_path* into the global PATHS.

    Walks the directory tree with os.walk (which already descends into
    sub-directories), stores each matching path at PATHS[FILE_NUM] and
    increments FILE_NUM. The extension is matched case-insensitively.

    PATHS is pre-sized, so once all existing slots are used further paths are
    appended instead of raising IndexError.

    Returns:
        True, always.
    """
    global FILE_NUM
    global PATHS

    for base_path, _folder_list, file_list in os.walk(target_path):
        for file_name in file_list:
            if os.path.splitext(file_name)[1].lower() != '.eml':
                continue
            file_path = os.path.join(base_path, file_name)
            if FILE_NUM < len(PATHS):
                PATHS[FILE_NUM] = file_path
            else:
                PATHS.append(file_path)
            FILE_NUM = FILE_NUM + 1
            print(">>>己获得第{0}个eml文件{1}".format(FILE_NUM, file_path))
    return True

def clean_uneml(uneml_path='D:\\phpstudy_pro\\web_base\\uneml'):
    """Filter out collected .eml files that have no usable Subject.

    Each file in PATHS[0:FILE_NUM] is parsed. When its decoded Subject is
    empty the file is copied into *uneml_path* (the original is left in
    place), its slot in PATHS is set to 0 and UNFILE_NUM is incremented.

    Args:
        uneml_path: Directory that receives copies of rejected files. It is
            created, including missing parents, when it does not exist.

    Any exception stops the pass and is reported on stdout; nothing is
    raised to the caller.
    """
    global PATHS
    global FILE_NUM
    global UNFILE_NUM
    try:
        os.makedirs(uneml_path, exist_ok=True)
        for i in range(FILE_NUM):
            if PATHS[i] == 0:
                # Slot already cleared by an earlier pass. open(0) would read
                # from stdin, so it must be skipped.
                continue
            with open(PATHS[i], 'r') as fp:
                new_msg = Parser().parsestr(fp.read())
            email_subject = decode_str(new_msg.get("Subject", ""))  # subject
            if email_subject == "":
                shutil.copy(PATHS[i], uneml_path)
                print(">>>文件{0}无法传入数据库故将其清除".format(PATHS[i]))
                PATHS[i] = 0
                UNFILE_NUM = UNFILE_NUM + 1
    except Exception as e:
        print(">>>清洁失败", e)
    else:
        print(">>>清洁成功")

def post_eml_mysql(init_id):
    """Insert every collected .eml file into the ``email`` table.

    Runs clean_uneml() first, then parses each remaining file in PATHS and
    inserts one row per file, committing after each successful insert.

    Args:
        init_id: ID assigned to the first stored message. Subsequent stored
            messages receive consecutive IDs, so skipped or failed files
            leave no gaps and never push an ID below *init_id*.

    Returns:
        True if at least one file was stored, otherwise False.

    A failure while reading, parsing or inserting a file is reported on
    stdout and processing continues with the next file.
    """
    global PATHS
    global FILE_NUM
    global CONN

    ip_pattern = re.compile(r'\d+\.\d+\.\d+\.\d+')
    sql = "insert into email values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);"
    ttp = False
    next_id = init_id

    clean_uneml()
    cur = CONN.cursor()  # create the cursor
    try:
        for i in range(FILE_NUM):
            if PATHS[i] == 0:  # slot cleared by clean_uneml()
                continue
            stored_at = datetime.datetime.now()  # current system time
            try:
                msg = get_message(PATHS[i])
                if msg is None:  # file disappeared since it was listed
                    continue

                subject = decode_str(msg.get("Subject", ""))

                # Sender IP / recipient are taken from the first Received
                # header; both may legitimately be absent.
                received = msg.get("Received") or ""
                received_name, to_addr = parseaddr(received)
                ip_match = (ip_pattern.search(received_name)
                            or ip_pattern.search(str(received)))
                from_ip = ip_match.group() if ip_match else ""
                if not to_addr:
                    # Fall back to the To header when Received carries no
                    # address.
                    to_addr = parseaddr(msg.get("To") or "")[1]

                from_name, from_addr = parseaddr(msg.get("From"))
                from_name = decode_str(from_name)

                row = (
                    next_id,                              # 1  ID
                    subject,                              # 2  subject
                    stored_at,                            # 3  storage time
                    to_addr,                              # 4  recipient
                    from_ip,                              # 5  sender IP
                    from_name,                            # 6  sender name
                    from_addr,                            # 7  sender address
                    get_mime_version(msg),                # 8  MIME version
                    get_content_type(msg),                # 9  content type
                    get_content_transfer_encoding(msg),   # 10 transfer encoding
                    get_date(msg),                        # 11 body
                )
                cur.execute(sql, row)
            except Exception as e:
                print(">>>文件{0}存入数据库失败,失败原因:{1}".format(PATHS[i], e))
            else:
                # Inserts must be committed or the row never becomes visible.
                CONN.commit()
                print(">>>*文件{0}成功存入数据库*,time:{1}".format(PATHS[i], stored_at))
                ttp = True
                next_id += 1
    finally:
        cur.close()  # always release the cursor

    return ttp

def _prompt_number(prompt, cast=float):
    """Read one number from stdin; return None (after a notice) if invalid."""
    try:
        return cast(input(prompt))
    except ValueError:
        print(">>>输入无效,请重新输入")
        return None


def select_mysql():
    """Interactive query menu for the ``email`` table.

    Options: 1 = print the first N rows, 2 = print all rows, 3 = print the
    row count, 4 = print the N-th row of the result set; any other number
    exits. Non-numeric input re-displays the menu instead of crashing, and
    the table is queried only after a valid option has been chosen.
    """
    global CONN
    cur = CONN.cursor()  # create the cursor
    try:
        while True:
            print(">>>**************************************")
            print(">>>*         查询选项                   *")
            print(">>>*      1.查询制定个数个查询结果集      *")
            print(">>>*      2.查询全部结果集              *")
            print(">>>*      3.查询结果集数量              *")
            print(">>>*      4.*指定ID查询                *")
            print(">>>*           0.退出                   *")
            print(">>>**************************************")
            select = _prompt_number(">>>请选择功能:")
            if select is None:
                continue
            if select not in (1, 2, 3, 4):
                break

            # execute() returns the number of rows in the result set.
            result = cur.execute("select * from email;")
            print("===================本次查询结果如下===================")
            if select == 1:
                result_num = _prompt_number(">>>请输入查询结果集数量:", int)
                if result_num is None:
                    continue
                print(cur.fetchmany(result_num))  # first N rows
            elif select == 2:
                print(cur.fetchall())  # every row
            elif select == 3:
                print("email数据表数据记录数为:{0}".format(result))
            else:
                select_id = _prompt_number(">>>请输入查询ID:", int)
                if select_id is None:
                    continue
                if not 1 <= select_id <= result:
                    print(">>>ID超出查询范围,请重新输入")
                    continue
                # Skip the preceding rows, then show the requested one.
                for _ in range(select_id - 1):
                    cur.fetchone()
                print(cur.fetchone())
            print("===================本次查询结果如上===================")
    finally:
        cur.close()  # always release the cursor

def delete_data():
    """Interactive delete menu for the ``email`` table.

    Options: 1 = delete one ID, 2 = delete IDs strictly between a start and
    an end value, 3 = delete every row (after confirmation); any other number
    exits.

    Exiting returns immediately and never re-runs the previous DELETE.
    Non-numeric input re-displays the menu. Values are passed to the driver
    as query parameters rather than formatted into the SQL text.
    """
    # NOTE(review): the statements filter on a column named email_id, while
    # the active CREATE TABLE statement in this file uses different column
    # names — confirm against the live schema.
    global CONN
    cur = CONN.cursor()  # create the cursor
    try:
        while True:
            print(">>>**************************************")
            print(">>>*         删除选项                   *")
            print(">>>*      1.*指定ID删除                 *")
            print(">>>*      2.*指定范围删除               *")
            print(">>>*      3.!删除全部结果集             *")
            print(">>>*           0.退出                   *")
            print(">>>**************************************")
            try:
                select = float(input(">>>请选择功能:"))
                if select == 1:
                    target_id = int(float(input(">>>请输入指定要删除ID:")))
                    sql = "delete from email WHERE email_id = %s;"
                    params = (target_id,)
                elif select == 2:
                    start_id = int(float(input(">>>请输入指定要删除的起始ID:")))
                    end_id = int(float(input(">>>请输入指定要删除的结束ID:")))
                    sql = "delete from email WHERE email_id > %s AND email_id < %s;"
                    params = (start_id, end_id)
                elif select == 3:
                    confirm = float(input("!!!请再次确认是否要删除全部结果集(按1确认):"))
                    if confirm != 1:
                        continue
                    sql = "delete from email;"
                    params = None
                else:
                    break  # exit without touching the database
            except ValueError:
                print(">>>输入无效,请重新输入")
                continue

            try:
                cur.execute(sql, params)  # run the statement
                CONN.commit()             # make the deletion permanent
            except Exception as s:
                print("删除未成功:", s)
            else:
                print("删除成功")
    finally:
        cur.close()  # always release the cursor

def creat_mysql():
    global CONN
    cur = CONN.cursor() # 2. 创建游标对象
        
    # 1创建数据表
    try:
        #create_sqli = "create table email(email_id int NOT NULL AUTO_INCREMENT,  subject varchar(100), eail_storage_time  varchar(100), to_ip varchar(500),  to_addr varchar(50), from_name varchar(50),  from_adde varchar(50), Mime_Version varchar(50), Content_Type varchar(50),Content_Transfer_Encoding varchar(50),email_Data  varchar(2500),primary key (email_id));"
        create_sqli = "create table email(邮件存储编号 int NOT NULL AUTO_INCREMENT,  邮件主题 varchar(100), 邮件存储时间  varchar(100), 收件人邮箱 varchar(50), 发件人IP varchar(500),   发件人姓名 varchar(50),  发件人邮箱 varchar(50), 版本 varchar(50), 内容类型 varchar(50),内容传输编码 varchar(50),邮件信息  varchar(2500),primary key (邮件存储编号));"
        cur.execute(create_sqli)
    except Exception as e:
        print("创建数据表失败:", e)
    else:
        print(
  • 0
    点赞
  • 10
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
好的,可以使用JavaMail API来解析.eml文件。以下是一个基本的代码示例:

```java
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;
import javax.mail.Address;
import javax.mail.Message;
import javax.mail.MessagingException;
import javax.mail.Session;
import javax.mail.internet.MimeMessage;

public class ParseEml {
    public static void main(String[] args) {
        String filePath = "your-file-path.eml";
        parse(filePath);
    }

    public static void parse(String filePath) {
        Properties props = new Properties();
        Session session = Session.getDefaultInstance(props, null);
        try (InputStream is = new FileInputStream(filePath)) {
            Message message = new MimeMessage(session, is);
            String subject = message.getSubject();
            String from = getAddressListAsString(message.getFrom());
            String to = getAddressListAsString(message.getRecipients(Message.RecipientType.TO));
            String cc = getAddressListAsString(message.getRecipients(Message.RecipientType.CC));
            String bcc = getAddressListAsString(message.getRecipients(Message.RecipientType.BCC));
            String sentDate = message.getSentDate().toString();
            String content = message.getContent().toString();
            System.out.println("Subject: " + subject);
            System.out.println("From: " + from);
            System.out.println("To: " + to);
            System.out.println("Cc: " + cc);
            System.out.println("Bcc: " + bcc);
            System.out.println("Sent Date: " + sentDate);
            System.out.println("Content: " + content);
        } catch (IOException | MessagingException e) {
            e.printStackTrace();
        }
    }

    private static String getAddressListAsString(Address[] addresses) {
        StringBuilder sb = new StringBuilder();
        if (addresses != null) {
            for (Address address : addresses) {
                sb.append(address.toString()).append(", ");
            }
        }
        return sb.toString();
    }
}
```

请注意,您需要将代码示例中的 "your-file-path.eml" 字符串替换为您要解析的 .eml 文件的完整路径。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值