提取指定目录下所有eml文件邮件数据,并存入mysql数据库
<1>python
#!/usr/bin/env python3
# -*- coding:utf8 -*-
# @TIME :2020/9
# @Author:ywl
# @File :main_eml.py
import re
import email #电子邮件包是一个用于管理电子邮件消息的库
import os
import shutil #shutil模块主要作用与拷贝文件用的
import pymysql
import datetime
from email.parser import Parser
from email.header import decode_header
from email.utils import parseaddr
import poplib
# 任务:提取指定目录下全部eml文件里的收件人、发件人、邮件标题、邮件内容等数据,
# 并把数据写入到mysql数据库里。
# 1.打开eml读出data 2.提取分离data 3.将data放入mysql 4.打开mysql读出data
# Shared module state used by the functions below.
# Counters: FILE_NUM is the number of PATHS slots filled by List_FilePATHS;
# UNFILE_NUM is the number of files set aside by clean_uneml.
FILE_NUM = 0
UNFILE_NUM = 0
# Fixed-size tables of 100 slots; a slot holding 0 is empty (or was cleared).
PATHS = [0] * 100
UNPATHS = [0] * 100
# Database connection handle; it is None here and is expected to be assigned
# elsewhere before any function calls CONN.cursor().
CONN = None
#=====================函数集====================#
def open_file(path):  # Open a file for reading
    """Open ``path`` for reading in text mode.

    The open is attempted directly instead of checking ``os.path.exists``
    first, so there is no window in which the file can disappear between
    the check and the open.

    Args:
        path: Filesystem path of the file to open.

    Returns:
        The open text-mode file object, or ``None`` (after printing a
        notice) when the file does not exist.  The caller owns the handle
        and must close it.
    """
    try:
        return open(path, 'r')
    except FileNotFoundError:
        print('文件不存在!')
        return None
def get_message(path):  # Build a message object from an eml file
    """Parse the eml file at ``path`` into an ``email.message.Message``.

    The file is read inside a ``with`` block so the handle is closed as soon
    as parsing finishes (the parser consumes the whole file eagerly).

    Args:
        path: Filesystem path of the eml file.

    Returns:
        The parsed message, or ``None`` (after printing a notice) when the
        file does not exist.
    """
    if not os.path.exists(path):
        print('文件不存在!')
        return None
    with open(path, 'r') as fp:
        return email.message_from_file(fp)
def decode_str(s):  # Decode an RFC 2047 encoded header value for display
    """Decode a possibly encoded header value into a plain ``str``.

    Every fragment returned by ``decode_header`` is decoded and the pieces
    are concatenated, so headers that mix plain text with encoded words (or
    use several encoded words) are returned in full.

    Args:
        s: Raw header value; ``None`` is treated as an empty header.

    Returns:
        The decoded text.  Bytes that cannot be decoded are replaced rather
        than raising, and an unknown charset name falls back to UTF-8.
    """
    if s is None:
        return ''
    pieces = []
    for value, charset in decode_header(s):
        if isinstance(value, bytes):
            try:
                # Fragments without a declared charset are treated as ASCII.
                value = value.decode(charset or 'ascii', errors='replace')
            except LookupError:
                # The declared charset is not a codec Python knows about.
                value = value.decode('utf-8', errors='replace')
        pieces.append(value)
    return ''.join(pieces)
def get_mime_version(msg):  # Get the MIME version the mail was generated with
    """Return the value of the ``MIME-Version`` header of ``msg``.

    The raw header is passed through ``email.utils.parseaddr`` and the
    address component of the result is returned; for a plain token such as
    ``1.0`` that component is the token itself.

    Args:
        msg: Parsed message object, or ``None``.

    Returns:
        The extracted header token (an empty string when the header is
        absent), or ``None`` (after printing a notice) when ``msg`` is
        ``None``.
    """
    if msg is None:
        print('msg is empty!')
        return None
    return email.utils.parseaddr(msg.get('mime-version'))[1]
def get_content_type(msg):  # Get the content type of the mail
    """Return the leading token of the ``Content-Type`` header of ``msg``.

    The raw header is passed through ``email.utils.parseaddr`` and the
    address component of the result is returned; for a bare value such as
    ``text/plain`` that component is the value itself.

    Args:
        msg: Parsed message object, or ``None``.

    Returns:
        The extracted header token (an empty string when the header is
        absent), or ``None`` (after printing a notice) when ``msg`` is
        ``None``.
    """
    if msg is None:
        print('msg is empty!')
        return None
    return email.utils.parseaddr(msg.get('content-type'))[1]
def get_content_transfer_encoding(msg):  # Get the transfer encoding of the mail
    """Return the value of the ``Content-Transfer-Encoding`` header of ``msg``.

    The raw header is passed through ``email.utils.parseaddr`` and the
    address component of the result is returned; for a plain token such as
    ``base64`` that component is the token itself.

    Args:
        msg: Parsed message object, or ``None``.

    Returns:
        The extracted header token (an empty string when the header is
        absent), or ``None`` (after printing a notice) when ``msg`` is
        ``None``.
    """
    if msg is None:
        print('msg is empty!')
        return None
    return email.utils.parseaddr(msg.get('content-transfer-encoding'))[1]
def guess_charset(msg):  # Detect the character encoding of a message part
    """Return the charset declared for ``msg``, if any.

    The charset object attached to the message is preferred.  Otherwise the
    ``charset`` parameter of the ``Content-Type`` header is used, read with
    the standard-library parameter parser so that surrounding quotes and
    any following parameters (for example ``; format=flowed``) are not
    included in the result.

    Args:
        msg: Parsed message (or message part).

    Returns:
        The charset attached to the message, else the lower-cased charset
        name from ``Content-Type``, else ``None``.
    """
    charset = msg.get_charset()
    if charset is None:
        charset = msg.get_content_charset()
    return charset
def get_date(msg):  # Extract the body data of a message
    """Return the body of a text message, or the content type otherwise.

    Args:
        msg: Parsed message (or message part).

    Returns:
        For ``text/plain`` and ``text/html`` messages: the transfer-decoded
        payload, decoded to ``str`` when a charset is known (left as
        ``bytes`` when none is declared).  For every other content type
        (attachments, multipart containers): the content type string.
    """
    content_type = msg.get_content_type()
    if content_type not in ('text/plain', 'text/html'):
        # Not a text body: report what kind of part it is instead.
        return content_type
    content = msg.get_payload(decode=True)
    charset = guess_charset(msg)
    if charset:
        try:
            # Undecodable bytes are replaced so one bad mail cannot abort
            # the whole import run.
            content = content.decode(str(charset), errors='replace')
        except LookupError:
            # The declared charset is not a codec Python knows about.
            content = content.decode('utf-8', errors='replace')
    return content
#=====================函数集====================#
def List_FilePATHS(target_path):  # Collect every eml file under a directory into PATHS
    """Record the path of every ``.eml`` file found below ``target_path``.

    ``os.walk`` already descends into sub-directories, so no manual
    recursion is needed.  Each match is stored in ``PATHS[FILE_NUM]`` and
    ``FILE_NUM`` is incremented; when the pre-sized table is full the list
    is extended instead of raising ``IndexError``.

    Args:
        target_path: Root directory to scan.

    Returns:
        Always ``True``.
    """
    global FILE_NUM
    global PATHS
    for base_path, _folders, file_list in os.walk(target_path):
        for file_name in file_list:
            # Look at the file name only, so dots in directory names cannot
            # be mistaken for an extension; match the extension
            # case-insensitively.
            if os.path.splitext(file_name)[1].lower() != '.eml':
                continue
            file_path = os.path.join(base_path, file_name)
            if FILE_NUM < len(PATHS):
                PATHS[FILE_NUM] = file_path
            else:
                PATHS.append(file_path)
            FILE_NUM = FILE_NUM + 1
            print(">>>己获得第{0}个eml文件{1}".format(FILE_NUM, file_path))
    return True
def clean_uneml(uneml_path='D:\\phpstudy_pro\\web_base\\uneml'):
    """Set aside collected eml files that have no usable subject.

    Every file recorded in ``PATHS[:FILE_NUM]`` is parsed.  When its decoded
    ``Subject`` is empty the file is copied into ``uneml_path``, its slot in
    ``PATHS`` is reset to ``0`` and ``UNFILE_NUM`` is incremented.  Slots
    that are already ``0`` are skipped, so calling this function again does
    not try to open file descriptor 0.

    Any error stops the pass and is reported with a printed message; it is
    not re-raised.

    Args:
        uneml_path: Directory that receives the rejected files.  It is
            created (including missing parents) when it does not exist.
    """
    global PATHS
    global FILE_NUM
    global UNFILE_NUM
    try:
        os.makedirs(uneml_path, exist_ok=True)
        for i in range(FILE_NUM):
            path = PATHS[i]
            if path == 0:
                # Slot was already cleared by an earlier pass.
                continue
            with open(path, 'r') as fp:
                new_msg = Parser().parsestr(fp.read())
            email_subject = decode_str(new_msg.get("Subject", ""))  # subject
            if email_subject == "":
                shutil.copy(path, uneml_path)
                print(">>>文件{0}无法传入数据库故将其清除".format(path))
                PATHS[i] = 0
                UNFILE_NUM = UNFILE_NUM + 1
    except Exception as e:
        print(">>>清洁失败", e)
    else:
        print(">>>清洁成功")
def post_eml_mysql(init_id):  # Store every collected eml file in the database
    """Insert one row per collected eml file into the ``email`` table.

    ``clean_uneml`` is run first; slots of ``PATHS`` holding ``0`` are then
    skipped.  Each remaining file is parsed once, its fields are extracted
    and a row is inserted and committed.  A failure while reading, parsing
    or inserting one file is printed and the loop moves on to the next file.

    Row IDs are assigned sequentially starting at ``init_id``, one per
    processed file (an ID is consumed even when that file's insert fails,
    so a rejected ID is not retried for every following file).

    Args:
        init_id: ID given to the first inserted row.

    Returns:
        ``True`` when at least one row was inserted and committed,
        otherwise ``False``.
    """
    global PATHS
    global FILE_NUM
    global CONN
    ip_pattern = re.compile(r'\d+\.\d+\.\d+\.\d+')
    sql = "insert into email values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);"
    ttp = False
    clean_uneml()
    cur = CONN.cursor()  # create the cursor
    try:
        next_id = init_id
        for i in range(FILE_NUM):
            path = PATHS[i]
            if path == 0:
                continue
            email_id = next_id  # 1: row ID
            next_id += 1
            stored_at = datetime.datetime.now()  # 3: time of storage
            try:
                with open(path, 'r') as fp:
                    msg = Parser().parsestr(fp.read())
                subject = decode_str(msg.get("Subject"))  # 2: subject
                received = msg.get("Received")
                # 4, 5: the first Received header is split with parseaddr;
                # the name part is searched for an IPv4 address and the
                # address part is stored as the recipient.
                from_ip, to_addr = parseaddr(received)
                ip_match = (ip_pattern.search(from_ip)
                            or ip_pattern.search(str(received or '')))
                from_ip = ip_match.group() if ip_match else ''
                from_name, from_addr = parseaddr(msg.get("From"))  # 6, 7: sender
                from_name = decode_str(from_name)
                Mime_Version = get_mime_version(msg)  # 8: MIME version
                Content_Type = get_content_type(msg)  # 9: content type
                Content_Transfer_Encoding = get_content_transfer_encoding(msg)  # 10
                email_Data = get_date(msg)  # 11: mail content
                info = [(email_id, subject, stored_at, to_addr, from_ip,
                         from_name, from_addr, Mime_Version, Content_Type,
                         Content_Transfer_Encoding, email_Data)]
                cur.executemany(sql, info)
            except Exception as e:
                print(">>>文件{0}存入数据库失败,失败原因:{1}".format(path, e))
            else:
                # Inserted rows are only persisted once they are committed.
                CONN.commit()
                print(">>>*文件{0}成功存入数据库*,time:{1}".format(path, stored_at))
                ttp = True
    finally:
        cur.close()  # always release the cursor
    return ttp
def select_mysql():  # Interactive database query menu
    """Run an interactive menu for reading rows of the ``email`` table.

    Options: 1 prints a requested number of rows, 2 prints all rows,
    3 prints the value returned by ``cur.execute`` as the record count, and
    4 prints the N-th row of the result set (a position in the result, not
    a lookup by stored ID value).  Any other number leaves the menu.
    Non-numeric input is reported and the menu is shown again.

    The cursor is closed when the menu is left, including on errors.
    """
    global CONN
    cur = CONN.cursor()  # create the cursor

    def ask(prompt, cast):
        """Read a number from the user; return None when it is not numeric."""
        try:
            return cast(input(prompt))
        except ValueError:
            print(">>>输入无效,请重新输入")
            return None

    try:
        while True:
            print(">>>**************************************")
            print(">>>* 查询选项 *")
            print(">>>* 1.查询制定个数个查询结果集 *")
            print(">>>* 2.查询全部结果集 *")
            print(">>>* 3.查询结果集数量 *")
            print(">>>* 4.*指定ID查询 *")
            print(">>>* 0.退出 *")
            print(">>>**************************************")
            select = ask(">>>请选择功能:", float)
            if select is None:
                continue
            if select not in (1, 2, 3, 4):
                # Leave the menu without querying the table again.
                break
            sqli = "select * from email;"
            result = cur.execute(sqli)
            print("===================本次查询结果如下===================")
            if select == 1:
                result_num = ask(">>>请输入查询结果集数量:", int)
                if result_num is None:
                    continue
                print(cur.fetchmany(result_num))  # fetch the requested number of rows
            elif select == 2:
                print(cur.fetchall())  # fetch every row
            elif select == 3:
                print("email数据表数据记录数为:{0}".format(result))
            else:
                select_id = ask(">>>请输入查询ID:", int)
                if select_id is None:
                    continue
                if not 1 <= select_id <= result:
                    print(">>>ID超出查询范围,请重新输入")
                    continue
                # Skip the rows in front of the requested position.
                for _ in range(select_id - 1):
                    cur.fetchone()
                print(cur.fetchone())
            print("===================本次查询结果如上===================")
    finally:
        cur.close()  # always release the cursor
def delete_data():  # Interactive database delete menu
    """Run an interactive menu for deleting rows of the ``email`` table.

    Options: 1 deletes the row with a given ``email_id``, 2 deletes the rows
    whose ``email_id`` lies strictly between two bounds, and 3 deletes every
    row after a confirmation.  Any other number leaves the menu without
    executing anything (in particular, the previous statement is not run
    again).  Non-numeric input is reported and the menu is shown again.

    User-supplied values are passed to the driver as query parameters rather
    than being formatted into the SQL text.  The cursor is closed when the
    menu is left, including on errors.
    """
    global CONN
    cur = CONN.cursor()  # create the cursor

    def ask(prompt, as_id=False):
        """Read a number; return None when the input is not usable."""
        try:
            value = float(input(prompt))
            # IDs are truncated to integers, as the former %d formatting did.
            return int(value) if as_id else value
        except (ValueError, OverflowError):
            print(">>>输入无效,请重新输入")
            return None

    try:
        while True:
            print(">>>**************************************")
            print(">>>* 删除选项 *")
            print(">>>* 1.*指定ID删除 *")
            print(">>>* 2.*指定范围删除 *")
            print(">>>* 3.!删除全部结果集 *")
            print(">>>* 0.退出 *")
            print(">>>**************************************")
            select = ask(">>>请选择功能:")
            if select is None:
                continue
            if select == 1:
                target_id = ask(">>>请输入指定要删除ID:", as_id=True)
                if target_id is None:
                    continue
                sql = "delete from email WHERE email_id = %s;"
                params = (target_id,)
            elif select == 2:
                play_id = ask(">>>请输入指定要删除的起始ID:", as_id=True)
                if play_id is None:
                    continue
                end_id = ask(">>>请输入指定要删除的结束ID:", as_id=True)
                if end_id is None:
                    continue
                # Both bounds are exclusive.
                sql = "delete from email WHERE email_id > %s AND email_id < %s;"
                params = (play_id, end_id)
            elif select == 3:
                confirm = ask("!!!请再次确认是否要删除全部结果集(按1确认):")
                if confirm != 1:
                    continue
                sql = "delete from email;"
                params = None
            else:
                break
            try:
                cur.execute(sql, params)  # run the statement
                CONN.commit()  # persist the deletion
            except Exception as s:
                print("删除未成功:", s)
            else:
                print("删除成功")
    finally:
        cur.close()  # always release the cursor
def creat_mysql():
global CONN
cur = CONN.cursor() # 2. 创建游标对象
# 1创建数据表
try:
#create_sqli = "create table email(email_id int NOT NULL AUTO_INCREMENT, subject varchar(100), eail_storage_time varchar(100), to_ip varchar(500), to_addr varchar(50), from_name varchar(50), from_adde varchar(50), Mime_Version varchar(50), Content_Type varchar(50),Content_Transfer_Encoding varchar(50),email_Data varchar(2500),primary key (email_id));"
create_sqli = "create table email(邮件存储编号 int NOT NULL AUTO_INCREMENT, 邮件主题 varchar(100), 邮件存储时间 varchar(100), 收件人邮箱 varchar(50), 发件人IP varchar(500), 发件人姓名 varchar(50), 发件人邮箱 varchar(50), 版本 varchar(50), 内容类型 varchar(50),内容传输编码 varchar(50),邮件信息 varchar(2500),primary key (邮件存储编号));"
cur.execute(create_sqli)
except Exception as e:
print("创建数据表失败:", e)
else:
print(