邮件分析小轮子
诚然,沙箱很好用,但如果你处在不出网的环境,或者沙箱无法识别邮件内的二维码,该怎么办!?还是得人工一封一封地看!
此轮子快速解决你的分析需求
直接贴代码了,伸手党最喜欢的一集;代码里用到的 API 接口可以免费注册使用
```python
import re
import os
import email
import socket
from email import policy
from email.header import decode_header
from email.parser import BytesParser
from email.utils import parsedate_to_datetime
import ipinfo
from PIL import Image
from pyzbar.pyzbar import decode
import hashlib
import requests
import json
import chardet
import zipfile
import py7zr
import rarfile
# Regular expression matching http/https URLs.
# NOTE: inside the character class, `$-_` is an ASCII *range* (0x24-0x5F); it
# is what lets `/`, `:`, `?`, `=`, digits and upper-case letters match.  The
# same range also contains `<` and `>`, which can never appear unescaped in a
# URL, so the lookahead excludes them -- otherwise a URL inside HTML would
# swallow the markup that follows it (e.g. "http://a.b/c</p>").
url_pattern = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|(?![<>])[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
# Regular expression matching dotted-quad IPv4 addresses.  Every octet is
# limited to 0-255 so that strings such as "999.999.999.999" are not reported
# as addresses.
_ipv4_octet = r'(?:25[0-5]|2[0-4]\d|[01]?\d?\d)'
ip_pattern = re.compile(r'\b(?:' + _ipv4_octet + r'\.){3}' + _ipv4_octet + r'\b')
def decode_qr_code(image_path):
    """
    Scan an image file for QR codes / barcodes and print every payload found.

    :param image_path: path of the image file to scan
    :return: list with the decoded payload strings; empty when no code was
        found or when the image could not be opened or decoded
    """
    payloads = []
    try:
        # Image.open keeps the underlying file open; the context manager
        # releases the handle as soon as pyzbar has finished with the image.
        with Image.open(image_path) as img:
            decoded_objects = decode(img)
    except Exception as e:
        # Deliberate best-effort boundary: an unreadable or corrupt image must
        # not abort the analysis of the rest of the e-mail.
        print(f"Error decoding QR code: {e}")
        return payloads
    if not decoded_objects:
        print(f"No QR code found in {image_path}")
        return payloads
    for obj in decoded_objects:
        # errors='replace' keeps one non-UTF-8 payload from hiding the
        # remaining codes found in the same image.
        text = obj.data.decode('utf-8', errors='replace')
        print(f"QR Code Data: {text}")
        payloads.append(text)
    return payloads
def calculate_hashes(file_path):
    """
    Compute the MD5 and SHA-256 digests of a file.

    :param file_path: path of the file to hash
    :return: dict mapping 'MD5' and 'SHA-256' to the hexadecimal digests
    """
    md5_state = hashlib.md5()
    sha256_state = hashlib.sha256()
    with open(file_path, 'rb') as stream:
        # Feed the file in 8 KiB blocks so it never has to be loaded in one go;
        # read() returns b'' at end of file, which ends the iteration.
        for block in iter(lambda: stream.read(8192), b''):
            md5_state.update(block)
            sha256_state.update(block)
    return {
        'MD5': md5_state.hexdigest(),
        'SHA-256': sha256_state.hexdigest(),
    }
def get_password(emlbody_string):
    """
    Ask the DeepSeek chat-completions API to pick the archive password out of
    an e-mail body.

    The API key is read from the ``DEEPSEEK_API_KEY`` environment variable;
    when the variable is not set, the placeholder literal below is used, so it
    can still be edited in place.

    NOTE(review): the whole e-mail body is sent to a third-party service --
    confirm this is acceptable for the mail being analysed.

    :param emlbody_string: e-mail body text that is expected to mention the
        archive password
    :return: the model's reply with all whitespace removed
    :raises requests.RequestException: on connection errors, on timeout, or
        when the API answers with an HTTP error status
    """
    url = "https://api.deepseek.com/chat/completions"
    api_key = os.environ.get("DEEPSEEK_API_KEY", "【你的key,自己去注册,反正免费的】")
    payload = json.dumps({
        "messages": [
            {
                # The instruction (Chinese: "identify the extraction password
                # in the following text and output only the password") and the
                # e-mail body travel together in the system message.
                "content": '识别以下语句的解压密码,输出密码,不需要回复其他文字:' + emlbody_string,
                "role": "system"
            },
            {
                "content": "Hi",
                "role": "user"
            }
        ],
        "model": "deepseek-coder",
        "frequency_penalty": 0,
        "max_tokens": 2048,
        "presence_penalty": 0,
        "stop": None,
        "stream": False,
        "temperature": 1,
        "top_p": 1,
        "logprobs": False,
        "top_logprobs": None
    })
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json',
        'Authorization': 'Bearer ' + api_key
    }
    # requests has no default timeout; without one an unreachable API would
    # block the analysis forever.
    response = requests.post(url, headers=headers, data=payload, timeout=30)
    # Surface HTTP errors (bad key, rate limit, ...) as such instead of failing
    # later with a confusing KeyError on the error payload.
    response.raise_for_status()
    data = response.json()
    # Extract the text of the first completion choice.
    content = data['choices'][0]['message']['content']
    print('成功匹配密码:' + content)
    # Drop every whitespace character (spaces, newlines, tabs) from the reply.
    return "".join(content.split())
def extract_archive(file_path, password, output_dir):
    """
    Extract a .zip, .7z or .rar archive into ``output_dir``.

    The archive type is chosen from the file extension (case-insensitive);
    files with any other extension are left untouched.

    :param file_path: path of the archive
    :param password: archive password as text; ``None`` or ``""`` means the
        archive is extracted without a password
    :param output_dir: directory the archive members are extracted into
    """
    lowered = file_path.lower()
    # Normalise "no password" so that None does not crash on .encode().
    secret = password if password else None
    if lowered.endswith('.zip'):
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            # zipfile expects the password as bytes.
            zip_ref.extractall(
                path=output_dir,
                pwd=secret.encode() if secret is not None else None,
            )
    elif lowered.endswith('.7z'):
        with py7zr.SevenZipFile(file_path, mode='r', password=secret) as z:
            z.extractall(path=output_dir)
    elif lowered.endswith('.rar'):
        with rarfile.RarFile(file_path, 'r') as rar_ref:
            rar_ref.extractall(path=output_dir, pwd=secret)
def _decode_mime_header(value):
    """Return a header value as text, decoding and joining every RFC 2047 fragment."""
    if value is None:
        return ""
    try:
        fragments = decode_header(value)
    except email.errors.HeaderParseError:
        # Malformed encoded-word: show the raw value rather than failing.
        return str(value)
    pieces = []
    for fragment, charset in fragments:
        if isinstance(fragment, bytes):
            try:
                fragment = fragment.decode(charset or "latin-1", errors="replace")
            except LookupError:
                # Unknown or bogus charset label in the header.
                fragment = fragment.decode("utf-8", errors="replace")
        pieces.append(fragment)
    return "".join(pieces)


def _safe_attachment_name(raw_name, fallback):
    """Reduce a sender-controlled attachment name to a bare file name."""
    decoded = _decode_mime_header(raw_name).replace("\\", "/")
    # basename() drops any directory component ("../../x", "C:/x", ...) so the
    # attachment can only ever be written inside the output directory.
    base = os.path.basename(decoded).replace("\x00", "").strip()
    if base in ("", ".", ".."):
        return fallback
    return base


def _decode_text(raw_bytes, declared_charset):
    """Decode a body payload: declared charset first, then chardet, then UTF-8."""
    if declared_charset:
        try:
            return raw_bytes.decode(declared_charset)
        except (LookupError, UnicodeDecodeError):
            pass
    guessed = chardet.detect(raw_bytes)['encoding']
    if guessed:
        try:
            return raw_bytes.decode(guessed)
        except (LookupError, UnicodeDecodeError):
            pass
    # Last resort: never fail, mark undecodable bytes instead.
    return raw_bytes.decode('utf-8', errors='replace')


def _extract_attachment(archive_path, output_dir, password_provider):
    """Extract a saved .zip/.7z/.rar attachment, asking for a password only if needed."""
    lowered = archive_path.lower()
    if lowered.endswith('.zip'):
        with zipfile.ZipFile(archive_path, 'r') as zip_ref:
            # Bit 0 of the general-purpose flags marks an encrypted member.
            encrypted = any(info.flag_bits & 0x1 for info in zip_ref.infolist())
            # zipfile expects the password as bytes.
            pwd = password_provider().encode() if encrypted else None
            zip_ref.extractall(path=output_dir, pwd=pwd)
    elif lowered.endswith('.7z'):
        try:
            with py7zr.SevenZipFile(archive_path, mode='r') as zip_ref:
                zip_ref.extractall(path=output_dir)
        except py7zr.exceptions.PasswordRequired:
            with py7zr.SevenZipFile(archive_path, mode='r', password=password_provider()) as zip_ref:
                zip_ref.extractall(path=output_dir)
    elif lowered.endswith('.rar'):
        with rarfile.RarFile(archive_path, 'r') as zip_ref:
            if zip_ref.needs_password():
                zip_ref.setpassword(password_provider())
            zip_ref.extractall(path=output_dir)


def parse_eml(eml_fp, attr_dir):
    """
    Parse an .eml file and print a triage report.

    The report contains the hashes of the e-mail file, subject, sender,
    recipients, date, the first IP address found in the Received headers, the
    URLs found in the text bodies, and for every attachment its name and
    hashes.  Attachments are saved into ``attr_dir``; images are scanned for QR
    codes and .zip/.7z/.rar attachments are extracted into ``attr_dir``.  When
    an archive is password protected, ``get_password`` is called (at most once
    per e-mail) with the body text to obtain the password.

    Text bodies are reported before attachments, whatever their order in the
    message.

    :param eml_fp: path of the .eml file
    :param attr_dir: directory attachments are saved and extracted into
    """
    import mimetypes  # local import: only needed to name parts without a filename

    # Hashes of the e-mail file itself.
    eml_hashes = calculate_hashes(eml_fp)
    print(f"邮件 {eml_fp} 的哈希值:")
    for algo, digest in eml_hashes.items():
        print(f"{algo}: {digest}")

    os.makedirs(attr_dir, exist_ok=True)

    # Read the raw bytes: an .eml file can mix several encodings, so decoding
    # it as a whole with the locale encoding can fail or corrupt the content.
    with open(eml_fp, "rb") as file:
        msg = email.message_from_bytes(file.read())

    # Subject
    print("主题:", _decode_mime_header(msg["Subject"]))

    # Sender address and display name
    raw_from = msg["From"]
    if raw_from is not None:
        from_name, from_addr = email.utils.parseaddr(str(raw_from))
    else:
        from_name, from_addr = "", ""
    print("发件人邮箱:", from_addr)
    print("发件人名称:", _decode_mime_header(from_name))

    # Recipients (To, Cc and Bcc)
    recipients = []
    for header_name in ("To", "Cc", "Bcc"):
        header_values = [str(value) for value in msg.get_all(header_name, [])]
        recipients.extend(email.utils.getaddresses(header_values))
    print("收件人信息:")
    for name, addr in recipients:
        print(f" {_decode_mime_header(name)} <{addr}>")

    # Date header; the raw value is shown when it is missing or unparsable.
    raw_date = msg["Date"]
    parsed_date = None
    if raw_date is not None:
        try:
            parsed_date = parsedate_to_datetime(str(raw_date))
        except (TypeError, ValueError):
            parsed_date = None
    print("接收时间:", parsed_date if parsed_date is not None else raw_date)

    # First IP address found in the Received headers, in header order.
    # NOTE(review): the top-most Received header is the most recent hop --
    # confirm this is the address wanted as "sender IP".
    ip_matches = (ip_pattern.search(str(received)) for received in msg.get_all('Received', []))
    sender_ip = next((match.group(0) for match in ip_matches if match), None)
    if sender_ip:
        print("发件人IP地址:", sender_ip)
    else:
        print("无法获取发件人IP地址")

    # Split the leaf parts: unnamed text parts are bodies; anything with a
    # filename, or any non-text part (e.g. an inline image), is saved as a file.
    body_parts = []
    file_parts = []
    for par in msg.walk():
        if par.is_multipart():
            continue
        if par.get_filename() or par.get_content_maintype() != "text":
            file_parts.append(par)
        else:
            body_parts.append(par)

    # Bodies first, so the text is available as a password hint by the time
    # the archives are handled.
    plain_texts = []
    other_texts = []
    for par in body_parts:
        text = _decode_text(par.get_payload(decode=True) or b"", par.get_content_charset())
        if par.get_content_type() == "text/plain":
            plain_texts.append(text)
            print("邮件正文:", text[:10])
            url_label = "URL found in body:"
        else:
            other_texts.append(text)
            print("HTML正文:", text[:100])
            url_label = "URL found in HTML body:"
        for url in url_pattern.findall(text):
            print(url_label, url)
        print("-" * 60)
    password_hint = "\n".join(plain_texts or other_texts)

    cached_password = []

    def lookup_password():
        # Call the remote service only when an archive really is encrypted,
        # and at most once per e-mail.
        if not cached_password:
            cached_password.append(get_password(password_hint))
        return cached_password[0]

    # Attachments
    for index, par in enumerate(file_parts, start=1):
        extension = mimetypes.guess_extension(par.get_content_type()) or ".bin"
        attr_name = _safe_attachment_name(par.get_filename(), f"unnamed_part_{index}{extension}")
        print("附件名:", attr_name)
        attr_fp = os.path.join(attr_dir, attr_name)
        with open(attr_fp, 'wb') as f_write:
            f_write.write(par.get_payload(decode=True) or b"")
        # Hashes of the saved attachment.
        hashes = calculate_hashes(attr_fp)
        print(f"附件 {attr_name} 的哈希值:")
        for algo, hash_value in hashes.items():
            print(f"{algo}: {hash_value}")
        lowered = attr_name.lower()
        # Images are scanned for QR codes.
        if lowered.endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')) or par.get_content_maintype() == "image":
            decode_qr_code(attr_fp)
        # Archives are extracted next to the saved attachments.
        if lowered.endswith(('.zip', '.7z', '.rar')):
            try:
                _extract_attachment(attr_fp, attr_dir, lookup_password)
            except Exception as exc:
                # Best-effort: a corrupt archive, a wrong password or an
                # unreachable password service must not stop the report.
                print(f"解压附件 {attr_name} 失败: {exc}")
        print("-" * 60)
# Arguments: 1. target e-mail (.eml)  2. directory attachments are written to
if __name__ == "__main__":
    import sys  # local import: only the script entry point needs argv

    # Both arguments are optional; the previous hard-coded values remain the
    # defaults, so running the script without arguments behaves as before.
    cli_args = sys.argv[1:]
    eml_path = cli_args[0] if len(cli_args) >= 1 else "3.eml"
    output_dir = cli_args[1] if len(cli_args) >= 2 else "E:\\python\\邮件分析\\Safedi"
    parse_eml(eml_path, output_dir)