AI接口 + python 邮件分析小轮子

最新推荐文章于 2024-11-17 19:39:42 发布
爆弹大狸子
最新推荐文章于 2024-11-17 19:39:42 发布
阅读量317
点赞数 2
分类专栏：网络安全 python 文章标签： python 信息安全安全威胁分析
本文链接：https://blog.csdn.net/qq_39345447/article/details/139859530
版权
网络安全同时被 2 个专栏收录
5 篇文章 0 订阅
订阅专栏
python
4 篇文章 0 订阅
订阅专栏
邮件分析小轮子
沙箱固然好用，但如果你处在不出网的环境，或者沙箱无法识别邮件内的二维码，该怎么办？还是得人工一封封地看！
这个小轮子可以快速满足你的邮件分析需求。
在这里插入图片描述
直接贴代码了，伸手党最喜欢的一集；代码里用到的 AI 接口可以免费注册使用。

```python
import re
import os
import email
import socket
from email import policy
from email.header import decode_header
from email.parser import BytesParser
from email.utils import parsedate_to_datetime

import ipinfo
from PIL import Image
from pyzbar.pyzbar import decode
import hashlib
import requests
import json
import chardet
import zipfile
import py7zr
import rarfile

# Regular expression that matches http:// and https:// URLs in free text.
url_pattern = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
# Regular expression that matches dotted-quad IPv4-style tokens
# (four groups of 1-3 digits; the 0-255 range of each group is not validated).
ip_pattern = re.compile(r'\b(?:\d{1,3}\.){3}\d{1,3}\b')


def decode_qr_code(image_path):
    """Print the payload of every code that pyzbar finds in an image file.

    This is a best-effort helper: any failure (unreadable file, unsupported
    image format, decoder error) is reported on stdout and swallowed so that
    the caller can keep processing the remaining attachments.

    :param image_path: path of the image file to scan
    :return: None; results are printed
    """
    try:
        # Open through a context manager so the underlying file handle is
        # released even when decoding raises.
        with Image.open(image_path) as img:
            decoded_objects = decode(img)

        if not decoded_objects:
            print(f"No QR code found in {image_path}")
            return

        for obj in decoded_objects:
            # A code can carry arbitrary bytes; replacing undecodable bytes
            # keeps one odd payload from hiding the remaining results.
            print(f"QR Code Data: {obj.data.decode('utf-8', errors='replace')}")
    except Exception as e:  # deliberate catch-all: scanning must never abort the analysis
        print(f"Error decoding QR code: {e}")


def calculate_hashes(file_path):
    """
    Compute the MD5 and SHA-256 digests of a file.

    The file is read in 8192-byte blocks, so it never has to fit in memory
    as a whole.

    :param file_path: path of the file to hash
    :return: dict with the keys 'MD5' and 'SHA-256' mapped to hex digests
    """
    md5_state = hashlib.md5()
    sha256_state = hashlib.sha256()

    with open(file_path, 'rb') as stream:
        # iter() with a sentinel stops as soon as read() returns b'' (EOF).
        for block in iter(lambda: stream.read(8192), b''):
            md5_state.update(block)
            sha256_state.update(block)

    return {
        'MD5': md5_state.hexdigest(),
        'SHA-256': sha256_state.hexdigest(),
    }


def get_password(emlbody_string):
    """Ask the DeepSeek chat-completions API to pick the archive password out of a text.

    The text is appended to a fixed Chinese instruction ("identify the
    extraction password in the following text and output only the password")
    and POSTed to https://api.deepseek.com/chat/completions, i.e. the e-mail
    body leaves the machine. The placeholder in the Authorization header must
    be replaced with a real API key before this can succeed.

    :param emlbody_string: text (normally the e-mail body) that mentions the password
    :return: the model's answer with all space characters removed
    :raises requests.RequestException: on network failure, timeout or a non-2xx status
    :raises KeyError, IndexError: if the response JSON lacks the expected fields
    """
    url = "https://api.deepseek.com/chat/completions"

    payload = json.dumps({
        "messages": [
            {
                "content": '识别以下语句的解压密码，输出密码，不需要回复其他文字：' + emlbody_string,
                "role": "system"
            },
            {
                "content": "Hi",
                "role": "user"
            }
        ],
        "model": "deepseek-coder",
        "frequency_penalty": 0,
        "max_tokens": 2048,
        "presence_penalty": 0,
        "stop": None,
        "stream": False,
        "temperature": 1,
        "top_p": 1,
        "logprobs": False,
        "top_logprobs": None
    })
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json',
        'Authorization': 'Bearer 【你的key，自己去注册，反正免费的】'
    }

    # requests has no default timeout; without one a stalled connection would
    # block the whole analysis forever.
    response = requests.request("POST", url, headers=headers, data=payload, timeout=30)
    # Fail with a clear HTTP error (bad key, quota, ...) instead of a KeyError
    # while digging through an error payload below.
    response.raise_for_status()

    data = json.loads(response.text)

    # The answer text lives in the first choice of the completion.
    content = data['choices'][0]['message']['content']

    print('成功匹配密码：' + content)
    return content.replace(" ", "")


def extract_archive(file_path, password, output_dir):
    """Extract a .zip, .7z or .rar archive into a directory.

    The archive type is chosen from the file extension (case-insensitive).
    Files with any other extension are left untouched.

    :param file_path: path of the archive
    :param password: archive password as str; None or "" means "no password"
    :param output_dir: directory that receives the extracted files
    :return: None
    """
    lowered = file_path.lower()
    # Treat None and "" alike so callers need not special-case plain archives.
    secret = password or None

    if lowered.endswith('.zip'):
        # zipfile wants the password as bytes (or None), never as str.
        pwd_bytes = secret.encode() if secret is not None else None
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            zip_ref.extractall(path=output_dir, pwd=pwd_bytes)
    elif lowered.endswith('.7z'):
        with py7zr.SevenZipFile(file_path, mode='r', password=secret) as z:
            z.extractall(path=output_dir)
    elif lowered.endswith('.rar'):
        with rarfile.RarFile(file_path, 'r') as rar_ref:
            rar_ref.extractall(path=output_dir, pwd=secret)


def _decode_mime_words(value):
    """Decode an RFC 2047 encoded header value to text, joining every chunk."""
    if value is None:
        return ""
    try:
        chunks = decode_header(value)
    except email.errors.HeaderParseError:
        # Malformed encoded word: show the raw value rather than crash.
        return str(value)
    pieces = []
    for chunk, charset in chunks:
        if isinstance(chunk, bytes):
            try:
                pieces.append(chunk.decode(charset or "utf-8", errors="replace"))
            except LookupError:
                # Unknown or bogus charset label.
                pieces.append(chunk.decode("utf-8", errors="replace"))
        else:
            pieces.append(chunk)
    return "".join(pieces)


def _safe_attachment_name(raw_name):
    """Decode an attachment file name and strip any directory components from it."""
    decoded = _decode_mime_words(raw_name)
    # The name comes from an untrusted e-mail: never let it leave attr_dir.
    base = os.path.basename(decoded.replace("\\", "/"))
    if base in ("", ".", ".."):
        return "attachment.bin"
    return base


def _decode_text_part(part):
    """Decode the payload of a non-multipart message part to str."""
    raw = part.get_payload(decode=True) or b""
    # Prefer the charset the part declares, then chardet's guess, then UTF-8.
    candidates = (part.get_content_charset(), chardet.detect(raw)['encoding'], 'utf-8')
    for encoding in candidates:
        if not encoding:
            continue
        try:
            return raw.decode(encoding)
        except (UnicodeDecodeError, LookupError):
            continue
    # latin1 maps every byte, so this last resort cannot fail.
    return raw.decode('latin1')


def _extract_attachment_archive(archive_fp, out_dir, password_hint):
    """Extract a saved .zip/.7z/.rar attachment, asking get_password() only when needed."""
    lowered = archive_fp.lower()
    try:
        if lowered.endswith('.zip'):
            with zipfile.ZipFile(archive_fp, 'r') as zip_ref:
                # Bit 0 of the general-purpose flags marks an encrypted member.
                encrypted = any(info.flag_bits & 0x1 for info in zip_ref.infolist())
                # zipfile needs the password as bytes.
                pwd = get_password(password_hint).encode() if encrypted else None
                zip_ref.extractall(out_dir, pwd=pwd)
        elif lowered.endswith('.7z'):
            try:
                with py7zr.SevenZipFile(archive_fp, 'r') as zip_ref:
                    zip_ref.extractall(out_dir)
            except py7zr.exceptions.PasswordRequired:
                with py7zr.SevenZipFile(archive_fp, 'r', password=get_password(password_hint)) as zip_ref:
                    zip_ref.extractall(out_dir)
        elif lowered.endswith('.rar'):
            with rarfile.RarFile(archive_fp, 'r') as zip_ref:
                if zip_ref.needs_password():
                    zip_ref.extractall(out_dir, pwd=get_password(password_hint))
                else:
                    zip_ref.extractall(out_dir)
    except Exception as e:  # deliberate catch-all: one bad archive must not abort the report
        print(f"解压附件 {archive_fp} 失败: {e}")


def parse_eml(eml_fp, attr_dir):
    """
    Parse an .eml file and print a triage report.

    The report contains the hashes of the e-mail, subject, sender, recipients,
    date, the first IPv4-looking token found in a Received header, the URLs
    found in the bodies, and for every attachment its name and hashes.
    Attachments are saved to attr_dir; images are scanned with
    decode_qr_code() and .zip/.7z/.rar attachments are extracted into
    attr_dir, using get_password() on the body text when an archive is
    encrypted.

    :params eml_fp: path of the .eml file
    :params attr_dir: directory where attachments are saved and extracted
    """
    # Hashes of the e-mail file itself.
    hashes_eml = calculate_hashes(eml_fp)

    print(f"邮件 {eml_fp} 的哈希值:")
    for algo1, hash_value1 in hashes_eml.items():
        print(f"{algo1}: {hash_value1}")

    os.makedirs(attr_dir, exist_ok=True)

    # Read raw bytes: an .eml is not guaranteed to be valid text in the
    # platform's default encoding.
    with open(eml_fp, "rb") as file:
        msg = email.message_from_bytes(file.read())

    # Subject
    print("主题：", _decode_mime_words(msg["Subject"]))

    # Sender: split the address first, then decode the display name.
    sender_name, sender_addr = email.utils.parseaddr(str(msg.get("From", "")))
    print("发件人邮箱：", sender_addr)
    print("发件人名称：", _decode_mime_words(sender_name))

    # Recipients (To, Cc, Bcc)
    recipients = []
    for header_name in ("To", "Cc", "Bcc"):
        values = [str(value) for value in msg.get_all(header_name, [])]
        recipients.extend(email.utils.getaddresses(values))

    print("收件人信息：")
    for name, addr in recipients:
        print(f"  {_decode_mime_words(name)} <{addr}>")

    # Date header; None when it is missing or unparsable.
    date_header = msg.get("Date")
    try:
        received_date = parsedate_to_datetime(str(date_header)) if date_header else None
    except (TypeError, ValueError):
        received_date = None
    print("接收时间：", received_date)

    # First IPv4-looking token in the Received headers, in header order.
    sender_ip = None
    for received in msg.get_all('received', []):
        match = ip_pattern.search(str(received))
        if match:
            sender_ip = match.group(0)
            break
    if sender_ip:
        print("发件人IP地址：", sender_ip)
    else:
        print("无法获取发件人IP地址")

    # Bodies and attachments
    plain_texts = []
    html_texts = []
    archive_paths = []
    for par in msg.walk():
        if par.is_multipart():  # containers carry no data of their own
            continue

        name = par.get_filename()
        if name:
            # Attachment
            attr_name = _safe_attachment_name(name)
            print("附件名:", attr_name)
            attr_data = par.get_payload(decode=True) or b""
            attr_fp = os.path.join(attr_dir, attr_name)
            with open(attr_fp, 'wb') as f_write:
                f_write.write(attr_data)

            hashes = calculate_hashes(attr_fp)
            print(f"附件 {attr_name} 的哈希值:")
            for algo, hash_value in hashes.items():
                print(f"{algo}: {hash_value}")

            lowered = attr_name.lower()
            if lowered.endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
                decode_qr_code(attr_fp)
            if lowered.endswith(('.zip', '.7z', '.rar')):
                # Extract after the walk so the body text (the password
                # hint) is known even if the archive precedes it.
                archive_paths.append(attr_fp)
        elif par.get_content_type() == "text/plain":  # plain-text body
            body = _decode_text_part(par)
            plain_texts.append(body)
            print("邮件正文：", body[:10])
            for url in url_pattern.findall(body):
                print("URL found in body:", url)
        else:  # every other unnamed part is handled as an HTML body
            html_body = _decode_text_part(par)
            html_texts.append(html_body)
            print("HTML正文：", html_body[:100])
            for url in url_pattern.findall(html_body):
                print("URL found in HTML body:", url)
        print("-" * 60)

    # Prefer the plain-text body as password hint; fall back to the HTML one.
    password_hint = "\n".join(plain_texts) or "\n".join(html_texts)
    for archive_fp in archive_paths:
        _extract_attachment_archive(archive_fp, attr_dir, password_hint)



# Arguments: 1. target .eml file  2. directory where attachments are saved/extracted.
# Both may be given on the command line; the original hard-coded values
# remain the defaults when they are omitted.

if __name__ == "__main__":
    import sys

    eml_path = sys.argv[1] if len(sys.argv) > 1 else "3.eml"
    output_dir = sys.argv[2] if len(sys.argv) > 2 else "E:\\python\\邮件分析\\Safedi"
    parse_eml(eml_path, output_dir)