# 基于哈夫曼树的压缩与解压 (Huffman-tree-based file compression and decompression)

import heapq
import os
from collections import defaultdict


# 节点类
class Node:
    """A node of the Huffman tree.

    ``freq`` is the weight used for ordering, ``char`` is the symbol stored
    in the node (``None`` unless one is supplied), and ``left`` / ``right``
    are the child links, which start out unset.
    """

    def __init__(self, freq, char=None):
        # Weight and symbol come from the caller; children are attached later.
        self.freq, self.char = freq, char
        self.left = self.right = None

    def __lt__(self, other):
        """Compare by weight only, so nodes can be stored in a ``heapq`` heap."""
        return other.freq > self.freq


# 统计字符频率
def count_frequency(text):
    """Count how often each symbol occurs in ``text``.

    Returns a ``defaultdict(int)`` keyed by symbol, with keys inserted in the
    order the symbols are first seen.
    """
    tally = defaultdict(int)
    for symbol in text:
        tally[symbol] = tally[symbol] + 1
    return tally


# 构建哈夫曼树
def build_huffman_tree(frequency):
    """Build a Huffman tree from a symbol -> count mapping and return its root.

    Every symbol becomes a leaf ``Node``; the two lightest nodes are then
    merged repeatedly under a new parent (lighter one on the left) until a
    single root remains. With exactly one symbol the root is that leaf.

    Args:
        frequency: mapping of symbol to occurrence count.

    Returns:
        The root ``Node`` of the tree.

    Raises:
        ValueError: if ``frequency`` is empty, since there is nothing to
            build a tree from.
    """
    if not frequency:
        raise ValueError("cannot build a Huffman tree from an empty frequency table")

    heap = []
    # Push one leaf per symbol, in mapping order, so ties are resolved the
    # same way on every run for the same input.
    for char, freq in frequency.items():
        heapq.heappush(heap, Node(freq, char))

    while len(heap) > 1:
        lighter = heapq.heappop(heap)
        heavier = heapq.heappop(heap)
        merged = Node(lighter.freq + heavier.freq)
        merged.left, merged.right = lighter, heavier
        heapq.heappush(heap, merged)

    return heap[0]


# 构建哈夫曼编码表
def build_huffman_codes(root):
    """Return the ``{symbol: bit-string}`` code table for the tree at ``root``.

    Going to a left child appends ``'0'`` to the code and going to a right
    child appends ``'1'``. A node without children is a leaf and its ``char``
    receives the code accumulated on the way down.

    Args:
        root: root node of the tree (an object with ``char``, ``left`` and
            ``right`` attributes), or ``None`` for an empty tree.

    Returns:
        A dict mapping each symbol to its code. It is empty when ``root`` is
        ``None``.
    """
    codes = {}
    if root is None:
        return codes

    # A tree holding a single distinct symbol has a leaf as its root. An
    # empty code would encode the whole text to zero bits and make it
    # impossible to decode, so the lone symbol gets the one-bit code '0'.
    if root.left is None and root.right is None:
        codes[root.char] = '0'
        return codes

    # Walk the tree with an explicit stack so that a very deep (skewed) tree
    # cannot hit the interpreter's recursion limit.
    pending = [(root, '')]
    while pending:
        node, code = pending.pop()
        if node.left is None and node.right is None:
            codes[node.char] = code
            continue
        # Push the right child first so the left subtree is visited first.
        if node.right is not None:
            pending.append((node.right, code + '1'))
        if node.left is not None:
            pending.append((node.left, code + '0'))
    return codes


# 压缩文件
def compress_file(file_path, output_path, codes):
    """Encode the text file at ``file_path`` with ``codes`` into ``output_path``.

    Output layout: the first byte holds the number of padding bits appended
    to the bit stream, and the remaining bytes are the encoded bits packed
    eight per byte, most significant bit first. The code table is saved next
    to the output as ``output_path + ".codes"``, containing ``str(codes)``.

    Args:
        file_path: path of the text file to compress.
        output_path: path of the compressed file to create.
        codes: mapping of each symbol to its bit-string code.

    Raises:
        KeyError: if the text contains a symbol that has no entry in ``codes``.
    """
    with open(file_path, 'r') as file:
        text = file.read()

    bits = ''.join(codes[char] for char in text)

    # The pad length is always 1..8 (a whole extra byte when the stream is
    # already byte-aligned). This is the established file format, so it is
    # kept exactly as is.
    padding = 8 - len(bits) % 8
    bits += '0' * padding

    # The padded stream is a whole number of bytes; giving to_bytes the exact
    # length preserves leading zero bits.
    payload = int(bits, 2).to_bytes(len(bits) // 8, 'big')

    with open(output_path, 'wb') as file:
        file.write(bytes([padding]))
        file.write(payload)

    # Save the table that was actually used for encoding, so the sidecar can
    # never disagree with the compressed data.
    with open(output_path + ".codes", "w") as codes_file:
        codes_file.write(str(codes))


# 解压文件
def decompress_file(file_path, output_path, codes):
    """Decode the compressed file at ``file_path`` and write the text out.

    The first byte of the input is read as the number of padding bits at the
    end of the bit stream. The remaining bytes are expanded to bits (most
    significant bit first) and matched against ``codes`` one code at a time.

    Args:
        file_path: path of the compressed file.
        output_path: path of the text file to create.
        codes: decoding table mapping each bit-string code to the symbol it
            stands for (the inverse of a symbol -> code table).

    Raises:
        ValueError: if the input file is empty and therefore has no padding
            header byte.
    """
    with open(file_path, 'rb') as file:
        header = file.read(1)
        if not header:
            raise ValueError("compressed file is empty: missing padding header")
        padding = header[0]
        bit_stream = ''.join(format(byte, '08b') for byte in file.read())

    # A slice of [:-0] is [:0] and would throw the whole stream away, so only
    # strip when there really are padding bits.
    if padding:
        bit_stream = bit_stream[:-padding]

    # Collect decoded symbols in a list and join once, instead of growing a
    # string symbol by symbol.
    decoded = []
    code = ''
    for bit in bit_stream:
        code += bit
        if code in codes:
            decoded.append(codes[code])
            code = ''

    with open(output_path, 'w') as file:
        file.write(''.join(decoded))


# 计算压缩比
def calculate_compression_ratio(original_size, compressed_size):
    """Return the space saved by compression as a percentage of the original.

    100 means the output is empty, 0 means the size did not change, and a
    negative value means the output is larger than the input.

    Args:
        original_size: size of the uncompressed data.
        compressed_size: size of the compressed data, in the same unit.

    Returns:
        The saving in percent. An ``original_size`` of 0 yields 0.0, because
        there is nothing to save and the division would otherwise fail.
    """
    if original_size == 0:
        return 0.0
    return (1 - compressed_size / original_size) * 100


# 菜单功能
def menu():
    """Run the interactive console menu until the user chooses to exit.

    Option 1 compresses a text file: it builds the Huffman code table from
    the file's contents, calls ``compress_file`` and prints the space saving.
    Option 2 decompresses a file: it loads the code table from the
    ``<file>.codes`` sidecar that ``compress_file`` writes next to its
    output, and calls ``decompress_file``. Option 3 leaves the loop.

    I/O and parsing failures are reported to the user and the menu is shown
    again instead of terminating the program.
    """
    import ast  # local import: only needed to parse the saved code table

    while True:
        print("菜单功能:")
        print("1. 文件压缩")
        print("2. 文件解压")
        print("3. 退出程序")
        choice = input("请选择操作:")
        if choice == "1":
            file_path = input("请输入要压缩的文件路径:")
            output_path = input("请输入压缩后的文件保存路径:")

            try:
                with open(file_path, 'r') as file:
                    text = file.read()

                # An empty file has no symbols to build a tree from, and its
                # size of 0 would make the ratio meaningless.
                if not text:
                    print("文件为空,无法压缩。\n")
                    continue

                frequency = count_frequency(text)
                huffman_tree = build_huffman_tree(frequency)
                codes = build_huffman_codes(huffman_tree)
                compress_file(file_path, output_path, codes)

                original_size = os.path.getsize(file_path)
                compressed_size = os.path.getsize(output_path)
            except (OSError, UnicodeError) as exc:
                print("压缩失败:{}\n".format(exc))
                continue

            compression_ratio = calculate_compression_ratio(original_size, compressed_size)
            print("压缩成功!压缩比:{:.2f}%\n".format(compression_ratio))
        elif choice == "2":
            file_path = input("请输入要解压的文件路径:")
            output_path = input("请输入解压后的文件保存路径:")

            try:
                # The code table cannot be recovered from the compressed bits
                # themselves; it is read back from the sidecar file, which
                # holds the str() form of the symbol -> code dict.
                with open(file_path + ".codes", 'r') as codes_file:
                    codes = ast.literal_eval(codes_file.read())
                if not isinstance(codes, dict):
                    raise ValueError("code table file does not contain a dict")

                # decompress_file looks symbols up by code, so hand it the
                # inverse (code -> symbol) of the saved table.
                decoding_table = {bits: symbol for symbol, bits in codes.items()}
                decompress_file(file_path, output_path, decoding_table)
            except (OSError, ValueError, SyntaxError) as exc:
                print("解压失败:{}\n".format(exc))
                continue

            print("解压成功!\n")
        elif choice == "3":
            print("退出程序")
            break
        else:
            print("无效选择,请重新选择。\n")


# 测试
if __name__ == '__main__':
    # Start the interactive menu only when this file is run as a script,
    # not when it is imported as a module.
    menu()
# (Blog-page residue removed here: like/bookmark/comment counters and payment-widget text that was not part of the program.)