数据结构与算法之霍夫曼编码解码实现

最新推荐文章于 2022-11-15 04:07:10 发布

一只帅气的小菜鸡

最新推荐文章于 2022-11-15 04:07:10 发布

阅读量406

点赞数

文章标签： java 数据结构与算法

本文链接：https://blog.csdn.net/weixin_42028608/article/details/102635976

版权

目标：将字符串“can you can a can as a can canner can a can.”编码再解码

流程：

将字符串转成bytes (byte[]格式)（eg.[99,97,110....]）
霍夫曼编码
1. 统计bytes中各字节出现的次数，存于Map中（形如：{“字节”：c，“次数”：11}....），继而创建出对应的List<Node>
2. 构建霍夫曼树
  - 将List<Node>中的node按权值（weight）排序
  - 合并权值最小的两个node（一开始是node，后面是树），构成一棵新树
  - 将新树加入List<Node>中，并移除刚才被合并的两个node（一开始是node，后面是树）
  - 循环至List<Node>中只剩一棵树
3. 利用霍夫曼树构建霍夫曼编码表（个人感觉是难点）
4. 利用霍夫曼编码表编码bytes获得霍夫曼码（只有0、1）
5. 将霍夫曼码转成字节数组（byte[]格式，每8位一组转成一个字节，末尾不足8位的部分单独转成一个字节）
霍夫曼解码
1. 将字节数组转成霍夫曼码（只有0、1）
2. 对照霍夫曼编码表去解码霍夫曼码
将byte[]格式转字符串输出
代码实现：

霍夫曼编解码代码

package demo10;

import java.nio.charset.StandardCharsets;
import java.nio.file.attribute.AclEntry.Builder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Base64.Decoder;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
/**
 * Huffman coding demo: encodes a byte sequence into a packed bit stream and
 * decodes it back using the code table built during encoding.
 *
 * <p>The encoder and decoder share static state ({@link #huffList} and the bit
 * count of the final group), so {@code decoder} must be given the output of the
 * most recent {@code huffmanZip} call. The class is not thread-safe.
 */
public class WangTestHuffmanCode {

	/** Code table (byte value -> string of '0'/'1') built by the most recent encode. */
	static Map<Byte, String> huffList = new HashMap<>();

	/**
	 * Number of meaningful bits (1..8) in the last byte produced by the most
	 * recent {@code zip} call. Needed because the final group may be shorter
	 * than 8 bits and may start with zeros that a plain binary conversion drops.
	 */
	private static int lastGroupBits = 8;

	public static void main(String[] args) {
		String msg = "can you can a can as a can canner can a can.";
		// String -> bytes, with an explicit charset so the result is platform independent
		byte[] bytes = msg.getBytes(StandardCharsets.UTF_8);
		// Huffman encode
		byte[] b = huffmanZip(bytes);
		// Huffman decode
		byte[] newBytes = decoder(huffList, b);
		// bytes -> String
		System.out.println(new String(newBytes, StandardCharsets.UTF_8));
	}

	/**
	 * Decodes a packed Huffman bit stream.
	 *
	 * @param huffList code table mapping each byte value to its bit string
	 * @param b        packed bytes produced by the most recent {@code huffmanZip} call
	 * @return the decoded bytes
	 * @throws IllegalArgumentException if the bit stream ends in the middle of a code
	 */
	private static byte[] decoder(Map<Byte, String> huffList, byte[] b) {
		// Expand every byte to its bit string. All bytes but the last carry 8 bits;
		// the last one carries lastGroupBits bits, left-padded with zeros.
		StringBuilder bits = new StringBuilder(b.length * 8);
		for (int i = 0; i < b.length; i++) {
			// Adding 0x100 and dropping the leading '1' yields exactly 8 digits.
			String eight = Integer.toBinaryString((b[i] & 0xFF) + 0x100).substring(1);
			if (i < b.length - 1) {
				bits.append(eight);
			} else {
				bits.append(eight.substring(8 - lastGroupBits));
			}
		}
		System.out.println(bits);

		// Invert the code table: bit string -> byte value.
		Map<String, Byte> codeToByte = new HashMap<>();
		for (Map.Entry<Byte, String> entry : huffList.entrySet()) {
			codeToByte.put(entry.getValue(), entry.getKey());
		}

		// Grow a candidate code one bit at a time; Huffman codes are prefix-free,
		// so the first candidate found in the table is the right one.
		List<Byte> decoded = new ArrayList<>();
		StringBuilder candidate = new StringBuilder();
		for (int i = 0; i < bits.length(); i++) {
			candidate.append(bits.charAt(i));
			Byte symbol = codeToByte.get(candidate.toString());
			if (symbol != null) {
				decoded.add(symbol);
				candidate.setLength(0);
			}
		}
		if (candidate.length() > 0) {
			throw new IllegalArgumentException(
					"Trailing bits do not form a complete Huffman code: " + candidate);
		}

		// Unbox the list into a primitive array.
		byte[] bytes = new byte[decoded.size()];
		for (int i = 0; i < bytes.length; i++) {
			bytes[i] = decoded.get(i);
		}
		return bytes;
	}

	/**
	 * Huffman-encodes the given bytes. As a side effect the code table is stored
	 * in {@link #huffList} for later decoding.
	 *
	 * @param bytes data to encode; may be empty
	 * @return the packed bit stream (empty for empty input)
	 */
	private static byte[] huffmanZip(byte[] bytes) {
		// Count occurrences of each byte and wrap them as tree nodes
		List<Node> nodes = getNodes(bytes);
		// Build the Huffman tree
		Node hufTree = createHuffmanTree(nodes);
		// Derive the code table from the tree
		Map<Byte, String> hufListMap = createHuffmanList(hufTree);
		// Encode
		return zip(bytes, hufListMap);
	}

	/**
	 * Encodes the bytes with the given code table and packs the bits into bytes,
	 * 8 bits per byte; a shorter final group is stored as its own byte.
	 *
	 * @param bytes      data to encode
	 * @param hufListMap code table mapping each byte value to its bit string
	 * @return the packed bit stream
	 * @throws IllegalArgumentException if a byte has no entry in the code table
	 */
	private static byte[] zip(byte[] bytes, Map<Byte, String> hufListMap) {
		// Concatenate the code of every input byte
		StringBuilder bits = new StringBuilder();
		for (byte b : bytes) {
			String code = hufListMap.get(b);
			if (code == null) {
				throw new IllegalArgumentException("No Huffman code for byte " + b);
			}
			bits.append(code);
		}

		int bitCount = bits.length();
		// Remember how many bits the final group holds so decoding can restore
		// its leading zeros.
		int remainder = bitCount % 8;
		lastGroupBits = (remainder == 0) ? 8 : remainder;

		// One output byte per group of up to 8 bits
		byte[] zipbytes = new byte[(bitCount + 7) / 8];
		int index = 0;
		for (int i = 0; i < bitCount; i += 8) {
			String group = bits.substring(i, Math.min(i + 8, bitCount));
			zipbytes[index++] = (byte) Integer.parseInt(group, 2);
		}
		return zipbytes;
	}

	/**
	 * Builds the code table from a Huffman tree: a step to the left child
	 * appends '0' to the path, a step to the right child appends '1', and each
	 * leaf stores its accumulated path. The shared {@link #huffList} is cleared
	 * first so codes from an earlier encode do not leak into this one.
	 *
	 * @param hufTree root of the Huffman tree, or {@code null} for empty input
	 * @return the code table (the shared {@link #huffList}); empty if the tree is {@code null}
	 */
	private static Map<Byte, String> createHuffmanList(Node hufTree) {
		huffList.clear();
		if (hufTree == null) {
			return huffList;
		}
		if (hufTree.left == null && hufTree.right == null) {
			// Only one distinct byte: the root is a leaf and has no path, so give
			// it a one-bit code to keep the encoding non-empty.
			huffList.put(hufTree.data, "0");
			return huffList;
		}
		transferToList(hufTree.left, "0", new StringBuilder());
		transferToList(hufTree.right, "1", new StringBuilder());
		return huffList;
	}

	/**
	 * Recursively records the code of every leaf below {@code node}.
	 *
	 * @param node current node (non-null)
	 * @param code bit appended for the step into {@code node} ("0" or "1")
	 * @param sb   path from the root to the parent of {@code node}; not modified
	 */
	private static void transferToList(Node node, String code, StringBuilder sb) {
		// Copy so sibling branches do not see each other's path
		StringBuilder path = new StringBuilder(sb).append(code);
		if (node.data == null) { // internal node
			transferToList(node.left, "0", path);
			transferToList(node.right, "1", path);
		} else {
			huffList.put(node.data, path.toString());
		}
	}

	/**
	 * Builds a Huffman tree by repeatedly merging the two lightest nodes.
	 * The list is consumed in the process.
	 *
	 * @param nodes leaf nodes, one per distinct byte
	 * @return the root of the tree, or {@code null} if {@code nodes} is empty
	 */
	private static Node createHuffmanTree(List<Node> nodes) {
		if (nodes.isEmpty()) {
			return null;
		}
		while (nodes.size() > 1) {
			// Node orders by descending weight, so the two lightest end up last
			Collections.sort(nodes);
			Node little1 = nodes.remove(nodes.size() - 1);
			Node little2 = nodes.remove(nodes.size() - 1);
			// Merge them under a new internal node (no data, summed weight)
			Node newNode = new Node(null, little1.weight + little2.weight);
			newNode.left = little1;
			newNode.right = little2;
			// The merged tree competes again in the next round
			nodes.add(newNode);
		}
		return nodes.get(0);
	}

	/**
	 * Counts how often each byte value occurs and creates one node per distinct value.
	 *
	 * @param bytes input data
	 * @return list of leaf nodes weighted by occurrence count
	 */
	private static List<Node> getNodes(byte[] bytes) {
		// Occurrence count per byte value
		Map<Byte, Integer> counts = new HashMap<>();
		for (byte b : bytes) {
			counts.merge(b, 1, Integer::sum);
		}
		// One leaf node per entry
		List<Node> nodes = new ArrayList<>();
		for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
			nodes.add(new Node(entry.getKey(), entry.getValue()));
		}
		return nodes;
	}

}

树节点创建代码：

package demo10;

/**
 * Node of a Huffman tree.
 *
 * <p>The natural ordering is by <em>descending</em> weight, so after sorting a
 * list the lightest nodes are at the end. The ordering is not consistent with
 * {@code equals}: two distinct nodes of equal weight compare as 0.
 */
public class Node implements Comparable<Node> {
	/** Byte value carried by a leaf; {@code null} for a merged (internal) node. */
	Byte data;
	/** Weight of the node: occurrence count for a leaf, sum of the children for a merged node. */
	int weight;
	/** Left child, or {@code null} if none has been attached. */
	Node left;
	/** Right child, or {@code null} if none has been attached. */
	Node right;

	/**
	 * Creates a node without children.
	 *
	 * @param data   byte value, or {@code null} for an internal node
	 * @param weight weight of the node
	 */
	public Node(Byte data,int weight) {
		this.data=data;
		this.weight=weight;
	}
	
	@Override
	public String toString() {
		return "Node [data=" + data + ", weight=" + weight + "]";
	}

	/**
	 * Orders nodes by descending weight.
	 *
	 * @param o the node to compare with
	 * @return a negative value if this node is heavier than {@code o}, zero if
	 *         the weights are equal, a positive value if it is lighter
	 */
	@Override
	public int compareTo(Node o) {
		// Integer.compare instead of subtraction: a difference can overflow
		// and flip the sign for extreme weights.
		return Integer.compare(o.weight, this.weight);
	}
}

一只帅气的小菜鸡

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
数据结构与算法之霍夫曼编码解码实现

目标：将字符串“can you can a can as a can canner can a can.”编码再解码流程：将字符串转成bytes (byte[]格式)（eg.[99,97,110....]）霍夫曼编码统计bytes中各字节出现的次数，存于Map中（形如：{“字节”：c，“次数”：11}....），继而创建出对应的List<Node> 构建霍夫曼树 ...
复制链接

扫一扫