自己写的 Huffman tree、Huffman code,以及利用它们进行压缩和解压的示例

根据 Huffman tree / Huffman code 的原理自己实现的示例。

package indi.tom.dataStructure.tree;

import org.junit.Test;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

/**
 * Builds a Huffman tree for a byte array and derives a prefix code (a string of
 * '0'/'1' characters) for every distinct byte value in it.
 *
 * <p>An instance keeps the occurrence counts and the code table of the most recent
 * call; both are rebuilt from scratch on every call, so one instance can be reused
 * for several inputs. Instances are not thread-safe.
 *
 * @Author Tom
 * @Date 2020/12/16 17:33
 * @Version 1.0
 */
public class HuffmanCode {

    // Huffman code of each distinct byte, as produced by the last createHuffmanCodeMap call.
    private HashMap<Byte, String> byteStringHashMap = new HashMap<>();
    // Number of occurrences of each distinct byte, as counted by the last createHuffmanTree call.
    private final HashMap<Byte, Integer> byteIntegerHashMap = new HashMap<>();

    /**
     * Demo: prints the occurrence count of every character of a sample string,
     * followed by the generated Huffman code table.
     */
    @Test
    public void test01() {
        String testStr = "i like like like java do you like a java";

        // The sample is pure ASCII, so this yields 40 bytes; the charset is spelled out
        // so the result does not depend on the platform default.
        byte[] bytes = testStr.getBytes(java.nio.charset.StandardCharsets.UTF_8);

        createHuffmanCodeMap(bytes);
        for (Map.Entry<Byte, Integer> byteIntegerEntry : byteIntegerHashMap.entrySet()) {
            char c = (char) byteIntegerEntry.getKey().byteValue();
            System.out.println(c + " -> " + byteIntegerEntry.getValue());
        }
        System.out.println(byteStringHashMap);
    }

    /**
     * Counts how often each byte value occurs in {@code array} and builds the
     * Huffman tree for those counts.
     *
     * @param array the input bytes; must not be {@code null}
     * @return the root of the tree; a single leaf when the input contains only one
     *         distinct byte value; {@code null} when the input is empty
     */
    public Node createHuffmanTree(byte[] array) {
        // 1. Count occurrences. Start from a clean table so that counts from an
        //    earlier call on this instance cannot leak into this tree.
        byteIntegerHashMap.clear();
        for (byte b : array) {
            Integer seen = byteIntegerHashMap.get(b);
            byteIntegerHashMap.put(b, seen == null ? 1 : seen + 1);
        }

        // 2. One leaf per distinct byte.
        ArrayList<Node> nodes = new ArrayList<>();
        for (Map.Entry<Byte, Integer> byteIntegerEntry : byteIntegerHashMap.entrySet()) {
            nodes.add(new Node(byteIntegerEntry.getValue(), byteIntegerEntry.getKey()));
        }
        if (nodes.isEmpty()) {
            return null;
        }

        // 3. Repeatedly merge the two lightest nodes until a single root remains.
        //    The (stable) sort keeps the tie-breaking order, and therefore the
        //    generated codes, deterministic for a given input.
        while (nodes.size() > 1) {
            Collections.sort(nodes);
            Node lightest = nodes.remove(0);
            Node secondLightest = nodes.remove(0);
            Node parent = new Node(lightest.getCount() + secondLightest.getCount(), null);
            parent.setLeft(lightest);
            parent.setRight(secondLightest);
            nodes.add(parent);
        }
        return nodes.get(0);
    }

    /**
     * Builds the Huffman code table for {@code array}.
     *
     * @param array the input bytes; must not be {@code null}
     * @return a new map from each distinct byte value to its code; empty when the
     *         input is empty. When the input holds a single distinct byte value
     *         that byte is assigned the one-bit code {@code "0"}.
     */
    public HashMap<Byte, String> createHuffmanCodeMap(byte[] array) {
        // Fresh table per call: no stale codes, and a map returned earlier is not mutated.
        byteStringHashMap = new HashMap<>();

        Node root = createHuffmanTree(array);
        if (root == null) {
            return byteStringHashMap;
        }
        if (root.getB() != null) {
            // The root itself is a leaf (only one distinct byte). An empty code could
            // not be written as bits, so give it a one-bit code.
            createHuffmanCode(root, "0");
        } else {
            // Left edges contribute '0', right edges '1'.
            createHuffmanCode(root, "");
        }
        return byteStringHashMap;
    }

    /**
     * Walks the subtree rooted at {@code node} and records the code of every leaf.
     *
     * @param node        current node; {@code null} is ignored
     * @param huffmanCode the code accumulated on the path from the root to {@code node}
     */
    private void createHuffmanCode(Node node, String huffmanCode) {
        if (node == null) {
            return;
        }
        if (node.getB() == null) {
            // Internal node: descend into both children.
            createHuffmanCode(node.getLeft(), huffmanCode + "0");
            createHuffmanCode(node.getRight(), huffmanCode + "1");
        } else {
            // Leaf: the accumulated path is this byte's code.
            byteStringHashMap.put(node.getB(), huffmanCode);
            // Not strictly needed; also keep the code on the leaf itself.
            node.setHuffmanCode(huffmanCode);
        }
    }

    /**
     * Huffman tree node. Leaves carry a byte value; internal nodes have
     * {@code b == null} and a count equal to the sum of their children's counts.
     * Nodes are ordered by ascending count.
     */
    private static class Node implements Comparable<Node> {
        // Number of occurrences of the byte (leaf) or of the whole subtree (internal node).
        private Integer count;
        // Byte value of a leaf; null for an internal node.
        private Byte b;

        private Node left;
        private Node right;

        private String huffmanCode;

        public Node(Integer count, Byte b) {
            this.count = count;
            this.b = b;
        }

        public Integer getCount() {
            return count;
        }

        public void setCount(Integer count) {
            this.count = count;
        }

        public Byte getB() {
            return b;
        }

        public void setB(Byte b) {
            this.b = b;
        }

        public Node getLeft() {
            return left;
        }

        public void setLeft(Node left) {
            this.left = left;
        }

        public Node getRight() {
            return right;
        }

        public void setRight(Node right) {
            this.right = right;
        }

        public String getHuffmanCode() {
            return huffmanCode;
        }

        public void setHuffmanCode(String huffmanCode) {
            this.huffmanCode = huffmanCode;
        }

        @Override
        public String toString() {
            // Internal nodes have no byte value; printing them must not throw.
            String symbol = (b == null) ? "<internal>" : String.valueOf((char) b.byteValue());
            return "Node{" +
                    "count=" + count +
                    ", b(letter or char)=" + symbol +
                    ", huffmanCode='" + huffmanCode + '\'' +
                    '}';
        }

        @Override
        public int compareTo(Node other) {
            // Integer.compare avoids the overflow that plain subtraction can produce.
            return Integer.compare(this.count, other.getCount());
        }
    }

}
package indi.tom.dataStructure.tree;

import org.junit.Test;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

/**
 * Compresses and decompresses a byte array with a Huffman code table.
 *
 * <p>The compressed form is the concatenation of the codes of all input bytes,
 * packed eight bits per output byte. The final output byte may carry fewer than
 * eight meaningful bits; how many is remembered in {@code effectiveBitNumber},
 * because without it leading zero bits of that last byte could not be restored
 * (for example with the input "i like like like java do you like a javak").
 * An instance that decompresses data it did not compress itself must be given that
 * value through {@link #setEffectiveBitNumber(int)}. Instances are not thread-safe.
 *
 * @Author Tom
 * @Date 2020/12/16 17:33
 * @Version 1.0
 * @Description Test using Huffman code to zip/unzip byte array.
 */
public class ZipUnzipByteArrayWithHuffmanCode {

    // Number of meaningful bits in the last compressed byte: 1..7 for a partially
    // filled byte, 0 when the bit stream ends exactly on a byte boundary.
    private int effectiveBitNumber;
    // True once effectiveBitNumber has been recorded by zip() or setEffectiveBitNumber().
    // While false, unzip() falls back to the historical best-effort handling of the last byte.
    private boolean effectiveBitNumberKnown;
    private HashMap<Byte, String> huffmanCodeMap;

    /**
     * Demo / round-trip check: compresses a sample string, decompresses it again
     * and fails when the result differs from the input.
     */
    @Test
    public void test01() {
        String testStr = "i like like like java do you like a javad and c/c++";

        // Pure ASCII sample: 51 bytes.
        byte[] bytes = testStr.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        this.huffmanCodeMap = new HuffmanCode().createHuffmanCodeMap(bytes);
        byte[] zippedBytes = zip(bytes, this.huffmanCodeMap);
        System.out.println("压缩后的结果是:" + Arrays.toString(zippedBytes) + " 长度= " + zippedBytes.length);

        byte[] unzippedBytes = unzip(zippedBytes, this.huffmanCodeMap);

        System.out.println(unzippedBytes.length);

        String s = new String(unzippedBytes, java.nio.charset.StandardCharsets.UTF_8);

        System.out.println(s);

        if (!testStr.equals(s)) {
            throw new AssertionError("round trip changed the data: expected '" + testStr + "' but got '" + s + "'");
        }
    }

    /**
     * Encodes {@code array} with {@code huffmanCodeMap} and packs the resulting
     * bit string into bytes. Also records how many bits of the last output byte
     * are meaningful, for a later {@link #unzip} on this instance.
     *
     * @param array          the bytes to compress
     * @param huffmanCodeMap code (string of '0'/'1') for every byte value occurring in {@code array}
     * @return the packed bits; an empty array when {@code array} is empty
     * @throws IllegalArgumentException if a byte of {@code array} has no code in the map
     */
    public byte[] zip(byte[] array, HashMap<Byte, String> huffmanCodeMap) {
        StringBuilder sb = new StringBuilder();
        for (byte b : array) {
            String code = huffmanCodeMap.get(b);
            if (code == null) {
                // Appending "null" would only fail later with an obscure NumberFormatException.
                throw new IllegalArgumentException("no Huffman code for byte value " + b);
            }
            sb.append(code);
        }
        System.out.println(sb);

        int totalBits = sb.length();
        // Always overwrite the recorded width, so that a value left over from an
        // earlier zip() call cannot be applied to this output.
        effectiveBitNumber = totalBits % 8;
        effectiveBitNumberKnown = true;

        // Round up: a trailing partial group of bits still needs its own byte.
        byte[] newArray = new byte[(totalBits + 7) / 8];
        for (int j = 0; j < newArray.length; j++) {
            int from = j * 8;
            int to = Math.min(from + 8, totalBits);
            newArray[j] = (byte) Integer.parseInt(sb.substring(from, to), 2);
        }
        return newArray;
    }

    /**
     * Restores the original bytes from data produced by {@link #zip}.
     *
     * @param bytes          the packed bits
     * @param huffmanCodeMap the code table that was used for compression
     * @return the decoded bytes; trailing bits that do not form a complete code are ignored
     */
    public byte[] unzip(byte[] bytes, HashMap<Byte, String> huffmanCodeMap) {
        // 1. Expand the packed bytes back into the bit string.
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < bytes.length; i++) {
            sb.append(toBinaryString(i == bytes.length - 1, bytes[i]));
        }
        System.out.println(sb);

        // 2. Invert the table: code -> byte.
        HashMap<String, Byte> stringByteHashMap = new HashMap<>();
        for (Map.Entry<Byte, String> byteStringEntry : huffmanCodeMap.entrySet()) {
            stringByteHashMap.put(byteStringEntry.getValue(), byteStringEntry.getKey());
        }

        // 3. Scan the bit string; since no code is a prefix of another, the first
        //    match starting at codeStart is the next symbol.
        ArrayList<Byte> byteList = new ArrayList<Byte>();
        int codeStart = 0;
        for (int codeEnd = 1; codeEnd <= sb.length(); codeEnd++) {
            Byte decoded = stringByteHashMap.get(sb.substring(codeStart, codeEnd));
            if (decoded != null) {
                byteList.add(decoded);
                codeStart = codeEnd;
            }
        }

        byte[] result = new byte[byteList.size()];
        for (int i = 0; i < result.length; i++) {
            result[i] = byteList.get(i);
        }
        return result;
    }

    public int getEffectiveBitNumber() {
        return effectiveBitNumber;
    }

    /**
     * Sets the number of meaningful bits in the last compressed byte (1..7, or 0
     * when the last byte is completely filled), e.g. when decompressing data that
     * was compressed by another instance.
     */
    public void setEffectiveBitNumber(int effectiveBitNumber) {
        this.effectiveBitNumber = effectiveBitNumber;
        this.effectiveBitNumberKnown = true;
    }

    public HashMap<Byte, String> getHuffmanCodeMap() {
        return huffmanCodeMap;
    }

    public void setHuffmanCodeMap(HashMap<Byte, String> huffmanCodeMap) {
        this.huffmanCodeMap = huffmanCodeMap;
    }

    /**
     * Converts one compressed byte back into its bit string.
     *
     * @param isLastByte whether {@code b} is the final byte of the compressed data
     * @param b          the compressed byte
     * @return eight bits for every byte but the last; for the last byte only the
     *         recorded number of meaningful bits
     */
    private String toBinaryString(boolean isLastByte, byte b) {
        // Setting bit 8 forces a 9-character result; dropping that first character
        // leaves the low eight bits with their leading zeros intact.
        String eightBits = Integer.toBinaryString((b & 0xFF) | 0x100).substring(1);
        if (!isLastByte) {
            return eightBits;
        }
        if (!effectiveBitNumberKnown) {
            // Width never recorded on this instance: keep the historical best-effort
            // behaviour (no zero padding for non-negative values). Leading zero bits
            // of the last byte cannot be recovered in this case.
            return b >= 0 ? Integer.toBinaryString(b) : eightBits;
        }
        if (effectiveBitNumber <= 0 || effectiveBitNumber >= 8) {
            // 0 means the bit stream ended on a byte boundary: all eight bits count.
            return eightBits;
        }
        return eightBits.substring(8 - effectiveBitNumber);
    }

}
package indi.tom.dataStructure.tree;

import org.junit.Test;

import java.io.*;
import java.util.HashMap;

/**
 * Compresses and decompresses whole files with the Huffman helpers of this package.
 *
 * <p>A compressed file is a Java object stream containing, in this order: the code
 * table ({@code HashMap<Byte, String>}), the packed bytes ({@code byte[]}) and an
 * {@code int} with the number of meaningful bits in the last packed byte. Files
 * without that trailing {@code int} are still accepted when decompressing.
 *
 * @Description: The joke is the zipped file is larger than the original file :)
 *               (presumably because the serialized code table is stored with the data)
 * @Author: Tom
 * @Date: 2020-12-21 20:40
 */
public class ZipUnzipFileWithHuffmanCode {

    /** Demo: compresses a sample source file and decompresses it again next to it. */
    @Test
    public void test01(){
        zipFile("D:\\Java\\IdeaProjects\\DataStructures&Algroithm\\resources\\HuffmanCode.java","D:\\Java\\IdeaProjects\\DataStructures&Algroithm\\resources\\HuffmanCode_zipped.java");
        unzipFile("D:\\Java\\IdeaProjects\\DataStructures&Algroithm\\resources\\HuffmanCode_zipped.java","D:\\Java\\IdeaProjects\\DataStructures&Algroithm\\resources\\HuffmanCode_unZipped.java");

    }

    /**
     * Compresses {@code sourceFile} into {@code destFile}. I/O failures are
     * reported on standard error and do not propagate to the caller.
     *
     * @param sourceFile path of the file to compress
     * @param destFile   path of the compressed file to create or overwrite
     */
    public void zipFile(String sourceFile, String destFile) {
        try {
            byte[] bytes = readWholeFile(sourceFile);

            ZipUnzipByteArrayWithHuffmanCode zipper = new ZipUnzipByteArrayWithHuffmanCode();
            HashMap<Byte, String> huffmanCodeMap;
            byte[] zippedBytes;
            if (bytes.length == 0) {
                // Nothing to encode: an empty table and empty payload round-trip to an empty file.
                huffmanCodeMap = new HashMap<>();
                zippedBytes = new byte[0];
            } else {
                huffmanCodeMap = new HuffmanCode().createHuffmanCodeMap(bytes);
                zippedBytes = zipper.zip(bytes, huffmanCodeMap);
            }

            // Open the destination only once everything to be written is available.
            try (FileOutputStream fileOutputStream = new FileOutputStream(destFile);
                 ObjectOutputStream objectOutputStream = new ObjectOutputStream(fileOutputStream)) {
                objectOutputStream.writeObject(huffmanCodeMap);
                objectOutputStream.writeObject(zippedBytes);
                // Decompression runs on a different helper instance, so the width of
                // the last byte has to travel inside the file.
                objectOutputStream.writeInt(zipper.getEffectiveBitNumber());
            }
        } catch (IOException e) {
            System.err.println("Failed to zip " + sourceFile + " to " + destFile);
            e.printStackTrace();
        }
    }

    /**
     * Decompresses a file written by {@link #zipFile} into {@code destFile}. I/O
     * and format failures are reported on standard error and do not propagate;
     * {@code destFile} is only written after the data was decoded.
     *
     * @param sourceFile path of the compressed file
     * @param destFile   path of the restored file to create or overwrite
     */
    public void unzipFile(String sourceFile, String destFile) {
        try {
            HashMap<Byte, String> huffmanCodeMap;
            byte[] bytes;
            Integer effectiveBitNumber = null;

            // NOTE(review): this uses Java native deserialization; only feed it files
            // from a trusted source, or install an ObjectInputFilter.
            try (FileInputStream fileInputStream = new FileInputStream(sourceFile);
                 ObjectInputStream objectInputStream = new ObjectInputStream(fileInputStream)) {
                // The stream is written by zipFile, which stores a HashMap<Byte, String> first.
                @SuppressWarnings("unchecked")
                HashMap<Byte, String> storedMap = (HashMap<Byte, String>) objectInputStream.readObject();
                huffmanCodeMap = storedMap;
                bytes = (byte[]) objectInputStream.readObject();
                try {
                    effectiveBitNumber = objectInputStream.readInt();
                } catch (EOFException ignored) {
                    // File written without the trailing width: decode with the helper's defaults.
                }
            }

            ZipUnzipByteArrayWithHuffmanCode unzipper = new ZipUnzipByteArrayWithHuffmanCode();
            if (effectiveBitNumber != null) {
                unzipper.setEffectiveBitNumber(effectiveBitNumber);
            }
            byte[] unzippedBytes = unzipper.unzip(bytes, huffmanCodeMap);

            try (FileOutputStream fileOutputStream = new FileOutputStream(destFile)) {
                fileOutputStream.write(unzippedBytes);
            }
        } catch (IOException | ClassNotFoundException e) {
            System.err.println("Failed to unzip " + sourceFile + " to " + destFile);
            e.printStackTrace();
        }
    }

    /**
     * Reads the complete content of {@code path}. A single {@code read} call is
     * not guaranteed to fill its buffer, hence the loop.
     */
    private static byte[] readWholeFile(String path) throws IOException {
        try (FileInputStream in = new FileInputStream(path);
             ByteArrayOutputStream collected = new ByteArrayOutputStream()) {
            byte[] buffer = new byte[8192];
            int read;
            while ((read = in.read(buffer)) != -1) {
                collected.write(buffer, 0, read);
            }
            return collected.toByteArray();
        }
    }
}

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值