最优二叉树又叫huffman树。
在此直接介绍构造树的方法。
构造过程中有一个节点数组 nodes
1:从 nodes 中获取权值最小的两个 node。
2:将两个 node 组成一个树父节点 nRoot 权值为两个 node 权值之和,从 nodes 中删除两个 node 并将 nRoot 添加到 nodes 中。
重复以上两步,直到 nodes 中只剩下一个节点,此时 huffman tree 构造完成。
huffman编码的特点是:出现次数越多的符号,其编码长度越短。huffman解码则是从构造好的huffman树的根节点出发,按读到的每一位向左或向右走,最终一定会到达一个确定的叶子节点(因为任何一个符号的编码都不是另一个符号编码的前缀)。
/**
 * Byte-oriented Huffman compressor.
 *
 * <p>Output format: the concatenated code bits are packed {@code PER_SIZE - 1}
 * (seven) per output byte, first code bit in the highest used position, so
 * every output byte is in the range 0..127. The last byte is padded with
 * {@code END_FULL} bits.
 *
 * <p>The Huffman tree is NOT written to the compressed file. It is kept in the
 * static {@link #TREE} field, so {@link #decompress} can only decode data that
 * was produced by the most recent {@link #compress} call in the same JVM.
 *
 * <p>Decoding emits exactly as many symbols as the weight of the tree root
 * (which equals the number of bytes that were compressed), so the padding bits
 * at the end of the file never produce extra output.
 *
 * @author han
 */
public class HuffmanCompress implements Compress {
    /** Data value stored in internal (non-leaf) nodes. */
    private static final int PARENT_DATA_VALUE = -1;
    /** "No slot selected yet" marker used while searching for the two lightest nodes. */
    private static final int DEFAULT_INDEX = -1;
    /** Code bit that means "descend into the left child". */
    private static final int LEFT_VALUE = 1;
    /** Code bit that means "descend into the right child". */
    private static final int RIGHT_VALUE = 0;
    /** Value returned by {@code InputStream.read()} at end of stream. */
    private static final int FILE_END = -1;
    /** Bit used to pad the last, partially filled output unit. */
    private static final int END_FULL = 0;
    private static final int PER_SIZE = 8;
    /** Number of code bits carried by each output byte. */
    private static final int BITS_PER_UNIT = PER_SIZE - 1;
    /** Number of distinct byte values. */
    private static final int SYMBOL_COUNT = 256;

    /**
     * The tree built by the last {@link #compress} call. It has to live in a static field
     * because the tree is not stored in the compressed file. Shared mutable state: every
     * compress call overwrites it.
     */
    public static HuffmanTreeNode TREE = null;

    /** Node of the Huffman tree; leaves carry a byte value, internal nodes carry {@code PARENT_DATA_VALUE}. */
    static class HuffmanTreeNode {
        HuffmanTreeNode(int weight, int data, HuffmanTreeNode l, HuffmanTreeNode r) {
            this.weight = weight;
            this.data = data;
            this.l = l;
            this.r = r;
        }

        /** Number of input bytes covered by this subtree. */
        int weight;
        /** Byte value (0..255) for a leaf, {@code PARENT_DATA_VALUE} for an internal node. */
        int data;
        HuffmanTreeNode l;
        HuffmanTreeNode r;

        /** @return true when this node has no children */
        boolean isLeaf() {
            return l == null && r == null;
        }
    }

    /**
     * Compresses {@code in} into {@code out} and remembers the tree in {@link #TREE}.
     * Also prints the symbol/code table to standard output.
     *
     * @param in  file to compress
     * @param out file that receives the packed code bits
     * @throws IOException if reading {@code in} or writing {@code out} fails
     */
    @Override
    public void compress(Path in, Path out) throws IOException {
        int[] rate = getRateArr(in);
        HuffmanTreeNode tree = createHuffmanTree(rate);
        TREE = tree;
        String[] codes = getCode(tree, new String[SYMBOL_COUNT], "");
        showCodeRelation(codes, rate);
        coding(codes, in, out);
    }

    /**
     * Decodes {@code in} into {@code out} using the tree stored in {@link #TREE}.
     *
     * @param in  file produced by {@link #compress}
     * @param out file that receives the decoded bytes
     * @throws IOException           if I/O fails or the compressed data is malformed or truncated
     * @throws IllegalStateException if no tree is available but {@code in} is not empty
     */
    @Override
    public void decompress(Path in, Path out) throws IOException {
        HuffmanTreeNode tree = TREE;
        decode(tree, in, out);
    }

    public static void main(String[] args) throws IOException {
        Compress com = new HuffmanCompress();
        // Compresss is a helper class that covers the steps before compression, such as file checks.
        Compresss.compress("", "abc", com);// a path needs to be filled in
        Compresss.decompress("", "def", com);// a path needs to be filled in
    }

    /** Prints every symbol that has a non-empty code together with its frequency and code. */
    private void showCodeRelation(String[] codes, int[] rate) {
        for (int i = 0; i < codes.length; i++) {
            if (codes[i] != null && !"".equals(codes[i])) {
                System.out.printf("char:%2s, number:%-3d, code:%s\n", (char) i, rate[i], codes[i]);
            }
        }
    }

    /**
     * Replaces every input byte by its code and writes the bit stream, BITS_PER_UNIT bits per
     * output byte. A file with a single distinct byte value has the empty code, so nothing is
     * written for it; the decoder rebuilds such a file from the root weight alone.
     */
    private void coding(String[] codes, Path in, Path out) throws IOException {
        try (InputStream is = openInput(in);
             java.io.OutputStream os = openOutput(out)) {
            int unit = 0;    // bits collected for the next output byte
            int filled = 0;  // how many bits of 'unit' are in use
            int symbol;
            while ((symbol = is.read()) != FILE_END) {
                String code = codes[symbol];
                for (int i = 0; i < code.length(); i++) {
                    // Codes consist only of the characters '0' and '1'.
                    unit = (unit << 1) | (code.charAt(i) - '0');
                    if (++filled == BITS_PER_UNIT) {
                        os.write(unit);
                        unit = 0;
                        filled = 0;
                    }
                }
            }
            if (filled > 0) {
                int n = BITS_PER_UNIT - filled;
                LOG.d(filled);
                LOG.d(n, " ", filled);
                // Pad the last unit; the decoder ignores these bits because it counts symbols.
                for (int i = 0; i < n; i++) {
                    unit = (unit << 1) | END_FULL;
                }
                os.write(unit);
            }
        }
    }

    /**
     * Walks the tree bit by bit and writes the decoded bytes.
     * Stops after {@code tree.weight} symbols so padding bits are never decoded.
     */
    private void decode(HuffmanTreeNode tree, Path in, Path out) throws IOException {
        try (InputStream is = openInput(in);
             java.io.OutputStream os = openOutput(out)) {
            if (tree == null) {
                // An empty source file yields no tree and an empty compressed file.
                if (is.read() != FILE_END) {
                    throw new IllegalStateException(
                            "No Huffman tree available: compress(...) must run in this JVM before decompress(...)");
                }
                return;
            }
            int remaining = tree.weight;
            if (tree.isLeaf()) {
                // Only one distinct byte value: its code is empty, the count is all we need.
                for (; remaining > 0; remaining--) {
                    os.write(tree.data);
                }
                return;
            }
            HuffmanTreeNode node = tree;
            int unit;
            while (remaining > 0 && (unit = is.read()) != FILE_END) {
                if ((unit >>> BITS_PER_UNIT) != 0) {
                    throw new IOException("Malformed compressed data: unit value " + unit
                            + " does not fit in " + BITS_PER_UNIT + " bits");
                }
                for (int bit = BITS_PER_UNIT - 1; bit >= 0 && remaining > 0; bit--) {
                    node = (((unit >> bit) & 1) == LEFT_VALUE) ? node.l : node.r;
                    if (node.isLeaf()) {
                        os.write(node.data);
                        remaining--;
                        node = tree;
                    }
                }
            }
            if (remaining > 0) {
                throw new java.io.EOFException(
                        "Compressed data ended with " + remaining + " symbol(s) still to decode");
            }
        }
    }

    /**
     * Fills {@code codes[byteValue]} with the path from the root to each leaf
     * (LEFT_VALUE for a left step, RIGHT_VALUE for a right step).
     *
     * @return the same {@code codes} array
     */
    private String[] getCode(HuffmanTreeNode tree, String[] codes, String code) {
        if (tree == null) {
            return codes;
        }
        if (tree.isLeaf()) {
            codes[tree.data] = code;
            return codes;
        }
        getCode(tree.l, codes, code + LEFT_VALUE);
        getCode(tree.r, codes, code + RIGHT_VALUE);
        return codes;
    }

    /**
     * Builds the Huffman tree by repeatedly merging the two lightest nodes.
     * The selection order (including how ties are broken) determines the codes,
     * so it must not be changed without changing the decoder's tree as well.
     *
     * @return the root, or null when {@code rate} contains no non-zero entry
     */
    private HuffmanTreeNode createHuffmanTree(int[] rate) {
        HuffmanTreeNode[] nodes = getHuffmanNodeArr(rate);
        for (int round = 0; round < nodes.length; round++) {
            HuffmanTreeNode min = null;
            HuffmanTreeNode min2 = null;
            int minIndex = DEFAULT_INDEX;
            int min2Index = DEFAULT_INDEX;
            for (int j = 0; j < nodes.length; j++) {
                HuffmanTreeNode candidate = nodes[j];
                if (candidate == null) {
                    continue;
                }
                if (min == null || min.weight > candidate.weight) {
                    // New lightest node; the previous lightest becomes second lightest.
                    min2 = min;
                    min2Index = minIndex;
                    min = candidate;
                    minIndex = j;
                } else if (min2 == null || min2.weight > candidate.weight) {
                    min2 = candidate;
                    min2Index = j;
                }
            }
            if (min != null && min2 != null) {
                // The merged node takes the slot of the lightest one; the other slot is freed.
                nodes[minIndex] = makeParentNode(min, min2);
                nodes[min2Index] = null;
            }
        }
        return getHuffmanTree(nodes);
    }

    /** @return the first non-null entry of {@code nodes}, or null if there is none */
    private HuffmanTreeNode getHuffmanTree(HuffmanTreeNode[] nodes) {
        for (HuffmanTreeNode node : nodes) {
            if (node != null) {
                return node;
            }
        }
        return null;
    }

    /** Creates an internal node with {@code h1} as left child and {@code h2} as right child. */
    private HuffmanTreeNode makeParentNode(HuffmanTreeNode h1, HuffmanTreeNode h2) {
        return new HuffmanTreeNode(h1.weight + h2.weight, PARENT_DATA_VALUE, h1, h2);
    }

    /** Creates one leaf per byte value whose frequency is non-zero, in ascending byte order. */
    private HuffmanTreeNode[] getHuffmanNodeArr(int[] rate) {
        HuffmanTreeNode[] nodes = new HuffmanTreeNode[getNumOfTreeNode(rate)];
        int index = 0;
        for (int i = 0; i < rate.length; i++) {
            if (rate[i] != 0) {
                nodes[index++] = new HuffmanTreeNode(rate[i], i, null, null);
            }
        }
        return nodes;
    }

    /** @return how many entries of {@code rate} are non-zero */
    private int getNumOfTreeNode(int[] rate) {
        int n = 0;
        for (int r : rate) {
            if (r != 0) {
                n++;
            }
        }
        return n;
    }

    /**
     * Counts how often each byte value occurs in {@code in}.
     *
     * @return array of length SYMBOL_COUNT indexed by byte value
     * @throws IOException if the file cannot be read
     */
    private int[] getRateArr(Path in) throws IOException {
        int[] rate = new int[SYMBOL_COUNT];
        try (InputStream is = openInput(in)) {
            int symbol;
            while ((symbol = is.read()) != FILE_END) {
                rate[symbol]++;
            }
        }
        return rate;
    }

    /** Opens {@code p} for buffered byte-wise reading. */
    private static InputStream openInput(Path p) throws IOException {
        return new java.io.BufferedInputStream(Files.newInputStream(p));
    }

    /** Opens {@code p} for buffered byte-wise writing (raw bytes, no character encoding). */
    private static java.io.OutputStream openOutput(Path p) throws IOException {
        return new java.io.BufferedOutputStream(Files.newOutputStream(p));
    }
}