一、学习内容
Huffman 编码 (建树)
(1)huffman编码原理
huffman编码是一种无失真编码方式,是可变长(VLC)编码的一种。
huffman编码基于信源的概率统计模型,基本思路是出现概率大的信源符号编长码,出现概率小的符号编短码,从而使平均码长最小。
(2)具体内容:
1、Arrays.fill(charMapping, -1); 这种初始化工作非常重要。
2、171 行将 char 强制转换为 int, 即其在 ASCII 字符集中的位置.。
3、当我们设计完成的实际问题涉及到多个变量,以及这些变量的相关计算时,我们可以通过在变量名前面加上前缀,这样两个变量的相对关系就更明显。
4、最后生成的节点就是根节点。
二、代码编写
package datastructure.tree;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.stream.Collectors;
/**
* Huffman tree, encoding, and decoding. For simplicity, only ASCII characters
* are supported.
*
* @author Fan Min minfanphd@163.com.
*/
public class Huffman {
/**
* An inner class for Huffman nodes.
*/
class HuffmanNode {
/**
* The char. Only valid for leaf nodes.
*/
char character;
/**
* Weight. It can also be double.
*/
int weight;
/**
* The left child.
*/
HuffmanNode leftChild;
/**
* The right child.
*/
HuffmanNode rightChild;
/**
* The parent. It helps constructing the Huffman code of each character.
*/
HuffmanNode parent;
/**
*******************
* The first constructor
*******************
*/
public HuffmanNode(char paraCharacter, int paraWeight, HuffmanNode paraLeftChild,
HuffmanNode paraRightChild, HuffmanNode paraParent) {
character = paraCharacter;
weight = paraWeight;
leftChild = paraLeftChild;
rightChild = paraRightChild;
parent = paraParent;
}// Of HuffmanNode
/**
*******************
* To string.
*******************
*/
public String toString() {
String resultString = "(" + character + ", " + weight + ")";
return resultString;
}// Of toString
}// Of class HuffmanNode
/**
* The number of characters. 256 for ASCII.
*/
public static final int NUM_CHARS = 256;
/**
* The input text. It is stored in a string for simplicity.
*/
String inputText;
/**
* The length of the alphabet, also the number of leaves.
*/
int alphabetLength;
/**
* The alphabet.
*/
char[] alphabet;
/**
* The count of chars. The length is 2 * alphabetLength - 1 to include
* non-leaf nodes.
*/
int[] charCounts;
/**
* The mapping of chars to the indices in the alphabet.
*/
int[] charMapping;
/**
* Codes for each char in the alphabet. It should have the same length as
* alphabet.
*/
String[] huffmanCodes;
/**
* All nodes. The last node is the root.
*/
HuffmanNode[] nodes;
/**
*********************
* The first constructor.
*
* @param paraFilename
* The text filename.
*********************
*/
public Huffman(String paraFilename) {
charMapping = new int[NUM_CHARS];
readText(paraFilename);
}// Of the first constructor
/**
*********************
* Read text.
*
* @param paraFilename
* The text filename.
*********************
*/
public void readText(String paraFilename) {
try {
inputText = Files.newBufferedReader(Paths.get(paraFilename), StandardCharsets.UTF_8)
.lines().collect(Collectors.joining("\n"));
} catch (Exception ee) {
System.out.println(ee);
System.exit(0);
} // Of try
System.out.println("The text is:\r\n" + inputText);
}// Of readText
/**
*********************
* Construct the alphabet. The results are stored in the member variables
* charMapping and alphabet.
*********************
*/
public void constructAlphabet() {
// Initialize.
Arrays.fill(charMapping, -1);
// The count for each char. At most NUM_CHARS chars.
int[] tempCharCounts = new int[NUM_CHARS];
// The index of the char in the ASCII charset.
int tempCharIndex;
// Step 1. Scan the string to obtain the counts.
char tempChar;
for (int i = 0; i < inputText.length(); i++) {
tempChar = inputText.charAt(i);
tempCharIndex = (int) tempChar;
System.out.print("" + tempCharIndex + " ");
tempCharCounts[tempCharIndex]++;
} // Of for i
// Step 2. Scan to determine the size of the alphabet.
alphabetLength = 0;
for (int i = 0; i < 255; i++) {
if (tempCharCounts[i] > 0) {
alphabetLength++;
} // Of if
} // Of for i
// Step 3. Compress to the alphabet
alphabet = new char[alphabetLength];
charCounts = new int[2 * alphabetLength - 1];
int tempCounter = 0;
for (int i = 0; i < NUM_CHARS; i++) {
if (tempCharCounts[i] > 0) {
alphabet[tempCounter] = (char) i;
charCounts[tempCounter] = tempCharCounts[i];
charMapping[i] = tempCounter;
tempCounter++;
} // Of if
} // Of for i
System.out.println("The alphabet is: " + Arrays.toString(alphabet));
System.out.println("Their counts are: " + Arrays.toString(charCounts));
System.out.println("The char mappings are: " + Arrays.toString(charMapping));
}// Of constructAlphabet
/**
*********************
* Construct the tree.
*********************
*/
public void constructTree() {
// Step 1. Allocate space.
nodes = new HuffmanNode[alphabetLength * 2 - 1];
boolean[] tempProcessed = new boolean[alphabetLength * 2 - 1];
// Step 2. Initialize leaves.
for (int i = 0; i < alphabetLength; i++) {
nodes[i] = new HuffmanNode(alphabet[i], charCounts[i], null, null, null);
} // Of for i
// Step 3. Construct the tree.
int tempLeft, tempRight, tempMinimal;
for (int i = alphabetLength; i < 2 * alphabetLength - 1; i++) {
// Step 3.1 Select the first minimal as the left child.
tempLeft = -1;
tempMinimal = Integer.MAX_VALUE;
for (int j = 0; j < i; j++) {
if (tempProcessed[j]) {
continue;
} // Of if
if (tempMinimal > charCounts[j]) {
tempMinimal = charCounts[j];
tempLeft = j;
} // Of if
} // Of for j
tempProcessed[tempLeft] = true;
// Step 3.2 Select the second minimal as the right child.
tempRight = -1;
tempMinimal = Integer.MAX_VALUE;
for (int j = 0; j < i; j++) {
if (tempProcessed[j]) {
continue;
} // Of if
if (tempMinimal > charCounts[j]) {
tempMinimal = charCounts[j];
tempRight = j;
} // Of if
} // Of for j
tempProcessed[tempRight] = true;
System.out.println("Selecting " + tempLeft + " and " + tempRight);
// Step 3.3 Construct the new node.
charCounts[i] = charCounts[tempLeft] + charCounts[tempRight];
nodes[i] = new HuffmanNode('*', charCounts[i], nodes[tempLeft], nodes[tempRight], null);
// Step 3.4 Link with children.
nodes[tempLeft].parent = nodes[i];
nodes[tempRight].parent = nodes[i];
System.out.println("The children of " + i + " are " + tempLeft + " and " + tempRight);
} // Of for i
}// Of constructTree
/**
*********************
* Get the root of the binary tree.
*
* @return The root.
*********************
*/
public HuffmanNode getRoot() {
return nodes[nodes.length - 1];
}// Of getRoot