Huffman编码可以理解为从根结点到各个叶子结点的路径上边的标记序列,可以标记“0”为转向左孩子,“1”为转向右孩子。如:
代码实现也很简单,就是遍历一遍Huffman树的过程:每个结点的路径序列等于其父结点的路径序列再加上一位(“0”或“1”),所以遍历时需要携带当前的路径序列,这里通过递归实现:
/**
 * Generates the Huffman code of every character in the alphabet.
 *
 * Allocates one code slot per alphabet entry and then lets
 * {@link #getCodes(HuffmanNode, String)} fill the slots by walking the tree
 * from the root, starting with an empty path.
 */
public void generateCodes() {
    huffmanCodes = new String[alphabetLength];
    // The root has no incoming edge, so its path is the empty string.
    getCodes(getRoot(), "");
}// Of generateCodes
/**
 * Recursively assigns codes to the leaves of the subtree rooted at the
 * given node.
 *
 * The code of a leaf is the sequence of edge labels on the path from the
 * root: "0" for every step to a left child and "1" for every step to a
 * right child.
 *
 * @param paraNode   The root of the subtree to process.
 * @param paraString The path (code prefix) leading from the tree root to
 *                   paraNode.
 */
public void getCodes(HuffmanNode paraNode, String paraString) {
    // A leaf carries a character: its path so far is its complete code.
    if (paraNode.leftChild == null && paraNode.rightChild == null) {
        int tempAscii = (int) paraNode.character;
        int tempIndex = charMapping[tempAscii];
        huffmanCodes[tempIndex] = paraString;
        return;
    } // Of if

    if (paraNode.leftChild != null) {
        getCodes(paraNode.leftChild, paraString + "0");
    } // Of if

    // Guard the right subtree with the RIGHT child (the original tested
    // leftChild here, a copy-paste slip).
    if (paraNode.rightChild != null) {
        getCodes(paraNode.rightChild, paraString + "1");
    } // Of if
}// Of getCodes
老师的代码是通过循环实现的,因为老师为树结点添加了其父亲结点的引用,所以可以从下至上获得其路径序列,但原理是一样的:
/**
 * Generates the code of every character in the alphabet by climbing from
 * each leaf to the root through the parent references.
 *
 * Each step contributes "0" when the current node is its parent's left
 * child and "1" otherwise. The bits are met in leaf-to-root order, so they
 * are collected first and reversed at the end to obtain the root-to-leaf
 * code. Every code is stored in huffmanCodes and printed.
 */
public void generateCodes() {
    huffmanCodes = new String[alphabetLength];
    for (int i = 0; i < alphabetLength; i++) {
        // Bits in leaf-to-root order.
        StringBuilder tempReversedBits = new StringBuilder();
        for (HuffmanNode tempNode = nodes[i]; tempNode.parent != null; tempNode = tempNode.parent) {
            boolean tempIsLeftChild = (tempNode == tempNode.parent.leftChild);
            tempReversedBits.append(tempIsLeftChild ? '0' : '1');
        } // Of for tempNode

        // Flip to root-to-leaf order.
        String tempCharCode = tempReversedBits.reverse().toString();
        huffmanCodes[i] = tempCharCode;
        System.out.println("The code of " + alphabet[i] + " is " + tempCharCode);
    } // Of for i
}// Of generateCodes
编码的过程就很简单了,因为我们已经为每个字符设置了相应的Huffman编码,只需要遍历一遍字符串完成一个映射。
解码:Huffman编码是前缀码——任何一个字符的编码都不是另一个字符编码的前缀。例如a的编码是1,就不会有其它任何字符的编码以1开头。所以解码时从Huffman树的根结点出发,逐位读取编码序列:当前位为“0”就走向该结点的左孩子,否则走向右孩子;这样一定会走到一个叶子结点,得到对应的字符;然后回到根结点,继续解码剩余的序列。
完整代码:
package day16;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.stream.Collectors;
/**
 * Huffman coding over the characters of a text file.
 *
 * Typical usage: construct with a filename, then call
 * {@link #constructAlphabet()}, {@link #constructTree()} and
 * {@link #generateCodes()} in that order; afterwards {@link #coding(String)}
 * and {@link #decoding(String)} translate between text and bit strings
 * (strings made of the characters '0' and '1').
 *
 * Only characters with a code below {@link #NUM_CHARS} are supported.
 */
public class Huffman {
    /**
     * A node of the Huffman tree.
     */
    class HuffmanNode {
        /**
         * The char. Only valid for leaf nodes.
         */
        char character;

        /**
         * Weight. It can also be double.
         */
        int weight;

        /**
         * The left child.
         */
        HuffmanNode leftChild;

        /**
         * The right child.
         */
        HuffmanNode rightChild;

        /**
         * Creates a node.
         *
         * @param paraCharacter  The character (meaningful for leaves only).
         * @param paraWeight     The weight of the node.
         * @param paraLeftChild  The left child, or null.
         * @param paraRightChild The right child, or null.
         */
        public HuffmanNode(char paraCharacter, int paraWeight, HuffmanNode paraLeftChild, HuffmanNode paraRightChild) {
            character = paraCharacter;
            weight = paraWeight;
            leftChild = paraLeftChild;
            rightChild = paraRightChild;
        }// Of HuffmanNode

        /**
         * Formats the node as "(character,weight)".
         */
        @Override
        public String toString() {
            return "(" + character + "," + weight + ")";
        }// Of toString
    }// Of class HuffmanNode

    /**
     * The number of characters. 256 for ASCII
     */
    public static final int NUM_CHARS = 256;

    /**
     * The code given to the only symbol when the alphabet has a single
     * character. Without it the symbol would get the empty code and an
     * encoded text would carry no information about its length.
     */
    private static final String SINGLE_SYMBOL_CODE = "0";

    /**
     * The input text. It is stored in a string for simplicity.
     */
    String inputText;

    /**
     * The length of alphabet, also the number of leaves.
     */
    int alphabetLength;

    /**
     * The alphabet.
     */
    char[] alphabet;

    /**
     * The count of chars. The length is 2*alphabetLength - 1 to include
     * non-leaf nodes (0 when the alphabet is empty).
     */
    int[] charCounts;

    /**
     * The mapping of chars to the indices in the alphabet; -1 for chars that
     * do not occur in the text.
     */
    int[] charMapping;

    /**
     * Codes for each char in the alphabet. It has the same length as the
     * alphabet.
     */
    String[] huffmanCodes;

    /**
     * All nodes. The last node is the root.
     */
    HuffmanNode[] nodes;

    /**
     * Creates the coder and reads the text to analyse.
     *
     * @param paraFilename The text filename.
     */
    public Huffman(String paraFilename) {
        charMapping = new int[NUM_CHARS];
        readText(paraFilename);
    }// Of the first constructor

    /**
     * Reads the whole file as UTF-8 into inputText, joining its lines with
     * "\n", and prints the text.
     *
     * On any failure the error is printed and the program terminates with a
     * non-zero exit status.
     *
     * @param paraFilename The filename.
     */
    public void readText(String paraFilename) {
        try {
            // readAllLines closes the file itself, so no reader is leaked.
            inputText = String.join("\n", Files.readAllLines(Paths.get(paraFilename), StandardCharsets.UTF_8));
        } catch (Exception ee) {
            // Program boundary: report and stop. A failure must not exit
            // with the "success" status 0.
            System.err.println(ee);
            System.exit(1);
        } // Of try

        System.out.println("The text is:\r\n" + inputText);
    }// Of readText

    /**
     * Constructs the alphabet of the input text. The results are stored in
     * the member variables alphabet, alphabetLength, charCounts and
     * charMapping.
     *
     * @throws IllegalArgumentException If the text contains a character whose
     *                                  code is not below {@link #NUM_CHARS}.
     */
    public void constructAlphabet() {
        // Initialize.
        Arrays.fill(charMapping, -1);

        // The count for each char. At most NUM_CHARS chars.
        int[] tempCharCounts = new int[NUM_CHARS];

        // Step 1. Scan the string to obtain the counts.
        for (int i = 0; i < inputText.length(); i++) {
            char tempChar = inputText.charAt(i);
            int tempCharIndex = (int) tempChar;
            if (tempCharIndex >= NUM_CHARS) {
                throw new IllegalArgumentException("Unsupported character '" + tempChar + "' (code " + tempCharIndex
                        + ") at position " + i + ": only codes below " + NUM_CHARS + " are supported.");
            } // Of if
            System.out.println("" + tempCharIndex + " ");
            tempCharCounts[tempCharIndex]++;
        } // Of for i

        // Step 2. Scan to determine the size of the alphabet.
        alphabetLength = 0;
        for (int i = 0; i < NUM_CHARS; i++) {
            if (tempCharCounts[i] > 0) {
                alphabetLength++;
            } // Of if
        } // Of for i

        // Step 3. Compress to the alphabet.
        alphabet = new char[alphabetLength];
        // An empty text has no nodes at all; avoid a negative array size.
        charCounts = new int[Math.max(0, 2 * alphabetLength - 1)];
        int tempCounter = 0;
        for (int i = 0; i < NUM_CHARS; i++) {
            if (tempCharCounts[i] > 0) {
                alphabet[tempCounter] = (char) i;
                charCounts[tempCounter] = tempCharCounts[i];
                charMapping[i] = tempCounter;
                tempCounter++;
            } // Of if
        } // Of for i

        System.out.println("The alphabet is: " + Arrays.toString(alphabet));
        System.out.println("Their counts are: " + Arrays.toString(charCounts));
        System.out.println("The char mappings are: " + Arrays.toString(charMapping));
    }// Of constructAlphabet

    /**
     * Constructs the Huffman tree from the alphabet and its counts.
     *
     * The leaves occupy nodes[0 .. alphabetLength - 1]. Each following node
     * merges the two unprocessed nodes of minimal weight (the smaller one
     * becomes the left child), so the last node is the root.
     */
    public void constructTree() {
        // Step 1. Allocate space.
        int tempNumNodes = Math.max(0, alphabetLength * 2 - 1);
        nodes = new HuffmanNode[tempNumNodes];
        boolean[] tempProcessed = new boolean[tempNumNodes];

        // Step 2. Initialize leaves.
        for (int i = 0; i < alphabetLength; i++) {
            nodes[i] = new HuffmanNode(alphabet[i], charCounts[i], null, null);
        } // Of for i

        // Step 3. Construct the tree.
        for (int i = alphabetLength; i < tempNumNodes; i++) {
            // Step 3.1 Select the first minimal as the left child.
            int tempLeft = selectMinimal(tempProcessed, i);
            tempProcessed[tempLeft] = true;

            // Step 3.2 Select the second minimal as the right child.
            int tempRight = selectMinimal(tempProcessed, i);
            tempProcessed[tempRight] = true;
            System.out.println("Selecting " + i + " are " + tempLeft + " and " + tempRight);

            // Step 3.3 Construct the new node.
            charCounts[i] = charCounts[tempLeft] + charCounts[tempRight];
            nodes[i] = new HuffmanNode('*', charCounts[i], nodes[tempLeft], nodes[tempRight]);
        } // Of for i
    }// Of constructTree

    /**
     * Finds the unprocessed node with the smallest count among the first
     * paraLimit nodes; ties are resolved in favour of the lowest index.
     *
     * @param paraProcessed Marks the nodes that already have a parent.
     * @param paraLimit     Only indices below this value are considered.
     * @return The index of the selected node, or -1 if none is unprocessed.
     */
    private int selectMinimal(boolean[] paraProcessed, int paraLimit) {
        int resultIndex = -1;
        for (int j = 0; j < paraLimit; j++) {
            if (paraProcessed[j]) {
                continue;
            } // Of if
            if (resultIndex < 0 || charCounts[j] < charCounts[resultIndex]) {
                resultIndex = j;
            } // Of if
        } // Of for j
        return resultIndex;
    }// Of selectMinimal

    /**
     * Tells whether the node has no children.
     */
    private static boolean isLeaf(HuffmanNode paraNode) {
        return paraNode.leftChild == null && paraNode.rightChild == null;
    }// Of isLeaf

    /**
     * Gets the root of the Huffman tree, i.e., the last node.
     *
     * @return The root.
     * @throws IllegalStateException If the tree has not been constructed or
     *                               is empty.
     */
    public HuffmanNode getRoot() {
        if (nodes == null || nodes.length == 0) {
            throw new IllegalStateException(
                    "The Huffman tree is empty: call constructAlphabet() and constructTree() on a non-empty text first.");
        } // Of if
        return nodes[nodes.length - 1];
    }// Of getRoot

    /**
     * Prints the subtree rooted at the given node in pre-order, one
     * "(character, weight)" line per node.
     *
     * @param paraNode The root of the subtree to print.
     */
    public void preOrderVisit(HuffmanNode paraNode) {
        System.out.println("(" + paraNode.character + ", " + paraNode.weight + ")");

        if (paraNode.leftChild != null) {
            preOrderVisit(paraNode.leftChild);
        } // Of if

        if (paraNode.rightChild != null) {
            preOrderVisit(paraNode.rightChild);
        } // Of if
    }// Of preOrderVisit

    /**
     * Generates the code of each character in the alphabet and stores it in
     * huffmanCodes.
     *
     * An empty alphabet yields an empty code table. An alphabet with a single
     * character gets the one-bit code "0" for it, because the tree then
     * consists of the root only and the path to it is empty.
     */
    public void generateCodes() {
        huffmanCodes = new String[alphabetLength];
        if (alphabetLength == 0) {
            return;
        } // Of if

        HuffmanNode tempRoot = getRoot();
        if (isLeaf(tempRoot)) {
            huffmanCodes[charMapping[tempRoot.character]] = SINGLE_SYMBOL_CODE;
            return;
        } // Of if

        getCodes(tempRoot, "");
    }// Of generateCodes

    /**
     * Recursively assigns codes to the leaves of the subtree rooted at the
     * given node: "0" is appended for a step to the left child and "1" for a
     * step to the right child.
     *
     * @param paraNode   The root of the subtree to process.
     * @param paraString The path (code prefix) leading from the tree root to
     *                   paraNode.
     */
    public void getCodes(HuffmanNode paraNode, String paraString) {
        // A leaf carries a character: its path so far is its complete code.
        if (isLeaf(paraNode)) {
            int tempIndex = charMapping[(int) paraNode.character];
            huffmanCodes[tempIndex] = paraString;
            return;
        } // Of if

        if (paraNode.leftChild != null) {
            getCodes(paraNode.leftChild, paraString + "0");
        } // Of if

        // Guard the right subtree with the RIGHT child (the original tested
        // leftChild here, a copy-paste slip).
        if (paraNode.rightChild != null) {
            getCodes(paraNode.rightChild, paraString + "1");
        } // Of if
    }// Of getCodes

    /**
     * Encodes the given string with the generated codes.
     *
     * @param paraString The given string.
     * @return The concatenation of the codes of all characters.
     * @throws IllegalArgumentException If a character does not belong to the
     *                                  alphabet.
     */
    public String coding(String paraString) {
        StringBuilder resultCoding = new StringBuilder();
        for (int i = 0; i < paraString.length(); i++) {
            char tempChar = paraString.charAt(i);
            // From the original char to the location in the alphabet.
            int tempIndex = (tempChar < NUM_CHARS) ? charMapping[tempChar] : -1;
            if (tempIndex < 0) {
                throw new IllegalArgumentException(
                        "Character '" + tempChar + "' (code " + (int) tempChar + ") is not in the alphabet.");
            } // Of if

            // From the location in the alphabet to the code.
            resultCoding.append(huffmanCodes[tempIndex]);
        } // Of for i
        return resultCoding.toString();
    }// Of coding

    /**
     * Decodes the given bit string by walking the tree from the root: '0'
     * moves to the left child and any other character moves to the right
     * child. Reaching a leaf emits its character and restarts at the root.
     * Trailing bits that do not reach a leaf are ignored.
     *
     * When the tree consists of a single leaf, every bit decodes to that
     * leaf's character.
     *
     * @param paraString The given bit string.
     * @return The decoded text.
     */
    public String decoding(String paraString) {
        if (paraString.isEmpty()) {
            return "";
        } // Of if

        StringBuilder resultDecoded = new StringBuilder();
        HuffmanNode tempRoot = getRoot();

        // Single-symbol alphabet: there are no edges to follow.
        if (isLeaf(tempRoot)) {
            for (int i = 0; i < paraString.length(); i++) {
                System.out.println("Decode one: " + tempRoot);
                resultDecoded.append(tempRoot.character);
            } // Of for i
            return resultDecoded.toString();
        } // Of if

        HuffmanNode tempNode = tempRoot;
        for (int i = 0; i < paraString.length(); i++) {
            tempNode = (paraString.charAt(i) == '0') ? tempNode.leftChild : tempNode.rightChild;
            System.out.println(tempNode);

            if (isLeaf(tempNode)) {
                System.out.println("Decode one: " + tempNode);
                // Decode one char.
                resultDecoded.append(tempNode.character);
                // Return to the root.
                tempNode = tempRoot;
            } // Of if
        } // Of for i
        return resultDecoded.toString();
    }// Of decoding

    /**
     * The entrance of the program.
     *
     * @param args Optionally, args[0] is the text file to analyse; the
     *             default is "F:/huffmantext-small.txt".
     */
    public static void main(String args[]) {
        String tempFilename = (args.length > 0) ? args[0] : "F:/huffmantext-small.txt";
        Huffman tempHuffman = new Huffman(tempFilename);
        tempHuffman.constructAlphabet();
        tempHuffman.constructTree();

        HuffmanNode tempRoot = tempHuffman.getRoot();
        System.out.println("The root is: " + tempRoot);
        System.out.println("Preorder visit:");
        tempHuffman.preOrderVisit(tempHuffman.getRoot());

        tempHuffman.generateCodes();

        String tempCoded = tempHuffman.coding("abcd");
        System.out.println("Code: " + tempCoded);
        String tempDecoded = tempHuffman.decoding(tempCoded);
        System.out.println("Decoded: " + tempDecoded);
    }// Of main
}// Of class Huffman
运行结果: