利用哈夫曼树求得的用于通信的二进制编码称为哈夫曼编码。树中从根到每个叶子节点都有一条路径,对路径上的各分支约定:指向左子树的分支表示“0”码,指向右子树的分支表示“1”码。取每条路径上的“0”或“1”的序列作为各个叶子节点对应的字符编码,即是哈夫曼编码。
将每个出现的字符作为一个独立的节点,其权值为它出现的频度,构造出相应的哈夫曼树。哈夫曼树的带权路径长度是最优(最小)的。
例如:
代码如下:
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.stream.Collectors;
/**
 * Huffman coder for texts whose characters all have a code below
 * {@link #NUM_CHARS}.
 *
 * <p>Typical usage: create an instance (from a file or via
 * {@link #fromText(String)}), then call {@link #constructAlphabet()},
 * {@link #contrastTree()} and {@link #generateCodes()} in that order. After
 * that, {@link #coding(String)} and {@link #decoding(String)} can be used.
 */
public class Huffman {

    /**
     * A node of the Huffman tree. Leaves carry a character of the alphabet;
     * internal nodes carry the placeholder character {@code '*'}.
     */
    static class HuffmanNode {
        char character; // Character stored in the node.
        int weight; // Weight (occurrence count, or sum of the children's weights).
        HuffmanNode leftChild; // Left child, null for a leaf.
        HuffmanNode rightChild; // Right child, null for a leaf.
        HuffmanNode parent; // Parent node, null for the root.

        /**
         * Creates a node.
         *
         * @param character  the character stored in the node
         * @param weight     the weight of the node
         * @param leftChild  the left child, or null
         * @param rightChild the right child, or null
         * @param parent     the parent, or null
         */
        public HuffmanNode(char character, int weight, HuffmanNode leftChild,
                HuffmanNode rightChild, HuffmanNode parent) {
            this.character = character;
            this.weight = weight;
            this.leftChild = leftChild;
            this.rightChild = rightChild;
            this.parent = parent;
        }// Of constructor

        /**
         * Renders the node as {@code (character, weight)}.
         *
         * @return the string form of the node
         */
        @Override
        public String toString() {
            return "(" + character + ", " + weight + ")";
        }// Of toString
    }// Of class HuffmanNode

    /** Number of supported character codes (0 to 255). */
    public static final int NUM_CHARS = 256;

    String inputText; // The text the code is built from.
    int alphabetLength; // Number of distinct characters in the text.
    char[] alphabet; // Distinct characters, in ascending code order.
    int[] charCounts; // Weights: leaf counts first, then internal-node sums.
    int[] charMapping; // Character code -> index in alphabet, -1 if absent.
    String[] huffmanCodes; // Huffman code of each alphabet entry.
    HuffmanNode[] nodes; // Tree nodes: leaves first, root last.

    /**
     * Creates a coder for the text stored in the given file.
     *
     * @param paraFilename path of the text file, read as UTF-8
     * @throws UncheckedIOException if the file cannot be read
     */
    public Huffman(String paraFilename) {
        this();
        readText(paraFilename);
    }// Of constructor

    /**
     * Initializes the mapping table with every character marked as absent.
     */
    private Huffman() {
        charMapping = new int[NUM_CHARS];
        Arrays.fill(charMapping, -1);
    }// Of constructor

    /**
     * Creates a coder directly from an in-memory text, without touching the
     * file system.
     *
     * @param paraText the text to build the code from
     * @return a new coder holding the given text
     * @throws NullPointerException if {@code paraText} is null
     */
    public static Huffman fromText(String paraText) {
        if (paraText == null) {
            throw new NullPointerException("paraText");
        }// Of if
        Huffman resultHuffman = new Huffman();
        resultHuffman.inputText = paraText;
        return resultHuffman;
    }// Of fromText

    /**
     * Reads the file as UTF-8, joins its lines with {@code "\n"}, stores the
     * result as the input text and prints it.
     *
     * @param paraFilename path of the text file
     * @throws UncheckedIOException if the file cannot be read
     */
    public void readText(String paraFilename) {
        try {
            // readAllLines closes the file itself, so no reader is leaked.
            inputText = String.join("\n",
                    Files.readAllLines(Paths.get(paraFilename), StandardCharsets.UTF_8));
        } catch (IOException e) {
            // Fail here with the cause attached instead of continuing with a null text.
            throw new UncheckedIOException("Cannot read text file: " + paraFilename, e);
        }// Of try
        System.out.println("The text is:\r\n" + inputText);
    }// Of readText

    /**
     * Builds the alphabet, the occurrence counts and the character-to-index
     * mapping from the input text, and prints all three.
     *
     * @throws IllegalArgumentException if the text contains a character whose
     *                                  code is not below {@link #NUM_CHARS}
     */
    public void constructAlphabet() {
        Arrays.fill(charMapping, -1);
        int[] tempCharCounts = new int[NUM_CHARS];

        // Step 1. Count the occurrences of every character.
        for (int i = 0; i < inputText.length(); i++) {
            char tempChar = inputText.charAt(i);
            if (tempChar >= NUM_CHARS) {
                throw new IllegalArgumentException("Unsupported character (code "
                        + (int) tempChar + ") at index " + i + "; only codes below "
                        + NUM_CHARS + " are supported");
            }// Of if
            tempCharCounts[tempChar]++;
        }// Of for i

        // Step 2. Determine the alphabet size. The scan must cover all
        // NUM_CHARS slots, otherwise step 3 can find more characters than
        // the alphabet array has room for.
        alphabetLength = 0;
        for (int i = 0; i < NUM_CHARS; i++) {
            if (tempCharCounts[i] > 0) {
                alphabetLength++;
            }// Of if
        }// Of for i

        // Step 3. Fill the alphabet in ascending code order. charCounts also
        // reserves room for the internal nodes created by contrastTree().
        alphabet = new char[alphabetLength];
        charCounts = new int[Math.max(0, 2 * alphabetLength - 1)];
        int tempCounter = 0;
        for (int i = 0; i < NUM_CHARS; i++) {
            if (tempCharCounts[i] > 0) {
                alphabet[tempCounter] = (char) i;
                charCounts[tempCounter] = tempCharCounts[i];
                charMapping[i] = tempCounter;
                tempCounter++;
            }// Of if
        }// Of for i

        System.out.println("The alphabet is: " + Arrays.toString(alphabet));
        System.out.println("Their counts are: " + Arrays.toString(charCounts));
        System.out.println("The char mappings are: " + Arrays.toString(charMapping));
    }// Of constructAlphabet

    /**
     * Builds the Huffman tree from the alphabet and its counts. Leaves occupy
     * the first {@code alphabetLength} slots of {@code nodes}; each following
     * slot holds the parent of the two lightest unprocessed nodes before it.
     * An empty alphabet produces an empty node array.
     */
    public void contrastTree() {
        // Step 1. Allocate space.
        int tempTotal = Math.max(0, 2 * alphabetLength - 1);
        nodes = new HuffmanNode[tempTotal];
        boolean[] tempProcessed = new boolean[tempTotal];

        // Step 2. Create the leaves.
        for (int i = 0; i < alphabetLength; i++) {
            nodes[i] = new HuffmanNode(alphabet[i], charCounts[i], null, null, null);
        }// Of for i

        // Step 3. Create the internal nodes bottom-up.
        for (int i = alphabetLength; i < tempTotal; i++) {
            // Step 3.1 The lightest node becomes the left child.
            int tempLeft = getMinPos(tempProcessed, i);
            // Step 3.2 The second lightest node becomes the right child.
            int tempRight = getMinPos(tempProcessed, i);
            System.out.println("Selecting " + tempLeft + " and " + tempRight);

            // Step 3.3 Create the parent.
            charCounts[i] = charCounts[tempLeft] + charCounts[tempRight];
            nodes[i] = new HuffmanNode('*', charCounts[i], nodes[tempLeft], nodes[tempRight], null);

            // Step 3.4 Link the children to the parent.
            nodes[tempLeft].parent = nodes[i];
            nodes[tempRight].parent = nodes[i];
            System.out.println("The children of " + i + " are " + tempLeft + " and " + tempRight);
        }// Of for i
    }// Of contrastTree

    /**
     * Finds the unprocessed node with the smallest weight among the first
     * {@code paraLimit} entries of {@code charCounts} and marks it processed.
     * Ties are resolved in favor of the smallest index.
     *
     * @param paraProcessed flags of the nodes that were already selected
     * @param paraLimit     exclusive upper bound of the search range
     * @return the index of the selected node in {@code charCounts}
     * @throws IllegalStateException if every node in the range is processed
     */
    public int getMinPos(boolean[] paraProcessed, int paraLimit) {
        int tempRes = -1;
        for (int j = 0; j < paraLimit; j++) {
            if (paraProcessed[j]) {
                continue;
            }// Of if
            if (tempRes == -1 || charCounts[j] < charCounts[tempRes]) {
                tempRes = j;
            }// Of if
        }// Of for j
        if (tempRes == -1) {
            throw new IllegalStateException("No unprocessed node below index " + paraLimit);
        }// Of if
        paraProcessed[tempRes] = true;
        return tempRes;
    }// Of getMinPos

    /**
     * Returns the root of the tree, which is the last entry of {@code nodes}.
     *
     * @return the root, or null if the tree is missing or empty
     */
    public HuffmanNode getRoot() {
        if (nodes == null || nodes.length == 0) {
            return null;
        }// Of if
        return nodes[nodes.length - 1];
    }// Of getRoot

    /**
     * Prints the subtree rooted at the given node in pre-order.
     *
     * @param paraNode the node to start from; null prints nothing
     */
    public void preOrderVisit(HuffmanNode paraNode) {
        if (paraNode == null) {
            return;
        }// Of if
        System.out.println("(" + paraNode.character + " , " + paraNode.weight + ")");
        preOrderVisit(paraNode.leftChild);
        preOrderVisit(paraNode.rightChild);
    }// Of preOrderVisit

    /**
     * Generates and prints the code of every alphabet entry. A left branch
     * contributes '0' and a right branch '1'. An alphabet with a single
     * character gets the code "1".
     */
    public void generateCodes() {
        huffmanCodes = new String[alphabetLength];

        // A single leaf has no branches, so it gets a fixed one-bit code.
        // It has to be stored, not only printed, for coding() to work.
        if (alphabetLength == 1) {
            huffmanCodes[0] = "1";
            System.out.println("The code of " + alphabet[0] + " is " + "1");
            return;
        }// Of if

        for (int i = 0; i < alphabetLength; i++) {
            StringBuilder tempCharCode = new StringBuilder();
            // Walk from the leaf up to the root, prepending one bit per edge.
            for (HuffmanNode tempNode = nodes[i]; tempNode.parent != null; tempNode = tempNode.parent) {
                tempCharCode.insert(0, tempNode == tempNode.parent.leftChild ? '0' : '1');
            }// Of for tempNode
            huffmanCodes[i] = tempCharCode.toString();
            System.out.println("The code of " + alphabet[i] + " is " + huffmanCodes[i]);
        }// Of for i
    }// Of generateCodes

    /**
     * Encodes a string with the generated codes.
     *
     * @param paraString the string to encode
     * @return the concatenated codes, or "Not exist" if the string contains a
     *         character that is not in the alphabet
     */
    public String coding(String paraString) {
        StringBuilder resultCode = new StringBuilder();
        for (int i = 0; i < paraString.length(); i++) {
            char tempChar = paraString.charAt(i);
            // Codes outside the table cannot be in the alphabet either.
            int tempIndex = tempChar < NUM_CHARS ? charMapping[tempChar] : -1;
            if (tempIndex == -1) {
                return "Not exist";
            }// Of if
            resultCode.append(huffmanCodes[tempIndex]);
        }// Of for i
        return resultCode.toString();
    }// Of coding

    /**
     * Decodes a bit string for a tree that consists of a single node: every
     * '1' yields the root character. The first '0' stops decoding and the
     * rest of the input, starting at that '0', is printed as extra input.
     *
     * @param paraString the bit string
     * @return the decoded text, or "Invalid input" if a character other than
     *         '0' or '1' is met before decoding stops
     */
    public String decodingSingle(String paraString) {
        StringBuilder tempResult = new StringBuilder();
        char tempValue = getRoot().character;
        for (int i = 0; i < paraString.length(); i++) {
            char tempChar = paraString.charAt(i);
            if (tempChar != '0' && tempChar != '1') {
                return "Invalid input";
            }// Of if
            if (tempChar == '0') {
                System.out.println("extra input:" + paraString.substring(i));
                break;
            }// Of if
            tempResult.append(tempValue);
        }// Of for i
        return tempResult.toString();
    }// Of decodingSingle

    /**
     * Decodes a bit string by walking the tree from the root: '0' goes to the
     * left child and '1' to the right child. Bits after the last complete
     * code are printed as extra input.
     *
     * @param paraString the bit string
     * @return the decoded text, or "Invalid input" if the string contains a
     *         character other than '0' or '1'
     */
    public String decoding(String paraString) {
        // Single-node tree.
        if (this.alphabetLength == 1) {
            return decodingSingle(paraString);
        }// Of if

        StringBuilder tempResult = new StringBuilder();
        HuffmanNode tempRoot = getRoot();
        HuffmanNode tempNode = tempRoot;
        // Index just past the last bit that completed a code. Starting at 0
        // keeps the extra-input report correct when nothing was decoded.
        int tempDecodedEnd = 0;
        for (int i = 0; i < paraString.length(); i++) {
            char tempChar = paraString.charAt(i);
            // Only '0' and '1' are accepted.
            if (tempChar != '0' && tempChar != '1') {
                return "Invalid input";
            }// Of if
            if (tempNode == null) {
                // Empty tree: nothing can be decoded, keep validating only.
                continue;
            }// Of if
            tempNode = (tempChar == '0') ? tempNode.leftChild : tempNode.rightChild;
            // A node without a left child is a leaf.
            if (tempNode.leftChild == null) {
                tempResult.append(tempNode.character);
                tempDecodedEnd = i + 1;
                tempNode = tempRoot;
            }// Of if
        }// Of for i

        // Report the bits that did not form a complete code.
        if (tempDecodedEnd < paraString.length()) {
            System.out.println("extra input:" + paraString.substring(tempDecodedEnd));
        }// Of if
        return tempResult.toString();
    }// Of decoding

    /**
     * Demo entry point.
     *
     * @param args optional: the path of the text file as the first argument;
     *             the original hard-coded path is used when it is absent
     */
    public static void main(String[] args) {
        String tempFilename = args.length > 0 ? args[0] : "E:\\Master\\Day90\\src\\files\\singleLetter";
        Huffman huffman = new Huffman(tempFilename);
        huffman.constructAlphabet();
        huffman.contrastTree();
        //huffman.preOrderVisit(huffman.getRoot());
        huffman.generateCodes();
        System.out.println(huffman.coding("Hello"));
        System.out.println(huffman.decoding("111001"));
    }// Of main
}// Of class Huffman
constructAlphabet():alphabet 存储文本中出现过的字符;charCounts 存储各字符的出现次数;charMapping 按 256 个 ASCII 码的位置存储该字符在 alphabet 中的序号(未出现的字符记为 -1)。
方法generateCodes():节点是左孩子时在编码前加“0”,是右孩子时在编码前加“1”;编码过程是自底向上的。decoding() 是 generateCodes() 的逆过程。
方法contrastTree()(即构建哈夫曼树):先分配空间,再初始化叶子节点(将第 i 个字符及其出现次数传入 HuffmanNode),最后建树。每一轮建树分为四步:第一步找出权重最低的节点;第二步找出权重次低的节点;第三步将前两步找到的节点的权重相加,得到新的父节点,该节点参与接下来的建树;第四步将子节点和父节点相连。
今天的代码看了很长时间才弄明白。