Java第31天——Huffman编码(建树,编码和解码)

建树

1,Arrays.fill(charMapping, -1); 这种初始化工作非常重要. 搞不好就会调试很久才找到 bug.
2,171 行将 char 强制转换为 int, 即其在 ASCII 字符集中的位置. 这是底层代码的特权. 学渣没资格用哈哈.
3,变量多的时候你才能体会到用 temp, para 这些前缀来区分不同作用域变量的重要性. 没有特殊前缀的就是成员变量.
4,建树就是一个自底向上, 贪心选择的过程. 确定子节点、父节点的代码是核心.
5,最后生成的节点就是根节点.
6,手绘相应的 Huffman 树对照, 才能真正理解.


编码与解码

  1. 前序遍历代码的作用仅仅是调试.
  2. 双重循环有一点点难度. 好像也并没有比第 26 天的难.
  3. 306 行每次重新初始化很重要.
  4. 336 行使用了 charMapping 和强制类型转换.
  5. 编码是从叶节点到根节点, 解码就是反过来.
  6. 解码获得原先的字符串, 就验证正确性了.

所有代码

package dataStructure.tree;

import java.io.BufferedReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.stream.Collectors;

/**
 * Huffman tree, encoding,and decoding. For simplicity, only ASCII characters
 * are supported
 * 
 * @author goudiyuan
 */
public class Huffman {

	/**
	 * A node of the Huffman tree. Leaves carry a character of the alphabet;
	 * internal nodes carry the placeholder character '*'.
	 */
	class HuffmanNode {

		/**
		 * The character. Only meaningful for leaf nodes.
		 */
		char character;

		/**
		 * The weight: the occurrence count for a leaf, the sum of both children's
		 * weights for an internal node.
		 */
		int weight;

		/**
		 * The left child, or null for a leaf.
		 */
		HuffmanNode leftChild;

		/**
		 * The right child, or null for a leaf.
		 */
		HuffmanNode rightChild;

		/**
		 * The parent, or null for the root. It is followed upwards when the code
		 * of each character is generated.
		 */
		HuffmanNode parent;

		/**
		 * Creates a node from all of its fields.
		 *
		 * @param paraCharacter  The character stored in the node.
		 * @param paraWeight     The weight of the node.
		 * @param paraLeftChild  The left child, may be null.
		 * @param paraRightChild The right child, may be null.
		 * @param paraParent     The parent, may be null.
		 */
		public HuffmanNode(char paraCharacter, int paraWeight, HuffmanNode paraLeftChild, HuffmanNode paraRightChild,
				HuffmanNode paraParent) {
			character = paraCharacter;
			weight = paraWeight;
			leftChild = paraLeftChild;
			rightChild = paraRightChild;
			parent = paraParent;
		}// Of HuffmanNode

		/**
		 * Formats the node as "(character,weight)".
		 *
		 * @return The string representation.
		 */
		@Override
		public String toString() {
			return "(" + character + "," + weight + ")";
		}// Of toString
	}// Of class HuffmanNode

	/**
	 * The number of supported characters: char values 0..255.
	 */
	public static final int NUM_CHARS = 256;

	/**
	 * The input text. It is stored in a string for simplicity.
	 */
	String inputText;

	/**
	 * The length of the alphabet, also the number of leaves.
	 */
	int alphabetLength;

	/**
	 * The alphabet: the distinct characters of the input text in ascending
	 * order of their char value.
	 */
	char[] alphabet;

	/**
	 * The weights of all nodes. The length is 2 * alphabetLength - 1 so that
	 * the non-leaf nodes are included.
	 */
	int[] charCounts;

	/**
	 * The mapping from a char value to its index in the alphabet; -1 for chars
	 * that do not occur in the input text.
	 */
	int[] charMapping;

	/**
	 * The code of each char in the alphabet. It has the same length as the
	 * alphabet.
	 */
	String[] huffmanCodes;

	/**
	 * All nodes. Leaves come first, the last node is the root.
	 */
	HuffmanNode[] nodes;

	/**
	 * Creates the coder and reads the text it is built from.
	 *
	 * @param paraFilename The text filename.
	 */
	public Huffman(String paraFilename) {
		charMapping = new int[NUM_CHARS];

		readText(paraFilename);
	}// Of the first constructor

	/**
	 * Reads the whole file as UTF-8 into inputText, joining its lines with
	 * "\n", and prints the text. On any read failure the error is reported and
	 * the program terminates with a non-zero exit status.
	 *
	 * @param paraFilename The text filename.
	 */
	public void readText(String paraFilename) {
		// try-with-resources guarantees that the reader is closed.
		try (BufferedReader tempReader = Files.newBufferedReader(Paths.get(paraFilename),
				StandardCharsets.UTF_8)) {
			inputText = tempReader.lines().collect(Collectors.joining("\n"));
		} catch (Exception ee) {
			System.err.println(ee);
			// A failure must not be reported as a successful run.
			System.exit(1);
		} // Of try

		System.out.println("The text is:\r\n" + inputText);
	}// Of readText

	/**
	 * Constructs the alphabet from inputText. The results are stored in the
	 * member variables alphabetLength, alphabet, charCounts and charMapping.
	 *
	 * @throws IllegalArgumentException If the text contains a character whose
	 *                                  value is not below NUM_CHARS.
	 * @throws IllegalStateException    If the text is empty.
	 */
	public void constructAlphabet() {
		// Initialize. -1 marks "not in the alphabet".
		Arrays.fill(charMapping, -1);

		// The count for each char. At most NUM_CHARS chars.
		int[] tempCharCounts = new int[NUM_CHARS];

		// Step 1. Scan the string to obtain the counts.
		int tempCharIndex;
		for (int i = 0; i < inputText.length(); i++) {
			tempCharIndex = (int) inputText.charAt(i);

			System.out.println("" + tempCharIndex + " ");

			if (tempCharIndex >= NUM_CHARS) {
				throw new IllegalArgumentException("Unsupported character '" + inputText.charAt(i) + "' (code "
						+ tempCharIndex + ") at position " + i + "; only codes below " + NUM_CHARS
						+ " are supported.");
			} // Of if
			tempCharCounts[tempCharIndex]++;
		} // Of for i

		// Step 2. Scan to determine the size of the alphabet. The bound must be
		// the same as in step 3, otherwise the alphabet array is too short
		// whenever char 255 occurs.
		alphabetLength = 0;
		for (int i = 0; i < NUM_CHARS; i++) {
			if (tempCharCounts[i] > 0) {
				alphabetLength++;
			} // Of if
		} // Of for i

		if (alphabetLength == 0) {
			throw new IllegalStateException("The input text is empty; no Huffman tree can be built.");
		} // Of if

		// Step 3. Compress to the alphabet.
		alphabet = new char[alphabetLength];
		charCounts = new int[2 * alphabetLength - 1];

		int tempCounter = 0;
		for (int i = 0; i < NUM_CHARS; i++) {
			if (tempCharCounts[i] > 0) {
				alphabet[tempCounter] = (char) i;
				charCounts[tempCounter] = tempCharCounts[i];
				charMapping[i] = tempCounter;
				tempCounter++;
			} // Of if
		} // Of for i

		System.out.println("The alphabet is: " + Arrays.toString(alphabet));
		System.out.println("Their counts is: " + Arrays.toString(charCounts));
		System.out.println("The char mappings are: " + Arrays.toString(charMapping));
	}// Of constructAlphabet

	/**
	 * Constructs the Huffman tree bottom-up. In every round the two unprocessed
	 * nodes with the smallest weights become the left and right child of a new
	 * node. constructAlphabet() must have been invoked before.
	 */
	public void constructTree() {
		// Step 1. Allocate space.
		nodes = new HuffmanNode[alphabetLength * 2 - 1];
		boolean[] tempProcessed = new boolean[alphabetLength * 2 - 1];

		// Step 2. Initialize leaves.
		for (int i = 0; i < alphabetLength; i++) {
			nodes[i] = new HuffmanNode(alphabet[i], charCounts[i], null, null, null);
		} // Of for i

		// Step 3. Construct the tree.
		int tempLeft, tempRight;
		for (int i = alphabetLength; i < 2 * alphabetLength - 1; i++) {
			// Step 3.1 Select the first minimal as the left child.
			tempLeft = selectMinimal(tempProcessed, i);
			tempProcessed[tempLeft] = true;

			// Step 3.2 Select the second minimal as the right child.
			tempRight = selectMinimal(tempProcessed, i);
			tempProcessed[tempRight] = true;
			System.out.println("Selecting " + tempLeft + " and " + tempRight);

			// Step 3.3 Construct the new node.
			charCounts[i] = charCounts[tempLeft] + charCounts[tempRight];
			nodes[i] = new HuffmanNode('*', charCounts[i], nodes[tempLeft], nodes[tempRight], null);

			// Step 3.4 Link with children.
			nodes[tempLeft].parent = nodes[i];
			nodes[tempRight].parent = nodes[i];
			System.out.println("The children of " + i + " are " + tempLeft + " and " + tempRight);
		} // Of for i
	}// Of constructTree

	/**
	 * Finds the unprocessed node with the smallest weight among the first
	 * paraLimit nodes. Ties are resolved in favor of the smallest index.
	 *
	 * @param paraProcessed Flags of the nodes that already have a parent.
	 * @param paraLimit     The number of nodes created so far.
	 * @return The index of the selected node, or -1 if all are processed.
	 */
	private int selectMinimal(boolean[] paraProcessed, int paraLimit) {
		int resultIndex = -1;
		int tempMinimal = Integer.MAX_VALUE;
		for (int j = 0; j < paraLimit; j++) {
			if (paraProcessed[j]) {
				continue;
			} // Of if

			if (tempMinimal > charCounts[j]) {
				tempMinimal = charCounts[j];
				resultIndex = j;
			} // Of if
		} // Of for j
		return resultIndex;
	}// Of selectMinimal

	/**
	 * Gets the root of the Huffman tree.
	 *
	 * @return The root, i.e., the last node created by constructTree().
	 */
	public HuffmanNode getRoot() {
		return nodes[nodes.length - 1];
	}// Of getRoot

	/**
	 * Prints the subtree in pre-order. It only serves debugging.
	 *
	 * @param paraNode The root of the subtree to print.
	 */
	public void preOrderVisit(HuffmanNode paraNode) {
		System.out.println("(" + paraNode.character + "," + paraNode.weight + ")");

		if (paraNode.leftChild != null) {
			preOrderVisit(paraNode.leftChild);
		} // Of if

		if (paraNode.rightChild != null) {
			preOrderVisit(paraNode.rightChild);
		} // Of if
	}// Of preOrderVisit

	/**
	 * Generates the code of each character in the alphabet by walking from its
	 * leaf up to the root: a left edge contributes '0', a right edge '1'.
	 */
	public void generateCodes() {
		huffmanCodes = new String[alphabetLength];
		HuffmanNode tempNode;
		for (int i = 0; i < alphabetLength; i++) {
			tempNode = nodes[i];
			// The bits are collected leaf-to-root and reversed afterwards.
			StringBuilder tempCodeBuilder = new StringBuilder();
			while (tempNode.parent != null) {
				if (tempNode == tempNode.parent.leftChild) {
					tempCodeBuilder.append('0');
				} else {
					tempCodeBuilder.append('1');
				} // Of if
				tempNode = tempNode.parent;
			} // Of while

			String tempCharCode = tempCodeBuilder.reverse().toString();
			if (tempCharCode.isEmpty()) {
				// A one-symbol alphabet has a root that is also the only leaf.
				// It still needs one bit per character, otherwise the encoded
				// text would lose its length.
				tempCharCode = "0";
			} // Of if

			huffmanCodes[i] = tempCharCode;
			System.out.println("The code of " + alphabet[i] + " is " + tempCharCode);
		} // Of for i
	}// Of generateCodes

	/**
	 * Encodes the given string with the generated codes.
	 *
	 * @param paraString The given string.
	 * @return The concatenated codes, consisting of '0' and '1' only.
	 * @throws IllegalArgumentException If a character is not in the alphabet.
	 */
	public String coding(String paraString) {
		StringBuilder resultCodeBuilder = new StringBuilder();

		for (int i = 0; i < paraString.length(); i++) {
			char tempChar = paraString.charAt(i);

			// From the original char to the location in the alphabet.
			int tempIndex = (tempChar < NUM_CHARS) ? charMapping[tempChar] : -1;
			if (tempIndex < 0) {
				throw new IllegalArgumentException("Character '" + tempChar + "' (code " + (int) tempChar
						+ ") at position " + i + " is not in the alphabet.");
			} // Of if

			// From the location in the alphabet to the code.
			resultCodeBuilder.append(huffmanCodes[tempIndex]);
		} // Of for i
		return resultCodeBuilder.toString();
	}// Of coding

	/**
	 * Decodes the given bit string by walking from the root to a leaf for each
	 * character. Trailing bits that do not reach a leaf are ignored.
	 *
	 * @param paraString The given string of '0' and '1'.
	 * @return The decoded text.
	 * @throws IllegalArgumentException If the string contains any other
	 *                                  character.
	 */
	public String decoding(String paraString) {
		StringBuilder resultBuilder = new StringBuilder();

		HuffmanNode tempRoot = getRoot();
		HuffmanNode tempNode = tempRoot;

		for (int i = 0; i < paraString.length(); i++) {
			char tempBit = paraString.charAt(i);
			if (tempBit != '0' && tempBit != '1') {
				throw new IllegalArgumentException(
						"Invalid character '" + tempBit + "' at position " + i + "; only '0' and '1' are allowed.");
			} // Of if

			// With a one-symbol alphabet the root is a leaf and must not be
			// descended from; each bit then stands for one character.
			if (tempNode.leftChild != null) {
				if (tempBit == '0') {
					tempNode = tempNode.leftChild;
				} else {
					tempNode = tempNode.rightChild;
				} // Of if
				System.out.println(tempNode);
			} // Of if

			if (tempNode.leftChild == null) {
				System.out.println("Decode one: " + tempNode);
				// Decode one char.
				resultBuilder.append(tempNode.character);

				// Return to the root.
				tempNode = tempRoot;
			} // Of if
		} // Of for i

		return resultBuilder.toString();
	}// Of decoding

	/**
	 * The entrance of the program.
	 *
	 * @param args Optional: args[0] is the text filename. Without it the
	 *             original default location is used.
	 */
	public static void main(String[] args) {
		String tempFilename = (args.length > 0) ? args[0]
				: "C:/Users/goudiyuan/Desktop/learn/dataStructure/test.txt";

		Huffman tempHuffman = new Huffman(tempFilename);
		tempHuffman.constructAlphabet();

		tempHuffman.constructTree();

		HuffmanNode tempRoot = tempHuffman.getRoot();
		System.out.println("The root is: " + tempRoot);
		System.out.println("Preorder visit:");
		tempHuffman.preOrderVisit(tempHuffman.getRoot());

		tempHuffman.generateCodes();

		String tempCoded = tempHuffman.coding("abcde");
		System.out.println("Coded: " + tempCoded);
		String tempDecoded = tempHuffman.decoding(tempCoded);
		System.out.println("Decoded: " + tempDecoded);
	}// Of main

}// Of class Huffman

ps:我感觉是我的文本错误,但就是运行不了,搞不懂为什么在建树那一步错了,反正就是整不出来。

文本

错误

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值