实际案例——数据压缩
目标:
将给出的一段文本,比如"ilikelikelikejavadoyoulikeajava",根据前面的的赫夫曼编码原理,对其进行数据压缩处理
步骤1:根据赫夫曼编码压缩数据的原理,需要创建"i like like like java do you like a java"对应的赫夫曼树.
思路:前面已经分析过了,而且我们已然讲过了构建赫夫曼树的具体实现
(创建赫夫曼树->给各个字符编码->规定向左为0向右为1得到每个字符的编码)。
功能:
根据赫夫曼编码压缩数据的原理,需要创建"i like like like java do you like a java"对应的赫夫曼树.
思路:
(1)构建一个新节点Node{data(存放数据),weight(权值),left,right}
(2)得到" "对应的byte[]数组
(3)编写一个方法,将构建赫夫曼树的Node节点,放在List<Node>中(形为:[Node[date=97,weight =5],Node[date=32,weight=9]]……)
(4)可以通过List创建对应的赫夫曼树
我们已经生成了赫夫曼树,进行第二阶段——————
(5)生成赫夫曼编树对应的赫夫曼编码(向左记0,向右记1)
形如下面
' '=01 'a'=100 'd'=11000 'u'=11001 'e'=1110 'v'=11011 'i'=101 'y'=11010 'j'=0010 'k'=1111 'l'=000 'o'=0011
(6)
将"i like like like java do you like a java",用上方编码编译
代码实现
package DataStructures.huffmanTree;
import java.util.*;
public class HuffmanCode {
public static void main(String[] args) {
String content = "i like like like java do you like a java";
byte contentBytes[] = content.getBytes();
System.out.println(contentBytes.length);//40
//测试
List<HuffmanNode> nodes = getHuffmanNodes(contentBytes);
System.out.println("nodes" + nodes);
//测试创建二叉树
System.out.println("赫夫曼树");
HuffmanNode HuffmanTreeRoot = creatHuffmanTree(nodes);
System.out.println("前序遍历");
HuffmanTreeRoot.preOreder();
//测试一把是否赫夫曼编码
//getCodes(HuffmanTreeRoot,"",stringBuilder);
Map<Byte,String> huffmanCodes = getCodes(HuffmanTreeRoot);
System.out.println("~生成的赫夫曼编码表\n"+huffmanCodes);
}
//生成赫夫曼树对应的赫夫曼编码里
//思路
//(1)将赫夫曼编码表放在Map<Byte,String>
// 这个Map就跟Python的字典一样,{[key:value] [key:value]……}
static Map<Byte,String> huffmanCodes = new HashMap<Byte, String>();
//(2)在生成赫夫曼编码表时,需要去拼接路径,所以定义一个StringBuilder 存储某个叶子节点的路径
static StringBuilder stringBuilder = new StringBuilder();
//Ps:为了调用方便,我们重载一下getCodes
private static Map<Byte,String> getCodes(HuffmanNode root){
if (root==null){
return null;
}
//处理root的左子树
getCodes(root.left,"0",stringBuilder);
//处理root的右子树
getCodes(root.right,"1",stringBuilder);
return huffmanCodes;
}
/**
* //(3)方法
* 功能:得到传入的node节点的所有赫夫曼编码,并存放到huffmanCodes集合中
* @param node 传入的节点(默认从root)
* @param code 该节点的路径(左节点0和右节点1)
* @param stringBuilder 拼接路径
*/
private static void getCodes(HuffmanNode node,String code,StringBuilder stringBuilder){
StringBuilder stringBuilder2 = new StringBuilder(stringBuilder);
//将传入的code加入到StringBuile2中
stringBuilder2.append(code);
if (node!=null){
//如果node==null不处理
//判断当前node时叶子节点还是非叶子节点
if (node.data==null){
//说明是非叶子节点
//递归处理
//向左
getCodes(node.left,"0",stringBuilder2);
//向右
getCodes(node.right,"1",stringBuilder2);
}else {
//说明是一个叶子节点
huffmanCodes.put(node.data,stringBuilder2.toString());
}
}
}
/**
* @param bytes 接受的数组
* @return 返回的就是List形式:[Node[date=97,weight =5],Node[date=32,weight=9]]……
*/
private static List<HuffmanNode> getHuffmanNodes(byte bytes[]) {
//1.创建ArrayList
ArrayList<HuffmanNode> nodes = new ArrayList<HuffmanNode>();
//2.遍历bytes,统计每个byte出现的次数->map
Map<Byte, Integer> counts = new HashMap<>();
for (byte b : bytes) {
Integer count = counts.get(b);
if (count == null) {
//Map还没有这个字符数据
counts.put(b, 1);
} else {
counts.put(b, count + 1);
}
}
//把每个键值对转成HuffmanNode对象,并加入nodes中
//遍历Map
for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
nodes.add(new HuffmanNode(entry.getKey(), entry.getValue()));
}
return nodes;
}
//通过上面的List,常见对应的赫夫曼树
private static HuffmanNode creatHuffmanTree(List<HuffmanNode> nodes) {
while (nodes.size() > 1) {
Collections.sort(nodes);
HuffmanNode leftNode = nodes.get(0);
HuffmanNode rightNode = nodes.get(1);
//没有data只有权值weight
HuffmanNode parent = new HuffmanNode(null, leftNode.weight + rightNode.weight);
parent.left = leftNode;
parent.right = rightNode;
nodes.remove(leftNode);
nodes.remove(rightNode);
nodes.add(parent);
}
return nodes.get(0);
}
//前序遍历
private static void preOreder(HuffmanNode root) {
if (root != null) {
root.preOreder();
} else {
System.out.println("空树");
}
}
}
//创建Node
class HuffmanNode implements Comparable<HuffmanNode> {
Byte data;//存放数据本身'a'->97 ' '->32
int weight;//权值,字符出现的次数
HuffmanNode left;
HuffmanNode right;
public HuffmanNode(Byte data, int weight) {
this.data = data;
this.weight = weight;
}
@Override//表示从小到达排序
public int compareTo(HuffmanNode huffmanNode) {
return this.weight - huffmanNode.weight;
}
@Override
public String toString() {
return "HuffmanNode{" + "data=" + data + ", weight=" + weight + '}';
}
public void preOreder() {
System.out.println(this);
if (this.left != null) {
this.left.preOreder();
}
if (this.right != null) {
this.right.preOreder();
}
}
}
总结:
1. 首先需要一个等待压缩的字符串content
2. 将这个字符串放在字符数组中 contentBytes[]
5+6在这里完成哦!!
3. 数组建好了,就要将这个数组变成ArrayList<HuffmanNode>,得到一个动态数组(因为可以add和remove)
4. 接下来我们就需要将这个动态数组中的每一个元素变成一节点,构成一颗树
5. 为了完成3的变成一个节点,我们需要一个节点类class HuffmanNode
6. 节点类里应该包含我们字符还有字符出现的次数,当然还有左右节点
//(其实建节点5 & 6应该置前!置前!再置前!)//
7. 为了完成3的构成一棵树,我们就需要专门一个静态方法private static HuffmanNode creatHuffmanTree(List<HuffmanNode> nodes)
8. 方法里要有排序和建树,最后root节点也返回了,树也建好了
9. 这样3确实完成了,完成了以后我们就要通过这颗树得到这棵树的赫夫曼编码了
10. 我们决定将得到的赫夫曼编码存储到Map中,所以我们需要
static Map<Byte,String> huffmanCodes = new HashMap<Byte, String>();
11. 我们要对每一个字符进行编码,那么就需要一个方法
private static void getCodes(HuffmanNode node,String code,StringBuilder stringBuilder)
12. 方法里要进行递归处理,将0 和 1 放进去
当从递归中出来以后将这个data对应的路径也就是编码存到Map里就可以了!
13. 输出就输出huffmanCodes这个Map就可以了
//更加简洁好看的代码样子
package EXCISE;
import java.util.*;
public class HuffmanCodesDemo {
public static void main(String[] args) {
//1. 要有一个字符串
String content = "In case i don't see you,good morning,good afternoon and good night";
byte bytes[] = content.getBytes();
//测试1,我要看看那我的句子有多长
System.out.println("len=" + bytes.length);
//测试2,不想看看我弄得动态数组嘛?
List<aNode> list = aNodeArrayList(bytes);
System.out.println("list=" + list);
//测试3,我种的树不知道怎么样了。。
aNode root = Planttrees(list);
System.out.println("看看我的树吧!");
preOreder(root);
//测试4. 看看取得名字(编码)怎么样吧!
GiveCodes(root, "", stringBuilder);
System.out.println("这部看看我起了什么好名字么?\n" + HuffmanCodes);
//测试5.完结撒花
System.out.println();
System.out.println(content);
}
//3. 动态数组list在哪呢?
private static List<aNode> aNodeArrayList(byte bytes[]) {
List<aNode> list = new ArrayList<aNode>();
Map<Byte, Integer> counts = new HashMap<Byte, Integer>();
for (byte b : bytes) {
Integer count = counts.get(b);
if (count == null) {
counts.put(b, 1);
} else {
counts.put(b, count + 1);
}
}
for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
list.add(new aNode(entry.getKey(), entry.getValue()));
}
return list;
}
//4. 有list了是不是可以构建树啦?
private static aNode Planttrees(List<aNode> list) {
while (list.size() > 1) {
Collections.sort(list);
aNode leftNode = list.get(0);
aNode rightNode = list.get(1);
aNode parent = new aNode(null, leftNode.weight + rightNode.weight);
parent.left = leftNode;
parent.right = rightNode;
list.remove(leftNode);
list.remove(rightNode);
list.add(parent);
}
return list.get(0);
}
//4.1树出来了我们遍历一下看看成果吧
private static void preOreder(aNode root) {
if (root != null) {
root.preorder();
} else {
System.out.println("空");
}
}
//5. 起名字要做准备呀!
// 首先是Map<Byte,String> HuffmanCodes帮我存一下所有果实的名字
// 然后是StringBuilder stringBuilder帮我拼接一下想好的序号哦!
static Map<Byte, String> HuffmanCodes = new HashMap<Byte, String>();
static StringBuilder stringBuilder = new StringBuilder();
//6. 准备结束啦!其名字吧,一口气起完好了!就用递归!
private static void GiveCodes(aNode node, String Code, StringBuilder stringBuilder) {
StringBuilder stringBuilder1 = new StringBuilder(stringBuilder);
stringBuilder1.append(Code);
if (node != null) {
if (node.aByte == null) {
GiveCodes(node.left, "0", stringBuilder1);
GiveCodes(node.right, "1", stringBuilder1);
} else {
HuffmanCodes.put(node.aByte, stringBuilder.toString());
}
}
}
}
//2. Node在哪呢?
class aNode implements Comparable<aNode> {
Byte aByte;
int weight;
aNode left;
aNode right;
public aNode(Byte aByte, int weight) {
this.aByte = aByte;
this.weight = weight;
}
@Override
public int compareTo(aNode aNode) {
return this.weight - aNode.weight;
}
@Override
public String toString() {
return "HuffmanNode{" + "aByte=" + aByte + ", weight=" + weight + '}';
}
public void preorder() {
System.out.println(this);
if (this.left != null) {
this.left.preorder();
}
if (this.right != null) {
this.right.preorder();
}
}
}