Huffman Coding
We want to encode a given string S to a binary string. Each alphabet character in S should be mapped to a different variable-length code, and no code may be a prefix of another.
Huffman coding is known as one of the ways to obtain a code table for such an encoding.
For example, consider that the appearance frequencies of each alphabet character in S are as follows:
| | a | b | c | d | e | f |
|---|---|---|---|---|---|---|
| freq. | 45 | 13 | 12 | 16 | 9 | 5 |
| code | 0 | 101 | 100 | 111 | 1101 | 1100 |
We can obtain "Huffman codes" (the third row of the table) using Huffman's algorithm.
The algorithm finds a full binary tree called "Huffman tree" as shown in the following figure.
To obtain a Huffman tree, for each alphabet character, we first prepare a node which has no parents and a frequency (weight) of the alphabet character. Then, we repeat the following steps:
- Choose two nodes, x and y, which have no parents and the smallest weights.
- Create a new node z whose weight is the sum of x's and y's.
- Add edges linking z to x with label 0 and to y with label 1. Then z becomes their parent.
- If there is only one node without a parent, which is the root, finish the algorithm. Otherwise, go to step 1.
Finally, we can find a "Huffman code" in a path from the root to leaves.
Task
Given a string S, output the length of the binary string obtained by using Huffman coding for S.
Input
S
A string S is given in a line.
Output
Print the length of a binary string in a line.
Constraints
- $1 \le |S| \le 10^5$
- S consists of lowercase English letters.
Sample Input 1
abca
Sample Output 1
6
Sample Input 2
aaabbcccdeeeffg
Sample Output 2
41
Sample Input 3
z
Sample Output 3
1
/code
用堆排序可以增加效率,所以我自己制造了一个轮子,写了一个堆,
先将输入的字符串转化为字符数组 ,数每个字符出现的次数。
每个字符和他出现次数存在内部类node 里面
建立霍夫曼树(用堆排序效率高)
对每个叶节点encode (用递归+static)
最后输出。
import java.util.ArrayList;
import java.util.Scanner;
/**
 * Computes the length of the binary string obtained by Huffman-coding a string.
 *
 * <p>The program reads one line from standard input, counts how often each
 * character occurs, builds a Huffman tree with a hand-written binary min-heap,
 * assigns a code to every leaf and prints the sum of
 * {@code code length * frequency} over all distinct characters.
 */
public class Huffman {

    /**
     * Array-backed binary min-heap of {@link Node}s ordered by {@code freq}.
     *
     * <p>The layout is 1-based: slot 0 of {@code heap} holds a placeholder that
     * the heap operations in this class never read, and the live elements
     * occupy {@code heap[1 .. size - 1]}. {@code size} therefore counts the
     * placeholder slot too, so an empty heap has {@code size == 1}.
     */
    static class Heap {
        Node[] heap;
        int size;

        public Heap() {
            super();
        }

        /**
         * Wraps an array produced by {@link #makeheap(Node[])}.
         *
         * @param heap 1-based heap array whose every slot is in use
         */
        public Heap(Node[] heap) {
            this.heap = heap;
            // The whole array is in use initially, placeholder slot included.
            this.size = heap.length;
        }
    }

    /**
     * A node of the Huffman tree.
     *
     * <p>Leaves carry a character {@code c}; internal nodes only carry the
     * summed frequency of their subtree. {@code code} is filled in for leaves
     * by {@link #encode(Node, ArrayList)}.
     */
    static class Node {
        char c;
        int freq = 0;
        Node left;
        Node right;
        String code;

        public Node() {
            super();
        }

        /** Creates a leaf for {@code c} that has been seen once. */
        public Node(char c) {
            this.c = c;
            this.freq = 1;
        }

        /** Creates a node with the given weight and no character. */
        public Node(int freq) {
            this.freq = freq;
        }

        /**
         * Tells whether this node represents the character {@code c}.
         * Note that this overloads, and does not override, {@code Object.equals}.
         */
        boolean equals(char c) {
            return c == this.c;
        }
    }

    /**
     * Reads one line from standard input and prints the length of its Huffman
     * encoding. Prints {@code 0} when there is no input line or the line is
     * empty.
     */
    public static void main(String[] args) {
        try (Scanner scan = new Scanner(System.in)) {
            String line = scan.hasNextLine() ? scan.nextLine() : "";
            System.out.println(encodedLength(line));
        }
    }

    /**
     * Returns the number of bits needed to Huffman-encode {@code s}.
     *
     * <p>A string made of a single distinct character uses one bit per
     * character, because a zero-length code could not be decoded.
     *
     * @param s text to encode; {@code null} is treated like the empty string
     * @return total encoded length in bits, {@code 0} for empty input
     */
    public static long encodedLength(String s) {
        if (s == null || s.isEmpty()) {
            return 0L;
        }
        Node[] leaves = countFrequencies(s);
        encode(buildTree(leaves), new ArrayList<Character>());
        long total = 0L;
        for (Node leaf : leaves) {
            // Widen before multiplying so very long inputs cannot overflow int.
            total += (long) leaf.code.length() * leaf.freq;
        }
        return total;
    }

    /**
     * Builds one leaf per distinct character of {@code s}, in order of first
     * appearance, with {@code freq} set to the number of occurrences.
     */
    private static Node[] countFrequencies(String s) {
        // Direct lookup by char value replaces a linear search per character.
        Node[] byChar = new Node[Character.MAX_VALUE + 1];
        ArrayList<Node> leaves = new ArrayList<Node>();
        for (int i = 0; i < s.length(); i++) {
            char ch = s.charAt(i);
            Node leaf = byChar[ch];
            if (leaf == null) {
                leaf = new Node(ch); // a new leaf starts with freq == 1
                byChar[ch] = leaf;
                leaves.add(leaf);
            } else {
                leaf.freq++;
            }
        }
        return leaves.toArray(new Node[leaves.size()]);
    }

    /**
     * Builds the Huffman tree over {@code leaves} and returns its root.
     *
     * <p>The two lightest parentless nodes are merged repeatedly; the first
     * one taken becomes the left child and the second the right child. A
     * single leaf is returned as the root itself.
     *
     * @param leaves leaf nodes with their frequencies set
     * @return the root of the tree, or {@code null} when there are no leaves
     */
    static Node buildTree(Node[] leaves) {
        if (leaves == null || leaves.length == 0) {
            return null;
        }
        Heap pending = new Heap(makeheap(leaves));
        // size counts the placeholder slot, so size > 2 means at least two nodes remain.
        while (pending.size > 2) {
            Node merged = new Node();
            merged.left = pick(pending);
            merged.right = pick(pending);
            merged.freq = merged.left.freq + merged.right.freq;
            Insert(pending, merged);
        }
        return pick(pending);
    }

    /**
     * Copies {@code freqarr} into a new 1-based array and arranges it as a
     * min-heap ordered by {@code freq}.
     *
     * @param freqarr nodes to put on the heap; not modified
     * @return array of length {@code freqarr.length + 1} whose slot 0 is a placeholder
     */
    public static Node[] makeheap(Node[] freqarr) {
        Node[] heap = new Node[freqarr.length + 1];
        // Slot 0 only keeps the indexing 1-based; its weight is never compared here.
        heap[0] = new Node(100001);
        for (int i = 0; i < freqarr.length; i++) {
            heap[i + 1] = freqarr[i];
        }
        // Sift down every node that has at least one child, last parent first.
        for (int i = (heap.length - 1) / 2; i > 0; i--) {
            rebuilt(heap, i, heap.length);
        }
        return heap;
    }

    /**
     * Sifts the element at index {@code root} down until neither child is lighter.
     *
     * @param heap 1-based heap array
     * @param root index of the element to sift down
     * @param size exclusive upper bound of the live indices
     */
    public static void rebuilt(Node[] heap, int root, int size) {
        Node sinking = heap[root];
        int parent = root;
        while (parent * 2 < size) {
            int son = parent * 2;
            // Prefer the right child only when it is strictly lighter.
            if (son + 1 < size && heap[son + 1].freq < heap[son].freq) {
                son++;
            }
            if (sinking.freq <= heap[son].freq) {
                break;
            }
            heap[parent] = heap[son];
            parent = son;
        }
        heap[parent] = sinking;
    }

    /**
     * Removes and returns the lightest node of the heap.
     *
     * @param H heap to take from
     * @return the node with the smallest {@code freq}
     * @throws IllegalStateException if the heap holds no elements
     */
    public static Node pick(Heap H) {
        if (H.size <= 1) {
            throw new IllegalStateException("cannot pick from an empty heap");
        }
        Node res = H.heap[1];
        // Move the last live element to the top, shrink, then restore heap order.
        H.heap[1] = H.heap[H.size - 1];
        H.size--;
        rebuilt(H.heap, 1, H.size);
        return res;
    }

    /**
     * Adds {@code N} to the heap, sifting it up past every strictly heavier ancestor.
     *
     * @param H heap to insert into; its array must have a free slot at index {@code H.size}
     * @param N node to add
     */
    public static void Insert(Heap H, Node N) {
        H.size++;
        int son = H.size - 1;
        while (son / 2 > 0) {
            int parent = son / 2;
            if (N.freq >= H.heap[parent].freq) {
                break;
            }
            H.heap[son] = H.heap[parent];
            son = parent;
        }
        H.heap[son] = N;
    }

    /**
     * Assigns a Huffman code to every leaf below {@code root}.
     *
     * <p>As in the problem statement, the edge to the left child is labelled
     * {@code 0} and the edge to the right child {@code 1}. A leaf reached with
     * an empty path (a tree consisting of a single leaf) gets the code
     * {@code "0"}.
     *
     * @param root subtree to label; {@code null} is ignored
     * @param list labels on the path from the tree root down to {@code root};
     *             used as scratch space and left with its original contents on return
     */
    public static void encode(Node root, ArrayList<Character> list) {
        if (root == null) {
            return;
        }
        if (root.left == null && root.right == null) {
            if (list.isEmpty()) {
                root.code = "0";
            } else {
                StringBuilder bits = new StringBuilder(list.size());
                for (char bit : list) {
                    bits.append(bit);
                }
                root.code = bits.toString();
            }
            return;
        }
        list.add('0');
        encode(root.left, list);
        // Reuse the same slot for the right branch instead of popping and pushing.
        list.set(list.size() - 1, '1');
        encode(root.right, list);
        list.remove(list.size() - 1);
    }
}
} public static void encode(Node root , ArrayList<Character> list) { if(root!=null) { if(root.left == null && root.right==null) { char[] code = new char[list.size()]; for(int i = 0 ; i< code.length;i++) { code[i] = list.get(i); } root.code=String.copyValueOf(code); } if(root.left != null || root.right!=null) list.add('1'); encode(root.left,list); // list.remove(list.size()-1); if(root.left != null || root.right!=null) list.add('0'); encode(root.right,list); if(list.size()>0) list.remove(list.size()-1); } } }