1 #include
2 #include
3 using namespace::std;4
5 #define Code_value_bits 16 /* Number of bits in a code value */
6 typedef long code_value; /*Type of an arithmetic code value*/
7
8 #define Top_value (((long)1<
9
10
11 #define First_qtr (Top_value/4+1) /* Point after first quarter */
12 #define Half (2*First_qtr) /* Point after first half */
13 #define Third_qtr (3*First_qtr) /* Point after third quarter */
14
15 #define No_of_chars 256 /* Number of character symbols */
16 #define EOF_symbol (No_of_chars+1) /* Index of EOF symbol */
17
18 #define No_of_symbols (No_of_chars+1) /* Total number of symbols */
19
20 /*TRANSLATION TABLES BETWEEN CHARACTERS AND SYMBOL INDEXES.*/
21
22 int char_to_index[No_of_chars]; /*To index from character*/
23 unsigned char index_to_char[No_of_symbols+1]; /*To character from index*/
24
25 /*CUMULATIVE FREQUENCY TABLE.*/
26
27 #define Max_frequency 16383 /* Maximum allowed frequency count */
28 /*2^14 - 1*/
29 int cum_freq[No_of_symbols+1]; /*Cumulative symbol frequencies*/
30
31 //固定频率表,为了方便起见
32 int freq[No_of_symbols+1] ={33 0,34 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 124, 1, 1, 1, 1, 1,35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,36
37 /*! " # $ % & ' ( ) * + , - . /*/
38 1236, 1, 21, 9, 3, 1, 25, 15, 2, 2, 2, 1, 79, 19, 60, 1,39
40 /*0 1 2 3 4 5 6 7 8 9 : ; < = > ?*/
41 15, 15, 8, 5, 4, 7, 5, 4, 4, 6, 3, 2, 1, 1, 1, 1,42
43 /*@ A B C D E F G H I J K L M N O*/
44 1, 24, 15, 22, 12, 15, 10, 9, 16, 16, 8, 6, 12, 23, 13, 11,45
46 /*P Q R S T U V W X Y Z [ / ] ^ _*/
47 14, 1, 14, 28, 29, 6, 3, 11, 1, 3, 1, 1, 1, 1, 1, 3,48
49 /*' a b c d e f g h i j k l m n o*/
50 1, 491, 85, 173, 232, 744, 127, 110, 293, 418, 6, 39, 250, 139, 429, 446,51
52 /*p q r s t u v w x y z { | } ~*/
53 111, 5, 388, 375, 531, 152, 57, 97, 12, 101, 5, 2, 1, 2, 3, 1,54
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,62 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,63 1
64 };65
66 //用来存储编码值,是编码解码过程的桥梁。大小暂定100,实际中可以修改
67 char code[100];68 static int code_index=0;69 static int decode_index=0;70
71 //buffer为八位缓冲区,暂时存放编码制
72 static intbuffer;73 //buffer中还有几个比特没有用到,初始值为8
74 static intbits_to_go;75 //超过了EOF的字符,也是垃圾
76 static intgarbage_bits;77
78 //启用字符频率统计模型,也就是计算各个字符的频率分布区间
79 voidstart_model(){80 inti;81 for (i = 0; i
83 char_to_index[i] = i+1;84 index_to_char[i+1] =i;85 }86
87 //累计频率cum_freq[i-1]=freq[i]+...+freq[257], cum_freq[257]=0;
88 cum_freq[No_of_symbols] = 0;89 for (i = No_of_symbols; i>0; i--) {90 cum_freq[i-1] = cum_freq[i] +freq[i];91 }92 //这条语句是为了确保频率和的上线,这是后话,这里就注释掉93 //if (cum_freq[0] > Max_frequency); /* Check counts within limit*/
94 }95
96
97 //初始化缓冲区,便于开始接受编码值
98 voidstart_outputing_bits()99 {100 buffer = 0; //缓冲区一开始为空
101 bits_to_go = 8;102 }103
104
105 void output_bit(intbit)106 {107 //为了写代码方便,编码数据是从右到左进入缓冲区的。记住这一点!
108 buffer >>= 1;109 if (bit) buffer |= 0x80;110 bits_to_go -= 1;111 //当缓冲区满了的时候,就输出存起来
112 if (bits_to_go==0) {113 code[code_index]=buffer;114 code_index++;115
116 bits_to_go = 8; //重新恢复为8
117 }118 }119
120
121 voiddone_outputing_bits()122 {123 //编码最后的时候,当缓冲区没有满,则直接补充0
124 code[code_index]=buffer>>bits_to_go;125 code_index++;126 }127
128
129
130 static void bit_plus_follow(int); /*Routine that follows*/
131 static code_value low, high; /*Ends of the current code region*/
132 static long bits_to_follow; /*Number of opposite bits to output after*/
133
134
135 voidstart_encoding()136 {137 for(int i=0;i<100;i++)code[i]='\0';138
139 low = 0; /*Full code range.*/
140 high =Top_value;141 bits_to_follow = 0; /*No bits to follow*/
142 }143
144
145 void encode_symbol(int symbol,intcum_freq[])146 {147 long range; /*Size of the current code region*/
148 range = (long)(high-low)+1;149
150 high = low + (range*cum_freq[symbol-1])/cum_freq[0]-1; /*Narrow the code region to that allotted to this*/
151 low = low + (range*cum_freq[symbol])/cum_freq[0]; /*symbol.*/
152
153 for(;;)154 { /*Loop to output bits.*/
155 if (high
157 }158 else if (low>=Half) { /*Output 1 if in high half.*/
159 bit_plus_follow(1);160 low -=Half;161 high -= Half; /*Subtract offset to top.*/
162 }163 else if (low>=First_qtr && high
164 bits_to_follow += 1;165 low -= First_qtr; /*Subtract offset to middle*/
166 high -=First_qtr;167 }168 else break; /*Otherwise exit loop.*/
169 low = 2*low;170 high = 2*high+1; /*Scale up code range.*/
171 }172 }173
174 /*FINISH ENCODING THE STREAM.*/
175
176 voiddone_encoding()177 {178 bits_to_follow += 1; /*Output two bits that*/
179 if (low
180 else bit_plus_follow(1); /*the current code range*/
181 } /*contains.*/
182
183
184 static void bit_plus_follow(intbit)185 {186 output_bit(bit); /*Output the bit.*/
187 while (bits_to_follow>0) {188 output_bit(!bit); /*Output bits_to_follow*/
189 bits_to_follow -= 1; /*opposite bits. Set*/
190 } /*bits_to_follow to zero.*/
191 }192
193
194
195 voidencode(){196 start_model(); /*Set up other modules.*/
197 start_outputing_bits();198 start_encoding();199 for (;;) { /*Loop through characters.*/
200 intch;201 intsymbol;202 ch = getchar(); /*Read the next character.*/
203 //if (ch==EOF) break; /* Exit loop on end-of-file. */204 //为了简单起见,这里就不用EOF为结尾了,直接使用回车符作为结尾。这不影响说明算法的原理
205 if(ch==10)break;206 symbol = char_to_index[ch]; /*Translate to an index.*/
207 encode_symbol(symbol,cum_freq); /*Encode that symbol.*/
208
209 }210 //将EOF编码进去,作为终止符
211 encode_symbol(EOF_symbol,cum_freq);212 done_encoding(); /*Send the last few bits.*/
213 done_outputing_bits();214
215 }216
217
218 //解码
219
220 static code_value value; /*Currently-seen code value*/
221
222 voidstart_inputing_bits()223 {224 bits_to_go = 0; /*Buffer starts out with*/
225 garbage_bits = 0; /*no bits in it.*/
226 }227
228
229 intinput_bit()230 {231 intt;232
233 if (bits_to_go==0) {234 buffer =code[decode_index];235 decode_index++;236
237 //if (buffer==EOF) {
238 if(decode_index >code_index ){239 garbage_bits += 1; /*Return arbitrary bits*/
240 if (garbage_bits>Code_value_bits-2) { /*after eof, but check*/
241 fprintf(stderr,"Bad input file/n"); /*for too many such.*/
242 //exit(-1);
243 }244 }245 bits_to_go = 8;246 }247 //从左到右取出二进制位,因为存的时候是从右到左
248 t = buffer&1; /*Return the next bit from*/
249 buffer >>= 1; /*the bottom of the byte.*/
250 bits_to_go -= 1;251 returnt;252 }253
254 voidstart_decoding()255 {256 inti;257 value = 0; /*Input bits to fill the*/
258 for (i = 1; i<=Code_value_bits; i++) { /*code value.*/
259 value = 2*value+input_bit();260 }261
262
263 low = 0; /*Full code range.*/
264 high =Top_value;265 }266
267
268 int decode_symbol(intcum_freq[])269 {270 long range; /*Size of current code region*/
271 int cum; /*Cumulative frequency calculated*/
272 int symbol; /*Symbol decoded*/
273 range = (long)(high-low)+1;274 cum = (((long)(value-low)+1)*cum_freq[0]-1)/range; /*Find cum freq for value.*/
275
276 for (symbol = 1; cum_freq[symbol]>cum; symbol++) ; /*Then find symbol.*/
277 high = low + (range*cum_freq[symbol-1])/cum_freq[0]-1; /*Narrow the code region*//*to that allotted to this*/
278 low = low + (range*cum_freq[symbol])/cum_freq[0];279
280 for (;;) { /*Loop to get rid of bits.*/
281 if (high
283 }284 else if (low>=Half) { /*Expand high half.*/
285 value -=Half;286 low -= Half; /*Subtract offset to top.*/
287 high -=Half;288 }289 else if (low>=First_qtr && high
292 high -=First_qtr;293 }294 else break; /*Otherwise exit loop.*/
295 low = 2*low;296 high = 2*high+1; /*Scale up code range.*/
297 value = 2*value+input_bit(); /*Move in next input blt.*/
298 }299 returnsymbol;300 }301
302
303 voiddecode(){304 start_model(); /*Set up other modules.*/
305 start_inputing_bits();306 start_decoding();307 for (;;) { /*Loop through characters.*/
308 int ch; intsymbol;309 symbol = decode_symbol(cum_freq); /*Decode next symbol.*/
310 if (symbol==EOF_symbol) break; /*Exit loop if EOF symbol.*/
311 ch = index_to_char[symbol]; /*Translate to a character.*/
312 putc(ch,stdout); /*Write that character.*/
313 }314 }315
316 intmain()317 {318 encode();319 decode();320 system("pause");321 return 0;322 }