LZW编码解码原理及编程实现

首先介绍LZW的编码原理。简单来说,LZW编码就是把一个个字符或字符串映射为一个数字(记号),并把这种映射关系存放在字典里,从而减少数据量。问题在于,如果把字典也传输给解码端,同样要占用一定的空间。因此本次实验以ASCII码(全部256个单字节取值)作为初始字典,编码端和解码端各自按相同的规则扩充字典,这样字典就不必传输。为了便于说明,下文解释编解码原理时采用的是自己定义的小字典。下面分别解释编码和解码的原理。
一 编码原理

  1. 初始状态,字典里只有所有的默认项,例如0->a,1->b,2->c。此时P和C都是空的。
  2. 读入新的字符C,与P合并形成字符串P+C。
  3. 在字典里查找P+C,如果:
    • P+C在字典里,P=P+C。
    • P+C不在字典里,将P的记号输出;在字典中为P+C建立一个记号映射;更新P=C。
  4. 返回步骤2重复,直至读完原字符串中所有字符;读完后,将最后剩余的P的记号输出。
    下面举一个例子来解释该原理,首先我们先来定义一个字典:
    0->a
    1->b
    2->c
    而后我们对一串字符串进行编码:abcababa
    首先P为空,读入C=a,P+C=a在字典中,则P=a;读入C=b,P+C=ab不在字典中,则输出P的记号0,更新字典3->ab,并令P=C=b;读入C=c,P+C=bc不在字典中,则输出P的记号1,更新字典4->bc,并令P=C=c;以此类推,直到最后一个字符读完后,输出剩余的P(此处为aba,记号6)。最终得到的编码为0 1 2 3 6,字典更新完成为:
    0->a
    1->b
    2->c
    3->ab
    4->bc
    5->ca
    6->aba
    这样就完成了编码 ,下面附上编码的算法编程实现:
/*
 * LZWEncode - compress the whole content of fp into bf with LZW.
 *
 * fp: input stream opened for binary reading; it must be seekable because
 *     its size is measured with fseek()/ftell() before encoding starts.
 * bf: bit-level output file that receives the result.
 *
 * Output layout: the input size in a 4 * 8 bit field, followed by one code
 * per phrase, each written through output().
 * If the size cannot be determined, or does not fit into the 32-bit length
 * field, nothing is written and a message is printed to stderr.
 */
void LZWEncode(FILE* fp, BITFILE* bf) {
	int character;
	int string_code;
	int index;
	long size;
	unsigned long file_length;

	// measure the input; ftell() reports failure with a negative value
	if (0 != fseek(fp, 0, SEEK_END)) size = -1;
	else size = ftell(fp);
	if (0 > size || 0 != fseek(fp, 0, SEEK_SET)) {
		fprintf(stderr, "LZWEncode: cannot determine input size\n");
		return;
	}
	file_length = (unsigned long)size;
	if (0xFFFFFFFFUL < file_length) {
		// the header only has room for 32 bits of length
		fprintf(stderr, "LZWEncode: input too large for 32-bit length field\n");
		return;
	}
	BitsOutput(bf, file_length, 4 * 8);
	InitDictionary();
	string_code = -1;	// -1: no phrase collected yet
	while (EOF != (character = fgetc(fp))) {
		index = InDictionary(character, string_code);
		if (0 <= index) {	// string+character in dictionary
			string_code = index;
		}
		else {	// string+character not in dictionary
			output(bf, string_code);
			if (MAX_CODE > next_code) {	// free space in dictionary
				// add string+character to dictionary
				AddToDictionary(character, string_code);
			}
			string_code = character;
		}
	}
	// flush the phrase still pending; empty input has none, so no code is
	// emitted in that case (the decoder reads no codes for length 0)
	if (0 <= string_code) output(bf, string_code);
}

其中InitDictionary函数完成了字典的初始化,代码如下:

/*
 * InitDictionary - reset the dictionary to the 256 single-byte phrases.
 *
 * Entry i (0..255) gets suffix i, no parent and no children; the entries
 * are chained through nextsibling, the last one ending the chain with -1.
 * next_code is set to 256, the first slot available for new phrases.
 */
void InitDictionary(void) {
	int i;

	for (i = 0; i < 256; i++) {
		dictionary[i].suffix = i;
		dictionary[i].parent = -1;
		dictionary[i].firstchild = -1;
		dictionary[i].nextsibling = i + 1;
	}
	dictionary[255].nextsibling = -1;
	next_code = 256;
}

InDictionary()完成了字典的查找,即查找"当前字符串+当前字符"是否已经在字典中:若在则返回对应的记号,否则返回-1,代码如下:

/*
 * InDictionary - look up the phrase "string + character".
 *
 * character:   the character appended to the current phrase
 * string_code: code of the current phrase, negative when it is empty
 *
 * Returns the code of string+character, or -1 if it is not in the
 * dictionary. With an empty phrase the character itself is returned.
 */
int InDictionary(int character, int string_code) {
	int node;

	if (string_code < 0) {
		return character;
	}
	// scan the child list of string_code for a matching suffix
	for (node = dictionary[string_code].firstchild;
	     node >= 0;
	     node = dictionary[node].nextsibling) {
		if (dictionary[node].suffix == character) {
			return node;
		}
	}
	return -1;
}

AddToDictionary()完成了字典的更新,即在原有ASCII字典基础上增加新的映射,代码实现如下所示:

/*
 * AddToDictionary - store the phrase "string + character" at next_code.
 *
 * character:   last character of the new phrase
 * string_code: code of the prefix phrase; a negative value makes the call
 *              a no-op
 *
 * The new entry is appended to the end of the prefix's child list and
 * next_code is advanced. The caller checks that a free slot exists.
 */
void AddToDictionary(int character, int string_code) {
	int *link;

	if (string_code < 0) {
		return;
	}
	dictionary[next_code].suffix = character;
	dictionary[next_code].parent = string_code;
	dictionary[next_code].firstchild = -1;
	dictionary[next_code].nextsibling = -1;

	// find the terminating -1 link of the parent's child list
	link = &dictionary[string_code].firstchild;
	while (*link >= 0) {
		link = &dictionary[*link].nextsibling;
	}
	*link = next_code;
	next_code++;
}

二 解码原理

  1. 初始状态,字典里只有所有的默认项,例如0->a,1->b,2->c。此时pW和cW都是空的。
  2. 读入第一个的符号cW,解码输出。注意第一个cW肯定是能直接解码的,而且一定是单个字符。
  3. 赋值pW=cW。
  4. 读入下一个符号cW。
  5. 在字典里查找cW,如果:
    a. cW在字典里:
    (1) 解码cW,即输出 Str(cW)。
    (2) 令P=Str(pW),C=Str(cW)的第一个字符
    (3) 在字典中为P+C添加新的记号映射。
    b. cW不在字典里:
    (1) 令P=Str(pW),C=Str(pW)的第一个字符
    (2) 在字典中为P+C添加新的记号映射,这个新的记号一定就是cW。
    (3) 输出P+C。
  6. 返回步骤3重复,直至读完所有记号
    还是用刚才的例子来说明解码端的具体实现
    解码端接收到的为0 1 2 3 6,字典为
    0->a
    1->b
    2->c
    下面开始解码。首先cW=0,解码输出a,令pW=0;读入cW=1,cW在字典中,输出b,并增加3->ab,令pW=1;读入cW=2,cW在字典中,输出c,并增加4->bc,令pW=2;读入cW=3,cW在字典中,输出ab,并增加5->ca,令pW=3;读入cW=6,cW不在字典中,则增加6->aba并输出aba。至此得到输出序列abcababa,解码完成。值得一提的是,当cW不在字典中时,说明这个记号是编码端刚刚加入字典就立即使用的;由于解码端的字典更新比编码端晚一步,此时cW对应的字符串一定是Str(pW)再加上Str(pW)的第一个字符,所以执行上述操作。代码实现如下所示:
/*
 * LZWDecode - expand an LZW stream read from bf into fp.
 *
 * bf: bit-level input file; it starts with the original size in a 4 * 8
 *     bit field, followed by the codes read through input().
 * fp: output stream opened for binary writing.
 *
 * Decoding stops after exactly the announced number of bytes has been
 * written. It also stops, with a message on stderr, when a code cannot be
 * read or cannot belong to a valid stream.
 */
void LZWDecode(BITFILE* bf, FILE* fp) {
	int character = 0;	// first character of the phrase decoded last
	int new_code, last_code;
	int phrase_length;
	unsigned long file_length;

	file_length = BitsInput(bf, 4 * 8);
	// NOTE(review): -1 is presumably the failure value of BitsInput
	if ((unsigned long)-1 == file_length) file_length = 0;
	InitDictionary();
	last_code = -1;	// -1: no previous phrase yet
	while (0 < file_length) {
		new_code = input(bf);
		// A valid code is either already defined (< next_code) or is the
		// entry about to be created (== next_code), which in addition
		// requires a previous phrase to build it from.
		if (0 > new_code || new_code > next_code
				|| (new_code == next_code && 0 > last_code)) {
			fprintf(stderr, "LZWDecode: corrupt or truncated input\n");
			break;
		}
		if (new_code == next_code) { // this is the case CSCSC( not in dict)
			// phrase = previous phrase + its own first character
			d_stack[0] = character;
			phrase_length = DecodeString(1, last_code);
		}
		else {
			phrase_length = DecodeString(0, new_code);
		}
		// d_stack holds the phrase reversed: the top is its first character
		character = d_stack[phrase_length - 1];
		// never write more than the announced length (guards underflow)
		while (0 < phrase_length && 0 < file_length) {
			phrase_length--;
			fputc(d_stack[phrase_length], fp);
			file_length--;
		}
		if (MAX_CODE > next_code) {// add the new phrase to dictionary
			AddToDictionary(character, last_code);
		}
		last_code = new_code;
	}
}

三 完整代码

#include <stdlib.h>
#include <stdio.h>
#include "bitio.h"
/* New phrases are only added while next_code is below this limit. */
#define MAX_CODE 65535

/*
 * Phrase dictionary, indexed by code. Each entry is a node of a tree:
 *   suffix      - last character of the phrase
 *   parent      - code of the phrase without that last character,
 *                 -1 for the single-character entries
 *   firstchild  - code of the first phrase extending this one, -1 if none
 *   nextsibling - next entry in the same child list, -1 at the end
 */
struct {
	int suffix;
	int parent, firstchild, nextsibling;
} dictionary[MAX_CODE + 1];
int next_code;	/* index of the next free dictionary entry */
int d_stack[MAX_CODE]; // filled by DecodeString: phrase characters, last character first

/* Read / write one code as a 16-bit field of the bit file. */
#define input(f) ((int)BitsInput( f, 16))
#define output(f, x) BitsOutput( f, (unsigned long)(x), 16)

/* Forward declarations for functions used before their definition. */
int DecodeString(int start, int code);
void InitDictionary(void);
/*
 * PrintDictionary - dump every phrase added after the 256 initial entries.
 *
 * Each line has the form "<code>-><phrase>". The phrase is expanded into
 * d_stack by DecodeString and printed from the top of the stack downwards.
 */
void PrintDictionary(void) {
	int code;
	int length;
	int pos;

	for (code = 256; code < next_code; code++) {
		length = DecodeString(0, code);
		printf("%4d->", code);
		for (pos = length - 1; pos >= 0; pos--) {
			printf("%c", (char)(d_stack[pos]));
		}
		printf("\n");
	}
}

/*
 * DecodeString - expand a code into d_stack.
 *
 * start: index of the first d_stack slot to fill
 * code:  dictionary code to expand; a negative code expands to nothing
 *
 * The parent chain is followed from code up to a root, so the characters
 * are stored last-to-first. Returns the index one past the last slot
 * written, i.e. start plus the phrase length.
 */
int DecodeString(int start, int code) {
	int depth;

	for (depth = start; code >= 0; depth++) {
		d_stack[depth] = dictionary[code].suffix;
		code = dictionary[code].parent;
	}
	return depth;
}
/*
 * InitDictionary - reset the dictionary to the 256 single-byte phrases.
 *
 * Entry i (0..255) gets suffix i, no parent and no children; the entries
 * are chained through nextsibling, the last one ending the chain with -1.
 * next_code is set to 256, the first slot available for new phrases.
 */
void InitDictionary(void) {
	int code = 0;

	while (code < 256) {
		dictionary[code].parent = -1;
		dictionary[code].firstchild = -1;
		dictionary[code].suffix = code;
		dictionary[code].nextsibling = (code < 255) ? code + 1 : -1;
		code++;
	}
	next_code = 256;
}
/*
 * InDictionary - look up the phrase "string + character".
 *
 * character:   the character appended to the current phrase
 * string_code: code of the current phrase, negative when it is empty
 *
 * Returns the code of string+character, or -1 if it is not in the
 * dictionary. With an empty phrase the character itself is returned.
 */
int InDictionary(int character, int string_code) {
	int node;

	if (string_code < 0) {
		return character;
	}
	// scan the child list of string_code for a matching suffix
	for (node = dictionary[string_code].firstchild;
	     node >= 0;
	     node = dictionary[node].nextsibling) {
		if (dictionary[node].suffix == character) {
			return node;
		}
	}
	return -1;
}

/*
 * AddToDictionary - store the phrase "string + character" at next_code.
 *
 * character:   last character of the new phrase
 * string_code: code of the prefix phrase; a negative value makes the call
 *              a no-op
 *
 * The new entry is appended to the end of the prefix's child list and
 * next_code is advanced. The caller checks that a free slot exists.
 */
void AddToDictionary(int character, int string_code) {
	int *link;

	if (string_code < 0) {
		return;
	}
	dictionary[next_code].suffix = character;
	dictionary[next_code].parent = string_code;
	dictionary[next_code].firstchild = -1;
	dictionary[next_code].nextsibling = -1;

	// find the terminating -1 link of the parent's child list
	link = &dictionary[string_code].firstchild;
	while (*link >= 0) {
		link = &dictionary[*link].nextsibling;
	}
	*link = next_code;
	next_code++;
}

/*
 * LZWEncode - compress the whole content of fp into bf with LZW.
 *
 * fp: input stream opened for binary reading; it must be seekable because
 *     its size is measured with fseek()/ftell() before encoding starts.
 * bf: bit-level output file that receives the result.
 *
 * Output layout: the input size in a 4 * 8 bit field, followed by one code
 * per phrase, each written through output().
 * If the size cannot be determined, or does not fit into the 32-bit length
 * field, nothing is written and a message is printed to stderr.
 */
void LZWEncode(FILE* fp, BITFILE* bf) {
	int character;
	int string_code;
	int index;
	long size;
	unsigned long file_length;

	// measure the input; ftell() reports failure with a negative value
	if (0 != fseek(fp, 0, SEEK_END)) size = -1;
	else size = ftell(fp);
	if (0 > size || 0 != fseek(fp, 0, SEEK_SET)) {
		fprintf(stderr, "LZWEncode: cannot determine input size\n");
		return;
	}
	file_length = (unsigned long)size;
	if (0xFFFFFFFFUL < file_length) {
		// the header only has room for 32 bits of length
		fprintf(stderr, "LZWEncode: input too large for 32-bit length field\n");
		return;
	}
	BitsOutput(bf, file_length, 4 * 8);
	InitDictionary();
	string_code = -1;	// -1: no phrase collected yet
	while (EOF != (character = fgetc(fp))) {
		index = InDictionary(character, string_code);
		if (0 <= index) {	// string+character in dictionary
			string_code = index;
		}
		else {	// string+character not in dictionary
			output(bf, string_code);
			if (MAX_CODE > next_code) {	// free space in dictionary
				// add string+character to dictionary
				AddToDictionary(character, string_code);
			}
			string_code = character;
		}
	}
	// flush the phrase still pending; empty input has none, so no code is
	// emitted in that case (the decoder reads no codes for length 0)
	if (0 <= string_code) output(bf, string_code);
}

/*
 * LZWDecode - expand an LZW stream read from bf into fp.
 *
 * bf: bit-level input file; it starts with the original size in a 4 * 8
 *     bit field, followed by the codes read through input().
 * fp: output stream opened for binary writing.
 *
 * Decoding stops after exactly the announced number of bytes has been
 * written. It also stops, with a message on stderr, when a code cannot be
 * read or cannot belong to a valid stream.
 */
void LZWDecode(BITFILE* bf, FILE* fp) {
	int character = 0;	// first character of the phrase decoded last
	int new_code, last_code;
	int phrase_length;
	unsigned long file_length;

	file_length = BitsInput(bf, 4 * 8);
	// NOTE(review): -1 is presumably the failure value of BitsInput
	if ((unsigned long)-1 == file_length) file_length = 0;
	InitDictionary();
	last_code = -1;	// -1: no previous phrase yet
	while (0 < file_length) {
		new_code = input(bf);
		// A valid code is either already defined (< next_code) or is the
		// entry about to be created (== next_code), which in addition
		// requires a previous phrase to build it from.
		if (0 > new_code || new_code > next_code
				|| (new_code == next_code && 0 > last_code)) {
			fprintf(stderr, "LZWDecode: corrupt or truncated input\n");
			break;
		}
		if (new_code == next_code) { // this is the case CSCSC( not in dict)
			// phrase = previous phrase + its own first character
			d_stack[0] = character;
			phrase_length = DecodeString(1, last_code);
		}
		else {
			phrase_length = DecodeString(0, new_code);
		}
		// d_stack holds the phrase reversed: the top is its first character
		character = d_stack[phrase_length - 1];
		// never write more than the announced length (guards underflow)
		while (0 < phrase_length && 0 < file_length) {
			phrase_length--;
			fputc(d_stack[phrase_length], fp);
			file_length--;
		}
		if (MAX_CODE > next_code) {// add the new phrase to dictionary
			AddToDictionary(character, last_code);
		}
		last_code = new_code;
	}
}



/*
 * Command line entry point.
 *
 *   <prog> E <ifile> <ofile>   encode ifile into ofile
 *   <prog> D <ifile> <ofile>   decode ifile into ofile
 *
 * Returns 0 on success and -1 on a usage error, an unsupported operation,
 * or when one of the two files cannot be opened.
 */
int main(int argc, char** argv) {
	FILE* fp = NULL;
	BITFILE* bf = NULL;
	int status = -1;

	if (4 > argc) {
		fprintf(stdout, "usage: \n%s <o> <ifile> <ofile>\n", argv[0]);
		fprintf(stdout, "\t<o>: E or D reffers encode or decode\n");
		fprintf(stdout, "\t<ifile>: input file name\n");
		fprintf(stdout, "\t<ofile>: output file name\n");
		return -1;
	}
	if ('E' == argv[1][0]) { // do encoding
		fp = fopen(argv[2], "rb");
		// touch the output only once the input is known to be readable
		if (NULL != fp) bf = OpenBitFileOutput(argv[3]);
		if (NULL != fp && NULL != bf) {
			LZWEncode(fp, bf);
			status = 0;
		}
		// release whatever was opened, also when the other open failed
		if (NULL != fp) fclose(fp);
		if (NULL != bf) CloseBitFileOutput(bf);
		if (0 == status) fprintf(stdout, "encoding done\n");
		else fprintf(stderr, "cannot open %s or %s\n", argv[2], argv[3]);
	}
	else if ('D' == argv[1][0]) {	// do decoding
		bf = OpenBitFileInput(argv[2]);
		// touch the output only once the input is known to be readable
		if (NULL != bf) fp = fopen(argv[3], "wb");
		if (NULL != fp && NULL != bf) {
			LZWDecode(bf, fp);
			status = 0;
		}
		// release whatever was opened, also when the other open failed
		if (NULL != fp) fclose(fp);
		if (NULL != bf) CloseBitFileInput(bf);
		if (0 == status) fprintf(stdout, "decoding done\n");
		else fprintf(stderr, "cannot open %s or %s\n", argv[2], argv[3]);
	}
	else {	// otherwise
		fprintf(stderr, "not supported operation\n");
	}
	return status;
}

我也做了编码解码的测试,发现当文本较短时,这种编码的效率其实并不高,压缩后的文件甚至比原文件还要大(本实现中每个记号固定占16位,文件开头还要额外存放4字节的原文件长度);只有当文本较大且重复规律比较明显时,这种编码方式才有较高的效率。

  • 0
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
由于LZW编码和译码涉及到比较复杂的算法,因此需要进行较为详细的介绍和说明。以下是Verilog实现LZW编码和译码的步骤和代码示例。 LZW编码的步骤如下: 1. 初始化字典表,将所有单个字符初始化为字典表的项。 2. 读入输入字符流,将当前字符与已有的字典表中的字符串进行匹配,找到最长匹配字符串,并输出其对应的编码。 3. 将当前字符与最长匹配字符串组成新的字符串,将其加入字典表中,并更新字典表的大小。 4. 重复步骤2和3,直到读入所有的字符。 LZW编码的Verilog代码如下: ``` module lzw_encode ( input clk, input rst_n, input [7:0] data_in, input valid_in, output reg [15:0] data_out, output reg valid_out ); parameter DICT_SIZE = 256; parameter MAX_BITS = 12; reg [7:0] buffer; reg [15:0] dict[DICT_SIZE]; reg [7:0] dict_size; reg [7:0] dict_bits; reg [15:0] code; reg [7:0] code_bits; reg [15:0] next_code; reg [7:0] next_bits; always @(posedge clk or negedge rst_n) begin if (~rst_n) begin buffer <= 0; dict[0] <= 0; for (i = 1; i < DICT_SIZE; i = i + 1) begin dict[i] <= i; end dict_size <= DICT_SIZE; dict_bits <= 8; code <= 0; code_bits <= 0; next_code <= DICT_SIZE; next_bits <= dict_bits + 1; valid_out <= 0; end else begin if (valid_in) begin buffer <= data_in; if (code_bits == 0) begin code <= buffer; code_bits <= dict_bits; end else begin dict[next_code] <= {dict[code], buffer}; next_code <= next_code + 1; if (next_code == (1 << next_bits)) begin next_bits <= next_bits + 1; end code <= buffer; code_bits <= dict_bits; valid_out <= 1; data_out <= dict[buffer]; end for (i = 0; i < dict_size; i = i + 1) begin if (dict[i] == {dict[code], buffer}) begin code <= i; code_bits <= next_bits; valid_out <= 0; break; end end end else begin valid_out <= 0; end end end endmodule ``` LZW译码的步骤如下: 1. 初始化字典表,将所有单个字符初始化为字典表的项。 2. 读入输入编码流,将当前编码解码为字符串,并输出该字符串。 3. 将前一个字符串与当前字符串的第一个字符组成新的字符串,将其加入字典表中,并更新字典表的大小。 4. 
重复步骤2和3,直到读入所有的编码LZW译码的Verilog代码如下: ``` module lzw_decode ( input clk, input rst_n, input [15:0] data_in, input valid_in, output reg [7:0] data_out, output reg valid_out ); parameter DICT_SIZE = 256; parameter MAX_BITS = 12; reg [15:0] dict[DICT_SIZE]; reg [7:0] dict_size; reg [7:0] dict_bits; reg [15:0] code; reg [7:0] code_bits; reg [15:0] next_code; reg [7:0] next_bits; reg [15:0] prev_code; reg [7:0] prev_bits; reg [7:0] output_buffer; reg [7:0] output_bits; reg [1:0] state; always @(posedge clk or negedge rst_n) begin if (~rst_n) begin dict[0] <= 0; for (i = 1; i < DICT_SIZE; i = i + 1) begin dict[i] <= i; end dict_size <= DICT_SIZE; dict_bits <= 8; code <= 0; code_bits <= 0; next_code <= DICT_SIZE; next_bits <= dict_bits + 1; prev_code <= -1; prev_bits <= 0; output_buffer <= 0; output_bits <= 0; state <= 0; valid_out <= 0; end else begin if (valid_in) begin code <= data_in; code_bits <= next_bits; if (prev_code == -1) begin output_buffer <= code; output_bits <= dict_bits; valid_out <= 1; prev_code <= code; prev_bits <= code_bits; state <= 1; end else begin if (code < dict_size) begin output_buffer <= dict[code]; output_bits <= dict_bits; valid_out <= 1; dict[next_code] <= {dict[prev_code], output_buffer}; next_code <= next_code + 1; if (next_code == (1 << next_bits)) begin next_bits <= next_bits + 1; end prev_code <= code; prev_bits <= code_bits; state <= 1; end else begin dict[next_code] <= {dict[prev_code], dict[prev_code][0]}; next_code <= next_code + 1; if (next_code == (1 << next_bits)) begin next_bits <= next_bits + 1; end output_buffer <= {dict[prev_code], dict[prev_code][0]}; output_bits <= next_bits; valid_out <= 1; prev_code <= code; prev_bits <= code_bits; state <= 2; end end end else begin valid_out <= 0; if (state == 2) begin prev_code <= -1; prev_bits <= 0; state <= 0; end end end end endmodule ```

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值