用huffman编码一个字符串

一、霍夫曼编码


实验目的:

(1).利用霍夫曼编码给一个字符串编码


实验原理

如何构造一棵霍夫曼树:

(1).构造一个优先队列,从小到大排列每一个元素。

(2).2个权值最小的队列结点出队,构造一个新的结点满足新结点的权值是原来2个结点权值之和。

(3).把新的结点压入最小优先队列中。

(4).重复步骤(2)、(3),直到队列中只剩下一个结点,这时就构造了一棵霍夫曼树。


实验步骤:

(1).先任意输入一个字符串,求出每一个字符出现的频数,存在数组freqs里面

(2).把频数数组freqs作为参数传递给create_huffman_codes函数构造霍夫曼树。

(3).打印出每一个字符,及它对应的十进制编码和二进制编码。


具体实现程序:


#include <limits.h>
#include <stdio.h> 
#include <stdlib.h> 
#include <string.h> 
 
#define BYTES 256 
 
/* Huffman code assigned to one byte value. */
typedef struct huffcode {
  int nbits;  /* number of valid bits in `code` */
  int code;   /* the code bits, held in the low `nbits` bits */
} huffcode_t;
 
/* Array-backed priority queue of node indices, ordered by frequency. */
typedef struct huffheap {
  int *h;   /* stored node indices */
  int n;    /* number of indices currently stored */
  int s;    /* allocated capacity of `h`, in elements */
  int cs;   /* number of elements added each time the capacity grows */
  long *f;  /* frequency table, indexed by the values kept in `h` */
} heap_t;
 
/* heap handling funcs */ 
/*
 * Allocate an empty heap.
 *
 * s: initial capacity in elements; also used as the growth increment.
 * f: frequency table the heap orders its entries by. It is stored, not
 *    copied, so it must outlive the heap.
 *
 * Returns the new heap, or NULL if s is not positive or an allocation
 * fails. Release with _heap_destroy().
 */
static heap_t *_heap_create(int s, long *f)
{
  heap_t *heap;

  /* A zero growth increment would leave the heap unable to grow. */
  if ( s <= 0 ) return NULL;

  heap = malloc(sizeof *heap);
  if ( heap == NULL ) return NULL;

  heap->h = malloc(sizeof *heap->h * (size_t)s);
  if ( heap->h == NULL ) {
    free(heap);
    return NULL;
  }

  heap->s = heap->cs = s;
  heap->n = 0;
  heap->f = f;
  return heap;
}
 
/*
 * Release a heap and its index array. The frequency table is not freed.
 * Passing NULL is a no-op, mirroring free().
 */
static void _heap_destroy(heap_t *heap)
{
  if ( heap == NULL ) return;
  free(heap->h);
  free(heap);
}
 
/*
 * Re-order the stored indices so that their frequencies are non-increasing,
 * leaving the least frequent index in the last slot. Implemented as a gnome
 * sort; a pair is swapped only when strictly out of order, so entries with
 * equal frequency keep their relative order.
 */
static void _heap_sort(heap_t *heap)
{
  int *idx = heap->h;
  const long *freq = heap->f;
  int pos = 1;   /* pair (pos-1, pos) currently being checked */
  int next = 2;  /* where to resume once everything before it is ordered */

  while ( pos < heap->n ) {
    if ( freq[idx[pos-1]] < freq[idx[pos]] ) {
      /* out of order: swap the pair and step back one slot */
      int tmp = idx[pos-1];
      idx[pos-1] = idx[pos];
      idx[pos] = tmp;
      pos--;
      if ( pos == 0 ) {
        pos = next;
        next++;
      }
    } else {
      pos = next;
      next++;
    }
  }
}
 
/*
 * Insert index c into the heap and restore the ordering.
 *
 * When the array is full it grows by heap->cs elements. The function has no
 * way to report failure, so it aborts the program if the array cannot grow.
 */
static void _heap_add(heap_t *heap, int c)
{
  if ( (heap->n + 1) > heap->s ) {
    /* realloc takes a size in bytes, so scale the element count. */
    size_t newcap = (size_t)heap->s + (size_t)heap->cs;
    int *grown = realloc(heap->h, newcap * sizeof *grown);
    if ( grown == NULL ) {
      fputs("_heap_add: out of memory\n", stderr);
      abort();
    }
    heap->h = grown;
    heap->s += heap->cs;
  }
  heap->h[heap->n] = c;
  heap->n++;
  _heap_sort(heap);
}
 
/*
 * Take the last stored index off the heap and return it.
 * Returns -1 when the heap holds no entries.
 */
static int _heap_remove(heap_t *heap)
{
  if ( heap->n <= 0 ) return -1;

  heap->n -= 1;
  return heap->h[heap->n];
}
 
/* huffmann code generator */ 
/*
 * Build a Huffman code table from per-byte frequencies.
 *
 * freqs: array of BYTES counts, one per byte value. Entries <= 0 mean the
 *        byte does not occur.
 *
 * Returns a newly allocated array of BYTES pointers. Entry i is NULL when
 * byte i does not occur; otherwise it points to a huffcode_t holding the
 * code length and the code bits. Release it with free_huffman_codes().
 * Returns NULL if freqs is NULL, an allocation fails, or a code is too long
 * to be stored in an int.
 *
 * When exactly one distinct byte occurs it is the tree root and receives a
 * zero-length code (nbits == 0).
 */
huffcode_t **create_huffman_codes(long *freqs)
{
  huffcode_t **codes;
  heap_t *heap;
  long efreqs[BYTES*2];  /* [0,BYTES) leaves, [BYTES,2*BYTES) internal nodes */
  int preds[BYTES*2];    /* parent index of each node; the sign selects the bit */
  int i, extf=BYTES;
  int r1, r2;
  /* Longest code an int can hold without shifting into the sign bit. */
  const int maxbits = (int)(sizeof(int) * CHAR_BIT) - 1;

  if ( freqs == NULL ) return NULL;

  memcpy(efreqs, freqs, sizeof(long)*BYTES);
  memset(&efreqs[BYTES], 0, sizeof(long)*BYTES);

  heap = _heap_create(BYTES*2, efreqs);
  if ( heap == NULL ) return NULL;

  for(i=0; i < BYTES; i++) if ( efreqs[i] > 0 ) _heap_add(heap, i);

  /* Repeatedly merge the two least frequent nodes into a new internal node. */
  while( heap->n > 1 )
    {
      r1 = _heap_remove(heap);
      r2 = _heap_remove(heap);
      efreqs[extf] = efreqs[r1] + efreqs[r2];
      _heap_add(heap, extf);
      preds[r1] = extf;
      preds[r2] = -extf;
      extf++;
    }
  /* The remaining node is the root, marked by pointing at itself. The heap
     is empty when no byte occurs, and -1 must not be used as an index. */
  r1 = _heap_remove(heap);
  if ( r1 >= 0 ) preds[r1] = r1;
  _heap_destroy(heap);

  codes = malloc(sizeof *codes * BYTES);
  if ( codes == NULL ) return NULL;
  for(i=0; i < BYTES; i++) codes[i] = NULL;

  for(i=0; i < BYTES; i++) {
    int bc=0, bn=0, ix=i;

    /* Only bytes that were put on the heap have a valid preds[] entry. */
    if ( efreqs[i] <= 0 ) continue;

    /* Walk from the leaf up to the root, collecting one bit per edge. */
    while( abs(preds[ix]) != ix ) {
      if ( bn >= maxbits ) goto fail;
      bc |= ((preds[ix] >= 0) ? 1 : 0 ) << bn;
      ix = abs(preds[ix]);
      bn++;
    }
    codes[i] = malloc(sizeof *codes[i]);
    if ( codes[i] == NULL ) goto fail;
    codes[i]->nbits = bn;
    codes[i]->code = bc;
  }
  return codes;

fail:
  for(i=0; i < BYTES; i++) free(codes[i]);
  free(codes);
  return NULL;
}
 
/*
 * Release a code table: each of its BYTES entries and then the table itself.
 * Passing NULL is a no-op, mirroring free().
 */
void free_huffman_codes(huffcode_t **c)
{
  int i;

  if ( c == NULL ) return;
  for(i=0; i < BYTES; i++) free(c[i]);
  free(c);
}
 
#define MAXBITSPERCODE 100

/*
 * Write the low n bits of c into s as '0'/'1' characters, most significant
 * of those bits first, followed by a terminating NUL.
 *
 * s must have room for n + 1 characters. A NULL s is ignored and a negative
 * n is treated as 0.
 */
void inttobits(int c, int n, char *s)
{
  /* Work on the unsigned bit pattern: c % 2 is -1 for negative odd values,
     which would produce '/' instead of '1'. */
  unsigned int bits = (unsigned int)c;

  if ( s == NULL ) return;
  if ( n < 0 ) n = 0;

  s[n] = '\0';
  while(n > 0) {
    s[n-1] = (char)('0' + (bits & 1u));
    bits >>= 1; n--;
  }
}
 
const char *test = "hellohellohello";

/*
 * Count how often each byte occurs in `test`, build the Huffman codes and
 * print one line per occurring byte: the character, its code as a decimal
 * number, and its code as a bit string.
 *
 * Returns 0 on success, EXIT_FAILURE if the code table cannot be built.
 */
int main(void)
{
  huffcode_t **r;
  int i;
  char strbit[MAXBITSPERCODE];
  const char *p;
  long freqs[BYTES];

  memset(freqs, 0, sizeof freqs);

  /* Index through unsigned char: plain char may be signed, and a negative
     value would index before the start of freqs. */
  for(p = test; *p != '\0'; p++) freqs[(unsigned char)*p]++;

  r = create_huffman_codes(freqs);
  if ( r == NULL ) {
    fputs("create_huffman_codes failed\n", stderr);
    return EXIT_FAILURE;
  }

  for(i=0; i < BYTES; i++) {
    if ( r[i] == NULL ) continue;
    /* strbit needs nbits characters plus the terminator. */
    if ( r[i]->nbits < 0 || r[i]->nbits >= MAXBITSPERCODE ) continue;
    inttobits(r[i]->code, r[i]->nbits, strbit);
    printf("%c (%d) %s\n", i, r[i]->code, strbit);
  }

  free_huffman_codes(r);

  return 0;
}

测试字符串是“hellohellohello”

运行结果:

e (1) 01

h (0) 000

l (1) 1

o (1) 001


如果要用huffman编码来实现文本压缩,只要把每一个字符与它对应的码字构造成一个字典放在压缩文件中,再把所有字符的码字依次写入压缩文件,就能实现压缩。解压时,在字典中查出码字对应的字符即可还原。我暂时不太熟悉文本操作,等以后时间多了再去完善一下。

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
# Python implementation of Huffman encoding and decoding for a string.
from heapq import heapify, heappush, heappop
from collections import defaultdict
from typing import Dict, List, Tuple


def huffman_encode(s: str) -> Tuple[str, Dict[str, str]]:
    """Huffman-encode ``s``.

    Returns ``(encoded, code)``: ``code`` maps each distinct character of
    ``s`` to its bit string, and ``encoded`` is the concatenation of the
    codes of the characters of ``s`` in order. An empty ``s`` yields
    ``("", {})``.
    """
    if not s:
        # With no symbols the heap is empty and heap[0] would raise IndexError.
        return "", {}

    freq = defaultdict(int)
    for c in s:
        freq[c] += 1

    # Build the Huffman tree. Each heap entry is [weight, [sym, code], ...].
    heap = [[wt, [sym, ""]] for sym, wt in freq.items()]
    heapify(heap)
    if len(heap) == 1:
        # A lone symbol would otherwise keep an empty code, which cannot be
        # decoded; give it a one-bit code instead.
        heap[0][1][1] = "0"
    while len(heap) > 1:
        lo = heappop(heap)
        hi = heappop(heap)
        for pair in lo[1:]:
            pair[1] = "0" + pair[1]
        for pair in hi[1:]:
            pair[1] = "1" + pair[1]
        heappush(heap, [lo[0] + hi[0]] + lo[1:] + hi[1:])

    # Build the code table.
    code = dict(heap[0][1:])
    # Encode the string.
    encoded = "".join(code[ch] for ch in s)
    return encoded, code


def huffman_decode(encoded: str, code: Dict[str, str]) -> str:
    """Decode ``encoded`` using the ``code`` table from ``huffman_encode``."""
    # Invert the code table.
    decode = {v: k for k, v in code.items()}
    # Accumulate bits until they match a code word, then emit its symbol.
    current_code = ""
    decoded = ""
    for bit in encoded:
        current_code += bit
        if current_code in decode:
            decoded += decode[current_code]
            current_code = ""
    return decoded


# Test
s = "hello world"
encoded, code = huffman_encode(s)
print(f"Encoded: {encoded}")
print(f"Code: {code}")
decoded = huffman_decode(encoded, code)
print(f"Decoded: {decoded}")

# Output: the encoded bit string, the code table, and "Decoded: hello world".
# Note: repeated characters in the input need no special handling. Huffman
# codes are prefix-free, so decoding is unambiguous.

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值