利用哈夫曼编码进行文件压缩

利用哈夫曼编码进行文件压缩(原创)
过程大概是这样的。
压缩过程:读入文件,统计字符:在我的程序里面是以BYTE类型而不是char类型,这样可以压缩除文本文件之外的其他文件。然后建立哈夫曼树。存入压缩文件的时候我先存的原文件总的BYTE数,用了4个字节去存。之所以这么做,是因为文本在被压缩后,最后一部分不一定能占满一个字节,但是存进去的时候必须要按一个字节存进去,这样就会造成解压的时候产生多余的信息。然后将哈夫曼树对应的静态链表存进去,我用tag为0和1分别表示他的左右孩子指向的是其他的中间结点(此时存的是下标)还是叶子(文件中的字符)。然后按照哈夫曼编码把源文件的内容放到了压缩文件。在处理这个静态链表的时候我犯傻了,应该把指示左右孩子的tag用位操作把他们都单独放在一处,而不是一个tag就是一个BYTE,这样很浪费空间的。在建立哈夫曼树的时候需注意文件只有一种字符的情况。我用的是把头节点的左右孩子都指向这个字符。还有一个办法是把这个字符的数量存下来,就压缩数量和字符,不过这样的解码方式不一样。我比较懒就用的第一种方式。
解压过程:根据哈夫曼编码的静态链表进行解压。对于编码来说,0表示走向左孩子,1表示走向右孩子。比如说我的a的哈夫曼编码是10,则我从静态链表的头开始找起,先是“1”,那我选择右孩子,然后是“0”,他的左孩子就是'a'。这样就能解压了。
核心代码如下:
/*
 * One node of the Huffman tree.
 *
 * The self-references are spelled `struct st *` so the definition is valid
 * in C as well as C++ (a bare `st` is only a type name in C++).
 */
typedef struct st
{
 short Letter ;          /* byte value (0..255) for a leaf, -1 for an internal node */
 int  Frequent ;         /* occurrence count; for an internal node, the sum of its two children */
 struct st *left ;       /* left child, NULL for a leaf */
 struct st *right ;      /* right child, NULL for a leaf */
} HuffmanNode, *pHuffman ;
typedef struct CodeLList
{
 HuffmanNode Node ;
 BYTE left, right ;         
 BYTE lefttag, righttag ; // if tag == 0, means that it is a index.
}CodeLList ;

    case ID_COMPRESSBUTTON :
    {
     used = 0 ;
     HuffmanNode temp ;
     char buffer[258] ;
     Node = new pHuffman[256] ;
     FileByteLen = 0 ;
     for ( i = 0 ; i < 256 ; i++ )
     {
      ElementCount[i] = 0 ;
     }
    
     GetWindowText( hEditFile, szFileName, 50 ) ;
     if ( szFileName[0] == '/0' )
     {
      MessageBox( hwnd, "请输入文件名", "Warning!",
       MB_OK | MB_ICONWARNING ) ;
      return 0 ;
     }
     else
      fp = fopen( szFileName, "rb" ) ;
     if ( !fp )
     {
      MessageBox( hwnd, TEXT( "找不到该文件" ), TEXT( "Warning!" ),
       MB_OK | MB_ICONWARNING ) ;
      return 0 ;
     }
     while ( !feof( fp ) )
     {
      fread( &character, sizeof( BYTE ), 1, fp ) ;
      if ( feof( fp ) )
       break ;
      ElementCount[character]++ ;
      FileByteLen++ ;
     }

     for ( i = 0 ; i < 256 ; i++ )
     {
      if ( ElementCount[i] > 0 )
      {
       Node[used] = new HuffmanNode ;
       Node[used]->Frequent = ElementCount[i] ;
       Node[used]->Letter = i ;
       Node[used]->left = Node[used]->right = NULL ;
       used++ ;
      }
     }
     // 从小到大排序
     for ( i = 0 ; i < used - 1 ; i++ )
      for ( j = i + 1 ; j < used ; j++ )
      {
       if ( Node[i] > Node[j] )
       {
        temp = *Node[i] ;
        *Node[i] = *Node[j] ;
        *Node[j] = temp ;
       }
      }
     // 生成哈夫曼树
     begin = 0 ;
     while ( begin < used - 1 )
     {
      p = new HuffmanNode ;
      p->Letter = -1 ;
      p->Frequent = Node[begin]->Frequent + Node[begin + 1]->Frequent ;
      p->left = Node[begin] ;
      p->right = Node[begin + 1] ;
      for ( i = begin + 1 ; i < used - 1; i++ )
      {
       if ( Node[i + 1]->Frequent < p->Frequent )
        Node[i] = Node[i + 1] ;
       else
        break ;
      }
      if ( begin + 2 >= used )
       Node[begin + 1] = p ;
      else
       Node[i] = p ;
      
      begin++ ;
     }
     
     if ( used > 1 )
     {
      Head = p ;
     
      BuildCode( Head, buffer, 0 ) ;
     }
     else
     {
      Code[Node[0]->Letter][0] = '0' ;
      Code[Node[0]->Letter][1] = '/0' ;
     }
     
     len = strlen( szFileName ) ;
     szFileName[len] = '.' ;
     szFileName[len + 1] = 's' ;
     szFileName[len + 2] = 'l' ;
     szFileName[len + 3] = '/0' ;
     fpDest = fopen( szFileName, "wb+" ) ;
     
     // 开始写入压缩文件
     fwrite( &FileByteLen, sizeof( int ), 1, fpDest ) ;
     // 哈夫曼码表
     if ( used > 1 ) // 对只有一个字符种类的文章要特殊处理。
      count = ConvertTreeList( Head ) ;
     else
     {
      count = 1 ;
      CodeLinkList[0].left = Node[0]->Letter ;
      CodeLinkList[0].right = Node[0]->Letter ;
      CodeLinkList[0].righttag = CodeLinkList[0].lefttag = 1 ;
     }
     fwrite( &count, sizeof( BYTE ), 1, fpDest ) ;
     for ( i = 0 ; i < count ; i++ )
     {
      fwrite( &CodeLinkList[i].left, sizeof( BYTE ), 1, fpDest ) ;
      fwrite( &CodeLinkList[i].lefttag, sizeof( BYTE ), 1, fpDest) ;
      fwrite( &CodeLinkList[i].right, 1, sizeof( BYTE ), fpDest ) ;
      fwrite( &CodeLinkList[i].righttag, sizeof( BYTE ), 1, fpDest) ;
     }
     
     // 编码
     bitcount = 0 ;
     codebyte = 0 ;
     fseek( fp, 0, SEEK_SET ) ;
     while ( !feof( fp ) )
     {
      character = fgetc( fp ) ;
      if ( character < 0 )
       break ;
      len = strlen( Code[character] ) ;
      for ( i = 0 ; i < len ; i++ )
      {
       if ( bitcount == 8 )
       {
        fwrite( &codebyte, sizeof( BYTE ), 1, fpDest ) ;
        codebyte = 0 ;
        bitcount = 0 ;
       }
       codebyte *= 2 ;
       if ( Code[character][i] == '1' )
        codebyte++ ;
       bitcount++ ;
      }
     }
     if ( bitcount <  8 )
     {
      while ( bitcount < 8 )
      {
       codebyte *= 2 ;
       bitcount++ ;
      }
      fwrite( &codebyte, sizeof( BYTE ), 1, fpDest ) ;
     }
     
     fclose( fp ) ;
     fclose( fpDest ) ;
     return 0 ;
    } break ;
    case ID_EXPANDBUTTON :
    {
     BYTE currentindex ;
     bool ByteContent[8] ;
     GetWindowText( hEditFile, szFileName, 50 ) ;
     len = strlen( szFileName ) ;
     if ( szFileName[0] == '/0' )
     {
      MessageBox( hwnd, "请输入文件名", "Warning!",
       MB_OK | MB_ICONWARNING ) ;
      return 0 ;
     }
     else if ( szFileName[len - 1] != 'l' ||
      szFileName[len - 2] != 's' ||
      szFileName[len - 3] != '.' || len == 3)
     {
      MessageBox( hwnd, "文件类型不对", "Warning!",
       MB_OK | MB_ICONWARNING ) ;
      return 0 ;
     }
     else
      fp = fopen( szFileName, "rb" ) ;
     if ( !fp )
     {
      MessageBox( hwnd, TEXT( "找不到该文件" ), TEXT( "Warning!" ),
       MB_OK | MB_ICONWARNING ) ;
      return 0 ;
     }
     fread( &FileByteLen, sizeof( int ), 1, fp ) ;
     fread( &count, sizeof( BYTE ), 1, fp ) ;
     for ( i = 0 ; i < count ; i++ )
     {
      fread( &CodeLinkList[i].left, sizeof( BYTE ), 1, fp ) ;
      fread( &CodeLinkList[i].lefttag, sizeof( BYTE ), 1, fp ) ;
      fread( &CodeLinkList[i].right, sizeof( BYTE ), 1, fp ) ;
      fread( &CodeLinkList[i].righttag, sizeof( BYTE ), 1, fp ) ;
     }
     
     // 得到解压后的文件名
     szFileName[len - 3] = '/0' ;
     fpDest = fopen( szFileName, "wb+" ) ;
     
     currentindex = 0 ;
     
     while ( !feof( fp ) && FileByteLen )
     {
      fread( &codebyte, 1, 1, fp ) ;
      for ( i = 0 ; i < 8 ; i++ )
      {
       ByteContent[7 - i] = codebyte & 1 ;
       codebyte /= 2 ;
      }
      
      for ( i = 0 ; i < 8 ; i++ )
      {
       if ( !ByteContent[i] ) // 左子树
       {
        if ( CodeLinkList[currentindex].lefttag )
        {
         //fwrite( &CodeLinkList[currentindex].left, sizeof(
BYTE ), 1, fpDest ) ;
         fputc( CodeLinkList[currentindex].left, fpDest ) ;
         FileByteLen-- ;
         currentindex = 0 ;
        }
        else
         currentindex = CodeLinkList[currentindex].left ;
       }
       else
       {
        if ( CodeLinkList[currentindex].righttag )
        {
         //fwrite( &CodeLinkList[currentindex].right, sizeof(
BYTE ), 1, fpDest ) ;
         fputc( CodeLinkList[currentindex].right, fpDest ) ;
         
         FileByteLen-- ;
         currentindex = 0 ;
        }
        else
         currentindex = CodeLinkList[currentindex].right ;
       }
       if ( !FileByteLen )
        break ;
      }
     }
     
     fclose( fp ) ;
     fclose( fpDest ) ;
     return 0 ;
    } break ;
   }

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
哈夫曼编码是一种常用的文件压缩算法,通过将频率较高的字符用较短的编码表示,而频率较低的字符用较长的编码表示,从而减少文件的大小。下面是一个根据字符频率构建哈夫曼树并输出各字符哈夫曼编码的C语言完整代码: ```c #include <stdio.h> #include <stdlib.h> #include <string.h> #define MAX_TREE_HT 100 struct MinHeapNode { char data; unsigned freq; struct MinHeapNode *left, *right; }; struct MinHeap { unsigned size; unsigned capacity; struct MinHeapNode **array; }; struct MinHeapNode* newNode(char data, unsigned freq) { struct MinHeapNode* temp = (struct MinHeapNode*)malloc(sizeof(struct MinHeapNode)); temp->left = temp->right = NULL; temp->data = data; temp->freq = freq; return temp; } struct MinHeap* createMinHeap(unsigned capacity) { struct MinHeap* minHeap = (struct MinHeap*)malloc(sizeof(struct MinHeap)); minHeap->size = 0; minHeap->capacity = capacity; minHeap->array = (struct MinHeapNode**)malloc(minHeap->capacity * sizeof(struct MinHeapNode*)); return minHeap; } void swapMinHeapNode(struct MinHeapNode** a, struct MinHeapNode** b) { struct MinHeapNode* t = *a; *a = *b; *b = t; } void minHeapify(struct MinHeap* minHeap, int idx) { int smallest = idx; int left = 2 * idx + 1; int right = 2 * idx + 2; if (left < minHeap->size && minHeap->array[left]->freq < minHeap->array[smallest]->freq) smallest = left; if (right < minHeap->size && minHeap->array[right]->freq < minHeap->array[smallest]->freq) smallest = right; if (smallest != idx) { swapMinHeapNode(&minHeap->array[smallest], &minHeap->array[idx]); minHeapify(minHeap, smallest); } } int isSizeOne(struct MinHeap* minHeap) { return (minHeap->size == 1); } struct MinHeapNode* extractMin(struct MinHeap* minHeap) { struct MinHeapNode* temp = minHeap->array[0]; minHeap->array[0] = minHeap->array[minHeap->size - 1]; --minHeap->size; minHeapify(minHeap, 0); return temp; } void insertMinHeap(struct MinHeap* minHeap, struct MinHeapNode* minHeapNode) { ++minHeap->size; int i = minHeap->size - 1; while (i && minHeapNode->freq < minHeap->array[(i - 1) / 2]->freq) { minHeap->array[i] = minHeap->array[(i - 1) / 2]; i = (i - 1) / 2; } 
minHeap->array[i] = minHeapNode; } void buildMinHeap(struct MinHeap* minHeap) { int n = minHeap->size - 1; int i; for (i = (n - 1) / 2; i >= 0; --i) minHeapify(minHeap, i); } void printArr(int arr[], int n) { int i; for (i = 0; i < n; ++i) printf("%d", arr[i]); printf("\n"); } int isLeaf(struct MinHeapNode* root) { return !(root->left) && !(root->right); } struct MinHeap* createAndBuildMinHeap(char data[], int freq[], int size) { struct MinHeap* minHeap = createMinHeap(size); for (int i = 0; i < size; ++i) minHeap->array[i] = newNode(data[i], freq[i]); minHeap->size = size; buildMinHeap(minHeap); return minHeap; } struct MinHeapNode* buildHuffmanTree(char data[], int freq[], int size) { struct MinHeapNode *left, *right, *top; struct MinHeap* minHeap = createAndBuildMinHeap(data, freq, size); while (!isSizeOne(minHeap)) { left = extractMin(minHeap); right = extractMin(minHeap); top = newNode('$', left->freq + right->freq); top->left = left; top->right = right; insertMinHeap(minHeap, top); } return extractMin(minHeap); } void printCodes(struct MinHeapNode* root, int arr[], int top) { if (root->left) { arr[top] = 0; printCodes(root->left, arr, top + 1); } if (root->right) { arr[top] = 1; printCodes(root->right, arr, top + 1); } if (isLeaf(root)) { printf("%c: ", root->data); printArr(arr, top); } } void HuffmanCodes(char data[], int freq[], int size) { struct MinHeapNode* root = buildHuffmanTree(data, freq, size); int arr[MAX_TREE_HT], top = 0; printCodes(root, arr, top); } int main() { char data[] = {'a', 'b', 'c', 'd', 'e', 'f'}; int freq[] = {5, 9, 12, 13, 16, 45}; int size = sizeof(data) / sizeof(data[0]); printf("Huffman Codes:\n"); HuffmanCodes(data, freq, size); return 0; } ``` 以上代码是哈夫曼编码的一个简单C语言实现:它根据给定的字符频率构建哈夫曼树并打印每个字符的编码;要真正压缩文件,还需要在此基础上把编码按位写入输出文件。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值