哈夫曼树的 C 语言实现

哈夫曼树的 C 语言实现

#include <stdio.h>
#include <stdlib.h>
#include <memory.h>

// Capacity of the per-word code/point buffers and of the scratch arrays
// used while walking from a leaf up to the root.
#define MAX_CODE_LENGTH 40

// Not referenced by the code visible in this file.
long long vocab_max_size = 1000;

// Number of entries in the vocabulary table; main() sets it before building the tree.
long long vocab_size = 0;

// Not referenced by the code visible in this file.
long long layer1_size = 100;


/**
 * One vocabulary word together with its Huffman encoding.
 */
struct vocab_word {
  long long cn;   // how often the word occurs in the training set
  int *point;     // node path that the code follows through the tree
  char *word;     // the word itself
  char *code;     // Huffman code, one element per bit, each 0 or 1
  char codelen;   // length of the Huffman code
};


// Global vocabulary table. main() allocates vocab_size entries;
// CreateBinaryTree() reads each entry's cn and fills in its code, point and codelen.
struct vocab_word *vocab;

/*
 * Print one labelled row: the label, then `len` values each followed by a
 * single space, then a newline.
 */
static void print_row(const char *label, const long long *values, long long len) {
  long long idx = 0;

  printf("%s", label);
  while (idx < len) {
    printf("%lld ", values[idx]);
    idx++;
  }
  printf("\n");
}

/*
 * Dump the intermediate state of the tree construction.
 *
 * Prints the first vocab_size * 2 entries of each of the three working
 * arrays (count, binary, parent_node), one array per line.
 */
void printState(long long* count,
            long long* binary,
            long long* parent_node) {
  print_row("count[]:\t", count, vocab_size * 2);
  print_row("binary[]:\t", binary, vocab_size * 2);
  print_row("parent[]:\t", parent_node, vocab_size * 2);
}


/**
 * Consume and return the index of the smaller of the two current candidates.
 *
 * *pos1 walks leftwards through the leaf half of count[] and *pos2 walks
 * rightwards through the internal-node half. The leaf is taken only when it
 * is strictly smaller, so ties go to the internal node. Once the leaves are
 * exhausted (*pos1 < 0) only *pos2 is used.
 */
static long long TakeSmallest(const long long *count, long long *pos1, long long *pos2) {
  if (*pos1 >= 0 && count[*pos1] < count[*pos2]) {
    return (*pos1)--;
  }
  return (*pos2)++;
}

/**
 * Create a binary Huffman tree from the word counts in the global vocab
 * table. Frequent words end up with shorter binary codes.
 *
 * Reads:  vocab_size and vocab[a].cn for every word. The two-cursor merge
 *         below relies on vocab being sorted by descending cn, which main()
 *         arranges by hand.
 * Writes: vocab[a].codelen, vocab[a].code[] and vocab[a].point[] for every
 *         word; those buffers must already be allocated by the caller.
 *
 * Intermediate state is printed to stdout at every step. If the working
 * arrays cannot be allocated, an error is written to stderr and vocab is
 * left untouched.
 *
 * NOTE: no bound check is made against MAX_CODE_LENGTH while walking up the
 * tree; codes are assumed to be shorter than that.
 */
void CreateBinaryTree() {
  long long a, b, i, min1i, min2i, pos1, pos2;
  long long point[MAX_CODE_LENGTH];   // leaf-to-root node indices of the current word
  char code[MAX_CODE_LENGTH];         // leaf-to-root bits of the current word

  // count:       frequency of every node (leaves first, then internal nodes).
  // binary:      bit assigned to each node (1 for the second-picked child).
  // parent_node: index of each node's parent.
  long long *count = (long long *)calloc((size_t)(vocab_size * 2 + 1), sizeof(long long));
  long long *binary = (long long *)calloc((size_t)(vocab_size * 2 + 1), sizeof(long long));
  long long *parent_node = (long long *)calloc((size_t)(vocab_size * 2 + 1), sizeof(long long));

  if (count == NULL || binary == NULL || parent_node == NULL) {
    fprintf(stderr, "CreateBinaryTree: out of memory\n");
    free(count);
    free(binary);
    free(parent_node);
    return;
  }

  // First half of count[]: the word frequencies.
  for (a = 0; a < vocab_size; a++) {
    count[a] = vocab[a].cn;
  }

  // Second half of count[]: slots for internal nodes, preset to a huge value
  // so that an unused slot never wins a comparison.
  for (a = vocab_size; a < vocab_size * 2; a++) {
    count[a] = (long long)1e15;
  }

  pos1 = vocab_size - 1;  // last leaf
  pos2 = vocab_size;      // first internal-node slot

  printState(count, binary, parent_node);

  // Build the tree by adding one internal node per iteration.
  for (a = 0; a < vocab_size - 1; a++) {

    printf("----------------\n");
    printf("pos1=%lld, pos2=%lld\n", pos1, pos2);

    // Find the two smallest remaining nodes, one after the other.
    min1i = TakeSmallest(count, &pos1, &pos2);

    // min2i has not been chosen yet at this point, so only min1i is reported.
    printf("min1i=%lld\n", min1i);
    printf("pos1=%lld, pos2=%lld\n", pos1, pos2);

    min2i = TakeSmallest(count, &pos1, &pos2);

    // Indices and counts of the two smallest nodes.
    printf("min1i=%lld, min2i=%lld\n", min1i, min2i);
    printf("count[min1i]=%lld, count[min2i]=%lld\n", count[min1i], count[min2i]);

    // Merge them under a new internal node stored at index vocab_size + a.
    count[vocab_size + a] = count[min1i] + count[min2i];
    parent_node[min1i] = vocab_size + a;
    parent_node[min2i] = vocab_size + a;
    binary[min2i] = 1;

    printf("count[vocab_size + a] = %lld\n", count[vocab_size + a]);
    printf("parent_node[%lld] = %lld\n", min1i, parent_node[min1i]);
    printf("parent_node[%lld] = %lld\n", min2i, parent_node[min2i]);
    printf("binary[%lld] = %lld\n", min2i, binary[min2i]);

    printState(count, binary, parent_node);
  }

  // Now assign a binary code to each vocabulary word.
  for (a = 0; a < vocab_size; a++) {
    b = a;
    i = 0;

    // Walk from the leaf up to the root (index vocab_size * 2 - 2),
    // recording the bit and node index at every step.
    while (1) {
      code[i] = (char)binary[b];
      point[i] = b;
      i++;
      b = parent_node[b];
      if (b == vocab_size * 2 - 2) break;
    }

    // Number of steps taken is the code length.
    vocab[a].codelen = (char)i;

    // The path always starts at the root, expressed relative to vocab_size.
    vocab[a].point[0] = (int)(vocab_size - 2);

    // The walk was leaf-to-root; store code and path root-to-leaf.
    for (b = 0; b < i; b++) {
      vocab[a].code[i - b - 1] = code[b];
      vocab[a].point[i - b] = (int)(point[b] - vocab_size);
    }
  }

  free(count);
  free(binary);
  free(parent_node);
}

/**
 * 代码运行:
 * gcc ./huffman_tree.cpp; ./a.out
 */
int main()
{
    vocab_size = 6; 
    vocab = (vocab_word*) calloc(vocab_size, sizeof(vocab_word));
    memset(vocab, 0, sizeof(vocab_word) * (vocab_size));

    // 初始化code/point.
    for (int a = 0; a < vocab_size; a++) {
        vocab[a].code = (char *)calloc(MAX_CODE_LENGTH, sizeof(char));
        vocab[a].point = (int *)calloc(MAX_CODE_LENGTH, sizeof(int));
    }

    // 事先对vocab按词频排好序(word2vec事先已经用qsort处理),从大到小排序.
    // 可以用qsort。 本代码直接已经人工排好序了.
    vocab[0].cn = 7;
    char* str = "T";
    vocab[0].word = str;

    vocab[1].cn = 5;
    str = "E";
    vocab[1].word = str;

    vocab[2].cn = 4;
    str = "G";
    vocab[2].word = str;

    vocab[3].cn = 4;
    str = "R";
    vocab[3].word = str;

    vocab[4].cn = 3;
    str = "O";
    vocab[4].word = str;

    vocab[5].cn = 2;
    str = "F";
    vocab[5].word = str;


    CreateBinaryTree();

    for (int a = 0; a < vocab_size; a++) {
        printf("word=%s\t", vocab[a].word);
        printf("cn=%d\t", vocab[a].cn);
        printf("codelen=%d\t", vocab[a].codelen);
        
        printf("code=");
        for(int i = 0; i < vocab[a].codelen; i++) {
            printf("%d", vocab[a].code[i]);
        }
        printf("\t");

        printf("point=");
        for(int i = 0; i < vocab[a].codelen; i++) {
            printf("%d-", vocab[a].point[i]);
        }
        
        printf("\n");
         //printf("point=%s\n", vocab[a].point);

    }

    
    
	return 0;
}


  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
以下是一个简单的哈夫曼树的 C 语言实现,使用结构体数组存储节点:

```c
#include <stdio.h>
#include <stdlib.h>

// 定义哈夫曼树节点结构体
typedef struct huf_node {
    int weight; // 权值
    int parent; // 双亲节点下标
    int lchild; // 左孩子节点下标
    int rchild; // 右孩子节点下标
} huf_node;

// 选择权值最小的两个节点
void select_min(huf_node *ht, int n, int *s1, int *s2) {
    int i;
    *s1 = *s2 = -1;
    for (i = 0; i < n; i++) {
        if (ht[i].parent == -1) {
            if (*s1 == -1 || ht[i].weight < ht[*s1].weight) {
                *s2 = *s1;
                *s1 = i;
            } else if (*s2 == -1 || ht[i].weight < ht[*s2].weight) {
                *s2 = i;
            }
        }
    }
}

// 构建哈夫曼树
void create_huffman(huf_node *ht, int *w, int n) {
    int i, s1, s2;
    for (i = 0; i < n; i++) {
        ht[i].weight = w[i];
        ht[i].parent = -1;
        ht[i].lchild = -1;
        ht[i].rchild = -1;
    }
    for (i = n; i < 2 * n - 1; i++) {
        select_min(ht, i, &s1, &s2);
        ht[s1].parent = i;
        ht[s2].parent = i;
        ht[i].lchild = s1;
        ht[i].rchild = s2;
        ht[i].weight = ht[s1].weight + ht[s2].weight;
    }
}

int main() {
    int i, n;
    int *w;
    huf_node *ht;
    printf("请输入权值个数:");
    scanf("%d", &n);
    w = (int *)malloc(n * sizeof(int));
    ht = (huf_node *)malloc((2 * n - 1) * sizeof(huf_node));
    printf("请输入%d个权值:", n);
    for (i = 0; i < n; i++) {
        scanf("%d", &w[i]);
    }
    create_huffman(ht, w, n);
    printf("哈夫曼树:\n");
    for (i = 0; i < 2 * n - 1; i++) {
        printf("%d %d %d %d\n", ht[i].weight, ht[i].parent, ht[i].lchild, ht[i].rchild);
    }
    free(w);
    free(ht);
    return 0;
}
```
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值