用 CUDA C 并行实现哈夫曼编码

统计可见字符频率

找一篇英文文章 in.txt,统计其中每个 ASCII 字符(编码 0–127)的出现次数,并按编码顺序每行一个地保存到文件 out.txt(注意:后面的编码程序是从 fre.txt 读取频率表的,运行前需要把 out.txt 复制或重命名为 fre.txt):

#include<iostream>
using namespace std;
int main()
{
    int a[256]={0};
    //for()
    freopen("in.txt","r",stdin);
    freopen("out.txt","w",stdout);
    char c;
    while(scanf("%c",&c)!=EOF)
    {
        a[(int)c]++;                         
    }   
    for(int i=0;i<128;i++)
    {
        c=char(i);       
        //cout<<i<<" "<<c<<" ";
        cout<<a[i]<<endl;
    }
}
哈夫曼编码

1 根据字符频率生成哈夫曼树

2 遍历哈夫曼树生成哈夫曼编码

3 从文件读入待编码序列

4 GPU 分段生成待编码序列的哈夫曼编码

5 将各段哈夫曼编码依次拼接并输出到文件

#include<cstdio>
#include<cstdlib>
#include<fstream>
#include<iostream>
#include<queue>
#include<string>
#include<vector>
using namespace std;
#define N 128    // number of distinct symbols; also the row stride of the code table
#define M 4      // number of characters each GPU thread encodes
#define MM 20000 // maximum length of the text to encode
// Checks the status returned by a CUDA runtime call.
// The argument is evaluated exactly once (re-evaluating it would repeat the
// CUDA call), and on failure the numeric code plus its description is
// reported on stderr before the program stops: continuing with a failed
// allocation or copy would only produce garbage further down.
#define HANDLE_ERROR(x) \
    do { \
        cudaError_t handle_error_status_=(x); \
        if(handle_error_status_!=cudaSuccess) \
        { \
            fprintf(stderr,"error is caught on gpu: %d (%s)\n", \
                    (int)handle_error_status_, \
                    cudaGetErrorString(handle_error_status_)); \
            exit(EXIT_FAILURE); \
        } \
    } while(0)
// One vertex of the Huffman tree.
struct node
{
    int v;                  // weight: a symbol frequency, or the sum of both children
    char c;                 // symbol stored in this vertex
    node *lchild, *rchild;  // left / right subtree, NULL when absent
};
struct cmp
{
       bool operator()(const node &p1,const node &p2)
       {return p1.v>p2.v;} 
};

// Encodes the text `str` with the code table `dic` and stores the result in `part`.
//
// Work split: thread t handles characters [t*M, (t+1)*M) of `str`, clipped to
// *textlen, and writes the concatenated codes into its own slot
// part[t*M*N .. t*M*N + partl[t]).
//
// Parameters (all device pointers):
//   dic       - code table, row s (N bytes wide) holds the code of symbol s
//   dicl      - dicl[s] is the number of valid bytes in row s
//   str       - text to encode
//   textlen   - number of characters in str
//   part      - output buffer, M*N bytes per thread
//   partl     - partl[t] receives the number of bytes thread t produced
//   threadNum - number of threads that have work to do
//
// Launch: 1-D. The thread index is derived from blockIdx/blockDim/threadIdx,
// so a single-block launch behaves exactly as a threadIdx-only index would,
// and multi-block launches are handled as well.
__global__ void cuda_fun_encode(char *dic,int *dicl,char *str,int *textlen,char *part,int *partl,int *threadNum)
{
    const int tid=blockIdx.x*blockDim.x+threadIdx.x;
    if(tid>=(*threadNum))
        return;

    const int len=*textlen;
    const int first=tid*M;
    char *out=part+(size_t)M*N*tid;
    // Count in a register and publish once at the end instead of updating
    // partl[tid] in global memory for every output byte.
    int written=0;
    for(int i=first;i<first+M&&i<len;i++)
    {
        // Go through unsigned char so a byte >= 0x80 cannot become a
        // negative index, and ignore bytes the N-entry table cannot hold.
        const int sym=(unsigned char)str[i];
        if(sym>=N)
            continue;
        const int codeLen=dicl[sym];
        for(int j=0;j<codeLen;j++)
        {
            out[written++]=dic[sym*N+j];
        }
    }
    partl[tid]=written;
}

//生成哈弗曼树并得到哈弗曼编码
void search(node *root,string code,string *dic,int *dicl)
{  
    if(root->lchild!=NULL)
        search(root->lchild,code+"0",dic,dicl);
    if(root->rchild!=NULL)
        search(root->rchild,code+"1",dic,dicl);
    if(root->c==30)
        return;
    else
    {
        char c=root->c;
        cout<<c<<" "<<code<<endl;
        dic[c]=code;
        dicl[c]=code.length();
    }
}

// Host driver of the GPU Huffman encoder.
//
//   1. read N symbol frequencies from fre.txt and build the Huffman tree
//   2. derive the code table and upload it to the device
//   3. read the text to encode from text.txt (at most MM characters)
//   4. encode on the GPU, M characters per thread, every thread writing into
//      its own N*M-byte slot of `part`
//   5. print the per-thread pieces and write their concatenation to encode.txt
//
// Returns 0 on success, 1 when an input/output file cannot be opened or the
// frequency table contains no symbol.
int main()
{
    // ---- 1. frequency table -> Huffman tree ---------------------------------
    if(freopen("fre.txt","r",stdin)==NULL)
    {
        fprintf(stderr,"cannot open fre.txt\n");
        return 1;
    }
    int freq[N]={0};
    for(int i=0;i<N;i++)
    {
        if(!(cin>>freq[i]))
        {
            // Short or malformed table: this and all following symbols count as absent.
            freq[i]=0;
            break;
        }
    }

    priority_queue <node,vector<node>,cmp> q;
    for(int i=0;i<N;i++)
    {
        if(freq[i]<=0)
            continue;
        node leaf;
        leaf.v=freq[i];
        leaf.c=char(i);
        leaf.lchild=NULL;
        leaf.rchild=NULL;
        q.push(leaf);
    }
    if(q.empty())
    {
        // top() on an empty queue is undefined, so stop here.
        fprintf(stderr,"fre.txt contains no symbol with a non-zero frequency\n");
        return 1;
    }

    // Children live in `pool` so nothing has to be new'ed or deleted. At most
    // 2*(N-1) children are ever stored, so reserving 2*N slots guarantees the
    // vector never reallocates and the child pointers stay valid.
    vector<node> pool;
    pool.reserve(2*N);
    while(q.size()>1)
    {
        pool.push_back(q.top());
        q.pop();
        node *l=&pool.back();
        pool.push_back(q.top());
        q.pop();
        node *r=&pool.back();

        node parent;
        parent.lchild=l;
        parent.rchild=r;
        parent.v=l->v+r->v;
        parent.c=30;//30 is an invisible character
        q.push(parent);
    }

    // ---- 2. code table ------------------------------------------------------
    string dic[N];            // dic[s]: code of symbol s as a string of '0'/'1'
    char dic2[N][N]={{0}};    // same table as fixed-width rows for the device
    int dicl[N]={0};          // dicl[s]: length of dic[s]

    char *dev_dic=NULL;
    int *dev_dicl=NULL;
    HANDLE_ERROR(cudaMalloc((void**)&dev_dic, sizeof(char)*N*N));
    HANDLE_ERROR(cudaMalloc((void**)&dev_dicl,sizeof(int)*N));
    string code="";
    node head=q.top();
    search(&head,code,dic,dicl);

    for(int i=0;i<N;i++)
    {
        for(size_t j=0;j<dic[i].length()&&j<(size_t)N;j++)
        {
            dic2[i][j]=dic[i][j];
        }
    }
    printf("1111*********************\n");
    HANDLE_ERROR( cudaMemcpy(dev_dic,dic2, sizeof(char)*N*N,cudaMemcpyHostToDevice) );
    HANDLE_ERROR( cudaMemcpy(dev_dicl,dicl,sizeof(int)*N,cudaMemcpyHostToDevice) );
    printf("2222*********************\n");

    // ---- 3. text to encode --------------------------------------------------
    vector<char> str(MM,0);
    char *dev_str=NULL;      // copy of str on the device
    int textlen=0;           // number of characters kept in str
    int *dev_textlen=NULL;   // text length as seen by the kernel
    int skipped=0;           // characters that have no code and were left out
    if(freopen("text.txt","r",stdin)==NULL)
    {
        fprintf(stderr,"cannot open text.txt\n");
        return 1;
    }
    int ch;
    while(textlen<MM&&(ch=getchar())!=EOF)
    {
        printf("%c",ch);
        // Only symbols that are in the table and own a code can be encoded;
        // anything else would index the device tables out of range or
        // contribute nothing to the output.
        if(ch>=N||dicl[ch]==0)
        {
            skipped++;
            continue;
        }
        str[textlen++]=(char)ch;
    }
    if(textlen==MM&&getchar()!=EOF)
        fprintf(stderr,"warning: text.txt is longer than %d characters, the rest is ignored\n",MM);
    if(skipped>0)
        fprintf(stderr,"warning: %d character(s) without a code were skipped\n",skipped);
    printf("3333*********************\n");
    HANDLE_ERROR(cudaMalloc((void**)&dev_str,sizeof(char)*MM));
    HANDLE_ERROR(cudaMalloc((void**)&dev_textlen,sizeof(int)));
    HANDLE_ERROR(cudaMemcpy(dev_str,&str[0],sizeof(char)*MM,cudaMemcpyHostToDevice));
    HANDLE_ERROR(cudaMemcpy(dev_textlen,&textlen,sizeof(int),cudaMemcpyHostToDevice));
    printf("4444*********************\n");

    // ---- 4. divide and encode -----------------------------------------------
    const int threadNum=(textlen+M-1)/M;   // one thread per M characters
    int *dev_threadNum=NULL;
    // Heap storage: N*M bytes per thread is far too much for a stack array.
    vector<char> part((size_t)N*M*threadNum);
    char *dev_part=NULL;
    vector<int> partl(threadNum,0);
    int *dev_partl=NULL;

    printf("5555*********************\n");
    HANDLE_ERROR(cudaMalloc((void**)&dev_threadNum,sizeof(int)));
    HANDLE_ERROR(cudaMemcpy(dev_threadNum,&threadNum,sizeof(int),cudaMemcpyHostToDevice) );
    if(threadNum>0)
    {
        HANDLE_ERROR(cudaMalloc((void**)&dev_part,sizeof(char)*N*M*threadNum));
        HANDLE_ERROR(cudaMalloc((void**)&dev_partl,sizeof(int)*(threadNum)));
    }
    printf("6666*********************\n");

    // A block cannot hold an arbitrary number of threads, so the work is
    // issued as a series of single-block launches. Each launch gets pointers
    // shifted to its first thread's data plus its own thread count and
    // remaining text length, which keeps every per-launch index relative to
    // thread 0. The copies and kernels are all issued on the default stream,
    // so they run in issue order.
    const int threadsPerLaunch=512;
    for(int first=0;first<threadNum;first+=threadsPerLaunch)
    {
        int count=threadNum-first;
        if(count>threadsPerLaunch)
            count=threadsPerLaunch;
        int remaining=textlen-first*M;
        HANDLE_ERROR(cudaMemcpy(dev_threadNum,&count,sizeof(int),cudaMemcpyHostToDevice));
        HANDLE_ERROR(cudaMemcpy(dev_textlen,&remaining,sizeof(int),cudaMemcpyHostToDevice));

        cuda_fun_encode<<<1,count>>>(dev_dic,dev_dicl,dev_str+first*M,dev_textlen,
                                     dev_part+(size_t)first*M*N,dev_partl+first,dev_threadNum);
        // A launch reports configuration errors only through cudaGetLastError().
        cudaError_t launchErr=cudaGetLastError();
        HANDLE_ERROR(launchErr);
    }

    printf("7777*********************\n");
    if(threadNum>0)
    {
        // These blocking copies also surface faults raised while the kernels ran.
        HANDLE_ERROR( cudaMemcpy(&partl[0],dev_partl,sizeof(int)*threadNum,cudaMemcpyDeviceToHost) );
        HANDLE_ERROR( cudaMemcpy(&part[0],dev_part,sizeof(char)*N*M*threadNum,cudaMemcpyDeviceToHost) );
    }
    printf("8888*********************\n");

    // ---- 5. show the pieces, then write the combined code -------------------
    printf("#############################################\n");
    printf("the length of text is %d\n",textlen);
    for(int t=0;t<threadNum;t++)
    {
        printf("the length of part[%d] is %d\n",t,partl[t]);
        for(int j=0;j<partl[t];j++)
            printf("%c",part[(size_t)t*N*M+j]);
        printf("\n");
    }
    printf("#############################################\n");

    if(freopen("encode.txt","w",stdout)==NULL)
    {
        fprintf(stderr,"cannot open encode.txt\n");
        return 1;
    }
    for(int t=0;t<threadNum;t++)
        for(int j=0;j<partl[t];j++)
            printf("%c",part[(size_t)t*N*M+j]);

    cudaFree(dev_dic);
    cudaFree(dev_dicl);
    cudaFree(dev_str);
    cudaFree(dev_textlen);
    cudaFree(dev_threadNum);
    cudaFree(dev_part);
    cudaFree(dev_partl);
    return 0;
}

 

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
/*
 * Huffman coding is a data-compression technique: it assigns codes according
 * to how often each character occurs, so that frequent characters get short
 * codes and rare characters get long ones, which shrinks the data.
 *
 * The procedure has two main steps:
 *   1. Count character frequencies: count how often each distinct character
 *      occurs in the original data and build a Huffman tree from the counts.
 *   2. Generate the code table: derive from the Huffman tree the code that
 *      each character maps to.
 *
 * Below is a simple C program that implements Huffman coding.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_TREE_HT 100

/* A tree vertex: a character with its frequency, or an internal vertex. */
struct MinHeapNode {
    char data;
    unsigned freq;
    struct MinHeapNode *left, *right;
};

/* Binary min-heap of tree vertices ordered by freq. */
struct MinHeap {
    unsigned size;
    unsigned capacity;
    struct MinHeapNode **array;
};

/* Allocates a childless vertex holding `data` with frequency `freq`. */
struct MinHeapNode *newNode(char data, unsigned freq)
{
    struct MinHeapNode *temp = (struct MinHeapNode*)malloc(sizeof(struct MinHeapNode));
    temp->left = temp->right = NULL;
    temp->data = data;
    temp->freq = freq;
    return temp;
}

/* Allocates an empty heap with room for `capacity` vertices. */
struct MinHeap *createMinHeap(unsigned capacity)
{
    struct MinHeap *minHeap = (struct MinHeap*)malloc(sizeof(struct MinHeap));
    minHeap->size = 0;
    minHeap->capacity = capacity;
    minHeap->array = (struct MinHeapNode**)malloc(minHeap->capacity * sizeof(struct MinHeapNode*));
    return minHeap;
}

/* Exchanges two heap slots. */
void swapMinHeapNode(struct MinHeapNode **a, struct MinHeapNode **b)
{
    struct MinHeapNode *t = *a;
    *a = *b;
    *b = t;
}

/* Restores the heap property for the subtree rooted at slot `idx`. */
void minHeapify(struct MinHeap *minHeap, int idx)
{
    int smallest = idx;
    int left = 2*idx + 1;
    int right = 2*idx + 2;

    if ((unsigned)left < minHeap->size &&
        minHeap->array[left]->freq < minHeap->array[smallest]->freq)
        smallest = left;
    if ((unsigned)right < minHeap->size &&
        minHeap->array[right]->freq < minHeap->array[smallest]->freq)
        smallest = right;

    if (smallest != idx) {
        swapMinHeapNode(&minHeap->array[smallest], &minHeap->array[idx]);
        minHeapify(minHeap, smallest);
    }
}

/* Returns non-zero when exactly one vertex is left in the heap. */
int isSizeOne(struct MinHeap *minHeap)
{
    return (minHeap->size == 1);
}

/* Removes and returns the vertex with the smallest frequency,
 * or NULL when the heap is empty. */
struct MinHeapNode *extractMin(struct MinHeap *minHeap)
{
    struct MinHeapNode *temp;

    if (minHeap->size == 0)
        return NULL;

    /* The minimum sits in slot 0; the last vertex takes its place. */
    temp = minHeap->array[0];
    minHeap->array[0] = minHeap->array[minHeap->size - 1];
    --minHeap->size;
    minHeapify(minHeap, 0);
    return temp;
}

/* Inserts a vertex, sifting it up to its place. */
void insertMinHeap(struct MinHeap *minHeap, struct MinHeapNode *minHeapNode)
{
    int i;

    ++minHeap->size;
    i = minHeap->size - 1;
    while (i && minHeapNode->freq < minHeap->array[(i - 1)/2]->freq) {
        minHeap->array[i] = minHeap->array[(i - 1)/2];
        i = (i - 1)/2;
    }
    minHeap->array[i] = minHeapNode;
}

/* Turns the unordered contents of the heap array into a valid heap. */
void buildMinHeap(struct MinHeap *minHeap)
{
    int n = minHeap->size - 1;
    int i;
    for (i = (n - 1)/2; i >= 0; --i)
        minHeapify(minHeap, i);
}

/* Prints the first n entries of arr as consecutive digits plus a newline. */
void printArr(int arr[], int n)
{
    int i;
    for (i = 0; i < n; ++i)
        printf("%d", arr[i]);
    printf("\n");
}

/* Returns non-zero when the vertex has no children. */
int isLeaf(struct MinHeapNode *root)
{
    return !(root->left) && !(root->right);
}

/* Creates a heap that contains one leaf per (data[i], freq[i]) pair. */
struct MinHeap *createAndBuildMinHeap(char data[], int freq[], int size)
{
    struct MinHeap *minHeap = createMinHeap(size);
    for (int i = 0; i < size; ++i)
        minHeap->array[i] = newNode(data[i], freq[i]);
    minHeap->size = size;
    buildMinHeap(minHeap);
    return minHeap;
}

/* Builds the Huffman tree and returns its root (NULL when size is 0).
 * The temporary heap is released before returning. */
struct MinHeapNode *buildHuffmanTree(char data[], int freq[], int size)
{
    struct MinHeapNode *left, *right, *top;
    struct MinHeap *minHeap = createAndBuildMinHeap(data, freq, size);

    /* Merge the two lightest vertices until a single root remains. */
    while (minHeap->size > 1) {
        left = extractMin(minHeap);
        right = extractMin(minHeap);
        top = newNode('$', left->freq + right->freq);
        top->left = left;
        top->right = right;
        insertMinHeap(minHeap, top);
    }

    top = extractMin(minHeap);
    free(minHeap->array);
    free(minHeap);
    return top;
}

/* Prints "<char>: <code>" for every leaf below root; arr[0..top) holds the
 * path taken so far (0 = left edge, 1 = right edge). */
void printCodes(struct MinHeapNode *root, int arr[], int top)
{
    if (root->left) {
        arr[top] = 0;
        printCodes(root->left, arr, top + 1);
    }
    if (root->right) {
        arr[top] = 1;
        printCodes(root->right, arr, top + 1);
    }
    if (isLeaf(root)) {
        /* A tree made of a single leaf has an empty path; use code "0". */
        if (top == 0) {
            arr[0] = 0;
            top = 1;
        }
        printf("%c: ", root->data);
        printArr(arr, top);
    }
}

/* Releases every vertex of the tree. */
static void freeTree(struct MinHeapNode *root)
{
    if (root == NULL)
        return;
    freeTree(root->left);
    freeTree(root->right);
    free(root);
}

/* Builds the Huffman tree for the given characters and prints their codes. */
void HuffmanCodes(char data[], int freq[], int size)
{
    struct MinHeapNode *root = buildHuffmanTree(data, freq, size);
    int arr[MAX_TREE_HT], top = 0;

    if (root == NULL)
        return;
    printCodes(root, arr, top);
    freeTree(root);
}

int main()
{
    char data[] = {'a', 'b', 'c', 'd', 'e', 'f'};
    int freq[] = {5, 9, 12, 13, 16, 45};
    /* Element count = total array size divided by the size of one element. */
    int size = sizeof(data)/sizeof(data[0]);
    HuffmanCodes(data, freq, size);
    return 0;
}
/* The program above prints the Huffman code of every character. */

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值