利用桶排序和哈希表对一篇英文文章中的单词按词频进行排序

先用一个字符串指针数组(char**)存储文章中的所有单词,然后用哈希函数计算每个单词的哈希值,把单词存入哈希表并统计其出现次数。接着遍历哈希表,按出现次数进行桶排序:每个桶是一条链表,保存对应单词的地址。最后按次数从高到低输出各桶中的单词即可。
本程序在 Code::Blocks 17.12 中运行正常。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Number of slots in the word list built by sparate_words()
#define wordnum 450
// Number of slots main() requests for the hash table
#define tablecapacity 1087
// Number of buckets in the global `barrel` array
#define barrelcapacity 40

// One slot of the hash table
typedef struct HashEntity{
    char* s;// the stored word; a zero (null) pointer marks an empty slot
    int times;// repeat counter: set to 0 on first insertion, incremented on repeats; printed as times+1
    int distance;// offset of the entry from its hashed position; defaults to 0
    int deleted;// 1 = deleted, 0 = not deleted
}HashEntity;

// The hash table: an array of slots plus its slot count
typedef struct HashTable{
    HashEntity* data;// array of `tablesize` slots
    int tablesize;// number of slots in `data`
}HashTable;

// A node in a bucket of the bucket sort
typedef struct node{// each bucket is implemented as a linked list
    char* data;// address of the word; NULL marks the empty node that ends a bucket's list
    struct node* pNext;// link to the next node
}node;

node* barrel[barrelcapacity];// bucket list heads (node*), one per key; barrelcapacity is 40

// The English article whose word frequencies are counted.
// The text is written as two adjacent string literals, which the compiler
// joins into one string; a single literal may not contain a raw line break.
char str[]="Throwing the spotlight on whether purchase a high-technology device when it first released, Some individuals accepts that enjoying the latest device makes possible to release the high pressure and sense the high technology, while the dismiss as exaggeration waiting some time can bring more benefit for them. However, I reckon that I will buy a electronic device when most people adopt it  for the follow reasons and examples.First and foremost, compared with **immediately** experience a new released device, waiting for several month in favor for individuals of saving money. Set the first **price** of the high technology products really high, which would haven been **expelled** by the competitive market, the company constantly leaves a high quality imagine for the product and even can advertise their identical product idea which require **customer** cast most money on it. For instance, when iPhone 6 first released, Its price is more than five thousand but I did not think it worth that money. As a result, I purchased it less than four thousand for six months.What is more, in term of the sense of operating the device, a **machine** have been put into the market is useful and people can exploit the new function of it. Everyone attempt to exploit the latest technology that applying to electronic products to sense the **convenient** of the modern society, but only few of them realize that without the software matching with the device, the main function cannot be exert. The superiority of purchasing the product after it released for several months is that the software developers are given chance to craft something **amazing** and embody the now function of the device. The point in case is that when the **HoloLens** first released, only the original system can be operated which is elusive and difficult to operate the subtle objects. "
"However, several months latter, some software enterprise and single software developer **respectively** design the new operation system and applications to display the world of **virtual reality.**In addition, I am not deny that people who prefer to purchase the new high-technology device immediately do something wrong. It is universal acknowledge that the the electronic products renew faster than other **products** which makes people who wait for long time miss the developing process of technology. Nonetheless, it is the minor factor for us students when compared with saving money and a stable operating experience.After all discussion above, we can safely draw the conclusion that purchasing an electronic product until most people adopt is pave the way for individuals to save money and enjoy a **desirable** using experience.";

///Word processing: tells whether c is an ASCII letter
static int is_ascii_letter(char c){
    return (c>='a'&&c<='z')||(c>='A'&&c<='Z');
}

///Word processing: tells whether c ends the word that is being recorded
static int is_word_terminator(char c){
    return c==' '||c==','||c=='.'||c=='?'||c=='!'||c=='\'';
}

///Word processing: split the global text `str` into words
// A word starts at an ASCII letter and ends at a space or one of , . ? ! '
// Any other character met inside a word (for example '-' or '*') is skipped
// without ending the word, so "high-technology" is stored as "hightechnology".
// Returns a heap array of `wordnum` pointers; unused slots are NULL. Each word
// is its own heap string and grows as needed, so long words cannot overflow.
// At most `wordnum` words are stored. Returns NULL if the array itself cannot
// be allocated. The caller owns the array and every word in it.
char** sparate_words(){
    char** word=(char**)calloc(wordnum,sizeof(char*));// calloc leaves unused slots NULL
    if(word==NULL){
        return NULL;
    }
    int k=0;// index of the word being filled
    size_t len=0;// letters stored so far in word[k]
    size_t cap=0;// bytes allocated for word[k]
    int lock=0;// 1 while inside a word
    for(size_t i=0;str[i]!='\0'&&k<wordnum;i++){
        char c=str[i];
        if(is_ascii_letter(c)){
            if(lock==0){// first letter: open a new word
                cap=16;
                len=0;
                word[k]=(char*)malloc(cap);
                if(word[k]==NULL){
                    break;// out of memory: keep the words collected so far
                }
                lock=1;
            }else if(len+1>=cap){// keep room for this letter and the terminator
                char* grown=(char*)realloc(word[k],cap*2);
                if(grown==NULL){
                    break;// word[k] is still valid; it is terminated below
                }
                word[k]=grown;
                cap*=2;
            }
            word[k][len++]=c;
        }else if(lock==1&&is_word_terminator(c)){
            word[k++][len]='\0';// close the word and move to the next slot
            lock=0;
        }
    }
    if(lock==1){// text ended (or growth failed) in the middle of a word
        word[k][len]='\0';
    }
    return word;
}

///Word processing: print the word list, one word per line
// `word` is an array of `wordnum` string pointers. Slots that are NULL are
// skipped, because passing NULL to "%s" is undefined behaviour.
// A NULL list prints nothing.
void read_word_list(char** word){
    if(word==NULL){
        return;
    }
    for(int i=0;i<wordnum;i++){
        if(word[i]==NULL){
            continue;
        }
        printf("%s\n",word[i]);
    }
}

///Hash table: shift-based hash function (Horner's rule)
// Folds every character of `s` into an unsigned accumulator, multiplying by 32
// (a left shift by 5) before each addition, then reduces it modulo `tablesize`.
// Prints the resulting value as "res=<value>" and returns it.
int hash(char* s,int tablesize){
    unsigned int acc=0;
    for(char* p=s;*p!='\0';p++){
        acc=(acc<<5)+(int)(*p);// acc*32 + current character
    }
    unsigned int slot=acc%tablesize;
    printf("res=%d\n",slot);
    return slot;
}

///Hash table: create an empty hash table
// Allocates a table with `tablesize` zero-filled slots, so every slot starts
// empty (s == NULL, times == 0).
// Returns NULL when `tablesize` is not positive or when memory runs out; in
// that case nothing is leaked. The caller owns the returned table.
HashTable* create_hashtable(int tablesize){
    if(tablesize<=0){
        return NULL;
    }
    HashTable* newtable=(HashTable*)calloc(1,sizeof(HashTable));
    if(newtable==NULL){
        return NULL;
    }
    newtable->data=(HashEntity*)calloc((size_t)tablesize,sizeof(HashEntity));
    if(newtable->data==NULL){
        free(newtable);// do not leak the header when the slot array fails
        return NULL;
    }
    newtable->tablesize=tablesize;
    return newtable;
}

///Hash table: insert a word, or count one more occurrence of it
// Uses linear probing starting at hash(x). The probe wraps around the end of
// the slot array and stops after `tablesize` slots, so it never leaves the
// array and never loops forever on a full table.
// - word not present: stored in the first empty slot with times = 0
// - word already present: the `times` of the slot holding it is incremented
// The table stores the pointer `x` itself, not a copy; the string must stay
// alive as long as the table is used. A NULL `x` or an unusable table is ignored.
void insert_to_hashtable(HashTable* table,char* x){
    if(x==0){
        return;
    }
    if(table==NULL||table->data==NULL||table->tablesize<=0){
        return;
    }
    printf("%s\n",x);
    int hashvalue=hash(x,table->tablesize);
    for(int crash=0;crash<table->tablesize;crash++){// crash = number of collisions so far
        int slot=hashvalue+crash;
        if(slot>=table->tablesize){
            slot-=table->tablesize;// wrap around to the start of the array
        }
        HashEntity* entry=&table->data[slot];
        if(entry->s==0){// free slot: the word is new
            printf("添加到[%d]处\n",slot);
            entry->s=x;
            entry->distance=crash;
            entry->times=0;
            entry->deleted=0;
            return;
        }
        if(strcmp(entry->s,x)==0){// occupied by this very word
            printf("在[%d]处已有\n",slot);
            entry->times++;// count on the slot that actually holds the word
            return;
        }
    }
    fprintf(stderr,"hash table is full, cannot insert \"%s\"\n",x);
}

void add_to_barrel(char* x,int key);// defined further down; declared here because it is called below

///Hash table: print every stored word with its count and feed the bucket sort
// Walks all `table->tablesize` slots, skipping empty ones. For each word it
// prints "<word>   出现了<count>次" where count is times+1, and passes the word
// and that count to add_to_barrel(). A NULL table prints nothing.
void print_hashtable(HashTable* table){
    if(table==NULL||table->data==NULL){
        return;
    }
    for(int i=0;i<table->tablesize;i++){// use the table's own size, not a global constant
        if(table->data[i].s==0){
            continue;
        }
        printf("%s   出现了",table->data[i].s);
        printf("%d次\n",table->data[i].times+1);
        add_to_barrel(table->data[i].s,table->data[i].times+1);
    }
}

///桶排序:把x插入到桶的x位置
void add_to_barrel(char* x,int key){
    node* position;
    position=barrel[key];
    while(1){
        if(position->data==NULL){
            position->data=x;
            position->pNext=(node*)malloc(sizeof(node));
            position->pNext->data=NULL;
            position->pNext->pNext=NULL;
            break;
        }else if(position->data!=NULL){
            position=position->pNext;
        }
    }
}

///Bucket sort: print the content of every bucket
// Buckets are visited from the highest index down to 0. Each line starts with
// the bucket index followed by "次:" and lists the bucket's words, each
// followed by "/ ". A bucket's list is read until its empty end node.
void print_barrel_sort(){
    int count=barrelcapacity;
    while(count-->0){
        printf("%d次:",count);
        for(node* cur=barrel[count];cur->data!=NULL;cur=cur->pNext){
            printf("%s/ ",cur->data);
        }
        printf("\n");
    }
}

///桶排序:初始化
void init_barrel(){
    for(int i=0;i<barrelcapacity;i++){
        free(barrel[i]);
        barrel[i]=(node*)malloc(sizeof(node));
        barrel[i]->data=NULL;
        barrel[i]->pNext=NULL;
    }
}


// Program entry: split the text into words, count them in a hash table,
// then bucket-sort the words by count and print the buckets.
int main()
{
    char** word=sparate_words();
    HashTable* table=create_hashtable(tablecapacity);
    int i=0;
    while(i<wordnum){// feed every slot of the word list to the table
        printf("\n>>>>i=%d\n",i);
        insert_to_hashtable(table,word[i]);
        i++;
    }
    init_barrel();// buckets must exist before print_hashtable fills them
    print_hashtable(table);
    print_barrel_sort();
    return 0;
}

相关推荐
©️2020 CSDN 皮肤主题: 创作都市 设计师:CSDN官方博客 返回首页