BUAA_数据结构_6TH_1.单词查找（查找-基本题）

最新推荐文章于 2024-06-16 21:14:55 发布

Code&Coke

最新推荐文章于 2024-06-16 21:14:55 发布

阅读量3k

点赞数 8

分类专栏：数据结构文章标签：数据结构链表字符串

本文链接：https://blog.csdn.net/weixin_45568867/article/details/117567056

版权

数据结构专栏收录该内容

18 篇文章 14 订阅

订阅专栏

BUAA_数据结构_6TH_1.单词查找（查找-基本题）

第六次作业链接

[2. 排座位（简）a] 施工中…
[3. 整数排序（排序-基本题）] 施工中…

题目描述：

从标准输入中读入一个英文单词及查找方式，在一个给定的英文常用单词字典文件dictionary3000.txt中查找该单词，返回查找结果（查找到返回1，否则返回0）和查找过程中单词的比较次数。查找前，先将所有字典中单词读入至一个单词表（数组）中，然后按要求进行查找。字典中单词总数不超过3500，单词中的字符都是英文小写字母，并已按字典序排好序（可从课件下载区下载该字典文件）。字典中的单词和待查找单词的字符个数不超过20。
查找方式说明：查找方式以1~4数字表示，每个数字含义如下：
1：在单词表中以顺序查找方式查找，因为单词表已排好序，遇到相同的或第一个比待查找的单词大的单词，就要终止查找；
2：在单词表中以折半查找方式查找；
3：在单词表中通过索引表来获取单词查找范围，并在该查找范围中以折半方式查找。索引表构建方式为：以26个英文字母为头字母的单词在字典中的起始位置和单词个数来构建索引表，如：

该索引表表明以字母a开头的单词在单词表中的开始下标位置为0，单词个数为248。
4：按下面给定的hash函数为字典中单词构造一个hash表，hash冲突时按字典序依次存放单词。hash查找遇到冲突时，采用链地址法处理，在冲突链表中找到或未找到(遇到第一个比待查找的单词大的单词或链表结束)便结束查找。

/*
 * Hash function prescribed by the assignment: folds the bytes of str into a
 * base-MULT polynomial (with unsigned wrap-around) and reduces it modulo
 * NHASH, so the result is always in [0, NHASH).
 */
#define NHASH  3001
#define MULT  37
unsigned int hash(char *str)
{
    unsigned int acc = 0;
    const char *cursor = str;

    while (*cursor != '\0') {
        acc = MULT * acc + *cursor;
        cursor++;
    }
    return acc % NHASH;
}

输入形式

单词字典文件dictionary3000.txt存放在当前目录下，待查找单词和查找方式从标准输入读取。待查找单词只包含英文小写字母，与表示查找方式的整数之间以一个空格分隔。

输出形式

将查找结果和单词比较次数输出到标准输出上，两整数之间以一个空格分隔。

样例输入

wins 1
wins 2
wins 3
wins 4
yes 1
yes 2
yes 3
yes 4

样例输出

在这里插入图片描述

思路详解

题目要求我们使用四种查找方式进行查找，下面我将对每种查找方式的实现进行说明

存储结构以及主函数部分说明

使用二维数组存储单词表：dic
使用一个字符数组存放需要查找的单词：pattern
使用dic_lenth记录单词表的长度
使用cmp_num记录查找过程中的比较次数

// Capacity (in rows) of the word table below.
#define maxn 10010
char dic[maxn][25];// word table: a 2-D array holding one dictionary word per row
char pattern[25];// the word currently being searched for
int dic_lenth;// length of the word table (maintained by main while loading)
int cmp_num;// number of word comparisons to print for the current query

int main()
{
    FILE *in;//文件指针
	in=fopen("dictionary3000.txt","r");//读取文件
	dic_lenth=0;//初始化长度为0
    while(fscanf(in,"%s",dic[dic_lenth])!=EOF){
        dic_lenth++;//读一个加一下
    }
    dic_lenth-=1;
    int find_way,res;//find_way:查找方式 res:是否找到
    while(scanf("%s %d",pattern,&find_way)!=EOF){
    //输入待查找单词和查找方式
        cmp_num=0;//初始化比较次数为0
        if(find_way==1){//顺序查找
            res=just_search();
        }
        else if(find_way==2){//二分
            res=binary_search(0,dic_lenth);
        }
        else if(find_way==3){//索引
            res=index_search();
        }
        else if(find_way==4){//哈希
            res=hash_search();
        }
        printf("%d %d\n",res,cmp_num);//打印
        memset(pattern,'\0',sizeof(pattern));//清空字符数组
    }

    return 0;
}

顺序查找

这里唯一需要注意的事情就是当strcmp(dic[i],pattern)>0的时候就可以直接结束了

int just_search();
/*
 * Sequential search for pattern in the sorted word table dic.
 * Scans rows 0 .. dic_lenth-1 and stops at the first row that is equal to
 * or greater than pattern. cmp_num is incremented once per row examined.
 * Returns 1 if pattern was found, 0 otherwise.
 */
int just_search()
{
    int idx=0;
    while(idx<dic_lenth){
        int order=strcmp(dic[idx],pattern);
        cmp_num++;
        if(order>=0){
            // Equal means found; greater means the sorted table cannot
            // contain pattern further on.
            return order==0;
        }
        idx++;
    }
    return 0;
}

二分查找

int binary_search(int left,int right);
/*
 * Binary search for pattern in dic[left .. right] (both bounds inclusive).
 * cmp_num is incremented once per probed row.
 * Returns 1 if pattern was found, 0 otherwise (including an empty range).
 */
int binary_search(int left,int right)
{
    int lo=left;
    int hi=right;
    while(lo<=hi){
        int probe=lo+(hi-lo)/2;// midpoint without computing lo+hi (no overflow)
        int order=strcmp(dic[probe],pattern);
        cmp_num++;
        if(order==0){
            return 1;
        }
        if(order>0){
            hi=probe-1;// pattern sorts before the probed word
        }else{
            lo=probe+1;// pattern sorts after the probed word
        }
    }
    return 0;
}

索引查找

这里需要注意的是：数据集中不含有以x开头的单词，所以
1、创建索引的时候需要注意一下序号
2、创建索引的时候最好初始化一下

创建好的索引表应该如下：
s:起始位置 e:终止位置

0:a s:0 e:247
1:b s:248 e:414
2:c s:415 e:744
3:d s:745 e:939
4:e s:940 e:1113
5:f s:1114 e:1270
6:g s:1271 e:1356
7:h s:1357 e:1457
8:i s:1458 e:1596
9:j s:1597 e:1622
10:k s:1623 e:1648
11:l s:1649 e:1757
12:m s:1758 e:1910
13:n s:1911 e:1979
14:o s:1980 e:2064
15:p s:2065 e:2335
16:q s:2336 e:2350
17:r s:2351 e:2540
18:s s:2541 e:2942
19:t s:2943 e:3115
20:u s:3116 e:3192
21:v s:3193 e:3230
22:w s:3231 e:3350
23:  s:0 e:-1
24:y s:3351 e:3364
25:z s:3365 e:3365

int index_search();
// One entry of the first-letter index table built by index_search.
struct index{
    char index;// the first letter this entry was filled in for
    int start_pos;// subscript in dic of the first word starting with that letter
    int end_pos;// subscript in dic of the last word starting with that letter
};
typedef struct index index;
// Index table, addressed by (first letter - 'a'); 30 slots are reserved.
index list[30];
/*
 * Index search: rebuilds the first-letter index table over dic, then
 * binary-searches pattern inside the range recorded for its first letter.
 * Assumes the dictionary words start with lowercase letters, as the
 * assignment states.
 * Returns 1 if pattern was found, 0 otherwise; comparisons are counted by
 * binary_search through cmp_num.
 */
int index_search()
{
    // The data set has no word starting with 'x', so every entry is first
    // reset to an empty range (end_pos < start_pos); letters that never
    // occur then make binary_search return 0 without any comparison.
    for(int i=0;i<30;i++){
        list[i].start_pos=0;
        list[i].end_pos=-1;
    }
    char now_al='a';
    list[now_al-'a'].start_pos=0;// the first letter's range starts at row 0
    list[now_al-'a'].index='a';
    for(int i=0;i<dic_lenth;i++){
        if(dic[i][0]!=now_al){
            // First letter changed: close the previous range, open the next.
            list[now_al-'a'].end_pos=i-1;
            now_al=dic[i][0];
            list[now_al-'a'].index=now_al;
            list[now_al-'a'].start_pos=i;
        }
    }
    list[now_al-'a'].end_pos=dic_lenth-1;// close the range of the last letter
    int index_p=pattern[0]-'a';
    if(index_p<0||index_p>=26){
        // The query does not start with a lowercase letter: it cannot be in
        // the dictionary, and indexing list[] with it would be out of bounds.
        return 0;
    }
    int l=list[index_p].start_pos;
    int r=list[index_p].end_pos;
    return binary_search(l,r);
}

哈希查找

// Modulus applied by hash(): every hash value is in [0, NHASH).
#define NHASH  3001
// Multiplier used by hash() when folding in each character.
#define MULT  37
// Node of a hash bucket's singly linked list.
struct hash_node{
    char word[25];// the stored dictionary word
    struct hash_node *next;// next node in the same bucket, NULL at the tail
};
typedef struct hash_node node;
typedef struct hash_node * ptr;
// Bucket heads, indexed by hash(word). As a file-scope array it starts out
// all NULL (empty buckets).
ptr hash_table[maxn];

unsigned int hash(char *str);// hash value of a string, in [0, NHASH)
ptr create_node(char word[]);// allocate a list node holding word
void insert_node(char word[]);// append word to the tail of its bucket
int hash_search();// look pattern up in the hash table, 1 if found else 0

ptr create_node(char word[])//创建一个新的指针
{
    ptr new_node=(ptr)malloc(sizeof(node));
    new_node->next=NULL;
    strcpy(new_node->word,word);
    return new_node;
}

/*
 * Inserts word into the hash table by appending a new node to the tail of
 * the bucket selected by hash(word). When words are inserted in dictionary
 * order, appending keeps every bucket's list in dictionary order too.
 */
void insert_node(char word[])
{
    int key=hash(word);
    // Walk the chain of "next" links until the empty slot at the end is
    // found; for an empty bucket that slot is the bucket head itself.
    ptr *slot=&hash_table[key];
    while(*slot!=NULL){
        slot=&(*slot)->next;
    }
    *slot=create_node(word);
}

/*
 * Hash function prescribed by the assignment: folds the bytes of str into a
 * base-MULT polynomial (with unsigned wrap-around) and reduces it modulo
 * NHASH, so the result is always in [0, NHASH).
 */
unsigned int hash(char *str)
{
    unsigned int acc=0;
    const char *cursor=str;

    while(*cursor!='\0'){
        acc=MULT*acc+*cursor;
        cursor++;
    }
    return acc%NHASH;
}

/*
 * Looks pattern up in the hash table: walks the bucket selected by
 * hash(pattern) and stops at a match, at the first stored word greater than
 * pattern, or at the end of the list. cmp_num is incremented once per node
 * examined.
 * Returns 1 if pattern was found, 0 otherwise.
 */
int hash_search()
{
    ptr cur;
    for(cur=hash_table[hash(pattern)];cur!=NULL;cur=cur->next){
        int order=strcmp(cur->word,pattern);
        cmp_num++;
        if(order>=0){
            // Equal means found; a greater stored word ends the search.
            return order==0;
        }
    }
    return 0;
}

完整参考代码

#include<stdio.h>
#include<stdlib.h>
#include<string.h>
// Capacity (in rows) of the word table below.
#define maxn 10010
char dic[maxn][25];// word table: one dictionary word per row
char pattern[25];// the word currently being searched for
int dic_lenth;// length of the word table (maintained by main while loading)
int cmp_num;// number of word comparisons made for the current query

int just_search();// sequential search for pattern, 1 if found else 0
int binary_search(int left,int right);// binary search in dic[left..right], bounds inclusive
int index_search();// first-letter index lookup followed by binary search
// One entry of the first-letter index table built by index_search.
struct index{
    char index;// the first letter this entry was filled in for
    int start_pos;// subscript in dic of the first word starting with that letter
    int end_pos;// subscript in dic of the last word starting with that letter
};
typedef struct index index;
// Index table, addressed by (first letter - 'a'); 30 slots are reserved.
index list[30];

// Modulus applied by hash(): every hash value is in [0, NHASH).
#define NHASH  3001
// Multiplier used by hash() when folding in each character.
#define MULT  37
// Node of a hash bucket's singly linked list.
struct hash_node{
    char word[25];// the stored dictionary word
    struct hash_node *next;// next node in the same bucket, NULL at the tail
};
typedef struct hash_node node;
typedef struct hash_node * ptr;
// Bucket heads, indexed by hash(word). As a file-scope array it starts out
// all NULL (empty buckets).
ptr hash_table[maxn];

unsigned int hash(char *str);// hash value of a string, in [0, NHASH)
ptr create_node(char word[]);// allocate a list node holding word
void insert_node(char word[]);// append word to the tail of its bucket
int hash_search();// look pattern up in the hash table, 1 if found else 0

int main()
{
    FILE *in;
	in=fopen("dictionary3000.txt","r");
	dic_lenth=0;
    while(fscanf(in,"%s",dic[dic_lenth])!=EOF){
        insert_node(dic[dic_lenth]);
        dic_lenth++;
    }
    dic_lenth-=1;
    int find_way,res;
    while(scanf("%s %d",pattern,&find_way)!=EOF){
        cmp_num=0;
        if(find_way==1){
            res=just_search();
        }
        else if(find_way==2){
            res=binary_search(0,dic_lenth);
        }
        else if(find_way==3){
            res=index_search();
        }
        else if(find_way==4){
            res=hash_search();
        }
        printf("%d %d\n",res,cmp_num);
        memset(pattern,'\0',sizeof(pattern));
    }

    return 0;
}

/*
 * Sequential search for pattern in the sorted word table dic.
 * Scans rows 0 .. dic_lenth-1 and stops at the first row that is equal to
 * or greater than pattern. cmp_num is incremented once per row examined.
 * Returns 1 if pattern was found, 0 otherwise.
 */
int just_search()
{
    int idx=0;
    while(idx<dic_lenth){
        int order=strcmp(dic[idx],pattern);
        cmp_num++;
        if(order>=0){
            // Equal means found; greater means the sorted table cannot
            // contain pattern further on.
            return order==0;
        }
        idx++;
    }
    return 0;
}

/*
 * Binary search for pattern in dic[left .. right] (both bounds inclusive).
 * cmp_num is incremented once per probed row.
 * Returns 1 if pattern was found, 0 otherwise (including an empty range).
 */
int binary_search(int left,int right)
{
    int lo=left;
    int hi=right;
    while(lo<=hi){
        int probe=lo+(hi-lo)/2;// midpoint without computing lo+hi (no overflow)
        int order=strcmp(dic[probe],pattern);
        cmp_num++;
        if(order==0){
            return 1;
        }
        if(order>0){
            hi=probe-1;// pattern sorts before the probed word
        }else{
            lo=probe+1;// pattern sorts after the probed word
        }
    }
    return 0;
}

/*
 * Index search: rebuilds the first-letter index table over dic, then
 * binary-searches pattern inside the range recorded for its first letter.
 * Assumes the dictionary words start with lowercase letters, as the
 * assignment states.
 * Returns 1 if pattern was found, 0 otherwise; comparisons are counted by
 * binary_search through cmp_num.
 */
int index_search()
{
    // The data set has no word starting with 'x', so every entry is first
    // reset to an empty range (end_pos < start_pos); letters that never
    // occur then make binary_search return 0 without any comparison.
    for(int i=0;i<30;i++){
        list[i].start_pos=0;
        list[i].end_pos=-1;
    }
    char now_al='a';
    list[now_al-'a'].start_pos=0;// the first letter's range starts at row 0
    list[now_al-'a'].index='a';
    for(int i=0;i<dic_lenth;i++){
        if(dic[i][0]!=now_al){
            // First letter changed: close the previous range, open the next.
            list[now_al-'a'].end_pos=i-1;
            now_al=dic[i][0];
            list[now_al-'a'].index=now_al;
            list[now_al-'a'].start_pos=i;
        }
    }
    list[now_al-'a'].end_pos=dic_lenth-1;// close the range of the last letter
    int index_p=pattern[0]-'a';
    if(index_p<0||index_p>=26){
        // The query does not start with a lowercase letter: it cannot be in
        // the dictionary, and indexing list[] with it would be out of bounds.
        return 0;
    }
    int l=list[index_p].start_pos;
    int r=list[index_p].end_pos;
    return binary_search(l,r);
}

ptr create_node(char word[])
{
    ptr new_node=(ptr)malloc(sizeof(node));
    new_node->next=NULL;
    strcpy(new_node->word,word);
    return new_node;
}

/*
 * Inserts word into the hash table by appending a new node to the tail of
 * the bucket selected by hash(word). main inserts the words in the order
 * they are read from the dictionary file, so appending keeps each bucket in
 * that same order.
 */
void insert_node(char word[])
{
    int key=hash(word);
    // Walk the chain of "next" links until the empty slot at the end is
    // found; for an empty bucket that slot is the bucket head itself.
    ptr *slot=&hash_table[key];
    while(*slot!=NULL){
        slot=&(*slot)->next;
    }
    *slot=create_node(word);
}

/*
 * Hash function prescribed by the assignment: folds the bytes of str into a
 * base-MULT polynomial (with unsigned wrap-around) and reduces it modulo
 * NHASH, so the result is always in [0, NHASH).
 */
unsigned int hash(char *str)
{
    unsigned int acc=0;
    const char *cursor=str;

    while(*cursor!='\0'){
        acc=MULT*acc+*cursor;
        cursor++;
    }
    return acc%NHASH;
}

/*
 * Looks pattern up in the hash table: walks the bucket selected by
 * hash(pattern) and stops at a match, at the first stored word greater than
 * pattern, or at the end of the list. cmp_num is incremented once per node
 * examined.
 * Returns 1 if pattern was found, 0 otherwise.
 */
int hash_search()
{
    ptr cur;
    for(cur=hash_table[hash(pattern)];cur!=NULL;cur=cur->next){
        int order=strcmp(cur->word,pattern);
        cmp_num++;
        if(order>=0){
            // Equal means found; a greater stored word ends the search.
            return order==0;
        }
    }
    return 0;
}

有问题~~或bug~~ 欢迎私戳/评论

Code&Coke

关注

8
点赞
踩
10

收藏

觉得还不错? 一键收藏
4
评论
BUAA_数据结构_6TH_1.单词查找（查找-基本题）

BUAA_数据结构_6TH_1.单词查找（查找-基本题）第六次作业链接[2. 排座位（简）a] 施工中…[3. 整数排序（排序-基本题）] 施工中…题目描述：从标准输入中读入一个英文单词及查找方式，在一个给定的英文常用单词字典文件dictionary3000.txt中查找该单词，返回查找结果（查找到返回1，否则返回0）和查找过程中单词的比较次数。查找前，先将所有字典中单词读入至一个单词表（数组）中，然后按要求进行查找。字典中单词总数不超过3500，单词中的字符都是英文小写字母，并已按字典序排好序
复制链接

扫一扫

专栏目录