问题 K: 单词检查(Ⅲ)- Hash表实现（2023）

-芒果酱-

已于 2023-11-25 19:37:19 修改

阅读量1.1k

点赞数 18

分类专栏： C C++语言数据结构文章标签：哈希算法算法

于 2023-11-25 11:59:40 首次发布

本文链接：https://blog.csdn.net/weixin_46337464/article/details/134612604

版权

C C++语言同时被 2 个专栏收录

17 篇文章 2 订阅

订阅专栏

数据结构

2 篇文章 0 订阅

订阅专栏

本文介绍了一种基于哈希技术的拼写检查程序，涉及链地址法的散列表实现、错误单词检测、修改建议生成以及线性地址法和链地址法的冲突处理性能比较。作者还提供了两种实现代码，讨论了如何优化哈希函数以提高性能并避免超时。

摘要由CSDN通过智能技术生成

1、题目描述

许多应用程序，如字处理软件，邮件客户端等，都包含了单词检查特性。单词检查是根据字典，找出输入文本中拼错的单词，我们认为凡是不出现在字典中的单词都是错误单词。不仅如此，一些检查程序还能给出类似拼错单词的修改建议单词。例如字典由下面几个单词组成：
bake cake main rain vase
如果输入文件中有单词 vake，检查程序就能发现它是一个错误的单词，并且给出 bake、cake 或 vase 作为修改建议单词。

修改建议单词可以采用如下生成技术：

(1)在每一个可能位置插入'a'-'z'中的一者
(2)删除单词中的一个字符
(3)用'a'-'z'中的一者取代单词中的任一字符

很明显拼写检查程序的核心操作是在字典中查找某个单词，如果字典很大，性能无疑是非常关键的。
你写的程序要求读入字典文件，然后对一个输入文件的单词进行检查，列出其中的错误单词并给出修改建议。

本题要求采用hash技术维护字典，hash的实现必须由同学们编码完成，不能采用C++或JAVA的泛型库。
hash函数建议自行设计一个，然后和成熟的hash函数比较，比如下面的ELF hash函数。

/* UNIX ELF hash
 * Published hash algorithm used in the UNIX ELF format for object files.
 *
 * name: NUL-terminated string to hash.
 * Returns the hash value; bits 28-31 are folded back into the low bits and
 * cleared after every character.
 */
unsigned long hash(char *name)
{
    unsigned long acc = 0;

    for (; *name != '\0'; name++) {
        unsigned long top;

        acc = (acc << 4) + *name;
        /* Isolate the top nibble of the low 32 bits. */
        top = acc & 0xF0000000;
        if (top != 0) {
            /* Mix the nibble that is about to be dropped into bits 4-7. */
            acc ^= top >> 24;
        }
        /* Clear the top nibble so the next shift cannot lose data. */
        acc &= ~top;
    }
    return acc;
}

另外，请比较线性地址法和链地址法两种冲突处理方法的性能，以及调整hash表大小对性能的影响。

2、输入

输入分为两部分。
第一部分是字典，每个单词占据一行，最后以仅包含'#'的一行表示结束。所有的单词都是不同的，字典中最多500000个单词。

输入的第二部分包含了所有待检测的单词，单词数目不超过200。每个单词占据一行，最后以仅包含'#'的一行表示结束。

字典中的单词和待检测的单词均由小写字母组成，并且单词最大长度为15。

3、输出

按照检查次序每个单词输出一行，该行首先输出单词自身。如果单词在字典中出现，接着输出" is correct"。如果单词是错误的，那么接着输出':'，如果字典中有建议修改单词，则按照字典中出现的先后次序输出所有的建议修改单词（每个前面都添加一个空格），如果无建议修改单词，在':'后直接换行。

4、实现代码

方法1 一种独特思路，可以看看，不过会超时

//链地址法的散列表//Time out
#include <iostream>
#include <cstdio>
#include <cstring>
#define MAX 10000
using namespace std;
// One element of a singly linked chain.
struct node
{
    string key;        // the stored word
    long Order;        // ordinal stored with the word (the loader numbers words from 1)
    struct node *next; // following element of the chain, NULL at the end
};
typedef struct node Node;   // value type
typedef struct node *PNode; // pointer-to-node type used for chain links
// Chained hash table: an array of sz chain-head pointers.
struct HashTable
{
    PNode *elem; // elem[i] is the first node of chain i, or NULL when the chain is empty
    int sz;      // number of entries in elem
};
unsigned long Hash(HashTable ht,string name);
int PrintLinklist(HashTable ht);
// Give ht an array of n chain heads and mark every chain as empty.
//   ht - table to initialise (any previous contents are not released)
//   n  - number of chains to allocate
void InitHashTable(HashTable &ht, int n)
{
    ht.sz = n;
    PNode *slots = new PNode[n];
    PNode *const stop = slots + n;
    for (PNode *cur = slots; cur != stop; ++cur)
        *cur = NULL;
    ht.elem = slots;
}
// Record word k (ordinal i) in two chains of ht:
//   * the chain selected by Hash(ht,k), used for exact lookups, and
//   * the chain whose index equals the word length, used when collecting
//     words of a similar length.
// Each chain receives its own freshly allocated node.
//
// Nodes are pushed at the head of their chain, so an insertion costs O(1).
// Walking to the tail first made loading quadratic, because the per-length
// chains hold the whole dictionary. The readers in this program either test
// membership or re-sort by Order, so the position inside a chain is
// irrelevant to them.
// Always returns 0.
int InsertHashTable(HashTable &ht,string k,long i)
{
    const unsigned long slots[2] = { Hash(ht,k), (unsigned long)k.size() };
    for(int s=0; s<2; s++)
    {
        PNode fresh=new Node;
        fresh->key=k;
        fresh->Order=i;
        fresh->next=ht.elem[slots[s]];
        ht.elem[slots[s]]=fresh;
    }
    return 0;
}
// Insert word k with ordinal i into the list headed by ht.elem[0], keeping
// that list sorted by ascending Order. A node whose Order equals i stays in
// front of the new node. Always returns 0.
int InsertListTable(HashTable &ht,string k,int i)
{
    PNode fresh=new Node;
    fresh->key=k;
    fresh->Order=i;
    fresh->next=NULL;

    // link always addresses the pointer that should end up pointing at the
    // new node: first the list head, then successive next fields.
    PNode *link=&ht.elem[0];
    while(*link!=NULL && !((*link)->Order>i))
        link=&(*link)->next;

    fresh->next=*link;
    *link=fresh;
    return 0;
}

// True when `shorter` can be obtained from `longer` by dropping characters,
// i.e. it is a subsequence. Callers pass strings whose lengths differ by one,
// so this means "exactly one character removed".
static bool OneLetterShorter(const string &shorter,const string &longer)
{
    string::size_type matched=0;
    for(string::size_type q=0; q<longer.size() && matched<shorter.size(); q++)
    {
        if(longer[q]==shorter[matched]) matched++;
    }
    return matched==shorter.size();
}

// Print "<findword>:" followed by " <word>" for every stored word that is one
// edit away from findword (one letter replaced, inserted or removed), in
// ascending Order. No newline is written.
//
// Candidates are read from the chains indexed by word length (n-1, n, n+1);
// matches are collected in the scratch list ht.elem[0] through
// InsertListTable, printed, and then released so repeated calls do not leak.
// Always returns 0 (the function is declared int, so falling off the end
// would be undefined behaviour).
int ScanWay(HashTable ht,string findword)
{
    const int n=findword.size();
    ht.elem[0]=NULL;//start from an empty scratch list
    cout<<findword<<":";

    // Only lengths n-1, n and n+1 can be one edit away; chain 0 is the
    // scratch list, so the scan never starts below 1.
    const int first=(n-1<1)?1:n-1;
    for(int len=first; len<=n+1 && len<ht.sz; len++)
    {
        for(PNode k=ht.elem[len]; k; k=k->next)
        {
            const int m=k->key.size();
            bool close=false;
            if(m==n)//same length: at most one position may differ
            {
                int diff=0;
                for(int q=0; q<m; q++)
                {
                    if(k->key[q]!=findword[q]) diff++;
                }
                close=(diff<=1);
            }
            else if(m==n+1)//stored word has one extra letter
            {
                close=OneLetterShorter(findword,k->key);
            }
            else if(n==m+1)//stored word lacks one letter
            {
                close=OneLetterShorter(k->key,findword);
            }
            if(close) InsertListTable(ht,k->key,k->Order);
        }
    }

    // Emit the sorted suggestions and free the scratch nodes as we go.
    PNode k=ht.elem[0];
    while(k)
    {
        cout<<" "<<k->key;
        PNode done=k;
        k=k->next;
        delete done;
    }
    ht.elem[0]=NULL;
    return 0;
}

// Exact lookup: returns 1 when findword is stored in the chain selected by
// Hash(ht,findword), 0 otherwise.
int SameFind(HashTable ht,string findword)
{
    for(PNode cur=ht.elem[Hash(ht,findword)]; cur!=NULL; cur=cur->next)
    {
        if(cur->key==findword) return 1;
    }
    return 0;
}
// Query loop: reads words from standard input until a lone "#" (or until the
// input ends or fails) and writes one line per word:
//   "<word> is correct"  when SameFind reports the word as stored,
//   otherwise the output of ScanWay followed by a newline.
// Always returns 0.
int SearchHashTable(HashTable ht)
{
    string findword;
    // Testing the stream state stops the loop on EOF; without it a missing
    // "#" terminator would leave findword unchanged and loop forever.
    while(cin>>findword && findword!="#")
    {
        if(SameFind(ht,findword))
            cout<<findword<<" is correct\n";
        else
        {
            ScanWay(ht,findword);
            cout<<endl;
        }
    }
    return 0;
}
// Hash function: multiplies the character codes of name together and reduces
// the product modulo ht.sz.
//
// The product is accumulated in unsigned arithmetic, which wraps on overflow;
// the previous int accumulator overflowed for ordinary words, which is
// undefined behaviour for a signed type.
//
// Results below 30 are moved up (0 -> 31, 1..29 -> 31..59), so the returned
// index is never below 30. Elsewhere in this program chain 0 is a scratch
// list and chains indexed by word length hold per-length lists.
// NOTE(review): assumes ht.sz is at least 60 so the remapped index stays in
// range - true for the MAX used by main; confirm if the size changes.
unsigned long Hash(HashTable ht,string name)
{
    const unsigned long buckets=ht.sz;
    unsigned long product=1;
    for(string::size_type i=0; i<name.size(); i++)
    {
        product*=(unsigned char)name[i];
    }
    unsigned long h=product%buckets;
    if(h==0) h=31;
    else if(h<30) h+=30;
    return h;
}
// Load the dictionary: reads words from standard input until a lone "#" (or
// until the input ends or fails) and stores each one with InsertHashTable,
// numbering them 1, 2, 3, ... in reading order.
void InputHashTable(HashTable &ht)
{
    long i=1;
    string Word;
    // Checking the stream state prevents re-inserting the last word forever
    // when the "#" terminator is missing.
    while(cin>>Word && Word!="#")
    {
        InsertHashTable(ht,Word,i++);
    }
}

// Debug helper: writes every non-empty chain of ht to standard output as
// "<index> :<word> -> <word> -> ...", framed by dashed lines.
// Always returns 0 (the function is declared int, so falling off the end
// would be undefined behaviour).
int PrintLinklist(HashTable ht)
{
    cout<<"-------------\n所有的内容："<<endl;
    for(long i=0; i<ht.sz; i++)
    {
        PNode p=ht.elem[i];
        if(p==NULL) continue;//skip empty chains
        cout<<i<<" :";
        for(; p!=NULL; p=p->next)
        {
            cout<<p->key<<" -> ";
        }
        cout<<endl;
    }
    cout<<"-------------"<<endl;
    return 0;
}
// Program entry: build the table, load the dictionary, then answer queries.
int main()
{
    HashTable ht;
    InitHashTable(ht, MAX);//allocate MAX empty chains
    InputHashTable(ht);//read the dictionary section
    // PrintLinklist(ht) dumps the whole table and is meant for debugging only;
    // it is not called here because its output would be mixed into the
    // per-word result lines.
    SearchHashTable(ht);//read and answer the query section
    return 0;
}

方法2，使用hash函数，通过

//链地址法的散列表 AC
#include <iostream>
#include <cstdio>
#include <iomanip>
#include <cstring>
#define MAX 50000
using namespace std;
// One element of a singly linked chain.
struct node
{
    string key;        // the stored word
    long Order;        // ordinal stored with the word (the loader numbers words from 1)
    struct node *next; // following element of the chain, NULL at the end
};
typedef struct node Node;   // value type
typedef struct node *PNode; // pointer-to-node type used for chain links
// Chained hash table: an array of sz chain-head pointers.
struct HashTable
{
    PNode *elem; // elem[i] is the first node of chain i, or NULL when the chain is empty
    int sz;      // number of entries in elem
};
unsigned long Hash(HashTable ht,string name);
int SameFind(HashTable ht,string findword);
int PrintLinklist(HashTable ht);
// Give ht an array of n chain heads and mark every chain as empty.
//   ht - table to initialise (any previous contents are not released)
//   n  - number of chains to allocate
void InitHashTable(HashTable &ht, int n)
{
    ht.sz = n;
    ht.elem = new PNode[n];
    int remaining = n;
    PNode *cur = ht.elem;
    while (remaining-- > 0)
        *cur++ = NULL;
}
// Append word k with ordinal i to the end of the chain selected by
// Hash(ht,k). Always returns 0.
int InsertHashTable(HashTable &ht,string k,long i)
{
    // Walk the chain through its link fields until the terminating NULL
    // link is found; that link is where the new node gets attached.
    PNode *link=&ht.elem[Hash(ht,k)];
    while(*link!=NULL)
        link=&(*link)->next;

    PNode fresh=new Node;
    fresh->key=k;
    fresh->Order=i;
    fresh->next=NULL;
    *link=fresh;
    return 0;
}
// Insert word k with ordinal i into the list headed by ht.elem[0], keeping
// that list sorted by ascending Order. If a node with the same Order is
// already present the list is left untouched.
//
// The duplicate check runs before the node is allocated; allocating first
// leaked one node every time the same suggestion was produced twice.
// Always returns 0.
int InsertListTable(HashTable &ht,string k,int i)
{
    // link addresses the pointer that will reference the new node: the list
    // head first, then successive next fields.
    PNode *link=&ht.elem[0];
    while(*link!=NULL && (*link)->Order<i)
        link=&(*link)->next;

    if(*link!=NULL && (*link)->Order==i) return 0;//already listed

    PNode fresh=new Node;
    fresh->key=k;
    fresh->Order=i;
    fresh->next=*link;
    *link=fresh;
    return 0;
}
// Suggest corrections: prints "<findword>:" followed by " <word>" for every
// stored word reachable from findword by one edit, in ascending Order. No
// newline is written.
//
// Every single-edit variant is generated and looked up with SameFind, which
// returns the stored ordinal or 0:
//   * replace the letter at each position with another letter 'a'..'z',
//   * remove the letter at each position (skipped for one-letter words),
//   * insert a letter 'a'..'z' at each position, including the end.
// Hits are collected in the scratch list ht.elem[0] via InsertListTable
// (sorted by ordinal, duplicates dropped), printed, and then released so
// repeated calls do not leak.
// Always returns 0 (the function is declared int, so falling off the end
// would be undefined behaviour).
int ScanWay(HashTable &ht,string findword)
{
    const int n=findword.size();
    string changedword;
    int order;

    ht.elem[0]=NULL;//start from an empty scratch list
    cout<<findword<<":";

    for(int i=0; i<=n; i++)
    {
        if(i<n)
        {
            // Substitution at position i.
            for(char c='a'; c<='z'; c++)
            {
                if(findword[i]==c) continue;//would reproduce findword itself
                changedword=findword;
                changedword[i]=c;
                order=SameFind(ht,changedword);
                if(order) InsertListTable(ht,changedword,order);
            }
            // Deletion of position i; a one-letter word would become empty.
            if(n!=1)
            {
                changedword=findword;
                changedword.erase(i,1);
                order=SameFind(ht,changedword);
                if(order) InsertListTable(ht,changedword,order);
            }
        }
        // Insertion in front of position i (i==n appends at the end).
        for(char c='a'; c<='z'; c++)
        {
            changedword=findword;
            changedword.insert(i,1,c);
            order=SameFind(ht,changedword);
            if(order) InsertListTable(ht,changedword,order);
        }
    }

    // Emit the sorted suggestions and free the scratch nodes as we go.
    PNode k=ht.elem[0];
    while(k)
    {
        cout<<" "<<k->key;
        PNode done=k;
        k=k->next;
        delete done;
    }
    ht.elem[0]=NULL;
    return 0;
}
// Exact lookup in the chain selected by Hash(ht,findword).
// Returns the Order stored with findword, or 0 when the word is not found.
int SameFind(HashTable ht,string findword)
{
    PNode cur=ht.elem[Hash(ht,findword)];
    for(; cur!=NULL; cur=cur->next)
    {
        if(cur->key==findword)
            return cur->Order;
    }
    return 0;
}
// Query loop: reads words from standard input until a lone "#" (or until the
// input ends or fails) and writes one line per word:
//   "<word> is correct"  when SameFind returns a non-zero ordinal,
//   otherwise the output of ScanWay followed by a newline.
// Always returns 0.
int SearchHashTable(HashTable &ht)
{
    string findword;
    // Testing the stream state stops the loop on EOF; without it a missing
    // "#" terminator would leave findword unchanged and loop forever.
    while(cin>>findword && findword!="#")
    {
        if(SameFind(ht,findword))
            cout<<findword<<" is correct\n";
        else
        {
            ScanWay(ht,findword);
            cout<<endl;
        }
    }
    return 0;
}
// Hash function: polynomial hash of name with multiplier 3333, reduced modulo
// ht.sz after every character. A result of 0 is replaced by 1, so index 0 is
// never returned.
unsigned long Hash(HashTable ht,string name)
{
    const unsigned long g=ht.sz;
    unsigned long h=0;
    // Stops at the first NUL character, like the index loop it replaces.
    for(const char *c=name.c_str(); *c!='\0'; ++c)
    {
        h=(h*3333+ *c + g)%g;
    }
    h=(h+g)%g;
    return h ? h : 1;//0 is remapped to 1
}
// Load the dictionary: reads words from standard input until a lone "#" (or
// until the input ends or fails) and stores each one with InsertHashTable,
// numbering them 1, 2, 3, ... in reading order.
void InputHashTable(HashTable &ht)
{
    long i=1;
    string Word;
    // Checking the stream state prevents re-inserting the last word forever
    // when the "#" terminator is missing.
    while(cin>>Word && Word!="#")
    {
        InsertHashTable(ht,Word,i++);
    }
}
// Debug helper: writes every non-empty chain of ht to standard output as
// "<index> :<word> -> <word> -> ...", with the index right-aligned in five
// columns and the dump framed by dashed lines.
// Always returns 0 (the function is declared int, so falling off the end
// would be undefined behaviour).
int PrintLinklist(HashTable ht)
{
    cout<<"-------------"<<endl;
    for(long i=0; i<ht.sz; i++)
    {
        PNode p=ht.elem[i];
        if(p==NULL) continue;//skip empty chains
        cout<<setw(5)<<i<<" :";
        for(; p!=NULL; p=p->next)
        {
            cout<<p->key<<" -> ";
        }
        cout<<endl;
    }
    cout<<"-------------"<<endl;
    return 0;
}
int main()
{
    HashTable ht;
    InitHashTable(ht, MAX);//初始化
    InputHashTable(ht);//输入字典
    //PrintLinklist(ht);//调试函数
    SearchHashTable(ht);//查找函数
    return 0;
}