文档集合上的查询,哈希表,前缀树

哈希表

#include <iostream>
#include <string>
#include <string.h>
#include <fstream>
#include <cstdlib>
using namespace std;

// Index entry for one word: the word itself plus the numbers of the
// documents it was seen in.
struct pNode
{
    string str;        // the indexed word
    int position[30];  // document numbers; only the first psize slots are meaningful
    int psize;         // number of used slots in position

    // Builds an entry for word s that has been seen in document pos.
    pNode(string s,int pos):str(s),psize(1)
    {
        position[0] = pos;
    }

    // Builds an empty entry (no word, no documents).
    pNode():psize(0)
    {
    }

    // Copies the word, the count and the whole position array from d.
    void operator=(const pNode& d)
    {
        memcpy(position,d.position,sizeof(d.position));
        psize = d.psize;
        str = d.str;
    }
};
struct stringNode
{
    pNode element;
    stringNode* next;
    stringNode(){};
    stringNode(const pNode& e)
    {
        element = e;
        next = NULL;
    }
    stringNode(const pNode& e,stringNode* n)
    {
        element = e;
        next = n;
    }
};
class stringChain
{
public:
    stringChain(){firstNode = NULL;dsize = 0;}
    bool empty() const{return dsize==0;}
    int size()const {return dsize;}
    pNode* find(string word) const;
    void insert(const pNode& pword);
private:
    stringNode* firstNode;
    int dsize;
};

// Walks the chain from the head and returns a pointer to the first entry
// whose word equals `word`; returns NULL if the chain has no such entry.
pNode* stringChain::find(string word)const
{
    for(stringNode* node = firstNode;node!=NULL;node = node->next)
    {
        if(node->element.str==word)
            return &node->element;
    }
    return NULL;
}

void stringChain::insert(const pNode& thepNode)
{
    if(firstNode == NULL)
    {
        firstNode = new stringNode(thepNode);
        dsize ++;
        return;
    }
    stringNode* p = firstNode;
    while(p->next!=NULL&&p->element.str!=thepNode.str)
        p=p->next;
    if(p->element.str==thepNode.str)
    {
        int i;
        for(i = 0;i<p->element.psize&&p->element.position[i]!=thepNode.position[0];i++);
        if(i==p->element.psize)
        {
            p->element.position[p->element.psize++] = thepNode.position[0];
        }
    }
    else
    {
        p->next = new stringNode(thepNode);
        dsize++;
    }
}

// Chained hash table of index entries, bucketed by the first character of
// the word.
class Hashtable
{
public:
    // Allocates the bucket array.
    Hashtable();

    // True when no word has been stored.
    bool empty()
    {
        return 0==dsize;
    }

    // Number of distinct words stored.
    int hashsize()
    {
        return dsize;
    }

    // Returns the entry for `word`, or NULL when the word is not stored.
    pNode* find(string word) const;

    // Stores the entry or merges it into the existing entry for the same word.
    void insert(const pNode& thepNode);

private:
    stringChain *table;  // array of `divisor` chains
    int dsize;           // number of distinct words over all chains
    int divisor;         // bucket count, used as the hash modulus
};
// Creates an empty table with 52 buckets.
Hashtable::Hashtable():table(NULL),dsize(0),divisor(52)
{
    // Allocated in the body because table is declared before divisor and
    // would therefore be initialized first in the initializer list.
    table = new stringChain[divisor];
}
// Looks up `word` in the bucket selected by its first character.
// Returns a pointer to the stored entry, or NULL when the word is unknown.
//
// The first character is read as an unsigned value so that bytes outside the
// ASCII range (negative when char is signed) cannot produce a negative bucket
// index; an empty word hashes as character code 0. For ASCII input the bucket
// is the same as with the plain char arithmetic: codes above 'Z' are shifted
// down by 6 before taking the remainder.
pNode* Hashtable::find(string word) const
{
    int code = word.empty() ? 0 : static_cast<unsigned char>(word[0]);
    if(code>'Z')
        code -= 6;
    return table[code%divisor].find(word);
}
// Stores thepNode in the bucket selected by the first character of its word
// and bumps the table's word count when the bucket gained a new word.
//
// The first character is read as an unsigned value so that bytes outside the
// ASCII range (negative when char is signed) cannot produce a negative bucket
// index; an empty word hashes as character code 0. For ASCII input the bucket
// is the same as with the plain char arithmetic: codes above 'Z' are shifted
// down by 6 before taking the remainder.
void Hashtable::insert(const pNode& thepNode)
{
    int code = thepNode.str.empty() ? 0 : static_cast<unsigned char>(thepNode.str[0]);
    if(code>'Z')
        code -= 6;
    const int homeBucket = code%divisor;

    // The chain only grows when the word was not present before.
    const int sizeBefore = table[homeBucket].size();
    table[homeBucket].insert(thepNode);
    if(table[homeBucket].size()>sizeBefore)
        dsize++;
}
class txtsearch
{
public:
    void initial();
    bool showtxt(int t);
    void fillhash();
    bool findword(string word);
    void findwords(string s[],int num);
    bool ANDans(int num);
    bool ORans(int num);
private:
    Hashtable h;
    string txt[21][10000];
    int txtsize[21];
    pNode** wordsans; //
};
bool txtsearch::ANDans(int num)
{
    int txttmp[21];
    bool flag = false;
    memset(txttmp,0,sizeof(txttmp));
    for(int i=0;i<num;i++)
        for(int j = 0;wordsans[i]!=NULL&&j<wordsans[i]->psize;j++)
            txttmp[wordsans[i]->position[j]]++;
    for(int i=1;i<21;i++)
        if(txttmp[i]>=num)
        {
            flag = true;
            cout<<i<<".txt ";
        }
    cout<<endl;
    return flag;
}
bool txtsearch::ORans(int num)
{
    int txttmp[21];
    bool flag = false;
    memset(txttmp,0,sizeof(txttmp));
    for(int i=0;i<num;i++)
        for(int j = 0;wordsans[i]!=NULL&&j<wordsans[i]->psize;j++)
            txttmp[wordsans[i]->position[j]]++;
    for(int i = 1;i<21;i++)
        if(txttmp[i]!=0)
        {
            flag = true;
            cout<<i<<".txt ";
        }
    cout<<endl;
    return flag;
}
void txtsearch::findwords(string s[],int num)
{
    wordsans = new pNode* [num+1];
    for(int i=0;i<num;i++)
        wordsans[i] = h.find(s[i]);
}
// Indexes every loaded word of documents 1..20 in the hash table.
// A single trailing ',' or '.' is stripped from a word before it is indexed.
// Empty tokens (and tokens that are empty after stripping) are skipped, which
// also avoids indexing position size()-1 of an empty string.
void txtsearch::fillhash()
{
    for(int i=1;i<21;i++)
        for(int j=0;j<txtsize[i];j++)
        {
            const string& token = txt[i][j];
            if(token.empty())
                continue;

            const char last = token[token.size()-1];
            string ss;
            if(last==','||last=='.')
                ss = token.substr(0,token.size()-1);
            else
                ss = token;
            if(ss.empty())
                continue;

            pNode tmp(ss,i);
            h.insert(tmp);
        }
}
bool txtsearch::findword(string word)
{
    pNode* tp = h.find(word);
    if(tp==NULL)
        return false;
     for(int i = 0;i<tp->psize;i++)
        cout<<tp->position[i]<<".txt ";
    cout<<endl;
    return true;
}
// Loads the whitespace-separated words of 1.txt .. 20.txt (current directory)
// into txt[i][...] and records the word count of each file in txtsize[i].
//
// A file that cannot be opened is treated as empty. Reading stops at the end
// of the file or when the per-file word capacity of txt is reached; only
// successfully extracted words are counted.
void txtsearch::initial()
{
    const int maxWords = sizeof(txt[0])/sizeof(txt[0][0]);
    memset(txtsize,0,sizeof(txtsize));
    for(int i=1;i<21;i++)
    {
        char data_name[20];
        sprintf(data_name, "%d%s",i, ".txt");

        // A fresh stream per file: no stale error state is carried over from
        // the previous file, and the destructor closes it.
        fstream f(data_name,ios::in);
        if(!f.is_open())
            continue;

        // Testing the extraction itself (instead of eof()) ends the loop on
        // any failure and never counts a failed read as a word.
        string token;
        while(txtsize[i]<maxWords&&f>>token)
            txt[i][txtsize[i]++] = token;
    }
}
// Prints the loaded words of document t separated by spaces, then a newline.
// Returns true if at least one word was printed. A document number outside
// the range of the txtsize array is treated like an empty document instead
// of being used as an array index.
bool txtsearch::showtxt(int t)
{
    const int slots = sizeof(txtsize)/sizeof(txtsize[0]);
    bool flag = false;
    if(t>=0&&t<slots)
    {
        for(int i = 0;i<txtsize[t];i++)
        {
            cout<<txt[t][i]<<" ";
            flag = true;
        }
    }
    cout<<endl;
    return flag;
}
// Largest number of words accepted by one AND/OR query (size of the query buffer).
static const int MAX_QUERY_WORDS = 1000;

// Reads `count` words from standard input into out[0..count-1], skipping
// every token equal to `separator`. Returns false if the input ends before
// `count` words have been read.
static bool readQueryWords(string out[],int count,const string& separator)
{
    int filled = 0;
    string token;
    while(filled<count)
    {
        if(!(cin>>token))
            return false;
        if(token==separator)
            continue;
        out[filled++] = token;
    }
    return true;
}

// Interactive front end of the hash-table based search: prints the banner,
// loads and indexes the documents after the user types OK, then serves the
// menu commands A/B/C/D until '#' or end of input.
int main()
{
    txtsearch t;
    cout<<"+--*--+--*--+--*--+--*--+--*--+--*--+--*--+--*--+--*--+--*--+--*--+--*--+--*--+--*--+"<<endl;
    cout<<"|                      #                            #                               |"<<endl;
    cout<<"|                                                                                   |"<<endl;
    cout<<"|                           欢迎你使用文本查询系统!                                |"<<endl;
    cout<<"|                                                                                   |"<<endl;
    cout<<"|                      #                            #                               |"<<endl;
    cout<<"+--*--+--*--+--*--+--*--+--*--+--*--+--*--+--*--+--*--+--*--+--*--+--*--+--*--+--*--+"<<endl;
    cout<<endl;
    cout<<"已经为你在当前目录准备了1-20号txt文件,你现在可以对文本内容进行修改!"<<endl;
    cout<<"准备好了吗?键入 OK 启动查询,键入 # 退出程序:"<<endl;
    string s;
    cin>>s;
    if(s!="OK")
        return 0;
    t.initial();
    t.fillhash();

    cout<<"为你提供了如下功能:"<<endl;
    cout<<"键入A 选择展示1-20号txt文件"<<endl;
    cout<<"键入B 查找某个单词所在的txt文件"<<endl;
    cout<<"键入C 查找多个单词同时出现的txt文件"<<endl;
    cout<<"键入D 查找多个单词中至少出现一个的txt文件"<<endl;
    cout<<"键入# 退出系统"<<endl;
    char o;
    while(cin>>o)
    {
        if(o=='#')
            return 0;
        else if(o=='A')
        {
            cout<<"请输入你要展示的文件编号(1-20):"<<endl;
            int x;
            if(!(cin>>x))
                break;  // not a number / end of input: x would be indeterminate
            if(t.showtxt(x))
                cout<<"展示结束!"<<endl;
            else
                cout<<"文件为空!"<<endl;
        }
        else if(o=='B')
        {
            cout<<"请输入你要查询的一个单词:"<<endl;
            string w;
            cin>>w;
            if(t.findword(w))
                cout<<"查找成功!"<<endl;
            else
                cout<<"不存在!"<<endl;
        }
        else if(o=='C')
        {
            cout<<"请输入要查询的单词个数:"<<endl;
            int n;
            if(!(cin>>n))
                break;
            // The count must fit the fixed query buffer below.
            if(n<1||n>MAX_QUERY_WORDS)
            {
                cout<<"单词个数必须在1到"<<MAX_QUERY_WORDS<<"之间!"<<endl;
                continue;
            }
            cout<<"请以 word AND word 的形式输入这"<<n<<"个单词:"<<endl;
            string ss[MAX_QUERY_WORDS];
            if(!readQueryWords(ss,n,"AND"))
                break;
            t.findwords(ss,n);
            if(t.ANDans(n))
                cout<<"查找成功!"<<endl;
            else
                cout<<"不存在!"<<endl;
        }
        else if(o=='D')
        {
            cout<<"请输入要查询的单词个数:"<<endl;
            int n;
            if(!(cin>>n))
                break;
            // The count must fit the fixed query buffer below.
            if(n<1||n>MAX_QUERY_WORDS)
            {
                cout<<"单词个数必须在1到"<<MAX_QUERY_WORDS<<"之间!"<<endl;
                continue;
            }
            cout<<"请以 word OR word 的形式输入这"<<n<<"个单词"<<endl;
            string ss[MAX_QUERY_WORDS];
            if(!readQueryWords(ss,n,"OR"))
                break;
            t.findwords(ss,n);
            if(t.ORans(n))
                cout<<"查找成功!"<<endl;
            else
                cout<<"不存在!"<<endl;
        }
    }
    return 0;
}



前缀树

#include <iostream>
#include <set>
#include<fstream>
#include<cstring>
#include<stdlib.h>
using namespace std;
// Number of rows of the trie table, i.e. the most nodes the trie can hold
// (the floating-point literal 1e3 converts to 1000).
const int MAX_NOOD = 1e3;
// Number of outgoing edges per trie node: the insert/search code maps
// 'a'..'z' to 0..25 and 'A'..'Z' to 26..51.
const int CHARSET = 52;

// Word search over the documents 1.txt .. 20.txt, backed by an array-based
// trie whose nodes carry the set of documents containing the word that ends
// at that node.
class txtsearch
{
public:
    // Zeroes the trie and the per-document word counts.
    txtsearch();

    // --- loading and display ---
    void initial();              // read the words of 1.txt .. 20.txt
    bool showtxt(int t);         // print document t; false when nothing printed
    void filltrie();             // index every loaded word

    // --- trie primitives ---
    void trieinsert(string w,int txtnum);  // record that w occurs in document txtnum
    int triesearch(string s);              // node reached by s, or -1

    // --- queries (results are printed) ---
    void findword(string word);
    void ANDans(string ask[],int num);
    void ORans(string ask[],int num);

    // --- data (public in this program) ---
    int trie[MAX_NOOD][CHARSET];  // trie[i][j] = x: edge j of node i leads to node x (0 = no edge)
    set<int> txttag[MAX_NOOD];    // documents containing the word that ends at each node
    int k;                        // index of the next unused node
    string txt[21][10000];        // txt[i][j]: j-th word of document i (row 0 unused)
    int txtsize[21];              // number of words loaded per document
};
void txtsearch::ORans(string ask[],int num)
{
    int txttmp[21];
    memset(txttmp,0,sizeof(txttmp));
    for(int i = 0;i<num;i++)
    {
        int o  = triesearch(ask[i]);
        if(o!=-1&&!txttag[o].empty())
        {
            for(set<int >::iterator it = txttag[o].begin();it!=txttag[o].end();it++)
                txttmp[*it]++;
        }
    }
    bool flag = false;
    for(int i = 0;i<21;i++)
    {
        if(txttmp[i]!=0)
        {
            cout<<i<<".txt"<<" ";
            flag = true;
        }
    }
    cout<<endl;
    if(flag) return;
    else cout<<"Not Exists"<<endl;
}
void txtsearch::ANDans(string ask[],int num)
{
    int txttmp[21];
    memset(txttmp,0,sizeof(txttmp));
    for(int i = 0;i<num;i++)
    {
        int o  = triesearch(ask[i]);
        if(o==-1||txttag[o].empty())
        {
            cout<<"Not Exists!"<<endl;
            return;
        }
        else
        {
            for(set<int >::iterator it = txttag[o].begin();it!=txttag[o].end();it++)
                txttmp[*it]++;
        }
    }
    bool flag = false;
    for(int i = 0;i<21;i++)
    {
        if(txttmp[i]==num)
        {
            cout<<i<<".txt"<<" ";
            flag = true;
        }
    }
    cout<<endl;
    if(flag) return;
    else cout<<"Not Exists"<<endl;
}
// Prints "<n>.txt " for every document tagged on the trie node of `word`,
// followed by a newline; prints "Not Exists!" when the word is not in the
// trie or its node carries no document.
void txtsearch::findword(string word)
{
    const int node = triesearch(word);
    if(node!=-1&&!txttag[node].empty())
    {
        const set<int>& docs = txttag[node];
        for(set<int>::const_iterator it = docs.begin();it!=docs.end();++it)
            cout<<*it<<".txt"<<" ";
        cout<<endl;
        return;
    }
    cout<<"Not Exists!"<<endl;
}
// Indexes every loaded word of documents 1..20 in the trie.
// A single trailing ',' or '.' is stripped from a word before it is indexed.
// Empty tokens (and tokens that are empty after stripping) are skipped, which
// also avoids indexing position size()-1 of an empty string.
void txtsearch::filltrie()
{
    for(int i=1;i<21;i++)
        for(int j=0;j<txtsize[i];j++)
        {
            const string& token = txt[i][j];
            if(token.empty())
                continue;

            const char last = token[token.size()-1];
            string ss;
            if(last==','||last=='.')
                ss = token.substr(0,token.size()-1);
            else
                ss = token;
            if(ss.empty())
                continue;

            trieinsert(ss,i);
        }
}
// Loads the whitespace-separated words of 1.txt .. 20.txt (current directory)
// into txt[i][...] and records the word count of each file in txtsize[i].
//
// The count of a file is reset before loading, so calling this again does not
// accumulate. A file that cannot be opened is treated as empty. Reading stops
// at the end of the file or when the per-file word capacity of txt is
// reached; only successfully extracted words are counted.
void txtsearch::initial()
{
    const int maxWords = sizeof(txt[0])/sizeof(txt[0][0]);
    for(int i=1;i<21;i++)
    {
        txtsize[i] = 0;

        char data_name[20];
        sprintf(data_name, "%d%s",i, ".txt");

        // A fresh stream per file: no stale error state is carried over from
        // the previous file, and the destructor closes it.
        fstream f(data_name,ios::in);
        if(!f.is_open())
            continue;

        // Testing the extraction itself (instead of eof()) ends the loop on
        // any failure and never counts a failed read as a word.
        string token;
        while(txtsize[i]<maxWords&&f>>token)
            txt[i][txtsize[i]++] = token;
    }
}
// Prints the loaded words of document t separated by spaces, then a newline.
// Returns true if at least one word was printed. A document number outside
// the range of the txtsize array is treated like an empty document instead
// of being used as an array index.
bool txtsearch::showtxt(int t)
{
    const int slots = sizeof(txtsize)/sizeof(txtsize[0]);
    bool flag = false;
    if(t>=0&&t<slots)
    {
        for(int i = 0;i<txtsize[t];i++)
        {
            cout<<txt[t][i]<<" ";
            flag = true;
        }
    }
    cout<<endl;
    return flag;
}
// Sets up an empty index: every trie edge is 0 (no edge), every per-document
// word count is 0, and node 1 is the next node to hand out (node 0 is the
// root). The txttag sets are default-constructed and therefore already empty.
txtsearch::txtsearch():k(1)
{
    memset(trie,0,sizeof(trie));
    memset(txtsize,0,sizeof(txtsize));
}
void txtsearch::trieinsert(string w,int txtnum)
{
    int len = w.length();
    //cout<<len;
    int p = 0;
    for(int i = 0;i<len;i++)
    {
        int c;
        if('a'<=w[i]&&w[i]<='z')
             c = w[i] - 'a';
        else if('A'<=w[i]&&w[i]<='Z')
             c = w[i] - 'A'+26;
        if(!trie[p][c])
        {
            trie[p][c] = k;
            k++;
        }
        p = trie[p][c];
    }
    txttag[p].insert(txtnum);
}
// Follows the letters of s from the root and returns the index of the node
// reached, or -1 when the path does not exist. A character other than
// a-z / A-Z has no edge in the trie, so such a word is reported as not found
// (-1). An empty s returns the root node 0.
int txtsearch::triesearch(string s)
{
    const int len = s.length();
    int p = 0;  // current node, starting at the root
    for(int i = 0;i<len;i++)
    {
        int c;
        if('a'<=s[i]&&s[i]<='z')
            c = s[i]-'a';
        else if('A'<=s[i]&&s[i]<='Z')
            c = s[i]-'A'+26;
        else
            return -1;
        if(!trie[p][c])
            return -1;
        p = trie[p][c];
    }
    return p;
}
// Largest number of words accepted by one AND/OR query (size of the query buffer).
static const int MAX_QUERY_WORDS = 1000;

// Reads `count` words from standard input into out[0..count-1], skipping
// every token equal to `separator`. Returns false if the input ends before
// `count` words have been read.
static bool readQueryWords(string out[],int count,const string& separator)
{
    int filled = 0;
    string token;
    while(filled<count)
    {
        if(!(cin>>token))
            return false;
        if(token==separator)
            continue;
        out[filled++] = token;
    }
    return true;
}

// Interactive front end of the trie based search: prints the banner, loads
// and indexes the documents after the user types OK, then serves the menu
// commands A/B/C/D until '#' or end of input.
int main()
{
    txtsearch t;
    cout<<"+--*--+--*--+--*--+--*--+--*--+--*--+--*--+--*--+--*--+--*--+--*--+--*--+--*--+--*--+"<<endl;
    cout<<"|                      #                            #                               |"<<endl;
    cout<<"|                                                                                   |"<<endl;
    cout<<"|                           欢迎你使用文本查询系统!                                |"<<endl;
    cout<<"|                                                                                   |"<<endl;
    cout<<"|                      #                            #                               |"<<endl;
    cout<<"+--*--+--*--+--*--+--*--+--*--+--*--+--*--+--*--+--*--+--*--+--*--+--*--+--*--+--*--+"<<endl;
    cout<<endl;
    cout<<"已经为你在当前目录准备了1-20号txt文件,你现在可以对文本内容进行修改!"<<endl;
    cout<<"准备好了吗?键入 OK 启动查询,键入 # 退出程序:"<<endl;
    string s;
    cin>>s;
    if(s!="OK")
        return 0;
    t.initial();
    t.filltrie();

    cout<<"为你提供了如下功能:"<<endl;
    cout<<"键入A 选择展示1-20号txt文件"<<endl;
    cout<<"键入B 查找某个单词所在的txt文件"<<endl;
    cout<<"键入C 查找多个单词同时出现的txt文件"<<endl;
    cout<<"键入D 查找多个单词中至少出现一个的txt文件"<<endl;
    cout<<"键入# 退出系统"<<endl;
    char o;
    while(cin>>o)
    {
        if(o=='#')
            return 0;
        else if(o=='A')
        {
            cout<<"请输入你要展示的文件编号(1-20):"<<endl;
            int x;
            if(!(cin>>x))
                break;  // not a number / end of input: x would be indeterminate
            if(t.showtxt(x))
                cout<<"END!"<<endl;
            else
                cout<<"Waring: Empty!"<<endl;
        }
        else if(o=='B')
        {
            cout<<"请输入你要查询的一个单词:"<<endl;
            string w;
            cin>>w;
            t.findword(w);
        }
        else if(o=='C')
        {
            cout<<"请输入要查询的单词个数:"<<endl;
            int n;
            if(!(cin>>n))
                break;
            // The count must fit the fixed query buffer below.
            if(n<1||n>MAX_QUERY_WORDS)
            {
                cout<<"Invalid word count (1-"<<MAX_QUERY_WORDS<<")!"<<endl;
                continue;
            }
            cout<<"请以 word AND word 的形式输入这"<<n<<"个单词:"<<endl;
            string ss[MAX_QUERY_WORDS];
            if(!readQueryWords(ss,n,"AND"))
                break;
            t.ANDans(ss,n);
        }
        else if(o=='D')
        {
            cout<<"请输入要查询的单词个数:"<<endl;
            int n;
            if(!(cin>>n))
                break;
            // The count must fit the fixed query buffer below.
            if(n<1||n>MAX_QUERY_WORDS)
            {
                cout<<"Invalid word count (1-"<<MAX_QUERY_WORDS<<")!"<<endl;
                continue;
            }
            cout<<"请以 word OR word 的形式输入这"<<n<<"个单词"<<endl;
            string ss[MAX_QUERY_WORDS];
            if(!readQueryWords(ss,n,"OR"))
                break;
            t.ORans(ss,n);
        }
    }
    return 0;
}

  • 1
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值