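/*
 * 7-9 Mini Search Engine (35 points)
 *
 * Author: Xuic
 * Last modified: 2022-04-24 20:56:59
 * Views: 939
 * Likes: 1
 * Tags: C, data structures
 * First published: 2022-04-22 15:11:56
 * Original article: https://www.cnblogs.com/snzhong/p/12658252.html
 * Copyright notice
 */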
#include <ctype.h>
#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
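// It took me a long time to fully understand this code, so I added comments; I will try rewriting it from memory later when I have time -- it is long and messy.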
/* Fixed-size text buffer: at most 50 characters plus the terminating NUL. */
typedef char Element[51];

/*
 * Line-number node: one entry of a singly linked list of line numbers.
 * LNode names the node itself, LList a pointer to the first node of a list.
 */
typedef struct LNode LNode;
typedef LNode *LList;

struct LNode {
    int LineNo;      /* line number carried by this entry */
    LNode *NextLine; /* following entry, or NULL at the end of the list */
};
/*
 * Global state shared by the indexing and query routines.
 *
 * Dimensions are one larger than the 100-file / 100-line limits because
 * row 0 of every file holds its title and the result arrays need room for
 * a -1 terminator.
 */
enum {
    FILE_SLOTS = 101, /* first dimension: files */
    LINE_SLOTS = 101, /* second dimension: lines per file (index 0 = title) */
    LINE_BYTES = 51   /* bytes per stored line, including the NUL */
};

/* word[f][0] is the title of file f, word[f][j] its j-th content line. */
char word[FILE_SLOTS][LINE_SLOTS][LINE_BYTES];

/* Current result set: matching file numbers in ascending order, ended by -1. */
int F[FILE_SLOTS];

/* L[f]: matching line numbers of file f in ascending order, ended by -1. */
int L[FILE_SLOTS][LINE_SLOTS];

/* Number of files in the current result set; written by Get() and Merge(). */
int ans = 0;

/* The file-node type follows. */
/*
 * File node: one entry of the per-word list of files.  Each entry carries
 * the file number and the list of line numbers recorded for that file.
 * FNode names the node itself, FList a pointer to the first node of a list.
 */
typedef struct FNode FNode;
typedef FNode *FList;

struct FNode {
    int FileNo;      /* file number carried by this entry */
    FNode *NextFile; /* following file entry, or NULL at the end of the list */
    LList Line;      /* line-number list belonging to this file */
};
/* Hash slot: one cell of the open-addressing table, holding a single word. */
struct HashTbl {
    Element word; /* the stored key; probing compares it with strcmp() */
    int flag; /* 0 while the slot is empty, set to 1 once a word is stored */
    FList Next; /* head of the list of files recorded for this word */
};/* the hash-table handle type follows */
/* Hash-table handle: HashTable is a POINTER to this header structure. */
typedef struct {
    int TableSize; /* number of slots in TheCells */
    struct HashTbl *TheCells; /* array of slots, indexed by probe position */
}*HashTable;
/*
 * Allocate a line-number node holding LineNo, not yet linked to any
 * successor.  The caller takes ownership of the returned node.
 */
LNode *CreateLNode(int LineNo) {
    LNode *node = malloc(sizeof *node);

    node->NextLine = NULL;
    node->LineNo = LineNo;
    return node;
} /* the file-node constructor follows */
/*
 * Allocate a file node for FileNo whose line list starts at l.
 * The node is returned unlinked (NextFile is NULL); the caller owns it.
 */
FNode *CreateFNode(int FileNo,LNode *l) {
    FNode *node = malloc(sizeof *node);

    node->Line = l;
    node->NextFile = NULL;
    node->FileNo = FileNo;
    return node;
} /* hash-table initialisation follows */
HashTable Create(int size) {
    HashTable H=(HashTable)malloc(sizeof(HashTable));
    H->TableSize=size;
    H->TheCells=(struct HashTbl *)malloc(sizeof(struct HashTbl)*size);
    while(size) 
	{
        H->TheCells[--size].Next=NULL;
        H->TheCells[size].flag=0;
    }
    return H;
}//哈希函数
/*
 * Hash a key to a slot index in [0, H->TableSize).
 *
 * Each character contributes (c - 'a') as one base-32 digit.  Only the low
 * 15 bits of the accumulated value are kept, so for lowercase keys the
 * result depends on the last three characters.
 *
 * H    table whose TableSize bounds the result.
 * Key  NUL-terminated key; callers pass lowercase letters only.
 */
int Hash(HashTable H,Element Key) {
    unsigned int h=0;
    while(*Key!='\0') {
        /*
         * The parentheses are essential: '+' binds tighter than '<<', so
         * the unparenthesised form shifts by (5 + digit) and leaves h at 0
         * for every key, sending all words to slot 0.
         */
        h=(h<<5)+(unsigned int)(*Key++-'a');
    }
    /* Keep only the low 15 bits, then fold into the table. */
    return (h&(32*32*32-1))%H->TableSize;
} /* slot lookup follows */
/*
 * Locate the slot for Key using linear probing.
 *
 * Starting at Hash(H, Key), advance one slot at a time (wrapping to 0 at
 * the end of the table) until reaching either an empty slot or a slot that
 * already stores Key.  Returns that slot's index.
 */
int FindPos(HashTable H,Element Key) {
    int slot = Hash(H, Key);

    for (;;) {
        if (!H->TheCells[slot].flag)
            break;                      /* empty slot: Key is not stored */
        if (strcmp(H->TheCells[slot].word, Key) == 0)
            break;                      /* Key found */
        if (++slot == H->TableSize)
            slot = 0;                   /* wrap around */
    }
    return slot;
} /* word insertion follows */
/*
 * Record that Key occurs on line LineNo of file FileNo.
 *
 * A new word claims the slot returned by FindPos() and gets a one-file,
 * one-line list.  For a known word the file list is searched for FileNo:
 * a missing file is appended at the tail, and for an existing file the
 * line list is searched and LineNo appended at the tail if absent.
 * Duplicate (file, line) pairs are therefore stored only once.
 */
void Insert(HashTable H,Element Key,int FileNo,int LineNo) {
    struct HashTbl *cell = &H->TheCells[FindPos(H, Key)];

    /* First occurrence of this word anywhere. */
    if (!cell->flag) {
        strcpy(cell->word, Key);
        cell->flag = 1;
        cell->Next = CreateFNode(FileNo, CreateLNode(LineNo));
        return;
    }
    if (strcmp(cell->word, Key) != 0)
        return;

    /* Known word: look for the file, remembering the tail as we go. */
    FNode *file = cell->Next;
    FNode *lastFile = NULL;
    for (; file != NULL && file->FileNo != FileNo; file = file->NextFile)
        lastFile = file;

    if (file == NULL) {
        /* File not listed yet: append it. */
        lastFile->NextFile = CreateFNode(FileNo, CreateLNode(LineNo));
        return;
    }

    /* File already listed: look for the line number. */
    LNode *line = file->Line;
    LNode *lastLine = NULL;
    for (; line != NULL && line->LineNo != LineNo; line = line->NextLine)
        lastLine = line;

    if (line == NULL)
        lastLine->NextLine = CreateLNode(LineNo);
} /* word splitting follows */
/*
 * Split one line of text into words and index each of them.
 *
 * A word is a maximal run of letters; it is lower-cased and passed to
 * Insert() together with FileNo and LineNo.  Empty tokens are never
 * inserted, so blank lines, lines without letters and leading separators
 * do not put the empty string into the table.
 *
 * H       table receiving the words.
 * word    NUL-terminated line (this parameter shadows the global `word`).
 * FileNo  file number recorded for every word of the line.
 * LineNo  line number recorded for every word of the line.
 */
void GetWord(HashTable H,Element word,int FileNo,int LineNo){
    Element str;
    size_t k = 0;

    for (size_t i = 0; ; i++) {
        /* <ctype.h> functions require a value representable as unsigned char. */
        unsigned char ch = (unsigned char)word[i];

        if (ch != '\0' && isalpha(ch)) {
            if (k < sizeof str - 1)     /* never write past the buffer */
                str[k++] = (char)tolower(ch);
            continue;
        }

        /* Separator or end of line: flush the word collected so far. */
        if (k > 0) {
            str[k] = '\0';
            Insert(H, str, FileNo, LineNo);
            k = 0;
        }
        if (ch == '\0')
            break;
    }
} /* file intersection / line union follows */
/*
 * Combine the current result set with the occurrence list of another word.
 *
 * Files: F is replaced by the intersection of F and the files in list p
 * (both are walked in ascending file-number order).
 * Lines: for every file kept, L[file] is replaced by the sorted union of
 * its current lines and the lines listed in p for that file.
 *
 * F and each rewritten L[file] stay terminated by -1, `ans` is set to the
 * number of files kept, and that number is returned (0 when p is NULL or
 * nothing is shared).  H is not used.
 */
int Merge(HashTable H,FNode *p) {
    int keptFiles[101];
    int mergedLines[101];
    int kept = 0;
    int fi = 0;

    (void)H;

    while (p != NULL && F[fi] != -1) {
        int fileNo = p->FileNo;

        if (F[fi] > fileNo) {           /* file only in p: skip it */
            p = p->NextFile;
            continue;
        }
        if (F[fi] < fileNo) {           /* file only in F: skip it */
            fi++;
            continue;
        }

        /* File present on both sides: keep it and merge the line lists. */
        int *known = L[fileNo];
        LNode *node = p->Line;
        int ki = 0;
        int out = 0;

        keptFiles[kept++] = fileNo;

        while (node != NULL && known[ki] != -1) {
            int a = node->LineNo;
            int b = known[ki];

            mergedLines[out++] = (a < b) ? a : b;
            if (a <= b)
                node = node->NextLine;
            if (a >= b)
                ki++;
        }
        /* Append whatever remains on either side. */
        for (; node != NULL; node = node->NextLine)
            mergedLines[out++] = node->LineNo;
        for (; known[ki] != -1; ki++)
            mergedLines[out++] = known[ki];

        memcpy(known, mergedLines, (size_t)out * sizeof mergedLines[0]);
        known[out] = -1;

        fi++;
        p = p->NextFile;                /* move on to the next pair of files */
    }

    memcpy(F, keptFiles, (size_t)kept * sizeof keptFiles[0]);
    F[kept] = -1;
    ans = kept;
    return kept;
} /* result output follows */
/*
 * Print the current result set: for every file number in F (up to the -1
 * terminator) print the file's title, then each stored line whose number
 * appears in L[file] (again up to -1), one per output line.
 */
void Out() {
    const int *file;

    for (file = F; *file != -1; ++file) {
        const int *line;

        printf("%s\n", word[*file][0]);
        for (line = L[*file]; *line != -1; ++line)
            printf("%s\n", word[*file][*line]);
    }
} /* lookup of the first query word follows */
/*
 * Load the occurrences of a single word into the global result set.
 *
 * If the word has no file list, nothing is modified and 0 is returned.
 * Otherwise F receives the word's file numbers, each L[file] receives that
 * file's line numbers (both terminated by -1), `ans` is set to the number
 * of files, and 1 is returned.
 */
int Get(HashTable H,Element str) {
    FNode *file = H->TheCells[FindPos(H, str)].Next;
    int count = 0;

    if (file == NULL)
        return 0;

    for (; file != NULL; file = file->NextFile) {
        int *lines = L[file->FileNo];
        int used = 0;
        LNode *ln;

        F[count++] = file->FileNo;
        for (ln = file->Line; ln != NULL; ln = ln->NextLine)
            lines[used++] = ln->LineNo;
        lines[used] = -1;
    }

    F[count] = -1;
    ans = count;
    return 1;
} /* query processing follows */
/*
 * Read the query count, then answer one query per input line.
 *
 * A query is a line of words (maximal runs of letters, matched without
 * regard to case).  The first word seeds the result set through Get(); each
 * further word narrows it through Merge().  For a query matched by at
 * least one file the file count is printed followed by Out(); otherwise
 * "0" and "Not Found" are printed.  A line containing no word at all is
 * answered with "0" / "Not Found".
 *
 * Words are processed as they are read, so there is no fixed limit on the
 * number of words per line.  A word too long to fit an Element cannot be
 * in the index and is treated as not found.  Reading stops cleanly at end
 * of input, including a final line without a trailing newline.
 */
void GetAns(HashTable H) {
    int m;
    int c;

    if (scanf("%d", &m) != 1)
        return;
    /* Discard the remainder of the line holding the count. */
    while ((c = getchar()) != '\n' && c != EOF) {
    }

    for (int i = 0; i < m; i++) {
        Element cur;          /* word being collected, lower-cased */
        size_t len = 0;
        int tooLong = 0;      /* current word overflowed `cur` */
        int words = 0;        /* words seen on this line */
        int found = 0;        /* result set still non-empty */
        int sawInput = 0;     /* anything at all was read for this line */

        for (;;) {
            c = getchar();    /* int, so EOF is distinguishable from data */
            if (c != EOF)
                sawInput = 1;

            if (c != EOF && c != '\n' && isalpha(c)) {
                if (len < sizeof cur - 1)
                    cur[len++] = (char)tolower(c);
                else
                    tooLong = 1;
                continue;
            }

            /* Separator, end of line or end of input: finish the word. */
            if (len > 0) {
                cur[len] = '\0';
                if (words == 0) {
                    found = !tooLong && Get(H, cur);
                } else if (found) {
                    /* Once a word fails the query is lost; skip the rest. */
                    found = !tooLong &&
                            Merge(H, H->TheCells[FindPos(H, cur)].Next) > 0;
                }
                words++;
                len = 0;
                tooLong = 0;
            }
            if (c == '\n' || c == EOF)
                break;
        }

        if (!sawInput)
            break;            /* input ended before this query started */

        if (words > 0 && found) {
            printf("%d\n", ans);
            Out();
        } else {
            printf("0\nNot Found\n");
        }

        if (c == EOF)
            break;
    }
}
/*
 * Program entry point.
 *
 * Reads the number of files, then for each file a title line followed by
 * content lines up to a line consisting of "#".  Titles and lines are
 * stored in the global `word` array; every content line is indexed with
 * GetWord().  Finally the queries are answered by GetAns().
 *
 * Input is read with fgets() into a bounded buffer: over-long lines are
 * truncated to what a `word` row can hold, lines beyond the capacity of a
 * file are ignored, the "#" terminator is not stored, and end of input
 * stops reading instead of looping.
 */
int main() {
    int n;
    int ch;

    if (scanf("%d", &n) != 1)
        return 0;
    /* Discard the remainder of the line holding the file count. */
    while ((ch = getchar()) != '\n' && ch != EOF) {
    }

    const int maxFiles = (int)(sizeof word / sizeof word[0]);
    const int maxLines = (int)(sizeof word[0] / sizeof word[0][0]);
    if (n > maxFiles)
        n = maxFiles;

    HashTable H = Create(500009);
    if (H == NULL)
        return 1;

    char buf[256];
    int eof = 0;

    for (int i = 0; i < n && !eof; i++) {
        int j = 0;            /* 0 = title, 1.. = content lines */

        for (;;) {
            if (fgets(buf, sizeof buf, stdin) == NULL) {
                eof = 1;
                break;
            }

            size_t len = strlen(buf);
            if (len > 0 && buf[len - 1] == '\n') {
                buf[--len] = '\0';
            } else if (len == sizeof buf - 1) {
                /* Line longer than the buffer: drop the unread tail. */
                while ((ch = getchar()) != '\n' && ch != EOF) {
                }
            }
            if (len > 0 && buf[len - 1] == '\r')
                buf[--len] = '\0';      /* tolerate CRLF line endings */

            if (strcmp(buf, "#") == 0)
                break;                  /* end of this file */
            if (j >= maxLines)
                continue;               /* no room left: ignore extra lines */

            snprintf(word[i][j], sizeof word[i][j], "%s", buf);
            if (j)                      /* the title (j == 0) is not indexed */
                GetWord(H, word[i][j], i, j);
            j++;
        }
    }

    GetAns(H);
    return 0;
}
// Source: https://www.cnblogs.com/snzhong/p/12658252.html