C语言实现简单的倒排文件索引
来源:http://blog。csdn。net/lansatiankongxxc/article/details/8314996
inver。h文件#ifndef INVERT_FILE_H #define INVERT_FILE_H #include #include typedef struct _invertfile_ { unsigned int tablelen; void **table; //unsigned int offset; unsigned int nodecount; }if_t; typedef struct _word_{ unsigned int id; unsigned int refered;// void *link; }word_t; typedef struct _word_frequency_{ unsigned int d_id; unsigned int refered;//the num of referenced in the document void *next; }wf_t; if_t* invertfile_create(int length); void invertfile_insert(if_t *h,int w_id,int d_id); wf_t* invertfile_search(if_t *h,int w_id,int d_id); void invertfile_traverse(if_t *h); void invertfile_free(if_t *h); #endif //该代码片段来自于: http://www。
sharejs。com/codes/cpp/5963invert。cpp#include"invert。h" if_t* invertfile_create(int length){ if_t *h; h = (if_t *)calloc(1,sizeof(if_t)); if (NULL == h) return NULL; h->table =(void **)calloc(length,sizeof(void *)); h->tablelen=length; h->nodecount=0; word_t *w; for(int i=0;itable[i]=malloc(sizeof(word_t)); w=(word_t*)h->table[i]; w->id=i; w->refered=0; w->link=NULL; } return h; } //check if document d_id have word w_id wf_t* invertfile_search(if_t *h,int w_id,int d_id){ word_t *w; wf_t *wf; w=(word_t*)h->table[w_id]; if(w->refered>0){ wf=(wf_t*)w->link; while(wf){ if(wf->d_id==d_id)return wf; wf=(wf_t*)wf->next; } } return NULL; } void invertfile_insert(if_t *h,int w_id,int d_id){ word_t * w=(word_t*)h->table[w_id]; wf_t * wf; if((wf=invertfile_search(h,w_id,d_id))!=NULL){ wf->refered ; } else{ wf=(wf_t *)malloc(sizeof(wf_t)); wf->next=w->link; w->link=wf; w->refered ; wf->refered ; wf->d_id=d_id; h->nodecount ; } } void invertfile_free(if_t *h){ word_t *w; wf_t* wf,*cur; for(int i=0;itablelen;i ){ w=(word_t*)h->table[i]; wf=(wf_t*)w->link; while(wf!=NULL){ cur=wf; wf=(wf_t*)wf->next; free(cur); } free(w); } free(h->table); } void invertfile_traverse(if_t *h){ word_t *w; wf_t* wf,*cur; for(int i=0;itablelen;i ){ w=(word_t*)h->table[i]; wf=(wf_t*)w->link; printf("word_id:%d;",w->id); while(wf!=NULL){ cur=wf; wf=(wf_t*)wf->next; printf("d_id:%d,freq:%d;",cur->d_id,cur->refered); } printf("
"); } } //该代码片段来自于: http://www。
sharejs。com/codes/cpp/5963测试文件main。cpp#include"invert。h" int main(){ if_t *f=invertfile_create(10); invertfile_insert(f,1,1); invertfile_insert(f,1,1); invertfile_insert(f,1,3); invertfile_insert(f,2,5); invertfile_traverse(f); invertfile_free(f); } //该代码片段来自于: http://www。
sharejs。
com/codes/cpp/5963实验结果:word_id:0; word_id:1;d_id:3,freq:1;d_id:1,freq:2; word_id:2;d_id:5,freq:1; word_id:3; word_id:4; word_id:5; word_id:6; word_id:7; word_id:8; word_id:9;。
全部