scws+xapian(1)

//利用scws读取本地文件内容,分词生成terms,写入到本地文件

#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<scws.h>
#define MAXLEN 10240

/*
 * Read the beginning of a text file into a caller-supplied buffer.
 *
 * filename: path of the file to read.
 * dest:     destination buffer; always NUL-terminated on return
 *           (unless dest is NULL or maxlen <= 0, in which case nothing is written).
 * maxlen:   capacity of dest in bytes; at most maxlen-1 bytes are copied.
 *
 * If the file cannot be opened, an error is printed to stderr and dest is
 * left as an empty string, so callers can safely pass it to strlen().
 */
void read_file(const char *filename, char *dest, int maxlen) {
    FILE *file;
    int pos = 0;
    int ch;

    if (dest == NULL || maxlen <= 0)
        return;
    /* Make the buffer a valid empty string before anything can fail. */
    dest[0] = '\0';

    file = fopen(filename, "r");
    if (NULL == file) {
        fprintf(stderr, "open %s erro\n", filename);
        /* Bail out: calling fgetc()/fclose() on a NULL stream is undefined. */
        return;
    }

    /* Bound the copy by the caller's capacity, leaving room for the NUL. */
    while (pos < maxlen - 1 && (ch = fgetc(file)) != EOF)
        dest[pos++] = (char)ch;

    fclose(file);
    dest[pos] = '\0';
}

/*
 * Tokenize /usr/local/irtest/data1 with scws and append the resulting
 * terms, separated by single spaces, to /usr/local/irtest/termsdata1.
 *
 * Returns 0 on success; exits with a non-zero status if the output file
 * cannot be opened or the scws handle cannot be created.
 */
int main(void) {
    FILE *fp;
    char text[MAXLEN];
    scws_t s;
    scws_res_t res, cur;

    /* "at+" appends, so repeated runs accumulate terms in the same file. */
    fp = fopen("/usr/local/irtest/termsdata1", "at+");
    if (NULL == fp) {
        fprintf(stderr, "open %s erro\n", "/usr/local/irtest/termsdata1");
        exit(-1);
    }

    read_file("/usr/local/irtest/data1", text, MAXLEN);

    if (!(s = scws_new())) {
        printf("ERROR:can't init the scws!\n");
        fclose(fp);
        exit(-1);
    }

    scws_set_charset(s, "utf8");
    scws_set_dict(s, "/usr/local/scws/etc/dict.utf8.xdb", SCWS_XDICT_XDB);
    scws_set_rule(s, "/usr/local/scws/etc/rules.utf8.ini");

    scws_send_text(s, text, strlen(text));
    /* Each call yields a linked list of results; loop until no list is returned. */
    while ((res = cur = scws_get_result(s)) != NULL) {
        while (cur != NULL) {
            /* A result is an (offset, length) slice of the original text. */
            fprintf(fp, "%.*s", cur->len, text + cur->off);
            fprintf(fp, " ");
            cur = cur->next;
        }
        scws_free_result(res);
    }

    scws_free(s);
    /* Close the output so buffered terms are flushed and the handle is released. */
    fclose(fp);
    return 0;
}


//indexer.cpp (built below as index.cpp)

#include<xapian.h>
#include<fstream>
#include<string>
#include<iostream>
using namespace std;

int main(int argc , char **argv){
try{
ifstream ifile("/usr/local/irtest/termsdata1");
string content, line;
while(getline(ifile , line))
content += line;
        Xapian::WritableDatabase database(argv[1],Xapian::DB_CREATE_OR_OPEN);
        Xapian::Document document;
        Xapian::TermGenerator indexer;
        document.add_value(1,string("825"));
        document.set_data(content);
        indexer.set_document(document);
        indexer.index_text(content);

        database.add_document(document);
        database.commit();
   }catch(const Xapian::Error &e){
        cout<<"exception: "<<e.get_description()<<endl;
        }
}



//searcher.cpp (built below as search.cpp)

#include<xapian.h>
#include<iostream>
#define QUERY "新加坡"
using namespace std;

int main(int argc , char **argv){
  try{
        string querystring(QUERY);

        Xapian::Database database(argv[1]);
        Xapian::Enquire enquire(database);
        Xapian::QueryParser qp;

        Xapian::Query query = qp.parse_query(querystring);
        cout<<"query is: "<<query.get_description()<<endl;

        enquire.set_query(query);

        Xapian::MSet matches = enquire.get_mset(0,10);
        cout<<matches.get_matches_estimated()<<" result found"<<endl;
       for(Xapian::MSetIterator it = matches.begin() ; it != matches.end() ; it++){
        Xapian::Document doc = it.get_document();
        cout<<it.get_rank()<<": "<<it.get_percent()<<"%docid= "<<*it<<"   value="<<doc.get_value(1) <<",data "<<doc.get_data()<<endl;
        }
  }catch(const Xapian::Error &e){
        cout<<"exception: "<<e.get_description()<<endl;
        }
}


[root@jcdd second]# gcc -o demo1 -I/usr/local/scws/include/scws -L/usr/local/scws/lib readfenci.c -lscws -Wl,--rpath -Wl,/usr/local/scws/lib 
[root@jcdd second]# ./demo1
[root@jcdd second]# g++ -std=c++0x index.cpp -o index -lxapian
[root@jcdd second]# g++ -std=c++0x search.cpp -o search -lxapian

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值