刚接触Lucene3.0.2写的一个小程序

 

用的是Eclipse环境,把Lucene的jar包导入新建的工程中...

一共两个文件:

-----------------------------------

IndexDocument.java

SampleSearch.java

-----------------------------------

IndexDocument.java

源码如下:

***************************************************************************************************************************************************************************

‍package baseSample;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class IndexDocument {


public static Directory getIndexDirectory(Directory directory,Analyzer analyzer) throws
CorruptIndexException, LockObtainFailedException, IOException {
  
IndexWriter iwriter=new IndexWriter(directory,analyzer,true ,new IndexWriter.MaxFieldLength(25000));
//索引过程的调优
iwriter.setMergeFactor(10); //激励因子
iwriter.setMaxMergeDocs(2000); // segment最大的文档数量
iwriter.setMaxBufferedDocs(10);// 内存文档数量

// news Fields
Field newsId=null;
Field newsName=null;
Field publishDate=null;
Field newsSource=null;
Field newssumary=null;

// 新闻1
Document doc1=new Document();
newsId=new Field("newsId","aaa",Field.Store.YES ,Field.Index.NOT_ANALYZED);
newsName=new Field("newsName","韩国政府宣布增强西部海域军事力量",Field.Store.YES,Field.Index.ANALYZED);
publishDate=new Field("publishDate","2010年11月26日07:29 ",Field.Store.YES,Field.Index.ANALYZED);
newsSource=new Field("newsSource","sina国际新闻 朝鲜韩国相互炮击专题",Field.Store.YES,Field.Index.ANALYZED);
newssumary=new Field("newssumary","朝鲜人民军驻板门店代表部25日通知美国军方,朝鲜方面认为,23日延坪岛炮击事件是韩国,有预谋和蓄意的军事挑衅,实际上是战争行为",
    Field.Store.YES,Field.Index.ANALYZED);
doc1.add(newsId);
doc1.add(newsName);
doc1.add(publishDate);
doc1.add(newssumary);
iwriter.adDocument(doc1);

// 新闻 2
Document doc2=new Document();
newsId=new Field("newsId","bbb",Field.Store.YES ,Field.Index.NOT_ANALYZED);
newsName=new Field("newsName","日在野党指政府对朝炮击反应迟缓",Field.Store.YES,Field.Index.ANALYZED);
publishDate=new Field("publishDate","2010年11月26日09:51",Field.Store.YES,Field.Index.ANALYZED);
newsSource=new Field("newsSource","中国新闻网 ",Field.Store.YES,Field.Index.ANALYZED);
newssumary=new Field("newssumary","中新网11月26日电 据日本共同社报道,25日,日本众参的两院预算委员会会议讨论了韩朝炮战事件。在野党指责政府在事件发生后反应迟缓,菅直人首相及相关阁僚未及时赶到官邸。报道称,对于凝聚力下降的菅直人政府而言,炮击这一可能引发朝鲜战争的事件" +
    "(菅直人语)原本应成为呼吁在野党合作的好机会,但其应对表现反而给了在野党发起攻击的口实,菅直人在会议上始终苦于辩解",Field.Store.YES,Field.Index.ANALYZED);
doc2.add(newsId);
doc2.add(newsName);
doc2.add(publishDate);
doc2.add(newssumary);
iwriter.addDocument(doc2);

// 新闻3
Document doc3=new Document();
newsId=new Field("newsId","ccc",Field.Store.YES ,Field.Index.NOT_ANALYZED);
newsName=new Field("newsName","南京雨花台改造将增加国民党军官烈士照",Field.Store.YES,Field.Index.ANALYZED);
publishDate=new Field("publishDate","2010年11月26日10:12",Field.Store.YES,Field.Index.ANALYZED);
newsSource=new Field("newsSource","扬子晚报 ",Field.Store.YES,Field.Index.ANALYZED);
newssumary=new Field("newssumary","找到烈士胡秉铎的照片了!雨花台烈士纪念馆陈列改造又有新消息传来。记者昨日从南京雨花台烈士陵园管理局了解到,改造将于明年7月1日前完工,目前工作人员兵分六路正在全国搜集烈士史料。烈士胡秉铎的资料在纪念馆中一直没有照片,这次的收获总算了却了工作人员多年来的遗憾",
    Field.Store.YES,Field.Index.ANALYZED);
doc3.add(newsId);
doc3.add(newsName);
doc3.add(publishDate);
doc3.add(newssumary);
iwriter.addDocument(doc3);

// 新闻4
Document doc4=new Document();
newsId=new Field("newsId","ccc",Field.Store.YES ,Field.Index.NOT_ANALYZED);
newsName=new Field("newsName","城市拆迁古建筑同时花巨款修仿古建筑引质疑",Field.Store.YES,Field.Index.ANALYZED);
publishDate=new Field("publishDate","2010年11月26日07:27 ",Field.Store.YES,Field.Index.ANALYZED);
newsSource=new Field("newsSource","中国青年报 ",Field.Store.YES,Field.Index.ANALYZED);
newssumary=new Field("newssumary","找群山环绕,山峦重叠,绿树成荫的柘里村曾是众多媒体记者眷恋之处。史料记载,宋时,这里四周屋前有宽阔塘面和大片荷花,晨露凝珠荷叶上,阳光下随风舞动,晶莹剔透招人喜爱。历史名篇《爱莲说》的作者周敦颐在衡期间,每年夏天都来此避暑读书消闲",
    Field.Store.YES,Field.Index.ANALYZED);
doc4.add(newsId);
doc4.add(newsName);
doc4.add(publishDate);
doc4.add(newssumary);
iwriter.addDocument(doc4);

iwriter.close();
return directory;
}
}

****************************************************************************************************************************************************************************

SampleSearch.java 源码

‍package baseSample;

import java.io.IOException;

//index store type
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.FSDirectory;

//indexing required
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;


//searching
import org.apache.lucene.queryParser.QueryParser; //单Field查询
import org.apache.lucene.queryParser.MultiFieldQueryParser; //多Field查询
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.util.Version;


public class SampleSearch {

public static void main(String[] args)throws CorruptIndexException, LockObtainFailedException, IOException, ParseException {
  
   // 将索引存入内存中
   Directory directory=new RAMDirectory();
   // 将索引存入文件系统中,硬盘 FSDirectory().open("F:\\lucene_temp");
   //Directory directory=FSDirectory.open("/path");
   Analyzer analyzer=new StandardAnalyzer(Version.LUCENE_CURRENT);
   analyzer=new CJKAnalyzer(Version.LUCENE_CURRENT);
   //开始检索索引
   IndexSearcher isearcher=new IndexSearcher(IndexDocument.getIndexDirectory(directory, analyzer),true);
  
   // 1.termQuery
   Query termQuery=new TermQuery(new Term("newsSource","扬子"));
   System.out.println("----------TermQuery: "+termQuery.toString());
   // 2.BooleanQuery
   Query a=new TermQuery(new Term("newsSource","扬"));
   Query b=new TermQuery(new Term("newsSource","子"));
   BooleanQuery booleanQuery=new BooleanQuery();
   booleanQuery.add(a, BooleanClause.Occur.MUST);
   booleanQuery.add(b, BooleanClause.Occur.MUST);
   System.out.println("----------BooleanQuery: "+booleanQuery.toString());
   // 3.用QueryParser 切词出 query
   QueryParser parser=new QueryParser(Version.LUCENE_CURRENT,"newsSource",analyzer);
   parser.setDefaultOperator(QueryParser.AND_OPERATOR); // 设置默认的逻辑
   Query parserQuery=parser.parse("朝鲜韩国相互炮击专题");
   System.out.println("----------parserQuery: "+parserQuery.toString());
   // 4.利用MultiFieldQueryParser实现对多Field查询
   String[] fields = {"newsName","newsSource"};
   MultiFieldQueryParser mparser=new MultiFieldQueryParser(Version.LUCENE_CURRENT,fields,analyzer);
   Query mQuery=mparser.parse("国际新闻 朝鲜韩国相互炮击专题");
   System.out.println("----------MultiFieldQueryParser: "+mQuery.toString());
   //ScoreDoc[] docs = isearcher.search(termQuery, 10).scoreDocs;
   ScoreDoc[] docs=isearcher.search(parserQuery, 10).scoreDocs;
   for (int i = 0; i < docs.length; i++){  
        System.out.println(docs[i].doc);
        System.out.println("searcher score :" + docs[i].score);
    Document hitDoc = isearcher.doc(docs[i].doc);
            System.out.println("--- explain : "+isearcher.explain(termQuery, docs[i].doc));
            System.out.println("boost:" + hitDoc.getBoost());
            System.out.println("newsId:" + hitDoc.get("newsId"));
            System.out.println("newsName:" + hitDoc.get("newsName"));
            System.out.println("publishDate:" + hitDoc.get("publishDate"));
            System.out.println("newsSource:" + hitDoc.get("newsSource"));
            System.out.println("newssummay:" + hitDoc.get("newssummay"));
            System.out.println("------------------------------------------");
   }
    
}

}

****************************************************************************************************************************************************************************

继续努力中....

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值