用的是 Eclipse 环境,把 Lucene 的 jar 包导入新建的工程中...
一共两个文件:
-----------------------------------
IndexDocument.java
SampleSearch.java
-----------------------------------
IndexDocument.java
源码如下:
***************************************************************************************************************************************************************************
package baseSample;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
/**
 * Builds a small demo index of four news articles.
 *
 * <p>Each article is stored with the fields {@code newsId}, {@code newsName},
 * {@code publishDate}, {@code newsSource} and {@code newssumary}. The id is
 * indexed as a single un-analyzed token; all other fields are run through the
 * supplied analyzer.
 */
public class IndexDocument {

    /**
     * Creates a fresh index in {@code directory} (overwriting any existing one),
     * adds the four sample articles and closes the writer.
     *
     * @param directory where the index is written
     * @param analyzer  analyzer applied to every analyzed field
     * @return the same {@code directory} instance, now holding the index
     * @throws CorruptIndexException     if the index is corrupt
     * @throws LockObtainFailedException if the write lock cannot be obtained
     * @throws IOException               on any low-level I/O failure
     */
    public static Directory getIndexDirectory(Directory directory, Analyzer analyzer) throws
            CorruptIndexException, LockObtainFailedException, IOException {
        // create=true: always start from an empty index.
        IndexWriter iwriter = new IndexWriter(directory, analyzer, true,
                new IndexWriter.MaxFieldLength(25000));
        try {
            // Indexing tuning knobs.
            iwriter.setMergeFactor(10);      // merge factor
            iwriter.setMaxMergeDocs(2000);   // maximum number of documents per segment
            iwriter.setMaxBufferedDocs(10);  // documents buffered in memory before a flush

            // News 1
            iwriter.addDocument(newsDocument(
                    "aaa",
                    "韩国政府宣布增强西部海域军事力量",
                    "2010年11月26日07:29 ",
                    "sina国际新闻 朝鲜韩国相互炮击专题",
                    "朝鲜人民军驻板门店代表部25日通知美国军方,朝鲜方面认为,23日延坪岛炮击事件是韩国,有预谋和蓄意的军事挑衅,实际上是战争行为"));

            // News 2
            iwriter.addDocument(newsDocument(
                    "bbb",
                    "日在野党指政府对朝炮击反应迟缓",
                    "2010年11月26日09:51",
                    "中国新闻网 ",
                    "中新网11月26日电 据日本共同社报道,25日,日本众参的两院预算委员会会议讨论了韩朝炮战事件。在野党指责政府在事件发生后反应迟缓,菅直人首相及相关阁僚未及时赶到官邸。报道称,对于凝聚力下降的菅直人政府而言,炮击这一可能引发朝鲜战争的事件" +
                    "(菅直人语)原本应成为呼吁在野党合作的好机会,但其应对表现反而给了在野党发起攻击的口实,菅直人在会议上始终苦于辩解"));

            // News 3
            iwriter.addDocument(newsDocument(
                    "ccc",
                    "南京雨花台改造将增加国民党军官烈士照",
                    "2010年11月26日10:12",
                    "扬子晚报 ",
                    "找到烈士胡秉铎的照片了!雨花台烈士纪念馆陈列改造又有新消息传来。记者昨日从南京雨花台烈士陵园管理局了解到,改造将于明年7月1日前完工,目前工作人员兵分六路正在全国搜集烈士史料。烈士胡秉铎的资料在纪念馆中一直没有照片,这次的收获总算了却了工作人员多年来的遗憾"));

            // News 4 (id was a duplicate "ccc" in the original; ids should be unique per article)
            iwriter.addDocument(newsDocument(
                    "ddd",
                    "城市拆迁古建筑同时花巨款修仿古建筑引质疑",
                    "2010年11月26日07:27 ",
                    "中国青年报 ",
                    "找群山环绕,山峦重叠,绿树成荫的柘里村曾是众多媒体记者眷恋之处。史料记载,宋时,这里四周屋前有宽阔塘面和大片荷花,晨露凝珠荷叶上,阳光下随风舞动,晶莹剔透招人喜爱。历史名篇《爱莲说》的作者周敦颐在衡期间,每年夏天都来此避暑读书消闲"));
        } finally {
            // Always close so the write lock is released even when an add fails.
            iwriter.close();
        }
        return directory;
    }

    /**
     * Assembles one news article as a Lucene document. Every field is stored;
     * {@code newsId} is indexed verbatim, the rest are analyzed.
     */
    private static Document newsDocument(String id, String name, String date,
            String source, String summary) {
        Document doc = new Document();
        doc.add(new Field("newsId", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("newsName", name, Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("publishDate", date, Field.Store.YES, Field.Index.ANALYZED));
        // The original built this field but never added it, so queries on newsSource matched nothing.
        doc.add(new Field("newsSource", source, Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("newssumary", summary, Field.Store.YES, Field.Index.ANALYZED));
        return doc;
    }
}
****************************************************************************************************************************************************************************
SampleSearch.java 源码
package baseSample;
import java.io.IOException;
//index store type
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.FSDirectory;
//indexing required
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
//searching
import org.apache.lucene.queryParser.QueryParser; //单Field查询
import org.apache.lucene.queryParser.MultiFieldQueryParser; //多Field查询
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.util.Version;
/**
 * Demonstrates four ways of building a Lucene query (TermQuery, BooleanQuery,
 * QueryParser, MultiFieldQueryParser) against the demo index produced by
 * {@code IndexDocument}, then runs the QueryParser query and prints each hit.
 */
public class SampleSearch {

    /**
     * Builds the in-memory demo index, prints the string form of each sample
     * query, searches with the parsed query and dumps the top 10 hits.
     *
     * @param args unused
     * @throws CorruptIndexException     if the index is corrupt
     * @throws LockObtainFailedException if the index write lock cannot be obtained
     * @throws IOException               on any low-level I/O failure
     * @throws ParseException            if a query string cannot be parsed
     */
    public static void main(String[] args) throws CorruptIndexException, LockObtainFailedException, IOException, ParseException {
        // Keep the index in memory. To keep it on disk instead, open an
        // FSDirectory on a folder such as F:\lucene_temp.
        Directory directory = new RAMDirectory();

        // CJKAnalyzer emits overlapping two-character tokens for Chinese text.
        // (The original first created a StandardAnalyzer and immediately
        // overwrote it; swap it in here to compare tokenization.)
        Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_CURRENT);

        // Build the index, then open a read-only searcher over it.
        IndexSearcher isearcher = new IndexSearcher(IndexDocument.getIndexDirectory(directory, analyzer), true);
        try {
            // 1. TermQuery
            Query termQuery = new TermQuery(new Term("newsSource", "扬子"));
            System.out.println("----------TermQuery: " + termQuery.toString());

            // 2. BooleanQuery: both terms are required
            Query a = new TermQuery(new Term("newsSource", "扬"));
            Query b = new TermQuery(new Term("newsSource", "子"));
            BooleanQuery booleanQuery = new BooleanQuery();
            booleanQuery.add(a, BooleanClause.Occur.MUST);
            booleanQuery.add(b, BooleanClause.Occur.MUST);
            System.out.println("----------BooleanQuery: " + booleanQuery.toString());

            // 3. Let QueryParser tokenize the text into a query
            QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "newsSource", analyzer);
            parser.setDefaultOperator(QueryParser.AND_OPERATOR); // default operator between terms
            Query parserQuery = parser.parse("朝鲜韩国相互炮击专题");
            System.out.println("----------parserQuery: " + parserQuery.toString());

            // 4. MultiFieldQueryParser: search several fields at once
            String[] fields = {"newsName", "newsSource"};
            MultiFieldQueryParser mparser = new MultiFieldQueryParser(Version.LUCENE_CURRENT, fields, analyzer);
            Query mQuery = mparser.parse("国际新闻 朝鲜韩国相互炮击专题");
            System.out.println("----------MultiFieldQueryParser: " + mQuery.toString());

            // Run the parsed query; replace parserQuery with termQuery etc. to try the others.
            ScoreDoc[] docs = isearcher.search(parserQuery, 10).scoreDocs;
            for (ScoreDoc hit : docs) {
                System.out.println(hit.doc);
                System.out.println("searcher score :" + hit.score);
                Document hitDoc = isearcher.doc(hit.doc);
                // Explain the query that actually produced this hit (the original
                // explained termQuery, which did not match the printed score).
                System.out.println("--- explain : " + isearcher.explain(parserQuery, hit.doc));
                System.out.println("boost:" + hitDoc.getBoost());
                System.out.println("newsId:" + hitDoc.get("newsId"));
                System.out.println("newsName:" + hitDoc.get("newsName"));
                System.out.println("publishDate:" + hitDoc.get("publishDate"));
                System.out.println("newsSource:" + hitDoc.get("newsSource"));
                // The stored field is named "newssumary"; the original looked up
                // "newssummay" and therefore always printed null.
                System.out.println("newssummay:" + hitDoc.get("newssumary"));
                System.out.println("------------------------------------------");
            }
        } finally {
            // Release the underlying index reader.
            isearcher.close();
        }
    }
}
继续努力中....