- package com.jadyer.lucene;
- import java.io.File;
- import java.io.IOException;
- import org.apache.lucene.analysis.standard.StandardAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.index.IndexReader;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.index.IndexWriterConfig;
- import org.apache.lucene.index.Term;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.NRTManager;
- import org.apache.lucene.search.NRTManagerReopenThread;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.search.ScoreDoc;
- import org.apache.lucene.search.TermQuery;
- import org.apache.lucene.search.TopDocs;
- import org.apache.lucene.search.NRTManager.TrackingIndexWriter;
- import org.apache.lucene.store.Directory;
- import org.apache.lucene.store.FSDirectory;
- import org.apache.lucene.util.Version;
- /**
- * 【Lucene3.6.2入门系列】第12节_近实时搜索
- * @see 实时搜索(near-real-time)---->只要数据发生变化,则马上更新索引(IndexWriter.commit())
- * @see 近实时搜索------------------>数据发生变化时,先将索引保存到内存中,然后在一个统一的时间再对内存中的所有索引执行commit提交动作
- * @see 为了实现近实时搜索,Lucene3.0提供的方式叫做reopen,后来的版本中提供了两个线程安全的类NRTManager和SearcherManager
- * @see 不过这俩线程安全的类在Lucene3.5和3.6版本中的用法有点不太一样,这点要注意
- * @create Aug 7, 2013 4:19:58 PM
- * @author 玄玉<http://blog.csdn.net/jadyer>
- */
- public class HelloNRTSearch {
- private IndexWriter writer;
- private NRTManager nrtManager;
- private TrackingIndexWriter trackWriter;
- public HelloNRTSearch(){
- try {
- Directory directory = FSDirectory.open(new File("myExample/myIndex/"));
- writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36)));
- trackWriter = new NRTManager.TrackingIndexWriter(writer);
- // /*
- // * Lucene3.5中的NRTManager是通过下面的方式创建的
- // * 并且Lucene3.5中可以直接使用NRTManager.getSearcherManager(true)获取到org.apache.lucene.search.SearcherManager
- // */
- // nrtManager = new NRTManager(writer,new org.apache.lucene.search.SearcherWarmer() {
- // @Override
- // public void warm(IndexSearcher s) throws IOException {
- // System.out.println("IndexSearcher.reopen时会自动调用此方法");
- // }
- // });
- nrtManager = new NRTManager(trackWriter, null);
- //启动一个Lucene提供的后台线程来自动定时的执行NRTManager.maybeRefresh()方法
- //这里的后俩参数,是根据这篇分析的文章写的http://blog.mikemccandless.com/2011/11/near-real-time-readers-with-lucenes.html
- NRTManagerReopenThread reopenThread = new NRTManagerReopenThread(nrtManager, 5.0, 0.025);
- reopenThread.setName("NRT Reopen Thread");
- reopenThread.setDaemon(true);
- reopenThread.start();
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
- /**
- * 创建索引
- */
- public static void createIndex(){
- String[] ids = {"1", "2", "3", "4", "5", "6"};
- String[] names = {"Michael", "Scofield", "Tbag", "Jack", "Jade", "Jadyer"};
- String[] contents = {"my blog", "my website", "my name", "my job is JavaDeveloper", "I am from Haerbin", "I like Lucene"};
- IndexWriter writer = null;
- Document doc = null;
- try{
- Directory directory = FSDirectory.open(new File("myExample/myIndex/"));
- writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36)));
- writer.deleteAll();
- for(int i=0; i<names.length; i++){
- doc = new Document();
- doc.add(new Field("id",ids[i],Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
- doc.add(new Field("name", names[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
- doc.add(new Field("content", contents[i], Field.Store.YES, Field.Index.ANALYZED));
- writer.addDocument(doc);
- }
- }catch(Exception e) {
- e.printStackTrace();
- }finally{
- if(null != writer){
- try {
- writer.close();
- } catch (IOException ce) {
- ce.printStackTrace();
- }
- }
- }
- }
- /**
- * 通过IndexReader获取文档数量
- */
- public static void getDocsCount(){
- IndexReader reader = null;
- try {
- reader = IndexReader.open(FSDirectory.open(new File("myExample/myIndex/")));
- System.out.println("maxDocs:" + reader.maxDoc());
- System.out.println("numDocs:" + reader.numDocs());
- System.out.println("deletedDocs:" + reader.numDeletedDocs());
- } catch (Exception e) {
- e.printStackTrace();
- } finally {
- if(reader != null){
- try {
- reader.close();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- }
- }
- /**
- * 搜索文件
- */
- public void searchFile(){
- //Lucene3.5里面可以直接使用NRTManager.getSearcherManager(true).acquire()
- IndexSearcher searcher = nrtManager.acquire();
- Query query = new TermQuery(new Term("content", "my"));
- try{
- TopDocs tds = searcher.search(query, 10);
- for(ScoreDoc sd : tds.scoreDocs){
- Document doc = searcher.doc(sd.doc);
- System.out.print("文档编号=" + sd.doc + " 文档权值=" + doc.getBoost() + " 文档评分=" + sd.score + " ");
- System.out.println("id=" + doc.get("id") + " name=" + doc.get("name") + " content=" + doc.get("content"));
- }
- }catch(Exception e) {
- e.printStackTrace();
- }finally{
- try {
- //这里就不要IndexSearcher.close()啦,而是交由NRTManager来释放
- nrtManager.release(searcher);
- //Lucene-3.6.2文档中ReferenceManager.acquire()方法描述里建议再手工设置searcher为null,以防止在其它地方被意外的使用
- searcher = null;
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- }
- /**
- * 更新索引
- */
- public void updateIndex(){
- Document doc = new Document();
- doc.add(new Field("id", "11", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
- doc.add(new Field("name", "xuanyu", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
- doc.add(new Field("content", "my name is xuanyu", Field.Store.YES, Field.Index.ANALYZED));
- try{
- //Lucene3.5中可以直接使用org.apache.lucene.search.NRTManager.updateDocument(new Term("id", "1"), doc)
- trackWriter.updateDocument(new Term("id", "1"), doc);
- }catch(IOException e) {
- e.printStackTrace();
- }
- }
- /**
- * 删除索引
- */
- public void deleteIndex(){
- try {
- //Lucene3.5中可以直接使用org.apache.lucene.search.NRTManager.deleteDocuments(new Term("id", "2"))
- trackWriter.deleteDocuments(new Term("id", "2"));
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- /**
- * 提交索引内容的变更情况
- */
- public void commitIndex(){
- try {
- writer.commit();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- }
下面是用JUnit4.x写的小测试
- package com.jadyer.test;
- import org.junit.After;
- import org.junit.Before;
- import org.junit.Test;
- import com.jadyer.lucene.HelloNRTSearch;
- public class HelloNRTSearchTest {
- @Before
- public void init(){
- HelloNRTSearch.createIndex();
- }
- @After
- public void destroy(){
- HelloNRTSearch.getDocsCount();
- }
- @Test
- public void searchFile(){
- HelloNRTSearch hello = new HelloNRTSearch();
- for(int i=0; i<5; i++){
- hello.searchFile();
- System.out.println("-----------------------------------------------------------");
- hello.deleteIndex();
- if(i == 2){
- hello.updateIndex();
- }
- try {
- System.out.println(".........开始休眠5s(模拟近实时搜索情景)");
- Thread.sleep(5000);
- System.out.println(".........休眠结束");
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
- }
- //不能单独去new HelloNRTSearch,要保证它们是同一个对象,否则所做的delete和update不会被commit
- hello.commitIndex();
- }
- }