前面两篇博文已经介绍过了Lucene,大家也对Lucene有了初步了解,我呢,在这里给出我项目中的一些实际的例子,这些例子中包含了
对索引的增删改查操作,还包括关键字高亮~~~
当然这些例子建立在Lucene3.0的基础之上,是对txt文件创建的索引
好的以下是代码~~
创建索引类:
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Date;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
public class Index {
	/**
	 * Creates a Lucene 3.0 index from the plain-text files in a data directory.
	 *
	 * Each file becomes one Document with four fields:
	 *  - "id":        sequential number, stored, not analyzed
	 *  - "contents":  file body, indexed via FileReader (not stored)
	 *  - "path":      absolute file path, stored and analyzed
	 *  - "indexDate": indexing date at day resolution, stored, not analyzed
	 *
	 * @param args unused
	 * @throws IOException if the data directory is missing or indexing fails
	 */
	public static void main(String[] args) throws IOException {
		// Destination directory for the index files
		String indexDir = "C:\\Users\\ZEQ\\Desktop\\ZZUluceneTest1\\index";
		// Directory containing the TXT files to be indexed
		String dataDir = "C:\\Users\\ZEQ\\Desktop\\ZZUluceneTest1\\data";
		File[] files = new File(dataDir).listFiles();
		if (files == null) {
			// listFiles() returns null when the path does not exist or is not a directory
			throw new IOException("数据目录不存在: " + dataDir);
		}
		// FSDirectory variant that keeps the index on local disk
		Directory dir = new SimpleFSDirectory(new File(indexDir));
		// create=true rebuilds the index from scratch; MaxFieldLength.UNLIMITED
		// puts no cap on the number of terms indexed per field
		IndexWriter indexWriter = new IndexWriter(dir, new StandardAnalyzer(
				Version.LUCENE_30), true, IndexWriter.MaxFieldLength.UNLIMITED);
		try {
			for (int i = 0; i < files.length; i++) {
				Document doc = new Document();
				doc.add(new Field("id", i + "", Field.Store.YES,
						Field.Index.NOT_ANALYZED));
				// Reader-valued field: Lucene consumes and closes the reader
				// during addDocument()
				doc.add(new Field("contents", new FileReader(files[i])));
				doc.add(new Field("path", files[i].getAbsolutePath(),
						Field.Store.YES, Field.Index.ANALYZED));
				doc.add(new Field("indexDate", DateTools.dateToString(new Date(),
						DateTools.Resolution.DAY), Field.Store.YES,
						Field.Index.NOT_ANALYZED));
				indexWriter.addDocument(doc);
			}
			// Report how many documents the writer now holds
			System.out.println("总共------》" + indexWriter.numDocs());
		} finally {
			// Always release the index write lock, even if indexing fails
			indexWriter.close();
		}
	}
}
查找索引类:
import java.io.File;
import java.util.Date;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
* @author ht
* 查询
*
*/
public class Seacher {
	private static String INDEX_DIR = "C:\\Users\\ZEQ\\Desktop\\ZZUluceneTest1\\index";// location of the index
	private static String KEYWORD = " ";// query string to search for
	private static int TOP_NUM = 10;// show at most the top 10 hits

	public static void main(String[] args) throws Exception {
		File indexDir = new File(INDEX_DIR);
		if (!indexDir.exists() || !indexDir.isDirectory()) {
			throw new Exception(indexDir + " 该目录不存在~~");
		}
		search(indexDir, KEYWORD);// run the query
	}

	/**
	 * Searches the "contents" field of the index and prints the path and
	 * index date of every hit, followed by the hit count and elapsed time.
	 *
	 * @param indexDir directory holding the Lucene index
	 * @param q        raw query string, parsed with StandardAnalyzer
	 * @throws Exception on I/O or query-parse failure
	 */
	public static void search(File indexDir, String q) throws Exception {
		// Read-only searcher over the on-disk index
		IndexSearcher is = new IndexSearcher(FSDirectory.open(indexDir), true);
		try {
			String field = "contents";
			QueryParser parser = new QueryParser(Version.LUCENE_30, field,
					new StandardAnalyzer(Version.LUCENE_30));
			Query query = parser.parse(q);
			// Collect the TOP_NUM best-scoring documents;
			// docsScoredInOrder=false matches the default scoring path
			TopScoreDocCollector collector = TopScoreDocCollector.create(TOP_NUM,
					false);
			long start = new Date().getTime();// start time
			is.search(query, collector);
			ScoreDoc[] hits = collector.topDocs().scoreDocs;
			for (int i = 0; i < hits.length; i++) {
				Document doc = is.doc(hits[i].doc);
				System.out.println("路径:" + doc.get("path")
						+ "---------------------" + doc.getField("path")
						+ "\n创建时间:" + doc.get("indexDate"));
			}
			long end = new Date().getTime();// end time
			System.out.println("\n找到" + collector.getTotalHits() +
					"个结果,总共花费时间 : " + (end - start) + "毫秒"
					);
		} finally {
			// Release the searcher's underlying file handles
			is.close();
		}
	}
}
增加索引:
import java.io.File;
import java.io.IOException;
import java.util.Date;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/*
* 添加索引
*
*
*/
public class addIndex {
	/**
	 * Appends one hand-built document to an existing index, then reports the
	 * total and deleted document counts.
	 *
	 * @throws IOException    on index access failure
	 * @throws ParseException never thrown here; kept for interface compatibility
	 */
	public static void main(String s[]) throws IOException, ParseException {
		Directory dir = FSDirectory.open(new File(
				"C:\\Users\\ZEQ\\Desktop\\ZZUluceneTest1\\index"));
		// create=false appends to the existing index; true would wipe it first
		IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(
				Version.LUCENE_30), false, IndexWriter.MaxFieldLength.UNLIMITED);
		try {
			Document doc = new Document();
			doc.add(new Field("contents", "java", Field.Store.YES,
					Field.Index.ANALYZED));
			doc.add(new Field("path", "新添加的路径www.baidu.com", Field.Store.YES,
					Field.Index.ANALYZED));
			doc.add(new Field("indexDate", DateTools.dateToString(new Date(),
					DateTools.Resolution.DAY), Field.Store.YES,
					Field.Index.NOT_ANALYZED));
			iw.addDocument(doc);
			iw.commit();
		} finally {
			// close() releases the write lock (and commits any pending changes)
			iw.close();
		}
		// Open the reader only AFTER the commit, otherwise the counts below
		// would not include the document just added
		IndexReader id = IndexReader.open(dir);
		try {
			System.out.println("总共索引数" + id.maxDoc());
			System.out.println("删除条数:" + id.numDeletedDocs());
		} finally {
			id.close();
		}
	}
}
删除索引:
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/*
* 删除索引
*
*
*/
public class deleteIndex {
	/**
	 * Deletes every document whose "contents" field matches the query, then
	 * reports the total and deleted document counts.
	 *
	 * @throws IOException    on index access failure
	 * @throws ParseException if the delete query cannot be parsed
	 */
	public static void main(String s[]) throws IOException, ParseException {
		Directory dir = FSDirectory.open(new File(
				"C:\\Users\\ZEQ\\Desktop\\ZZUluceneTest1\\index"));
		// create=false is required here: true would recreate (i.e. erase) the
		// whole index instead of deleting from it
		IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(
				Version.LUCENE_30), false, IndexWriter.MaxFieldLength.UNLIMITED);
		try {
			QueryParser qp = new QueryParser(Version.LUCENE_30, "contents",
					new StandardAnalyzer(Version.LUCENE_30));
			Query p = qp.parse("新添加的contents");
			iw.deleteDocuments(p);
			iw.commit();
		} finally {
			// BUG FIX: the original never closed the writer, leaving the
			// index write lock held after the program exits
			iw.close();
		}
		// Open the reader AFTER the commit so the deletions are visible;
		// the original opened it before committing and saw stale counts
		IndexReader id = IndexReader.open(dir);
		try {
			System.out.println("总共索引数" + id.maxDoc());
			System.out.println("删除条数:" + id.numDeletedDocs());
		} finally {
			id.close();
		}
	}
}
更新索引:
import java.awt.font.OpenType;
import java.io.File;
import java.io.IOException;
import java.util.Date;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/*
* 更新索引
*
*
*/
public class updateIndex {
	/**
	 * "Updates" documents by deleting every document matching a query and
	 * adding one replacement document per deleted hit, so the document count
	 * stays the same.
	 *
	 * Lucene 3.0's IndexWriter.updateDocument(Term, Document) keys on a single
	 * exact Term; this example instead performs an explicit delete-then-add so
	 * an analyzed query can select the victims.
	 *
	 * @throws IOException    on index access failure
	 * @throws ParseException if the selection query cannot be parsed
	 */
	public static void main(String s[]) throws IOException, ParseException {
		Directory dir = FSDirectory.open(new File(
				"C:\\Users\\ZEQ\\Desktop\\ZZUluceneTest1\\index"));
		// Read-only searcher used only to count how many documents will be replaced
		IndexSearcher is = new IndexSearcher(dir, true);
		try {
			// create=false: keep the existing index, do not rebuild it
			IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(
					Version.LUCENE_30), false, IndexWriter.MaxFieldLength.UNLIMITED);
			long start = new Date().getTime();// start time
			try {
				// The replacement document
				Document doc = new Document();
				doc.add(new Field("contents", "android", Field.Store.YES,
						Field.Index.ANALYZED));
				doc.add(new Field("path", "新添加的路径www.baidu.com android", Field.Store.YES,
						Field.Index.ANALYZED));
				// NOT_ANALYZED for consistency with the other examples, which
				// store the date as a single un-tokenized term
				doc.add(new Field("indexDate", DateTools.dateToString(new Date(),
						DateTools.Resolution.DAY), Field.Store.YES,
						Field.Index.NOT_ANALYZED));
				// Find the documents to be replaced
				QueryParser qp = new QueryParser(Version.LUCENE_30, "contents",
						new StandardAnalyzer(Version.LUCENE_30));
				Query p = qp.parse("java");
				TopScoreDocCollector collector = TopScoreDocCollector.create(10,
						false);
				is.search(p, collector);
				ScoreDoc[] hits = collector.topDocs().scoreDocs;
				// One deleteDocuments(p) removes ALL matches; then add one
				// replacement per former hit to keep the count unchanged
				if (hits.length > 0) {
					iw.deleteDocuments(p);
					for (int i = 0; i < hits.length; i++) {
						iw.addDocument(doc);
					}
				}
				iw.optimize();// merge segments (Lucene 3.x API)
			} finally {
				// Release the write lock even if the update fails
				iw.close();
			}
			// Open the reader after the writer is closed so changes are visible
			IndexReader id = IndexReader.open(dir);
			try {
				System.out.println("总共索引数" + id.maxDoc());
				long end = new Date().getTime();// end time
				System.out.println("删除条数:" + id.numDeletedDocs() + "总共花费时间:"
						+ (end - start));
			} finally {
				id.close();
			}
		} finally {
			// Release the searcher's file handles
			is.close();
		}
	}
}
关键字高亮:
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.Date;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
public class HighLighter {
	private static String INDEX_DIR = "C:\\Users\\ZEQ\\Desktop\\ZZUluceneTest1\\index";// location of the index
	private static String KEYWORD = "android";// query string to search for
	private static int TOP_NUM = 100;// show at most the top 100 hits

	/**
	 * Wraps every query match inside the given stored field with
	 * &lt;font color="red"&gt;...&lt;/font&gt; tags.
	 *
	 * @param query the parsed query whose terms should be highlighted
	 * @param doc   a result document retrieved from the searcher
	 * @param field name of the stored field to highlight
	 * @return the highlighted fragment, or the raw field value when nothing
	 *         matched or highlighting failed
	 */
	private static String toHighlighter(Query query, Document doc, String field) {
		try {
			SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter(
					"<font color=\"red\">", "</font>");
			Highlighter highlighter = new Highlighter(simpleHtmlFormatter,
					new QueryScorer(query));
			StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
			// BUG FIX: the original hard-coded "path" here; the token stream
			// must be produced for the same field that is being highlighted
			TokenStream tokenStream = analyzer.tokenStream(field,
					new StringReader(doc.get(field)));
			String highlighted = highlighter.getBestFragment(tokenStream,
					doc.get(field));
			return highlighted == null ? doc.get(field) : highlighted;
		} catch (IOException e) {
			System.out.println(e);
		} catch (InvalidTokenOffsetsException e) {
			System.out.println(e);
		}
		// On failure fall back to the unhighlighted value instead of null,
		// so callers never print the literal string "null"
		return doc.get(field);
	}

	public static void main(String[] args) throws Exception {
		File indexDir = new File(INDEX_DIR);
		if (!indexDir.exists() || !indexDir.isDirectory()) {
			throw new Exception(indexDir + " 该目录不存在~~");
		}
		search(indexDir, KEYWORD);// run the query
	}

	/**
	 * Searches the "contents" field and prints the highlighted "path" value
	 * and index date of every hit, followed by the hit count and elapsed time.
	 *
	 * @param indexDir directory holding the Lucene index
	 * @param q        raw query string, parsed with StandardAnalyzer
	 * @throws Exception on I/O or query-parse failure
	 */
	public static void search(File indexDir, String q) throws Exception {
		// Read-only searcher over the on-disk index
		IndexSearcher is = new IndexSearcher(FSDirectory.open(indexDir), true);
		try {
			String field = "contents";
			QueryParser parser = new QueryParser(Version.LUCENE_30, field,
					new StandardAnalyzer(Version.LUCENE_30));
			Query query = parser.parse(q);
			TopScoreDocCollector collector = TopScoreDocCollector.create(TOP_NUM,
					false);
			long start = new Date().getTime();// start time
			is.search(query, collector);
			ScoreDoc[] hits = collector.topDocs().scoreDocs;
			for (int i = 0; i < hits.length; i++) {
				Document doc = is.doc(hits[i].doc);
				System.out.println("高亮----------------"
						+ toHighlighter(query, doc, "path") + "\n 创建时间:"
						+ doc.get("indexDate"));
			}
			long end = new Date().getTime();// end time
			System.out.println("\n找到" + collector.getTotalHits() + "个结果,总共花费时间 : "
					+ (end - start) + "毫秒");
		} finally {
			// Release the searcher's file handles
			is.close();
		}
	}
}
以上是我自己整理的,亲自调试过,没什么问题~~~对于更新索引操作我目前还没什么好的办法!!!希望大家批评指正!
在这里把源代码以及 jar 包和索引例子给大家贡献出来~~~~
http://download.csdn.net/detail/zeq9069/6571589