lucene(二) 索引的创建、增删改查

最新推荐文章于 2019-01-17 17:52:34 发布

fan_rockrock

最新推荐文章于 2019-01-17 17:52:34 发布

阅读量1k

点赞数

分类专栏： lucene 文章标签： lucene 索引 索引的建立 索引的增删改查 luke

本文链接：https://blog.csdn.net/wust__wangfan/article/details/51425525

版权

lucene 专栏收录该内容

5 篇文章 0 订阅

订阅专栏

一、索引的整个知识架构

二、例子一：创建对多个文件的索引并查询

import java.io.File;
import java.io.FileReader;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;


/**
 * Minimal Lucene 3.5 example: indexes every regular file found in
 * {@code d:/TestLucene} into an on-disk index at {@code d:/index}, then runs
 * a query against the {@code content} field and prints the matching files.
 */
public class HelloLucene {
	/** Location of the on-disk index. */
	private static final String INDEX_DIR = "d:/index";
	/** Directory whose files are indexed. */
	private static final String DATA_DIR = "d:/TestLucene";

	/** Builds the index and then searches it. */
	public static void main(String[]args){
		HelloLucene helloLucene=new HelloLucene();
		helloLucene.index();
		helloLucene.search();
	}

	/**
	 * Builds the file index.
	 *
	 * <p>One document is added per regular file in the data directory, with
	 * three fields: {@code content} (tokenized from the file's text, not
	 * stored), {@code filename} and {@code path} (both stored, not analyzed).
	 * I/O failures are reported on stderr; the writer and the directory are
	 * always closed.
	 */
	public void index(){
		Directory directory=null;
		IndexWriter writer=null;
		try {
			// 1. Open the Directory (on disk; a RAMDirectory would keep it in memory).
			directory=FSDirectory.open(new File(INDEX_DIR));
			// 2. Create the IndexWriter.
			writer=new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
			// listFiles() returns null when the path is missing or is not a
			// readable directory; bail out instead of failing with an NPE.
			File[] files=new File(DATA_DIR).listFiles();
			if(files==null){
				System.err.println("Cannot list files in "+DATA_DIR);
				return;
			}
			for(File file:files){
				// Sub-directories cannot be opened with FileReader; skip them so
				// one of them does not abort the whole indexing run.
				if(!file.isFile()){
					continue;
				}
				// NOTE(review): FileReader decodes with the platform default
				// charset; confirm this matches the encoding of the data files.
				FileReader content=new FileReader(file);
				try {
					// 3. Create the Document and 4. add its Fields.
					Document doc=new Document();
					doc.add(new Field("content", content));
					doc.add(new Field("filename",file.getName(),Field.Store.YES,Field.Index.NOT_ANALYZED));
					doc.add(new Field("path",file.getAbsolutePath(),Field.Store.YES,Field.Index.NOT_ANALYZED));
					writer.addDocument(doc);
				} finally {
					// Make sure the file handle is released even if addDocument
					// fails; closing an already-closed FileReader is a no-op.
					content.close();
				}
			}
		} catch (IOException e) {
			// CorruptIndexException and LockObtainFailedException are
			// IOException subclasses, so this single handler covers them.
			e.printStackTrace();
		} finally {
			if(writer!=null){
				try {
					writer.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
			if(directory!=null){
				try {
					directory.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}

	/**
	 * Searches the index.
	 *
	 * <p>Parses a query against the {@code content} field, fetches at most
	 * ten hits and prints each hit as {@code filename[path]}. Failures are
	 * reported on stderr; the reader and the directory are always closed.
	 */
	public void search(){
		Directory directory=null;
		IndexReader reader=null;
		try{
			// 1. Open the Directory.
			directory=FSDirectory.open(new File(INDEX_DIR));
			// 2. Open the IndexReader.
			reader=IndexReader.open(directory);
			// 3. Create the IndexSearcher on top of the reader.
			IndexSearcher searcher=new IndexSearcher(reader);
			// 4. Build the Query: search the "content" field for the given text.
			QueryParser parser=new QueryParser(Version.LUCENE_35, "content", new StandardAnalyzer(Version.LUCENE_35));
			Query query=parser.parse("奥运");
			// 5. Run the search and get the TopDocs (at most 10 hits).
			TopDocs tds=searcher.search(query, 10);
			// 6. Walk the ScoreDoc entries.
			for(ScoreDoc sd:tds.scoreDocs){
				// 7. Load the stored Document for this hit.
				Document d=searcher.doc(sd.doc);
				// 8. Print the stored field values.
				System.out.println(d.get("filename")+"["+d.get("path")+"]");
			}
		}catch (IOException e) {
			// Also covers CorruptIndexException, an IOException subclass.
			e.printStackTrace();
		}catch (ParseException e) {
			e.printStackTrace();
		}finally{
			// 9. Close the reader on every path, not just on success.
			if(reader!=null){
				try {
					reader.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
			if(directory!=null){
				try {
					directory.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}
}

三、域的说明、使用luke打开创建的索引二进制文件

1、域Field的说明

2、使用luke分析索引文件

创建好索引之后，可以在硬盘上的索引目录（d:/index）中看到如下文件：

使用luke简单看一下各个文件是干什么的：

luke还有其他的功能，这里就不一一介绍了~

四、例子二：索引的增删改查

import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.StaleReaderException;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;


/**
 * Demonstrates create / read / update / delete operations on a Lucene 3.5
 * index stored at {@code d:/index}, using a small in-memory sample data set.
 *
 * <p>All public methods report I/O failures on stderr instead of throwing,
 * and always release the {@link IndexWriter} or {@link IndexReader} they open.
 */
public class IndexUtil {
	private String[] ids = {"1","2","3","4","5","6"};
	private String[] emails = {"aa@itat.org","bb@itat.org","cc@cc.org","dd@sina.org","ee@zttc.edu","ff@itat.org"};
	private String[] contents = {
			"welcome to visited the space,I like book",
			"hello boy, I like pingpeng ball",
			"my name is cc I like game",
			"I like football", 
			"I like football and I like basketball too",
			"I like movie and swim"
	};
	// Attachment counts; not used by any method in this class.
	private int[] attachs = {2,3,1,4,5,5};
	private String[] names = {"zhangsan","lisi","john","jetty","mike","jake"};
	// Boost weight per e-mail domain; only referenced by the disabled
	// boosting code in index().
	private Map<String,Float> scores = new HashMap<String,Float>();
	
	// Index location, opened once in the constructor and shared by all methods.
	private Directory directory=null;
	
	
	/** Builds the sample index and then runs the term search. */
	public static void main(String[]args){
		IndexUtil iUtil=new IndexUtil();
		iUtil.index();
		iUtil.search();
	}

	/**
	 * Fills the domain weight table and opens the index directory.
	 *
	 * <p>If the directory cannot be opened the error is printed and
	 * {@code directory} stays {@code null}.
	 */
	public IndexUtil(){
		scores.put("itat.org",2.0f);
		scores.put("zttc.edu", 1.5f);
		try {
			// 1. Open the place where the index lives.
			directory=FSDirectory.open(new File("d:/index"));
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	
	/**
	 * Creates the index: adds one document per sample record.
	 *
	 * <p>A document is comparable to a row of a table and a field to a
	 * column. The e-mail domain of each record is printed to stdout.
	 */
	public void index(){
		IndexWriter writer=null;
		try {
			// 2. Open the writer.
			writer=openWriter();
			// 3. Build and add the documents.
			for(int i=0;i<ids.length;i++){
				Document doc=buildDocument(ids[i], i);
				String et = emails[i].substring(emails[i].lastIndexOf("@")+1);
				System.out.println(et);
				// Per-document boosting by e-mail domain, currently disabled:
//				if(scores.containsKey(et)) {
//					doc.setBoost(scores.get(et));
//				} else {
//					doc.setBoost(0.5f);
//				}
				writer.addDocument(doc);
			}
		} catch (IOException e) {
			// CorruptIndexException and LockObtainFailedException are
			// IOException subclasses, so this single handler covers them.
			e.printStackTrace();
		} finally {
			closeWriter(writer);
		}
	}
	
	/**
	 * Prints basic index statistics: live document count, maximum document
	 * number and number of deleted documents.
	 */
	public void query() {
		IndexReader reader = null;
		try {
			reader = IndexReader.open(directory);
			System.out.println("numDocs:"+reader.numDocs());
			System.out.println("maxDocs:"+reader.maxDoc());
			System.out.println("deleteDocs:"+reader.numDeletedDocs());
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// Release the reader even when reading the statistics failed.
			closeReader(reader);
		}
	}

	/**
	 * Runs a {@link TermQuery} for the term {@code like} in the
	 * {@code content} field and prints up to ten hits.
	 */
	public void search() {
		IndexReader reader = null;
		try {
			reader = IndexReader.open(directory);
			IndexSearcher searcher = new IndexSearcher(reader);
			TermQuery query = new TermQuery(new Term("content","like"));
			TopDocs tds = searcher.search(query, 10);
			for(ScoreDoc sd:tds.scoreDocs) {
				Document doc = searcher.doc(sd.doc);
				// NOTE(review): getBoost() on a document loaded from the index
				// may not reflect the index-time boost - confirm in the Lucene docs.
				System.out.println("("+sd.doc+"-"+doc.getBoost()+"-"+sd.score+")"+
						doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id"));
			}
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// Release the reader even when the search failed.
			closeReader(reader);
		}
	}

	/**
	 * Deletes the document whose {@code id} field is {@code 1}.
	 *
	 * <p>{@code deleteDocuments} accepts either a Query or a Term (an exact
	 * value to match). Per the original author's note, the document is only
	 * marked as deleted at this point and can still be restored with
	 * {@link #undelete()}.
	 */
	public void delete() {
		IndexWriter writer = null;
		try {
			writer = openWriter();
			writer.deleteDocuments(new Term("id","1"));
			writer.commit();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			closeWriter(writer);
		}
	}

	/**
	 * Restores all documents that were marked as deleted.
	 *
	 * <p>Restoring is done through an {@link IndexReader}, which must be
	 * opened with {@code readOnly} set to {@code false}.
	 */
	public void undelete() {
		IndexReader reader = null;
		try {
			reader = IndexReader.open(directory,false);
			reader.undeleteAll();
		} catch (IOException e) {
			// Also covers StaleReaderException and LockObtainFailedException.
			e.printStackTrace();
		} finally {
			closeReader(reader);
		}
	}

	/**
	 * Permanently removes the documents that are marked as deleted by
	 * forcing a merge of the segments that contain deletions.
	 */
	public void forceDelete() {
		IndexWriter writer = null;
		try {
			writer = openWriter();
			writer.forceMergeDeletes();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			closeWriter(writer);
		}
	}

	/**
	 * Updates the index: replaces the document whose {@code id} is {@code 1}
	 * with a new document whose {@code id} is {@code 11}.
	 *
	 * <p>Lucene has no in-place update; {@code updateDocument} is a delete
	 * followed by an add.
	 */
	public void update() {
		IndexWriter writer = null;
		try {
			writer = openWriter();
			Document doc = buildDocument("11", 0);
			writer.updateDocument(new Term("id","1"), doc);
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			closeWriter(writer);
		}
	}

	/** Opens an IndexWriter on the shared directory with a Lucene 3.5 StandardAnalyzer. */
	private IndexWriter openWriter() throws IOException {
		return new IndexWriter(directory,
				new IndexWriterConfig(Version.LUCENE_35,new StandardAnalyzer(Version.LUCENE_35)));
	}

	/** Builds a document with the given id and the email/content/name of sample record i. */
	private Document buildDocument(String id, int i) {
		Document doc = new Document();
		doc.add(new Field("id",id,Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
		doc.add(new Field("email",emails[i],Field.Store.YES,Field.Index.NOT_ANALYZED));
		doc.add(new Field("content",contents[i],Field.Store.NO,Field.Index.ANALYZED));
		doc.add(new Field("name",names[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
		return doc;
	}

	/** Closes the writer if it was opened, printing any failure. */
	private void closeWriter(IndexWriter writer) {
		if(writer==null) {
			return;
		}
		try {
			writer.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/** Closes the reader if it was opened, printing any failure. */
	private void closeReader(IndexReader reader) {
		if(reader==null) {
			return;
		}
		try {
			reader.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
}