Lucene的创建和查询

最新推荐文章于 2024-11-12 10:47:45 发布

Grit_ICPC

最新推荐文章于 2024-11-12 10:47:45 发布

阅读量382

点赞数

分类专栏： Lucene&solr 文章标签： lucene

本文链接：https://blog.csdn.net/Grit_ICPC/article/details/70312033

版权

Lucene&solr 专栏收录该内容

2 篇文章 0 订阅

订阅专栏

使用Lucene创建索引和查询时，需要导入的jar包主要有：

1.lucene-analyzers

2.lucene-core

3.lucene-highlighter

4.lucene-memory

代码：

package lucene.bean;

public class Article {
	private Long id;
	private String title;
	private String context;
	public Long getId() {
		return id;
	}
	public void setId(Long id) {
		this.id = id;
	}
	public String getTitle() {
		return title;
	}
	public void setTitle(String title) {
		this.title = title;
	}
	public String getContext() {
		return context;
	}
	public void setContext(String context) {
		this.context = context;
	}
	
}




package lucene.bean;

import java.awt.List;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
import org.junit.Test;

/**
 * 1.创建article对象，把该对象放入索引库中
 * 2.从索引库中将article对象检索出来
 * @author Dqd
 *
 */
public class Helloworld {
	/** Path of the on-disk index directory used by both tests. */
	private static final String INDEX_DIR = "./indexDir";

	// Index field names. The search test reads back exactly the names the
	// create test wrote, so they are defined once here.
	private static final String FIELD_ID = "id";
	private static final String FIELD_TITLE = "title";
	private static final String FIELD_CONTENT = "content";

	/**
	 * Creates the index.
	 * <ol>
	 *   <li>Create an Article object.</li>
	 *   <li>Convert the Article into a Document.</li>
	 *   <li>Create an IndexWriter.</li>
	 *   <li>Add the Document to the index.</li>
	 * </ol>
	 * The writer and the directory are always closed, even when indexing
	 * fails, and any failure is propagated so that the test fails instead of
	 * silently passing.
	 *
	 * @throws IOException if the index cannot be opened or written
	 * @throws LockObtainFailedException if the index write lock cannot be obtained
	 * @throws CorruptIndexException if the existing index is corrupt
	 */
	@Test
	public void testCreateIndex() throws CorruptIndexException, LockObtainFailedException, IOException{
		Article article = new Article();
		article.setId(1L);
		article.setTitle("Lucene");
		article.setContext("我的第一个Lucene程序");

		Document document = toDocument(article);

		// The index directory.
		Directory d = FSDirectory.open(new File(INDEX_DIR));
		try {
			// The analyzer (tokenizer).
			Analyzer a = new StandardAnalyzer(Version.LUCENE_30);
			/*
			 * d: points at the index directory.
			 * a: the analyzer that tokenizes the title and content values
			 *    before they are written to the index.
			 * MaxFieldLength: limits how much of each field is indexed.
			 */
			IndexWriter indexWriter = new IndexWriter(d, a, MaxFieldLength.LIMITED);
			try {
				indexWriter.addDocument(document);
				indexWriter.commit();
			} finally {
				// Close on every path so a failed add/commit does not leave
				// the writer (and its lock on the index) open.
				indexWriter.close();
			}
		} finally {
			d.close();
		}
	}

	/**
	 * Searches the index for the term "lucene" in the content field and prints
	 * the id, title and content of the best hit.
	 * <p>
	 * NOTE(review): this test opens the index at {@code ./indexDir}, so it
	 * presumably needs {@link #testCreateIndex()} to have been run first.
	 *
	 * @throws IOException if the index cannot be opened or read
	 * @throws ParseException if the query string cannot be parsed
	 */
	@Test
	public void testSearchIndex() throws IOException, ParseException{
		// The index directory.
		Directory d = FSDirectory.open(new File(INDEX_DIR));
		try {
			IndexSearcher indexSearcher = new IndexSearcher(d);
			try {
				Analyzer a = new StandardAnalyzer(Version.LUCENE_30);
				QueryParser queryParser = new QueryParser(Version.LUCENE_30,
						FIELD_CONTENT, a);
				Query query = queryParser.parse("lucene");
				// Only the single best hit is requested. The total number of
				// matches is available as topDocs.totalHits and each hit's
				// relevance as scoreDoc.score if needed.
				TopDocs topDocs = indexSearcher.search(query, 1);

				// Declared as ArrayList because this file imports java.awt.List,
				// which would capture the simple name List.
				ArrayList<Article> list = new ArrayList<Article>();
				for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
					list.add(toArticle(indexSearcher.doc(scoreDoc.doc)));
				}

				for (Article article : list) {
					System.out.println(article.getId());
					System.out.println(article.getTitle());
					System.out.println(article.getContext());
				}
			} finally {
				indexSearcher.close();
			}
		} finally {
			d.close();
		}
	}

	/**
	 * Converts an Article into a Lucene Document.
	 * <p>
	 * Field constructor arguments:
	 * <ul>
	 *   <li>1st: the field name used in the index</li>
	 *   <li>2nd: the field value</li>
	 *   <li>3rd, Store: YES stores the original value so it can be read
	 *       back from a hit; NO does not store it</li>
	 *   <li>4th, Index: NO does not index the field; NOT_ANALYZED indexes the
	 *       value without tokenizing it; ANALYZED tokenizes the value and
	 *       indexes the tokens</li>
	 * </ul>
	 *
	 * @param article the article to convert; its id, title and context must be non-null
	 * @return a document with stored id, title and content fields
	 */
	private static Document toDocument(Article article) {
		Document document = new Document();
		document.add(new Field(FIELD_ID, article.getId().toString(),
				Store.YES, Index.NOT_ANALYZED));
		document.add(new Field(FIELD_TITLE, article.getTitle(),
				Store.YES, Index.ANALYZED));
		document.add(new Field(FIELD_CONTENT, article.getContext(),
				Store.YES, Index.ANALYZED));
		return document;
	}

	/**
	 * Rebuilds an Article from the stored fields of a Document.
	 *
	 * @param document a document written by {@link #toDocument(Article)}
	 * @return the article carrying the stored id, title and content
	 */
	private static Article toArticle(Document document) {
		Article article = new Article();
		article.setId(Long.parseLong(document.get(FIELD_ID)));
		article.setTitle(document.get(FIELD_TITLE));
		article.setContext(document.get(FIELD_CONTENT));
		return article;
	}

}

一、IndexWriter在向索引库中写索引的时候会上锁，即在索引目录下会生成一个write.lock文件。

如果其他的IndexWriter想要访问同一个索引库，必须先将上一个IndexWriter对象关闭（释放锁）。

二、对索引库的优化，IndexWriter对象有optimize()方法，将多个索引文件合并成一个

三、为了提高操作效率可以同时定义内存索引库和文件索引库进行操作