Lucene之全文检索

最新推荐文章于 2021-06-21 21:43:24 发布

zhenlai2012

最新推荐文章于 2021-06-21 21:43:24 发布

阅读量870

点赞数

分类专栏： lucene 文章标签： luence之全文检索

本文链接：https://blog.csdn.net/zhenlai2012/article/details/10002483

版权

lucene 专栏收录该内容

1 篇文章 0 订阅

订阅专栏

1.简介：

Lucene是一个设计非常优秀的软件，其优秀之处在于简单易用：它屏蔽了复杂的实现过程，只要使用它提供的一些类和相应的API就能进行全文检索，并提供了hits分页功能。是不是很激动？现在我们就一起走进Lucene吧。

2.下面看一个集成lucene经典案例图

3.有人到这时会问lucene到底会做些什么呢：

其实使用Lucene可以在应用程序中添加索引和搜索功能（类似百度这样的搜索引擎所提供的检索能力），从而提高文件检索效率。

4.现在就做一个lucene的创建索引和搜索的示例

a.首先建一个普通的javaproject

b.其次在官网下载Lucene包，把lucene-analyzers-common-4.4.0.jar、lucene-core-4.4.0.jar、lucene-queryparser-4.4.0.jar三个jar导入项目

c.最后献上示例代码

package luence;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;



/**
 * Minimal Lucene example: writes map-shaped records into a file-system index
 * and runs a fuzzy term query against it.
 *
 * <p>Every record is stored as one Lucene document. Each map entry becomes a
 * stored text field, and all values are additionally concatenated into a
 * catch-all field named {@code content} that can be used for searching.
 */
public class CreateIndex {
	/**
	 * Directory handle used only by the no-argument {@link #getWriter()}.
	 * It is opened lazily on first use and is never closed by this class.
	 * {@link #createIndex(Map)} and {@link #createSearch(String, String, String[])}
	 * work on their own short-lived directory handles instead.
	 */
	private static Directory dir;
	
	/**
	 * File-system location where the index files are stored.
	 */
	private static final String  pathFile = "E:\\luence";
	
	
	/**
	 * Creates an {@link IndexWriter} on the index located at {@link #pathFile}.
	 *
	 * <p>The underlying directory is opened on first use, so this method no
	 * longer requires {@link #createIndex(Map)} to have been called beforehand.
	 * The caller is responsible for closing the returned writer.
	 *
	 * @return a new writer configured with a {@link StandardAnalyzer}
	 * @throws Exception if the directory or the writer cannot be opened
	 */
	protected static IndexWriter getWriter() throws Exception {
		if (dir == null) {
			dir = FSDirectory.open(new File(pathFile));
		}
		return newWriter(dir);
	}
	
	/**
	 * Builds a writer with the analyzer configuration shared by all entry points.
	 *
	 * @param directory directory the writer operates on
	 * @return a new writer; the caller must close it
	 * @throws IOException if the writer cannot be created
	 */
	private static IndexWriter newWriter(Directory directory) throws IOException {
		Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
		IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40,
				analyzer);
		return new IndexWriter(directory, iwc);
	}
	
	/**
	 * Closes a resource if it is non-null, printing (not propagating) any
	 * {@link IOException}, which matches the error handling used elsewhere in
	 * this class.
	 *
	 * @param resource resource to close; may be {@code null}
	 */
	private static void closeAndReport(java.io.Closeable resource) {
		if (resource == null) {
			return;
		}
		try {
			resource.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	
	/**
	 * Adds one document to the index.
	 *
	 * <p>Each entry of {@code map} is stored as a text field named after the
	 * key. The string forms of all values, each followed by a single space, are
	 * also stored in the {@code content} field. Entries whose key or value is
	 * {@code null} are skipped. When {@code map} is {@code null} the index is
	 * still opened (and created if missing) but no document is added.
	 *
	 * <p>Failures are printed to standard error and not propagated.
	 *
	 * @param map field name to field value of the record to index; may be {@code null}
	 */
	public static void createIndex(Map<String, Object> map) {
		System.out.println("createIndex进");
		System.out.println("createIndexpath=="+pathFile);
		StringBuilder text = new StringBuilder();
		File indexFolder = new File(pathFile);
		Directory directory = null;
		IndexWriter writer = null;
		try {
			if (!indexFolder.isDirectory()) {
				indexFolder.mkdirs();
			}
			directory = FSDirectory.open(indexFolder);
			writer = newWriter(directory);
			// Build the document to be indexed.
			if (map != null) {
				Document doc = new Document();
				for (Entry<String, Object> e : map.entrySet()) {
					if (e.getKey() == null || e.getValue() == null) {
						// A field needs both a name and a value; skipping keeps
						// one bad entry from discarding the whole document.
						continue;
					}
					String value = e.getValue().toString();
					text.append(value).append(' ');
					doc.add(new TextField(e.getKey(), value, Store.YES));
				}
				// "content" is the catch-all search field.
				doc.add(new TextField("content", text.toString(), Store.YES));
				writer.addDocument(doc);
			}
			System.out.println("content"+"=" + text);
			System.out.println("init ok?");
		} catch (Exception e) {
			// Deliberately broad: indexing is best-effort and callers do not
			// expect this method to throw.
			e.printStackTrace();
		} finally {
			// Close the writer first so pending changes are committed before
			// the directory underneath it is released.
			closeAndReport(writer);
			closeAndReport(directory);
		}
	}
	
	/**
	 * Runs a fuzzy term query and returns the requested stored fields of each hit.
	 *
	 * <p>Note the parameter naming: {@code content} is the <em>field name</em>
	 * to search in and {@code searchIndex} is the <em>term text</em> to look
	 * for. At most 100 hits are returned. I/O failures (including a missing
	 * index) are printed to standard error and yield the results collected so
	 * far, normally an empty list.
	 *
	 * @param content     name of the field to search; {@code null} yields an empty list
	 * @param searchIndex term text to match fuzzily; {@code null} yields an empty list
	 * @param strArr      names of the stored fields to copy into each result
	 *                    map; when {@code null} the result maps are empty
	 * @return one map per hit, keyed by the names in {@code strArr}; never {@code null}
	 */
	public static List<Map<String, Object>> createSearch(String content,String searchIndex,String[] strArr) {
		System.out.println("createSearch 进");
		// Location of the index being searched.
		System.out.println("createSearchpath="+pathFile);
		List<Map<String, Object>> results = new ArrayList<Map<String, Object>>();
		if (content == null || searchIndex == null) {
			// A term needs both a field and a text; nothing can match.
			return results;
		}
		Directory directory = null;
		IndexReader reader = null;
		try {
			directory = FSDirectory.open(new File(pathFile));
			reader = DirectoryReader.open(directory);
			IndexSearcher searcher = new IndexSearcher(reader);
			Term term = new Term(content, searchIndex);
			System.out.println("sarchContent=" + content);
			// Fuzzy query; Lucene offers many other query types as well.
			Query query = new FuzzyQuery(term);
			TopDocs topdocs = searcher.search(query, 100);// keep at most 100 hits
			System.out.println("查询结果总数:" + topdocs.totalHits);
			for (ScoreDoc hit : topdocs.scoreDocs) {
				Map<String, Object> row = new HashMap<String, Object>();
				Document document = searcher.doc(hit.doc);
				if (strArr != null) {
					for (String para : strArr) {
						String stored = document.get(para);
						row.put(para, stored);
						// Print the retrieved value (for testing).
						System.out.println(para + "=" + stored);
					}
				}
				results.add(row);
			}
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			closeAndReport(reader);
			closeAndReport(directory);
		}
		return results;
	}
	
	/**
	 * Demo entry point: indexes six sample records and then searches the
	 * {@code content} field for the term {@code code}.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		// Sample records to index; each record is represented as a map.
		List<Map<String,Object>> listMap = new ArrayList<Map<String,Object>>();
		for (int i = 0; i < 6; i++) {
			Map<String,Object> map = new HashMap<String, Object>();
			map.put("code", "code" + i);
			map.put("city", "city" + i);
			listMap.add(map);
		}
		// Index the records one by one.
		for (Map<String, Object> map2 : listMap) {
			createIndex(map2);
		}
		// Search and print the "code" and "city" fields of every hit.
		String[] strArr = {"code","city"};
		System.out.println(createSearch("content", "code", strArr));
	}
}

5.把上面代码贴在项目测试即可