java Lucene 工具类

最新推荐文章于 2024-10-15 10:14:15 发布

limon758

最新推荐文章于 2024-10-15 10:14:15 发布

阅读量147

点赞数

文章标签： lucene 索引 分词 搜索 查询

本文链接：https://blog.csdn.net/heisetoufa/article/details/84474114

版权

IndeSearchFiles



import java.io.File;
import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.junit.Test;

/**
 * JUnit-driven example that indexes a single source file with Lucene and then
 * searches the resulting index. All Lucene objects are obtained through
 * {@link LuceneUtils}.
 */
public class IndeSearchFiles {

	/**
	 * Builds the index from the file returned by
	 * {@link LuceneUtils#createSourceFile()}.
	 * <p>
	 * One document is written with four fields: {@code name}, {@code path},
	 * {@code modified} and {@code contents}.
	 * 
	 * @throws Exception
	 *             if the index writer cannot be created, or the document
	 *             cannot be written
	 */
	@Test
	public void createIndex() throws Exception {

		// Writer used to add, delete and update documents in the index.
		IndexWriter writer = LuceneUtils.createIndexWriter(OpenMode.CREATE);
		try {
			// Location of the data source.
			File sourceFile = LuceneUtils.createSourceFile();
			System.out.println("文件路径：" + sourceFile.getAbsolutePath());

			Document doc = new Document();
			// File name: stored and analyzed, without norms.
			doc.add(new Field("name", sourceFile.getName(), Field.Store.YES,
					Field.Index.ANALYZED_NO_NORMS));

			// File path: stored and indexed as one un-analyzed token. The
			// field has to be indexed because updateDocument() below looks
			// the previous version of the document up by a "path" term; a
			// field created with Field.Index.NO can never be matched.
			Field pathField = new Field("path", sourceFile.getPath(),
					Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
			pathField
					.setIndexOptions(org.apache.lucene.index.FieldInfo.IndexOptions.DOCS_ONLY);
			doc.add(pathField);

			// Last-modified timestamp: stored only, not searchable.
			doc.add(new Field("modified",
					String.valueOf(sourceFile.lastModified()), Field.Store.YES,
					Field.Index.NO));

			// File contents: stored and analyzed for full-text search.
			String content = LuceneUtils.readFileContext(sourceFile);
			System.out.println("content: " + content);
			doc.add(new Field("contents", content, Field.Store.YES,
					Field.Index.ANALYZED));
			// The official demo instead streams the contents through a
			// Reader, which indexes the text without storing it:
			/*
			 * FileInputStream fis = new FileInputStream(sourceFile);
			 * doc.add(new Field("contents", new BufferedReader(new
			 * InputStreamReader(fis, "UTF-8"))));
			 */

			if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) {
				// Fresh index: nothing to replace, just add the document.
				writer.addDocument(doc);
			} else {
				// Existing index: replace any document with the same path.
				writer.updateDocument(new Term("path", sourceFile.getPath()),
						doc);
			}
		} finally {
			// Always release the writer (and its lock on the index
			// directory), even when building the document fails.
			writer.close();
		}
	}

	/**
	 * Searches the {@code contents} field of the index for a fixed query
	 * string and prints the stored fields of every hit.
	 * 
	 * @throws Exception
	 *             if the index cannot be opened, the query cannot be parsed,
	 *             or the search fails
	 */
	@Test
	public void search() throws Exception {

		// Text to search for; a string that does not occur in the indexed
		// contents (for example "中国") yields no hits.
		String queryString = "Lucene";
		// Fields the query is run against.
		String[] queryFileds = { "contents" };
		IndexSearcher searcher = LuceneUtils.createIndexSearcher();
		try {
			Query query = LuceneUtils.createQuery(queryFileds, queryString);
			// No additional filtering of the results.
			Filter filter = null;
			// Maximum number of hits fetched by one search call.
			int queryCount = 100;
			TopDocs results = searcher.search(query, filter, queryCount);
			System.out.println("总符合: " + results.totalHits + "条数！");

			// Print the stored fields of each hit.
			for (ScoreDoc sr : results.scoreDocs) {
				// Internal document number of the hit.
				int docID = sr.doc;
				// Load the stored document for that number.
				Document doc = searcher.doc(docID);
				System.out.println("name = " + doc.get("name"));
				System.out.println("path = " + doc.get("path"));
				System.out.println("modified = " + doc.get("modified"));
				System.out.println("contents = " + doc.get("contents"));
			}
		} finally {
			// The searcher was built on top of an explicitly opened reader,
			// so the reader has to be closed separately.
			try {
				searcher.close();
			} finally {
				searcher.getIndexReader().close();
			}
		}
	}
}

LuceneUtils

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Static helpers shared by the Lucene examples: fixed index and source
 * locations under the working directory, plus factory methods for the
 * analyzer, index writer, index searcher and query.
 */
public class LuceneUtils {

	// Current working directory of the JVM.
	public static final String USERDIR = System.getProperty("user.dir");
	// Directory that holds the index.
	private static final String INDEXPATH = USERDIR + File.separator + "index";
	// Data source file that gets indexed.
	private static final String INDEXSOURCE = USERDIR + File.separator
			+ "source" + File.separator + "lucene.txt";
	// Lucene compatibility version used by the analyzer, writer and parser.
	public static final Version version = Version.LUCENE_35;

	/**
	 * Creates the analyzer used for both indexing and query parsing.
	 * 
	 * @return a new {@link StandardAnalyzer} configured with {@link #version}
	 */
	public static Analyzer getAnalyzer() {
		return new StandardAnalyzer(version);
	}

	/**
	 * Creates an index writer on the index directory.
	 * 
	 * @param openMode
	 *            open mode applied to the writer configuration
	 * @return a new writer; the caller is responsible for closing it
	 * @throws Exception
	 *             if the directory or the writer cannot be opened
	 */
	public static IndexWriter createIndexWriter(OpenMode openMode)
			throws Exception {
		// Where the index is stored.
		Directory dir = FSDirectory.open(new File(INDEXPATH));
		// Writer configuration: version, analyzer and open mode.
		IndexWriterConfig iwc = new IndexWriterConfig(version, getAnalyzer());
		iwc.setOpenMode(openMode);
		return new IndexWriter(dir, iwc);
	}

	/**
	 * Creates a searcher over the index directory.
	 * 
	 * @return a new searcher built on a freshly opened {@link IndexReader};
	 *         the caller is responsible for closing it
	 * @throws CorruptIndexException
	 *             declared by the underlying reader open call
	 * @throws IOException
	 *             declared by the underlying directory/reader open calls
	 */
	public static IndexSearcher createIndexSearcher()
			throws CorruptIndexException, IOException {
		IndexReader reader = IndexReader.open(FSDirectory.open(new File(
				INDEXPATH)));
		return new IndexSearcher(reader);
	}

	/**
	 * Parses a query string against one or more fields.
	 * 
	 * @param queryFileds
	 *            fields the query is run against
	 * @param queryString
	 *            text to search for
	 * @return the parsed query
	 * @throws ParseException
	 *             if the query string cannot be parsed
	 */
	public static Query createQuery(String[] queryFileds, String queryString)
			throws ParseException {
		QueryParser parser = new MultiFieldQueryParser(version, queryFileds,
				getAnalyzer());
		return parser.parse(queryString);
	}

	/**
	 * Reads a whole text file into a string. Every line, including the last
	 * one, is terminated with {@code "\n"} in the result.
	 * 
	 * @param file
	 *            file to read
	 * @return the file contents with normalized line endings
	 * @throws RuntimeException
	 *             wrapping any exception raised while opening or reading the
	 *             file
	 */
	public static String readFileContext(File file) {
		BufferedReader br = null;
		try {
			// NOTE(review): decodes with the platform default charset;
			// confirm the encoding of the source file before pinning one.
			br = new BufferedReader(new InputStreamReader(
					new FileInputStream(file)));
			StringBuilder content = new StringBuilder();
			for (String line = br.readLine(); line != null; line = br
					.readLine()) {
				content.append(line).append("\n");
			}
			return content.toString();
		} catch (Exception e) {
			throw new RuntimeException(e);
		} finally {
			// Release the file handle on every path.
			if (br != null) {
				try {
					br.close();
				} catch (IOException ignored) {
					// Best effort: a failure while closing must not hide
					// the result or the original exception.
				}
			}
		}
	}

	/**
	 * Prints several views of the runtime location (class-loader resource
	 * roots, the file-system root and the working directory) for diagnosis.
	 * 
	 * @param args
	 *            unused
	 */
	public static void main(String[] args) {

		System.out.println(Thread.currentThread().getContextClassLoader()
				.getResource(""));
		System.out.println(LuceneUtils.class.getClassLoader().getResource(""));
		System.out.println(ClassLoader.getSystemResource(""));
		System.out.println(LuceneUtils.class.getResource(""));
		// Location of the class files.
		System.out.println(LuceneUtils.class.getResource("/"));
		System.out.println(new File("/").getAbsolutePath());
		System.out.println(System.getProperty("user.dir"));
	}

	/**
	 * Returns a handle to the data source file that gets indexed.
	 * 
	 * @return a {@link File} pointing at the configured source path
	 */
	public static File createSourceFile() {
		return new File(INDEXSOURCE);
	}

}