lucene 学习笔记

最新推荐文章于 2018-03-27 23:17:16 发布

Ping_QC

最新推荐文章于 2018-03-27 23:17:16 发布

阅读量2.1k

点赞数

分类专栏： lucene java 文章标签： lucene exception file string query url

本文链接：https://blog.csdn.net/Ping_QC/article/details/7444305

版权

java 同时被 2 个专栏收录

18 篇文章 0 订阅

订阅专栏

lucene

4 篇文章 0 订阅

订阅专栏

package com.test.lucene;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;

public class Main {

	private static final Version version = Version.LUCENE_35;
	private static final Analyzer analyzer = new StandardAnalyzer(version);
	private static final File indexDir = new File("E:/lucene/index");
	private static final File dataDir = new File("E:/lucene/data");

	/**
	 * 建立索引
	 * 
	 * @throws Exception
	 */
	public void index() throws Exception {
		IndexWriter writer = getIndexWriter();
		File[] files = dataDir.listFiles();
		for (File file : files) {
			if (file.isDirectory()) {// 略过文件夹
				continue;
			}
			Document doc = getDoc(file);
			writer.addDocument(doc);
		}
		writer.close();
	}

	private IndexWriter getIndexWriter() {
		IndexWriterConfig iwc = null;
		IndexWriter writer = null;
		try {
			Directory dir = null;
			dir = FSDirectory.open(indexDir);// 索引文件保存在文件系统上， 存在io操作， 速度较慢
			dir = new RAMDirectory();// 放在内存里， 速度快， 没有io操作， 但是程序一退出， 就没有了

			// 可以结合以上两个优点：索引存放在文件系统上， 程序启动时， 把索引库读到内存，
			// 程序退出时， 把经过增删改的索引库保存会硬盘上
			// 1.启动时读入

			writer = new IndexWriter(dir, iwc);
		} catch (Exception e) {
			e.printStackTrace();
		}
		return writer;
	}

	/**
	 * 测试两种存放方式：
	 * 
	 * <pre>
	 * 1.启动时从filesystem加载索引到ram 
	 * 2.对ram中的索引进行增删改查
	 * 3.退出时保存：从ram到filesystem
	 * </pre>
	 */
	@Test
	public void test() {
		IndexWriterConfig iwc1 = null;
		IndexWriterConfig iwc2 = null;
		IndexWriter fsWriter = null;
		IndexWriter ramWriter = null;
		// indexWriterConfig 不能用两次： the object cannot be set twice!
		iwc1 = new IndexWriterConfig(version, analyzer);
		iwc1.setOpenMode(OpenMode.CREATE_OR_APPEND);// ram中，添加文档，使用创建或追加
		iwc2 = new IndexWriterConfig(version, analyzer);
		iwc2.setOpenMode(OpenMode.CREATE);// 因为是从ram中写入， ram中保存的是最新的， 所以直接创建
		try {
			Directory fsDir = FSDirectory.open(indexDir);
			Directory ramDir = new RAMDirectory(fsDir);// 从systemfile加载
			ramWriter = new IndexWriter(ramDir, iwc2);// 操作内存索引的writer
			ramWriter.addDocument(getDoc(new File("E:/lucene/data/test")));// 直接添加，方便点
			ramWriter.commit();
			ramWriter.close();// 关闭后才能把ram中最新的索引写回systemfile

			fsWriter = new IndexWriter(fsDir, iwc1);// 操作硬盘索引的writer
			fsWriter.addIndexes(ramDir);
			fsWriter.close();
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	private Document getDoc(File file) {
		Document doc = new Document();
		/**
		 * 网页搜索时有：url地址， 标题， 内容等， 而通常不需要通过url进行搜索， 但是url还是得存起来，
		 * 这时需要用Field.Store.YES， Field.Index.NOT_ANALYZED <br/>
		 * 索引
		 * 
		 * <pre>
		 * +--+不索引 
		 * +--+索引
		 * 		+---+分词 
		 * 		+---+不分词
		 * </pre>
		 */

		Field name = new Field("name", file.getName(), Field.Store.YES,
				Field.Index.ANALYZED);// 索引
		Field size = new Field("size", String.valueOf(file.length()),
				Field.Store.YES, Field.Index.NOT_ANALYZED);
		Field content = new Field("content", readFile(file), Field.Store.YES,
				Field.Index.ANALYZED);
		doc.add(name);
		doc.add(size);
		doc.add(content);
		return doc;
	}

	private String readFile(File file) {
		StringBuffer content = new StringBuffer();
		String line = "";
		BufferedReader reader = null;
		try {
			reader = new BufferedReader(new InputStreamReader(
					new FileInputStream(file)));
			while ((line = reader.readLine()) != null) {
				content.append(line).append("\n");
			}
		} catch (FileNotFoundException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
		return content.toString();
	}

	private Query getQuery(String fieldName, String key) throws Exception {// 单个field中索引
		QueryParser parser = new QueryParser(version, fieldName, analyzer);
		return parser.parse(key);
	}

	private Query getQuery(String[] fields, String key) throws Exception {// 多个field中检索
		QueryParser parser = new MultiFieldQueryParser(version, fields,
				analyzer);
		return parser.parse(key);
	}

	@Test
	public void search() throws Exception {
		IndexReader reader = IndexReader.open(FSDirectory.open(indexDir));// 存放在文件系统
		// IndexReader reader = IndexReader.open(new RAMDirectory());// 存放在内存
		IndexSearcher searcher = new IndexSearcher(reader);

		Query query = getQuery(new String[] { "name", "content" }, "test");
		TopDocs hits = searcher.search(query, 100);// 100 是搜索最大记录数， 不是分页用的， 搞错了
		int total = hits.totalHits;
		if (total > 0) {
			System.out.println("共找到" + total + "条记录");
		} else {
			System.out.println("没有找到记录");
		}
		ScoreDoc[] scoreDocs = hits.scoreDocs;
		int start = 0;
		int end = hits.totalHits;
		// for (ScoreDoc doc : hits.scoreDocs) {//这样不便分页
		for (int i = start; i < end; i++) {// 可以分页
			int sn = scoreDocs[i].doc;// 相当于获取主键，
			Document document = searcher.doc(sn);// 根据主键获取文档
			print(document);
		}
		searcher.close();
	}

	private void print(Document doc) {
		System.out
				.println("--------------------------------------------------");
		System.out.println("name   :" + doc.get("name"));
		System.out.println("size   :" + doc.get("size"));
		System.out.println("content:\n" + doc.get("content"));
	}
}

Ping_QC

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
lucene 学习笔记

package com.test.lucene;import java.io.BufferedReader;import java.io.File;import java.io.FileInputStream;import java.io.FileNotFoundException;import java.io.IOException;import java.io.InputStre
复制链接

扫一扫

专栏目录