Lucene 学习笔记

4 篇文章 0 订阅
package com.test.lucene;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;

/**
 * Small Lucene 3.5 demo: builds an index from the files in {@code dataDir},
 * shows how an on-disk index can be loaded into a {@link RAMDirectory},
 * modified there and written back, and runs a multi-field search.
 */
public class Main {

	private static final Version version = Version.LUCENE_35;
	private static final Analyzer analyzer = new StandardAnalyzer(version);
	private static final File indexDir = new File("E:/lucene/index");
	private static final File dataDir = new File("E:/lucene/data");

	/**
	 * Builds the index: every regular file directly inside {@code dataDir}
	 * becomes one document. Sub-directories are skipped.
	 *
	 * @throws Exception
	 *             if {@code dataDir} cannot be listed or the index cannot be
	 *             opened or written
	 */
	public void index() throws Exception {
		File[] files = dataDir.listFiles();
		if (files == null) {
			// listFiles() returns null when the path is not a readable directory.
			throw new FileNotFoundException("Cannot list data directory: "
					+ dataDir);
		}
		IndexWriter writer = getIndexWriter();
		Directory dir = writer.getDirectory();
		try {
			for (File file : files) {
				if (file.isDirectory()) {// skip folders
					continue;
				}
				writer.addDocument(getDoc(file));
			}
		} finally {
			// Always release the write lock and the directory, even on failure.
			try {
				writer.close();
			} finally {
				dir.close();
			}
		}
	}

	/**
	 * Opens a writer on the file-system index in {@code indexDir}.
	 *
	 * <p>
	 * A file-system directory is slower than a {@link RAMDirectory} because of
	 * the disk I/O, but a RAM index is gone as soon as the program exits and
	 * {@link #search()} reads the index from {@code indexDir}. See
	 * {@link #test()} for combining both: load the index into RAM at start-up
	 * and write it back to disk on exit.
	 *
	 * <p>
	 * The open mode is left at the {@link IndexWriterConfig} default.
	 *
	 * @return a writer the caller must close (and whose directory the caller
	 *         should close as well)
	 * @throws IOException
	 *             if the directory or the writer cannot be opened
	 */
	private IndexWriter getIndexWriter() throws IOException {
		IndexWriterConfig iwc = new IndexWriterConfig(version, analyzer);
		Directory dir = FSDirectory.open(indexDir);
		try {
			return new IndexWriter(dir, iwc);
		} catch (IOException e) {
			// Do not leak the directory when the writer could not be created.
			closeQuietly(dir);
			throw e;
		}
	}

	/**
	 * Exercises the combination of both storage kinds:
	 *
	 * <pre>
	 * 1. at start-up, load the index from the file system into RAM
	 * 2. add/update/delete/search against the RAM index
	 * 3. on exit, save the RAM index back to the file system
	 * </pre>
	 */
	@Test
	public void test() {
		// Each writer gets its own config instance; the original author notes
		// that one IndexWriterConfig object cannot be used for two writers.
		IndexWriterConfig ramConfig = new IndexWriterConfig(version, analyzer);
		// RAM index: keep what was loaded from disk and add to it.
		ramConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
		IndexWriterConfig fsConfig = new IndexWriterConfig(version, analyzer);
		// Disk index: RAM holds the latest state, so recreate it from RAM.
		fsConfig.setOpenMode(OpenMode.CREATE);

		Directory fsDir = null;
		Directory ramDir = null;
		try {
			fsDir = FSDirectory.open(indexDir);
			ramDir = new RAMDirectory(fsDir);// load from the file system

			IndexWriter ramWriter = new IndexWriter(ramDir, ramConfig);
			try {
				// add one document directly, for convenience
				ramWriter.addDocument(getDoc(new File("E:/lucene/data/test")));
				ramWriter.commit();
			} finally {
				// the RAM writer must be closed before its index is copied back
				ramWriter.close();
			}

			IndexWriter fsWriter = new IndexWriter(fsDir, fsConfig);
			try {
				fsWriter.addIndexes(ramDir);
			} finally {
				fsWriter.close();
			}
		} catch (IOException e) {
			// Fail the test instead of printing the stack trace and passing.
			throw new RuntimeException("RAM/file-system index round trip failed", e);
		} finally {
			closeQuietly(ramDir);
			closeQuietly(fsDir);
		}
	}

	/**
	 * Closes a directory, reporting (but not propagating) a failure to close.
	 *
	 * @param dir
	 *            the directory to close; may be {@code null}
	 */
	private static void closeQuietly(Directory dir) {
		if (dir == null) {
			return;
		}
		try {
			dir.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Builds the Lucene document for one file with the fields {@code name},
	 * {@code size} and {@code content}.
	 *
	 * <p>
	 * Background on the store/index flags: a web search stores url, title,
	 * content and so on. One usually does not search by url, but the url still
	 * has to be stored; that is what {@code Field.Store.YES} together with
	 * {@code Field.Index.NOT_ANALYZED} is for. Indexing options:
	 *
	 * <pre>
	 * +--+ not indexed
	 * +--+ indexed
	 * 		+---+ analyzed (tokenized)
	 * 		+---+ not analyzed
	 * </pre>
	 *
	 * @param file
	 *            the file to describe
	 * @return a new document; {@code content} holds whatever
	 *         {@link #readFile(File)} could read
	 */
	private Document getDoc(File file) {
		Document doc = new Document();
		Field name = new Field("name", file.getName(), Field.Store.YES,
				Field.Index.ANALYZED);
		Field size = new Field("size", String.valueOf(file.length()),
				Field.Store.YES, Field.Index.NOT_ANALYZED);
		Field content = new Field("content", readFile(file), Field.Store.YES,
				Field.Index.ANALYZED);
		doc.add(name);
		doc.add(size);
		doc.add(content);
		return doc;
	}

	/**
	 * Reads a text file line by line, terminating every line with
	 * {@code "\n"}. The bytes are decoded with the platform default charset.
	 *
	 * <p>
	 * Reading is best effort: an I/O error is printed and the text read so far
	 * (possibly empty) is returned.
	 *
	 * @param file
	 *            the file to read
	 * @return the file content, never {@code null}
	 */
	private String readFile(File file) {
		StringBuilder content = new StringBuilder();
		BufferedReader reader = null;
		try {
			reader = new BufferedReader(new InputStreamReader(
					new FileInputStream(file)));
			String line;
			while ((line = reader.readLine()) != null) {
				content.append(line).append("\n");
			}
		} catch (IOException e) {// also covers FileNotFoundException
			e.printStackTrace();
		} finally {
			// Release the file handle; the original never closed the reader.
			if (reader != null) {
				try {
					reader.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
		return content.toString();
	}

	/**
	 * Parses {@code key} into a query against a single field.
	 *
	 * @param fieldName
	 *            the default field to search
	 * @param key
	 *            the query text
	 * @return the parsed query
	 * @throws Exception
	 *             if the query text cannot be parsed
	 */
	private Query getQuery(String fieldName, String key) throws Exception {
		QueryParser parser = new QueryParser(version, fieldName, analyzer);
		return parser.parse(key);
	}

	/**
	 * Parses {@code key} into a query against several fields.
	 *
	 * @param fields
	 *            the fields to search
	 * @param key
	 *            the query text
	 * @return the parsed query
	 * @throws Exception
	 *             if the query text cannot be parsed
	 */
	private Query getQuery(String[] fields, String key) throws Exception {
		QueryParser parser = new MultiFieldQueryParser(version, fields,
				analyzer);
		return parser.parse(key);
	}

	/**
	 * Searches the file-system index for "test" in the {@code name} and
	 * {@code content} fields and prints every returned document.
	 *
	 * @throws Exception
	 *             if the index cannot be opened or searched
	 */
	@Test
	public void search() throws Exception {
		Directory dir = FSDirectory.open(indexDir);// index stored on the file system
		try {
			IndexReader reader = IndexReader.open(dir);
			try {
				IndexSearcher searcher = new IndexSearcher(reader);
				try {
					printHits(searcher);
				} finally {
					searcher.close();
				}
			} finally {
				// The searcher was built from a reader, so close the reader here.
				reader.close();
			}
		} finally {
			dir.close();
		}
	}

	/**
	 * Runs the demo query on {@code searcher} and prints the hit count and
	 * the returned documents.
	 *
	 * @param searcher
	 *            an open searcher
	 * @throws Exception
	 *             if the query cannot be parsed or the search fails
	 */
	private void printHits(IndexSearcher searcher) throws Exception {
		Query query = getQuery(new String[] { "name", "content" }, "test");
		// 100 is the maximum number of hits returned, not a page size.
		TopDocs hits = searcher.search(query, 100);
		int total = hits.totalHits;
		if (total > 0) {
			System.out.println("共找到" + total + "条记录");
		} else {
			System.out.println("没有找到记录");
		}
		ScoreDoc[] scoreDocs = hits.scoreDocs;
		// An index loop (rather than for-each) keeps paging possible. The upper
		// bound is the array length: totalHits counts all matches and can be
		// larger than the number of hits actually returned.
		int start = 0;
		int end = scoreDocs.length;
		for (int i = start; i < end; i++) {
			int sn = scoreDocs[i].doc;// document number, comparable to a primary key
			Document document = searcher.doc(sn);// load the document by that number
			print(document);
		}
	}

	/**
	 * Prints the stored {@code name}, {@code size} and {@code content} fields
	 * of a document to standard output.
	 *
	 * @param doc
	 *            the document to print
	 */
	private void print(Document doc) {
		System.out
				.println("--------------------------------------------------");
		System.out.println("name   :" + doc.get("name"));
		System.out.println("size   :" + doc.get("size"));
		System.out.println("content:\n" + doc.get("content"));
	}
}


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值