Lucene入门实例(lucene3.0)

最新推荐文章于 2024-09-19 15:10:23 发布

yuyiming1986

最新推荐文章于 2024-09-19 15:10:23 发布

阅读量137

点赞数

分类专栏： lucene 文章标签： lucene 实例

本文链接：https://blog.csdn.net/yuyiming1986/article/details/84093254

版权

lucene 专栏收录该内容

1 篇文章 0 订阅

订阅专栏

[b]IndexFile.java用于将目录下的文件进行索引[/b]
(需要导入lucene-core-3.0.3.jar包)

package cn.edu.uestc.lucene;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Date;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;

/**
 * Builds a Lucene index from the regular files found directly inside a
 * source directory.
 *
 * <p>Each file becomes one document with three fields: {@code path}
 * (stored, not analyzed), {@code modified} (stored, not analyzed,
 * minute resolution) and {@code contents} (tokenized from the file's text,
 * not stored).
 */
public class IndexFile {

	/** Directory whose files are indexed. */
	private static final String sourceFilePath = "C:\\source";

	/** Directory in which the index is written. */
	private static final String indexFilePath = "C:\\index";

	/**
	 * Adds one document per readable regular file located directly in
	 * {@code filePath}. Sub-directories are skipped (no recursion).
	 *
	 * <p>Does nothing when either argument is {@code null}, when
	 * {@code filePath} is not a readable directory, or when the directory
	 * cannot be listed.
	 *
	 * @param writer   index writer that receives the documents
	 * @param filePath path of the directory to index
	 * @throws CorruptIndexException if the index is corrupt
	 * @throws IOException           if a file cannot be read or the index
	 *                               cannot be written
	 */
	public static void indexDox(IndexWriter writer, String filePath)
			throws CorruptIndexException, IOException {

		if (null == filePath || null == writer) {
			return;
		}

		File dir = new File(filePath);
		if (!dir.exists() || !dir.canRead() || !dir.isDirectory()) {
			return;
		}

		// listFiles() returns null when an I/O error occurs while listing.
		File[] files = dir.listFiles();
		if (files == null) {
			return;
		}

		for (File f : files) {

			// Opening a FileReader on a directory (or an unreadable file)
			// throws FileNotFoundException, so only regular files are indexed.
			if (!f.isFile() || !f.canRead()) {
				continue;
			}

			// NOTE: FileReader decodes with the platform default charset.
			FileReader contents = new FileReader(f);
			try {
				Document doc = new Document();

				// Path of the file, stored verbatim so it can be shown in results.
				doc.add(new Field("path", f.getPath(), Field.Store.YES,
						Field.Index.NOT_ANALYZED));

				// Last-modified time of the file, at minute resolution.
				doc.add(new Field("modified", DateTools.timeToString(
						f.lastModified(), DateTools.Resolution.MINUTE),
						Field.Store.YES, Field.Index.NOT_ANALYZED));

				// File contents: tokenized and indexed, but not stored.
				doc.add(new Field("contents", contents));

				// The reader is consumed while the document is added.
				writer.addDocument(doc);
			} finally {
				// Release the file handle even when indexing this file fails.
				contents.close();
			}
		}
	}

	/**
	 * Indexes every file in {@link #sourceFilePath} into a freshly created
	 * index at {@link #indexFilePath} and prints the elapsed time.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {

		IndexWriter writer = null;
		long startTime = new Date().getTime();

		try {

			// 1. Create the index writer:
			//    - the index lives in indexFilePath,
			//    - StandardAnalyzer tokenizes the text,
			//    - create == true builds a new index (replacing any existing
			//      one); false would append to an existing index,
			//    - MaxFieldLength.LIMITED caps the number of terms indexed
			//      per field.
			writer = new IndexWriter(FSDirectory.open(new File(indexFilePath)),
					new StandardAnalyzer(Version.LUCENE_30),
					true, IndexWriter.MaxFieldLength.LIMITED);

			System.out.println("Indexing to directory '" + indexFilePath + "'...");

			// 2. Add the source files to the index.
			indexDox(writer, sourceFilePath);

			System.out.println("Optimizing...");

			// 3. Optimize the index.
			writer.optimize();

			// 4. Close the writer; clear the reference so the finally block
			//    does not close it a second time.
			writer.close();
			writer = null;

			long endTime = new Date().getTime();
			System.out.println("It takes " + (endTime - startTime)
					+ " milliseconds to create index for the files in directory "
					+ sourceFilePath);

		} catch (CorruptIndexException e) {
			e.printStackTrace();
		} catch (LockObtainFailedException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// Failure path: still close the writer so the index lock is released.
			if (writer != null) {
				try {
					writer.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}
}

[b]SearchFile.java用于在索引中搜索查询词[/b]

package cn.edu.uestc.lucene;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Searches the index at {@link #indexFilePath} for {@link #queryString} in
 * the {@code contents} field and prints the stored {@code path} of the top
 * matching documents.
 */
public class SearchFile {

	/** Directory that holds the index to search. */
	private static final String indexFilePath = "C:\\index";

	/** Query text, parsed with the Lucene query parser. */
	private static final String queryString = "lucene";

	/**
	 * Runs the query and prints the number of hits, the search time and the
	 * path of each of the (at most five) best-scoring documents.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {

		IndexSearcher searcher = null;

		try {

			// 1. Open the index stored on disk at indexFilePath.
			searcher = new IndexSearcher(FSDirectory.open(new File(indexFilePath)));

			// 2. Parse the query string with StandardAnalyzer; terms without an
			//    explicit field are looked up in "contents".
			QueryParser parser = new QueryParser(Version.LUCENE_30,
					"contents",
					new StandardAnalyzer(Version.LUCENE_30));
			Query query = parser.parse(queryString);

			// 3. Search the index, keeping the top 5 hits.
			long startTime = System.currentTimeMillis();
			TopDocs hits = searcher.search(query, 5);
			long endTime = System.currentTimeMillis();

			// 4. Report the results. This stays inside the try block so that a
			//    failure above can never lead to dereferencing a null result.
			System.out.println("Found " + hits.totalHits + " documents (in " + (endTime - startTime)
					+ " milliseconds) that match query '" + queryString + "':");

			for (ScoreDoc tdoc : hits.scoreDocs) {
				Document doc = searcher.doc(tdoc.doc);
				System.out.println(doc.get("path"));
			}

		} catch (CorruptIndexException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} catch (ParseException e) {
			e.printStackTrace();
		} finally {
			// 5. Always close the searcher, but only if it was actually opened.
			if (searcher != null) {
				try {
					searcher.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}
}