lucene4.0入门1

最新推荐文章于 2024-10-11 17:30:36 发布

weixin_33963189

最新推荐文章于 2024-10-11 17:30:36 发布

阅读量52

点赞数

文章标签： java python 数据库

原文链接：https://my.oschina.net/91jason/blog/342285

版权

2019独角兽企业重金招聘Python工程师标准>>>

Lucene主要分为三大块：

1、创建索引

2、分词

3、读取并查询索引

前提：由于本人目前看的是3.5的视频教程，所以文中有些写法可能仍然是3.5的写法，但我能保证demo能跑通、能运行、能看明白意思。

发现在lucene3.5里，lucene的主要jar都在core包里，但是在4.0以后好像被拆分成了多个jar，需要根据项目需要，一点一点自己往上加jar包。

所用jar: lucene 4.10.2

教学视频：lucene 3.5

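依赖的jar包（原文此处的截图已丢失，以下根据代码中的 import 推断）：lucene-core-4.10.2.jar、lucene-analyzers-common-4.10.2.jar、lucene-queryparser-4.10.2.jar；测试类另外需要 junit 4。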

入门代码1：

package com.test;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class HelloLucene2 {
	@SuppressWarnings("deprecation")
	public void index() throws Exception {
		
		
		//创建directory，主要为了存储索引存放的路径
		//Directory directory = new RAMDirectory();// 建立在内存中
		Directory fsddirectory = FSDirectory.open(new File(
				"D:/lucene-file/index01"));//建立在硬盘上
		//indexwriter的配置信息
		IndexWriterConfig IndexWriterConfig = new IndexWriterConfig(
				Version.LATEST, new StandardAnalyzer(Version.LATEST));
		//indexwriter主要是用来写索引的，类似于file流
		IndexWriter writer = new IndexWriter(fsddirectory, IndexWriterConfig);
		//document类似于一条数据，存放数据信息
		Document document = null;
		//field类似于字段，用于存放数据的每个分类信息
		File f = new File("D:/lucene-file/exampletxt");
		for (File file : f.listFiles()) {
			document = new Document();
			document.add(new Field("content", new FileReader(file)));
			document.add(new Field("filename", file.getName(), Field.Store.YES,
					Field.Index.NOT_ANALYZED));
			document.add(new Field("path", file.getAbsolutePath(),
					Field.Store.YES, Field.Index.NOT_ANALYZED));
			writer.addDocument(document);
		}
		if (writer != null) {
			writer.close();
		}

	}
	
	public void search() throws Exception {
		//主要流程：
		//1：创建directory   从哪读取信息与资料
		Directory fsddirectory = FSDirectory.open(new File(
				"D:/lucene-file/index01"));//放在硬盘上的信息源
		//2:创建indexReader  //读取Index索引
		IndexReader reader = IndexReader.open(fsddirectory);
		//3:根据indexReader创建IndexSearcher
		IndexSearcher searcher = new IndexSearcher(reader);
		//4:创建搜索的Query
		//需要创建parser来确定要搜索的文件的内容，第二个参数表示搜索的域
		 QueryParser parser = new QueryParser(Version.LATEST, "content",new StandardAnalyzer(Version.LATEST));
		 //表示域中包含徐这个字的文档	
		 Query query = parser.parse("java");
		 //5:根据 searcher搜索并且返回TopDocs，类似于数据库的结果集
		 //10代表搜索10条， 
		 TopDocs tds = searcher.search(query, 10);
		//6：根据TOPDocs获取ScoreDoc对象
		 ScoreDoc[] sds =tds.scoreDocs;
		//7:根据searcher和ScordDoc对象获取具体的Document对象
		 for (ScoreDoc scoreDoc : sds) {
			 Document d = searcher.doc(scoreDoc.doc);
			System.out.println(d.get("filename"));
			System.out.println(d.get("path"));
			System.out.println("---------");
		}
		 
		 System.out.println("length"+sds.length);
		
		//8：根据Document对象获取需要的值
		//9：关闭reader
		reader.close();
	}

}

test类：

package com.junittest;

import static org.junit.Assert.*;

import org.junit.Test;

import com.test.HelloLucene2;

public class TestLucene {

/*	@Test
	public void testindex() throws Exception {
		HelloLucene2 hl = new HelloLucene2();
		hl.index();
	}*/
	@Test
	public void testsearch() throws Exception {
		HelloLucene2 hl = new HelloLucene2();
		hl.index();
		hl.search();
	}

}

转载于:https://my.oschina.net/91jason/blog/342285