初次接触Lucene小记

最新推荐文章于 2024-08-20 15:15:16 发布

zhoucl0220

最新推荐文章于 2024-08-20 15:15:16 发布

阅读量95

点赞数

分类专栏： Lucene Maven 文章标签： lucene java

本文链接：https://blog.csdn.net/zhoucl0220/article/details/84005033

版权

Maven 同时被 2 个专栏收录

7 篇文章 0 订阅

订阅专栏

Lucene

0 篇文章 0 订阅

订阅专栏

这几天闲着没什么事，就鼓捣了一下Lucene的全文检索方面的内容，首先在Apache的Lucene站点上大致浏览了一下Lucene的相关文档及知识，由于英文不怎么好，干脆直接找中文相关的内容看了，弄了两天，自己整了个Lucene的大致的实现程序，并且添加进自己的站点项目中，做了个简单的测试，出了一点效果，在这里将大致过程做个简单的记录：

1、在原有的maven项目上，增加了一个test-lucene的子模块，原有的与其相关的模块为test-persist(主要做持久化操作)、test-model(主要定义模型的模块)，大致的POM文件如下：

<?xml version="1.0" encoding="UTF-8"?>
<!-- POM of the test-lucene sub-module: full-text indexing/search built on Apache Lucene. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <parent>
    <groupId>org.clzps</groupId>
    <artifactId>test</artifactId>
    <version>0.0.1-SNAPSHOT</version>
  </parent>

  <groupId>org.clzps</groupId>
  <artifactId>test-lucene</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>test-lucene</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <!-- Single place to bump the version of every Lucene artifact below. -->
    <lucene.version>3.1.0</lucene.version>
  </properties>

  <!-- type "jar" and scope "compile" are the Maven defaults, so they are not repeated per dependency. -->
  <dependencies>
    <!-- Sibling modules of this project -->
    <dependency>
      <groupId>org.clzps</groupId>
      <artifactId>test-model</artifactId>
      <version>${project.version}</version>
    </dependency>
    <dependency>
      <groupId>org.clzps</groupId>
      <artifactId>test-persist</artifactId>
      <version>${project.version}</version>
    </dependency>

    <!-- Apache Lucene -->
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-analyzers</artifactId>
      <version>${lucene.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-core</artifactId>
      <version>${lucene.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-highlighter</artifactId>
      <version>${lucene.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-smartcn</artifactId>
      <version>${lucene.version}</version>
    </dependency>
  </dependencies>
</project>

2、主要的Java文件内容，实现外部应用的直接调用，包括创建index过程,加载index过程(索引已经创建好),搜索过程，具体就不详细描述了，主要自己做记录，看代码：

/**
 * Thin facade over Lucene 3.1 that builds an index, loads it from disk and
 * runs keyword searches against it.
 *
 * <p>All state (analyzer, index path, directories, readers) is held in static
 * fields, so it is shared by every instance of this class. Nothing here is
 * synchronized; callers must not use the service from several threads at once.
 */
public class LuceneService {

	/**
	 * Analyzer used both when writing documents and when parsing queries.
	 */
	private static Analyzer analyzer = Constants.analyzer;

	/**
	 * Replaces the analyzer used for subsequent indexing and searching.
	 *
	 * @param analyzer the analyzer to use from now on
	 */
	public static void setAnalyzer(Analyzer analyzer) {
		LuceneService.analyzer = analyzer;
	}

	/**
	 * File-system path of the on-disk index directory.
	 */
	private static String indexDirPath = Constants.indexDirectoryPath;

	/**
	 * Replaces the on-disk index location.
	 *
	 * <p>Note that {@link #loadIndex()} keeps using an already opened directory;
	 * the new path only takes effect where the directory is (re)opened.
	 *
	 * @param indexDirPath path of the index directory
	 */
	public static void setIndexDirPath(String indexDirPath) {
		LuceneService.indexDirPath = indexDirPath;
	}

	/**
	 * On-disk directory and the in-memory staging directory used while building.
	 */
	private static FSDirectory  fsdirectory  = null;
	private static RAMDirectory ramDirectory = null;

	/**
	 * Readers consulted by {@link #search(String)}. Nothing in this class
	 * assigns {@code ramIndexReader}; it is only read.
	 */
	private static IndexReader fsIndexReader  = null;
	private static IndexReader ramIndexReader = null;

	/**
	 * Merges the in-memory index into the on-disk index (created if missing,
	 * appended to otherwise), optimizes it and reloads the disk reader.
	 *
	 * <p>I/O failures are printed and otherwise ignored, matching the rest of
	 * this class; {@link #loadIndex()} is attempted in every case.
	 */
	private void indexRamToDisk() {
		IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_31, analyzer);
		iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);

		try {
			fsdirectory = FSDirectory.open(new File(indexDirPath));

			IndexWriter fsIndexWriter = new IndexWriter(fsdirectory, iwConfig);
			try {
				fsIndexWriter.addIndexes(new Directory[]{ramDirectory});
				fsIndexWriter.optimize();
			} finally {
				// Always close: an unclosed writer keeps the index write lock,
				// which would make every later attempt to open a writer fail.
				fsIndexWriter.close();
			}

			ramDirectory.close();
		} catch (IOException e) {
			// CorruptIndexException and LockObtainFailedException are IOExceptions too.
			e.printStackTrace();
		} finally {
			// The staging index is no longer needed whether or not the merge worked.
			ramDirectory = null;
		}

		loadIndex();
	}

	/**
	 * Indexes the given articles into a fresh in-memory index and then merges
	 * that index into the on-disk index.
	 *
	 * <p>The position of each indexed article is printed to standard output.
	 *
	 * @param objList articles to index; must not be {@code null}
	 */
	public void createIndex(List<ArticleData> objList) {
		IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_31, analyzer);
		iwConfig.setOpenMode(OpenMode.CREATE);
		// A brand-new RAMDirectory opened in CREATE mode starts empty, so no
		// explicit deleteAll()/commit() is needed before adding documents.
		ramDirectory = new RAMDirectory();

		try {
			IndexWriter ramIndexWriter = new IndexWriter(ramDirectory, iwConfig);
			try {
				for(int i = 0; i < objList.size(); i++) {
					Document doc = AnyObjects2DocumentUtils.Persist2Document(objList.get(i));
					ramIndexWriter.addDocument(doc);
					System.out.println(i);
				}
			} finally {
				// Close even when a document fails so the writer is not leaked.
				ramIndexWriter.close();
			}

			indexRamToDisk();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Opens a reader on the on-disk index, opening the directory first if that
	 * has not happened yet.
	 *
	 * <p>The previous reader, if any, is closed only after the new one has been
	 * opened successfully, so a failed reload leaves the old reader usable.
	 */
	public void loadIndex() {
		try {
			if(fsdirectory == null) {
				fsdirectory = FSDirectory.open(new File(indexDirPath));
			}
			IndexReader previousReader = fsIndexReader;
			fsIndexReader = IndexReader.open(fsdirectory);
			if(previousReader != null) {
				previousReader.close();
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Searches the {@code title} and {@code content} fields for the given query
	 * and prints the hit count, up to 100 matching documents and the elapsed
	 * time in milliseconds to standard output.
	 *
	 * <p>Does nothing when no on-disk index has been loaded yet.
	 *
	 * @param queryStr query in Lucene query-parser syntax
	 */
	public void search(String queryStr) {
		long start = System.currentTimeMillis();

		// Nothing to search; bail out before spending time on query parsing.
		if(fsIndexReader == null) return;

		String[] fields = new String[]{"title","content"};
		try {
			Query query = new MultiFieldQueryParser(Version.LUCENE_31, fields, analyzer).parse(queryStr);

			IndexReader[] readers = (ramIndexReader == null)
					? new IndexReader[]{fsIndexReader}
					: new IndexReader[]{fsIndexReader, ramIndexReader};
			// NOTE(review): deliberately not closed here. Per the Lucene docs a
			// MultiReader built this way closes its sub-readers on close(), and
			// those are the shared static readers - confirm before changing.
			MultiReader multiReader = new MultiReader(readers);

			IndexSearcher indexSearcher = new IndexSearcher(multiReader);

			TopDocs topDocs = indexSearcher.search(query, null, 100);

			System.out.println("检索到【" + topDocs.totalHits + "】条匹配的结果!");
			System.out.println("--------------------------------------------------------");

			for(ScoreDoc hit : topDocs.scoreDocs) {
				Document document = indexSearcher.doc(hit.doc);
				// NOTE(review): createIndex uses AnyObjects2DocumentUtils while this
				// uses Object2DocumentUtils - verify which helper class actually exists.
				Object2DocumentUtils.PrintDocument(document);
			}

			long end = System.currentTimeMillis();
			System.out.println("搜索时间:" + (end-start));

		} catch (ParseException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

}

3、通过下面的测试代码，即可进行创建Index，加载Index和搜索过程了：

@Test
	/**
	 * Builds the index when the index directory does not exist yet, otherwise
	 * loads the existing one, then runs a sample search.
	 */
	public void testCreateOrLoadIndex() {
		boolean indexDirPresent = new File(indexDirectoryPath).exists();

		if(!indexDirPresent) {
			// First run: no index directory yet, so build the index.
			createIndex();
		} else {
			luceneService.loadIndex();
		}

		luceneService.search("事件");
	}

zhoucl0220

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
初次接触Lucene小记

这几天闲着没什么事，就鼓捣了一下Lucene的全文检索方面的内容，首先在Apache的Lucene站点上大致浏览了一下Lucene的相关文档及知识，由于英文不怎么好，干脆直接找中文相关的内容看了，弄了两天，自己整了个Lucene的大致的实现程序，并且添加进自己的站点项目中，做了个简单的测试，出了一点效果，在这里将大致过程做个简单的记录：1、在原有的maven项目上，增加了一个test-...
复制链接

扫一扫

专栏目录