Lucene4.0教程

本文采用lucene4.0.0和IKAnalyzer中文分词器

一、创建索引

package com.jmj.project.web;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.wltea.analyzer.lucene.IKAnalyzer;

public class Abc {
	private Logger logger = LoggerFactory.getLogger(getClass());

	/**
	 * Builds a Lucene 4.0 index over a small hard-coded sample data set
	 * (id / content / city fields) using the IK Chinese analyzer.
	 *
	 * @return always {@code null} (kept for interface compatibility)
	 */
	public String createIndex() {
		String[] ids = { "1", "2" };
		String[] contents = { "我是第一条内容,i am lucene IKAnalyzer", "我在北京天安门广场吃炸鸡" };
		String[] citys = { "北京", "上海" };
		String indexPath = "/home/iflow/luceneindex"; // directory where the index files are written
		// IKAnalyzer() default (false): finest-grained segmentation; true: smart segmentation
		Analyzer analyzer = new IKAnalyzer();
		IndexWriter indexWriter = null;
		Directory directory = null;
		try {
			directory = FSDirectory.open(new File(indexPath));
			indexWriter = getIndexWriter(directory, analyzer);
		} catch (Exception e) {
			// FIX: log the cause and stop early. The original swallowed the
			// exception and fell through, then NPE'd on the null indexWriter.
			logger.error("索引打开异常!", e);
			return null;
		}
		// Add one document per sample row.
		try {
			for (int i = 0; i < ids.length; i++) {
				Document document = new Document();

				// StringField: indexed as a single token (exact match), stored.
				document.add(new StringField("id", ids[i], Field.Store.YES));
				// TextField: analyzed full-text fields, stored.
				Field content = new TextField("content", contents[i], Field.Store.YES);
				Field city = new TextField("city", citys[i], Field.Store.YES);

				document.add(content);
				document.add(city);

				// Higher boost ranks matches on this field earlier in results.
				content.setBoost(2.0f);
				city.setBoost(1.0f);
				indexWriter.addDocument(document);
			}
			indexWriter.commit();
		} catch (IOException e1) {
			logger.error("索引创建异常!", e1);
		} finally {
			// FIX: always release the writer, even if indexing failed above.
			try {
				closeWriter(indexWriter);
			} catch (Exception e) {
				logger.error("索引关闭异常!", e);
			}
		}
		logger.info("索引创建成功!");
		return null;
	}

	/**
	 * Creates an {@link IndexWriter} for the given directory and analyzer,
	 * configured for Lucene 4.0 semantics.
	 *
	 * @param dir      index storage directory
	 * @param analyzer analyzer used to tokenize indexed text
	 * @return a new IndexWriter (caller is responsible for closing it)
	 * @throws IOException if the writer cannot be created
	 */
	private IndexWriter getIndexWriter(Directory dir, Analyzer analyzer) throws IOException {
		IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
		return new IndexWriter(dir, iwc);
	}

	/**
	 * Closes the given {@link IndexWriter} if it is non-null.
	 *
	 * @param indexWriter writer to close; may be null (no-op)
	 * @throws IOException if closing fails
	 */
	private void closeWriter(IndexWriter indexWriter) throws IOException {
		if (indexWriter != null) {
			indexWriter.close();
		}
	}
}

二、查询索引

package com.jmj.project.web;

import java.io.File;
import java.io.IOException;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.wltea.analyzer.lucene.IKAnalyzer;

public class Abc {

	private Logger logger = LoggerFactory.getLogger(getClass());

	/**
	 * Searches the Lucene index built by the indexing example and prints
	 * the top hits for a hard-coded query on the "content" field.
	 *
	 * @param filterMap search filter parameters (currently unused)
	 * @return always {@code null} (kept for interface compatibility)
	 * @throws ParseException                 if the query text cannot be parsed
	 * @throws IOException                    on index read errors
	 * @throws InvalidTokenOffsetsException   declared for highlighter use by callers
	 * @throws java.text.ParseException       declared for callers parsing filter values
	 */
	public PageResults<ViProject> indexSearch(Map<String, Object> filterMap)
			throws ParseException, IOException, InvalidTokenOffsetsException, java.text.ParseException {
		String indexPath = "/home/iflow/luceneindex"; // index directory written by the indexer
		Analyzer analyzer = new IKAnalyzer();
		Directory directory = null;
		try {
			directory = FSDirectory.open(new File(indexPath));
		} catch (Exception e) {
			// FIX: log the cause and stop early. The original swallowed the
			// exception and continued with a null directory.
			logger.error("索引打开异常!", e);
			return null;
		}
		IndexReader ireader = null;
		try {
			// FIX: Lucene 4.0 removed IndexReader.open(Directory);
			// DirectoryReader.open(Directory) is the 4.0 replacement.
			ireader = DirectoryReader.open(directory);
		} catch (IOException e) {
			// FIX: return instead of falling through to new IndexSearcher(null).
			logger.error("索引打开异常!", e);
			return null;
		}
		try {
			IndexSearcher isSearcher = new IndexSearcher(ireader);
			BooleanQuery booleanQuery = new BooleanQuery();

			// eg: multi-field query
			// String text = "北京";
			// String[] fields = { "content", "city" };
			// QueryParser qp = new MultiFieldQueryParser(Version.LUCENE_40, fields, analyzer);
			// Query query = qp.parse(text);
			// booleanQuery.add(query, Occur.MUST);

			// eg: single-field query
			String text = "北京";
			QueryParser qp = new QueryParser(Version.LUCENE_40, "content", analyzer);
			Query query = qp.parse(text);
			booleanQuery.add(query, Occur.MUST);

			// Fetch the 100 highest-scoring hits.
			TopDocs topDocs = isSearcher.search(booleanQuery, 100);
			// Print the stored fields of each hit.
			ScoreDoc[] scoreDocs = topDocs.scoreDocs;
			for (int i = 0; i < scoreDocs.length; i++) {
				Document targetDoc = isSearcher.doc(scoreDocs[i].doc);
				System.out.println(targetDoc.get("id") + targetDoc.get("content") + targetDoc.get("city"));
			}
		} finally {
			// FIX: close the reader even when parsing/searching throws;
			// the original leaked it on any exception.
			ireader.close();
		}
		return null;
	}
}

三、pom文件

		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-core</artifactId>
			<version>4.0.0</version>
		</dependency>

		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-queryparser</artifactId>
			<version>4.0.0</version>
		</dependency>

		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-highlighter</artifactId>
			<version>4.0.0</version>
		</dependency>

		<!-- NOTE(review): verify this coordinate — the commonly published IK Analyzer
		     artifact on Maven Central is com.janeluo:ikanalyzer; also prefer pinning an
		     explicit version over the floating RELEASE marker. -->
		<dependency>
			<groupId>com.lucene</groupId>
			<artifactId>ikAnalyzer</artifactId>
			<version>RELEASE</version>
		</dependency>
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值