lucene--同音词


如果希望 Lucene 在查询时能够匹配同音词,就必须在创建索引时对每个词元额外执行语音编码算法(例如 Metaphone),这必然会带来额外的性能损耗,往往得不偿失。

/**
 * 2013.06.06
 * @author 赵洪志
 * 同音词测试,只是随便看看
 */
package com.zhao.lucene.analysis.codec;

import java.io.IOException;

import org.apache.commons.codec.language.Metaphone;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;

/**
 * Token filter that replaces each incoming term with its Metaphone phonetic
 * code, so that words which sound alike are indexed (and queried) as the
 * same term.
 *
 * <p>Tokens for which Metaphone produces no code are passed through
 * unchanged; see {@link #incrementToken()}.
 */
public class MetaphoneReplacementFilter extends TokenFilter {

	/** Token type set on every token whose term was replaced by its Metaphone code. */
	public static final String METAPHONE = "metaphone";

	private final Metaphone metaphoner = new Metaphone();
	private final TermAttribute termAttr;
	private final TypeAttribute typeAttr;

	/**
	 * Wraps the given stream and registers the term and type attributes this
	 * filter rewrites.
	 *
	 * @param input the upstream token stream to read tokens from
	 */
	public MetaphoneReplacementFilter(TokenStream input) {
		super(input);
		termAttr = addAttribute(TermAttribute.class);
		typeAttr = addAttribute(TypeAttribute.class);
	}

	/**
	 * Advances the wrapped stream by one token and, when possible, replaces
	 * the token's term with its Metaphone code and its type with
	 * {@link #METAPHONE}.
	 *
	 * <p>Declared {@code final} because Lucene's {@code TokenStream} contract
	 * asks concrete streams to be final or to have a final
	 * {@code incrementToken()}.
	 *
	 * @return {@code false} once the wrapped stream is exhausted, {@code true} otherwise
	 * @throws IOException if the wrapped stream fails while producing a token
	 */
	public final boolean incrementToken() throws IOException {
		if (!input.incrementToken()) {
			return false; // upstream is exhausted
		}

		String encoded = metaphoner.encode(termAttr.term());
		if (encoded == null || encoded.length() == 0) {
			// Metaphone can yield an empty code, e.g. for text that contains no
			// letters it knows how to encode (such as CJK input). Writing that
			// empty code back would collapse every such token into the same
			// empty term, so unrelated tokens would match each other. Leave the
			// token exactly as the upstream produced it instead.
			return true;
		}

		termAttr.setTermBuffer(encoded);
		typeAttr.setType(METAPHONE);
		return true;
	}
}

/**
 * 2013.06.06
 * @author 赵洪志
 * 同音词测试,只是随便看看
 */
package com.zhao.lucene.analysis.codec;

import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LetterTokenizer;
import org.apache.lucene.analysis.TokenStream;

/**
 * Analyzer that splits text with a {@link LetterTokenizer} and feeds the
 * resulting tokens through a {@link MetaphoneReplacementFilter}.
 */
public class MetaphoneReplacementAnalyzer extends Analyzer {

	/**
	 * Builds the analysis chain for one field value.
	 *
	 * @param fieldName name of the field being analyzed (not used by this analyzer)
	 * @param reader    source of the text to tokenize
	 * @return a letter tokenizer over {@code reader}, wrapped in the Metaphone filter
	 */
	@Override
	public TokenStream tokenStream(String fieldName, Reader reader) {
		TokenStream letterTokens = new LetterTokenizer(reader);
		TokenStream phoneticTokens = new MetaphoneReplacementFilter(letterTokens);
		return phoneticTokens;
	}

}

/**
 * 2013.06.06
 * @author 赵洪志
 * 同音词测试,只是随便看看
 */
package com.zhao.lucene.analysis.codec;

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

import junit.framework.TestCase;

/**
 * Exercises {@link MetaphoneReplacementAnalyzer}: a JUnit test that indexes
 * and searches one document in memory, and a {@code main} method that prints
 * the tokens the analyzer produces for a sample sentence.
 */
public class MetaphoneAnalyzerTest extends TestCase {

	/**
	 * Indexes a single document with the Metaphone analyzer, runs a query
	 * parsed with the same analyzer, and checks the stored value of the top
	 * hit.
	 *
	 * <p>NOTE(review): the assertion only runs when the search returns at
	 * least one hit, so this test also passes when nothing matches.
	 *
	 * @throws Exception if indexing, query parsing or searching fails
	 */
	public void testKoolKat() throws Exception {
		RAMDirectory directory = new RAMDirectory();
		Analyzer analyzer = new MetaphoneReplacementAnalyzer();

		IndexWriter writer = new IndexWriter(directory, analyzer, true,
				IndexWriter.MaxFieldLength.UNLIMITED);
		try {
			// Index the document.
			Document doc = new Document();
			doc.add(new Field("contents",
					"我爱中华人民共和国", Field.Store.YES, Field.Index.ANALYZED));
			writer.addDocument(doc);
		} finally {
			// Release the writer even if adding the document fails.
			writer.close();
		}

		IndexSearcher searcher = new IndexSearcher(directory);
		try {
			// Parse the query text with the same analyzer used for indexing.
			Query query = new QueryParser(Version.LUCENE_30,
					"contents", analyzer)
					.parse("爱人");

			TopDocs hits = searcher.search(query, 1);

			if (hits.totalHits > 0) {
				int docID = hits.scoreDocs[0].doc;
				Document match = searcher.doc(docID);

				// Retrieve the original stored value and verify it.
				assertEquals("我爱中华人民共和国", match.get("contents"));
				System.out.println(match.get("contents"));
			}
		} finally {
			// Close the searcher even when the query or the assertion fails.
			searcher.close();
		}
	}

	/**
	 * Prints the terms the Metaphone analyzer emits for a fixed English
	 * sentence, each followed by {@code ===}, then a line break.
	 *
	 * @param args ignored
	 * @throws IOException if the token stream fails while being read or closed
	 */
	public static void main(String[] args) throws IOException {
		MetaphoneReplacementAnalyzer analyzer = new MetaphoneReplacementAnalyzer();

		TokenStream stream = analyzer.tokenStream("content", new StringReader(
				"The quick brown fox jumped over the lazy dog"));
		try {
			TermAttribute attribute = stream.addAttribute(TermAttribute.class);

			while (stream.incrementToken()) {
				System.out.print(attribute.term() + "===");
			}
			System.out.println();
		} finally {
			// Release the tokenizer's underlying reader.
			stream.close();
		}
	}
}



评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值