Lucene-分词器

/**
	 * Tokenizes {@code text} with the given analyzer and prints every resulting
	 * term to standard output, one term per line.
	 *
	 * @param analyzer the analyzer whose tokenization is being inspected
	 * @param text the text to tokenize
	 * @throws Exception if the token stream fails while being consumed or closed
	 */
	private void testAnalyzer(Analyzer analyzer,String text)throws Exception{
		TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(text));
		try{
			// addAttribute returns the attribute instance, so it is fetched once
			// here instead of being looked up again for every token.
			TermAttribute termAttribute = tokenStream.addAttribute(TermAttribute.class);
			// Consumer workflow: reset, iterate, end, close.
			tokenStream.reset();
			while(tokenStream.incrementToken()){
				System.out.println(termAttribute.term());
			}
			tokenStream.end();
		}finally{
			// Always release the stream, even when tokenizing throws.
			tokenStream.close();
		}
	}
/**
 * Exercises several Lucene analyzers on English and Chinese sample text and
 * prints the terms each one produces. The focus is mainly on Chinese.
 *
 * @author Administrator
 */
public class AnalyzerTest {
	/**
	 * Runs {@link StandardAnalyzer} over an English sentence.
	 *
	 * @throws Exception if tokenizing fails
	 */
	@Test
	public void testEn() throws Exception{
		// Expected processing of the English sample:
		//   1. split the text into keywords
		//   2. drop stop words
		//   3. convert upper case to lower case
		String text = "Creates a searcher searching the index in the named directory";
		Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
		this.testAnalyzer(analyzer, text);
	}
	
	/**
	 * Runs {@link ChineseAnalyzer} over a Chinese sentence.
	 *
	 * @throws Exception if tokenizing fails
	 */
	@Test
	public void testZH() throws Exception{
		// Single-character tokenization: each Chinese character becomes its own term.
		Analyzer analyzer = new ChineseAnalyzer();
		String text = "传智播客的黎活明是UFO";
		this.testAnalyzer(analyzer, text);
	}
	
	/**
	 * Runs {@link CJKAnalyzer} over the same Chinese sentence as {@link #testZH()}.
	 *
	 * @throws Exception if tokenizing fails
	 */
	@Test
	public void testZH2() throws Exception{
		// Bigram tokenization: CJKAnalyzer emits overlapping two-character terms
		// for CJK text rather than single characters.
		Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_30);
		String text = "传智播客的黎活明是UFO";
		this.testAnalyzer(analyzer, text);
	}
	
	/**
	 * Runs {@link IKAnalyzer} over a short Chinese phrase.
	 *
	 * @throws Exception if tokenizing fails
	 */
	@Test
	public void testZH3() throws Exception{
		// NOTE(review): IKAnalyzer is a third-party analyzer, presumably doing
		// dictionary-based word segmentation - confirm against its documentation.
		Analyzer analyzer = new IKAnalyzer();
		String text = "北京美女";
		this.testAnalyzer(analyzer, text);
	}

	/**
	 * Tokenizes {@code text} with the given analyzer and prints every resulting
	 * term to standard output, one term per line.
	 *
	 * @param analyzer the analyzer whose tokenization is being inspected
	 * @param text the text to tokenize
	 * @throws Exception if the token stream fails while being consumed or closed
	 */
	private void testAnalyzer(Analyzer analyzer,String text)throws Exception{
		TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(text));
		try{
			// addAttribute returns the attribute instance, so it is fetched once
			// here instead of being looked up again for every token.
			TermAttribute termAttribute = tokenStream.addAttribute(TermAttribute.class);
			// Consumer workflow: reset, iterate, end, close.
			tokenStream.reset();
			while(tokenStream.incrementToken()){
				System.out.println(termAttribute.term());
			}
			tokenStream.end();
		}finally{
			// Always release the stream, even when tokenizing throws.
			tokenStream.close();
		}
	}
}


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值