Lucene4.10.4版本 IKAnalyzer 中文分词

pom.xml

<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Parent POM: Spring Boot 1.5.9.RELEASE. -->
  <parent>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-parent</artifactId>
    <version>1.5.9.RELEASE</version>
  </parent>

  <!-- Coordinates of this demo project. -->
  <groupId>cn.et</groupId>
  <artifactId>LuceneIkAnalyzer</artifactId>
  <version>0.0.1-SNAPSHOT</version>

  <properties>
    <!-- Single place to set the version shared by all Lucene modules below. -->
    <lucene.version>4.10.4</lucene.version>
  </properties>

  <dependencies>
    <!-- Web starter; no version is given here (presumably managed by the parent POM). -->
    <dependency>
      <groupId>org.springframework.boot</groupId>
      <artifactId>spring-boot-starter-web</artifactId>
    </dependency>

    <!-- Lucene modules, all pinned to ${lucene.version}. -->
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-core</artifactId>
      <version>${lucene.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-analyzers-common</artifactId>
      <version>${lucene.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-queryparser</artifactId>
      <version>${lucene.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-highlighter</artifactId>
      <version>${lucene.version}</version>
    </dependency>

    <!-- IK Analyzer (Chinese word segmentation). -->
    <dependency>
      <groupId>com.janeluo</groupId>
      <artifactId>ikanalyzer</artifactId>
      <version>2012_u6</version>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <!-- Compile as Java 1.7 and read sources as UTF-8. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <configuration>
          <source>1.7</source>
          <target>1.7</target>
          <encoding>UTF-8</encoding>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>

IKAnalyzer.cfg.xml

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">  
<properties>  
    <comment>IK Analyzer 扩展配置</comment>
    <!-- Users can configure their own extension dictionaries here -->
    <entry key="ext_dict">ext.dic;</entry> 
    <!-- Users can configure their own extension stop-word dictionaries here -->
    <entry key="ext_stopwords">stopword.dic;chinese_stopword.dic</entry>
</properties>

ext.dic

博客
QQ
Phone
Email

chinese_stopword.dic

喔
啊
呀
吐
哈
嗯
啪
呼
噗
嗨
嘿

stopword.dic

a
an
and
are
as
at
be
but
by
for
if
in
into
is
it
no
not
of
on
or
such
that
the
their
then
there
these
they
this
to
was
will
with

SpringBootMain.java

package cn.et;

import org.springframework.boot.SpringApplication;  
import org.springframework.boot.autoconfigure.SpringBootApplication;  
/**
 * Entry point of the demo: hands this class to Spring Boot and runs it.
 */
@SpringBootApplication
public class SpringBootMain {

    /**
     * Starts the Spring Boot application.
     *
     * @param args command-line arguments, passed through to Spring Boot unchanged
     */
    public static void main(String[] args) {
        final SpringApplication application = new SpringApplication(SpringBootMain.class);
        application.run(args);
    }
}

LueneIkControl.java

package cn.et.control;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * REST controller that splits submitted text into tokens with IK Analyzer
 * and returns the tokens as a small HTML fragment.
 */
@RestController
public class LueneIkControl {

	/**
	 * Runs {@code str} through {@code analyzer} and collects every emitted term.
	 *
	 * <p>The token stream is consumed with the reset / incrementToken / end / close
	 * sequence and is always closed, even when tokenizing fails.
	 *
	 * @param str      text to tokenize; {@code null} or empty yields an empty list
	 * @param analyzer analyzer used to build the token stream
	 * @return the terms in the order the analyzer produced them; never {@code null}.
	 *         If an {@link IOException} occurs, the terms collected so far are returned.
	 */
	public List<String> getWords(String str, Analyzer analyzer) {
		List<String> result = new ArrayList<String>();
		// new StringReader(null) throws NullPointerException, so stop early.
		if (str == null || str.isEmpty()) {
			return result;
		}
		try (TokenStream stream = analyzer.tokenStream("content", new StringReader(str))) {
			CharTermAttribute attr = stream.addAttribute(CharTermAttribute.class);
			stream.reset();
			while (stream.incrementToken()) {
				result.add(attr.toString());
			}
			// Signals end-of-stream to the token stream before it is closed.
			stream.end();
		} catch (IOException e) {
			// Best effort: report the failure and return what was collected.
			e.printStackTrace();
		}
		return result;
	}

	/**
	 * Tokenizes the {@code content} request parameter with a fresh {@link IKAnalyzer}.
	 *
	 * @param content text to tokenize; a missing parameter is treated as empty input
	 * @return each token HTML-escaped and followed by {@code <br/>}; an empty string
	 *         when there are no tokens
	 */
	@RequestMapping("/getIkResult")
	public String getIkResult(String content) {
		List<String> tokens;
		// The analyzer is created per request, so release it once tokenizing is done.
		try (Analyzer analyzer = new IKAnalyzer()) {
			tokens = getWords(content, analyzer);
		}
		StringBuilder html = new StringBuilder();
		for (String token : tokens) {
			System.out.println(token);
			// Tokens come from user input and are returned as markup, so escape them.
			html.append(escapeHtml(token)).append("<br/>");
		}
		return html.toString();
	}

	/** Replaces the characters that are significant in HTML with entity references. */
	private static String escapeHtml(String text) {
		StringBuilder escaped = new StringBuilder(text.length());
		for (int i = 0; i < text.length(); i++) {
			char c = text.charAt(i);
			switch (c) {
				case '&':
					escaped.append("&amp;");
					break;
				case '<':
					escaped.append("&lt;");
					break;
				case '>':
					escaped.append("&gt;");
					break;
				case '"':
					escaped.append("&quot;");
					break;
				case '\'':
					escaped.append("&#39;");
					break;
				default:
					escaped.append(c);
			}
		}
		return escaped.toString();
	}
}

form.css

/* Base look shared by every input element. */
input {
	margin: 12px;
	font-size: 15px;
	color: SaddleBrown;
	vertical-align: middle;
	padding: 5px;
	font-family: 楷体;
	font-weight: bold;
}

/* Text and password fields. */
[type=text],
[type=password] {
	width: 180px;
	height: 30px;
	border-radius: 5px;
}

/* Submit and reset buttons. */
[type=submit],
[type=reset] {
	width: 80px;
	padding: 5px;
	background: Crimson;
	color: white;
	font-weight: bold;
	font-family: 楷体;
	border-radius: 7px;
}

/* Page-wide font. */
body {
	font-weight: bold;
	font-family: 楷体;
}

/* Links: no underline, one colour per state. */
a {
	text-decoration: none;
}

a:link {
	color: blueViolet;
}

a:visited {
	color: brown;
}

a:hover {
	color: orangeRed;
}

a:active {
	color: dimGray;
}

index.html

<html>
	<head>
		<meta http-equiv="content-type" content="text/html;charset=UTF-8" />
		<title>Lucene IKAnalyzer</title>
		<link rel="stylesheet" href="form.css" type="text/css">
		<script type="text/javascript">
			// Called by the form's onsubmit handler; returning false cancels the
			// submission, which happens when the text field is blank.
			function submitForm() {
				var field = document.getElementsByName("content")[0];
				if (field.value.replace(/^\s+|\s+$/g, "") === "") {
					field.focus();
					return false;
				}
				return true;
			}
		</script>
	</head>
	<body>
		<!-- The form sends the text as the "content" query parameter to /getIkResult. -->
		<form align="center" action="/getIkResult" method="get" onsubmit="return submitForm()">
			中  文<input type="text" name="content" placeholder="请输入中文"/><br> 
			<input type="submit" value="分词" />
			<input type="reset" value="重置"/><br/>
		</form>
	</body>
</html>

在输入框中输入“欢迎光临卢陈的博客:http://blog.csdn.net/phone13144830339”,点击“分词”按钮即可查看分词结果。




评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值