pom.xml
<!--
  Maven build for the Lucene + IK Analyzer demo web application.
  Coordinates are written without surrounding whitespace so they match the
  published artifact identifiers exactly.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Spring Boot parent POM; dependencies declared below without a version rely on it. -->
  <parent>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-parent</artifactId>
    <version>1.5.9.RELEASE</version>
  </parent>

  <groupId>cn.et</groupId>
  <artifactId>LuceneIkAnalyzer</artifactId>
  <version>0.0.1-SNAPSHOT</version>

  <properties>
    <!-- Single version shared by every org.apache.lucene artifact below. -->
    <lucene.version>4.10.4</lucene.version>
  </properties>

  <dependencies>
    <dependency>
      <groupId>org.springframework.boot</groupId>
      <artifactId>spring-boot-starter-web</artifactId>
    </dependency>

    <!-- Lucene modules, all pinned to ${lucene.version}. -->
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-core</artifactId>
      <version>${lucene.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-analyzers-common</artifactId>
      <version>${lucene.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-queryparser</artifactId>
      <version>${lucene.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-highlighter</artifactId>
      <version>${lucene.version}</version>
    </dependency>

    <!-- IK Analyzer, used by the controller through org.wltea.analyzer.lucene.IKAnalyzer. -->
    <dependency>
      <groupId>com.janeluo</groupId>
      <artifactId>ikanalyzer</artifactId>
      <version>2012_u6</version>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <!-- Compile as Java 1.7 and read the sources as UTF-8. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <configuration>
          <source>1.7</source>
          <target>1.7</target>
          <encoding>UTF-8</encoding>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>
IKAnalyzer.cfg.xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<!-- IK Analyzer extension settings, stored in the Java XML properties format. -->
<properties>
    <comment>IK Analyzer 扩展配置</comment>

    <!-- Users can list their own extension dictionaries here (semicolon-separated). -->
    <entry key="ext_dict">ext.dic;</entry>

    <!-- Users can list their own extension stop-word dictionaries here (semicolon-separated). -->
    <entry key="ext_stopwords">stopword.dic;chinese_stopword.dic</entry>
</properties>
ext.dic
博客
QQ
Phone
Email
chinese_stopword.dic
喔
啊
呀
吐
哈
嗯
啪
呼
噗
嗨
嘿
stopword.dic
a
an
and
are
as
at
be
but
by
for
if
in
into
is
it
no
not
of
on
or
such
that
the
their
then
there
these
they
this
to
was
will
with
package cn.et;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
/**
 * Application entry point: boots the Spring context using this class as the
 * configuration source.
 */
@SpringBootApplication
public class SpringBootMain {

    /**
     * Starts the Spring Boot application.
     *
     * @param args command-line arguments, passed through to Spring Boot unchanged
     */
    public static void main(String[] args) {
        // Instance form of the static SpringApplication.run(source, args) helper.
        SpringApplication application = new SpringApplication(SpringBootMain.class);
        application.run(args);
    }
}
package cn.et.control;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * REST controller that tokenizes request text with the IK analyzer and returns
 * the tokens as an HTML fragment.
 */
@RestController
public class LueneIkControl {

    /**
     * Runs {@code str} through {@code analyzer} and collects the term text of
     * every token produced.
     *
     * @param str      text to tokenize; {@code null} yields an empty list
     * @param analyzer analyzer used to build the token stream
     * @return the tokens in emission order; if an {@link IOException} occurs the
     *         tokens collected up to that point are returned
     */
    public List<String> getWords(String str, Analyzer analyzer) {
        List<String> result = new ArrayList<String>();
        if (str == null) {
            // new StringReader(null) would throw a NullPointerException.
            return result;
        }
        // try-with-resources closes the stream on every exit path.
        try (TokenStream stream = analyzer.tokenStream("content", new StringReader(str))) {
            CharTermAttribute attr = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                result.add(attr.toString());
            }
            // The TokenStream consumer workflow requires end() after the last
            // token and before close().
            stream.end();
        } catch (IOException e) {
            // Best effort, as before: report the failure and keep what was collected.
            e.printStackTrace();
        }
        return result;
    }

    /**
     * Tokenizes the {@code content} request parameter and renders one token per
     * line, each followed by {@code <br/>}.
     *
     * @param content text to tokenize; may be {@code null} when the parameter is absent
     * @return HTML fragment listing the tokens, or an empty string when there are none
     */
    @RequestMapping("/getIkResult")
    public String getIkResult(String content) {
        StringBuilder html = new StringBuilder();
        // Analyzer is Closeable; release it once this request has been tokenized.
        try (Analyzer analyzer = new IKAnalyzer()) {
            for (String word : getWords(content, analyzer)) {
                System.out.println(word);
                // Tokens originate from the request, so escape them before
                // placing them in markup.
                html.append(escapeHtml(word)).append("<br/>");
            }
        }
        return html.toString();
    }

    /** Replaces the characters that are significant in HTML with entity references. */
    private static String escapeHtml(String text) {
        StringBuilder escaped = new StringBuilder(text.length());
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                case '"':
                    escaped.append("&quot;");
                    break;
                case '\'':
                    escaped.append("&#39;");
                    break;
                default:
                    escaped.append(c);
            }
        }
        return escaped.toString();
    }
}
form.css
/* Base look shared by every <input>. */
input {
    margin: 12px;
    font-size: 15px;
    color: SaddleBrown;
    vertical-align: middle;
    padding: 5px;
    font-family: 楷体;
    font-weight: bold;
}

/* Single-line text and password fields. */
[type=text],
[type=password] {
    width: 180px;
    height: 30px;
    border-radius: 5px;
}

/* Submit and reset buttons. */
[type=submit],
[type=reset] {
    width: 80px;
    padding: 5px;
    background: Crimson;
    color: white;
    font-weight: bold;
    font-family: 楷体;
    border-radius: 7px;
}

/* Page-wide font. */
body {
    font-weight: bold;
    font-family: 楷体;
}

/* Links: no underline; one colour per state, declared in link/visited/hover/active order. */
a {
    text-decoration: none;
}

a:link {
    color: blueViolet;
}

a:visited {
    color: brown;
}

a:hover {
    color: orangeRed;
}

a:active {
    color: dimGray;
}
index.html
<html>
<head>
  <meta http-equiv="content-type" content="text/html;charset=UTF-8" />
  <title>Lucene IKAnalyzer</title>
  <link rel="stylesheet" href="form.css" type="text/css">
</head>
<body>
  <!-- Sends the entered text as the "content" query parameter to /getIkResult. -->
  <form align="center" action="/getIkResult" method="get">
    中 文<input type="text" name="content" placeholder="请输入中文"/><br>
    <input type="submit" value="分词" />
    <input type="reset" value="重置"/><br/>
  </form>
</body>
</html>
输入“欢迎光临卢陈的博客:http://blog.csdn.net/phone13144830339”,点击分词查看分词结果