百度云盘下载地址:
https://pan.baidu.com/s/1tnDXvQs9rBVSqnbMpxN3DQ
提取码:q9fh
1.引入jar包
- lucene-core-3.3.0.jar
- commons-logging.jar
- paoding.jar
- gson 的 jar 包(测试代码使用 com.google.gson.Gson 输出 JSON)
2.加入字典
- src/paoding/dic 下的所有文件
- 通过 src/paoding-dic-home.properties 配置字典目录的位置,内容如下:
#values are "system-env" or "this";
#if the value is "this", paoding.dic.home is used as dicHome when it is configured
#paoding.dic.home.config-first=system-env
paoding.dic.home.config-first=this
#dictionary home (directory)
#"classpath:xxx" means dictionary home is in classpath.
#e.g "classpath:dic" means dictionaries are in "classes/dic" directory or any other classpath directory
paoding.dic.home=D:\\qiyulin\\workspace\\myapi\\src\\paoding\\dic
#seconds for dic modification detection
#paoding.dic.detector.interval=60
3.测试代码
import com.google.gson.Gson;
import net.paoding.analysis.analyzer.PaodingAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
/**
 * Word segmentation helper built on the Paoding analyzer.
 *
 * @author qiyulin
 */
public class Fenci {

    /**
     * Splits {@code text} into terms using a {@link PaodingAnalyzer}.
     *
     * <p>This method never throws: if analysis fails for any reason the stack
     * trace is printed and a single-element array holding the original text is
     * returned instead.
     *
     * @param text the text to segment; may be {@code null}
     * @return the terms produced by the analyzer, in order, as a
     *         {@code String[]}; or a one-element array containing {@code text}
     *         when {@code text} is {@code null} or analysis fails
     */
    public static Object[] start(String text) {
        // Same result the original produced for null (via a caught NPE),
        // but without constructing an analyzer or printing a stack trace.
        if (text == null) {
            return new String[]{text};
        }

        TokenStream tokenizer = null;
        try {
            List<String> terms = new ArrayList<String>();
            Analyzer analyzer = new PaodingAnalyzer();
            tokenizer = analyzer.tokenStream("text", new StringReader(text));
            // Attributes are registered before reset(), then read after each incrementToken().
            CharTermAttribute termAtt = tokenizer.addAttribute(CharTermAttribute.class);
            tokenizer.reset();
            while (tokenizer.incrementToken()) {
                terms.add(termAtt.toString());
            }
            // Signal end-of-stream before closing, per the TokenStream consumer workflow.
            tokenizer.end();
            // Typed array so both return paths yield a String[] at runtime.
            return terms.toArray(new String[terms.size()]);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Close on every path so a failure mid-stream does not leak the token stream.
            if (tokenizer != null) {
                try {
                    tokenizer.close();
                } catch (Exception closeFailure) {
                    closeFailure.printStackTrace();
                }
            }
        }
        return new String[]{text};
    }

    /**
     * Demo entry point: segments a sample sentence and prints the terms as JSON.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Object[] arr = start("这些沙糖桔怎么回事?");
        System.out.println(new Gson().toJson(arr));
    }
}
4.运行结果