- http://nlp.stanford.edu/software/segmenter.shtml
- https://github.com/jiekechoo/NLPStudy
- http://blog.csdn.net/shijiebei2009/article/details/42525091
以上是参考链接,不过实际采用的做法貌似与这几篇都不完全相同。其中第 2 篇(GitHub 上的那篇)介绍了如何在 Maven 项目中使用,并通过 properties 文件使配置生效;然而我按这种方式尝试没有成功,在 properties 文件里的设置没有生效。
首先,从链接【1】下载 Stanford Word Segmenter version 3.5.2,取出其中的 data 文件夹,放到 Maven 项目的 src/main/resources 目录下。
然后,在 pom.xml 中添加如下 Maven 配置和依赖:
<!-- Build properties; corenlp.version is referenced by the stanford-corenlp dependency entries. -->
<properties>
    <java.version>1.8</java.version>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <corenlp.version>3.6.0</corenlp.version>
</properties>
<!-- The stanford-corenlp artifact three times: the library itself plus two classifier jars. -->
<dependencies>
    <!-- Library code. -->
    <dependency>
        <groupId>edu.stanford.nlp</groupId>
        <artifactId>stanford-corenlp</artifactId>
        <version>${corenlp.version}</version>
    </dependency>
    <!-- Same artifact with the "models" classifier. -->
    <dependency>
        <groupId>edu.stanford.nlp</groupId>
        <artifactId>stanford-corenlp</artifactId>
        <version>${corenlp.version}</version>
        <classifier>models</classifier>
    </dependency>
    <!-- Same artifact with the "models-chinese" classifier. -->
    <dependency>
        <groupId>edu.stanford.nlp</groupId>
        <artifactId>stanford-corenlp</artifactId>
        <version>${corenlp.version}</version>
        <classifier>models-chinese</classifier>
    </dependency>
</dependencies>
最后,代码如下:
import java.util.Properties;
import edu.stanford.nlp.ie.crf.CRFClassifier;
/**
 * Lazily created singleton that segments text into words with a Stanford
 * {@link CRFClassifier}.
 *
 * <p>The classifier is configured and loaded once, in the private constructor, from files
 * under the {@code data} directory ({@code data/ctb.gz} and
 * {@code data/dict-chris6.ser.gz}). Callers obtain the shared instance through
 * {@link #getInstance()} and split text with {@link #doSegment(String)}.
 *
 * <p>NOTE(review): {@link #getInstance()} performs its lazy initialization without any
 * synchronization; confirm whether concurrent first use is possible in your application.
 */
public class CoreNLPSegment {
    /** The shared instance; stays {@code null} until the first {@link #getInstance()} call. */
    private static CoreNLPSegment instance;

    /** The loaded segmentation model. Parameterized so its methods return typed results. */
    private final CRFClassifier<?> classifier;

    /**
     * Configures the segmenter properties and loads the classifier model.
     */
    private CoreNLPSegment() {
        Properties props = new Properties();
        props.setProperty("sighanCorporaDict", "data");
        props.setProperty("serDictionary", "data/dict-chris6.ser.gz");
        props.setProperty("inputEncoding", "UTF-8");
        props.setProperty("sighanPostProcessing", "true");

        classifier = new CRFClassifier<>(props);
        classifier.loadClassifierNoExceptions("data/ctb.gz", props);
        // Apply the properties to the flags again after the model has been loaded.
        classifier.flags.setProperties(props);
    }

    /**
     * Returns the shared segmenter, creating it on the first call.
     *
     * @return the single {@code CoreNLPSegment} instance
     */
    public static CoreNLPSegment getInstance() {
        if (instance == null) {
            instance = new CoreNLPSegment();
        }
        return instance;
    }

    /**
     * Segments the given text into words.
     *
     * @param data the text to segment
     * @return the segmented words, in the order returned by the classifier
     */
    public String[] doSegment(String data) {
        // toArray() without an argument yields Object[], which cannot be cast to String[]
        // (ClassCastException at runtime); pass a typed array to get a real String[].
        return classifier.segmentString(data).toArray(new String[0]);
    }

    /**
     * Demo entry point: segments a sample sentence and prints one word per line.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String sentence = "他和我在学校里常打桌球。";
        String[] ret = CoreNLPSegment.getInstance().doSegment(sentence);
        for (String str : ret) {
            System.out.println(str);
        }
    }
}