// Stanford CoreNLP pipeline setup for Chinese, followed by a sentence-splitting demo.
Properties properties = new Properties();

// Pipeline annotators — lemma is a no-op for Chinese but is currently required
// because coref demands it (a quirk of the old requirements system).
properties.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse,depparse,coref,kbp,quote");

// Word segmentation (Chinese text has no whitespace word boundaries).
properties.setProperty("tokenize.language", "zh");
properties.setProperty("segment.model", "edu/stanford/nlp/models/segmenter/chinese/ctb.gz");
properties.setProperty("segment.sighanCorporaDict", "edu/stanford/nlp/models/segmenter/chinese");
properties.setProperty("segment.serDictionary", "edu/stanford/nlp/models/segmenter/chinese/dict-chris6.ser.gz");
properties.setProperty("segment.sighanPostProcessing", "true");

// Sentence splitting on full-width and ASCII terminal punctuation.
properties.setProperty("ssplit.boundaryTokenRegex", "[.。]|[!?!?,;,]+");

// Part-of-speech tagging.
properties.setProperty("pos.model", "edu/stanford/nlp/models/pos-tagger/chinese-distsim/chinese-distsim.tagger");

// Named-entity recognition.
properties.setProperty("ner.language", "chinese");
properties.setProperty("ner.model", "edu/stanford/nlp/models/ner/chinese.misc.distsim.crf.ser.gz");
properties.setProperty("ner.applyNumericClassifiers", "true");
properties.setProperty("ner.useSUTime", "false");
properties.setProperty("ner.fine.regexner.mapping", "edu/stanford/nlp/models/kbp/chinese/gazetteers/cn_regexner_mapping.tab");
properties.setProperty("ner.fine.regexner.noDefaultOverwriteLabels", "");

// Constituency parsing (Chinese shift-reduce parser model).
properties.setProperty("parse.model", "edu/stanford/nlp/models/srparser/chineseSR.ser.gz");

// Dependency parsing.
properties.setProperty("depparse.model", "edu/stanford/nlp/models/parser/nndep/UD_Chinese.gz");
properties.setProperty("depparse.language", "chinese");

// Entity linking against the Chinese wiki dictionary.
properties.setProperty("entitylink.wikidict", "edu/stanford/nlp/models/kbp/chinese/wikidict_chinese.tsv.gz");

StanfordCoreNLP pipeline = new StanfordCoreNLP(properties);

// Sentence-splitting demo: annotate an example document and print each sentence.
String text = "乔·史密斯出生于加利福尼亚。2017年夏天,他去了法国巴黎。他的航班于2017年7月10日下午3点起飞。第一次吃了一些蜗牛后,乔说:“太好吃了!”他寄了一张明信片给他的妹妹简·史密斯,他打了他的女儿汤姆。听了乔的旅行后,简决定有一天去法国。"; // make an example document
CoreDocument doc = new CoreDocument(text); // annotate the document
pipeline.annotate(doc);
List<CoreSentence> sentences = doc.sentences();
System.out.println("-----------分句-------------");
for (CoreSentence sentence : sentences) {
    System.out.println(sentence.text());
    System.out.println("************************");
}