3、编写代码进行分词及词性标注
package com.neohope.opennlp.test;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import opennlp.tools.cmdline.PerformanceMonitor;
import opennlp.tools.cmdline.postag.POSModelLoader;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSSample;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.tokenize.WhitespaceTokenizer;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
/**
 * Small demo that part-of-speech tags a fixed English text with Apache OpenNLP.
 *
 * <p>The text is split into tokens on whitespace only, so punctuation stays
 * attached to the neighbouring word (for example {@code "something,"}).
 */
public class TestIt {

    /**
     * Tags the built-in sample text and prints the result to standard output.
     *
     * <p>The POS model is loaded from the file {@code en-pos-maxent.bin}, given as
     * a relative path. Each line read from the input is tokenized with
     * {@link WhitespaceTokenizer}, tagged with {@link POSTaggerME}, and printed as
     * the string form of a {@link POSSample}. A {@link PerformanceMonitor} writing
     * to {@code System.err} is incremented once per processed line.
     *
     * @throws IOException if reading from or closing the line stream fails
     */
    // NOTE(review): the deprecation suppression is presumably for the
    // PlainTextByLineStream(Reader) constructor -- confirm against the OpenNLP
    // version in use.
    @SuppressWarnings("deprecation")
    public static void POSTag() throws IOException {
        POSModel model = new POSModelLoader()
                .load(new File("en-pos-maxent.bin"));
        PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
        POSTaggerME tagger = new POSTaggerME(model);

        String input = "Don't ever let somebody tell you you can't do something, not even me. "
                + "You got a dream, you gotta protect it. "
                + "People can’t do something themselves, they wanna tell you you can’t do it. "
                + "If you want something, go get it. " + "Period.";

        // The type argument is required: with a raw ObjectStream, read() returns
        // Object and cannot be assigned to a String.
        ObjectStream<String> lineStream = new PlainTextByLineStream(
                new StringReader(input));
        try {
            perfMon.start();
            String line;
            while ((line = lineStream.read()) != null) {
                String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(line);
                String[] tags = tagger.tag(tokens);
                POSSample sample = new POSSample(tokens, tags);
                System.out.println(sample.toString());
                perfMon.incrementCounter();
            }
            perfMon.stopAndPrintFinalResult();
        } finally {
            // Explicit close (rather than try-with-resources) so this does not
            // depend on ObjectStream being AutoCloseable.
            lineStream.close();
        }
    }

    /**
     * Program entry point; runs {@link #POSTag()}.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if {@link #POSTag()} fails with an I/O error
     */
    public static void main(String[] args) throws IOException {
        POSTag();
    }
}
4、输出结果
Don't_NNP ever_RB let_VB somebody_NN tell_VB you_PRP you_PRP can't_MD do_VB something,_RB not_RB even_RB me._RBR You_PRP got_VBD a_DT dream,_NN you_PRP gotta_VBP protect_VB it._PRP People_NNS can’t_MD do_VB something_NN themselves,_, they_PRP wanna_MD tell_VB you_PRP you_PRP can’t_MD do_VB it._PRP If_IN you_PRP want_VBP something,_NN go_VB get_VB it._PRP Period._.
Share the post "使用OpenNLP进行分词及词性标注"