来自http://blog.csdn.net/boy178564309/article/details/12418691
为了进行中文句法分析，我使用了 Stanford Parser。刚开始时一头茫然，网上搜到的示例代码运行时也有很多异常；经过一番处理后，现将可以运行的代码粘贴如下，希望对用到的人有所帮助：
- import java.util.*;
- import java.io.StringReader;
- import edu.stanford.nlp.process.CoreLabelTokenFactory;
- import edu.stanford.nlp.process.DocumentPreprocessor;
- import edu.stanford.nlp.process.PTBTokenizer;
- import edu.stanford.nlp.process.TokenizerFactory;
- import edu.stanford.nlp.ling.CoreLabel;
- import edu.stanford.nlp.ling.HasWord;
- import edu.stanford.nlp.trees.*;
- import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
- class ParserDemo {
- public static void main(String[] args) {
- LexicalizedParser lp =
- LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/chinesePCFG.ser.gz");
- if (args.length > 0) {
- demoDP(lp, args[0]);
- } else {
- demoAPI(lp);
- }
- }
- public static void demoDP(LexicalizedParser lp, String filename) {
- // This option shows loading and sentence-segment and tokenizing
- // a file using DocumentPreprocessor
- TreebankLanguagePack tlp = new PennTreebankLanguagePack();
- GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
- // You could also create a tokenier here (as below) and pass it
- // to DocumentPreprocessor
- for (List<HasWord> sentence : new DocumentPreprocessor(filename)) {
- Tree parse = lp.apply(sentence);
- parse.pennPrint();
- System.out.println();
- GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
- Collection tdl = gs.typedDependenciesCCprocessed(true);
- System.out.println(tdl);
- System.out.println();
- }
- }
- public static void demoAPI(LexicalizedParser lp) {
- // This option shows parsing a list of correctly tokenized words
- String[] sent = { "我", "是", "一名", "好", "学生", "。" };
- List<CoreLabel> rawWords = new ArrayList<CoreLabel>();
- for (String word : sent) {
- CoreLabel l = new CoreLabel();
- l.setWord(word);
- rawWords.add(l);
- }
- Tree parse = lp.apply(rawWords);
- parse.pennPrint();
- System.out.println();
- TreebankLanguagePack tlp = lp.getOp().langpack();
- GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
- GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
- List<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
- System.out.println(tdl);
- System.out.println();
- TreePrint tp = new TreePrint("penn,typedDependenciesCollapsed",tlp);
- tp.printTree(parse);
- }
- private ParserDemo() {} // static methods only
- }
运行结果:
- Loading parser from serialized file edu/stanford/nlp/models/lexparser/chinesePCFG.ser.gz ... done [3.3 sec].
- (ROOT
- (IP
- (NP (PN 我))
- (VP (VC 是)
- (NP
- (QP (CD 一名))
- (ADJP (JJ 好))
- (NP (NN 学生))))
- (PU 。)))
- [top(是-2, 我-1), root(ROOT-0, 是-2), nummod(学生-5, 一名-3), amod(学生-5, 好-4), attr(是-2, 学生-5)]
- (ROOT
- (IP
- (NP (PN 我))
- (VP (VC 是)
- (NP
- (QP (CD 一名))
- (ADJP (JJ 好))
- (NP (NN 学生))))
- (PU 。)))
- top(是-2, 我-1)
- root(ROOT-0, 是-2)
- nummod(学生-5, 一名-3)
- amod(学生-5, 好-4)
- attr(是-2, 学生-5)