来源:https://www.khalidalnajjar.com/setup-use-stanford-corenlp-server-python/
需要先安装java版StanfordCoreNLP,然后使用python通过api访问这个java版
1) Install Java 8 (if not installed)
检查是否安装 :
java -version
结果显示1.8+表示安装了java 8
未安装,则访问 https://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html ,选择与系统匹配的版本下载并安装
以linux系统为例:
tar -xzvf jdk-8u144-linux-x64.tar.gz
添加临时环境变量(仅对当前终端会话有效;此处假设 JDK 解压在 ~/java8 目录下,请按实际路径修改): export PATH=~/java8/jdk1.8.0_144/bin:$PATH
2) Download Stanford CoreNLP
wget http://nlp.stanford.edu/software/stanford-corenlp-full-2018-10-05.zip
或者访问 https://stanfordnlp.github.io/CoreNLP/index.html#download ,点击下载
3) Running Stanford CoreNLP Server
unzip stanford-corenlp-full-2018-10-05.zip
cd stanford-corenlp-full-2018-10-05
java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -annotators "tokenize,ssplit,pos,lemma,parse,sentiment" -port 9000 -timeout 30000
至此打开了StanfordCoreNLP服务器api
4) Accessing Stanford CoreNLP Server using Python
保持上一步的服务器窗口运行,再打开一个新的命令行窗口
运行下列python代码
python代码
'''
A sample code usage of the python package stanfordcorenlp to access a Stanford CoreNLP server.
Written as part of the blog post: https://www.khalidalnajjar.com/how-to-setup-and-use-stanford-corenlp-server-with-python/
'''
from stanfordcorenlp import StanfordCoreNLP
import logging
import json
class StanfordNLP:
    """Thin convenience wrapper around a ``StanfordCoreNLP`` client.

    Every method forwards the given text to the corresponding method of the
    underlying ``stanfordcorenlp.StanfordCoreNLP`` object stored in
    ``self.nlp``; the server itself must already be running.
    """

    def __init__(self, host='http://localhost', port=9000):
        """Connect to a CoreNLP server.

        Args:
            host: Base URL of the server (scheme included).
            port: TCP port the server listens on.
        """
        # For verbose client output the original author suggests also passing
        # quiet=False, logging_level=logging.DEBUG.
        self.nlp = StanfordCoreNLP(host, port=port, timeout=30000)
        # Properties sent along with every ``annotate`` request.
        self.props = {
            'annotators': 'tokenize,ssplit,pos,lemma,ner,parse,depparse,dcoref,relation',
            'pipelineLanguage': 'en',
            'outputFormat': 'json'
        }

    def word_tokenize(self, sentence):
        """Return the client's ``word_tokenize`` result for *sentence*."""
        return self.nlp.word_tokenize(sentence)

    def pos(self, sentence):
        """Return the client's ``pos_tag`` result for *sentence*."""
        return self.nlp.pos_tag(sentence)

    def ner(self, sentence):
        """Return the client's ``ner`` result for *sentence*."""
        return self.nlp.ner(sentence)

    def parse(self, sentence):
        """Return the client's ``parse`` result for *sentence*."""
        return self.nlp.parse(sentence)

    def dependency_parse(self, sentence):
        """Return the client's ``dependency_parse`` result for *sentence*."""
        return self.nlp.dependency_parse(sentence)

    def annotate(self, sentence):
        """Run the pipeline configured in ``self.props`` and decode the reply.

        The client returns the server response as a JSON string (the
        properties request ``outputFormat: json``); it is parsed here so the
        caller receives plain Python objects.
        """
        return json.loads(self.nlp.annotate(sentence, properties=self.props))

    @staticmethod
    def tokens_to_dict(_tokens):
        """Index token records by their integer ``index`` field.

        Args:
            _tokens: Iterable of mappings that each provide the keys
                ``index``, ``word``, ``lemma``, ``pos`` and ``ner``.

        Returns:
            A ``defaultdict(dict)`` mapping ``int(token['index'])`` to a dict
            holding only ``word``, ``lemma``, ``pos`` and ``ner``. Looking up
            an index that is not present yields an empty dict.

        Raises:
            KeyError: If a token lacks one of the required keys.
        """
        # Imported locally: the file's import section does not provide
        # ``defaultdict``, so the original code raised NameError here.
        from collections import defaultdict

        tokens = defaultdict(dict)
        for token in _tokens:
            tokens[int(token['index'])] = {
                'word': token['word'],
                'lemma': token['lemma'],
                'pos': token['pos'],
                'ner': token['ner']
            }
        return tokens
if __name__ == '__main__':
    # Demo: send one sample text through every wrapper method and print the
    # results. The CoreNLP server must already be running and reachable with
    # StanfordNLP's default host/port.
    sNLP = StanfordNLP()
    # Example sentence; replace it with any text you want to analyse.
    text = 'A blog post using Stanford CoreNLP Server. Visit www.khalidalnajjar.com for more details.'
    # print() calls instead of Python 2 print statements, so the script runs
    # on Python 3.
    print("Annotate:", sNLP.annotate(text))
    print("POS:", sNLP.pos(text))
    print("Tokens:", sNLP.word_tokenize(text))
    print("NER:", sNLP.ner(text))
    print("Parse:", sNLP.parse(text))
    print("Dep Parse:", sNLP.dependency_parse(text))
运行结果
Annotate: {'sentences': [{'index': 0, 'parse': '(ROOT\n (S\n (NP (DT A) (NN blog))\n (VP (NN post)\n (S\n (VP (VBG using)\n (NP (NNP Stanford) (NNP CoreNLP) (NN Server)))))\n (. .)))', 'basicDependencies': [{'dep': 'ROOT', 'governor': 0, 'governorGloss': 'ROOT', 'dependent': 3, 'dependentGloss': 'post'}, {'dep': 'det', 'governor': 3, 'governorGloss': 'post', 'dependent': 1, 'dependentGloss': 'A'}, {'dep': 'compound', 'governor': 3, 'governorGloss': 'post', 'dependent': 2, 'dependentGloss': 'blog'}, {'dep': 'acl', 'governor': 3, 'governorGloss': 'post', 'dependent': 4, 'dependentGloss': 'using'}, {'dep': 'compound', 'governor': 7, 'governorGloss': 'Server', 'dependent': 5, 'dependentGloss': 'Stanford'}, {'dep': 'compound', 'governor': 7, 'governorGloss': 'Server', 'dependent': 6, 'dependentGloss': 'CoreNLP'}, {'dep': 'dobj', 'governor': 4, 'governorGloss': 'using', 'dependent': 7, 'dependentGloss': 'Server'}, {'dep': 'punct', 'governor': 3, 'governorGloss': 'post', 'dependent': 8, 'dependentGloss': '.'}], 'enhancedDependencies': [{'dep': 'ROOT', 'governor': 0, 'governorGloss': 'ROOT', 'dependent': 3, 'dependentGloss': 'post'}, {'dep': 'det', 'governor': 3, 'governorGloss': 'post', 'dependent': 1, 'dependentGloss': 'A'}, {'dep': 'compound', 'governor': 3, 'governorGloss': 'post', 'dependent': 2, 'dependentGloss': 'blog'}, {'dep': 'acl', 'governor': 3, 'governorGloss': 'post', 'dependent': 4, 'dependentGloss': 'using'}, {'dep': 'compound', 'governor': 7, 'governorGloss': 'Server', 'dependent': 5, 'dependentGloss': 'Stanford'}, {'dep': 'compound', 'governor': 7, 'governorGloss': 'Server', 'dependent': 6, 'dependentGloss': 'CoreNLP'}, {'dep': 'dobj', 'governor': 4, 'governorGloss': 'using', 'dependent': 7, 'dependentGloss': 'Server'}, {'dep': 'punct', 'governor': 3, 'governorGloss': 'post', 'dependent': 8, 'dependentGloss': '.'}], 'enhancedPlusPlusDependencies': [{'dep': 'ROOT', 'governor': 0, 'governorGloss': 'ROOT', 'dependent': 3, 'dependentGloss': 'post'}, {'dep': 
'det', 'governor': 3, 'governorGloss': 'post', 'dependent': 1, 'dependentGloss': 'A'}, {'dep': 'compound', 'governor': 3, 'governorGloss': 'post', 'dependent': 2, 'dependentGloss': 'blog'}, {'dep': 'acl', 'governor': 3, 'governorGloss': 'post', 'dependent': 4, 'dependentGloss': 'using'}, {'dep': 'compound', 'governor': 7, 'governorGloss': 'Server', 'dependent': 5, 'dependentGloss': 'Stanford'}, {'dep': 'compound', 'governor': 7, 'governorGloss': 'Server', 'dependent': 6, 'dependentGloss': 'CoreNLP'}, {'dep': 'dobj', 'governor': 4, 'governorGloss': 'using', 'dependent': 7, 'dependentGloss': 'Server'}, {'dep': 'punct', 'governor': 3, 'governorGloss': 'post', 'dependent': 8, 'dependentGloss': '.'}], 'entitymentions': [{'docTokenBegin': 4, 'docTokenEnd': 5, 'tokenBegin': 4, 'tokenEnd': 5, 'text': 'Stanford', 'characterOffsetBegin': 18, 'characterOffsetEnd': 26, 'ner': 'ORGANIZATION'}], 'tokens': [{'index': 1, 'word': 'A', 'originalText': 'A', 'lemma': 'a', 'characterOffsetBegin': 0, 'characterOffsetEnd': 1, 'pos': 'DT', 'ner': 'O', 'speaker': 'PER0', 'before': '', 'after': ' '}, {'index': 2, 'word': 'blog', 'originalText': 'blog', 'lemma': 'blog', 'characterOffsetBegin': 2, 'characterOffsetEnd': 6, 'pos': 'NN', 'ner': 'O', 'speaker': 'PER0', 'before': ' ', 'after': ' '}, {'index': 3, 'word': 'post', 'originalText': 'post', 'lemma': 'post', 'characterOffsetBegin': 7, 'characterOffsetEnd': 11, 'pos': 'NN', 'ner': 'O', 'speaker': 'PER0', 'before': ' ', 'after': ' '}, {'index': 4, 'word': 'using', 'originalText': 'using', 'lemma': 'use', 'characterOffsetBegin': 12, 'characterOffsetEnd': 17, 'pos': 'VBG', 'ner': 'O', 'speaker': 'PER0', 'before': ' ', 'after': ' '}, {'index': 5, 'word': 'Stanford', 'originalText': 'Stanford', 'lemma': 'Stanford', 'characterOffsetBegin': 18, 'characterOffsetEnd': 26, 'pos': 'NNP', 'ner': 'ORGANIZATION', 'speaker': 'PER0', 'before': ' ', 'after': ' '}, {'index': 6, 'word': 'CoreNLP', 'originalText': 'CoreNLP', 'lemma': 'CoreNLP', 
'characterOffsetBegin': 27, 'characterOffsetEnd': 34, 'pos': 'NNP', 'ner': 'O', 'speaker': 'PER0', 'before': ' ', 'after': ' '}, {'index': 7, 'word': 'Server', 'originalText': 'Server', 'lemma': 'server', 'characterOffsetBegin': 35, 'characterOffsetEnd': 41, 'pos': 'NN', 'ner': 'O', 'speaker': 'PER0', 'before': ' ', 'after': ''}, {'index': 8, 'word': '.', 'originalText': '.', 'lemma': '.', 'characterOffsetBegin': 41, 'characterOffsetEnd': 42, 'pos': '.', 'ner': 'O', 'speaker': 'PER0', 'before': '', 'after': ' '}]}, {'index': 1, 'parse': '(ROOT\n (NP\n (NP (NN Visit) (NN www.khalidalnajjar.com))\n (PP (IN for)\n (NP (JJR more) (NNS details)))\n (. .)))', 'basicDependencies': [{'dep': 'ROOT', 'governor': 0, 'governorGloss': 'ROOT', 'dependent': 2, 'dependentGloss': 'www.khalidalnajjar.com'}, {'dep': 'compound', 'governor': 2, 'governorGloss': 'www.khalidalnajjar.com', 'dependent': 1, 'dependentGloss': 'Visit'}, {'dep': 'case', 'governor': 5, 'governorGloss': 'details', 'dependent': 3, 'dependentGloss': 'for'}, {'dep': 'amod', 'governor': 5, 'governorGloss': 'details', 'dependent': 4, 'dependentGloss': 'more'}, {'dep': 'nmod', 'governor': 2, 'governorGloss': 'www.khalidalnajjar.com', 'dependent': 5, 'dependentGloss': 'details'}, {'dep': 'punct', 'governor': 2, 'governorGloss': 'www.khalidalnajjar.com', 'dependent': 6, 'dependentGloss': '.'}], 'enhancedDependencies': [{'dep': 'ROOT', 'governor': 0, 'governorGloss': 'ROOT', 'dependent': 2, 'dependentGloss': 'www.khalidalnajjar.com'}, {'dep': 'compound', 'governor': 2, 'governorGloss': 'www.khalidalnajjar.com', 'dependent': 1, 'dependentGloss': 'Visit'}, {'dep': 'case', 'governor': 5, 'governorGloss': 'details', 'dependent': 3, 'dependentGloss': 'for'}, {'dep': 'amod', 'governor': 5, 'governorGloss': 'details', 'dependent': 4, 'dependentGloss': 'more'}, {'dep': 'nmod:for', 'governor': 2, 'governorGloss': 'www.khalidalnajjar.com', 'dependent': 5, 'dependentGloss': 'details'}, {'dep': 'punct', 'governor': 2, 
'governorGloss': 'www.khalidalnajjar.com', 'dependent': 6, 'dependentGloss': '.'}], 'enhancedPlusPlusDependencies': [{'dep': 'ROOT', 'governor': 0, 'governorGloss': 'ROOT', 'dependent': 2, 'dependentGloss': 'www.khalidalnajjar.com'}, {'dep': 'compound', 'governor': 2, 'governorGloss': 'www.khalidalnajjar.com', 'dependent': 1, 'dependentGloss': 'Visit'}, {'dep': 'case', 'governor': 5, 'governorGloss': 'details', 'dependent': 3, 'dependentGloss': 'for'}, {'dep': 'amod', 'governor': 5, 'governorGloss': 'details', 'dependent': 4, 'dependentGloss': 'more'}, {'dep': 'nmod:for', 'governor': 2, 'governorGloss': 'www.khalidalnajjar.com', 'dependent': 5, 'dependentGloss': 'details'}, {'dep': 'punct', 'governor': 2, 'governorGloss': 'www.khalidalnajjar.com', 'dependent': 6, 'dependentGloss': '.'}], 'entitymentions': [{'docTokenBegin': 9, 'docTokenEnd': 10, 'tokenBegin': 1, 'tokenEnd': 2, 'text': 'www.khalidalnajjar.com', 'characterOffsetBegin': 49, 'characterOffsetEnd': 71, 'ner': 'URL'}], 'tokens': [{'index': 1, 'word': 'Visit', 'originalText': 'Visit', 'lemma': 'visit', 'characterOffsetBegin': 43, 'characterOffsetEnd': 48, 'pos': 'NN', 'ner': 'O', 'speaker': 'PER0', 'before': ' ', 'after': ' '}, {'index': 2, 'word': 'www.khalidalnajjar.com', 'originalText': 'www.khalidalnajjar.com', 'lemma': 'www.khalidalnajjar.com', 'characterOffsetBegin': 49, 'characterOffsetEnd': 71, 'pos': 'NN', 'ner': 'URL', 'speaker': 'PER0', 'before': ' ', 'after': ' '}, {'index': 3, 'word': 'for', 'originalText': 'for', 'lemma': 'for', 'characterOffsetBegin': 72, 'characterOffsetEnd': 75, 'pos': 'IN', 'ner': 'O', 'speaker': 'PER0', 'before': ' ', 'after': ' '}, {'index': 4, 'word': 'more', 'originalText': 'more', 'lemma': 'more', 'characterOffsetBegin': 76, 'characterOffsetEnd': 80, 'pos': 'JJR', 'ner': 'O', 'speaker': 'PER0', 'before': ' ', 'after': ' '}, {'index': 5, 'word': 'details', 'originalText': 'details', 'lemma': 'detail', 'characterOffsetBegin': 81, 'characterOffsetEnd': 88, 'pos': 'NNS', 
'ner': 'O', 'speaker': 'PER0', 'before': ' ', 'after': ''}, {'index': 6, 'word': '.', 'originalText': '.', 'lemma': '.', 'characterOffsetBegin': 88, 'characterOffsetEnd': 89, 'pos': '.', 'ner': 'O', 'speaker': 'PER0', 'before': '', 'after': ''}]}], 'corefs': {'1': [{'id': 1, 'text': 'Stanford', 'type': 'PROPER', 'number': 'UNKNOWN', 'gender': 'NEUTRAL', 'animacy': 'INANIMATE', 'startIndex': 5, 'endIndex': 6, 'headIndex': 5, 'sentNum': 1, 'position': [1, 3], 'isRepresentativeMention': True}], '2': [{'id': 2, 'text': 'A blog', 'type': 'NOMINAL', 'number': 'SINGULAR', 'gender': 'UNKNOWN', 'animacy': 'INANIMATE', 'startIndex': 1, 'endIndex': 3, 'headIndex': 2, 'sentNum': 1, 'position': [1, 1], 'isRepresentativeMention': True}], '3': [{'id': 3, 'text': 'Stanford CoreNLP Server', 'type': 'NOMINAL', 'number': 'SINGULAR', 'gender': 'NEUTRAL', 'animacy': 'INANIMATE', 'startIndex': 5, 'endIndex': 8, 'headIndex': 7, 'sentNum': 1, 'position': [1, 2], 'isRepresentativeMention': True}], '4': [{'id': 4, 'text': 'Visit www.khalidalnajjar.com for more details .', 'type': 'NOMINAL', 'number': 'SINGULAR', 'gender': 'UNKNOWN', 'animacy': 'UNKNOWN', 'startIndex': 1, 'endIndex': 7, 'headIndex': 2, 'sentNum': 2, 'position': [2, 1], 'isRepresentativeMention': True}], '5': [{'id': 5, 'text': 'more details', 'type': 'NOMINAL', 'number': 'PLURAL', 'gender': 'UNKNOWN', 'animacy': 'INANIMATE', 'startIndex': 4, 'endIndex': 6, 'headIndex': 5, 'sentNum': 2, 'position': [2, 2], 'isRepresentativeMention': True}]}}
POS: [('A', 'DT'), ('blog', 'NN'), ('post', 'NN'), ('using', 'VBG'), ('Stanford', 'NNP'), ('CoreNLP', 'NNP'), ('Server', 'NN'), ('.', '.'), ('Visit', 'NN'), ('www.khalidalnajjar.com', 'NN'), ('for', 'IN'), ('more', 'JJR'), ('details', 'NNS'), ('.', '.')]
Tokens: ['A', 'blog', 'post', 'using', 'Stanford', 'CoreNLP', 'Server', '.', 'Visit', 'www.khalidalnajjar.com', 'for', 'more', 'details', '.']
NER: [('A', 'O'), ('blog', 'O'), ('post', 'O'), ('using', 'O'), ('Stanford', 'ORGANIZATION'), ('CoreNLP', 'O'), ('Server', 'O'), ('.', 'O'), ('Visit', 'O'), ('www.khalidalnajjar.com', 'URL'), ('for', 'O'), ('more', 'O'), ('details', 'O'), ('.', 'O')]
Parse: (ROOT
(S
(NP (DT A) (NN blog))
(VP (NN post)
(S
(VP (VBG using)
(NP (NNP Stanford) (NNP CoreNLP) (NN Server)))))
(. .)))
Dep Parse: [('ROOT', 0, 3), ('det', 3, 1), ('compound', 3, 2), ('acl', 3, 4), ('compound', 7, 5), ('compound', 7, 6), ('dobj', 4, 7), ('punct', 3, 8), ('ROOT', 0, 2), ('compound', 2, 1), ('case', 5, 3), ('amod', 5, 4), ('nmod', 2, 5), ('punct', 2, 6)]