安装 Stanford CoreNLP 的 Python 封装库(stanfordcorenlp)
# Install the `stanfordcorenlp` Python package with pip (notebook shell escape);
# the StanfordCoreNLP class used in later cells is imported from it.
!pip install stanfordcorenlp
下载 Stanford CoreNLP 压缩包
!wget http://nlp.stanford.edu/software/stanford-corenlp-latest.zip
解压
# Extract the downloaded archive into /content (notebook shell escape).
!unzip '/content/stanford-corenlp-latest.zip' -d '/content'
切换工作目录到 CoreNLP 所在目录
import os
os.chdir('/content/stanford-corenlp-4.0.0')
!pwd
样例一
from stanfordcorenlp import StanfordCoreNLP
import nltk
from nltk.tree import Tree as nltkTree

# Point the wrapper at the directory that contains the Stanford CoreNLP files.
nlp = StanfordCoreNLP('/content/stanford-corenlp-4.0.0')
# Input sentence to annotate.
sentence = 'Video becomes a new way of communication between Internet users with the proliferation of sensor-rich mobile devices.'
try:
    print('Part of Speech:', nlp.pos_tag(sentence))
    # Label fixed: this line prints the dependency parse, not POS tags.
    print('Dependency Parsing:', nlp.dependency_parse(sentence))
    print(nlp.word_tokenize(sentence))
    print(nlp.ner(sentence))
    print(nlp.parse(sentence))
finally:
    # Shut down the backend server started by StanfordCoreNLP(...); otherwise
    # it keeps running (and holding memory) after this cell finishes.
    nlp.close()
对每条 caption 做词性标注,并将结果写入 JSON 文件
import json

from stanfordcorenlp import StanfordCoreNLP
import nltk
from nltk.tree import Tree as nltkTree

# Point the wrapper at the directory that contains the Stanford CoreNLP files.
nlp = StanfordCoreNLP('/content/stanford-corenlp-4.0.0')
# Not used in this cell; kept in case other cells reference these names.
all_cap_pos = []
all_img_id = []
# One dict per caption: {'word': [...], 'pos': [...], 'image_id': ...}.
examples = []
try:
    # NOTE(review): `annotations` is not defined in this cell; it must already
    # be loaded, with an 'annotations' list whose items carry 'caption' and
    # 'image_id' keys. Only the first 2000 entries are processed.
    for annot in annotations['annotations'][:2000]:
        # pos_tag output is indexed as pair[0] (token) and pair[1] (tag).
        cap_pos = nlp.pos_tag(annot['caption'])
        examples.append({
            'word': [pair[0] for pair in cap_pos],
            'pos': [pair[1] for pair in cap_pos],
            'image_id': annot['image_id'],
        })
finally:
    # Shut down the backend server started by StanfordCoreNLP(...); otherwise
    # it keeps running (and holding memory) after this cell finishes.
    nlp.close()

# Write all tagged captions to a single JSON file.
with open('/content/cap_pos.json', 'w', encoding='utf-8') as f:
    json.dump(examples, f)