在 Python 中使用 StanfordCoreNLP 进行命名实体识别

本文链接：https://blog.csdn.net/weixin_45858635/article/details/126021922

本文在已用 Python 的 StanfordOpenIE 抽取出（实体，关系，实体）三元组的基础之上，使用 StanfordCoreNLP 的命名实体识别，筛选出主语或宾语中含有时间实体（DATE、TIME）的三元组。（本文参考了“python中stanfordCorenlp使用教程”这篇博文，感谢这位博主的分享！）
结果如下：
（此处原为运行结果截图，图片未能随文保留。）

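1、安装 StanfordCoreNLP 的 Python 封装：pip install stanfordcorenlp（该命令只安装 Python 封装；还需另行下载并解压 Stanford CoreNLP 本体（本文使用 stanford-corenlp-4.1.0），并安装 Java 运行环境。下面代码中传给 StanfordCoreNLP 的路径就是解压后的目录。）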

查看是否安装成功：pip list

2、代码实现：

from stanfordcorenlp import StanfordCoreNLP
from tqdm import trange
import os

# Default CoreNLP installation directory (the location the original script used).
_DEFAULT_CORENLP_HOME = r'E:\NLP_Tools\StanfordNLP\.stanfordnlp_resources\stanford-corenlp-4.1.0'

# NER labels that count as "time" entities.
_DATE_TAGS = frozenset({'DATE', 'TIME'})


def _has_date_entity(nlp, text):
    """Return True if any token of *text* carries a label in ``_DATE_TAGS``."""
    # nlp.ner(text) yields one (word, tag) pair per token,
    # e.g. [('Vagococcus', 'O'), ('silagei', 'O')].
    return any(token[1] in _DATE_TAGS for token in nlp.ner(text))


def Corenlp(infile, outfile, *, corenlp_path=_DEFAULT_CORENLP_HOME, nlp=None):
    """Copy the triples that mention a DATE/TIME entity from *infile* to *outfile*.

    Each non-blank line of *infile* must be the Python-literal form of a dict
    with at least the keys ``'subject'`` and ``'object'``, e.g.
    ``{'subject': 'Vagococcus silagei', 'relation': '...', 'object': 'genus'}``.
    A line is written to *outfile* unchanged, and echoed with ``print``, when
    the NER tagger labels any token of its subject or object as DATE or TIME.
    Each matching line is written exactly once, even if both fields match.

    Args:
        infile: Path of the UTF-8 text file holding one triple per line.
        outfile: Path of the UTF-8 text file to create or overwrite.
        corenlp_path: Directory passed to ``StanfordCoreNLP`` when no client
            is supplied through *nlp*.
        nlp: Optional ready-made client exposing ``ner(text)`` and ``close()``.
            A client passed in here is left open for the caller to close; a
            client created by this function is always closed before returning.

    Raises:
        ValueError, SyntaxError: A line is not a valid Python literal.
        KeyError: A triple lacks the ``'subject'`` or ``'object'`` key.
    """
    # Local import so this function does not depend on the file's import block.
    import ast

    owns_client = nlp is None
    if owns_client:
        nlp = StanfordCoreNLP(corenlp_path)
    try:
        with open(infile, 'r', encoding='utf-8') as src, \
                open(outfile, 'w', encoding='utf-8') as dst:
            for line in src:
                record = line.strip()
                if not record:
                    # Blank lines carry no triple; skip instead of crashing.
                    continue
                # literal_eval only accepts literals, so a crafted line in the
                # input file cannot execute code the way eval() would.
                triple = ast.literal_eval(record)
                # Tag each field once; any() stops at the first matching field,
                # so the line is written a single time.
                if any(_has_date_entity(nlp, triple[key])
                       for key in ('subject', 'object')):
                    dst.write(line)
                    print(line)
    finally:
        # Release the CoreNLP client even if a line failed to parse.
        if owns_client:
            nlp.close()

# Build the input path (triples to scan) and the output path (destination
# handed to Corenlp) inside the shared result directory, then run the filter.
infile, outfile = (
    os.path.join('../Data/enwiki/result', file_name)
    for file_name in ('res_wiki_en.txt', 'res_date_en.txt')
)
Corenlp(infile, outfile)