两个节点在依存解析树上的最短子树（Python）

最新推荐文章于 2021-02-19 14:04:03 发布

小妖精Fsky

最新推荐文章于 2021-02-19 14:04:03 发布

阅读量2.4k

点赞数 1

分类专栏： NLPTools 文章标签： python

本文链接：https://blog.csdn.net/appleml/article/details/62889745

版权

NLPTools 专栏收录该内容

19 篇文章 0 订阅

订阅专栏

前提条件：已有 Stanford Parser 的依存解析结果（直接使用现成的解析输出）；针对给出的两个节点，找出它们在依存树上的最短依存子树。
shortest_dependency_tree.py

import parser.dependent_tree_rela as dt_rela
# Sample dependency parse used as demo input: one string per arc, written as
# "relation(governor-index, dependent-index)".
# The last entry used to end in ")]" because a stray "]" sat inside the string
# literal; it now ends in ")" like every other entry.
parseTree = [
    "case(cells-5, In-1)",
    "amod(cells-5, mature-2)",
    "amod(cells-5, human-3)",
    "compound(cells-5, B-4)",
    "nmod(inhibited-8, cells-5)",
    "nsubj(inhibited-8, BMP-6-7)",
    "root(ROOT-0, inhibited-8)",
    "compound(growth-10, cell-9)",
    "dobj(inhibited-8, growth-10)",
    "cc(inhibited-8, and-12)",
    "advmod(induced-14, rapidly-13)",
    "conj(inhibited-8, induced-14)",
    "dobj(induced-14, phosphorylation-15)",
    "case(Smad1/5/8-17, of-16)",
    "nmod(phosphorylation-15, Smad1/5/8-17)",
    "acl(Smad1/5/8-17, followed-18)",
    "case(upregulation-21, by-19)",
    "det(upregulation-21, an-20)",
    "nmod(followed-18, upregulation-21)",
    "case(Id1-23, of-22)",
    "nmod(upregulation-21, Id1-23)",
]

# Module-level working state shared by get_shortest_dependent_path() and
# get_node_father(). For each of the two query words there are three lists:
#   *_father_index : node indices, starting with the query word itself and
#                    followed by each governor found while climbing
#   *_father_word  : the words belonging to those indices (same length)
#   *_father_rela  : relation label of each arc that was followed, so this
#                    list has one entry fewer than the two above
word1_father_index, word1_father_word, word1_father_rela = [], [], []
word2_father_index, word2_father_word, word2_father_rela = [], [], []

def _positions(indices, target):
    """Return every position at which ``target`` occurs in ``indices``."""
    return [pos for pos, value in enumerate(indices) if value == target]


def _format_branch(words, indices, relas, last):
    """Render chain entries 0..last as ``word_idx__(rela)__word_idx...``."""
    pieces = []
    for pos in range(last + 1):
        pieces.append("{}_{}".format(words[pos], indices[pos]))
        if pos != last:
            # relas[pos] labels the arc from entry pos to entry pos + 1.
            pieces.append("__({})__".format(relas[pos]))
    return "".join(pieces)


def get_shortest_dependent_path(word1, word1_index, word2, word2_index, tdl, is_first):
    """Print the two branches joining two nodes of a dependency tree.

    Both nodes climb one governor per step (via ``get_node_father``), and the
    visited nodes are recorded in the module-level ``word1_father_*`` /
    ``word2_father_*`` lists. As soon as the governor just reached by one node
    already appears on the other node's chain, the two branches leading to
    that meeting node are printed: the word1 side first, then the word2 side.

    Args:
        word1, word1_index: word and node index of the first node.
        word2, word2_index: word and node index of the second node.
        tdl: dependency relation records, passed through to
            ``get_node_father``.
        is_first: true for a fresh query; the module-level chains are emptied
            and seeded with the two start nodes. False continues climbing
            from the given nodes with the chains collected so far (this is
            how the function calls itself).

    Returns:
        ``(word, index)`` of the node where the two printed branches meet.

    Raises:
        ValueError: neither node has a governor left and the chains never met.
    """
    if is_first:
        # The chains are module-level, so wipe whatever an earlier query left
        # behind; otherwise stale ancestors would be matched and printed.
        for chain in (word1_father_index, word1_father_word, word1_father_rela,
                      word2_father_index, word2_father_word, word2_father_rela):
            del chain[:]
        word1_father_index.append(word1_index)
        word1_father_word.append(word1)
        word2_father_index.append(word2_index)
        word2_father_word.append(word2)

    # Climb one step on both sides; get_node_father appends to the chains and
    # reports ("", -1) when the node has no governor.
    word1_father_value, word1_father_id = get_node_father(
        word1_index, tdl, word1_father_word, word1_father_index, word1_father_rela)
    word2_father_value, word2_father_id = get_node_father(
        word2_index, tdl, word2_father_word, word2_father_index, word2_father_rela)

    # Each candidate is (last position used on chain 1, last position used on
    # chain 2). On the node's own chain the governor just appended is the last
    # occurrence; on the other chain the first occurrence gives the shorter
    # branch.
    via_word1 = None  # word1's new governor already lies on word2's chain
    hits_on_chain2 = _positions(word2_father_index, word1_father_id)
    if hits_on_chain2:
        own = _positions(word1_father_index, word1_father_id)
        via_word1 = (own[-1] if own else -1, hits_on_chain2[0])

    via_word2 = None  # word2's new governor already lies on word1's chain
    hits_on_chain1 = _positions(word1_father_index, word2_father_id)
    if hits_on_chain1:
        own = _positions(word2_father_index, word2_father_id)
        via_word2 = (hits_on_chain1[0], own[-1] if own else -1)

    if via_word1 is None and via_word2 is None:
        if word1_father_id == -1 and word2_father_id == -1:
            # Neither side can climb any further, so recursing again would
            # never terminate.
            raise ValueError("no common ancestor found for the two nodes")
        # Nothing in common yet: continue from the two governors. The result
        # is returned as-is (the old code unpacked a None here and crashed).
        return get_shortest_dependent_path(
            word1_father_value, word1_father_id,
            word2_father_value, word2_father_id, tdl, False)

    if via_word1 is not None and via_word2 is not None:
        # Both candidates exist: keep the strictly shorter one, ties go to the
        # second candidate. (The comparison used to be inverted and picked
        # the longer path.)
        chosen = via_word1 if sum(via_word1) < sum(via_word2) else via_word2
    else:
        chosen = via_word1 if via_word1 is not None else via_word2

    end1, end2 = chosen
    print(_format_branch(word1_father_word, word1_father_index, word1_father_rela, end1))
    print(_format_branch(word2_father_word, word2_father_index, word2_father_rela, end2))

    # Report the meeting node from the chain on which it was looked up, since
    # that position is guaranteed to be valid.
    if chosen is via_word1:
        return word2_father_word[end2], word2_father_index[end2]
    return word1_father_word[end1], word1_father_index[end1]

# Find the governor (parent) of a node.
def get_node_father(wordId, tdl, father_node_word, father_node_index, rela_set):
    """Look up the governor of node ``wordId`` and record it on the chain.

    Scans ``tdl`` for the first relation whose ``dep_index`` equals ``wordId``
    and whose governor is not the node recorded last on the chain. On a hit
    the governor's word, index and relation label are appended to
    ``father_node_word``, ``father_node_index`` and ``rela_set``.

    Args:
        wordId: index of the node whose governor is wanted.
        tdl: iterable of relation records exposing ``dep_index``,
            ``gov_word``, ``gov_index`` and ``dep_rela``.
        father_node_word: chain of words collected so far (mutated).
        father_node_index: chain of node indices collected so far (mutated).
        rela_set: relation labels collected so far (mutated).

    Returns:
        ``(gov_word, gov_index)`` of the governor, or ``("", -1)`` when no
        suitable relation exists; in that case the lists are left untouched.
    """
    for tdp in tdl:
        if tdp.dep_index != wordId:
            continue

        # An arc pointing back at the node recorded last is skipped. The
        # index has to be compared with the index chain: the old code
        # compared it with the word chain, so int != str was always true and
        # this guard never fired. Empty chains simply have no "last node".
        points_at_last_node = (
            bool(father_node_word)
            and bool(father_node_index)
            and tdp.gov_word == father_node_word[-1]
            and tdp.gov_index == father_node_index[-1]
        )
        if points_at_last_node:
            continue

        father_node_word.append(tdp.gov_word)
        father_node_index.append(tdp.gov_index)
        rela_set.append(tdp.dep_rela)
        return tdp.gov_word, tdp.gov_index

    return "", -1

def changeFormat(parseTree):
    """Parse dependency strings into ``dependent_tree_rela`` records.

    Every element must look like ``rela(gov_word-gov_index, dep_word-dep_index)``.
    The text before ``(`` is the relation label; the first comma separates
    governor from dependent; within each side the last ``-`` separates the
    word from its index, so words that contain ``-`` themselves (``BMP-6-7``)
    are handled.

    Args:
        parseTree: iterable of dependency strings.

    Returns:
        A list with one ``dt_rela.dependent_tree_rela`` per input string, in
        input order, with ``gov_index`` / ``dep_index`` converted to ``int``.

    Raises:
        ValueError: an element lacks ``(``, ``)``, ``,`` or ``-``, or an index
            is not an integer.
    """
    dependent_tree = []
    for element in parseTree:
        lbracket = element.index('(')
        rbracket = element.rindex(')')
        comma_index = element.index(',')

        # Strip both sides: the dependent follows ", " in the input, so
        # without this every dep_word was stored with a leading blank
        # (" In" instead of "In").
        gov_part = element[lbracket + 1:comma_index].strip()
        dep_part = element[comma_index + 1:rbracket].strip()

        gov_dash = gov_part.rindex('-')
        dep_dash = dep_part.rindex('-')

        dep_relation = dt_rela.dependent_tree_rela()
        dep_relation.gov_word = gov_part[:gov_dash]
        dep_relation.gov_index = int(gov_part[gov_dash + 1:])
        dep_relation.dep_word = dep_part[:dep_dash]
        dep_relation.dep_index = int(dep_part[dep_dash + 1:])
        dep_relation.dep_rela = element[:lbracket]
        dependent_tree.append(dep_relation)
    return dependent_tree

def testResult():
    """Demo driver: parse the bundled sample and query one pair of nodes.

    The sample sentence in ``parseTree`` is converted with ``changeFormat``
    and the pair ("cells", 5) / ("growth", 10) is handed to
    ``get_shortest_dependent_path``, which prints the result.
    """
    # Other node pairs the author left here for the same parse:
    #   get_shortest_dependent_path("BMP-6", 7, "inhibited", 8, relations, True)
    #   get_shortest_dependent_path("In", 1, "Smad1/5/8", 17, relations, True)
    relations = changeFormat(parseTree)
    get_shortest_dependent_path("cells", 5, "growth", 10, relations, True)


# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    testResult()

dependent_tree_rela.py

# Representation of a single dependency relation (one arc of the parse).
class dependent_tree_rela:
    """One dependency arc, ``dep_rela(gov_word-gov_index, dep_word-dep_index)``.

    The record can still be created empty and filled attribute by attribute;
    the fields may now also be passed to the constructor, all optional.
    """

    # Class-level defaults, kept so the attributes remain readable on the
    # class itself exactly as before.
    dep_word = ""    # word of the dependent (child) node
    dep_index = -1   # node index of the dependent; -1 means "not set"
    gov_word = ""    # word of the governor (parent) node
    gov_index = -1   # node index of the governor; -1 means "not set"
    dep_rela = ""    # relation label of the arc

    def __init__(self, dep_word="", dep_index=-1, gov_word="", gov_index=-1, dep_rela=""):
        self.dep_word = dep_word
        self.dep_index = dep_index
        self.gov_word = gov_word
        self.gov_index = gov_index
        self.dep_rela = dep_rela

    def __repr__(self):
        # Readable form for debugging and printing lists of relations.
        return (
            "dependent_tree_rela(dep_word={!r}, dep_index={!r}, gov_word={!r}, "
            "gov_index={!r}, dep_rela={!r})"
        ).format(self.dep_word, self.dep_index, self.gov_word,
                 self.gov_index, self.dep_rela)

小妖精Fsky

关注

1
点赞
踩
5

收藏

觉得还不错? 一键收藏
1
评论
两个节点在依存解析树上的最短子树（Python）

前提条件：根据 Stanford Parser 解析出的结果（调用现成的结果），针对给出的两个节点找其最短依存子树。shortest_dependency_tree.py：import parser.dependent_tree_rela as dt_rela；parseTree = ["case(cells-5, In-1)","amod(cells-5, mature-2)","amod(cells-5…
复制链接

扫一扫