get_node_father 前提条件:根据 StanfordParser 解析出的结果(调用现成的结果),针对给出的两个节点找其最短依存子树
shortest_dependency_tree.py
import parser.dependent_tree_rela as dt_rela
# Sample Stanford typed-dependency output used by the demo below.
# Each entry has the form "relation(governor-index, dependent-index)".
# Token indices 6 and 11 never appear in the arcs (presumably punctuation
# that the parser dropped -- TODO confirm against the original sentence).
parseTree = [
    "case(cells-5, In-1)",
    "amod(cells-5, mature-2)",
    "amod(cells-5, human-3)",
    "compound(cells-5, B-4)",
    "nmod(inhibited-8, cells-5)",
    "nsubj(inhibited-8, BMP-6-7)",
    "root(ROOT-0, inhibited-8)",
    "compound(growth-10, cell-9)",
    "dobj(inhibited-8, growth-10)",
    "cc(inhibited-8, and-12)",
    "advmod(induced-14, rapidly-13)",
    "conj(inhibited-8, induced-14)",
    "dobj(induced-14, phosphorylation-15)",
    "case(Smad1/5/8-17, of-16)",
    "nmod(phosphorylation-15, Smad1/5/8-17)",
    "acl(Smad1/5/8-17, followed-18)",
    "case(upregulation-21, by-19)",
    "det(upregulation-21, an-20)",
    "nmod(followed-18, upregulation-21)",
    "case(Id1-23, of-22)",
    # The original literal ended with a stray "]" inside the quotes.
    "nmod(upregulation-21, Id1-23)",
]
# Shared ancestor chains filled in by get_shortest_dependent_path().
# For each of the two query words there are three parallel lists:
#   *_index : token index of every node on the chain (start word first)
#   *_word  : the corresponding token text
#   *_rela  : relation label of the arc leaving the node at the same position
word1_father_index, word1_father_word, word1_father_rela = [], [], []
word2_father_index, word2_father_word, word2_father_rela = [], [], []
def _chain_position(chain_indexes, node_index, from_end=False):
    """Return the position of *node_index* in *chain_indexes*, or -1 if absent."""
    positions = range(len(chain_indexes))
    if from_end:
        positions = reversed(positions)
    for pos in positions:
        if chain_indexes[pos] == node_index:
            return pos
    return -1


def _format_branch(words, indexes, relas, last):
    """Render chain positions 0..last as ``word_idx__(rela)__...word_idx``."""
    if last < 0:
        return ""
    hops = [
        words[pos] + "_" + str(indexes[pos]) + "__(" + relas[pos] + ")__"
        for pos in range(last)
    ]
    return "".join(hops) + words[last] + "_" + str(indexes[last])


def get_shortest_dependent_path(word1, word1_index, word2, word2_index, tdl, is_first):
    """Print the two branches joining two tokens at their common ancestor.

    Both tokens climb the dependency tree one governor at a time (via
    ``get_node_father``); the chains are kept in the module-level
    ``word1_father_*`` / ``word2_father_*`` lists.  As soon as the newest
    governor of one token appears on the other token's chain, the two
    branches are printed, word1's branch first, each formatted as
    ``word_idx__(rela)__word_idx...`` and ending at the shared node.

    Args:
        word1, word1_index: text and token index of the first word.
        word2, word2_index: text and token index of the second word.
        tdl: re-iterable collection of dependency arcs exposing
            ``dep_index``, ``gov_word``, ``gov_index`` and ``dep_rela``.
        is_first: when true, the shared chains are reset and seeded with the
            two start words; when false, the search continues from whatever
            the chains already hold.

    Returns:
        ``(word, index)`` of the common ancestor, or ``("", -1)`` when the
        two chains never meet.
    """
    if is_first:
        # Reset the shared chains so that consecutive queries do not see the
        # ancestors collected by an earlier call.
        for chain in (word1_father_index, word1_father_word, word1_father_rela,
                      word2_father_index, word2_father_word, word2_father_rela):
            del chain[:]
        word1_father_index.append(word1_index)
        word1_father_word.append(word1)
        word2_father_index.append(word2_index)
        word2_father_word.append(word2)

    # A chain cannot have more hops than there are arcs, so this bound only
    # trips on malformed (cyclic) input instead of looping forever.
    max_hops = sum(1 for _ in tdl) + 1
    node1, node2 = word1_index, word2_index
    for _ in range(max_hops):
        _, father1_id = get_node_father(
            node1, tdl, word1_father_word, word1_father_index, word1_father_rela)
        _, father2_id = get_node_father(
            node2, tdl, word2_father_word, word2_father_index, word2_father_rela)

        # -1 is get_node_father's "no governor" marker, never a real node.
        meet_in_chain2 = -1
        if father1_id != -1:
            meet_in_chain2 = _chain_position(word2_father_index, father1_id)
        meet_in_chain1 = -1
        if father2_id != -1:
            meet_in_chain1 = _chain_position(word1_father_index, father2_id)

        candidates = []
        if meet_in_chain2 >= 0:
            # word1's newest governor lies on word2's chain.
            end1 = _chain_position(word1_father_index, father1_id, from_end=True)
            candidates.append((
                (end1 + 1) + (meet_in_chain2 + 1),
                _format_branch(word1_father_word, word1_father_index,
                               word1_father_rela, end1),
                _format_branch(word2_father_word, word2_father_index,
                               word2_father_rela, meet_in_chain2),
                (word2_father_word[meet_in_chain2], word2_father_index[meet_in_chain2]),
            ))
        if meet_in_chain1 >= 0:
            # word2's newest governor lies on word1's chain.
            end2 = _chain_position(word2_father_index, father2_id, from_end=True)
            candidates.append((
                (meet_in_chain1 + 1) + (end2 + 1),
                _format_branch(word1_father_word, word1_father_index,
                               word1_father_rela, meet_in_chain1),
                _format_branch(word2_father_word, word2_father_index,
                               word2_father_rela, end2),
                (word1_father_word[meet_in_chain1], word1_father_index[meet_in_chain1]),
            ))

        if candidates:
            chosen = candidates[0]
            # When both directions matched, keep the strictly shorter pair;
            # on a tie the second pair wins, as in the original branch order.
            if len(candidates) == 2 and not candidates[0][0] < candidates[1][0]:
                chosen = candidates[1]
            _, branch_word1, branch_word2, ancestor = chosen
            print(branch_word1)
            print(branch_word2)
            return ancestor

        if father1_id == -1 and father2_id == -1:
            # Both chains ran out of governors without meeting.
            break
        node1, node2 = father1_id, father2_id

    return "", -1
# Find the parent (governor) node
def get_node_father(wordId, tdl, father_node_word, father_node_index, rela_set):
    """Look up the governor of token *wordId* and append it to the chain.

    Args:
        wordId: token index of the node whose governor is wanted.
        tdl: iterable of dependency arcs exposing ``dep_index``,
            ``gov_word``, ``gov_index`` and ``dep_rela``.
        father_node_word: chain of words collected so far (mutated).
        father_node_index: chain of token indices collected so far (mutated).
        rela_set: relation labels of the arcs followed so far (mutated).

    Returns:
        ``(gov_word, gov_index)`` of the first usable arc whose dependent is
        *wordId*, or ``("", -1)`` when there is none; in that case the three
        lists are left untouched.
    """
    # Node currently at the end of the chain; None when the chain is empty
    # (the original indexed [-1] unconditionally and raised IndexError).
    last_word = father_node_word[-1] if father_node_word else None
    last_index = father_node_index[-1] if father_node_index else None

    for tdp in tdl:
        if tdp.dep_index != wordId:
            continue
        # Skip an arc that leads straight back to the node already at the end
        # of the chain.  The original compared gov_index with the last *word*,
        # so this guard could never reject anything.
        if tdp.gov_word == last_word and tdp.gov_index == last_index:
            continue
        father_node_word.append(tdp.gov_word)
        father_node_index.append(tdp.gov_index)
        rela_set.append(tdp.dep_rela)
        return tdp.gov_word, tdp.gov_index
    return "", -1
def changeFormat(parseTree):
    """Convert ``"rela(gov-idx, dep-idx)"`` strings into relation objects.

    Args:
        parseTree: iterable of typed-dependency strings such as
            ``"case(cells-5, In-1)"``.

    Returns:
        A list with one ``dt_rela.dependent_tree_rela`` per input string,
        with ``gov_word``, ``gov_index``, ``dep_word``, ``dep_index`` and
        ``dep_rela`` filled in; the two indices are stored as ``int``.

    Raises:
        ValueError: if an entry lacks the brackets, the comma or the ``-``
            before an index, or if an index is not an integer.
    """
    dependent_tree = []
    for element in parseTree:
        lbracket = element.index('(')
        rbracket = element.rindex(')')
        # Prefer the ", " separator so that a governor token that is itself a
        # comma (",-6") does not split the entry in the wrong place.
        comma_index = element.find(', ', lbracket)
        if comma_index == -1:
            comma_index = element.index(',', lbracket)

        # strip() removes the blank after the comma, which the original left
        # glued to the front of dep_word.
        governor = element[lbracket + 1:comma_index].strip()
        dependent = element[comma_index + 1:rbracket].strip()

        # Split on the LAST dash: words such as "BMP-6" contain dashes too.
        gov_dash = governor.rindex('-')
        dep_dash = dependent.rindex('-')

        dep_relation = dt_rela.dependent_tree_rela()
        dep_relation.gov_word = governor[:gov_dash]
        dep_relation.gov_index = int(governor[gov_dash + 1:])
        dep_relation.dep_word = dependent[:dep_dash]
        dep_relation.dep_index = int(dependent[dep_dash + 1:])
        dep_relation.dep_rela = element[:lbracket]
        dependent_tree.append(dep_relation)
    return dependent_tree
def testResult():
    """Demo: print the dependency branches joining "cells" and "growth"."""
    # Other word pairs that can be tried against the same sample parse:
    #   ("BMP-6", 7)  with ("inhibited", 8)
    #   ("In", 1)     with ("Smad1/5/8", 17)
    dependencies = changeFormat(parseTree)
    get_shortest_dependent_path("cells", 5, "growth", 10, dependencies, True)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    testResult()
dependent_tree_rela.py
# Representation of a single dependency relation (one arc of the parse)
class dependent_tree_rela:
    """One typed-dependency arc: ``dep_rela(gov_word-gov_index, dep_word-dep_index)``.

    Instances can still be created empty and filled attribute by attribute;
    the keyword arguments are an optional shortcut.
    """

    # Class-level defaults, kept so that reading an attribute on the class
    # itself keeps working.
    dep_word = ""    # text of the dependent token
    dep_index = -1   # index of the dependent token (-1 = unset)
    gov_word = ""    # text of the governor token
    gov_index = -1   # index of the governor token (-1 = unset)
    dep_rela = ""    # relation label of the arc

    def __init__(self, dep_word="", dep_index=-1, gov_word="", gov_index=-1, dep_rela=""):
        self.dep_word = dep_word
        self.dep_index = dep_index
        self.gov_word = gov_word
        self.gov_index = gov_index
        self.dep_rela = dep_rela

    def __repr__(self):
        # Same textual shape as the parser output, handy when debugging.
        return "{}({}-{}, {}-{})".format(
            self.dep_rela, self.gov_word, self.gov_index,
            self.dep_word, self.dep_index)