1. Python 代码生成 words.csv、relations_daixiang.csv、relations_yongxiang.csv、relations_xiangguan.csv。注意:每种关系单独导入!
# Generate triples.
# Load the whole descriptor file and split it into per-record chunks on the
# '*NEWRECORD' marker. Whatever precedes the first marker lands in records[0],
# which is why the record loop below iterates over records[1:].
with open('d2019.txt', 'r', encoding='utf-8') as f:
    # Bound to raw_text rather than `all` so the builtin all() is not shadowed
    # for the rest of the module.
    raw_text = f.read()
records = raw_text.split('*NEWRECORD')
# (term, related term, relation label) triples appended by the record loop.
tuples = []
# Maps each heading / entry term to the ID string generated for it.
mapd = {}
for record, i in zip(records[1:], range(1, len(records))):
tuplesitem = []
items = record.split('\n')
idcount = 0
for item in items:
if item.startswith('MH = '):
heading = re.findall('MH = ([^"]+)', item)[0]
mapd[heading] = "D" + str(i)
if item.startswith('ENTRY = '):
entry = re.findall('[^\|]+', item)[0]
entry = re.findall('ENTRY = ([^"]+)', entry)[0]
tuples.append((heading, entry, '代项'))
tuples.append((entry, heading, '用项'))
if not mapd.get(entry,0):
idcount = idcount + 1
mapd[entry] = mapd[heading] + "_" + str(idcount)
if item.startswith('PRINT ENTRY = '):
printentry = re.findall('[^\|]+', item)[0]
printentry = re.findall('PRINT ENTRY = ([^"]+)', printentry)[0]
tuples.append((heading, printentry, '代项'))
tuples.append((printentry, heading, '用项'))
if not mapd.get(printentry, 0):
idcount = idcount + 1
mapd[printentry] = mapd[heading] + "_" + str(idcount)
if item.startswith('MN = '):
nid = re.findall('MN = ([^"]+)', item)[0]
hypernyms = find_hypernym(nid)
for hypernym in hypernyms:
tuples.append((heading, hypernym, '上位词'))
hyponyms = find_hyponym(nid)
for hyponym in hyponyms:
tuples.append((heading, hyponym, '下位词'))
homoionyms = find_homoionym(nid)
for homoionym in homoionyms:
tuples.append((heading, homoionym, '兄弟'