文章目录
一、数据集加载
对NeuralKG代码整理并理解,供个人学习
import numpy as np
import torch.nn.functional as F
from torch.utils.data import Dataset
import os
from collections import defaultdict as ddict
from IPython import embed
import torch
import torch.nn as nn
1.1 加载实体和关系数据集
# Build entity/relation <-> integer-id lookup tables.
# entities.txt / relations.txt hold one name per line; each name's id is
# its 0-based line number, so enumerate() replaces the manual counters.
ent2id = {}
rel2id = {}
id2ent = {}
id2rel = {}
# NOTE: os.path.join("dataset", "toy") is portable; the original
# hard-coded a Windows-only backslash separator ("dataset\\toy").
_data_dir = os.path.join("dataset", "toy")
with open(os.path.join(_data_dir, "entities.txt"), encoding="utf-8") as fin:
    for eid, line in enumerate(fin):
        entity = line.strip()
        ent2id[entity] = eid
        id2ent[eid] = entity
with open(os.path.join(_data_dir, "relations.txt"), encoding="utf-8") as fin:
    for rid, line in enumerate(fin):
        relation = line.strip()
        rel2id[relation] = rid
        id2rel[rid] = relation
# Sanity check: table sizes plus the first three mappings of each table
# (ids are assigned 0,1,2,... in insertion order, so slicing the first
# three items is equivalent to the original "stop at value 3" loop).
num_ent = len(ent2id)
num_rel = len(rel2id)
print("实体数量:", num_ent)
print("关系数量:", num_rel)
print("====================")
print("实体2id字典:")
for key, value in list(ent2id.items())[:3]:
    print("[{}:{}]".format(key, value))
print("====================")
print("关系2id字典:")
for key, value in list(rel2id.items())[:3]:
    print("[{}:{}]".format(key, value))
实体数量: 280
关系数量: 112
====================
实体2id字典:
[/m/0342h:0]
[/m/0l14qv:1]
[/m/02sgy:2]
====================
关系2id字典:
[/music/performance_role/regular_performances./music/group_membership/role:0]
[/music/performance_role/guest_performances./music/recording_contribution/performance_role:1]
[/olympics/olympic_participating_country/athletes./olympics/olympic_athlete_affiliation/olympics:2]
1.2 加载三元组数据集
# Triple containers (as id tuples).
train_triples = []
valid_triples = []
test_triples = []
all_true_triples = set()
# Auxiliary indexes used for grounding:
#   TrainTriples   : "h\tr\tt" (tab-joined ids) -> True, for O(1) membership tests
#   Relation2Tuple : relation id -> list of "h#t" pairs (raw names, not ids)
#   RelSub2Obj     : relation id -> {subject id -> {object id -> True}}
TrainTriples = {}
Relation2Tuple = {}
RelSub2Obj = {}
# NOTE: portable path (the original used a Windows-only "dataset\\toy"),
# and the file object is iterated directly instead of via readlines().
with open(os.path.join("dataset", "toy", "train.txt"), encoding="utf-8") as f:
    for line in f:
        # Each line is whitespace-separated: head entity, relation, tail entity.
        h, r, t = line.strip().split()
        h_id, r_id, t_id = ent2id[h], rel2id[r], ent2id[t]
        train_triples.append((h_id, r_id, t_id))
        # Membership key mirrors the original "id\tid\tid" format exactly.
        TrainTriples[str(h_id) + '\t' + str(r_id) + '\t' + str(t_id)] = True
        # Raw (un-mapped) entity names are stored here, as in the original.
        Relation2Tuple.setdefault(r_id, []).append(str(h) + "#" + str(t))
        # Chained setdefault replaces the original four-branch
        # tmpMap/tmpMap_in plumbing with identical resulting structure;
        # it also makes the trailing "should we re-assign?" question moot,
        # since the inner dicts are mutated in place.
        RelSub2Obj.setdefault(r_id, {}).setdefault(h_id, {})[t_id] = True
# Validation set: id-mapped triples only (no grounding indexes needed here).
# NOTE: portable path via os.path.join("dataset", "toy", ...); the original
# hard-coded a Windows-only backslash. Files are iterated directly rather
# than materialized with readlines().
with open(os.path.join("dataset", "toy", "valid.txt"), encoding="utf-8") as f:
    for line in f:
        h, r, t = line.strip().split()
        valid_triples.append((ent2id[h], rel2id[r], ent2id[t]))
# Test set
with open(os.path.join("dataset", "toy", "test.txt"), encoding="utf-8") as f:
    for line in f:
        h, r, t = line.strip().split()
        test_triples.append((ent2id[h], rel2id[r], ent2id[t]))
# Union of every known-true triple across all three splits.
all_true_triples = set(
    train_triples + valid_triples + test_triples
)
# Preview the loaded data: the first 3 triples of each split and the
# first 2 entries of each grounding index (matching the original's
# cnt != 3 / break pattern, which printed exactly two dict items).
def _preview_dict(title, mapping, k):
    """Print *title*, then the first *k* key/value pairs of *mapping*."""
    print(title)
    for key, value in list(mapping.items())[:k]:
        print("[{}:{}]".format(key, value))
    print("============================")

print("训练集:")
for triple in train_triples[:3]:
    print(triple)
print("============================")
print("验证集:")
for triple in valid_triples[:3]:
    print(triple)
print("============================")
print("测试集:")
for triple in test_triples[:3]:
    print(triple)
print("============================")
_preview_dict("TrainTriples:", TrainTriples, 2)
_preview_dict("RelSub2Obj:", RelSub2Obj, 2)
_preview_dict("Relation2Tuple:", Relation2Tuple, 2)
训练集:
(0, 0, 1)
(2, 1, 3)
(4, 2, 5)
============================
验证集:
(106, 24, 155)
(110, 56, 159)
(140, 51, 157)
============================
测试集:
(162, 60, 230)
(89, 41, 152)
(174, 18, 60)
============================
TrainTriples:
[0 0 1:True]
[2 1 3:True]
============================
RelSub2Obj:
[0:{0: {1: True, 116: True, 121: True, 95: True, 17: True, 53: True, 61: True, 30: True, 45: True, 26: True, 31: True, 39: True, 144: True, 33: True, 13: True, 32: True, 44: True, 2: True, 19: True, 104: True, 176: True, 3: True, 50: True, 29: True, 25: True, 12: True, 24: True, 85: True, 67: True}, 2: {13: True, 39: True, 0: True, 3: True, 25: True, 17: True, 90: True, 31: True, 12: True, 45: True, 33: True, 116: True, 132: True, 29: True, 40: True, 61: True, 53: True, 1: True, 67: True, 26: True, 95: True, 121: True, 24: True, 104: True, 16: True, 32: True}, 16: {17: True, 26: True, 33: True, 50: True, 53: True, 95: True, 3: True, 176: True, 76: True, 144: True, 32: True, 13: True, 104: True, 31: True, 24: True, 90: True, 0: True, 25: True, 1: True}, 24: {25: True, 12: True, 29: True, 61: True, 17: True, 121: True, 67: True, 39: True, 26: True, 31: True, 50: True, 95: True, 58: True, 32: True, 1: True, 45: True, 90: True, 30: True, 44: True, 176: True, 116: True, 16: True, 76: True, 40: True, 13: True, 33: True, 0: True, 104: True, 3: True, 144: True, 53: True}, 32: {33: True, 17: True, 24: True, 3: True, 2: True, 26: True, 16: True, 12: True, 104: True, 13: True, 53: True, 25: True, 132: True, 90: True, 67: True, 50: True, 0: True, 44: True, 29: True, 116: True, 1: True}, 26: {40: True, 53: True, 25: True, 33: True, 24: True, 67: True, 50: True, 32: True, 45: True, 29: True, 16: True, 1: True, 144: True, 17: True, 95: True, 13: True, 104: True, 61: True, 90: True, 132: True, 3: True, 0: True, 31: True}, 39: {44: True, 0: True, 13: True, 32: True, 85: True, 40: True, 61: True, 58: True, 3: True, 17: True, 1: True, 67: True, 90: True, 33: True, 121: True, 77: True, 12: True, 50: True, 2: True, 104: True, 16: True, 25: True, 132: True, 176: True, 53: True, 31: True, 45: True}, 50: {31: True, 0: True, 12: True, 116: True, 176: True, 77: True, 61: True, 30: True, 121: True, 16: True, 95: True, 2: True, 13: True, 76: True, 3: True, 90: True, 53: True, 32: True, 33: 
True, 29: True, 58: True, 45: True, 67: True, 17: True, 39: True, 1: True, 40: True}, 31: {58: True, 0: True, 85: True, 104: True, 3: True, 32: True, 12: True, 45: True, 95: True, 76: True, 50: True, 53: True, 24: True, 30: True, 26: True, 33: True, 17: True, 39: True, 13: True, 67: True, 25: True, 16: True, 40: True, 2: True, 61: True, 121: True, 116: True, 1: True, 132: True}, 1: {95: True, 39: True, 144: True, 26: True, 176: True, 61: True, 67: True, 17: True, 121: True, 3: True, 40: True, 2: True, 33: True, 53: True, 104: True, 13: True, 132: True, 90: True, 58: True, 30: True, 31: True, 24: True, 16: True, 116: True, 12: True, 32: True, 44: True, 50: True, 45: True, 29: True}, 13: {31: True, 67: True, 53: True, 40: True, 29: True, 90: True, 45: True, 16: True, 3: True, 116: True, 104: True, 44: True, 2: True, 50: True, 24: True, 30: True, 32: True, 1: True, 0: True, 26: True, 39: True, 95: True}, 18: {3: True, 1: True, 176: True, 25: True, 33: True}, 30: {90: True, 0: True, 40: True, 1: True, 31: True, 2: True, 3: True, 50: True, 176: True, 132: True}, 58: {39: True, 90: True, 2: True, 17: True, 1: True, 132: True, 61: True, 3: True, 25: True, 53: True, 31: True, 121: True, 24: True, 40: True, 26: True, 104: True}, 104: {40: True, 177: True, 45: True, 19: True, 0: True, 53: True, 176: True, 12: True, 90: True, 1: True, 95: True, 144: True, 121: True, 16: True, 25: True, 24: True, 33: True, 3: True, 29: True, 61: True, 76: True, 58: True, 85: True, 44: True, 50: True, 2: True, 18: True}, 44: {45: True, 104: True, 13: True, 1: True, 53: True, 85: True, 90: True, 39: True, 95: True, 31: True, 67: True, 50: True, 29: True, 0: True, 116: True, 25: True, 40: True, 12: True}, 53: {25: True, 13: True, 33: True, 44: True, 26: True, 90: True, 24: True, 40: True, 16: True, 85: True, 132: True, 0: True, 32: True, 58: True, 50: True, 1: True, 61: True, 3: True, 95: True, 67: True, 76: True}, 121: {58: True, 95: True, 31: True, 3: True, 61: True, 104: True, 12: True, 53: 
True, 2: True, 50: True, 176: True, 40: True, 67: True, 39: True, 24: True, 0: True, 17: True, 32: True, 45: True}, 45: {53: True, 2: True, 32: True, 39: True, 61: True, 1: True, 33: True, 85: True, 25: True, 50: True, 12: True, 67: True, 40: True, 31: True, 0: True, 13: True, 116: True, 44: True, 176: True, 26: True, 17: True, 90: True, 16: True, 24: True}, 144: {16: True, 132: True, 40: True, 26: True, 31: True, 25: True, 3: True, 1: True, 17: True, 0: True, 24: True, 104: True, 33: True, 39: True}, 116: {39: True, 40: True, 24: True, 29: True, 0: True, 132: True, 13: True, 45: True, 33: True, 12: True, 31: True, 104: True, 17: True, 3: True}, 12: {116: True, 32: True, 50: True, 176: True, 44: True, 2: True, 1: True, 104: True, 31: True, 90: True, 3: True, 26: True, 39: True, 45: True, 121: True, 95: True, 61: True, 40: True, 85: True, 17: True, 0: True, 33: True, 67: True, 24: True, 25: True}, 40: {39: True, 31: True, 67: True, 104: True, 61: True, 58: True, 24: True, 26: True, 95: True, 121: True, 13: True, 33: True, 176: True, 2: True, 12: True, 53: True, 16: True, 25: True, 44: True, 3: True, 19: True, 50: True, 45: True, 90: True, 29: True, 1: True, 17: True, 177: True, 85: True, 132: True, 0: True}, 95: {85: True, 26: True, 40: True, 2: True, 12: True, 0: True, 67: True, 104: True, 50: True, 13: True, 3: True, 44: True, 33: True, 16: True}, 76: {3: True, 33: True, 24: True, 132: True, 17: True, 19: True, 104: True, 67: True, 53: True, 90: True, 85: True, 0: True, 31: True, 40: True, 1: True, 25: True}, 33: {95: True, 1: True, 12: True, 67: True, 16: True, 77: True, 177: True, 45: True, 104: True, 76: True, 29: True, 144: True, 25: True, 53: True, 116: True, 90: True, 121: True, 24: True, 85: True, 0: True, 17: True, 31: True, 39: True, 2: True}, 90: {17: True, 116: True, 13: True, 31: True, 77: True, 58: True, 132: True, 32: True, 24: True, 40: True, 44: True, 25: True, 1: True, 67: True, 29: True, 33: True, 30: True, 121: True, 85: True, 0: True, 61: True, 
3: True, 76: True, 39: True, 45: True, 26: True, 16: True, 53: True, 50: True}, 25: {13: True, 121: True, 116: True, 53: True, 95: True, 104: True, 26: True, 61: True, 39: True, 3: True, 16: True, 85: True, 2: True, 30: True, 40: True, 17: True, 1: True, 12: True, 67: True, 31: True, 77: True, 58: True, 44: True, 176: True}, 17: {116: True, 176: True, 16: True, 58: True, 24: True, 53: True, 40: True, 39: True, 29: True, 0: True, 45: True, 32: True, 104: True, 61: True, 144: True, 76: True, 25: True, 95: True, 1: True, 3: True, 26: True, 33: True, 12: True, 2: True, 90: True, 50: True, 85: True}, 61: {90: True, 39: True, 25: True, 50: True, 40: True, 1: True, 132: True, 24: True, 104: True, 2: True, 26: True, 29: True, 176: True, 17: True, 3: True, 0: True, 16: True, 45: True}, 132: {3: True, 67: True, 53: True, 25: True, 58: True, 90: True, 50: True, 26: True, 76: True, 32: True, 29: True, 33: True, 31: True, 17: True, 0: True, 30: True, 2: True, 144: True, 104: True, 40: True, 1: True, 116: True, 39: True}, 3: {121: True, 13: True, 44: True, 90: True, 30: True, 0: True, 104: True, 1: True, 19: True, 26: True, 31: True, 77: True, 61: True, 53: True, 76: True, 33: True, 116: True, 50: True, 24: True, 29: True, 16: True, 18: True, 12: True, 85: True, 2: True, 58: True, 17: True, 40: True, 25: True, 132: True, 95: True}, 67: {40: True, 176: True, 24: True, 29: True, 132: True, 85: True, 53: True, 116: True, 90: True, 39: True, 12: True, 50: True, 25: True, 33: True, 32: True, 13: True, 0: True, 95: True, 44: True}, 77: {0: True, 3: True, 25: True, 31: True, 33: True, 24: True, 90: True}, 29: {67: True, 30: True, 12: True, 132: True, 39: True, 90: True, 50: True, 116: True, 44: True, 24: True, 17: True, 16: True, 104: True, 45: True, 31: True, 25: True, 85: True, 53: True, 3: True, 2: True, 0: True, 32: True, 13: True}, 176: {24: True, 85: True, 16: True, 53: True, 1: True, 30: True, 17: True, 12: True, 32: True, 0: True, 31: True, 39: True, 104: True, 45: True, 121: 
True, 25: True, 95: True, 67: True, 40: True, 3: True, 18: True, 61: True, 50: True}, 177: {40: True, 104: True, 33: True}, 85: {26: True, 32: True, 17: True, 90: True, 45: True, 50: True, 33: True, 176: True, 29: True, 104: True, 39: True, 0: True, 53: True, 44: True, 3: True, 95: True, 25: True, 76: True}, 19: {40: True, 0: True, 3: True, 104: True, 1: True, 76: True}}]
[1:{2: {3: True, 90: True, 25: True, 1: True, 30: True, 32: True, 0: True, 31: True, 144: True}, 17: {18: True, 19: True, 3: True, 1: True, 45: True, 30: True, 144: True, 58: True, 40: True}, 29: {0: True}, 3: {45: True, 40: True, 32: True, 2: True, 31: True, 24: True, 1: True, 19: True, 58: True, 18: True, 144: True, 30: True, 176: True, 0: True}, 12: {0: True, 32: True, 85: True, 40: True}, 104: {25: True, 176: True, 30: True, 53: True, 1: True, 40: True, 144: True, 0: True, 90: True, 2: True, 26: True}, 0: {33: True, 12: True, 45: True, 3: True, 176: True, 121: True, 26: True, 90: True, 29: True, 25: True, 32: True, 31: True, 76: True, 1: True, 39: True, 2: True, 53: True, 40: True}, 121: {1: True, 25: True, 90: True, 33: True, 31: True, 58: True, 0: True, 61: True, 24: True}, 26: {76: True, 104: True, 1: True, 25: True}, 33: {25: True, 121: True, 0: True, 104: True}, 1: {61: True, 121: True, 24: True, 30: True, 45: True, 18: True, 31: True, 58: True, 144: True, 26: True, 2: True, 17: True, 3: True, 104: True}, 25: {33: True, 30: True, 104: True, 76: True, 3: True, 58: True, 90: True, 24: True, 32: True, 26: True, 45: True, 53: True, 121: True, 1: True, 0: True, 31: True}, 58: {1: True, 31: True, 121: True, 24: True, 25: True, 3: True, 30: True, 19: True, 17: True}, 30: {25: True, 58: True, 1: True, 104: True, 19: True, 32: True, 2: True, 18: True}, 40: {30: True, 104: True, 32: True, 3: True, 144: True, 2: True, 24: True, 25: True, 45: True, 31: True, 12: True, 16: True, 176: True, 17: True}, 76: {1: True, 25: True, 45: True, 26: True}, 31: {24: True, 121: True, 2: True, 0: True, 3: True, 40: True, 30: True, 45: True, 58: True, 1: True, 61: True}, 18: {19: True, 1: True, 45: True, 3: True, 17: True}, 19: {1: True, 18: True, 58: True, 17: True, 30: True}, 90: {3: True, 33: True, 0: True, 24: True, 104: True, 2: True, 32: True, 121: True, 25: True, 53: True}, 45: {1: True, 17: True, 67: True, 3: True, 31: True, 0: True, 24: True, 76: True, 176: True, 25: True, 
18: True, 40: True}, 24: {0: True, 45: True, 33: True, 25: True, 58: True, 40: True, 3: True, 90: True, 61: True}, 32: {0: True, 25: True, 2: True, 31: True, 30: True, 3: True, 40: True}, 132: {61: True, 31: True, 40: True}, 61: {1: True, 121: True, 132: True, 24: True}, 53: {0: True, 104: True, 24: True, 3: True, 33: True}, 144: {25: True, 1: True, 3: True, 40: True, 17: True, 2: True, 104: True, 30: True}, 176: {104: True, 0: True, 3: True}, 39: {33: True, 0: True}, 85: {12: True}, 44: {31: True}, 16: {40: True}, 67: {45: True}}]
============================
Relation2Tuple:
[0:['/m/0342h#/m/0l14qv', '/m/02sgy#/m/07xzm', '/m/0dwtp#/m/013y1f', '/m/03bx0bm#/m/018vs', '/m/0dwtp#/m/0g2dz', '/m/04rzd#/m/028tv0', '/m/02sgy#/m/01vj9c', '/m/0g2dz#/m/05148p4', '/m/01vj9c#/m/02fsn', '/m/0mkg#/m/05r5c', '/m/05r5c#/m/03gvt', '/m/01vj9c#/m/0342h', '/m/0l14qv#/m/01wy6', '/m/07xzm#/m/05r5c', '/m/0mkg#/m/0342h', '/m/0395lw#/m/0l14md', '/m/0l14qv#/m/01vj9c', '/m/026t6#/m/042v_gx', '/m/03gvt#/m/01vj9c', '/m/02hnl#/m/05148p4', '/m/0342h#/m/06w7v', '/m/02fsn#/m/0l14j_', '/m/02fsn#/m/02hnl', '/m/03qjg#/m/018vs', '/m/0g2dz#/m/03qjg', '/m/02fsn#/m/07xzm', '/m/0mkg#/m/07y_7', '/m/04rzd#/m/013y1f', '/m/01vj9c#/m/07xzm', '/m/05r5c#/m/0342h', '/m/0gkd1#/m/03gvt', '/m/0l14j_#/m/03qjg', '/m/01v1d8#/m/0dwtp', '/m/06w7v#/m/01vj9c', '/m/0l14j_#/m/02sgy', '/m/07y_7#/m/06w7v', '/m/04rzd#/m/03bx0bm', '/m/05148p4#/m/01vj9c', '/m/02sgy#/m/0342h', '/m/0mkg#/m/06w7v', '/m/01wy6#/m/01xqw', '/m/07brj#/m/0l14md', '/m/028tv0#/m/01wy6', '/m/0l14qv#/m/01v1d8', '/m/042v_gx#/m/013y1f', '/m/0g2dz#/m/018vs', '/m/042v_gx#/m/06w7v', '/m/0l14qv#/m/0g2dz', '/m/018vs#/m/07xzm', '/m/05148p4#/m/05r5c', '/m/018vs#/m/0gkd1', '/m/0gkd1#/m/01wy6', '/m/02sgy#/m/0l14md', '/m/03gvt#/m/042v_gx', '/m/0mkg#/m/0dwt5', '/m/0mkg#/m/01vdm0', '/m/05r5c#/m/01xqw', '/m/02hnl#/m/05842k', '/m/0g2dz#/m/028tv0', '/m/07xzm#/m/06ncr', '/m/02fsn#/m/0l14qv', '/m/0342h#/m/0gkd1', '/m/013y1f#/m/06w7v', '/m/03bx0bm#/m/07y_7', '/m/0mkg#/m/01s0ps', '/m/03bx0bm#/m/018j2', '/m/01s0ps#/m/042v_gx', '/m/01s0ps#/m/01vj9c', '/m/0mkg#/m/026t6', '/m/026t6#/m/0342h', '/m/0bxl5#/m/0l14md', '/m/02hnl#/m/0l14j_', '/m/07xzm#/m/03qjg', '/m/0l14md#/m/0gkd1', '/m/0342h#/m/01wy6', '/m/07xzm#/m/05148p4', '/m/07y_7#/m/04rzd', '/m/013y1f#/m/0dwt5', '/m/0342h#/m/013y1f', '/m/03bx0bm#/m/01s0ps', '/m/05r5c#/m/02hnl', '/m/02fsn#/m/03qjg', '/m/028tv0#/m/0l14qv', '/m/06ncr#/m/05148p4', '/m/03bx0bm#/m/013y1f', '/m/0l14md#/m/07xzm', '/m/02sgy#/m/018vs', '/m/05148p4#/m/06ncr', '/m/02sgy#/m/013y1f', '/m/042v_gx#/m/07xzm', '/m/042v_gx#/m/05r5c', 
'/m/01vdm0#/m/0342h', '/m/018j2#/m/06ncr', '/m/04rzd#/m/0l14md', '/m/07y_7#/m/0mkg', '/m/02fsn#/m/01xqw', '/m/04rzd#/m/02sgy', '/m/02sgy#/m/042v_gx', '/m/07xzm#/m/018j2', '/m/018vs#/m/06w7v', '/m/04rzd#/m/0g2dz', '/m/06ncr#/m/0dwt5', '/m/03qjg#/m/07xzm', '/m/01s0ps#/m/018vs', '/m/03bx0bm#/m/0gkd1', '/m/05148p4#/m/02hnl', '/m/05148p4#/m/01s0ps', '/m/04rzd#/m/0dwtp', '/m/07y_7#/m/0dwt5', '/m/02hnl#/m/0l15bq', '/m/0g2dz#/m/03bx0bm', '/m/01wy6#/m/0g2dz', '/m/018vs#/m/03qjg', '/m/0mkg#/m/0gkd1', '/m/01wy6#/m/05148p4', '/m/0bxl5#/m/06ncr', '/m/02hnl#/m/0342h', '/m/028tv0#/m/07y_7', '/m/0l14qv#/m/0dwt5', '/m/01vdm0#/m/0l14md', '/m/0dwt5#/m/03bx0bm', '/m/013y1f#/m/0dwtp', '/m/0342h#/m/03qjg', '/m/03gvt#/m/02sgy', '/m/01s0ps#/m/0mkg', '/m/05842k#/m/05148p4', '/m/0l14j_#/m/04rzd', '/m/03bx0bm#/m/06ncr', '/m/01s0ps#/m/05148p4', '/m/01wy6#/m/02sgy', '/m/0342h#/m/01s0ps', '/m/0dwt5#/m/01xqw', '/m/02hnl#/m/03qjg', '/m/018vs#/m/01wy6', '/m/04rzd#/m/07y_7', '/m/03bx0bm#/m/01vj9c', '/m/03qjg#/m/028tv0', '/m/0342h#/m/026t6', '/m/0bxl5#/m/03qjg', '/m/01v1d8#/m/0bxl5', '/m/018vs#/m/02hnl', '/m/07xzm#/m/042v_gx', '/m/07xzm#/m/0l14j_', '/m/02hnl#/m/0dwt5', '/m/01s0ps#/m/0l14qv', '/m/06w7v#/m/05148p4', '/m/0mkg#/m/0dwtp', '/m/042v_gx#/m/01vdm0', '/m/0l14md#/m/02fsn', '/m/03bx0bm#/m/0g2dz', '/m/05148p4#/m/03gvt', '/m/0dwtp#/m/028tv0', '/m/0l14j_#/m/01vj9c', '/m/07brj#/m/028tv0', '/m/013y1f#/m/03gvt', '/m/0bxl5#/m/018vs', '/m/028tv0#/m/06ncr', '/m/07y_7#/m/02fsn', '/m/02hnl#/m/07y_7', '/m/07brj#/m/03bx0bm', '/m/028tv0#/m/0dwtp', '/m/0g2dz#/m/06ncr', '/m/03bx0bm#/m/05r5c', '/m/04rzd#/m/02hnl', '/m/02fsn#/m/042v_gx', '/m/01s0ps#/m/0bxl5', '/m/0mkg#/m/01wy6', '/m/06ncr#/m/03bx0bm', '/m/02hnl#/m/042v_gx', '/m/0bxl5#/m/03gvt', '/m/0l14md#/m/042v_gx', '/m/0g2dz#/m/0mkg', '/m/03bx0bm#/m/0mkg', '/m/0342h#/m/0l14j_', '/m/05148p4#/m/03bx0bm', '/m/0dwtp#/m/0mkg', '/m/02fsn#/m/01vj9c', '/m/0342h#/m/0g2dz', '/m/0dwtp#/m/03qjg', '/m/018vs#/m/0g2dz', '/m/06ncr#/m/018j2', '/m/02sgy#/m/05r5c', 
'/m/02sgy#/m/07y_7', '/m/0mkg#/m/02sgy', '/m/042v_gx#/m/03gvt', '/m/07brj#/m/0bxl5', '/m/04rzd#/m/07xzm', '/m/05r5c#/m/0l14md', '/m/07xzm#/m/0dwtp', '/m/013y1f#/m/03bx0bm', '/m/07brj#/m/013y1f', '/m/01v1d8#/m/05148p4', '/m/026t6#/m/05148p4', '/m/03gvt#/m/013y1f', '/m/018j2#/m/026t6', '/m/0g2dz#/m/04rzd', '/m/0l14j_#/m/01s0ps', '/m/0mkg#/m/07xzm', '/m/05148p4#/m/0g2dz', '/m/01v1d8#/m/0g2dz', '/m/01s0ps#/m/03bx0bm', '/m/0bxl5#/m/042v_gx', '/m/01s0ps#/m/02hnl', '/m/0g2dz#/m/0l14j_', '/m/05148p4#/m/01wy6', '/m/07xzm#/m/0l14md', '/m/03bx0bm#/m/01wy6', '/m/07xzm#/m/06w7v', '/m/026t6#/m/0l14qv', '/m/02sgy#/m/0l14j_', '/m/01xqw#/m/0g2dz', '/m/0l14md#/m/026t6', '/m/03gvt#/m/0l14qv', '/m/0l14md#/m/0342h', '/m/05148p4#/m/0gkd1', '/m/028tv0#/m/01vdm0', '/m/03bx0bm#/m/03gvt', '/m/05r5c#/m/04rzd', '/m/01v1d8#/m/05r5c', '/m/0l14qv#/m/01s0ps', '/m/018j2#/m/07y_7', '/m/07y_7#/m/02sgy', '/m/0l14md#/m/02hnl', '/m/01xqw#/m/04rzd', '/m/013y1f#/m/03qjg', '/m/042v_gx#/m/0bxl5', '/m/018j2#/m/0bxl5', '/m/0bxl5#/m/0mkg', '/m/01vj9c#/m/04rzd', '/m/0l14md#/m/0l14qv', '/m/07xzm#/m/02hnl', '/m/0l15bq#/m/05148p4', '/m/05148p4#/m/07xzm', '/m/05148p4#/m/028tv0', '/m/0l15bq#/m/0342h', '/m/01v1d8#/m/018vs', '/m/0342h#/m/05r5c', '/m/01wy6#/m/07y_7', '/m/03qjg#/m/02fsn', '/m/03gvt#/m/0bxl5', '/m/0l14qv#/m/06ncr', '/m/0342h#/m/01vj9c', '/m/05r5c#/m/07y_7', '/m/02fsn#/m/01wy6', '/m/01vj9c#/m/01xqw', '/m/03gvt#/m/01s0ps', '/m/03bx0bm#/m/04rzd', '/m/0342h#/m/01v1d8', '/m/0l14j_#/m/0l14qv', '/m/04rzd#/m/03qjg', '/m/07brj#/m/0l15bq', '/m/0mkg#/m/07brj', '/m/0g2dz#/m/018j2', '/m/0bxl5#/m/0g2dz', '/m/0dwtp#/m/01wy6', '/m/05r5c#/m/0l14j_', '/m/0dwtp#/m/0l14md', '/m/0l14j_#/m/028tv0', '/m/028tv0#/m/05842k', '/m/06ncr#/m/0bxl5', '/m/07y_7#/m/0l14qv', '/m/0l14qv#/m/013y1f', '/m/0dwtp#/m/0dwt5', '/m/0gkd1#/m/05r5c', '/m/018vs#/m/01s0ps', '/m/0dwt5#/m/0dwtp', '/m/06ncr#/m/01xqw', '/m/0dwt5#/m/03qjg', '/m/02sgy#/m/028tv0', '/m/0l14j_#/m/01xqw', '/m/06ncr#/m/03qjg', '/m/06ncr#/m/06w7v', '/m/0l14qv#/m/0gkd1', 
'/m/028tv0#/m/0l14j_', '/m/0g2dz#/m/0dwtp', '/m/018j2#/m/01vj9c', '/m/05r5c#/m/01wy6', '/m/03gvt#/m/0l14md', '/m/06w7v#/m/03bx0bm', '/m/0l14j_#/m/018vs', '/m/0395lw#/m/0l14qv', '/m/0mkg#/m/0l14md', '/m/018j2#/m/042v_gx', '/m/042v_gx#/m/04rzd', '/m/02hnl#/m/0l14qv', '/m/06w7v#/m/018j2', '/m/02hnl#/m/01wy6', '/m/0342h#/m/028tv0', '/m/02sgy#/m/06w7v', '/m/0dwt5#/m/0l14qv', '/m/07xzm#/m/02fsn', '/m/02hnl#/m/01v1d8', '/m/01vj9c#/m/05148p4', '/m/05148p4#/m/0dwt5', '/m/01xqw#/m/013y1f', '/m/026t6#/m/05r5c', '/m/042v_gx#/m/03bx0bm', '/m/07xzm#/m/02sgy', '/m/03bx0bm#/m/0l14qv', '/m/05r5c#/m/07brj', '/m/01vdm0#/m/018vs', '/m/042v_gx#/m/05148p4', '/m/018j2#/m/0mkg', '/m/0dwt5#/m/026t6', '/m/02sgy#/m/0bxl5', '/m/0dwtp#/m/07brj', '/m/05148p4#/m/02sgy', '/m/02sgy#/m/018j2', '/m/042v_gx#/m/02fsn', '/m/01xqw#/m/042v_gx', '/m/03qjg#/m/0g2dz', '/m/01wy6#/m/0342h', '/m/0dwtp#/m/01v1d8', '/m/0l14qv#/m/0l14md', '/m/0gkd1#/m/0l14md', '/m/07xzm#/m/0mkg', '/m/01vj9c#/m/01s0ps', '/m/0mkg#/m/042v_gx', '/m/03bx0bm#/m/0l14j_', '/m/05r5c#/m/0mkg', '/m/0g2dz#/m/0l14qv', '/m/03qjg#/m/042v_gx', '/m/05r5c#/m/03qjg', '/m/028tv0#/m/02hnl', '/m/07y_7#/m/02hnl', '/m/0mkg#/m/03qjg', '/m/01vdm0#/m/05r5c', '/m/01vj9c#/m/03gvt', '/m/013y1f#/m/05148p4', '/m/042v_gx#/m/018vs', '/m/013y1f#/m/01vj9c', '/m/0bxl5#/m/07brj', '/m/018vs#/m/01vj9c', '/m/01xqw#/m/0l14j_', '/m/02sgy#/m/05148p4', '/m/0l14md#/m/0l15bq', '/m/05r5c#/m/03bx0bm', '/m/028tv0#/m/07brj', '/m/0bxl5#/m/04rzd', '/m/05r5c#/m/026t6', '/m/02sgy#/m/01s0ps', '/m/07brj#/m/02hnl', '/m/018j2#/m/06w7v', '/m/028tv0#/m/018j2', '/m/01vj9c#/m/0l14md', '/m/07y_7#/m/05r5c', '/m/03qjg#/m/03bx0bm', '/m/02hnl#/m/0gkd1', '/m/05r5c#/m/0g2dz', '/m/01v1d8#/m/0l14md', '/m/0l14md#/m/0g2dz', '/m/026t6#/m/02sgy', '/m/0l14qv#/m/05148p4', '/m/05148p4#/m/07y_7', '/m/02hnl#/m/0dwtp', '/m/0l14md#/m/05r5c', '/m/0342h#/m/07xzm', '/m/01vj9c#/m/013y1f', '/m/018j2#/m/02fsn', '/m/018j2#/m/03bx0bm', '/m/03bx0bm#/m/042v_gx', '/m/026t6#/m/0l14md', '/m/0gkd1#/m/01s0ps', 
'/m/0gkd1#/m/02hnl', '/m/01vdm0#/m/028tv0', '/m/018vs#/m/0l14md', '/m/0l14j_#/m/0mkg', '/m/0dwt5#/m/013y1f', '/m/01vj9c#/m/0l14qv', '/m/0l14md#/m/01vdm0', '/m/05r5c#/m/028tv0', '/m/03gvt#/m/018vs', '/m/06ncr#/m/042v_gx', '/m/07brj#/m/06ncr', '/m/0dwt5#/m/07y_7', '/m/01xqw#/m/0mkg', '/m/0dwtp#/m/04rzd', '/m/042v_gx#/m/0l14qv', '/m/0l15bq#/m/0l14md', '/m/03qjg#/m/05148p4', '/m/01v1d8#/m/0l14qv', '/m/01xqw#/m/028tv0', '/m/07y_7#/m/042v_gx', '/m/0g2dz#/m/01v1d8', '/m/0l14j_#/m/07y_7', '/m/0l14qv#/m/02sgy', '/m/0l14qv#/m/028tv0', '/m/02fsn#/m/05r5c', '/m/07xzm#/m/03bx0bm', '/m/0mkg#/m/04rzd', '/m/0l14j_#/m/06ncr', '/m/013y1f#/m/018j2', '/m/06w7v#/m/0342h', '/m/06ncr#/m/01vj9c', '/m/0l14md#/m/01s0ps', '/m/0bxl5#/m/018j2', '/m/05842k#/m/02hnl', '/m/03gvt#/m/03qjg', '/m/0l14j_#/m/05148p4', '/m/03bx0bm#/m/026t6', '/m/01xqw#/m/0dwt5', '/m/028tv0#/m/01v1d8', '/m/07brj#/m/03qjg', '/m/018vs#/m/0dwtp', '/m/0342h#/m/04rzd', '/m/07y_7#/m/0l14md', '/m/07y_7#/m/0g2dz', '/m/026t6#/m/0mkg', '/m/0bxl5#/m/028tv0', '/m/0g2dz#/m/013y1f', '/m/0gkd1#/m/07y_7', '/m/0342h#/m/02fsn', '/m/05r5c#/m/013y1f', '/m/05148p4#/m/03qjg', '/m/05148p4#/m/0dwtp', '/m/018vs#/m/01xqw', '/m/0bxl5#/m/05r5c', '/m/042v_gx#/m/06ncr', '/m/0bxl5#/m/013y1f', '/m/018vs#/m/02sgy', '/m/01vj9c#/m/06ncr', '/m/01v1d8#/m/013y1f', '/m/01vj9c#/m/042v_gx', '/m/01wy6#/m/06ncr', '/m/02hnl#/m/018vs', '/m/0342h#/m/02sgy', '/m/01vj9c#/m/028tv0', '/m/07y_7#/m/01vj9c', '/m/03qjg#/m/0dwtp', '/m/0bxl5#/m/0342h', '/m/01wy6#/m/02hnl', '/m/02fsn#/m/06ncr', '/m/02hnl#/m/03bx0bm', '/m/0g2dz#/m/01wy6', '/m/0gkd1#/m/03qjg', '/m/0342h#/m/0l15bq', '/m/0bxl5#/m/026t6', '/m/05148p4#/m/018vs', '/m/0dwt5#/m/04rzd', '/m/01wy6#/m/0mkg', '/m/07brj#/m/042v_gx', '/m/03bx0bm#/m/02fsn', '/m/0395lw#/m/0dwt5', '/m/04rzd#/m/018vs', '/m/03bx0bm#/m/0dwt5', '/m/0gkd1#/m/02sgy', '/m/0342h#/m/02hnl', '/m/01vj9c#/m/0gkd1', '/m/018j2#/m/013y1f', '/m/03gvt#/m/05r5c', '/m/0dwtp#/m/07xzm', '/m/0l14md#/m/03qjg', '/m/03qjg#/m/01xqw', '/m/01v1d8#/m/0342h', 
'/m/01xqw#/m/018j2', '/m/0l14j_#/m/05r5c', '/m/01xqw#/m/02hnl', '/m/042v_gx#/m/018j2', '/m/0dwtp#/m/02hnl', '/m/013y1f#/m/0342h', '/m/0g2dz#/m/07xzm', '/m/05148p4#/m/02fsn', '/m/0l15bq#/m/02hnl', '/m/03qjg#/m/0bxl5', '/m/013y1f#/m/0l14j_', '/m/04rzd#/m/0bxl5', '/m/018j2#/m/0dwtp', '/m/0l14qv#/m/03qjg', '/m/0l14qv#/m/02hnl', '/m/028tv0#/m/018vs', '/m/03qjg#/m/0342h', '/m/01s0ps#/m/02sgy', '/m/01xqw#/m/01vj9c', '/m/03gvt#/m/0gkd1', '/m/01s0ps#/m/0g2dz', '/m/02fsn#/m/0mkg', '/m/01xqw#/m/0342h', '/m/0l14qv#/m/07xzm', '/m/0dwt5#/m/0342h', '/m/02hnl#/m/028tv0', '/m/01v1d8#/m/03bx0bm', '/m/03bx0bm#/m/06w7v', '/m/03bx0bm#/m/0dwtp', '/m/02hnl#/m/0l14md', '/m/0l14qv#/m/0bxl5', '/m/0l14qv#/m/042v_gx', '/m/03qjg#/m/04rzd', '/m/02sgy#/m/03qjg', '/m/04rzd#/m/042v_gx', '/m/042v_gx#/m/028tv0', '/m/03qjg#/m/03gvt', '/m/018j2#/m/02hnl', '/m/0gkd1#/m/0mkg', '/m/02fsn#/m/018j2', '/m/042v_gx#/m/026t6', '/m/013y1f#/m/04rzd', '/m/03qjg#/m/0mkg', '/m/02sgy#/m/0l14qv', '/m/07brj#/m/01xqw', '/m/01s0ps#/m/018j2', '/m/0dwt5#/m/05r5c', '/m/018j2#/m/0l14j_', '/m/06ncr#/m/07y_7', '/m/03gvt#/m/03bx0bm', '/m/01xqw#/m/03qjg', '/m/028tv0#/m/03qjg', '/m/01wy6#/m/07xzm', '/m/0dwtp#/m/05r5c', '/m/03qjg#/m/0l14qv', '/m/013y1f#/m/02hnl', '/m/042v_gx#/m/0gkd1', '/m/02hnl#/m/018j2', '/m/01xqw#/m/02fsn', '/m/01vj9c#/m/01vdm0', '/m/018vs#/m/026t6', '/m/0l14md#/m/07brj', '/m/0l14md#/m/028tv0', '/m/04rzd#/m/06ncr', '/m/0l14md#/m/06w7v', '/m/05r5c#/m/01vj9c', '/m/0g2dz#/m/02hnl', '/m/02sgy#/m/06ncr', '/m/0g2dz#/m/01s0ps', '/m/06w7v#/m/0bxl5', '/m/05r5c#/m/07xzm', '/m/01s0ps#/m/0dwt5', '/m/0dwt5#/m/01vj9c', '/m/0gkd1#/m/0dwt5', '/m/0mkg#/m/028tv0', '/m/042v_gx#/m/01xqw', '/m/02fsn#/m/0342h', '/m/06w7v#/m/07xzm', '/m/02fsn#/m/06w7v', '/m/05148p4#/m/0l14md', '/m/0bxl5#/m/02sgy', '/m/0l14j_#/m/0342h', '/m/0mkg#/m/018j2', '/m/05r5c#/m/06ncr', '/m/05r5c#/m/018vs', '/m/0l14qv#/m/03gvt', '/m/018j2#/m/05r5c', '/m/018j2#/m/018vs', '/m/0dwt5#/m/02hnl', '/m/0l14md#/m/0mkg', '/m/02hnl#/m/01s0ps', '/m/0l14qv#/m/026t6', 
'/m/03gvt#/m/05148p4', '/m/042v_gx#/m/0342h', '/m/01vj9c#/m/07y_7', '/m/02hnl#/m/07brj', '/m/05148p4#/m/0l15bq', '/m/028tv0#/m/06w7v', '/m/05148p4#/m/0mkg', '/m/018j2#/m/01xqw', '/m/026t6#/m/0dwt5', '/m/03bx0bm#/m/07brj', '/m/03qjg#/m/01s0ps', '/m/0dwt5#/m/0l14j_', '/m/0mkg#/m/03gvt', '/m/0l14md#/m/03bx0bm', '/m/04rzd#/m/0mkg', '/m/013y1f#/m/01s0ps', '/m/028tv0#/m/042v_gx', '/m/01wy6#/m/0l14md', '/m/018vs#/m/05148p4', '/m/0l14md#/m/018j2', '/m/04rzd#/m/0342h', '/m/01s0ps#/m/013y1f', '/m/013y1f#/m/01v1d8', '/m/0gkd1#/m/05148p4', '/m/03bx0bm#/m/05148p4', '/m/01v1d8#/m/02hnl', '/m/0gkd1#/m/06ncr', '/m/04rzd#/m/02fsn', '/m/02sgy#/m/0g2dz', '/m/05r5c#/m/0dwtp', '/m/06ncr#/m/0mkg', '/m/03bx0bm#/m/07xzm', '/m/07y_7#/m/0l14j_', '/m/05148p4#/m/0l14j_', '/m/03bx0bm#/m/028tv0', '/m/06w7v#/m/0l14j_', '/m/013y1f#/m/07brj', '/m/018j2#/m/03qjg', '/m/018vs#/m/013y1f', '/m/0gkd1#/m/01vj9c', '/m/026t6#/m/0bxl5', '/m/018j2#/m/0l14md', '/m/02hnl#/m/03gvt', '/m/0342h#/m/0dwt5', '/m/0g2dz#/m/042v_gx', '/m/02sgy#/m/01wy6', '/m/0l14md#/m/0dwtp', '/m/01vj9c#/m/0mkg', '/m/0dwtp#/m/03bx0bm', '/m/05148p4#/m/042v_gx', '/m/05r5c#/m/05148p4', '/m/0l14qv#/m/05r5c', '/m/0342h#/m/0l14md', '/m/07brj#/m/0342h', '/m/01wy6#/m/02fsn', '/m/013y1f#/m/018vs', '/m/07y_7#/m/0gkd1', '/m/03qjg#/m/0l14md', '/m/0g2dz#/m/0bxl5', '/m/07xzm#/m/026t6', '/m/01xqw#/m/0l14md', '/m/01s0ps#/m/0l14md', '/m/0l14qv#/m/03bx0bm', '/m/03bx0bm#/m/0342h', '/m/042v_gx#/m/01s0ps', '/m/01v1d8#/m/028tv0', '/m/05r5c#/m/02sgy', '/m/0l14md#/m/0395lw', '/m/05r5c#/m/01s0ps', '/m/01vj9c#/m/02sgy', '/m/07y_7#/m/01wy6', '/m/018vs#/m/0l14qv', '/m/01vj9c#/m/02hnl', '/m/028tv0#/m/0gkd1', '/m/01vj9c#/m/0dwtp', '/m/07y_7#/m/01s0ps', '/m/028tv0#/m/03bx0bm', '/m/042v_gx#/m/0l14md', '/m/01vj9c#/m/018vs', '/m/06ncr#/m/018vs', '/m/0dwt5#/m/0gkd1', '/m/0l14md#/m/07y_7', '/m/013y1f#/m/01wy6', '/m/05r5c#/m/0gkd1', '/m/05r5c#/m/06w7v', '/m/01vdm0#/m/03bx0bm', '/m/05148p4#/m/018j2', '/m/0mkg#/m/0l14j_', '/m/0342h#/m/0mkg', '/m/07y_7#/m/05148p4', 
'/m/05148p4#/m/0l14qv', '/m/07xzm#/m/04rzd', '/m/06w7v#/m/028tv0', '/m/0mkg#/m/06ncr', '/m/0dwtp#/m/042v_gx', '/m/028tv0#/m/01xqw', '/m/05842k#/m/028tv0', '/m/0bxl5#/m/01v1d8', '/m/013y1f#/m/0l14qv', '/m/0l14j_#/m/07xzm', '/m/02sgy#/m/0gkd1', '/m/03qjg#/m/01wy6', '/m/0l15bq#/m/0l14qv', '/m/0l14md#/m/01xqw', '/m/03bx0bm#/m/02hnl', '/m/0l14qv#/m/0dwtp', '/m/01s0ps#/m/0342h', '/m/018j2#/m/02sgy', '/m/018j2#/m/0342h', '/m/0dwtp#/m/0342h', '/m/0dwtp#/m/018vs', '/m/013y1f#/m/0l14md', '/m/018vs#/m/07y_7', '/m/07y_7#/m/01xqw', '/m/0l14md#/m/02sgy', '/m/018vs#/m/06ncr', '/m/02hnl#/m/01xqw', '/m/013y1f#/m/0g2dz', '/m/0dwt5#/m/018vs', '/m/07xzm#/m/0l14qv', '/m/07brj#/m/05r5c', '/m/0dwtp#/m/0l14qv', '/m/06ncr#/m/028tv0', '/m/06w7v#/m/07y_7', '/m/018vs#/m/05r5c', '/m/0bxl5#/m/02hnl', '/m/07y_7#/m/013y1f', '/m/03bx0bm#/m/0l14md', '/m/028tv0#/m/0342h', '/m/07brj#/m/05148p4', '/m/0l14md#/m/03gvt', '/m/0342h#/m/018j2', '/m/0dwt5#/m/01wy6', '/m/02sgy#/m/03bx0bm', '/m/01vj9c#/m/0bxl5', '/m/0gkd1#/m/03bx0bm', '/m/03gvt#/m/0g2dz', '/m/042v_gx#/m/07brj', '/m/06w7v#/m/05r5c', '/m/018vs#/m/01vdm0', '/m/0bxl5#/m/05148p4', '/m/01wy6#/m/028tv0', '/m/02sgy#/m/02hnl', '/m/0gkd1#/m/0342h', '/m/07brj#/m/0l14qv', '/m/01vj9c#/m/0dwt5', '/m/013y1f#/m/028tv0', '/m/028tv0#/m/013y1f', '/m/0gkd1#/m/013y1f', '/m/013y1f#/m/07y_7', '/m/0395lw#/m/018vs', '/m/0l14j_#/m/06w7v', '/m/0l14qv#/m/06w7v', '/m/0l14j_#/m/02fsn', '/m/06w7v#/m/02hnl', '/m/05148p4#/m/013y1f', '/m/013y1f#/m/02sgy', '/m/0l14md#/m/013y1f', '/m/0mkg#/m/013y1f', '/m/0l14j_#/m/0dwt5', '/m/0gkd1#/m/04rzd', '/m/07y_7#/m/0342h', '/m/05148p4#/m/05842k', '/m/0dwt5#/m/06ncr', '/m/07y_7#/m/028tv0', '/m/0l14qv#/m/07y_7', '/m/03qjg#/m/06ncr', '/m/0dwt5#/m/05148p4', '/m/02hnl#/m/02fsn', '/m/0l14md#/m/05148p4', '/m/07y_7#/m/06ncr', '/m/0l14md#/m/018vs', '/m/0mkg#/m/01vj9c', '/m/06ncr#/m/04rzd', '/m/018j2#/m/04rzd', '/m/06ncr#/m/07xzm', '/m/042v_gx#/m/01vj9c', '/m/01wy6#/m/0dwtp', '/m/0l14qv#/m/04rzd', '/m/0342h#/m/018vs', '/m/01vj9c#/m/03qjg', 
'/m/0l14j_#/m/0g2dz', '/m/018vs#/m/03gvt', '/m/06w7v#/m/013y1f', '/m/07xzm#/m/0342h', '/m/05148p4#/m/01xqw', '/m/04rzd#/m/018j2', '/m/01s0ps#/m/0dwtp', '/m/01xqw#/m/01wy6', '/m/02sgy#/m/0dwtp', '/m/0l14md#/m/0bxl5', '/m/0342h#/m/07y_7', '/m/01vj9c#/m/05r5c', '/m/0l14j_#/m/013y1f', '/m/02fsn#/m/018vs', '/m/07y_7#/m/03bx0bm', '/m/028tv0#/m/05r5c', '/m/0dwt5#/m/0l14md', '/m/07xzm#/m/0g2dz', '/m/0395lw#/m/028tv0', '/m/0l14qv#/m/02fsn', '/m/07brj#/m/018vs', '/m/0gkd1#/m/0l14j_', '/m/0342h#/m/03bx0bm', '/m/03qjg#/m/07brj', '/m/04rzd#/m/06w7v', '/m/05148p4#/m/0bxl5', '/m/018j2#/m/07xzm', '/m/05r5c#/m/0l14qv', '/m/02sgy#/m/04rzd', '/m/0g2dz#/m/0l14md', '/m/0dwt5#/m/0395lw', '/m/01s0ps#/m/0l14j_', '/m/0342h#/m/01xqw', '/m/06ncr#/m/0342h', '/m/013y1f#/m/042v_gx', '/m/0l14qv#/m/0mkg', '/m/02hnl#/m/0mkg', '/m/06ncr#/m/01wy6', '/m/03bx0bm#/m/01v1d8', '/m/02fsn#/m/05148p4', '/m/02fsn#/m/07y_7', '/m/042v_gx#/m/0l14j_', '/m/01vdm0#/m/042v_gx', '/m/0l14qv#/m/0l14j_', '/m/03gvt#/m/02hnl', '/m/01xqw#/m/018vs', '/m/06w7v#/m/0l14md', '/m/042v_gx#/m/0g2dz', '/m/042v_gx#/m/0dwtp', '/m/0g2dz#/m/0342h', '/m/0mkg#/m/0l14qv', '/m/07xzm#/m/01vj9c', '/m/0l14j_#/m/042v_gx', '/m/042v_gx#/m/03qjg', '/m/01vj9c#/m/0l14j_', '/m/0l15bq#/m/07brj', '/m/02hnl#/m/02sgy', '/m/013y1f#/m/0mkg', '/m/0bxl5#/m/0l14qv', '/m/018vs#/m/02fsn', '/m/07y_7#/m/018vs', '/m/0mkg#/m/05148p4', '/m/05r5c#/m/0bxl5', '/m/0bxl5#/m/06w7v', '/m/02hnl#/m/0395lw', '/m/0dwt5#/m/01s0ps', '/m/042v_gx#/m/0mkg', '/m/07xzm#/m/01wy6', '/m/0l14j_#/m/0dwtp', '/m/0bxl5#/m/01vj9c', '/m/0l14md#/m/01wy6', '/m/0dwt5#/m/0mkg', '/m/0g2dz#/m/05r5c', '/m/05148p4#/m/0342h', '/m/013y1f#/m/01xqw', '/m/01xqw#/m/07brj', '/m/06ncr#/m/02fsn', '/m/018vs#/m/0dwt5', '/m/028tv0#/m/01vj9c', '/m/0l14j_#/m/03bx0bm', '/m/04rzd#/m/0l14qv', '/m/03bx0bm#/m/03qjg', '/m/01v1d8#/m/01vj9c', '/m/0342h#/m/06ncr', '/m/028tv0#/m/02sgy', '/m/0l14qv#/m/018j2']]
[1:['/m/02sgy#/m/0l14md', '/m/013y1f#/m/0395lw', '/m/018j2#/m/0342h', '/m/0l14md#/m/0l14j_', '/m/07y_7#/m/0342h', '/m/02hnl#/m/018vs', '/m/0342h#/m/028tv0', '/m/0gkd1#/m/0l14qv', '/m/0342h#/m/07y_7', '/m/0g2dz#/m/07brj', '/m/028tv0#/m/018vs', '/m/0l14md#/m/05148p4', '/m/013y1f#/m/0l15bq', '/m/0gkd1#/m/018vs', '/m/0l14qv#/m/01s0ps', '/m/018vs#/m/028tv0', '/m/0342h#/m/0l14j_', '/m/02sgy#/m/042v_gx', '/m/07y_7#/m/04rzd', '/m/03gvt#/m/0l14qv', '/m/018vs#/m/026t6', '/m/026t6#/m/018vs', '/m/0342h#/m/0l14md', '/m/05148p4#/m/026t6', '/m/0l14qv#/m/0gkd1', '/m/0l14qv#/m/03bx0bm', '/m/03gvt#/m/05r5c', '/m/0342h#/m/0dwt5', '/m/018vs#/m/02hnl', '/m/026t6#/m/03gvt', '/m/02hnl#/m/0dwt5', '/m/02hnl#/m/026t6', '/m/0gkd1#/m/042v_gx', '/m/02sgy#/m/018vs', '/m/05148p4#/m/02hnl', '/m/07brj#/m/0l14qv', '/m/018vs#/m/07brj', '/m/0342h#/m/0gkd1', '/m/02hnl#/m/03qjg', '/m/018vs#/m/0l14md', '/m/05r5c#/m/03bx0bm', '/m/0395lw#/m/0l15bq', '/m/0l15bq#/m/0l14qv', '/m/042v_gx#/m/0l14md', '/m/042v_gx#/m/028tv0', '/m/07brj#/m/018vs', '/m/05r5c#/m/0gkd1', '/m/0l14j_#/m/0l14qv', '/m/02hnl#/m/0l14qv', '/m/03bx0bm#/m/0342h', '/m/05148p4#/m/04rzd', '/m/0g2dz#/m/02hnl', '/m/05r5c#/m/02sgy', '/m/04rzd#/m/0342h', '/m/05r5c#/m/0342h', '/m/0342h#/m/0g2dz', '/m/042v_gx#/m/0342h', '/m/0bxl5#/m/01s0ps', '/m/0342h#/m/042v_gx', '/m/0l14md#/m/04rzd', '/m/02hnl#/m/05148p4', '/m/0342h#/m/018j2', '/m/05r5c#/m/0l14md', '/m/0342h#/m/018vs', '/m/0l14qv#/m/026t6', '/m/0342h#/m/04rzd', '/m/05148p4#/m/0l14md', '/m/01s0ps#/m/0l14qv', '/m/04rzd#/m/018vs', '/m/0l14md#/m/02sgy', '/m/02sgy#/m/0l14qv', '/m/03qjg#/m/0342h', '/m/018vs#/m/03gvt', '/m/018vs#/m/042v_gx', '/m/042v_gx#/m/03bx0bm', '/m/0l14md#/m/05r5c', '/m/04rzd#/m/02sgy', '/m/018vs#/m/03bx0bm', '/m/02sgy#/m/026t6', '/m/02hnl#/m/01v1d8', '/m/013y1f#/m/0l14md', '/m/05148p4#/m/01v1d8', '/m/01v1d8#/m/018vs', '/m/02sgy#/m/04rzd', '/m/05r5c#/m/05148p4', '/m/0l14j_#/m/013y1f', '/m/0l14j_#/m/06ncr', '/m/0l14j_#/m/0l14md', '/m/03qjg#/m/02hnl', '/m/0l14qv#/m/0l14j_', 
'/m/013y1f#/m/0l14qv', '/m/02hnl#/m/0342h', '/m/0dwt5#/m/02hnl', '/m/042v_gx#/m/02hnl', '/m/03gvt#/m/0gkd1', '/m/018vs#/m/04rzd', '/m/026t6#/m/0l14qv', '/m/05148p4#/m/02sgy', '/m/0l14qv#/m/0395lw', '/m/0342h#/m/05r5c', '/m/0l14qv#/m/05r5c', '/m/026t6#/m/02hnl', '/m/0gkd1#/m/028tv0', '/m/0l14md#/m/03bx0bm', '/m/013y1f#/m/0l14j_', '/m/05148p4#/m/03bx0bm', '/m/0342h#/m/07brj', '/m/0l14md#/m/0l14qv', '/m/01s0ps#/m/0gkd1', '/m/026t6#/m/0l15bq', '/m/013y1f#/m/026t6', '/m/03gvt#/m/03bx0bm', '/m/01v1d8#/m/0l14qv', '/m/0l14qv#/m/03gvt', '/m/0l14j_#/m/05r5c', '/m/01vj9c#/m/028tv0', '/m/018vs#/m/0g2dz', '/m/042v_gx#/m/02sgy', '/m/0l14j_#/m/0342h', '/m/028tv0#/m/0gkd1', '/m/0l14qv#/m/01v1d8', '/m/03qjg#/m/03bx0bm', '/m/018vs#/m/0l14j_', '/m/0l14md#/m/0l15bq', '/m/07y_7#/m/01xqw', '/m/026t6#/m/04rzd', '/m/0342h#/m/0l14qv', '/m/0342h#/m/01vj9c', '/m/042v_gx#/m/04rzd', '/m/018vs#/m/03qjg', '/m/0dwt5#/m/0342h', '/m/05148p4#/m/018vs', '/m/0l14j_#/m/03bx0bm', '/m/0l14md#/m/03gvt', '/m/01s0ps#/m/0bxl5', '/m/05r5c#/m/026t6', '/m/0342h#/m/02sgy', '/m/0l14md#/m/0395lw', '/m/0l14j_#/m/07brj', '/m/0l14md#/m/01v1d8', '/m/05r5c#/m/0l14j_', '/m/0l15bq#/m/0395lw', '/m/01xqw#/m/07y_7', '/m/0bxl5#/m/05r5c', '/m/03bx0bm#/m/0l14j_', '/m/05148p4#/m/0l14j_', '/m/0342h#/m/03qjg', '/m/02fsn#/m/05r5c', '/m/0l14j_#/m/0dwt5', '/m/0g2dz#/m/0l14qv', '/m/03bx0bm#/m/028tv0', '/m/05148p4#/m/05r5c', '/m/05148p4#/m/07y_7', '/m/03bx0bm#/m/018vs', '/m/01s0ps#/m/03bx0bm', '/m/026t6#/m/02sgy', '/m/018vs#/m/0gkd1', '/m/0dwt5#/m/0l14md', '/m/01v1d8#/m/0l14md', '/m/04rzd#/m/05r5c', '/m/05148p4#/m/0dwtp', '/m/01v1d8#/m/05148p4', '/m/07brj#/m/0l14j_', '/m/03bx0bm#/m/03gvt', '/m/02hnl#/m/042v_gx', '/m/0395lw#/m/0l14qv', '/m/03bx0bm#/m/05148p4', '/m/03bx0bm#/m/0l14md', '/m/01vj9c#/m/0342h', '/m/0395lw#/m/0l14j_', '/m/013y1f#/m/01v1d8', '/m/01v1d8#/m/013y1f', '/m/0l14qv#/m/0g2dz', '/m/07brj#/m/0g2dz', '/m/02hnl#/m/02sgy', '/m/026t6#/m/0395lw', '/m/03gvt#/m/018vs', '/m/0l14qv#/m/02sgy', '/m/028tv0#/m/0342h', 
'/m/0342h#/m/05148p4', '/m/0395lw#/m/0l14md', '/m/0l15bq#/m/03gvt', '/m/04rzd#/m/026t6', '/m/0l14j_#/m/018vs', '/m/0g2dz#/m/018vs', '/m/0l14qv#/m/013y1f', '/m/05148p4#/m/0dwt5', '/m/02sgy#/m/0342h', '/m/0l14md#/m/026t6', '/m/03bx0bm#/m/042v_gx', '/m/03qjg#/m/0l14md', '/m/042v_gx#/m/0gkd1', '/m/042v_gx#/m/018vs', '/m/0l14qv#/m/0l14md', '/m/0gkd1#/m/05r5c', '/m/0l14qv#/m/02hnl', '/m/01v1d8#/m/02sgy', '/m/018vs#/m/0l14qv', '/m/042v_gx#/m/03qjg', '/m/05148p4#/m/013y1f', '/m/03gvt#/m/0l14md', '/m/018vs#/m/0342h', '/m/028tv0#/m/02hnl', '/m/02hnl#/m/0g2dz', '/m/0l14md#/m/0dwt5', '/m/03gvt#/m/026t6', '/m/05r5c#/m/03gvt', '/m/0l15bq#/m/013y1f', '/m/04rzd#/m/0l14md', '/m/05r5c#/m/0l14qv', '/m/0gkd1#/m/03gvt', '/m/05r5c#/m/01s0ps', '/m/01v1d8#/m/02hnl', '/m/018vs#/m/05r5c', '/m/03gvt#/m/0l15bq', '/m/013y1f#/m/03gvt', '/m/02sgy#/m/05r5c', '/m/0bxl5#/m/05148p4', '/m/0l14j_#/m/0395lw', '/m/0l14md#/m/0342h', '/m/0gkd1#/m/0342h', '/m/03gvt#/m/013y1f', '/m/01v1d8#/m/026t6', '/m/03qjg#/m/028tv0', '/m/07y_7#/m/05148p4', '/m/0l15bq#/m/026t6', '/m/013y1f#/m/05148p4', '/m/0gkd1#/m/01s0ps', '/m/0dwtp#/m/05148p4', '/m/0gkd1#/m/03bx0bm', '/m/0l14j_#/m/05148p4', '/m/06ncr#/m/0l14j_', '/m/03bx0bm#/m/01s0ps', '/m/04rzd#/m/05148p4', '/m/0395lw#/m/013y1f', '/m/02sgy#/m/01v1d8']]
============================
# Collect every known-true triple across all data splits.
def get_all_true_triples():
    """Return the union of train/valid/test triples (used for filtered evaluation)."""
    return [*train_triples, *valid_triples, *test_triples]
get_all_true_triples
<function __main__.get_all_true_triples()>
# Build hr2t and rt2h lookup tables from the training triples; values end up
# as numpy arrays so they can feed filtered negative sampling directly.
# hr2t_train: maps (head, relation) -> array of true tail entity ids
# rt2h_train: maps (relation, tail) -> array of true head entity ids
hr2t_train = ddict(set)
rt2h_train = ddict(set)
for head, rel, tail in train_triples:
    hr2t_train[(head, rel)].add(tail)
    rt2h_train[(rel, tail)].add(head)
# Freeze each set into a numpy array. Iterating while replacing values is safe
# (no keys are added), and the containers deliberately stay defaultdicts so a
# lookup of an unseen key during sampling yields an empty set, not a KeyError.
for key in hr2t_train:
    hr2t_train[key] = np.array(list(hr2t_train[key]))
for key in rt2h_train:
    rt2h_train[key] = np.array(list(rt2h_train[key]))
# Preview the first two entries of each lookup table (break once cnt hits 3).
print("【hr2t_train】:")
cnt=0
for key,value in hr2t_train.items():
    cnt+=1
    if cnt!=3:
        print("[{}:{}]".format(key,value))
    else:
        break
print("=====================================")
print("【rt2h_train】:")
cnt=0
for key,value in rt2h_train.items():
    cnt+=1
    if cnt!=3:
        print("[{}:{}]".format(key,value))
    else:
        break
print("=====================================")
【hr2t_train】:
[(0, 0):[ 1 2 3 12 13 144 17 19 24 25 26 29 30 31 32 33 39 44
45 176 50 53 61 67 85 95 104 116 121]]
[(2, 1):[ 32 1 0 3 144 25 90 30 31]]
=====================================
【rt2h_train】:
[(0, 1):[ 0 2 3 132 12 13 144 17 18 19 16 24 25 26 30 31 32 33
39 40 44 45 176 50 53 58 61 76 90 104]]
[(1, 3):[ 0 1 2 32 58 40 45 176 17 144 18 53 24 25 90 31]]
=====================================
二、负采样
在知识图谱中,负样本采样的需求来源于KG中仅包含真实的正样本三元组,而在训练知识图谱表示的过程中,每个正样本需要对应相应的负样本。当前很多方法都使用均匀采样(Uniform Sampling)的方式,然而这样的方式很容易造成训练过程中的梯度消失,也就是很多负样本都是很容易被划分为负样本的简单样例,得分函数很自然的对这一类负样本给出较低的分数,从而导致训练过程中梯度为零。所以高质量的负样本应该是得分函数给出较高分数(被误认为是正样本)的负样本。
2.1 负采样头实体or尾实体
# Whether negative sampling filters out known-true triples.
filter_flag = True

def corrupt_head(t, r, num_max=1):
    """Sample candidate negative head entities for the pattern (?, r, t).

    Args:
        t: tail entity id of the triple being corrupted
        r: relation id of the triple being corrupted
        num_max: number of random candidates to draw

    Returns:
        numpy array of candidate head ids. When ``filter_flag`` is on, ids
        that would form a true training triple are removed, so the result
        may contain fewer than ``num_max`` entries.
    """
    candidates = torch.randint(low=0, high=num_ent, size=(num_max,)).numpy()
    if not filter_flag:
        return candidates
    # Fix: the random draw can contain duplicates, so assume_unique=True was
    # invalid (np.in1d's results are undefined then). np.isin also replaces
    # the deprecated np.in1d.
    mask = np.isin(candidates, rt2h_train[(r, t)], invert=True)
    return candidates[mask]
def corrupt_tail( h, r, num_max=1):
    """Sample candidate negative tail entities for the pattern (h, r, ?).

    Args:
        h: head entity id of the triple being corrupted
        r: relation id of the triple being corrupted
        num_max: number of random candidates to draw

    Returns:
        numpy array of candidate tail ids. When ``filter_flag`` is on, ids
        that would form a true training triple are removed, so the result
        may contain fewer than ``num_max`` entries.
    """
    candidates = torch.randint(low=0, high=num_ent, size=(num_max,)).numpy()
    if not filter_flag:
        return candidates
    # Fix: the random draw can contain duplicates, so assume_unique=True was
    # invalid (np.in1d's results are undefined then). np.isin also replaces
    # the deprecated np.in1d.
    mask = np.isin(candidates, hr2t_train[(h, r)], invert=True)
    return candidates[mask]
# Quick sanity check of the two corruption helpers.
print("负采样头实体:",corrupt_head(2,3,1)) # draws one candidate
print("负采样尾实体:",corrupt_tail(2,3,2)) # draws two candidates
负采样头实体: [152]
负采样尾实体: [277 271]
2.2 批量负采样头实体&尾实体
def head_batch(h, r, t, neg_size=1024):
    """Draw ``neg_size`` filtered negative head entities for the triple (h, r, t).

    Args:
        h: head entity id (unused here; kept for a symmetric signature)
        r: relation id
        t: tail entity id
        neg_size: number of negatives to return

    Returns:
        numpy array of head entity ids, shape [neg_size]
    """
    collected = []
    total = 0
    while total < neg_size:
        # Oversample by a factor of two so that, even after filtering out
        # true triples, enough candidates usually survive in one pass.
        chunk = corrupt_head(t, r, num_max=(neg_size - total) * 2)
        total += len(chunk)
        collected.append(chunk)
    # Join the chunks and keep exactly the first neg_size candidates.
    return np.concatenate(collected)[:neg_size]
def tail_batch( h, r, t, neg_size=1024):
    """Draw ``neg_size`` filtered negative tail entities for the triple (h, r, t).

    Args:
        h: head entity id
        r: relation id
        t: tail entity id (unused here; kept for a symmetric signature)
        neg_size: number of negatives to return

    Returns:
        numpy array of tail entity ids, shape [neg_size]
    """
    collected = []
    total = 0
    while total < neg_size:
        # Oversample by two; filtering may discard some candidates.
        chunk = corrupt_tail(h, r, num_max=(neg_size - total) * 2)
        total += len(chunk)
        collected.append(chunk)
    return np.concatenate(collected)[:neg_size]
# Sanity check: both calls return arrays of the default size 1024.
print("批量负采样头实体:",head_batch(2,3,4)) # [1024]
print("批量负采样尾实体:",tail_batch(4,5,6)) # [1024]
批量负采样头实体: [248 22 246 ... 72 260 193]
批量负采样尾实体: [175 56 7 ... 212 41 148]
2.3 均匀负采样-UniSampler
如果知识图谱只包含观测到的事实(正三元组样例),那么未被观测到的三元组就很容易被当作负例。因此需要在未观测到的三元组中找出和当前三元组对应的负样本,且负样本的选择是有区分和技巧的,如果负样本选择无效,模型的表现反而会更差。
class UniSampler():
    """Uniform negative sampler.

    Filters out true training triples and draws random entities as negatives.

    Attributes:
        cross_sampling_flag: alternates between head and tail corruption on
            successive calls to ``sampling``
        num_neg: number of negatives drawn per positive triple
        use_weight: whether to attach word2vec-style subsampling weights
    """
    def __init__(self):
        super().__init__()
        self.cross_sampling_flag = 0
        self.num_neg = 256
        self.use_weight = False
        # Lazily built frequency table for subsampling weights; the original
        # code subscripted the ``count`` method itself, which is a TypeError.
        self._freq = None

    def sampling(self, data):
        """Randomly corrupt either heads or tails (alternating per call).

        Args:
            data: iterable of (h, r, t) id triples

        Returns:
            batch_data: dict with "mode", "positive_sample",
                "negative_sample" and, when ``use_weight`` is on,
                "subsampling_weight"
        """
        batch_data = {}
        neg_ent_sample = []
        subsampling_weight = []
        self.cross_sampling_flag = 1 - self.cross_sampling_flag
        # Build the frequency table once if subsampling weights are requested.
        if self.use_weight and self._freq is None:
            self._freq = self.count(data)
        if self.cross_sampling_flag == 0:
            batch_data['mode'] = "head-batch"
            for h, r, t in data:
                neg_head = head_batch(h, r, t, self.num_neg)
                neg_ent_sample.append(neg_head)
                # Fix: this branch referenced the nonexistent
                # ``self.args.use_weight`` and raised AttributeError on every
                # head-batch call.
                if self.use_weight:
                    weight = self._freq[(h, r)] + self._freq[(t, -r-1)]
                    subsampling_weight.append(weight)
        else:
            batch_data['mode'] = "tail-batch"
            for h, r, t in data:
                neg_tail = tail_batch(h, r, t, self.num_neg)
                neg_ent_sample.append(neg_tail)
                if self.use_weight:
                    weight = self._freq[(h, r)] + self._freq[(t, -r-1)]
                    subsampling_weight.append(weight)
        batch_data["positive_sample"] = torch.LongTensor(np.array(data))
        batch_data['negative_sample'] = torch.LongTensor(np.array(neg_ent_sample))
        if self.use_weight:
            batch_data["subsampling_weight"] = torch.sqrt(1/torch.tensor(subsampling_weight))
        return batch_data

    def uni_sampling(self, data):
        """Corrupt both the head and the tail of every triple in the batch.

        Args:
            data: iterable of (h, r, t) id triples

        Returns:
            dict with "positive_sample", "negative_head", "negative_tail"
        """
        batch_data = {}
        neg_head_list = []
        neg_tail_list = []
        for h, r, t in data:
            neg_head_list.append(head_batch(h, r, t, self.num_neg))
            neg_tail_list.append(tail_batch(h, r, t, self.num_neg))
        batch_data["positive_sample"] = torch.LongTensor(np.array(data))
        batch_data['negative_head'] = torch.LongTensor(neg_head_list)
        batch_data['negative_tail'] = torch.LongTensor(neg_tail_list)
        return batch_data

    def count(self, triples, start=4):
        """Count (head, relation) and (tail, -relation-1) frequencies.

        The frequencies are used for word2vec-style subsampling weights.
        Fix: ``self`` was missing from the signature, so calling this as an
        instance method passed the sampler object in as ``triples``.

        Args:
            triples: iterable of (h, r, t) triples
            start: initial count assigned to every key

        Returns:
            dict mapping the keys above to their frequencies
        """
        count = {}
        for head, relation, tail in triples:
            if (head, relation) not in count:
                count[(head, relation)] = start
            else:
                count[(head, relation)] += 1
            if (tail, -relation-1) not in count:
                count[(tail, -relation-1)] = start
            else:
                count[(tail, -relation-1)] += 1
        return count

    def get_sampling_keys(self):
        """Return the keys produced by ``sampling``."""
        return ['positive_sample', 'negative_sample', 'mode']
2.4 测试集采样-TestSampler
class TestSampler(object):
    """Build evaluation batches that record every known-true triple.

    Attributes:
        sampler: the training sampler this evaluator wraps (stored only)
        hr2t_all: (head, relation) -> tensor of all true tail ids
        rt2h_all: (relation, tail) -> tensor of all true head ids
        num_ent: total number of entities
    """
    def __init__(self, sampler):
        self.sampler = sampler
        self.hr2t_all = ddict(set)
        self.rt2h_all = ddict(set)
        self.get_hr2t_rt2h_from_all()
        self.num_ent = num_ent

    def get_hr2t_rt2h_from_all(self):
        """Populate hr2t_all / rt2h_all from train+valid+test, as tensors.

        Update:
            self.hr2t_all: the hr2t mapping
            self.rt2h_all: the rt2h mapping
        """
        self.all_true_triples = get_all_true_triples()
        for head, rel, tail in self.all_true_triples:
            self.hr2t_all[(head, rel)].add(tail)
            self.rt2h_all[(rel, tail)].add(head)
        # Freeze each set into an index tensor usable for label scattering.
        for key in self.hr2t_all:
            self.hr2t_all[key] = torch.tensor(list(self.hr2t_all[key]))
        for key in self.rt2h_all:
            self.rt2h_all[key] = torch.tensor(list(self.rt2h_all[key]))

    def sampling(self, data):
        """Build an evaluation batch with 0/1 label rows for filtering.

        Args:
            data: list of (h, r, t) triples to evaluate

        Returns:
            batch_data: dict with "positive_sample" plus per-row
                "head_label" / "tail_label" tensors marking every true entity
        """
        rows = len(data)
        head_label = torch.zeros(rows, self.num_ent)
        tail_label = torch.zeros(rows, self.num_ent)
        for row, (head, rel, tail) in enumerate(data):
            head_label[row][self.rt2h_all[(rel, tail)]] = 1.0
            tail_label[row][self.hr2t_all[(head, rel)]] = 1.0
        return {
            "positive_sample": torch.tensor(data),
            "head_label": head_label,
            "tail_label": tail_label,
        }
2.5 对训练集和验证集进行采样
# Run the alternating sampler once over the training triples
# (this call lands on the tail-batch branch).
train_sampler=UniSampler()
train_random_data=train_sampler.sampling(train_triples)
print(train_random_data.keys())
print(train_random_data['mode'])
print(train_random_data['positive_sample'].shape)
print(train_random_data['negative_sample'].shape)
dict_keys(['mode', 'positive_sample', 'negative_sample'])
tail-batch
torch.Size([4565, 3])
torch.Size([4565, 256])
# Uniform sampling: corrupt both head and tail for every training triple.
train_uni_data=train_sampler.uni_sampling(train_triples)
print(train_uni_data.keys())
print(train_uni_data['positive_sample'].shape)
print(train_uni_data['negative_head'].shape)
print(train_uni_data['negative_tail'].shape)
dict_keys(['positive_sample', 'negative_head', 'negative_tail'])
torch.Size([4565, 3])
torch.Size([4565, 256])
torch.Size([4565, 256])
# Build label tensors for the validation triples.
# NOTE(review): a batch dict is passed where a sampler object is expected;
# the stored attribute is never used afterwards, so this happens to work.
test_sample=TestSampler(train_random_data)
valid_data=test_sample.sampling(valid_triples)
print(valid_data.keys())
dict_keys(['positive_sample', 'head_label', 'tail_label'])
三、模型
这里以TransE模型为例
3.1 TransE模型
class TransE(nn.Module):
    """TransE embedding model: score(h, r, t) = margin - ||h + r - t||_1."""
    def __init__(self):
        super(TransE, self).__init__()
        self.ent_emb = None
        self.rel_emb = None
        self.emb_dim=200
        self.init_emb()
    def init_emb(self):
        """
        Initialize entity and relation embeddings with a uniform distribution.
        self.epsilon: widens the embedding init range slightly beyond margin/dim;
        self.margin: fixed margin (gamma) used inside the score function;
        self.embedding_range: half-width of the uniform init interval.
        """
        self.epsilon = 2.0
        self.margin = nn.Parameter(
            torch.Tensor([12.0]),
            requires_grad=False
        )
        self.embedding_range = nn.Parameter(
            torch.Tensor([(self.margin.item() + self.epsilon) / self.emb_dim]),
            requires_grad=False
        )
        # Embedding layers map integer ids to emb_dim-dimensional dense vectors.
        self.ent_emb = nn.Embedding(num_ent, self.emb_dim)
        self.rel_emb = nn.Embedding(num_rel,self.emb_dim)
        # Uniform init in [-embedding_range, embedding_range].
        nn.init.uniform_(tensor=self.ent_emb.weight.data, a=-self.embedding_range.item(), b=self.embedding_range.item())
        nn.init.uniform_(tensor=self.rel_emb.weight.data, a=-self.embedding_range.item(), b=self.embedding_range.item())
    def tri2emb(self, triples, negs=None, mode="single"):
        """
        Look up head/relation/tail embeddings for a batch of triples.
        Each returned embedding has three dimensions so positive and
        negative scores broadcast against each other.
        Args:
            triples (tensor): triple ids, shape [triples number, 3].
            negs (tensor, optional): ids of the entities substituted into the
                corrupted slot. None means test/eval, where every entity is
                scored. Defaults to None.
            mode (str, optional): which slot the negatives replace; "single"
                means no slot is replaced. Defaults to 'single'.
        Returns:
            head_emb: head entity embeddings
            relation_emb: relation embeddings
            tail_emb: tail entity embeddings
        """
        if mode == "single":
            head_emb = self.ent_emb(triples[:, 0]).unsqueeze(1) # [bs, 1, dim]
            relation_emb = self.rel_emb(triples[:, 1]).unsqueeze(1) # [bs, 1, dim]
            tail_emb = self.ent_emb(triples[:, 2]).unsqueeze(1) # [bs, 1, dim]
        elif mode == "head-batch" or mode == "head_predict":
            if negs is None: # evaluation: rank every entity, so use the full embedding table
                head_emb = self.ent_emb.weight.data.unsqueeze(0) # [1, num_ent, dim]
            else:
                head_emb = self.ent_emb(negs) # [bs, num_neg, dim]
            relation_emb = self.rel_emb(triples[:, 1]).unsqueeze(1) # [bs, 1, dim]
            tail_emb = self.ent_emb(triples[:, 2]).unsqueeze(1) # [bs, 1, dim]
        elif mode == "tail-batch" or mode == "tail_predict":
            head_emb = self.ent_emb(triples[:, 0]).unsqueeze(1) # [bs, 1, dim]
            relation_emb = self.rel_emb(triples[:, 1]).unsqueeze(1) # [bs, 1, dim]
            if negs is None:
                tail_emb = self.ent_emb.weight.data.unsqueeze(0) # [1, num_ent, dim]
            else:
                tail_emb = self.ent_emb(negs) # [bs, num_neg, dim]
        return head_emb, relation_emb, tail_emb
    def score_func(self, head_emb, relation_emb, tail_emb):
        """
        Score a batch of triples.
        Formula: gamma - ||h + r - t||_1
        Args:
            head_emb: head entity embeddings.
            relation_emb: relation embeddings.
            tail_emb: tail entity embeddings.
        Returns:
            score: triple scores (higher means more plausible).
        """
        score = (head_emb + relation_emb) - tail_emb
        score = self.margin.item() - torch.norm(score, p=1, dim=-1)
        return score
    def forward(self, triples, negs=None, mode='single'):
        """
        Training-time entry point.
        Args:
            triples: triple ids, as (h, r, t), shape:[batch_size, 3].
            negs: negative-sample entity ids, defaults to None.
            mode: head prediction, tail prediction or 'single' (default).
        Returns:
            score: triple scores.
        """
        head_emb, relation_emb, tail_emb = self.tri2emb(triples, negs, mode)
        score = self.score_func(head_emb, relation_emb, tail_emb)
        return score
    def get_score(self, batch, mode):
        """
        Test-time scoring: scores the positive triples against all entities.
        Args:
            batch: one batch of data (reads "positive_sample" only).
            mode: head prediction or tail prediction.
        Returns:
            score: triple scores, [bs, num_ent].
        """
        triples = batch["positive_sample"]
        head_emb, relation_emb, tail_emb = self.tri2emb(triples, mode=mode)
        score = self.score_func(head_emb, relation_emb, tail_emb)
        return score
# Score a positive batch and its negatives once with a fresh model.
model = TransE()
pos_sample = train_random_data["positive_sample"]
neg_sample = train_random_data["negative_sample"]
pos_score = model(pos_sample)
mode = train_random_data["mode"]
neg_score = model(pos_sample, neg_sample, mode)
print("pos_score.shape:",pos_score.shape)
print("neg_score.shape:",neg_score.shape)
pos_score.shape: torch.Size([4565, 1])
neg_score.shape: torch.Size([4565, 256])
3.2 损失函数
自对抗(Self-adversarial)的负采样损失函数
class Adv_Loss(nn.Module):
    """Negative-sampling loss with optional self-adversarial weighting.

    Attributes:
        negative_adversarial_sampling: weight negatives by their softmax score
        use_weight: apply subsampling weights to the batch terms
        adv_temp: temperature of the self-adversarial softmax
    """
    def __init__(self):
        super(Adv_Loss, self).__init__()
        self.negative_adversarial_sampling = True   # self-adversarial weighting on
        self.use_weight = False                     # subsampling weights off
        self.adv_temp = 1.0                         # sampling temperature
    def forward(self, pos_score, neg_score, subsampling_weight=None):
        """Compute the self-adversarial negative-sampling loss.

        Args:
            pos_score: scores of the positive samples
            neg_score: scores of the negative samples, [bs, num_neg]
            subsampling_weight: optional per-sample correction weights

        Returns:
            loss: scalar training loss for backprop
        """
        if self.negative_adversarial_sampling:
            # Softmax weights are detached so they act as constants.
            adv_weight = F.softmax(neg_score * self.adv_temp, dim=1).detach()
            neg_score = (adv_weight * F.logsigmoid(-neg_score)).sum(dim=1)  # [bs]
        else:
            neg_score = F.logsigmoid(-neg_score).mean(dim = 1)
        pos_score = F.logsigmoid(pos_score).view(neg_score.shape[0])  # [bs]
        if self.use_weight:
            pos_loss = - (subsampling_weight * pos_score).sum()/subsampling_weight.sum()
            neg_loss = - (subsampling_weight * neg_score).sum()/subsampling_weight.sum()
        else:
            pos_loss = - pos_score.mean()
            neg_loss = - neg_score.mean()
        return (pos_loss + neg_loss) / 2
3.3 开始训练
# Training loop.
model = TransE()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # optimizer
num_epochs = 101
advloss=Adv_Loss()
for epoch in range(num_epochs):
    optimizer.zero_grad()
    pos_sample = train_random_data["positive_sample"]
    neg_sample = train_random_data["negative_sample"]
    mode = train_random_data["mode"]
    pos_score = model(pos_sample)
    # Fix: the sampling mode must be forwarded. Without it tri2emb falls back
    # to "single" mode, the negatives are ignored (neg_score == pos_score),
    # and the loss can only converge to ln(2) ~= 0.6931 — exactly what the
    # original log below shows.
    neg_score = model(pos_sample, neg_sample, mode)
    loss = advloss(pos_score, neg_score,None)
    loss.backward()
    optimizer.step()
    if epoch%10==0:
        print(f"Epoch: {epoch}, Loss: {loss.item()}")
        print("=======================================")
Epoch: 0, Loss: 0.7778230309486389
=======================================
Epoch: 10, Loss: 0.7069342732429504
=======================================
Epoch: 20, Loss: 0.6993292570114136
=======================================
Epoch: 30, Loss: 0.6956018209457397
=======================================
Epoch: 40, Loss: 0.6943122744560242
=======================================
Epoch: 50, Loss: 0.693738579750061
=======================================
Epoch: 60, Loss: 0.6934834718704224
=======================================
Epoch: 70, Loss: 0.6933453679084778
=======================================
Epoch: 80, Loss: 0.6932717561721802
=======================================
Epoch: 90, Loss: 0.6932302713394165
=======================================
Epoch: 100, Loss: 0.6932039856910706
=======================================
四、任务
4.1 链接预测
知识图谱的链接预测(Link Prediction)是指根据已知的知识图谱中的实体和关系,预测图谱中缺失的实体之间可能存在的关系。具体而言,链接预测任务旨在填补知识图谱中的空白,即找到可能的、缺失的实体关系,并进行预测和推断。实验过程中,将每个三元组的头实体去掉,换成其他实体,对得到的每个三元组计算一个距离分数,根据这个分数对三元组排序,距离越小排名越靠前。记录原始的正确的三元组的排名。然后对尾实体也作同样的处理,最后得到所有排名的均值meanrank,和rank<10的实体所占全部实体的百分比hits@10。
# Link prediction
def link_predict(batch, model, prediction="all"):
    """Rank the gold entity for head and/or tail prediction.

    Args:
        batch: batch of validation/test triples with label tensors
        model: the trained KG embedding model
        prediction: link-prediction mode, one of "all", "head", "tail"

    Returns:
        ranks: float tensor with the rank of each gold entity

    Raises:
        ValueError: if ``prediction`` is not a supported mode.
    """
    if prediction == "all": # rank both missing slots
        tail_ranks = tail_predict(batch, model)
        head_ranks = head_predict(batch, model)
        ranks = torch.cat([tail_ranks, head_ranks])
    elif prediction == "head": # rank the head entity only
        ranks = head_predict(batch, model)
    elif prediction == "tail": # rank the tail entity only
        ranks = tail_predict(batch, model)
    else:
        # Fix: an unknown mode previously fell through and raised a confusing
        # NameError on ``ranks``; fail fast with a clear message instead.
        raise ValueError(f"unknown prediction mode: {prediction!r}")
    return ranks.float()
def head_predict(batch, model):
    """Rank the gold head entity of every triple in the batch.

    Args:
        batch: evaluation batch with "positive_sample" and "head_label"
        model: the trained KG embedding model

    Returns:
        tensor: ranks of the gold head entities, dim [batch_size]
    """
    triples = batch["positive_sample"]
    gold_heads = triples[:, 0]
    scores = model.get_score(batch, "head_predict")
    return calc_ranks(gold_heads, batch["head_label"], scores)
def tail_predict(batch, model):
    """Rank the gold tail entity of every triple in the batch.

    Args:
        batch: evaluation batch with "positive_sample" and "tail_label"
        model: the trained KG embedding model

    Returns:
        tensor: ranks of the gold tail entities, dim [batch_size]
    """
    triples = batch["positive_sample"]
    gold_tails = triples[:, 2]
    scores = model.get_score(batch, "tail_predict")
    return calc_ranks(gold_tails, batch["tail_label"], scores)
def calc_ranks(idx, label, pred_score):
""" 计算三元组得分排名
Args:
idx ([type]): 需要预测的实体id。
label ([type]): 现存三元组的id,用于计算过滤后的结果。
pred_score ([type]): 由模型预测的三分。
Returns:
ranks: 要预测的三元组的等级dim [batch_size]。
"""
b_range = torch.arange(pred_score.size()[0])
target_pred = pred_score[b_range, idx]
pred_score = torch.where(label.bool(), -torch.ones_like(pred_score) * 10000000, pred_score)
pred_score[b_range, idx] = target_pred
ranks = (
1
+ torch.argsort(
torch.argsort(pred_score, dim=1, descending=True), dim=1, descending=False
)[b_range, idx]
)
return ranks
# Validation metrics
def valid_step(batch,model):
    """Compute count / MRR-sum / hits@{1,3,10} counts for one eval batch."""
    ranks = link_predict(batch, model, prediction='all')
    results = {
        "count": torch.numel(ranks),
        # Note: this is the *sum* of reciprocal ranks, not the mean.
        "mrr": torch.sum(1.0 / ranks).item(),
    }
    for k in [1,3,10]:
        results['hits@{}'.format(k)] = torch.numel(ranks[ranks <= k])
    return results
# Train again from scratch, evaluating on the validation split every 10 epochs.
model = TransE()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # optimizer
num_epochs = 101
advloss=Adv_Loss()
for epoch in range(num_epochs):
    optimizer.zero_grad()
    pos_sample = train_random_data["positive_sample"]
    neg_sample = train_random_data["negative_sample"]
    mode = train_random_data["mode"]
    pos_score = model(pos_sample)
    # Unlike the first training loop, the mode is forwarded here, so the
    # negatives are actually scored and the loss drops below ln(2).
    neg_score = model(pos_sample, neg_sample, mode)
    loss = advloss(pos_score, neg_score)
    loss.backward()
    optimizer.step()
    if epoch%10==0:
        results=valid_step(valid_data,model)
        print("|Eval|mrr|:", results['mrr'])
        print("|Eval|hit@1|:", results['hits@1'])
        print("|Eval|hit@3|:", results['hits@3'])
        print("|Eval|hit@10|:", results['hits@10'])
        print(f"Epoch: {epoch}, Loss: {loss.item()}")
        print("=======================================")
|Eval|mrr|: 7.404098033905029
|Eval|hit@1|: 2
|Eval|hit@3|: 5
|Eval|hit@10|: 16
Epoch: 0, Loss: 1.2577273845672607
=======================================
|Eval|mrr|: 15.437685012817383
|Eval|hit@1|: 2
|Eval|hit@3|: 18
|Eval|hit@10|: 34
Epoch: 10, Loss: 0.8133202791213989
=======================================
|Eval|mrr|: 27.446821212768555
|Eval|hit@1|: 9
|Eval|hit@3|: 37
|Eval|hit@10|: 53
Epoch: 20, Loss: 0.5701720118522644
=======================================
|Eval|mrr|: 38.950252532958984
|Eval|hit@1|: 17
|Eval|hit@3|: 49
|Eval|hit@10|: 73
Epoch: 30, Loss: 0.4747043251991272
=======================================
|Eval|mrr|: 52.88045120239258
|Eval|hit@1|: 30
|Eval|hit@3|: 61
|Eval|hit@10|: 94
Epoch: 40, Loss: 0.4347356855869293
=======================================
|Eval|mrr|: 61.89950942993164
|Eval|hit@1|: 37
|Eval|hit@3|: 72
|Eval|hit@10|: 113
Epoch: 50, Loss: 0.4149724841117859
=======================================
|Eval|mrr|: 64.98758697509766
|Eval|hit@1|: 37
|Eval|hit@3|: 80
|Eval|hit@10|: 115
Epoch: 60, Loss: 0.40343987941741943
=======================================
|Eval|mrr|: 69.46651458740234
|Eval|hit@1|: 41
|Eval|hit@3|: 82
|Eval|hit@10|: 120
Epoch: 70, Loss: 0.39522212743759155
=======================================
|Eval|mrr|: 73.74652099609375
|Eval|hit@1|: 44
|Eval|hit@3|: 91
|Eval|hit@10|: 121
Epoch: 80, Loss: 0.38940179347991943
=======================================
|Eval|mrr|: 78.21752166748047
|Eval|hit@1|: 50
|Eval|hit@3|: 96
|Eval|hit@10|: 127
Epoch: 90, Loss: 0.38539594411849976
=======================================
|Eval|mrr|: 81.95490264892578
|Eval|hit@1|: 54
|Eval|hit@3|: 98
|Eval|hit@10|: 134
Epoch: 100, Loss: 0.382443904876709
=======================================
4.2 三元组分类
在实验中,首先对每个三元组,通过替换头实体或尾实体构建一个不存在于知识图谱中的三元组,作为负例,原三元组则是正例。这样就得到了一个包含同等数量正例和负例的数据集,对这个数据集中的三元组作二分类,计算其分类的准确率。
参考
https://github.com/zjukg/NeuralKG