使用Movielens-20m创建DGL异质图

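数据:代码读取 data/Movielens-20m/ 目录下的三个CSV文件(MovieID_genreID.csv、UserID_MovieID_Rating.csv、UserID_MovieID_TagID.csv),读取时跳过每个文件的第一行。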

代码(运行前需先导入依赖:`import numpy as np`、`import torch as th`、`import dgl`):

def add_MG(filename="data/Movielens-20m/MovieID_genreID.csv"):
    """Load movie/genre ID pairs from a CSV file for the MG and GM edges.

    The first line of the file is skipped and columns 0 and 1 are parsed as
    integers (movie ID and genre ID, going by the file name).

    Args:
        filename: Path to the comma-separated input file. Defaults to the
            MovieLens-20m location, so existing zero-argument calls behave
            as before.

    Returns:
        A pair ``(MG_data, GM_data)``. ``MG_data`` is ``(movies, genres)``
        and ``GM_data`` is ``(genres, movies)``; both reuse the same two
        lists of Python ints, one entry per data row.
    """
    # unpack=True makes loadtxt hand back one array per requested column
    # instead of a single 2-D array, so the columns can be split apart.
    columns = np.loadtxt(filename, delimiter=",", skiprows=1, usecols=(0, 1), unpack=True, dtype=int)
    # With exactly one data row each column comes back as a scalar, whose
    # tolist() is a bare int; atleast_1d keeps the result a list in that case.
    movies, genres = (np.atleast_1d(column).tolist() for column in columns)
    return (movies, genres), (genres, movies)


def add_UMR(filename='data/Movielens-20m/UserID_MovieID_Rating.csv'):
    """Load user/movie/rating triples from a CSV file for the UMR and MUR edges.

    The first line of the file is skipped. Columns 0-2 are parsed with
    loadtxt's default float dtype so the rating column keeps its fractional
    part; the two ID columns are then cast to int.

    Args:
        filename: Path to the comma-separated input file. Defaults to the
            MovieLens-20m location, so existing zero-argument calls behave
            as before.

    Returns:
        A triple ``(UMR_data, MUR_data, ratings)``. ``UMR_data`` is
        ``(users, movies)`` and ``MUR_data`` is ``(movies, users)``, built
        from the same two lists of Python ints; ``ratings`` is a list of
        floats. All lists have one entry per data row, in file order.
    """
    columns = np.loadtxt(filename, delimiter=",", skiprows=1, usecols=(0, 1, 2), unpack=True)
    # With exactly one data row each column comes back as a scalar, whose
    # tolist() is a bare number; atleast_1d keeps every result a list.
    users, movies, ratings = (np.atleast_1d(column) for column in columns)
    users = users.astype(dtype=int).tolist()
    movies = movies.astype(dtype=int).tolist()
    ratings = ratings.tolist()
    return (users, movies), (movies, users), ratings

def add_UMT(filename='data/Movielens-20m/UserID_MovieID_TagID.csv'):
    """Load user/movie/tag ID triples from a CSV file for the UMT and MUT edges.

    The first line of the file is skipped and columns 0-2 are parsed as
    integers.

    Args:
        filename: Path to the comma-separated input file. Defaults to the
            MovieLens-20m location, so existing zero-argument calls behave
            as before.

    Returns:
        A triple ``(UMT_data, MUT_data, tags)``. ``UMT_data`` is
        ``(users, movies)`` and ``MUT_data`` is ``(movies, users)``, built
        from the same two lists; ``tags`` holds the third column. All lists
        contain Python ints, one entry per data row, in file order.
    """
    columns = np.loadtxt(filename, delimiter=",", skiprows=1, usecols=(0, 1, 2), unpack=True, dtype=int)
    # With exactly one data row each column comes back as a scalar, whose
    # tolist() is a bare int; atleast_1d keeps every result a list.
    users, movies, tags = (np.atleast_1d(column).tolist() for column in columns)
    return (users, movies), (movies, users), tags

def createHeteroGraph():
    """Build the DGL heterograph from the three MovieLens edge loaders.

    Calls ``add_MG``, ``add_UMR`` and ``add_UMT`` with their defaults and
    registers six relations between 'movie', 'genre' and 'user' nodes: each
    loaded relation plus its reverse. The ratings are stored as the 'rate'
    edge feature on 'UMR' and 'MUR', and the tag IDs as the 'tag' edge
    feature on 'UMT' and 'MUT'.

    Returns:
        The heterograph returned by ``dgl.heterograph`` with the edge
        features attached.
    """
    mg_pairs, gm_pairs = add_MG()
    umr_pairs, mur_pairs, rating_values = add_UMR()
    umt_pairs, mut_pairs, tag_ids = add_UMT()

    # Keys are (source node type, edge type, destination node type).
    relations = {
        ('movie', 'MG', 'genre'): mg_pairs,
        ('genre', 'GM', 'movie'): gm_pairs,
        ('user', 'UMR', 'movie'): umr_pairs,
        ('movie', 'MUR', 'user'): mur_pairs,
        ('user', 'UMT', 'movie'): umt_pairs,
        ('movie', 'MUT', 'user'): mut_pairs,
    }
    graph = dgl.heterograph(relations)

    # A forward relation and its reverse come from the same rows, so the same
    # value list is attached to both; each gets its own freshly built tensor.
    for edge_type in ('UMR', 'MUR'):
        graph.edges[edge_type].data['rate'] = th.tensor(rating_values)
    for edge_type in ('UMT', 'MUT'):
        graph.edges[edge_type].data['tag'] = th.tensor(tag_ids)
    return graph

if __name__ == '__main__':
    # Script entry point: build the heterograph and print DGL's summary of it.
    hetero_graph = createHeteroGraph()
    print(hetero_graph)

output

D:\Apps\Anaconda3\python.exe D:/PYproject/DataSet_Process/MovieLens-20m.py
Using backend: pytorch
Graph(num_nodes={'genre': 19, 'movie': 131263, 'user': 138494},
      num_edges={('genre', 'GM', 'movie'): 54160, ('movie', 'MG', 'genre'): 54160, ('movie', 'MUR', 'user'): 20000263, ('movie', 'MUT', 'user'): 217571, ('user', 'UMR', 'movie'): 20000263, ('user', 'UMT', 'movie'): 217571},
      metagraph=[('genre', 'movie', 'GM'), ('movie', 'genre', 'MG'), ('movie', 'user', 'MUR'), ('movie', 'user', 'MUT'), ('user', 'movie', 'UMR'), ('user', 'movie', 'UMT')])

Process finished with exit code 0

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值