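# Deep graph clustering datasets are often distributed as .npy files, while some code expects .txt input.
# This script was assembled for personal use from several snippets found online.
# It converts each .npy file to a .txt file and generates the corresponding graph (edge-list) file.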
import os
import numpy as np
from sklearn.metrics import pairwise_distances as pair
from sklearn.preprocessing import normalize
# Number of nearest neighbours kept per node when building the graph.
topk = 5


def construct_graph(features, label, method='heat', fname=None):
    """Build a top-k similarity graph and write it to disk as an edge list.

    For every node the ``topk + 1`` most similar nodes are selected (the extra
    slot is meant for the node itself); each selected node other than the node
    itself is written as one ``"i j"`` line. The fraction of written edges
    whose endpoints carry different labels is printed as ``error rate``.

    Args:
        features: 2-D array-like, one row per node. It is never modified.
        label: sequence of per-node labels, indexable by node id.
        method: similarity measure.
            ``'heat'`` -- ``exp(-0.5 * d ** 2)`` on pairwise distances.
            ``'cos'``  -- dot product of the features binarised with ``> 0``.
            ``'ncos'`` -- like ``'cos'`` but rows are L1-normalised first.
        fname: output file. When omitted, the module-level ``path`` prefix
            plus ``"_graph.txt"`` is used, as in the original script.

    Raises:
        ValueError: if ``method`` is not one of the supported names.
    """
    if fname is None:
        # Backward-compatible default: relies on the global `path` prefix that
        # the calling script assigns before invoking this function.
        fname = path + "_graph.txt"
    num = len(label)

    if method == 'heat':
        dist = np.exp(-0.5 * pair(features) ** 2)
    elif method in ('cos', 'ncos'):
        # Binarise a private copy so the caller's array is left untouched.
        binary = np.array(features, copy=True)
        binary[binary > 0] = 1
        if method == 'ncos':
            binary = normalize(binary, axis=1, norm='l1')
        dist = np.dot(binary, binary.T)
    else:
        raise ValueError(
            "unknown method {!r}; expected 'heat', 'cos' or 'ncos'".format(method)
        )

    n_nodes = dist.shape[0]
    # argpartition needs kth to be in range, so clamp for graphs that have
    # fewer than topk + 1 nodes (every node is then a candidate neighbour).
    keep = min(topk + 1, n_nodes)
    inds = [np.argpartition(row, -keep)[-keep:] for row in dist]

    counter = 0
    lines = []
    for i, neighbours in enumerate(inds):
        for j in neighbours:
            if j == i:
                continue  # no self-loops
            if label[j] != label[i]:
                counter += 1
            lines.append('{} {}\n'.format(i, j))

    with open(fname, 'w') as f:
        f.writelines(lines)

    # Expected number of edges: one per (node, neighbour slot) pair.
    expected_edges = num * max(keep - 1, 0)
    error_rate = counter / expected_edges if expected_edges else 0.0
    print('error rate: {}'.format(error_rate))
# Driver script: convert every .npy array of one dataset folder to .txt,
# then build the graph file from the converted feature and label files.
dataset_name = 'amap'
# Folder that holds all .npy files of this dataset.
dataset_dir = 'D:/datasets/' + dataset_name
# Converted .txt files are written next to their .npy sources.
txtpath = dataset_dir

for entry in os.listdir(dataset_dir):
    stem, ext = os.path.splitext(entry)
    # Match on the real extension; substring checks also hit names that
    # merely contain "npy" or "txt" somewhere.
    if ext != '.npy':
        continue
    # NOTE(security): allow_pickle=True lets np.load execute pickled objects;
    # only run this on dataset files from a trusted source.
    input_data = np.load(os.path.join(dataset_dir, entry), allow_pickle=True)
    # Flatten everything past the first axis so savetxt gets a 2-D table.
    data = input_data.reshape(input_data.shape[0], -1)
    # Files with "feat" in their name are written as floats, the rest as ints.
    data_fmt = '%.6f' if 'feat' in entry else '%d'
    np.savetxt('%s/%s.txt' % (txtpath, stem), data, fmt=data_fmt)

# File-name prefix "<folder>/<dataset>"; construct_graph reads this
# module-level name to derive its default output file.
path = "D:/datasets/" + dataset_name + "/" + dataset_name
feat = np.loadtxt(path + "_feat.txt", dtype=float)  # node features
label = np.loadtxt(path + "_label.txt", dtype=int)  # node labels
construct_graph(feat, label, 'ncos')  # write the graph file
print('over')