'''
author: DeniuHe
date:2020-08-01
'''
import numpy as np
from sklearn import datasets
from scipy.spatial.distance import pdist,squareform
from sklearn.model_selection import train_test_split
from collections import OrderedDict
from itertools import combinations,product
def initLabeled(y, n_per_class=2):
    """Randomly pick the indices of the initially labeled instances.

    For every distinct label in ``y``, ``n_per_class`` positions carrying
    that label are drawn without replacement.  Classes are processed in the
    sorted order returned by ``np.unique`` and the draws use NumPy's global
    random state (``np.random.choice``), so call ``np.random.seed`` first if
    reproducible output is needed.

    Args:
        y: One-dimensional sequence (list or array) of hashable class labels.
        n_per_class: Number of instances to select from each class.
            Defaults to 2.

    Returns:
        A list of integer positions into ``y``, grouped by class, with
        ``n_per_class`` entries per class.  Empty when ``y`` is empty.

    Raises:
        ValueError: If ``n_per_class`` is negative, or if some class has
            fewer than ``n_per_class`` instances.
    """
    if n_per_class < 0:
        raise ValueError(
            "n_per_class must be non-negative, got {}".format(n_per_class)
        )

    # Pre-create one bucket per label so buckets are ordered by sorted label.
    indexByLabel = OrderedDict((label, []) for label in np.unique(y))
    for position, label in enumerate(y):
        indexByLabel[label].append(position)

    labeledIndex = []
    for label, positions in indexByLabel.items():
        # Fail with a message naming the class instead of NumPy's generic
        # "larger sample than population" error.
        if len(positions) < n_per_class:
            raise ValueError(
                "class {!r} has only {} instance(s); cannot select {} "
                "without replacement".format(label, len(positions), n_per_class)
            )
        labeledIndex.extend(
            np.random.choice(positions, size=n_per_class, replace=False, p=None)
        )
    return labeledIndex
if __name__ == '__main__':
    # Demo: load the iris data set and hold out 10% of it as a test split.
    features, labels = datasets.load_iris(return_X_y=True)
    X, X_test, y, y_test = train_test_split(
        features,
        labels,
        test_size=0.1,
        random_state=0,
    )

    # ------ initialise the labeled data ------
    # Show the class labels of the randomly chosen initial instances.
    initial_index = initLabeled(y)
    print(y[initial_index])