模型原型
class sklearn.semi_supervised.LabelPropagation(kernel='rbf',gamma=20,n_neighbors=7,alpha=1,max_iter=30,tol=0.001)
参数
- kernel:指定核函数(距离函数)
- 'rbf'
- 'knn'
- gamma:为rbf核的参数
- n_neighbors:为knn核的参数
- alpha
- max_iter
- tol
属性
- X_:输入数组
- classes_:分类问题中,类别标记数组(每个标记出现一次)
- label_distributions_:给出每个样本的标记分布
- transduction_:给出每个样本计算出的标记
- n_iter_:实际运行的迭代次数
方法
- fit(X,y)
- predict(X)
- predict_proba(X)
- score(X,y[,sample_weight])
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn import datasets
from sklearn.semi_supervised import LabelPropagation
加载数据集
def load_data():
    """Load the digits dataset, shuffle it, and choose the unlabeled samples.

    Returns:
        A tuple ``(X, y, unlabeled_indices)`` where ``X`` and ``y`` are the
        shuffled feature rows and targets, and ``unlabeled_indices`` holds
        the positions of every sample after the first tenth of the data.
    """
    digits = datasets.load_digits()

    # Shuffle the samples with a fixed seed so the split is reproducible.
    order = np.arange(len(digits.data))
    np.random.RandomState(0).shuffle(order)
    X, y = digits.data[order], digits.target[order]

    # The first tenth of the samples keeps its labels; the remainder is
    # reported as the set of unlabeled positions.
    n_labeled_points = int(len(y) / 10)
    unlabeled_indices = np.arange(n_labeled_points, len(y))
    return X, y, unlabeled_indices
使用LabelPropagation类
def test_LabelPropagation(*data):
    """Fit LabelPropagation with an rbf kernel and print its accuracy.

    Args:
        *data: The three values ``X, y, unlabeled_indices``; the targets at
            ``unlabeled_indices`` are hidden from the model during fitting.
    """
    X, y, unlabeled_indices = data

    # Work on a copy so the caller's targets stay intact, then overwrite the
    # unlabeled positions with -1 before fitting.
    masked_targets = np.copy(y)
    masked_targets[unlabeled_indices] = -1

    model = LabelPropagation(max_iter=100, kernel='rbf', gamma=0.1)
    model.fit(X, masked_targets)

    # Score only on the samples whose labels were hidden.
    accuracy = model.score(X[unlabeled_indices], y[unlabeled_indices])
    print('Accuracy:%f' % accuracy)
# Load the dataset once; the resulting tuple is unpacked into each demo call.
data = load_data()
test_LabelPropagation(*data)
折中系数alpha以及gamma参数对rbf核的影响
def test_LabelPropagation_rbf(*data):
    """Plot LabelPropagation accuracy against gamma for the rbf kernel.

    One curve is drawn per alpha value when the installed scikit-learn still
    accepts ``alpha`` on ``LabelPropagation``. The parameter was deprecated
    in scikit-learn 0.19 and removed in 0.21, so on newer releases passing it
    raises ``TypeError``; in that case a single curve is drawn without it.

    Args:
        *data: The three values ``X, y, unlabeled_indices``; the targets at
            ``unlabeled_indices`` are hidden from the model during fitting.
    """
    import inspect  # local import: only needed for the compatibility check

    X, y, unlabeled_indices = data
    y_train = np.copy(y)
    y_train[unlabeled_indices] = -1

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    alphas = np.linspace(0.01, 1, num=10, endpoint=True)
    gammas = np.logspace(-2, 2, num=50)
    colors = (
        (1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5),
        (0.5, 0, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4),
        (0.5, 0.3, 0.2),
    )

    # Only forward alpha when the constructor still declares it; otherwise
    # the sweep collapses to one curve instead of crashing with TypeError.
    init_params = inspect.signature(LabelPropagation.__init__).parameters
    if 'alpha' in init_params:
        sweeps = [
            ({'alpha': alpha}, r"$\alpha=%s$" % alpha, color)
            for alpha, color in zip(alphas, colors)
        ]
    else:
        sweeps = [({}, 'LabelPropagation (no alpha parameter)', colors[0])]

    # Train one model per (alpha, gamma) pair and draw one curve per alpha.
    for extra_params, label, color in sweeps:
        scores = []
        for gamma in gammas:
            clf = LabelPropagation(max_iter=100, gamma=gamma,
                                   kernel='rbf', **extra_params)
            clf.fit(X, y_train)
            scores.append(clf.score(X[unlabeled_indices],
                                    y[unlabeled_indices]))
        ax.plot(gammas, scores, label=label, color=color)

    # Configure the figure.
    ax.set_xlabel(r'$\gamma$')
    ax.set_ylabel('score')
    ax.set_xscale('log')
    ax.legend(loc='best')
    ax.set_title('LabelPropagation rbf kernel')
    plt.show()
# Run the rbf-kernel parameter sweep on the module-level `data` tuple.
test_LabelPropagation_rbf(*data)
折中系数alpha以及n_neighbors参数对knn核的影响
def test_LabelPropagation_knn(*data):
    """Plot LabelPropagation accuracy against n_neighbors for the knn kernel.

    One curve is drawn per alpha value when the installed scikit-learn still
    accepts ``alpha`` on ``LabelPropagation``. The parameter was deprecated
    in scikit-learn 0.19 and removed in 0.21, so on newer releases passing it
    raises ``TypeError``; in that case a single curve is drawn without it.

    Args:
        *data: The three values ``X, y, unlabeled_indices``; the targets at
            ``unlabeled_indices`` are hidden from the model during fitting.
    """
    import inspect  # local import: only needed for the compatibility check

    X, y, unlabeled_indices = data
    y_train = np.copy(y)
    y_train[unlabeled_indices] = -1

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    alphas = np.linspace(0.01, 1, num=10, endpoint=True)
    Ks = [1, 2, 3, 4, 5, 8, 10, 15, 20, 25, 30, 35, 40, 50]
    colors = (
        (1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5),
        (0.5, 0, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4),
        (0.5, 0.3, 0.2),
    )

    # Only forward alpha when the constructor still declares it; otherwise
    # the sweep collapses to one curve instead of crashing with TypeError.
    init_params = inspect.signature(LabelPropagation.__init__).parameters
    if 'alpha' in init_params:
        sweeps = [
            ({'alpha': alpha}, r"$\alpha=%s$" % alpha, color)
            for alpha, color in zip(alphas, colors)
        ]
    else:
        sweeps = [({}, 'LabelPropagation (no alpha parameter)', colors[0])]

    # Train one model per (alpha, K) pair and draw one curve per alpha.
    for extra_params, label, color in sweeps:
        scores = []
        for K in Ks:
            clf = LabelPropagation(max_iter=100, n_neighbors=K,
                                   kernel='knn', **extra_params)
            clf.fit(X, y_train)
            scores.append(clf.score(X[unlabeled_indices],
                                    y[unlabeled_indices]))
        ax.plot(Ks, scores, label=label, color=color)

    # Configure the figure.
    ax.set_xlabel(r'$k$')
    ax.set_ylabel('score')
    ax.legend(loc='best')
    ax.set_title('LabelPropagation knn kernel')
    plt.show()
# Run the knn-kernel parameter sweep on the module-level `data` tuple.
test_LabelPropagation_knn(*data)