机器学习 scikit-learn（2）

216人阅读 评论(0)

# Echo the module docstring; prints "None" when the module defines no docstring.
print(__doc__)

# Authors: Clay Woolam <clay@woolam.org>
# Licence: BSD

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

from sklearn import datasets
from sklearn.semi_supervised import label_propagation
from sklearn.metrics import classification_report, confusion_matrix

# Active-learning demo: repeatedly fit a label-spreading model on a mostly
# unlabeled subset of the digits data, then reveal the true labels of the
# five samples the model is least certain about before the next round.

# NOTE(review): the dataset load is absent from the listing as published,
# although `digits` is used below; restored here so the script is runnable.
digits = datasets.load_digits()

rng = np.random.RandomState(0)
indices = np.arange(len(digits.data))  # one index per sample in the dataset
rng.shuffle(indices)  # seeded in-place shuffle, so runs are reproducible

# Work on a 330-sample subset selected through the shuffled indices.
X = digits.data[indices[:330]]
y = digits.target[indices[:330]]
images = digits.images[indices[:330]]

n_total_samples = len(y)
n_labeled_points = 10

# Positions (within the subset) whose labels are hidden from the model;
# initially everything except the first `n_labeled_points` samples.
unlabeled_indices = np.arange(n_total_samples)[n_labeled_points:]
f = plt.figure()

for i in range(5):
    # Hide the labels of the unlabeled points; -1 marks "no label".
    y_train = np.copy(y)
    y_train[unlabeled_indices] = -1

    # NOTE(review): the model construction is also missing from the listing.
    # These hyper-parameters are believed to match the upstream scikit-learn
    # example this script is derived from -- TODO confirm.
    lp_model = label_propagation.LabelSpreading(gamma=0.25, max_iter=5)
    lp_model.fit(X, y_train)

    # Compare the transduced labels of the hidden points with the truth.
    predicted_labels = lp_model.transduction_[unlabeled_indices]
    true_labels = y[unlabeled_indices]

    cm = confusion_matrix(true_labels, predicted_labels,
                          labels=lp_model.classes_)

    print('Iteration %i %s' % (i, 70 * '_'))
    print("Label Spreading model: %d labeled & %d unlabeled (%d total)"
          % (n_labeled_points, n_total_samples - n_labeled_points,
             n_total_samples))

    print(classification_report(true_labels, predicted_labels))

    print("Confusion matrix")
    print(cm)

    # Entropy of each sample's transduced label distribution; a higher
    # entropy means the model is less certain about that sample.
    pred_entropies = stats.distributions.entropy(
        lp_model.label_distributions_.T)

    # Pick the five most uncertain samples, considering only points that are
    # still unlabeled: querying an already-labeled point would waste a slot
    # while the labeled count below would still be incremented.
    # Ascending entropy order is kept so the plot layout is unchanged.
    ranked = np.argsort(pred_entropies)
    uncertainty_index = ranked[np.isin(ranked, unlabeled_indices)][-5:]

    f.text(.05, (1 - (i + 1) * .183),
           "model %d\n\nfit with\n%d labels" % ((i + 1), i * 5 + 10), size=10)
    for index, image_index in enumerate(uncertainty_index):
        image = images[image_index]

        # Row i of the 5x5 grid shows the queries selected in round i.
        sub = f.add_subplot(5, 5, index + 1 + (5 * i))
        sub.imshow(image, cmap=plt.cm.gray_r)
        sub.set_title('predict: %i\ntrue: %i' % (
            lp_model.transduction_[image_index], y[image_index]), size=10)
        sub.axis('off')

    # Reveal the queried labels for the next round. A boolean mask avoids
    # building a float-typed index array, which np.delete rejects.
    unlabeled_indices = unlabeled_indices[
        ~np.isin(unlabeled_indices, uncertainty_index)]
    n_labeled_points += len(uncertainty_index)

f.suptitle("Active learning with Label Propagation.\nRows show 5 most "
           "uncertain labels to learn with the next model.")
# Keyword arguments: newer Matplotlib no longer accepts these positionally.
plt.subplots_adjust(left=0.12, bottom=0.03, right=0.9, top=0.8,
                    wspace=0.2, hspace=0.45)
plt.show()

0
0

* 以上用户言论只代表其个人观点，不代表CSDN网站的观点或立场
个人资料
• 访问：4197次
• 积分：179
• 等级：
• 排名：千里之外
• 原创：13篇
• 转载：8篇
• 译文：0篇
• 评论：0条
文章分类
文章存档
评论排行