Python 实现:Hierarchical Sampling for Active Learning(ICML 2008 论文)

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold

from active_learning.sampler import hierarchical_clustering_AL
from sklearn.metrics import accuracy_score
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import precision_score

def select_batch(sampler, N, already_selected,
                 **kwargs):
    """Forward a batch-selection request to ``sampler``.

    Args:
        sampler: object exposing a ``select_batch(**kwargs)`` method.
        N: value passed to the sampler under the keyword ``N``.
        already_selected: value passed under the keyword
            ``already_selected``.
        **kwargs: any further keyword arguments, forwarded unchanged.

    Returns:
        Whatever ``sampler.select_batch`` returns.
    """
    # Caller-supplied keywords come first, then the two named arguments.
    call_args = dict(kwargs, N=N, already_selected=already_selected)
    return sampler.select_batch(**call_args)

def HSAL(X, y, budget, random_state=None):
    """Score hierarchical-sampling active learning with 10-fold cross-validation.

    For every fold, two initial training points carrying different labels
    are drawn at random. The ``HierarchicalClusterAL`` sampler is then
    queried ``budget - 2`` times with a batch size of 1, a multinomial
    logistic regression is fitted on all selected training points, and its
    accuracy on the held-out fold is recorded.

    Args:
        X: feature array; must support indexing with integer index arrays
            (presumably a 2-D NumPy array -- confirm against callers).
        y: label array aligned with ``X``, indexed the same way.
        budget: intended total number of labelled points per fold, counting
            the two initial ones. Values below 2 result in no queries.
        random_state: optional int seed for the fold shuffling and the
            initial-point draw. ``None`` (the default) uses NumPy's global
            random state, so results vary between runs.

    Returns:
        List with one accuracy value per fold.

    Raises:
        ValueError: if a training fold holds fewer than two distinct
            labels, since no valid initial pair exists in that case.
    """
    SEED = 100
    initial_num = 2
    batch_size = 1
    acc_list = []
    model = LogisticRegression(random_state=SEED, multi_class='multinomial',
                               solver="lbfgs", max_iter=200)
    kfold = KFold(n_splits=10, shuffle=True, random_state=random_state)
    # Fall back to the module-level generator so the default behaviour
    # (including any prior np.random.seed call by the caller) is unchanged.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    for train_idx, test_idx in kfold.split(X):
        X_train, y_train = X[train_idx], y[train_idx]
        X_test, y_test = X[test_idx], y[test_idx]

        # Without this guard the retry loop below would never terminate.
        if len(set(y_train)) < 2:
            raise ValueError(
                "training fold contains fewer than two classes; "
                "cannot draw initial points with different labels")

        # Indices are positions inside the training fold, not inside X.
        indices = np.arange(len(train_idx))
        initial_points = rng.choice(indices, initial_num, replace=False)  # ndarray
        # Redraw until the initial points cover more than one class.
        while len(set(y_train[list(initial_points)])) == 1:
            initial_points = rng.choice(indices, initial_num, replace=False)
        selected_inds = list(initial_points)

        print("初始样本的标记===", y_train[selected_inds])
        sampler = hierarchical_clustering_AL.HierarchicalClusterAL(X_train, y_train, seed=SEED)
        for _ in range(budget - initial_num):
            # Rebuilt on every query because selected_inds grows each round.
            selected_batch_inputs = {
                "labeled": dict(zip(selected_inds, y_train[selected_inds])),
                "y": y_train,
            }
            new_selected = select_batch(sampler, batch_size, selected_inds,
                                        **selected_batch_inputs)
            selected_inds.extend(new_selected)

        model.fit(X_train[selected_inds], y_train[selected_inds])
        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test, y_pred)
        print("精度===", acc)
        acc_list.append(acc)

    return acc_list

if __name__ == '__main__':
    # Demo run: evaluate HSAL on the iris data set with 50 labelled
    # points per fold and print the per-fold accuracies.
    features, labels = datasets.load_iris(return_X_y=True)
    label_budget = 50
    fold_accuracies = HSAL(features, labels, label_budget)
    print(fold_accuracies)

[1]  Dasgupta S, Hsu D. Hierarchical sampling for active learning[C]//Proceedings of the 25th international conference on Machine learning. ACM, 2008: 208-215.

本代码是在 intelligent-annotation 项目的基础上封装的一层外壳。

有兴趣的要代码的留言吧~

[2]  Zhipeng Luo, Milos Hauskrecht. Hierarchical Active Learning with Overlapping Regions.

[3]  Fedor Zhdanov. Diverse mini-batch Active Learning. arXiv.

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

DeniuHe

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值