'''
论文复现 (paper reproduction): Active Learning for Regression Using Greedy Sampling.

Reference: D. Wu, C. Lin, J. Huang. Active learning for regression using
greedy sampling. Information Sciences, 2019, 474: 90-105.

author: Deniu He
date: 2020-12-11
organization: CQUPT
'''
import numpy as np
import pandas as pd
from copy import deepcopy
from sklearn.linear_model import LinearRegression,Ridge
from sklearn import datasets
from sklearn import linear_model
from sklearn.metrics import accuracy_score, mean_absolute_error
import matplotlib.pyplot as plt
from collections import OrderedDict
from sklearn.model_selection import StratifiedKFold
from pathlib import Path


class GSx():
    """Greedy Sampling on the inputs (GSx) for active-learning regression.

    Implements the GSx strategy of D. Wu, C. Lin, J. Huang, "Active learning
    for regression using greedy sampling", Information Sciences, 2019,
    474: 90-105: at each step, query the unlabeled sample whose distance to
    its nearest labeled neighbour is largest (pure diversity criterion).

    Parameters
    ----------
    X_pool, y_pool : array-like
        Candidate pool features and targets (targets appear to be discrete
        ordinal values, given how n_theta is derived — TODO confirm).
    labeled : sequence of int
        Indices into the pool that are already labeled.
    budget : int
        Total number of queries allowed.
    X_test, y_test : array-like
        Held-out evaluation set.
    """

    def __init__(self, X_pool, y_pool, labeled, budget, X_test, y_test):
        self.X_pool = X_pool
        self.y_pool = y_pool
        self.X_test = X_test
        self.y_test = y_test
        # Copy so the caller's index list is never mutated by select().
        self.labeled = list(deepcopy(labeled))
        self.RR = Ridge(alpha=0.5)
        self.unlabeled = self.initialization()
        self.budgetLeft = deepcopy(budget)
        # FIX: the original code assigned mord.LogisticAT(), a name that is
        # never imported anywhere in this file (NameError at construction).
        # Fall back to the same Ridge regressor the class already uses.
        self.model = Ridge(alpha=0.5)
        self.AccList = []   # accuracy of rounded predictions after each batch
        self.MAEList = []   # mean absolute error after each batch

    def D(self, a, b):
        """Return the Euclidean distance between feature vectors a and b."""
        return np.sqrt(np.sum((a - b) ** 2))

    def initialization(self):
        """Return the pool indices not yet labeled and fit the initial model."""
        # Set lookup avoids the O(n) list.remove scan per labeled index.
        labeled_set = set(self.labeled)
        unlabeled = [i for i in range(len(self.y_pool)) if i not in labeled_set]
        self.RR.fit(self.X_pool[self.labeled], self.y_pool[self.labeled])
        return unlabeled

    def select(self):
        """Run GSx until the budget is exhausted.

        Queries are made in batches of n_theta; after each batch the model is
        refit on all labeled data and test accuracy / MAE are appended to
        AccList / MAEList.
        """
        # Batch-size heuristic from the original script: one less than the
        # number of distinct target values.
        n_theta = len(set(self.y_pool)) - 1
        # Stop when the budget or the candidate pool runs out.
        while self.budgetLeft > 0 and self.unlabeled:
            tmp_n_theta = n_theta
            # FIX: also guard on budgetLeft and the pool inside the batch —
            # the original decremented budgetLeft a fixed n_theta times and
            # could drive it negative (or call max() on an empty dict).
            while tmp_n_theta > 0 and self.budgetLeft > 0 and self.unlabeled:
                max_metric = OrderedDict()
                for idx in self.unlabeled:
                    # Distance from candidate idx to its nearest labeled sample.
                    max_metric[idx] = min(
                        self.D(self.X_pool[idx], self.X_pool[jdx])
                        for jdx in self.labeled
                    )
                # Query the candidate farthest from the labeled set (max-min).
                tar_idx = max(max_metric, key=max_metric.get)
                self.labeled.append(tar_idx)
                self.unlabeled.remove(tar_idx)
                tmp_n_theta -= 1
                self.budgetLeft -= 1
                print("预算剩余:{}".format(self.budgetLeft))
            self.model.fit(X=self.X_pool[self.labeled], y=self.y_pool[self.labeled])
            pred = self.model.predict(self.X_test)
            # Ridge predicts continuous values; round to the nearest integer
            # label so accuracy_score stays well-defined for ordinal targets.
            self.AccList.append(accuracy_score(y_true=self.y_test,
                                               y_pred=np.rint(pred).astype(int)))
            self.MAEList.append(mean_absolute_error(y_true=self.y_test, y_pred=pred))
if __name__ == "__main__":
    # FIX: the original blog snippet referenced six undefined variables
    # (train_X, train_y, labeled, Budget, test_X, test_y), so the module
    # crashed at import. Build a small synthetic ordinal-regression problem
    # so the demo runs end to end, and guard it so importing the module has
    # no side effects.
    rng = np.random.RandomState(0)
    X = rng.rand(60, 4)
    # Discrete ordinal targets, matching how GSx derives its batch size.
    y = X.sum(axis=1).round().astype(int)
    train_X, test_X = X[:40], X[40:]
    train_y, test_y = y[:40], y[40:]
    labeled = [0, 1, 2]
    Budget = 10
    ALmodel_1 = GSx(X_pool=train_X, y_pool=train_y, labeled=labeled,
                    budget=Budget, X_test=test_X, y_test=test_y)
    ALmodel_1.select()
    plt.plot(ALmodel_1.AccList)
    # FIX: the original never displayed the figure it built.
    plt.show()

# 该算法是基于 diversity 原则进行关键样本选择的。
# (This algorithm selects key samples based on the diversity principle.)
#
# 文章中还有两个算法(GSy 与 iGS),有兴趣的读者可以尝试实现,较为简单。

 

 

  • 1
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 7
    评论
评论 7
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

DeniuHe

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值