孤立森林python实现

孤立森林是近几年较为火热的异常数据检测算法,下面使用python来编写代码。
本人初学python,若有错误之处,欢迎指出(csdn在导入python代码时格式有些乱)。
"""
1.创建一个导入数据并选取子样本的类
2.创建一个随机选择属性及其分割值(介于该属性最小值与最大值之间)的类
3.创建一个二叉树(孤立树)类
4.创建一个孤立森林类
"""
import numpy as np
import pandas as pd
import random
import math

class Input_Datas(object):
    """
    Load the data set and hand out disjoint random subsamples of row indices.

    Parameters
    ----------
    subsample_size : int
        Requested number of rows per subsample. It is clamped to the number
        of rows still available, so the last subsample may be smaller.
    address : str, optional
        Path of the Excel workbook to read. Defaults to ``DEFAULT_ADDRESS``,
        the location that used to be hard-coded in ``Input``.
    """

    # Workbook read when the caller does not supply an explicit path.
    DEFAULT_ADDRESS = "C:\\Users\\Administrator.EBEXMAIZNOAGGTT\\Desktop\\数据.xlsx"

    def __init__(self, subsample_size, address=None):
        # Requested subsample size (may shrink later, see Subsample).
        self.subsample_size = subsample_size
        self.Address = self.DEFAULT_ADDRESS if address is None else address

    def Input(self):
        """Read the first sheet of the workbook and store its rows as a list.

        Sets ``Initial_Datas`` (the raw DataFrame), ``Sample`` (list of row
        arrays with the first column removed), ``length`` (row count) and
        ``ranges`` (row indices not yet handed out by ``Subsample``).
        """
        self.Initial_Datas = pd.read_excel(self.Address, sheet_name=0)
        # Drop the first column before modelling.
        # NOTE(review): presumably an ID/label column - confirm against the data.
        values = np.delete(self.Initial_Datas.values, 0, axis=1)
        self.Sample = list(values)
        self.length = len(self.Sample)
        if self.subsample_size >= self.length:
            self.subsample_size = self.length
        self.ranges = list(range(self.length))

    def Subsample(self):
        """Draw a random subsample of the remaining row indices.

        The drawn indices are stored in ``random_datas`` and ``subsample``
        and removed from ``ranges``, so successive calls never return the
        same row twice. ``subsample_size`` is permanently reduced when fewer
        rows than requested remain.
        """
        if self.subsample_size >= len(self.ranges):
            self.subsample_size = len(self.ranges)
        self.random_datas = random.sample(self.ranges, self.subsample_size)
        self.subsample = list(self.random_datas)
        # Filter in one pass instead of calling list.remove per drawn index;
        # slice assignment keeps the same list object for existing references.
        drawn = set(self.random_datas)
        self.ranges[:] = [idx for idx in self.ranges if idx not in drawn]


class Select_Attribute(object):
    """
    Choose a random attribute (column) and a random split value lying
    between the smallest and largest value of that attribute over a set
    of rows.
    """
    def __init__(self, sample):
        # Full data set: an indexable collection of rows.
        self.Sample = sample

    def random_attribute(self):
        """Pick one column index at random.

        The choice is stored as a one-element list in
        ``random_attribute_datas``.
        """
        column_count = len(self.Sample[0])
        candidates = list(range(column_count))
        self.random_attribute_datas = random.sample(candidates, 1)

    def random_values(self, Sample):
        """Pick a random split value for the previously chosen attribute.

        ``Sample`` is a sequence of row indices into ``self.Sample``. The
        result, stored in ``attribute_value``, is the largest observed value
        minus a random fraction of the observed value range.
        """
        first_row = self.Sample[Sample[0]]
        column = self.random_attribute_datas[0]
        highest = first_row[column]
        lowest = highest
        for row in Sample:
            if self.Sample[row][column] > highest:
                highest = self.Sample[row][column]
            if self.Sample[row][column] < lowest:
                lowest = self.Sample[row][column]

        spread = highest - lowest
        self.attribute_value = highest - random.random() * spread


class ITree(object):
    """
    Build one isolation tree over a subsample and record path lengths.

    Parameters
    ----------
    depth : int
        Height limit of the tree; a node at depth ``depth - 1`` is never
        split further.
    subsample : list
        Row indices (into ``Sample``) that the tree is built on.
    Sample : sequence
        The full data set; ``Sample[i][j]`` is attribute ``j`` of row ``i``.
    """

    # Euler's constant, used to approximate the harmonic number H(i).
    _EULER_GAMMA = 0.5772156649

    def __init__(self, depth, subsample, Sample):
        self.root = subsample
        self.depth = depth
        self.Sample = Sample

    def _is_homogeneous(self, node):
        """Return True when every row in ``node`` has identical attributes."""
        first = self.Sample[node[0]]
        width = len(first)
        return all(
            self.Sample[idx][col] == first[col]
            for idx in node[1:]
            for col in range(width)
        )

    def itree(self):
        """Grow the tree and store its leaves in ``Tree_1``.

        Each entry of ``Tree_1`` is ``[row_indices, path_length]`` where
        ``path_length`` is the leaf's depth plus one. A node becomes a leaf
        when it holds a single row, when the height limit is reached, or
        when all of its rows are identical (so no split can separate them).
        """
        self.Tree_1 = []
        # Pending nodes as [row_indices, depth, attribute_used_by_parent].
        self.Tree = [[self.root, 0, 0]]
        # Nodes are split independently, so the order in which pending nodes
        # are processed does not matter; popping from the end is O(1).
        while self.Tree:
            node, depth, _ = self.Tree.pop()
            if not node:
                continue
            # The identical-rows test must look at the current node, not at
            # the whole subsample, otherwise duplicate-only nodes are split
            # pointlessly until the height limit is hit.
            if (
                len(node) == 1
                or depth >= self.depth - 1
                or self._is_homogeneous(node)
            ):
                self.Tree_1.append([node, depth + 1])
                continue

            chooser = Select_Attribute(self.Sample)
            chooser.random_attribute()
            attribute = chooser.random_attribute_datas[0]
            chooser.random_values(node)
            threshold = chooser.attribute_value

            left = []
            right = []
            for idx in node:
                if self.Sample[idx][attribute] < threshold:
                    left.append(idx)
                else:
                    right.append(idx)

            # An empty side is dropped; the other side simply moves one
            # level deeper and is split again with a fresh attribute.
            if left:
                self.Tree.append([left, depth + 1, attribute])
            if right:
                self.Tree.append([right, depth + 1, attribute])

    def prediction(self):
        """Compute the path length of every subsample row.

        Sets ``cn`` (normalisation constant c(n) for a subsample of size n),
        ``original`` (the subsample indices in ascending order) and ``path``
        (path length of each row, aligned with ``original``; 0 for a row
        that is in no leaf of ``Tree_1``).
        """
        n = len(self.root)
        if n > 2:
            # c(n) = 2*H(n-1) - 2*(n-1)/n with H(i) ~ ln(i) + Euler's constant.
            self.cn = 2 * (math.log(n - 1) + self._EULER_GAMMA) - 2 * (n - 1) / n
        elif n == 2:
            self.cn = 1.0
        else:
            # log(0) is undefined; c(n) is 0 for fewer than two rows.
            self.cn = 0.0

        # Sort the subsample indices in ascending order.
        self.original = sorted(self.root)
        length_of = {}
        for members, path_length in self.Tree_1:
            for idx in members:
                length_of[idx] = path_length
        self.path = [length_of.get(idx, 0) for idx in self.original]


class IForest(object):
    """
    Build many isolation trees and compute an anomaly score for every row.

    Parameters
    ----------
    Number : int
        Number of isolation trees built per subsample.
    subsmaple_size : int
        Requested number of rows per subsample (spelling kept for
        backward compatibility with keyword callers).
    max_depth : int
        Height limit passed to every tree.
    """

    def __init__(self, Number, subsmaple_size, max_depth):
        # Number of isolation trees.
        self.number = Number
        self.subsample_size = subsmaple_size
        # Maximum tree height.
        self.max_depth = max_depth

    def Build_Forest(self):
        """Build the forest, print the scores and the flagged fraction.

        The data set is split into disjoint random subsamples; for each one
        ``self.number`` trees are grown and the path lengths averaged. The
        score of a row is ``2 ** (-mean_path_length / cn)``.

        Prints the per-row scores (in row order) and the fraction of rows
        whose score exceeds 0.40, and returns the list of scores.
        """
        self.scores_1 = []
        self.index = []
        example_a = Input_Datas(self.subsample_size)
        example_a.Input()
        # Keep drawing subsamples until every row index has been handed out.
        while True:
            example_a.Subsample()
            example_b = ITree(self.max_depth, example_a.subsample, example_a.Sample)
            size = example_a.subsample_size
            # Summed (later averaged) path length per subsample row.
            level_path = [0] * size

            for _ in range(self.number):
                example_b.itree()
                example_b.prediction()
                for k in range(size):
                    level_path[k] += example_b.path[k]

            normaliser = example_b.cn
            for k in range(size):
                level_path[k] = level_path[k] / self.number
                if normaliser:
                    self.scores_1.append(2 ** (-level_path[k] / normaliser))
                else:
                    # No normalisation is defined when cn is 0; use the
                    # neutral score instead of dividing by zero.
                    self.scores_1.append(0.5)
            self.index.extend(example_b.original)

            if not example_a.ranges:
                break

        a = self.sort_1(self.scores_1, self.index)
        print(a)
        flagged = 0
        for value in a:
            if value > 0.40:
                flagged += 1

        print(flagged/len(a))
        return a

    def sort_1(self, scores_1, index):
        """Reorder scores so that position ``i`` holds the score of row ``i``.

        ``index[k]`` is the row that ``scores_1[k]`` belongs to; ``index``
        is expected to contain every value in ``range(len(index))``.
        """
        score = [0] * len(index)
        for row, value in zip(index, scores_1):
            score[row] = value
        return score


if __name__ == '__main__':
    forest = IForest(256, 50, 15)
    forest.Build_Forest()
  • 2
    点赞
  • 24
    收藏
    觉得还不错? 一键收藏
  • 6
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 6
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值