离群点检测方法

异常检测

分为三种方法

1.基于模型方法

建立一个模型-----异常:同模型不能拟合.

或者知道数据的分布特征,采用不和谐校验识别离群点

2.基于临近度方法(距离)

针对对象O,如果在样本空间中找到n个与O距离大于dmin的点,那么O即为异常点。

3.基于密度方法

通过基于密度的局部离群点检测就能在样本空间数据分布不均匀的情况下也可以准确发现离群点。

定义密度:到k个最近邻的平均距离的倒数

基于密度的局部离群点检测

计算LOF

代码

package com.lof;

import java.util.ArrayList;
import java.util.List;

public class Node {
    private String nodeName; // 样本点名
    private double[] dimensioin; // 样本点的维度
    private double kDistance; // k-距离
    private List<Node> kNeighbor=new ArrayList<Node>();// k-领域
    private double distance; //到给定点的欧几里得距离
    private double reachDensity;// 可达密度
    private double reachDis;// 可达距离


    private double lof;//局部离群因子

    public Node(){

    }

    public Node(String nodeName,double[] dimensioin){
        this.nodeName=nodeName;
        this.dimensioin=dimensioin;
    }

    public String getNodeName() {
        return nodeName;
    }

    public void setNodeName(String nodeName) {
        this.nodeName = nodeName;
    }

    public double[] getDimensioin() {
        return dimensioin;
    }

    public void setDimensioin(double[] dimensioin) {
        this.dimensioin = dimensioin;
    }

    public double getkDistance() {
        return kDistance;
    }

    public void setkDistance(double kDistance) {
        this.kDistance = kDistance;
    }

    public List<Node> getkNeighbor() {
        return kNeighbor;
    }

    public void setkNeighbor(List<Node> kNeighbor) {
        this.kNeighbor = kNeighbor;
    }


    public double getDistance() {
        return distance;
    }

    public void setDistance(double distance) {
        this.distance = distance;
    }

    public double getReachDensity() {
        return reachDensity;
    }

    public void setReachDensity(double reachDensity) {
        this.reachDensity = reachDensity;
    }

    public double getReachDis() {
        return reachDis;
    }

    public void setReachDis(double reachDis) {
        this.reachDis = reachDis;
    }

    public double getLof() {
        return lof;
    }

    public void setLof(double lof) {
        this.lof = lof;
    }

}

package com.lof;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;


public class OutlierNodeDetect {
    private static int MIN_PTS=5;


    

    public List<Node> getOutlierNode(List<Node> allNodes){

       List<Node> kdAndKnList=getKDAndKN(allNodes);   //1.找到给定点与其他点的欧几里得距离
    //对欧几里得距离进行排序,找到前5位的点,并同时记下k距离
       calReachDis(kdAndKnList);           //2.计算可达距离
       calReachDensity(kdAndKnList);    //3.计算每个点的可达密度
       calLof(kdAndKnList);                //4.计算每个点的局部离群点因子
       Collections.sort(kdAndKnList, new LofComparator());         //5.对每个点的局部离群点因子进行排序,输出。

       return kdAndKnList;
    }

   
    private void calLof(List<Node> kdAndKnList){
        for(Node node:kdAndKnList){
             List<Node> tempNodes=node.getkNeighbor();
             double sum=0.0;
             for(Node tempNode:tempNodes){
                 double rd=getRD(tempNode.getNodeName(),kdAndKnList);
                 sum=rd/node.getReachDensity()+sum;
             }
             sum=sum/(double)MIN_PTS;
             node.setLof(sum);
        }
    }

   
    private void calReachDensity(List<Node> kdAndKnList){
        for(Node node:kdAndKnList){
             List<Node> tempNodes=node.getkNeighbor();
             double sum=0.0;
             double rd=0.0;
             for(Node tempNode:tempNodes){
                 sum=tempNode.getReachDis()+sum;
             }
             rd=(double)MIN_PTS/sum;
             node.setReachDensity(rd);
        }
    }

   
    private void calReachDis(List<Node> kdAndKnList){
       for(Node node:kdAndKnList){
           List<Node> tempNodes=node.getkNeighbor();
           for(Node tempNode:tempNodes){
              double kDis=getKDis(tempNode.getNodeName(),kdAndKnList);
              if(kDis<tempNode.getDistance()){
                  tempNode.setReachDis(tempNode.getDistance());
              }else{
                  tempNode.setReachDis(kDis);
              }
           }
       }
    }
   
    private double getKDis(String nodeName,List<Node> nodeList){
        double kDis=0;
        for(Node node:nodeList){
            if(nodeName.trim().equals(node.getNodeName().trim())){
                kDis=node.getkDistance();
                break;
            }
        }
        return kDis;

    }

   
    private double getRD(String nodeName,List<Node> nodeList){
        double kDis=0;
        for(Node node:nodeList){
            if(nodeName.trim().equals(node.getNodeName().trim())){
                kDis=node.getReachDensity();
                break;
            }
        }
        return kDis;

    }

   //计算每个节点的K邻域(K为MIN_PTS
    private List<Node> getKDAndKN(List<Node> allNodes){
       List<Node> kdAndKnList=new ArrayList<Node>();
       for(int i=0;i<allNodes.size();i++){
           List<Node> tempNodeList=new ArrayList<Node>();
           Node nodeA=new Node(allNodes.get(i).getNodeName(),allNodes.get(i).getDimensioin());
           for(int j=0;j<allNodes.size();j++){
              Node nodeB=new Node(allNodes.get(j).getNodeName(),allNodes.get(j).getDimensioin());
              double tempDis=getDis(nodeA,nodeB);
              nodeB.setDistance(tempDis);
              tempNodeList.add(nodeB);
           }

           //对tempNodeList进行排序
           Collections.sort(tempNodeList, new DistComparator());
           for(int k=1;k<MIN_PTS;k++){
               nodeA.getkNeighbor().add(tempNodeList.get(k));
               if(k==MIN_PTS-1){
                   nodeA.setkDistance(tempNodeList.get(k).getDistance());
               }
           }
           kdAndKnList.add(nodeA);
       }

       return kdAndKnList;
    }

   
    private double getDis(Node A,Node B){
        double dis=0.0;
        double[] dimA=A.getDimensioin();
        double[] dimB=B.getDimensioin();
        if (dimA.length == dimB.length) {
            for (int i = 0; i < dimA.length; i++) {
                double temp = Math.pow(dimA[i] - dimB[i], 2);
                dis = dis + temp;
            }
            dis=Math.pow(dis, 0.5);
        }
        return dis;
    }

    class DistComparator implements Comparator<Node>{
        public int compare(Node A, Node B){
           return A.getDistance()-B.getDistance()<0?-1:1;
        }
    }

    class LofComparator implements Comparator<Node>{
        public int compare(Node A, Node B){
           return A.getLof()-B.getLof()<0?-1:1;
        }
    }

    public static void main(String[] args){
        ArrayList<Node> dpoints = new ArrayList<Node>();
       
        double[] a={2,3};
        double[] b={2,4};
        double[] c={1,4};
        double[] d={1,3};
        double[] e={2,2};
        double[] f={3,2};

        double[] g={8,7};
        double[] h={8,6};
        double[] i={7,7};
        double[] j={7,6};
        double[] k={8,5};

        double[] l={100,2};//孤立点


        double[] m={8,20};
        double[] n={8,19};
        double[] o={7,18};
        double[] p={7,17};
        double[] q={8,21};

        dpoints.add(new Node("a",a));
        dpoints.add(new Node("b",b));
        dpoints.add(new Node("c",c));
        dpoints.add(new Node("d",d));
        dpoints.add(new Node("e",e));
        dpoints.add(new Node("f",f));

        dpoints.add(new Node("g",g));
        dpoints.add(new Node("h",h));
        dpoints.add(new Node("i",i));
        dpoints.add(new Node("j",j));
        dpoints.add(new Node("k",k));

        dpoints.add(new Node("l",l));

        dpoints.add(new Node("m",m));
        dpoints.add(new Node("n",n));
        dpoints.add(new Node("o",o));
        dpoints.add(new Node("p",p));
        dpoints.add(new Node("q",q));

        OutlierNodeDetect lof=new OutlierNodeDetect();

        List<Node> nodeList=lof.getOutlierNode(dpoints);

        for(Node node:nodeList){
            System.out.println(node.getNodeName()+"  "+node.getLof());
        }

    }
}

测试结果:

 0.7459309435620392
 0.7459309435620392
 0.7485293162241347
 0.7518479734971145
 0.7518479734971146
 0.7693717709826069
 0.7693717709826069
 0.7836550344036045
 0.8175878600290553
 0.8175878600290553
 0.827181166228103
 0.8497518729207414
 0.8588773305030418
 0.8625820667657609
 0.8625820667657609
 0.8866630038097529
 39.309353884068194



  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值