支持带权重的对象随机选择方法

最新推荐文章于 2022-05-10 22:02:12 发布

明明如月学长

最新推荐文章于 2022-05-10 22:02:12 发布

阅读量632

点赞数 2

分类专栏： Java基础工具使用文章标签： java maven

本文链接：https://blog.csdn.net/w605283073/article/details/120856786

版权

权重随机一致性哈希 Apache Commons Math NavigableMap 随机选择

关键词由CSDN通过智能技术生成

Java基础同时被 2 个专栏收录

217 篇文章 34 订阅

订阅专栏

工具使用

101 篇文章 27 订阅

订阅专栏

一、背景

在工作中会遇到有多个下游业务接口或者服务器（这里统称为[目标]）需要选择性调用，而且还支持配置权重。

比如有3台服务器，分别给予 20%，30%和 50% 的流量；比如有3个厂商的接相似服务，分别给予 80%，5%，15% 的调用量配比。

那么我们该如何实现？

二、方法

2.1 使用 commons-math3 的工具类（推荐）

使用 Apache Commons Math3 工具包的 EnumeratedDistribution 类
时间复杂度 O(logN)

maven 仓库

https://mvnrepository.com/artifact/org.apache.commons/commons-math3

<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-math3 -->
<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-math3</artifactId>
    <version>3.6.1</version>
</dependency>

示例类：

package other.commons.math;

import lombok.AllArgsConstructor;
import lombok.Data;

@Data
@AllArgsConstructor
public class Tool {

    private String name;
}

测试代码

package other.commons.math;

import org.apache.commons.math3.distribution.EnumeratedDistribution;
import org.apache.commons.math3.util.Pair;

import java.util.ArrayList;
import java.util.List;

public class Demo {
    public static void main(String[] args) {
        // 构造数据
        Tool tool1 = new Tool("第1个工具");
        Tool tool2 = new Tool("第2个工具");
        final List<Pair<Tool, Double>> toolWeights = new ArrayList<>();
        toolWeights.add(new Pair<>(tool1, 20D));
        toolWeights.add(new Pair<>(tool2, 80D));


         // 测试1万次
        int first = 0;
        int second = 0;
        for (int i = 0; i < 10000; i++) {
            // 执行带权重随机获取一个
            Tool tool = new EnumeratedDistribution<>(toolWeights).sample();
            if (tool.equals(tool1)) {
                first++;
            }
            if (tool.equals(tool2)) {
                second++;
            }
        }
        System.out.println("工具1出现" + first + "次；工具2出现" + second + "次");

    }
}

运行结果符合预期

工具1出现1952次；工具2出现8048次

大家可以自行去源码里看其原理：
大致是将权重归一化到 0-1 的范围，然后随机获取 0-1 之间的 double 值，通过二分查找的方式，落在哪个区间就获取该区间对应的对象。

 /**
     * Generate a random value sampled from this distribution.
     *
     * @return a random value.
     */
    public T sample() {
        final double randomValue = random.nextDouble();

        int index = Arrays.binarySearch(cumulativeProbabilities, randomValue);
        if (index < 0) {
            index = -index-1;
        }

        if (index >= 0 &&
            index < probabilities.length &&
            randomValue < cumulativeProbabilities[index]) {
            return singletons.get(index);
        }

        /* This should never happen, but it ensures we will return a correct
         * object in case there is some floating point inequality problem
         * wrt the cumulative probabilities. */
        return singletons.get(singletons.size() - 1);
    }

2.2 使用 NavigableMap 类

借助 NavigableMap 的 higherEntry 定位该元素应该落的权重区间，权重未做归一化处理，定位的速度依赖于底层树查找的实现。
时间复杂度 O(logN)

public class RandomCollection<E> {
  private final NavigableMap<Double, E> map = new TreeMap<Double, E>();
  private double total = 0;

  public void add(double weight, E result) {
    if (weight <= 0 || map.containsValue(result))
      return;
    total += weight;
    map.put(total, result);
  }

  public E next() {
    double value = ThreadLocalRandom.current().nextDouble() * total;
    return map.higherEntry(value).getValue();
  }
}

示例

package other.commons.math;

public class Demo3 {
    public static void main(String[] args) {

        Tool tool1 = new Tool("第1个工具");
        Tool tool2 = new Tool("第2个工具");
        RandomCollection<Tool> util = new RandomCollection<>();
        util.add(20, tool1);
        util.add(80, tool2);

         // 测试1万次
        int first = 0;
        int second = 0;
        for (int i = 0; i < 10000; i++) {
            Tool tool = util.next();
            if (tool.equals(tool1)) {
                first++;
            }
            if (tool.equals(tool2)) {
                second++;
            }
        }
        System.out.println("工具1出现" + first + "次；工具2出现" + second + "次");
    }
}

运行结果符合预期

工具1出现1937次；工具2出现8063次

底层原理
java.util.TreeMap#getHigherEntry

 /**
     * Gets the entry for the least key greater than the specified
     * key; if no such entry exists, returns the entry for the least
     * key greater than the specified key; if no such entry exists
     * returns {@code null}.
     */
    final Entry<K,V> getHigherEntry(K key) {
        Entry<K,V> p = root;
        while (p != null) {
            int cmp = compare(key, p.key);
            if (cmp < 0) {
                if (p.left != null)
                    p = p.left;
                else
                    return p;
            } else {
                if (p.right != null) {
                    p = p.right;
                } else {
                    Entry<K,V> parent = p.parent;
                    Entry<K,V> ch = p;
                    while (parent != null && ch == parent.right) {
                        ch = parent;
                        parent = parent.parent;
                    }
                    return parent;
                }
            }
        }
        return null;
    }

2.3 使用 Collections.shuffle()

package other.commons.math;

import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

public class RandomWeightUtils {

    /**
     * 带权重随机
     * @param map  元素和对应权重
     * @param <K> 元素类型
     * @return 符合权重的随机元素
     */
    public static <K> K random(Map<K, Integer> map) {
        if (map == null || map.isEmpty()) {
            return null;
        }

        List<K> list = new LinkedList<>();
        // 放入权重个 K
        for (Map.Entry<K, Integer> entry : map.entrySet()) {
            for (int i = 0; i < entry.getValue(); i++) {
                list.add(entry.getKey());
            }
        }

        // 随机打散 list
        Collections.shuffle(list);

        // 取第一个 （最后一个也可以）
        return list.get(0);
    }
}

测试方法

package other.commons.math;

import java.util.*;


public class demo2 {
    public static void main(String[] args) {
        Tool tool1 = new Tool("第1个工具");
        Tool tool2 = new Tool("第2个工具");
        Map<Tool, Integer> map = new HashMap<Tool, Integer>() {{
            put(tool1, 3);
            put(tool2, 7);
        }};

        // 测试1万次
        int first = 0;
        int second = 0;
        for (int i = 0; i < 10000; i++) {
            Tool tool = RandomWeightUtils.random(map);
            if (tool.equals(tool1)) {
                first++;
            }
            if (tool.equals(tool2)) {
                second++;
            }
        }
        System.out.println("工具1出现" + first + "次；工具2出现" + second + "次");
    }

}

运行结果符合预期

工具1出现3010次；工具2出现6990次

底层原理
java.util.Collections#shuffle(java.util.List<?>)

public static void shuffle(List<?> list) {
        Random rnd = r;
        if (rnd == null)
            r = rnd = new Random(); // harmless race.
        shuffle(list, rnd);
}
    
public static void shuffle(List<?> list, Random rnd) {
        int size = list.size();
        if (size < SHUFFLE_THRESHOLD || list instanceof RandomAccess) {
            for (int i=size; i>1; i--)
                swap(list, i-1, rnd.nextInt(i));
        } else {
            Object[] arr = list.toArray();

            // Shuffle array
            for (int i=size; i>1; i--)
                swap(arr, i-1, rnd.nextInt(i));

            // Dump array back into list
            // instead of using a raw type here, it's possible to capture
            // the wildcard but it will require a call to a supplementary
            // private method
            ListIterator it = list.listIterator();
            for (int i=0; i<arr.length; i++) {
                it.next();
                it.set(arr[i]);
            }
        }
    }

既然已经构造好了目标占比，其实没必要对目标 list 本身随机排序，只需要构造好 [0,size) 中间的一个随机数即可，因此该方法还可以优化。

2.4 构造元素权重占比随机数获取（推荐）

 /**
     * 带权重随机
     *
     * @param map 元素和对应权重
     * @param <K> 元素类型
     * @return 符合权重的随机元素
     */
    public static <K> K random(Map<K, Integer> map) {
        if (map == null || map.isEmpty()) {
            return null;
        }

        List<K> list = new LinkedList<>();
        // 放入权重个 K
        int total = 0;
        for (Map.Entry<K, Integer> entry : map.entrySet()) {
            int value = entry.getValue();
            total += value;
            for (int i = 0; i < value; i++) {
                list.add(entry.getKey());
            }
        }

        return list.get(ThreadLocalRandom.current().nextInt(total));
    }

测试代码：

package other.commons.math;

import java.util.*;


public class demo2 {
    public static void main(String[] args) {
        Tool tool1 = new Tool("第1个工具");
        Tool tool2 = new Tool("第2个工具");
        Map<Tool, Integer> map = new HashMap<Tool, Integer>() {{
            put(tool1, 3);
            put(tool2, 7);
        }};

        // 测试10次
        int first = 0;
        int second = 0;
        for (int i = 0; i < 10000; i++) {
            Tool tool = RandomWeightUtils.random(map);
            if (tool.equals(tool1)) {
                first++;
            }
            if (tool.equals(tool2)) {
                second++;
            }
        }
        System.out.println("工具1出现" + first + "次；工具2出现" + second + "次");
    }
}

运行结果符合预期

工具1出现3052次；工具2出现6948次

三、保持一致性（目标数量权重不变时）

如果想保持同一个用户多次获取的对象保持一致，可以对用户ID 取哈希值，然后取余，落到哪个区间就获取哪个元素即可。

public class HashCodeUtil {

    public static int hash(Object key) {
        int h;
        return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16);
    }
}

3.1 MyEnumeratedDistribution (仅供参考)

package other.commons.math;

import org.apache.commons.math3.exception.NotPositiveException;
import org.apache.commons.math3.random.RandomGenerator;
import org.apache.commons.math3.random.Well19937c;
import org.apache.commons.math3.util.Pair;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.function.Supplier;

public class MyEnumeratedDistribution<T> implements Serializable {

    /**
     * List of random variable values.
     */
    private final List<T> singletons;

    /**
     * Cumulative probabilities, cached to speed up sampling.
     */
    private final int[] cumulativeProbabilities;

    /**
     * RNG instance used to generate samples from the distribution.
     */
    protected final RandomGenerator random;

    public MyEnumeratedDistribution(List<Pair<T, Integer>> pmf) {

        random = new Well19937c();

        singletons = new ArrayList<T>(pmf.size());
        final int[] probs = new int[pmf.size()];

        for (int i = 0; i < pmf.size(); i++) {
            final Pair<T, Integer> sample = pmf.get(i);
            singletons.add(sample.getKey());
            final int p = sample.getValue();
            if (p < 0) {
                throw new NotPositiveException(sample.getValue());
            }
            probs[i] = p;
        }

        cumulativeProbabilities = new int[probs.length];
        int sum = 0;
        for (int i = 0; i < probs.length; i++) {
            sum += probs[i];
            cumulativeProbabilities[i] = sum;
        }
    }

	public T sample() {
	     return sample(null);
	}
    /**
     * Generate a random value sampled from this distribution.
     *
     * @return a random value.
     */
    public T sample(Object key) {
        int randomValue;
        int max = cumulativeProbabilities[cumulativeProbabilities.length - 1] + 1;
        if (key == null) {
            randomValue = random.nextInt(max);
        } else {
            randomValue = Math.abs(HashCodeUtil.hash(key)) % max;
        }

        int index = Arrays.binarySearch(cumulativeProbabilities, randomValue);
        if (index < 0) {
            index = -index - 1;
        }

        if (index >= 0 &&
                index < cumulativeProbabilities.length &&
                randomValue < cumulativeProbabilities[index]) {
            return singletons.get(index);
        }

        /* This should never happen, but it ensures we will return a correct
         * object in case there is some floating point inequality problem
         * wrt the cumulative probabilities. */
        return singletons.get(singletons.size() - 1);
    }
}

示例

package other.commons.math;

import org.apache.commons.math3.util.Pair;

import java.util.ArrayList;
import java.util.List;

public class Demo4 {
    public static void main(String[] args) {

        // 构造数据
        Tool tool1 = new Tool("第1个工具");
        Tool tool2 = new Tool("第2个工具");
        final List<Pair<Tool, Integer>> toolWeights = new ArrayList<>();
        toolWeights.add(new Pair<>(tool1, 20));
        toolWeights.add(new Pair<>(tool2, 80));

        test(tool1, tool2, toolWeights, null);

        test(tool1, tool2, toolWeights, 1008611);

    }

    private static void test(Tool tool1, Tool tool2, List<Pair<Tool, Integer>> toolWeight, Integer userId) {
        // 测试10次
        int first = 0;
        int second = 0;
        for (int i = 0; i < 10000; i++) {
            // 模拟一致性的对象
            Tool tool;
            if (userId == null) {
                tool = new MyEnumeratedDistribution<>(toolWeight).sample();
            } else {
                tool = new MyEnumeratedDistribution<>(toolWeight).sample(userId);
            }


            if (tool.equals(tool1)) {
                first++;
            }
            if (tool.equals(tool2)) {
                second++;
            }
        }
        System.out.println("工具1出现" + first + "次；工具2出现" + second + "次");

    }
}

输出符合预期

工具1出现1981次；工具2出现8019次
工具1出现0次；工具2出现10000次

3.2 NavigableMap

以 NavigableMap 为例，我们改造下 RandomCollection 类：

package other.commons.math;

import java.util.NavigableMap;
import java.util.TreeMap;
import java.util.concurrent.ThreadLocalRandom;

public class RandomCollection<E> {
    private final NavigableMap<Double, E> map = new TreeMap<Double, E>();
    private double total = 0;

    public void add(double weight, E result) {
        if (weight <= 0 || map.containsValue(result)) {
            return;
        }
        total += weight;
        map.put(total, result);
    }

    public E next(Object object) {
        int hashCode = HashCodeUtil.hash(object);

        double value = Math.abs(hashCode) % total;
        return map.higherEntry(value).getValue();
    }
}

测试方法

package other.commons.math;

import java.util.Random;

public class Demo3 {
    public static void main(String[] args) {

        Tool tool1 = new Tool("第1个工具");
        Tool tool2 = new Tool("第2个工具");
        RandomCollection<Tool> util = new RandomCollection<>();
        util.add(20, tool1);
        util.add(80, tool2);
        test(tool1, tool2, util, null);

        test(tool1, tool2, util, 1008611);

    }

    private static void test(Tool tool1, Tool tool2, RandomCollection<Tool> util, Integer userId) {
        // 测试10次
        int first = 0;
        int second = 0;
        for (int i = 0; i < 10000; i++) {
            // 模拟一致性的对象
            if (userId == null) {
                userId = i + new Random().nextInt(900);
            }
            Tool tool = util.next(userId);

            if (tool.equals(tool1)) {
                first++;
            }
            if (tool.equals(tool2)) {
                second++;
            }
        }
        System.out.println("工具1出现" + first + "次；工具2出现" + second + "次");

    }
}

运行结果，符合预期

工具1出现0次；工具2出现10000次
工具1出现10000次；工具2出现0次

如果想做到一致性哈希，也可以对该代码进行改造。
比如用 key 存储目标 hashCode，使用 higherEntry 改造下即可。

3.3 基于2.4 改造（推荐）

  /**
     * 带权重随机
     *
     * @param map 元素和对应权重
     * @param <K> 元素类型
     * @return 符合权重的随机元素
     */
    public static <K> K random(Map<K, Integer> map, Object key) {
        if (map == null || map.isEmpty()) {
            return null;
        }

        List<K> list = new LinkedList<>();
        // 放入权重个 K
        int total = 0;
        for (Map.Entry<K, Integer> entry : map.entrySet()) {
            int value = entry.getValue();
            total += value;
            for (int i = 0; i < value; i++) {
                list.add(entry.getKey());
            }
        }

        int index;
        if (key == null) {
            index = ThreadLocalRandom.current().nextInt(total);
        } else {
            index = Math.abs(HashCodeUtil.hash(key)) % total;
        }
        return list.get(index);
    }

测试代码

package other.commons.math;

import java.util.*;


public class demo2 {
    public static void main(String[] args) {
        Tool tool1 = new Tool("第1个工具");
        Tool tool2 = new Tool("第2个工具");
        Tool tool3 = new Tool("第3个工具");
        Map<Tool, Integer> map = new HashMap<Tool, Integer>() {{
            put(tool1, 3);
            put(tool2, 6);
            put(tool3, 1);
        }};
        test(tool1, tool2, tool3, map, null);
        test(tool1, tool2, tool3, map, "test-0123");
    }

    private static void test(Tool tool1, Tool tool2, Tool tool3, Map<Tool, Integer> map, String userId) {
        // 测试1万次
        int first = 0;
        int second = 0;
        int third = 0;
        for (int i = 0; i < 10000; i++) {

            Tool tool = RandomWeightUtils.random(map, () -> userId);

            if (tool.equals(tool1)) {
                first++;
            }
            if (tool.equals(tool2)) {
                second++;
            }
            if (tool.equals(tool3)) {
                third++;
            }
        }
        System.out.println("工具1出现【" + first + "】次；工具2出现【" + second + "】次；工具3出现【" + third + "】次");

    }
}