018_java.util.BitSet

继承体系

BitSet虽然名字里带有Set,但它并不在Set的继承体系之内。可以把BitSet理解为对比特位的封装,其底层与EnumSet有些类似,使用long数组存储数据。

重要变量

// log2 of the number of bits in a long. Shifting a bit index right by this
// amount (i.e. dividing by 64) yields the index of the word holding that bit.
private final static int ADDRESS_BITS_PER_WORD = 6;

// Number of bits per word: 1 << 6 == 64.
private final static int BITS_PER_WORD = 1 << ADDRESS_BITS_PER_WORD;

// 63, i.e. binary 0000...00111111 (six low 1-bits). Masking a bit index with
// this value gives the bit's offset inside its word.
private final static int BIT_INDEX_MASK = BITS_PER_WORD - 1;

// A long with all 64 bits set; shifted left/right to build range masks.
private static final long WORD_MASK = 0xffffffffffffffffL;

// Backing storage: bit i is kept in words[i >> 6].
private long[] words;

// Number of words in the logical size of this set. checkInvariants() asserts
// that the last in-use word is non-zero and the first unused word is zero.
private transient int wordsInUse = 0;

// Set to true by BitSet(int nbits), i.e. when the caller chose the size of
// "words"; reset to false by BitSet() and whenever ensureCapacity() has to
// reallocate the array.
private transient boolean sizeIsSticky = false;


构造函数

BitSet的构造方式可以分为两种:一种是构造函数,另一种是通过一组static的valueOf方法进行转换。先看构造函数部分:无参构造默认分配64位(即1个long)的存储空间;也可以通过nbits指定需要存储多少位,由initWords计算出所需的数组长度。除此之外还有一个私有构造函数,直接接收long数组作为底层存储,供valueOf等内部方法使用。

/**
 * Creates a bit set with room for {@code BITS_PER_WORD} (64) bits, which
 * {@code initWords} turns into a backing array of a single long.
 */
public BitSet() {
    // The size was not chosen by the caller, so it is not "sticky".
    sizeIsSticky = false;
    initWords(BITS_PER_WORD);
}

/**
 * Creates a bit set whose backing array is just large enough to hold
 * {@code nbits} bits.
 *
 * @param nbits the requested number of bits; zero is allowed
 * @throws NegativeArraySizeException if {@code nbits} is negative
 */
public BitSet(int nbits) {
    if (nbits < 0) {
        throw new NegativeArraySizeException("nbits < 0: " + nbits);
    }

    // The caller picked the size explicitly, so remember that.
    sizeIsSticky = true;
    initWords(nbits);
}

/**
 * Wraps the given array directly as the backing storage (no copy is made)
 * and treats every element as in use. The invariants checked by
 * {@code checkInvariants()} therefore require the last element, if any,
 * to be non-zero.
 *
 * @param words the array to adopt as storage
 */
private BitSet(long[] words) {
    this.wordsInUse = words.length;
    this.words = words;
    checkInvariants();
}

/**
 * Allocates the smallest long array able to hold {@code nbits} bits.
 *
 * @param nbits number of bits the array must be able to hold
 */
private void initWords(int nbits) {
    // Index of the word containing the highest requested bit (nbits - 1);
    // for nbits == 0 this is -1, which gives an empty array.
    int highestWord = wordIndex(nbits - 1);
    words = new long[highestWord + 1];
}

/**
 * Maps a bit index to the index of the word that stores it.
 *
 * @param bitIndex the bit index
 * @return {@code bitIndex} shifted right by {@code ADDRESS_BITS_PER_WORD},
 *         i.e. divided by 64 rounding down (so -1 maps to -1)
 */
private static int wordIndex(int bitIndex) {
    final int index = bitIndex >> ADDRESS_BITS_PER_WORD;
    return index;
}

/**
 * Sanity checks on the internal state. These are plain {@code assert}
 * statements, so they only run when assertions are enabled.
 */
private void checkInvariants() {
    // The highest in-use word, if any, must contain at least one set bit.
    assert wordsInUse == 0 || words[wordsInUse - 1] != 0;
    // wordsInUse must stay within the bounds of the backing array.
    assert wordsInUse >= 0 && wordsInUse <= words.length;
    // The first word beyond the in-use region, if any, must be zero.
    assert wordsInUse == words.length || words[wordsInUse] == 0;
}

我们来看第二组初始化BitSet的方法,这里都是一些valueOf,可以看到你可以使用long数组,以及nio领域内的LongBuffer/ByteBuffer作为入参,将入参内的数据转换为BitSet底层存储的Long数组格式。

/**
 * Builds a bit set from a long array, ignoring trailing zero elements.
 * The input array is copied, not shared.
 *
 * @param longs the words to take the bits from
 * @return a new bit set backed by a trimmed copy of {@code longs}
 */
public static BitSet valueOf(long[] longs) {
    // Walk back from the end until the last non-zero element is found.
    int used = longs.length;
    while (used > 0 && longs[used - 1] == 0) {
        used--;
    }
    long[] trimmed = Arrays.copyOf(longs, used);
    return new BitSet(trimmed);
}

/**
 * Builds a bit set from the remaining longs of a buffer, ignoring trailing
 * zero elements. The method works on a slice, so the position of the
 * caller's buffer is not advanced.
 *
 * @param lb the buffer to read from
 * @return a new bit set holding the buffer's content
 */
public static BitSet valueOf(LongBuffer lb) {
    LongBuffer view = lb.slice();

    // Find how many leading elements remain once trailing zeros are dropped.
    int used = view.remaining();
    while (used > 0 && view.get(used - 1) == 0) {
        used--;
    }

    long[] words = new long[used];
    view.get(words);
    return new BitSet(words);
}

/**
 * Builds a bit set from a byte array by wrapping it in a {@code ByteBuffer}
 * and delegating to {@code valueOf(ByteBuffer)}.
 *
 * @param bytes the bytes to take the bits from
 * @return a new bit set holding the content of {@code bytes}
 */
public static BitSet valueOf(byte[] bytes) {
    ByteBuffer wrapped = ByteBuffer.wrap(bytes);
    return BitSet.valueOf(wrapped);
}


/**
 * Builds a bit set from the remaining bytes of a buffer, read in
 * little-endian order and ignoring trailing zero bytes. The method works
 * on a slice of {@code bb}.
 *
 * @param bb the buffer to read from
 * @return a new bit set holding the buffer's content
 */
public static BitSet valueOf(ByteBuffer bb) {
    bb = bb.slice().order(ByteOrder.LITTLE_ENDIAN);
    int n;
    // Drop trailing zero bytes: n ends up as the index just past the last
    // non-zero byte (0 if every byte is zero).
    for (n = bb.remaining(); n > 0 && bb.get(n - 1) == 0; n--)
        ;
    // Smallest number of longs that can hold n bytes (n / 8, rounded up).
    long[] words = new long[(n + 7) / 8];
    bb.limit(n);
    int i = 0;
    // While at least 8 bytes remain, read them as one whole long.
    while (bb.remaining() >= 8)
        words[i++] = bb.getLong();

    // Fewer than 8 bytes are left: assemble them into the last word by hand,
    // placing byte j at bit offset 8 * j (lowest byte first).
    for (int remaining = bb.remaining(), j = 0; j < remaining; j++)
        words[i] |= (bb.get() & 0xffL) << (8 * j);
    return new BitSet(words);
}

重要方法

初始化结束之后BitSet内就将存储数据的long数组初始化完毕,接下去就是对这个容器进行使用了,最简单的就是针对某个位置进行赋值与提取,对应的是set与get方法。由于是位存储,BitSet也提供了与位运算相关的一些方法方便批量操作数据。

set方法

/**
 * Sets the bit at {@code bitIndex} to 1, growing the storage if needed.
 *
 * @param bitIndex index of the bit to set
 * @throws IndexOutOfBoundsException if {@code bitIndex} is negative
 */
public void set(int bitIndex) {
    if (bitIndex < 0) {
        throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);
    }

    // Locate the word holding the bit and make sure it is in use.
    int target = wordIndex(bitIndex);
    expandTo(target);

    // A long shift uses only the low 6 bits of its count, so this is the
    // single-bit mask for offset (bitIndex mod 64) inside the word.
    long bit = 1L << bitIndex;
    words[target] |= bit; // Restores invariants

    checkInvariants();
}

/**
 * Sets or clears the bit at {@code bitIndex} depending on {@code value}.
 *
 * @param bitIndex index of the bit to change
 * @param value    {@code true} to set the bit, {@code false} to clear it
 */
public void set(int bitIndex, boolean value) {
    if (!value) {
        clear(bitIndex);
        return;
    }
    set(bitIndex);
}


/**
 * Sets every bit in the half-open range {@code [fromIndex, toIndex)} to 1.
 *
 * @param fromIndex index of the first bit to set (inclusive)
 * @param toIndex   index after the last bit to set (exclusive)
 * @throws IndexOutOfBoundsException if {@code checkRange} rejects the range
 */
public void set(int fromIndex, int toIndex) {
    checkRange(fromIndex, toIndex);

    // An empty range changes nothing.
    if (fromIndex == toIndex) {
        return;
    }

    // Words containing the first and the last bit of the range; grow the
    // storage so that the last one is in use.
    int firstWord = wordIndex(fromIndex);
    int lastWord = wordIndex(toIndex - 1);
    expandTo(lastWord);

    // Long shifts use only the low 6 bits of the count.
    // Left shift: 1-bits from offset (fromIndex mod 64) upwards.
    long headMask = WORD_MASK << fromIndex;
    // Unsigned right shift by (-toIndex mod 64): 1-bits below offset
    // (toIndex mod 64), or all 64 bits when toIndex is a multiple of 64.
    long tailMask = WORD_MASK >>> -toIndex;

    if (firstWord != lastWord) {
        // The range spans several words: partial first word, full middle
        // words, partial last word.
        words[firstWord] |= headMask;
        Arrays.fill(words, firstWord + 1, lastWord, WORD_MASK);
        words[lastWord] |= tailMask; // restores invariants
    } else {
        // The whole range lies in one word: intersect both masks.
        words[firstWord] |= (headMask & tailMask);
    }

    checkInvariants();
}


/**
 * Sets or clears every bit in {@code [fromIndex, toIndex)} depending on
 * {@code value}.
 *
 * @param fromIndex index of the first bit (inclusive)
 * @param toIndex   index after the last bit (exclusive)
 * @param value     {@code true} to set the bits, {@code false} to clear them
 */
public void set(int fromIndex, int toIndex, boolean value) {
    if (!value) {
        clear(fromIndex, toIndex);
        return;
    }
    set(fromIndex, toIndex);
}

/**
 * Sets the bit at {@code bitIndex} to 0. Bits beyond the in-use words are
 * already 0, so nothing is done for them.
 *
 * @param bitIndex index of the bit to clear
 * @throws IndexOutOfBoundsException if {@code bitIndex} is negative
 */
public void clear(int bitIndex) {
    if (bitIndex < 0) {
        throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);
    }

    int target = wordIndex(bitIndex);
    if (target < wordsInUse) {
        // All ones except at the bit's offset; AND-ing clears just that bit.
        long keepOthers = ~(1L << bitIndex);
        words[target] &= keepOthers;

        // The highest word may have become zero, so shrink the logical size.
        recalculateWordsInUse();
        checkInvariants();
    }
}

/**
 * Shrinks {@code wordsInUse} so that it points just past the highest
 * non-zero word (or becomes 0 if every in-use word is zero).
 */
private void recalculateWordsInUse() {
    // Scan downwards from the current top until a non-zero word is found.
    int top = wordsInUse - 1;
    while (top >= 0 && words[top] == 0) {
        top--;
    }
    wordsInUse = top + 1; // The new logical size
}

/**
 * Makes sure the word at {@code wordIndex} is part of the in-use region,
 * growing the backing array through {@code ensureCapacity} when needed.
 *
 * @param wordIndex index of the word that must be usable
 */
private void expandTo(int wordIndex) {
    int wordsRequired = wordIndex + 1;
    if (wordsInUse >= wordsRequired) {
        return; // already covered
    }
    ensureCapacity(wordsRequired);
    wordsInUse = wordsRequired;
}

/**
 * Grows the backing array so that it holds at least {@code wordsRequired}
 * words. Does nothing if the array is already large enough.
 *
 * @param wordsRequired minimum acceptable array length
 */
private void ensureCapacity(int wordsRequired) {
    if (words.length >= wordsRequired) {
        return;
    }

    // New length is the larger of twice the current length and the
    // required length; existing content is copied over.
    int doubled = 2 * words.length;
    int request = Math.max(doubled, wordsRequired);
    words = Arrays.copyOf(words, request);

    // The array no longer has the size originally chosen.
    sizeIsSticky = false;
}

/**
 * Validates a half-open bit range {@code [fromIndex, toIndex)}.
 *
 * @param fromIndex start of the range (inclusive)
 * @param toIndex   end of the range (exclusive)
 * @throws IndexOutOfBoundsException if either index is negative or
 *         {@code fromIndex} is greater than {@code toIndex}
 */
private static void checkRange(int fromIndex, int toIndex) {
    if (fromIndex < 0) {
        throw new IndexOutOfBoundsException("fromIndex < 0: " + fromIndex);
    }
    if (toIndex < 0) {
        throw new IndexOutOfBoundsException("toIndex < 0: " + toIndex);
    }
    if (fromIndex > toIndex) {
        String message = "fromIndex: " + fromIndex + " > toIndex: " + toIndex;
        throw new IndexOutOfBoundsException(message);
    }
}

get方法

get可以从某一个位置获得数据,或者从某个范围内获得数据。

/**
 * Returns whether the bit at {@code bitIndex} is set.
 *
 * @param bitIndex index of the bit to read
 * @return {@code true} if the bit is 1; {@code false} if it is 0 or lies
 *         beyond the in-use words
 * @throws IndexOutOfBoundsException if {@code bitIndex} is negative
 */
public boolean get(int bitIndex) {
    if (bitIndex < 0) {
        throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);
    }

    checkInvariants();

    int source = wordIndex(bitIndex);
    if (source >= wordsInUse) {
        return false;
    }

    // Isolate the bit inside its word; a non-zero result means it is set.
    long bit = 1L << bitIndex;
    return (words[source] & bit) != 0;
}


/**
 * Returns a new bit set holding the bits of this set in the half-open range
 * [fromIndex, toIndex), moved down so that bit fromIndex becomes bit 0.
 *
 * @param fromIndex index of the first bit to include
 * @param toIndex   index after the last bit to include
 * @return a new bit set with the selected bits
 * @throws IndexOutOfBoundsException if checkRange rejects the range
 */
public BitSet get(int fromIndex, int toIndex) {
    checkRange(fromIndex, toIndex);

    checkInvariants();

    int len = length();

    // If no set bits in range return empty bitset
    if (len <= fromIndex || fromIndex == toIndex)
        return new BitSet(0);

    // An optimization
    // Bits at or above length() are all 0, so clamp toIndex to len.
    if (toIndex > len)
        toIndex = len;

    // Result set sized to hold toIndex - fromIndex bits.
    BitSet result = new BitSet(toIndex - fromIndex);

    // Number of words in the result, and the source word holding fromIndex.
    int targetWords = wordIndex(toIndex - fromIndex - 1) + 1;
    int sourceIndex = wordIndex(fromIndex);

    // True when fromIndex is a multiple of 64, i.e. source and target words
    // line up exactly.
    boolean wordAligned = ((fromIndex & BIT_INDEX_MASK) == 0);

    // Process all words but the last word
    for (int i = 0; i < targetWords - 1; i++, sourceIndex++)
        // Aligned: copy the source word as is. Otherwise each target word is
        // spliced from two neighbouring source words. Long shifts use only
        // the low 6 bits of the count, so with off = fromIndex mod 64:
        //   words[sourceIndex] >>> fromIndex    moves the bits at offset off
        //                                       and above down to offset 0;
        //   words[sourceIndex+1] << -fromIndex  shifts left by 64 - off, moving
        //                                       the low off bits of the next
        //                                       word to the top.
        result.words[i] = wordAligned ? words[sourceIndex] :
            (words[sourceIndex] >>> fromIndex) |
            (words[sourceIndex+1] << -fromIndex);

    // Process the last word
    // lastWordMask keeps only the bits below offset (toIndex mod 64), or the
    // whole word when toIndex is a multiple of 64. If the offset of the last
    // requested bit is smaller than the offset of fromIndex, the final target
    // word spans two source words and is spliced as above; otherwise it comes
    // from a single masked source word shifted down.
    long lastWordMask = WORD_MASK >>> -toIndex;
    result.words[targetWords - 1] =
        ((toIndex-1) & BIT_INDEX_MASK) < (fromIndex & BIT_INDEX_MASK)
        ? /* straddles source words */
        ((words[sourceIndex] >>> fromIndex) |
         (words[sourceIndex+1] & lastWordMask) << -fromIndex)
        :
        ((words[sourceIndex] & lastWordMask) >>> fromIndex);

    // Set wordsInUse correctly
    // Start from the full word count, then trim trailing zero words.
    result.wordsInUse = targetWords;
    result.recalculateWordsInUse();
    result.checkInvariants();

    return result;
}

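补充说明:get(fromIndex, toIndex)的后半段做的是"跨word拼接"。当fromIndex不是64的整数倍时,结果中的每个word都由源数组中相邻的两个word拼成:words[sourceIndex] >>> fromIndex 把当前word中从fromIndex对应偏移开始的高位移到低位,words[sourceIndex+1] << -fromIndex 把下一个word的低位移到高位(long的移位只使用移位数的低6位,所以 -fromIndex 等价于左移 64 - fromIndex % 64 位),两者做或运算即得到结果word。最后一个word还要先用lastWordMask去掉toIndex及之后的位,再按同样的方式处理。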

flip方法

flip方法的作用是将某个位置或某个范围内的bit取反(0变1、1变0)。其内部实现与set大同小异,只是把或运算(|=)换成了异或运算(^=),并在最后调用recalculateWordsInUse重新计算wordsInUse,在此不过多描述。

/**
 * Inverts the bit at {@code bitIndex}, growing the storage if needed.
 *
 * @param bitIndex index of the bit to invert
 * @throws IndexOutOfBoundsException if {@code bitIndex} is negative
 */
public void flip(int bitIndex) {
    if (bitIndex < 0) {
        throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);
    }

    int target = wordIndex(bitIndex);
    expandTo(target);

    // XOR with a single-bit mask toggles exactly that bit.
    long bit = 1L << bitIndex;
    words[target] ^= bit;

    // Toggling may have zeroed the highest word, so shrink the logical size.
    recalculateWordsInUse();
    checkInvariants();
}

/**
 * Inverts every bit in the half-open range {@code [fromIndex, toIndex)}.
 *
 * @param fromIndex index of the first bit to invert (inclusive)
 * @param toIndex   index after the last bit to invert (exclusive)
 * @throws IndexOutOfBoundsException if {@code checkRange} rejects the range
 */
public void flip(int fromIndex, int toIndex) {
    checkRange(fromIndex, toIndex);

    // An empty range changes nothing.
    if (fromIndex == toIndex) {
        return;
    }

    int firstWord = wordIndex(fromIndex);
    int lastWord = wordIndex(toIndex - 1);
    expandTo(lastWord);

    // 1-bits from offset (fromIndex mod 64) upwards in the first word.
    long headMask = WORD_MASK << fromIndex;
    // 1-bits below offset (toIndex mod 64) in the last word (all 64 bits
    // when toIndex is a multiple of 64).
    long tailMask = WORD_MASK >>> -toIndex;

    if (firstWord != lastWord) {
        // Several words: partial first, full middle words, partial last.
        words[firstWord] ^= headMask;
        for (int w = firstWord + 1; w < lastWord; w++) {
            words[w] ^= WORD_MASK;
        }
        words[lastWord] ^= tailMask;
    } else {
        // Single word: toggle only the bits selected by both masks.
        words[firstWord] ^= (headMask & tailMask);
    }

    recalculateWordsInUse();
    checkInvariants();
}

顺位运算

这部分方法用于从指定下标出发,向后(next)或向前(previous)查找最近的已置位(set)或未置位(clear)的bit位置。这里主要分析nextSetBit,其余方法逻辑类似。

/**
 * Finds the first set bit at or after {@code fromIndex}.
 *
 * @param fromIndex index to start searching from (inclusive)
 * @return index of the next set bit, or -1 if there is none
 * @throws IndexOutOfBoundsException if {@code fromIndex} is negative
 */
public int nextSetBit(int fromIndex) {
    if (fromIndex < 0) {
        throw new IndexOutOfBoundsException("fromIndex < 0: " + fromIndex);
    }

    checkInvariants();

    int w = wordIndex(fromIndex);
    if (w >= wordsInUse) {
        return -1;
    }

    // In the starting word, ignore the bits below fromIndex's offset.
    long candidate = words[w] & (WORD_MASK << fromIndex);

    // Skip words without any set bit.
    while (candidate == 0) {
        w++;
        if (w == wordsInUse) {
            return -1;
        }
        candidate = words[w];
    }

    // w * 64 is the index of the word's first bit; the number of trailing
    // zeros is the offset of the lowest set bit inside the word.
    return (w * BITS_PER_WORD) + Long.numberOfTrailingZeros(candidate);
}

/**
 * Finds the first clear bit at or after {@code fromIndex}.
 *
 * @param fromIndex index to start searching from (inclusive)
 * @return index of the next clear bit
 * @throws IndexOutOfBoundsException if {@code fromIndex} is negative
 */
public int nextClearBit(int fromIndex) {
    // Neither spec nor implementation handle bitsets of maximal length.
    // See 4816253.
    if (fromIndex < 0) {
        throw new IndexOutOfBoundsException("fromIndex < 0: " + fromIndex);
    }

    checkInvariants();

    int w = wordIndex(fromIndex);
    if (w >= wordsInUse) {
        // Everything beyond the in-use words is clear.
        return fromIndex;
    }

    // Invert the word so clear bits become 1s, then ignore the bits below
    // fromIndex's offset.
    long candidate = ~words[w] & (WORD_MASK << fromIndex);

    // Skip words that are completely set.
    while (candidate == 0) {
        w++;
        if (w == wordsInUse) {
            // Ran out of in-use words: the first bit after them is clear.
            return wordsInUse * BITS_PER_WORD;
        }
        candidate = ~words[w];
    }

    return (w * BITS_PER_WORD) + Long.numberOfTrailingZeros(candidate);
}

/**
 * Finds the last set bit at or before {@code fromIndex}.
 *
 * @param fromIndex index to start searching from (inclusive); -1 is
 *                  accepted and yields -1
 * @return index of the previous set bit, or -1 if there is none
 * @throws IndexOutOfBoundsException if {@code fromIndex} is less than -1
 */
public int previousSetBit(int fromIndex) {
    if (fromIndex == -1) {
        return -1;
    }
    if (fromIndex < 0) {
        throw new IndexOutOfBoundsException(
            "fromIndex < -1: " + fromIndex);
    }

    checkInvariants();

    int w = wordIndex(fromIndex);
    if (w >= wordsInUse) {
        // Starting beyond the in-use words: the answer is the highest set
        // bit overall (length() - 1, which is -1 for an empty set).
        return length() - 1;
    }

    // In the starting word, keep only the bits up to and including
    // fromIndex's offset.
    long candidate = words[w] & (WORD_MASK >>> -(fromIndex + 1));

    // Walk downwards over words without any set bit.
    while (candidate == 0) {
        if (w == 0) {
            return -1;
        }
        w--;
        candidate = words[w];
    }

    // (w + 1) * 64 - 1 is the index of the word's top bit; subtracting the
    // leading zeros gives the highest set bit inside the word.
    return (w + 1) * BITS_PER_WORD - 1 - Long.numberOfLeadingZeros(candidate);
}

/**
 * Finds the last clear bit at or before {@code fromIndex}.
 *
 * @param fromIndex index to start searching from (inclusive); -1 is
 *                  accepted and yields -1
 * @return index of the previous clear bit, or -1 if there is none
 * @throws IndexOutOfBoundsException if {@code fromIndex} is less than -1
 */
public int previousClearBit(int fromIndex) {
    if (fromIndex == -1) {
        return -1;
    }
    if (fromIndex < 0) {
        throw new IndexOutOfBoundsException(
            "fromIndex < -1: " + fromIndex);
    }

    checkInvariants();

    int w = wordIndex(fromIndex);
    if (w >= wordsInUse) {
        // Bits beyond the in-use words are clear, so fromIndex itself is.
        return fromIndex;
    }

    // Invert the word so clear bits become 1s and keep only the bits up to
    // and including fromIndex's offset.
    long candidate = ~words[w] & (WORD_MASK >>> -(fromIndex + 1));

    // Walk downwards over words that are completely set.
    while (candidate == 0) {
        if (w == 0) {
            return -1;
        }
        w--;
        candidate = ~words[w];
    }

    return (w + 1) * BITS_PER_WORD - 1 - Long.numberOfLeadingZeros(candidate);
}

逻辑运算

传入BitSet进行整体的与,或,异或操作

/**
 * Replaces this set with the bitwise AND of itself and {@code set}.
 *
 * @param set the other operand
 */
public void and(BitSet set) {
    // AND with itself changes nothing.
    if (this == set) {
        return;
    }

    // Words that have no counterpart in the argument become zero.
    while (wordsInUse > set.wordsInUse) {
        wordsInUse--;
        words[wordsInUse] = 0;
    }

    // AND the words both sets have in common.
    for (int w = 0; w < wordsInUse; w++) {
        words[w] &= set.words[w];
    }

    recalculateWordsInUse();
    checkInvariants();
}

/**
 * Replaces this set with the bitwise OR of itself and {@code set}.
 *
 * @param set the other operand
 */
public void or(BitSet set) {
    // OR with itself changes nothing.
    if (this == set) {
        return;
    }

    // Number of words present in both sets, measured before any growth.
    int shared = Math.min(wordsInUse, set.wordsInUse);

    // Grow to the argument's size if it uses more words than this set.
    if (wordsInUse < set.wordsInUse) {
        ensureCapacity(set.wordsInUse);
        wordsInUse = set.wordsInUse;
    }

    // OR the words both sets have in common.
    for (int w = 0; w < shared; w++) {
        words[w] |= set.words[w];
    }

    // Words only the argument has are copied over unchanged.
    if (shared < set.wordsInUse) {
        int extra = wordsInUse - shared;
        System.arraycopy(set.words, shared, words, shared, extra);
    }

    // recalculateWordsInUse() is unnecessary
    checkInvariants();
}


/**
 * Replaces this set with the bitwise XOR of itself and {@code set}.
 *
 * @param set the other operand
 */
public void xor(BitSet set) {
    // Number of words present in both sets, measured before any growth.
    int shared = Math.min(wordsInUse, set.wordsInUse);

    // Grow to the argument's size if it uses more words than this set.
    if (wordsInUse < set.wordsInUse) {
        ensureCapacity(set.wordsInUse);
        wordsInUse = set.wordsInUse;
    }

    // XOR the words both sets have in common.
    for (int w = 0; w < shared; w++) {
        words[w] ^= set.words[w];
    }

    // Words only the argument has are copied over unchanged.
    if (shared < set.wordsInUse) {
        int extra = set.wordsInUse - shared;
        System.arraycopy(set.words, shared, words, shared, extra);
    }

    // XOR can zero the highest words, so shrink the logical size.
    recalculateWordsInUse();
    checkInvariants();
}

统计

这一组是一些描述BitSet当前状态的方法,实现都比较简单。

/**
 * Returns the logical length of this set: the index of the highest set bit
 * plus one, or 0 when no word is in use.
 *
 * @return the logical length in bits
 */
public int length() {
    if (wordsInUse == 0) {
        return 0;
    }

    int top = wordsInUse - 1;
    // Bits of the top word up to and including its highest set bit.
    int bitsInTopWord = BITS_PER_WORD - Long.numberOfLeadingZeros(words[top]);
    // Every word below the top one contributes a full 64 bits.
    return BITS_PER_WORD * top + bitsInTopWord;
}

/**
 * Returns whether this set has no word in use.
 *
 * @return {@code true} if {@code wordsInUse} is 0
 */
public boolean isEmpty() {
    final boolean noWordInUse = (wordsInUse == 0);
    return noWordInUse;
}

/**
 * Returns whether this set and {@code set} have at least one set bit in
 * common (not whether one contains all bits of the other).
 *
 * @param set the set to compare against
 * @return {@code true} if some bit is set in both sets
 */
public boolean intersects(BitSet set) {
    // Only words both sets have in use can share a bit; scan from the top.
    int w = Math.min(wordsInUse, set.wordsInUse) - 1;
    while (w >= 0) {
        if ((words[w] & set.words[w]) != 0) {
            return true;
        }
        w--;
    }
    return false;
}

/**
 * Counts the bits that are set in this set.
 *
 * @return the number of set bits
 */
public int cardinality() {
    int total = 0;
    int w = 0;
    while (w < wordsInUse) {
        total += Long.bitCount(words[w]);
        w++;
    }
    return total;
}

总结

在set场景下,且数据量比较大的时候可以使用bit来存储数据是否存在,BitSet就是对这一行为的包装,其内层使用long数组来存储数据,并提供get,set方法支持针对比特位的赋值,除此之外也提供比特位相关的批量操作。

  • 25
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值