死磕Java集合之BitSet源码分析(JDK18)
文章目录
简介
因为Java中没有new bit[]
这种直接创建一个bit数组的方式,所以Java提供了BitSet来实现位图,BitSet是采用一个long型的数组来实现位图的。
BitSet的首个long型数组表示的是[0, 63]
这64个元素。
继承体系
BitSet实现了Cloneable和Serializable
存储结构
BitSet通过long型的数组来实现位图,一个long型元素可以表示64个元素是否存在(第i位为1,表示64 * n + i已存在)。
源码解析
属性
/*
* BitSets are packed into arrays of "words." Currently a word is
* a long, which consists of 64 bits, requiring 6 address bits.
* The choice of word size is determined purely by performance concerns.
*/
private static final int ADDRESS_BITS_PER_WORD = 6;
// 1 << 6的大小是64
private static final int BITS_PER_WORD = 1 << ADDRESS_BITS_PER_WORD;
private static final int BIT_INDEX_MASK = BITS_PER_WORD - 1;
/* Used to shift left or right for a partial word mask */
private static final long WORD_MASK = 0xffffffffffffffffL;
/**
* @serialField bits long[]
*
* The bits in this BitSet. The ith bit is stored in bits[i/64] at
* bit position i % 64 (where bit position 0 refers to the least
* significant bit and 63 refers to the most significant bit).
*/
@java.io.Serial
private static final ObjectStreamField[] serialPersistentFields = {
new ObjectStreamField("bits", long[].class),
};
/**
* The internal field corresponding to the serialField "bits".
* BitSet的底层实现是使用long数组作为内部存储结构的,所以BitSet的大小为long类型大小(64位)的整数倍。
*/
private long[] words;
/**
* The number of words in the logical size of this BitSet.
*/
private transient int wordsInUse = 0;
/**
* Whether the size of "words" is user-specified. If so, we assume
* the user knows what he's doing and try harder to preserve it.
*/
private transient boolean sizeIsSticky = false;
/* use serialVersionUID from JDK 1.0.2 for interoperability */
@java.io.Serial
private static final long serialVersionUID = 7997698588986878753L;
构造方法
/**
* Creates a new bit set. All bits are initially {@code false}.
*/
public BitSet() {
// 默认初始化long[]的长度为1
initWords(BITS_PER_WORD);
sizeIsSticky = false;
}
/**
* Creates a bit set whose initial size is large enough to explicitly
* represent bits with indices in the range {@code 0} through
* {@code nbits-1}. All bits are initially {@code false}.
*
* @param nbits the initial size of the bit set
* @throws NegativeArraySizeException if the specified initial size
* is negative
*/
public BitSet(int nbits) {
// nbits can't be negative; size 0 is OK
if (nbits < 0)
throw new NegativeArraySizeException("nbits < 0: " + nbits);
initWords(nbits);
sizeIsSticky = true;
}
private void initWords(int nbits) {
words = new long[wordIndex(nbits-1) + 1];
}
/**
* Creates a bit set using words as the internal representation.
* The last word (if there is one) must be non-zero.
*/
private BitSet(long[] words) {
this.words = words;
this.wordsInUse = words.length;
checkInvariants();
}
// 相当于bitIndex / 64
private static int wordIndex(int bitIndex) {
return bitIndex >> ADDRESS_BITS_PER_WORD;
}
set(int bitIndex)
将值bitIndex加入BitSet,即索引为bitIndex的元素比特位置为1
public void set(int bitIndex) {
if (bitIndex < 0)
throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);
// 获取bitIndex所在long数组元素的索引
int wordIndex = wordIndex(bitIndex);
expandTo(wordIndex);
// 将对应bitIndex的比特位置为1
words[wordIndex] |= (1L << bitIndex); // Restores invariants
checkInvariants();
}
// 将long数组扩展到可以容纳索引wordIndex
private void expandTo(int wordIndex) {
// 索引从0开始,所以需要的数组长度为wordIndex+1
int wordsRequired = wordIndex+1;
if (wordsInUse < wordsRequired) {
ensureCapacity(wordsRequired);
wordsInUse = wordsRequired;
}
}
// 确保BitSet的容量足够
private void ensureCapacity(int wordsRequired) {
if (words.length < wordsRequired) {
// Allocate larger of doubled size or required size
int request = Math.max(2 * words.length, wordsRequired);
words = Arrays.copyOf(words, request);
sizeIsSticky = false;
}
}
// 新建一个newLength长度的数组,并将原数组的元素复制到新数组中,并返回新数组
public static long[] copyOf(long[] original, int newLength) {
long[] copy = new long[newLength];
System.arraycopy(original, 0, copy, 0,
Math.min(original.length, newLength));
return copy;
}
Java中的左移是循环位移,例如1 << 33
相当于1 << 1
值为2
set(int bitIndex, boolean value)
如果value为真,设置bitIndex位的值为1,否则则置为0
public void set(int bitIndex, boolean value) {
if (value)
set(bitIndex);
else
clear(bitIndex);
}
clear(int bitIndex)
将bitIndex位的值置为0
public void clear(int bitIndex) {
if (bitIndex < 0)
throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);
// 获取bitIndex所在的long数组的索引
int wordIndex = wordIndex(bitIndex);
if (wordIndex >= wordsInUse)
return;
// 1L << bitIndex找到对应的比特位,取反后再&,就将对应比特位置为0了
words[wordIndex] &= ~(1L << bitIndex);
recalculateWordsInUse();
checkInvariants();
}
// 重新计算wordsInUse
private void recalculateWordsInUse() {
// Traverse the bitset until a used word is found
int i;
for (i = wordsInUse-1; i >= 0; i--)
if (words[i] != 0)
break;
wordsInUse = i+1; // The new logical size
}
private void checkInvariants() {
assert(wordsInUse == 0 || words[wordsInUse - 1] != 0);
assert(wordsInUse >= 0 && wordsInUse <= words.length);
assert(wordsInUse == words.length || words[wordsInUse] == 0);
}
set(int fromIndex, int toIndex)
将[fromIndex, toIndex)的比特位置为1
public void set(int fromIndex, int toIndex) {
checkRange(fromIndex, toIndex);
if (fromIndex == toIndex)
return;
// Increase capacity if necessary
int startWordIndex = wordIndex(fromIndex);
int endWordIndex = wordIndex(toIndex - 1);
expandTo(endWordIndex);
long firstWordMask = WORD_MASK << fromIndex;
// 这里的WORD_MASK >>> -toIndex等价于WORD_MASK >>> 64-toIndex
long lastWordMask = WORD_MASK >>> -toIndex;
if (startWordIndex == endWordIndex) {
// Case 1: One word
// 第1种情况:fromIndex和toIndex都在同一个word中
words[startWordIndex] |= (firstWordMask & lastWordMask);
} else {
// Case 2: Multiple words
// Handle first word
words[startWordIndex] |= firstWordMask;
// Handle intermediate words, if any
for (int i = startWordIndex+1; i < endWordIndex; i++)
words[i] = WORD_MASK;
// Handle last word (restores invariants)
words[endWordIndex] |= lastWordMask;
}
checkInvariants();
}
- 检查参数是否合法;
- 分别获取fromIndex和toIndex所在的word的索引;
- 如果fromIndex和toIndex在同一个word中,则将[fromIndex, toIndex)的比特位置为1;
- 如果不在,则分三步处理:
- 处理第1个word;
- 处理第2个到最后1个之间的word,全置为1;
- 处理最后1个word。
clear(int fromIndex, int toIndex)
将[fromIndex, toIndex)的比特位置为0
public void clear(int fromIndex, int toIndex) {
checkRange(fromIndex, toIndex);
if (fromIndex == toIndex)
return;
int startWordIndex = wordIndex(fromIndex);
if (startWordIndex >= wordsInUse)
return;
int endWordIndex = wordIndex(toIndex - 1);
if (endWordIndex >= wordsInUse) {
toIndex = length();
endWordIndex = wordsInUse - 1;
}
long firstWordMask = WORD_MASK << fromIndex;
// 这里的WORD_MASK >>> -toIndex等价于WORD_MASK >>> 64-toIndex
long lastWordMask = WORD_MASK >>> -toIndex;
if (startWordIndex == endWordIndex) {
// Case 1: One word
// 这里与set(fromIndex,toIndex)类似,只是后面进行了取反操作
words[startWordIndex] &= ~(firstWordMask & lastWordMask);
} else {
// Case 2: Multiple words
// Handle first word
words[startWordIndex] &= ~firstWordMask;
// Handle intermediate words, if any
for (int i = startWordIndex+1; i < endWordIndex; i++)
words[i] = 0;
// Handle last word
words[endWordIndex] &= ~lastWordMask;
}
recalculateWordsInUse();
checkInvariants();
}
set(int fromIndex, int toIndex, boolean value)
如果value为真,将[fromIndex, toIndex)的比特位置为1,否则置为0
public void set(int fromIndex, int toIndex, boolean value) {
if (value)
set(fromIndex, toIndex);
else
clear(fromIndex, toIndex);
}
flip(int bitIndex)
翻转bitIndex索引上的比特值
public void flip(int bitIndex) {
if (bitIndex < 0)
throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);
int wordIndex = wordIndex(bitIndex);
expandTo(wordIndex);
// 对bitIndex上的比特值做异或操作
words[wordIndex] ^= (1L << bitIndex);
recalculateWordsInUse();
checkInvariants();
}
flip(int fromIndex, int toIndex)
翻转[fromIndex, toIndex)的比特位
public void flip(int fromIndex, int toIndex) {
checkRange(fromIndex, toIndex);
if (fromIndex == toIndex)
return;
int startWordIndex = wordIndex(fromIndex);
int endWordIndex = wordIndex(toIndex - 1);
expandTo(endWordIndex);
long firstWordMask = WORD_MASK << fromIndex;
long lastWordMask = WORD_MASK >>> -toIndex;
if (startWordIndex == endWordIndex) {
// Case 1: One word
words[startWordIndex] ^= (firstWordMask & lastWordMask);
} else {
// Case 2: Multiple words
// Handle first word
words[startWordIndex] ^= firstWordMask;
// Handle intermediate words, if any
for (int i = startWordIndex+1; i < endWordIndex; i++)
words[i] ^= WORD_MASK;
// Handle last word
words[endWordIndex] ^= lastWordMask;
}
recalculateWordsInUse();
checkInvariants();
}
cardinality()
计算整个BitSet中比特位为1的个数
public int cardinality() {
int sum = 0;
for (int i = 0; i < wordsInUse; i++)
sum += Long.bitCount(words[i]);
return sum;
}
@IntrinsicCandidate
public static int bitCount(long i) {
// HD, Figure 5-2
i = i - ((i >>> 1) & 0x5555555555555555L);
i = (i & 0x3333333333333333L) + ((i >>> 2) & 0x3333333333333333L);
i = (i + (i >>> 4)) & 0x0f0f0f0f0f0f0f0fL;
i = i + (i >>> 8);
i = i + (i >>> 16);
i = i + (i >>> 32);
return (int)i & 0x7f;
}
总结
BitSet中的实现充分利用了位运算,速度很快,因为是位图,所以占用空间也比较小。
下面是boolean数组和BitSet的空间占用对比
public class ClassLayoutTest {
public static void main(String[] args) {
boolean[] bits = new boolean[1024];
System.out.println(ClassLayout.parseInstance(bits).toPrintable());
BitSet bitSet = new BitSet(1024);
System.out.println(GraphLayout.parseInstance(bitSet).toPrintable());
}
}
[Z object internals:
OFFSET SIZE TYPE DESCRIPTION VALUE
0 4 (object header) 01 00 00 00 (00000001 00000000 00000000 00000000) (1)
4 4 (object header) 00 00 00 00 (00000000 00000000 00000000 00000000) (0)
8 4 (object header) 05 00 00 f8 (00000101 00000000 00000000 11111000) (-134217723)
12 4 (object header) 00 04 00 00 (00000000 00000100 00000000 00000000) (1024)
16 1024 boolean [Z.<elements> N/A
Instance size: 1040 bytes
Space losses: 0 bytes internal + 0 bytes external = 0 bytes total
java.util.BitSet@15615099d object externals:
ADDRESS SIZE TYPE PATH VALUE
716ccf4f8 24 java.util.BitSet (object)
716ccf510 144 [J .words [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
可以看出boolean数组,占用的空间是1040 bytes,BitSet占用的空间是24 + 144 = 168 bytes。