前言
本文我们来介绍javac中用到的位图–Bits.本文围绕着位运算的基础知识和Bits源码来讲解
基础知识
所有比特的编号方法是:从低字节的低位比特位开始,第一个bit为0,最后一个bit为 n-1。
比如,给出一个数组:int[] array = new int[4]。那么:
array[0] – array[3] 的比特位编号分别为:0–31,32–63,64–95,96–127
下面我们依据一个程序探究数组比特位的编号:
/**
 * Small demo: stores 16 in every element of an int array, shifts each element
 * right by four bits and prints the result (1 for every element).
 */
public class BitNumber {
    public static void main(String[] args) {
        int[] words = new int[4];
        // 16 is binary 1_0000: only bit 4 is set.
        java.util.Arrays.fill(words, 16);
        for (int idx = 0; idx < words.length; idx++) {
            // Shifting right by 4 moves bit 4 down to bit 0, giving 1.
            words[idx] >>= 4;
            System.out.println(words[idx]);
        }
    }
}
结果是输出了4个1,也就是说刚开始比特位编排为:0000 0000 0001 0000,使用位运算,使其右移了4位,变为:0000 0000 0000 0001.
位运算技巧
m & (n - 1) 相当于取模运算 m mod n(要求 n 是2的幂且 m 非负,例如 m & 31 等价于 m % 32)
m >> n 相当于 m / (2 ^ n)(这里的 2 ^ n 表示2的n次方,且 m 非负)
举例:
一个int是4个字节,每个字节有8bit,所以一个int共有32bit,我们可以将数据存储在这些位内。比如我们要存储100这个数,我们只需在位置100存储一个1。由于比特位从0开始编号,将第100位置为1最少需要101个位置,每个位置1bit,101个位置约需要12.6字节(101/8 ≈ 12.6),因为一个int型是4字节,所以我们需要定义一个数组 int[4](12.6/4 向上取整得到 4,共128个比特位)。
现在我们要对这个数组的100位进行操作,首先要知道100在这个数组中的第几个元素,每个数组元素都是32位,那么100所在的位置就是100/32,也就是 100>>5。然后在元素中的位置也就是:100%32,也就是100&31,也就是100&0x1F.
Bits源码解析
Bits是有状态的,其状态的转换如下:
为此,定义了一个枚举,如下:
protected enum BitsState {
    /**
     * State reached after an explicit reset: calling reset() moves the
     * instance to UNKNOWN. Assigning another Bits instance to it (assign)
     * moves it to NORMAL.
     */
    UNKNOWN,

    /** State used by the default constructor. */
    UNINIT,

    /**
     * State reached after creating a Bits instance from an existing one, or
     * after applying an operation to an instance in the UNINIT or NORMAL
     * state. From here reset() leads to UNKNOWN.
     */
    NORMAL;

    /**
     * Derives the initial state for a bit array.
     *
     * @param someBits the array the new instance will hold
     * @param reset    whether the instance is being created in the reset state
     * @return UNKNOWN when {@code reset} is set; otherwise UNINIT when
     *         {@code someBits} is the shared {@code unassignedBits} array,
     *         and NORMAL for any other array
     */
    static BitsState getState(int[] someBits, boolean reset) {
        if (reset) {
            return UNKNOWN;
        }
        // Identity comparison on purpose: only the shared sentinel counts as unassigned.
        return (someBits == unassignedBits) ? UNINIT : NORMAL;
    }
}
该类中的字段如下:
private final static int wordlen = 32; // number of bits in one int word
private final static int wordshift = 5; // 32 == 2^5, so x >>> wordshift is x / 32
private final static int wordmask = wordlen - 1; // mask for the bit position inside a word (x & wordmask is x % 32 for x >= 0)
public int[] bits = null; // backing int array that stores the bit set
private static final int[] unassignedBits = new int[0]; // shared empty array; the constructors use it as the initial value of bits
protected BitsState currentState; // current state of this instance
该类的构造函数如下:
/** Creates an empty instance by delegating to {@code Bits(false)}. */
public Bits() {
this(false); // result: bits = unassignedBits, currentState = UNINIT
}
/**
 * Creates an instance from an existing one.
 *
 * @param someBits the instance whose bits are taken over via {@code someBits.dup()}
 */
public Bits(Bits someBits) {
this(someBits.dup().bits, BitsState.getState(someBits.bits, false));
// bits is the array produced by someBits.dup(); currentState is NORMAL when
// someBits.bits is not unassignedBits, otherwise UNINIT
}
/**
 * Creates an instance that is either uninitialized or already reset.
 *
 * @param reset when true the instance starts in the UNKNOWN state
 */
public Bits(boolean reset) {
this(unassignedBits, BitsState.getState(unassignedBits, reset));
// reset == true:  currentState = UNKNOWN and bits = null
// reset == false: currentState = UNINIT and bits = unassignedBits
}
/**
 * Construct a set consisting initially of given bit vector.
 *
 * @param bits      the bit vector to hold; dropped (field set to null) when
 *                  {@code initState} is UNKNOWN
 * @param initState the state the new instance starts in
 */
protected Bits(int[] bits, BitsState initState) {
    this.currentState = initState;
    switch (initState) {
        case UNKNOWN:
            // A reset instance carries no bit vector at all.
            this.bits = null;
            break;
        case NORMAL:
            // A NORMAL instance must not hold the shared "unassigned" sentinel.
            Assert.check(bits != unassignedBits);
            this.bits = bits;
            break;
        default:
            this.bits = bits;
            break;
    }
}
接下来是操作函数:
-
扩容:
/**
 * Grows the backing array so that it holds at least {@code len} words.
 * The array is never shrunk; existing words are preserved.
 *
 * @param len required minimum number of int words
 */
protected void sizeTo(int len) {
    if (bits.length >= len) {
        return; // already large enough
    }
    bits = Arrays.copyOf(bits, len);
}
-
清空:
/**
 * Sets every word of the backing array to zero and moves the instance to the
 * NORMAL state. Must not be called on an instance in the UNKNOWN state.
 */
public void clear() {
    Assert.check(currentState != BitsState.UNKNOWN);
    Arrays.fill(bits, 0);
    currentState = BitsState.NORMAL;
}
-
重置:
/** Resets this instance; see {@link #internalReset()}. */
public void reset() {
    internalReset();
}

/** Moves the instance to the UNKNOWN state and drops the backing array. */
protected void internalReset() {
    currentState = BitsState.UNKNOWN;
    bits = null;
}
-
判断当前是否重置:
/**
 * Tells whether this instance has been reset.
 *
 * @return true when the current state is UNKNOWN
 */
public boolean isReset() {
    return BitsState.UNKNOWN == currentState;
}
-
赋值:
/**
 * Replaces the contents of this instance with the bits of {@code someBits}
 * (obtained through {@code someBits.dup()}) and moves it to the NORMAL state.
 *
 * @param someBits the instance to take the bits from
 * @return this instance
 */
public Bits assign(Bits someBits) {
    Bits copy = someBits.dup();
    this.bits = copy.bits;
    this.currentState = BitsState.NORMAL;
    return this;
}
-
复制(copy):
/**
 * Returns a new Bits instance holding the bits produced by {@link #dupBits()}.
 * This instance is moved to the NORMAL state afterwards. Must not be called
 * on an instance in the UNKNOWN state.
 *
 * @return the duplicate
 */
public Bits dup() {
    Assert.check(currentState != BitsState.UNKNOWN);
    Bits copy = new Bits();
    // dupBits() must run before the state change below: it decides whether to
    // copy based on the state this instance had on entry.
    copy.bits = dupBits();
    currentState = BitsState.NORMAL;
    return copy;
}

/**
 * Returns the bit array for a duplicate: the current array itself when the
 * state is not NORMAL, otherwise a fresh copy of it.
 *
 * @return the array to store in the duplicate
 */
protected int[] dupBits() {
    if (currentState != BitsState.NORMAL) {
        return bits;
    }
    return Arrays.copyOf(bits, bits.length);
}
接下来是几个位图操作函数:
-
将指定的int值加入到位图中:
public void incl(int x) { Assert.check(currentState != BitsState.UNKNOWN); Assert.check(x >= 0, "Value of x " + x); // 指定的int值应该是大于等于0的,不能是负数 sizeTo((x >>> wordshift) + 1); // 进行位图的扩容 bits[x >>> wordshift] = bits[x >>> wordshift] | (1 << (x & wordmask)); // 加入到位图中 currentState = BitsState.NORMAL; }
-
将指定范围的int值加入到位图中,注意:该区间为左闭右开–> [start,limit):
public void inclRange(int start, int limit) { Assert.check(currentState != BitsState.UNKNOWN); sizeTo((limit >>> wordshift) + 1); // 扩容 for (int x = start; x < limit; x++) { bits[x >>> wordshift] = bits[x >>> wordshift] | (1 << (x & wordmask)); } currentState = BitsState.NORMAL; }
-
将[start, end]范围内的int值从位图中删除的操作(end 为位图当前能表示的最大值):
public void excludeFrom(int start) { Assert.check(currentState != BitsState.UNKNOWN); Bits temp = new Bits(); temp.sizeTo(bits.length); temp.inclRange(0, start); // 创建一个临时Bits,其包含[0,strart]范围的int值 internalAndSet(temp); // 当前的Bits与临时的Bits进行&运算,从而将[start,end]返回的int值从位图中删除 currentState = BitsState.NORMAL; }
-
将指定x从位图中删除的操作:
/**
 * Removes {@code x} from the bit set and moves the instance to the NORMAL
 * state. The backing array is grown to cover {@code x} first. Must not be
 * called on an instance in the UNKNOWN state.
 *
 * @param x the value to exclude; must be non-negative
 */
public void excl(int x) {
    Assert.check(currentState != BitsState.UNKNOWN);
    Assert.check(x >= 0);
    final int slot = x >>> wordshift;
    sizeTo(slot + 1);
    // Clear bit (x mod 32) of the selected word.
    bits[slot] &= ~(1 << (x & wordmask));
    currentState = BitsState.NORMAL;
}
-
判断指定的int值是否在位图中:
/**
 * Tests whether {@code x} is in the bit set. Must not be called on an
 * instance in the UNKNOWN state.
 *
 * @param x the value to look up
 * @return true when {@code x} is non-negative, lies within the current
 *         capacity and its bit is set
 */
public boolean isMember(int x) {
    Assert.check(currentState != BitsState.UNKNOWN);
    // Negative values and values beyond the capacity are never members.
    if (x < 0 || x >= (bits.length << wordshift)) {
        return false;
    }
    final int probe = 1 << (x & wordmask);
    return (bits[x >>> wordshift] & probe) != 0;
}
-
与操作:
/**
 * Intersects this bit set with {@code xs} in place and moves the instance to
 * the NORMAL state. Must not be called on an instance in the UNKNOWN state.
 *
 * @param xs the other operand
 * @return this instance
 */
public Bits andSet(Bits xs) {
    Assert.check(currentState != BitsState.UNKNOWN);
    internalAndSet(xs);
    currentState = BitsState.NORMAL;
    return this;
}

/**
 * Word-wise AND with {@code xs} without touching the state. The backing
 * array is first grown to at least the length of {@code xs.bits}; only the
 * first {@code xs.bits.length} words are combined.
 *
 * @param xs the other operand
 */
protected void internalAndSet(Bits xs) {
    Assert.check(currentState != BitsState.UNKNOWN);
    sizeTo(xs.bits.length);
    for (int w = 0; w < xs.bits.length; w++) {
        bits[w] &= xs.bits[w];
    }
}
-
或操作:
/**
 * Unites this bit set with {@code xs} in place (word-wise OR) and moves the
 * instance to the NORMAL state. The backing array is grown to at least the
 * length of {@code xs.bits}. Must not be called on an instance in the
 * UNKNOWN state.
 *
 * @param xs the other operand
 * @return this instance
 */
public Bits orSet(Bits xs) {
    Assert.check(currentState != BitsState.UNKNOWN);
    sizeTo(xs.bits.length);
    for (int w = 0; w < xs.bits.length; w++) {
        bits[w] |= xs.bits[w];
    }
    currentState = BitsState.NORMAL;
    return this;
}
-
异或操作:
/**
 * Combines this bit set with {@code xs} in place using word-wise XOR and
 * moves the instance to the NORMAL state. The backing array is grown to at
 * least the length of {@code xs.bits}. Must not be called on an instance in
 * the UNKNOWN state.
 *
 * @param xs the other operand
 * @return this instance
 */
public Bits xorSet(Bits xs) {
    Assert.check(currentState != BitsState.UNKNOWN);
    sizeTo(xs.bits.length);
    for (int w = 0; w < xs.bits.length; w++) {
        bits[w] ^= xs.bits[w];
    }
    currentState = BitsState.NORMAL;
    return this;
}
-
计算当前bits与指定bits的差值:
/**
 * Removes from this bit set every value that is also in {@code xs}
 * (word-wise AND-NOT) and moves the instance to the NORMAL state. Words of
 * this set beyond the length of {@code xs.bits} are left untouched. Must not
 * be called on an instance in the UNKNOWN state.
 *
 * @param xs the set of values to remove
 * @return this instance
 */
public Bits diffSet(Bits xs) {
    Assert.check(currentState != BitsState.UNKNOWN);
    // Only the words present in both arrays can change.
    for (int w = 0; w < bits.length && w < xs.bits.length; w++) {
        bits[w] &= ~xs.bits[w];
    }
    currentState = BitsState.NORMAL;
    return this;
}
-
获得在位图中下一个值:
/**
 * Returns the smallest member of the bit set that is greater than or equal
 * to {@code x}. Must not be called on an instance in the UNKNOWN state.
 *
 * @param x the value to start searching from (inclusive)
 * @return the next member, or -1 when there is none
 */
public int nextBit(int x) {
    Assert.check(currentState != BitsState.UNKNOWN);
    int windex = x >>> wordshift;
    if (windex >= bits.length) {
        return -1;
    }
    // In the first word, ignore the bits below position (x mod 32).
    int word = bits[windex] & ~((1 << (x & wordmask)) - 1);
    while (true) {
        if (word != 0) {
            return (windex << wordshift) + trailingZeroBits(word);
        }
        windex++;
        if (windex >= bits.length) {
            return -1;
        }
        word = bits[windex];
    }
}

/**
 * Counts the trailing zero bits of {@code x} with a binary search over the
 * word; the original article attributes the algorithm to Hacker's Delight.
 *
 * @param x the word to inspect
 * @return the number of zero bits below the lowest set bit, or 32 when
 *         {@code x} is zero
 */
private static int trailingZeroBits(int x) {
    Assert.check(wordlen == 32);
    if (x == 0) {
        return 32;
    }
    int n = 1;
    if ((x & 0xffff) == 0) { n += 16; x >>>= 16; }
    if ((x & 0x00ff) == 0) { n += 8; x >>>= 8; }
    if ((x & 0x000f) == 0) { n += 4; x >>>= 4; }
    if ((x & 0x0003) == 0) { n += 2; x >>>= 2; }
    // n started at 1, so subtract the lowest bit to settle the final position.
    return n - (x & 1);
}
该类的使用案例如下:
/**
 * Usage example: adds 125 distinct random values below 250 to a Bits
 * instance, then walks the set with nextBit and checks that exactly 125
 * members are found.
 */
public static void main(String[] args) {
    java.util.Random random = new java.util.Random();
    Bits bits = new Bits();

    int added = 0;
    while (added < 125) {
        int candidate = random.nextInt(250);
        if (bits.isMember(candidate)) {
            continue; // already present, draw again
        }
        System.out.println("adding " + candidate);
        bits.incl(candidate);
        added++;
    }

    int found = 0;
    int bit = bits.nextBit(0);
    while (bit >= 0) {
        System.out.println("found " + bit);
        found++;
        bit = bits.nextBit(bit + 1);
    }

    if (found != 125) {
        throw new Error();
    }
}