ClickHouse新版使用roaring64map(https://github.com/RoaringBitmap/CRoaring/blob/af9fafb72edcfb88f7adc781eaea4e7e95f68d01/cpp/roaring64map.hh)来支持64位Int计算,采用了和之前不同的序列化和反序列化方式(https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h)。
在《SparkSQL & ClickHouse RoaringBitmap使用实践》(https://blog.csdn.net/qq_27639777/article/details/111005838)一文中,作者用Java实现了兼容ClickHouse的序列化和反序列化,包括32位和64位两个版本;经过我们测试,其中64位版本采用的是ClickHouse老版本的序列化方式。
我们实现了ClickHouse新版本(>21.1版本)序列化方式,经验证可以被ClickHouse正确识别。
序列化方式和老版本类似,区别在于在位图数据之前加上了8字节(小端序)的highToBitmap的size。
/**
 * Serializes a 64-bit roaring bitmap into the byte layout that newer ClickHouse
 * versions (> 21.1) use for their roaring64map-based bitmap state.
 *
 * Two layouts are produced, selected by cardinality:
 *  - at most 32 values ("small set"):
 *    Byte(0), VarInt(cardinality), then each value as an 8-byte little-endian long;
 *  - otherwise:
 *    Byte(1), VarInt(payload size), then the payload, which is an 8-byte
 *    little-endian bucket count followed, per bucket, by the 4-byte little-endian
 *    high key and the serialized 32-bit roaring bitmap of that bucket.
 *
 * @param rb the bitmap to serialize
 * @return a little-endian, array-backed buffer holding the serialized bytes; it is
 *         NOT flipped, i.e. its position is at the end of the written data
 */
def serialize(rb: Roaring64NavigableMap): ByteBuffer = {
  // ClickHouse stores bitmaps with a cardinality of at most 32 in a small set instead of a roaring bitmap.
  if (rb.getLongCardinality <= 32) {
    val cardinality = rb.getIntCardinality
    // 1 byte kind + 1 byte VarInt (cardinality <= 32 always fits in one byte) + 8 bytes per value.
    // ByteBuffer.allocate always returns a big-endian buffer, so the order is switched unconditionally.
    val out = ByteBuffer.allocate(1 + 1 + 8 * cardinality).order(LITTLE_ENDIAN)
    out.put(0.toByte)
    out.put(cardinality.toByte)
    rb.toArray.foreach(value => out.putLong(value))
    out
  } else {
    val highToBitmap = Roaring64NavigableMapHelper.getHighToBitmap(rb)

    // The buckets are written here directly instead of stripping the prefix off
    // Roaring64NavigableMap.serialize: that legacy format emits each high key with
    // DataOutput.writeInt (big-endian), whereas the C++ roaring64map reader expects a
    // little-endian uint32, so values >= 2^32 would end up under the wrong key.
    val bucketStream = new ByteArrayOutputStream()
    val bucketOut = new DataOutputStream(bucketStream)
    val entries = highToBitmap.entrySet().iterator()
    while (entries.hasNext) {
      val entry = entries.next()
      // DataOutputStream is big-endian; reversing the bytes yields the little-endian key.
      bucketOut.writeInt(Integer.reverseBytes(entry.getKey.intValue))
      entry.getValue.serialize(bucketOut)
    }
    bucketOut.flush()
    val bucketBytes = bucketStream.toByteArray

    // Payload = 8-byte bucket count + all (key, bitmap) pairs.
    val rbTotalSize = 8 + bucketBytes.length
    val varIntLen = VarInt.varLongSize(rbTotalSize)
    // The serialization structure of a roaring bitmap in ClickHouse:
    // Byte(1), VarInt(SerializedSizeInBytes), ByteArray(RoaringBitmap)
    val out = ByteBuffer.allocate(1 + varIntLen + rbTotalSize).order(LITTLE_ENDIAN)
    out.put(1.toByte)
    VarInt.putVarInt(rbTotalSize, out)
    out.putLong(highToBitmap.size().toLong)
    out.put(bucketBytes)
    out
  }
}
package org.roaringbitmap.longlong;
import org.roaringbitmap.BitmapDataProvider;
import java.util.NavigableMap;
/**
 * Helper that hands out the high-key map of a {@link Roaring64NavigableMap}.
 *
 * NOTE(review): this class is declared in the {@code org.roaringbitmap.longlong}
 * package, presumably because {@code getHighToBitmap()} is not accessible from
 * other packages - confirm against the RoaringBitmap version in use.
 */
public class Roaring64NavigableMapHelper {
    /**
     * Returns the result of {@code rb.getHighToBitmap()} unchanged.
     *
     * @param rb the 64-bit bitmap to inspect
     * @return the map from high key to per-bucket bitmap as returned by {@code rb}
     */
    public static NavigableMap<Integer, BitmapDataProvider> getHighToBitmap(final Roaring64NavigableMap rb) {
        final NavigableMap<Integer, BitmapDataProvider> highToBitmap = rb.getHighToBitmap();
        return highToBitmap;
    }
}