MVStore 的分析

最新推荐文章于 2023-08-08 10:32:38 发布

weixin_33709219

最新推荐文章于 2023-08-08 10:32:38 发布

阅读量321

点赞数

文章标签： python

原文链接：https://my.oschina.net/heatonn1/blog/188568

版权

2019独角兽企业重金招聘Python工程师标准>>>

MVStore的存储格式

File format:
header: (blockSize) bytes
header: (blockSize) bytes
[ chunk ] *

首先看到MVStore这个关键class，里面有很多注释，首先看看fileheader怎么写到文件

//在MVStore的构造函数里面，初始设置如下

storeHeader.put("H", "3");

//blockSize：4*1024

storeHeader.put("blockSize", "" + BLOCK_SIZE);

//format_write:1

storeHeader.put("format", "" + FORMAT_WRITE);

//create time

storeHeader.put("creationTime", "" + creationTime);

    private byte[] getStoreHeaderBytes() {

        StringBuilder buff = new StringBuilder();

//最后的mapId

        storeHeader.put("lastMapId", "" + lastMapId);

//最后的chunkId

        storeHeader.put("chunk", "" + lastChunkId);

//root chunk的起始位置

    private void writeStoreHeader() {

        byte[] bytes = getStoreHeaderBytes();

//分配2个blocksize空间

        ByteBuffer header = ByteBuffer.allocate(2 * BLOCK_SIZE);

//写入fileheader

        header.put(bytes);

//移到第二个blocksize起始处

        header.position(BLOCK_SIZE);

//再写一次fileheader（注释中说是为了安全）

        header.put(bytes);

//将position指向0

        header.rewind();

//将整两个块写入filechannel

        fileStore.writeFully(0, header);

//当前的版本

        storeHeader.put("version", "" + currentVersion);

//把map中的key-value对转成字符串存入stringbuilder，其中是按照key的字典序排列的，并且key中不能有“:”；如果value中有“,”或者“"”，会用"把这个value的字符串包起来，并且在字符串中的"前加\

        DataUtils.appendMap(buff, storeHeader);

//用UTF-8编码

        byte[] bytes = buff.toString().getBytes(DataUtils.UTF8);

//用fletcher算法计算校验和

        int checksum = DataUtils.getFletcher32(bytes, bytes.length / 2 * 2);

//把校验和数据也append到header中

        DataUtils.appendMap(buff, "fletcher", Integer.toHexString(checksum));

        bytes = buff.toString().getBytes(DataUtils.UTF8);

//如果header总长度大于blocksize（默认4096），抛异常

        if (bytes.length > BLOCK_SIZE) {

            throw DataUtils.newIllegalStateException(

                    DataUtils.ERROR_UNSUPPORTED_FORMAT,

                    "Store header too large: {0}", buff);

        return bytes;

//写两次fileheader，大小2*4096，但是内容可以小于4096

 private void writeStoreHeader() {

        byte[] bytes = getStoreHeaderBytes();

        ByteBuffer header = ByteBuffer.allocate(2 * BLOCK_SIZE);

        header.put(bytes);

        header.position(BLOCK_SIZE);

        header.put(bytes);

        header.rewind();

        fileStore.writeFully(0, header);

/**

     * Write to the file.

     * @param pos the write position

     * @param src the source buffer

*/

    public void writeFully(long pos, ByteBuffer src) {

        writeCount++;

//fileSize保存下来，不用每次调file.size

        fileSize = Math.max(fileSize, pos + src.remaining());

        DataUtils.writeFully(file, pos, src);

再来看看chunk的存储格式

* Chunks are page aligned (each page is usually 4096 bytes).
* There are at most 67 million (2^26) chunks,
* each chunk is at most 2 GB large.
* File format:
* 1 byte: 'c'
* 4 bytes: length
* 4 bytes: chunk id (an incrementing number)
* 4 bytes: pageCount
* 8 bytes: metaRootPos
* 8 bytes: maxLengthLive
* [ Page ] *

/**

     * Write the chunk header.

     * @param buff the target buffer

*/

    void writeHeader(WriteBuffer buff) {

        buff.put((byte) 'c').

            putInt(length).

            putInt(id).

            putInt(pageCount).

            putLong(metaRootPos).

            putLong(maxLength).

            putLong(maxLengthLive);

Page的存储格式

/**

     * Store the page and update the position.

     * @param chunk the chunk

     * @param buff the target buffer

*/

    private void write(Chunk chunk, WriteBuffer buff) {

//开始的位置

        int start = buff.position();

//存储的关键字的数量

        int len = keyCount;

//叶节点为0，非叶结点为1

        int type = children != null ? DataUtils.PAGE_TYPE_NODE

                : DataUtils.PAGE_TYPE_LEAF;

//首次写入长度，checksum，mapId，关键字的数量，节点类型，后面会更新长度和checksum

        buff.putInt(0).

            putShort((byte) 0).

            putVarInt(map.getId()).

            putVarInt(len).

            put((byte) type);

//获取当前位置，为后面压缩做准备

        int compressStart = buff.position();

//获取关键字的数据类型

        DataType keyType = map.getKeyType();

//根据类型，写入所有关键字

        for (int i = 0; i < len; i++) {

            keyType.write(buff, keys[i]);

//如果是非叶结点，写入所有子节点的pageid和count（还没看懂是干嘛的），如果是叶节点，根据值的数据类型，写入所有值

        if (type == DataUtils.PAGE_TYPE_NODE) {

            for (int i = 0; i <= len; i++) {

                buff.putLong(children[i]);

            for (int i = 0; i <= len; i++) {

                buff.putVarLong(counts[i]);

        } else {

            DataType valueType = map.getValueType();

            for (int i = 0; i < len; i++) {

                valueType.write(buff, values[i]);

//如果是压缩的，得到压缩器对象，把刚才写入的原始字节数组取出来，压缩后更新type为压缩，同时写入节省了多少字节，和压缩后的字节

        if (map.getStore().getCompress()) {

            Compressor compressor = map.getStore().getCompressor();

            int expLen = buff.position() - compressStart;

            byte[] exp = new byte[expLen];

            buff.position(compressStart).

                get(exp);

            byte[] comp = new byte[exp.length * 2];

            int compLen = compressor.compress(exp, exp.length, comp, 0);

//这个地方检查压缩程度没看懂

            if (compLen + DataUtils.getVarIntLen(compLen - expLen) < expLen) {

                buff.position(compressStart - 1).

                    put((byte) (type + DataUtils.PAGE_COMPRESSED)).

                    putVarInt(expLen - compLen).

                    put(comp, 0, compLen);

//获取pagelength

        int pageLength = buff.position() - start;

        int chunkId = chunk.id;

        int check = DataUtils.getCheckValue(chunkId)

                ^ DataUtils.getCheckValue(start)

                ^ DataUtils.getCheckValue(pageLength);

//更新pagelength和checkvalue

        buff.putInt(start, pageLength).

            putShort(start + 4, (short) check);

        if (pos != 0) {

            throw DataUtils.newIllegalStateException(

                    DataUtils.ERROR_INTERNAL, "Page already stored");

//获取pageposition，没看懂

        pos = DataUtils.getPagePos(chunkId, start, pageLength, type);

        long max = DataUtils.getPageMaxLength(pos);

//更新chunk的四个字段

        chunk.maxLength += max;

        chunk.maxLengthLive += max;

        chunk.pageCount++;

        chunk.pageCountLive++;

转载于:https://my.oschina.net/heatonn1/blog/188568

weixin_33709219

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
MVStore 的分析

2019独角兽企业重金招聘Python工程师标准>>> ...
复制链接

扫一扫

MVStore 的分析

“相关推荐”对你有帮助么？