phoenix local index的使用和join table的原理

phoenix local index的使用和join table的原理

下面分析一下索引的协处理器的代码流程

UngroupedAggregateRegionObserver.doPostScannerOpen

// Excerpt from UngroupedAggregateRegionObserver.doPostScannerOpen: read the attributes
// attached to the scan and decide how the region scanner has to be wrapped.
    byte[] localIndexBytes = scan.getAttribute(LOCAL_INDEX_BUILD);
    // Index maintainers and the mutation buffer are only materialized when LOCAL_INDEX_BUILD is set.
    List<IndexMaintainer> indexMaintainers = localIndexBytes == null ? null : IndexMaintainer.deserialize(localIndexBytes);
    List<Mutation> indexMutations = localIndexBytes == null ? Collections.<Mutation>emptyList() : Lists.<Mutation>newArrayListWithExpectedSize(1024);

    // Start from the scanner handed in (s); it may be wrapped one or more times below.
    RegionScanner theScanner = s;

    byte[] indexUUID = scan.getAttribute(PhoenixIndexCodec.INDEX_UUID);
    List<Expression> selectExpressions = null;
    byte[] upsertSelectTable = scan.getAttribute(BaseScannerRegionObserver.UPSERT_SELECT_TABLE);
    boolean isUpsert = false;
    boolean isDelete = false;
    byte[] deleteCQ = null;
    byte[] deleteCF = null;
    byte[] emptyCF = null;
    ImmutableBytesWritable ptr = new ImmutableBytesWritable();
    if (upsertSelectTable != null) {
        // UPSERT SELECT path: the target table and the select expressions travel with the scan.
        isUpsert = true;
        projectedTable = deserializeTable(upsertSelectTable);
        selectExpressions = deserializeExpressions(scan.getAttribute(BaseScannerRegionObserver.UPSERT_SELECT_EXPRS));
        values = new byte[projectedTable.getPKColumns().size()][];
    } else {
        // Not an upsert: check the DELETE_AGG flag; DELETE_CF / DELETE_CQ are only read when it is not set.
        byte[] isDeleteAgg = scan.getAttribute(BaseScannerRegionObserver.DELETE_AGG);
        isDelete = isDeleteAgg != null && Bytes.compareTo(PDataType.TRUE_BYTES, isDeleteAgg) == 0;
        if (!isDelete) {
            deleteCF = scan.getAttribute(BaseScannerRegionObserver.DELETE_CF);
            deleteCQ = scan.getAttribute(BaseScannerRegionObserver.DELETE_CQ);
        }
        emptyCF = scan.getAttribute(BaseScannerRegionObserver.EMPTY_CF);
    }
    TupleProjector tupleProjector = null;
    Region dataRegion = null;
    byte[][] viewConstants = null;
    ColumnReference[] dataColumns = IndexUtil.deserializeDataTableColumnsToJoin(scan);
    boolean localIndexScan = ScanUtil.isLocalIndex(scan);
    // p: projector serialized into the scan; j: hash-join info serialized into the scan (either may be null).
    final TupleProjector p = TupleProjector.deserializeProjectorFromScan(scan);
    final HashJoinInfo j = HashJoinInfo.deserializeHashJoinFromScan(scan);
    // Wrap the scanner when this is a local-index scan (and neither a delete nor a
    // desc-row-key-order upgrade), or when there is a projector but no hash join.
    if ((localIndexScan && !isDelete && !isDescRowKeyOrderUpgrade) || (j == null && p != null)) {
        if (dataColumns != null) {
            // Data-table columns have to be joined back: prepare projector, data region and view constants.
            tupleProjector = IndexUtil.getTupleProjector(scan, dataColumns);
            dataRegion = IndexUtil.getDataRegion(env);
            viewConstants = IndexUtil.deserializeViewConstantsFromScan(scan);
        }
        ImmutableBytesWritable tempPtr = new ImmutableBytesWritable();
        // Build the wrapped scanner (only the first index maintainer, if any, is passed on).
        theScanner =
                getWrappedScanner(c, theScanner, offset, scan, dataColumns, tupleProjector, 
                        dataRegion, indexMaintainers == null ? null : indexMaintainers.get(0), viewConstants, p, tempPtr);
    } 

    // When hash-join info is present, layer the join scanner on top of whatever scanner we have so far.
    if (j != null)  {
        theScanner = new HashJoinRegionScanner(theScanner, p, j, ScanUtil.getTenantId(scan), env);
    }

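从上面的代码可以看到:先判断本次 scan 是否是本地索引扫描,如果是,就通过 getWrappedScanner 把原始的 RegionScanner 包装成针对本地索引的 scanner;如果 scan 中还带有 hash join 信息(j != null),则再包一层 HashJoinRegionScanner。
下面进入 getWrappedScanner 方法进行分析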

/**
 * Wraps {@code s} in an anonymous {@link RegionScanner} that delegates every call to {@code s}
 * and, in the two {@code nextRaw} overloads, post-processes each non-empty row:
 * <ol>
 *   <li>if array function references are present, replaces the full array cell with a cell
 *       holding the evaluated array element(s);</li>
 *   <li>for local-index scans that are not ANALYZE scans, calls
 *       {@code IndexUtil.wrapResultUsingOffset} to join back to the data table;</li>
 *   <li>if {@code projector} is non-null, replaces the row with the single projected cell
 *       (plus the array element cell, when one was produced).</li>
 * </ol>
 * Any {@code Throwable} raised by {@code next}/{@code nextRaw} is routed through
 * {@code ServerUtil.throwIOException} together with the region name.
 *
 * @param c               coprocessor context, used for the region name and passed to IndexUtil
 * @param s               the scanner being wrapped
 * @param arrayKVRefs     column expressions whose full-array cells are removed from the row
 * @param arrayFuncRefs   array expressions serialized into the replacement cell; may be null
 * @param offset          row-key offset handed to {@code IndexUtil.wrapResultUsingOffset}
 * @param scan            the scan, inspected for the local-index / analyze flags
 * @param dataColumns     data-table columns handed to {@code IndexUtil.wrapResultUsingOffset}
 * @param tupleProjector  projector handed to {@code IndexUtil.wrapResultUsingOffset}
 * @param dataRegion      data region handed to {@code IndexUtil.wrapResultUsingOffset}
 * @param indexMaintainer index maintainer handed to {@code IndexUtil.wrapResultUsingOffset}
 * @param tx              not referenced anywhere in this method body
 * @param viewConstants   view constants handed to {@code IndexUtil.wrapResultUsingOffset}
 * @param kvSchema        schema used to serialize the array expressions
 * @param kvSchemaBitSet  bit set used together with {@code kvSchema}
 * @param projector       optional projector applied to the final row; may be null
 * @param ptr             scratch pointer reused for expression evaluation
 * @return the wrapping scanner
 */
protected RegionScanner getWrappedScanner(final ObserverContext<RegionCoprocessorEnvironment> c,
        final RegionScanner s, final Set<KeyValueColumnExpression> arrayKVRefs,
        final Expression[] arrayFuncRefs, final int offset, final Scan scan,
        final ColumnReference[] dataColumns, final TupleProjector tupleProjector,
        final Region dataRegion, final IndexMaintainer indexMaintainer,
        Transaction tx, 
        final byte[][] viewConstants, final KeyValueSchema kvSchema,
        final ValueBitSet kvSchemaBitSet, final TupleProjector projector,
        final ImmutableBytesWritable ptr) {
    return new RegionScanner() {

        // Plain delegation; only the error path adds the region name.
        @Override
        public boolean next(List<Cell> results) throws IOException {
            try {
                return s.next(results);
            } catch (Throwable t) {
                ServerUtil.throwIOException(c.getEnvironment().getRegion().getRegionInfo().getRegionNameAsString(), t);
                return false; // impossible
            }
        }

        // Plain delegation; only the error path adds the region name.
        @Override
        public boolean next(List<Cell> result, ScannerContext scannerContext) throws IOException {
            try {
                return s.next(result, scannerContext);
            } catch (Throwable t) {
                ServerUtil.throwIOException(c.getEnvironment().getRegion().getRegionInfo().getRegionNameAsString(), t);
                return false; // impossible
            }
        }

        @Override
        public void close() throws IOException {
            s.close();
        }

        @Override
        public HRegionInfo getRegionInfo() {
            return s.getRegionInfo();
        }

        @Override
        public boolean isFilterDone() throws IOException {
            return s.isFilterDone();
        }

        @Override
        public boolean reseek(byte[] row) throws IOException {
            return s.reseek(row);
        }

        @Override
        public long getMvccReadPoint() {
            return s.getMvccReadPoint();
        }

        /**
         * Fetches the next raw row from the wrapped scanner and post-processes it in place
         * (array element replacement, local-index join-back, projection).
         */
        @Override
        public boolean nextRaw(List<Cell> result) throws IOException {
            try {
                boolean next = s.nextRaw(result);
                Cell arrayElementCell = null;
                // Nothing to post-process for an empty row.
                if (result.size() == 0) {
                    return next;
                }
                if (arrayFuncRefs != null && arrayFuncRefs.length > 0 && arrayKVRefs.size() > 0) {
                    int arrayElementCellPosition = replaceArrayIndexElement(arrayKVRefs, arrayFuncRefs, result);
                    arrayElementCell = result.get(arrayElementCellPosition);
                }
                // Local-index scan (and not ANALYZE): join back to the data table.
                // NOTE(review): the ScannerContext overload below also takes this branch when
                // offset > 0; this overload only checks isLocalIndex - confirm the difference is intended.
                if (ScanUtil.isLocalIndex(scan) && !ScanUtil.isAnalyzeTable(scan)) {
                    // Hand the index row to IndexUtil, which maps it back to the data row.
                    IndexUtil.wrapResultUsingOffset(c, result, offset, dataColumns,
                        tupleProjector, dataRegion, indexMaintainer, viewConstants, ptr);
                }
                if (projector != null) {
                    // Replace the row with the single projected cell, re-appending the array element cell if any.
                    Tuple tuple = projector.projectResults(new ResultTuple(Result.create(result)));
                    result.clear();
                    result.add(tuple.getValue(0));
                    if(arrayElementCell != null)
                        result.add(arrayElementCell);
                }
                // There is a scanattribute set to retrieve the specific array element
                return next;
            } catch (Throwable t) {
                ServerUtil.throwIOException(c.getEnvironment().getRegion().getRegionInfo().getRegionNameAsString(), t);
                return false; // impossible
            }
        }

        /**
         * Same post-processing as {@code nextRaw(List)}, but forwards the scanner context to the
         * wrapped scanner and also performs the join-back when {@code offset > 0}.
         */
        @Override
        public boolean nextRaw(List<Cell> result, ScannerContext scannerContext)
            throws IOException {
          try {
            boolean next = s.nextRaw(result, scannerContext);
            Cell arrayElementCell = null;
            // Nothing to post-process for an empty row.
            if (result.size() == 0) {
                return next;
            }
            if (arrayFuncRefs != null && arrayFuncRefs.length > 0 && arrayKVRefs.size() > 0) {
                int arrayElementCellPosition = replaceArrayIndexElement(arrayKVRefs, arrayFuncRefs, result);
                arrayElementCell = result.get(arrayElementCellPosition);
            }   
            // Local-index scan or non-zero offset (and not ANALYZE): join back to the data table.
            if ((offset > 0 || ScanUtil.isLocalIndex(scan))  && !ScanUtil.isAnalyzeTable(scan)) {
                // Hand the index row to IndexUtil, which maps it back to the data row.
                IndexUtil.wrapResultUsingOffset(c, result, offset, dataColumns,
                    tupleProjector, dataRegion, indexMaintainer, viewConstants, ptr);
            }
            if (projector != null) {
                // Replace the row with the single projected cell, re-appending the array element cell if any.
                Tuple tuple = projector.projectResults(new ResultTuple(Result.create(result)));
                result.clear();
                result.add(tuple.getValue(0));
                if(arrayElementCell != null)
                    result.add(arrayElementCell);
            }
            // There is a scanattribute set to retrieve the specific array element
            return next;
          } catch (Throwable t) {
            ServerUtil.throwIOException(c.getEnvironment().getRegion().getRegionInfo().getRegionNameAsString(), t);
            return false; // impossible
          }
        }

        /**
         * Removes from {@code result} the cells that hold the full arrays referenced by
         * {@code arrayKVRefs} and appends one synthetic cell containing the serialized values of
         * {@code arrayFuncRefs}.
         *
         * @return the position of the appended cell, i.e. the last index of {@code result}
         */
        private int replaceArrayIndexElement(final Set<KeyValueColumnExpression> arrayKVRefs,
                final Expression[] arrayFuncRefs, List<Cell> result) {
            // make a copy of the results array here, as we're modifying it below
            MultiKeyValueTuple tuple = new MultiKeyValueTuple(ImmutableList.copyOf(result));
            // The size of both the arrays would be same?
            // Using KeyValueSchema to set and retrieve the value
            // collect the first kv to get the row
            Cell rowKv = result.get(0);
            for (KeyValueColumnExpression kvExp : arrayKVRefs) {
                if (kvExp.evaluate(tuple, ptr)) {
                    // Walk backwards so that removing by index targets the matching family/qualifier cell.
                    for (int idx = tuple.size() - 1; idx >= 0; idx--) {
                        Cell kv = tuple.getValue(idx);
                        if (Bytes.equals(kvExp.getColumnFamily(), 0, kvExp.getColumnFamily().length,
                                kv.getFamilyArray(), kv.getFamilyOffset(), kv.getFamilyLength())
                            && Bytes.equals(kvExp.getColumnName(), 0, kvExp.getColumnName().length,
                                    kv.getQualifierArray(), kv.getQualifierOffset(), kv.getQualifierLength())) {
                            // remove the kv that has the full array values.
                            result.remove(idx);
                            break;
                        }
                    }
                }
            }
            byte[] value = kvSchema.toBytes(tuple, arrayFuncRefs,
                    kvSchemaBitSet, ptr);
            // Add a dummy kv with the exact value of the array index
            result.add(new KeyValue(rowKv.getRowArray(), rowKv.getRowOffset(), rowKv.getRowLength(),
                    QueryConstants.ARRAY_VALUE_COLUMN_FAMILY, 0, QueryConstants.ARRAY_VALUE_COLUMN_FAMILY.length,
                    QueryConstants.ARRAY_VALUE_COLUMN_QUALIFIER, 0,
                    QueryConstants.ARRAY_VALUE_COLUMN_QUALIFIER.length, HConstants.LATEST_TIMESTAMP,
                    Type.codeToType(rowKv.getTypeByte()), value, 0, value.length));
            return result.size() - 1;
        }

        @Override
        public long getMaxResultSize() {
            return s.getMaxResultSize();
        }

        @Override
        public int getBatch() {
            return s.getBatch();
        }
    };
}

上面的方法构建并返回了一个匿名的 RegionScanner 对象,它本质上是对原始 scanner 的一层迭代器包装:在 nextRaw 取下一行数据时,
判断是否是本地索引扫描,如果是,就用本地索引行的主键还原出物理数据表的主键,再回数据表查询所需的列

然后就调用到索引到数据的核心转换方法当中
IndexUtil.wrapResultUsingOffset

// Joins an index row back to its data-table row. Only the first part of the method is shown
// in this excerpt: it derives the data row key from the index row key and issues a Get for
// the requested data columns; what is done with joinResult afterwards is not visible here.
public static void wrapResultUsingOffset(final ObserverContext<RegionCoprocessorEnvironment> c,
        List<Cell> result, final int offset, ColumnReference[] dataColumns,
        TupleProjector tupleProjector, Region dataRegion, IndexMaintainer indexMaintainer,
        byte[][] viewConstants, ImmutableBytesWritable ptr) throws IOException {
    if (tupleProjector != null) {
        // Join back to data table here by issuing a local get projecting
        // all of the cq:cf from the KeyValueColumnExpression into the Get.
        Cell firstCell = result.get(0);
        byte[] indexRowKey = firstCell.getRowArray();// backing array that contains the index row key
        // Point ptr at the index row key, skipping the first `offset` bytes.
        ptr.set(indexRowKey, firstCell.getRowOffset() + offset, firstCell.getRowLength() - offset);
        // Derive the data-table row key from the index row key (no lookup happens on this line).
        byte[] dataRowKey = indexMaintainer.buildDataRowKey(ptr, viewConstants);
        Get get = new Get(dataRowKey);
        for (int i = 0; i < dataColumns.length; i++) {
            get.addColumn(dataColumns[i].getFamily(), dataColumns[i].getQualifier());
        }
        Result joinResult = null;
        if (dataRegion != null) {
            // A data region was supplied: read directly from it.
            joinResult = dataRegion.get(get);
        } else {
            // No data region supplied: resolve the user table name from this region's table
            // descriptor and read through a table handle, which is always closed afterwards.
            TableName dataTable =
                    TableName.valueOf(MetaDataUtil.getUserTableName(c.getEnvironment()
                            .getRegion().getTableDesc().getNameAsString()));
            HTableInterface table = null;
            try {
                table = c.getEnvironment().getTable(dataTable);
                joinResult = table.get(get);
            } finally {
                if (table != null) table.close();
            }
        }

从上面的源码可以看到,indexRowKey 就是索引行的主键(rowkey),通过调用 indexMaintainer.buildDataRowKey 方法
把它转换成 dataRowKey,然后再用 Get 到物理数据表中查询数据。这是一个公共的工具方法。

可以看到原先的方法中还有一个对象 HashJoinRegionScanner ,里面就是对表进行关联用的

/**
 * Creates a scanner that joins the rows produced by {@code scanner} against the server-side
 * hash caches referenced by {@code joinInfo}.
 *
 * @param scanner   underlying scanner supplying the left-hand-side rows
 * @param projector projector applied to the rows; when non-null it is given the bit set of the
 *                  joined schema
 * @param joinInfo  join description (join ids, join types, schemas, optional limit)
 * @param tenantId  tenant whose cache is searched for the hash caches
 * @param env       coprocessor environment, used to look up the tenant cache
 * @throws DoNotRetryIOException if a join type other than Inner, Left, Semi or Anti is
 *         requested, or if the hash cache for a join id cannot be found
 * @throws IOException declared by the signature
 */
public HashJoinRegionScanner(RegionScanner scanner, TupleProjector projector, HashJoinInfo joinInfo, ImmutableBytesWritable tenantId, RegionCoprocessorEnvironment env) throws IOException {
    this.env = env;
    this.scanner = scanner; // the wrapped scanner that produces the rows to be joined
    this.projector = projector;
    this.joinInfo = joinInfo; // join metadata
    this.resultQueue = new LinkedList<Tuple>();
    this.hasMore = true;
    this.count = 0;
    this.limit = Long.MAX_VALUE;
    for (JoinType type : joinInfo.getJoinTypes()) {
        if (type != JoinType.Inner && type != JoinType.Left && type != JoinType.Semi && type != JoinType.Anti) {
            // The message lists every join type the guard above actually accepts.
            throw new DoNotRetryIOException("Got join type '" + type + "'. Expect only INNER, LEFT, SEMI or ANTI with hash-joins.");
        }
    }
    if (joinInfo.getLimit() != null) {
        this.limit = joinInfo.getLimit();
    }
    // One slot per joined table. Named joinCount so it does not shadow the this.count field
    // initialized above.
    int joinCount = joinInfo.getJoinIds().length;
    this.tempTuples = new List[joinCount];
    this.hashCaches = new HashCache[joinCount];
    this.tempSrcBitSet = new ValueBitSet[joinCount];
    TenantCache cache = GlobalCache.getTenantCache(env, tenantId);
    for (int i = 0; i < joinCount; i++) {
        ImmutableBytesPtr joinId = joinInfo.getJoinIds()[i];
        if (joinId.getLength() == 0) { // semi-join optimized into skip-scan
            hashCaches[i] = null;
            tempSrcBitSet[i] = null;
            tempTuples[i] = null;
            continue;
        }
        HashCache hashCache = (HashCache)cache.getServerCache(joinId);
        if (hashCache == null) {
            throw new DoNotRetryIOException("Could not find hash cache for joinId: "
                    + Bytes.toString(joinId.get(), joinId.getOffset(), joinId.getLength())
                    + ". The cache might have expired and have been removed.");
        }
        hashCaches[i] = hashCache;
        tempSrcBitSet[i] = ValueBitSet.newInstance(joinInfo.getSchemas()[i]);
    }
    if (this.projector != null) {
        this.tempDestBitSet = ValueBitSet.newInstance(joinInfo.getJoinedSchema());
        this.projector.setValueBitSet(tempDestBitSet);
    }
}

上面就是创建 join 对象(HashJoinRegionScanner)的构造方法,里面保存了关联查询的相关信息,如关联方式、关联字段等。

/**
 * Returns the next joined row. While {@code shouldAdvance()} reports that more input is
 * needed, a raw row is pulled from the wrapped scanner, joined via {@code processResults}
 * and the scratch list is cleared; the row handed back to the caller then comes from
 * {@code nextInQueue}.
 *
 * @param result scratch list for raw rows and, on return, the receiver of the next queued row
 * @return whatever {@code nextInQueue} returns for {@code result}
 * @throws IOException any failure, rethrown through {@code ServerUtil.throwIOException}
 *         together with the region name
 */
public boolean nextRaw(List<Cell> result) throws IOException {
    try {
        boolean needMoreInput = shouldAdvance();
        while (needMoreInput) {
            hasMore = scanner.nextRaw(result);
            processResults(result, false);
            // The raw row has been consumed by the join; reuse the list for the next round.
            result.clear();
            needMoreInput = shouldAdvance();
        }

        boolean hasQueuedRow = nextInQueue(result);
        return hasQueuedRow;
    } catch (Throwable t) {
        String regionName = env.getRegion().getRegionInfo().getRegionNameAsString();
        ServerUtil.throwIOException(regionName, t);
        return false; // impossible
    }
}

在 HashJoinRegionScanner 的 nextRaw 方法当中,每取出一行待关联的数据,就会调用下面的 processResults 方法

/**
 * Joins one left-hand-side row against the hash caches and leaves the surviving joined
 * tuples in {@code resultQueue}.
 * <p>
 * Steps: (1) wrap the cells into a tuple and project it when {@code forceProjection()} is set;
 * (2) for joins flagged for early evaluation, probe the hash cache and drop the row right away
 * when an Inner/Semi join has no match or an Anti join has one; (3) expand the row into the
 * queue, merging in the matching right-hand-side tuples join by join; (4) apply the optional
 * post-join filter and remove every queued tuple that does not satisfy it.
 *
 * @param result        cells of the current left-hand-side row; an empty list is a no-op
 * @param hasBatchLimit must be {@code false}; joins are not supported for scans with a limit
 * @throws UnsupportedOperationException if {@code hasBatchLimit} is {@code true}
 * @throws IOException declared by the signature
 */
private void processResults(List<Cell> result, boolean hasBatchLimit) throws IOException {
    if (result.isEmpty())
        return;
    // Tuple view over the incoming row.
    Tuple tuple = new ResultTuple(Result.create(result));
    // For backward compatibility. In new versions, HashJoinInfo.forceProjection()
    // always returns true.
    if (joinInfo.forceProjection()) {
        tuple = projector.projectResults(tuple);
    }

    // TODO: fix below Scanner.next() and Scanner.nextRaw() methods as well.
    if (hasBatchLimit)
        throw new UnsupportedOperationException("Cannot support join operations in scans with limit");

    int count = joinInfo.getJoinIds().length;
    boolean cont = true;
    // Early evaluation: probe the hash caches up front so non-matching rows are dropped cheaply.
    for (int i = 0; i < count; i++) {
        if (!(joinInfo.earlyEvaluation()[i]) || hashCaches[i] == null)
            continue;
        ImmutableBytesPtr key = TupleUtil.getConcatenatedValue(tuple, joinInfo.getJoinExpressions()[i]);
        tempTuples[i] = hashCaches[i].get(key);
        JoinType type = joinInfo.getJoinTypes()[i];
        if (((type == JoinType.Inner || type == JoinType.Semi) && tempTuples[i] == null)
                || (type == JoinType.Anti && tempTuples[i] != null)) {
            cont = false;
            break;
        }
    }
    if (cont) {
        if (projector == null) {
            // Without a projector only the multiplicity matters: enqueue the row once per
            // combination of matching right-hand-side tuples.
            int dup = 1;
            for (int i = 0; i < count; i++) {
                dup *= (tempTuples[i] == null ? 1 : tempTuples[i].size());
            }
            for (int i = 0; i < dup; i++) {
                resultQueue.offer(tuple);
            }
        } else {
            KeyValueSchema schema = joinInfo.getJoinedSchema();
            if (!joinInfo.forceProjection()) { // backward compatibility
                tuple = projector.projectResults(tuple);
            }
            resultQueue.offer(tuple);
            for (int i = 0; i < count; i++) {
                boolean earlyEvaluation = joinInfo.earlyEvaluation()[i];
                JoinType type = joinInfo.getJoinTypes()[i];
                // Semi/Anti joins that were evaluated early contribute no columns.
                if (earlyEvaluation && (type == JoinType.Semi || type == JoinType.Anti))
                    continue;
                // Process exactly the tuples queued before this join; newly joined tuples are
                // appended behind them and picked up by the next join.
                int j = resultQueue.size();
                while (j-- > 0) {
                    Tuple lhs = resultQueue.poll();
                    if (!earlyEvaluation) {
                        ImmutableBytesPtr key = TupleUtil.getConcatenatedValue(lhs, joinInfo.getJoinExpressions()[i]);
                        tempTuples[i] = hashCaches[i].get(key);
                        if (tempTuples[i] == null) {
                            if (type == JoinType.Inner || type == JoinType.Semi) {
                                continue;
                            } else if (type == JoinType.Anti) {
                                resultQueue.offer(lhs);
                                continue;
                            }
                        }
                    }
                    if (tempTuples[i] == null) {
                        // No match on the right-hand side: merge with a null tuple.
                        Tuple joined = tempSrcBitSet[i] == ValueBitSet.EMPTY_VALUE_BITSET ?
                                lhs : TupleProjector.mergeProjectedValue(
                                        (ProjectedValueTuple) lhs, schema, tempDestBitSet,
                                        null, joinInfo.getSchemas()[i], tempSrcBitSet[i],
                                        joinInfo.getFieldPositions()[i]);
                        resultQueue.offer(joined);
                        continue;
                    }
                    for (Tuple t : tempTuples[i]) {
                        Tuple joined = tempSrcBitSet[i] == ValueBitSet.EMPTY_VALUE_BITSET ?
                                lhs : TupleProjector.mergeProjectedValue(
                                        (ProjectedValueTuple) lhs, schema, tempDestBitSet,
                                        t, joinInfo.getSchemas()[i], tempSrcBitSet[i],
                                        joinInfo.getFieldPositions()[i]);
                        resultQueue.offer(joined);
                    }
                }
            }
        }
        // apply post-join filter: drop every queued tuple that does not satisfy it
        Expression postFilter = joinInfo.getPostJoinFilterExpression();
        if (postFilter != null) {
            for (Iterator<Tuple> iter = resultQueue.iterator(); iter.hasNext();) {
                Tuple t = iter.next();
                postFilter.reset();
                ImmutableBytesWritable tempPtr = new ImmutableBytesWritable();
                try {
                    if (!postFilter.evaluate(t, tempPtr)) {
                        iter.remove();
                        continue;
                    }
                } catch (IllegalDataException e) {
                    // A tuple the filter cannot be evaluated on is treated as filtered out.
                    iter.remove();
                    continue;
                }
                Boolean b = (Boolean)postFilter.getDataType().toObject(tempPtr);
                // Null-safe check: a null filter result (presumably SQL NULL - confirm against
                // PDataType.toObject) is treated as "not satisfied" instead of triggering a
                // NullPointerException on unboxing.
                if (!Boolean.TRUE.equals(b)) {
                    iter.remove();
                }
            }
        }
    }
}

在上面的方法中,先用关联键到 hashCache 中查找匹配的行并合并成关联结果,最后再用 post-join 过滤表达式对结果进行过滤,不满足表达式的行会被移除。

总结:一个本地索引的使用,其实就是先去索引表进行查询,然后拿到主键后,再去物理数据表进行查询

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值