Phoenix source code analysis: the UPSERT flow
Building on the earlier analysis of the Phoenix driver source, this post walks through how an UPSERT is executed. The running example is the prepared UPSERT that MetaDataClient uses to record an index's ASYNC_CREATED_DATE in the system catalog:
SET_ASYNC_CREATED_DATE =
        "UPSERT INTO " + SYSTEM_CATALOG_SCHEMA + ".\"" + SYSTEM_CATALOG_TABLE + "\"( " +
        TENANT_ID + "," +
        TABLE_SCHEM + "," +
        TABLE_NAME + "," +
        ASYNC_CREATED_DATE + " " + PDate.INSTANCE.getSqlTypeName() +
        ") VALUES (?, ?, ?, ?)";

PreparedStatement setAsync = connection.prepareStatement(SET_ASYNC_CREATED_DATE);
setAsync.setString(1, tenantIdStr);
setAsync.setString(2, schemaName);
setAsync.setString(3, tableName);
setAsync.setDate(4, asyncCreatedDate);
setAsync.execute();
Preparing the SQL compiles it into a PhoenixPreparedStatement:
PhoenixPreparedStatement statement = new PhoenixPreparedStatement(this, sql);
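From the application's point of view this whole path is plain JDBC. Here is a minimal, self-contained sketch of driving the same path from client code (the JDBC URL and table name are hypothetical; ps.execute() is what enters the driver internals described below):

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;

public class UpsertExample {
    public static void main(String[] args) throws Exception {
        // The Phoenix driver registers itself with DriverManager when loaded.
        try (Connection conn = DriverManager.getConnection("jdbc:phoenix:localhost");
             PreparedStatement ps = conn.prepareStatement(
                     "UPSERT INTO MY_TABLE (ID, NAME) VALUES (?, ?)")) {
            ps.setInt(1, 1);
            ps.setString(2, "alice");
            ps.execute();  // enters PhoenixStatement.executeMutation below
            conn.commit(); // flushes the buffered MutationState to HBase
        }
    }
}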
Executing setAsync.execute() then calls into the following method of PhoenixStatement:
protected int executeMutation(final CompilableStatement stmt) throws SQLException {
    if (connection.isReadOnly()) {
        throw new SQLExceptionInfo.Builder(
            SQLExceptionCode.READ_ONLY_CONNECTION).
            build().buildException();
    }
    GLOBAL_MUTATION_SQL_COUNTER.increment();
    try {
        return CallRunner
                .run(
                    new CallRunner.CallableThrowable<Integer, SQLException>() {
                        @Override
                        public Integer call() throws SQLException {
                            try {
                                MutationState state = connection.getMutationState();
                                MutationPlan plan = stmt.compilePlan(PhoenixStatement.this, Sequence.ValueOp.VALIDATE_SEQUENCE);
                                if (plan.getTargetRef() != null && plan.getTargetRef().getTable() != null && plan.getTargetRef().getTable().isTransactional()) {
                                    state.startTransaction();
                                }
                                Iterator<TableRef> tableRefs = plan.getSourceRefs().iterator();
                                state.sendUncommitted(tableRefs);
                                state.checkpointIfNeccessary(plan);
                                MutationState lastState = plan.execute();
                                state.join(lastState);
                                if (connection.getAutoCommit()) {
                                    connection.commit();
                                }
                                setLastResultSet(null);
                                setLastQueryPlan(null);
                                // Unfortunately, JDBC uses an int for update count, so we
                                // just max out at Integer.MAX_VALUE
                                int lastUpdateCount = (int) Math.min(Integer.MAX_VALUE, lastState.getUpdateCount());
                                setLastUpdateCount(lastUpdateCount);
                                setLastUpdateOperation(stmt.getOperation());
                                connection.incrementStatementExecutionCounter();
                                return lastUpdateCount;
                            } catch (RuntimeException e) {
                                // FIXME: Expression.evaluate does not throw SQLException
                                // so this will unwrap throws from that.
                                if (e.getCause() instanceof SQLException) {
                                    throw (SQLException) e.getCause();
                                }
                                throw e;
                            }
                        }
                    }, PhoenixContextExecutor.inContext(),
                    Tracing.withTracing(connection, this.toString()));
    } catch (Exception e) {
        Throwables.propagateIfInstanceOf(e, SQLException.class);
        throw Throwables.propagate(e);
    }
}
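Note the auto-commit branch above: with auto-commit on, every execute() immediately commits (one round trip per row), while with it off, mutations accumulate client-side in MutationState until connection.commit(). A hedged sketch of the batching pattern this enables (the table name and batch interval are made up):

try (Connection conn = DriverManager.getConnection("jdbc:phoenix:localhost")) {
    conn.setAutoCommit(false); // buffer mutations client-side
    try (PreparedStatement ps = conn.prepareStatement(
            "UPSERT INTO MY_TABLE (ID, NAME) VALUES (?, ?)")) {
        for (int i = 0; i < 10000; i++) {
            ps.setInt(1, i);
            ps.setString(2, "row-" + i);
            ps.execute();      // buffered in MutationState, not yet sent
            if (i % 1000 == 999) {
                conn.commit(); // MutationState.send() -> HTable.batch()
            }
        }
        conn.commit();         // flush the remainder
    }
}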
The stmt passed to executeMutation is an ExecutableUpsertStatement, whose compilePlan is shown below:
public MutationPlan compilePlan(PhoenixStatement stmt, Sequence.ValueOp seqAction) throws SQLException {
    if (!getUdfParseNodes().isEmpty()) {
        stmt.throwIfUnallowedUserDefinedFunctions(getUdfParseNodes());
    }
    UpsertCompiler compiler = new UpsertCompiler(stmt, this.getOperation());
    MutationPlan plan = compiler.compile(this);
    plan.getContext().getSequenceManager().validateSequences(seqAction);
    return plan;
}
compilePlan delegates to UpsertCompiler.compile, which returns a MutationPlan. Back in executeMutation, plan.execute() (the plan is built inside UpsertCompiler) then runs the following method and returns a MutationState:
@Override
public MutationState execute() throws SQLException {
    ImmutableBytesWritable ptr = context.getTempPtr();
    final SequenceManager sequenceManager = context.getSequenceManager();
    // Next evaluate all the expressions
    int nodeIndex = nodeIndexOffset;
    PTable table = tableRef.getTable();
    Tuple tuple = sequenceManager.getSequenceCount() == 0 ? null :
            sequenceManager.newSequenceTuple(null);
    for (Expression constantExpression : constantExpressions) {
        PColumn column = allColumns.get(columnIndexes[nodeIndex]);
        constantExpression.evaluate(tuple, ptr);
        Object value = null;
        if (constantExpression.getDataType() != null) {
            value = constantExpression.getDataType().toObject(ptr, constantExpression.getSortOrder(), constantExpression.getMaxLength(), constantExpression.getScale());
            if (!constantExpression.getDataType().isCoercibleTo(column.getDataType(), value)) {
                throw TypeMismatchException.newException(
                    constantExpression.getDataType(), column.getDataType(), "expression: "
                            + constantExpression.toString() + " in column " + column);
            }
            if (!column.getDataType().isSizeCompatible(ptr, value, constantExpression.getDataType(),
                    constantExpression.getMaxLength(), constantExpression.getScale(),
                    column.getMaxLength(), column.getScale())) {
                throw new SQLExceptionInfo.Builder(
                    SQLExceptionCode.DATA_EXCEEDS_MAX_CAPACITY).setColumnName(column.getName().getString())
                    .setMessage("value=" + constantExpression.toString()).build().buildException();
            }
        }
        column.getDataType().coerceBytes(ptr, value, constantExpression.getDataType(),
            constantExpression.getMaxLength(), constantExpression.getScale(), constantExpression.getSortOrder(),
            column.getMaxLength(), column.getScale(), column.getSortOrder(),
            table.rowKeyOrderOptimizable());
        if (overlapViewColumns.contains(column) && Bytes.compareTo(ptr.get(), ptr.getOffset(), ptr.getLength(), column.getViewConstant(), 0, column.getViewConstant().length - 1) != 0) {
            throw new SQLExceptionInfo.Builder(
                SQLExceptionCode.CANNOT_UPDATE_VIEW_COLUMN)
                .setColumnName(column.getName().getString())
                .setMessage("value=" + constantExpression.toString()).build().buildException();
        }
        values[nodeIndex] = ByteUtil.copyKeyBytesIfNecessary(ptr);
        nodeIndex++;
    }
    // Add columns based on view
    for (PColumn column : addViewColumns) {
        if (IndexUtil.getViewConstantValue(column, ptr)) {
            values[nodeIndex++] = ByteUtil.copyKeyBytesIfNecessary(ptr);
        } else {
            throw new IllegalStateException();
        }
    }
    Map<ImmutableBytesPtr, RowMutationState> mutation = Maps.newHashMapWithExpectedSize(1);
    setValues(values, pkSlotIndexes, columnIndexes, table, mutation, statement, useServerTimestamp);
    return new MutationState(tableRef, mutation, 0, maxSize, connection);
}
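The loop above evaluates each literal, verifies it is coercible to the target column's type, and rewrites its bytes via coerceBytes. To get a feel for that PDataType machinery, here is a small standalone sketch; PInteger and PLong are Phoenix's real type singletons, but treat the exact overloads used here as an assumption based on the Phoenix 4.x API:

import org.apache.phoenix.schema.types.PInteger;
import org.apache.phoenix.schema.types.PLong;

public class CoercionSketch {
    public static void main(String[] args) {
        // Serialize an INTEGER literal the way a constant expression would.
        byte[] intBytes = PInteger.INSTANCE.toBytes(42);
        // Check that the literal's type can be coerced to a BIGINT column's type.
        boolean coercible = PInteger.INSTANCE.isCoercibleTo(PLong.INSTANCE, 42);
        // Read the bytes back as the column's type, as coerceBytes would arrange.
        Object asLong = PLong.INSTANCE.toObject(intBytes, PInteger.INSTANCE);
        System.out.println(coercible + " -> " + asLong); // true -> 42
    }
}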
The setValues call at the end of execute() assembles the row: PK values go into row-key slots, and everything else becomes column values:
private static void setValues(byte[][] values, int[] pkSlotIndex, int[] columnIndexes, PTable table, Map<ImmutableBytesPtr, RowMutationState> mutation, PhoenixStatement statement, boolean useServerTimestamp) {
    Map<PColumn, byte[]> columnValues = Maps.newHashMapWithExpectedSize(columnIndexes.length);
    byte[][] pkValues = new byte[table.getPKColumns().size()][];
    // If the table uses salting, the first byte is the salting byte, set to an empty array
    // here and we will fill in the byte later in PRowImpl.
    if (table.getBucketNum() != null) {
        pkValues[0] = new byte[] {0};
    }
    Long rowTimestamp = null; // case when the table doesn't have a row timestamp column
    RowTimestampColInfo rowTsColInfo = new RowTimestampColInfo(useServerTimestamp, rowTimestamp);
    for (int i = 0; i < values.length; i++) {
        byte[] value = values[i];
        PColumn column = table.getColumns().get(columnIndexes[i]);
        if (SchemaUtil.isPKColumn(column)) {
            pkValues[pkSlotIndex[i]] = value;
            if (SchemaUtil.getPKPosition(table, column) == table.getRowTimestampColPos()) {
                if (!useServerTimestamp) {
                    PColumn rowTimestampCol = table.getPKColumns().get(table.getRowTimestampColPos());
                    rowTimestamp = PLong.INSTANCE.getCodec().decodeLong(value, 0, rowTimestampCol.getSortOrder());
                    if (rowTimestamp < 0) {
                        throw new IllegalDataException("Value of a column designated as ROW_TIMESTAMP cannot be less than zero");
                    }
                    rowTsColInfo = new RowTimestampColInfo(useServerTimestamp, rowTimestamp);
                }
            }
        } else {
            columnValues.put(column, value);
        }
    }
    ImmutableBytesPtr ptr = new ImmutableBytesPtr();
    table.newKey(ptr, pkValues);
    mutation.put(ptr, new RowMutationState(columnValues, statement.getConnection().getStatementExecutionCounter(), rowTsColInfo));
}
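The table.newKey(ptr, pkValues) call at the end is what concatenates the PK slots into the HBase row key. Below is a deliberately simplified illustration of the idea, not Phoenix's actual implementation (the real code also honors sort order, pads fixed-width types, and fills in the salt byte in PRowImpl):

import java.io.ByteArrayOutputStream;

final class RowKeySketch {
    // Simplified: concatenate PK values, reserving a leading salt byte for
    // salted tables and separating variable-length values with a zero byte.
    static byte[] buildRowKey(byte[][] pkValues, boolean salted) {
        ByteArrayOutputStream key = new ByteArrayOutputStream();
        if (salted) {
            key.write(0); // placeholder; the real salt byte is computed later
        }
        for (int i = 0; i < pkValues.length; i++) {
            key.write(pkValues[i], 0, pkValues[i].length);
            if (i < pkValues.length - 1) {
                key.write(0); // separator (only for variable-length types in reality)
            }
        }
        return key.toByteArray();
    }
}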
MutationState deserves special attention: it is an important object, and it is the one that ends up calling the native HBase write API directly.
In MetaDataClient, the following line invokes the machinery that packages the pending writes into raw HBase mutations:
tableMetaData.addAll(connection.getMutationState().toMutations(timestamp).next().getSecond());
toMutations in turn calls addRowMutations inside MutationState:
private Iterator<Pair<byte[], List<Mutation>>> addRowMutations(final TableRef tableRef, final Map<ImmutableBytesPtr, RowMutationState> values, final long timestamp, boolean includeMutableIndexes, final boolean sendAll) {
    final PTable table = tableRef.getTable();
    final Iterator<PTable> indexes = // Only maintain tables with immutable rows through this client-side mechanism
            (table.isImmutableRows() || includeMutableIndexes) ?
                    IndexMaintainer.nonDisabledIndexIterator(table.getIndexes().iterator()) :
                    Iterators.<PTable>emptyIterator();
    final List<Mutation> mutationList = Lists.newArrayListWithExpectedSize(values.size());
    final List<Mutation> mutationsPertainingToIndex = indexes.hasNext() ? Lists.<Mutation>newArrayListWithExpectedSize(values.size()) : null;
    generateMutations(tableRef, timestamp, values, mutationList, mutationsPertainingToIndex);
    return new Iterator<Pair<byte[], List<Mutation>>>() {
        boolean isFirst = true;

        @Override
        public boolean hasNext() {
            return isFirst || indexes.hasNext();
        }

        @Override
        public Pair<byte[], List<Mutation>> next() {
            if (isFirst) {
                isFirst = false;
                return new Pair<byte[], List<Mutation>>(table.getPhysicalName().getBytes(), mutationList);
            }
            PTable index = indexes.next();
            List<Mutation> indexMutations;
            try {
                indexMutations =
                        IndexUtil.generateIndexData(table, index, mutationsPertainingToIndex,
                            connection.getKeyValueBuilder(), connection);
                // we may also have to include delete mutations for immutable tables if we are not processing all the tables in the mutations map
                if (!sendAll) {
                    TableRef key = new TableRef(index);
                    Map<ImmutableBytesPtr, RowMutationState> rowToColumnMap = mutations.remove(key);
                    if (rowToColumnMap != null) {
                        final List<Mutation> deleteMutations = Lists.newArrayList();
                        generateMutations(tableRef, timestamp, rowToColumnMap, deleteMutations, null);
                        indexMutations.addAll(deleteMutations);
                    }
                }
            } catch (SQLException e) {
                throw new IllegalDataException(e);
            }
            return new Pair<byte[], List<Mutation>>(index.getPhysicalName().getBytes(), indexMutations);
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    };
}
This method produces the Mutation objects that will be inserted into HBase: the first element of the returned iterator carries the data-table mutations, and each subsequent element carries the mutations for one index table (a consumption sketch follows).
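A hedged illustration of walking that iterator (addRowMutations is private, so this is purely for intuition; tableRef, valuesMap, and timestamp stand in for the caller's state):

Iterator<Pair<byte[], List<Mutation>>> it =
        addRowMutations(tableRef, valuesMap, timestamp, false, true);
while (it.hasNext()) {
    Pair<byte[], List<Mutation>> batch = it.next();
    // First pair: the data table; subsequent pairs: one per client-maintained index.
    System.out.println("HTable " + Bytes.toString(batch.getFirst())
            + " receives " + batch.getSecond().size() + " mutations");
}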
When the JDBC client calls PhoenixConnection.commit(), it reaches MutationState.commit, which ships the mutations to HBase through the following method:
private void send(Iterator<TableRef> tableRefIterator) throws SQLException {
    int i = 0;
    long[] serverTimeStamps = null;
    boolean sendAll = false;
    if (tableRefIterator == null) {
        serverTimeStamps = validateAll();
        tableRefIterator = mutations.keySet().iterator();
        sendAll = true;
    }
    Map<ImmutableBytesPtr, RowMutationState> valuesMap;
    List<TableRef> txTableRefs = Lists.newArrayListWithExpectedSize(mutations.size());
    // add tracing for this operation
    try (TraceScope trace = Tracing.startNewSpan(connection, "Committing mutations to tables")) {
        Span span = trace.getSpan();
        ImmutableBytesWritable indexMetaDataPtr = new ImmutableBytesWritable();
        boolean isTransactional;
        while (tableRefIterator.hasNext()) {
            // at this point we are going through mutations for each table
            final TableRef tableRef = tableRefIterator.next();
            valuesMap = mutations.get(tableRef);
            if (valuesMap == null || valuesMap.isEmpty()) {
                continue;
            }
            // Validate as we go if transactional since we can undo if a problem occurs (which is unlikely)
            long serverTimestamp = serverTimeStamps == null ? validate(tableRef, valuesMap) : serverTimeStamps[i++];
            final PTable table = tableRef.getTable();
            // Track tables to which we've sent uncommitted data
            if (isTransactional = table.isTransactional()) {
                txTableRefs.add(tableRef);
                addDMLFence(table);
                uncommittedPhysicalNames.add(table.getPhysicalName().getString());
            }
            boolean isDataTable = true;
            table.getIndexMaintainers(indexMetaDataPtr, connection);
            Iterator<Pair<byte[], List<Mutation>>> mutationsIterator = addRowMutations(tableRef, valuesMap, serverTimestamp, false, sendAll);
            while (mutationsIterator.hasNext()) {
                Pair<byte[], List<Mutation>> pair = mutationsIterator.next();
                byte[] htableName = pair.getFirst();
                List<Mutation> mutationList = pair.getSecond();
                // create a span per target table
                // TODO maybe we can be smarter about the table name to string here?
                Span child = Tracing.child(span, "Writing mutation batch for table: " + Bytes.toString(htableName));
                int retryCount = 0;
                boolean shouldRetry = false;
                do {
                    final ServerCache cache = isDataTable ? setMetaDataOnMutations(tableRef, mutationList, indexMetaDataPtr) : null;
                    // If we haven't retried yet, retry for this case only, as it's possible that
                    // a split will occur after we send the index metadata cache to all known
                    // region servers.
                    shouldRetry = cache != null;
                    SQLException sqlE = null;
                    HTableInterface hTable = connection.getQueryServices().getTable(htableName);
                    try {
                        if (isTransactional) {
                            // If we have indexes, wrap the HTable in a delegate HTable that
                            // will attach the necessary index meta data in the event of a
                            // rollback
                            if (!table.getIndexes().isEmpty()) {
                                hTable = new MetaDataAwareHTable(hTable, tableRef);
                            }
                            TransactionAwareHTable txnAware = TransactionUtil.getTransactionAwareHTable(hTable, table);
                            // Don't add immutable indexes (those are the only ones that would participate
                            // during a commit), as we don't need conflict detection for these.
                            if (isDataTable) {
                                // Even for immutable, we need to do this so that an abort has the state
                                // necessary to generate the rows to delete.
                                addTransactionParticipant(txnAware);
                            } else {
                                txnAware.startTx(getTransaction());
                            }
                            hTable = txnAware;
                        }
                        long numMutations = mutationList.size();
                        GLOBAL_MUTATION_BATCH_SIZE.update(numMutations);
                        long startTime = System.currentTimeMillis();
                        child.addTimelineAnnotation("Attempt " + retryCount);
                        // This is the heart of the write path: the batch is written to HBase here
                        hTable.batch(mutationList);
                        if (logger.isDebugEnabled()) logger.debug("Sent batch of " + numMutations + " for " + Bytes.toString(htableName));
                        child.stop();
                        shouldRetry = false;
                        long mutationCommitTime = System.currentTimeMillis() - startTime;
                        GLOBAL_MUTATION_COMMIT_TIME.update(mutationCommitTime);
                        long mutationSizeBytes = calculateMutationSize(mutationList);
                        MutationMetric mutationsMetric = new MutationMetric(numMutations, mutationSizeBytes, mutationCommitTime);
                        mutationMetricQueue.addMetricsForTable(Bytes.toString(htableName), mutationsMetric);
                        // ... (the rest of send(): exception handling, retries, and cleanup are elided here) ...
The core of the upsert is the hTable.batch(mutationList) call above, which writes the data straight into HBase.
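For comparison, here is a minimal sketch of the same kind of native batch write using the plain HBase 1.x client (hypothetical table, row key, and column; Phoenix assembles these Put objects for you from the upsert):

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Row;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class RawBatchSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf("MY_TABLE"))) {
            List<Row> batch = new ArrayList<>();
            Put put = new Put(Bytes.toBytes("rowkey-1"));
            // "0" is Phoenix's default column family name.
            put.addColumn(Bytes.toBytes("0"), Bytes.toBytes("NAME"), Bytes.toBytes("alice"));
            batch.add(put);
            table.batch(batch, new Object[batch.size()]); // same kind of call Phoenix makes (modulo client API version)
        }
    }
}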
And with that, the full journey of a Phoenix upsert, from SQL text to HBase writes, is complete.