Task3
就是对project3一些算子(除了对中间结果的处理的算子加锁),实际上也就是对insert,delete,seqscan这三个加锁,先加表锁,再加行锁,对不同隔离级别也是不一样的,写锁是都要加的,因为可能ub,而且还是在abort和commit阶段统一释放。读未提交的话,就是不加读锁,然后读以提交的话,就是读一个tuple,释放一个tuple,就不可重复读,如果要重复读的话,要持有读锁,一直到commit和abort阶段。然后因为锁更新只能往高了走,而一个事务可以insert,seqscan,所以我们要先判断是不是持有更高级别的锁,如果有的话,我们就不加,同时持有锁被锁升级这一可能性也要在释放锁那里判断。delete, insert注意要更新索引,同时也要记录一下tuple的修改和index的修改,方便后面commit时应用delete,或者abort时回滚。
LeaderBoard Task
Task1谓词下推
就是 filter -----seqsan 把过滤的地方推到seqscan那里,可以减少锁的元组数量,但是对leaderboard的提升不是很大
Task2 update算子
如果之前我们要更新的话,是先delete后insert,然后我们可以直接更新,但是对于索引我们是要先delete然后insert的。
auto UpdateExecutor::Next([[maybe_unused]] Tuple *tuple, RID *rid) -> bool {
auto lock_manager = exec_ctx_->GetLockManager();
auto txn = exec_ctx_->GetTransaction();
Tuple update_tuple;
RID update_rid;
int cnt = 0;
if (has_no_tuple_) {
return false;
}
while (child_executor_->Next(&update_tuple, &update_rid)) {
TableInfo *table_info = exec_ctx_->GetCatalog()->GetTable(plan_->TableOid());
try {
if (!lock_manager->LockRow(txn, LockManager::LockMode::EXCLUSIVE, plan_->table_oid_, update_rid)) {
throw ExecutionException("update lock row");
}
} catch (TransactionAbortException &e) {
throw ExecutionException("update lock row");
}
Tuple old_tuple;
if (!table_info->table_->GetTuple(update_rid, &old_tuple, txn)) {
LOG_ERROR("can't get old tuple");
exit(-1);
}
if (table_info->table_->UpdateTuple(update_tuple, update_rid, exec_ctx_->GetTransaction())) {
cnt++;
auto vec = exec_ctx_->GetCatalog()->GetTableIndexes(table_info->name_);
for (auto info : vec) {
auto old_key = old_tuple.KeyFromTuple(table_info->schema_, info->key_schema_, info->index_->GetKeyAttrs());
info->index_->DeleteEntry(old_key, update_rid, exec_ctx_->GetTransaction());
auto update_key =
update_tuple.KeyFromTuple(table_info->schema_, info->key_schema_, info->index_->GetKeyAttrs());
info->index_->InsertEntry(update_key, update_rid, exec_ctx_->GetTransaction());
auto update_index_record = IndexWriteRecord(update_rid, plan_->table_oid_, WType::UPDATE, update_tuple,
info->index_oid_, exec_ctx_->GetCatalog());
update_index_record.old_tuple_ = old_tuple;
txn->AppendIndexWriteRecord(update_index_record);
}
// printf("txn{%d} update rid{%u}\n", txn->GetTransactionId(), update_rid.GetSlotNum());
} else {
break;
}
}
Tuple tmp(std::vector<Value>(1, Value(TypeId::INTEGER, cnt)), &plan_->OutputSchema());
*tuple = tmp;
has_no_tuple_ = true;
return true;
}
Task3 index优化。
就是不要去遍历表了,如果where idx = const_value, idx是索引列的话,查一下索引有哪些rid满足条件,直接对这些上锁就行了,也是直接取,这下就快了很多。
对于这个task我们先要修改优化计划,主要index_scan要在filter_scan前面,因为index_scan更加特殊,我们可以做更大的优化,所以我们的原则就是越特殊的越前面。
auto Optimizer::OptimizeCustom(const AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef {
auto p = plan;
p = OptimizeMergeProjection(p);
p = OptimizeMergeFilterNLJ(p);
p = OptimizeNLJAsIndexJoin(p);
// p = OptimizeNLJAsHashJoin(p); // Enable this rule after you have implemented hash join.
p = OptimizeOrderByAsIndexScan(p);
p = OptimizeSortLimitAsTopN(p);
// my add
p = OptimizeIndexScan(p);
p = OptimizeMergeFilterScan(p);
return p;
}
添加filter(满足上面条件)------seqcan,变为index_scan,然后planNode里面要记录index_oid,就是选取那个属性对应的索引,然后就是要记录一下这个val的值,后面生产tuple,再生产key,到b+树里面去找。
auto Optimizer::OptimizeIndexScan(const AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef {
std::vector<AbstractPlanNodeRef> children;
for (const auto &child : plan->GetChildren()) {
children.emplace_back(OptimizeIndexScan(child));
}
auto optimized_plan = plan->CloneWithChildren(std::move(children));
// LOG_INFO("get optimized type{%d} childtype{%d}", (int)optimized_plan->GetType(),
// (int)optimized_plan->GetChildAt(0)->GetType());
if (optimized_plan->GetType() == PlanType::Filter && optimized_plan->GetChildAt(0)->GetType() == PlanType::SeqScan) {
BUSTUB_ENSURE(optimized_plan->children_.size() == 1, "index scan no possible !!!");
// const auto &child_plan = optimized_plan->children_[0];
// const auto &seq_scan_plan = dynamic_cast<const SeqScanPlanNode &>(*child_plan);
const auto &filter_plan = dynamic_cast<const FilterPlanNode &>(*optimized_plan);
const auto &seq_plan = dynamic_cast<const SeqScanPlanNode &>(*optimized_plan->GetChildAt(0));
// to do judge the filter type
if (const auto *expr = dynamic_cast<const ComparisonExpression *>(filter_plan.predicate_.get()); expr != nullptr) {
if (expr->comp_type_ == ComparisonType::Equal) {
if (const auto *left_expr = dynamic_cast<const ColumnValueExpression *>(expr->children_[0].get());
left_expr != nullptr) {
if (const auto *right_expr = dynamic_cast<const ConstantValueExpression *>(expr->children_[1].get());
right_expr != nullptr) {
if (auto index = MatchIndex(seq_plan.table_name_, left_expr->GetColIdx()); index != std::nullopt) {
auto [index_oid, index_name] = *index;
auto index_plan = std::make_shared<IndexScanPlanNode>(plan->output_schema_, index_oid);
index_plan->val_ = right_expr->val_;
index_plan->table_name_ = seq_plan.table_name_;
// LOG_INFO("get here");
return index_plan;
}
}
}
}
}
}
return optimized_plan;
}
} // namespace bustub
这个优化就在于一个table实际上是有很多page的,我们这样做的话,就可以少几个page,毕竟disk database的瓶颈在磁盘IO嘛。同时也减少了对元组的锁。
最后的优化结果:
nice!!!