Task 1 Lock Manager
关于 LockRow:加行锁之前要先检查事务在对应 table 上持有的锁,因为加锁顺序是先锁 table、再锁 row;判断表锁类型是否满足要求时,还要考虑到锁升级之后的模式。
// Checks that `txn` already holds a lock on table `oid` that permits taking a row lock of
// mode `lock_mode` on one of its rows.
//
// @param txn        transaction requesting the row lock
// @param lock_mode  requested row lock mode; only SHARED and EXCLUSIVE are accepted
// @param oid        table that owns the row
// @throws TransactionAbortException with TABLE_LOCK_NOT_PRESENT when no suitable table lock is
//         held, or ATTEMPTED_INTENTION_LOCK_ON_ROW for any other row lock mode. In both cases
//         TxnAbortAll(txn) is called before throwing.
void LockManager::IsTableFit(Transaction *txn, LockMode lock_mode, table_oid_t oid) {
  if (lock_mode == LockMode::EXCLUSIVE) {
    // An exclusive row lock requires X, IX or SIX on the table.
    const bool table_lock_ok = txn->IsTableExclusiveLocked(oid) || txn->IsTableIntentionExclusiveLocked(oid) ||
                               txn->IsTableSharedIntentionExclusiveLocked(oid);
    if (!table_lock_ok) {
      TxnAbortAll(txn);
      throw TransactionAbortException(txn->GetTransactionId(), AbortReason::TABLE_LOCK_NOT_PRESENT);
    }
    return;
  }
  if (lock_mode == LockMode::SHARED) {
    // A shared row lock is satisfied by any table lock mode (S, IS, SIX, X or IX). Stronger
    // modes are included so the check still passes after a table lock upgrade.
    const bool table_lock_ok = txn->IsTableSharedLocked(oid) || txn->IsTableIntentionSharedLocked(oid) ||
                               txn->IsTableSharedIntentionExclusiveLocked(oid) ||
                               txn->IsTableExclusiveLocked(oid) || txn->IsTableIntentionExclusiveLocked(oid);
    if (!table_lock_ok) {
      TxnAbortAll(txn);
      throw TransactionAbortException(txn->GetTransactionId(), AbortReason::TABLE_LOCK_NOT_PRESENT);
    }
    return;
  }
  // Every remaining mode is rejected as an intention lock on a row.
  TxnAbortAll(txn);
  throw TransactionAbortException(txn->GetTransactionId(), AbortReason::ATTEMPTED_INTENTION_LOCK_ON_ROW);
}
锁阻塞模型如下
// Blocking model: with the queue latch held, sleep on the queue's condition variable until
// GrantLock() accepts the request. If the transaction is found ABORTED after a wake-up, its
// request is withdrawn from the queue and the enclosing function returns false.
std::unique_lock<std::mutex> lock(queue_ptr->latch_);
while (!GrantLock(txn, lock_mode, oid, rid)) {
  queue_ptr->cv_.wait(lock);
  if (txn->GetState() != TransactionState::ABORTED) {
    // Still alive: go back and re-evaluate GrantLock().
    continue;
  }
  LOG_INFO("granting abort!!!");
  // Give up the upgrade slot if this transaction was the one upgrading.
  if (queue_ptr->upgrading_ == txn->GetTransactionId()) {
    queue_ptr->upgrading_ = INVALID_TXN_ID;
  }
  // Locate this transaction's request in the queue.
  auto &requests = queue_ptr->request_queue_;
  auto pos = requests.begin();
  while (pos != requests.end() && (*pos)->txn_id_ != txn->GetTransactionId()) {
    ++pos;
  }
  if (pos != requests.end()) {
    requests.erase(pos);
    // Removing a request may unblock other waiters on this queue.
    queue_ptr->cv_.notify_all();
    return false;
  }
  // No request found: fall through and re-check GrantLock(), as before.
}
解锁代码如下
// Releases the granted lock that `txn` holds on row `rid` of table `oid`.
//
// @param txn  transaction releasing the lock
// @param oid  table that owns the row
// @param rid  row whose lock is released
// @return true once the granted request has been removed from the row's queue
// @throws TransactionAbortException with ATTEMPTED_UNLOCK_BUT_NO_LOCK_HELD (after calling
//         TxnAbortAll(txn)) when `txn` has no granted request in the queue.
auto LockManager::UnlockRow(Transaction *txn, const table_oid_t &oid, const RID &rid) -> bool {
  LOG_INFO("unlock row");
  Log(txn, LockMode::EXCLUSIVE, oid);
  auto queue_ptr = GetLRQueuePtr(oid, rid);
  std::lock_guard<std::mutex> lock(queue_ptr->latch_);
  auto &requests = queue_ptr->request_queue_;
  for (auto iter = requests.begin(); iter != requests.end(); ++iter) {
    if ((*iter)->txn_id_ == txn->GetTransactionId() && (*iter)->granted_) {
      // Update the transaction's bookkeeping and state while the request is still valid,
      // i.e. before it is erased from the queue.
      BookKeepingRemove(txn, (*iter)->lock_mode_, oid, rid);
      UnLockChangeState(txn, (*iter)->lock_mode_);
      requests.erase(iter);
      // Wake the waiters so they can re-evaluate whether their request can be granted.
      queue_ptr->cv_.notify_all();
      return true;
    }
  }
  // NOTE(review): the queue latch is still held while TxnAbortAll runs; confirm that
  // TxnAbortAll never tries to take this queue's latch.
  LOG_INFO("unlock fail %d !!!!!!!!!!!!!!!!", txn->GetTransactionId());
  TxnAbortAll(txn);
  throw TransactionAbortException(txn->GetTransactionId(), AbortReason::ATTEMPTED_UNLOCK_BUT_NO_LOCK_HELD);
}
task2 死锁检查
实际上就是用 DFS 维护一个栈(当前搜索路径):如果下一个要访问的节点已经在栈里,就说明有环;栈中从这个重复节点一直到栈顶的所有节点,就是这个环的全部成员。
// Depth-first search over waits_for_ starting at `txn_id`, looking for a cycle.
//
// @param txn_id  node to expand
// @param mp      on-path flags: mp[t] == 1 while t is on the current DFS path, 0 otherwise
// @param stk     current DFS path; `txn_id` is pushed on entry and popped when no cycle is found
// @param ump     visited flags: ump[t] == -1 once t has been entered by any Dfs call
// @return true if a successor that is still on the current path was reached. In that case
//         `stk` holds the path from the DFS start to the node whose outgoing edge closed the
//         cycle; the repeated node itself is NOT pushed a second time.
auto LockManager::Dfs(txn_id_t txn_id, std::unordered_map<txn_id_t, int> &mp, std::stack<txn_id_t> &stk,
                      std::unordered_map<txn_id_t, int> &ump) -> bool {
  stk.push(txn_id);
  mp[txn_id] = 1;
  ump[txn_id] = -1;
  // Use find() instead of operator[] so that probing a node without outgoing edges does not
  // insert an empty entry into the graph.
  const auto edges = waits_for_.find(txn_id);
  if (edges != waits_for_.end()) {
    for (const auto next : edges->second) {
      if (mp[next] == 1) {
        // Back edge to a node on the current path: cycle found.
        return true;
      }
      // A node that was visited and is no longer on the path was fully explored without
      // finding a cycle. Re-exploring it cannot find one either, and skipping it keeps the
      // search linear instead of enumerating every path.
      const auto seen = ump.find(next);
      if (seen != ump.end() && seen->second == -1) {
        continue;
      }
      if (Dfs(next, mp, stk, ump)) {
        return true;
      }
    }
  }
  stk.pop();
  mp[txn_id] = 0;
  return false;
}
可能有多个环,要重复检测多次直到没有环为止,并删除被 abort 的事务。每次从最老的事务开始 DFS(它最可能被依赖),然后 abort 环中 id 最大(最年轻)的事务:老的事务已经运行了很久,abort 掉太可惜了;但从另一种角度看,老的事务更可能是死锁的关键。background 线程可能对原来的架构造成影响,我们要注意数据竞争和并发问题。还要注意 transaction_manager 的析构函数会把后台线程停掉,后台线程的 while 循环用一个原子类型的 bool 变量做判断(最后一行出过 bug)。在持有锁的情况下调用另一个会加锁的函数会导致死锁;全局加锁顺序不一致也会导致死锁,这个也是花了一定时间才排查出来的。另外,后台线程是每隔一段时间重新建图,而不是持续维护同一张图。建图的算法如下:
// Searches the waits-for graph for a cycle and selects a victim from it.
//
// DFS roots are taken in ascending id order from the mp_ entries whose value is positive and
// that no earlier DFS has visited. When a cycle is found, the largest transaction id on the
// cycle is written to `*txn_id` and its mp_ entry is set to 0.
//
// @param txn_id  out-parameter receiving the victim's id; written only when true is returned
// @return true if a cycle was found, false otherwise
auto LockManager::HasCycle(txn_id_t *txn_id) -> bool {
  std::unordered_map<txn_id_t, int> mp;   // 1 while a txn is on the current DFS path
  std::stack<txn_id_t> stk;               // current DFS path
  std::unordered_map<txn_id_t, int> ump;  // -1 once a txn has been visited by Dfs

  // Smallest unvisited id with a positive mp_ entry, or INT_MAX when none is left.
  auto next_start = [this, &ump]() -> txn_id_t {
    txn_id_t best = INT_MAX;
    for (const auto &[key, val] : mp_) {
      const auto seen = ump.find(key);
      const bool visited = seen != ump.end() && seen->second == -1;
      if (val > 0 && !visited) {
        best = std::min(best, key);
      }
    }
    return best;
  };

  txn_id_t txn_min = next_start();
  LOG_INFO("txn min is %d", txn_min);
  if (txn_min == INT_MAX) {
    return false;
  }
  while (!Dfs(txn_min, mp, stk, ump)) {
    txn_min = next_start();
    if (txn_min == INT_MAX) {
      LOG_INFO("has cycle false");
      return false;
    }
  }

  // Dfs leaves on `stk` the path from the root to the node whose outgoing edge closed the
  // cycle, without pushing the repeated node. The cycle entry is therefore the first
  // successor of the top node that is still flagged as on-path; successors tried before it
  // were either skipped or fully unwound, so their flag is back to 0.
  const txn_id_t tail = stk.top();
  stk.pop();
  txn_id_t entry = tail;
  bool entry_found = false;
  if (const auto edges = waits_for_.find(tail); edges != waits_for_.end()) {
    for (const auto next : edges->second) {
      if (mp[next] == 1) {
        entry = next;
        entry_found = true;
        break;
      }
    }
  }

  // Take the maximum id over the cycle only (tail back down to entry). Path nodes below the
  // entry lead into the cycle but are not part of it and must not be chosen as victim.
  // If the entry cannot be determined, fall back to scanning the whole path.
  txn_id_t victim = tail;
  bool cycle_closed = entry_found && entry == tail;  // self-loop: the cycle is just `tail`
  while (!cycle_closed && !stk.empty()) {
    LOG_INFO("%d .......", victim);
    const txn_id_t node = stk.top();
    stk.pop();
    victim = std::max(victim, node);
    cycle_closed = entry_found && node == entry;
  }

  *txn_id = victim;
  LOG_INFO("has cycle true");
  mp_[victim] = 0;
  return true;
}