c# 备份oracle waitforexit()方法死锁,C# 解决死锁

处理这个死锁问题,花了好几天,相信遇到的同学,一样头疼,但有个好辅助类的话(好在.net的API足够强大),就没这么头疼了

注意

本篇文章的解决方案只适合使用lock(obj),或是:Monitor.Enter(obj); …. Monitor.Exit(obj)的方式

类似酱紫的死锁

如果你使用的是:AutoResetEvent.Set/Reset, Monitor.Wait/Pulse, Mutex的方式,请另寻他法。

辅助类

//------------------------------------------------------------------------

// Created by Jave.Lin 4/21/2018 5:31:57 PM

//------------------------------------------------------------------------

using System;

using System.Collections.Generic;

using System.Threading;

//namespace Common.ComUtil

//{

/// <summary>
/// Locker Information: the object that is actually locked on, plus the diagnostic
/// fields that are filled in when the lock is taken and released and later printed
/// through <see cref="ToString"/>.
/// author : Jave.Lin
/// date : 4/21/2018 5:31:57 PM
/// </summary>
public class Locker
{
    public object tag; // caller-supplied context data, printed via its ToString()
    public int threadID; // id of the thread that acquired the lock
    public string name; // display name of the lock
    public int lockedTimes; // how many times the lock has been acquired; helps estimate deadlock likelihood
    public int lockingTs; // timestamp taken at the moment the lock was acquired
    public bool enter; // set once the lock has been acquired
    public bool exit; // set once the lock has been released
    public string lockingStackTrace; // call stack captured at the moment the lock was acquired
    public int lockingDLTs; // timestamp taken when an acquisition attempt was classified as a deadlock
    public List<Locker> pRecordList; // snapshots of this locker's earlier states (see PushRecord)

    /// <summary>Creates a locker with the given display name and an empty record list.</summary>
    /// <param name="name">Name shown in dumps.</param>
    public Locker(string name)
    {
        this.name = name;
        pRecordList = new List<Locker>();
    }

    /// <summary>Appends a snapshot of the current field values to <see cref="pRecordList"/>.</summary>
    public void PushRecord()
    {
        pRecordList.Add(RetrieveRecord());
    }

    /// <summary>Removes every snapshot from <see cref="pRecordList"/>.</summary>
    public void ClearRecord()
    {
        pRecordList.Clear();
    }

    /// <summary>
    /// Copies the current field values into a new <see cref="Locker"/>.
    /// The copy gets its own empty record list; the snapshots are not copied.
    /// </summary>
    /// <returns>A detached snapshot of this locker.</returns>
    public Locker RetrieveRecord()
    {
        // The constructor already copies the name, so it is not assigned again here.
        return new Locker(name)
        {
            tag = tag,
            threadID = threadID,
            lockedTimes = lockedTimes,
            lockingTs = lockingTs,
            enter = enter,
            exit = exit,
            lockingStackTrace = lockingStackTrace,
            lockingDLTs = lockingDLTs
        };
    }

    /// <summary>
    /// Formats all diagnostic fields, one per line, followed by the recorded snapshots (if any).
    /// </summary>
    /// <returns>The multi-line dump text for this locker.</returns>
    public override string ToString()
    {
        string[] strs = new string[]
        {
            "ThreadId:" + threadID,
            "Name:" + name,
            "LockedTimes:" + lockedTimes,
            "LockingTs:" + lockingTs,
            // "Locking Elapsed Time": difference between the deadlock timestamp and the
            // acquisition timestamp. It is negative while lockingDLTs is still 0.
            "LockingEt:" + (lockingDLTs - lockingTs) + ("(ms"),
            "Enter:" + enter,
            "Exit:" + exit,
            "Tag:" + (tag != null ? tag.ToString() : "null"),
            "LockingStackTrace:\n" + lockingStackTrace,
            (pRecordList.Count > 0 ? "LockedRecord:\n\t" + string.Join("\t-record--------------", pRecordList) : "")
        };

        return string.Join("\r\n", strs);
    }
}

/// <summary>
/// Parameterless callback type used by the <c>CDL.OnDLEvent</c> event, which CDL invokes
/// when a lock acquisition times out and is reported as a deadlock.
/// </summary>
public delegate void OnDL();

/// <summary>
/// Check Dead Lock (CDL): acquires a <see cref="Locker"/> with
/// <see cref="Monitor.TryEnter(object, int)"/> instead of a plain <c>lock</c>, records who
/// holds which locker, and reports a suspected deadlock when the acquisition times out.
/// author : Jave.Lin
/// date : 4/21/2018 5:31:57 PM
/// </summary>
public static class CDL
{
    // Guards _s_pLockedMap, _s_pLockingMap and every Locker.pRecordList. Dictionary and List
    // are not safe for concurrent writers, and CheckDL is called from many threads at once.
    // This is a leaf lock: it is only held for short bookkeeping sections and no Locker
    // monitor is ever acquired while holding it.
    private static readonly object s_mapGate = new object();

    // Lockers that are currently held (added in _Enter, removed in _Exit).
    public static readonly Dictionary<Locker, bool> _s_pLockedMap = new Dictionary<Locker, bool>();

    // For each locker, snapshots taken by _BeforeEnter when someone asked for the locker
    // after it had already been entered. Cleared when the locker is released.
    public static readonly Dictionary<Locker, List<Locker>> _s_pLockingMap = new Dictionary<Locker, List<Locker>>();

    // When true, a lock timeout (and an exit without ownership) is reported by throwing.
    public const bool THROW_ER = true;

    // Milliseconds to wait for a lock before treating the wait as a deadlock.
    // Tune as needed: on a heavily loaded production server some tasks may legitimately run
    // long and make lock acquisition wait longer than this.
    public const int DEAD_LOCK_TIME_OUT = 3000;

    /// <summary>Raised when a lock acquisition times out, before the deadlock exception is thrown.</summary>
    public static event OnDL OnDLEvent;

    // Appends a snapshot of the locker to its wait queue, creating the queue on first use.
    private static void _PushToWaitQueue(Locker locker)
    {
        lock (s_mapGate)
        {
            List<Locker> list;
            if (!_s_pLockingMap.TryGetValue(locker, out list))
            {
                list = new List<Locker>();
                _s_pLockingMap[locker] = list;
            }
            list.Add(locker.RetrieveRecord());
        }
    }

    // Drops the wait queue of the locker, if there is one.
    private static void _ClearFromLocking(Locker locker)
    {
        lock (s_mapGate)
        {
            List<Locker> list;
            if (_s_pLockingMap.TryGetValue(locker, out list))
            {
                list.Clear();
                _s_pLockingMap.Remove(locker);
            }
        }
    }

    // Called before trying to take the lock: if the locker has been entered before,
    // remember its current state both on the locker itself and in the wait queue.
    private static void _BeforeEnter(Locker locker)
    {
        if (locker.enter)
        {
            lock (s_mapGate)
            {
                locker.PushRecord();
                _PushToWaitQueue(locker);
            }
        }
    }

    // Called right after the monitor was acquired: stamps the locker with the owner's
    // thread id, time and call stack, and registers it as held.
    private static void _Enter(Locker locker)
    {
        locker.enter = true;
        locker.exit = false;
        locker.lockingTs = Environment.TickCount;
        locker.threadID = Thread.CurrentThread.ManagedThreadId;
        locker.lockingStackTrace = GetCurStackTrace("->\n");
        Interlocked.Increment(ref locker.lockedTimes);

        lock (s_mapGate)
        {
            _s_pLockedMap[locker] = true;
        }
    }

    // Releases the monitor and clears the bookkeeping for the locker.
    // NOTE(review): with nested CheckDL calls on the same locker the inner exit already
    // marks the locker as released while the outer call still owns the monitor.
    private static void _Exit(Locker locker)
    {
        if (!Monitor.IsEntered(locker))
        {
            // The current thread does not own the monitor, so there is nothing to release
            // and the owner's timestamps must not be touched.
            var msg = "!Monitor.IsEntered(locker)";
            if (THROW_ER)
            {
                throw new Exception(msg);
            }
            else
            {
                _WarningWriteLine(msg);
            }
        }
        else
        {
            locker.exit = true;
            lock (s_mapGate)
            {
                _s_pLockedMap.Remove(locker);
                _ClearFromLocking(locker);
                locker.ClearRecord();
            }
            Monitor.Exit(locker);
        }
    }

    // Formats the wait queue of the locker, or returns "" when it has none.
    private static string _GetWaitQueue(Locker locker)
    {
        lock (s_mapGate)
        {
            List<Locker> list;
            if (_s_pLockingMap.TryGetValue(locker, out list))
            {
                return string.Join("\n@@@@@", list);
            }
            return "";
        }
    }

    /// <summary>
    /// Builds a text report of every locker that is currently held, ordered by
    /// (lockingDLTs - lockingTs) descending so timed-out lockers come first.
    /// </summary>
    /// <returns>The report; an empty string when no locker is held.</returns>
    public static string Dumps()
    {
        var contentList = new List<string>();

        // Everything that reads the maps or a record list happens under the gate so the
        // report is not built from collections another thread is modifying.
        lock (s_mapGate)
        {
            var itemList = new List<Locker>(_s_pLockedMap.Keys);

            // Compare in 64 bits: TickCount-based values are large enough that the
            // difference of two int differences can overflow.
            itemList.Sort((a, b) =>
            {
                long elapsedA = (long)a.lockingDLTs - a.lockingTs;
                long elapsedB = (long)b.lockingDLTs - b.lockingTs;
                return elapsedB.CompareTo(elapsedA);
            });

            foreach (var item in itemList)
            {
                contentList.Add(item.ToString() + "\n$$$$$$$$$$Before Locking WaitQueue$$$$$$$$\n" + _GetWaitQueue(item));
            }
        }

        return string.Join("\r\n=line============\r\n", contentList);
    }

    /// <summary>
    /// Runs <paramref name="actoin"/> while holding <paramref name="locker"/>.
    /// Exceptions thrown by the action are written to the console and not rethrown.
    /// </summary>
    /// <param name="locker">The locker to acquire.</param>
    /// <param name="actoin">The code to run inside the lock.</param>
    /// <exception cref="ArgumentNullException"><paramref name="locker"/> is null.</exception>
    /// <exception cref="Exception">
    /// The lock was not acquired within <see cref="DEAD_LOCK_TIME_OUT"/> ms and
    /// <see cref="THROW_ER"/> is true. The action is not executed in that case.
    /// </exception>
    public static void CheckDL(Locker locker, Action actoin)
    {
        if (locker == null)
        {
            throw new ArgumentNullException(nameof(locker));
        }

        _BeforeEnter(locker);

        // The acquisition happens outside the try block so that a timeout is reported to
        // the caller and the finally block never releases a lock this thread does not own.
        bool entered = Monitor.TryEnter(locker, DEAD_LOCK_TIME_OUT);
        if (!entered)
        {
            locker.lockingDLTs = Environment.TickCount;
            _WarningWriteLine("TryEnter time out");
            if (THROW_ER)
            {
                _ShowGetLockTimeout(); // always throws
            }
        }

        try
        {
            if (entered)
            {
                _Enter(locker);
            }
            // Reached without the lock only when THROW_ER is false (best-effort mode).
            actoin.Invoke();
        }
        catch (Exception e)
        {
            _ErrorWriteLine(e.ToString());
        }
        finally
        {
            if (entered)
            {
                _Exit(locker);
            }
        }
    }

    /// <summary>
    /// Runs <paramref name="actoin"/> while holding <paramref name="locker"/> and returns its result.
    /// Exceptions thrown by the action are written to the console and not rethrown; the
    /// default value of <typeparamref name="T"/> is returned in that case.
    /// </summary>
    /// <typeparam name="T">Result type of the action.</typeparam>
    /// <param name="locker">The locker to acquire.</param>
    /// <param name="actoin">The code to run inside the lock.</param>
    /// <returns>The value returned by <paramref name="actoin"/>, or default(T) if it threw.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="locker"/> is null.</exception>
    /// <exception cref="Exception">
    /// The lock was not acquired within <see cref="DEAD_LOCK_TIME_OUT"/> ms and
    /// <see cref="THROW_ER"/> is true. The action is not executed in that case.
    /// </exception>
    public static T CheckDL<T>(Locker locker, Func<T> actoin)
    {
        if (locker == null)
        {
            throw new ArgumentNullException(nameof(locker));
        }

        T ret = default(T);

        _BeforeEnter(locker);

        // See the Action overload for why the acquisition is outside the try block.
        bool entered = Monitor.TryEnter(locker, DEAD_LOCK_TIME_OUT);
        if (!entered)
        {
            locker.lockingDLTs = Environment.TickCount;
            _WarningWriteLine("TryEnter time out");
            if (THROW_ER)
            {
                _ShowGetLockTimeout(); // always throws
            }
        }

        try
        {
            if (entered)
            {
                _Enter(locker);
            }
            // The result is kept on the best-effort (no lock) path as well.
            ret = actoin.Invoke();
        }
        catch (Exception e)
        {
            _ErrorWriteLine(e.ToString());
        }
        finally
        {
            if (entered)
            {
                _Exit(locker);
            }
        }

        return ret;
    }

    /// <summary>
    /// Returns the current call stack as "Type::Method()" entries, outermost caller first,
    /// joined with <paramref name="separactor"/>. This method's own frame is skipped, and
    /// the walk stops at the first frame whose IL offset is unknown.
    /// </summary>
    /// <param name="separactor">Text placed between two entries.</param>
    /// <returns>The formatted call stack; empty when no frames are available.</returns>
    public static string GetCurStackTrace(string separactor = "->")
    {
        System.Diagnostics.StackTrace st = new System.Diagnostics.StackTrace();
        System.Diagnostics.StackFrame[] sfs = st.GetFrames();
        List<string> methodNameList = new List<string>();

        if (sfs == null)
        {
            return string.Join(separactor, methodNameList);
        }

        for (int i = 1; i < sfs.Length; ++i)
        {
            if (System.Diagnostics.StackFrame.OFFSET_UNKNOWN == sfs[i].GetILOffset()) break;

            // GetMethod() and DeclaringType can both be null (for example for dynamic methods).
            var m = sfs[i].GetMethod();
            var dn = (m != null && m.DeclaringType != null) ? m.DeclaringType.Name : "<unknown>";
            var mn = m != null ? m.Name : "<unknown>";

            // Padding shrinks toward the innermost frame, so after the reversal below the
            // outermost caller carries the widest padding.
            methodNameList.Add(new string(' ', sfs.Length - i) + dn + "::" + mn + "()");
        }

        methodNameList.Reverse();
        return string.Join(separactor, methodNameList);
    }

    // Notifies subscribers, logs the deadlock banner and throws. Never returns normally.
    private static void _ShowGetLockTimeout()
    {
        OnDLEvent?.Invoke();
        var msg = "!!!!!!!!!!!!!!!!!!DeadLock!!!!!!!!!!!!!!!!!!!!!!!!!!";
        _ErrorWriteLine(msg);
        throw new Exception(msg);
    }

    // Writes one line in the given colour and restores the previous colour afterwards.
    private static void _WriteLine(ConsoleColor color, string msg, object[] args)
    {
        var srcColor = Console.ForegroundColor;
        Console.ForegroundColor = color;
        try
        {
            if (args == null || args.Length == 0)
            {
                // No arguments: print verbatim. Going through the format overload would
                // throw FormatException for text containing '{' or '}' (e.g. exception dumps).
                Console.WriteLine(msg);
            }
            else
            {
                Console.WriteLine(msg, args);
            }
        }
        finally
        {
            Console.ForegroundColor = srcColor;
        }
    }

    // Writes an error line in red. msg is a composite format string only when args are given.
    private static void _ErrorWriteLine(string msg, params object[] args)
    {
        _WriteLine(ConsoleColor.Red, msg, args);
    }

    // Writes a warning line in yellow. msg is a composite format string only when args are given.
    private static void _WarningWriteLine(string msg, params object[] args)
    {
        _WriteLine(ConsoleColor.Yellow, msg, args);
    }
}

//}

使用方法要点、注意副作用、处理副作用

使用方法要点

// 我们平常用的:

lock(obj)

{

// code here

}

改写成(其中obj必须是一个Locker实例):

CDL.CheckDL(obj, ()=>

{

// code here

});

// 怎么方便改,是个问题

// 用到的lock少的话,手动一个个改吧

// 如果巨量的话,建议CTRL + SHIFT + H来批量替换Lock的代码吧(写个正则)

// 将:CDL的namespace去掉,这样就不用导namespace了。

一旦有死锁出现,那么将会命中CDL::_ShowGetLockTimeout方法

然后将CDL.Dumps()的内容打印出来,就可以知道,当前哪些CDL.CheckDL的地方有死锁。

Dumps很详细,具体还可以根据自己的需要来对 Locker的信息做调整。

从dumps信息中辨别死锁

Dumps信息中,辨别哪些是死锁状态的,看:LockingEt的值是多少就知道了

LockingEt是:Locking Elapsed Time的意思,获取锁多长时间了

LockingEt值只要大于零,且接近于:CDL.DEAD_LOCK_TIME_OUT的值,都基本上是死锁

注意副作用

明显原来的代码逻辑会发生改变

多了一些方法调用

特别是将原来的代码的位置,改变了,放到了一个lambda(其实在IL中是个匿名函数)

先不说结构上变化了,效率也会有丢丢影响的。

处理副作用

还有一个方法可以去除副作用,就是再写个工具,将编译出来的DLL,批量处理CDL.CheckDL的IL代码,改为原来的lock(obj)方式,当然,前提是先需要大量测试后再用这个工具处理,不然如果中途还是出现了死锁的话,定位问题还是会很头疼的。(制作这个工具,理论上是可以的,但需要对IL熟悉)

注意CDL.DEAD_LOCK_TIME_OUT

CDL.DEAD_LOCK_TIME_OUT = 3000; // 这个阈值按需调整,实际的线上产品服务器程序如果负载过大时,可能也会有部分任务处理过久,导致‘取锁’等待过久,所以出现Monitor.TryEnter timeout时,不一定是死锁。

你可以按你的需求来调整该值,如:调整为60000(60秒),意思是你确定了:取锁等待超过60秒的,都算是由死锁任务导致的

CDL.Dumps内容实例

ThreadId:10

Name:CBCServerAliveLocker

LockedTimes:1

LockingTs:7496815

LockingEt:-7496815(ms // =========辨别死锁=======负数的都不用看

Enter:True

Exit:False

Tag:null

LockingStackTrace:

Program::Main()->

XXXServer::WaitForExit()->

CDL::CheckDL()

=line============

ThreadId:15

Name:CTcpNetworker

LockedTimes:486

LockingTs:7519248

LockingEt:2995(ms // =辨别死锁=此乃死锁也,接近CDL.DEAD_LOCK_TIME_OUT(3000 MS)的值,由于CBattleRoom的Locker线程ID为6的锁超时而导致的

Enter:True

Exit:False

Tag:null

LockingStackTrace:

_IOCompletionCallback::PerformIOCompletionCallback()->

BaseOverlappedAsyncResult::CompletionPortCallback()->

LazyAsyncResult::ProtectedInvokeCallback()->

ContextAwareResult::Complete()->

ExecutionContext::Run()->

ExecutionContext::Run()->

ExecutionContext::RunInternal()->

ContextAwareResult::CompleteCallback()->

LazyAsyncResult::Complete()->

XXXNetworker::_OnBeginReceiveCallback()->

CDL::CheckDL()

=line============

ThreadId:15

Name:CEventMgr

LockedTimes:247

LockingTs:7519248

LockingEt:-7519248(ms // =========辨别死锁=======负数的都不用看

Enter:True

Exit:False

Tag:null

LockingStackTrace:

_IOCompletionCallback::PerformIOCompletionCallback()->

BaseOverlappedAsyncResult::CompletionPortCallback()->

LazyAsyncResult::ProtectedInvokeCallback()->

ContextAwareResult::Complete()->

ExecutionContext::Run()->

ExecutionContext::Run()->

ExecutionContext::RunInternal()->

ContextAwareResult::CompleteCallback()->

LazyAsyncResult::Complete()->

XXXNetworker::_OnBeginReceiveCallback()->

CDL::CheckDL()->

<>c__DisplayClass78_0::<_onbeginreceivecallback>b__0()->

XXXConnection::XXXNetworker_OnPackageEvent()->

CEventMgr::Invoke()->

CDL::CheckDL()

=line============

ThreadId:6

Name:CBattleRoomMgr

LockedTimes:689

LockingTs:7519248

LockingEt:-7519248(ms // =========辨别死锁=======负数的都不用看

Enter:True

Exit:False

Tag:null

LockingStackTrace:

_ThreadPoolWaitCallback::PerformWaitCallback()->

ThreadPoolWorkQueue::Dispatch()->

Task::System.Threading.IThreadPoolWorkItem.ExecuteWorkItem()->

Task::ExecuteEntry()->

Task::ExecuteWithThreadLocal()->

ExecutionContext::Run()->

ExecutionContext::RunInternal()->

Task::ExecutionContextCallback()->

Task::Execute()->

Task::InnerInvoke()->

XXXServer::b__36_0()->

CDL::CheckDL()->

XXXServer::b__36_1()->

XXXServer::_DeadLockMethod()->

XXXServerInst::_DeadLockMethod1()->

CDL::CheckDL()

=line============

ThreadId:6

Name:CBattleRoom

LockedTimes:802

LockingTs:7519248

LockingEt:2995(ms // ==============辨别死锁======此乃死锁也,接近CDL.DEAD_LOCK_TIME_OUT(3000 MS)的值

Enter:True

Exit:False

Tag:BroadcastAll3 starting

LockingStackTrace:

_ThreadPoolWaitCallback::PerformWaitCallback()->

ThreadPoolWorkQueue::Dispatch()->

Task::System.Threading.IThreadPoolWorkItem.ExecuteWorkItem()->

Task::ExecuteEntry()->

Task::ExecuteWithThreadLocal()->

ExecutionContext::Run()->

ExecutionContext::RunInternal()->

Task::ExecutionContextCallback()->

Task::Execute()->

Task::InnerInvoke()->

XXXServer::b__36_0()->

CDL::CheckDL()->

XXXServer::b__36_1()->

XXXServer::_DeadLockMethod1()->

XXXServerInst::_DeadLockMethod2()->

CDL::CheckDL()->

XXXServerInst::<_deadlockmethod2>b__19_0()->

XXXServerInst1::_DeadLockMethod1()->

CDL::CheckDL()->

XXXServerInst1::<_deadlockmethod1>b__43_0()->

XXXServerInst1::_DeadLockMethod2()->

CDL::CheckDL()

=line============

ThreadId:6

Name:CBCServerNormalLocker

LockedTimes:668

LockingTs:7519248

LockingEt:-7519248(ms // =========辨别死锁=======负数的都不用看

Enter:True

Exit:False

Tag:null

LockingStackTrace:

_ThreadPoolWaitCallback::PerformWaitCallback()->

ThreadPoolWorkQueue::Dispatch()->

Task::System.Threading.IThreadPoolWorkItem.ExecuteWorkItem()->

Task::ExecuteEntry()->

Task::ExecuteWithThreadLocal()->

ExecutionContext::Run()->

ExecutionContext::RunInternal()->

Task::ExecutionContextCallback()->

Task::Execute()->

Task::InnerInvoke()->

XXXServer::b__36_0()->

CDL::CheckDL()

以上为旧版本的Dumps信息

在新版本中,可以根据WaitQueue和LockedRecord,分析是由于之前哪些地方出现了死锁任务,从而导致后续的死锁

定位死锁后,调整代码思路

定位到哪里死锁之后,我们只要调整代码即可解决:

解除死锁闭环嵌套问题:考虑锁对象的更换(另建一个Locker、实在不能换Locker,就延迟处理:如比较典型的是队列处理(任务的进、出、轮询遍历任务都是同一个锁))

要不要加锁(有没必要加)

检测到死锁后可对Locker做扩展(判断如果Locker被锁了,就用AutoResetEvent.WaitOne等待;每个Locker在Exit时,调用一下AutoResetEvent.Set去唤醒之前WaitOne的Locker)

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值