C#中Delegate的效率问题

最新推荐文章于 2020-04-09 16:00:51 发布

noock

最新推荐文章于 2020-04-09 16:00:51 发布

阅读量4k

点赞数

分类专栏： .NET技术文章标签： c# function 测试 interface 汇编 class

本文链接：https://blog.csdn.net/Nocky/article/details/6056419

版权

.NET技术专栏收录该内容

40 篇文章 0 订阅

订阅专栏

昨天在研究C#的定时器的过程中，发现Delegate的效率比直接调用一个函数的效率要低不少，今天研究了一下，的确如此。在网上查了一下，下面这篇文章
http://www.cnblogs.com/sumtec/archive/2004/05/23/11025.aspx
也写了一个程序进行测试，但给出的程序不是太合理，因为在调用接口注册的函数时使用了for循环，操作明显多于delegate测试函数，所以执行结果反而是delegate比接口调用要快很多，于是我又写了如下程序进行测试：

    /// <summary>
    /// Contract for an object exposing a parameterless, void-returning method.
    /// The benchmark invokes this method through the interface so the call can
    /// be compared against a delegate invocation.
    /// </summary>
    interface IVoidMethod
    {
        /// <summary>
        /// The method whose call overhead is being measured.
        /// </summary>
        void WorkMethod();
    }

    /// <summary>
    /// Contract for a runnable benchmark case.
    /// </summary>
    interface IDelegateTester
    {
        /// <summary>
        /// Executes the benchmark case.
        /// </summary>
        void Run();
    }

    /// <summary>
    /// Base class of the benchmark. It subscribes <see cref="WorkMethod"/> to the
    /// <c>task</c> event a configurable number of times, then times a loop of
    /// delegate invocations against a loop of <see cref="CallFunction"/> calls
    /// supplied by the derived class.
    /// </summary>
    abstract class DelegateTester : IDelegateTester, IVoidMethod
    {
        private int _multiCast;
        private long _iteration;

        /// <summary>Signature of the handlers attached to <c>task</c>.</summary>
        protected delegate void TaskHandler();

        /// <summary>Event whose invocation cost is measured by <see cref="CallDelegate"/>.</summary>
        protected event TaskHandler task;

        /// <summary>
        /// Number of handlers requested at construction. Changing it afterwards
        /// only affects the value printed by <see cref="Run"/>; the event's
        /// subscriber list is not modified.
        /// </summary>
        public int MultiCast
        {
            get { return _multiCast; }
            set { _multiCast = value; }
        }

        /// <summary>
        /// Number of times each of the two timed loops in <see cref="Run"/> executes.
        /// </summary>
        public long Iteration
        {
            get { return _iteration; }
            set { _iteration = value; }
        }

        /// <summary>
        /// Stores the settings and subscribes <see cref="WorkMethod"/> to
        /// <c>task</c> once per requested handler.
        /// </summary>
        /// <param name="multicast">How many times to subscribe; zero or negative subscribes nothing.</param>
        /// <param name="iteration">Loop count used by <see cref="Run"/>.</param>
        protected DelegateTester(int multicast, long iteration)
        {
            _multiCast = multicast;
            _iteration = iteration;
            while (multicast-- > 0) {
                task += new TaskHandler(WorkMethod);
            }
        }

        /// <summary>
        /// Invokes the <c>task</c> event once.
        /// </summary>
        /// <remarks>
        /// The null check is intentionally left disabled so that the branch does
        /// not influence the measurement. As a consequence this throws
        /// <see cref="NullReferenceException"/> when no handler was subscribed.
        /// </remarks>
        protected void CallDelegate()
        {
            //if (task != null) 
            {
                task();
            }
        }

        /// <summary>
        /// The non-delegate call path that <see cref="Run"/> times for comparison.
        /// </summary>
        protected abstract void CallFunction();

        /// <summary>
        /// work method for test
        /// </summary>
        public virtual void WorkMethod()
        {
        }

        #region IDelegateTester Members

        /// <summary>
        /// Runs the delegate loop, then the function loop, printing a timestamp
        /// before each phase and finally both elapsed tick counts and their ratio.
        /// </summary>
        public void Run()
        {

            // Escape sequences must use a backslash: "\n" is a newline and "\t" a tab.
            // With a forward slash the text "/n" and "/t" is printed verbatim.
            Console.WriteLine("\n Delgate Test(Multicast={0}, iteration={1}", MultiCast, Iteration);
            DateTime t0 = DateTime.Now;
            // In the DateTime format string "\t" is copied to the output as a literal tab,
            // whereas "/t" would be read as the date separator followed by the AM/PM designator.
            Console.WriteLine(t0.ToString("mm:ss.ffff\t:") + "Runing delegate test ...");
            // NOTE: the counter is an int compared against a long; an Iteration value
            // above int.MaxValue would overflow the counter before the loop ends.
            for (int i = 0; i < _iteration; i++) {
                CallDelegate();
            }
            DateTime t1 = DateTime.Now;
            Console.WriteLine(t1.ToString("mm:ss.ffff\t:") + "Runing function call test ...");
            for (int i = 0; i < _iteration; i++) {
                CallFunction();
            }
            DateTime t2 = DateTime.Now;
            Console.WriteLine(t2.ToString("mm:ss.ffff\t:") + "Test finished!");
            TimeSpan deltaDelegate = t1 - t0;
            TimeSpan deltaFunction = t2 - t1;
            Console.WriteLine("Result:");
            Console.WriteLine("\tTime expense (delegate : function) = {0} : {1} = {2:0.000}",
                deltaDelegate.Ticks.ToString(),
                deltaFunction.Ticks.ToString(),
                (double)(deltaDelegate.Ticks) / (double)(deltaFunction.Ticks));
        }
        #endregion

    }

	// 比较调用单个函数时的效率
    /// <summary>
    /// Benchmark case whose function-call side is one call to
    /// <c>WorkMethod</c> on the current instance.
    /// </summary>
    class SingleDelegateTester : DelegateTester
    {
        /// <summary>
        /// Forwards both settings unchanged to the base constructor.
        /// </summary>
        /// <param name="multicast">Number of handlers the base class subscribes.</param>
        /// <param name="iteration">Loop count for each timed phase.</param>
        public SingleDelegateTester(int multicast, long iteration) : base(multicast, iteration) { }

        /// <summary>
        /// Calls <c>WorkMethod</c> once, without going through a delegate.
        /// </summary>
        protected override void CallFunction()
        {
            WorkMethod();
        }
    }

    /// <summary>
    /// Benchmark case for the multicast scenario: the function-call side walks
    /// an array of <c>IVoidMethod</c> references and calls each one in turn.
    /// </summary>
    class MulticastDelegateTester : DelegateTester
    {
        private readonly IVoidMethod[] calls;

        /// <summary>
        /// Builds an array of <paramref name="multicast"/> entries, every one of
        /// which refers to this instance.
        /// </summary>
        /// <param name="multicast">Number of handlers and number of array entries.</param>
        /// <param name="iteration">Loop count for each timed phase.</param>
        public MulticastDelegateTester(int multicast, long iteration)
            : base(multicast, iteration)
        {
            IVoidMethod[] targets = new IVoidMethod[multicast];
            int slot = targets.Length;
            while (slot-- > 0) {
                targets[slot] = this;
            }
            calls = targets;
        }

        /// <summary>
        /// Calls <c>WorkMethod</c> through the interface once per array entry.
        /// </summary>
        protected override void CallFunction()
        {
            foreach (IVoidMethod target in calls) {
                target.WorkMethod();
            }
        }
    }


    /// <summary>
    /// Console driver that runs the single-handler case and then the
    /// ten-handler case, pausing for a key press after each.
    /// </summary>
    public class Test
    {
        /// <summary>
        /// Main entry point of the application.
        /// </summary>
        static void Main()
        {
            const long iteration = 1000000000;

            // Both testers are constructed up front, in the same order they run.
            IDelegateTester single = new SingleDelegateTester(1, iteration);
            IDelegateTester multicast = new MulticastDelegateTester(10, iteration);
            IDelegateTester[] tests = { single, multicast };

            for (int index = 0; index < tests.Length; index++) {
                tests[index].Run();
                Console.WriteLine("Press any key to run next test...");
                Console.ReadKey();
            }

            Console.WriteLine("Test finished, press any key to quit ...");
            Console.ReadKey();
        }
    }

程序运行结果如下：

 Delgate Test(Multicast=1, iteration=1000000000
07:49.2850      :Runing delegate test ...
08:01.3380      :Runing function call test ...
08:11.6500      :Test finished!
Result:
        Time expense (delegate : function) = 120530000 : 103120000 = 1.169
Press any key to run next test...

 Delgate Test(Multicast=10, iteration=1000000000
08:58.1870      :Runing delegate test ...
10:54.3060      :Runing function call test ...
12:36.5080      :Test finished!
Result:
        Time expense (delegate : function) = 1161190000 : 1022020000 = 1.136
Press any key to run next test...
 Test finished, press any key to quit ...

由以上运行结果可以看出，delegate方式的函数调用的确比直接调用函数效率要低不少，特别是只注册一个事件处理函数时差别最为明显，相差约17%（耗时比为1.169），这在实时控制程序中是应该注意的问题。而在多播时由于两种方式都需要对注册的事件列表进行遍历，使得性能差距有所缩小，但仍有13.6%。在对性能要求比较高的情况下，就真的要考虑是使用delegate的方便性，还是自行实现多播方式的函数调用了。

注意上面为了不让条件判断语句影响测试的准确性，在调用delegate时把判空操作注释掉了，即使在这种情况下仍然效率不及直接调用，下面研究一下原因。

首先看一下Run()函数，两种调用方式的测试代码段分别如下：

（1）Delegate调用段

 DateTime t0 = DateTime.Now;
00000095  lea         ecx,[ebp-5Ch] 
00000098  call        78639580 
0000009d  lea         edi,[ebp-14h] 
000000a0  lea         esi,[ebp-5Ch] 
000000a3  movq        xmm0,mmword ptr [esi] 
000000a7  movq        mmword ptr [edi],xmm0 
            Console.WriteLine(t0.ToString("mm:ss.ffff\t:") + "Runing delegate test ...");
000000ab  lea         ecx,[ebp-14h] 
000000ae  mov         edx,dword ptr ds:[023930A4h] 
000000b4  call        78667010 
000000b9  mov         esi,eax 
000000bb  mov         edx,dword ptr ds:[023930A8h] 
000000c1  mov         ecx,esi 
000000c3  call        785FBDE0 
000000c8  mov         esi,eax 
000000ca  mov         ecx,esi 
000000cc  call        78678544 
            for (int i = 0; i < _iteration; i++) {
000000d1  xor         edx,edx 
000000d3  mov         dword ptr [ebp-18h],edx 
000000d6  nop              
000000d7  jmp         000000E8 
                CallDelegate();
000000d9  mov         ecx,dword ptr [ebp+FFFFFF68h] 
000000df  call        dword ptr ds:[0389007Ch] 

           for (int i = 0; i < _iteration; i++) {
000000e5  inc         dword ptr [ebp-18h] 
000000e8  mov         eax,dword ptr [ebp-18h] 
000000eb  cdq              
000000ec  mov         ecx,dword ptr [ebp+FFFFFF68h] 
000000f2  cmp         edx,dword ptr [ecx+8] 
000000f5  jg          000000FE 
000000f7  jl          000000D9 
000000f9  cmp         eax,dword ptr [ecx+4] 
000000fc  jb          000000D9 
            }

（2）函数直接调用段

DateTime t1 = DateTime.Now;
000000fe  lea         ecx,[ebp-64h] 
00000101  call        78639580 
00000106  lea         edi,[ebp-20h] 
00000109  lea         esi,[ebp-64h] 
0000010c  movq        xmm0,mmword ptr [esi] 
00000110  movq        mmword ptr [edi],xmm0 
            Console.WriteLine(t1.ToString("mm:ss.ffff\t:") + "Runing function call test ...");
00000114  lea         ecx,[ebp-20h] 
00000117  mov         edx,dword ptr ds:[023930A4h] 
0000011d  call        78667010 
00000122  mov         esi,eax 
00000124  mov         edx,dword ptr ds:[023930ACh] 
0000012a  mov         ecx,esi 
0000012c  call        785FBDE0 
00000131  mov         esi,eax 
00000133  mov         ecx,esi 
00000135  call        78678544 
            for (int i = 0; i < _iteration; i++) {
0000013a  xor         edx,edx 
0000013c  mov         dword ptr [ebp-24h],edx 
0000013f  nop              
00000140  jmp         00000150 
                CallFunction();
00000142  mov         ecx,dword ptr [ebp+FFFFFF68h] 
00000148  mov         eax,dword ptr [ecx] 
0000014a  call        dword ptr [eax+38h] 
            for (int i = 0; i < _iteration; i++) {
0000014d  inc         dword ptr [ebp-24h] 
00000150  mov         eax,dword ptr [ebp-24h] 
00000153  cdq              
00000154  mov         ecx,dword ptr [ebp+FFFFFF68h] 
0000015a  cmp         edx,dword ptr [ecx+8] 
0000015d  jg          00000166 
0000015f  jl          00000142 
00000161  cmp         eax,dword ptr [ecx+4] 
00000164  jb          00000142 
            }

但是从汇编代码看，这里调用CallFunction()时反而多了一条MOV指令，从这里看应该直接调用函数会比delegate调用更消耗资源，但这条指令是单周期指令消耗资源极少，先不去管它，其它代码都一样，看来性能差在CallFunction()和CallDelegate()两个函数上，且看这两个函数的汇编代码：

protected void CallDelegate()
        {
            //if (task != null) 
            {
                task();
00000000  push        esi  
00000001  mov         esi,ecx 
00000003  cmp         dword ptr ds:[00269204h],0 
0000000a  je          00000011 
0000000c  call        793B672F 
00000011  mov         ecx,dword ptr [esi+0Ch] 
00000014  mov         eax,dword ptr [ecx+0Ch] 
00000017  mov         ecx,dword ptr [ecx+4] 
0000001a  call        eax  
            }
        }
0000001c  nop              
0000001d  pop         esi  
0000001e  ret

protected override void CallFunction()
        {

            this.WorkMethod();

00000000  push        esi  
00000001  mov         esi,ecx 
00000003  cmp         dword ptr ds:[00269204h],0 
0000000a  je          00000011 
0000000c  call        793B66D7 
00000011  mov         ecx,esi 
00000013  mov         eax,dword ptr [ecx] 

00000015  call        dword ptr [eax+3Ch] 
        }

00000018  nop              
00000019  pop         esi  
0000001a  ret

两段代码不同的地方在于各自从偏移00000011开始、到CALL指令为止的那几条指令（原文中以黄色标出）。在这里CallDelegate却又比CallFunction多执行一次MOV操作，这与调用这些函数前面那段结合起来应该一样，如果有差别的话只能差在这几个MOV和CALL操作上了。在CallDelegate函数中三次MOV操作都是双字（32位）赋值，并且都是从内存间接寻址读入寄存器，而CallFunction的两次MOV操作只有一次是间接寻址，另一次是从寄存器到寄存器，这要比从内存到寄存器快。看来差别也只是这个原因了。

下面看一下多播的情况。函数CallFunction代码如下：

protected override void CallFunction()
        {
            foreach (IVoidMethod i in calls) {
00000000  push        edi  
00000001  push        esi  
00000002  push        ebx  
00000003  push        ebp  
00000004  mov         edi,ecx 
00000006  cmp         dword ptr ds:[00309204h],0 
0000000d  je          00000014 
0000000f  call        796C66A7 
00000014  xor         ebx,ebx 
00000016  xor         ebp,ebp 
00000018  xor         esi,esi 
0000001a  mov         eax,dword ptr [edi+14h] 
0000001d  mov         ebp,eax 
0000001f  xor         esi,esi 
00000021  nop              
00000022  jmp         0000003D 
00000024  cmp         esi,dword ptr [ebp+4] 
00000027  jb          0000002E 
00000029  call        796C7CE3 
0000002e  mov         eax,dword ptr [ebp+esi*4+0Ch] 
00000032  mov         ebx,eax 
                i.WorkMethod();
00000034  mov         ecx,ebx 
00000036  call        dword ptr ds:[00310024h] 
0000003c  inc         esi  
            foreach (IVoidMethod i in calls) {
0000003d  cmp         esi,dword ptr [ebp+4] 
00000040  jl          00000024 
            }
        }
00000042  nop              
00000043  pop         ebp  
00000044  pop         ebx  
00000045  pop         esi  
00000046  pop         edi  
00000047  ret

而CallDelegate的代码并没有改变，代码差别很大，比较汇编比较困难。换条思路，研究一下delegate的实现机制