我猜他的例子中使用的基准测试方法有缺陷。以下代码在 LINQPad 中运行后,得到的结果与您的预期一致:
/* This is a benchmarking template I use in LINQPad when I want to do a
* quick performance test. Just give it a couple of actions to test and
* it will give you a pretty good idea of how long they take compared
* to one another. It's not perfect: You can expect a 3% error margin
* under ideal circumstances. But if you're not going to improve
* performance by more than 3%, you probably don't care anyway.*/
/// <summary>
/// Script entry point: builds the list of call styles to compare and passes
/// them to TimeActions together with the per-run iteration count.
/// </summary>
void Main()
{
// Enter setup code here
// One Foo instance is created up front and captured by the lambdas below,
// so object construction is not part of any timed action.
var foo = new Foo();
var actions = new[]
{
// Baseline: an empty lambda. Its timing shows what invoking the delegate
// costs on its own, so it can be subtracted from the other rows.
new TimedAction("control", () =>
{
// do nothing
}),
// Call to an ordinary (non-virtual) instance method.
new TimedAction("non-virtual instance", () =>
{
foo.DoSomething();
}),
// Call to a virtual instance method.
new TimedAction("virtual instance", () =>
{
foo.DoSomethingVirtual();
}),
// Call to a static method; the only action that does not use foo.
new TimedAction("static", () =>
{
Foo.DoSomethingStatic();
}),
};
// Number of invocations in each full run of each action.
const int TimesToRun = 10000000; // Tweak this as necessary
TimeActions(TimesToRun, actions);
}
/// <summary>
/// Benchmark subject: three methods with empty bodies that differ only in
/// being non-virtual instance, virtual instance, or static.
/// </summary>
public class Foo
{
// Non-virtual instance method; intentionally empty.
public void DoSomething() {}
// Virtual instance method; intentionally empty.
public virtual void DoSomethingVirtual() {}
// Static method; intentionally empty.
public static void DoSomethingStatic() {}
}
#region timer helper methods
// Define other methods and classes here
/// <summary>
/// Times every action twice, once walking the list front to back and once
/// back to front, then dumps one result row per action.
/// </summary>
/// <param name="iterations">Number of invocations in each full run.</param>
/// <param name="actions">The labelled actions to measure.</param>
public void TimeActions(int iterations, params TimedAction[] actions)
{
    // Number of invocations in the short warm-up that precedes each full run.
    const int WarmupIterations = 10;

    var stopwatch = new Stopwatch();
    int count = actions.Length;
    var rows = new ActionResult[count];

    // First pass: measure the actions in the order they were supplied.
    int index = 0;
    while (index < count)
    {
        TimedAction candidate = actions[index];
        var row = new ActionResult { Message = candidate.Message };
        rows[index] = row;

        // Warm-up first so one-time costs land in DryRun1 rather than FullRun1.
        row.DryRun1 = stopwatch.Time(candidate.Action, WarmupIterations);
        row.FullRun1 = stopwatch.Time(candidate.Action, iterations);
        index++;
    }

    // Second pass: same measurements, last action first. index starts at
    // count here and walks down to zero.
    // NOTE(review): presumably the reversal is meant to expose run-order effects.
    while (index > 0)
    {
        index--;
        TimedAction candidate = actions[index];
        ActionResult row = rows[index];

        row.DryRun2 = stopwatch.Time(candidate.Action, WarmupIterations);
        row.FullRun2 = stopwatch.Time(candidate.Action, iterations);
    }

    rows.Dump();
}
/// <summary>
/// One row of benchmark output for a single action. TimeActions fills every
/// timing from Stopwatch elapsed time expressed in milliseconds.
/// </summary>
public class ActionResult
{
// Label copied from the TimedAction being measured.
public string Message {get;set;}
// 10-iteration warm-up, first (forward-order) pass.
public double DryRun1 {get;set;}
// 10-iteration warm-up, second (reverse-order) pass.
public double DryRun2 {get;set;}
// Full run, first (forward-order) pass.
public double FullRun1 {get;set;}
// Full run, second (reverse-order) pass.
public double FullRun2 {get;set;}
}
/// <summary>
/// Pairs a delegate to be benchmarked with the label shown in the results.
/// Both values are fixed at construction.
/// </summary>
public class TimedAction
{
    private readonly string _message;
    private readonly Action _action;

    /// <summary>Creates a labelled action.</summary>
    /// <param name="message">Label for this action.</param>
    /// <param name="action">Delegate to invoke when timing.</param>
    public TimedAction(string message, Action action)
    {
        _action = action;
        _message = message;
    }

    /// <summary>Label supplied to the constructor.</summary>
    public string Message
    {
        get { return _message; }
    }

    /// <summary>Delegate supplied to the constructor.</summary>
    public Action Action
    {
        get { return _action; }
    }
}
/// <summary>
/// Timing helper for <see cref="Stopwatch"/>.
/// </summary>
public static class StopwatchExtensions
{
    /// <summary>
    /// Resets and starts the stopwatch, invokes <paramref name="action"/>
    /// <paramref name="iterations"/> times, stops the stopwatch, and reports
    /// the elapsed time.
    /// </summary>
    /// <param name="sw">Stopwatch to use; any time it already holds is discarded.</param>
    /// <param name="action">Delegate to invoke repeatedly.</param>
    /// <param name="iterations">Number of invocations; zero or less invokes nothing.</param>
    /// <returns>Elapsed time for the whole loop, in milliseconds.</returns>
    public static double Time(this Stopwatch sw, Action action, int iterations)
    {
        sw.Restart();

        int completed = 0;
        while (completed < iterations)
        {
            action();
            completed++;
        }

        sw.Stop();
        TimeSpan elapsed = sw.Elapsed;
        return elapsed.TotalMilliseconds;
    }
}
#endregion
结果:
DryRun1 DryRun2 FullRun1 FullRun2
control 0.0361 0 47.82 47.1971
non-virtual instance 0.0858 0.0004 69.6178 68.7508
virtual instance 0.1676 0.0004 70.5103 69.2135
static 0.1138 0 66.6182 67.0308
结论
这些结果表明,对虚方法的调用仅比常规(非虚)实例方法调用稍慢一点(扣除对照组 control 的耗时后大约慢 2-3%),而常规实例方法调用又比静态调用稍慢一点。这正是我所预期的。
更新
在 @colinfang 评论建议给我的方法加上 [MethodImpl(MethodImplOptions.NoInlining)] 特性之后,我又做了更多尝试,只能得出结论:微优化很复杂。以下是一些观察:
- 正如@colinfang 所说,在方法中添加 NoInlining 确实会产生更像他所描述的结果。毫不奇怪,方法内联是系统可以优化非虚拟方法以比虚拟方法运行得更快的一种方式。但令人惊讶的是,不内联实际上会使虚拟方法比非虚拟方法花费更长的时间。
- 如果我用 /optimize+ 编译,非虚拟实例调用实际上比对照组(control)花费的时间少 20% 以上。
如果我消除 lambda 函数,并像这样直接传递方法组:
new TimedAction("non-virtual instance", foo.DoSomething),
new TimedAction("virtual instance", foo.DoSomethingVirtual),
new TimedAction("static", Foo.DoSomethingStatic),
...然后虚拟和非虚拟调用最终花费的时间大致相同,但静态方法调用花费的时间要长得多(超过 20%)。
所以,是的,结果很奇怪。关键是:当你深入到这个优化级别时,由于编译器、JIT 甚至硬件层面的种种优化,会出现意想不到的结果。我们看到的差异可能是由 CPU 的 L2 缓存策略这类无法控制的因素造成的。此处有恶龙(Here be dragons)。