我自己也常常想弄清楚这个问题,所以写了几个非常小的基准测试:为每一种函数指针回调方式各开一个循环,不断递增各自的原子计数器,以此比较它们的性能。
请记住,这些只是对只做一件事的函数的简单调用,即原子地递增其计数器;
通过检查生成的汇编输出可以看到,裸 C 函数指针的循环被编译为 3 条 CPU 指令;C++11 的 std::function 调用只多出 2 条 CPU 指令,因此在本例中共 5 条。
结论:无论使用哪种函数指针技术,开销差异在任何情况下都非常小。
(不过令人困惑的是,赋值给 std::function 的 lambda 表达式似乎比其他几种方式运行得更快,甚至比裸 C 函数指针版本还要快。)
编译示例:clang++ -o tests/perftest-fncb tests/perftest-fncb.cpp -std=c++11 -pthread -lpthread -lrt -O3 -march=native -mtune=native
#include <functional>
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>
/// Integer type used for every benchmark counter.
using counter_t = unsigned long long;

/// One invocation counter per callback flavour under test.
///
/// Each field is incremented atomically by a different worker thread. With the
/// fields packed back to back they would share a cache line, and the threads
/// would spend their time bouncing that line between cores (false sharing), so
/// the numbers would reflect contention rather than call overhead. Every field
/// is therefore aligned to its own cache-line-sized slot.
///
/// The fields stay `volatile counter_t` so that they can still be passed to
/// the `__sync_*` builtins and copied with a plain `Counter c = counter;`.
struct Counter {
    /// Alignment in bytes applied to each counter. 64 is the usual cache-line
    /// size on x86-64; NOTE(review): adjust if the target CPU differs.
    static constexpr unsigned kCacheLineBytes = 64;

    alignas(kCacheLineBytes) volatile counter_t bare;    ///< Calls through the bare C function pointer.
    alignas(kCacheLineBytes) volatile counter_t cxx;     ///< Calls through std::function -> free function.
    alignas(kCacheLineBytes) volatile counter_t cxo1;    ///< Calls through std::function -> member function.
    alignas(kCacheLineBytes) volatile counter_t virt;    ///< Calls through std::function -> virtual member function.
    alignas(kCacheLineBytes) volatile counter_t lambda;  ///< Calls through std::function -> lambda.

    /// Starts every counter at zero.
    Counter() : bare(0), cxx(0), cxo1(0), virt(0), lambda(0) {}
} counter;
/// Free-function callback target: atomically adds 1 to `counter->bare`.
void bare(Counter* counter)
{
    auto* const slot = &counter->bare;
    __sync_fetch_and_add(slot, 1);
}
/// Free-function callback target: atomically adds 1 to `counter->cxx`.
void cxx(Counter* counter)
{
    auto* const slot = &counter->cxx;
    __sync_fetch_and_add(slot, 1);
}
/// Callback target offering one non-virtual and one virtual member function.
/// A single global instance, `cxo1`, is defined together with the type.
struct CXO1 {
    /// Non-virtual member: atomically adds 1 to `counter->cxo1`.
    void cxo1(Counter* counter)
    {
        auto* const slot = &counter->cxo1;
        __sync_fetch_and_add(slot, 1);
    }

    /// Virtual member: atomically adds 1 to `counter->virt`.
    virtual void virt(Counter* counter)
    {
        auto* const slot = &counter->virt;
        __sync_fetch_and_add(slot, 1);
    }
} cxo1;
// Callback slots invoked by the worker threads. main() assigns all of them
// before it creates any thread.

// Plain C function pointer; null until main() points it at bare().
void (*bare_cb)(Counter*) = nullptr;
// std::function that main() fills with a std::bind of the free function cxx().
std::function<void(Counter*)> cxx_cb;
// std::function that main() fills with a std::bind of CXO1::cxo1 on the global cxo1 object.
std::function<void(Counter*)> cxo1_cb;
// std::function that main() fills with a std::bind of the virtual CXO1::virt on the global cxo1 object.
std::function<void(Counter*)> virt_cb;
// std::function that main() fills with a capture-less lambda.
std::function<void(Counter*)> lambda_cb;
/// Thread entry point: calls `bare_cb` on the global counter in an endless
/// loop. The argument is ignored and the function never returns.
void* bare_main(void* p)
{
    (void)p;
    for (;;) {
        bare_cb(&counter);
    }
}
/// Thread entry point: calls `cxx_cb` on the global counter in an endless
/// loop. The argument is ignored and the function never returns.
void* cxx_main(void* p)
{
    (void)p;
    for (;;) {
        cxx_cb(&counter);
    }
}
/// Thread entry point: calls `cxo1_cb` on the global counter in an endless
/// loop. The argument is ignored and the function never returns.
void* cxo1_main(void* p)
{
    (void)p;
    for (;;) {
        cxo1_cb(&counter);
    }
}
/// Thread entry point: calls `virt_cb` on the global counter in an endless
/// loop. The argument is ignored and the function never returns.
void* virt_main(void* p)
{
    (void)p;
    for (;;) {
        virt_cb(&counter);
    }
}
/// Thread entry point: calls `lambda_cb` on the global counter in an endless
/// loop. The argument is ignored and the function never returns.
void* lambda_main(void* p)
{
    (void)p;
    for (;;) {
        lambda_cb(&counter);
    }
}
/// Benchmark driver.
///
/// Installs one callback of each flavour, starts one worker thread per
/// flavour, and then prints the cumulative call counts once per second,
/// forever.
///
/// @return 1 if a worker thread could not be started; otherwise the function
///         never returns (the report loop is endless).
int main()
{
    pthread_t bare_thread;
    pthread_t cxx_thread;
    pthread_t cxo1_thread;
    pthread_t virt_thread;
    pthread_t lambda_thread;

    // Install every callback before any worker thread exists, so no thread can
    // ever call through an empty slot.
    bare_cb = &bare;
    cxx_cb = std::bind(&cxx, std::placeholders::_1);
    cxo1_cb = std::bind(&CXO1::cxo1, &cxo1, std::placeholders::_1);
    virt_cb = std::bind(&CXO1::virt, &cxo1, std::placeholders::_1);
    lambda_cb = [](Counter* counter) { __sync_fetch_and_add(&counter->lambda, 1); };

    // Table of workers so that thread creation and its error check are written once.
    struct Worker {
        pthread_t* thread;
        void* (*entry)(void*);
        const char* label;
    };
    const Worker workers[] = {
        { &bare_thread, &bare_main, "bare" },
        { &cxx_thread, &cxx_main, "cxx" },
        { &cxo1_thread, &cxo1_main, "cxo1" },
        { &virt_thread, &virt_main, "virt" },
        { &lambda_thread, &lambda_main, "lambda" },
    };
    for (const Worker& worker : workers) {
        // pthread_create reports failure through its return value, not errno.
        // A worker that failed to start would leave its counter at 0 and make
        // the report misleading, so stop instead of carrying on.
        const int rc = pthread_create(worker.thread, nullptr, worker.entry, nullptr);
        if (rc != 0) {
            fprintf(stderr, "pthread_create failed for the %s worker (error %d)\n", worker.label, rc);
            return 1;
        }
    }

    for (unsigned long long n = 1; true; ++n) {
        sleep(1);
        // Plain member-wise copy taken while the workers keep incrementing, so
        // the five values are not sampled at exactly the same instant.
        Counter c = counter;
        printf(
            "%15llu bare function pointer\n"
            "%15llu C++11 function object to bare function\n"
            "%15llu C++11 function object to object method\n"
            "%15llu C++11 function object to object method (virtual)\n"
            "%15llu C++11 function object to lambda expression %30llu-th second.\n\n",
            c.bare, c.cxx, c.cxo1, c.virt, c.lambda, n
        );
    }
}