我在自己的一个应用程序中混合使用了 C 线程和 Haskell 线程,并没有发现在两者之间切换会带来明显的性能损失。于是我写了一个简单的基准测试……其结果比 Don 测得的要快得多(开销也小得多)。以下是在 2.66GHz i7 上运行 1000 万次迭代的测量结果:
$ ./foo
IO : 2381952795 nanoseconds total, 238.195279 nanoseconds per, 160000000 value
Pure: 2188546976 nanoseconds total, 218.854698 nanoseconds per, 160000000 value
在 OSX 10.6 上使用 GHC 7.0.3/x86_64 和 gcc-4.2.1 编译
ghc -no-hs-main -lstdc++ -O2 -optc-O2 -o foo ForeignExportCost.hs Driver.cpp
Haskell 代码:
{-# LANGUAGE ForeignFunctionInterface #-}
module ForeignExportCost where
import Foreign.C.Types
-- Pure export: squares its argument. Called from the C++ driver to measure
-- the cost of a foreign-exported function that does not run in IO.
foreign export ccall simpleFunction :: CInt -> CInt
simpleFunction n = squared
  where
    squared = n * n
-- IO export: same squaring computation, but the result is delivered through
-- the IO monad so the driver can compare it against the pure export.
foreign export ccall simpleFunctionIO :: CInt -> IO CInt
simpleFunctionIO n =
  let squared = n * n
  in return squared
用于驱动它的 OSX C++ 程序如下,应该很容易移植到 Windows 或 Linux:
#include <stdio.h>
#include <mach/mach_time.h>
#include <mach/kern_return.h>
#include <HsFFI.h>
#include "ForeignExportCost_stub.h"
// Number of foreign calls made in each timed loop (ten million).
static const int s_loop = 10 * 1000 * 1000;
int main(int argc, char** argv) {
hs_init(&argc, &argv);
struct mach_timebase_info timebase_info = { };
kern_return_t err;
err = mach_timebase_info(&timebase_info);
if (err != KERN_SUCCESS) {
fprintf(stderr, "error: %x\n", err);
return err;
}
// timing a function in IO
uint64_t start = mach_absolute_time();
HsInt32 val = 0;
for (int i = 0; i < s_loop; ++i) {
val += simpleFunctionIO(4);
}
// in nanoseconds per http://developer.apple.com/library/mac/#qa/qa1398/_index.html
uint64_t duration = (mach_absolute_time() - start) * timebase_info.numer / timebase_info.denom;
double duration_per = static_cast<double>(duration) / s_loop;
printf("IO : %lld nanoseconds total, %f nanoseconds per, %d value\n", duration, duration_per, val);
// run the loop again with a pure function
start = mach_absolute_time();
val = 0;
for (int i = 0; i < s_loop; ++i) {
val += simpleFunction(4);
}
duration = (mach_absolute_time() - start) * timebase_info.numer / timebase_info.denom;
duration_per = static_cast<double>(duration) / s_loop;
printf("Pure: %lld nanoseconds total, %f nanoseconds per, %d value\n", duration, duration_per, val);
hs_exit();
}