性能比较:numpy vs. Cython vs. C vs. Fortran vs. Boost.Python (pyublas)
| function | time, usec | ratio | type |
|------------------------+------------+-------+--------------|
| slow_xor | 2020 | 1.0 | numpy |
| xorf_int16 | 1570 | 1.3 | fortran |
| xorf_int32 | 1530 | 1.3 | fortran |
| xorf_int64 | 1420 | 1.4 | fortran |
| faster_slow_xor | 1360 | 1.5 | numpy |
| inline_xor | 1280 | 1.6 | C |
| cython_xor | 1290 | 1.6 | cython |
| xorcpp_inplace (int32) | 440 | 4.6 | pyublas |
| cython_xor_vectorised | 325 | 6.2 | cython |
| inline_xor_nocopy | 172 | 11.7 | C |
| xorcpp | 144 | 14.0 | boost.python |
| xorcpp_inplace | 122 | 16.6 | boost.python |
#+TBLFM: $3=@2$2/$2;%.1f
要重现结果,请下载http://gist.github.com/353005并键入make
(要安装依赖项,请键入:sudo apt-get install build-essential python-numpy python-scipy cython gfortran
、依赖项Boost.Python
,pyublas
不包括在内,因为它们需要手动干预才能工作)
在哪里:
并且xor_$type_sig()
是:
! xorf.f90.template
subroutine xor_$type_sig(a, b, n, out)
implicit none
integer, intent(in) :: n
$type, intent(in), dimension(n) :: a
$type, intent(in), dimension(n) :: b
$type, intent(out), dimension(n) :: out
integer i
forall(i=1:n) out(i) = ieor(a(i), b(i))
end subroutine xor_$type_sig
它在 Python 中使用如下:
import xorf # extension module generated from xorf.f90.template
import numpy as np
def xor_strings(a, b, type_sig='int64'):
assert len(a) == len(b)
a = np.frombuffer(a, dtype=np.dtype(type_sig))
b = np.frombuffer(b, dtype=np.dtype(type_sig))
return getattr(xorf, 'xor_'+type_sig)(a, b).tostring()
xorcpp_inplace()
(Boost.Python,pyublas):
xor.cpp:
#include <inttypes.h>
#include <algorithm>
#include <boost/lambda/lambda.hpp>
#include <boost/python.hpp>
#include <pyublas/numpy.hpp>
namespace {
namespace py = boost::python;
template<class InputIterator, class InputIterator2, class OutputIterator>
void
xor_(InputIterator first, InputIterator last,
InputIterator2 first2, OutputIterator result) {
// `result` migth `first` but not any of the input iterators
namespace ll = boost::lambda;
(void)std::transform(first, last, first2, result, ll::_1 ^ ll::_2);
}
template<class T>
py::str
xorcpp_str_inplace(const py::str& a, py::str& b) {
const size_t alignment = std::max(sizeof(T), 16ul);
const size_t n = py::len(b);
const char* ai = py::extract<const char*>(a);
char* bi = py::extract<char*>(b);
char* end = bi + n;
if (n < 2*alignment)
xor_(bi, end, ai, bi);
else {
assert(n >= 2*alignment);
// applying Marek's algorithm to align
const ptrdiff_t head = (alignment - ((size_t)bi % alignment))% alignment;
const ptrdiff_t tail = (size_t) end % alignment;
xor_(bi, bi + head, ai, bi);
xor_((const T*)(bi + head), (const T*)(end - tail),
(const T*)(ai + head),
(T*)(bi + head));
if (tail > 0) xor_(end - tail, end, ai + (n - tail), end - tail);
}
return b;
}
template<class Int>
pyublas::numpy_vector<Int>
xorcpp_pyublas_inplace(pyublas::numpy_vector<Int> a,
pyublas::numpy_vector<Int> b) {
xor_(b.begin(), b.end(), a.begin(), b.begin());
return b;
}
}
BOOST_PYTHON_MODULE(xorcpp)
{
py::def("xorcpp_inplace", xorcpp_str_inplace<int64_t>); // for strings
py::def("xorcpp_inplace", xorcpp_pyublas_inplace<int32_t>); // for numpy
}
它在 Python 中使用如下:
import os
import xorcpp
a = os.urandom(2**20)
b = os.urandom(2**20)
c = xorcpp.xorcpp_inplace(a, b) # it calls xorcpp_str_inplace()