我想按照这里所述的方式使用模板进行优化。但是,随着 bool 模板参数数量的增加,为了选择要实例化的模板而编写的分支可能会太多。如果使用取值更多的枚举而不是布尔值,分支还会变得更加复杂。

#include <iostream>
using namespace std;

// Runs a fixed 10,000,000-iteration loop that bumps `arg` by amounts selected
// by the two compile-time flags.
//
// b1:  adds 1 per iteration when true, 2 when false.
// b2:  adds 10 per iteration when true, 20 when false.
// arg: starting value of the accumulator.
// Returns `arg` after all iterations.
template <bool b1, bool b2>
int HeavyLoop_impl(int arg)
{
    for (int i = 0; i < 10000000; i++) {
        // b1 is known at compile-time, so this branch will be eliminated
        if (b1) { arg += 1; }
        else    { arg += 2; }

        // b2 is known at compile-time, so this branch will be eliminated
        if (b2) { arg += 10; }
        else    { arg += 20; }
    }
    return arg;
}

// This function could be generated automatically
void HeavyLoop(bool b1, bool b2, int arg)
    int res;
    if (b1) {
        if (b2) { res = HeavyLoop_impl<true, true>(arg); }
        else    { res = HeavyLoop_impl<true, false>(arg); }
    } else {
        if (b2) { res = HeavyLoop_impl<false, true>(arg); }
        else    { res = HeavyLoop_impl<false, false>(arg); }
    cout << "res: "<<res<<endl;

int main(int argc, char**argv)
    bool b1 = true;
    bool b2 = false;
    int arg = 0;
    HeavyLoop(b1, b2, arg);
    return 0;


vars_to_template_function<bool, bool>(HeavyLoop_impl, b1, b2, arg);




6 回答 6



  • 支持enum类型
  • 明确指定应该转换多少个参数
  • 复杂部分的通用实现,每个使用它的函数都有一个小助手。


#include <iostream>
#include <utility>
#include <type_traits>

// an enum we would like to support
// (three enumerators, so dispatching on it needs a three-way switch
// rather than a two-way branch)
enum class tribool { FALSE, TRUE, FILE_NOT_FOUND };

// declare basic generic template
// (independent of a specific function you'd like to call)
//
// CB : callback class template; it is eventually instantiated as
//      CB< std::tuple< ... > > and its impl< R >( ... ) is called.
// N  : number of leading runtime arguments that still have to be converted
//      into compile-time constants.
// The unnamed third parameter is a std::tuple collecting the constant types
// chosen so far; it defaults to the empty tuple.
template< template< class > class CB, std::size_t N, typename = std::tuple<> >
struct var_to_template;

// register types that should be supported
template< template< class > class CB, std::size_t N, typename... Cs >
struct var_to_template< CB, N, std::tuple< Cs... > >
    // bool is pretty simple, there are only two values
    template< typename R, typename... Args >
    static R impl( bool b, Args&&... args )
        return b
          ? var_to_template< CB, N-1, std::tuple< Cs..., std::true_type > >::template impl< R >( std::forward< Args >( args )... )
          : var_to_template< CB, N-1, std::tuple< Cs..., std::false_type > >::template impl< R >( std::forward< Args >( args )... );

    // for each enum, you need to register all its values
    template< typename R, typename... Args >
    static R impl( tribool tb, Args&&... args )
        switch( tb ) {
        case tribool::FALSE:
          return var_to_template< CB, N-1, std::tuple< Cs..., std::integral_constant< tribool, tribool::FALSE > > >::template impl< R >( std::forward< Args >( args )... );
        case tribool::TRUE:
          return var_to_template< CB, N-1, std::tuple< Cs..., std::integral_constant< tribool, tribool::TRUE > > >::template impl< R >( std::forward< Args >( args )... );
        case tribool::FILE_NOT_FOUND:
          return var_to_template< CB, N-1, std::tuple< Cs..., std::integral_constant< tribool, tribool::FILE_NOT_FOUND > > >::template impl< R >( std::forward< Args >( args )... );
        throw "unreachable";

    // in theory you could also add int, long, ... but
    // you'd have to switch on every possible value that you want to support!

// terminate the recursion
//
// N == 0: nothing is left to convert, so hand the collected constant types
// (as std::tuple< Cs... >) to the callback and forward the remaining
// runtime arguments to its impl< R >().
template< template< class > class CB, typename... Cs >
struct var_to_template< CB, 0, std::tuple< Cs... > >
{
    template< typename R, typename... Args >
    static R impl( Args&&... args )
    {
        return CB< std::tuple< Cs... > >::template impl< R >( std::forward< Args >( args )... );
    }
};

// here's your function with the template parameters
template< bool B, tribool TB >
int HeavyLoop_impl( int arg )
    for( int i = 0; i < 10000000; i++ ) {
        arg += B ? 1 : 2;
        arg += ( TB == tribool::TRUE ) ? 10 : ( TB == tribool::FALSE ) ? 20 : 30;
    return arg;

// a helper class, required once per function that you'd like to forward
template< typename > struct HeavyLoop_callback;
template< typename... Cs >
struct HeavyLoop_callback< std::tuple< Cs... > >
    template< typename R, typename... Args >
    static R impl( Args&&... args )
        return HeavyLoop_impl< Cs::value... >( std::forward< Args >( args )... );

// and here, everything comes together:
int HeavyLoop( bool b, tribool tb, int arg )
    // you provide the helper and the number of arguments
    // that should be converted to var_to_template<>
    // and you provide the return type to impl<>
    return var_to_template< HeavyLoop_callback, 2 >::impl< int >( b, tb, arg );

int main()
    bool b = true;
    tribool tb = tribool::FALSE;
    int arg = 0;
    int res = HeavyLoop( b, tb, arg );
    std::cout << "res: " << res << std::endl;
    return 0;


于 2013-10-07T10:23:18.533 回答


#include <iostream>
using namespace std;

// Class-template form of the heavy loop, so it can be passed as a
// template-template argument.
//
// b1: adds 1 per iteration when true, 2 when false.
// b2: adds 10 per iteration when true, 20 when false.
template <bool b1, bool b2>
struct HeavyLoopImpl
{
    // Runs 10,000,000 iterations on `arg` and returns the final value.
    static int func(int arg)
    {
        for (int i = 0; i < 10000000; i++) {
            arg += b1 ? 1 : 2;
            arg += b2 ? 10 : 20;
        }
        return arg;
    }
};

// Turns leading runtime `bool` arguments into template arguments of `Impl`.
//
// Impl: class template taking only bool parameters and exposing a static
//       func(...) returning int.
// Bs:   flags already fixed at compile time (empty at the entry call).
//
// NOTE(review): which impl() overload is chosen is decided by overload
// resolution on the first argument; confirm the behavior if a non-flag
// argument is itself a bool.
template <template<bool...> class Impl,bool...Bs>
struct GenericJump
{
    // Terminal step: forwards the remaining arguments to Impl<Bs...>::func.
    template<typename... Args>
    static int impl(Args&&... args)
    {
        return Impl<Bs...>::func(std::forward<Args>(args)...);
    }

    // Recursive step: branches once on `b`, appends it to Bs... as a
    // compile-time constant, and continues with the remaining arguments.
    template<typename... Args>
    static int impl(bool b, Args&&... args)
    {
        return b
            ? GenericJump<Impl,Bs...,true >::impl(std::forward<Args>(args)...)
            : GenericJump<Impl,Bs...,false>::impl(std::forward<Args>(args)...);
    }
};

int HeavyLoop(bool b1, bool b2, int arg)
    return GenericJump<HeavyLoopImpl>::impl(b1,b2,arg);

// Demo driver: runs HeavyLoop with b1 = true, b2 = false, arg = 0 and prints
// the result.
int main()
{
    bool b1 = true;
    bool b2 = false;
    int arg = 0;
    int res = HeavyLoop(b1, b2, arg);
    cout << "res: "<<res<<endl;
    return 0;
}

这基本上是 Daniel 的解决方案,但它允许您使用 HeavyLoop_impl() 以外的函数作为实现。如果只能调用单个模板函数,就违背了作为通用解决方案的初衷;GenericJump 模板类也可以调用其他函数。您只需将模板函数 HeavyLoop_impl() 改为带有静态函数 func() 的模板类即可。它运行良好:使用 gcc 4.7.3 可以编译,并给出正确的输出。

于 2013-10-07T10:51:19.117 回答




于 2013-10-07T10:05:17.097 回答


  • 分支的数量,以及被分支的变量的类型。
  • 要执行的操作及其参数的数量和类型。


假设分支总数小于 2^64,可以使用 switch 语句进行调度。以下解决方案演示了它是如何工作的:

// Decodes a permutation index back into the two flags it encodes:
// bit 0 -> b1, bit 1 -> b2. Only indices 0..3 are accepted.
template<unsigned permutation>
struct Permutation
{
    static_assert(permutation < 4, "permutation must be in the range [0, 4)");
    // Explicit != 0 instead of relying on the implicit integer-to-bool conversion.
    static const bool b1 = (permutation & (1u << 0)) != 0;
    static const bool b2 = (permutation & (1u << 1)) != 0;
};

// Encodes the two flags into a permutation index: b1 -> bit 0, b2 -> bit 1.
// The result is always in [0, 4).
unsigned makePermutation(bool b1, bool b2)
{
    // Convert to unsigned first so the shifts and the OR stay unsigned.
    return (static_cast<unsigned>(b1) << 0) | (static_cast<unsigned>(b2) << 1);
}

template<unsigned p>
int HeavyLoop_impl(int arg)
    return HeavyLoop_impl<Permutation<p>::b1, Permutation<p>::b2>(arg);

int HeavyLoop_impl(unsigned permutation, int arg)
    case 0: return HeavyLoop_impl<0>(arg);
    case 1: return HeavyLoop_impl<1>(arg);
    case 2: return HeavyLoop_impl<2>(arg);
    case 3: return HeavyLoop_impl<3>(arg);

[注意:使用 Boost.Preprocessor 可以轻而易举地生成上述 switch 语句。]

void HeavyLoop(bool b1, bool b2, int arg)
    int res = HeavyLoop_impl(makePermutation(b1, b2), arg);
    cout << "res: "<<res<<endl;
于 2013-10-07T14:30:54.520 回答




我有一个类似的例子。就我而言,我可以在一组值之间应用许多不同的操作。数组大小相等。但是,我也有一个结构,可以将数组的子范围映射到影响我的操作的权重值。因此,例如,我可能正在使用 100 个值的数组,并且具有这样的权重范围:

[0,25] rangeWeight = 0
[26,35] rangeWeight = 0.25
[36,50] rangeWeight = 0.5
[51,99] rangeWeight = 1.0


for each subrange:
    alias to the dst buffer
    alias to the src buffer
    determine the number of elements in the range
    if there's any
        weight = weightPassedIn * rangeWeight;

        Op(dst, src, weight, numElements);

对我来说,有几项优化取决于目标缓冲区是否已被写入(如果它仍处于清除值,就可以做出一些假设来简化每个操作的数学运算);如果权重恰好为满值 1.0,还有其他捷径可走。


if (weight == 1.0f)
    if ( arrayIsCleared )
        Blend<BlendOpSet, true, false>(otherBuff, subRangesMask, 1.0f);
        Blend<BlendOpAccumulate, true, false>(otherBuff, subRangesMask, 1.0f);
    if ( arrayIsCleared )
        Blend<BlendOpSet, false, false>(otherBuff, subRangesMask, weight);
        Blend<BlendOpAccumulate, false, false>(otherBuff, subRangesMask, weight);
于 2014-02-08T05:54:16.790 回答

这是另一个带有 boost::hana 的解决方案,它也可以处理枚举:

#include <cstdio>
#include <type_traits>
#include <boost/hana.hpp>

namespace hana = boost::hana;

// Recursive worker: matches each runtime value in `targs` against the
// candidate compile-time constants in the corresponding element of `tin`.
//
// f:     callable invoked once all values have been matched; it receives the
//        matched constants as its arguments.
// targs: tuple of the runtime values not yet matched.
// tin:   tuple of candidate tuples, one per remaining runtime value.
// tout:  tuple of the constants matched so far.
template <typename F, typename TArgs, typename TIn, typename TOut>
void fun_arg_combinations_impl(F&& f, TArgs targs, TIn tin, TOut tout) {
    if constexpr (hana::is_empty(tin)) {
        // Nothing left to match: call f with the collected constants.
        hana::unpack(tout, f);
    } else {
        // Try every candidate for the first remaining value; only the one
        // equal to the runtime value continues the recursion.
        hana::for_each(hana::front(tin), [&](auto v){
            if (v == hana::front(targs)) {
                fun_arg_combinations_impl(f, hana::drop_front(targs), hana::drop_front(tin), hana::append(tout, v));
            }
        });
    }
}

// Entry point: starts the matching with an empty tuple of collected
// constants.
//
// f:     callable to invoke with the matched compile-time constants.
// targs: tuple of runtime values.
// tin:   tuple of candidate tuples, one per runtime value.
template <typename F, typename TArgs, typename TIn>
void fun_arg_combinations(F&& f, TArgs targs, TIn tin) {
    fun_arg_combinations_impl(f, targs, tin, hana::tuple<>());
}

// Sample enumeration used to demonstrate dispatching on a non-bool runtime value.
enum Shape {LINE, CIRCLE, SQUARE};

int main()
    auto f_heavy_loop = [](auto b1t, auto b2t, auto st) {
        constexpr bool b1 = decltype(b1t)::value;
        constexpr bool b2 = decltype(b2t)::value;
        constexpr Shape s = decltype(st )::value;

        printf("out:%d %d %d\n", b1, b2, s);

    //constexpr auto bools = hana::make_tuple(std::true_type{}, std::false_type{});
    constexpr auto bools = hana::tuple<std::true_type, std::false_type>{};
    constexpr auto shapes = hana::tuple<
        std::integral_constant<Shape, LINE>,
        std::integral_constant<Shape, CIRCLE>,
        std::integral_constant<Shape, SQUARE>>{};

    // Using volatile to not allow the compiler to optimize for hard-coded values
    volatile bool b1 = true;
    volatile bool b2 = false;
    volatile Shape s = SQUARE;
        hana::make_tuple(b1   , b2   , s     ),
        hana::make_tuple(bools, bools, shapes));

在 f_heavy_loop() lambda 内部,b1、b2 和 s 都是 constexpr,所以我们可以对它们使用 if constexpr。


out:1 0 2

在此处查看生成的程序集:https ://godbolt.org/z/nsF2l5

于 2019-10-04T12:38:12.403 回答