0

为了提供上下文,我正在完成Programming Praxis Bingo Challenge并想看看我能让这段代码运行多快。

static void fisher_yates(T& source) {   
    const size_t len = source.size();
    for(size_t i = 1; i < len;++i) {
        std::swap(source[i],source[rand() % (i+1)]);
    }
}
std::array<int,25> generate_table() {
    std::array<int,25> bingo_grid;
    for(int i = 0 ; i < 25;++i) {
        switch(i) {
        case 0: case 1: case 2: case 3: case 4:
            bingo_grid[i] = rand() % 15 + 1;
            break;
        case 5: case 6: case 7: case 8: case 9:
            bingo_grid[i] = rand() % 15 + 16;
            break;
        case 10: case 11: case 12: case 13: case 14:
            bingo_grid[i] = rand() % 15 + 31;
            break;
        case 15: case 16: case 17: case 18: case 19:
            bingo_grid[i] = rand() % 15 + 46;
            break;
        case 20: case 21: case 22: case 23: case 24:
            bingo_grid[i] = rand() % 15 + 61;
            break;
        }
    }
    bingo_grid[12] = 0;
    return bingo_grid;
}

bool is_bingoed(const std::array<int,25>& grid) { 
    // Check columns
    if(grid[0] == 0) {
        if(grid[1] == 0 && grid[2] == 0 && grid[3] == 0 && grid[4] == 0)
            return true;
        if(grid[0]  == 0 && grid[6]  == 0 && grid[18]  == 0 && grid[24]  == 0)
            return true;
        if(grid[5] == 0 && grid[10] == 0 && grid[15] == 0 && grid[20] == 0)
            return true;
    }
    if(grid[1] == 0) {
        if(grid[6]  == 0 && grid[11]  == 0 && grid[16]  == 0 && grid[21]  == 0)
            return true;
    }
    if(grid[2] == 0) {  
        if(grid[7] == 0 && grid[17]  == 0 && grid[22]  == 0)
            return true;
    }
    if(grid[3] == 0) {
        if(grid[8]  == 0 && grid[13]  == 0 && grid[18]  == 0 && grid[23]  == 0)
            return true;
    }
    if(grid[4] == 0) {
        if(grid[9]  == 0 && grid[14]  == 0 && grid[19]  == 0 && grid[24]  == 0)
            return true;
        if(grid[8]  == 0 && grid[16]  == 0 && grid[21]  == 0)
            return true;
    }
    if(grid[6] == 0) {
        if(grid[6]  == 0 && grid[7]  == 0 && grid[8]  == 0 && grid[9]  == 0)
            return true;
    }
    if(grid[12] == 0) {
        if(grid[10]  == 0 && grid[11]  == 0 && grid[13]  == 0 && grid[14] == 0)
            return true;
    }
    if(grid[18] == 0) {
        if(grid[15]  == 0 && grid[16]  == 0 && grid[17]  == 0 && grid[19]  == 0)
            return true;
    }   
    return false;
}

static bool mark_card(const int card,std::array<int,25>& bingo_grid) {
    for(auto &i : bingo_grid)
        if(card == i) {
            i = 0;
            return true;
        }
        return false;
}

int play_game() {
    // Bingo is 5 columns, each column(n) is random permutation of 1-15*n
    // Fisher-Yates to generate random permutations

    // Create 500 playing cards
    const int max = 500;
    std::vector<std::array<int,25>> bingo_cards;
    bingo_cards.reserve(max);
    for(int i = 0; i<max;++i) {
        bingo_cards.push_back(generate_table());
        //display_bingo(bingo_cards[i]);
    }
    // Random shuffle 75 cards
    auto iter = boost::counting_range(1,76);
    std::vector<int> cards(std::begin(iter),std::end(iter));
    fisher_yates(cards);
    bool is_finished = false;
    int counter = 0;
    for(auto card : cards) {
        for(auto& playing_card : bingo_cards) {
            if(mark_card(card,playing_card)) {
                //display_bingo(playing_card);
                if(is_bingoed(playing_card))
                    return counter;
            }
        }
        counter++;
    }
    return counter;
}
int bingo() {
    srand(time(NULL));
    int total = 0;
    for(int i = 0 ; i < 10000;i++) {
        total+=play_game();
    }
    boost::singleton_pool<boost::pool_allocator_tag, sizeof(int)>::release_memory();
    return total / 10000;
}

原始版本使用 boost::multi_array 来表示网格。分析后,我将其更改为 std::array ,这让我显着加快了速度。然后我从使用 fisher_yates shuffle 生成宾果卡转移到使用随机数生成器。最后我更改了 is_bingoed 测试函数以减少每次调用的检查次数,从而加快游戏结束检查。

这一切都有帮助。现在,如果我分析这段代码,generate_table 函数占用了 72% 的时间,mark_card() 是 18%,is_bingoed() 大约是 6%。我正在寻找提示,看看可以做些什么来提高两者的速度。

我对 is_bingoed() 的第一个想法是使用 SSE 内在函数与 0 进行比较(也许使用 XOR?),但我对 generate_table() 或 mark_car() 没有任何想法。这更像是一种自我挑战的乐趣,但想知道其他人是怎么想的?

当前时间是在 2Ghz Q6660 上需要 4.6 秒(原来是 35 秒)

4

2 回答 2

1

只需专注于最昂贵的功能,generate_table就可以简化这部分代码并使其不那么繁琐,这可能会有所帮助:

for(int i = 0 ; i < 25;++i) {
    switch(i) {
    case 0: case 1: case 2: case 3: case 4:
        bingo_grid[i] = rand() % 15 + 1;
        break;
    case 5: case 6: case 7: case 8: case 9:
        bingo_grid[i] = rand() % 15 + 16;
        break;
    case 10: case 11: case 12: case 13: case 14:
        bingo_grid[i] = rand() % 15 + 31;
        break;
    case 15: case 16: case 17: case 18: case 19:
        bingo_grid[i] = rand() % 15 + 46;
        break;
    case 20: case 21: case 22: case 23: case 24:
        bingo_grid[i] = rand() % 15 + 61;
        break;
    }
}

例如

for(int i = 0 ; i < 25;++i) {
    int r = rand() % 15 + 1;
    bingo_grid[i] = r + (i / 5) * 15;
}

除此之外,我会看一个更快的 rand() 并看看你是否可以摆脱除法和模数。

另外,您的算法可能存在缺陷,因为没有什么可以防止 bingo_grid 中出现重复数字。

于 2013-06-26T08:02:35.357 回答
0

将 is_bingoed() 方法更改为使用 SSE 指令(使用Agner Fog 的库)和 Paul R 的 generate_table() 将时间减少到仅 1.05 秒。使用Intel 的 fast_rand()函数可以将它降低到 0.38 秒。所以我想我会为可能感兴趣的其他人粘贴代码更改。

static unsigned int g_seed;


//Used to seed the generator.
inline void fast_srand( int seed )
{
    g_seed = seed;
}


//fastrand routine returns one integer, similar output value range as C lib.
inline int fastrand()
{
    g_seed = (214013*g_seed+2531011);
    return (g_seed>>16)&0x7FFF;

}
bool is_bingoed(const std::array<int,25>& grid) { 
    // Check columns
    Vec8i vec(grid[0],grid[1],grid[2],grid[3],grid[4],0,0,0);
    Vec4i vec2(grid[6],grid[18],grid[24],0);
    Vec4i vec3(grid[5],grid[10],grid[15],20);
    Vec8i vec4(grid[1],grid[6],grid[11],grid[16],grid[21],0,0,0);
    Vec4i vec5(grid[2],grid[7],grid[17],grid[22]);
    Vec8i vec6(grid[3],grid[8],grid[13],grid[18],grid[23],0,0,0);
    Vec8i vec7(grid[4],grid[9],grid[14],grid[19],grid[24],0,0,0);
    Vec4i vec8(grid[8],grid[16],grid[21],grid[4]);
    Vec4i vec9(grid[6],grid[7],grid[8],grid[9]);
    Vec8i vec10(grid[12],grid[10],grid[11],grid[13],grid[14],0,0,0);
    Vec8i vec11(grid[18],grid[15],grid[16],grid[17],grid[19],0,0,0);
    if(horizontal_and(vec) && horizontal_and(vec2) && horizontal_and(vec3) && horizontal_and(vec4) &&
        horizontal_and(vec5) && horizontal_and(vec6) && horizontal_and(vec7) && horizontal_and(vec8)) {
            return false;
    }
    if(horizontal_and(vec9) && horizontal_and(vec10) && horizontal_and(vec11)) {
            return false;
    }
    return true;
}
于 2013-07-11T22:13:58.943 回答