为了提供上下文,我正在完成Programming Praxis Bingo Challenge并想看看我能让这段代码运行多快。
static void fisher_yates(T& source) {
const size_t len = source.size();
for(size_t i = 1; i < len;++i) {
std::swap(source[i],source[rand() % (i+1)]);
}
}
std::array<int,25> generate_table() {
std::array<int,25> bingo_grid;
for(int i = 0 ; i < 25;++i) {
switch(i) {
case 0: case 1: case 2: case 3: case 4:
bingo_grid[i] = rand() % 15 + 1;
break;
case 5: case 6: case 7: case 8: case 9:
bingo_grid[i] = rand() % 15 + 16;
break;
case 10: case 11: case 12: case 13: case 14:
bingo_grid[i] = rand() % 15 + 31;
break;
case 15: case 16: case 17: case 18: case 19:
bingo_grid[i] = rand() % 15 + 46;
break;
case 20: case 21: case 22: case 23: case 24:
bingo_grid[i] = rand() % 15 + 61;
break;
}
}
bingo_grid[12] = 0;
return bingo_grid;
}
bool is_bingoed(const std::array<int,25>& grid) {
// Check columns
if(grid[0] == 0) {
if(grid[1] == 0 && grid[2] == 0 && grid[3] == 0 && grid[4] == 0)
return true;
if(grid[0] == 0 && grid[6] == 0 && grid[18] == 0 && grid[24] == 0)
return true;
if(grid[5] == 0 && grid[10] == 0 && grid[15] == 0 && grid[20] == 0)
return true;
}
if(grid[1] == 0) {
if(grid[6] == 0 && grid[11] == 0 && grid[16] == 0 && grid[21] == 0)
return true;
}
if(grid[2] == 0) {
if(grid[7] == 0 && grid[17] == 0 && grid[22] == 0)
return true;
}
if(grid[3] == 0) {
if(grid[8] == 0 && grid[13] == 0 && grid[18] == 0 && grid[23] == 0)
return true;
}
if(grid[4] == 0) {
if(grid[9] == 0 && grid[14] == 0 && grid[19] == 0 && grid[24] == 0)
return true;
if(grid[8] == 0 && grid[16] == 0 && grid[21] == 0)
return true;
}
if(grid[6] == 0) {
if(grid[6] == 0 && grid[7] == 0 && grid[8] == 0 && grid[9] == 0)
return true;
}
if(grid[12] == 0) {
if(grid[10] == 0 && grid[11] == 0 && grid[13] == 0 && grid[14] == 0)
return true;
}
if(grid[18] == 0) {
if(grid[15] == 0 && grid[16] == 0 && grid[17] == 0 && grid[19] == 0)
return true;
}
return false;
}
static bool mark_card(const int card,std::array<int,25>& bingo_grid) {
for(auto &i : bingo_grid)
if(card == i) {
i = 0;
return true;
}
return false;
}
int play_game() {
// Bingo is 5 columns, each column(n) is random permutation of 1-15*n
// Fisher-Yates to generate random permutations
// Create 500 playing cards
const int max = 500;
std::vector<std::array<int,25>> bingo_cards;
bingo_cards.reserve(max);
for(int i = 0; i<max;++i) {
bingo_cards.push_back(generate_table());
//display_bingo(bingo_cards[i]);
}
// Random shuffle 75 cards
auto iter = boost::counting_range(1,76);
std::vector<int> cards(std::begin(iter),std::end(iter));
fisher_yates(cards);
bool is_finished = false;
int counter = 0;
for(auto card : cards) {
for(auto& playing_card : bingo_cards) {
if(mark_card(card,playing_card)) {
//display_bingo(playing_card);
if(is_bingoed(playing_card))
return counter;
}
}
counter++;
}
return counter;
}
int bingo() {
srand(time(NULL));
int total = 0;
for(int i = 0 ; i < 10000;i++) {
total+=play_game();
}
boost::singleton_pool<boost::pool_allocator_tag, sizeof(int)>::release_memory();
return total / 10000;
}
原始版本使用 boost::multi_array 来表示网格。分析后,我将其更改为 std::array ,这让我显着加快了速度。然后我从使用 fisher_yates shuffle 生成宾果卡转移到使用随机数生成器。最后我更改了 is_bingoed 测试函数以减少每次调用的检查次数,从而加快游戏结束检查。
这一切都有帮助。现在,如果我分析这段代码,generate_table 函数占用了 72% 的时间,mark_card() 是 18%,is_bingoed() 大约是 6%。我正在寻找提示,看看可以做些什么来提高两者的速度。
我对 is_bingoed() 的第一个想法是使用 SSE 内在函数与 0 进行比较(也许使用 XOR?),但我对 generate_table() 或 mark_car() 没有任何想法。这更像是一种自我挑战的乐趣,但想知道其他人是怎么想的?
当前时间是在 2Ghz Q6660 上需要 4.6 秒(原来是 35 秒)