每从缓冲区复制 10000 个元素,memcpy 都会使 CPU 使用率升至 100%。有什么方法可以优化 memcpy 以降低 CPU 使用率?
问问题
683 次
1 回答
3
(自此答案以来,该问题已被完全重写)。
您的代码可以更改为在 Linux 上运行,如下所示:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
/* Element count of each benchmark buffer: 2 * 2^20 = 2,097,152 elements. */
const size_t NUM_ELEMENTS = (size_t)2 << 20;
/* Number of times the whole source buffer is copied during the timed run. */
const size_t ITERATIONS = 10 * 1000;
/*
 * Measures memcpy() throughput.
 *
 * Fills a source buffer of NUM_ELEMENTS unsigned shorts with rand() values,
 * copies it into a second buffer ITERATIONS times, and prints the elapsed
 * time (read from CLOCK_MONOTONIC) together with the resulting rate in
 * MB/sec, where one MB is counted as 1024 * 1024 bytes.
 *
 * argc and argv are unused.
 *
 * Returns 0 on success, or EXIT_FAILURE if a buffer cannot be allocated or
 * the clock cannot be read.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    const size_t buf_bytes = NUM_ELEMENTS * sizeof(unsigned short);
    struct timespec start, stop;
    int rc = EXIT_FAILURE;

    unsigned short *src = malloc(buf_bytes);
    unsigned short *dest = malloc(buf_bytes);
    if (src == NULL || dest == NULL) {
        fprintf(stderr, "Failed to allocate two buffers of %zu bytes\n", buf_bytes);
        goto out;
    }

    /* Give the source buffer defined contents before it is read by memcpy(). */
    for (size_t i = 0; i < NUM_ELEMENTS; i++) {
        src[i] = (unsigned short)rand();
    }

    if (clock_gettime(CLOCK_MONOTONIC, &start) != 0) {
        perror("clock_gettime");
        goto out;
    }
    for (size_t iter = 0; iter < ITERATIONS; iter++) {
        memcpy(dest, src, buf_bytes);
    }
    if (clock_gettime(CLOCK_MONOTONIC, &stop) != 0) {
        perror("clock_gettime");
        goto out;
    }

    double duration_d = (double)(stop.tv_sec - start.tv_sec)
                      + (double)(stop.tv_nsec - start.tv_nsec) / 1000000000.0;

    /*
     * Do the size arithmetic in floating point: dividing NUM_ELEMENTS by
     * 1024 * 1024 as an integer would truncate to 0 for buffers smaller
     * than 1 Mi elements.
     */
    double total_mb = (double)ITERATIONS * (double)buf_bytes / (1024.0 * 1024.0);
    double mb_per_sec = total_mb / duration_d;

    /* ITERATIONS is a size_t, so it must be printed with %zu, not %d. */
    printf("Duration: %.5lfs for %zu iterations, %.3lfMB/sec\n", duration_d, ITERATIONS, mb_per_sec);
    rc = 0;

out:
    /* free(NULL) is a no-op, so this is safe on every path. */
    free(src);
    free(dest);
    return rc;
}
您可能需要链接 -lrt 才能使用 clock_gettime() 函数。
于 2013-06-19T04:45:20.467 回答