2

我似乎无法让一个简单的程序(含大量内存访问)在 Linux 中获得一致的运行时间。我使用的是 2.6 内核,程序以实时优先级运行在双核处理器上。我试图通过将内存数组声明为 volatile 来禁用缓存效果。下面是结果和程序。这些异常值可能来自哪些方面?

结果:

Number of trials: 100
Range: 0.021732s to 0.085596s
Average Time: 0.058094s
Standard Deviation: 0.006944s
Extreme Outliers (2 SDs away from mean): 7
Average Time, excluding extreme outliers: 0.059273s

程序:

#include <stdio.h>
#include <stdlib.h> 
#include <math.h>

#include <sched.h>
#include <sys/time.h>

/* Number of float elements in each of the two arrays used by the copy loop. */
#define NUM_POINTS 5000000
/* Number of timed repetitions of the copy loop (one sample per repetition). */
#define REPS 100

/*
 * Read the current time with gettimeofday() and fold it into a single
 * microsecond count: seconds * 1,000,000 + microseconds.
 * The status returned by gettimeofday() is not inspected.
 */
unsigned long long getTimestamp() {
  struct timeval now;

  gettimeofday(&now, NULL);

  /* Widen both fields to unsigned long long before combining them. */
  return (unsigned long long) now.tv_sec * 1000000ULL
       + (unsigned long long) now.tv_usec;
}

/*
 * Convert a microsecond count into seconds, returned as a double.
 */
double convertTimestampToSecs(unsigned long long timestamp) {
  const double usecPerSec = 1000000.0;

  return (double) timestamp / usecPerSec;
}

/*
 * Benchmark driver.
 *
 * Requests SCHED_FIFO scheduling at the maximum priority for the calling
 * process, allocates two zero-initialised float arrays of NUM_POINTS elements,
 * and times REPS element-by-element copies from one array to the other.
 * It then prints the range, mean, population standard deviation, the number
 * of samples more than two standard deviations from the mean, and the mean
 * with those samples removed.
 *
 * Returns 0 on success, or EXIT_FAILURE if the arrays cannot be allocated.
 * A failure to change the scheduling policy is reported on stderr but does
 * not abort the run.
 */
int main(int argc, char* argv[]) {
  unsigned long long start, stop;
  double times[REPS];
  double sum = 0;
  double avg, newavg;
  double stddev = 0;
  double maxval = -1.0, minval = 1000000.0;
  int i, j;
  int outliers = 0;
  struct sched_param sparam = {0};
  volatile float* data;
  volatile float* results;

  (void) argc;
  (void) argv;

  /*
   * A pid of 0 addresses the calling process, so getpid() (and <unistd.h>)
   * is not needed. Failures are reported because the timings are then taken
   * without the requested real-time priority.
   */
  if (sched_getparam(0, &sparam) != 0)
    perror("sched_getparam");
  sparam.sched_priority = sched_get_priority_max(SCHED_FIFO);
  if (sched_setscheduler(0, SCHED_FIFO, &sparam) != 0)
    perror("sched_setscheduler");

  data = calloc(NUM_POINTS, sizeof *data);
  results = calloc(NUM_POINTS, sizeof *results);
  if (data == NULL || results == NULL) {
    fprintf(stderr, "Failed to allocate benchmark buffers\n");
    /* free(NULL) is a no-op, so both pointers can be released blindly. */
    free((void *) data);
    free((void *) results);
    return EXIT_FAILURE;
  }

  /* Timed section: one sample per full pass over the arrays. */
  for (i = 0; i < REPS; ++i) {
    start = getTimestamp();
    for (j = 0; j < NUM_POINTS; ++j) {
      results[j] = data[j];
    }
    stop = getTimestamp();
    times[i] = convertTimestampToSecs(stop-start);
  }

  /* The casts drop the volatile qualifier, which free() does not accept. */
  free((void *) data);
  free((void *) results);

  /* Sum, minimum and maximum of the samples. */
  for (i = 0; i < REPS; i++) {
    sum += times[i];

    if (times[i] > maxval)
      maxval = times[i];

    if (times[i] < minval)
      minval = times[i];
  }
  avg = sum/REPS;

  /* Population standard deviation (divides by REPS, not REPS-1). */
  for (i = 0; i < REPS; i++)
    stddev += (times[i] - avg)*(times[i] - avg);
  stddev /= REPS;
  stddev = sqrt(stddev);

  /* Remove every sample more than two standard deviations from the mean. */
  for (i = 0; i < REPS; i++) {
    if (times[i] > avg + 2*stddev || times[i] < avg - 2*stddev) {
      sum -= times[i];
      outliers++;
    }
  }
  newavg = sum/(REPS-outliers);

  printf("Number of trials: %d\n", REPS);
  printf("Range: %fs to %fs\n", minval, maxval);
  printf("Average Time: %fs\n", avg);
  printf("Standard Deviation: %fs\n", stddev);
  printf("Extreme Outliers (2 SDs away from mean): %d\n", outliers);
  printf("Average Time, excluding extreme outliers: %fs\n", newavg);

  return 0;
}
4

1 回答 1

3

确保没有其他进程占用 CPU 时间。特别要注意屏幕保护程序以及任何会定期更新 GUI 的程序(例如时钟之类)。尝试为基准测试进程设置 CPU 亲和性,把它锁定到一个核心上(例如在命令行中使用 taskset)。还要确保基准测试进程不会发生分页——通常的做法是用一个外层循环运行 N 次,然后只对最后 N-1 次执行计时。

于 2010-04-05T19:10:54.660 回答