更好的测量程序是使用clock_gettime(CLOCK_MONOTONIC, &now)
或clock_gettime(CLOCK_BOOTTIME, &now)
循环。每当连续迭代之间的差异大于某个限制(例如,0.025 毫秒或 25 微秒)时,您就会开始一个新的周期。该时期在下一个这样的较大差异之前的测量结束。
这是一个示例程序,可让您命名要使用的时钟、线程数和要测量的秒数:
#define _POSIX_C_SOURCE 200809L
#include <stdlib.h>
#include <inttypes.h>
#include <pthread.h>
#include <limits.h>
#include <string.h>
#include <strings.h>
#include <stdio.h>
#include <time.h>
#include <errno.h>
/* Maximum interval between iterations */
#ifndef NS_PER_ITERATION_MAX
#define NS_PER_ITERATION_MAX 25000 /* 25 us, or 1/40,000ths of a second */
#endif
/* Number of slots per second to reserve per thread. */
#ifndef SLOTS_PER_SEC
#define SLOTS_PER_SEC 1000
#endif
static inline int64_t difftimespec_ns(const struct timespec after, const struct timespec before)
{
return (int64_t)(after.tv_sec - before.tv_sec) * INT64_C(1000000000) + (int64_t)(after.tv_nsec - before.tv_nsec);
}
struct measurements {
struct measurements *next;
pthread_t thread;
clockid_t clk;
int64_t ns_runtime;
size_t ns_max;
size_t ns_num;
uint32_t ns[];
};
void *measure_slices(void *payload) {
struct measurements *const m = payload;
const clockid_t clk = m->clk;
const int64_t ns_runtime = m->ns_runtime;
const size_t ns_max = m->ns_max;
size_t ns_num = 0;
uint32_t *const ns_array = m->ns;
struct timespec started, mark, prev, curr;
int64_t ns;
if (clock_gettime(clk, &started) == -1)
return (void *)(intptr_t)errno;
if (clock_gettime(clk, &mark) == -1)
return (void *)(intptr_t)errno;
if (clock_gettime(clk, &prev) == -1)
return (void *)(intptr_t)errno;
while (1) {
if (clock_gettime(clk, &curr) == -1)
return (void *)(intptr_t)errno;
/* Ran for long enough? */
if (difftimespec_ns(curr, started) >= ns_runtime)
break;
/* Interval between this and previous iteration */
ns = difftimespec_ns(curr, prev);
if (ns <= NS_PER_ITERATION_MAX) {
/* Still within the same timeslice */
prev = curr;
continue;
}
/* We started a new timeslice. */
ns = difftimespec_ns(prev, mark);
if (ns > NS_PER_ITERATION_MAX && ns < INT64_C(4294967296)) {
/* Skip the very first measurement. */
if (ns_num > 0)
ns_array[ns_num - 1] = ns;
if (++ns_num >= ns_max)
break;
}
mark = prev = curr;
}
m->ns_num = ns_num - 1;
return 0;
}
static int parse_clock(const char *src, clockid_t *dst)
{
if (!src || !*src || !dst)
return -1;
/* Skip optional CLOCK_ prefix. */
if (!strncasecmp(src, "CLOCK_", 6))
src += 6;
if (!strcasecmp(src, "REALTIME")) {
*dst = CLOCK_REALTIME;
return 0;
} else
if (!strcasecmp(src, "REALTIME_COARSE")) {
*dst = CLOCK_REALTIME_COARSE;
return 0;
} else
if (!strcasecmp(src, "MONOTONIC")) {
*dst = CLOCK_MONOTONIC;
return 0;
} else
if (!strcasecmp(src, "MONOTONIC_COARSE")) {
*dst = CLOCK_MONOTONIC_COARSE;
return 0;
} else
if (!strcasecmp(src, "MONOTONIC_RAW")) {
*dst = CLOCK_MONOTONIC_RAW;
return 0;
} else
if (!strcasecmp(src, "BOOTTIME")) {
*dst = CLOCK_BOOTTIME;
return 0;
}
/* We deliberately omit CLOCK_PROCESS_CPUTIME_ID and CLOCK_THREAD_CPUTIME_ID,
because they reflect CPU time used, and our algorithm needs a wall clock. */
return -1;
}
static int parse_double(const char *src, double *dst)
{
const char *end;
double val;
if (!src || !*src || !dst)
return -1;
end = src;
errno = 0;
val = strtod(src, (char **)(&end));
if (errno || *end)
return -1;
*dst = val;
return 0;
}
static int parse_size(const char *src, size_t *dst)
{
const char *end;
unsigned long val;
if (!src || !*src || !dst)
return -1;
end = src;
errno = 0;
val = strtoul(src, (char **)(&end), 0);
if (errno || *end)
return -1;
if ((unsigned long)(size_t)(val) != val)
return -1;
*dst = val;
return 0;
}
static int compare_uint32_t(const void *ptr1, const void *ptr2)
{
const uint32_t val1 = *(const uint32_t *)ptr1;
const uint32_t val2 = *(const uint32_t *)ptr2;
return (val1 < val2) ? -1 :
(val1 > val2) ? +1 : 0;
}
int main(int argc, char *argv[])
{
int64_t ns;
double seconds;
size_t threads, n, i;
clockid_t clk;
struct measurements *list, *curr;
uint32_t *allns;
pthread_attr_t attrs;
int err;
if (argc != 4 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
const char *argv0 = (argc > 0 && argv && argv[0] && argv[0][0]) ? argv[0] : "(this)";
fprintf(stderr, "\n");
fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv0);
fprintf(stderr, " %s CLOCK THREADS SECONDS\n", argv0);
fprintf(stderr, "\n");
fprintf(stderr, "This program measures the named clock,\n");
fprintf(stderr, " REALTIME\n");
fprintf(stderr, " MONOTONIC\n");
fprintf(stderr, " BOOTTIME\n");
fprintf(stderr, "in a tight loop, in order to determine how long such\n");
fprintf(stderr, "a tight loop can run uninterrupted (technically, with\n");
fprintf(stderr, "at most %.1f microseconds (%.3f ms) between iterations)\n",
NS_PER_ITERATION_MAX / 1000.0, NS_PER_ITERATION_MAX / 1000000.0);
fprintf(stderr, "collecting statistics of such durations.\n");
fprintf(stderr, "Output is the measured durations in nanoseconds.\n");
fprintf(stderr, "\n");
return (argc < 3) ? EXIT_SUCCESS : EXIT_FAILURE;
}
if (parse_clock(argv[1], &clk)) {
fprintf(stderr, "%s: Invalid clock name.\n", argv[1]);
return EXIT_FAILURE;
}
if (parse_size(argv[2], &threads) || threads < 1) {
fprintf(stderr, "%s: Invalid number of threads.\n", argv[2]);
return EXIT_FAILURE;
}
if (parse_double(argv[3], &seconds) || seconds <= 0.010) {
fprintf(stderr, "%s: Invalid number of seconds to run.\n", argv[3]);
return EXIT_FAILURE;
}
ns = (int64_t)(1000000000.0 * seconds);
if (ns <= 0) {
fprintf(stderr, "%s: Invalid number of seconds to run.\n", argv[3]);
return EXIT_FAILURE;
}
/* Number of duration slots per second per thread. */
n = (size_t)(SLOTS_PER_SEC * seconds);
/* Allocate a measurement structure per thread. */
list = NULL;
for (i = 0; i < threads; i++) {
curr = malloc(n * sizeof (uint32_t) + sizeof (struct measurements));
if (!curr) {
fprintf(stderr, "Out of memory.\n");
return EXIT_FAILURE;
}
/* Initialize. */
curr->clk = clk;
curr->ns_runtime = ns;
curr->ns_max = n;
curr->ns_num = 0;
/* Prepend to list of measurement structures. */
curr->next = list;
list = curr;
}
/* Measurement threads don't need much stack. */
pthread_attr_init(&attrs);
pthread_attr_setstacksize(&attrs, 2 * PTHREAD_STACK_MIN);
for (curr = list; curr != NULL; curr = curr->next) {
err = pthread_create(&(curr->thread), &attrs, measure_slices, curr);
if (err) {
fprintf(stderr, "Cannot create measurement threads: %s.\n", strerror(err));
exit(EXIT_FAILURE); /* This kills also the already running threads. */
}
}
pthread_attr_destroy(&attrs);
fprintf(stderr, "Measuring ... ");
n = 0;
for (curr = list; curr != NULL; curr = curr->next) {
void *retval = (void *)0;
err = pthread_join(curr->thread, &retval);
if (err) {
fprintf(stderr, "Lost a measurement thread: %s.\n", strerror(err));
exit(EXIT_FAILURE); /* Also kills still running threads. */
}
if (retval != (void *)0) {
fprintf(stderr, "Measurement thread failed: %s.\n", strerror((int)(intptr_t)retval));
exit(EXIT_FAILURE);
}
n += curr->ns_num;
}
fprintf(stderr, "Done; obtained %zu durations total.\n", n);
/* Collate results. */
allns = malloc(n * sizeof allns[0]);
if (!allns) {
fprintf(stderr, "Cannot collate results: %s.\n", strerror(ENOMEM));
return EXIT_FAILURE;
}
i = 0;
while (list) {
curr = list;
list = list->next;
curr->next = NULL;
if (i + curr->ns_num > n) {
fprintf(stderr, "Internal bug when collating data: buffer overrun.\n");
return EXIT_FAILURE;
}
memcpy(allns + i, curr->ns, curr->ns_num * sizeof (uint32_t));
i += curr->ns_num;
free(curr);
}
if (i != n) {
fprintf(stderr, "Internal bug when collating data: buffer underrun.\n");
return EXIT_FAILURE;
}
qsort(allns, n, sizeof (uint32_t), compare_uint32_t);
/* Output in increasing order. */
for (i = 0; i < n; i++)
printf("%lu\n", (unsigned long)allns[i]);
return EXIT_SUCCESS;
}
如果你把它保存为eg example.c
,你可以编译它来ex
使用gcc -Wall -Wextra -O2 example.c -pthread -o ex
。运行./ex --help
查看使用情况。
这是一个示例输出(较大时钟差异之间的持续时间,以纳秒为单位,按递增顺序)在./ex BOOTTIME 4 1
i5-7200U 上运行时,两个内核各有两个线程,因此有四个执行线程:
25346
30283
32421
32725
33492
35245
35840
36478
37728
37764
38316
39747
40843
41106
41460
42031
42086
42198
42234
42347
42352
42505
42535
42777
43297
43415
43491
43523
43537
43642
43766
43787
43966
43977
43983
43988
43988
43992
44020
44036
44054
44080
44093
44192
44255
44308
44465
44504
44576
44586
44618
44619
44651
44658
44712
44717
44773
44791
44794
44834
44857
45097
45509
45659
45874
45991
46003
46022
46024
46102
46106
46108
46121
46159
46176
46187
46216
46252
46279
46281
46644
47064
47188
47229
47276
47468
47619
47803
48882
48937
49229
49235
49291
49331
49364
49496
49574
49675
49717
49762
49782
49822
49835
49893
49907
49935
49975
49994
50341
50372
50398
51238
51273
51304
51424
51703
51773
51787
51837
51838
51912
55287
57287
59358
59934
60645
62583
65875
82664
91088
190836
229532
287851
288769
310045
329565
372843
423063
433414
434355
474231
669240
688068
774163
783945
798116
844045
878774
986042
996956
1099045
1116371
1273081
1277074
1345787
1362752
1473027
1533827
1661606
1661648
1681454
1681696
1930065
1952703
1967833
2001524
2032112
2044125
2049316
2235882
2332409
2602839
2602850
2603006
2817151
2898885
3062130
3921933
3927327
3965095
3970580
3978114
4069431
4097642
4238546
4513423
4579035
4641020
4803608
4911424
5032919
6012137
6039261
6270161
6300259
6303458
6310116
6557892
6737421
7660021
7671463
7732257
7732308
7732436
7732455
7939267
7971787
8031922
8050166
8197531
8534918
8677078
8728024
8808732
8814990
9235892
9449015
9741948
10012384
10806507
11361965
11556520
12587385
12953578
13038394
13858728
14278834
15041709
15810753
16062442
16737253
17377461
17377603
17377634
17377647
17377736
17377742
17377783
17377807
17378073
17378178
17378302
17378489
17378598
17378604
17378650
17379365
17379576
17545273
17545739
17545753
17546676
17876097
17877331
17877365
17877829
17878187
17878375
17878614
17878660
17879356
17879481
17879494
17879618
17879665
18044880
18044938
18045074
18045188
18045351
18045459
18045550
18046538
18859731
19947402
19951568
19960383
19961624
19962660
23957571
23963028
26022469
27927170
39508952
39778596
40246691
43238996
43275323
43973206
57182522
69860693
79694268
82876417
90479990
91137669
96605181
97441594
141011381
199029008
242310741
246159499
391260825
请注意,这些以纳秒为单位,或 1/1,000,000,000 秒;和 1 毫秒 = 1,000,000 纳秒。如您所见,没有特别典型的“时间片”长度;它们从非常短(可能是缓存效果,可能是内核硬件中断)到 0.39 秒不等。
https://pastebin.com/cvLfrWxw具有更大数据集的输出./ex BOOTTIME 8 10
。对于 8 个线程,60 Hz 刷新率不再出现(可能是因为调度程序随后也切换到不同的任务,因为正在运行的任务比硬件线程多得多)。
如您所见,没有典型的持续时间(线程可以在不被中断的情况下运行超过 25µs)。
这和我预想的一样,因为目前的 Linux 任务调度器(CFS Scheduler)没有传统意义上的时间片。