
I am trying to compare GPU to CPU performance. For the NVIDIA GPU I have been using cudaEvent_t types to get very precise timings.

For the CPU I have been using the following code:

// Timers
clock_t start, stop;
float elapsedTime = 0;

// Capture the start time

start = clock();

// Do something here
.......

// Capture the stop time
stop = clock();
// Retrieve time elapsed in milliseconds
elapsedTime = (float)(stop - start) / (float)CLOCKS_PER_SEC * 1000.0f;

Obviously, that code is only good if you are counting in seconds. Also, the results sometimes come out quite strange.

Does anyone know of a way to create a high-resolution timer in Linux?

7 Answers

Check out clock_gettime, the POSIX interface to high-resolution timers.

If, after reading the man page, you are left wondering about the difference between CLOCK_REALTIME and CLOCK_MONOTONIC, see Difference between CLOCK_REALTIME and CLOCK_MONOTONIC?

For a complete example, see the following page: http://www.guyrutenberg.com/2007/09/22/profiling-code-using-clock_gettime/

#include <iostream>
#include <time.h>
using namespace std;

timespec diff(timespec start, timespec end);

int main()
{
    timespec time1, time2;
    volatile int temp = 0;  // initialized; volatile keeps the busy-work loop from being optimized away
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time1);
    for (int i = 0; i < 242000000; i++)
        temp += temp;
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time2);
    timespec elapsed = diff(time1, time2);
    cout << elapsed.tv_sec << ":" << elapsed.tv_nsec << endl;
    return 0;
}

timespec diff(timespec start, timespec end)
{
    timespec temp;
    if ((end.tv_nsec-start.tv_nsec)<0) {
        temp.tv_sec = end.tv_sec-start.tv_sec-1;
        temp.tv_nsec = 1000000000+end.tv_nsec-start.tv_nsec;
    } else {
        temp.tv_sec = end.tv_sec-start.tv_sec;
        temp.tv_nsec = end.tv_nsec-start.tv_nsec;
    }
    return temp;
}
answered 2011-07-19T15:27:24.243

Summarizing the information given so far, these are the two functions a typical application will need.

#include <time.h>

// call this function to start a nanosecond-resolution timer
struct timespec timer_start(){
    struct timespec start_time;
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start_time);
    return start_time;
}

// call this function to end a timer, returning nanoseconds elapsed as a long
long timer_end(struct timespec start_time){
    struct timespec end_time;
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end_time);
    long diffInNanos = (end_time.tv_sec - start_time.tv_sec) * (long)1e9 + (end_time.tv_nsec - start_time.tv_nsec);
    return diffInNanos;
}

Here is an example of how to use them, timing how long it takes to compute the variance of a list of input:

struct timespec vartime = timer_start();  // begin a timer called 'vartime'
double variance = var(input, MAXLEN);  // perform the task we want to time
long time_elapsed_nanos = timer_end(vartime);
printf("Variance = %f, Time taken (nanoseconds): %ld\n", variance, time_elapsed_nanos);
answered 2013-11-11T03:07:55.453

struct timespec t;
clock_gettime(CLOCK_REALTIME, &t);

There is also CLOCK_REALTIME_HR, but I am not sure whether it makes any difference.
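
A side note on clock choice: CLOCK_REALTIME can jump when the system clock is adjusted (NTP, manual changes), so for measuring intervals CLOCK_MONOTONIC is usually the safer pick. A minimal sketch of interval timing with it:

#include <ctime>
#include <cstdio>

int main()
{
    timespec t1, t2;
    clock_gettime(CLOCK_MONOTONIC, &t1);   // monotonic: immune to system clock adjustments

    // ... work to be timed ...

    clock_gettime(CLOCK_MONOTONIC, &t2);
    long ns = (t2.tv_sec - t1.tv_sec) * 1000000000L + (t2.tv_nsec - t1.tv_nsec);
    printf("Elapsed: %ld ns\n", ns);
    return 0;
}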

answered 2011-07-19T15:28:40.343

Are you interested in wall time (how much time actually elapses) or in cycle counts (how many cycles)? In the first case, you should use something like gettimeofday.
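
A rough sketch of the gettimeofday approach (wall time at microsecond resolution; note that POSIX marks gettimeofday as obsolescent in favor of clock_gettime):

#include <sys/time.h>
#include <cstdio>

int main()
{
    timeval start, stop;
    gettimeofday(&start, NULL);   // wall-clock time, microsecond resolution

    // ... work to be timed ...

    gettimeofday(&stop, NULL);
    long us = (stop.tv_sec - start.tv_sec) * 1000000L + (stop.tv_usec - start.tv_usec);
    printf("Elapsed: %ld us\n", us);
    return 0;
}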

The highest-resolution counter is read with the RDTSC x86 assembly instruction. However, it measures clock ticks, so you should be sure that power-saving modes are disabled.

The wiki page on the TSC gives a few examples: http://en.wikipedia.org/wiki/Time_Stamp_Counter
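
As a minimal sketch (assuming GCC or Clang on x86), the TSC can be read with the __rdtsc() intrinsic rather than hand-written assembly:

#include <x86intrin.h>   // __rdtsc() with GCC/Clang, x86 only
#include <cstdio>

int main()
{
    unsigned long long start = __rdtsc();

    // ... work to be timed ...

    unsigned long long stop = __rdtsc();
    // This is a cycle count, not time: converting to seconds needs the (invariant)
    // TSC frequency, and short measurements need serialization (CPUID or __rdtscp).
    printf("Elapsed cycles: %llu\n", stop - start);
    return 0;
}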

answered 2011-07-19T22:41:16.170

After reading this thread, I started testing clock_gettime against C++11's chrono, and they don't seem to match.

There is a huge gap between them!

std::chrono::seconds(1) seems to be equivalent to ~30,000 of clock_gettime (nanoseconds, judging by the output below).

#include <ctime>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <thread>
#include <chrono>
#include <iomanip>
#include <vector>

timespec diff(timespec start, timespec end);
timespec get_cpu_now_time();
std::vector<timespec> get_start_end_pairs();
void output_deltas(const std::vector<timespec> &start_end_pairs);

//=============================================================
int main()
{
    std::cout << "Hello waiter" << std::endl; // flush is intentional
    std::vector<timespec> start_end_pairs = get_start_end_pairs();
    output_deltas(start_end_pairs);

    return EXIT_SUCCESS;
}

//=============================================================
std::vector<timespec> get_start_end_pairs()
{
    std::vector<timespec> start_end_pairs;
    for (int i = 0; i < 20; ++i)
    {
        start_end_pairs.push_back(get_cpu_now_time());
        std::this_thread::sleep_for(std::chrono::seconds(1));
        start_end_pairs.push_back(get_cpu_now_time());
    }

    return start_end_pairs;
}

//=============================================================
void output_deltas(const std::vector<timespec> &start_end_pairs)
{
    for (auto it_start = start_end_pairs.begin(); it_start != start_end_pairs.end(); it_start += 2)
    {
        auto it_end = it_start + 1;
        auto delta = diff(*it_start, *it_end);

        std::cout
            << "Waited ("
            << delta.tv_sec
            << "\ts\t"
            << std::setw(9)
            << std::setfill('0')
            << delta.tv_nsec
            << "\tns)"
            << std::endl;
    }
}

//=============================================================
timespec diff(timespec start, timespec end)
{
    timespec temp;
    temp.tv_sec = end.tv_sec - start.tv_sec;
    temp.tv_nsec = end.tv_nsec - start.tv_nsec;

    if (temp.tv_nsec < 0) {
        --temp.tv_sec;               // borrow one second from tv_sec
        temp.tv_nsec += 1000000000;
    }
    return temp;
}

//=============================================================
timespec get_cpu_now_time()
{
    timespec now_time;
    memset(&now_time, 0, sizeof(timespec));
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &now_time);

    return now_time;
}

Output:

Waited (0   s   000064802   ns)
Waited (0   s   000028512   ns)
Waited (0   s   000030664   ns)
Waited (0   s   000041233   ns)
Waited (0   s   000013458   ns)
Waited (0   s   000024068   ns)
Waited (0   s   000027591   ns)
Waited (0   s   000028148   ns)
Waited (0   s   000033783   ns)
Waited (0   s   000022382   ns)
Waited (0   s   000027866   ns)
Waited (0   s   000028085   ns)
Waited (0   s   000028012   ns)
Waited (0   s   000028172   ns)
Waited (0   s   000022121   ns)
Waited (0   s   000052940   ns)
Waited (0   s   000032138   ns)
Waited (0   s   000028082   ns)
Waited (0   s   000034486   ns)
Waited (0   s   000018875   ns)
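
Worth noting: the gap above comes from which clock is being sampled. CLOCK_PROCESS_CPUTIME_ID counts CPU time consumed by the process, and a thread blocked in sleep_for consumes almost none, whereas std::chrono::seconds(1) is one second of wall time. A minimal sketch using CLOCK_MONOTONIC (a wall-time clock) instead should report close to the full second:

#include <ctime>
#include <cstdio>
#include <thread>
#include <chrono>

int main()
{
    timespec t1, t2;
    clock_gettime(CLOCK_MONOTONIC, &t1);   // wall time, not per-process CPU time
    std::this_thread::sleep_for(std::chrono::seconds(1));
    clock_gettime(CLOCK_MONOTONIC, &t2);

    long ns = (t2.tv_sec - t1.tv_sec) * 1000000000L + (t2.tv_nsec - t1.tv_nsec);
    printf("Slept for ~%ld ns\n", ns);     // expect roughly 1,000,000,000
    return 0;
}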
answered 2019-01-08T17:43:25.807

clock_gettime(2)

answered 2011-07-19T15:27:30.160

An epoll-based implementation: https://github.com/ielife/simple-timer-for-c-language

Use it like this:

timer_server_handle_t *timer_handle = timer_server_init(1024);
if (NULL == timer_handle) {
    fprintf(stderr, "timer_server_init failed\n");
    return -1;
}

ctimer timer1;
timer1.count_ = 3;
timer1.timer_internal_ = 0.5;
timer1.timer_cb_ = timer_cb1;
int *user_data1 = (int *)malloc(sizeof(int));
*user_data1 = 100;
timer1.user_data_ = user_data1;
timer_server_addtimer(timer_handle, &timer1);

ctimer timer2;
timer2.count_ = -1;
timer2.timer_internal_ = 0.5;
timer2.timer_cb_ = timer_cb2;
int *user_data2 = (int *)malloc(sizeof(int));
*user_data2 = 10;
timer2.user_data_ = user_data2;
timer_server_addtimer(timer_handle, &timer2);

sleep(10);

timer_server_deltimer(timer_handle, timer1.fd);
timer_server_deltimer(timer_handle, timer2.fd);
timer_server_uninit(timer_handle);
answered 2018-03-02T03:27:13.057