I solved the problem by changing the clock() function that I was using to measure latencies. The clock() function measures CPU time, but what I actually want is to measure real (wall-clock) time. I am now using std::chrono to measure the latencies, and the inference results are now latency-deterministic.
This was the wrong approach (using clock()):
int main ()
{
    // NOTE: clock() measures CPU time consumed by this process, not
    // wall-clock time — this is the "wrong" approach described above,
    // kept here for illustration.
    clock_t t;

    t = clock();
    inferenceEngine(); // Do the inference
    t = clock() - t;

    // clock_t is an implementation-defined arithmetic type; the original
    // "%d" is undefined behavior on platforms where clock_t is not int.
    // Cast to long and use %ld for a portable format.
    printf ("It took me %ld clicks (%f seconds).\n", (long)t, ((float)t)/CLOCKS_PER_SEC);
    return 0;
}
To time the GPU work itself, use CUDA events like this (cudaEvent):
// Time the inference with CUDA events (measures elapsed time on the GPU
// timeline, so it correctly accounts for asynchronous kernel execution).
cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);

cudaEventRecord(start);
inferenceEngine(); // Do the inference
cudaEventRecord(stop);

// Block the host until the stop event has actually completed on the device;
// without this, cudaEventElapsedTime could query an unfinished event.
cudaEventSynchronize(stop);

float milliseconds = 0;
cudaEventElapsedTime(&milliseconds, start, stop);

// Events are GPU resources: destroy them to avoid a leak
// (this was missing from the original snippet).
cudaEventDestroy(start);
cudaEventDestroy(stop);
To measure wall-clock time on the host, use std::chrono like this:
#include <iostream>
#include <chrono>
#include <ctime>
int main()
{
    // steady_clock is monotonic and is the correct clock for measuring
    // elapsed intervals; system_clock can jump (NTP adjustments, manual
    // changes) and is only appropriate for the human-readable timestamp.
    auto start = std::chrono::steady_clock::now();

    // NOTE(review): if inferenceEngine() launches asynchronous CUDA work,
    // a cudaDeviceSynchronize() is needed before taking `end`, otherwise
    // the measurement excludes the GPU time — confirm against the engine.
    inferenceEngine(); // Do the inference

    auto end = std::chrono::steady_clock::now();
    std::chrono::duration<double> elapsed_seconds = end - start;

    // Wall-clock timestamp for the log line (system_clock is the only
    // standard clock convertible to time_t).
    std::time_t end_time =
        std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());

    std::cout << "finished computation at " << std::ctime(&end_time)
              << "elapsed time: " << elapsed_seconds.count() << "s\n";
}