我想用 C 或 C++ 清除我的缓存行。我的代码如下,我的 gcc 版本是 9.3.0。
#include <chrono>
#include <cmath>
#include <cstring>
#include <iostream>
#include <x86intrin.h>
void clear_cache(char *addr, int size) {
int round = (size-1) / 64 + 1;
for (int i = 0; i < round; i++) {
_mm_clflush(addr);
addr += 64;
}
}
float time_test_cache_hit(char * mem_block, size_t len) {
char tmp;
std::chrono::high_resolution_clock::time_point tmp1, tmp2;
tmp1 = std::chrono::high_resolution_clock::now();
for (int idx = 0; idx < len; idx ++) {
tmp = *(mem_block + idx);
}
tmp2 = std::chrono::high_resolution_clock::now();
return ((std::chrono::duration<float>)(tmp2 - tmp1)).count();
}
float time_test_cache_miss(char * mem_block, size_t len) {
char tmp;
clear_cache(mem_block, sizeof(char)*len);
std::chrono::high_resolution_clock::time_point tmp1, tmp2;
tmp1 = std::chrono::high_resolution_clock::now();
for (int idx = 0; idx < len; idx ++) {
tmp = *(mem_block + idx);
}
tmp2 = std::chrono::high_resolution_clock::now();
return ((std::chrono::duration<float>)(tmp2 - tmp1)).count();
}
uint64_t tsc_test_cache_hit(char * mem_block, size_t len) {
char tmp;
for (int i = 0; i < len; i ++) {
*(mem_block + i) = i;
}
uint64_t time1, time2;
time1 = __rdtsc();
for (int idx = 0; idx < len; idx ++) {
tmp = *(mem_block + idx);
}
time2 = __rdtsc();
return time2 - time1;
}
uint64_t tsc_test_cache_miss(char * mem_block, size_t len) {
char tmp;
clear_cache(mem_block, sizeof(char)*len);
uint64_t time1, time2;
time1 = __rdtsc();
for (int idx = 0; idx < len; idx ++) {
tmp = *(mem_block + idx);
}
time2 = __rdtsc();
return time2 - time1;
}
int main(int argc, char ** argv)
{
int len = 100;
char* mem_block = (char*) malloc(sizeof(char)*len);
for (int i = 0; i < len; i ++) {
*(mem_block + i) = i;
}
std::cout << "cache hit time: " << time_test_cache_hit(mem_block, len) << "\ncache miss time: " << time_test_cache_miss(mem_block, len) << std::endl;
std::cout << "cache hit tsc: " << tsc_test_cache_hit(mem_block, len) << "\ncache miss tsc: " << tsc_test_cache_miss(mem_block, len) << std::endl;
free(mem_block);
return 0;
}
我的缓存线是 64 字节。至于我的cpu缓存大小,是这样的:
L1d cache: 1.3 MiB
L1i cache: 1.3 MiB
L2 cache: 40 MiB
L3 cache: 55 MiB
我多次运行这段代码,结果似乎很奇怪,
cache hit time: 1.224e-06
cache miss time: 9.07e-07
cache hit tsc: 1672
cache miss tsc: 2114
如果我将clear_cache
功能更改如下:
void clear_cache(char *addr, int len) {
for (int idx = 0; idx < len; idx ++) {
_mm_clflush(mem_block + idx);
}
}
并将调用 clear_cache 的函数从clear_cache(mem_block, sizeof(char)*len)
改为clear_cache(mem_block, len)
,运行的程序似乎是有道理的。结果是这样的:
cache hit time: 1.195e-06
cache miss time: 3.411e-06
cache hit tsc: 1580
cache miss tsc: 8896
如上所示,为什么我之前写的代码不能产生正常的结果(缓存命中比缓存未命中需要更多时间),但两篇文章都表明缓存未命中比缓存命中花费更多的 tsc(时间戳计数),尽管它们的缓存未命中 tsc 也有很大不同。为什么代码会这样?或者如果我的编码有问题?非常感谢您的帮助。