我在 Ubuntu 12.04 上运行该程序,计数器的值为 awalys 0。看来我的计数器对我不起作用。
当我运行程序时,输出是:
Get the number of CPU:8
Create counters for each CPU finished!
run_perf_stat()!
Cache miss in cpu1:0
Cache miss in cpu2:0
Cache miss in cpu3:0
Cache miss in cpu4:0
Cache miss in cpu5:0
Cache miss in cpu6:0
Cache miss in cpu7:0
Cache miss in cpu8:0
我无法理解这些值的含义。我想要得到的是每个 cpu 的缓存引用,它是缓存未命中。但显然结果似乎不正确!
我已经阅读了有关 perf 方法的文档:http: //www.man7.org/linux/man-pages/man2/perf_event_open.2.html
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <asm-generic/unistd.h>
#include <dirent.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include "perf_event.h"
#define MAX_COUNTERS 256
static int fd[MAX_COUNTERS];
static unsigned int counter;
static unsigned int nr_cpus = 0; // amount of cpus
//open counter
long sys_perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
int cpu, int group_fd, unsigned long flags)
{
int ret;
ret = syscall(__NR_perf_event_open, hw_event, pid, cpu,
group_fd, flags);
return ret;
}
void run_perf_stat()
{
long long eventContents=0;
for (counter = 0; counter < nr_cpus; counter++){
if(fd[counter]!=-1){
read(fd[counter],&eventContents,sizeof(long long));
printf("Cache miss in cpu%d:%lld\n",counter+1,eventContents);
}
else{
fprintf(stderr, "Fail to read counter %d\n", counter);
}
}
}
int main(int argc, const char **argv){
DIR *dir; //access to dir
struct dirent *drp;
int run_count, p, pid;
struct timespec tim, tim2;
tim.tv_sec = 1; tim.tv_nsec = 0;
nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);// the number of CPU
printf("Get the number of CPU:%d\n",nr_cpus);
//create counters for each CPU (system-wide)
struct perf_event_attr attr; //cache miss
memset(&attr, 0, sizeof(struct perf_event_attr));
attr.type = PERF_TYPE_HARDWARE;
attr.config = PERF_COUNT_HW_CACHE_MISSES;
attr.size = sizeof(struct perf_event_attr);
attr.disabled = 0;
attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
PERF_FORMAT_TOTAL_TIME_RUNNING;
unsigned int cpu = 0;
for(cpu = 0; cpu < nr_cpus; cpu++)
fd[cpu] = sys_perf_event_open(&attr, -1, cpu, -1, 0);
printf("Create counters for each CPU finished!\n");
//get perf report
while (1) {
nanosleep(&tim , &tim2);
printf("run_perf_stat()!\n");
run_perf_stat();
}
return 1;
}
我还运行http://www.man7.org/linux/man-pages/man2/perf_event_open.2.html给出的示例
但是,不幸的是,结果也总是 0。这是手册页中的示例。
下面是一个简短的示例,用于测量调用 printf(3) 的总指令数。
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
long
perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
int cpu, int group_fd, unsigned long flags)
{
int ret;
ret = syscall(__NR_perf_event_open, hw_event, pid, cpu,
group_fd, flags);
return ret;
}
int
main(int argc, char **argv)
{
struct perf_event_attr pe;
long long count;
int fd;
memset(&pe, 0, sizeof(struct perf_event_attr));
pe.type = PERF_TYPE_HARDWARE;
pe.size = sizeof(struct perf_event_attr);
pe.config = PERF_COUNT_HW_INSTRUCTIONS;
pe.disabled = 1;
pe.exclude_kernel = 1;
pe.exclude_hv = 1;
fd = perf_event_open(&pe, 0, -1, -1, 0);
if (fd == -1) {
fprintf(stderr, "Error opening leader %llx\n", pe.config);
exit(EXIT_FAILURE);
}
ioctl(fd, PERF_EVENT_IOC_RESET, 0);
ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
printf("Measuring instruction count for this printf\n");
ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
read(fd, &count, sizeof(long long));
printf("Used %lld instructions\n", count);
close(fd);
}