1

我正在编写一个程序,使用读取时间戳计数器的rdtsc()来测量矩阵乘法的性能。

但是,当我结合矩阵乘法程序和性能测量程序时,出现了错误。

如果您能告诉我应该如何修改当前的代码来排除这个错误,我将不胜感激。

环境:macOS Mojave 版本 10.14.5,终端 2.9.5

当我单独编译并执行矩阵乘法程序matrixmul.c时,编译成功并且输出正确。但是,一旦加入性能测量代码,就无法编译通过。

这是matrixmul.c

#include<stdio.h>

#define N 3
/*
 * Computes the product of two fixed N x N matrices and prints every element
 * of the result, one per line, in row-major order.
 * The command-line arguments are accepted but not used.
 */
int main(int argc, char *argv[])
{
    double A[N][N] = {
        {1.0, 3.0, 1.0},
        {3.0, 1.0, 3.0},
        {1.0, 1.0, 1.0}
    };
    double B[N][N] = {
        {6.0, 1.0, 0.0},
        {0.0, 1.0, 6.0},
        {6.0, 1.0, 1.0}
    };
    double C[N][N] = {{0.0}};   /* every element starts at zero */
    int row, col, inner;

    for (row = 0; row < N; ++row) {
        for (col = 0; col < N; ++col) {
            /* Dot product of row `row` of A with column `col` of B. */
            double sum = 0.0;
            for (inner = 0; inner < N; ++inner) {
                sum += A[row][inner] * B[inner][col];
            }
            C[row][col] = sum;
        }
    }

    for (row = 0; row < N; ++row) {
        for (col = 0; col < N; ++col) {
            printf("C[%d][%d] = %f\n", row, col, C[row][col]);
        }
    }

    return 0;
}

输出:

$ ./matrixmul
C[0][0] = 6.000000
C[0][1] = 5.000000
C[0][2] = 19.000000
C[1][0] = 18.000000
C[1][1] = 7.000000
C[1][2] = 9.000000
C[2][0] = 6.000000
C[2][1] = 3.000000
C[2][2] = 7.000000

这是measurement.c

#include <stdio.h>
#include "rdtsc.h"

#define N 3
int main(int argc, char *argv[]) /* Times a 3x3 matrix multiply plus its printout using rdtsc(). */
{
    unsigned long long start = rdtsc(); /* counter value before any work */
    /* A and B are the fixed N x N operands; C receives the product A * B. */
    double A[N][N] = {
        {1.0, 3.0, 1.0},
        {3.0, 1.0, 3.0},
        {1.0, 1.0, 1.0}
    };

    double B[N][N] = {
        {6.0, 1.0, 0.0},
        {0.0, 1.0, 6.0},
        {6.0, 1.0, 1.0}
    };

    double C[N][N] = {
        {0.0, 0.0, 0.0},
        {0.0, 0.0, 0.0},
        {0.0, 0.0, 0.0}
    };
    int i, j, k;
    /* C[i][j] accumulates the dot product of row i of A and column j of B. */
    for(i=0; i<N; i++)
        for(j=0; j<N; j++)
            for(k=0; k<N; k++)
                C[i][j] += A[i][k]*B[k][j];
    /* Print the result; this output happens inside the timed region. */
    for(i=0; i<N; i++)
        for(j=0; j<N; j++)
            printf("C[%d][%d] = %f\n", i, j, C[i][j]);
    /* NOTE(review): %I64d below is not a standard C length modifier (clang warns); the operand is unsigned long long, i.e. %llu. */
    unsigned long long stop = rdtsc(); /* counter value after the multiply and the printing */
    printf("measured time : %I64d [clock]\n", stop - start);

    return 0;
}

这是rdtsc.h(该文件与measurement.c位于同一文件夹中):

#ifndef RDTSC_H_
#define RDTSC_H_

/*
 * Returns the value produced by the x86 RDTSC (read time-stamp counter)
 * instruction.
 *
 * NOTE(review): with a plain `inline` (neither `static` nor `extern`), C99
 * does not emit an external definition of the function from this translation
 * unit. If the compiler does not inline the call, the linker reports `_rdtsc`
 * as undefined, which matches the build output shown for this version.
 * NOTE(review): the "=A" constraint names the EDX:EAX pair only on 32-bit
 * x86; on x86-64 it presumably selects a single register, so half of the
 * counter would be lost -- confirm against the GCC constraint documentation.
 */
inline unsigned long long rdtsc() {
    unsigned long long ret;
    __asm__ volatile ("rdtsc" : "=A" (ret));
    return ret;
}

#endif /* RDTSC_H_ */

输出:

$ gcc -o measurement measurement.c
measurement.c:38:30: warning: length modifier 'I64' results in undefined
      behavior or no effect with 'd' conversion specifier [-Wformat]
    printf("measured time : %I64d [clock]\n", stop - start);
                            ~^~~~
1 warning generated.
Undefined symbols for architecture x86_64:
  "_rdtsc", referenced from:
      _main in measurement-510357.o
ld: symbol(s) not found for architecture x86_64
clang: error: linker command failed with exit code 1 (use -v to see invocation)

根据回答修改后的尝试

我已经按照回答修改了代码并重新编译,但仍然出现一个错误。错误信息:

$ gcc -o measurement measurement.c
measurement.c:38:53: error: use of undeclared identifier 'start'
    printf("measured time : %lld [clock]\n", stop - start);
                                                    ^
1 error generated.

measurement.c

#include <stdio.h>
#include "rdtsc.h"

#define N 3
int main(int argc, char *argv[]) /* Same timed 3x3 multiply, after switching the format to %lld. */
{
    //unsigned long long start = rdtsc();   NOTE(review): commented out, so 'start' is never declared
    /* A and B are the fixed N x N operands; C receives the product A * B. */
    double A[N][N] = {
        {1.0, 3.0, 1.0},
        {3.0, 1.0, 3.0},
        {1.0, 1.0, 1.0}
    };

    double B[N][N] = {
        {6.0, 1.0, 0.0},
        {0.0, 1.0, 6.0},
        {6.0, 1.0, 1.0}
    };

    double C[N][N] = {
        {0.0, 0.0, 0.0},
        {0.0, 0.0, 0.0},
        {0.0, 0.0, 0.0}
    };
    int i, j, k;
    /* C[i][j] accumulates the dot product of row i of A and column j of B. */
    for(i=0; i<N; i++)
        for(j=0; j<N; j++)
            for(k=0; k<N; k++)
                C[i][j] += A[i][k]*B[k][j];
    /* Print the result before the final counter read. */
    for(i=0; i<N; i++)
        for(j=0; j<N; j++)
            printf("C[%d][%d] = %f\n", i, j, C[i][j]);
    /* NOTE(review): the subtraction below uses 'start', whose declaration is commented out above -> "undeclared identifier". */
    unsigned long long stop = rdtsc(); /* counter value after the multiply and the printing */
    printf("measured time : %lld [clock]\n", stop - start);

    return 0;
}

rdtsc.h

#ifndef RDTSC_H_
#define RDTSC_H_

/*
 * Reads the processor's time-stamp counter with the x86 RDTSC instruction.
 *
 * RDTSC leaves the low 32 bits of the counter in EAX and the high 32 bits in
 * EDX. The "=A" constraint names the EDX:EAX pair only on 32-bit x86; on
 * x86-64 it lets the compiler pick RAX or RDX alone, which drops half of the
 * value. Capturing both halves explicitly is correct on both targets.
 *
 * Returns the full 64-bit counter value.
 *
 * The function has external linkage, so including this header from more than
 * one translation unit of the same program yields duplicate definitions.
 */
unsigned long long rdtsc(void) {
    unsigned int lo, hi;
    __asm__ volatile ("rdtsc" : "=a" (lo), "=d" (hi));
    return ((unsigned long long)hi << 32) | lo;
}

#endif /* RDTSC_H_ */
4

1 回答 1

2

其实没有必要把rdtsc函数声明为inline,这只会让编译器无法确定是否要导出该函数。只需删除inline,或者在它前面加上extern,以明确告诉编译器导出该函数。下面的写法可以正常工作:

#ifndef RDTSC_H_
#define RDTSC_H_

/*
 * Reads the processor's time-stamp counter with the x86 RDTSC instruction.
 *
 * RDTSC leaves the low 32 bits of the counter in EAX and the high 32 bits in
 * EDX. The "=A" constraint names the EDX:EAX pair only on 32-bit x86; on
 * x86-64 it lets the compiler pick RAX or RDX alone, which drops half of the
 * value. Capturing both halves explicitly is correct on both targets.
 *
 * Returns the full 64-bit counter value.
 *
 * The function has external linkage, so including this header from more than
 * one translation unit of the same program yields duplicate definitions.
 */
unsigned long long rdtsc(void) {
    unsigned int lo, hi;
    __asm__ volatile ("rdtsc" : "=a" (lo), "=d" (hi));
    return ((unsigned long long)hi << 32) | lo;
}

#endif /* RDTSC_H_ */

此外,正如clang所提示的,%I64d是无效的格式修饰符。对于long long int,请改用%lld(对于unsigned long long,则应使用%llu)。

于 2019-07-21T01:12:57.447 回答