4

我目前的尝试:

/**simplified from
 * https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
 **/

#include <stdio.h>
#include <stdint.h>

#if defined(_MSC_VER)
#   include <intrin.h>
#endif

/**
 * Executes the CPUID instruction for the given leaf and sub-leaf.
 *
 * @param eax   CPUID leaf, loaded into EAX before the instruction runs
 * @param ecx   CPUID sub-leaf, loaded into ECX before the instruction runs
 * @param abcd  output array of four elements receiving EAX, EBX, ECX and EDX
 *              (in that order) as left by CPUID
 */
void get_cpuid(uint32_t eax, uint32_t ecx, uint32_t *abcd){
    #if defined(_MSC_VER)
        /* __cpuidex is declared with an int[4] output buffer */
        __cpuidex((int *)abcd, (int)eax, (int)ecx);
    #else
        uint32_t ebx, edx;
        #if defined( __i386__ ) && defined ( __PIC__ )
            /* in case of PIC, under 32-bit EBX cannot be clobbered: park it
               in EDI, run CPUID, then swap so that EDI carries the EBX
               result of CPUID and EBX gets its saved value back */
            __asm__ __volatile__( "movl %%ebx, %%edi \n\t cpuid \n\t xchgl %%ebx, %%edi" : "=D"(ebx),
        #else
            /* EBX is only written by CPUID, so it is a pure output ("=b");
               "+b" would read the uninitialized local */
            __asm__ __volatile__( "cpuid" : "=b"(ebx),
        #endif
            "+a"(eax), "+c"(ecx), "=d"(edx));

        abcd[0]=eax;abcd[1]=ebx;abcd[2]=ecx;abcd[3]=edx;
    #endif
}

/**
 * Reports whether CPUID advertises Restricted Transactional Memory (RTM).
 *
 * This reflects only what CPUID reports to the caller.
 *
 * @return 1 if CPUID.(EAX=07H,ECX=0):EBX[bit 11] is set, 0 otherwise
 *         (including when leaf 07H is above the highest supported leaf)
 */
int has_RTM_support(void){
    uint32_t abcd[4];

    /*leaf 0 returns the highest supported basic leaf in EAX; feature bits
      read from a leaf above that value are not meaningful*/
    get_cpuid(0x0,0x0,abcd);
    if(abcd[0] < 0x7){
        return 0;
    }

    /*processor supports RTM execution if CPUID.07H.EBX.RTM [bit 11] = 1*/
    get_cpuid(0x7,0x0,abcd);
    return (abcd[1] & (UINT32_C(1) << 11)) != 0;
}


/* Entry point: prints one line of text stating whether has_RTM_support()
   reports RTM as available. Command-line arguments are not used. */
int main(int argc, char **argv){
    const char *verdict = has_RTM_support()
        ? "This CPU supports RTM."
        : "This CPU does NOT support RTM.";

    printf("%s", verdict);
    return 0;
}

我有一个Intel® Core™ i7-7600U(下面是 cpuinfo),正如您从 ark 中看到的,它应该支持 TSX-NI。

不过,上面的检查程序输出的是:

This CPU does NOT support RTM.

tsx-tools 中 has_tsx 的实现也给出了同样的结果:

RTM:没有

HLE:没有

然而与此同时,下面这个片段却可以正常执行:

#include <stdio.h>

/*
 * Increments a counter until it reaches 100000000, performing each increment
 * inside an RTM transaction (xbegin ... xend). When a transaction does not
 * start or aborts, the increment is skipped and the loop simply tries again.
 * Prints the final counter value.
 */
int main(void)
{
    volatile int i = 0;
    while (i < 100000000) {
        /* EAX keeps this sentinel when the transaction starts. On abort the
           CPU resumes at the fallback address (local label 1) with the abort
           status in EAX, so EAX must be declared as an in/out operand. The
           fallback label lives in the same asm statement because jumps
           between separate asm statements are not supported by the compiler. */
        unsigned int status = 0xFFFFFFFFu;
        __asm__ __volatile__ ("xbegin 1f\n\t"
                              "1:"
                              : "+a"(status)
                              :
                              : "memory");
        if (status == 0xFFFFFFFFu) {
            i++;
            __asm__ __volatile__ ("xend" : : : "memory");
        }
    }

    printf("%d\n", i);
    return 0;
}

我的理解是这些 asm 指令“在不支持 RTM 的处理器上使用时会产生 #UD 异常”,或者至少这是英特尔手册对此事所说的(第 387 页)。

我也检查了生成的汇编代码,这些指令确实还在(.s 文件的内容见下文)。

既然这些指令看起来确实被执行了,那么这些检查是不是完全错误的呢?

如果是这样,您将如何正确测试 RTM 支持?

片段的 ASM 代码

    .file   "rtm_simple.c"
# GNU C11 (Ubuntu 6.3.0-12ubuntu2) version 6.3.0 20170406 (x86_64-linux-gnu)
#   compiled by GNU C version 6.3.0 20170406, GMP version 6.1.2, MPFR version 3.1.5, MPC version 1.0.3, isl version 0.15
# GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
# options passed:  -imultiarch x86_64-linux-gnu rtm_simple.c -mtune=generic
# -march=x86-64 -fverbose-asm -fstack-protector-strong -Wformat
# -Wformat-security
# options enabled:  -fPIC -fPIE -faggressive-loop-optimizations
# -fasynchronous-unwind-tables -fauto-inc-dec -fchkp-check-incomplete-type
# -fchkp-check-read -fchkp-check-write -fchkp-instrument-calls
# -fchkp-narrow-bounds -fchkp-optimize -fchkp-store-bounds
# -fchkp-use-static-bounds -fchkp-use-static-const-bounds
# -fchkp-use-wrappers -fcommon -fdelete-null-pointer-checks
# -fdwarf2-cfi-asm -fearly-inlining -feliminate-unused-debug-types
# -ffunction-cse -fgcse-lm -fgnu-runtime -fgnu-unique -fident
# -finline-atomics -fira-hoist-pressure -fira-share-save-slots
# -fira-share-spill-slots -fivopts -fkeep-static-consts
# -fleading-underscore -flifetime-dse -flto-odr-type-merging -fmath-errno
# -fmerge-debug-strings -fpeephole -fplt -fprefetch-loop-arrays
# -freg-struct-return -fsched-critical-path-heuristic
# -fsched-dep-count-heuristic -fsched-group-heuristic -fsched-interblock
# -fsched-last-insn-heuristic -fsched-rank-heuristic -fsched-spec
# -fsched-spec-insn-heuristic -fsched-stalled-insns-dep -fschedule-fusion
# -fsemantic-interposition -fshow-column -fsigned-zeros
# -fsplit-ivs-in-unroller -fssa-backprop -fstack-protector-strong
# -fstdarg-opt -fstrict-volatile-bitfields -fsync-libcalls -ftrapping-math
# -ftree-cselim -ftree-forwprop -ftree-loop-if-convert -ftree-loop-im
# -ftree-loop-ivcanon -ftree-loop-optimize -ftree-parallelize-loops=
# -ftree-phiprop -ftree-reassoc -ftree-scev-cprop -funit-at-a-time
# -funwind-tables -fverbose-asm -fzero-initialized-in-bss
# -m128bit-long-double -m64 -m80387 -malign-stringops
# -mavx256-split-unaligned-load -mavx256-split-unaligned-store
# -mfancy-math-387 -mfp-ret-in-387 -mfxsr -mglibc -mieee-fp
# -mlong-double-80 -mmmx -mno-sse4 -mpush-args -mred-zone -msse -msse2
# -mstv -mtls-direct-seg-refs -mvzeroupper

    .section    .rodata
.LC0:
    .string "%d\n"
    .text
    .globl  main
    .type   main, @function
# main: compiler output for the loop in rtm_simple.c. The counter i lives in
# the stack slot at -4(%rbp); the inline-asm instructions appear verbatim
# between the #APP / #NO_APP markers.
main:
.LFB0:
    .cfi_startproc
    pushq   %rbp    #
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp  #,
    .cfi_def_cfa_register 6
    subq    $16, %rsp   #,
    movl    $0, -4(%rbp)    #, i
# Enter the loop at its condition check.
    jmp .L2 #
# Loop body: xbegin (fallback target ABORT), then load/add/store to increment
# i, then xend, then the ABORT label itself.
.L3:
#APP
# 7 "rtm_simple.c" 1
    xbegin ABORT
# 0 "" 2
#NO_APP
    movl    -4(%rbp), %eax  # i, i.0_5
    addl    $1, %eax    #, i.1_6
    movl    %eax, -4(%rbp)  # i.1_6, i
#APP
# 9 "rtm_simple.c" 1
    xend
# 0 "" 2
# 10 "rtm_simple.c" 1
    ABORT:
# 0 "" 2
#NO_APP
# Loop condition: keep iterating while i <= 99999999.
.L2:
    movl    -4(%rbp), %eax  # i, i.2_4
    cmpl    $99999999, %eax #, i.2_4
    jle .L3 #,
# After the loop: call printf with the format string .LC0 and i, then return 0.
    movl    -4(%rbp), %eax  # i, i.3_8
    movl    %eax, %esi  # i.3_8,
    leaq    .LC0(%rip), %rdi    #,
    movl    $0, %eax    #,
    call    printf@PLT  #
    movl    $0, %eax    #, _10
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE0:
    .size   main, .-main
    .ident  "GCC: (Ubuntu 6.3.0-12ubuntu2) 6.3.0 20170406"
    .section    .note.GNU-stack,"",@progbits

CPU信息

processor       : 0
vendor_id       : GenuineIntel
cpu family      : 6
model           : 142
model name      : Intel(R) Core(TM) i7-7600U CPU @ 2.80GHz
stepping        : 9
cpu MHz         : 2904.004
cache size      : 4096 KB
physical id     : 0
siblings        : 2
core id         : 0
cpu cores       : 2
apicid          : 0
initial apicid  : 0
fpu             : yes
fpu_exception   : yes
cpuid level     : 22
wp              : yes
flags           : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc pni pclmulqdq ssse3 cx16 sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx rdrand hypervisor lahf_lm abm 3dnowprefetch rdseed clflushopt
bugs            :
bogomips        : 5808.00
clflush size    : 64
cache_alignment : 64
address sizes   : 39 bits physical, 48 bits virtual
power management:

processor       : 1
vendor_id       : GenuineIntel
cpu family      : 6
model           : 142
model name      : Intel(R) Core(TM) i7-7600U CPU @ 2.80GHz
stepping        : 9
cpu MHz         : 2904.004
cache size      : 4096 KB
physical id     : 0
siblings        : 2
core id         : 1
cpu cores       : 2
apicid          : 1
initial apicid  : 1
fpu             : yes
fpu_exception   : yes
cpuid level     : 22
wp              : yes
flags           : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc pni pclmulqdq ssse3 cx16 sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx rdrand hypervisor lahf_lm abm 3dnowprefetch rdseed clflushopt
bugs            :
bogomips        : 5808.00
clflush size    : 64
cache_alignment : 64
address sizes   : 39 bits physical, 48 bits virtual
power management:
4

0 回答 0