5

根据 GCC 文档(x86-transactional-memory-intrinsics.html),当事务失败/中止时,_xbegin() 应该返回中止状态。但是,我发现它有时会返回 0,而且频率非常高。**_xbegin()** 在什么情况下会返回 0?

查阅手册后,我发现很多情况都可能导致这种结果,例如 CPUID、SYSCALL、CLFLUSH 等指令。但是,我认为我的代码没有触发其中任何一种情况。

这是我的代码:模拟一家小银行,一个随机账户将 1 美元转移到另一个账户。

#include "immintrin.h"
#include <thread>
#include <unistd.h>
#include <iostream>

using namespace std;

// Number of worker threads started by main(); also the length of every
// per-thread counter array.
#define n_threads 1
// Number of transfer operations each worker thread attempts in f1().
#define OPSIZE 1000000000
// One bank account. The aligned(64) attribute requests 64-byte alignment, so
// consecutive elements of an account_t array are 64 bytes apart
// (presumably to keep each account on its own cache line -- TODO confirm).
typedef struct Account{
    long balance;   // running balance; incremented/decremented by f1()
    long number;    // account number; main() sets it to the array index
} __attribute__((aligned(64))) account_t;

// A bank: an array of accounts plus its element count.
typedef struct Bank{
    account_t* accounts;    // array of `size` accounts, allocated in main()
    long size;              // number of elements in `accounts`
} bank_t;

// Not referenced anywhere in the visible code.
bool done = 0;
// Per-worker statistics arrays (n_threads elements each, allocated in main()),
// written by f1() at index `id` and summed by f2():
//   tx       - committed transactions
//   _abort   - aborted transaction attempts
//   capacity - aborts with _XABORT_CAPACITY set
//   debug    - aborts with _XABORT_DEBUG set
//   failed   - aborts without _XABORT_RETRY (the transfer is abandoned)
//   conflict - aborts with _XABORT_CONFLICT set
//   zero     - aborts whose status is exactly 0
long *tx, *_abort, *capacity, *debug, *failed, *conflict, *zero;

/*
 * Worker thread body: runs OPSIZE transfer operations against `bank`, each one
 * attempted as an RTM hardware transaction (_xbegin/_xend).
 *
 * Parameters:
 *   bank - bank to operate on; account indices are drawn with
 *          rand() % bank->size.
 *   id   - index of this worker's slot in the global counter arrays
 *          (tx, _abort, zero, conflict, capacity, debug, failed).
 *
 * Returns NULL unconditionally.
 *
 * Abort handling: every abort is counted and classified by its status bits.
 * A status without _XABORT_RETRY (this includes the all-zero status) bumps
 * failed[id] and abandons the current transfer; this function has no
 * non-transactional fallback, so that transfer is simply not performed.
 * Any other abort retries the same transfer.
 */
void* f1(bank_t* bank, int id){
    for(int i=0; i<OPSIZE; i++){
        int src = rand()%bank->size;
        int dst = rand()%bank->size;
        // Re-draw the destination until it differs from the source.
        while(src == dst){
            dst = rand()%bank->size;
        }

        while(true){
            unsigned stat =  _xbegin();
            if(stat == _XBEGIN_STARTED){
                // Transactional region: both updates commit together at _xend().
                bank->accounts[src].balance++;
                bank->accounts[dst].balance--;
                _xend();
                // Compiler-only barrier; emits no instruction.
                asm volatile("":::"memory");
                tx[id]++;
                break;
            }else{
                // The transaction aborted; stat holds the abort status bits.
                _abort[id]++;

                if (stat == 0){
                    zero[id]++;
                }
                if (stat & _XABORT_CONFLICT){
                    conflict[id]++;
                }
                if (stat & _XABORT_CAPACITY){
                    capacity[id]++;
                }
                if (stat & _XABORT_DEBUG){
                    debug[id]++;
                }
                // These two sanity checks must run before the retry test
                // below. An abort carrying one of these bits without
                // _XABORT_RETRY would otherwise leave the loop through the
                // `break` and never reach the diagnostic.
                if (stat & _XABORT_NESTED){
                    printf("[ PANIC ] _XABORT_NESTED\n");
                    exit(-1);
                }
                if (stat & _XABORT_EXPLICIT){
                    printf("[ panic ] _XABORT_EXPLICIT\n");
                    exit(-1);
                }
                // No retry hint: give up on this transfer and move on.
                if ((stat & _XABORT_RETRY) == 0){
                    failed[id]++;
                    break;
                }
            }
        }
    }
    return NULL;
}
/*
 * Heartbeat thread body. Once per second it sums every per-worker counter
 * over all n_threads slots and prints how much each total has grown since the
 * previous report. The loop has no exit, so the function never returns.
 * `bank` is accepted but not used.
 */
void* f2(bank_t* bank){
    printf("_heartbeat function\n");

    // Totals as of the previous report; all zero before the first one.
    long prev_tx = 0, prev_abort = 0, prev_failed = 0, prev_capacity = 0;
    long prev_debug = 0, prev_conflict = 0, prev_zero = 0;

    for(;;){
        // Fresh totals for this report.
        long sum_tx = 0, sum_abort = 0, sum_failed = 0, sum_capacity = 0;
        long sum_debug = 0, sum_conflict = 0, sum_zero = 0;

        for(int t = 0; t < n_threads; ++t){
            sum_tx       += tx[t];
            sum_abort    += _abort[t];
            sum_failed   += failed[t];
            sum_capacity += capacity[t];
            sum_debug    += debug[t];
            sum_conflict += conflict[t];
            sum_zero     += zero[t];
        }

        // Print the growth of each total since the last report.
        printf("txs\t%ld\taborts\t\t%ld\tfaileds\t%ld\tcapacities\t%ld\tdebugs\t%ld\tconflit\t%ld\tzero\t%ld\n",
            sum_tx - prev_tx,
            sum_abort - prev_abort,
            sum_failed - prev_failed,
            sum_capacity - prev_capacity,
            sum_debug - prev_debug,
            sum_conflict - prev_conflict,
            sum_zero - prev_zero);

        // Remember this report's totals for the next delta.
        prev_tx       = sum_tx;
        prev_abort    = sum_abort;
        prev_failed   = sum_failed;
        prev_capacity = sum_capacity;
        prev_debug    = sum_debug;
        prev_conflict = sum_conflict;
        prev_zero     = sum_zero;

        sleep(1);
    }
}

/*
 * Program entry point: builds a bank of 10240 zero-balance accounts,
 * allocates the per-thread statistics arrays, starts the heartbeat thread
 * (f2) and n_threads worker threads (f1), then waits for the workers.
 *
 * argc/argv are not used. Returns 0 once every worker has been joined. The
 * heartbeat thread loops forever and is never joined; it ends when the
 * process exits on return from main.
 */
int main(int argc, char** argv){
    int accounts = 10240;

    bank_t* bank = new bank_t;
    bank->accounts = new account_t[accounts];
    bank->size = accounts;

    for(int i=0; i<accounts; i++){
        bank->accounts[i].number = i;
        bank->accounts[i].balance = 0;
    }

    thread* pid[n_threads];
    // The trailing () value-initialises every counter to 0 at allocation.
    // This has to happen before the heartbeat thread starts, because f2()
    // reads all of these arrays as soon as it runs.
    tx = new long[n_threads]();
    _abort = new long[n_threads]();
    capacity = new long[n_threads]();
    debug = new long[n_threads]();
    failed = new long[n_threads]();
    conflict = new long[n_threads]();
    zero = new long[n_threads]();

    thread* _heartbeat = new thread(f2, bank);
    for(int i=0; i<n_threads; i++){
        pid[i] = new thread(f1, bank, i);
    }

    // Wait for the workers only.
    for(int i=0; i<n_threads;i++){
        pid[i]->join();
    }
    return 0;
}

补充:

  1. 所有帐户都是 64 字节对齐的。我打印了 bank->accounts[0] 和 bank->accounts[1] 的地址:0xf41080、0xf410c0。
  2. 使用了 -O0 和 asm volatile("":::"memory");,因此不存在指令重排序问题。
  3. 中止率随时间增加。这是结果

    txs     84      aborts          0       faileds 0       capacities      0     debugs  0       conflit 0       zero    0
    txs     17070804      aborts          71      faileds 68      capacities      9       debugs  0       conflit 3       zero    59
    txs     58838         aborts          9516662 faileds 9516661 capacities      0       debugs  0       conflit 1       zero    9516661
    txs     0             aborts          9550428 faileds 9550428 capacities      0       debugs  0       conflit 0       zero    9550428
    txs     0             aborts          9549254 faileds 9549254 capacities      0       debugs  0       conflit 0       zero    9549254
    
  4. 即使 n_threads 为 1,结果也是一样的。

  5. 如果我按如下方式在回退后添加粗锁,结果似乎是正确的。

    // Fallback lock word: the compare-and-swap calls below take it by
    // changing 0 -> 1 and release it by changing 1 -> 0.
    int fallback_lock;
    
    /*
     * Enters a critical section, preferring an RTM transaction.
     *
     * id - index of the caller's slot in the global counter arrays.
     *
     * Returns true when a hardware transaction has been started (the caller
     * is now executing transactionally), or false when the abort status
     * carried no retry hint and the fallback lock has been acquired instead.
     * Aborts that do carry _XABORT_RETRY are retried indefinitely.
     */
    bool
    rtm_begin(int id)
    {
        for (;;) {
            unsigned status = _xbegin();
            if (status == _XBEGIN_STARTED)
                return true;

            // Aborted: record the attempt and classify the status.
            _abort[id]++;
            if (status == 0)
                zero[id]++;
            if (status & _XABORT_CONFLICT)
                conflict[id]++;

            // The hardware hints that a retry may succeed: try again.
            if (status & _XABORT_RETRY)
                continue;

            // No retry hint: spin until the fallback lock goes 0 -> 1.
            failed[id]++;
            while (!__sync_bool_compare_and_swap(&fallback_lock, 0, 1)) {
            }
            return false;
        }
    }
    ....
    
    // true: running inside a hardware transaction; false: fallback lock held.
    in_rtm = rtm_begin(id);
    // NOTE(review): presumably this read puts fallback_lock into the
    // transaction's read set so that a later lock acquisition aborts the
    // transaction; the value stored in y is not checked in the visible code.
    y = fallback_lock;
    accounts[src].balance--;
    accounts[dst].balance++;
    if (in_rtm){
        // Commit the transaction.
        _xend();
    }else{
        // Release the fallback lock by changing it 1 -> 0.
        while(!__sync_bool_compare_and_swap(&fallback_lock, 1, 0)){
        }
    }
    
4

1 回答 1

0

RTM上的硬件文档建议如下:

在 RTM 中止后,EAX 的值可以为“0”。例如,在 RTM 区域内使用 CPUID 指令会导致事务中止,并且可能不满足设置任何 EAX 位的要求。这可能导致 EAX 值为“0”。

(其中,EAX 是用于传递状态的硬件寄存器,GCC 会把它的值作为 _xbegin() 的返回值返回给你)

于 2016-07-06T11:10:35.483 回答