10

请看下面这个简单的函数,它在由 std::mutex 实现的锁的保护下递增一个整数:

#include <mutex>

// Global mutex that inc() acquires around every increment.
std::mutex m;

/// Adds one to `i` while holding `m`.
///
/// @param i  Integer to increment in place.
///
/// The lock is taken when `guard` is constructed and released when it goes
/// out of scope at the end of the function.
void inc(int& i) {
    std::unique_lock<std::mutex> guard{m};
    ++i;
}

我原本期望它(在内联之后)被直接编译为:调用 m.lock(),递增 i,然后调用 m.unlock()。

但是,检查最新版本的 gcc 和 clang 生成的汇编代码,我们会发现一个额外的复杂之处。先看 gcc 版本:

# GCC output for inc(int&). On entry rdi holds the address of i.
inc(int&):
  # Load the address of the __pthread_key_create proxy symbol and test it;
  # a zero address sends execution to the lock-free path at .L2.
  mov eax, OFFSET FLAT:__gthrw___pthread_key_create(unsigned int*, void (*)(void*))
  test rax, rax
  je .L2
  # Locking path: keep the pointer to i in rbx across the lock call.
  push rbx
  mov rbx, rdi
  mov edi, OFFSET FLAT:m
  call __gthrw_pthread_mutex_lock(pthread_mutex_t*)
  # A nonzero return value from the lock call is routed to the throw at .L10.
  test eax, eax
  jne .L10
  # Increment *i, then unlock m via a tail jump.
  add DWORD PTR [rbx], 1
  mov edi, OFFSET FLAT:m
  pop rbx
  jmp __gthrw_pthread_mutex_unlock(pthread_mutex_t*)
.L2:
  # Lock-free path: increment *i directly and return.
  add DWORD PTR [rdi], 1
  ret
.L10:
  # Error path: pass the lock call's return value to __throw_system_error.
  mov edi, eax
  call std::__throw_system_error(int)

有趣的是前几行。这段汇编代码检查 __gthrw___pthread_key_create 的地址(它是 pthread_key_create 的实现,即用于创建线程本地存储键的函数),如果该地址为零,就跳转到 .L2,在那里只用一条指令完成递增,完全不加锁。

如果它不为零,它会按预期进行:锁定互斥体,执行增量,然后解锁。

clang 做得更多:它检查该函数的地址两次,一次在 lock 之前,一次在 unlock 之前:

# Clang output for inc(int&). On entry rdi holds the address of i.
inc(int&): # @inc(int&)
  # Keep the pointer to i in rbx for the whole function.
  push rbx
  mov rbx, rdi
  # First check: a zero address for __pthread_key_create skips the lock call.
  mov eax, __pthread_key_create
  test rax, rax
  je .LBB0_4
  mov edi, m
  call pthread_mutex_lock
  # A nonzero return value from the lock call is routed to the throw at .LBB0_6.
  test eax, eax
  jne .LBB0_6
  inc dword ptr [rbx]
  # Second check: the same address is tested again before unlocking; a zero
  # address skips the unlock call.
  mov eax, __pthread_key_create
  test rax, rax
  je .LBB0_5
  mov edi, m
  pop rbx
  jmp pthread_mutex_unlock # TAILCALL
.LBB0_4:
  # Lock-free path: increment *i, then fall through to the common return.
  inc dword ptr [rbx]
.LBB0_5:
  pop rbx
  ret
.LBB0_6:
  # Error path: pass the lock call's return value to __throw_system_error.
  mov edi, eax
  call std::__throw_system_error(int)

这次检查的目的是什么?

也许是为了支持目标文件最终编译成没有 pthreads 支持的二进制文件,然后在这种情况下回退到没有锁定的版本的情况?我找不到有关此行为的任何文档。

4

1 回答 1

7

你的猜测看起来是正确的。以下内容摘自 gcc 源代码库( https://github.com/gcc-mirror/gcc.git )中的 libgcc/gthr-posix.h 文件:

/* For a program to be multi-threaded the only thing that it certainly must
   be using is pthread_create.  However, there may be other libraries that
   intercept pthread_create with their own definitions to wrap pthreads
   functionality for some purpose.  In those cases, pthread_create being
   defined might not necessarily mean that libpthread is actually linked
   in.

   For the GNU C library, we can use a known internal name.  This is always
   available in the ABI, but no other library would define it.  That is
   ideal, since any public pthread function might be intercepted just as
   pthread_create might be.  __pthread_key_create is an "internal"
   implementation symbol, but it is part of the public exported ABI.  Also,
   it's among the symbols that the static libpthread.a always links in
   whenever pthread_create is used, so there is no danger of a false
   negative result in any statically-linked, multi-threaded program.

   For others, we choose pthread_cancel as a function that seems unlikely
   to be redefined by an interceptor library.  The bionic (Android) C
   library does not provide pthread_cancel, so we do use pthread_create
   there (and interceptor libraries lose).  */

/* Select GTHR_ACTIVE_PROXY, the symbol whose address is tested to decide
   whether threading is active.  One candidate is chosen per C library.
   NOTE(review): __gthrw2 and __gthrw_ are defined outside this excerpt;
   presumably they declare and name weak references -- confirm.  */
#ifdef __GLIBC__
/* GNU C library: use the internal __pthread_key_create symbol.  */
__gthrw2(__gthrw_(__pthread_key_create),
     __pthread_key_create,
     pthread_key_create)
# define GTHR_ACTIVE_PROXY  __gthrw_(__pthread_key_create)
#elif defined (__BIONIC__)
/* Bionic (Android): use pthread_create.  */
# define GTHR_ACTIVE_PROXY  __gthrw_(pthread_create)
#else
/* Any other C library: use pthread_cancel.  */
# define GTHR_ACTIVE_PROXY  __gthrw_(pthread_cancel)
#endif

/* Report whether the symbol named by GTHR_ACTIVE_PROXY has a non-null
   address.  Returns 1 when the address is non-null and 0 when it is null.  */
static inline int
__gthread_active_p (void)
{
  /* The address of the proxy symbol, held in a function-local static
     constant.  NOTE(review): the address can presumably be null only
     because the proxy is a weak reference declared outside this excerpt
     -- confirm.  */
  static void *const __gthread_active_ptr
    = __extension__ (void *) &GTHR_ACTIVE_PROXY;
  return __gthread_active_ptr != 0;
}

然后在文件的其余部分中,许多 pthread API 都包含在对__gthread_active_p()函数的检查中。如果__gthread_active_p()返回 0,则不执行任何操作并返回成功。

于 2017-09-22T19:45:26.260 回答