寻找一个最小的、基于futex的单写/多读锁实现,除了单个 4 字节 futex 状态变量之外,不需要空间开销。
一些背景知识:我有一个应用程序,它会在数千万到数亿个小对象中各嵌入一个锁。由于锁的粒度非常细,再加上应用程序本身的结构,我预计争用极少。此外,写入者很少出现,而相互竞争的写入者更为罕见。基于这些原因,在这个特定场景下,(理论上)容易出现“惊群”(thundering herd)现象的方案是完全可以接受的。
寻找一个最小的、基于futex的单写/多读锁实现,除了单个 4 字节 futex 状态变量之外,不需要空间开销。
一些背景知识:我有一个应用程序,它会在数千万到数亿个小对象中各嵌入一个锁。由于锁的粒度非常细,再加上应用程序本身的结构,我预计争用极少。此外,写入者很少出现,而相互竞争的写入者更为罕见。基于这些原因,在这个特定场景下,(理论上)容易出现“惊群”(thundering herd)现象的方案是完全可以接受的。
你可以在 https://gist.github.com/smokku/653c469d695d60be4fe8170630ba8205 找到我的实现。
这个想法是:同一时刻只能有一个线程持有写锁(futex 值为 0),锁可以处于打开状态(futex 值为 1),或者可以同时有多个读取线程(futex 值大于 1)。因此,小于 1 的值(只有 0 这一个)会让读取者和写入者都阻塞在 futex 上,而大于 1 的值只会阻塞写入者。解锁线程会唤醒一个等待中的线程,但需要注意:一次只有读取者才能利用的唤醒,不能被某个写入线程白白消耗掉。
/*
 * Spin-wait hint issued between retries of a contended operation.
 * The x86 PAUSE builtin only exists on x86 targets, so other architectures
 * get an equivalent hint (AArch64 YIELD) or, failing that, a plain compiler
 * barrier. This keeps the file buildable on every Linux port.
 */
#if defined(__i386__) || defined(__x86_64__)
#define cpu_relax() __builtin_ia32_pause()
#elif defined(__aarch64__)
#define cpu_relax() __asm__ __volatile__("yield" ::: "memory")
#else
#define cpu_relax() __asm__ __volatile__("" ::: "memory")
#endif

/*
 * Atomic compare-and-swap: if *P equals O, store N into *P.
 * Evaluates to the value *P held before the operation, so the swap
 * succeeded exactly when the result equals O.
 */
#define cmpxchg(P, O, N) __sync_val_compare_and_swap((P), (O), (N))

/*
 * The whole lock is this single futex word:
 *   0      write-locked
 *   1      open (nobody holds it)
 *   n > 1  held by n - 1 readers
 */
static unsigned _lock = 1;
static const unsigned _lock_open = 1;
static const unsigned _lock_wlocked = 0;
/*
 * Release the lock, in whichever mode the caller holds it.
 *
 * The futex word is 0 when write-locked, 1 when open, and n > 1 when held
 * by n - 1 readers. A writer release stores 1; a reader release decrements
 * the word. Calling this on an open lock is a no-op.
 *
 * After a writer release every waiter is woken rather than just one.
 * All readers that queued up behind the writer can share the lock, and
 * waking a single one would admit them one at a time, each only after the
 * previous reader has released. Reader releases still wake one waiter.
 */
static void _unlock(void)
{
    /* 0x7fffffff is INT_MAX, the "wake everybody" count for FUTEX_WAKE. */
    enum { wake_one = 1, wake_all = 0x7fffffff };
    unsigned current, wanted, seen;

    current = __atomic_load_n(&_lock, __ATOMIC_RELAXED);
    for (;;) {
        if (current == _lock_open) {
            return; /* nothing is held, nothing to release */
        }
        wanted = (current == _lock_wlocked) ? _lock_open : current - 1;

        seen = cmpxchg(&_lock, current, wanted);
        if (seen == current) {
            break;
        }
        /* Lost a race with another thread: retry from the value it left. */
        current = seen;
    }

    /* Best-effort wake: there may be no waiters at all. */
    syscall(SYS_futex, &_lock, FUTEX_WAKE_PRIVATE,
            current == _lock_wlocked ? wake_all : wake_one, NULL, NULL, 0);
}
/*
 * Acquire the lock for reading (shared).
 *
 * A reader takes the lock by atomically incrementing the futex word, which
 * is possible whenever the word is not 0 (write-locked). The thread sleeps
 * on the futex only while the word is 0.
 *
 * A failed increment caused by another reader changing the count is simply
 * retried. Waiting on the stale non-zero value instead would put the thread
 * to sleep on a lock it could take: if the word had meanwhile returned to
 * that value (for example 1 -> 2 -> 1), nobody would be left to wake it.
 */
static void _rlock(void)
{
    for (;;) {
        unsigned current = __atomic_load_n(&_lock, __ATOMIC_RELAXED);

        if (current != _lock_wlocked) {
            if (cmpxchg(&_lock, current, current + 1) == current) {
                return; /* read lock acquired */
            }
            /* Raced with another thread; the lock may still be available. */
            cpu_relax();
            continue;
        }

        /*
         * A writer holds the lock. Sleep while the word is still 0; any
         * return (woken, word already changed, interrupted) just leads
         * to a fresh look at the word.
         */
        syscall(SYS_futex, &_lock, FUTEX_WAIT_PRIVATE, _lock_wlocked, NULL, NULL, 0);
    }
}
/*
 * Acquire the lock for writing (exclusive).
 *
 * The only way in is the atomic transition open (1) -> write-locked (0).
 * Otherwise the thread sleeps on the futex, expecting the value the failed
 * compare-and-swap just reported.
 *
 * The expected value is re-sampled on every retry. Re-using a stale one
 * would make each FUTEX_WAIT fail immediately once the reader count had
 * moved to a different non-open value, turning the wait into a
 * syscall-driven busy loop until the lock finally opened.
 */
static void _wlock(void)
{
    for (;;) {
        unsigned current = cmpxchg(&_lock, _lock_open, _lock_wlocked);
        if (current == _lock_open) {
            return; /* write lock acquired */
        }

        if (syscall(SYS_futex, &_lock, FUTEX_WAIT_PRIVATE, current, NULL, NULL, 0) != 0) {
            /*
             * The wait failed (per futex(2): the word no longer matched,
             * or a signal arrived), so no wake-up was consumed. Go back
             * and sample the word again.
             */
            cpu_relax();
            continue;
        }

        /*
         * An unlock woke us. If the lock is still not open, readers hold
         * it and we cannot take it, so pass the wake-up on in case the
         * waiter it reaches is a reader that can.
         * NOTE(review): with several writers waiting while readers hold
         * the lock, this relay can bounce between the writers until the
         * lock opens - confirm that is acceptable for the workload.
         */
        if (__atomic_load_n(&_lock, __ATOMIC_RELAXED) != _lock_open) {
            syscall(SYS_futex, &_lock, FUTEX_WAKE_PRIVATE, 1, NULL, NULL, 0);
        }
    }
}