我最近用 C++ 实现了一个公平的读写票据自旋锁(fair reader-writer ticket spinlock)。代码相当简单,我认为它运行良好。我已经把这个自旋锁集成到一个更大的应用程序中,并注意到:在极少数情况下,代码运行得非常慢,而大多数时候运行得非常快。我确定问题出在这个自旋锁上,因为只要把它换成一个简单的读写自旋锁(不公平、不使用票据),代码立刻就会变快。这种情况在不同的机器上出现过好几次。我知道当线程数多于内核数时,这类锁会运行得很慢,但我是在一台有 48 个内核的机器上用 16 个线程运行的。在我的笔记本电脑(4 个内核、4 个线程)上无法重现该问题。代码如下:
/// Rounds `v` up to the nearest power of two.
///
/// A value that is already a power of two is returned unchanged. The
/// computation smears the highest set bit of `v - 1` into every lower bit
/// position and then adds one.
///
/// The shift amounts are derived from the width of `size_t` (assuming 8-bit
/// bytes) instead of being hard-coded, so no shift ever reaches the width of
/// the type; a fixed `>> 32` would be undefined behaviour where `size_t` is
/// 32 bits wide.
///
/// @param v Value to round up.
/// @return The smallest power of two that is >= `v`. Returns 0 when `v` is 0
///         or when the result is not representable in `size_t` (both cases
///         wrap around).
inline size_t rndup(size_t v) {
    --v;
    // Doubling shifts (1, 2, 4, ...) propagate the top set bit downwards.
    for (size_t shift = 1; shift < sizeof(size_t) * 8; shift <<= 1) {
        v |= v >> shift;
    }
    return v + 1;
}
/// Fair reader-writer spinlock that hands out tickets in arrival order.
///
/// Every acquirer (reader or writer) takes a ticket from `it_lock_pool` and
/// spins on the slot of a circular array (`lock_pool`) selected by that
/// ticket. When it is done queueing, it re-locks its own slot for later reuse
/// and unlocks the next slot, which admits the next ticket holder.
///
/// - A reader registers itself in `load_lock_pool` and immediately admits the
///   next ticket, so consecutive readers overlap.
/// - A writer additionally waits until `load_lock_pool` drops to zero and
///   keeps the next slot locked until `release_writer()` (or
///   `release_writer_acquire_reader()`) is called.
///
/// The object is not copyable. `clear()` and the destructor are not
/// synchronized with the acquire/release functions.
class SpinLockRW_MCS {
public:
    /// Builds the lock with a slot pool of `rndup(2 * nb_readers + 1)` entries.
    ///
    /// @param nb_readers Sizing parameter for the slot pool; presumably the
    ///        maximum number of threads using the lock concurrently (confirm
    ///        against callers).
    ///
    /// The pool is only allocated when `nb_readers` does not exceed
    /// `std::thread::hardware_concurrency()`. A result of 0 from that call
    /// means "unknown" and is treated as "no limit", so the lock stays usable.
    /// If the pool is not allocated, the acquire/release functions must not be
    /// called, because they dereference `lock_pool` unconditionally.
    SpinLockRW_MCS(const size_t nb_readers)
        // Initializers are listed in declaration order.
        : writer(nullptr), padding1{0},
          lock_pool(nullptr), padding2{0},
          mask_it(rndup(2 * nb_readers + 1) - 1), padding3{0},
          it_lock_pool(0), padding4{0},
          load_lock_pool(0) {
        const unsigned int hw_threads = std::thread::hardware_concurrency();
        if (hw_threads == 0 || nb_readers <= hw_threads) {
            lock_pool = new Lock[mask_it + 1];
            // Slot 0 starts unlocked so that the first ticket is admitted.
            lock_pool[0].is_locked = false;
        }
    }

    // The class owns `lock_pool`; copying would lead to a double delete.
    SpinLockRW_MCS(const SpinLockRW_MCS&) = delete;
    SpinLockRW_MCS& operator=(const SpinLockRW_MCS&) = delete;

    ~SpinLockRW_MCS() {
        clear();
    }

    /// Frees the slot pool and resets the counters.
    ///
    /// After this call `lock_pool` is null, so the acquire/release functions
    /// must not be used on this object any more.
    inline void clear() {
        delete[] lock_pool;  // deleting a null pointer is a no-op
        lock_pool = nullptr;
        writer = nullptr;
        it_lock_pool = 0;
        load_lock_pool = 0;
    }

    /// Acquires the lock in shared (reader) mode; blocks by spinning until
    /// this thread's ticket is admitted.
    inline void acquire_reader() {
        uint_fast32_t retry = 0;
        const size_t my_slot = it_lock_pool.fetch_add(1) & mask_it;
        const size_t next_slot = (my_slot + 1) & mask_it;
        while (lock_pool[my_slot].is_locked) {
            backoff(retry);
        }
        // Register as an active reader before admitting the next ticket, so a
        // writer queued right behind observes a non-zero reader count.
        ++load_lock_pool;
        lock_pool[my_slot].is_locked = true;     // re-arm the slot for reuse
        lock_pool[next_slot].is_locked = false;  // admit the next ticket
    }

    /// Releases a shared (reader) hold on the lock.
    inline void release_reader() {
        --load_lock_pool;
    }

    /// Acquires the lock in exclusive (writer) mode; spins until this thread's
    /// ticket is admitted and all active readers have released.
    inline void acquire_writer() {
        uint_fast32_t retry = 0;
        const size_t my_slot = it_lock_pool.fetch_add(1) & mask_it;
        const size_t next_slot = (my_slot + 1) & mask_it;
        while (lock_pool[my_slot].is_locked) {
            backoff(retry);
        }
        // The retry counter deliberately carries over from the first wait.
        while (load_lock_pool) {
            backoff(retry);
        }
        lock_pool[my_slot].is_locked = true;  // re-arm the slot for reuse
        // The next slot stays locked until release; remember it for then.
        writer = &lock_pool[next_slot];
    }

    /// Releases an exclusive (writer) hold by admitting the next ticket.
    inline void release_writer() {
        writer->is_locked = false;
    }

    /// Downgrades an exclusive hold to a shared one: the caller is counted as
    /// a reader first, then the next ticket is admitted.
    inline void release_writer_acquire_reader() {
        ++load_lock_pool;
        writer->is_locked = false;
    }

private:
    /// One slot of the circular pool; `is_locked` starts out true. The padding
    /// places the flags of consecutive slots 64 bytes apart (with 4-byte int).
    struct Lock {
        std::atomic<bool> is_locked;
        const int padding[15];
        Lock() : is_locked(true), padding{0} {}
    };

    /// Counts one spin iteration and yields the CPU once the caller has spun
    /// more than 100 times.
    static inline void backoff(uint_fast32_t& retry) {
        constexpr uint_fast32_t spins_before_yield = 100;
        if (++retry > spins_before_yield) std::this_thread::yield();
    }

    // Slot the current writer has to unlock on release.
    Lock* writer;
    const int padding1[14];
    // Circular array of slots; null when not allocated or after clear().
    Lock* lock_pool;
    const int padding2[14];
    // Pool size minus one (pool size is a power of two); maps tickets to slots.
    const size_t mask_it;
    const int padding3[14];
    // Next ticket to hand out.
    std::atomic<size_t> it_lock_pool;
    const int padding4[14];
    // Number of readers currently holding the lock.
    std::atomic<size_t> load_lock_pool;
};
如有任何建议,我将不胜感激!谢谢!