尝试使用(linux 内核)rwlock API,我得到了一个我无法理解为什么会发生的行为(死锁)。有人有什么解释吗?(请注意,代码仅用于实验,没有背后的逻辑:示例我知道在持有自旋锁时睡觉是个坏主意,但这对我测试的代码没有影响)。
代码如下,运行会导致死锁。
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("<toto@gmail.com>");
MODULE_DESCRIPTION("test rwlock");
static DEFINE_RWLOCK(rwlock);
// shared variable
static int var;
static struct task_struct *w;
static struct task_struct *r1;
static struct task_struct *r2;
static int w_f(void *unsed)
{
while (1) {
printk("w: try to lock\n");
if (write_trylock(&rwlock)) {
printk("w: locked\n");
} else {
printk("w: unavailable lock, spin\n");
write_lock(&rwlock);
printk("w: out of spin\n");
}
var = 1;
ssleep(4);
write_unlock(&rwlock);
printk("w: write unlock\n");
if (kthread_should_stop()) {
printk("w: should stop now\n");
return 1;
} else {
printk("w: continue...\n");
}
}
return 1;
}
static int r1_f(void *unsed)
{
while (1) {
if (read_trylock(&rwlock)) {
printk("\tr1: locked\n");
ssleep(3);
printk("\tr1: read access\n");
read_unlock(&rwlock);
} else {
printk("\tr1: unavailbe lock, spin\n");
read_lock(&rwlock);
}
if (kthread_should_stop()) {
printk("\tr1: should stop now\n");
return 1;
} else {
printk("\tr1: continue...\n");
}
}
return 0;
}
static int r2_f(void *unsed)
{
while (1) {
if (read_trylock(&rwlock)) {
printk("\tr2: locked\n");
ssleep(1);
printk("\tr2: read access\n");
read_unlock(&rwlock);
} else {
printk("\tr2: unavailbe lock, spin\n");
read_lock(&rwlock);
}
if (kthread_should_stop()) {
printk("\tr2: should stop now\n");
return 1;
} else {
printk("\tr2: continue...\n");
}
}
return 0;
}
static int __init init_thread(void)
{
printk(KERN_ALERT "Thread creating ...\n");
w = kthread_create(w_f, NULL, "writer1");
r1 = kthread_create(r1_f, NULL, "reader1");
r2 = kthread_create(r2_f, NULL, "reader2");
if (w && r1 && r2) {
printk(KERN_ALERT "Thread Created Sucessfully\n");
wake_up_process(w);
wake_up_process(r1);
wake_up_process(r2);
} else {
printk("Thread Creation Failed\n");
}
return 0;
}
static void __exit cleanup_thread(void)
{
int ret, ret1, ret2;
printk(KERN_ALERT "Cleaning up ...\n");
ret = kthread_stop(w);
ret1 = kthread_stop(r2);
ret2 = kthread_stop(r2);
printk("stop threads returned %d,%d,%d\n", ret, ret1, ret2);
}
module_init(init_thread)
module_exit(cleanup_thread)
我正在获取日志:
[0 291.18289] Thread creating ...
[0 291.18444] Thread Created Sucessfully
[0 291.18450] w: try to lock
[0 291.18453] r1: locked
[0 291.18456] r2: locked
[0 291.18459] w: unavailable lock, spin
[1 292.261093] r2: read access
[1 292.261096] r2: continue...
[1 292.261096] r2: unavailbe lock, spin
[3 294.261151] r1: read access
[3 294.261153] r1: continue...
[3 294.261167] w: out of spin
[3 294.261168] r1: unavailbe lock, spin
[7 298.265307] w: write unlock
[7 298.265308] r1: continue...
[7 298.265308] r2: continue...
[7 298.265309] r1: locked
[7 298.265309] r2: locked
[7 298.265314] w: continue...
[7 298.265314] w: try to lock
[7 298.265315] w: unavailable lock, spin
[8 299.269282] r2: read access
[8 299.269285] r2: continue...
[8 299.269286] r2: unavailbe lock, spin
[10 301.269377] r1: read access
[10 301.269380] r1: continue...
[10 301.269381] r1: unavailbe lock, spin
[33 324.122110] **NMI watchdog: BUG: soft lockup - CPU#2 stuck for 22s! [writer1:3819]**
我试图将运行时模式化:
(----) -> running(ssleep) holding the lock.
(====) -> spinning on the lock
time 0 1 2 3 4 5 6 7 8 9 10 11 12 33
r1 *-----+-----+-----+=====+=====+=====+=====+-----+-----+-----+=====+=====+===== ... +=====+=====
r2 *-----+=====+=====+=====+=====+=====+=====+-----+=====+=====+=====+=====+===== ... +=====+=====
w *=====+=====+=====+-----+-----+-----+-----+=====+=====+=====+=====+=====+===== ... +=====+=====
无法理解的行为: 我想知道为什么 (w) 线程一直在 10s 旋转,情况与即时 3s 相同:r2 正在旋转,因为 w 试图获取写锁(w 正在旋转,因为 r1 持有锁),但是当r1 完成,w 应该被解锁并开始运行。这发生在 3 秒而不是 10 秒。