0

在尝试使用 Linux 内核的 rwlock API 时,我遇到了一个无法理解的行为(死锁)。有人能解释一下原因吗?(请注意,这段代码仅用于实验,背后没有实际的业务逻辑。例如,我知道在持有自旋锁时睡眠是个坏主意,但这对我要测试的行为没有影响。)

代码如下,运行会导致死锁。

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/spinlock.h>

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("<toto@gmail.com>");
MODULE_DESCRIPTION("test rwlock");
/* The single reader/writer lock contended by the writer and both reader threads. */
static DEFINE_RWLOCK(rwlock);

// shared variable: written by w_f() while it holds the write lock; the
// reader threads in this file take the read lock but never actually read it
static int var;

/* Task handles for the writer (w) and the two readers (r1, r2); set in
 * init_thread() and passed to kthread_stop() in cleanup_thread(). */
static struct task_struct *w;
static struct task_struct *r1;
static struct task_struct *r2;

/*
 * Writer thread body.
 *
 * Each iteration takes the write lock (trying the non-blocking path first and
 * falling back to a spinning write_lock()), sets the shared variable, keeps
 * the lock for 4 seconds, then releases it. The loop ends once
 * kthread_should_stop() reports a stop request.
 *
 * NOTE: ssleep() is called while the rwlock is held. Sleeping under a
 * spinning lock is normally a bug; it is kept here because this module is a
 * deliberate experiment.
 *
 * @unsed: unused thread argument.
 * Return: always 1.
 */
static int w_f(void *unsed)
{
    for (;;) {
        printk("w: try to lock\n");
        if (!write_trylock(&rwlock)) {
            /* Lock is busy: fall back to the spinning acquire. */
            printk("w: unavailable lock, spin\n");
            write_lock(&rwlock);
            printk("w: out of spin\n");
        } else {
            printk("w: locked\n");
        }

        /* Critical section: update the shared variable and hold the lock. */
        var = 1;
        ssleep(4);
        write_unlock(&rwlock);
        printk("w: write unlock\n");

        if (kthread_should_stop())
            break;
        printk("w: continue...\n");
    }

    printk("w: should stop now\n");
    return 1;
}

/*
 * Reader thread 1 body.
 *
 * Each iteration tries to take the read lock without blocking. On success it
 * holds the lock for 3 seconds and releases it. On failure it falls back to a
 * spinning read_lock(). The loop ends once kthread_should_stop() reports a
 * stop request, returning 1.
 *
 * NOTE(review): lock leak. The else branch acquires the lock with read_lock()
 * but no read_unlock() follows on that path. The next iteration then takes
 * the read lock again via read_trylock() and releases it only once, so the
 * reader count never returns to zero and a later write_lock() can never
 * succeed. The ssleep()/read_unlock() sequence needs to run after BOTH
 * branches.
 */
static int r1_f(void *unsed)
{
    while (1) {
        if (read_trylock(&rwlock)) {
            printk("\tr1: locked\n");
            /* Sleeps while holding the lock; deliberate in this experiment. */
            ssleep(3);
            printk("\tr1: read access\n");
            read_unlock(&rwlock);
        } else {
            printk("\tr1: unavailbe lock, spin\n");
            /* Acquired here but never released on this path (see NOTE above). */
            read_lock(&rwlock);
        }
        if (kthread_should_stop()) {
            printk("\tr1: should stop now\n");
            return 1;
        } else {
            printk("\tr1: continue...\n");
        }
    }

    /* Not reached: the loop above only exits through the return inside it. */
    return 0;
}


/*
 * Reader thread 2 body.
 *
 * Each iteration tries to take the read lock without blocking. On success it
 * holds the lock for 1 second and releases it. On failure it falls back to a
 * spinning read_lock(). The loop ends once kthread_should_stop() reports a
 * stop request, returning 1.
 *
 * NOTE(review): lock leak. The else branch acquires the lock with read_lock()
 * but no read_unlock() follows on that path. The next iteration then takes
 * the read lock again via read_trylock() and releases it only once, so the
 * reader count never returns to zero and a later write_lock() can never
 * succeed. The ssleep()/read_unlock() sequence needs to run after BOTH
 * branches.
 */
static int r2_f(void *unsed)
{
    while (1) {
        if (read_trylock(&rwlock)) {
            printk("\tr2: locked\n");
            /* Sleeps while holding the lock; deliberate in this experiment. */
            ssleep(1);
            printk("\tr2: read access\n");
            read_unlock(&rwlock);

        } else {
            printk("\tr2: unavailbe lock, spin\n");
            /* Acquired here but never released on this path (see NOTE above). */
            read_lock(&rwlock);
        }

        if (kthread_should_stop()) {
            printk("\tr2: should stop now\n");
            return 1;
        } else {
            printk("\tr2: continue...\n");
        }
    }
    /* Not reached: the loop above only exits through the return inside it. */
    return 0;
}


/*
 * Module entry point: create the writer and the two reader threads, then
 * start them.
 *
 * kthread_create() reports failure through an ERR_PTR() value, never NULL, so
 * each result is checked with IS_ERR(). If any creation fails, the threads
 * that were already created (but not yet woken) are stopped and the error is
 * returned, so the module load fails instead of leaving invalid task
 * pointers behind for the exit handler.
 *
 * Return: 0 on success, a negative errno if a thread could not be created.
 */
static int __init init_thread(void)
{
    int err;

    printk(KERN_ALERT "Thread creating ...\n");

    w = kthread_create(w_f, NULL, "writer1");
    if (IS_ERR(w)) {
        err = PTR_ERR(w);
        goto fail;
    }

    r1 = kthread_create(r1_f, NULL, "reader1");
    if (IS_ERR(r1)) {
        err = PTR_ERR(r1);
        goto fail_stop_w;
    }

    r2 = kthread_create(r2_f, NULL, "reader2");
    if (IS_ERR(r2)) {
        err = PTR_ERR(r2);
        goto fail_stop_r1;
    }

    printk(KERN_ALERT "Thread Created Sucessfully\n");
    wake_up_process(w);
    wake_up_process(r1);
    wake_up_process(r2);
    return 0;

    /* Unwind in reverse order; a never-woken kthread exits without running
     * its thread function when stopped. */
fail_stop_r1:
    kthread_stop(r1);
fail_stop_w:
    kthread_stop(w);
fail:
    printk("Thread Creation Failed\n");
    return err;
}

/*
 * Module exit point: stop the writer and both reader threads and log the
 * value each kthread_stop() call returned.
 */
static void __exit cleanup_thread(void)
{
    int ret, ret1, ret2;

    printk(KERN_ALERT "Cleaning up ...\n");
    ret = kthread_stop(w);
    /*
     * Stop each reader exactly once. Previously r2 was stopped twice and r1
     * not at all, which left r1 running after unload and called
     * kthread_stop() on an already-exited task.
     */
    ret1 = kthread_stop(r1);
    ret2 = kthread_stop(r2);
    printk("stop threads returned %d,%d,%d\n", ret, ret1, ret2);
}
/* Register the module's load and unload entry points. */
module_init(init_thread)
module_exit(cleanup_thread)

我得到的日志如下:

[0  291.18289] Thread creating ...
[0  291.18444] Thread Created Sucessfully
[0  291.18450] w: try to lock
[0  291.18453]  r1: locked
[0  291.18456]  r2: locked
[0  291.18459] w: unavailable lock, spin
[1  292.261093]     r2: read access
[1  292.261096]     r2: continue...
[1  292.261096]     r2: unavailbe lock, spin
[3  294.261151]     r1: read access
[3  294.261153]     r1: continue...
[3  294.261167] w: out of spin
[3  294.261168]     r1: unavailbe lock, spin
[7  298.265307] w: write unlock
[7  298.265308]     r1: continue...
[7  298.265308]     r2: continue...
[7  298.265309]     r1: locked
[7  298.265309]     r2: locked
[7  298.265314] w: continue...
[7  298.265314] w: try to lock
[7  298.265315] w: unavailable lock, spin
[8  299.269282]     r2: read access
[8  299.269285]     r2: continue...
[8  299.269286]     r2: unavailbe lock, spin
[10  301.269377]    r1: read access
[10  301.269380]    r1: continue...
[10  301.269381]    r1: unavailbe lock, spin
[33  324.122110] **NMI watchdog: BUG: soft lockup - CPU#2 stuck for 22s! [writer1:3819]**

我尝试把运行时的情况画成时间线:

(----) -> running(ssleep) holding the lock.
(====) -> spinning on the lock

time  0     1     2     3     4     5     6     7     8     9     10    11    12         33
r1    *-----+-----+-----+=====+=====+=====+=====+-----+-----+-----+=====+=====+===== ... +=====+=====

r2    *-----+=====+=====+=====+=====+=====+=====+-----+=====+=====+=====+=====+===== ... +=====+=====

w     *=====+=====+=====+-----+-----+-----+-----+=====+=====+=====+=====+=====+===== ... +=====+=====

无法理解的行为:我想知道为什么 (w) 线程在第 10 秒之后仍然一直自旋。此时的情况与第 3 秒时相同:r2 在自旋,因为 w 正在尝试获取写锁(而 w 在自旋,是因为 r1 持有锁);当 r1 完成后,w 应该获得锁并开始运行。这在第 3 秒时确实发生了,但在第 10 秒时却没有发生。

4

1 回答 1

0

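已修复。问题在于:原代码中读线程在 else 分支里通过 read_lock() 获得了读锁,却没有对应的 read_unlock(),读锁永远不会被释放,写线程因此一直自旋。修正后的代码把 ssleep()/read_unlock() 移到 if/else 之外,使两条路径都会释放锁: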

/*
 * Writer thread body.
 *
 * Each iteration takes the write lock (trying the non-blocking path first and
 * falling back to a spinning write_lock()), sets the shared variable, keeps
 * the lock for 4 seconds, then releases it. The loop ends once
 * kthread_should_stop() reports a stop request.
 *
 * NOTE: ssleep() is called while the rwlock is held. Sleeping under a
 * spinning lock is normally a bug; it is kept here because this module is a
 * deliberate experiment.
 *
 * @unsed: unused thread argument.
 * Return: always 1.
 */
static int w_f(void *unsed)
{
    for (;;) {
        printk("w: try to lock\n");
        if (!write_trylock(&rwlock)) {
            /* Lock is busy: fall back to the spinning acquire. */
            printk("w: unavailable lock, spin\n");
            write_lock(&rwlock);
            printk("w: out of spin\n");
        } else {
            printk("w: locked\n");
        }

        /* Critical section: update the shared variable and hold the lock. */
        var = 1;
        ssleep(4);
        write_unlock(&rwlock);
        printk("w: write unlock\n");

        if (kthread_should_stop())
            break;
        printk("w: continue...\n");
    }

    printk("w: should stop now\n");
    return 1;
}

/*
 * Reader thread 1 body.
 *
 * Each iteration acquires the read lock, using the non-blocking
 * read_trylock() first and the spinning read_lock() as a fallback. It then
 * holds the lock for 3 seconds and releases it. The release is shared by
 * both acquisition paths, so every acquire is paired with one read_unlock().
 * The loop ends once kthread_should_stop() reports a stop request.
 *
 * @unsed: unused thread argument.
 * Return: always 1.
 */
static int r1_f(void *unsed)
{
    for (;;) {
        if (!read_trylock(&rwlock)) {
            printk("\tr1: unavailbe lock, spin\n");
            read_lock(&rwlock);
        } else {
            printk("\tr1: locked\n");
        }

        /* Read-side critical section; sleeping here is part of the experiment. */
        ssleep(3);
        printk("\tr1: read access\n");
        read_unlock(&rwlock);

        if (kthread_should_stop())
            break;
        printk("\tr1: continue...\n");
    }

    printk("\tr1: should stop now\n");
    return 1;
}


/*
 * Reader thread 2 body.
 *
 * Each iteration acquires the read lock, using the non-blocking
 * read_trylock() first and the spinning read_lock() as a fallback. It then
 * holds the lock for 1 second and releases it. The release is shared by
 * both acquisition paths, so every acquire is paired with one read_unlock().
 * The loop ends once kthread_should_stop() reports a stop request.
 *
 * @unsed: unused thread argument.
 * Return: always 1.
 */
static int r2_f(void *unsed)
{
    for (;;) {
        if (!read_trylock(&rwlock)) {
            printk("\tr2: unavailbe lock, spin\n");
            read_lock(&rwlock);
        } else {
            printk("\tr2: locked\n");
        }

        /* Read-side critical section; sleeping here is part of the experiment. */
        ssleep(1);
        printk("\tr2: read access\n");
        read_unlock(&rwlock);

        if (kthread_should_stop())
            break;
        printk("\tr2: continue...\n");
    }

    printk("\tr2: should stop now\n");
    return 1;
}


/*
 * Module entry point: create the writer and the two reader threads, then
 * start them.
 *
 * kthread_create() reports failure through an ERR_PTR() value, never NULL, so
 * each result is checked with IS_ERR(). If any creation fails, the threads
 * that were already created (but not yet woken) are stopped and the error is
 * returned, so the module load fails instead of leaving invalid task
 * pointers behind for the exit handler.
 *
 * Return: 0 on success, a negative errno if a thread could not be created.
 */
static int __init init_thread(void)
{
    int err;

    printk(KERN_ALERT "Thread creating ...\n");

    w = kthread_create(w_f, NULL, "writer1");
    if (IS_ERR(w)) {
        err = PTR_ERR(w);
        goto fail;
    }

    r1 = kthread_create(r1_f, NULL, "reader1");
    if (IS_ERR(r1)) {
        err = PTR_ERR(r1);
        goto fail_stop_w;
    }

    r2 = kthread_create(r2_f, NULL, "reader2");
    if (IS_ERR(r2)) {
        err = PTR_ERR(r2);
        goto fail_stop_r1;
    }

    printk(KERN_ALERT "Thread Created Sucessfully\n");
    wake_up_process(w);
    wake_up_process(r1);
    wake_up_process(r2);
    return 0;

    /* Unwind in reverse order; a never-woken kthread exits without running
     * its thread function when stopped. */
fail_stop_r1:
    kthread_stop(r1);
fail_stop_w:
    kthread_stop(w);
fail:
    printk("Thread Creation Failed\n");
    return err;
}

/*
 * Module exit point: stop the writer and both reader threads and log the
 * value each kthread_stop() call returned.
 */
static void __exit cleanup_thread(void)
{
    int ret, ret1, ret2;

    printk(KERN_ALERT "Cleaning up ...\n");
    ret = kthread_stop(w);
    /*
     * Stop each reader exactly once. Previously r2 was stopped twice and r1
     * not at all, which left r1 running after unload and called
     * kthread_stop() on an already-exited task.
     */
    ret1 = kthread_stop(r1);
    ret2 = kthread_stop(r2);
    printk("stop threads returned %d,%d,%d\n", ret, ret1, ret2);
}
/* Register the module's load and unload entry points. */
module_init(init_thread)
module_exit(cleanup_thread)
于 2019-09-26T09:49:06.537 回答