1

我正在使用一个内核模块(krping.c,链接:git.openfabrics.org Git - ~sgrimberg/krping.git/summary)在内核空间中通过 InfiniBand 进行 RDMA 传输。我使用的网卡是 Mellanox ConnectX-4(驱动程序:mlx5),Linux 内核版本为 3.13,操作系统为 Ubuntu 12.04,并安装了 Mellanox OFED 3.3。

代码似乎卡在 mlx5_ib_query_qp 函数(位于 mlx5_ib.h 中)里获取 mutex_lock 的地方。该函数是从 krping.c 中通过 ib_req_notify_cq 函数调用的。能否请大家帮忙看看如何解决这个错误/死锁?我在本帖中附上了 dmesg 跟踪信息。

Dmesg 跟踪:

[  499.178862] BUG: unable to handle kernel NULL pointer dereference at 0000000000000020
[  499.178951] IP: [<ffffffff8176c451>] __mutex_lock_slowpath+0xf1/0x1b0
[  499.179024] PGD 7dadd8067 PUD 7be174067 PMD 0
[  499.179079] Oops: 0002 [#1] SMP
[  499.179118] Modules linked in: rdma_krping(OX) target_core_mod ib_iser(OX) iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi snd_hda_codec_realtek snd_hda_codec_hdmi snd_hda_intel snd_hda_codec snd_hwdep i915 snd_pcm rfcomm bnep mei_me snd_seq_midi snd_rawmidi snd_seq_midi_event snd_seq snd_seq_device snd_timer bluetooth drm_kms_helper psmouse snd mei drm mac_hid soundcore snd_page_alloc i2c_algo_bit shpchp serio_raw dcdbas lpc_ich video knem(OX) parport_pc ppdev lp parport rdma_ucm(OX) ib_ucm(OX) rdma_cm(OX) iw_cm(OX) configfs ib_ipoib(OX) ib_cm(OX) ib_uverbs(OX) ib_umad(OX) mlx5_ib(OX) mlx5_core(OX) mlx4_en(OX) vxlan ip_tunnel mlx4_ib(OX) ib_sa(OX) ib_mad(OX) ib_core(OX) nls_iso8859_1 ib_addr(OX) ib_netlink(OX) mlx4_core(OX) mlx_compat(OX) hid_generic usbhid hid e1000e ptp ahci pps_core libahci
[  499.180007] CPU: 0 PID: 2618 Comm: bash Tainted: G           OX 3.13.0-91-generic #138~precise1-Ubuntu
[  499.180091] Hardware name: Dell Inc. OptiPlex 9020/0N4YC8, BIOS A11 04/01/2015
[  499.180159] task: ffff8807c6d8e000 ti: ffff8807c0daa000 task.ti: ffff8807c0daa000
[  499.180228] RIP: 0010:[<ffffffff8176c451>]  [<ffffffff8176c451>] __mutex_lock_slowpath+0xf1/0x1b0
[  499.180315] RSP: 0018:ffff8807c0dabaf8  EFLAGS: 00010286
[  499.180366] RAX: 0000000000000020 RBX: ffff8807ee0c4898 RCX: ffff8807c0dabaf0
[  499.180431] RDX: ffff8807c0dabb00 RSI: ffff8807c0dabb18 RDI: ffff8807ee0c489c
[  499.180496] RBP: ffff8807c0dabb58 R08: 0000000000000000 R09: 0000000000000000
[  499.180561] R10: 00000000000004bd R11: 00000000000004bc R12: ffff8807ee0c489c
[  499.180626] R13: 00000000ffffffff R14: ffff8807c6d8e000 R15: ffff8807ee0c48a0
[  499.180692] FS:  00007ff11ca66700(0000) GS:ffff88081ea00000(0000) knlGS:0000000000000000
[  499.180766] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  499.180819] CR2: 0000000000000020 CR3: 00000007c0d98000 CR4: 00000000001407f0

[  499.180884] Stack:
[  499.180905]  ffff8807c0dabaf0 ffff8807ee0c48a0 0000000000000020 0000000000020002
[  499.180986]  0000000000000000 ffff880700000001 ffff880700000000 ffff8807ee0c4898
[  499.181067]  0000000000000002 0000000000000006 ffff8807edd6636a ffff8800d46f0000

[  499.181146] Call Trace:
[  499.181178]  [<ffffffff8176c533>] mutex_lock+0x23/0x37
[  499.181242]  [<ffffffffa024bd11>] mlx5_ib_query_qp+0x41/0x660 [mlx5_ib]
[  499.181309]  [<ffffffff817552b6>] ? printk+0x61/0x63
[  499.181361]  [<ffffffffa0699543>] krping_setup_qp.isra.8+0x115/0x25f [rdma_krping]
[  499.181435]  [<ffffffffa069a518>] krping_run_client+0x56/0x757 [rdma_krping]
[  499.181508]  [<ffffffff81387f7e>] ? memzero_explicit+0xe/0x10
[  499.181567]  [<ffffffff8149eca7>] ? extract_entropy+0xc7/0x180
[  499.181626]  [<ffffffff8149f0b7>] ? get_random_bytes+0x47/0xd0
[  499.181684]  [<ffffffff810c457a>] ? console_unlock+0x1a/0x30
[  499.181741]  [<ffffffffa06961a0>] ? krping_getopt+0x1a0/0x1a0 [rdma_krping]
[  499.181815]  [<ffffffffa02b7f56>] ? rdma_create_id+0x136/0x150 [rdma_cm]
[  499.181881]  [<ffffffff8149f0b7>] ? get_random_bytes+0x47/0xd0
[  499.181939]  [<ffffffffa06961a0>] ? krping_getopt+0x1a0/0x1a0 [rdma_krping]
[  499.182007]  [<ffffffffa0696ff7>] krping_doit+0x5f7/0x9e0 [rdma_krping]
[  499.182077]  [<ffffffff811b250a>] ? __kmalloc+0x5a/0x250
[  499.182130]  [<ffffffffa0697434>] ? krping_write_proc+0x54/0xf4 [rdma_krping]
[  499.182199]  [<ffffffffa0697493>] krping_write_proc+0xb3/0xf4 [rdma_krping]
[  499.182270]  [<ffffffff81234d93>] proc_reg_write+0x43/0x70
[  499.184645]  [<ffffffff811cdff5>] vfs_write+0xc5/0x1f0
[  499.186973]  [<ffffffff811ce4f2>] SyS_write+0x52/0xa0
[  499.189256]  [<ffffffff81776fdd>] system_call_fastpath+0x1a/0x1f

[  499.191569] Code: 85 c0 78 09 31 c0 87 03 83 f8 01 74 74 48 8b 43 10 48 8d 55 a8 4c 8d 7b 08 41 bd ff ff ff ff 48 89 53 10 4c 89 7d a8 48 89 45 b0 <48> 89 10 4c 89 75 b8 eb 22 66 0f 1f 44 00 00 4c 89 e7 49 c7 06
[  499.196527] RIP  [<ffffffff8176c451>] __mutex_lock_slowpath+0xf1/0x1b0
[  499.198850]  RSP <ffff8807c0dabaf8>
[  499.201122] CR2: 0000000000000020
4

0 回答 0