1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
From e3d3fd13d0ac1e12848b4f2a57e9e4bf2b557bf0 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 3 Dec 2019 10:13:46 +0000
Subject: drm/i915/gem: Take runtime-pm wakeref prior to unbinding
Some machines require ACPI for runtime resume, and ACPI is quite kmalloc
happy. We cannot handle kmalloc from inside the vm->mutex, as that mutex is
used by the shrinker, and so we must ensure the global runtime-pm is
awake prior to unbinding to avoid the potential lock inversion.
<4> [57.121748] ======================================================
<4> [57.121750] WARNING: possible circular locking dependency detected
<4> [57.121753] 5.4.0-rc8-CI-CI_DRM_7466+ #1 Tainted: G U
<4> [57.121754] ------------------------------------------------------
<4> [57.121756] i915_pm_rpm/1105 is trying to acquire lock:
<4> [57.121758] ffffffff82263a40 (fs_reclaim){+.+.}, at: fs_reclaim_acquire.part.117+0x0/0x30
<4> [57.121766]
but task is already holding lock:
<4> [57.121768] ffff888475a593c0 (&vm->mutex){+.+.}, at: i915_vma_unbind+0x21/0x50 [i915]
<4> [57.121868]
which lock already depends on the new lock.
<4> [57.121869]
the existing dependency chain (in reverse order) is:
<4> [57.121871]
-> #1 (&vm->mutex){+.+.}:
<4> [57.121951] i915_gem_shrinker_taints_mutex+0xa2/0xd0 [i915]
<4> [57.122028] i915_address_space_init+0xa9/0x170 [i915]
<4> [57.122104] i915_ggtt_init_hw+0x47/0x130 [i915]
<4> [57.122150] i915_driver_probe+0xbb4/0x15f0 [i915]
<4> [57.122197] i915_pci_probe+0x43/0x1c0 [i915]
<4> [57.122202] pci_device_probe+0x9e/0x120
<4> [57.122206] really_probe+0xea/0x420
<4> [57.122209] driver_probe_device+0x10b/0x120
<4> [57.122212] device_driver_attach+0x4a/0x50
<4> [57.122214] __driver_attach+0x97/0x130
<4> [57.122217] bus_for_each_dev+0x74/0xc0
<4> [57.122220] bus_add_driver+0x142/0x220
<4> [57.122222] driver_register+0x56/0xf0
<4> [57.122226] do_one_initcall+0x58/0x2ff
<4> [57.122230] do_init_module+0x56/0x1f8
<4> [57.122233] load_module+0x243e/0x29f0
<4> [57.122236] __do_sys_finit_module+0xe9/0x110
<4> [57.122239] do_syscall_64+0x4f/0x210
<4> [57.122242] entry_SYSCALL_64_after_hwframe+0x49/0xbe
<4> [57.122244]
-> #0 (fs_reclaim){+.+.}:
<4> [57.122249] __lock_acquire+0x1328/0x15d0
<4> [57.122251] lock_acquire+0xa7/0x1c0
<4> [57.122254] fs_reclaim_acquire.part.117+0x24/0x30
<4> [57.122257] __kmalloc+0x48/0x320
<4> [57.122261] acpi_ns_internalize_name+0x44/0x9b
<4> [57.122264] acpi_ns_get_node_unlocked+0x6b/0xd3
<4> [57.122267] acpi_ns_get_node+0x3b/0x50
<4> [57.122271] acpi_get_handle+0x8a/0xb4
<4> [57.122274] acpi_has_method+0x1c/0x40
<4> [57.122278] acpi_pci_set_power_state+0x40/0xe0
<4> [57.122281] pci_platform_power_transition+0x3e/0x90
<4> [57.122284] pci_set_power_state+0x83/0xf0
<4> [57.122287] pci_restore_standard_config+0x22/0x40
<4> [57.122289] pci_pm_runtime_resume+0x23/0xc0
<4> [57.122293] __rpm_callback+0xb1/0x110
<4> [57.122296] rpm_callback+0x1a/0x70
<4> [57.122299] rpm_resume+0x50e/0x790
<4> [57.122302] __pm_runtime_resume+0x42/0x80
<4> [57.122357] __intel_runtime_pm_get+0x15/0x60 [i915]
<4> [57.122435] ggtt_unbind_vma+0x24/0x60 [i915]
<4> [57.122514] __i915_vma_unbind.part.39+0xb5/0x500 [i915]
<4> [57.122593] i915_vma_unbind+0x2d/0x50 [i915]
<4> [57.122668] i915_gem_object_unbind+0x11c/0x260 [i915]
<4> [57.122740] i915_gem_object_set_cache_level+0x32/0x90 [i915]
<4> [57.122810] i915_gem_set_caching_ioctl+0x1f7/0x2f0 [i915]
<4> [57.122815] drm_ioctl_kernel+0xa7/0xf0
<4> [57.122818] drm_ioctl+0x2e1/0x390
<4> [57.122822] do_vfs_ioctl+0xa0/0x6f0
<4> [57.122825] ksys_ioctl+0x35/0x60
<4> [57.122828] __x64_sys_ioctl+0x11/0x20
<4> [57.122830] do_syscall_64+0x4f/0x210
<4> [57.122833] entry_SYSCALL_64_after_hwframe+0x49/0xbe
Closes: https://gitlab.freedesktop.org/drm/intel/issues/711
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191203101347.2836057-1-chris@chris-wilson.co.uk
---
drivers/gpu/drm/i915/i915_gem.c | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3f07948ea4da..ed2436db5dd2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -119,10 +119,23 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
unsigned long flags)
{
- struct i915_vma *vma;
+ struct intel_runtime_pm *rpm = &to_i915(obj->base.dev)->runtime_pm;
LIST_HEAD(still_in_list);
+ intel_wakeref_t wakeref;
+ struct i915_vma *vma;
int ret = 0;
+ if (!atomic_read(&obj->bind_count))
+ return 0;
+
+ /*
+ * As some machines use ACPI to handle runtime-resume callbacks, and
+ * ACPI is quite kmalloc happy, we cannot resume beneath the vm->mutex
+ * as they are required by the shrinker. Ergo, we wake the device up
+ * first just in case.
+ */
+ wakeref = intel_runtime_pm_get(rpm);
+
spin_lock(&obj->vma.lock);
while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
struct i915_vma,
@@ -146,6 +159,8 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
list_splice(&still_in_list, &obj->vma.list);
spin_unlock(&obj->vma.lock);
+ intel_runtime_pm_put(rpm, wakeref);
+
return ret;
}
--
cgit v1.2.2-1-gc45e
|