// SPDX-License-Identifier: MIT
/*
 * Copyright © 2025 Intel Corporation
 */

#include <drm/drm_managed.h>

#include "regs/xe_gtt_defs.h"

#include "xe_assert.h"
#include "xe_ggtt.h"
#include "xe_gt_sriov_vf.h"
#include "xe_sriov.h"
#include "xe_sriov_printk.h"
#include "xe_tile_sriov_vf.h"
#include "xe_wopcm.h"

static int vf_init_ggtt_balloons(struct xe_tile *tile)
{
	struct xe_ggtt *ggtt = tile->mem.ggtt;

	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

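	/*
	 * Two balloon nodes: balloon[0] will mask the GGTT range below the
	 * VF's area and balloon[1] the range above it, see
	 * xe_tile_sriov_vf_balloon_ggtt_locked().
	 */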
	tile->sriov.vf.ggtt_balloon[0] = xe_ggtt_node_init(ggtt);
	if (IS_ERR(tile->sriov.vf.ggtt_balloon[0]))
		return PTR_ERR(tile->sriov.vf.ggtt_balloon[0]);

	tile->sriov.vf.ggtt_balloon[1] = xe_ggtt_node_init(ggtt);
	if (IS_ERR(tile->sriov.vf.ggtt_balloon[1])) {
		xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[0]);
		return PTR_ERR(tile->sriov.vf.ggtt_balloon[1]);
	}

	return 0;
}

/**
 * xe_tile_sriov_vf_balloon_ggtt_locked - Insert balloon nodes to limit used GGTT address range.
 * @tile: the &xe_tile struct instance
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_tile_sriov_vf_balloon_ggtt_locked(struct xe_tile *tile)
{
	u64 ggtt_base = xe_gt_sriov_vf_ggtt_base(tile->primary_gt);
	u64 ggtt_size = xe_gt_sriov_vf_ggtt(tile->primary_gt);
	struct xe_device *xe = tile_to_xe(tile);
	u64 wopcm = xe_wopcm_size(xe);
	u64 start, end;
	int err;

	xe_tile_assert(tile, IS_SRIOV_VF(xe));
	xe_tile_assert(tile, ggtt_size);
	lockdep_assert_held(&tile->mem.ggtt->lock);

	/*
	 * VF can only use part of the GGTT as allocated by the PF:
	 *
	 *     WOPCM                                  GUC_GGTT_TOP
	 *     |<------------ Total GGTT size ------------------>|
	 *
	 *          VF GGTT base -->|<- size ->|
	 *
	 *     +--------------------+----------+-----------------+
	 *     |////////////////////|   block  |\\\\\\\\\\\\\\\\\|
	 *     +--------------------+----------+-----------------+
	 *
	 *     |<--- balloon[0] --->|<-- VF -->|<-- balloon[1] ->|
	 */

	if (ggtt_base < wopcm || ggtt_base > GUC_GGTT_TOP ||
	    ggtt_size > GUC_GGTT_TOP - ggtt_base) {
		xe_sriov_err(xe, "tile%u: Invalid GGTT configuration: %#llx-%#llx\n",
			     tile->id, ggtt_base, ggtt_base + ggtt_size - 1);
		return -ERANGE;
	}

	start = wopcm;
	end = ggtt_base;
	if (end != start) {
		err = xe_ggtt_node_insert_balloon_locked(tile->sriov.vf.ggtt_balloon[0],
							 start, end);
		if (err)
			return err;
	}

	start = ggtt_base + ggtt_size;
	end = GUC_GGTT_TOP;
	if (end != start) {
		err = xe_ggtt_node_insert_balloon_locked(tile->sriov.vf.ggtt_balloon[1],
							 start, end);
		if (err) {
			xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[0]);
			return err;
		}
	}

	return 0;
}

static int vf_balloon_ggtt(struct xe_tile *tile)
{
	struct xe_ggtt *ggtt = tile->mem.ggtt;
	int err;

	mutex_lock(&ggtt->lock);
	err = xe_tile_sriov_vf_balloon_ggtt_locked(tile);
	mutex_unlock(&ggtt->lock);

	return err;
}

/**
 * xe_tile_sriov_vf_deballoon_ggtt_locked - Remove balloon nodes.
 * @tile: the &xe_tile struct instance
 */
void xe_tile_sriov_vf_deballoon_ggtt_locked(struct xe_tile *tile)
{
	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

	xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[1]);
	xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[0]);
}

static void vf_deballoon_ggtt(struct xe_tile *tile)
{
	mutex_lock(&tile->mem.ggtt->lock);
	xe_tile_sriov_vf_deballoon_ggtt_locked(tile);
	mutex_unlock(&tile->mem.ggtt->lock);
}

static void vf_fini_ggtt_balloons(struct xe_tile *tile)
{
	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

	xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[1]);
	xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[0]);
}

static void cleanup_ggtt(struct drm_device *drm, void *arg)
{
	struct xe_tile *tile = arg;

	vf_deballoon_ggtt(tile);
	vf_fini_ggtt_balloons(tile);
}

/**
 * xe_tile_sriov_vf_prepare_ggtt - Prepare a VF's GGTT configuration.
 * @tile: the &xe_tile
 *
 * This function is for VF use only.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_tile_sriov_vf_prepare_ggtt(struct xe_tile *tile)
{
	struct xe_device *xe = tile_to_xe(tile);
	int err;

	err = vf_init_ggtt_balloons(tile);
	if (err)
		return err;

	err = vf_balloon_ggtt(tile);
	if (err) {
		vf_fini_ggtt_balloons(tile);
		return err;
	}

	return drmm_add_action_or_reset(&xe->drm, cleanup_ggtt, tile);
}

/**
 * DOC: GGTT nodes shifting during VF post-migration recovery
 *
 * The first fixup applied to the VF KMD structures as part of post-migration
 * recovery is shifting nodes within the &xe_ggtt instance. The nodes are moved
 * from the range previously assigned to this VF into the newly provisioned
 * area. The changes include balloons, which are resized accordingly.
 *
 * The balloon nodes are there to eliminate unavailable ranges from use: one
 * reserves the GGTT area below the range assigned to the current VF, and the
 * other reserves the area above it.
 *
 * Below is the GGTT layout of an example VF, with a certain address range
 * assigned to that VF, and inaccessible areas above and below:
 *
 *  0                                                                        4GiB
 *  |<--------------------------- Total GGTT size ----------------------------->|
 *      WOPCM                                                        GUC_TOP
 *      |<-------------- Area mappable by xe_ggtt instance ---------------->|
 *
 *  +---+---------------------------------+----------+----------------------+---+
 *  |\\\|/////////////////////////////////|  VF mem  |//////////////////////|\\\|
 *  +---+---------------------------------+----------+----------------------+---+
 *
 * Hardware enforced access rules before migration:
 *
 *  |<------- inaccessible for VF ------->|<VF owned>|<-- inaccessible for VF ->|
 *
 * GGTT nodes used for tracking allocations:
 *
 *      |<---------- balloon ------------>|<- nodes->|<----- balloon ------>|
 *
 * After the migration, the GGTT area assigned to the VF might have shifted,
 * either to a lower or to a higher address. But we expect the total size and the
 * extra areas to be identical, as migration can only happen between matching
 * platforms. Below is an example GGTT layout of the VF after migration. The VF's
 * GGTT content has been moved to a new area, whose address we receive from GuC:
 *
 *  +---+----------------------+----------+---------------------------------+---+
 *  |\\\|//////////////////////|  VF mem  |/////////////////////////////////|\\\|
 *  +---+----------------------+----------+---------------------------------+---+
 *
 * Hardware enforced access rules after migration:
 *
 *  |<- inaccessible for VF -->|<VF owned>|<------- inaccessible for VF ------->|
 *
 * So the VF has a new slice of GGTT assigned, and during the migration process
 * the memory content was copied to that new area. But the &xe_ggtt nodes are
 * still tracking allocations using the old addresses. The nodes within the
 * VF-owned area have to be shifted, and the balloon nodes need to be resized to
 * properly mask out areas not owned by the VF.
 *
 * Fixed &xe_ggtt nodes used for tracking allocations:
 *
 *      |<------ balloon ------>|<- nodes->|<----------- balloon ----------->|
 *
 * Due to the use of GPU profiles, we do not expect the old and new GGTT areas
 * to overlap, but our node shifting fixes the addresses correctly regardless.
 */
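
/*
 * Illustrative sketch (not taken from the driver sources; the variable names
 * below are placeholders): assuming the old and the newly provisioned VF GGTT
 * base addresses are both known, the shift passed to
 * xe_tile_sriov_vf_fixup_ggtt_nodes() would simply be their signed difference:
 *
 *	s64 shift = new_ggtt_base - old_ggtt_base;
 *
 *	xe_tile_sriov_vf_fixup_ggtt_nodes(tile, shift);
 *
 * How new_ggtt_base is obtained (per the DOC above, the new address comes from
 * GuC after re-provisioning) is outside the scope of this file.
 */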

/**
 * xe_tile_sriov_vf_fixup_ggtt_nodes - Shift GGTT allocations to match assigned range.
 * @tile: the &xe_tile struct instance
 * @shift: the shift value
 *
 * Since Global GTT is not virtualized, each VF has an assigned range
 * within the global space. This range might have changed during migration,
 * which requires all memory addresses pointing to GGTT to be shifted.
 */
void xe_tile_sriov_vf_fixup_ggtt_nodes(struct xe_tile *tile, s64 shift)
{
	struct xe_ggtt *ggtt = tile->mem.ggtt;

	mutex_lock(&ggtt->lock);

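	/*
	 * Drop the old balloons, shift all remaining nodes by the
	 * provisioning delta, then re-insert balloons around the newly
	 * assigned VF range.
	 */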
	xe_tile_sriov_vf_deballoon_ggtt_locked(tile);
	xe_ggtt_shift_nodes_locked(ggtt, shift);
	xe_tile_sriov_vf_balloon_ggtt_locked(tile);

	mutex_unlock(&ggtt->lock);
}