// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2014-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/slab.h>
#include <linux/mutex.h>
#include "kfd_device_queue_manager.h"
#include "kfd_kernel_queue.h"
#include "kfd_priv.h"

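/* Reasons why a runlist can become over-subscribed and need to be chained */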
#define OVER_SUBSCRIPTION_PROCESS_COUNT (1 << 0)
#define OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT (1 << 1)
#define OVER_SUBSCRIPTION_GWS_QUEUE_COUNT (1 << 2)
#define OVER_SUBSCRIPTION_XNACK_CONFLICT (1 << 3)

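/* Advance the runlist IB write pointer (in dwords) by increment_bytes, warning on IB overflow */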
static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
			unsigned int buffer_size_bytes)
{
	unsigned int temp = *wptr + increment_bytes / sizeof(uint32_t);

	WARN((temp * sizeof(uint32_t)) > buffer_size_bytes,
	     "Runlist IB overflow");
	*wptr = temp;
}

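/*
 * Calculate the runlist IB size in bytes and report whether (and why) the
 * runlist is over-subscribed; over-subscription adds room for a chained
 * runlist packet.
 */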
static void pm_calc_rlib_size(struct packet_manager *pm,
				unsigned int *rlib_size,
				int *over_subscription,
				int xnack_conflict)
{
	unsigned int process_count, queue_count, compute_queue_count, gws_queue_count;
	unsigned int map_queue_size;
	unsigned int max_proc_per_quantum = 1;
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;

	process_count = pm->dqm->processes_count;
	queue_count = pm->dqm->active_queue_count;
	compute_queue_count = pm->dqm->active_cp_queue_count;
	gws_queue_count = pm->dqm->gws_queue_count;

	/* Check if there is over-subscription.
	 * Note: the arbitration between the number of VMIDs and
	 * hws_max_conc_proc has been done in kgd2kfd_device_init().
	 */
	*over_subscription = 0;

	if (node->max_proc_per_quantum > 1)
		max_proc_per_quantum = node->max_proc_per_quantum;

	if (process_count > max_proc_per_quantum)
		*over_subscription |= OVER_SUBSCRIPTION_PROCESS_COUNT;
	if (compute_queue_count > get_cp_queues_num(pm->dqm))
		*over_subscription |= OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT;
	if (gws_queue_count > 1)
		*over_subscription |= OVER_SUBSCRIPTION_GWS_QUEUE_COUNT;
	if (xnack_conflict && (node->adev->gmc.xnack_flags & AMDGPU_GMC_XNACK_FLAG_CHAIN))
		*over_subscription |= OVER_SUBSCRIPTION_XNACK_CONFLICT;

	if (*over_subscription)
		dev_dbg(dev, "Over subscribed runlist\n");

	map_queue_size = pm->pmf->map_queues_size;
	/* Calculate runlist IB allocation size */
	*rlib_size = process_count * pm->pmf->map_process_size +
		     queue_count * map_queue_size;

	/*
	 * Increase the allocation size in case we need a chained run list
	 * when over-subscribed.
	 */
	if (*over_subscription)
		*rlib_size += pm->pmf->runlist_size;

	dev_dbg(dev, "runlist ib size %d\n", *rlib_size);
}

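/*
 * Allocate and zero the runlist IB from the GTT sub-allocator, returning its
 * CPU and GPU addresses along with the computed size and over-subscription
 * status.
 */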
static int pm_allocate_runlist_ib(struct packet_manager *pm,
				unsigned int **rl_buffer,
				uint64_t *rl_gpu_buffer,
				unsigned int *rl_buffer_size,
				int *is_over_subscription,
				int xnack_conflict)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	int retval;

	if (WARN_ON(pm->allocated))
		return -EINVAL;

	pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription,
			  xnack_conflict);

	mutex_lock(&pm->lock);

	retval = kfd_gtt_sa_allocate(node, *rl_buffer_size, &pm->ib_buffer_obj);

	if (retval) {
		dev_err(dev, "Failed to allocate runlist IB\n");
		goto out;
	}

	*(void **)rl_buffer = pm->ib_buffer_obj->cpu_ptr;
	*rl_gpu_buffer = pm->ib_buffer_obj->gpu_addr;

	memset(*rl_buffer, 0, *rl_buffer_size);
	pm->allocated = true;

out:
	mutex_unlock(&pm->lock);
	return retval;
}

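/*
 * Build the runlist IB: one map-process packet per process, followed by
 * map-queues packets for each of its active kernel and user queues. If
 * processes with different XNACK modes are present, processes sharing a
 * mode are grouped together and mapped in two passes.
 */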
static int pm_create_runlist_ib(struct packet_manager *pm,
				struct list_head *queues,
				uint64_t *rl_gpu_addr,
				size_t *rl_size_bytes)
{
	unsigned int alloc_size_bytes;
	unsigned int *rl_buffer, rl_wptr, i;
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	int retval, processes_mapped;
	struct device_process_node *cur;
	struct qcm_process_device *qpd;
	struct queue *q;
	struct kernel_queue *kq;
	int is_over_subscription;
	int xnack_enabled = -1;
	bool xnack_conflict = false;

	rl_wptr = retval = processes_mapped = 0;

	/* Check if processes set different XNACK modes */
	list_for_each_entry(cur, queues, list) {
		qpd = cur->qpd;
		if (xnack_enabled < 0)
			/* First process */
			xnack_enabled = qpd->pqm->process->xnack_enabled;
		else if (qpd->pqm->process->xnack_enabled != xnack_enabled) {
			/* Found a process with a different XNACK mode */
			xnack_conflict = true;
			break;
		}
	}

	retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr,
					&alloc_size_bytes, &is_over_subscription,
					xnack_conflict);
	if (retval)
		return retval;

	*rl_size_bytes = alloc_size_bytes;
	pm->ib_size_bytes = alloc_size_bytes;

	dev_dbg(dev, "Building runlist ib process count: %d queues count %d\n",
		pm->dqm->processes_count, pm->dqm->active_queue_count);

build_runlist_ib:
	/* Build the runlist IB packets */
	list_for_each_entry(cur, queues, list) {
		qpd = cur->qpd;
		/* Group processes with the same XNACK mode together */
		if (qpd->pqm->process->xnack_enabled != xnack_enabled)
			continue;
		/* Build map-process packet */
		if (processes_mapped >= pm->dqm->processes_count) {
			dev_dbg(dev, "Not enough space left in runlist IB\n");
			pm_release_ib(pm);
			return -ENOMEM;
		}

		retval = pm->pmf->map_process(pm, &rl_buffer[rl_wptr], qpd);
		if (retval)
			return retval;

		processes_mapped++;
		inc_wptr(&rl_wptr, pm->pmf->map_process_size,
				alloc_size_bytes);

		list_for_each_entry(kq, &qpd->priv_queue_list, list) {
			if (!kq->queue->properties.is_active)
				continue;

			dev_dbg(dev,
				"static_queue, mapping kernel q %d, is debug status %d\n",
				kq->queue->queue, qpd->is_debug);

			retval = pm->pmf->map_queues(pm,
						     &rl_buffer[rl_wptr],
						     kq->queue,
						     qpd->is_debug);
			if (retval)
				return retval;

			inc_wptr(&rl_wptr,
				 pm->pmf->map_queues_size,
				 alloc_size_bytes);
		}

		list_for_each_entry(q, &qpd->queues_list, list) {
			if (!q->properties.is_active)
				continue;

			dev_dbg(dev,
				"static_queue, mapping user queue %d, is debug status %d\n",
				q->queue, qpd->is_debug);

			retval = pm->pmf->map_queues(pm,
						     &rl_buffer[rl_wptr],
						     q,
						     qpd->is_debug);

			if (retval)
				return retval;

			inc_wptr(&rl_wptr,
				 pm->pmf->map_queues_size,
				 alloc_size_bytes);
		}
	}
	if (xnack_conflict) {
		/* Pick up processes with the other XNACK mode */
		xnack_enabled = !xnack_enabled;
		xnack_conflict = false;
		goto build_runlist_ib;
	}

	dev_dbg(dev, "Finished map process and queues to runlist\n");

	if (is_over_subscription) {
		if (!pm->is_over_subscription)
			dev_warn(dev, "Runlist is getting oversubscribed due to%s%s%s%s. Expect reduced ROCm performance.\n",
				 is_over_subscription & OVER_SUBSCRIPTION_PROCESS_COUNT ?
				 " too many processes" : "",
				 is_over_subscription & OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT ?
				 " too many queues" : "",
				 is_over_subscription & OVER_SUBSCRIPTION_GWS_QUEUE_COUNT ?
				 " multiple processes using cooperative launch" : "",
				 is_over_subscription & OVER_SUBSCRIPTION_XNACK_CONFLICT ?
				 " xnack on/off processes mixed on gfx9" : "");

		retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr],
					  *rl_gpu_addr,
					  alloc_size_bytes / sizeof(uint32_t),
					  true);
	}
	pm->is_over_subscription = !!is_over_subscription;

	for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++)
		pr_debug("0x%2X ", rl_buffer[i]);
	pr_debug("\n");

	return retval;
}

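/* Select the per-ASIC packet manager functions and create the HIQ kernel queue */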
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
{
	switch (dqm->dev->adev->asic_type) {
	case CHIP_KAVERI:
	case CHIP_HAWAII:
		/* PM4 packet structures on CIK are the same as on VI */
	case CHIP_CARRIZO:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		pm->pmf = &kfd_vi_pm_funcs;
		break;
	default:
		if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2) ||
		    KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3) ||
		    KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 4) ||
		    KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 5, 0))
			pm->pmf = &kfd_aldebaran_pm_funcs;
		else if (KFD_GC_VERSION(dqm->dev) >= IP_VERSION(9, 0, 1))
			pm->pmf = &kfd_v9_pm_funcs;
		else {
			WARN(1, "Unexpected ASIC family %u",
			     dqm->dev->adev->asic_type);
			return -EINVAL;
		}
	}

	pm->dqm = dqm;
	mutex_init(&pm->lock);
	pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ);
	if (!pm->priv_queue) {
		mutex_destroy(&pm->lock);
		return -ENOMEM;
	}
	pm->allocated = false;

	return 0;
}

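/* Tear down the packet manager: destroy its lock and release the HIQ kernel queue */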
void pm_uninit(struct packet_manager *pm)
{
	mutex_destroy(&pm->lock);
	kernel_queue_uninit(pm->priv_queue);
	pm->priv_queue = NULL;
}

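/* Build and submit a set-resources packet on the HIQ describing the scheduling resources */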
int pm_send_set_resources(struct packet_manager *pm,
			  struct scheduling_resources *res)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	uint32_t *buffer, size;
	int retval = 0;

	size = pm->pmf->set_resources_size;
	mutex_lock(&pm->lock);
	kq_acquire_packet_buffer(pm->priv_queue,
				 size / sizeof(uint32_t),
				 (unsigned int **)&buffer);
	if (!buffer) {
		dev_err(dev, "Failed to allocate buffer on kernel queue\n");
		retval = -ENOMEM;
		goto out;
	}

	retval = pm->pmf->set_resources(pm, buffer, res);
	if (!retval)
		retval = kq_submit_packet(pm->priv_queue);
	else
		kq_rollback_packet(pm->priv_queue);

out:
	mutex_unlock(&pm->lock);

	return retval;
}

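/* Create the runlist IB for the given queues and submit a runlist packet pointing at it */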
int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
{
	uint64_t rl_gpu_ib_addr;
	uint32_t *rl_buffer;
	size_t rl_ib_size, packet_size_dwords;
	int retval;

	retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr,
				      &rl_ib_size);
	if (retval)
		goto fail_create_runlist_ib;

	pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr);

	packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t);
	mutex_lock(&pm->lock);

	retval = kq_acquire_packet_buffer(pm->priv_queue,
					  packet_size_dwords, &rl_buffer);
	if (retval)
		goto fail_acquire_packet_buffer;

	retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr,
				  rl_ib_size / sizeof(uint32_t), false);
	if (retval)
		goto fail_create_runlist;

	retval = kq_submit_packet(pm->priv_queue);

	mutex_unlock(&pm->lock);

	return retval;

fail_create_runlist:
	kq_rollback_packet(pm->priv_queue);
fail_acquire_packet_buffer:
	mutex_unlock(&pm->lock);
fail_create_runlist_ib:
	pm_release_ib(pm);
	return retval;
}

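/* Submit a query-status packet that signals completion by writing fence_value to fence_address */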
int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
			 uint64_t fence_value)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	uint32_t *buffer, size;
	int retval = 0;

	if (WARN_ON(!fence_address))
		return -EFAULT;

	size = pm->pmf->query_status_size;
	mutex_lock(&pm->lock);
	kq_acquire_packet_buffer(pm->priv_queue,
				 size / sizeof(uint32_t), (unsigned int **)&buffer);
	if (!buffer) {
		dev_err(dev, "Failed to allocate buffer on kernel queue\n");
		retval = -ENOMEM;
		goto out;
	}

	retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value);
	if (!retval)
		retval = kq_submit_packet(pm->priv_queue);
	else
		kq_rollback_packet(pm->priv_queue);

out:
	mutex_unlock(&pm->lock);
	return retval;
}

/* pm_config_dequeue_wait_counts: Configure dequeue timer wait counts
 * by writing to CP_IQ_WAIT_TIME2 registers.
 *
 * @cmd: See enum kfd_config_dequeue_wait_counts_cmd definition.
 * @value: Depends on the cmd. This parameter is unused for
 *         KFD_DEQUEUE_WAIT_INIT and KFD_DEQUEUE_WAIT_RESET. For
 *         KFD_DEQUEUE_WAIT_SET_SCH_WAVE it holds the value to be set.
 *
 */
int pm_config_dequeue_wait_counts(struct packet_manager *pm,
				  enum kfd_config_dequeue_wait_counts_cmd cmd,
				  uint32_t value)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	int retval = 0;
	uint32_t *buffer, size;

	if (!pm->pmf->config_dequeue_wait_counts ||
	    !pm->pmf->config_dequeue_wait_counts_size)
		return 0;

	if (cmd == KFD_DEQUEUE_WAIT_INIT &&
	    (KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(9, 4, 1) ||
	     KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(10, 0, 0)))
		return 0;

	size = pm->pmf->config_dequeue_wait_counts_size;

	mutex_lock(&pm->lock);

	if (size) {
		kq_acquire_packet_buffer(pm->priv_queue,
					 size / sizeof(uint32_t),
					 (unsigned int **)&buffer);

		if (!buffer) {
			dev_err(dev,
				"Failed to allocate buffer on kernel queue\n");
			retval = -ENOMEM;
			goto out;
		}

		retval = pm->pmf->config_dequeue_wait_counts(pm, buffer,
							     cmd, value);
		if (!retval) {
			retval = kq_submit_packet(pm->priv_queue);

			/* If the default value is modified, cache it in dqm->wait_times */
			if (!retval && cmd == KFD_DEQUEUE_WAIT_INIT)
				update_dqm_wait_times(pm->dqm);
		} else {
			kq_rollback_packet(pm->priv_queue);
		}
	}
out:
	mutex_unlock(&pm->lock);
	return retval;
}

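/* Submit an unmap-queues packet matching the given filter, optionally resetting the queues */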
int pm_send_unmap_queue(struct packet_manager *pm,
			enum kfd_unmap_queues_filter filter,
			uint32_t filter_param, bool reset)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	uint32_t *buffer, size;
	int retval = 0;

	size = pm->pmf->unmap_queues_size;
	mutex_lock(&pm->lock);
	kq_acquire_packet_buffer(pm->priv_queue,
				 size / sizeof(uint32_t), (unsigned int **)&buffer);
	if (!buffer) {
		dev_err(dev, "Failed to allocate buffer on kernel queue\n");
		retval = -ENOMEM;
		goto out;
	}

	retval = pm->pmf->unmap_queues(pm, buffer, filter, filter_param, reset);
	if (!retval)
		retval = kq_submit_packet(pm->priv_queue);
	else
		kq_rollback_packet(pm->priv_queue);

out:
	mutex_unlock(&pm->lock);
	return retval;
}

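/* Free the runlist IB allocation, if any */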
void pm_release_ib(struct packet_manager *pm)
{
	mutex_lock(&pm->lock);
	if (pm->allocated) {
		kfd_gtt_sa_free(pm->dqm->dev, pm->ib_buffer_obj);
		pm->allocated = false;
	}
	mutex_unlock(&pm->lock);
}

#if defined(CONFIG_DEBUG_FS)

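/* debugfs: hex-dump the currently allocated runlist IB */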
int pm_debugfs_runlist(struct seq_file *m, void *data)
{
	struct packet_manager *pm = data;

	mutex_lock(&pm->lock);

	if (!pm->allocated) {
		seq_puts(m, " No active runlist\n");
		goto out;
	}

	seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
		     pm->ib_buffer_obj->cpu_ptr, pm->ib_size_bytes, false);

out:
	mutex_unlock(&pm->lock);
	return 0;
}

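/* debugfs: deliberately submit a garbage packet on the HIQ to hang the HWS for testing */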
int pm_debugfs_hang_hws(struct packet_manager *pm)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	uint32_t *buffer, size;
	int r = 0;

	if (!pm->priv_queue)
		return -EAGAIN;

	size = pm->pmf->query_status_size;
	mutex_lock(&pm->lock);
	kq_acquire_packet_buffer(pm->priv_queue,
				 size / sizeof(uint32_t), (unsigned int **)&buffer);
	if (!buffer) {
		dev_err(dev, "Failed to allocate buffer on kernel queue\n");
		r = -ENOMEM;
		goto out;
	}
	memset(buffer, 0x55, size);
	kq_submit_packet(pm->priv_queue);

	dev_info(dev, "Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
		 buffer[0], buffer[1], buffer[2], buffer[3], buffer[4],
		 buffer[5], buffer[6]);
out:
	mutex_unlock(&pm->lock);
	return r;
}


#endif