1  // SPDX-License-Identifier: GPL-2.0 OR MIT
2  /*
3   * Copyright 2014-2022 Advanced Micro Devices, Inc.
4   *
5   * Permission is hereby granted, free of charge, to any person obtaining a
6   * copy of this software and associated documentation files (the "Software"),
7   * to deal in the Software without restriction, including without limitation
8   * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9   * and/or sell copies of the Software, and to permit persons to whom the
10   * Software is furnished to do so, subject to the following conditions:
11   *
12   * The above copyright notice and this permission notice shall be included in
13   * all copies or substantial portions of the Software.
14   *
15   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18   * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19   * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20   * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21   * OTHER DEALINGS IN THE SOFTWARE.
22   *
23   */
24  
25  #include <linux/slab.h>
26  #include <linux/list.h>
27  #include "kfd_device_queue_manager.h"
28  #include "kfd_priv.h"
29  #include "kfd_kernel_queue.h"
30  #include "amdgpu_amdkfd.h"
31  
get_queue_by_qid(struct process_queue_manager * pqm,unsigned int qid)32  static inline struct process_queue_node *get_queue_by_qid(
33  			struct process_queue_manager *pqm, unsigned int qid)
34  {
35  	struct process_queue_node *pqn;
36  
37  	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
38  		if ((pqn->q && pqn->q->properties.queue_id == qid) ||
39  		    (pqn->kq && pqn->kq->queue->properties.queue_id == qid))
40  			return pqn;
41  	}
42  
43  	return NULL;
44  }
45  
assign_queue_slot_by_qid(struct process_queue_manager * pqm,unsigned int qid)46  static int assign_queue_slot_by_qid(struct process_queue_manager *pqm,
47  				    unsigned int qid)
48  {
49  	if (qid >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
50  		return -EINVAL;
51  
52  	if (__test_and_set_bit(qid, pqm->queue_slot_bitmap)) {
53  		pr_err("Cannot create new queue because requested qid(%u) is in use\n", qid);
54  		return -ENOSPC;
55  	}
56  
57  	return 0;
58  }
59  
find_available_queue_slot(struct process_queue_manager * pqm,unsigned int * qid)60  static int find_available_queue_slot(struct process_queue_manager *pqm,
61  					unsigned int *qid)
62  {
63  	unsigned long found;
64  
65  	found = find_first_zero_bit(pqm->queue_slot_bitmap,
66  			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
67  
68  	pr_debug("The new slot id %lu\n", found);
69  
70  	if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
71  		pr_info("Cannot open more queues for process with pasid 0x%x\n",
72  				pqm->process->pasid);
73  		return -ENOMEM;
74  	}
75  
76  	set_bit(found, pqm->queue_slot_bitmap);
77  	*qid = found;
78  
79  	return 0;
80  }
81  
kfd_process_dequeue_from_device(struct kfd_process_device * pdd)82  void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
83  {
84  	struct kfd_dev *dev = pdd->dev;
85  
86  	if (pdd->already_dequeued)
87  		return;
88  
89  	dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
90  	pdd->already_dequeued = true;
91  }
92  
pqm_set_gws(struct process_queue_manager * pqm,unsigned int qid,void * gws)93  int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
94  			void *gws)
95  {
96  	struct kfd_dev *dev = NULL;
97  	struct process_queue_node *pqn;
98  	struct kfd_process_device *pdd;
99  	struct kgd_mem *mem = NULL;
100  	int ret;
101  
102  	pqn = get_queue_by_qid(pqm, qid);
103  	if (!pqn) {
104  		pr_err("Queue id does not match any known queue\n");
105  		return -EINVAL;
106  	}
107  
108  	if (pqn->q)
109  		dev = pqn->q->device;
110  	if (WARN_ON(!dev))
111  		return -ENODEV;
112  
113  	pdd = kfd_get_process_device_data(dev, pqm->process);
114  	if (!pdd) {
115  		pr_err("Process device data doesn't exist\n");
116  		return -EINVAL;
117  	}
118  
119  	/* Only allow one queue per process can have GWS assigned */
120  	if (gws && pdd->qpd.num_gws)
121  		return -EBUSY;
122  
123  	if (!gws && pdd->qpd.num_gws == 0)
124  		return -EINVAL;
125  
126  	if (gws)
127  		ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
128  			gws, &mem);
129  	else
130  		ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info,
131  			pqn->q->gws);
132  	if (unlikely(ret))
133  		return ret;
134  
135  	pqn->q->gws = mem;
136  	pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0;
137  
138  	return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
139  							pqn->q, NULL);
140  }
141  
kfd_process_dequeue_from_all_devices(struct kfd_process * p)142  void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
143  {
144  	int i;
145  
146  	for (i = 0; i < p->n_pdds; i++)
147  		kfd_process_dequeue_from_device(p->pdds[i]);
148  }
149  
pqm_init(struct process_queue_manager * pqm,struct kfd_process * p)150  int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
151  {
152  	INIT_LIST_HEAD(&pqm->queues);
153  	pqm->queue_slot_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
154  					       GFP_KERNEL);
155  	if (!pqm->queue_slot_bitmap)
156  		return -ENOMEM;
157  	pqm->process = p;
158  
159  	return 0;
160  }
161  
pqm_uninit(struct process_queue_manager * pqm)162  void pqm_uninit(struct process_queue_manager *pqm)
163  {
164  	struct process_queue_node *pqn, *next;
165  
166  	list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
167  		if (pqn->q && pqn->q->gws)
168  			amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
169  				pqn->q->gws);
170  		kfd_procfs_del_queue(pqn->q);
171  		uninit_queue(pqn->q);
172  		list_del(&pqn->process_queue_list);
173  		kfree(pqn);
174  	}
175  
176  	bitmap_free(pqm->queue_slot_bitmap);
177  	pqm->queue_slot_bitmap = NULL;
178  }
179  
init_user_queue(struct process_queue_manager * pqm,struct kfd_dev * dev,struct queue ** q,struct queue_properties * q_properties,struct file * f,struct amdgpu_bo * wptr_bo,unsigned int qid)180  static int init_user_queue(struct process_queue_manager *pqm,
181  				struct kfd_dev *dev, struct queue **q,
182  				struct queue_properties *q_properties,
183  				struct file *f, struct amdgpu_bo *wptr_bo,
184  				unsigned int qid)
185  {
186  	int retval;
187  
188  	/* Doorbell initialized in user space*/
189  	q_properties->doorbell_ptr = NULL;
190  
191  	/* let DQM handle it*/
192  	q_properties->vmid = 0;
193  	q_properties->queue_id = qid;
194  
195  	retval = init_queue(q, q_properties);
196  	if (retval != 0)
197  		return retval;
198  
199  	(*q)->device = dev;
200  	(*q)->process = pqm->process;
201  
202  	if (dev->shared_resources.enable_mes) {
203  		retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
204  						AMDGPU_MES_GANG_CTX_SIZE,
205  						&(*q)->gang_ctx_bo,
206  						&(*q)->gang_ctx_gpu_addr,
207  						&(*q)->gang_ctx_cpu_ptr,
208  						false);
209  		if (retval) {
210  			pr_err("failed to allocate gang context bo\n");
211  			goto cleanup;
212  		}
213  		memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
214  		(*q)->wptr_bo = wptr_bo;
215  	}
216  
217  	pr_debug("PQM After init queue");
218  	return 0;
219  
220  cleanup:
221  	if (dev->shared_resources.enable_mes)
222  		uninit_queue(*q);
223  	return retval;
224  }
225  
/*
 * Create a new queue for a process on device @dev.
 *
 * @pqm:        process queue manager of the owning process
 * @dev:        device the queue is created on
 * @f:          forwarded to init_user_queue()
 * @properties: queue properties; properties->type selects the queue kind
 * @qid:        out: the queue id that was reserved
 * @wptr_bo:    forwarded to init_user_queue()
 * @q_data:     optional CRIU restore data; when set, q_data->q_id is the
 *              queue id that gets reserved instead of the first free one
 * @restore_mqd, @restore_ctl_stack: forwarded to the DQM create_queue op
 * @p_doorbell_offset_in_process: optional out: doorbell offset in bytes
 *              within the process doorbell slice (user queues only)
 *
 * Return: 0 on success, -1 when the process has no data for @dev,
 * -ENOSPC when the per-device queue limit is reached or the requested id
 * is taken, -ENOMEM on allocation failure, -EPERM when over-subscription
 * is forbidden by the scheduling policy, -EINVAL for an unknown queue
 * type, or the error reported by the slot/queue creation helpers.
 */
int pqm_create_queue(struct process_queue_manager *pqm,
			    struct kfd_dev *dev,
			    struct file *f,
			    struct queue_properties *properties,
			    unsigned int *qid,
			    struct amdgpu_bo *wptr_bo,
			    const struct kfd_criu_queue_priv_data *q_data,
			    const void *restore_mqd,
			    const void *restore_ctl_stack,
			    uint32_t *p_doorbell_offset_in_process)
{
	enum kfd_queue_type qtype = properties->type;
	struct process_queue_node *node;
	struct kfd_process_device *pdd;
	struct kernel_queue *kq = NULL;
	struct queue *q = NULL;
	unsigned int queue_limit;
	int ret;

	pdd = kfd_get_process_device_data(dev, pqm->process);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return -1;
	}

	/*
	 * for debug process, verify that it is within the static queues limit
	 * currently limit is set to half of the total avail HQD slots
	 * If we are just about to create DIQ, the is_debug flag is not set yet
	 * Hence we also check the type as well
	 */
	if (pdd->qpd.is_debug || qtype == KFD_QUEUE_TYPE_DIQ)
		queue_limit = dev->device_info.max_no_of_hqd/2;
	else
		queue_limit = 127; /* HWS limit */

	if (pdd->qpd.queue_count >= queue_limit)
		return -ENOSPC;

	/* Reserve either the id requested by CRIU or the first free one. */
	if (q_data) {
		ret = assign_queue_slot_by_qid(pqm, q_data->q_id);
		*qid = q_data->q_id;
	} else {
		ret = find_available_queue_slot(pqm, qid);
	}
	if (ret)
		return ret;

	/* First queue of the process on this device: register with the DQM. */
	if (list_empty(&pdd->qpd.queues_list) &&
	    list_empty(&pdd->qpd.priv_queue_list))
		dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);

	node = kzalloc(sizeof(*node), GFP_KERNEL);
	if (!node) {
		ret = -ENOMEM;
		goto err_allocate_pqn;
	}

	/* check if there is over subscription (compute queues only) */
	if (qtype == KFD_QUEUE_TYPE_COMPUTE &&
	    dev->dqm->sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION &&
	    (dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd ||
	     dev->dqm->active_queue_count >= get_cp_queues_num(dev->dqm))) {
		pr_debug("Over-subscription is not allowed when amdkfd.sched_policy == 1\n");
		ret = -EPERM;
		goto err_create_queue;
	}

	switch (qtype) {
	case KFD_QUEUE_TYPE_SDMA:
	case KFD_QUEUE_TYPE_SDMA_XGMI:
		/* SDMA queues are always allocated statically no matter
		 * which scheduler mode is used. We also do not need to
		 * check whether a SDMA queue can be allocated here, because
		 * allocate_sdma_queue() in create_queue() has the
		 * corresponding check logic.
		 */
	case KFD_QUEUE_TYPE_COMPUTE:
		/* All user queue types share the same creation sequence. */
		ret = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
		if (ret)
			goto err_create_queue;
		node->q = q;
		node->kq = NULL;
		ret = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data,
						 restore_mqd, restore_ctl_stack);
		print_queue(q);
		break;
	case KFD_QUEUE_TYPE_DIQ:
		kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_DIQ);
		if (!kq) {
			ret = -ENOMEM;
			goto err_create_queue;
		}
		kq->queue->properties.queue_id = *qid;
		node->kq = kq;
		node->q = NULL;
		ret = dev->dqm->ops.create_kernel_queue(dev->dqm,
							kq, &pdd->qpd);
		break;
	default:
		WARN(1, "Invalid queue type %d", qtype);
		ret = -EINVAL;
	}

	if (ret) {
		pr_err("Pasid 0x%x DQM create queue type %d failed. ret %d\n",
			pqm->process->pasid, qtype, ret);
		goto err_create_queue;
	}

	if (q && p_doorbell_offset_in_process) {
		/* Return the doorbell offset within the doorbell page
		 * to the caller so it can be passed up to user mode
		 * (in bytes).
		 * There are always 1024 doorbells per process, so in case
		 * of 8-byte doorbells, there are two doorbell pages per
		 * process.
		 */
		*p_doorbell_offset_in_process =
			(q->properties.doorbell_off * sizeof(uint32_t)) &
			(kfd_doorbell_process_slice(dev) - 1);
	}

	pr_debug("PQM After DQM create queue\n");

	list_add(&node->process_queue_list, &pqm->queues);

	if (q) {
		pr_debug("PQM done creating queue\n");
		kfd_procfs_add_queue(q);
		print_queue_properties(&q->properties);
	}

	return ret;

err_create_queue:
	uninit_queue(q);
	if (kq)
		kernel_queue_uninit(kq, false);
	kfree(node);
err_allocate_pqn:
	/* check if queues list is empty unregister process from device */
	clear_bit(*qid, pqm->queue_slot_bitmap);
	if (list_empty(&pdd->qpd.queues_list) &&
	    list_empty(&pdd->qpd.priv_queue_list))
		dev->dqm->ops.unregister_process(dev->dqm, &pdd->qpd);
	return ret;
}
384  
pqm_destroy_queue(struct process_queue_manager * pqm,unsigned int qid)385  int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
386  {
387  	struct process_queue_node *pqn;
388  	struct kfd_process_device *pdd;
389  	struct device_queue_manager *dqm;
390  	struct kfd_dev *dev;
391  	int retval;
392  
393  	dqm = NULL;
394  
395  	retval = 0;
396  
397  	pqn = get_queue_by_qid(pqm, qid);
398  	if (!pqn) {
399  		pr_err("Queue id does not match any known queue\n");
400  		return -EINVAL;
401  	}
402  
403  	dev = NULL;
404  	if (pqn->kq)
405  		dev = pqn->kq->dev;
406  	if (pqn->q)
407  		dev = pqn->q->device;
408  	if (WARN_ON(!dev))
409  		return -ENODEV;
410  
411  	pdd = kfd_get_process_device_data(dev, pqm->process);
412  	if (!pdd) {
413  		pr_err("Process device data doesn't exist\n");
414  		return -1;
415  	}
416  
417  	if (pqn->kq) {
418  		/* destroy kernel queue (DIQ) */
419  		dqm = pqn->kq->dev->dqm;
420  		dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd);
421  		kernel_queue_uninit(pqn->kq, false);
422  	}
423  
424  	if (pqn->q) {
425  		kfd_procfs_del_queue(pqn->q);
426  		dqm = pqn->q->device->dqm;
427  		retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
428  		if (retval) {
429  			pr_err("Pasid 0x%x destroy queue %d failed, ret %d\n",
430  				pqm->process->pasid,
431  				pqn->q->properties.queue_id, retval);
432  			if (retval != -ETIME)
433  				goto err_destroy_queue;
434  		}
435  
436  		if (pqn->q->gws) {
437  			amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
438  				pqn->q->gws);
439  			pdd->qpd.num_gws = 0;
440  		}
441  
442  		if (dev->shared_resources.enable_mes) {
443  			amdgpu_amdkfd_free_gtt_mem(dev->adev,
444  						   pqn->q->gang_ctx_bo);
445  			if (pqn->q->wptr_bo)
446  				amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo);
447  
448  		}
449  		uninit_queue(pqn->q);
450  	}
451  
452  	list_del(&pqn->process_queue_list);
453  	kfree(pqn);
454  	clear_bit(qid, pqm->queue_slot_bitmap);
455  
456  	if (list_empty(&pdd->qpd.queues_list) &&
457  	    list_empty(&pdd->qpd.priv_queue_list))
458  		dqm->ops.unregister_process(dqm, &pdd->qpd);
459  
460  err_destroy_queue:
461  	return retval;
462  }
463  
pqm_update_queue_properties(struct process_queue_manager * pqm,unsigned int qid,struct queue_properties * p)464  int pqm_update_queue_properties(struct process_queue_manager *pqm,
465  				unsigned int qid, struct queue_properties *p)
466  {
467  	int retval;
468  	struct process_queue_node *pqn;
469  
470  	pqn = get_queue_by_qid(pqm, qid);
471  	if (!pqn) {
472  		pr_debug("No queue %d exists for update operation\n", qid);
473  		return -EFAULT;
474  	}
475  
476  	pqn->q->properties.queue_address = p->queue_address;
477  	pqn->q->properties.queue_size = p->queue_size;
478  	pqn->q->properties.queue_percent = p->queue_percent;
479  	pqn->q->properties.priority = p->priority;
480  
481  	retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
482  							pqn->q, NULL);
483  	if (retval != 0)
484  		return retval;
485  
486  	return 0;
487  }
488  
pqm_update_mqd(struct process_queue_manager * pqm,unsigned int qid,struct mqd_update_info * minfo)489  int pqm_update_mqd(struct process_queue_manager *pqm,
490  				unsigned int qid, struct mqd_update_info *minfo)
491  {
492  	int retval;
493  	struct process_queue_node *pqn;
494  
495  	pqn = get_queue_by_qid(pqm, qid);
496  	if (!pqn) {
497  		pr_debug("No queue %d exists for update operation\n", qid);
498  		return -EFAULT;
499  	}
500  
501  	/* ASICs that have WGPs must enforce pairwise enabled mask checks. */
502  	if (minfo && minfo->update_flag == UPDATE_FLAG_CU_MASK && minfo->cu_mask.ptr &&
503  			KFD_GC_VERSION(pqn->q->device) >= IP_VERSION(10, 0, 0)) {
504  		int i;
505  
506  		for (i = 0; i < minfo->cu_mask.count; i += 2) {
507  			uint32_t cu_pair = (minfo->cu_mask.ptr[i / 32] >> (i % 32)) & 0x3;
508  
509  			if (cu_pair && cu_pair != 0x3) {
510  				pr_debug("CUs must be adjacent pairwise enabled.\n");
511  				return -EINVAL;
512  			}
513  		}
514  	}
515  
516  	retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
517  							pqn->q, minfo);
518  	if (retval != 0)
519  		return retval;
520  
521  	return 0;
522  }
523  
pqm_get_kernel_queue(struct process_queue_manager * pqm,unsigned int qid)524  struct kernel_queue *pqm_get_kernel_queue(
525  					struct process_queue_manager *pqm,
526  					unsigned int qid)
527  {
528  	struct process_queue_node *pqn;
529  
530  	pqn = get_queue_by_qid(pqm, qid);
531  	if (pqn && pqn->kq)
532  		return pqn->kq;
533  
534  	return NULL;
535  }
536  
pqm_get_user_queue(struct process_queue_manager * pqm,unsigned int qid)537  struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
538  					unsigned int qid)
539  {
540  	struct process_queue_node *pqn;
541  
542  	pqn = get_queue_by_qid(pqm, qid);
543  	return pqn ? pqn->q : NULL;
544  }
545  
/*
 * Fetch the saved wave state of a user queue via the DQM get_wave_state
 * op.
 *
 * @pqm:                 process queue manager that owns the queue
 * @qid:                 id of the queue; must refer to a user queue
 * @ctl_stack:           user buffer handed to the DQM op
 * @ctl_stack_used_size: out, filled by the DQM op
 * @save_area_used_size: out, filled by the DQM op
 *
 * Return: -EFAULT when @qid does not name a user queue, otherwise the
 * result of the DQM op.
 */
int pqm_get_wave_state(struct process_queue_manager *pqm,
		       unsigned int qid,
		       void __user *ctl_stack,
		       u32 *ctl_stack_used_size,
		       u32 *save_area_used_size)
{
	struct process_queue_node *pqn;
	struct queue *q;

	pqn = get_queue_by_qid(pqm, qid);
	/*
	 * get_queue_by_qid() also matches kernel-queue nodes, whose ->q is
	 * NULL; reject those instead of dereferencing a NULL queue.
	 */
	if (!pqn || !pqn->q) {
		pr_debug("amdkfd: No queue %d exists for operation\n",
			 qid);
		return -EFAULT;
	}
	q = pqn->q;

	return q->device->dqm->ops.get_wave_state(q->device->dqm,
						  q,
						  ctl_stack,
						  ctl_stack_used_size,
						  save_area_used_size);
}
567  
/*
 * Query the MQD and control stack sizes needed to checkpoint queue @q.
 * Thin wrapper around pqm_get_queue_checkpoint_info() that logs failures.
 *
 * Return: 0 on success, otherwise the error from the wrapped call.
 */
static int get_queue_data_sizes(struct kfd_process_device *pdd,
				struct queue *q,
				uint32_t *mqd_size,
				uint32_t *ctl_stack_size)
{
	int err = pqm_get_queue_checkpoint_info(&pdd->process->pqm,
						q->properties.queue_id,
						mqd_size,
						ctl_stack_size);

	if (!err)
		return 0;

	pr_err("Failed to get queue dump info (%d)\n", err);
	return err;
}
584  
/*
 * Count the checkpointable queues of process @p and compute the total
 * size of their CRIU private data.
 *
 * @p:               process to inspect
 * @num_queues:      out: number of compute/SDMA/SDMA-XGMI queues found
 * @priv_data_sizes: out: per-queue MQD and control stack sizes summed
 *                   over all queues, plus one struct
 *                   kfd_criu_queue_priv_data per queue
 *
 * Return: 0 on success, -EOPNOTSUPP when a queue of another type is
 * found, or the error from get_queue_data_sizes(). On failure
 * *@num_queues holds a partial count and *@priv_data_sizes is not
 * written.
 */
int kfd_process_get_queue_info(struct kfd_process *p,
			       uint32_t *num_queues,
			       uint64_t *priv_data_sizes)
{
	/* 64-bit like the result, so the running sum cannot wrap at 4 GiB. */
	uint64_t extra_data_sizes = 0;
	struct queue *q;
	int i;
	int ret;

	*num_queues = 0;

	/* Run over all PDDs of the process */
	for (i = 0; i < p->n_pdds; i++) {
		struct kfd_process_device *pdd = p->pdds[i];

		list_for_each_entry(q, &pdd->qpd.queues_list, list) {
			if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
				q->properties.type == KFD_QUEUE_TYPE_SDMA ||
				q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
				uint32_t mqd_size, ctl_stack_size;

				*num_queues = *num_queues + 1;

				ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
				if (ret)
					return ret;

				/* Widen before adding so the pair itself cannot wrap. */
				extra_data_sizes += (uint64_t)mqd_size + ctl_stack_size;
			} else {
				pr_err("Unsupported queue type (%d)\n", q->properties.type);
				return -EOPNOTSUPP;
			}
		}
	}
	*priv_data_sizes = extra_data_sizes +
				((uint64_t)*num_queues * sizeof(struct kfd_criu_queue_priv_data));

	return 0;
}
624  
pqm_checkpoint_mqd(struct process_queue_manager * pqm,unsigned int qid,void * mqd,void * ctl_stack)625  static int pqm_checkpoint_mqd(struct process_queue_manager *pqm,
626  			      unsigned int qid,
627  			      void *mqd,
628  			      void *ctl_stack)
629  {
630  	struct process_queue_node *pqn;
631  
632  	pqn = get_queue_by_qid(pqm, qid);
633  	if (!pqn) {
634  		pr_debug("amdkfd: No queue %d exists for operation\n", qid);
635  		return -EFAULT;
636  	}
637  
638  	if (!pqn->q->device->dqm->ops.checkpoint_mqd) {
639  		pr_err("amdkfd: queue dumping not supported on this device\n");
640  		return -EOPNOTSUPP;
641  	}
642  
643  	return pqn->q->device->dqm->ops.checkpoint_mqd(pqn->q->device->dqm,
644  						       pqn->q, mqd, ctl_stack);
645  }
646  
criu_checkpoint_queue(struct kfd_process_device * pdd,struct queue * q,struct kfd_criu_queue_priv_data * q_data)647  static int criu_checkpoint_queue(struct kfd_process_device *pdd,
648  			   struct queue *q,
649  			   struct kfd_criu_queue_priv_data *q_data)
650  {
651  	uint8_t *mqd, *ctl_stack;
652  	int ret;
653  
654  	mqd = (void *)(q_data + 1);
655  	ctl_stack = mqd + q_data->mqd_size;
656  
657  	q_data->gpu_id = pdd->user_gpu_id;
658  	q_data->type = q->properties.type;
659  	q_data->format = q->properties.format;
660  	q_data->q_id =  q->properties.queue_id;
661  	q_data->q_address = q->properties.queue_address;
662  	q_data->q_size = q->properties.queue_size;
663  	q_data->priority = q->properties.priority;
664  	q_data->q_percent = q->properties.queue_percent;
665  	q_data->read_ptr_addr = (uint64_t)q->properties.read_ptr;
666  	q_data->write_ptr_addr = (uint64_t)q->properties.write_ptr;
667  	q_data->doorbell_id = q->doorbell_id;
668  
669  	q_data->sdma_id = q->sdma_id;
670  
671  	q_data->eop_ring_buffer_address =
672  		q->properties.eop_ring_buffer_address;
673  
674  	q_data->eop_ring_buffer_size = q->properties.eop_ring_buffer_size;
675  
676  	q_data->ctx_save_restore_area_address =
677  		q->properties.ctx_save_restore_area_address;
678  
679  	q_data->ctx_save_restore_area_size =
680  		q->properties.ctx_save_restore_area_size;
681  
682  	q_data->gws = !!q->gws;
683  
684  	ret = pqm_checkpoint_mqd(&pdd->process->pqm, q->properties.queue_id, mqd, ctl_stack);
685  	if (ret) {
686  		pr_err("Failed checkpoint queue_mqd (%d)\n", ret);
687  		return ret;
688  	}
689  
690  	pr_debug("Dumping Queue: gpu_id:%x queue_id:%u\n", q_data->gpu_id, q_data->q_id);
691  	return ret;
692  }
693  
/*
 * Checkpoint every queue of one process/device pair into user memory.
 *
 * Each queue record (header, MQD, control stack) is assembled in a
 * kernel bounce buffer and copied to @user_priv at
 * *@queues_priv_data_offset, which is advanced by the record size.
 * *@q_index is incremented once per queue written. The bounce buffer is
 * only reallocated when a queue needs more room than any queue before it.
 *
 * Return: 0 on success, -EOPNOTSUPP for a queue type other than
 * compute/SDMA/SDMA-XGMI, -ENOMEM on allocation failure, -EFAULT when
 * the copy to user space fails, or the error from the size/checkpoint
 * helpers.
 */
static int criu_checkpoint_queues_device(struct kfd_process_device *pdd,
				   uint8_t __user *user_priv,
				   unsigned int *q_index,
				   uint64_t *queues_priv_data_offset)
{
	uint8_t *bounce = NULL; /* Local buffer to store individual queue private data */
	unsigned int bounce_size = 0;
	struct queue *q;
	int ret = 0;

	list_for_each_entry(q, &pdd->qpd.queues_list, list) {
		struct kfd_criu_queue_priv_data *q_data;
		uint32_t mqd_size, ctl_stack_size;
		uint64_t record_size;

		switch (q->properties.type) {
		case KFD_QUEUE_TYPE_COMPUTE:
		case KFD_QUEUE_TYPE_SDMA:
		case KFD_QUEUE_TYPE_SDMA_XGMI:
			break;
		default:
			pr_err("Unsupported queue type (%d)\n", q->properties.type);
			ret = -EOPNOTSUPP;
			goto out;
		}

		ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
		if (ret)
			goto out;

		record_size = sizeof(*q_data) + mqd_size + ctl_stack_size;

		/* Increase local buffer space if needed */
		if (bounce_size < record_size) {
			kfree(bounce);

			bounce = kzalloc(record_size, GFP_KERNEL);
			if (!bounce) {
				ret = -ENOMEM;
				goto out;
			}
			bounce_size = record_size;
		}

		q_data = (struct kfd_criu_queue_priv_data *)bounce;

		/* data stored in this order: priv_data, mqd, ctl_stack */
		q_data->mqd_size = mqd_size;
		q_data->ctl_stack_size = ctl_stack_size;

		ret = criu_checkpoint_queue(pdd, q, q_data);
		if (ret)
			goto out;

		q_data->object_type = KFD_CRIU_OBJECT_TYPE_QUEUE;

		/* ret is 0 here; only a failed copy changes it. */
		if (copy_to_user(user_priv + *queues_priv_data_offset,
				 q_data, record_size)) {
			ret = -EFAULT;
			goto out;
		}

		*queues_priv_data_offset += record_size;
		*q_index = *q_index + 1;
	}

out:
	kfree(bounce);

	return ret;
}
763  
/*
 * Checkpoint the queues of every device of process @p into user memory.
 *
 * @p:                process whose queues are dumped
 * @user_priv_data:   user buffer receiving the queue records
 * @priv_data_offset: in/out: write offset into @user_priv_data, advanced
 *                    by the per-device helper for each record written
 *
 * Return: 0 on success, otherwise the first error reported by
 * criu_checkpoint_queues_device(); later devices are not processed.
 */
int kfd_criu_checkpoint_queues(struct kfd_process *p,
			 uint8_t __user *user_priv_data,
			 uint64_t *priv_data_offset)
{
	/* Unsigned to match the unsigned int * the per-device helper takes. */
	unsigned int q_index = 0;
	int pdd_index;
	int ret = 0;

	for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
		struct kfd_process_device *pdd = p->pdds[pdd_index];

		/*
		 * criu_checkpoint_queues_device will copy data to user and update q_index and
		 * queues_priv_data_offset
		 */
		ret = criu_checkpoint_queues_device(pdd, user_priv_data, &q_index,
					      priv_data_offset);

		if (ret)
			break;
	}

	return ret;
}
786  
set_queue_properties_from_criu(struct queue_properties * qp,struct kfd_criu_queue_priv_data * q_data)787  static void set_queue_properties_from_criu(struct queue_properties *qp,
788  					  struct kfd_criu_queue_priv_data *q_data)
789  {
790  	qp->is_interop = false;
791  	qp->queue_percent = q_data->q_percent;
792  	qp->priority = q_data->priority;
793  	qp->queue_address = q_data->q_address;
794  	qp->queue_size = q_data->q_size;
795  	qp->read_ptr = (uint32_t *) q_data->read_ptr_addr;
796  	qp->write_ptr = (uint32_t *) q_data->write_ptr_addr;
797  	qp->eop_ring_buffer_address = q_data->eop_ring_buffer_address;
798  	qp->eop_ring_buffer_size = q_data->eop_ring_buffer_size;
799  	qp->ctx_save_restore_area_address = q_data->ctx_save_restore_area_address;
800  	qp->ctx_save_restore_area_size = q_data->ctx_save_restore_area_size;
801  	qp->ctl_stack_size = q_data->ctl_stack_size;
802  	qp->type = q_data->type;
803  	qp->format = q_data->format;
804  }
805  
/*
 * Restore one queue of process @p from a CRIU private data stream.
 *
 * @p:                  process the queue is restored into
 * @user_priv_ptr:      user buffer holding the CRIU private data
 * @priv_data_offset:   in/out: read offset into @user_priv_ptr; advanced
 *                      past the queue header and its extra data as they
 *                      are consumed
 * @max_priv_data_size: total size of the user buffer, used to bound reads
 *
 * The record consists of a struct kfd_criu_queue_priv_data header
 * followed by the MQD and the control stack. Both parts are copied into
 * temporary kernel buffers, the queue is recreated with the recorded
 * queue id, and GWS is re-attached when the record says the queue had it.
 * Both temporary buffers are freed before returning, on every path.
 *
 * Return: 0 on success; -EINVAL when the record would run past the
 * buffer or names an unknown GPU; -ENOMEM on allocation or doorbell
 * failure; -EFAULT when reading user memory fails; otherwise the error
 * from pqm_create_queue() or pqm_set_gws().
 */
int kfd_criu_restore_queue(struct kfd_process *p,
			   uint8_t __user *user_priv_ptr,
			   uint64_t *priv_data_offset,
			   uint64_t max_priv_data_size)
{
	uint8_t *mqd, *ctl_stack, *q_extra_data = NULL;
	struct kfd_criu_queue_priv_data *q_data;
	struct kfd_process_device *pdd;
	uint64_t q_extra_data_size;
	struct queue_properties qp;
	unsigned int queue_id;
	int ret = 0;

	if (*priv_data_offset + sizeof(*q_data) > max_priv_data_size)
		return -EINVAL;

	q_data = kmalloc(sizeof(*q_data), GFP_KERNEL);
	if (!q_data)
		return -ENOMEM;

	ret = copy_from_user(q_data, user_priv_ptr + *priv_data_offset, sizeof(*q_data));
	if (ret) {
		ret = -EFAULT;
		goto exit;
	}

	*priv_data_offset += sizeof(*q_data);
	/* Widen before adding so two 32-bit sizes cannot wrap. */
	q_extra_data_size = (uint64_t)q_data->ctl_stack_size + q_data->mqd_size;

	if (*priv_data_offset + q_extra_data_size > max_priv_data_size) {
		ret = -EINVAL;
		goto exit;
	}

	q_extra_data = kmalloc(q_extra_data_size, GFP_KERNEL);
	if (!q_extra_data) {
		ret = -ENOMEM;
		goto exit;
	}

	ret = copy_from_user(q_extra_data, user_priv_ptr + *priv_data_offset, q_extra_data_size);
	if (ret) {
		ret = -EFAULT;
		goto exit;
	}

	*priv_data_offset += q_extra_data_size;

	pdd = kfd_process_device_data_by_id(p, q_data->gpu_id);
	if (!pdd) {
		pr_err("Failed to get pdd\n");
		ret = -EINVAL;
		goto exit;
	}

	/* Allocate process doorbells on first use for this device. */
	if (!pdd->doorbell_index &&
	    kfd_alloc_process_doorbells(pdd->dev, &pdd->doorbell_index) < 0) {
		ret = -ENOMEM;
		goto exit;
	}

	/* data stored in this order: mqd, ctl_stack */
	mqd = q_extra_data;
	ctl_stack = mqd + q_data->mqd_size;

	memset(&qp, 0, sizeof(qp));
	set_queue_properties_from_criu(&qp, q_data);

	print_queue_properties(&qp);

	ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, NULL, q_data, mqd, ctl_stack,
				NULL);
	if (ret) {
		pr_err("Failed to create new queue err:%d\n", ret);
		goto exit;
	}

	if (q_data->gws)
		ret = pqm_set_gws(&p->pqm, q_data->q_id, pdd->dev->gws);

exit:
	if (ret)
		pr_err("Failed to restore queue (%d)\n", ret);
	else
		pr_debug("Queue id %d was restored successfully\n", queue_id);

	/*
	 * Both buffers are only needed during this call. q_extra_data is
	 * still NULL when we bail out before allocating it; kfree(NULL) is a
	 * no-op.
	 */
	kfree(q_extra_data);
	kfree(q_data);

	return ret;
}
896  
/*
 * Query the MQD and control stack sizes needed to checkpoint queue @qid
 * through the DQM get_queue_checkpoint_info op.
 *
 * Return: 0 on success, -EFAULT when @qid is unknown, -EOPNOTSUPP when
 * the device's DQM does not provide the op.
 */
int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm,
				  unsigned int qid,
				  uint32_t *mqd_size,
				  uint32_t *ctl_stack_size)
{
	struct process_queue_node *node = get_queue_by_qid(pqm, qid);
	struct device_queue_manager *dqm;

	if (!node) {
		pr_debug("amdkfd: No queue %d exists for operation\n", qid);
		return -EFAULT;
	}

	dqm = node->q->device->dqm;
	if (!dqm->ops.get_queue_checkpoint_info) {
		pr_err("amdkfd: queue dumping not supported on this device\n");
		return -EOPNOTSUPP;
	}

	/* The op's return value, if any, is not used; sizes come back via the out pointers. */
	dqm->ops.get_queue_checkpoint_info(dqm, node->q, mqd_size,
					   ctl_stack_size);
	return 0;
}
920  
921  #if defined(CONFIG_DEBUG_FS)
922  
pqm_debugfs_mqds(struct seq_file * m,void * data)923  int pqm_debugfs_mqds(struct seq_file *m, void *data)
924  {
925  	struct process_queue_manager *pqm = data;
926  	struct process_queue_node *pqn;
927  	struct queue *q;
928  	enum KFD_MQD_TYPE mqd_type;
929  	struct mqd_manager *mqd_mgr;
930  	int r = 0;
931  
932  	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
933  		if (pqn->q) {
934  			q = pqn->q;
935  			switch (q->properties.type) {
936  			case KFD_QUEUE_TYPE_SDMA:
937  			case KFD_QUEUE_TYPE_SDMA_XGMI:
938  				seq_printf(m, "  SDMA queue on device %x\n",
939  					   q->device->id);
940  				mqd_type = KFD_MQD_TYPE_SDMA;
941  				break;
942  			case KFD_QUEUE_TYPE_COMPUTE:
943  				seq_printf(m, "  Compute queue on device %x\n",
944  					   q->device->id);
945  				mqd_type = KFD_MQD_TYPE_CP;
946  				break;
947  			default:
948  				seq_printf(m,
949  				"  Bad user queue type %d on device %x\n",
950  					   q->properties.type, q->device->id);
951  				continue;
952  			}
953  			mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type];
954  		} else if (pqn->kq) {
955  			q = pqn->kq->queue;
956  			mqd_mgr = pqn->kq->mqd_mgr;
957  			switch (q->properties.type) {
958  			case KFD_QUEUE_TYPE_DIQ:
959  				seq_printf(m, "  DIQ on device %x\n",
960  					   pqn->kq->dev->id);
961  				break;
962  			default:
963  				seq_printf(m,
964  				"  Bad kernel queue type %d on device %x\n",
965  					   q->properties.type,
966  					   pqn->kq->dev->id);
967  				continue;
968  			}
969  		} else {
970  			seq_printf(m,
971  		"  Weird: Queue node with neither kernel nor user queue\n");
972  			continue;
973  		}
974  
975  		r = mqd_mgr->debugfs_show_mqd(m, q->mqd);
976  		if (r != 0)
977  			break;
978  	}
979  
980  	return r;
981  }
982  
983  #endif
984