1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2020-2023 Intel Corporation
4  */
5 
6 #include <linux/bitfield.h>
7 #include <linux/highmem.h>
8 
9 #include "ivpu_drv.h"
10 #include "ivpu_hw.h"
11 #include "ivpu_mmu.h"
12 #include "ivpu_mmu_context.h"
13 
/* VPU address bit fields that select the entry at each page table level */
#define IVPU_MMU_PGD_INDEX_MASK          GENMASK(38, 30)
#define IVPU_MMU_PMD_INDEX_MASK          GENMASK(29, 21)
#define IVPU_MMU_PTE_INDEX_MASK          GENMASK(20, 12)

/* Bits [11:0] of a table entry hold flags; they are masked off to recover the address */
#define IVPU_MMU_ENTRY_FLAGS_MASK        GENMASK(11, 0)
#define IVPU_MMU_ENTRY_FLAG_NG           BIT(11)
#define IVPU_MMU_ENTRY_FLAG_AF           BIT(10)
#define IVPU_MMU_ENTRY_FLAG_USER         BIT(6)
#define IVPU_MMU_ENTRY_FLAG_LLC_COHERENT BIT(2)
#define IVPU_MMU_ENTRY_FLAG_TYPE_PAGE    BIT(1)
#define IVPU_MMU_ENTRY_FLAG_VALID        BIT(0)

/* Size mapped by one PTE, and the address span covered by a full PTE / PMD table */
#define IVPU_MMU_PAGE_SIZE    SZ_4K
#define IVPU_MMU_PTE_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PAGE_SIZE)
#define IVPU_MMU_PMD_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PTE_MAP_SIZE)
/* Size in bytes of a single page table (one u64 per entry) */
#define IVPU_MMU_PGTABLE_SIZE (IVPU_MMU_PGTABLE_ENTRIES * sizeof(u64))

/* Placeholder address stored in PTEs that are unmapped */
#define IVPU_MMU_DUMMY_ADDRESS 0xdeadb000
/* Flags OR-ed into every populated table entry */
#define IVPU_MMU_ENTRY_VALID   (IVPU_MMU_ENTRY_FLAG_TYPE_PAGE | IVPU_MMU_ENTRY_FLAG_VALID)
/* Value of an unmapped PTE: the dummy address with every flag bit cleared */
#define IVPU_MMU_ENTRY_INVALID (IVPU_MMU_DUMMY_ADDRESS & ~IVPU_MMU_ENTRY_FLAGS_MASK)
/* Base flag set applied to every mapped page */
#define IVPU_MMU_ENTRY_MAPPED  (IVPU_MMU_ENTRY_FLAG_AF | IVPU_MMU_ENTRY_FLAG_USER | \
				IVPU_MMU_ENTRY_FLAG_NG | IVPU_MMU_ENTRY_VALID)
35 
ivpu_mmu_pgtable_init(struct ivpu_device * vdev,struct ivpu_mmu_pgtable * pgtable)36 static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
37 {
38 	dma_addr_t pgd_dma;
39 	u64 *pgd;
40 
41 	pgd = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pgd_dma, GFP_KERNEL);
42 	if (!pgd)
43 		return -ENOMEM;
44 
45 	pgtable->pgd = pgd;
46 	pgtable->pgd_dma = pgd_dma;
47 
48 	return 0;
49 }
50 
ivpu_mmu_pgtable_free(struct ivpu_device * vdev,struct ivpu_mmu_pgtable * pgtable)51 static void ivpu_mmu_pgtable_free(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
52 {
53 	int pgd_index, pmd_index;
54 
55 	for (pgd_index = 0; pgd_index < IVPU_MMU_PGTABLE_ENTRIES; ++pgd_index) {
56 		u64 **pmd_entries = pgtable->pgd_cpu_entries[pgd_index];
57 		u64 *pmd = pgtable->pgd_entries[pgd_index];
58 
59 		if (!pmd_entries)
60 			continue;
61 
62 		for (pmd_index = 0; pmd_index < IVPU_MMU_PGTABLE_ENTRIES; ++pmd_index) {
63 			if (pmd_entries[pmd_index])
64 				dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE,
65 					    pmd_entries[pmd_index],
66 					    pmd[pmd_index] & ~IVPU_MMU_ENTRY_FLAGS_MASK);
67 		}
68 
69 		kfree(pmd_entries);
70 		dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pgtable->pgd_entries[pgd_index],
71 			    pgtable->pgd[pgd_index] & ~IVPU_MMU_ENTRY_FLAGS_MASK);
72 	}
73 
74 	dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pgtable->pgd,
75 		    pgtable->pgd_dma & ~IVPU_MMU_ENTRY_FLAGS_MASK);
76 }
77 
78 static u64*
ivpu_mmu_ensure_pmd(struct ivpu_device * vdev,struct ivpu_mmu_pgtable * pgtable,u64 pgd_index)79 ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, u64 pgd_index)
80 {
81 	u64 **pmd_entries;
82 	dma_addr_t pmd_dma;
83 	u64 *pmd;
84 
85 	if (pgtable->pgd_entries[pgd_index])
86 		return pgtable->pgd_entries[pgd_index];
87 
88 	pmd = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pmd_dma, GFP_KERNEL);
89 	if (!pmd)
90 		return NULL;
91 
92 	pmd_entries = kzalloc(IVPU_MMU_PGTABLE_SIZE, GFP_KERNEL);
93 	if (!pmd_entries)
94 		goto err_free_pgd;
95 
96 	pgtable->pgd_entries[pgd_index] = pmd;
97 	pgtable->pgd_cpu_entries[pgd_index] = pmd_entries;
98 	pgtable->pgd[pgd_index] = pmd_dma | IVPU_MMU_ENTRY_VALID;
99 
100 	return pmd;
101 
102 err_free_pgd:
103 	dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pmd, pmd_dma);
104 	return NULL;
105 }
106 
107 static u64*
ivpu_mmu_ensure_pte(struct ivpu_device * vdev,struct ivpu_mmu_pgtable * pgtable,int pgd_index,int pmd_index)108 ivpu_mmu_ensure_pte(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable,
109 		    int pgd_index, int pmd_index)
110 {
111 	dma_addr_t pte_dma;
112 	u64 *pte;
113 
114 	if (pgtable->pgd_cpu_entries[pgd_index][pmd_index])
115 		return pgtable->pgd_cpu_entries[pgd_index][pmd_index];
116 
117 	pte = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pte_dma, GFP_KERNEL);
118 	if (!pte)
119 		return NULL;
120 
121 	pgtable->pgd_cpu_entries[pgd_index][pmd_index] = pte;
122 	pgtable->pgd_entries[pgd_index][pmd_index] = pte_dma | IVPU_MMU_ENTRY_VALID;
123 
124 	return pte;
125 }
126 
127 static int
ivpu_mmu_context_map_page(struct ivpu_device * vdev,struct ivpu_mmu_context * ctx,u64 vpu_addr,dma_addr_t dma_addr,int prot)128 ivpu_mmu_context_map_page(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
129 			  u64 vpu_addr, dma_addr_t dma_addr, int prot)
130 {
131 	u64 *pte;
132 	int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
133 	int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
134 	int pte_index = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);
135 
136 	/* Allocate PMD - second level page table if needed */
137 	if (!ivpu_mmu_ensure_pmd(vdev, &ctx->pgtable, pgd_index))
138 		return -ENOMEM;
139 
140 	/* Allocate PTE - third level page table if needed */
141 	pte = ivpu_mmu_ensure_pte(vdev, &ctx->pgtable, pgd_index, pmd_index);
142 	if (!pte)
143 		return -ENOMEM;
144 
145 	/* Update PTE - third level page table with DMA address */
146 	pte[pte_index] = dma_addr | prot;
147 
148 	return 0;
149 }
150 
/**
 * ivpu_mmu_context_unmap_page() - Invalidate the PTE that maps one page
 * @ctx: MMU context whose page tables are updated
 * @vpu_addr: VPU address of the page; only the PGD/PMD/PTE index bits are used
 *
 * The PTE is overwritten with IVPU_MMU_ENTRY_INVALID (dummy address, no flags).
 * If the PMD or PTE table covering @vpu_addr was never allocated there is no
 * entry to invalidate, so the call is a no-op instead of a NULL dereference.
 */
static void ivpu_mmu_context_unmap_page(struct ivpu_mmu_context *ctx, u64 vpu_addr)
{
	int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
	int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
	int pte_index = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);
	u64 **pte_tables = ctx->pgtable.pgd_cpu_entries[pgd_index];
	u64 *pte;

	/* No PMD level for this address: nothing was ever mapped here */
	if (!pte_tables)
		return;

	/* No PTE table for this address: nothing was ever mapped here */
	pte = pte_tables[pmd_index];
	if (!pte)
		return;

	/* Update PTE with dummy physical address and clear flags */
	pte[pte_index] = IVPU_MMU_ENTRY_INVALID;
}
160 
161 static void
ivpu_mmu_context_flush_page_tables(struct ivpu_mmu_context * ctx,u64 vpu_addr,size_t size)162 ivpu_mmu_context_flush_page_tables(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size)
163 {
164 	u64 end_addr = vpu_addr + size;
165 	u64 *pgd = ctx->pgtable.pgd;
166 
167 	/* Align to PMD entry (2 MB) */
168 	vpu_addr &= ~(IVPU_MMU_PTE_MAP_SIZE - 1);
169 
170 	while (vpu_addr < end_addr) {
171 		int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
172 		u64 pmd_end = (pgd_index + 1) * (u64)IVPU_MMU_PMD_MAP_SIZE;
173 		u64 *pmd = ctx->pgtable.pgd_entries[pgd_index];
174 
175 		while (vpu_addr < end_addr && vpu_addr < pmd_end) {
176 			int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
177 			u64 *pte = ctx->pgtable.pgd_cpu_entries[pgd_index][pmd_index];
178 
179 			clflush_cache_range(pte, IVPU_MMU_PGTABLE_SIZE);
180 			vpu_addr += IVPU_MMU_PTE_MAP_SIZE;
181 		}
182 		clflush_cache_range(pmd, IVPU_MMU_PGTABLE_SIZE);
183 	}
184 	clflush_cache_range(pgd, IVPU_MMU_PGTABLE_SIZE);
185 }
186 
187 static int
ivpu_mmu_context_map_pages(struct ivpu_device * vdev,struct ivpu_mmu_context * ctx,u64 vpu_addr,dma_addr_t dma_addr,size_t size,int prot)188 ivpu_mmu_context_map_pages(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
189 			   u64 vpu_addr, dma_addr_t dma_addr, size_t size, int prot)
190 {
191 	while (size) {
192 		int ret = ivpu_mmu_context_map_page(vdev, ctx, vpu_addr, dma_addr, prot);
193 
194 		if (ret)
195 			return ret;
196 
197 		vpu_addr += IVPU_MMU_PAGE_SIZE;
198 		dma_addr += IVPU_MMU_PAGE_SIZE;
199 		size -= IVPU_MMU_PAGE_SIZE;
200 	}
201 
202 	return 0;
203 }
204 
/**
 * ivpu_mmu_context_unmap_pages() - Invalidate the PTEs of a contiguous range
 * @ctx: MMU context whose page tables are updated
 * @vpu_addr: VPU address of the first page
 * @size: length of the range in bytes
 *
 * One PTE is invalidated per IVPU_MMU_PAGE_SIZE step. The loop is bounded by
 * an offset compared against @size, so a @size that is not a page multiple
 * terminates after the trailing partial page rather than wrapping the
 * unsigned byte counter.
 */
static void ivpu_mmu_context_unmap_pages(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size)
{
	size_t offset;

	for (offset = 0; offset < size; offset += IVPU_MMU_PAGE_SIZE)
		ivpu_mmu_context_unmap_page(ctx, vpu_addr + offset);
}
213 
214 int
ivpu_mmu_context_map_sgt(struct ivpu_device * vdev,struct ivpu_mmu_context * ctx,u64 vpu_addr,struct sg_table * sgt,bool llc_coherent)215 ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
216 			 u64 vpu_addr, struct sg_table *sgt,  bool llc_coherent)
217 {
218 	struct scatterlist *sg;
219 	int prot;
220 	int ret;
221 	u64 i;
222 
223 	if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE))
224 		return -EINVAL;
225 	/*
226 	 * VPU is only 32 bit, but DMA engine is 38 bit
227 	 * Ranges < 2 GB are reserved for VPU internal registers
228 	 * Limit range to 8 GB
229 	 */
230 	if (vpu_addr < SZ_2G || vpu_addr > SZ_8G)
231 		return -EINVAL;
232 
233 	prot = IVPU_MMU_ENTRY_MAPPED;
234 	if (llc_coherent)
235 		prot |= IVPU_MMU_ENTRY_FLAG_LLC_COHERENT;
236 
237 	mutex_lock(&ctx->lock);
238 
239 	for_each_sgtable_dma_sg(sgt, sg, i) {
240 		u64 dma_addr = sg_dma_address(sg) - sg->offset;
241 		size_t size = sg_dma_len(sg) + sg->offset;
242 
243 		ret = ivpu_mmu_context_map_pages(vdev, ctx, vpu_addr, dma_addr, size, prot);
244 		if (ret) {
245 			ivpu_err(vdev, "Failed to map context pages\n");
246 			mutex_unlock(&ctx->lock);
247 			return ret;
248 		}
249 		ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size);
250 		vpu_addr += size;
251 	}
252 
253 	mutex_unlock(&ctx->lock);
254 
255 	ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
256 	if (ret)
257 		ivpu_err(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret);
258 	return ret;
259 }
260 
261 void
ivpu_mmu_context_unmap_sgt(struct ivpu_device * vdev,struct ivpu_mmu_context * ctx,u64 vpu_addr,struct sg_table * sgt)262 ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
263 			   u64 vpu_addr, struct sg_table *sgt)
264 {
265 	struct scatterlist *sg;
266 	int ret;
267 	u64 i;
268 
269 	if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE))
270 		ivpu_warn(vdev, "Unaligned vpu_addr: 0x%llx\n", vpu_addr);
271 
272 	mutex_lock(&ctx->lock);
273 
274 	for_each_sgtable_dma_sg(sgt, sg, i) {
275 		size_t size = sg_dma_len(sg) + sg->offset;
276 
277 		ivpu_mmu_context_unmap_pages(ctx, vpu_addr, size);
278 		ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size);
279 		vpu_addr += size;
280 	}
281 
282 	mutex_unlock(&ctx->lock);
283 
284 	ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
285 	if (ret)
286 		ivpu_warn(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret);
287 }
288 
289 int
ivpu_mmu_context_insert_node_locked(struct ivpu_mmu_context * ctx,const struct ivpu_addr_range * range,u64 size,struct drm_mm_node * node)290 ivpu_mmu_context_insert_node_locked(struct ivpu_mmu_context *ctx,
291 				    const struct ivpu_addr_range *range,
292 				    u64 size, struct drm_mm_node *node)
293 {
294 	lockdep_assert_held(&ctx->lock);
295 
296 	return drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_PAGE_SIZE,
297 					  0, range->start, range->end, DRM_MM_INSERT_BEST);
298 }
299 
300 void
ivpu_mmu_context_remove_node_locked(struct ivpu_mmu_context * ctx,struct drm_mm_node * node)301 ivpu_mmu_context_remove_node_locked(struct ivpu_mmu_context *ctx, struct drm_mm_node *node)
302 {
303 	lockdep_assert_held(&ctx->lock);
304 
305 	drm_mm_remove_node(node);
306 }
307 
308 static int
ivpu_mmu_context_init(struct ivpu_device * vdev,struct ivpu_mmu_context * ctx,u32 context_id)309 ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 context_id)
310 {
311 	u64 start, end;
312 	int ret;
313 
314 	mutex_init(&ctx->lock);
315 	INIT_LIST_HEAD(&ctx->bo_list);
316 
317 	ret = ivpu_mmu_pgtable_init(vdev, &ctx->pgtable);
318 	if (ret)
319 		return ret;
320 
321 	if (!context_id) {
322 		start = vdev->hw->ranges.global_low.start;
323 		end = vdev->hw->ranges.global_high.end;
324 	} else {
325 		start = vdev->hw->ranges.user_low.start;
326 		end = vdev->hw->ranges.user_high.end;
327 	}
328 
329 	drm_mm_init(&ctx->mm, start, end - start);
330 	ctx->id = context_id;
331 
332 	return 0;
333 }
334 
ivpu_mmu_context_fini(struct ivpu_device * vdev,struct ivpu_mmu_context * ctx)335 static void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
336 {
337 	drm_WARN_ON(&vdev->drm, !ctx->pgtable.pgd);
338 
339 	mutex_destroy(&ctx->lock);
340 	ivpu_mmu_pgtable_free(vdev, &ctx->pgtable);
341 	drm_mm_takedown(&ctx->mm);
342 }
343 
ivpu_mmu_global_context_init(struct ivpu_device * vdev)344 int ivpu_mmu_global_context_init(struct ivpu_device *vdev)
345 {
346 	return ivpu_mmu_context_init(vdev, &vdev->gctx, IVPU_GLOBAL_CONTEXT_MMU_SSID);
347 }
348 
ivpu_mmu_global_context_fini(struct ivpu_device * vdev)349 void ivpu_mmu_global_context_fini(struct ivpu_device *vdev)
350 {
351 	return ivpu_mmu_context_fini(vdev, &vdev->gctx);
352 }
353 
/**
 * ivpu_mmu_user_context_mark_invalid() - Flag the context with a given SSID as faulted
 * @vdev: device whose context XArray is searched
 * @ssid: key of the context in @vdev->context_xa
 *
 * Sets has_mmu_faults on the file private data stored under @ssid, if any.
 * The lookup and the update are both done with the XArray lock held.
 */
void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid)
{
	struct ivpu_file_priv *priv;

	xa_lock(&vdev->context_xa);
	priv = xa_load(&vdev->context_xa, ssid);
	if (priv)
		priv->has_mmu_faults = true;
	xa_unlock(&vdev->context_xa);
}
366 
/**
 * ivpu_mmu_user_context_init() - Initialise a user MMU context and install its page table
 * @vdev: device the context belongs to
 * @ctx: context to initialise
 * @ctx_id: context identifier; 0 is not expected here and triggers a warning
 *
 * After the context is initialised its page table is handed to
 * ivpu_mmu_set_pgtable(). If that fails, the context is torn down again.
 *
 * Return: 0 on success or the error from the failing step.
 */
int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id)
{
	int err;

	drm_WARN_ON(&vdev->drm, !ctx_id);

	err = ivpu_mmu_context_init(vdev, ctx, ctx_id);
	if (err) {
		ivpu_err(vdev, "Failed to initialize context: %d\n", err);
		return err;
	}

	err = ivpu_mmu_set_pgtable(vdev, ctx_id, &ctx->pgtable);
	if (!err)
		return 0;

	/* Installing the page table failed: undo the initialisation above */
	ivpu_err(vdev, "Failed to set page table: %d\n", err);
	ivpu_mmu_context_fini(vdev, ctx);
	return err;
}
391 
ivpu_mmu_user_context_fini(struct ivpu_device * vdev,struct ivpu_mmu_context * ctx)392 void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
393 {
394 	drm_WARN_ON(&vdev->drm, !ctx->id);
395 
396 	ivpu_mmu_clear_pgtable(vdev, ctx->id);
397 	ivpu_mmu_context_fini(vdev, ctx);
398 }
399