/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include "display/intel_display.h"
#include "display/intel_frontbuffer.h"
#include "gt/intel_gt.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_domain.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"
#include "i915_gem_object.h"
#include "i915_vma.h"

#define VTD_GUARD (168u * I915_GTT_PAGE_SIZE) /* 168 or tile-row PTE padding */

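/*
 * A write via the GPU render domain leaves stale CPU cachelines behind only
 * when the object is CPU cached, i.e. neither uncached nor write-through.
 * Discrete parts never need the flush.
 */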
static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (IS_DGFX(i915))
		return false;

	return !(obj->cache_level == I915_CACHE_NONE ||
		 obj->cache_level == I915_CACHE_WT);
}

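/*
 * A CPU write needs a clflush before other agents can observe it, unless the
 * object is already marked cache_dirty (the flush is already tracked), we are
 * on a discrete part, or the object is cache coherent for CPU writes.
 * Framebuffers are always kept flushed for the display engine.
 */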
bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (obj->cache_dirty)
		return false;

	if (IS_DGFX(i915))
		return false;

	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
		return true;

	/* Currently in use by HW (display engine)? Keep flushed. */
	return i915_gem_object_is_framebuffer(obj);
}

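/*
 * Flush any writes pending in @obj's current write domain, provided that
 * domain is covered by @flush_domains: a GGTT write barrier plus frontbuffer
 * flush for GTT writes, a wmb() for WC writes, a clflush for CPU writes, and
 * for GPU render writes just mark the cachelines dirty for a later flush.
 * Afterwards the object has no write domain.
 */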
static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
	struct i915_vma *vma;

	assert_object_held(obj);

	if (!(obj->write_domain & flush_domains))
		return;

	switch (obj->write_domain) {
	case I915_GEM_DOMAIN_GTT:
		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj) {
			if (i915_vma_unset_ggtt_write(vma))
				intel_gt_flush_ggtt_writes(vma->vm->gt);
		}
		spin_unlock(&obj->vma.lock);

		i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
		break;

	case I915_GEM_DOMAIN_WC:
		wmb();
		break;

	case I915_GEM_DOMAIN_CPU:
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		break;

	case I915_GEM_DOMAIN_RENDER:
		if (gpu_write_needs_clflush(obj))
			obj->cache_dirty = true;
		break;
	}

	obj->write_domain = 0;
}

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asynchronously.
	 */
	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
	if (obj->cache_dirty)
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
	obj->write_domain = 0;
}

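/* Flush a framebuffer object for the display engine, taking the object lock. */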
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_is_framebuffer(obj))
		return;

	i915_gem_object_lock(obj, NULL);
	__i915_gem_object_flush_for_display(obj);
	i915_gem_object_unlock(obj);
}

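/* As i915_gem_object_flush_if_display(), but with the object lock already held. */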
void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
{
	if (i915_gem_object_is_framebuffer(obj))
		__i915_gem_object_flush_for_display(obj);
}

/**
 * Moves a single object to the WC read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_WC)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * WC domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_WC;
	if (write) {
		obj->read_domains = I915_GEM_DOMAIN_WC;
		obj->write_domain = I915_GEM_DOMAIN_WC;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * Moves a single object to the GTT read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		struct i915_vma *vma;

		obj->read_domains = I915_GEM_DOMAIN_GTT;
		obj->write_domain = I915_GEM_DOMAIN_GTT;
		obj->mm.dirty = true;

		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
				i915_vma_set_ggtt_write(vma);
		spin_unlock(&obj->vma.lock);
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

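/*
 * A minimal usage sketch for the set-to-domain helpers above (callers must
 * already hold the object lock, see assert_object_held()):
 *
 *	i915_gem_object_lock(obj, NULL);
 *	ret = i915_gem_object_set_to_gtt_domain(obj, true);
 *	i915_gem_object_unlock(obj);
 *
 * Error handling is elided; real callers typically take the lock via a ww
 * context and i915_gem_object_lock_interruptible().
 */
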
/**
 * Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent,
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	int ret;

	if (obj->cache_level == cache_level)
		return 0;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	/* Always invalidate stale cachelines */
	if (obj->cache_level != cache_level) {
		i915_gem_object_set_cache_coherency(obj, cache_level);
		obj->cache_dirty = true;
	}

	/* The cache-level will be applied when each vma is rebound. */
	return i915_gem_object_unbind(obj,
				      I915_GEM_OBJECT_UNBIND_ACTIVE |
				      I915_GEM_OBJECT_UNBIND_BARRIER);
}

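/*
 * GEM_GET_CACHING ioctl: report the object's caching mode to userspace.
 * LLC and L3+LLC levels read back as CACHED, write-through as DISPLAY and
 * everything else as NONE. Not available on discrete parts.
 */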
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	int err = 0;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj) {
		err = -ENOENT;
		goto out;
	}

	switch (obj->cache_level) {
	case I915_CACHE_LLC:
	case I915_CACHE_L3_LLC:
		args->caching = I915_CACHING_CACHED;
		break;

	case I915_CACHE_WT:
		args->caching = I915_CACHING_DISPLAY;
		break;

	default:
		args->caching = I915_CACHING_NONE;
		break;
	}
out:
	rcu_read_unlock();
	return err;
}

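/*
 * GEM_SET_CACHING ioctl: translate the requested caching mode into a cache
 * level and apply it to the object. Proxy objects (other than userptr asking
 * for CACHED) cannot be changed, and the ioctl is unavailable on discrete
 * parts.
 */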
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret = 0;

	if (IS_DGFX(i915))
		return -ENODEV;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * The caching mode of a proxy object is handled by its generator, and
	 * not allowed to be changed by userspace.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		/*
		 * Silently allow cached for userptr; the vulkan driver
		 * sets all objects to cached
		 */
		if (!i915_gem_object_is_userptr(obj) ||
		    args->caching != I915_CACHING_CACHED)
			ret = -ENXIO;

		goto out;
	}

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		goto out;

	ret = i915_gem_object_set_cache_level(obj, level);
	i915_gem_object_unlock(obj);

out:
	i915_gem_object_put(obj);
	return ret;
}

/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display, the callers are responsible for frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     struct i915_gem_ww_ctx *ww,
				     u32 alignment,
				     const struct i915_gtt_view *view,
				     unsigned int flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;
	int ret;

	/* Frame buffer must be in LMEM */
	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
		return ERR_PTR(-EINVAL);

	/*
	 * The display engine is not coherent with the LLC cache on gen6. As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is lowest common denominator for all
	 * chipsets.
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(i915) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret)
		return ERR_PTR(ret);

	/* VT-d may overfetch before/after the vma, so pad with scratch */
	if (intel_scanout_needs_vtd_wa(i915)) {
		unsigned int guard = VTD_GUARD;

		if (i915_gem_object_is_tiled(obj))
			guard = max(guard,
				    i915_gem_object_get_tile_row_size(obj));

		flags |= PIN_OFFSET_GUARD | guard;
	}

	/*
	 * As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers. However,
	 * it may simply be too big to fit into mappable, in which case
	 * put it anyway and hope that userspace can cope (but always first
	 * try to preserve the existing ABI).
	 */
	vma = ERR_PTR(-ENOSPC);
	if ((flags & PIN_MAPPABLE) == 0 &&
	    (!view || view->type == I915_GTT_VIEW_NORMAL))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment,
						  flags | PIN_MAPPABLE |
						  PIN_NONBLOCK);
	if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0,
						  alignment, flags);
	if (IS_ERR(vma))
		return vma;

	vma->display_alignment = max(vma->display_alignment, alignment);
	i915_vma_mark_scanout(vma);

	i915_gem_object_flush_if_display_locked(obj);

	return vma;
}

/**
 * Moves a single object to the CPU read, and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		obj->read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write)
		__start_cpu_write(obj);

	return 0;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	u32 read_domains = args->read_domains;
	u32 write_domain = args->write_domain;
	int err;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/*
	 * Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain && read_domains != write_domain)
		return -EINVAL;

	if (!read_domains)
		return 0;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (err)
		goto out;

	if (i915_gem_object_is_userptr(obj)) {
		/*
		 * Try to grab userptr pages, iris uses set_domain to check
		 * userptr validity
		 */
		err = i915_gem_object_userptr_validate(obj);
		if (!err)
			err = i915_gem_object_wait(obj,
						   I915_WAIT_INTERRUPTIBLE |
						   I915_WAIT_PRIORITY |
						   (write_domain ? I915_WAIT_ALL : 0),
						   MAX_SCHEDULE_TIMEOUT);
		goto out;
	}

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (err)
		goto out;

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out_unlock;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the request read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

out_unpin:
	i915_gem_object_unpin_pages(obj);

out_unlock:
	i915_gem_object_unlock(obj);

	if (!err && write_domain)
		i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out:
	i915_gem_object_put(obj);
	return err;
}

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
				 unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourself into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}

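/*
 * As i915_gem_object_prepare_read(), but for a CPU write: waits for all GPU
 * access, and reports whether the caller must clflush before the write (to
 * invalidate stale partially written cachelines) and/or after it (to make the
 * data visible to the GPU).
 */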
int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
				  unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourself into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}