Line data Source code
1 : /*
2 : * Copyright © 2008-2015 Intel Corporation
3 : *
4 : * Permission is hereby granted, free of charge, to any person obtaining a
5 : * copy of this software and associated documentation files (the "Software"),
6 : * to deal in the Software without restriction, including without limitation
7 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 : * and/or sell copies of the Software, and to permit persons to whom the
9 : * Software is furnished to do so, subject to the following conditions:
10 : *
11 : * The above copyright notice and this permission notice (including the next
12 : * paragraph) shall be included in all copies or substantial portions of the
13 : * Software.
14 : *
15 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 : * IN THE SOFTWARE.
22 : *
23 : * Authors:
24 : * Eric Anholt <eric@anholt.net>
25 : *
26 : */
27 :
28 : #include <dev/pci/drm/drmP.h>
29 : #include <dev/pci/drm/drm_vma_manager.h>
30 : #include <dev/pci/drm/i915_drm.h>
31 : #include "i915_drv.h"
32 : #include "i915_vgpu.h"
33 : #include "i915_trace.h"
34 : #include "intel_drv.h"
35 : #ifdef __linux__
36 : #include <linux/shmem_fs.h>
37 : #include <linux/slab.h>
38 : #include <linux/swap.h>
39 : #include <linux/pci.h>
40 : #include <linux/dma-buf.h>
41 : #endif
42 :
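/* RQ_BUG_ON() expands to nothing in this build, so the request-tracking
 * assertions below (e.g. RQ_BUG_ON(obj->active)) compile out instead of
 * triggering a BUG.
 */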
43 : #define RQ_BUG_ON(expr)
44 :
45 : static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
46 : static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
47 : static void
48 : i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
49 : static void
50 : i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);
51 :
52 0 : static bool cpu_cache_is_coherent(struct drm_device *dev,
53 : enum i915_cache_level level)
54 : {
55 0 : return HAS_LLC(dev) || level != I915_CACHE_NONE;
56 : }
57 :
58 0 : static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
59 : {
60 0 : if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
61 0 : return true;
62 :
63 0 : return obj->pin_display;
64 0 : }
65 :
66 : /* some bookkeeping */
67 0 : static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
68 : size_t size)
69 : {
70 0 : spin_lock(&dev_priv->mm.object_stat_lock);
71 0 : dev_priv->mm.object_count++;
72 0 : dev_priv->mm.object_memory += size;
73 0 : spin_unlock(&dev_priv->mm.object_stat_lock);
74 0 : }
75 :
76 0 : static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
77 : size_t size)
78 : {
79 0 : spin_lock(&dev_priv->mm.object_stat_lock);
80 0 : dev_priv->mm.object_count--;
81 0 : dev_priv->mm.object_memory -= size;
82 0 : spin_unlock(&dev_priv->mm.object_stat_lock);
83 0 : }
84 :
85 : static int
86 0 : i915_gem_wait_for_error(struct i915_gpu_error *error)
87 : {
88 : int ret;
89 :
90 : #define EXIT_COND (!i915_reset_in_progress(error) || \
91 : i915_terminally_wedged(error))
92 0 : if (EXIT_COND)
93 0 : return 0;
94 :
95 : /*
96 : * Only wait 10 seconds for the gpu reset to complete to avoid hanging
97 : * userspace. If it takes that long something really bad is going on and
98 : * we should simply try to bail out and fail as gracefully as possible.
99 : */
100 0 : ret = wait_event_interruptible_timeout(error->reset_queue,
101 : EXIT_COND,
102 : 10*HZ);
103 0 : if (ret == 0) {
104 0 : DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
105 0 : return -EIO;
106 0 : } else if (ret < 0) {
107 0 : return ret;
108 : }
109 : #undef EXIT_COND
110 :
111 0 : return 0;
112 0 : }
113 :
114 0 : int i915_mutex_lock_interruptible(struct drm_device *dev)
115 : {
116 0 : struct drm_i915_private *dev_priv = dev->dev_private;
117 : int ret;
118 :
119 0 : ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
120 0 : if (ret)
121 0 : return ret;
122 :
123 0 : ret = mutex_lock_interruptible(&dev->struct_mutex);
124 0 : if (ret)
125 0 : return ret;
126 :
127 0 : WARN_ON(i915_verify_lists(dev));
128 0 : return 0;
129 0 : }
130 :
131 : int
132 0 : i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
133 : struct drm_file *file)
134 : {
135 0 : struct drm_i915_private *dev_priv = dev->dev_private;
136 0 : struct drm_i915_gem_get_aperture *args = data;
137 0 : struct i915_gtt *ggtt = &dev_priv->gtt;
138 : struct i915_vma *vma;
139 : size_t pinned;
140 :
141 : pinned = 0;
142 0 : mutex_lock(&dev->struct_mutex);
143 0 : list_for_each_entry(vma, &ggtt->base.active_list, mm_list)
144 0 : if (vma->pin_count)
145 0 : pinned += vma->node.size;
146 0 : list_for_each_entry(vma, &ggtt->base.inactive_list, mm_list)
147 0 : if (vma->pin_count)
148 0 : pinned += vma->node.size;
149 0 : mutex_unlock(&dev->struct_mutex);
150 :
151 0 : args->aper_size = dev_priv->gtt.base.total;
152 0 : args->aper_available_size = args->aper_size - pinned;
153 :
154 0 : return 0;
155 : }
156 :
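/* Copy the object's backing pages into the contiguous phys handle,
 * clflushing along the way, then publish a single-entry sg_table whose
 * DMA address is the phys handle's bus address.
 */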
157 : static int
158 0 : i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
159 : {
160 : #ifdef __linux__
161 : struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
162 : #endif
163 0 : char *vaddr = obj->phys_handle->vaddr;
164 : struct sg_table *st;
165 : struct scatterlist *sg;
166 : int i;
167 :
168 0 : if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
169 0 : return -EINVAL;
170 :
171 0 : for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
172 : struct vm_page *page;
173 : char *src;
174 :
175 : #ifdef __linux__
176 : page = shmem_read_mapping_page(mapping, i);
177 : if (IS_ERR(page))
178 : return PTR_ERR(page);
179 : #else
180 0 : struct pglist plist;
181 0 : TAILQ_INIT(&plist);
182 0 : if (uvm_objwire(obj->base.uao, i * PAGE_SIZE, (i + 1) * PAGE_SIZE, &plist))
183 0 : return -ENOMEM;
184 0 : page = TAILQ_FIRST(&plist);
185 : #endif
186 :
187 0 : src = kmap_atomic(page);
188 0 : memcpy(vaddr, src, PAGE_SIZE);
189 0 : drm_clflush_virt_range(vaddr, PAGE_SIZE);
190 0 : kunmap_atomic(src);
191 :
192 : #ifdef __linux__
193 : page_cache_release(page);
194 : #else
195 0 : uvm_objunwire(obj->base.uao, i * PAGE_SIZE, (i + 1) * PAGE_SIZE);
196 : #endif
197 0 : vaddr += PAGE_SIZE;
198 0 : }
199 :
200 0 : i915_gem_chipset_flush(obj->base.dev);
201 :
202 0 : st = kmalloc(sizeof(*st), GFP_KERNEL);
203 0 : if (st == NULL)
204 0 : return -ENOMEM;
205 :
206 0 : if (sg_alloc_table(st, 1, GFP_KERNEL)) {
207 0 : kfree(st);
208 0 : return -ENOMEM;
209 : }
210 :
211 0 : sg = st->sgl;
212 0 : sg->offset = 0;
213 0 : sg->length = obj->base.size;
214 :
215 0 : sg_dma_address(sg) = obj->phys_handle->busaddr;
216 0 : sg_dma_len(sg) = obj->base.size;
217 :
218 0 : obj->pages = st;
219 0 : return 0;
220 0 : }
221 :
222 : static void
223 0 : i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
224 : {
225 : int ret;
226 :
227 0 : BUG_ON(obj->madv == __I915_MADV_PURGED);
228 :
229 0 : ret = i915_gem_object_set_to_cpu_domain(obj, true);
230 0 : if (ret) {
231 : /* In the event of a disaster, abandon all caches and
232 : * hope for the best.
233 : */
234 0 : WARN_ON(ret != -EIO);
235 0 : obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
236 0 : }
237 :
238 0 : if (obj->madv == I915_MADV_DONTNEED)
239 0 : obj->dirty = 0;
240 :
241 0 : if (obj->dirty) {
242 : #ifdef __linux__
243 : struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
244 : #endif
245 0 : char *vaddr = obj->phys_handle->vaddr;
246 : int i;
247 :
248 0 : for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
249 : struct vm_page *page;
250 : char *dst;
251 :
252 : #ifdef __linux__
253 : page = shmem_read_mapping_page(mapping, i);
254 : if (IS_ERR(page))
255 : continue;
256 : #else
257 0 : struct pglist plist;
258 0 : TAILQ_INIT(&plist);
259 0 : if (uvm_objwire(obj->base.uao, i * PAGE_SIZE, (i + 1) * PAGE_SIZE, &plist))
260 0 : continue;
261 0 : page = TAILQ_FIRST(&plist);
262 : #endif
263 :
264 0 : dst = kmap_atomic(page);
265 0 : drm_clflush_virt_range(vaddr, PAGE_SIZE);
266 0 : memcpy(dst, vaddr, PAGE_SIZE);
267 0 : kunmap_atomic(dst);
268 :
269 0 : set_page_dirty(page);
270 : #ifdef __linux__
271 : if (obj->madv == I915_MADV_WILLNEED)
272 : mark_page_accessed(page);
273 : page_cache_release(page);
274 : #else
275 0 : uvm_objunwire(obj->base.uao, i * PAGE_SIZE, (i + 1) * PAGE_SIZE);
276 : #endif
277 0 : vaddr += PAGE_SIZE;
278 0 : }
279 0 : obj->dirty = 0;
280 0 : }
281 :
282 0 : sg_free_table(obj->pages);
283 0 : kfree(obj->pages);
284 0 : }
285 :
286 : static void
287 0 : i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
288 : {
289 0 : drm_pci_free(obj->base.dev, obj->phys_handle);
290 0 : }
291 :
292 : static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
293 : .get_pages = i915_gem_object_get_pages_phys,
294 : .put_pages = i915_gem_object_put_pages_phys,
295 : .release = i915_gem_object_release_phys,
296 : };
297 :
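/* Unbind every VMA of the object and release its backing pages. A
 * temporary GEM reference is held across the unbind loop so the object
 * cannot disappear underneath us; the result of put_pages is returned.
 */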
298 : static int
299 0 : drop_pages(struct drm_i915_gem_object *obj)
300 : {
301 : struct i915_vma *vma, *next;
302 : int ret;
303 :
304 0 : drm_gem_object_reference(&obj->base);
305 0 : list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link)
306 0 : if (i915_vma_unbind(vma))
307 : break;
308 :
309 0 : ret = i915_gem_object_put_pages(obj);
310 0 : drm_gem_object_unreference(&obj->base);
311 :
312 0 : return ret;
313 : }
314 :
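/* Attach a physically contiguous DMA buffer to the object and switch it
 * to the phys object ops. Any existing backing pages are dropped first
 * and then re-populated into the new phys backing store via get_pages.
 */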
315 : int
316 0 : i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
317 : int align)
318 : {
319 : drm_dma_handle_t *phys;
320 : int ret;
321 :
322 0 : if (obj->phys_handle) {
323 0 : if ((unsigned long)obj->phys_handle->vaddr & (align -1))
324 0 : return -EBUSY;
325 :
326 0 : return 0;
327 : }
328 :
329 0 : if (obj->madv != I915_MADV_WILLNEED)
330 0 : return -EFAULT;
331 :
332 0 : if (obj->base.filp == NULL)
333 0 : return -EINVAL;
334 :
335 0 : ret = drop_pages(obj);
336 0 : if (ret)
337 0 : return ret;
338 :
339 : /* create a new object */
340 0 : phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
341 0 : if (!phys)
342 0 : return -ENOMEM;
343 :
344 0 : obj->phys_handle = phys;
345 0 : obj->ops = &i915_gem_phys_ops;
346 :
347 0 : return i915_gem_object_get_pages(obj);
348 0 : }
349 :
350 : static int
351 0 : i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
352 : struct drm_i915_gem_pwrite *args,
353 : struct drm_file *file_priv)
354 : {
355 0 : struct drm_device *dev = obj->base.dev;
356 0 : void *vaddr = obj->phys_handle->vaddr + args->offset;
357 0 : char __user *user_data = to_user_ptr(args->data_ptr);
358 : int ret = 0;
359 :
360 : /* We manually control the domain here and pretend that it
361 : * remains coherent i.e. in the GTT domain, like shmem_pwrite.
362 : */
363 0 : ret = i915_gem_object_wait_rendering(obj, false);
364 0 : if (ret)
365 0 : return ret;
366 :
367 0 : intel_fb_obj_invalidate(obj, ORIGIN_CPU);
368 0 : if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
369 : unsigned long unwritten;
370 :
371 : /* The physical object once assigned is fixed for the lifetime
372 : * of the obj, so we can safely drop the lock and continue
373 : * to access vaddr.
374 : */
375 0 : mutex_unlock(&dev->struct_mutex);
376 0 : unwritten = copy_from_user(vaddr, user_data, args->size);
377 0 : mutex_lock(&dev->struct_mutex);
378 0 : if (unwritten) {
379 : ret = -EFAULT;
380 0 : goto out;
381 : }
382 0 : }
383 :
384 0 : drm_clflush_virt_range(vaddr, args->size);
385 0 : i915_gem_chipset_flush(dev);
386 :
387 : out:
388 0 : intel_fb_obj_flush(obj, false, ORIGIN_CPU);
389 0 : return ret;
390 0 : }
391 :
392 0 : void *i915_gem_object_alloc(struct drm_device *dev)
393 : {
394 0 : struct drm_i915_private *dev_priv = dev->dev_private;
395 : #ifdef __linux__
396 : return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL);
397 : #else
398 0 : return pool_get(&dev_priv->objects, PR_WAITOK | PR_ZERO);
399 : #endif
400 : }
401 :
402 0 : void i915_gem_object_free(struct drm_i915_gem_object *obj)
403 : {
404 0 : struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
405 : #ifdef __linux__
406 : kmem_cache_free(dev_priv->objects, obj);
407 : #else
408 0 : pool_put(&dev_priv->objects, obj);
409 : #endif
410 0 : }
411 :
412 : static int
413 0 : i915_gem_create(struct drm_file *file,
414 : struct drm_device *dev,
415 : uint64_t size,
416 : uint32_t *handle_p)
417 : {
418 : struct drm_i915_gem_object *obj;
419 : int ret;
420 0 : u32 handle;
421 :
422 0 : size = roundup(size, PAGE_SIZE);
423 0 : if (size == 0)
424 0 : return -EINVAL;
425 :
426 : /* Allocate the new object */
427 0 : obj = i915_gem_alloc_object(dev, size);
428 0 : if (obj == NULL)
429 0 : return -ENOMEM;
430 :
431 0 : ret = drm_gem_handle_create(file, &obj->base, &handle);
432 : /* drop reference from allocate - handle holds it now */
433 0 : drm_gem_object_unreference_unlocked(&obj->base);
434 0 : if (ret)
435 0 : return ret;
436 :
437 0 : *handle_p = handle;
438 0 : return 0;
439 0 : }
440 :
441 : int
442 0 : i915_gem_dumb_create(struct drm_file *file,
443 : struct drm_device *dev,
444 : struct drm_mode_create_dumb *args)
445 : {
446 : /* have to work out size/pitch and return them */
447 0 : args->pitch = roundup2(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
448 0 : args->size = args->pitch * args->height;
449 0 : return i915_gem_create(file, dev,
450 0 : args->size, &args->handle);
451 : }
452 :
453 : /**
454 : * Creates a new mm object and returns a handle to it.
455 : */
456 : int
457 0 : i915_gem_create_ioctl(struct drm_device *dev, void *data,
458 : struct drm_file *file)
459 : {
460 0 : struct drm_i915_gem_create *args = data;
461 :
462 0 : return i915_gem_create(file, dev,
463 0 : args->size, &args->handle);
464 : }
465 :
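/* Copy helpers for bit-17 swizzled objects: the copy proceeds in
 * cacheline (64 byte) chunks and flips bit 6 of the GPU offset
 * (gpu_offset ^ 64) so the data ends up in the channel the hardware
 * swizzle expects.
 */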
466 : static inline int
467 0 : __copy_to_user_swizzled(char __user *cpu_vaddr,
468 : const char *gpu_vaddr, int gpu_offset,
469 : int length)
470 : {
471 : int ret, cpu_offset = 0;
472 :
473 0 : while (length > 0) {
474 0 : int cacheline_end = roundup2(gpu_offset + 1, 64);
475 0 : int this_length = min(cacheline_end - gpu_offset, length);
476 0 : int swizzled_gpu_offset = gpu_offset ^ 64;
477 :
478 0 : ret = __copy_to_user(cpu_vaddr + cpu_offset,
479 0 : gpu_vaddr + swizzled_gpu_offset,
480 : this_length);
481 0 : if (ret)
482 0 : return ret + length;
483 :
484 0 : cpu_offset += this_length;
485 0 : gpu_offset += this_length;
486 0 : length -= this_length;
487 0 : }
488 :
489 0 : return 0;
490 0 : }
491 :
492 : static inline int
493 0 : __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
494 : const char __user *cpu_vaddr,
495 : int length)
496 : {
497 : int ret, cpu_offset = 0;
498 :
499 0 : while (length > 0) {
500 0 : int cacheline_end = roundup2(gpu_offset + 1, 64);
501 0 : int this_length = min(cacheline_end - gpu_offset, length);
502 0 : int swizzled_gpu_offset = gpu_offset ^ 64;
503 :
504 0 : ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
505 0 : cpu_vaddr + cpu_offset,
506 : this_length);
507 0 : if (ret)
508 0 : return ret + length;
509 :
510 0 : cpu_offset += this_length;
511 0 : gpu_offset += this_length;
512 0 : length -= this_length;
513 0 : }
514 :
515 0 : return 0;
516 0 : }
517 :
518 : /*
519 : * Pins the specified object's pages and synchronizes the object with
520 : * GPU accesses. Sets needs_clflush to non-zero if the caller should
521 : * flush the object from the CPU cache.
522 : */
523 0 : int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
524 : int *needs_clflush)
525 : {
526 : int ret;
527 :
528 0 : *needs_clflush = 0;
529 :
530 0 : if (!obj->base.filp)
531 0 : return -EINVAL;
532 :
533 0 : if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
534 : /* If we're not in the cpu read domain, set ourselves into the gtt
535 : * read domain and manually flush cachelines (if required). This
536 : * optimizes for the case when the gpu will dirty the data
537 : * anyway again before the next pread happens. */
538 0 : *needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
539 0 : obj->cache_level);
540 0 : ret = i915_gem_object_wait_rendering(obj, true);
541 0 : if (ret)
542 0 : return ret;
543 : }
544 :
545 0 : ret = i915_gem_object_get_pages(obj);
546 0 : if (ret)
547 0 : return ret;
548 :
549 0 : i915_gem_object_pin_pages(obj);
550 :
551 0 : return ret;
552 0 : }
553 :
554 : /* Per-page copy function for the shmem pread fastpath.
555 : * Flushes invalid cachelines before reading the target if
556 : * needs_clflush is set. */
557 : static int
558 0 : shmem_pread_fast(struct vm_page *page, int shmem_page_offset, int page_length,
559 : char __user *user_data,
560 : bool page_do_bit17_swizzling, bool needs_clflush)
561 : {
562 : char *vaddr;
563 : int ret;
564 :
565 0 : if (unlikely(page_do_bit17_swizzling))
566 0 : return -EINVAL;
567 :
568 0 : vaddr = kmap_atomic(page);
569 0 : if (needs_clflush)
570 0 : drm_clflush_virt_range(vaddr + shmem_page_offset,
571 0 : page_length);
572 0 : ret = __copy_to_user_inatomic(user_data,
573 0 : vaddr + shmem_page_offset,
574 : page_length);
575 0 : kunmap_atomic(vaddr);
576 :
577 0 : return ret ? -EFAULT : 0;
578 0 : }
579 :
580 : static void
581 0 : shmem_clflush_swizzled_range(char *addr, unsigned long length,
582 : bool swizzled)
583 : {
584 0 : if (unlikely(swizzled)) {
585 0 : unsigned long start = (unsigned long) addr;
586 0 : unsigned long end = (unsigned long) addr + length;
587 :
588 : /* For swizzling simply ensure that we always flush both
589 : * channels. Lame, but simple and it works. Swizzled
590 : * pwrite/pread is far from a hotpath - current userspace
591 : * doesn't use it at all. */
592 0 : start = round_down(start, 128);
593 0 : end = round_up(end, 128);
594 :
595 0 : drm_clflush_virt_range((void *)start, end - start);
596 0 : } else {
597 0 : drm_clflush_virt_range(addr, length);
598 : }
599 :
600 0 : }
601 :
602 : /* Only difference to the fast-path function is that this can handle bit17
603 : * and uses non-atomic copy and kmap functions. */
604 : static int
605 0 : shmem_pread_slow(struct vm_page *page, int shmem_page_offset, int page_length,
606 : char __user *user_data,
607 : bool page_do_bit17_swizzling, bool needs_clflush)
608 : {
609 : char *vaddr;
610 : int ret;
611 :
612 0 : vaddr = kmap(page);
613 0 : if (needs_clflush)
614 0 : shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
615 0 : page_length,
616 : page_do_bit17_swizzling);
617 :
618 0 : if (page_do_bit17_swizzling)
619 0 : ret = __copy_to_user_swizzled(user_data,
620 : vaddr, shmem_page_offset,
621 : page_length);
622 : else
623 0 : ret = __copy_to_user(user_data,
624 0 : vaddr + shmem_page_offset,
625 : page_length);
626 0 : kunmap(page);
627 :
628 0 : return ret ? - EFAULT : 0;
629 : }
630 :
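/* Shmem pread: walk the object's backing pages, trying the atomic kmap
 * fastpath for each page first and, on a fault, dropping struct_mutex
 * to run the sleeping slow path before retaking the lock and moving on
 * to the next page.
 */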
631 : static int
632 0 : i915_gem_shmem_pread(struct drm_device *dev,
633 : struct drm_i915_gem_object *obj,
634 : struct drm_i915_gem_pread *args,
635 : struct drm_file *file)
636 : {
637 : char __user *user_data;
638 : ssize_t remain;
639 : loff_t offset;
640 : int shmem_page_offset, page_length, ret = 0;
641 : int obj_do_bit17_swizzling, page_do_bit17_swizzling;
642 : #ifdef __linux__
643 : int prefaulted = 0;
644 : #endif
645 0 : int needs_clflush = 0;
646 0 : struct sg_page_iter sg_iter;
647 :
648 0 : user_data = to_user_ptr(args->data_ptr);
649 0 : remain = args->size;
650 :
651 0 : obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
652 :
653 0 : ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
654 0 : if (ret)
655 0 : return ret;
656 :
657 0 : offset = args->offset;
658 :
659 0 : for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
660 : offset >> PAGE_SHIFT) {
661 0 : struct vm_page *page = sg_page_iter_page(&sg_iter);
662 :
663 0 : if (remain <= 0)
664 0 : break;
665 :
666 : /* Operation in this page
667 : *
668 : * shmem_page_offset = offset within page in shmem file
669 : * page_length = bytes to copy for this page
670 : */
671 0 : shmem_page_offset = offset_in_page(offset);
672 0 : page_length = remain;
673 0 : if ((shmem_page_offset + page_length) > PAGE_SIZE)
674 0 : page_length = PAGE_SIZE - shmem_page_offset;
675 :
676 0 : page_do_bit17_swizzling = obj_do_bit17_swizzling &&
677 0 : (page_to_phys(page) & (1 << 17)) != 0;
678 :
679 0 : ret = shmem_pread_fast(page, shmem_page_offset, page_length,
680 : user_data, page_do_bit17_swizzling,
681 0 : needs_clflush);
682 0 : if (ret == 0)
683 : goto next_page;
684 :
685 0 : mutex_unlock(&dev->struct_mutex);
686 :
687 : #ifdef __linux__
688 : if (likely(!i915.prefault_disable) && !prefaulted) {
689 : ret = fault_in_multipages_writeable(user_data, remain);
690 : /* Userspace is tricking us, but we've already clobbered
691 : * its pages with the prefault and promised to write the
692 : * data up to the first fault. Hence ignore any errors
693 : * and just continue. */
694 : (void)ret;
695 : prefaulted = 1;
696 : }
697 : #endif
698 :
699 0 : ret = shmem_pread_slow(page, shmem_page_offset, page_length,
700 : user_data, page_do_bit17_swizzling,
701 0 : needs_clflush);
702 :
703 0 : mutex_lock(&dev->struct_mutex);
704 :
705 0 : if (ret)
706 0 : goto out;
707 :
708 : next_page:
709 0 : remain -= page_length;
710 0 : user_data += page_length;
711 0 : offset += page_length;
712 0 : }
713 :
714 : out:
715 0 : i915_gem_object_unpin_pages(obj);
716 :
717 0 : return ret;
718 0 : }
719 :
720 : /**
721 : * Reads data from the object referenced by handle.
722 : *
723 : * On error, the contents of *data are undefined.
724 : */
725 : int
726 0 : i915_gem_pread_ioctl(struct drm_device *dev, void *data,
727 : struct drm_file *file)
728 : {
729 0 : struct drm_i915_gem_pread *args = data;
730 : struct drm_i915_gem_object *obj;
731 : int ret = 0;
732 :
733 0 : if (args->size == 0)
734 0 : return 0;
735 :
736 0 : if (!access_ok(VERIFY_WRITE,
737 0 : to_user_ptr(args->data_ptr),
738 0 : args->size))
739 0 : return -EFAULT;
740 :
741 0 : ret = i915_mutex_lock_interruptible(dev);
742 0 : if (ret)
743 0 : return ret;
744 :
745 0 : obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
746 0 : if (&obj->base == NULL) {
747 : ret = -ENOENT;
748 0 : goto unlock;
749 : }
750 :
751 : /* Bounds check source. */
752 0 : if (args->offset > obj->base.size ||
753 0 : args->size > obj->base.size - args->offset) {
754 : ret = -EINVAL;
755 0 : goto out;
756 : }
757 :
758 : /* prime objects have no backing filp to GEM pread/pwrite
759 : * pages from.
760 : */
761 0 : if (!obj->base.filp) {
762 : ret = -EINVAL;
763 0 : goto out;
764 : }
765 :
766 0 : trace_i915_gem_object_pread(obj, args->offset, args->size);
767 :
768 0 : ret = i915_gem_shmem_pread(dev, obj, args, file);
769 :
770 : out:
771 0 : drm_gem_object_unreference(&obj->base);
772 : unlock:
773 0 : mutex_unlock(&dev->struct_mutex);
774 0 : return ret;
775 0 : }
776 :
777 : /* This is the fast write path which cannot handle
778 : * page faults in the source data
779 : */
780 :
781 : #ifdef __linux__
782 : static inline int
783 : fast_user_write(struct io_mapping *mapping,
784 : loff_t page_base, int page_offset,
785 : char __user *user_data,
786 : int length)
787 : {
788 : void __iomem *vaddr_atomic;
789 : void *vaddr;
790 : unsigned long unwritten;
791 :
792 : vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
793 : /* We can use the cpu mem copy function because this is X86. */
794 : vaddr = (void __force*)vaddr_atomic + page_offset;
795 : unwritten = __copy_from_user_inatomic_nocache(vaddr,
796 : user_data, length);
797 : io_mapping_unmap_atomic(vaddr_atomic);
798 : return unwritten;
799 : }
800 : #else
801 : static inline int
802 0 : fast_user_write(struct drm_i915_private *dev_priv,
803 : bus_size_t page_base, int page_offset,
804 : char __user *user_data,
805 : int length)
806 : {
807 0 : bus_space_handle_t bsh;
808 : void __iomem *vaddr_atomic;
809 : void *vaddr;
810 : unsigned long unwritten;
811 :
812 0 : agp_map_atomic(dev_priv->agph, page_base, &bsh);
813 0 : vaddr_atomic = bus_space_vaddr(dev_priv->bst, bsh);
814 : /* We can use the cpu mem copy function because this is X86. */
815 0 : vaddr = (void __force*)vaddr_atomic + page_offset;
816 0 : unwritten = __copy_from_user_inatomic_nocache(vaddr,
817 : user_data, length);
818 0 : agp_unmap_atomic(dev_priv->agph, bsh);
819 0 : return unwritten;
820 0 : }
821 : #endif
822 :
823 : /**
824 : * This is the fast pwrite path, where we copy the data directly from the
825 : * user into the GTT, uncached.
826 : */
827 : static int
828 0 : i915_gem_gtt_pwrite_fast(struct drm_device *dev,
829 : struct drm_i915_gem_object *obj,
830 : struct drm_i915_gem_pwrite *args,
831 : struct drm_file *file)
832 : {
833 0 : struct drm_i915_private *dev_priv = dev->dev_private;
834 : ssize_t remain;
835 : loff_t offset, page_base;
836 : char __user *user_data;
837 : int page_offset, page_length, ret;
838 :
839 0 : ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
840 0 : if (ret)
841 : goto out;
842 :
843 0 : ret = i915_gem_object_set_to_gtt_domain(obj, true);
844 0 : if (ret)
845 : goto out_unpin;
846 :
847 0 : ret = i915_gem_object_put_fence(obj);
848 0 : if (ret)
849 : goto out_unpin;
850 :
851 0 : user_data = to_user_ptr(args->data_ptr);
852 0 : remain = args->size;
853 :
854 0 : offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
855 :
856 0 : intel_fb_obj_invalidate(obj, ORIGIN_GTT);
857 :
858 0 : while (remain > 0) {
859 : /* Operation in this page
860 : *
861 : * page_base = page offset within aperture
862 : * page_offset = offset within page
863 : * page_length = bytes to copy for this page
864 : */
865 0 : page_base = trunc_page(offset);
866 0 : page_offset = offset_in_page(offset);
867 0 : page_length = remain;
868 0 : if ((page_offset + remain) > PAGE_SIZE)
869 0 : page_length = PAGE_SIZE - page_offset;
870 :
871 : /* If we get a fault while copying data, then (presumably) our
872 : * source page isn't available. Return the error and we'll
873 : * retry in the slow path.
874 : */
875 0 : if (fast_user_write(dev_priv, page_base,
876 : page_offset, user_data, page_length)) {
877 : ret = -EFAULT;
878 0 : goto out_flush;
879 : }
880 :
881 0 : remain -= page_length;
882 0 : user_data += page_length;
883 0 : offset += page_length;
884 : }
885 :
886 : out_flush:
887 0 : intel_fb_obj_flush(obj, false, ORIGIN_GTT);
888 : out_unpin:
889 0 : i915_gem_object_ggtt_unpin(obj);
890 : out:
891 0 : return ret;
892 : }
893 :
894 : /* Per-page copy function for the shmem pwrite fastpath.
895 : * Flushes invalid cachelines before writing to the target if
896 : * needs_clflush_before is set and flushes out any written cachelines after
897 : * writing if needs_clflush is set. */
898 : static int
899 0 : shmem_pwrite_fast(struct vm_page *page, int shmem_page_offset, int page_length,
900 : char __user *user_data,
901 : bool page_do_bit17_swizzling,
902 : bool needs_clflush_before,
903 : bool needs_clflush_after)
904 : {
905 : char *vaddr;
906 : int ret;
907 :
908 0 : if (unlikely(page_do_bit17_swizzling))
909 0 : return -EINVAL;
910 :
911 0 : vaddr = kmap_atomic(page);
912 0 : if (needs_clflush_before)
913 0 : drm_clflush_virt_range(vaddr + shmem_page_offset,
914 0 : page_length);
915 0 : ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
916 : user_data, page_length);
917 0 : if (needs_clflush_after)
918 0 : drm_clflush_virt_range(vaddr + shmem_page_offset,
919 0 : page_length);
920 0 : kunmap_atomic(vaddr);
921 :
922 0 : return ret ? -EFAULT : 0;
923 0 : }
924 :
925 : /* Only difference to the fast-path function is that this can handle bit17
926 : * and uses non-atomic copy and kmap functions. */
927 : static int
928 0 : shmem_pwrite_slow(struct vm_page *page, int shmem_page_offset, int page_length,
929 : char __user *user_data,
930 : bool page_do_bit17_swizzling,
931 : bool needs_clflush_before,
932 : bool needs_clflush_after)
933 : {
934 : char *vaddr;
935 : int ret;
936 :
937 0 : vaddr = kmap(page);
938 0 : if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
939 0 : shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
940 0 : page_length,
941 : page_do_bit17_swizzling);
942 0 : if (page_do_bit17_swizzling)
943 0 : ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
944 : user_data,
945 : page_length);
946 : else
947 0 : ret = __copy_from_user(vaddr + shmem_page_offset,
948 : user_data,
949 : page_length);
950 0 : if (needs_clflush_after)
951 0 : shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
952 0 : page_length,
953 : page_do_bit17_swizzling);
954 0 : kunmap(page);
955 :
956 0 : return ret ? -EFAULT : 0;
957 : }
958 :
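/* Shmem pwrite mirrors the pread loop above: a per-page atomic fastpath
 * with a sleeping slow path on fault, plus clflushes before partial
 * cacheline writes and after the copy when the object is not coherent
 * with the CPU cache.
 */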
959 : static int
960 0 : i915_gem_shmem_pwrite(struct drm_device *dev,
961 : struct drm_i915_gem_object *obj,
962 : struct drm_i915_gem_pwrite *args,
963 : struct drm_file *file)
964 : {
965 : ssize_t remain;
966 : loff_t offset;
967 : char __user *user_data;
968 : int shmem_page_offset, page_length, ret = 0;
969 : int obj_do_bit17_swizzling, page_do_bit17_swizzling;
970 : int hit_slowpath = 0;
971 : int needs_clflush_after = 0;
972 : int needs_clflush_before = 0;
973 0 : struct sg_page_iter sg_iter;
974 :
975 0 : user_data = to_user_ptr(args->data_ptr);
976 0 : remain = args->size;
977 :
978 0 : obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
979 :
980 0 : if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
981 : /* If we're not in the cpu write domain, set ourselves into the gtt
982 : * write domain and manually flush cachelines (if required). This
983 : * optimizes for the case when the gpu will use the data
984 : * right away and we therefore have to clflush anyway. */
985 0 : needs_clflush_after = cpu_write_needs_clflush(obj);
986 0 : ret = i915_gem_object_wait_rendering(obj, false);
987 0 : if (ret)
988 0 : return ret;
989 : }
990 : /* Same trick applies to invalidate partially written cachelines read
991 : * before writing. */
992 0 : if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
993 0 : needs_clflush_before =
994 0 : !cpu_cache_is_coherent(dev, obj->cache_level);
995 :
996 0 : ret = i915_gem_object_get_pages(obj);
997 0 : if (ret)
998 0 : return ret;
999 :
1000 0 : intel_fb_obj_invalidate(obj, ORIGIN_CPU);
1001 :
1002 0 : i915_gem_object_pin_pages(obj);
1003 :
1004 0 : offset = args->offset;
1005 0 : obj->dirty = 1;
1006 :
1007 0 : for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
1008 : offset >> PAGE_SHIFT) {
1009 0 : struct vm_page *page = sg_page_iter_page(&sg_iter);
1010 : int partial_cacheline_write;
1011 :
1012 0 : if (remain <= 0)
1013 0 : break;
1014 :
1015 : /* Operation in this page
1016 : *
1017 : * shmem_page_offset = offset within page in shmem file
1018 : * page_length = bytes to copy for this page
1019 : */
1020 0 : shmem_page_offset = offset_in_page(offset);
1021 :
1022 0 : page_length = remain;
1023 0 : if ((shmem_page_offset + page_length) > PAGE_SIZE)
1024 0 : page_length = PAGE_SIZE - shmem_page_offset;
1025 :
1026 : /* If we don't overwrite a cacheline completely we need to be
1027 : * careful to have up-to-date data by first clflushing. Don't
1028 : * overcomplicate things and flush the entire page. */
1029 0 : partial_cacheline_write = needs_clflush_before &&
1030 0 : ((shmem_page_offset | page_length)
1031 0 : & (curcpu()->ci_cflushsz - 1));
1032 :
1033 0 : page_do_bit17_swizzling = obj_do_bit17_swizzling &&
1034 0 : (page_to_phys(page) & (1 << 17)) != 0;
1035 :
1036 0 : ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
1037 : user_data, page_do_bit17_swizzling,
1038 : partial_cacheline_write,
1039 0 : needs_clflush_after);
1040 0 : if (ret == 0)
1041 : goto next_page;
1042 :
1043 : hit_slowpath = 1;
1044 0 : mutex_unlock(&dev->struct_mutex);
1045 0 : ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
1046 : user_data, page_do_bit17_swizzling,
1047 : partial_cacheline_write,
1048 : needs_clflush_after);
1049 :
1050 0 : mutex_lock(&dev->struct_mutex);
1051 :
1052 0 : if (ret)
1053 0 : goto out;
1054 :
1055 : next_page:
1056 0 : remain -= page_length;
1057 0 : user_data += page_length;
1058 0 : offset += page_length;
1059 0 : }
1060 :
1061 : out:
1062 0 : i915_gem_object_unpin_pages(obj);
1063 :
1064 0 : if (hit_slowpath) {
1065 : /*
1066 : * Fixup: Flush cpu caches in case we didn't flush the dirty
1067 : * cachelines in-line while writing and the object moved
1068 : * out of the cpu write domain while we've dropped the lock.
1069 : */
1070 0 : if (!needs_clflush_after &&
1071 0 : obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
1072 0 : if (i915_gem_clflush_object(obj, obj->pin_display))
1073 0 : needs_clflush_after = true;
1074 : }
1075 : }
1076 :
1077 0 : if (needs_clflush_after)
1078 0 : i915_gem_chipset_flush(dev);
1079 : else
1080 0 : obj->cache_dirty = true;
1081 :
1082 0 : intel_fb_obj_flush(obj, false, ORIGIN_CPU);
1083 0 : return ret;
1084 0 : }
1085 :
1086 : /**
1087 : * Writes data to the object referenced by handle.
1088 : *
1089 : * On error, the contents of the buffer that were to be modified are undefined.
1090 : */
1091 : int
1092 0 : i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1093 : struct drm_file *file)
1094 : {
1095 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1096 0 : struct drm_i915_gem_pwrite *args = data;
1097 : struct drm_i915_gem_object *obj;
1098 : int ret;
1099 :
1100 0 : if (args->size == 0)
1101 0 : return 0;
1102 :
1103 0 : if (!access_ok(VERIFY_READ,
1104 0 : to_user_ptr(args->data_ptr),
1105 0 : args->size))
1106 0 : return -EFAULT;
1107 :
1108 : #ifdef __linux__
1109 : if (likely(!i915.prefault_disable)) {
1110 : ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr),
1111 : args->size);
1112 : if (ret)
1113 : return -EFAULT;
1114 : }
1115 : #endif
1116 :
1117 0 : intel_runtime_pm_get(dev_priv);
1118 :
1119 0 : ret = i915_mutex_lock_interruptible(dev);
1120 0 : if (ret)
1121 : goto put_rpm;
1122 :
1123 0 : obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1124 0 : if (&obj->base == NULL) {
1125 : ret = -ENOENT;
1126 0 : goto unlock;
1127 : }
1128 :
1129 : /* Bounds check destination. */
1130 0 : if (args->offset > obj->base.size ||
1131 0 : args->size > obj->base.size - args->offset) {
1132 : ret = -EINVAL;
1133 0 : goto out;
1134 : }
1135 :
1136 : /* prime objects have no backing filp to GEM pread/pwrite
1137 : * pages from.
1138 : */
1139 0 : if (!obj->base.filp) {
1140 : ret = -EINVAL;
1141 0 : goto out;
1142 : }
1143 :
1144 0 : trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1145 :
1146 : ret = -EFAULT;
1147 : /* We can only do the GTT pwrite on untiled buffers, as otherwise
1148 : * it would end up going through the fenced access, and we'll get
1149 : * different detiling behavior between reading and writing.
1150 : * pread/pwrite currently are reading and writing from the CPU
1151 : * perspective, requiring manual detiling by the client.
1152 : */
1153 0 : if (obj->tiling_mode == I915_TILING_NONE &&
1154 0 : obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
1155 0 : cpu_write_needs_clflush(obj)) {
1156 0 : ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
1157 : /* Note that the gtt paths might fail with non-page-backed user
1158 : * pointers (e.g. gtt mappings when moving data between
1159 : * textures). Fall back to the shmem path in that case. */
1160 0 : }
1161 :
1162 0 : if (ret == -EFAULT || ret == -ENOSPC) {
1163 0 : if (obj->phys_handle)
1164 0 : ret = i915_gem_phys_pwrite(obj, args, file);
1165 : else
1166 0 : ret = i915_gem_shmem_pwrite(dev, obj, args, file);
1167 : }
1168 :
1169 : out:
1170 0 : drm_gem_object_unreference(&obj->base);
1171 : unlock:
1172 0 : mutex_unlock(&dev->struct_mutex);
1173 : put_rpm:
1174 0 : intel_runtime_pm_put(dev_priv);
1175 :
1176 0 : return ret;
1177 0 : }
1178 :
1179 : int
1180 0 : i915_gem_check_wedge(struct i915_gpu_error *error,
1181 : bool interruptible)
1182 : {
1183 0 : if (i915_reset_in_progress(error)) {
1184 : /* Non-interruptible callers can't handle -EAGAIN, hence return
1185 : * -EIO unconditionally for these. */
1186 0 : if (!interruptible)
1187 0 : return -EIO;
1188 :
1189 : /* Recovery complete, but the reset failed ... */
1190 0 : if (i915_terminally_wedged(error))
1191 0 : return -EIO;
1192 :
1193 : /*
1194 : * Check if GPU Reset is in progress - we need intel_ring_begin
1195 : * to work properly to reinit the hw state while the gpu is
1196 : * still marked as reset-in-progress. Handle this with a flag.
1197 : */
1198 0 : if (!error->reload_in_reset)
1199 0 : return -EAGAIN;
1200 : }
1201 :
1202 0 : return 0;
1203 0 : }
1204 :
1205 : #ifdef __linux__
1206 : static void fake_irq(unsigned long data)
1207 : {
1208 : wake_up_process((struct task_struct *)data);
1209 : }
1210 : #endif
1211 :
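/* Returns true when this ring has been flagged as having missed an
 * interrupt; waiters then fall back to polling with a one-jiffy timeout
 * instead of relying on the irq alone.
 */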
1212 0 : static bool missed_irq(struct drm_i915_private *dev_priv,
1213 : struct intel_engine_cs *ring)
1214 : {
1215 0 : return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
1216 : }
1217 :
1218 : #ifdef __linux__
1219 : static unsigned long local_clock_us(unsigned *cpu)
1220 : {
1221 : unsigned long t;
1222 :
1223 : /* Cheaply and approximately convert from nanoseconds to microseconds.
1224 : * The result and subsequent calculations are also defined in the same
1225 : * approximate microseconds units. The principal source of timing
1226 : * error here is from the simple truncation.
1227 : *
1228 : * Note that local_clock() is only defined wrt to the current CPU;
1229 : * the comparisons are no longer valid if we switch CPUs. Instead of
1230 : * blocking preemption for the entire busywait, we can detect the CPU
1231 : * switch and use that as indicator of system load and a reason to
1232 : * stop busywaiting, see busywait_stop().
1233 : */
1234 : *cpu = get_cpu();
1235 : t = local_clock() >> 10;
1236 : put_cpu();
1237 :
1238 : return t;
1239 : }
1240 : #else
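/* OpenBSD approximation: with no per-CPU clock available, report the
 * current CPU number and derive a coarse microsecond timestamp from the
 * global tick counter (ticks * tick).
 */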
1241 0 : static unsigned long local_clock_us(unsigned *cpu)
1242 : {
1243 0 : *cpu = cpu_number();
1244 0 : return ticks * tick;
1245 : }
1246 : #endif
1247 :
1248 0 : static bool busywait_stop(unsigned long timeout, unsigned cpu)
1249 : {
1250 0 : unsigned this_cpu;
1251 :
1252 0 : if (time_after(local_clock_us(&this_cpu), timeout))
1253 0 : return true;
1254 :
1255 0 : return this_cpu != cpu;
1256 0 : }
1257 :
1258 0 : static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
1259 : {
1260 : unsigned long timeout;
1261 0 : unsigned cpu;
1262 :
1263 : /* When waiting for high frequency requests, e.g. during synchronous
1264 : * rendering split between the CPU and GPU, the finite amount of time
1265 : * required to set up the irq and wait upon it limits the response
1266 : * rate. By busywaiting on the request completion for a short while we
1267 : * can service the high frequency waits as quickly as possible. However,
1268 : * if it is a slow request, we want to sleep as quickly as possible.
1269 : * The tradeoff between waiting and sleeping is roughly the time it
1270 : * takes to sleep on a request, on the order of a microsecond.
1271 : */
1272 :
1273 0 : if (req->ring->irq_refcount)
1274 0 : return -EBUSY;
1275 :
1276 : /* Only spin if we know the GPU is processing this request */
1277 0 : if (!i915_gem_request_started(req, true))
1278 0 : return -EAGAIN;
1279 :
1280 0 : timeout = local_clock_us(&cpu) + 5;
1281 0 : while (!drm_need_resched()) {
1282 0 : if (i915_gem_request_completed(req, true))
1283 0 : return 0;
1284 :
1285 0 : if (signal_pending_state(state, current))
1286 : break;
1287 :
1288 0 : if (busywait_stop(timeout, cpu))
1289 : break;
1290 :
1291 0 : cpu_relax_lowlatency();
1292 : }
1293 :
1294 0 : if (i915_gem_request_completed(req, false))
1295 0 : return 0;
1296 :
1297 0 : return -EAGAIN;
1298 0 : }
1299 :
1300 : #ifdef __linux__
1301 : /**
1302 : * __i915_wait_request - wait until execution of request has finished
1303 : * @req: duh!
1304 : * @reset_counter: reset sequence associated with the given request
1305 : * @interruptible: do an interruptible wait (normally yes)
1306 : * @timeout: in - how long to wait (NULL forever); out - how much time remaining
1307 : *
1308 : * Note: It is of utmost importance that the passed in seqno and reset_counter
1309 : * values have been read by the caller in an smp safe manner. Where read-side
1310 : * locks are involved, it is sufficient to read the reset_counter before
1311 : * unlocking the lock that protects the seqno. For lockless tricks, the
1312 : * reset_counter _must_ be read before, and an appropriate smp_rmb must be
1313 : * inserted.
1314 : *
1315 : * Returns 0 if the request was found within the allotted time. Else returns the
1316 : * errno with remaining time filled in timeout argument.
1317 : */
1318 : int __i915_wait_request(struct drm_i915_gem_request *req,
1319 : unsigned reset_counter,
1320 : bool interruptible,
1321 : s64 *timeout,
1322 : struct intel_rps_client *rps)
1323 : {
1324 : struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
1325 : struct drm_device *dev = ring->dev;
1326 : struct drm_i915_private *dev_priv = dev->dev_private;
1327 : const bool irq_test_in_progress =
1328 : ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
1329 : int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
1330 : DEFINE_WAIT(wait);
1331 : unsigned long timeout_expire;
1332 : s64 before, now;
1333 : int ret;
1334 :
1335 : WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
1336 :
1337 : if (list_empty(&req->list))
1338 : return 0;
1339 :
1340 : if (i915_gem_request_completed(req, true))
1341 : return 0;
1342 :
1343 : timeout_expire = 0;
1344 : if (timeout) {
1345 : if (WARN_ON(*timeout < 0))
1346 : return -EINVAL;
1347 :
1348 : if (*timeout == 0)
1349 : return -ETIME;
1350 :
1351 : timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);
1352 : }
1353 :
1354 : if (INTEL_INFO(dev_priv)->gen >= 6)
1355 : gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);
1356 :
1357 : /* Record current time in case interrupted by signal, or wedged */
1358 : trace_i915_gem_request_wait_begin(req);
1359 : before = ktime_get_raw_ns();
1360 :
1361 : /* Optimistic spin for the next jiffie before touching IRQs */
1362 : ret = __i915_spin_request(req, state);
1363 : if (ret == 0)
1364 : goto out;
1365 :
1366 : if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) {
1367 : ret = -ENODEV;
1368 : goto out;
1369 : }
1370 :
1371 : for (;;) {
1372 : struct timer_list timer;
1373 :
1374 : prepare_to_wait(&ring->irq_queue, &wait, state);
1375 :
1376 : /* We need to check whether any gpu reset happened in between
1377 : * the caller grabbing the seqno and now ... */
1378 : if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
1379 : /* ... but upgrade the -EAGAIN to an -EIO if the gpu
1380 : * is truly gone. */
1381 : ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
1382 : if (ret == 0)
1383 : ret = -EAGAIN;
1384 : break;
1385 : }
1386 :
1387 : if (i915_gem_request_completed(req, false)) {
1388 : ret = 0;
1389 : break;
1390 : }
1391 :
1392 : if (signal_pending_state(state, current)) {
1393 : ret = -ERESTARTSYS;
1394 : break;
1395 : }
1396 :
1397 : if (timeout && time_after_eq(jiffies, timeout_expire)) {
1398 : ret = -ETIME;
1399 : break;
1400 : }
1401 :
1402 : timer.function = NULL;
1403 : if (timeout || missed_irq(dev_priv, ring)) {
1404 : unsigned long expire;
1405 :
1406 : setup_timer_on_stack(&timer, fake_irq, (unsigned long)current);
1407 : expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire;
1408 : mod_timer(&timer, expire);
1409 : }
1410 :
1411 : io_schedule();
1412 :
1413 : if (timer.function) {
1414 : del_singleshot_timer_sync(&timer);
1415 : destroy_timer_on_stack(&timer);
1416 : }
1417 : }
1418 : if (!irq_test_in_progress)
1419 : ring->irq_put(ring);
1420 :
1421 : finish_wait(&ring->irq_queue, &wait);
1422 :
1423 : out:
1424 : now = ktime_get_raw_ns();
1425 : trace_i915_gem_request_wait_end(req);
1426 :
1427 : if (timeout) {
1428 : s64 tres = *timeout - (now - before);
1429 :
1430 : *timeout = tres < 0 ? 0 : tres;
1431 :
1432 : /*
1433 : * Apparently ktime isn't accurate enough and occasionally has a
1434 : * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
1435 : * things up to make the test happy. We allow up to 1 jiffy.
1436 : *
1437 : * This is a regression from the timespec->ktime conversion.
1438 : */
1439 : if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
1440 : *timeout = 0;
1441 : }
1442 :
1443 : return ret;
1444 : }
1445 : #else
1446 0 : int __i915_wait_request(struct drm_i915_gem_request *req,
1447 : unsigned reset_counter,
1448 : bool interruptible,
1449 : s64 *timeout,
1450 : struct intel_rps_client *rps)
1451 : {
1452 0 : struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
1453 0 : struct drm_device *dev = ring->dev;
1454 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1455 0 : const bool irq_test_in_progress =
1456 0 : ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
1457 0 : int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
1458 0 : struct sleep_state sls;
1459 : unsigned long timeout_expire;
1460 : s64 before, now;
1461 : int ret;
1462 :
1463 0 : WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
1464 :
1465 0 : if (list_empty(&req->list))
1466 0 : return 0;
1467 :
1468 0 : if (i915_gem_request_completed(req, true))
1469 0 : return 0;
1470 :
1471 : timeout_expire = 0;
1472 0 : if (timeout) {
1473 0 : if (WARN_ON(*timeout < 0))
1474 0 : return -EINVAL;
1475 :
1476 0 : if (*timeout == 0)
1477 0 : return -ETIME;
1478 :
1479 0 : timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);
1480 0 : }
1481 :
1482 0 : if (INTEL_INFO(dev_priv)->gen >= 6)
1483 0 : gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);
1484 :
1485 : /* Record current time in case interrupted by signal, or wedged */
1486 0 : trace_i915_gem_request_wait_begin(req);
1487 0 : before = ktime_get_raw_ns();
1488 :
1489 : /* Optimistic spin for the next jiffie before touching IRQs */
1490 0 : ret = __i915_spin_request(req, state);
1491 0 : if (ret == 0)
1492 : goto out;
1493 :
1494 0 : if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) {
1495 : ret = -ENODEV;
1496 0 : goto out;
1497 : }
1498 :
1499 0 : KASSERT(!cold);
1500 0 : for (;;) {
1501 0 : sleep_setup(&sls, &ring->irq_queue, state, "wseq");
1502 :
1503 : /* We need to check whether any gpu reset happened in between
1504 : * the caller grabbing the seqno and now ... */
1505 0 : if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
1506 : /* ... but upgrade the -EAGAIN to an -EIO if the gpu
1507 : * is truly gone. */
1508 0 : ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
1509 0 : if (ret == 0)
1510 : ret = -EAGAIN;
1511 0 : break;
1512 : }
1513 :
1514 0 : if (i915_gem_request_completed(req, false)) {
1515 : ret = 0;
1516 0 : break;
1517 : }
1518 :
1519 0 : if (interruptible && ret) {
1520 : ret = -ERESTARTSYS;
1521 0 : break;
1522 : }
1523 :
1524 0 : if (timeout && time_after_eq(jiffies, timeout_expire)) {
1525 : ret = -ETIME;
1526 0 : break;
1527 : }
1528 :
1529 0 : if (timeout || missed_irq(dev_priv, ring)) {
1530 : unsigned long expire;
1531 : int timo;
1532 :
1533 0 : expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire;
1534 0 : timo = expire - jiffies;
1535 0 : if (timo < 1)
1536 : timo = 1;
1537 0 : sleep_setup_timeout(&sls, timo);
1538 0 : }
1539 :
1540 0 : sleep_setup_signal(&sls, state);
1541 0 : sleep_finish_all(&sls, 1);
1542 : }
1543 0 : if (!irq_test_in_progress)
1544 0 : ring->irq_put(ring);
1545 :
1546 0 : sleep_finish_all(&sls, 0);
1547 :
1548 : out:
1549 0 : now = ktime_get_raw_ns();
1550 0 : trace_i915_gem_request_wait_end(req);
1551 :
1552 0 : if (timeout) {
1553 0 : s64 tres = *timeout - (now - before);
1554 :
1555 0 : *timeout = tres < 0 ? 0 : tres;
1556 :
1557 : /*
1558 : * Apparently ktime isn't accurate enough and occasionally has a
1559 : * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
1560 : * things up to make the test happy. We allow up to 1 jiffy.
1561 : *
1562 : * This is a regression from the timespec->ktime conversion.
1563 : */
1564 0 : if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
1565 0 : *timeout = 0;
1566 0 : }
1567 :
1568 0 : return ret;
1569 0 : }
1570 : #endif
1571 :
1572 0 : int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
1573 : struct drm_file *file)
1574 : {
1575 : struct drm_i915_private *dev_private;
1576 : struct drm_i915_file_private *file_priv;
1577 :
1578 0 : WARN_ON(!req || !file || req->file_priv);
1579 :
1580 0 : if (!req || !file)
1581 0 : return -EINVAL;
1582 :
1583 0 : if (req->file_priv)
1584 0 : return -EINVAL;
1585 :
1586 0 : dev_private = req->ring->dev->dev_private;
1587 0 : file_priv = file->driver_priv;
1588 :
1589 0 : spin_lock(&file_priv->mm.lock);
1590 0 : req->file_priv = file_priv;
1591 0 : list_add_tail(&req->client_list, &file_priv->mm.request_list);
1592 0 : spin_unlock(&file_priv->mm.lock);
1593 :
1594 : #ifdef __linux__
1595 : req->pid = get_pid(task_pid(current));
1596 : #endif
1597 :
1598 0 : return 0;
1599 0 : }
1600 :
1601 : static inline void
1602 0 : i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
1603 : {
1604 0 : struct drm_i915_file_private *file_priv = request->file_priv;
1605 :
1606 0 : if (!file_priv)
1607 0 : return;
1608 :
1609 0 : spin_lock(&file_priv->mm.lock);
1610 0 : list_del(&request->client_list);
1611 0 : request->file_priv = NULL;
1612 0 : spin_unlock(&file_priv->mm.lock);
1613 :
1614 : #ifdef __linux__
1615 : put_pid(request->pid);
1616 : request->pid = NULL;
1617 : #endif
1618 0 : }
1619 :
1620 0 : static void i915_gem_request_retire(struct drm_i915_gem_request *request)
1621 : {
1622 0 : trace_i915_gem_request_retire(request);
1623 :
1624 : /* We know the GPU must have read the request to have
1625 : * sent us the seqno + interrupt, so use the position
1626 : * of the tail of the request to update the last known position
1627 : * of the GPU head.
1628 : *
1629 : * Note this requires that we are always called in request
1630 : * completion order.
1631 : */
1632 0 : request->ringbuf->last_retired_head = request->postfix;
1633 :
1634 0 : list_del_init(&request->list);
1635 0 : i915_gem_request_remove_from_client(request);
1636 :
1637 0 : i915_gem_request_unreference(request);
1638 0 : }
1639 :
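/* Retire, oldest first, every request on the engine's request list up
 * to and including @req. struct_mutex must be held and @req must have
 * completed.
 */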
1640 : static void
1641 0 : __i915_gem_request_retire__upto(struct drm_i915_gem_request *req)
1642 : {
1643 0 : struct intel_engine_cs *engine = req->ring;
1644 : struct drm_i915_gem_request *tmp;
1645 :
1646 0 : lockdep_assert_held(&engine->dev->struct_mutex);
1647 :
1648 0 : if (list_empty(&req->list))
1649 0 : return;
1650 :
1651 0 : do {
1652 0 : tmp = list_first_entry(&engine->request_list,
1653 : typeof(*tmp), list);
1654 :
1655 0 : i915_gem_request_retire(tmp);
1656 0 : } while (tmp != req);
1657 :
1658 0 : WARN_ON(i915_verify_lists(engine->dev));
1659 0 : }
1660 :
1661 : /**
1662 : * Waits for a request to be signaled, and cleans up the
1663 : * request and object lists appropriately for that event.
1664 : */
1665 : int
1666 0 : i915_wait_request(struct drm_i915_gem_request *req)
1667 : {
1668 : struct drm_device *dev;
1669 : struct drm_i915_private *dev_priv;
1670 : bool interruptible;
1671 : int ret;
1672 :
1673 0 : BUG_ON(req == NULL);
1674 :
1675 0 : dev = req->ring->dev;
1676 0 : dev_priv = dev->dev_private;
1677 0 : interruptible = dev_priv->mm.interruptible;
1678 :
1679 0 : BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1680 :
1681 0 : ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
1682 0 : if (ret)
1683 0 : return ret;
1684 :
1685 0 : ret = __i915_wait_request(req,
1686 0 : atomic_read(&dev_priv->gpu_error.reset_counter),
1687 : interruptible, NULL, NULL);
1688 0 : if (ret)
1689 0 : return ret;
1690 :
1691 0 : __i915_gem_request_retire__upto(req);
1692 0 : return 0;
1693 0 : }
1694 :
1695 : /**
1696 : * Ensures that all rendering to the object has completed and the object is
1697 : * safe to unbind from the GTT or access from the CPU.
1698 : */
1699 : int
1700 0 : i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
1701 : bool readonly)
1702 : {
1703 : int ret, i;
1704 :
1705 0 : if (!obj->active)
1706 0 : return 0;
1707 :
1708 0 : if (readonly) {
1709 0 : if (obj->last_write_req != NULL) {
1710 0 : ret = i915_wait_request(obj->last_write_req);
1711 0 : if (ret)
1712 0 : return ret;
1713 :
1714 0 : i = obj->last_write_req->ring->id;
1715 0 : if (obj->last_read_req[i] == obj->last_write_req)
1716 0 : i915_gem_object_retire__read(obj, i);
1717 : else
1718 0 : i915_gem_object_retire__write(obj);
1719 : }
1720 : } else {
1721 0 : for (i = 0; i < I915_NUM_RINGS; i++) {
1722 0 : if (obj->last_read_req[i] == NULL)
1723 : continue;
1724 :
1725 0 : ret = i915_wait_request(obj->last_read_req[i]);
1726 0 : if (ret)
1727 0 : return ret;
1728 :
1729 0 : i915_gem_object_retire__read(obj, i);
1730 0 : }
1731 : RQ_BUG_ON(obj->active);
1732 : }
1733 :
1734 0 : return 0;
1735 0 : }
1736 :
1737 : static void
1738 0 : i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
1739 : struct drm_i915_gem_request *req)
1740 : {
1741 0 : int ring = req->ring->id;
1742 :
1743 0 : if (obj->last_read_req[ring] == req)
1744 0 : i915_gem_object_retire__read(obj, ring);
1745 0 : else if (obj->last_write_req == req)
1746 0 : i915_gem_object_retire__write(obj);
1747 :
1748 0 : __i915_gem_request_retire__upto(req);
1749 0 : }
1750 :
1751 : /* A nonblocking variant of the above wait. This is a highly dangerous routine
1752 : * as the object state may change during this call.
1753 : */
1754 : static __must_check int
1755 0 : i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
1756 : struct intel_rps_client *rps,
1757 : bool readonly)
1758 : {
1759 0 : struct drm_device *dev = obj->base.dev;
1760 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1761 0 : struct drm_i915_gem_request *requests[I915_NUM_RINGS];
1762 : unsigned reset_counter;
1763 : int ret, i, n = 0;
1764 :
1765 0 : BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1766 0 : BUG_ON(!dev_priv->mm.interruptible);
1767 :
1768 0 : if (!obj->active)
1769 0 : return 0;
1770 :
1771 0 : ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
1772 0 : if (ret)
1773 0 : return ret;
1774 :
1775 0 : reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
1776 :
1777 0 : if (readonly) {
1778 : struct drm_i915_gem_request *req;
1779 :
1780 0 : req = obj->last_write_req;
1781 0 : if (req == NULL)
1782 0 : return 0;
1783 :
1784 0 : requests[n++] = i915_gem_request_reference(req);
1785 0 : } else {
1786 0 : for (i = 0; i < I915_NUM_RINGS; i++) {
1787 : struct drm_i915_gem_request *req;
1788 :
1789 0 : req = obj->last_read_req[i];
1790 0 : if (req == NULL)
1791 0 : continue;
1792 :
1793 0 : requests[n++] = i915_gem_request_reference(req);
1794 0 : }
1795 : }
1796 :
1797 0 : mutex_unlock(&dev->struct_mutex);
1798 0 : for (i = 0; ret == 0 && i < n; i++)
1799 0 : ret = __i915_wait_request(requests[i], reset_counter, true,
1800 : NULL, rps);
1801 0 : mutex_lock(&dev->struct_mutex);
1802 :
1803 0 : for (i = 0; i < n; i++) {
1804 0 : if (ret == 0)
1805 0 : i915_gem_object_retire_request(obj, requests[i]);
1806 0 : i915_gem_request_unreference(requests[i]);
1807 : }
1808 :
1809 0 : return ret;
1810 0 : }
1811 :
1812 0 : static struct intel_rps_client *to_rps_client(struct drm_file *file)
1813 : {
1814 0 : struct drm_i915_file_private *fpriv = file->driver_priv;
1815 0 : return &fpriv->rps;
1816 : }
1817 :
1818 : /**
1819 : * Called when user space prepares to use an object with the CPU, either
1820 : * through the mmap ioctl's mapping or a GTT mapping.
1821 : */
1822 : int
1823 0 : i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1824 : struct drm_file *file)
1825 : {
1826 0 : struct drm_i915_gem_set_domain *args = data;
1827 : struct drm_i915_gem_object *obj;
1828 0 : uint32_t read_domains = args->read_domains;
1829 0 : uint32_t write_domain = args->write_domain;
1830 : int ret;
1831 :
1832 : /* Only handle setting domains to types used by the CPU. */
1833 0 : if (write_domain & I915_GEM_GPU_DOMAINS)
1834 0 : return -EINVAL;
1835 :
1836 0 : if (read_domains & I915_GEM_GPU_DOMAINS)
1837 0 : return -EINVAL;
1838 :
1839 : /* Having something in the write domain implies it's in the read
1840 : * domain, and only that read domain. Enforce that in the request.
1841 : */
1842 0 : if (write_domain != 0 && read_domains != write_domain)
1843 0 : return -EINVAL;
1844 :
1845 0 : ret = i915_mutex_lock_interruptible(dev);
1846 0 : if (ret)
1847 0 : return ret;
1848 :
1849 0 : obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1850 0 : if (&obj->base == NULL) {
1851 : ret = -ENOENT;
1852 0 : goto unlock;
1853 : }
1854 :
1855 : /* Try to flush the object off the GPU without holding the lock.
1856 : * We will repeat the flush holding the lock in the normal manner
1857 : * to catch cases where we are gazumped.
1858 : */
1859 0 : ret = i915_gem_object_wait_rendering__nonblocking(obj,
1860 0 : to_rps_client(file),
1861 0 : !write_domain);
1862 0 : if (ret)
1863 : goto unref;
1864 :
1865 0 : if (read_domains & I915_GEM_DOMAIN_GTT)
1866 0 : ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1867 : else
1868 0 : ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1869 :
1870 0 : if (write_domain != 0)
1871 0 : intel_fb_obj_invalidate(obj,
1872 0 : write_domain == I915_GEM_DOMAIN_GTT ?
1873 : ORIGIN_GTT : ORIGIN_CPU);
1874 :
1875 : unref:
1876 0 : drm_gem_object_unreference(&obj->base);
1877 : unlock:
1878 0 : mutex_unlock(&dev->struct_mutex);
1879 0 : return ret;
1880 0 : }
1881 :
1882 : /**
1883 : * Called when user space has done writes to this buffer
1884 : */
1885 : int
1886 0 : i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1887 : struct drm_file *file)
1888 : {
1889 0 : struct drm_i915_gem_sw_finish *args = data;
1890 : struct drm_i915_gem_object *obj;
1891 : int ret = 0;
1892 :
1893 0 : ret = i915_mutex_lock_interruptible(dev);
1894 0 : if (ret)
1895 0 : return ret;
1896 :
1897 0 : obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1898 0 : if (&obj->base == NULL) {
1899 : ret = -ENOENT;
1900 0 : goto unlock;
1901 : }
1902 :
1903 : /* Pinned buffers may be scanout, so flush the cache */
1904 0 : if (obj->pin_display)
1905 0 : i915_gem_object_flush_cpu_write_domain(obj);
1906 :
1907 0 : drm_gem_object_unreference(&obj->base);
1908 : unlock:
1909 0 : mutex_unlock(&dev->struct_mutex);
1910 0 : return ret;
1911 0 : }
1912 :
1913 : /**
1914 : * Maps the contents of an object, returning the address it is mapped
1915 : * into.
1916 : *
1917 : * While the mapping holds a reference on the contents of the object, it doesn't
1918 : * imply a ref on the object itself.
1919 : *
1920 : * IMPORTANT:
1921 : *
 1922              :  * DRM driver writers who look at this function as an example of how to do GEM
 1923              :  * mmap support, please don't implement mmap support like this. The modern way
1924 : * to implement DRM mmap support is with an mmap offset ioctl (like
1925 : * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
 1926              :  * That way debug tooling like valgrind will understand what's going on; hiding
 1927              :  * the mmap call in a driver-private ioctl will break that. The i915 driver only
1928 : * does cpu mmaps this way because we didn't know better.
1929 : */
1930 : int
1931 0 : i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1932 : struct drm_file *file)
1933 : {
1934 0 : struct drm_i915_gem_mmap *args = data;
1935 : struct drm_gem_object *obj;
1936 0 : vaddr_t addr;
1937 : vsize_t size;
1938 : int ret;
1939 :
1940 : #ifdef __OpenBSD__
1941 0 : if (args->size == 0 || args->offset & PAGE_MASK)
1942 0 : return -EINVAL;
1943 0 : size = round_page(args->size);
1944 0 : if (args->offset + size < args->offset)
1945 0 : return -EINVAL;
1946 : #endif
1947 :
1948 0 : if (args->flags & ~(I915_MMAP_WC))
1949 0 : return -EINVAL;
1950 :
1951 0 : if (args->flags & I915_MMAP_WC && !cpu_has_pat)
1952 : return -ENODEV;
1953 :
1954 0 : obj = drm_gem_object_lookup(dev, file, args->handle);
1955 0 : if (obj == NULL)
1956 0 : return -ENOENT;
1957 :
1958 : /* prime objects have no backing filp to GEM mmap
1959 : * pages from.
1960 : */
1961 0 : if (!obj->filp) {
1962 0 : drm_gem_object_unreference_unlocked(obj);
1963 0 : return -EINVAL;
1964 : }
1965 :
1966 : #ifdef __linux__
1967 : addr = vm_mmap(obj->filp, 0, args->size,
1968 : PROT_READ | PROT_WRITE, MAP_SHARED,
1969 : args->offset);
1970 : if (args->flags & I915_MMAP_WC) {
1971 : struct mm_struct *mm = current->mm;
1972 : struct vm_area_struct *vma;
1973 :
1974 : down_write(&mm->mmap_sem);
1975 : vma = find_vma(mm, addr);
1976 : if (vma)
1977 : vma->vm_page_prot =
1978 : pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
1979 : else
1980 : addr = -ENOMEM;
1981 : up_write(&mm->mmap_sem);
1982 : }
1983 : drm_gem_object_unreference_unlocked(obj);
1984 : if (IS_ERR((void *)addr))
1985 : return addr;
1986 : #else
1987 0 : addr = 0;
1988 0 : ret = -uvm_map(&curproc->p_vmspace->vm_map, &addr, size,
1989 0 : obj->uao, args->offset, 0, UVM_MAPFLAG(PROT_READ | PROT_WRITE,
1990 : PROT_READ | PROT_WRITE, MAP_INHERIT_SHARE, MADV_RANDOM, 0));
1991 0 : if (ret == 0)
1992 0 : uao_reference(obj->uao);
1993 0 : drm_gem_object_unreference_unlocked(obj);
1994 0 : if (ret)
1995 0 : return ret;
1996 : #endif
1997 :
1998 0 : args->addr_ptr = (uint64_t) addr;
1999 :
2000 0 : return 0;
2001 0 : }
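/*
 * Illustrative userspace sketch (editor's addition, not part of this file):
 * how a client would drive the legacy mmap ioctl handled above. Assumes
 * libdrm's drmIoctl() and an already-open DRM fd; `handle` and `obj_size`
 * are assumed to come from an earlier GEM create call. I915_MMAP_WC only
 * succeeds when PAT is usable, matching the cpu_has_pat check above.
 */
#if 0
	struct drm_i915_gem_mmap arg = {
		.handle = handle,	/* existing GEM handle (assumed) */
		.size = obj_size,	/* length to map, in bytes (assumed) */
		.flags = I915_MMAP_WC,	/* optional write-combining mapping */
	};
	void *ptr = NULL;

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP, &arg) == 0)
		ptr = (void *)(uintptr_t)arg.addr_ptr;	/* CPU mapping */
#endif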
2002 :
2003 : #ifdef __linux__
2004 :
2005 : /**
2006 : * i915_gem_fault - fault a page into the GTT
2007 : * @vma: VMA in question
2008 : * @vmf: fault info
2009 : *
 2010              :  * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
2011 : * from userspace. The fault handler takes care of binding the object to
2012 : * the GTT (if needed), allocating and programming a fence register (again,
2013 : * only if needed based on whether the old reg is still valid or the object
2014 : * is tiled) and inserting a new PTE into the faulting process.
2015 : *
2016 : * Note that the faulting process may involve evicting existing objects
2017 : * from the GTT and/or fence registers to make room. So performance may
2018 : * suffer if the GTT working set is large or there are few fence registers
2019 : * left.
2020 : */
2021 : int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
2022 : {
2023 : struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
2024 : struct drm_device *dev = obj->base.dev;
2025 : struct drm_i915_private *dev_priv = dev->dev_private;
2026 : struct i915_ggtt_view view = i915_ggtt_view_normal;
2027 : pgoff_t page_offset;
2028 : unsigned long pfn;
2029 : int ret = 0;
2030 : bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
2031 :
2032 : intel_runtime_pm_get(dev_priv);
2033 :
2034 : /* We don't use vmf->pgoff since that has the fake offset */
2035 : page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
2036 : PAGE_SHIFT;
2037 :
2038 : ret = i915_mutex_lock_interruptible(dev);
2039 : if (ret)
2040 : goto out;
2041 :
2042 : trace_i915_gem_object_fault(obj, page_offset, true, write);
2043 :
2044 : /* Try to flush the object off the GPU first without holding the lock.
2045 : * Upon reacquiring the lock, we will perform our sanity checks and then
2046 : * repeat the flush holding the lock in the normal manner to catch cases
2047 : * where we are gazumped.
2048 : */
2049 : ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
2050 : if (ret)
2051 : goto unlock;
2052 :
2053 : /* Access to snoopable pages through the GTT is incoherent. */
2054 : if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
2055 : ret = -EFAULT;
2056 : goto unlock;
2057 : }
2058 :
2059 : /* Use a partial view if the object is bigger than the aperture. */
2060 : if (obj->base.size >= dev_priv->gtt.mappable_end &&
2061 : obj->tiling_mode == I915_TILING_NONE) {
 2062              : 		static const unsigned int chunk_size = 256; /* 256 pages, i.e. 1 MiB */
2063 :
2064 : memset(&view, 0, sizeof(view));
2065 : view.type = I915_GGTT_VIEW_PARTIAL;
2066 : view.params.partial.offset = rounddown(page_offset, chunk_size);
2067 : view.params.partial.size =
2068 : min_t(unsigned int,
2069 : chunk_size,
2070 : (vma->vm_end - vma->vm_start)/PAGE_SIZE -
2071 : view.params.partial.offset);
2072 : }
2073 :
2074 : /* Now pin it into the GTT if needed */
2075 : ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
2076 : if (ret)
2077 : goto unlock;
2078 :
2079 : ret = i915_gem_object_set_to_gtt_domain(obj, write);
2080 : if (ret)
2081 : goto unpin;
2082 :
2083 : ret = i915_gem_object_get_fence(obj);
2084 : if (ret)
2085 : goto unpin;
2086 :
2087 : /* Finally, remap it using the new GTT offset */
2088 : pfn = dev_priv->gtt.mappable_base +
2089 : i915_gem_obj_ggtt_offset_view(obj, &view);
2090 : pfn >>= PAGE_SHIFT;
2091 :
2092 : if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) {
2093 : /* Overriding existing pages in partial view does not cause
2094 : * us any trouble as TLBs are still valid because the fault
2095 : * is due to userspace losing part of the mapping or never
 2096              : 		 * having accessed it before (at this partial view's range).
2097 : */
2098 : unsigned long base = vma->vm_start +
2099 : (view.params.partial.offset << PAGE_SHIFT);
2100 : unsigned int i;
2101 :
2102 : for (i = 0; i < view.params.partial.size; i++) {
2103 : ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i);
2104 : if (ret)
2105 : break;
2106 : }
2107 :
2108 : obj->fault_mappable = true;
2109 : } else {
2110 : if (!obj->fault_mappable) {
2111 : unsigned long size = min_t(unsigned long,
2112 : vma->vm_end - vma->vm_start,
2113 : obj->base.size);
2114 : int i;
2115 :
2116 : for (i = 0; i < size >> PAGE_SHIFT; i++) {
2117 : ret = vm_insert_pfn(vma,
2118 : (unsigned long)vma->vm_start + i * PAGE_SIZE,
2119 : pfn + i);
2120 : if (ret)
2121 : break;
2122 : }
2123 :
2124 : obj->fault_mappable = true;
2125 : } else
2126 : ret = vm_insert_pfn(vma,
2127 : (unsigned long)vmf->virtual_address,
2128 : pfn + page_offset);
2129 : }
2130 : unpin:
2131 : i915_gem_object_ggtt_unpin_view(obj, &view);
2132 : unlock:
2133 : mutex_unlock(&dev->struct_mutex);
2134 : out:
2135 : switch (ret) {
2136 : case -EIO:
2137 : /*
2138 : * We eat errors when the gpu is terminally wedged to avoid
2139 : * userspace unduly crashing (gl has no provisions for mmaps to
2140 : * fail). But any other -EIO isn't ours (e.g. swap in failure)
2141 : * and so needs to be reported.
2142 : */
2143 : if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
2144 : ret = VM_FAULT_SIGBUS;
2145 : break;
2146 : }
2147 : case -EAGAIN:
2148 : /*
2149 : * EAGAIN means the gpu is hung and we'll wait for the error
2150 : * handler to reset everything when re-faulting in
2151 : * i915_mutex_lock_interruptible.
2152 : */
2153 : case 0:
2154 : case -ERESTARTSYS:
2155 : case -EINTR:
2156 : case -EBUSY:
2157 : /*
2158 : * EBUSY is ok: this just means that another thread
2159 : * already did the job.
2160 : */
2161 : ret = VM_FAULT_NOPAGE;
2162 : break;
2163 : case -ENOMEM:
2164 : ret = VM_FAULT_OOM;
2165 : break;
2166 : case -ENOSPC:
2167 : case -EFAULT:
2168 : ret = VM_FAULT_SIGBUS;
2169 : break;
2170 : default:
2171 : WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
2172 : ret = VM_FAULT_SIGBUS;
2173 : break;
2174 : }
2175 :
2176 : intel_runtime_pm_put(dev_priv);
2177 : return ret;
2178 : }
2179 :
2180 : #else
2181 :
2182 : int
2183 0 : i915_gem_fault(struct drm_gem_object *gem_obj, struct uvm_faultinfo *ufi,
2184 : off_t offset, vaddr_t vaddr, vm_page_t *pps, int npages, int centeridx,
2185 : vm_prot_t access_type, int flags)
2186 : {
2187 0 : struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
2188 0 : struct drm_device *dev = obj->base.dev;
2189 0 : struct drm_i915_private *dev_priv = dev->dev_private;
2190 0 : struct i915_ggtt_view view = i915_ggtt_view_normal;
2191 : paddr_t paddr;
2192 : int lcv, ret = 0;
2193 0 : int write = !!(access_type & PROT_WRITE);
2194 : vm_prot_t mapprot;
2195 : boolean_t locked = TRUE;
2196 :
2197 0 : intel_runtime_pm_get(dev_priv);
2198 :
2199 : /*
2200 : * If we already own the lock, we must be doing a copyin or
2201 : * copyout in one of the fast paths. Return failure such that
2202 : * we fall back on the slow path.
2203 : */
2204 0 : if (!drm_vma_node_has_offset(&obj->base.vma_node) ||
2205 0 : RWLOCK_OWNER(&dev->struct_mutex) == curproc) {
2206 0 : uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap,
2207 0 : &obj->base.uobj, NULL);
2208 : ret = VM_PAGER_BAD;
2209 0 : goto out;
2210 : }
2211 :
2212 0 : offset -= drm_vma_node_offset_addr(&obj->base.vma_node);
2213 :
2214 0 : if (!mutex_trylock(&dev->struct_mutex)) {
2215 0 : uvmfault_unlockall(ufi, NULL, &obj->base.uobj, NULL);
2216 0 : mutex_lock(&dev->struct_mutex);
2217 0 : locked = uvmfault_relock(ufi);
2218 0 : }
2219 0 : if (!locked) {
2220 0 : mutex_unlock(&dev->struct_mutex);
2221 : ret = VM_PAGER_REFAULT;
2222 0 : goto out;
2223 : }
2224 :
2225 : /* Try to flush the object off the GPU first without holding the lock.
2226 : * Upon reacquiring the lock, we will perform our sanity checks and then
2227 : * repeat the flush holding the lock in the normal manner to catch cases
2228 : * where we are gazumped.
2229 : */
2230 0 : ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
2231 0 : if (ret)
2232 : goto unlock;
2233 :
2234 : /* Access to snoopable pages through the GTT is incoherent. */
2235 0 : if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
2236 : ret = -EINVAL;
2237 0 : goto unlock;
2238 : }
2239 :
2240 : /* Now bind it into the GTT if needed */
2241 0 : ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
2242 0 : if (ret)
2243 : goto unlock;
2244 :
2245 0 : ret = i915_gem_object_set_to_gtt_domain(obj, write);
2246 0 : if (ret)
2247 : goto unpin;
2248 :
2249 0 : ret = i915_gem_object_get_fence(obj);
2250 0 : if (ret)
2251 : goto unpin;
2252 :
2253 0 : obj->fault_mappable = true;
2254 :
2255 0 : mapprot = ufi->entry->protection;
2256 : /*
2257 : * if it's only a read fault, we only put ourselves into the gtt
2258 : * read domain, so make sure we fault again and set ourselves to write.
2259 : * this prevents us needing userland to do domain management and get
2260 : * it wrong, and makes us fully coherent with the gpu re mmap.
2261 : */
2262 0 : if (write == 0)
2263 0 : mapprot &= ~PROT_WRITE;
2264 : /* XXX try and be more efficient when we do this */
2265 0 : for (lcv = 0 ; lcv < npages ; lcv++, offset += PAGE_SIZE,
2266 0 : vaddr += PAGE_SIZE) {
2267 0 : if ((flags & PGO_ALLPAGES) == 0 && lcv != centeridx)
2268 : continue;
2269 :
2270 0 : if (pps[lcv] == PGO_DONTCARE)
2271 : continue;
2272 :
2273 0 : paddr = dev_priv->gtt.mappable_base +
2274 0 : i915_gem_obj_ggtt_offset(obj) + offset;
2275 :
2276 0 : if (pmap_enter(ufi->orig_map->pmap, vaddr, paddr,
2277 0 : mapprot, PMAP_CANFAIL | mapprot) != 0) {
2278 0 : i915_gem_object_ggtt_unpin_view(obj, &view);
2279 0 : uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap,
2280 : NULL, NULL);
2281 0 : mutex_unlock(&dev->struct_mutex);
2282 : pmap_update(ufi->orig_map->pmap);
2283 0 : uvm_wait("intelflt");
2284 : ret = VM_PAGER_REFAULT;
2285 0 : goto out;
2286 : }
2287 : }
2288 : unpin:
2289 0 : i915_gem_object_ggtt_unpin_view(obj, &view);
2290 : unlock:
2291 0 : uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, NULL, NULL);
2292 0 : mutex_unlock(&dev->struct_mutex);
2293 : pmap_update(ufi->orig_map->pmap);
2294 :
2295 0 : switch (ret) {
2296 : case -EIO:
2297 : /*
2298 : * We eat errors when the gpu is terminally wedged to avoid
2299 : * userspace unduly crashing (gl has no provisions for mmaps to
2300 : * fail). But any other -EIO isn't ours (e.g. swap in failure)
2301 : * and so needs to be reported.
2302 : */
2303 0 : if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
2304 : ret = VM_PAGER_ERROR;
2305 0 : break;
2306 : }
2307 : case -EAGAIN:
2308 : /*
2309 : * EAGAIN means the gpu is hung and we'll wait for the error
2310 : * handler to reset everything when re-faulting in
2311 : * i915_mutex_lock_interruptible.
2312 : */
2313 : case 0:
2314 : case -ERESTART:
2315 : case -EINTR:
2316 : case -EBUSY:
2317 : /*
2318 : * EBUSY is ok: this just means that another thread
2319 : * already did the job.
2320 : */
2321 : ret = VM_PAGER_OK;
2322 0 : break;
2323 : case -ENOMEM:
2324 : ret = VM_PAGER_ERROR;
2325 0 : break;
2326 : case -ENOSPC:
2327 : case -EFAULT:
2328 : ret = VM_PAGER_ERROR;
2329 0 : break;
2330 : default:
2331 0 : WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
2332 : ret = VM_PAGER_ERROR;
2333 0 : break;
2334 : }
2335 :
2336 : out:
2337 0 : intel_runtime_pm_put(dev_priv);
2338 0 : return ret;
2339 0 : }
2340 :
2341 : #endif
2342 :
2343 : /**
2344 : * i915_gem_release_mmap - remove physical page mappings
2345 : * @obj: obj in question
2346 : *
2347 : * Preserve the reservation of the mmapping with the DRM core code, but
2348 : * relinquish ownership of the pages back to the system.
2349 : *
2350 : * It is vital that we remove the page mapping if we have mapped a tiled
2351 : * object through the GTT and then lose the fence register due to
2352 : * resource pressure. Similarly if the object has been moved out of the
 2353              :  * aperture, then pages mapped into userspace must be revoked. Removing the
2354 : * mapping will then trigger a page fault on the next user access, allowing
2355 : * fixup by i915_gem_fault().
2356 : */
2357 : void
2358 0 : i915_gem_release_mmap(struct drm_i915_gem_object *obj)
2359 : {
2360 0 : if (!obj->fault_mappable)
2361 : return;
2362 :
2363 : #ifdef __linux__
2364 : drm_vma_node_unmap(&obj->base.vma_node,
2365 : obj->base.dev->anon_inode->i_mapping);
2366 : #else
2367 0 : if (drm_vma_node_has_offset(&obj->base.vma_node)) {
2368 0 : struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2369 : struct vm_page *pg;
2370 :
2371 0 : for (pg = &dev_priv->pgs[atop(i915_gem_obj_ggtt_offset(obj))];
2372 0 : pg != &dev_priv->pgs[atop(i915_gem_obj_ggtt_offset(obj) + obj->base.size)];
2373 0 : pg++)
2374 0 : pmap_page_protect(pg, PROT_NONE);
2375 0 : }
2376 : #endif
2377 0 : obj->fault_mappable = false;
2378 0 : }
2379 :
2380 : void
2381 0 : i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
2382 : {
2383 : struct drm_i915_gem_object *obj;
2384 :
2385 0 : list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
2386 0 : i915_gem_release_mmap(obj);
2387 0 : }
2388 :
2389 : uint32_t
2390 0 : i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
2391 : {
2392 : uint32_t gtt_size;
2393 :
2394 0 : if (INTEL_INFO(dev)->gen >= 4 ||
2395 0 : tiling_mode == I915_TILING_NONE)
2396 0 : return size;
2397 :
2398 : /* Previous chips need a power-of-two fence region when tiling */
2399 0 : if (INTEL_INFO(dev)->gen == 3)
2400 0 : gtt_size = 1024*1024;
2401 : else
2402 : gtt_size = 512*1024;
2403 :
2404 0 : while (gtt_size < size)
2405 0 : gtt_size <<= 1;
2406 :
2407 0 : return gtt_size;
2408 0 : }
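/*
 * Worked example (editor's addition): a 700 KiB X-tiled object on gen3
 * starts from the 1 MiB minimum, which already covers it, so 1 MiB is
 * returned; on gen2 the 512 KiB minimum is doubled once to reach 1 MiB.
 * On gen4+ (or for untiled buffers) the object size is returned unchanged.
 */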
2409 :
2410 : /**
2411 : * i915_gem_get_gtt_alignment - return required GTT alignment for an object
2412 : * @obj: object to check
2413 : *
2414 : * Return the required GTT alignment for an object, taking into account
2415 : * potential fence register mapping.
2416 : */
2417 : uint32_t
2418 0 : i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
2419 : int tiling_mode, bool fenced)
2420 : {
2421 : /*
2422 : * Minimum alignment is 4k (GTT page size), but might be greater
2423 : * if a fence register is needed for the object.
2424 : */
2425 0 : if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
2426 0 : tiling_mode == I915_TILING_NONE)
2427 0 : return 4096;
2428 :
2429 : /*
2430 : * Previous chips need to be aligned to the size of the smallest
2431 : * fence register that can contain the object.
2432 : */
2433 0 : return i915_gem_get_gtt_size(dev, size, tiling_mode);
2434 0 : }
2435 :
2436 0 : static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
2437 : {
2438 : #ifdef notyet
2439 : struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2440 : #endif
2441 : int ret;
2442 :
2443 0 : if (drm_vma_node_has_offset(&obj->base.vma_node))
2444 0 : return 0;
2445 :
2446 : #ifdef notyet
2447 : dev_priv->mm.shrinker_no_lock_stealing = true;
2448 : #endif
2449 :
2450 0 : ret = drm_gem_create_mmap_offset(&obj->base);
2451 : #ifdef notyet
2452 : if (ret != -ENOSPC)
2453 : goto out;
2454 :
2455 : /* Badly fragmented mmap space? The only way we can recover
2456 : * space is by destroying unwanted objects. We can't randomly release
2457 : * mmap_offsets as userspace expects them to be persistent for the
2458 : * lifetime of the objects. The closest we can is to release the
 2459              :  * lifetime of the objects. The closest we can do is to release the
 2460              :  * offsets on purgeable objects by truncating them and marking them purged,
2461 : */
2462 : i915_gem_shrink(dev_priv,
2463 : obj->base.size >> PAGE_SHIFT,
2464 : I915_SHRINK_BOUND |
2465 : I915_SHRINK_UNBOUND |
2466 : I915_SHRINK_PURGEABLE);
2467 : ret = drm_gem_create_mmap_offset(&obj->base);
2468 : if (ret != -ENOSPC)
2469 : goto out;
2470 :
2471 : i915_gem_shrink_all(dev_priv);
2472 : ret = drm_gem_create_mmap_offset(&obj->base);
2473 : out:
2474 : dev_priv->mm.shrinker_no_lock_stealing = false;
2475 : #endif
2476 :
2477 0 : return ret;
2478 0 : }
2479 :
2480 0 : static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
2481 : {
2482 0 : drm_gem_free_mmap_offset(&obj->base);
2483 0 : }
2484 :
2485 : int
2486 0 : i915_gem_mmap_gtt(struct drm_file *file,
2487 : struct drm_device *dev,
2488 : uint32_t handle,
2489 : uint64_t *offset)
2490 : {
2491 : struct drm_i915_gem_object *obj;
2492 : int ret;
2493 :
2494 0 : ret = i915_mutex_lock_interruptible(dev);
2495 0 : if (ret)
2496 0 : return ret;
2497 :
2498 0 : obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
2499 0 : if (&obj->base == NULL) {
2500 : ret = -ENOENT;
2501 0 : goto unlock;
2502 : }
2503 :
2504 0 : if (obj->madv != I915_MADV_WILLNEED) {
2505 : DRM_DEBUG("Attempting to mmap a purgeable buffer\n");
2506 : ret = -EFAULT;
2507 0 : goto out;
2508 : }
2509 :
2510 0 : ret = i915_gem_object_create_mmap_offset(obj);
2511 0 : if (ret)
2512 : goto out;
2513 :
2514 0 : *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
2515 :
2516 : out:
2517 0 : drm_gem_object_unreference(&obj->base);
2518 : unlock:
2519 0 : mutex_unlock(&dev->struct_mutex);
2520 0 : return ret;
2521 0 : }
2522 :
2523 : /**
2524 : * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2525 : * @dev: DRM device
2526 : * @data: GTT mapping ioctl data
2527 : * @file: GEM object info
2528 : *
2529 : * Simply returns the fake offset to userspace so it can mmap it.
2530 : * The mmap call will end up in drm_gem_mmap(), which will set things
2531 : * up so we can get faults in the handler above.
2532 : *
2533 : * The fault handler will take care of binding the object into the GTT
2534 : * (since it may have been evicted to make room for something), allocating
2535 : * a fence register, and mapping the appropriate aperture address into
2536 : * userspace.
2537 : */
2538 : int
2539 0 : i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2540 : struct drm_file *file)
2541 : {
2542 0 : struct drm_i915_gem_mmap_gtt *args = data;
2543 :
2544 0 : return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
2545 : }
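/*
 * Illustrative userspace sketch (editor's addition, not part of this file):
 * the fake offset returned above is fed straight into mmap(2) on the DRM
 * fd, and the resulting faults are serviced by i915_gem_fault(). Assumes
 * libdrm's drmIoctl(), an open DRM fd, and an existing `handle`/`obj_size`.
 */
#if 0
	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
	void *ptr = MAP_FAILED;

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg) == 0)
		ptr = mmap(NULL, obj_size, PROT_READ | PROT_WRITE,
			   MAP_SHARED, fd, arg.offset);
#endif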
2546 :
2547 : /* Immediately discard the backing storage */
2548 : static void
2549 0 : i915_gem_object_truncate(struct drm_i915_gem_object *obj)
2550 : {
2551 0 : i915_gem_object_free_mmap_offset(obj);
2552 :
2553 0 : if (obj->base.filp == NULL)
2554 : return;
2555 :
2556 : /* Our goal here is to return as much of the memory as
2557 : * is possible back to the system as we are called from OOM.
2558 : * To do this we must instruct the shmfs to drop all of its
2559 : * backing pages, *now*.
2560 : */
2561 : #ifdef __linux__
2562 : shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
2563 : #else
2564 0 : obj->base.uao->pgops->pgo_flush(obj->base.uao, 0, obj->base.size,
2565 : PGO_ALLPAGES | PGO_FREE);
2566 : #endif
2567 0 : obj->madv = __I915_MADV_PURGED;
2568 0 : }
2569 :
2570 : /* Try to discard unwanted pages */
2571 : static void
2572 0 : i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
2573 : {
2574 : #ifdef __linux__
2575 : struct address_space *mapping;
2576 : #endif
2577 :
2578 0 : switch (obj->madv) {
2579 : case I915_MADV_DONTNEED:
2580 0 : i915_gem_object_truncate(obj);
2581 : case __I915_MADV_PURGED:
2582 : return;
2583 : }
2584 :
2585 0 : if (obj->base.filp == NULL)
2586 : return;
2587 :
2588 : #ifdef __linux__
2589 : mapping = file_inode(obj->base.filp)->i_mapping,
2590 : invalidate_mapping_pages(mapping, 0, (loff_t)-1);
2591 : #endif
2592 0 : }
2593 :
2594 : static void
2595 0 : i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
2596 : {
2597 0 : struct sg_page_iter sg_iter;
2598 : int ret;
2599 :
2600 0 : BUG_ON(obj->madv == __I915_MADV_PURGED);
2601 :
2602 0 : ret = i915_gem_object_set_to_cpu_domain(obj, true);
2603 0 : if (ret) {
2604 : /* In the event of a disaster, abandon all caches and
2605 : * hope for the best.
2606 : */
2607 0 : WARN_ON(ret != -EIO);
2608 0 : i915_gem_clflush_object(obj, true);
2609 0 : obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2610 0 : }
2611 :
2612 0 : i915_gem_gtt_finish_object(obj);
2613 :
2614 0 : if (i915_gem_object_needs_bit17_swizzle(obj))
2615 0 : i915_gem_object_save_bit_17_swizzle(obj);
2616 :
2617 0 : if (obj->madv == I915_MADV_DONTNEED)
2618 0 : obj->dirty = 0;
2619 :
2620 0 : for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
2621 0 : struct vm_page *page = sg_page_iter_page(&sg_iter);
2622 :
2623 0 : if (obj->dirty)
2624 0 : set_page_dirty(page);
2625 :
2626 : #ifdef __linux__
2627 : if (obj->madv == I915_MADV_WILLNEED)
2628 : mark_page_accessed(page);
2629 :
2630 : page_cache_release(page);
2631 : #endif
2632 : }
2633 : #ifdef __OpenBSD__
2634 0 : uvm_objunwire(obj->base.uao, 0, obj->base.size);
2635 : #endif
2636 0 : obj->dirty = 0;
2637 :
2638 0 : sg_free_table(obj->pages);
2639 0 : kfree(obj->pages);
2640 0 : }
2641 :
2642 : int
2643 0 : i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
2644 : {
2645 0 : const struct drm_i915_gem_object_ops *ops = obj->ops;
2646 :
2647 0 : if (obj->pages == NULL)
2648 0 : return 0;
2649 :
2650 0 : if (obj->pages_pin_count)
2651 0 : return -EBUSY;
2652 :
2653 0 : BUG_ON(i915_gem_obj_bound_any(obj));
2654 :
2655 : /* ->put_pages might need to allocate memory for the bit17 swizzle
2656 : * array, hence protect them from being reaped by removing them from gtt
2657 : * lists early. */
2658 0 : list_del(&obj->global_list);
2659 :
2660 0 : ops->put_pages(obj);
2661 0 : obj->pages = NULL;
2662 :
2663 0 : i915_gem_object_invalidate(obj);
2664 :
2665 0 : return 0;
2666 0 : }
2667 :
2668 : static int
2669 0 : i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2670 : {
2671 0 : struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2672 : int page_count, i;
2673 : #ifdef __linux__
2674 : struct address_space *mapping;
2675 : #endif
2676 : struct sg_table *st;
2677 : struct scatterlist *sg;
2678 : #ifdef __linux__
2679 : struct sg_page_iter sg_iter;
2680 : #endif
2681 0 : struct pglist plist;
2682 : struct vm_page *page;
2683 : #ifdef __linux__
2684 : unsigned long last_pfn = 0; /* suppress gcc warning */
2685 : #endif
2686 : int ret;
2687 : #ifdef __linux__
2688 : gfp_t gfp;
2689 : #endif
2690 :
2691 : /* Assert that the object is not currently in any GPU domain. As it
2692 : * wasn't in the GTT, there shouldn't be any way it could have been in
2693 : * a GPU cache
2694 : */
2695 0 : BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2696 0 : BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2697 :
2698 0 : st = kmalloc(sizeof(*st), GFP_KERNEL);
2699 0 : if (st == NULL)
2700 0 : return -ENOMEM;
2701 :
2702 0 : page_count = obj->base.size / PAGE_SIZE;
2703 0 : if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
2704 0 : kfree(st);
2705 0 : return -ENOMEM;
2706 : }
2707 :
2708 : #ifdef __linux__
2709 : /* Get the list of pages out of our struct file. They'll be pinned
2710 : * at this point until we release them.
2711 : *
2712 : * Fail silently without starting the shrinker
2713 : */
2714 : mapping = file_inode(obj->base.filp)->i_mapping;
2715 : gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM));
2716 : gfp |= __GFP_NORETRY | __GFP_NOWARN;
2717 : sg = st->sgl;
2718 : st->nents = 0;
2719 : for (i = 0; i < page_count; i++) {
2720 : page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2721 : if (IS_ERR(page)) {
2722 : i915_gem_shrink(dev_priv,
2723 : page_count,
2724 : I915_SHRINK_BOUND |
2725 : I915_SHRINK_UNBOUND |
2726 : I915_SHRINK_PURGEABLE);
2727 : page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2728 : }
2729 : if (IS_ERR(page)) {
2730 : /* We've tried hard to allocate the memory by reaping
2731 : * our own buffer, now let the real VM do its job and
2732 : * go down in flames if truly OOM.
2733 : */
2734 : i915_gem_shrink_all(dev_priv);
2735 : page = shmem_read_mapping_page(mapping, i);
2736 : if (IS_ERR(page)) {
2737 : ret = PTR_ERR(page);
2738 : goto err_pages;
2739 : }
2740 : }
2741 : #ifdef CONFIG_SWIOTLB
2742 : if (swiotlb_nr_tbl()) {
2743 : st->nents++;
2744 : sg_set_page(sg, page, PAGE_SIZE, 0);
2745 : sg = sg_next(sg);
2746 : continue;
2747 : }
2748 : #endif
2749 : if (!i || page_to_pfn(page) != last_pfn + 1) {
2750 : if (i)
2751 : sg = sg_next(sg);
2752 : st->nents++;
2753 : sg_set_page(sg, page, PAGE_SIZE, 0);
2754 : } else {
2755 : sg->length += PAGE_SIZE;
2756 : }
2757 : last_pfn = page_to_pfn(page);
2758 :
2759 : /* Check that the i965g/gm workaround works. */
2760 : WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
2761 : }
2762 : #ifdef CONFIG_SWIOTLB
2763 : if (!swiotlb_nr_tbl())
2764 : #endif
2765 : sg_mark_end(sg);
2766 : #else
2767 0 : sg = st->sgl;
2768 0 : st->nents = 0;
2769 :
2770 0 : TAILQ_INIT(&plist);
2771 0 : if (uvm_objwire(obj->base.uao, 0, obj->base.size, &plist)) {
2772 : ret = -ENOMEM;
2773 0 : goto err_pages;
2774 : }
2775 :
2776 : i = 0;
2777 0 : TAILQ_FOREACH(page, &plist, pageq) {
2778 0 : st->nents++;
2779 0 : sg_dma_address(sg) = VM_PAGE_TO_PHYS(page);
2780 0 : sg_dma_len(sg) = PAGE_SIZE;
2781 0 : sg++;
2782 0 : i++;
2783 : }
2784 : #endif
2785 0 : obj->pages = st;
2786 :
2787 0 : ret = i915_gem_gtt_prepare_object(obj);
2788 0 : if (ret)
2789 : goto err_pages;
2790 :
2791 0 : if (i915_gem_object_needs_bit17_swizzle(obj))
2792 0 : i915_gem_object_do_bit_17_swizzle(obj);
2793 :
2794 0 : if (obj->tiling_mode != I915_TILING_NONE &&
2795 0 : dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
2796 0 : i915_gem_object_pin_pages(obj);
2797 :
2798 0 : return 0;
2799 :
2800 : err_pages:
2801 : sg_mark_end(sg);
2802 : #ifdef __linux__
2803 : for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
2804 : page_cache_release(sg_page_iter_page(&sg_iter));
2805 : #else
2806 0 : uvm_objunwire(obj->base.uao, 0, obj->base.size);
2807 : #endif
2808 0 : sg_free_table(st);
2809 0 : kfree(st);
2810 :
2811 : /* shmemfs first checks if there is enough memory to allocate the page
2812 : * and reports ENOSPC should there be insufficient, along with the usual
2813 : * ENOMEM for a genuine allocation failure.
2814 : *
2815 : * We use ENOSPC in our driver to mean that we have run out of aperture
2816 : * space and so want to translate the error from shmemfs back to our
2817 : * usual understanding of ENOMEM.
2818 : */
2819 0 : if (ret == -ENOSPC)
2820 0 : ret = -ENOMEM;
2821 :
2822 0 : return ret;
2823 0 : }
2824 :
2825 : /* Ensure that the associated pages are gathered from the backing storage
2826 : * and pinned into our object. i915_gem_object_get_pages() may be called
2827 : * multiple times before they are released by a single call to
2828 : * i915_gem_object_put_pages() - once the pages are no longer referenced
2829 : * either as a result of memory pressure (reaping pages under the shrinker)
2830 : * or as the object is itself released.
2831 : */
2832 : int
2833 0 : i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2834 : {
2835 0 : struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2836 0 : const struct drm_i915_gem_object_ops *ops = obj->ops;
2837 : int ret;
2838 :
2839 0 : if (obj->pages)
2840 0 : return 0;
2841 :
2842 0 : if (obj->madv != I915_MADV_WILLNEED) {
2843 : DRM_DEBUG("Attempting to obtain a purgeable object\n");
2844 0 : return -EFAULT;
2845 : }
2846 :
2847 0 : BUG_ON(obj->pages_pin_count);
2848 :
2849 0 : ret = ops->get_pages(obj);
2850 0 : if (ret)
2851 0 : return ret;
2852 :
2853 0 : list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
2854 :
2855 0 : obj->get_page.sg = obj->pages->sgl;
2856 0 : obj->get_page.last = 0;
2857 :
2858 0 : return 0;
2859 0 : }
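/*
 * Typical caller pattern (editor's sketch, assuming the driver's
 * pin_pages/unpin_pages helpers from i915_drv.h): pair get_pages with a
 * pin so the backing store cannot be reaped while in use, and drop the
 * pin when finished; put_pages refuses with -EBUSY until the pin count
 * returns to zero.
 */
#if 0
	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;
	i915_gem_object_pin_pages(obj);		/* bump pages_pin_count */

	/* ... access obj->pages ... */

	i915_gem_object_unpin_pages(obj);	/* allow put_pages again */
#endif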
2860 :
2861 0 : void i915_vma_move_to_active(struct i915_vma *vma,
2862 : struct drm_i915_gem_request *req)
2863 : {
2864 0 : struct drm_i915_gem_object *obj = vma->obj;
2865 : struct intel_engine_cs *ring;
2866 :
2867 0 : ring = i915_gem_request_get_ring(req);
2868 :
2869 : /* Add a reference if we're newly entering the active list. */
2870 0 : if (obj->active == 0)
2871 0 : drm_gem_object_reference(&obj->base);
2872 0 : obj->active |= intel_ring_flag(ring);
2873 :
2874 0 : list_move_tail(&obj->ring_list[ring->id], &ring->active_list);
2875 0 : i915_gem_request_assign(&obj->last_read_req[ring->id], req);
2876 :
2877 0 : list_move_tail(&vma->mm_list, &vma->vm->active_list);
2878 0 : }
2879 :
2880 : static void
2881 0 : i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
2882 : {
2883 : RQ_BUG_ON(obj->last_write_req == NULL);
2884 : RQ_BUG_ON(!(obj->active & intel_ring_flag(obj->last_write_req->ring)));
2885 :
2886 0 : i915_gem_request_assign(&obj->last_write_req, NULL);
2887 0 : intel_fb_obj_flush(obj, true, ORIGIN_CS);
2888 0 : }
2889 :
2890 : static void
2891 0 : i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring)
2892 : {
2893 : struct i915_vma *vma;
2894 :
2895 : RQ_BUG_ON(obj->last_read_req[ring] == NULL);
2896 : RQ_BUG_ON(!(obj->active & (1 << ring)));
2897 :
2898 0 : list_del_init(&obj->ring_list[ring]);
2899 0 : i915_gem_request_assign(&obj->last_read_req[ring], NULL);
2900 :
2901 0 : if (obj->last_write_req && obj->last_write_req->ring->id == ring)
2902 0 : i915_gem_object_retire__write(obj);
2903 :
2904 0 : obj->active &= ~(1 << ring);
2905 0 : if (obj->active)
2906 0 : return;
2907 :
2908 : /* Bump our place on the bound list to keep it roughly in LRU order
2909 : * so that we don't steal from recently used but inactive objects
2910 : * (unless we are forced to ofc!)
 2911              : 	 * (unless we are forced to, of course!)
2912 0 : list_move_tail(&obj->global_list,
2913 0 : &to_i915(obj->base.dev)->mm.bound_list);
2914 :
2915 0 : list_for_each_entry(vma, &obj->vma_list, vma_link) {
2916 0 : if (!list_empty(&vma->mm_list))
2917 0 : list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
2918 : }
2919 :
2920 0 : i915_gem_request_assign(&obj->last_fenced_req, NULL);
2921 0 : drm_gem_object_unreference(&obj->base);
2922 0 : }
2923 :
2924 : static int
2925 0 : i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
2926 : {
2927 0 : struct drm_i915_private *dev_priv = dev->dev_private;
2928 : struct intel_engine_cs *ring;
2929 : int ret, i, j;
2930 :
2931 : /* Carefully retire all requests without writing to the rings */
2932 0 : for_each_ring(ring, dev_priv, i) {
2933 0 : ret = intel_ring_idle(ring);
2934 0 : if (ret)
2935 0 : return ret;
2936 : }
2937 0 : i915_gem_retire_requests(dev);
2938 :
2939 : /* Finally reset hw state */
2940 0 : for_each_ring(ring, dev_priv, i) {
2941 0 : intel_ring_init_seqno(ring, seqno);
2942 :
2943 0 : for (j = 0; j < ARRAY_SIZE(ring->semaphore.sync_seqno); j++)
2944 0 : ring->semaphore.sync_seqno[j] = 0;
2945 : }
2946 :
2947 0 : return 0;
2948 0 : }
2949 :
2950 0 : int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
2951 : {
2952 0 : struct drm_i915_private *dev_priv = dev->dev_private;
2953 : int ret;
2954 :
2955 0 : if (seqno == 0)
2956 0 : return -EINVAL;
2957 :
2958 : /* HWS page needs to be set less than what we
2959 : * will inject to ring
2960 : */
2961 0 : ret = i915_gem_init_seqno(dev, seqno - 1);
2962 0 : if (ret)
2963 0 : return ret;
2964 :
2965 : /* Carefully set the last_seqno value so that wrap
2966 : * detection still works
2967 : */
2968 0 : dev_priv->next_seqno = seqno;
2969 0 : dev_priv->last_seqno = seqno - 1;
2970 0 : if (dev_priv->last_seqno == 0)
2971 0 : dev_priv->last_seqno--;
2972 :
2973 0 : return 0;
2974 0 : }
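/*
 * Worked example (editor's addition): i915_gem_set_seqno(dev, 0x1000)
 * idles the rings, seeds the hardware status pages with 0xfff via
 * i915_gem_init_seqno(), and records next_seqno = 0x1000 and
 * last_seqno = 0xfff, so the next request emitted uses 0x1000 while
 * wrap detection still has a valid "previous" value to compare against.
 */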
2975 :
2976 : int
2977 0 : i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
2978 : {
2979 0 : struct drm_i915_private *dev_priv = dev->dev_private;
2980 :
2981 : /* reserve 0 for non-seqno */
2982 0 : if (dev_priv->next_seqno == 0) {
2983 0 : int ret = i915_gem_init_seqno(dev, 0);
2984 0 : if (ret)
2985 0 : return ret;
2986 :
2987 0 : dev_priv->next_seqno = 1;
2988 0 : }
2989 :
2990 0 : *seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
2991 0 : return 0;
2992 0 : }
2993 :
2994 : /*
 2995              :  * NB: This function is not allowed to fail. Doing so would mean that the
2996 : * request is not being tracked for completion but the work itself is
2997 : * going to happen on the hardware. This would be a Bad Thing(tm).
2998 : */
2999 0 : void __i915_add_request(struct drm_i915_gem_request *request,
3000 : struct drm_i915_gem_object *obj,
3001 : bool flush_caches)
3002 : {
3003 : struct intel_engine_cs *ring;
3004 : struct drm_i915_private *dev_priv;
3005 : struct intel_ringbuffer *ringbuf;
3006 : u32 request_start;
3007 : int ret;
3008 :
3009 0 : if (WARN_ON(request == NULL))
3010 0 : return;
3011 :
3012 0 : ring = request->ring;
3013 0 : dev_priv = ring->dev->dev_private;
3014 0 : ringbuf = request->ringbuf;
3015 :
3016 : /*
3017 : * To ensure that this call will not fail, space for its emissions
3018 : * should already have been reserved in the ring buffer. Let the ring
3019 : * know that it is time to use that space up.
3020 : */
3021 0 : intel_ring_reserved_space_use(ringbuf);
3022 :
3023 0 : request_start = intel_ring_get_tail(ringbuf);
3024 : /*
3025 : * Emit any outstanding flushes - execbuf can fail to emit the flush
3026 : * after having emitted the batchbuffer command. Hence we need to fix
3027 : * things up similar to emitting the lazy request. The difference here
3028 : * is that the flush _must_ happen before the next request, no matter
3029 : * what.
3030 : */
3031 0 : if (flush_caches) {
3032 0 : if (i915.enable_execlists)
3033 0 : ret = logical_ring_flush_all_caches(request);
3034 : else
3035 0 : ret = intel_ring_flush_all_caches(request);
3036 : /* Not allowed to fail! */
3037 0 : WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret);
3038 0 : }
3039 :
3040 : /* Record the position of the start of the request so that
3041 : * should we detect the updated seqno part-way through the
3042 : * GPU processing the request, we never over-estimate the
3043 : * position of the head.
3044 : */
3045 0 : request->postfix = intel_ring_get_tail(ringbuf);
3046 :
3047 0 : if (i915.enable_execlists)
3048 0 : ret = ring->emit_request(request);
3049 : else {
3050 0 : ret = ring->add_request(request);
3051 :
3052 0 : request->tail = intel_ring_get_tail(ringbuf);
3053 : }
3054 : /* Not allowed to fail! */
3055 0 : WARN(ret, "emit|add_request failed: %d!\n", ret);
3056 :
3057 0 : request->head = request_start;
3058 :
3059 : /* Whilst this request exists, batch_obj will be on the
3060 : * active_list, and so will hold the active reference. Only when this
3061 : * request is retired will the the batch_obj be moved onto the
 3062              : 	 * request is retired will the batch_obj be moved onto the
3063 : * to explicitly hold another reference here.
3064 : */
3065 0 : request->batch_obj = obj;
3066 :
3067 0 : request->emitted_jiffies = jiffies;
3068 0 : request->previous_seqno = ring->last_submitted_seqno;
3069 0 : ring->last_submitted_seqno = request->seqno;
3070 0 : list_add_tail(&request->list, &ring->request_list);
3071 :
3072 0 : trace_i915_gem_request_add(request);
3073 :
3074 0 : i915_queue_hangcheck(ring->dev);
3075 :
3076 0 : queue_delayed_work(dev_priv->wq,
3077 0 : &dev_priv->mm.retire_work,
3078 0 : round_jiffies_up_relative(HZ));
3079 0 : intel_mark_busy(dev_priv->dev);
3080 :
3081 : /* Sanity check that the reserved size was large enough. */
3082 0 : intel_ring_reserved_space_end(ringbuf);
3083 0 : }
3084 :
3085 0 : static bool i915_context_is_banned(struct drm_i915_private *dev_priv,
3086 : const struct intel_context *ctx)
3087 : {
3088 : unsigned long elapsed;
3089 :
3090 0 : elapsed = get_seconds() - ctx->hang_stats.guilty_ts;
3091 :
3092 0 : if (ctx->hang_stats.banned)
3093 0 : return true;
3094 :
3095 0 : if (ctx->hang_stats.ban_period_seconds &&
3096 0 : elapsed <= ctx->hang_stats.ban_period_seconds) {
3097 0 : if (!i915_gem_context_is_default(ctx)) {
3098 : DRM_DEBUG("context hanging too fast, banning!\n");
3099 0 : return true;
3100 0 : } else if (i915_stop_ring_allow_ban(dev_priv)) {
3101 0 : if (i915_stop_ring_allow_warn(dev_priv))
3102 0 : DRM_ERROR("gpu hanging too fast, banning!\n");
3103 0 : return true;
3104 : }
3105 : }
3106 :
3107 0 : return false;
3108 0 : }
3109 :
3110 0 : static void i915_set_reset_status(struct drm_i915_private *dev_priv,
3111 : struct intel_context *ctx,
3112 : const bool guilty)
3113 : {
3114 : struct i915_ctx_hang_stats *hs;
3115 :
3116 0 : if (WARN_ON(!ctx))
3117 0 : return;
3118 :
3119 0 : hs = &ctx->hang_stats;
3120 :
3121 0 : if (guilty) {
3122 0 : hs->banned = i915_context_is_banned(dev_priv, ctx);
3123 0 : hs->batch_active++;
3124 0 : hs->guilty_ts = get_seconds();
3125 0 : } else {
3126 0 : hs->batch_pending++;
3127 : }
3128 0 : }
3129 :
3130 0 : void i915_gem_request_free(struct kref *req_ref)
3131 : {
3132 0 : struct drm_i915_gem_request *req = container_of(req_ref,
3133 : typeof(*req), ref);
3134 0 : struct intel_context *ctx = req->ctx;
3135 :
3136 0 : if (req->file_priv)
3137 0 : i915_gem_request_remove_from_client(req);
3138 :
3139 0 : if (ctx) {
3140 0 : if (i915.enable_execlists) {
3141 0 : if (ctx != req->ring->default_context)
3142 0 : intel_lr_context_unpin(req);
3143 : }
3144 :
3145 0 : i915_gem_context_unreference(ctx);
3146 0 : }
3147 :
3148 : #ifdef __linux__
3149 : kmem_cache_free(req->i915->requests, req);
3150 : #else
3151 0 : pool_put(&req->i915->requests, req);
3152 : #endif
3153 0 : }
3154 :
3155 0 : int i915_gem_request_alloc(struct intel_engine_cs *ring,
3156 : struct intel_context *ctx,
3157 : struct drm_i915_gem_request **req_out)
3158 : {
3159 0 : struct drm_i915_private *dev_priv = to_i915(ring->dev);
3160 : struct drm_i915_gem_request *req;
3161 : int ret;
3162 :
3163 0 : if (!req_out)
3164 0 : return -EINVAL;
3165 :
3166 0 : *req_out = NULL;
3167 :
3168 : #ifdef __linux__
3169 : req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
3170 : #else
3171 0 : req = pool_get(&dev_priv->requests, PR_WAITOK | PR_ZERO);
3172 : #endif
3173 0 : if (req == NULL)
3174 0 : return -ENOMEM;
3175 :
3176 0 : ret = i915_gem_get_seqno(ring->dev, &req->seqno);
3177 0 : if (ret)
3178 : goto err;
3179 :
3180 0 : kref_init(&req->ref);
3181 0 : req->i915 = dev_priv;
3182 0 : req->ring = ring;
3183 0 : req->ctx = ctx;
3184 0 : i915_gem_context_reference(req->ctx);
3185 :
3186 0 : if (i915.enable_execlists)
3187 0 : ret = intel_logical_ring_alloc_request_extras(req);
3188 : else
3189 0 : ret = intel_ring_alloc_request_extras(req);
3190 0 : if (ret) {
3191 0 : i915_gem_context_unreference(req->ctx);
3192 0 : goto err;
3193 : }
3194 :
3195 : /*
3196 : * Reserve space in the ring buffer for all the commands required to
3197 : * eventually emit this request. This is to guarantee that the
3198 : * i915_add_request() call can't fail. Note that the reserve may need
3199 : * to be redone if the request is not actually submitted straight
3200 : * away, e.g. because a GPU scheduler has deferred it.
3201 : */
3202 0 : if (i915.enable_execlists)
3203 0 : ret = intel_logical_ring_reserve_space(req);
3204 : else
3205 0 : ret = intel_ring_reserve_space(req);
3206 0 : if (ret) {
3207 : /*
3208 : * At this point, the request is fully allocated even if not
3209 : * fully prepared. Thus it can be cleaned up using the proper
3210 : * free code.
3211 : */
3212 0 : i915_gem_request_cancel(req);
3213 0 : return ret;
3214 : }
3215 :
3216 0 : *req_out = req;
3217 0 : return 0;
3218 :
3219 : err:
3220 : #ifdef __linux__
3221 : kmem_cache_free(dev_priv->requests, req);
3222 : #else
3223 0 : pool_put(&dev_priv->requests, req);
3224 : #endif
3225 0 : return ret;
3226 0 : }
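/*
 * Typical request lifecycle (editor's sketch): allocate, emit commands,
 * then either submit or cancel. Because ring space was reserved above,
 * the final __i915_add_request() step is not allowed to fail.
 * emit_commands() is a hypothetical helper standing in for the caller's
 * actual command emission; `ring` is assumed to be a valid engine.
 */
#if 0
	struct drm_i915_gem_request *req;
	int err;

	err = i915_gem_request_alloc(ring, ring->default_context, &req);
	if (err)
		return err;

	err = emit_commands(req);		/* hypothetical helper */
	if (err) {
		i915_gem_request_cancel(req);	/* releases the reservation */
		return err;
	}

	__i915_add_request(req, NULL, true);	/* cannot fail */
#endif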
3227 :
3228 0 : void i915_gem_request_cancel(struct drm_i915_gem_request *req)
3229 : {
3230 0 : intel_ring_reserved_space_cancel(req->ringbuf);
3231 :
3232 0 : i915_gem_request_unreference(req);
3233 0 : }
3234 :
3235 : struct drm_i915_gem_request *
3236 0 : i915_gem_find_active_request(struct intel_engine_cs *ring)
3237 : {
3238 : struct drm_i915_gem_request *request;
3239 :
3240 0 : list_for_each_entry(request, &ring->request_list, list) {
3241 0 : if (i915_gem_request_completed(request, false))
3242 : continue;
3243 :
3244 0 : return request;
3245 : }
3246 :
3247 0 : return NULL;
3248 0 : }
3249 :
3250 0 : static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
3251 : struct intel_engine_cs *ring)
3252 : {
3253 : struct drm_i915_gem_request *request;
3254 : bool ring_hung;
3255 :
3256 0 : request = i915_gem_find_active_request(ring);
3257 :
3258 0 : if (request == NULL)
3259 0 : return;
3260 :
3261 0 : ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
3262 :
3263 0 : i915_set_reset_status(dev_priv, request->ctx, ring_hung);
3264 :
3265 0 : list_for_each_entry_continue(request, &ring->request_list, list)
3266 0 : i915_set_reset_status(dev_priv, request->ctx, false);
3267 0 : }
3268 :
3269 0 : static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
3270 : struct intel_engine_cs *ring)
3271 : {
3272 0 : while (!list_empty(&ring->active_list)) {
3273 : struct drm_i915_gem_object *obj;
3274 :
3275 0 : obj = list_first_entry(&ring->active_list,
3276 : struct drm_i915_gem_object,
3277 : ring_list[ring->id]);
3278 :
3279 0 : i915_gem_object_retire__read(obj, ring->id);
3280 : }
3281 :
3282 : /*
3283 : * Clear the execlists queue up before freeing the requests, as those
3284 : * are the ones that keep the context and ringbuffer backing objects
3285 : * pinned in place.
3286 : */
3287 0 : while (!list_empty(&ring->execlist_queue)) {
3288 : struct drm_i915_gem_request *submit_req;
3289 :
3290 0 : submit_req = list_first_entry(&ring->execlist_queue,
3291 : struct drm_i915_gem_request,
3292 : execlist_link);
3293 0 : list_del(&submit_req->execlist_link);
3294 :
3295 0 : if (submit_req->ctx != ring->default_context)
3296 0 : intel_lr_context_unpin(submit_req);
3297 :
3298 0 : i915_gem_request_unreference(submit_req);
3299 : }
3300 :
3301 : /*
3302 : * We must free the requests after all the corresponding objects have
3303 : * been moved off active lists. Which is the same order as the normal
 3304              : 	 * retire_requests function does. This is important if objects hold
3305 : * implicit references on things like e.g. ppgtt address spaces through
3306 : * the request.
3307 : */
3308 0 : while (!list_empty(&ring->request_list)) {
3309 : struct drm_i915_gem_request *request;
3310 :
3311 0 : request = list_first_entry(&ring->request_list,
3312 : struct drm_i915_gem_request,
3313 : list);
3314 :
3315 0 : i915_gem_request_retire(request);
3316 : }
3317 0 : }
3318 :
3319 0 : void i915_gem_reset(struct drm_device *dev)
3320 : {
3321 0 : struct drm_i915_private *dev_priv = dev->dev_private;
3322 : struct intel_engine_cs *ring;
3323 : int i;
3324 :
3325 : /*
3326 : * Before we free the objects from the requests, we need to inspect
3327 : * them for finding the guilty party. As the requests only borrow
3328 : * their reference to the objects, the inspection must be done first.
3329 : */
3330 0 : for_each_ring(ring, dev_priv, i)
3331 0 : i915_gem_reset_ring_status(dev_priv, ring);
3332 :
3333 0 : for_each_ring(ring, dev_priv, i)
3334 0 : i915_gem_reset_ring_cleanup(dev_priv, ring);
3335 :
3336 0 : i915_gem_context_reset(dev);
3337 :
3338 0 : i915_gem_restore_fences(dev);
3339 :
3340 0 : WARN_ON(i915_verify_lists(dev));
3341 0 : }
3342 :
3343 : /**
3344 : * This function clears the request list as sequence numbers are passed.
3345 : */
3346 : void
3347 0 : i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
3348 : {
3349 0 : WARN_ON(i915_verify_lists(ring->dev));
3350 :
3351 : /* Retire requests first as we use it above for the early return.
3352 : * If we retire requests last, we may use a later seqno and so clear
3353 : * the requests lists without clearing the active list, leading to
3354 : * confusion.
3355 : */
3356 0 : while (!list_empty(&ring->request_list)) {
3357 : struct drm_i915_gem_request *request;
3358 :
3359 0 : request = list_first_entry(&ring->request_list,
3360 : struct drm_i915_gem_request,
3361 : list);
3362 :
3363 0 : if (!i915_gem_request_completed(request, true))
3364 0 : break;
3365 :
3366 0 : i915_gem_request_retire(request);
3367 0 : }
3368 :
3369 : /* Move any buffers on the active list that are no longer referenced
3370 : * by the ringbuffer to the flushing/inactive lists as appropriate,
3371 : * before we free the context associated with the requests.
3372 : */
3373 0 : while (!list_empty(&ring->active_list)) {
3374 : struct drm_i915_gem_object *obj;
3375 :
3376 0 : obj = list_first_entry(&ring->active_list,
3377 : struct drm_i915_gem_object,
3378 : ring_list[ring->id]);
3379 :
3380 0 : if (!list_empty(&obj->last_read_req[ring->id]->list))
3381 0 : break;
3382 :
3383 0 : i915_gem_object_retire__read(obj, ring->id);
3384 0 : }
3385 :
3386 0 : if (unlikely(ring->trace_irq_req &&
3387 : i915_gem_request_completed(ring->trace_irq_req, true))) {
3388 0 : ring->irq_put(ring);
3389 0 : i915_gem_request_assign(&ring->trace_irq_req, NULL);
3390 0 : }
3391 :
3392 0 : WARN_ON(i915_verify_lists(ring->dev));
3393 0 : }
3394 :
3395 : bool
3396 0 : i915_gem_retire_requests(struct drm_device *dev)
3397 : {
3398 0 : struct drm_i915_private *dev_priv = dev->dev_private;
3399 : struct intel_engine_cs *ring;
3400 : bool idle = true;
3401 : int i;
3402 :
3403 0 : for_each_ring(ring, dev_priv, i) {
3404 0 : i915_gem_retire_requests_ring(ring);
3405 0 : idle &= list_empty(&ring->request_list);
3406 0 : if (i915.enable_execlists) {
3407 : unsigned long flags;
3408 :
3409 0 : spin_lock_irqsave(&ring->execlist_lock, flags);
3410 0 : idle &= list_empty(&ring->execlist_queue);
3411 0 : spin_unlock_irqrestore(&ring->execlist_lock, flags);
3412 :
3413 0 : intel_execlists_retire_requests(ring);
3414 0 : }
3415 : }
3416 :
3417 0 : if (idle)
3418 0 : mod_delayed_work(dev_priv->wq,
3419 0 : &dev_priv->mm.idle_work,
3420 0 : msecs_to_jiffies(100));
3421 :
3422 0 : return idle;
3423 : }
3424 :
3425 : static void
3426 0 : i915_gem_retire_work_handler(struct work_struct *work)
3427 : {
3428 : struct drm_i915_private *dev_priv =
3429 0 : container_of(work, typeof(*dev_priv), mm.retire_work.work);
3430 0 : struct drm_device *dev = dev_priv->dev;
3431 : bool idle;
3432 :
3433 : /* Come back later if the device is busy... */
3434 : idle = false;
3435 0 : if (mutex_trylock(&dev->struct_mutex)) {
3436 0 : idle = i915_gem_retire_requests(dev);
3437 0 : mutex_unlock(&dev->struct_mutex);
3438 0 : }
3439 0 : if (!idle)
3440 0 : queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
3441 0 : round_jiffies_up_relative(HZ));
3442 0 : }
3443 :
3444 : static void
3445 0 : i915_gem_idle_work_handler(struct work_struct *work)
3446 : {
3447 : struct drm_i915_private *dev_priv =
3448 0 : container_of(work, typeof(*dev_priv), mm.idle_work.work);
3449 0 : struct drm_device *dev = dev_priv->dev;
3450 : struct intel_engine_cs *ring;
3451 : int i;
3452 :
3453 0 : for_each_ring(ring, dev_priv, i)
3454 0 : if (!list_empty(&ring->request_list))
3455 0 : return;
3456 :
3457 0 : intel_mark_idle(dev);
3458 :
3459 0 : if (mutex_trylock(&dev->struct_mutex)) {
3460 : struct intel_engine_cs *ring;
3461 : int i;
3462 :
3463 0 : for_each_ring(ring, dev_priv, i)
3464 0 : i915_gem_batch_pool_fini(&ring->batch_pool);
3465 :
3466 0 : mutex_unlock(&dev->struct_mutex);
3467 0 : }
3468 0 : }
3469 :
3470 : /**
3471 : * Ensures that an object will eventually get non-busy by flushing any required
3472 : * write domains, emitting any outstanding lazy request and retiring and
 3473              :  * write domains, emitting any outstanding lazy request and retiring any
3474 : */
3475 : static int
3476 0 : i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
3477 : {
3478 : int i;
3479 :
3480 0 : if (!obj->active)
3481 0 : return 0;
3482 :
3483 0 : for (i = 0; i < I915_NUM_RINGS; i++) {
3484 : struct drm_i915_gem_request *req;
3485 :
3486 0 : req = obj->last_read_req[i];
3487 0 : if (req == NULL)
3488 0 : continue;
3489 :
3490 0 : if (list_empty(&req->list))
3491 : goto retire;
3492 :
3493 0 : if (i915_gem_request_completed(req, true)) {
3494 0 : __i915_gem_request_retire__upto(req);
3495 : retire:
3496 0 : i915_gem_object_retire__read(obj, i);
3497 0 : }
3498 0 : }
3499 :
3500 0 : return 0;
3501 0 : }
3502 :
3503 : /**
3504 : * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
3505 : * @DRM_IOCTL_ARGS: standard ioctl arguments
3506 : *
3507 : * Returns 0 if successful, else an error is returned with the remaining time in
3508 : * the timeout parameter.
3509 : * -ETIME: object is still busy after timeout
3510 : * -ERESTARTSYS: signal interrupted the wait
 3511              :  *  -ENOENT: object doesn't exist
3512 : * Also possible, but rare:
3513 : * -EAGAIN: GPU wedged
3514 : * -ENOMEM: damn
3515 : * -ENODEV: Internal IRQ fail
3516 : * -E?: The add request failed
3517 : *
3518 : * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
3519 : * non-zero timeout parameter the wait ioctl will wait for the given number of
3520 : * nanoseconds on an object becoming unbusy. Since the wait itself does so
3521 : * without holding struct_mutex the object may become re-busied before this
 3522              :  * function completes. A similar but shorter race condition exists in the busy
 3523              :  * ioctl.
3524 : */
3525 : int
3526 0 : i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3527 : {
3528 0 : struct drm_i915_private *dev_priv = dev->dev_private;
3529 0 : struct drm_i915_gem_wait *args = data;
3530 : struct drm_i915_gem_object *obj;
3531 0 : struct drm_i915_gem_request *req[I915_NUM_RINGS];
3532 : unsigned reset_counter;
3533 : int i, n = 0;
3534 : int ret;
3535 :
3536 0 : if (args->flags != 0)
3537 0 : return -EINVAL;
3538 :
3539 0 : ret = i915_mutex_lock_interruptible(dev);
3540 0 : if (ret)
3541 0 : return ret;
3542 :
3543 0 : obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
3544 0 : if (&obj->base == NULL) {
3545 0 : mutex_unlock(&dev->struct_mutex);
3546 0 : return -ENOENT;
3547 : }
3548 :
3549 : /* Need to make sure the object gets inactive eventually. */
3550 0 : ret = i915_gem_object_flush_active(obj);
3551 0 : if (ret)
3552 : goto out;
3553 :
3554 0 : if (!obj->active)
3555 : goto out;
3556 :
3557 : /* Do this after OLR check to make sure we make forward progress polling
3558 : * on this IOCTL with a timeout == 0 (like busy ioctl)
3559 : */
3560 0 : if (args->timeout_ns == 0) {
3561 : ret = -ETIME;
3562 0 : goto out;
3563 : }
3564 :
3565 0 : drm_gem_object_unreference(&obj->base);
3566 0 : reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
3567 :
3568 0 : for (i = 0; i < I915_NUM_RINGS; i++) {
3569 0 : if (obj->last_read_req[i] == NULL)
3570 : continue;
3571 :
3572 0 : req[n++] = i915_gem_request_reference(obj->last_read_req[i]);
3573 0 : }
3574 :
3575 0 : mutex_unlock(&dev->struct_mutex);
3576 :
3577 0 : for (i = 0; i < n; i++) {
3578 0 : if (ret == 0)
3579 0 : ret = __i915_wait_request(req[i], reset_counter, true,
3580 0 : args->timeout_ns > 0 ? &args->timeout_ns : NULL,
3581 0 : file->driver_priv);
3582 0 : i915_gem_request_unreference__unlocked(req[i]);
3583 : }
3584 0 : return ret;
3585 :
3586 : out:
3587 0 : drm_gem_object_unreference(&obj->base);
3588 0 : mutex_unlock(&dev->struct_mutex);
3589 0 : return ret;
3590 0 : }
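/*
 * Illustrative userspace sketch (editor's addition, not part of this file):
 * timeout_ns == 0 turns this into a busy poll (-ETIME if still busy), a
 * positive value waits up to that many nanoseconds and returns the
 * remaining time, and a negative value waits indefinitely. Assumes
 * libdrm's drmIoctl(), an open DRM fd and an existing GEM `handle`.
 */
#if 0
	struct drm_i915_gem_wait wait = {
		.bo_handle = handle,			/* assumed GEM handle */
		.timeout_ns = 100 * 1000 * 1000,	/* 100 ms */
	};
	int err = drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
#endif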
3591 :
3592 : static int
3593 0 : __i915_gem_object_sync(struct drm_i915_gem_object *obj,
3594 : struct intel_engine_cs *to,
3595 : struct drm_i915_gem_request *from_req,
3596 : struct drm_i915_gem_request **to_req)
3597 : {
3598 : struct intel_engine_cs *from;
3599 : int ret;
3600 :
3601 0 : from = i915_gem_request_get_ring(from_req);
3602 0 : if (to == from)
3603 0 : return 0;
3604 :
3605 0 : if (i915_gem_request_completed(from_req, true))
3606 0 : return 0;
3607 :
3608 0 : if (!i915_semaphore_is_enabled(obj->base.dev)) {
3609 0 : struct drm_i915_private *i915 = to_i915(obj->base.dev);
3610 0 : ret = __i915_wait_request(from_req,
3611 0 : atomic_read(&i915->gpu_error.reset_counter),
3612 0 : i915->mm.interruptible,
3613 : NULL,
3614 0 : &i915->rps.semaphores);
3615 0 : if (ret)
3616 0 : return ret;
3617 :
3618 0 : i915_gem_object_retire_request(obj, from_req);
3619 0 : } else {
3620 0 : int idx = intel_ring_sync_index(from, to);
3621 0 : u32 seqno = i915_gem_request_get_seqno(from_req);
3622 :
3623 0 : WARN_ON(!to_req);
3624 :
3625 0 : if (seqno <= from->semaphore.sync_seqno[idx])
3626 0 : return 0;
3627 :
3628 0 : if (*to_req == NULL) {
3629 0 : ret = i915_gem_request_alloc(to, to->default_context, to_req);
3630 0 : if (ret)
3631 0 : return ret;
3632 : }
3633 :
3634 0 : trace_i915_gem_ring_sync_to(*to_req, from, from_req);
3635 0 : ret = to->semaphore.sync_to(*to_req, from, seqno);
3636 0 : if (ret)
3637 0 : return ret;
3638 :
3639 : /* We use last_read_req because sync_to()
3640 : * might have just caused seqno wrap under
3641 : * the radar.
3642 : */
3643 0 : from->semaphore.sync_seqno[idx] =
3644 0 : i915_gem_request_get_seqno(obj->last_read_req[from->id]);
3645 0 : }
3646 :
3647 0 : return 0;
3648 0 : }
3649 :
3650 : /**
3651 : * i915_gem_object_sync - sync an object to a ring.
3652 : *
3653 : * @obj: object which may be in use on another ring.
3654 : * @to: ring we wish to use the object on. May be NULL.
3655 : * @to_req: request we wish to use the object for. See below.
3656 : * This will be allocated and returned if a request is
3657 : * required but not passed in.
3658 : *
3659 : * This code is meant to abstract object synchronization with the GPU.
3660 : * Calling with NULL implies synchronizing the object with the CPU
3661 : * rather than a particular GPU ring. Conceptually we serialise writes
3662 : * between engines inside the GPU. We only allow one engine to write
3663 : * into a buffer at any time, but multiple readers. To ensure each has
3664 : * a coherent view of memory, we must:
3665 : *
3666 : * - If there is an outstanding write request to the object, the new
3667 : * request must wait for it to complete (either CPU or in hw, requests
3668 : * on the same ring will be naturally ordered).
3669 : *
3670 : * - If we are a write request (pending_write_domain is set), the new
3671 : * request must wait for outstanding read requests to complete.
3672 : *
3673 : * For CPU synchronisation (a NULL to), no request is required. For syncing with
3674 : * rings, to_req must be non-NULL. However, a request does not have to be
3675 : * pre-allocated. If *to_req is NULL and sync commands will be emitted then a
3676 : * request will be allocated automatically and returned through *to_req. Note
3677 : * that it is not guaranteed that commands will be emitted (because the system
3678 : * might already be idle). Hence there is no need to create a request that
3679 : * might never have any work submitted. Note further that if a request is
3680 : * returned in *to_req, it is the responsibility of the caller to submit
3681 : * that request (after potentially adding more work to it).
3682 : *
3683 : * Returns 0 if successful, else propagates up the lower layer error.
3684 : */
3685 : int
3686 0 : i915_gem_object_sync(struct drm_i915_gem_object *obj,
3687 : struct intel_engine_cs *to,
3688 : struct drm_i915_gem_request **to_req)
3689 : {
3690 0 : const bool readonly = obj->base.pending_write_domain == 0;
3691 0 : struct drm_i915_gem_request *req[I915_NUM_RINGS];
3692 : int ret, i, n;
3693 :
3694 0 : if (!obj->active)
3695 0 : return 0;
3696 :
3697 0 : if (to == NULL)
3698 0 : return i915_gem_object_wait_rendering(obj, readonly);
3699 :
3700 : n = 0;
3701 0 : if (readonly) {
3702 0 : if (obj->last_write_req)
3703 0 : req[n++] = obj->last_write_req;
3704 : } else {
3705 0 : for (i = 0; i < I915_NUM_RINGS; i++)
3706 0 : if (obj->last_read_req[i])
3707 0 : req[n++] = obj->last_read_req[i];
3708 : }
3709 0 : for (i = 0; i < n; i++) {
3710 0 : ret = __i915_gem_object_sync(obj, to, req[i], to_req);
3711 0 : if (ret)
3712 0 : return ret;
3713 : }
3714 :
3715 0 : return 0;
3716 0 : }
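/*
 * Illustrative sketch (not part of this file) of the to_req contract spelled
 * out above: any request allocated on the caller's behalf must be submitted
 * by the caller. Submitting with i915_add_request_no_flush() mirrors what
 * this file does elsewhere; obj and ring are assumed to come from the caller.
 *
 *	struct drm_i915_gem_request *to_req = NULL;
 *	int err;
 *
 *	err = i915_gem_object_sync(obj, ring, &to_req);
 *	if (err)
 *		return err;
 *	if (to_req)
 *		i915_add_request_no_flush(to_req);
 */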
3717 :
3718 0 : static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
3719 : {
3720 : u32 old_write_domain, old_read_domains;
3721 :
3722 : /* Force a pagefault for domain tracking on next user access */
3723 0 : i915_gem_release_mmap(obj);
3724 :
3725 0 : if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3726 0 : return;
3727 :
3728 : /* Wait for any direct GTT access to complete */
3729 0 : mb();
3730 :
3731 0 : old_read_domains = obj->base.read_domains;
3732 0 : old_write_domain = obj->base.write_domain;
3733 :
3734 0 : obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
3735 0 : obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
3736 :
3737 0 : trace_i915_gem_object_change_domain(obj,
3738 : old_read_domains,
3739 : old_write_domain);
3740 0 : }
3741 :
3742 0 : static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
3743 : {
3744 0 : struct drm_i915_gem_object *obj = vma->obj;
3745 0 : struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3746 : int ret;
3747 :
3748 0 : if (list_empty(&vma->vma_link))
3749 0 : return 0;
3750 :
3751 0 : if (!drm_mm_node_allocated(&vma->node)) {
3752 0 : i915_gem_vma_destroy(vma);
3753 0 : return 0;
3754 : }
3755 :
3756 0 : if (vma->pin_count)
3757 0 : return -EBUSY;
3758 :
3759 0 : BUG_ON(obj->pages == NULL);
3760 :
3761 0 : if (wait) {
3762 0 : ret = i915_gem_object_wait_rendering(obj, false);
3763 0 : if (ret)
3764 0 : return ret;
3765 : }
3766 :
3767 0 : if (i915_is_ggtt(vma->vm) &&
3768 0 : vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
3769 0 : i915_gem_object_finish_gtt(obj);
3770 :
3771 : /* release the fence reg _after_ flushing */
3772 0 : ret = i915_gem_object_put_fence(obj);
3773 0 : if (ret)
3774 0 : return ret;
3775 : }
3776 :
3777 0 : trace_i915_vma_unbind(vma);
3778 :
3779 0 : vma->vm->unbind_vma(vma);
3780 0 : vma->bound = 0;
3781 :
3782 0 : list_del_init(&vma->mm_list);
3783 0 : if (i915_is_ggtt(vma->vm)) {
3784 0 : if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
3785 0 : obj->map_and_fenceable = false;
3786 0 : } else if (vma->ggtt_view.pages) {
3787 0 : sg_free_table(vma->ggtt_view.pages);
3788 0 : kfree(vma->ggtt_view.pages);
3789 0 : }
3790 0 : vma->ggtt_view.pages = NULL;
3791 0 : }
3792 :
3793 0 : drm_mm_remove_node(&vma->node);
3794 0 : i915_gem_vma_destroy(vma);
3795 :
3796 : /* Since the unbound list is global, only move to that list if
3797 : * no more VMAs exist. */
3798 0 : if (list_empty(&obj->vma_list))
3799 0 : list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
3800 :
3801 : /* And finally now the object is completely decoupled from this vma,
3802 : * we can drop its hold on the backing storage and allow it to be
3803 : * reaped by the shrinker.
3804 : */
3805 0 : i915_gem_object_unpin_pages(obj);
3806 :
3807 0 : return 0;
3808 0 : }
3809 :
3810 0 : int i915_vma_unbind(struct i915_vma *vma)
3811 : {
3812 0 : return __i915_vma_unbind(vma, true);
3813 : }
3814 :
3815 0 : int __i915_vma_unbind_no_wait(struct i915_vma *vma)
3816 : {
3817 0 : return __i915_vma_unbind(vma, false);
3818 : }
3819 :
3820 0 : int i915_gpu_idle(struct drm_device *dev)
3821 : {
3822 0 : struct drm_i915_private *dev_priv = dev->dev_private;
3823 : struct intel_engine_cs *ring;
3824 : int ret, i;
3825 :
3826 : /* Flush everything onto the inactive list. */
3827 0 : for_each_ring(ring, dev_priv, i) {
3828 0 : if (!i915.enable_execlists) {
3829 0 : struct drm_i915_gem_request *req;
3830 :
3831 0 : ret = i915_gem_request_alloc(ring, ring->default_context, &req);
3832 0 : if (ret)
3833 0 : return ret;
3834 :
3835 0 : ret = i915_switch_context(req);
3836 0 : if (ret) {
3837 0 : i915_gem_request_cancel(req);
3838 0 : return ret;
3839 : }
3840 :
3841 0 : i915_add_request_no_flush(req);
3842 0 : }
3843 :
3844 0 : ret = intel_ring_idle(ring);
3845 0 : if (ret)
3846 0 : return ret;
3847 : }
3848 :
3849 0 : WARN_ON(i915_verify_lists(dev));
3850 0 : return 0;
3851 0 : }
3852 :
3853 0 : static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
3854 : unsigned long cache_level)
3855 : {
3856 0 : struct drm_mm_node *gtt_space = &vma->node;
3857 : struct drm_mm_node *other;
3858 :
3859 : /*
3860 : * On some machines we have to be careful when putting differing types
3861 : * of snoopable memory together to avoid the prefetcher crossing memory
3862 : * domains and dying. During vm initialisation, we decide whether or not
3863 : * these constraints apply and set the drm_mm.color_adjust
3864 : * appropriately.
3865 : */
3866 0 : if (vma->vm->mm.color_adjust == NULL)
3867 0 : return true;
3868 :
3869 0 : if (!drm_mm_node_allocated(gtt_space))
3870 0 : return true;
3871 :
3872 0 : if (list_empty(&gtt_space->node_list))
3873 0 : return true;
3874 :
3875 0 : other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
3876 0 : if (other->allocated && !other->hole_follows && other->color != cache_level)
3877 0 : return false;
3878 :
3879 0 : other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
3880 0 : if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
3881 0 : return false;
3882 :
3883 0 : return true;
3884 0 : }
3885 :
3886 : /**
3887 : * Finds free space in the GTT aperture and binds the object or a view of it
3888 : * there.
3889 : */
3890 : static struct i915_vma *
3891 0 : i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
3892 : struct i915_address_space *vm,
3893 : const struct i915_ggtt_view *ggtt_view,
3894 : unsigned alignment,
3895 : uint64_t flags)
3896 : {
3897 0 : struct drm_device *dev = obj->base.dev;
3898 0 : struct drm_i915_private *dev_priv = dev->dev_private;
3899 : u32 fence_alignment, unfenced_alignment;
3900 : u32 search_flag, alloc_flag;
3901 : u64 start, end;
3902 : u64 size, fence_size;
3903 : struct i915_vma *vma;
3904 : int ret;
3905 :
3906 0 : if (i915_is_ggtt(vm)) {
3907 : u32 view_size;
3908 :
3909 0 : if (WARN_ON(!ggtt_view))
3910 0 : return ERR_PTR(-EINVAL);
3911 :
3912 0 : view_size = i915_ggtt_view_size(obj, ggtt_view);
3913 :
3914 0 : fence_size = i915_gem_get_gtt_size(dev,
3915 : view_size,
3916 0 : obj->tiling_mode);
3917 0 : fence_alignment = i915_gem_get_gtt_alignment(dev,
3918 : view_size,
3919 0 : obj->tiling_mode,
3920 : true);
3921 0 : unfenced_alignment = i915_gem_get_gtt_alignment(dev,
3922 : view_size,
3923 0 : obj->tiling_mode,
3924 : false);
3925 0 : size = flags & PIN_MAPPABLE ? fence_size : view_size;
3926 0 : } else {
3927 0 : fence_size = i915_gem_get_gtt_size(dev,
3928 0 : obj->base.size,
3929 0 : obj->tiling_mode);
3930 0 : fence_alignment = i915_gem_get_gtt_alignment(dev,
3931 0 : obj->base.size,
3932 0 : obj->tiling_mode,
3933 : true);
3934 : unfenced_alignment =
3935 0 : i915_gem_get_gtt_alignment(dev,
3936 0 : obj->base.size,
3937 0 : obj->tiling_mode,
3938 : false);
3939 0 : size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
3940 : }
3941 :
3942 0 : start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
3943 0 : end = vm->total;
3944 0 : if (flags & PIN_MAPPABLE)
3945 0 : end = min_t(u64, end, dev_priv->gtt.mappable_end);
3946 0 : if (flags & PIN_ZONE_4G)
3947 0 : end = min_t(u64, end, (1ULL << 32));
3948 :
3949 0 : if (alignment == 0)
3950 0 : alignment = flags & PIN_MAPPABLE ? fence_alignment :
3951 : unfenced_alignment;
3952 0 : if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
3953 : DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
3954 : ggtt_view ? ggtt_view->type : 0,
3955 : alignment);
3956 0 : return ERR_PTR(-EINVAL);
3957 : }
3958 :
3959 : /* If binding the object/GGTT view requires more space than the entire
3960 : * aperture has, reject it early before evicting everything in a vain
3961 : * attempt to find space.
3962 : */
3963 0 : if (size > end) {
3964 : DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n",
3965 : ggtt_view ? ggtt_view->type : 0,
3966 : size,
3967 : flags & PIN_MAPPABLE ? "mappable" : "total",
3968 : end);
3969 0 : return ERR_PTR(-E2BIG);
3970 : }
3971 :
3972 0 : ret = i915_gem_object_get_pages(obj);
3973 0 : if (ret)
3974 0 : return ERR_PTR(ret);
3975 :
3976 0 : i915_gem_object_pin_pages(obj);
3977 :
3978 0 : vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) :
3979 0 : i915_gem_obj_lookup_or_create_vma(obj, vm);
3980 :
3981 0 : if (IS_ERR(vma))
3982 : goto err_unpin;
3983 :
3984 0 : if (flags & PIN_HIGH) {
3985 : search_flag = DRM_MM_SEARCH_BELOW;
3986 : alloc_flag = DRM_MM_CREATE_TOP;
3987 0 : } else {
3988 : search_flag = DRM_MM_SEARCH_DEFAULT;
3989 : alloc_flag = DRM_MM_CREATE_DEFAULT;
3990 : }
3991 :
3992 : search_free:
3993 0 : ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
3994 : size, alignment,
3995 0 : obj->cache_level,
3996 : start, end,
3997 : search_flag,
3998 : alloc_flag);
3999 0 : if (ret) {
4000 0 : ret = i915_gem_evict_something(dev, vm, size, alignment,
4001 0 : obj->cache_level,
4002 : start, end,
4003 0 : flags);
4004 0 : if (ret == 0)
4005 0 : goto search_free;
4006 :
4007 : goto err_free_vma;
4008 : }
4009 0 : if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
4010 : ret = -EINVAL;
4011 0 : goto err_remove_node;
4012 : }
4013 :
4014 0 : trace_i915_vma_bind(vma, flags);
4015 0 : ret = i915_vma_bind(vma, obj->cache_level, flags);
4016 0 : if (ret)
4017 : goto err_remove_node;
4018 :
4019 0 : list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
4020 0 : list_add_tail(&vma->mm_list, &vm->inactive_list);
4021 :
4022 0 : return vma;
4023 :
4024 : err_remove_node:
4025 0 : drm_mm_remove_node(&vma->node);
4026 : err_free_vma:
4027 0 : i915_gem_vma_destroy(vma);
4028 0 : vma = ERR_PTR(ret);
4029 : err_unpin:
4030 0 : i915_gem_object_unpin_pages(obj);
4031 0 : return vma;
4032 0 : }
4033 :
4034 : bool
4035 0 : i915_gem_clflush_object(struct drm_i915_gem_object *obj,
4036 : bool force)
4037 : {
4038 : /* If we don't have a page list set up, then we're not pinned
4039 : * to GPU, and we can ignore the cache flush because it'll happen
4040 : * again at bind time.
4041 : */
4042 0 : if (obj->pages == NULL)
4043 0 : return false;
4044 :
4045 : /*
4046 : * Stolen memory is always coherent with the GPU as it is explicitly
4047 : * marked as wc by the system, or the system is cache-coherent.
4048 : */
4049 0 : if (obj->stolen || obj->phys_handle)
4050 0 : return false;
4051 :
4052 : /* If the GPU is snooping the contents of the CPU cache,
4053 : * we do not need to manually clear the CPU cache lines. However,
4054 : * the caches are only snooped when the render cache is
4055 : * flushed/invalidated. As we always have to emit invalidations
4056 : * and flushes when moving into and out of the RENDER domain, correct
4057 : * snooping behaviour occurs naturally as the result of our domain
4058 : * tracking.
4059 : */
4060 0 : if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
4061 0 : obj->cache_dirty = true;
4062 0 : return false;
4063 : }
4064 :
4065 0 : trace_i915_gem_object_clflush(obj);
4066 0 : drm_clflush_sg(obj->pages);
4067 0 : obj->cache_dirty = false;
4068 :
4069 0 : return true;
4070 0 : }
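/*
 * Illustrative sketch (not part of this file): the pattern used later in this
 * file when CPU dirt must be pushed out even on LLC parts (e.g. ahead of
 * scanout). force=true bypasses the coherency shortcut above, and the
 * clflush is followed by a chipset flush as done elsewhere in this file.
 *
 *	if (i915_gem_clflush_object(obj, true))
 *		i915_gem_chipset_flush(obj->base.dev);
 */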
4071 :
4072 : /** Flushes the GTT write domain for the object if it's dirty. */
4073 : static void
4074 0 : i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
4075 : {
4076 : uint32_t old_write_domain;
4077 :
4078 0 : if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
4079 0 : return;
4080 :
4081 : /* No actual flushing is required for the GTT write domain. Writes
4082 : * to it immediately go to main memory as far as we know, so there's
4083 : * no chipset flush. It also doesn't land in render cache.
4084 : *
4085 : * However, we do have to enforce the order so that all writes through
4086 : * the GTT land before any writes to the device, such as updates to
4087 : * the GATT itself.
4088 : */
4089 0 : wmb();
4090 :
4091 0 : old_write_domain = obj->base.write_domain;
4092 0 : obj->base.write_domain = 0;
4093 :
4094 0 : intel_fb_obj_flush(obj, false, ORIGIN_GTT);
4095 :
4096 0 : trace_i915_gem_object_change_domain(obj,
4097 0 : obj->base.read_domains,
4098 : old_write_domain);
4099 0 : }
4100 :
4101 : /** Flushes the CPU write domain for the object if it's dirty. */
4102 : static void
4103 0 : i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
4104 : {
4105 : uint32_t old_write_domain;
4106 :
4107 0 : if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
4108 0 : return;
4109 :
4110 0 : if (i915_gem_clflush_object(obj, obj->pin_display))
4111 0 : i915_gem_chipset_flush(obj->base.dev);
4112 :
4113 0 : old_write_domain = obj->base.write_domain;
4114 0 : obj->base.write_domain = 0;
4115 :
4116 0 : intel_fb_obj_flush(obj, false, ORIGIN_CPU);
4117 :
4118 0 : trace_i915_gem_object_change_domain(obj,
4119 0 : obj->base.read_domains,
4120 : old_write_domain);
4121 0 : }
4122 :
4123 : /**
4124 : * Moves a single object to the GTT read, and possibly write domain.
4125 : *
4126 : * This function returns when the move is complete, including waiting on
4127 : * flushes to occur.
4128 : */
4129 : int
4130 0 : i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
4131 : {
4132 : uint32_t old_write_domain, old_read_domains;
4133 : struct i915_vma *vma;
4134 : int ret;
4135 :
4136 0 : if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
4137 0 : return 0;
4138 :
4139 0 : ret = i915_gem_object_wait_rendering(obj, !write);
4140 0 : if (ret)
4141 0 : return ret;
4142 :
4143 : /* Flush and acquire obj->pages so that we are coherent through
4144 : * direct access in memory with previous cached writes through
4145 : * shmemfs and that our cache domain tracking remains valid.
4146 : * For example, if the obj->filp was moved to swap without us
4147 : * being notified and releasing the pages, we would mistakenly
4148 : * continue to assume that the obj remained out of the CPU cached
4149 : * domain.
4150 : */
4151 0 : ret = i915_gem_object_get_pages(obj);
4152 0 : if (ret)
4153 0 : return ret;
4154 :
4155 0 : i915_gem_object_flush_cpu_write_domain(obj);
4156 :
4157 : /* Serialise direct access to this object with the barriers for
4158 : * coherent writes from the GPU, by effectively invalidating the
4159 : * GTT domain upon first access.
4160 : */
4161 0 : if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
4162 0 : mb();
4163 :
4164 0 : old_write_domain = obj->base.write_domain;
4165 0 : old_read_domains = obj->base.read_domains;
4166 :
4167 : /* It should now be out of any other write domains, and we can update
4168 : * the domain values for our changes.
4169 : */
4170 0 : BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
4171 0 : obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
4172 0 : if (write) {
4173 0 : obj->base.read_domains = I915_GEM_DOMAIN_GTT;
4174 0 : obj->base.write_domain = I915_GEM_DOMAIN_GTT;
4175 0 : obj->dirty = 1;
4176 0 : }
4177 :
4178 0 : trace_i915_gem_object_change_domain(obj,
4179 : old_read_domains,
4180 : old_write_domain);
4181 :
4182 : /* And bump the LRU for this access */
4183 0 : vma = i915_gem_obj_to_ggtt(obj);
4184 0 : if (vma && drm_mm_node_allocated(&vma->node) && !obj->active)
4185 0 : list_move_tail(&vma->mm_list,
4186 0 : &to_i915(obj->base.dev)->gtt.base.inactive_list);
4187 :
4188 0 : return 0;
4189 0 : }
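/*
 * Illustrative sketch (not part of this file): a caller that wants coherent
 * CPU writes through the aperture moves the object into the GTT write domain
 * first, so the tracking above can flush and invalidate as needed. obj is
 * assumed to be bound and provided by the caller.
 *
 *	ret = i915_gem_object_set_to_gtt_domain(obj, true);
 *	if (ret == 0) {
 *		... write through the GTT/aperture mapping ...
 *	}
 */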
4190 :
4191 : /**
4192 : * Changes the cache-level of an object across all VMA.
4193 : *
4194 : * After this function returns, the object will be in the new cache-level
4195 : * across all GTT and the contents of the backing storage will be coherent,
4196 : * with respect to the new cache-level. In order to keep the backing storage
4197 : * coherent for all users, we only allow a single cache level to be set
4198 : * globally on the object and prevent it from being changed whilst the
4199 : * hardware is reading from the object. That is, if the object is currently
4200 : * on the scanout, it will be set to uncached (or equivalent display
4201 : * cache coherency) and all non-MOCS GPU access will also be uncached so
4202 : * that all direct access to the scanout remains coherent.
4203 : */
4204 0 : int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
4205 : enum i915_cache_level cache_level)
4206 : {
4207 0 : struct drm_device *dev = obj->base.dev;
4208 : struct i915_vma *vma, *next;
4209 : bool bound = false;
4210 : int ret = 0;
4211 :
4212 0 : if (obj->cache_level == cache_level)
4213 : goto out;
4214 :
4215 : /* Inspect the list of currently bound VMA and unbind any that would
4216 : * be invalid given the new cache-level. This is principally to
4217 : * catch the issue of the CS prefetch crossing page boundaries and
4218 : * reading an invalid PTE on older architectures.
4219 : */
4220 0 : list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
4221 0 : if (!drm_mm_node_allocated(&vma->node))
4222 : continue;
4223 :
4224 0 : if (vma->pin_count) {
4225 : DRM_DEBUG("can not change the cache level of pinned objects\n");
4226 0 : return -EBUSY;
4227 : }
4228 :
4229 0 : if (!i915_gem_valid_gtt_space(vma, cache_level)) {
4230 0 : ret = i915_vma_unbind(vma);
4231 0 : if (ret)
4232 0 : return ret;
4233 : } else
4234 : bound = true;
4235 : }
4236 :
4237 : /* We can reuse the existing drm_mm nodes but need to change the
4238 : * cache-level on the PTE. We could simply unbind them all and
4239 : * rebind with the correct cache-level on next use. However, since
4240 : * we already have a valid slot, dma mapping, pages etc, we may as well
4241 : * rewrite the PTE in the belief that doing so tramples upon less
4242 : * state and so involves less work.
4243 : */
4244 0 : if (bound) {
4245 : /* Before we change the PTE, the GPU must not be accessing it.
4246 : * If we wait upon the object, we know that all the bound
4247 : * VMA are no longer active.
4248 : */
4249 0 : ret = i915_gem_object_wait_rendering(obj, false);
4250 0 : if (ret)
4251 0 : return ret;
4252 :
4253 0 : if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) {
4254 : /* Access to snoopable pages through the GTT is
4255 : * incoherent and on some machines causes a hard
4256 : * lockup. Relinquish the CPU mmapping to force
4257 : * userspace to refault in the pages and we can
4258 : * then double check if the GTT mapping is still
4259 : * valid for that pointer access.
4260 : */
4261 0 : i915_gem_release_mmap(obj);
4262 :
4263 : /* As we no longer need a fence for GTT access,
4264 : * we can relinquish it now (and so prevent having
4265 : * to steal a fence from someone else on the next
4266 : * fence request). Note GPU activity would have
4267 : * dropped the fence as all snoopable access is
4268 : * supposed to be linear.
4269 : */
4270 0 : ret = i915_gem_object_put_fence(obj);
4271 0 : if (ret)
4272 0 : return ret;
4273 : } else {
4274 : /* We either have incoherent backing store and
4275 : * so no GTT access or the architecture is fully
4276 : * coherent. In such cases, existing GTT mmaps
4277 : * ignore the cache bit in the PTE and we can
4278 : * rewrite it without confusing the GPU or having
4279 : * to force userspace to fault back in its mmaps.
4280 : */
4281 : }
4282 :
4283 0 : list_for_each_entry(vma, &obj->vma_list, vma_link) {
4284 0 : if (!drm_mm_node_allocated(&vma->node))
4285 : continue;
4286 :
4287 0 : ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
4288 0 : if (ret)
4289 0 : return ret;
4290 : }
4291 : }
4292 :
4293 0 : list_for_each_entry(vma, &obj->vma_list, vma_link)
4294 0 : vma->node.color = cache_level;
4295 0 : obj->cache_level = cache_level;
4296 :
4297 : out:
4298 : /* Flush the dirty CPU caches to the backing storage so that the
4299 : * object is now coherent at its new cache level (with respect
4300 : * to the access domain).
4301 : */
4302 0 : if (obj->cache_dirty &&
4303 0 : obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
4304 0 : cpu_write_needs_clflush(obj)) {
4305 0 : if (i915_gem_clflush_object(obj, true))
4306 0 : i915_gem_chipset_flush(obj->base.dev);
4307 : }
4308 :
4309 0 : return 0;
4310 0 : }
4311 :
4312 0 : int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
4313 : struct drm_file *file)
4314 : {
4315 0 : struct drm_i915_gem_caching *args = data;
4316 : struct drm_i915_gem_object *obj;
4317 :
4318 0 : obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
4319 0 : if (&obj->base == NULL)
4320 0 : return -ENOENT;
4321 :
4322 0 : switch (obj->cache_level) {
4323 : case I915_CACHE_LLC:
4324 : case I915_CACHE_L3_LLC:
4325 0 : args->caching = I915_CACHING_CACHED;
4326 0 : break;
4327 :
4328 : case I915_CACHE_WT:
4329 0 : args->caching = I915_CACHING_DISPLAY;
4330 0 : break;
4331 :
4332 : default:
4333 0 : args->caching = I915_CACHING_NONE;
4334 0 : break;
4335 : }
4336 :
4337 0 : drm_gem_object_unreference_unlocked(&obj->base);
4338 0 : return 0;
4339 0 : }
4340 :
4341 0 : int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
4342 : struct drm_file *file)
4343 : {
4344 0 : struct drm_i915_private *dev_priv = dev->dev_private;
4345 0 : struct drm_i915_gem_caching *args = data;
4346 : struct drm_i915_gem_object *obj;
4347 : enum i915_cache_level level;
4348 : int ret;
4349 :
4350 0 : switch (args->caching) {
4351 : case I915_CACHING_NONE:
4352 : level = I915_CACHE_NONE;
4353 0 : break;
4354 : case I915_CACHING_CACHED:
4355 : /*
4356 : * Due to a HW issue on BXT A stepping, GPU stores via a
4357 : * snooped mapping may leave stale data in a corresponding CPU
4358 : * cacheline, whereas normally such cachelines would get
4359 : * invalidated.
4360 : */
4361 0 : if (IS_BXT_REVID(dev, 0, BXT_REVID_A1))
4362 0 : return -ENODEV;
4363 :
4364 : level = I915_CACHE_LLC;
4365 0 : break;
4366 : case I915_CACHING_DISPLAY:
4367 0 : level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE;
4368 0 : break;
4369 : default:
4370 0 : return -EINVAL;
4371 : }
4372 :
4373 0 : intel_runtime_pm_get(dev_priv);
4374 :
4375 0 : ret = i915_mutex_lock_interruptible(dev);
4376 0 : if (ret)
4377 : goto rpm_put;
4378 :
4379 0 : obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
4380 0 : if (&obj->base == NULL) {
4381 : ret = -ENOENT;
4382 0 : goto unlock;
4383 : }
4384 :
4385 0 : ret = i915_gem_object_set_cache_level(obj, level);
4386 :
4387 0 : drm_gem_object_unreference(&obj->base);
4388 : unlock:
4389 0 : mutex_unlock(&dev->struct_mutex);
4390 : rpm_put:
4391 0 : intel_runtime_pm_put(dev_priv);
4392 :
4393 0 : return ret;
4394 0 : }
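/*
 * Illustrative sketch (not part of this file): the userspace side of the two
 * caching ioctls above. The fd, the GEM handle and libdrm's drmIoctl() are
 * assumptions of the example; the I915_CACHING_* values map onto cache
 * levels exactly as in the switch statements above.
 *
 *	struct drm_i915_gem_caching arg;
 *
 *	memset(&arg, 0, sizeof(arg));
 *	arg.handle = handle;
 *	arg.caching = I915_CACHING_NONE;	(request uncached PTEs)
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg))
 *		... fails with EBUSY while any vma of the object is pinned ...
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_CACHING, &arg);
 *	... arg.caching now reads back NONE, CACHED or DISPLAY ...
 */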
4395 :
4396 : /*
4397 : * Prepare buffer for display plane (scanout, cursors, etc).
4398 : * Can be called from an uninterruptible phase (modesetting) and allows
4399 : * any flushes to be pipelined (for pageflips).
4400 : */
4401 : int
4402 0 : i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
4403 : u32 alignment,
4404 : struct intel_engine_cs *pipelined,
4405 : struct drm_i915_gem_request **pipelined_request,
4406 : const struct i915_ggtt_view *view)
4407 : {
4408 : u32 old_read_domains, old_write_domain;
4409 : int ret;
4410 :
4411 0 : ret = i915_gem_object_sync(obj, pipelined, pipelined_request);
4412 0 : if (ret)
4413 0 : return ret;
4414 :
4415 : /* Mark the pin_display early so that we account for the
4416 : * display coherency whilst setting up the cache domains.
4417 : */
4418 0 : obj->pin_display++;
4419 :
4420 : /* The display engine is not coherent with the LLC cache on gen6. As
4421 : * a result, we make sure that the pinning that is about to occur is
4422 : * done with uncached PTEs. This is the lowest common denominator for all
4423 : * chipsets.
4424 : *
4425 : * However for gen6+, we could do better by using the GFDT bit instead
4426 : * of uncaching, which would allow us to flush all the LLC-cached data
4427 : * with that bit in the PTE to main memory with just one PIPE_CONTROL.
4428 : */
4429 0 : ret = i915_gem_object_set_cache_level(obj,
4430 0 : HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
4431 0 : if (ret)
4432 : goto err_unpin_display;
4433 :
4434 : /* As the user may map the buffer once pinned in the display plane
4435 : * (e.g. libkms for the bootup splash), we have to ensure that we
4436 : * always use map_and_fenceable for all scanout buffers.
4437 : */
4438 0 : ret = i915_gem_object_ggtt_pin(obj, view, alignment,
4439 0 : view->type == I915_GGTT_VIEW_NORMAL ?
4440 : PIN_MAPPABLE : 0);
4441 0 : if (ret)
4442 : goto err_unpin_display;
4443 :
4444 0 : i915_gem_object_flush_cpu_write_domain(obj);
4445 :
4446 0 : old_write_domain = obj->base.write_domain;
4447 0 : old_read_domains = obj->base.read_domains;
4448 :
4449 : /* It should now be out of any other write domains, and we can update
4450 : * the domain values for our changes.
4451 : */
4452 0 : obj->base.write_domain = 0;
4453 0 : obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
4454 :
4455 0 : trace_i915_gem_object_change_domain(obj,
4456 : old_read_domains,
4457 : old_write_domain);
4458 :
4459 0 : return 0;
4460 :
4461 : err_unpin_display:
4462 0 : obj->pin_display--;
4463 0 : return ret;
4464 0 : }
4465 :
4466 : void
4467 0 : i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
4468 : const struct i915_ggtt_view *view)
4469 : {
4470 0 : if (WARN_ON(obj->pin_display == 0))
4471 : return;
4472 :
4473 0 : i915_gem_object_ggtt_unpin_view(obj, view);
4474 :
4475 0 : obj->pin_display--;
4476 0 : }
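/*
 * Illustrative sketch (not part of this file) of how the modesetting code is
 * expected to pair the two helpers above. obj, alignment, pipelined and the
 * request pointer are assumed to come from the caller; a request handed back
 * through the pointer must be submitted as described at i915_gem_object_sync().
 *
 *	struct drm_i915_gem_request *request = NULL;
 *	int err;
 *
 *	err = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined,
 *						   &request,
 *						   &i915_ggtt_view_normal);
 *	if (err)
 *		return err;
 *	... scan out from the pinned, display-coherent mapping ...
 *	i915_gem_object_unpin_from_display_plane(obj, &i915_ggtt_view_normal);
 */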
4477 :
4478 : /**
4479 : * Moves a single object to the CPU read, and possibly write domain.
4480 : *
4481 : * This function returns when the move is complete, including waiting on
4482 : * flushes to occur.
4483 : */
4484 : int
4485 0 : i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
4486 : {
4487 : uint32_t old_write_domain, old_read_domains;
4488 : int ret;
4489 :
4490 0 : if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
4491 0 : return 0;
4492 :
4493 0 : ret = i915_gem_object_wait_rendering(obj, !write);
4494 0 : if (ret)
4495 0 : return ret;
4496 :
4497 0 : i915_gem_object_flush_gtt_write_domain(obj);
4498 :
4499 0 : old_write_domain = obj->base.write_domain;
4500 0 : old_read_domains = obj->base.read_domains;
4501 :
4502 : /* Flush the CPU cache if it's still invalid. */
4503 0 : if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
4504 0 : i915_gem_clflush_object(obj, false);
4505 :
4506 0 : obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
4507 0 : }
4508 :
4509 : /* It should now be out of any other write domains, and we can update
4510 : * the domain values for our changes.
4511 : */
4512 0 : BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
4513 :
4514 : /* If we're writing through the CPU, then the GPU read domains will
4515 : * need to be invalidated at next use.
4516 : */
4517 0 : if (write) {
4518 0 : obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4519 0 : obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4520 0 : }
4521 :
4522 0 : trace_i915_gem_object_change_domain(obj,
4523 : old_read_domains,
4524 : old_write_domain);
4525 :
4526 0 : return 0;
4527 0 : }
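/*
 * Illustrative sketch (not part of this file): the mirror image of the GTT
 * path above; before touching the backing pages directly, a caller moves the
 * object into the CPU domain so stale GTT writes are flushed and the CPU
 * cache is invalidated where needed. obj comes from the caller.
 *
 *	ret = i915_gem_object_set_to_cpu_domain(obj, true);
 *	if (ret == 0) {
 *		... read or write the pages through a CPU mapping ...
 *	}
 */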
4528 :
4529 : /* Throttle our rendering by waiting until the ring has completed our requests
4530 : * emitted over 20 msec ago.
4531 : *
4532 : * Note that if we were to use the current jiffies each time around the loop,
4533 : * we wouldn't escape the function with any frames outstanding if the time to
4534 : * render a frame was over 20ms.
4535 : *
4536 : * This should get us reasonable parallelism between CPU and GPU but also
4537 : * relatively low latency when blocking on a particular request to finish.
4538 : */
4539 : static int
4540 0 : i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
4541 : {
4542 0 : struct drm_i915_private *dev_priv = dev->dev_private;
4543 0 : struct drm_i915_file_private *file_priv = file->driver_priv;
4544 0 : unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
4545 : struct drm_i915_gem_request *request, *target = NULL;
4546 : unsigned reset_counter;
4547 : int ret;
4548 :
4549 0 : ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
4550 0 : if (ret)
4551 0 : return ret;
4552 :
4553 0 : ret = i915_gem_check_wedge(&dev_priv->gpu_error, false);
4554 0 : if (ret)
4555 0 : return ret;
4556 :
4557 0 : spin_lock(&file_priv->mm.lock);
4558 0 : list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
4559 0 : if (time_after_eq(request->emitted_jiffies, recent_enough))
4560 : break;
4561 :
4562 : /*
4563 : * Note that the request might not have been submitted yet,
4564 : * in which case emitted_jiffies will be zero.
4565 : */
4566 0 : if (!request->emitted_jiffies)
4567 : continue;
4568 :
4569 : target = request;
4570 0 : }
4571 0 : reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
4572 0 : if (target)
4573 0 : i915_gem_request_reference(target);
4574 0 : spin_unlock(&file_priv->mm.lock);
4575 :
4576 0 : if (target == NULL)
4577 0 : return 0;
4578 :
4579 0 : ret = __i915_wait_request(target, reset_counter, true, NULL, NULL);
4580 0 : if (ret == 0)
4581 0 : queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
4582 :
4583 0 : i915_gem_request_unreference__unlocked(target);
4584 :
4585 0 : return ret;
4586 0 : }
4587 :
4588 : static bool
4589 0 : i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
4590 : {
4591 0 : struct drm_i915_gem_object *obj = vma->obj;
4592 :
4593 0 : if (alignment &&
4594 0 : vma->node.start & (alignment - 1))
4595 0 : return true;
4596 :
4597 0 : if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
4598 0 : return true;
4599 :
4600 0 : if (flags & PIN_OFFSET_BIAS &&
4601 0 : vma->node.start < (flags & PIN_OFFSET_MASK))
4602 0 : return true;
4603 :
4604 0 : return false;
4605 0 : }
4606 :
4607 0 : void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
4608 : {
4609 0 : struct drm_i915_gem_object *obj = vma->obj;
4610 : bool mappable, fenceable;
4611 : u32 fence_size, fence_alignment;
4612 :
4613 0 : fence_size = i915_gem_get_gtt_size(obj->base.dev,
4614 0 : obj->base.size,
4615 0 : obj->tiling_mode);
4616 0 : fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
4617 0 : obj->base.size,
4618 0 : obj->tiling_mode,
4619 : true);
4620 :
4621 0 : fenceable = (vma->node.size == fence_size &&
4622 0 : (vma->node.start & (fence_alignment - 1)) == 0);
4623 :
4624 0 : mappable = (vma->node.start + fence_size <=
4625 0 : to_i915(obj->base.dev)->gtt.mappable_end);
4626 :
4627 0 : obj->map_and_fenceable = mappable && fenceable;
4628 0 : }
4629 :
4630 : static int
4631 0 : i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
4632 : struct i915_address_space *vm,
4633 : const struct i915_ggtt_view *ggtt_view,
4634 : uint32_t alignment,
4635 : uint64_t flags)
4636 : {
4637 0 : struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
4638 : struct i915_vma *vma;
4639 : unsigned bound;
4640 : int ret;
4641 :
4642 0 : if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base))
4643 0 : return -ENODEV;
4644 :
4645 0 : if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
4646 0 : return -EINVAL;
4647 :
4648 0 : if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE))
4649 0 : return -EINVAL;
4650 :
4651 0 : if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
4652 0 : return -EINVAL;
4653 :
4654 0 : vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) :
4655 0 : i915_gem_obj_to_vma(obj, vm);
4656 :
4657 0 : if (IS_ERR(vma))
4658 0 : return PTR_ERR(vma);
4659 :
4660 0 : if (vma) {
4661 0 : if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
4662 0 : return -EBUSY;
4663 :
4664 0 : if (i915_vma_misplaced(vma, alignment, flags)) {
4665 0 : WARN(vma->pin_count,
4666 : "bo is already pinned in %s with incorrect alignment:"
4667 : " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
4668 : " obj->map_and_fenceable=%d\n",
4669 : ggtt_view ? "ggtt" : "ppgtt",
4670 : upper_32_bits(vma->node.start),
4671 : lower_32_bits(vma->node.start),
4672 : alignment,
4673 : !!(flags & PIN_MAPPABLE),
4674 : obj->map_and_fenceable);
4675 0 : ret = i915_vma_unbind(vma);
4676 0 : if (ret)
4677 0 : return ret;
4678 :
4679 : vma = NULL;
4680 0 : }
4681 : }
4682 :
4683 0 : bound = vma ? vma->bound : 0;
4684 0 : if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
4685 0 : vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment,
4686 : flags);
4687 0 : if (IS_ERR(vma))
4688 0 : return PTR_ERR(vma);
4689 : } else {
4690 0 : ret = i915_vma_bind(vma, obj->cache_level, flags);
4691 0 : if (ret)
4692 0 : return ret;
4693 : }
4694 :
4695 0 : if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
4696 0 : (bound ^ vma->bound) & GLOBAL_BIND) {
4697 0 : __i915_vma_set_map_and_fenceable(vma);
4698 0 : WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
4699 0 : }
4700 :
4701 0 : vma->pin_count++;
4702 0 : return 0;
4703 0 : }
4704 :
4705 : int
4706 0 : i915_gem_object_pin(struct drm_i915_gem_object *obj,
4707 : struct i915_address_space *vm,
4708 : uint32_t alignment,
4709 : uint64_t flags)
4710 : {
4711 0 : return i915_gem_object_do_pin(obj, vm,
4712 0 : i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
4713 : alignment, flags);
4714 : }
4715 :
4716 : int
4717 0 : i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
4718 : const struct i915_ggtt_view *view,
4719 : uint32_t alignment,
4720 : uint64_t flags)
4721 : {
4722 0 : if (WARN_ONCE(!view, "no view specified"))
4723 0 : return -EINVAL;
4724 :
4725 0 : return i915_gem_object_do_pin(obj, i915_obj_to_ggtt(obj), view,
4726 0 : alignment, flags | PIN_GLOBAL);
4727 0 : }
4728 :
4729 : void
4730 0 : i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
4731 : const struct i915_ggtt_view *view)
4732 : {
4733 0 : struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
4734 :
4735 0 : BUG_ON(!vma);
4736 0 : WARN_ON(vma->pin_count == 0);
4737 0 : WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view));
4738 :
4739 0 : --vma->pin_count;
4740 0 : }
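/*
 * Illustrative sketch (not part of this file): GGTT pins must be paired with
 * an unpin of the same view, since only the pin count keeps the vma from
 * being unbound. The alignment of 0 and the PIN_MAPPABLE flag are example
 * values; obj comes from the caller.
 *
 *	ret = i915_gem_object_ggtt_pin(obj, &i915_ggtt_view_normal,
 *				       0, PIN_MAPPABLE);
 *	if (ret)
 *		return ret;
 *	... use the object while it is pinned into the mappable aperture ...
 *	i915_gem_object_ggtt_unpin_view(obj, &i915_ggtt_view_normal);
 */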
4741 :
4742 : int
4743 0 : i915_gem_busy_ioctl(struct drm_device *dev, void *data,
4744 : struct drm_file *file)
4745 : {
4746 0 : struct drm_i915_gem_busy *args = data;
4747 : struct drm_i915_gem_object *obj;
4748 : int ret;
4749 :
4750 0 : ret = i915_mutex_lock_interruptible(dev);
4751 0 : if (ret)
4752 0 : return ret;
4753 :
4754 0 : obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
4755 0 : if (&obj->base == NULL) {
4756 : ret = -ENOENT;
4757 0 : goto unlock;
4758 : }
4759 :
4760 : /* Count all active objects as busy, even if they are currently not used
4761 : * by the gpu. Users of this interface expect objects to eventually
4762 : * become non-busy without any further actions, therefore emit any
4763 : * necessary flushes here.
4764 : */
4765 0 : ret = i915_gem_object_flush_active(obj);
4766 0 : if (ret)
4767 : goto unref;
4768 :
4769 : BUILD_BUG_ON(I915_NUM_RINGS > 16);
4770 0 : args->busy = obj->active << 16;
4771 0 : if (obj->last_write_req)
4772 0 : args->busy |= obj->last_write_req->ring->id;
4773 :
4774 : unref:
4775 0 : drm_gem_object_unreference(&obj->base);
4776 : unlock:
4777 0 : mutex_unlock(&dev->struct_mutex);
4778 0 : return ret;
4779 0 : }
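/*
 * Illustrative sketch (not part of this file): decoding args->busy on the
 * userspace side, matching the encoding above (the object's active count in
 * the high 16 bits, the id of the last writing ring in the low bits). fd,
 * handle and drmIoctl() are assumptions of the example.
 *
 *	struct drm_i915_gem_busy busy;
 *
 *	memset(&busy, 0, sizeof(busy));
 *	busy.handle = handle;
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy) == 0) {
 *		if (busy.busy >> 16)
 *			... still being read or written by the GPU ...
 *		else
 *			... idle, safe to access without stalling ...
 *	}
 */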
4780 :
4781 : int
4782 0 : i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4783 : struct drm_file *file_priv)
4784 : {
4785 0 : return i915_gem_ring_throttle(dev, file_priv);
4786 : }
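/*
 * Illustrative sketch (not part of this file): the throttle ioctl takes no
 * argument; a client might call it once per frame so it never runs more than
 * about 20ms (DRM_I915_THROTTLE_JIFFIES) ahead of the GPU, per the policy in
 * i915_gem_ring_throttle() above. fd and drmIoctl() are assumptions.
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_THROTTLE, NULL))
 *		... an error here usually means a GPU reset or wedge ...
 */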
4787 :
4788 : int
4789 0 : i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4790 : struct drm_file *file_priv)
4791 : {
4792 0 : struct drm_i915_private *dev_priv = dev->dev_private;
4793 0 : struct drm_i915_gem_madvise *args = data;
4794 : struct drm_i915_gem_object *obj;
4795 : int ret;
4796 :
4797 0 : switch (args->madv) {
4798 : case I915_MADV_DONTNEED:
4799 : case I915_MADV_WILLNEED:
4800 : break;
4801 : default:
4802 0 : return -EINVAL;
4803 : }
4804 :
4805 0 : ret = i915_mutex_lock_interruptible(dev);
4806 0 : if (ret)
4807 0 : return ret;
4808 :
4809 0 : obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
4810 0 : if (&obj->base == NULL) {
4811 : ret = -ENOENT;
4812 0 : goto unlock;
4813 : }
4814 :
4815 0 : if (i915_gem_obj_is_pinned(obj)) {
4816 : ret = -EINVAL;
4817 0 : goto out;
4818 : }
4819 :
4820 0 : if (obj->pages &&
4821 0 : obj->tiling_mode != I915_TILING_NONE &&
4822 0 : dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
4823 0 : if (obj->madv == I915_MADV_WILLNEED)
4824 0 : i915_gem_object_unpin_pages(obj);
4825 0 : if (args->madv == I915_MADV_WILLNEED)
4826 0 : i915_gem_object_pin_pages(obj);
4827 : }
4828 :
4829 0 : if (obj->madv != __I915_MADV_PURGED)
4830 0 : obj->madv = args->madv;
4831 :
4832 : /* if the object is no longer attached, discard its backing storage */
4833 0 : if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL)
4834 0 : i915_gem_object_truncate(obj);
4835 :
4836 0 : args->retained = obj->madv != __I915_MADV_PURGED;
4837 :
4838 : out:
4839 0 : drm_gem_object_unreference(&obj->base);
4840 : unlock:
4841 0 : mutex_unlock(&dev->struct_mutex);
4842 0 : return ret;
4843 0 : }
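/*
 * Illustrative sketch (not part of this file): how a userspace buffer cache
 * might use the madvise ioctl above. Marking a buffer DONTNEED lets the
 * shrinker purge its backing pages under memory pressure; 'retained' on a
 * later WILLNEED reports whether the contents survived. fd, handle and
 * drmIoctl() are assumptions of the example.
 *
 *	struct drm_i915_gem_madvise madv;
 *
 *	memset(&madv, 0, sizeof(madv));
 *	madv.handle = handle;
 *	madv.madv = I915_MADV_DONTNEED;		(park the idle buffer)
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
 *
 *	madv.madv = I915_MADV_WILLNEED;		(take it back out)
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
 *	if (!madv.retained)
 *		... contents were purged, reinitialise the buffer ...
 */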
4844 :
4845 0 : void i915_gem_object_init(struct drm_i915_gem_object *obj,
4846 : const struct drm_i915_gem_object_ops *ops)
4847 : {
4848 : int i;
4849 :
4850 0 : INIT_LIST_HEAD(&obj->global_list);
4851 0 : for (i = 0; i < I915_NUM_RINGS; i++)
4852 0 : INIT_LIST_HEAD(&obj->ring_list[i]);
4853 0 : INIT_LIST_HEAD(&obj->obj_exec_link);
4854 0 : INIT_LIST_HEAD(&obj->vma_list);
4855 0 : INIT_LIST_HEAD(&obj->batch_pool_link);
4856 :
4857 0 : obj->ops = ops;
4858 :
4859 0 : obj->fence_reg = I915_FENCE_REG_NONE;
4860 0 : obj->madv = I915_MADV_WILLNEED;
4861 :
4862 0 : i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
4863 0 : }
4864 :
4865 : static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
4866 : .get_pages = i915_gem_object_get_pages_gtt,
4867 : .put_pages = i915_gem_object_put_pages_gtt,
4868 : };
4869 :
4870 0 : struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
4871 : size_t size)
4872 : {
4873 : struct drm_i915_gem_object *obj;
4874 : #ifdef __linux__
4875 : struct address_space *mapping;
4876 : gfp_t mask;
4877 : #endif
4878 :
4879 0 : obj = i915_gem_object_alloc(dev);
4880 0 : if (obj == NULL)
4881 0 : return NULL;
4882 :
4883 0 : if (drm_gem_object_init(dev, &obj->base, size) != 0) {
4884 0 : i915_gem_object_free(obj);
4885 0 : return NULL;
4886 : }
4887 :
4888 : #ifdef __linux__
4889 : mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
4890 : if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
4891 : /* 965gm cannot relocate objects above 4GiB. */
4892 : mask &= ~__GFP_HIGHMEM;
4893 : mask |= __GFP_DMA32;
4894 : }
4895 :
4896 : mapping = file_inode(obj->base.filp)->i_mapping;
4897 : mapping_set_gfp_mask(mapping, mask);
4898 : #endif
4899 :
4900 0 : i915_gem_object_init(obj, &i915_gem_object_ops);
4901 :
4902 0 : obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4903 0 : obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4904 :
4905 0 : if (HAS_LLC(dev)) {
4906 : /* On some devices, we can have the GPU use the LLC (the CPU
4907 : * cache) for about a 10% performance improvement
4908 : * compared to uncached. Graphics requests other than
4909 : * display scanout are coherent with the CPU in
4910 : * accessing this cache. This means in this mode we
4911 : * don't need to clflush on the CPU side, and on the
4912 : * GPU side we only need to flush internal caches to
4913 : * get data visible to the CPU.
4914 : *
4915 : * However, we maintain the display planes as UC, and so
4916 : * need to rebind when first used as such.
4917 : */
4918 0 : obj->cache_level = I915_CACHE_LLC;
4919 0 : } else
4920 0 : obj->cache_level = I915_CACHE_NONE;
4921 :
4922 0 : trace_i915_gem_object_create(obj);
4923 :
4924 0 : return obj;
4925 0 : }
4926 :
4927 : #ifdef __linux__
4928 : static bool discard_backing_storage(struct drm_i915_gem_object *obj)
4929 : {
4930 : /* If we are the last user of the backing storage (be it shmemfs
4931 : * pages or stolen etc), we know that the pages are going to be
4932 : * immediately released. In this case, we can then skip copying
4933 : * back the contents from the GPU.
4934 : */
4935 :
4936 : if (obj->madv != I915_MADV_WILLNEED)
4937 : return false;
4938 :
4939 : if (obj->base.filp == NULL)
4940 : return true;
4941 :
4942 : /* At first glance, this looks racy, but then again so would be
4943 : * userspace racing mmap against close. However, the first external
4944 : * reference to the filp can only be obtained through the
4945 : * i915_gem_mmap_ioctl() which safeguards us against the user
4946 : * acquiring such a reference whilst we are in the middle of
4947 : * freeing the object.
4948 : */
4949 : return atomic_long_read(&obj->base.filp->f_count) == 1;
4950 : }
4951 : #endif
4952 :
4953 0 : void i915_gem_free_object(struct drm_gem_object *gem_obj)
4954 : {
4955 0 : struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
4956 0 : struct drm_device *dev = obj->base.dev;
4957 0 : struct drm_i915_private *dev_priv = dev->dev_private;
4958 : struct i915_vma *vma, *next;
4959 :
4960 0 : intel_runtime_pm_get(dev_priv);
4961 :
4962 0 : trace_i915_gem_object_destroy(obj);
4963 :
4964 0 : list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
4965 : int ret;
4966 :
4967 0 : vma->pin_count = 0;
4968 0 : ret = i915_vma_unbind(vma);
4969 0 : if (WARN_ON(ret == -ERESTARTSYS)) {
4970 : bool was_interruptible;
4971 :
4972 0 : was_interruptible = dev_priv->mm.interruptible;
4973 0 : dev_priv->mm.interruptible = false;
4974 :
4975 0 : WARN_ON(i915_vma_unbind(vma));
4976 :
4977 0 : dev_priv->mm.interruptible = was_interruptible;
4978 0 : }
4979 : }
4980 :
4981 : /* Stolen objects don't hold a ref, but do hold pin count. Fix that up
4982 : * before progressing. */
4983 0 : if (obj->stolen)
4984 0 : i915_gem_object_unpin_pages(obj);
4985 :
4986 0 : WARN_ON(obj->frontbuffer_bits);
4987 :
4988 0 : if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
4989 0 : dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
4990 0 : obj->tiling_mode != I915_TILING_NONE)
4991 0 : i915_gem_object_unpin_pages(obj);
4992 :
4993 0 : if (WARN_ON(obj->pages_pin_count))
4994 0 : obj->pages_pin_count = 0;
4995 : #ifdef notyet
4996 : if (discard_backing_storage(obj))
4997 : obj->madv = I915_MADV_DONTNEED;
4998 : #endif
4999 0 : i915_gem_object_put_pages(obj);
5000 0 : i915_gem_object_free_mmap_offset(obj);
5001 :
5002 0 : BUG_ON(obj->pages);
5003 :
5004 : #ifdef notyet
5005 : if (obj->base.import_attach)
5006 : drm_prime_gem_destroy(&obj->base, NULL);
5007 : #endif
5008 :
5009 0 : if (obj->ops->release)
5010 0 : obj->ops->release(obj);
5011 :
5012 0 : drm_gem_object_release(&obj->base);
5013 0 : i915_gem_info_remove_obj(dev_priv, obj->base.size);
5014 :
5015 0 : kfree(obj->bit_17);
5016 0 : i915_gem_object_free(obj);
5017 :
5018 0 : intel_runtime_pm_put(dev_priv);
5019 0 : }
5020 :
5021 0 : struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
5022 : struct i915_address_space *vm)
5023 : {
5024 : struct i915_vma *vma;
5025 0 : list_for_each_entry(vma, &obj->vma_list, vma_link) {
5026 0 : if (i915_is_ggtt(vma->vm) &&
5027 0 : vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
5028 : continue;
5029 0 : if (vma->vm == vm)
5030 0 : return vma;
5031 : }
5032 0 : return NULL;
5033 0 : }
5034 :
5035 0 : struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
5036 : const struct i915_ggtt_view *view)
5037 : {
5038 0 : struct i915_address_space *ggtt = i915_obj_to_ggtt(obj);
5039 : struct i915_vma *vma;
5040 :
5041 0 : if (WARN_ONCE(!view, "no view specified"))
5042 0 : return ERR_PTR(-EINVAL);
5043 :
5044 0 : list_for_each_entry(vma, &obj->vma_list, vma_link)
5045 0 : if (vma->vm == ggtt &&
5046 0 : i915_ggtt_view_equal(&vma->ggtt_view, view))
5047 0 : return vma;
5048 0 : return NULL;
5049 0 : }
5050 :
5051 0 : void i915_gem_vma_destroy(struct i915_vma *vma)
5052 : {
5053 : struct i915_address_space *vm = NULL;
5054 0 : WARN_ON(vma->node.allocated);
5055 :
5056 : /* Keep the vma as a placeholder in the execbuffer reservation lists */
5057 0 : if (!list_empty(&vma->exec_list))
5058 0 : return;
5059 :
5060 0 : vm = vma->vm;
5061 :
5062 0 : if (!i915_is_ggtt(vm))
5063 0 : i915_ppgtt_put(i915_vm_to_ppgtt(vm));
5064 :
5065 0 : list_del(&vma->vma_link);
5066 :
5067 : #ifdef __linux__
5068 : kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
5069 : #else
5070 0 : pool_put(&(to_i915(vma->obj->base.dev)->vmas), vma);
5071 : #endif
5072 0 : }
5073 :
5074 : static void
5075 0 : i915_gem_stop_ringbuffers(struct drm_device *dev)
5076 : {
5077 0 : struct drm_i915_private *dev_priv = dev->dev_private;
5078 : struct intel_engine_cs *ring;
5079 : int i;
5080 :
5081 0 : for_each_ring(ring, dev_priv, i)
5082 0 : dev_priv->gt.stop_ring(ring);
5083 0 : }
5084 :
5085 : int
5086 0 : i915_gem_suspend(struct drm_device *dev)
5087 : {
5088 0 : struct drm_i915_private *dev_priv = dev->dev_private;
5089 : int ret = 0;
5090 :
5091 0 : mutex_lock(&dev->struct_mutex);
5092 0 : ret = i915_gpu_idle(dev);
5093 0 : if (ret)
5094 : goto err;
5095 :
5096 0 : i915_gem_retire_requests(dev);
5097 :
5098 0 : i915_gem_stop_ringbuffers(dev);
5099 0 : mutex_unlock(&dev->struct_mutex);
5100 :
5101 0 : cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
5102 0 : cancel_delayed_work_sync(&dev_priv->mm.retire_work);
5103 0 : flush_delayed_work(&dev_priv->mm.idle_work);
5104 :
5105 : /* Assert that we successfully flushed all the work and
5106 : * reset the GPU back to its idle, low power state.
5107 : */
5108 0 : WARN_ON(dev_priv->mm.busy);
5109 :
5110 0 : return 0;
5111 :
5112 : err:
5113 0 : mutex_unlock(&dev->struct_mutex);
5114 0 : return ret;
5115 0 : }
5116 :
5117 0 : int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
5118 : {
5119 0 : struct intel_engine_cs *ring = req->ring;
5120 0 : struct drm_device *dev = ring->dev;
5121 0 : struct drm_i915_private *dev_priv = dev->dev_private;
5122 0 : u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
5123 0 : u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
5124 : int i, ret;
5125 :
5126 0 : if (!HAS_L3_DPF(dev) || !remap_info)
5127 0 : return 0;
5128 :
5129 0 : ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3);
5130 0 : if (ret)
5131 0 : return ret;
5132 :
5133 : /*
5134 : * Note: We do not worry about the concurrent register cacheline hang
5135 : * here because no other code should access these registers other than
5136 : * at initialization time.
5137 : */
5138 0 : for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
5139 0 : intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
5140 0 : intel_ring_emit(ring, reg_base + i);
5141 0 : intel_ring_emit(ring, remap_info[i/4]);
5142 : }
5143 :
5144 0 : intel_ring_advance(ring);
5145 :
5146 0 : return ret;
5147 0 : }
5148 :
5149 0 : void i915_gem_init_swizzling(struct drm_device *dev)
5150 : {
5151 0 : struct drm_i915_private *dev_priv = dev->dev_private;
5152 :
5153 0 : if (INTEL_INFO(dev)->gen < 5 ||
5154 0 : dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
5155 0 : return;
5156 :
5157 0 : I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
5158 : DISP_TILE_SURFACE_SWIZZLING);
5159 :
5160 0 : if (IS_GEN5(dev))
5161 0 : return;
5162 :
5163 0 : I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
5164 0 : if (IS_GEN6(dev))
5165 0 : I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
5166 0 : else if (IS_GEN7(dev))
5167 0 : I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
5168 0 : else if (IS_GEN8(dev))
5169 0 : I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
5170 : else
5171 0 : BUG();
5172 0 : }
5173 :
5174 0 : static void init_unused_ring(struct drm_device *dev, u32 base)
5175 : {
5176 0 : struct drm_i915_private *dev_priv = dev->dev_private;
5177 :
5178 0 : I915_WRITE(RING_CTL(base), 0);
5179 0 : I915_WRITE(RING_HEAD(base), 0);
5180 0 : I915_WRITE(RING_TAIL(base), 0);
5181 0 : I915_WRITE(RING_START(base), 0);
5182 0 : }
5183 :
5184 0 : static void init_unused_rings(struct drm_device *dev)
5185 : {
5186 0 : if (IS_I830(dev)) {
5187 0 : init_unused_ring(dev, PRB1_BASE);
5188 0 : init_unused_ring(dev, SRB0_BASE);
5189 0 : init_unused_ring(dev, SRB1_BASE);
5190 0 : init_unused_ring(dev, SRB2_BASE);
5191 0 : init_unused_ring(dev, SRB3_BASE);
5192 0 : } else if (IS_GEN2(dev)) {
5193 0 : init_unused_ring(dev, SRB0_BASE);
5194 0 : init_unused_ring(dev, SRB1_BASE);
5195 0 : } else if (IS_GEN3(dev)) {
5196 0 : init_unused_ring(dev, PRB1_BASE);
5197 0 : init_unused_ring(dev, PRB2_BASE);
5198 0 : }
5199 0 : }
5200 :
5201 0 : int i915_gem_init_rings(struct drm_device *dev)
5202 : {
5203 0 : struct drm_i915_private *dev_priv = dev->dev_private;
5204 : int ret;
5205 :
5206 0 : ret = intel_init_render_ring_buffer(dev);
5207 0 : if (ret)
5208 0 : return ret;
5209 :
5210 0 : if (HAS_BSD(dev)) {
5211 0 : ret = intel_init_bsd_ring_buffer(dev);
5212 0 : if (ret)
5213 : goto cleanup_render_ring;
5214 : }
5215 :
5216 0 : if (HAS_BLT(dev)) {
5217 0 : ret = intel_init_blt_ring_buffer(dev);
5218 0 : if (ret)
5219 : goto cleanup_bsd_ring;
5220 : }
5221 :
5222 0 : if (HAS_VEBOX(dev)) {
5223 0 : ret = intel_init_vebox_ring_buffer(dev);
5224 0 : if (ret)
5225 : goto cleanup_blt_ring;
5226 : }
5227 :
5228 0 : if (HAS_BSD2(dev)) {
5229 0 : ret = intel_init_bsd2_ring_buffer(dev);
5230 0 : if (ret)
5231 : goto cleanup_vebox_ring;
5232 : }
5233 :
5234 0 : return 0;
5235 :
5236 : cleanup_vebox_ring:
5237 0 : intel_cleanup_ring_buffer(&dev_priv->ring[VECS]);
5238 : cleanup_blt_ring:
5239 0 : intel_cleanup_ring_buffer(&dev_priv->ring[BCS]);
5240 : cleanup_bsd_ring:
5241 0 : intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
5242 : cleanup_render_ring:
5243 0 : intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);
5244 :
5245 0 : return ret;
5246 0 : }
5247 :
5248 : int
5249 0 : i915_gem_init_hw(struct drm_device *dev)
5250 : {
5251 0 : struct drm_i915_private *dev_priv = dev->dev_private;
5252 : struct intel_engine_cs *ring;
5253 : int ret, i, j;
5254 :
5255 0 : if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
5256 0 : return -EIO;
5257 :
5258 : /* Double layer security blanket, see i915_gem_init() */
5259 0 : intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5260 :
5261 0 : if (dev_priv->ellc_size)
5262 0 : I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
5263 :
5264 0 : if (IS_HASWELL(dev))
5265 0 : I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ?
5266 : LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
5267 :
5268 0 : if (HAS_PCH_NOP(dev)) {
5269 0 : if (IS_IVYBRIDGE(dev)) {
5270 0 : u32 temp = I915_READ(GEN7_MSG_CTL);
5271 0 : temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
5272 0 : I915_WRITE(GEN7_MSG_CTL, temp);
5273 0 : } else if (INTEL_INFO(dev)->gen >= 7) {
5274 0 : u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
5275 0 : temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
5276 0 : I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
5277 0 : }
5278 : }
5279 :
5280 0 : i915_gem_init_swizzling(dev);
5281 :
5282 : /*
5283 : * At least 830 can leave some of the unused rings
5284 : * "active" (i.e. head != tail) after resume, which
5285 : * will prevent c3 entry. Make sure all unused rings
5286 : * are totally idle.
5287 : */
5288 0 : init_unused_rings(dev);
5289 :
5290 0 : BUG_ON(!dev_priv->ring[RCS].default_context);
5291 :
5292 0 : ret = i915_ppgtt_init_hw(dev);
5293 0 : if (ret) {
5294 0 : DRM_ERROR("PPGTT enable HW failed %d\n", ret);
5295 0 : goto out;
5296 : }
5297 :
5298 : /* Need to do basic initialisation of all rings first: */
5299 0 : for_each_ring(ring, dev_priv, i) {
5300 0 : ret = ring->init_hw(ring);
5301 0 : if (ret)
5302 : goto out;
5303 : }
5304 :
5305 : /* We can't enable contexts until all firmware is loaded */
5306 0 : if (HAS_GUC_UCODE(dev)) {
5307 0 : ret = intel_guc_ucode_load(dev);
5308 0 : if (ret) {
5309 : /*
5310 : * If we got an error and GuC submission is enabled, map
5311 : * the error to -EIO so the GPU will be declared wedged.
5312 : * OTOH, if we didn't intend to use the GuC anyway, just
5313 : * discard the error and carry on.
5314 : */
5315 0 : DRM_ERROR("Failed to initialize GuC, error %d%s\n", ret,
5316 : i915.enable_guc_submission ? "" :
5317 : " (ignored)");
5318 0 : ret = i915.enable_guc_submission ? -EIO : 0;
5319 0 : if (ret)
5320 : goto out;
5321 : }
5322 : }
5323 :
5324 : /*
5325 : * Increment the next seqno by 0x100 so we have a visible break
5326 : * on re-initialisation
5327 : */
5328 0 : ret = i915_gem_set_seqno(dev, dev_priv->next_seqno+0x100);
5329 0 : if (ret)
5330 : goto out;
5331 :
5332 : /* Now it is safe to go back round and do everything else: */
5333 0 : for_each_ring(ring, dev_priv, i) {
5334 0 : struct drm_i915_gem_request *req;
5335 :
5336 0 : WARN_ON(!ring->default_context);
5337 :
5338 0 : ret = i915_gem_request_alloc(ring, ring->default_context, &req);
5339 0 : if (ret) {
5340 0 : i915_gem_cleanup_ringbuffer(dev);
5341 0 : goto out;
5342 : }
5343 :
5344 0 : if (ring->id == RCS) {
5345 0 : for (j = 0; j < NUM_L3_SLICES(dev); j++)
5346 0 : i915_gem_l3_remap(req, j);
5347 : }
5348 :
5349 0 : ret = i915_ppgtt_init_ring(req);
5350 0 : if (ret && ret != -EIO) {
5351 0 : DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret);
5352 0 : i915_gem_request_cancel(req);
5353 0 : i915_gem_cleanup_ringbuffer(dev);
5354 0 : goto out;
5355 : }
5356 :
5357 0 : ret = i915_gem_context_enable(req);
5358 0 : if (ret && ret != -EIO) {
5359 0 : DRM_ERROR("Context enable ring #%d failed %d\n", i, ret);
5360 0 : i915_gem_request_cancel(req);
5361 0 : i915_gem_cleanup_ringbuffer(dev);
5362 0 : goto out;
5363 : }
5364 :
5365 0 : i915_add_request_no_flush(req);
5366 0 : }
5367 :
5368 : out:
5369 0 : intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5370 0 : return ret;
5371 0 : }
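/*
 * Editor's summary (not part of the original source): i915_gem_init_hw()
 * above brings the GPU up in a fixed order -- forcewake held throughout,
 * then swizzling, idling of unused rings, PPGTT hardware setup, per-ring
 * init_hw(), an optional GuC firmware load, a seqno bump for a visible
 * break, and finally one default-context request per ring for L3 remapping
 * (render ring only), PPGTT ring setup and context enabling. An -EIO from
 * the per-ring steps is deliberately not cleaned up here; i915_gem_init()
 * below maps it to a wedged GPU instead.
 */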
5372 :
5373 0 : int i915_gem_init(struct drm_device *dev)
5374 : {
5375 0 : struct drm_i915_private *dev_priv = dev->dev_private;
5376 : int ret;
5377 :
5378 0 : i915.enable_execlists = intel_sanitize_enable_execlists(dev,
5379 0 : i915.enable_execlists);
5380 :
5381 0 : mutex_lock(&dev->struct_mutex);
5382 :
5383 0 : if (IS_VALLEYVIEW(dev)) {
5384 : /* VLVA0 (potential hack), BIOS isn't actually waking us */
5385 0 : I915_WRITE(VLV_GTLC_WAKE_CTRL, VLV_GTLC_ALLOWWAKEREQ);
5386 0 : if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) &
5387 : VLV_GTLC_ALLOWWAKEACK), 10))
5388 : DRM_DEBUG_DRIVER("allow wake ack timed out\n");
5389 0 : }
5390 :
5391 0 : if (!i915.enable_execlists) {
5392 0 : dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission;
5393 0 : dev_priv->gt.init_rings = i915_gem_init_rings;
5394 0 : dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer;
5395 0 : dev_priv->gt.stop_ring = intel_stop_ring_buffer;
5396 0 : } else {
5397 0 : dev_priv->gt.execbuf_submit = intel_execlists_submission;
5398 0 : dev_priv->gt.init_rings = intel_logical_rings_init;
5399 0 : dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup;
5400 0 : dev_priv->gt.stop_ring = intel_logical_ring_stop;
5401 : }
5402 :
5403 : /* This is just a security blanket to placate dragons.
5404 : * On some systems, we very sporadically observe that the first TLBs
5405 : * used by the CS may be stale, despite us poking the TLB reset. If
5406 : * we hold the forcewake during initialisation these problems
5407 : * just magically go away.
5408 : */
5409 0 : intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5410 :
5411 0 : ret = i915_gem_init_userptr(dev);
5412 0 : if (ret)
5413 : goto out_unlock;
5414 :
5415 0 : i915_gem_init_global_gtt(dev);
5416 :
5417 0 : ret = i915_gem_context_init(dev);
5418 0 : if (ret)
5419 : goto out_unlock;
5420 :
5421 0 : ret = dev_priv->gt.init_rings(dev);
5422 0 : if (ret)
5423 : goto out_unlock;
5424 :
5425 0 : ret = i915_gem_init_hw(dev);
5426 0 : if (ret == -EIO) {
5427 : /* Allow ring initialisation to fail by marking the GPU as
5428 : * wedged. But we only want to do this when the GPU is angry;
5429 : * for any other failure, such as an allocation failure, bail.
5430 : */
5431 0 : DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
5432 0 : atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
5433 : ret = 0;
5434 0 : }
5435 :
5436 : out_unlock:
5437 0 : intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5438 0 : mutex_unlock(&dev->struct_mutex);
5439 :
5440 0 : return ret;
5441 : }
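/*
 * Editor's summary (not part of the original source): i915_gem_init()
 * first picks the submission backend (legacy ringbuffer vs. execlists
 * vtable in dev_priv->gt), then initialises userptr support, the global
 * GTT, contexts and the rings before calling i915_gem_init_hw(). Only an
 * -EIO from the hardware init is converted into a wedged GPU; any other
 * error is returned to the caller unchanged.
 */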
5442 :
5443 : void
5444 0 : i915_gem_cleanup_ringbuffer(struct drm_device *dev)
5445 : {
5446 0 : struct drm_i915_private *dev_priv = dev->dev_private;
5447 : struct intel_engine_cs *ring;
5448 : int i;
5449 :
5450 0 : for_each_ring(ring, dev_priv, i)
5451 0 : dev_priv->gt.cleanup_ring(ring);
5452 :
5453 0 : if (i915.enable_execlists)
5454 : /*
5455 : * Neither the BIOS, ourselves, nor any other kernel
5456 : * expects the system to be in execlists mode on startup,
5457 : * so we need to reset the GPU back to legacy mode.
5458 : */
5459 0 : intel_gpu_reset(dev);
5460 0 : }
5461 :
5462 : static void
5463 0 : init_ring_lists(struct intel_engine_cs *ring)
5464 : {
5465 0 : INIT_LIST_HEAD(&ring->active_list);
5466 0 : INIT_LIST_HEAD(&ring->request_list);
5467 0 : }
5468 :
5469 : void
5470 0 : i915_gem_load(struct drm_device *dev)
5471 : {
5472 0 : struct drm_i915_private *dev_priv = dev->dev_private;
5473 : int i;
5474 :
5475 : #ifdef __linux__
5476 : dev_priv->objects =
5477 : kmem_cache_create("i915_gem_object",
5478 : sizeof(struct drm_i915_gem_object), 0,
5479 : SLAB_HWCACHE_ALIGN,
5480 : NULL);
5481 : dev_priv->vmas =
5482 : kmem_cache_create("i915_gem_vma",
5483 : sizeof(struct i915_vma), 0,
5484 : SLAB_HWCACHE_ALIGN,
5485 : NULL);
5486 : dev_priv->requests =
5487 : kmem_cache_create("i915_gem_request",
5488 : sizeof(struct drm_i915_gem_request), 0,
5489 : SLAB_HWCACHE_ALIGN,
5490 : NULL);
5491 : #else
5492 0 : pool_init(&dev_priv->objects, sizeof(struct drm_i915_gem_object),
5493 : 0, IPL_NONE, 0, "drmobj", NULL);
5494 0 : pool_init(&dev_priv->vmas, sizeof(struct i915_vma),
5495 : 0, IPL_NONE, 0, "drmvma", NULL);
5496 0 : pool_init(&dev_priv->requests, sizeof(struct drm_i915_gem_request),
5497 : 0, IPL_NONE, 0, "drmreq", NULL);
5498 : #endif
5499 :
5500 0 : INIT_LIST_HEAD(&dev_priv->vm_list);
5501 0 : INIT_LIST_HEAD(&dev_priv->context_list);
5502 0 : INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
5503 0 : INIT_LIST_HEAD(&dev_priv->mm.bound_list);
5504 0 : INIT_LIST_HEAD(&dev_priv->mm.fence_list);
5505 0 : for (i = 0; i < I915_NUM_RINGS; i++)
5506 0 : init_ring_lists(&dev_priv->ring[i]);
5507 0 : for (i = 0; i < I915_MAX_NUM_FENCES; i++)
5508 0 : INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
5509 0 : INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
5510 : i915_gem_retire_work_handler);
5511 0 : INIT_DELAYED_WORK(&dev_priv->mm.idle_work,
5512 : i915_gem_idle_work_handler);
5513 0 : init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
5514 :
5515 0 : dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
5516 :
5517 0 : if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev))
5518 0 : dev_priv->num_fence_regs = 32;
5519 0 : else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
5520 0 : dev_priv->num_fence_regs = 16;
5521 : else
5522 0 : dev_priv->num_fence_regs = 8;
5523 :
5524 0 : if (intel_vgpu_active(dev))
5525 0 : dev_priv->num_fence_regs =
5526 0 : I915_READ(vgtif_reg(avail_rs.fence_num));
5527 :
5528 : /*
5529 : * Set the initial sequence number for requests.
5530 : * Starting near the top of the range makes the wraparound happen
5531 : * early, catching any obvious problems.
5532 : */
5533 0 : dev_priv->next_seqno = ((u32)~0 - 0x1100);
5534 0 : dev_priv->last_seqno = ((u32)~0 - 0x1101);
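/*
 * Editor's illustration (not driver code): with next_seqno starting at
 * (u32)~0 - 0x1100 == 0xffffeeff, the 32-bit counter wraps after roughly
 * 0x1100 (4352) requests:
 *
 *	u32 seqno = (u32)~0 - 0x1100;	// 0xffffeeff
 *	seqno += 0x1101;		// unsigned wrap -> 0x00000000
 *
 * so seqno wraparound bugs are exercised shortly after load rather than
 * only after ~4 billion requests.
 */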
5535 :
5536 : /* Initialize fence registers to zero */
5537 0 : INIT_LIST_HEAD(&dev_priv->mm.fence_list);
5538 0 : i915_gem_restore_fences(dev);
5539 :
5540 0 : i915_gem_detect_bit_6_swizzle(dev);
5541 0 : init_waitqueue_head(&dev_priv->pending_flip_queue);
5542 :
5543 0 : dev_priv->mm.interruptible = true;
5544 :
5545 : #ifdef notyet
5546 : i915_gem_shrinker_init(dev_priv);
5547 : #endif
5548 :
5549 0 : rw_init(&dev_priv->fb_tracking.lock, "fbtrlk");
5550 0 : }
5551 :
5552 0 : void i915_gem_release(struct drm_device *dev, struct drm_file *file)
5553 : {
5554 0 : struct drm_i915_file_private *file_priv = file->driver_priv;
5555 :
5556 : /* Clean up our request list when the client is going away, so that
5557 : * later retire_requests won't dereference our soon-to-be-gone
5558 : * file_priv.
5559 : */
5560 0 : spin_lock(&file_priv->mm.lock);
5561 0 : while (!list_empty(&file_priv->mm.request_list)) {
5562 : struct drm_i915_gem_request *request;
5563 :
5564 0 : request = list_first_entry(&file_priv->mm.request_list,
5565 : struct drm_i915_gem_request,
5566 : client_list);
5567 0 : list_del(&request->client_list);
5568 0 : request->file_priv = NULL;
5569 : }
5570 0 : spin_unlock(&file_priv->mm.lock);
5571 :
5572 0 : if (!list_empty(&file_priv->rps.link)) {
5573 0 : spin_lock(&to_i915(dev)->rps.client_lock);
5574 0 : list_del(&file_priv->rps.link);
5575 0 : spin_unlock(&to_i915(dev)->rps.client_lock);
5576 0 : }
5577 0 : }
5578 :
5579 0 : int i915_gem_open(struct drm_device *dev, struct drm_file *file)
5580 : {
5581 : struct drm_i915_file_private *file_priv;
5582 : int ret;
5583 :
5584 : DRM_DEBUG_DRIVER("\n");
5585 :
5586 0 : file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
5587 0 : if (!file_priv)
5588 0 : return -ENOMEM;
5589 :
5590 0 : file->driver_priv = file_priv;
5591 0 : file_priv->dev_priv = dev->dev_private;
5592 0 : file_priv->file = file;
5593 0 : INIT_LIST_HEAD(&file_priv->rps.link);
5594 :
5595 0 : mtx_init(&file_priv->mm.lock, IPL_NONE);
5596 0 : INIT_LIST_HEAD(&file_priv->mm.request_list);
5597 :
5598 0 : ret = i915_gem_context_open(dev, file);
5599 0 : if (ret)
5600 0 : kfree(file_priv);
5601 :
5602 0 : return ret;
5603 0 : }
5604 :
5605 : /**
5606 : * i915_gem_track_fb - update frontbuffer tracking
5607 : * @old: current GEM buffer for the frontbuffer slots
5608 : * @new: new GEM buffer for the frontbuffer slots
5609 : * @frontbuffer_bits: bitmask of frontbuffer slots
5610 : *
5611 : * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
5612 : * from @old and setting them in @new. Both @old and @new can be NULL.
5613 : */
5614 0 : void i915_gem_track_fb(struct drm_i915_gem_object *old,
5615 : struct drm_i915_gem_object *new,
5616 : unsigned frontbuffer_bits)
5617 : {
5618 0 : if (old) {
5619 0 : WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex));
5620 0 : WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits));
5621 0 : old->frontbuffer_bits &= ~frontbuffer_bits;
5622 0 : }
5623 :
5624 0 : if (new) {
5625 0 : WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex));
5626 0 : WARN_ON(new->frontbuffer_bits & frontbuffer_bits);
5627 0 : new->frontbuffer_bits |= frontbuffer_bits;
5628 0 : }
5629 0 : }
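/*
 * Hypothetical usage sketch (editor's addition; the function name,
 * "plane_bits" and the flip scenario are illustrative, not taken from this
 * file): when a page flip replaces the object being scanned out, the caller
 * moves its frontbuffer bits from the old object to the new one while
 * holding struct_mutex, matching the WARN_ON checks above.
 */
static void example_flip_frontbuffer_tracking(struct drm_i915_gem_object *old_obj,
					      struct drm_i915_gem_object *new_obj,
					      unsigned plane_bits)
{
	/* Caller is assumed to hold the device's struct_mutex. */
	i915_gem_track_fb(old_obj, new_obj, plane_bits);
}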
5630 :
5631 : /* Helpers for querying an object's bindings (VMAs) per address space */
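/*
 * Editor's note (not part of the original source): the helpers below share
 * one pattern -- walk obj->vma_list and match VMAs either by plain
 * address-space pointer (skipping GGTT VMAs whose view is not
 * I915_GGTT_VIEW_NORMAL, since those cannot be identified by the vm pointer
 * alone) or by address space plus an explicit i915_ggtt_view.
 */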
5632 0 : u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
5633 : struct i915_address_space *vm)
5634 : {
5635 0 : struct drm_i915_private *dev_priv = o->base.dev->dev_private;
5636 : struct i915_vma *vma;
5637 :
5638 0 : WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
5639 :
5640 0 : list_for_each_entry(vma, &o->vma_list, vma_link) {
5641 0 : if (i915_is_ggtt(vma->vm) &&
5642 0 : vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
5643 : continue;
5644 0 : if (vma->vm == vm)
5645 0 : return vma->node.start;
5646 : }
5647 :
5648 0 : WARN(1, "%s vma for this object not found.\n",
5649 : i915_is_ggtt(vm) ? "global" : "ppgtt");
5650 0 : return -1;
5651 0 : }
5652 :
5653 0 : u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
5654 : const struct i915_ggtt_view *view)
5655 : {
5656 0 : struct i915_address_space *ggtt = i915_obj_to_ggtt(o);
5657 : struct i915_vma *vma;
5658 :
5659 0 : list_for_each_entry(vma, &o->vma_list, vma_link)
5660 0 : if (vma->vm == ggtt &&
5661 0 : i915_ggtt_view_equal(&vma->ggtt_view, view))
5662 0 : return vma->node.start;
5663 :
5664 0 : WARN(1, "global vma for this object not found. (view=%u)\n", view->type);
5665 0 : return -1;
5666 0 : }
5667 :
5668 0 : bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
5669 : struct i915_address_space *vm)
5670 : {
5671 : struct i915_vma *vma;
5672 :
5673 0 : list_for_each_entry(vma, &o->vma_list, vma_link) {
5674 0 : if (i915_is_ggtt(vma->vm) &&
5675 0 : vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
5676 : continue;
5677 0 : if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
5678 0 : return true;
5679 : }
5680 :
5681 0 : return false;
5682 0 : }
5683 :
5684 0 : bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
5685 : const struct i915_ggtt_view *view)
5686 : {
5687 0 : struct i915_address_space *ggtt = i915_obj_to_ggtt(o);
5688 : struct i915_vma *vma;
5689 :
5690 0 : list_for_each_entry(vma, &o->vma_list, vma_link)
5691 0 : if (vma->vm == ggtt &&
5692 0 : i915_ggtt_view_equal(&vma->ggtt_view, view) &&
5693 0 : drm_mm_node_allocated(&vma->node))
5694 0 : return true;
5695 :
5696 0 : return false;
5697 0 : }
5698 :
5699 0 : bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o)
5700 : {
5701 : struct i915_vma *vma;
5702 :
5703 0 : list_for_each_entry(vma, &o->vma_list, vma_link)
5704 0 : if (drm_mm_node_allocated(&vma->node))
5705 0 : return true;
5706 :
5707 0 : return false;
5708 0 : }
5709 :
5710 0 : unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
5711 : struct i915_address_space *vm)
5712 : {
5713 0 : struct drm_i915_private *dev_priv = o->base.dev->dev_private;
5714 : struct i915_vma *vma;
5715 :
5716 0 : WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
5717 :
5718 0 : BUG_ON(list_empty(&o->vma_list));
5719 :
5720 0 : list_for_each_entry(vma, &o->vma_list, vma_link) {
5721 0 : if (i915_is_ggtt(vma->vm) &&
5722 0 : vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
5723 : continue;
5724 0 : if (vma->vm == vm)
5725 0 : return vma->node.size;
5726 : }
5727 0 : return 0;
5728 0 : }
5729 :
5730 0 : bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
5731 : {
5732 : struct i915_vma *vma;
5733 0 : list_for_each_entry(vma, &obj->vma_list, vma_link)
5734 0 : if (vma->pin_count > 0)
5735 0 : return true;
5736 :
5737 0 : return false;
5738 0 : }
5739 :
5740 : /* Allocate a new GEM object and fill it with the supplied data */
5741 : struct drm_i915_gem_object *
5742 0 : i915_gem_object_create_from_data(struct drm_device *dev,
5743 : const void *data, size_t size)
5744 : {
5745 : struct drm_i915_gem_object *obj;
5746 : struct sg_table *sg;
5747 : size_t bytes;
5748 : int ret;
5749 :
5750 0 : obj = i915_gem_alloc_object(dev, round_up(size, PAGE_SIZE));
5751 0 : if (IS_ERR_OR_NULL(obj))
5752 0 : return obj;
5753 :
5754 0 : ret = i915_gem_object_set_to_cpu_domain(obj, true);
5755 0 : if (ret)
5756 : goto fail;
5757 :
5758 0 : ret = i915_gem_object_get_pages(obj);
5759 0 : if (ret)
5760 : goto fail;
5761 :
5762 0 : i915_gem_object_pin_pages(obj);
5763 0 : sg = obj->pages;
5764 0 : bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
5765 0 : i915_gem_object_unpin_pages(obj);
5766 :
5767 0 : if (WARN_ON(bytes != size)) {
5768 0 : DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size);
5769 : ret = -EFAULT;
5770 0 : goto fail;
5771 : }
5772 :
5773 0 : return obj;
5774 :
5775 : fail:
5776 0 : drm_gem_object_unreference(&obj->base);
5777 0 : return ERR_PTR(ret);
5778 0 : }
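/*
 * Hypothetical usage sketch (editor's addition; "blob" and "blob_size" are
 * illustrative placeholders): wrapping a firmware-style blob in a GEM
 * object. Note the return contract above -- the result may be NULL or an
 * ERR_PTR(), so callers should test it with IS_ERR_OR_NULL().
 */
static struct drm_i915_gem_object *
example_wrap_blob(struct drm_device *dev, const void *blob, size_t blob_size)
{
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_create_from_data(dev, blob, blob_size);
	if (IS_ERR_OR_NULL(obj))
		return obj;	/* propagate NULL / ERR_PTR to the caller */

	/* obj now holds a copy of blob[0..blob_size) in its backing pages */
	return obj;
}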
|