Line data Source code
1 : /*
2 : * Copyright © 2010 Daniel Vetter
3 : * Copyright © 2011-2014 Intel Corporation
4 : *
5 : * Permission is hereby granted, free of charge, to any person obtaining a
6 : * copy of this software and associated documentation files (the "Software"),
7 : * to deal in the Software without restriction, including without limitation
8 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 : * and/or sell copies of the Software, and to permit persons to whom the
10 : * Software is furnished to do so, subject to the following conditions:
11 : *
12 : * The above copyright notice and this permission notice (including the next
13 : * paragraph) shall be included in all copies or substantial portions of the
14 : * Software.
15 : *
16 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 : * IN THE SOFTWARE.
23 : *
24 : */
25 :
26 : #ifdef __linux__
27 : #include <linux/seq_file.h>
28 : #endif
29 : #include <dev/pci/drm/drmP.h>
30 : #include <dev/pci/drm/i915_drm.h>
31 : #include "i915_drv.h"
32 : #include "i915_vgpu.h"
33 : #include "i915_trace.h"
34 : #include "intel_drv.h"
35 :
36 : static inline void
37 0 : set_pages_uc(struct vm_page *page, int n)
38 : {
39 : /* XXX */
40 0 : }
41 :
42 : static inline void
43 0 : set_pages_wb(struct vm_page *page, int n)
44 : {
45 : /* XXX */
46 0 : }
47 :
48 : /**
49 : * DOC: Global GTT views
50 : *
51 : * Background and previous state
52 : *
53 : * Historically, objects could exist (be bound) in global GTT space only as
54 : * singular instances, with a view representing all of the object's backing pages
55 : * in a linear fashion. This view is called the normal view.
56 : *
57 : * To support multiple views of the same object, where the number of mapped
58 : * pages is not equal to the backing store, or where the layout of the pages
59 : * is not linear, the concept of a GGTT view was added.
60 : *
61 : * One example of an alternative view is a stereo display driven by a single
62 : * image. In this case we would have a framebuffer looking like this
63 : * (2x2 pages):
64 : *
65 : * 12
66 : * 34
67 : *
68 : * Above would represent a normal GGTT view as normally mapped for GPU or CPU
69 : * rendering. In contrast, fed to the display engine would be an alternative
70 : * view which could look something like this:
71 : *
72 : * 1212
73 : * 3434
74 : *
75 : * In this example both the size and the layout of pages in the alternative view
76 : * differ from the normal view.
77 : *
78 : * Implementation and usage
79 : *
80 : * GGTT views are implemented using VMAs and are distinguished via enum
81 : * i915_ggtt_view_type and struct i915_ggtt_view.
82 : *
83 : * A new flavour of core GEM functions which work with GGTT bound objects was
84 : * added with the _ggtt_ infix, and sometimes with the _view postfix, to avoid
85 : * renaming large amounts of code. These functions take a struct i915_ggtt_view
86 : * parameter encapsulating all metadata required to implement a view.
87 : *
88 : * As a helper for callers which are only interested in the normal view, a
89 : * globally const i915_ggtt_view_normal singleton instance exists. All old core
90 : * GEM API functions, the ones not taking the view parameter, operate on, or
91 : * with, the normal GGTT view.
92 : *
93 : * Code wanting to add or use a new GGTT view needs to:
94 : *
95 : * 1. Add a new enum with a suitable name.
96 : * 2. Extend the metadata in the i915_ggtt_view structure if required.
97 : * 3. Add support to i915_get_vma_pages().
98 : *
99 : * New views are required to build a scatter-gather table from within the
100 : * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and
101 : * exists for the lifetime of a VMA.
102 : *
103 : * The core API is designed to have copy semantics, which means that the passed-in
104 : * struct i915_ggtt_view does not need to be persistent (left around after
105 : * calling the core API functions).
106 : *
107 : */
108 :
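/*
 * Illustrative sketch only (not part of the driver): because the core API
 * has copy semantics, a caller can build a struct i915_ggtt_view on the
 * stack, hand it to a _view flavoured core GEM function and let it go out
 * of scope afterwards.  example_ggtt_pin_view() below is a hypothetical
 * stand-in for such a function, named here purely for illustration.
 */
#if 0
static int example_pin_rotated_view(struct drm_i915_gem_object *obj)
{
	/* Transient view description; only .type is needed for ROTATED here. */
	struct i915_ggtt_view view = { .type = I915_GGTT_VIEW_ROTATED };

	/* Hypothetical _view-taking pin helper; the driver's real
	 * equivalents carry the _ggtt_ infix / _view postfix. */
	return example_ggtt_pin_view(obj, &view, 0);
}
#endif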
109 : static int
110 : i915_get_ggtt_vma_pages(struct i915_vma *vma);
111 :
112 : const struct i915_ggtt_view i915_ggtt_view_normal;
113 : const struct i915_ggtt_view i915_ggtt_view_rotated = {
114 : .type = I915_GGTT_VIEW_ROTATED
115 : };
116 :
117 0 : static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
118 : {
119 : bool has_aliasing_ppgtt;
120 : bool has_full_ppgtt;
121 :
122 0 : has_aliasing_ppgtt = INTEL_INFO(dev)->gen >= 6;
123 0 : has_full_ppgtt = INTEL_INFO(dev)->gen >= 7;
124 :
125 0 : if (intel_vgpu_active(dev))
126 0 : has_full_ppgtt = false; /* emulation is too hard */
127 :
128 : /*
129 : * We don't allow disabling PPGTT for gen9+ as it's a requirement for
130 : * execlists, the sole mechanism available to submit work.
131 : */
132 0 : if (INTEL_INFO(dev)->gen < 9 &&
133 0 : (enable_ppgtt == 0 || !has_aliasing_ppgtt))
134 0 : return 0;
135 :
136 0 : if (enable_ppgtt == 1)
137 0 : return 1;
138 :
139 0 : if (enable_ppgtt == 2 && has_full_ppgtt)
140 0 : return 2;
141 :
142 : #ifdef CONFIG_INTEL_IOMMU
143 : /* Disable ppgtt on SNB if VT-d is on. */
144 : if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
145 : DRM_INFO("Disabling PPGTT because VT-d is on\n");
146 : return 0;
147 : }
148 : #endif
149 :
150 : /* Early VLV doesn't have this */
151 0 : if (IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev) &&
152 0 : dev->pdev->revision < 0xb) {
153 : DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
154 0 : return 0;
155 : }
156 :
157 0 : if (INTEL_INFO(dev)->gen >= 8 && i915.enable_execlists)
158 0 : return 2;
159 : else
160 0 : return has_aliasing_ppgtt ? 1 : 0;
161 0 : }
162 :
163 0 : static int ppgtt_bind_vma(struct i915_vma *vma,
164 : enum i915_cache_level cache_level,
165 : u32 unused)
166 : {
167 : u32 pte_flags = 0;
168 :
169 : /* Currently applicable only to VLV */
170 0 : if (vma->obj->gt_ro)
171 0 : pte_flags |= PTE_READ_ONLY;
172 :
173 0 : vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
174 : cache_level, pte_flags);
175 :
176 0 : return 0;
177 : }
178 :
179 0 : static void ppgtt_unbind_vma(struct i915_vma *vma)
180 : {
181 0 : vma->vm->clear_range(vma->vm,
182 0 : vma->node.start,
183 0 : vma->obj->base.size,
184 : true);
185 0 : }
186 :
187 0 : static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
188 : enum i915_cache_level level,
189 : bool valid)
190 : {
191 0 : gen8_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
192 0 : pte |= addr;
193 :
194 0 : switch (level) {
195 : case I915_CACHE_NONE:
196 0 : pte |= PPAT_UNCACHED_INDEX;
197 0 : break;
198 : case I915_CACHE_WT:
199 0 : pte |= PPAT_DISPLAY_ELLC_INDEX;
200 0 : break;
201 : default:
202 0 : pte |= PPAT_CACHED_INDEX;
203 0 : break;
204 : }
205 :
206 0 : return pte;
207 : }
208 :
209 0 : static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
210 : const enum i915_cache_level level)
211 : {
212 : gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
213 0 : pde |= addr;
214 0 : if (level != I915_CACHE_NONE)
215 0 : pde |= PPAT_CACHED_PDE_INDEX;
216 : else
217 0 : pde |= PPAT_UNCACHED_INDEX;
218 0 : return pde;
219 : }
220 :
221 : #define gen8_pdpe_encode gen8_pde_encode
222 : #define gen8_pml4e_encode gen8_pde_encode
223 :
224 0 : static gen6_pte_t snb_pte_encode(dma_addr_t addr,
225 : enum i915_cache_level level,
226 : bool valid, u32 unused)
227 : {
228 0 : gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
229 0 : pte |= GEN6_PTE_ADDR_ENCODE(addr);
230 :
231 0 : switch (level) {
232 : case I915_CACHE_L3_LLC:
233 : case I915_CACHE_LLC:
234 0 : pte |= GEN6_PTE_CACHE_LLC;
235 0 : break;
236 : case I915_CACHE_NONE:
237 0 : pte |= GEN6_PTE_UNCACHED;
238 0 : break;
239 : default:
240 0 : MISSING_CASE(level);
241 0 : }
242 :
243 0 : return pte;
244 : }
245 :
246 0 : static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
247 : enum i915_cache_level level,
248 : bool valid, u32 unused)
249 : {
250 0 : gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
251 0 : pte |= GEN6_PTE_ADDR_ENCODE(addr);
252 :
253 0 : switch (level) {
254 : case I915_CACHE_L3_LLC:
255 0 : pte |= GEN7_PTE_CACHE_L3_LLC;
256 0 : break;
257 : case I915_CACHE_LLC:
258 0 : pte |= GEN6_PTE_CACHE_LLC;
259 0 : break;
260 : case I915_CACHE_NONE:
261 0 : pte |= GEN6_PTE_UNCACHED;
262 0 : break;
263 : default:
264 0 : MISSING_CASE(level);
265 0 : }
266 :
267 0 : return pte;
268 : }
269 :
270 0 : static gen6_pte_t byt_pte_encode(dma_addr_t addr,
271 : enum i915_cache_level level,
272 : bool valid, u32 flags)
273 : {
274 0 : gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
275 0 : pte |= GEN6_PTE_ADDR_ENCODE(addr);
276 :
277 0 : if (!(flags & PTE_READ_ONLY))
278 0 : pte |= BYT_PTE_WRITEABLE;
279 :
280 0 : if (level != I915_CACHE_NONE)
281 0 : pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
282 :
283 0 : return pte;
284 : }
285 :
286 0 : static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
287 : enum i915_cache_level level,
288 : bool valid, u32 unused)
289 : {
290 0 : gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
291 0 : pte |= HSW_PTE_ADDR_ENCODE(addr);
292 :
293 0 : if (level != I915_CACHE_NONE)
294 0 : pte |= HSW_WB_LLC_AGE3;
295 :
296 0 : return pte;
297 : }
298 :
299 0 : static gen6_pte_t iris_pte_encode(dma_addr_t addr,
300 : enum i915_cache_level level,
301 : bool valid, u32 unused)
302 : {
303 0 : gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
304 0 : pte |= HSW_PTE_ADDR_ENCODE(addr);
305 :
306 0 : switch (level) {
307 : case I915_CACHE_NONE:
308 : break;
309 : case I915_CACHE_WT:
310 0 : pte |= HSW_WT_ELLC_LLC_AGE3;
311 0 : break;
312 : default:
313 0 : pte |= HSW_WB_ELLC_LLC_AGE3;
314 0 : break;
315 : }
316 :
317 0 : return pte;
318 : }
319 :
320 : #ifdef __linux__
321 : static int __setup_page_dma(struct drm_device *dev,
322 : struct i915_page_dma *p, gfp_t flags)
323 : {
324 : struct device *device = &dev->pdev->dev;
325 :
326 : p->page = alloc_page(flags);
327 : if (!p->page)
328 : return -ENOMEM;
329 :
330 : p->daddr = dma_map_page(device,
331 : p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL);
332 :
333 : if (dma_mapping_error(device, p->daddr)) {
334 : __free_page(p->page);
335 : return -EINVAL;
336 : }
337 :
338 : return 0;
339 : }
340 : #else
341 0 : static int __setup_page_dma(struct drm_device *dev,
342 : struct i915_page_dma *p, gfp_t flags)
343 : {
344 0 : p->page = alloc_page(flags);
345 0 : if (!p->page)
346 0 : return -ENOMEM;
347 :
348 0 : p->daddr = VM_PAGE_TO_PHYS(p->page);
349 :
350 0 : return 0;
351 0 : }
352 : #endif
353 :
354 0 : static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
355 : {
356 0 : return __setup_page_dma(dev, p, GFP_KERNEL);
357 : }
358 :
359 : #ifdef __linux__
360 : static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
361 : {
362 : if (WARN_ON(!p->page))
363 : return;
364 :
365 : dma_unmap_page(&dev->pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL);
366 : __free_page(p->page);
367 : memset(p, 0, sizeof(*p));
368 : }
369 : #else
370 0 : static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
371 : {
372 0 : if (WARN_ON(!p->page))
373 : return;
374 :
375 0 : __free_page(p->page);
376 0 : memset(p, 0, sizeof(*p));
377 0 : }
378 : #endif
379 :
380 0 : static void *kmap_page_dma(struct i915_page_dma *p)
381 : {
382 0 : return kmap_atomic(p->page);
383 : }
384 :
385 : /* We use the flushing unmap only with ppgtt structures:
386 : * page directories, page tables and scratch pages.
387 : */
388 0 : static void kunmap_page_dma(struct drm_device *dev, void *vaddr)
389 : {
390 : /* There are only a few exceptions for gen >= 6: chv and bxt.
391 : * And we are not sure about the latter, so play safe for now.
392 : */
393 0 : if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
394 0 : drm_clflush_virt_range(vaddr, PAGE_SIZE);
395 :
396 0 : kunmap_atomic(vaddr);
397 0 : }
398 :
399 : #define kmap_px(px) kmap_page_dma(px_base(px))
400 : #define kunmap_px(ppgtt, vaddr) kunmap_page_dma((ppgtt)->base.dev, (vaddr))
401 :
402 : #define setup_px(dev, px) setup_page_dma((dev), px_base(px))
403 : #define cleanup_px(dev, px) cleanup_page_dma((dev), px_base(px))
404 : #define fill_px(dev, px, v) fill_page_dma((dev), px_base(px), (v))
405 : #define fill32_px(dev, px, v) fill_page_dma_32((dev), px_base(px), (v))
406 :
407 0 : static void fill_page_dma(struct drm_device *dev, struct i915_page_dma *p,
408 : const uint64_t val)
409 : {
410 : int i;
411 0 : uint64_t * const vaddr = kmap_page_dma(p);
412 :
413 0 : for (i = 0; i < 512; i++)
414 0 : vaddr[i] = val;
415 :
416 0 : kunmap_page_dma(dev, vaddr);
417 0 : }
418 :
419 0 : static void fill_page_dma_32(struct drm_device *dev, struct i915_page_dma *p,
420 : const uint32_t val32)
421 : {
422 0 : uint64_t v = val32;
423 :
424 0 : v = v << 32 | val32;
425 :
426 0 : fill_page_dma(dev, p, v);
427 0 : }
428 :
429 0 : static struct i915_page_scratch *alloc_scratch_page(struct drm_device *dev)
430 : {
431 : struct i915_page_scratch *sp;
432 : int ret;
433 :
434 0 : sp = kzalloc(sizeof(*sp), GFP_KERNEL);
435 0 : if (sp == NULL)
436 0 : return ERR_PTR(-ENOMEM);
437 :
438 0 : ret = __setup_page_dma(dev, px_base(sp), GFP_DMA32 | __GFP_ZERO);
439 0 : if (ret) {
440 0 : kfree(sp);
441 0 : return ERR_PTR(ret);
442 : }
443 :
444 0 : set_pages_uc(px_page(sp), 1);
445 :
446 0 : return sp;
447 0 : }
448 :
449 0 : static void free_scratch_page(struct drm_device *dev,
450 : struct i915_page_scratch *sp)
451 : {
452 0 : set_pages_wb(px_page(sp), 1);
453 :
454 0 : cleanup_px(dev, sp);
455 0 : kfree(sp);
456 0 : }
457 :
458 0 : static struct i915_page_table *alloc_pt(struct drm_device *dev)
459 : {
460 : struct i915_page_table *pt;
461 0 : const size_t count = INTEL_INFO(dev)->gen >= 8 ?
462 : GEN8_PTES : GEN6_PTES;
463 : int ret = -ENOMEM;
464 :
465 0 : pt = kzalloc(sizeof(*pt), GFP_KERNEL);
466 0 : if (!pt)
467 0 : return ERR_PTR(-ENOMEM);
468 :
469 0 : pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes),
470 : GFP_KERNEL);
471 :
472 0 : if (!pt->used_ptes)
473 : goto fail_bitmap;
474 :
475 0 : ret = setup_px(dev, pt);
476 0 : if (ret)
477 : goto fail_page_m;
478 :
479 0 : return pt;
480 :
481 : fail_page_m:
482 0 : kfree(pt->used_ptes);
483 : fail_bitmap:
484 0 : kfree(pt);
485 :
486 0 : return ERR_PTR(ret);
487 0 : }
488 :
489 0 : static void free_pt(struct drm_device *dev, struct i915_page_table *pt)
490 : {
491 0 : cleanup_px(dev, pt);
492 0 : kfree(pt->used_ptes);
493 0 : kfree(pt);
494 0 : }
495 :
496 0 : static void gen8_initialize_pt(struct i915_address_space *vm,
497 : struct i915_page_table *pt)
498 : {
499 : gen8_pte_t scratch_pte;
500 :
501 0 : scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
502 : I915_CACHE_LLC, true);
503 :
504 0 : fill_px(vm->dev, pt, scratch_pte);
505 0 : }
506 :
507 0 : static void gen6_initialize_pt(struct i915_address_space *vm,
508 : struct i915_page_table *pt)
509 : {
510 : gen6_pte_t scratch_pte;
511 :
512 0 : WARN_ON(px_dma(vm->scratch_page) == 0);
513 :
514 0 : scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
515 : I915_CACHE_LLC, true, 0);
516 :
517 0 : fill32_px(vm->dev, pt, scratch_pte);
518 0 : }
519 :
520 0 : static struct i915_page_directory *alloc_pd(struct drm_device *dev)
521 : {
522 : struct i915_page_directory *pd;
523 : int ret = -ENOMEM;
524 :
525 0 : pd = kzalloc(sizeof(*pd), GFP_KERNEL);
526 0 : if (!pd)
527 0 : return ERR_PTR(-ENOMEM);
528 :
529 0 : pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES),
530 : sizeof(*pd->used_pdes), GFP_KERNEL);
531 0 : if (!pd->used_pdes)
532 : goto fail_bitmap;
533 :
534 0 : ret = setup_px(dev, pd);
535 0 : if (ret)
536 : goto fail_page_m;
537 :
538 0 : return pd;
539 :
540 : fail_page_m:
541 0 : kfree(pd->used_pdes);
542 : fail_bitmap:
543 0 : kfree(pd);
544 :
545 0 : return ERR_PTR(ret);
546 0 : }
547 :
548 0 : static void free_pd(struct drm_device *dev, struct i915_page_directory *pd)
549 : {
550 0 : if (px_page(pd)) {
551 0 : cleanup_px(dev, pd);
552 0 : kfree(pd->used_pdes);
553 0 : kfree(pd);
554 0 : }
555 0 : }
556 :
557 0 : static void gen8_initialize_pd(struct i915_address_space *vm,
558 : struct i915_page_directory *pd)
559 : {
560 : gen8_pde_t scratch_pde;
561 :
562 0 : scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
563 :
564 0 : fill_px(vm->dev, pd, scratch_pde);
565 0 : }
566 :
567 0 : static int __pdp_init(struct drm_device *dev,
568 : struct i915_page_directory_pointer *pdp)
569 : {
570 0 : size_t pdpes = I915_PDPES_PER_PDP(dev);
571 :
572 0 : pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes),
573 : sizeof(unsigned long),
574 : GFP_KERNEL);
575 0 : if (!pdp->used_pdpes)
576 0 : return -ENOMEM;
577 :
578 0 : pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory),
579 : GFP_KERNEL);
580 0 : if (!pdp->page_directory) {
581 0 : kfree(pdp->used_pdpes);
582 : /* the PDP might be the statically allocated top level. Keep it
583 : * as clean as possible */
584 0 : pdp->used_pdpes = NULL;
585 0 : return -ENOMEM;
586 : }
587 :
588 0 : return 0;
589 0 : }
590 :
591 0 : static void __pdp_fini(struct i915_page_directory_pointer *pdp)
592 : {
593 0 : kfree(pdp->used_pdpes);
594 0 : kfree(pdp->page_directory);
595 0 : pdp->page_directory = NULL;
596 0 : }
597 :
598 : static struct
599 0 : i915_page_directory_pointer *alloc_pdp(struct drm_device *dev)
600 : {
601 : struct i915_page_directory_pointer *pdp;
602 : int ret = -ENOMEM;
603 :
604 0 : WARN_ON(!USES_FULL_48BIT_PPGTT(dev));
605 :
606 0 : pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
607 0 : if (!pdp)
608 0 : return ERR_PTR(-ENOMEM);
609 :
610 0 : ret = __pdp_init(dev, pdp);
611 0 : if (ret)
612 : goto fail_bitmap;
613 :
614 0 : ret = setup_px(dev, pdp);
615 0 : if (ret)
616 : goto fail_page_m;
617 :
618 0 : return pdp;
619 :
620 : fail_page_m:
621 0 : __pdp_fini(pdp);
622 : fail_bitmap:
623 0 : kfree(pdp);
624 :
625 0 : return ERR_PTR(ret);
626 0 : }
627 :
628 0 : static void free_pdp(struct drm_device *dev,
629 : struct i915_page_directory_pointer *pdp)
630 : {
631 0 : __pdp_fini(pdp);
632 0 : if (USES_FULL_48BIT_PPGTT(dev)) {
633 0 : cleanup_px(dev, pdp);
634 0 : kfree(pdp);
635 0 : }
636 0 : }
637 :
638 0 : static void gen8_initialize_pdp(struct i915_address_space *vm,
639 : struct i915_page_directory_pointer *pdp)
640 : {
641 : gen8_ppgtt_pdpe_t scratch_pdpe;
642 :
643 0 : scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
644 :
645 0 : fill_px(vm->dev, pdp, scratch_pdpe);
646 0 : }
647 :
648 0 : static void gen8_initialize_pml4(struct i915_address_space *vm,
649 : struct i915_pml4 *pml4)
650 : {
651 : gen8_ppgtt_pml4e_t scratch_pml4e;
652 :
653 0 : scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
654 : I915_CACHE_LLC);
655 :
656 0 : fill_px(vm->dev, pml4, scratch_pml4e);
657 0 : }
658 :
659 : static void
660 0 : gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt,
661 : struct i915_page_directory_pointer *pdp,
662 : struct i915_page_directory *pd,
663 : int index)
664 : {
665 : gen8_ppgtt_pdpe_t *page_directorypo;
666 :
667 0 : if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
668 0 : return;
669 :
670 0 : page_directorypo = kmap_px(pdp);
671 0 : page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
672 0 : kunmap_px(ppgtt, page_directorypo);
673 0 : }
674 :
675 : static void
676 0 : gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt,
677 : struct i915_pml4 *pml4,
678 : struct i915_page_directory_pointer *pdp,
679 : int index)
680 : {
681 0 : gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
682 :
683 0 : WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev));
684 0 : pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
685 0 : kunmap_px(ppgtt, pagemap);
686 0 : }
687 :
688 : /* Broadwell Page Directory Pointer Descriptors */
689 0 : static int gen8_write_pdp(struct drm_i915_gem_request *req,
690 : unsigned entry,
691 : dma_addr_t addr)
692 : {
693 0 : struct intel_engine_cs *ring = req->ring;
694 : int ret;
695 :
696 0 : BUG_ON(entry >= 4);
697 :
698 0 : ret = intel_ring_begin(req, 6);
699 0 : if (ret)
700 0 : return ret;
701 :
702 0 : intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
703 0 : intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
704 0 : intel_ring_emit(ring, upper_32_bits(addr));
705 0 : intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
706 0 : intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
707 0 : intel_ring_emit(ring, lower_32_bits(addr));
708 0 : intel_ring_advance(ring);
709 :
710 0 : return 0;
711 0 : }
712 :
713 0 : static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt,
714 : struct drm_i915_gem_request *req)
715 : {
716 : int i, ret;
717 :
718 0 : for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
719 0 : const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
720 :
721 0 : ret = gen8_write_pdp(req, i, pd_daddr);
722 0 : if (ret)
723 0 : return ret;
724 0 : }
725 :
726 0 : return 0;
727 0 : }
728 :
729 0 : static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
730 : struct drm_i915_gem_request *req)
731 : {
732 0 : return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
733 : }
734 :
735 0 : static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
736 : struct i915_page_directory_pointer *pdp,
737 : uint64_t start,
738 : uint64_t length,
739 : gen8_pte_t scratch_pte)
740 : {
741 : struct i915_hw_ppgtt *ppgtt =
742 0 : container_of(vm, struct i915_hw_ppgtt, base);
743 : gen8_pte_t *pt_vaddr;
744 0 : unsigned pdpe = gen8_pdpe_index(start);
745 0 : unsigned pde = gen8_pde_index(start);
746 0 : unsigned pte = gen8_pte_index(start);
747 0 : unsigned num_entries = length >> PAGE_SHIFT;
748 : unsigned last_pte, i;
749 :
750 0 : if (WARN_ON(!pdp))
751 0 : return;
752 :
753 0 : while (num_entries) {
754 : struct i915_page_directory *pd;
755 : struct i915_page_table *pt;
756 :
757 0 : if (WARN_ON(!pdp->page_directory[pdpe]))
758 0 : break;
759 :
760 0 : pd = pdp->page_directory[pdpe];
761 :
762 0 : if (WARN_ON(!pd->page_table[pde]))
763 0 : break;
764 :
765 0 : pt = pd->page_table[pde];
766 :
767 0 : if (WARN_ON(!px_page(pt)))
768 0 : break;
769 :
770 0 : last_pte = pte + num_entries;
771 0 : if (last_pte > GEN8_PTES)
772 : last_pte = GEN8_PTES;
773 :
774 0 : pt_vaddr = kmap_px(pt);
775 :
776 0 : for (i = pte; i < last_pte; i++) {
777 0 : pt_vaddr[i] = scratch_pte;
778 0 : num_entries--;
779 : }
780 :
781 0 : kunmap_px(ppgtt, pt_vaddr);
782 :
783 : pte = 0;
784 0 : if (++pde == I915_PDES) {
785 0 : if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
786 0 : break;
787 : pde = 0;
788 0 : }
789 0 : }
790 0 : }
791 :
792 0 : static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
793 : uint64_t start,
794 : uint64_t length,
795 : bool use_scratch)
796 : {
797 : struct i915_hw_ppgtt *ppgtt =
798 0 : container_of(vm, struct i915_hw_ppgtt, base);
799 0 : gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
800 : I915_CACHE_LLC, use_scratch);
801 :
802 0 : if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
803 0 : gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length,
804 : scratch_pte);
805 0 : } else {
806 : uint64_t templ4, pml4e;
807 : struct i915_page_directory_pointer *pdp;
808 :
809 0 : gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, templ4, pml4e) {
810 0 : gen8_ppgtt_clear_pte_range(vm, pdp, start, length,
811 : scratch_pte);
812 : }
813 : }
814 0 : }
815 :
816 : static void
817 0 : gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
818 : struct i915_page_directory_pointer *pdp,
819 : struct sg_page_iter *sg_iter,
820 : uint64_t start,
821 : enum i915_cache_level cache_level)
822 : {
823 : struct i915_hw_ppgtt *ppgtt =
824 0 : container_of(vm, struct i915_hw_ppgtt, base);
825 : gen8_pte_t *pt_vaddr;
826 0 : unsigned pdpe = gen8_pdpe_index(start);
827 0 : unsigned pde = gen8_pde_index(start);
828 0 : unsigned pte = gen8_pte_index(start);
829 :
830 : pt_vaddr = NULL;
831 :
832 0 : while (__sg_page_iter_next(sg_iter)) {
833 0 : if (pt_vaddr == NULL) {
834 0 : struct i915_page_directory *pd = pdp->page_directory[pdpe];
835 0 : struct i915_page_table *pt = pd->page_table[pde];
836 0 : pt_vaddr = kmap_px(pt);
837 0 : }
838 :
839 0 : pt_vaddr[pte] =
840 0 : gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
841 : cache_level, true);
842 0 : if (++pte == GEN8_PTES) {
843 0 : kunmap_px(ppgtt, pt_vaddr);
844 : pt_vaddr = NULL;
845 0 : if (++pde == I915_PDES) {
846 0 : if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
847 : break;
848 : pde = 0;
849 0 : }
850 : pte = 0;
851 0 : }
852 : }
853 :
854 0 : if (pt_vaddr)
855 0 : kunmap_px(ppgtt, pt_vaddr);
856 0 : }
857 :
858 0 : static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
859 : struct sg_table *pages,
860 : uint64_t start,
861 : enum i915_cache_level cache_level,
862 : u32 unused)
863 : {
864 : struct i915_hw_ppgtt *ppgtt =
865 0 : container_of(vm, struct i915_hw_ppgtt, base);
866 0 : struct sg_page_iter sg_iter;
867 :
868 : #ifdef __linux__
869 : __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0);
870 : #else
871 0 : __sg_page_iter_start(&sg_iter, pages->sgl, pages->nents, 0);
872 : #endif
873 :
874 0 : if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
875 0 : gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start,
876 : cache_level);
877 0 : } else {
878 : struct i915_page_directory_pointer *pdp;
879 : uint64_t templ4, pml4e;
880 0 : uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT;
881 :
882 0 : gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, templ4, pml4e) {
883 0 : gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter,
884 : start, cache_level);
885 : }
886 : }
887 0 : }
888 :
889 0 : static void gen8_free_page_tables(struct drm_device *dev,
890 : struct i915_page_directory *pd)
891 : {
892 : int i;
893 :
894 0 : if (!px_page(pd))
895 0 : return;
896 :
897 0 : for_each_set_bit(i, pd->used_pdes, I915_PDES) {
898 0 : if (WARN_ON(!pd->page_table[i]))
899 : continue;
900 :
901 0 : free_pt(dev, pd->page_table[i]);
902 0 : pd->page_table[i] = NULL;
903 0 : }
904 0 : }
905 :
906 0 : static int gen8_init_scratch(struct i915_address_space *vm)
907 : {
908 0 : struct drm_device *dev = vm->dev;
909 :
910 0 : vm->scratch_page = alloc_scratch_page(dev);
911 0 : if (IS_ERR(vm->scratch_page))
912 0 : return PTR_ERR(vm->scratch_page);
913 :
914 0 : vm->scratch_pt = alloc_pt(dev);
915 0 : if (IS_ERR(vm->scratch_pt)) {
916 0 : free_scratch_page(dev, vm->scratch_page);
917 0 : return PTR_ERR(vm->scratch_pt);
918 : }
919 :
920 0 : vm->scratch_pd = alloc_pd(dev);
921 0 : if (IS_ERR(vm->scratch_pd)) {
922 0 : free_pt(dev, vm->scratch_pt);
923 0 : free_scratch_page(dev, vm->scratch_page);
924 0 : return PTR_ERR(vm->scratch_pd);
925 : }
926 :
927 0 : if (USES_FULL_48BIT_PPGTT(dev)) {
928 0 : vm->scratch_pdp = alloc_pdp(dev);
929 0 : if (IS_ERR(vm->scratch_pdp)) {
930 0 : free_pd(dev, vm->scratch_pd);
931 0 : free_pt(dev, vm->scratch_pt);
932 0 : free_scratch_page(dev, vm->scratch_page);
933 0 : return PTR_ERR(vm->scratch_pdp);
934 : }
935 : }
936 :
937 0 : gen8_initialize_pt(vm, vm->scratch_pt);
938 0 : gen8_initialize_pd(vm, vm->scratch_pd);
939 0 : if (USES_FULL_48BIT_PPGTT(dev))
940 0 : gen8_initialize_pdp(vm, vm->scratch_pdp);
941 :
942 0 : return 0;
943 0 : }
944 :
945 0 : static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
946 : {
947 : enum vgt_g2v_type msg;
948 0 : struct drm_device *dev = ppgtt->base.dev;
949 0 : struct drm_i915_private *dev_priv = dev->dev_private;
950 : unsigned int offset = vgtif_reg(pdp0_lo);
951 : int i;
952 :
953 0 : if (USES_FULL_48BIT_PPGTT(dev)) {
954 0 : u64 daddr = px_dma(&ppgtt->pml4);
955 :
956 0 : I915_WRITE(offset, lower_32_bits(daddr));
957 0 : I915_WRITE(offset + 4, upper_32_bits(daddr));
958 :
959 0 : msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
960 : VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
961 0 : } else {
962 0 : for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
963 0 : u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
964 :
965 0 : I915_WRITE(offset, lower_32_bits(daddr));
966 0 : I915_WRITE(offset + 4, upper_32_bits(daddr));
967 :
968 0 : offset += 8;
969 : }
970 :
971 0 : msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
972 : VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
973 : }
974 :
975 0 : I915_WRITE(vgtif_reg(g2v_notify), msg);
976 :
977 0 : return 0;
978 : }
979 :
980 0 : static void gen8_free_scratch(struct i915_address_space *vm)
981 : {
982 0 : struct drm_device *dev = vm->dev;
983 :
984 0 : if (USES_FULL_48BIT_PPGTT(dev))
985 0 : free_pdp(dev, vm->scratch_pdp);
986 0 : free_pd(dev, vm->scratch_pd);
987 0 : free_pt(dev, vm->scratch_pt);
988 0 : free_scratch_page(dev, vm->scratch_page);
989 0 : }
990 :
991 0 : static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev,
992 : struct i915_page_directory_pointer *pdp)
993 : {
994 : int i;
995 :
996 0 : for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) {
997 0 : if (WARN_ON(!pdp->page_directory[i]))
998 : continue;
999 :
1000 0 : gen8_free_page_tables(dev, pdp->page_directory[i]);
1001 0 : free_pd(dev, pdp->page_directory[i]);
1002 0 : }
1003 :
1004 0 : free_pdp(dev, pdp);
1005 0 : }
1006 :
1007 0 : static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
1008 : {
1009 : int i;
1010 :
1011 0 : for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
1012 0 : if (WARN_ON(!ppgtt->pml4.pdps[i]))
1013 : continue;
1014 :
1015 0 : gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, ppgtt->pml4.pdps[i]);
1016 0 : }
1017 :
1018 0 : cleanup_px(ppgtt->base.dev, &ppgtt->pml4);
1019 0 : }
1020 :
1021 0 : static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
1022 : {
1023 : struct i915_hw_ppgtt *ppgtt =
1024 0 : container_of(vm, struct i915_hw_ppgtt, base);
1025 :
1026 0 : if (intel_vgpu_active(vm->dev))
1027 0 : gen8_ppgtt_notify_vgt(ppgtt, false);
1028 :
1029 0 : if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
1030 0 : gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, &ppgtt->pdp);
1031 : else
1032 0 : gen8_ppgtt_cleanup_4lvl(ppgtt);
1033 :
1034 0 : gen8_free_scratch(vm);
1035 0 : }
1036 :
1037 : /**
1038 : * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range.
1039 : * @vm: Master vm structure.
1040 : * @pd: Page directory for this address range.
1041 : * @start: Starting virtual address to begin allocations.
1042 : * @length: Size of the allocations.
1043 : * @new_pts: Bitmap set by function with new allocations. Likely used by the
1044 : * caller to free on error.
1045 : *
1046 : * Allocate the required number of page tables. Extremely similar to
1047 : * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by
1048 : * the page directory boundary (instead of the page directory pointer). That
1049 : * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is
1050 : * possible, and likely, that the caller will need to make multiple calls to this
1051 : * function to achieve the appropriate allocation.
1052 : *
1053 : * Return: 0 if success; negative error code otherwise.
1054 : */
1055 0 : static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
1056 : struct i915_page_directory *pd,
1057 : uint64_t start,
1058 : uint64_t length,
1059 : unsigned long *new_pts)
1060 : {
1061 0 : struct drm_device *dev = vm->dev;
1062 : struct i915_page_table *pt;
1063 : uint64_t temp;
1064 : uint32_t pde;
1065 :
1066 0 : gen8_for_each_pde(pt, pd, start, length, temp, pde) {
1067 : /* Don't reallocate page tables */
1068 0 : if (test_bit(pde, pd->used_pdes)) {
1069 : /* Scratch is never allocated this way */
1070 0 : WARN_ON(pt == vm->scratch_pt);
1071 0 : continue;
1072 : }
1073 :
1074 0 : pt = alloc_pt(dev);
1075 0 : if (IS_ERR(pt))
1076 : goto unwind_out;
1077 :
1078 0 : gen8_initialize_pt(vm, pt);
1079 0 : pd->page_table[pde] = pt;
1080 0 : __set_bit(pde, new_pts);
1081 0 : trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT);
1082 0 : }
1083 :
1084 0 : return 0;
1085 :
1086 : unwind_out:
1087 0 : for_each_set_bit(pde, new_pts, I915_PDES)
1088 0 : free_pt(dev, pd->page_table[pde]);
1089 :
1090 0 : return -ENOMEM;
1091 0 : }
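/*
 * Worked example for the boundary described above (illustrative only,
 * assuming 4KiB pages): one page table maps GEN8_PTES (512) * 4KiB = 2MiB,
 * so one page directory of I915_PDES (512) entries maps 512 * 2MiB = 1GiB.
 * A VA range crossing a 1GiB boundary therefore needs one call to
 * gen8_ppgtt_alloc_pagetabs() per page directory it touches.
 */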
1092 :
1093 : /**
1094 : * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
1095 : * @vm: Master vm structure.
1096 : * @pdp: Page directory pointer for this address range.
1097 : * @start: Starting virtual address to begin allocations.
1098 : * @length: Size of the allocations.
1099 : * @new_pds: Bitmap set by function with new allocations. Likely used by the
1100 : * caller to free on error.
1101 : *
1102 : * Allocate the required number of page directories starting at the pdpe index of
1103 : * @start, and ending at the pdpe index of @start + @length. This function will skip
1104 : * over already allocated page directories within the range, and only allocate
1105 : * new ones, setting the appropriate pointer within the pdp as well as the
1106 : * correct position in the bitmap @new_pds.
1107 : *
1108 : * The function will only allocate the pages within the range for a given page
1109 : * directory pointer. In other words, if @start + @length straddles a virtually
1110 : * addressed PDP boundary (512GB for 4k pages), there will be more allocations
1111 : * required by the caller. This is not currently possible, and the BUG in the
1112 : * code will prevent it.
1113 : *
1114 : * Return: 0 if success; negative error code otherwise.
1115 : */
1116 : static int
1117 0 : gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
1118 : struct i915_page_directory_pointer *pdp,
1119 : uint64_t start,
1120 : uint64_t length,
1121 : unsigned long *new_pds)
1122 : {
1123 0 : struct drm_device *dev = vm->dev;
1124 : struct i915_page_directory *pd;
1125 : uint64_t temp;
1126 : uint32_t pdpe;
1127 0 : uint32_t pdpes = I915_PDPES_PER_PDP(dev);
1128 :
1129 0 : WARN_ON(!bitmap_empty(new_pds, pdpes));
1130 :
1131 0 : gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
1132 0 : if (test_bit(pdpe, pdp->used_pdpes))
1133 : continue;
1134 :
1135 0 : pd = alloc_pd(dev);
1136 0 : if (IS_ERR(pd))
1137 : goto unwind_out;
1138 :
1139 0 : gen8_initialize_pd(vm, pd);
1140 0 : pdp->page_directory[pdpe] = pd;
1141 0 : __set_bit(pdpe, new_pds);
1142 0 : trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT);
1143 0 : }
1144 :
1145 0 : return 0;
1146 :
1147 : unwind_out:
1148 0 : for_each_set_bit(pdpe, new_pds, pdpes)
1149 0 : free_pd(dev, pdp->page_directory[pdpe]);
1150 :
1151 0 : return -ENOMEM;
1152 0 : }
1153 :
1154 : /**
1155 : * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range.
1156 : * @vm: Master vm structure.
1157 : * @pml4: Page map level 4 for this address range.
1158 : * @start: Starting virtual address to begin allocations.
1159 : * @length: Size of the allocations.
1160 : * @new_pdps: Bitmap set by function with new allocations. Likely used by the
1161 : * caller to free on error.
1162 : *
1163 : * Allocate the required number of page directory pointers. Extremely similar to
1164 : * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs().
1165 : * The main difference is here we are limited by the pml4 boundary (instead of
1166 : * the page directory pointer).
1167 : *
1168 : * Return: 0 if success; negative error code otherwise.
1169 : */
1170 : static int
1171 0 : gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
1172 : struct i915_pml4 *pml4,
1173 : uint64_t start,
1174 : uint64_t length,
1175 : unsigned long *new_pdps)
1176 : {
1177 0 : struct drm_device *dev = vm->dev;
1178 : struct i915_page_directory_pointer *pdp;
1179 : uint64_t temp;
1180 : uint32_t pml4e;
1181 :
1182 0 : WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4));
1183 :
1184 0 : gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
1185 0 : if (!test_bit(pml4e, pml4->used_pml4es)) {
1186 0 : pdp = alloc_pdp(dev);
1187 0 : if (IS_ERR(pdp))
1188 : goto unwind_out;
1189 :
1190 0 : gen8_initialize_pdp(vm, pdp);
1191 0 : pml4->pdps[pml4e] = pdp;
1192 0 : __set_bit(pml4e, new_pdps);
1193 0 : trace_i915_page_directory_pointer_entry_alloc(vm,
1194 : pml4e,
1195 : start,
1196 : GEN8_PML4E_SHIFT);
1197 0 : }
1198 : }
1199 :
1200 0 : return 0;
1201 :
1202 : unwind_out:
1203 0 : for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
1204 0 : free_pdp(dev, pml4->pdps[pml4e]);
1205 :
1206 0 : return -ENOMEM;
1207 0 : }
1208 :
1209 : static void
1210 0 : free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts)
1211 : {
1212 0 : kfree(new_pts);
1213 0 : kfree(new_pds);
1214 0 : }
1215 :
1216 : /* Allocates the page directory bitmap and the array of page table bitmaps. Both
1217 : * of these are sized based on the number of PDPEs in the system.
1218 : */
1219 : static
1220 0 : int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
1221 : unsigned long **new_pts,
1222 : uint32_t pdpes)
1223 : {
1224 : unsigned long *pds;
1225 : unsigned long *pts;
1226 :
1227 0 : pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY);
1228 0 : if (!pds)
1229 0 : return -ENOMEM;
1230 :
1231 0 : pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long),
1232 : GFP_TEMPORARY);
1233 0 : if (!pts)
1234 : goto err_out;
1235 :
1236 0 : *new_pds = pds;
1237 0 : *new_pts = pts;
1238 :
1239 0 : return 0;
1240 :
1241 : err_out:
1242 0 : free_gen8_temp_bitmaps(pds, pts);
1243 0 : return -ENOMEM;
1244 0 : }
1245 :
1246 : /* PDE TLBs are a pain to invalidate on GEN8+. When we modify
1247 : * the page table structures, we mark them dirty so that
1248 : * context switching/execlist queuing code takes extra steps
1249 : * to ensure that tlbs are flushed.
1250 : */
1251 0 : static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
1252 : {
1253 0 : ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
1254 0 : }
1255 :
1256 0 : static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
1257 : struct i915_page_directory_pointer *pdp,
1258 : uint64_t start,
1259 : uint64_t length)
1260 : {
1261 : struct i915_hw_ppgtt *ppgtt =
1262 0 : container_of(vm, struct i915_hw_ppgtt, base);
1263 0 : unsigned long *new_page_dirs, *new_page_tables;
1264 0 : struct drm_device *dev = vm->dev;
1265 : struct i915_page_directory *pd;
1266 : const uint64_t orig_start = start;
1267 : const uint64_t orig_length = length;
1268 : uint64_t temp;
1269 : uint32_t pdpe;
1270 0 : uint32_t pdpes = I915_PDPES_PER_PDP(dev);
1271 : int ret;
1272 :
1273 : /* Wrap is never okay since we can only represent 48b, and we don't
1274 : * actually use the other side of the canonical address space.
1275 : */
1276 0 : if (WARN_ON(start + length < start))
1277 0 : return -ENODEV;
1278 :
1279 0 : if (WARN_ON(start + length > vm->total))
1280 0 : return -ENODEV;
1281 :
1282 0 : ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
1283 0 : if (ret)
1284 0 : return ret;
1285 :
1286 : /* Do the allocations first so we can easily bail out */
1287 0 : ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length,
1288 0 : new_page_dirs);
1289 0 : if (ret) {
1290 0 : free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1291 0 : return ret;
1292 : }
1293 :
1294 : /* For every page directory referenced, allocate page tables */
1295 0 : gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
1296 0 : ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length,
1297 0 : new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES));
1298 0 : if (ret)
1299 : goto err_out;
1300 : }
1301 :
1302 : start = orig_start;
1303 : length = orig_length;
1304 :
1305 : /* Allocations have completed successfully, so set the bitmaps, and do
1306 : * the mappings. */
1307 0 : gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
1308 0 : gen8_pde_t *const page_directory = kmap_px(pd);
1309 : struct i915_page_table *pt;
1310 : uint64_t pd_len = length;
1311 : uint64_t pd_start = start;
1312 : uint32_t pde;
1313 :
1314 : /* Every pd should be allocated, we just did that above. */
1315 0 : WARN_ON(!pd);
1316 :
1317 0 : gen8_for_each_pde(pt, pd, pd_start, pd_len, temp, pde) {
1318 : /* Same reasoning as pd */
1319 0 : WARN_ON(!pt);
1320 0 : WARN_ON(!pd_len);
1321 0 : WARN_ON(!gen8_pte_count(pd_start, pd_len));
1322 :
1323 : /* Set our used ptes within the page table */
1324 0 : bitmap_set(pt->used_ptes,
1325 0 : gen8_pte_index(pd_start),
1326 0 : gen8_pte_count(pd_start, pd_len));
1327 :
1328 : /* Our pde is now pointing to the pagetable, pt */
1329 0 : __set_bit(pde, pd->used_pdes);
1330 :
1331 : /* Map the PDE to the page table */
1332 0 : page_directory[pde] = gen8_pde_encode(px_dma(pt),
1333 : I915_CACHE_LLC);
1334 0 : trace_i915_page_table_entry_map(&ppgtt->base, pde, pt,
1335 0 : gen8_pte_index(start),
1336 0 : gen8_pte_count(start, length),
1337 : GEN8_PTES);
1338 :
1339 : /* NB: We haven't yet mapped ptes to pages. At this
1340 : * point we're still relying on insert_entries() */
1341 : }
1342 :
1343 0 : kunmap_px(ppgtt, page_directory);
1344 0 : __set_bit(pdpe, pdp->used_pdpes);
1345 0 : gen8_setup_page_directory(ppgtt, pdp, pd, pdpe);
1346 : }
1347 :
1348 0 : free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1349 0 : mark_tlbs_dirty(ppgtt);
1350 0 : return 0;
1351 :
1352 : err_out:
1353 0 : while (pdpe--) {
1354 0 : for_each_set_bit(temp, new_page_tables + pdpe *
1355 : BITS_TO_LONGS(I915_PDES), I915_PDES)
1356 0 : free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]);
1357 : }
1358 :
1359 0 : for_each_set_bit(pdpe, new_page_dirs, pdpes)
1360 0 : free_pd(dev, pdp->page_directory[pdpe]);
1361 :
1362 0 : free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1363 0 : mark_tlbs_dirty(ppgtt);
1364 0 : return ret;
1365 0 : }
1366 :
1367 0 : static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
1368 : struct i915_pml4 *pml4,
1369 : uint64_t start,
1370 : uint64_t length)
1371 : {
1372 0 : DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
1373 : struct i915_hw_ppgtt *ppgtt =
1374 0 : container_of(vm, struct i915_hw_ppgtt, base);
1375 : struct i915_page_directory_pointer *pdp;
1376 : uint64_t temp, pml4e;
1377 : int ret = 0;
1378 :
1379 : /* Do the pml4 allocations first, so we don't need to track the newly
1380 : * allocated tables below the pdp */
1381 0 : bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4);
1382 :
1383 : /* The page directory and page table allocations are done in the shared 3-
1384 : * and 4-level code. Just allocate the pdps.
1385 : */
1386 0 : ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length,
1387 : new_pdps);
1388 0 : if (ret)
1389 0 : return ret;
1390 :
1391 0 : WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2,
1392 : "The allocation has spanned more than 512GB. "
1393 : "It is highly likely this is incorrect.");
1394 :
1395 0 : gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
1396 0 : WARN_ON(!pdp);
1397 :
1398 0 : ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
1399 0 : if (ret)
1400 : goto err_out;
1401 :
1402 0 : gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e);
1403 : }
1404 :
1405 0 : bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
1406 : GEN8_PML4ES_PER_PML4);
1407 :
1408 0 : return 0;
1409 :
1410 : err_out:
1411 0 : for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
1412 0 : gen8_ppgtt_cleanup_3lvl(vm->dev, pml4->pdps[pml4e]);
1413 :
1414 0 : return ret;
1415 0 : }
1416 :
1417 0 : static int gen8_alloc_va_range(struct i915_address_space *vm,
1418 : uint64_t start, uint64_t length)
1419 : {
1420 : struct i915_hw_ppgtt *ppgtt =
1421 0 : container_of(vm, struct i915_hw_ppgtt, base);
1422 :
1423 0 : if (USES_FULL_48BIT_PPGTT(vm->dev))
1424 0 : return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length);
1425 : else
1426 0 : return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
1427 0 : }
1428 :
1429 : #ifdef __linux__
1430 : static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
1431 : uint64_t start, uint64_t length,
1432 : gen8_pte_t scratch_pte,
1433 : struct seq_file *m)
1434 : {
1435 : struct i915_page_directory *pd;
1436 : uint64_t temp;
1437 : uint32_t pdpe;
1438 :
1439 : gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
1440 : struct i915_page_table *pt;
1441 : uint64_t pd_len = length;
1442 : uint64_t pd_start = start;
1443 : uint32_t pde;
1444 :
1445 : if (!test_bit(pdpe, pdp->used_pdpes))
1446 : continue;
1447 :
1448 : seq_printf(m, "\tPDPE #%d\n", pdpe);
1449 : gen8_for_each_pde(pt, pd, pd_start, pd_len, temp, pde) {
1450 : uint32_t pte;
1451 : gen8_pte_t *pt_vaddr;
1452 :
1453 : if (!test_bit(pde, pd->used_pdes))
1454 : continue;
1455 :
1456 : pt_vaddr = kmap_px(pt);
1457 : for (pte = 0; pte < GEN8_PTES; pte += 4) {
1458 : uint64_t va =
1459 : (pdpe << GEN8_PDPE_SHIFT) |
1460 : (pde << GEN8_PDE_SHIFT) |
1461 : (pte << GEN8_PTE_SHIFT);
1462 : int i;
1463 : bool found = false;
1464 :
1465 : for (i = 0; i < 4; i++)
1466 : if (pt_vaddr[pte + i] != scratch_pte)
1467 : found = true;
1468 : if (!found)
1469 : continue;
1470 :
1471 : seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte);
1472 : for (i = 0; i < 4; i++) {
1473 : if (pt_vaddr[pte + i] != scratch_pte)
1474 : seq_printf(m, " %llx", pt_vaddr[pte + i]);
1475 : else
1476 : seq_puts(m, " SCRATCH ");
1477 : }
1478 : seq_puts(m, "\n");
1479 : }
1480 : /* don't use kunmap_px, it could trigger
1481 : * an unnecessary flush.
1482 : */
1483 : kunmap_atomic(pt_vaddr);
1484 : }
1485 : }
1486 : }
1487 :
1488 : static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1489 : {
1490 : struct i915_address_space *vm = &ppgtt->base;
1491 : uint64_t start = ppgtt->base.start;
1492 : uint64_t length = ppgtt->base.total;
1493 : gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
1494 : I915_CACHE_LLC, true);
1495 :
1496 : if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
1497 : gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
1498 : } else {
1499 : uint64_t templ4, pml4e;
1500 : struct i915_pml4 *pml4 = &ppgtt->pml4;
1501 : struct i915_page_directory_pointer *pdp;
1502 :
1503 : gen8_for_each_pml4e(pdp, pml4, start, length, templ4, pml4e) {
1504 : if (!test_bit(pml4e, pml4->used_pml4es))
1505 : continue;
1506 :
1507 : seq_printf(m, " PML4E #%llu\n", pml4e);
1508 : gen8_dump_pdp(pdp, start, length, scratch_pte, m);
1509 : }
1510 : }
1511 : }
1512 : #endif
1513 :
1514 0 : static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
1515 : {
1516 0 : unsigned long *new_page_dirs, *new_page_tables;
1517 0 : uint32_t pdpes = I915_PDPES_PER_PDP(dev);
1518 : int ret;
1519 :
1520 : /* We allocate a temp bitmap for page tables for no gain,
1521 : * but as this is for init only, let's keep things simple.
1522 : */
1523 0 : ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
1524 0 : if (ret)
1525 0 : return ret;
1526 :
1527 : /* Allocate for all pdps regardless of how the ppgtt
1528 : * was defined.
1529 : */
1530 0 : ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp,
1531 : 0, 1ULL << 32,
1532 0 : new_page_dirs);
1533 0 : if (!ret)
1534 0 : *ppgtt->pdp.used_pdpes = *new_page_dirs;
1535 :
1536 0 : free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1537 :
1538 0 : return ret;
1539 0 : }
1540 :
1541 : /*
1542 : * GEN8 legacy ppgtt programming is accomplished through at most 4 PDP registers,
1543 : * with a net effect resembling a 2-level page table in normal x86 terms. Each
1544 : * PDP represents 1GB of memory: 4 * 512 * 512 * 4096 bytes = 4GB of legacy 32b
1545 : * address space.
1546 : *
1547 : */
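/*
 * Illustrative address split for the legacy 32b layout described above
 * (assuming 4KiB pages): bits [31:30] pick one of the 4 PDPEs, bits [29:21]
 * one of 512 PDEs, bits [20:12] one of 512 PTEs, and bits [11:0] the byte
 * offset within the page, which is where the 4 * 512 * 512 * 4096 = 4GB
 * figure comes from.
 */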
1548 0 : static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
1549 : {
1550 : int ret;
1551 :
1552 0 : ret = gen8_init_scratch(&ppgtt->base);
1553 0 : if (ret)
1554 0 : return ret;
1555 :
1556 0 : ppgtt->base.start = 0;
1557 0 : ppgtt->base.cleanup = gen8_ppgtt_cleanup;
1558 0 : ppgtt->base.allocate_va_range = gen8_alloc_va_range;
1559 0 : ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
1560 0 : ppgtt->base.clear_range = gen8_ppgtt_clear_range;
1561 0 : ppgtt->base.unbind_vma = ppgtt_unbind_vma;
1562 0 : ppgtt->base.bind_vma = ppgtt_bind_vma;
1563 : #ifdef __linux__
1564 : ppgtt->debug_dump = gen8_dump_ppgtt;
1565 : #endif
1566 :
1567 0 : if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
1568 0 : ret = setup_px(ppgtt->base.dev, &ppgtt->pml4);
1569 0 : if (ret)
1570 : goto free_scratch;
1571 :
1572 0 : gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4);
1573 :
1574 0 : ppgtt->base.total = 1ULL << 48;
1575 0 : ppgtt->switch_mm = gen8_48b_mm_switch;
1576 0 : } else {
1577 0 : ret = __pdp_init(ppgtt->base.dev, &ppgtt->pdp);
1578 0 : if (ret)
1579 : goto free_scratch;
1580 :
1581 0 : ppgtt->base.total = 1ULL << 32;
1582 0 : ppgtt->switch_mm = gen8_legacy_mm_switch;
1583 0 : trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base,
1584 : 0, 0,
1585 : GEN8_PML4E_SHIFT);
1586 :
1587 0 : if (intel_vgpu_active(ppgtt->base.dev)) {
1588 0 : ret = gen8_preallocate_top_level_pdps(ppgtt);
1589 0 : if (ret)
1590 : goto free_scratch;
1591 : }
1592 : }
1593 :
1594 0 : if (intel_vgpu_active(ppgtt->base.dev))
1595 0 : gen8_ppgtt_notify_vgt(ppgtt, true);
1596 :
1597 0 : return 0;
1598 :
1599 : free_scratch:
1600 0 : gen8_free_scratch(&ppgtt->base);
1601 0 : return ret;
1602 0 : }
1603 :
1604 : #ifdef __linux__
1605 : static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1606 : {
1607 : struct i915_address_space *vm = &ppgtt->base;
1608 : struct i915_page_table *unused;
1609 : gen6_pte_t scratch_pte;
1610 : uint32_t pd_entry;
1611 : uint32_t pte, pde, temp;
1612 : uint32_t start = ppgtt->base.start, length = ppgtt->base.total;
1613 :
1614 : scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
1615 : I915_CACHE_LLC, true, 0);
1616 :
1617 : gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde) {
1618 : u32 expected;
1619 : gen6_pte_t *pt_vaddr;
1620 : const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]);
1621 : pd_entry = readl(ppgtt->pd_addr + pde);
1622 : expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
1623 :
1624 : if (pd_entry != expected)
1625 : seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
1626 : pde,
1627 : pd_entry,
1628 : expected);
1629 : seq_printf(m, "\tPDE: %x\n", pd_entry);
1630 :
1631 : pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]);
1632 :
1633 : for (pte = 0; pte < GEN6_PTES; pte+=4) {
1634 : unsigned long va =
1635 : (pde * PAGE_SIZE * GEN6_PTES) +
1636 : (pte * PAGE_SIZE);
1637 : int i;
1638 : bool found = false;
1639 : for (i = 0; i < 4; i++)
1640 : if (pt_vaddr[pte + i] != scratch_pte)
1641 : found = true;
1642 : if (!found)
1643 : continue;
1644 :
1645 : seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
1646 : for (i = 0; i < 4; i++) {
1647 : if (pt_vaddr[pte + i] != scratch_pte)
1648 : seq_printf(m, " %08x", pt_vaddr[pte + i]);
1649 : else
1650 : seq_puts(m, " SCRATCH ");
1651 : }
1652 : seq_puts(m, "\n");
1653 : }
1654 : kunmap_px(ppgtt, pt_vaddr);
1655 : }
1656 : }
1657 : #endif
1658 :
1659 : /* Write the page directory entry at index @pde in @pd, pointing at the page table @pt */
1660 0 : static void gen6_write_pde(struct i915_page_directory *pd,
1661 : const int pde, struct i915_page_table *pt)
1662 : {
1663 : /* Caller needs to make sure the write completes if necessary */
1664 : struct i915_hw_ppgtt *ppgtt =
1665 0 : container_of(pd, struct i915_hw_ppgtt, pd);
1666 : u32 pd_entry;
1667 :
1668 0 : pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt));
1669 0 : pd_entry |= GEN6_PDE_VALID;
1670 :
1671 0 : writel(pd_entry, ppgtt->pd_addr + pde);
1672 0 : }
1673 :
1674 : /* Write all the page tables found in the ppgtt structure to incrementing page
1675 : * directories. */
1676 0 : static void gen6_write_page_range(struct drm_i915_private *dev_priv,
1677 : struct i915_page_directory *pd,
1678 : uint32_t start, uint32_t length)
1679 : {
1680 : struct i915_page_table *pt;
1681 : uint32_t pde, temp;
1682 :
1683 0 : gen6_for_each_pde(pt, pd, start, length, temp, pde)
1684 0 : gen6_write_pde(pd, pde, pt);
1685 :
1686 : /* Make sure write is complete before other code can use this page
1687 : * table. Also required for WC mapped PTEs. */
1688 0 : readl(dev_priv->gtt.gsm);
1689 0 : }
1690 :
1691 0 : static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
1692 : {
1693 0 : BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f);
1694 :
1695 0 : return (ppgtt->pd.base.ggtt_offset / 64) << 16;
1696 : }
1697 :
1698 0 : static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
1699 : struct drm_i915_gem_request *req)
1700 : {
1701 0 : struct intel_engine_cs *ring = req->ring;
1702 : int ret;
1703 :
1704 : /* NB: TLBs must be flushed and invalidated before a switch */
1705 0 : ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1706 0 : if (ret)
1707 0 : return ret;
1708 :
1709 0 : ret = intel_ring_begin(req, 6);
1710 0 : if (ret)
1711 0 : return ret;
1712 :
1713 0 : intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
1714 0 : intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
1715 0 : intel_ring_emit(ring, PP_DIR_DCLV_2G);
1716 0 : intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
1717 0 : intel_ring_emit(ring, get_pd_offset(ppgtt));
1718 0 : intel_ring_emit(ring, MI_NOOP);
1719 0 : intel_ring_advance(ring);
1720 :
1721 0 : return 0;
1722 0 : }
1723 :
1724 0 : static int vgpu_mm_switch(struct i915_hw_ppgtt *ppgtt,
1725 : struct drm_i915_gem_request *req)
1726 : {
1727 0 : struct intel_engine_cs *ring = req->ring;
1728 0 : struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev);
1729 :
1730 0 : I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
1731 0 : I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
1732 0 : return 0;
1733 : }
1734 :
1735 0 : static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
1736 : struct drm_i915_gem_request *req)
1737 : {
1738 0 : struct intel_engine_cs *ring = req->ring;
1739 : int ret;
1740 :
1741 : /* NB: TLBs must be flushed and invalidated before a switch */
1742 0 : ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1743 0 : if (ret)
1744 0 : return ret;
1745 :
1746 0 : ret = intel_ring_begin(req, 6);
1747 0 : if (ret)
1748 0 : return ret;
1749 :
1750 0 : intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
1751 0 : intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
1752 0 : intel_ring_emit(ring, PP_DIR_DCLV_2G);
1753 0 : intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
1754 0 : intel_ring_emit(ring, get_pd_offset(ppgtt));
1755 0 : intel_ring_emit(ring, MI_NOOP);
1756 0 : intel_ring_advance(ring);
1757 :
1758 : /* XXX: RCS is the only one to auto invalidate the TLBs? */
1759 0 : if (ring->id != RCS) {
1760 0 : ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1761 0 : if (ret)
1762 0 : return ret;
1763 : }
1764 :
1765 0 : return 0;
1766 0 : }
1767 :
1768 0 : static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
1769 : struct drm_i915_gem_request *req)
1770 : {
1771 0 : struct intel_engine_cs *ring = req->ring;
1772 0 : struct drm_device *dev = ppgtt->base.dev;
1773 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1774 :
1775 :
1776 0 : I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
1777 0 : I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
1778 :
1779 0 : POSTING_READ(RING_PP_DIR_DCLV(ring));
1780 :
1781 0 : return 0;
1782 : }
1783 :
1784 0 : static void gen8_ppgtt_enable(struct drm_device *dev)
1785 : {
1786 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1787 : struct intel_engine_cs *ring;
1788 : int j;
1789 :
1790 0 : for_each_ring(ring, dev_priv, j) {
1791 0 : u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? GEN8_GFX_PPGTT_48B : 0;
1792 0 : I915_WRITE(RING_MODE_GEN7(ring),
1793 : _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
1794 0 : }
1795 0 : }
1796 :
1797 0 : static void gen7_ppgtt_enable(struct drm_device *dev)
1798 : {
1799 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1800 : struct intel_engine_cs *ring;
1801 : uint32_t ecochk, ecobits;
1802 : int i;
1803 :
1804 0 : ecobits = I915_READ(GAC_ECO_BITS);
1805 0 : I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
1806 :
1807 0 : ecochk = I915_READ(GAM_ECOCHK);
1808 0 : if (IS_HASWELL(dev)) {
1809 0 : ecochk |= ECOCHK_PPGTT_WB_HSW;
1810 0 : } else {
1811 0 : ecochk |= ECOCHK_PPGTT_LLC_IVB;
1812 0 : ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
1813 : }
1814 0 : I915_WRITE(GAM_ECOCHK, ecochk);
1815 :
1816 0 : for_each_ring(ring, dev_priv, i) {
1817 : /* GFX_MODE is per-ring on gen7+ */
1818 0 : I915_WRITE(RING_MODE_GEN7(ring),
1819 : _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1820 0 : }
1821 0 : }
1822 :
1823 0 : static void gen6_ppgtt_enable(struct drm_device *dev)
1824 : {
1825 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1826 : uint32_t ecochk, gab_ctl, ecobits;
1827 :
1828 0 : ecobits = I915_READ(GAC_ECO_BITS);
1829 0 : I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
1830 : ECOBITS_PPGTT_CACHE64B);
1831 :
1832 0 : gab_ctl = I915_READ(GAB_CTL);
1833 0 : I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
1834 :
1835 0 : ecochk = I915_READ(GAM_ECOCHK);
1836 0 : I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
1837 :
1838 0 : I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1839 0 : }
1840 :
1841 : /* PPGTT support for Sandybridge/Gen6 and later */
1842 0 : static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
1843 : uint64_t start,
1844 : uint64_t length,
1845 : bool use_scratch)
1846 : {
1847 : struct i915_hw_ppgtt *ppgtt =
1848 0 : container_of(vm, struct i915_hw_ppgtt, base);
1849 : gen6_pte_t *pt_vaddr, scratch_pte;
1850 0 : unsigned first_entry = start >> PAGE_SHIFT;
1851 0 : unsigned num_entries = length >> PAGE_SHIFT;
1852 0 : unsigned act_pt = first_entry / GEN6_PTES;
1853 0 : unsigned first_pte = first_entry % GEN6_PTES;
1854 : unsigned last_pte, i;
1855 :
1856 0 : scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
1857 : I915_CACHE_LLC, true, 0);
1858 :
1859 0 : while (num_entries) {
1860 0 : last_pte = first_pte + num_entries;
1861 0 : if (last_pte > GEN6_PTES)
1862 : last_pte = GEN6_PTES;
1863 :
1864 0 : pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
1865 :
1866 0 : for (i = first_pte; i < last_pte; i++)
1867 0 : pt_vaddr[i] = scratch_pte;
1868 :
1869 0 : kunmap_px(ppgtt, pt_vaddr);
1870 :
1871 0 : num_entries -= last_pte - first_pte;
1872 : first_pte = 0;
1873 0 : act_pt++;
1874 : }
1875 0 : }
1876 :
1877 0 : static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
1878 : struct sg_table *pages,
1879 : uint64_t start,
1880 : enum i915_cache_level cache_level, u32 flags)
1881 : {
1882 : struct i915_hw_ppgtt *ppgtt =
1883 0 : container_of(vm, struct i915_hw_ppgtt, base);
1884 : gen6_pte_t *pt_vaddr;
1885 0 : unsigned first_entry = start >> PAGE_SHIFT;
1886 0 : unsigned act_pt = first_entry / GEN6_PTES;
1887 0 : unsigned act_pte = first_entry % GEN6_PTES;
1888 0 : struct sg_page_iter sg_iter;
1889 :
1890 : pt_vaddr = NULL;
1891 0 : for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
1892 0 : if (pt_vaddr == NULL)
1893 0 : pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
1894 :
1895 0 : pt_vaddr[act_pte] =
1896 0 : vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
1897 : cache_level, true, flags);
1898 :
1899 0 : if (++act_pte == GEN6_PTES) {
1900 0 : kunmap_px(ppgtt, pt_vaddr);
1901 : pt_vaddr = NULL;
1902 0 : act_pt++;
1903 : act_pte = 0;
1904 0 : }
1905 : }
1906 0 : if (pt_vaddr)
1907 0 : kunmap_px(ppgtt, pt_vaddr);
1908 0 : }
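/*
 * Illustrative aside (editorial, not part of the driver and carrying no
 * coverage data): the two gen6 helpers above walk the PPGTT by splitting a
 * GPU offset into a page-table index (act_pt) and an entry index within
 * that table (act_pte).  The standalone sketch below repeats that
 * arithmetic for a sample offset, assuming the usual upstream values
 * PAGE_SHIFT == 12 and GEN6_PTES == 1024; those constants are assumptions
 * here, not taken from this file.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_PAGE_SHIFT 12u     /* assumed: 4 KiB pages */
#define EX_GEN6_PTES  1024u   /* assumed: 4 KiB page table / 4-byte PTE */

int main(void)
{
	uint64_t start = 0x00123000;                    /* sample GPU offset */
	unsigned first_entry = start >> EX_PAGE_SHIFT;  /* page index        */
	unsigned act_pt  = first_entry / EX_GEN6_PTES;  /* which page table  */
	unsigned act_pte = first_entry % EX_GEN6_PTES;  /* slot inside it    */

	/* Prints: offset 0x123000 -> PDE 0, PTE 291 */
	printf("offset 0x%llx -> PDE %u, PTE %u\n",
	       (unsigned long long)start, act_pt, act_pte);
	return 0;
}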
1909 :
1910 0 : static int gen6_alloc_va_range(struct i915_address_space *vm,
1911 : uint64_t start_in, uint64_t length_in)
1912 : {
1913 0 : DECLARE_BITMAP(new_page_tables, I915_PDES);
1914 0 : struct drm_device *dev = vm->dev;
1915 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1916 : struct i915_hw_ppgtt *ppgtt =
1917 0 : container_of(vm, struct i915_hw_ppgtt, base);
1918 : struct i915_page_table *pt;
1919 : uint32_t start, length, start_save, length_save;
1920 : uint32_t pde, temp;
1921 : int ret;
1922 :
1923 0 : if (WARN_ON(start_in + length_in > ppgtt->base.total))
1924 0 : return -ENODEV;
1925 :
1926 0 : start = start_save = start_in;
1927 0 : length = length_save = length_in;
1928 :
1929 0 : bitmap_zero(new_page_tables, I915_PDES);
1930 :
1931 : /* The allocation is done in two stages so that we can bail out with
1932 : 	 * a minimal amount of pain. The first stage finds new page tables that
1933 : 	 * need allocation. The second stage marks the PTEs in use within the page
1934 : * tables.
1935 : */
1936 0 : gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
1937 0 : if (pt != vm->scratch_pt) {
1938 0 : WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES));
1939 0 : continue;
1940 : }
1941 :
1942 : /* We've already allocated a page table */
1943 0 : WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));
1944 :
1945 0 : pt = alloc_pt(dev);
1946 0 : if (IS_ERR(pt)) {
1947 0 : ret = PTR_ERR(pt);
1948 : goto unwind_out;
1949 : }
1950 :
1951 0 : gen6_initialize_pt(vm, pt);
1952 :
1953 0 : ppgtt->pd.page_table[pde] = pt;
1954 0 : __set_bit(pde, new_page_tables);
1955 0 : trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT);
1956 0 : }
1957 :
1958 : start = start_save;
1959 : length = length_save;
1960 :
1961 0 : gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
1962 0 : DECLARE_BITMAP(tmp_bitmap, GEN6_PTES);
1963 :
1964 0 : bitmap_zero(tmp_bitmap, GEN6_PTES);
1965 0 : bitmap_set(tmp_bitmap, gen6_pte_index(start),
1966 0 : gen6_pte_count(start, length));
1967 :
1968 0 : if (__test_and_clear_bit(pde, new_page_tables))
1969 0 : gen6_write_pde(&ppgtt->pd, pde, pt);
1970 :
1971 0 : trace_i915_page_table_entry_map(vm, pde, pt,
1972 0 : gen6_pte_index(start),
1973 0 : gen6_pte_count(start, length),
1974 : GEN6_PTES);
1975 0 : bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes,
1976 : GEN6_PTES);
1977 0 : }
1978 :
1979 0 : WARN_ON(!bitmap_empty(new_page_tables, I915_PDES));
1980 :
1981 : /* Make sure write is complete before other code can use this page
1982 : 	 * table. Also required for WC mapped PTEs */
1983 0 : readl(dev_priv->gtt.gsm);
1984 :
1985 0 : mark_tlbs_dirty(ppgtt);
1986 0 : return 0;
1987 :
1988 : unwind_out:
1989 0 : for_each_set_bit(pde, new_page_tables, I915_PDES) {
1990 0 : struct i915_page_table *pt = ppgtt->pd.page_table[pde];
1991 :
1992 0 : ppgtt->pd.page_table[pde] = vm->scratch_pt;
1993 0 : free_pt(vm->dev, pt);
1994 : }
1995 :
1996 0 : mark_tlbs_dirty(ppgtt);
1997 0 : return ret;
1998 0 : }
1999 :
2000 0 : static int gen6_init_scratch(struct i915_address_space *vm)
2001 : {
2002 0 : struct drm_device *dev = vm->dev;
2003 :
2004 0 : vm->scratch_page = alloc_scratch_page(dev);
2005 0 : if (IS_ERR(vm->scratch_page))
2006 0 : return PTR_ERR(vm->scratch_page);
2007 :
2008 0 : vm->scratch_pt = alloc_pt(dev);
2009 0 : if (IS_ERR(vm->scratch_pt)) {
2010 0 : free_scratch_page(dev, vm->scratch_page);
2011 0 : return PTR_ERR(vm->scratch_pt);
2012 : }
2013 :
2014 0 : gen6_initialize_pt(vm, vm->scratch_pt);
2015 :
2016 0 : return 0;
2017 0 : }
2018 :
2019 0 : static void gen6_free_scratch(struct i915_address_space *vm)
2020 : {
2021 0 : struct drm_device *dev = vm->dev;
2022 :
2023 0 : free_pt(dev, vm->scratch_pt);
2024 0 : free_scratch_page(dev, vm->scratch_page);
2025 0 : }
2026 :
2027 0 : static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
2028 : {
2029 : struct i915_hw_ppgtt *ppgtt =
2030 0 : container_of(vm, struct i915_hw_ppgtt, base);
2031 : struct i915_page_table *pt;
2032 : uint32_t pde;
2033 :
2034 0 : drm_mm_remove_node(&ppgtt->node);
2035 :
2036 0 : gen6_for_all_pdes(pt, ppgtt, pde) {
2037 0 : if (pt != vm->scratch_pt)
2038 0 : free_pt(ppgtt->base.dev, pt);
2039 : }
2040 :
2041 0 : gen6_free_scratch(vm);
2042 0 : }
2043 :
2044 0 : static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
2045 : {
2046 0 : struct i915_address_space *vm = &ppgtt->base;
2047 0 : struct drm_device *dev = ppgtt->base.dev;
2048 0 : struct drm_i915_private *dev_priv = dev->dev_private;
2049 : bool retried = false;
2050 : int ret;
2051 :
2052 : 	/* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
2053 : * allocator works in address space sizes, so it's multiplied by page
2054 : * size. We allocate at the top of the GTT to avoid fragmentation.
2055 : */
2056 0 : BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm));
2057 :
2058 0 : ret = gen6_init_scratch(vm);
2059 0 : if (ret)
2060 0 : return ret;
2061 :
2062 : alloc:
2063 0 : ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
2064 0 : &ppgtt->node, GEN6_PD_SIZE,
2065 : GEN6_PD_ALIGN, 0,
2066 0 : 0, dev_priv->gtt.base.total,
2067 : DRM_MM_TOPDOWN);
2068 0 : if (ret == -ENOSPC && !retried) {
2069 0 : ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
2070 : GEN6_PD_SIZE, GEN6_PD_ALIGN,
2071 : I915_CACHE_NONE,
2072 0 : 0, dev_priv->gtt.base.total,
2073 : 0);
2074 0 : if (ret)
2075 : goto err_out;
2076 :
2077 : retried = true;
2078 0 : goto alloc;
2079 : }
2080 :
2081 0 : if (ret)
2082 : goto err_out;
2083 :
2084 :
2085 0 : if (ppgtt->node.start < dev_priv->gtt.mappable_end)
2086 : DRM_DEBUG("Forced to use aperture for PDEs\n");
2087 :
2088 0 : return 0;
2089 :
2090 : err_out:
2091 0 : gen6_free_scratch(vm);
2092 0 : return ret;
2093 0 : }
2094 :
2095 0 : static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
2096 : {
2097 0 : return gen6_ppgtt_allocate_page_directories(ppgtt);
2098 : }
2099 :
2100 0 : static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
2101 : uint64_t start, uint64_t length)
2102 : {
2103 : struct i915_page_table *unused;
2104 : uint32_t pde, temp;
2105 :
2106 0 : gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde)
2107 0 : ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt;
2108 0 : }
2109 :
2110 0 : static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
2111 : {
2112 0 : struct drm_device *dev = ppgtt->base.dev;
2113 0 : struct drm_i915_private *dev_priv = dev->dev_private;
2114 : int ret;
2115 :
2116 0 : ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
2117 0 : if (IS_GEN6(dev)) {
2118 0 : ppgtt->switch_mm = gen6_mm_switch;
2119 0 : } else if (IS_HASWELL(dev)) {
2120 0 : ppgtt->switch_mm = hsw_mm_switch;
2121 0 : } else if (IS_GEN7(dev)) {
2122 0 : ppgtt->switch_mm = gen7_mm_switch;
2123 : } else
2124 0 : BUG();
2125 :
2126 0 : if (intel_vgpu_active(dev))
2127 0 : ppgtt->switch_mm = vgpu_mm_switch;
2128 :
2129 0 : ret = gen6_ppgtt_alloc(ppgtt);
2130 0 : if (ret)
2131 0 : return ret;
2132 :
2133 0 : ppgtt->base.allocate_va_range = gen6_alloc_va_range;
2134 0 : ppgtt->base.clear_range = gen6_ppgtt_clear_range;
2135 0 : ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
2136 0 : ppgtt->base.unbind_vma = ppgtt_unbind_vma;
2137 0 : ppgtt->base.bind_vma = ppgtt_bind_vma;
2138 0 : ppgtt->base.cleanup = gen6_ppgtt_cleanup;
2139 0 : ppgtt->base.start = 0;
2140 0 : ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE;
2141 : #ifdef __linux__
2142 : ppgtt->debug_dump = gen6_dump_ppgtt;
2143 : #endif
2144 :
2145 0 : ppgtt->pd.base.ggtt_offset =
2146 0 : ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);
2147 :
2148 0 : ppgtt->pd_addr = (gen6_pte_t __iomem *)dev_priv->gtt.gsm +
2149 0 : ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t);
2150 :
2151 0 : gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
2152 :
2153 0 : gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);
2154 :
2155 : DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n",
2156 : ppgtt->node.size >> 20,
2157 : ppgtt->node.start / PAGE_SIZE);
2158 :
2159 : DRM_DEBUG("Adding PPGTT at offset %x\n",
2160 : ppgtt->pd.base.ggtt_offset << 10);
2161 :
2162 0 : return 0;
2163 0 : }
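/*
 * Editor's worked arithmetic (hedged aside, not driver text): assuming the
 * usual upstream values I915_PDES == 512, GEN6_PTES == 1024 and
 * PAGE_SIZE == 4096, the total set above works out to
 * 512 * 1024 * 4096 bytes == 2 GiB, which is consistent with the
 * PP_DIR_DCLV_2G value loaded by the switch_mm paths earlier in this file.
 * ggtt_offset, in turn, appears to convert node.start from a GGTT byte
 * address into a byte offset within the GGTT PTE array (one gen6_pte_t per
 * page), so pd_addr ends up pointing at the slots in dev_priv->gtt.gsm
 * that back the reserved node.
 */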
2164 :
2165 0 : static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
2166 : {
2167 0 : ppgtt->base.dev = dev;
2168 :
2169 0 : if (INTEL_INFO(dev)->gen < 8)
2170 0 : return gen6_ppgtt_init(ppgtt);
2171 : else
2172 0 : return gen8_ppgtt_init(ppgtt);
2173 0 : }
2174 :
2175 0 : static void i915_address_space_init(struct i915_address_space *vm,
2176 : struct drm_i915_private *dev_priv)
2177 : {
2178 0 : drm_mm_init(&vm->mm, vm->start, vm->total);
2179 0 : vm->dev = dev_priv->dev;
2180 0 : INIT_LIST_HEAD(&vm->active_list);
2181 0 : INIT_LIST_HEAD(&vm->inactive_list);
2182 0 : list_add_tail(&vm->global_link, &dev_priv->vm_list);
2183 0 : }
2184 :
2185 0 : int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
2186 : {
2187 0 : struct drm_i915_private *dev_priv = dev->dev_private;
2188 : int ret = 0;
2189 :
2190 0 : ret = __hw_ppgtt_init(dev, ppgtt);
2191 0 : if (ret == 0) {
2192 0 : kref_init(&ppgtt->ref);
2193 0 : i915_address_space_init(&ppgtt->base, dev_priv);
2194 0 : }
2195 :
2196 0 : return ret;
2197 : }
2198 :
2199 0 : int i915_ppgtt_init_hw(struct drm_device *dev)
2200 : {
2201 : /* In the case of execlists, PPGTT is enabled by the context descriptor
2202 : * and the PDPs are contained within the context itself. We don't
2203 : * need to do anything here. */
2204 0 : if (i915.enable_execlists)
2205 0 : return 0;
2206 :
2207 0 : if (!USES_PPGTT(dev))
2208 0 : return 0;
2209 :
2210 0 : if (IS_GEN6(dev))
2211 0 : gen6_ppgtt_enable(dev);
2212 0 : else if (IS_GEN7(dev))
2213 0 : gen7_ppgtt_enable(dev);
2214 0 : else if (INTEL_INFO(dev)->gen >= 8)
2215 0 : gen8_ppgtt_enable(dev);
2216 : else
2217 0 : MISSING_CASE(INTEL_INFO(dev)->gen);
2218 :
2219 0 : return 0;
2220 0 : }
2221 :
2222 0 : int i915_ppgtt_init_ring(struct drm_i915_gem_request *req)
2223 : {
2224 0 : struct drm_i915_private *dev_priv = req->ring->dev->dev_private;
2225 0 : struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
2226 :
2227 0 : if (i915.enable_execlists)
2228 0 : return 0;
2229 :
2230 0 : if (!ppgtt)
2231 0 : return 0;
2232 :
2233 0 : return ppgtt->switch_mm(ppgtt, req);
2234 0 : }
2235 :
2236 : struct i915_hw_ppgtt *
2237 0 : i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
2238 : {
2239 : struct i915_hw_ppgtt *ppgtt;
2240 : int ret;
2241 :
2242 0 : ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2243 0 : if (!ppgtt)
2244 0 : return ERR_PTR(-ENOMEM);
2245 :
2246 0 : ret = i915_ppgtt_init(dev, ppgtt);
2247 0 : if (ret) {
2248 0 : kfree(ppgtt);
2249 0 : return ERR_PTR(ret);
2250 : }
2251 :
2252 0 : ppgtt->file_priv = fpriv;
2253 :
2254 0 : trace_i915_ppgtt_create(&ppgtt->base);
2255 :
2256 0 : return ppgtt;
2257 0 : }
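/*
 * Editor's usage sketch (hedged; the locking and request plumbing of real
 * callers is omitted).  i915_ppgtt_create() hands back a kref-counted
 * PPGTT, and i915_ppgtt_release() below is the matching kref destructor,
 * so the expected lifecycle looks roughly like:
 *
 *	struct i915_hw_ppgtt *ppgtt;
 *
 *	ppgtt = i915_ppgtt_create(dev, file_priv);
 *	if (IS_ERR(ppgtt))
 *		return PTR_ERR(ppgtt);
 *
 *	... bind VMAs / run work against ppgtt->base ...
 *
 *	kref_put(&ppgtt->ref, i915_ppgtt_release);
 */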
2258 :
2259 0 : void i915_ppgtt_release(struct kref *kref)
2260 : {
2261 : struct i915_hw_ppgtt *ppgtt =
2262 0 : container_of(kref, struct i915_hw_ppgtt, ref);
2263 :
2264 0 : trace_i915_ppgtt_release(&ppgtt->base);
2265 :
2266 : /* vmas should already be unbound */
2267 0 : WARN_ON(!list_empty(&ppgtt->base.active_list));
2268 0 : WARN_ON(!list_empty(&ppgtt->base.inactive_list));
2269 :
2270 0 : list_del(&ppgtt->base.global_link);
2271 0 : drm_mm_takedown(&ppgtt->base.mm);
2272 :
2273 0 : ppgtt->base.cleanup(&ppgtt->base);
2274 0 : kfree(ppgtt);
2275 0 : }
2276 :
2277 : extern int intel_iommu_gfx_mapped;
2278 : /* Certain Gen5 chipsets require idling the GPU before
2279 : * unmapping anything from the GTT when VT-d is enabled.
2280 : */
2281 0 : static bool needs_idle_maps(struct drm_device *dev)
2282 : {
2283 : #ifdef CONFIG_INTEL_IOMMU
2284 : /* Query intel_iommu to see if we need the workaround. Presumably that
2285 : * was loaded first.
2286 : */
2287 : if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
2288 : return true;
2289 : #endif
2290 0 : return false;
2291 : }
2292 :
2293 0 : static bool do_idling(struct drm_i915_private *dev_priv)
2294 : {
2295 0 : bool ret = dev_priv->mm.interruptible;
2296 :
2297 0 : if (unlikely(dev_priv->gtt.do_idle_maps)) {
2298 0 : dev_priv->mm.interruptible = false;
2299 0 : if (i915_gpu_idle(dev_priv->dev)) {
2300 0 : DRM_ERROR("Couldn't idle GPU\n");
2301 : /* Wait a bit, in hopes it avoids the hang */
2302 0 : udelay(10);
2303 0 : }
2304 : }
2305 :
2306 0 : return ret;
2307 : }
2308 :
2309 0 : static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
2310 : {
2311 0 : if (unlikely(dev_priv->gtt.do_idle_maps))
2312 0 : dev_priv->mm.interruptible = interruptible;
2313 0 : }
2314 :
2315 0 : void i915_check_and_clear_faults(struct drm_device *dev)
2316 : {
2317 0 : struct drm_i915_private *dev_priv = dev->dev_private;
2318 : struct intel_engine_cs *ring;
2319 : int i;
2320 :
2321 0 : if (INTEL_INFO(dev)->gen < 6)
2322 0 : return;
2323 :
2324 0 : for_each_ring(ring, dev_priv, i) {
2325 : u32 fault_reg;
2326 0 : fault_reg = I915_READ(RING_FAULT_REG(ring));
2327 0 : if (fault_reg & RING_FAULT_VALID) {
2328 : DRM_DEBUG_DRIVER("Unexpected fault\n"
2329 : "\tAddr: 0x%08x\n"
2330 : "\tAddress space: %s\n"
2331 : "\tSource ID: %d\n"
2332 : "\tType: %d\n",
2333 : trunc_page(fault_reg),
2334 : fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
2335 : RING_FAULT_SRCID(fault_reg),
2336 : RING_FAULT_FAULT_TYPE(fault_reg));
2337 0 : I915_WRITE(RING_FAULT_REG(ring),
2338 : fault_reg & ~RING_FAULT_VALID);
2339 0 : }
2340 0 : }
2341 0 : POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS]));
2342 0 : }
2343 :
2344 0 : static void i915_ggtt_flush(struct drm_i915_private *dev_priv)
2345 : {
2346 0 : if (INTEL_INFO(dev_priv->dev)->gen < 6) {
2347 0 : intel_gtt_chipset_flush();
2348 0 : } else {
2349 0 : I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2350 0 : POSTING_READ(GFX_FLSH_CNTL_GEN6);
2351 : }
2352 0 : }
2353 :
2354 0 : void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
2355 : {
2356 0 : struct drm_i915_private *dev_priv = dev->dev_private;
2357 :
2358 : /* Don't bother messing with faults pre GEN6 as we have little
2359 : * documentation supporting that it's a good idea.
2360 : */
2361 0 : if (INTEL_INFO(dev)->gen < 6)
2362 0 : return;
2363 :
2364 0 : i915_check_and_clear_faults(dev);
2365 :
2366 0 : dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
2367 0 : dev_priv->gtt.base.start,
2368 0 : dev_priv->gtt.base.total,
2369 : true);
2370 :
2371 0 : i915_ggtt_flush(dev_priv);
2372 0 : }
2373 :
2374 0 : int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
2375 : {
2376 : #ifdef __linux__
2377 : if (!dma_map_sg(&obj->base.dev->pdev->dev,
2378 : obj->pages->sgl, obj->pages->nents,
2379 : PCI_DMA_BIDIRECTIONAL))
2380 : return -ENOSPC;
2381 : #endif
2382 :
2383 0 : return 0;
2384 : }
2385 :
2386 0 : static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
2387 : {
2388 : #ifdef writeq
2389 0 : writeq(pte, addr);
2390 : #else
2391 : iowrite32((u32)pte, addr);
2392 : iowrite32(pte >> 32, addr + 4);
2393 : #endif
2394 0 : }
2395 :
2396 0 : static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
2397 : struct sg_table *st,
2398 : uint64_t start,
2399 : enum i915_cache_level level, u32 unused)
2400 : {
2401 0 : struct drm_i915_private *dev_priv = vm->dev->dev_private;
2402 0 : unsigned first_entry = start >> PAGE_SHIFT;
2403 : gen8_pte_t __iomem *gtt_entries =
2404 0 : (gen8_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
2405 : int i = 0;
2406 0 : struct sg_page_iter sg_iter;
2407 : dma_addr_t addr = 0; /* shut up gcc */
2408 :
2409 0 : for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
2410 0 : addr = sg_dma_address(sg_iter.sg) +
2411 0 : (sg_iter.sg_pgoffset << PAGE_SHIFT);
2412 0 : 		gen8_set_pte(&gtt_entries[i],
2413 0 : gen8_pte_encode(addr, level, true));
2414 0 : i++;
2415 : }
2416 :
2417 : /*
2418 : * XXX: This serves as a posting read to make sure that the PTE has
2419 : * actually been updated. There is some concern that even though
2420 : 	 * registers and PTEs are within the same BAR, they could be subject
2421 : 	 * to NUMA access patterns. Therefore, even with the way we assume
2422 : * hardware should work, we must keep this posting read for paranoia.
2423 : */
2424 0 : if (i != 0)
2425 0 : 		WARN_ON(readq(&gtt_entries[i-1])
2426 : != gen8_pte_encode(addr, level, true));
2427 :
2428 : /* This next bit makes the above posting read even more important. We
2429 : * want to flush the TLBs only after we're certain all the PTE updates
2430 : * have finished.
2431 : */
2432 0 : I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2433 0 : POSTING_READ(GFX_FLSH_CNTL_GEN6);
2434 0 : }
2435 :
2436 : /*
2437 : * Binds an object into the global gtt with the specified cache level. The object
2438 : * will be accessible to the GPU via commands whose operands reference offsets
2439 : * within the global GTT as well as accessible by the GPU through the GMADR
2440 : * mapped BAR (dev_priv->mm.gtt->gtt).
2441 : */
2442 0 : static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
2443 : struct sg_table *st,
2444 : uint64_t start,
2445 : enum i915_cache_level level, u32 flags)
2446 : {
2447 0 : struct drm_i915_private *dev_priv = vm->dev->dev_private;
2448 0 : unsigned first_entry = start >> PAGE_SHIFT;
2449 : gen6_pte_t __iomem *gtt_entries =
2450 0 : (gen6_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
2451 : int i = 0;
2452 0 : struct sg_page_iter sg_iter;
2453 : dma_addr_t addr = 0;
2454 :
2455 0 : for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
2456 0 : addr = sg_page_iter_dma_address(&sg_iter);
2457 0 : 		iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
2458 0 : i++;
2459 : }
2460 :
2461 : /* XXX: This serves as a posting read to make sure that the PTE has
2462 : * actually been updated. There is some concern that even though
2463 : 	 * registers and PTEs are within the same BAR, they could be subject
2464 : 	 * to NUMA access patterns. Therefore, even with the way we assume
2465 : * hardware should work, we must keep this posting read for paranoia.
2466 : */
2467 0 : if (i != 0) {
2468 0 : 		unsigned long gtt = readl(&gtt_entries[i-1]);
2469 0 : WARN_ON(gtt != vm->pte_encode(addr, level, true, flags));
2470 0 : }
2471 :
2472 : /* This next bit makes the above posting read even more important. We
2473 : * want to flush the TLBs only after we're certain all the PTE updates
2474 : * have finished.
2475 : */
2476 0 : I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2477 0 : POSTING_READ(GFX_FLSH_CNTL_GEN6);
2478 0 : }
2479 :
2480 0 : static void gen8_ggtt_clear_range(struct i915_address_space *vm,
2481 : uint64_t start,
2482 : uint64_t length,
2483 : bool use_scratch)
2484 : {
2485 0 : struct drm_i915_private *dev_priv = vm->dev->dev_private;
2486 0 : unsigned first_entry = start >> PAGE_SHIFT;
2487 0 : unsigned num_entries = length >> PAGE_SHIFT;
2488 : gen8_pte_t scratch_pte, __iomem *gtt_base =
2489 0 : (gen8_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
2490 0 : const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
2491 : int i;
2492 :
2493 0 : if (WARN(num_entries > max_entries,
2494 : "First entry = %d; Num entries = %d (max=%d)\n",
2495 : first_entry, num_entries, max_entries))
2496 0 : num_entries = max_entries;
2497 :
2498 0 : scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
2499 : I915_CACHE_LLC,
2500 : use_scratch);
2501 0 : for (i = 0; i < num_entries; i++)
2502 0 : 		gen8_set_pte(&gtt_base[i], scratch_pte);
2503 0 : readl(gtt_base);
2504 0 : }
2505 :
2506 0 : static void gen6_ggtt_clear_range(struct i915_address_space *vm,
2507 : uint64_t start,
2508 : uint64_t length,
2509 : bool use_scratch)
2510 : {
2511 0 : struct drm_i915_private *dev_priv = vm->dev->dev_private;
2512 0 : unsigned first_entry = start >> PAGE_SHIFT;
2513 0 : unsigned num_entries = length >> PAGE_SHIFT;
2514 : gen6_pte_t scratch_pte, __iomem *gtt_base =
2515 0 : (gen6_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
2516 0 : const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
2517 : int i;
2518 :
2519 0 : if (WARN(num_entries > max_entries,
2520 : "First entry = %d; Num entries = %d (max=%d)\n",
2521 : first_entry, num_entries, max_entries))
2522 0 : num_entries = max_entries;
2523 :
2524 0 : scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
2525 : I915_CACHE_LLC, use_scratch, 0);
2526 :
2527 0 : for (i = 0; i < num_entries; i++)
2528 0 : 		iowrite32(scratch_pte, &gtt_base[i]);
2529 0 : readl(gtt_base);
2530 0 : }
2531 :
2532 0 : static void i915_ggtt_insert_entries(struct i915_address_space *vm,
2533 : struct sg_table *pages,
2534 : uint64_t start,
2535 : enum i915_cache_level cache_level, u32 unused)
2536 : {
2537 0 : unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2538 : AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2539 :
2540 0 : intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags);
2541 :
2542 0 : }
2543 :
2544 0 : static void i915_ggtt_clear_range(struct i915_address_space *vm,
2545 : uint64_t start,
2546 : uint64_t length,
2547 : bool unused)
2548 : {
2549 0 : unsigned first_entry = start >> PAGE_SHIFT;
2550 0 : unsigned num_entries = length >> PAGE_SHIFT;
2551 0 : intel_gtt_clear_range(first_entry, num_entries);
2552 0 : }
2553 :
2554 0 : static int ggtt_bind_vma(struct i915_vma *vma,
2555 : enum i915_cache_level cache_level,
2556 : u32 flags)
2557 : {
2558 0 : struct drm_i915_gem_object *obj = vma->obj;
2559 : u32 pte_flags = 0;
2560 : int ret;
2561 :
2562 0 : ret = i915_get_ggtt_vma_pages(vma);
2563 0 : if (ret)
2564 0 : return ret;
2565 :
2566 : /* Currently applicable only to VLV */
2567 0 : if (obj->gt_ro)
2568 0 : pte_flags |= PTE_READ_ONLY;
2569 :
2570 0 : vma->vm->insert_entries(vma->vm, vma->ggtt_view.pages,
2571 0 : vma->node.start,
2572 : cache_level, pte_flags);
2573 :
2574 : /*
2575 : * Without aliasing PPGTT there's no difference between
2576 : * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
2577 : * upgrade to both bound if we bind either to avoid double-binding.
2578 : */
2579 0 : vma->bound |= GLOBAL_BIND | LOCAL_BIND;
2580 :
2581 0 : return 0;
2582 0 : }
2583 :
2584 0 : static int aliasing_gtt_bind_vma(struct i915_vma *vma,
2585 : enum i915_cache_level cache_level,
2586 : u32 flags)
2587 : {
2588 0 : struct drm_device *dev = vma->vm->dev;
2589 0 : struct drm_i915_private *dev_priv = dev->dev_private;
2590 0 : struct drm_i915_gem_object *obj = vma->obj;
2591 0 : struct sg_table *pages = obj->pages;
2592 : u32 pte_flags = 0;
2593 : int ret;
2594 :
2595 0 : ret = i915_get_ggtt_vma_pages(vma);
2596 0 : if (ret)
2597 0 : return ret;
2598 0 : pages = vma->ggtt_view.pages;
2599 :
2600 : /* Currently applicable only to VLV */
2601 0 : if (obj->gt_ro)
2602 0 : pte_flags |= PTE_READ_ONLY;
2603 :
2604 :
2605 0 : if (flags & GLOBAL_BIND) {
2606 0 : vma->vm->insert_entries(vma->vm, pages,
2607 0 : vma->node.start,
2608 : cache_level, pte_flags);
2609 0 : }
2610 :
2611 0 : if (flags & LOCAL_BIND) {
2612 0 : struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
2613 0 : appgtt->base.insert_entries(&appgtt->base, pages,
2614 0 : vma->node.start,
2615 : cache_level, pte_flags);
2616 0 : }
2617 :
2618 0 : return 0;
2619 0 : }
2620 :
2621 0 : static void ggtt_unbind_vma(struct i915_vma *vma)
2622 : {
2623 0 : struct drm_device *dev = vma->vm->dev;
2624 0 : struct drm_i915_private *dev_priv = dev->dev_private;
2625 0 : struct drm_i915_gem_object *obj = vma->obj;
2626 0 : const uint64_t size = min_t(uint64_t,
2627 : obj->base.size,
2628 : vma->node.size);
2629 :
2630 0 : if (vma->bound & GLOBAL_BIND) {
2631 0 : vma->vm->clear_range(vma->vm,
2632 0 : vma->node.start,
2633 : size,
2634 : true);
2635 0 : }
2636 :
2637 0 : if (dev_priv->mm.aliasing_ppgtt && vma->bound & LOCAL_BIND) {
2638 : struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
2639 :
2640 0 : appgtt->base.clear_range(&appgtt->base,
2641 0 : vma->node.start,
2642 : size,
2643 : true);
2644 0 : }
2645 0 : }
2646 :
2647 0 : void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
2648 : {
2649 0 : struct drm_device *dev = obj->base.dev;
2650 0 : struct drm_i915_private *dev_priv = dev->dev_private;
2651 : bool interruptible;
2652 :
2653 0 : interruptible = do_idling(dev_priv);
2654 :
2655 : #ifdef __linux__
2656 : dma_unmap_sg(&dev->pdev->dev, obj->pages->sgl, obj->pages->nents,
2657 : PCI_DMA_BIDIRECTIONAL);
2658 : #endif
2659 :
2660 0 : undo_idling(dev_priv, interruptible);
2661 0 : }
2662 :
2663 0 : static void i915_gtt_color_adjust(struct drm_mm_node *node,
2664 : unsigned long color,
2665 : u64 *start,
2666 : u64 *end)
2667 : {
2668 0 : if (node->color != color)
2669 0 : *start += 4096;
2670 :
2671 0 : if (!list_empty(&node->node_list)) {
2672 0 : node = list_entry(node->node_list.next,
2673 : struct drm_mm_node,
2674 : node_list);
2675 0 : if (node->allocated && node->color != color)
2676 0 : *end -= 4096;
2677 : }
2678 0 : }
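/*
 * Editor's worked example for the colour adjustment above (hedged): when
 * the hole being considered is bordered by nodes of a different colour on
 * both sides, both branches fire and one 4096-byte guard page is carved
 * off each end.  A 16 KiB allocation therefore effectively needs a 24 KiB
 * hole before drm_mm will place it there.
 */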
2679 :
2680 0 : static int i915_gem_setup_global_gtt(struct drm_device *dev,
2681 : u64 start,
2682 : u64 mappable_end,
2683 : u64 end)
2684 : {
2685 : 	/* Let GEM manage all of the aperture.
2686 : *
2687 : * However, leave one page at the end still bound to the scratch page.
2688 : * There are a number of places where the hardware apparently prefetches
2689 : * past the end of the object, and we've seen multiple hangs with the
2690 : * GPU head pointer stuck in a batchbuffer bound at the last page of the
2691 : * aperture. One page should be enough to keep any prefetching inside
2692 : * of the aperture.
2693 : */
2694 0 : struct drm_i915_private *dev_priv = dev->dev_private;
2695 0 : struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
2696 : struct drm_mm_node *entry;
2697 : struct drm_i915_gem_object *obj;
2698 : unsigned long hole_start, hole_end;
2699 : int ret;
2700 :
2701 0 : BUG_ON(mappable_end > end);
2702 :
2703 0 : ggtt_vm->start = start;
2704 :
2705 : /* Subtract the guard page before address space initialization to
2706 : * shrink the range used by drm_mm */
2707 0 : ggtt_vm->total = end - start - PAGE_SIZE;
2708 0 : i915_address_space_init(ggtt_vm, dev_priv);
2709 0 : ggtt_vm->total += PAGE_SIZE;
2710 :
2711 0 : if (intel_vgpu_active(dev)) {
2712 0 : ret = intel_vgt_balloon(dev);
2713 0 : if (ret)
2714 0 : return ret;
2715 : }
2716 :
2717 0 : if (!HAS_LLC(dev))
2718 0 : ggtt_vm->mm.color_adjust = i915_gtt_color_adjust;
2719 :
2720 : /* Mark any preallocated objects as occupied */
2721 0 : list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
2722 0 : struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
2723 :
2724 : DRM_DEBUG_KMS("reserving preallocated space: %llx + %zx\n",
2725 : i915_gem_obj_ggtt_offset(obj), obj->base.size);
2726 :
2727 0 : WARN_ON(i915_gem_obj_ggtt_bound(obj));
2728 0 : ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
2729 0 : if (ret) {
2730 : DRM_DEBUG_KMS("Reservation failed: %i\n", ret);
2731 0 : return ret;
2732 : }
2733 0 : vma->bound |= GLOBAL_BIND;
2734 0 : __i915_vma_set_map_and_fenceable(vma);
2735 0 : list_add_tail(&vma->mm_list, &ggtt_vm->inactive_list);
2736 0 : }
2737 :
2738 : /* Clear any non-preallocated blocks */
2739 0 : drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
2740 : DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
2741 : hole_start, hole_end);
2742 0 : ggtt_vm->clear_range(ggtt_vm, hole_start,
2743 0 : hole_end - hole_start, true);
2744 : }
2745 :
2746 : /* And finally clear the reserved guard page */
2747 0 : ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true);
2748 :
2749 0 : if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) {
2750 : struct i915_hw_ppgtt *ppgtt;
2751 :
2752 0 : ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2753 0 : if (!ppgtt)
2754 0 : return -ENOMEM;
2755 :
2756 0 : ret = __hw_ppgtt_init(dev, ppgtt);
2757 0 : if (ret) {
2758 0 : ppgtt->base.cleanup(&ppgtt->base);
2759 0 : kfree(ppgtt);
2760 0 : return ret;
2761 : }
2762 :
2763 0 : if (ppgtt->base.allocate_va_range)
2764 0 : ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0,
2765 0 : ppgtt->base.total);
2766 0 : if (ret) {
2767 0 : ppgtt->base.cleanup(&ppgtt->base);
2768 0 : kfree(ppgtt);
2769 0 : return ret;
2770 : }
2771 :
2772 0 : ppgtt->base.clear_range(&ppgtt->base,
2773 0 : ppgtt->base.start,
2774 0 : ppgtt->base.total,
2775 : true);
2776 :
2777 0 : dev_priv->mm.aliasing_ppgtt = ppgtt;
2778 0 : WARN_ON(dev_priv->gtt.base.bind_vma != ggtt_bind_vma);
2779 0 : dev_priv->gtt.base.bind_vma = aliasing_gtt_bind_vma;
2780 0 : }
2781 :
2782 0 : return 0;
2783 0 : }
2784 :
2785 0 : void i915_gem_init_global_gtt(struct drm_device *dev)
2786 : {
2787 0 : struct drm_i915_private *dev_priv = dev->dev_private;
2788 : u64 gtt_size, mappable_size;
2789 :
2790 0 : gtt_size = dev_priv->gtt.base.total;
2791 0 : mappable_size = dev_priv->gtt.mappable_end;
2792 :
2793 0 : i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
2794 0 : }
2795 :
2796 0 : void i915_global_gtt_cleanup(struct drm_device *dev)
2797 : {
2798 0 : struct drm_i915_private *dev_priv = dev->dev_private;
2799 0 : struct i915_address_space *vm = &dev_priv->gtt.base;
2800 :
2801 0 : if (dev_priv->mm.aliasing_ppgtt) {
2802 : struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
2803 :
2804 0 : ppgtt->base.cleanup(&ppgtt->base);
2805 0 : kfree(ppgtt);
2806 0 : }
2807 :
2808 0 : if (drm_mm_initialized(&vm->mm)) {
2809 0 : if (intel_vgpu_active(dev))
2810 0 : intel_vgt_deballoon();
2811 :
2812 0 : drm_mm_takedown(&vm->mm);
2813 0 : list_del(&vm->global_link);
2814 0 : }
2815 :
2816 0 : vm->cleanup(vm);
2817 0 : }
2818 :
2819 0 : static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
2820 : {
2821 0 : snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
2822 0 : snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
2823 0 : return snb_gmch_ctl << 20;
2824 : }
2825 :
2826 0 : static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
2827 : {
2828 0 : bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
2829 0 : bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
2830 0 : if (bdw_gmch_ctl)
2831 0 : bdw_gmch_ctl = 1 << bdw_gmch_ctl;
2832 :
2833 : #ifdef CONFIG_X86_32
2834 : /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */
2835 : if (bdw_gmch_ctl > 4)
2836 : bdw_gmch_ctl = 4;
2837 : #endif
2838 :
2839 0 : return bdw_gmch_ctl << 20;
2840 : }
2841 :
2842 0 : static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
2843 : {
2844 0 : gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
2845 0 : gmch_ctrl &= SNB_GMCH_GGMS_MASK;
2846 :
2847 0 : if (gmch_ctrl)
2848 0 : return 1 << (20 + gmch_ctrl);
2849 :
2850 0 : return 0;
2851 0 : }
2852 :
2853 0 : static size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
2854 : {
2855 0 : snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
2856 0 : snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
2857 0 : return snb_gmch_ctl << 25; /* 32 MB units */
2858 : }
2859 :
2860 0 : static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
2861 : {
2862 0 : bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2863 0 : bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
2864 0 : return bdw_gmch_ctl << 25; /* 32 MB units */
2865 : }
2866 :
2867 0 : static size_t chv_get_stolen_size(u16 gmch_ctrl)
2868 : {
2869 0 : gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
2870 0 : gmch_ctrl &= SNB_GMCH_GMS_MASK;
2871 :
2872 : /*
2873 : * 0x0 to 0x10: 32MB increments starting at 0MB
2874 : * 0x11 to 0x16: 4MB increments starting at 8MB
2875 : 	 * 0x17 to 0x1d: 4MB increments starting at 36MB
2876 : */
2877 0 : if (gmch_ctrl < 0x11)
2878 0 : return gmch_ctrl << 25;
2879 0 : else if (gmch_ctrl < 0x17)
2880 0 : return (gmch_ctrl - 0x11 + 2) << 22;
2881 : else
2882 0 : return (gmch_ctrl - 0x17 + 9) << 22;
2883 0 : }
2884 :
2885 0 : static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
2886 : {
2887 0 : gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2888 0 : gen9_gmch_ctl &= BDW_GMCH_GMS_MASK;
2889 :
2890 0 : if (gen9_gmch_ctl < 0xf0)
2891 0 : return gen9_gmch_ctl << 25; /* 32 MB units */
2892 : else
2893 : /* 4MB increments starting at 0xf0 for 4MB */
2894 0 : return (gen9_gmch_ctl - 0xf0 + 1) << 22;
2895 0 : }
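/*
 * Editor's worked examples for the stolen-size decodes above, derived
 * directly from the formulas in chv_get_stolen_size() and
 * gen9_get_stolen_size() (the raw field values are illustrative):
 *
 *	CHV:  0x02 -> 0x02 << 25              =  64 MB  (32 MB steps)
 *	      0x12 -> (0x12 - 0x11 + 2) << 22  =  12 MB  (4 MB steps from 8 MB)
 *	      0x17 -> (0x17 - 0x17 + 9) << 22  =  36 MB  (4 MB steps from 36 MB)
 *	Gen9: 0x02 -> 0x02 << 25              =  64 MB  (32 MB steps)
 *	      0xf1 -> (0xf1 - 0xf0 + 1) << 22  =   8 MB  (4 MB steps)
 */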
2896 :
2897 : #ifdef __linux__
2898 :
2899 : static int ggtt_probe_common(struct drm_device *dev,
2900 : size_t gtt_size)
2901 : {
2902 : struct drm_i915_private *dev_priv = dev->dev_private;
2903 : struct i915_page_scratch *scratch_page;
2904 : phys_addr_t gtt_phys_addr;
2905 :
2906 : /* For Modern GENs the PTEs and register space are split in the BAR */
2907 : gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
2908 : (pci_resource_len(dev->pdev, 0) / 2);
2909 :
2910 : /*
2911 : 	 * On BXT, writes larger than 64 bits to the GTT pagetable range will be
2912 : * dropped. For WC mappings in general we have 64 byte burst writes
2913 : * when the WC buffer is flushed, so we can't use it, but have to
2914 : * resort to an uncached mapping. The WC issue is easily caught by the
2915 : * readback check when writing GTT PTE entries.
2916 : */
2917 : if (IS_BROXTON(dev))
2918 : dev_priv->gtt.gsm = ioremap_nocache(gtt_phys_addr, gtt_size);
2919 : else
2920 : dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
2921 : if (!dev_priv->gtt.gsm) {
2922 : DRM_ERROR("Failed to map the gtt page table\n");
2923 : return -ENOMEM;
2924 : }
2925 :
2926 : scratch_page = alloc_scratch_page(dev);
2927 : if (IS_ERR(scratch_page)) {
2928 : DRM_ERROR("Scratch setup failed\n");
2929 : /* iounmap will also get called at remove, but meh */
2930 : iounmap(dev_priv->gtt.gsm);
2931 : return PTR_ERR(scratch_page);
2932 : }
2933 :
2934 : dev_priv->gtt.base.scratch_page = scratch_page;
2935 :
2936 : return 0;
2937 : }
2938 :
2939 : #else
2940 :
2941 0 : static int ggtt_probe_common(struct drm_device *dev,
2942 : size_t gtt_size)
2943 : {
2944 0 : struct drm_i915_private *dev_priv = dev->dev_private;
2945 : struct i915_page_scratch *scratch_page;
2946 0 : bus_space_handle_t gsm;
2947 0 : bus_addr_t addr;
2948 0 : bus_size_t size;
2949 : pcireg_t type;
2950 : int flags;
2951 : int ret;
2952 :
2953 0 : type = pci_mapreg_type(dev_priv->pc, dev_priv->tag, 0x10);
2954 0 : ret = -pci_mapreg_info(dev_priv->pc, dev_priv->tag, 0x10, type,
2955 : &addr, &size, NULL);
2956 0 : if (ret)
2957 0 : return ret;
2958 :
2959 : /*
2960 : 	 * On BXT, writes larger than 64 bits to the GTT pagetable range will be
2961 : * dropped. For WC mappings in general we have 64 byte burst writes
2962 : * when the WC buffer is flushed, so we can't use it, but have to
2963 : * resort to an uncached mapping. The WC issue is easily caught by the
2964 : * readback check when writing GTT PTE entries.
2965 : */
2966 0 : if (IS_BROXTON(dev))
2967 0 : flags = 0;
2968 : else
2969 : flags = BUS_SPACE_MAP_PREFETCHABLE;
2970 :
2971 : /* For Modern GENs the PTEs and register space are split in the BAR */
2972 0 : ret = -bus_space_map(dev_priv->bst, addr + size / 2, gtt_size,
2973 0 : flags | BUS_SPACE_MAP_LINEAR, &gsm);
2974 0 : if (ret) {
2975 0 : DRM_ERROR("Failed to map the gtt page table\n");
2976 0 : return ret;
2977 : }
2978 0 : dev_priv->gtt.gsm = bus_space_vaddr(dev_priv->bst, gsm);
2979 :
2980 0 : scratch_page = alloc_scratch_page(dev);
2981 0 : if (IS_ERR(scratch_page)) {
2982 0 : DRM_ERROR("Scratch setup failed\n");
2983 : /* iounmap will also get called at remove, but meh */
2984 0 : bus_space_unmap(dev_priv->bst, gsm, addr + size / 2);
2985 0 : return PTR_ERR(scratch_page);
2986 : }
2987 :
2988 0 : dev_priv->gtt.base.scratch_page = scratch_page;
2989 :
2990 0 : return 0;
2991 0 : }
2992 :
2993 : #endif
2994 :
2995 : /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
2996 : * bits. When using advanced contexts each context stores its own PAT, but
2997 : * writing this data shouldn't be harmful even in those cases. */
2998 0 : static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
2999 : {
3000 : uint64_t pat;
3001 :
3002 : pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */
3003 : GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
3004 : GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
3005 : GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */
3006 : GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
3007 : GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
3008 : GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
3009 : GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
3010 :
3011 0 : if (!USES_PPGTT(dev_priv->dev))
3012 : /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
3013 : * so RTL will always use the value corresponding to
3014 : * pat_sel = 000".
3015 : * So let's disable cache for GGTT to avoid screen corruptions.
3016 : * MOCS still can be used though.
3017 : * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
3018 : * before this patch, i.e. the same uncached + snooping access
3019 : * like on gen6/7 seems to be in effect.
3020 : * - So this just fixes blitter/render access. Again it looks
3021 : * like it's not just uncached access, but uncached + snooping.
3022 : * So we can still hold onto all our assumptions wrt cpu
3023 : * clflushing on LLC machines.
3024 : */
3025 : pat = GEN8_PPAT(0, GEN8_PPAT_UC);
3026 :
3027 : /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
3028 : * write would work. */
3029 0 : I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
3030 0 : I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
3031 0 : }
3032 :
3033 0 : static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
3034 : {
3035 : uint64_t pat;
3036 :
3037 : /*
3038 : * Map WB on BDW to snooped on CHV.
3039 : *
3040 : * Only the snoop bit has meaning for CHV, the rest is
3041 : * ignored.
3042 : *
3043 : * The hardware will never snoop for certain types of accesses:
3044 : * - CPU GTT (GMADR->GGTT->no snoop->memory)
3045 : * - PPGTT page tables
3046 : * - some other special cycles
3047 : *
3048 : * As with BDW, we also need to consider the following for GT accesses:
3049 : * "For GGTT, there is NO pat_sel[2:0] from the entry,
3050 : * so RTL will always use the value corresponding to
3051 : * pat_sel = 000".
3052 : * Which means we must set the snoop bit in PAT entry 0
3053 : * in order to keep the global status page working.
3054 : */
3055 : pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
3056 : GEN8_PPAT(1, 0) |
3057 : GEN8_PPAT(2, 0) |
3058 : GEN8_PPAT(3, 0) |
3059 : GEN8_PPAT(4, CHV_PPAT_SNOOP) |
3060 : GEN8_PPAT(5, CHV_PPAT_SNOOP) |
3061 : GEN8_PPAT(6, CHV_PPAT_SNOOP) |
3062 : GEN8_PPAT(7, CHV_PPAT_SNOOP);
3063 :
3064 0 : I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
3065 0 : I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
3066 0 : }
3067 :
3068 0 : static int gen8_gmch_probe(struct drm_device *dev,
3069 : u64 *gtt_total,
3070 : size_t *stolen,
3071 : phys_addr_t *mappable_base,
3072 : u64 *mappable_end)
3073 : {
3074 0 : struct drm_i915_private *dev_priv = dev->dev_private;
3075 : u64 gtt_size;
3076 0 : u16 snb_gmch_ctl;
3077 0 : bus_addr_t base, end;
3078 : int ret;
3079 :
3080 : #ifdef __linux__
3081 : /* TODO: We're not aware of mappable constraints on gen8 yet */
3082 : *mappable_base = pci_resource_start(dev->pdev, 2);
3083 : *mappable_end = pci_resource_len(dev->pdev, 2);
3084 :
3085 : if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
3086 : pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));
3087 : #else
3088 0 : pcireg_t type = pci_mapreg_type(dev_priv->pc, dev_priv->tag, 0x18);
3089 0 : ret = -pci_mapreg_info(dev_priv->pc, dev_priv->tag, 0x18, type,
3090 : &base, &end, NULL);
3091 0 : if (ret)
3092 0 : return ret;
3093 0 : *mappable_base = base;
3094 0 : *mappable_end = end;
3095 : #endif
3096 :
3097 0 : pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3098 :
3099 0 : if (INTEL_INFO(dev)->gen >= 9) {
3100 0 : *stolen = gen9_get_stolen_size(snb_gmch_ctl);
3101 0 : gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
3102 0 : } else if (IS_CHERRYVIEW(dev)) {
3103 0 : *stolen = chv_get_stolen_size(snb_gmch_ctl);
3104 0 : gtt_size = chv_get_total_gtt_size(snb_gmch_ctl);
3105 0 : } else {
3106 0 : *stolen = gen8_get_stolen_size(snb_gmch_ctl);
3107 0 : gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
3108 : }
3109 :
3110 0 : *gtt_total = (gtt_size / sizeof(gen8_pte_t)) << PAGE_SHIFT;
3111 :
3112 0 : if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
3113 0 : chv_setup_private_ppat(dev_priv);
3114 : else
3115 0 : bdw_setup_private_ppat(dev_priv);
3116 :
3117 0 : ret = ggtt_probe_common(dev, gtt_size);
3118 :
3119 0 : dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
3120 0 : dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
3121 0 : dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
3122 0 : dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
3123 :
3124 0 : return ret;
3125 0 : }
3126 :
3127 0 : static int gen6_gmch_probe(struct drm_device *dev,
3128 : u64 *gtt_total,
3129 : size_t *stolen,
3130 : phys_addr_t *mappable_base,
3131 : u64 *mappable_end)
3132 : {
3133 0 : struct drm_i915_private *dev_priv = dev->dev_private;
3134 : unsigned int gtt_size;
3135 0 : u16 snb_gmch_ctl;
3136 0 : bus_addr_t base, end;
3137 : int ret;
3138 :
3139 : #ifdef __linux__
3140 : *mappable_base = pci_resource_start(dev->pdev, 2);
3141 : *mappable_end = pci_resource_len(dev->pdev, 2);
3142 : #else
3143 0 : pcireg_t type = pci_mapreg_type(dev_priv->pc, dev_priv->tag, 0x18);
3144 0 : ret = -pci_mapreg_info(dev_priv->pc, dev_priv->tag, 0x18, type,
3145 : &base, &end, NULL);
3146 0 : if (ret)
3147 0 : return ret;
3148 0 : *mappable_base = base;
3149 0 : *mappable_end = end;
3150 : #endif
3151 :
3152 : /* 64/512MB is the current min/max we actually know of, but this is just
3153 : * a coarse sanity check.
3154 : */
3155 0 : if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) {
3156 0 : DRM_ERROR("Unknown GMADR size (%llx)\n",
3157 : dev_priv->gtt.mappable_end);
3158 0 : return -ENXIO;
3159 : }
3160 :
3161 : #ifdef __linux__
3162 : if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
3163 : pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
3164 : #endif
3165 0 : pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3166 :
3167 0 : *stolen = gen6_get_stolen_size(snb_gmch_ctl);
3168 :
3169 0 : gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
3170 0 : *gtt_total = (gtt_size / sizeof(gen6_pte_t)) << PAGE_SHIFT;
3171 :
3172 0 : ret = ggtt_probe_common(dev, gtt_size);
3173 :
3174 0 : dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
3175 0 : dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
3176 0 : dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
3177 0 : dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
3178 :
3179 0 : return ret;
3180 0 : }
3181 :
3182 0 : static void gen6_gmch_remove(struct i915_address_space *vm)
3183 : {
3184 : #ifdef __linux__
3185 : struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);
3186 :
3187 : iounmap(gtt->gsm);
3188 : #endif
3189 0 : free_scratch_page(vm->dev, vm->scratch_page);
3190 0 : }
3191 :
3192 0 : static int i915_gmch_probe(struct drm_device *dev,
3193 : u64 *gtt_total,
3194 : size_t *stolen,
3195 : phys_addr_t *mappable_base,
3196 : u64 *mappable_end)
3197 : {
3198 0 : struct drm_i915_private *dev_priv = dev->dev_private;
3199 : int ret;
3200 :
3201 0 : ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
3202 0 : if (!ret) {
3203 0 : DRM_ERROR("failed to set up gmch\n");
3204 0 : return -EIO;
3205 : }
3206 :
3207 0 : intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);
3208 :
3209 0 : dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
3210 0 : dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries;
3211 0 : dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
3212 0 : dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
3213 0 : dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
3214 :
3215 0 : if (unlikely(dev_priv->gtt.do_idle_maps))
3216 : DRM_INFO("applying Ironlake quirks for intel_iommu\n");
3217 :
3218 0 : return 0;
3219 0 : }
3220 :
3221 0 : static void i915_gmch_remove(struct i915_address_space *vm)
3222 : {
3223 0 : intel_gmch_remove();
3224 0 : }
3225 :
3226 0 : int i915_gem_gtt_init(struct drm_device *dev)
3227 : {
3228 0 : struct drm_i915_private *dev_priv = dev->dev_private;
3229 0 : struct i915_gtt *gtt = &dev_priv->gtt;
3230 : int ret;
3231 :
3232 0 : if (INTEL_INFO(dev)->gen <= 5) {
3233 0 : gtt->gtt_probe = i915_gmch_probe;
3234 0 : gtt->base.cleanup = i915_gmch_remove;
3235 0 : } else if (INTEL_INFO(dev)->gen < 8) {
3236 0 : gtt->gtt_probe = gen6_gmch_probe;
3237 0 : gtt->base.cleanup = gen6_gmch_remove;
3238 0 : if (IS_HASWELL(dev) && dev_priv->ellc_size)
3239 0 : gtt->base.pte_encode = iris_pte_encode;
3240 0 : else if (IS_HASWELL(dev))
3241 0 : gtt->base.pte_encode = hsw_pte_encode;
3242 0 : else if (IS_VALLEYVIEW(dev))
3243 0 : gtt->base.pte_encode = byt_pte_encode;
3244 0 : else if (INTEL_INFO(dev)->gen >= 7)
3245 0 : gtt->base.pte_encode = ivb_pte_encode;
3246 : else
3247 0 : gtt->base.pte_encode = snb_pte_encode;
3248 : } else {
3249 0 : dev_priv->gtt.gtt_probe = gen8_gmch_probe;
3250 0 : dev_priv->gtt.base.cleanup = gen6_gmch_remove;
3251 : }
3252 :
3253 0 : gtt->base.dev = dev;
3254 :
3255 0 : 	ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
3256 0 : 			     &gtt->mappable_base, &gtt->mappable_end);
3257 0 : if (ret)
3258 0 : return ret;
3259 :
3260 : /* GMADR is the PCI mmio aperture into the global GTT. */
3261 : DRM_INFO("Memory usable by graphics device = %lluM\n",
3262 : gtt->base.total >> 20);
3263 : DRM_DEBUG_DRIVER("GMADR size = %lldM\n", gtt->mappable_end >> 20);
3264 : DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);
3265 : #ifdef CONFIG_INTEL_IOMMU
3266 : if (intel_iommu_gfx_mapped)
3267 : DRM_INFO("VT-d active for gfx access\n");
3268 : #endif
3269 : /*
3270 : * i915.enable_ppgtt is read-only, so do an early pass to validate the
3271 : * user's requested state against the hardware/driver capabilities. We
3272 : * do this now so that we can print out any log messages once rather
3273 : * than every time we check intel_enable_ppgtt().
3274 : */
3275 0 : i915.enable_ppgtt = sanitize_enable_ppgtt(dev, i915.enable_ppgtt);
3276 : DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt);
3277 :
3278 0 : return 0;
3279 0 : }
3280 :
3281 0 : void i915_gem_restore_gtt_mappings(struct drm_device *dev)
3282 : {
3283 0 : struct drm_i915_private *dev_priv = dev->dev_private;
3284 : struct drm_i915_gem_object *obj;
3285 : struct i915_address_space *vm;
3286 : struct i915_vma *vma;
3287 : bool flush;
3288 :
3289 0 : i915_check_and_clear_faults(dev);
3290 :
3291 : /* First fill our portion of the GTT with scratch pages */
3292 0 : dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
3293 0 : dev_priv->gtt.base.start,
3294 0 : dev_priv->gtt.base.total,
3295 : true);
3296 :
3297 : /* Cache flush objects bound into GGTT and rebind them. */
3298 : vm = &dev_priv->gtt.base;
3299 0 : list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
3300 : flush = false;
3301 0 : list_for_each_entry(vma, &obj->vma_list, vma_link) {
3302 0 : if (vma->vm != vm)
3303 : continue;
3304 :
3305 0 : WARN_ON(i915_vma_bind(vma, obj->cache_level,
3306 : PIN_UPDATE));
3307 :
3308 : flush = true;
3309 0 : }
3310 :
3311 0 : if (flush)
3312 0 : i915_gem_clflush_object(obj, obj->pin_display);
3313 : }
3314 :
3315 0 : if (INTEL_INFO(dev)->gen >= 8) {
3316 0 : if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
3317 0 : chv_setup_private_ppat(dev_priv);
3318 : else
3319 0 : bdw_setup_private_ppat(dev_priv);
3320 :
3321 0 : return;
3322 : }
3323 :
3324 0 : if (USES_PPGTT(dev)) {
3325 0 : list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
3326 : /* TODO: Perhaps it shouldn't be gen6 specific */
3327 :
3328 : struct i915_hw_ppgtt *ppgtt =
3329 0 : container_of(vm, struct i915_hw_ppgtt,
3330 : base);
3331 :
3332 0 : if (i915_is_ggtt(vm))
3333 0 : ppgtt = dev_priv->mm.aliasing_ppgtt;
3334 :
3335 0 : gen6_write_page_range(dev_priv, &ppgtt->pd,
3336 0 : 0, ppgtt->base.total);
3337 : }
3338 : }
3339 :
3340 0 : i915_ggtt_flush(dev_priv);
3341 0 : }
3342 :
3343 : static struct i915_vma *
3344 0 : __i915_gem_vma_create(struct drm_i915_gem_object *obj,
3345 : struct i915_address_space *vm,
3346 : const struct i915_ggtt_view *ggtt_view)
3347 : {
3348 : struct i915_vma *vma;
3349 :
3350 0 : if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
3351 0 : return ERR_PTR(-EINVAL);
3352 :
3353 : #ifdef __linux__
3354 : vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
3355 : #else
3356 0 : vma = pool_get(&(to_i915(obj->base.dev)->vmas), PR_WAITOK | PR_ZERO);
3357 : #endif
3358 0 : if (vma == NULL)
3359 0 : return ERR_PTR(-ENOMEM);
3360 :
3361 0 : INIT_LIST_HEAD(&vma->vma_link);
3362 0 : INIT_LIST_HEAD(&vma->mm_list);
3363 0 : INIT_LIST_HEAD(&vma->exec_list);
3364 0 : vma->vm = vm;
3365 0 : vma->obj = obj;
3366 :
3367 0 : if (i915_is_ggtt(vm))
3368 0 : vma->ggtt_view = *ggtt_view;
3369 :
3370 0 : list_add_tail(&vma->vma_link, &obj->vma_list);
3371 0 : if (!i915_is_ggtt(vm))
3372 0 : i915_ppgtt_get(i915_vm_to_ppgtt(vm));
3373 :
3374 0 : return vma;
3375 0 : }
3376 :
3377 : struct i915_vma *
3378 0 : i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
3379 : struct i915_address_space *vm)
3380 : {
3381 : struct i915_vma *vma;
3382 :
3383 0 : vma = i915_gem_obj_to_vma(obj, vm);
3384 0 : if (!vma)
3385 0 : vma = __i915_gem_vma_create(obj, vm,
3386 0 : i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL);
3387 :
3388 0 : return vma;
3389 : }
3390 :
3391 : struct i915_vma *
3392 0 : i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
3393 : const struct i915_ggtt_view *view)
3394 : {
3395 0 : struct i915_address_space *ggtt = i915_obj_to_ggtt(obj);
3396 : struct i915_vma *vma;
3397 :
3398 0 : if (WARN_ON(!view))
3399 0 : return ERR_PTR(-EINVAL);
3400 :
3401 0 : vma = i915_gem_obj_to_ggtt_view(obj, view);
3402 :
3403 0 : if (IS_ERR(vma))
3404 0 : return vma;
3405 :
3406 0 : if (!vma)
3407 0 : vma = __i915_gem_vma_create(obj, ggtt, view);
3408 :
3409 0 : return vma;
3410 :
3411 0 : }
3412 :
3413 : #ifdef __linux__
3414 :
3415 : static struct scatterlist *
3416 : rotate_pages(dma_addr_t *in, unsigned int offset,
3417 : unsigned int width, unsigned int height,
3418 : struct sg_table *st, struct scatterlist *sg)
3419 : {
3420 : unsigned int column, row;
3421 : unsigned int src_idx;
3422 :
3423 : if (!sg) {
3424 : st->nents = 0;
3425 : sg = st->sgl;
3426 : }
3427 :
3428 : for (column = 0; column < width; column++) {
3429 : src_idx = width * (height - 1) + column;
3430 : for (row = 0; row < height; row++) {
3431 : st->nents++;
3432 : /* We don't need the pages, but need to initialize
3433 : * the entries so the sg list can be happily traversed.
3434 : 			 * All we need are the DMA addresses.
3435 : */
3436 : sg_set_page(sg, NULL, PAGE_SIZE, 0);
3437 : sg_dma_address(sg) = in[offset + src_idx];
3438 : sg_dma_len(sg) = PAGE_SIZE;
3439 : sg = sg_next(sg);
3440 : src_idx -= width;
3441 : }
3442 : }
3443 :
3444 : return sg;
3445 : }
3446 :
3447 : static struct sg_table *
3448 : intel_rotate_fb_obj_pages(struct i915_ggtt_view *ggtt_view,
3449 : struct drm_i915_gem_object *obj)
3450 : {
3451 : struct intel_rotation_info *rot_info = &ggtt_view->rotation_info;
3452 : unsigned int size_pages = rot_info->size >> PAGE_SHIFT;
3453 : unsigned int size_pages_uv;
3454 : struct sg_page_iter sg_iter;
3455 : unsigned long i;
3456 : dma_addr_t *page_addr_list;
3457 : struct sg_table *st;
3458 : unsigned int uv_start_page;
3459 : struct scatterlist *sg;
3460 : int ret = -ENOMEM;
3461 :
3462 : /* Allocate a temporary list of source pages for random access. */
3463 : page_addr_list = drm_malloc_ab(obj->base.size / PAGE_SIZE,
3464 : sizeof(dma_addr_t));
3465 : if (!page_addr_list)
3466 : return ERR_PTR(ret);
3467 :
3468 : /* Account for UV plane with NV12. */
3469 : if (rot_info->pixel_format == DRM_FORMAT_NV12)
3470 : size_pages_uv = rot_info->size_uv >> PAGE_SHIFT;
3471 : else
3472 : size_pages_uv = 0;
3473 :
3474 : /* Allocate target SG list. */
3475 : st = kmalloc(sizeof(*st), GFP_KERNEL);
3476 : if (!st)
3477 : goto err_st_alloc;
3478 :
3479 : ret = sg_alloc_table(st, size_pages + size_pages_uv, GFP_KERNEL);
3480 : if (ret)
3481 : goto err_sg_alloc;
3482 :
3483 : /* Populate source page list from the object. */
3484 : i = 0;
3485 : for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
3486 : page_addr_list[i] = sg_page_iter_dma_address(&sg_iter);
3487 : i++;
3488 : }
3489 :
3490 : /* Rotate the pages. */
3491 : sg = rotate_pages(page_addr_list, 0,
3492 : rot_info->width_pages, rot_info->height_pages,
3493 : st, NULL);
3494 :
3495 : /* Append the UV plane if NV12. */
3496 : if (rot_info->pixel_format == DRM_FORMAT_NV12) {
3497 : uv_start_page = size_pages;
3498 :
3499 : /* Check for tile-row un-alignment. */
3500 : if (offset_in_page(rot_info->uv_offset))
3501 : uv_start_page--;
3502 :
3503 : rot_info->uv_start_page = uv_start_page;
3504 :
3505 : rotate_pages(page_addr_list, uv_start_page,
3506 : rot_info->width_pages_uv,
3507 : rot_info->height_pages_uv,
3508 : st, sg);
3509 : }
3510 :
3511 : DRM_DEBUG_KMS(
3512 : "Created rotated page mapping for object size %zu (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages (%u plane 0)).\n",
3513 : obj->base.size, rot_info->pitch, rot_info->height,
3514 : rot_info->pixel_format, rot_info->width_pages,
3515 : rot_info->height_pages, size_pages + size_pages_uv,
3516 : size_pages);
3517 :
3518 : drm_free_large(page_addr_list);
3519 :
3520 : return st;
3521 :
3522 : err_sg_alloc:
3523 : kfree(st);
3524 : err_st_alloc:
3525 : drm_free_large(page_addr_list);
3526 :
3527 : DRM_DEBUG_KMS(
3528 : "Failed to create rotated mapping for object size %zu! (%d) (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages (%u plane 0))\n",
3529 : obj->base.size, ret, rot_info->pitch, rot_info->height,
3530 : rot_info->pixel_format, rot_info->width_pages,
3531 : rot_info->height_pages, size_pages + size_pages_uv,
3532 : size_pages);
3533 : return ERR_PTR(ret);
3534 : }
3535 :
3536 : static struct sg_table *
3537 : intel_partial_pages(const struct i915_ggtt_view *view,
3538 : struct drm_i915_gem_object *obj)
3539 : {
3540 : struct sg_table *st;
3541 : struct scatterlist *sg;
3542 : struct sg_page_iter obj_sg_iter;
3543 : int ret = -ENOMEM;
3544 :
3545 : st = kmalloc(sizeof(*st), GFP_KERNEL);
3546 : if (!st)
3547 : goto err_st_alloc;
3548 :
3549 : ret = sg_alloc_table(st, view->params.partial.size, GFP_KERNEL);
3550 : if (ret)
3551 : goto err_sg_alloc;
3552 :
3553 : sg = st->sgl;
3554 : st->nents = 0;
3555 : for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents,
3556 : view->params.partial.offset)
3557 : {
3558 : if (st->nents >= view->params.partial.size)
3559 : break;
3560 :
3561 : sg_set_page(sg, NULL, PAGE_SIZE, 0);
3562 : sg_dma_address(sg) = sg_page_iter_dma_address(&obj_sg_iter);
3563 : sg_dma_len(sg) = PAGE_SIZE;
3564 :
3565 : sg = sg_next(sg);
3566 : st->nents++;
3567 : }
3568 :
3569 : return st;
3570 :
3571 : err_sg_alloc:
3572 : kfree(st);
3573 : err_st_alloc:
3574 : return ERR_PTR(ret);
3575 : }
3576 :
3577 : #endif
3578 :
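     : /*
     : * Populate vma->ggtt_view.pages according to the view type of the VMA.
     : * The normal view simply reuses the object's own sg_table of backing
     : * pages; rotated and partial views are built only on Linux (the helpers
     : * above are under #ifdef __linux__), so on this port any other view type
     : * triggers a one-time warning and the bind fails with -EINVAL.
     : */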
3579 : static int
3580 0 : i915_get_ggtt_vma_pages(struct i915_vma *vma)
3581 : {
3582 : int ret = 0;
3583 :
3584 0 : if (vma->ggtt_view.pages)
3585 0 : return 0;
3586 :
3587 0 : if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
3588 0 : vma->ggtt_view.pages = vma->obj->pages;
3589 : #ifdef __linux__
3590 : else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
3591 : vma->ggtt_view.pages =
3592 : intel_rotate_fb_obj_pages(&vma->ggtt_view, vma->obj);
3593 : else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL)
3594 : vma->ggtt_view.pages =
3595 : intel_partial_pages(&vma->ggtt_view, vma->obj);
3596 : #endif
3597 : else
3598 0 : WARN_ONCE(1, "GGTT view %u not implemented!\n",
3599 : vma->ggtt_view.type);
3600 :
3601 0 : if (!vma->ggtt_view.pages) {
3602 0 : DRM_ERROR("Failed to get pages for GGTT view type %u!\n",
3603 : vma->ggtt_view.type);
3604 : ret = -EINVAL;
3605 0 : } else if (IS_ERR(vma->ggtt_view.pages)) {
3606 0 : ret = PTR_ERR(vma->ggtt_view.pages);
3607 0 : vma->ggtt_view.pages = NULL;
3608 0 : DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
3609 : vma->ggtt_view.type, ret);
3610 0 : }
3611 :
3612 0 : return ret;
3613 0 : }
3614 :
3615 : /**
3616 : * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
3617 : * @vma: VMA to map
3618 : * @cache_level: mapping cache level
3619 : * @flags: flags like global or local mapping
3620 : *
3621 : * DMA addresses are taken from the scatter-gather table of this object (or of
3622 : * this VMA in the case of non-default GGTT views) and the PTEs are set up.
3623 : * Note that DMA addresses are also the only part of the SG table we care about.
3624 : */
3625 0 : int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
3626 : u32 flags)
3627 : {
3628 : int ret;
3629 : u32 bind_flags;
3630 :
3631 0 : if (WARN_ON(flags == 0))
3632 0 : return -EINVAL;
3633 :
3634 : bind_flags = 0;
3635 0 : if (flags & PIN_GLOBAL)
3636 0 : bind_flags |= GLOBAL_BIND;
3637 0 : if (flags & PIN_USER)
3638 0 : bind_flags |= LOCAL_BIND;
3639 :
3640 0 : if (flags & PIN_UPDATE)
3641 0 : bind_flags |= vma->bound;
3642 : else
3643 0 : bind_flags &= ~vma->bound;
3644 :
3645 0 : if (bind_flags == 0)
3646 0 : return 0;
3647 :
3648 0 : if (vma->bound == 0 && vma->vm->allocate_va_range) {
3649 0 : trace_i915_va_alloc(vma->vm,
3650 0 : vma->node.start,
3651 0 : vma->node.size,
3652 0 : VM_TO_TRACE_NAME(vma->vm));
3653 :
3654 : /* XXX: i915_vma_pin() will fix this +- hack */
3655 0 : vma->pin_count++;
3656 0 : ret = vma->vm->allocate_va_range(vma->vm,
3657 0 : vma->node.start,
3658 0 : vma->node.size);
3659 0 : vma->pin_count--;
3660 0 : if (ret)
3661 0 : return ret;
3662 : }
3663 :
3664 0 : ret = vma->vm->bind_vma(vma, cache_level, bind_flags);
3665 0 : if (ret)
3666 0 : return ret;
3667 :
3668 0 : vma->bound |= bind_flags;
3669 :
3670 0 : return 0;
3671 0 : }
3672 :
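     : /*
     : * A minimal usage sketch (illustrative only, not part of this file): a
     : * caller that has already reserved vma->node in the address space would
     : * typically bind with
     : *
     : *	ret = i915_vma_bind(vma, vma->obj->cache_level, PIN_GLOBAL);
     : *	if (ret)
     : *		return ret;
     : *
     : * PIN_USER requests a binding in the per-process (ppGTT) address space
     : * instead, and PIN_UPDATE re-applies the PTEs of bindings that already
     : * exist (see the handling of vma->bound above).
     : */
     :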
3673 : /**
3674 : * i915_ggtt_view_size - Get the size of a GGTT view.
3675 : * @obj: Object the view is of.
3676 : * @view: The view in question.
3677 : *
3678 : * Return: The size of the GGTT view in bytes.
3679 : */
3680 : size_t
3681 0 : i915_ggtt_view_size(struct drm_i915_gem_object *obj,
3682 : const struct i915_ggtt_view *view)
3683 : {
3684 0 : if (view->type == I915_GGTT_VIEW_NORMAL) {
3685 0 : return obj->base.size;
3686 : #ifdef __linux__
3687 : } else if (view->type == I915_GGTT_VIEW_ROTATED) {
3688 : return view->rotation_info.size;
3689 : } else if (view->type == I915_GGTT_VIEW_PARTIAL) {
3690 : return view->params.partial.size << PAGE_SHIFT;
3691 : #endif
3692 : } else {
3693 0 : WARN_ONCE(1, "GGTT view %u not implemented!\n", view->type);
3694 0 : return obj->base.size;
3695 : }
3696 0 : }
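     :
     : /*
     : * Note: params.partial.size is stored in pages, hence the PAGE_SHIFT above,
     : * whereas rotation_info.size is already a size in bytes.
     : */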