1 : /*
2 : * Copyright 2009 Jerome Glisse.
3 : * All Rights Reserved.
4 : *
5 : * Permission is hereby granted, free of charge, to any person obtaining a
6 : * copy of this software and associated documentation files (the
7 : * "Software"), to deal in the Software without restriction, including
8 : * without limitation the rights to use, copy, modify, merge, publish,
9 : * distribute, sub license, and/or sell copies of the Software, and to
10 : * permit persons to whom the Software is furnished to do so, subject to
11 : * the following conditions:
12 : *
13 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 : * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16 : * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17 : * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18 : * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19 : * USE OR OTHER DEALINGS IN THE SOFTWARE.
20 : *
21 : * The above copyright notice and this permission notice (including the
22 : * next paragraph) shall be included in all copies or substantial portions
23 : * of the Software.
24 : *
25 : */
26 : /*
27 : * Authors:
28 : * Jerome Glisse <glisse@freedesktop.org>
29 : * Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
30 : * Dave Airlie
31 : */
32 : #include <dev/pci/drm/drmP.h>
33 : #include <dev/pci/drm/radeon_drm.h>
34 : #include <dev/pci/drm/drm_cache.h>
35 : #include "radeon.h"
36 : #include "radeon_trace.h"
37 :
38 :
39 : int radeon_ttm_init(struct radeon_device *rdev);
40 : void radeon_ttm_fini(struct radeon_device *rdev);
41 : static void radeon_bo_clear_surface_reg(struct radeon_bo *bo);
42 :
43 : /*
44 : * To exclude concurrent BO access we rely on bo_reserve exclusion, as all
45 : * functions here call it.
46 : */
47 :
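/*
 * radeon_update_memory_usage - bookkeeping helper
 *
 * Adds (sign > 0) or subtracts (sign < 0) the BO's size from the device's
 * GTT or VRAM usage counter, depending on @mem_type.
 */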
48 0 : static void radeon_update_memory_usage(struct radeon_bo *bo,
49 : unsigned mem_type, int sign)
50 : {
51 0 : struct radeon_device *rdev = bo->rdev;
52 0 : u64 size = (u64)bo->tbo.num_pages << PAGE_SHIFT;
53 :
54 0 : switch (mem_type) {
55 : case TTM_PL_TT:
56 0 : if (sign > 0)
57 0 : atomic64_add(size, &rdev->gtt_usage);
58 : else
59 0 : atomic64_sub(size, &rdev->gtt_usage);
60 : break;
61 : case TTM_PL_VRAM:
62 0 : if (sign > 0)
63 0 : atomic64_add(size, &rdev->vram_usage);
64 : else
65 0 : atomic64_sub(size, &rdev->vram_usage);
66 : break;
67 : }
68 0 : }
69 :
70 0 : static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
71 : {
72 : struct radeon_bo *bo;
73 :
74 0 : bo = container_of(tbo, struct radeon_bo, tbo);
75 :
76 0 : radeon_update_memory_usage(bo, bo->tbo.mem.mem_type, -1);
77 :
78 0 : mutex_lock(&bo->rdev->gem.mutex);
79 0 : list_del_init(&bo->list);
80 0 : mutex_unlock(&bo->rdev->gem.mutex);
81 0 : radeon_bo_clear_surface_reg(bo);
82 0 : WARN_ON(!list_empty(&bo->va));
83 0 : drm_gem_object_release(&bo->gem_base);
84 0 : pool_put(&bo->rdev->ddev->objpl, bo);
85 0 : }
86 :
87 0 : bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo)
88 : {
89 0 : if (bo->destroy == &radeon_ttm_bo_destroy)
90 0 : return true;
91 0 : return false;
92 0 : }
93 :
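/*
 * radeon_ttm_placement_from_domain - build the TTM placement list for a BO
 *
 * Translates the RADEON_GEM_DOMAIN_* mask in @domain, together with the BO's
 * RADEON_GEM_* flags, into TTM placements: VRAM entries first (optionally
 * favouring the CPU-invisible part for NO_CPU_ACCESS BOs), then GTT, then
 * system memory, with a plain system placement as the fallback when no
 * domain bit is set.  CPU-accessible VRAM placements are limited to the
 * visible aperture.
 */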
94 0 : void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
95 : {
96 : u32 c = 0, i;
97 :
98 0 : rbo->placement.placement = rbo->placements;
99 0 : rbo->placement.busy_placement = rbo->placements;
100 0 : if (domain & RADEON_GEM_DOMAIN_VRAM) {
101 : /* Try placing BOs which don't need CPU access outside of the
102 : * CPU accessible part of VRAM
103 : */
104 0 : if ((rbo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
105 0 : rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size) {
106 0 : rbo->placements[c].fpfn =
107 0 : rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
108 0 : rbo->placements[c++].flags = TTM_PL_FLAG_WC |
109 : TTM_PL_FLAG_UNCACHED |
110 : TTM_PL_FLAG_VRAM;
111 0 : }
112 :
113 0 : rbo->placements[c].fpfn = 0;
114 0 : rbo->placements[c++].flags = TTM_PL_FLAG_WC |
115 : TTM_PL_FLAG_UNCACHED |
116 : TTM_PL_FLAG_VRAM;
117 0 : }
118 :
119 0 : if (domain & RADEON_GEM_DOMAIN_GTT) {
120 0 : if (rbo->flags & RADEON_GEM_GTT_UC) {
121 0 : rbo->placements[c].fpfn = 0;
122 0 : rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
123 : TTM_PL_FLAG_TT;
124 :
125 0 : } else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
126 0 : (rbo->rdev->flags & RADEON_IS_AGP)) {
127 0 : rbo->placements[c].fpfn = 0;
128 0 : rbo->placements[c++].flags = TTM_PL_FLAG_WC |
129 : TTM_PL_FLAG_UNCACHED |
130 : TTM_PL_FLAG_TT;
131 0 : } else {
132 0 : rbo->placements[c].fpfn = 0;
133 0 : rbo->placements[c++].flags = TTM_PL_FLAG_CACHED |
134 : TTM_PL_FLAG_TT;
135 : }
136 : }
137 :
138 0 : if (domain & RADEON_GEM_DOMAIN_CPU) {
139 0 : if (rbo->flags & RADEON_GEM_GTT_UC) {
140 0 : rbo->placements[c].fpfn = 0;
141 0 : rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
142 : TTM_PL_FLAG_SYSTEM;
143 :
144 0 : } else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
145 0 : rbo->rdev->flags & RADEON_IS_AGP) {
146 0 : rbo->placements[c].fpfn = 0;
147 0 : rbo->placements[c++].flags = TTM_PL_FLAG_WC |
148 : TTM_PL_FLAG_UNCACHED |
149 : TTM_PL_FLAG_SYSTEM;
150 0 : } else {
151 0 : rbo->placements[c].fpfn = 0;
152 0 : rbo->placements[c++].flags = TTM_PL_FLAG_CACHED |
153 : TTM_PL_FLAG_SYSTEM;
154 : }
155 : }
156 0 : if (!c) {
157 0 : rbo->placements[c].fpfn = 0;
158 0 : rbo->placements[c++].flags = TTM_PL_MASK_CACHING |
159 : TTM_PL_FLAG_SYSTEM;
160 0 : }
161 :
162 0 : rbo->placement.num_placement = c;
163 0 : rbo->placement.num_busy_placement = c;
164 :
165 0 : for (i = 0; i < c; ++i) {
166 0 : if ((rbo->flags & RADEON_GEM_CPU_ACCESS) &&
167 0 : (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
168 0 : !rbo->placements[i].fpfn)
169 0 : rbo->placements[i].lpfn =
170 0 : rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
171 : else
172 0 : rbo->placements[i].lpfn = 0;
173 : }
174 0 : }
175 :
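/*
 * radeon_bo_create - allocate and initialize a radeon buffer object
 *
 * Allocates the radeon_bo wrapper, initializes the embedded GEM object and
 * hands the buffer to TTM via ttm_bo_init().  Write-combining flags are
 * masked out on configurations where WC GTT mappings are known to be broken.
 *
 * Typical call sequence elsewhere in the driver (illustrative only):
 *
 *	r = radeon_bo_create(rdev, size, PAGE_SIZE, true,
 *			     RADEON_GEM_DOMAIN_VRAM, 0, NULL, NULL, &bo);
 *	if (r == 0 && radeon_bo_reserve(bo, false) == 0) {
 *		r = radeon_bo_pin(bo, RADEON_GEM_DOMAIN_VRAM, &gpu_addr);
 *		radeon_bo_unreserve(bo);
 *	}
 */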
176 0 : int radeon_bo_create(struct radeon_device *rdev,
177 : unsigned long size, int byte_align, bool kernel,
178 : u32 domain, u32 flags, struct sg_table *sg,
179 : struct reservation_object *resv,
180 : struct radeon_bo **bo_ptr)
181 : {
182 : struct radeon_bo *bo;
183 : enum ttm_bo_type type;
184 0 : unsigned long page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
185 : size_t acc_size;
186 : int r;
187 :
188 0 : size = PAGE_ALIGN(size);
189 :
190 0 : if (kernel) {
191 : type = ttm_bo_type_kernel;
192 0 : } else if (sg) {
193 : type = ttm_bo_type_sg;
194 0 : } else {
195 : type = ttm_bo_type_device;
196 : }
197 0 : *bo_ptr = NULL;
198 :
199 0 : acc_size = ttm_bo_dma_acc_size(&rdev->mman.bdev, size,
200 : sizeof(struct radeon_bo));
201 :
202 0 : bo = pool_get(&rdev->ddev->objpl, PR_WAITOK | PR_ZERO);
203 0 : if (bo == NULL)
204 0 : return -ENOMEM;
205 0 : r = drm_gem_object_init(rdev->ddev, &bo->gem_base, size);
206 0 : if (unlikely(r)) {
207 0 : pool_put(&rdev->ddev->objpl, bo);
208 0 : return r;
209 : }
210 0 : bo->rdev = rdev;
211 0 : bo->surface_reg = -1;
212 0 : INIT_LIST_HEAD(&bo->list);
213 0 : INIT_LIST_HEAD(&bo->va);
214 0 : bo->initial_domain = domain & (RADEON_GEM_DOMAIN_VRAM |
215 : RADEON_GEM_DOMAIN_GTT |
216 : RADEON_GEM_DOMAIN_CPU);
217 :
218 0 : bo->flags = flags;
219 : /* PCI GART is always snooped */
220 0 : if (!(rdev->flags & RADEON_IS_PCIE))
221 0 : bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
222 :
223 : /* Write-combined CPU mappings of GTT cause GPU hangs with RV6xx
224 : * See https://bugs.freedesktop.org/show_bug.cgi?id=91268
225 : */
226 0 : if (rdev->family >= CHIP_RV610 && rdev->family <= CHIP_RV635)
227 0 : bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
228 :
229 : #ifdef CONFIG_X86_32
230 : /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
231 : * See https://bugs.freedesktop.org/show_bug.cgi?id=84627
232 : */
233 : bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
234 : #elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT)
235 : /* Don't try to enable write-combining when it can't work, or things
236 : * may be slow
237 : * See https://bugs.freedesktop.org/show_bug.cgi?id=88758
238 : */
239 : #ifndef CONFIG_COMPILE_TEST
240 : #warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \
241 : thanks to write-combining
242 : #endif
243 :
244 : if (bo->flags & RADEON_GEM_GTT_WC)
245 : DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
246 : "better performance thanks to write-combining\n");
247 : bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
248 : #else
249 : /* For architectures that don't support WC memory,
250 : * mask out the WC flag from the BO
251 : */
252 0 : if (!drm_arch_can_wc_memory())
253 0 : bo->flags &= ~RADEON_GEM_GTT_WC;
254 : #endif
255 :
256 0 : radeon_ttm_placement_from_domain(bo, domain);
257 : /* Kernel allocations are uninterruptible */
258 0 : down_read(&rdev->pm.mclk_lock);
259 0 : r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
260 0 : &bo->placement, page_align, !kernel, NULL,
261 : acc_size, sg, resv, &radeon_ttm_bo_destroy);
262 0 : up_read(&rdev->pm.mclk_lock);
263 0 : if (unlikely(r != 0)) {
264 0 : return r;
265 : }
266 0 : *bo_ptr = bo;
267 :
268 0 : trace_radeon_bo_create(bo);
269 :
270 0 : return 0;
271 0 : }
272 :
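/*
 * radeon_bo_kmap - map a buffer object into kernel address space
 *
 * Returns the kernel virtual address through @ptr.  The mapping is created
 * on first use and cached in bo->kptr until radeon_bo_kunmap().
 */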
273 0 : int radeon_bo_kmap(struct radeon_bo *bo, void **ptr)
274 : {
275 0 : bool is_iomem;
276 : int r;
277 :
278 0 : if (bo->kptr) {
279 0 : if (ptr) {
280 0 : *ptr = bo->kptr;
281 0 : }
282 0 : return 0;
283 : }
284 0 : r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.num_pages, &bo->kmap);
285 0 : if (r) {
286 0 : return r;
287 : }
288 0 : bo->kptr = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
289 0 : if (ptr) {
290 0 : *ptr = bo->kptr;
291 0 : }
292 0 : radeon_bo_check_tiling(bo, 0, 0);
293 0 : return 0;
294 0 : }
295 :
296 0 : void radeon_bo_kunmap(struct radeon_bo *bo)
297 : {
298 0 : if (bo->kptr == NULL)
299 : return;
300 0 : bo->kptr = NULL;
301 0 : radeon_bo_check_tiling(bo, 0, 0);
302 0 : ttm_bo_kunmap(&bo->kmap);
303 0 : }
304 :
305 0 : struct radeon_bo *radeon_bo_ref(struct radeon_bo *bo)
306 : {
307 0 : if (bo == NULL)
308 0 : return NULL;
309 :
310 0 : ttm_bo_reference(&bo->tbo);
311 0 : return bo;
312 0 : }
313 :
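/*
 * radeon_bo_unref - drop a reference taken with radeon_bo_ref()
 *
 * Releases the reference on the underlying TTM object and clears the
 * caller's pointer.
 */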
314 0 : void radeon_bo_unref(struct radeon_bo **bo)
315 : {
316 0 : struct ttm_buffer_object *tbo;
317 : struct radeon_device *rdev;
318 :
319 0 : if ((*bo) == NULL)
320 0 : return;
321 0 : rdev = (*bo)->rdev;
322 0 : tbo = &((*bo)->tbo);
323 0 : ttm_bo_unref(&tbo);
324 0 : if (tbo == NULL)
325 0 : *bo = NULL;
326 0 : }
327 :
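/*
 * radeon_bo_pin_restricted - pin a BO into @domain below @max_offset
 *
 * Marks all placements TTM_PL_FLAG_NO_EVICT and revalidates the BO so it can
 * no longer be moved.  Nested pins only bump bo->pin_count.  When @max_offset
 * is non-zero the placement is clamped below it; CPU-accessible VRAM pins are
 * additionally clamped to the visible aperture.  The GPU address is returned
 * through @gpu_addr when it is non-NULL.  Userptr BOs cannot be pinned.
 */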
328 0 : int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
329 : u64 *gpu_addr)
330 : {
331 : int r, i;
332 :
333 0 : if (radeon_ttm_tt_has_userptr(bo->tbo.ttm))
334 0 : return -EPERM;
335 :
336 0 : if (bo->pin_count) {
337 0 : bo->pin_count++;
338 0 : if (gpu_addr)
339 0 : *gpu_addr = radeon_bo_gpu_offset(bo);
340 :
341 0 : if (max_offset != 0) {
342 : u64 domain_start;
343 :
344 0 : if (domain == RADEON_GEM_DOMAIN_VRAM)
345 0 : domain_start = bo->rdev->mc.vram_start;
346 : else
347 0 : domain_start = bo->rdev->mc.gtt_start;
348 0 : WARN_ON_ONCE(max_offset <
349 : (radeon_bo_gpu_offset(bo) - domain_start));
350 0 : }
351 :
352 0 : return 0;
353 : }
354 0 : radeon_ttm_placement_from_domain(bo, domain);
355 0 : for (i = 0; i < bo->placement.num_placement; i++) {
356 : /* force to pin into visible video ram */
357 0 : if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
358 0 : !(bo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
359 0 : (!max_offset || max_offset > bo->rdev->mc.visible_vram_size))
360 0 : bo->placements[i].lpfn =
361 0 : bo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
362 : else
363 0 : bo->placements[i].lpfn = max_offset >> PAGE_SHIFT;
364 :
365 0 : bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
366 : }
367 :
368 0 : r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
369 0 : if (likely(r == 0)) {
370 0 : bo->pin_count = 1;
371 0 : if (gpu_addr != NULL)
372 0 : *gpu_addr = radeon_bo_gpu_offset(bo);
373 0 : if (domain == RADEON_GEM_DOMAIN_VRAM)
374 0 : bo->rdev->vram_pin_size += radeon_bo_size(bo);
375 : else
376 0 : bo->rdev->gart_pin_size += radeon_bo_size(bo);
377 : } else {
378 0 : dev_err(bo->rdev->dev, "%p pin failed\n", bo);
379 : }
380 0 : return r;
381 0 : }
382 :
383 0 : int radeon_bo_pin(struct radeon_bo *bo, u32 domain, u64 *gpu_addr)
384 : {
385 0 : return radeon_bo_pin_restricted(bo, domain, 0, gpu_addr);
386 : }
387 :
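/*
 * radeon_bo_unpin - drop one pin reference
 *
 * When the pin count reaches zero the NO_EVICT flag is cleared and the BO is
 * re-validated so TTM may evict it again.
 */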
388 0 : int radeon_bo_unpin(struct radeon_bo *bo)
389 : {
390 : int r, i;
391 :
392 0 : if (!bo->pin_count) {
393 0 : dev_warn(bo->rdev->dev, "%p unpin not necessary\n", bo);
394 0 : return 0;
395 : }
396 0 : bo->pin_count--;
397 0 : if (bo->pin_count)
398 0 : return 0;
399 0 : for (i = 0; i < bo->placement.num_placement; i++) {
400 0 : bo->placements[i].lpfn = 0;
401 0 : bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
402 : }
403 0 : r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
404 0 : if (likely(r == 0)) {
405 0 : if (bo->tbo.mem.mem_type == TTM_PL_VRAM)
406 0 : bo->rdev->vram_pin_size -= radeon_bo_size(bo);
407 : else
408 0 : bo->rdev->gart_pin_size -= radeon_bo_size(bo);
409 : } else {
410 0 : dev_err(bo->rdev->dev, "%p validate failed for unpin\n", bo);
411 : }
412 0 : return r;
413 0 : }
414 :
415 0 : int radeon_bo_evict_vram(struct radeon_device *rdev)
416 : {
417 : /* late 2.6.33 fix IGP hibernate - we need pm ops to do this correctly */
418 : if (0 && (rdev->flags & RADEON_IS_IGP)) {
419 : if (rdev->mc.igp_sideport_enabled == false)
420 : /* Useless to evict on IGP chips */
421 : return 0;
422 : }
423 0 : return ttm_bo_evict_mm(&rdev->mman.bdev, TTM_PL_VRAM);
424 : }
425 :
426 0 : void radeon_bo_force_delete(struct radeon_device *rdev)
427 : {
428 : struct radeon_bo *bo, *n;
429 :
430 0 : if (list_empty(&rdev->gem.objects)) {
431 0 : return;
432 : }
433 0 : dev_err(rdev->dev, "Userspace still has active objects !\n");
434 0 : list_for_each_entry_safe(bo, n, &rdev->gem.objects, list) {
435 0 : dev_err(rdev->dev, "%p %p %lu %lu force free\n",
436 : &bo->gem_base, bo, (unsigned long)bo->gem_base.size,
437 : *((unsigned long *)&bo->gem_base.refcount));
438 0 : mutex_lock(&bo->rdev->gem.mutex);
439 0 : list_del_init(&bo->list);
440 0 : mutex_unlock(&bo->rdev->gem.mutex);
441 : /* this should unref the ttm bo */
442 0 : drm_gem_object_unreference_unlocked(&bo->gem_base);
443 : }
444 0 : }
445 :
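/*
 * radeon_bo_init - one-time VRAM setup
 *
 * Registers a write-combining MTRR for the VRAM aperture (unless fastfb is
 * active), registers the aperture pages with UVM and hands off to
 * radeon_ttm_init().
 */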
446 0 : int radeon_bo_init(struct radeon_device *rdev)
447 : {
448 : paddr_t start, end;
449 :
450 : #ifdef __linux__
451 : /* reserve PAT memory space to WC for VRAM */
452 : arch_io_reserve_memtype_wc(rdev->mc.aper_base,
453 : rdev->mc.aper_size);
454 : #endif
455 :
456 : /* Add an MTRR for the VRAM */
457 0 : if (!rdev->fastfb_working) {
458 : #ifdef __linux__
459 : rdev->mc.vram_mtrr = arch_phys_wc_add(rdev->mc.aper_base,
460 : rdev->mc.aper_size);
461 : #else
462 0 : drm_mtrr_add(rdev->mc.aper_base, rdev->mc.aper_size, DRM_MTRR_WC);
463 : /* fake a 'cookie', seems to be unused? */
464 0 : rdev->mc.vram_mtrr = 1;
465 : #endif
466 0 : }
467 :
468 0 : start = atop(bus_space_mmap(rdev->memt, rdev->mc.aper_base, 0, 0, 0));
469 0 : end = start + atop(rdev->mc.aper_size);
470 0 : uvm_page_physload(start, end, start, end, PHYSLOAD_DEVICE);
471 :
472 : DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
473 : rdev->mc.mc_vram_size >> 20,
474 : (unsigned long long)rdev->mc.aper_size >> 20);
475 : DRM_INFO("RAM width %dbits %cDR\n",
476 : rdev->mc.vram_width, rdev->mc.vram_is_ddr ? 'D' : 'S');
477 0 : return radeon_ttm_init(rdev);
478 : }
479 :
480 0 : void radeon_bo_fini(struct radeon_device *rdev)
481 : {
482 0 : radeon_ttm_fini(rdev);
483 : #ifdef __linux__
484 : arch_phys_wc_del(rdev->mc.vram_mtrr);
485 : arch_io_free_memtype_wc(rdev->mc.aper_base, rdev->mc.aper_size);
486 : #else
487 0 : drm_mtrr_del(0, rdev->mc.aper_base, rdev->mc.aper_size, DRM_MTRR_WC);
488 : #endif
489 0 : }
490 :
491 : /* Returns how many bytes TTM can move per IB.
492 : */
493 0 : static u64 radeon_bo_get_threshold_for_moves(struct radeon_device *rdev)
494 : {
495 0 : u64 real_vram_size = rdev->mc.real_vram_size;
496 0 : u64 vram_usage = atomic64_read(&rdev->vram_usage);
497 :
498 : /* This function is based on the current VRAM usage.
499 : *
500 : * - If all of VRAM is free, allow relocating the number of bytes that
501 : * is equal to 1/4 of the size of VRAM for this IB.
502 : *
503 : * - If more than one half of VRAM is occupied, only allow relocating
504 : * 1 MB of data for this IB.
505 : *
506 : * - From 0 to one half of used VRAM, the threshold decreases
507 : * linearly.
508 : * __________________
509 : * 1/4 of -|\ |
510 : * VRAM | \ |
511 : * | \ |
512 : * | \ |
513 : * | \ |
514 : * | \ |
515 : * | \ |
516 : * | \________|1 MB
517 : * |----------------|
518 : * VRAM 0 % 100 %
519 : * used used
520 : *
521 : * Note: It's a threshold, not a limit. The threshold must be crossed
522 : * for buffer relocations to stop, so any buffer of an arbitrary size
523 : * can be moved as long as the threshold isn't crossed before
524 : * the relocation takes place. We don't want to disable buffer
525 : * relocations completely.
526 : *
527 : * The idea is that buffers should be placed in VRAM at creation time
528 : * and TTM should only do a minimum number of relocations during
529 : * command submission. In practice, you need to submit at least
530 : * a dozen IBs to move all buffers to VRAM if they are in GTT.
531 : *
532 : * Also, things can get pretty crazy under memory pressure and actual
533 : * VRAM usage can change a lot, so playing safe even at 50% does
534 : * consistently increase performance.
535 : */
536 :
537 0 : u64 half_vram = real_vram_size >> 1;
538 0 : u64 half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage;
539 0 : u64 bytes_moved_threshold = half_free_vram >> 1;
540 0 : return max(bytes_moved_threshold, 1024*1024ull);
541 : }
542 :
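/*
 * radeon_bo_list_validate - validate all BOs on a CS reservation list
 *
 * Reserves every buffer on @head, then validates each unpinned BO into its
 * preferred domain, falling back to the allowed domains once the per-IB
 * byte-move threshold has been exceeded or validation fails.  On error the
 * reservations are backed off and the error is returned; on success the GPU
 * offset and tiling flags are recorded for every list entry.
 */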
543 0 : int radeon_bo_list_validate(struct radeon_device *rdev,
544 : struct ww_acquire_ctx *ticket,
545 : struct list_head *head, int ring)
546 : {
547 : struct radeon_bo_list *lobj;
548 0 : struct list_head duplicates;
549 : int r;
550 : u64 bytes_moved = 0, initial_bytes_moved;
551 0 : u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev);
552 :
553 0 : INIT_LIST_HEAD(&duplicates);
554 0 : r = ttm_eu_reserve_buffers(ticket, head, true, &duplicates);
555 0 : if (unlikely(r != 0)) {
556 0 : return r;
557 : }
558 :
559 0 : list_for_each_entry(lobj, head, tv.head) {
560 0 : struct radeon_bo *bo = lobj->robj;
561 0 : if (!bo->pin_count) {
562 0 : u32 domain = lobj->prefered_domains;
563 0 : u32 allowed = lobj->allowed_domains;
564 : u32 current_domain =
565 0 : radeon_mem_type_to_domain(bo->tbo.mem.mem_type);
566 :
567 : /* Check if this buffer will be moved and don't move it
568 : * if we have moved too many buffers for this IB already.
569 : *
570 : * Note that this allows moving at least one buffer of
571 : * any size, because it doesn't take the current "bo"
572 : * into account. We don't want to disallow buffer moves
573 : * completely.
574 : */
575 0 : if ((allowed & current_domain) != 0 &&
576 0 : (domain & current_domain) == 0 && /* will be moved */
577 0 : bytes_moved > bytes_moved_threshold) {
578 : /* don't move it */
579 : domain = current_domain;
580 0 : }
581 :
582 : retry:
583 0 : radeon_ttm_placement_from_domain(bo, domain);
584 0 : if (ring == R600_RING_TYPE_UVD_INDEX)
585 0 : radeon_uvd_force_into_uvd_segment(bo, allowed);
586 :
587 0 : initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved);
588 0 : r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
589 0 : bytes_moved += atomic64_read(&rdev->num_bytes_moved) -
590 : initial_bytes_moved;
591 :
592 0 : if (unlikely(r)) {
593 0 : if (r != -ERESTARTSYS &&
594 0 : domain != lobj->allowed_domains) {
595 : domain = lobj->allowed_domains;
596 0 : goto retry;
597 : }
598 0 : ttm_eu_backoff_reservation(ticket, head);
599 0 : return r;
600 : }
601 0 : }
602 0 : lobj->gpu_offset = radeon_bo_gpu_offset(bo);
603 0 : lobj->tiling_flags = bo->tiling_flags;
604 0 : }
605 :
606 0 : list_for_each_entry(lobj, &duplicates, tv.head) {
607 0 : lobj->gpu_offset = radeon_bo_gpu_offset(lobj->robj);
608 0 : lobj->tiling_flags = lobj->robj->tiling_flags;
609 : }
610 :
611 0 : return 0;
612 0 : }
613 :
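/*
 * radeon_bo_get_surface_reg - bind a tiled BO to a hardware surface register
 *
 * Reuses the BO's existing register if it has one, otherwise grabs a free
 * slot (stealing one from an unpinned BO if all RADEON_GEM_MAX_SURFACES slots
 * are taken) and programs it with the BO's tiling flags and pitch.
 */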
614 0 : int radeon_bo_get_surface_reg(struct radeon_bo *bo)
615 : {
616 0 : struct radeon_device *rdev = bo->rdev;
617 : struct radeon_surface_reg *reg;
618 : struct radeon_bo *old_object;
619 : int steal;
620 : int i;
621 :
622 : #ifdef notyet
623 : lockdep_assert_held(&bo->tbo.resv->lock.base);
624 : #endif
625 :
626 0 : if (!bo->tiling_flags)
627 0 : return 0;
628 :
629 0 : if (bo->surface_reg >= 0) {
630 0 : reg = &rdev->surface_regs[bo->surface_reg];
631 : i = bo->surface_reg;
632 0 : goto out;
633 : }
634 :
635 : steal = -1;
636 0 : for (i = 0; i < RADEON_GEM_MAX_SURFACES; i++) {
637 :
638 0 : reg = &rdev->surface_regs[i];
639 0 : if (!reg->bo)
640 : break;
641 :
642 : old_object = reg->bo;
643 0 : if (old_object->pin_count == 0)
644 0 : steal = i;
645 : }
646 :
647 : /* if we are all out */
648 0 : if (i == RADEON_GEM_MAX_SURFACES) {
649 0 : if (steal == -1)
650 0 : return -ENOMEM;
651 : /* find someone with a surface reg and nuke their BO */
652 0 : reg = &rdev->surface_regs[steal];
653 0 : old_object = reg->bo;
654 : /* blow away the mapping */
655 : DRM_DEBUG("stealing surface reg %d from %p\n", steal, old_object);
656 0 : ttm_bo_unmap_virtual(&old_object->tbo);
657 0 : old_object->surface_reg = -1;
658 : i = steal;
659 0 : }
660 :
661 0 : bo->surface_reg = i;
662 0 : reg->bo = bo;
663 :
664 : out:
665 0 : radeon_set_surface_reg(rdev, i, bo->tiling_flags, bo->pitch,
666 : bo->tbo.mem.start << PAGE_SHIFT,
667 : bo->tbo.num_pages << PAGE_SHIFT);
668 0 : return 0;
669 0 : }
670 :
671 0 : static void radeon_bo_clear_surface_reg(struct radeon_bo *bo)
672 : {
673 0 : struct radeon_device *rdev = bo->rdev;
674 : struct radeon_surface_reg *reg;
675 :
676 0 : if (bo->surface_reg == -1)
677 0 : return;
678 :
679 0 : reg = &rdev->surface_regs[bo->surface_reg];
680 0 : radeon_clear_surface_reg(rdev, bo->surface_reg);
681 :
682 0 : reg->bo = NULL;
683 0 : bo->surface_reg = -1;
684 0 : }
685 :
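/*
 * radeon_bo_set_tiling_flags - store tiling parameters for a BO
 *
 * On CEDAR (evergreen) and newer the bank width/height, macro tile aspect and
 * tile split fields are sanity-checked before the flags and pitch are
 * recorded under the BO reservation.
 */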
686 0 : int radeon_bo_set_tiling_flags(struct radeon_bo *bo,
687 : uint32_t tiling_flags, uint32_t pitch)
688 : {
689 0 : struct radeon_device *rdev = bo->rdev;
690 : int r;
691 :
692 0 : if (rdev->family >= CHIP_CEDAR) {
693 : unsigned bankw, bankh, mtaspect, tilesplit, stilesplit;
694 :
695 0 : bankw = (tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
696 0 : bankh = (tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
697 0 : mtaspect = (tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
698 0 : tilesplit = (tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
699 0 : stilesplit = (tiling_flags >> RADEON_TILING_EG_STENCIL_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_STENCIL_TILE_SPLIT_MASK;
700 0 : switch (bankw) {
701 : case 0:
702 : case 1:
703 : case 2:
704 : case 4:
705 : case 8:
706 : break;
707 : default:
708 0 : return -EINVAL;
709 : }
710 0 : switch (bankh) {
711 : case 0:
712 : case 1:
713 : case 2:
714 : case 4:
715 : case 8:
716 : break;
717 : default:
718 0 : return -EINVAL;
719 : }
720 0 : switch (mtaspect) {
721 : case 0:
722 : case 1:
723 : case 2:
724 : case 4:
725 : case 8:
726 : break;
727 : default:
728 0 : return -EINVAL;
729 : }
730 0 : if (tilesplit > 6) {
731 0 : return -EINVAL;
732 : }
733 0 : if (stilesplit > 6) {
734 0 : return -EINVAL;
735 : }
736 0 : }
737 0 : r = radeon_bo_reserve(bo, false);
738 0 : if (unlikely(r != 0))
739 0 : return r;
740 0 : bo->tiling_flags = tiling_flags;
741 0 : bo->pitch = pitch;
742 0 : radeon_bo_unreserve(bo);
743 0 : return 0;
744 0 : }
745 :
746 0 : void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
747 : uint32_t *tiling_flags,
748 : uint32_t *pitch)
749 : {
750 : #ifdef notyet
751 : lockdep_assert_held(&bo->tbo.resv->lock.base);
752 : #endif
753 :
754 0 : if (tiling_flags)
755 0 : *tiling_flags = bo->tiling_flags;
756 0 : if (pitch)
757 0 : *pitch = bo->pitch;
758 0 : }
759 :
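/*
 * radeon_bo_check_tiling - keep the surface register in sync with placement
 *
 * Drops the surface register when forced or when the BO no longer lives in
 * VRAM, and (re)acquires one when a surface-tiled BO has moved into VRAM.
 */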
760 0 : int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
761 : bool force_drop)
762 : {
763 : #ifdef notyet
764 : if (!force_drop)
765 : lockdep_assert_held(&bo->tbo.resv->lock.base);
766 : #endif
767 :
768 0 : if (!(bo->tiling_flags & RADEON_TILING_SURFACE))
769 0 : return 0;
770 :
771 0 : if (force_drop) {
772 0 : radeon_bo_clear_surface_reg(bo);
773 0 : return 0;
774 : }
775 :
776 0 : if (bo->tbo.mem.mem_type != TTM_PL_VRAM) {
777 0 : if (!has_moved)
778 0 : return 0;
779 :
780 0 : if (bo->surface_reg >= 0)
781 0 : radeon_bo_clear_surface_reg(bo);
782 0 : return 0;
783 : }
784 :
785 0 : if ((bo->surface_reg >= 0) && !has_moved)
786 0 : return 0;
787 :
788 0 : return radeon_bo_get_surface_reg(bo);
789 0 : }
790 :
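/*
 * radeon_bo_move_notify - TTM move callback
 *
 * Drops the surface register, invalidates any VM mappings of the buffer and
 * updates the per-domain memory usage counters for the old and new placement.
 */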
791 0 : void radeon_bo_move_notify(struct ttm_buffer_object *bo,
792 : struct ttm_mem_reg *new_mem)
793 : {
794 : struct radeon_bo *rbo;
795 :
796 0 : if (!radeon_ttm_bo_is_radeon_bo(bo))
797 0 : return;
798 :
799 0 : rbo = container_of(bo, struct radeon_bo, tbo);
800 0 : radeon_bo_check_tiling(rbo, 0, 1);
801 0 : radeon_vm_bo_invalidate(rbo->rdev, rbo);
802 :
803 : /* update statistics */
804 0 : if (!new_mem)
805 0 : return;
806 :
807 0 : radeon_update_memory_usage(rbo, bo->mem.mem_type, -1);
808 0 : radeon_update_memory_usage(rbo, new_mem->mem_type, 1);
809 0 : }
810 :
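/*
 * radeon_bo_fault_reserve_notify - make a faulting BO CPU-accessible
 *
 * Called on a CPU page fault; if the BO currently sits outside the visible
 * part of VRAM it is revalidated into visible VRAM, falling back to GTT when
 * that fails with -ENOMEM.
 */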
811 0 : int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
812 : {
813 : struct radeon_device *rdev;
814 : struct radeon_bo *rbo;
815 : unsigned long offset, size, lpfn;
816 : int i, r;
817 :
818 0 : if (!radeon_ttm_bo_is_radeon_bo(bo))
819 0 : return 0;
820 0 : rbo = container_of(bo, struct radeon_bo, tbo);
821 0 : radeon_bo_check_tiling(rbo, 0, 0);
822 0 : rdev = rbo->rdev;
823 0 : if (bo->mem.mem_type != TTM_PL_VRAM)
824 0 : return 0;
825 :
826 0 : size = bo->mem.num_pages << PAGE_SHIFT;
827 0 : offset = bo->mem.start << PAGE_SHIFT;
828 0 : if ((offset + size) <= rdev->mc.visible_vram_size)
829 0 : return 0;
830 :
831 : /* hurrah the memory is not visible ! */
832 0 : radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
833 0 : lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT;
834 0 : for (i = 0; i < rbo->placement.num_placement; i++) {
835 : /* Force into visible VRAM */
836 0 : if ((rbo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
837 0 : (!rbo->placements[i].lpfn || rbo->placements[i].lpfn > lpfn))
838 0 : rbo->placements[i].lpfn = lpfn;
839 : }
840 0 : r = ttm_bo_validate(bo, &rbo->placement, false, false);
841 0 : if (unlikely(r == -ENOMEM)) {
842 0 : radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
843 0 : return ttm_bo_validate(bo, &rbo->placement, false, false);
844 0 : } else if (unlikely(r != 0)) {
845 0 : return r;
846 : }
847 :
848 0 : offset = bo->mem.start << PAGE_SHIFT;
849 : /* this should never happen */
850 0 : if ((offset + size) > rdev->mc.visible_vram_size)
851 0 : return -EINVAL;
852 :
853 0 : return 0;
854 0 : }
855 :
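/*
 * radeon_bo_wait - wait for a BO to become idle
 *
 * Optionally returns the current memory type through @mem_type; with
 * @no_wait set the call returns immediately instead of blocking.
 */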
856 0 : int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait)
857 : {
858 : int r;
859 :
860 0 : r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, NULL);
861 0 : if (unlikely(r != 0))
862 0 : return r;
863 0 : if (mem_type)
864 0 : *mem_type = bo->tbo.mem.mem_type;
865 :
866 0 : r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
867 0 : ttm_bo_unreserve(&bo->tbo);
868 0 : return r;
869 0 : }
870 :
871 : /**
872 : * radeon_bo_fence - add fence to buffer object
873 : *
874 : * @bo: buffer object in question
875 : * @fence: fence to add
876 : * @shared: true if fence should be added shared
877 : *
878 : */
879 0 : void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
880 : bool shared)
881 : {
882 0 : struct reservation_object *resv = bo->tbo.resv;
883 :
884 0 : if (shared)
885 0 : reservation_object_add_shared_fence(resv, &fence->base);
886 : else
887 0 : reservation_object_add_excl_fence(resv, &fence->base);
888 0 : }