Line data Source code
1 : /*
2 : * Copyright 2009 Jerome Glisse.
3 : * All Rights Reserved.
4 : *
5 : * Permission is hereby granted, free of charge, to any person obtaining a
6 : * copy of this software and associated documentation files (the
7 : * "Software"), to deal in the Software without restriction, including
8 : * without limitation the rights to use, copy, modify, merge, publish,
9 : * distribute, sub license, and/or sell copies of the Software, and to
10 : * permit persons to whom the Software is furnished to do so, subject to
11 : * the following conditions:
12 : *
13 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 : * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16 : * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17 : * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18 : * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19 : * USE OR OTHER DEALINGS IN THE SOFTWARE.
20 : *
21 : * The above copyright notice and this permission notice (including the
22 : * next paragraph) shall be included in all copies or substantial portions
23 : * of the Software.
24 : *
25 : */
26 : /*
27 : * Authors:
28 : * Jerome Glisse <glisse@freedesktop.org>
29 : * Dave Airlie
30 : */
31 : #include <dev/pci/drm/drm_linux.h>
32 : #include <dev/pci/drm/drmP.h>
33 : #include "radeon_reg.h"
34 : #include "radeon.h"
35 : #include "radeon_trace.h"
36 :
37 : /*
38 : * Fences
39 : * Fences mark an event in the GPU's pipeline and are used
40 : * for GPU/CPU synchronization. When the fence is written,
41 : * it is expected that all buffers associated with that fence
42 : * are no longer in use by the associated ring on the GPU and
43 : * that the relevant GPU caches have been flushed. Whether
44 : * we use a scratch register or memory location depends on the asic
45 : * and whether writeback is enabled.
46 : */
47 :
48 : /**
49 : * radeon_fence_write - write a fence value
50 : *
51 : * @rdev: radeon_device pointer
52 : * @seq: sequence number to write
53 : * @ring: ring index the fence is associated with
54 : *
55 : * Writes a fence value to memory or a scratch register (all asics).
56 : */
57 0 : static void radeon_fence_write(struct radeon_device *rdev, u32 seq, int ring)
58 : {
59 0 : struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
60 0 : if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
61 0 : if (drv->cpu_addr) {
62 0 : *drv->cpu_addr = cpu_to_le32(seq);
63 0 : }
64 : } else {
65 0 : WREG32(drv->scratch_reg, seq);
66 : }
67 0 : }
68 :
69 : /**
70 : * radeon_fence_read - read a fence value
71 : *
72 : * @rdev: radeon_device pointer
73 : * @ring: ring index the fence is associated with
74 : *
75 : * Reads a fence value from memory or a scratch register (all asics).
76 : * Returns the value of the fence read from memory or register.
77 : */
78 0 : static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
79 : {
80 0 : struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
81 : u32 seq = 0;
82 :
83 0 : if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
84 0 : if (drv->cpu_addr) {
85 0 : seq = le32_to_cpu(*drv->cpu_addr);
86 0 : } else {
87 0 : seq = lower_32_bits(atomic64_read(&drv->last_seq));
88 : }
89 : } else {
90 0 : seq = RREG32(drv->scratch_reg);
91 : }
92 0 : return seq;
93 : }
94 :
95 : /**
96 : * radeon_fence_schedule_check - schedule lockup check
97 : *
98 : * @rdev: radeon_device pointer
99 : * @ring: ring index we should work with
100 : *
101 : * Queues a delayed work item to check for lockups.
102 : */
103 0 : static void radeon_fence_schedule_check(struct radeon_device *rdev, int ring)
104 : {
105 : /*
106 : * Do not reset the timer here with mod_delayed_work,
107 : * this can livelock in an interaction with TTM delayed destroy.
108 : */
109 0 : queue_delayed_work(system_power_efficient_wq,
110 0 : &rdev->fence_drv[ring].lockup_work,
111 0 : RADEON_FENCE_JIFFIES_TIMEOUT);
112 0 : }
113 :
114 : /**
115 : * radeon_fence_emit - emit a fence on the requested ring
116 : *
117 : * @rdev: radeon_device pointer
118 : * @fence: radeon fence object
119 : * @ring: ring index the fence is associated with
120 : *
121 : * Emits a fence command on the requested ring (all asics).
122 : * Returns 0 on success, -ENOMEM on failure.
123 : */
124 0 : int radeon_fence_emit(struct radeon_device *rdev,
125 : struct radeon_fence **fence,
126 : int ring)
127 : {
128 0 : u64 seq = ++rdev->fence_drv[ring].sync_seq[ring];
129 :
130 : /* we are protected by the ring emission mutex */
131 0 : *fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
132 0 : if ((*fence) == NULL) {
133 0 : return -ENOMEM;
134 : }
135 0 : (*fence)->rdev = rdev;
136 0 : (*fence)->seq = seq;
137 0 : (*fence)->ring = ring;
138 0 : (*fence)->is_vm_update = false;
139 0 : fence_init(&(*fence)->base, &radeon_fence_ops,
140 0 : &rdev->fence_queue.lock, rdev->fence_context + ring, seq);
141 0 : radeon_fence_ring_emit(rdev, ring, *fence);
142 0 : trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq);
143 0 : radeon_fence_schedule_check(rdev, ring);
144 0 : return 0;
145 0 : }
146 :
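/*
 * Usage sketch (illustrative only; the radeon_fence_example_* name below is
 * hypothetical and the block is compiled out): a typical caller is assumed
 * to hold the ring emission lock and to have reserved ring space before
 * emitting, then waits on the fence and drops its reference.
 */
#if 0
static int radeon_fence_example_emit_and_wait(struct radeon_device *rdev,
					      int ring)
{
	struct radeon_fence *fence = NULL;
	int r;

	r = radeon_fence_emit(rdev, &fence, ring);	/* -ENOMEM on failure */
	if (r)
		return r;

	r = radeon_fence_wait(fence, false);		/* uninterruptible wait */
	radeon_fence_unref(&fence);			/* drop our reference */
	return r;
}
#endif
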
147 : /**
148 : * radeon_fence_check_signaled - callback from fence_queue
149 : *
150 : * this function is called with fence_queue lock held, which is also used
151 : * for the fence locking itself, so unlocked variants are used for
152 : * fence_signal, and remove_wait_queue.
153 : */
154 0 : static int radeon_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
155 : {
156 : struct radeon_fence *fence;
157 : u64 seq;
158 :
159 0 : fence = container_of(wait, struct radeon_fence, fence_wake);
160 :
161 : /*
162 : * We cannot use radeon_fence_process here because we're already
163 : * in the waitqueue, in a call from wake_up_all.
164 : */
165 0 : seq = atomic64_read(&fence->rdev->fence_drv[fence->ring].last_seq);
166 0 : if (seq >= fence->seq) {
167 0 : int ret = fence_signal_locked(&fence->base);
168 :
169 : if (!ret)
170 : FENCE_TRACE(&fence->base, "signaled from irq context\n");
171 : else
172 : FENCE_TRACE(&fence->base, "was already signaled\n");
173 :
174 0 : radeon_irq_kms_sw_irq_put(fence->rdev, fence->ring);
175 0 : __remove_wait_queue(&fence->rdev->fence_queue, &fence->fence_wake);
176 0 : fence_put(&fence->base);
177 0 : } else
178 : FENCE_TRACE(&fence->base, "pending\n");
179 0 : return 0;
180 : }
181 :
182 : /**
183 : * radeon_fence_activity - check for fence activity
184 : *
185 : * @rdev: radeon_device pointer
186 : * @ring: ring index the fence is associated with
187 : *
188 : * Checks the current fence value and calculates the last
189 : * signaled fence value. Returns true if activity occurred
190 : * on the ring, in which case the fence_queue should be woken up.
191 : */
192 0 : static bool radeon_fence_activity(struct radeon_device *rdev, int ring)
193 : {
194 : uint64_t seq, last_seq, last_emitted;
195 : unsigned count_loop = 0;
196 : bool wake = false;
197 :
198 : /* Note there is a scenario here for an infinite loop, but it is
199 : * very unlikely to happen. For it to happen, the current polling
200 : * process must be interrupted by another process, and that other
201 : * process must update last_seq between the atomic read and the
202 : * xchg of the current process.
203 : *
204 : * Moreover, for this to turn into an infinite loop, new fences
205 : * must be signaled continuously, i.e. radeon_fence_read needs
206 : * to return a different value each time for both the currently
207 : * polling process and the other process that exchanges last_seq
208 : * between the atomic read and xchg of the current process. And the
209 : * value the other process sets as last_seq must be higher than
210 : * the seq value we just read, which means the current process
211 : * must be interrupted after radeon_fence_read and before the
212 : * atomic xchg.
213 : *
214 : * To be even safer, we count the number of times we loop and
215 : * bail out after 10 iterations, accepting the fact that we might
216 : * have temporarily set last_seq not to the true last signaled
217 : * seq but to an older one.
218 : */
219 0 : last_seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
220 0 : do {
221 0 : last_emitted = rdev->fence_drv[ring].sync_seq[ring];
222 0 : seq = radeon_fence_read(rdev, ring);
223 0 : seq |= last_seq & 0xffffffff00000000LL;
224 0 : if (seq < last_seq) {
225 0 : seq &= 0xffffffff;
226 0 : seq |= last_emitted & 0xffffffff00000000LL;
227 0 : }
228 :
229 0 : if (seq <= last_seq || seq > last_emitted) {
230 : break;
231 : }
232 : /* If we loop over we don't want to return without
233 : * checking if a fence is signaled as it means that the
234 : * seq we just read is different from the previous one.
235 : */
236 : wake = true;
237 : last_seq = seq;
238 0 : if ((count_loop++) > 10) {
239 : /* We looped too many times; bail out, accepting the
240 : * fact that we might have set an older fence
241 : * seq than the current real last seq as signaled
242 : * by the hw.
243 : */
244 : break;
245 : }
246 0 : } while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);
247 :
248 0 : if (seq < last_emitted)
249 0 : radeon_fence_schedule_check(rdev, ring);
250 :
251 0 : return wake;
252 : }
253 :
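/*
 * Illustration (hypothetical helper, compiled out): how the 32-bit value
 * read back from the hardware is widened into the 64-bit sequence space
 * above. The upper word is borrowed from the last known sequence, or from
 * the last emitted sequence when the low word appears to have wrapped.
 */
#if 0
static uint64_t radeon_fence_example_extend_seq(uint32_t hw_seq,
						uint64_t last_seq,
						uint64_t last_emitted)
{
	uint64_t seq = (last_seq & 0xffffffff00000000ULL) | hw_seq;

	if (seq < last_seq)	/* low 32 bits wrapped since last_seq */
		seq = (last_emitted & 0xffffffff00000000ULL) | hw_seq;
	return seq;
}
#endif
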
254 : /**
255 : * radeon_fence_check_lockup - check for hardware lockup
256 : *
257 : * @work: delayed work item
258 : *
259 : * Checks for fence activity and, if there is none, probes
260 : * the hardware to see whether a lockup occurred.
261 : */
262 0 : static void radeon_fence_check_lockup(struct work_struct *work)
263 : {
264 : struct radeon_fence_driver *fence_drv;
265 : struct radeon_device *rdev;
266 : int ring;
267 :
268 0 : fence_drv = container_of(work, struct radeon_fence_driver,
269 : lockup_work.work);
270 0 : rdev = fence_drv->rdev;
271 0 : ring = fence_drv - &rdev->fence_drv[0];
272 :
273 0 : if (!down_read_trylock(&rdev->exclusive_lock)) {
274 : /* just reschedule the check if a reset is going on */
275 0 : radeon_fence_schedule_check(rdev, ring);
276 0 : return;
277 : }
278 :
279 0 : if (fence_drv->delayed_irq && rdev->ddev->irq_enabled) {
280 : unsigned long irqflags;
281 :
282 0 : fence_drv->delayed_irq = false;
283 0 : spin_lock_irqsave(&rdev->irq.lock, irqflags);
284 0 : radeon_irq_set(rdev);
285 0 : spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
286 0 : }
287 :
288 0 : if (radeon_fence_activity(rdev, ring))
289 0 : wake_up_all(&rdev->fence_queue);
290 :
291 0 : else if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
292 :
293 : /* good news we believe it's a lockup */
294 0 : dev_warn(rdev->dev, "GPU lockup (current fence id "
295 : "0x%016llx last fence id 0x%016llx on ring %d)\n",
296 : (uint64_t)atomic64_read(&fence_drv->last_seq),
297 : fence_drv->sync_seq[ring], ring);
298 :
299 : /* remember that we need a reset */
300 0 : rdev->needs_reset = true;
301 0 : wake_up_all(&rdev->fence_queue);
302 0 : }
303 0 : up_read(&rdev->exclusive_lock);
304 0 : }
305 :
306 : /**
307 : * radeon_fence_process - process a fence
308 : *
309 : * @rdev: radeon_device pointer
310 : * @ring: ring index the fence is associated with
311 : *
312 : * Checks the current fence value and wakes the fence queue
313 : * if the sequence number has increased (all asics).
314 : */
315 0 : void radeon_fence_process(struct radeon_device *rdev, int ring)
316 : {
317 0 : if (radeon_fence_activity(rdev, ring))
318 0 : wake_up_all(&rdev->fence_queue);
319 0 : }
320 :
321 : /**
322 : * radeon_fence_seq_signaled - check if a fence sequence number has signaled
323 : *
324 : * @rdev: radeon device pointer
325 : * @seq: sequence number
326 : * @ring: ring index the fence is associated with
327 : *
328 : * Check if the last signaled fence sequence number is >= the requested
329 : * sequence number (all asics).
330 : * Returns true if the fence has signaled (current fence value
331 : * is >= requested value) or false if it has not (current fence
332 : * value is < the requested value). Helper function for
333 : * radeon_fence_signaled().
334 : */
335 0 : static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
336 : u64 seq, unsigned ring)
337 : {
338 0 : if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
339 0 : return true;
340 : }
341 : /* poll new last sequence at least once */
342 0 : radeon_fence_process(rdev, ring);
343 0 : if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
344 0 : return true;
345 : }
346 0 : return false;
347 0 : }
348 :
349 0 : static bool radeon_fence_is_signaled(struct fence *f)
350 : {
351 0 : struct radeon_fence *fence = to_radeon_fence(f);
352 0 : struct radeon_device *rdev = fence->rdev;
353 0 : unsigned ring = fence->ring;
354 0 : u64 seq = fence->seq;
355 :
356 0 : if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
357 0 : return true;
358 : }
359 :
360 0 : if (down_read_trylock(&rdev->exclusive_lock)) {
361 0 : radeon_fence_process(rdev, ring);
362 0 : up_read(&rdev->exclusive_lock);
363 :
364 0 : if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
365 0 : return true;
366 : }
367 : }
368 0 : return false;
369 0 : }
370 :
371 : /**
372 : * radeon_fence_enable_signaling - enable signalling on fence
373 : * @fence: fence
374 : *
375 : * This function is called with fence_queue lock held, and adds a callback
376 : * to fence_queue that checks if this fence is signaled, and if so it
377 : * signals the fence and removes itself.
378 : */
379 0 : static bool radeon_fence_enable_signaling(struct fence *f)
380 : {
381 0 : struct radeon_fence *fence = to_radeon_fence(f);
382 0 : struct radeon_device *rdev = fence->rdev;
383 :
384 0 : if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq)
385 0 : return false;
386 :
387 0 : if (down_read_trylock(&rdev->exclusive_lock)) {
388 0 : radeon_irq_kms_sw_irq_get(rdev, fence->ring);
389 :
390 0 : if (radeon_fence_activity(rdev, fence->ring))
391 0 : wake_up_all_locked(&rdev->fence_queue);
392 :
393 : /* did fence get signaled after we enabled the sw irq? */
394 0 : if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq) {
395 0 : radeon_irq_kms_sw_irq_put(rdev, fence->ring);
396 0 : up_read(&rdev->exclusive_lock);
397 0 : return false;
398 : }
399 :
400 0 : up_read(&rdev->exclusive_lock);
401 0 : } else {
402 : /* we're probably in a lockup, let's not fiddle too much */
403 0 : if (radeon_irq_kms_sw_irq_get_delayed(rdev, fence->ring))
404 0 : rdev->fence_drv[fence->ring].delayed_irq = true;
405 0 : radeon_fence_schedule_check(rdev, fence->ring);
406 : }
407 :
408 0 : fence->fence_wake.flags = 0;
409 0 : fence->fence_wake.private = NULL;
410 0 : fence->fence_wake.func = radeon_fence_check_signaled;
411 0 : __add_wait_queue(&rdev->fence_queue, &fence->fence_wake);
412 0 : fence_get(f);
413 :
414 : FENCE_TRACE(&fence->base, "armed on ring %i!\n", fence->ring);
415 0 : return true;
416 0 : }
417 :
418 : /**
419 : * radeon_fence_signaled - check if a fence has signaled
420 : *
421 : * @fence: radeon fence object
422 : *
423 : * Check if the requested fence has signaled (all asics).
424 : * Returns true if the fence has signaled or false if it has not.
425 : */
426 0 : bool radeon_fence_signaled(struct radeon_fence *fence)
427 : {
428 0 : if (!fence)
429 0 : return true;
430 :
431 0 : if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
432 : int ret;
433 :
434 0 : ret = fence_signal(&fence->base);
435 : if (!ret)
436 : FENCE_TRACE(&fence->base, "signaled from radeon_fence_signaled\n");
437 : return true;
438 : }
439 0 : return false;
440 0 : }
441 :
442 : /**
443 : * radeon_fence_any_seq_signaled - check if any sequence number is signaled
444 : *
445 : * @rdev: radeon device pointer
446 : * @seq: sequence numbers
447 : *
448 : * Check if the last signaled fence sequence number is >= the requested
449 : * sequence number (all asics).
450 : * Returns true if any has signaled (current value is >= requested value)
451 : * or false if it has not. Helper function for radeon_fence_wait_seq.
452 : */
453 0 : static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
454 : {
455 : unsigned i;
456 :
457 0 : for (i = 0; i < RADEON_NUM_RINGS; ++i) {
458 0 : if (seq[i] && radeon_fence_seq_signaled(rdev, seq[i], i))
459 0 : return true;
460 : }
461 0 : return false;
462 0 : }
463 :
464 : /**
465 : * radeon_fence_wait_seq_timeout - wait for a specific sequence numbers
466 : *
467 : * @rdev: radeon device pointer
468 : * @target_seq: sequence number(s) we want to wait for
469 : * @intr: use interruptible sleep
470 : * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait
471 : *
472 : * Wait for the requested sequence number(s) to be written by any ring
473 : * (all asics). Sequence number array is indexed by ring id.
474 : * @intr selects whether to use interruptible (true) or non-interruptible
475 : * (false) sleep when waiting for the sequence number. Helper function
476 : * for radeon_fence_wait_*().
477 : * Returns remaining time if the sequence number has passed, 0 when
478 : * the wait timed out, or an error for all other cases.
479 : * -EDEADLK is returned when a GPU lockup has been detected.
480 : */
481 0 : static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev,
482 : u64 *target_seq, bool intr,
483 : long timeout)
484 : {
485 : long r;
486 : int i;
487 :
488 0 : if (radeon_fence_any_seq_signaled(rdev, target_seq))
489 0 : return timeout;
490 :
491 : /* enable IRQs and tracing */
492 0 : for (i = 0; i < RADEON_NUM_RINGS; ++i) {
493 0 : if (!target_seq[i])
494 : continue;
495 :
496 0 : trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]);
497 0 : radeon_irq_kms_sw_irq_get(rdev, i);
498 0 : }
499 :
500 0 : if (intr) {
501 0 : r = wait_event_interruptible_timeout(rdev->fence_queue, (
502 : radeon_fence_any_seq_signaled(rdev, target_seq)
503 : || rdev->needs_reset), timeout);
504 0 : } else {
505 0 : r = wait_event_timeout(rdev->fence_queue, (
506 : radeon_fence_any_seq_signaled(rdev, target_seq)
507 : || rdev->needs_reset), timeout);
508 : }
509 :
510 0 : if (rdev->needs_reset)
511 0 : r = -EDEADLK;
512 :
513 0 : for (i = 0; i < RADEON_NUM_RINGS; ++i) {
514 0 : if (!target_seq[i])
515 : continue;
516 :
517 0 : radeon_irq_kms_sw_irq_put(rdev, i);
518 0 : trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]);
519 0 : }
520 :
521 0 : return r;
522 0 : }
523 :
524 : /**
525 : * radeon_fence_wait - wait for a fence to signal
526 : *
527 : * @fence: radeon fence object
528 : * @intr: use interruptible sleep
529 : *
530 : * Wait for the requested fence to signal (all asics).
531 : * @intr selects whether to use interruptible (true) or non-interruptible
532 : * (false) sleep when waiting for the fence.
533 : * Returns 0 if the fence has passed, error for all other cases.
534 : */
535 0 : int radeon_fence_wait(struct radeon_fence *fence, bool intr)
536 : {
537 0 : uint64_t seq[RADEON_NUM_RINGS] = {};
538 : long r;
539 :
540 : /*
541 : * This function should not be called on !radeon fences.
542 : * If this is the case, it would mean this function can
543 : * also be called on radeon fences belonging to another card.
544 : * exclusive_lock is not held in that case.
545 : */
546 0 : if (WARN_ON_ONCE(!to_radeon_fence(&fence->base)))
547 0 : return fence_wait(&fence->base, intr);
548 :
549 0 : seq[fence->ring] = fence->seq;
550 0 : r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
551 0 : if (r < 0) {
552 0 : return r;
553 : }
554 :
555 0 : r = fence_signal(&fence->base);
556 : if (!r)
557 : FENCE_TRACE(&fence->base, "signaled from fence_wait\n");
558 0 : return 0;
559 0 : }
560 :
561 : /**
562 : * radeon_fence_wait_any - wait for a fence to signal on any ring
563 : *
564 : * @rdev: radeon device pointer
565 : * @fences: radeon fence object(s)
566 : * @intr: use interruptible sleep
567 : *
568 : * Wait for any requested fence to signal (all asics). Fence
569 : * array is indexed by ring id. @intr selects whether to use
570 : * interruptible (true) or non-interruptible (false) sleep when
571 : * waiting for the fences. Used by the suballocator.
572 : * Returns 0 if any fence has passed, error for all other cases.
573 : */
574 0 : int radeon_fence_wait_any(struct radeon_device *rdev,
575 : struct radeon_fence **fences,
576 : bool intr)
577 : {
578 0 : uint64_t seq[RADEON_NUM_RINGS];
579 : unsigned i, num_rings = 0;
580 : long r;
581 :
582 0 : for (i = 0; i < RADEON_NUM_RINGS; ++i) {
583 0 : seq[i] = 0;
584 :
585 0 : if (!fences[i]) {
586 : continue;
587 : }
588 :
589 0 : seq[i] = fences[i]->seq;
590 0 : ++num_rings;
591 0 : }
592 :
593 : /* nothing to wait for ? */
594 0 : if (num_rings == 0)
595 0 : return -ENOENT;
596 :
597 0 : r = radeon_fence_wait_seq_timeout(rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
598 0 : if (r < 0) {
599 0 : return r;
600 : }
601 0 : return 0;
602 0 : }
603 :
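/*
 * Sketch (illustrative only, compiled out; the radeon_fence_example_* name
 * is hypothetical): waiting for whichever of two rings retires a fence
 * first, roughly what a caller such as the suballocator needs when any of
 * several fences is good enough.
 */
#if 0
static int radeon_fence_example_wait_either(struct radeon_device *rdev,
					    struct radeon_fence *gfx_fence,
					    struct radeon_fence *dma_fence)
{
	struct radeon_fence *fences[RADEON_NUM_RINGS] = {};

	fences[RADEON_RING_TYPE_GFX_INDEX] = gfx_fence;
	fences[R600_RING_TYPE_DMA_INDEX] = dma_fence;

	/* interruptible wait until either fence signals */
	return radeon_fence_wait_any(rdev, fences, true);
}
#endif
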
604 : /**
605 : * radeon_fence_wait_next - wait for the next fence to signal
606 : *
607 : * @rdev: radeon device pointer
608 : * @ring: ring index the fence is associated with
609 : *
610 : * Wait for the next fence on the requested ring to signal (all asics).
611 : * Returns 0 if the next fence has passed, error for all other cases.
612 : * Caller must hold ring lock.
613 : */
614 0 : int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
615 : {
616 0 : uint64_t seq[RADEON_NUM_RINGS] = {};
617 : long r;
618 :
619 0 : seq[ring] = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
620 0 : if (seq[ring] >= rdev->fence_drv[ring].sync_seq[ring]) {
621 : /* nothing to wait for, last_seq is
622 : already the last emitted fence */
623 0 : return -ENOENT;
624 : }
625 0 : r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
626 0 : if (r < 0)
627 0 : return r;
628 0 : return 0;
629 0 : }
630 :
631 : /**
632 : * radeon_fence_wait_empty - wait for all fences to signal
633 : *
634 : * @rdev: radeon device pointer
635 : * @ring: ring index the fence is associated with
636 : *
637 : * Wait for all fences on the requested ring to signal (all asics).
638 : * Returns 0 if the fences have passed, error for all other cases.
639 : * Caller must hold ring lock.
640 : */
641 0 : int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
642 : {
643 0 : uint64_t seq[RADEON_NUM_RINGS] = {};
644 : long r;
645 :
646 0 : seq[ring] = rdev->fence_drv[ring].sync_seq[ring];
647 0 : if (!seq[ring])
648 0 : return 0;
649 :
650 0 : r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
651 0 : if (r < 0) {
652 0 : if (r == -EDEADLK)
653 0 : return -EDEADLK;
654 :
655 0 : dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%ld)\n",
656 : ring, r);
657 0 : }
658 0 : return 0;
659 0 : }
660 :
661 : /**
662 : * radeon_fence_ref - take a ref on a fence
663 : *
664 : * @fence: radeon fence object
665 : *
666 : * Take a reference on a fence (all asics).
667 : * Returns the fence.
668 : */
669 0 : struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
670 : {
671 0 : fence_get(&fence->base);
672 0 : return fence;
673 : }
674 :
675 : /**
676 : * radeon_fence_unref - remove a ref on a fence
677 : *
678 : * @fence: radeon fence object
679 : *
680 : * Remove a reference on a fence (all asics).
681 : */
682 0 : void radeon_fence_unref(struct radeon_fence **fence)
683 : {
684 0 : struct radeon_fence *tmp = *fence;
685 :
686 0 : *fence = NULL;
687 0 : if (tmp) {
688 0 : fence_put(&tmp->base);
689 0 : }
690 0 : }
691 :
692 : /**
693 : * radeon_fence_count_emitted - get the count of emitted fences
694 : *
695 : * @rdev: radeon device pointer
696 : * @ring: ring index the fence is associated with
697 : *
698 : * Get the number of fences emitted on the requested ring (all asics).
699 : * Returns the number of emitted fences on the ring. Used by the
700 : * dynpm code to track ring activity.
701 : */
702 0 : unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
703 : {
704 : uint64_t emitted;
705 :
706 : /* We are not protected by ring lock when reading the last sequence
707 : * but it's ok to report slightly wrong fence count here.
708 : */
709 0 : radeon_fence_process(rdev, ring);
710 0 : emitted = rdev->fence_drv[ring].sync_seq[ring]
711 0 : - atomic64_read(&rdev->fence_drv[ring].last_seq);
712 : /* to avoid 32-bit wrap-around */
713 0 : if (emitted > 0x10000000) {
714 : emitted = 0x10000000;
715 : }
716 0 : return (unsigned)emitted;
717 : }
718 :
719 : /**
720 : * radeon_fence_need_sync - do we need a semaphore
721 : *
722 : * @fence: radeon fence object
723 : * @dst_ring: which ring to check against
724 : *
725 : * Check if the fence needs to be synced against another ring
726 : * (all asics). If so, we need to emit a semaphore.
727 : * Returns true if we need to sync with another ring, false if
728 : * not.
729 : */
730 0 : bool radeon_fence_need_sync(struct radeon_fence *fence, int dst_ring)
731 : {
732 : struct radeon_fence_driver *fdrv;
733 :
734 0 : if (!fence) {
735 0 : return false;
736 : }
737 :
738 0 : if (fence->ring == dst_ring) {
739 0 : return false;
740 : }
741 :
742 : /* we are protected by the ring mutex */
743 0 : fdrv = &fence->rdev->fence_drv[dst_ring];
744 0 : if (fence->seq <= fdrv->sync_seq[fence->ring]) {
745 0 : return false;
746 : }
747 :
748 0 : return true;
749 0 : }
750 :
751 : /**
752 : * radeon_fence_note_sync - record the sync point
753 : *
754 : * @fence: radeon fence object
755 : * @dst_ring: which ring to check against
756 : *
757 : * Note the sequence number at which point the fence will
758 : * be synced with the requested ring (all asics).
759 : */
760 0 : void radeon_fence_note_sync(struct radeon_fence *fence, int dst_ring)
761 : {
762 : struct radeon_fence_driver *dst, *src;
763 : unsigned i;
764 :
765 0 : if (!fence) {
766 0 : return;
767 : }
768 :
769 0 : if (fence->ring == dst_ring) {
770 0 : return;
771 : }
772 :
773 : /* we are protected by the ring mutex */
774 0 : src = &fence->rdev->fence_drv[fence->ring];
775 0 : dst = &fence->rdev->fence_drv[dst_ring];
776 0 : for (i = 0; i < RADEON_NUM_RINGS; ++i) {
777 0 : if (i == dst_ring) {
778 : continue;
779 : }
780 0 : dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
781 0 : }
782 0 : }
783 :
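/*
 * Sketch (illustrative only, compiled out; the actual semaphore emission is
 * elided): how a sync path might pair radeon_fence_need_sync() and
 * radeon_fence_note_sync(). A semaphore is only worth emitting when the
 * destination ring has not already synced past the fence, and the new sync
 * point is recorded afterwards.
 */
#if 0
static void radeon_fence_example_sync(struct radeon_fence *fence, int dst_ring)
{
	if (!radeon_fence_need_sync(fence, dst_ring))
		return;		/* dst_ring already waited past this fence */

	/* ... emit a hardware semaphore wait on dst_ring here ... */

	radeon_fence_note_sync(fence, dst_ring);
}
#endif
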
784 : /**
785 : * radeon_fence_driver_start_ring - make the fence driver
786 : * ready for use on the requested ring.
787 : *
788 : * @rdev: radeon device pointer
789 : * @ring: ring index to start the fence driver on
790 : *
791 : * Make the fence driver ready for processing (all asics).
792 : * Not all asics have all rings, so each asic will only
793 : * start the fence driver on the rings it has.
794 : * Returns 0 for success, errors for failure.
795 : */
796 0 : int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
797 : {
798 : uint64_t index;
799 : int r;
800 :
801 0 : radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
802 0 : if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
803 0 : rdev->fence_drv[ring].scratch_reg = 0;
804 0 : if (ring != R600_RING_TYPE_UVD_INDEX) {
805 0 : index = R600_WB_EVENT_OFFSET + ring * 4;
806 0 : rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
807 0 : rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr +
808 : index;
809 :
810 0 : } else {
811 : /* put fence directly behind firmware */
812 0 : index = roundup2(rdev->uvd_fw->size, 8);
813 0 : rdev->fence_drv[ring].cpu_addr = rdev->uvd.cpu_addr + index;
814 0 : rdev->fence_drv[ring].gpu_addr = rdev->uvd.gpu_addr + index;
815 : }
816 :
817 : } else {
818 0 : r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
819 0 : if (r) {
820 0 : dev_err(rdev->dev, "fence failed to get scratch register\n");
821 0 : return r;
822 : }
823 0 : index = RADEON_WB_SCRATCH_OFFSET +
824 0 : rdev->fence_drv[ring].scratch_reg -
825 0 : rdev->scratch.reg_base;
826 0 : rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
827 0 : rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
828 : }
829 0 : radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring);
830 0 : rdev->fence_drv[ring].initialized = true;
831 : dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n",
832 : ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr);
833 0 : return 0;
834 0 : }
835 :
836 : /**
837 : * radeon_fence_driver_init_ring - init the fence driver
838 : * for the requested ring.
839 : *
840 : * @rdev: radeon device pointer
841 : * @ring: ring index to start the fence driver on
842 : *
843 : * Init the fence driver for the requested ring (all asics).
844 : * Helper function for radeon_fence_driver_init().
845 : */
846 0 : static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
847 : {
848 : int i;
849 :
850 0 : rdev->fence_drv[ring].scratch_reg = -1;
851 0 : rdev->fence_drv[ring].cpu_addr = NULL;
852 0 : rdev->fence_drv[ring].gpu_addr = 0;
853 0 : for (i = 0; i < RADEON_NUM_RINGS; ++i)
854 0 : rdev->fence_drv[ring].sync_seq[i] = 0;
855 0 : atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
856 0 : rdev->fence_drv[ring].initialized = false;
857 0 : INIT_DELAYED_WORK(&rdev->fence_drv[ring].lockup_work,
858 : radeon_fence_check_lockup);
859 0 : rdev->fence_drv[ring].rdev = rdev;
860 0 : }
861 :
862 : /**
863 : * radeon_fence_driver_init - init the fence driver
864 : * for all possible rings.
865 : *
866 : * @rdev: radeon device pointer
867 : *
868 : * Init the fence driver for all possible rings (all asics).
869 : * Not all asics have all rings, so each asic will only
870 : * start the fence driver on the rings it has using
871 : * radeon_fence_driver_start_ring().
872 : * Returns 0 for success.
873 : */
874 0 : int radeon_fence_driver_init(struct radeon_device *rdev)
875 : {
876 : int ring;
877 :
878 0 : init_waitqueue_head(&rdev->fence_queue);
879 0 : for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
880 0 : radeon_fence_driver_init_ring(rdev, ring);
881 : }
882 0 : if (radeon_debugfs_fence_init(rdev)) {
883 0 : dev_err(rdev->dev, "fence debugfs file creation failed\n");
884 0 : }
885 0 : return 0;
886 : }
887 :
888 : /**
889 : * radeon_fence_driver_fini - tear down the fence driver
890 : * for all possible rings.
891 : *
892 : * @rdev: radeon device pointer
893 : *
894 : * Tear down the fence driver for all possible rings (all asics).
895 : */
896 0 : void radeon_fence_driver_fini(struct radeon_device *rdev)
897 : {
898 : int ring, r;
899 :
900 0 : mutex_lock(&rdev->ring_lock);
901 0 : for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
902 0 : if (!rdev->fence_drv[ring].initialized)
903 : continue;
904 0 : r = radeon_fence_wait_empty(rdev, ring);
905 0 : if (r) {
906 : /* no need to trigger GPU reset as we are unloading */
907 0 : radeon_fence_driver_force_completion(rdev, ring);
908 0 : }
909 0 : cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
910 0 : wake_up_all(&rdev->fence_queue);
911 0 : radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
912 0 : rdev->fence_drv[ring].initialized = false;
913 0 : }
914 0 : mutex_unlock(&rdev->ring_lock);
915 0 : }
916 :
917 : /**
918 : * radeon_fence_driver_force_completion - force all fence waiters to complete
919 : *
920 : * @rdev: radeon device pointer
921 : * @ring: the ring to complete
922 : *
923 : * In case of GPU reset failure make sure no process keeps waiting on a
924 : * fence that will never complete.
925 : */
926 0 : void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring)
927 : {
928 0 : if (rdev->fence_drv[ring].initialized) {
929 0 : radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
930 0 : cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
931 0 : }
932 0 : }
933 :
934 :
935 : /*
936 : * Fence debugfs
937 : */
938 : #if defined(CONFIG_DEBUG_FS)
939 : static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
940 : {
941 : struct drm_info_node *node = (struct drm_info_node *)m->private;
942 : struct drm_device *dev = node->minor->dev;
943 : struct radeon_device *rdev = dev->dev_private;
944 : int i, j;
945 :
946 : for (i = 0; i < RADEON_NUM_RINGS; ++i) {
947 : if (!rdev->fence_drv[i].initialized)
948 : continue;
949 :
950 : radeon_fence_process(rdev, i);
951 :
952 : seq_printf(m, "--- ring %d ---\n", i);
953 : seq_printf(m, "Last signaled fence 0x%016llx\n",
954 : (unsigned long long)atomic64_read(&rdev->fence_drv[i].last_seq));
955 : seq_printf(m, "Last emitted 0x%016llx\n",
956 : rdev->fence_drv[i].sync_seq[i]);
957 :
958 : for (j = 0; j < RADEON_NUM_RINGS; ++j) {
959 : if (i != j && rdev->fence_drv[j].initialized)
960 : seq_printf(m, "Last sync to ring %d 0x%016llx\n",
961 : j, rdev->fence_drv[i].sync_seq[j]);
962 : }
963 : }
964 : return 0;
965 : }
966 :
967 : /**
968 : * radeon_debugfs_gpu_reset - manually trigger a gpu reset
969 : *
970 : * Manually trigger a gpu reset at the next fence wait.
971 : */
972 : static int radeon_debugfs_gpu_reset(struct seq_file *m, void *data)
973 : {
974 : struct drm_info_node *node = (struct drm_info_node *) m->private;
975 : struct drm_device *dev = node->minor->dev;
976 : struct radeon_device *rdev = dev->dev_private;
977 :
978 : down_read(&rdev->exclusive_lock);
979 : seq_printf(m, "%d\n", rdev->needs_reset);
980 : rdev->needs_reset = true;
981 : wake_up_all(&rdev->fence_queue);
982 : up_read(&rdev->exclusive_lock);
983 :
984 : return 0;
985 : }
986 :
987 : static struct drm_info_list radeon_debugfs_fence_list[] = {
988 : {"radeon_fence_info", &radeon_debugfs_fence_info, 0, NULL},
989 : {"radeon_gpu_reset", &radeon_debugfs_gpu_reset, 0, NULL}
990 : };
991 : #endif
992 :
993 0 : int radeon_debugfs_fence_init(struct radeon_device *rdev)
994 : {
995 : #if defined(CONFIG_DEBUG_FS)
996 : return radeon_debugfs_add_files(rdev, radeon_debugfs_fence_list, 2);
997 : #else
998 0 : return 0;
999 : #endif
1000 : }
1001 :
1002 0 : static const char *radeon_fence_get_driver_name(struct fence *fence)
1003 : {
1004 0 : return "radeon";
1005 : }
1006 :
1007 0 : static const char *radeon_fence_get_timeline_name(struct fence *f)
1008 : {
1009 0 : struct radeon_fence *fence = to_radeon_fence(f);
1010 0 : switch (fence->ring) {
1011 0 : case RADEON_RING_TYPE_GFX_INDEX: return "radeon.gfx";
1012 0 : case CAYMAN_RING_TYPE_CP1_INDEX: return "radeon.cp1";
1013 0 : case CAYMAN_RING_TYPE_CP2_INDEX: return "radeon.cp2";
1014 0 : case R600_RING_TYPE_DMA_INDEX: return "radeon.dma";
1015 0 : case CAYMAN_RING_TYPE_DMA1_INDEX: return "radeon.dma1";
1016 0 : case R600_RING_TYPE_UVD_INDEX: return "radeon.uvd";
1017 0 : case TN_RING_TYPE_VCE1_INDEX: return "radeon.vce1";
1018 0 : case TN_RING_TYPE_VCE2_INDEX: return "radeon.vce2";
1019 0 : default: WARN_ON_ONCE(1); return "radeon.unk";
1020 : }
1021 0 : }
1022 :
1023 0 : static inline bool radeon_test_signaled(struct radeon_fence *fence)
1024 : {
1025 0 : return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
1026 : }
1027 :
1028 : struct radeon_wait_cb {
1029 : struct fence_cb base;
1030 : void *task;
1031 : };
1032 :
1033 : static void
1034 0 : radeon_fence_wait_cb(struct fence *fence, struct fence_cb *cb)
1035 : {
1036 : struct radeon_wait_cb *wait =
1037 0 : container_of(cb, struct radeon_wait_cb, base);
1038 0 : wake_up_process(wait->task);
1039 0 : }
1040 :
1041 0 : static signed long radeon_fence_default_wait(struct fence *f, bool intr,
1042 : signed long t)
1043 : {
1044 0 : struct radeon_fence *fence = to_radeon_fence(f);
1045 0 : struct radeon_device *rdev = fence->rdev;
1046 0 : struct radeon_wait_cb cb;
1047 :
1048 0 : cb.task = curproc;
1049 :
1050 0 : if (fence_add_callback(f, &cb.base, radeon_fence_wait_cb))
1051 0 : return t;
1052 :
1053 0 : while (t > 0) {
1054 0 : if (intr)
1055 0 : set_current_state(TASK_INTERRUPTIBLE);
1056 : else
1057 0 : set_current_state(TASK_UNINTERRUPTIBLE);
1058 :
1059 : /*
1060 : * radeon_test_signaled must be called after
1061 : * set_current_state to prevent a race with wake_up_process
1062 : */
1063 0 : if (radeon_test_signaled(fence))
1064 : break;
1065 :
1066 0 : if (rdev->needs_reset) {
1067 : t = -EDEADLK;
1068 0 : break;
1069 : }
1070 :
1071 0 : KASSERT(sch_ident != NULL);
1072 0 : t = schedule_timeout(t);
1073 :
1074 0 : if (t > 0 && intr && signal_pending(current))
1075 0 : t = -ERESTARTSYS;
1076 : }
1077 :
1078 0 : __set_current_state(TASK_RUNNING);
1079 0 : fence_remove_callback(f, &cb.base);
1080 :
1081 0 : return t;
1082 0 : }
1083 :
1084 : const struct fence_ops radeon_fence_ops = {
1085 : .get_driver_name = radeon_fence_get_driver_name,
1086 : .get_timeline_name = radeon_fence_get_timeline_name,
1087 : .enable_signaling = radeon_fence_enable_signaling,
1088 : .signaled = radeon_fence_is_signaled,
1089 : .wait = radeon_fence_default_wait,
1090 : .release = NULL,
1091 : };