Line data Source code
1 : /*
2 : * Copyright © 2008-2010 Intel Corporation
3 : *
4 : * Permission is hereby granted, free of charge, to any person obtaining a
5 : * copy of this software and associated documentation files (the "Software"),
6 : * to deal in the Software without restriction, including without limitation
7 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 : * and/or sell copies of the Software, and to permit persons to whom the
9 : * Software is furnished to do so, subject to the following conditions:
10 : *
11 : * The above copyright notice and this permission notice (including the next
12 : * paragraph) shall be included in all copies or substantial portions of the
13 : * Software.
14 : *
15 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 : * IN THE SOFTWARE.
22 : *
23 : * Authors:
24 : * Eric Anholt <eric@anholt.net>
25 : * Zou Nan hai <nanhai.zou@intel.com>
26 : * Xiang Hai hao<haihao.xiang@intel.com>
27 : *
28 : */
29 :
30 : #include <dev/pci/drm/drmP.h>
31 : #include "i915_drv.h"
32 : #include <dev/pci/drm/i915_drm.h>
33 : #include "i915_trace.h"
34 : #include "intel_drv.h"
35 :
36 : bool
37 0 : intel_ring_initialized(struct intel_engine_cs *ring)
38 : {
39 0 : struct drm_device *dev = ring->dev;
40 :
41 0 : if (!dev)
42 0 : return false;
43 :
44 0 : if (i915.enable_execlists) {
45 0 : struct intel_context *dctx = ring->default_context;
46 0 : struct intel_ringbuffer *ringbuf = dctx->engine[ring->id].ringbuf;
47 :
48 0 : return ringbuf->obj;
49 : } else
50 0 : return ring->buffer && ring->buffer->obj;
51 0 : }
52 :
53 0 : int __intel_ring_space(int head, int tail, int size)
54 : {
55 0 : int space = head - tail;
56 0 : if (space <= 0)
57 0 : space += size;
58 0 : return space - I915_RING_FREE_SPACE;
59 : }
60 :
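/*
 * Editor's note (illustrative only, not part of the driver source): the
 * helper above treats the ring as a circular buffer, so when the head has
 * wrapped behind the tail the raw difference goes negative and the ring
 * size is added back. A minimal sketch of the arithmetic, assuming a
 * 4 KiB ring:
 *
 *	head = 0x100, tail = 0xf00, size = 4096
 *	space = 0x100 - 0xf00 = -3584  ->  -3584 + 4096 = 512
 *	usable space = 512 - I915_RING_FREE_SPACE
 *
 * The I915_RING_FREE_SPACE margin keeps the software tail from ever
 * catching up to the hardware head, which would make a full ring
 * indistinguishable from an empty one.
 */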
61 0 : void intel_ring_update_space(struct intel_ringbuffer *ringbuf)
62 : {
63 0 : if (ringbuf->last_retired_head != -1) {
64 0 : ringbuf->head = ringbuf->last_retired_head;
65 0 : ringbuf->last_retired_head = -1;
66 0 : }
67 :
68 0 : ringbuf->space = __intel_ring_space(ringbuf->head & HEAD_ADDR,
69 0 : ringbuf->tail, ringbuf->size);
70 0 : }
71 :
72 0 : int intel_ring_space(struct intel_ringbuffer *ringbuf)
73 : {
74 0 : intel_ring_update_space(ringbuf);
75 0 : return ringbuf->space;
76 : }
77 :
78 0 : bool intel_ring_stopped(struct intel_engine_cs *ring)
79 : {
80 0 : struct drm_i915_private *dev_priv = ring->dev->dev_private;
81 0 : return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring);
82 : }
83 :
84 0 : static void __intel_ring_advance(struct intel_engine_cs *ring)
85 : {
86 0 : struct intel_ringbuffer *ringbuf = ring->buffer;
87 0 : ringbuf->tail &= ringbuf->size - 1;
88 0 : if (intel_ring_stopped(ring))
89 0 : return;
90 0 : ring->write_tail(ring, ringbuf->tail);
91 0 : }
92 :
93 : static int
94 0 : gen2_render_ring_flush(struct drm_i915_gem_request *req,
95 : u32 invalidate_domains,
96 : u32 flush_domains)
97 : {
98 0 : struct intel_engine_cs *ring = req->ring;
99 : u32 cmd;
100 : int ret;
101 :
102 : cmd = MI_FLUSH;
103 0 : if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
104 0 : cmd |= MI_NO_WRITE_FLUSH;
105 :
106 0 : if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
107 0 : cmd |= MI_READ_FLUSH;
108 :
109 0 : ret = intel_ring_begin(req, 2);
110 0 : if (ret)
111 0 : return ret;
112 :
113 0 : intel_ring_emit(ring, cmd);
114 0 : intel_ring_emit(ring, MI_NOOP);
115 0 : intel_ring_advance(ring);
116 :
117 0 : return 0;
118 0 : }
119 :
120 : static int
121 0 : gen4_render_ring_flush(struct drm_i915_gem_request *req,
122 : u32 invalidate_domains,
123 : u32 flush_domains)
124 : {
125 0 : struct intel_engine_cs *ring = req->ring;
126 0 : struct drm_device *dev = ring->dev;
127 : u32 cmd;
128 : int ret;
129 :
130 : /*
131 : * read/write caches:
132 : *
133 : * I915_GEM_DOMAIN_RENDER is always invalidated, but is
134 : * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
135 : * also flushed at 2d versus 3d pipeline switches.
136 : *
137 : * read-only caches:
138 : *
139 : * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
140 : * MI_READ_FLUSH is set, and is always flushed on 965.
141 : *
142 : * I915_GEM_DOMAIN_COMMAND may not exist?
143 : *
144 : * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
145 : * invalidated when MI_EXE_FLUSH is set.
146 : *
147 : * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
148 : * invalidated with every MI_FLUSH.
149 : *
150 : * TLBs:
151 : *
152 : * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
153 : * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
154 : * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
155 : * are flushed at any MI_FLUSH.
156 : */
157 :
158 : cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
159 0 : if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
160 0 : cmd &= ~MI_NO_WRITE_FLUSH;
161 0 : if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
162 0 : cmd |= MI_EXE_FLUSH;
163 :
164 0 : if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
165 0 : (IS_G4X(dev) || IS_GEN5(dev)))
166 0 : cmd |= MI_INVALIDATE_ISP;
167 :
168 0 : ret = intel_ring_begin(req, 2);
169 0 : if (ret)
170 0 : return ret;
171 :
172 0 : intel_ring_emit(ring, cmd);
173 0 : intel_ring_emit(ring, MI_NOOP);
174 0 : intel_ring_advance(ring);
175 :
176 0 : return 0;
177 0 : }
178 :
179 : /**
180 : * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
181 : * implementing two workarounds on gen6. From section 1.4.7.1
182 : * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
183 : *
184 : * [DevSNB-C+{W/A}] Before any depth stall flush (including those
185 : * produced by non-pipelined state commands), software needs to first
186 : * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
187 : * 0.
188 : *
189 : * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
190 : * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
191 : *
192 : * And the workaround for these two requires this workaround first:
193 : *
194 : * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
195 : * BEFORE the pipe-control with a post-sync op and no write-cache
196 : * flushes.
197 : *
198 : * And this last workaround is tricky because of the requirements on
199 : * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
200 : * volume 2 part 1:
201 : *
202 : * "1 of the following must also be set:
203 : * - Render Target Cache Flush Enable ([12] of DW1)
204 : * - Depth Cache Flush Enable ([0] of DW1)
205 : * - Stall at Pixel Scoreboard ([1] of DW1)
206 : * - Depth Stall ([13] of DW1)
207 : * - Post-Sync Operation ([13] of DW1)
208 : * - Notify Enable ([8] of DW1)"
209 : *
210 : * The cache flushes require the workaround flush that triggered this
211 : * one, so we can't use it. Depth stall would trigger the same.
212 : * Post-sync nonzero is what triggered this second workaround, so we
213 : * can't use that one either. Notify enable is IRQs, which aren't
214 : * really our business. That leaves only stall at scoreboard.
215 : */
216 : static int
217 0 : intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
218 : {
219 0 : struct intel_engine_cs *ring = req->ring;
220 0 : u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
221 : int ret;
222 :
223 0 : ret = intel_ring_begin(req, 6);
224 0 : if (ret)
225 0 : return ret;
226 :
227 0 : intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
228 0 : intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
229 : PIPE_CONTROL_STALL_AT_SCOREBOARD);
230 0 : intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
231 0 : intel_ring_emit(ring, 0); /* low dword */
232 0 : intel_ring_emit(ring, 0); /* high dword */
233 0 : intel_ring_emit(ring, MI_NOOP);
234 0 : intel_ring_advance(ring);
235 :
236 0 : ret = intel_ring_begin(req, 6);
237 0 : if (ret)
238 0 : return ret;
239 :
240 0 : intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
241 0 : intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
242 0 : intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
243 0 : intel_ring_emit(ring, 0);
244 0 : intel_ring_emit(ring, 0);
245 0 : intel_ring_emit(ring, MI_NOOP);
246 0 : intel_ring_advance(ring);
247 :
248 0 : return 0;
249 0 : }
250 :
251 : static int
252 0 : gen6_render_ring_flush(struct drm_i915_gem_request *req,
253 : u32 invalidate_domains, u32 flush_domains)
254 : {
255 0 : struct intel_engine_cs *ring = req->ring;
256 : u32 flags = 0;
257 0 : u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
258 : int ret;
259 :
260 : /* Force SNB workarounds for PIPE_CONTROL flushes */
261 0 : ret = intel_emit_post_sync_nonzero_flush(req);
262 0 : if (ret)
263 0 : return ret;
264 :
265 : /* Just flush everything. Experiments have shown that reducing the
266 : * number of bits based on the write domains has little performance
267 : * impact.
268 : */
269 0 : if (flush_domains) {
270 : flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
271 : flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
272 : /*
273 : * Ensure that any following seqno writes only happen
274 : * when the render cache is indeed flushed.
275 : */
276 : flags |= PIPE_CONTROL_CS_STALL;
277 0 : }
278 0 : if (invalidate_domains) {
279 0 : flags |= PIPE_CONTROL_TLB_INVALIDATE;
280 0 : flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
281 0 : flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
282 0 : flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
283 0 : flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
284 0 : flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
285 : /*
286 : * TLB invalidate requires a post-sync write.
287 : */
288 0 : flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
289 0 : }
290 :
291 0 : ret = intel_ring_begin(req, 4);
292 0 : if (ret)
293 0 : return ret;
294 :
295 0 : intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
296 0 : intel_ring_emit(ring, flags);
297 0 : intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
298 0 : intel_ring_emit(ring, 0);
299 0 : intel_ring_advance(ring);
300 :
301 0 : return 0;
302 0 : }
303 :
304 : static int
305 0 : gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req)
306 : {
307 0 : struct intel_engine_cs *ring = req->ring;
308 : int ret;
309 :
310 0 : ret = intel_ring_begin(req, 4);
311 0 : if (ret)
312 0 : return ret;
313 :
314 0 : intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
315 0 : intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
316 : PIPE_CONTROL_STALL_AT_SCOREBOARD);
317 0 : intel_ring_emit(ring, 0);
318 0 : intel_ring_emit(ring, 0);
319 0 : intel_ring_advance(ring);
320 :
321 0 : return 0;
322 0 : }
323 :
324 : static int
325 0 : gen7_render_ring_flush(struct drm_i915_gem_request *req,
326 : u32 invalidate_domains, u32 flush_domains)
327 : {
328 0 : struct intel_engine_cs *ring = req->ring;
329 : u32 flags = 0;
330 0 : u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
331 : int ret;
332 :
333 : /*
334 : * Ensure that any following seqno writes only happen when the render
335 : * cache is indeed flushed.
336 : *
337 : * Workaround: 4th PIPE_CONTROL command (except the ones with only
338 : * read-cache invalidate bits set) must have the CS_STALL bit set. We
339 : * don't try to be clever and just set it unconditionally.
340 : */
341 : flags |= PIPE_CONTROL_CS_STALL;
342 :
343 : /* Just flush everything. Experiments have shown that reducing the
344 : * number of bits based on the write domains has little performance
345 : * impact.
346 : */
347 0 : if (flush_domains) {
348 : flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
349 : flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
350 : flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
351 : flags |= PIPE_CONTROL_FLUSH_ENABLE;
352 0 : }
353 0 : if (invalidate_domains) {
354 0 : flags |= PIPE_CONTROL_TLB_INVALIDATE;
355 0 : flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
356 0 : flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
357 0 : flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
358 0 : flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
359 0 : flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
360 0 : flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;
361 : /*
362 : * TLB invalidate requires a post-sync write.
363 : */
364 0 : flags |= PIPE_CONTROL_QW_WRITE;
365 0 : flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
366 :
367 0 : flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
368 :
369 : /* Workaround: we must issue a pipe_control with CS-stall bit
370 : * set before a pipe_control command that has the state cache
371 : * invalidate bit set. */
372 0 : gen7_render_ring_cs_stall_wa(req);
373 0 : }
374 :
375 0 : ret = intel_ring_begin(req, 4);
376 0 : if (ret)
377 0 : return ret;
378 :
379 0 : intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
380 0 : intel_ring_emit(ring, flags);
381 0 : intel_ring_emit(ring, scratch_addr);
382 0 : intel_ring_emit(ring, 0);
383 0 : intel_ring_advance(ring);
384 :
385 0 : return 0;
386 0 : }
387 :
388 : static int
389 0 : gen8_emit_pipe_control(struct drm_i915_gem_request *req,
390 : u32 flags, u32 scratch_addr)
391 : {
392 0 : struct intel_engine_cs *ring = req->ring;
393 : int ret;
394 :
395 0 : ret = intel_ring_begin(req, 6);
396 0 : if (ret)
397 0 : return ret;
398 :
399 0 : intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
400 0 : intel_ring_emit(ring, flags);
401 0 : intel_ring_emit(ring, scratch_addr);
402 0 : intel_ring_emit(ring, 0);
403 0 : intel_ring_emit(ring, 0);
404 0 : intel_ring_emit(ring, 0);
405 0 : intel_ring_advance(ring);
406 :
407 0 : return 0;
408 0 : }
409 :
410 : static int
411 0 : gen8_render_ring_flush(struct drm_i915_gem_request *req,
412 : u32 invalidate_domains, u32 flush_domains)
413 : {
414 : u32 flags = 0;
415 0 : u32 scratch_addr = req->ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
416 : int ret;
417 :
418 : flags |= PIPE_CONTROL_CS_STALL;
419 :
420 0 : if (flush_domains) {
421 : flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
422 : flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
423 : flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
424 : flags |= PIPE_CONTROL_FLUSH_ENABLE;
425 0 : }
426 0 : if (invalidate_domains) {
427 0 : flags |= PIPE_CONTROL_TLB_INVALIDATE;
428 0 : flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
429 0 : flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
430 0 : flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
431 0 : flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
432 0 : flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
433 0 : flags |= PIPE_CONTROL_QW_WRITE;
434 0 : flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
435 :
436 : /* WaCsStallBeforeStateCacheInvalidate:bdw,chv */
437 0 : ret = gen8_emit_pipe_control(req,
438 : PIPE_CONTROL_CS_STALL |
439 : PIPE_CONTROL_STALL_AT_SCOREBOARD,
440 : 0);
441 0 : if (ret)
442 0 : return ret;
443 : }
444 :
445 0 : return gen8_emit_pipe_control(req, flags, scratch_addr);
446 0 : }
447 :
448 0 : static void ring_write_tail(struct intel_engine_cs *ring,
449 : u32 value)
450 : {
451 0 : struct drm_i915_private *dev_priv = ring->dev->dev_private;
452 0 : I915_WRITE_TAIL(ring, value);
453 0 : }
454 :
455 0 : u64 intel_ring_get_active_head(struct intel_engine_cs *ring)
456 : {
457 0 : struct drm_i915_private *dev_priv = ring->dev->dev_private;
458 : u64 acthd;
459 :
460 0 : if (INTEL_INFO(ring->dev)->gen >= 8)
461 0 : acthd = I915_READ64_2x32(RING_ACTHD(ring->mmio_base),
462 : RING_ACTHD_UDW(ring->mmio_base));
463 0 : else if (INTEL_INFO(ring->dev)->gen >= 4)
464 0 : acthd = I915_READ(RING_ACTHD(ring->mmio_base));
465 : else
466 0 : acthd = I915_READ(ACTHD);
467 :
468 0 : return acthd;
469 : }
470 :
471 0 : static void ring_setup_phys_status_page(struct intel_engine_cs *ring)
472 : {
473 0 : struct drm_i915_private *dev_priv = ring->dev->dev_private;
474 : u32 addr;
475 :
476 0 : addr = dev_priv->status_page_dmah->busaddr;
477 0 : if (INTEL_INFO(ring->dev)->gen >= 4)
478 0 : addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
479 0 : I915_WRITE(HWS_PGA, addr);
480 0 : }
481 :
482 0 : static void intel_ring_setup_status_page(struct intel_engine_cs *ring)
483 : {
484 0 : struct drm_device *dev = ring->dev;
485 0 : struct drm_i915_private *dev_priv = ring->dev->dev_private;
486 : u32 mmio = 0;
487 :
488 : /* The ring status page addresses are no longer next to the rest of
489 : * the ring registers as of gen7.
490 : */
491 0 : if (IS_GEN7(dev)) {
492 0 : switch (ring->id) {
493 : case RCS:
494 : mmio = RENDER_HWS_PGA_GEN7;
495 0 : break;
496 : case BCS:
497 : mmio = BLT_HWS_PGA_GEN7;
498 0 : break;
499 : /*
500 : * VCS2 actually doesn't exist on Gen7. This case is only here
501 : * to silence the gcc switch check warning.
502 : */
503 : case VCS2:
504 : case VCS:
505 : mmio = BSD_HWS_PGA_GEN7;
506 0 : break;
507 : case VECS:
508 : mmio = VEBOX_HWS_PGA_GEN7;
509 0 : break;
510 : }
511 0 : } else if (IS_GEN6(ring->dev)) {
512 0 : mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
513 0 : } else {
514 : /* XXX: gen8 returns to sanity */
515 0 : mmio = RING_HWS_PGA(ring->mmio_base);
516 : }
517 :
518 0 : I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
519 0 : POSTING_READ(mmio);
520 :
521 : /*
522 : * Flush the TLB for this page
523 : *
524 : * FIXME: These two bits have disappeared on gen8, so a question
525 : * arises: do we still need this and if so how should we go about
526 : * invalidating the TLB?
527 : */
528 0 : if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8) {
529 0 : u32 reg = RING_INSTPM(ring->mmio_base);
530 :
531 : /* ring should be idle before issuing a sync flush */
532 0 : WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
533 :
534 0 : I915_WRITE(reg,
535 : _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
536 : INSTPM_SYNC_FLUSH));
537 0 : if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0,
538 : 1000))
539 0 : DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
540 : ring->name);
541 0 : }
542 0 : }
543 :
544 0 : static bool stop_ring(struct intel_engine_cs *ring)
545 : {
546 0 : struct drm_i915_private *dev_priv = to_i915(ring->dev);
547 :
548 0 : if (!IS_GEN2(ring->dev)) {
549 0 : I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING));
550 0 : if (wait_for((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) {
551 0 : DRM_ERROR("%s : timed out trying to stop ring\n", ring->name);
552 : /* Sometimes we observe that the idle flag is not
553 : * set even though the ring is empty. So double
554 : * check before giving up.
555 : */
556 0 : if (I915_READ_HEAD(ring) != I915_READ_TAIL(ring))
557 0 : return false;
558 : }
559 : }
560 :
561 0 : I915_WRITE_CTL(ring, 0);
562 0 : I915_WRITE_HEAD(ring, 0);
563 0 : ring->write_tail(ring, 0);
564 :
565 0 : if (!IS_GEN2(ring->dev)) {
566 0 : (void)I915_READ_CTL(ring);
567 0 : I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
568 0 : }
569 :
570 0 : return (I915_READ_HEAD(ring) & HEAD_ADDR) == 0;
571 0 : }
572 :
573 0 : static int init_ring_common(struct intel_engine_cs *ring)
574 : {
575 0 : struct drm_device *dev = ring->dev;
576 0 : struct drm_i915_private *dev_priv = dev->dev_private;
577 0 : struct intel_ringbuffer *ringbuf = ring->buffer;
578 0 : struct drm_i915_gem_object *obj = ringbuf->obj;
579 : int ret = 0;
580 :
581 0 : intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
582 :
583 0 : if (!stop_ring(ring)) {
584 : /* G45 ring initialization often fails to reset head to zero */
585 : DRM_DEBUG_KMS("%s head not reset to zero "
586 : "ctl %08x head %08x tail %08x start %08x\n",
587 : ring->name,
588 : I915_READ_CTL(ring),
589 : I915_READ_HEAD(ring),
590 : I915_READ_TAIL(ring),
591 : I915_READ_START(ring));
592 :
593 0 : if (!stop_ring(ring)) {
594 0 : DRM_ERROR("failed to set %s head to zero "
595 : "ctl %08x head %08x tail %08x start %08x\n",
596 : ring->name,
597 : I915_READ_CTL(ring),
598 : I915_READ_HEAD(ring),
599 : I915_READ_TAIL(ring),
600 : I915_READ_START(ring));
601 : ret = -EIO;
602 0 : goto out;
603 : }
604 : }
605 :
606 0 : if (I915_NEED_GFX_HWS(dev))
607 0 : intel_ring_setup_status_page(ring);
608 : else
609 0 : ring_setup_phys_status_page(ring);
610 :
611 : /* Enforce ordering by reading HEAD register back */
612 0 : I915_READ_HEAD(ring);
613 :
614 : /* Initialize the ring. This must happen _after_ we've cleared the ring
615 : * registers with the above sequence (the readback of the HEAD registers
616 : * also enforces ordering), otherwise the hw might lose the new ring
617 : * register values. */
618 0 : I915_WRITE_START(ring, i915_gem_obj_ggtt_offset(obj));
619 :
620 : /* WaClearRingBufHeadRegAtInit:ctg,elk */
621 0 : if (I915_READ_HEAD(ring))
622 : DRM_DEBUG("%s initialization failed [head=%08x], fudging\n",
623 : ring->name, I915_READ_HEAD(ring));
624 0 : I915_WRITE_HEAD(ring, 0);
625 0 : (void)I915_READ_HEAD(ring);
626 :
627 0 : I915_WRITE_CTL(ring,
628 : ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES)
629 : | RING_VALID);
630 :
631 : /* If the head is still not zero, the ring is dead */
632 0 : if (wait_for((I915_READ_CTL(ring) & RING_VALID) != 0 &&
633 : I915_READ_START(ring) == i915_gem_obj_ggtt_offset(obj) &&
634 : (I915_READ_HEAD(ring) & HEAD_ADDR) == 0, 50)) {
635 0 : DRM_ERROR("%s initialization failed "
636 : "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08lx]\n",
637 : ring->name,
638 : I915_READ_CTL(ring), I915_READ_CTL(ring) & RING_VALID,
639 : I915_READ_HEAD(ring), I915_READ_TAIL(ring),
640 : I915_READ_START(ring), (unsigned long)i915_gem_obj_ggtt_offset(obj));
641 : ret = -EIO;
642 0 : goto out;
643 : }
644 :
645 0 : ringbuf->last_retired_head = -1;
646 0 : ringbuf->head = I915_READ_HEAD(ring);
647 0 : ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
648 0 : intel_ring_update_space(ringbuf);
649 :
650 0 : memset(&ring->hangcheck, 0, sizeof(ring->hangcheck));
651 :
652 : out:
653 0 : intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
654 :
655 0 : return ret;
656 : }
657 :
658 : void
659 0 : intel_fini_pipe_control(struct intel_engine_cs *ring)
660 : {
661 0 : struct drm_device *dev = ring->dev;
662 :
663 0 : if (ring->scratch.obj == NULL)
664 0 : return;
665 :
666 0 : if (INTEL_INFO(dev)->gen >= 5) {
667 0 : kunmap(sg_page(ring->scratch.obj->pages->sgl));
668 0 : i915_gem_object_ggtt_unpin(ring->scratch.obj);
669 0 : }
670 :
671 0 : drm_gem_object_unreference(&ring->scratch.obj->base);
672 0 : ring->scratch.obj = NULL;
673 0 : }
674 :
675 : int
676 0 : intel_init_pipe_control(struct intel_engine_cs *ring)
677 : {
678 : int ret;
679 :
680 0 : WARN_ON(ring->scratch.obj);
681 :
682 0 : ring->scratch.obj = i915_gem_alloc_object(ring->dev, 4096);
683 0 : if (ring->scratch.obj == NULL) {
684 0 : DRM_ERROR("Failed to allocate seqno page\n");
685 : ret = -ENOMEM;
686 0 : goto err;
687 : }
688 :
689 0 : ret = i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC);
690 0 : if (ret)
691 : goto err_unref;
692 :
693 0 : ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, 0);
694 0 : if (ret)
695 : goto err_unref;
696 :
697 0 : ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(ring->scratch.obj);
698 0 : ring->scratch.cpu_page = kmap(sg_page(ring->scratch.obj->pages->sgl));
699 0 : if (ring->scratch.cpu_page == NULL) {
700 : ret = -ENOMEM;
701 : goto err_unpin;
702 : }
703 :
704 : DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
705 : ring->name, ring->scratch.gtt_offset);
706 0 : return 0;
707 :
708 : err_unpin:
709 0 : i915_gem_object_ggtt_unpin(ring->scratch.obj);
710 : err_unref:
711 0 : drm_gem_object_unreference(&ring->scratch.obj->base);
712 : err:
713 0 : return ret;
714 0 : }
715 :
716 0 : static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
717 : {
718 : int ret, i;
719 0 : struct intel_engine_cs *ring = req->ring;
720 0 : struct drm_device *dev = ring->dev;
721 0 : struct drm_i915_private *dev_priv = dev->dev_private;
722 0 : struct i915_workarounds *w = &dev_priv->workarounds;
723 :
724 0 : if (w->count == 0)
725 0 : return 0;
726 :
727 0 : ring->gpu_caches_dirty = true;
728 0 : ret = intel_ring_flush_all_caches(req);
729 0 : if (ret)
730 0 : return ret;
731 :
732 0 : ret = intel_ring_begin(req, (w->count * 2 + 2));
733 0 : if (ret)
734 0 : return ret;
735 :
736 0 : intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count));
737 0 : for (i = 0; i < w->count; i++) {
738 0 : intel_ring_emit(ring, w->reg[i].addr);
739 0 : intel_ring_emit(ring, w->reg[i].value);
740 : }
741 0 : intel_ring_emit(ring, MI_NOOP);
742 :
743 0 : intel_ring_advance(ring);
744 :
745 0 : ring->gpu_caches_dirty = true;
746 0 : ret = intel_ring_flush_all_caches(req);
747 0 : if (ret)
748 0 : return ret;
749 :
750 : DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count);
751 :
752 0 : return 0;
753 0 : }
754 :
755 0 : static int intel_rcs_ctx_init(struct drm_i915_gem_request *req)
756 : {
757 : int ret;
758 :
759 0 : ret = intel_ring_workarounds_emit(req);
760 0 : if (ret != 0)
761 0 : return ret;
762 :
763 0 : ret = i915_gem_render_state_init(req);
764 0 : if (ret)
765 0 : DRM_ERROR("init render state: %d\n", ret);
766 :
767 0 : return ret;
768 0 : }
769 :
770 0 : static int wa_add(struct drm_i915_private *dev_priv,
771 : const u32 addr, const u32 mask, const u32 val)
772 : {
773 0 : const u32 idx = dev_priv->workarounds.count;
774 :
775 0 : if (WARN_ON(idx >= I915_MAX_WA_REGS))
776 0 : return -ENOSPC;
777 :
778 0 : dev_priv->workarounds.reg[idx].addr = addr;
779 0 : dev_priv->workarounds.reg[idx].value = val;
780 0 : dev_priv->workarounds.reg[idx].mask = mask;
781 :
782 0 : dev_priv->workarounds.count++;
783 :
784 0 : return 0;
785 0 : }
786 :
787 : #define WA_REG(addr, mask, val) do { \
788 : const int r = wa_add(dev_priv, (addr), (mask), (val)); \
789 : if (r) \
790 : return r; \
791 : } while (0)
792 :
793 : #define WA_SET_BIT_MASKED(addr, mask) \
794 : WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))
795 :
796 : #define WA_CLR_BIT_MASKED(addr, mask) \
797 : WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))
798 :
799 : #define WA_SET_FIELD_MASKED(addr, mask, value) \
800 : WA_REG(addr, mask, _MASKED_FIELD(mask, value))
801 :
802 : #define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask))
803 : #define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask))
804 :
805 : #define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val)
806 :
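/*
 * Editor's note (illustrative only, not part of the driver source): the
 * WA_*_MASKED helpers rely on the "masked" register convention used by
 * these chicken registers, where the upper 16 bits of a write select which
 * of the lower 16 bits actually take effect. Assuming the usual i915
 * definition _MASKED_BIT_ENABLE(a) == ((a) << 16 | (a)), a line such as
 *
 *	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
 *
 * simply records { .addr = INSTPM, .mask = INSTPM_FORCE_ORDERING,
 * .value = INSTPM_FORCE_ORDERING << 16 | INSTPM_FORCE_ORDERING } in
 * dev_priv->workarounds, to be replayed later by
 * intel_ring_workarounds_emit() via MI_LOAD_REGISTER_IMM.
 */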
807 0 : static int gen8_init_workarounds(struct intel_engine_cs *ring)
808 : {
809 0 : struct drm_device *dev = ring->dev;
810 0 : struct drm_i915_private *dev_priv = dev->dev_private;
811 :
812 0 : WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
813 :
814 : /* WaDisableAsyncFlipPerfMode:bdw,chv */
815 0 : WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
816 :
817 : /* WaDisablePartialInstShootdown:bdw,chv */
818 0 : WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
819 : PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
820 :
821 : /* Use Force Non-Coherent whenever executing a 3D context. This is a
822 : * workaround for a possible hang in the unlikely event a TLB
823 : * invalidation occurs during a PSD flush.
824 : */
825 : /* WaForceEnableNonCoherent:bdw,chv */
826 : /* WaHdcDisableFetchWhenMasked:bdw,chv */
827 0 : WA_SET_BIT_MASKED(HDC_CHICKEN0,
828 : HDC_DONOT_FETCH_MEM_WHEN_MASKED |
829 : HDC_FORCE_NON_COHERENT);
830 :
831 : /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
832 : * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
833 : * polygons in the same 8x4 pixel/sample area to be processed without
834 : * stalling waiting for the earlier ones to write to Hierarchical Z
835 : * buffer."
836 : *
837 : * This optimization is off by default for BDW and CHV; turn it on.
838 : */
839 0 : WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
840 :
841 : /* Wa4x4STCOptimizationDisable:bdw,chv */
842 0 : WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
843 :
844 : /*
845 : * BSpec recommends 8x4 when MSAA is used,
846 : * however in practice 16x4 seems fastest.
847 : *
848 : * Note that PS/WM thread counts depend on the WIZ hashing
849 : * disable bit, which we don't touch here, but it's good
850 : * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
851 : */
852 0 : WA_SET_FIELD_MASKED(GEN7_GT_MODE,
853 : GEN6_WIZ_HASHING_MASK,
854 : GEN6_WIZ_HASHING_16x4);
855 :
856 0 : return 0;
857 0 : }
858 :
859 0 : static int bdw_init_workarounds(struct intel_engine_cs *ring)
860 : {
861 : int ret;
862 0 : struct drm_device *dev = ring->dev;
863 0 : struct drm_i915_private *dev_priv = dev->dev_private;
864 :
865 0 : ret = gen8_init_workarounds(ring);
866 0 : if (ret)
867 0 : return ret;
868 :
869 : /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
870 0 : WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
871 :
872 : /* WaDisableDopClockGating:bdw */
873 0 : WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
874 : DOP_CLOCK_GATING_DISABLE);
875 :
876 0 : WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
877 : GEN8_SAMPLER_POWER_BYPASS_DIS);
878 :
879 0 : WA_SET_BIT_MASKED(HDC_CHICKEN0,
880 : /* WaForceContextSaveRestoreNonCoherent:bdw */
881 : HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
882 : /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
883 : (IS_BDW_GT3(dev) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
884 :
885 0 : return 0;
886 0 : }
887 :
888 0 : static int chv_init_workarounds(struct intel_engine_cs *ring)
889 : {
890 : int ret;
891 0 : struct drm_device *dev = ring->dev;
892 0 : struct drm_i915_private *dev_priv = dev->dev_private;
893 :
894 0 : ret = gen8_init_workarounds(ring);
895 0 : if (ret)
896 0 : return ret;
897 :
898 : /* WaDisableThreadStallDopClockGating:chv */
899 0 : WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
900 :
901 : /* Improve HiZ throughput on CHV. */
902 0 : WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
903 :
904 0 : return 0;
905 0 : }
906 :
907 0 : static int gen9_init_workarounds(struct intel_engine_cs *ring)
908 : {
909 0 : struct drm_device *dev = ring->dev;
910 0 : struct drm_i915_private *dev_priv = dev->dev_private;
911 :
912 : /* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl */
913 0 : I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE));
914 :
915 : /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl */
916 0 : I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
917 : GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
918 :
919 : /* WaDisableKillLogic:bxt,skl,kbl */
920 0 : I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
921 : ECOCHK_DIS_TLB);
922 :
923 : /* WaDisablePartialInstShootdown:skl,bxt,kbl */
924 0 : WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
925 : PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
926 :
927 : /* Syncing dependencies between camera and graphics:skl,bxt,kbl */
928 0 : WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
929 : GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
930 :
931 : /* WaDisableDgMirrorFixInHalfSliceChicken5:skl,bxt,kbl */
932 0 : WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
933 : GEN9_DG_MIRROR_FIX_ENABLE);
934 :
935 : /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
936 0 : if (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
937 0 : IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
938 0 : WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,
939 : GEN9_RHWO_OPTIMIZATION_DISABLE);
940 : /*
941 : * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set
942 : * but we do that in the per-ctx batchbuffer as there is an issue
943 : * with this register not getting restored on ctx restore
944 : */
945 : }
946 :
947 : /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl */
948 0 : WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
949 : GEN9_ENABLE_YV12_BUGFIX);
950 :
951 : /* Wa4x4STCOptimizationDisable:skl,bxt,kbl */
952 : /* WaDisablePartialResolveInVc:skl,bxt,kbl */
953 0 : WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE |
954 : GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE));
955 :
956 : /* WaCcsTlbPrefetchDisable:skl,bxt,kbl */
957 0 : WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
958 : GEN9_CCS_TLB_PREFETCH_ENABLE);
959 :
960 : /* WaDisableMaskBasedCammingInRCC:skl,bxt,kbl */
961 0 : if (IS_SKL_REVID(dev, SKL_REVID_C0, SKL_REVID_C0) ||
962 0 : IS_BXT_REVID(dev, 0, BXT_REVID_A1))
963 0 : WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
964 : PIXEL_MASK_CAMMING_DISABLE);
965 :
966 : /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl */
967 0 : WA_SET_BIT_MASKED(HDC_CHICKEN0,
968 : HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
969 : HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
970 :
971 : /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
972 : * both tied to WaForceContextSaveRestoreNonCoherent
973 : * in some hsds for skl. We keep the tie for all gen9. The
974 : * documentation is a bit hazy and so we want to get common behaviour,
975 : * even though there is no clear evidence we would need both on kbl/bxt.
976 : * This area has been a source of system hangs so we play it safe
977 : * and mimic the skl regardless of what bspec says.
978 : *
979 : * Use Force Non-Coherent whenever executing a 3D context. This
980 : * is a workaround for a possible hang in the unlikely event
981 : * a TLB invalidation occurs during a PSD flush.
982 : */
983 :
984 : /* WaForceEnableNonCoherent:skl,bxt,kbl */
985 0 : WA_SET_BIT_MASKED(HDC_CHICKEN0,
986 : HDC_FORCE_NON_COHERENT);
987 :
988 : /* WaDisableHDCInvalidation:skl,bxt,kbl */
989 0 : I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
990 : BDW_DISABLE_HDC_INVALIDATION);
991 :
992 :
993 : /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl */
994 0 : if (IS_SKYLAKE(dev_priv) ||
995 0 : IS_KABYLAKE(dev_priv) ||
996 0 : IS_BXT_REVID(dev, 0, BXT_REVID_B0))
997 0 : WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
998 : GEN8_SAMPLER_POWER_BYPASS_DIS);
999 :
1000 : /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl */
1001 0 : WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
1002 :
1003 0 : return 0;
1004 0 : }
1005 :
1006 0 : static int skl_tune_iz_hashing(struct intel_engine_cs *ring)
1007 : {
1008 0 : struct drm_device *dev = ring->dev;
1009 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1010 0 : u8 vals[3] = { 0, 0, 0 };
1011 : unsigned int i;
1012 :
1013 0 : for (i = 0; i < 3; i++) {
1014 : u8 ss;
1015 :
1016 : /*
1017 : * Only consider slices where one, and only one, subslice has 7
1018 : * EUs
1019 : */
1020 0 : if (hweight8(dev_priv->info.subslice_7eu[i]) != 1)
1021 0 : continue;
1022 :
1023 : /*
1024 : * subslice_7eu[i] != 0 (because of the check above) and
1025 : * ss_max == 4 (maximum number of subslices possible per slice)
1026 : *
1027 : * -> 0 <= ss <= 3;
1028 : */
1029 0 : ss = ffs(dev_priv->info.subslice_7eu[i]) - 1;
1030 0 : vals[i] = 3 - ss;
1031 0 : }
1032 :
1033 0 : if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
1034 0 : return 0;
1035 :
1036 : /* Tune IZ hashing. See intel_device_info_runtime_init() */
1037 0 : WA_SET_FIELD_MASKED(GEN7_GT_MODE,
1038 : GEN9_IZ_HASHING_MASK(2) |
1039 : GEN9_IZ_HASHING_MASK(1) |
1040 : GEN9_IZ_HASHING_MASK(0),
1041 : GEN9_IZ_HASHING(2, vals[2]) |
1042 : GEN9_IZ_HASHING(1, vals[1]) |
1043 : GEN9_IZ_HASHING(0, vals[0]));
1044 :
1045 0 : return 0;
1046 0 : }
1047 :
1048 0 : static int skl_init_workarounds(struct intel_engine_cs *ring)
1049 : {
1050 : int ret;
1051 0 : struct drm_device *dev = ring->dev;
1052 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1053 :
1054 0 : ret = gen9_init_workarounds(ring);
1055 0 : if (ret)
1056 0 : return ret;
1057 :
1058 0 : if (IS_SKL_REVID(dev, 0, SKL_REVID_D0)) {
1059 : /* WaDisableHDCInvalidation:skl */
1060 0 : I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
1061 : BDW_DISABLE_HDC_INVALIDATION);
1062 :
1063 : /* WaDisableChickenBitTSGBarrierAckForFFSliceCS:skl */
1064 0 : I915_WRITE(FF_SLICE_CS_CHICKEN2,
1065 : _MASKED_BIT_ENABLE(GEN9_TSG_BARRIER_ACK_DISABLE));
1066 0 : }
1067 :
1068 : /* GEN8_L3SQCREG4 has a dependency with WA batch so any new changes
1069 : * involving this register should also be added to WA batch as required.
1070 : */
1071 0 : if (IS_SKL_REVID(dev, 0, SKL_REVID_E0))
1072 : /* WaDisableLSQCROPERFforOCL:skl */
1073 0 : I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
1074 : GEN8_LQSC_RO_PERF_DIS);
1075 :
1076 : /* WaEnableGapsTsvCreditFix:skl */
1077 0 : if (IS_SKL_REVID(dev, SKL_REVID_C0, REVID_FOREVER)) {
1078 0 : I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
1079 : GEN9_GAPS_TSV_CREDIT_DISABLE));
1080 0 : }
1081 :
1082 : /* WaDisablePowerCompilerClockGating:skl */
1083 0 : if (IS_SKL_REVID(dev, SKL_REVID_B0, SKL_REVID_B0))
1084 0 : WA_SET_BIT_MASKED(HIZ_CHICKEN,
1085 : BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE);
1086 :
1087 : /* WaBarrierPerformanceFixDisable:skl */
1088 0 : if (IS_SKL_REVID(dev, SKL_REVID_C0, SKL_REVID_D0))
1089 0 : WA_SET_BIT_MASKED(HDC_CHICKEN0,
1090 : HDC_FENCE_DEST_SLM_DISABLE |
1091 : HDC_BARRIER_PERFORMANCE_DISABLE);
1092 :
1093 : /* WaDisableSbeCacheDispatchPortSharing:skl */
1094 0 : if (IS_SKL_REVID(dev, 0, SKL_REVID_F0))
1095 0 : WA_SET_BIT_MASKED(
1096 : GEN7_HALF_SLICE_CHICKEN1,
1097 : GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
1098 :
1099 0 : return skl_tune_iz_hashing(ring);
1100 0 : }
1101 :
1102 0 : static int bxt_init_workarounds(struct intel_engine_cs *ring)
1103 : {
1104 : int ret;
1105 0 : struct drm_device *dev = ring->dev;
1106 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1107 :
1108 0 : ret = gen9_init_workarounds(ring);
1109 0 : if (ret)
1110 0 : return ret;
1111 :
1112 : /* WaStoreMultiplePTEenable:bxt */
1113 : /* This is a requirement according to Hardware specification */
1114 0 : if (IS_BXT_REVID(dev, 0, BXT_REVID_A0))
1115 0 : I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);
1116 :
1117 : /* WaSetClckGatingDisableMedia:bxt */
1118 0 : if (IS_BXT_REVID(dev, 0, BXT_REVID_A0)) {
1119 0 : I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
1120 : ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE));
1121 0 : }
1122 :
1123 : /* WaDisableThreadStallDopClockGating:bxt */
1124 0 : WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
1125 : STALL_DOP_GATING_DISABLE);
1126 :
1127 : /* WaDisableSbeCacheDispatchPortSharing:bxt */
1128 0 : if (IS_BXT_REVID(dev, 0, BXT_REVID_B0)) {
1129 0 : WA_SET_BIT_MASKED(
1130 : GEN7_HALF_SLICE_CHICKEN1,
1131 : GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
1132 : }
1133 :
1134 0 : return 0;
1135 0 : }
1136 :
1137 0 : static int kbl_init_workarounds(struct intel_engine_cs *ring)
1138 : {
1139 0 : struct drm_device *dev = ring->dev;
1140 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1141 : int ret;
1142 :
1143 0 : ret = gen9_init_workarounds(ring);
1144 0 : if (ret)
1145 0 : return ret;
1146 :
1147 : /* WaEnableGapsTsvCreditFix:kbl */
1148 0 : I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
1149 : GEN9_GAPS_TSV_CREDIT_DISABLE));
1150 :
1151 : /* WaDisableDynamicCreditSharing:kbl */
1152 0 : if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
1153 0 : WA_SET_BIT(GAMT_CHKN_BIT_REG,
1154 : GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
1155 :
1156 : /* WaDisableFenceDestinationToSLM:kbl (pre-prod) */
1157 0 : if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0))
1158 0 : WA_SET_BIT_MASKED(HDC_CHICKEN0,
1159 : HDC_FENCE_DEST_SLM_DISABLE);
1160 :
1161 : /* GEN8_L3SQCREG4 has a dependency with WA batch so any new changes
1162 : * involving this register should also be added to WA batch as required.
1163 : */
1164 0 : if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_E0))
1165 : /* WaDisableLSQCROPERFforOCL:kbl */
1166 0 : I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
1167 : GEN8_LQSC_RO_PERF_DIS);
1168 :
1169 : /* WaToEnableHwFixForPushConstHWBug:kbl */
1170 0 : if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER))
1171 0 : WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
1172 : GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
1173 :
1174 : /* WaDisableGafsUnitClkGating:kbl */
1175 0 : WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
1176 :
1177 : /* WaDisableSbeCacheDispatchPortSharing:kbl */
1178 0 : WA_SET_BIT_MASKED(
1179 : GEN7_HALF_SLICE_CHICKEN1,
1180 : GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
1181 :
1182 : /* WaInPlaceDecompressionHang:kbl */
1183 0 : WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
1184 : GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1185 :
1186 : #ifdef notyet
1187 : /* WaDisableLSQCROPERFforOCL:kbl */
1188 : ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
1189 : if (ret)
1190 : return ret;
1191 : #endif
1192 :
1193 0 : return 0;
1194 0 : }
1195 :
1196 0 : int init_workarounds_ring(struct intel_engine_cs *ring)
1197 : {
1198 0 : struct drm_device *dev = ring->dev;
1199 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1200 :
1201 0 : WARN_ON(ring->id != RCS);
1202 :
1203 0 : dev_priv->workarounds.count = 0;
1204 :
1205 0 : if (IS_BROADWELL(dev))
1206 0 : return bdw_init_workarounds(ring);
1207 :
1208 0 : if (IS_CHERRYVIEW(dev))
1209 0 : return chv_init_workarounds(ring);
1210 :
1211 0 : if (IS_SKYLAKE(dev))
1212 0 : return skl_init_workarounds(ring);
1213 :
1214 0 : if (IS_BROXTON(dev))
1215 0 : return bxt_init_workarounds(ring);
1216 :
1217 0 : if (IS_KABYLAKE(dev))
1218 0 : return kbl_init_workarounds(ring);
1219 :
1220 0 : return 0;
1221 0 : }
1222 :
1223 0 : static int init_render_ring(struct intel_engine_cs *ring)
1224 : {
1225 0 : struct drm_device *dev = ring->dev;
1226 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1227 0 : int ret = init_ring_common(ring);
1228 0 : if (ret)
1229 0 : return ret;
1230 :
1231 : /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
1232 0 : if (INTEL_INFO(dev)->gen >= 4 && INTEL_INFO(dev)->gen < 7)
1233 0 : I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
1234 :
1235 : /* We need to disable the AsyncFlip performance optimisations in order
1236 : * to use MI_WAIT_FOR_EVENT within the CS. It should already be
1237 : * programmed to '1' on all products.
1238 : *
1239 : * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
1240 : */
1241 0 : if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8)
1242 0 : I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
1243 :
1244 : /* Required for the hardware to program scanline values for waiting */
1245 : /* WaEnableFlushTlbInvalidationMode:snb */
1246 0 : if (INTEL_INFO(dev)->gen == 6)
1247 0 : I915_WRITE(GFX_MODE,
1248 : _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));
1249 :
1250 : /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
1251 0 : if (IS_GEN7(dev))
1252 0 : I915_WRITE(GFX_MODE_GEN7,
1253 : _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
1254 : _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
1255 :
1256 0 : if (IS_GEN6(dev)) {
1257 : /* From the Sandybridge PRM, volume 1 part 3, page 24:
1258 : * "If this bit is set, STCunit will have LRA as replacement
1259 : * policy. [...] This bit must be reset. LRA replacement
1260 : * policy is not supported."
1261 : */
1262 0 : I915_WRITE(CACHE_MODE_0,
1263 : _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
1264 0 : }
1265 :
1266 0 : if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8)
1267 0 : I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
1268 :
1269 0 : if (HAS_L3_DPF(dev))
1270 0 : I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
1271 :
1272 0 : return init_workarounds_ring(ring);
1273 0 : }
1274 :
1275 0 : static void render_ring_cleanup(struct intel_engine_cs *ring)
1276 : {
1277 0 : struct drm_device *dev = ring->dev;
1278 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1279 :
1280 0 : if (dev_priv->semaphore_obj) {
1281 0 : i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj);
1282 0 : drm_gem_object_unreference(&dev_priv->semaphore_obj->base);
1283 0 : dev_priv->semaphore_obj = NULL;
1284 0 : }
1285 :
1286 0 : intel_fini_pipe_control(ring);
1287 0 : }
1288 :
1289 0 : static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req,
1290 : unsigned int num_dwords)
1291 : {
1292 : #define MBOX_UPDATE_DWORDS 8
1293 0 : struct intel_engine_cs *signaller = signaller_req->ring;
1294 0 : struct drm_device *dev = signaller->dev;
1295 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1296 : struct intel_engine_cs *waiter;
1297 : int i, ret, num_rings;
1298 :
1299 0 : num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
1300 0 : num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
1301 : #undef MBOX_UPDATE_DWORDS
1302 :
1303 0 : ret = intel_ring_begin(signaller_req, num_dwords);
1304 0 : if (ret)
1305 0 : return ret;
1306 :
1307 0 : for_each_ring(waiter, dev_priv, i) {
1308 : u32 seqno;
1309 0 : u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
1310 0 : if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
1311 0 : continue;
1312 :
1313 0 : seqno = i915_gem_request_get_seqno(signaller_req);
1314 0 : intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6));
1315 0 : intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB |
1316 : PIPE_CONTROL_QW_WRITE |
1317 : PIPE_CONTROL_FLUSH_ENABLE);
1318 0 : intel_ring_emit(signaller, lower_32_bits(gtt_offset));
1319 0 : intel_ring_emit(signaller, upper_32_bits(gtt_offset));
1320 0 : intel_ring_emit(signaller, seqno);
1321 0 : intel_ring_emit(signaller, 0);
1322 0 : intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
1323 0 : MI_SEMAPHORE_TARGET(waiter->id));
1324 0 : intel_ring_emit(signaller, 0);
1325 0 : }
1326 :
1327 0 : return 0;
1328 0 : }
1329 :
1330 0 : static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req,
1331 : unsigned int num_dwords)
1332 : {
1333 : #define MBOX_UPDATE_DWORDS 6
1334 0 : struct intel_engine_cs *signaller = signaller_req->ring;
1335 0 : struct drm_device *dev = signaller->dev;
1336 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1337 : struct intel_engine_cs *waiter;
1338 : int i, ret, num_rings;
1339 :
1340 0 : num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
1341 0 : num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
1342 : #undef MBOX_UPDATE_DWORDS
1343 :
1344 0 : ret = intel_ring_begin(signaller_req, num_dwords);
1345 0 : if (ret)
1346 0 : return ret;
1347 :
1348 0 : for_each_ring(waiter, dev_priv, i) {
1349 : u32 seqno;
1350 0 : u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
1351 0 : if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
1352 0 : continue;
1353 :
1354 0 : seqno = i915_gem_request_get_seqno(signaller_req);
1355 0 : intel_ring_emit(signaller, (MI_FLUSH_DW + 1) |
1356 : MI_FLUSH_DW_OP_STOREDW);
1357 0 : intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
1358 : MI_FLUSH_DW_USE_GTT);
1359 0 : intel_ring_emit(signaller, upper_32_bits(gtt_offset));
1360 0 : intel_ring_emit(signaller, seqno);
1361 0 : intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
1362 0 : MI_SEMAPHORE_TARGET(waiter->id));
1363 0 : intel_ring_emit(signaller, 0);
1364 0 : }
1365 :
1366 0 : return 0;
1367 0 : }
1368 :
1369 0 : static int gen6_signal(struct drm_i915_gem_request *signaller_req,
1370 : unsigned int num_dwords)
1371 : {
1372 0 : struct intel_engine_cs *signaller = signaller_req->ring;
1373 0 : struct drm_device *dev = signaller->dev;
1374 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1375 : struct intel_engine_cs *useless;
1376 : int i, ret, num_rings;
1377 :
1378 : #define MBOX_UPDATE_DWORDS 3
1379 0 : num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
1380 0 : num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2);
1381 : #undef MBOX_UPDATE_DWORDS
1382 :
1383 0 : ret = intel_ring_begin(signaller_req, num_dwords);
1384 0 : if (ret)
1385 0 : return ret;
1386 :
1387 0 : for_each_ring(useless, dev_priv, i) {
1388 0 : u32 mbox_reg = signaller->semaphore.mbox.signal[i];
1389 0 : if (mbox_reg != GEN6_NOSYNC) {
1390 0 : u32 seqno = i915_gem_request_get_seqno(signaller_req);
1391 0 : intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
1392 0 : intel_ring_emit(signaller, mbox_reg);
1393 0 : intel_ring_emit(signaller, seqno);
1394 0 : }
1395 0 : }
1396 :
1397 : /* If num_dwords was rounded, make sure the tail pointer is correct */
1398 0 : if (num_rings % 2 == 0)
1399 0 : intel_ring_emit(signaller, MI_NOOP);
1400 :
1401 0 : return 0;
1402 0 : }
1403 :
1404 : /**
1405 : * gen6_add_request - Update the semaphore mailbox registers
1406 : *
1407 : * @req - request to write to the ring
1408 : *
1409 : * Update the mailbox registers in the *other* rings with the current seqno.
1410 : * This acts like a signal in the canonical semaphore.
1411 : */
1412 : static int
1413 0 : gen6_add_request(struct drm_i915_gem_request *req)
1414 : {
1415 0 : struct intel_engine_cs *ring = req->ring;
1416 : int ret;
1417 :
1418 0 : if (ring->semaphore.signal)
1419 0 : ret = ring->semaphore.signal(req, 4);
1420 : else
1421 0 : ret = intel_ring_begin(req, 4);
1422 :
1423 0 : if (ret)
1424 0 : return ret;
1425 :
1426 0 : intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
1427 0 : intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
1428 0 : intel_ring_emit(ring, i915_gem_request_get_seqno(req));
1429 0 : intel_ring_emit(ring, MI_USER_INTERRUPT);
1430 0 : __intel_ring_advance(ring);
1431 :
1432 0 : return 0;
1433 0 : }
1434 :
1435 0 : static inline bool i915_gem_has_seqno_wrapped(struct drm_device *dev,
1436 : u32 seqno)
1437 : {
1438 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1439 0 : return dev_priv->last_seqno < seqno;
1440 : }
1441 :
1442 : /**
1443 : * intel_ring_sync - sync the waiter to the signaller on seqno
1444 : *
1445 : * @waiter - ring that is waiting
1446 : * @signaller - ring which has, or will signal
1447 : * @seqno - seqno which the waiter will block on
1448 : */
1449 :
1450 : static int
1451 0 : gen8_ring_sync(struct drm_i915_gem_request *waiter_req,
1452 : struct intel_engine_cs *signaller,
1453 : u32 seqno)
1454 : {
1455 0 : struct intel_engine_cs *waiter = waiter_req->ring;
1456 0 : struct drm_i915_private *dev_priv = waiter->dev->dev_private;
1457 : int ret;
1458 :
1459 0 : ret = intel_ring_begin(waiter_req, 4);
1460 0 : if (ret)
1461 0 : return ret;
1462 :
1463 0 : intel_ring_emit(waiter, MI_SEMAPHORE_WAIT |
1464 : MI_SEMAPHORE_GLOBAL_GTT |
1465 : MI_SEMAPHORE_POLL |
1466 : MI_SEMAPHORE_SAD_GTE_SDD);
1467 0 : intel_ring_emit(waiter, seqno);
1468 0 : intel_ring_emit(waiter,
1469 0 : lower_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id)));
1470 0 : intel_ring_emit(waiter,
1471 0 : upper_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id)));
1472 0 : intel_ring_advance(waiter);
1473 0 : return 0;
1474 0 : }
1475 :
1476 : static int
1477 0 : gen6_ring_sync(struct drm_i915_gem_request *waiter_req,
1478 : struct intel_engine_cs *signaller,
1479 : u32 seqno)
1480 : {
1481 0 : struct intel_engine_cs *waiter = waiter_req->ring;
1482 : u32 dw1 = MI_SEMAPHORE_MBOX |
1483 : MI_SEMAPHORE_COMPARE |
1484 : MI_SEMAPHORE_REGISTER;
1485 0 : u32 wait_mbox = signaller->semaphore.mbox.wait[waiter->id];
1486 : int ret;
1487 :
1488 : /* Throughout all of the GEM code, seqno passed implies our current
1489 : * seqno is >= the last seqno executed. However for hardware the
1490 : * comparison is strictly greater than.
1491 : */
1492 0 : seqno -= 1;
1493 :
1494 0 : WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
1495 :
1496 0 : ret = intel_ring_begin(waiter_req, 4);
1497 0 : if (ret)
1498 0 : return ret;
1499 :
1500 : /* If seqno wrap happened, omit the wait with no-ops */
1501 0 : if (likely(!i915_gem_has_seqno_wrapped(waiter->dev, seqno))) {
1502 0 : intel_ring_emit(waiter, dw1 | wait_mbox);
1503 0 : intel_ring_emit(waiter, seqno);
1504 0 : intel_ring_emit(waiter, 0);
1505 0 : intel_ring_emit(waiter, MI_NOOP);
1506 0 : } else {
1507 0 : intel_ring_emit(waiter, MI_NOOP);
1508 0 : intel_ring_emit(waiter, MI_NOOP);
1509 0 : intel_ring_emit(waiter, MI_NOOP);
1510 0 : intel_ring_emit(waiter, MI_NOOP);
1511 : }
1512 0 : intel_ring_advance(waiter);
1513 :
1514 0 : return 0;
1515 0 : }
1516 :
1517 : #define PIPE_CONTROL_FLUSH(ring__, addr__) \
1518 : do { \
1519 : intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE | \
1520 : PIPE_CONTROL_DEPTH_STALL); \
1521 : intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT); \
1522 : intel_ring_emit(ring__, 0); \
1523 : intel_ring_emit(ring__, 0); \
1524 : } while (0)
1525 :
1526 : static int
1527 0 : pc_render_add_request(struct drm_i915_gem_request *req)
1528 : {
1529 0 : struct intel_engine_cs *ring = req->ring;
1530 0 : u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
1531 : int ret;
1532 :
1533 : /* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
1534 : * incoherent with writes to memory, i.e. completely fubar,
1535 : * so we need to use PIPE_NOTIFY instead.
1536 : *
1537 : * However, we also need to work around the qword write
1538 : * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
1539 : * memory before requesting an interrupt.
1540 : */
1541 0 : ret = intel_ring_begin(req, 32);
1542 0 : if (ret)
1543 0 : return ret;
1544 :
1545 0 : intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
1546 : PIPE_CONTROL_WRITE_FLUSH |
1547 : PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
1548 0 : intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
1549 0 : intel_ring_emit(ring, i915_gem_request_get_seqno(req));
1550 0 : intel_ring_emit(ring, 0);
1551 0 : PIPE_CONTROL_FLUSH(ring, scratch_addr);
1552 0 : scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
1553 0 : PIPE_CONTROL_FLUSH(ring, scratch_addr);
1554 0 : scratch_addr += 2 * CACHELINE_BYTES;
1555 0 : PIPE_CONTROL_FLUSH(ring, scratch_addr);
1556 0 : scratch_addr += 2 * CACHELINE_BYTES;
1557 0 : PIPE_CONTROL_FLUSH(ring, scratch_addr);
1558 0 : scratch_addr += 2 * CACHELINE_BYTES;
1559 0 : PIPE_CONTROL_FLUSH(ring, scratch_addr);
1560 0 : scratch_addr += 2 * CACHELINE_BYTES;
1561 0 : PIPE_CONTROL_FLUSH(ring, scratch_addr);
1562 :
1563 0 : intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
1564 : PIPE_CONTROL_WRITE_FLUSH |
1565 : PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
1566 : PIPE_CONTROL_NOTIFY);
1567 0 : intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
1568 0 : intel_ring_emit(ring, i915_gem_request_get_seqno(req));
1569 0 : intel_ring_emit(ring, 0);
1570 0 : __intel_ring_advance(ring);
1571 :
1572 0 : return 0;
1573 0 : }
1574 :
1575 : static u32
1576 0 : gen6_ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
1577 : {
1578 : /* Workaround to force correct ordering between irq and seqno writes on
1579 : * ivb (and maybe also on snb) by reading from a CS register (like
1580 : * ACTHD) before reading the status page. */
1581 0 : if (!lazy_coherency) {
1582 0 : struct drm_i915_private *dev_priv = ring->dev->dev_private;
1583 0 : POSTING_READ(RING_ACTHD(ring->mmio_base));
1584 0 : }
1585 :
1586 0 : return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
1587 : }
1588 :
1589 : static u32
1590 0 : ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
1591 : {
1592 0 : return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
1593 : }
1594 :
1595 : static void
1596 0 : ring_set_seqno(struct intel_engine_cs *ring, u32 seqno)
1597 : {
1598 0 : intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
1599 0 : }
1600 :
1601 : static u32
1602 0 : pc_render_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
1603 : {
1604 0 : return ring->scratch.cpu_page[0];
1605 : }
1606 :
1607 : static void
1608 0 : pc_render_set_seqno(struct intel_engine_cs *ring, u32 seqno)
1609 : {
1610 0 : ring->scratch.cpu_page[0] = seqno;
1611 0 : }
1612 :
1613 : static bool
1614 0 : gen5_ring_get_irq(struct intel_engine_cs *ring)
1615 : {
1616 0 : struct drm_device *dev = ring->dev;
1617 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1618 : unsigned long flags;
1619 :
1620 0 : if (WARN_ON(!intel_irqs_enabled(dev_priv)))
1621 0 : return false;
1622 :
1623 0 : spin_lock_irqsave(&dev_priv->irq_lock, flags);
1624 0 : if (ring->irq_refcount++ == 0)
1625 0 : gen5_enable_gt_irq(dev_priv, ring->irq_enable_mask);
1626 0 : spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1627 :
1628 0 : return true;
1629 0 : }
1630 :
1631 : static void
1632 0 : gen5_ring_put_irq(struct intel_engine_cs *ring)
1633 : {
1634 0 : struct drm_device *dev = ring->dev;
1635 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1636 : unsigned long flags;
1637 :
1638 0 : spin_lock_irqsave(&dev_priv->irq_lock, flags);
1639 0 : if (--ring->irq_refcount == 0)
1640 0 : gen5_disable_gt_irq(dev_priv, ring->irq_enable_mask);
1641 0 : spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1642 0 : }
1643 :
1644 : static bool
1645 0 : i9xx_ring_get_irq(struct intel_engine_cs *ring)
1646 : {
1647 0 : struct drm_device *dev = ring->dev;
1648 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1649 : unsigned long flags;
1650 :
1651 0 : if (!intel_irqs_enabled(dev_priv))
1652 0 : return false;
1653 :
1654 0 : spin_lock_irqsave(&dev_priv->irq_lock, flags);
1655 0 : if (ring->irq_refcount++ == 0) {
1656 0 : dev_priv->irq_mask &= ~ring->irq_enable_mask;
1657 0 : I915_WRITE(IMR, dev_priv->irq_mask);
1658 0 : POSTING_READ(IMR);
1659 0 : }
1660 0 : spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1661 :
1662 0 : return true;
1663 0 : }
1664 :
1665 : static void
1666 0 : i9xx_ring_put_irq(struct intel_engine_cs *ring)
1667 : {
1668 0 : struct drm_device *dev = ring->dev;
1669 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1670 : unsigned long flags;
1671 :
1672 0 : spin_lock_irqsave(&dev_priv->irq_lock, flags);
1673 0 : if (--ring->irq_refcount == 0) {
1674 0 : dev_priv->irq_mask |= ring->irq_enable_mask;
1675 0 : I915_WRITE(IMR, dev_priv->irq_mask);
1676 0 : POSTING_READ(IMR);
1677 0 : }
1678 0 : spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1679 0 : }
1680 :
1681 : static bool
1682 0 : i8xx_ring_get_irq(struct intel_engine_cs *ring)
1683 : {
1684 0 : struct drm_device *dev = ring->dev;
1685 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1686 : unsigned long flags;
1687 :
1688 0 : if (!intel_irqs_enabled(dev_priv))
1689 0 : return false;
1690 :
1691 0 : spin_lock_irqsave(&dev_priv->irq_lock, flags);
1692 0 : if (ring->irq_refcount++ == 0) {
1693 0 : dev_priv->irq_mask &= ~ring->irq_enable_mask;
1694 0 : I915_WRITE16(IMR, dev_priv->irq_mask);
1695 0 : POSTING_READ16(IMR);
1696 0 : }
1697 0 : spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1698 :
1699 0 : return true;
1700 0 : }
1701 :
1702 : static void
1703 0 : i8xx_ring_put_irq(struct intel_engine_cs *ring)
1704 : {
1705 0 : struct drm_device *dev = ring->dev;
1706 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1707 : unsigned long flags;
1708 :
1709 0 : spin_lock_irqsave(&dev_priv->irq_lock, flags);
1710 0 : if (--ring->irq_refcount == 0) {
1711 0 : dev_priv->irq_mask |= ring->irq_enable_mask;
1712 0 : I915_WRITE16(IMR, dev_priv->irq_mask);
1713 0 : POSTING_READ16(IMR);
1714 0 : }
1715 0 : spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1716 0 : }
1717 :
1718 : static int
1719 0 : bsd_ring_flush(struct drm_i915_gem_request *req,
1720 : u32 invalidate_domains,
1721 : u32 flush_domains)
1722 : {
1723 0 : struct intel_engine_cs *ring = req->ring;
1724 : int ret;
1725 :
1726 0 : ret = intel_ring_begin(req, 2);
1727 0 : if (ret)
1728 0 : return ret;
1729 :
1730 0 : intel_ring_emit(ring, MI_FLUSH);
1731 0 : intel_ring_emit(ring, MI_NOOP);
1732 0 : intel_ring_advance(ring);
1733 0 : return 0;
1734 0 : }
1735 :
1736 : static int
1737 0 : i9xx_add_request(struct drm_i915_gem_request *req)
1738 : {
1739 0 : struct intel_engine_cs *ring = req->ring;
1740 : int ret;
1741 :
1742 0 : ret = intel_ring_begin(req, 4);
1743 0 : if (ret)
1744 0 : return ret;
1745 :
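     : 	/* Store the request's seqno into the hardware status page at
     : 	 * I915_GEM_HWS_INDEX, then raise MI_USER_INTERRUPT as the breadcrumb. */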
1746 0 : intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
1747 0 : intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
1748 0 : intel_ring_emit(ring, i915_gem_request_get_seqno(req));
1749 0 : intel_ring_emit(ring, MI_USER_INTERRUPT);
1750 0 : __intel_ring_advance(ring);
1751 :
1752 0 : return 0;
1753 0 : }
1754 :
1755 : static bool
1756 0 : gen6_ring_get_irq(struct intel_engine_cs *ring)
1757 : {
1758 0 : struct drm_device *dev = ring->dev;
1759 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1760 : unsigned long flags;
1761 :
1762 0 : if (WARN_ON(!intel_irqs_enabled(dev_priv)))
1763 0 : return false;
1764 :
1765 0 : spin_lock_irqsave(&dev_priv->irq_lock, flags);
1766 0 : if (ring->irq_refcount++ == 0) {
1767 0 : if (HAS_L3_DPF(dev) && ring->id == RCS)
1768 0 : I915_WRITE_IMR(ring,
1769 : ~(ring->irq_enable_mask |
1770 : GT_PARITY_ERROR(dev)));
1771 : else
1772 0 : I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
1773 0 : gen5_enable_gt_irq(dev_priv, ring->irq_enable_mask);
1774 0 : }
1775 0 : spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1776 :
1777 0 : return true;
1778 0 : }
1779 :
1780 : static void
1781 0 : gen6_ring_put_irq(struct intel_engine_cs *ring)
1782 : {
1783 0 : struct drm_device *dev = ring->dev;
1784 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1785 : unsigned long flags;
1786 :
1787 0 : spin_lock_irqsave(&dev_priv->irq_lock, flags);
1788 0 : if (--ring->irq_refcount == 0) {
1789 0 : if (HAS_L3_DPF(dev) && ring->id == RCS)
1790 0 : I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
1791 : else
1792 0 : I915_WRITE_IMR(ring, ~0);
1793 0 : gen5_disable_gt_irq(dev_priv, ring->irq_enable_mask);
1794 0 : }
1795 0 : spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1796 0 : }
1797 :
1798 : static bool
1799 0 : hsw_vebox_get_irq(struct intel_engine_cs *ring)
1800 : {
1801 0 : struct drm_device *dev = ring->dev;
1802 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1803 : unsigned long flags;
1804 :
1805 0 : if (WARN_ON(!intel_irqs_enabled(dev_priv)))
1806 0 : return false;
1807 :
1808 0 : spin_lock_irqsave(&dev_priv->irq_lock, flags);
1809 0 : if (ring->irq_refcount++ == 0) {
1810 0 : I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
1811 0 : gen6_enable_pm_irq(dev_priv, ring->irq_enable_mask);
1812 0 : }
1813 0 : spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1814 :
1815 0 : return true;
1816 0 : }
1817 :
1818 : static void
1819 0 : hsw_vebox_put_irq(struct intel_engine_cs *ring)
1820 : {
1821 0 : struct drm_device *dev = ring->dev;
1822 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1823 : unsigned long flags;
1824 :
1825 0 : spin_lock_irqsave(&dev_priv->irq_lock, flags);
1826 0 : if (--ring->irq_refcount == 0) {
1827 0 : I915_WRITE_IMR(ring, ~0);
1828 0 : gen6_disable_pm_irq(dev_priv, ring->irq_enable_mask);
1829 0 : }
1830 0 : spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1831 0 : }
1832 :
1833 : static bool
1834 0 : gen8_ring_get_irq(struct intel_engine_cs *ring)
1835 : {
1836 0 : struct drm_device *dev = ring->dev;
1837 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1838 : unsigned long flags;
1839 :
1840 0 : if (WARN_ON(!intel_irqs_enabled(dev_priv)))
1841 0 : return false;
1842 :
1843 0 : spin_lock_irqsave(&dev_priv->irq_lock, flags);
1844 0 : if (ring->irq_refcount++ == 0) {
1845 0 : if (HAS_L3_DPF(dev) && ring->id == RCS) {
1846 0 : I915_WRITE_IMR(ring,
1847 : ~(ring->irq_enable_mask |
1848 : GT_RENDER_L3_PARITY_ERROR_INTERRUPT));
1849 0 : } else {
1850 0 : I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
1851 : }
1852 0 : POSTING_READ(RING_IMR(ring->mmio_base));
1853 0 : }
1854 0 : spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1855 :
1856 0 : return true;
1857 0 : }
1858 :
1859 : static void
1860 0 : gen8_ring_put_irq(struct intel_engine_cs *ring)
1861 : {
1862 0 : struct drm_device *dev = ring->dev;
1863 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1864 : unsigned long flags;
1865 :
1866 0 : spin_lock_irqsave(&dev_priv->irq_lock, flags);
1867 0 : if (--ring->irq_refcount == 0) {
1868 0 : if (HAS_L3_DPF(dev) && ring->id == RCS) {
1869 0 : I915_WRITE_IMR(ring,
1870 : ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
1871 0 : } else {
1872 0 : I915_WRITE_IMR(ring, ~0);
1873 : }
1874 0 : POSTING_READ(RING_IMR(ring->mmio_base));
1875 0 : }
1876 0 : spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1877 0 : }
1878 :
1879 : static int
1880 0 : i965_dispatch_execbuffer(struct drm_i915_gem_request *req,
1881 : u64 offset, u32 length,
1882 : unsigned dispatch_flags)
1883 : {
1884 0 : struct intel_engine_cs *ring = req->ring;
1885 : int ret;
1886 :
1887 0 : ret = intel_ring_begin(req, 2);
1888 0 : if (ret)
1889 0 : return ret;
1890 :
1891 0 : intel_ring_emit(ring,
1892 : MI_BATCH_BUFFER_START |
1893 0 : MI_BATCH_GTT |
1894 0 : (dispatch_flags & I915_DISPATCH_SECURE ?
1895 : 0 : MI_BATCH_NON_SECURE_I965));
1896 0 : intel_ring_emit(ring, offset);
1897 0 : intel_ring_advance(ring);
1898 :
1899 0 : return 0;
1900 0 : }
1901 :
1902 : /* Just a userspace ABI convention to limit the wa batch bo to a reasonable size */
1903 : #define I830_BATCH_LIMIT (256*1024)
1904 : #define I830_TLB_ENTRIES (2)
1905 : #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
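     : /*
     :  * The workaround scratch area is therefore max(2 * 4096, 256 KiB) =
     :  * 256 KiB; intel_init_render_ring_buffer() below allocates a bo of
     :  * exactly I830_WA_SIZE for this on HAS_BROKEN_CS_TLB platforms.
     :  */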
1906 : static int
1907 0 : i830_dispatch_execbuffer(struct drm_i915_gem_request *req,
1908 : u64 offset, u32 len,
1909 : unsigned dispatch_flags)
1910 : {
1911 0 : struct intel_engine_cs *ring = req->ring;
1912 0 : u32 cs_offset = ring->scratch.gtt_offset;
1913 : int ret;
1914 :
1915 0 : ret = intel_ring_begin(req, 6);
1916 0 : if (ret)
1917 0 : return ret;
1918 :
1919 : /* Evict the invalid PTE TLBs */
1920 0 : intel_ring_emit(ring, COLOR_BLT_CMD | BLT_WRITE_RGBA);
1921 0 : intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096);
1922 0 : intel_ring_emit(ring, I830_TLB_ENTRIES << 16 | 4); /* load each page */
1923 0 : intel_ring_emit(ring, cs_offset);
1924 0 : intel_ring_emit(ring, 0xdeadbeef);
1925 0 : intel_ring_emit(ring, MI_NOOP);
1926 0 : intel_ring_advance(ring);
1927 :
1928 0 : if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
1929 0 : if (len > I830_BATCH_LIMIT)
1930 0 : return -ENOSPC;
1931 :
1932 0 : ret = intel_ring_begin(req, 6 + 2);
1933 0 : if (ret)
1934 0 : return ret;
1935 :
1936 : 		/* Blit the batch (which now has all relocs applied) to the
1937 : * stable batch scratch bo area (so that the CS never
1938 : * stumbles over its tlb invalidation bug) ...
1939 : */
1940 0 : intel_ring_emit(ring, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA);
1941 0 : intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096);
1942 0 : intel_ring_emit(ring, DIV_ROUND_UP(len, 4096) << 16 | 4096);
1943 0 : intel_ring_emit(ring, cs_offset);
1944 0 : intel_ring_emit(ring, 4096);
1945 0 : intel_ring_emit(ring, offset);
1946 :
1947 0 : intel_ring_emit(ring, MI_FLUSH);
1948 0 : intel_ring_emit(ring, MI_NOOP);
1949 0 : intel_ring_advance(ring);
1950 :
1951 : /* ... and execute it. */
1952 0 : offset = cs_offset;
1953 0 : }
1954 :
1955 0 : ret = intel_ring_begin(req, 4);
1956 0 : if (ret)
1957 0 : return ret;
1958 :
1959 0 : intel_ring_emit(ring, MI_BATCH_BUFFER);
1960 0 : intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
1961 : 0 : MI_BATCH_NON_SECURE));
1962 0 : intel_ring_emit(ring, offset + len - 8);
1963 0 : intel_ring_emit(ring, MI_NOOP);
1964 0 : intel_ring_advance(ring);
1965 :
1966 0 : return 0;
1967 0 : }
1968 :
1969 : static int
1970 0 : i915_dispatch_execbuffer(struct drm_i915_gem_request *req,
1971 : u64 offset, u32 len,
1972 : unsigned dispatch_flags)
1973 : {
1974 0 : struct intel_engine_cs *ring = req->ring;
1975 : int ret;
1976 :
1977 0 : ret = intel_ring_begin(req, 2);
1978 0 : if (ret)
1979 0 : return ret;
1980 :
1981 0 : intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
1982 0 : intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
1983 : 0 : MI_BATCH_NON_SECURE));
1984 0 : intel_ring_advance(ring);
1985 :
1986 0 : return 0;
1987 0 : }
1988 :
1989 0 : static void cleanup_phys_status_page(struct intel_engine_cs *ring)
1990 : {
1991 0 : struct drm_i915_private *dev_priv = to_i915(ring->dev);
1992 :
1993 0 : if (!dev_priv->status_page_dmah)
1994 0 : return;
1995 :
1996 0 : drm_pci_free(ring->dev, dev_priv->status_page_dmah);
1997 0 : ring->status_page.page_addr = NULL;
1998 0 : }
1999 :
2000 0 : static void cleanup_status_page(struct intel_engine_cs *ring)
2001 : {
2002 : struct drm_i915_gem_object *obj;
2003 :
2004 0 : obj = ring->status_page.obj;
2005 0 : if (obj == NULL)
2006 0 : return;
2007 :
2008 0 : kunmap(sg_page(obj->pages->sgl));
2009 0 : i915_gem_object_ggtt_unpin(obj);
2010 0 : drm_gem_object_unreference(&obj->base);
2011 0 : ring->status_page.obj = NULL;
2012 0 : }
2013 :
2014 0 : static int init_status_page(struct intel_engine_cs *ring)
2015 : {
2016 0 : struct drm_i915_gem_object *obj = ring->status_page.obj;
2017 :
2018 0 : if (obj == NULL) {
2019 : unsigned flags;
2020 : int ret;
2021 :
2022 0 : obj = i915_gem_alloc_object(ring->dev, 4096);
2023 0 : if (obj == NULL) {
2024 0 : DRM_ERROR("Failed to allocate status page\n");
2025 0 : return -ENOMEM;
2026 : }
2027 :
2028 0 : ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
2029 0 : if (ret)
2030 : goto err_unref;
2031 :
2032 : flags = 0;
2033 0 : if (!HAS_LLC(ring->dev))
2034 : /* On g33, we cannot place HWS above 256MiB, so
2035 : * restrict its pinning to the low mappable arena.
2036 : * Though this restriction is not documented for
2037 : * gen4, gen5, or byt, they also behave similarly
2038 : * and hang if the HWS is placed at the top of the
2039 : * GTT. To generalise, it appears that all !llc
2040 : * platforms have issues with us placing the HWS
2041 : * above the mappable region (even though we never
2042 : 		 * actually map it).
2043 : */
2044 0 : flags |= PIN_MAPPABLE;
2045 0 : ret = i915_gem_obj_ggtt_pin(obj, 4096, flags);
2046 0 : if (ret) {
2047 : err_unref:
2048 0 : drm_gem_object_unreference(&obj->base);
2049 0 : return ret;
2050 : }
2051 :
2052 0 : ring->status_page.obj = obj;
2053 0 : }
2054 :
2055 0 : ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj);
2056 0 : ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
2057 0 : memset(ring->status_page.page_addr, 0, PAGE_SIZE);
2058 :
2059 : DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
2060 : ring->name, ring->status_page.gfx_addr);
2061 :
2062 0 : return 0;
2063 0 : }
2064 :
2065 0 : static int init_phys_status_page(struct intel_engine_cs *ring)
2066 : {
2067 0 : struct drm_i915_private *dev_priv = ring->dev->dev_private;
2068 :
2069 0 : if (!dev_priv->status_page_dmah) {
2070 0 : dev_priv->status_page_dmah =
2071 0 : drm_pci_alloc(ring->dev, PAGE_SIZE, PAGE_SIZE);
2072 0 : if (!dev_priv->status_page_dmah)
2073 0 : return -ENOMEM;
2074 : }
2075 :
2076 0 : ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
2077 0 : memset(ring->status_page.page_addr, 0, PAGE_SIZE);
2078 :
2079 0 : return 0;
2080 0 : }
2081 :
2082 0 : void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
2083 : {
2084 0 : struct drm_device *dev = ringbuf->obj->base.dev;
2085 0 : struct drm_i915_private *dev_priv = dev->dev_private;
2086 :
2087 0 : agp_unmap_subregion(dev_priv->agph, ringbuf->bsh, ringbuf->size);
2088 0 : ringbuf->virtual_start = NULL;
2089 0 : i915_gem_object_ggtt_unpin(ringbuf->obj);
2090 0 : }
2091 :
2092 0 : int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
2093 : struct intel_ringbuffer *ringbuf)
2094 : {
2095 0 : struct drm_i915_private *dev_priv = to_i915(dev);
2096 0 : struct drm_i915_gem_object *obj = ringbuf->obj;
2097 : int ret;
2098 :
2099 0 : ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
2100 0 : if (ret)
2101 0 : return ret;
2102 :
2103 0 : ret = i915_gem_object_set_to_gtt_domain(obj, true);
2104 0 : if (ret) {
2105 0 : i915_gem_object_ggtt_unpin(obj);
2106 0 : return ret;
2107 : }
2108 :
2109 0 : if (agp_map_subregion(dev_priv->agph, i915_gem_obj_ggtt_offset(obj),
2110 0 : ringbuf->size, &ringbuf->bsh) != 0) {
2111 0 : i915_gem_object_ggtt_unpin(obj);
2112 0 : return -EINVAL;
2113 : }
2114 0 : ringbuf->virtual_start = bus_space_vaddr(dev_priv->bst, ringbuf->bsh);
2115 :
2116 0 : return 0;
2117 0 : }
2118 :
2119 0 : static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
2120 : {
2121 0 : drm_gem_object_unreference(&ringbuf->obj->base);
2122 0 : ringbuf->obj = NULL;
2123 0 : }
2124 :
2125 0 : static int intel_alloc_ringbuffer_obj(struct drm_device *dev,
2126 : struct intel_ringbuffer *ringbuf)
2127 : {
2128 : struct drm_i915_gem_object *obj;
2129 :
2130 : obj = NULL;
2131 0 : if (!HAS_LLC(dev))
2132 0 : obj = i915_gem_object_create_stolen(dev, ringbuf->size);
2133 0 : if (obj == NULL)
2134 0 : obj = i915_gem_alloc_object(dev, ringbuf->size);
2135 0 : if (obj == NULL)
2136 0 : return -ENOMEM;
2137 :
2138 : /* mark ring buffers as read-only from GPU side by default */
2139 0 : obj->gt_ro = 1;
2140 :
2141 0 : ringbuf->obj = obj;
2142 :
2143 0 : return 0;
2144 0 : }
2145 :
2146 : struct intel_ringbuffer *
2147 0 : intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size)
2148 : {
2149 : struct intel_ringbuffer *ring;
2150 : int ret;
2151 :
2152 0 : ring = kzalloc(sizeof(*ring), GFP_KERNEL);
2153 0 : if (ring == NULL)
2154 0 : return ERR_PTR(-ENOMEM);
2155 :
2156 0 : ring->ring = engine;
2157 :
2158 0 : ring->size = size;
2159 : /* Workaround an erratum on the i830 which causes a hang if
2160 : * the TAIL pointer points to within the last 2 cachelines
2161 : * of the buffer.
2162 : */
2163 0 : ring->effective_size = size;
2164 0 : if (IS_I830(engine->dev) || IS_845G(engine->dev))
2165 0 : ring->effective_size -= 2 * CACHELINE_BYTES;
2166 :
2167 0 : ring->last_retired_head = -1;
2168 0 : intel_ring_update_space(ring);
2169 :
2170 0 : ret = intel_alloc_ringbuffer_obj(engine->dev, ring);
2171 0 : if (ret) {
2172 0 : DRM_ERROR("Failed to allocate ringbuffer %s: %d\n",
2173 : engine->name, ret);
2174 0 : kfree(ring);
2175 0 : return ERR_PTR(ret);
2176 : }
2177 :
2178 0 : return ring;
2179 0 : }
2180 :
2181 : void
2182 0 : intel_ringbuffer_free(struct intel_ringbuffer *ring)
2183 : {
2184 0 : intel_destroy_ringbuffer_obj(ring);
2185 0 : kfree(ring);
2186 0 : }
2187 :
2188 0 : static int intel_init_ring_buffer(struct drm_device *dev,
2189 : struct intel_engine_cs *ring)
2190 : {
2191 : struct intel_ringbuffer *ringbuf;
2192 : int ret;
2193 :
2194 0 : WARN_ON(ring->buffer);
2195 :
2196 0 : ring->dev = dev;
2197 0 : INIT_LIST_HEAD(&ring->active_list);
2198 0 : INIT_LIST_HEAD(&ring->request_list);
2199 0 : INIT_LIST_HEAD(&ring->execlist_queue);
2200 0 : i915_gem_batch_pool_init(dev, &ring->batch_pool);
2201 0 : memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
2202 :
2203 0 : init_waitqueue_head(&ring->irq_queue);
2204 :
2205 0 : ringbuf = intel_engine_create_ringbuffer(ring, 32 * PAGE_SIZE);
2206 0 : if (IS_ERR(ringbuf))
2207 0 : return PTR_ERR(ringbuf);
2208 0 : ring->buffer = ringbuf;
2209 :
2210 0 : if (I915_NEED_GFX_HWS(dev)) {
2211 0 : ret = init_status_page(ring);
2212 0 : if (ret)
2213 : goto error;
2214 : } else {
2215 0 : WARN_ON(ring->id != RCS);
2216 0 : ret = init_phys_status_page(ring);
2217 0 : if (ret)
2218 : goto error;
2219 : }
2220 :
2221 0 : ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
2222 0 : if (ret) {
2223 0 : DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
2224 : ring->name, ret);
2225 0 : intel_destroy_ringbuffer_obj(ringbuf);
2226 0 : goto error;
2227 : }
2228 :
2229 0 : ret = i915_cmd_parser_init_ring(ring);
2230 0 : if (ret)
2231 : goto error;
2232 :
2233 0 : return 0;
2234 :
2235 : error:
2236 0 : intel_ringbuffer_free(ringbuf);
2237 0 : ring->buffer = NULL;
2238 0 : return ret;
2239 0 : }
2240 :
2241 0 : void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
2242 : {
2243 : struct drm_i915_private *dev_priv;
2244 :
2245 0 : if (!intel_ring_initialized(ring))
2246 0 : return;
2247 :
2248 0 : dev_priv = to_i915(ring->dev);
2249 :
2250 0 : intel_stop_ring_buffer(ring);
2251 0 : WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
2252 :
2253 0 : intel_unpin_ringbuffer_obj(ring->buffer);
2254 0 : intel_ringbuffer_free(ring->buffer);
2255 0 : ring->buffer = NULL;
2256 :
2257 0 : if (ring->cleanup)
2258 0 : ring->cleanup(ring);
2259 :
2260 0 : if (I915_NEED_GFX_HWS(ring->dev)) {
2261 0 : cleanup_status_page(ring);
2262 0 : } else {
2263 0 : WARN_ON(ring->id != RCS);
2264 0 : cleanup_phys_status_page(ring);
2265 : }
2266 :
2267 0 : i915_cmd_parser_fini_ring(ring);
2268 0 : i915_gem_batch_pool_fini(&ring->batch_pool);
2269 0 : }
2270 :
2271 0 : static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
2272 : {
2273 0 : struct intel_ringbuffer *ringbuf = ring->buffer;
2274 : struct drm_i915_gem_request *request;
2275 : unsigned space;
2276 : int ret;
2277 :
2278 0 : if (intel_ring_space(ringbuf) >= n)
2279 0 : return 0;
2280 :
2281 : /* The whole point of reserving space is to not wait! */
2282 0 : WARN_ON(ringbuf->reserved_in_use);
2283 :
2284 0 : list_for_each_entry(request, &ring->request_list, list) {
2285 0 : space = __intel_ring_space(request->postfix, ringbuf->tail,
2286 0 : ringbuf->size);
2287 0 : if (space >= n)
2288 : break;
2289 : }
2290 :
2291 0 : if (WARN_ON(&request->list == &ring->request_list))
2292 0 : return -ENOSPC;
2293 :
2294 0 : ret = i915_wait_request(request);
2295 0 : if (ret)
2296 0 : return ret;
2297 :
2298 0 : ringbuf->space = space;
2299 0 : return 0;
2300 0 : }
2301 :
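     : /*
     :  * Fill the ring from the current tail to the end of the buffer with
     :  * MI_NOOPs and reset the tail to 0, so the next emission starts at
     :  * the beginning of the ring.
     :  */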
2302 0 : static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
2303 : {
2304 : uint32_t __iomem *virt;
2305 0 : int rem = ringbuf->size - ringbuf->tail;
2306 :
2307 0 : virt = ringbuf->virtual_start + ringbuf->tail;
2308 0 : rem /= 4;
2309 0 : while (rem--)
2310 0 : iowrite32(MI_NOOP, virt++);
2311 :
2312 0 : ringbuf->tail = 0;
2313 0 : intel_ring_update_space(ringbuf);
2314 0 : }
2315 :
2316 0 : int intel_ring_idle(struct intel_engine_cs *ring)
2317 : {
2318 : struct drm_i915_gem_request *req;
2319 :
2320 : /* Wait upon the last request to be completed */
2321 0 : if (list_empty(&ring->request_list))
2322 0 : return 0;
2323 :
2324 0 : req = list_entry(ring->request_list.prev,
2325 : struct drm_i915_gem_request,
2326 : list);
2327 :
2328 : /* Make sure we do not trigger any retires */
2329 0 : return __i915_wait_request(req,
2330 0 : atomic_read(&to_i915(ring->dev)->gpu_error.reset_counter),
2331 0 : to_i915(ring->dev)->mm.interruptible,
2332 : NULL, NULL);
2333 0 : }
2334 :
2335 0 : int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
2336 : {
2337 0 : request->ringbuf = request->ring->buffer;
2338 0 : return 0;
2339 : }
2340 :
2341 0 : int intel_ring_reserve_space(struct drm_i915_gem_request *request)
2342 : {
2343 : /*
2344 : * The first call merely notes the reserve request and is common for
2345 : * all back ends. The subsequent localised _begin() call actually
2346 : * ensures that the reservation is available. Without the begin, if
2347 : * the request creator immediately submitted the request without
2348 : 	 * adding any commands to it, then there might not actually be
2349 : * sufficient room for the submission commands.
2350 : */
2351 0 : intel_ring_reserved_space_reserve(request->ringbuf, MIN_SPACE_FOR_ADD_REQUEST);
2352 :
2353 0 : return intel_ring_begin(request, 0);
2354 : }
2355 :
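     : /*
     :  * The helpers below implement the reserved-space lifecycle:
     :  * reserve() records the requested size, use() notes the tail at the
     :  * point the reserved words start being emitted, end() checks that the
     :  * reservation was large enough (unless the ring wrapped) and clears
     :  * it, and cancel() drops an unused reservation.
     :  */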
2356 0 : void intel_ring_reserved_space_reserve(struct intel_ringbuffer *ringbuf, int size)
2357 : {
2358 0 : WARN_ON(ringbuf->reserved_size);
2359 0 : WARN_ON(ringbuf->reserved_in_use);
2360 :
2361 0 : ringbuf->reserved_size = size;
2362 0 : }
2363 :
2364 0 : void intel_ring_reserved_space_cancel(struct intel_ringbuffer *ringbuf)
2365 : {
2366 0 : WARN_ON(ringbuf->reserved_in_use);
2367 :
2368 0 : ringbuf->reserved_size = 0;
2369 0 : ringbuf->reserved_in_use = false;
2370 0 : }
2371 :
2372 0 : void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf)
2373 : {
2374 0 : WARN_ON(ringbuf->reserved_in_use);
2375 :
2376 0 : ringbuf->reserved_in_use = true;
2377 0 : ringbuf->reserved_tail = ringbuf->tail;
2378 0 : }
2379 :
2380 0 : void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf)
2381 : {
2382 0 : WARN_ON(!ringbuf->reserved_in_use);
2383 0 : if (ringbuf->tail > ringbuf->reserved_tail) {
2384 0 : WARN(ringbuf->tail > ringbuf->reserved_tail + ringbuf->reserved_size,
2385 : "request reserved size too small: %d vs %d!\n",
2386 : ringbuf->tail - ringbuf->reserved_tail, ringbuf->reserved_size);
2387 0 : } else {
2388 : /*
2389 : * The ring was wrapped while the reserved space was in use.
2390 : * That means that some unknown amount of the ring tail was
2391 : * no-op filled and skipped. Thus simply adding the ring size
2392 : * to the tail and doing the above space check will not work.
2393 : 		 * Rather than attempt to track how much of the tail was
2394 : 		 * skipped, it is much simpler to accept that the sanity check
2395 : 		 * is occasionally skipped as well; that is not a big issue.
2396 : */
2397 : }
2398 :
2399 0 : ringbuf->reserved_size = 0;
2400 0 : ringbuf->reserved_in_use = false;
2401 0 : }
2402 :
2403 0 : static int __intel_ring_prepare(struct intel_engine_cs *ring, int bytes)
2404 : {
2405 0 : struct intel_ringbuffer *ringbuf = ring->buffer;
2406 0 : int remain_usable = ringbuf->effective_size - ringbuf->tail;
2407 0 : int remain_actual = ringbuf->size - ringbuf->tail;
2408 : int ret, total_bytes, wait_bytes = 0;
2409 : bool need_wrap = false;
2410 :
2411 0 : if (ringbuf->reserved_in_use)
2412 0 : total_bytes = bytes;
2413 : else
2414 0 : total_bytes = bytes + ringbuf->reserved_size;
2415 :
2416 0 : if (unlikely(bytes > remain_usable)) {
2417 : /*
2418 : 		 * Not enough space for the basic request, so we need to flush
2419 : 		 * out the remainder and then wait for base + reserved.
2420 : */
2421 0 : wait_bytes = remain_actual + total_bytes;
2422 : need_wrap = true;
2423 0 : } else {
2424 0 : if (unlikely(total_bytes > remain_usable)) {
2425 : /*
2426 : 			 * The base request will fit but the reserved space
2427 : 			 * falls off the end, so we don't need an immediate wrap
2428 : 			 * and only need to wait for space equal to the reserved
2429 : 			 * size from the start of the ringbuffer.
2430 : */
2431 0 : wait_bytes = remain_actual + ringbuf->reserved_size;
2432 0 : } else if (total_bytes > ringbuf->space) {
2433 : /* No wrapping required, just waiting. */
2434 : wait_bytes = total_bytes;
2435 0 : }
2436 : }
2437 :
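     : 	/*
     : 	 * Illustrative example (hypothetical numbers): with size = 4096,
     : 	 * effective_size = 4032, tail = 4000, reserved_size = 64 and a
     : 	 * 128-byte request, remain_usable is 32 and remain_actual is 96,
     : 	 * so wait_bytes = 96 + (128 + 64) = 288 and the buffer is wrapped
     : 	 * before the new commands are emitted.
     : 	 */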
2438 0 : if (wait_bytes) {
2439 0 : ret = ring_wait_for_space(ring, wait_bytes);
2440 0 : if (unlikely(ret))
2441 0 : return ret;
2442 :
2443 0 : if (need_wrap)
2444 0 : __wrap_ring_buffer(ringbuf);
2445 : }
2446 :
2447 0 : return 0;
2448 0 : }
2449 :
2450 0 : int intel_ring_begin(struct drm_i915_gem_request *req,
2451 : int num_dwords)
2452 : {
2453 : struct intel_engine_cs *ring;
2454 : struct drm_i915_private *dev_priv;
2455 : int ret;
2456 :
2457 0 : WARN_ON(req == NULL);
2458 0 : ring = req->ring;
2459 0 : dev_priv = ring->dev->dev_private;
2460 :
2461 0 : ret = i915_gem_check_wedge(&dev_priv->gpu_error,
2462 0 : dev_priv->mm.interruptible);
2463 0 : if (ret)
2464 0 : return ret;
2465 :
2466 0 : ret = __intel_ring_prepare(ring, num_dwords * sizeof(uint32_t));
2467 0 : if (ret)
2468 0 : return ret;
2469 :
2470 0 : ring->buffer->space -= num_dwords * sizeof(uint32_t);
2471 0 : return 0;
2472 0 : }
2473 :
2474 : /* Align the ring tail to a cacheline boundary */
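     : /* For example, assuming CACHELINE_BYTES == 64: with the tail 40 bytes
     :  * into a cacheline, 10 dwords are already used, so 6 MI_NOOPs are
     :  * emitted to reach the next 64-byte boundary. */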
2475 0 : int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
2476 : {
2477 0 : struct intel_engine_cs *ring = req->ring;
2478 0 : int num_dwords = (ring->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
2479 : int ret;
2480 :
2481 0 : if (num_dwords == 0)
2482 0 : return 0;
2483 :
2484 0 : num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords;
2485 0 : ret = intel_ring_begin(req, num_dwords);
2486 0 : if (ret)
2487 0 : return ret;
2488 :
2489 0 : while (num_dwords--)
2490 0 : intel_ring_emit(ring, MI_NOOP);
2491 :
2492 0 : intel_ring_advance(ring);
2493 :
2494 0 : return 0;
2495 0 : }
2496 :
2497 0 : void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno)
2498 : {
2499 0 : struct drm_device *dev = ring->dev;
2500 0 : struct drm_i915_private *dev_priv = dev->dev_private;
2501 :
2502 0 : if (INTEL_INFO(dev)->gen == 6 || INTEL_INFO(dev)->gen == 7) {
2503 0 : I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);
2504 0 : I915_WRITE(RING_SYNC_1(ring->mmio_base), 0);
2505 0 : if (HAS_VEBOX(dev))
2506 0 : I915_WRITE(RING_SYNC_2(ring->mmio_base), 0);
2507 : }
2508 :
2509 0 : ring->set_seqno(ring, seqno);
2510 0 : ring->hangcheck.seqno = seqno;
2511 0 : }
2512 :
2513 0 : static void gen6_bsd_ring_write_tail(struct intel_engine_cs *ring,
2514 : u32 value)
2515 : {
2516 0 : struct drm_i915_private *dev_priv = ring->dev->dev_private;
2517 :
2518 : /* Every tail move must follow the sequence below */
2519 :
2520 : /* Disable notification that the ring is IDLE. The GT
2521 : * will then assume that it is busy and bring it out of rc6.
2522 : */
2523 0 : I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
2524 : _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
2525 :
2526 : /* Clear the context id. Here be magic! */
2527 0 : I915_WRITE64(GEN6_BSD_RNCID, 0x0);
2528 :
2529 : /* Wait for the ring not to be idle, i.e. for it to wake up. */
2530 0 : if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
2531 : GEN6_BSD_SLEEP_INDICATOR) == 0,
2532 : 50))
2533 0 : DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
2534 :
2535 : /* Now that the ring is fully powered up, update the tail */
2536 0 : I915_WRITE_TAIL(ring, value);
2537 0 : POSTING_READ(RING_TAIL(ring->mmio_base));
2538 :
2539 : /* Let the ring send IDLE messages to the GT again,
2540 : * and so let it sleep to conserve power when idle.
2541 : */
2542 0 : I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
2543 : _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
2544 0 : }
2545 :
2546 0 : static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req,
2547 : u32 invalidate, u32 flush)
2548 : {
2549 0 : struct intel_engine_cs *ring = req->ring;
2550 : uint32_t cmd;
2551 : int ret;
2552 :
2553 0 : ret = intel_ring_begin(req, 4);
2554 0 : if (ret)
2555 0 : return ret;
2556 :
2557 : cmd = MI_FLUSH_DW;
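     : 	/* Gen8+ MI_FLUSH_DW takes a 64-bit post-sync address, so the command
     : 	 * is one dword longer (the extra "upper addr" dword emitted below). */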
2558 0 : if (INTEL_INFO(ring->dev)->gen >= 8)
2559 0 : cmd += 1;
2560 :
2561 : /* We always require a command barrier so that subsequent
2562 : * commands, such as breadcrumb interrupts, are strictly ordered
2563 : * wrt the contents of the write cache being flushed to memory
2564 : * (and thus being coherent from the CPU).
2565 : */
2566 0 : cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
2567 :
2568 : /*
2569 : * Bspec vol 1c.5 - video engine command streamer:
2570 : * "If ENABLED, all TLBs will be invalidated once the flush
2571 : * operation is complete. This bit is only valid when the
2572 : * Post-Sync Operation field is a value of 1h or 3h."
2573 : */
2574 0 : if (invalidate & I915_GEM_GPU_DOMAINS)
2575 0 : cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
2576 :
2577 0 : intel_ring_emit(ring, cmd);
2578 0 : intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
2579 0 : if (INTEL_INFO(ring->dev)->gen >= 8) {
2580 : intel_ring_emit(ring, 0); /* upper addr */
2581 : intel_ring_emit(ring, 0); /* value */
2582 : } else {
2583 : intel_ring_emit(ring, 0);
2584 : intel_ring_emit(ring, MI_NOOP);
2585 : }
2586 0 : intel_ring_advance(ring);
2587 0 : return 0;
2588 0 : }
2589 :
2590 : static int
2591 0 : gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
2592 : u64 offset, u32 len,
2593 : unsigned dispatch_flags)
2594 : {
2595 0 : struct intel_engine_cs *ring = req->ring;
2596 0 : bool ppgtt = USES_PPGTT(ring->dev) &&
2597 0 : !(dispatch_flags & I915_DISPATCH_SECURE);
2598 : int ret;
2599 :
2600 0 : ret = intel_ring_begin(req, 4);
2601 0 : if (ret)
2602 0 : return ret;
2603 :
2604 : /* FIXME(BDW): Address space and security selectors. */
2605 0 : intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) |
2606 0 : (dispatch_flags & I915_DISPATCH_RS ?
2607 : MI_BATCH_RESOURCE_STREAMER : 0));
2608 0 : intel_ring_emit(ring, lower_32_bits(offset));
2609 0 : intel_ring_emit(ring, upper_32_bits(offset));
2610 0 : intel_ring_emit(ring, MI_NOOP);
2611 0 : intel_ring_advance(ring);
2612 :
2613 0 : return 0;
2614 0 : }
2615 :
2616 : static int
2617 0 : hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
2618 : u64 offset, u32 len,
2619 : unsigned dispatch_flags)
2620 : {
2621 0 : struct intel_engine_cs *ring = req->ring;
2622 : int ret;
2623 :
2624 0 : ret = intel_ring_begin(req, 2);
2625 0 : if (ret)
2626 0 : return ret;
2627 :
2628 0 : intel_ring_emit(ring,
2629 0 : MI_BATCH_BUFFER_START |
2630 0 : (dispatch_flags & I915_DISPATCH_SECURE ?
2631 0 : 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) |
2632 0 : (dispatch_flags & I915_DISPATCH_RS ?
2633 : MI_BATCH_RESOURCE_STREAMER : 0));
2634 : 	/* bits 0-7 are the length on GEN6+ */
2635 0 : intel_ring_emit(ring, offset);
2636 0 : intel_ring_advance(ring);
2637 :
2638 0 : return 0;
2639 0 : }
2640 :
2641 : static int
2642 0 : gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
2643 : u64 offset, u32 len,
2644 : unsigned dispatch_flags)
2645 : {
2646 0 : struct intel_engine_cs *ring = req->ring;
2647 : int ret;
2648 :
2649 0 : ret = intel_ring_begin(req, 2);
2650 0 : if (ret)
2651 0 : return ret;
2652 :
2653 0 : intel_ring_emit(ring,
2654 0 : MI_BATCH_BUFFER_START |
2655 0 : (dispatch_flags & I915_DISPATCH_SECURE ?
2656 : 0 : MI_BATCH_NON_SECURE_I965));
2657 : 	/* bits 0-7 are the length on GEN6+ */
2658 0 : intel_ring_emit(ring, offset);
2659 0 : intel_ring_advance(ring);
2660 :
2661 0 : return 0;
2662 0 : }
2663 :
2664 : /* Blitter support (SandyBridge+) */
2665 :
2666 0 : static int gen6_ring_flush(struct drm_i915_gem_request *req,
2667 : u32 invalidate, u32 flush)
2668 : {
2669 0 : struct intel_engine_cs *ring = req->ring;
2670 0 : struct drm_device *dev = ring->dev;
2671 : uint32_t cmd;
2672 : int ret;
2673 :
2674 0 : ret = intel_ring_begin(req, 4);
2675 0 : if (ret)
2676 0 : return ret;
2677 :
2678 : cmd = MI_FLUSH_DW;
2679 0 : if (INTEL_INFO(dev)->gen >= 8)
2680 0 : cmd += 1;
2681 :
2682 : /* We always require a command barrier so that subsequent
2683 : * commands, such as breadcrumb interrupts, are strictly ordered
2684 : * wrt the contents of the write cache being flushed to memory
2685 : * (and thus being coherent from the CPU).
2686 : */
2687 0 : cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
2688 :
2689 : /*
2690 : * Bspec vol 1c.3 - blitter engine command streamer:
2691 : * "If ENABLED, all TLBs will be invalidated once the flush
2692 : * operation is complete. This bit is only valid when the
2693 : * Post-Sync Operation field is a value of 1h or 3h."
2694 : */
2695 0 : if (invalidate & I915_GEM_DOMAIN_RENDER)
2696 0 : cmd |= MI_INVALIDATE_TLB;
2697 0 : intel_ring_emit(ring, cmd);
2698 0 : intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
2699 0 : if (INTEL_INFO(dev)->gen >= 8) {
2700 : intel_ring_emit(ring, 0); /* upper addr */
2701 : intel_ring_emit(ring, 0); /* value */
2702 : } else {
2703 : intel_ring_emit(ring, 0);
2704 : intel_ring_emit(ring, MI_NOOP);
2705 : }
2706 0 : intel_ring_advance(ring);
2707 :
2708 0 : return 0;
2709 0 : }
2710 :
2711 0 : int intel_init_render_ring_buffer(struct drm_device *dev)
2712 : {
2713 0 : struct drm_i915_private *dev_priv = dev->dev_private;
2714 0 : struct intel_engine_cs *ring = &dev_priv->ring[RCS];
2715 : struct drm_i915_gem_object *obj;
2716 : int ret;
2717 :
2718 0 : ring->name = "render ring";
2719 0 : ring->id = RCS;
2720 0 : ring->mmio_base = RENDER_RING_BASE;
2721 :
2722 0 : if (INTEL_INFO(dev)->gen >= 8) {
2723 0 : if (i915_semaphore_is_enabled(dev)) {
2724 0 : obj = i915_gem_alloc_object(dev, 4096);
2725 0 : if (obj == NULL) {
2726 0 : DRM_ERROR("Failed to allocate semaphore bo. Disabling semaphores\n");
2727 0 : i915.semaphores = 0;
2728 0 : } else {
2729 0 : i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
2730 0 : ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_NONBLOCK);
2731 0 : if (ret != 0) {
2732 0 : drm_gem_object_unreference(&obj->base);
2733 0 : DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n");
2734 0 : i915.semaphores = 0;
2735 0 : } else
2736 0 : dev_priv->semaphore_obj = obj;
2737 : }
2738 : }
2739 :
2740 0 : ring->init_context = intel_rcs_ctx_init;
2741 0 : ring->add_request = gen6_add_request;
2742 0 : ring->flush = gen8_render_ring_flush;
2743 0 : ring->irq_get = gen8_ring_get_irq;
2744 0 : ring->irq_put = gen8_ring_put_irq;
2745 0 : ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
2746 0 : ring->get_seqno = gen6_ring_get_seqno;
2747 0 : ring->set_seqno = ring_set_seqno;
2748 0 : if (i915_semaphore_is_enabled(dev)) {
2749 0 : WARN_ON(!dev_priv->semaphore_obj);
2750 0 : ring->semaphore.sync_to = gen8_ring_sync;
2751 0 : ring->semaphore.signal = gen8_rcs_signal;
2752 0 : GEN8_RING_SEMAPHORE_INIT;
2753 : }
2754 0 : } else if (INTEL_INFO(dev)->gen >= 6) {
2755 0 : ring->init_context = intel_rcs_ctx_init;
2756 0 : ring->add_request = gen6_add_request;
2757 0 : ring->flush = gen7_render_ring_flush;
2758 0 : if (INTEL_INFO(dev)->gen == 6)
2759 0 : ring->flush = gen6_render_ring_flush;
2760 0 : ring->irq_get = gen6_ring_get_irq;
2761 0 : ring->irq_put = gen6_ring_put_irq;
2762 0 : ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
2763 0 : ring->get_seqno = gen6_ring_get_seqno;
2764 0 : ring->set_seqno = ring_set_seqno;
2765 0 : if (i915_semaphore_is_enabled(dev)) {
2766 0 : ring->semaphore.sync_to = gen6_ring_sync;
2767 0 : ring->semaphore.signal = gen6_signal;
2768 : /*
2769 : * The current semaphore is only applied on pre-gen8
2770 : * platform. And there is no VCS2 ring on the pre-gen8
2771 : * platform. So the semaphore between RCS and VCS2 is
2772 : * initialized as INVALID. Gen8 will initialize the
2773 : * sema between VCS2 and RCS later.
2774 : */
2775 0 : ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID;
2776 0 : ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_RV;
2777 0 : ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_RB;
2778 0 : ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_RVE;
2779 0 : ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
2780 0 : ring->semaphore.mbox.signal[RCS] = GEN6_NOSYNC;
2781 0 : ring->semaphore.mbox.signal[VCS] = GEN6_VRSYNC;
2782 0 : ring->semaphore.mbox.signal[BCS] = GEN6_BRSYNC;
2783 0 : ring->semaphore.mbox.signal[VECS] = GEN6_VERSYNC;
2784 0 : ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
2785 0 : }
2786 0 : } else if (IS_GEN5(dev)) {
2787 0 : ring->add_request = pc_render_add_request;
2788 0 : ring->flush = gen4_render_ring_flush;
2789 0 : ring->get_seqno = pc_render_get_seqno;
2790 0 : ring->set_seqno = pc_render_set_seqno;
2791 0 : ring->irq_get = gen5_ring_get_irq;
2792 0 : ring->irq_put = gen5_ring_put_irq;
2793 0 : ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT |
2794 : GT_RENDER_PIPECTL_NOTIFY_INTERRUPT;
2795 0 : } else {
2796 0 : ring->add_request = i9xx_add_request;
2797 0 : if (INTEL_INFO(dev)->gen < 4)
2798 0 : ring->flush = gen2_render_ring_flush;
2799 : else
2800 0 : ring->flush = gen4_render_ring_flush;
2801 0 : ring->get_seqno = ring_get_seqno;
2802 0 : ring->set_seqno = ring_set_seqno;
2803 0 : if (IS_GEN2(dev)) {
2804 0 : ring->irq_get = i8xx_ring_get_irq;
2805 0 : ring->irq_put = i8xx_ring_put_irq;
2806 0 : } else {
2807 0 : ring->irq_get = i9xx_ring_get_irq;
2808 0 : ring->irq_put = i9xx_ring_put_irq;
2809 : }
2810 0 : ring->irq_enable_mask = I915_USER_INTERRUPT;
2811 : }
2812 0 : ring->write_tail = ring_write_tail;
2813 :
2814 0 : if (IS_HASWELL(dev))
2815 0 : ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer;
2816 0 : else if (IS_GEN8(dev))
2817 0 : ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
2818 0 : else if (INTEL_INFO(dev)->gen >= 6)
2819 0 : ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
2820 0 : else if (INTEL_INFO(dev)->gen >= 4)
2821 0 : ring->dispatch_execbuffer = i965_dispatch_execbuffer;
2822 0 : else if (IS_I830(dev) || IS_845G(dev))
2823 0 : ring->dispatch_execbuffer = i830_dispatch_execbuffer;
2824 : else
2825 0 : ring->dispatch_execbuffer = i915_dispatch_execbuffer;
2826 0 : ring->init_hw = init_render_ring;
2827 0 : ring->cleanup = render_ring_cleanup;
2828 :
2829 : /* Workaround batchbuffer to combat CS tlb bug. */
2830 0 : if (HAS_BROKEN_CS_TLB(dev)) {
2831 0 : obj = i915_gem_alloc_object(dev, I830_WA_SIZE);
2832 0 : if (obj == NULL) {
2833 0 : DRM_ERROR("Failed to allocate batch bo\n");
2834 0 : return -ENOMEM;
2835 : }
2836 :
2837 0 : ret = i915_gem_obj_ggtt_pin(obj, 0, 0);
2838 0 : if (ret != 0) {
2839 0 : drm_gem_object_unreference(&obj->base);
2840 0 : 			DRM_ERROR("Failed to pin batch bo\n");
2841 0 : return ret;
2842 : }
2843 :
2844 0 : ring->scratch.obj = obj;
2845 0 : ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj);
2846 0 : }
2847 :
2848 0 : ret = intel_init_ring_buffer(dev, ring);
2849 0 : if (ret)
2850 0 : return ret;
2851 :
2852 0 : if (INTEL_INFO(dev)->gen >= 5) {
2853 0 : ret = intel_init_pipe_control(ring);
2854 0 : if (ret)
2855 0 : return ret;
2856 : }
2857 :
2858 0 : return 0;
2859 0 : }
2860 :
2861 0 : int intel_init_bsd_ring_buffer(struct drm_device *dev)
2862 : {
2863 0 : struct drm_i915_private *dev_priv = dev->dev_private;
2864 0 : struct intel_engine_cs *ring = &dev_priv->ring[VCS];
2865 :
2866 0 : ring->name = "bsd ring";
2867 0 : ring->id = VCS;
2868 :
2869 0 : ring->write_tail = ring_write_tail;
2870 0 : if (INTEL_INFO(dev)->gen >= 6) {
2871 0 : ring->mmio_base = GEN6_BSD_RING_BASE;
2872 : /* gen6 bsd needs a special wa for tail updates */
2873 0 : if (IS_GEN6(dev))
2874 0 : ring->write_tail = gen6_bsd_ring_write_tail;
2875 0 : ring->flush = gen6_bsd_ring_flush;
2876 0 : ring->add_request = gen6_add_request;
2877 0 : ring->get_seqno = gen6_ring_get_seqno;
2878 0 : ring->set_seqno = ring_set_seqno;
2879 0 : if (INTEL_INFO(dev)->gen >= 8) {
2880 0 : ring->irq_enable_mask =
2881 : GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
2882 0 : ring->irq_get = gen8_ring_get_irq;
2883 0 : ring->irq_put = gen8_ring_put_irq;
2884 0 : ring->dispatch_execbuffer =
2885 : gen8_ring_dispatch_execbuffer;
2886 0 : if (i915_semaphore_is_enabled(dev)) {
2887 0 : ring->semaphore.sync_to = gen8_ring_sync;
2888 0 : ring->semaphore.signal = gen8_xcs_signal;
2889 0 : GEN8_RING_SEMAPHORE_INIT;
2890 : }
2891 : } else {
2892 0 : ring->irq_enable_mask = GT_BSD_USER_INTERRUPT;
2893 0 : ring->irq_get = gen6_ring_get_irq;
2894 0 : ring->irq_put = gen6_ring_put_irq;
2895 0 : ring->dispatch_execbuffer =
2896 : gen6_ring_dispatch_execbuffer;
2897 0 : if (i915_semaphore_is_enabled(dev)) {
2898 0 : ring->semaphore.sync_to = gen6_ring_sync;
2899 0 : ring->semaphore.signal = gen6_signal;
2900 0 : ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VR;
2901 0 : ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID;
2902 0 : ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VB;
2903 0 : ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_VVE;
2904 0 : ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
2905 0 : ring->semaphore.mbox.signal[RCS] = GEN6_RVSYNC;
2906 0 : ring->semaphore.mbox.signal[VCS] = GEN6_NOSYNC;
2907 0 : ring->semaphore.mbox.signal[BCS] = GEN6_BVSYNC;
2908 0 : ring->semaphore.mbox.signal[VECS] = GEN6_VEVSYNC;
2909 0 : ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
2910 0 : }
2911 : }
2912 : } else {
2913 0 : ring->mmio_base = BSD_RING_BASE;
2914 0 : ring->flush = bsd_ring_flush;
2915 0 : ring->add_request = i9xx_add_request;
2916 0 : ring->get_seqno = ring_get_seqno;
2917 0 : ring->set_seqno = ring_set_seqno;
2918 0 : if (IS_GEN5(dev)) {
2919 0 : ring->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
2920 0 : ring->irq_get = gen5_ring_get_irq;
2921 0 : ring->irq_put = gen5_ring_put_irq;
2922 0 : } else {
2923 0 : ring->irq_enable_mask = I915_BSD_USER_INTERRUPT;
2924 0 : ring->irq_get = i9xx_ring_get_irq;
2925 0 : ring->irq_put = i9xx_ring_put_irq;
2926 : }
2927 0 : ring->dispatch_execbuffer = i965_dispatch_execbuffer;
2928 : }
2929 0 : ring->init_hw = init_ring_common;
2930 :
2931 0 : return intel_init_ring_buffer(dev, ring);
2932 : }
2933 :
2934 : /**
2935 :  * Initialize the second BSD ring (e.g. Broadwell GT3, Skylake GT3)
2936 : */
2937 0 : int intel_init_bsd2_ring_buffer(struct drm_device *dev)
2938 : {
2939 0 : struct drm_i915_private *dev_priv = dev->dev_private;
2940 0 : struct intel_engine_cs *ring = &dev_priv->ring[VCS2];
2941 :
2942 0 : ring->name = "bsd2 ring";
2943 0 : ring->id = VCS2;
2944 :
2945 0 : ring->write_tail = ring_write_tail;
2946 0 : ring->mmio_base = GEN8_BSD2_RING_BASE;
2947 0 : ring->flush = gen6_bsd_ring_flush;
2948 0 : ring->add_request = gen6_add_request;
2949 0 : ring->get_seqno = gen6_ring_get_seqno;
2950 0 : ring->set_seqno = ring_set_seqno;
2951 0 : ring->irq_enable_mask =
2952 : GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
2953 0 : ring->irq_get = gen8_ring_get_irq;
2954 0 : ring->irq_put = gen8_ring_put_irq;
2955 0 : ring->dispatch_execbuffer =
2956 : gen8_ring_dispatch_execbuffer;
2957 0 : if (i915_semaphore_is_enabled(dev)) {
2958 0 : ring->semaphore.sync_to = gen8_ring_sync;
2959 0 : ring->semaphore.signal = gen8_xcs_signal;
2960 0 : GEN8_RING_SEMAPHORE_INIT;
2961 : }
2962 0 : ring->init_hw = init_ring_common;
2963 :
2964 0 : return intel_init_ring_buffer(dev, ring);
2965 : }
2966 :
2967 0 : int intel_init_blt_ring_buffer(struct drm_device *dev)
2968 : {
2969 0 : struct drm_i915_private *dev_priv = dev->dev_private;
2970 0 : struct intel_engine_cs *ring = &dev_priv->ring[BCS];
2971 :
2972 0 : ring->name = "blitter ring";
2973 0 : ring->id = BCS;
2974 :
2975 0 : ring->mmio_base = BLT_RING_BASE;
2976 0 : ring->write_tail = ring_write_tail;
2977 0 : ring->flush = gen6_ring_flush;
2978 0 : ring->add_request = gen6_add_request;
2979 0 : ring->get_seqno = gen6_ring_get_seqno;
2980 0 : ring->set_seqno = ring_set_seqno;
2981 0 : if (INTEL_INFO(dev)->gen >= 8) {
2982 0 : ring->irq_enable_mask =
2983 : GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
2984 0 : ring->irq_get = gen8_ring_get_irq;
2985 0 : ring->irq_put = gen8_ring_put_irq;
2986 0 : ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
2987 0 : if (i915_semaphore_is_enabled(dev)) {
2988 0 : ring->semaphore.sync_to = gen8_ring_sync;
2989 0 : ring->semaphore.signal = gen8_xcs_signal;
2990 0 : GEN8_RING_SEMAPHORE_INIT;
2991 : }
2992 : } else {
2993 0 : ring->irq_enable_mask = GT_BLT_USER_INTERRUPT;
2994 0 : ring->irq_get = gen6_ring_get_irq;
2995 0 : ring->irq_put = gen6_ring_put_irq;
2996 0 : ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
2997 0 : if (i915_semaphore_is_enabled(dev)) {
2998 0 : ring->semaphore.signal = gen6_signal;
2999 0 : ring->semaphore.sync_to = gen6_ring_sync;
3000 : /*
3001 : * The current semaphore is only applied on pre-gen8
3002 : * platform. And there is no VCS2 ring on the pre-gen8
3003 : * platform. So the semaphore between BCS and VCS2 is
3004 : * initialized as INVALID. Gen8 will initialize the
3005 : * sema between BCS and VCS2 later.
3006 : */
3007 0 : ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_BR;
3008 0 : ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_BV;
3009 0 : ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID;
3010 0 : ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_BVE;
3011 0 : ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
3012 0 : ring->semaphore.mbox.signal[RCS] = GEN6_RBSYNC;
3013 0 : ring->semaphore.mbox.signal[VCS] = GEN6_VBSYNC;
3014 0 : ring->semaphore.mbox.signal[BCS] = GEN6_NOSYNC;
3015 0 : ring->semaphore.mbox.signal[VECS] = GEN6_VEBSYNC;
3016 0 : ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
3017 0 : }
3018 : }
3019 0 : ring->init_hw = init_ring_common;
3020 :
3021 0 : return intel_init_ring_buffer(dev, ring);
3022 : }
3023 :
3024 0 : int intel_init_vebox_ring_buffer(struct drm_device *dev)
3025 : {
3026 0 : struct drm_i915_private *dev_priv = dev->dev_private;
3027 0 : struct intel_engine_cs *ring = &dev_priv->ring[VECS];
3028 :
3029 0 : ring->name = "video enhancement ring";
3030 0 : ring->id = VECS;
3031 :
3032 0 : ring->mmio_base = VEBOX_RING_BASE;
3033 0 : ring->write_tail = ring_write_tail;
3034 0 : ring->flush = gen6_ring_flush;
3035 0 : ring->add_request = gen6_add_request;
3036 0 : ring->get_seqno = gen6_ring_get_seqno;
3037 0 : ring->set_seqno = ring_set_seqno;
3038 :
3039 0 : if (INTEL_INFO(dev)->gen >= 8) {
3040 0 : ring->irq_enable_mask =
3041 : GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
3042 0 : ring->irq_get = gen8_ring_get_irq;
3043 0 : ring->irq_put = gen8_ring_put_irq;
3044 0 : ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
3045 0 : if (i915_semaphore_is_enabled(dev)) {
3046 0 : ring->semaphore.sync_to = gen8_ring_sync;
3047 0 : ring->semaphore.signal = gen8_xcs_signal;
3048 0 : GEN8_RING_SEMAPHORE_INIT;
3049 : }
3050 : } else {
3051 0 : ring->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
3052 0 : ring->irq_get = hsw_vebox_get_irq;
3053 0 : ring->irq_put = hsw_vebox_put_irq;
3054 0 : ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
3055 0 : if (i915_semaphore_is_enabled(dev)) {
3056 0 : ring->semaphore.sync_to = gen6_ring_sync;
3057 0 : ring->semaphore.signal = gen6_signal;
3058 0 : ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VER;
3059 0 : ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_VEV;
3060 0 : ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VEB;
3061 0 : ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID;
3062 0 : ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
3063 0 : ring->semaphore.mbox.signal[RCS] = GEN6_RVESYNC;
3064 0 : ring->semaphore.mbox.signal[VCS] = GEN6_VVESYNC;
3065 0 : ring->semaphore.mbox.signal[BCS] = GEN6_BVESYNC;
3066 0 : ring->semaphore.mbox.signal[VECS] = GEN6_NOSYNC;
3067 0 : ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
3068 0 : }
3069 : }
3070 0 : ring->init_hw = init_ring_common;
3071 :
3072 0 : return intel_init_ring_buffer(dev, ring);
3073 : }
3074 :
3075 : int
3076 0 : intel_ring_flush_all_caches(struct drm_i915_gem_request *req)
3077 : {
3078 0 : struct intel_engine_cs *ring = req->ring;
3079 : int ret;
3080 :
3081 0 : if (!ring->gpu_caches_dirty)
3082 0 : return 0;
3083 :
3084 0 : ret = ring->flush(req, 0, I915_GEM_GPU_DOMAINS);
3085 0 : if (ret)
3086 0 : return ret;
3087 :
3088 0 : trace_i915_gem_ring_flush(req, 0, I915_GEM_GPU_DOMAINS);
3089 :
3090 0 : ring->gpu_caches_dirty = false;
3091 0 : return 0;
3092 0 : }
3093 :
3094 : int
3095 0 : intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
3096 : {
3097 0 : struct intel_engine_cs *ring = req->ring;
3098 : uint32_t flush_domains;
3099 : int ret;
3100 :
3101 : flush_domains = 0;
3102 0 : if (ring->gpu_caches_dirty)
3103 : flush_domains = I915_GEM_GPU_DOMAINS;
3104 :
3105 0 : ret = ring->flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
3106 0 : if (ret)
3107 0 : return ret;
3108 :
3109 0 : trace_i915_gem_ring_flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
3110 :
3111 0 : ring->gpu_caches_dirty = false;
3112 0 : return 0;
3113 0 : }
3114 :
3115 : void
3116 0 : intel_stop_ring_buffer(struct intel_engine_cs *ring)
3117 : {
3118 : int ret;
3119 :
3120 0 : if (!intel_ring_initialized(ring))
3121 0 : return;
3122 :
3123 0 : ret = intel_ring_idle(ring);
3124 0 : if (ret && !i915_reset_in_progress(&to_i915(ring->dev)->gpu_error))
3125 0 : DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
3126 : ring->name, ret);
3127 :
3128 0 : stop_ring(ring);
3129 0 : }