LCOV - 6.4 - dev/pci/drm/i915/intel

LCOV - code coverage report

Current view:	top level - dev/pci/drm/i915 - intel_lrc.c (source / functions)		Hit	Total	Coverage
Test:	6.4	Lines:	0	1155	0.0 %
Date:	2018-10-19 03:25:38	Functions:	0	69	0.0 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /*
       2             :  * Copyright © 2014 Intel Corporation
       3             :  *
       4             :  * Permission is hereby granted, free of charge, to any person obtaining a
       5             :  * copy of this software and associated documentation files (the "Software"),
       6             :  * to deal in the Software without restriction, including without limitation
       7             :  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
       8             :  * and/or sell copies of the Software, and to permit persons to whom the
       9             :  * Software is furnished to do so, subject to the following conditions:
      10             :  *
      11             :  * The above copyright notice and this permission notice (including the next
      12             :  * paragraph) shall be included in all copies or substantial portions of the
      13             :  * Software.
      14             :  *
      15             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      16             :  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      17             :  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
      18             :  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
      19             :  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
      20             :  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
      21             :  * IN THE SOFTWARE.
      22             :  *
      23             :  * Authors:
      24             :  *    Ben Widawsky <ben@bwidawsk.net>
      25             :  *    Michel Thierry <michel.thierry@intel.com>
      26             :  *    Thomas Daniel <thomas.daniel@intel.com>
      27             :  *    Oscar Mateo <oscar.mateo@intel.com>
      28             :  *
      29             :  */
      30             : 
      31             : /**
      32             :  * DOC: Logical Rings, Logical Ring Contexts and Execlists
      33             :  *
      34             :  * Motivation:
      35             :  * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
      36             :  * These expanded contexts enable a number of new abilities, especially
      37             :  * "Execlists" (also implemented in this file).
      38             :  *
      39             :  * One of the main differences with the legacy HW contexts is that logical
      40             :  * ring contexts incorporate many more things to the context's state, like
      41             :  * PDPs or ringbuffer control registers:
      42             :  *
      43             :  * The reason why PDPs are included in the context is straightforward: as
      44             :  * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
      45             :  * contained there means you don't need to do a ppgtt->switch_mm yourself,
      46             :  * instead, the GPU will do it for you on the context switch.
      47             :  *
      48             :  * But what about the ringbuffer control registers (head, tail, etc.)?
      49             :  * Shouldn't we just need a set of those per engine command streamer? This is
      50             :  * where the name "Logical Rings" starts to make sense: by virtualizing the
      51             :  * rings, the engine cs shifts to a new "ring buffer" with every context
      52             :  * switch. When you want to submit a workload to the GPU you: A) choose your
      53             :  * context, B) find its appropriate virtualized ring, C) write commands to it
      54             :  * and then, finally, D) tell the GPU to switch to that context.
      55             :  *
      56             :  * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
      57             :  * to a context is via a context execution list, ergo "Execlists".
      58             :  *
      59             :  * LRC implementation:
      60             :  * Regarding the creation of contexts, we have:
      61             :  *
      62             :  * - One global default context.
      63             :  * - One local default context for each opened fd.
      64             :  * - One local extra context for each context create ioctl call.
      65             :  *
      66             :  * Now that ringbuffers belong per-context (and not per-engine, like before)
      67             :  * and that contexts are uniquely tied to a given engine (and not reusable,
      68             :  * like before) we need:
      69             :  *
      70             :  * - One ringbuffer per-engine inside each context.
      71             :  * - One backing object per-engine inside each context.
      72             :  *
      73             :  * The global default context starts its life with these new objects fully
      74             :  * allocated and populated. The local default context for each opened fd is
      75             :  * more complex, because we don't know at creation time which engine is going
      76             :  * to use them. To handle this, we have implemented a deferred creation of LR
      77             :  * contexts:
      78             :  *
      79             :  * The local context starts its life as a hollow or blank holder, that only
      80             :  * gets populated for a given engine once we receive an execbuffer. If later
      81             :  * on we receive another execbuffer ioctl for the same context but a different
      82             :  * engine, we allocate/populate a new ringbuffer and context backing object and
      83             :  * so on.
      84             :  *
      85             :  * Finally, regarding local contexts created using the ioctl call: as they are
      86             :  * only allowed with the render ring, we can allocate & populate them right
      87             :  * away (no need to defer anything, at least for now).
      88             :  *
      89             :  * Execlists implementation:
      90             :  * Execlists are the new method by which, on gen8+ hardware, workloads are
      91             :  * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
      92             :  * This method works as follows:
      93             :  *
      94             :  * When a request is committed, its commands (the BB start and any leading or
      95             :  * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
      96             :  * for the appropriate context. The tail pointer in the hardware context is not
      97             :  * updated at this time, but instead, kept by the driver in the ringbuffer
      98             :  * structure. A structure representing this request is added to a request queue
      99             :  * for the appropriate engine: this structure contains a copy of the context's
     100             :  * tail after the request was written to the ring buffer and a pointer to the
     101             :  * context itself.
     102             :  *
     103             :  * If the engine's request queue was empty before the request was added, the
     104             :  * queue is processed immediately. Otherwise the queue will be processed during
     105             :  * a context switch interrupt. In any case, elements on the queue will get sent
     106             :  * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
     107             :  * globally unique 20-bit submission ID.
     108             :  *
     109             :  * When execution of a request completes, the GPU updates the context status
     110             :  * buffer with a context complete event and generates a context switch interrupt.
     111             :  * During the interrupt handling, the driver examines the events in the buffer:
     112             :  * for each context complete event, if the announced ID matches that on the head
     113             :  * of the request queue, then that request is retired and removed from the queue.
     114             :  *
     115             :  * After processing, if any requests were retired and the queue is not empty
     116             :  * then a new execution list can be submitted. The two requests at the front of
     117             :  * the queue are next to be submitted but since a context may not occur twice in
     118             :  * an execution list, if subsequent requests have the same ID as the first then
     119             :  * the two requests must be combined. This is done simply by discarding requests
     120             :  * at the head of the queue until either only one request is left (in which case
     121             :  * we use a NULL second context) or the first two requests have unique IDs.
     122             :  *
     123             :  * By always executing the first two requests in the queue the driver ensures
     124             :  * that the GPU is kept as busy as possible. In the case where a single context
     125             :  * completes but a second context is still executing, the request for this second
     126             :  * context will be at the head of the queue when we remove the first one. This
     127             :  * request will then be resubmitted along with a new request for a different context,
     128             :  * which will cause the hardware to continue executing the second request and queue
     129             :  * the new request (the GPU detects the condition of a context getting preempted
     130             :  * with the same context and optimizes the context switch flow by not doing
     131             :  * preemption, but just sampling the new tail pointer).
     132             :  *
     133             :  */
     134             : 
     135             : #include <dev/pci/drm/drmP.h>
     136             : #include <dev/pci/drm/i915_drm.h>
     137             : #include "i915_drv.h"
     138             : #include "intel_mocs.h"
     139             : 
     140             : #define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE)
     141             : #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
     142             : #define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE)
     143             : 
     144             : #define RING_EXECLIST_QFULL             (1 << 0x2)
     145             : #define RING_EXECLIST1_VALID            (1 << 0x3)
     146             : #define RING_EXECLIST0_VALID            (1 << 0x4)
     147             : #define RING_EXECLIST_ACTIVE_STATUS     (3 << 0xE)
     148             : #define RING_EXECLIST1_ACTIVE           (1 << 0x11)
     149             : #define RING_EXECLIST0_ACTIVE           (1 << 0x12)
     150             : 
     151             : #define GEN8_CTX_STATUS_IDLE_ACTIVE     (1 << 0)
     152             : #define GEN8_CTX_STATUS_PREEMPTED       (1 << 1)
     153             : #define GEN8_CTX_STATUS_ELEMENT_SWITCH  (1 << 2)
     154             : #define GEN8_CTX_STATUS_ACTIVE_IDLE     (1 << 3)
     155             : #define GEN8_CTX_STATUS_COMPLETE        (1 << 4)
     156             : #define GEN8_CTX_STATUS_LITE_RESTORE    (1 << 15)
     157             : 
     158             : #define CTX_LRI_HEADER_0                0x01
     159             : #define CTX_CONTEXT_CONTROL             0x02
     160             : #define CTX_RING_HEAD                   0x04
     161             : #define CTX_RING_TAIL                   0x06
     162             : #define CTX_RING_BUFFER_START           0x08
     163             : #define CTX_RING_BUFFER_CONTROL         0x0a
     164             : #define CTX_BB_HEAD_U                   0x0c
     165             : #define CTX_BB_HEAD_L                   0x0e
     166             : #define CTX_BB_STATE                    0x10
     167             : #define CTX_SECOND_BB_HEAD_U            0x12
     168             : #define CTX_SECOND_BB_HEAD_L            0x14
     169             : #define CTX_SECOND_BB_STATE             0x16
     170             : #define CTX_BB_PER_CTX_PTR              0x18
     171             : #define CTX_RCS_INDIRECT_CTX            0x1a
     172             : #define CTX_RCS_INDIRECT_CTX_OFFSET     0x1c
     173             : #define CTX_LRI_HEADER_1                0x21
     174             : #define CTX_CTX_TIMESTAMP               0x22
     175             : #define CTX_PDP3_UDW                    0x24
     176             : #define CTX_PDP3_LDW                    0x26
     177             : #define CTX_PDP2_UDW                    0x28
     178             : #define CTX_PDP2_LDW                    0x2a
     179             : #define CTX_PDP1_UDW                    0x2c
     180             : #define CTX_PDP1_LDW                    0x2e
     181             : #define CTX_PDP0_UDW                    0x30
     182             : #define CTX_PDP0_LDW                    0x32
     183             : #define CTX_LRI_HEADER_2                0x41
     184             : #define CTX_R_PWR_CLK_STATE             0x42
     185             : #define CTX_GPGPU_CSR_BASE_ADDRESS      0x44
     186             : 
     187             : #define GEN8_CTX_VALID (1<<0)
     188             : #define GEN8_CTX_FORCE_PD_RESTORE (1<<1)
     189             : #define GEN8_CTX_FORCE_RESTORE (1<<2)
     190             : #define GEN8_CTX_L3LLC_COHERENT (1<<5)
     191             : #define GEN8_CTX_PRIVILEGE (1<<8)
     192             : 
     193             : #define ASSIGN_CTX_PDP(ppgtt, reg_state, n) { \
     194             :         const u64 _addr = i915_page_dir_dma_addr((ppgtt), (n)); \
     195             :         reg_state[CTX_PDP ## n ## _UDW+1] = upper_32_bits(_addr); \
     196             :         reg_state[CTX_PDP ## n ## _LDW+1] = lower_32_bits(_addr); \
     197             : }
     198             : 
     199             : #define ASSIGN_CTX_PML4(ppgtt, reg_state) { \
     200             :         reg_state[CTX_PDP0_UDW + 1] = upper_32_bits(px_dma(&ppgtt->pml4)); \
     201             :         reg_state[CTX_PDP0_LDW + 1] = lower_32_bits(px_dma(&ppgtt->pml4)); \
     202             : }
     203             : 
     204             : enum {
     205             :         ADVANCED_CONTEXT = 0,
     206             :         LEGACY_32B_CONTEXT,
     207             :         ADVANCED_AD_CONTEXT,
     208             :         LEGACY_64B_CONTEXT
     209             : };
     210             : #define GEN8_CTX_ADDRESSING_MODE_SHIFT 3
     211             : #define GEN8_CTX_ADDRESSING_MODE(dev)  (USES_FULL_48BIT_PPGTT(dev) ?\
     212             :                 LEGACY_64B_CONTEXT :\
     213             :                 LEGACY_32B_CONTEXT)
     214             : enum {
     215             :         FAULT_AND_HANG = 0,
     216             :         FAULT_AND_HALT, /* Debug only */
     217             :         FAULT_AND_STREAM,
     218             :         FAULT_AND_CONTINUE /* Unsupported */
     219             : };
     220             : #define GEN8_CTX_ID_SHIFT 32
     221             : #define CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT  0x17
     222             : 
     223             : static int intel_lr_context_pin(struct drm_i915_gem_request *rq);
     224             : static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
     225             :                 struct drm_i915_gem_object *default_ctx_obj);
     226             : 
     227             : 
     228             : /**
     229             :  * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
     230             :  * @dev: DRM device.
     231             :  * @enable_execlists: value of i915.enable_execlists module parameter.
     232             :  *
     233             :  * Only certain platforms support Execlists (the prerequisites being
     234             :  * support for Logical Ring Contexts and Aliasing PPGTT or better).
     235             :  *
     236             :  * Return: 1 if Execlists is supported and has to be enabled.
     237             :  */
     238           0 : int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists)
     239             : {
     240           0 :         WARN_ON(i915.enable_ppgtt == -1);
     241             : 
     242             :         /* On platforms with execlist available, vGPU will only
     243             :          * support execlist mode, no ring buffer mode.
     244             :          */
     245           0 :         if (HAS_LOGICAL_RING_CONTEXTS(dev) && intel_vgpu_active(dev))
     246           0 :                 return 1;
     247             : 
     248           0 :         if (INTEL_INFO(dev)->gen >= 9)
     249           0 :                 return 1;
     250             : 
     251           0 :         if (enable_execlists == 0)
     252           0 :                 return 0;
     253             : 
     254           0 :         if (HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev) &&
     255           0 :             i915.use_mmio_flip >= 0)
     256           0 :                 return 1;
     257             : 
     258           0 :         return 0;
     259           0 : }
     260             : 
     261             : /**
     262             :  * intel_execlists_ctx_id() - get the Execlists Context ID
     263             :  * @ctx_obj: Logical Ring Context backing object.
     264             :  *
     265             :  * Do not confuse with ctx->id! Unfortunately we have a name overload
     266             :  * here: the old context ID we pass to userspace as a handle so that
     267             :  * they can refer to a context, and the new context ID we pass to the
     268             :  * ELSP so that the GPU can inform us of the context status via
     269             :  * interrupts.
     270             :  *
     271             :  * Return: 20-bit globally unique context ID.
     272             :  */
     273           0 : u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj)
     274             : {
     275           0 :         u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj) +
     276             :                         LRC_PPHWSP_PN * PAGE_SIZE;
     277             : 
     278             :         /* LRCA is required to be 4K aligned so the most significant 20 bits
     279             :          * are globally unique */
     280           0 :         return lrca >> 12;
     281             : }
     282             : 
     283           0 : static bool disable_lite_restore_wa(struct intel_engine_cs *ring)
     284             : {
     285           0 :         struct drm_device *dev = ring->dev;
     286             : 
     287           0 :         return (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
     288           0 :                 IS_BXT_REVID(dev, 0, BXT_REVID_A0)) &&
     289           0 :                (ring->id == VCS || ring->id == VCS2);
     290             : }
     291             : 
     292           0 : uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
     293             :                                      struct intel_engine_cs *ring)
     294             : {
     295           0 :         struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
     296             :         uint64_t desc;
     297           0 :         uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj) +
     298             :                         LRC_PPHWSP_PN * PAGE_SIZE;
     299             : 
     300           0 :         WARN_ON(lrca & 0xFFFFFFFF00000FFFULL);
     301             : 
     302             :         desc = GEN8_CTX_VALID;
     303           0 :         desc |= GEN8_CTX_ADDRESSING_MODE(dev) << GEN8_CTX_ADDRESSING_MODE_SHIFT;
     304           0 :         if (IS_GEN8(ctx_obj->base.dev))
     305           0 :                 desc |= GEN8_CTX_L3LLC_COHERENT;
     306           0 :         desc |= GEN8_CTX_PRIVILEGE;
     307           0 :         desc |= lrca;
     308           0 :         desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT;
     309             : 
     310             :         /* TODO: WaDisableLiteRestore when we start using semaphore
     311             :          * signalling between Command Streamers */
     312             :         /* desc |= GEN8_CTX_FORCE_RESTORE; */
     313             : 
     314             :         /* WaEnableForceRestoreInCtxtDescForVCS:skl */
     315             :         /* WaEnableForceRestoreInCtxtDescForVCS:bxt */
     316           0 :         if (disable_lite_restore_wa(ring))
     317           0 :                 desc |= GEN8_CTX_FORCE_RESTORE;
     318             : 
     319           0 :         return desc;
     320             : }
     321             : 
     322           0 : static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
     323             :                                  struct drm_i915_gem_request *rq1)
     324             : {
     325             : 
     326           0 :         struct intel_engine_cs *ring = rq0->ring;
     327           0 :         struct drm_device *dev = ring->dev;
     328           0 :         struct drm_i915_private *dev_priv = dev->dev_private;
     329             :         uint64_t desc[2];
     330             : 
     331           0 :         if (rq1) {
     332           0 :                 desc[1] = intel_lr_context_descriptor(rq1->ctx, rq1->ring);
     333           0 :                 rq1->elsp_submitted++;
     334           0 :         } else {
     335             :                 desc[1] = 0;
     336             :         }
     337             : 
     338           0 :         desc[0] = intel_lr_context_descriptor(rq0->ctx, rq0->ring);
     339           0 :         rq0->elsp_submitted++;
     340             : 
     341             :         /* You must always write both descriptors in the order below. */
     342           0 :         spin_lock(&dev_priv->uncore.lock);
     343           0 :         intel_uncore_forcewake_get__locked(dev_priv, FORCEWAKE_ALL);
     344           0 :         I915_WRITE_FW(RING_ELSP(ring), upper_32_bits(desc[1]));
     345           0 :         I915_WRITE_FW(RING_ELSP(ring), lower_32_bits(desc[1]));
     346             : 
     347           0 :         I915_WRITE_FW(RING_ELSP(ring), upper_32_bits(desc[0]));
     348             :         /* The context is automatically loaded after the following */
     349           0 :         I915_WRITE_FW(RING_ELSP(ring), lower_32_bits(desc[0]));
     350             : 
     351             :         /* ELSP is a wo register, use another nearby reg for posting */
     352           0 :         POSTING_READ_FW(RING_EXECLIST_STATUS_LO(ring));
     353           0 :         intel_uncore_forcewake_put__locked(dev_priv, FORCEWAKE_ALL);
     354           0 :         spin_unlock(&dev_priv->uncore.lock);
     355           0 : }
     356             : 
     357           0 : static int execlists_update_context(struct drm_i915_gem_request *rq)
     358             : {
     359           0 :         struct intel_engine_cs *ring = rq->ring;
     360           0 :         struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
     361           0 :         struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
     362           0 :         struct drm_i915_gem_object *rb_obj = rq->ringbuf->obj;
     363             :         struct vm_page *page;
     364             :         uint32_t *reg_state;
     365             : 
     366           0 :         BUG_ON(!ctx_obj);
     367           0 :         WARN_ON(!i915_gem_obj_is_pinned(ctx_obj));
     368           0 :         WARN_ON(!i915_gem_obj_is_pinned(rb_obj));
     369             : 
     370           0 :         page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
     371           0 :         reg_state = kmap_atomic(page);
     372             : 
     373           0 :         reg_state[CTX_RING_TAIL+1] = rq->tail;
     374           0 :         reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(rb_obj);
     375             : 
     376           0 :         if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
     377             :                 /* True 32b PPGTT with dynamic page allocation: update PDP
     378             :                  * registers and point the unallocated PDPs to scratch page.
     379             :                  * PML4 is allocated during ppgtt init, so this is not needed
     380             :                  * in 48-bit mode.
     381             :                  */
     382           0 :                 ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
     383           0 :                 ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
     384           0 :                 ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
     385           0 :                 ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
     386           0 :         }
     387             : 
     388           0 :         kunmap_atomic(reg_state);
     389             : 
     390           0 :         return 0;
     391             : }
     392             : 
     393           0 : static void execlists_submit_requests(struct drm_i915_gem_request *rq0,
     394             :                                       struct drm_i915_gem_request *rq1)
     395             : {
     396           0 :         execlists_update_context(rq0);
     397             : 
     398           0 :         if (rq1)
     399           0 :                 execlists_update_context(rq1);
     400             : 
     401           0 :         execlists_elsp_write(rq0, rq1);
     402           0 : }
     403             : 
     404           0 : static void execlists_context_unqueue(struct intel_engine_cs *ring)
     405             : {
     406             :         struct drm_i915_gem_request *req0 = NULL, *req1 = NULL;
     407             :         struct drm_i915_gem_request *cursor = NULL, *tmp = NULL;
     408             : 
     409           0 :         assert_spin_locked(&ring->execlist_lock);
     410             : 
     411             :         /*
     412             :          * If irqs are not active generate a warning as batches that finish
     413             :          * without the irqs may get lost and a GPU Hang may occur.
     414             :          */
     415           0 :         WARN_ON(!intel_irqs_enabled(ring->dev->dev_private));
     416             : 
     417           0 :         if (list_empty(&ring->execlist_queue))
     418           0 :                 return;
     419             : 
     420             :         /* Try to read in pairs */
     421           0 :         list_for_each_entry_safe(cursor, tmp, &ring->execlist_queue,
     422             :                                  execlist_link) {
     423           0 :                 if (!req0) {
     424             :                         req0 = cursor;
     425           0 :                 } else if (req0->ctx == cursor->ctx) {
     426             :                         /* Same ctx: ignore first request, as second request
     427             :                          * will update tail past first request's workload */
     428           0 :                         cursor->elsp_submitted = req0->elsp_submitted;
     429           0 :                         list_del(&req0->execlist_link);
     430           0 :                         list_add_tail(&req0->execlist_link,
     431           0 :                                 &ring->execlist_retired_req_list);
     432             :                         req0 = cursor;
     433             :                 } else {
     434             :                         req1 = cursor;
     435           0 :                         break;
     436             :                 }
     437             :         }
     438             : 
     439           0 :         if (IS_GEN8(ring->dev) || IS_GEN9(ring->dev)) {
     440             :                 /*
     441             :                  * WaIdleLiteRestore: make sure we never cause a lite
     442             :                  * restore with HEAD==TAIL
     443             :                  */
     444           0 :                 if (req0->elsp_submitted) {
     445             :                         /*
     446             :                          * Apply the wa NOOPS to prevent ring:HEAD == req:TAIL
     447             :                          * as we resubmit the request. See gen8_emit_request()
     448             :                          * for where we prepare the padding after the end of the
     449             :                          * request.
     450             :                          */
     451             :                         struct intel_ringbuffer *ringbuf;
     452             : 
     453           0 :                         ringbuf = req0->ctx->engine[ring->id].ringbuf;
     454           0 :                         req0->tail += 8;
     455           0 :                         req0->tail &= ringbuf->size - 1;
     456           0 :                 }
     457             :         }
     458             : 
     459           0 :         WARN_ON(req1 && req1->elsp_submitted);
     460             : 
     461           0 :         execlists_submit_requests(req0, req1);
     462           0 : }
     463             : 
     464           0 : static bool execlists_check_remove_request(struct intel_engine_cs *ring,
     465             :                                            u32 request_id)
     466             : {
     467             :         struct drm_i915_gem_request *head_req;
     468             : 
     469           0 :         assert_spin_locked(&ring->execlist_lock);
     470             : 
     471           0 :         head_req = list_first_entry_or_null(&ring->execlist_queue,
     472             :                                             struct drm_i915_gem_request,
     473             :                                             execlist_link);
     474             : 
     475           0 :         if (head_req != NULL) {
     476             :                 struct drm_i915_gem_object *ctx_obj =
     477           0 :                                 head_req->ctx->engine[ring->id].state;
     478           0 :                 if (intel_execlists_ctx_id(ctx_obj) == request_id) {
     479           0 :                         WARN(head_req->elsp_submitted == 0,
     480             :                              "Never submitted head request\n");
     481             : 
     482           0 :                         if (--head_req->elsp_submitted <= 0) {
     483           0 :                                 list_del(&head_req->execlist_link);
     484           0 :                                 list_add_tail(&head_req->execlist_link,
     485           0 :                                         &ring->execlist_retired_req_list);
     486           0 :                                 return true;
     487             :                         }
     488             :                 }
     489           0 :         }
     490             : 
     491           0 :         return false;
     492           0 : }
     493             : 
     494             : /**
     495             :  * intel_lrc_irq_handler() - handle Context Switch interrupts
     496             :  * @ring: Engine Command Streamer to handle.
     497             :  *
     498             :  * Check the unread Context Status Buffers and manage the submission of new
     499             :  * contexts to the ELSP accordingly.
     500             :  */
     501           0 : void intel_lrc_irq_handler(struct intel_engine_cs *ring)
     502             : {
     503           0 :         struct drm_i915_private *dev_priv = ring->dev->dev_private;
     504             :         u32 status_pointer;
     505             :         u8 read_pointer;
     506             :         u8 write_pointer;
     507             :         u32 status = 0;
     508             :         u32 status_id;
     509             :         u32 submit_contexts = 0;
     510             : 
     511           0 :         status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring));
     512             : 
     513           0 :         read_pointer = ring->next_context_status_buffer;
     514           0 :         write_pointer = status_pointer & GEN8_CSB_PTR_MASK;
     515           0 :         if (read_pointer > write_pointer)
     516           0 :                 write_pointer += GEN8_CSB_ENTRIES;
     517             : 
     518           0 :         spin_lock(&ring->execlist_lock);
     519             : 
     520           0 :         while (read_pointer < write_pointer) {
     521           0 :                 read_pointer++;
     522           0 :                 status = I915_READ(RING_CONTEXT_STATUS_BUF_LO(ring, read_pointer % GEN8_CSB_ENTRIES));
     523           0 :                 status_id = I915_READ(RING_CONTEXT_STATUS_BUF_HI(ring, read_pointer % GEN8_CSB_ENTRIES));
     524             : 
     525           0 :                 if (status & GEN8_CTX_STATUS_IDLE_ACTIVE)
     526           0 :                         continue;
     527             : 
     528           0 :                 if (status & GEN8_CTX_STATUS_PREEMPTED) {
     529           0 :                         if (status & GEN8_CTX_STATUS_LITE_RESTORE) {
     530           0 :                                 if (execlists_check_remove_request(ring, status_id))
     531           0 :                                         WARN(1, "Lite Restored request removed from queue\n");
     532             :                         } else
     533           0 :                                 WARN(1, "Preemption without Lite Restore\n");
     534             :                 }
     535             : 
     536           0 :                  if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) ||
     537           0 :                      (status & GEN8_CTX_STATUS_ELEMENT_SWITCH)) {
     538           0 :                         if (execlists_check_remove_request(ring, status_id))
     539           0 :                                 submit_contexts++;
     540             :                 }
     541             :         }
     542             : 
     543           0 :         if (disable_lite_restore_wa(ring)) {
     544             :                 /* Prevent a ctx to preempt itself */
     545           0 :                 if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) &&
     546           0 :                     (submit_contexts != 0))
     547           0 :                         execlists_context_unqueue(ring);
     548           0 :         } else if (submit_contexts != 0) {
     549           0 :                 execlists_context_unqueue(ring);
     550           0 :         }
     551             : 
     552           0 :         spin_unlock(&ring->execlist_lock);
     553             : 
     554           0 :         WARN(submit_contexts > 2, "More than two context complete events?\n");
     555           0 :         ring->next_context_status_buffer = write_pointer % GEN8_CSB_ENTRIES;
     556             : 
     557           0 :         I915_WRITE(RING_CONTEXT_STATUS_PTR(ring),
     558             :                    _MASKED_FIELD(GEN8_CSB_PTR_MASK << 8,
     559             :                                  ((u32)ring->next_context_status_buffer &
     560             :                                   GEN8_CSB_PTR_MASK) << 8));
     561           0 : }
     562             : 
     563           0 : static int execlists_context_queue(struct drm_i915_gem_request *request)
     564             : {
     565           0 :         struct intel_engine_cs *ring = request->ring;
     566             :         struct drm_i915_gem_request *cursor;
     567             :         int num_elements = 0;
     568             : 
     569           0 :         if (request->ctx != ring->default_context)
     570           0 :                 intel_lr_context_pin(request);
     571             : 
     572           0 :         i915_gem_request_reference(request);
     573             : 
     574           0 :         spin_lock_irq(&ring->execlist_lock);
     575             : 
     576           0 :         list_for_each_entry(cursor, &ring->execlist_queue, execlist_link)
     577           0 :                 if (++num_elements > 2)
     578             :                         break;
     579             : 
     580           0 :         if (num_elements > 2) {
     581             :                 struct drm_i915_gem_request *tail_req;
     582             : 
     583           0 :                 tail_req = list_last_entry(&ring->execlist_queue,
     584             :                                            struct drm_i915_gem_request,
     585             :                                            execlist_link);
     586             : 
     587           0 :                 if (request->ctx == tail_req->ctx) {
     588           0 :                         WARN(tail_req->elsp_submitted != 0,
     589             :                                 "More than 2 already-submitted reqs queued\n");
     590           0 :                         list_del(&tail_req->execlist_link);
     591           0 :                         list_add_tail(&tail_req->execlist_link,
     592           0 :                                 &ring->execlist_retired_req_list);
     593           0 :                 }
     594           0 :         }
     595             : 
     596           0 :         list_add_tail(&request->execlist_link, &ring->execlist_queue);
     597           0 :         if (num_elements == 0)
     598           0 :                 execlists_context_unqueue(ring);
     599             : 
     600           0 :         spin_unlock_irq(&ring->execlist_lock);
     601             : 
     602           0 :         return 0;
     603             : }
     604             : 
     605           0 : static int logical_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
     606             : {
     607           0 :         struct intel_engine_cs *ring = req->ring;
     608             :         uint32_t flush_domains;
     609             :         int ret;
     610             : 
     611             :         flush_domains = 0;
     612           0 :         if (ring->gpu_caches_dirty)
     613             :                 flush_domains = I915_GEM_GPU_DOMAINS;
     614             : 
     615           0 :         ret = ring->emit_flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
     616           0 :         if (ret)
     617           0 :                 return ret;
     618             : 
     619           0 :         ring->gpu_caches_dirty = false;
     620           0 :         return 0;
     621           0 : }
     622             : 
     623           0 : static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
     624             :                                  struct list_head *vmas)
     625             : {
     626           0 :         const unsigned other_rings = ~intel_ring_flag(req->ring);
     627             :         struct i915_vma *vma;
     628             :         uint32_t flush_domains = 0;
     629             :         bool flush_chipset = false;
     630             :         int ret;
     631             : 
     632           0 :         list_for_each_entry(vma, vmas, exec_list) {
     633           0 :                 struct drm_i915_gem_object *obj = vma->obj;
     634             : 
     635           0 :                 if (obj->active & other_rings) {
     636           0 :                         ret = i915_gem_object_sync(obj, req->ring, &req);
     637           0 :                         if (ret)
     638           0 :                                 return ret;
     639             :                 }
     640             : 
     641           0 :                 if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
     642           0 :                         flush_chipset |= i915_gem_clflush_object(obj, false);
     643             : 
     644           0 :                 flush_domains |= obj->base.write_domain;
     645           0 :         }
     646             : 
     647           0 :         if (flush_domains & I915_GEM_DOMAIN_GTT)
     648           0 :                 wmb();
     649             : 
     650             :         /* Unconditionally invalidate gpu caches and ensure that we do flush
     651             :          * any residual writes from the previous batch.
     652             :          */
     653           0 :         return logical_ring_invalidate_all_caches(req);
     654           0 : }
     655             : 
     656           0 : int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request)
     657             : {
     658             :         int ret;
     659             : 
     660           0 :         request->ringbuf = request->ctx->engine[request->ring->id].ringbuf;
     661             : 
     662           0 :         if (request->ctx != request->ring->default_context) {
     663           0 :                 ret = intel_lr_context_pin(request);
     664           0 :                 if (ret)
     665           0 :                         return ret;
     666             :         }
     667             : 
     668           0 :         return 0;
     669           0 : }
     670             : 
     671           0 : static int logical_ring_wait_for_space(struct drm_i915_gem_request *req,
     672             :                                        int bytes)
     673             : {
     674           0 :         struct intel_ringbuffer *ringbuf = req->ringbuf;
     675           0 :         struct intel_engine_cs *ring = req->ring;
     676             :         struct drm_i915_gem_request *target;
     677             :         unsigned space;
     678             :         int ret;
     679             : 
     680           0 :         if (intel_ring_space(ringbuf) >= bytes)
     681           0 :                 return 0;
     682             : 
     683             :         /* The whole point of reserving space is to not wait! */
     684           0 :         WARN_ON(ringbuf->reserved_in_use);
     685             : 
     686           0 :         list_for_each_entry(target, &ring->request_list, list) {
     687             :                 /*
     688             :                  * The request queue is per-engine, so can contain requests
     689             :                  * from multiple ringbuffers. Here, we must ignore any that
     690             :                  * aren't from the ringbuffer we're considering.
     691             :                  */
     692           0 :                 if (target->ringbuf != ringbuf)
     693             :                         continue;
     694             : 
     695             :                 /* Would completion of this request free enough space? */
     696           0 :                 space = __intel_ring_space(target->postfix, ringbuf->tail,
     697           0 :                                            ringbuf->size);
     698           0 :                 if (space >= bytes)
     699             :                         break;
     700             :         }
     701             : 
     702           0 :         if (WARN_ON(&target->list == &ring->request_list))
     703           0 :                 return -ENOSPC;
     704             : 
     705           0 :         ret = i915_wait_request(target);
     706           0 :         if (ret)
     707           0 :                 return ret;
     708             : 
     709           0 :         ringbuf->space = space;
     710           0 :         return 0;
     711           0 : }
     712             : 
     713             : /*
     714             :  * intel_logical_ring_advance_and_submit() - advance the tail and submit the workload
     715             :  * @request: Request to advance the logical ringbuffer of.
     716             :  *
     717             :  * The tail is updated in our logical ringbuffer struct, not in the actual context. What
     718             :  * really happens during submission is that the context and current tail will be placed
     719             :  * on a queue waiting for the ELSP to be ready to accept a new context submission. At that
     720             :  * point, the tail *inside* the context is updated and the ELSP written to.
     721             :  */
     722             : static void
     723           0 : intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
     724             : {
     725           0 :         struct intel_engine_cs *ring = request->ring;
     726           0 :         struct drm_i915_private *dev_priv = request->i915;
     727             : 
     728           0 :         intel_logical_ring_advance(request->ringbuf);
     729             : 
     730           0 :         request->tail = request->ringbuf->tail;
     731             : 
     732           0 :         if (intel_ring_stopped(ring))
     733           0 :                 return;
     734             : 
     735           0 :         if (dev_priv->guc.execbuf_client)
     736           0 :                 i915_guc_submit(dev_priv->guc.execbuf_client, request);
     737             :         else
     738           0 :                 execlists_context_queue(request);
     739           0 : }
     740             : 
     741           0 : static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
     742             : {
     743             :         uint32_t __iomem *virt;
     744           0 :         int rem = ringbuf->size - ringbuf->tail;
     745             : 
     746           0 :         virt = ringbuf->virtual_start + ringbuf->tail;
     747           0 :         rem /= 4;
     748           0 :         while (rem--)
     749           0 :                 iowrite32(MI_NOOP, virt++);
     750             : 
     751           0 :         ringbuf->tail = 0;
     752           0 :         intel_ring_update_space(ringbuf);
     753           0 : }
     754             : 
     755           0 : static int logical_ring_prepare(struct drm_i915_gem_request *req, int bytes)
     756             : {
     757           0 :         struct intel_ringbuffer *ringbuf = req->ringbuf;
     758           0 :         int remain_usable = ringbuf->effective_size - ringbuf->tail;
     759           0 :         int remain_actual = ringbuf->size - ringbuf->tail;
     760             :         int ret, total_bytes, wait_bytes = 0;
     761             :         bool need_wrap = false;
     762             : 
     763           0 :         if (ringbuf->reserved_in_use)
     764           0 :                 total_bytes = bytes;
     765             :         else
     766           0 :                 total_bytes = bytes + ringbuf->reserved_size;
     767             : 
     768           0 :         if (unlikely(bytes > remain_usable)) {
     769             :                 /*
     770             :                  * Not enough space for the basic request. So need to flush
     771             :                  * out the remainder and then wait for base + reserved.
     772             :                  */
     773           0 :                 wait_bytes = remain_actual + total_bytes;
     774             :                 need_wrap = true;
     775           0 :         } else {
     776           0 :                 if (unlikely(total_bytes > remain_usable)) {
     777             :                         /*
     778             :                          * The base request will fit but the reserved space
     779             :                          * falls off the end. So don't need an immediate wrap
     780             :                          * and only need to effectively wait for the reserved
     781             :                          * size space from the start of ringbuffer.
     782             :                          */
     783           0 :                         wait_bytes = remain_actual + ringbuf->reserved_size;
     784           0 :                 } else if (total_bytes > ringbuf->space) {
     785             :                         /* No wrapping required, just waiting. */
     786             :                         wait_bytes = total_bytes;
     787           0 :                 }
     788             :         }
     789             : 
     790           0 :         if (wait_bytes) {
     791           0 :                 ret = logical_ring_wait_for_space(req, wait_bytes);
     792           0 :                 if (unlikely(ret))
     793           0 :                         return ret;
     794             : 
     795           0 :                 if (need_wrap)
     796           0 :                         __wrap_ring_buffer(ringbuf);
     797             :         }
     798             : 
     799           0 :         return 0;
     800           0 : }
     801             : 
     802             : /**
     803             :  * intel_logical_ring_begin() - prepare the logical ringbuffer to accept some commands
     804             :  *
     805             :  * @req: The request to start some new work for
     806             :  * @num_dwords: number of DWORDs that we plan to write to the ringbuffer.
     807             :  *
     808             :  * The ringbuffer might not be ready to accept the commands right away (maybe it needs to
     809             :  * be wrapped, or wait a bit for the tail to be updated). This function takes care of that
     810             :  * and also preallocates a request (every workload submission is still mediated through
     811             :  * requests, same as it did with legacy ringbuffer submission).
     812             :  *
     813             :  * Return: non-zero if the ringbuffer is not ready to be written to.
     814             :  */
     815           0 : int intel_logical_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
     816             : {
     817             :         struct drm_i915_private *dev_priv;
     818             :         int ret;
     819             : 
     820           0 :         WARN_ON(req == NULL);
     821           0 :         dev_priv = req->ring->dev->dev_private;
     822             : 
     823           0 :         ret = i915_gem_check_wedge(&dev_priv->gpu_error,
     824           0 :                                    dev_priv->mm.interruptible);
     825           0 :         if (ret)
     826           0 :                 return ret;
     827             : 
     828           0 :         ret = logical_ring_prepare(req, num_dwords * sizeof(uint32_t));
     829           0 :         if (ret)
     830           0 :                 return ret;
     831             : 
     832           0 :         req->ringbuf->space -= num_dwords * sizeof(uint32_t);
     833           0 :         return 0;
     834           0 : }
     835             : 
     836           0 : int intel_logical_ring_reserve_space(struct drm_i915_gem_request *request)
     837             : {
     838             :         /*
     839             :          * The first call merely notes the reserve request and is common for
     840             :          * all back ends. The subsequent localised _begin() call actually
     841             :          * ensures that the reservation is available. Without the begin, if
     842             :          * the request creator immediately submitted the request without
     843             :          * adding any commands to it then there might not actually be
     844             :          * sufficient room for the submission commands.
     845             :          */
     846           0 :         intel_ring_reserved_space_reserve(request->ringbuf, MIN_SPACE_FOR_ADD_REQUEST);
     847             : 
     848           0 :         return intel_logical_ring_begin(request, 0);
     849             : }
     850             : 
     851             : /**
     852             :  * execlists_submission() - submit a batchbuffer for execution, Execlists style
     853             :  * @dev: DRM device.
     854             :  * @file: DRM file.
     855             :  * @ring: Engine Command Streamer to submit to.
     856             :  * @ctx: Context to employ for this submission.
     857             :  * @args: execbuffer call arguments.
     858             :  * @vmas: list of vmas.
     859             :  * @batch_obj: the batchbuffer to submit.
     860             :  * @exec_start: batchbuffer start virtual address pointer.
     861             :  * @dispatch_flags: translated execbuffer call flags.
     862             :  *
     863             :  * This is the evil twin version of i915_gem_ringbuffer_submission. It abstracts
     864             :  * away the submission details of the execbuffer ioctl call.
     865             :  *
     866             :  * Return: non-zero if the submission fails.
     867             :  */
     868           0 : int intel_execlists_submission(struct i915_execbuffer_params *params,
     869             :                                struct drm_i915_gem_execbuffer2 *args,
     870             :                                struct list_head *vmas)
     871             : {
     872           0 :         struct drm_device       *dev = params->dev;
     873           0 :         struct intel_engine_cs  *ring = params->ring;
     874           0 :         struct drm_i915_private *dev_priv = dev->dev_private;
     875           0 :         struct intel_ringbuffer *ringbuf = params->ctx->engine[ring->id].ringbuf;
     876             :         u64 exec_start;
     877             :         int instp_mode;
     878             :         u32 instp_mask;
     879             :         int ret;
     880             : 
     881           0 :         instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
     882             :         instp_mask = I915_EXEC_CONSTANTS_MASK;
     883           0 :         switch (instp_mode) {
     884             :         case I915_EXEC_CONSTANTS_REL_GENERAL:
     885             :         case I915_EXEC_CONSTANTS_ABSOLUTE:
     886             :         case I915_EXEC_CONSTANTS_REL_SURFACE:
     887           0 :                 if (instp_mode != 0 && ring != &dev_priv->ring[RCS]) {
     888             :                         DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
     889           0 :                         return -EINVAL;
     890             :                 }
     891             : 
     892           0 :                 if (instp_mode != dev_priv->relative_constants_mode) {
     893           0 :                         if (instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
     894             :                                 DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
     895           0 :                                 return -EINVAL;
     896             :                         }
     897             : 
     898             :                         /* The HW changed the meaning on this bit on gen6 */
     899             :                         instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
     900           0 :                 }
     901             :                 break;
     902             :         default:
     903             :                 DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
     904           0 :                 return -EINVAL;
     905             :         }
     906             : 
     907           0 :         if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
     908             :                 DRM_DEBUG("sol reset is gen7 only\n");
     909           0 :                 return -EINVAL;
     910             :         }
     911             : 
     912           0 :         ret = execlists_move_to_gpu(params->request, vmas);
     913           0 :         if (ret)
     914           0 :                 return ret;
     915             : 
     916           0 :         if (ring == &dev_priv->ring[RCS] &&
     917           0 :             instp_mode != dev_priv->relative_constants_mode) {
     918           0 :                 ret = intel_logical_ring_begin(params->request, 4);
     919           0 :                 if (ret)
     920           0 :                         return ret;
     921             : 
     922           0 :                 intel_logical_ring_emit(ringbuf, MI_NOOP);
     923           0 :                 intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1));
     924           0 :                 intel_logical_ring_emit(ringbuf, INSTPM);
     925           0 :                 intel_logical_ring_emit(ringbuf, instp_mask << 16 | instp_mode);
     926           0 :                 intel_logical_ring_advance(ringbuf);
     927             : 
     928           0 :                 dev_priv->relative_constants_mode = instp_mode;
     929           0 :         }
     930             : 
     931           0 :         exec_start = params->batch_obj_vm_offset +
     932           0 :                      args->batch_start_offset;
     933             : 
     934           0 :         ret = ring->emit_bb_start(params->request, exec_start, params->dispatch_flags);
     935           0 :         if (ret)
     936           0 :                 return ret;
     937             : 
     938           0 :         trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
     939             : 
     940           0 :         i915_gem_execbuffer_move_to_active(vmas, params->request);
     941           0 :         i915_gem_execbuffer_retire_commands(params);
     942             : 
     943           0 :         return 0;
     944           0 : }
     945             : 
     946           0 : void intel_execlists_retire_requests(struct intel_engine_cs *ring)
     947             : {
     948             :         struct drm_i915_gem_request *req, *tmp;
     949           0 :         struct list_head retired_list;
     950             : 
     951           0 :         WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
     952           0 :         if (list_empty(&ring->execlist_retired_req_list))
     953           0 :                 return;
     954             : 
     955           0 :         INIT_LIST_HEAD(&retired_list);
     956           0 :         spin_lock_irq(&ring->execlist_lock);
     957           0 :         list_replace_init(&ring->execlist_retired_req_list, &retired_list);
     958           0 :         spin_unlock_irq(&ring->execlist_lock);
     959             : 
     960           0 :         list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
     961           0 :                 struct intel_context *ctx = req->ctx;
     962             :                 struct drm_i915_gem_object *ctx_obj =
     963           0 :                                 ctx->engine[ring->id].state;
     964             : 
     965           0 :                 if (ctx_obj && (ctx != ring->default_context))
     966           0 :                         intel_lr_context_unpin(req);
     967           0 :                 list_del(&req->execlist_link);
     968           0 :                 i915_gem_request_unreference(req);
     969             :         }
     970           0 : }
     971             : 
     972           0 : void intel_logical_ring_stop(struct intel_engine_cs *ring)
     973             : {
     974           0 :         struct drm_i915_private *dev_priv = ring->dev->dev_private;
     975             :         int ret;
     976             : 
     977           0 :         if (!intel_ring_initialized(ring))
     978           0 :                 return;
     979             : 
     980           0 :         ret = intel_ring_idle(ring);
     981           0 :         if (ret && !i915_reset_in_progress(&to_i915(ring->dev)->gpu_error))
     982           0 :                 DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
     983             :                           ring->name, ret);
     984             : 
     985             :         /* TODO: Is this correct with Execlists enabled? */
     986           0 :         I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING));
     987           0 :         if (wait_for_atomic((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) {
     988           0 :                 DRM_ERROR("%s :timed out trying to stop ring\n", ring->name);
     989           0 :                 return;
     990             :         }
     991           0 :         I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
     992           0 : }
     993             : 
     994           0 : int logical_ring_flush_all_caches(struct drm_i915_gem_request *req)
     995             : {
     996           0 :         struct intel_engine_cs *ring = req->ring;
     997             :         int ret;
     998             : 
     999           0 :         if (!ring->gpu_caches_dirty)
    1000           0 :                 return 0;
    1001             : 
    1002           0 :         ret = ring->emit_flush(req, 0, I915_GEM_GPU_DOMAINS);
    1003           0 :         if (ret)
    1004           0 :                 return ret;
    1005             : 
    1006           0 :         ring->gpu_caches_dirty = false;
    1007           0 :         return 0;
    1008           0 : }
    1009             : 
    1010           0 : static int intel_lr_context_do_pin(struct intel_engine_cs *ring,
    1011             :                 struct drm_i915_gem_object *ctx_obj,
    1012             :                 struct intel_ringbuffer *ringbuf)
    1013             : {
    1014           0 :         struct drm_device *dev = ring->dev;
    1015           0 :         struct drm_i915_private *dev_priv = dev->dev_private;
    1016             :         int ret = 0;
    1017             : 
    1018           0 :         WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
    1019           0 :         ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN,
    1020             :                         PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
    1021           0 :         if (ret)
    1022           0 :                 return ret;
    1023             : 
    1024           0 :         ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
    1025           0 :         if (ret)
    1026             :                 goto unpin_ctx_obj;
    1027             : 
    1028           0 :         ctx_obj->dirty = true;
    1029             : 
    1030             :         /* Invalidate GuC TLB. */
    1031           0 :         if (i915.enable_guc_submission)
    1032           0 :                 I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
    1033             : 
    1034           0 :         return ret;
    1035             : 
    1036             : unpin_ctx_obj:
    1037           0 :         i915_gem_object_ggtt_unpin(ctx_obj);
    1038             : 
    1039           0 :         return ret;
    1040           0 : }
    1041             : 
                       /*
                        * Take one pin reference on the context state that @rq uses on rq->ring.
                        *
                        * The per-engine pin_count is post-incremented unconditionally; only when
                        * its previous value was 0 is intel_lr_context_do_pin() called.  If that
                        * call fails, pin_count is forced back to 0.
                        *
                        * Return: 0 on success (including the already-pinned case), otherwise the
                        * error from intel_lr_context_do_pin().
                        */
    1042           0 : static int intel_lr_context_pin(struct drm_i915_gem_request *rq)
    1043             : {
    1044             :         int ret = 0;
    1045           0 :         struct intel_engine_cs *ring = rq->ring;
    1046           0 :         struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
    1047           0 :         struct intel_ringbuffer *ringbuf = rq->ringbuf;
    1048             : 
    1049           0 :         if (rq->ctx->engine[ring->id].pin_count++ == 0) {
    1050           0 :                 ret = intel_lr_context_do_pin(ring, ctx_obj, ringbuf);
    1051           0 :                 if (ret)
    1052             :                         goto reset_pin_count;
    1053             :         }
    1054           0 :         return ret;
    1055             : 
                       /* Only reached from the 0 -> 1 transition above, so 0 is the old value. */
    1056             : reset_pin_count:
    1057           0 :         rq->ctx->engine[ring->id].pin_count = 0;
    1058           0 :         return ret;
    1059           0 : }
    1060             : 
                       /*
                        * Drop one pin reference on the context state that @rq uses on rq->ring.
                        *
                        * Does nothing when the context has no state object for this ring.
                        * Otherwise warns if ring->dev->struct_mutex is not locked, pre-decrements
                        * pin_count and, once it reaches 0, unpins the ringbuffer object and then
                        * the context state object.
                        */
    1061           0 : void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
    1062             : {
    1063           0 :         struct intel_engine_cs *ring = rq->ring;
    1064           0 :         struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
    1065           0 :         struct intel_ringbuffer *ringbuf = rq->ringbuf;
    1066             : 
    1067           0 :         if (ctx_obj) {
    1068           0 :                 WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
    1069           0 :                 if (--rq->ctx->engine[ring->id].pin_count == 0) {
    1070           0 :                         intel_unpin_ringbuffer_obj(ringbuf);
    1071           0 :                         i915_gem_object_ggtt_unpin(ctx_obj);
    1072           0 :                 }
    1073             :         }
    1074           0 : }
    1075             : 
                       /*
                        * Emit the register workarounds recorded in dev_priv->workarounds into
                        * the ringbuffer of @req as one MI_LOAD_REGISTER_IMM command, with a
                        * logical_ring_flush_all_caches() call before and after it.
                        *
                        * Return: 0 on success, and also (with a one-time warning) when the
                        * workaround list is empty; otherwise the error from the flush or from
                        * intel_logical_ring_begin().
                        */
    1076           0 : static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
    1077             : {
    1078             :         int ret, i;
    1079           0 :         struct intel_engine_cs *ring = req->ring;
    1080           0 :         struct intel_ringbuffer *ringbuf = req->ringbuf;
    1081           0 :         struct drm_device *dev = ring->dev;
    1082           0 :         struct drm_i915_private *dev_priv = dev->dev_private;
    1083           0 :         struct i915_workarounds *w = &dev_priv->workarounds;
    1084             : 
    1085           0 :         if (WARN_ON_ONCE(w->count == 0))
    1086           0 :                 return 0;
    1087             : 
                               /*
                                * Presumably the dirty flag is what makes the flush below
                                * actually run -- confirm in logical_ring_flush_all_caches().
                                */
    1088           0 :         ring->gpu_caches_dirty = true;
    1089           0 :         ret = logical_ring_flush_all_caches(req);
    1090           0 :         if (ret)
    1091           0 :                 return ret;
    1092             : 
                               /*
                                * Dwords requested: 1 LRI header + an (addr, value) pair per
                                * workaround + 1 trailing MI_NOOP.
                                */
    1093           0 :         ret = intel_logical_ring_begin(req, w->count * 2 + 2);
    1094           0 :         if (ret)
    1095           0 :                 return ret;
    1096             : 
    1097           0 :         intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(w->count));
    1098           0 :         for (i = 0; i < w->count; i++) {
    1099           0 :                 intel_logical_ring_emit(ringbuf, w->reg[i].addr);
    1100           0 :                 intel_logical_ring_emit(ringbuf, w->reg[i].value);
    1101             :         }
    1102           0 :         intel_logical_ring_emit(ringbuf, MI_NOOP);
    1103             : 
    1104           0 :         intel_logical_ring_advance(ringbuf);
    1105             : 
                               /* Second flush, after the register writes have been queued. */
    1106           0 :         ring->gpu_caches_dirty = true;
    1107           0 :         ret = logical_ring_flush_all_caches(req);
    1108           0 :         if (ret)
    1109           0 :                 return ret;
    1110             : 
    1111           0 :         return 0;
    1112           0 : }
    1113             : 
                       /*
                        * wa_ctx_emit() - store one dword in a WA batch and advance the index.
                        *
                        * Writes @cmd to batch[index] and post-increments @index (which must be
                        * an lvalue).  If the slot would lie at or beyond
                        * PAGE_SIZE / sizeof(uint32_t), it warns and executes "return -ENOSPC"
                        * in the *enclosing* function, so it may only be used in functions that
                        * return int.  Note that @index has already been incremented when the
                        * bounds check fails.
                        */
    1114             : #define wa_ctx_emit(batch, index, cmd)                                  \
    1115             :         do {                                                            \
    1116             :                 int __index = (index)++;                                \
    1117             :                 if (WARN_ON(__index >= (PAGE_SIZE / sizeof(uint32_t)))) { \
    1118             :                         return -ENOSPC;                                 \
    1119             :                 }                                                       \
    1120             :                 batch[__index] = (cmd);                                 \
    1121             :         } while (0)
    1122             : 
    1123             : 
    1124             : /*
    1125             :  * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
    1126             :  * PIPE_CONTROL instruction. This is required for the flush to happen correctly
    1127             :  * but there is a slight complication as this is applied in WA batch where the
    1128             :  * values are only initialized once so we cannot take register value at the
    1129             :  * beginning and reuse it further; hence we save its value to memory, upload a
    1130             :  * constant value with bit21 set and then we restore it back with the saved value.
    1131             :  * To simplify the WA, a constant value is formed by using the default value
    1132             :  * of this register. This shouldn't be a problem because we are only modifying
    1133             :  * it for a short period and this batch is non-preemptible. We could of course
    1134             :  * use additional instructions that read the actual value of the register
    1135             :  * at that time and set our bit of interest but it makes the WA complicated.
    1136             :  *
    1137             :  * This WA is also required for Gen9 so extracting as a function avoids
    1138             :  * code duplication.
                        *
                        * @ring:  engine; ring->scratch.gtt_offset + 256 is used as the save slot
                        * @batch: WA batch page being filled
                        * @index: first free dword in @batch.  Passed by value, so the caller
                        *         must continue from the returned index.
                        *
                        * Return: index of the next free dword on success, or -ENOSPC (returned
                        * from inside wa_ctx_emit()) if an emit would go past the page.
    1139             :  */
    1140           0 : static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *ring,
    1141             :                                                 uint32_t *const batch,
    1142             :                                                 uint32_t index)
    1143             : {
                               /*
                                * 0x40400000 is presumably the register default mentioned in the
                                * comment above this function -- confirm against the hardware docs.
                                */
    1144             :         uint32_t l3sqc4_flush = (0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES);
    1145             : 
    1146             :         /*
    1147             :          * WaDisableLSQCROPERFforOCL:skl,kbl
    1148             :          * This WA is implemented in skl_init_clock_gating() but since
    1149             :          * this batch updates GEN8_L3SQCREG4 with default value we need to
    1150             :          * set this bit here to retain the WA during flush.
    1151             :          */
    1152           0 :         if (IS_SKL_REVID(ring->dev, 0, SKL_REVID_E0) ||
    1153           0 :             IS_KBL_REVID(ring->dev, 0, KBL_REVID_E0))
    1154           0 :                 l3sqc4_flush |= GEN8_LQSC_RO_PERF_DIS;
    1155             : 
                               /* Step 1: save GEN8_L3SQCREG4 to the scratch slot. */
    1156           0 :         wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 |
    1157             :                                    MI_SRM_LRM_GLOBAL_GTT));
    1158           0 :         wa_ctx_emit(batch, index, GEN8_L3SQCREG4);
    1159           0 :         wa_ctx_emit(batch, index, ring->scratch.gtt_offset + 256);
    1160           0 :         wa_ctx_emit(batch, index, 0);
    1161             : 
                               /* Step 2: load the constant with the flush bit set. */
    1162           0 :         wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
    1163           0 :         wa_ctx_emit(batch, index, GEN8_L3SQCREG4);
    1164           0 :         wa_ctx_emit(batch, index, l3sqc4_flush);
    1165             : 
                               /* Step 3: 6-dword PIPE_CONTROL with CS stall and DC flush. */
    1166           0 :         wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
    1167           0 :         wa_ctx_emit(batch, index, (PIPE_CONTROL_CS_STALL |
    1168             :                                    PIPE_CONTROL_DC_FLUSH_ENABLE));
    1169           0 :         wa_ctx_emit(batch, index, 0);
    1170           0 :         wa_ctx_emit(batch, index, 0);
    1171           0 :         wa_ctx_emit(batch, index, 0);
    1172           0 :         wa_ctx_emit(batch, index, 0);
    1173             : 
                               /* Step 4: restore GEN8_L3SQCREG4 from the same scratch slot. */
    1174           0 :         wa_ctx_emit(batch, index, (MI_LOAD_REGISTER_MEM_GEN8 |
    1175             :                                    MI_SRM_LRM_GLOBAL_GTT));
    1176           0 :         wa_ctx_emit(batch, index, GEN8_L3SQCREG4);
    1177           0 :         wa_ctx_emit(batch, index, ring->scratch.gtt_offset + 256);
    1178           0 :         wa_ctx_emit(batch, index, 0);
    1179             : 
    1180           0 :         return index;
    1181           0 : }
    1182             : 
                       /*
                        * Record where a WA batch begins: @offset is rounded up with roundup2()
                        * to @start_alignment, stored in wa_ctx->offset and returned.
                        *
                        * NOTE(review): roundup2() presumably requires @start_alignment to be a
                        * power of two -- confirm.
                        */
    1183           0 : static inline uint32_t wa_ctx_start(struct i915_wa_ctx_bb *wa_ctx,
    1184             :                                     uint32_t offset,
    1185             :                                     uint32_t start_alignment)
    1186             : {
    1187           0 :         return wa_ctx->offset = roundup2(offset, start_alignment);
    1188             : }
    1189             : 
                       /*
                        * Close a WA batch: wa_ctx->size is set to @offset - wa_ctx->offset, and
                        * a warning is printed if that size is not a multiple of @size_alignment.
                        *
                        * Return: always 0; a misaligned size only warns, it is not an error.
                        */
    1190           0 : static inline int wa_ctx_end(struct i915_wa_ctx_bb *wa_ctx,
    1191             :                              uint32_t offset,
    1192             :                              uint32_t size_alignment)
    1193             : {
    1194           0 :         wa_ctx->size = offset - wa_ctx->offset;
    1195             : 
                               /*
                                * NOTE(review): %d is used for the uint32_t size_alignment (and
                                * for wa_ctx->size, whose type is not visible here) -- confirm.
                                */
    1196           0 :         WARN(wa_ctx->size % size_alignment,
    1197             :              "wa_ctx_bb failed sanity checks: size %d is not aligned to %d\n",
    1198             :              wa_ctx->size, size_alignment);
    1199           0 :         return 0;
    1200             : }
    1201             : 
    1202             : /**
    1203             :  * gen8_init_indirectctx_bb() - initialize indirect ctx batch with WA
    1204             :  *
    1205             :  * @ring: only applicable for RCS
    1206             :  * @wa_ctx: structure representing wa_ctx
    1207             :  *  offset: start of the batch, rounded up to a cacheline boundary from the
    1208             :  *    offset value received as input.
    1209             :  *  size: size of the batch in DWORDS but HW expects in terms of cachelines
    1210             :  * @batch: page in which WA are loaded
    1211             :  * @offset: This field specifies the start of the batch; it should be
    1212             :  *  cache-aligned, otherwise it is adjusted accordingly.
    1213             :  *  Typically we only have one indirect_ctx and per_ctx batch buffer which are
    1214             :  *  initialized at the beginning and shared across all contexts but this field
    1215             :  *  helps us to have multiple batches at different offsets and select them based
    1216             :  *  on some criteria. At the moment this batch always starts at the beginning of
    1217             :  *  the page and at this point we don't have multiple wa_ctx batch buffers.
    1218             :  *
    1219             :  *  The number of WA applied is not known at the beginning; on success this
    1220             :  *  field is updated to the DWORD index just past this batch.
    1221             :  *
    1222             :  *  It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
    1223             :  *  so it adds NOOPs as padding to make it cacheline aligned.
    1224             :  *  MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
    1225             :  *  make a complete batch buffer.
    1226             :  *
    1227             :  * Return: 0 on success, -ENOSPC if an emit would exceed the PAGE_SIZE limit.
    1228             :  */
    1229             : 
    1230           0 : static int gen8_init_indirectctx_bb(struct intel_engine_cs *ring,
    1231             :                                     struct i915_wa_ctx_bb *wa_ctx,
    1232             :                                     uint32_t *const batch,
    1233             :                                     uint32_t *offset)
    1234             : {
    1235             :         uint32_t scratch_addr;
    1236           0 :         uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
    1237             : 
    1238             :         /* WaDisableCtxRestoreArbitration:bdw,chv */
    1239           0 :         wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE);
    1240             : 
    1241             :         /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
    1242           0 :         if (IS_BROADWELL(ring->dev)) {
                                       /* rc is either a negative errno or the new index. */
    1243           0 :                 int rc = gen8_emit_flush_coherentl3_wa(ring, batch, index);
    1244           0 :                 if (rc < 0)
    1245           0 :                         return rc;
    1246             :                 index = rc;
    1247           0 :         }
    1248             : 
    1249             :         /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
    1250             :         /* Actual scratch location is at 128 bytes offset */
    1251           0 :         scratch_addr = ring->scratch.gtt_offset + 2*CACHELINE_BYTES;
    1252             : 
    1253           0 :         wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
    1254           0 :         wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 |
    1255             :                                    PIPE_CONTROL_GLOBAL_GTT_IVB |
    1256             :                                    PIPE_CONTROL_CS_STALL |
    1257             :                                    PIPE_CONTROL_QW_WRITE));
    1258           0 :         wa_ctx_emit(batch, index, scratch_addr);
    1259           0 :         wa_ctx_emit(batch, index, 0);
    1260           0 :         wa_ctx_emit(batch, index, 0);
    1261           0 :         wa_ctx_emit(batch, index, 0);
    1262             : 
    1263             :         /* Pad to end of cacheline */
    1264           0 :         while (index % CACHELINE_DWORDS)
    1265           0 :                 wa_ctx_emit(batch, index, MI_NOOP);
    1266             : 
    1267             :         /*
    1268             :          * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
    1269             :          * execution depends on the length specified in terms of cache lines
    1270             :          * in the register CTX_RCS_INDIRECT_CTX
    1271             :          */
    1272             : 
                               /* *offset is only written here, i.e. not on the error returns. */
    1273           0 :         return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS);
    1274           0 : }
    1275             : 
    1276             : /**
    1277             :  * gen8_init_perctx_bb() - initialize per ctx batch with WA
    1278             :  *
    1279             :  * @ring: only applicable for RCS
    1280             :  * @wa_ctx: structure representing wa_ctx
    1281             :  *  offset: specifies start of the batch, should be cache-aligned.
    1282             :  *  size: size of the batch in DWORDS but HW expects in terms of cachelines
    1283             :  * @batch: page in which WA are loaded
    1284             :  * @offset: This field specifies the start of this batch.
    1285             :  *   This batch is started immediately after indirect_ctx batch. Since we ensure
    1286             :  *   that indirect_ctx ends on a cacheline this batch is aligned automatically.
    1287             :  *
    1288             :  *   On success it is updated to the DWORD index just past this batch.
    1289             :  *
    1290             :  *  This batch is terminated with MI_BATCH_BUFFER_END and so we need not add padding
    1291             :  *  to align it with cacheline as padding after MI_BATCH_BUFFER_END is redundant.
                        *
                        * Return: 0 on success, -ENOSPC if an emit would exceed the PAGE_SIZE limit.
    1292             :  */
    1293           0 : static int gen8_init_perctx_bb(struct intel_engine_cs *ring,
    1294             :                                struct i915_wa_ctx_bb *wa_ctx,
    1295             :                                uint32_t *const batch,
    1296             :                                uint32_t *offset)
    1297             : {
    1298           0 :         uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
    1299             : 
    1300             :         /* WaDisableCtxRestoreArbitration:bdw,chv */
    1301           0 :         wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE);
    1302             : 
    1303           0 :         wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END);
    1304             : 
                               /* Size alignment of 1: any size passes the wa_ctx_end() check. */
    1305           0 :         return wa_ctx_end(wa_ctx, *offset = index, 1);
    1306           0 : }
    1307             : 
                       /*
                        * Gen9 counterpart of gen8_init_indirectctx_bb(): fill the indirect ctx
                        * WA batch in @batch starting at *@offset (rounded up to a cacheline).
                        *
                        * Emits MI_ARB_DISABLE only on the SKL/BXT revisions tested below, then
                        * the coherent-L3 flush WA unconditionally, then MI_NOOP padding up to
                        * the next cacheline boundary.  On success *@offset is updated to the
                        * DWORD index just past this batch.
                        *
                        * Return: 0 on success, -ENOSPC if an emit would exceed the page.
                        */
    1308           0 : static int gen9_init_indirectctx_bb(struct intel_engine_cs *ring,
    1309             :                                     struct i915_wa_ctx_bb *wa_ctx,
    1310             :                                     uint32_t *const batch,
    1311             :                                     uint32_t *offset)
    1312             : {
    1313             :         int ret;
    1314           0 :         struct drm_device *dev = ring->dev;
    1315           0 :         uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
    1316             : 
    1317             :         /* WaDisableCtxRestoreArbitration:skl,bxt */
    1318           0 :         if (IS_SKL_REVID(dev, 0, SKL_REVID_D0) ||
    1319           0 :             IS_BXT_REVID(dev, 0, BXT_REVID_A0))
    1320           0 :                 wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE);
    1321             : 
    1322             :         /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt */
    1323           0 :         ret = gen8_emit_flush_coherentl3_wa(ring, batch, index);
    1324           0 :         if (ret < 0)
    1325           0 :                 return ret;
                               /* A non-negative return is the next free DWORD index. */
    1326             :         index = ret;
    1327             : 
    1328             :         /* Pad to end of cacheline */
    1329           0 :         while (index % CACHELINE_DWORDS)
    1330           0 :                 wa_ctx_emit(batch, index, MI_NOOP);
    1331             : 
    1332           0 :         return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS);
    1333           0 : }
    1334             : 
                       /*
                        * Gen9 counterpart of gen8_init_perctx_bb(): fill the per ctx WA batch in
                        * @batch starting at *@offset (rounded up to a cacheline).
                        *
                        * Both workarounds below are emitted only on the SKL/BXT revisions
                        * tested; MI_BATCH_BUFFER_END is always emitted last.  On success
                        * *@offset is updated to the DWORD index just past this batch.
                        *
                        * Return: 0 on success, -ENOSPC if an emit would exceed the page.
                        */
    1335           0 : static int gen9_init_perctx_bb(struct intel_engine_cs *ring,
    1336             :                                struct i915_wa_ctx_bb *wa_ctx,
    1337             :                                uint32_t *const batch,
    1338             :                                uint32_t *offset)
    1339             : {
    1340           0 :         struct drm_device *dev = ring->dev;
    1341           0 :         uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
    1342             : 
    1343             :         /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
    1344           0 :         if (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
    1345           0 :             IS_BXT_REVID(dev, 0, BXT_REVID_A0)) {
    1346           0 :                 wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
    1347           0 :                 wa_ctx_emit(batch, index, GEN9_SLICE_COMMON_ECO_CHICKEN0);
    1348           0 :                 wa_ctx_emit(batch, index,
    1349             :                             _MASKED_BIT_ENABLE(DISABLE_PIXEL_MASK_CAMMING));
    1350           0 :                 wa_ctx_emit(batch, index, MI_NOOP);
    1351             :         }
    1352             : 
    1353             :         /* WaDisableCtxRestoreArbitration:skl,bxt */
    1354           0 :         if (IS_SKL_REVID(dev, 0, SKL_REVID_D0) ||
    1355           0 :             IS_BXT_REVID(dev, 0, BXT_REVID_A0))
    1356           0 :                 wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE);
    1357             : 
    1358           0 :         wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END);
    1359             : 
    1360           0 :         return wa_ctx_end(wa_ctx, *offset = index, 1);
    1361           0 : }
    1362             : 
                       /*
                        * Allocate the backing object for the WA batches of @ring, sized
                        * PAGE_ALIGN(@size), store it in ring->wa_ctx.obj and pin it with
                        * i915_gem_obj_ggtt_pin().
                        *
                        * Return: 0 on success, -ENOMEM if the allocation fails, or the pin
                        * error (after dropping the object reference).
                        *
                        * NOTE(review): on pin failure ring->wa_ctx.obj is not reset to NULL
                        * after the unreference -- confirm no caller touches it afterwards.
                        */
    1363           0 : static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *ring, u32 size)
    1364             : {
    1365             :         int ret;
    1366             : 
    1367           0 :         ring->wa_ctx.obj = i915_gem_alloc_object(ring->dev, PAGE_ALIGN(size));
    1368           0 :         if (!ring->wa_ctx.obj) {
    1369             :                 DRM_DEBUG_DRIVER("alloc LRC WA ctx backing obj failed.\n");
    1370           0 :                 return -ENOMEM;
    1371             :         }
    1372             : 
    1373           0 :         ret = i915_gem_obj_ggtt_pin(ring->wa_ctx.obj, PAGE_SIZE, 0);
    1374           0 :         if (ret) {
    1375             :                 DRM_DEBUG_DRIVER("pin LRC WA ctx backing obj failed: %d\n",
    1376             :                                  ret);
    1377           0 :                 drm_gem_object_unreference(&ring->wa_ctx.obj->base);
    1378           0 :                 return ret;
    1379             :         }
    1380             : 
    1381           0 :         return 0;
    1382           0 : }
    1383             : 
                       /*
                        * Release the WA batch object of @ring, if any: unpin it, drop the
                        * reference and clear ring->wa_ctx.obj so that a repeated call is a
                        * no-op.
                        */
    1384           0 : static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *ring)
    1385             : {
    1386           0 :         if (ring->wa_ctx.obj) {
    1387           0 :                 i915_gem_object_ggtt_unpin(ring->wa_ctx.obj);
    1388           0 :                 drm_gem_object_unreference(&ring->wa_ctx.obj->base);
    1389           0 :                 ring->wa_ctx.obj = NULL;
    1390           0 :         }
    1391           0 : }
    1392             : 
                       /*
                        * Build the context workaround batch buffers for @ring.  The ring is
                        * expected to be RCS; any other id only triggers a WARN_ON.
                        *
                        * Sets up a one-page object with lrc_setup_wa_ctx_obj(), maps its page 0
                        * with kmap_atomic() and fills it with the indirect ctx batch followed
                        * by the per ctx batch, using the gen8 or gen9 builders according to
                        * INTEL_INFO(ring->dev)->gen.  The page is unmapped on every path after
                        * the mapping; if a builder fails, the object is destroyed again.
                        *
                        * Return: 0 on success, and also for gen > 9, where an error is logged
                        * and nothing is set up; -EINVAL if ring->scratch.obj is NULL; otherwise
                        * the error from lrc_setup_wa_ctx_obj() or from a batch builder.
                        */
    1393           0 : static int intel_init_workaround_bb(struct intel_engine_cs *ring)
    1394             : {
    1395             :         int ret;
    1396             :         uint32_t *batch;
    1397           0 :         uint32_t offset;
    1398             :         struct vm_page *page;
    1399           0 :         struct i915_ctx_workarounds *wa_ctx = &ring->wa_ctx;
    1400             : 
    1401           0 :         WARN_ON(ring->id != RCS);
    1402             : 
    1403             :         /* update this when WA for higher Gen are added */
    1404           0 :         if (INTEL_INFO(ring->dev)->gen > 9) {
    1405           0 :                 DRM_ERROR("WA batch buffer is not initialized for Gen%d\n",
    1406             :                           INTEL_INFO(ring->dev)->gen);
    1407           0 :                 return 0;
    1408             :         }
    1409             : 
    1410             :         /* some WA perform writes to scratch page, ensure it is valid */
    1411           0 :         if (ring->scratch.obj == NULL) {
    1412           0 :                 DRM_ERROR("scratch page not allocated for %s\n", ring->name);
    1413           0 :                 return -EINVAL;
    1414             :         }
    1415             : 
    1416           0 :         ret = lrc_setup_wa_ctx_obj(ring, PAGE_SIZE);
    1417           0 :         if (ret) {
    1418             :                 DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
    1419           0 :                 return ret;
    1420             :         }
    1421             : 
    1422           0 :         page = i915_gem_object_get_page(wa_ctx->obj, 0);
    1423           0 :         batch = kmap_atomic(page);
                               /* Both batches share the page; offset is the running DWORD index. */
    1424           0 :         offset = 0;
    1425             : 
    1426           0 :         if (INTEL_INFO(ring->dev)->gen == 8) {
    1427           0 :                 ret = gen8_init_indirectctx_bb(ring,
    1428           0 :                                                &wa_ctx->indirect_ctx,
    1429             :                                                batch,
    1430             :                                                &offset);
    1431           0 :                 if (ret)
    1432             :                         goto out;
    1433             : 
    1434           0 :                 ret = gen8_init_perctx_bb(ring,
    1435           0 :                                           &wa_ctx->per_ctx,
    1436             :                                           batch,
    1437             :                                           &offset);
    1438           0 :                 if (ret)
    1439             :                         goto out;
    1440           0 :         } else if (INTEL_INFO(ring->dev)->gen == 9) {
    1441           0 :                 ret = gen9_init_indirectctx_bb(ring,
    1442           0 :                                                &wa_ctx->indirect_ctx,
    1443             :                                                batch,
    1444             :                                                &offset);
    1445           0 :                 if (ret)
    1446             :                         goto out;
    1447             : 
    1448           0 :                 ret = gen9_init_perctx_bb(ring,
    1449           0 :                                           &wa_ctx->per_ctx,
    1450             :                                           batch,
    1451             :                                           &offset);
    1452           0 :                 if (ret)
    1453             :                         goto out;
    1454             :         }
                               /*
                                * For any other gen neither branch runs; ret is still 0 from
                                * lrc_setup_wa_ctx_obj(), so the pinned page is kept unfilled.
                                */
    1455             : 
    1456             : out:
    1457           0 :         kunmap_atomic(batch);
    1458           0 :         if (ret)
    1459           0 :                 lrc_destroy_wa_ctx_obj(ring);
    1460             : 
    1461           0 :         return ret;
    1462           0 : }
    1463             : 
                       /*
                        * Engine initialisation shared by the gen8/gen9 init hooks in this file.
                        *
                        * Sets up the hardware status page from the default context's state for
                        * this ring, programs the interrupt mask and HWSTAM, writes the status
                        * page address when ring->status_page.obj is set, enables run-list mode
                        * in RING_MODE_GEN7, seeds ring->next_context_status_buffer from the
                        * hardware CSB pointer register and clears ring->hangcheck.
                        *
                        * Return: always 0.
                        */
    1464           0 : static int gen8_init_common_ring(struct intel_engine_cs *ring)
    1465             : {
    1466           0 :         struct drm_device *dev = ring->dev;
    1467           0 :         struct drm_i915_private *dev_priv = dev->dev_private;
    1468             :         u8 next_context_status_buffer_hw;
    1469             : 
    1470           0 :         lrc_setup_hardware_status_page(ring,
    1471           0 :                                 ring->default_context->engine[ring->id].state);
    1472             : 
    1473           0 :         I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask));
    1474           0 :         I915_WRITE(RING_HWSTAM(ring->mmio_base), 0xffffffff);
    1475             : 
    1476           0 :         if (ring->status_page.obj) {
    1477           0 :                 I915_WRITE(RING_HWS_PGA(ring->mmio_base),
    1478             :                            (u32)ring->status_page.gfx_addr);
    1479           0 :                 POSTING_READ(RING_HWS_PGA(ring->mmio_base));
    1480           0 :         }
    1481             : 
    1482           0 :         I915_WRITE(RING_MODE_GEN7(ring),
    1483             :                    _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) |
    1484             :                    _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
    1485           0 :         POSTING_READ(RING_MODE_GEN7(ring));
    1486             : 
    1487             :         /*
    1488             :          * Instead of resetting the Context Status Buffer (CSB) read pointer to
    1489             :          * zero, we need to read the write pointer from hardware and use its
    1490             :          * value because "this register is power context save restored".
    1491             :          * Effectively, these states have been observed:
    1492             :          *
    1493             :          *      | Suspend-to-idle (freeze) | Suspend-to-RAM (mem) |
    1494             :          * BDW  | CSB regs not reset       | CSB regs reset       |
    1495             :          * CHT  | CSB regs not reset       | CSB regs not reset   |
    1496             :          */
    1497           0 :         next_context_status_buffer_hw = (I915_READ(RING_CONTEXT_STATUS_PTR(ring))
    1498           0 :                                                    & GEN8_CSB_PTR_MASK);
    1499             : 
    1500             :         /*
    1501             :          * When the CSB registers are reset (also after power-up / gpu reset),
    1502             :          * CSB write pointer is set to all 1's, which is not valid; use
    1503             :          * GEN8_CSB_ENTRIES - 1 ('5' per the original note) so the first element read is CSB[0].
    1504             :          */
    1505           0 :         if (next_context_status_buffer_hw == GEN8_CSB_PTR_MASK)
    1506             :                 next_context_status_buffer_hw = (GEN8_CSB_ENTRIES - 1);
    1507             : 
    1508           0 :         ring->next_context_status_buffer = next_context_status_buffer_hw;
    1509             :         DRM_DEBUG_DRIVER("Execlists enabled for %s\n", ring->name);
    1510             : 
    1511           0 :         memset(&ring->hangcheck, 0, sizeof(ring->hangcheck));
    1512             : 
    1513           0 :         return 0;
    1514             : }
    1515             : 
                       /*
                        * Gen8 render ring init: run gen8_init_common_ring(), set
                        * ASYNC_FLIP_PERF_DISABLE in MI_MODE and INSTPM_FORCE_ORDERING in INSTPM,
                        * then hand over to init_workarounds_ring().
                        *
                        * Return: the error from gen8_init_common_ring() if it fails, otherwise
                        * the result of init_workarounds_ring().
                        */
    1516           0 : static int gen8_init_render_ring(struct intel_engine_cs *ring)
    1517             : {
    1518           0 :         struct drm_device *dev = ring->dev;
                               /*
                                * NOTE(review): dev_priv is not named again below; presumably
                                * I915_WRITE() picks it up implicitly -- confirm.
                                */
    1519           0 :         struct drm_i915_private *dev_priv = dev->dev_private;
    1520             :         int ret;
    1521             : 
    1522           0 :         ret = gen8_init_common_ring(ring);
    1523           0 :         if (ret)
    1524           0 :                 return ret;
    1525             : 
    1526             :         /* We need to disable the AsyncFlip performance optimisations in order
    1527             :          * to use MI_WAIT_FOR_EVENT within the CS. It should already be
    1528             :          * programmed to '1' on all products.
    1529             :          *
    1530             :          * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv
    1531             :          */
    1532           0 :         I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
    1533             : 
    1534           0 :         I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
    1535             : 
    1536           0 :         return init_workarounds_ring(ring);
    1537           0 : }
    1538             : 
                       /*
                        * Gen9 render ring init: run gen8_init_common_ring() and then
                        * init_workarounds_ring().  Unlike gen8_init_render_ring(), it does not
                        * write MI_MODE or INSTPM.
                        *
                        * Return: the error from gen8_init_common_ring() if it fails, otherwise
                        * the result of init_workarounds_ring().
                        */
    1539           0 : static int gen9_init_render_ring(struct intel_engine_cs *ring)
    1540             : {
    1541             :         int ret;
    1542             : 
    1543           0 :         ret = gen8_init_common_ring(ring);
    1544           0 :         if (ret)
    1545           0 :                 return ret;
    1546             : 
    1547           0 :         return init_workarounds_ring(ring);
    1548           0 : }
    1549             : 
                       /*
                        * Emit one MI_LOAD_REGISTER_IMM into the ringbuffer of @req that loads
                        * the upper and lower PDP registers of req->ring for every legacy page
                        * directory pointer of req->ctx->ppgtt, using the DMA address returned
                        * by i915_page_dir_dma_addr().
                        *
                        * Return: 0 on success, or the error from intel_logical_ring_begin().
                        */
    1550           0 : static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
    1551             : {
    1552           0 :         struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt;
    1553           0 :         struct intel_engine_cs *ring = req->ring;
    1554           0 :         struct intel_ringbuffer *ringbuf = req->ringbuf;
                               /* Two register writes (UDW + LDW) per PDP entry. */
    1555             :         const int num_lri_cmds = GEN8_LEGACY_PDPES * 2;
    1556             :         int i, ret;
    1557             : 
                               /*
                                * Dwords requested: 1 LRI header + a (register, value) pair per
                                * write + 1 trailing MI_NOOP.
                                */
    1558           0 :         ret = intel_logical_ring_begin(req, num_lri_cmds * 2 + 2);
    1559           0 :         if (ret)
    1560           0 :                 return ret;
    1561             : 
    1562           0 :         intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(num_lri_cmds));
                               /* Entries are emitted from the highest PDP index down to 0. */
    1563           0 :         for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
    1564           0 :                 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
    1565             : 
    1566           0 :                 intel_logical_ring_emit(ringbuf, GEN8_RING_PDP_UDW(ring, i));
    1567           0 :                 intel_logical_ring_emit(ringbuf, upper_32_bits(pd_daddr));
    1568           0 :                 intel_logical_ring_emit(ringbuf, GEN8_RING_PDP_LDW(ring, i));
    1569           0 :                 intel_logical_ring_emit(ringbuf, lower_32_bits(pd_daddr));
    1570             :         }
    1571             : 
    1572           0 :         intel_logical_ring_emit(ringbuf, MI_NOOP);
    1573           0 :         intel_logical_ring_advance(ringbuf);
    1574             : 
    1575           0 :         return 0;
    1576           0 : }
    1577             : /*
                     :  * gen8_emit_bb_start() - emit an MI_BATCH_BUFFER_START_GEN8 for a batch.
                     :  *
                     :  * @req:            request whose ringbuffer receives the commands.
                     :  * @offset:         64-bit address of the batch; emitted as low dword
                     :  *                  then high dword.
                     :  * @dispatch_flags: I915_DISPATCH_SECURE clears bit 8 of the command;
                     :  *                  I915_DISPATCH_RS adds MI_BATCH_RESOURCE_STREAMER.
                     :  *
                     :  * If the context has a ppgtt and this ring is flagged in its
                     :  * pd_dirty_rings, the PDPs are re-emitted first (unless full 48-bit
                     :  * ppgtt is in use or a vGPU is active) and the ring's dirty flag is
                     :  * cleared.
                     :  *
                     :  * Return: 0 on success, or the non-zero value from
                     :  * intel_logical_ring_emit_pdps() / intel_logical_ring_begin().
                     :  */
    1578           0 : static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
    1579             :                               u64 offset, unsigned dispatch_flags)
    1580             : {
    1581           0 :         struct intel_ringbuffer *ringbuf = req->ringbuf;
    1582           0 :         bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE);
    1583             :         int ret;
    1584             : 
    1585             :         /* Don't rely on the hw updating PDPs, especially in lite-restore.
    1586             :          * Ideally, we should set Force PD Restore in ctx descriptor,
    1587             :          * but we can't. Force Restore would be a second option, but
    1588             :          * it is unsafe in case of lite-restore (because the ctx is
    1589             :          * not idle). PML4 is allocated during ppgtt init so this is
    1590             :          * not needed in 48-bit. */
    1591           0 :         if (req->ctx->ppgtt &&
    1592           0 :             (intel_ring_flag(req->ring) & req->ctx->ppgtt->pd_dirty_rings)) {
    1593           0 :                 if (!USES_FULL_48BIT_PPGTT(req->i915) &&
    1594           0 :                     !intel_vgpu_active(req->i915->dev)) {
    1595           0 :                         ret = intel_logical_ring_emit_pdps(req);
    1596           0 :                         if (ret)
    1597           0 :                                 return ret;
    1598             :                 }
    1599             : 
                     :                 /* The dirty flag is cleared even when no PDPs were emitted. */
    1600           0 :                 req->ctx->ppgtt->pd_dirty_rings &= ~intel_ring_flag(req->ring);
    1601           0 :         }
    1602             : 
                     :         /* Four dwords: command, address low, address high, MI_NOOP. */
    1603           0 :         ret = intel_logical_ring_begin(req, 4);
    1604           0 :         if (ret)
    1605           0 :                 return ret;
    1606             : 
    1607             :         /* FIXME(BDW): Address space and security selectors. */
    1608           0 :         intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 |
    1609           0 :                                 (ppgtt<<8) |
    1610           0 :                                 (dispatch_flags & I915_DISPATCH_RS ?
    1611             :                                  MI_BATCH_RESOURCE_STREAMER : 0));
    1612           0 :         intel_logical_ring_emit(ringbuf, lower_32_bits(offset));
    1613           0 :         intel_logical_ring_emit(ringbuf, upper_32_bits(offset));
    1614           0 :         intel_logical_ring_emit(ringbuf, MI_NOOP);
    1615           0 :         intel_logical_ring_advance(ringbuf);
    1616             : 
    1617           0 :         return 0;
    1618           0 : }
    1619             : /*
                     :  * gen8_logical_ring_get_irq() - take a reference on the ring interrupt.
                     :  *
                     :  * Increments ring->irq_refcount under dev_priv->irq_lock.  On the
                     :  * 0 -> 1 transition the ring IMR is written with
                     :  * ~(irq_enable_mask | irq_keep_mask) and read back with POSTING_READ().
                     :  *
                     :  * Return: false (after a WARN) if intel_irqs_enabled() reports that
                     :  * interrupts are off, in which case the refcount is not touched;
                     :  * true otherwise.
                     :  */
    1620           0 : static bool gen8_logical_ring_get_irq(struct intel_engine_cs *ring)
    1621             : {
    1622           0 :         struct drm_device *dev = ring->dev;
    1623           0 :         struct drm_i915_private *dev_priv = dev->dev_private;
    1624             :         unsigned long flags;
    1625             : 
    1626           0 :         if (WARN_ON(!intel_irqs_enabled(dev_priv)))
    1627           0 :                 return false;
    1628             : 
    1629           0 :         spin_lock_irqsave(&dev_priv->irq_lock, flags);
    1630           0 :         if (ring->irq_refcount++ == 0) {
    1631           0 :                 I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask));
    1632           0 :                 POSTING_READ(RING_IMR(ring->mmio_base));
    1633           0 :         }
    1634           0 :         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
    1635             : 
    1636           0 :         return true;
    1637           0 : }
    1638             : /*
                     :  * gen8_logical_ring_put_irq() - drop a reference on the ring interrupt.
                     :  *
                     :  * Decrements ring->irq_refcount under dev_priv->irq_lock.  When the
                     :  * count reaches zero the ring IMR is written with ~irq_keep_mask (the
                     :  * enable mask is no longer included) and read back with POSTING_READ().
                     :  * Counterpart of gen8_logical_ring_get_irq().
                     :  */
    1639           0 : static void gen8_logical_ring_put_irq(struct intel_engine_cs *ring)
    1640             : {
    1641           0 :         struct drm_device *dev = ring->dev;
    1642           0 :         struct drm_i915_private *dev_priv = dev->dev_private;
    1643             :         unsigned long flags;
    1644             : 
    1645           0 :         spin_lock_irqsave(&dev_priv->irq_lock, flags);
    1646           0 :         if (--ring->irq_refcount == 0) {
    1647           0 :                 I915_WRITE_IMR(ring, ~ring->irq_keep_mask);
    1648           0 :                 POSTING_READ(RING_IMR(ring->mmio_base));
    1649           0 :         }
    1650           0 :         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
    1651           0 : }
    1652             : /*
                     :  * gen8_emit_flush() - emit an MI_FLUSH_DW based flush.
                     :  *
                     :  * Installed as ring->emit_flush by the bsd, bsd2, blitter and vebox
                     :  * ring init functions in this file.
                     :  *
                     :  * @request:            request whose ringbuffer receives the commands.
                     :  * @invalidate_domains: if any I915_GEM_GPU_DOMAINS bit is set,
                     :  *                      MI_INVALIDATE_TLB is added, plus
                     :  *                      MI_INVALIDATE_BSD when the ring is VCS.
                     :  * @unused:             not read by this function.
                     :  *
                     :  * Emits four dwords: the command, the address dword
                     :  * (I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT), a zero upper
                     :  * address and a zero value.
                     :  *
                     :  * Return: 0 on success, or the non-zero value from
                     :  * intel_logical_ring_begin().
                     :  */
    1653           0 : static int gen8_emit_flush(struct drm_i915_gem_request *request,
    1654             :                            u32 invalidate_domains,
    1655             :                            u32 unused)
    1656             : {
    1657           0 :         struct intel_ringbuffer *ringbuf = request->ringbuf;
    1658           0 :         struct intel_engine_cs *ring = ringbuf->ring;
    1659           0 :         struct drm_device *dev = ring->dev;
    1660           0 :         struct drm_i915_private *dev_priv = dev->dev_private;
    1661             :         uint32_t cmd;
    1662             :         int ret;
    1663             : 
    1664           0 :         ret = intel_logical_ring_begin(request, 4);
    1665           0 :         if (ret)
    1666           0 :                 return ret;
    1667             : 
                     :         /*
                     :          * NOTE(review): the "+ 1" presumably bumps the command's length
                     :          * field to cover the extra upper-address dword emitted below;
                     :          * confirm against the MI_FLUSH_DW definition.
                     :          */
    1668             :         cmd = MI_FLUSH_DW + 1;
    1669             : 
    1670             :         /* We always require a command barrier so that subsequent
    1671             :          * commands, such as breadcrumb interrupts, are strictly ordered
    1672             :          * wrt the contents of the write cache being flushed to memory
    1673             :          * (and thus being coherent from the CPU).
    1674             :          */
    1675             :         cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
    1676             : 
    1677           0 :         if (invalidate_domains & I915_GEM_GPU_DOMAINS) {
    1678             :                 cmd |= MI_INVALIDATE_TLB;
    1679           0 :                 if (ring == &dev_priv->ring[VCS])
    1680           0 :                         cmd |= MI_INVALIDATE_BSD;
    1681             :         }
    1682             : 
    1683           0 :         intel_logical_ring_emit(ringbuf, cmd);
    1684           0 :         intel_logical_ring_emit(ringbuf,
    1685             :                                 I915_GEM_HWS_SCRATCH_ADDR |
    1686             :                                 MI_FLUSH_DW_USE_GTT);
    1687           0 :         intel_logical_ring_emit(ringbuf, 0); /* upper addr */
    1688           0 :         intel_logical_ring_emit(ringbuf, 0); /* value */
    1689           0 :         intel_logical_ring_advance(ringbuf);
    1690             : 
    1691           0 :         return 0;
    1692           0 : }
    1693             : /*
                     :  * gen8_emit_flush_render() - emit a PIPE_CONTROL based flush.
                     :  *
                     :  * Installed as ring->emit_flush by logical_render_ring_init().
                     :  *
                     :  * @request:            request whose ringbuffer receives the commands.
                     :  * @invalidate_domains: when non-zero, the TLB / instruction / texture /
                     :  *                      VF / const / state cache invalidate bits are
                     :  *                      set, together with PIPE_CONTROL_QW_WRITE and
                     :  *                      PIPE_CONTROL_GLOBAL_GTT_IVB.
                     :  * @flush_domains:      when non-zero, the render target, depth and DC
                     :  *                      flush bits plus PIPE_CONTROL_FLUSH_ENABLE are
                     :  *                      set.
                     :  *
                     :  * PIPE_CONTROL_CS_STALL is always set.  The address dword is
                     :  * ring->scratch.gtt_offset + 2 * CACHELINE_BYTES.  On gen9+ with
                     :  * PIPE_CONTROL_VF_CACHE_INVALIDATE set, an all-zero 6-dword
                     :  * PIPE_CONTROL is emitted ahead of the real one.
                     :  *
                     :  * Return: 0 on success, or the non-zero value from
                     :  * intel_logical_ring_begin().
                     :  */
    1694           0 : static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
    1695             :                                   u32 invalidate_domains,
    1696             :                                   u32 flush_domains)
    1697             : {
    1698           0 :         struct intel_ringbuffer *ringbuf = request->ringbuf;
    1699           0 :         struct intel_engine_cs *ring = ringbuf->ring;
    1700           0 :         u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
    1701             :         bool vf_flush_wa;
    1702             :         u32 flags = 0;
    1703             :         int ret;
    1704             : 
    1705             :         flags |= PIPE_CONTROL_CS_STALL;
    1706             : 
    1707           0 :         if (flush_domains) {
    1708             :                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
    1709             :                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
    1710             :                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
    1711             :                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
    1712           0 :         }
    1713             : 
    1714           0 :         if (invalidate_domains) {
    1715           0 :                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
    1716           0 :                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
    1717           0 :                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
    1718           0 :                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
    1719           0 :                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
    1720           0 :                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
    1721           0 :                 flags |= PIPE_CONTROL_QW_WRITE;
    1722           0 :                 flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
    1723           0 :         }
    1724             : 
    1725             :         /*
    1726             :          * On GEN9+ Before VF_CACHE_INVALIDATE we need to emit a NULL pipe
    1727             :          * control.
    1728             :          */
    1729           0 :         vf_flush_wa = INTEL_INFO(ring->dev)->gen >= 9 &&
    1730           0 :                       flags & PIPE_CONTROL_VF_CACHE_INVALIDATE;
    1731             : 
                     :         /* 6 dwords per PIPE_CONTROL; two of them when the workaround applies. */
    1732           0 :         ret = intel_logical_ring_begin(request, vf_flush_wa ? 12 : 6);
    1733           0 :         if (ret)
    1734           0 :                 return ret;
    1735             : 
    1736           0 :         if (vf_flush_wa) {
    1737           0 :                 intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
    1738           0 :                 intel_logical_ring_emit(ringbuf, 0);
    1739           0 :                 intel_logical_ring_emit(ringbuf, 0);
    1740           0 :                 intel_logical_ring_emit(ringbuf, 0);
    1741           0 :                 intel_logical_ring_emit(ringbuf, 0);
    1742           0 :                 intel_logical_ring_emit(ringbuf, 0);
    1743           0 :         }
    1744             : 
    1745           0 :         intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
    1746           0 :         intel_logical_ring_emit(ringbuf, flags);
    1747           0 :         intel_logical_ring_emit(ringbuf, scratch_addr);
    1748           0 :         intel_logical_ring_emit(ringbuf, 0);
    1749           0 :         intel_logical_ring_emit(ringbuf, 0);
    1750           0 :         intel_logical_ring_emit(ringbuf, 0);
    1751           0 :         intel_logical_ring_advance(ringbuf);
    1752             : 
    1753           0 :         return 0;
    1754           0 : }
    1755             : /*
                     :  * gen8_get_seqno() - read the seqno slot (I915_GEM_HWS_INDEX) of the
                     :  * ring's status page.  @lazy_coherency is accepted for the get_seqno
                     :  * hook signature but is not read here.
                     :  */
    1756           0 : static u32 gen8_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
    1757             : {
    1758           0 :         return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
    1759             : }
    1760             : /*
                     :  * gen8_set_seqno() - store @seqno into the seqno slot
                     :  * (I915_GEM_HWS_INDEX) of the ring's status page.
                     :  */
    1761           0 : static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno)
    1762             : {
    1763           0 :         intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
    1764           0 : }
    1765             : /*
                     :  * bxt_a_get_seqno() - get_seqno variant selected when
                     :  * IS_BXT_REVID(dev, 0, BXT_REVID_A1) is true.
                     :  *
                     :  * Same read as gen8_get_seqno(), except that when @lazy_coherency is
                     :  * false the status page slot is flushed with intel_flush_status_page()
                     :  * before it is read.
                     :  */
    1766           0 : static u32 bxt_a_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
    1767             : {
    1768             : 
    1769             :         /*
    1770             :          * On BXT A steppings there is a HW coherency issue whereby the
    1771             :          * MI_STORE_DATA_IMM storing the completed request's seqno
    1772             :          * occasionally doesn't invalidate the CPU cache. Work around this by
    1773             :          * clflushing the corresponding cacheline whenever the caller wants
    1774             :          * the coherency to be guaranteed. Note that this cacheline is known
    1775             :          * to be clean at this point, since we only write it in
    1776             :          * bxt_a_set_seqno(), where we also do a clflush after the write. So
    1777             :          * this clflush in practice becomes an invalidate operation.
    1778             :          */
    1779             : 
    1780           0 :         if (!lazy_coherency)
    1781           0 :                 intel_flush_status_page(ring, I915_GEM_HWS_INDEX);
    1782             : 
    1783           0 :         return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
    1784             : }
    1785             : /*
                     :  * bxt_a_set_seqno() - set_seqno variant paired with bxt_a_get_seqno().
                     :  *
                     :  * Writes @seqno to the status page slot I915_GEM_HWS_INDEX and then
                     :  * flushes that slot with intel_flush_status_page().
                     :  */
    1786           0 : static void bxt_a_set_seqno(struct intel_engine_cs *ring, u32 seqno)
    1787             : {
    1788           0 :         intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
    1789             : 
    1790             :         /* See bxt_a_get_seqno() explaining the reason for the clflush. */
    1791           0 :         intel_flush_status_page(ring, I915_GEM_HWS_INDEX);
    1792           0 : }
    1793             : /*
                     :  * gen8_emit_request() - emit the request breadcrumb and submit.
                     :  *
                     :  * Emits an MI_STORE_DWORD_IMM_GEN4 (with MI_GLOBAL_GTT) that stores the
                     :  * request's seqno at status_page.gfx_addr +
                     :  * (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT), followed by
                     :  * MI_USER_INTERRUPT and MI_NOOP, then calls
                     :  * intel_logical_ring_advance_and_submit().  Two further MI_NOOPs are
                     :  * written after the submit call as padding.
                     :  *
                     :  * Return: 0 on success, or the non-zero value from
                     :  * intel_logical_ring_begin().
                     :  */
    1794           0 : static int gen8_emit_request(struct drm_i915_gem_request *request)
    1795             : {
    1796           0 :         struct intel_ringbuffer *ringbuf = request->ringbuf;
    1797           0 :         struct intel_engine_cs *ring = ringbuf->ring;
    1798             :         u32 cmd;
    1799             :         int ret;
    1800             : 
    1801             :         /*
    1802             :          * Reserve space for 2 NOOPs at the end of each request to be
    1803             :          * used as a workaround for not being allowed to do lite
    1804             :          * restore with HEAD==TAIL (WaIdleLiteRestore).
                     :          * That makes 8 dwords: 6 for the breadcrumb plus the 2 NOOPs.
    1805             :          */
    1806           0 :         ret = intel_logical_ring_begin(request, 8);
    1807           0 :         if (ret)
    1808           0 :                 return ret;
    1809             : 
    1810             :         cmd = MI_STORE_DWORD_IMM_GEN4;
    1811             :         cmd |= MI_GLOBAL_GTT;
    1812             : 
    1813           0 :         intel_logical_ring_emit(ringbuf, cmd);
    1814           0 :         intel_logical_ring_emit(ringbuf,
    1815           0 :                                 (ring->status_page.gfx_addr +
    1816             :                                 (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)));
    1817           0 :         intel_logical_ring_emit(ringbuf, 0);
    1818           0 :         intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(request));
    1819           0 :         intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
    1820           0 :         intel_logical_ring_emit(ringbuf, MI_NOOP);
    1821           0 :         intel_logical_ring_advance_and_submit(request);
    1822             : 
    1823             :         /*
    1824             :          * Here we add two extra NOOPs as padding to avoid
    1825             :          * lite restore of a context with HEAD==TAIL.
    1826             :          */
    1827           0 :         intel_logical_ring_emit(ringbuf, MI_NOOP);
    1828           0 :         intel_logical_ring_emit(ringbuf, MI_NOOP);
    1829           0 :         intel_logical_ring_advance(ringbuf);
    1830             : 
    1831           0 :         return 0;
    1832           0 : }
    1833             : /*
                     :  * intel_lr_context_render_state_init() - run the render state batches.
                     :  *
                     :  * Prepares a struct render_state for req->ring and, when it carries
                     :  * data (so.rodata != NULL), starts two batches through the ring's
                     :  * emit_bb_start hook, both with I915_DISPATCH_SECURE: one at
                     :  * so.ggtt_offset and one at so.ggtt_offset + so.aux_batch_offset.  On
                     :  * success the object's GGTT vma is moved to the active list for @req.
                     :  *
                     :  * Return: 0 on success or when there is no render state to emit,
                     :  * otherwise the non-zero value from the failing step.
                     :  */
    1834           0 : static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req)
    1835             : {
    1836           0 :         struct render_state so;
    1837             :         int ret;
    1838             : 
    1839           0 :         ret = i915_gem_render_state_prepare(req->ring, &so);
    1840           0 :         if (ret)
    1841           0 :                 return ret;
    1842             : 
                     :         /* This early return does not go through i915_gem_render_state_fini(). */
    1843           0 :         if (so.rodata == NULL)
    1844           0 :                 return 0;
    1845             : 
    1846           0 :         ret = req->ring->emit_bb_start(req, so.ggtt_offset,
    1847             :                                        I915_DISPATCH_SECURE);
    1848           0 :         if (ret)
    1849             :                 goto out;
    1850             : 
    1851           0 :         ret = req->ring->emit_bb_start(req,
    1852           0 :                                        (so.ggtt_offset + so.aux_batch_offset),
    1853             :                                        I915_DISPATCH_SECURE);
    1854           0 :         if (ret)
    1855             :                 goto out;
    1856             : 
    1857           0 :         i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
    1858             : 
                     :         /* Reached on both success and failure of the batch starts. */
    1859             : out:
    1860           0 :         i915_gem_render_state_fini(&so);
    1861           0 :         return ret;
    1862           0 : }
    1863             : /*
                     :  * gen8_init_rcs_context() - init_context hook for the render ring.
                     :  *
                     :  * Emits the ring workarounds, then programs the MOCS, then runs the
                     :  * render state init.  A workaround failure aborts immediately; a MOCS
                     :  * failure is only logged.
                     :  *
                     :  * Return: the non-zero value from
                     :  * intel_logical_ring_workarounds_emit() if it fails, otherwise the
                     :  * return value of intel_lr_context_render_state_init().
                     :  */
    1864           0 : static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
    1865             : {
    1866             :         int ret;
    1867             : 
    1868           0 :         ret = intel_logical_ring_workarounds_emit(req);
    1869           0 :         if (ret)
    1870           0 :                 return ret;
    1871             : 
    1872           0 :         ret = intel_rcs_context_init_mocs(req);
    1873             :         /*
    1874             :          * Failing to program the MOCS is non-fatal. The system will not
    1875             :          * run at peak performance. So generate an error and carry on.
    1876             :          */
    1877           0 :         if (ret)
    1878           0 :                 DRM_ERROR("MOCS failed to program: expect performance issues.\n");
    1879             : 
    1880           0 :         return intel_lr_context_render_state_init(req);
    1881           0 : }
    1882             : 
    1883             : /**
    1884             :  * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer
    1885             :  *
    1886             :  * @ring: Engine Command Streamer.
    1887             :  *
                     :  * Does nothing if intel_ring_initialized() reports the ring as not
                     :  * initialized.  Otherwise stops the ring (warning if MODE_IDLE is not
                     :  * set afterwards), calls the ring's optional cleanup hook, tears down
                     :  * the command parser state and the batch pool, unmaps the status page
                     :  * if one is attached, and destroys the workaround context object.
    1888             :  */
    1889           0 : void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
    1890             : {
    1891             :         struct drm_i915_private *dev_priv;
    1892             : 
    1893           0 :         if (!intel_ring_initialized(ring))
    1894           0 :                 return;
    1895             : 
                     :         /* NOTE(review): dev_priv is not named below; presumably
                     :          * I915_READ_MODE() picks it up implicitly - confirm. */
    1896           0 :         dev_priv = ring->dev->dev_private;
    1897             : 
    1898           0 :         intel_logical_ring_stop(ring);
    1899           0 :         WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
    1900             : 
    1901           0 :         if (ring->cleanup)
    1902           0 :                 ring->cleanup(ring);
    1903             : 
    1904           0 :         i915_cmd_parser_fini_ring(ring);
    1905           0 :         i915_gem_batch_pool_fini(&ring->batch_pool);
    1906             : 
    1907           0 :         if (ring->status_page.obj) {
    1908           0 :                 kunmap(sg_page(ring->status_page.obj->pages->sgl));
    1909           0 :                 ring->status_page.obj = NULL;
    1910           0 :         }
    1911             : 
    1912           0 :         lrc_destroy_wa_ctx_obj(ring);
    1913           0 : }
    1914             : /*
                     :  * logical_ring_init() - common tail of the per-engine init functions.
                     :  *
                     :  * @dev:  DRM device, stored in ring->dev.
                     :  * @ring: engine whose name, id, mmio_base, masks and hooks the caller
                     :  *        has already filled in.
                     :  *
                     :  * Initialises the ring's lists, batch pool, wait queue and execlist
                     :  * lock, initialises the command parser, allocates the default
                     :  * context's state for this ring and pins it.
                     :  *
                     :  * Return: 0 on success, otherwise the non-zero value from the failing
                     :  * step.
                     :  *
                     :  * NOTE(review): the error returns below do not undo the earlier
                     :  * init calls (batch pool, command parser, deferred alloc); confirm
                     :  * that callers take care of the teardown.
                     :  */
    1915           0 : static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *ring)
    1916             : {
    1917             :         int ret;
    1918             : 
    1919             :         /* Intentionally left blank. */
    1920           0 :         ring->buffer = NULL;
    1921             : 
    1922           0 :         ring->dev = dev;
    1923           0 :         INIT_LIST_HEAD(&ring->active_list);
    1924           0 :         INIT_LIST_HEAD(&ring->request_list);
    1925           0 :         i915_gem_batch_pool_init(dev, &ring->batch_pool);
    1926           0 :         init_waitqueue_head(&ring->irq_queue);
    1927             : 
    1928           0 :         INIT_LIST_HEAD(&ring->execlist_queue);
    1929           0 :         INIT_LIST_HEAD(&ring->execlist_retired_req_list);
    1930           0 :         mtx_init(&ring->execlist_lock, IPL_TTY);
    1931             : 
    1932           0 :         ret = i915_cmd_parser_init_ring(ring);
    1933           0 :         if (ret)
    1934           0 :                 return ret;
    1935             : 
    1936           0 :         ret = intel_lr_context_deferred_alloc(ring->default_context, ring);
    1937           0 :         if (ret)
    1938           0 :                 return ret;
    1939             : 
    1940             :         /* As this is the default context, always pin it */
    1941           0 :         ret = intel_lr_context_do_pin(
    1942             :                         ring,
    1943           0 :                         ring->default_context->engine[ring->id].state,
    1944           0 :                         ring->default_context->engine[ring->id].ringbuf);
    1945           0 :         if (ret) {
    1946           0 :                 DRM_ERROR(
    1947             :                         "Failed to pin and map ringbuffer %s: %d\n",
    1948             :                         ring->name, ret);
    1949           0 :                 return ret;
    1950             :         }
    1951             : 
                     :         /* ret is 0 here: every non-zero value returned above. */
    1952           0 :         return ret;
    1953           0 : }
    1954             : /*
                     :  * logical_render_ring_init() - set up the render engine (RCS).
                     :  *
                     :  * Fills in the name, id, mmio base, interrupt masks and hooks of
                     :  * dev_priv->ring[RCS], initialises the pipe control and the workaround
                     :  * batch buffer, and finishes with logical_ring_init().
                     :  *
                     :  * Return: 0 on success; the non-zero value from
                     :  * intel_init_pipe_control() or logical_ring_init() on failure.  A
                     :  * workaround batch buffer failure is logged but does not fail the
                     :  * init.
                     :  */
    1955           0 : static int logical_render_ring_init(struct drm_device *dev)
    1956             : {
    1957           0 :         struct drm_i915_private *dev_priv = dev->dev_private;
    1958           0 :         struct intel_engine_cs *ring = &dev_priv->ring[RCS];
    1959             :         int ret;
    1960             : 
    1961           0 :         ring->name = "render ring";
    1962           0 :         ring->id = RCS;
    1963           0 :         ring->mmio_base = RENDER_RING_BASE;
    1964           0 :         ring->irq_enable_mask =
    1965             :                 GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT;
    1966           0 :         ring->irq_keep_mask =
    1967             :                 GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT;
                     :         /* Unlike the masks above, this bit is OR-ed in without
                     :          * GEN8_RCS_IRQ_SHIFT. */
    1968           0 :         if (HAS_L3_DPF(dev))
    1969           0 :                 ring->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
    1970             : 
    1971           0 :         if (INTEL_INFO(dev)->gen >= 9)
    1972           0 :                 ring->init_hw = gen9_init_render_ring;
    1973             :         else
    1974           0 :                 ring->init_hw = gen8_init_render_ring;
    1975           0 :         ring->init_context = gen8_init_rcs_context;
    1976           0 :         ring->cleanup = intel_fini_pipe_control;
                     :         /* See bxt_a_get_seqno() for why these revisions get the
                     :          * flushing seqno accessors. */
    1977           0 :         if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
    1978           0 :                 ring->get_seqno = bxt_a_get_seqno;
    1979           0 :                 ring->set_seqno = bxt_a_set_seqno;
    1980           0 :         } else {
    1981           0 :                 ring->get_seqno = gen8_get_seqno;
    1982           0 :                 ring->set_seqno = gen8_set_seqno;
    1983             :         }
    1984           0 :         ring->emit_request = gen8_emit_request;
    1985           0 :         ring->emit_flush = gen8_emit_flush_render;
    1986           0 :         ring->irq_get = gen8_logical_ring_get_irq;
    1987           0 :         ring->irq_put = gen8_logical_ring_put_irq;
    1988           0 :         ring->emit_bb_start = gen8_emit_bb_start;
    1989             : 
                     :         /* Set before intel_init_pipe_control(); logical_ring_init()
                     :          * assigns it again later. */
    1990           0 :         ring->dev = dev;
    1991             : 
    1992           0 :         ret = intel_init_pipe_control(ring);
    1993           0 :         if (ret)
    1994           0 :                 return ret;
    1995             : 
    1996           0 :         ret = intel_init_workaround_bb(ring);
    1997           0 :         if (ret) {
    1998             :                 /*
    1999             :                  * We continue even if we fail to initialize WA batch
    2000             :                  * because we only expect rare glitches but nothing
    2001             :                  * critical to prevent us from using GPU
    2002             :                  */
    2003           0 :                 DRM_ERROR("WA batch buffer initialization failed: %d\n",
    2004             :                           ret);
    2005           0 :         }
    2006             : 
                     :         /*
                     :          * NOTE(review): on failure only the WA context object is destroyed
                     :          * here; confirm the pipe control set up above is released elsewhere.
                     :          */
    2007           0 :         ret = logical_ring_init(dev, ring);
    2008           0 :         if (ret) {
    2009           0 :                 lrc_destroy_wa_ctx_obj(ring);
    2010           0 :         }
    2011             : 
    2012           0 :         return ret;
    2013           0 : }
    2014             : /*
                     :  * logical_bsd_ring_init() - set up the first video engine (VCS).
                     :  *
                     :  * Fills in the name, id, mmio base, interrupt masks (shifted by
                     :  * GEN8_VCS1_IRQ_SHIFT) and hooks of dev_priv->ring[VCS], then hands
                     :  * over to logical_ring_init().  The bxt_a seqno accessors are used
                     :  * when IS_BXT_REVID(dev, 0, BXT_REVID_A1) is true, the gen8 ones
                     :  * otherwise.
                     :  *
                     :  * Return: the return value of logical_ring_init().
                     :  */
    2015           0 : static int logical_bsd_ring_init(struct drm_device *dev)
    2016             : {
    2017           0 :         struct drm_i915_private *dev_priv = dev->dev_private;
    2018           0 :         struct intel_engine_cs *ring = &dev_priv->ring[VCS];
    2019             : 
    2020           0 :         ring->name = "bsd ring";
    2021           0 :         ring->id = VCS;
    2022           0 :         ring->mmio_base = GEN6_BSD_RING_BASE;
    2023           0 :         ring->irq_enable_mask =
    2024             :                 GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
    2025           0 :         ring->irq_keep_mask =
    2026             :                 GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
    2027             : 
    2028           0 :         ring->init_hw = gen8_init_common_ring;
    2029           0 :         if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
    2030           0 :                 ring->get_seqno = bxt_a_get_seqno;
    2031           0 :                 ring->set_seqno = bxt_a_set_seqno;
    2032           0 :         } else {
    2033           0 :                 ring->get_seqno = gen8_get_seqno;
    2034           0 :                 ring->set_seqno = gen8_set_seqno;
    2035             :         }
    2036           0 :         ring->emit_request = gen8_emit_request;
    2037           0 :         ring->emit_flush = gen8_emit_flush;
    2038           0 :         ring->irq_get = gen8_logical_ring_get_irq;
    2039           0 :         ring->irq_put = gen8_logical_ring_put_irq;
    2040           0 :         ring->emit_bb_start = gen8_emit_bb_start;
    2041             : 
    2042           0 :         return logical_ring_init(dev, ring);
    2043             : }
    2044             : /*
                     :  * logical_bsd2_ring_init() - set up the second video engine (VCS2).
                     :  *
                     :  * Fills in the name, id, mmio base, interrupt masks (shifted by
                     :  * GEN8_VCS2_IRQ_SHIFT) and hooks of dev_priv->ring[VCS2], then hands
                     :  * over to logical_ring_init().
                     :  *
                     :  * Unlike the other engine init functions in this file there is no
                     :  * IS_BXT_REVID() check here: the gen8 seqno accessors are always used.
                     :  * NOTE(review): presumably intentional - confirm.
                     :  *
                     :  * Return: the return value of logical_ring_init().
                     :  */
    2045           0 : static int logical_bsd2_ring_init(struct drm_device *dev)
    2046             : {
    2047           0 :         struct drm_i915_private *dev_priv = dev->dev_private;
    2048           0 :         struct intel_engine_cs *ring = &dev_priv->ring[VCS2];
    2049             : 
                     :         /* NOTE(review): "bds2" looks like a typo for "bsd2"; left as is
                     :          * because the name is a runtime string. */
    2050           0 :         ring->name = "bds2 ring";
    2051           0 :         ring->id = VCS2;
    2052           0 :         ring->mmio_base = GEN8_BSD2_RING_BASE;
    2053           0 :         ring->irq_enable_mask =
    2054             :                 GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
    2055           0 :         ring->irq_keep_mask =
    2056             :                 GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
    2057             : 
    2058           0 :         ring->init_hw = gen8_init_common_ring;
    2059           0 :         ring->get_seqno = gen8_get_seqno;
    2060           0 :         ring->set_seqno = gen8_set_seqno;
    2061           0 :         ring->emit_request = gen8_emit_request;
    2062           0 :         ring->emit_flush = gen8_emit_flush;
    2063           0 :         ring->irq_get = gen8_logical_ring_get_irq;
    2064           0 :         ring->irq_put = gen8_logical_ring_put_irq;
    2065           0 :         ring->emit_bb_start = gen8_emit_bb_start;
    2066             : 
    2067           0 :         return logical_ring_init(dev, ring);
    2068             : }
    2069             : /*
                     :  * logical_blt_ring_init() - set up the blitter engine (BCS).
                     :  *
                     :  * Fills in the name, id, mmio base, interrupt masks (shifted by
                     :  * GEN8_BCS_IRQ_SHIFT) and hooks of dev_priv->ring[BCS], then hands
                     :  * over to logical_ring_init().  The bxt_a seqno accessors are used
                     :  * when IS_BXT_REVID(dev, 0, BXT_REVID_A1) is true, the gen8 ones
                     :  * otherwise.
                     :  *
                     :  * Return: the return value of logical_ring_init().
                     :  */
    2070           0 : static int logical_blt_ring_init(struct drm_device *dev)
    2071             : {
    2072           0 :         struct drm_i915_private *dev_priv = dev->dev_private;
    2073           0 :         struct intel_engine_cs *ring = &dev_priv->ring[BCS];
    2074             : 
    2075           0 :         ring->name = "blitter ring";
    2076           0 :         ring->id = BCS;
    2077           0 :         ring->mmio_base = BLT_RING_BASE;
    2078           0 :         ring->irq_enable_mask =
    2079             :                 GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
    2080           0 :         ring->irq_keep_mask =
    2081             :                 GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
    2082             : 
    2083           0 :         ring->init_hw = gen8_init_common_ring;
    2084           0 :         if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
    2085           0 :                 ring->get_seqno = bxt_a_get_seqno;
    2086           0 :                 ring->set_seqno = bxt_a_set_seqno;
    2087           0 :         } else {
    2088           0 :                 ring->get_seqno = gen8_get_seqno;
    2089           0 :                 ring->set_seqno = gen8_set_seqno;
    2090             :         }
    2091           0 :         ring->emit_request = gen8_emit_request;
    2092           0 :         ring->emit_flush = gen8_emit_flush;
    2093           0 :         ring->irq_get = gen8_logical_ring_get_irq;
    2094           0 :         ring->irq_put = gen8_logical_ring_put_irq;
    2095           0 :         ring->emit_bb_start = gen8_emit_bb_start;
    2096             : 
    2097           0 :         return logical_ring_init(dev, ring);
    2098             : }
    2099             : /*
                     :  * logical_vebox_ring_init() - set up the video enhancement engine (VECS).
                     :  *
                     :  * Fills in the name, id, mmio base, interrupt masks (shifted by
                     :  * GEN8_VECS_IRQ_SHIFT) and hooks of dev_priv->ring[VECS], then hands
                     :  * over to logical_ring_init().  The bxt_a seqno accessors are used
                     :  * when IS_BXT_REVID(dev, 0, BXT_REVID_A1) is true, the gen8 ones
                     :  * otherwise.
                     :  *
                     :  * Return: the return value of logical_ring_init().
                     :  */
    2100           0 : static int logical_vebox_ring_init(struct drm_device *dev)
    2101             : {
    2102           0 :         struct drm_i915_private *dev_priv = dev->dev_private;
    2103           0 :         struct intel_engine_cs *ring = &dev_priv->ring[VECS];
    2104             : 
    2105           0 :         ring->name = "video enhancement ring";
    2106           0 :         ring->id = VECS;
    2107           0 :         ring->mmio_base = VEBOX_RING_BASE;
    2108           0 :         ring->irq_enable_mask =
    2109             :                 GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
    2110           0 :         ring->irq_keep_mask =
    2111             :                 GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
    2112             : 
    2113           0 :         ring->init_hw = gen8_init_common_ring;
    2114           0 :         if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
    2115           0 :                 ring->get_seqno = bxt_a_get_seqno;
    2116           0 :                 ring->set_seqno = bxt_a_set_seqno;
    2117           0 :         } else {
    2118           0 :                 ring->get_seqno = gen8_get_seqno;
    2119           0 :                 ring->set_seqno = gen8_set_seqno;
    2120             :         }
    2121           0 :         ring->emit_request = gen8_emit_request;
    2122           0 :         ring->emit_flush = gen8_emit_flush;
    2123           0 :         ring->irq_get = gen8_logical_ring_get_irq;
    2124           0 :         ring->irq_put = gen8_logical_ring_put_irq;
    2125           0 :         ring->emit_bb_start = gen8_emit_bb_start;
    2126             : 
    2127           0 :         return logical_ring_init(dev, ring);
    2128             : }
    2129             : 
    2130             : /**
    2131             :  * intel_logical_rings_init() - allocate, populate and init the Engine Command Streamers
    2132             :  * @dev: DRM device.
    2133             :  *
    2134             :  * This function initializes the engines for Execlists-style submission (the equivalent in the
    2135             :  * legacy ringbuffer submission world would be i915_gem_init_rings). The render engine is always
    2136             :  * initialized; BSD, BLT, VEBOX and BSD2 only when the matching HAS_*() check passes.
    2137             :  *
    2138             :  * Return: 0 on success, otherwise the non-zero error of the first per-engine init that failed (after unwinding through the cleanup labels).
    2139             :  */
    2140           0 : int intel_logical_rings_init(struct drm_device *dev)
    2141             : {
    2142           0 :         struct drm_i915_private *dev_priv = dev->dev_private;
    2143             :         int ret;
    2144             : 
    2145           0 :         ret = logical_render_ring_init(dev);
    2146           0 :         if (ret)
    2147           0 :                 return ret;
    2148             : 
    2149           0 :         if (HAS_BSD(dev)) {
    2150           0 :                 ret = logical_bsd_ring_init(dev);
    2151           0 :                 if (ret)
    2152             :                         goto cleanup_render_ring;
    2153             :         }
    2154             : 
    2155           0 :         if (HAS_BLT(dev)) {
    2156           0 :                 ret = logical_blt_ring_init(dev);
    2157           0 :                 if (ret)
    2158             :                         goto cleanup_bsd_ring;
    2159             :         }
    2160             : 
    2161           0 :         if (HAS_VEBOX(dev)) {
    2162           0 :                 ret = logical_vebox_ring_init(dev);
    2163           0 :                 if (ret)
    2164             :                         goto cleanup_blt_ring;
    2165             :         }
    2166             : 
    2167           0 :         if (HAS_BSD2(dev)) {
    2168           0 :                 ret = logical_bsd2_ring_init(dev);
    2169           0 :                 if (ret)
    2170             :                         goto cleanup_vebox_ring;
    2171             :         }
    2172             : 
    2173           0 :         return 0;
    2174             :         /* Unwind: each label falls through to the ones below it, so cleanup runs in reverse init order. */
    2175             : cleanup_vebox_ring:
    2176           0 :         intel_logical_ring_cleanup(&dev_priv->ring[VECS]);
    2177             : cleanup_blt_ring:
    2178           0 :         intel_logical_ring_cleanup(&dev_priv->ring[BCS]);
    2179             : cleanup_bsd_ring:
    2180           0 :         intel_logical_ring_cleanup(&dev_priv->ring[VCS]);
    2181             : cleanup_render_ring:
    2182           0 :         intel_logical_ring_cleanup(&dev_priv->ring[RCS]);
    2183             : 
    2184           0 :         return ret;
    2185           0 : }
    2186             : /* Build the RPCS word that populate_lr_context() stores next to GEN8_R_PWR_CLK_STATE: 0 before Gen9, otherwise slice/subslice/EU requests taken from INTEL_INFO(dev). */
    2187             : static u32
    2188           0 : make_rpcs(struct drm_device *dev)
    2189             : {
    2190             :         u32 rpcs = 0;
    2191             : 
    2192             :         /*
    2193             :          * No explicit RPCS request is needed to ensure full
    2194             :          * slice/subslice/EU enablement prior to Gen9.
    2195             :          */
    2196           0 :         if (INTEL_INFO(dev)->gen < 9)
    2197           0 :                 return 0;
    2198             : 
    2199             :         /*
    2200             :          * Starting in Gen9, render power gating can leave
    2201             :          * slice/subslice/EU in a partially enabled state. We
    2202             :          * must make an explicit request through RPCS for full
    2203             :          * enablement.
    2204             :          */
    2205           0 :         if (INTEL_INFO(dev)->has_slice_pg) {
    2206             :                 rpcs |= GEN8_RPCS_S_CNT_ENABLE;
    2207           0 :                 rpcs |= INTEL_INFO(dev)->slice_total <<
    2208             :                         GEN8_RPCS_S_CNT_SHIFT;
    2209           0 :                 rpcs |= GEN8_RPCS_ENABLE;
    2210           0 :         }
    2211             : 
    2212           0 :         if (INTEL_INFO(dev)->has_subslice_pg) {
    2213           0 :                 rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
    2214           0 :                 rpcs |= INTEL_INFO(dev)->subslice_per_slice <<
    2215             :                         GEN8_RPCS_SS_CNT_SHIFT;
    2216           0 :                 rpcs |= GEN8_RPCS_ENABLE;
    2217           0 :         }
    2218             :         /* EU gating: the same eu_per_subslice count is written to both the MIN and MAX fields. */
    2219           0 :         if (INTEL_INFO(dev)->has_eu_pg) {
    2220           0 :                 rpcs |= INTEL_INFO(dev)->eu_per_subslice <<
    2221             :                         GEN8_RPCS_EU_MIN_SHIFT;
    2222           0 :                 rpcs |= INTEL_INFO(dev)->eu_per_subslice <<
    2223             :                         GEN8_RPCS_EU_MAX_SHIFT;
    2224           0 :                 rpcs |= GEN8_RPCS_ENABLE;
    2225           0 :         }
    2226             : 
    2227           0 :         return rpcs;
    2228           0 : }
    2229             : /* Write the initial (reg, value) pairs for ring into page LRC_STATE_PN of ctx_obj; returns 0, or the error from i915_gem_object_set_to_cpu_domain()/i915_gem_object_get_pages(). */
    2230             : static int
    2231           0 : populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_obj,
    2232             :                     struct intel_engine_cs *ring, struct intel_ringbuffer *ringbuf)
    2233             : {
    2234           0 :         struct drm_device *dev = ring->dev;
    2235           0 :         struct drm_i915_private *dev_priv = dev->dev_private;
    2236           0 :         struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
    2237             :         struct vm_page *page;
    2238             :         uint32_t *reg_state;
    2239             :         int ret;
    2240             :         /* Contexts without their own ppgtt fall back to the aliasing ppgtt. */
    2241           0 :         if (!ppgtt)
    2242           0 :                 ppgtt = dev_priv->mm.aliasing_ppgtt;
    2243             : 
    2244           0 :         ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true);
    2245           0 :         if (ret) {
    2246             :                 DRM_DEBUG_DRIVER("Could not set to CPU domain\n");
    2247           0 :                 return ret;
    2248             :         }
    2249             : 
    2250           0 :         ret = i915_gem_object_get_pages(ctx_obj);
    2251           0 :         if (ret) {
    2252             :                 DRM_DEBUG_DRIVER("Could not get object pages\n");
    2253           0 :                 return ret;
    2254             :         }
    2255             : 
    2256           0 :         i915_gem_object_pin_pages(ctx_obj);
    2257             : 
    2258             :         /* The second page of the context object contains some fields which must
    2259             :          * be set up prior to the first execution. */
    2260           0 :         page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
    2261           0 :         reg_state = kmap_atomic(page);
    2262             : 
    2263             :         /* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM
    2264             :          * commands followed by (reg, value) pairs. The values we are setting here are
    2265             :          * only for the first context restore: on a subsequent save, the GPU will
    2266             :          * recreate this batchbuffer with new values (including all the missing
    2267             :          * MI_LOAD_REGISTER_IMM commands that we are not initializing here). */
    2268           0 :         if (ring->id == RCS)
    2269           0 :                 reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(14);
    2270             :         else
    2271           0 :                 reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(11);
    2272           0 :         reg_state[CTX_LRI_HEADER_0] |= MI_LRI_FORCE_POSTED;
    2273           0 :         reg_state[CTX_CONTEXT_CONTROL] = RING_CONTEXT_CONTROL(ring);
    2274           0 :         reg_state[CTX_CONTEXT_CONTROL+1] =
    2275             :                 _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
    2276             :                                    CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
    2277             :                                    CTX_CTRL_RS_CTX_ENABLE);
    2278           0 :         reg_state[CTX_RING_HEAD] = RING_HEAD(ring->mmio_base);
    2279           0 :         reg_state[CTX_RING_HEAD+1] = 0;
    2280           0 :         reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base);
    2281           0 :         reg_state[CTX_RING_TAIL+1] = 0;
    2282           0 :         reg_state[CTX_RING_BUFFER_START] = RING_START(ring->mmio_base);
    2283             :         /* Ring buffer start address is not known until the buffer is pinned.
    2284             :          * It is written to the context image in execlists_update_context()
    2285             :          */
    2286           0 :         reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring->mmio_base);
    2287           0 :         reg_state[CTX_RING_BUFFER_CONTROL+1] =
    2288           0 :                         ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID;
    2289           0 :         reg_state[CTX_BB_HEAD_U] = ring->mmio_base + 0x168;
    2290           0 :         reg_state[CTX_BB_HEAD_U+1] = 0;
    2291           0 :         reg_state[CTX_BB_HEAD_L] = ring->mmio_base + 0x140;
    2292           0 :         reg_state[CTX_BB_HEAD_L+1] = 0;
    2293           0 :         reg_state[CTX_BB_STATE] = ring->mmio_base + 0x110;
    2294           0 :         reg_state[CTX_BB_STATE+1] = (1<<5);
    2295           0 :         reg_state[CTX_SECOND_BB_HEAD_U] = ring->mmio_base + 0x11c;
    2296           0 :         reg_state[CTX_SECOND_BB_HEAD_U+1] = 0;
    2297           0 :         reg_state[CTX_SECOND_BB_HEAD_L] = ring->mmio_base + 0x114;
    2298           0 :         reg_state[CTX_SECOND_BB_HEAD_L+1] = 0;
    2299           0 :         reg_state[CTX_SECOND_BB_STATE] = ring->mmio_base + 0x118;
    2300           0 :         reg_state[CTX_SECOND_BB_STATE+1] = 0;
    2301           0 :         if (ring->id == RCS) {
    2302           0 :                 reg_state[CTX_BB_PER_CTX_PTR] = ring->mmio_base + 0x1c0;
    2303           0 :                 reg_state[CTX_BB_PER_CTX_PTR+1] = 0;
    2304           0 :                 reg_state[CTX_RCS_INDIRECT_CTX] = ring->mmio_base + 0x1c4;
    2305           0 :                 reg_state[CTX_RCS_INDIRECT_CTX+1] = 0;
    2306           0 :                 reg_state[CTX_RCS_INDIRECT_CTX_OFFSET] = ring->mmio_base + 0x1c8;
    2307           0 :                 reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = 0;
    2308           0 :                 if (ring->wa_ctx.obj) {
    2309             :                         struct i915_ctx_workarounds *wa_ctx = &ring->wa_ctx;
    2310           0 :                         uint32_t ggtt_offset = i915_gem_obj_ggtt_offset(wa_ctx->obj);
    2311             : 
    2312           0 :                         reg_state[CTX_RCS_INDIRECT_CTX+1] =
    2313           0 :                                 (ggtt_offset + wa_ctx->indirect_ctx.offset * sizeof(uint32_t)) |
    2314           0 :                                 (wa_ctx->indirect_ctx.size / CACHELINE_DWORDS);
    2315             : 
    2316           0 :                         reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] =
    2317             :                                 CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT << 6;
    2318             : 
    2319           0 :                         reg_state[CTX_BB_PER_CTX_PTR+1] =
    2320           0 :                                 (ggtt_offset + wa_ctx->per_ctx.offset * sizeof(uint32_t)) |
    2321             :                                 0x01;
    2322           0 :                 }
    2323             :         }
    2324           0 :         reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9);
    2325           0 :         reg_state[CTX_LRI_HEADER_1] |= MI_LRI_FORCE_POSTED;
    2326           0 :         reg_state[CTX_CTX_TIMESTAMP] = ring->mmio_base + 0x3a8;
    2327           0 :         reg_state[CTX_CTX_TIMESTAMP+1] = 0;
    2328           0 :         reg_state[CTX_PDP3_UDW] = GEN8_RING_PDP_UDW(ring, 3);
    2329           0 :         reg_state[CTX_PDP3_LDW] = GEN8_RING_PDP_LDW(ring, 3);
    2330           0 :         reg_state[CTX_PDP2_UDW] = GEN8_RING_PDP_UDW(ring, 2);
    2331           0 :         reg_state[CTX_PDP2_LDW] = GEN8_RING_PDP_LDW(ring, 2);
    2332           0 :         reg_state[CTX_PDP1_UDW] = GEN8_RING_PDP_UDW(ring, 1);
    2333           0 :         reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1);
    2334           0 :         reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
    2335           0 :         reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
    2336             : 
    2337           0 :         if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
    2338             :                 /* 64b PPGTT (48bit canonical)
    2339             :                  * PDP0_DESCRIPTOR contains the base address to PML4 and
    2340             :                  * other PDP Descriptors are ignored.
    2341             :                  */
    2342           0 :                 ASSIGN_CTX_PML4(ppgtt, reg_state);
    2343           0 :         } else {
    2344             :                 /* 32b PPGTT
    2345             :                  * PDP*_DESCRIPTOR contains the base address of space supported.
    2346             :                  * With dynamic page allocation, PDPs may not be allocated at
    2347             :                  * this point. Point the unallocated PDPs to the scratch page
    2348             :                  */
    2349           0 :                 ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
    2350           0 :                 ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
    2351           0 :                 ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
    2352           0 :                 ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
    2353             :         }
    2354             :         /* Only the render ring gets a third LRI block, carrying GEN8_R_PWR_CLK_STATE with the value from make_rpcs(). */
    2355           0 :         if (ring->id == RCS) {
    2356           0 :                 reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
    2357           0 :                 reg_state[CTX_R_PWR_CLK_STATE] = GEN8_R_PWR_CLK_STATE;
    2358           0 :                 reg_state[CTX_R_PWR_CLK_STATE+1] = make_rpcs(dev);
    2359           0 :         }
    2360             : 
    2361           0 :         kunmap_atomic(reg_state);
    2362             :         /* The state page was written through the CPU mapping: flag object and page dirty, then drop the page pin taken above. */
    2363           0 :         ctx_obj->dirty = 1;
    2364           0 :         set_page_dirty(page);
    2365           0 :         i915_gem_object_unpin_pages(ctx_obj);
    2366             : 
    2367           0 :         return 0;
    2368           0 : }
    2369             : 
    2370             : /**
    2371             :  * intel_lr_context_free() - free the LRC specific bits of a context
    2372             :  * @ctx: the LR context to free.
    2373             :  *
    2374             :  * The real context freeing is done in i915_gem_context_free: this only
    2375             :  * takes care of the bits that are LRC related: the per-engine backing
    2376             :  * objects and the logical ringbuffer. Engines without a state object are skipped.
    2377             :  */
    2378           0 : void intel_lr_context_free(struct intel_context *ctx)
    2379             : {
    2380             :         int i;
    2381             : 
    2382           0 :         for (i = 0; i < I915_NUM_RINGS; i++) {
    2383           0 :                 struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
    2384             : 
    2385           0 :                 if (ctx_obj) {
    2386             :                         struct intel_ringbuffer *ringbuf =
    2387           0 :                                         ctx->engine[i].ringbuf;
    2388           0 :                         struct intel_engine_cs *ring = ringbuf->ring;
    2389             :                         /* Only for the engine's default context: unpin the ringbuffer object and drop the GGTT pin on the state object before freeing. */
    2390           0 :                         if (ctx == ring->default_context) {
    2391           0 :                                 intel_unpin_ringbuffer_obj(ringbuf);
    2392           0 :                                 i915_gem_object_ggtt_unpin(ctx_obj);
    2393           0 :                         }
    2394           0 :                         WARN_ON(ctx->engine[ring->id].pin_count);
    2395           0 :                         intel_ringbuffer_free(ringbuf);
    2396           0 :                         drm_gem_object_unreference(&ctx_obj->base);
    2397           0 :                 }
    2398             :         }
    2399           0 : }
    2400             : /* Return the LR context size constant for ring: the Gen9 or Gen8 render size for RCS, GEN8_LR_CONTEXT_OTHER_SIZE for VCS/BCS/VECS/VCS2, and 0 for any other id. */
    2401           0 : static uint32_t get_lr_context_size(struct intel_engine_cs *ring)
    2402             : {
    2403             :         int ret = 0;
    2404             :         /* Warn (but carry on) when called on hardware older than Gen8. */
    2405           0 :         WARN_ON(INTEL_INFO(ring->dev)->gen < 8);
    2406             : 
    2407           0 :         switch (ring->id) {
    2408             :         case RCS:
    2409           0 :                 if (INTEL_INFO(ring->dev)->gen >= 9)
    2410           0 :                         ret = GEN9_LR_CONTEXT_RENDER_SIZE;
    2411             :                 else
    2412             :                         ret = GEN8_LR_CONTEXT_RENDER_SIZE;
    2413             :                 break;
    2414             :         case VCS:
    2415             :         case BCS:
    2416             :         case VECS:
    2417             :         case VCS2:
    2418             :                 ret = GEN8_LR_CONTEXT_OTHER_SIZE;
    2419           0 :                 break;
    2420             :         }
    2421             : 
    2422           0 :         return ret;
    2423             : }
    2424             : /* Point ring->status_page at page LRC_PPHWSP_PN of default_ctx_obj (GGTT address, kmap()ed CPU address, owning object) and program RING_HWS_PGA with the GGTT address. */
    2425           0 : static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
    2426             :                 struct drm_i915_gem_object *default_ctx_obj)
    2427             : {
    2428           0 :         struct drm_i915_private *dev_priv = ring->dev->dev_private;
    2429             :         struct vm_page *page;
    2430             : 
    2431             :         /* The HWSP is part of the default context object in LRC mode. */
    2432           0 :         ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(default_ctx_obj)
    2433           0 :                         + LRC_PPHWSP_PN * PAGE_SIZE;
    2434           0 :         page = i915_gem_object_get_page(default_ctx_obj, LRC_PPHWSP_PN);
    2435           0 :         ring->status_page.page_addr = kmap(page);
    2436           0 :         ring->status_page.obj = default_ctx_obj;
    2437             :         /* Write the (truncated to u32) GGTT address, then read the register back -- presumably to flush the posted write. */
    2438           0 :         I915_WRITE(RING_HWS_PGA(ring->mmio_base),
    2439             :                         (u32)ring->status_page.gfx_addr);
    2440           0 :         POSTING_READ(RING_HWS_PGA(ring->mmio_base));
    2441           0 : }
    2442             : 
    2443             : /**
    2444             :  * intel_lr_context_deferred_alloc() - create the LRC specific bits of a context
    2445             :  * @ctx: LR context to create.
    2446             :  * @ring: engine to be used with the context.
    2447             :  *
    2448             :  * This function can be called more than once, with different engines, if we plan
    2449             :  * to use the context with them. The context backing objects and the ringbuffers
    2450             :  * (especially the ringbuffer backing objects) consume a lot of memory, and that's why
    2451             :  * the creation is a deferred call: it's better to make sure first that we need to use
    2452             :  * a given ring with the context.
    2453             :  *
    2454             :  * Return: 0 on success; -ENOMEM if the backing object cannot be allocated, otherwise the error of the step that failed.
    2455             :  */
    2456             : 
    2457           0 : int intel_lr_context_deferred_alloc(struct intel_context *ctx,
    2458             :                                      struct intel_engine_cs *ring)
    2459             : {
    2460           0 :         struct drm_device *dev = ring->dev;
    2461             :         struct drm_i915_gem_object *ctx_obj;
    2462             :         uint32_t context_size;
    2463             :         struct intel_ringbuffer *ringbuf;
    2464             :         int ret;
    2465             : 
    2466           0 :         WARN_ON(ctx->legacy_hw_ctx.rcs_state != NULL);
    2467           0 :         WARN_ON(ctx->engine[ring->id].state);
    2468             : 
    2469           0 :         context_size = round_up(get_lr_context_size(ring), 4096);
    2470             : 
    2471             :         /* Extra LRC_PPHWSP_PN page(s) for the data shared between driver and GuC */
    2472           0 :         context_size += PAGE_SIZE * LRC_PPHWSP_PN;
    2473             : 
    2474           0 :         ctx_obj = i915_gem_alloc_object(dev, context_size);
    2475           0 :         if (!ctx_obj) {
    2476             :                 DRM_DEBUG_DRIVER("Alloc LRC backing obj failed.\n");
    2477           0 :                 return -ENOMEM;
    2478             :         }
    2479             : 
    2480           0 :         ringbuf = intel_engine_create_ringbuffer(ring, 4 * PAGE_SIZE);
    2481           0 :         if (IS_ERR(ringbuf)) {
    2482           0 :                 ret = PTR_ERR(ringbuf);
    2483           0 :                 goto error_deref_obj;
    2484             :         }
    2485             : 
    2486           0 :         ret = populate_lr_context(ctx, ctx_obj, ring, ringbuf);
    2487           0 :         if (ret) {
    2488             :                 DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
    2489             :                 goto error_ringbuf;
    2490             :         }
    2491             : 
    2492           0 :         ctx->engine[ring->id].ringbuf = ringbuf;
    2493           0 :         ctx->engine[ring->id].state = ctx_obj;
    2494             :         /* Non-default contexts on engines that provide an init_context hook get that hook run through a freshly allocated request. */
    2495           0 :         if (ctx != ring->default_context && ring->init_context) {
    2496           0 :                 struct drm_i915_gem_request *req;
    2497             : 
    2498           0 :                 ret = i915_gem_request_alloc(ring,
    2499             :                         ctx, &req);
    2500           0 :                 if (ret) {
    2501           0 :                         DRM_ERROR("ring create req: %d\n",
    2502             :                                 ret);
    2503           0 :                         goto error_ringbuf;
    2504             :                 }
    2505             : 
    2506           0 :                 ret = ring->init_context(req);
    2507           0 :                 if (ret) {
    2508           0 :                         DRM_ERROR("ring init context: %d\n",
    2509             :                                 ret);
    2510           0 :                         i915_gem_request_cancel(req);
    2511           0 :                         goto error_ringbuf;
    2512             :                 }
    2513           0 :                 i915_add_request_no_flush(req);
    2514           0 :         }
    2515           0 :         return 0;
    2516             :         /* Error unwind: free the ringbuffer (when one was created), drop the object reference and clear this engine's slots in ctx. */
    2517             : error_ringbuf:
    2518           0 :         intel_ringbuffer_free(ringbuf);
    2519             : error_deref_obj:
    2520           0 :         drm_gem_object_unreference(&ctx_obj->base);
    2521           0 :         ctx->engine[ring->id].ringbuf = NULL;
    2522           0 :         ctx->engine[ring->id].state = NULL;
    2523           0 :         return ret;
    2524           0 : }
    2525             : /* For every ring on which ctx has a state object, zero the RING_HEAD/RING_TAIL values stored in the context image and reset the software ringbuffer's head and tail to 0. */
    2526           0 : void intel_lr_context_reset(struct drm_device *dev,
    2527             :                         struct intel_context *ctx)
    2528             : {
    2529           0 :         struct drm_i915_private *dev_priv = dev->dev_private;
    2530             :         struct intel_engine_cs *ring;
    2531             :         int i;
    2532             : 
    2533           0 :         for_each_ring(ring, dev_priv, i) {
    2534             :                 struct drm_i915_gem_object *ctx_obj =
    2535           0 :                                 ctx->engine[ring->id].state;
    2536             :                 struct intel_ringbuffer *ringbuf =
    2537           0 :                                 ctx->engine[ring->id].ringbuf;
    2538             :                 uint32_t *reg_state;
    2539             :                 struct vm_page *page;
    2540             : 
    2541           0 :                 if (!ctx_obj)
    2542           0 :                         continue;
    2543             :                 /* If the pages cannot be obtained, warn and leave this engine's context image untouched. */
    2544           0 :                 if (i915_gem_object_get_pages(ctx_obj)) {
    2545           0 :                         WARN(1, "Failed get_pages for context obj\n");
    2546           0 :                         continue;
    2547             :                 }
    2548           0 :                 page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
    2549           0 :                 reg_state = kmap_atomic(page);
    2550             : 
    2551           0 :                 reg_state[CTX_RING_HEAD+1] = 0;
    2552           0 :                 reg_state[CTX_RING_TAIL+1] = 0;
    2553             : 
    2554           0 :                 kunmap_atomic(reg_state);
    2555             : 
    2556           0 :                 ringbuf->head = 0;
    2557           0 :                 ringbuf->tail = 0;
    2558           0 :         }
    2559           0 : }

Generated by: LCOV version 1.13