Line data Source code
1 : /*
2 : * Copyright © 2008-2015 Intel Corporation
3 : *
4 : * Permission is hereby granted, free of charge, to any person obtaining a
5 : * copy of this software and associated documentation files (the "Software"),
6 : * to deal in the Software without restriction, including without limitation
7 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 : * and/or sell copies of the Software, and to permit persons to whom the
9 : * Software is furnished to do so, subject to the following conditions:
10 : *
11 : * The above copyright notice and this permission notice (including the next
12 : * paragraph) shall be included in all copies or substantial portions of the
13 : * Software.
14 : *
15 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 : * IN THE SOFTWARE.
22 : */
23 :
24 : #include <dev/pci/drm/drmP.h>
25 : #include <dev/pci/drm/i915_drm.h>
26 : #include "i915_drv.h"
27 :
28 : /**
29 : * DOC: fence register handling
30 : *
31 : * Important to avoid confusion: "fences" in the i915 driver are not execution
32 : * fences used to track command completion but hardware detiler objects which
33 : * wrap a given range of the global GTT. Each platform has only a fairly limited
34 : * set of these objects.
35 : *
36 : * Fences are used to detile GTT memory mappings. They're also connected to the
37 : * hardware frontbuffer render tracking and hence interact with frontbuffer
38 : * compression. Furthermore, on older platforms fences are required for tiled
39 : * objects used by the display engine. They can also be used by the render
40 : * engine - they're required for blitter commands and are optional for render
41 : * commands. But on gen4+ both display (with the exception of fbc) and rendering
42 : * have their own tiling state bits and don't need fences.
43 : *
44 : * Also note that fences only support X and Y tiling and hence can't be used for
45 : * the fancier new tiling formats like W, Ys and Yf.
46 : *
47 : * Finally note that because fences are such a restricted resource they're
48 : * dynamically associated with objects. Furthermore fence state is committed to
49 : * the hardware lazily to avoid unnecessary stalls on gen2/3. Therefore code must
50 : * explicitly call i915_gem_object_get_fence() to synchronize fencing status
51 : * for CPU access. Also note that some code wants an unfenced view; for those
52 : * cases the fence can be removed forcefully with i915_gem_object_put_fence().
53 : *
54 : * Internally, these functions will synchronize with userspace access by removing
55 : * the CPU ptes pointing into GTT mmaps (not the GTT ptes themselves) as needed.
56 : */
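/*
 * Minimal usage sketch (hypothetical helper, not part of this file): code
 * that needs coherent, fenced CPU access through a GTT mmap first commits
 * the lazily tracked fence state, while code that needs an untiled view
 * force-removes the fence instead. Assumes the caller already holds
 * struct_mutex, as both calls below require.
 */
static inline int
example_sync_fence_for_cpu(struct drm_i915_gem_object *obj, bool want_fence)
{
	if (want_fence)
		return i915_gem_object_get_fence(obj);

	return i915_gem_object_put_fence(obj);
}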
57 :
58 0 : static void i965_write_fence_reg(struct drm_device *dev, int reg,
59 : struct drm_i915_gem_object *obj)
60 : {
61 0 : struct drm_i915_private *dev_priv = dev->dev_private;
62 : int fence_reg_lo, fence_reg_hi;
63 : int fence_pitch_shift;
64 :
65 0 : if (INTEL_INFO(dev)->gen >= 6) {
66 0 : fence_reg_lo = FENCE_REG_GEN6_LO(reg);
67 0 : fence_reg_hi = FENCE_REG_GEN6_HI(reg);
68 : fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT;
69 0 : } else {
70 0 : fence_reg_lo = FENCE_REG_965_LO(reg);
71 0 : fence_reg_hi = FENCE_REG_965_HI(reg);
72 : fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
73 : }
74 :
75 : /* To w/a incoherency with non-atomic 64-bit register updates,
76 : * we split the 64-bit update into two 32-bit writes. In order
77 : * for a partial fence not to be evaluated between writes, we
78 : * precede the update with a write to turn off the fence register,
79 : * and only enable the fence as the last step.
80 : *
81 : * For extra levels of paranoia, we make sure each step lands
82 : * before applying the next step.
83 : */
84 0 : I915_WRITE(fence_reg_lo, 0);
85 0 : POSTING_READ(fence_reg_lo);
86 :
87 0 : if (obj) {
88 0 : u32 size = i915_gem_obj_ggtt_size(obj);
89 : uint64_t val;
90 :
91 : /* Adjust fence size to match tiled area */
92 0 : if (obj->tiling_mode != I915_TILING_NONE) {
93 0 : uint32_t row_size = obj->stride *
94 0 : (obj->tiling_mode == I915_TILING_Y ? 32 : 8);
95 0 : size = (size / row_size) * row_size;
96 0 : }
97 :
98 0 : val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) &
99 0 : 0xfffff000) << 32;
100 0 : val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000;
101 0 : val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift;
102 0 : if (obj->tiling_mode == I915_TILING_Y)
103 0 : val |= 1 << I965_FENCE_TILING_Y_SHIFT;
104 0 : val |= I965_FENCE_REG_VALID;
105 :
106 0 : I915_WRITE(fence_reg_hi, val >> 32);
107 0 : POSTING_READ(fence_reg_hi);
108 :
109 0 : I915_WRITE(fence_reg_lo, val);
110 0 : POSTING_READ(fence_reg_lo);
111 0 : } else {
112 0 : I915_WRITE(fence_reg_hi, 0);
113 0 : POSTING_READ(fence_reg_hi);
114 : }
115 0 : }
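/*
 * Worked example (informational comment only) for the gen6+ path above: a
 * Y-tiled object at GGTT offset 0x00100000 with size 0x80000 and a
 * 4096-byte stride keeps its size unchanged (0x80000 is a multiple of the
 * 0x20000 Y-tile row size), places the last page of the fenced range,
 * 0x0017f000, in the upper 32 bits of val, and ORs in the start address
 * 0x00100000, the pitch field (4096 / 128 - 1 = 31, placed at the
 * platform-specific pitch shift), the Y-tiling bit and the valid bit.
 */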
116 :
117 0 : static void i915_write_fence_reg(struct drm_device *dev, int reg,
118 : struct drm_i915_gem_object *obj)
119 : {
120 0 : struct drm_i915_private *dev_priv = dev->dev_private;
121 : u32 val;
122 :
123 0 : if (obj) {
124 0 : u32 size = i915_gem_obj_ggtt_size(obj);
125 : int pitch_val;
126 : int tile_width;
127 :
128 0 : WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) ||
129 : (size & -size) != size ||
130 : (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
131 : "object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
132 : i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size);
133 :
134 0 : if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
135 0 : tile_width = 128;
136 : else
137 : tile_width = 512;
138 :
139 : /* Note: pitch better be a power of two tile widths */
140 0 : pitch_val = obj->stride / tile_width;
141 0 : pitch_val = ffs(pitch_val) - 1;
142 :
143 0 : val = i915_gem_obj_ggtt_offset(obj);
144 0 : if (obj->tiling_mode == I915_TILING_Y)
145 0 : val |= 1 << I830_FENCE_TILING_Y_SHIFT;
146 0 : val |= I915_FENCE_SIZE_BITS(size);
147 0 : val |= pitch_val << I830_FENCE_PITCH_SHIFT;
148 0 : val |= I830_FENCE_REG_VALID;
149 0 : } else
150 : val = 0;
151 :
152 0 : I915_WRITE(FENCE_REG(reg), val);
153 0 : POSTING_READ(FENCE_REG(reg));
154 0 : }
155 :
156 0 : static void i830_write_fence_reg(struct drm_device *dev, int reg,
157 : struct drm_i915_gem_object *obj)
158 : {
159 0 : struct drm_i915_private *dev_priv = dev->dev_private;
160 : uint32_t val;
161 :
162 0 : if (obj) {
163 0 : u32 size = i915_gem_obj_ggtt_size(obj);
164 : uint32_t pitch_val;
165 :
166 0 : WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) ||
167 : (size & -size) != size ||
168 : (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
169 : "object 0x%08llx not 512K or pot-size 0x%08x aligned\n",
170 : i915_gem_obj_ggtt_offset(obj), size);
171 :
172 0 : pitch_val = obj->stride / 128;
173 0 : pitch_val = ffs(pitch_val) - 1;
174 :
175 0 : val = i915_gem_obj_ggtt_offset(obj);
176 0 : if (obj->tiling_mode == I915_TILING_Y)
177 0 : val |= 1 << I830_FENCE_TILING_Y_SHIFT;
178 0 : val |= I830_FENCE_SIZE_BITS(size);
179 0 : val |= pitch_val << I830_FENCE_PITCH_SHIFT;
180 0 : val |= I830_FENCE_REG_VALID;
181 0 : } else
182 : val = 0;
183 :
184 0 : I915_WRITE(FENCE_REG(reg), val);
185 0 : POSTING_READ(FENCE_REG(reg));
186 0 : }
187 :
188 0 : inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
189 : {
190 0 : return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
191 : }
192 :
193 0 : static void i915_gem_write_fence(struct drm_device *dev, int reg,
194 : struct drm_i915_gem_object *obj)
195 : {
196 0 : struct drm_i915_private *dev_priv = dev->dev_private;
197 :
198 : /* Ensure that all CPU reads are completed before installing a fence
199 : * and all writes before removing the fence.
200 : */
201 0 : if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
202 0 : mb();
203 :
204 0 : WARN(obj && (!obj->stride || !obj->tiling_mode),
205 : "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
206 : obj->stride, obj->tiling_mode);
207 :
208 0 : if (IS_GEN2(dev))
209 0 : i830_write_fence_reg(dev, reg, obj);
210 0 : else if (IS_GEN3(dev))
211 0 : i915_write_fence_reg(dev, reg, obj);
212 0 : else if (INTEL_INFO(dev)->gen >= 4)
213 0 : i965_write_fence_reg(dev, reg, obj);
214 :
215 : /* And similarly be paranoid that no direct access to this region
216 : * is reordered to before the fence is installed.
217 : */
218 0 : if (i915_gem_object_needs_mb(obj))
219 0 : mb();
220 0 : }
221 :
222 0 : static inline int fence_number(struct drm_i915_private *dev_priv,
223 : struct drm_i915_fence_reg *fence)
224 : {
225 0 : return fence - dev_priv->fence_regs;
226 : }
227 :
228 0 : static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
229 : struct drm_i915_fence_reg *fence,
230 : bool enable)
231 : {
232 0 : struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
233 0 : int reg = fence_number(dev_priv, fence);
234 :
235 0 : i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
236 :
237 0 : if (enable) {
238 0 : obj->fence_reg = reg;
239 0 : fence->obj = obj;
240 0 : list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
241 0 : } else {
242 0 : obj->fence_reg = I915_FENCE_REG_NONE;
243 0 : fence->obj = NULL;
244 0 : list_del_init(&fence->lru_list);
245 : }
246 0 : obj->fence_dirty = false;
247 0 : }
248 :
249 0 : static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
250 : {
251 0 : if (obj->tiling_mode)
252 0 : i915_gem_release_mmap(obj);
253 :
254 : /* As we do not have an associated fence register, we will force
255 : * a tiling change if we ever need to acquire one.
256 : */
257 0 : obj->fence_dirty = false;
258 0 : obj->fence_reg = I915_FENCE_REG_NONE;
259 0 : }
260 :
261 : static int
262 0 : i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
263 : {
264 0 : if (obj->last_fenced_req) {
265 0 : int ret = i915_wait_request(obj->last_fenced_req);
266 0 : if (ret)
267 0 : return ret;
268 :
269 0 : i915_gem_request_assign(&obj->last_fenced_req, NULL);
270 0 : }
271 :
272 0 : return 0;
273 0 : }
274 :
275 : /**
276 : * i915_gem_object_put_fence - force-remove fence for an object
277 : * @obj: object to map through a fence reg
278 : *
279 : * This function force-removes any fence from the given object, which is useful
280 : * if the kernel wants to do untiled GTT access.
281 : *
282 : * Returns:
283 : *
284 : * 0 on success, negative error code on failure.
285 : */
286 : int
287 0 : i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
288 : {
289 0 : struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
290 : struct drm_i915_fence_reg *fence;
291 : int ret;
292 :
293 0 : ret = i915_gem_object_wait_fence(obj);
294 0 : if (ret)
295 0 : return ret;
296 :
297 0 : if (obj->fence_reg == I915_FENCE_REG_NONE)
298 0 : return 0;
299 :
300 0 : fence = &dev_priv->fence_regs[obj->fence_reg];
301 :
302 0 : if (WARN_ON(fence->pin_count))
303 0 : return -EBUSY;
304 :
305 0 : i915_gem_object_fence_lost(obj);
306 0 : i915_gem_object_update_fence(obj, fence, false);
307 :
308 0 : return 0;
309 0 : }
310 :
311 : static struct drm_i915_fence_reg *
312 0 : i915_find_fence_reg(struct drm_device *dev)
313 : {
314 0 : struct drm_i915_private *dev_priv = dev->dev_private;
315 : struct drm_i915_fence_reg *reg, *avail;
316 : int i;
317 :
318 : /* First try to find a free reg */
319 : avail = NULL;
320 0 : for (i = 0; i < dev_priv->num_fence_regs; i++) {
321 0 : reg = &dev_priv->fence_regs[i];
322 0 : if (!reg->obj)
323 0 : return reg;
324 :
325 0 : if (!reg->pin_count)
326 0 : avail = reg;
327 : }
328 :
329 0 : if (avail == NULL)
330 : goto deadlock;
331 :
332 : /* None available, try to steal one or wait for a user to finish */
333 0 : list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
334 0 : if (reg->pin_count)
335 : continue;
336 :
337 0 : return reg;
338 : }
339 :
340 : deadlock:
341 : /* Wait for completion of pending flips which consume fences */
342 0 : if (intel_has_pending_fb_unpin(dev))
343 0 : return ERR_PTR(-EAGAIN);
344 :
345 0 : return ERR_PTR(-EDEADLK);
346 0 : }
347 :
348 : /**
349 : * i915_gem_object_get_fence - set up fencing for an object
350 : * @obj: object to map through a fence reg
351 : *
352 : * When mapping objects through the GTT, userspace wants to be able to write
353 : * to them without having to worry about swizzling if the object is tiled.
354 : * This function walks the fence regs looking for a free one for @obj,
355 : * stealing one if it can't find any.
356 : *
357 : * It then sets up the reg based on the object's properties: address, pitch
358 : * and tiling format.
359 : *
360 : * For an untiled surface, this removes any existing fence.
361 : *
362 : * Returns:
363 : *
364 : * 0 on success, negative error code on failure.
365 : */
366 : int
367 0 : i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
368 : {
369 0 : struct drm_device *dev = obj->base.dev;
370 0 : struct drm_i915_private *dev_priv = dev->dev_private;
371 0 : bool enable = obj->tiling_mode != I915_TILING_NONE;
372 : struct drm_i915_fence_reg *reg;
373 : int ret;
374 :
375 : /* Have we updated the tiling parameters upon the object and so
376 : * will need to serialise the write to the associated fence register?
377 : */
378 0 : if (obj->fence_dirty) {
379 0 : ret = i915_gem_object_wait_fence(obj);
380 0 : if (ret)
381 0 : return ret;
382 : }
383 :
384 : /* Just update our place in the LRU if our fence is getting reused. */
385 0 : if (obj->fence_reg != I915_FENCE_REG_NONE) {
386 0 : reg = &dev_priv->fence_regs[obj->fence_reg];
387 0 : if (!obj->fence_dirty) {
388 0 : list_move_tail(®->lru_list,
389 0 : &dev_priv->mm.fence_list);
390 0 : return 0;
391 : }
392 0 : } else if (enable) {
393 0 : if (WARN_ON(!obj->map_and_fenceable))
394 0 : return -EINVAL;
395 :
396 0 : reg = i915_find_fence_reg(dev);
397 0 : if (IS_ERR(reg))
398 0 : return PTR_ERR(reg);
399 :
400 0 : if (reg->obj) {
401 : struct drm_i915_gem_object *old = reg->obj;
402 :
403 0 : ret = i915_gem_object_wait_fence(old);
404 0 : if (ret)
405 0 : return ret;
406 :
407 0 : i915_gem_object_fence_lost(old);
408 0 : }
409 : } else
410 0 : return 0;
411 :
412 0 : i915_gem_object_update_fence(obj, reg, enable);
413 :
414 0 : return 0;
415 0 : }
416 :
417 : /**
418 : * i915_gem_object_pin_fence - pin fencing state
419 : * @obj: object to pin fencing for
420 : *
421 : * This pins the fencing state (whether tiled or untiled) to make sure the
422 : * object is ready to be used as a scanout target. Fencing status must be
423 : * synchronized first by calling i915_gem_object_get_fence().
424 : *
425 : * The resulting fence pin reference must be released again with
426 : * i915_gem_object_unpin_fence().
427 : *
428 : * Returns:
429 : *
430 : * True if the object has a fence, false otherwise.
431 : */
432 : bool
433 0 : i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
434 : {
435 0 : if (obj->fence_reg != I915_FENCE_REG_NONE) {
436 0 : struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
437 0 : struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj);
438 :
439 0 : WARN_ON(!ggtt_vma ||
440 : dev_priv->fence_regs[obj->fence_reg].pin_count >
441 : ggtt_vma->pin_count);
442 0 : dev_priv->fence_regs[obj->fence_reg].pin_count++;
443 : return true;
444 : } else
445 0 : return false;
446 0 : }
447 :
448 : /**
449 : * i915_gem_object_unpin_fence - unpin fencing state
450 : * @obj: object to unpin fencing for
451 : *
452 : * This releases the fence pin reference acquired through
453 : * i915_gem_object_pin_fence(). It will handle both objects with and without an
454 : * attached fence correctly; callers do not need to distinguish between the two.
455 : */
456 : void
457 0 : i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj)
458 : {
459 0 : if (obj->fence_reg != I915_FENCE_REG_NONE) {
460 0 : struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
461 0 : WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0);
462 0 : dev_priv->fence_regs[obj->fence_reg].pin_count--;
463 0 : }
464 0 : }
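/*
 * Illustrative pairing sketch (hypothetical helper, not part of this file):
 * fence state is synchronized with i915_gem_object_get_fence() first, then
 * pinned for as long as the buffer is scanned out and released with a
 * matching unpin. i915_gem_object_unpin_fence() copes with objects that
 * never had a fence, so the unpin does not need to be conditional.
 */
static inline int
example_pin_fence_for_scanout(struct drm_i915_gem_object *obj)
{
	int ret;

	ret = i915_gem_object_get_fence(obj);
	if (ret)
		return ret;

	/* Returns true only if a fence register is actually attached. */
	i915_gem_object_pin_fence(obj);

	/* ... scan out the object ... */

	i915_gem_object_unpin_fence(obj);
	return 0;
}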
465 :
466 : /**
467 : * i915_gem_restore_fences - restore fence state
468 : * @dev: DRM device
469 : *
470 : * Restore the hw fence state to match the software tracking again. To be called
471 : * after a gpu reset and on resume.
472 : */
473 0 : void i915_gem_restore_fences(struct drm_device *dev)
474 : {
475 0 : struct drm_i915_private *dev_priv = dev->dev_private;
476 : int i;
477 :
478 0 : for (i = 0; i < dev_priv->num_fence_regs; i++) {
479 0 : struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
480 :
481 : /*
482 : * Commit delayed tiling changes if we have an object still
483 : * attached to the fence, otherwise just clear the fence.
484 : */
485 0 : if (reg->obj) {
486 0 : i915_gem_object_update_fence(reg->obj, reg,
487 0 : reg->obj->tiling_mode);
488 0 : } else {
489 0 : i915_gem_write_fence(dev, i, NULL);
490 : }
491 : }
492 0 : }
493 :
494 : /**
495 : * DOC: tiling swizzling details
496 : *
497 : * The idea behind tiling is to increase cache hit rates by rearranging
498 : * pixel data so that a group of pixel accesses are in the same cacheline.
499 : * The performance improvement from doing this on the back/depth buffer is on
500 : * the order of 30%.
501 : *
502 : * Intel architectures make this somewhat more complicated, though, by
503 : * adjustments made to addressing of data when the memory is in interleaved
504 : * mode (matched pairs of DIMMS) to improve memory bandwidth.
505 : * For interleaved memory, the CPU sends every sequential 64 bytes
506 : * to an alternate memory channel so it can get the bandwidth from both.
507 : *
508 : * The GPU also rearranges its accesses for increased bandwidth to interleaved
509 : * memory, and it matches what the CPU does for non-tiled. However, when tiled
510 : * it does it a little differently, since one walks addresses not just in the
511 : * X direction but also Y. So, along with alternating channels when bit
512 : * 6 of the address flips, it also alternates when other bits flip -- Bits 9
513 : * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines)
514 : * are common to both the 915 and 965-class hardware.
515 : *
516 : * The CPU also sometimes XORs in higher bits as well, to improve
517 : * bandwidth doing strided access like we do so frequently in graphics. This
518 : * is called "Channel XOR Randomization" in the MCH documentation. The result
519 : * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address
520 : * decode.
521 : *
522 : * All of this bit 6 XORing has an effect on our memory management,
523 : * as we need to make sure that the 3d driver can correctly address object
524 : * contents.
525 : *
526 : * If we don't have interleaved memory, all tiling is safe and no swizzling is
527 : * required.
528 : *
529 : * When bit 17 is XORed in, we simply refuse to tile at all. Bit
530 : * 17 is not just a page offset, so as we page an object out and back in,
531 : * individual pages in it will have different bit 17 addresses, resulting in
532 : * each 64 bytes being swapped with its neighbor!
533 : *
534 : * Otherwise, if interleaved, we have to tell the 3d driver what address
535 : * swizzling it needs to do, since it's writing with the CPU to the pages
536 : * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the
537 : * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling
538 : * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order
539 : * to match what the GPU expects.
540 : */
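/*
 * Illustration only (hypothetical helper, not used by the driver): the
 * cumulative CPU-side swizzle described above for the bit 9/10/11 case
 * folds those address bits into bit 6 of the linear offset.
 */
static inline unsigned int
example_bit6_swizzle_9_10_11(uint64_t addr)
{
	return ((addr >> 6) ^ (addr >> 9) ^ (addr >> 10) ^ (addr >> 11)) & 1;
}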
541 :
542 : /**
543 : * i915_gem_detect_bit_6_swizzle - detect bit 6 swizzling pattern
544 : * @dev: DRM device
545 : *
546 : * Detects bit 6 swizzling of address lookup between IGD access and CPU
547 : * access through main memory.
548 : */
549 : void
550 0 : i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
551 : {
552 0 : struct drm_i915_private *dev_priv = dev->dev_private;
553 : uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
554 : uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
555 :
556 0 : if (INTEL_INFO(dev)->gen >= 8 || IS_VALLEYVIEW(dev)) {
557 : /*
558 : * On BDW+, swizzling is not used. We leave the CPU memory
559 : * controller in charge of optimizing memory accesses without
560 : * the extra address manipulation GPU side.
561 : *
562 : * VLV and CHV don't have GPU swizzling.
563 : */
564 : swizzle_x = I915_BIT_6_SWIZZLE_NONE;
565 : swizzle_y = I915_BIT_6_SWIZZLE_NONE;
566 0 : } else if (INTEL_INFO(dev)->gen >= 6) {
567 0 : if (dev_priv->preserve_bios_swizzle) {
568 0 : if (I915_READ(DISP_ARB_CTL) &
569 : DISP_TILE_SURFACE_SWIZZLING) {
570 : swizzle_x = I915_BIT_6_SWIZZLE_9_10;
571 : swizzle_y = I915_BIT_6_SWIZZLE_9;
572 0 : } else {
573 : swizzle_x = I915_BIT_6_SWIZZLE_NONE;
574 : swizzle_y = I915_BIT_6_SWIZZLE_NONE;
575 : }
576 : } else {
577 : uint32_t dimm_c0, dimm_c1;
578 0 : dimm_c0 = I915_READ(MAD_DIMM_C0);
579 0 : dimm_c1 = I915_READ(MAD_DIMM_C1);
580 0 : dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
581 0 : dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
582 : /* Enable swizzling when the channels are populated
583 : * with identically sized DIMMs. We don't need to check
584 : * the 3rd channel because no CPU with a GPU attached
585 : * ships in that configuration. Also, swizzling only
586 : * makes sense for 2 channels anyway. */
587 0 : if (dimm_c0 == dimm_c1) {
588 : swizzle_x = I915_BIT_6_SWIZZLE_9_10;
589 : swizzle_y = I915_BIT_6_SWIZZLE_9;
590 0 : } else {
591 : swizzle_x = I915_BIT_6_SWIZZLE_NONE;
592 : swizzle_y = I915_BIT_6_SWIZZLE_NONE;
593 : }
594 : }
595 0 : } else if (IS_GEN5(dev)) {
596 : /* On Ironlake, whatever the DRAM config, the GPU always does
597 : * the same swizzling setup.
598 : */
599 : swizzle_x = I915_BIT_6_SWIZZLE_9_10;
600 : swizzle_y = I915_BIT_6_SWIZZLE_9;
601 0 : } else if (IS_GEN2(dev)) {
602 : /* As far as we know, the 865 doesn't have these bit 6
603 : * swizzling issues.
604 : */
605 : swizzle_x = I915_BIT_6_SWIZZLE_NONE;
606 : swizzle_y = I915_BIT_6_SWIZZLE_NONE;
607 0 : } else if (IS_MOBILE(dev) || (IS_GEN3(dev) && !IS_G33(dev))) {
608 : uint32_t dcc;
609 :
610 : /* On 9xx chipsets, channel interleave by the CPU is
611 : * determined by DCC. For single-channel, neither the CPU
612 : * nor the GPU does swizzling. For dual-channel interleaved,
613 : * the GPU's interleave is bits 9 and 10 for X tiled, and bit
614 : * 9 for Y tiled. The CPU's interleave is independent, and
615 : * can be based on either bit 11 (haven't seen this yet) or
616 : * bit 17 (common).
617 : */
618 0 : dcc = I915_READ(DCC);
619 0 : switch (dcc & DCC_ADDRESSING_MODE_MASK) {
620 : case DCC_ADDRESSING_MODE_SINGLE_CHANNEL:
621 : case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC:
622 : swizzle_x = I915_BIT_6_SWIZZLE_NONE;
623 : swizzle_y = I915_BIT_6_SWIZZLE_NONE;
624 0 : break;
625 : case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED:
626 0 : if (dcc & DCC_CHANNEL_XOR_DISABLE) {
627 : /* This is the base swizzling by the GPU for
628 : * tiled buffers.
629 : */
630 : swizzle_x = I915_BIT_6_SWIZZLE_9_10;
631 : swizzle_y = I915_BIT_6_SWIZZLE_9;
632 0 : } else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) {
633 : /* Bit 11 swizzling by the CPU in addition. */
634 : swizzle_x = I915_BIT_6_SWIZZLE_9_10_11;
635 : swizzle_y = I915_BIT_6_SWIZZLE_9_11;
636 0 : } else {
637 : /* Bit 17 swizzling by the CPU in addition. */
638 : swizzle_x = I915_BIT_6_SWIZZLE_9_10_17;
639 : swizzle_y = I915_BIT_6_SWIZZLE_9_17;
640 : }
641 : break;
642 : }
643 :
644 : /* check for L-shaped memory aka modified enhanced addressing */
645 0 : if (IS_GEN4(dev) &&
646 0 : !(I915_READ(DCC2) & DCC2_MODIFIED_ENHANCED_DISABLE)) {
647 : swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
648 : swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
649 0 : }
650 :
651 0 : if (dcc == 0xffffffff) {
652 0 : DRM_ERROR("Couldn't read from MCHBAR. "
653 : "Disabling tiling.\n");
654 : swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
655 : swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
656 0 : }
657 0 : } else {
658 : /* The 965, G33, and newer have a very flexible memory
659 : * configuration. It will enable dual-channel mode
660 : * (interleaving) on as much memory as it can, and the GPU
661 : * will additionally sometimes enable different bit 6
662 : * swizzling for tiled objects from the CPU.
663 : *
664 : * Here's what I found on the G965:
665 : * slot fill               memory size   swizzling
666 : * 0A    0B    1A    1B    1-ch   2-ch
667 : * 512   0     0     0     512    0      O
668 : * 512   0     512   0     16     1008   X
669 : * 512   0     0     512   16     1008   X
670 : * 0     512   0     512   16     1008   X
671 : * 1024  1024  1024  0     2048   1024   O
672 : *
673 : * We could probably detect this based on either the DRB
674 : * matching, which was the case for the swizzling required in
675 : * the table above, or from the 1-ch value being less than
676 : * the minimum size of a rank.
677 : *
678 : * Reports indicate that the swizzling actually
679 : * varies depending upon page placement inside the
680 : * channels, i.e. we see swizzled pages where the
681 : * banks of memory are paired and unswizzled on the
682 : * uneven portion, so leave that as unknown.
683 : */
684 0 : if (I915_READ16(C0DRB3) == I915_READ16(C1DRB3)) {
685 : swizzle_x = I915_BIT_6_SWIZZLE_9_10;
686 : swizzle_y = I915_BIT_6_SWIZZLE_9;
687 0 : }
688 : }
689 :
690 0 : if (swizzle_x == I915_BIT_6_SWIZZLE_UNKNOWN ||
691 0 : swizzle_y == I915_BIT_6_SWIZZLE_UNKNOWN) {
692 : /* Userspace likes to explode if it sees unknown swizzling,
693 : * so lie. We will finish the lie when reporting through
694 : * the get-tiling-ioctl by reporting the physical swizzle
695 : * mode as unknown instead.
696 : *
697 : * As we don't strictly know what the swizzling is, it may be
698 : * bit17 dependent, and so we need to also prevent the pages
699 : * from being moved.
700 : */
701 0 : dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES;
702 : swizzle_x = I915_BIT_6_SWIZZLE_NONE;
703 : swizzle_y = I915_BIT_6_SWIZZLE_NONE;
704 0 : }
705 :
706 0 : dev_priv->mm.bit_6_swizzle_x = swizzle_x;
707 0 : dev_priv->mm.bit_6_swizzle_y = swizzle_y;
708 0 : }
709 :
710 : /*
711 : * Swap every 64 bytes of this page around, to account for it having a new
712 : * bit 17 of its physical address and therefore being interpreted differently
713 : * by the GPU.
714 : */
715 : static void
716 0 : i915_gem_swizzle_page(struct vm_page *page)
717 : {
718 0 : char temp[64];
719 : char *vaddr;
720 : int i;
721 :
722 0 : vaddr = kmap(page);
723 :
724 0 : for (i = 0; i < PAGE_SIZE; i += 128) {
725 0 : memcpy(temp, &vaddr[i], 64);
726 0 : memcpy(&vaddr[i], &vaddr[i + 64], 64);
727 0 : memcpy(&vaddr[i + 64], temp, 64);
728 : }
729 :
730 0 : kunmap(vaddr);
731 0 : }
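/*
 * For example (informational): bytes 0-63 of the page swap with bytes
 * 64-127, bytes 128-191 swap with bytes 192-255, and so on, one 128-byte
 * block at a time across the whole page.
 */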
732 :
733 : /**
734 : * i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling
735 : * @obj: i915 GEM buffer object
736 : *
737 : * This function fixes up the swizzling in case any page frame number for this
738 : * object has changed in bit 17 since that state has been saved with
739 : * i915_gem_object_save_bit_17_swizzle().
740 : *
741 : * This is called when pinning backing storage again, since the kernel is free
742 : * to move unpinned backing storage around (either by directly moving pages or
743 : * by swapping them out and back in again).
744 : */
745 : void
746 0 : i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
747 : {
748 0 : struct sg_page_iter sg_iter;
749 : int i;
750 :
751 0 : if (obj->bit_17 == NULL)
752 0 : return;
753 :
754 : i = 0;
755 0 : for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
756 0 : struct vm_page *page = sg_page_iter_page(&sg_iter);
757 0 : char new_bit_17 = page_to_phys(page) >> 17;
758 0 : if ((new_bit_17 & 0x1) !=
759 0 : (test_bit(i, obj->bit_17) != 0)) {
760 0 : i915_gem_swizzle_page(page);
761 0 : set_page_dirty(page);
762 0 : }
763 0 : i++;
764 : }
765 0 : }
766 :
767 : /**
768 : * i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling
769 : * @obj: i915 GEM buffer object
770 : *
771 : * This function saves the bit 17 of each page frame number so that swizzling
772 : * can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This must
773 : * be called before the backing storage can be unpinned.
774 : */
775 : void
776 0 : i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
777 : {
778 0 : struct sg_page_iter sg_iter;
779 0 : int page_count = obj->base.size >> PAGE_SHIFT;
780 : int i;
781 :
782 0 : if (obj->bit_17 == NULL) {
783 0 : obj->bit_17 = kcalloc(BITS_TO_LONGS(page_count),
784 : sizeof(long), GFP_KERNEL);
785 0 : if (obj->bit_17 == NULL) {
786 0 : DRM_ERROR("Failed to allocate memory for bit 17 "
787 : "record\n");
788 0 : return;
789 : }
790 : }
791 :
792 : i = 0;
793 0 : for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
794 0 : if (page_to_phys(sg_page_iter_page(&sg_iter)) & (1 << 17))
795 0 : __set_bit(i, obj->bit_17);
796 : else
797 0 : __clear_bit(i, obj->bit_17);
798 0 : i++;
799 : }
800 0 : }