Line data Source code
1 : /*
2 : * Copyright © 2008-2015 Intel Corporation
3 : *
4 : * Permission is hereby granted, free of charge, to any person obtaining a
5 : * copy of this software and associated documentation files (the "Software"),
6 : * to deal in the Software without restriction, including without limitation
7 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 : * and/or sell copies of the Software, and to permit persons to whom the
9 : * Software is furnished to do so, subject to the following conditions:
10 : *
11 : * The above copyright notice and this permission notice (including the next
12 : * paragraph) shall be included in all copies or substantial portions of the
13 : * Software.
14 : *
15 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 : * IN THE SOFTWARE.
22 : */
23 :
24 : #include <dev/pci/drm/drmP.h>
25 : #include <dev/pci/drm/i915_drm.h>
26 : #include "i915_drv.h"
27 :
28 : /**
29 : * DOC: fence register handling
30 : *
31 : * Important to avoid confusion: "fences" in the i915 driver are not execution
32 : * fences used to track command completion but hardware detiler objects which
33 : * wrap a given range of the global GTT. Each platform has only a fairly limited
34 : * set of these objects.
35 : *
36 : * Fences are used to detile GTT memory mappings. They're also connected to the
37 : * hardware frontbuffer render tracking and hence interact with frontbuffer
38 : * compression. Furthermore, on older platforms fences are required for tiled
39 : * objects used by the display engine. They can also be used by the render
40 : * engine - they're required for blitter commands and are optional for render
41 : * commands. But on gen4+ both display (with the exception of fbc) and rendering
42 : * have their own tiling state bits and don't need fences.
43 : *
44 : * Also note that fences only support X and Y tiling and hence can't be used for
45 : * the fancier new tiling formats like W, Ys and Yf.
46 : *
47 : * Finally note that because fences are such a restricted resource they're
48 : * dynamically associated with objects. Furthermore fence state is committed to
49 : * the hardware lazily to avoid unnecessary stalls on gen2/3. Therefore code must
50 : * explicitly call i915_gem_object_get_fence() to synchronize fencing status
51 : * for CPU access. Also note that some code wants an unfenced view; for those
52 : * cases the fence can be removed forcefully with i915_gem_object_put_fence().
53 : *
54 : * Internally, these functions will synchronize with userspace access by removing
55 : * the CPU ptes pointing into GTT mmaps (not the GTT ptes themselves) as needed.
56 : */
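/*
 * Minimal usage sketch (hypothetical helper, not part of this file): code
 * that needs coherent, fenced CPU access through a GTT mmap first commits
 * the lazily tracked fence state, while code that needs an untiled view
 * force-removes the fence instead. Assumes the caller already holds
 * struct_mutex, as both calls below require.
 */
static inline int
example_sync_fence_for_cpu(struct drm_i915_gem_object *obj, bool want_fence)
{
	if (want_fence)
		return i915_gem_object_get_fence(obj);

	return i915_gem_object_put_fence(obj);
}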
57 :
58 0 : static void i965_write_fence_reg(struct drm_device *dev, int reg,
59 : struct drm_i915_gem_object *obj)
60 : {
61 0 : struct drm_i915_private *dev_priv = dev->dev_private;
62 : int fence_reg_lo, fence_reg_hi;
63 : int fence_pitch_shift;
64 :
65 0 : if (INTEL_INFO(dev)->gen >= 6) {
66 0 : fence_reg_lo = FENCE_REG_GEN6_LO(reg);
67 0 : fence_reg_hi = FENCE_REG_GEN6_HI(reg);
68 : fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT;
69 0 : } else {
70 0 : fence_reg_lo = FENCE_REG_965_LO(reg);
71 0 : fence_reg_hi = FENCE_REG_965_HI(reg);
72 : fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
73 : }
74 :
75 : /* To w/a incoherency with non-atomic 64-bit register updates,
76 : * we split the 64-bit update into two 32-bit writes. In order
77 : * for a partial fence not to be evaluated between writes, we
78 : * precede the update with a write to turn off the fence register,
79 : * and only enable the fence as the last step.
80 : *
81 : * For extra levels of paranoia, we make sure each step lands
82 : * before applying the next step.
83 : */
84 0 : I915_WRITE(fence_reg_lo, 0);
85 0 : POSTING_READ(fence_reg_lo);
86 :
87 0 : if (obj) {
88 0 : u32 size = i915_gem_obj_ggtt_size(obj);
89 : uint64_t val;
90 :
91 : /* Adjust fence size to match tiled area */
92 0 : if (obj->tiling_mode != I915_TILING_NONE) {
93 0 : uint32_t row_size = obj->stride *
94 0 : (obj->tiling_mode == I915_TILING_Y ? 32 : 8);
95 0 : size = (size / row_size) * row_size;
96 0 : }
97 :
98 0 : val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) &
99 0 : 0xfffff000) << 32;
100 0 : val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000;
101 0 : val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift;
102 0 : if (obj->tiling_mode == I915_TILING_Y)
103 0 : val |= 1 << I965_FENCE_TILING_Y_SHIFT;
104 0 : val |= I965_FENCE_REG_VALID;
105 :
106 0 : I915_WRITE(fence_reg_hi, val >> 32);
107 0 : POSTING_READ(fence_reg_hi);
108 :
109 0 : I915_WRITE(fence_reg_lo, val);
110 0 : POSTING_READ(fence_reg_lo);
111 0 : } else {
112 0 : I915_WRITE(fence_reg_hi, 0);
113 0 : POSTING_READ(fence_reg_hi);
114 : }
115 0 : }
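/*
 * Worked example (informational comment only) for the gen6+ path above: a
 * Y-tiled object at GGTT offset 0x00100000 with size 0x80000 and a
 * 4096-byte stride keeps its size unchanged (0x80000 is a multiple of the
 * 0x20000 Y-tile row size), places the last page of the fenced range,
 * 0x0017f000, in the upper 32 bits of val, and ORs in the start address
 * 0x00100000, the pitch field (4096 / 128 - 1 = 31, placed at the
 * platform-specific pitch shift), the Y-tiling bit and the valid bit.
 */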
116 :
117 0 : static void i915_write_fence_reg(struct drm_device *dev, int reg,
118 : struct drm_i915_gem_object *obj)
119 : {
120 0 : struct drm_i915_private *dev_priv = dev->dev_private;
121 : u32 val;
122 :
123 0 : if (obj) {
124 0 : u32 size = i915_gem_obj_ggtt_size(obj);
125 : int pitch_val;
126 : int tile_width;
127 :
128 0 : WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) ||
129 : (size & -size) != size ||
130 : (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
131 : "object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
132 : i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size);
133 :
134 0 : if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
135 0 : tile_width = 128;
136 : else
137 : tile_width = 512;
138 :
139 : /* Note: pitch better be a power of two tile widths */
140 0 : pitch_val = obj->stride / tile_width;
141 0 : pitch_val = ffs(pitch_val) - 1;
142 :
143 0 : val = i915_gem_obj_ggtt_offset(obj);
144 0 : if (obj->tiling_mode == I915_TILING_Y)
145 0 : val |= 1 << I830_FENCE_TILING_Y_SHIFT;
146 0 : val |= I915_FENCE_SIZE_BITS(size);
147 0 : val |= pitch_val << I830_FENCE_PITCH_SHIFT;
148 0 : val |= I830_FENCE_REG_VALID;
149 0 : } else
150 : val = 0;
151 :
152 0 : I915_WRITE(FENCE_REG(reg), val);
153 0 : POSTING_READ(FENCE_REG(reg));
154 0 : }
155 :
156 0 : static void i830_write_fence_reg(struct drm_device *dev, int reg,
157 : struct drm_i915_gem_object *obj)
158 : {
159 0 : struct drm_i915_private *dev_priv = dev->dev_private;
160 : uint32_t val;
161 :
162 0 : if (obj) {
163 0 : u32 size = i915_gem_obj_ggtt_size(obj);
164 : uint32_t pitch_val;
165 :
166 0 : WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) ||
167 : (size & -size) != size ||
168 : (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
169 : "object 0x%08llx not 512K or pot-size 0x%08x aligned\n",
170 : i915_gem_obj_ggtt_offset(obj), size);
171 :
172 0 : pitch_val = obj->stride / 128;
173 0 : pitch_val = ffs(pitch_val) - 1;
174 :
175 0 : val = i915_gem_obj_ggtt_offset(obj);
176 0 : if (obj->tiling_mode == I915_TILING_Y)
177 0 : val |= 1 << I830_FENCE_TILING_Y_SHIFT;
178 0 : val |= I830_FENCE_SIZE_BITS(size);
179 0 : val |= pitch_val << I830_FENCE_PITCH_SHIFT;
180 0 : val |= I830_FENCE_REG_VALID;
181 0 : } else
182 : val = 0;
183 :
184 0 : I915_WRITE(FENCE_REG(reg), val);
185 0 : POSTING_READ(FENCE_REG(reg));
186 0 : }
187 :
188 0 : inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
189 : {
190 0 : return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
191 : }
192 :
193 0 : static void i915_gem_write_fence(struct drm_device *dev, int reg,
194 : struct drm_i915_gem_object *obj)
195 : {
196 0 : struct drm_i915_private *dev_priv = dev->dev_private;
197 :
198 : /* Ensure that all CPU reads are completed before installing a fence
199 : * and all writes before removing the fence.
200 : */
201 0 : if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
202 0 : mb();
203 :
204 0 : WARN(obj && (!obj->stride || !obj->tiling_mode),
205 : "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
206 : obj->stride, obj->tiling_mode);
207 :
208 0 : if (IS_GEN2(dev))
209 0 : i830_write_fence_reg(dev, reg, obj);
210 0 : else if (IS_GEN3(dev))
211 0 : i915_write_fence_reg(dev, reg, obj);
212 0 : else if (INTEL_INFO(dev)->gen >= 4)
213 0 : i965_write_fence_reg(dev, reg, obj);
214 :
215 : /* And similarly be paranoid that no direct access to this region
216 : * is reordered to before the fence is installed.
217 : */
218 0 : if (i915_gem_object_needs_mb(obj))
219 0 : mb();
220 0 : }
221 :
222 0 : static inline int fence_number(struct drm_i915_private *dev_priv,
223 : struct drm_i915_fence_reg *fence)
224 : {
225 0 : return fence - dev_priv->fence_regs;
226 : }
227 :
228 0 : static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
229 : struct drm_i915_fence_reg *fence,
230 : bool enable)
231 : {
232 0 : struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
233 0 : int reg = fence_number(dev_priv, fence);
234 :
235 0 : i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
236 :
237 0 : if (enable) {
238 0 : obj->fence_reg = reg;
239 0 : fence->obj = obj;
240 0 : list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
241 0 : } else {
242 0 : obj->fence_reg = I915_FENCE_REG_NONE;
243 0 : fence->obj = NULL;
244 0 : list_del_init(&fence->lru_list);
245 : }
246 0 : obj->fence_dirty = false;
247 0 : }
248 :
249 0 : static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
250 : {
251 0 : if (obj->tiling_mode)
252 0 : i915_gem_release_mmap(obj);
253 :
254 : /* As we do not have an associated fence register, we will force
255 : * a tiling change if we ever need to acquire one.
256 : */
257 0 : obj->fence_dirty = false;
258 0 : obj->fence_reg = I915_FENCE_REG_NONE;
259 0 : }
260 :
261 : static int
262 0 : i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
263 : {
264 0 : if (obj->last_fenced_req) {
265 0 : int ret = i915_wait_request(obj->last_fenced_req);
266 0 : if (ret)
267 0 : return ret;
268 :
269 0 : i915_gem_request_assign(&obj->last_fenced_req, NULL);
270 0 : }
271 :
272 0 : return 0;
273 0 : }
274 :
275 : /**
276 : * i915_gem_object_put_fence - force-remove fence for an object
277 : * @obj: object to map through a fence reg
278 : *
279 : * This function force-removes any fence from the given object, which is useful
280 : * if the kernel wants to do untiled GTT access.
281 : *
282 : * Returns:
283 : *
284 : * 0 on success, negative error code on failure.
285 : */
286 : int
287 0 : i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
288 : {
289 0 : struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
290 : struct drm_i915_fence_reg *fence;
291 : int ret;
292 :
293 0 : ret = i915_gem_object_wait_fence(obj);
294 0 : if (ret)
295 0 : return ret;
296 :
297 0 : if (obj->fence_reg == I915_FENCE_REG_NONE)
298 0 : return 0;
299 :
300 0 : fence = &dev_priv->fence_regs[obj->fence_reg];
301 :
302 0 : if (WARN_ON(fence->pin_count))
303 0 : return -EBUSY;
304 :
305 0 : i915_gem_object_fence_lost(obj);
306 0 : i915_gem_object_update_fence(obj, fence, false);
307 :
308 0 : return 0;
309 0 : }
310 :
311 : static struct drm_i915_fence_reg *
312 0 : i915_find_fence_reg(struct drm_device *dev)
313 : {
314 0 : struct drm_i915_private *dev_priv = dev->dev_private;
315 : struct drm_i915_fence_reg *reg, *avail;
316 : int i;
317 :
318 : /* First try to find a free reg */
319 : avail = NULL;
320 0 : for (i = 0; i < dev_priv->num_fence_regs; i++) {
321 0 : reg = &dev_priv->fence_regs[i];
322 0 : if (!reg->obj)
323 0 : return reg;
324 :
325 0 : if (!reg->pin_count)
326 0 : avail = reg;
327 : }
328 :
329 0 : if (avail == NULL)
330 : goto deadlock;
331 :
332 : /* None available, try to steal one or wait for a user to finish */
333 0 : list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
334 0 : if (reg->pin_count)
335 : continue;
336 :
337 0 : return reg;
338 : }
339 :
340 : deadlock:
341 : /* Wait for completion of pending flips which consume fences */
342 0 : if (intel_has_pending_fb_unpin(dev))
343 0 : return ERR_PTR(-EAGAIN);
344 :
345 0 : return ERR_PTR(-EDEADLK);
346 0 : }
347 :
348 : /**
349 : * i915_gem_object_get_fence - set up fencing for an object
350 : * @obj: object to map through a fence reg
351 : *
352 : * When mapping objects through the GTT, userspace wants to be able to write
353 : * to them without having to worry about swizzling if the object is tiled.
354 : * This function walks the fence regs looking for a free one for @obj,
355 : * stealing one if it can't find any.
356 : *
357 : * It then sets up the reg based on the object's properties: address, pitch
358 : * and tiling format.
359 : *
360 : * For an untiled surface, this removes any existing fence.
361 : *
362 : * Returns:
363 : *
364 : * 0 on success, negative error code on failure.
365 : */
366 : int
367 0 : i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
368 : {
369 0 : struct drm_device *dev = obj->base.dev;
370 0 : struct drm_i915_private *dev_priv = dev->dev_private;
371 0 : bool enable = obj->tiling_mode != I915_TILING_NONE;
372 : struct drm_i915_fence_reg *reg;
373 : int ret;
374 :
375 : /* Have we updated the tiling parameters upon the object and so
376 : * will need to serialise the write to the associated fence register?
377 : */
378 0 : if (obj->fence_dirty) {
379 0 : ret = i915_gem_object_wait_fence(obj);
380 0 : if (ret)
381 0 : return ret;
382 : }
383 :
384 : /* Just update our place in the LRU if our fence is getting reused. */
385 0 : if (obj->fence_reg != I915_FENCE_REG_NONE) {
386 0 : reg = &dev_priv->fence_regs[obj->fence_reg];
387 0 : if (!obj->fence_dirty) {
388 0 : list_move_tail(®->lru_list,
389 0 : &dev_priv->mm.fence_list);
390 0 : return 0;
391 : }
392 0 : } else if (enable) {
393 0 : if (WARN_ON(!obj->map_and_fenceable))
394 0 : return -EINVAL;
395 :
396 0 : reg = i915_find_fence_reg(dev);
397 0 : if (IS_ERR(reg))
398 0 : return PTR_ERR(reg);
399 :
400 0 : if (reg->obj) {
401 : struct drm_i915_gem_object *old = reg->obj;
402 :
403 0 : ret = i915_gem_object_wait_fence(old);
404 0 : if (ret)
405 0 : return ret;
406 :
407 0 : i915_gem_object_fence_lost(old);
408 0 : }
409 : } else
410 0 : return 0;
411 :
412 0 : i915_gem_object_update_fence(obj, reg, enable);
413 :
414 0 : return 0;
415 0 : }
416 :
417 : /**
418 : * i915_gem_object_pin_fence - pin fencing state
419 : * @obj: object to pin fencing for
420 : *
421 : * This pins the fencing state (whether tiled or untiled) to make sure the
422 : * object is ready to be used as a scanout target. Fencing status must be
423 : * synchronized first by calling i915_gem_object_get_fence().
424 : *
425 : * The resulting fence pin reference must be released again with
426 : * i915_gem_object_unpin_fence().
427 : *
428 : * Returns:
429 : *
430 : * True if the object has a fence, false otherwise.
431 : */
432 : bool
433 0 : i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
434 : {
435 0 : if (obj->fence_reg != I915_FENCE_REG_NONE) {
436 0 : struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
437 0 : struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj);
438 :
439 0 : WARN_ON(!ggtt_vma ||
440 : dev_priv->fence_regs[obj->fence_reg].pin_count >
441 : ggtt_vma->pin_count);
442 0 : dev_priv->fence_regs[obj->fence_reg].pin_count++;
443 : return true;
444 : } else
445 0 : return false;
446 0 : }
447 :
448 : /**
449 : * i915_gem_object_unpin_fence - unpin fencing state
450 : * @obj: object to unpin fencing for
451 : *
452 : * This releases the fence pin reference acquired through
453 : * i915_gem_object_pin_fence(). It will handle both objects with and without an
454 : * attached fence correctly; callers do not need to distinguish between the two.
455 : */
456 : void
457 0 : i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj)
458 : {
459 0 : if (obj->fence_reg != I915_FENCE_REG_NONE) {
460 0 : struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
461 0 : WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0);
462 0 : dev_priv->fence_regs[obj->fence_reg].pin_count--;
463 0 : }
464 0 : }
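/*
 * Illustrative pairing sketch (hypothetical helper, not part of this file):
 * fence state is synchronized with i915_gem_object_get_fence() first, then
 * pinned for as long as the buffer is scanned out and released with a
 * matching unpin. i915_gem_object_unpin_fence() copes with objects that
 * never had a fence, so the unpin does not need to be conditional.
 */
static inline int
example_pin_fence_for_scanout(struct drm_i915_gem_object *obj)
{
	int ret;

	ret = i915_gem_object_get_fence(obj);
	if (ret)
		return ret;

	/* Returns true only if a fence register is actually attached. */
	i915_gem_object_pin_fence(obj);

	/* ... scan out the object ... */

	i915_gem_object_unpin_fence(obj);
	return 0;
}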
465 :
466 : /**
467 : * i915_gem_restore_fences - restore fence state
468 : * @dev: DRM device
469 : *
470 : * Restore the hw fence state to match the software tracking again. To be called
471 : * after a gpu reset and on resume.
472 : */
473 0 : void i915_gem_restore_fences(struct drm_device *dev)
474 : {
475 0 : struct drm_i915_private *dev_priv = dev->dev_private;
476 : int i;
477 :
478 0 : for (i = 0; i < dev_priv->num_fence_regs; i++) {
479 0 : struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
480 :
481 : /*
482 : * Commit delayed tiling changes if we have an object still
483 : * attached to the fence, otherwise just clear the fence.
484 : */
485 0 : if (reg->obj) {
486 0 : i915_gem_object_update_fence(reg->obj, reg,
487 0 : reg->obj->tiling_mode);
488 0 : } else {
489 0 : i915_gem_write_fence(dev, i, NULL);
490 : }
491 : }
492 0 : }
493 :
494 : /**
495 : * DOC: tiling swizzling details
496 : *
497 : * The idea behind tiling is to increase cache hit rates by rearranging
498 : * pixel data so that a group of pixel accesses are in the same cacheline.
499 : * The performance improvement from doing this on the back/depth buffer is on
500 : * the order of 30%.
501 : *
502 : * Intel architectures make this somewhat more complicated, though, by
503 : * adjustments made to addressing of data when the memory is in interleaved
504 : * mode (matched pairs of DIMMS) to improve memory bandwidth.
505 : * For interleaved memory, the CPU sends every sequential 64 bytes
506 : * to an alternate memory channel so it can get the bandwidth from both.
507 : *
508 : * The GPU also rearranges its accesses for increased bandwidth to interleaved
509 : * memory, and it matches what the CPU does for non-tiled. However, when tiled
510 : * it does it a little differently, since one walks addresses not just in the
511 : * X direction but also Y. So, along with alternating channels when bit
512 : * 6 of the address flips, it also alternates when other bits flip -- Bits 9
513 : * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines)
514 : * are common to both the 915 and 965-class hardware.
515 : *
516 : * The CPU also sometimes XORs in higher bits as well, to improve
517 : * bandwidth doing strided access like we do so frequently in graphics. This
518 : * is called "Channel XOR Randomization" in the MCH documentation. The result
519 : * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address
520 : * decode.
521 : *
522 : * All of this bit 6 XORing has an effect on our memory management,
523 : * as we need to make sure that the 3d driver can correctly address object
524 : * contents.
525 : *
526 : * If we don't have interleaved memory, all tiling is safe and no swizzling is
527 : * required.
528 : *
529 : * When bit 17 is XORed in, we simply refuse to tile at all. Bit
530 : * 17 is not just a page offset, so as we page an object out and back in,
531 : * individual pages in it will have different bit 17 addresses, resulting in
532 : * each 64 bytes being swapped with its neighbor!
533 : *
534 : * Otherwise, if interleaved, we have to tell the 3d driver what address
535 : * swizzling it needs to do, since it's writing with the CPU to the pages
536 : * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the
537 : * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling
538 : * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order
539 : * to match what the GPU expects.
540 : */
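/*
 * Illustration only (hypothetical helper, not used by the driver): the
 * cumulative CPU-side swizzle described above for the bit 9/10/11 case
 * folds those address bits into bit 6 of the linear offset.
 */
static inline unsigned int
example_bit6_swizzle_9_10_11(uint64_t addr)
{
	return ((addr >> 6) ^ (addr >> 9) ^ (addr >> 10) ^ (addr >> 11)) & 1;
}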
541 :
542 : /**
543 : * i915_gem_detect_bit_6_swizzle - detect bit 6 swizzling pattern
544 : * @dev: DRM device
545 : *
546 : * Detects bit 6 swizzling of address lookup between IGD access and CPU
547 : * access through main memory.
548 : */
549 : void
550 0 : i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
551 : {
552 0 : struct drm_i915_private *dev_priv = dev->dev_private;
553 : uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
554 : uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
555 :
556 0 : if (INTEL_INFO(dev)->gen >= 8 || IS_VALLEYVIEW(dev)) {
557 : /*
558 : * On BDW+, swizzling is not used. We leave the CPU memory
559 : * controller in charge of optimizing memory accesses without
560 : * the extra address manipulation GPU side.
561 : *
562 : * VLV and CHV don't have GPU swizzling.
563 : */
564 : swizzle_x = I915_BIT_6_SWIZZLE_NONE;
565 : swizzle_y = I915_BIT_6_SWIZZLE_NONE;
566 0 : } else if (INTEL_INFO(dev)->gen >= 6) {
567 0 : if (dev_priv->preserve_bios_swizzle) {
568 0 : if (I915_READ(DISP_ARB_CTL) &
569 : DISP_TILE_SURFACE_SWIZZLING) {
570 : swizzle_x = I915_BIT_6_SWIZZLE_9_10;
571 : swizzle_y = I915_BIT_6_SWIZZLE_9;
572 0 : } else {
573 : swizzle_x = I915_BIT_6_SWIZZLE_NONE;
574 : swizzle_y = I915_BIT_6_SWIZZLE_NONE;
575 : }
576 : } else {
577 : uint32_t dimm_c0, dimm_c1;
578 0 : dimm_c0 = I915_READ(MAD_DIMM_C0);
579 0 : dimm_c1 = I915_READ(MAD_DIMM_C1);
580 0 : dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
581 0 : dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
582 : /* Enable swizzling when the channels are populated
583 : * with identically sized DIMMs. We don't need to check
584 : * the 3rd channel because no CPU with a GPU attached
585 : * ships in that configuration. Also, swizzling only
586 : * makes sense for 2 channels anyway. */
587 0 : if (dimm_c0 == dimm_c1) {
588 : swizzle_x = I915_BIT_6_SWIZZLE_9_10;
589 : swizzle_y = I915_BIT_6_SWIZZLE_9;
590 0 : } else {
591 : swizzle_x = I915_BIT_6_SWIZZLE_NONE;
592 : swizzle_y = I915_BIT_6_SWIZZLE_NONE;
593 : }
594 : }
595 0 : } else if (IS_GEN5(dev)) {
596 : /* On Ironlake, whatever the DRAM config, the GPU always does
597 : * the same swizzling setup.
598 : */
599 : swizzle_x = I915_BIT_6_SWIZZLE_9_10;
600 : swizzle_y = I915_BIT_6_SWIZZLE_9;
601 0 : } else if (IS_GEN2(dev)) {
602 : /* As far as we know, the 865 doesn't have these bit 6
603 : * swizzling issues.
604 : */
605 : swizzle_x = I915_BIT_6_SWIZZLE_NONE;
606 : swizzle_y = I915_BIT_6_SWIZZLE_NONE;
607 0 : } else if (IS_MOBILE(dev) || (IS_GEN3(dev) && !IS_G33(dev))) {
608 : uint32_t dcc;
609 :
610 : /* On 9xx chipsets, channel interleave by the CPU is
611 : * determined by DCC. For single-channel, neither the CPU
612 : * nor the GPU does swizzling. For dual-channel interleaved,
613 : * the GPU's interleave is bits 9 and 10 for X tiled, and bit
614 : * 9 for Y tiled. The CPU's interleave is independent, and
615 : * can be based on either bit 11 (haven't seen this yet) or
616 : * bit 17 (common).
617 : */
618 0 : dcc = I915_READ(DCC);
619 0 : switch (dcc & DCC_ADDRESSING_MODE_MASK) {
620 : case DCC_ADDRESSING_MODE_SINGLE_CHANNEL:
621 : case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC:
622 : swizzle_x = I915_BIT_6_SWIZZLE_NONE;
623 : swizzle_y = I915_BIT_6_SWIZZLE_NONE;
624 0 : break;
625 : case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED:
626 0 : if (dcc & DCC_CHANNEL_XOR_DISABLE) {
627 : /* This is the base swizzling by the GPU for
628 : * tiled buffers.
629 : */
630 : swizzle_x = I915_BIT_6_SWIZZLE_9_10;
631 : swizzle_y = I915_BIT_6_SWIZZLE_9;
632 0 : } else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) {
633 : /* Bit 11 swizzling by the CPU in addition. */
634 : swizzle_x = I915_BIT_6_SWIZZLE_9_10_11;
635 : swizzle_y = I915_BIT_6_SWIZZLE_9_11;
636 0 : } else {
637 : /* Bit 17 swizzling by the CPU in addition. */
638 : swizzle_x = I915_BIT_6_SWIZZLE_9_10_17;
639 : swizzle_y = I915_BIT_6_SWIZZLE_9_17;
640 : }
641 : break;
642 : }
643 :
644 : /* check for L-shaped memory aka modified enhanced addressing */
645 0 : if (IS_GEN4(dev) &&
646 0 : !(I915_READ(DCC2) & DCC2_MODIFIED_ENHANCED_DISABLE)) {
647 : swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
648 : swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
649 0 : }
650 :
651 0 : if (dcc == 0xffffffff) {
652 0 : DRM_ERROR("Couldn't read from MCHBAR. "
653 : "Disabling tiling.\n");
654 : swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
655 : swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
656 0 : }
657 0 : } else {
658 : /* The 965, G33, and newer have a very flexible memory
659 : * configuration. It will enable dual-channel mode
660 : * (interleaving) on as much memory as it can, and the GPU
661 : * will additionally sometimes enable different bit 6
662 : * swizzling for tiled objects from the CPU.
663 : *
664 : * Here's what I found on the G965:
665 : * slot fill               memory size   swizzling
666 : * 0A    0B    1A    1B    1-ch   2-ch
667 : * 512   0     0     0     512    0      O
668 : * 512   0     512   0     16     1008   X
669 : * 512   0     0     512   16     1008   X
670 : * 0     512   0     512   16     1008   X
671 : * 1024  1024  1024  0     2048   1024   O
672 : *
673 : * We could probably detect this based on either the DRB
674 : * matching, which was the case for the swizzling required in
675 : * the table above, or from the 1-ch value being less than
676 : * the minimum size of a rank.
677 : *
678 : * Reports indicate that the swizzling actually
679 : * varies depending upon page placement inside the
680 : * channels, i.e. we see swizzled pages where the
681 : * banks of memory are paired and unswizzled on the
682 : * uneven portion, so leave that as unknown.
683 : */
684 0 : if (I915_READ16(C0DRB3) == I915_READ16(C1DRB3)) {
685 : swizzle_x = I915_BIT_6_SWIZZLE_9_10;
686 : swizzle_y = I915_BIT_6_SWIZZLE_9;
687 0 : }
688 : }
689 :
690 0 : if (swizzle_x == I915_BIT_6_SWIZZLE_UNKNOWN ||
691 0 : swizzle_y == I915_BIT_6_SWIZZLE_UNKNOWN) {
692 : /* Userspace likes to explode if it sees unknown swizzling,
693 : * so lie. We will finish the lie when reporting through
694 : * the get-tiling-ioctl by reporting the physical swizzle
695 : * mode as unknown instead.
696 : *
697 : * As we don't strictly know what the swizzling is, it may be
698 : * bit17 dependent, and so we need to also prevent the pages
699 : * from being moved.
700 : */
701 0 : dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES;
702 : swizzle_x = I915_BIT_6_SWIZZLE_NONE;
703 : swizzle_y = I915_BIT_6_SWIZZLE_NONE;
704 0 : }
705 :
706 0 : dev_priv->mm.bit_6_swizzle_x = swizzle_x;
707 0 : dev_priv->mm.bit_6_swizzle_y = swizzle_y;
708 0 : }
709 :
710 : /*
711 : * Swap every 64 bytes of this page around, to account for it having a new
712 : * bit 17 of its physical address and therefore being interpreted differently
713 : * by the GPU.
714 : */
715 : static void
716 0 : i915_gem_swizzle_page(struct vm_page *page)
717 : {
718 0 : char temp[64];
719 : char *vaddr;
720 : int i;
721 :
722 0 : vaddr = kmap(page);
723 :
724 0 : for (i = 0; i < PAGE_SIZE; i += 128) {
725 0 : memcpy(temp, &vaddr[i], 64);
726 0 : memcpy(&vaddr[i], &vaddr[i + 64], 64);
727 0 : memcpy(&vaddr[i + 64], temp, 64);
728 : }
729 :
730 0 : kunmap(vaddr);
731 0 : }
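/*
 * For example (informational): bytes 0-63 of the page swap with bytes
 * 64-127, bytes 128-191 swap with bytes 192-255, and so on, one 128-byte
 * block at a time across the whole page.
 */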
732 :
733 : /**
734 : * i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling
735 : * @obj: i915 GEM buffer object
736 : *
737 : * This function fixes up the swizzling in case any page frame number for this
738 : * object has changed in bit 17 since that state has been saved with
739 : * i915_gem_object_save_bit_17_swizzle().
740 : *
741 : * This is called when pinning backing storage again, since the kernel is free
742 : * to move unpinned backing storage around (either by directly moving pages or
743 : * by swapping them out and back in again).
744 : */
745 : void
746 0 : i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
747 : {
748 0 : struct sg_page_iter sg_iter;
749 : int i;
750 :
751 0 : if (obj->bit_17 == NULL)
752 0 : return;
753 :
754 : i = 0;
755 0 : for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
756 0 : struct vm_page *page = sg_page_iter_page(&sg_iter);
757 0 : char new_bit_17 = page_to_phys(page) >> 17;
758 0 : if ((new_bit_17 & 0x1) !=
759 0 : (test_bit(i, obj->bit_17) != 0)) {
760 0 : i915_gem_swizzle_page(page);
761 0 : set_page_dirty(page);
762 0 : }
763 0 : i++;
764 : }
765 0 : }
766 :
767 : /**
768 : * i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling
769 : * @obj: i915 GEM buffer object
770 : *
771 : * This function saves the bit 17 of each page frame number so that swizzling
772 : * can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This must
773 : * be called before the backing storage can be unpinned.
774 : */
775 : void
776 0 : i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
777 : {
778 0 : struct sg_page_iter sg_iter;
779 0 : int page_count = obj->base.size >> PAGE_SHIFT;
780 : int i;
781 :
782 0 : if (obj->bit_17 == NULL) {
783 0 : obj->bit_17 = kcalloc(BITS_TO_LONGS(page_count),
784 : sizeof(long), GFP_KERNEL);
785 0 : if (obj->bit_17 == NULL) {
786 0 : DRM_ERROR("Failed to allocate memory for bit 17 "
787 : "record\n");
788 0 : return;
789 : }
790 : }
791 :
792 : i = 0;
793 0 : for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
794 0 : if (page_to_phys(sg_page_iter_page(&sg_iter)) & (1 << 17))
795 0 : __set_bit(i, obj->bit_17);
796 : else
797 0 : __clear_bit(i, obj->bit_17);
798 0 : i++;
799 : }
800 0 : }