Line data Source code
1 : /*
2 : * Copyright © 2012 Intel Corporation
3 : *
4 : * Permission is hereby granted, free of charge, to any person obtaining a
5 : * copy of this software and associated documentation files (the "Software"),
6 : * to deal in the Software without restriction, including without limitation
7 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 : * and/or sell copies of the Software, and to permit persons to whom the
9 : * Software is furnished to do so, subject to the following conditions:
10 : *
11 : * The above copyright notice and this permission notice (including the next
12 : * paragraph) shall be included in all copies or substantial portions of the
13 : * Software.
14 : *
15 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 : * IN THE SOFTWARE.
22 : *
23 : * Authors:
24 : * Eugeni Dodonov <eugeni.dodonov@intel.com>
25 : *
26 : */
27 :
28 : #ifdef __linux__
29 : #include <linux/cpufreq.h>
30 : #endif
31 : #include "i915_drv.h"
32 : #include "intel_drv.h"
33 : #ifdef __linux__
34 : #include "../../../platform/x86/intel_ips.h"
35 : #include <linux/module.h>
36 : #endif
37 :
38 : /**
39 : * RC6 is a special power stage which allows the GPU to enter a very
40 : * low-voltage mode when idle, using down to 0V while at this stage. This
41 : * stage is entered automatically when the GPU is idle and RC6 support is
42 : * enabled; as soon as a new workload arises, the GPU wakes up automatically as well.
43 : *
44 : * Intel GPUs provide several RC6 modes, which differ from one another
45 : * in the latency required to enter and leave RC6 and in the voltage
46 : * consumed by the GPU in each state.
47 : *
48 : * The combination of the following flags defines which states the GPU is
49 : * allowed to enter: RC6 is the normal RC6 state, RC6p is the deep RC6, and
50 : * RC6pp is the deepest RC6. Hardware support for each varies according to the
51 : * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one
52 : * which brings the most power savings; deeper states save more power, but
53 : * incur higher latency to enter and to wake up from.
54 : */
55 : #define INTEL_RC6_ENABLE (1<<0)
56 : #define INTEL_RC6p_ENABLE (1<<1)
57 : #define INTEL_RC6pp_ENABLE (1<<2)
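/*
 * Illustrative sketch, not part of the driver: how these flags might be
 * combined into an RC6 enable mask. sanitize_rc6_flags() is a hypothetical
 * helper invented for this example.
 */
static int sanitize_rc6_flags(int requested)
{
	/* Plain RC6 is the safe default; the deeper states are opt-in. */
	if (requested < 0)
		return INTEL_RC6_ENABLE;
	return requested & (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE |
			    INTEL_RC6pp_ENABLE);
}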
58 :
59 0 : static void bxt_init_clock_gating(struct drm_device *dev)
60 : {
61 0 : struct drm_i915_private *dev_priv = dev->dev_private;
62 :
63 : /* WaDisableSDEUnitClockGating:bxt */
64 0 : I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
65 : GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
66 :
67 : /*
68 : * FIXME:
69 : * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
70 : */
71 0 : I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
72 : GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);
73 0 : }
74 :
75 0 : static void i915_pineview_get_mem_freq(struct drm_device *dev)
76 : {
77 0 : struct drm_i915_private *dev_priv = dev->dev_private;
78 : u32 tmp;
79 :
80 0 : tmp = I915_READ(CLKCFG);
81 :
82 0 : switch (tmp & CLKCFG_FSB_MASK) {
83 : case CLKCFG_FSB_533:
84 0 : dev_priv->fsb_freq = 533; /* 133*4 */
85 0 : break;
86 : case CLKCFG_FSB_800:
87 0 : dev_priv->fsb_freq = 800; /* 200*4 */
88 0 : break;
89 : case CLKCFG_FSB_667:
90 0 : dev_priv->fsb_freq = 667; /* 167*4 */
91 0 : break;
92 : case CLKCFG_FSB_400:
93 0 : dev_priv->fsb_freq = 400; /* 100*4 */
94 0 : break;
95 : }
96 :
97 0 : switch (tmp & CLKCFG_MEM_MASK) {
98 : case CLKCFG_MEM_533:
99 0 : dev_priv->mem_freq = 533;
100 0 : break;
101 : case CLKCFG_MEM_667:
102 0 : dev_priv->mem_freq = 667;
103 0 : break;
104 : case CLKCFG_MEM_800:
105 0 : dev_priv->mem_freq = 800;
106 0 : break;
107 : }
108 :
109 : /* detect pineview DDR3 setting */
110 0 : tmp = I915_READ(CSHRDDR3CTL);
111 0 : dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0;
112 0 : }
113 :
114 0 : static void i915_ironlake_get_mem_freq(struct drm_device *dev)
115 : {
116 0 : struct drm_i915_private *dev_priv = dev->dev_private;
117 : u16 ddrpll, csipll;
118 :
119 0 : ddrpll = I915_READ16(DDRMPLL1);
120 0 : csipll = I915_READ16(CSIPLL0);
121 :
122 0 : switch (ddrpll & 0xff) {
123 : case 0xc:
124 0 : dev_priv->mem_freq = 800;
125 0 : break;
126 : case 0x10:
127 0 : dev_priv->mem_freq = 1066;
128 0 : break;
129 : case 0x14:
130 0 : dev_priv->mem_freq = 1333;
131 0 : break;
132 : case 0x18:
133 0 : dev_priv->mem_freq = 1600;
134 0 : break;
135 : default:
136 : DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
137 : ddrpll & 0xff);
138 0 : dev_priv->mem_freq = 0;
139 0 : break;
140 : }
141 :
142 0 : dev_priv->ips.r_t = dev_priv->mem_freq;
143 :
144 0 : switch (csipll & 0x3ff) {
145 : case 0x00c:
146 0 : dev_priv->fsb_freq = 3200;
147 0 : break;
148 : case 0x00e:
149 0 : dev_priv->fsb_freq = 3733;
150 0 : break;
151 : case 0x010:
152 0 : dev_priv->fsb_freq = 4266;
153 0 : break;
154 : case 0x012:
155 0 : dev_priv->fsb_freq = 4800;
156 0 : break;
157 : case 0x014:
158 0 : dev_priv->fsb_freq = 5333;
159 0 : break;
160 : case 0x016:
161 0 : dev_priv->fsb_freq = 5866;
162 0 : break;
163 : case 0x018:
164 0 : dev_priv->fsb_freq = 6400;
165 0 : break;
166 : default:
167 : DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
168 : csipll & 0x3ff);
169 0 : dev_priv->fsb_freq = 0;
170 0 : break;
171 : }
172 :
173 0 : if (dev_priv->fsb_freq == 3200) {
174 0 : dev_priv->ips.c_m = 0;
175 0 : } else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
176 0 : dev_priv->ips.c_m = 1;
177 0 : } else {
178 0 : dev_priv->ips.c_m = 2;
179 : }
180 0 : }
181 :
182 : static const struct cxsr_latency cxsr_latency_table[] = {
183 : {1, 0, 800, 400, 3382, 33382, 3983, 33983}, /* DDR2-400 SC */
184 : {1, 0, 800, 667, 3354, 33354, 3807, 33807}, /* DDR2-667 SC */
185 : {1, 0, 800, 800, 3347, 33347, 3763, 33763}, /* DDR2-800 SC */
186 : {1, 1, 800, 667, 6420, 36420, 6873, 36873}, /* DDR3-667 SC */
187 : {1, 1, 800, 800, 5902, 35902, 6318, 36318}, /* DDR3-800 SC */
188 :
189 : {1, 0, 667, 400, 3400, 33400, 4021, 34021}, /* DDR2-400 SC */
190 : {1, 0, 667, 667, 3372, 33372, 3845, 33845}, /* DDR2-667 SC */
191 : {1, 0, 667, 800, 3386, 33386, 3822, 33822}, /* DDR2-800 SC */
192 : {1, 1, 667, 667, 6438, 36438, 6911, 36911}, /* DDR3-667 SC */
193 : {1, 1, 667, 800, 5941, 35941, 6377, 36377}, /* DDR3-800 SC */
194 :
195 : {1, 0, 400, 400, 3472, 33472, 4173, 34173}, /* DDR2-400 SC */
196 : {1, 0, 400, 667, 3443, 33443, 3996, 33996}, /* DDR2-667 SC */
197 : {1, 0, 400, 800, 3430, 33430, 3946, 33946}, /* DDR2-800 SC */
198 : {1, 1, 400, 667, 6509, 36509, 7062, 37062}, /* DDR3-667 SC */
199 : {1, 1, 400, 800, 5985, 35985, 6501, 36501}, /* DDR3-800 SC */
200 :
201 : {0, 0, 800, 400, 3438, 33438, 4065, 34065}, /* DDR2-400 SC */
202 : {0, 0, 800, 667, 3410, 33410, 3889, 33889}, /* DDR2-667 SC */
203 : {0, 0, 800, 800, 3403, 33403, 3845, 33845}, /* DDR2-800 SC */
204 : {0, 1, 800, 667, 6476, 36476, 6955, 36955}, /* DDR3-667 SC */
205 : {0, 1, 800, 800, 5958, 35958, 6400, 36400}, /* DDR3-800 SC */
206 :
207 : {0, 0, 667, 400, 3456, 33456, 4103, 34106}, /* DDR2-400 SC */
208 : {0, 0, 667, 667, 3428, 33428, 3927, 33927}, /* DDR2-667 SC */
209 : {0, 0, 667, 800, 3443, 33443, 3905, 33905}, /* DDR2-800 SC */
210 : {0, 1, 667, 667, 6494, 36494, 6993, 36993}, /* DDR3-667 SC */
211 : {0, 1, 667, 800, 5998, 35998, 6460, 36460}, /* DDR3-800 SC */
212 :
213 : {0, 0, 400, 400, 3528, 33528, 4255, 34255}, /* DDR2-400 SC */
214 : {0, 0, 400, 667, 3500, 33500, 4079, 34079}, /* DDR2-667 SC */
215 : {0, 0, 400, 800, 3487, 33487, 4029, 34029}, /* DDR2-800 SC */
216 : {0, 1, 400, 667, 6566, 36566, 7145, 37145}, /* DDR3-667 SC */
217 : {0, 1, 400, 800, 6042, 36042, 6584, 36584}, /* DDR3-800 SC */
218 : };
219 :
220 0 : static const struct cxsr_latency *intel_get_cxsr_latency(int is_desktop,
221 : int is_ddr3,
222 : int fsb,
223 : int mem)
224 : {
225 : const struct cxsr_latency *latency;
226 : int i;
227 :
228 0 : if (fsb == 0 || mem == 0)
229 0 : return NULL;
230 :
231 0 : for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
232 0 : latency = &cxsr_latency_table[i];
233 0 : if (is_desktop == latency->is_desktop &&
234 0 : is_ddr3 == latency->is_ddr3 &&
235 0 : fsb == latency->fsb_freq && mem == latency->mem_freq)
236 0 : return latency;
237 : }
238 :
239 : DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
240 :
241 0 : return NULL;
242 0 : }
243 :
244 0 : static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
245 : {
246 : u32 val;
247 :
248 0 : mutex_lock(&dev_priv->rps.hw_lock);
249 :
250 0 : val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
251 0 : if (enable)
252 0 : val &= ~FORCE_DDR_HIGH_FREQ;
253 : else
254 0 : val |= FORCE_DDR_HIGH_FREQ;
255 0 : val &= ~FORCE_DDR_LOW_FREQ;
256 0 : val |= FORCE_DDR_FREQ_REQ_ACK;
257 0 : vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
258 :
259 0 : if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
260 : FORCE_DDR_FREQ_REQ_ACK) == 0, 3))
261 0 : DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");
262 :
263 0 : mutex_unlock(&dev_priv->rps.hw_lock);
264 0 : }
265 :
266 0 : static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
267 : {
268 : u32 val;
269 :
270 0 : mutex_lock(&dev_priv->rps.hw_lock);
271 :
272 0 : val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
273 0 : if (enable)
274 0 : val |= DSP_MAXFIFO_PM5_ENABLE;
275 : else
276 0 : val &= ~DSP_MAXFIFO_PM5_ENABLE;
277 0 : vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val);
278 :
279 0 : mutex_unlock(&dev_priv->rps.hw_lock);
280 0 : }
281 :
282 : #define FW_WM(value, plane) \
283 : (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK)
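/*
 * For example, FW_WM(wm, SR) expands (via token pasting) to
 * ((wm << DSPFW_SR_SHIFT) & DSPFW_SR_MASK): the value is shifted into the
 * SR field of the DSPFW register and masked to that field's width.
 */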
284 :
285 0 : void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
286 : {
287 0 : struct drm_device *dev = dev_priv->dev;
288 : u32 val;
289 :
290 0 : if (IS_VALLEYVIEW(dev)) {
291 0 : I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
292 0 : POSTING_READ(FW_BLC_SELF_VLV);
293 0 : dev_priv->wm.vlv.cxsr = enable;
294 0 : } else if (IS_G4X(dev) || IS_CRESTLINE(dev)) {
295 0 : I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
296 0 : POSTING_READ(FW_BLC_SELF);
297 0 : } else if (IS_PINEVIEW(dev)) {
298 0 : val = I915_READ(DSPFW3) & ~PINEVIEW_SELF_REFRESH_EN;
299 0 : val |= enable ? PINEVIEW_SELF_REFRESH_EN : 0;
300 0 : I915_WRITE(DSPFW3, val);
301 0 : POSTING_READ(DSPFW3);
302 0 : } else if (IS_I945G(dev) || IS_I945GM(dev)) {
303 0 : val = enable ? _MASKED_BIT_ENABLE(FW_BLC_SELF_EN) :
304 : _MASKED_BIT_DISABLE(FW_BLC_SELF_EN);
305 0 : I915_WRITE(FW_BLC_SELF, val);
306 0 : POSTING_READ(FW_BLC_SELF);
307 0 : } else if (IS_I915GM(dev)) {
308 0 : val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) :
309 : _MASKED_BIT_DISABLE(INSTPM_SELF_EN);
310 0 : I915_WRITE(INSTPM, val);
311 0 : POSTING_READ(INSTPM);
312 : } else {
313 0 : return;
314 : }
315 :
316 : DRM_DEBUG_KMS("memory self-refresh is %s\n",
317 : enable ? "enabled" : "disabled");
318 0 : }
319 :
320 :
321 : /*
322 : * Latency for FIFO fetches is dependent on several factors:
323 : * - memory configuration (speed, channels)
324 : * - chipset
325 : * - current MCH state
326 : * It can be fairly high in some situations, so here we assume a fairly
327 : * pessimal value. It's a tradeoff between extra memory fetches (if we
328 : * set this value too high, the FIFO will fetch frequently to stay full)
329 : * and power consumption (if we set it too low to save power, we might
330 : * see FIFO underruns and display "flicker").
331 : *
332 : * A value of 5us seems to be a good balance; safe for very low end
333 : * platforms but not overly aggressive on lower latency configs.
334 : */
335 : static const int pessimal_latency_ns = 5000;
336 :
337 : #define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \
338 : ((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8))
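/*
 * Worked example with assumed register values: if DSPARB reads 0x00004038
 * and DSPARB2 reads 0x00000001, then VLV_FIFO_START(dsparb, dsparb2, 0, 0)
 * yields (0x38) | (0x1 << 8) = 0x138 -- the low 8 bits of the FIFO start
 * point come from DSPARB and the 9th bit from DSPARB2.
 */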
339 :
340 0 : static int vlv_get_fifo_size(struct drm_device *dev,
341 : enum pipe pipe, int plane)
342 : {
343 0 : struct drm_i915_private *dev_priv = dev->dev_private;
344 : int sprite0_start, sprite1_start, size;
345 :
346 0 : switch (pipe) {
347 : uint32_t dsparb, dsparb2, dsparb3;
348 : case PIPE_A:
349 0 : dsparb = I915_READ(DSPARB);
350 0 : dsparb2 = I915_READ(DSPARB2);
351 0 : sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 0, 0);
352 0 : sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 8, 4);
353 0 : break;
354 : case PIPE_B:
355 0 : dsparb = I915_READ(DSPARB);
356 0 : dsparb2 = I915_READ(DSPARB2);
357 0 : sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 16, 8);
358 0 : sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 24, 12);
359 0 : break;
360 : case PIPE_C:
361 0 : dsparb2 = I915_READ(DSPARB2);
362 0 : dsparb3 = I915_READ(DSPARB3);
363 0 : sprite0_start = VLV_FIFO_START(dsparb3, dsparb2, 0, 16);
364 0 : sprite1_start = VLV_FIFO_START(dsparb3, dsparb2, 8, 20);
365 0 : break;
366 : default:
367 0 : return 0;
368 : }
369 :
370 0 : switch (plane) {
371 : case 0:
372 : size = sprite0_start;
373 0 : break;
374 : case 1:
375 0 : size = sprite1_start - sprite0_start;
376 0 : break;
377 : case 2:
378 0 : size = 512 - 1 - sprite1_start;
379 0 : break;
380 : default:
381 0 : return 0;
382 : }
383 :
384 : DRM_DEBUG_KMS("Pipe %c %s %c FIFO size: %d\n",
385 : pipe_name(pipe), plane == 0 ? "primary" : "sprite",
386 : plane == 0 ? plane_name(pipe) : sprite_name(pipe, plane - 1),
387 : size);
388 :
389 0 : return size;
390 0 : }
391 :
392 0 : static int i9xx_get_fifo_size(struct drm_device *dev, int plane)
393 : {
394 0 : struct drm_i915_private *dev_priv = dev->dev_private;
395 0 : uint32_t dsparb = I915_READ(DSPARB);
396 : int size;
397 :
398 0 : size = dsparb & 0x7f;
399 0 : if (plane)
400 0 : size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;
401 :
402 : DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
403 : plane ? "B" : "A", size);
404 :
405 0 : return size;
406 : }
407 :
408 0 : static int i830_get_fifo_size(struct drm_device *dev, int plane)
409 : {
410 0 : struct drm_i915_private *dev_priv = dev->dev_private;
411 0 : uint32_t dsparb = I915_READ(DSPARB);
412 : int size;
413 :
414 0 : size = dsparb & 0x1ff;
415 0 : if (plane)
416 0 : size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
417 0 : size >>= 1; /* Convert to cachelines */
418 :
419 : DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
420 : plane ? "B" : "A", size);
421 :
422 0 : return size;
423 : }
424 :
425 0 : static int i845_get_fifo_size(struct drm_device *dev, int plane)
426 : {
427 0 : struct drm_i915_private *dev_priv = dev->dev_private;
428 0 : uint32_t dsparb = I915_READ(DSPARB);
429 : int size;
430 :
431 0 : size = dsparb & 0x7f;
432 0 : size >>= 2; /* Convert to cachelines */
433 :
434 : DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
435 : plane ? "B" : "A",
436 : size);
437 :
438 0 : return size;
439 : }
440 :
441 : /* Pineview has different values for various configs */
442 : static const struct intel_watermark_params pineview_display_wm = {
443 : .fifo_size = PINEVIEW_DISPLAY_FIFO,
444 : .max_wm = PINEVIEW_MAX_WM,
445 : .default_wm = PINEVIEW_DFT_WM,
446 : .guard_size = PINEVIEW_GUARD_WM,
447 : .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
448 : };
449 : static const struct intel_watermark_params pineview_display_hplloff_wm = {
450 : .fifo_size = PINEVIEW_DISPLAY_FIFO,
451 : .max_wm = PINEVIEW_MAX_WM,
452 : .default_wm = PINEVIEW_DFT_HPLLOFF_WM,
453 : .guard_size = PINEVIEW_GUARD_WM,
454 : .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
455 : };
456 : static const struct intel_watermark_params pineview_cursor_wm = {
457 : .fifo_size = PINEVIEW_CURSOR_FIFO,
458 : .max_wm = PINEVIEW_CURSOR_MAX_WM,
459 : .default_wm = PINEVIEW_CURSOR_DFT_WM,
460 : .guard_size = PINEVIEW_CURSOR_GUARD_WM,
461 : .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
462 : };
463 : static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
464 : .fifo_size = PINEVIEW_CURSOR_FIFO,
465 : .max_wm = PINEVIEW_CURSOR_MAX_WM,
466 : .default_wm = PINEVIEW_CURSOR_DFT_WM,
467 : .guard_size = PINEVIEW_CURSOR_GUARD_WM,
468 : .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
469 : };
470 : static const struct intel_watermark_params g4x_wm_info = {
471 : .fifo_size = G4X_FIFO_SIZE,
472 : .max_wm = G4X_MAX_WM,
473 : .default_wm = G4X_MAX_WM,
474 : .guard_size = 2,
475 : .cacheline_size = G4X_FIFO_LINE_SIZE,
476 : };
477 : static const struct intel_watermark_params g4x_cursor_wm_info = {
478 : .fifo_size = I965_CURSOR_FIFO,
479 : .max_wm = I965_CURSOR_MAX_WM,
480 : .default_wm = I965_CURSOR_DFT_WM,
481 : .guard_size = 2,
482 : .cacheline_size = G4X_FIFO_LINE_SIZE,
483 : };
484 : static const struct intel_watermark_params valleyview_wm_info = {
485 : .fifo_size = VALLEYVIEW_FIFO_SIZE,
486 : .max_wm = VALLEYVIEW_MAX_WM,
487 : .default_wm = VALLEYVIEW_MAX_WM,
488 : .guard_size = 2,
489 : .cacheline_size = G4X_FIFO_LINE_SIZE,
490 : };
491 : static const struct intel_watermark_params valleyview_cursor_wm_info = {
492 : .fifo_size = I965_CURSOR_FIFO,
493 : .max_wm = VALLEYVIEW_CURSOR_MAX_WM,
494 : .default_wm = I965_CURSOR_DFT_WM,
495 : .guard_size = 2,
496 : .cacheline_size = G4X_FIFO_LINE_SIZE,
497 : };
498 : static const struct intel_watermark_params i965_cursor_wm_info = {
499 : .fifo_size = I965_CURSOR_FIFO,
500 : .max_wm = I965_CURSOR_MAX_WM,
501 : .default_wm = I965_CURSOR_DFT_WM,
502 : .guard_size = 2,
503 : .cacheline_size = I915_FIFO_LINE_SIZE,
504 : };
505 : static const struct intel_watermark_params i945_wm_info = {
506 : .fifo_size = I945_FIFO_SIZE,
507 : .max_wm = I915_MAX_WM,
508 : .default_wm = 1,
509 : .guard_size = 2,
510 : .cacheline_size = I915_FIFO_LINE_SIZE,
511 : };
512 : static const struct intel_watermark_params i915_wm_info = {
513 : .fifo_size = I915_FIFO_SIZE,
514 : .max_wm = I915_MAX_WM,
515 : .default_wm = 1,
516 : .guard_size = 2,
517 : .cacheline_size = I915_FIFO_LINE_SIZE,
518 : };
519 : static const struct intel_watermark_params i830_a_wm_info = {
520 : .fifo_size = I855GM_FIFO_SIZE,
521 : .max_wm = I915_MAX_WM,
522 : .default_wm = 1,
523 : .guard_size = 2,
524 : .cacheline_size = I830_FIFO_LINE_SIZE,
525 : };
526 : static const struct intel_watermark_params i830_bc_wm_info = {
527 : .fifo_size = I855GM_FIFO_SIZE,
528 : .max_wm = I915_MAX_WM/2,
529 : .default_wm = 1,
530 : .guard_size = 2,
531 : .cacheline_size = I830_FIFO_LINE_SIZE,
532 : };
533 : static const struct intel_watermark_params i845_wm_info = {
534 : .fifo_size = I830_FIFO_SIZE,
535 : .max_wm = I915_MAX_WM,
536 : .default_wm = 1,
537 : .guard_size = 2,
538 : .cacheline_size = I830_FIFO_LINE_SIZE,
539 : };
540 :
541 : /**
542 : * intel_calculate_wm - calculate watermark level
543 : * @clock_in_khz: pixel clock
544 : * @wm: chip FIFO params
 * @fifo_size: size of the display FIFO, in cachelines
545 : * @pixel_size: display pixel size
546 : * @latency_ns: memory latency for the platform
547 : *
548 : * Calculate the watermark level (the level at which the display plane will
549 : * start fetching from memory again). Each chip has a different display
550 : * FIFO size and allocation, so the caller needs to figure that out and pass
551 : * in the correct intel_watermark_params structure.
552 : *
553 : * As the pixel clock runs, the FIFO will be drained at a rate that depends
554 : * on the pixel size. When it reaches the watermark level, it'll start
555 : * fetching FIFO-line-sized chunks from memory until the FIFO fills
556 : * past the watermark point. If the FIFO drains completely, a FIFO underrun
557 : * will occur, and a display engine hang could result.
558 : */
559 0 : static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
560 : const struct intel_watermark_params *wm,
561 : int fifo_size,
562 : int pixel_size,
563 : unsigned long latency_ns)
564 : {
565 : long entries_required, wm_size;
566 :
567 : /*
568 : * Note: we need to make sure we don't overflow for various clock &
569 : * latency values.
570 : * clocks go from a few thousand to several hundred thousand.
571 : * latency is usually a few thousand
572 : */
573 0 : entries_required = ((clock_in_khz / 1000) * pixel_size * latency_ns) /
574 : 1000;
575 0 : entries_required = DIV_ROUND_UP(entries_required, wm->cacheline_size);
576 :
577 : DRM_DEBUG_KMS("FIFO entries required for mode: %ld\n", entries_required);
578 :
579 0 : wm_size = fifo_size - (entries_required + wm->guard_size);
580 :
581 : DRM_DEBUG_KMS("FIFO watermark level: %ld\n", wm_size);
582 :
583 : /* Don't promote wm_size to unsigned... */
584 0 : if (wm_size > (long)wm->max_wm)
585 0 : wm_size = wm->max_wm;
586 0 : if (wm_size <= 0)
587 0 : wm_size = wm->default_wm;
588 :
589 : /*
590 : * Bspec seems to indicate that the value shouldn't be lower than
591 : * 'burst size + 1'. Certainly 830 is quite unhappy with low values.
592 : * Let's go for 8, which is the burst size, since certain platforms
593 : * already use a hardcoded 8 (which is what the spec says should be
594 : * done).
595 : */
596 0 : if (wm_size <= 8)
597 0 : wm_size = 8;
598 :
599 0 : return wm_size;
600 : }
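/*
 * Worked example with assumed numbers (not taken from any specific
 * platform): a 100000 kHz pixel clock, 4 bytes per pixel and a 5000 ns
 * latency give entries_required = (100000 / 1000) * 4 * 5000 / 1000 =
 * 2000 bytes, or DIV_ROUND_UP(2000, 64) = 32 cachelines with a 64-byte
 * cacheline. With a 96-entry FIFO and a guard size of 2, that leaves
 * wm_size = 96 - (32 + 2) = 62 as the programmed watermark level.
 */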
601 :
602 0 : static struct drm_crtc *single_enabled_crtc(struct drm_device *dev)
603 : {
604 : struct drm_crtc *crtc, *enabled = NULL;
605 :
606 0 : for_each_crtc(dev, crtc) {
607 0 : if (intel_crtc_active(crtc)) {
608 0 : if (enabled)
609 0 : return NULL;
610 : enabled = crtc;
611 0 : }
612 : }
613 :
614 0 : return enabled;
615 0 : }
616 :
617 0 : static void pineview_update_wm(struct drm_crtc *unused_crtc)
618 : {
619 0 : struct drm_device *dev = unused_crtc->dev;
620 0 : struct drm_i915_private *dev_priv = dev->dev_private;
621 : struct drm_crtc *crtc;
622 : const struct cxsr_latency *latency;
623 : u32 reg;
624 : unsigned long wm;
625 :
626 0 : latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev), dev_priv->is_ddr3,
627 0 : dev_priv->fsb_freq, dev_priv->mem_freq);
628 0 : if (!latency) {
629 : DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
630 0 : intel_set_memory_cxsr(dev_priv, false);
631 0 : return;
632 : }
633 :
634 0 : crtc = single_enabled_crtc(dev);
635 0 : if (crtc) {
636 0 : const struct drm_display_mode *adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
637 0 : int pixel_size = crtc->primary->state->fb->bits_per_pixel / 8;
638 0 : int clock = adjusted_mode->crtc_clock;
639 :
640 : /* Display SR */
641 0 : wm = intel_calculate_wm(clock, &pineview_display_wm,
642 : pineview_display_wm.fifo_size,
643 0 : pixel_size, latency->display_sr);
644 0 : reg = I915_READ(DSPFW1);
645 0 : reg &= ~DSPFW_SR_MASK;
646 0 : reg |= FW_WM(wm, SR);
647 0 : I915_WRITE(DSPFW1, reg);
648 : DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);
649 :
650 : /* cursor SR */
651 0 : wm = intel_calculate_wm(clock, &pineview_cursor_wm,
652 : pineview_display_wm.fifo_size,
653 0 : pixel_size, latency->cursor_sr);
654 0 : reg = I915_READ(DSPFW3);
655 0 : reg &= ~DSPFW_CURSOR_SR_MASK;
656 0 : reg |= FW_WM(wm, CURSOR_SR);
657 0 : I915_WRITE(DSPFW3, reg);
658 :
659 : /* Display HPLL off SR */
660 0 : wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
661 : pineview_display_hplloff_wm.fifo_size,
662 0 : pixel_size, latency->display_hpll_disable);
663 0 : reg = I915_READ(DSPFW3);
664 0 : reg &= ~DSPFW_HPLL_SR_MASK;
665 0 : reg |= FW_WM(wm, HPLL_SR);
666 0 : I915_WRITE(DSPFW3, reg);
667 :
668 : /* cursor HPLL off SR */
669 0 : wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
670 : pineview_display_hplloff_wm.fifo_size,
671 0 : pixel_size, latency->cursor_hpll_disable);
672 0 : reg = I915_READ(DSPFW3);
673 0 : reg &= ~DSPFW_HPLL_CURSOR_MASK;
674 0 : reg |= FW_WM(wm, HPLL_CURSOR);
675 0 : I915_WRITE(DSPFW3, reg);
676 : DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);
677 :
678 0 : intel_set_memory_cxsr(dev_priv, true);
679 0 : } else {
680 0 : intel_set_memory_cxsr(dev_priv, false);
681 : }
682 0 : }
683 :
684 0 : static bool g4x_compute_wm0(struct drm_device *dev,
685 : int plane,
686 : const struct intel_watermark_params *display,
687 : int display_latency_ns,
688 : const struct intel_watermark_params *cursor,
689 : int cursor_latency_ns,
690 : int *plane_wm,
691 : int *cursor_wm)
692 : {
693 : struct drm_crtc *crtc;
694 : const struct drm_display_mode *adjusted_mode;
695 : int htotal, hdisplay, clock, pixel_size;
696 : int line_time_us, line_count;
697 : int entries, tlb_miss;
698 :
699 0 : crtc = intel_get_crtc_for_plane(dev, plane);
700 0 : if (!intel_crtc_active(crtc)) {
701 0 : *cursor_wm = cursor->guard_size;
702 0 : *plane_wm = display->guard_size;
703 0 : return false;
704 : }
705 :
706 0 : adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
707 0 : clock = adjusted_mode->crtc_clock;
708 0 : htotal = adjusted_mode->crtc_htotal;
709 0 : hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
710 0 : pixel_size = crtc->primary->state->fb->bits_per_pixel / 8;
711 :
712 : /* Use the small buffer method to calculate plane watermark */
713 0 : entries = ((clock * pixel_size / 1000) * display_latency_ns) / 1000;
714 0 : tlb_miss = display->fifo_size*display->cacheline_size - hdisplay * 8;
715 0 : if (tlb_miss > 0)
716 0 : entries += tlb_miss;
717 0 : entries = DIV_ROUND_UP(entries, display->cacheline_size);
718 0 : *plane_wm = entries + display->guard_size;
719 0 : if (*plane_wm > (int)display->max_wm)
720 0 : *plane_wm = display->max_wm;
721 :
722 : /* Use the large buffer method to calculate cursor watermark */
723 0 : line_time_us = max(htotal * 1000 / clock, 1);
724 0 : line_count = (cursor_latency_ns / line_time_us + 1000) / 1000;
725 0 : entries = line_count * crtc->cursor->state->crtc_w * pixel_size;
726 0 : tlb_miss = cursor->fifo_size*cursor->cacheline_size - hdisplay * 8;
727 0 : if (tlb_miss > 0)
728 0 : entries += tlb_miss;
729 0 : entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
730 0 : *cursor_wm = entries + cursor->guard_size;
731 0 : if (*cursor_wm > (int)cursor->max_wm)
732 0 : *cursor_wm = (int)cursor->max_wm;
733 :
734 0 : return true;
735 0 : }
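/*
 * Worked example with assumed numbers: for a 148500 kHz clock and 4 bytes
 * per pixel, the small buffer method above gives entries =
 * ((148500 * 4 / 1000) * 5000) / 1000 = 2970 bytes for a 5000 ns latency.
 * For the cursor, with htotal = 2200, line_time_us = max(2200 * 1000 /
 * 148500, 1) = 14 us, so a 5000 ns latency spans line_count =
 * (5000 / 14 + 1000) / 1000 = 1 line.
 */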
736 :
737 : /*
738 : * Check the wm result.
739 : *
740 : * If any calculated watermark value is larger than the maximum value that
741 : * can be programmed into the associated watermark register, that watermark
742 : * must be disabled.
743 : */
744 0 : static bool g4x_check_srwm(struct drm_device *dev,
745 : int display_wm, int cursor_wm,
746 : const struct intel_watermark_params *display,
747 : const struct intel_watermark_params *cursor)
748 : {
749 : DRM_DEBUG_KMS("SR watermark: display plane %d, cursor %d\n",
750 : display_wm, cursor_wm);
751 :
752 0 : if (display_wm > display->max_wm) {
753 : DRM_DEBUG_KMS("display watermark is too large(%d/%ld), disabling\n",
754 : display_wm, display->max_wm);
755 0 : return false;
756 : }
757 :
758 0 : if (cursor_wm > cursor->max_wm) {
759 : DRM_DEBUG_KMS("cursor watermark is too large(%d/%ld), disabling\n",
760 : cursor_wm, cursor->max_wm);
761 0 : return false;
762 : }
763 :
764 0 : if (!(display_wm || cursor_wm)) {
765 : DRM_DEBUG_KMS("SR latency is 0, disabling\n");
766 0 : return false;
767 : }
768 :
769 0 : return true;
770 0 : }
771 :
772 0 : static bool g4x_compute_srwm(struct drm_device *dev,
773 : int plane,
774 : int latency_ns,
775 : const struct intel_watermark_params *display,
776 : const struct intel_watermark_params *cursor,
777 : int *display_wm, int *cursor_wm)
778 : {
779 : struct drm_crtc *crtc;
780 : const struct drm_display_mode *adjusted_mode;
781 : int hdisplay, htotal, pixel_size, clock;
782 : unsigned long line_time_us;
783 : int line_count, line_size;
784 : int small, large;
785 : int entries;
786 :
787 0 : if (!latency_ns) {
788 0 : *display_wm = *cursor_wm = 0;
789 0 : return false;
790 : }
791 :
792 0 : crtc = intel_get_crtc_for_plane(dev, plane);
793 0 : adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
794 0 : clock = adjusted_mode->crtc_clock;
795 0 : htotal = adjusted_mode->crtc_htotal;
796 0 : hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
797 0 : pixel_size = crtc->primary->state->fb->bits_per_pixel / 8;
798 :
799 0 : line_time_us = max(htotal * 1000 / clock, 1);
800 0 : line_count = (latency_ns / line_time_us + 1000) / 1000;
801 0 : line_size = hdisplay * pixel_size;
802 :
803 : /* Use the minimum of the small and large buffer method for primary */
804 0 : small = ((clock * pixel_size / 1000) * latency_ns) / 1000;
805 0 : large = line_count * line_size;
806 :
807 0 : entries = DIV_ROUND_UP(min(small, large), display->cacheline_size);
808 0 : *display_wm = entries + display->guard_size;
809 :
810 : /* calculate the self-refresh watermark for display cursor */
811 0 : entries = line_count * pixel_size * crtc->cursor->state->crtc_w;
812 0 : entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
813 0 : *cursor_wm = entries + cursor->guard_size;
814 :
815 0 : return g4x_check_srwm(dev,
816 0 : *display_wm, *cursor_wm,
817 : display, cursor);
818 0 : }
819 :
820 : #define FW_WM_VLV(value, plane) \
821 : (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK_VLV)
822 :
823 0 : static void vlv_write_wm_values(struct intel_crtc *crtc,
824 : const struct vlv_wm_values *wm)
825 : {
826 0 : struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
827 0 : enum pipe pipe = crtc->pipe;
828 :
829 0 : I915_WRITE(VLV_DDL(pipe),
830 : (wm->ddl[pipe].cursor << DDL_CURSOR_SHIFT) |
831 : (wm->ddl[pipe].sprite[1] << DDL_SPRITE_SHIFT(1)) |
832 : (wm->ddl[pipe].sprite[0] << DDL_SPRITE_SHIFT(0)) |
833 : (wm->ddl[pipe].primary << DDL_PLANE_SHIFT));
834 :
835 0 : I915_WRITE(DSPFW1,
836 : FW_WM(wm->sr.plane, SR) |
837 : FW_WM(wm->pipe[PIPE_B].cursor, CURSORB) |
838 : FW_WM_VLV(wm->pipe[PIPE_B].primary, PLANEB) |
839 : FW_WM_VLV(wm->pipe[PIPE_A].primary, PLANEA));
840 0 : I915_WRITE(DSPFW2,
841 : FW_WM_VLV(wm->pipe[PIPE_A].sprite[1], SPRITEB) |
842 : FW_WM(wm->pipe[PIPE_A].cursor, CURSORA) |
843 : FW_WM_VLV(wm->pipe[PIPE_A].sprite[0], SPRITEA));
844 0 : I915_WRITE(DSPFW3,
845 : FW_WM(wm->sr.cursor, CURSOR_SR));
846 :
847 0 : if (IS_CHERRYVIEW(dev_priv)) {
848 0 : I915_WRITE(DSPFW7_CHV,
849 : FW_WM_VLV(wm->pipe[PIPE_B].sprite[1], SPRITED) |
850 : FW_WM_VLV(wm->pipe[PIPE_B].sprite[0], SPRITEC));
851 0 : I915_WRITE(DSPFW8_CHV,
852 : FW_WM_VLV(wm->pipe[PIPE_C].sprite[1], SPRITEF) |
853 : FW_WM_VLV(wm->pipe[PIPE_C].sprite[0], SPRITEE));
854 0 : I915_WRITE(DSPFW9_CHV,
855 : FW_WM_VLV(wm->pipe[PIPE_C].primary, PLANEC) |
856 : FW_WM(wm->pipe[PIPE_C].cursor, CURSORC));
857 0 : I915_WRITE(DSPHOWM,
858 : FW_WM(wm->sr.plane >> 9, SR_HI) |
859 : FW_WM(wm->pipe[PIPE_C].sprite[1] >> 8, SPRITEF_HI) |
860 : FW_WM(wm->pipe[PIPE_C].sprite[0] >> 8, SPRITEE_HI) |
861 : FW_WM(wm->pipe[PIPE_C].primary >> 8, PLANEC_HI) |
862 : FW_WM(wm->pipe[PIPE_B].sprite[1] >> 8, SPRITED_HI) |
863 : FW_WM(wm->pipe[PIPE_B].sprite[0] >> 8, SPRITEC_HI) |
864 : FW_WM(wm->pipe[PIPE_B].primary >> 8, PLANEB_HI) |
865 : FW_WM(wm->pipe[PIPE_A].sprite[1] >> 8, SPRITEB_HI) |
866 : FW_WM(wm->pipe[PIPE_A].sprite[0] >> 8, SPRITEA_HI) |
867 : FW_WM(wm->pipe[PIPE_A].primary >> 8, PLANEA_HI));
868 0 : } else {
869 0 : I915_WRITE(DSPFW7,
870 : FW_WM_VLV(wm->pipe[PIPE_B].sprite[1], SPRITED) |
871 : FW_WM_VLV(wm->pipe[PIPE_B].sprite[0], SPRITEC));
872 0 : I915_WRITE(DSPHOWM,
873 : FW_WM(wm->sr.plane >> 9, SR_HI) |
874 : FW_WM(wm->pipe[PIPE_B].sprite[1] >> 8, SPRITED_HI) |
875 : FW_WM(wm->pipe[PIPE_B].sprite[0] >> 8, SPRITEC_HI) |
876 : FW_WM(wm->pipe[PIPE_B].primary >> 8, PLANEB_HI) |
877 : FW_WM(wm->pipe[PIPE_A].sprite[1] >> 8, SPRITEB_HI) |
878 : FW_WM(wm->pipe[PIPE_A].sprite[0] >> 8, SPRITEA_HI) |
879 : FW_WM(wm->pipe[PIPE_A].primary >> 8, PLANEA_HI));
880 : }
881 :
882 : /* zero (unused) WM1 watermarks */
883 0 : I915_WRITE(DSPFW4, 0);
884 0 : I915_WRITE(DSPFW5, 0);
885 0 : I915_WRITE(DSPFW6, 0);
886 0 : I915_WRITE(DSPHOWM1, 0);
887 :
888 0 : POSTING_READ(DSPFW1);
889 0 : }
890 :
891 : #undef FW_WM_VLV
892 :
893 : enum vlv_wm_level {
894 : VLV_WM_LEVEL_PM2,
895 : VLV_WM_LEVEL_PM5,
896 : VLV_WM_LEVEL_DDR_DVFS,
897 : };
898 :
899 : /* latency must be in 0.1us units. */
900 0 : static unsigned int vlv_wm_method2(unsigned int pixel_rate,
901 : unsigned int pipe_htotal,
902 : unsigned int horiz_pixels,
903 : unsigned int bytes_per_pixel,
904 : unsigned int latency)
905 : {
906 : unsigned int ret;
907 :
908 0 : ret = (latency * pixel_rate) / (pipe_htotal * 10000);
909 0 : ret = (ret + 1) * horiz_pixels * bytes_per_pixel;
910 0 : ret = DIV_ROUND_UP(ret, 64);
911 :
912 0 : return ret;
913 : }
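/*
 * Worked example with assumed numbers: pixel_rate = 200000 kHz, htotal =
 * 2200, 1920 horizontal pixels, 4 bytes per pixel and a 12 us latency
 * (120 in 0.1us units): ret = (120 * 200000) / (2200 * 10000) = 1 line,
 * then (1 + 1) * 1920 * 4 = 15360 bytes, and DIV_ROUND_UP(15360, 64) =
 * 240 FIFO entries.
 */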
914 :
915 0 : static void vlv_setup_wm_latency(struct drm_device *dev)
916 : {
917 0 : struct drm_i915_private *dev_priv = dev->dev_private;
918 :
919 : /* all latencies in usec */
920 0 : dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;
921 :
922 0 : dev_priv->wm.max_level = VLV_WM_LEVEL_PM2;
923 :
924 0 : if (IS_CHERRYVIEW(dev_priv)) {
925 0 : dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
926 0 : dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;
927 :
928 0 : dev_priv->wm.max_level = VLV_WM_LEVEL_DDR_DVFS;
929 0 : }
930 0 : }
931 :
932 0 : static uint16_t vlv_compute_wm_level(struct intel_plane *plane,
933 : struct intel_crtc *crtc,
934 : const struct intel_plane_state *state,
935 : int level)
936 : {
937 0 : struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
938 : int clock, htotal, pixel_size, width, wm;
939 :
940 0 : if (dev_priv->wm.pri_latency[level] == 0)
941 0 : return USHRT_MAX;
942 :
943 0 : if (!state->visible)
944 0 : return 0;
945 :
946 0 : pixel_size = drm_format_plane_cpp(state->base.fb->pixel_format, 0);
947 0 : clock = crtc->config->base.adjusted_mode.crtc_clock;
948 0 : htotal = crtc->config->base.adjusted_mode.crtc_htotal;
949 0 : width = crtc->config->pipe_src_w;
950 0 : if (WARN_ON(htotal == 0))
951 0 : htotal = 1;
952 :
953 0 : if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
954 : /*
955 : * FIXME the formula gives values that are
956 : * too big for the cursor FIFO, and hence we
957 : * would never be able to use cursors. For
958 : * now just hardcode the watermark.
959 : */
960 : wm = 63;
961 0 : } else {
962 0 : wm = vlv_wm_method2(clock, htotal, width, pixel_size,
963 0 : dev_priv->wm.pri_latency[level] * 10);
964 : }
965 :
966 0 : return min_t(int, wm, USHRT_MAX);
967 0 : }
968 :
969 0 : static void vlv_compute_fifo(struct intel_crtc *crtc)
970 : {
971 0 : struct drm_device *dev = crtc->base.dev;
972 0 : struct vlv_wm_state *wm_state = &crtc->wm_state;
973 : struct intel_plane *plane;
974 : unsigned int total_rate = 0;
975 : const int fifo_size = 512 - 1;
976 : int fifo_extra, fifo_left = fifo_size;
977 :
978 0 : for_each_intel_plane_on_crtc(dev, crtc, plane) {
979 : struct intel_plane_state *state =
980 0 : to_intel_plane_state(plane->base.state);
981 :
982 0 : if (plane->base.type == DRM_PLANE_TYPE_CURSOR)
983 0 : continue;
984 :
985 0 : if (state->visible) {
986 0 : wm_state->num_active_planes++;
987 0 : total_rate += drm_format_plane_cpp(state->base.fb->pixel_format, 0);
988 0 : }
989 0 : }
990 :
991 0 : for_each_intel_plane_on_crtc(dev, crtc, plane) {
992 : struct intel_plane_state *state =
993 0 : to_intel_plane_state(plane->base.state);
994 : unsigned int rate;
995 :
996 0 : if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
997 0 : plane->wm.fifo_size = 63;
998 0 : continue;
999 : }
1000 :
1001 0 : if (!state->visible) {
1002 0 : plane->wm.fifo_size = 0;
1003 0 : continue;
1004 : }
1005 :
1006 0 : rate = drm_format_plane_cpp(state->base.fb->pixel_format, 0);
1007 0 : plane->wm.fifo_size = fifo_size * rate / total_rate;
1008 0 : fifo_left -= plane->wm.fifo_size;
1009 0 : }
1010 :
1011 0 : fifo_extra = DIV_ROUND_UP(fifo_left, wm_state->num_active_planes ?: 1);
1012 :
1013 : /* spread the remainder evenly */
1014 0 : for_each_intel_plane_on_crtc(dev, crtc, plane) {
1015 : int plane_extra;
1016 :
1017 0 : if (fifo_left == 0)
1018 0 : break;
1019 :
1020 0 : if (plane->base.type == DRM_PLANE_TYPE_CURSOR)
1021 0 : continue;
1022 :
1023 : /* give it all to the first plane if none are active */
1024 0 : if (plane->wm.fifo_size == 0 &&
1025 0 : wm_state->num_active_planes)
1026 0 : continue;
1027 :
1028 0 : plane_extra = min(fifo_extra, fifo_left);
1029 0 : plane->wm.fifo_size += plane_extra;
1030 0 : fifo_left -= plane_extra;
1031 0 : }
1032 :
1033 0 : WARN_ON(fifo_left != 0);
1034 0 : }
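/*
 * Worked example with assumed numbers: with two visible planes of 4 and
 * 2 bytes per pixel, total_rate = 6 and the 511-entry FIFO splits into
 * 511 * 4 / 6 = 340 and 511 * 2 / 6 = 170 entries, leaving fifo_left = 1.
 * fifo_extra = DIV_ROUND_UP(1, 2) = 1, so the loop above hands the last
 * entry to the first plane and the final split becomes 341 / 170.
 */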
1035 :
1036 0 : static void vlv_invert_wms(struct intel_crtc *crtc)
1037 : {
1038 0 : struct vlv_wm_state *wm_state = &crtc->wm_state;
1039 : int level;
1040 :
1041 0 : for (level = 0; level < wm_state->num_levels; level++) {
1042 0 : struct drm_device *dev = crtc->base.dev;
1043 0 : const int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
1044 : struct intel_plane *plane;
1045 :
1046 0 : wm_state->sr[level].plane = sr_fifo_size - wm_state->sr[level].plane;
1047 0 : wm_state->sr[level].cursor = 63 - wm_state->sr[level].cursor;
1048 :
1049 0 : for_each_intel_plane_on_crtc(dev, crtc, plane) {
1050 0 : switch (plane->base.type) {
1051 : int sprite;
1052 : case DRM_PLANE_TYPE_CURSOR:
1053 0 : wm_state->wm[level].cursor = plane->wm.fifo_size -
1054 0 : wm_state->wm[level].cursor;
1055 0 : break;
1056 : case DRM_PLANE_TYPE_PRIMARY:
1057 0 : wm_state->wm[level].primary = plane->wm.fifo_size -
1058 0 : wm_state->wm[level].primary;
1059 0 : break;
1060 : case DRM_PLANE_TYPE_OVERLAY:
1061 0 : sprite = plane->plane;
1062 0 : wm_state->wm[level].sprite[sprite] = plane->wm.fifo_size -
1063 0 : wm_state->wm[level].sprite[sprite];
1064 0 : break;
1065 : }
1066 : }
1067 : }
1068 0 : }
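/*
 * Worked example with assumed numbers for the inversion above: the levels
 * are computed as "FIFO entries needed", while the registers are
 * programmed relative to each plane's FIFO share, so a plane with a
 * 340-entry share and a computed watermark of 40 is written as
 * 340 - 40 = 300 (and a cursor value as 63 - cursor_wm).
 */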
1069 :
1070 0 : static void vlv_compute_wm(struct intel_crtc *crtc)
1071 : {
1072 0 : struct drm_device *dev = crtc->base.dev;
1073 0 : struct vlv_wm_state *wm_state = &crtc->wm_state;
1074 : struct intel_plane *plane;
1075 0 : int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
1076 : int level;
1077 :
1078 0 : memset(wm_state, 0, sizeof(*wm_state));
1079 :
1080 0 : wm_state->cxsr = crtc->pipe != PIPE_C && crtc->wm.cxsr_allowed;
1081 0 : wm_state->num_levels = to_i915(dev)->wm.max_level + 1;
1082 :
1083 0 : wm_state->num_active_planes = 0;
1084 :
1085 0 : vlv_compute_fifo(crtc);
1086 :
1087 0 : if (wm_state->num_active_planes != 1)
1088 0 : wm_state->cxsr = false;
1089 :
1090 0 : if (wm_state->cxsr) {
1091 0 : for (level = 0; level < wm_state->num_levels; level++) {
1092 0 : wm_state->sr[level].plane = sr_fifo_size;
1093 0 : wm_state->sr[level].cursor = 63;
1094 : }
1095 : }
1096 :
1097 0 : for_each_intel_plane_on_crtc(dev, crtc, plane) {
1098 : struct intel_plane_state *state =
1099 0 : to_intel_plane_state(plane->base.state);
1100 :
1101 0 : if (!state->visible)
1102 0 : continue;
1103 :
1104 : /* normal watermarks */
1105 0 : for (level = 0; level < wm_state->num_levels; level++) {
1106 0 : int wm = vlv_compute_wm_level(plane, crtc, state, level);
1107 0 : int max_wm = plane->base.type == DRM_PLANE_TYPE_CURSOR ? 63 : 511;
1108 :
1109 : /* hack */
1110 0 : if (WARN_ON(level == 0 && wm > max_wm))
1111 0 : wm = max_wm;
1112 :
1113 0 : if (wm > plane->wm.fifo_size)
1114 0 : break;
1115 :
1116 0 : switch (plane->base.type) {
1117 : int sprite;
1118 : case DRM_PLANE_TYPE_CURSOR:
1119 0 : wm_state->wm[level].cursor = wm;
1120 0 : break;
1121 : case DRM_PLANE_TYPE_PRIMARY:
1122 0 : wm_state->wm[level].primary = wm;
1123 0 : break;
1124 : case DRM_PLANE_TYPE_OVERLAY:
1125 0 : sprite = plane->plane;
1126 0 : wm_state->wm[level].sprite[sprite] = wm;
1127 0 : break;
1128 : }
1129 0 : }
1130 :
1131 0 : wm_state->num_levels = level;
1132 :
1133 0 : if (!wm_state->cxsr)
1134 0 : continue;
1135 :
1136 : /* maxfifo watermarks */
1137 0 : switch (plane->base.type) {
1138 : int sprite, level;
1139 : case DRM_PLANE_TYPE_CURSOR:
1140 0 : for (level = 0; level < wm_state->num_levels; level++)
1141 0 : wm_state->sr[level].cursor =
1142 0 : wm_state->wm[level].cursor;
1143 : break;
1144 : case DRM_PLANE_TYPE_PRIMARY:
1145 0 : for (level = 0; level < wm_state->num_levels; level++)
1146 0 : wm_state->sr[level].plane =
1147 0 : min(wm_state->sr[level].plane,
1148 0 : wm_state->wm[level].primary);
1149 : break;
1150 : case DRM_PLANE_TYPE_OVERLAY:
1151 0 : sprite = plane->plane;
1152 0 : for (level = 0; level < wm_state->num_levels; level++)
1153 0 : wm_state->sr[level].plane =
1154 0 : min(wm_state->sr[level].plane,
1155 0 : wm_state->wm[level].sprite[sprite]);
1156 : break;
1157 : }
1158 0 : }
1159 :
1160 : /* clear any (partially) filled invalid levels */
1161 0 : for (level = wm_state->num_levels; level < to_i915(dev)->wm.max_level + 1; level++) {
1162 0 : memset(&wm_state->wm[level], 0, sizeof(wm_state->wm[level]));
1163 0 : memset(&wm_state->sr[level], 0, sizeof(wm_state->sr[level]));
1164 : }
1165 :
1166 0 : vlv_invert_wms(crtc);
1167 0 : }
1168 :
1169 : #define VLV_FIFO(plane, value) \
1170 : (((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV)
1171 :
1172 0 : static void vlv_pipe_set_fifo_size(struct intel_crtc *crtc)
1173 : {
1174 0 : struct drm_device *dev = crtc->base.dev;
1175 0 : struct drm_i915_private *dev_priv = to_i915(dev);
1176 : struct intel_plane *plane;
1177 : int sprite0_start = 0, sprite1_start = 0, fifo_size = 0;
1178 :
1179 0 : for_each_intel_plane_on_crtc(dev, crtc, plane) {
1180 0 : if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
1181 0 : WARN_ON(plane->wm.fifo_size != 63);
1182 0 : continue;
1183 : }
1184 :
1185 0 : if (plane->base.type == DRM_PLANE_TYPE_PRIMARY)
1186 0 : sprite0_start = plane->wm.fifo_size;
1187 0 : else if (plane->plane == 0)
1188 0 : sprite1_start = sprite0_start + plane->wm.fifo_size;
1189 : else
1190 0 : fifo_size = sprite1_start + plane->wm.fifo_size;
1191 : }
1192 :
1193 0 : WARN_ON(fifo_size != 512 - 1);
1194 :
1195 : DRM_DEBUG_KMS("Pipe %c FIFO split %d / %d / %d\n",
1196 : pipe_name(crtc->pipe), sprite0_start,
1197 : sprite1_start, fifo_size);
1198 :
1199 0 : switch (crtc->pipe) {
1200 : uint32_t dsparb, dsparb2, dsparb3;
1201 : case PIPE_A:
1202 0 : dsparb = I915_READ(DSPARB);
1203 0 : dsparb2 = I915_READ(DSPARB2);
1204 :
1205 0 : dsparb &= ~(VLV_FIFO(SPRITEA, 0xff) |
1206 : VLV_FIFO(SPRITEB, 0xff));
1207 0 : dsparb |= (VLV_FIFO(SPRITEA, sprite0_start) |
1208 0 : VLV_FIFO(SPRITEB, sprite1_start));
1209 :
1210 0 : dsparb2 &= ~(VLV_FIFO(SPRITEA_HI, 0x1) |
1211 : VLV_FIFO(SPRITEB_HI, 0x1));
1212 0 : dsparb2 |= (VLV_FIFO(SPRITEA_HI, sprite0_start >> 8) |
1213 0 : VLV_FIFO(SPRITEB_HI, sprite1_start >> 8));
1214 :
1215 0 : I915_WRITE(DSPARB, dsparb);
1216 0 : I915_WRITE(DSPARB2, dsparb2);
1217 0 : break;
1218 : case PIPE_B:
1219 0 : dsparb = I915_READ(DSPARB);
1220 0 : dsparb2 = I915_READ(DSPARB2);
1221 :
1222 0 : dsparb &= ~(VLV_FIFO(SPRITEC, 0xff) |
1223 : VLV_FIFO(SPRITED, 0xff));
1224 0 : dsparb |= (VLV_FIFO(SPRITEC, sprite0_start) |
1225 0 : VLV_FIFO(SPRITED, sprite1_start));
1226 :
1227 0 : dsparb2 &= ~(VLV_FIFO(SPRITEC_HI, 0xff) |
1228 : VLV_FIFO(SPRITED_HI, 0xff));
1229 0 : dsparb2 |= (VLV_FIFO(SPRITEC_HI, sprite0_start >> 8) |
1230 0 : VLV_FIFO(SPRITED_HI, sprite1_start >> 8));
1231 :
1232 0 : I915_WRITE(DSPARB, dsparb);
1233 0 : I915_WRITE(DSPARB2, dsparb2);
1234 0 : break;
1235 : case PIPE_C:
1236 0 : dsparb3 = I915_READ(DSPARB3);
1237 0 : dsparb2 = I915_READ(DSPARB2);
1238 :
1239 0 : dsparb3 &= ~(VLV_FIFO(SPRITEE, 0xff) |
1240 : VLV_FIFO(SPRITEF, 0xff));
1241 0 : dsparb3 |= (VLV_FIFO(SPRITEE, sprite0_start) |
1242 0 : VLV_FIFO(SPRITEF, sprite1_start));
1243 :
1244 0 : dsparb2 &= ~(VLV_FIFO(SPRITEE_HI, 0xff) |
1245 : VLV_FIFO(SPRITEF_HI, 0xff));
1246 0 : dsparb2 |= (VLV_FIFO(SPRITEE_HI, sprite0_start >> 8) |
1247 0 : VLV_FIFO(SPRITEF_HI, sprite1_start >> 8));
1248 :
1249 0 : I915_WRITE(DSPARB3, dsparb3);
1250 0 : I915_WRITE(DSPARB2, dsparb2);
1251 0 : break;
1252 : default:
1253 : break;
1254 : }
1255 0 : }
1256 :
1257 : #undef VLV_FIFO
1258 :
1259 0 : static void vlv_merge_wm(struct drm_device *dev,
1260 : struct vlv_wm_values *wm)
1261 : {
1262 : struct intel_crtc *crtc;
1263 : int num_active_crtcs = 0;
1264 :
1265 0 : wm->level = to_i915(dev)->wm.max_level;
1266 0 : wm->cxsr = true;
1267 :
1268 0 : for_each_intel_crtc(dev, crtc) {
1269 0 : const struct vlv_wm_state *wm_state = &crtc->wm_state;
1270 :
1271 0 : if (!crtc->active)
1272 0 : continue;
1273 :
1274 0 : if (!wm_state->cxsr)
1275 0 : wm->cxsr = false;
1276 :
1277 0 : num_active_crtcs++;
1278 0 : wm->level = min_t(int, wm->level, wm_state->num_levels - 1);
1279 0 : }
1280 :
1281 0 : if (num_active_crtcs != 1)
1282 0 : wm->cxsr = false;
1283 :
1284 0 : if (num_active_crtcs > 1)
1285 0 : wm->level = VLV_WM_LEVEL_PM2;
1286 :
1287 0 : for_each_intel_crtc(dev, crtc) {
1288 0 : struct vlv_wm_state *wm_state = &crtc->wm_state;
1289 0 : enum pipe pipe = crtc->pipe;
1290 :
1291 0 : if (!crtc->active)
1292 0 : continue;
1293 :
1294 0 : wm->pipe[pipe] = wm_state->wm[wm->level];
1295 0 : if (wm->cxsr)
1296 0 : wm->sr = wm_state->sr[wm->level];
1297 :
1298 0 : wm->ddl[pipe].primary = DDL_PRECISION_HIGH | 2;
1299 0 : wm->ddl[pipe].sprite[0] = DDL_PRECISION_HIGH | 2;
1300 0 : wm->ddl[pipe].sprite[1] = DDL_PRECISION_HIGH | 2;
1301 0 : wm->ddl[pipe].cursor = DDL_PRECISION_HIGH | 2;
1302 0 : }
1303 0 : }
1304 :
1305 0 : static void vlv_update_wm(struct drm_crtc *crtc)
1306 : {
1307 0 : struct drm_device *dev = crtc->dev;
1308 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1309 0 : struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
1310 : #ifdef DRMDEBUG
1311 : enum pipe pipe = intel_crtc->pipe;
1312 : #endif
1313 0 : struct vlv_wm_values wm = {};
1314 :
1315 0 : vlv_compute_wm(intel_crtc);
1316 0 : vlv_merge_wm(dev, &wm);
1317 :
1318 0 : if (memcmp(&dev_priv->wm.vlv, &wm, sizeof(wm)) == 0) {
1319 : /* FIXME should be part of crtc atomic commit */
1320 0 : vlv_pipe_set_fifo_size(intel_crtc);
1321 0 : return;
1322 : }
1323 :
1324 0 : if (wm.level < VLV_WM_LEVEL_DDR_DVFS &&
1325 0 : dev_priv->wm.vlv.level >= VLV_WM_LEVEL_DDR_DVFS)
1326 0 : chv_set_memory_dvfs(dev_priv, false);
1327 :
1328 0 : if (wm.level < VLV_WM_LEVEL_PM5 &&
1329 0 : dev_priv->wm.vlv.level >= VLV_WM_LEVEL_PM5)
1330 0 : chv_set_memory_pm5(dev_priv, false);
1331 :
1332 0 : if (!wm.cxsr && dev_priv->wm.vlv.cxsr)
1333 0 : intel_set_memory_cxsr(dev_priv, false);
1334 :
1335 : /* FIXME should be part of crtc atomic commit */
1336 0 : vlv_pipe_set_fifo_size(intel_crtc);
1337 :
1338 0 : vlv_write_wm_values(intel_crtc, &wm);
1339 :
1340 : DRM_DEBUG_KMS("Setting FIFO watermarks - %c: plane=%d, cursor=%d, "
1341 : "sprite0=%d, sprite1=%d, SR: plane=%d, cursor=%d level=%d cxsr=%d\n",
1342 : pipe_name(pipe), wm.pipe[pipe].primary, wm.pipe[pipe].cursor,
1343 : wm.pipe[pipe].sprite[0], wm.pipe[pipe].sprite[1],
1344 : wm.sr.plane, wm.sr.cursor, wm.level, wm.cxsr);
1345 :
1346 0 : if (wm.cxsr && !dev_priv->wm.vlv.cxsr)
1347 0 : intel_set_memory_cxsr(dev_priv, true);
1348 :
1349 0 : if (wm.level >= VLV_WM_LEVEL_PM5 &&
1350 0 : dev_priv->wm.vlv.level < VLV_WM_LEVEL_PM5)
1351 0 : chv_set_memory_pm5(dev_priv, true);
1352 :
1353 0 : if (wm.level >= VLV_WM_LEVEL_DDR_DVFS &&
1354 0 : dev_priv->wm.vlv.level < VLV_WM_LEVEL_DDR_DVFS)
1355 0 : chv_set_memory_dvfs(dev_priv, true);
1356 :
1357 0 : dev_priv->wm.vlv = wm;
1358 0 : }
1359 :
1360 : #define single_plane_enabled(mask) is_power_of_2(mask)
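/*
 * is_power_of_2() is true only when exactly one bit is set, i.e. exactly
 * one plane is enabled; for example, single_plane_enabled(1 << PIPE_A) is
 * true while single_plane_enabled((1 << PIPE_A) | (1 << PIPE_B)) is not.
 */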
1361 :
1362 0 : static void g4x_update_wm(struct drm_crtc *crtc)
1363 : {
1364 0 : struct drm_device *dev = crtc->dev;
1365 : static const int sr_latency_ns = 12000;
1366 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1367 0 : int planea_wm, planeb_wm, cursora_wm, cursorb_wm;
1368 0 : int plane_sr, cursor_sr;
1369 : unsigned int enabled = 0;
1370 : bool cxsr_enabled;
1371 :
1372 0 : if (g4x_compute_wm0(dev, PIPE_A,
1373 : &g4x_wm_info, pessimal_latency_ns,
1374 : &g4x_cursor_wm_info, pessimal_latency_ns,
1375 : &planea_wm, &cursora_wm))
1376 0 : enabled |= 1 << PIPE_A;
1377 :
1378 0 : if (g4x_compute_wm0(dev, PIPE_B,
1379 : &g4x_wm_info, pessimal_latency_ns,
1380 : &g4x_cursor_wm_info, pessimal_latency_ns,
1381 : &planeb_wm, &cursorb_wm))
1382 0 : enabled |= 1 << PIPE_B;
1383 :
1384 0 : if (single_plane_enabled(enabled) &&
1385 0 : g4x_compute_srwm(dev, ffs(enabled) - 1,
1386 : sr_latency_ns,
1387 : &g4x_wm_info,
1388 : &g4x_cursor_wm_info,
1389 : &plane_sr, &cursor_sr)) {
1390 : cxsr_enabled = true;
1391 0 : } else {
1392 : cxsr_enabled = false;
1393 0 : intel_set_memory_cxsr(dev_priv, false);
1394 0 : plane_sr = cursor_sr = 0;
1395 : }
1396 :
1397 : DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, "
1398 : "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
1399 : planea_wm, cursora_wm,
1400 : planeb_wm, cursorb_wm,
1401 : plane_sr, cursor_sr);
1402 :
1403 0 : I915_WRITE(DSPFW1,
1404 : FW_WM(plane_sr, SR) |
1405 : FW_WM(cursorb_wm, CURSORB) |
1406 : FW_WM(planeb_wm, PLANEB) |
1407 : FW_WM(planea_wm, PLANEA));
1408 0 : I915_WRITE(DSPFW2,
1409 : (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
1410 : FW_WM(cursora_wm, CURSORA));
1411 : /* HPLL off in SR has some issues on G4x... disable it */
1412 0 : I915_WRITE(DSPFW3,
1413 : (I915_READ(DSPFW3) & ~(DSPFW_HPLL_SR_EN | DSPFW_CURSOR_SR_MASK)) |
1414 : FW_WM(cursor_sr, CURSOR_SR));
1415 :
1416 0 : if (cxsr_enabled)
1417 0 : intel_set_memory_cxsr(dev_priv, true);
1418 0 : }
1419 :
1420 0 : static void i965_update_wm(struct drm_crtc *unused_crtc)
1421 : {
1422 0 : struct drm_device *dev = unused_crtc->dev;
1423 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1424 : struct drm_crtc *crtc;
1425 : int srwm = 1;
1426 : int cursor_sr = 16;
1427 : bool cxsr_enabled;
1428 :
1429 : /* Calc SR entries for single-plane configs */
1430 0 : crtc = single_enabled_crtc(dev);
1431 0 : if (crtc) {
1432 : /* self-refresh has much higher latency */
1433 : static const int sr_latency_ns = 12000;
1434 0 : const struct drm_display_mode *adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
1435 0 : int clock = adjusted_mode->crtc_clock;
1436 0 : int htotal = adjusted_mode->crtc_htotal;
1437 0 : int hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
1438 0 : int pixel_size = crtc->primary->state->fb->bits_per_pixel / 8;
1439 : unsigned long line_time_us;
1440 : int entries;
1441 :
1442 0 : line_time_us = max(htotal * 1000 / clock, 1);
1443 :
1444 : /* Use ns/us then divide to preserve precision */
1445 0 : entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1446 0 : pixel_size * hdisplay;
1447 0 : entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
1448 0 : srwm = I965_FIFO_SIZE - entries;
1449 0 : if (srwm < 0)
1450 : srwm = 1;
1451 0 : srwm &= 0x1ff;
1452 : DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
1453 : entries, srwm);
1454 :
1455 0 : entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1456 0 : pixel_size * crtc->cursor->state->crtc_w;
1457 0 : entries = DIV_ROUND_UP(entries,
1458 : i965_cursor_wm_info.cacheline_size);
1459 0 : cursor_sr = i965_cursor_wm_info.fifo_size -
1460 0 : (entries + i965_cursor_wm_info.guard_size);
1461 :
1462 0 : if (cursor_sr > i965_cursor_wm_info.max_wm)
1463 0 : cursor_sr = i965_cursor_wm_info.max_wm;
1464 :
1465 : DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
1466 : "cursor %d\n", srwm, cursor_sr);
1467 :
1468 : cxsr_enabled = true;
1469 0 : } else {
1470 : cxsr_enabled = false;
1471 : /* Turn off self refresh if both pipes are enabled */
1472 0 : intel_set_memory_cxsr(dev_priv, false);
1473 : }
1474 :
1475 : DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
1476 : srwm);
1477 :
1478 : /* 965 has limitations... */
1479 0 : I915_WRITE(DSPFW1, FW_WM(srwm, SR) |
1480 : FW_WM(8, CURSORB) |
1481 : FW_WM(8, PLANEB) |
1482 : FW_WM(8, PLANEA));
1483 0 : I915_WRITE(DSPFW2, FW_WM(8, CURSORA) |
1484 : FW_WM(8, PLANEC_OLD));
1485 : /* update cursor SR watermark */
1486 0 : I915_WRITE(DSPFW3, FW_WM(cursor_sr, CURSOR_SR));
1487 :
1488 0 : if (cxsr_enabled)
1489 0 : intel_set_memory_cxsr(dev_priv, true);
1490 0 : }
1491 :
1492 : #undef FW_WM
1493 :
1494 0 : static void i9xx_update_wm(struct drm_crtc *unused_crtc)
1495 : {
1496 0 : struct drm_device *dev = unused_crtc->dev;
1497 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1498 : const struct intel_watermark_params *wm_info;
1499 : uint32_t fwater_lo;
1500 : uint32_t fwater_hi;
1501 : int cwm, srwm = 1;
1502 : int fifo_size;
1503 : int planea_wm, planeb_wm;
1504 : struct drm_crtc *crtc, *enabled = NULL;
1505 :
1506 0 : if (IS_I945GM(dev))
1507 0 : wm_info = &i945_wm_info;
1508 0 : else if (!IS_GEN2(dev))
1509 0 : wm_info = &i915_wm_info;
1510 : else
1511 : wm_info = &i830_a_wm_info;
1512 :
1513 0 : fifo_size = dev_priv->display.get_fifo_size(dev, 0);
1514 0 : crtc = intel_get_crtc_for_plane(dev, 0);
1515 0 : if (intel_crtc_active(crtc)) {
1516 : const struct drm_display_mode *adjusted_mode;
1517 0 : int cpp = crtc->primary->state->fb->bits_per_pixel / 8;
1518 0 : if (IS_GEN2(dev))
1519 0 : cpp = 4;
1520 :
1521 0 : adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
1522 0 : planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
1523 : wm_info, fifo_size, cpp,
1524 : pessimal_latency_ns);
1525 : enabled = crtc;
1526 0 : } else {
1527 0 : planea_wm = fifo_size - wm_info->guard_size;
1528 0 : if (planea_wm > (long)wm_info->max_wm)
1529 0 : planea_wm = wm_info->max_wm;
1530 : }
1531 :
1532 0 : if (IS_GEN2(dev))
1533 0 : wm_info = &i830_bc_wm_info;
1534 :
1535 0 : fifo_size = dev_priv->display.get_fifo_size(dev, 1);
1536 0 : crtc = intel_get_crtc_for_plane(dev, 1);
1537 0 : if (intel_crtc_active(crtc)) {
1538 : const struct drm_display_mode *adjusted_mode;
1539 0 : int cpp = crtc->primary->state->fb->bits_per_pixel / 8;
1540 0 : if (IS_GEN2(dev))
1541 0 : cpp = 4;
1542 :
1543 0 : adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
1544 0 : planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
1545 : wm_info, fifo_size, cpp,
1546 : pessimal_latency_ns);
1547 0 : if (enabled == NULL)
1548 0 : enabled = crtc;
1549 : else
1550 : enabled = NULL;
1551 0 : } else {
1552 0 : planeb_wm = fifo_size - wm_info->guard_size;
1553 0 : if (planeb_wm > (long)wm_info->max_wm)
1554 0 : planeb_wm = wm_info->max_wm;
1555 : }
1556 :
1557 : DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);
1558 :
1559 0 : if (IS_I915GM(dev) && enabled) {
1560 : struct drm_i915_gem_object *obj;
1561 :
1562 0 : obj = intel_fb_obj(enabled->primary->state->fb);
1563 :
1564 : /* self-refresh seems busted with untiled */
1565 0 : if (obj->tiling_mode == I915_TILING_NONE)
1566 0 : enabled = NULL;
1567 0 : }
1568 :
1569 : /*
1570 : * Overlay gets an aggressive default since video jitter is bad.
1571 : */
1572 : cwm = 2;
1573 :
1574 : /* Play safe and disable self-refresh before adjusting watermarks. */
1575 0 : intel_set_memory_cxsr(dev_priv, false);
1576 :
1577 : /* Calc SR entries for single-plane configs */
1578 0 : if (HAS_FW_BLC(dev) && enabled) {
1579 : /* self-refresh has much higher latency */
1580 : static const int sr_latency_ns = 6000;
1581 0 : const struct drm_display_mode *adjusted_mode = &to_intel_crtc(enabled)->config->base.adjusted_mode;
1582 0 : int clock = adjusted_mode->crtc_clock;
1583 0 : int htotal = adjusted_mode->crtc_htotal;
1584 0 : int hdisplay = to_intel_crtc(enabled)->config->pipe_src_w;
1585 0 : int pixel_size = enabled->primary->state->fb->bits_per_pixel / 8;
1586 : unsigned long line_time_us;
1587 : int entries;
1588 :
1589 0 : line_time_us = max(htotal * 1000 / clock, 1);
1590 :
1591 : /* Use ns/us then divide to preserve precision */
1592 0 : entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1593 0 : pixel_size * hdisplay;
1594 0 : entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
1595 : DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
1596 0 : srwm = wm_info->fifo_size - entries;
1597 0 : if (srwm < 0)
1598 : srwm = 1;
1599 :
1600 0 : if (IS_I945G(dev) || IS_I945GM(dev))
1601 0 : I915_WRITE(FW_BLC_SELF,
1602 : FW_BLC_SELF_FIFO_MASK | (srwm & 0xff));
1603 0 : else if (IS_I915GM(dev))
1604 0 : I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
1605 0 : }
1606 :
1607 : DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
1608 : planea_wm, planeb_wm, cwm, srwm);
1609 :
1610 0 : fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f);
1611 : fwater_hi = (cwm & 0x1f);
1612 :
1613 : /* Set request length to 8 cachelines per fetch */
1614 0 : fwater_lo = fwater_lo | (1 << 24) | (1 << 8);
1615 : fwater_hi = fwater_hi | (1 << 8);
1616 :
1617 0 : I915_WRITE(FW_BLC, fwater_lo);
1618 0 : I915_WRITE(FW_BLC2, fwater_hi);
1619 :
1620 0 : if (enabled)
1621 0 : intel_set_memory_cxsr(dev_priv, true);
1622 0 : }
1623 :
1624 0 : static void i845_update_wm(struct drm_crtc *unused_crtc)
1625 : {
1626 0 : struct drm_device *dev = unused_crtc->dev;
1627 0 : struct drm_i915_private *dev_priv = dev->dev_private;
1628 : struct drm_crtc *crtc;
1629 : const struct drm_display_mode *adjusted_mode;
1630 : uint32_t fwater_lo;
1631 : int planea_wm;
1632 :
1633 0 : crtc = single_enabled_crtc(dev);
1634 0 : if (crtc == NULL)
1635 0 : return;
1636 :
1637 0 : adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
1638 0 : planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
1639 : &i845_wm_info,
1640 0 : dev_priv->display.get_fifo_size(dev, 0),
1641 : 4, pessimal_latency_ns);
1642 0 : fwater_lo = I915_READ(FW_BLC) & ~0xfff;
1643 0 : fwater_lo |= (3<<8) | planea_wm;
1644 :
1645 : DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm);
1646 :
1647 0 : I915_WRITE(FW_BLC, fwater_lo);
1648 0 : }
1649 :
1650 0 : uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config)
1651 : {
1652 : uint32_t pixel_rate;
1653 :
1654 0 : pixel_rate = pipe_config->base.adjusted_mode.crtc_clock;
1655 :
1656 : /* We only use IF-ID interlacing. If we ever use PF-ID we'll need to
1657 : * adjust the pixel_rate here. */
1658 :
1659 0 : if (pipe_config->pch_pfit.enabled) {
1660 : uint64_t pipe_w, pipe_h, pfit_w, pfit_h;
1661 0 : uint32_t pfit_size = pipe_config->pch_pfit.size;
1662 :
1663 0 : pipe_w = pipe_config->pipe_src_w;
1664 0 : pipe_h = pipe_config->pipe_src_h;
1665 :
1666 0 : pfit_w = (pfit_size >> 16) & 0xFFFF;
1667 0 : pfit_h = pfit_size & 0xFFFF;
1668 0 : if (pipe_w < pfit_w)
1669 0 : pipe_w = pfit_w;
1670 0 : if (pipe_h < pfit_h)
1671 0 : pipe_h = pfit_h;
1672 :
1673 0 : pixel_rate = div_u64((uint64_t) pixel_rate * pipe_w * pipe_h,
1674 0 : pfit_w * pfit_h);
1675 0 : }
1676 :
1677 0 : return pixel_rate;
1678 : }
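/*
 * Worked example with hypothetical numbers, added for clarity: a
 * 1920x1080 pipe panel-fitted down to a 1280x720 window keeps the larger
 * dimensions above, so the rate scales by (1920 * 1080) / (1280 * 720)
 * = 2.25; a 148500 kHz crtc_clock becomes
 * div_u64(148500ULL * 1920 * 1080, 1280 * 720) = 334125 kHz.
 */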
1679 :
1680 : /* latency must be in 0.1us units. */
1681 0 : static uint32_t ilk_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel,
1682 : uint32_t latency)
1683 : {
1684 : uint64_t ret;
1685 :
1686 0 : if (WARN(latency == 0, "Latency value missing\n"))
1687 0 : return UINT_MAX;
1688 :
1689 0 : ret = (uint64_t) pixel_rate * bytes_per_pixel * latency;
1690 0 : ret = DIV_ROUND_UP_ULL(ret, 64 * 10000) + 2;
1691 :
1692 0 : return ret;
1693 0 : }
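/*
 * Worked example with hypothetical numbers: pixel_rate = 148500 kHz and
 * bytes_per_pixel = 4 drain 594 bytes/us, so latency = 120 (12us) covers
 * 7128 bytes. The fixed-point form above computes
 * DIV_ROUND_UP_ULL(148500 * 4 * 120, 64 * 10000) + 2 = 112 + 2 = 114
 * cachelines.
 */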
1694 :
1695 : /* latency must be in 0.1us units. */
1696 0 : static uint32_t ilk_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
1697 : uint32_t horiz_pixels, uint8_t bytes_per_pixel,
1698 : uint32_t latency)
1699 : {
1700 : uint32_t ret;
1701 :
1702 0 : if (WARN(latency == 0, "Latency value missing\n"))
1703 0 : return UINT_MAX;
1704 :
1705 0 : ret = (latency * pixel_rate) / (pipe_htotal * 10000);
1706 0 : ret = (ret + 1) * horiz_pixels * bytes_per_pixel;
1707 0 : ret = DIV_ROUND_UP(ret, 64) + 2;
1708 0 : return ret;
1709 0 : }
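/*
 * Worked example with hypothetical numbers: latency = 120 (12us),
 * pixel_rate = 148500 kHz and pipe_htotal = 2200 give
 * (120 * 148500) / (2200 * 10000) = 0 complete lines; the +1 accounts
 * for the line in flight, so with horiz_pixels = 1920 and
 * bytes_per_pixel = 4 the result is DIV_ROUND_UP(7680, 64) + 2 = 122.
 */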
1710 :
1711 0 : static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels,
1712 : uint8_t bytes_per_pixel)
1713 : {
1714 0 : return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2;
1715 : }
1716 :
1717 : struct skl_pipe_wm_parameters {
1718 : bool active;
1719 : uint32_t pipe_htotal;
1720 : uint32_t pixel_rate; /* in KHz */
1721 : struct intel_plane_wm_parameters plane[I915_MAX_PLANES];
1722 : };
1723 :
1724 : struct ilk_wm_maximums {
1725 : uint16_t pri;
1726 : uint16_t spr;
1727 : uint16_t cur;
1728 : uint16_t fbc;
1729 : };
1730 :
1731 : /* used in computing the new watermarks state */
1732 : struct intel_wm_config {
1733 : unsigned int num_pipes_active;
1734 : bool sprites_enabled;
1735 : bool sprites_scaled;
1736 : };
1737 :
1738 : /*
1739 : * For both WM_PIPE and WM_LP.
1740 : * mem_value must be in 0.1us units.
1741 : */
1742 0 : static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate,
1743 : const struct intel_plane_state *pstate,
1744 : uint32_t mem_value,
1745 : bool is_lp)
1746 : {
1747 0 : int bpp = pstate->base.fb ? pstate->base.fb->bits_per_pixel / 8 : 0;
1748 : uint32_t method1, method2;
1749 :
1750 0 : if (!cstate->base.active || !pstate->visible)
1751 0 : return 0;
1752 :
1753 0 : method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), bpp, mem_value);
1754 :
1755 0 : if (!is_lp)
1756 0 : return method1;
1757 :
1758 0 : method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
1759 0 : cstate->base.adjusted_mode.crtc_htotal,
1760 0 : drm_rect_width(&pstate->dst),
1761 : bpp,
1762 : mem_value);
1763 :
1764 0 : return min(method1, method2);
1765 0 : }
1766 :
1767 : /*
1768 : * For both WM_PIPE and WM_LP.
1769 : * mem_value must be in 0.1us units.
1770 : */
1771 0 : static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate,
1772 : const struct intel_plane_state *pstate,
1773 : uint32_t mem_value)
1774 : {
1775 0 : int bpp = pstate->base.fb ? pstate->base.fb->bits_per_pixel / 8 : 0;
1776 : uint32_t method1, method2;
1777 :
1778 0 : if (!cstate->base.active || !pstate->visible)
1779 0 : return 0;
1780 :
1781 0 : method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), bpp, mem_value);
1782 0 : method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
1783 0 : cstate->base.adjusted_mode.crtc_htotal,
1784 0 : drm_rect_width(&pstate->dst),
1785 : bpp,
1786 : mem_value);
1787 0 : return min(method1, method2);
1788 0 : }
1789 :
1790 : /*
1791 : * For both WM_PIPE and WM_LP.
1792 : * mem_value must be in 0.1us units.
1793 : */
1794 0 : static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate,
1795 : const struct intel_plane_state *pstate,
1796 : uint32_t mem_value)
1797 : {
1798 : /*
1799 : * We treat the cursor plane as always-on for the purposes of watermark
1800 : * calculation. Until we have two-stage watermark programming merged,
1801 : * this is necessary to avoid flickering.
1802 : */
1803 : int cpp = 4;
1804 0 : int width = pstate->visible ? pstate->base.crtc_w : 64;
1805 :
1806 0 : if (!cstate->base.active)
1807 0 : return 0;
1808 :
1809 0 : return ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
1810 0 : cstate->base.adjusted_mode.crtc_htotal,
1811 : width, cpp, mem_value);
1812 0 : }
1813 :
1814 : /* Only for WM_LP. */
1815 0 : static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
1816 : const struct intel_plane_state *pstate,
1817 : uint32_t pri_val)
1818 : {
1819 0 : int bpp = pstate->base.fb ? pstate->base.fb->bits_per_pixel / 8 : 0;
1820 :
1821 0 : if (!cstate->base.active || !pstate->visible)
1822 0 : return 0;
1823 :
1824 0 : return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->dst), bpp);
1825 0 : }
1826 :
1827 0 : static unsigned int ilk_display_fifo_size(const struct drm_device *dev)
1828 : {
1829 0 : if (INTEL_INFO(dev)->gen >= 8)
1830 0 : return 3072;
1831 0 : else if (INTEL_INFO(dev)->gen >= 7)
1832 0 : return 768;
1833 : else
1834 0 : return 512;
1835 0 : }
1836 :
1837 0 : static unsigned int ilk_plane_wm_reg_max(const struct drm_device *dev,
1838 : int level, bool is_sprite)
1839 : {
1840 0 : if (INTEL_INFO(dev)->gen >= 8)
1841 : /* BDW primary/sprite plane watermarks */
1842 0 : return level == 0 ? 255 : 2047;
1843 0 : else if (INTEL_INFO(dev)->gen >= 7)
1844 : /* IVB/HSW primary/sprite plane watermarks */
1845 0 : return level == 0 ? 127 : 1023;
1846 0 : else if (!is_sprite)
1847 : /* ILK/SNB primary plane watermarks */
1848 0 : return level == 0 ? 127 : 511;
1849 : else
1850 : /* ILK/SNB sprite plane watermarks */
1851 0 : return level == 0 ? 63 : 255;
1852 0 : }
1853 :
1854 0 : static unsigned int ilk_cursor_wm_reg_max(const struct drm_device *dev,
1855 : int level)
1856 : {
1857 0 : if (INTEL_INFO(dev)->gen >= 7)
1858 0 : return level == 0 ? 63 : 255;
1859 : else
1860 0 : return level == 0 ? 31 : 63;
1861 0 : }
1862 :
1863 0 : static unsigned int ilk_fbc_wm_reg_max(const struct drm_device *dev)
1864 : {
1865 0 : if (INTEL_INFO(dev)->gen >= 8)
1866 0 : return 31;
1867 : else
1868 0 : return 15;
1869 0 : }
1870 :
1871 : /* Calculate the maximum primary/sprite plane watermark */
1872 0 : static unsigned int ilk_plane_wm_max(const struct drm_device *dev,
1873 : int level,
1874 : const struct intel_wm_config *config,
1875 : enum intel_ddb_partitioning ddb_partitioning,
1876 : bool is_sprite)
1877 : {
1878 0 : unsigned int fifo_size = ilk_display_fifo_size(dev);
1879 :
1880 : /* if sprites aren't enabled, sprites get nothing */
1881 0 : if (is_sprite && !config->sprites_enabled)
1882 0 : return 0;
1883 :
1884 : /* HSW allows LP1+ watermarks even with multiple pipes */
1885 0 : if (level == 0 || config->num_pipes_active > 1) {
1886 0 : fifo_size /= INTEL_INFO(dev)->num_pipes;
1887 :
1888 : /*
1889 : * For some reason the non self refresh
1890 : * FIFO size is only half of the self
1891 : * refresh FIFO size on ILK/SNB.
1892 : */
1893 0 : if (INTEL_INFO(dev)->gen <= 6)
1894 0 : fifo_size /= 2;
1895 : }
1896 :
1897 0 : if (config->sprites_enabled) {
1898 : /* level 0 is always calculated with 1:1 split */
1899 0 : if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) {
1900 0 : if (is_sprite)
1901 0 : fifo_size *= 5;
1902 0 : fifo_size /= 6;
1903 0 : } else {
1904 0 : fifo_size /= 2;
1905 : }
1906 : }
1907 :
1908 : /* clamp to max that the registers can hold */
1909 0 : return min(fifo_size, ilk_plane_wm_reg_max(dev, level, is_sprite));
1910 0 : }
1911 :
1912 : /* Calculate the maximum cursor plane watermark */
1913 0 : static unsigned int ilk_cursor_wm_max(const struct drm_device *dev,
1914 : int level,
1915 : const struct intel_wm_config *config)
1916 : {
1917 : /* HSW LP1+ watermarks w/ multiple pipes */
1918 0 : if (level > 0 && config->num_pipes_active > 1)
1919 0 : return 64;
1920 :
1921 : /* otherwise just report max that registers can hold */
1922 0 : return ilk_cursor_wm_reg_max(dev, level);
1923 0 : }
1924 :
1925 0 : static void ilk_compute_wm_maximums(const struct drm_device *dev,
1926 : int level,
1927 : const struct intel_wm_config *config,
1928 : enum intel_ddb_partitioning ddb_partitioning,
1929 : struct ilk_wm_maximums *max)
1930 : {
1931 0 : max->pri = ilk_plane_wm_max(dev, level, config, ddb_partitioning, false);
1932 0 : max->spr = ilk_plane_wm_max(dev, level, config, ddb_partitioning, true);
1933 0 : max->cur = ilk_cursor_wm_max(dev, level, config);
1934 0 : max->fbc = ilk_fbc_wm_reg_max(dev);
1935 0 : }
1936 :
1937 0 : static void ilk_compute_wm_reg_maximums(struct drm_device *dev,
1938 : int level,
1939 : struct ilk_wm_maximums *max)
1940 : {
1941 0 : max->pri = ilk_plane_wm_reg_max(dev, level, false);
1942 0 : max->spr = ilk_plane_wm_reg_max(dev, level, true);
1943 0 : max->cur = ilk_cursor_wm_reg_max(dev, level);
1944 0 : max->fbc = ilk_fbc_wm_reg_max(dev);
1945 0 : }
1946 :
1947 0 : static bool ilk_validate_wm_level(int level,
1948 : const struct ilk_wm_maximums *max,
1949 : struct intel_wm_level *result)
1950 : {
1951 : bool ret;
1952 :
1953 : /* already determined to be invalid? */
1954 0 : if (!result->enable)
1955 0 : return false;
1956 :
1957 0 : result->enable = result->pri_val <= max->pri &&
1958 0 : result->spr_val <= max->spr &&
1959 0 : result->cur_val <= max->cur;
1960 :
1961 : ret = result->enable;
1962 :
1963 : /*
1964 : * HACK until we can pre-compute everything,
1965 : * and thus fail gracefully if LP0 watermarks
1966 : * are exceeded...
1967 : */
1968 0 : if (level == 0 && !result->enable) {
1969 0 : if (result->pri_val > max->pri)
1970 : DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n",
1971 : level, result->pri_val, max->pri);
1972 0 : if (result->spr_val > max->spr)
1973 : DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n",
1974 : level, result->spr_val, max->spr);
1975 0 : if (result->cur_val > max->cur)
1976 : DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n",
1977 : level, result->cur_val, max->cur);
1978 :
1979 0 : result->pri_val = min_t(uint32_t, result->pri_val, max->pri);
1980 0 : result->spr_val = min_t(uint32_t, result->spr_val, max->spr);
1981 0 : result->cur_val = min_t(uint32_t, result->cur_val, max->cur);
1982 0 : result->enable = true;
1983 0 : }
1984 :
1985 0 : return ret;
1986 0 : }
1987 :
1988 0 : static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv,
1989 : const struct intel_crtc *intel_crtc,
1990 : int level,
1991 : struct intel_crtc_state *cstate,
1992 : struct intel_wm_level *result)
1993 : {
1994 : struct intel_plane *intel_plane;
1995 0 : uint16_t pri_latency = dev_priv->wm.pri_latency[level];
1996 0 : uint16_t spr_latency = dev_priv->wm.spr_latency[level];
1997 0 : uint16_t cur_latency = dev_priv->wm.cur_latency[level];
1998 :
1999 : /* WM1+ latency values stored in 0.5us units */
2000 0 : if (level > 0) {
2001 0 : pri_latency *= 5;
2002 0 : spr_latency *= 5;
2003 0 : cur_latency *= 5;
2004 0 : }
2005 :
2006 0 : for_each_intel_plane_on_crtc(dev_priv->dev, intel_crtc, intel_plane) {
2007 : struct intel_plane_state *pstate =
2008 0 : to_intel_plane_state(intel_plane->base.state);
2009 :
2010 0 : switch (intel_plane->base.type) {
2011 : case DRM_PLANE_TYPE_PRIMARY:
2012 0 : result->pri_val = ilk_compute_pri_wm(cstate, pstate,
2013 0 : pri_latency,
2014 0 : level);
2015 0 : result->fbc_val = ilk_compute_fbc_wm(cstate, pstate,
2016 : result->pri_val);
2017 0 : break;
2018 : case DRM_PLANE_TYPE_OVERLAY:
2019 0 : result->spr_val = ilk_compute_spr_wm(cstate, pstate,
2020 0 : spr_latency);
2021 0 : break;
2022 : case DRM_PLANE_TYPE_CURSOR:
2023 0 : result->cur_val = ilk_compute_cur_wm(cstate, pstate,
2024 0 : cur_latency);
2025 0 : break;
2026 : }
2027 0 : }
2028 :
2029 0 : result->enable = true;
2030 0 : }
2031 :
2032 : static uint32_t
2033 0 : hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
2034 : {
2035 0 : struct drm_i915_private *dev_priv = dev->dev_private;
2036 0 : struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2037 0 : const struct drm_display_mode *adjusted_mode = &intel_crtc->config->base.adjusted_mode;
2038 : u32 linetime, ips_linetime;
2039 :
2040 0 : if (!intel_crtc->active)
2041 0 : return 0;
2042 :
2043 : 	/* The WM is computed based on how long it takes to fill a single
2044 : 	 * row at the given clock rate, multiplied by 8.
2045 : 	 */
2046 0 : linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
2047 : adjusted_mode->crtc_clock);
2048 0 : ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
2049 : dev_priv->cdclk_freq);
2050 :
2051 0 : return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
2052 : PIPE_WM_LINETIME_TIME(linetime);
2053 0 : }
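/*
 * Worked example with hypothetical numbers: a 1920x1080@60 mode with
 * crtc_htotal = 2200 and crtc_clock = 148500 kHz has a ~14.8us line
 * time, so linetime = DIV_ROUND_CLOSEST(2200 * 1000 * 8, 148500) = 119
 * in eighths of a microsecond; ips_linetime applies the same formula
 * against cdclk_freq.
 */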
2054 :
2055 0 : static void intel_read_wm_latency(struct drm_device *dev, uint16_t wm[8])
2056 : {
2057 0 : struct drm_i915_private *dev_priv = dev->dev_private;
2058 :
2059 0 : if (IS_GEN9(dev)) {
2060 0 : uint32_t val;
2061 : int ret, i;
2062 0 : int level, max_level = ilk_wm_max_level(dev);
2063 :
2064 : /* read the first set of memory latencies[0:3] */
2065 0 : val = 0; /* data0 to be programmed to 0 for first set */
2066 0 : mutex_lock(&dev_priv->rps.hw_lock);
2067 0 : ret = sandybridge_pcode_read(dev_priv,
2068 : GEN9_PCODE_READ_MEM_LATENCY,
2069 : &val);
2070 0 : mutex_unlock(&dev_priv->rps.hw_lock);
2071 :
2072 0 : if (ret) {
2073 0 : DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2074 0 : return;
2075 : }
2076 :
2077 0 : wm[0] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2078 0 : wm[1] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2079 : GEN9_MEM_LATENCY_LEVEL_MASK;
2080 0 : wm[2] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2081 : GEN9_MEM_LATENCY_LEVEL_MASK;
2082 0 : wm[3] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2083 : GEN9_MEM_LATENCY_LEVEL_MASK;
2084 :
2085 : /* read the second set of memory latencies[4:7] */
2086 0 : val = 1; /* data0 to be programmed to 1 for second set */
2087 0 : mutex_lock(&dev_priv->rps.hw_lock);
2088 0 : ret = sandybridge_pcode_read(dev_priv,
2089 : GEN9_PCODE_READ_MEM_LATENCY,
2090 : &val);
2091 0 : mutex_unlock(&dev_priv->rps.hw_lock);
2092 0 : if (ret) {
2093 0 : DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2094 0 : return;
2095 : }
2096 :
2097 0 : wm[4] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2098 0 : wm[5] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2099 : GEN9_MEM_LATENCY_LEVEL_MASK;
2100 0 : wm[6] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2101 : GEN9_MEM_LATENCY_LEVEL_MASK;
2102 0 : wm[7] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2103 : GEN9_MEM_LATENCY_LEVEL_MASK;
2104 :
2105 : /*
2106 : * If a level n (n > 1) has a 0us latency, all levels m (m >= n)
2107 : * need to be disabled. We make sure to sanitize the values out
2108 : * of the punit to satisfy this requirement.
2109 : */
2110 0 : for (level = 1; level <= max_level; level++) {
2111 0 : if (wm[level] == 0) {
2112 0 : for (i = level + 1; i <= max_level; i++)
2113 0 : wm[i] = 0;
2114 : break;
2115 : }
2116 : }
2117 :
2118 : /*
2119 : * WaWmMemoryReadLatency:skl
2120 : *
2121 : * punit doesn't take into account the read latency so we need
2122 : * to add 2us to the various latency levels we retrieve from the
2123 : 		 * punit when level 0 response data is 0us.
2124 : */
2125 0 : if (wm[0] == 0) {
2126 0 : wm[0] += 2;
2127 0 : for (level = 1; level <= max_level; level++) {
2128 0 : if (wm[level] == 0)
2129 : break;
2130 0 : wm[level] += 2;
2131 : }
2132 : }
2133 :
2134 0 : } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
2135 0 : uint64_t sskpd = I915_READ64(MCH_SSKPD);
2136 :
2137 0 : wm[0] = (sskpd >> 56) & 0xFF;
2138 0 : if (wm[0] == 0)
2139 0 : wm[0] = sskpd & 0xF;
2140 0 : wm[1] = (sskpd >> 4) & 0xFF;
2141 0 : wm[2] = (sskpd >> 12) & 0xFF;
2142 0 : wm[3] = (sskpd >> 20) & 0x1FF;
2143 0 : wm[4] = (sskpd >> 32) & 0x1FF;
2144 0 : } else if (INTEL_INFO(dev)->gen >= 6) {
2145 0 : uint32_t sskpd = I915_READ(MCH_SSKPD);
2146 :
2147 0 : wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK;
2148 0 : wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK;
2149 0 : wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK;
2150 0 : wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK;
2151 0 : } else if (INTEL_INFO(dev)->gen >= 5) {
2152 0 : uint32_t mltr = I915_READ(MLTR_ILK);
2153 :
2154 : /* ILK primary LP0 latency is 700 ns */
2155 0 : wm[0] = 7;
2156 0 : wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK;
2157 0 : wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK;
2158 0 : }
2159 0 : }
2160 :
2161 0 : static void intel_fixup_spr_wm_latency(struct drm_device *dev, uint16_t wm[5])
2162 : {
2163 : /* ILK sprite LP0 latency is 1300 ns */
2164 0 : if (INTEL_INFO(dev)->gen == 5)
2165 0 : wm[0] = 13;
2166 0 : }
2167 :
2168 0 : static void intel_fixup_cur_wm_latency(struct drm_device *dev, uint16_t wm[5])
2169 : {
2170 : /* ILK cursor LP0 latency is 1300 ns */
2171 0 : if (INTEL_INFO(dev)->gen == 5)
2172 0 : wm[0] = 13;
2173 :
2174 : /* WaDoubleCursorLP3Latency:ivb */
2175 0 : if (IS_IVYBRIDGE(dev))
2176 0 : wm[3] *= 2;
2177 0 : }
2178 :
2179 0 : int ilk_wm_max_level(const struct drm_device *dev)
2180 : {
2181 : /* how many WM levels are we expecting */
2182 0 : if (INTEL_INFO(dev)->gen >= 9)
2183 0 : return 7;
2184 0 : else if (IS_HASWELL(dev) || IS_BROADWELL(dev))
2185 0 : return 4;
2186 0 : else if (INTEL_INFO(dev)->gen >= 6)
2187 0 : return 3;
2188 : else
2189 0 : return 2;
2190 0 : }
2191 :
2192 0 : static void intel_print_wm_latency(struct drm_device *dev,
2193 : const char *name,
2194 : const uint16_t wm[8])
2195 : {
2196 0 : int level, max_level = ilk_wm_max_level(dev);
2197 :
2198 0 : for (level = 0; level <= max_level; level++) {
2199 0 : unsigned int latency = wm[level];
2200 :
2201 0 : if (latency == 0) {
2202 0 : DRM_ERROR("%s WM%d latency not provided\n",
2203 : name, level);
2204 0 : continue;
2205 : }
2206 :
2207 : /*
2208 : * - latencies are in us on gen9.
2209 : * - before then, WM1+ latency values are in 0.5us units
2210 : */
2211 0 : if (IS_GEN9(dev))
2212 0 : latency *= 10;
2213 0 : else if (level > 0)
2214 0 : latency *= 5;
2215 :
2216 : DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n",
2217 : name, level, wm[level],
2218 : latency / 10, latency % 10);
2219 0 : }
2220 0 : }
2221 :
2222 0 : static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv,
2223 : uint16_t wm[5], uint16_t min)
2224 : {
2225 0 : int level, max_level = ilk_wm_max_level(dev_priv->dev);
2226 :
2227 0 : if (wm[0] >= min)
2228 0 : return false;
2229 :
2230 0 : wm[0] = max(wm[0], min);
2231 0 : for (level = 1; level <= max_level; level++)
2232 0 : wm[level] = max_t(uint16_t, wm[level], DIV_ROUND_UP(min, 5));
2233 :
2234 0 : return true;
2235 0 : }
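/*
 * Unit sanity check, added for clarity: wm[0] is in 0.1us units while
 * WM1+ are in 0.5us units, so flooring wm[0] at min = 12 (1.2us) pairs
 * with DIV_ROUND_UP(12, 5) = 3 (1.5us) on the higher levels, the
 * smallest 0.5us multiple that still covers 1.2us.
 */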
2236 :
2237 0 : static void snb_wm_latency_quirk(struct drm_device *dev)
2238 : {
2239 0 : struct drm_i915_private *dev_priv = dev->dev_private;
2240 : bool changed;
2241 :
2242 : /*
2243 : * The BIOS provided WM memory latency values are often
2244 : * inadequate for high resolution displays. Adjust them.
2245 : */
2246 0 : changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12) |
2247 0 : ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12) |
2248 0 : ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12);
2249 :
2250 0 : if (!changed)
2251 0 : return;
2252 :
2253 : DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n");
2254 0 : intel_print_wm_latency(dev, "Primary", dev_priv->wm.pri_latency);
2255 0 : intel_print_wm_latency(dev, "Sprite", dev_priv->wm.spr_latency);
2256 0 : intel_print_wm_latency(dev, "Cursor", dev_priv->wm.cur_latency);
2257 0 : }
2258 :
2259 0 : static void ilk_setup_wm_latency(struct drm_device *dev)
2260 : {
2261 0 : struct drm_i915_private *dev_priv = dev->dev_private;
2262 :
2263 0 : intel_read_wm_latency(dev, dev_priv->wm.pri_latency);
2264 :
2265 0 : memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency,
2266 : sizeof(dev_priv->wm.pri_latency));
2267 0 : memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency,
2268 : sizeof(dev_priv->wm.pri_latency));
2269 :
2270 0 : intel_fixup_spr_wm_latency(dev, dev_priv->wm.spr_latency);
2271 0 : intel_fixup_cur_wm_latency(dev, dev_priv->wm.cur_latency);
2272 :
2273 0 : intel_print_wm_latency(dev, "Primary", dev_priv->wm.pri_latency);
2274 0 : intel_print_wm_latency(dev, "Sprite", dev_priv->wm.spr_latency);
2275 0 : intel_print_wm_latency(dev, "Cursor", dev_priv->wm.cur_latency);
2276 :
2277 0 : if (IS_GEN6(dev))
2278 0 : snb_wm_latency_quirk(dev);
2279 0 : }
2280 :
2281 0 : static void skl_setup_wm_latency(struct drm_device *dev)
2282 : {
2283 0 : struct drm_i915_private *dev_priv = dev->dev_private;
2284 :
2285 0 : intel_read_wm_latency(dev, dev_priv->wm.skl_latency);
2286 0 : intel_print_wm_latency(dev, "Gen9 Plane", dev_priv->wm.skl_latency);
2287 0 : }
2288 :
2289 0 : static void ilk_compute_wm_config(struct drm_device *dev,
2290 : struct intel_wm_config *config)
2291 : {
2292 : struct intel_crtc *intel_crtc;
2293 :
2294 : /* Compute the currently _active_ config */
2295 0 : for_each_intel_crtc(dev, intel_crtc) {
2296 0 : const struct intel_pipe_wm *wm = &intel_crtc->wm.active;
2297 :
2298 0 : if (!wm->pipe_enabled)
2299 0 : continue;
2300 :
2301 0 : config->sprites_enabled |= wm->sprites_enabled;
2302 0 : config->sprites_scaled |= wm->sprites_scaled;
2303 0 : config->num_pipes_active++;
2304 0 : }
2305 0 : }
2306 :
2307 : /* Compute new watermarks for the pipe */
2308 0 : static bool intel_compute_pipe_wm(struct intel_crtc_state *cstate,
2309 : struct intel_pipe_wm *pipe_wm)
2310 : {
2311 0 : struct drm_crtc *crtc = cstate->base.crtc;
2312 0 : struct drm_device *dev = crtc->dev;
2313 0 : const struct drm_i915_private *dev_priv = dev->dev_private;
2314 0 : struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2315 : struct intel_plane *intel_plane;
2316 : struct intel_plane_state *sprstate = NULL;
2317 0 : int level, max_level = ilk_wm_max_level(dev);
2318 : /* LP0 watermark maximums depend on this pipe alone */
2319 0 : struct intel_wm_config config = {
2320 : .num_pipes_active = 1,
2321 : };
2322 0 : struct ilk_wm_maximums max;
2323 :
2324 0 : for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
2325 0 : if (intel_plane->base.type == DRM_PLANE_TYPE_OVERLAY) {
2326 0 : sprstate = to_intel_plane_state(intel_plane->base.state);
2327 0 : break;
2328 : }
2329 : }
2330 :
2331 0 : config.sprites_enabled = sprstate->visible;
2332 0 : config.sprites_scaled = sprstate->visible &&
2333 0 : (drm_rect_width(&sprstate->dst) != drm_rect_width(&sprstate->src) >> 16 ||
2334 0 : drm_rect_height(&sprstate->dst) != drm_rect_height(&sprstate->src) >> 16);
2335 :
2336 0 : pipe_wm->pipe_enabled = cstate->base.active;
2337 0 : pipe_wm->sprites_enabled = sprstate->visible;
2338 0 : pipe_wm->sprites_scaled = config.sprites_scaled;
2339 :
2340 : /* ILK/SNB: LP2+ watermarks only w/o sprites */
2341 0 : if (INTEL_INFO(dev)->gen <= 6 && sprstate->visible)
2342 0 : max_level = 1;
2343 :
2344 : /* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */
2345 0 : if (config.sprites_scaled)
2346 0 : max_level = 0;
2347 :
2348 0 : ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate, &pipe_wm->wm[0]);
2349 :
2350 0 : if (IS_HASWELL(dev) || IS_BROADWELL(dev))
2351 0 : pipe_wm->linetime = hsw_compute_linetime_wm(dev, crtc);
2352 :
2353 : /* LP0 watermarks always use 1/2 DDB partitioning */
2354 0 : ilk_compute_wm_maximums(dev, 0, &config, INTEL_DDB_PART_1_2, &max);
2355 :
2356 : /* At least LP0 must be valid */
2357 0 : if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0]))
2358 0 : return false;
2359 :
2360 0 : ilk_compute_wm_reg_maximums(dev, 1, &max);
2361 :
2362 0 : for (level = 1; level <= max_level; level++) {
2363 0 : struct intel_wm_level wm = {};
2364 :
2365 0 : ilk_compute_wm_level(dev_priv, intel_crtc, level, cstate, &wm);
2366 :
2367 : /*
2368 : * Disable any watermark level that exceeds the
2369 : * register maximums since such watermarks are
2370 : * always invalid.
2371 : */
2372 0 : if (!ilk_validate_wm_level(level, &max, &wm))
2373 0 : break;
2374 :
2375 0 : pipe_wm->wm[level] = wm;
2376 0 : }
2377 :
2378 0 : return true;
2379 0 : }
2380 :
2381 : /*
2382 : * Merge the watermarks from all active pipes for a specific level.
2383 : */
2384 0 : static void ilk_merge_wm_level(struct drm_device *dev,
2385 : int level,
2386 : struct intel_wm_level *ret_wm)
2387 : {
2388 : const struct intel_crtc *intel_crtc;
2389 :
2390 0 : ret_wm->enable = true;
2391 :
2392 0 : for_each_intel_crtc(dev, intel_crtc) {
2393 0 : const struct intel_pipe_wm *active = &intel_crtc->wm.active;
2394 0 : const struct intel_wm_level *wm = &active->wm[level];
2395 :
2396 0 : if (!active->pipe_enabled)
2397 0 : continue;
2398 :
2399 : /*
2400 : * The watermark values may have been used in the past,
2401 : * so we must maintain them in the registers for some
2402 : * time even if the level is now disabled.
2403 : */
2404 0 : if (!wm->enable)
2405 0 : ret_wm->enable = false;
2406 :
2407 0 : ret_wm->pri_val = max(ret_wm->pri_val, wm->pri_val);
2408 0 : ret_wm->spr_val = max(ret_wm->spr_val, wm->spr_val);
2409 0 : ret_wm->cur_val = max(ret_wm->cur_val, wm->cur_val);
2410 0 : ret_wm->fbc_val = max(ret_wm->fbc_val, wm->fbc_val);
2411 0 : }
2412 0 : }
2413 :
2414 : /*
2415 : * Merge all low power watermarks for all active pipes.
2416 : */
2417 0 : static void ilk_wm_merge(struct drm_device *dev,
2418 : const struct intel_wm_config *config,
2419 : const struct ilk_wm_maximums *max,
2420 : struct intel_pipe_wm *merged)
2421 : {
2422 0 : struct drm_i915_private *dev_priv = dev->dev_private;
2423 0 : int level, max_level = ilk_wm_max_level(dev);
2424 : int last_enabled_level = max_level;
2425 :
2426 : /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */
2427 0 : if ((INTEL_INFO(dev)->gen <= 6 || IS_IVYBRIDGE(dev)) &&
2428 0 : config->num_pipes_active > 1)
2429 0 : return;
2430 :
2431 : /* ILK: FBC WM must be disabled always */
2432 0 : merged->fbc_wm_enabled = INTEL_INFO(dev)->gen >= 6;
2433 :
2434 : /* merge each WM1+ level */
2435 0 : for (level = 1; level <= max_level; level++) {
2436 0 : struct intel_wm_level *wm = &merged->wm[level];
2437 :
2438 0 : ilk_merge_wm_level(dev, level, wm);
2439 :
2440 0 : if (level > last_enabled_level)
2441 0 : wm->enable = false;
2442 0 : else if (!ilk_validate_wm_level(level, max, wm))
2443 : /* make sure all following levels get disabled */
2444 0 : last_enabled_level = level - 1;
2445 :
2446 : /*
2447 : * The spec says it is preferred to disable
2448 : * FBC WMs instead of disabling a WM level.
2449 : */
2450 0 : if (wm->fbc_val > max->fbc) {
2451 0 : if (wm->enable)
2452 0 : merged->fbc_wm_enabled = false;
2453 0 : wm->fbc_val = 0;
2454 0 : }
2455 : }
2456 :
2457 : /* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */
2458 : /*
2459 : * FIXME this is racy. FBC might get enabled later.
2460 : * What we should check here is whether FBC can be
2461 : * enabled sometime later.
2462 : */
2463 0 : if (IS_GEN5(dev) && !merged->fbc_wm_enabled &&
2464 0 : intel_fbc_enabled(dev_priv)) {
2465 0 : for (level = 2; level <= max_level; level++) {
2466 0 : struct intel_wm_level *wm = &merged->wm[level];
2467 :
2468 0 : wm->enable = false;
2469 : }
2470 : }
2471 0 : }
2472 :
2473 0 : static int ilk_wm_lp_to_level(int wm_lp, const struct intel_pipe_wm *pipe_wm)
2474 : {
2475 : /* LP1,LP2,LP3 levels are either 1,2,3 or 1,3,4 */
2476 0 : return wm_lp + (wm_lp >= 2 && pipe_wm->wm[4].enable);
2477 : }
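/*
 * Mapping implied above, spelled out: LP1 always reads level 1; with
 * wm[4] disabled LP2/LP3 read levels 2/3, and with wm[4] enabled the
 * boolean bumps them to levels 3/4, yielding the 1,3,4 layout.
 */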
2478 :
2479 : /* The value we need to program into the WM_LPx latency field */
2480 0 : static unsigned int ilk_wm_lp_latency(struct drm_device *dev, int level)
2481 : {
2482 0 : struct drm_i915_private *dev_priv = dev->dev_private;
2483 :
2484 0 : if (IS_HASWELL(dev) || IS_BROADWELL(dev))
2485 0 : return 2 * level;
2486 : else
2487 0 : return dev_priv->wm.pri_latency[level];
2488 0 : }
2489 :
2490 0 : static void ilk_compute_wm_results(struct drm_device *dev,
2491 : const struct intel_pipe_wm *merged,
2492 : enum intel_ddb_partitioning partitioning,
2493 : struct ilk_wm_values *results)
2494 : {
2495 : struct intel_crtc *intel_crtc;
2496 : int level, wm_lp;
2497 :
2498 0 : results->enable_fbc_wm = merged->fbc_wm_enabled;
2499 0 : results->partitioning = partitioning;
2500 :
2501 : /* LP1+ register values */
2502 0 : for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
2503 : const struct intel_wm_level *r;
2504 :
2505 0 : level = ilk_wm_lp_to_level(wm_lp, merged);
2506 :
2507 0 : r = &merged->wm[level];
2508 :
2509 : /*
2510 : * Maintain the watermark values even if the level is
2511 : * disabled. Doing otherwise could cause underruns.
2512 : */
2513 0 : results->wm_lp[wm_lp - 1] =
2514 0 : (ilk_wm_lp_latency(dev, level) << WM1_LP_LATENCY_SHIFT) |
2515 0 : (r->pri_val << WM1_LP_SR_SHIFT) |
2516 0 : r->cur_val;
2517 :
2518 0 : if (r->enable)
2519 0 : results->wm_lp[wm_lp - 1] |= WM1_LP_SR_EN;
2520 :
2521 0 : if (INTEL_INFO(dev)->gen >= 8)
2522 0 : results->wm_lp[wm_lp - 1] |=
2523 0 : r->fbc_val << WM1_LP_FBC_SHIFT_BDW;
2524 : else
2525 0 : results->wm_lp[wm_lp - 1] |=
2526 0 : r->fbc_val << WM1_LP_FBC_SHIFT;
2527 :
2528 : /*
2529 : * Always set WM1S_LP_EN when spr_val != 0, even if the
2530 : * level is disabled. Doing otherwise could cause underruns.
2531 : */
2532 0 : if (INTEL_INFO(dev)->gen <= 6 && r->spr_val) {
2533 0 : WARN_ON(wm_lp != 1);
2534 0 : results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val;
2535 0 : } else
2536 0 : results->wm_lp_spr[wm_lp - 1] = r->spr_val;
2537 : }
2538 :
2539 : /* LP0 register values */
2540 0 : for_each_intel_crtc(dev, intel_crtc) {
2541 0 : enum pipe pipe = intel_crtc->pipe;
2542 : const struct intel_wm_level *r =
2543 0 : &intel_crtc->wm.active.wm[0];
2544 :
2545 0 : if (WARN_ON(!r->enable))
2546 0 : continue;
2547 :
2548 0 : results->wm_linetime[pipe] = intel_crtc->wm.active.linetime;
2549 :
2550 0 : results->wm_pipe[pipe] =
2551 0 : (r->pri_val << WM0_PIPE_PLANE_SHIFT) |
2552 0 : (r->spr_val << WM0_PIPE_SPRITE_SHIFT) |
2553 0 : r->cur_val;
2554 0 : }
2555 0 : }
2556 :
2557 : /* Find the result with the highest level enabled. Check for enable_fbc_wm in
2558 : * case both are at the same level. Prefer r1 in case they're the same. */
2559 0 : static struct intel_pipe_wm *ilk_find_best_result(struct drm_device *dev,
2560 : struct intel_pipe_wm *r1,
2561 : struct intel_pipe_wm *r2)
2562 : {
2563 0 : int level, max_level = ilk_wm_max_level(dev);
2564 : int level1 = 0, level2 = 0;
2565 :
2566 0 : for (level = 1; level <= max_level; level++) {
2567 0 : if (r1->wm[level].enable)
2568 0 : level1 = level;
2569 0 : if (r2->wm[level].enable)
2570 0 : level2 = level;
2571 : }
2572 :
2573 0 : if (level1 == level2) {
2574 0 : if (r2->fbc_wm_enabled && !r1->fbc_wm_enabled)
2575 0 : return r2;
2576 : else
2577 0 : return r1;
2578 0 : } else if (level1 > level2) {
2579 0 : return r1;
2580 : } else {
2581 0 : return r2;
2582 : }
2583 0 : }
2584 :
2585 : /* dirty bits used to track which watermarks need changes */
2586 : #define WM_DIRTY_PIPE(pipe) (1 << (pipe))
2587 : #define WM_DIRTY_LINETIME(pipe) (1 << (8 + (pipe)))
2588 : #define WM_DIRTY_LP(wm_lp) (1 << (15 + (wm_lp)))
2589 : #define WM_DIRTY_LP_ALL (WM_DIRTY_LP(1) | WM_DIRTY_LP(2) | WM_DIRTY_LP(3))
2590 : #define WM_DIRTY_FBC (1 << 24)
2591 : #define WM_DIRTY_DDB (1 << 25)
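/*
 * Resulting bit layout, for reference: bits 0-7 hold WM_DIRTY_PIPE,
 * bits 8-15 WM_DIRTY_LINETIME, bits 16-18 WM_DIRTY_LP(1..3), bit 24
 * WM_DIRTY_FBC and bit 25 WM_DIRTY_DDB.
 */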
2592 :
2593 0 : static unsigned int ilk_compute_wm_dirty(struct drm_i915_private *dev_priv,
2594 : const struct ilk_wm_values *old,
2595 : const struct ilk_wm_values *new)
2596 : {
2597 : unsigned int dirty = 0;
2598 : enum pipe pipe;
2599 : int wm_lp;
2600 :
2601 0 : for_each_pipe(dev_priv, pipe) {
2602 0 : if (old->wm_linetime[pipe] != new->wm_linetime[pipe]) {
2603 0 : dirty |= WM_DIRTY_LINETIME(pipe);
2604 : /* Must disable LP1+ watermarks too */
2605 0 : dirty |= WM_DIRTY_LP_ALL;
2606 0 : }
2607 :
2608 0 : if (old->wm_pipe[pipe] != new->wm_pipe[pipe]) {
2609 0 : dirty |= WM_DIRTY_PIPE(pipe);
2610 : /* Must disable LP1+ watermarks too */
2611 0 : dirty |= WM_DIRTY_LP_ALL;
2612 0 : }
2613 : }
2614 :
2615 0 : if (old->enable_fbc_wm != new->enable_fbc_wm) {
2616 0 : dirty |= WM_DIRTY_FBC;
2617 : /* Must disable LP1+ watermarks too */
2618 0 : dirty |= WM_DIRTY_LP_ALL;
2619 0 : }
2620 :
2621 0 : if (old->partitioning != new->partitioning) {
2622 0 : dirty |= WM_DIRTY_DDB;
2623 : /* Must disable LP1+ watermarks too */
2624 0 : dirty |= WM_DIRTY_LP_ALL;
2625 0 : }
2626 :
2627 : /* LP1+ watermarks already deemed dirty, no need to continue */
2628 0 : if (dirty & WM_DIRTY_LP_ALL)
2629 0 : return dirty;
2630 :
2631 : /* Find the lowest numbered LP1+ watermark in need of an update... */
2632 0 : for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
2633 0 : if (old->wm_lp[wm_lp - 1] != new->wm_lp[wm_lp - 1] ||
2634 0 : old->wm_lp_spr[wm_lp - 1] != new->wm_lp_spr[wm_lp - 1])
2635 : break;
2636 : }
2637 :
2638 : /* ...and mark it and all higher numbered LP1+ watermarks as dirty */
2639 0 : for (; wm_lp <= 3; wm_lp++)
2640 0 : dirty |= WM_DIRTY_LP(wm_lp);
2641 :
2642 0 : return dirty;
2643 0 : }
2644 :
2645 0 : static bool _ilk_disable_lp_wm(struct drm_i915_private *dev_priv,
2646 : unsigned int dirty)
2647 : {
2648 0 : struct ilk_wm_values *previous = &dev_priv->wm.hw;
2649 : bool changed = false;
2650 :
2651 0 : if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] & WM1_LP_SR_EN) {
2652 0 : previous->wm_lp[2] &= ~WM1_LP_SR_EN;
2653 0 : I915_WRITE(WM3_LP_ILK, previous->wm_lp[2]);
2654 : changed = true;
2655 0 : }
2656 0 : if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] & WM1_LP_SR_EN) {
2657 0 : previous->wm_lp[1] &= ~WM1_LP_SR_EN;
2658 0 : I915_WRITE(WM2_LP_ILK, previous->wm_lp[1]);
2659 : changed = true;
2660 0 : }
2661 0 : if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] & WM1_LP_SR_EN) {
2662 0 : previous->wm_lp[0] &= ~WM1_LP_SR_EN;
2663 0 : I915_WRITE(WM1_LP_ILK, previous->wm_lp[0]);
2664 : changed = true;
2665 0 : }
2666 :
2667 : /*
2668 : * Don't touch WM1S_LP_EN here.
2669 : * Doing so could cause underruns.
2670 : */
2671 :
2672 0 : return changed;
2673 : }
2674 :
2675 : /*
2676 :  * The spec says we shouldn't write when we don't need to, because every write
2677 : * causes WMs to be re-evaluated, expending some power.
2678 : */
2679 0 : static void ilk_write_wm_values(struct drm_i915_private *dev_priv,
2680 : struct ilk_wm_values *results)
2681 : {
2682 0 : struct drm_device *dev = dev_priv->dev;
2683 0 : struct ilk_wm_values *previous = &dev_priv->wm.hw;
2684 : unsigned int dirty;
2685 : uint32_t val;
2686 :
2687 0 : dirty = ilk_compute_wm_dirty(dev_priv, previous, results);
2688 0 : if (!dirty)
2689 0 : return;
2690 :
2691 0 : _ilk_disable_lp_wm(dev_priv, dirty);
2692 :
2693 0 : if (dirty & WM_DIRTY_PIPE(PIPE_A))
2694 0 : I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
2695 0 : if (dirty & WM_DIRTY_PIPE(PIPE_B))
2696 0 : I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
2697 0 : if (dirty & WM_DIRTY_PIPE(PIPE_C))
2698 0 : I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);
2699 :
2700 0 : if (dirty & WM_DIRTY_LINETIME(PIPE_A))
2701 0 : I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
2702 0 : if (dirty & WM_DIRTY_LINETIME(PIPE_B))
2703 0 : I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
2704 0 : if (dirty & WM_DIRTY_LINETIME(PIPE_C))
2705 0 : I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);
2706 :
2707 0 : if (dirty & WM_DIRTY_DDB) {
2708 0 : if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
2709 0 : val = I915_READ(WM_MISC);
2710 0 : if (results->partitioning == INTEL_DDB_PART_1_2)
2711 0 : val &= ~WM_MISC_DATA_PARTITION_5_6;
2712 : else
2713 0 : val |= WM_MISC_DATA_PARTITION_5_6;
2714 0 : I915_WRITE(WM_MISC, val);
2715 0 : } else {
2716 0 : val = I915_READ(DISP_ARB_CTL2);
2717 0 : if (results->partitioning == INTEL_DDB_PART_1_2)
2718 0 : val &= ~DISP_DATA_PARTITION_5_6;
2719 : else
2720 0 : val |= DISP_DATA_PARTITION_5_6;
2721 0 : I915_WRITE(DISP_ARB_CTL2, val);
2722 : }
2723 : }
2724 :
2725 0 : if (dirty & WM_DIRTY_FBC) {
2726 0 : val = I915_READ(DISP_ARB_CTL);
2727 0 : if (results->enable_fbc_wm)
2728 0 : val &= ~DISP_FBC_WM_DIS;
2729 : else
2730 0 : val |= DISP_FBC_WM_DIS;
2731 0 : I915_WRITE(DISP_ARB_CTL, val);
2732 0 : }
2733 :
2734 0 : if (dirty & WM_DIRTY_LP(1) &&
2735 0 : previous->wm_lp_spr[0] != results->wm_lp_spr[0])
2736 0 : I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
2737 :
2738 0 : if (INTEL_INFO(dev)->gen >= 7) {
2739 0 : if (dirty & WM_DIRTY_LP(2) && previous->wm_lp_spr[1] != results->wm_lp_spr[1])
2740 0 : I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
2741 0 : if (dirty & WM_DIRTY_LP(3) && previous->wm_lp_spr[2] != results->wm_lp_spr[2])
2742 0 : I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
2743 : }
2744 :
2745 0 : if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] != results->wm_lp[0])
2746 0 : I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
2747 0 : if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] != results->wm_lp[1])
2748 0 : I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
2749 0 : if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] != results->wm_lp[2])
2750 0 : I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);
2751 :
2752 0 : dev_priv->wm.hw = *results;
2753 0 : }
2754 :
2755 0 : static bool ilk_disable_lp_wm(struct drm_device *dev)
2756 : {
2757 0 : struct drm_i915_private *dev_priv = dev->dev_private;
2758 :
2759 0 : return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
2760 : }
2761 :
2762 : /*
2763 : * On gen9, we need to allocate Display Data Buffer (DDB) portions to the
2764 : * different active planes.
2765 : */
2766 :
2767 : #define SKL_DDB_SIZE 896 /* in blocks */
2768 : #define BXT_DDB_SIZE 512
2769 :
2770 : static void
2771 0 : skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
2772 : struct drm_crtc *for_crtc,
2773 : const struct intel_wm_config *config,
2774 : const struct skl_pipe_wm_parameters *params,
2775 : struct skl_ddb_entry *alloc /* out */)
2776 : {
2777 : struct drm_crtc *crtc;
2778 : unsigned int pipe_size, ddb_size;
2779 : int nth_active_pipe;
2780 :
2781 0 : if (!params->active) {
2782 0 : alloc->start = 0;
2783 0 : alloc->end = 0;
2784 0 : return;
2785 : }
2786 :
2787 0 : if (IS_BROXTON(dev))
2788 0 : ddb_size = BXT_DDB_SIZE;
2789 : else
2790 : ddb_size = SKL_DDB_SIZE;
2791 :
2792 0 : ddb_size -= 4; /* 4 blocks for bypass path allocation */
2793 :
2794 : nth_active_pipe = 0;
2795 0 : for_each_crtc(dev, crtc) {
2796 0 : if (!to_intel_crtc(crtc)->active)
2797 : continue;
2798 :
2799 0 : if (crtc == for_crtc)
2800 : break;
2801 :
2802 0 : nth_active_pipe++;
2803 0 : }
2804 :
2805 0 : pipe_size = ddb_size / config->num_pipes_active;
2806 0 : alloc->start = nth_active_pipe * ddb_size / config->num_pipes_active;
2807 0 : alloc->end = alloc->start + pipe_size;
2808 0 : }
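/*
 * Worked example with hypothetical numbers: on SKL with two active
 * pipes, ddb_size = 896 - 4 = 892 blocks and pipe_size = 892 / 2 = 446,
 * so the first active pipe is allocated [0, 446) and the second
 * [446, 892).
 */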
2809 :
2810 0 : static unsigned int skl_cursor_allocation(const struct intel_wm_config *config)
2811 : {
2812 0 : if (config->num_pipes_active == 1)
2813 0 : return 32;
2814 :
2815 0 : return 8;
2816 0 : }
2817 :
2818 0 : static void skl_ddb_entry_init_from_hw(struct skl_ddb_entry *entry, u32 reg)
2819 : {
2820 0 : entry->start = reg & 0x3ff;
2821 0 : entry->end = (reg >> 16) & 0x3ff;
2822 0 : if (entry->end)
2823 0 : entry->end += 1;
2824 0 : }
2825 :
2826 0 : void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
2827 : struct skl_ddb_allocation *ddb /* out */)
2828 : {
2829 : enum pipe pipe;
2830 : int plane;
2831 : u32 val;
2832 :
2833 0 : memset(ddb, 0, sizeof(*ddb));
2834 :
2835 0 : for_each_pipe(dev_priv, pipe) {
2836 0 : if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PIPE(pipe)))
2837 : continue;
2838 :
2839 0 : for_each_plane(dev_priv, pipe, plane) {
2840 0 : val = I915_READ(PLANE_BUF_CFG(pipe, plane));
2841 0 : skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane],
2842 : val);
2843 : }
2844 :
2845 0 : val = I915_READ(CUR_BUF_CFG(pipe));
2846 0 : skl_ddb_entry_init_from_hw(&ddb->plane[pipe][PLANE_CURSOR],
2847 : val);
2848 0 : }
2849 0 : }
2850 :
2851 : static unsigned int
2852 0 : skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p, int y)
2853 : {
2854 :
2855 : /* for planar format */
2856 0 : if (p->y_bytes_per_pixel) {
2857 0 : if (y) /* y-plane data rate */
2858 0 : return p->horiz_pixels * p->vert_pixels * p->y_bytes_per_pixel;
2859 : else /* uv-plane data rate */
2860 0 : return (p->horiz_pixels/2) * (p->vert_pixels/2) * p->bytes_per_pixel;
2861 : }
2862 :
2863 : /* for packed formats */
2864 0 : return p->horiz_pixels * p->vert_pixels * p->bytes_per_pixel;
2865 0 : }
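/*
 * Worked example with hypothetical numbers: a 1920x1080 NV12 plane has
 * y_bytes_per_pixel = 1 and bytes_per_pixel = 2 (uv), so the y-plane
 * rate is 1920 * 1080 * 1 = 2073600 and the uv-plane rate is
 * (1920/2) * (1080/2) * 2 = 1036800, half the luma rate as expected
 * for 4:2:0 subsampling.
 */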
2866 :
2867 : /*
2868 : * We don't overflow 32 bits. Worst case is 3 planes enabled, each fetching
2869 :  * an 8192x4096@32bpp framebuffer:
2870 : * 3 * 4096 * 8192 * 4 < 2^32
2871 : */
2872 : static unsigned int
2873 0 : skl_get_total_relative_data_rate(struct intel_crtc *intel_crtc,
2874 : const struct skl_pipe_wm_parameters *params)
2875 : {
2876 : unsigned int total_data_rate = 0;
2877 : int plane;
2878 :
2879 0 : for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) {
2880 : const struct intel_plane_wm_parameters *p;
2881 :
2882 0 : p = ¶ms->plane[plane];
2883 0 : if (!p->enabled)
2884 0 : continue;
2885 :
2886 0 : total_data_rate += skl_plane_relative_data_rate(p, 0); /* packed/uv */
2887 0 : if (p->y_bytes_per_pixel) {
2888 0 : total_data_rate += skl_plane_relative_data_rate(p, 1); /* y-plane */
2889 0 : }
2890 0 : }
2891 :
2892 0 : return total_data_rate;
2893 : }
2894 :
2895 : static void
2896 0 : skl_allocate_pipe_ddb(struct drm_crtc *crtc,
2897 : const struct intel_wm_config *config,
2898 : const struct skl_pipe_wm_parameters *params,
2899 : struct skl_ddb_allocation *ddb /* out */)
2900 : {
2901 0 : struct drm_device *dev = crtc->dev;
2902 0 : struct drm_i915_private *dev_priv = dev->dev_private;
2903 0 : struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2904 0 : enum pipe pipe = intel_crtc->pipe;
2905 0 : struct skl_ddb_entry *alloc = &ddb->pipe[pipe];
2906 : uint16_t alloc_size, start, cursor_blocks;
2907 0 : uint16_t minimum[I915_MAX_PLANES];
2908 0 : uint16_t y_minimum[I915_MAX_PLANES];
2909 : unsigned int total_data_rate;
2910 : int plane;
2911 :
2912 0 : skl_ddb_get_pipe_allocation_limits(dev, crtc, config, params, alloc);
2913 0 : alloc_size = skl_ddb_entry_size(alloc);
2914 0 : if (alloc_size == 0) {
2915 0 : memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
2916 0 : memset(&ddb->plane[pipe][PLANE_CURSOR], 0,
2917 : sizeof(ddb->plane[pipe][PLANE_CURSOR]));
2918 0 : return;
2919 : }
2920 :
2921 0 : cursor_blocks = skl_cursor_allocation(config);
2922 0 : ddb->plane[pipe][PLANE_CURSOR].start = alloc->end - cursor_blocks;
2923 0 : ddb->plane[pipe][PLANE_CURSOR].end = alloc->end;
2924 :
2925 0 : alloc_size -= cursor_blocks;
2926 0 : alloc->end -= cursor_blocks;
2927 :
2928 : 	/* 1. Allocate the minimum required blocks for each active plane */
2929 0 : for_each_plane(dev_priv, pipe, plane) {
2930 : const struct intel_plane_wm_parameters *p;
2931 :
2932 0 : p = ¶ms->plane[plane];
2933 0 : if (!p->enabled)
2934 0 : continue;
2935 :
2936 0 : minimum[plane] = 8;
2937 0 : alloc_size -= minimum[plane];
2938 0 : y_minimum[plane] = p->y_bytes_per_pixel ? 8 : 0;
2939 0 : alloc_size -= y_minimum[plane];
2940 0 : }
2941 :
2942 : /*
2943 : * 2. Distribute the remaining space in proportion to the amount of
2944 : * data each plane needs to fetch from memory.
2945 : *
2946 : * FIXME: we may not allocate every single block here.
2947 : */
2948 0 : total_data_rate = skl_get_total_relative_data_rate(intel_crtc, params);
2949 :
2950 0 : start = alloc->start;
2951 0 : for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) {
2952 : const struct intel_plane_wm_parameters *p;
2953 : unsigned int data_rate, y_data_rate;
2954 : uint16_t plane_blocks, y_plane_blocks = 0;
2955 :
2956 0 : p = ¶ms->plane[plane];
2957 0 : if (!p->enabled)
2958 0 : continue;
2959 :
2960 0 : data_rate = skl_plane_relative_data_rate(p, 0);
2961 :
2962 : /*
2963 : * allocation for (packed formats) or (uv-plane part of planar format):
2964 : 		 * promote the expression to 64 bits to avoid overflowing; the
2965 : 		 * result is < alloc_size since data_rate / total_data_rate < 1
2966 : */
2967 0 : plane_blocks = minimum[plane];
2968 0 : plane_blocks += div_u64((uint64_t)alloc_size * data_rate,
2969 : total_data_rate);
2970 :
2971 0 : ddb->plane[pipe][plane].start = start;
2972 0 : ddb->plane[pipe][plane].end = start + plane_blocks;
2973 :
2974 : start += plane_blocks;
2975 :
2976 : /*
2977 : * allocation for y_plane part of planar format:
2978 : */
2979 0 : if (p->y_bytes_per_pixel) {
2980 0 : y_data_rate = skl_plane_relative_data_rate(p, 1);
2981 0 : y_plane_blocks = y_minimum[plane];
2982 0 : y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate,
2983 : total_data_rate);
2984 :
2985 0 : ddb->y_plane[pipe][plane].start = start;
2986 0 : ddb->y_plane[pipe][plane].end = start + y_plane_blocks;
2987 :
2988 : start += y_plane_blocks;
2989 0 : }
2990 :
2991 0 : }
2992 :
2993 0 : }
2994 :
2995 0 : static uint32_t skl_pipe_pixel_rate(const struct intel_crtc_state *config)
2996 : {
2997 : /* TODO: Take into account the scalers once we support them */
2998 0 : return config->base.adjusted_mode.crtc_clock;
2999 : }
3000 :
3001 : /*
3002 : * The max latency should be 257 (max the punit can code is 255 and we add 2us
3003 : * for the read latency) and bytes_per_pixel should always be <= 8, so that
3004 : * should allow pixel_rate up to ~2 GHz which seems sufficient since max
3005 : * 2xcdclk is 1350 MHz and the pixel rate should never exceed that.
3006 : */
3007 0 : static uint32_t skl_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel,
3008 : uint32_t latency)
3009 : {
3010 : uint32_t wm_intermediate_val, ret;
3011 :
3012 0 : if (latency == 0)
3013 0 : return UINT_MAX;
3014 :
3015 0 : wm_intermediate_val = latency * pixel_rate * bytes_per_pixel / 512;
3016 0 : ret = DIV_ROUND_UP(wm_intermediate_val, 1000);
3017 :
3018 0 : return ret;
3019 0 : }
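/*
 * Worked example with hypothetical numbers: pixel_rate = 148500 kHz,
 * bytes_per_pixel = 4 and latency = 5us move 2970 bytes, i.e. 5.8 of
 * the 512-byte blocks; the fixed-point form computes
 * 5 * 148500 * 4 / 512 = 5800 and DIV_ROUND_UP(5800, 1000) = 6 blocks.
 */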
3020 :
3021 0 : static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
3022 : uint32_t horiz_pixels, uint8_t bytes_per_pixel,
3023 : uint64_t tiling, uint32_t latency)
3024 : {
3025 : uint32_t ret;
3026 : uint32_t plane_bytes_per_line, plane_blocks_per_line;
3027 : uint32_t wm_intermediate_val;
3028 :
3029 0 : if (latency == 0)
3030 0 : return UINT_MAX;
3031 :
3032 0 : plane_bytes_per_line = horiz_pixels * bytes_per_pixel;
3033 :
3034 0 : if (tiling == I915_FORMAT_MOD_Y_TILED ||
3035 0 : tiling == I915_FORMAT_MOD_Yf_TILED) {
3036 0 : plane_bytes_per_line *= 4;
3037 0 : plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
3038 0 : plane_blocks_per_line /= 4;
3039 0 : } else {
3040 0 : plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
3041 : }
3042 :
3043 0 : wm_intermediate_val = latency * pixel_rate;
3044 0 : ret = DIV_ROUND_UP(wm_intermediate_val, pipe_htotal * 1000) *
3045 : plane_blocks_per_line;
3046 :
3047 0 : return ret;
3048 0 : }
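/*
 * Worked example with hypothetical numbers: a 1920-wide, 4 Bpp Y-tiled
 * plane has plane_bytes_per_line = 7680 and the tiled path yields
 * DIV_ROUND_UP(7680 * 4, 512) / 4 = 15 blocks per line. With
 * latency = 5us, pipe_htotal = 2200 and pixel_rate = 148500 kHz, the
 * latency spans DIV_ROUND_UP(5 * 148500, 2200 * 1000) = 1 line,
 * giving 15 blocks.
 */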
3049 :
3050 0 : static bool skl_ddb_allocation_changed(const struct skl_ddb_allocation *new_ddb,
3051 : const struct intel_crtc *intel_crtc)
3052 : {
3053 0 : struct drm_device *dev = intel_crtc->base.dev;
3054 0 : struct drm_i915_private *dev_priv = dev->dev_private;
3055 0 : const struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb;
3056 0 : enum pipe pipe = intel_crtc->pipe;
3057 :
3058 0 : if (memcmp(new_ddb->plane[pipe], cur_ddb->plane[pipe],
3059 : sizeof(new_ddb->plane[pipe])))
3060 0 : return true;
3061 :
3062 0 : if (memcmp(&new_ddb->plane[pipe][PLANE_CURSOR], &cur_ddb->plane[pipe][PLANE_CURSOR],
3063 : sizeof(new_ddb->plane[pipe][PLANE_CURSOR])))
3064 0 : return true;
3065 :
3066 0 : return false;
3067 0 : }
3068 :
3069 0 : static void skl_compute_wm_global_parameters(struct drm_device *dev,
3070 : struct intel_wm_config *config)
3071 : {
3072 : struct drm_crtc *crtc;
3073 : struct drm_plane *plane;
3074 :
3075 0 : list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
3076 0 : config->num_pipes_active += to_intel_crtc(crtc)->active;
3077 :
3078 : /* FIXME: I don't think we need those two global parameters on SKL */
3079 0 : list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
3080 0 : struct intel_plane *intel_plane = to_intel_plane(plane);
3081 :
3082 0 : config->sprites_enabled |= intel_plane->wm.enabled;
3083 0 : config->sprites_scaled |= intel_plane->wm.scaled;
3084 : }
3085 0 : }
3086 :
3087 0 : static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc,
3088 : struct skl_pipe_wm_parameters *p)
3089 : {
3090 0 : struct drm_device *dev = crtc->dev;
3091 0 : struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3092 0 : enum pipe pipe = intel_crtc->pipe;
3093 : struct drm_plane *plane;
3094 : struct drm_framebuffer *fb;
3095 : int i = 1; /* Index for sprite planes start */
3096 :
3097 0 : p->active = intel_crtc->active;
3098 0 : if (p->active) {
3099 0 : p->pipe_htotal = intel_crtc->config->base.adjusted_mode.crtc_htotal;
3100 0 : p->pixel_rate = skl_pipe_pixel_rate(intel_crtc->config);
3101 :
3102 0 : fb = crtc->primary->state->fb;
3103 : /* For planar: Bpp is for uv plane, y_Bpp is for y plane */
3104 0 : if (fb) {
3105 0 : p->plane[0].enabled = true;
3106 0 : p->plane[0].bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ?
3107 0 : drm_format_plane_cpp(fb->pixel_format, 1) :
3108 0 : drm_format_plane_cpp(fb->pixel_format, 0);
3109 0 : p->plane[0].y_bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ?
3110 0 : drm_format_plane_cpp(fb->pixel_format, 0) : 0;
3111 0 : p->plane[0].tiling = fb->modifier[0];
3112 0 : } else {
3113 0 : p->plane[0].enabled = false;
3114 0 : p->plane[0].bytes_per_pixel = 0;
3115 0 : p->plane[0].y_bytes_per_pixel = 0;
3116 0 : p->plane[0].tiling = DRM_FORMAT_MOD_NONE;
3117 : }
3118 0 : p->plane[0].horiz_pixels = intel_crtc->config->pipe_src_w;
3119 0 : p->plane[0].vert_pixels = intel_crtc->config->pipe_src_h;
3120 0 : p->plane[0].rotation = crtc->primary->state->rotation;
3121 :
3122 0 : fb = crtc->cursor->state->fb;
3123 0 : p->plane[PLANE_CURSOR].y_bytes_per_pixel = 0;
3124 0 : if (fb) {
3125 0 : p->plane[PLANE_CURSOR].enabled = true;
3126 0 : p->plane[PLANE_CURSOR].bytes_per_pixel = fb->bits_per_pixel / 8;
3127 0 : p->plane[PLANE_CURSOR].horiz_pixels = crtc->cursor->state->crtc_w;
3128 0 : p->plane[PLANE_CURSOR].vert_pixels = crtc->cursor->state->crtc_h;
3129 0 : } else {
3130 0 : p->plane[PLANE_CURSOR].enabled = false;
3131 0 : p->plane[PLANE_CURSOR].bytes_per_pixel = 0;
3132 0 : p->plane[PLANE_CURSOR].horiz_pixels = 64;
3133 0 : p->plane[PLANE_CURSOR].vert_pixels = 64;
3134 : }
3135 : }
3136 :
3137 0 : list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
3138 0 : struct intel_plane *intel_plane = to_intel_plane(plane);
3139 :
3140 0 : if (intel_plane->pipe == pipe &&
3141 0 : plane->type == DRM_PLANE_TYPE_OVERLAY)
3142 0 : p->plane[i++] = intel_plane->wm;
3143 : }
3144 0 : }
3145 :
3146 0 : static bool skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
3147 : struct skl_pipe_wm_parameters *p,
3148 : struct intel_plane_wm_parameters *p_params,
3149 : uint16_t ddb_allocation,
3150 : int level,
3151 : uint16_t *out_blocks, /* out */
3152 : uint8_t *out_lines /* out */)
3153 : {
3154 0 : uint32_t latency = dev_priv->wm.skl_latency[level];
3155 : uint32_t method1, method2;
3156 : uint32_t plane_bytes_per_line, plane_blocks_per_line;
3157 : uint32_t res_blocks, res_lines;
3158 : uint32_t selected_result;
3159 : uint8_t bytes_per_pixel;
3160 :
3161 0 : if (latency == 0 || !p->active || !p_params->enabled)
3162 0 : return false;
3163 :
3164 0 : bytes_per_pixel = p_params->y_bytes_per_pixel ?
3165 : p_params->y_bytes_per_pixel :
3166 0 : p_params->bytes_per_pixel;
3167 0 : method1 = skl_wm_method1(p->pixel_rate,
3168 : bytes_per_pixel,
3169 : latency);
3170 0 : method2 = skl_wm_method2(p->pixel_rate,
3171 0 : p->pipe_htotal,
3172 0 : p_params->horiz_pixels,
3173 : bytes_per_pixel,
3174 0 : p_params->tiling,
3175 : latency);
3176 :
3177 0 : plane_bytes_per_line = p_params->horiz_pixels * bytes_per_pixel;
3178 0 : plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
3179 :
3180 0 : if (p_params->tiling == I915_FORMAT_MOD_Y_TILED ||
3181 0 : p_params->tiling == I915_FORMAT_MOD_Yf_TILED) {
3182 : uint32_t min_scanlines = 4;
3183 : uint32_t y_tile_minimum;
3184 0 : if (intel_rotation_90_or_270(p_params->rotation)) {
3185 0 : switch (p_params->bytes_per_pixel) {
3186 : case 1:
3187 : min_scanlines = 16;
3188 0 : break;
3189 : case 2:
3190 : min_scanlines = 8;
3191 0 : break;
3192 : case 8:
3193 0 : WARN(1, "Unsupported pixel depth for rotation");
3194 0 : }
3195 : }
3196 0 : y_tile_minimum = plane_blocks_per_line * min_scanlines;
3197 0 : selected_result = max(method2, y_tile_minimum);
3198 0 : } else {
3199 0 : if ((ddb_allocation / plane_blocks_per_line) >= 1)
3200 0 : selected_result = min(method1, method2);
3201 : else
3202 : selected_result = method1;
3203 : }
3204 :
3205 0 : res_blocks = selected_result + 1;
3206 0 : res_lines = DIV_ROUND_UP(selected_result, plane_blocks_per_line);
3207 :
3208 0 : if (level >= 1 && level <= 7) {
3209 0 : if (p_params->tiling == I915_FORMAT_MOD_Y_TILED ||
3210 0 : p_params->tiling == I915_FORMAT_MOD_Yf_TILED)
3211 0 : res_lines += 4;
3212 : else
3213 0 : res_blocks++;
3214 : }
3215 :
3216 0 : if (res_blocks >= ddb_allocation || res_lines > 31)
3217 0 : return false;
3218 :
3219 0 : *out_blocks = res_blocks;
3220 0 : *out_lines = res_lines;
3221 :
3222 0 : return true;
3223 0 : }
3224 :
3225 0 : static void skl_compute_wm_level(const struct drm_i915_private *dev_priv,
3226 : struct skl_ddb_allocation *ddb,
3227 : struct skl_pipe_wm_parameters *p,
3228 : enum pipe pipe,
3229 : int level,
3230 : int num_planes,
3231 : struct skl_wm_level *result)
3232 : {
3233 : uint16_t ddb_blocks;
3234 : int i;
3235 :
3236 0 : for (i = 0; i < num_planes; i++) {
3237 0 : ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][i]);
3238 :
3239 0 : result->plane_en[i] = skl_compute_plane_wm(dev_priv,
3240 0 : p, &p->plane[i],
3241 : ddb_blocks,
3242 : level,
3243 0 : &result->plane_res_b[i],
3244 0 : &result->plane_res_l[i]);
3245 : }
3246 :
3247 0 : ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][PLANE_CURSOR]);
3248 0 : result->plane_en[PLANE_CURSOR] = skl_compute_plane_wm(dev_priv, p,
3249 0 : &p->plane[PLANE_CURSOR],
3250 : ddb_blocks, level,
3251 0 : &result->plane_res_b[PLANE_CURSOR],
3252 0 : &result->plane_res_l[PLANE_CURSOR]);
3253 0 : }
3254 :
3255 : static uint32_t
3256 0 : skl_compute_linetime_wm(struct drm_crtc *crtc, struct skl_pipe_wm_parameters *p)
3257 : {
3258 0 : if (!to_intel_crtc(crtc)->active)
3259 0 : return 0;
3260 :
3261 0 : if (WARN_ON(p->pixel_rate == 0))
3262 0 : return 0;
3263 :
3264 0 : return DIV_ROUND_UP(8 * p->pipe_htotal * 1000, p->pixel_rate);
3265 0 : }
3266 :
3267 0 : static void skl_compute_transition_wm(struct drm_crtc *crtc,
3268 : struct skl_pipe_wm_parameters *params,
3269 : struct skl_wm_level *trans_wm /* out */)
3270 : {
3271 0 : struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3272 : int i;
3273 :
3274 0 : if (!params->active)
3275 0 : return;
3276 :
3277 : /* Until we know more, just disable transition WMs */
3278 0 : for (i = 0; i < intel_num_planes(intel_crtc); i++)
3279 0 : trans_wm->plane_en[i] = false;
3280 0 : trans_wm->plane_en[PLANE_CURSOR] = false;
3281 0 : }
3282 :
3283 0 : static void skl_compute_pipe_wm(struct drm_crtc *crtc,
3284 : struct skl_ddb_allocation *ddb,
3285 : struct skl_pipe_wm_parameters *params,
3286 : struct skl_pipe_wm *pipe_wm)
3287 : {
3288 0 : struct drm_device *dev = crtc->dev;
3289 0 : const struct drm_i915_private *dev_priv = dev->dev_private;
3290 0 : struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3291 0 : int level, max_level = ilk_wm_max_level(dev);
3292 :
3293 0 : for (level = 0; level <= max_level; level++) {
3294 0 : skl_compute_wm_level(dev_priv, ddb, params, intel_crtc->pipe,
3295 0 : level, intel_num_planes(intel_crtc),
3296 0 : &pipe_wm->wm[level]);
3297 : }
3298 0 : pipe_wm->linetime = skl_compute_linetime_wm(crtc, params);
3299 :
3300 0 : skl_compute_transition_wm(crtc, params, &pipe_wm->trans_wm);
3301 0 : }
3302 :
3303 0 : static void skl_compute_wm_results(struct drm_device *dev,
3304 : struct skl_pipe_wm_parameters *p,
3305 : struct skl_pipe_wm *p_wm,
3306 : struct skl_wm_values *r,
3307 : struct intel_crtc *intel_crtc)
3308 : {
3309 0 : int level, max_level = ilk_wm_max_level(dev);
3310 0 : enum pipe pipe = intel_crtc->pipe;
3311 : uint32_t temp;
3312 : int i;
3313 :
3314 0 : for (level = 0; level <= max_level; level++) {
3315 0 : for (i = 0; i < intel_num_planes(intel_crtc); i++) {
3316 : temp = 0;
3317 :
3318 0 : temp |= p_wm->wm[level].plane_res_l[i] <<
3319 : PLANE_WM_LINES_SHIFT;
3320 0 : temp |= p_wm->wm[level].plane_res_b[i];
3321 0 : if (p_wm->wm[level].plane_en[i])
3322 0 : temp |= PLANE_WM_EN;
3323 :
3324 0 : r->plane[pipe][i][level] = temp;
3325 : }
3326 :
3327 : temp = 0;
3328 :
3329 0 : temp |= p_wm->wm[level].plane_res_l[PLANE_CURSOR] << PLANE_WM_LINES_SHIFT;
3330 0 : temp |= p_wm->wm[level].plane_res_b[PLANE_CURSOR];
3331 :
3332 0 : if (p_wm->wm[level].plane_en[PLANE_CURSOR])
3333 0 : temp |= PLANE_WM_EN;
3334 :
3335 0 : r->plane[pipe][PLANE_CURSOR][level] = temp;
3336 :
3337 : }
3338 :
3339 : /* transition WMs */
3340 0 : for (i = 0; i < intel_num_planes(intel_crtc); i++) {
3341 : temp = 0;
3342 0 : temp |= p_wm->trans_wm.plane_res_l[i] << PLANE_WM_LINES_SHIFT;
3343 0 : temp |= p_wm->trans_wm.plane_res_b[i];
3344 0 : if (p_wm->trans_wm.plane_en[i])
3345 0 : temp |= PLANE_WM_EN;
3346 :
3347 0 : r->plane_trans[pipe][i] = temp;
3348 : }
3349 :
3350 : temp = 0;
3351 0 : temp |= p_wm->trans_wm.plane_res_l[PLANE_CURSOR] << PLANE_WM_LINES_SHIFT;
3352 0 : temp |= p_wm->trans_wm.plane_res_b[PLANE_CURSOR];
3353 0 : if (p_wm->trans_wm.plane_en[PLANE_CURSOR])
3354 0 : temp |= PLANE_WM_EN;
3355 :
3356 0 : r->plane_trans[pipe][PLANE_CURSOR] = temp;
3357 :
3358 0 : r->wm_linetime[pipe] = p_wm->linetime;
3359 0 : }
3360 :
3361 0 : static void skl_ddb_entry_write(struct drm_i915_private *dev_priv, uint32_t reg,
3362 : const struct skl_ddb_entry *entry)
3363 : {
3364 0 : if (entry->end)
3365 0 : I915_WRITE(reg, (entry->end - 1) << 16 | entry->start);
3366 : else
3367 0 : I915_WRITE(reg, 0);
3368 0 : }
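
/*
 * Example of the register encoding above: a DDB entry with start = 0 and
 * end = 160 is written as ((160 - 1) << 16) | 0 = 0x009f0000, i.e. the
 * last allocated block in bits 31:16 and the start block in bits 15:0.
 * An empty entry (end == 0) clears the register entirely.
 */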
3369 :
3370 0 : static void skl_write_wm_values(struct drm_i915_private *dev_priv,
3371 : const struct skl_wm_values *new)
3372 : {
3373 0 : struct drm_device *dev = dev_priv->dev;
3374 : struct intel_crtc *crtc;
3375 :
3376 0 : list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) {
3377 0 : int i, level, max_level = ilk_wm_max_level(dev);
3378 0 : enum pipe pipe = crtc->pipe;
3379 :
3380 0 : if (!new->dirty[pipe])
3381 0 : continue;
3382 :
3383 0 : I915_WRITE(PIPE_WM_LINETIME(pipe), new->wm_linetime[pipe]);
3384 :
3385 0 : for (level = 0; level <= max_level; level++) {
3386 0 : for (i = 0; i < intel_num_planes(crtc); i++)
3387 0 : I915_WRITE(PLANE_WM(pipe, i, level),
3388 : new->plane[pipe][i][level]);
3389 0 : I915_WRITE(CUR_WM(pipe, level),
3390 : new->plane[pipe][PLANE_CURSOR][level]);
3391 : }
3392 0 : for (i = 0; i < intel_num_planes(crtc); i++)
3393 0 : I915_WRITE(PLANE_WM_TRANS(pipe, i),
3394 : new->plane_trans[pipe][i]);
3395 0 : I915_WRITE(CUR_WM_TRANS(pipe),
3396 : new->plane_trans[pipe][PLANE_CURSOR]);
3397 :
3398 0 : for (i = 0; i < intel_num_planes(crtc); i++) {
3399 0 : skl_ddb_entry_write(dev_priv,
3400 0 : PLANE_BUF_CFG(pipe, i),
3401 0 : &new->ddb.plane[pipe][i]);
3402 0 : skl_ddb_entry_write(dev_priv,
3403 0 : PLANE_NV12_BUF_CFG(pipe, i),
3404 0 : &new->ddb.y_plane[pipe][i]);
3405 : }
3406 :
3407 0 : skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe),
3408 0 : &new->ddb.plane[pipe][PLANE_CURSOR]);
3409 0 : }
3410 0 : }
3411 :
3412 : /*
3413 : * When setting up a new DDB allocation arrangement, we need to correctly
3414 : * sequence the times at which the new allocations for the pipes are taken into
3415 : * account or we'll have pipes fetching from space previously allocated to
3416 : * another pipe.
3417 : *
3418 : * Roughly the sequence looks like:
3419 : * 1. re-allocate the pipe(s) with the allocation being reduced and not
3420 :  *    overlapping with a previously enabled pipe (another way to put it:
3421 :  *    pipes with their new allocation strictly included in their old ones).
3422 : * 2. re-allocate the other pipes that get their allocation reduced
3423 : * 3. allocate the pipes having their allocation increased
3424 : *
3425 : * Steps 1. and 2. are here to take care of the following case:
3426 : * - Initially DDB looks like this:
3427 : * | B | C |
3428 : * - enable pipe A.
3429 : * - pipe B has a reduced DDB allocation that overlaps with the old pipe C
3430 : * allocation
3431 : * | A | B | C |
3432 : *
3433 : * We need to sequence the re-allocation: C, B, A (and not B, C, A).
3434 : */
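
/*
 * A minimal sketch of the three-pass classification described above. This
 * helper is hypothetical (illustration only, not part of the driver); the
 * real decision is made inline in skl_flush_wm_values() using
 * skl_ddb_allocation_included() and skl_ddb_entry_size().
 */
static inline int skl_flush_pass_example(uint16_t old_start, uint16_t old_end,
					 uint16_t new_start, uint16_t new_end)
{
	uint16_t old_size = old_end - old_start;
	uint16_t new_size = new_end - new_start;

	if (new_size != old_size &&
	    new_start >= old_start && new_end <= old_end)
		return 1; /* shrunk and contained in the old span: flush first */
	if (new_size < old_size)
		return 2; /* shrunk but overlapping a neighbour: flush second */
	return 3;         /* grown or unchanged: flushed last, no wait needed */
}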
3435 :
3436 : static void
3437 0 : skl_wm_flush_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, int pass)
3438 : {
3439 : int plane;
3440 :
3441 : DRM_DEBUG_KMS("flush pipe %c (pass %d)\n", pipe_name(pipe), pass);
3442 :
3443 0 : for_each_plane(dev_priv, pipe, plane) {
3444 0 : I915_WRITE(PLANE_SURF(pipe, plane),
3445 : I915_READ(PLANE_SURF(pipe, plane)));
3446 : }
3447 0 : I915_WRITE(CURBASE(pipe), I915_READ(CURBASE(pipe)));
3448 0 : }
3449 :
3450 : static bool
3451 0 : skl_ddb_allocation_included(const struct skl_ddb_allocation *old,
3452 : const struct skl_ddb_allocation *new,
3453 : enum pipe pipe)
3454 : {
3455 : uint16_t old_size, new_size;
3456 :
3457 0 : old_size = skl_ddb_entry_size(&old->pipe[pipe]);
3458 0 : new_size = skl_ddb_entry_size(&new->pipe[pipe]);
3459 :
3460 0 : return old_size != new_size &&
3461 0 : new->pipe[pipe].start >= old->pipe[pipe].start &&
3462 0 : new->pipe[pipe].end <= old->pipe[pipe].end;
3463 : }
3464 :
3465 0 : static void skl_flush_wm_values(struct drm_i915_private *dev_priv,
3466 : struct skl_wm_values *new_values)
3467 : {
3468 0 : struct drm_device *dev = dev_priv->dev;
3469 : struct skl_ddb_allocation *cur_ddb, *new_ddb;
3470 0 : bool reallocated[I915_MAX_PIPES] = {};
3471 : struct intel_crtc *crtc;
3472 : enum pipe pipe;
3473 :
3474 0 : new_ddb = &new_values->ddb;
3475 0 : cur_ddb = &dev_priv->wm.skl_hw.ddb;
3476 :
3477 : /*
3478 : * First pass: flush the pipes with the new allocation contained into
3479 : * the old space.
3480 : *
3481 : * We'll wait for the vblank on those pipes to ensure we can safely
3482 : * re-allocate the freed space without this pipe fetching from it.
3483 : */
3484 0 : for_each_intel_crtc(dev, crtc) {
3485 0 : if (!crtc->active)
3486 : continue;
3487 :
3488 0 : pipe = crtc->pipe;
3489 :
3490 0 : if (!skl_ddb_allocation_included(cur_ddb, new_ddb, pipe))
3491 : continue;
3492 :
3493 0 : skl_wm_flush_pipe(dev_priv, pipe, 1);
3494 0 : intel_wait_for_vblank(dev, pipe);
3495 :
3496 0 : reallocated[pipe] = true;
3497 0 : }
3498 :
3499 :
3500 : /*
3501 : * Second pass: flush the pipes that are having their allocation
3502 : * reduced, but overlapping with a previous allocation.
3503 : *
3504 : * Here as well we need to wait for the vblank to make sure the freed
3505 : * space is not used anymore.
3506 : */
3507 0 : for_each_intel_crtc(dev, crtc) {
3508 0 : if (!crtc->active)
3509 : continue;
3510 :
3511 0 : pipe = crtc->pipe;
3512 :
3513 0 : if (reallocated[pipe])
3514 : continue;
3515 :
3516 0 : if (skl_ddb_entry_size(&new_ddb->pipe[pipe]) <
3517 0 : skl_ddb_entry_size(&cur_ddb->pipe[pipe])) {
3518 0 : skl_wm_flush_pipe(dev_priv, pipe, 2);
3519 0 : intel_wait_for_vblank(dev, pipe);
3520 0 : reallocated[pipe] = true;
3521 0 : }
3522 : }
3523 :
3524 : /*
3525 : * Third pass: flush the pipes that got more space allocated.
3526 : *
3527 : * We don't need to actively wait for the update here, next vblank
3528 : * will just get more DDB space with the correct WM values.
3529 : */
3530 0 : for_each_intel_crtc(dev, crtc) {
3531 0 : if (!crtc->active)
3532 : continue;
3533 :
3534 0 : pipe = crtc->pipe;
3535 :
3536 : /*
3537 :            * At this point, only the pipes getting more space than
3538 :            * before are left to re-allocate.
3539 : */
3540 0 : if (reallocated[pipe])
3541 : continue;
3542 :
3543 0 : skl_wm_flush_pipe(dev_priv, pipe, 3);
3544 0 : }
3545 0 : }
3546 :
3547 0 : static bool skl_update_pipe_wm(struct drm_crtc *crtc,
3548 : struct skl_pipe_wm_parameters *params,
3549 : struct intel_wm_config *config,
3550 : struct skl_ddb_allocation *ddb, /* out */
3551 : struct skl_pipe_wm *pipe_wm /* out */)
3552 : {
3553 0 : struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3554 :
3555 0 : skl_compute_wm_pipe_parameters(crtc, params);
3556 0 : skl_allocate_pipe_ddb(crtc, config, params, ddb);
3557 0 : skl_compute_pipe_wm(crtc, ddb, params, pipe_wm);
3558 :
3559 0 : if (!memcmp(&intel_crtc->wm.skl_active, pipe_wm, sizeof(*pipe_wm)))
3560 0 : return false;
3561 :
3562 0 : intel_crtc->wm.skl_active = *pipe_wm;
3563 :
3564 0 : return true;
3565 0 : }
3566 :
3567 0 : static void skl_update_other_pipe_wm(struct drm_device *dev,
3568 : struct drm_crtc *crtc,
3569 : struct intel_wm_config *config,
3570 : struct skl_wm_values *r)
3571 : {
3572 : struct intel_crtc *intel_crtc;
3573 0 : struct intel_crtc *this_crtc = to_intel_crtc(crtc);
3574 :
3575 : /*
3576 : * If the WM update hasn't changed the allocation for this_crtc (the
3577 : * crtc we are currently computing the new WM values for), other
3578 : * enabled crtcs will keep the same allocation and we don't need to
3579 : * recompute anything for them.
3580 : */
3581 0 : if (!skl_ddb_allocation_changed(&r->ddb, this_crtc))
3582 0 : return;
3583 :
3584 : /*
3585 : * Otherwise, because of this_crtc being freshly enabled/disabled, the
3586 : * other active pipes need new DDB allocation and WM values.
3587 : */
3588 0 : list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list,
3589 : base.head) {
3590 0 : struct skl_pipe_wm_parameters params = {};
3591 0 : struct skl_pipe_wm pipe_wm = {};
3592 : bool wm_changed;
3593 :
3594 0 : if (this_crtc->pipe == intel_crtc->pipe)
3595 0 : continue;
3596 :
3597 0 : if (!intel_crtc->active)
3598 0 : continue;
3599 :
3600 0 : wm_changed = skl_update_pipe_wm(&intel_crtc->base,
3601 :                                                 &params, config,
3602 : &r->ddb, &pipe_wm);
3603 :
3604 : /*
3605 : * If we end up re-computing the other pipe WM values, it's
3606 : * because it was really needed, so we expect the WM values to
3607 : * be different.
3608 : */
3609 0 : WARN_ON(!wm_changed);
3610 :
3611 0 :               skl_compute_wm_results(dev, &params, &pipe_wm, r, intel_crtc);
3612 0 : r->dirty[intel_crtc->pipe] = true;
3613 0 : }
3614 0 : }
3615 :
3616 0 : static void skl_clear_wm(struct skl_wm_values *watermarks, enum pipe pipe)
3617 : {
3618 0 : watermarks->wm_linetime[pipe] = 0;
3619 0 : memset(watermarks->plane[pipe], 0,
3620 : sizeof(uint32_t) * 8 * I915_MAX_PLANES);
3621 0 : memset(watermarks->plane_trans[pipe],
3622 : 0, sizeof(uint32_t) * I915_MAX_PLANES);
3623 0 : watermarks->plane_trans[pipe][PLANE_CURSOR] = 0;
3624 :
3625 : /* Clear ddb entries for pipe */
3626 0 : memset(&watermarks->ddb.pipe[pipe], 0, sizeof(struct skl_ddb_entry));
3627 0 : memset(&watermarks->ddb.plane[pipe], 0,
3628 : sizeof(struct skl_ddb_entry) * I915_MAX_PLANES);
3629 0 : memset(&watermarks->ddb.y_plane[pipe], 0,
3630 : sizeof(struct skl_ddb_entry) * I915_MAX_PLANES);
3631 0 : memset(&watermarks->ddb.plane[pipe][PLANE_CURSOR], 0,
3632 : sizeof(struct skl_ddb_entry));
3633 :
3634 0 : }
3635 :
3636 0 : static void skl_update_wm(struct drm_crtc *crtc)
3637 : {
3638 0 : struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3639 0 : struct drm_device *dev = crtc->dev;
3640 0 : struct drm_i915_private *dev_priv = dev->dev_private;
3641 0 : struct skl_pipe_wm_parameters params = {};
3642 0 : struct skl_wm_values *results = &dev_priv->wm.skl_results;
3643 0 : struct skl_pipe_wm pipe_wm = {};
3644 0 : struct intel_wm_config config = {};
3645 :
3646 :
3647 : /* Clear all dirty flags */
3648 0 : memset(results->dirty, 0, sizeof(bool) * I915_MAX_PIPES);
3649 :
3650 0 : skl_clear_wm(results, intel_crtc->pipe);
3651 :
3652 0 : skl_compute_wm_global_parameters(dev, &config);
3653 :
3654 0 :       if (!skl_update_pipe_wm(crtc, &params, &config,
3655 0 : &results->ddb, &pipe_wm))
3656 0 : return;
3657 :
3658 0 :       skl_compute_wm_results(dev, &params, &pipe_wm, results, intel_crtc);
3659 0 : results->dirty[intel_crtc->pipe] = true;
3660 :
3661 0 : skl_update_other_pipe_wm(dev, crtc, &config, results);
3662 0 : skl_write_wm_values(dev_priv, results);
3663 0 : skl_flush_wm_values(dev_priv, results);
3664 :
3665 : /* store the new configuration */
3666 0 : dev_priv->wm.skl_hw = *results;
3667 0 : }
3668 :
3669 : static void
3670 0 : skl_update_sprite_wm(struct drm_plane *plane, struct drm_crtc *crtc,
3671 : uint32_t sprite_width, uint32_t sprite_height,
3672 : int pixel_size, bool enabled, bool scaled)
3673 : {
3674 0 : struct intel_plane *intel_plane = to_intel_plane(plane);
3675 0 : struct drm_framebuffer *fb = plane->state->fb;
3676 :
3677 0 : intel_plane->wm.enabled = enabled;
3678 0 : intel_plane->wm.scaled = scaled;
3679 0 : intel_plane->wm.horiz_pixels = sprite_width;
3680 0 : intel_plane->wm.vert_pixels = sprite_height;
3681 0 : intel_plane->wm.tiling = DRM_FORMAT_MOD_NONE;
3682 :
3683 : /* For planar: Bpp is for UV plane, y_Bpp is for Y plane */
3684 0 : intel_plane->wm.bytes_per_pixel =
3685 0 : (fb && fb->pixel_format == DRM_FORMAT_NV12) ?
3686 0 : drm_format_plane_cpp(plane->state->fb->pixel_format, 1) : pixel_size;
3687 0 : intel_plane->wm.y_bytes_per_pixel =
3688 0 : (fb && fb->pixel_format == DRM_FORMAT_NV12) ?
3689 0 : drm_format_plane_cpp(plane->state->fb->pixel_format, 0) : 0;
3690 :
3691 : /*
3692 : * Framebuffer can be NULL on plane disable, but it does not
3693 : * matter for watermarks if we assume no tiling in that case.
3694 : */
3695 0 : if (fb)
3696 0 : intel_plane->wm.tiling = fb->modifier[0];
3697 0 : intel_plane->wm.rotation = plane->state->rotation;
3698 :
3699 0 : skl_update_wm(crtc);
3700 0 : }
3701 :
3702 0 : static void ilk_update_wm(struct drm_crtc *crtc)
3703 : {
3704 0 : struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3705 0 : struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
3706 0 : struct drm_device *dev = crtc->dev;
3707 0 : struct drm_i915_private *dev_priv = dev->dev_private;
3708 0 : struct ilk_wm_maximums max;
3709 0 : struct ilk_wm_values results = {};
3710 : enum intel_ddb_partitioning partitioning;
3711 0 : struct intel_pipe_wm pipe_wm = {};
3712 0 : struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm;
3713 0 : struct intel_wm_config config = {};
3714 :
3715 0 : WARN_ON(cstate->base.active != intel_crtc->active);
3716 :
3717 0 : intel_compute_pipe_wm(cstate, &pipe_wm);
3718 :
3719 0 : if (!memcmp(&intel_crtc->wm.active, &pipe_wm, sizeof(pipe_wm)))
3720 0 : return;
3721 :
3722 0 : intel_crtc->wm.active = pipe_wm;
3723 :
3724 0 : ilk_compute_wm_config(dev, &config);
3725 :
3726 0 : ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max);
3727 0 : ilk_wm_merge(dev, &config, &max, &lp_wm_1_2);
3728 :
3729 : /* 5/6 split only in single pipe config on IVB+ */
3730 0 : if (INTEL_INFO(dev)->gen >= 7 &&
3731 0 : config.num_pipes_active == 1 && config.sprites_enabled) {
3732 0 : ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max);
3733 0 : ilk_wm_merge(dev, &config, &max, &lp_wm_5_6);
3734 :
3735 0 : best_lp_wm = ilk_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6);
3736 0 : } else {
3737 : best_lp_wm = &lp_wm_1_2;
3738 : }
3739 :
3740 0 : partitioning = (best_lp_wm == &lp_wm_1_2) ?
3741 : INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6;
3742 :
3743 0 : ilk_compute_wm_results(dev, best_lp_wm, partitioning, &results);
3744 :
3745 0 : ilk_write_wm_values(dev_priv, &results);
3746 0 : }
3747 :
3748 : static void
3749 0 : ilk_update_sprite_wm(struct drm_plane *plane,
3750 : struct drm_crtc *crtc,
3751 : uint32_t sprite_width, uint32_t sprite_height,
3752 : int pixel_size, bool enabled, bool scaled)
3753 : {
3754 0 : struct drm_device *dev = plane->dev;
3755 0 : struct intel_plane *intel_plane = to_intel_plane(plane);
3756 :
3757 : /*
3758 : * IVB workaround: must disable low power watermarks for at least
3759 : * one frame before enabling scaling. LP watermarks can be re-enabled
3760 : * when scaling is disabled.
3761 : *
3762 : * WaCxSRDisabledForSpriteScaling:ivb
3763 : */
3764 0 : if (IS_IVYBRIDGE(dev) && scaled && ilk_disable_lp_wm(dev))
3765 0 : intel_wait_for_vblank(dev, intel_plane->pipe);
3766 :
3767 0 : ilk_update_wm(crtc);
3768 0 : }
3769 :
3770 0 : static void skl_pipe_wm_active_state(uint32_t val,
3771 : struct skl_pipe_wm *active,
3772 : bool is_transwm,
3773 : bool is_cursor,
3774 : int i,
3775 : int level)
3776 : {
3777 0 : bool is_enabled = (val & PLANE_WM_EN) != 0;
3778 :
3779 0 : if (!is_transwm) {
3780 0 : if (!is_cursor) {
3781 0 : active->wm[level].plane_en[i] = is_enabled;
3782 0 : active->wm[level].plane_res_b[i] =
3783 0 : val & PLANE_WM_BLOCKS_MASK;
3784 0 : active->wm[level].plane_res_l[i] =
3785 0 : (val >> PLANE_WM_LINES_SHIFT) &
3786 : PLANE_WM_LINES_MASK;
3787 0 : } else {
3788 0 : active->wm[level].plane_en[PLANE_CURSOR] = is_enabled;
3789 0 : active->wm[level].plane_res_b[PLANE_CURSOR] =
3790 0 : val & PLANE_WM_BLOCKS_MASK;
3791 0 : active->wm[level].plane_res_l[PLANE_CURSOR] =
3792 0 : (val >> PLANE_WM_LINES_SHIFT) &
3793 : PLANE_WM_LINES_MASK;
3794 : }
3795 : } else {
3796 0 : if (!is_cursor) {
3797 0 : active->trans_wm.plane_en[i] = is_enabled;
3798 0 : active->trans_wm.plane_res_b[i] =
3799 0 : val & PLANE_WM_BLOCKS_MASK;
3800 0 : active->trans_wm.plane_res_l[i] =
3801 0 : (val >> PLANE_WM_LINES_SHIFT) &
3802 : PLANE_WM_LINES_MASK;
3803 0 : } else {
3804 0 : active->trans_wm.plane_en[PLANE_CURSOR] = is_enabled;
3805 0 : active->trans_wm.plane_res_b[PLANE_CURSOR] =
3806 0 : val & PLANE_WM_BLOCKS_MASK;
3807 0 : active->trans_wm.plane_res_l[PLANE_CURSOR] =
3808 0 : (val >> PLANE_WM_LINES_SHIFT) &
3809 : PLANE_WM_LINES_MASK;
3810 : }
3811 : }
3812 0 : }
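
/*
 * Example decode matching the unpacking above, assuming the usual field
 * layout for these registers (enable bit at the top, lines above
 * PLANE_WM_LINES_SHIFT, blocks in the low bits): a register value of
 * PLANE_WM_EN | (3 << PLANE_WM_LINES_SHIFT) | 42 yields plane_en = true,
 * plane_res_l = 3 and plane_res_b = 42.
 */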
3813 :
3814 0 : static void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc)
3815 : {
3816 0 : struct drm_device *dev = crtc->dev;
3817 0 : struct drm_i915_private *dev_priv = dev->dev_private;
3818 0 : struct skl_wm_values *hw = &dev_priv->wm.skl_hw;
3819 0 : struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3820 0 : struct skl_pipe_wm *active = &intel_crtc->wm.skl_active;
3821 0 : enum pipe pipe = intel_crtc->pipe;
3822 : int level, i, max_level;
3823 : uint32_t temp;
3824 :
3825 0 : max_level = ilk_wm_max_level(dev);
3826 :
3827 0 : hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
3828 :
3829 0 : for (level = 0; level <= max_level; level++) {
3830 0 : for (i = 0; i < intel_num_planes(intel_crtc); i++)
3831 0 : hw->plane[pipe][i][level] =
3832 0 : I915_READ(PLANE_WM(pipe, i, level));
3833 0 : hw->plane[pipe][PLANE_CURSOR][level] = I915_READ(CUR_WM(pipe, level));
3834 : }
3835 :
3836 0 : for (i = 0; i < intel_num_planes(intel_crtc); i++)
3837 0 : hw->plane_trans[pipe][i] = I915_READ(PLANE_WM_TRANS(pipe, i));
3838 0 : hw->plane_trans[pipe][PLANE_CURSOR] = I915_READ(CUR_WM_TRANS(pipe));
3839 :
3840 0 : if (!intel_crtc->active)
3841 0 : return;
3842 :
3843 0 : hw->dirty[pipe] = true;
3844 :
3845 0 : active->linetime = hw->wm_linetime[pipe];
3846 :
3847 0 : for (level = 0; level <= max_level; level++) {
3848 0 : for (i = 0; i < intel_num_planes(intel_crtc); i++) {
3849 0 : temp = hw->plane[pipe][i][level];
3850 0 : skl_pipe_wm_active_state(temp, active, false,
3851 : false, i, level);
3852 : }
3853 0 : temp = hw->plane[pipe][PLANE_CURSOR][level];
3854 0 : skl_pipe_wm_active_state(temp, active, false, true, i, level);
3855 : }
3856 :
3857 0 : for (i = 0; i < intel_num_planes(intel_crtc); i++) {
3858 0 : temp = hw->plane_trans[pipe][i];
3859 0 : skl_pipe_wm_active_state(temp, active, true, false, i, 0);
3860 : }
3861 :
3862 0 : temp = hw->plane_trans[pipe][PLANE_CURSOR];
3863 0 : skl_pipe_wm_active_state(temp, active, true, true, i, 0);
3864 0 : }
3865 :
3866 0 : void skl_wm_get_hw_state(struct drm_device *dev)
3867 : {
3868 0 : struct drm_i915_private *dev_priv = dev->dev_private;
3869 0 : struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb;
3870 : struct drm_crtc *crtc;
3871 :
3872 0 : skl_ddb_get_hw_state(dev_priv, ddb);
3873 0 : list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
3874 0 : skl_pipe_wm_get_hw_state(crtc);
3875 0 : }
3876 :
3877 0 : static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc)
3878 : {
3879 0 : struct drm_device *dev = crtc->dev;
3880 0 : struct drm_i915_private *dev_priv = dev->dev_private;
3881 0 : struct ilk_wm_values *hw = &dev_priv->wm.hw;
3882 0 : struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3883 0 : struct intel_pipe_wm *active = &intel_crtc->wm.active;
3884 0 : enum pipe pipe = intel_crtc->pipe;
3885 : static const unsigned int wm0_pipe_reg[] = {
3886 : [PIPE_A] = WM0_PIPEA_ILK,
3887 : [PIPE_B] = WM0_PIPEB_ILK,
3888 : [PIPE_C] = WM0_PIPEC_IVB,
3889 : };
3890 :
3891 0 : hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]);
3892 0 : if (IS_HASWELL(dev) || IS_BROADWELL(dev))
3893 0 : hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
3894 :
3895 0 : memset(active, 0, sizeof(*active));
3896 :
3897 0 : active->pipe_enabled = intel_crtc->active;
3898 :
3899 0 : if (active->pipe_enabled) {
3900 0 : u32 tmp = hw->wm_pipe[pipe];
3901 :
3902 : /*
3903 : * For active pipes LP0 watermark is marked as
3904 :            * enabled, and LP1+ watermarks as disabled since
3905 : * we can't really reverse compute them in case
3906 : * multiple pipes are active.
3907 : */
3908 0 : active->wm[0].enable = true;
3909 0 : active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT;
3910 0 : active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT;
3911 0 : active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK;
3912 0 : active->linetime = hw->wm_linetime[pipe];
3913 0 : } else {
3914 0 : int level, max_level = ilk_wm_max_level(dev);
3915 :
3916 : /*
3917 : * For inactive pipes, all watermark levels
3918 : * should be marked as enabled but zeroed,
3919 : * which is what we'd compute them to.
3920 : */
3921 0 : for (level = 0; level <= max_level; level++)
3922 0 : active->wm[level].enable = true;
3923 : }
3924 0 : }
3925 :
3926 : #define _FW_WM(value, plane) \
3927 : (((value) & DSPFW_ ## plane ## _MASK) >> DSPFW_ ## plane ## _SHIFT)
3928 : #define _FW_WM_VLV(value, plane) \
3929 : (((value) & DSPFW_ ## plane ## _MASK_VLV) >> DSPFW_ ## plane ## _SHIFT)
3930 :
3931 0 : static void vlv_read_wm_values(struct drm_i915_private *dev_priv,
3932 : struct vlv_wm_values *wm)
3933 : {
3934 : enum pipe pipe;
3935 : uint32_t tmp;
3936 :
3937 0 : for_each_pipe(dev_priv, pipe) {
3938 0 : tmp = I915_READ(VLV_DDL(pipe));
3939 :
3940 0 : wm->ddl[pipe].primary =
3941 0 : (tmp >> DDL_PLANE_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
3942 0 : wm->ddl[pipe].cursor =
3943 0 : (tmp >> DDL_CURSOR_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
3944 0 : wm->ddl[pipe].sprite[0] =
3945 0 : (tmp >> DDL_SPRITE_SHIFT(0)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
3946 0 : wm->ddl[pipe].sprite[1] =
3947 0 : (tmp >> DDL_SPRITE_SHIFT(1)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
3948 : }
3949 :
3950 0 : tmp = I915_READ(DSPFW1);
3951 0 : wm->sr.plane = _FW_WM(tmp, SR);
3952 0 : wm->pipe[PIPE_B].cursor = _FW_WM(tmp, CURSORB);
3953 0 : wm->pipe[PIPE_B].primary = _FW_WM_VLV(tmp, PLANEB);
3954 0 : wm->pipe[PIPE_A].primary = _FW_WM_VLV(tmp, PLANEA);
3955 :
3956 0 : tmp = I915_READ(DSPFW2);
3957 0 : wm->pipe[PIPE_A].sprite[1] = _FW_WM_VLV(tmp, SPRITEB);
3958 0 : wm->pipe[PIPE_A].cursor = _FW_WM(tmp, CURSORA);
3959 0 : wm->pipe[PIPE_A].sprite[0] = _FW_WM_VLV(tmp, SPRITEA);
3960 :
3961 0 : tmp = I915_READ(DSPFW3);
3962 0 : wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
3963 :
3964 0 : if (IS_CHERRYVIEW(dev_priv)) {
3965 0 : tmp = I915_READ(DSPFW7_CHV);
3966 0 : wm->pipe[PIPE_B].sprite[1] = _FW_WM_VLV(tmp, SPRITED);
3967 0 : wm->pipe[PIPE_B].sprite[0] = _FW_WM_VLV(tmp, SPRITEC);
3968 :
3969 0 : tmp = I915_READ(DSPFW8_CHV);
3970 0 : wm->pipe[PIPE_C].sprite[1] = _FW_WM_VLV(tmp, SPRITEF);
3971 0 : wm->pipe[PIPE_C].sprite[0] = _FW_WM_VLV(tmp, SPRITEE);
3972 :
3973 0 : tmp = I915_READ(DSPFW9_CHV);
3974 0 : wm->pipe[PIPE_C].primary = _FW_WM_VLV(tmp, PLANEC);
3975 0 : wm->pipe[PIPE_C].cursor = _FW_WM(tmp, CURSORC);
3976 :
3977 0 : tmp = I915_READ(DSPHOWM);
3978 0 : wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
3979 0 : wm->pipe[PIPE_C].sprite[1] |= _FW_WM(tmp, SPRITEF_HI) << 8;
3980 0 : wm->pipe[PIPE_C].sprite[0] |= _FW_WM(tmp, SPRITEE_HI) << 8;
3981 0 : wm->pipe[PIPE_C].primary |= _FW_WM(tmp, PLANEC_HI) << 8;
3982 0 : wm->pipe[PIPE_B].sprite[1] |= _FW_WM(tmp, SPRITED_HI) << 8;
3983 0 : wm->pipe[PIPE_B].sprite[0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
3984 0 : wm->pipe[PIPE_B].primary |= _FW_WM(tmp, PLANEB_HI) << 8;
3985 0 : wm->pipe[PIPE_A].sprite[1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
3986 0 : wm->pipe[PIPE_A].sprite[0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
3987 0 : wm->pipe[PIPE_A].primary |= _FW_WM(tmp, PLANEA_HI) << 8;
3988 0 : } else {
3989 0 : tmp = I915_READ(DSPFW7);
3990 0 : wm->pipe[PIPE_B].sprite[1] = _FW_WM_VLV(tmp, SPRITED);
3991 0 : wm->pipe[PIPE_B].sprite[0] = _FW_WM_VLV(tmp, SPRITEC);
3992 :
3993 0 : tmp = I915_READ(DSPHOWM);
3994 0 : wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
3995 0 : wm->pipe[PIPE_B].sprite[1] |= _FW_WM(tmp, SPRITED_HI) << 8;
3996 0 : wm->pipe[PIPE_B].sprite[0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
3997 0 : wm->pipe[PIPE_B].primary |= _FW_WM(tmp, PLANEB_HI) << 8;
3998 0 : wm->pipe[PIPE_A].sprite[1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
3999 0 : wm->pipe[PIPE_A].sprite[0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
4000 0 : wm->pipe[PIPE_A].primary |= _FW_WM(tmp, PLANEA_HI) << 8;
4001 : }
4002 0 : }
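
/*
 * Example of the split fields read above: the low bits of each watermark
 * live in DSPFW1..DSPFW9 and the extra high bit in DSPHOWM, so a pipe A
 * primary value of 0x40 in DSPFW1 combined with PLANEA_HI = 1 decodes to
 * 0x40 | (1 << 8) = 320 FIFO entries. The self-refresh plane value is one
 * bit wider, which is why SR_HI is shifted by 9 instead of 8.
 */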
4003 :
4004 : #undef _FW_WM
4005 : #undef _FW_WM_VLV
4006 :
4007 0 : void vlv_wm_get_hw_state(struct drm_device *dev)
4008 : {
4009 0 : struct drm_i915_private *dev_priv = to_i915(dev);
4010 0 : struct vlv_wm_values *wm = &dev_priv->wm.vlv;
4011 : struct intel_plane *plane;
4012 : enum pipe pipe;
4013 : u32 val;
4014 :
4015 0 : vlv_read_wm_values(dev_priv, wm);
4016 :
4017 0 : for_each_intel_plane(dev, plane) {
4018 0 : switch (plane->base.type) {
4019 : int sprite;
4020 : case DRM_PLANE_TYPE_CURSOR:
4021 0 : plane->wm.fifo_size = 63;
4022 0 : break;
4023 : case DRM_PLANE_TYPE_PRIMARY:
4024 0 : plane->wm.fifo_size = vlv_get_fifo_size(dev, plane->pipe, 0);
4025 0 : break;
4026 : case DRM_PLANE_TYPE_OVERLAY:
4027 0 : sprite = plane->plane;
4028 0 : plane->wm.fifo_size = vlv_get_fifo_size(dev, plane->pipe, sprite + 1);
4029 0 : break;
4030 : }
4031 : }
4032 :
4033 0 : wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
4034 0 : wm->level = VLV_WM_LEVEL_PM2;
4035 :
4036 0 : if (IS_CHERRYVIEW(dev_priv)) {
4037 0 : mutex_lock(&dev_priv->rps.hw_lock);
4038 :
4039 0 : val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
4040 0 : if (val & DSP_MAXFIFO_PM5_ENABLE)
4041 0 : wm->level = VLV_WM_LEVEL_PM5;
4042 :
4043 : /*
4044 : * If DDR DVFS is disabled in the BIOS, Punit
4045 : * will never ack the request. So if that happens
4046 : * assume we don't have to enable/disable DDR DVFS
4047 : * dynamically. To test that just set the REQ_ACK
4048 : * bit to poke the Punit, but don't change the
4049 : * HIGH/LOW bits so that we don't actually change
4050 : * the current state.
4051 : */
4052 0 : val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
4053 0 : val |= FORCE_DDR_FREQ_REQ_ACK;
4054 0 : vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
4055 :
4056 0 : if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
4057 : FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) {
4058 : DRM_DEBUG_KMS("Punit not acking DDR DVFS request, "
4059 : "assuming DDR DVFS is disabled\n");
4060 0 : dev_priv->wm.max_level = VLV_WM_LEVEL_PM5;
4061 0 : } else {
4062 0 : val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
4063 0 : if ((val & FORCE_DDR_HIGH_FREQ) == 0)
4064 0 : wm->level = VLV_WM_LEVEL_DDR_DVFS;
4065 : }
4066 :
4067 0 : mutex_unlock(&dev_priv->rps.hw_lock);
4068 0 : }
4069 :
4070 0 : for_each_pipe(dev_priv, pipe)
4071 : DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n",
4072 : pipe_name(pipe), wm->pipe[pipe].primary, wm->pipe[pipe].cursor,
4073 : wm->pipe[pipe].sprite[0], wm->pipe[pipe].sprite[1]);
4074 :
4075 : DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n",
4076 : wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr);
4077 0 : }
4078 :
4079 0 : void ilk_wm_get_hw_state(struct drm_device *dev)
4080 : {
4081 0 : struct drm_i915_private *dev_priv = dev->dev_private;
4082 0 : struct ilk_wm_values *hw = &dev_priv->wm.hw;
4083 : struct drm_crtc *crtc;
4084 :
4085 0 : for_each_crtc(dev, crtc)
4086 0 : ilk_pipe_wm_get_hw_state(crtc);
4087 :
4088 0 : hw->wm_lp[0] = I915_READ(WM1_LP_ILK);
4089 0 : hw->wm_lp[1] = I915_READ(WM2_LP_ILK);
4090 0 : hw->wm_lp[2] = I915_READ(WM3_LP_ILK);
4091 :
4092 0 : hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
4093 0 : if (INTEL_INFO(dev)->gen >= 7) {
4094 0 : hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
4095 0 : hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
4096 0 : }
4097 :
4098 0 : if (IS_HASWELL(dev) || IS_BROADWELL(dev))
4099 0 : hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
4100 : INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
4101 0 : else if (IS_IVYBRIDGE(dev))
4102 0 : hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ?
4103 : INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
4104 :
4105 0 : hw->enable_fbc_wm =
4106 0 : !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS);
4107 0 : }
4108 :
4109 : /**
4110 : * intel_update_watermarks - update FIFO watermark values based on current modes
4111 : *
4112 : * Calculate watermark values for the various WM regs based on current mode
4113 : * and plane configuration.
4114 : *
4115 : * There are several cases to deal with here:
4116 : * - normal (i.e. non-self-refresh)
4117 : * - self-refresh (SR) mode
4118 : * - lines are large relative to FIFO size (buffer can hold up to 2)
4119 : * - lines are small relative to FIFO size (buffer can hold more than 2
4120 : * lines), so need to account for TLB latency
4121 : *
4122 : * The normal calculation is:
4123 : * watermark = dotclock * bytes per pixel * latency
4124 : * where latency is platform & configuration dependent (we assume pessimal
4125 : * values here).
4126 : *
4127 : * The SR calculation is:
4128 : * watermark = (trunc(latency/line time)+1) * surface width *
4129 : * bytes per pixel
4130 : * where
4131 : * line time = htotal / dotclock
4132 : * surface width = hdisplay for normal plane and 64 for cursor
4133 : * and latency is assumed to be high, as above.
4134 : *
4135 : * The final value programmed to the register should always be rounded up,
4136 : * and include an extra 2 entries to account for clock crossings.
4137 : *
4138 : * We don't use the sprite, so we can ignore that. And on Crestline we have
4139 : * to set the non-SR watermarks to 8.
4140 : */
4141 0 : void intel_update_watermarks(struct drm_crtc *crtc)
4142 : {
4143 0 : struct drm_i915_private *dev_priv = crtc->dev->dev_private;
4144 :
4145 0 : if (dev_priv->display.update_wm)
4146 0 : dev_priv->display.update_wm(crtc);
4147 0 : }
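
/*
 * A worked instance of the SR formula in the comment above, with assumed
 * numbers: for a 148.5 MHz dotclock and htotal = 2200, line time =
 * 2200 / 148500000 s, roughly 14.8 us. With an assumed 30 us latency and
 * a 1920-wide surface at 4 bytes per pixel,
 * watermark = (trunc(30 / 14.8) + 1) * 1920 * 4 = 3 * 1920 * 4 = 23040
 * bytes, to be rounded up and padded by 2 entries before programming.
 */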
4148 :
4149 0 : void intel_update_sprite_watermarks(struct drm_plane *plane,
4150 : struct drm_crtc *crtc,
4151 : uint32_t sprite_width,
4152 : uint32_t sprite_height,
4153 : int pixel_size,
4154 : bool enabled, bool scaled)
4155 : {
4156 0 : struct drm_i915_private *dev_priv = plane->dev->dev_private;
4157 :
4158 0 : if (dev_priv->display.update_sprite_wm)
4159 0 : dev_priv->display.update_sprite_wm(plane, crtc,
4160 : sprite_width, sprite_height,
4161 : pixel_size, enabled, scaled);
4162 0 : }
4163 :
4164 : /**
4165 : * Lock protecting IPS related data structures
4166 : */
4167 : DEFINE_SPINLOCK(mchdev_lock);
4168 :
4169 : /* Global for IPS driver to get at the current i915 device. Protected by
4170 : * mchdev_lock. */
4171 : static struct drm_i915_private *i915_mch_dev;
4172 :
4173 0 : bool ironlake_set_drps(struct drm_device *dev, u8 val)
4174 : {
4175 0 : struct drm_i915_private *dev_priv = dev->dev_private;
4176 : u16 rgvswctl;
4177 :
4178 0 : assert_spin_locked(&mchdev_lock);
4179 :
4180 0 : rgvswctl = I915_READ16(MEMSWCTL);
4181 0 : if (rgvswctl & MEMCTL_CMD_STS) {
4182 : DRM_DEBUG("gpu busy, RCS change rejected\n");
4183 0 : return false; /* still busy with another command */
4184 : }
4185 :
4186 0 : rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
4187 0 : (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
4188 0 : I915_WRITE16(MEMSWCTL, rgvswctl);
4189 0 : POSTING_READ16(MEMSWCTL);
4190 :
4191 0 : rgvswctl |= MEMCTL_CMD_STS;
4192 0 : I915_WRITE16(MEMSWCTL, rgvswctl);
4193 :
4194 0 : return true;
4195 0 : }
4196 :
4197 0 : static void ironlake_enable_drps(struct drm_device *dev)
4198 : {
4199 0 : struct drm_i915_private *dev_priv = dev->dev_private;
4200 0 : u32 rgvmodectl = I915_READ(MEMMODECTL);
4201 : u8 fmax, fmin, fstart, vstart;
4202 :
4203 0 : spin_lock_irq(&mchdev_lock);
4204 :
4205 : /* Enable temp reporting */
4206 0 : I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
4207 0 : I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
4208 :
4209 : /* 100ms RC evaluation intervals */
4210 0 : I915_WRITE(RCUPEI, 100000);
4211 0 : I915_WRITE(RCDNEI, 100000);
4212 :
4213 : /* Set max/min thresholds to 90ms and 80ms respectively */
4214 0 : I915_WRITE(RCBMAXAVG, 90000);
4215 0 : I915_WRITE(RCBMINAVG, 80000);
4216 :
4217 0 : I915_WRITE(MEMIHYST, 1);
4218 :
4219 : /* Set up min, max, and cur for interrupt handling */
4220 0 : fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
4221 0 : fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
4222 0 : fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
4223 : MEMMODE_FSTART_SHIFT;
4224 :
4225 0 : vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >>
4226 : PXVFREQ_PX_SHIFT;
4227 :
4228 0 : dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
4229 0 : dev_priv->ips.fstart = fstart;
4230 :
4231 0 : dev_priv->ips.max_delay = fstart;
4232 0 : dev_priv->ips.min_delay = fmin;
4233 0 : dev_priv->ips.cur_delay = fstart;
4234 :
4235 : DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
4236 : fmax, fmin, fstart);
4237 :
4238 0 : I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
4239 :
4240 : /*
4241 : * Interrupts will be enabled in ironlake_irq_postinstall
4242 : */
4243 :
4244 0 : I915_WRITE(VIDSTART, vstart);
4245 0 : POSTING_READ(VIDSTART);
4246 :
4247 0 : rgvmodectl |= MEMMODE_SWMODE_EN;
4248 0 : I915_WRITE(MEMMODECTL, rgvmodectl);
4249 :
4250 0 : if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
4251 0 : DRM_ERROR("stuck trying to change perf mode\n");
4252 0 : mdelay(1);
4253 :
4254 0 : ironlake_set_drps(dev, fstart);
4255 :
4256 0 : dev_priv->ips.last_count1 = I915_READ(DMIEC) +
4257 0 : I915_READ(DDREC) + I915_READ(CSIEC);
4258 0 : dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
4259 0 : dev_priv->ips.last_count2 = I915_READ(GFXEC);
4260 0 : dev_priv->ips.last_time2 = ktime_get_raw_ns();
4261 :
4262 0 : spin_unlock_irq(&mchdev_lock);
4263 0 : }
4264 :
4265 0 : static void ironlake_disable_drps(struct drm_device *dev)
4266 : {
4267 0 : struct drm_i915_private *dev_priv = dev->dev_private;
4268 : u16 rgvswctl;
4269 :
4270 0 : spin_lock_irq(&mchdev_lock);
4271 :
4272 0 : rgvswctl = I915_READ16(MEMSWCTL);
4273 :
4274 : /* Ack interrupts, disable EFC interrupt */
4275 0 : I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
4276 0 : I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
4277 0 : I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
4278 0 : I915_WRITE(DEIIR, DE_PCU_EVENT);
4279 0 : I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
4280 :
4281 : /* Go back to the starting frequency */
4282 0 : ironlake_set_drps(dev, dev_priv->ips.fstart);
4283 0 : mdelay(1);
4284 0 : rgvswctl |= MEMCTL_CMD_STS;
4285 0 : I915_WRITE(MEMSWCTL, rgvswctl);
4286 0 : mdelay(1);
4287 :
4288 0 : spin_unlock_irq(&mchdev_lock);
4289 0 : }
4290 :
4291 : /* There's a funny hw issue where the hw returns all 0 when reading from
4292 : * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
4293 : * ourselves, instead of doing a rmw cycle (which might result in us clearing
4294 : * all limits and the gpu stuck at whatever frequency it is at atm).
4295 : */
4296 0 : static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
4297 : {
4298 : u32 limits;
4299 :
4300 : /* Only set the down limit when we've reached the lowest level to avoid
4301 : * getting more interrupts, otherwise leave this clear. This prevents a
4302 : * race in the hw when coming out of rc6: There's a tiny window where
4303 : * the hw runs at the minimal clock before selecting the desired
4304 : * frequency, if the down threshold expires in that window we will not
4305 : * receive a down interrupt. */
4306 0 : if (IS_GEN9(dev_priv->dev)) {
4307 0 : limits = (dev_priv->rps.max_freq_softlimit) << 23;
4308 0 : if (val <= dev_priv->rps.min_freq_softlimit)
4309 0 : limits |= (dev_priv->rps.min_freq_softlimit) << 14;
4310 : } else {
4311 0 : limits = dev_priv->rps.max_freq_softlimit << 24;
4312 0 : if (val <= dev_priv->rps.min_freq_softlimit)
4313 0 : limits |= dev_priv->rps.min_freq_softlimit << 16;
4314 : }
4315 :
4316 0 : return limits;
4317 : }
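
/*
 * Example of the limits encoding computed above, in the pre-gen9 layout
 * and with invented frequencies (gen9 uses shifts 23 and 14 instead):
 * max_freq_softlimit = 22 and min_freq_softlimit = 7 in 50 MHz units give
 * limits = 22 << 24 while val > 7, and (22 << 24) | (7 << 16) once val
 * has dropped to the floor, so no further down interrupts are generated.
 */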
4318 :
4319 0 : static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
4320 : {
4321 : int new_power;
4322 : u32 threshold_up = 0, threshold_down = 0; /* in % */
4323 : u32 ei_up = 0, ei_down = 0;
4324 :
4325 0 : new_power = dev_priv->rps.power;
4326 0 : switch (dev_priv->rps.power) {
4327 : case LOW_POWER:
4328 0 : if (val > dev_priv->rps.efficient_freq + 1 && val > dev_priv->rps.cur_freq)
4329 0 : new_power = BETWEEN;
4330 : break;
4331 :
4332 : case BETWEEN:
4333 0 : if (val <= dev_priv->rps.efficient_freq && val < dev_priv->rps.cur_freq)
4334 0 : new_power = LOW_POWER;
4335 0 : else if (val >= dev_priv->rps.rp0_freq && val > dev_priv->rps.cur_freq)
4336 0 : new_power = HIGH_POWER;
4337 : break;
4338 :
4339 : case HIGH_POWER:
4340 0 : if (val < (dev_priv->rps.rp1_freq + dev_priv->rps.rp0_freq) >> 1 && val < dev_priv->rps.cur_freq)
4341 0 : new_power = BETWEEN;
4342 : break;
4343 : }
4344 : /* Max/min bins are special */
4345 0 : if (val <= dev_priv->rps.min_freq_softlimit)
4346 0 : new_power = LOW_POWER;
4347 0 : if (val >= dev_priv->rps.max_freq_softlimit)
4348 0 : new_power = HIGH_POWER;
4349 0 : if (new_power == dev_priv->rps.power)
4350 0 : return;
4351 :
4352 : /* Note the units here are not exactly 1us, but 1280ns. */
4353 0 : switch (new_power) {
4354 : case LOW_POWER:
4355 : /* Upclock if more than 95% busy over 16ms */
4356 : ei_up = 16000;
4357 : threshold_up = 95;
4358 :
4359 : /* Downclock if less than 85% busy over 32ms */
4360 : ei_down = 32000;
4361 : threshold_down = 85;
4362 0 : break;
4363 :
4364 : case BETWEEN:
4365 : /* Upclock if more than 90% busy over 13ms */
4366 : ei_up = 13000;
4367 : threshold_up = 90;
4368 :
4369 : /* Downclock if less than 75% busy over 32ms */
4370 : ei_down = 32000;
4371 : threshold_down = 75;
4372 0 : break;
4373 :
4374 : case HIGH_POWER:
4375 : /* Upclock if more than 85% busy over 10ms */
4376 : ei_up = 10000;
4377 : threshold_up = 85;
4378 :
4379 : /* Downclock if less than 60% busy over 32ms */
4380 : ei_down = 32000;
4381 : threshold_down = 60;
4382 0 : break;
4383 : }
4384 :
4385 :   /* Once BYT can survive dynamic sw freq adjustments without
4386 :    * hanging the system, this restriction can be lifted.
4387 : */
4388 0 : if (IS_VALLEYVIEW(dev_priv))
4389 : goto skip_hw_write;
4390 :
4391 0 : I915_WRITE(GEN6_RP_UP_EI,
4392 : GT_INTERVAL_FROM_US(dev_priv, ei_up));
4393 0 : I915_WRITE(GEN6_RP_UP_THRESHOLD,
4394 : GT_INTERVAL_FROM_US(dev_priv, (ei_up * threshold_up / 100)));
4395 :
4396 0 : I915_WRITE(GEN6_RP_DOWN_EI,
4397 : GT_INTERVAL_FROM_US(dev_priv, ei_down));
4398 0 : I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
4399 : GT_INTERVAL_FROM_US(dev_priv, (ei_down * threshold_down / 100)));
4400 :
4401 0 : I915_WRITE(GEN6_RP_CONTROL,
4402 : GEN6_RP_MEDIA_TURBO |
4403 : GEN6_RP_MEDIA_HW_NORMAL_MODE |
4404 : GEN6_RP_MEDIA_IS_GFX |
4405 : GEN6_RP_ENABLE |
4406 : GEN6_RP_UP_BUSY_AVG |
4407 : GEN6_RP_DOWN_IDLE_AVG);
4408 :
4409 : skip_hw_write:
4410 0 : dev_priv->rps.power = new_power;
4411 0 : dev_priv->rps.up_threshold = threshold_up;
4412 0 : dev_priv->rps.down_threshold = threshold_down;
4413 0 : dev_priv->rps.last_adj = 0;
4414 0 : }
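
/*
 * Worked example of the threshold programming above, for the LOW_POWER
 * bin: ei_up = 16000 us and threshold_up = 95 mean the up threshold is
 * GT_INTERVAL_FROM_US(dev_priv, 16000 * 95 / 100), i.e. 15200 us of
 * busyness within a 16 ms evaluation interval requests an upclock, while
 * dropping below 85% busy over 32 ms requests a downclock. The intervals
 * are converted to the ~1.28 us hardware units by GT_INTERVAL_FROM_US();
 * on VLV the MMIO writes are skipped entirely, per the comment above.
 */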
4415 :
4416 0 : static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
4417 : {
4418 : u32 mask = 0;
4419 :
4420 :   /* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
4421 0 : if (val > dev_priv->rps.min_freq_softlimit)
4422 0 : mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
4423 0 : if (val < dev_priv->rps.max_freq_softlimit)
4424 0 : mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
4425 :
4426 0 : mask &= dev_priv->pm_rps_events;
4427 :
4428 0 : return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
4429 : }
4430 :
4431 : /* gen6_set_rps is called to update the frequency request, but should also be
4432 : * called when the range (min_delay and max_delay) is modified so that we can
4433 : * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
4434 0 : static void gen6_set_rps(struct drm_device *dev, u8 val)
4435 : {
4436 0 : struct drm_i915_private *dev_priv = dev->dev_private;
4437 :
4438 : /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */
4439 0 : if (IS_BXT_REVID(dev, 0, BXT_REVID_A1))
4440 0 : return;
4441 :
4442 0 : WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
4443 0 : WARN_ON(val > dev_priv->rps.max_freq);
4444 0 : WARN_ON(val < dev_priv->rps.min_freq);
4445 :
4446 : /* min/max delay may still have been modified so be sure to
4447 : * write the limits value.
4448 : */
4449 0 : if (val != dev_priv->rps.cur_freq) {
4450 0 : gen6_set_rps_thresholds(dev_priv, val);
4451 :
4452 0 : if (IS_GEN9(dev))
4453 0 : I915_WRITE(GEN6_RPNSWREQ,
4454 : GEN9_FREQUENCY(val));
4455 0 : else if (IS_HASWELL(dev) || IS_BROADWELL(dev))
4456 0 : I915_WRITE(GEN6_RPNSWREQ,
4457 : HSW_FREQUENCY(val));
4458 : else
4459 0 : I915_WRITE(GEN6_RPNSWREQ,
4460 : GEN6_FREQUENCY(val) |
4461 : GEN6_OFFSET(0) |
4462 : GEN6_AGGRESSIVE_TURBO);
4463 : }
4464 :
4465 : /* Make sure we continue to get interrupts
4466 : * until we hit the minimum or maximum frequencies.
4467 : */
4468 0 : I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
4469 0 : I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
4470 :
4471 0 : POSTING_READ(GEN6_RPNSWREQ);
4472 :
4473 0 : dev_priv->rps.cur_freq = val;
4474 0 : trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
4475 0 : }
4476 :
4477 0 : static void valleyview_set_rps(struct drm_device *dev, u8 val)
4478 : {
4479 0 : struct drm_i915_private *dev_priv = dev->dev_private;
4480 :
4481 0 : WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
4482 0 : WARN_ON(val > dev_priv->rps.max_freq);
4483 0 : WARN_ON(val < dev_priv->rps.min_freq);
4484 :
4485 0 : if (WARN_ONCE(IS_CHERRYVIEW(dev) && (val & 1),
4486 : "Odd GPU freq value\n"))
4487 0 : val &= ~1;
4488 :
4489 0 : I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
4490 :
4491 0 : if (val != dev_priv->rps.cur_freq) {
4492 0 : vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
4493 0 : if (!IS_CHERRYVIEW(dev_priv))
4494 0 : gen6_set_rps_thresholds(dev_priv, val);
4495 : }
4496 :
4497 0 : dev_priv->rps.cur_freq = val;
4498 0 : trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
4499 0 : }
4500 :
4501 : /* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
4502 : *
4503 :  * If Gfx is idle, then:
4504 : * 1. Forcewake Media well.
4505 : * 2. Request idle freq.
4506 : * 3. Release Forcewake of Media well.
4507 : */
4508 0 : static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
4509 : {
4510 0 : u32 val = dev_priv->rps.idle_freq;
4511 :
4512 0 : if (dev_priv->rps.cur_freq <= val)
4513 0 : return;
4514 :
4515 : /* Wake up the media well, as that takes a lot less
4516 : * power than the Render well. */
4517 0 : intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
4518 0 : valleyview_set_rps(dev_priv->dev, val);
4519 0 : intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
4520 0 : }
4521 :
4522 0 : void gen6_rps_busy(struct drm_i915_private *dev_priv)
4523 : {
4524 0 : mutex_lock(&dev_priv->rps.hw_lock);
4525 0 : if (dev_priv->rps.enabled) {
4526 0 : if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
4527 0 : gen6_rps_reset_ei(dev_priv);
4528 0 : I915_WRITE(GEN6_PMINTRMSK,
4529 : gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
4530 0 : }
4531 0 : mutex_unlock(&dev_priv->rps.hw_lock);
4532 0 : }
4533 :
4534 0 : void gen6_rps_idle(struct drm_i915_private *dev_priv)
4535 : {
4536 0 : struct drm_device *dev = dev_priv->dev;
4537 :
4538 0 : mutex_lock(&dev_priv->rps.hw_lock);
4539 0 : if (dev_priv->rps.enabled) {
4540 0 : if (IS_VALLEYVIEW(dev))
4541 0 : vlv_set_rps_idle(dev_priv);
4542 : else
4543 0 : gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
4544 0 : dev_priv->rps.last_adj = 0;
4545 0 : I915_WRITE(GEN6_PMINTRMSK,
4546 : gen6_sanitize_rps_pm_mask(dev_priv, ~0));
4547 0 : }
4548 0 : mutex_unlock(&dev_priv->rps.hw_lock);
4549 :
4550 0 : spin_lock(&dev_priv->rps.client_lock);
4551 0 : while (!list_empty(&dev_priv->rps.clients))
4552 0 : list_del_init(dev_priv->rps.clients.next);
4553 0 : spin_unlock(&dev_priv->rps.client_lock);
4554 0 : }
4555 :
4556 0 : void gen6_rps_boost(struct drm_i915_private *dev_priv,
4557 : struct intel_rps_client *rps,
4558 : unsigned long submitted)
4559 : {
4560 : /* This is intentionally racy! We peek at the state here, then
4561 : * validate inside the RPS worker.
4562 : */
4563 0 : if (!(dev_priv->mm.busy &&
4564 0 : dev_priv->rps.enabled &&
4565 0 : dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit))
4566 : return;
4567 :
4568 : /* Force a RPS boost (and don't count it against the client) if
4569 : * the GPU is severely congested.
4570 : */
4571 0 : if (rps && time_after(jiffies, submitted + DRM_I915_THROTTLE_JIFFIES))
4572 0 : rps = NULL;
4573 :
4574 0 : spin_lock(&dev_priv->rps.client_lock);
4575 0 : if (rps == NULL || list_empty(&rps->link)) {
4576 0 : spin_lock_irq(&dev_priv->irq_lock);
4577 0 : if (dev_priv->rps.interrupts_enabled) {
4578 0 : dev_priv->rps.client_boost = true;
4579 0 : queue_work(dev_priv->wq, &dev_priv->rps.work);
4580 0 : }
4581 0 : spin_unlock_irq(&dev_priv->irq_lock);
4582 :
4583 0 : if (rps != NULL) {
4584 0 : list_add(&rps->link, &dev_priv->rps.clients);
4585 0 : rps->boosts++;
4586 0 : } else
4587 0 : dev_priv->rps.boosts++;
4588 : }
4589 0 : spin_unlock(&dev_priv->rps.client_lock);
4590 0 : }
4591 :
4592 0 : void intel_set_rps(struct drm_device *dev, u8 val)
4593 : {
4594 0 : if (IS_VALLEYVIEW(dev))
4595 0 : valleyview_set_rps(dev, val);
4596 : else
4597 0 : gen6_set_rps(dev, val);
4598 0 : }
4599 :
4600 0 : static void gen9_disable_rps(struct drm_device *dev)
4601 : {
4602 0 : struct drm_i915_private *dev_priv = dev->dev_private;
4603 :
4604 0 : I915_WRITE(GEN6_RC_CONTROL, 0);
4605 0 : I915_WRITE(GEN9_PG_ENABLE, 0);
4606 0 : }
4607 :
4608 0 : static void gen6_disable_rps(struct drm_device *dev)
4609 : {
4610 0 : struct drm_i915_private *dev_priv = dev->dev_private;
4611 :
4612 0 : I915_WRITE(GEN6_RC_CONTROL, 0);
4613 0 : I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
4614 0 : }
4615 :
4616 0 : static void cherryview_disable_rps(struct drm_device *dev)
4617 : {
4618 0 : struct drm_i915_private *dev_priv = dev->dev_private;
4619 :
4620 0 : I915_WRITE(GEN6_RC_CONTROL, 0);
4621 0 : }
4622 :
4623 0 : static void valleyview_disable_rps(struct drm_device *dev)
4624 : {
4625 0 : struct drm_i915_private *dev_priv = dev->dev_private;
4626 :
4627 :   /* We're doing forcewake before disabling RC6;
4628 :    * this is what the BIOS expects when going into suspend */
4629 0 : intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4630 :
4631 0 : I915_WRITE(GEN6_RC_CONTROL, 0);
4632 :
4633 0 : intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4634 0 : }
4635 :
4636 0 : static void intel_print_rc6_info(struct drm_device *dev, u32 mode)
4637 : {
4638 0 : if (IS_VALLEYVIEW(dev)) {
4639 0 : if (mode & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1)))
4640 0 : mode = GEN6_RC_CTL_RC6_ENABLE;
4641 : else
4642 : mode = 0;
4643 : }
4644 0 : if (HAS_RC6p(dev))
4645 : DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s RC6p %s RC6pp %s\n",
4646 : (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off",
4647 : (mode & GEN6_RC_CTL_RC6p_ENABLE) ? "on" : "off",
4648 : (mode & GEN6_RC_CTL_RC6pp_ENABLE) ? "on" : "off");
4649 :
4650 : else
4651 : DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s\n",
4652 : (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off");
4653 0 : }
4654 :
4655 0 : static int sanitize_rc6_option(const struct drm_device *dev, int enable_rc6)
4656 : {
4657 : /* No RC6 before Ironlake and code is gone for ilk. */
4658 0 : if (INTEL_INFO(dev)->gen < 6)
4659 0 : return 0;
4660 :
4661 : /* Respect the kernel parameter if it is set */
4662 0 : if (enable_rc6 >= 0) {
4663 : int mask;
4664 :
4665 0 : if (HAS_RC6p(dev))
4666 0 : mask = INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE |
4667 : INTEL_RC6pp_ENABLE;
4668 : else
4669 : mask = INTEL_RC6_ENABLE;
4670 :
4671 0 : if ((enable_rc6 & mask) != enable_rc6)
4672 : DRM_DEBUG_KMS("Adjusting RC6 mask to %d (requested %d, valid %d)\n",
4673 : enable_rc6 & mask, enable_rc6, mask);
4674 :
4675 : return enable_rc6 & mask;
4676 : }
4677 :
4678 0 : if (IS_IVYBRIDGE(dev))
4679 0 : return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE);
4680 :
4681 0 : return INTEL_RC6_ENABLE;
4682 0 : }
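
/*
 * Example of the sanitizing above: a user requesting enable_rc6 = 7
 * (RC6 | RC6p | RC6pp) on hardware without RC6p support is masked down to
 * INTEL_RC6_ENABLE = 1, with a debug message noting the adjustment; a
 * negative value falls through to the per-platform default instead.
 */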
4683 :
4684 0 : int intel_enable_rc6(const struct drm_device *dev)
4685 : {
4686 0 : return i915.enable_rc6;
4687 : }
4688 :
4689 0 : static void gen6_init_rps_frequencies(struct drm_device *dev)
4690 : {
4691 0 : struct drm_i915_private *dev_priv = dev->dev_private;
4692 : uint32_t rp_state_cap;
4693 0 : u32 ddcc_status = 0;
4694 : int ret;
4695 :
4696 : /* All of these values are in units of 50MHz */
4697 0 : dev_priv->rps.cur_freq = 0;
4698 : /* static values from HW: RP0 > RP1 > RPn (min_freq) */
4699 0 : if (IS_BROXTON(dev)) {
4700 0 : rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
4701 0 : dev_priv->rps.rp0_freq = (rp_state_cap >> 16) & 0xff;
4702 0 : dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff;
4703 0 : dev_priv->rps.min_freq = (rp_state_cap >> 0) & 0xff;
4704 0 : } else {
4705 0 : rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
4706 0 : dev_priv->rps.rp0_freq = (rp_state_cap >> 0) & 0xff;
4707 0 : dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff;
4708 0 : dev_priv->rps.min_freq = (rp_state_cap >> 16) & 0xff;
4709 : }
4710 :
4711 : /* hw_max = RP0 until we check for overclocking */
4712 0 : dev_priv->rps.max_freq = dev_priv->rps.rp0_freq;
4713 :
4714 0 : dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq;
4715 0 : if (IS_HASWELL(dev) || IS_BROADWELL(dev) ||
4716 0 : IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
4717 0 : ret = sandybridge_pcode_read(dev_priv,
4718 : HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
4719 : &ddcc_status);
4720 0 : if (0 == ret)
4721 0 :               if (ret == 0)
4722 0 : clamp_t(u8,
4723 : ((ddcc_status >> 8) & 0xff),
4724 : dev_priv->rps.min_freq,
4725 : dev_priv->rps.max_freq);
4726 : }
4727 :
4728 0 : if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
4729 :           /* Store the frequency values in 16.66 MHz units, which is
4730 :              the natural hardware unit for SKL */
4731 0 : dev_priv->rps.rp0_freq *= GEN9_FREQ_SCALER;
4732 0 : dev_priv->rps.rp1_freq *= GEN9_FREQ_SCALER;
4733 0 : dev_priv->rps.min_freq *= GEN9_FREQ_SCALER;
4734 0 : dev_priv->rps.max_freq *= GEN9_FREQ_SCALER;
4735 0 : dev_priv->rps.efficient_freq *= GEN9_FREQ_SCALER;
4736 0 : }
4737 :
4738 0 : dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
4739 :
4740 : /* Preserve min/max settings in case of re-init */
4741 0 : if (dev_priv->rps.max_freq_softlimit == 0)
4742 0 : dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
4743 :
4744 0 : if (dev_priv->rps.min_freq_softlimit == 0) {
4745 0 : if (IS_HASWELL(dev) || IS_BROADWELL(dev))
4746 0 : dev_priv->rps.min_freq_softlimit =
4747 0 : max_t(int, dev_priv->rps.efficient_freq,
4748 : intel_freq_opcode(dev_priv, 450));
4749 : else
4750 0 : dev_priv->rps.min_freq_softlimit =
4751 0 : dev_priv->rps.min_freq;
4752 : }
4753 0 : }
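
/*
 * Example decode of RP_STATE_CAP in the non-Broxton layout above, with an
 * invented register value: rp_state_cap = 0x050c16 yields
 * rp0_freq = 0x16 = 22, rp1_freq = 0x0c = 12 and min_freq = 0x05 = 5,
 * i.e. 1100/600/250 MHz in 50 MHz units. On SKL/KBL each value is then
 * multiplied by GEN9_FREQ_SCALER (3, since 3 x 16.66 MHz = 50 MHz) to
 * express it in the hardware's 16.66 MHz units.
 */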
4754 :
4755 : /* See the Gen9_GT_PM_Programming_Guide doc for the below */
4756 0 : static void gen9_enable_rps(struct drm_device *dev)
4757 : {
4758 0 : struct drm_i915_private *dev_priv = dev->dev_private;
4759 :
4760 0 : intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4761 :
4762 0 : gen6_init_rps_frequencies(dev);
4763 :
4764 : /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */
4765 0 : if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
4766 0 : intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4767 0 : return;
4768 : }
4769 :
4770 :   /* Program defaults and thresholds for RPS */
4771 0 : I915_WRITE(GEN6_RC_VIDEO_FREQ,
4772 : GEN9_FREQUENCY(dev_priv->rps.rp1_freq));
4773 :
4774 : /* 1 second timeout*/
4775 0 : I915_WRITE(GEN6_RP_DOWN_TIMEOUT,
4776 : GT_INTERVAL_FROM_US(dev_priv, 1000000));
4777 :
4778 0 : I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa);
4779 :
4780 : /* Leaning on the below call to gen6_set_rps to program/setup the
4781 : * Up/Down EI & threshold registers, as well as the RP_CONTROL,
4782 : * RP_INTERRUPT_LIMITS & RPNSWREQ registers */
4783 0 : dev_priv->rps.power = HIGH_POWER; /* force a reset */
4784 0 : gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
4785 :
4786 0 : intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4787 0 : }
4788 :
4789 0 : static void gen9_enable_rc6(struct drm_device *dev)
4790 : {
4791 0 : struct drm_i915_private *dev_priv = dev->dev_private;
4792 : struct intel_engine_cs *ring;
4793 : uint32_t rc6_mask = 0;
4794 : int unused;
4795 :
4796 : /* 1a: Software RC state - RC0 */
4797 0 : I915_WRITE(GEN6_RC_STATE, 0);
4798 :
4799 : /* 1b: Get forcewake during program sequence. Although the driver
4800 :    * hasn't enabled a state yet where we need forcewake, BIOS may have. */
4801 0 : intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4802 :
4803 : /* 2a: Disable RC states. */
4804 0 : I915_WRITE(GEN6_RC_CONTROL, 0);
4805 :
4806 : /* 2b: Program RC6 thresholds.*/
4807 :
4808 : /* WaRsDoubleRc6WrlWithCoarsePowerGating: Doubling WRL only when CPG is enabled */
4809 0 : if (IS_SKYLAKE(dev))
4810 0 : I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
4811 : else
4812 0 : I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
4813 0 : I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
4814 0 : I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
4815 0 : for_each_ring(ring, dev_priv, unused)
4816 0 : I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
4817 :
4818 0 : if (HAS_GUC_UCODE(dev))
4819 0 : I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA);
4820 :
4821 0 : I915_WRITE(GEN6_RC_SLEEP, 0);
4822 :
4823 : /* 2c: Program Coarse Power Gating Policies. */
4824 0 : I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 25);
4825 0 : I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 25);
4826 :
4827 : /* 3a: Enable RC6 */
4828 0 : if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
4829 0 : rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
4830 : DRM_INFO("RC6 %s\n", (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
4831 : "on" : "off");
4832 : /* WaRsUseTimeoutMode */
4833 0 : if (IS_SKL_REVID(dev, 0, SKL_REVID_D0) ||
4834 0 : IS_BXT_REVID(dev, 0, BXT_REVID_A0)) {
4835 0 : I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us */
4836 0 : I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
4837 : GEN7_RC_CTL_TO_MODE |
4838 : rc6_mask);
4839 0 : } else {
4840 0 : I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
4841 0 : I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
4842 : GEN6_RC_CTL_EI_MODE(1) |
4843 : rc6_mask);
4844 : }
4845 :
4846 : /*
4847 : * 3b: Enable Coarse Power Gating only when RC6 is enabled.
4848 : * WaRsDisableCoarsePowerGating:skl,bxt - Render/Media PG need to be disabled with RC6.
4849 : */
4850 0 : if (IS_BXT_REVID(dev, 0, BXT_REVID_A1) ||
4851 0 : ((IS_SKL_GT3(dev) || IS_SKL_GT4(dev)) &&
4852 0 : IS_SKL_REVID(dev, 0, SKL_REVID_E0)))
4853 0 : I915_WRITE(GEN9_PG_ENABLE, 0);
4854 : else
4855 0 : I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
4856 : (GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE) : 0);
4857 :
4858 0 : intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4859 :
4860 0 : }
4861 :
4862 0 : static void gen8_enable_rps(struct drm_device *dev)
4863 : {
4864 0 : struct drm_i915_private *dev_priv = dev->dev_private;
4865 : struct intel_engine_cs *ring;
4866 : uint32_t rc6_mask = 0;
4867 : int unused;
4868 :
4869 : /* 1a: Software RC state - RC0 */
4870 0 : I915_WRITE(GEN6_RC_STATE, 0);
4871 :
4872 : /* 1c & 1d: Get forcewake during program sequence. Although the driver
4873 : 	 * hasn't enabled a state yet where we need forcewake, BIOS may have. */
4874 0 : intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4875 :
4876 : /* 2a: Disable RC states. */
4877 0 : I915_WRITE(GEN6_RC_CONTROL, 0);
4878 :
4879 : /* Initialize rps frequencies */
4880 0 : gen6_init_rps_frequencies(dev);
4881 :
4882 : /* 2b: Program RC6 thresholds.*/
4883 0 : I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
4884 0 : I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
4885 0 : I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
4886 0 : for_each_ring(ring, dev_priv, unused)
4887 0 : I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
4888 0 : I915_WRITE(GEN6_RC_SLEEP, 0);
4889 0 : if (IS_BROADWELL(dev))
4890 0 : I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
4891 : else
4892 0 : I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
4893 :
4894 : /* 3: Enable RC6 */
4895 0 : if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
4896 0 : rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
4897 0 : intel_print_rc6_info(dev, rc6_mask);
4898 0 : if (IS_BROADWELL(dev))
4899 0 : I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
4900 : GEN7_RC_CTL_TO_MODE |
4901 : rc6_mask);
4902 : else
4903 0 : I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
4904 : GEN6_RC_CTL_EI_MODE(1) |
4905 : rc6_mask);
4906 :
4907 : /* 4 Program defaults and thresholds for RPS*/
4908 0 : I915_WRITE(GEN6_RPNSWREQ,
4909 : HSW_FREQUENCY(dev_priv->rps.rp1_freq));
4910 0 : I915_WRITE(GEN6_RC_VIDEO_FREQ,
4911 : HSW_FREQUENCY(dev_priv->rps.rp1_freq));
4912 : /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
4913 0 : I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
4914 :
4915 : 	/* Docs recommend 900 MHz and 300 MHz, respectively */
4916 0 : I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
4917 : dev_priv->rps.max_freq_softlimit << 24 |
4918 : dev_priv->rps.min_freq_softlimit << 16);
4919 :
4920 0 : I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
4921 0 : 	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70% */
4922 0 : I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
4923 0 : I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
4924 :
4925 0 : I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
4926 :
4927 : /* 5: Enable RPS */
4928 0 : I915_WRITE(GEN6_RP_CONTROL,
4929 : GEN6_RP_MEDIA_TURBO |
4930 : GEN6_RP_MEDIA_HW_NORMAL_MODE |
4931 : GEN6_RP_MEDIA_IS_GFX |
4932 : GEN6_RP_ENABLE |
4933 : GEN6_RP_UP_BUSY_AVG |
4934 : GEN6_RP_DOWN_IDLE_AVG);
4935 :
4936 : 	/* 6: Ring frequency + overclocking (our driver does this later) */
4937 :
4938 0 : dev_priv->rps.power = HIGH_POWER; /* force a reset */
4939 0 : gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
4940 :
4941 0 : intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4942 0 : }
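/*
 * The RP threshold registers above count in 1.28us hardware units,
 * which is why the source writes "<time in 10ns ticks> / 128":
 * 7600000 / 128 = 59375 units of 1.28us = 76ms. A tiny helper making
 * the conversion explicit (illustrative only; the name is made up):
 */
#if 0
static u32 rps_us_to_gt_units(u32 us)
{
	/* 1 hardware unit = 1.28us = 1280ns, so scale by 100/128 */
	return (us * 100) / 128;
}
/* rps_us_to_gt_units(76000) == 59375, matching GEN6_RP_UP_THRESHOLD above */
#endif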
4943 :
4944 0 : static void gen6_enable_rps(struct drm_device *dev)
4945 : {
4946 0 : struct drm_i915_private *dev_priv = dev->dev_private;
4947 : struct intel_engine_cs *ring;
4948 0 : u32 rc6vids, pcu_mbox = 0, rc6_mask = 0;
4949 : u32 gtfifodbg;
4950 : int rc6_mode;
4951 : int i, ret;
4952 :
4953 0 : WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
4954 :
4955 : /* Here begins a magic sequence of register writes to enable
4956 : * auto-downclocking.
4957 : *
4958 : * Perhaps there might be some value in exposing these to
4959 : * userspace...
4960 : */
4961 0 : I915_WRITE(GEN6_RC_STATE, 0);
4962 :
4963 : 	/* Clear the DBG now so we don't confuse stale errors with new ones */
4964 0 : if ((gtfifodbg = I915_READ(GTFIFODBG))) {
4965 0 : DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
4966 0 : I915_WRITE(GTFIFODBG, gtfifodbg);
4967 0 : }
4968 :
4969 0 : intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4970 :
4971 : /* Initialize rps frequencies */
4972 0 : gen6_init_rps_frequencies(dev);
4973 :
4974 : /* disable the counters and set deterministic thresholds */
4975 0 : I915_WRITE(GEN6_RC_CONTROL, 0);
4976 :
4977 0 : I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
4978 0 : I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
4979 0 : I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
4980 0 : I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
4981 0 : I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
4982 :
4983 0 : for_each_ring(ring, dev_priv, i)
4984 0 : I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
4985 :
4986 0 : I915_WRITE(GEN6_RC_SLEEP, 0);
4987 0 : I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
4988 0 : if (IS_IVYBRIDGE(dev))
4989 0 : I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
4990 : else
4991 0 : I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
4992 0 : I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
4993 0 : I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
4994 :
4995 : /* Check if we are enabling RC6 */
4996 0 : rc6_mode = intel_enable_rc6(dev_priv->dev);
4997 0 : if (rc6_mode & INTEL_RC6_ENABLE)
4998 0 : rc6_mask |= GEN6_RC_CTL_RC6_ENABLE;
4999 :
5000 : /* We don't use those on Haswell */
5001 0 : if (!IS_HASWELL(dev)) {
5002 0 : if (rc6_mode & INTEL_RC6p_ENABLE)
5003 0 : rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
5004 :
5005 0 : if (rc6_mode & INTEL_RC6pp_ENABLE)
5006 0 : rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
5007 : }
5008 :
5009 0 : intel_print_rc6_info(dev, rc6_mask);
5010 :
5011 0 : I915_WRITE(GEN6_RC_CONTROL,
5012 : rc6_mask |
5013 : GEN6_RC_CTL_EI_MODE(1) |
5014 : GEN6_RC_CTL_HW_ENABLE);
5015 :
5016 : /* Power down if completely idle for over 50ms */
5017 0 : I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
5018 0 : I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
5019 :
5020 0 : ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 0);
5021 : if (ret)
5022 : DRM_DEBUG_DRIVER("Failed to set the min frequency\n");
5023 :
5024 0 : ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox);
5025 0 : if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */
5026 : DRM_DEBUG_DRIVER("Overclocking supported. Max: %dMHz, Overclock max: %dMHz\n",
5027 : (dev_priv->rps.max_freq_softlimit & 0xff) * 50,
5028 : (pcu_mbox & 0xff) * 50);
5029 0 : dev_priv->rps.max_freq = pcu_mbox & 0xff;
5030 0 : }
5031 :
5032 0 : dev_priv->rps.power = HIGH_POWER; /* force a reset */
5033 0 : gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
5034 :
5035 0 : rc6vids = 0;
5036 0 : ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
5037 0 : if (IS_GEN6(dev) && ret) {
5038 : DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
5039 0 : } else if (IS_GEN6(dev) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
5040 : DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
5041 : GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
5042 0 : rc6vids &= 0xffff00;
5043 0 : rc6vids |= GEN6_ENCODE_RC6_VID(450);
5044 0 : ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
5045 0 : if (ret)
5046 0 : DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
5047 : }
5048 :
5049 0 : intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5050 0 : }
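/*
 * The rc6vids BIOS fixup above leans on the PCODE VID encoding, where
 * each code step is 5mV above a 245mV base (this matches the
 * GEN6_ENCODE_RC6_VID/GEN6_DECODE_RC6_VID macros, assuming their usual
 * i915_reg.h definitions). A worked example of the 450mV correction
 * (illustrative only, not compiled):
 */
#if 0
static u32 rc6_vid_encode(u32 mv)  { return (mv - 245) / 5; }
static u32 rc6_vid_decode(u32 vid) { return vid * 5 + 245; }
/*
 * A BIOS-programmed vid of 0x1e decodes to 30*5 + 245 = 395mV; since
 * 395 < 450 the driver rewrites the low byte with rc6_vid_encode(450)
 * == 41 (0x29) via GEN6_PCODE_WRITE_RC6VIDS, exactly as above.
 */
#endif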
5051 :
5052 0 : static void __gen6_update_ring_freq(struct drm_device *dev)
5053 : {
5054 0 : struct drm_i915_private *dev_priv = dev->dev_private;
5055 : int min_freq = 15;
5056 : unsigned int gpu_freq;
5057 : unsigned int max_ia_freq, min_ring_freq;
5058 : unsigned int max_gpu_freq, min_gpu_freq;
5059 : int scaling_factor = 180;
5060 : #ifdef notyet
5061 : struct cpufreq_policy *policy;
5062 : #endif
5063 :
5064 0 : WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
5065 :
5066 : #ifdef notyet
5067 : policy = cpufreq_cpu_get(0);
5068 : if (policy) {
5069 : max_ia_freq = policy->cpuinfo.max_freq;
5070 : cpufreq_cpu_put(policy);
5071 : } else {
5072 : /*
5073 : * Default to measured freq if none found, PCU will ensure we
5074 : * don't go over
5075 : */
5076 : max_ia_freq = tsc_khz;
5077 : }
5078 : #else
5079 : 	/* XXX ideally we want the max CPU frequency here, not cpuspeed... */
5080 0 : max_ia_freq = cpuspeed;
5081 : #endif
5082 :
5083 : /* Convert from kHz to MHz */
5084 0 : max_ia_freq /= 1000;
5085 :
5086 0 : min_ring_freq = I915_READ(DCLK) & 0xf;
5087 : /* convert DDR frequency from units of 266.6MHz to bandwidth */
5088 0 : min_ring_freq = mult_frac(min_ring_freq, 8, 3);
5089 :
5090 0 : if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
5091 : 		/* Convert GT frequency to 50 MHz units */
5092 0 : min_gpu_freq = dev_priv->rps.min_freq / GEN9_FREQ_SCALER;
5093 0 : max_gpu_freq = dev_priv->rps.max_freq / GEN9_FREQ_SCALER;
5094 0 : } else {
5095 0 : min_gpu_freq = dev_priv->rps.min_freq;
5096 0 : max_gpu_freq = dev_priv->rps.max_freq;
5097 : }
5098 :
5099 : /*
5100 : * For each potential GPU frequency, load a ring frequency we'd like
5101 : * to use for memory access. We do this by specifying the IA frequency
5102 : * the PCU should use as a reference to determine the ring frequency.
5103 : */
5104 0 : for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) {
5105 0 : int diff = max_gpu_freq - gpu_freq;
5106 : unsigned int ia_freq = 0, ring_freq = 0;
5107 :
5108 0 : if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
5109 : /*
5110 : * ring_freq = 2 * GT. ring_freq is in 100MHz units
5111 : * No floor required for ring frequency on SKL.
5112 : */
5113 : ring_freq = gpu_freq;
5114 0 : } else if (INTEL_INFO(dev)->gen >= 8) {
5115 : /* max(2 * GT, DDR). NB: GT is 50MHz units */
5116 0 : ring_freq = max(min_ring_freq, gpu_freq);
5117 0 : } else if (IS_HASWELL(dev)) {
5118 0 : ring_freq = mult_frac(gpu_freq, 5, 4);
5119 0 : ring_freq = max(min_ring_freq, ring_freq);
5120 : /* leave ia_freq as the default, chosen by cpufreq */
5121 0 : } else {
5122 : /* On older processors, there is no separate ring
5123 : * clock domain, so in order to boost the bandwidth
5124 : * of the ring, we need to upclock the CPU (ia_freq).
5125 : *
5126 : * For GPU frequencies less than 750MHz,
5127 : * just use the lowest ring freq.
5128 : */
5129 0 : if (gpu_freq < min_freq)
5130 0 : ia_freq = 800;
5131 : else
5132 0 : ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
5133 0 : ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
5134 : }
5135 :
5136 0 : sandybridge_pcode_write(dev_priv,
5137 : GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
5138 0 : ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
5139 0 : ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
5140 : gpu_freq);
5141 : }
5142 0 : }
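/*
 * For the pre-Haswell branch above, the requested IA frequency falls
 * off linearly as the GPU frequency drops: every GT step below max
 * subtracts scaling_factor/2 = 90MHz, and the result is rounded to the
 * 100MHz granularity the PCU expects. A sketch of just that arithmetic
 * (illustrative only; the gpu_freq < min_freq clamp is omitted):
 */
#if 0
static unsigned int ia_freq_for(unsigned int max_ia_freq,
				unsigned int max_gpu_freq,
				unsigned int gpu_freq)
{
	const int scaling_factor = 180;
	int diff = max_gpu_freq - gpu_freq;
	unsigned int ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);

	return (ia_freq + 50) / 100;	/* DIV_ROUND_CLOSEST(ia_freq, 100) */
}
/* ia_freq_for(3000, 22, 20) == 28, i.e. a 2800MHz IA request */
#endif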
5143 :
5144 0 : void gen6_update_ring_freq(struct drm_device *dev)
5145 : {
5146 0 : struct drm_i915_private *dev_priv = dev->dev_private;
5147 :
5148 0 : if (!HAS_CORE_RING_FREQ(dev))
5149 0 : return;
5150 :
5151 0 : mutex_lock(&dev_priv->rps.hw_lock);
5152 0 : __gen6_update_ring_freq(dev);
5153 0 : mutex_unlock(&dev_priv->rps.hw_lock);
5154 0 : }
5155 :
5156 0 : static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
5157 : {
5158 0 : struct drm_device *dev = dev_priv->dev;
5159 : u32 val, rp0;
5160 :
5161 0 : val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
5162 :
5163 0 : switch (INTEL_INFO(dev)->eu_total) {
5164 : case 8:
5165 : /* (2 * 4) config */
5166 0 : rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT);
5167 0 : break;
5168 : case 12:
5169 : /* (2 * 6) config */
5170 0 : rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT);
5171 0 : break;
5172 : case 16:
5173 : /* (2 * 8) config */
5174 : default:
5175 : /* Setting (2 * 8) Min RP0 for any other combination */
5176 0 : rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT);
5177 0 : break;
5178 : }
5179 :
5180 0 : rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK);
5181 :
5182 0 : return rp0;
5183 : }
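/*
 * FB_GFX_FMAX_AT_VMAX_FUSE packs one RP0 code per EU configuration and
 * the switch above merely selects the matching bitfield. A sketch of
 * the decode; the SHIFT_* names here are hypothetical stand-ins for
 * the FB_GFX_FMAX_AT_VMAX_*_FUSE_SHIFT constants used above:
 */
#if 0
static u32 chv_rp0_from_fuse(u32 fuse, unsigned int eu_total)
{
	unsigned int shift;

	switch (eu_total) {
	case 8:  shift = SHIFT_2SS4EU; break;	/* (2 * 4) config */
	case 12: shift = SHIFT_2SS6EU; break;	/* (2 * 6) config */
	default: shift = SHIFT_2SS8EU; break;	/* (2 * 8) and anything else */
	}

	return (fuse >> shift) & FB_GFX_FREQ_FUSE_MASK;
}
#endif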
5184 :
5185 0 : static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv)
5186 : {
5187 : u32 val, rpe;
5188 :
5189 0 : val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG);
5190 0 : rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
5191 :
5192 0 : return rpe;
5193 : }
5194 :
5195 0 : static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
5196 : {
5197 : u32 val, rp1;
5198 :
5199 0 : val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
5200 0 : rp1 = (val & FB_GFX_FREQ_FUSE_MASK);
5201 :
5202 0 : return rp1;
5203 : }
5204 :
5205 0 : static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
5206 : {
5207 : u32 val, rp1;
5208 :
5209 0 : val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
5210 :
5211 0 : rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
5212 :
5213 0 : return rp1;
5214 : }
5215 :
5216 0 : static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
5217 : {
5218 : u32 val, rp0;
5219 :
5220 0 : val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
5221 :
5222 0 : rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
5223 : /* Clamp to max */
5224 0 : rp0 = min_t(u32, rp0, 0xea);
5225 :
5226 0 : return rp0;
5227 : }
5228 :
5229 0 : static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
5230 : {
5231 : u32 val, rpe;
5232 :
5233 0 : val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
5234 0 : rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
5235 0 : val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
5236 0 : rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
5237 :
5238 0 : return rpe;
5239 : }
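/*
 * The RPe fuse is split across two IOSF registers: the five low bits
 * come from FMAX_FUSE_LO and the remaining high bits from FMAX_FUSE_HI,
 * shifted up by 5 before being OR'ed in. A worked example of that
 * recombination (field contents are illustrative):
 */
#if 0
/*
 * lo field = 0x17 (23), hi field = 0x2:
 *   rpe = 0x17 | (0x2 << 5) = 0x57, i.e. an RPe code of 87
 * rebuilt from the two partial-register halves.
 */
#endif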
5240 :
5241 0 : static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
5242 : {
5243 0 : return vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
5244 : }
5245 :
5246 : /* Check that the pctx buffer wasn't moved under us. */
5247 0 : static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
5248 : {
5249 0 : unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
5250 :
5251 0 : WARN_ON(pctx_addr != dev_priv->mm.stolen_base +
5252 : dev_priv->vlv_pctx->stolen->start);
5253 0 : }
5254 :
5255 :
5256 : /* Check that the pcbr address is not empty. */
5257 0 : static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
5258 : {
5259 0 : unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
5260 :
5261 0 : WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
5262 0 : }
5263 :
5264 0 : static void cherryview_setup_pctx(struct drm_device *dev)
5265 : {
5266 0 : struct drm_i915_private *dev_priv = dev->dev_private;
5267 : unsigned long pctx_paddr, paddr;
5268 0 : struct i915_gtt *gtt = &dev_priv->gtt;
5269 : u32 pcbr;
5270 : int pctx_size = 32*1024;
5271 :
5272 0 : WARN_ON(!mutex_is_locked(&dev->struct_mutex));
5273 :
5274 0 : pcbr = I915_READ(VLV_PCBR);
5275 0 : if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
5276 : DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
5277 0 : paddr = (dev_priv->mm.stolen_base +
5278 0 : (gtt->stolen_size - pctx_size));
5279 :
5280 0 : pctx_paddr = (paddr & (~4095));
5281 0 : I915_WRITE(VLV_PCBR, pctx_paddr);
5282 0 : }
5283 :
5284 : DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
5285 0 : }
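/*
 * When the BIOS left PCBR unset, the power context is carved out of the
 * top of stolen memory: base + (stolen_size - pctx_size), rounded down
 * to a 4KiB boundary with "& ~4095" since PCBR wants a page-aligned
 * address. A sketch of that placement with illustrative numbers:
 */
#if 0
static unsigned long pctx_place(unsigned long stolen_base,
				unsigned long stolen_size,
				unsigned long pctx_size)
{
	unsigned long paddr = stolen_base + (stolen_size - pctx_size);

	return paddr & ~4095UL;		/* 4KiB-align for PCBR */
}
/* pctx_place(0x7b000000, 64 << 20, 32 << 10) == 0x7eff8000 */
#endif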
5286 :
5287 0 : static void valleyview_setup_pctx(struct drm_device *dev)
5288 : {
5289 0 : struct drm_i915_private *dev_priv = dev->dev_private;
5290 : struct drm_i915_gem_object *pctx;
5291 : unsigned long pctx_paddr;
5292 : u32 pcbr;
5293 : int pctx_size = 24*1024;
5294 :
5295 0 : WARN_ON(!mutex_is_locked(&dev->struct_mutex));
5296 :
5297 0 : pcbr = I915_READ(VLV_PCBR);
5298 0 : if (pcbr) {
5299 : /* BIOS set it up already, grab the pre-alloc'd space */
5300 : int pcbr_offset;
5301 :
5302 0 : pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base;
5303 0 : pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv->dev,
5304 : pcbr_offset,
5305 : I915_GTT_OFFSET_NONE,
5306 : pctx_size);
5307 : goto out;
5308 : }
5309 :
5310 : DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
5311 :
5312 : /*
5313 : * From the Gunit register HAS:
5314 : * The Gfx driver is expected to program this register and ensure
5315 : * proper allocation within Gfx stolen memory. For example, this
5316 : 	 * register should be programmed such that the PCBR range does not
5317 : * overlap with other ranges, such as the frame buffer, protected
5318 : * memory, or any other relevant ranges.
5319 : */
5320 0 : pctx = i915_gem_object_create_stolen(dev, pctx_size);
5321 0 : if (!pctx) {
5322 : DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
5323 0 : return;
5324 : }
5325 :
5326 0 : pctx_paddr = dev_priv->mm.stolen_base + pctx->stolen->start;
5327 0 : I915_WRITE(VLV_PCBR, pctx_paddr);
5328 :
5329 : out:
5330 : DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
5331 0 : dev_priv->vlv_pctx = pctx;
5332 0 : }
5333 :
5334 0 : static void valleyview_cleanup_pctx(struct drm_device *dev)
5335 : {
5336 0 : struct drm_i915_private *dev_priv = dev->dev_private;
5337 :
5338 0 : if (WARN_ON(!dev_priv->vlv_pctx))
5339 0 : return;
5340 :
5341 0 : drm_gem_object_unreference(&dev_priv->vlv_pctx->base);
5342 0 : dev_priv->vlv_pctx = NULL;
5343 0 : }
5344 :
5345 0 : static void valleyview_init_gt_powersave(struct drm_device *dev)
5346 : {
5347 0 : struct drm_i915_private *dev_priv = dev->dev_private;
5348 : u32 val;
5349 :
5350 0 : valleyview_setup_pctx(dev);
5351 :
5352 0 : mutex_lock(&dev_priv->rps.hw_lock);
5353 :
5354 0 : val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
5355 0 : switch ((val >> 6) & 3) {
5356 : case 0:
5357 : case 1:
5358 0 : dev_priv->mem_freq = 800;
5359 0 : break;
5360 : case 2:
5361 0 : dev_priv->mem_freq = 1066;
5362 0 : break;
5363 : case 3:
5364 0 : dev_priv->mem_freq = 1333;
5365 0 : break;
5366 : }
5367 : DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
5368 :
5369 0 : dev_priv->rps.max_freq = valleyview_rps_max_freq(dev_priv);
5370 0 : dev_priv->rps.rp0_freq = dev_priv->rps.max_freq;
5371 : DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
5372 : intel_gpu_freq(dev_priv, dev_priv->rps.max_freq),
5373 : dev_priv->rps.max_freq);
5374 :
5375 0 : dev_priv->rps.efficient_freq = valleyview_rps_rpe_freq(dev_priv);
5376 : DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
5377 : intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
5378 : dev_priv->rps.efficient_freq);
5379 :
5380 0 : dev_priv->rps.rp1_freq = valleyview_rps_guar_freq(dev_priv);
5381 : DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
5382 : intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
5383 : dev_priv->rps.rp1_freq);
5384 :
5385 0 : dev_priv->rps.min_freq = valleyview_rps_min_freq(dev_priv);
5386 : DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
5387 : intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
5388 : dev_priv->rps.min_freq);
5389 :
5390 0 : dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
5391 :
5392 : /* Preserve min/max settings in case of re-init */
5393 0 : if (dev_priv->rps.max_freq_softlimit == 0)
5394 0 : dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
5395 :
5396 0 : if (dev_priv->rps.min_freq_softlimit == 0)
5397 0 : dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
5398 :
5399 0 : mutex_unlock(&dev_priv->rps.hw_lock);
5400 0 : }
5401 :
5402 0 : static void cherryview_init_gt_powersave(struct drm_device *dev)
5403 : {
5404 0 : struct drm_i915_private *dev_priv = dev->dev_private;
5405 : u32 val;
5406 :
5407 0 : cherryview_setup_pctx(dev);
5408 :
5409 0 : mutex_lock(&dev_priv->rps.hw_lock);
5410 :
5411 0 : mutex_lock(&dev_priv->sb_lock);
5412 0 : val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
5413 0 : mutex_unlock(&dev_priv->sb_lock);
5414 :
5415 0 : switch ((val >> 2) & 0x7) {
5416 : case 3:
5417 0 : dev_priv->mem_freq = 2000;
5418 0 : break;
5419 : default:
5420 0 : dev_priv->mem_freq = 1600;
5421 0 : break;
5422 : }
5423 : DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
5424 :
5425 0 : dev_priv->rps.max_freq = cherryview_rps_max_freq(dev_priv);
5426 0 : dev_priv->rps.rp0_freq = dev_priv->rps.max_freq;
5427 : DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
5428 : intel_gpu_freq(dev_priv, dev_priv->rps.max_freq),
5429 : dev_priv->rps.max_freq);
5430 :
5431 0 : dev_priv->rps.efficient_freq = cherryview_rps_rpe_freq(dev_priv);
5432 : DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
5433 : intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
5434 : dev_priv->rps.efficient_freq);
5435 :
5436 0 : dev_priv->rps.rp1_freq = cherryview_rps_guar_freq(dev_priv);
5437 : DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
5438 : intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
5439 : dev_priv->rps.rp1_freq);
5440 :
5441 : /* PUnit validated range is only [RPe, RP0] */
5442 0 : dev_priv->rps.min_freq = dev_priv->rps.efficient_freq;
5443 : DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
5444 : intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
5445 : dev_priv->rps.min_freq);
5446 :
5447 0 : WARN_ONCE((dev_priv->rps.max_freq |
5448 : dev_priv->rps.efficient_freq |
5449 : dev_priv->rps.rp1_freq |
5450 : dev_priv->rps.min_freq) & 1,
5451 : "Odd GPU freq values\n");
5452 :
5453 0 : dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
5454 :
5455 : /* Preserve min/max settings in case of re-init */
5456 0 : if (dev_priv->rps.max_freq_softlimit == 0)
5457 0 : dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
5458 :
5459 0 : if (dev_priv->rps.min_freq_softlimit == 0)
5460 0 : dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
5461 :
5462 0 : mutex_unlock(&dev_priv->rps.hw_lock);
5463 0 : }
5464 :
5465 0 : static void valleyview_cleanup_gt_powersave(struct drm_device *dev)
5466 : {
5467 0 : valleyview_cleanup_pctx(dev);
5468 0 : }
5469 :
5470 0 : static void cherryview_enable_rps(struct drm_device *dev)
5471 : {
5472 0 : struct drm_i915_private *dev_priv = dev->dev_private;
5473 : struct intel_engine_cs *ring;
5474 : u32 gtfifodbg, val, rc6_mode = 0, pcbr;
5475 : int i;
5476 :
5477 0 : WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
5478 :
5479 0 : gtfifodbg = I915_READ(GTFIFODBG);
5480 0 : if (gtfifodbg) {
5481 : DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
5482 : gtfifodbg);
5483 0 : I915_WRITE(GTFIFODBG, gtfifodbg);
5484 0 : }
5485 :
5486 0 : cherryview_check_pctx(dev_priv);
5487 :
5488 : /* 1a & 1b: Get forcewake during program sequence. Although the driver
5489 : 	 * hasn't enabled a state yet where we need forcewake, BIOS may have. */
5490 0 : intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5491 :
5492 : /* Disable RC states. */
5493 0 : I915_WRITE(GEN6_RC_CONTROL, 0);
5494 :
5495 : /* 2a: Program RC6 thresholds.*/
5496 0 : I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
5497 0 : I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
5498 0 : I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
5499 :
5500 0 : for_each_ring(ring, dev_priv, i)
5501 0 : I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
5502 0 : I915_WRITE(GEN6_RC_SLEEP, 0);
5503 :
5504 : 	/* TO threshold set to 500 us (0x186 * 1.28 us) */
5505 0 : I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
5506 :
5507 : /* allows RC6 residency counter to work */
5508 0 : I915_WRITE(VLV_COUNTER_CONTROL,
5509 : _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
5510 : VLV_MEDIA_RC6_COUNT_EN |
5511 : VLV_RENDER_RC6_COUNT_EN));
5512 :
5513 : /* For now we assume BIOS is allocating and populating the PCBR */
5514 0 : pcbr = I915_READ(VLV_PCBR);
5515 :
5516 : /* 3: Enable RC6 */
5517 0 : if ((intel_enable_rc6(dev) & INTEL_RC6_ENABLE) &&
5518 0 : (pcbr >> VLV_PCBR_ADDR_SHIFT))
5519 0 : rc6_mode = GEN7_RC_CTL_TO_MODE;
5520 :
5521 0 : I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
5522 :
5523 : /* 4 Program defaults and thresholds for RPS*/
5524 0 : I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
5525 0 : I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
5526 0 : I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
5527 0 : I915_WRITE(GEN6_RP_UP_EI, 66000);
5528 0 : I915_WRITE(GEN6_RP_DOWN_EI, 350000);
5529 :
5530 0 : I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
5531 :
5532 : /* 5: Enable RPS */
5533 0 : I915_WRITE(GEN6_RP_CONTROL,
5534 : GEN6_RP_MEDIA_HW_NORMAL_MODE |
5535 : GEN6_RP_MEDIA_IS_GFX |
5536 : GEN6_RP_ENABLE |
5537 : GEN6_RP_UP_BUSY_AVG |
5538 : GEN6_RP_DOWN_IDLE_AVG);
5539 :
5540 : /* Setting Fixed Bias */
5541 : val = VLV_OVERRIDE_EN |
5542 : VLV_SOC_TDP_EN |
5543 : CHV_BIAS_CPU_50_SOC_50;
5544 0 : vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
5545 :
5546 0 : val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
5547 :
5548 : /* RPS code assumes GPLL is used */
5549 0 : WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
5550 :
5551 : DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
5552 : DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
5553 :
5554 0 : dev_priv->rps.cur_freq = (val >> 8) & 0xff;
5555 : DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n",
5556 : intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
5557 : dev_priv->rps.cur_freq);
5558 :
5559 : DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n",
5560 : intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
5561 : dev_priv->rps.efficient_freq);
5562 :
5563 0 : valleyview_set_rps(dev_priv->dev, dev_priv->rps.efficient_freq);
5564 :
5565 0 : intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5566 0 : }
5567 :
5568 0 : static void valleyview_enable_rps(struct drm_device *dev)
5569 : {
5570 0 : struct drm_i915_private *dev_priv = dev->dev_private;
5571 : struct intel_engine_cs *ring;
5572 : u32 gtfifodbg, val, rc6_mode = 0;
5573 : int i;
5574 :
5575 0 : WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
5576 :
5577 0 : valleyview_check_pctx(dev_priv);
5578 :
5579 0 : if ((gtfifodbg = I915_READ(GTFIFODBG))) {
5580 : DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
5581 : gtfifodbg);
5582 0 : I915_WRITE(GTFIFODBG, gtfifodbg);
5583 0 : }
5584 :
5585 : /* If VLV, Forcewake all wells, else re-direct to regular path */
5586 0 : intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5587 :
5588 : /* Disable RC states. */
5589 0 : I915_WRITE(GEN6_RC_CONTROL, 0);
5590 :
5591 0 : I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
5592 0 : I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
5593 0 : I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
5594 0 : I915_WRITE(GEN6_RP_UP_EI, 66000);
5595 0 : I915_WRITE(GEN6_RP_DOWN_EI, 350000);
5596 :
5597 0 : I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
5598 :
5599 0 : I915_WRITE(GEN6_RP_CONTROL,
5600 : GEN6_RP_MEDIA_TURBO |
5601 : GEN6_RP_MEDIA_HW_NORMAL_MODE |
5602 : GEN6_RP_MEDIA_IS_GFX |
5603 : GEN6_RP_ENABLE |
5604 : GEN6_RP_UP_BUSY_AVG |
5605 : GEN6_RP_DOWN_IDLE_CONT);
5606 :
5607 0 : I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
5608 0 : I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
5609 0 : I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
5610 :
5611 0 : for_each_ring(ring, dev_priv, i)
5612 0 : I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
5613 :
5614 0 : I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
5615 :
5616 : /* allows RC6 residency counter to work */
5617 0 : I915_WRITE(VLV_COUNTER_CONTROL,
5618 : _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN |
5619 : VLV_RENDER_RC0_COUNT_EN |
5620 : VLV_MEDIA_RC6_COUNT_EN |
5621 : VLV_RENDER_RC6_COUNT_EN));
5622 :
5623 0 : if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
5624 0 : rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
5625 :
5626 0 : intel_print_rc6_info(dev, rc6_mode);
5627 :
5628 0 : I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
5629 :
5630 : /* Setting Fixed Bias */
5631 : val = VLV_OVERRIDE_EN |
5632 : VLV_SOC_TDP_EN |
5633 : VLV_BIAS_CPU_125_SOC_875;
5634 0 : vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
5635 :
5636 0 : val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
5637 :
5638 : /* RPS code assumes GPLL is used */
5639 0 : WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
5640 :
5641 : DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
5642 : DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
5643 :
5644 0 : dev_priv->rps.cur_freq = (val >> 8) & 0xff;
5645 : DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n",
5646 : intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
5647 : dev_priv->rps.cur_freq);
5648 :
5649 : DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n",
5650 : intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
5651 : dev_priv->rps.efficient_freq);
5652 :
5653 0 : valleyview_set_rps(dev_priv->dev, dev_priv->rps.efficient_freq);
5654 :
5655 0 : intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5656 0 : }
5657 :
5658 0 : static unsigned long intel_pxfreq(u32 vidfreq)
5659 : {
5660 : unsigned long freq;
5661 0 : int div = (vidfreq & 0x3f0000) >> 16;
5662 0 : int post = (vidfreq & 0x3000) >> 12;
5663 0 : int pre = (vidfreq & 0x7);
5664 :
5665 0 : if (!pre)
5666 0 : return 0;
5667 :
5668 0 : freq = ((div * 133333) / ((1<<post) * pre));
5669 :
5670 0 : return freq;
5671 0 : }
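/*
 * intel_pxfreq() decodes a PXVFREQ register against the 133.333MHz
 * reference: a 6-bit divider at bits 21:16, a 2-bit post divider at
 * bits 13:12 and a 3-bit pre divider in the low bits. Worked example
 * (illustrative register value):
 */
#if 0
/*
 * vidfreq = 0x00060001: div = 6, post = 0, pre = 1
 *   freq = (6 * 133333) / ((1 << 0) * 1) = 799998 kHz ~= 800 MHz
 * A pre field of 0 would divide by zero, hence the early return of 0.
 */
#endif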
5672 :
5673 : static const struct cparams {
5674 : u16 i;
5675 : u16 t;
5676 : u16 m;
5677 : u16 c;
5678 : } cparams[] = {
5679 : { 1, 1333, 301, 28664 },
5680 : { 1, 1066, 294, 24460 },
5681 : { 1, 800, 294, 25192 },
5682 : { 0, 1333, 276, 27605 },
5683 : { 0, 1066, 276, 27605 },
5684 : { 0, 800, 231, 23784 },
5685 : };
5686 :
5687 0 : static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
5688 : {
5689 : u64 total_count, diff, ret;
5690 : u32 count1, count2, count3, m = 0, c = 0;
5691 0 : unsigned long now = jiffies_to_msecs(jiffies), diff1;
5692 : int i;
5693 :
5694 0 : assert_spin_locked(&mchdev_lock);
5695 :
5696 0 : diff1 = now - dev_priv->ips.last_time1;
5697 :
5698 : /* Prevent division-by-zero if we are asking too fast.
5699 : * Also, we don't get interesting results if we are polling
5700 : * faster than once in 10ms, so just return the saved value
5701 : * in such cases.
5702 : */
5703 0 : if (diff1 <= 10)
5704 0 : return dev_priv->ips.chipset_power;
5705 :
5706 0 : count1 = I915_READ(DMIEC);
5707 0 : count2 = I915_READ(DDREC);
5708 0 : count3 = I915_READ(CSIEC);
5709 :
5710 0 : total_count = count1 + count2 + count3;
5711 :
5712 : /* FIXME: handle per-counter overflow */
5713 0 : if (total_count < dev_priv->ips.last_count1) {
5714 0 : diff = ~0UL - dev_priv->ips.last_count1;
5715 0 : diff += total_count;
5716 0 : } else {
5717 0 : diff = total_count - dev_priv->ips.last_count1;
5718 : }
5719 :
5720 0 : for (i = 0; i < ARRAY_SIZE(cparams); i++) {
5721 0 : if (cparams[i].i == dev_priv->ips.c_m &&
5722 0 : cparams[i].t == dev_priv->ips.r_t) {
5723 0 : m = cparams[i].m;
5724 0 : c = cparams[i].c;
5725 0 : break;
5726 : }
5727 : }
5728 :
5729 0 : diff = div_u64(diff, diff1);
5730 0 : ret = ((m * diff) + c);
5731 0 : ret = div_u64(ret, 10);
5732 :
5733 0 : dev_priv->ips.last_count1 = total_count;
5734 0 : dev_priv->ips.last_time1 = now;
5735 :
5736 0 : dev_priv->ips.chipset_power = ret;
5737 :
5738 0 : return ret;
5739 0 : }
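/*
 * The cparams table above supplies the slope (m) and intercept (c) of a
 * per-configuration linear model: chipset power = (m * counts/ms + c) / 10.
 * Worked example for the { 1, 1333, 301, 28664 } row, with illustrative
 * counter deltas:
 */
#if 0
/*
 * total_count delta = 60200 over diff1 = 20ms -> diff = 3010 counts/ms
 * ret = (301 * 3010 + 28664) / 10 = (906010 + 28664) / 10 = 93467
 * which is what gets cached in dev_priv->ips.chipset_power.
 */
#endif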
5740 :
5741 0 : unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
5742 : {
5743 0 : struct drm_device *dev = dev_priv->dev;
5744 : unsigned long val;
5745 :
5746 0 : if (INTEL_INFO(dev)->gen != 5)
5747 0 : return 0;
5748 :
5749 0 : spin_lock_irq(&mchdev_lock);
5750 :
5751 0 : val = __i915_chipset_val(dev_priv);
5752 :
5753 0 : spin_unlock_irq(&mchdev_lock);
5754 :
5755 0 : return val;
5756 0 : }
5757 :
5758 0 : unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
5759 : {
5760 : unsigned long m, x, b;
5761 : u32 tsfs;
5762 :
5763 0 : tsfs = I915_READ(TSFS);
5764 :
5765 0 : m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
5766 0 : x = I915_READ8(TR1);
5767 :
5768 0 : b = tsfs & TSFS_INTR_MASK;
5769 :
5770 0 : return ((m * x) / 127) - b;
5771 : }
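/*
 * i915_mch_val() evaluates another fused linear model: slope m and
 * intercept b from TSFS, raw thermal reading x from TR1, giving
 * (m * x) / 127 - b. Worked example with illustrative register fields:
 */
#if 0
/*
 * m = 100, x = 90, b = 15  ->  (100 * 90) / 127 - 15 = 70 - 15 = 55
 */
#endif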
5772 :
5773 0 : static int _pxvid_to_vd(u8 pxvid)
5774 : {
5775 0 : if (pxvid == 0)
5776 0 : return 0;
5777 :
5778 0 : if (pxvid >= 8 && pxvid < 31)
5779 0 : pxvid = 31;
5780 :
5781 0 : return (pxvid + 2) * 125;
5782 0 : }
5783 :
5784 0 : static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
5785 : {
5786 0 : struct drm_device *dev = dev_priv->dev;
5787 0 : const int vd = _pxvid_to_vd(pxvid);
5788 0 : const int vm = vd - 1125;
5789 :
5790 0 : if (INTEL_INFO(dev)->is_mobile)
5791 0 : return vm > 0 ? vm : 0;
5792 :
5793 0 : return vd;
5794 0 : }
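/*
 * PXVID codes map to a voltage in 125-unit (millivolt-scale) steps with
 * a +2 offset, codes 8..30 being reserved (hence the clamp to 31); on
 * mobile parts the result is reported relative to 1125. Worked examples:
 */
#if 0
/*
 * pxvid = 0x3f (63): vd = (63 + 2) * 125 = 8125 -> mobile: 8125 - 1125 = 7000
 * pxvid = 20 (reserved): clamped to 31, vd = (31 + 2) * 125 = 4125
 */
#endif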
5795 :
5796 0 : static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
5797 : {
5798 : u64 now, diff, diffms;
5799 : u32 count;
5800 :
5801 0 : assert_spin_locked(&mchdev_lock);
5802 :
5803 0 : now = ktime_get_raw_ns();
5804 0 : diffms = now - dev_priv->ips.last_time2;
5805 0 : do_div(diffms, NSEC_PER_MSEC);
5806 :
5807 : /* Don't divide by 0 */
5808 0 : if (!diffms)
5809 0 : return;
5810 :
5811 0 : count = I915_READ(GFXEC);
5812 :
5813 0 : if (count < dev_priv->ips.last_count2) {
5814 0 : diff = ~0UL - dev_priv->ips.last_count2;
5815 0 : diff += count;
5816 0 : } else {
5817 0 : diff = count - dev_priv->ips.last_count2;
5818 : }
5819 :
5820 0 : dev_priv->ips.last_count2 = count;
5821 0 : dev_priv->ips.last_time2 = now;
5822 :
5823 : /* More magic constants... */
5824 0 : diff = diff * 1181;
5825 0 : diff = div_u64(diff, diffms * 10);
5826 0 : dev_priv->ips.gfx_power = diff;
5827 0 : }
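/*
 * Both this function and __i915_chipset_val() guard the hardware energy
 * counters against wraparound: when the new reading is below the cached
 * one, the delta is taken across the wrap point. A generic sketch of
 * the pattern used above:
 */
#if 0
static u64 counter_delta(u32 prev, u32 cur)
{
	if (cur < prev)
		return (u64)(~0U - prev) + cur;	/* counter wrapped */

	return cur - prev;
}
#endif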
5828 :
5829 0 : void i915_update_gfx_val(struct drm_i915_private *dev_priv)
5830 : {
5831 0 : struct drm_device *dev = dev_priv->dev;
5832 :
5833 0 : if (INTEL_INFO(dev)->gen != 5)
5834 0 : return;
5835 :
5836 0 : spin_lock_irq(&mchdev_lock);
5837 :
5838 0 : __i915_update_gfx_val(dev_priv);
5839 :
5840 0 : spin_unlock_irq(&mchdev_lock);
5841 0 : }
5842 :
5843 0 : static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
5844 : {
5845 : unsigned long t, corr, state1, corr2, state2;
5846 : u32 pxvid, ext_v;
5847 :
5848 0 : assert_spin_locked(&mchdev_lock);
5849 :
5850 0 : pxvid = I915_READ(PXVFREQ(dev_priv->rps.cur_freq));
5851 0 : pxvid = (pxvid >> 24) & 0x7f;
5852 0 : ext_v = pvid_to_extvid(dev_priv, pxvid);
5853 :
5854 0 : state1 = ext_v;
5855 :
5856 0 : t = i915_mch_val(dev_priv);
5857 :
5858 : /* Revel in the empirically derived constants */
5859 :
5860 : /* Correction factor in 1/100000 units */
5861 0 : if (t > 80)
5862 0 : corr = ((t * 2349) + 135940);
5863 0 : else if (t >= 50)
5864 0 : corr = ((t * 964) + 29317);
5865 : else /* < 50 */
5866 0 : corr = ((t * 301) + 1004);
5867 :
5868 0 : corr = corr * ((150142 * state1) / 10000 - 78642);
5869 0 : corr /= 100000;
5870 0 : corr2 = (corr * dev_priv->ips.corr);
5871 :
5872 0 : state2 = (corr2 * state1) / 10000;
5873 0 : state2 /= 100; /* convert to mW */
5874 :
5875 0 : __i915_update_gfx_val(dev_priv);
5876 :
5877 0 : return dev_priv->ips.gfx_power + state2;
5878 : }
5879 :
5880 0 : unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
5881 : {
5882 0 : struct drm_device *dev = dev_priv->dev;
5883 : unsigned long val;
5884 :
5885 0 : if (INTEL_INFO(dev)->gen != 5)
5886 0 : return 0;
5887 :
5888 0 : spin_lock_irq(&mchdev_lock);
5889 :
5890 0 : val = __i915_gfx_val(dev_priv);
5891 :
5892 0 : spin_unlock_irq(&mchdev_lock);
5893 :
5894 0 : return val;
5895 0 : }
5896 :
5897 : #ifdef __linux__
5898 : /**
5899 : * i915_read_mch_val - return value for IPS use
5900 : *
5901 : * Calculate and return a value for the IPS driver to use when deciding whether
5902 : * we have thermal and power headroom to increase CPU or GPU power budget.
5903 : */
5904 : unsigned long i915_read_mch_val(void)
5905 : {
5906 : struct drm_i915_private *dev_priv;
5907 : unsigned long chipset_val, graphics_val, ret = 0;
5908 :
5909 : spin_lock_irq(&mchdev_lock);
5910 : if (!i915_mch_dev)
5911 : goto out_unlock;
5912 : dev_priv = i915_mch_dev;
5913 :
5914 : chipset_val = __i915_chipset_val(dev_priv);
5915 : graphics_val = __i915_gfx_val(dev_priv);
5916 :
5917 : ret = chipset_val + graphics_val;
5918 :
5919 : out_unlock:
5920 : spin_unlock_irq(&mchdev_lock);
5921 :
5922 : return ret;
5923 : }
5924 : EXPORT_SYMBOL_GPL(i915_read_mch_val);
5925 :
5926 : /**
5927 : * i915_gpu_raise - raise GPU frequency limit
5928 : *
5929 : * Raise the limit; IPS indicates we have thermal headroom.
5930 : */
5931 : bool i915_gpu_raise(void)
5932 : {
5933 : struct drm_i915_private *dev_priv;
5934 : bool ret = true;
5935 :
5936 : spin_lock_irq(&mchdev_lock);
5937 : if (!i915_mch_dev) {
5938 : ret = false;
5939 : goto out_unlock;
5940 : }
5941 : dev_priv = i915_mch_dev;
5942 :
5943 : if (dev_priv->ips.max_delay > dev_priv->ips.fmax)
5944 : dev_priv->ips.max_delay--;
5945 :
5946 : out_unlock:
5947 : spin_unlock_irq(&mchdev_lock);
5948 :
5949 : return ret;
5950 : }
5951 : EXPORT_SYMBOL_GPL(i915_gpu_raise);
5952 :
5953 : /**
5954 : * i915_gpu_lower - lower GPU frequency limit
5955 : *
5956 : * IPS indicates we're close to a thermal limit, so throttle back the GPU
5957 : * frequency maximum.
5958 : */
5959 : bool i915_gpu_lower(void)
5960 : {
5961 : struct drm_i915_private *dev_priv;
5962 : bool ret = true;
5963 :
5964 : spin_lock_irq(&mchdev_lock);
5965 : if (!i915_mch_dev) {
5966 : ret = false;
5967 : goto out_unlock;
5968 : }
5969 : dev_priv = i915_mch_dev;
5970 :
5971 : if (dev_priv->ips.max_delay < dev_priv->ips.min_delay)
5972 : dev_priv->ips.max_delay++;
5973 :
5974 : out_unlock:
5975 : spin_unlock_irq(&mchdev_lock);
5976 :
5977 : return ret;
5978 : }
5979 : EXPORT_SYMBOL_GPL(i915_gpu_lower);
5980 :
5981 : /**
5982 : * i915_gpu_busy - indicate GPU business to IPS
5983 : *
5984 : * Tell the IPS driver whether or not the GPU is busy.
5985 : */
5986 : bool i915_gpu_busy(void)
5987 : {
5988 : struct drm_i915_private *dev_priv;
5989 : struct intel_engine_cs *ring;
5990 : bool ret = false;
5991 : int i;
5992 :
5993 : spin_lock_irq(&mchdev_lock);
5994 : if (!i915_mch_dev)
5995 : goto out_unlock;
5996 : dev_priv = i915_mch_dev;
5997 :
5998 : for_each_ring(ring, dev_priv, i)
5999 : ret |= !list_empty(&ring->request_list);
6000 :
6001 : out_unlock:
6002 : spin_unlock_irq(&mchdev_lock);
6003 :
6004 : return ret;
6005 : }
6006 : EXPORT_SYMBOL_GPL(i915_gpu_busy);
6007 :
6008 : /**
6009 : * i915_gpu_turbo_disable - disable graphics turbo
6010 : *
6011 : * Disable graphics turbo by resetting the max frequency and setting the
6012 : * current frequency to the default.
6013 : */
6014 : bool i915_gpu_turbo_disable(void)
6015 : {
6016 : struct drm_i915_private *dev_priv;
6017 : bool ret = true;
6018 :
6019 : spin_lock_irq(&mchdev_lock);
6020 : if (!i915_mch_dev) {
6021 : ret = false;
6022 : goto out_unlock;
6023 : }
6024 : dev_priv = i915_mch_dev;
6025 :
6026 : dev_priv->ips.max_delay = dev_priv->ips.fstart;
6027 :
6028 : if (!ironlake_set_drps(dev_priv->dev, dev_priv->ips.fstart))
6029 : ret = false;
6030 :
6031 : out_unlock:
6032 : spin_unlock_irq(&mchdev_lock);
6033 :
6034 : return ret;
6035 : }
6036 : EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
6037 :
6038 : /**
6039 : * Tells the intel_ips driver that the i915 driver is now loaded, if
6040 : * IPS got loaded first.
6041 : *
6042 : * This awkward dance is so that neither module has to depend on the
6043 : * other in order for IPS to do the appropriate communication of
6044 : * GPU turbo limits to i915.
6045 : */
6046 : static void
6047 : ips_ping_for_i915_load(void)
6048 : {
6049 : void (*link)(void);
6050 :
6051 : link = symbol_get(ips_link_to_i915_driver);
6052 : if (link) {
6053 : link();
6054 : symbol_put(ips_link_to_i915_driver);
6055 : }
6056 : }
6057 : #endif
6058 :
6059 0 : void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
6060 : {
6061 : /* We only register the i915 ips part with intel-ips once everything is
6062 : * set up, to avoid intel-ips sneaking in and reading bogus values. */
6063 0 : spin_lock_irq(&mchdev_lock);
6064 0 : i915_mch_dev = dev_priv;
6065 0 : spin_unlock_irq(&mchdev_lock);
6066 :
6067 : #ifdef __linux__
6068 : ips_ping_for_i915_load();
6069 : #endif
6070 0 : }
6071 :
6072 0 : void intel_gpu_ips_teardown(void)
6073 : {
6074 : #ifdef __linux__
6075 : spin_lock_irq(&mchdev_lock);
6076 : i915_mch_dev = NULL;
6077 : spin_unlock_irq(&mchdev_lock);
6078 : #endif
6079 0 : }
6080 :
6081 0 : static void intel_init_emon(struct drm_device *dev)
6082 : {
6083 0 : struct drm_i915_private *dev_priv = dev->dev_private;
6084 : u32 lcfuse;
6085 0 : u8 pxw[16];
6086 : int i;
6087 :
6088 : 	/* Disable PMON so we can program it */
6089 0 : I915_WRITE(ECR, 0);
6090 0 : POSTING_READ(ECR);
6091 :
6092 : /* Program energy weights for various events */
6093 0 : I915_WRITE(SDEW, 0x15040d00);
6094 0 : I915_WRITE(CSIEW0, 0x007f0000);
6095 0 : I915_WRITE(CSIEW1, 0x1e220004);
6096 0 : I915_WRITE(CSIEW2, 0x04000004);
6097 :
6098 0 : for (i = 0; i < 5; i++)
6099 0 : I915_WRITE(PEW(i), 0);
6100 0 : for (i = 0; i < 3; i++)
6101 0 : I915_WRITE(DEW(i), 0);
6102 :
6103 : /* Program P-state weights to account for frequency power adjustment */
6104 0 : for (i = 0; i < 16; i++) {
6105 0 : u32 pxvidfreq = I915_READ(PXVFREQ(i));
6106 0 : unsigned long freq = intel_pxfreq(pxvidfreq);
6107 0 : unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
6108 : PXVFREQ_PX_SHIFT;
6109 : unsigned long val;
6110 :
6111 0 : val = vid * vid;
6112 0 : val *= (freq / 1000);
6113 0 : val *= 255;
6114 0 : val /= (127*127*900);
6115 0 : if (val > 0xff)
6116 0 : DRM_ERROR("bad pxval: %ld\n", val);
6117 0 : pxw[i] = val;
6118 : }
6119 : /* Render standby states get 0 weight */
6120 0 : pxw[14] = 0;
6121 0 : pxw[15] = 0;
6122 :
6123 0 : for (i = 0; i < 4; i++) {
6124 0 : u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
6125 0 : (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
6126 0 : I915_WRITE(PXW(i), val);
6127 : }
6128 :
6129 : /* Adjust magic regs to magic values (more experimental results) */
6130 0 : I915_WRITE(OGW0, 0);
6131 0 : I915_WRITE(OGW1, 0);
6132 0 : I915_WRITE(EG0, 0x00007f00);
6133 0 : I915_WRITE(EG1, 0x0000000e);
6134 0 : I915_WRITE(EG2, 0x000e0000);
6135 0 : I915_WRITE(EG3, 0x68000300);
6136 0 : I915_WRITE(EG4, 0x42000000);
6137 0 : I915_WRITE(EG5, 0x00140031);
6138 0 : I915_WRITE(EG6, 0);
6139 0 : I915_WRITE(EG7, 0);
6140 :
6141 0 : for (i = 0; i < 8; i++)
6142 0 : I915_WRITE(PXWL(i), 0);
6143 :
6144 : /* Enable PMON + select events */
6145 0 : I915_WRITE(ECR, 0x80000019);
6146 :
6147 0 : lcfuse = I915_READ(LCFUSE02);
6148 :
6149 0 : dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
6150 0 : }
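/*
 * The P-state weight loop above folds voltage and frequency into one
 * byte per state: weight = vid^2 * (freq/1000) * 255 / (127*127*900),
 * i.e. power relative to a 127-VID/900MHz reference scaled to 0..255.
 * Worked example with illustrative fuse values:
 */
#if 0
/*
 * vid = 100, freq = 800000 kHz:
 *   val = 100*100 * (800000/1000) * 255 / (127*127*900)
 *       = 10000 * 800 * 255 / 14516100
 *       = 2040000000 / 14516100 ~= 140 -> pxw[i] = 140 (0x8c)
 */
#endif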
6151 :
6152 0 : void intel_init_gt_powersave(struct drm_device *dev)
6153 : {
6154 0 : i915.enable_rc6 = sanitize_rc6_option(dev, i915.enable_rc6);
6155 :
6156 0 : if (IS_CHERRYVIEW(dev))
6157 0 : cherryview_init_gt_powersave(dev);
6158 0 : else if (IS_VALLEYVIEW(dev))
6159 0 : valleyview_init_gt_powersave(dev);
6160 0 : }
6161 :
6162 0 : void intel_cleanup_gt_powersave(struct drm_device *dev)
6163 : {
6164 0 : if (IS_CHERRYVIEW(dev))
6165 : return;
6166 0 : else if (IS_VALLEYVIEW(dev))
6167 0 : valleyview_cleanup_gt_powersave(dev);
6168 0 : }
6169 :
6170 0 : static void gen6_suspend_rps(struct drm_device *dev)
6171 : {
6172 0 : struct drm_i915_private *dev_priv = dev->dev_private;
6173 :
6174 0 : flush_delayed_work(&dev_priv->rps.delayed_resume_work);
6175 :
6176 0 : gen6_disable_rps_interrupts(dev);
6177 0 : }
6178 :
6179 : /**
6180 : * intel_suspend_gt_powersave - suspend PM work and helper threads
6181 : * @dev: drm device
6182 : *
6183 :  * We don't want to disable RC6 or other features here; we just want
6184 : * to make sure any work we've queued has finished and won't bother
6185 : * us while we're suspended.
6186 : */
6187 0 : void intel_suspend_gt_powersave(struct drm_device *dev)
6188 : {
6189 0 : struct drm_i915_private *dev_priv = dev->dev_private;
6190 :
6191 0 : if (INTEL_INFO(dev)->gen < 6)
6192 0 : return;
6193 :
6194 0 : gen6_suspend_rps(dev);
6195 :
6196 : /* Force GPU to min freq during suspend */
6197 0 : gen6_rps_idle(dev_priv);
6198 0 : }
6199 :
6200 0 : void intel_disable_gt_powersave(struct drm_device *dev)
6201 : {
6202 0 : struct drm_i915_private *dev_priv = dev->dev_private;
6203 :
6204 0 : if (IS_IRONLAKE_M(dev)) {
6205 0 : ironlake_disable_drps(dev);
6206 0 : } else if (INTEL_INFO(dev)->gen >= 6) {
6207 0 : intel_suspend_gt_powersave(dev);
6208 :
6209 0 : mutex_lock(&dev_priv->rps.hw_lock);
6210 0 : if (INTEL_INFO(dev)->gen >= 9)
6211 0 : gen9_disable_rps(dev);
6212 0 : else if (IS_CHERRYVIEW(dev))
6213 0 : cherryview_disable_rps(dev);
6214 0 : else if (IS_VALLEYVIEW(dev))
6215 0 : valleyview_disable_rps(dev);
6216 : else
6217 0 : gen6_disable_rps(dev);
6218 :
6219 0 : dev_priv->rps.enabled = false;
6220 0 : mutex_unlock(&dev_priv->rps.hw_lock);
6221 0 : }
6222 0 : }
6223 :
6224 0 : static void intel_gen6_powersave_work(struct work_struct *work)
6225 : {
6226 : struct drm_i915_private *dev_priv =
6227 0 : container_of(work, struct drm_i915_private,
6228 : rps.delayed_resume_work.work);
6229 0 : struct drm_device *dev = dev_priv->dev;
6230 :
6231 0 : mutex_lock(&dev_priv->rps.hw_lock);
6232 :
6233 0 : gen6_reset_rps_interrupts(dev);
6234 :
6235 0 : if (IS_CHERRYVIEW(dev)) {
6236 0 : cherryview_enable_rps(dev);
6237 0 : } else if (IS_VALLEYVIEW(dev)) {
6238 0 : valleyview_enable_rps(dev);
6239 0 : } else if (INTEL_INFO(dev)->gen >= 9) {
6240 0 : gen9_enable_rc6(dev);
6241 0 : gen9_enable_rps(dev);
6242 0 : if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
6243 0 : __gen6_update_ring_freq(dev);
6244 0 : } else if (IS_BROADWELL(dev)) {
6245 0 : gen8_enable_rps(dev);
6246 0 : __gen6_update_ring_freq(dev);
6247 0 : } else {
6248 0 : gen6_enable_rps(dev);
6249 0 : __gen6_update_ring_freq(dev);
6250 : }
6251 :
6252 0 : WARN_ON(dev_priv->rps.max_freq < dev_priv->rps.min_freq);
6253 0 : WARN_ON(dev_priv->rps.idle_freq > dev_priv->rps.max_freq);
6254 :
6255 0 : WARN_ON(dev_priv->rps.efficient_freq < dev_priv->rps.min_freq);
6256 0 : WARN_ON(dev_priv->rps.efficient_freq > dev_priv->rps.max_freq);
6257 :
6258 0 : dev_priv->rps.enabled = true;
6259 :
6260 0 : gen6_enable_rps_interrupts(dev);
6261 :
6262 0 : mutex_unlock(&dev_priv->rps.hw_lock);
6263 :
6264 0 : intel_runtime_pm_put(dev_priv);
6265 0 : }
6266 :
6267 0 : void intel_enable_gt_powersave(struct drm_device *dev)
6268 : {
6269 0 : struct drm_i915_private *dev_priv = dev->dev_private;
6270 :
6271 : /* Powersaving is controlled by the host when inside a VM */
6272 0 : if (intel_vgpu_active(dev))
6273 0 : return;
6274 :
6275 0 : if (IS_IRONLAKE_M(dev)) {
6276 0 : mutex_lock(&dev->struct_mutex);
6277 0 : ironlake_enable_drps(dev);
6278 0 : intel_init_emon(dev);
6279 0 : mutex_unlock(&dev->struct_mutex);
6280 0 : } else if (INTEL_INFO(dev)->gen >= 6) {
6281 : /*
6282 : * PCU communication is slow and this doesn't need to be
6283 : * done at any specific time, so do this out of our fast path
6284 : * to make resume and init faster.
6285 : *
6286 : * We depend on the HW RC6 power context save/restore
6287 : * mechanism when entering D3 through runtime PM suspend. So
6288 : * disable RPM until RPS/RC6 is properly setup. We can only
6289 : * get here via the driver load/system resume/runtime resume
6290 : * paths, so the _noresume version is enough (and in case of
6291 : * runtime resume it's necessary).
6292 : */
6293 0 : if (schedule_delayed_work(&dev_priv->rps.delayed_resume_work,
6294 0 : round_jiffies_up_relative(HZ)))
6295 0 : intel_runtime_pm_get_noresume(dev_priv);
6296 : }
6297 0 : }
6298 :
6299 0 : void intel_reset_gt_powersave(struct drm_device *dev)
6300 : {
6301 0 : struct drm_i915_private *dev_priv = dev->dev_private;
6302 :
6303 0 : if (INTEL_INFO(dev)->gen < 6)
6304 0 : return;
6305 :
6306 0 : gen6_suspend_rps(dev);
6307 0 : dev_priv->rps.enabled = false;
6308 0 : }
6309 :
6310 0 : static void ibx_init_clock_gating(struct drm_device *dev)
6311 : {
6312 0 : struct drm_i915_private *dev_priv = dev->dev_private;
6313 :
6314 : /*
6315 : * On Ibex Peak and Cougar Point, we need to disable clock
6316 : * gating for the panel power sequencer or it will fail to
6317 : * start up when no ports are active.
6318 : */
6319 0 : I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
6320 0 : }
6321 :
6322 0 : static void g4x_disable_trickle_feed(struct drm_device *dev)
6323 : {
6324 0 : struct drm_i915_private *dev_priv = dev->dev_private;
6325 : enum pipe pipe;
6326 :
6327 0 : for_each_pipe(dev_priv, pipe) {
6328 0 : I915_WRITE(DSPCNTR(pipe),
6329 : I915_READ(DSPCNTR(pipe)) |
6330 : DISPPLANE_TRICKLE_FEED_DISABLE);
6331 :
6332 0 : I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
6333 0 : POSTING_READ(DSPSURF(pipe));
6334 : }
6335 0 : }
6336 :
6337 0 : static void ilk_init_lp_watermarks(struct drm_device *dev)
6338 : {
6339 0 : struct drm_i915_private *dev_priv = dev->dev_private;
6340 :
6341 0 : I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN);
6342 0 : I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN);
6343 0 : I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN);
6344 :
6345 : /*
6346 : * Don't touch WM1S_LP_EN here.
6347 : * Doing so could cause underruns.
6348 : */
6349 0 : }
6350 :
6351 0 : static void ironlake_init_clock_gating(struct drm_device *dev)
6352 : {
6353 0 : struct drm_i915_private *dev_priv = dev->dev_private;
6354 : uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
6355 :
6356 : /*
6357 : * Required for FBC
6358 : * WaFbcDisableDpfcClockGating:ilk
6359 : */
6360 : dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
6361 : ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
6362 : ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
6363 :
6364 0 : I915_WRITE(PCH_3DCGDIS0,
6365 : MARIUNIT_CLOCK_GATE_DISABLE |
6366 : SVSMUNIT_CLOCK_GATE_DISABLE);
6367 0 : I915_WRITE(PCH_3DCGDIS1,
6368 : VFMUNIT_CLOCK_GATE_DISABLE);
6369 :
6370 : /*
6371 : * According to the spec the following bits should be set in
6372 : * order to enable memory self-refresh
6373 : * The bit 22/21 of 0x42004
6374 : * The bit 5 of 0x42020
6375 : * The bit 15 of 0x45000
6376 : */
6377 0 : I915_WRITE(ILK_DISPLAY_CHICKEN2,
6378 : (I915_READ(ILK_DISPLAY_CHICKEN2) |
6379 : ILK_DPARB_GATE | ILK_VSDPFD_FULL));
6380 : dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
6381 :
6382 : /* WaFbcWakeMemOn:skl,bxt,kbl */
6383 0 : I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
6384 : DISP_FBC_WM_DIS |
6385 : DISP_FBC_MEMORY_WAKE);
6386 :
6387 0 : ilk_init_lp_watermarks(dev);
6388 :
6389 : /*
6390 : * Based on the document from hardware guys the following bits
6391 : * should be set unconditionally in order to enable FBC.
6392 : * The bit 22 of 0x42000
6393 : * The bit 22 of 0x42004
6394 : * The bit 7,8,9 of 0x42020.
6395 : */
6396 0 : if (IS_IRONLAKE_M(dev)) {
6397 : /* WaFbcAsynchFlipDisableFbcQueue:ilk */
6398 0 : I915_WRITE(ILK_DISPLAY_CHICKEN1,
6399 : I915_READ(ILK_DISPLAY_CHICKEN1) |
6400 : ILK_FBCQ_DIS);
6401 0 : I915_WRITE(ILK_DISPLAY_CHICKEN2,
6402 : I915_READ(ILK_DISPLAY_CHICKEN2) |
6403 : ILK_DPARB_GATE);
6404 0 : }
6405 :
6406 0 : I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
6407 :
6408 0 : I915_WRITE(ILK_DISPLAY_CHICKEN2,
6409 : I915_READ(ILK_DISPLAY_CHICKEN2) |
6410 : ILK_ELPIN_409_SELECT);
6411 0 : I915_WRITE(_3D_CHICKEN2,
6412 : _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
6413 : _3D_CHICKEN2_WM_READ_PIPELINED);
6414 :
6415 : /* WaDisableRenderCachePipelinedFlush:ilk */
6416 0 : I915_WRITE(CACHE_MODE_0,
6417 : _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
6418 :
6419 : /* WaDisable_RenderCache_OperationalFlush:ilk */
6420 0 : I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6421 :
6422 0 : g4x_disable_trickle_feed(dev);
6423 :
6424 0 : ibx_init_clock_gating(dev);
6425 0 : }
6426 :
6427 0 : static void cpt_init_clock_gating(struct drm_device *dev)
6428 : {
6429 0 : struct drm_i915_private *dev_priv = dev->dev_private;
6430 : int pipe;
6431 : uint32_t val;
6432 :
6433 : /*
6434 : * On Ibex Peak and Cougar Point, we need to disable clock
6435 : * gating for the panel power sequencer or it will fail to
6436 : * start up when no ports are active.
6437 : */
6438 0 : I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
6439 : PCH_DPLUNIT_CLOCK_GATE_DISABLE |
6440 : PCH_CPUNIT_CLOCK_GATE_DISABLE);
6441 0 : I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
6442 : DPLS_EDP_PPS_FIX_DIS);
6443 : 	/* The below fixes a weird display corruption (a few pixels shifted
6444 : 	 * downward) seen only on the LVDS panels of some HP laptops with
6445 : 	 * Ivy Bridge. */
6446 0 : for_each_pipe(dev_priv, pipe) {
6447 0 : val = I915_READ(TRANS_CHICKEN2(pipe));
6448 0 : val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
6449 0 : val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
6450 0 : if (dev_priv->vbt.fdi_rx_polarity_inverted)
6451 0 : val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
6452 0 : val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
6453 0 : val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
6454 0 : val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
6455 0 : I915_WRITE(TRANS_CHICKEN2(pipe), val);
6456 : }
6457 : /* WADP0ClockGatingDisable */
6458 0 : for_each_pipe(dev_priv, pipe) {
6459 0 : I915_WRITE(TRANS_CHICKEN1(pipe),
6460 : TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
6461 : }
6462 0 : }
6463 :
6464 0 : static void gen6_check_mch_setup(struct drm_device *dev)
6465 : {
6466 0 : struct drm_i915_private *dev_priv = dev->dev_private;
6467 : uint32_t tmp;
6468 :
6469 0 : tmp = I915_READ(MCH_SSKPD);
6470 0 : if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
6471 : DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n",
6472 : tmp);
6473 0 : }
6474 :
6475 0 : static void gen6_init_clock_gating(struct drm_device *dev)
6476 : {
6477 0 : struct drm_i915_private *dev_priv = dev->dev_private;
6478 : uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
6479 :
6480 0 : I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
6481 :
6482 0 : I915_WRITE(ILK_DISPLAY_CHICKEN2,
6483 : I915_READ(ILK_DISPLAY_CHICKEN2) |
6484 : ILK_ELPIN_409_SELECT);
6485 :
6486 : /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
6487 0 : I915_WRITE(_3D_CHICKEN,
6488 : _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
6489 :
6490 : /* WaDisable_RenderCache_OperationalFlush:snb */
6491 0 : I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6492 :
6493 : /*
6494 : 	 * BSpec recommends 8x4 when MSAA is used,
6495 : * however in practice 16x4 seems fastest.
6496 : *
6497 : * Note that PS/WM thread counts depend on the WIZ hashing
6498 : * disable bit, which we don't touch here, but it's good
6499 : * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
6500 : */
6501 0 : I915_WRITE(GEN6_GT_MODE,
6502 : _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
6503 :
6504 0 : ilk_init_lp_watermarks(dev);
6505 :
6506 0 : I915_WRITE(CACHE_MODE_0,
6507 : _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
6508 :
6509 0 : I915_WRITE(GEN6_UCGCTL1,
6510 : I915_READ(GEN6_UCGCTL1) |
6511 : GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
6512 : GEN6_CSUNIT_CLOCK_GATE_DISABLE);
6513 :
6514 : /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
6515 : * gating disable must be set. Failure to set it results in
6516 : * flickering pixels due to Z write ordering failures after
6517 : * some amount of runtime in the Mesa "fire" demo, and Unigine
6518 : * Sanctuary and Tropics, and apparently anything else with
6519 : * alpha test or pixel discard.
6520 : *
6521 : * According to the spec, bit 11 (RCCUNIT) must also be set,
6522 : * but we didn't debug actual testcases to find it out.
6523 : *
6524 : * WaDisableRCCUnitClockGating:snb
6525 : * WaDisableRCPBUnitClockGating:snb
6526 : */
6527 0 : I915_WRITE(GEN6_UCGCTL2,
6528 : GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
6529 : GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
6530 :
6531 : /* WaStripsFansDisableFastClipPerformanceFix:snb */
6532 0 : I915_WRITE(_3D_CHICKEN3,
6533 : _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
6534 :
6535 : /*
6536 : * Bspec says:
6537 : * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
6538 : * 3DSTATE_SF number of SF output attributes is more than 16."
6539 : */
6540 0 : I915_WRITE(_3D_CHICKEN3,
6541 : _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
6542 :
6543 : /*
6544 : * According to the spec the following bits should be
6545 : * set in order to enable memory self-refresh and fbc:
6546 : * The bit21 and bit22 of 0x42000
6547 : * The bit21 and bit22 of 0x42004
6548 : * The bit5 and bit7 of 0x42020
6549 : * The bit14 of 0x70180
6550 : * The bit14 of 0x71180
6551 : *
6552 : * WaFbcAsynchFlipDisableFbcQueue:snb
6553 : */
6554 0 : I915_WRITE(ILK_DISPLAY_CHICKEN1,
6555 : I915_READ(ILK_DISPLAY_CHICKEN1) |
6556 : ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
6557 0 : I915_WRITE(ILK_DISPLAY_CHICKEN2,
6558 : I915_READ(ILK_DISPLAY_CHICKEN2) |
6559 : ILK_DPARB_GATE | ILK_VSDPFD_FULL);
6560 0 : I915_WRITE(ILK_DSPCLK_GATE_D,
6561 : I915_READ(ILK_DSPCLK_GATE_D) |
6562 : ILK_DPARBUNIT_CLOCK_GATE_ENABLE |
6563 : ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
6564 :
6565 0 : g4x_disable_trickle_feed(dev);
6566 :
6567 0 : cpt_init_clock_gating(dev);
6568 :
6569 0 : gen6_check_mch_setup(dev);
6570 0 : }
6571 :
6572 0 : static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
6573 : {
6574 0 : uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE);
6575 :
6576 : /*
6577 : * WaVSThreadDispatchOverride:ivb,vlv
6578 : *
6579 : * This actually overrides the dispatch
6580 : * mode for all thread types.
6581 : */
6582 0 : reg &= ~GEN7_FF_SCHED_MASK;
6583 : reg |= GEN7_FF_TS_SCHED_HW;
6584 : reg |= GEN7_FF_VS_SCHED_HW;
6585 : reg |= GEN7_FF_DS_SCHED_HW;
6586 :
6587 0 : I915_WRITE(GEN7_FF_THREAD_MODE, reg);
6588 0 : }
6589 :
6590 0 : static void lpt_init_clock_gating(struct drm_device *dev)
6591 : {
6592 0 : struct drm_i915_private *dev_priv = dev->dev_private;
6593 :
6594 : /*
6595 : * TODO: this bit should only be enabled when really needed, then
6596 : * disabled when not needed anymore in order to save power.
6597 : */
6598 0 : if (HAS_PCH_LPT_LP(dev))
6599 0 : I915_WRITE(SOUTH_DSPCLK_GATE_D,
6600 : I915_READ(SOUTH_DSPCLK_GATE_D) |
6601 : PCH_LP_PARTITION_LEVEL_DISABLE);
6602 :
6603 : /* WADPOClockGatingDisable:hsw */
6604 0 : I915_WRITE(TRANS_CHICKEN1(PIPE_A),
6605 : I915_READ(TRANS_CHICKEN1(PIPE_A)) |
6606 : TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
6607 0 : }
6608 :
6609 0 : static void lpt_suspend_hw(struct drm_device *dev)
6610 : {
6611 0 : struct drm_i915_private *dev_priv = dev->dev_private;
6612 :
6613 0 : if (HAS_PCH_LPT_LP(dev)) {
6614 0 : uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D);
6615 :
6616 0 : val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
6617 0 : I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
6618 0 : }
6619 0 : }
6620 :
6621 0 : static void broadwell_init_clock_gating(struct drm_device *dev)
6622 : {
6623 0 : struct drm_i915_private *dev_priv = dev->dev_private;
6624 : enum pipe pipe;
6625 : uint32_t misccpctl;
6626 :
6627 0 : ilk_init_lp_watermarks(dev);
6628 :
6629 : /* WaSwitchSolVfFArbitrationPriority:bdw */
6630 0 : I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
6631 :
6632 : /* WaPsrDPAMaskVBlankInSRD:bdw */
6633 0 : I915_WRITE(CHICKEN_PAR1_1,
6634 : I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
6635 :
6636 : /* WaPsrDPRSUnmaskVBlankInSRD:bdw */
6637 0 : for_each_pipe(dev_priv, pipe) {
6638 0 : I915_WRITE(CHICKEN_PIPESL_1(pipe),
6639 : I915_READ(CHICKEN_PIPESL_1(pipe)) |
6640 : BDW_DPRS_MASK_VBLANK_SRD);
6641 : }
6642 :
6643 : /* WaVSRefCountFullforceMissDisable:bdw */
6644 : /* WaDSRefCountFullforceMissDisable:bdw */
6645 0 : I915_WRITE(GEN7_FF_THREAD_MODE,
6646 : I915_READ(GEN7_FF_THREAD_MODE) &
6647 : ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
6648 :
6649 0 : I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
6650 : _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
6651 :
6652 : /* WaDisableSDEUnitClockGating:bdw */
6653 0 : I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
6654 : GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
6655 :
6656 : /*
6657 : * WaProgramL3SqcReg1Default:bdw
6658 : * WaTempDisableDOPClkGating:bdw
6659 : */
6660 0 : misccpctl = I915_READ(GEN7_MISCCPCTL);
6661 0 : I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
6662 0 : I915_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT);
6663 : /*
6664 : * Wait at least 100 clocks before re-enabling clock gating. See
6665 : * the definition of L3SQCREG1 in BSpec.
6666 : */
6667 0 : POSTING_READ(GEN8_L3SQCREG1);
6668 0 : udelay(1);
6669 0 : I915_WRITE(GEN7_MISCCPCTL, misccpctl);
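/*
 * The save/disable/write/delay/restore sequence above is a recurring
 * pattern: DOP clock gating is turned off around a register update that
 * must not race with gating. A hedged sketch of the same steps as a
 * standalone helper (bdw_write_l3sqcreg1() is hypothetical, not part of
 * the driver):
 */
#if 0
static void bdw_write_l3sqcreg1(struct drm_i915_private *dev_priv, u32 val)
{
	u32 misccpctl = I915_READ(GEN7_MISCCPCTL);

	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
	I915_WRITE(GEN8_L3SQCREG1, val);
	POSTING_READ(GEN8_L3SQCREG1);	/* flush the write... */
	udelay(1);			/* ...then wait >= 100 clocks */
	I915_WRITE(GEN7_MISCCPCTL, misccpctl);	/* restore DOP gating */
}
#endif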
6670 :
6671 : /*
6672 : * WaGttCachingOffByDefault:bdw
6673 : * GTT cache may not work with big pages, so if those
6674 : * are ever enabled GTT cache may need to be disabled.
6675 : */
6676 0 : I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
6677 :
6678 0 : lpt_init_clock_gating(dev);
6679 0 : }
6680 :
6681 0 : static void haswell_init_clock_gating(struct drm_device *dev)
6682 : {
6683 0 : struct drm_i915_private *dev_priv = dev->dev_private;
6684 :
6685 0 : ilk_init_lp_watermarks(dev);
6686 :
6687 : /* L3 caching of data atomics doesn't work -- disable it. */
6688 0 : I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
6689 0 : I915_WRITE(HSW_ROW_CHICKEN3,
6690 : _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
6691 :
6692 : /* This is required by WaCatErrorRejectionIssue:hsw */
6693 0 : I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
6694 : I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
6695 : GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
6696 :
6697 : /* WaVSRefCountFullforceMissDisable:hsw */
6698 0 : I915_WRITE(GEN7_FF_THREAD_MODE,
6699 : I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
6700 :
6701 : /* WaDisable_RenderCache_OperationalFlush:hsw */
6702 0 : I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6703 :
6704 : /* enable HiZ Raw Stall Optimization */
6705 0 : I915_WRITE(CACHE_MODE_0_GEN7,
6706 : _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
6707 :
6708 : /* WaDisable4x2SubspanOptimization:hsw */
6709 0 : I915_WRITE(CACHE_MODE_1,
6710 : _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
6711 :
6712 : /*
6713 : * BSpec recommends 8x4 when MSAA is used,
6714 : * however in practice 16x4 seems fastest.
6715 : *
6716 : * Note that PS/WM thread counts depend on the WIZ hashing
6717 : * disable bit, which we don't touch here, but it's good
6718 : * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
6719 : */
6720 0 : I915_WRITE(GEN7_GT_MODE,
6721 : _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
6722 :
6723 : /* WaSampleCChickenBitEnable:hsw */
6724 0 : I915_WRITE(HALF_SLICE_CHICKEN3,
6725 : _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE));
6726 :
6727 : /* WaSwitchSolVfFArbitrationPriority:hsw */
6728 0 : I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
6729 :
6730 : /* WaRsPkgCStateDisplayPMReq:hsw */
6731 0 : I915_WRITE(CHICKEN_PAR1_1,
6732 : I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES);
6733 :
6734 0 : lpt_init_clock_gating(dev);
6735 0 : }
6736 :
6737 0 : static void ivybridge_init_clock_gating(struct drm_device *dev)
6738 : {
6739 0 : struct drm_i915_private *dev_priv = dev->dev_private;
6740 : uint32_t snpcr;
6741 :
6742 0 : ilk_init_lp_watermarks(dev);
6743 :
6744 0 : I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
6745 :
6746 : /* WaDisableEarlyCull:ivb */
6747 0 : I915_WRITE(_3D_CHICKEN3,
6748 : _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
6749 :
6750 : /* WaDisableBackToBackFlipFix:ivb */
6751 0 : I915_WRITE(IVB_CHICKEN3,
6752 : CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
6753 : CHICKEN3_DGMG_DONE_FIX_DISABLE);
6754 :
6755 : /* WaDisablePSDDualDispatchEnable:ivb */
6756 0 : if (IS_IVB_GT1(dev))
6757 0 : I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
6758 : _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
6759 :
6760 : /* WaDisable_RenderCache_OperationalFlush:ivb */
6761 0 : I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6762 :
6763 : /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
6764 0 : I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
6765 : GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
6766 :
6767 : /* WaApplyL3ControlAndL3ChickenMode:ivb */
6768 0 : I915_WRITE(GEN7_L3CNTLREG1,
6769 : GEN7_WA_FOR_GEN7_L3_CONTROL);
6770 0 : I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
6771 : GEN7_WA_L3_CHICKEN_MODE);
6772 0 : if (IS_IVB_GT1(dev))
6773 0 : I915_WRITE(GEN7_ROW_CHICKEN2,
6774 : _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
6775 : else {
6776 : /* must write both registers */
6777 0 : I915_WRITE(GEN7_ROW_CHICKEN2,
6778 : _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
6779 0 : I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
6780 : _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
6781 : }
6782 :
6783 : /* WaForceL3Serialization:ivb */
6784 0 : I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
6785 : ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
6786 :
6787 : /*
6788 : * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
6789 : * This implements the WaDisableRCZUnitClockGating:ivb workaround.
6790 : */
6791 0 : I915_WRITE(GEN6_UCGCTL2,
6792 : GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
6793 :
6794 : /* This is required by WaCatErrorRejectionIssue:ivb */
6795 0 : I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
6796 : I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
6797 : GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
6798 :
6799 0 : g4x_disable_trickle_feed(dev);
6800 :
6801 0 : gen7_setup_fixed_func_scheduler(dev_priv);
6802 :
6803 : if (0) { /* causes HiZ corruption on ivb:gt1 */
6804 : /* enable HiZ Raw Stall Optimization */
6805 : I915_WRITE(CACHE_MODE_0_GEN7,
6806 : _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
6807 : }
6808 :
6809 : /* WaDisable4x2SubspanOptimization:ivb */
6810 0 : I915_WRITE(CACHE_MODE_1,
6811 : _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
6812 :
6813 : /*
6814 : * BSpec recommends 8x4 when MSAA is used,
6815 : * however in practice 16x4 seems fastest.
6816 : *
6817 : * Note that PS/WM thread counts depend on the WIZ hashing
6818 : * disable bit, which we don't touch here, but it's good
6819 : * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
6820 : */
6821 0 : I915_WRITE(GEN7_GT_MODE,
6822 : _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
6823 :
6824 0 : snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
6825 0 : snpcr &= ~GEN6_MBC_SNPCR_MASK;
6826 0 : snpcr |= GEN6_MBC_SNPCR_MED;
6827 0 : I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
6828 :
6829 0 : if (!HAS_PCH_NOP(dev))
6830 0 : cpt_init_clock_gating(dev);
6831 :
6832 0 : gen6_check_mch_setup(dev);
6833 0 : }
6834 :
6835 0 : static void vlv_init_display_clock_gating(struct drm_i915_private *dev_priv)
6836 : {
6837 : u32 val;
6838 :
6839 : /*
6840 : * On driver load, a pipe may be active and driving a DSI display.
6841 : * Preserve DPOUNIT_CLOCK_GATE_DISABLE to avoid the pipe getting stuck
6842 : * (and never recovering) in this case. intel_dsi_post_disable() will
6843 : * clear it when we turn off the display.
6844 : */
6845 0 : val = I915_READ(DSPCLK_GATE_D);
6846 0 : val &= DPOUNIT_CLOCK_GATE_DISABLE;
6847 0 : val |= VRHUNIT_CLOCK_GATE_DISABLE;
6848 0 : I915_WRITE(DSPCLK_GATE_D, val);
6849 :
6850 : /*
6851 : * Disable trickle feed and enable pnd deadline calculation
6852 : */
6853 0 : I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE);
6854 0 : I915_WRITE(CBR1_VLV, 0);
6855 0 : }
6856 :
6857 0 : static void valleyview_init_clock_gating(struct drm_device *dev)
6858 : {
6859 0 : struct drm_i915_private *dev_priv = dev->dev_private;
6860 :
6861 0 : vlv_init_display_clock_gating(dev_priv);
6862 :
6863 : /* WaDisableEarlyCull:vlv */
6864 0 : I915_WRITE(_3D_CHICKEN3,
6865 : _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
6866 :
6867 : /* WaDisableBackToBackFlipFix:vlv */
6868 0 : I915_WRITE(IVB_CHICKEN3,
6869 : CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
6870 : CHICKEN3_DGMG_DONE_FIX_DISABLE);
6871 :
6872 : /* WaPsdDispatchEnable:vlv */
6873 : /* WaDisablePSDDualDispatchEnable:vlv */
6874 0 : I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
6875 : _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
6876 : GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
6877 :
6878 : /* WaDisable_RenderCache_OperationalFlush:vlv */
6879 0 : I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6880 :
6881 : /* WaForceL3Serialization:vlv */
6882 0 : I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
6883 : ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
6884 :
6885 : /* WaDisableDopClockGating:vlv */
6886 0 : I915_WRITE(GEN7_ROW_CHICKEN2,
6887 : _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
6888 :
6889 : /* This is required by WaCatErrorRejectionIssue:vlv */
6890 0 : I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
6891 : I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
6892 : GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
6893 :
6894 0 : gen7_setup_fixed_func_scheduler(dev_priv);
6895 :
6896 : /*
6897 : * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
6898 : * This implements the WaDisableRCZUnitClockGating:vlv workaround.
6899 : */
6900 0 : I915_WRITE(GEN6_UCGCTL2,
6901 : GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
6902 :
6903 : /* WaDisableL3Bank2xClockGate:vlv
6904 : * Disabling L3 clock gating - MMIO 0x940c[25] = 1
6905 : * Set bit 25 to disable L3_BANK_2x_CLK_GATING */
6906 0 : I915_WRITE(GEN7_UCGCTL4,
6907 : I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
6908 :
6909 : /*
6910 : * BSpec says this must be set, even though
6911 : * WaDisable4x2SubspanOptimization isn't listed for VLV.
6912 : */
6913 0 : I915_WRITE(CACHE_MODE_1,
6914 : _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
6915 :
6916 : /*
6917 : * BSpec recommends 8x4 when MSAA is used,
6918 : * however in practice 16x4 seems fastest.
6919 : *
6920 : * Note that PS/WM thread counts depend on the WIZ hashing
6921 : * disable bit, which we don't touch here, but it's good
6922 : * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
6923 : */
6924 0 : I915_WRITE(GEN7_GT_MODE,
6925 : _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
6926 :
6927 : /*
6928 : * WaIncreaseL3CreditsForVLVB0:vlv
6929 : * This is the hardware default actually.
6930 : */
6931 0 : I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
6932 :
6933 : /*
6934 : * WaDisableVLVClockGating_VBIIssue:vlv
6935 : * Disable clock gating on the GCFG unit to prevent a delay
6936 : * in the reporting of vblank events.
6937 : */
6938 0 : I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
6939 0 : }
6940 :
6941 0 : static void cherryview_init_clock_gating(struct drm_device *dev)
6942 : {
6943 0 : struct drm_i915_private *dev_priv = dev->dev_private;
6944 :
6945 0 : vlv_init_display_clock_gating(dev_priv);
6946 :
6947 : /* WaVSRefCountFullforceMissDisable:chv */
6948 : /* WaDSRefCountFullforceMissDisable:chv */
6949 0 : I915_WRITE(GEN7_FF_THREAD_MODE,
6950 : I915_READ(GEN7_FF_THREAD_MODE) &
6951 : ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
6952 :
6953 : /* WaDisableSemaphoreAndSyncFlipWait:chv */
6954 0 : I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
6955 : _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
6956 :
6957 : /* WaDisableCSUnitClockGating:chv */
6958 0 : I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
6959 : GEN6_CSUNIT_CLOCK_GATE_DISABLE);
6960 :
6961 : /* WaDisableSDEUnitClockGating:chv */
6962 0 : I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
6963 : GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
6964 :
6965 : /*
6966 : * GTT cache may not work with big pages, so if those
6967 : * are ever enabled GTT cache may need to be disabled.
6968 : */
6969 0 : I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
6970 0 : }
6971 :
6972 0 : static void g4x_init_clock_gating(struct drm_device *dev)
6973 : {
6974 0 : struct drm_i915_private *dev_priv = dev->dev_private;
6975 : uint32_t dspclk_gate;
6976 :
6977 0 : I915_WRITE(RENCLK_GATE_D1, 0);
6978 0 : I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE |
6979 : GS_UNIT_CLOCK_GATE_DISABLE |
6980 : CL_UNIT_CLOCK_GATE_DISABLE);
6981 0 : I915_WRITE(RAMCLK_GATE_D, 0);
6982 : dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE |
6983 : OVRUNIT_CLOCK_GATE_DISABLE |
6984 : OVCUNIT_CLOCK_GATE_DISABLE;
6985 0 : if (IS_GM45(dev))
6986 0 : dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
6987 0 : I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
6988 :
6989 : /* WaDisableRenderCachePipelinedFlush */
6990 0 : I915_WRITE(CACHE_MODE_0,
6991 : _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
6992 :
6993 : /* WaDisable_RenderCache_OperationalFlush:g4x */
6994 0 : I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6995 :
6996 0 : g4x_disable_trickle_feed(dev);
6997 0 : }
6998 :
6999 0 : static void crestline_init_clock_gating(struct drm_device *dev)
7000 : {
7001 0 : struct drm_i915_private *dev_priv = dev->dev_private;
7002 :
7003 0 : I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
7004 0 : I915_WRITE(RENCLK_GATE_D2, 0);
7005 0 : I915_WRITE(DSPCLK_GATE_D, 0);
7006 0 : I915_WRITE(RAMCLK_GATE_D, 0);
7007 0 : I915_WRITE16(DEUC, 0);
7008 0 : I915_WRITE(MI_ARB_STATE,
7009 : _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
7010 :
7011 : /* WaDisable_RenderCache_OperationalFlush:gen4 */
7012 0 : I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
7013 0 : }
7014 :
7015 0 : static void broadwater_init_clock_gating(struct drm_device *dev)
7016 : {
7017 0 : struct drm_i915_private *dev_priv = dev->dev_private;
7018 :
7019 0 : I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE |
7020 : I965_RCC_CLOCK_GATE_DISABLE |
7021 : I965_RCPB_CLOCK_GATE_DISABLE |
7022 : I965_ISC_CLOCK_GATE_DISABLE |
7023 : I965_FBC_CLOCK_GATE_DISABLE);
7024 0 : I915_WRITE(RENCLK_GATE_D2, 0);
7025 0 : I915_WRITE(MI_ARB_STATE,
7026 : _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
7027 :
7028 : /* WaDisable_RenderCache_OperationalFlush:gen4 */
7029 0 : I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
7030 0 : }
7031 :
7032 0 : static void gen3_init_clock_gating(struct drm_device *dev)
7033 : {
7034 0 : struct drm_i915_private *dev_priv = dev->dev_private;
7035 0 : u32 dstate = I915_READ(D_STATE);
7036 :
7037 0 : dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING |
7038 : DSTATE_DOT_CLOCK_GATING;
7039 0 : I915_WRITE(D_STATE, dstate);
7040 :
7041 0 : if (IS_PINEVIEW(dev))
7042 0 : I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY));
7043 :
7044 : /* IIR "flip pending" means done if this bit is set */
7045 0 : I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE));
7046 :
7047 : /* interrupts should cause a wake up from C3 */
7048 0 : I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN));
7049 :
7050 : /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
7051 0 : I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
7052 :
7053 0 : I915_WRITE(MI_ARB_STATE,
7054 : _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
7055 0 : }
7056 :
7057 0 : static void i85x_init_clock_gating(struct drm_device *dev)
7058 : {
7059 0 : struct drm_i915_private *dev_priv = dev->dev_private;
7060 :
7061 0 : I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);
7062 :
7063 : /* interrupts should cause a wake up from C3 */
7064 0 : I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) |
7065 : _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE));
7066 :
7067 0 : I915_WRITE(MEM_MODE,
7068 : _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE));
7069 0 : }
7070 :
7071 0 : static void i830_init_clock_gating(struct drm_device *dev)
7072 : {
7073 0 : struct drm_i915_private *dev_priv = dev->dev_private;
7074 :
7075 0 : I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE);
7076 :
7077 0 : I915_WRITE(MEM_MODE,
7078 : _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) |
7079 : _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE));
7080 0 : }
7081 :
7082 0 : void intel_init_clock_gating(struct drm_device *dev)
7083 : {
7084 0 : struct drm_i915_private *dev_priv = dev->dev_private;
7085 :
7086 0 : if (dev_priv->display.init_clock_gating)
7087 0 : dev_priv->display.init_clock_gating(dev);
7088 0 : }
7089 :
7090 0 : void intel_suspend_hw(struct drm_device *dev)
7091 : {
7092 0 : if (HAS_PCH_LPT(dev))
7093 0 : lpt_suspend_hw(dev);
7094 0 : }
7095 :
7096 : /* Set up chip-specific power management-related functions */
7097 0 : void intel_init_pm(struct drm_device *dev)
7098 : {
7099 0 : struct drm_i915_private *dev_priv = dev->dev_private;
7100 :
7101 0 : intel_fbc_init(dev_priv);
7102 :
7103 : /* For cxsr */
7104 0 : if (IS_PINEVIEW(dev))
7105 0 : i915_pineview_get_mem_freq(dev);
7106 0 : else if (IS_GEN5(dev))
7107 0 : i915_ironlake_get_mem_freq(dev);
7108 :
7109 : /* For FIFO watermark updates */
7110 0 : if (INTEL_INFO(dev)->gen >= 9) {
7111 0 : skl_setup_wm_latency(dev);
7112 :
7113 0 : if (IS_BROXTON(dev))
7114 0 : dev_priv->display.init_clock_gating =
7115 : bxt_init_clock_gating;
7116 0 : dev_priv->display.update_wm = skl_update_wm;
7117 0 : dev_priv->display.update_sprite_wm = skl_update_sprite_wm;
7118 0 : } else if (HAS_PCH_SPLIT(dev)) {
7119 0 : ilk_setup_wm_latency(dev);
7120 :
7121 0 : if ((IS_GEN5(dev) && dev_priv->wm.pri_latency[1] &&
7122 0 : dev_priv->wm.spr_latency[1] && dev_priv->wm.cur_latency[1]) ||
7123 0 : (!IS_GEN5(dev) && dev_priv->wm.pri_latency[0] &&
7124 0 : dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) {
7125 0 : dev_priv->display.update_wm = ilk_update_wm;
7126 0 : dev_priv->display.update_sprite_wm = ilk_update_sprite_wm;
7127 0 : } else {
7128 : DRM_DEBUG_KMS("Failed to read display plane latency. "
7129 : "Disable CxSR\n");
7130 : }
7131 :
7132 0 : if (IS_GEN5(dev))
7133 0 : dev_priv->display.init_clock_gating = ironlake_init_clock_gating;
7134 0 : else if (IS_GEN6(dev))
7135 0 : dev_priv->display.init_clock_gating = gen6_init_clock_gating;
7136 0 : else if (IS_IVYBRIDGE(dev))
7137 0 : dev_priv->display.init_clock_gating = ivybridge_init_clock_gating;
7138 0 : else if (IS_HASWELL(dev))
7139 0 : dev_priv->display.init_clock_gating = haswell_init_clock_gating;
7140 0 : else if (INTEL_INFO(dev)->gen == 8)
7141 0 : dev_priv->display.init_clock_gating = broadwell_init_clock_gating;
7142 0 : } else if (IS_CHERRYVIEW(dev)) {
7143 0 : vlv_setup_wm_latency(dev);
7144 :
7145 0 : dev_priv->display.update_wm = vlv_update_wm;
7146 0 : dev_priv->display.init_clock_gating =
7147 : cherryview_init_clock_gating;
7148 0 : } else if (IS_VALLEYVIEW(dev)) {
7149 0 : vlv_setup_wm_latency(dev);
7150 :
7151 0 : dev_priv->display.update_wm = vlv_update_wm;
7152 0 : dev_priv->display.init_clock_gating =
7153 : valleyview_init_clock_gating;
7154 0 : } else if (IS_PINEVIEW(dev)) {
7155 0 : if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev),
7156 0 : dev_priv->is_ddr3,
7157 0 : dev_priv->fsb_freq,
7158 0 : dev_priv->mem_freq)) {
7159 : DRM_INFO("failed to find known CxSR latency "
7160 : "(found ddr%s fsb freq %d, mem freq %d), "
7161 : "disabling CxSR\n",
7162 : (dev_priv->is_ddr3 == 1) ? "3" : "2",
7163 : dev_priv->fsb_freq, dev_priv->mem_freq);
7164 : /* Disable CxSR and never update its watermark again */
7165 0 : intel_set_memory_cxsr(dev_priv, false);
7166 0 : dev_priv->display.update_wm = NULL;
7167 0 : } else
7168 0 : dev_priv->display.update_wm = pineview_update_wm;
7169 0 : dev_priv->display.init_clock_gating = gen3_init_clock_gating;
7170 0 : } else if (IS_G4X(dev)) {
7171 0 : dev_priv->display.update_wm = g4x_update_wm;
7172 0 : dev_priv->display.init_clock_gating = g4x_init_clock_gating;
7173 0 : } else if (IS_GEN4(dev)) {
7174 0 : dev_priv->display.update_wm = i965_update_wm;
7175 0 : if (IS_CRESTLINE(dev))
7176 0 : dev_priv->display.init_clock_gating = crestline_init_clock_gating;
7177 0 : else if (IS_BROADWATER(dev))
7178 0 : dev_priv->display.init_clock_gating = broadwater_init_clock_gating;
7179 0 : } else if (IS_GEN3(dev)) {
7180 0 : dev_priv->display.update_wm = i9xx_update_wm;
7181 0 : dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
7182 0 : dev_priv->display.init_clock_gating = gen3_init_clock_gating;
7183 0 : } else if (IS_GEN2(dev)) {
7184 0 : if (INTEL_INFO(dev)->num_pipes == 1) {
7185 0 : dev_priv->display.update_wm = i845_update_wm;
7186 0 : dev_priv->display.get_fifo_size = i845_get_fifo_size;
7187 0 : } else {
7188 0 : dev_priv->display.update_wm = i9xx_update_wm;
7189 0 : dev_priv->display.get_fifo_size = i830_get_fifo_size;
7190 : }
7191 :
7192 0 : if (IS_I85X(dev) || IS_I865G(dev))
7193 0 : dev_priv->display.init_clock_gating = i85x_init_clock_gating;
7194 : else
7195 0 : dev_priv->display.init_clock_gating = i830_init_clock_gating;
7196 : } else {
7197 0 : DRM_ERROR("unexpected fall-through in intel_init_pm\n");
7198 : }
7199 0 : }
7200 :
7201 0 : int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
7202 : {
7203 0 : WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
7204 :
7205 0 : if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
7206 : DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed\n");
7207 0 : return -EAGAIN;
7208 : }
7209 :
7210 0 : I915_WRITE(GEN6_PCODE_DATA, *val);
7211 0 : I915_WRITE(GEN6_PCODE_DATA1, 0);
7212 0 : I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
7213 :
7214 0 : if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
7215 : 500)) {
7216 0 : DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox);
7217 0 : return -ETIMEDOUT;
7218 : }
7219 :
7220 0 : *val = I915_READ(GEN6_PCODE_DATA);
7221 0 : I915_WRITE(GEN6_PCODE_DATA, 0);
7222 :
7223 0 : return 0;
7224 0 : }
7225 :
7226 0 : int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u32 mbox, u32 val)
7227 : {
7228 0 : WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
7229 :
7230 0 : if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
7231 : DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed\n");
7232 0 : return -EAGAIN;
7233 : }
7234 :
7235 0 : I915_WRITE(GEN6_PCODE_DATA, val);
7236 0 : I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
7237 :
7238 0 : if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
7239 : 500)) {
7240 0 : DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox);
7241 0 : return -ETIMEDOUT;
7242 : }
7243 :
7244 0 : I915_WRITE(GEN6_PCODE_DATA, 0);
7245 :
7246 0 : return 0;
7247 0 : }
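/*
 * Both mailbox helpers must be called with rps.hw_lock held, as the
 * WARN_ONs above enforce. A hedged usage sketch (GEN6_READ_OC_PARAMS is
 * a real mailbox id; the surrounding code is illustrative only):
 */
#if 0
	u32 params = 0;

	mutex_lock(&dev_priv->rps.hw_lock);
	if (sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params) == 0)
		DRM_DEBUG_DRIVER("OC params: 0x%08x\n", params);
	mutex_unlock(&dev_priv->rps.hw_lock);
#endif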
7248 :
7249 0 : static int vlv_gpu_freq_div(unsigned int czclk_freq)
7250 : {
7251 0 : switch (czclk_freq) {
7252 : case 200:
7253 0 : return 10;
7254 : case 267:
7255 0 : return 12;
7256 : case 320:
7257 : case 333:
7258 0 : return 16;
7259 : case 400:
7260 0 : return 20;
7261 : default:
7262 0 : return -1;
7263 : }
7264 0 : }
7265 :
7266 0 : static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
7267 : {
7268 0 : int div, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->czclk_freq, 1000);
7269 :
7270 0 : div = vlv_gpu_freq_div(czclk_freq);
7271 0 : if (div < 0)
7272 0 : return div;
7273 :
7274 0 : return DIV_ROUND_CLOSEST(czclk_freq * (val + 6 - 0xbd), div);
7275 0 : }
7276 :
7277 0 : static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
7278 : {
7279 0 : int mul, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->czclk_freq, 1000);
7280 :
7281 0 : mul = vlv_gpu_freq_div(czclk_freq);
7282 0 : if (mul < 0)
7283 0 : return mul;
7284 :
7285 0 : return DIV_ROUND_CLOSEST(mul * val, czclk_freq) + 0xbd - 6;
7286 0 : }
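/*
 * Worked example of the two BYT conversions above (numbers assumed for
 * illustration): with czclk_freq = 400 the divider is 20, so opcode
 * 0xc8 (200) maps to 400 * (200 + 6 - 0xbd) / 20 = 340 MHz, and
 * byt_freq_opcode(dev_priv, 340) = 20 * 340 / 400 + 0xbd - 6 = 200,
 * i.e. the two functions round-trip.
 */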
7287 :
7288 0 : static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
7289 : {
7290 0 : int div, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->czclk_freq, 1000);
7291 :
7292 0 : div = vlv_gpu_freq_div(czclk_freq);
7293 0 : if (div < 0)
7294 0 : return div;
7295 :
7296 0 : return DIV_ROUND_CLOSEST(czclk_freq * val, div) / 2;
7297 0 : }
7298 :
7299 0 : static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
7300 : {
7301 0 : int mul, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->czclk_freq, 1000);
7302 :
7303 0 : mul = vlv_gpu_freq_div(czclk_freq);
7304 0 : if (mul < 0)
7305 0 : return mul;
7306 :
7307 : /* CHV needs even values */
7308 0 : return DIV_ROUND_CLOSEST(val * mul, czclk_freq) * 2;
7309 0 : }
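/*
 * Worked example for the CHV pair (numbers assumed for illustration):
 * with czclk_freq = 320 the divider is 16, so ratio 30 maps to
 * DIV_ROUND_CLOSEST(320 * 30, 16) / 2 = 300 MHz, and
 * chv_freq_opcode(dev_priv, 300) = DIV_ROUND_CLOSEST(300 * 16, 320) * 2
 * = 30; the trailing "* 2" is what keeps CHV opcodes even. Note that
 * the error check runs on the full divider value, since -1 / 2 would
 * truncate to 0 in C and defeat the check.
 */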
7310 :
7311 0 : int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
7312 : {
7313 0 : if (IS_GEN9(dev_priv->dev))
7314 0 : return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
7315 : GEN9_FREQ_SCALER);
7316 0 : else if (IS_CHERRYVIEW(dev_priv->dev))
7317 0 : return chv_gpu_freq(dev_priv, val);
7318 0 : else if (IS_VALLEYVIEW(dev_priv->dev))
7319 0 : return byt_gpu_freq(dev_priv, val);
7320 : else
7321 0 : return val * GT_FREQUENCY_MULTIPLIER;
7322 0 : }
7323 :
7324 0 : int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
7325 : {
7326 0 : if (IS_GEN9(dev_priv->dev))
7327 0 : return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
7328 : GT_FREQUENCY_MULTIPLIER);
7329 0 : else if (IS_CHERRYVIEW(dev_priv->dev))
7330 0 : return chv_freq_opcode(dev_priv, val);
7331 0 : else if (IS_VALLEYVIEW(dev_priv->dev))
7332 0 : return byt_freq_opcode(dev_priv, val);
7333 : else
7334 0 : return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
7335 0 : }
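/*
 * On Gen9 the ratio registers are in units of GT_FREQUENCY_MULTIPLIER /
 * GEN9_FREQ_SCALER MHz (50/3, i.e. ~16.67 MHz, with the constants used
 * here). Assuming those values, ratio 18 corresponds to 18 * 50 / 3 =
 * 300 MHz, and intel_freq_opcode(dev_priv, 300) returns
 * DIV_ROUND_CLOSEST(300 * 3, 50) = 18 again.
 */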
7336 :
7337 : struct request_boost {
7338 : struct work_struct work;
7339 : struct drm_i915_gem_request *req;
7340 : };
7341 :
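/*
 * Request boosting is deferred to a workqueue: the caller may be in a
 * context where the rps locks cannot be taken, so we hold a reference
 * on the request, queue the work, and let the worker re-check
 * completion before actually boosting the frequency.
 */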
7342 0 : static void __intel_rps_boost_work(struct work_struct *work)
7343 : {
7344 0 : struct request_boost *boost = container_of(work, struct request_boost, work);
7345 0 : struct drm_i915_gem_request *req = boost->req;
7346 :
7347 0 : if (!i915_gem_request_completed(req, true))
7348 0 : gen6_rps_boost(to_i915(req->ring->dev), NULL,
7349 0 : req->emitted_jiffies);
7350 :
7351 0 : i915_gem_request_unreference__unlocked(req);
7352 0 : kfree(boost);
7353 0 : }
7354 :
7355 0 : void intel_queue_rps_boost_for_request(struct drm_device *dev,
7356 : struct drm_i915_gem_request *req)
7357 : {
7358 : struct request_boost *boost;
7359 :
7360 0 : if (req == NULL || INTEL_INFO(dev)->gen < 6)
7361 0 : return;
7362 :
7363 0 : if (i915_gem_request_completed(req, true))
7364 0 : return;
7365 :
7366 0 : boost = kmalloc(sizeof(*boost), GFP_ATOMIC);
7367 0 : if (boost == NULL)
7368 0 : return;
7369 :
7370 0 : i915_gem_request_reference(req);
7371 0 : boost->req = req;
7372 :
7373 0 : INIT_WORK(&boost->work, __intel_rps_boost_work);
7374 0 : queue_work(to_i915(dev)->wq, &boost->work);
7375 0 : }
7376 :
7377 0 : void intel_pm_setup(struct drm_device *dev)
7378 : {
7379 0 : struct drm_i915_private *dev_priv = dev->dev_private;
7380 :
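/*
 * In this port, rw_init() and mtx_init() stand in for the Linux
 * mutex_init() / spin_lock_init() calls on rps.hw_lock and
 * rps.client_lock.
 */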
7381 0 : rw_init(&dev_priv->rps.hw_lock, "rpshw");
7382 0 : mtx_init(&dev_priv->rps.client_lock, IPL_NONE);
7383 :
7384 0 : INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
7385 : intel_gen6_powersave_work);
7386 0 : INIT_LIST_HEAD(&dev_priv->rps.clients);
7387 0 : INIT_LIST_HEAD(&dev_priv->rps.semaphores.link);
7388 0 : INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link);
7389 :
7390 0 : dev_priv->pm.suspended = false;
7391 0 : }
|