Line data Source code
1 : /*
2 : * Copyright 2010 Advanced Micro Devices, Inc.
3 : * Copyright 2008 Red Hat Inc.
4 : * Copyright 2009 Jerome Glisse.
5 : *
6 : * Permission is hereby granted, free of charge, to any person obtaining a
7 : * copy of this software and associated documentation files (the "Software"),
8 : * to deal in the Software without restriction, including without limitation
9 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 : * and/or sell copies of the Software, and to permit persons to whom the
11 : * Software is furnished to do so, subject to the following conditions:
12 : *
13 : * The above copyright notice and this permission notice shall be included in
14 : * all copies or substantial portions of the Software.
15 : *
16 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 : * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 : * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 : * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 : * OTHER DEALINGS IN THE SOFTWARE.
23 : *
24 : * Authors: Dave Airlie
25 : * Alex Deucher
26 : * Jerome Glisse
27 : */
28 : #include <dev/pci/drm/drmP.h>
29 : #include "radeon.h"
30 : #include "evergreend.h"
31 : #include "evergreen_reg_safe.h"
32 : #include "cayman_reg_safe.h"
33 :
34 : #define MAX(a,b) (((a)>(b))?(a):(b))
35 : #define MIN(a,b) (((a)<(b))?(a):(b))
36 :
37 : #define REG_SAFE_BM_SIZE ARRAY_SIZE(evergreen_reg_safe_bm)
38 :
39 : int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
40 : struct radeon_bo_list **cs_reloc);
41 : struct evergreen_cs_track {
42 : u32 group_size;
43 : u32 nbanks;
44 : u32 npipes;
45 : u32 row_size;
46 : /* value we track */
47 : u32 nsamples; /* unused */
48 : struct radeon_bo *cb_color_bo[12];
49 : u32 cb_color_bo_offset[12];
50 : struct radeon_bo *cb_color_fmask_bo[8]; /* unused */
51 : struct radeon_bo *cb_color_cmask_bo[8]; /* unused */
52 : u32 cb_color_info[12];
53 : u32 cb_color_view[12];
54 : u32 cb_color_pitch[12];
55 : u32 cb_color_slice[12];
56 : u32 cb_color_slice_idx[12];
57 : u32 cb_color_attrib[12];
58 : u32 cb_color_cmask_slice[8];/* unused */
59 : u32 cb_color_fmask_slice[8];/* unused */
60 : u32 cb_target_mask;
61 : u32 cb_shader_mask; /* unused */
62 : u32 vgt_strmout_config;
63 : u32 vgt_strmout_buffer_config;
64 : struct radeon_bo *vgt_strmout_bo[4];
65 : u32 vgt_strmout_bo_offset[4];
66 : u32 vgt_strmout_size[4];
67 : u32 db_depth_control;
68 : u32 db_depth_view;
69 : u32 db_depth_slice;
70 : u32 db_depth_size;
71 : u32 db_z_info;
72 : u32 db_z_read_offset;
73 : u32 db_z_write_offset;
74 : struct radeon_bo *db_z_read_bo;
75 : struct radeon_bo *db_z_write_bo;
76 : u32 db_s_info;
77 : u32 db_s_read_offset;
78 : u32 db_s_write_offset;
79 : struct radeon_bo *db_s_read_bo;
80 : struct radeon_bo *db_s_write_bo;
81 : bool sx_misc_kill_all_prims;
82 : bool cb_dirty;
83 : bool db_dirty;
84 : bool streamout_dirty;
85 : u32 htile_offset;
86 : u32 htile_surface;
87 : struct radeon_bo *htile_bo;
88 : unsigned long indirect_draw_buffer_size;
89 : const unsigned *reg_safe_bm;
90 : };
91 :
92 0 : static u32 evergreen_cs_get_aray_mode(u32 tiling_flags)
93 : {
94 0 : if (tiling_flags & RADEON_TILING_MACRO)
95 0 : return ARRAY_2D_TILED_THIN1;
96 0 : else if (tiling_flags & RADEON_TILING_MICRO)
97 0 : return ARRAY_1D_TILED_THIN1;
98 : else
99 0 : return ARRAY_LINEAR_GENERAL;
100 0 : }
101 :
102 0 : static u32 evergreen_cs_get_num_banks(u32 nbanks)
103 : {
104 0 : switch (nbanks) {
105 : case 2:
106 0 : return ADDR_SURF_2_BANK;
107 : case 4:
108 0 : return ADDR_SURF_4_BANK;
109 : case 8:
110 : default:
111 0 : return ADDR_SURF_8_BANK;
112 : case 16:
113 0 : return ADDR_SURF_16_BANK;
114 : }
115 0 : }
116 :
117 0 : static void evergreen_cs_track_init(struct evergreen_cs_track *track)
118 : {
119 : int i;
120 :
121 0 : for (i = 0; i < 8; i++) {
122 0 : track->cb_color_fmask_bo[i] = NULL;
123 0 : track->cb_color_cmask_bo[i] = NULL;
124 0 : track->cb_color_cmask_slice[i] = 0;
125 0 : track->cb_color_fmask_slice[i] = 0;
126 : }
127 :
128 0 : for (i = 0; i < 12; i++) {
129 0 : track->cb_color_bo[i] = NULL;
130 0 : track->cb_color_bo_offset[i] = 0xFFFFFFFF;
131 0 : track->cb_color_info[i] = 0;
132 0 : track->cb_color_view[i] = 0xFFFFFFFF;
133 0 : track->cb_color_pitch[i] = 0;
134 0 : track->cb_color_slice[i] = 0xfffffff;
135 0 : track->cb_color_slice_idx[i] = 0;
136 : }
137 0 : track->cb_target_mask = 0xFFFFFFFF;
138 0 : track->cb_shader_mask = 0xFFFFFFFF;
139 0 : track->cb_dirty = true;
140 :
141 0 : track->db_depth_slice = 0xffffffff;
142 0 : track->db_depth_view = 0xFFFFC000;
143 0 : track->db_depth_size = 0xFFFFFFFF;
144 0 : track->db_depth_control = 0xFFFFFFFF;
145 0 : track->db_z_info = 0xFFFFFFFF;
146 0 : track->db_z_read_offset = 0xFFFFFFFF;
147 0 : track->db_z_write_offset = 0xFFFFFFFF;
148 0 : track->db_z_read_bo = NULL;
149 0 : track->db_z_write_bo = NULL;
150 0 : track->db_s_info = 0xFFFFFFFF;
151 0 : track->db_s_read_offset = 0xFFFFFFFF;
152 0 : track->db_s_write_offset = 0xFFFFFFFF;
153 0 : track->db_s_read_bo = NULL;
154 0 : track->db_s_write_bo = NULL;
155 0 : track->db_dirty = true;
156 0 : track->htile_bo = NULL;
157 0 : track->htile_offset = 0xFFFFFFFF;
158 0 : track->htile_surface = 0;
159 :
160 0 : for (i = 0; i < 4; i++) {
161 0 : track->vgt_strmout_size[i] = 0;
162 0 : track->vgt_strmout_bo[i] = NULL;
163 0 : track->vgt_strmout_bo_offset[i] = 0xFFFFFFFF;
164 : }
165 0 : track->streamout_dirty = true;
166 0 : track->sx_misc_kill_all_prims = false;
167 0 : }
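: /* Note: the 0xFFFFFFFF-style defaults above appear to act as "not yet
:  * programmed" sentinels; together with the *_dirty flags they make sure
:  * each state block is (re)validated before the first draw that uses it.
:  */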
168 :
169 : struct eg_surface {
170 : /* value gathered from cs */
171 : unsigned nbx;
172 : unsigned nby;
173 : unsigned format;
174 : unsigned mode;
175 : unsigned nbanks;
176 : unsigned bankw;
177 : unsigned bankh;
178 : unsigned tsplit;
179 : unsigned mtilea;
180 : unsigned nsamples;
181 : /* output value */
182 : unsigned bpe;
183 : unsigned layer_size;
184 : unsigned palign;
185 : unsigned halign;
186 : unsigned long base_align;
187 : };
188 :
189 0 : static int evergreen_surface_check_linear(struct radeon_cs_parser *p,
190 : struct eg_surface *surf,
191 : const char *prefix)
192 : {
193 0 : surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
194 0 : surf->base_align = surf->bpe;
195 0 : surf->palign = 1;
196 0 : surf->halign = 1;
197 0 : return 0;
198 : }
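: /* Worked example (illustrative values): a 64x64 linear surface with a
:  * 4-byte format and 1 sample gives layer_size = 64 * 64 * 4 * 1 = 16384
:  * bytes, base_align = bpe = 4, and no pitch/height alignment constraint.
:  */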
199 :
200 0 : static int evergreen_surface_check_linear_aligned(struct radeon_cs_parser *p,
201 : struct eg_surface *surf,
202 : const char *prefix)
203 : {
204 0 : struct evergreen_cs_track *track = p->track;
205 : unsigned palign;
206 :
207 0 : palign = MAX(64, track->group_size / surf->bpe);
208 0 : surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
209 0 : surf->base_align = track->group_size;
210 0 : surf->palign = palign;
211 0 : surf->halign = 1;
212 0 : if (surf->nbx & (palign - 1)) {
213 0 : if (prefix) {
214 0 : dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
215 : __func__, __LINE__, prefix, surf->nbx, palign);
216 0 : }
217 0 : return -EINVAL;
218 : }
219 0 : return 0;
220 0 : }
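: /* Worked example (illustrative values): with group_size = 256 and
:  * bpe = 4, palign = MAX(64, 256 / 4) = 64 blocks, so nbx = 960 passes
:  * (960 & 63 == 0) while nbx = 900 is rejected.
:  */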
221 :
222 0 : static int evergreen_surface_check_1d(struct radeon_cs_parser *p,
223 : struct eg_surface *surf,
224 : const char *prefix)
225 : {
226 0 : struct evergreen_cs_track *track = p->track;
227 : unsigned palign;
228 :
229 0 : palign = track->group_size / (8 * surf->bpe * surf->nsamples);
230 0 : palign = MAX(8, palign);
231 0 : surf->layer_size = surf->nbx * surf->nby * surf->bpe;
232 0 : surf->base_align = track->group_size;
233 0 : surf->palign = palign;
234 0 : surf->halign = 8;
235 0 : if ((surf->nbx & (palign - 1))) {
236 0 : if (prefix) {
237 0 : dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d (%d %d %d)\n",
238 : __func__, __LINE__, prefix, surf->nbx, palign,
239 : track->group_size, surf->bpe, surf->nsamples);
240 0 : }
241 0 : return -EINVAL;
242 : }
243 0 : if ((surf->nby & (8 - 1))) {
244 0 : if (prefix) {
245 0 : dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with 8\n",
246 : __func__, __LINE__, prefix, surf->nby);
247 0 : }
248 0 : return -EINVAL;
249 : }
250 0 : return 0;
251 0 : }
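: /* Worked example (illustrative values): with group_size = 256, bpe = 4
:  * and nsamples = 1, palign = MAX(8, 256 / (8 * 4 * 1)) = 8, and nby
:  * must additionally be a multiple of the 8-row 1D tile.
:  */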
252 :
253 0 : static int evergreen_surface_check_2d(struct radeon_cs_parser *p,
254 : struct eg_surface *surf,
255 : const char *prefix)
256 : {
257 0 : struct evergreen_cs_track *track = p->track;
258 : unsigned palign, halign, tileb, slice_pt;
259 : unsigned mtile_pr, mtile_ps, mtileb;
260 :
261 0 : tileb = 64 * surf->bpe * surf->nsamples;
262 : slice_pt = 1;
263 0 : if (tileb > surf->tsplit) {
264 0 : slice_pt = tileb / surf->tsplit;
265 0 : }
266 0 : tileb = tileb / slice_pt;
267 : /* macro tile width & height */
268 0 : palign = (8 * surf->bankw * track->npipes) * surf->mtilea;
269 0 : halign = (8 * surf->bankh * surf->nbanks) / surf->mtilea;
270 0 : mtileb = (palign / 8) * (halign / 8) * tileb;
271 0 : mtile_pr = surf->nbx / palign;
272 0 : mtile_ps = (mtile_pr * surf->nby) / halign;
273 0 : surf->layer_size = mtile_ps * mtileb * slice_pt;
274 0 : surf->base_align = (palign / 8) * (halign / 8) * tileb;
275 0 : surf->palign = palign;
276 0 : surf->halign = halign;
277 :
278 0 : if ((surf->nbx & (palign - 1))) {
279 0 : if (prefix) {
280 0 : dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
281 : __func__, __LINE__, prefix, surf->nbx, palign);
282 0 : }
283 0 : return -EINVAL;
284 : }
285 0 : if ((surf->nby & (halign - 1))) {
286 0 : if (prefix) {
287 0 : dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with %d\n",
288 : __func__, __LINE__, prefix, surf->nby, halign);
289 0 : }
290 0 : return -EINVAL;
291 : }
292 :
293 0 : return 0;
294 0 : }
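: /* Worked example (illustrative values): npipes = 4, nbanks = 8,
:  * bankw = bankh = 1, mtilea = 2, bpe = 4, nsamples = 1:
:  *   tileb  = 64 * 4 * 1 = 256 bytes (<= tsplit here, so slice_pt = 1),
:  *   palign = (8 * 1 * 4) * 2 = 64 pixels,
:  *   halign = (8 * 1 * 8) / 2 = 32 rows,
:  *   mtileb = (64 / 8) * (32 / 8) * 256 = 8192 bytes per macro tile.
:  */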
295 :
296 0 : static int evergreen_surface_check(struct radeon_cs_parser *p,
297 : struct eg_surface *surf,
298 : const char *prefix)
299 : {
300 : /* some common value computed here */
301 0 : surf->bpe = r600_fmt_get_blocksize(surf->format);
302 :
303 0 : switch (surf->mode) {
304 : case ARRAY_LINEAR_GENERAL:
305 0 : return evergreen_surface_check_linear(p, surf, prefix);
306 : case ARRAY_LINEAR_ALIGNED:
307 0 : return evergreen_surface_check_linear_aligned(p, surf, prefix);
308 : case ARRAY_1D_TILED_THIN1:
309 0 : return evergreen_surface_check_1d(p, surf, prefix);
310 : case ARRAY_2D_TILED_THIN1:
311 0 : return evergreen_surface_check_2d(p, surf, prefix);
312 : default:
313 0 : dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
314 : __func__, __LINE__, prefix, surf->mode);
315 0 : return -EINVAL;
316 : }
317 : return -EINVAL;
318 0 : }
319 :
320 0 : static int evergreen_surface_value_conv_check(struct radeon_cs_parser *p,
321 : struct eg_surface *surf,
322 : const char *prefix)
323 : {
324 0 : switch (surf->mode) {
325 : case ARRAY_2D_TILED_THIN1:
326 : break;
327 : case ARRAY_LINEAR_GENERAL:
328 : case ARRAY_LINEAR_ALIGNED:
329 : case ARRAY_1D_TILED_THIN1:
330 0 : return 0;
331 : default:
332 0 : dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
333 : __func__, __LINE__, prefix, surf->mode);
334 0 : return -EINVAL;
335 : }
336 :
337 0 : switch (surf->nbanks) {
338 0 : case 0: surf->nbanks = 2; break;
339 0 : case 1: surf->nbanks = 4; break;
340 0 : case 2: surf->nbanks = 8; break;
341 0 : case 3: surf->nbanks = 16; break;
342 : default:
343 0 : dev_warn(p->dev, "%s:%d %s invalid number of banks %d\n",
344 : __func__, __LINE__, prefix, surf->nbanks);
345 0 : return -EINVAL;
346 : }
347 0 : switch (surf->bankw) {
348 0 : case 0: surf->bankw = 1; break;
349 0 : case 1: surf->bankw = 2; break;
350 0 : case 2: surf->bankw = 4; break;
351 0 : case 3: surf->bankw = 8; break;
352 : default:
353 0 : dev_warn(p->dev, "%s:%d %s invalid bankw %d\n",
354 : __func__, __LINE__, prefix, surf->bankw);
355 0 : return -EINVAL;
356 : }
357 0 : switch (surf->bankh) {
358 0 : case 0: surf->bankh = 1; break;
359 0 : case 1: surf->bankh = 2; break;
360 0 : case 2: surf->bankh = 4; break;
361 0 : case 3: surf->bankh = 8; break;
362 : default:
363 0 : dev_warn(p->dev, "%s:%d %s invalid bankh %d\n",
364 : __func__, __LINE__, prefix, surf->bankh);
365 0 : return -EINVAL;
366 : }
367 0 : switch (surf->mtilea) {
368 0 : case 0: surf->mtilea = 1; break;
369 0 : case 1: surf->mtilea = 2; break;
370 0 : case 2: surf->mtilea = 4; break;
371 0 : case 3: surf->mtilea = 8; break;
372 : default:
373 0 : dev_warn(p->dev, "%s:%d %s invalid macro tile aspect %d\n",
374 : __func__, __LINE__, prefix, surf->mtilea);
375 0 : return -EINVAL;
376 : }
377 0 : switch (surf->tsplit) {
378 0 : case 0: surf->tsplit = 64; break;
379 0 : case 1: surf->tsplit = 128; break;
380 0 : case 2: surf->tsplit = 256; break;
381 0 : case 3: surf->tsplit = 512; break;
382 0 : case 4: surf->tsplit = 1024; break;
383 0 : case 5: surf->tsplit = 2048; break;
384 0 : case 6: surf->tsplit = 4096; break;
385 : default:
386 0 : dev_warn(p->dev, "%s:%d %s invalid tile split %d\n",
387 : __func__, __LINE__, prefix, surf->tsplit);
388 0 : return -EINVAL;
389 : }
390 0 : return 0;
391 0 : }
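: /* Note: the switches above just decode the raw 2/3-bit register fields
:  * into element counts: nbanks = 2 << field, bankw/bankh/mtilea =
:  * 1 << field, tsplit = 64 << field; anything else is rejected.
:  */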
392 :
393 0 : static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned id)
394 : {
395 0 : struct evergreen_cs_track *track = p->track;
396 0 : struct eg_surface surf;
397 : unsigned pitch, slice, mslice;
398 : unsigned long offset;
399 : int r;
400 :
401 0 : mslice = G_028C6C_SLICE_MAX(track->cb_color_view[id]) + 1;
402 0 : pitch = track->cb_color_pitch[id];
403 0 : slice = track->cb_color_slice[id];
404 0 : surf.nbx = (pitch + 1) * 8;
405 0 : surf.nby = ((slice + 1) * 64) / surf.nbx;
406 0 : surf.mode = G_028C70_ARRAY_MODE(track->cb_color_info[id]);
407 0 : surf.format = G_028C70_FORMAT(track->cb_color_info[id]);
408 0 : surf.tsplit = G_028C74_TILE_SPLIT(track->cb_color_attrib[id]);
409 0 : surf.nbanks = G_028C74_NUM_BANKS(track->cb_color_attrib[id]);
410 0 : surf.bankw = G_028C74_BANK_WIDTH(track->cb_color_attrib[id]);
411 0 : surf.bankh = G_028C74_BANK_HEIGHT(track->cb_color_attrib[id]);
412 0 : surf.mtilea = G_028C74_MACRO_TILE_ASPECT(track->cb_color_attrib[id]);
413 0 : surf.nsamples = 1;
414 :
415 0 : if (!r600_fmt_is_valid_color(surf.format)) {
416 0 : dev_warn(p->dev, "%s:%d cb invalid format %d for %d (0x%08x)\n",
417 : __func__, __LINE__, surf.format,
418 : id, track->cb_color_info[id]);
419 0 : return -EINVAL;
420 : }
421 :
422 0 : r = evergreen_surface_value_conv_check(p, &surf, "cb");
423 0 : if (r) {
424 0 : return r;
425 : }
426 :
427 0 : r = evergreen_surface_check(p, &surf, "cb");
428 0 : if (r) {
429 0 : dev_warn(p->dev, "%s:%d cb[%d] invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
430 : __func__, __LINE__, id, track->cb_color_pitch[id],
431 : track->cb_color_slice[id], track->cb_color_attrib[id],
432 : track->cb_color_info[id]);
433 0 : return r;
434 : }
435 :
436 0 : offset = track->cb_color_bo_offset[id] << 8;
437 0 : if (offset & (surf.base_align - 1)) {
438 0 : dev_warn(p->dev, "%s:%d cb[%d] bo base %ld not aligned with %ld\n",
439 : __func__, __LINE__, id, offset, surf.base_align);
440 0 : return -EINVAL;
441 : }
442 :
443 0 : offset += surf.layer_size * mslice;
444 0 : if (offset > radeon_bo_size(track->cb_color_bo[id])) {
445 : /* old ddx versions are broken: they allocate the bo with
446 : * w*h*bpp but program the slice with ALIGN(h, 8); catch
447 : * this and patch the command stream.
448 : */
449 0 : if (!surf.mode) {
450 0 : uint32_t *ib = p->ib.ptr;
451 : unsigned long tmp, nby, bsize, size, min = 0;
452 :
453 : /* find the height the ddx wants */
454 0 : if (surf.nby > 8) {
455 0 : min = surf.nby - 8;
456 0 : }
457 0 : bsize = radeon_bo_size(track->cb_color_bo[id]);
458 0 : tmp = track->cb_color_bo_offset[id] << 8;
459 0 : for (nby = surf.nby; nby > min; nby--) {
460 0 : size = nby * surf.nbx * surf.bpe * surf.nsamples;
461 0 : if ((tmp + size * mslice) <= bsize) {
462 : break;
463 : }
464 : }
465 0 : if (nby > min) {
466 0 : surf.nby = nby;
467 0 : slice = ((nby * surf.nbx) / 64) - 1;
468 0 : if (!evergreen_surface_check(p, &surf, "cb")) {
469 : /* check if this one works */
470 0 : tmp += surf.layer_size * mslice;
471 0 : if (tmp <= bsize) {
472 0 : ib[track->cb_color_slice_idx[id]] = slice;
473 0 : goto old_ddx_ok;
474 : }
475 : }
476 : }
477 0 : }
478 0 : dev_warn(p->dev, "%s:%d cb[%d] bo too small (layer size %d, "
479 : "offset %d, max layer %d, bo size %ld, slice %d)\n",
480 : __func__, __LINE__, id, surf.layer_size,
481 : track->cb_color_bo_offset[id] << 8, mslice,
482 : radeon_bo_size(track->cb_color_bo[id]), slice);
483 0 : dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
484 : __func__, __LINE__, surf.nbx, surf.nby,
485 : surf.mode, surf.bpe, surf.nsamples,
486 : surf.bankw, surf.bankh,
487 : surf.tsplit, surf.mtilea);
488 0 : return -EINVAL;
489 : }
490 : old_ddx_ok:
491 :
492 0 : return 0;
493 0 : }
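: /* Worked example (illustrative values): for a 1024x768 target the pitch
:  * register holds 1024 / 8 - 1 = 127 (8-pixel units) and the slice
:  * register holds 1024 * 768 / 64 - 1 = 12287 (64-pixel tiles), so
:  * nbx = (127 + 1) * 8 = 1024 and nby = ((12287 + 1) * 64) / 1024 = 768.
:  */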
494 :
495 0 : static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p,
496 : unsigned nbx, unsigned nby)
497 : {
498 0 : struct evergreen_cs_track *track = p->track;
499 : unsigned long size;
500 :
501 0 : if (track->htile_bo == NULL) {
502 0 : dev_warn(p->dev, "%s:%d htile enabled without htile surface 0x%08x\n",
503 : __func__, __LINE__, track->db_z_info);
504 0 : return -EINVAL;
505 : }
506 :
507 0 : if (G_028ABC_LINEAR(track->htile_surface)) {
508 : /* pitch must be 16 htiles aligned == 16 * 8 pixel aligned */
509 0 : nbx = round_up(nbx, 16 * 8);
510 : /* height is npipes htiles aligned == npipes * 8 pixel aligned */
511 0 : nby = round_up(nby, track->npipes * 8);
512 0 : } else {
513 : /* always assume 8x8 htile */
514 : /* alignment is htile align * 8; htile align varies with the
515 : * number of pipes and with the tile width and nby
516 : */
517 0 : switch (track->npipes) {
518 : case 8:
519 : /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
520 0 : nbx = round_up(nbx, 64 * 8);
521 0 : nby = round_up(nby, 64 * 8);
522 0 : break;
523 : case 4:
524 : /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
525 0 : nbx = round_up(nbx, 64 * 8);
526 0 : nby = round_up(nby, 32 * 8);
527 0 : break;
528 : case 2:
529 : /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
530 0 : nbx = round_up(nbx, 32 * 8);
531 0 : nby = round_up(nby, 32 * 8);
532 0 : break;
533 : case 1:
534 : /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
535 0 : nbx = round_up(nbx, 32 * 8);
536 0 : nby = round_up(nby, 16 * 8);
537 0 : break;
538 : default:
539 0 : dev_warn(p->dev, "%s:%d invalid num pipes %d\n",
540 : __func__, __LINE__, track->npipes);
541 0 : return -EINVAL;
542 : }
543 : }
544 : /* compute number of htile */
545 0 : nbx = nbx >> 3;
546 0 : nby = nby >> 3;
547 : /* size must be aligned on npipes * 2K boundary */
548 0 : size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
549 0 : size += track->htile_offset;
550 :
551 0 : if (size > radeon_bo_size(track->htile_bo)) {
552 0 : dev_warn(p->dev, "%s:%d htile surface too small %ld for %ld (%d %d)\n",
553 : __func__, __LINE__, radeon_bo_size(track->htile_bo),
554 : size, nbx, nby);
555 0 : return -EINVAL;
556 : }
557 0 : return 0;
558 0 : }
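: /* Worked example (illustrative values): a tiled 1024x768 depth buffer
:  * on a 4-pipe part stays 1024x768 after rounding (multiples of 512 and
:  * 256), i.e. 128x96 htiles, so
:  *   size = roundup(128 * 96 * 4, 4 * 2048) = 49152 bytes of htile data.
:  */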
559 :
560 0 : static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p)
561 : {
562 0 : struct evergreen_cs_track *track = p->track;
563 0 : struct eg_surface surf;
564 : unsigned pitch, slice, mslice;
565 : unsigned long offset;
566 : int r;
567 :
568 0 : mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
569 0 : pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
570 0 : slice = track->db_depth_slice;
571 0 : surf.nbx = (pitch + 1) * 8;
572 0 : surf.nby = ((slice + 1) * 64) / surf.nbx;
573 0 : surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
574 0 : surf.format = G_028044_FORMAT(track->db_s_info);
575 0 : surf.tsplit = G_028044_TILE_SPLIT(track->db_s_info);
576 0 : surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
577 0 : surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
578 0 : surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
579 0 : surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
580 0 : surf.nsamples = 1;
581 :
582 0 : if (surf.format != 1) {
583 0 : dev_warn(p->dev, "%s:%d stencil invalid format %d\n",
584 : __func__, __LINE__, surf.format);
585 0 : return -EINVAL;
586 : }
587 : /* replace by color format so we can use same code */
588 0 : surf.format = V_028C70_COLOR_8;
589 :
590 0 : r = evergreen_surface_value_conv_check(p, &surf, "stencil");
591 0 : if (r) {
592 0 : return r;
593 : }
594 :
595 0 : r = evergreen_surface_check(p, &surf, NULL);
596 0 : if (r) {
597 : /* old userspace doesn't compute proper depth/stencil alignment;
598 : * check the alignment against a bigger bytes-per-element and
599 : * only report an error if that alignment is wrong too.
600 : */
601 0 : surf.format = V_028C70_COLOR_8_8_8_8;
602 0 : r = evergreen_surface_check(p, &surf, "stencil");
603 0 : if (r) {
604 0 : dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
605 : __func__, __LINE__, track->db_depth_size,
606 : track->db_depth_slice, track->db_s_info, track->db_z_info);
607 0 : }
608 0 : return r;
609 : }
610 :
611 0 : offset = track->db_s_read_offset << 8;
612 0 : if (offset & (surf.base_align - 1)) {
613 0 : dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n",
614 : __func__, __LINE__, offset, surf.base_align);
615 0 : return -EINVAL;
616 : }
617 0 : offset += surf.layer_size * mslice;
618 0 : if (offset > radeon_bo_size(track->db_s_read_bo)) {
619 0 : dev_warn(p->dev, "%s:%d stencil read bo too small (layer size %d, "
620 : "offset %ld, max layer %d, bo size %ld)\n",
621 : __func__, __LINE__, surf.layer_size,
622 : (unsigned long)track->db_s_read_offset << 8, mslice,
623 : radeon_bo_size(track->db_s_read_bo));
624 0 : dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
625 : __func__, __LINE__, track->db_depth_size,
626 : track->db_depth_slice, track->db_s_info, track->db_z_info);
627 0 : return -EINVAL;
628 : }
629 :
630 0 : offset = track->db_s_write_offset << 8;
631 0 : if (offset & (surf.base_align - 1)) {
632 0 : dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n",
633 : __func__, __LINE__, offset, surf.base_align);
634 0 : return -EINVAL;
635 : }
636 0 : offset += surf.layer_size * mslice;
637 0 : if (offset > radeon_bo_size(track->db_s_write_bo)) {
638 0 : dev_warn(p->dev, "%s:%d stencil write bo too small (layer size %d, "
639 : "offset %ld, max layer %d, bo size %ld)\n",
640 : __func__, __LINE__, surf.layer_size,
641 : (unsigned long)track->db_s_write_offset << 8, mslice,
642 : radeon_bo_size(track->db_s_write_bo));
643 0 : return -EINVAL;
644 : }
645 :
646 : /* hyperz */
647 0 : if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
648 0 : r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
649 0 : if (r) {
650 0 : return r;
651 : }
652 : }
653 :
654 0 : return 0;
655 0 : }
656 :
657 0 : static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p)
658 : {
659 0 : struct evergreen_cs_track *track = p->track;
660 0 : struct eg_surface surf;
661 : unsigned pitch, slice, mslice;
662 : unsigned long offset;
663 : int r;
664 :
665 0 : mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
666 0 : pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
667 0 : slice = track->db_depth_slice;
668 0 : surf.nbx = (pitch + 1) * 8;
669 0 : surf.nby = ((slice + 1) * 64) / surf.nbx;
670 0 : surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
671 0 : surf.format = G_028040_FORMAT(track->db_z_info);
672 0 : surf.tsplit = G_028040_TILE_SPLIT(track->db_z_info);
673 0 : surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
674 0 : surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
675 0 : surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
676 0 : surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
677 0 : surf.nsamples = 1;
678 :
679 0 : switch (surf.format) {
680 : case V_028040_Z_16:
681 0 : surf.format = V_028C70_COLOR_16;
682 0 : break;
683 : case V_028040_Z_24:
684 : case V_028040_Z_32_FLOAT:
685 0 : surf.format = V_028C70_COLOR_8_8_8_8;
686 0 : break;
687 : default:
688 0 : dev_warn(p->dev, "%s:%d depth invalid format %d\n",
689 : __func__, __LINE__, surf.format);
690 0 : return -EINVAL;
691 : }
692 :
693 0 : r = evergreen_surface_value_conv_check(p, &surf, "depth");
694 0 : if (r) {
695 0 : dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
696 : __func__, __LINE__, track->db_depth_size,
697 : track->db_depth_slice, track->db_z_info);
698 0 : return r;
699 : }
700 :
701 0 : r = evergreen_surface_check(p, &surf, "depth");
702 0 : if (r) {
703 0 : dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
704 : __func__, __LINE__, track->db_depth_size,
705 : track->db_depth_slice, track->db_z_info);
706 0 : return r;
707 : }
708 :
709 0 : offset = track->db_z_read_offset << 8;
710 0 : if (offset & (surf.base_align - 1)) {
711 0 : dev_warn(p->dev, "%s:%d depth read bo base %ld not aligned with %ld\n",
712 : __func__, __LINE__, offset, surf.base_align);
713 0 : return -EINVAL;
714 : }
715 0 : offset += surf.layer_size * mslice;
716 0 : if (offset > radeon_bo_size(track->db_z_read_bo)) {
717 0 : dev_warn(p->dev, "%s:%d depth read bo too small (layer size %d, "
718 : "offset %ld, max layer %d, bo size %ld)\n",
719 : __func__, __LINE__, surf.layer_size,
720 : (unsigned long)track->db_z_read_offset << 8, mslice,
721 : radeon_bo_size(track->db_z_read_bo));
722 0 : return -EINVAL;
723 : }
724 :
725 0 : offset = track->db_z_write_offset << 8;
726 0 : if (offset & (surf.base_align - 1)) {
727 0 : dev_warn(p->dev, "%s:%d depth write bo base %ld not aligned with %ld\n",
728 : __func__, __LINE__, offset, surf.base_align);
729 0 : return -EINVAL;
730 : }
731 0 : offset += surf.layer_size * mslice;
732 0 : if (offset > radeon_bo_size(track->db_z_write_bo)) {
733 0 : dev_warn(p->dev, "%s:%d depth write bo too small (layer size %d, "
734 : "offset %ld, max layer %d, bo size %ld)\n",
735 : __func__, __LINE__, surf.layer_size,
736 : (unsigned long)track->db_z_write_offset << 8, mslice,
737 : radeon_bo_size(track->db_z_write_bo));
738 0 : return -EINVAL;
739 : }
740 :
741 : /* hyperz */
742 0 : if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
743 0 : r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
744 0 : if (r) {
745 0 : return r;
746 : }
747 : }
748 :
749 0 : return 0;
750 0 : }
751 :
752 0 : static int evergreen_cs_track_validate_texture(struct radeon_cs_parser *p,
753 : struct radeon_bo *texture,
754 : struct radeon_bo *mipmap,
755 : unsigned idx)
756 : {
757 0 : struct eg_surface surf;
758 : unsigned long toffset, moffset;
759 : unsigned dim, llevel, mslice, width, height, depth, i;
760 : u32 texdw[8];
761 : int r;
762 :
763 0 : texdw[0] = radeon_get_ib_value(p, idx + 0);
764 0 : texdw[1] = radeon_get_ib_value(p, idx + 1);
765 0 : texdw[2] = radeon_get_ib_value(p, idx + 2);
766 0 : texdw[3] = radeon_get_ib_value(p, idx + 3);
767 0 : texdw[4] = radeon_get_ib_value(p, idx + 4);
768 0 : texdw[5] = radeon_get_ib_value(p, idx + 5);
769 0 : texdw[6] = radeon_get_ib_value(p, idx + 6);
770 0 : texdw[7] = radeon_get_ib_value(p, idx + 7);
771 0 : dim = G_030000_DIM(texdw[0]);
772 0 : llevel = G_030014_LAST_LEVEL(texdw[5]);
773 0 : mslice = G_030014_LAST_ARRAY(texdw[5]) + 1;
774 0 : width = G_030000_TEX_WIDTH(texdw[0]) + 1;
775 0 : height = G_030004_TEX_HEIGHT(texdw[1]) + 1;
776 0 : depth = G_030004_TEX_DEPTH(texdw[1]) + 1;
777 0 : surf.format = G_03001C_DATA_FORMAT(texdw[7]);
778 0 : surf.nbx = (G_030000_PITCH(texdw[0]) + 1) * 8;
779 0 : surf.nbx = r600_fmt_get_nblocksx(surf.format, surf.nbx);
780 0 : surf.nby = r600_fmt_get_nblocksy(surf.format, height);
781 0 : surf.mode = G_030004_ARRAY_MODE(texdw[1]);
782 0 : surf.tsplit = G_030018_TILE_SPLIT(texdw[6]);
783 0 : surf.nbanks = G_03001C_NUM_BANKS(texdw[7]);
784 0 : surf.bankw = G_03001C_BANK_WIDTH(texdw[7]);
785 0 : surf.bankh = G_03001C_BANK_HEIGHT(texdw[7]);
786 0 : surf.mtilea = G_03001C_MACRO_TILE_ASPECT(texdw[7]);
787 0 : surf.nsamples = 1;
788 0 : toffset = texdw[2] << 8;
789 0 : moffset = texdw[3] << 8;
790 :
791 0 : if (!r600_fmt_is_valid_texture(surf.format, p->family)) {
792 0 : dev_warn(p->dev, "%s:%d texture invalid format %d\n",
793 : __func__, __LINE__, surf.format);
794 0 : return -EINVAL;
795 : }
796 0 : switch (dim) {
797 : case V_030000_SQ_TEX_DIM_1D:
798 : case V_030000_SQ_TEX_DIM_2D:
799 : case V_030000_SQ_TEX_DIM_CUBEMAP:
800 : case V_030000_SQ_TEX_DIM_1D_ARRAY:
801 : case V_030000_SQ_TEX_DIM_2D_ARRAY:
802 : depth = 1;
803 0 : break;
804 : case V_030000_SQ_TEX_DIM_2D_MSAA:
805 : case V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA:
806 0 : surf.nsamples = 1 << llevel;
807 : llevel = 0;
808 : depth = 1;
809 0 : break;
810 : case V_030000_SQ_TEX_DIM_3D:
811 : break;
812 : default:
813 0 : dev_warn(p->dev, "%s:%d texture invalid dimension %d\n",
814 : __func__, __LINE__, dim);
815 0 : return -EINVAL;
816 : }
817 :
818 0 : r = evergreen_surface_value_conv_check(p, &surf, "texture");
819 0 : if (r) {
820 0 : return r;
821 : }
822 :
823 : /* align height */
824 0 : evergreen_surface_check(p, &surf, NULL);
825 0 : surf.nby = roundup2(surf.nby, surf.halign);
826 :
827 0 : r = evergreen_surface_check(p, &surf, "texture");
828 0 : if (r) {
829 0 : dev_warn(p->dev, "%s:%d texture invalid 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
830 : __func__, __LINE__, texdw[0], texdw[1], texdw[4],
831 : texdw[5], texdw[6], texdw[7]);
832 0 : return r;
833 : }
834 :
835 : /* check texture size */
836 0 : if (toffset & (surf.base_align - 1)) {
837 0 : dev_warn(p->dev, "%s:%d texture bo base %ld not aligned with %ld\n",
838 : __func__, __LINE__, toffset, surf.base_align);
839 0 : return -EINVAL;
840 : }
841 0 : if (surf.nsamples <= 1 && moffset & (surf.base_align - 1)) {
842 0 : dev_warn(p->dev, "%s:%d mipmap bo base %ld not aligned with %ld\n",
843 : __func__, __LINE__, moffset, surf.base_align);
844 0 : return -EINVAL;
845 : }
846 0 : if (dim == SQ_TEX_DIM_3D) {
847 0 : toffset += surf.layer_size * depth;
848 0 : } else {
849 0 : toffset += surf.layer_size * mslice;
850 : }
851 0 : if (toffset > radeon_bo_size(texture)) {
852 0 : dev_warn(p->dev, "%s:%d texture bo too small (layer size %d, "
853 : "offset %ld, max layer %d, depth %d, bo size %ld) (%d %d)\n",
854 : __func__, __LINE__, surf.layer_size,
855 : (unsigned long)texdw[2] << 8, mslice,
856 : depth, radeon_bo_size(texture),
857 : surf.nbx, surf.nby);
858 0 : return -EINVAL;
859 : }
860 :
861 0 : if (!mipmap) {
862 0 : if (llevel) {
863 0 : dev_warn(p->dev, "%s:%i got NULL MIP_ADDRESS relocation\n",
864 : __func__, __LINE__);
865 0 : return -EINVAL;
866 : } else {
867 0 : return 0; /* everything's ok */
868 : }
869 : }
870 :
871 : /* check mipmap size */
872 0 : for (i = 1; i <= llevel; i++) {
873 : unsigned w, h, d;
874 :
875 0 : w = r600_mip_minify(width, i);
876 0 : h = r600_mip_minify(height, i);
877 0 : d = r600_mip_minify(depth, i);
878 0 : surf.nbx = r600_fmt_get_nblocksx(surf.format, w);
879 0 : surf.nby = r600_fmt_get_nblocksy(surf.format, h);
880 :
881 0 : switch (surf.mode) {
882 : case ARRAY_2D_TILED_THIN1:
883 0 : if (surf.nbx < surf.palign || surf.nby < surf.halign) {
884 0 : surf.mode = ARRAY_1D_TILED_THIN1;
885 0 : }
886 : /* recompute alignment */
887 0 : evergreen_surface_check(p, &surf, NULL);
888 0 : break;
889 : case ARRAY_LINEAR_GENERAL:
890 : case ARRAY_LINEAR_ALIGNED:
891 : case ARRAY_1D_TILED_THIN1:
892 : break;
893 : default:
894 0 : dev_warn(p->dev, "%s:%d invalid array mode %d\n",
895 : __func__, __LINE__, surf.mode);
896 0 : return -EINVAL;
897 : }
898 0 : surf.nbx = roundup2(surf.nbx, surf.palign);
899 0 : surf.nby = roundup2(surf.nby, surf.halign);
900 :
901 0 : r = evergreen_surface_check(p, &surf, "mipmap");
902 0 : if (r) {
903 0 : return r;
904 : }
905 :
906 0 : if (dim == SQ_TEX_DIM_3D) {
907 0 : moffset += surf.layer_size * d;
908 0 : } else {
909 0 : moffset += surf.layer_size * mslice;
910 : }
911 0 : if (moffset > radeon_bo_size(mipmap)) {
912 0 : dev_warn(p->dev, "%s:%d mipmap [%d] bo too small (layer size %d, "
913 : "offset %ld, coffset %ld, max layer %d, depth %d, "
914 : "bo size %ld) level0 (%d %d %d)\n",
915 : __func__, __LINE__, i, surf.layer_size,
916 : (unsigned long)texdw[3] << 8, moffset, mslice,
917 : d, radeon_bo_size(mipmap),
918 : width, height, depth);
919 0 : dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
920 : __func__, __LINE__, surf.nbx, surf.nby,
921 : surf.mode, surf.bpe, surf.nsamples,
922 : surf.bankw, surf.bankh,
923 : surf.tsplit, surf.mtilea);
924 0 : return -EINVAL;
925 : }
926 0 : }
927 :
928 0 : return 0;
929 0 : }
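: /* Note: each mip level i is re-checked with minified dimensions from
:  * r600_mip_minify() (roughly max(1, dim >> i)), and a 2D-tiled texture
:  * falls back to 1D tiling once a level is smaller than one macro tile
:  * (see the ARRAY_2D_TILED_THIN1 case above).
:  */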
930 :
931 0 : static int evergreen_cs_track_check(struct radeon_cs_parser *p)
932 : {
933 0 : struct evergreen_cs_track *track = p->track;
934 : unsigned tmp, i;
935 : int r;
936 : unsigned buffer_mask = 0;
937 :
938 : /* check streamout */
939 0 : if (track->streamout_dirty && track->vgt_strmout_config) {
940 0 : for (i = 0; i < 4; i++) {
941 0 : if (track->vgt_strmout_config & (1 << i)) {
942 0 : buffer_mask |= (track->vgt_strmout_buffer_config >> (i * 4)) & 0xf;
943 0 : }
944 : }
945 :
946 0 : for (i = 0; i < 4; i++) {
947 0 : if (buffer_mask & (1 << i)) {
948 0 : if (track->vgt_strmout_bo[i]) {
949 0 : u64 offset = (u64)track->vgt_strmout_bo_offset[i] +
950 0 : (u64)track->vgt_strmout_size[i];
951 0 : if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) {
952 0 : DRM_ERROR("streamout %d bo too small: 0x%llx, 0x%lx\n",
953 : i, offset,
954 : radeon_bo_size(track->vgt_strmout_bo[i]));
955 0 : return -EINVAL;
956 : }
957 0 : } else {
958 0 : dev_warn(p->dev, "No buffer for streamout %d\n", i);
959 0 : return -EINVAL;
960 : }
961 : }
962 : }
963 0 : track->streamout_dirty = false;
964 0 : }
965 :
966 0 : if (track->sx_misc_kill_all_prims)
967 0 : return 0;
968 :
969 : /* check that we have a cb for each enabled target
970 : */
971 0 : if (track->cb_dirty) {
972 0 : tmp = track->cb_target_mask;
973 0 : for (i = 0; i < 8; i++) {
974 0 : u32 format = G_028C70_FORMAT(track->cb_color_info[i]);
975 :
976 0 : if (format != V_028C70_COLOR_INVALID &&
977 0 : (tmp >> (i * 4)) & 0xF) {
978 : /* at least one component is enabled */
979 0 : if (track->cb_color_bo[i] == NULL) {
980 0 : dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n",
981 : __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i);
982 0 : return -EINVAL;
983 : }
984 : /* check cb */
985 0 : r = evergreen_cs_track_validate_cb(p, i);
986 0 : if (r) {
987 0 : return r;
988 : }
989 : }
990 0 : }
991 0 : track->cb_dirty = false;
992 0 : }
993 :
994 0 : if (track->db_dirty) {
995 : /* Check stencil buffer */
996 0 : if (G_028044_FORMAT(track->db_s_info) != V_028044_STENCIL_INVALID &&
997 0 : G_028800_STENCIL_ENABLE(track->db_depth_control)) {
998 0 : r = evergreen_cs_track_validate_stencil(p);
999 0 : if (r)
1000 0 : return r;
1001 : }
1002 : /* Check depth buffer */
1003 0 : if (G_028040_FORMAT(track->db_z_info) != V_028040_Z_INVALID &&
1004 0 : G_028800_Z_ENABLE(track->db_depth_control)) {
1005 0 : r = evergreen_cs_track_validate_depth(p);
1006 0 : if (r)
1007 0 : return r;
1008 : }
1009 0 : track->db_dirty = false;
1010 0 : }
1011 :
1012 0 : return 0;
1013 0 : }
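: /* Worked example (illustrative values): vgt_strmout_config = 0x1
:  * (stream 0 enabled) with vgt_strmout_buffer_config = 0x4 (stream 0
:  * writes buffer 2) yields buffer_mask = 0x4, so only vgt_strmout_bo[2]
:  * must exist and be large enough for offset + size.
:  */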
1014 :
1015 : /**
1016 : * evergreen_cs_packet_parse_vline() - parse userspace VLINE packet
1017 : * @p: parser structure holding parsing context.
1018 : *
1019 : * This is an Evergreen(+)-specific function for parsing VLINE packets.
1020 : * The real work is done by the r600_cs_common_vline_parse() function.
1021 : * Here we just set up the ASIC-specific register tables and call
1022 : * the common implementation function.
1023 : */
1024 0 : static int evergreen_cs_packet_parse_vline(struct radeon_cs_parser *p)
1025 : {
1026 :
1027 : static uint32_t vline_start_end[6] = {
1028 : EVERGREEN_VLINE_START_END + EVERGREEN_CRTC0_REGISTER_OFFSET,
1029 : EVERGREEN_VLINE_START_END + EVERGREEN_CRTC1_REGISTER_OFFSET,
1030 : EVERGREEN_VLINE_START_END + EVERGREEN_CRTC2_REGISTER_OFFSET,
1031 : EVERGREEN_VLINE_START_END + EVERGREEN_CRTC3_REGISTER_OFFSET,
1032 : EVERGREEN_VLINE_START_END + EVERGREEN_CRTC4_REGISTER_OFFSET,
1033 : EVERGREEN_VLINE_START_END + EVERGREEN_CRTC5_REGISTER_OFFSET
1034 : };
1035 : static uint32_t vline_status[6] = {
1036 : EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
1037 : EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
1038 : EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
1039 : EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
1040 : EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
1041 : EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET
1042 : };
1043 :
1044 0 : return r600_cs_common_vline_parse(p, vline_start_end, vline_status);
1045 : }
1046 :
1047 0 : static int evergreen_packet0_check(struct radeon_cs_parser *p,
1048 : struct radeon_cs_packet *pkt,
1049 : unsigned idx, unsigned reg)
1050 : {
1051 : int r;
1052 :
1053 0 : switch (reg) {
1054 : case EVERGREEN_VLINE_START_END:
1055 0 : r = evergreen_cs_packet_parse_vline(p);
1056 0 : if (r) {
1057 0 : DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1058 : idx, reg);
1059 0 : return r;
1060 : }
1061 : break;
1062 : default:
1063 0 : printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
1064 : reg, idx);
1065 0 : return -EINVAL;
1066 : }
1067 0 : return 0;
1068 0 : }
1069 :
1070 0 : static int evergreen_cs_parse_packet0(struct radeon_cs_parser *p,
1071 : struct radeon_cs_packet *pkt)
1072 : {
1073 : unsigned reg, i;
1074 : unsigned idx;
1075 : int r;
1076 :
1077 0 : idx = pkt->idx + 1;
1078 0 : reg = pkt->reg;
1079 0 : for (i = 0; i <= pkt->count; i++, idx++, reg += 4) {
1080 0 : r = evergreen_packet0_check(p, pkt, idx, reg);
1081 0 : if (r) {
1082 0 : return r;
1083 : }
1084 : }
1085 0 : return 0;
1086 0 : }
1087 :
1088 : /**
1089 : * evergreen_cs_handle_reg() - process registers that need special handling.
1090 : * @p: parser structure holding parsing context
1091 : * @reg: register we are testing
1092 : * @idx: index into the cs buffer
1093 : */
1094 0 : static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
1095 : {
1096 0 : struct evergreen_cs_track *track = (struct evergreen_cs_track *)p->track;
1097 0 : struct radeon_bo_list *reloc;
1098 : u32 tmp, *ib;
1099 : int r;
1100 :
1101 0 : ib = p->ib.ptr;
1102 0 : switch (reg) {
1103 : /* force the following regs to 0 in an attempt to disable the out
1104 : * buffer; we would need to understand better how it works before we
1105 : * could do a proper security check on it (Jerome)
1106 : */
1107 : case SQ_ESGS_RING_SIZE:
1108 : case SQ_GSVS_RING_SIZE:
1109 : case SQ_ESTMP_RING_SIZE:
1110 : case SQ_GSTMP_RING_SIZE:
1111 : case SQ_HSTMP_RING_SIZE:
1112 : case SQ_LSTMP_RING_SIZE:
1113 : case SQ_PSTMP_RING_SIZE:
1114 : case SQ_VSTMP_RING_SIZE:
1115 : case SQ_ESGS_RING_ITEMSIZE:
1116 : case SQ_ESTMP_RING_ITEMSIZE:
1117 : case SQ_GSTMP_RING_ITEMSIZE:
1118 : case SQ_GSVS_RING_ITEMSIZE:
1119 : case SQ_GS_VERT_ITEMSIZE:
1120 : case SQ_GS_VERT_ITEMSIZE_1:
1121 : case SQ_GS_VERT_ITEMSIZE_2:
1122 : case SQ_GS_VERT_ITEMSIZE_3:
1123 : case SQ_GSVS_RING_OFFSET_1:
1124 : case SQ_GSVS_RING_OFFSET_2:
1125 : case SQ_GSVS_RING_OFFSET_3:
1126 : case SQ_HSTMP_RING_ITEMSIZE:
1127 : case SQ_LSTMP_RING_ITEMSIZE:
1128 : case SQ_PSTMP_RING_ITEMSIZE:
1129 : case SQ_VSTMP_RING_ITEMSIZE:
1130 : case VGT_TF_RING_SIZE:
1131 : /* get the value to populate the IB, don't remove */
1132 : /*tmp =radeon_get_ib_value(p, idx);
1133 : ib[idx] = 0;*/
1134 : break;
1135 : case SQ_ESGS_RING_BASE:
1136 : case SQ_GSVS_RING_BASE:
1137 : case SQ_ESTMP_RING_BASE:
1138 : case SQ_GSTMP_RING_BASE:
1139 : case SQ_HSTMP_RING_BASE:
1140 : case SQ_LSTMP_RING_BASE:
1141 : case SQ_PSTMP_RING_BASE:
1142 : case SQ_VSTMP_RING_BASE:
1143 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1144 0 : if (r) {
1145 0 : dev_warn(p->dev, "bad SET_CONTEXT_REG "
1146 : "0x%04X\n", reg);
1147 0 : return -EINVAL;
1148 : }
1149 0 : ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1150 0 : break;
1151 : case DB_DEPTH_CONTROL:
1152 0 : track->db_depth_control = radeon_get_ib_value(p, idx);
1153 0 : track->db_dirty = true;
1154 0 : break;
1155 : case CAYMAN_DB_EQAA:
1156 0 : if (p->rdev->family < CHIP_CAYMAN) {
1157 0 : dev_warn(p->dev, "bad SET_CONTEXT_REG "
1158 : "0x%04X\n", reg);
1159 0 : return -EINVAL;
1160 : }
1161 : break;
1162 : case CAYMAN_DB_DEPTH_INFO:
1163 0 : if (p->rdev->family < CHIP_CAYMAN) {
1164 0 : dev_warn(p->dev, "bad SET_CONTEXT_REG "
1165 : "0x%04X\n", reg);
1166 0 : return -EINVAL;
1167 : }
1168 : break;
1169 : case DB_Z_INFO:
1170 0 : track->db_z_info = radeon_get_ib_value(p, idx);
1171 0 : if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1172 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1173 0 : if (r) {
1174 0 : dev_warn(p->dev, "bad SET_CONTEXT_REG "
1175 : "0x%04X\n", reg);
1176 0 : return -EINVAL;
1177 : }
1178 0 : ib[idx] &= ~Z_ARRAY_MODE(0xf);
1179 0 : track->db_z_info &= ~Z_ARRAY_MODE(0xf);
1180 0 : ib[idx] |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1181 0 : track->db_z_info |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1182 0 : if (reloc->tiling_flags & RADEON_TILING_MACRO) {
1183 0 : unsigned bankw, bankh, mtaspect, tile_split;
1184 :
1185 0 : evergreen_tiling_fields(reloc->tiling_flags,
1186 : &bankw, &bankh, &mtaspect,
1187 : &tile_split);
1188 0 : ib[idx] |= DB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1189 0 : ib[idx] |= DB_TILE_SPLIT(tile_split) |
1190 0 : DB_BANK_WIDTH(bankw) |
1191 0 : DB_BANK_HEIGHT(bankh) |
1192 0 : DB_MACRO_TILE_ASPECT(mtaspect);
1193 0 : }
1194 : }
1195 0 : track->db_dirty = true;
1196 0 : break;
1197 : case DB_STENCIL_INFO:
1198 0 : track->db_s_info = radeon_get_ib_value(p, idx);
1199 0 : track->db_dirty = true;
1200 0 : break;
1201 : case DB_DEPTH_VIEW:
1202 0 : track->db_depth_view = radeon_get_ib_value(p, idx);
1203 0 : track->db_dirty = true;
1204 0 : break;
1205 : case DB_DEPTH_SIZE:
1206 0 : track->db_depth_size = radeon_get_ib_value(p, idx);
1207 0 : track->db_dirty = true;
1208 0 : break;
1209 : case R_02805C_DB_DEPTH_SLICE:
1210 0 : track->db_depth_slice = radeon_get_ib_value(p, idx);
1211 0 : track->db_dirty = true;
1212 0 : break;
1213 : case DB_Z_READ_BASE:
1214 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1215 0 : if (r) {
1216 0 : dev_warn(p->dev, "bad SET_CONTEXT_REG "
1217 : "0x%04X\n", reg);
1218 0 : return -EINVAL;
1219 : }
1220 0 : track->db_z_read_offset = radeon_get_ib_value(p, idx);
1221 0 : ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1222 0 : track->db_z_read_bo = reloc->robj;
1223 0 : track->db_dirty = true;
1224 0 : break;
1225 : case DB_Z_WRITE_BASE:
1226 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1227 0 : if (r) {
1228 0 : dev_warn(p->dev, "bad SET_CONTEXT_REG "
1229 : "0x%04X\n", reg);
1230 0 : return -EINVAL;
1231 : }
1232 0 : track->db_z_write_offset = radeon_get_ib_value(p, idx);
1233 0 : ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1234 0 : track->db_z_write_bo = reloc->robj;
1235 0 : track->db_dirty = true;
1236 0 : break;
1237 : case DB_STENCIL_READ_BASE:
1238 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1239 0 : if (r) {
1240 0 : dev_warn(p->dev, "bad SET_CONTEXT_REG "
1241 : "0x%04X\n", reg);
1242 0 : return -EINVAL;
1243 : }
1244 0 : track->db_s_read_offset = radeon_get_ib_value(p, idx);
1245 0 : ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1246 0 : track->db_s_read_bo = reloc->robj;
1247 0 : track->db_dirty = true;
1248 0 : break;
1249 : case DB_STENCIL_WRITE_BASE:
1250 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1251 0 : if (r) {
1252 0 : dev_warn(p->dev, "bad SET_CONTEXT_REG "
1253 : "0x%04X\n", reg);
1254 0 : return -EINVAL;
1255 : }
1256 0 : track->db_s_write_offset = radeon_get_ib_value(p, idx);
1257 0 : ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1258 0 : track->db_s_write_bo = reloc->robj;
1259 0 : track->db_dirty = true;
1260 0 : break;
1261 : case VGT_STRMOUT_CONFIG:
1262 0 : track->vgt_strmout_config = radeon_get_ib_value(p, idx);
1263 0 : track->streamout_dirty = true;
1264 0 : break;
1265 : case VGT_STRMOUT_BUFFER_CONFIG:
1266 0 : track->vgt_strmout_buffer_config = radeon_get_ib_value(p, idx);
1267 0 : track->streamout_dirty = true;
1268 0 : break;
1269 : case VGT_STRMOUT_BUFFER_BASE_0:
1270 : case VGT_STRMOUT_BUFFER_BASE_1:
1271 : case VGT_STRMOUT_BUFFER_BASE_2:
1272 : case VGT_STRMOUT_BUFFER_BASE_3:
1273 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1274 0 : if (r) {
1275 0 : dev_warn(p->dev, "bad SET_CONTEXT_REG "
1276 : "0x%04X\n", reg);
1277 0 : return -EINVAL;
1278 : }
1279 0 : tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16;
1280 0 : track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8;
1281 0 : ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1282 0 : track->vgt_strmout_bo[tmp] = reloc->robj;
1283 0 : track->streamout_dirty = true;
1284 0 : break;
1285 : case VGT_STRMOUT_BUFFER_SIZE_0:
1286 : case VGT_STRMOUT_BUFFER_SIZE_1:
1287 : case VGT_STRMOUT_BUFFER_SIZE_2:
1288 : case VGT_STRMOUT_BUFFER_SIZE_3:
1289 0 : tmp = (reg - VGT_STRMOUT_BUFFER_SIZE_0) / 16;
1290 : /* size in register is DWs, convert to bytes */
1291 0 : track->vgt_strmout_size[tmp] = radeon_get_ib_value(p, idx) * 4;
1292 0 : track->streamout_dirty = true;
1293 0 : break;
1294 : case CP_COHER_BASE:
1295 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1296 0 : if (r) {
1297 0 : dev_warn(p->dev, "missing reloc for CP_COHER_BASE "
1298 : "0x%04X\n", reg);
1299 0 : return -EINVAL;
1300 : }
1301 0 : ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
: break;
1302 : case CB_TARGET_MASK:
1303 0 : track->cb_target_mask = radeon_get_ib_value(p, idx);
1304 0 : track->cb_dirty = true;
1305 0 : break;
1306 : case CB_SHADER_MASK:
1307 0 : track->cb_shader_mask = radeon_get_ib_value(p, idx);
1308 0 : track->cb_dirty = true;
1309 0 : break;
1310 : case PA_SC_AA_CONFIG:
1311 0 : if (p->rdev->family >= CHIP_CAYMAN) {
1312 0 : dev_warn(p->dev, "bad SET_CONTEXT_REG "
1313 : "0x%04X\n", reg);
1314 0 : return -EINVAL;
1315 : }
1316 0 : tmp = radeon_get_ib_value(p, idx) & MSAA_NUM_SAMPLES_MASK;
1317 0 : track->nsamples = 1 << tmp;
1318 0 : break;
1319 : case CAYMAN_PA_SC_AA_CONFIG:
1320 0 : if (p->rdev->family < CHIP_CAYMAN) {
1321 0 : dev_warn(p->dev, "bad SET_CONTEXT_REG "
1322 : "0x%04X\n", reg);
1323 0 : return -EINVAL;
1324 : }
1325 0 : tmp = radeon_get_ib_value(p, idx) & CAYMAN_MSAA_NUM_SAMPLES_MASK;
1326 0 : track->nsamples = 1 << tmp;
1327 0 : break;
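: /* Note: in the CB_COLOR* cases below, targets 0-7 use a 0x3c-byte
:  * register stride per target while targets 8-11 use a 0x1c-byte stride,
:  * hence the "/ 0x3c" and "/ 0x1c + 8" index computations.
:  */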
1328 : case CB_COLOR0_VIEW:
1329 : case CB_COLOR1_VIEW:
1330 : case CB_COLOR2_VIEW:
1331 : case CB_COLOR3_VIEW:
1332 : case CB_COLOR4_VIEW:
1333 : case CB_COLOR5_VIEW:
1334 : case CB_COLOR6_VIEW:
1335 : case CB_COLOR7_VIEW:
1336 0 : tmp = (reg - CB_COLOR0_VIEW) / 0x3c;
1337 0 : track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
1338 0 : track->cb_dirty = true;
1339 0 : break;
1340 : case CB_COLOR8_VIEW:
1341 : case CB_COLOR9_VIEW:
1342 : case CB_COLOR10_VIEW:
1343 : case CB_COLOR11_VIEW:
1344 0 : tmp = ((reg - CB_COLOR8_VIEW) / 0x1c) + 8;
1345 0 : track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
1346 0 : track->cb_dirty = true;
1347 0 : break;
1348 : case CB_COLOR0_INFO:
1349 : case CB_COLOR1_INFO:
1350 : case CB_COLOR2_INFO:
1351 : case CB_COLOR3_INFO:
1352 : case CB_COLOR4_INFO:
1353 : case CB_COLOR5_INFO:
1354 : case CB_COLOR6_INFO:
1355 : case CB_COLOR7_INFO:
1356 0 : tmp = (reg - CB_COLOR0_INFO) / 0x3c;
1357 0 : track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
1358 0 : if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1359 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1360 0 : if (r) {
1361 0 : dev_warn(p->dev, "bad SET_CONTEXT_REG "
1362 : "0x%04X\n", reg);
1363 0 : return -EINVAL;
1364 : }
1365 0 : ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1366 0 : track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1367 0 : }
1368 0 : track->cb_dirty = true;
1369 0 : break;
1370 : case CB_COLOR8_INFO:
1371 : case CB_COLOR9_INFO:
1372 : case CB_COLOR10_INFO:
1373 : case CB_COLOR11_INFO:
1374 0 : tmp = ((reg - CB_COLOR8_INFO) / 0x1c) + 8;
1375 0 : track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
1376 0 : if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1377 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1378 0 : if (r) {
1379 0 : dev_warn(p->dev, "bad SET_CONTEXT_REG "
1380 : "0x%04X\n", reg);
1381 0 : return -EINVAL;
1382 : }
1383 0 : ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1384 0 : track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1385 0 : }
1386 0 : track->cb_dirty = true;
1387 0 : break;
1388 : case CB_COLOR0_PITCH:
1389 : case CB_COLOR1_PITCH:
1390 : case CB_COLOR2_PITCH:
1391 : case CB_COLOR3_PITCH:
1392 : case CB_COLOR4_PITCH:
1393 : case CB_COLOR5_PITCH:
1394 : case CB_COLOR6_PITCH:
1395 : case CB_COLOR7_PITCH:
1396 0 : tmp = (reg - CB_COLOR0_PITCH) / 0x3c;
1397 0 : track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
1398 0 : track->cb_dirty = true;
1399 0 : break;
1400 : case CB_COLOR8_PITCH:
1401 : case CB_COLOR9_PITCH:
1402 : case CB_COLOR10_PITCH:
1403 : case CB_COLOR11_PITCH:
1404 0 : tmp = ((reg - CB_COLOR8_PITCH) / 0x1c) + 8;
1405 0 : track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
1406 0 : track->cb_dirty = true;
1407 0 : break;
1408 : case CB_COLOR0_SLICE:
1409 : case CB_COLOR1_SLICE:
1410 : case CB_COLOR2_SLICE:
1411 : case CB_COLOR3_SLICE:
1412 : case CB_COLOR4_SLICE:
1413 : case CB_COLOR5_SLICE:
1414 : case CB_COLOR6_SLICE:
1415 : case CB_COLOR7_SLICE:
1416 0 : tmp = (reg - CB_COLOR0_SLICE) / 0x3c;
1417 0 : track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
1418 0 : track->cb_color_slice_idx[tmp] = idx;
1419 0 : track->cb_dirty = true;
1420 0 : break;
1421 : case CB_COLOR8_SLICE:
1422 : case CB_COLOR9_SLICE:
1423 : case CB_COLOR10_SLICE:
1424 : case CB_COLOR11_SLICE:
1425 0 : tmp = ((reg - CB_COLOR8_SLICE) / 0x1c) + 8;
1426 0 : track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
1427 0 : track->cb_color_slice_idx[tmp] = idx;
1428 0 : track->cb_dirty = true;
1429 0 : break;
1430 : case CB_COLOR0_ATTRIB:
1431 : case CB_COLOR1_ATTRIB:
1432 : case CB_COLOR2_ATTRIB:
1433 : case CB_COLOR3_ATTRIB:
1434 : case CB_COLOR4_ATTRIB:
1435 : case CB_COLOR5_ATTRIB:
1436 : case CB_COLOR6_ATTRIB:
1437 : case CB_COLOR7_ATTRIB:
1438 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1439 0 : if (r) {
1440 0 : dev_warn(p->dev, "bad SET_CONTEXT_REG "
1441 : "0x%04X\n", reg);
1442 0 : return -EINVAL;
1443 : }
1444 0 : if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1445 0 : if (reloc->tiling_flags & RADEON_TILING_MACRO) {
1446 0 : unsigned bankw, bankh, mtaspect, tile_split;
1447 :
1448 0 : evergreen_tiling_fields(reloc->tiling_flags,
1449 : &bankw, &bankh, &mtaspect,
1450 : &tile_split);
1451 0 : ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1452 0 : ib[idx] |= CB_TILE_SPLIT(tile_split) |
1453 0 : CB_BANK_WIDTH(bankw) |
1454 0 : CB_BANK_HEIGHT(bankh) |
1455 0 : CB_MACRO_TILE_ASPECT(mtaspect);
1456 0 : }
1457 : }
1458 0 : tmp = ((reg - CB_COLOR0_ATTRIB) / 0x3c);
1459 0 : track->cb_color_attrib[tmp] = ib[idx];
1460 0 : track->cb_dirty = true;
1461 0 : break;
1462 : case CB_COLOR8_ATTRIB:
1463 : case CB_COLOR9_ATTRIB:
1464 : case CB_COLOR10_ATTRIB:
1465 : case CB_COLOR11_ATTRIB:
1466 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1467 0 : if (r) {
1468 0 : dev_warn(p->dev, "bad SET_CONTEXT_REG "
1469 : "0x%04X\n", reg);
1470 0 : return -EINVAL;
1471 : }
1472 0 : if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1473 0 : if (reloc->tiling_flags & RADEON_TILING_MACRO) {
1474 0 : unsigned bankw, bankh, mtaspect, tile_split;
1475 :
1476 0 : evergreen_tiling_fields(reloc->tiling_flags,
1477 : &bankw, &bankh, &mtaspect,
1478 : &tile_split);
1479 0 : ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1480 0 : ib[idx] |= CB_TILE_SPLIT(tile_split) |
1481 0 : CB_BANK_WIDTH(bankw) |
1482 0 : CB_BANK_HEIGHT(bankh) |
1483 0 : CB_MACRO_TILE_ASPECT(mtaspect);
1484 0 : }
1485 : }
1486 0 : tmp = ((reg - CB_COLOR8_ATTRIB) / 0x1c) + 8;
1487 0 : track->cb_color_attrib[tmp] = ib[idx];
1488 0 : track->cb_dirty = true;
1489 0 : break;
1490 : case CB_COLOR0_FMASK:
1491 : case CB_COLOR1_FMASK:
1492 : case CB_COLOR2_FMASK:
1493 : case CB_COLOR3_FMASK:
1494 : case CB_COLOR4_FMASK:
1495 : case CB_COLOR5_FMASK:
1496 : case CB_COLOR6_FMASK:
1497 : case CB_COLOR7_FMASK:
1498 0 : tmp = (reg - CB_COLOR0_FMASK) / 0x3c;
1499 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1500 0 : if (r) {
1501 0 : dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1502 0 : return -EINVAL;
1503 : }
1504 0 : ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1505 0 : track->cb_color_fmask_bo[tmp] = reloc->robj;
1506 0 : break;
1507 : case CB_COLOR0_CMASK:
1508 : case CB_COLOR1_CMASK:
1509 : case CB_COLOR2_CMASK:
1510 : case CB_COLOR3_CMASK:
1511 : case CB_COLOR4_CMASK:
1512 : case CB_COLOR5_CMASK:
1513 : case CB_COLOR6_CMASK:
1514 : case CB_COLOR7_CMASK:
1515 0 : tmp = (reg - CB_COLOR0_CMASK) / 0x3c;
1516 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1517 0 : if (r) {
1518 0 : dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1519 0 : return -EINVAL;
1520 : }
1521 0 : ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1522 0 : track->cb_color_cmask_bo[tmp] = reloc->robj;
1523 0 : break;
1524 : case CB_COLOR0_FMASK_SLICE:
1525 : case CB_COLOR1_FMASK_SLICE:
1526 : case CB_COLOR2_FMASK_SLICE:
1527 : case CB_COLOR3_FMASK_SLICE:
1528 : case CB_COLOR4_FMASK_SLICE:
1529 : case CB_COLOR5_FMASK_SLICE:
1530 : case CB_COLOR6_FMASK_SLICE:
1531 : case CB_COLOR7_FMASK_SLICE:
1532 0 : tmp = (reg - CB_COLOR0_FMASK_SLICE) / 0x3c;
1533 0 : track->cb_color_fmask_slice[tmp] = radeon_get_ib_value(p, idx);
1534 0 : break;
1535 : case CB_COLOR0_CMASK_SLICE:
1536 : case CB_COLOR1_CMASK_SLICE:
1537 : case CB_COLOR2_CMASK_SLICE:
1538 : case CB_COLOR3_CMASK_SLICE:
1539 : case CB_COLOR4_CMASK_SLICE:
1540 : case CB_COLOR5_CMASK_SLICE:
1541 : case CB_COLOR6_CMASK_SLICE:
1542 : case CB_COLOR7_CMASK_SLICE:
1543 0 : tmp = (reg - CB_COLOR0_CMASK_SLICE) / 0x3c;
1544 0 : track->cb_color_cmask_slice[tmp] = radeon_get_ib_value(p, idx);
1545 0 : break;
1546 : case CB_COLOR0_BASE:
1547 : case CB_COLOR1_BASE:
1548 : case CB_COLOR2_BASE:
1549 : case CB_COLOR3_BASE:
1550 : case CB_COLOR4_BASE:
1551 : case CB_COLOR5_BASE:
1552 : case CB_COLOR6_BASE:
1553 : case CB_COLOR7_BASE:
1554 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1555 0 : if (r) {
1556 0 : dev_warn(p->dev, "bad SET_CONTEXT_REG "
1557 : "0x%04X\n", reg);
1558 0 : return -EINVAL;
1559 : }
1560 0 : tmp = (reg - CB_COLOR0_BASE) / 0x3c;
1561 0 : track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
1562 0 : ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1563 0 : track->cb_color_bo[tmp] = reloc->robj;
1564 0 : track->cb_dirty = true;
1565 0 : break;
1566 : case CB_COLOR8_BASE:
1567 : case CB_COLOR9_BASE:
1568 : case CB_COLOR10_BASE:
1569 : case CB_COLOR11_BASE:
1570 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1571 0 : if (r) {
1572 0 : dev_warn(p->dev, "bad SET_CONTEXT_REG "
1573 : "0x%04X\n", reg);
1574 0 : return -EINVAL;
1575 : }
1576 0 : tmp = ((reg - CB_COLOR8_BASE) / 0x1c) + 8;
1577 0 : track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
1578 0 : ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1579 0 : track->cb_color_bo[tmp] = reloc->robj;
1580 0 : track->cb_dirty = true;
1581 0 : break;
1582 : case DB_HTILE_DATA_BASE:
1583 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1584 0 : if (r) {
1585 0 : dev_warn(p->dev, "bad SET_CONTEXT_REG "
1586 : "0x%04X\n", reg);
1587 0 : return -EINVAL;
1588 : }
1589 0 : track->htile_offset = radeon_get_ib_value(p, idx);
1590 0 : ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1591 0 : track->htile_bo = reloc->robj;
1592 0 : track->db_dirty = true;
1593 0 : break;
1594 : case DB_HTILE_SURFACE:
1595 : /* 8x8 only */
1596 0 : track->htile_surface = radeon_get_ib_value(p, idx);
1597 : /* force 8x8 htile width and height */
1598 0 : ib[idx] |= 3;
1599 0 : track->db_dirty = true;
1600 0 : break;
1601 : case CB_IMMED0_BASE:
1602 : case CB_IMMED1_BASE:
1603 : case CB_IMMED2_BASE:
1604 : case CB_IMMED3_BASE:
1605 : case CB_IMMED4_BASE:
1606 : case CB_IMMED5_BASE:
1607 : case CB_IMMED6_BASE:
1608 : case CB_IMMED7_BASE:
1609 : case CB_IMMED8_BASE:
1610 : case CB_IMMED9_BASE:
1611 : case CB_IMMED10_BASE:
1612 : case CB_IMMED11_BASE:
1613 : case SQ_PGM_START_FS:
1614 : case SQ_PGM_START_ES:
1615 : case SQ_PGM_START_VS:
1616 : case SQ_PGM_START_GS:
1617 : case SQ_PGM_START_PS:
1618 : case SQ_PGM_START_HS:
1619 : case SQ_PGM_START_LS:
1620 : case SQ_CONST_MEM_BASE:
1621 : case SQ_ALU_CONST_CACHE_GS_0:
1622 : case SQ_ALU_CONST_CACHE_GS_1:
1623 : case SQ_ALU_CONST_CACHE_GS_2:
1624 : case SQ_ALU_CONST_CACHE_GS_3:
1625 : case SQ_ALU_CONST_CACHE_GS_4:
1626 : case SQ_ALU_CONST_CACHE_GS_5:
1627 : case SQ_ALU_CONST_CACHE_GS_6:
1628 : case SQ_ALU_CONST_CACHE_GS_7:
1629 : case SQ_ALU_CONST_CACHE_GS_8:
1630 : case SQ_ALU_CONST_CACHE_GS_9:
1631 : case SQ_ALU_CONST_CACHE_GS_10:
1632 : case SQ_ALU_CONST_CACHE_GS_11:
1633 : case SQ_ALU_CONST_CACHE_GS_12:
1634 : case SQ_ALU_CONST_CACHE_GS_13:
1635 : case SQ_ALU_CONST_CACHE_GS_14:
1636 : case SQ_ALU_CONST_CACHE_GS_15:
1637 : case SQ_ALU_CONST_CACHE_PS_0:
1638 : case SQ_ALU_CONST_CACHE_PS_1:
1639 : case SQ_ALU_CONST_CACHE_PS_2:
1640 : case SQ_ALU_CONST_CACHE_PS_3:
1641 : case SQ_ALU_CONST_CACHE_PS_4:
1642 : case SQ_ALU_CONST_CACHE_PS_5:
1643 : case SQ_ALU_CONST_CACHE_PS_6:
1644 : case SQ_ALU_CONST_CACHE_PS_7:
1645 : case SQ_ALU_CONST_CACHE_PS_8:
1646 : case SQ_ALU_CONST_CACHE_PS_9:
1647 : case SQ_ALU_CONST_CACHE_PS_10:
1648 : case SQ_ALU_CONST_CACHE_PS_11:
1649 : case SQ_ALU_CONST_CACHE_PS_12:
1650 : case SQ_ALU_CONST_CACHE_PS_13:
1651 : case SQ_ALU_CONST_CACHE_PS_14:
1652 : case SQ_ALU_CONST_CACHE_PS_15:
1653 : case SQ_ALU_CONST_CACHE_VS_0:
1654 : case SQ_ALU_CONST_CACHE_VS_1:
1655 : case SQ_ALU_CONST_CACHE_VS_2:
1656 : case SQ_ALU_CONST_CACHE_VS_3:
1657 : case SQ_ALU_CONST_CACHE_VS_4:
1658 : case SQ_ALU_CONST_CACHE_VS_5:
1659 : case SQ_ALU_CONST_CACHE_VS_6:
1660 : case SQ_ALU_CONST_CACHE_VS_7:
1661 : case SQ_ALU_CONST_CACHE_VS_8:
1662 : case SQ_ALU_CONST_CACHE_VS_9:
1663 : case SQ_ALU_CONST_CACHE_VS_10:
1664 : case SQ_ALU_CONST_CACHE_VS_11:
1665 : case SQ_ALU_CONST_CACHE_VS_12:
1666 : case SQ_ALU_CONST_CACHE_VS_13:
1667 : case SQ_ALU_CONST_CACHE_VS_14:
1668 : case SQ_ALU_CONST_CACHE_VS_15:
1669 : case SQ_ALU_CONST_CACHE_HS_0:
1670 : case SQ_ALU_CONST_CACHE_HS_1:
1671 : case SQ_ALU_CONST_CACHE_HS_2:
1672 : case SQ_ALU_CONST_CACHE_HS_3:
1673 : case SQ_ALU_CONST_CACHE_HS_4:
1674 : case SQ_ALU_CONST_CACHE_HS_5:
1675 : case SQ_ALU_CONST_CACHE_HS_6:
1676 : case SQ_ALU_CONST_CACHE_HS_7:
1677 : case SQ_ALU_CONST_CACHE_HS_8:
1678 : case SQ_ALU_CONST_CACHE_HS_9:
1679 : case SQ_ALU_CONST_CACHE_HS_10:
1680 : case SQ_ALU_CONST_CACHE_HS_11:
1681 : case SQ_ALU_CONST_CACHE_HS_12:
1682 : case SQ_ALU_CONST_CACHE_HS_13:
1683 : case SQ_ALU_CONST_CACHE_HS_14:
1684 : case SQ_ALU_CONST_CACHE_HS_15:
1685 : case SQ_ALU_CONST_CACHE_LS_0:
1686 : case SQ_ALU_CONST_CACHE_LS_1:
1687 : case SQ_ALU_CONST_CACHE_LS_2:
1688 : case SQ_ALU_CONST_CACHE_LS_3:
1689 : case SQ_ALU_CONST_CACHE_LS_4:
1690 : case SQ_ALU_CONST_CACHE_LS_5:
1691 : case SQ_ALU_CONST_CACHE_LS_6:
1692 : case SQ_ALU_CONST_CACHE_LS_7:
1693 : case SQ_ALU_CONST_CACHE_LS_8:
1694 : case SQ_ALU_CONST_CACHE_LS_9:
1695 : case SQ_ALU_CONST_CACHE_LS_10:
1696 : case SQ_ALU_CONST_CACHE_LS_11:
1697 : case SQ_ALU_CONST_CACHE_LS_12:
1698 : case SQ_ALU_CONST_CACHE_LS_13:
1699 : case SQ_ALU_CONST_CACHE_LS_14:
1700 : case SQ_ALU_CONST_CACHE_LS_15:
1701 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1702 0 : if (r) {
1703 0 : dev_warn(p->dev, "bad SET_CONTEXT_REG "
1704 : "0x%04X\n", reg);
1705 0 : return -EINVAL;
1706 : }
1707 0 : ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1708 0 : break;
1709 : case SX_MEMORY_EXPORT_BASE:
1710 0 : if (p->rdev->family >= CHIP_CAYMAN) {
1711 0 : dev_warn(p->dev, "bad SET_CONFIG_REG "
1712 : "0x%04X\n", reg);
1713 0 : return -EINVAL;
1714 : }
1715 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1716 0 : if (r) {
1717 0 : dev_warn(p->dev, "bad SET_CONFIG_REG "
1718 : "0x%04X\n", reg);
1719 0 : return -EINVAL;
1720 : }
1721 0 : ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1722 0 : break;
1723 : case CAYMAN_SX_SCATTER_EXPORT_BASE:
1724 0 : if (p->rdev->family < CHIP_CAYMAN) {
1725 0 : dev_warn(p->dev, "bad SET_CONTEXT_REG "
1726 : "0x%04X\n", reg);
1727 0 : return -EINVAL;
1728 : }
1729 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1730 0 : if (r) {
1731 0 : dev_warn(p->dev, "bad SET_CONTEXT_REG "
1732 : "0x%04X\n", reg);
1733 0 : return -EINVAL;
1734 : }
1735 0 : ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1736 0 : break;
1737 : case SX_MISC:
1738 0 : track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0;
1739 0 : break;
1740 : default:
1741 0 : dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1742 0 : return -EINVAL;
1743 : }
1744 0 : return 0;
1745 0 : }
1746 :
1747 : /**
1748 : * evergreen_is_safe_reg() - check whether a register is on the safe list
1749 : * @p: parser structure holding parsing context
1750 : * @reg: register we are testing
1751 : *
1752 : * Tests the register against the per-family reg_safe_bm bitmap and
1753 : * returns true if the register may be written directly, false otherwise.
1754 : */
1755 0 : static inline bool evergreen_is_safe_reg(struct radeon_cs_parser *p, u32 reg)
1756 : {
1757 0 : struct evergreen_cs_track *track = p->track;
1758 : u32 m, i;
1759 :
1760 0 : i = (reg >> 7);
1761 0 : if (unlikely(i >= REG_SAFE_BM_SIZE)) {
1762 0 : return false;
1763 : }
1764 0 : m = 1 << ((reg >> 2) & 31);
1765 0 : if (!(track->reg_safe_bm[i] & m))
1766 0 : return true;
1767 :
1768 0 : return false;
1769 0 : }
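
/*
 * Annotation (not part of the source): a minimal standalone sketch of
 * the bitmap lookup in evergreen_is_safe_reg() above.  Each 32-bit word
 * of reg_safe_bm covers 32 dword registers (128 bytes of register
 * space), so "reg >> 7" selects the word and "(reg >> 2) & 31" the bit;
 * a *clear* bit means the register may be written without further
 * checks.  Names below are illustrative only.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static bool reg_is_safe(const uint32_t *bm, size_t bm_words, uint32_t reg)
{
	size_t   word = reg >> 7;          /* 32 regs x 4 bytes per bitmap word */
	uint32_t bit  = 1u << ((reg >> 2) & 31);

	if (word >= bm_words)
		return false;              /* out of range: treat as unsafe */
	return !(bm[word] & bit);          /* clear bit == safe to write */
}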
1770 :
1771 0 : static int evergreen_packet3_check(struct radeon_cs_parser *p,
1772 : struct radeon_cs_packet *pkt)
1773 : {
1774 0 : struct radeon_bo_list *reloc;
1775 : struct evergreen_cs_track *track;
1776 : uint32_t *ib;
1777 : unsigned idx;
1778 : unsigned i;
1779 : unsigned start_reg, end_reg, reg;
1780 : int r;
1781 : u32 idx_value;
1782 :
1783 0 : track = (struct evergreen_cs_track *)p->track;
1784 0 : ib = p->ib.ptr;
1785 0 : idx = pkt->idx + 1;
1786 0 : idx_value = radeon_get_ib_value(p, idx);
1787 :
1788 0 : switch (pkt->opcode) {
1789 : case PACKET3_SET_PREDICATION:
1790 : {
1791 : int pred_op;
1792 : int tmp;
1793 : uint64_t offset;
1794 :
1795 0 : if (pkt->count != 1) {
1796 0 : DRM_ERROR("bad SET PREDICATION\n");
1797 0 : return -EINVAL;
1798 : }
1799 :
1800 0 : tmp = radeon_get_ib_value(p, idx + 1);
1801 0 : pred_op = (tmp >> 16) & 0x7;
1802 :
1803 : /* for the clear predicate operation */
1804 0 : if (pred_op == 0)
1805 0 : return 0;
1806 :
1807 0 : if (pred_op > 2) {
1808 0 : DRM_ERROR("bad SET PREDICATION operation %d\n", pred_op);
1809 0 : return -EINVAL;
1810 : }
1811 :
1812 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1813 0 : if (r) {
1814 0 : DRM_ERROR("bad SET PREDICATION\n");
1815 0 : return -EINVAL;
1816 : }
1817 :
1818 0 : offset = reloc->gpu_offset +
1819 0 : (idx_value & 0xfffffff0) +
1820 0 : ((u64)(tmp & 0xff) << 32);
1821 :
1822 0 : ib[idx + 0] = offset;
1823 0 : ib[idx + 1] = (tmp & 0xffffff00) | (upper_32_bits(offset) & 0xff);
1824 0 : }
1825 : break;
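
/*
 * Annotation: the address rebuild in SET_PREDICATION above is the
 * pattern used throughout this parser -- the low dword of the packet
 * carries the aligned low 32 bits of a GPU address and the following
 * dword carries bits [39:32] in its low byte.  A standalone sketch
 * with illustrative names, assuming addresses fit in 40 bits:
 */
#include <stdint.h>

static uint64_t join_gpu_addr(uint32_t lo, uint32_t hi, uint32_t align_mask)
{
	return (lo & align_mask) | ((uint64_t)(hi & 0xff) << 32);
}

static void split_gpu_addr(uint64_t addr, uint32_t align_mask,
			   uint32_t *lo, uint32_t *hi8)
{
	*lo  = (uint32_t)addr & align_mask;    /* e.g. 0xfffffff0 here */
	*hi8 = (uint32_t)(addr >> 32) & 0xff;  /* upper_32_bits(addr) & 0xff */
}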
1826 : case PACKET3_CONTEXT_CONTROL:
1827 0 : if (pkt->count != 1) {
1828 0 : DRM_ERROR("bad CONTEXT_CONTROL\n");
1829 0 : return -EINVAL;
1830 : }
1831 : break;
1832 : case PACKET3_INDEX_TYPE:
1833 : case PACKET3_NUM_INSTANCES:
1834 : case PACKET3_CLEAR_STATE:
1835 0 : if (pkt->count) {
1836 0 : DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n");
1837 0 : return -EINVAL;
1838 : }
1839 : break;
1840 : case CAYMAN_PACKET3_DEALLOC_STATE:
1841 0 : if (p->rdev->family < CHIP_CAYMAN) {
1842 0 : DRM_ERROR("bad PACKET3_DEALLOC_STATE\n");
1843 0 : return -EINVAL;
1844 : }
1845 0 : if (pkt->count) {
1846 0 : DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n");
1847 0 : return -EINVAL;
1848 : }
1849 : break;
1850 : case PACKET3_INDEX_BASE:
1851 : {
1852 : uint64_t offset;
1853 :
1854 0 : if (pkt->count != 1) {
1855 0 : DRM_ERROR("bad INDEX_BASE\n");
1856 0 : return -EINVAL;
1857 : }
1858 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1859 0 : if (r) {
1860 0 : DRM_ERROR("bad INDEX_BASE\n");
1861 0 : return -EINVAL;
1862 : }
1863 :
1864 0 : offset = reloc->gpu_offset +
1865 0 : idx_value +
1866 0 : ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
1867 :
1868 0 : ib[idx+0] = offset;
1869 0 : ib[idx+1] = upper_32_bits(offset) & 0xff;
1870 :
1871 0 : r = evergreen_cs_track_check(p);
1872 0 : if (r) {
1873 0 : dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1874 0 : return r;
1875 : }
1876 0 : break;
1877 : }
1878 : case PACKET3_INDEX_BUFFER_SIZE:
1879 : {
1880 0 : if (pkt->count != 0) {
1881 0 : DRM_ERROR("bad INDEX_BUFFER_SIZE\n");
1882 0 : return -EINVAL;
1883 : }
1884 : break;
1885 : }
1886 : case PACKET3_DRAW_INDEX:
1887 : {
1888 : uint64_t offset;
1889 0 : if (pkt->count != 3) {
1890 0 : DRM_ERROR("bad DRAW_INDEX\n");
1891 0 : return -EINVAL;
1892 : }
1893 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1894 0 : if (r) {
1895 0 : DRM_ERROR("bad DRAW_INDEX\n");
1896 0 : return -EINVAL;
1897 : }
1898 :
1899 0 : offset = reloc->gpu_offset +
1900 0 : idx_value +
1901 0 : ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
1902 :
1903 0 : ib[idx+0] = offset;
1904 0 : ib[idx+1] = upper_32_bits(offset) & 0xff;
1905 :
1906 0 : r = evergreen_cs_track_check(p);
1907 0 : if (r) {
1908 0 : dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1909 0 : return r;
1910 : }
1911 0 : break;
1912 : }
1913 : case PACKET3_DRAW_INDEX_2:
1914 : {
1915 : uint64_t offset;
1916 :
1917 0 : if (pkt->count != 4) {
1918 0 : DRM_ERROR("bad DRAW_INDEX_2\n");
1919 0 : return -EINVAL;
1920 : }
1921 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1922 0 : if (r) {
1923 0 : DRM_ERROR("bad DRAW_INDEX_2\n");
1924 0 : return -EINVAL;
1925 : }
1926 :
1927 0 : offset = reloc->gpu_offset +
1928 0 : radeon_get_ib_value(p, idx+1) +
1929 0 : ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
1930 :
1931 0 : ib[idx+1] = offset;
1932 0 : ib[idx+2] = upper_32_bits(offset) & 0xff;
1933 :
1934 0 : r = evergreen_cs_track_check(p);
1935 0 : if (r) {
1936 0 : dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1937 0 : return r;
1938 : }
1939 0 : break;
1940 : }
1941 : case PACKET3_DRAW_INDEX_AUTO:
1942 0 : if (pkt->count != 1) {
1943 0 : DRM_ERROR("bad DRAW_INDEX_AUTO\n");
1944 0 : return -EINVAL;
1945 : }
1946 0 : r = evergreen_cs_track_check(p);
1947 0 : if (r) {
1948 0 : dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
1949 0 : return r;
1950 : }
1951 : break;
1952 : case PACKET3_DRAW_INDEX_MULTI_AUTO:
1953 0 : if (pkt->count != 2) {
1954 0 : DRM_ERROR("bad DRAW_INDEX_MULTI_AUTO\n");
1955 0 : return -EINVAL;
1956 : }
1957 0 : r = evergreen_cs_track_check(p);
1958 0 : if (r) {
1959 0 : dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
1960 0 : return r;
1961 : }
1962 : break;
1963 : case PACKET3_DRAW_INDEX_IMMD:
1964 0 : if (pkt->count < 2) {
1965 0 : DRM_ERROR("bad DRAW_INDEX_IMMD\n");
1966 0 : return -EINVAL;
1967 : }
1968 0 : r = evergreen_cs_track_check(p);
1969 0 : if (r) {
1970 0 : dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1971 0 : return r;
1972 : }
1973 : break;
1974 : case PACKET3_DRAW_INDEX_OFFSET:
1975 0 : if (pkt->count != 2) {
1976 0 : DRM_ERROR("bad DRAW_INDEX_OFFSET\n");
1977 0 : return -EINVAL;
1978 : }
1979 0 : r = evergreen_cs_track_check(p);
1980 0 : if (r) {
1981 0 : dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1982 0 : return r;
1983 : }
1984 : break;
1985 : case PACKET3_DRAW_INDEX_OFFSET_2:
1986 0 : if (pkt->count != 3) {
1987 0 : DRM_ERROR("bad DRAW_INDEX_OFFSET_2\n");
1988 0 : return -EINVAL;
1989 : }
1990 0 : r = evergreen_cs_track_check(p);
1991 0 : if (r) {
1992 0 : dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1993 0 : return r;
1994 : }
1995 : break;
1996 : case PACKET3_SET_BASE:
1997 : {
1998 : /*
1999 : DW 1 HEADER Header of the packet. Shader_Type in bit 1 of the Header will correspond to the shader type of the Load, see Type-3 Packet.
2000 : 2 BASE_INDEX Bits [3:0] BASE_INDEX - Base Index specifies which base address is specified in the last two DWs.
2001 : 0001: DX11 Draw_Index_Indirect Patch Table Base: Base address for Draw_Index_Indirect data.
2002 : 3 ADDRESS_LO Bits [31:3] - Lower bits of QWORD-Aligned Address. Bits [2:0] - Reserved
2003 : 4 ADDRESS_HI Bits [31:8] - Reserved. Bits [7:0] - Upper bits of Address [47:32]
2004 : */
2005 0 : if (pkt->count != 2) {
2006 0 : DRM_ERROR("bad SET_BASE\n");
2007 0 : return -EINVAL;
2008 : }
2009 :
2010 : /* currently only supporting setting indirect draw buffer base address */
2011 0 : if (idx_value != 1) {
2012 0 : DRM_ERROR("bad SET_BASE\n");
2013 0 : return -EINVAL;
2014 : }
2015 :
2016 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2017 0 : if (r) {
2018 0 : DRM_ERROR("bad SET_BASE\n");
2019 0 : return -EINVAL;
2020 : }
2021 :
2022 0 : track->indirect_draw_buffer_size = radeon_bo_size(reloc->robj);
2023 :
2024 0 : ib[idx+1] = reloc->gpu_offset;
2025 0 : ib[idx+2] = upper_32_bits(reloc->gpu_offset) & 0xff;
2026 :
2027 0 : break;
2028 : }
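
/*
 * Annotation: for illustration, this is how a userspace driver might
 * emit the SET_BASE packet validated above, following the DW layout in
 * the comment.  PACKET3() is the usual type-3 header macro from the
 * evergreen headers; emit_set_base() itself is a hypothetical helper.
 */
#include <stdint.h>

static void emit_set_base(uint32_t *ib, unsigned *idx, uint64_t addr)
{
	ib[(*idx)++] = PACKET3(PACKET3_SET_BASE, 2);
	ib[(*idx)++] = 1;                              /* BASE_INDEX 1: indirect draw */
	ib[(*idx)++] = (uint32_t)addr & 0xfffffff8;    /* ADDRESS_LO, qword aligned */
	ib[(*idx)++] = (uint32_t)(addr >> 32) & 0xff;  /* ADDRESS_HI bits [47:32] */
}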
2029 : case PACKET3_DRAW_INDIRECT:
2030 : case PACKET3_DRAW_INDEX_INDIRECT:
2031 : {
2032 0 : u64 size = pkt->opcode == PACKET3_DRAW_INDIRECT ? 16 : 20;
2033 :
2034 : /*
2035 : DW 1 HEADER
2036 : 2 DATA_OFFSET Bits [31:0] - byte aligned offset where the required data structure starts. Bits 1:0 are zero
2037 : 3 DRAW_INITIATOR Draw Initiator Register. Written to the VGT_DRAW_INITIATOR register for the assigned context
2038 : */
2039 0 : if (pkt->count != 1) {
2040 0 : DRM_ERROR("bad DRAW_INDIRECT\n");
2041 0 : return -EINVAL;
2042 : }
2043 :
2044 0 : if (idx_value + size > track->indirect_draw_buffer_size) {
2045 0 : dev_warn(p->dev, "DRAW_INDIRECT buffer too small %u + %llu > %lu\n",
2046 : idx_value, size, track->indirect_draw_buffer_size);
2047 0 : return -EINVAL;
2048 : }
2049 :
2050 0 : r = evergreen_cs_track_check(p);
2051 0 : if (r) {
2052 0 : dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2053 0 : return r;
2054 : }
2055 0 : break;
2056 : }
2057 : case PACKET3_DISPATCH_DIRECT:
2058 0 : if (pkt->count != 3) {
2059 0 : DRM_ERROR("bad DISPATCH_DIRECT\n");
2060 0 : return -EINVAL;
2061 : }
2062 0 : r = evergreen_cs_track_check(p);
2063 0 : if (r) {
2064 0 : dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
2065 0 : return r;
2066 : }
2067 : break;
2068 : case PACKET3_DISPATCH_INDIRECT:
2069 0 : if (pkt->count != 1) {
2070 0 : DRM_ERROR("bad DISPATCH_INDIRECT\n");
2071 0 : return -EINVAL;
2072 : }
2073 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2074 0 : if (r) {
2075 0 : DRM_ERROR("bad DISPATCH_INDIRECT\n");
2076 0 : return -EINVAL;
2077 : }
2078 0 : ib[idx+0] = idx_value + (u32)(reloc->gpu_offset & 0xffffffff);
2079 0 : r = evergreen_cs_track_check(p);
2080 0 : if (r) {
2081 0 : dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2082 0 : return r;
2083 : }
2084 : break;
2085 : case PACKET3_WAIT_REG_MEM:
2086 0 : if (pkt->count != 5) {
2087 0 : DRM_ERROR("bad WAIT_REG_MEM\n");
2088 0 : return -EINVAL;
2089 : }
2090 : /* bit 4 is reg (0) or mem (1) */
2091 0 : if (idx_value & 0x10) {
2092 : uint64_t offset;
2093 :
2094 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2095 0 : if (r) {
2096 0 : DRM_ERROR("bad WAIT_REG_MEM\n");
2097 0 : return -EINVAL;
2098 : }
2099 :
2100 0 : offset = reloc->gpu_offset +
2101 0 : (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2102 0 : ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2103 :
2104 0 : ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffffc);
2105 0 : ib[idx+2] = upper_32_bits(offset) & 0xff;
2106 0 : } else if (idx_value & 0x100) {
2107 0 : DRM_ERROR("cannot use PFP on REG wait\n");
2108 0 : return -EINVAL;
2109 : }
2110 : break;
2111 : case PACKET3_CP_DMA:
2112 : {
2113 : u32 command, size, info;
2114 : u64 offset, tmp;
2115 0 : if (pkt->count != 4) {
2116 0 : DRM_ERROR("bad CP DMA\n");
2117 0 : return -EINVAL;
2118 : }
2119 0 : command = radeon_get_ib_value(p, idx+4);
2120 0 : size = command & 0x1fffff;
2121 0 : info = radeon_get_ib_value(p, idx+1);
2122 0 : if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
2123 0 : (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
2124 0 : ((((info & 0x00300000) >> 20) == 0) &&
2125 0 : (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
2126 0 : ((((info & 0x60000000) >> 29) == 0) &&
2127 0 : (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
2128 : /* non mem-to-mem copies require a dword-aligned count */
2129 0 : if (size % 4) {
2130 0 : DRM_ERROR("CP DMA command requires dw count alignment\n");
2131 0 : return -EINVAL;
2132 : }
2133 : }
2134 0 : if (command & PACKET3_CP_DMA_CMD_SAS) {
2135 : /* src address space is register */
2136 : /* GDS is ok */
2137 0 : if (((info & 0x60000000) >> 29) != 1) {
2138 0 : DRM_ERROR("CP DMA SAS not supported\n");
2139 0 : return -EINVAL;
2140 : }
2141 : } else {
2142 0 : if (command & PACKET3_CP_DMA_CMD_SAIC) {
2143 0 : DRM_ERROR("CP DMA SAIC only supported for registers\n");
2144 0 : return -EINVAL;
2145 : }
2146 : /* src address space is memory */
2147 0 : if (((info & 0x60000000) >> 29) == 0) {
2148 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2149 0 : if (r) {
2150 0 : DRM_ERROR("bad CP DMA SRC\n");
2151 0 : return -EINVAL;
2152 : }
2153 :
2154 0 : tmp = radeon_get_ib_value(p, idx) +
2155 0 : ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
2156 :
2157 0 : offset = reloc->gpu_offset + tmp;
2158 :
2159 0 : if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2160 0 : dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n",
2161 : tmp + size, radeon_bo_size(reloc->robj));
2162 0 : return -EINVAL;
2163 : }
2164 :
2165 0 : ib[idx] = offset;
2166 0 : ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2167 0 : } else if (((info & 0x60000000) >> 29) != 2) {
2168 0 : DRM_ERROR("bad CP DMA SRC_SEL\n");
2169 0 : return -EINVAL;
2170 : }
2171 : }
2172 0 : if (command & PACKET3_CP_DMA_CMD_DAS) {
2173 : /* dst address space is register */
2174 : /* GDS is ok */
2175 0 : if (((info & 0x00300000) >> 20) != 1) {
2176 0 : DRM_ERROR("CP DMA DAS not supported\n");
2177 0 : return -EINVAL;
2178 : }
2179 : } else {
2180 : /* dst address space is memory */
2181 0 : if (command & PACKET3_CP_DMA_CMD_DAIC) {
2182 0 : DRM_ERROR("CP DMA DAIC only supported for registers\n");
2183 0 : return -EINVAL;
2184 : }
2185 0 : if (((info & 0x00300000) >> 20) == 0) {
2186 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2187 0 : if (r) {
2188 0 : DRM_ERROR("bad CP DMA DST\n");
2189 0 : return -EINVAL;
2190 : }
2191 :
2192 0 : tmp = radeon_get_ib_value(p, idx+2) +
2193 0 : ((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32);
2194 :
2195 0 : offset = reloc->gpu_offset + tmp;
2196 :
2197 0 : if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2198 0 : dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n",
2199 : tmp + size, radeon_bo_size(reloc->robj));
2200 0 : return -EINVAL;
2201 : }
2202 :
2203 0 : ib[idx+2] = offset;
2204 0 : ib[idx+3] = upper_32_bits(offset) & 0xff;
2205 : } else {
2206 0 : DRM_ERROR("bad CP DMA DST_SEL\n");
2207 0 : return -EINVAL;
2208 : }
2209 : }
2210 0 : break;
2211 : }
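
/*
 * Annotation: standalone decode of the CP_DMA control fields tested
 * above (a sketch, not driver API).  Bits [30:29] of the info dword
 * select the source address space and bits [21:20] the destination
 * (0 = memory, 1 = GDS, src-only 2 = embedded data); the low 21 bits
 * of the command dword are the byte count.
 */
#include <stdint.h>

struct cp_dma_ctl {
	unsigned src_sel;  /* (info & 0x60000000) >> 29 */
	unsigned dst_sel;  /* (info & 0x00300000) >> 20 */
	unsigned count;    /* command & 0x1fffff */
};

static struct cp_dma_ctl cp_dma_decode(uint32_t info, uint32_t command)
{
	struct cp_dma_ctl c = {
		.src_sel = (info >> 29) & 0x3,
		.dst_sel = (info >> 20) & 0x3,
		.count   = command & 0x1fffff,
	};
	return c;
}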
2212 : case PACKET3_SURFACE_SYNC:
2213 0 : if (pkt->count != 3) {
2214 0 : DRM_ERROR("bad SURFACE_SYNC\n");
2215 0 : return -EINVAL;
2216 : }
2217 : /* 0xffffffff/0x0 is flush all cache flag */
2218 0 : if (radeon_get_ib_value(p, idx + 1) != 0xffffffff ||
2219 0 : radeon_get_ib_value(p, idx + 2) != 0) {
2220 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2221 0 : if (r) {
2222 0 : DRM_ERROR("bad SURFACE_SYNC\n");
2223 0 : return -EINVAL;
2224 : }
2225 0 : ib[idx+2] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2226 0 : }
2227 : break;
2228 : case PACKET3_EVENT_WRITE:
2229 0 : if (pkt->count != 2 && pkt->count != 0) {
2230 0 : DRM_ERROR("bad EVENT_WRITE\n");
2231 0 : return -EINVAL;
2232 : }
2233 0 : if (pkt->count) {
2234 : uint64_t offset;
2235 :
2236 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2237 0 : if (r) {
2238 0 : DRM_ERROR("bad EVENT_WRITE\n");
2239 0 : return -EINVAL;
2240 : }
2241 0 : offset = reloc->gpu_offset +
2242 0 : (radeon_get_ib_value(p, idx+1) & 0xfffffff8) +
2243 0 : ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2244 :
2245 0 : ib[idx+1] = offset & 0xfffffff8;
2246 0 : ib[idx+2] = upper_32_bits(offset) & 0xff;
2247 0 : }
2248 : break;
2249 : case PACKET3_EVENT_WRITE_EOP:
2250 : {
2251 : uint64_t offset;
2252 :
2253 0 : if (pkt->count != 4) {
2254 0 : DRM_ERROR("bad EVENT_WRITE_EOP\n");
2255 0 : return -EINVAL;
2256 : }
2257 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2258 0 : if (r) {
2259 0 : DRM_ERROR("bad EVENT_WRITE_EOP\n");
2260 0 : return -EINVAL;
2261 : }
2262 :
2263 0 : offset = reloc->gpu_offset +
2264 0 : (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2265 0 : ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2266 :
2267 0 : ib[idx+1] = offset & 0xfffffffc;
2268 0 : ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2269 0 : break;
2270 : }
2271 : case PACKET3_EVENT_WRITE_EOS:
2272 : {
2273 : uint64_t offset;
2274 :
2275 0 : if (pkt->count != 3) {
2276 0 : DRM_ERROR("bad EVENT_WRITE_EOS\n");
2277 0 : return -EINVAL;
2278 : }
2279 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2280 0 : if (r) {
2281 0 : DRM_ERROR("bad EVENT_WRITE_EOS\n");
2282 0 : return -EINVAL;
2283 : }
2284 :
2285 0 : offset = reloc->gpu_offset +
2286 0 : (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2287 0 : ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2288 :
2289 0 : ib[idx+1] = offset & 0xfffffffc;
2290 0 : ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2291 0 : break;
2292 : }
2293 : case PACKET3_SET_CONFIG_REG:
2294 0 : start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
2295 0 : end_reg = 4 * pkt->count + start_reg - 4;
2296 0 : if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
2297 0 : (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
2298 0 : (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
2299 0 : DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
2300 0 : return -EINVAL;
2301 : }
2302 0 : for (reg = start_reg, idx++; reg <= end_reg; reg += 4, idx++) {
2303 0 : if (evergreen_is_safe_reg(p, reg))
2304 : continue;
2305 0 : r = evergreen_cs_handle_reg(p, reg, idx);
2306 0 : if (r)
2307 0 : return r;
2308 : }
2309 : break;
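
/*
 * Annotation: worked example of the range math above.  With
 * idx_value == 0x10 and pkt->count == 3:
 *   start_reg = (0x10 << 2) + PACKET3_SET_CONFIG_REG_START
 *             = 0x40 + 0x8000 = 0x8040
 *   end_reg   = 4 * 3 + 0x8040 - 4 = 0x8048
 * so the loop checks the three dword registers 0x8040, 0x8044 and
 * 0x8048 against the safe-register bitmap, one IB dword each.
 * (PACKET3_SET_CONFIG_REG_START is 0x8000 on these parts.)
 */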
2310 : case PACKET3_SET_CONTEXT_REG:
2311 0 : start_reg = (idx_value << 2) + PACKET3_SET_CONTEXT_REG_START;
2312 0 : end_reg = 4 * pkt->count + start_reg - 4;
2313 0 : if ((start_reg < PACKET3_SET_CONTEXT_REG_START) ||
2314 0 : (start_reg >= PACKET3_SET_CONTEXT_REG_END) ||
2315 0 : (end_reg >= PACKET3_SET_CONTEXT_REG_END)) {
2316 0 : DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n");
2317 0 : return -EINVAL;
2318 : }
2319 0 : for (reg = start_reg, idx++; reg <= end_reg; reg += 4, idx++) {
2320 0 : if (evergreen_is_safe_reg(p, reg))
2321 : continue;
2322 0 : r = evergreen_cs_handle_reg(p, reg, idx);
2323 0 : if (r)
2324 0 : return r;
2325 : }
2326 : break;
2327 : case PACKET3_SET_RESOURCE:
2328 0 : if (pkt->count % 8) {
2329 0 : DRM_ERROR("bad SET_RESOURCE\n");
2330 0 : return -EINVAL;
2331 : }
2332 0 : start_reg = (idx_value << 2) + PACKET3_SET_RESOURCE_START;
2333 0 : end_reg = 4 * pkt->count + start_reg - 4;
2334 0 : if ((start_reg < PACKET3_SET_RESOURCE_START) ||
2335 0 : (start_reg >= PACKET3_SET_RESOURCE_END) ||
2336 0 : (end_reg >= PACKET3_SET_RESOURCE_END)) {
2337 0 : DRM_ERROR("bad SET_RESOURCE\n");
2338 0 : return -EINVAL;
2339 : }
2340 0 : for (i = 0; i < (pkt->count / 8); i++) {
2341 : struct radeon_bo *texture, *mipmap;
2342 : u32 toffset, moffset;
2343 : u32 size, offset, mip_address, tex_dim;
2344 :
2345 0 : switch (G__SQ_CONSTANT_TYPE(radeon_get_ib_value(p, idx+1+(i*8)+7))) {
2346 : case SQ_TEX_VTX_VALID_TEXTURE:
2347 : /* tex base */
2348 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2349 0 : if (r) {
2350 0 : DRM_ERROR("bad SET_RESOURCE (tex)\n");
2351 0 : return -EINVAL;
2352 : }
2353 0 : if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
2354 0 : ib[idx+1+(i*8)+1] |=
2355 0 : TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
2356 0 : if (reloc->tiling_flags & RADEON_TILING_MACRO) {
2357 0 : unsigned bankw, bankh, mtaspect, tile_split;
2358 :
2359 0 : evergreen_tiling_fields(reloc->tiling_flags,
2360 : &bankw, &bankh, &mtaspect,
2361 : &tile_split);
2362 0 : ib[idx+1+(i*8)+6] |= TEX_TILE_SPLIT(tile_split);
2363 0 : ib[idx+1+(i*8)+7] |=
2364 0 : TEX_BANK_WIDTH(bankw) |
2365 0 : TEX_BANK_HEIGHT(bankh) |
2366 0 : MACRO_TILE_ASPECT(mtaspect) |
2367 0 : TEX_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
2368 0 : }
2369 : }
2370 0 : texture = reloc->robj;
2371 0 : toffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2372 :
2373 : /* tex mip base */
2374 0 : tex_dim = ib[idx+1+(i*8)+0] & 0x7;
2375 0 : mip_address = ib[idx+1+(i*8)+3];
2376 :
2377 0 : if ((tex_dim == SQ_TEX_DIM_2D_MSAA || tex_dim == SQ_TEX_DIM_2D_ARRAY_MSAA) &&
2378 0 : !mip_address &&
2379 0 : !radeon_cs_packet_next_is_pkt3_nop(p)) {
2380 : /* MIP_ADDRESS should point to FMASK for an MSAA texture.
2381 : * It should be 0 if FMASK is disabled. */
2382 : moffset = 0;
2383 : mipmap = NULL;
2384 0 : } else {
2385 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2386 0 : if (r) {
2387 0 : DRM_ERROR("bad SET_RESOURCE (tex)\n");
2388 0 : return -EINVAL;
2389 : }
2390 0 : moffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2391 0 : mipmap = reloc->robj;
2392 : }
2393 :
2394 0 : r = evergreen_cs_track_validate_texture(p, texture, mipmap, idx+1+(i*8));
2395 0 : if (r)
2396 0 : return r;
2397 0 : ib[idx+1+(i*8)+2] += toffset;
2398 0 : ib[idx+1+(i*8)+3] += moffset;
2399 0 : break;
2400 : case SQ_TEX_VTX_VALID_BUFFER:
2401 : {
2402 : uint64_t offset64;
2403 : /* vtx base */
2404 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2405 0 : if (r) {
2406 0 : DRM_ERROR("bad SET_RESOURCE (vtx)\n");
2407 0 : return -EINVAL;
2408 : }
2409 0 : offset = radeon_get_ib_value(p, idx+1+(i*8)+0);
2410 0 : size = radeon_get_ib_value(p, idx+1+(i*8)+1);
2411 0 : if (p->rdev && (size + offset) > radeon_bo_size(reloc->robj)) {
2412 : /* force size to size of the buffer */
2413 0 : dev_warn(p->dev, "vbo resource seems too big for the bo\n");
2414 0 : ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj) - offset;
2415 0 : }
2416 :
2417 0 : offset64 = reloc->gpu_offset + offset;
2418 0 : ib[idx+1+(i*8)+0] = offset64;
2419 0 : ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) |
2420 0 : (upper_32_bits(offset64) & 0xff);
2421 0 : break;
2422 : }
2423 : case SQ_TEX_VTX_INVALID_TEXTURE:
2424 : case SQ_TEX_VTX_INVALID_BUFFER:
2425 : default:
2426 0 : DRM_ERROR("bad SET_RESOURCE\n");
2427 0 : return -EINVAL;
2428 : }
2429 0 : }
2430 : break;
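
/*
 * Annotation: layout note for the SET_RESOURCE walk above.  idx points
 * at the packet's first payload dword (the resource start offset), and
 * each resource is an 8-dword descriptor, so field n of resource i
 * lives at ib[idx + 1 + i*8 + n] -- dword 7 holds the type checked by
 * G__SQ_CONSTANT_TYPE().  A sketch of the addressing (illustrative
 * helper, same IB layout as the parser assumes):
 */
#include <stdint.h>

static uint32_t *sq_resource_dword(uint32_t *ib, unsigned idx,
				   unsigned res, unsigned field)
{
	return &ib[idx + 1 + res * 8 + field];  /* field in [0, 7] */
}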
2431 : case PACKET3_SET_ALU_CONST:
2432 : /* XXX fix me ALU const buffers only */
2433 : break;
2434 : case PACKET3_SET_BOOL_CONST:
2435 0 : start_reg = (idx_value << 2) + PACKET3_SET_BOOL_CONST_START;
2436 0 : end_reg = 4 * pkt->count + start_reg - 4;
2437 0 : if ((start_reg < PACKET3_SET_BOOL_CONST_START) ||
2438 0 : (start_reg >= PACKET3_SET_BOOL_CONST_END) ||
2439 0 : (end_reg >= PACKET3_SET_BOOL_CONST_END)) {
2440 0 : DRM_ERROR("bad SET_BOOL_CONST\n");
2441 0 : return -EINVAL;
2442 : }
2443 : break;
2444 : case PACKET3_SET_LOOP_CONST:
2445 0 : start_reg = (idx_value << 2) + PACKET3_SET_LOOP_CONST_START;
2446 0 : end_reg = 4 * pkt->count + start_reg - 4;
2447 0 : if ((start_reg < PACKET3_SET_LOOP_CONST_START) ||
2448 0 : (start_reg >= PACKET3_SET_LOOP_CONST_END) ||
2449 0 : (end_reg >= PACKET3_SET_LOOP_CONST_END)) {
2450 0 : DRM_ERROR("bad SET_LOOP_CONST\n");
2451 0 : return -EINVAL;
2452 : }
2453 : break;
2454 : case PACKET3_SET_CTL_CONST:
2455 0 : start_reg = (idx_value << 2) + PACKET3_SET_CTL_CONST_START;
2456 0 : end_reg = 4 * pkt->count + start_reg - 4;
2457 0 : if ((start_reg < PACKET3_SET_CTL_CONST_START) ||
2458 0 : (start_reg >= PACKET3_SET_CTL_CONST_END) ||
2459 0 : (end_reg >= PACKET3_SET_CTL_CONST_END)) {
2460 0 : DRM_ERROR("bad SET_CTL_CONST\n");
2461 0 : return -EINVAL;
2462 : }
2463 : break;
2464 : case PACKET3_SET_SAMPLER:
2465 0 : if (pkt->count % 3) {
2466 0 : DRM_ERROR("bad SET_SAMPLER\n");
2467 0 : return -EINVAL;
2468 : }
2469 0 : start_reg = (idx_value << 2) + PACKET3_SET_SAMPLER_START;
2470 0 : end_reg = 4 * pkt->count + start_reg - 4;
2471 0 : if ((start_reg < PACKET3_SET_SAMPLER_START) ||
2472 0 : (start_reg >= PACKET3_SET_SAMPLER_END) ||
2473 0 : (end_reg >= PACKET3_SET_SAMPLER_END)) {
2474 0 : DRM_ERROR("bad SET_SAMPLER\n");
2475 0 : return -EINVAL;
2476 : }
2477 : break;
2478 : case PACKET3_STRMOUT_BUFFER_UPDATE:
2479 0 : if (pkt->count != 4) {
2480 0 : DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (invalid count)\n");
2481 0 : return -EINVAL;
2482 : }
2483 : /* Updating memory at DST_ADDRESS. */
2484 0 : if (idx_value & 0x1) {
2485 : u64 offset;
2486 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2487 0 : if (r) {
2488 0 : DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n");
2489 0 : return -EINVAL;
2490 : }
2491 0 : offset = radeon_get_ib_value(p, idx+1);
2492 0 : offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2493 0 : if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2494 0 : DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%llx, 0x%lx\n",
2495 : offset + 4, radeon_bo_size(reloc->robj));
2496 0 : return -EINVAL;
2497 : }
2498 0 : offset += reloc->gpu_offset;
2499 0 : ib[idx+1] = offset;
2500 0 : ib[idx+2] = upper_32_bits(offset) & 0xff;
2501 0 : }
2502 : /* Reading data from SRC_ADDRESS. */
2503 0 : if (((idx_value >> 1) & 0x3) == 2) {
2504 : u64 offset;
2505 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2506 0 : if (r) {
2507 0 : DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n");
2508 0 : return -EINVAL;
2509 : }
2510 0 : offset = radeon_get_ib_value(p, idx+3);
2511 0 : offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2512 0 : if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2513 0 : DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%llx, 0x%lx\n",
2514 : offset + 4, radeon_bo_size(reloc->robj));
2515 0 : return -EINVAL;
2516 : }
2517 0 : offset += reloc->gpu_offset;
2518 0 : ib[idx+3] = offset;
2519 0 : ib[idx+4] = upper_32_bits(offset) & 0xff;
2520 0 : }
2521 : break;
2522 : case PACKET3_MEM_WRITE:
2523 : {
2524 : u64 offset;
2525 :
2526 0 : if (pkt->count != 3) {
2527 0 : DRM_ERROR("bad MEM_WRITE (invalid count)\n");
2528 0 : return -EINVAL;
2529 : }
2530 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2531 0 : if (r) {
2532 0 : DRM_ERROR("bad MEM_WRITE (missing reloc)\n");
2533 0 : return -EINVAL;
2534 : }
2535 0 : offset = radeon_get_ib_value(p, idx+0);
2536 0 : offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL;
2537 0 : if (offset & 0x7) {
2538 0 : DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n");
2539 0 : return -EINVAL;
2540 : }
2541 0 : if ((offset + 8) > radeon_bo_size(reloc->robj)) {
2542 0 : DRM_ERROR("bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n",
2543 : offset + 8, radeon_bo_size(reloc->robj));
2544 0 : return -EINVAL;
2545 : }
2546 0 : offset += reloc->gpu_offset;
2547 0 : ib[idx+0] = offset;
2548 0 : ib[idx+1] = upper_32_bits(offset) & 0xff;
2549 0 : break;
2550 : }
2551 : case PACKET3_COPY_DW:
2552 0 : if (pkt->count != 4) {
2553 0 : DRM_ERROR("bad COPY_DW (invalid count)\n");
2554 0 : return -EINVAL;
2555 : }
2556 0 : if (idx_value & 0x1) {
2557 : u64 offset;
2558 : /* SRC is memory. */
2559 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2560 0 : if (r) {
2561 0 : DRM_ERROR("bad COPY_DW (missing src reloc)\n");
2562 0 : return -EINVAL;
2563 : }
2564 0 : offset = radeon_get_ib_value(p, idx+1);
2565 0 : offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2566 0 : if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2567 0 : DRM_ERROR("bad COPY_DW src bo too small: 0x%llx, 0x%lx\n",
2568 : offset + 4, radeon_bo_size(reloc->robj));
2569 0 : return -EINVAL;
2570 : }
2571 0 : offset += reloc->gpu_offset;
2572 0 : ib[idx+1] = offset;
2573 0 : ib[idx+2] = upper_32_bits(offset) & 0xff;
2574 0 : } else {
2575 : /* SRC is a reg. */
2576 0 : reg = radeon_get_ib_value(p, idx+1) << 2;
2577 0 : if (!evergreen_is_safe_reg(p, reg)) {
2578 0 : dev_warn(p->dev, "forbidden register 0x%08x at %d\n",
2579 : reg, idx + 1);
2580 0 : return -EINVAL;
2581 : }
2582 : }
2583 0 : if (idx_value & 0x2) {
2584 : u64 offset;
2585 : /* DST is memory. */
2586 0 : r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2587 0 : if (r) {
2588 0 : DRM_ERROR("bad COPY_DW (missing dst reloc)\n");
2589 0 : return -EINVAL;
2590 : }
2591 0 : offset = radeon_get_ib_value(p, idx+3);
2592 0 : offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2593 0 : if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2594 0 : DRM_ERROR("bad COPY_DW dst bo too small: 0x%llx, 0x%lx\n",
2595 : offset + 4, radeon_bo_size(reloc->robj));
2596 0 : return -EINVAL;
2597 : }
2598 0 : offset += reloc->gpu_offset;
2599 0 : ib[idx+3] = offset;
2600 0 : ib[idx+4] = upper_32_bits(offset) & 0xff;
2601 0 : } else {
2602 : /* DST is a reg. */
2603 0 : reg = radeon_get_ib_value(p, idx+3) << 2;
2604 0 : if (!evergreen_is_safe_reg(p, reg)) {
2605 0 : dev_warn(p->dev, "forbidden register 0x%08x at %d\n",
2606 : reg, idx + 3);
2607 0 : return -EINVAL;
2608 : }
2609 : }
2610 : break;
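
/*
 * Annotation: control-dword decode for COPY_DW above (sketch, not
 * driver API).  Bit 0 selects a memory source when set (a register
 * index otherwise) and bit 1 does the same for the destination, which
 * is why the case consumes up to two relocs.
 */
#include <stdbool.h>
#include <stdint.h>

struct copy_dw_ctl {
	bool src_is_mem;  /* ctl & 0x1 */
	bool dst_is_mem;  /* ctl & 0x2 */
};

static struct copy_dw_ctl copy_dw_decode(uint32_t ctl)
{
	struct copy_dw_ctl c = {
		.src_is_mem = (ctl & 0x1) != 0,
		.dst_is_mem = (ctl & 0x2) != 0,
	};
	return c;
}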
2611 : case PACKET3_NOP:
2612 : break;
2613 : default:
2614 0 : DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
2615 0 : return -EINVAL;
2616 : }
2617 0 : return 0;
2618 0 : }
2619 :
2620 0 : int evergreen_cs_parse(struct radeon_cs_parser *p)
2621 : {
2622 0 : struct radeon_cs_packet pkt;
2623 : struct evergreen_cs_track *track;
2624 : u32 tmp;
2625 : int r;
2626 :
2627 0 : if (p->track == NULL) {
2628 : /* initialize tracker, we are in kms */
2629 0 : track = kzalloc(sizeof(*track), GFP_KERNEL);
2630 0 : if (track == NULL)
2631 0 : return -ENOMEM;
2632 0 : evergreen_cs_track_init(track);
2633 0 : if (p->rdev->family >= CHIP_CAYMAN) {
2634 0 : tmp = p->rdev->config.cayman.tile_config;
2635 0 : track->reg_safe_bm = cayman_reg_safe_bm;
2636 0 : } else {
2637 0 : tmp = p->rdev->config.evergreen.tile_config;
2638 0 : track->reg_safe_bm = evergreen_reg_safe_bm;
2639 : }
2640 : BUILD_BUG_ON(ARRAY_SIZE(cayman_reg_safe_bm) != REG_SAFE_BM_SIZE);
2641 : BUILD_BUG_ON(ARRAY_SIZE(evergreen_reg_safe_bm) != REG_SAFE_BM_SIZE);
2642 0 : switch (tmp & 0xf) {
2643 : case 0:
2644 0 : track->npipes = 1;
2645 0 : break;
2646 : case 1:
2647 : default:
2648 0 : track->npipes = 2;
2649 0 : break;
2650 : case 2:
2651 0 : track->npipes = 4;
2652 0 : break;
2653 : case 3:
2654 0 : track->npipes = 8;
2655 0 : break;
2656 : }
2657 :
2658 0 : switch ((tmp & 0xf0) >> 4) {
2659 : case 0:
2660 0 : track->nbanks = 4;
2661 0 : break;
2662 : case 1:
2663 : default:
2664 0 : track->nbanks = 8;
2665 0 : break;
2666 : case 2:
2667 0 : track->nbanks = 16;
2668 0 : break;
2669 : }
2670 :
2671 0 : switch ((tmp & 0xf00) >> 8) {
2672 : case 0:
2673 0 : track->group_size = 256;
2674 0 : break;
2675 : case 1:
2676 : default:
2677 0 : track->group_size = 512;
2678 0 : break;
2679 : }
2680 :
2681 0 : switch ((tmp & 0xf000) >> 12) {
2682 : case 0:
2683 0 : track->row_size = 1;
2684 0 : break;
2685 : case 1:
2686 : default:
2687 0 : track->row_size = 2;
2688 0 : break;
2689 : case 2:
2690 0 : track->row_size = 4;
2691 0 : break;
2692 : }
2693 :
2694 0 : p->track = track;
2695 0 : }
2696 0 : do {
2697 0 : r = radeon_cs_packet_parse(p, &pkt, p->idx);
2698 0 : if (r) {
2699 0 : kfree(p->track);
2700 0 : p->track = NULL;
2701 0 : return r;
2702 : }
2703 0 : p->idx += pkt.count + 2;
2704 0 : switch (pkt.type) {
2705 : case RADEON_PACKET_TYPE0:
2706 0 : r = evergreen_cs_parse_packet0(p, &pkt);
2707 0 : break;
2708 : case RADEON_PACKET_TYPE2:
2709 : break;
2710 : case RADEON_PACKET_TYPE3:
2711 0 : r = evergreen_packet3_check(p, &pkt);
2712 0 : break;
2713 : default:
2714 0 : DRM_ERROR("Unknown packet type %d !\n", pkt.type);
2715 0 : kfree(p->track);
2716 0 : p->track = NULL;
2717 0 : return -EINVAL;
2718 : }
2719 0 : if (r) {
2720 0 : kfree(p->track);
2721 0 : p->track = NULL;
2722 0 : return r;
2723 : }
2724 0 : } while (p->idx < p->chunk_ib->length_dw);
2725 : #if 0
2726 : for (r = 0; r < p->ib.length_dw; r++) {
2727 : printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
2728 : mdelay(1);
2729 : }
2730 : #endif
2731 0 : kfree(p->track);
2732 0 : p->track = NULL;
2733 0 : return 0;
2734 0 : }
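
/*
 * Annotation: worked example of the tile_config decode above, taking
 * the nibbles low to high (values follow the switch statements; the
 * example config is made up):
 *   tmp = 0x2121 ->  nibble 0 = 1: npipes     = 2
 *                    nibble 1 = 2: nbanks     = 16
 *                    nibble 2 = 1: group_size = 512
 *                    nibble 3 = 2: row_size   = 4
 */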
2735 :
2736 : /**
2737 : * evergreen_dma_cs_parse() - parse the DMA IB
2738 : * @p: parser structure holding parsing context.
2739 : *
2740 : * Parses the DMA IB from the CS ioctl and updates
2741 : * the GPU addresses based on the reloc information and
2742 : * checks for errors. (Evergreen-Cayman)
2743 : * Returns 0 for success and an error on failure.
2744 : **/
2745 0 : int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
2746 : {
2747 0 : struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
2748 0 : struct radeon_bo_list *src_reloc, *dst_reloc, *dst2_reloc;
2749 : u32 header, cmd, count, sub_cmd;
2750 0 : uint32_t *ib = p->ib.ptr;
2751 : u32 idx;
2752 : u64 src_offset, dst_offset, dst2_offset;
2753 : int r;
2754 :
2755 0 : do {
2756 0 : if (p->idx >= ib_chunk->length_dw) {
2757 0 : DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
2758 : p->idx, ib_chunk->length_dw);
2759 0 : return -EINVAL;
2760 : }
2761 : idx = p->idx;
2762 0 : header = radeon_get_ib_value(p, idx);
2763 0 : cmd = GET_DMA_CMD(header);
2764 0 : count = GET_DMA_COUNT(header);
2765 0 : sub_cmd = GET_DMA_SUB_CMD(header);
2766 :
2767 0 : switch (cmd) {
2768 : case DMA_PACKET_WRITE:
2769 0 : r = r600_dma_cs_next_reloc(p, &dst_reloc);
2770 0 : if (r) {
2771 0 : DRM_ERROR("bad DMA_PACKET_WRITE\n");
2772 0 : return -EINVAL;
2773 : }
2774 0 : switch (sub_cmd) {
2775 : /* tiled */
2776 : case 8:
2777 0 : dst_offset = radeon_get_ib_value(p, idx+1);
2778 0 : dst_offset <<= 8;
2779 :
2780 0 : ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2781 0 : p->idx += count + 7;
2782 0 : break;
2783 : /* linear */
2784 : case 0:
2785 0 : dst_offset = radeon_get_ib_value(p, idx+1);
2786 0 : dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2787 :
2788 0 : ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2789 0 : ib[idx+2] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2790 0 : p->idx += count + 3;
2791 0 : break;
2792 : default:
2793 0 : DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, header);
2794 0 : return -EINVAL;
2795 : }
2796 0 : if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2797 0 : dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
2798 : dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2799 0 : return -EINVAL;
2800 : }
2801 : break;
2802 : case DMA_PACKET_COPY:
2803 0 : r = r600_dma_cs_next_reloc(p, &src_reloc);
2804 0 : if (r) {
2805 0 : DRM_ERROR("bad DMA_PACKET_COPY\n");
2806 0 : return -EINVAL;
2807 : }
2808 0 : r = r600_dma_cs_next_reloc(p, &dst_reloc);
2809 0 : if (r) {
2810 0 : DRM_ERROR("bad DMA_PACKET_COPY\n");
2811 0 : return -EINVAL;
2812 : }
2813 0 : switch (sub_cmd) {
2814 : /* Copy L2L, DW aligned */
2815 : case 0x00:
2816 : /* L2L, dw */
2817 0 : src_offset = radeon_get_ib_value(p, idx+2);
2818 0 : src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2819 0 : dst_offset = radeon_get_ib_value(p, idx+1);
2820 0 : dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
2821 0 : if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2822 0 : dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n",
2823 : src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2824 0 : return -EINVAL;
2825 : }
2826 0 : if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2827 0 : dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n",
2828 : dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2829 0 : return -EINVAL;
2830 : }
2831 0 : ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2832 0 : ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2833 0 : ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2834 0 : ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2835 0 : p->idx += 5;
2836 0 : break;
2837 : /* Copy L2T/T2L */
2838 : case 0x08:
2839 : /* detile bit */
2840 0 : if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
2841 : /* tiled src, linear dst */
2842 0 : src_offset = radeon_get_ib_value(p, idx+1);
2843 0 : src_offset <<= 8;
2844 0 : ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
2845 :
2846 0 : dst_offset = radeon_get_ib_value(p, idx + 7);
2847 0 : dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
2848 0 : ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2849 0 : ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2850 0 : } else {
2851 : /* linear src, tiled dst */
2852 0 : src_offset = radeon_get_ib_value(p, idx+7);
2853 0 : src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
2854 0 : ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2855 0 : ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2856 :
2857 0 : dst_offset = radeon_get_ib_value(p, idx+1);
2858 0 : dst_offset <<= 8;
2859 0 : ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2860 : }
2861 0 : if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2862 0 : dev_warn(p->dev, "DMA L2T, src buffer too small (%llu %lu)\n",
2863 : src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2864 0 : return -EINVAL;
2865 : }
2866 0 : if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2867 0 : dev_warn(p->dev, "DMA L2T, dst buffer too small (%llu %lu)\n",
2868 : dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2869 0 : return -EINVAL;
2870 : }
2871 0 : p->idx += 9;
2872 0 : break;
2873 : /* Copy L2L, byte aligned */
2874 : case 0x40:
2875 : /* L2L, byte */
2876 0 : src_offset = radeon_get_ib_value(p, idx+2);
2877 0 : src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2878 0 : dst_offset = radeon_get_ib_value(p, idx+1);
2879 0 : dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
2880 0 : if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
2881 0 : dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n",
2882 : src_offset + count, radeon_bo_size(src_reloc->robj));
2883 0 : return -EINVAL;
2884 : }
2885 0 : if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) {
2886 0 : dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n",
2887 : dst_offset + count, radeon_bo_size(dst_reloc->robj));
2888 0 : return -EINVAL;
2889 : }
2890 0 : ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xffffffff);
2891 0 : ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xffffffff);
2892 0 : ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2893 0 : ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2894 0 : p->idx += 5;
2895 0 : break;
2896 : /* Copy L2L, partial */
2897 : case 0x41:
2898 : /* L2L, partial */
2899 0 : if (p->family < CHIP_CAYMAN) {
2900 0 : DRM_ERROR("L2L Partial is cayman only !\n");
2901 0 : return -EINVAL;
2902 : }
2903 0 : ib[idx+1] += (u32)(src_reloc->gpu_offset & 0xffffffff);
2904 0 : ib[idx+2] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2905 0 : ib[idx+4] += (u32)(dst_reloc->gpu_offset & 0xffffffff);
2906 0 : ib[idx+5] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2907 :
2908 0 : p->idx += 9;
2909 0 : break;
2910 : /* Copy L2L, DW aligned, broadcast */
2911 : case 0x44:
2912 : /* L2L, dw, broadcast */
2913 0 : r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2914 0 : if (r) {
2915 0 : DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
2916 0 : return -EINVAL;
2917 : }
2918 0 : dst_offset = radeon_get_ib_value(p, idx+1);
2919 0 : dst_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2920 0 : dst2_offset = radeon_get_ib_value(p, idx+2);
2921 0 : dst2_offset |= ((u64)(radeon_get_ib_value(p, idx+5) & 0xff)) << 32;
2922 0 : src_offset = radeon_get_ib_value(p, idx+3);
2923 0 : src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32;
2924 0 : if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2925 0 : dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n",
2926 : src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2927 0 : return -EINVAL;
2928 : }
2929 0 : if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2930 0 : dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n",
2931 : dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2932 0 : return -EINVAL;
2933 : }
2934 0 : if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
2935 0 : dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n",
2936 : dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
2937 0 : return -EINVAL;
2938 : }
2939 0 : ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2940 0 : ib[idx+2] += (u32)(dst2_reloc->gpu_offset & 0xfffffffc);
2941 0 : ib[idx+3] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2942 0 : ib[idx+4] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2943 0 : ib[idx+5] += upper_32_bits(dst2_reloc->gpu_offset) & 0xff;
2944 0 : ib[idx+6] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2945 0 : p->idx += 7;
2946 0 : break;
2947 : /* Copy L2T Frame to Field */
2948 : case 0x48:
2949 0 : if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
2950 0 : DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
2951 0 : return -EINVAL;
2952 : }
2953 0 : r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2954 0 : if (r) {
2955 0 : DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
2956 0 : return -EINVAL;
2957 : }
2958 0 : dst_offset = radeon_get_ib_value(p, idx+1);
2959 0 : dst_offset <<= 8;
2960 0 : dst2_offset = radeon_get_ib_value(p, idx+2);
2961 0 : dst2_offset <<= 8;
2962 0 : src_offset = radeon_get_ib_value(p, idx+8);
2963 0 : src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
2964 0 : if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2965 0 : dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n",
2966 : src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2967 0 : return -EINVAL;
2968 : }
2969 0 : if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2970 0 : dev_warn(p->dev, "DMA L2T, frame to fields dst buffer too small (%llu %lu)\n",
2971 : dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2972 0 : return -EINVAL;
2973 : }
2974 0 : if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
2975 0 : dev_warn(p->dev, "DMA L2T, frame to fields dst2 buffer too small (%llu %lu)\n",
2976 : dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
2977 0 : return -EINVAL;
2978 : }
2979 0 : ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2980 0 : ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
2981 0 : ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2982 0 : ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2983 0 : p->idx += 10;
2984 0 : break;
2985 : /* Copy L2T/T2L, partial */
2986 : case 0x49:
2987 : /* L2T, T2L partial */
2988 0 : if (p->family < CHIP_CAYMAN) {
2989 0 : DRM_ERROR("L2T, T2L Partial is cayman only !\n");
2990 0 : return -EINVAL;
2991 : }
2992 : /* detile bit */
2993 0 : if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
2994 : /* tiled src, linear dst */
2995 0 : ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
2996 :
2997 0 : ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2998 0 : ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2999 0 : } else {
3000 : /* linear src, tiled dst */
3001 0 : ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3002 0 : ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3003 :
3004 0 : ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3005 : }
3006 0 : p->idx += 12;
3007 0 : break;
3008 : /* Copy L2T broadcast */
3009 : case 0x4b:
3010 : /* L2T, broadcast */
3011 0 : if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3012 0 : DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3013 0 : return -EINVAL;
3014 : }
3015 0 : r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3016 0 : if (r) {
3017 0 : DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3018 0 : return -EINVAL;
3019 : }
3020 0 : dst_offset = radeon_get_ib_value(p, idx+1);
3021 0 : dst_offset <<= 8;
3022 0 : dst2_offset = radeon_get_ib_value(p, idx+2);
3023 0 : dst2_offset <<= 8;
3024 0 : src_offset = radeon_get_ib_value(p, idx+8);
3025 0 : src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3026 0 : if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3027 0 : dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
3028 : src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3029 0 : return -EINVAL;
3030 : }
3031 0 : if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3032 0 : dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3033 : dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3034 0 : return -EINVAL;
3035 : }
3036 0 : if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3037 0 : dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
3038 : dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3039 0 : return -EINVAL;
3040 : }
3041 0 : ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3042 0 : ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
3043 0 : ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3044 0 : ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3045 0 : p->idx += 10;
3046 0 : break;
3047 : /* Copy L2T/T2L (tile units) */
3048 : case 0x4c:
3049 : /* L2T, T2L */
3050 : /* detile bit */
3051 0 : if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3052 : /* tiled src, linear dst */
3053 0 : src_offset = radeon_get_ib_value(p, idx+1);
3054 0 : src_offset <<= 8;
3055 0 : ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
3056 :
3057 0 : dst_offset = radeon_get_ib_value(p, idx+7);
3058 0 : dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3059 0 : ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
3060 0 : ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
3061 0 : } else {
3062 : /* linear src, tiled dst */
3063 0 : src_offset = radeon_get_ib_value(p, idx+7);
3064 0 : src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3065 0 : ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3066 0 : ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3067 :
3068 0 : dst_offset = radeon_get_ib_value(p, idx+1);
3069 0 : dst_offset <<= 8;
3070 0 : ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3071 : }
3072 0 : if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3073 0 : dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n",
3074 : src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3075 0 : return -EINVAL;
3076 : }
3077 0 : if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3078 0 : dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n",
3079 : dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3080 0 : return -EINVAL;
3081 : }
3082 0 : p->idx += 9;
3083 0 : break;
3084 : /* Copy T2T, partial (tile units) */
3085 : case 0x4d:
3086 : /* T2T partial */
3087 0 : if (p->family < CHIP_CAYMAN) {
3088 0 : DRM_ERROR("L2T, T2L Partial is cayman only !\n");
3089 0 : return -EINVAL;
3090 : }
3091 0 : ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
3092 0 : ib[idx+4] += (u32)(dst_reloc->gpu_offset >> 8);
3093 0 : p->idx += 13;
3094 0 : break;
3095 : /* Copy L2T broadcast (tile units) */
3096 : case 0x4f:
3097 : /* L2T, broadcast */
3098 0 : if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3099 0 : DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3100 0 : return -EINVAL;
3101 : }
3102 0 : r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3103 0 : if (r) {
3104 0 : DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3105 0 : return -EINVAL;
3106 : }
3107 0 : dst_offset = radeon_get_ib_value(p, idx+1);
3108 0 : dst_offset <<= 8;
3109 0 : dst2_offset = radeon_get_ib_value(p, idx+2);
3110 0 : dst2_offset <<= 8;
3111 0 : src_offset = radeon_get_ib_value(p, idx+8);
3112 0 : src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3113 0 : if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3114 0 : dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
3115 : src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3116 0 : return -EINVAL;
3117 : }
3118 0 : if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3119 0 : dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3120 : dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3121 0 : return -EINVAL;
3122 : }
3123 0 : if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3124 0 : dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
3125 : dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3126 0 : return -EINVAL;
3127 : }
3128 0 : ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3129 0 : ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
3130 0 : ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3131 0 : ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3132 0 : p->idx += 10;
3133 0 : break;
3134 : default:
3135 0 : DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, header);
3136 0 : return -EINVAL;
3137 : }
3138 : break;
3139 : case DMA_PACKET_CONSTANT_FILL:
3140 0 : r = r600_dma_cs_next_reloc(p, &dst_reloc);
3141 0 : if (r) {
3142 0 : DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
3143 0 : return -EINVAL;
3144 : }
3145 0 : dst_offset = radeon_get_ib_value(p, idx+1);
3146 0 : dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16;
3147 0 : if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3148 0 : dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
3149 : dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3150 0 : return -EINVAL;
3151 : }
3152 0 : ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
3153 0 : ib[idx+3] += (upper_32_bits(dst_reloc->gpu_offset) << 16) & 0x00ff0000;
3154 0 : p->idx += 4;
3155 0 : break;
3156 : case DMA_PACKET_NOP:
3157 0 : p->idx += 1;
3158 0 : break;
3159 : default:
3160 0 : DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
3161 0 : return -EINVAL;
3162 : }
3163 0 : } while (p->idx < p->chunk_ib->length_dw);
3164 : #if 0
3165 : for (r = 0; r < p->ib.length_dw; r++) {
3166 : printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
3167 : mdelay(1);
3168 : }
3169 : #endif
3170 0 : return 0;
3171 0 : }
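
/*
 * Annotation: standalone sketch of the DMA packet header decode at the
 * top of evergreen_dma_cs_parse().  The GET_DMA_* macros come from the
 * driver headers; the bit positions below (cmd in [31:28], sub-cmd in
 * [27:20], dword count in [19:0]) mirror the evergreen DMA format and
 * are stated here as an assumption for illustration.
 */
#include <stdint.h>

struct dma_hdr {
	unsigned cmd;      /* DMA_PACKET_WRITE, DMA_PACKET_COPY, ... */
	unsigned sub_cmd;  /* linear/tiled/broadcast variant selector */
	unsigned count;    /* payload length in dwords */
};

static struct dma_hdr dma_hdr_decode(uint32_t header)
{
	struct dma_hdr h = {
		.cmd     = (header >> 28) & 0xf,
		.sub_cmd = (header >> 20) & 0xff,
		.count   = header & 0xfffff,
	};
	return h;
}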
3172 :
3173 : /* vm parser */
3174 0 : static bool evergreen_vm_reg_valid(u32 reg)
3175 : {
3176 : /* context regs are fine */
3177 0 : if (reg >= 0x28000)
3178 0 : return true;
3179 :
3180 : /* check config regs */
3181 0 : switch (reg) {
3182 : case WAIT_UNTIL:
3183 : case GRBM_GFX_INDEX:
3184 : case CP_STRMOUT_CNTL:
3185 : case CP_COHER_CNTL:
3186 : case CP_COHER_SIZE:
3187 : case VGT_VTX_VECT_EJECT_REG:
3188 : case VGT_CACHE_INVALIDATION:
3189 : case VGT_GS_VERTEX_REUSE:
3190 : case VGT_PRIMITIVE_TYPE:
3191 : case VGT_INDEX_TYPE:
3192 : case VGT_NUM_INDICES:
3193 : case VGT_NUM_INSTANCES:
3194 : case VGT_COMPUTE_DIM_X:
3195 : case VGT_COMPUTE_DIM_Y:
3196 : case VGT_COMPUTE_DIM_Z:
3197 : case VGT_COMPUTE_START_X:
3198 : case VGT_COMPUTE_START_Y:
3199 : case VGT_COMPUTE_START_Z:
3200 : case VGT_COMPUTE_INDEX:
3201 : case VGT_COMPUTE_THREAD_GROUP_SIZE:
3202 : case VGT_HS_OFFCHIP_PARAM:
3203 : case PA_CL_ENHANCE:
3204 : case PA_SU_LINE_STIPPLE_VALUE:
3205 : case PA_SC_LINE_STIPPLE_STATE:
3206 : case PA_SC_ENHANCE:
3207 : case SQ_DYN_GPR_CNTL_PS_FLUSH_REQ:
3208 : case SQ_DYN_GPR_SIMD_LOCK_EN:
3209 : case SQ_CONFIG:
3210 : case SQ_GPR_RESOURCE_MGMT_1:
3211 : case SQ_GLOBAL_GPR_RESOURCE_MGMT_1:
3212 : case SQ_GLOBAL_GPR_RESOURCE_MGMT_2:
3213 : case SQ_CONST_MEM_BASE:
3214 : case SQ_STATIC_THREAD_MGMT_1:
3215 : case SQ_STATIC_THREAD_MGMT_2:
3216 : case SQ_STATIC_THREAD_MGMT_3:
3217 : case SPI_CONFIG_CNTL:
3218 : case SPI_CONFIG_CNTL_1:
3219 : case TA_CNTL_AUX:
3220 : case DB_DEBUG:
3221 : case DB_DEBUG2:
3222 : case DB_DEBUG3:
3223 : case DB_DEBUG4:
3224 : case DB_WATERMARKS:
3225 : case TD_PS_BORDER_COLOR_INDEX:
3226 : case TD_PS_BORDER_COLOR_RED:
3227 : case TD_PS_BORDER_COLOR_GREEN:
3228 : case TD_PS_BORDER_COLOR_BLUE:
3229 : case TD_PS_BORDER_COLOR_ALPHA:
3230 : case TD_VS_BORDER_COLOR_INDEX:
3231 : case TD_VS_BORDER_COLOR_RED:
3232 : case TD_VS_BORDER_COLOR_GREEN:
3233 : case TD_VS_BORDER_COLOR_BLUE:
3234 : case TD_VS_BORDER_COLOR_ALPHA:
3235 : case TD_GS_BORDER_COLOR_INDEX:
3236 : case TD_GS_BORDER_COLOR_RED:
3237 : case TD_GS_BORDER_COLOR_GREEN:
3238 : case TD_GS_BORDER_COLOR_BLUE:
3239 : case TD_GS_BORDER_COLOR_ALPHA:
3240 : case TD_HS_BORDER_COLOR_INDEX:
3241 : case TD_HS_BORDER_COLOR_RED:
3242 : case TD_HS_BORDER_COLOR_GREEN:
3243 : case TD_HS_BORDER_COLOR_BLUE:
3244 : case TD_HS_BORDER_COLOR_ALPHA:
3245 : case TD_LS_BORDER_COLOR_INDEX:
3246 : case TD_LS_BORDER_COLOR_RED:
3247 : case TD_LS_BORDER_COLOR_GREEN:
3248 : case TD_LS_BORDER_COLOR_BLUE:
3249 : case TD_LS_BORDER_COLOR_ALPHA:
3250 : case TD_CS_BORDER_COLOR_INDEX:
3251 : case TD_CS_BORDER_COLOR_RED:
3252 : case TD_CS_BORDER_COLOR_GREEN:
3253 : case TD_CS_BORDER_COLOR_BLUE:
3254 : case TD_CS_BORDER_COLOR_ALPHA:
3255 : case SQ_ESGS_RING_SIZE:
3256 : case SQ_GSVS_RING_SIZE:
3257 : case SQ_ESTMP_RING_SIZE:
3258 : case SQ_GSTMP_RING_SIZE:
3259 : case SQ_HSTMP_RING_SIZE:
3260 : case SQ_LSTMP_RING_SIZE:
3261 : case SQ_PSTMP_RING_SIZE:
3262 : case SQ_VSTMP_RING_SIZE:
3263 : case SQ_ESGS_RING_ITEMSIZE:
3264 : case SQ_ESTMP_RING_ITEMSIZE:
3265 : case SQ_GSTMP_RING_ITEMSIZE:
3266 : case SQ_GSVS_RING_ITEMSIZE:
3267 : case SQ_GS_VERT_ITEMSIZE:
3268 : case SQ_GS_VERT_ITEMSIZE_1:
3269 : case SQ_GS_VERT_ITEMSIZE_2:
3270 : case SQ_GS_VERT_ITEMSIZE_3:
3271 : case SQ_GSVS_RING_OFFSET_1:
3272 : case SQ_GSVS_RING_OFFSET_2:
3273 : case SQ_GSVS_RING_OFFSET_3:
3274 : case SQ_HSTMP_RING_ITEMSIZE:
3275 : case SQ_LSTMP_RING_ITEMSIZE:
3276 : case SQ_PSTMP_RING_ITEMSIZE:
3277 : case SQ_VSTMP_RING_ITEMSIZE:
3278 : case VGT_TF_RING_SIZE:
3279 : case SQ_ESGS_RING_BASE:
3280 : case SQ_GSVS_RING_BASE:
3281 : case SQ_ESTMP_RING_BASE:
3282 : case SQ_GSTMP_RING_BASE:
3283 : case SQ_HSTMP_RING_BASE:
3284 : case SQ_LSTMP_RING_BASE:
3285 : case SQ_PSTMP_RING_BASE:
3286 : case SQ_VSTMP_RING_BASE:
3287 : case CAYMAN_VGT_OFFCHIP_LDS_BASE:
3288 : case CAYMAN_SQ_EX_ALLOC_TABLE_SLOTS:
3289 0 : return true;
3290 : default:
3291 0 : DRM_ERROR("Invalid register 0x%x in CS\n", reg);
3292 0 : return false;
3293 : }
3294 0 : }
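/*
 * Illustration (editor's sketch): the check above is two tier.  Any
 * offset at or above the context-register window (0x28000) is accepted
 * outright; config registers below that must hit the whitelist:
 *
 *	evergreen_vm_reg_valid(0x28000);	accepted (context register)
 *	evergreen_vm_reg_valid(WAIT_UNTIL);	accepted (whitelisted)
 *	any other config-space offset		rejected, with DRM_ERROR logged
 */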
3295 :
3296 0 : static int evergreen_vm_packet3_check(struct radeon_device *rdev,
3297 : u32 *ib, struct radeon_cs_packet *pkt)
3298 : {
3299 0 : u32 idx = pkt->idx + 1;
3300 0 : u32 idx_value = ib[idx];
3301 : u32 start_reg, end_reg, reg, i;
3302 : u32 command, info;
3303 :
3304 0 : switch (pkt->opcode) {
3305 : case PACKET3_NOP:
3306 : break;
3307 : case PACKET3_SET_BASE:
3308 0 : if (idx_value != 1) {
3309 0 : DRM_ERROR("bad SET_BASE\n");
3310 0 : return -EINVAL;
3311 : }
3312 : break;
3313 : case PACKET3_CLEAR_STATE:
3314 : case PACKET3_INDEX_BUFFER_SIZE:
3315 : case PACKET3_DISPATCH_DIRECT:
3316 : case PACKET3_DISPATCH_INDIRECT:
3317 : case PACKET3_MODE_CONTROL:
3318 : case PACKET3_SET_PREDICATION:
3319 : case PACKET3_COND_EXEC:
3320 : case PACKET3_PRED_EXEC:
3321 : case PACKET3_DRAW_INDIRECT:
3322 : case PACKET3_DRAW_INDEX_INDIRECT:
3323 : case PACKET3_INDEX_BASE:
3324 : case PACKET3_DRAW_INDEX_2:
3325 : case PACKET3_CONTEXT_CONTROL:
3326 : case PACKET3_DRAW_INDEX_OFFSET:
3327 : case PACKET3_INDEX_TYPE:
3328 : case PACKET3_DRAW_INDEX:
3329 : case PACKET3_DRAW_INDEX_AUTO:
3330 : case PACKET3_DRAW_INDEX_IMMD:
3331 : case PACKET3_NUM_INSTANCES:
3332 : case PACKET3_DRAW_INDEX_MULTI_AUTO:
3333 : case PACKET3_STRMOUT_BUFFER_UPDATE:
3334 : case PACKET3_DRAW_INDEX_OFFSET_2:
3335 : case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
3336 : case PACKET3_MPEG_INDEX:
3337 : case PACKET3_WAIT_REG_MEM:
3338 : case PACKET3_MEM_WRITE:
3339 : case PACKET3_SURFACE_SYNC:
3340 : case PACKET3_EVENT_WRITE:
3341 : case PACKET3_EVENT_WRITE_EOP:
3342 : case PACKET3_EVENT_WRITE_EOS:
3343 : case PACKET3_SET_CONTEXT_REG:
3344 : case PACKET3_SET_BOOL_CONST:
3345 : case PACKET3_SET_LOOP_CONST:
3346 : case PACKET3_SET_RESOURCE:
3347 : case PACKET3_SET_SAMPLER:
3348 : case PACKET3_SET_CTL_CONST:
3349 : case PACKET3_SET_RESOURCE_OFFSET:
3350 : case PACKET3_SET_CONTEXT_REG_INDIRECT:
3351 : case PACKET3_SET_RESOURCE_INDIRECT:
3352 : case CAYMAN_PACKET3_DEALLOC_STATE:
3353 : break;
3354 : case PACKET3_COND_WRITE:
3355 0 : if (idx_value & 0x100) {
3356 0 : reg = ib[idx + 5] * 4;
3357 0 : if (!evergreen_vm_reg_valid(reg))
3358 0 : return -EINVAL;
3359 : }
3360 : break;
3361 : case PACKET3_COPY_DW:
3362 0 : if (idx_value & 0x2) {
3363 0 : reg = ib[idx + 3] * 4;
3364 0 : if (!evergreen_vm_reg_valid(reg))
3365 0 : return -EINVAL;
3366 : }
3367 : break;
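	/*
	 * Illustration (editor's sketch): in both cases above a mode bit in
	 * dw1 (bit 8 for COND_WRITE, bit 1 for COPY_DW, as implied by the
	 * masks) selects a register rather than a memory destination.  The
	 * packet then carries a register dword index, scaled by 4 into the
	 * byte offset that evergreen_vm_reg_valid() expects, e.g.
	 *
	 *	reg = ib[idx + 3] * 4;	(dword index to byte offset)
	 */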
3368 : case PACKET3_SET_CONFIG_REG:
3369 0 : start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
3370 0 : end_reg = 4 * pkt->count + start_reg - 4;
3371 0 : if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
3372 0 : (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
3373 0 : (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
3374 0 : DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
3375 0 : return -EINVAL;
3376 : }
3377 0 : for (i = 0; i < pkt->count; i++) {
3378 0 : reg = start_reg + (4 * i);
3379 0 : if (!evergreen_vm_reg_valid(reg))
3380 0 : return -EINVAL;
3381 : }
3382 : break;
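	/*
	 * Illustration (editor's sketch): dw1 holds a dword offset from the
	 * config-register base, so with idx_value = 0x10 and pkt->count = 3
	 * the packet covers
	 *
	 *	start_reg = 0x40 + PACKET3_SET_CONFIG_REG_START
	 *	end_reg   = start_reg + 4 * 3 - 4 = start_reg + 8
	 *
	 * i.e. three consecutive dword registers, each run individually
	 * through evergreen_vm_reg_valid() by the loop above.
	 */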
3383 : case PACKET3_CP_DMA:
3384 0 : command = ib[idx + 4];
3385 0 : info = ib[idx + 1];
3386 0 : if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
3387 0 : (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
3388 0 : ((((info & 0x00300000) >> 20) == 0) &&
3389 0 : (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
3390 0 : ((((info & 0x60000000) >> 29) == 0) &&
3391 0 : (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
3392 : /* non mem-to-mem copies require a dword-aligned count */
3393 0 : if ((command & 0x1fffff) % 4) {
3394 0 : DRM_ERROR("CP DMA command requires dw count alignment\n");
3395 0 : return -EINVAL;
3396 : }
3397 : }
3398 0 : if (command & PACKET3_CP_DMA_CMD_SAS) {
3399 : /* src address space is register */
3400 0 : if (((info & 0x60000000) >> 29) == 0) {
3401 0 : start_reg = idx_value << 2;
3402 0 : if (command & PACKET3_CP_DMA_CMD_SAIC) {
3403 : reg = start_reg;
3404 0 : if (!evergreen_vm_reg_valid(reg)) {
3405 0 : DRM_ERROR("CP DMA Bad SRC register\n");
3406 0 : return -EINVAL;
3407 : }
3408 : } else {
3409 0 : for (i = 0; i < (command & 0x1fffff); i++) {
3410 0 : reg = start_reg + (4 * i);
3411 0 : if (!evergreen_vm_reg_valid(reg)) {
3412 0 : DRM_ERROR("CP DMA Bad SRC register\n");
3413 0 : return -EINVAL;
3414 : }
3415 : }
3416 : }
3417 : }
3418 : }
3419 0 : if (command & PACKET3_CP_DMA_CMD_DAS) {
3420 : /* dst address space is register */
3421 0 : if (((info & 0x00300000) >> 20) == 0) {
3422 0 : start_reg = ib[idx + 2];
3423 0 : if (command & PACKET3_CP_DMA_CMD_DAIC) {
3424 : reg = start_reg;
3425 0 : if (!evergreen_vm_reg_valid(reg)) {
3426 0 : DRM_ERROR("CP DMA Bad DST register\n");
3427 0 : return -EINVAL;
3428 : }
3429 : } else {
3430 0 : for (i = 0; i < (command & 0x1fffff); i++) {
3431 0 : reg = start_reg + (4 * i);
3432 0 : if (!evergreen_vm_reg_valid(reg)) {
3433 0 : DRM_ERROR("CP DMA Bad DST register\n");
3434 0 : return -EINVAL;
3435 : }
3436 : }
3437 : }
3438 : }
3439 : }
3440 : break;
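	/*
	 * Illustration (editor's sketch) of the CP_DMA fields tested above,
	 * as implied by the masks used: in "info", bits 30:29 select the
	 * source address space and bits 21:20 the destination, with 0
	 * meaning memory, or a register when the matching SAS/DAS flag is
	 * set in "command".  In "command", bits 20:0 hold the transfer
	 * count and SAIC/DAIC suppress source/destination address
	 * increment, which is why a non-incrementing register transfer is
	 * checked once while an incrementing one is validated register by
	 * register in the loops above.
	 */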
3441 : default:
3442 0 : return -EINVAL;
3443 : }
3444 0 : return 0;
3445 0 : }
3446 :
3447 0 : int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3448 : {
3449 : int ret = 0;
3450 : u32 idx = 0;
3451 0 : struct radeon_cs_packet pkt;
3452 :
3453 0 : do {
3454 0 : pkt.idx = idx;
3455 0 : pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
3456 0 : pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
3457 0 : pkt.one_reg_wr = 0;
3458 0 : switch (pkt.type) {
3459 : case RADEON_PACKET_TYPE0:
3460 0 : dev_err(rdev->dev, "Packet0 not allowed!\n");
3461 : ret = -EINVAL;
3462 0 : break;
3463 : case RADEON_PACKET_TYPE2:
3464 0 : idx += 1;
3465 0 : break;
3466 : case RADEON_PACKET_TYPE3:
3467 0 : pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
3468 0 : ret = evergreen_vm_packet3_check(rdev, ib->ptr, &pkt);
3469 0 : idx += pkt.count + 2;
3470 0 : break;
3471 : default:
3472 0 : dev_err(rdev->dev, "Unknown packet type %d!\n", pkt.type);
3473 : ret = -EINVAL;
3474 0 : break;
3475 : }
3476 0 : if (ret)
3477 : break;
3478 0 : } while (idx < ib->length_dw);
3479 :
3480 0 : return ret;
3481 0 : }
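/*
 * Illustration (editor's sketch): the PM4 header layout decoded by the
 * loop above.  The packet type sits in bits 31:30, the dword count
 * (minus one) in bits 29:16 and, for type-3 packets, the opcode in
 * bits 15:8; a type-3 packet therefore spans pkt.count + 2 dwords
 * (header plus count + 1 payload dwords), matching the idx advance
 * above.  A hypothetical builder for such a header:
 */
static inline u32 pm4_type3_header(u32 opcode, u32 payload_dw)
{
	/* payload_dw >= 1; the count field encodes payload dwords - 1 */
	return (3u << 30) |
	       (((payload_dw - 1) & 0x3fff) << 16) |
	       ((opcode & 0xff) << 8);
}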
3482 :
3483 : /**
3484 : * evergreen_dma_ib_parse() - parse the DMA IB for VM
3485 : * @rdev: radeon_device pointer
3486 : * @ib: radeon_ib pointer
3487 : *
3488 : * Parses the DMA IB from the VM CS ioctl and
3489 : * checks it for errors (Cayman-SI).
3490 : * Returns 0 for success and an error on failure.
3491 : **/
3492 0 : int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3493 : {
3494 : u32 idx = 0;
3495 : u32 header, cmd, count, sub_cmd;
3496 :
3497 0 : do {
3498 0 : header = ib->ptr[idx];
3499 0 : cmd = GET_DMA_CMD(header);
3500 0 : count = GET_DMA_COUNT(header);
3501 0 : sub_cmd = GET_DMA_SUB_CMD(header);
3502 :
3503 0 : switch (cmd) {
3504 : case DMA_PACKET_WRITE:
3505 0 : switch (sub_cmd) {
3506 : /* tiled */
3507 : case 8:
3508 0 : idx += count + 7;
3509 0 : break;
3510 : /* linear */
3511 : case 0:
3512 0 : idx += count + 3;
3513 0 : break;
3514 : default:
3515 0 : DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, ib->ptr[idx]);
3516 0 : return -EINVAL;
3517 : }
3518 : break;
3519 : case DMA_PACKET_COPY:
3520 0 : switch (sub_cmd) {
3521 : /* Copy L2L, DW aligned */
3522 : case 0x00:
3523 0 : idx += 5;
3524 0 : break;
3525 : /* Copy L2T/T2L */
3526 : case 0x08:
3527 0 : idx += 9;
3528 0 : break;
3529 : /* Copy L2L, byte aligned */
3530 : case 0x40:
3531 0 : idx += 5;
3532 0 : break;
3533 : /* Copy L2L, partial */
3534 : case 0x41:
3535 0 : idx += 9;
3536 0 : break;
3537 : /* Copy L2L, DW aligned, broadcast */
3538 : case 0x44:
3539 0 : idx += 7;
3540 0 : break;
3541 : /* Copy L2T Frame to Field */
3542 : case 0x48:
3543 0 : idx += 10;
3544 0 : break;
3545 : /* Copy L2T/T2L, partial */
3546 : case 0x49:
3547 0 : idx += 12;
3548 0 : break;
3549 : /* Copy L2T broadcast */
3550 : case 0x4b:
3551 0 : idx += 10;
3552 0 : break;
3553 : /* Copy L2T/T2L (tile units) */
3554 : case 0x4c:
3555 0 : idx += 9;
3556 0 : break;
3557 : /* Copy T2T, partial (tile units) */
3558 : case 0x4d:
3559 0 : idx += 13;
3560 0 : break;
3561 : /* Copy L2T broadcast (tile units) */
3562 : case 0x4f:
3563 0 : idx += 10;
3564 0 : break;
3565 : default:
3566 0 : DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, ib->ptr[idx]);
3567 0 : return -EINVAL;
3568 : }
3569 : break;
3570 : case DMA_PACKET_CONSTANT_FILL:
3571 0 : idx += 4;
3572 0 : break;
3573 : case DMA_PACKET_NOP:
3574 0 : idx += 1;
3575 0 : break;
3576 : default:
3577 0 : DRM_ERROR("Unknown packet type %d at %d!\n", cmd, idx);
3578 0 : return -EINVAL;
3579 : }
3580 0 : } while (idx < ib->length_dw);
3581 :
3582 0 : return 0;
3583 0 : }
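/*
 * Illustration (editor's sketch): the DMA packet header consumed by the
 * GET_DMA_* accessors above, with the layout assumed from their use in
 * this file: command in bits 31:28, sub command in bits 27:20 and the
 * dword count in bits 19:0.  A linear DMA_PACKET_WRITE of 16 dwords,
 * for example, advances idx by count + 3 = 19: one header dword, two
 * destination-address dwords and 16 dwords of payload.
 */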