1 /*
   2  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
   3  * Use is subject to license terms.
   4  */
   5 /*
   6  * r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
   7  *
   8  * Copyright (C) The Weather Channel, Inc.  2002.
   9  * Copyright (C) 2004 Nicolai Haehnle.
  10  * All Rights Reserved.
  11  *
  12  * The Weather Channel (TM) funded Tungsten Graphics to develop the
  13  * initial release of the Radeon 8500 driver under the XFree86 license.
  14  * This notice must be preserved.
  15  *
  16  * Permission is hereby granted, free of charge, to any person obtaining a
  17  * copy of this software and associated documentation files (the "Software"),
  18  * to deal in the Software without restriction, including without limitation
  19  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  20  * and/or sell copies of the Software, and to permit persons to whom the
  21  * Software is furnished to do so, subject to the following conditions:
  22  *
  23  * The above copyright notice and this permission notice (including the next
  24  * paragraph) shall be included in all copies or substantial portions of the
  25  * Software.
  26  *
  27  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  28  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  29  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  30  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  31  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  32  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  33  * DEALINGS IN THE SOFTWARE.
  34  *
  35  * Authors:
  36  *    Nicolai Haehnle <prefect_@gmx.net>
  37  */
  38 
  39 #pragma ident   "%Z%%M% %I%     %E% SMI"
  40 
  41 #include "drm.h"
  42 #include "radeon_drm.h"
  43 #include "drmP.h"
  44 #include "radeon_drv.h"
  45 #include "r300_reg.h"
  46 
  47 #define R300_SIMULTANEOUS_CLIPRECTS             4
  48 
  49 /*
  50  * Values for R300_RE_CLIPRECT_CNTL depending on the number of
  51  * cliprects
  52  */
  53 static const int r300_cliprect_cntl[4] = {
  54         0xAAAA,
  55         0xEEEE,
  56         0xFEFE,
  57         0xFFFE
  58 };
  59 
  60 /*
  61  * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command
  62  * buffer, starting with index n.
  63  */
  64 static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
  65     drm_radeon_kcmd_buffer_t *cmdbuf, int n)
  66 {
  67         drm_clip_rect_t box;
  68         int nr;
  69         int i;
  70         RING_LOCALS;
  71 
  72         nr = cmdbuf->nbox - n;
  73         if (nr > R300_SIMULTANEOUS_CLIPRECTS)
  74                 nr = R300_SIMULTANEOUS_CLIPRECTS;
  75 
  76         DRM_DEBUG("%i cliprects\n", nr);
  77 
  78         if (nr) {
  79                 BEGIN_RING(6 + nr * 2);
  80                 OUT_RING(CP_PACKET0(R300_RE_CLIPRECT_TL_0, nr * 2 - 1));
  81 
  82                 for (i = 0; i < nr; ++i) {
  83                         if (DRM_COPY_FROM_USER_UNCHECKED
  84                             (&box, &cmdbuf->boxes[n + i], sizeof (box))) {
  85                                 DRM_ERROR("copy cliprect faulted\n");
  86                                 return (EFAULT);
  87                         }
  88 
  89                         box.x1 =
  90                             (box.x1 +
  91                             R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
  92                         box.y1 =
  93                             (box.y1 +
  94                             R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
  95                         box.x2 =
  96                             (box.x2 +
  97                             R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
  98                         box.y2 =
  99                             (box.y2 +
 100                             R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
 101 
 102                         OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
 103                             (box.y1 << R300_CLIPRECT_Y_SHIFT));
 104                         OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
 105                             (box.y2 << R300_CLIPRECT_Y_SHIFT));
 106                 }
 107 
 108                 OUT_RING_REG(R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr - 1]);
 109 
 110                 /*
 111                  * TODO/SECURITY: Force scissors to a safe value, otherwise
 112                  * the client might be able to trample over memory.
 113                  * The impact should be very limited, but I'd rather be safe
 114                  * than sorry.
 115                  */
 116                 OUT_RING(CP_PACKET0(R300_RE_SCISSORS_TL, 1));
 117                 OUT_RING(0);
 118                 OUT_RING(R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK);
 119                 ADVANCE_RING();
 120         } else {
 121                 /*
 122                  * Why we allow zero cliprect rendering:
 123                  * There are some commands in a command buffer that must be
 124                  * submitted even when there are no cliprects, e.g. DMA buffer
 125                  * discard or state setting (though state setting could be
 126                  * avoided by simulating a loss of context).
 127                  *
 128                  * Now since the cmdbuf interface is so chaotic right now (and
 129                  * is bound to remain that way for a bit until things settle
 130                  * down), it is basically impossible to filter out the commands
 131                  * that are necessary and those that aren't.
 132                  *
 133                  * So I choose the safe way and don't do any filtering at all;
 134                  * instead, I simply set up the engine so that all rendering
 135                  * can't produce any fragments.
 136                  */
 137                 BEGIN_RING(2);
 138                 OUT_RING_REG(R300_RE_CLIPRECT_CNTL, 0);
 139                 ADVANCE_RING();
 140         }
 141 
 142         return (0);
 143 }
 144 
 145 static u8 r300_reg_flags[0x10000 >> 2];
 146 
 147 void
 148 r300_init_reg_flags(void)
 149 {
 150         int i;
 151         (void) memset(r300_reg_flags, 0, 0x10000 >> 2);
 152 #define ADD_RANGE_MARK(reg, count, mark) \
 153                 for (i = ((reg) >> 2); i < ((reg) >> 2) + (count); i++)\
 154                         r300_reg_flags[i] |= (mark);
 155 
 156 #define MARK_SAFE               1
 157 #define MARK_CHECK_OFFSET       2
 158 
 159 #define ADD_RANGE(reg, count)   ADD_RANGE_MARK(reg, count, MARK_SAFE)
 160 
 161         /* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
 162         ADD_RANGE(R300_SE_VPORT_XSCALE, 6);
 163         ADD_RANGE(0x2080, 1);
 164         ADD_RANGE(R300_SE_VTE_CNTL, 2);
 165         ADD_RANGE(0x2134, 2);
 166         ADD_RANGE(0x2140, 1);
 167         ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2);
 168         ADD_RANGE(0x21DC, 1);
 169         ADD_RANGE(0x221C, 1);
 170         ADD_RANGE(0x2220, 4);
 171         ADD_RANGE(0x2288, 1);
 172         ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
 173         ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
 174         ADD_RANGE(R300_GB_ENABLE, 1);
 175         ADD_RANGE(R300_GB_MSPOS0, 5);
 176         ADD_RANGE(R300_TX_CNTL, 1);
 177         ADD_RANGE(R300_TX_ENABLE, 1);
 178         ADD_RANGE(0x4200, 4);
 179         ADD_RANGE(0x4214, 1);
 180         ADD_RANGE(R300_RE_POINTSIZE, 1);
 181         ADD_RANGE(0x4230, 3);
 182         ADD_RANGE(R300_RE_LINE_CNT, 1);
 183         ADD_RANGE(0x4238, 1);
 184         ADD_RANGE(0x4260, 3);
 185         ADD_RANGE(0x4274, 4);
 186         ADD_RANGE(0x4288, 5);
 187         ADD_RANGE(0x42A0, 1);
 188         ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4);
 189         ADD_RANGE(0x42B4, 1);
 190         ADD_RANGE(R300_RE_CULL_CNTL, 1);
 191         ADD_RANGE(0x42C0, 2);
 192         ADD_RANGE(R300_RS_CNTL_0, 2);
 193         ADD_RANGE(R300_RS_INTERP_0, 8);
 194         ADD_RANGE(R300_RS_ROUTE_0, 8);
 195         ADD_RANGE(0x43A4, 2);
 196         ADD_RANGE(0x43E8, 1);
 197         ADD_RANGE(R300_PFS_CNTL_0, 3);
 198         ADD_RANGE(R300_PFS_NODE_0, 4);
 199         ADD_RANGE(R300_PFS_TEXI_0, 64);
 200         ADD_RANGE(0x46A4, 5);
 201         ADD_RANGE(R300_PFS_INSTR0_0, 64);
 202         ADD_RANGE(R300_PFS_INSTR1_0, 64);
 203         ADD_RANGE(R300_PFS_INSTR2_0, 64);
 204         ADD_RANGE(R300_PFS_INSTR3_0, 64);
 205         ADD_RANGE(0x4BC0, 1);
 206         ADD_RANGE(0x4BC8, 3);
 207         ADD_RANGE(R300_PP_ALPHA_TEST, 2);
 208         ADD_RANGE(0x4BD8, 1);
 209         ADD_RANGE(R300_PFS_PARAM_0_X, 64);
 210         ADD_RANGE(0x4E00, 1);
 211         ADD_RANGE(R300_RB3D_CBLEND, 2);
 212         ADD_RANGE(R300_RB3D_COLORMASK, 1);
 213         ADD_RANGE(0x4E10, 3);
 214         ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET);
 215                                         /* check offset */
 216         ADD_RANGE(R300_RB3D_COLORPITCH0, 1);
 217         ADD_RANGE(0x4E50, 9);
 218         ADD_RANGE(0x4E88, 1);
 219         ADD_RANGE(0x4EA0, 2);
 220         ADD_RANGE(R300_RB3D_ZSTENCIL_CNTL_0, 3);
 221         ADD_RANGE(0x4F10, 4);
 222         ADD_RANGE_MARK(R300_RB3D_DEPTHOFFSET, 1, MARK_CHECK_OFFSET);
 223                                         /* check offset */
 224         ADD_RANGE(R300_RB3D_DEPTHPITCH, 1);
 225         ADD_RANGE(0x4F28, 1);
 226         ADD_RANGE(0x4F30, 2);
 227         ADD_RANGE(0x4F44, 1);
 228         ADD_RANGE(0x4F54, 1);
 229 
 230         ADD_RANGE(R300_TX_FILTER_0, 16);
 231         ADD_RANGE(R300_TX_FILTER1_0, 16);
 232         ADD_RANGE(R300_TX_SIZE_0, 16);
 233         ADD_RANGE(R300_TX_FORMAT_0, 16);
 234         ADD_RANGE(R300_TX_PITCH_0, 16);
 235         /* Texture offset is dangerous and needs more checking */
 236         ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET);
 237         ADD_RANGE(R300_TX_CHROMA_KEY_0, 16);
 238         ADD_RANGE(R300_TX_BORDER_COLOR_0, 16);
 239 
 240         /* Sporadic registers used as primitives are emitted */
 241         ADD_RANGE(0x4f18, 1);
 242         ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1);
 243         ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8);
 244         ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8);
 245 
 246 }
 247 
 248 static __inline__ int r300_check_range(unsigned reg, int count)
 249 {
 250         int i;
 251         if (reg & ~0xffff)
 252                 return (-1);
 253         for (i = (reg >> 2); i < (reg >> 2) + count; i++)
 254                 if (r300_reg_flags[i] != MARK_SAFE)
 255                         return (1);
 256         return (0);
 257 }
 258 
 259 static inline int
 260 r300_emit_carefully_checked_packet0(drm_radeon_private_t *dev_priv,
 261     drm_radeon_kcmd_buffer_t *cmdbuf, drm_r300_cmd_header_t header)
 262 {
 263         int reg;
 264         int sz;
 265         int i;
 266         int values[64];
 267         RING_LOCALS;
 268 
 269         sz = header.packet0.count;
 270         reg = (header.packet0.reghi << 8) | header.packet0.reglo;
 271 
 272         if ((sz > 64) || (sz < 0)) {
 273                 DRM_ERROR("Cannot emit more than 64 values at a time "
 274                     "(reg=%04x sz=%d)\n", reg, sz);
 275                 return (EINVAL);
 276         }
 277         for (i = 0; i < sz; i++) {
 278                 values[i] = ((int *)(uintptr_t)cmdbuf->buf)[i];
 279                 switch (r300_reg_flags[(reg >> 2) + i]) {
 280                 case MARK_SAFE:
 281                         break;
 282                 case MARK_CHECK_OFFSET:
 283                         if (!RADEON_CHECK_OFFSET(dev_priv, (u32) values[i])) {
 284                                 DRM_ERROR("Offset failed range check "
 285                                     "(reg=%04x sz=%d)\n", reg, sz);
 286                                 return (EINVAL);
 287                         }
 288                         break;
 289                 default:
 290                         DRM_ERROR("Register %04x failed check as flag=%02x\n",
 291                             reg + i * 4, r300_reg_flags[(reg >> 2) + i]);
 292                         return (EINVAL);
 293                 }
 294         }
 295 
 296         BEGIN_RING(1 + sz);
 297         OUT_RING(CP_PACKET0(reg, sz - 1));
 298         OUT_RING_TABLE(values, sz);
 299         ADVANCE_RING();
 300 
 301         cmdbuf->buf += sz * 4;
 302         cmdbuf->bufsz -= sz * 4;
 303 
 304         return (0);
 305 }
 306 
 307 /*
 308  * Emits a packet0 setting arbitrary registers.
 309  * Called by r300_do_cp_cmdbuf.
 310  *
 311  * Note that checks are performed on contents and addresses of the registers
 312  */
 313 static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv,
 314                                         drm_radeon_kcmd_buffer_t *cmdbuf,
 315                                         drm_r300_cmd_header_t header)
 316 {
 317         int reg;
 318         int sz;
 319         RING_LOCALS;
 320 
 321         sz = header.packet0.count;
 322         reg = (header.packet0.reghi << 8) | header.packet0.reglo;
 323 
 324         if (!sz)
 325                 return (0);
 326 
 327         if (sz * 4 > cmdbuf->bufsz)
 328                 return (EINVAL);
 329 
 330         if (reg + sz * 4 >= 0x10000) {
 331                 DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n",
 332                     reg, sz);
 333                 return (EINVAL);
 334         }
 335 
 336         if (r300_check_range(reg, sz)) {
 337                 /* go and check everything */
 338                 return (r300_emit_carefully_checked_packet0(dev_priv,
 339                     cmdbuf, header));
 340         }
 341         /*
 342          * the rest of the data is safe to emit, whatever the values
 343          * the user passed
 344          */
 345 
 346         BEGIN_RING(1 + sz);
 347         OUT_RING(CP_PACKET0(reg, sz - 1));
 348         OUT_RING_TABLE(cmdbuf->buf, sz);
 349         ADVANCE_RING();
 350 
 351         cmdbuf->buf += sz * 4;
 352         cmdbuf->bufsz -= sz * 4;
 353 
 354         return (0);
 355 }
 356 
 357 /*
 358  * Uploads user-supplied vertex program instructions or parameters onto
 359  * the graphics card.
 360  * Called by r300_do_cp_cmdbuf.
 361  */
 362 static inline int r300_emit_vpu(drm_radeon_private_t *dev_priv,
 363     drm_radeon_kcmd_buffer_t *cmdbuf, drm_r300_cmd_header_t header)
 364 {
 365         int sz;
 366         int addr;
 367         RING_LOCALS;
 368 
 369         sz = header.vpu.count;
 370         addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;
 371 
 372         if (!sz)
 373                 return (0);
 374         if (sz * 16 > cmdbuf->bufsz)
 375                 return (EINVAL);
 376 
 377         BEGIN_RING(5 + sz * 4);
 378         /* Wait for VAP to come to senses.. */
 379         /*
 380          * there is no need to emit it multiple times, (only once before
 381          * VAP is programmed, but this optimization is for later
 382          */
 383         OUT_RING_REG(R300_VAP_PVS_WAITIDLE, 0);
 384         OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
 385         OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1));
 386         OUT_RING_TABLE(cmdbuf->buf, sz * 4);
 387 
 388         ADVANCE_RING();
 389 
 390         cmdbuf->buf += sz * 16;
 391         cmdbuf->bufsz -= sz * 16;
 392 
 393         return (0);
 394 }
 395 
 396 /*
 397  * Emit a clear packet from userspace.
 398  * Called by r300_emit_packet3.
 399  */
 400 static inline int r300_emit_clear(drm_radeon_private_t *dev_priv,
 401     drm_radeon_kcmd_buffer_t *cmdbuf)
 402 {
 403         RING_LOCALS;
 404 
 405         if (8 * 4 > cmdbuf->bufsz)
 406                 return (EINVAL);
 407 
 408         BEGIN_RING(10);
 409         OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
 410         OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
 411             (1 << R300_PRIM_NUM_VERTICES_SHIFT));
 412         OUT_RING_TABLE(cmdbuf->buf, 8);
 413         ADVANCE_RING();
 414 
 415         cmdbuf->buf += 8 * 4;
 416         cmdbuf->bufsz -= 8 * 4;
 417 
 418         return (0);
 419 }
 420 
 421 static inline int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv,
 422     drm_radeon_kcmd_buffer_t *cmdbuf, u32 header)
 423 {
 424         int count, i, k;
 425 #define MAX_ARRAY_PACKET                64
 426         u32 payload[MAX_ARRAY_PACKET];
 427         u32 narrays;
 428         RING_LOCALS;
 429 
 430         count = (header >> 16) & 0x3fff;
 431 
 432         if ((count + 1) > MAX_ARRAY_PACKET) {
 433                 DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
 434                     count);
 435                 return (EINVAL);
 436         }
 437         (void) memset(payload, 0, MAX_ARRAY_PACKET * 4);
 438         (void) memcpy(payload, cmdbuf->buf + 4, (count + 1) * 4);
 439 
 440         /* carefully check packet contents */
 441 
 442         narrays = payload[0];
 443         k = 0;
 444         i = 1;
 445         while ((k < narrays) && (i < (count + 1))) {
 446                 i++;            /* skip attribute field */
 447                 if (!RADEON_CHECK_OFFSET(dev_priv, payload[i])) {
 448                         DRM_ERROR("Offset failed range check (k=%d i=%d) "
 449                             "while processing 3D_LOAD_VBPNTR packet.\n",
 450                             k, i);
 451                         return (EINVAL);
 452                 }
 453                 k++;
 454                 i++;
 455                 if (k == narrays)
 456                         break;
 457                 /* have one more to process, they come in pairs */
 458                 if (!RADEON_CHECK_OFFSET(dev_priv, payload[i])) {
 459                         DRM_ERROR("Offset failed range check (k=%d i=%d) "
 460                             "while processing 3D_LOAD_VBPNTR packet.\n",
 461                             k, i);
 462                         return (EINVAL);
 463                 }
 464                 k++;
 465                 i++;
 466         }
 467         /* do the counts match what we expect ? */
 468         if ((k != narrays) || (i != (count + 1))) {
 469                 DRM_ERROR("Malformed 3D_LOAD_VBPNTR packet "
 470                     "(k=%d i=%d narrays=%d count+1=%d).\n",
 471                     k, i, narrays, count + 1);
 472                 return (EINVAL);
 473         }
 474 
 475         /* all clear, output packet */
 476 
 477         BEGIN_RING(count + 2);
 478         OUT_RING(header);
 479         OUT_RING_TABLE(payload, count + 1);
 480         ADVANCE_RING();
 481 
 482         cmdbuf->buf += (count + 2) * 4;
 483         cmdbuf->bufsz -= (count + 2) * 4;
 484 
 485         return (0);
 486 }
 487 
 488 static inline int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
 489     drm_radeon_kcmd_buffer_t *cmdbuf)
 490 {
 491         u32 *cmd = (u32 *)(uintptr_t)cmdbuf->buf;
 492         int count, ret;
 493         RING_LOCALS;
 494 
 495         count = (cmd[0] >> 16) & 0x3fff;
 496 
 497         if (cmd[0] & 0x8000) {
 498                 u32 offset;
 499 
 500                 if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
 501                     RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
 502                         offset = cmd[2] << 10;
 503                         ret = !RADEON_CHECK_OFFSET(dev_priv, offset);
 504                         if (ret) {
 505                                 DRM_ERROR("Invalid bitblt first offset "
 506                                     "is %08X\n", offset);
 507                                 return (EINVAL);
 508                         }
 509                 }
 510 
 511                 if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
 512                     (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
 513                         offset = cmd[3] << 10;
 514                         ret = !RADEON_CHECK_OFFSET(dev_priv, offset);
 515                         if (ret) {
 516                                 DRM_ERROR("Invalid bitblt second offset "
 517                                     "is %08X\n", offset);
 518                                 return (EINVAL);
 519                         }
 520 
 521                 }
 522         }
 523 
 524         BEGIN_RING(count+2);
 525         OUT_RING(cmd[0]);
 526         OUT_RING_TABLE((cmdbuf->buf + 4), count + 1);
 527         ADVANCE_RING();
 528 
 529         cmdbuf->buf += (count+2)*4;
 530         cmdbuf->bufsz -= (count+2)*4;
 531 
 532         return (0);
 533 }
 534 
 535 
 536 static inline int r300_emit_indx_buffer(drm_radeon_private_t *dev_priv,
 537     drm_radeon_kcmd_buffer_t *cmdbuf)
 538 {
 539         u32 *cmd = (u32 *)(uintptr_t)cmdbuf->buf;
 540         int count, ret;
 541         RING_LOCALS;
 542 
 543         count = (cmd[0]>>16) & 0x3fff;
 544 
 545         if ((cmd[1] & 0x8000ffff) != 0x80000810) {
 546                 DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
 547                 return (EINVAL);
 548         }
 549         ret = !RADEON_CHECK_OFFSET(dev_priv, cmd[2]);
 550         if (ret) {
 551                 DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
 552                 return (EINVAL);
 553         }
 554 
 555         BEGIN_RING(count+2);
 556         OUT_RING(cmd[0]);
 557         OUT_RING_TABLE(cmdbuf->buf + 4, count + 1);
 558         ADVANCE_RING();
 559 
 560         cmdbuf->buf += (count+2)*4;
 561         cmdbuf->bufsz -= (count+2)*4;
 562 
 563         return (0);
 564 }
 565 
 566 
 567 static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
 568                                             drm_radeon_kcmd_buffer_t *cmdbuf)
 569 {
 570         u32 header;
 571         int count;
 572         RING_LOCALS;
 573 
 574         if (4 > cmdbuf->bufsz)
 575                 return (EINVAL);
 576 
 577         /*
 578          * Fixme !! This simply emits a packet without much checking.
 579          * We need to be smarter.
 580          */
 581 
 582         /* obtain first word - actual packet3 header */
 583         header = *(u32 *)(uintptr_t)cmdbuf->buf;
 584 
 585         /* Is it packet 3 ? */
 586         if ((header >> 30) != 0x3) {
 587                 DRM_ERROR("Not a packet3 header (0x%08x)\n", header);
 588                 return (EINVAL);
 589         }
 590 
 591         count = (header >> 16) & 0x3fff;
 592 
 593         /* Check again now that we know how much data to expect */
 594         if ((count + 2) * 4 > cmdbuf->bufsz) {
 595                 DRM_ERROR("Expected packet3 of length %d but have only "
 596                     "%d bytes left\n", (count + 2) * 4, cmdbuf->bufsz);
 597                 return (EINVAL);
 598         }
 599 
 600         /* Is it a packet type we know about ? */
 601         switch (header & 0xff00) {
 602         case RADEON_3D_LOAD_VBPNTR:     /* load vertex array pointers */
 603                 return (r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header));
 604 
 605         case RADEON_CNTL_BITBLT_MULTI:
 606                 return (r300_emit_bitblt_multi(dev_priv, cmdbuf));
 607 
 608         case RADEON_CP_INDX_BUFFER:
 609                         // DRAW_INDX_2 without INDX_BUFFER seems to lock
 610                         // up the GPU
 611                 return (r300_emit_indx_buffer(dev_priv, cmdbuf));
 612 
 613         case RADEON_CP_3D_DRAW_IMMD_2:
 614                         /* triggers drawing using in-packet vertex data */
 615         case RADEON_CP_3D_DRAW_VBUF_2:
 616                         /* triggers drawing of vertex buffers setup elsewhere */
 617         case RADEON_CP_3D_DRAW_INDX_2:
 618                         /* triggers drawing using indices to vertex buffer */
 619         case RADEON_WAIT_FOR_IDLE:
 620         case RADEON_CP_NOP:
 621                 /* these packets are safe */
 622                 break;
 623         default:
 624                 DRM_ERROR("Unknown packet3 header (0x%08x)\n", header);
 625                 return (EINVAL);
 626         }
 627 
 628         BEGIN_RING(count + 2);
 629         OUT_RING(header);
 630         OUT_RING_TABLE((cmdbuf->buf + 4), count + 1);
 631         ADVANCE_RING();
 632 
 633         cmdbuf->buf += (count + 2) * 4;
 634         cmdbuf->bufsz -= (count + 2) * 4;
 635 
 636         return (0);
 637 }
 638 
 639 /*
 640  * Emit a rendering packet3 from userspace.
 641  * Called by r300_do_cp_cmdbuf.
 642  */
 643 static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
 644     drm_radeon_kcmd_buffer_t *cmdbuf, drm_r300_cmd_header_t header)
 645 {
 646         int n;
 647         int ret;
 648         char *orig_buf = cmdbuf->buf;
 649         int orig_bufsz = cmdbuf->bufsz;
 650 
 651         /*
 652          * This is a do-while-loop so that we run the interior at least once,
 653          * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
 654          */
 655         n = 0;
 656         do {
 657                 if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
 658                         ret = r300_emit_cliprects(dev_priv, cmdbuf, n);
 659                         if (ret)
 660                                 return (ret);
 661 
 662                         cmdbuf->buf = orig_buf;
 663                         cmdbuf->bufsz = orig_bufsz;
 664                 }
 665 
 666                 switch (header.packet3.packet) {
 667                 case R300_CMD_PACKET3_CLEAR:
 668                         DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
 669                         ret = r300_emit_clear(dev_priv, cmdbuf);
 670                         if (ret) {
 671                                 DRM_ERROR("r300_emit_clear failed\n");
 672                                 return (ret);
 673                         }
 674                         break;
 675 
 676                 case R300_CMD_PACKET3_RAW:
 677                         DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
 678                         ret = r300_emit_raw_packet3(dev_priv, cmdbuf);
 679                         if (ret) {
 680                                 DRM_ERROR("r300_emit_raw_packet3 failed\n");
 681                                 return (ret);
 682                         }
 683                         break;
 684 
 685                 default:
 686                         DRM_ERROR("bad packet3 type %i at %p\n",
 687                             header.packet3.packet,
 688                             cmdbuf->buf - sizeof (header));
 689                         return (EINVAL);
 690                 }
 691 
 692                 n += R300_SIMULTANEOUS_CLIPRECTS;
 693         } while (n < cmdbuf->nbox);
 694 
 695         return (0);
 696 }
 697 
 698 /*
 699  * Some of the R300 chips seem to be extremely touchy about the two registers
 700  * that are configured in r300_pacify.
 701  * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
 702  * sends a command buffer that contains only state setting commands and a
 703  * vertex program/parameter upload sequence, this will eventually lead to a
 704  * lockup, unless the sequence is bracketed by calls to r300_pacify.
 705  * So we should take great care to *always* call r300_pacify before
 706  * *anything* 3D related, and again afterwards. This is what the
 707  * call bracket in r300_do_cp_cmdbuf is for.
 708  */
 709 
 710 /*
 711  * Emit the sequence to pacify R300.
 712  */
 713 static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
 714 {
 715         RING_LOCALS;
 716 
 717         BEGIN_RING(6);
 718         OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
 719         OUT_RING(0xa);
 720         OUT_RING(CP_PACKET0(0x4f18, 0));
 721         OUT_RING(0x3);
 722         OUT_RING(CP_PACKET3(RADEON_CP_NOP, 0));
 723         OUT_RING(0x0);
 724         ADVANCE_RING();
 725 }
 726 
 727 /*
 728  * Called by r300_do_cp_cmdbuf to update the internal buffer age and state.
 729  * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
 730  * be careful about how this function is called.
 731  */
 732 static void r300_discard_buffer(drm_device_t *dev, drm_buf_t *buf)
 733 {
 734         drm_radeon_private_t *dev_priv = dev->dev_private;
 735         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
 736 
 737         buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
 738         buf->pending = 1;
 739         buf->used = 0;
 740 }
 741 
 742 static int r300_scratch(drm_radeon_private_t *dev_priv,
 743                         drm_radeon_kcmd_buffer_t *cmdbuf,
 744                         drm_r300_cmd_header_t header)
 745 {
 746         u32 *ref_age_base;
 747         u32 i, buf_idx, h_pending;
 748         RING_LOCALS;
 749 
 750         if (cmdbuf->bufsz < sizeof (uint64_t) +
 751             header.scratch.n_bufs * sizeof (buf_idx)) {
 752                 return (EINVAL);
 753         }
 754 
 755         if (header.scratch.reg >= 5) {
 756                 return (EINVAL);
 757         }
 758 
 759         dev_priv->scratch_ages[header.scratch.reg] ++;
 760 
 761         ref_age_base = (u32 *)(uintptr_t)*((uint64_t *)(uintptr_t)cmdbuf->buf);
 762 
 763         cmdbuf->buf += sizeof (uint64_t);
 764         cmdbuf->bufsz -= sizeof (uint64_t);
 765 
 766         for (i = 0; i < header.scratch.n_bufs; i++) {
 767                 buf_idx = *(u32 *)(uintptr_t)cmdbuf->buf;
 768                 buf_idx *= 2; /* 8 bytes per buf */
 769 
 770                 if (DRM_COPY_TO_USER(ref_age_base + buf_idx,
 771                     &dev_priv->scratch_ages[header.scratch.reg],
 772                     sizeof (u32))) {
 773                         return (EINVAL);
 774                 }
 775 
 776                 if (DRM_COPY_FROM_USER(&h_pending,
 777                     ref_age_base + buf_idx + 1, sizeof (u32))) {
 778                         return (EINVAL);
 779                 }
 780 
 781                 if (h_pending == 0) {
 782                         return (EINVAL);
 783                 }
 784 
 785                 h_pending--;
 786 
 787                 if (DRM_COPY_TO_USER(ref_age_base + buf_idx + 1,
 788                     &h_pending, sizeof (u32))) {
 789                         return (EINVAL);
 790                 }
 791 
 792                 cmdbuf->buf += sizeof (buf_idx);
 793                 cmdbuf->bufsz -= sizeof (buf_idx);
 794         }
 795 
 796         BEGIN_RING(2);
 797         OUT_RING(CP_PACKET0(RADEON_SCRATCH_REG0 + header.scratch.reg * 4, 0));
 798         OUT_RING(dev_priv->scratch_ages[header.scratch.reg]);
 799         ADVANCE_RING();
 800 
 801         return (0);
 802 }
 803 
 804 /*
 805  * Parses and validates a user-supplied command buffer and emits appropriate
 806  * commands on the DMA ring buffer.
 807  * Called by the ioctl handler function radeon_cp_cmdbuf.
 808  */
 809 /*ARGSUSED*/
 810 int
 811 r300_do_cp_cmdbuf(drm_device_t *dev,
 812     drm_file_t *fpriv, drm_radeon_kcmd_buffer_t *cmdbuf)
 813 {
 814         drm_radeon_private_t *dev_priv = dev->dev_private;
 815         drm_device_dma_t *dma = dev->dma;
 816         drm_buf_t *buf = NULL;
 817         int emit_dispatch_age = 0;
 818         int ret = 0;
 819 
 820         DRM_DEBUG("\n");
 821 
        /*
         * See the comment above r300_pacify for why this call
         * must be here, and what the cleanup gotos are for.
         */
 826         r300_pacify(dev_priv);
 827 
 828         if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
 829                 ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
 830                 if (ret)
 831                         goto cleanup;
 832         }
 833 
 834         while (cmdbuf->bufsz >= sizeof (drm_r300_cmd_header_t)) {
 835                 int idx;
 836                 drm_r300_cmd_header_t header;
 837 
 838                 header.u = *(unsigned int *)(uintptr_t)cmdbuf->buf;
 839 
 840                 cmdbuf->buf += sizeof (header);
 841                 cmdbuf->bufsz -= sizeof (header);
 842 
 843                 switch (header.header.cmd_type) {
 844                 case R300_CMD_PACKET0:
 845                         DRM_DEBUG("R300_CMD_PACKET0\n");
 846                         ret = r300_emit_packet0(dev_priv, cmdbuf, header);
 847                         if (ret) {
 848                                 DRM_ERROR("r300_emit_packet0 failed\n");
 849                                 goto cleanup;
 850                         }
 851                         break;
 852 
 853                 case R300_CMD_VPU:
 854                         DRM_DEBUG("R300_CMD_VPU\n");
 855                         ret = r300_emit_vpu(dev_priv, cmdbuf, header);
 856                         if (ret) {
 857                                 DRM_ERROR("r300_emit_vpu failed\n");
 858                                 goto cleanup;
 859                         }
 860                         break;
 861 
 862                 case R300_CMD_PACKET3:
 863                         DRM_DEBUG("R300_CMD_PACKET3\n");
 864                         ret = r300_emit_packet3(dev_priv, cmdbuf, header);
 865                         if (ret) {
 866                                 DRM_ERROR("r300_emit_packet3 failed\n");
 867                                 goto cleanup;
 868                         }
 869                         break;
 870 
 871                 case R300_CMD_END3D:
 872                         DRM_DEBUG("R300_CMD_END3D\n");
 873                         /*
 874                          * TODO:
 875                          * Ideally userspace driver should not need to issue
 876                          * this call, i.e. the drm driver should issue it
 877                          * automatically and prevent lockups. In practice, we
 878                          * do not understand why this call is needed and what
 879                          * it does (except for some vague guesses that it has
 880                          * to do with cache coherence) and so the user space
 881                          * driver does it.
 882                          *
 883                          * Once we are sure which uses prevent lockups the code
 884                          * could be moved into the kernel and the userspace
 885                          * driver will not need to use this command.
 886                          *
 887                          * Note that issuing this command does not hurt anything
 888                          * except, possibly, performance
 889                          */
 890                         r300_pacify(dev_priv);
 891                         break;
 892 
 893                 case R300_CMD_CP_DELAY:
 894                         /* simple enough, we can do it here */
 895                         DRM_DEBUG("R300_CMD_CP_DELAY\n");
 896                         {
 897                                 int i;
 898                                 RING_LOCALS;
 899 
 900                                 BEGIN_RING(header.delay.count);
 901                                 for (i = 0; i < header.delay.count; i++)
 902                                         OUT_RING(RADEON_CP_PACKET2);
 903                                 ADVANCE_RING();
 904                         }
 905                         break;
 906 
 907                 case R300_CMD_DMA_DISCARD:
 908                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
 909                         idx = header.dma.buf_idx;
 910                         if (idx < 0 || idx >= dma->buf_count) {
 911                                 DRM_ERROR("buffer index %d (of %d max)\n",
 912                                     idx, dma->buf_count - 1);
 913                                 ret = EINVAL;
 914                                 goto cleanup;
 915                         }
 916 
 917                         buf = dma->buflist[idx];
 918                         if (buf->filp != fpriv || buf->pending) {
 919                                 DRM_ERROR("bad buffer %p %p %d\n",
 920                                     buf->filp, fpriv, buf->pending);
 921                                 ret = EINVAL;
 922                                 goto cleanup;
 923                         }
 924 
 925                         emit_dispatch_age = 1;
 926                         r300_discard_buffer(dev, buf);
 927                         break;
 928 
 929                 case R300_CMD_WAIT:
 930                         /* simple enough, we can do it here */
 931                         DRM_DEBUG("R300_CMD_WAIT\n");
 932                         if (header.wait.flags == 0)
 933                                 break;  /* nothing to do */
 934 
 935                         {
 936                                 RING_LOCALS;
 937 
 938                                 BEGIN_RING(2);
 939                                 OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
 940                                 OUT_RING((header.wait.flags & 0xf) << 14);
 941                                 ADVANCE_RING();
 942                         }
 943                         break;
 944 
 945                 case R300_CMD_SCRATCH:
 946                         DRM_DEBUG("R300_CMD_SCRATCH\n");
 947                         ret = r300_scratch(dev_priv, cmdbuf, header);
 948                         if (ret) {
 949                                 DRM_ERROR("r300_scratch failed\n");
 950                                 goto cleanup;
 951                         }
 952                         break;
 953 
 954                 default:
 955                         DRM_ERROR("bad cmd_type %i at %p\n",
 956                             header.header.cmd_type,
 957                             cmdbuf->buf - sizeof (header));
 958                         ret = EINVAL;
 959                         goto cleanup;
 960                 }
 961         }
 962 
 963         DRM_DEBUG("END\n");
 964 
 965 cleanup:
 966         r300_pacify(dev_priv);
 967 
 968         /*
 969          * We emit the vertex buffer age here, outside the pacifier "brackets"
 970          * for two reasons:
 971          * (1) This may coalesce multiple age emissions into a single one and
 972          * (2) more importantly, some chips lock up hard when scratch registers
 973          *              are written inside the pacifier bracket.
 974          */
 975         if (emit_dispatch_age) {
 976                 RING_LOCALS;
 977 
 978                 /* Emit the vertex buffer age */
 979                 BEGIN_RING(2);
 980                 RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch);
 981                 ADVANCE_RING();
 982         }
 983 
 984         COMMIT_RING();
 985 
 986         return (ret);
 987 }