/* BEGIN CSTYLED */

/*
 * Copyright (c) 2009, Intel Corporation.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/x86_archext.h>
#include <sys/vfs_opreg.h>
#include "drmP.h"
#include "drm.h"
#include "i915_drm.h"
#include "i915_drv.h"

#ifndef roundup
#define	roundup(x, y)	((((x)+((y)-1))/(y))*(y))
#endif /* !roundup */

#define	I915_GEM_GPU_DOMAINS	(~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))

static timeout_id_t worktimer_id = NULL;

extern int drm_mm_init(struct drm_mm *mm,
    unsigned long start, unsigned long size);
extern void drm_mm_put_block(struct drm_mm_node *cur);
extern int choose_addr(struct as *as, caddr_t *addrp, size_t len, offset_t off,
    int vacalign, uint_t flags);

static void
i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj,
    uint32_t read_domains,
    uint32_t write_domain);
static void i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj);
static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj);
static int i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj,
    int write);
static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj,
    int write);
static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
    uint64_t offset,
    uint64_t size);
static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj);
static void i915_gem_object_free_page_list(struct drm_gem_object *obj);
static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
static int i915_gem_object_get_page_list(struct drm_gem_object *obj);

static void
i915_gem_cleanup_ringbuffer(struct drm_device *dev);

/*ARGSUSED*/
int
i915_gem_init_ioctl(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_init args;

	if (dev->driver->use_gem != 1)
		return ENODEV;

	DRM_COPYFROM_WITH_RETURN(&args,
	    (struct drm_i915_gem_init *) data, sizeof(args));

	spin_lock(&dev->struct_mutex);

	if ((args.gtt_start >= args.gtt_end) ||
	    ((args.gtt_start & (PAGE_SIZE - 1)) != 0) ||
	    ((args.gtt_end & (PAGE_SIZE - 1)) != 0)) {
		spin_unlock(&dev->struct_mutex);
		DRM_ERROR("i915_gem_init_ioctl invalid arg 0x%lx args.start 0x%lx end 0x%lx", &args, args.gtt_start, args.gtt_end);
		return EINVAL;
	}

	dev->gtt_total = (uint32_t) (args.gtt_end - args.gtt_start);

	(void) drm_mm_init(&dev_priv->mm.gtt_space,
	    (unsigned long) args.gtt_start, dev->gtt_total);
	DRM_DEBUG("i915_gem_init_ioctl dev->gtt_total %x, dev_priv->mm.gtt_space 0x%x gtt_start 0x%lx", dev->gtt_total, dev_priv->mm.gtt_space, args.gtt_start);
	ASSERT(dev->gtt_total != 0);

	spin_unlock(&dev->struct_mutex);

	return 0;
}

/*ARGSUSED*/
int
i915_gem_get_aperture_ioctl(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	struct drm_i915_gem_get_aperture args;
	int ret;

	if (dev->driver->use_gem != 1)
		return ENODEV;

	args.aper_size = (uint64_t)dev->gtt_total;
	args.aper_available_size = (args.aper_size -
	    atomic_read(&dev->pin_memory));

	ret = DRM_COPY_TO_USER((struct drm_i915_gem_get_aperture __user *) data, &args, sizeof(args));

	if (ret != 0)
		DRM_ERROR("i915_gem_get_aperture_ioctl error! %d", ret);

	DRM_DEBUG("i915_gem_get_aperture_ioctl called sizeof %d, aper_size 0x%x, aper_available_size 0x%x\n", sizeof(args), dev->gtt_total, args.aper_available_size);

	return 0;
}

/**
 * Creates a new mm object and returns a handle to it.
 */
/*ARGSUSED*/
int
i915_gem_create_ioctl(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	struct drm_i915_gem_create args;
	struct drm_gem_object *obj;
	int handlep;
	int ret;

	if (dev->driver->use_gem != 1)
		return ENODEV;

	DRM_COPYFROM_WITH_RETURN(&args,
	    (struct drm_i915_gem_create *) data, sizeof(args));

	args.size = (uint64_t) roundup(args.size, PAGE_SIZE);

	if (args.size == 0) {
		DRM_ERROR("Invalid obj size %d", args.size);
		return EINVAL;
	}
	/* Allocate the new object */
	obj = drm_gem_object_alloc(dev, args.size);
	if (obj == NULL) {
		DRM_ERROR("Failed to alloc obj");
		return ENOMEM;
	}

	ret = drm_gem_handle_create(fpriv, obj, &handlep);
	spin_lock(&dev->struct_mutex);
	drm_gem_object_handle_unreference(obj);
	spin_unlock(&dev->struct_mutex);
	if (ret)
		return ret;

	args.handle = handlep;

	ret = DRM_COPY_TO_USER((struct drm_i915_gem_create *) data, &args, sizeof(args));

	if (ret != 0)
		DRM_ERROR("gem create error! %d", ret);

	DRM_DEBUG("i915_gem_create_ioctl object name %d, size 0x%lx, list 0x%lx, obj 0x%lx", handlep, args.size, &fpriv->object_idr, obj);

	return 0;
}

/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
/*ARGSUSED*/
int
i915_gem_pread_ioctl(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	struct drm_i915_gem_pread args;
	struct drm_gem_object *obj;
	int ret;

	if (dev->driver->use_gem != 1)
		return ENODEV;

	DRM_COPYFROM_WITH_RETURN(&args,
	    (struct drm_i915_gem_pread __user *) data, sizeof(args));

	obj = drm_gem_object_lookup(fpriv, args.handle);
	if (obj == NULL)
		return EBADF;

	/* Bounds check source.
	 *
	 * XXX: This could use review for overflow issues...
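	 *
	 * Illustrative note (not from the original source): a wraparound-safe
	 * form of this check would be
	 *     args.size > obj->size || args.offset > obj->size - args.size
	 * which avoids relying on the unsigned sum args.offset + args.size.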
	 */
	if (args.offset > obj->size || args.size > obj->size ||
	    args.offset + args.size > obj->size) {
		drm_gem_object_unreference(obj);
		DRM_ERROR("i915_gem_pread_ioctl invalid args");
		return EINVAL;
	}

	spin_lock(&dev->struct_mutex);

	ret = i915_gem_object_set_cpu_read_domain_range(obj, args.offset, args.size);
	if (ret != 0) {
		drm_gem_object_unreference(obj);
		spin_unlock(&dev->struct_mutex);
		DRM_ERROR("pread failed to read domain range ret %d!!!", ret);
		return EFAULT;
	}

	unsigned long unwritten = 0;
	uint32_t *user_data;
	user_data = (uint32_t *) (uintptr_t) args.data_ptr;

	unwritten = DRM_COPY_TO_USER(user_data, obj->kaddr + args.offset, args.size);
	if (unwritten) {
		ret = EFAULT;
		DRM_ERROR("i915_gem_pread error!!! unwritten %d", unwritten);
	}

	drm_gem_object_unreference(obj);
	spin_unlock(&dev->struct_mutex);

	return ret;
}

/*ARGSUSED*/
static int
i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
    struct drm_i915_gem_pwrite *args,
    struct drm_file *file_priv)
{
	uint32_t *user_data;
	int ret = 0;
	unsigned long unwritten = 0;

	user_data = (uint32_t *) (uintptr_t) args->data_ptr;
	spin_lock(&dev->struct_mutex);
	ret = i915_gem_object_pin(obj, 0);
	if (ret) {
		spin_unlock(&dev->struct_mutex);
		DRM_ERROR("i915_gem_gtt_pwrite failed to pin ret %d", ret);
		return ret;
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, 1);
	if (ret)
		goto err;

	DRM_DEBUG("obj %d write domain 0x%x read domain 0x%x", obj->name, obj->write_domain, obj->read_domains);

	unwritten = DRM_COPY_FROM_USER(obj->kaddr + args->offset, user_data, args->size);
	if (unwritten) {
		ret = EFAULT;
		DRM_ERROR("i915_gem_gtt_pwrite error!!! unwritten %d", unwritten);
		goto err;
	}

err:
	i915_gem_object_unpin(obj);
	spin_unlock(&dev->struct_mutex);
	if (ret)
		DRM_ERROR("i915_gem_gtt_pwrite error %d", ret);
	return ret;
}

/*ARGSUSED*/
int
i915_gem_shmem_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
    struct drm_i915_gem_pwrite *args,
    struct drm_file *file_priv)
{
	DRM_ERROR("i915_gem_shmem_pwrite not supported");
	return -1;
}

/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
/*ARGSUSED*/
int
i915_gem_pwrite_ioctl(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	struct drm_i915_gem_pwrite args;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret = 0;

	if (dev->driver->use_gem != 1)
		return ENODEV;

	ret = DRM_COPY_FROM_USER(&args,
	    (struct drm_i915_gem_pwrite __user *) data, sizeof(args));
	if (ret)
		DRM_ERROR("i915_gem_pwrite_ioctl failed to copy from user");
	obj = drm_gem_object_lookup(fpriv, args.handle);
	if (obj == NULL)
		return EBADF;
	obj_priv = obj->driver_private;
	DRM_DEBUG("i915_gem_pwrite_ioctl, obj->name %d", obj->name);

	/* Bounds check destination.
	 *
	 * XXX: This could use review for overflow issues...
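	 * (See the note at the matching pread bounds check above for a
	 * wraparound-safe form of this test.)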
	 */
	if (args.offset > obj->size || args.size > obj->size ||
	    args.offset + args.size > obj->size) {
		drm_gem_object_unreference(obj);
		DRM_ERROR("i915_gem_pwrite_ioctl invalid arg");
		return EINVAL;
	}

	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (obj_priv->tiling_mode == I915_TILING_NONE &&
	    dev->gtt_total != 0)
		ret = i915_gem_gtt_pwrite(dev, obj, &args, fpriv);
	else
		ret = i915_gem_shmem_pwrite(dev, obj, &args, fpriv);

	if (ret)
		DRM_ERROR("pwrite failed %d\n", ret);

	drm_gem_object_unreference(obj);

	return ret;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
/*ARGSUSED*/
int
i915_gem_set_domain_ioctl(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	struct drm_i915_gem_set_domain args;
	struct drm_gem_object *obj;
	int ret = 0;

	if (dev->driver->use_gem != 1)
		return ENODEV;

	DRM_COPYFROM_WITH_RETURN(&args,
	    (struct drm_i915_gem_set_domain __user *) data, sizeof(args));

	uint32_t read_domains = args.read_domains;
	uint32_t write_domain = args.write_domain;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))
		ret = EINVAL;

	if (read_domains & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))
		ret = EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		ret = EINVAL;
	if (ret) {
		DRM_ERROR("set_domain invalid read or write");
		return EINVAL;
	}

	obj = drm_gem_object_lookup(fpriv, args.handle);
	if (obj == NULL)
		return EBADF;

	spin_lock(&dev->struct_mutex);
	DRM_DEBUG("set_domain_ioctl %p(name %d size 0x%x), %08x %08x\n",
	    obj, obj->name, obj->size, args.read_domains, args.write_domain);

	if (read_domains & I915_GEM_DOMAIN_GTT) {
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);

		/* Silently promote "you're not bound, there was nothing to do"
		 * to success, since the client was just asking us to
		 * make sure everything was done.
		 */
		if (ret == EINVAL)
			ret = 0;
	} else {
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
	}

	drm_gem_object_unreference(obj);
	spin_unlock(&dev->struct_mutex);
	if (ret)
		DRM_ERROR("i915_set_domain_ioctl ret %d", ret);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
/*ARGSUSED*/
int
i915_gem_sw_finish_ioctl(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	struct drm_i915_gem_sw_finish args;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret = 0;

	if (dev->driver->use_gem != 1)
		return ENODEV;

	DRM_COPYFROM_WITH_RETURN(&args,
	    (struct drm_i915_gem_sw_finish __user *) data, sizeof(args));

	spin_lock(&dev->struct_mutex);
	obj = drm_gem_object_lookup(fpriv, args.handle);
	if (obj == NULL) {
		spin_unlock(&dev->struct_mutex);
		return EBADF;
	}

	DRM_DEBUG("%s: sw_finish %d (%p name %d size 0x%x)\n",
	    __func__, args.handle, obj, obj->name, obj->size);

	obj_priv = obj->driver_private;
	/* Pinned buffers may be scanout, so flush the cache */
	if (obj_priv->pin_count) {
		i915_gem_object_flush_cpu_write_domain(obj);
	}

	drm_gem_object_unreference(obj);
	spin_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 */
/*ARGSUSED*/
int
i915_gem_mmap_ioctl(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	struct drm_i915_gem_mmap args;
	struct drm_gem_object *obj;
	caddr_t vvaddr = NULL;
	int ret;

	if (dev->driver->use_gem != 1)
		return ENODEV;

	DRM_COPYFROM_WITH_RETURN(
	    &args, (struct drm_i915_gem_mmap __user *)data,
	    sizeof (struct drm_i915_gem_mmap));

	obj = drm_gem_object_lookup(fpriv, args.handle);
	if (obj == NULL)
		return EBADF;

	ret = ddi_devmap_segmap(fpriv->dev, (off_t)obj->map->handle,
	    ttoproc(curthread)->p_as, &vvaddr, obj->map->size,
	    PROT_ALL, PROT_ALL, MAP_SHARED, fpriv->credp);
	if (ret)
		return ret;

	spin_lock(&dev->struct_mutex);
	drm_gem_object_unreference(obj);
	spin_unlock(&dev->struct_mutex);

	args.addr_ptr = (uint64_t)(uintptr_t)vvaddr;

	DRM_COPYTO_WITH_RETURN(
	    (struct drm_i915_gem_mmap __user *)data,
	    &args, sizeof (struct drm_i915_gem_mmap));

	return 0;
}

static void
i915_gem_object_free_page_list(struct drm_gem_object *obj)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	if (obj_priv->page_list == NULL)
		return;

	kmem_free(obj_priv->page_list,
	    btop(obj->size) * sizeof(caddr_t));

	obj_priv->page_list = NULL;
}

static void
i915_gem_object_move_to_active(struct drm_gem_object *obj, uint32_t seqno)
{
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;

	/* Add a reference if we're newly entering the active list. */
	if (!obj_priv->active) {
		drm_gem_object_reference(obj);
		obj_priv->active = 1;
	}
	/* Move from whatever list we were on to the tail of execution.
	 */
	list_move_tail(&obj_priv->list,
	    &dev_priv->mm.active_list, (caddr_t)obj_priv);
	obj_priv->last_rendering_seqno = seqno;
}

static void
i915_gem_object_move_to_flushing(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;

	list_move_tail(&obj_priv->list, &dev_priv->mm.flushing_list, (caddr_t)obj_priv);
	obj_priv->last_rendering_seqno = 0;
}

static void
i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;

	if (obj_priv->pin_count != 0) {
		list_del_init(&obj_priv->list);
	} else {
		list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list, (caddr_t)obj_priv);
	}
	obj_priv->last_rendering_seqno = 0;
	if (obj_priv->active) {
		obj_priv->active = 0;
		drm_gem_object_unreference(obj);
	}
}

/**
 * Creates a new sequence number, emitting a write of it to the status page
 * plus an interrupt, which will trigger i915_user_interrupt_handler.
 *
 * Must be called with struct_lock held.
 *
 * Returned sequence numbers are nonzero on success.
 */
static uint32_t
i915_add_request(struct drm_device *dev, uint32_t flush_domains)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_request *request;
	uint32_t seqno;
	int was_empty;
	RING_LOCALS;

	request = drm_calloc(1, sizeof(*request), DRM_MEM_DRIVER);
	if (request == NULL) {
		DRM_ERROR("Failed to alloc request");
		return 0;
	}
	/* Grab the seqno we're going to make this request be, and bump the
	 * next (skipping 0 so it can be the reserved no-seqno value).
	 */
	seqno = dev_priv->mm.next_gem_seqno;
	dev_priv->mm.next_gem_seqno++;
	if (dev_priv->mm.next_gem_seqno == 0)
		dev_priv->mm.next_gem_seqno++;

	DRM_DEBUG("add_request seqno = %d dev 0x%lx", seqno, dev);

	BEGIN_LP_RING(4);
	OUT_RING(MI_STORE_DWORD_INDEX);
	OUT_RING(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	OUT_RING(seqno);
	OUT_RING(0);
	ADVANCE_LP_RING();

	BEGIN_LP_RING(2);
	OUT_RING(0);
	OUT_RING(MI_USER_INTERRUPT);
	ADVANCE_LP_RING();

	request->seqno = seqno;
	request->emitted_jiffies = jiffies;
	was_empty = list_empty(&dev_priv->mm.request_list);
	list_add_tail(&request->list, &dev_priv->mm.request_list, (caddr_t)request);

	/* Associate any objects on the flushing list matching the write
	 * domain we're flushing with our flush.
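	 * Those objects are moved to the active list with this request's
	 * seqno, so they retire to the inactive list once the seqno passes
	 * instead of lingering on the flushing list.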
	 */
	if (flush_domains != 0) {
		struct drm_i915_gem_object *obj_priv, *next;

		obj_priv = list_entry(dev_priv->mm.flushing_list.next, struct drm_i915_gem_object, list),
		next = list_entry(obj_priv->list.next, struct drm_i915_gem_object, list);
		for (; &obj_priv->list != &dev_priv->mm.flushing_list;
		    obj_priv = next,
		    next = list_entry(next->list.next, struct drm_i915_gem_object, list)) {
			struct drm_gem_object *obj = obj_priv->obj;

			if ((obj->write_domain & flush_domains) ==
			    obj->write_domain) {
				obj->write_domain = 0;
				i915_gem_object_move_to_active(obj, seqno);
			}
		}

	}

	if (was_empty && !dev_priv->mm.suspended) {
		/* Schedule the retire handler to run after DRM_HZ, rather than
		 * inserting it into a Linux-style delayed workqueue.
		 */
		worktimer_id = timeout(i915_gem_retire_work_handler, (void *) dev, DRM_HZ);
		DRM_DEBUG("i915_gem: schedule_delayed_work");
	}
	return seqno;
}

/**
 * Command execution barrier
 *
 * Ensures that all commands in the ring are finished
 * before signalling the CPU
 */
uint32_t
i915_retire_commands(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
	uint32_t flush_domains = 0;
	RING_LOCALS;

	/* The sampler always gets flushed on i965 (sigh) */
	if (IS_I965G(dev))
		flush_domains |= I915_GEM_DOMAIN_SAMPLER;
	BEGIN_LP_RING(2);
	OUT_RING(cmd);
	OUT_RING(0); /* noop */
	ADVANCE_LP_RING();

	return flush_domains;
}

/**
 * Moves buffers associated only with the given active seqno from the active
 * to inactive list, potentially freeing them.
 */
static void
i915_gem_retire_request(struct drm_device *dev,
    struct drm_i915_gem_request *request)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	/* Move any buffers on the active list that are no longer referenced
	 * by the ringbuffer to the flushing/inactive lists as appropriate.
	 */
	while (!list_empty(&dev_priv->mm.active_list)) {
		struct drm_gem_object *obj;
		struct drm_i915_gem_object *obj_priv;

		obj_priv = list_entry(dev_priv->mm.active_list.next,
		    struct drm_i915_gem_object,
		    list);
		obj = obj_priv->obj;

		/* If the seqno being retired doesn't match the oldest in the
		 * list, then the oldest in the list must still be newer than
		 * this seqno.
		 */
		if (obj_priv->last_rendering_seqno != request->seqno)
			return;

		DRM_DEBUG("%s: retire %d moves to inactive list %p\n",
		    __func__, request->seqno, obj);

		if (obj->write_domain != 0) {
			i915_gem_object_move_to_flushing(obj);
		} else {
			i915_gem_object_move_to_inactive(obj);
		}
	}
}

/**
 * Returns true if seq1 is later than seq2.
 */
static int
i915_seqno_passed(uint32_t seq1, uint32_t seq2)
{
	return (int32_t)(seq1 - seq2) >= 0;
}

uint32_t
i915_get_gem_seqno(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	return READ_HWSP(dev_priv, I915_GEM_HWS_INDEX);
}

/**
 * This function clears the request list as sequence numbers are passed.
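 *
 * Note (illustrative, not from the original source): i915_seqno_passed()
 * compares sequence numbers modulo 2^32, so the test stays correct across
 * wraparound; e.g. seq1 = 0x00000002 counts as later than seq2 = 0xfffffffe
 * because (int32_t)(seq1 - seq2) == 4 >= 0.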
 */
void
i915_gem_retire_requests(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t seqno;

	seqno = i915_get_gem_seqno(dev);

	while (!list_empty(&dev_priv->mm.request_list)) {
		struct drm_i915_gem_request *request;
		uint32_t retiring_seqno;
		request = (struct drm_i915_gem_request *)(uintptr_t)(dev_priv->mm.request_list.next->contain_ptr);
		retiring_seqno = request->seqno;

		if (i915_seqno_passed(seqno, retiring_seqno) ||
		    dev_priv->mm.wedged) {
			i915_gem_retire_request(dev, request);

			list_del(&request->list);
			drm_free(request, sizeof(*request), DRM_MEM_DRIVER);
		} else
			break;
	}
}

void
i915_gem_retire_work_handler(void *device)
{
	struct drm_device *dev = (struct drm_device *)device;
	drm_i915_private_t *dev_priv = dev->dev_private;

	spin_lock(&dev->struct_mutex);

	/* Return if gem idle */
	if (worktimer_id == NULL) {
		spin_unlock(&dev->struct_mutex);
		return;
	}

	i915_gem_retire_requests(dev);
	if (!dev_priv->mm.suspended && !list_empty(&dev_priv->mm.request_list)) {
		DRM_DEBUG("i915_gem: schedule_delayed_work");
		worktimer_id = timeout(i915_gem_retire_work_handler, (void *) dev, DRM_HZ);
	}
	spin_unlock(&dev->struct_mutex);
}

/**
 * i965_reset - reset chip after a hang
 * @dev: drm device to reset
 * @flags: reset domains
 *
 * Reset the chip. Useful if a hang is detected.
 *
 * Procedure is fairly simple:
 *	- reset the chip using the reset reg
 *	- re-init context state
 *	- re-init hardware status page
 *	- re-init ring buffer
 *	- re-init interrupt state
 *	- re-init display
 */
void i965_reset(struct drm_device *dev, u8 flags)
{
	ddi_acc_handle_t conf_hdl;
	drm_i915_private_t *dev_priv = dev->dev_private;
	int timeout = 0;
	uint8_t gdrst;

	if (flags & GDRST_FULL)
		i915_save_display(dev);

	if (pci_config_setup(dev->dip, &conf_hdl) != DDI_SUCCESS) {
		DRM_ERROR(("i915_reset: pci_config_setup fail"));
		return;
	}

	/*
	 * Set the reset bit, wait for reset, then clear it. Hardware
	 * will clear the status bit (bit 1) when it's actually ready
	 * for action again.
	 */
	gdrst = pci_config_get8(conf_hdl, GDRST);
	pci_config_put8(conf_hdl, GDRST, gdrst | flags);
	drv_usecwait(50);
	pci_config_put8(conf_hdl, GDRST, gdrst | 0xfe);

	/* ...we don't want to loop forever though, 500ms should be plenty */
	do {
		drv_usecwait(100);
		gdrst = pci_config_get8(conf_hdl, GDRST);
	} while ((gdrst & 2) && (timeout++ < 5));

	/* Ok now get things going again... */

	/*
	 * Everything depends on having the GTT running, so we need to start
	 * there. Fortunately we don't need to do this unless we reset the
	 * chip at a PCI level.
	 *
	 * Next we need to restore the context, but we don't use those
	 * yet either...
	 *
	 * Ring buffer needs to be re-initialized in the KMS case, or if X
	 * was running at the time of the reset (i.e. we weren't VT
	 * switched away).
	 */
	if (!dev_priv->mm.suspended) {
		drm_i915_ring_buffer_t *ring = &dev_priv->ring;
		struct drm_gem_object *obj = ring->ring_obj;
		struct drm_i915_gem_object *obj_priv = obj->driver_private;
		dev_priv->mm.suspended = 0;

		/* Stop the ring if it's running.
		 */
		I915_WRITE(PRB0_CTL, 0);
		I915_WRITE(PRB0_TAIL, 0);
		I915_WRITE(PRB0_HEAD, 0);

		/* Initialize the ring. */
		I915_WRITE(PRB0_START, obj_priv->gtt_offset);
		I915_WRITE(PRB0_CTL,
		    ((obj->size - 4096) & RING_NR_PAGES) |
		    RING_NO_REPORT |
		    RING_VALID);
		i915_kernel_lost_context(dev);

		(void) drm_irq_install(dev);
	}

	/*
	 * Display needs restore too...
	 */
	if (flags & GDRST_FULL)
		i915_restore_display(dev);
}

/**
 * Waits for a sequence number to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
int
i915_wait_request(struct drm_device *dev, uint32_t seqno)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	u32 ier;
	int ret = 0;

	ASSERT(seqno != 0);

	if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) {
		if (IS_IGDNG(dev))
			ier = I915_READ(DEIER) | I915_READ(GTIER);
		else
			ier = I915_READ(IER);
		if (!ier) {
			DRM_ERROR("something (likely vbetool) disabled "
			    "interrupts, re-enabling\n");
			(void) i915_driver_irq_preinstall(dev);
			i915_driver_irq_postinstall(dev);
		}

		dev_priv->mm.waiting_gem_seqno = seqno;
		i915_user_irq_on(dev);
		DRM_WAIT(ret, &dev_priv->irq_queue,
		    (i915_seqno_passed(i915_get_gem_seqno(dev), seqno) ||
		    dev_priv->mm.wedged));
		i915_user_irq_off(dev);
		dev_priv->mm.waiting_gem_seqno = 0;
	}
	if (dev_priv->mm.wedged) {
		ret = EIO;
	}

	/* The GPU may have hung; a reset is needed. */
	if (ret == -2 && (seqno > i915_get_gem_seqno(dev))) {
		if (IS_I965G(dev)) {
			DRM_ERROR("GPU hang detected, trying to reset ... wait for irq_queue seqno %d, now seqno %d", seqno, i915_get_gem_seqno(dev));
			dev_priv->mm.wedged = 1;
			i965_reset(dev, GDRST_RENDER);
			i915_gem_retire_requests(dev);
			dev_priv->mm.wedged = 0;
		} else
			DRM_ERROR("GPU hang detected.... reboot required");
		return 0;
	}
	/* Directly dispatch request retiring. While we have the work queue
	 * to handle this, the waiter on a request often wants an associated
	 * buffer to have made it to the inactive list, and we would need
	 * a separate wait queue to handle that.
	 */
	if (ret == 0)
		i915_gem_retire_requests(dev);

	return ret;
}

static void
i915_gem_flush(struct drm_device *dev,
    uint32_t invalidate_domains,
    uint32_t flush_domains)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t cmd;
	RING_LOCALS;

	DRM_DEBUG("%s: invalidate %08x flush %08x\n", __func__,
	    invalidate_domains, flush_domains);

	if (flush_domains & I915_GEM_DOMAIN_CPU)
		drm_agp_chipset_flush(dev);

	if ((invalidate_domains | flush_domains) & ~(I915_GEM_DOMAIN_CPU |
	    I915_GEM_DOMAIN_GTT)) {
		/*
		 * read/write caches:
		 *
		 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
		 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
		 * also flushed at 2d versus 3d pipeline switches.
		 *
		 * read-only caches:
		 *
		 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
		 * MI_READ_FLUSH is set, and is always flushed on 965.
		 *
		 * I915_GEM_DOMAIN_COMMAND may not exist?
		 *
		 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
		 * invalidated when MI_EXE_FLUSH is set.
		 *
		 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
		 * invalidated with every MI_FLUSH.
		 *
		 * TLBs:
		 *
		 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
		 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
		 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
		 * are flushed at any MI_FLUSH.
		 */

		cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
		if ((invalidate_domains|flush_domains) &
		    I915_GEM_DOMAIN_RENDER)
			cmd &= ~MI_NO_WRITE_FLUSH;
		if (!IS_I965G(dev)) {
			/*
			 * On the 965, the sampler cache always gets flushed
			 * and this bit is reserved.
			 */
			if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
				cmd |= MI_READ_FLUSH;
		}
		if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
			cmd |= MI_EXE_FLUSH;

		DRM_DEBUG("%s: queue flush %08x to ring\n", __func__, cmd);

		BEGIN_LP_RING(2);
		OUT_RING(cmd);
		OUT_RING(0); /* noop */
		ADVANCE_LP_RING();
	}
}

/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 */
static int
i915_gem_object_wait_rendering(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	int ret, seqno;

	/* This function only exists to support waiting for existing rendering,
	 * not for emitting required flushes.
	 */

	if ((obj->write_domain & I915_GEM_GPU_DOMAINS) != 0) {
		DRM_ERROR("write domain should not be GPU DOMAIN %d", obj_priv->active);
		return 0;
	}

	/* If there is rendering queued on the buffer being evicted, wait for
	 * it.
	 */
	if (obj_priv->active) {
		DRM_DEBUG("%s: object %d %p wait for seqno %08x\n",
		    __func__, obj->name, obj, obj_priv->last_rendering_seqno);

		seqno = obj_priv->last_rendering_seqno;
		if (seqno == 0) {
			DRM_DEBUG("last rendering may already be finished");
			return 0;
		}
		ret = i915_wait_request(dev, seqno);
		if (ret != 0) {
			DRM_ERROR("%s: i915_wait_request request->seqno %d now %d\n", __func__, seqno, i915_get_gem_seqno(dev));
			return ret;
		}
	}

	return 0;
}

/**
 * Unbinds an object from the GTT aperture.
 */
int
i915_gem_object_unbind(struct drm_gem_object *obj, uint32_t type)
{
	struct drm_device *dev = obj->dev;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	int ret = 0;

	if (obj_priv->gtt_space == NULL)
		return 0;

	if (obj_priv->pin_count != 0) {
		DRM_ERROR("Attempting to unbind pinned buffer\n");
		return EINVAL;
	}

	/* Wait for any rendering to complete
	 */
	ret = i915_gem_object_wait_rendering(obj);
	if (ret) {
		DRM_ERROR("wait_rendering failed: %d\n", ret);
		return ret;
	}

	/* Move the object to the CPU domain to ensure that
	 * any possible CPU writes while it's not in the GTT
	 * are flushed when we go to remap it. This will
	 * also ensure that all pending GPU writes are finished
	 * before we unbind.
	 */
	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
	if (ret) {
		DRM_ERROR("set_domain failed: %d\n", ret);
		return ret;
	}

	if (!obj_priv->agp_mem) {
		(void) drm_agp_unbind_pages(dev, obj->size / PAGE_SIZE,
		    obj_priv->gtt_offset, type);
		obj_priv->agp_mem = -1;
	}

	ASSERT(!obj_priv->active);

	i915_gem_object_free_page_list(obj);

	if (obj_priv->gtt_space) {
		atomic_dec(&dev->gtt_count);
		atomic_sub(obj->size, &dev->gtt_memory);
		drm_mm_put_block(obj_priv->gtt_space);
		obj_priv->gtt_space = NULL;
	}

	/* Remove ourselves from the LRU list if present. */
	if (!list_empty(&obj_priv->list))
		list_del_init(&obj_priv->list);

	return 0;
}

static int
i915_gem_evict_something(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret = 0;

	for (;;) {
		/* If there's an inactive buffer available now, grab it
		 * and be done.
		 */
		if (!list_empty(&dev_priv->mm.inactive_list)) {
			obj_priv = list_entry(dev_priv->mm.inactive_list.next,
			    struct drm_i915_gem_object,
			    list);
			obj = obj_priv->obj;
			ASSERT(!(obj_priv->pin_count != 0));
			DRM_DEBUG("%s: evicting %d\n", __func__, obj->name);
			ASSERT(!(obj_priv->active));
			/* Wait on the rendering and unbind the buffer. */
			ret = i915_gem_object_unbind(obj, 1);
			break;
		}
		/* If we didn't get anything, but the ring is still processing
		 * things, wait for one of those things to finish and hopefully
		 * leave us a buffer to evict.
		 */
		if (!list_empty(&dev_priv->mm.request_list)) {
			struct drm_i915_gem_request *request;

			request = list_entry(dev_priv->mm.request_list.next,
			    struct drm_i915_gem_request,
			    list);

			ret = i915_wait_request(dev, request->seqno);
			if (ret) {
				break;
			}
			/* if waiting caused an object to become inactive,
			 * then loop around and wait for it. Otherwise, we
			 * assume that waiting freed and unbound something,
			 * so there should now be some space in the GTT
			 */
			if (!list_empty(&dev_priv->mm.inactive_list))
				continue;
			break;
		}

		/* If we didn't have anything on the request list but there
		 * are buffers awaiting a flush, emit one and try again.
		 * When we wait on it, those buffers waiting for that flush
		 * will get moved to inactive.
		 */
		if (!list_empty(&dev_priv->mm.flushing_list)) {
			obj_priv = list_entry(dev_priv->mm.flushing_list.next,
			    struct drm_i915_gem_object,
			    list);
			obj = obj_priv->obj;

			i915_gem_flush(dev,
			    obj->write_domain,
			    obj->write_domain);
			(void) i915_add_request(dev, obj->write_domain);

			obj = NULL;
			continue;
		}

		DRM_ERROR("inactive empty %d request empty %d "
		    "flushing empty %d\n",
		    list_empty(&dev_priv->mm.inactive_list),
		    list_empty(&dev_priv->mm.request_list),
		    list_empty(&dev_priv->mm.flushing_list));
		/* If we didn't do any of the above, there's nothing to be done
		 * and we just can't fit it in.
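		 * (i915_gem_evict_everything() below relies on hitting this
		 * ENOMEM as its normal termination condition and treats it
		 * as success.)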
		 */
		return ENOMEM;
	}
	return ret;
}

static int
i915_gem_evict_everything(struct drm_device *dev)
{
	int ret;

	for (;;) {
		ret = i915_gem_evict_something(dev);
		if (ret != 0)
			break;
	}
	if (ret == ENOMEM)
		return 0;
	else
		DRM_ERROR("evict_everything ret %d", ret);
	return ret;
}

/**
 * Finds free space in the GTT aperture and binds the object there.
 */
static int
i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, uint32_t alignment)
{
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	struct drm_mm_node *free_space;
	int page_count, ret;

	if (dev_priv->mm.suspended)
		return EBUSY;
	if (alignment == 0)
		alignment = PAGE_SIZE;
	if (alignment & (PAGE_SIZE - 1)) {
		DRM_ERROR("Invalid object alignment requested %u\n", alignment);
		return EINVAL;
	}

	if (obj_priv->gtt_space) {
		DRM_ERROR("Object already bound");
		return 0;
	}
search_free:
	free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
	    (unsigned long) obj->size, alignment, 0);
	if (free_space != NULL) {
		obj_priv->gtt_space = drm_mm_get_block(free_space, (unsigned long) obj->size,
		    alignment);
		if (obj_priv->gtt_space != NULL) {
			obj_priv->gtt_space->private = obj;
			obj_priv->gtt_offset = obj_priv->gtt_space->start;
		}
	}
	if (obj_priv->gtt_space == NULL) {
		/* If the gtt is empty and we're still having trouble
		 * fitting our object in, we're out of memory.
		 */
		if (list_empty(&dev_priv->mm.inactive_list) &&
		    list_empty(&dev_priv->mm.flushing_list) &&
		    list_empty(&dev_priv->mm.active_list)) {
			DRM_ERROR("GTT full, but LRU list empty\n");
			return ENOMEM;
		}

		ret = i915_gem_evict_something(dev);
		if (ret != 0) {
			DRM_ERROR("Failed to evict a buffer %d\n", ret);
			return ret;
		}
		goto search_free;
	}

	ret = i915_gem_object_get_page_list(obj);
	if (ret) {
		drm_mm_put_block(obj_priv->gtt_space);
		obj_priv->gtt_space = NULL;
		DRM_ERROR("bind to gtt failed to get page list");
		return ret;
	}

	page_count = obj->size / PAGE_SIZE;
	/* Create an AGP memory structure pointing at our pages, and bind it
	 * into the GTT.
	 */
	DRM_DEBUG("Binding object %d of page_count %d at gtt_offset 0x%x obj->pfnarray = 0x%lx",
	    obj->name, page_count, obj_priv->gtt_offset, obj->pfnarray);

	obj_priv->agp_mem = drm_agp_bind_pages(dev,
	    obj->pfnarray,
	    page_count,
	    obj_priv->gtt_offset);
	if (obj_priv->agp_mem) {
		i915_gem_object_free_page_list(obj);
		drm_mm_put_block(obj_priv->gtt_space);
		obj_priv->gtt_space = NULL;
		DRM_ERROR("Failed to bind pages obj %d, obj 0x%lx", obj->name, obj);
		return ENOMEM;
	}
	atomic_inc(&dev->gtt_count);
	atomic_add(obj->size, &dev->gtt_memory);

	/* Assert that the object is not currently in any GPU domain.
	 * As it wasn't in the GTT, there shouldn't be any way it could have
	 * been in a GPU cache.
	 */
	ASSERT(!(obj->read_domains & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)));
	ASSERT(!(obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)));

	return 0;
}

void
i915_gem_clflush_object(struct drm_gem_object *obj)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;

	/* If we don't have a page list set up, then we're not pinned
	 * to GPU, and we can ignore the cache flush because it'll happen
	 * again at bind time.
	 */

	if (obj_priv->page_list == NULL)
		return;
	drm_clflush_pages(obj_priv->page_list, obj->size / PAGE_SIZE);
}

/** Flushes any GPU write domain for the object if it's dirty. */
static void
i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	uint32_t seqno;

	if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
		return;

	/* Queue the GPU write cache flushing we need. */
	i915_gem_flush(dev, 0, obj->write_domain);
	seqno = i915_add_request(dev, obj->write_domain);
	DRM_DEBUG("flush_gpu_write_domain seqno = %d", seqno);
	obj->write_domain = 0;
	i915_gem_object_move_to_active(obj, seqno);
}

/** Flushes the GTT write domain for the object if it's dirty. */
static void
i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj)
{
	if (obj->write_domain != I915_GEM_DOMAIN_GTT)
		return;

	/* No actual flushing is required for the GTT write domain. Writes
	 * to it immediately go to main memory as far as we know, so there's
	 * no chipset flush. It also doesn't land in render cache.
	 */
	obj->write_domain = 0;
}

/** Flushes the CPU write domain for the object if it's dirty. */
static void
i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;

	if (obj->write_domain != I915_GEM_DOMAIN_CPU)
		return;

	i915_gem_clflush_object(obj);
	drm_agp_chipset_flush(dev);
	obj->write_domain = 0;
}

/**
 * Moves a single object to the GTT read, and possibly write domain.
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
static int
i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	int ret;

	/* Not valid to be called on unbound objects. */
	if (obj_priv->gtt_space == NULL)
		return EINVAL;

	i915_gem_object_flush_gpu_write_domain(obj);
	/* Wait on any GPU rendering and flushing to occur. */
	ret = i915_gem_object_wait_rendering(obj);
	if (ret != 0) {
		DRM_ERROR("set_to_gtt_domain wait_rendering ret %d", ret);
		return ret;
	}
	/* If we're writing through the GTT domain, then CPU and GPU caches
	 * will need to be invalidated at next use.
	 */
	if (write)
		obj->read_domains &= I915_GEM_DOMAIN_GTT;
	i915_gem_object_flush_cpu_write_domain(obj);

	DRM_DEBUG("i915_gem_object_set_to_gtt_domain obj->read_domains %x ", obj->read_domains);
	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	ASSERT(!((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0));
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		obj->write_domain = I915_GEM_DOMAIN_GTT;
		obj_priv->dirty = 1;
	}

	return 0;
}

/**
 * Moves a single object to the CPU read, and possibly write domain.
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
static int
i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
{
	struct drm_device *dev = obj->dev;
	int ret;

	i915_gem_object_flush_gpu_write_domain(obj);
	/* Wait on any GPU rendering and flushing to occur. */

	ret = i915_gem_object_wait_rendering(obj);
	if (ret != 0)
		return ret;

	i915_gem_object_flush_gtt_write_domain(obj);

	/* If we have a partially-valid cache of the object in the CPU,
	 * finish invalidating it and free the per-page flags.
	 */
	i915_gem_object_set_to_full_cpu_read_domain(obj);

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj);
		drm_agp_chipset_flush(dev);
		obj->read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	ASSERT(!((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0));

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write) {
		obj->read_domains &= I915_GEM_DOMAIN_CPU;
		obj->write_domain = I915_GEM_DOMAIN_CPU;
	}

	return 0;
}

/*
 * Set the next domain for the specified object. This
 * may not actually perform the necessary flushing/invalidating though,
 * as that may want to be batched with other set_domain operations
 *
 * This is (we hope) the only really tricky part of gem. The goal
 * is fairly simple -- track which caches hold bits of the object
 * and make sure they remain coherent. A few concrete examples may
 * help to explain how it works. For shorthand, we use the notation
 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate a pair
 * of read and write domain masks.
 *
 * Case 1: the batch buffer
 *
 *	1. Allocated
 *	2. Written by CPU
 *	3. Mapped to GTT
 *	4. Read by GPU
 *	5. Unmapped from GTT
 *	6. Freed
 *
 *	Let's take these a step at a time
 *
 *	1. Allocated
 *		Pages allocated from the kernel may still have
 *		cache contents, so we set them to (CPU, CPU) always.
 *	2. Written by CPU (using pwrite)
 *		The pwrite function calls set_domain (CPU, CPU) and
 *		this function does nothing (as nothing changes)
 *	3. Mapped by GTT
 *		This function asserts that the object is not
 *		currently in any GPU-based read or write domains
 *	4. Read by GPU
 *		i915_gem_execbuffer calls set_domain (COMMAND, 0).
 *		As write_domain is zero, this function adds in the
 *		current read domains (CPU+COMMAND, 0).
 *		flush_domains is set to CPU.
 *		invalidate_domains is set to COMMAND
 *		clflush is run to get data out of the CPU caches
 *		then i915_dev_set_domain calls i915_gem_flush to
 *		emit an MI_FLUSH and drm_agp_chipset_flush
 *	5. Unmapped from GTT
 *		i915_gem_object_unbind calls set_domain (CPU, CPU)
 *		flush_domains and invalidate_domains end up both zero
 *		so no flushing/invalidating happens
 *	6. Freed
 *		yay, done
 *
 * Case 2: The shared render buffer
 *
 *	1. Allocated
 *	2. Mapped to GTT
 *	3. Read/written by GPU
 *	4. set_domain to (CPU,CPU)
 *	5. Read/written by CPU
 *	6. Read/written by GPU
 *
 *	1. Allocated
 *		Same as last example, (CPU, CPU)
 *	2. Mapped to GTT
 *		Nothing changes (assertions find that it is not in the GPU)
 *	3. Read/written by GPU
 *		execbuffer calls set_domain (RENDER, RENDER)
 *		flush_domains gets CPU
 *		invalidate_domains gets GPU
 *		clflush (obj)
 *		MI_FLUSH and drm_agp_chipset_flush
 *	4. set_domain (CPU, CPU)
 *		flush_domains gets GPU
 *		invalidate_domains gets CPU
 *		wait_rendering (obj) to make sure all drawing is complete.
 *		This will include an MI_FLUSH to get the data from GPU
 *		to memory
 *		clflush (obj) to invalidate the CPU cache
 *		Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
 *	5. Read/written by CPU
 *		cache lines are loaded and dirtied
 *	6. Read written by GPU
 *		Same as last GPU access
 *
 * Case 3: The constant buffer
 *
 *	1. Allocated
 *	2. Written by CPU
 *	3. Read by GPU
 *	4. Updated (written) by CPU again
 *	5. Read by GPU
 *
 *	1. Allocated
 *		(CPU, CPU)
 *	2. Written by CPU
 *		(CPU, CPU)
 *	3. Read by GPU
 *		(CPU+RENDER, 0)
 *		flush_domains = CPU
 *		invalidate_domains = RENDER
 *		clflush (obj)
 *		MI_FLUSH
 *		drm_agp_chipset_flush
 *	4. Updated (written) by CPU again
 *		(CPU, CPU)
 *		flush_domains = 0 (no previous write domain)
 *		invalidate_domains = 0 (no new read domains)
 *	5. Read by GPU
 *		(CPU+RENDER, 0)
 *		flush_domains = CPU
 *		invalidate_domains = RENDER
 *		clflush (obj)
 *		MI_FLUSH
 *		drm_agp_chipset_flush
 */
static void
i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj,
    uint32_t read_domains,
    uint32_t write_domain)
{
	struct drm_device *dev = obj->dev;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	uint32_t invalidate_domains = 0;
	uint32_t flush_domains = 0;

	DRM_DEBUG("%s: object %p read %08x -> %08x write %08x -> %08x\n",
	    __func__, obj,
	    obj->read_domains, read_domains,
	    obj->write_domain, write_domain);
	/*
	 * If the object isn't moving to a new write domain,
	 * let the object stay in multiple read domains
	 */
	if (write_domain == 0)
		read_domains |= obj->read_domains;
	else
		obj_priv->dirty = 1;

	/*
	 * Flush the current write domain if
	 * the new read domains don't match. Invalidate
	 * any read domains which differ from the old
	 * write domain
	 */
	if (obj->write_domain && obj->write_domain != read_domains) {
		flush_domains |= obj->write_domain;
		invalidate_domains |= read_domains & ~obj->write_domain;
	}
	/*
	 * Invalidate any read caches which may have
	 * stale data. That is, any new read domains.
	 */
	invalidate_domains |= read_domains & ~obj->read_domains;
	if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) {
		DRM_DEBUG("%s: CPU domain flush %08x invalidate %08x\n",
		    __func__, flush_domains, invalidate_domains);
		i915_gem_clflush_object(obj);
	}

	if ((write_domain | flush_domains) != 0)
		obj->write_domain = write_domain;
	obj->read_domains = read_domains;

	dev->invalidate_domains |= invalidate_domains;
	dev->flush_domains |= flush_domains;

	DRM_DEBUG("%s: read %08x write %08x invalidate %08x flush %08x\n",
	    __func__,
	    obj->read_domains, obj->write_domain,
	    dev->invalidate_domains, dev->flush_domains);

}

/**
 * Moves the object from a partially CPU read to a full one.
 *
 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(),
 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU).
 */
static void
i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;

	if (!obj_priv->page_cpu_valid)
		return;

	/* If we're partially in the CPU read domain, finish moving it in.
	 */
	if (obj->read_domains & I915_GEM_DOMAIN_CPU) {
		int i;

		for (i = 0; i <= (obj->size - 1) / PAGE_SIZE; i++) {
			if (obj_priv->page_cpu_valid[i])
				continue;
			drm_clflush_pages(obj_priv->page_list + i, 1);
		}
		drm_agp_chipset_flush(dev);
	}

	/* Free the page_cpu_valid mappings which are now stale, whether
	 * or not we've got I915_GEM_DOMAIN_CPU.
	 */
	drm_free(obj_priv->page_cpu_valid, obj->size / PAGE_SIZE,
	    DRM_MEM_DRIVER);
	obj_priv->page_cpu_valid = NULL;
}

/**
 * Set the CPU read domain on a range of the object.
 *
 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's
 * not entirely valid. The page_cpu_valid member of the object flags which
 * pages have been flushed, and will be respected by
 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping
 * of the whole object.
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
static int
i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
    uint64_t offset, uint64_t size)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	int i, ret;

	if (offset == 0 && size == obj->size)
		return i915_gem_object_set_to_cpu_domain(obj, 0);

	i915_gem_object_flush_gpu_write_domain(obj);
	/* Wait on any GPU rendering and flushing to occur. */
	ret = i915_gem_object_wait_rendering(obj);
	if (ret != 0)
		return ret;
	i915_gem_object_flush_gtt_write_domain(obj);

	/* If we're already fully in the CPU read domain, we're done.
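	 * Otherwise the loop below flushes only the pages covering
	 * [offset, offset + size); e.g. (illustrative values) with 4 KiB
	 * pages, offset 0x1800 and size 0x2000 touch pages 1 through 3,
	 * since i runs from offset / PAGE_SIZE to (offset + size - 1) / PAGE_SIZE.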
	 */
	if (obj_priv->page_cpu_valid == NULL &&
	    (obj->read_domains & I915_GEM_DOMAIN_CPU) != 0)
		return 0;

	/* Otherwise, create/clear the per-page CPU read domain flag if we're
	 * newly adding I915_GEM_DOMAIN_CPU
	 */
	if (obj_priv->page_cpu_valid == NULL) {
		obj_priv->page_cpu_valid = drm_calloc(1, obj->size / PAGE_SIZE,
		    DRM_MEM_DRIVER);
		if (obj_priv->page_cpu_valid == NULL)
			return ENOMEM;
	} else if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0)
		(void) memset(obj_priv->page_cpu_valid, 0, obj->size / PAGE_SIZE);

	/* Flush the cache on any pages that are still invalid from the CPU's
	 * perspective.
	 */
	for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE;
	    i++) {
		if (obj_priv->page_cpu_valid[i])
			continue;

		drm_clflush_pages(obj_priv->page_list + i, 1);
		obj_priv->page_cpu_valid[i] = 1;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	ASSERT(!((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0));

	obj->read_domains |= I915_GEM_DOMAIN_CPU;

	return 0;
}

/**
 * Pin an object to the GTT and evaluate the relocations landing in it.
 */
static int
i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
    struct drm_file *file_priv,
    struct drm_i915_gem_exec_object *entry)
{
	struct drm_i915_gem_relocation_entry reloc;
	struct drm_i915_gem_relocation_entry __user *relocs;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	int i, ret;

	/* Choose the GTT offset for our buffer and put it there. */
	ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment);
	if (ret) {
		DRM_ERROR("failed to pin");
		return ret;
	}
	entry->offset = obj_priv->gtt_offset;

	relocs = (struct drm_i915_gem_relocation_entry __user *)
	    (uintptr_t) entry->relocs_ptr;
	/* Apply the relocations, using the GTT aperture to avoid cache
	 * flushing requirements.
	 */
	for (i = 0; i < entry->relocation_count; i++) {
		struct drm_gem_object *target_obj;
		struct drm_i915_gem_object *target_obj_priv;
		uint32_t reloc_val, reloc_offset, *reloc_entry;

		ret = DRM_COPY_FROM_USER(&reloc, relocs + i, sizeof(reloc));
		if (ret != 0) {
			i915_gem_object_unpin(obj);
			DRM_ERROR("failed to copy from user");
			return ret;
		}

		target_obj = drm_gem_object_lookup(file_priv,
		    reloc.target_handle);
		if (target_obj == NULL) {
			i915_gem_object_unpin(obj);
			return EBADF;
		}
		target_obj_priv = target_obj->driver_private;

		/* The target buffer should have appeared before us in the
		 * exec_object list, so it should have a GTT space bound by now.
		 */
		if (target_obj_priv->gtt_space == NULL) {
			DRM_ERROR("No GTT space found for object %d\n",
			    reloc.target_handle);
			drm_gem_object_unreference(target_obj);
			i915_gem_object_unpin(obj);
			return EINVAL;
		}

		if (reloc.offset > obj->size - 4) {
			DRM_ERROR("Relocation beyond object bounds: "
			    "obj %p target %d offset %d size %d.\n",
			    obj, reloc.target_handle,
			    (int) reloc.offset, (int) obj->size);
			drm_gem_object_unreference(target_obj);
			i915_gem_object_unpin(obj);
			return EINVAL;
		}
		if (reloc.offset & 3) {
			DRM_ERROR("Relocation not 4-byte aligned: "
			    "obj %p target %d offset %d.\n",
			    obj, reloc.target_handle,
			    (int) reloc.offset);
			drm_gem_object_unreference(target_obj);
			i915_gem_object_unpin(obj);
			return EINVAL;
		}

		if (reloc.write_domain & I915_GEM_DOMAIN_CPU ||
		    reloc.read_domains & I915_GEM_DOMAIN_CPU) {
			DRM_ERROR("reloc with read/write CPU domains: "
			    "obj %p target %d offset %d "
			    "read %08x write %08x",
			    obj, reloc.target_handle,
			    (int) reloc.offset,
			    reloc.read_domains,
			    reloc.write_domain);
			drm_gem_object_unreference(target_obj);
			i915_gem_object_unpin(obj);
			return EINVAL;
		}

		if (reloc.write_domain && target_obj->pending_write_domain &&
		    reloc.write_domain != target_obj->pending_write_domain) {
			DRM_ERROR("Write domain conflict: "
			    "obj %p target %d offset %d "
			    "new %08x old %08x\n",
			    obj, reloc.target_handle,
			    (int) reloc.offset,
			    reloc.write_domain,
			    target_obj->pending_write_domain);
			drm_gem_object_unreference(target_obj);
			i915_gem_object_unpin(obj);
			return EINVAL;
		}
		DRM_DEBUG("%s: obj %p offset %08x target %d "
		    "read %08x write %08x gtt %08x "
		    "presumed %08x delta %08x\n",
		    __func__,
		    obj,
		    (int) reloc.offset,
		    (int) reloc.target_handle,
		    (int) reloc.read_domains,
		    (int) reloc.write_domain,
		    (int) target_obj_priv->gtt_offset,
		    (int) reloc.presumed_offset,
		    reloc.delta);

		target_obj->pending_read_domains |= reloc.read_domains;
		target_obj->pending_write_domain |= reloc.write_domain;

		/* If the relocation already has the right value in it, no
		 * more work needs to be done.
		 */
		if (target_obj_priv->gtt_offset == reloc.presumed_offset) {
			drm_gem_object_unreference(target_obj);
			continue;
		}

		ret = i915_gem_object_set_to_gtt_domain(obj, 1);
		if (ret != 0) {
			drm_gem_object_unreference(target_obj);
			i915_gem_object_unpin(obj);
			return EINVAL;
		}

		/* Map the page containing the relocation we're going to
		 * perform.
		 */

		int reloc_base = (reloc.offset & ~(PAGE_SIZE-1));
		reloc_offset = reloc.offset & (PAGE_SIZE-1);
		reloc_entry = (uint32_t *)(uintptr_t)(obj_priv->page_list[reloc_base/PAGE_SIZE] + reloc_offset);
		reloc_val = target_obj_priv->gtt_offset + reloc.delta;
		*reloc_entry = reloc_val;

		/* Write the updated presumed offset for this entry back out
		 * to the user.
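		 * A matching presumed_offset lets the short-circuit check
		 * above skip this relocation on a later execbuffer pass.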
1881 */ 1882 reloc.presumed_offset = target_obj_priv->gtt_offset; 1883 ret = DRM_COPY_TO_USER(relocs + i, &reloc, sizeof(reloc)); 1884 if (ret != 0) { 1885 drm_gem_object_unreference(target_obj); 1886 i915_gem_object_unpin(obj); 1887 DRM_ERROR("%s: Failed to copy to user ret %d", __func__, ret); 1888 return ret; 1889 } 1890 1891 drm_gem_object_unreference(target_obj); 1892 } 1893 1894 return 0; 1895 } 1896 1897 /** Dispatch a batchbuffer to the ring 1898 */ 1899 static int 1900 i915_dispatch_gem_execbuffer(struct drm_device *dev, 1901 struct drm_i915_gem_execbuffer *exec, 1902 uint64_t exec_offset) 1903 { 1904 drm_i915_private_t *dev_priv = dev->dev_private; 1905 struct drm_clip_rect __user *boxes = (struct drm_clip_rect __user *) 1906 (uintptr_t) exec->cliprects_ptr; 1907 int nbox = exec->num_cliprects; 1908 int i = 0, count; 1909 uint64_t exec_start, exec_len; 1910 RING_LOCALS; 1911 1912 exec_start = exec_offset + exec->batch_start_offset; 1913 exec_len = exec->batch_len; 1914 1915 if ((exec_start | exec_len) & 0x7) { 1916 DRM_ERROR("alignment\n"); 1917 return EINVAL; 1918 } 1919 1920 if (!exec_start) { 1921 DRM_ERROR("wrong arg"); 1922 return EINVAL; 1923 } 1924 1925 count = nbox ? nbox : 1; 1926 1927 for (i = 0; i < count; i++) { 1928 if (i < nbox) { 1929 int ret = i915_emit_box(dev, boxes, i, 1930 exec->DR1, exec->DR4); 1931 if (ret) { 1932 DRM_ERROR("i915_emit_box %d DR1 0x%lx DRI2 0x%lx", ret, exec->DR1, exec->DR4); 1933 return ret; 1934 } 1935 } 1936 if (IS_I830(dev) || IS_845G(dev)) { 1937 BEGIN_LP_RING(4); 1938 OUT_RING(MI_BATCH_BUFFER); 1939 OUT_RING(exec_start | MI_BATCH_NON_SECURE); 1940 OUT_RING(exec_start + exec_len - 4); 1941 OUT_RING(0); 1942 ADVANCE_LP_RING(); 1943 } else { 1944 BEGIN_LP_RING(2); 1945 if (IS_I965G(dev)) { 1946 OUT_RING(MI_BATCH_BUFFER_START | 1947 (2 << 6) | 1948 (3 << 9) | 1949 MI_BATCH_NON_SECURE_I965); 1950 OUT_RING(exec_start); 1951 1952 } else { 1953 OUT_RING(MI_BATCH_BUFFER_START | 1954 (2 << 6)); 1955 OUT_RING(exec_start | MI_BATCH_NON_SECURE); 1956 } 1957 ADVANCE_LP_RING(); 1958 } 1959 } 1960 /* XXX breadcrumb */ 1961 return 0; 1962 } 1963 1964 /* Throttle our rendering by waiting until the ring has completed our requests 1965 * emitted over 20 msec ago. 1966 * 1967 * This should get us reasonable parallelism between CPU and GPU but also 1968 * relatively low latency when blocking on a particular request to finish. 
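 *
 * Concretely: each call waits for the seqno saved by the previous throttle
 * call and then records the client's most recent execbuffer seqno for the
 * next one, so a client that throttles once per frame stays roughly one
 * frame ahead of the GPU instead of queueing unbounded work.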
1969 */ 1970 static int 1971 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv) 1972 { 1973 struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv; 1974 int ret = 0; 1975 uint32_t seqno; 1976 1977 spin_lock(&dev->struct_mutex); 1978 seqno = i915_file_priv->mm.last_gem_throttle_seqno; 1979 i915_file_priv->mm.last_gem_throttle_seqno = 1980 i915_file_priv->mm.last_gem_seqno; 1981 if (seqno) { 1982 ret = i915_wait_request(dev, seqno); 1983 if (ret != 0) 1984 DRM_ERROR("%s: i915_wait_request request->seqno %d now %d\n", __func__, seqno, i915_get_gem_seqno(dev)); 1985 } 1986 spin_unlock(&dev->struct_mutex); 1987 return ret; 1988 } 1989 1990 /*ARGSUSED*/ 1991 int 1992 i915_gem_execbuffer(DRM_IOCTL_ARGS) 1993 { 1994 DRM_DEVICE; 1995 drm_i915_private_t *dev_priv = dev->dev_private; 1996 struct drm_i915_file_private *i915_file_priv = fpriv->driver_priv; 1997 struct drm_i915_gem_execbuffer args; 1998 struct drm_i915_gem_exec_object *exec_list = NULL; 1999 struct drm_gem_object **object_list = NULL; 2000 struct drm_gem_object *batch_obj; 2001 struct drm_i915_gem_object *obj_priv; 2002 int ret = 0, i, pinned = 0; 2003 uint64_t exec_offset; 2004 uint32_t seqno, flush_domains; 2005 int pin_tries; 2006 2007 if (dev->driver->use_gem != 1) 2008 return ENODEV; 2009 2010 DRM_COPYFROM_WITH_RETURN(&args, 2011 (struct drm_i915_gem_execbuffer __user *) data, sizeof(args)); 2012 2013 DRM_DEBUG("buffer_count %d len %x\n", args.buffer_count, args.batch_len); 2014 2015 if (args.buffer_count < 1) { 2016 DRM_ERROR("execbuf with %d buffers\n", args.buffer_count); 2017 return EINVAL; 2018 } 2019 /* Copy in the exec list from userland */ 2020 exec_list = drm_calloc(sizeof(*exec_list), args.buffer_count, 2021 DRM_MEM_DRIVER); 2022 object_list = drm_calloc(sizeof(*object_list), args.buffer_count, 2023 DRM_MEM_DRIVER); 2024 if (exec_list == NULL || object_list == NULL) { 2025 DRM_ERROR("Failed to allocate exec or object list " 2026 "for %d buffers\n", 2027 args.buffer_count); 2028 ret = ENOMEM; 2029 goto pre_mutex_err; 2030 } 2031 2032 ret = DRM_COPY_FROM_USER(exec_list, 2033 (struct drm_i915_gem_exec_object __user *) 2034 (uintptr_t) args.buffers_ptr, 2035 sizeof(*exec_list) * args.buffer_count); 2036 if (ret != 0) { 2037 DRM_ERROR("copy %d exec entries failed %d\n", 2038 args.buffer_count, ret); 2039 goto pre_mutex_err; 2040 } 2041 spin_lock(&dev->struct_mutex); 2042 2043 if (dev_priv->mm.wedged) { 2044 DRM_ERROR("Execbuf while wedged\n"); 2045 spin_unlock(&dev->struct_mutex); 2046 return EIO; 2047 } 2048 2049 if (dev_priv->mm.suspended) { 2050 DRM_ERROR("Execbuf while VT-switched.\n"); 2051 spin_unlock(&dev->struct_mutex); 2052 return EBUSY; 2053 } 2054 2055 /* Look up object handles */ 2056 for (i = 0; i < args.buffer_count; i++) { 2057 object_list[i] = drm_gem_object_lookup(fpriv, 2058 exec_list[i].handle); 2059 if (object_list[i] == NULL) { 2060 DRM_ERROR("Invalid object handle %d at index %d\n", 2061 exec_list[i].handle, i); 2062 ret = EBADF; 2063 goto err; 2064 } 2065 obj_priv = object_list[i]->driver_private; 2066 if (obj_priv->in_execbuffer) { 2067 DRM_ERROR("Object[%d] (%d) %p appears more than once in object list in args.buffer_count %d \n", 2068 i, object_list[i]->name, object_list[i], args.buffer_count); 2069 2070 ret = EBADF; 2071 goto err; 2072 } 2073 2074 obj_priv->in_execbuffer = 1; 2075 } 2076 2077 /* Pin and relocate */ 2078 for (pin_tries = 0; ; pin_tries++) { 2079 ret = 0; 2080 for (i = 0; i < args.buffer_count; i++) { 2081 object_list[i]->pending_read_domains = 0; 
			object_list[i]->pending_write_domain = 0;
			ret = i915_gem_object_pin_and_relocate(object_list[i],
			    fpriv,
			    &exec_list[i]);
			if (ret) {
				DRM_ERROR("Not all objects pinned");
				break;
			}
			pinned = i + 1;
		}
		/* success */
		if (ret == 0) {
			DRM_DEBUG("gem_execbuffer pin_relocate success");
			break;
		}
		/* error other than GTT full, or we've already tried again */
		if (ret != ENOMEM || pin_tries >= 1) {
			if (ret != ERESTART)
				DRM_ERROR("Failed to pin buffers %d\n", ret);
			goto err;
		}

		/* unpin all of our buffers */
		for (i = 0; i < pinned; i++)
			i915_gem_object_unpin(object_list[i]);
		pinned = 0;

		/* evict everyone we can from the aperture */
		ret = i915_gem_evict_everything(dev);
		if (ret)
			goto err;
	}

	/* Set the pending read domains for the batch buffer to COMMAND */
	batch_obj = object_list[args.buffer_count-1];
	batch_obj->pending_read_domains = I915_GEM_DOMAIN_COMMAND;
	batch_obj->pending_write_domain = 0;

	/* Zero the global flush/invalidate flags. These
	 * will be modified as each object is bound to the
	 * GTT.
	 */
	dev->invalidate_domains = 0;
	dev->flush_domains = 0;

	for (i = 0; i < args.buffer_count; i++) {
		struct drm_gem_object *obj = object_list[i];

		/* Compute new gpu domains and update invalidate/flush */
		i915_gem_object_set_to_gpu_domain(obj,
		    obj->pending_read_domains,
		    obj->pending_write_domain);
	}

	if (dev->invalidate_domains | dev->flush_domains) {
		DRM_DEBUG("%s: invalidate_domains %08x flush_domains %08x, then flush\n",
		    __func__,
		    dev->invalidate_domains,
		    dev->flush_domains);
		i915_gem_flush(dev,
		    dev->invalidate_domains,
		    dev->flush_domains);
		if (dev->flush_domains) {
			(void) i915_add_request(dev, dev->flush_domains);
		}
	}

	for (i = 0; i < args.buffer_count; i++) {
		struct drm_gem_object *obj = object_list[i];

		obj->write_domain = obj->pending_write_domain;
	}

	exec_offset = exec_list[args.buffer_count - 1].offset;

	/* Exec the batchbuffer */
	ret = i915_dispatch_gem_execbuffer(dev, &args, exec_offset);
	if (ret) {
		DRM_ERROR("dispatch failed %d\n", ret);
		goto err;
	}

	/*
	 * Ensure that the commands in the batch buffer are
	 * finished before the interrupt fires.
	 */
	flush_domains = i915_retire_commands(dev);

	/*
	 * Get a seqno representing the execution of the current buffer,
	 * which we can wait on.  We would like to mitigate these interrupts,
	 * likely by only creating seqnos occasionally (so that we have
	 * *some* interrupts representing completion of buffers that we can
	 * wait on when trying to clear up gtt space).
2179 */ 2180 seqno = i915_add_request(dev, flush_domains); 2181 ASSERT(!(seqno == 0)); 2182 i915_file_priv->mm.last_gem_seqno = seqno; 2183 for (i = 0; i < args.buffer_count; i++) { 2184 struct drm_gem_object *obj = object_list[i]; 2185 i915_gem_object_move_to_active(obj, seqno); 2186 DRM_DEBUG("%s: move to exec list %p\n", __func__, obj); 2187 } 2188 2189 err: 2190 if (object_list != NULL) { 2191 for (i = 0; i < pinned; i++) 2192 i915_gem_object_unpin(object_list[i]); 2193 2194 for (i = 0; i < args.buffer_count; i++) { 2195 if (object_list[i]) { 2196 obj_priv = object_list[i]->driver_private; 2197 obj_priv->in_execbuffer = 0; 2198 } 2199 drm_gem_object_unreference(object_list[i]); 2200 } 2201 } 2202 spin_unlock(&dev->struct_mutex); 2203 2204 if (!ret) { 2205 /* Copy the new buffer offsets back to the user's exec list. */ 2206 ret = DRM_COPY_TO_USER((struct drm_i915_relocation_entry __user *) 2207 (uintptr_t) args.buffers_ptr, 2208 exec_list, 2209 sizeof(*exec_list) * args.buffer_count); 2210 if (ret) 2211 DRM_ERROR("failed to copy %d exec entries " 2212 "back to user (%d)\n", 2213 args.buffer_count, ret); 2214 } 2215 2216 pre_mutex_err: 2217 drm_free(object_list, sizeof(*object_list) * args.buffer_count, 2218 DRM_MEM_DRIVER); 2219 drm_free(exec_list, sizeof(*exec_list) * args.buffer_count, 2220 DRM_MEM_DRIVER); 2221 2222 return ret; 2223 } 2224 2225 int 2226 i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment) 2227 { 2228 struct drm_device *dev = obj->dev; 2229 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2230 int ret; 2231 2232 if (obj_priv->gtt_space == NULL) { 2233 ret = i915_gem_object_bind_to_gtt(obj, alignment); 2234 if (ret != 0) { 2235 DRM_ERROR("Failure to bind: %d", ret); 2236 return ret; 2237 } 2238 } 2239 obj_priv->pin_count++; 2240 2241 /* If the object is not active and not pending a flush, 2242 * remove it from the inactive list 2243 */ 2244 if (obj_priv->pin_count == 1) { 2245 atomic_inc(&dev->pin_count); 2246 atomic_add(obj->size, &dev->pin_memory); 2247 if (!obj_priv->active && 2248 (obj->write_domain & ~(I915_GEM_DOMAIN_CPU | 2249 I915_GEM_DOMAIN_GTT)) == 0 && 2250 !list_empty(&obj_priv->list)) 2251 list_del_init(&obj_priv->list); 2252 } 2253 return 0; 2254 } 2255 2256 void 2257 i915_gem_object_unpin(struct drm_gem_object *obj) 2258 { 2259 struct drm_device *dev = obj->dev; 2260 drm_i915_private_t *dev_priv = dev->dev_private; 2261 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2262 obj_priv->pin_count--; 2263 ASSERT(!(obj_priv->pin_count < 0)); 2264 ASSERT(!(obj_priv->gtt_space == NULL)); 2265 2266 /* If the object is no longer pinned, and is 2267 * neither active nor being flushed, then stick it on 2268 * the inactive list 2269 */ 2270 if (obj_priv->pin_count == 0) { 2271 if (!obj_priv->active && 2272 (obj->write_domain & ~(I915_GEM_DOMAIN_CPU | 2273 I915_GEM_DOMAIN_GTT)) == 0) 2274 list_move_tail(&obj_priv->list, 2275 &dev_priv->mm.inactive_list, (caddr_t)obj_priv); 2276 atomic_dec(&dev->pin_count); 2277 atomic_sub(obj->size, &dev->pin_memory); 2278 } 2279 } 2280 2281 /*ARGSUSED*/ 2282 int 2283 i915_gem_pin_ioctl(DRM_IOCTL_ARGS) 2284 { 2285 DRM_DEVICE; 2286 struct drm_i915_gem_pin args; 2287 struct drm_gem_object *obj; 2288 struct drm_i915_gem_object *obj_priv; 2289 int ret; 2290 2291 if (dev->driver->use_gem != 1) 2292 return ENODEV; 2293 2294 DRM_COPYFROM_WITH_RETURN(&args, 2295 (struct drm_i915_gem_pin __user *) data, sizeof(args)); 2296 2297 spin_lock(&dev->struct_mutex); 2298 2299 obj = drm_gem_object_lookup(fpriv, 
args.handle); 2300 if (obj == NULL) { 2301 DRM_ERROR("Bad handle in i915_gem_pin_ioctl(): %d\n", 2302 args.handle); 2303 spin_unlock(&dev->struct_mutex); 2304 return EBADF; 2305 } 2306 DRM_DEBUG("i915_gem_pin_ioctl obj->name %d", obj->name); 2307 obj_priv = obj->driver_private; 2308 2309 if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != fpriv) { 2310 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", 2311 args.handle); 2312 drm_gem_object_unreference(obj); 2313 spin_unlock(&dev->struct_mutex); 2314 return EINVAL; 2315 } 2316 2317 obj_priv->user_pin_count++; 2318 obj_priv->pin_filp = fpriv; 2319 if (obj_priv->user_pin_count == 1) { 2320 ret = i915_gem_object_pin(obj, args.alignment); 2321 if (ret != 0) { 2322 drm_gem_object_unreference(obj); 2323 spin_unlock(&dev->struct_mutex); 2324 return ret; 2325 } 2326 } 2327 2328 /* XXX - flush the CPU caches for pinned objects 2329 * as the X server doesn't manage domains yet 2330 */ 2331 i915_gem_object_flush_cpu_write_domain(obj); 2332 args.offset = obj_priv->gtt_offset; 2333 2334 ret = DRM_COPY_TO_USER((struct drm_i915_gem_pin __user *) data, &args, sizeof(args)); 2335 if ( ret != 0) 2336 DRM_ERROR(" gem pin ioctl error! %d", ret); 2337 2338 drm_gem_object_unreference(obj); 2339 spin_unlock(&dev->struct_mutex); 2340 2341 return 0; 2342 } 2343 2344 /*ARGSUSED*/ 2345 int 2346 i915_gem_unpin_ioctl(DRM_IOCTL_ARGS) 2347 { 2348 DRM_DEVICE; 2349 struct drm_i915_gem_pin args; 2350 struct drm_gem_object *obj; 2351 struct drm_i915_gem_object *obj_priv; 2352 2353 if (dev->driver->use_gem != 1) 2354 return ENODEV; 2355 2356 DRM_COPYFROM_WITH_RETURN(&args, 2357 (struct drm_i915_gem_pin __user *) data, sizeof(args)); 2358 2359 spin_lock(&dev->struct_mutex); 2360 2361 obj = drm_gem_object_lookup(fpriv, args.handle); 2362 if (obj == NULL) { 2363 DRM_ERROR("Bad handle in i915_gem_unpin_ioctl(): %d\n", 2364 args.handle); 2365 spin_unlock(&dev->struct_mutex); 2366 return EBADF; 2367 } 2368 obj_priv = obj->driver_private; 2369 DRM_DEBUG("i915_gem_unpin_ioctl, obj->name %d", obj->name); 2370 if (obj_priv->pin_filp != fpriv) { 2371 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n", 2372 args.handle); 2373 drm_gem_object_unreference(obj); 2374 spin_unlock(&dev->struct_mutex); 2375 return EINVAL; 2376 } 2377 obj_priv->user_pin_count--; 2378 if (obj_priv->user_pin_count == 0) { 2379 obj_priv->pin_filp = NULL; 2380 i915_gem_object_unpin(obj); 2381 } 2382 drm_gem_object_unreference(obj); 2383 spin_unlock(&dev->struct_mutex); 2384 return 0; 2385 } 2386 2387 /*ARGSUSED*/ 2388 int 2389 i915_gem_busy_ioctl(DRM_IOCTL_ARGS) 2390 { 2391 DRM_DEVICE; 2392 struct drm_i915_gem_busy args; 2393 struct drm_gem_object *obj; 2394 struct drm_i915_gem_object *obj_priv; 2395 int ret; 2396 2397 if (dev->driver->use_gem != 1) 2398 return ENODEV; 2399 2400 DRM_COPYFROM_WITH_RETURN(&args, 2401 (struct drm_i915_gem_busy __user *) data, sizeof(args)); 2402 2403 spin_lock(&dev->struct_mutex); 2404 obj = drm_gem_object_lookup(fpriv, args.handle); 2405 if (obj == NULL) { 2406 DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n", 2407 args.handle); 2408 spin_unlock(&dev->struct_mutex); 2409 return EBADF; 2410 } 2411 2412 obj_priv = obj->driver_private; 2413 /* Don't count being on the flushing list against the object being 2414 * done. Otherwise, a buffer left on the flushing list but not getting 2415 * flushed (because nobody's flushing that domain) won't ever return 2416 * unbusy and get reused by libdrm's bo cache. 
The other expected
 * consumer of this interface, OpenGL's occlusion queries, also specs
 * that the objects get unbusy "eventually" without any interference.
 */
	args.busy = obj_priv->active && obj_priv->last_rendering_seqno != 0;
	DRM_DEBUG("i915_gem_busy_ioctl call obj->name %d busy %d", obj->name, args.busy);

	ret = DRM_COPY_TO_USER((struct drm_i915_gem_busy __user *) data, &args, sizeof(args));
	if (ret != 0)
		DRM_ERROR("gem busy error! %d", ret);

	drm_gem_object_unreference(obj);
	spin_unlock(&dev->struct_mutex);
	return 0;
}

/*ARGSUSED*/
int
i915_gem_throttle_ioctl(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;

	if (dev->driver->use_gem != 1)
		return ENODEV;

	return i915_gem_ring_throttle(dev, fpriv);
}

static int
i915_gem_object_get_page_list(struct drm_gem_object *obj)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	caddr_t va;
	long i;

	if (obj_priv->page_list)
		return 0;
	pgcnt_t np = btop(obj->size);

	obj_priv->page_list = kmem_zalloc(np * sizeof (caddr_t), KM_SLEEP);
	if (obj_priv->page_list == NULL) {
		DRM_ERROR("Failed to allocate page list\n");
		return ENOMEM;
	}

	for (i = 0, va = obj->kaddr; i < np; i++, va += PAGESIZE) {
		obj_priv->page_list[i] = va;
	}
	return 0;
}


int i915_gem_init_object(struct drm_gem_object *obj)
{
	struct drm_i915_gem_object *obj_priv;

	obj_priv = drm_calloc(1, sizeof (*obj_priv), DRM_MEM_DRIVER);
	if (obj_priv == NULL)
		return ENOMEM;

	/*
	 * We've just allocated pages from the kernel,
	 * so they've just been written by the CPU with
	 * zeros. They'll need to be clflushed before we
	 * use them with the GPU.
	 */
	obj->write_domain = I915_GEM_DOMAIN_CPU;
	obj->read_domains = I915_GEM_DOMAIN_CPU;

	obj->driver_private = obj_priv;
	obj_priv->obj = obj;
	INIT_LIST_HEAD(&obj_priv->list);
	return 0;
}

void i915_gem_free_object(struct drm_gem_object *obj)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;

	while (obj_priv->pin_count > 0)
		i915_gem_object_unpin(obj);

	DRM_DEBUG("%s: obj %d", __func__, obj->name);

	(void) i915_gem_object_unbind(obj, 1);
	if (obj_priv->page_cpu_valid != NULL)
		drm_free(obj_priv->page_cpu_valid, obj->size / PAGE_SIZE, DRM_MEM_DRIVER);
	drm_free(obj->driver_private, sizeof (*obj_priv), DRM_MEM_DRIVER);
}

/** Unbinds all objects that are on the given buffer list.
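 *
 * Called with struct_mutex held.  Each successful unbind removes the
 * object from the list, so the loop runs until the list drains; finding
 * a pinned object on the list is treated as an error and aborts the
 * sweep.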
*/ 2507 static int 2508 i915_gem_evict_from_list(struct drm_device *dev, struct list_head *head, uint32_t type) 2509 { 2510 struct drm_gem_object *obj; 2511 struct drm_i915_gem_object *obj_priv; 2512 int ret; 2513 2514 while (!list_empty(head)) { 2515 obj_priv = list_entry(head->next, 2516 struct drm_i915_gem_object, 2517 list); 2518 obj = obj_priv->obj; 2519 2520 if (obj_priv->pin_count != 0) { 2521 DRM_ERROR("Pinned object in unbind list\n"); 2522 spin_unlock(&dev->struct_mutex); 2523 return EINVAL; 2524 } 2525 DRM_DEBUG("%s: obj %d type %d",__func__, obj->name, type); 2526 ret = i915_gem_object_unbind(obj, type); 2527 if (ret != 0) { 2528 DRM_ERROR("Error unbinding object in LeaveVT: %d\n", 2529 ret); 2530 spin_unlock(&dev->struct_mutex); 2531 return ret; 2532 } 2533 } 2534 2535 2536 return 0; 2537 } 2538 2539 static int 2540 i915_gem_idle(struct drm_device *dev, uint32_t type) 2541 { 2542 drm_i915_private_t *dev_priv = dev->dev_private; 2543 uint32_t seqno, cur_seqno, last_seqno; 2544 int stuck, ret; 2545 2546 spin_lock(&dev->struct_mutex); 2547 2548 if (dev_priv->mm.suspended || dev_priv->ring.ring_obj == NULL) { 2549 spin_unlock(&dev->struct_mutex); 2550 return 0; 2551 } 2552 2553 /* Hack! Don't let anybody do execbuf while we don't control the chip. 2554 * We need to replace this with a semaphore, or something. 2555 */ 2556 dev_priv->mm.suspended = 1; 2557 2558 /* Cancel the retire work handler, wait for it to finish if running 2559 */ 2560 if (worktimer_id != NULL) { 2561 (void) untimeout(worktimer_id); 2562 worktimer_id = NULL; 2563 } 2564 2565 i915_kernel_lost_context(dev); 2566 2567 /* Flush the GPU along with all non-CPU write domains 2568 */ 2569 i915_gem_flush(dev, ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT), 2570 ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)); 2571 seqno = i915_add_request(dev, ~(I915_GEM_DOMAIN_CPU | 2572 I915_GEM_DOMAIN_GTT)); 2573 if (seqno == 0) { 2574 spin_unlock(&dev->struct_mutex); 2575 return ENOMEM; 2576 } 2577 2578 dev_priv->mm.waiting_gem_seqno = seqno; 2579 last_seqno = 0; 2580 stuck = 0; 2581 for (;;) { 2582 cur_seqno = i915_get_gem_seqno(dev); 2583 if (i915_seqno_passed(cur_seqno, seqno)) 2584 break; 2585 if (last_seqno == cur_seqno) { 2586 if (stuck++ > 100) { 2587 DRM_ERROR("hardware wedged\n"); 2588 dev_priv->mm.wedged = 1; 2589 DRM_WAKEUP(&dev_priv->irq_queue); 2590 break; 2591 } 2592 } 2593 DRM_UDELAY(10); 2594 last_seqno = cur_seqno; 2595 } 2596 dev_priv->mm.waiting_gem_seqno = 0; 2597 2598 i915_gem_retire_requests(dev); 2599 2600 /* Empty the active and flushing lists to inactive. If there's 2601 * anything left at this point, it means that we're wedged and 2602 * nothing good's going to happen by leaving them there. So strip 2603 * the GPU domains and just stuff them onto inactive. 2604 */ 2605 while (!list_empty(&dev_priv->mm.active_list)) { 2606 struct drm_i915_gem_object *obj_priv; 2607 2608 obj_priv = list_entry(dev_priv->mm.active_list.next, 2609 struct drm_i915_gem_object, 2610 list); 2611 obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS; 2612 i915_gem_object_move_to_inactive(obj_priv->obj); 2613 } 2614 2615 while (!list_empty(&dev_priv->mm.flushing_list)) { 2616 struct drm_i915_gem_object *obj_priv; 2617 2618 obj_priv = list_entry(dev_priv->mm.flushing_list.next, 2619 struct drm_i915_gem_object, 2620 list); 2621 obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS; 2622 i915_gem_object_move_to_inactive(obj_priv->obj); 2623 } 2624 2625 /* Move all inactive buffers out of the GTT. 
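	 *
	 * By this point the active and flushing lists have been drained
	 * above, so evicting the inactive list unbinds every remaining
	 * buffer; the ASSERT below checks that nothing was left behind.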
*/ 2626 ret = i915_gem_evict_from_list(dev, &dev_priv->mm.inactive_list, type); 2627 ASSERT(list_empty(&dev_priv->mm.inactive_list)); 2628 if (ret) { 2629 spin_unlock(&dev->struct_mutex); 2630 return ret; 2631 } 2632 2633 i915_gem_cleanup_ringbuffer(dev); 2634 spin_unlock(&dev->struct_mutex); 2635 2636 return 0; 2637 } 2638 2639 static int 2640 i915_gem_init_hws(struct drm_device *dev) 2641 { 2642 drm_i915_private_t *dev_priv = dev->dev_private; 2643 struct drm_gem_object *obj; 2644 struct drm_i915_gem_object *obj_priv; 2645 int ret; 2646 2647 /* If we need a physical address for the status page, it's already 2648 * initialized at driver load time. 2649 */ 2650 if (!I915_NEED_GFX_HWS(dev)) 2651 return 0; 2652 2653 2654 obj = drm_gem_object_alloc(dev, 4096); 2655 if (obj == NULL) { 2656 DRM_ERROR("Failed to allocate status page\n"); 2657 return ENOMEM; 2658 } 2659 2660 obj_priv = obj->driver_private; 2661 2662 ret = i915_gem_object_pin(obj, 4096); 2663 if (ret != 0) { 2664 drm_gem_object_unreference(obj); 2665 return ret; 2666 } 2667 2668 dev_priv->status_gfx_addr = obj_priv->gtt_offset; 2669 dev_priv->hws_map.offset = dev->agp->agp_info.agpi_aperbase + obj_priv->gtt_offset; 2670 dev_priv->hws_map.size = 4096; 2671 dev_priv->hws_map.type = 0; 2672 dev_priv->hws_map.flags = 0; 2673 dev_priv->hws_map.mtrr = 0; 2674 2675 drm_core_ioremap(&dev_priv->hws_map, dev); 2676 if (dev_priv->hws_map.handle == NULL) { 2677 DRM_ERROR("Failed to map status page.\n"); 2678 (void) memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map)); 2679 drm_gem_object_unreference(obj); 2680 return EINVAL; 2681 } 2682 2683 dev_priv->hws_obj = obj; 2684 2685 dev_priv->hw_status_page = dev_priv->hws_map.handle; 2686 2687 (void) memset(dev_priv->hw_status_page, 0, PAGE_SIZE); 2688 I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr); 2689 (void) I915_READ(HWS_PGA); /* posting read */ 2690 DRM_DEBUG("hws offset: 0x%08x\n", dev_priv->status_gfx_addr); 2691 2692 return 0; 2693 } 2694 2695 static void 2696 i915_gem_cleanup_hws(struct drm_device *dev) 2697 { 2698 drm_i915_private_t *dev_priv = dev->dev_private; 2699 struct drm_gem_object *obj; 2700 2701 if (dev_priv->hws_obj == NULL) 2702 return; 2703 2704 obj = dev_priv->hws_obj; 2705 2706 drm_core_ioremapfree(&dev_priv->hws_map, dev); 2707 i915_gem_object_unpin(obj); 2708 drm_gem_object_unreference(obj); 2709 dev_priv->hws_obj = NULL; 2710 2711 (void) memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map)); 2712 dev_priv->hw_status_page = NULL; 2713 2714 /* Write high address into HWS_PGA when disabling. */ 2715 I915_WRITE(HWS_PGA, 0x1ffff000); 2716 } 2717 2718 int 2719 i915_gem_init_ringbuffer(struct drm_device *dev) 2720 { 2721 drm_i915_private_t *dev_priv = dev->dev_private; 2722 struct drm_gem_object *obj; 2723 struct drm_i915_gem_object *obj_priv; 2724 int ret; 2725 u32 head; 2726 2727 ret = i915_gem_init_hws(dev); 2728 if (ret != 0) 2729 return ret; 2730 obj = drm_gem_object_alloc(dev, 128 * 1024); 2731 if (obj == NULL) { 2732 DRM_ERROR("Failed to allocate ringbuffer\n"); 2733 i915_gem_cleanup_hws(dev); 2734 return ENOMEM; 2735 } 2736 2737 obj_priv = obj->driver_private; 2738 ret = i915_gem_object_pin(obj, 4096); 2739 if (ret != 0) { 2740 drm_gem_object_unreference(obj); 2741 i915_gem_cleanup_hws(dev); 2742 return ret; 2743 } 2744 2745 /* Set up the kernel mapping for the ring. 
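	 *
	 * tail_mask below relies on the ring size being a power of two
	 * (the 128KB object allocated above), and the CPU mapping is made
	 * through the GTT aperture (agpi_aperbase + gtt_offset) rather
	 * than through the object's backing pages.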
*/ 2746 dev_priv->ring.Size = obj->size; 2747 dev_priv->ring.tail_mask = obj->size - 1; 2748 2749 dev_priv->ring.map.offset = dev->agp->agp_info.agpi_aperbase + obj_priv->gtt_offset; 2750 dev_priv->ring.map.size = obj->size; 2751 dev_priv->ring.map.type = 0; 2752 dev_priv->ring.map.flags = 0; 2753 dev_priv->ring.map.mtrr = 0; 2754 2755 drm_core_ioremap(&dev_priv->ring.map, dev); 2756 if (dev_priv->ring.map.handle == NULL) { 2757 DRM_ERROR("Failed to map ringbuffer.\n"); 2758 (void) memset(&dev_priv->ring, 0, sizeof(dev_priv->ring)); 2759 drm_gem_object_unreference(obj); 2760 i915_gem_cleanup_hws(dev); 2761 return EINVAL; 2762 } 2763 2764 dev_priv->ring.ring_obj = obj; 2765 2766 dev_priv->ring.virtual_start = (u8 *) dev_priv->ring.map.handle; 2767 2768 /* Stop the ring if it's running. */ 2769 I915_WRITE(PRB0_CTL, 0); 2770 I915_WRITE(PRB0_HEAD, 0); 2771 I915_WRITE(PRB0_TAIL, 0); 2772 2773 2774 /* Initialize the ring. */ 2775 I915_WRITE(PRB0_START, obj_priv->gtt_offset); 2776 head = I915_READ(PRB0_HEAD) & HEAD_ADDR; 2777 2778 /* G45 ring initialization fails to reset head to zero */ 2779 if (head != 0) { 2780 DRM_ERROR("Ring head not reset to zero " 2781 "ctl %08x head %08x tail %08x start %08x\n", 2782 I915_READ(PRB0_CTL), 2783 I915_READ(PRB0_HEAD), 2784 I915_READ(PRB0_TAIL), 2785 I915_READ(PRB0_START)); 2786 I915_WRITE(PRB0_HEAD, 0); 2787 2788 DRM_ERROR("Ring head forced to zero " 2789 "ctl %08x head %08x tail %08x start %08x\n", 2790 I915_READ(PRB0_CTL), 2791 I915_READ(PRB0_HEAD), 2792 I915_READ(PRB0_TAIL), 2793 I915_READ(PRB0_START)); 2794 } 2795 2796 I915_WRITE(PRB0_CTL, 2797 ((obj->size - 4096) & RING_NR_PAGES) | 2798 RING_NO_REPORT | 2799 RING_VALID); 2800 2801 head = I915_READ(PRB0_HEAD) & HEAD_ADDR; 2802 2803 /* If the head is still not zero, the ring is dead */ 2804 if (head != 0) { 2805 DRM_ERROR("Ring initialization failed " 2806 "ctl %08x head %08x tail %08x start %08x\n", 2807 I915_READ(PRB0_CTL), 2808 I915_READ(PRB0_HEAD), 2809 I915_READ(PRB0_TAIL), 2810 I915_READ(PRB0_START)); 2811 return EIO; 2812 } 2813 2814 /* Update our cache of the ring state */ 2815 i915_kernel_lost_context(dev); 2816 2817 return 0; 2818 } 2819 2820 static void 2821 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 2822 { 2823 drm_i915_private_t *dev_priv = dev->dev_private; 2824 2825 if (dev_priv->ring.ring_obj == NULL) 2826 return; 2827 2828 drm_core_ioremapfree(&dev_priv->ring.map, dev); 2829 2830 i915_gem_object_unpin(dev_priv->ring.ring_obj); 2831 drm_gem_object_unreference(dev_priv->ring.ring_obj); 2832 dev_priv->ring.ring_obj = NULL; 2833 (void) memset(&dev_priv->ring, 0, sizeof(dev_priv->ring)); 2834 i915_gem_cleanup_hws(dev); 2835 } 2836 2837 /*ARGSUSED*/ 2838 int 2839 i915_gem_entervt_ioctl(DRM_IOCTL_ARGS) 2840 { 2841 DRM_DEVICE; 2842 drm_i915_private_t *dev_priv = dev->dev_private; 2843 int ret; 2844 2845 if (dev->driver->use_gem != 1) 2846 return ENODEV; 2847 2848 if (dev_priv->mm.wedged) { 2849 DRM_ERROR("Reenabling wedged hardware, good luck\n"); 2850 dev_priv->mm.wedged = 0; 2851 } 2852 /* Set up the kernel mapping for the ring. 
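	 *
	 * Note that the mapping set up below actually covers the entire
	 * GTT aperture (agpi_aperbase, agpi_apersize bytes), not just the
	 * ring, so any object bound into the aperture is reachable from
	 * the CPU through it.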
	 */
	dev_priv->mm.gtt_mapping.offset = dev->agp->agp_info.agpi_aperbase;
	dev_priv->mm.gtt_mapping.size = dev->agp->agp_info.agpi_apersize;
	dev_priv->mm.gtt_mapping.type = 0;
	dev_priv->mm.gtt_mapping.flags = 0;
	dev_priv->mm.gtt_mapping.mtrr = 0;

	drm_core_ioremap(&dev_priv->mm.gtt_mapping, dev);

	spin_lock(&dev->struct_mutex);
	dev_priv->mm.suspended = 0;
	ret = i915_gem_init_ringbuffer(dev);
	if (ret != 0) {
		spin_unlock(&dev->struct_mutex);
		return ret;
	}

	spin_unlock(&dev->struct_mutex);

	(void) drm_irq_install(dev);

	return 0;
}

/*ARGSUSED*/
int
i915_gem_leavevt_ioctl(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret;

	if (dev->driver->use_gem != 1)
		return ENODEV;

	ret = i915_gem_idle(dev, 0);
	(void) drm_irq_uninstall(dev);

	drm_core_ioremapfree(&dev_priv->mm.gtt_mapping, dev);
	return ret;
}

void
i915_gem_lastclose(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret;

	ret = i915_gem_idle(dev, 1);
	if (ret)
		DRM_ERROR("failed to idle hardware: %d\n", ret);

	drm_mm_clean_ml(&dev_priv->mm.gtt_space);
}

void
i915_gem_load(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	INIT_LIST_HEAD(&dev_priv->mm.active_list);
	INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
	INIT_LIST_HEAD(&dev_priv->mm.request_list);
	dev_priv->mm.next_gem_seqno = 1;

	i915_gem_detect_bit_6_swizzle(dev);
}
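
/*
 * Illustrative sketch (not driver code): roughly how a user-space client
 * is expected to drive the ioctls above.  It creates a buffer, treats it
 * as the sole (and therefore batch) buffer, and submits it with no
 * relocations.  The request macros and struct layouts are assumed to
 * match i915_drm.h as included by this file; "fd" and "batch_bytes" are
 * placeholders, and error handling is omitted.
 *
 *	struct drm_i915_gem_create create;
 *	struct drm_i915_gem_exec_object exec_obj;
 *	struct drm_i915_gem_execbuffer execbuf;
 *
 *	(void) memset(&create, 0, sizeof (create));
 *	create.size = 4096;
 *	(void) ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);
 *
 *	(void) memset(&exec_obj, 0, sizeof (exec_obj));
 *	exec_obj.handle = create.handle;
 *	exec_obj.relocation_count = 0;
 *	exec_obj.relocs_ptr = 0;
 *
 *	(void) memset(&execbuf, 0, sizeof (execbuf));
 *	execbuf.buffers_ptr = (uintptr_t)&exec_obj;
 *	execbuf.buffer_count = 1;          (last entry is the batch buffer)
 *	execbuf.batch_start_offset = 0;
 *	execbuf.batch_len = batch_bytes;   (must be a multiple of 8)
 *	(void) ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER, &execbuf);
 */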