/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */


/*
 * This is the lock device driver.
 *
 * The lock driver provides a variation of inter-process mutexes with the
 * following twist in semantics:
 *	A waiter for a lock can, after a set timeout, "break" the lock and
 *	grab it from the current owner (without informing the owner).
 *
 * These semantics mean that, temporarily, multiple processes may believe
 * they own the lock. This usually does not make sense where locks are used
 * to protect a critical region and it is important to serialize access to
 * data structures, since breaking the lock also forfeits the serialization
 * and results in corrupt data structures.
 *
 * The winlock driver is used primarily by the graphics system when doing
 * DGA (direct graphics access) graphics. The locks are used to protect
 * access to the frame buffer (which presumably reflects back to the screen)
 * between competing processes that write directly to the screen as opposed
 * to going through the window server etc.
 * In this case, breaking the lock at worst causes the screen image to be
 * distorted, which is easily fixed by doing a "refresh".
 *
 * In well-behaved applications, the lock is held for a very short time and
 * the breaking semantics do not come into play. Without this feature, using
 * normal inter-process mutexes, a misbehaved application could grab the
 * screen-writing capability from the window manager and effectively make
 * the system look like it is hung (the mouse pointer does not move).
 *
 * A secondary aspect of the winlock driver is that it allows for extremely
 * fast lock acquire/release in cases where there is low contention. A memory
 * write is all that is needed (not even a function call). The window manager
 * is usually the only DGA writer, and that is the case this is optimized
 * for. Occasionally some other processes might do DGA graphics and cause
 * kernel faults to handle the contention/locking (and that has got to be
 * slow!).
 *
 * The following IOCTLs are supported:
 *
 *   GRABPAGEALLOC:
 *	Compatibility with old cgsix device driver lockpage ioctls.
 *	Lockpages created this way must be an entire page for compatibility
 *	with older software. This ioctl allocates a lock context with its
 *	own private lock page. The unique "ident" that identifies this lock
 *	is returned.
 *
 *   GRABPAGEFREE:
 *	Compatibility with cgsix device driver lockpage ioctls. This
 *	ioctl releases the lock context allocated by GRABPAGEALLOC.
 *
 *   GRABLOCKINFO:
 *	Returns a one-word flag. '1' means that multiple clients may
 *	access this lock page. Older device drivers returned '0',
 *	meaning that only two clients could access a lock page.
 *
 *   GRABATTACH:
 *	Not supported. This ioctl would have grabbed all lock pages
 *	on behalf of the calling program.
 *
 *   WINLOCKALLOC:
 *	Allocate a lock context. This ioctl accepts a key value as
 *	its argument. If the key is zero, a new lock context is
 *	created, and its "ident" is returned. If the key is nonzero,
 *	all existing contexts are checked to see if they match the
 *	key. If a match is found, its reference count is incremented
 *	and its ident is returned; otherwise a new context is created
 *	and its ident is returned.
 *
 *   WINLOCKFREE:
 *	Free a lock context. This ioctl accepts the ident of a lock
 *	context and decrements its reference count. Once the reference
 *	count reaches zero *and* all mappings are released, the lock
 *	context is freed. When all the lock contexts in a lock page are
 *	freed, the lock page is freed as well.
 *
 *   WINLOCKSETTIMEOUT:
 *	Set the lock timeout for a context. This ioctl accepts the ident
 *	of a lock context and a timeout value in milliseconds.
 *	Whenever lock contention occurs, the timer is started and the lock
 *	is broken after the timeout expires. If the timeout value is zero,
 *	the lock does not time out. The value is rounded to the nearest
 *	clock tick, so don't try to use it for real-time control.
 *
 *   WINLOCKGETTIMEOUT:
 *	Get the lock timeout of a context.
 *
 *   WINLOCKDUMP:
 *	Dump the state of this device.
 *
 *
 * How /dev/winlock works:
 *
 *	Every lock context consists of two mappings for the client to the
 *	lock page. These mappings are known as the "lock page" and "unlock
 *	page" to the client. The first mmap to the lock context (identified
 *	by the sy_ident field returned during alloc) allocates a mapping to
 *	the lock page; the second mmap allocates a mapping to the unlock
 *	page.
 *	The mappings don't have to be ordered in virtual address space, but
 *	do need to be ordered in time. Mapping and unmapping of these lock
 *	and unlock pages should happen in pairs. Doing them one at a time,
 *	or unmapping one and leaving the other mapped, etc., causes
 *	undefined behavior.
 *	The mappings are always of length PAGESIZE, and type MAP_SHARED.
 *
 *	The first ioctl is to ALLOC a lock, either based on a key (if trying
 *	to grab a preexisting lock) or 0 (which gets a new default one).
 *	This ioctl returns a value in sy_ident which is needed to do the
 *	later mmaps and FREE/other ioctls.
 *
 *	The "page number" portion of the sy_ident needs to be passed as the
 *	file offset when doing an mmap for both the lock page and the unlock
 *	page.
 *
 *	The value returned by mmap (a user virtual address) needs to be
 *	incremented by the "page offset" portion of sy_ident to obtain the
 *	pointer to the actual lock. (Skipping this step does not cause any
 *	visible error, but the process will be using the wrong lock!)
 *
 *	On a fork(), the child process will inherit the mappings for free,
 *	but will not inherit the parent's lock ownership, if any. The child
 *	should NOT do an explicit FREE on the lock context unless it did an
 *	explicit ALLOC.
 *	Only one process at a time is allowed to have a valid hat
 *	mapping to a lock page. This is enforced by this driver.
 *	A client acquires a lock by writing a '1' to the lock page.
 *	Note that it is not necessary to read and verify that the lock is
 *	'0' prior to writing a '1' into it.
 *	If the client does not already have a valid mapping to that page,
 *	the driver takes a fault (devmap_access), loads the client mapping,
 *	and allows the client to continue. The client releases the lock by
 *	writing a '0' to the unlock page. Again, if it does not have a valid
 *	mapping to the unlock page, the segment driver takes a fault,
 *	loads the mapping, and lets the client continue. From this point
 *	forward, the client can do as many locks and unlocks as it
 *	wants, without any more faults into the kernel.
 *
 *	If a different process wants to acquire a lock, it takes a page fault
 *	when it writes the '1' to the lock page. If the segment driver sees
 *	that the lock page contained a zero, then it invalidates the owner's
 *	mappings and gives the mappings to this process.
 *
 *	If there is already a '1' in the lock page when the second client
 *	tries to access the lock page, then a lock exists. The segment
 *	driver puts the second client to sleep and, if applicable, starts
 *	the timeout on the lock. The owner's mapping to the unlock page
 *	is invalidated so that the driver will be woken again when the owner
 *	releases the lock.
 *
 *	When the locking client finally writes a '0' to the unlock page, the
 *	segment driver takes another fault. The client is given a valid
 *	mapping, not to the unlock page, but to the "trash page", and allowed
 *	to continue. Meanwhile, the sleeping client is given a valid mapping
 *	to the lock/unlock pages and allowed to continue as well.
 *
 * RFE: There is a leak if a process exits before freeing its allocated
 * locks. We are currently not tracking which locks were allocated by which
 * process, and we do not have a clean entry point into the driver to do
 * garbage collection. If the interface used a file descriptor for each
 * lock it allocs, the driver could free up state in its _close routine.
 */
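/*
 * For illustration, a minimal user-space sketch of the protocol described
 * above. This is not part of this driver; it assumes the <sys/winlockio.h>
 * definitions used below (struct winlockalloc with sy_key/sy_ident,
 * WINLOCKALLOC, WINLOCKFREE) and a /dev/winlock node, and omits all error
 * handling:
 *
 *	struct winlockalloc wla;
 *	size_t pg = sysconf(_SC_PAGESIZE);
 *	caddr_t lockpg, unlockpg;
 *	volatile int *lockp, *unlockp;
 *	int fd;
 *
 *	fd = open("/dev/winlock", O_RDWR);
 *	wla.sy_key = 0;                       (zero key => new lock context)
 *	(void) ioctl(fd, WINLOCKALLOC, &wla);
 *
 *	(The page-number bits of sy_ident are the mmap offset; the
 *	page-offset bits locate the lock word within each mapped page.)
 *	lockpg = (caddr_t)mmap(NULL, pg, PROT_READ | PROT_WRITE, MAP_SHARED,
 *	    fd, (off_t)(wla.sy_ident & ~(pg - 1)));     (first map: lock)
 *	unlockpg = (caddr_t)mmap(NULL, pg, PROT_READ | PROT_WRITE, MAP_SHARED,
 *	    fd, (off_t)(wla.sy_ident & ~(pg - 1)));     (second map: unlock)
 *	lockp = (volatile int *)(lockpg + (wla.sy_ident & (pg - 1)));
 *	unlockp = (volatile int *)(unlockpg + (wla.sy_ident & (pg - 1)));
 *
 *	*lockp = 1;                  (acquire: may fault into this driver)
 *	... critical section, e.g. draw to the frame buffer ...
 *	*unlockp = 0;                (release)
 *
 *	(void) munmap(unlockpg, pg);          (unmap in pairs, as required)
 *	(void) munmap(lockpg, pg);
 *	(void) ioctl(fd, WINLOCKFREE, &wla.sy_ident);
 *	(void) close(fd);
 */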
#include <sys/types.h>		/* various type defn's */
#include <sys/debug.h>
#include <sys/param.h>		/* various kernel limits */
#include <sys/time.h>
#include <sys/errno.h>
#include <sys/kmem.h>		/* defines kmem_alloc() */
#include <sys/conf.h>		/* defines cdevsw */
#include <sys/file.h>		/* various file modes, etc. */
#include <sys/uio.h>		/* UIO stuff */
#include <sys/ioctl.h>
#include <sys/cred.h>		/* defines cred struct */
#include <sys/mman.h>		/* defines mmap(2) parameters */
#include <sys/stat.h>		/* defines S_IFCHR */
#include <sys/cmn_err.h>	/* use cmn_err */
#include <sys/ddi.h>		/* ddi stuff */
#include <sys/sunddi.h>		/* ddi stuff */
#include <sys/ddi_impldefs.h>	/* ddi stuff */
#include <sys/winlockio.h>	/* defines ioctls, flags, data structs */

static int	winlock_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int	winlock_devmap(dev_t, devmap_cookie_t, offset_t, size_t,
		    size_t *, uint_t);
static int	winlocksegmap(dev_t, off_t, struct as *, caddr_t *, off_t,
		    uint_t, uint_t, uint_t, cred_t *);

static struct cb_ops	winlock_cb_ops = {
	nulldev,		/* open */
	nulldev,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	winlock_ioctl,		/* ioctl */
	winlock_devmap,		/* devmap */
	nodev,			/* mmap */
	winlocksegmap,		/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* streamtab */
	D_NEW|D_MP|D_DEVMAP,	/* Driver compatibility flag */
	0,			/* rev */
	nodev,			/* aread */
	nodev			/* awrite */
};

static int winlock_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int winlock_attach(dev_info_t *, ddi_attach_cmd_t);
static int winlock_detach(dev_info_t *, ddi_detach_cmd_t);

static struct dev_ops	winlock_ops = {
	DEVO_REV,
	0,			/* refcount */
	winlock_info,		/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	winlock_attach,		/* attach */
	winlock_detach,		/* detach */
	nodev,			/* reset */
	&winlock_cb_ops,	/* driver ops */
	NULL,			/* bus ops */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};

static int winlockmap_map(devmap_cookie_t, dev_t, uint_t, offset_t, size_t,
		void **);
static void winlockmap_unmap(devmap_cookie_t, void *, offset_t, size_t,
		devmap_cookie_t, void **, devmap_cookie_t, void **);
static int winlockmap_dup(devmap_cookie_t, void *,
		devmap_cookie_t, void **);
static int winlockmap_access(devmap_cookie_t, void *, offset_t, size_t,
		uint_t, uint_t);

static
struct devmap_callback_ctl winlockmap_ops = {
	DEVMAP_OPS_REV,
	winlockmap_map,
	winlockmap_access,
	winlockmap_dup,
	winlockmap_unmap,
};

#ifdef DEBUG
static	int	lock_debug = 0;
#define	DEBUGF(level, args)	{ if (lock_debug >= (level)) cmn_err args; }
#else
#define	DEBUGF(level, args)
#endif

/* Driver supports two styles of locks */
enum winlock_style { NEWSTYLE_LOCK, OLDSTYLE_LOCK };

/*
 * These structures describe a lock context. We permit multiple
 * clients (not just two) to access a lock page.
 *
 * The "cookie" identifies the lock context. It is the page number portion
 * of the sy_ident returned on lock allocation, and is used in later ioctls.
 * "cookie" is lockid * PAGESIZE.
 * "lockptr" is the kernel virtual address of the lock itself.
 * The page offset portion of lockptr is the page offset portion of sy_ident.
 */

/*
 * Per-process information about locks. This is the private field of
 * a devmap mapping. Note that usually *two* mappings point to this.
 */

/*
 * Each process using winlock is associated with a segproc structure.
 * In various driver entry points, we need to search to find the right
 * segproc structure (if we were using file handles for each lock, this
 * would not have been necessary).
 * It would have been simple to use the process pid (via ddi_get_pid).
 * However, during fork, devmap_dup is called in the parent process context,
 * and using the pid would complicate the code by introducing orphans.
 * Instead we use the process's as pointer as a tag, which requires
 * delving into various structs that are not DDI-kosher.
 */
typedef struct segproc {
	struct segproc	*next;		/* next client of this lock */
	struct seglock	*lp;		/* associated lock context */
	devmap_cookie_t	lockseg;	/* lock mapping, if any */
	devmap_cookie_t	unlockseg;	/* unlock mapping, if any */
	void		*tag;		/* process as pointer as tag */
	uint_t		flag;		/* see "flag bits" in winlockio.h */
} SegProc;

#define	ID(sdp)		((sdp)->tag)
#define	CURPROC_ID	(void *)(curproc->p_as)

/* per lock context information */

typedef struct seglock {
	struct seglock	*next;		/* next lock */
	uint_t		sleepers;	/* nthreads sleeping on this lock */
	uint_t		alloccount;	/* how many times created? */
	uint_t		cookie;		/* mmap() offset (page #) into device */
	uint_t		key;		/* key, if any */
	enum winlock_style	style;	/* style of lock - OLDSTYLE, NEWSTYLE */
	clock_t		timeout;	/* sleep time in ticks */
	ddi_umem_cookie_t umem_cookie;	/* cookie for umem allocated memory */
	int		*lockptr;	/* kernel virtual addr of lock */
	struct segproc	*clients;	/* list of clients of this lock */
	struct segproc	*owner;		/* current owner of lock */
	kmutex_t	mutex;		/* mutex for lock */
	kcondvar_t	locksleep;	/* for sleeping on lock */
} SegLock;

#define	LOCK(lp)	(*((lp)->lockptr))

/*
 * Number of locks that can fit in a page. The driver can support only that
 * many. For oldstyle locks, it is relatively easy to increase the limit as
 * each lock is in a separate page (MAX_LOCKS mostly serves to prevent
 * runaway allocation). For newstyle locks, this is trickier as the code
 * needs to allow for mapping into the second or third page of the cookie
 * for some locks.
 */
#define	MAX_LOCKS	(PAGESIZE/sizeof (int))

#define	LOCKTIME	3	/* Default lock timeout in seconds */


/* Protection settings for winlock user mappings */
#define	WINLOCK_PROT	(PROT_READ|PROT_WRITE|PROT_USER)

/*
 * The trash page is where unwanted writes go
 * when a process is releasing a lock.
 */
static	ddi_umem_cookie_t trashpage_cookie = NULL;

/* For newstyle allocations a common page of locks is used */
static	caddr_t	lockpage = NULL;
static	ddi_umem_cookie_t lockpage_cookie = NULL;

static	dev_info_t	*winlock_dip = NULL;
static	kmutex_t	winlock_mutex;

/*
 * winlock_mutex protects
 *	lock_list
 *	lock_free_list
 *	"next" field in SegLock
 *	next_lock
 *	trashpage_cookie
 *	lockpage & lockpage_cookie
 *
 * SegLock_mutex protects
 *	rest of fields in SegLock
 *	All fields in list of SegProc (lp->clients)
 *
 * Lock ordering is winlock_mutex->SegLock_mutex.
 * During devmap/seg operations, SegLock_mutex is acquired without
 * winlock_mutex.
 *
 * During devmap callbacks, the pointer to SegProc is stored as the private
 * data in the devmap handle. This pointer will not go stale (i.e., the
 * SegProc getting deleted), as the SegProc is not deleted until both the
 * lockseg and unlockseg have been unmapped and the pointers stored in
 * the devmap handles have been NULL'ed.
 * But before this pointer is used to access any fields (other than 'lp'),
 * lp->mutex must be held.
 */
/*
 * The allocation code tries to allocate from lock_free_list
 * first, otherwise it uses kmem_zalloc. When the lock list goes empty, all
 * locks in lock_free_list are kmem_freed.
 */
static	SegLock	*lock_list = NULL;	/* in-use locks */
static	SegLock	*lock_free_list = NULL;	/* free locks */
static	int	next_lock = 0;		/* next lock cookie */

/* Routines to find a lock in lock_list based on offset or key */
static SegLock *seglock_findlock(uint_t);
static SegLock *seglock_findkey(uint_t);

/* Routines to find and allocate SegProc structures */
static SegProc *seglock_find_specific(SegLock *, void *);
static SegProc *seglock_alloc_specific(SegLock *, void *);
#define	seglock_findclient(lp)	seglock_find_specific((lp), CURPROC_ID)
#define	seglock_allocclient(lp)	seglock_alloc_specific((lp), CURPROC_ID)

/* Delete client from lock's client list */
static void seglock_deleteclient(SegLock *, SegProc *);
static void garbage_collect_lock(SegLock *, SegProc *);

/* Create a new lock */
static SegLock *seglock_createlock(enum winlock_style);
/* Destroy lock */
static void seglock_destroylock(SegLock *);
static void lock_destroyall(void);

/* Helper functions in winlockmap_access */
static int give_mapping(SegLock *, SegProc *, uint_t);
static int lock_giveup(SegLock *, int);
static int seglock_lockfault(devmap_cookie_t, SegProc *, SegLock *, uint_t);

/* routines called from ioctl */
static int seglock_graballoc(intptr_t, enum winlock_style, int);
static int seglock_grabinfo(intptr_t, int);
static int seglock_grabfree(intptr_t, int);
static int seglock_gettimeout(intptr_t, int);
static int seglock_settimeout(intptr_t, int);
static void seglock_dump_all(void);

static int
winlock_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	DEBUGF(1, (CE_CONT, "winlock_attach, devi=%p, cmd=%d\n",
	    (void *)devi, (int)cmd));
	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);
	if (ddi_create_minor_node(devi, "winlock", S_IFCHR, 0, DDI_PSEUDO, 0)
	    == DDI_FAILURE) {
		return (DDI_FAILURE);
	}
	winlock_dip = devi;
	ddi_report_dev(devi);
	return (DDI_SUCCESS);
}

/*ARGSUSED*/
static int
winlock_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
	DEBUGF(1, (CE_CONT, "winlock_detach, devi=%p, cmd=%d\n",
	    (void *)devi, (int)cmd));
	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	mutex_enter(&winlock_mutex);
	if (lock_list != NULL) {
		mutex_exit(&winlock_mutex);
		return (DDI_FAILURE);
	}
	ASSERT(lock_free_list == NULL);

	DEBUGF(1, (CE_CONT, "detach freeing trashpage and lockpage\n"));
	/* destroy any common stuff created */
	if (trashpage_cookie != NULL) {
		ddi_umem_free(trashpage_cookie);
		trashpage_cookie = NULL;
	}
	if (lockpage != NULL) {
		ddi_umem_free(lockpage_cookie);
		lockpage = NULL;
		lockpage_cookie = NULL;
	}
	winlock_dip = NULL;
	mutex_exit(&winlock_mutex);
	return (DDI_SUCCESS);
}

/*ARGSUSED*/
static int
winlock_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int error;

	/* initialize result */
	*result = NULL;

	/* only valid instance (i.e., getminor) is 0 */
	if (getminor((dev_t)arg) >= 1)
		return (DDI_FAILURE);

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if (winlock_dip == NULL)
			error = DDI_FAILURE;
		else {
			*result = (void *)winlock_dip;
			error = DDI_SUCCESS;
		}
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}


/*ARGSUSED*/
int
winlock_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
	cred_t *cred, int *rval)
{
	DEBUGF(1, (CE_CONT, "winlockioctl: cmd=%d, arg=0x%p\n",
	    cmd, (void *)arg));

	switch (cmd) {
	/*
	 * ioctls that used to be handled by framebuffers (defined in fbio.h)
	 * RFE: No code really calls the GRAB* ioctls now. Should EOL.
	 */

	case GRABPAGEALLOC:
		return (seglock_graballoc(arg, OLDSTYLE_LOCK, mode));
	case GRABPAGEFREE:
		return (seglock_grabfree(arg, mode));
	case GRABLOCKINFO:
		return (seglock_grabinfo(arg, mode));
	case GRABATTACH:
		return (EINVAL); /* GRABATTACH is not supported (never was) */

	case WINLOCKALLOC:
		return (seglock_graballoc(arg, NEWSTYLE_LOCK, mode));
	case WINLOCKFREE:
		return (seglock_grabfree(arg, mode));
	case WINLOCKSETTIMEOUT:
		return (seglock_settimeout(arg, mode));
	case WINLOCKGETTIMEOUT:
		return (seglock_gettimeout(arg, mode));
	case WINLOCKDUMP:
		seglock_dump_all();
		return (0);

#ifdef DEBUG
	case (WIOC|255):
		lock_debug = arg;
		return (0);
#endif

	default:
		return (ENOTTY);	/* Why is this not EINVAL */
	}
}

int
winlocksegmap(
	dev_t	dev,		/* major:minor */
	off_t	off,		/* device offset from mmap(2) */
	struct as *as,		/* user's address space */
	caddr_t	*addr,		/* address from mmap(2) */
	off_t	len,		/* length from mmap(2) */
	uint_t	prot,		/* user wants this access */
	uint_t	maxprot,	/* this is the maximum the user can have */
	uint_t	flags,		/* flags from mmap(2) */
	cred_t	*cred)
{
	DEBUGF(1, (CE_CONT, "winlock_segmap off=%lx, len=0x%lx\n", off, len));

	/* Only MAP_SHARED mappings are supported */
	if ((flags & MAP_TYPE) == MAP_PRIVATE) {
		return (EINVAL);
	}

	/* Use devmap_setup to set up the mapping */
	return (devmap_setup(dev, (offset_t)off, as, addr, (size_t)len, prot,
	    maxprot, flags, cred));
}

/*ARGSUSED*/
int
winlock_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
	size_t *maplen, uint_t model)
{
	SegLock *lp;
	int err;

	DEBUGF(1, (CE_CONT, "winlock devmap: off=%llx, len=%lx, dhp=%p\n",
	    off, len, (void *)dhp));

	*maplen = 0;

	/* Check if the lock exists, i.e., has been created by alloc */
	/* off is the sy_ident returned in the alloc ioctl */
	if ((lp = seglock_findlock((uint_t)off)) == NULL) {
		return (ENXIO);
	}

	/*
	 * The offset bits in the mmap(2) offset have to be the same as in
	 * lockptr, OR the offset should be 0 (i.e., masked off).
	 */
	if (((off & PAGEOFFSET) != 0) &&
	    ((off ^ (uintptr_t)(lp->lockptr)) & (offset_t)PAGEOFFSET) != 0) {
		DEBUGF(2, (CE_CONT,
		    "mmap offset %llx mismatch with lockptr %p\n",
		    off, (void *)lp->lockptr));
		mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
		return (EINVAL);
	}

	/* Only PAGESIZE length mappings are supported */
	if (len != PAGESIZE) {
		mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
		return (EINVAL);
	}

	/*
	 * Set up devmap to point at the page associated with the lock.
	 * RFE: At this point we don't know if this is a lockpage or an
	 * unlockpage; a lockpage would not need the DEVMAP_ALLOW_REMAP
	 * setting. We could have kept track of the mapping order here,
	 * but the devmap framework does not support storing any state in
	 * this devmap callback, as it does not call back for error cleanup
	 * if some other error happens in the framework.
	 * RFE: We should modify the winlock mmap interface so that the
	 * user process marks, in the offset passed in, whether this is for
	 * a lock or an unlock mapping, instead of guessing based on the
	 * order of maps. This would clean up other things (such as fork).
	 */
	if ((err = devmap_umem_setup(dhp, winlock_dip, &winlockmap_ops,
	    lp->umem_cookie, 0, PAGESIZE, WINLOCK_PROT,
	    DEVMAP_ALLOW_REMAP, 0)) < 0) {
		mutex_exit(&lp->mutex);	/* held by seglock_findlock */
		return (err);
	}
	/*
	 * No mappings are loaded to those segments yet. The correctness
	 * of the winlock semantics depends on the devmap framework/seg_dev
	 * NOT loading the translations without calling the _access callback.
	 */

	mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
	*maplen = PAGESIZE;
	return (0);
}

/*
 * This routine is called by the devmap framework after the devmap entry
 * point above, once the mapping has been set up in seg_dev.
 * We store the pointer to the per-process context in the devmap private
 * data.
 */
/*ARGSUSED*/
static int
winlockmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off,
	size_t len, void **pvtp)
{
	SegLock *lp = seglock_findlock((uint_t)off); /* returns w/ mutex held */
	SegProc *sdp;

	ASSERT(len == PAGESIZE);

	/* Find the per-process context for this lock, alloc one if needed */
	sdp = seglock_allocclient(lp);

	/*
	 * RFE: Determining which is a lock vs unlock seg is based on the
	 * order of mmaps; we should change that to be derivable from off.
	 */
	if (sdp->lockseg == NULL) {
		sdp->lockseg = dhp;
	} else if (sdp->unlockseg == NULL) {
		sdp->unlockseg = dhp;
	} else {
		/* attempting to map the lock more than twice */
		mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
		return (ENOMEM);
	}

	*pvtp = sdp;
	mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
	return (DDI_SUCCESS);
}

/*
 * Duplicate a segment, as in fork().
 * On fork, the child inherits the mappings to the lock.
 * lp->alloccount is NOT incremented, so the child should not do a free().
 * The semantics are the same as if the child had done an alloc(), map(),
 * map(); this way things would also work if an exec() variant is done
 * later. The child does not inherit any UFLAGS set in the parent.
 * The lock and unlock pages start off unmapped, i.e., the child does not
 * own the lock.
 * The code assumes that the child process has a valid pid at this point.
 * RFE: These semantics depend on fork not duplicating the hat mappings
 * (which is the current implementation). To enforce it, we would need to
 * call devmap_unload from here - it is not clear if that is allowed.
 */

static int
winlockmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp,
	void **newpvt)
{
	SegProc *sdp = (SegProc *)oldpvt;
	SegProc *ndp;
	SegLock *lp = sdp->lp;

	mutex_enter(&lp->mutex);
	ASSERT((dhp == sdp->lockseg) || (dhp == sdp->unlockseg));

	/*
	 * Note: At this point, the child process does have a pid, but
	 * the arguments passed to as_dup, and hence to devmap_dup, don't
	 * pass it down. So we cannot use the normal seglock_findclient -
	 * which would find the parent sdp itself!
	 * Instead we allocate the child's SegProc using the child's as
	 * pointer.
	 * RFE: we are using the as structure, which means peeking into the
	 * devmap_cookie. This is not DDI-compliant. We need a compliant way
	 * of getting at either the as or, better, a way to get the child's
	 * new pid.
	 */
	ndp = seglock_alloc_specific(lp,
	    (void *)((devmap_handle_t *)new_dhp)->dh_seg->s_as);
	ASSERT(ndp != sdp);

	if (sdp->lockseg == dhp) {
		ASSERT(ndp->lockseg == NULL);
		ndp->lockseg = new_dhp;
	} else {
		ASSERT(sdp->unlockseg == dhp);
		ASSERT(ndp->unlockseg == NULL);
		ndp->unlockseg = new_dhp;
		if (sdp->flag & TRASHPAGE) {
			ndp->flag |= TRASHPAGE;
		}
	}
	mutex_exit(&lp->mutex);
	*newpvt = (void *)ndp;
	return (0);
}


/*ARGSUSED*/
static void
winlockmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
	devmap_cookie_t new_dhp1, void **newpvtp1,
	devmap_cookie_t new_dhp2, void **newpvtp2)
{
	SegProc	*sdp = (SegProc *)pvtp;
	SegLock	*lp = sdp->lp;

	/*
	 * We always create PAGESIZE length mappings, so there should never
	 * be a partial unmapping case.
	 */
	ASSERT((new_dhp1 == NULL) && (new_dhp2 == NULL));

	mutex_enter(&lp->mutex);
	ASSERT((dhp == sdp->lockseg) || (dhp == sdp->unlockseg));
	/* make sure this process doesn't own the lock */
	if (sdp == lp->owner) {
		/*
		 * Not handling errors here - i.e., errors in unloading the
		 * mapping. As part of unmapping, the hat/seg structure gets
		 * torn down anyway.
		 */
		(void) lock_giveup(lp, 0);
	}

	ASSERT(sdp != lp->owner);
	if (sdp->lockseg == dhp) {
		sdp->lockseg = NULL;
	} else {
		ASSERT(sdp->unlockseg == dhp);
		sdp->unlockseg = NULL;
		sdp->flag &= ~TRASHPAGE;	/* clear flag if set */
	}

	garbage_collect_lock(lp, sdp);
}

/*ARGSUSED*/
static int
winlockmap_access(devmap_cookie_t dhp, void *pvt, offset_t off, size_t len,
	uint_t type, uint_t rw)
{
	SegProc *sdp = (SegProc *)pvt;
	SegLock *lp = sdp->lp;
	int err;

	/* Driver handles only DEVMAP_ACCESS type of faults */
	if (type != DEVMAP_ACCESS)
		return (-1);

	mutex_enter(&lp->mutex);
	ASSERT((dhp == sdp->lockseg) || (dhp == sdp->unlockseg));

	/* should be using a SegProc that corresponds to current process */
	ASSERT(ID(sdp) == CURPROC_ID);

	/*
	 * If the process is faulting but does not have both segments mapped,
	 * return an error (should cause a segv).
	 * RFE: could give it a permanent trashpage.
	 */
	if ((sdp->lockseg == NULL) || (sdp->unlockseg == NULL)) {
		err = -1;
	} else {
		err = seglock_lockfault(dhp, sdp, lp, rw);
	}
	mutex_exit(&lp->mutex);
	return (err);
}

/* INTERNAL ROUTINES START HERE */



/*
 * Search the lock_list for the specified cookie.
 * The cookie is the sy_ident field returned by the ALLOC ioctl.
 * It has two parts:
 *	the pageoffset bits contain the offset into the lock page;
 *	the pagenumber bits contain the lock id.
 * The user code is supposed to pass in only the pagenumber portion
 *	(i.e., mask off the pageoffset bits). However, the code below
 *	applies the mask in case callers are not diligent.
 * If found, returns with the mutex for the SegLock structure held.
 */
static SegLock *
seglock_findlock(uint_t cookie)
{
	SegLock	*lp;

	cookie &= (uint_t)PAGEMASK;  /* remove pageoffset bits to get cookie */
	mutex_enter(&winlock_mutex);
	for (lp = lock_list; lp != NULL; lp = lp->next) {
		mutex_enter(&lp->mutex);
		if (cookie == lp->cookie) {
			break;	/* return with lp->mutex held */
		}
		mutex_exit(&lp->mutex);
	}
	mutex_exit(&winlock_mutex);
	return (lp);
}

/*
 * Search the lock_list for the specified non-zero key.
 * If found, returns with the lock for the SegLock structure held.
 */
static SegLock *
seglock_findkey(uint_t key)
{
	SegLock	*lp;

	ASSERT(MUTEX_HELD(&winlock_mutex));
	/* The driver allows multiple locks with key 0, don't search */
	if (key == 0)
		return (NULL);
	for (lp = lock_list; lp != NULL; lp = lp->next) {
		mutex_enter(&lp->mutex);
		if (key == lp->key)
			break;
		mutex_exit(&lp->mutex);
	}
	return (lp);
}

/*
 * Create a new lock context.
 * Returns with the SegLock mutex held.
 */

static SegLock *
seglock_createlock(enum winlock_style style)
{
	SegLock	*lp;

	DEBUGF(3, (CE_CONT, "seglock_createlock: free_list=%p, next_lock %d\n",
	    (void *)lock_free_list, next_lock));

	ASSERT(MUTEX_HELD(&winlock_mutex));
	if (lock_free_list != NULL) {
		lp = lock_free_list;
		lock_free_list = lp->next;
	} else if (next_lock >= MAX_LOCKS) {
		return (NULL);
	} else {
		lp = kmem_zalloc(sizeof (SegLock), KM_SLEEP);
		lp->cookie = (next_lock + 1) * (uint_t)PAGESIZE;
		mutex_init(&lp->mutex, NULL, MUTEX_DEFAULT, NULL);
		cv_init(&lp->locksleep, NULL, CV_DEFAULT, NULL);
		++next_lock;
	}

	mutex_enter(&lp->mutex);
	ASSERT((lp->cookie/PAGESIZE) <= next_lock);

	if (style == OLDSTYLE_LOCK) {
		lp->lockptr = (int *)ddi_umem_alloc(PAGESIZE,
		    DDI_UMEM_SLEEP, &(lp->umem_cookie));
	} else {
		lp->lockptr = ((int *)lockpage) + ((lp->cookie/PAGESIZE) - 1);
		lp->umem_cookie = lockpage_cookie;
	}

	ASSERT(lp->lockptr != NULL);
	lp->style = style;
	lp->sleepers = 0;
	lp->alloccount = 1;
	lp->timeout = LOCKTIME*hz;
	lp->clients = NULL;
	lp->owner = NULL;
	LOCK(lp) = 0;
	lp->next = lock_list;
	lock_list = lp;
	return (lp);
}

/*
 * Routine to destroy a lock structure.
 * This routine is called while holding the lp->mutex but not the
 * winlock_mutex.
 */

static void
seglock_destroylock(SegLock *lp)
{
	ASSERT(MUTEX_HELD(&lp->mutex));
	ASSERT(!MUTEX_HELD(&winlock_mutex));

	DEBUGF(3, (CE_CONT, "destroying lock cookie %d key %d\n",
	    lp->cookie, lp->key));

	ASSERT(lp->alloccount == 0);
	ASSERT(lp->clients == NULL);
	ASSERT(lp->owner == NULL);
	ASSERT(lp->sleepers == 0);

	/* clean up/release fields in lp */
	if (lp->style == OLDSTYLE_LOCK) {
		ddi_umem_free(lp->umem_cookie);
	}
	lp->umem_cookie = NULL;
	lp->lockptr = NULL;
	lp->key = 0;

	/*
	 * Reduce the cookie by 1, which makes it non page-aligned and
	 * therefore invalid. This prevents any valid lookup from finding
	 * this lock, so when we drop the mutex and re-grab it, the lock
	 * will still be there and nobody else will have attached to it.
	 */
	lp->cookie--;

	/* Drop and reacquire mutexes in the right order */
	mutex_exit(&lp->mutex);
	mutex_enter(&winlock_mutex);
	mutex_enter(&lp->mutex);

	/* re-increment the cookie to get the original valid cookie */
	lp->cookie++;
	ASSERT((lp->cookie & PAGEOFFSET) == 0);
	ASSERT(lp->alloccount == 0);
	ASSERT(lp->clients == NULL);
	ASSERT(lp->owner == NULL);
	ASSERT(lp->sleepers == 0);

	/* Remove lp from lock_list */
	if (lock_list == lp) {
		lock_list = lp->next;
	} else {
		SegLock *tmp = lock_list;
		while (tmp->next != lp) {
			tmp = tmp->next;
			ASSERT(tmp != NULL);
		}
		tmp->next = lp->next;
	}

	/* Add to lock_free_list */
	lp->next = lock_free_list;
	lock_free_list = lp;
	mutex_exit(&lp->mutex);

	/* Check if all locks have been deleted, and clean up */
	if (lock_list == NULL) {
		lock_destroyall();
	}

	mutex_exit(&winlock_mutex);
}

/* Routine to find the SegProc corresponding to the tag */

static SegProc *
seglock_find_specific(SegLock *lp, void *tag)
{
	SegProc *sdp;

	ASSERT(MUTEX_HELD(&lp->mutex));
	ASSERT(tag != NULL);
	for (sdp = lp->clients; sdp != NULL; sdp = sdp->next) {
		if (ID(sdp) == tag)
			break;
	}
	return (sdp);
}

/* Routine to find (and if needed allocate) a SegProc corresponding to tag */

static SegProc *
seglock_alloc_specific(SegLock *lp, void *tag)
{
	SegProc *sdp;

	ASSERT(MUTEX_HELD(&lp->mutex));
	ASSERT(tag != NULL);

	/* Search and return if an existing one is found */
	sdp = seglock_find_specific(lp, tag);
	if (sdp != NULL)
		return (sdp);

	DEBUGF(3, (CE_CONT, "Allocating segproc structure for tag %p lock %d\n",
	    tag, lp->cookie));

	/* Allocate a new SegProc */
	sdp = kmem_zalloc(sizeof (SegProc), KM_SLEEP);
	sdp->next = lp->clients;
	lp->clients = sdp;
	sdp->lp = lp;
	ID(sdp) = tag;
	return (sdp);
}

/*
 * Search a lock context's client list for the given client and delete it.
 */

static void
seglock_deleteclient(SegLock *lp, SegProc *sdp)
{
	ASSERT(MUTEX_HELD(&lp->mutex));
	ASSERT(lp->owner != sdp);	/* Not current owner of lock */
	ASSERT(sdp->lockseg == NULL);	/* Mappings torn down */
	ASSERT(sdp->unlockseg == NULL);

	DEBUGF(3, (CE_CONT, "Deleting segproc structure for pid %d lock %d\n",
	    ddi_get_pid(), lp->cookie));
	if (lp->clients == sdp) {
		lp->clients = sdp->next;
	} else {
		SegProc *tmp = lp->clients;
		while (tmp->next != sdp) {
			tmp = tmp->next;
			ASSERT(tmp != NULL);
		}
		tmp->next = sdp->next;
	}
	kmem_free(sdp, sizeof (SegProc));
}

/*
 * Routine to verify whether the SegProc and SegLock structures are
 * empty/idle, destroying them if they are ready.
 * Can be called with sdp == NULL to verify only the lock state.
 * The caller should hold lp->mutex; this routine drops the mutex.
 */
static void
garbage_collect_lock(SegLock *lp, SegProc *sdp)
{
	ASSERT(MUTEX_HELD(&lp->mutex));
	/* see if both segments are unmapped from the client structure */
	if ((sdp != NULL) && (sdp->lockseg == NULL) && (sdp->unlockseg == NULL))
		seglock_deleteclient(lp, sdp);

	/* see if this was the last client in the entire lock context */
	if ((lp->clients == NULL) && (lp->alloccount == 0)) {
		seglock_destroylock(lp);
	} else {
		mutex_exit(&lp->mutex);
	}
}


/* IOCTLS START HERE */

static int
seglock_grabinfo(intptr_t arg, int mode)
{
	int i = 1;

	/* multiple clients per lock supported - see comments up top */
	if (ddi_copyout((caddr_t)&i, (caddr_t)arg, sizeof (int), mode) != 0)
		return (EFAULT);
	return (0);
}

static int
seglock_graballoc(intptr_t arg, enum winlock_style style, int mode) /* IOCTL */
{
	struct seglock	*lp;
	uint_t		key;
	struct winlockalloc	wla;
	int		err;

	if (style == OLDSTYLE_LOCK) {
		key = 0;
	} else {
		if (ddi_copyin((caddr_t)arg, (caddr_t)&wla, sizeof (wla),
		    mode)) {
			return (EFAULT);
		}
		key = wla.sy_key;
	}

	DEBUGF(3, (CE_CONT,
	    "seglock_graballoc: key=%u, style=%d\n", key, style));

	mutex_enter(&winlock_mutex);
	/* Allocate the lockpage on the first new style alloc */
	if ((lockpage == NULL) && (style == NEWSTYLE_LOCK)) {
		lockpage = ddi_umem_alloc(PAGESIZE, DDI_UMEM_SLEEP,
		    &lockpage_cookie);
	}

	/* Allocate the trashpage on the first alloc (any style) */
	if (trashpage_cookie == NULL) {
		(void) ddi_umem_alloc(PAGESIZE, DDI_UMEM_TRASH | DDI_UMEM_SLEEP,
		    &trashpage_cookie);
	}

	if ((lp = seglock_findkey(key)) != NULL) {
		DEBUGF(2, (CE_CONT, "alloc: found lock key %d cookie %d\n",
		    key, lp->cookie));
		++lp->alloccount;
	} else if ((lp = seglock_createlock(style)) != NULL) {
		DEBUGF(2, (CE_CONT, "alloc: created lock key %d cookie %d\n",
		    key, lp->cookie));
		lp->key = key;
	} else {
		DEBUGF(2, (CE_CONT, "alloc: cannot create lock key %d\n", key));
		mutex_exit(&winlock_mutex);
		return (ENOMEM);
	}
	ASSERT((lp != NULL) && MUTEX_HELD(&lp->mutex));

	mutex_exit(&winlock_mutex);

	if (style == OLDSTYLE_LOCK) {
		err = ddi_copyout((caddr_t)&lp->cookie, (caddr_t)arg,
		    sizeof (lp->cookie), mode);
	} else {
		wla.sy_ident = lp->cookie +
		    (uint_t)((uintptr_t)(lp->lockptr) & PAGEOFFSET);
		err = ddi_copyout((caddr_t)&wla, (caddr_t)arg,
		    sizeof (wla), mode);
	}

	if (err) {
		/* On error, undo the allocation */
		lp->alloccount--;

		/* Verify and delete if the lock is unused now */
		garbage_collect_lock(lp, NULL);
		return (EFAULT);
	}

	mutex_exit(&lp->mutex);
	return (0);
}

static int
seglock_grabfree(intptr_t arg, int mode)	/* IOCTL */
{
	struct seglock	*lp;
	uint_t	offset;

	if (ddi_copyin((caddr_t)arg, &offset, sizeof (offset), mode)
	    != 0) {
		return (EFAULT);
	}

	DEBUGF(2, (CE_CONT, "seglock_grabfree: offset=%u", offset));

	if ((lp = seglock_findlock(offset)) == NULL) {
		DEBUGF(2, (CE_CONT, "did not find lock\n"));
		return (EINVAL);
	}
	DEBUGF(3, (CE_CONT, " lock key %d, cookie %d, alloccount %d\n",
	    lp->key, lp->cookie, lp->alloccount));

	if (lp->alloccount > 0)
		lp->alloccount--;

	/* Verify and delete if the lock is unused now */
	garbage_collect_lock(lp, NULL);
	return (0);
}


/*
 * Set the timeout in the lock and the UFLAGS in the client.
 * The UFLAGS are stored in the client structure and persist only
 * until the unmap of the lock pages. If the process sets UFLAGS,
 * then maps the lock/unlock pages and unmaps them, the client
 * structure will get deleted and the UFLAGS will be lost. The process
 * will need to set up the flags again.
 */
static int
seglock_settimeout(intptr_t arg, int mode)	/* IOCTL */
{
	SegLock		*lp;
	SegProc		*sdp;
	struct winlocktimeout	wlt;

	if (ddi_copyin((caddr_t)arg, &wlt, sizeof (wlt), mode) != 0) {
		return (EFAULT);
	}

	if ((lp = seglock_findlock(wlt.sy_ident)) == NULL)
		return (EINVAL);

	lp->timeout = MSEC_TO_TICK_ROUNDUP(wlt.sy_timeout);
	/* if the timeout was modified, wake up any sleepers */
	if (lp->sleepers > 0) {
		cv_broadcast(&lp->locksleep);
	}

	/*
	 * If the process is trying to set UFLAGS:
	 *	Find the client segproc, allocating one if needed.
	 *	Set the flags, preserving the kernel flags.
	 * If the process is clearing UFLAGS:
	 *	Find the client segproc, but don't allocate one if it does
	 *	not exist.
	 */
	if (wlt.sy_flags & UFLAGS) {
		sdp = seglock_allocclient(lp);
		sdp->flag = (sdp->flag & KFLAGS) | (wlt.sy_flags & UFLAGS);
	} else if ((sdp = seglock_findclient(lp)) != NULL) {
		sdp->flag = sdp->flag & KFLAGS;
		/* If clearing UFLAGS leaves the segment or lock idle, delete */
		garbage_collect_lock(lp, sdp);
		return (0);
	}
	mutex_exit(&lp->mutex);		/* mutex held by seglock_findlock */
	return (0);
}

static int
seglock_gettimeout(intptr_t arg, int mode)
{
	SegLock		*lp;
	SegProc		*sdp;
	struct winlocktimeout	wlt;

	if (ddi_copyin((caddr_t)arg, &wlt, sizeof (wlt), mode) != 0)
		return (EFAULT);

	if ((lp = seglock_findlock(wlt.sy_ident)) == NULL)
		return (EINVAL);

	wlt.sy_timeout = TICK_TO_MSEC(lp->timeout);
	/*
	 * If this process has an active allocated lock, return its flags.
	 * Don't allocate a client structure on gettimeout.
	 * If not, return 0.
	 */
	if ((sdp = seglock_findclient(lp)) != NULL) {
		wlt.sy_flags = sdp->flag & UFLAGS;
	} else {
		wlt.sy_flags = 0;
	}
	mutex_exit(&lp->mutex);		/* mutex held by seglock_findlock */

	if (ddi_copyout(&wlt, (caddr_t)arg, sizeof (wlt), mode) != 0)
		return (EFAULT);

	return (0);
}

/*
 * Handle lock segment faults here...
 *
 * This is where the magic happens.
 */
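/*
 * To make the fault-driven protocol below concrete, a hedged user-space
 * sketch of two processes contending for one lock with a timeout. It
 * assumes the <sys/winlockio.h> definitions (struct winlocktimeout with
 * sy_ident/sy_timeout/sy_flags, WINLOCKSETTIMEOUT) plus the fd/wla/lockp/
 * unlockp set up in the alloc/mmap sketch earlier; error handling omitted:
 *
 *	struct winlocktimeout wlt;
 *
 *	wlt.sy_ident = wla.sy_ident;
 *	wlt.sy_timeout = 100;            (milliseconds; 0 => never break)
 *	wlt.sy_flags = 0;
 *	(void) ioctl(fd, WINLOCKSETTIMEOUT, &wlt);
 *
 *	if (fork() == 0) {
 *		(The child inherits the lock/unlock mappings through
 *		winlockmap_dup but not ownership, and must not WINLOCKFREE
 *		what it did not alloc. Its first write faults into
 *		seglock_lockfault below.)
 *		*lockp = 1;          (sleeps while the parent holds the lock;
 *		                     breaks the lock after the 100 ms timeout
 *		                     if the parent holds it too long)
 *		... child critical section ...
 *		*unlockp = 0;
 *		_exit(0);
 *	}
 *	*lockp = 1;                  (parent acquires, or contends as above)
 *	... parent critical section ...
 *	*unlockp = 0;                (release; wakes any sleeping waiter)
 */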
/* ARGSUSED */
static int
seglock_lockfault(devmap_cookie_t dhp, SegProc *sdp, SegLock *lp, uint_t rw)
{
	SegProc *owner = lp->owner;
	int err;

	ASSERT(MUTEX_HELD(&lp->mutex));
	DEBUGF(3, (CE_CONT,
	    "seglock_lockfault: hdl=%p, sdp=%p, lp=%p owner=%p\n",
	    (void *)dhp, (void *)sdp, (void *)lp, (void *)owner));

	/* lockfault is always called with sdp in current process context */
	ASSERT(ID(sdp) == CURPROC_ID);

	/* If the lock has no current owner, give the mapping to new owner */
	if (owner == NULL) {
		DEBUGF(4, (CE_CONT, " lock has no current owner\n"));
		return (give_mapping(lp, sdp, rw));
	}

	if (owner == sdp) {
		/*
		 * The current owner is faulting on the owned lock segment,
		 * OR the current owner is faulting on the unlock page and
		 * has no waiters. In either case we can give the mapping
		 * to the current owner.
		 */
		if ((sdp->lockseg == dhp) || (lp->sleepers == 0)) {
			DEBUGF(4, (CE_CONT, "lock owner faulting\n"));
			return (give_mapping(lp, sdp, rw));
		} else {
			/*
			 * The owner must be writing to the unlock page and
			 * there are waiters; the other cases have been
			 * checked earlier.
			 * Release the lock, the owner, and the owner's
			 * mappings. As the owner is trying to write to the
			 * unlock page, leave it with a trashpage mapping
			 * and wake up the sleepers.
			 */
			ASSERT((dhp == sdp->unlockseg) && (lp->sleepers != 0));
			DEBUGF(4, (CE_CONT,
			    " owner fault on unlock seg w/ sleeper\n"));
			return (lock_giveup(lp, 1));
		}
	}

	ASSERT(owner != sdp);

	/*
	 * If the old owner is faulting on a trash unlock mapping,
	 * load hat mappings to the trash page.
	 * RFE: non-owners should NOT be faulting on the unlock mapping, as
	 * they are supposed to fault on the lock seg first. We could give
	 * them a trash page or return an error.
	 */
	if ((sdp->unlockseg == dhp) && (sdp->flag & TRASHPAGE)) {
		DEBUGF(4, (CE_CONT, " old owner reloads trash mapping\n"));
		return (devmap_load(sdp->unlockseg, lp->cookie, PAGESIZE,
		    DEVMAP_ACCESS, rw));
	}

	/*
	 * Non-owner faulting. Need to check the current LOCK state.
	 *
	 * Before reading the lock value in LOCK(lp), we must make sure that
	 * the owner cannot change its value before we change the mappings,
	 * or else we could end up either with a hung process
	 * or with more than one process thinking it has the lock.
	 * We do that by unloading the owner's mappings.
	 */
	DEBUGF(4, (CE_CONT, " owner loses mappings to check lock state\n"));
	err = devmap_unload(owner->lockseg, lp->cookie, PAGESIZE);
	err |= devmap_unload(owner->unlockseg, lp->cookie, PAGESIZE);
	if (err != 0)
		return (err);	/* unable to remove owner mapping */

	/*
	 * If the lock is not held, then the current owner's mappings were
	 * unloaded above and we can give the lock to the new owner.
	 */
	if (LOCK(lp) == 0) {
		DEBUGF(4, (CE_CONT,
		    "Free lock (%p): Giving mapping to new owner %d\n",
		    (void *)lp, ddi_get_pid()));
		return (give_mapping(lp, sdp, rw));
	}

	DEBUGF(4, (CE_CONT, " lock held, sleeping\n"));

	/*
	 * A non-owning process tried to write (presumably to the lockpage,
	 * but it doesn't matter) but the lock is held; we need to sleep for
	 * the lock while there is an owner.
	 */

	lp->sleepers++;
	while ((owner = lp->owner) != NULL) {
		int rval;

		if ((lp->timeout == 0) || (owner->flag & SY_NOTIMEOUT)) {
			/*
			 * No timeout has been specified for this lock;
			 * we'll simply sleep on the condition variable.
			 */
			rval = cv_wait_sig(&lp->locksleep, &lp->mutex);
		} else {
			/*
			 * A timeout _has_ been specified for this lock. We
			 * need to wake up and possibly steal this lock if
			 * the owner does not let it go. Note that all
			 * sleepers on a lock with a timeout wait; the
			 * sleeper with the earliest timeout will wake up
			 * and potentially steal the lock.
			 * Stealing the lock will cause a broadcast on the
			 * locksleep cv and thus kick the other timed
			 * waiters and cause everyone to restart in a new
			 * timedwait.
			 */
			rval = cv_reltimedwait_sig(&lp->locksleep,
			    &lp->mutex, lp->timeout, TR_CLOCK_TICK);
		}

		/*
		 * Timed out and still the old owner - steal the lock:
		 * force-release it and give the old owner a trashpage
		 * mapping.
		 */
		if ((rval == -1) && (lp->owner == owner)) {
			/*
			 * If there are any errors in lock_giveup, go back
			 * and sleep/retry. If successful, we will break
			 * out of the loop.
			 */
			cmn_err(CE_NOTE, "Process %d timed out on lock %d\n",
			    ddi_get_pid(), lp->cookie);
			(void) lock_giveup(lp, 1);
		} else if (rval == 0) {	/* signal pending */
			cmn_err(CE_NOTE,
			    "Process %d signalled while waiting on lock %d\n",
			    ddi_get_pid(), lp->cookie);
			lp->sleepers--;
			return (FC_MAKE_ERR(EINTR));
		}
	}

	lp->sleepers--;
	/*
	 * Give the mapping to this process and save a fault later.
	 */
	return (give_mapping(lp, sdp, rw));
}

/*
 * Utility: give valid mappings to the lock and unlock pages to the current
 * process. The caller is responsible for unloading the old owner's mappings.
 */

static int
give_mapping(SegLock *lp, SegProc *sdp, uint_t rw)
{
	int err = 0;

	ASSERT(MUTEX_HELD(&lp->mutex));
	ASSERT(!((lp->owner == NULL) && (LOCK(lp) != 0)));
	/* give_mapping is always called with sdp in current process context */
	ASSERT(ID(sdp) == CURPROC_ID);

	/* remap any old trash mappings */
	if (sdp->flag & TRASHPAGE) {
		/* the current owner should not have a trash mapping */
		ASSERT(sdp != lp->owner);

		DEBUGF(4, (CE_CONT,
		    "new owner %d remapping old trash mapping\n",
		    ddi_get_pid()));
		if ((err = devmap_umem_remap(sdp->unlockseg, winlock_dip,
		    lp->umem_cookie, 0, PAGESIZE, WINLOCK_PROT, 0, 0)) != 0) {
			/*
			 * unable to remap old trash page,
			 * abort before changing owner
			 */
			DEBUGF(4, (CE_CONT,
			    "aborting: error in umem_remap %d\n", err));
			return (err);
		}
		sdp->flag &= ~TRASHPAGE;
	}

	/* we have a new owner now */
	lp->owner = sdp;

	if ((err = devmap_load(sdp->lockseg, lp->cookie, PAGESIZE,
	    DEVMAP_ACCESS, rw)) != 0) {
		return (err);
	}
	DEBUGF(4, (CE_CONT, "new owner %d gets lock mapping", ddi_get_pid()));

	if (lp->sleepers) {
		/* Force unload the unlock mapping if there are waiters */
		DEBUGF(4, (CE_CONT,
		    " lock has %d sleepers => remove unlock mapping\n",
		    lp->sleepers));
		err = devmap_unload(sdp->unlockseg, lp->cookie, PAGESIZE);
	} else {
		/*
		 * While here, give the new owner a valid mapping to the
		 * unlock page so we don't get called again.
		 */
		DEBUGF(4, (CE_CONT, " and unlock mapping\n"));
		err = devmap_load(sdp->unlockseg, lp->cookie, PAGESIZE,
		    DEVMAP_ACCESS, PROT_WRITE);
	}
	return (err);
}

/*
 * Unload the owner's mappings, release the lock and wake up any sleepers.
 * If trash, then the old owner is given a trash mapping
 *	=> the old owner held the lock too long and caused a timeout.
 */
static int
lock_giveup(SegLock *lp, int trash)
{
	SegProc *owner = lp->owner;

	DEBUGF(4, (CE_CONT, "winlock_giveup: lp=%p, owner=%p, trash %d\n",
	    (void *)lp, (void *)ID(lp->owner), trash));

	ASSERT(MUTEX_HELD(&lp->mutex));
	ASSERT(owner != NULL);

	/*
	 * The owner loses the lockpage/unlockpage mappings and gains a
	 * trashpage mapping, if needed.
	 */
	if (!trash) {
		/*
		 * We do not handle errors in devmap_unload in the !trash
		 * case, as the process is attempting to unmap/exit or
		 * otherwise release the lock. Errors in unloading the
		 * mapping are not going to affect that (unmap does not
		 * take an error return).
		 */
		(void) devmap_unload(owner->lockseg, lp->cookie, PAGESIZE);
		(void) devmap_unload(owner->unlockseg, lp->cookie, PAGESIZE);
	} else {
		int err;

		if ((err = devmap_unload(owner->lockseg, lp->cookie,
		    PAGESIZE)) != 0) {
			/* error unloading lockseg mapping, abort giveup */
			return (err);
		}

		/*
		 * The old owner gets a mapping to the trash page so it can
		 * continue. devmap_umem_remap does a hat_unload (and does
		 * it holding the right locks), so there is no need to
		 * devmap_unload on the unlockseg.
		 */
		if ((err = devmap_umem_remap(owner->unlockseg, winlock_dip,
		    trashpage_cookie, 0, PAGESIZE, WINLOCK_PROT, 0, 0)) != 0) {
			/* error remapping to the trash page, abort giveup */
			return (err);
		}
		owner->flag |= TRASHPAGE;
		/*
		 * Preload the mapping to the trash page by calling
		 * devmap_load. However, devmap_load can only be called in
		 * the faulting process's context, not in the owner's
		 * process context, so we preload only if we happen to be
		 * in the owner's process context. Other processes will
		 * fault on the unlock mapping and be given a trash mapping
		 * at that time.
		 */
		if (ID(owner) == CURPROC_ID) {
			(void) devmap_load(owner->unlockseg, lp->cookie,
			    PAGESIZE, DEVMAP_ACCESS, PROT_WRITE);
		}
	}

	lp->owner = NULL;

	/* Clear the lock value in the underlying page so a new owner can grab it */
	LOCK(lp) = 0;

	if (lp->sleepers) {
		DEBUGF(4, (CE_CONT, " waking up, lp=%p\n", (void *)lp));
		cv_broadcast(&lp->locksleep);
	}
	return (0);
}

/*
 * Destroy all allocated memory.
 */

static void
lock_destroyall(void)
{
	SegLock	*lp, *lpnext;

	ASSERT(MUTEX_HELD(&winlock_mutex));
	ASSERT(lock_list == NULL);

	DEBUGF(1, (CE_CONT, "Lock list empty. Releasing free list\n"));
	for (lp = lock_free_list; lp != NULL; lp = lpnext) {
		mutex_enter(&lp->mutex);
		lpnext = lp->next;
		ASSERT(lp->clients == NULL);
		ASSERT(lp->owner == NULL);
		ASSERT(lp->alloccount == 0);
		mutex_destroy(&lp->mutex);
		cv_destroy(&lp->locksleep);
		kmem_free(lp, sizeof (SegLock));
	}
	lock_free_list = NULL;
	next_lock = 0;
}


/* RFE: create mdb walkers instead of dump routines? */
static void
seglock_dump_all(void)
{
	SegLock	*lp;

	mutex_enter(&winlock_mutex);
	cmn_err(CE_CONT, "ID\tKEY\tNALLOC\tATTCH\tOWNED\tLOCK\tWAITER\n");

	cmn_err(CE_CONT, "Lock List:\n");
	for (lp = lock_list; lp != NULL; lp = lp->next) {
		mutex_enter(&lp->mutex);
		cmn_err(CE_CONT, "%d\t%d\t%u\t%c\t%c\t%c\t%d\n",
		    lp->cookie, lp->key, lp->alloccount,
		    lp->clients ? 'Y' : 'N',
		    lp->owner ? 'Y' : 'N',
		    lp->lockptr != 0 && LOCK(lp) ? 'Y' : 'N',
		    lp->sleepers);
		mutex_exit(&lp->mutex);
	}
	cmn_err(CE_CONT, "Free Lock List:\n");
	for (lp = lock_free_list; lp != NULL; lp = lp->next) {
		mutex_enter(&lp->mutex);
		cmn_err(CE_CONT, "%d\t%d\t%u\t%c\t%c\t%c\t%d\n",
		    lp->cookie, lp->key, lp->alloccount,
		    lp->clients ? 'Y' : 'N',
		    lp->owner ? 'Y' : 'N',
		    lp->lockptr != 0 && LOCK(lp) ? 'Y' : 'N',
		    lp->sleepers);
		mutex_exit(&lp->mutex);
	}

#ifdef DEBUG
	if (lock_debug < 3) {
		mutex_exit(&winlock_mutex);
		return;
	}

	for (lp = lock_list; lp != NULL; lp = lp->next) {
		SegProc	*sdp;

		mutex_enter(&lp->mutex);
		cmn_err(CE_CONT,
		    "lock %p, key=%d, cookie=%d, nalloc=%u, lock=%d, wait=%d\n",
		    (void *)lp, lp->key, lp->cookie, lp->alloccount,
		    lp->lockptr != 0 ? LOCK(lp) : -1, lp->sleepers);

		cmn_err(CE_CONT,
		    "style=%d, lockptr=%p, timeout=%ld, clients=%p, owner=%p\n",
		    lp->style, (void *)lp->lockptr, lp->timeout,
		    (void *)lp->clients, (void *)lp->owner);


		for (sdp = lp->clients; sdp != NULL; sdp = sdp->next) {
			cmn_err(CE_CONT, "  client %p%s, lp=%p, flag=%x, "
			    "process tag=%p, lockseg=%p, unlockseg=%p\n",
			    (void *)sdp, sdp == lp->owner ? " (owner)" : "",
			    (void *)sdp->lp, sdp->flag, (void *)ID(sdp),
			    (void *)sdp->lockseg, (void *)sdp->unlockseg);
		}
		mutex_exit(&lp->mutex);
	}
#endif
	mutex_exit(&winlock_mutex);
}

#include <sys/modctl.h>

static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module. This one is a driver */
	"Winlock Driver",	/* Name of the module */
	&winlock_ops,		/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	{ (void *)&modldrv, NULL }
};

int
_init(void)
{
	int e;

	mutex_init(&winlock_mutex, NULL, MUTEX_DEFAULT, NULL);
	e = mod_install(&modlinkage);
	if (e) {
		mutex_destroy(&winlock_mutex);
	}
	return (e);
}


int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int e;

	e = mod_remove(&modlinkage);
	if (e == 0) {
		mutex_destroy(&winlock_mutex);
	}
	return (e);
}