1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 /* 30 * Inter-Process Communication Semaphore Facility. 31 * 32 * See os/ipc.c for a description of common IPC functionality. 33 * 34 * Resource controls 35 * ----------------- 36 * 37 * Control: zone.max-sem-ids (rc_zone_semmni) 38 * Description: Maximum number of semaphore ids allowed a zone. 39 * 40 * When semget() is used to allocate a semaphore set, one id is 41 * allocated. If the id allocation doesn't succeed, semget() fails 42 * and errno is set to ENOSPC. Upon successful semctl(, IPC_RMID) 43 * the id is deallocated. 44 * 45 * Control: project.max-sem-ids (rc_project_semmni) 46 * Description: Maximum number of semaphore ids allowed a project. 47 * 48 * When semget() is used to allocate a semaphore set, one id is 49 * allocated. If the id allocation doesn't succeed, semget() fails 50 * and errno is set to ENOSPC. Upon successful semctl(, IPC_RMID) 51 * the id is deallocated. 52 * 53 * Control: process.max-sem-nsems (rc_process_semmsl) 54 * Description: Maximum number of semaphores allowed per semaphore set. 55 * 56 * When semget() is used to allocate a semaphore set, the size of the 57 * set is compared with this limit. If the number of semaphores 58 * exceeds the limit, semget() fails and errno is set to EINVAL. 59 * 60 * Control: process.max-sem-ops (rc_process_semopm) 61 * Description: Maximum number of semaphore operations allowed per 62 * semop call. 63 * 64 * When semget() successfully allocates a semaphore set, the minimum 65 * enforced value of this limit is used to initialize the 66 * "system-imposed maximum" number of operations a semop() call for 67 * this set can perform. 68 * 69 * Undo structures 70 * --------------- 71 * 72 * Removing the undo structure tunables involved a serious redesign of 73 * how they were implemented. There is now one undo structure for 74 * every process/semaphore array combination (lazily allocated, of 75 * course), and each is equal in size to the semaphore it corresponds 76 * to. To avoid scalability and performance problems, the undo 77 * structures are stored in two places: a per-process AVL tree sorted 78 * by ksemid pointer (p_semacct, protected by p_lock) and an unsorted 79 * per-semaphore linked list (sem_undos, protected by the semaphore's 80 * ID lock). The former is used by semop, where a lookup is performed 81 * once and cached if SEM_UNDO is specified for any of the operations, 82 * and at process exit where the undoable operations are rolled back. 83 * The latter is used when removing the semaphore, so the undo 84 * structures can be removed from the appropriate processes' trees. 85 * 86 * The undo structure itself contains pointers to the ksemid and proc 87 * to which it corresponds, a list node, an AVL node, and an array of 88 * adjust-on-exit (AOE) values. When an undo structure is allocated it 89 * is immediately added to both the process's tree and the semaphore's 90 * list. Lastly, the reference count on the semaphore is increased. 91 * 92 * Avoiding a lock ordering violation between p_lock and the ID lock, 93 * wont to occur when there is a race between a process exiting and the 94 * removal of a semaphore, mandates the delicate dance that exists 95 * between semexit and sem_rmid. 96 * 97 * sem_rmid, holding the ID lock, iterates through all undo structures 98 * and for each takes the appropriate process's p_lock and checks to 99 * see if p_semacct is NULL. If it is, it skips that undo structure 100 * and continues to the next. Otherwise, it removes the undo structure 101 * from both the AVL tree and the semaphore's list, and releases the 102 * hold that the undo structure had on the semaphore. 103 * 104 * The important other half of this is semexit, which will immediately 105 * take p_lock, obtain the AVL pointer, clear p_semacct, and drop 106 * p_lock. From this point on it is semexit's responsibility to clean 107 * up all undo structures found in the tree -- a coexecuting sem_rmid 108 * will see the NULL p_semacct and skip that undo structure. It walks 109 * the AVL tree (using avl_destroy_nodes) and for each undo structure 110 * takes the appropriate semaphore's ID lock (always legal since the 111 * undo structure has a hold on the semaphore), updates all semaphores 112 * with non-zero AOE values, and removes the structure from the 113 * semaphore's list. It then drops the structure's reference on the 114 * semaphore, drops the ID lock, and frees the undo structure. 115 */ 116 117 #include <sys/types.h> 118 #include <sys/t_lock.h> 119 #include <sys/param.h> 120 #include <sys/systm.h> 121 #include <sys/sysmacros.h> 122 #include <sys/cred.h> 123 #include <sys/vmem.h> 124 #include <sys/kmem.h> 125 #include <sys/errno.h> 126 #include <sys/time.h> 127 #include <sys/ipc.h> 128 #include <sys/ipc_impl.h> 129 #include <sys/sem.h> 130 #include <sys/sem_impl.h> 131 #include <sys/user.h> 132 #include <sys/proc.h> 133 #include <sys/cpuvar.h> 134 #include <sys/debug.h> 135 #include <sys/var.h> 136 #include <sys/cmn_err.h> 137 #include <sys/modctl.h> 138 #include <sys/syscall.h> 139 #include <sys/avl.h> 140 #include <sys/list.h> 141 #include <sys/zone.h> 142 143 #include <c2/audit.h> 144 145 extern rctl_hndl_t rc_zone_semmni; 146 extern rctl_hndl_t rc_project_semmni; 147 extern rctl_hndl_t rc_process_semmsl; 148 extern rctl_hndl_t rc_process_semopm; 149 static ipc_service_t *sem_svc; 150 static zone_key_t sem_zone_key; 151 152 /* 153 * The following tunables are obsolete. Though for compatibility we 154 * still read and interpret seminfo_semmsl, seminfo_semopm and 155 * seminfo_semmni (see os/project.c and os/rctl_proc.c), the preferred 156 * mechanism for administrating the IPC Semaphore facility is through 157 * the resource controls described at the top of this file. 158 */ 159 int seminfo_semaem = 16384; /* (obsolete) */ 160 int seminfo_semmap = 10; /* (obsolete) */ 161 int seminfo_semmni = 10; /* (obsolete) */ 162 int seminfo_semmns = 60; /* (obsolete) */ 163 int seminfo_semmnu = 30; /* (obsolete) */ 164 int seminfo_semmsl = 25; /* (obsolete) */ 165 int seminfo_semopm = 10; /* (obsolete) */ 166 int seminfo_semume = 10; /* (obsolete) */ 167 int seminfo_semusz = 96; /* (obsolete) */ 168 int seminfo_semvmx = 32767; /* (obsolete) */ 169 170 #define SEM_MAXUCOPS 4096 /* max # of unchecked ops per semop call */ 171 #define SEM_UNDOSZ(n) (sizeof (struct sem_undo) + (n - 1) * sizeof (int)) 172 173 static int semsys(int opcode, uintptr_t a0, uintptr_t a1, 174 uintptr_t a2, uintptr_t a3); 175 static void sem_dtor(kipc_perm_t *); 176 static void sem_rmid(kipc_perm_t *); 177 static void sem_remove_zone(zoneid_t, void *); 178 179 static struct sysent ipcsem_sysent = { 180 5, 181 SE_NOUNLOAD | SE_ARGC | SE_32RVAL1, 182 semsys 183 }; 184 185 /* 186 * Module linkage information for the kernel. 187 */ 188 static struct modlsys modlsys = { 189 &mod_syscallops, "System V semaphore facility", &ipcsem_sysent 190 }; 191 192 #ifdef _SYSCALL32_IMPL 193 static struct modlsys modlsys32 = { 194 &mod_syscallops32, "32-bit System V semaphore facility", &ipcsem_sysent 195 }; 196 #endif 197 198 static struct modlinkage modlinkage = { 199 MODREV_1, 200 { &modlsys, 201 #ifdef _SYSCALL32_IMPL 202 &modlsys32, 203 #endif 204 NULL 205 } 206 }; 207 208 209 int 210 _init(void) 211 { 212 int result; 213 214 sem_svc = ipcs_create("semids", rc_project_semmni, rc_zone_semmni, 215 sizeof (ksemid_t), sem_dtor, sem_rmid, AT_IPC_SEM, 216 offsetof(ipc_rqty_t, ipcq_semmni)); 217 zone_key_create(&sem_zone_key, NULL, sem_remove_zone, NULL); 218 219 if ((result = mod_install(&modlinkage)) == 0) 220 return (0); 221 222 (void) zone_key_delete(sem_zone_key); 223 ipcs_destroy(sem_svc); 224 225 return (result); 226 } 227 228 int 229 _fini(void) 230 { 231 return (EBUSY); 232 } 233 234 int 235 _info(struct modinfo *modinfop) 236 { 237 return (mod_info(&modlinkage, modinfop)); 238 } 239 240 static void 241 sem_dtor(kipc_perm_t *perm) 242 { 243 ksemid_t *sp = (ksemid_t *)perm; 244 245 kmem_free(sp->sem_base, 246 P2ROUNDUP(sp->sem_nsems * sizeof (struct sem), 64)); 247 list_destroy(&sp->sem_undos); 248 } 249 250 /* 251 * sem_undo_add - Create or update adjust on exit entry. 252 */ 253 static int 254 sem_undo_add(short val, ushort_t num, struct sem_undo *undo) 255 { 256 int newval = undo->un_aoe[num] - val; 257 258 if (newval > USHRT_MAX || newval < -USHRT_MAX) 259 return (ERANGE); 260 undo->un_aoe[num] = newval; 261 262 return (0); 263 } 264 265 /* 266 * sem_undo_clear - clears all undo entries for specified semaphores 267 * 268 * Used when semaphores are reset by SETVAL or SETALL. 269 */ 270 static void 271 sem_undo_clear(ksemid_t *sp, ushort_t low, ushort_t high) 272 { 273 struct sem_undo *undo; 274 int i; 275 276 ASSERT(low <= high); 277 ASSERT(high < sp->sem_nsems); 278 279 for (undo = list_head(&sp->sem_undos); undo; 280 undo = list_next(&sp->sem_undos, undo)) 281 for (i = low; i <= high; i++) 282 undo->un_aoe[i] = 0; 283 } 284 285 /* 286 * sem_rollback - roll back work done so far if unable to complete operation 287 */ 288 static void 289 sem_rollback(ksemid_t *sp, struct sembuf *op, int n, struct sem_undo *undo) 290 { 291 struct sem *semp; /* semaphore ptr */ 292 293 for (op += n - 1; n--; op--) { 294 if (op->sem_op == 0) 295 continue; 296 semp = &sp->sem_base[op->sem_num]; 297 semp->semval -= op->sem_op; 298 if (op->sem_flg & SEM_UNDO) { 299 ASSERT(undo != NULL); 300 (void) sem_undo_add(-op->sem_op, op->sem_num, undo); 301 } 302 } 303 } 304 305 static void 306 sem_rmid(kipc_perm_t *perm) 307 { 308 ksemid_t *sp = (ksemid_t *)perm; 309 struct sem *semp; 310 struct sem_undo *undo; 311 size_t size = SEM_UNDOSZ(sp->sem_nsems); 312 int i; 313 314 /*LINTED*/ 315 while (undo = list_head(&sp->sem_undos)) { 316 list_remove(&sp->sem_undos, undo); 317 mutex_enter(&undo->un_proc->p_lock); 318 if (undo->un_proc->p_semacct == NULL) { 319 mutex_exit(&undo->un_proc->p_lock); 320 continue; 321 } 322 avl_remove(undo->un_proc->p_semacct, undo); 323 mutex_exit(&undo->un_proc->p_lock); 324 kmem_free(undo, size); 325 ipc_rele_locked(sem_svc, (kipc_perm_t *)sp); 326 } 327 328 for (i = 0; i < sp->sem_nsems; i++) { 329 semp = &sp->sem_base[i]; 330 semp->semval = semp->sempid = 0; 331 if (semp->semncnt) { 332 cv_broadcast(&semp->semncnt_cv); 333 semp->semncnt = 0; 334 } 335 if (semp->semzcnt) { 336 cv_broadcast(&semp->semzcnt_cv); 337 semp->semzcnt = 0; 338 } 339 } 340 } 341 342 /* 343 * semctl - Semctl system call. 344 */ 345 static int 346 semctl(int semid, uint_t semnum, int cmd, uintptr_t arg) 347 { 348 ksemid_t *sp; /* ptr to semaphore header */ 349 struct sem *p; /* ptr to semaphore */ 350 unsigned int i; /* loop control */ 351 ushort_t *vals, *vp; 352 size_t vsize = 0; 353 int error = 0; 354 int retval = 0; 355 struct cred *cr; 356 kmutex_t *lock; 357 model_t mdl = get_udatamodel(); 358 STRUCT_DECL(semid_ds, sid); 359 struct semid_ds64 ds64; 360 361 STRUCT_INIT(sid, mdl); 362 cr = CRED(); 363 364 /* 365 * Perform pre- or non-lookup actions (e.g. copyins, RMID). 366 */ 367 switch (cmd) { 368 case IPC_SET: 369 if (copyin((void *)arg, STRUCT_BUF(sid), STRUCT_SIZE(sid))) 370 return (set_errno(EFAULT)); 371 break; 372 373 case IPC_SET64: 374 if (copyin((void *)arg, &ds64, sizeof (struct semid_ds64))) 375 return (set_errno(EFAULT)); 376 break; 377 378 case SETALL: 379 if ((lock = ipc_lookup(sem_svc, semid, 380 (kipc_perm_t **)&sp)) == NULL) 381 return (set_errno(EINVAL)); 382 vsize = sp->sem_nsems * sizeof (*vals); 383 mutex_exit(lock); 384 385 /* allocate space to hold all semaphore values */ 386 vals = kmem_alloc(vsize, KM_SLEEP); 387 388 if (copyin((void *)arg, vals, vsize)) { 389 kmem_free(vals, vsize); 390 return (set_errno(EFAULT)); 391 } 392 break; 393 394 case IPC_RMID: 395 if (error = ipc_rmid(sem_svc, semid, cr)) 396 return (set_errno(error)); 397 return (0); 398 } 399 400 if ((lock = ipc_lookup(sem_svc, semid, (kipc_perm_t **)&sp)) == NULL) { 401 if (vsize != 0) 402 kmem_free(vals, vsize); 403 return (set_errno(EINVAL)); 404 } 405 switch (cmd) { 406 /* Set ownership and permissions. */ 407 case IPC_SET: 408 409 if (error = ipcperm_set(sem_svc, cr, &sp->sem_perm, 410 &STRUCT_BUF(sid)->sem_perm, mdl)) { 411 mutex_exit(lock); 412 return (set_errno(error)); 413 } 414 sp->sem_ctime = gethrestime_sec(); 415 mutex_exit(lock); 416 return (0); 417 418 /* Get semaphore data structure. */ 419 case IPC_STAT: 420 421 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 422 mutex_exit(lock); 423 return (set_errno(error)); 424 } 425 426 ipcperm_stat(&STRUCT_BUF(sid)->sem_perm, &sp->sem_perm, mdl); 427 STRUCT_FSETP(sid, sem_base, NULL); /* kernel addr */ 428 STRUCT_FSET(sid, sem_nsems, sp->sem_nsems); 429 STRUCT_FSET(sid, sem_otime, sp->sem_otime); 430 STRUCT_FSET(sid, sem_ctime, sp->sem_ctime); 431 STRUCT_FSET(sid, sem_binary, sp->sem_binary); 432 mutex_exit(lock); 433 434 if (copyout(STRUCT_BUF(sid), (void *)arg, STRUCT_SIZE(sid))) 435 return (set_errno(EFAULT)); 436 return (0); 437 438 case IPC_SET64: 439 440 if (error = ipcperm_set64(sem_svc, cr, &sp->sem_perm, 441 &ds64.semx_perm)) { 442 mutex_exit(lock); 443 return (set_errno(error)); 444 } 445 sp->sem_ctime = gethrestime_sec(); 446 mutex_exit(lock); 447 return (0); 448 449 case IPC_STAT64: 450 451 ipcperm_stat64(&ds64.semx_perm, &sp->sem_perm); 452 ds64.semx_nsems = sp->sem_nsems; 453 ds64.semx_otime = sp->sem_otime; 454 ds64.semx_ctime = sp->sem_ctime; 455 456 mutex_exit(lock); 457 if (copyout(&ds64, (void *)arg, sizeof (struct semid_ds64))) 458 return (set_errno(EFAULT)); 459 460 return (0); 461 462 /* Get # of processes sleeping for greater semval. */ 463 case GETNCNT: 464 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 465 mutex_exit(lock); 466 return (set_errno(error)); 467 } 468 if (semnum >= sp->sem_nsems) { 469 mutex_exit(lock); 470 return (set_errno(EINVAL)); 471 } 472 retval = sp->sem_base[semnum].semncnt; 473 mutex_exit(lock); 474 return (retval); 475 476 /* Get pid of last process to operate on semaphore. */ 477 case GETPID: 478 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 479 mutex_exit(lock); 480 return (set_errno(error)); 481 } 482 if (semnum >= sp->sem_nsems) { 483 mutex_exit(lock); 484 return (set_errno(EINVAL)); 485 } 486 retval = sp->sem_base[semnum].sempid; 487 mutex_exit(lock); 488 return (retval); 489 490 /* Get semval of one semaphore. */ 491 case GETVAL: 492 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 493 mutex_exit(lock); 494 return (set_errno(error)); 495 } 496 if (semnum >= sp->sem_nsems) { 497 mutex_exit(lock); 498 return (set_errno(EINVAL)); 499 } 500 retval = sp->sem_base[semnum].semval; 501 mutex_exit(lock); 502 return (retval); 503 504 /* Get all semvals in set. */ 505 case GETALL: 506 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 507 mutex_exit(lock); 508 return (set_errno(error)); 509 } 510 511 /* allocate space to hold all semaphore values */ 512 vsize = sp->sem_nsems * sizeof (*vals); 513 vals = vp = kmem_alloc(vsize, KM_SLEEP); 514 515 for (i = sp->sem_nsems, p = sp->sem_base; i--; p++, vp++) 516 bcopy(&p->semval, vp, sizeof (p->semval)); 517 518 mutex_exit(lock); 519 520 if (copyout((void *)vals, (void *)arg, vsize)) { 521 kmem_free(vals, vsize); 522 return (set_errno(EFAULT)); 523 } 524 525 kmem_free(vals, vsize); 526 return (0); 527 528 /* Get # of processes sleeping for semval to become zero. */ 529 case GETZCNT: 530 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 531 mutex_exit(lock); 532 return (set_errno(error)); 533 } 534 if (semnum >= sp->sem_nsems) { 535 mutex_exit(lock); 536 return (set_errno(EINVAL)); 537 } 538 retval = sp->sem_base[semnum].semzcnt; 539 mutex_exit(lock); 540 return (retval); 541 542 /* Set semval of one semaphore. */ 543 case SETVAL: 544 if (error = ipcperm_access(&sp->sem_perm, SEM_A, cr)) { 545 mutex_exit(lock); 546 return (set_errno(error)); 547 } 548 if (semnum >= sp->sem_nsems) { 549 mutex_exit(lock); 550 return (set_errno(EINVAL)); 551 } 552 if ((uint_t)arg > USHRT_MAX) { 553 mutex_exit(lock); 554 return (set_errno(ERANGE)); 555 } 556 p = &sp->sem_base[semnum]; 557 if ((p->semval = (ushort_t)arg) != 0) { 558 if (p->semncnt) { 559 cv_broadcast(&p->semncnt_cv); 560 } 561 } else if (p->semzcnt) { 562 cv_broadcast(&p->semzcnt_cv); 563 } 564 p->sempid = curproc->p_pid; 565 sem_undo_clear(sp, (ushort_t)semnum, (ushort_t)semnum); 566 mutex_exit(lock); 567 return (0); 568 569 /* Set semvals of all semaphores in set. */ 570 case SETALL: 571 /* Check if semaphore set has been deleted and reallocated. */ 572 if (sp->sem_nsems * sizeof (*vals) != vsize) { 573 error = set_errno(EINVAL); 574 goto seterr; 575 } 576 if (error = ipcperm_access(&sp->sem_perm, SEM_A, cr)) { 577 error = set_errno(error); 578 goto seterr; 579 } 580 sem_undo_clear(sp, 0, sp->sem_nsems - 1); 581 for (i = 0, p = sp->sem_base; i < sp->sem_nsems; 582 (p++)->sempid = curproc->p_pid) { 583 if ((p->semval = vals[i++]) != 0) { 584 if (p->semncnt) { 585 cv_broadcast(&p->semncnt_cv); 586 } 587 } else if (p->semzcnt) { 588 cv_broadcast(&p->semzcnt_cv); 589 } 590 } 591 seterr: 592 mutex_exit(lock); 593 kmem_free(vals, vsize); 594 return (error); 595 596 default: 597 mutex_exit(lock); 598 return (set_errno(EINVAL)); 599 } 600 601 /* NOTREACHED */ 602 } 603 604 /* 605 * semexit - Called by exit() to clean up on process exit. 606 */ 607 void 608 semexit(proc_t *pp) 609 { 610 avl_tree_t *tree; 611 struct sem_undo *undo; 612 void *cookie = NULL; 613 614 mutex_enter(&pp->p_lock); 615 tree = pp->p_semacct; 616 pp->p_semacct = NULL; 617 mutex_exit(&pp->p_lock); 618 619 while (undo = avl_destroy_nodes(tree, &cookie)) { 620 ksemid_t *sp = undo->un_sp; 621 size_t size = SEM_UNDOSZ(sp->sem_nsems); 622 int i; 623 624 (void) ipc_lock(sem_svc, sp->sem_perm.ipc_id); 625 if (!IPC_FREE(&sp->sem_perm)) { 626 for (i = 0; i < sp->sem_nsems; i++) { 627 int adj = undo->un_aoe[i]; 628 if (adj) { 629 struct sem *semp = &sp->sem_base[i]; 630 int v = (int)semp->semval + adj; 631 632 if (v < 0 || v > USHRT_MAX) 633 continue; 634 semp->semval = (ushort_t)v; 635 if (v == 0 && semp->semzcnt) 636 cv_broadcast(&semp->semzcnt_cv); 637 if (adj > 0 && semp->semncnt) 638 cv_broadcast(&semp->semncnt_cv); 639 } 640 } 641 list_remove(&sp->sem_undos, undo); 642 } 643 ipc_rele(sem_svc, (kipc_perm_t *)sp); 644 kmem_free(undo, size); 645 } 646 647 avl_destroy(tree); 648 kmem_free(tree, sizeof (avl_tree_t)); 649 } 650 651 /* 652 * Remove all semaphores associated with a given zone. Called by 653 * zone_shutdown when the zone is halted. 654 */ 655 /*ARGSUSED1*/ 656 static void 657 sem_remove_zone(zoneid_t zoneid, void *arg) 658 { 659 ipc_remove_zone(sem_svc, zoneid); 660 } 661 662 /* 663 * semget - Semget system call. 664 */ 665 static int 666 semget(key_t key, int nsems, int semflg) 667 { 668 ksemid_t *sp; 669 kmutex_t *lock; 670 int id, error; 671 proc_t *pp = curproc; 672 673 top: 674 if (error = ipc_get(sem_svc, key, semflg, (kipc_perm_t **)&sp, &lock)) 675 return (set_errno(error)); 676 677 if (!IPC_FREE(&sp->sem_perm)) { 678 /* 679 * A semaphore with the requested key exists. 680 */ 681 if (!((nsems >= 0) && (nsems <= sp->sem_nsems))) { 682 mutex_exit(lock); 683 return (set_errno(EINVAL)); 684 } 685 } else { 686 /* 687 * This is a new semaphore set. Finish initialization. 688 */ 689 if (nsems <= 0 || (rctl_test(rc_process_semmsl, pp->p_rctls, pp, 690 nsems, RCA_SAFE) & RCT_DENY)) { 691 mutex_exit(lock); 692 mutex_exit(&pp->p_lock); 693 ipc_cleanup(sem_svc, (kipc_perm_t *)sp); 694 return (set_errno(EINVAL)); 695 } 696 mutex_exit(lock); 697 mutex_exit(&pp->p_lock); 698 699 /* 700 * We round the allocation up to coherency granularity 701 * so that multiple semaphore allocations won't result 702 * in the false sharing of their sem structures. 703 */ 704 sp->sem_base = 705 kmem_zalloc(P2ROUNDUP(nsems * sizeof (struct sem), 64), 706 KM_SLEEP); 707 sp->sem_binary = (nsems == 1); 708 sp->sem_nsems = (ushort_t)nsems; 709 sp->sem_ctime = gethrestime_sec(); 710 sp->sem_otime = 0; 711 list_create(&sp->sem_undos, sizeof (struct sem_undo), 712 offsetof(struct sem_undo, un_list)); 713 714 if (error = ipc_commit_begin(sem_svc, key, semflg, 715 (kipc_perm_t *)sp)) { 716 if (error == EAGAIN) 717 goto top; 718 return (set_errno(error)); 719 } 720 sp->sem_maxops = 721 rctl_enforced_value(rc_process_semopm, pp->p_rctls, pp); 722 if (rctl_test(rc_process_semmsl, pp->p_rctls, pp, nsems, 723 RCA_SAFE) & RCT_DENY) { 724 ipc_cleanup(sem_svc, (kipc_perm_t *)sp); 725 return (set_errno(EINVAL)); 726 } 727 lock = ipc_commit_end(sem_svc, &sp->sem_perm); 728 } 729 730 if (AU_AUDITING()) 731 audit_ipcget(AT_IPC_SEM, (void *)sp); 732 733 id = sp->sem_perm.ipc_id; 734 mutex_exit(lock); 735 return (id); 736 } 737 738 /* 739 * semids system call. 740 */ 741 static int 742 semids(int *buf, uint_t nids, uint_t *pnids) 743 { 744 int error; 745 746 if (error = ipc_ids(sem_svc, buf, nids, pnids)) 747 return (set_errno(error)); 748 749 return (0); 750 } 751 752 753 /* 754 * Helper function for semop - copies in the provided timespec and 755 * computes the absolute future time after which we must return. 756 */ 757 static int 758 compute_timeout(timespec_t **tsp, timespec_t *ts, timespec_t *now, 759 timespec_t *timeout) 760 { 761 model_t datamodel = get_udatamodel(); 762 763 if (datamodel == DATAMODEL_NATIVE) { 764 if (copyin(timeout, ts, sizeof (timespec_t))) 765 return (EFAULT); 766 } else { 767 timespec32_t ts32; 768 769 if (copyin(timeout, &ts32, sizeof (timespec32_t))) 770 return (EFAULT); 771 TIMESPEC32_TO_TIMESPEC(ts, &ts32) 772 } 773 774 if (itimerspecfix(ts)) 775 return (EINVAL); 776 777 /* 778 * Convert the timespec value into absolute time. 779 */ 780 timespecadd(ts, now); 781 *tsp = ts; 782 783 return (0); 784 } 785 786 /* 787 * Undo structure comparator. We sort based on ksemid_t pointer. 788 */ 789 static int 790 sem_undo_compar(const void *x, const void *y) 791 { 792 struct sem_undo *undo1 = (struct sem_undo *)x; 793 struct sem_undo *undo2 = (struct sem_undo *)y; 794 795 if (undo1->un_sp < undo2->un_sp) 796 return (-1); 797 if (undo1->un_sp > undo2->un_sp) 798 return (1); 799 return (0); 800 } 801 802 /* 803 * Helper function for semop - creates an undo structure and adds it to 804 * the process's avl tree and the semaphore's list. 805 */ 806 static int 807 sem_undo_alloc(proc_t *pp, ksemid_t *sp, kmutex_t **lock, 808 struct sem_undo *template, struct sem_undo **un) 809 { 810 size_t size; 811 struct sem_undo *undo; 812 avl_tree_t *tree = NULL; 813 avl_index_t where; 814 815 mutex_exit(*lock); 816 817 size = SEM_UNDOSZ(sp->sem_nsems); 818 undo = kmem_zalloc(size, KM_SLEEP); 819 undo->un_proc = pp; 820 undo->un_sp = sp; 821 822 if (pp->p_semacct == NULL) 823 tree = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP); 824 825 *lock = ipc_lock(sem_svc, sp->sem_perm.ipc_id); 826 if (IPC_FREE(&sp->sem_perm)) { 827 kmem_free(undo, size); 828 if (tree) 829 kmem_free(tree, sizeof (avl_tree_t)); 830 return (EIDRM); 831 } 832 833 mutex_enter(&pp->p_lock); 834 if (tree) { 835 if (pp->p_semacct == NULL) { 836 avl_create(tree, sem_undo_compar, 837 sizeof (struct sem_undo), 838 offsetof(struct sem_undo, un_avl)); 839 pp->p_semacct = tree; 840 } else { 841 kmem_free(tree, sizeof (avl_tree_t)); 842 } 843 } 844 845 if (*un = avl_find(pp->p_semacct, template, &where)) { 846 mutex_exit(&pp->p_lock); 847 kmem_free(undo, size); 848 } else { 849 *un = undo; 850 avl_insert(pp->p_semacct, undo, where); 851 mutex_exit(&pp->p_lock); 852 list_insert_head(&sp->sem_undos, undo); 853 ipc_hold(sem_svc, (kipc_perm_t *)sp); 854 } 855 856 857 return (0); 858 } 859 860 /* 861 * semop - Semop system call. 862 */ 863 static int 864 semop(int semid, struct sembuf *sops, size_t nsops, timespec_t *timeout) 865 { 866 ksemid_t *sp = NULL; 867 kmutex_t *lock; 868 struct sembuf *op; /* ptr to operation */ 869 int i; /* loop control */ 870 struct sem *semp; /* ptr to semaphore */ 871 int error = 0; 872 struct sembuf *uops; /* ptr to copy of user ops */ 873 struct sembuf x_sem; /* avoid kmem_alloc's */ 874 timespec_t now, ts, *tsp = NULL; 875 int timecheck = 0; 876 int cvres, needundo, mode; 877 struct sem_undo *undo; 878 proc_t *pp = curproc; 879 int held = 0; 880 881 CPU_STATS_ADDQ(CPU, sys, sema, 1); /* bump semaphore op count */ 882 883 /* 884 * To avoid the cost of copying in 'timeout' in the common 885 * case, we could only grab the time here and defer the copyin 886 * and associated computations until we are about to block. 887 * 888 * The down side to this is that we would then have to spin 889 * some goto top nonsense to avoid the copyin behind the semid 890 * lock. As a common use of timed semaphores is as an explicit 891 * blocking mechanism, this could incur a greater penalty. 892 * 893 * If we eventually decide that this would be a wise route to 894 * take, the deferrable functionality is completely contained 895 * in 'compute_timeout', and the interface is defined such that 896 * we can legally not validate 'timeout' if it is unused. 897 */ 898 if (timeout != NULL) { 899 timecheck = timechanged; 900 gethrestime(&now); 901 if (error = compute_timeout(&tsp, &ts, &now, timeout)) 902 return (set_errno(error)); 903 } 904 905 /* 906 * Allocate space to hold the vector of semaphore ops. If 907 * there is only 1 operation we use a preallocated buffer on 908 * the stack for speed. 909 * 910 * Since we don't want to allow the user to allocate an 911 * arbitrary amount of kernel memory, we need to check against 912 * the number of operations allowed by the semaphore. We only 913 * bother doing this if the number of operations is larger than 914 * SEM_MAXUCOPS. 915 */ 916 if (nsops == 1) 917 uops = &x_sem; 918 else if (nsops == 0) 919 return (0); 920 else if (nsops <= SEM_MAXUCOPS) 921 uops = kmem_alloc(nsops * sizeof (*uops), KM_SLEEP); 922 923 if (nsops > SEM_MAXUCOPS) { 924 if ((lock = ipc_lookup(sem_svc, semid, 925 (kipc_perm_t **)&sp)) == NULL) 926 return (set_errno(EFAULT)); 927 928 if (nsops > sp->sem_maxops) { 929 mutex_exit(lock); 930 return (set_errno(E2BIG)); 931 } 932 held = 1; 933 ipc_hold(sem_svc, (kipc_perm_t *)sp); 934 mutex_exit(lock); 935 936 uops = kmem_alloc(nsops * sizeof (*uops), KM_SLEEP); 937 if (copyin(sops, uops, nsops * sizeof (*op))) { 938 error = EFAULT; 939 (void) ipc_lock(sem_svc, sp->sem_perm.ipc_id); 940 goto semoperr; 941 } 942 943 lock = ipc_lock(sem_svc, sp->sem_perm.ipc_id); 944 if (IPC_FREE(&sp->sem_perm)) { 945 error = EIDRM; 946 goto semoperr; 947 } 948 } else { 949 /* 950 * This could be interleaved with the above code, but 951 * keeping them separate improves readability. 952 */ 953 if (copyin(sops, uops, nsops * sizeof (*op))) { 954 error = EFAULT; 955 goto semoperr_unlocked; 956 } 957 958 if ((lock = ipc_lookup(sem_svc, semid, 959 (kipc_perm_t **)&sp)) == NULL) { 960 error = EINVAL; 961 goto semoperr_unlocked; 962 } 963 964 if (nsops > sp->sem_maxops) { 965 error = E2BIG; 966 goto semoperr; 967 } 968 } 969 970 /* 971 * Scan all operations. Verify that sem #s are in range and 972 * this process is allowed the requested operations. If any 973 * operations are marked SEM_UNDO, find (or allocate) the undo 974 * structure for this process and semaphore. 975 */ 976 needundo = 0; 977 mode = 0; 978 for (i = 0, op = uops; i++ < nsops; op++) { 979 mode |= op->sem_op ? SEM_A : SEM_R; 980 if (op->sem_num >= sp->sem_nsems) { 981 error = EFBIG; 982 goto semoperr; 983 } 984 if ((op->sem_flg & SEM_UNDO) && op->sem_op) 985 needundo = 1; 986 } 987 if (error = ipcperm_access(&sp->sem_perm, mode, CRED())) 988 goto semoperr; 989 990 if (needundo) { 991 struct sem_undo template; 992 993 template.un_sp = sp; 994 mutex_enter(&pp->p_lock); 995 if (pp->p_semacct) 996 undo = avl_find(pp->p_semacct, &template, NULL); 997 else 998 undo = NULL; 999 mutex_exit(&pp->p_lock); 1000 if (undo == NULL) { 1001 if (!held) { 1002 held = 1; 1003 ipc_hold(sem_svc, (kipc_perm_t *)sp); 1004 } 1005 if (error = sem_undo_alloc(pp, sp, &lock, &template, 1006 &undo)) 1007 goto semoperr; 1008 1009 /* sem_undo_alloc unlocks the semaphore */ 1010 if (error = ipcperm_access(&sp->sem_perm, mode, CRED())) 1011 goto semoperr; 1012 } 1013 } 1014 1015 check: 1016 /* 1017 * Loop waiting for the operations to be satisfied atomically. 1018 * Actually, do the operations and undo them if a wait is needed 1019 * or an error is detected. 1020 */ 1021 for (i = 0; i < nsops; i++) { 1022 op = &uops[i]; 1023 semp = &sp->sem_base[op->sem_num]; 1024 1025 /* 1026 * Raise the semaphore (i.e. sema_v) 1027 */ 1028 if (op->sem_op > 0) { 1029 if (op->sem_op + (int)semp->semval > USHRT_MAX || 1030 ((op->sem_flg & SEM_UNDO) && 1031 (error = sem_undo_add(op->sem_op, op->sem_num, 1032 undo)))) { 1033 if (i) 1034 sem_rollback(sp, uops, i, undo); 1035 if (error == 0) 1036 error = ERANGE; 1037 goto semoperr; 1038 } 1039 semp->semval += op->sem_op; 1040 /* 1041 * If we are only incrementing the semaphore value 1042 * by one on a binary semaphore, we can cv_signal. 1043 */ 1044 if (semp->semncnt) { 1045 if (op->sem_op == 1 && sp->sem_binary) 1046 cv_signal(&semp->semncnt_cv); 1047 else 1048 cv_broadcast(&semp->semncnt_cv); 1049 } 1050 if (semp->semzcnt && !semp->semval) 1051 cv_broadcast(&semp->semzcnt_cv); 1052 continue; 1053 } 1054 1055 /* 1056 * Lower the semaphore (i.e. sema_p) 1057 */ 1058 if (op->sem_op < 0) { 1059 if (semp->semval >= (unsigned)(-op->sem_op)) { 1060 if ((op->sem_flg & SEM_UNDO) && 1061 (error = sem_undo_add(op->sem_op, 1062 op->sem_num, undo))) { 1063 if (i) 1064 sem_rollback(sp, uops, i, undo); 1065 goto semoperr; 1066 } 1067 semp->semval += op->sem_op; 1068 if (semp->semzcnt && !semp->semval) 1069 cv_broadcast(&semp->semzcnt_cv); 1070 continue; 1071 } 1072 if (i) 1073 sem_rollback(sp, uops, i, undo); 1074 if (op->sem_flg & IPC_NOWAIT) { 1075 error = EAGAIN; 1076 goto semoperr; 1077 } 1078 1079 /* 1080 * Mark the semaphore set as not a binary type 1081 * if we are decrementing the value by more than 1. 1082 * 1083 * V operations will resort to cv_broadcast 1084 * for this set because there are too many weird 1085 * cases that have to be caught. 1086 */ 1087 if (op->sem_op < -1) 1088 sp->sem_binary = 0; 1089 if (!held) { 1090 held = 1; 1091 ipc_hold(sem_svc, (kipc_perm_t *)sp); 1092 } 1093 semp->semncnt++; 1094 cvres = cv_waituntil_sig(&semp->semncnt_cv, lock, 1095 tsp, timecheck); 1096 lock = ipc_relock(sem_svc, sp->sem_perm.ipc_id, lock); 1097 1098 if (!IPC_FREE(&sp->sem_perm)) { 1099 ASSERT(semp->semncnt != 0); 1100 semp->semncnt--; 1101 if (cvres > 0) /* normal wakeup */ 1102 goto check; 1103 } 1104 1105 /* EINTR or EAGAIN overrides EIDRM */ 1106 if (cvres == 0) 1107 error = EINTR; 1108 else if (cvres < 0) 1109 error = EAGAIN; 1110 else 1111 error = EIDRM; 1112 goto semoperr; 1113 } 1114 1115 /* 1116 * Wait for zero value 1117 */ 1118 if (semp->semval) { 1119 if (i) 1120 sem_rollback(sp, uops, i, undo); 1121 if (op->sem_flg & IPC_NOWAIT) { 1122 error = EAGAIN; 1123 goto semoperr; 1124 } 1125 1126 if (!held) { 1127 held = 1; 1128 ipc_hold(sem_svc, (kipc_perm_t *)sp); 1129 } 1130 semp->semzcnt++; 1131 cvres = cv_waituntil_sig(&semp->semzcnt_cv, lock, 1132 tsp, timecheck); 1133 lock = ipc_relock(sem_svc, sp->sem_perm.ipc_id, lock); 1134 1135 /* 1136 * Don't touch semp if the semaphores have been removed. 1137 */ 1138 if (!IPC_FREE(&sp->sem_perm)) { 1139 ASSERT(semp->semzcnt != 0); 1140 semp->semzcnt--; 1141 if (cvres > 0) /* normal wakeup */ 1142 goto check; 1143 } 1144 1145 /* EINTR or EAGAIN overrides EIDRM */ 1146 if (cvres == 0) 1147 error = EINTR; 1148 else if (cvres < 0) 1149 error = EAGAIN; 1150 else 1151 error = EIDRM; 1152 goto semoperr; 1153 } 1154 } 1155 1156 /* All operations succeeded. Update sempid for accessed semaphores. */ 1157 for (i = 0, op = uops; i++ < nsops; 1158 sp->sem_base[(op++)->sem_num].sempid = pp->p_pid) 1159 ; 1160 sp->sem_otime = gethrestime_sec(); 1161 if (held) 1162 ipc_rele(sem_svc, (kipc_perm_t *)sp); 1163 else 1164 mutex_exit(lock); 1165 1166 /* Before leaving, deallocate the buffer that held the user semops */ 1167 if (nsops != 1) 1168 kmem_free(uops, sizeof (*uops) * nsops); 1169 return (0); 1170 1171 /* 1172 * Error return labels 1173 */ 1174 semoperr: 1175 if (held) 1176 ipc_rele(sem_svc, (kipc_perm_t *)sp); 1177 else 1178 mutex_exit(lock); 1179 1180 semoperr_unlocked: 1181 1182 /* Before leaving, deallocate the buffer that held the user semops */ 1183 if (nsops != 1) 1184 kmem_free(uops, sizeof (*uops) * nsops); 1185 return (set_errno(error)); 1186 } 1187 1188 /* 1189 * semsys - System entry point for semctl, semget, and semop system calls. 1190 */ 1191 static int 1192 semsys(int opcode, uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4) 1193 { 1194 int error; 1195 1196 switch (opcode) { 1197 case SEMCTL: 1198 error = semctl((int)a1, (uint_t)a2, (int)a3, a4); 1199 break; 1200 case SEMGET: 1201 error = semget((key_t)a1, (int)a2, (int)a3); 1202 break; 1203 case SEMOP: 1204 error = semop((int)a1, (struct sembuf *)a2, (size_t)a3, 0); 1205 break; 1206 case SEMIDS: 1207 error = semids((int *)a1, (uint_t)a2, (uint_t *)a3); 1208 break; 1209 case SEMTIMEDOP: 1210 error = semop((int)a1, (struct sembuf *)a2, (size_t)a3, 1211 (timespec_t *)a4); 1212 break; 1213 default: 1214 error = set_errno(EINVAL); 1215 break; 1216 } 1217 return (error); 1218 }