1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28
29 /*
30 * Inter-Process Communication Semaphore Facility.
31 *
32 * See os/ipc.c for a description of common IPC functionality.
33 *
34 * Resource controls
35 * -----------------
36 *
37 * Control: zone.max-sem-ids (rc_zone_semmni)
38 * Description: Maximum number of semaphore ids allowed a zone.
39 *
40 * When semget() is used to allocate a semaphore set, one id is
41 * allocated. If the id allocation doesn't succeed, semget() fails
42 * and errno is set to ENOSPC. Upon successful semctl(, IPC_RMID)
43 * the id is deallocated.
44 *
45 * Control: project.max-sem-ids (rc_project_semmni)
46 * Description: Maximum number of semaphore ids allowed a project.
47 *
48 * When semget() is used to allocate a semaphore set, one id is
49 * allocated. If the id allocation doesn't succeed, semget() fails
50 * and errno is set to ENOSPC. Upon successful semctl(, IPC_RMID)
51 * the id is deallocated.
52 *
53 * Control: process.max-sem-nsems (rc_process_semmsl)
54 * Description: Maximum number of semaphores allowed per semaphore set.
55 *
56 * When semget() is used to allocate a semaphore set, the size of the
57 * set is compared with this limit. If the number of semaphores
58 * exceeds the limit, semget() fails and errno is set to EINVAL.
59 *
60 * Control: process.max-sem-ops (rc_process_semopm)
61 * Description: Maximum number of semaphore operations allowed per
62 * semop call.
63 *
64 * When semget() successfully allocates a semaphore set, the minimum
65 * enforced value of this limit is used to initialize the
66 * "system-imposed maximum" number of operations a semop() call for
67 * this set can perform.
68 *
69 * Undo structures
70 * ---------------
71 *
72 * Removing the undo structure tunables involved a serious redesign of
73 * how they were implemented. There is now one undo structure for
74 * every process/semaphore array combination (lazily allocated, of
75 * course), and each is equal in size to the semaphore it corresponds
76 * to. To avoid scalability and performance problems, the undo
77 * structures are stored in two places: a per-process AVL tree sorted
78 * by ksemid pointer (p_semacct, protected by p_lock) and an unsorted
79 * per-semaphore linked list (sem_undos, protected by the semaphore's
80 * ID lock). The former is used by semop, where a lookup is performed
81 * once and cached if SEM_UNDO is specified for any of the operations,
82 * and at process exit where the undoable operations are rolled back.
83 * The latter is used when removing the semaphore, so the undo
84 * structures can be removed from the appropriate processes' trees.
85 *
86 * The undo structure itself contains pointers to the ksemid and proc
87 * to which it corresponds, a list node, an AVL node, and an array of
88 * adjust-on-exit (AOE) values. When an undo structure is allocated it
89 * is immediately added to both the process's tree and the semaphore's
90 * list. Lastly, the reference count on the semaphore is increased.
91 *
92 * Avoiding a lock ordering violation between p_lock and the ID lock,
93 * wont to occur when there is a race between a process exiting and the
94 * removal of a semaphore, mandates the delicate dance that exists
95 * between semexit and sem_rmid.
96 *
97 * sem_rmid, holding the ID lock, iterates through all undo structures
98 * and for each takes the appropriate process's p_lock and checks to
99 * see if p_semacct is NULL. If it is, it skips that undo structure
100 * and continues to the next. Otherwise, it removes the undo structure
101 * from both the AVL tree and the semaphore's list, and releases the
102 * hold that the undo structure had on the semaphore.
103 *
104 * The important other half of this is semexit, which will immediately
105 * take p_lock, obtain the AVL pointer, clear p_semacct, and drop
106 * p_lock. From this point on it is semexit's responsibility to clean
107 * up all undo structures found in the tree -- a coexecuting sem_rmid
108 * will see the NULL p_semacct and skip that undo structure. It walks
109 * the AVL tree (using avl_destroy_nodes) and for each undo structure
110 * takes the appropriate semaphore's ID lock (always legal since the
111 * undo structure has a hold on the semaphore), updates all semaphores
112 * with non-zero AOE values, and removes the structure from the
113 * semaphore's list. It then drops the structure's reference on the
114 * semaphore, drops the ID lock, and frees the undo structure.
115 */
116
117 #include <sys/types.h>
118 #include <sys/t_lock.h>
119 #include <sys/param.h>
120 #include <sys/systm.h>
121 #include <sys/sysmacros.h>
122 #include <sys/cred.h>
123 #include <sys/vmem.h>
124 #include <sys/kmem.h>
125 #include <sys/errno.h>
126 #include <sys/time.h>
127 #include <sys/ipc.h>
128 #include <sys/ipc_impl.h>
129 #include <sys/sem.h>
130 #include <sys/sem_impl.h>
131 #include <sys/user.h>
132 #include <sys/proc.h>
133 #include <sys/cpuvar.h>
134 #include <sys/debug.h>
135 #include <sys/var.h>
136 #include <sys/cmn_err.h>
137 #include <sys/modctl.h>
138 #include <sys/syscall.h>
139 #include <sys/avl.h>
140 #include <sys/list.h>
141 #include <sys/zone.h>
142
143 #include <c2/audit.h>
144
145 extern rctl_hndl_t rc_zone_semmni;
146 extern rctl_hndl_t rc_project_semmni;
147 extern rctl_hndl_t rc_process_semmsl;
148 extern rctl_hndl_t rc_process_semopm;
149 static ipc_service_t *sem_svc;
150 static zone_key_t sem_zone_key;
151
/*
 * Obsolete System V semaphore tunables.
 *
 * The resource controls described at the top of this file are the
 * preferred way to administer the IPC Semaphore facility.  For
 * compatibility, seminfo_semmsl, seminfo_semopm and seminfo_semmni are
 * still read and interpreted (see os/project.c and os/rctl_proc.c).
 */
int seminfo_semaem = 16384;	/* (obsolete) */
int seminfo_semmap = 10;	/* (obsolete) */
int seminfo_semmni = 10;	/* (obsolete) still read for compatibility */
int seminfo_semmns = 60;	/* (obsolete) */
int seminfo_semmnu = 30;	/* (obsolete) */
int seminfo_semmsl = 25;	/* (obsolete) still read for compatibility */
int seminfo_semopm = 10;	/* (obsolete) still read for compatibility */
int seminfo_semume = 10;	/* (obsolete) */
int seminfo_semusz = 96;	/* (obsolete) */
int seminfo_semvmx = 32767;	/* (obsolete) */
169
/*
 * Largest operation count for which semop() copies in the user's
 * operations before checking the count against the set's own limit.
 */
#define	SEM_MAXUCOPS	4096	/* max # of unchecked ops per semop call */

/*
 * Size in bytes of an undo structure covering n semaphores: the base
 * structure plus (n - 1) further int adjust-on-exit slots (the structure
 * presumably already embeds the first slot -- confirm in sys/sem_impl.h).
 * The argument is parenthesized so that expressions built from operators
 * binding more loosely than '-' expand correctly.
 */
#define	SEM_UNDOSZ(n)	(sizeof (struct sem_undo) + ((n) - 1) * sizeof (int))
172
173 static int semsys(int opcode, uintptr_t a0, uintptr_t a1,
174 uintptr_t a2, uintptr_t a3);
175 static void sem_dtor(kipc_perm_t *);
176 static void sem_rmid(kipc_perm_t *);
177 static void sem_remove_zone(zoneid_t, void *);
178
179 static struct sysent ipcsem_sysent = {
180 5,
181 SE_NOUNLOAD | SE_ARGC | SE_32RVAL1,
182 semsys
183 };
184
185 /*
186 * Module linkage information for the kernel.
187 */
188 static struct modlsys modlsys = {
189 &mod_syscallops, "System V semaphore facility", &ipcsem_sysent
190 };
191
192 #ifdef _SYSCALL32_IMPL
193 static struct modlsys modlsys32 = {
194 &mod_syscallops32, "32-bit System V semaphore facility", &ipcsem_sysent
195 };
196 #endif
197
198 static struct modlinkage modlinkage = {
199 MODREV_1,
200 { &modlsys,
201 #ifdef _SYSCALL32_IMPL
202 &modlsys32,
203 #endif
204 NULL
205 }
206 };
207
208
209 int
210 _init(void)
211 {
212 int result;
213
214 sem_svc = ipcs_create("semids", rc_project_semmni, rc_zone_semmni,
215 sizeof (ksemid_t), sem_dtor, sem_rmid, AT_IPC_SEM,
216 offsetof(ipc_rqty_t, ipcq_semmni));
217 zone_key_create(&sem_zone_key, NULL, sem_remove_zone, NULL);
218
219 if ((result = mod_install(&modlinkage)) == 0)
220 return (0);
221
222 (void) zone_key_delete(sem_zone_key);
223 ipcs_destroy(sem_svc);
224
225 return (result);
226 }
227
/*
 * _fini - module unload entry point.
 *
 * Unloading is never permitted: the request is always refused with
 * EBUSY.
 */
int
_fini(void)
{
	return (EBUSY);
}
233
234 int
235 _info(struct modinfo *modinfop)
236 {
237 return (mod_info(&modlinkage, modinfop));
238 }
239
240 static void
241 sem_dtor(kipc_perm_t *perm)
242 {
243 ksemid_t *sp = (ksemid_t *)perm;
244
245 kmem_free(sp->sem_base,
246 P2ROUNDUP(sp->sem_nsems * sizeof (struct sem), 64));
247 list_destroy(&sp->sem_undos);
248 }
249
250 /*
251 * sem_undo_add - Create or update adjust on exit entry.
252 */
253 static int
254 sem_undo_add(short val, ushort_t num, struct sem_undo *undo)
255 {
256 int newval = undo->un_aoe[num] - val;
257
258 if (newval > USHRT_MAX || newval < -USHRT_MAX)
259 return (ERANGE);
260 undo->un_aoe[num] = newval;
261
262 return (0);
263 }
264
265 /*
266 * sem_undo_clear - clears all undo entries for specified semaphores
267 *
268 * Used when semaphores are reset by SETVAL or SETALL.
269 */
270 static void
271 sem_undo_clear(ksemid_t *sp, ushort_t low, ushort_t high)
272 {
273 struct sem_undo *undo;
274 int i;
275
276 ASSERT(low <= high);
277 ASSERT(high < sp->sem_nsems);
278
279 for (undo = list_head(&sp->sem_undos); undo;
280 undo = list_next(&sp->sem_undos, undo))
281 for (i = low; i <= high; i++)
282 undo->un_aoe[i] = 0;
283 }
284
285 /*
286 * sem_rollback - roll back work done so far if unable to complete operation
287 */
288 static void
289 sem_rollback(ksemid_t *sp, struct sembuf *op, int n, struct sem_undo *undo)
290 {
291 struct sem *semp; /* semaphore ptr */
292
293 for (op += n - 1; n--; op--) {
294 if (op->sem_op == 0)
295 continue;
296 semp = &sp->sem_base[op->sem_num];
297 semp->semval -= op->sem_op;
298 if (op->sem_flg & SEM_UNDO) {
299 ASSERT(undo != NULL);
300 (void) sem_undo_add(-op->sem_op, op->sem_num, undo);
301 }
302 }
303 }
304
305 static void
306 sem_rmid(kipc_perm_t *perm)
307 {
308 ksemid_t *sp = (ksemid_t *)perm;
309 struct sem *semp;
310 struct sem_undo *undo;
311 size_t size = SEM_UNDOSZ(sp->sem_nsems);
312 int i;
313
314 /*LINTED*/
315 while (undo = list_head(&sp->sem_undos)) {
316 list_remove(&sp->sem_undos, undo);
317 mutex_enter(&undo->un_proc->p_lock);
318 if (undo->un_proc->p_semacct == NULL) {
319 mutex_exit(&undo->un_proc->p_lock);
320 continue;
321 }
322 avl_remove(undo->un_proc->p_semacct, undo);
323 mutex_exit(&undo->un_proc->p_lock);
324 kmem_free(undo, size);
325 ipc_rele_locked(sem_svc, (kipc_perm_t *)sp);
326 }
327
328 for (i = 0; i < sp->sem_nsems; i++) {
329 semp = &sp->sem_base[i];
330 semp->semval = semp->sempid = 0;
331 if (semp->semncnt) {
332 cv_broadcast(&semp->semncnt_cv);
333 semp->semncnt = 0;
334 }
335 if (semp->semzcnt) {
336 cv_broadcast(&semp->semzcnt_cv);
337 semp->semzcnt = 0;
338 }
339 }
340 }
341
342 /*
343 * semctl - Semctl system call.
344 */
345 static int
346 semctl(int semid, uint_t semnum, int cmd, uintptr_t arg)
347 {
348 ksemid_t *sp; /* ptr to semaphore header */
349 struct sem *p; /* ptr to semaphore */
350 unsigned int i; /* loop control */
351 ushort_t *vals, *vp;
352 size_t vsize = 0;
353 int error = 0;
354 int retval = 0;
355 struct cred *cr;
356 kmutex_t *lock;
357 model_t mdl = get_udatamodel();
358 STRUCT_DECL(semid_ds, sid);
359 struct semid_ds64 ds64;
360
361 STRUCT_INIT(sid, mdl);
362 cr = CRED();
363
364 /*
365 * Perform pre- or non-lookup actions (e.g. copyins, RMID).
366 */
367 switch (cmd) {
368 case IPC_SET:
369 if (copyin((void *)arg, STRUCT_BUF(sid), STRUCT_SIZE(sid)))
370 return (set_errno(EFAULT));
371 break;
372
373 case IPC_SET64:
374 if (copyin((void *)arg, &ds64, sizeof (struct semid_ds64)))
375 return (set_errno(EFAULT));
376 break;
377
378 case SETALL:
379 if ((lock = ipc_lookup(sem_svc, semid,
380 (kipc_perm_t **)&sp)) == NULL)
381 return (set_errno(EINVAL));
382 vsize = sp->sem_nsems * sizeof (*vals);
383 mutex_exit(lock);
384
385 /* allocate space to hold all semaphore values */
386 vals = kmem_alloc(vsize, KM_SLEEP);
387
388 if (copyin((void *)arg, vals, vsize)) {
389 kmem_free(vals, vsize);
390 return (set_errno(EFAULT));
391 }
392 break;
393
394 case IPC_RMID:
395 if (error = ipc_rmid(sem_svc, semid, cr))
396 return (set_errno(error));
397 return (0);
398 }
399
400 if ((lock = ipc_lookup(sem_svc, semid, (kipc_perm_t **)&sp)) == NULL) {
401 if (vsize != 0)
402 kmem_free(vals, vsize);
403 return (set_errno(EINVAL));
404 }
405 switch (cmd) {
406 /* Set ownership and permissions. */
407 case IPC_SET:
408
409 if (error = ipcperm_set(sem_svc, cr, &sp->sem_perm,
410 &STRUCT_BUF(sid)->sem_perm, mdl)) {
411 mutex_exit(lock);
412 return (set_errno(error));
413 }
414 sp->sem_ctime = gethrestime_sec();
415 mutex_exit(lock);
416 return (0);
417
418 /* Get semaphore data structure. */
419 case IPC_STAT:
420
421 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) {
422 mutex_exit(lock);
423 return (set_errno(error));
424 }
425
426 ipcperm_stat(&STRUCT_BUF(sid)->sem_perm, &sp->sem_perm, mdl);
427 STRUCT_FSETP(sid, sem_base, NULL); /* kernel addr */
428 STRUCT_FSET(sid, sem_nsems, sp->sem_nsems);
429 STRUCT_FSET(sid, sem_otime, sp->sem_otime);
430 STRUCT_FSET(sid, sem_ctime, sp->sem_ctime);
431 STRUCT_FSET(sid, sem_binary, sp->sem_binary);
432 mutex_exit(lock);
433
434 if (copyout(STRUCT_BUF(sid), (void *)arg, STRUCT_SIZE(sid)))
435 return (set_errno(EFAULT));
436 return (0);
437
438 case IPC_SET64:
439
440 if (error = ipcperm_set64(sem_svc, cr, &sp->sem_perm,
441 &ds64.semx_perm)) {
442 mutex_exit(lock);
443 return (set_errno(error));
444 }
445 sp->sem_ctime = gethrestime_sec();
446 mutex_exit(lock);
447 return (0);
448
449 case IPC_STAT64:
450
451 ipcperm_stat64(&ds64.semx_perm, &sp->sem_perm);
452 ds64.semx_nsems = sp->sem_nsems;
453 ds64.semx_otime = sp->sem_otime;
454 ds64.semx_ctime = sp->sem_ctime;
455
456 mutex_exit(lock);
457 if (copyout(&ds64, (void *)arg, sizeof (struct semid_ds64)))
458 return (set_errno(EFAULT));
459
460 return (0);
461
462 /* Get # of processes sleeping for greater semval. */
463 case GETNCNT:
464 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) {
465 mutex_exit(lock);
466 return (set_errno(error));
467 }
468 if (semnum >= sp->sem_nsems) {
469 mutex_exit(lock);
470 return (set_errno(EINVAL));
471 }
472 retval = sp->sem_base[semnum].semncnt;
473 mutex_exit(lock);
474 return (retval);
475
476 /* Get pid of last process to operate on semaphore. */
477 case GETPID:
478 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) {
479 mutex_exit(lock);
480 return (set_errno(error));
481 }
482 if (semnum >= sp->sem_nsems) {
483 mutex_exit(lock);
484 return (set_errno(EINVAL));
485 }
486 retval = sp->sem_base[semnum].sempid;
487 mutex_exit(lock);
488 return (retval);
489
490 /* Get semval of one semaphore. */
491 case GETVAL:
492 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) {
493 mutex_exit(lock);
494 return (set_errno(error));
495 }
496 if (semnum >= sp->sem_nsems) {
497 mutex_exit(lock);
498 return (set_errno(EINVAL));
499 }
500 retval = sp->sem_base[semnum].semval;
501 mutex_exit(lock);
502 return (retval);
503
504 /* Get all semvals in set. */
505 case GETALL:
506 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) {
507 mutex_exit(lock);
508 return (set_errno(error));
509 }
510
511 /* allocate space to hold all semaphore values */
512 vsize = sp->sem_nsems * sizeof (*vals);
513 vals = vp = kmem_alloc(vsize, KM_SLEEP);
514
515 for (i = sp->sem_nsems, p = sp->sem_base; i--; p++, vp++)
516 bcopy(&p->semval, vp, sizeof (p->semval));
517
518 mutex_exit(lock);
519
520 if (copyout((void *)vals, (void *)arg, vsize)) {
521 kmem_free(vals, vsize);
522 return (set_errno(EFAULT));
523 }
524
525 kmem_free(vals, vsize);
526 return (0);
527
528 /* Get # of processes sleeping for semval to become zero. */
529 case GETZCNT:
530 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) {
531 mutex_exit(lock);
532 return (set_errno(error));
533 }
534 if (semnum >= sp->sem_nsems) {
535 mutex_exit(lock);
536 return (set_errno(EINVAL));
537 }
538 retval = sp->sem_base[semnum].semzcnt;
539 mutex_exit(lock);
540 return (retval);
541
542 /* Set semval of one semaphore. */
543 case SETVAL:
544 if (error = ipcperm_access(&sp->sem_perm, SEM_A, cr)) {
545 mutex_exit(lock);
546 return (set_errno(error));
547 }
548 if (semnum >= sp->sem_nsems) {
549 mutex_exit(lock);
550 return (set_errno(EINVAL));
551 }
552 if ((uint_t)arg > USHRT_MAX) {
553 mutex_exit(lock);
554 return (set_errno(ERANGE));
555 }
556 p = &sp->sem_base[semnum];
557 if ((p->semval = (ushort_t)arg) != 0) {
558 if (p->semncnt) {
559 cv_broadcast(&p->semncnt_cv);
560 }
561 } else if (p->semzcnt) {
562 cv_broadcast(&p->semzcnt_cv);
563 }
564 p->sempid = curproc->p_pid;
565 sem_undo_clear(sp, (ushort_t)semnum, (ushort_t)semnum);
566 mutex_exit(lock);
567 return (0);
568
569 /* Set semvals of all semaphores in set. */
570 case SETALL:
571 /* Check if semaphore set has been deleted and reallocated. */
572 if (sp->sem_nsems * sizeof (*vals) != vsize) {
573 error = set_errno(EINVAL);
574 goto seterr;
575 }
576 if (error = ipcperm_access(&sp->sem_perm, SEM_A, cr)) {
577 error = set_errno(error);
578 goto seterr;
579 }
580 sem_undo_clear(sp, 0, sp->sem_nsems - 1);
581 for (i = 0, p = sp->sem_base; i < sp->sem_nsems;
582 (p++)->sempid = curproc->p_pid) {
583 if ((p->semval = vals[i++]) != 0) {
584 if (p->semncnt) {
585 cv_broadcast(&p->semncnt_cv);
586 }
587 } else if (p->semzcnt) {
588 cv_broadcast(&p->semzcnt_cv);
589 }
590 }
591 seterr:
592 mutex_exit(lock);
593 kmem_free(vals, vsize);
594 return (error);
595
596 default:
597 mutex_exit(lock);
598 return (set_errno(EINVAL));
599 }
600
601 /* NOTREACHED */
602 }
603
604 /*
605 * semexit - Called by exit() to clean up on process exit.
606 */
607 void
608 semexit(proc_t *pp)
609 {
610 avl_tree_t *tree;
611 struct sem_undo *undo;
612 void *cookie = NULL;
613
614 mutex_enter(&pp->p_lock);
615 tree = pp->p_semacct;
616 pp->p_semacct = NULL;
617 mutex_exit(&pp->p_lock);
618
619 while (undo = avl_destroy_nodes(tree, &cookie)) {
620 ksemid_t *sp = undo->un_sp;
621 size_t size = SEM_UNDOSZ(sp->sem_nsems);
622 int i;
623
624 (void) ipc_lock(sem_svc, sp->sem_perm.ipc_id);
625 if (!IPC_FREE(&sp->sem_perm)) {
626 for (i = 0; i < sp->sem_nsems; i++) {
627 int adj = undo->un_aoe[i];
628 if (adj) {
629 struct sem *semp = &sp->sem_base[i];
630 int v = (int)semp->semval + adj;
631
632 if (v < 0 || v > USHRT_MAX)
633 continue;
634 semp->semval = (ushort_t)v;
635 if (v == 0 && semp->semzcnt)
636 cv_broadcast(&semp->semzcnt_cv);
637 if (adj > 0 && semp->semncnt)
638 cv_broadcast(&semp->semncnt_cv);
639 }
640 }
641 list_remove(&sp->sem_undos, undo);
642 }
643 ipc_rele(sem_svc, (kipc_perm_t *)sp);
644 kmem_free(undo, size);
645 }
646
647 avl_destroy(tree);
648 kmem_free(tree, sizeof (avl_tree_t));
649 }
650
651 /*
652 * Remove all semaphores associated with a given zone. Called by
653 * zone_shutdown when the zone is halted.
654 */
655 /*ARGSUSED1*/
656 static void
657 sem_remove_zone(zoneid_t zoneid, void *arg)
658 {
659 ipc_remove_zone(sem_svc, zoneid);
660 }
661
662 /*
663 * semget - Semget system call.
664 */
665 static int
666 semget(key_t key, int nsems, int semflg)
667 {
668 ksemid_t *sp;
669 kmutex_t *lock;
670 int id, error;
671 proc_t *pp = curproc;
672
673 top:
674 if (error = ipc_get(sem_svc, key, semflg, (kipc_perm_t **)&sp, &lock))
675 return (set_errno(error));
676
677 if (!IPC_FREE(&sp->sem_perm)) {
678 /*
679 * A semaphore with the requested key exists.
680 */
681 if (!((nsems >= 0) && (nsems <= sp->sem_nsems))) {
682 mutex_exit(lock);
683 return (set_errno(EINVAL));
684 }
685 } else {
686 /*
687 * This is a new semaphore set. Finish initialization.
688 */
689 if (nsems <= 0 || (rctl_test(rc_process_semmsl, pp->p_rctls, pp,
690 nsems, RCA_SAFE) & RCT_DENY)) {
691 mutex_exit(lock);
692 mutex_exit(&pp->p_lock);
693 ipc_cleanup(sem_svc, (kipc_perm_t *)sp);
694 return (set_errno(EINVAL));
695 }
696 mutex_exit(lock);
697 mutex_exit(&pp->p_lock);
698
699 /*
700 * We round the allocation up to coherency granularity
701 * so that multiple semaphore allocations won't result
702 * in the false sharing of their sem structures.
703 */
704 sp->sem_base =
705 kmem_zalloc(P2ROUNDUP(nsems * sizeof (struct sem), 64),
706 KM_SLEEP);
707 sp->sem_binary = (nsems == 1);
708 sp->sem_nsems = (ushort_t)nsems;
709 sp->sem_ctime = gethrestime_sec();
710 sp->sem_otime = 0;
711 list_create(&sp->sem_undos, sizeof (struct sem_undo),
712 offsetof(struct sem_undo, un_list));
713
714 if (error = ipc_commit_begin(sem_svc, key, semflg,
715 (kipc_perm_t *)sp)) {
716 if (error == EAGAIN)
717 goto top;
718 return (set_errno(error));
719 }
720 sp->sem_maxops =
721 rctl_enforced_value(rc_process_semopm, pp->p_rctls, pp);
722 if (rctl_test(rc_process_semmsl, pp->p_rctls, pp, nsems,
723 RCA_SAFE) & RCT_DENY) {
724 ipc_cleanup(sem_svc, (kipc_perm_t *)sp);
725 return (set_errno(EINVAL));
726 }
727 lock = ipc_commit_end(sem_svc, &sp->sem_perm);
728 }
729
730 if (AU_AUDITING())
731 audit_ipcget(AT_IPC_SEM, (void *)sp);
732
733 id = sp->sem_perm.ipc_id;
734 mutex_exit(lock);
735 return (id);
736 }
737
738 /*
739 * semids system call.
740 */
741 static int
742 semids(int *buf, uint_t nids, uint_t *pnids)
743 {
744 int error;
745
746 if (error = ipc_ids(sem_svc, buf, nids, pnids))
747 return (set_errno(error));
748
749 return (0);
750 }
751
752
753 /*
754 * Helper function for semop - copies in the provided timespec and
755 * computes the absolute future time after which we must return.
756 */
757 static int
758 compute_timeout(timespec_t **tsp, timespec_t *ts, timespec_t *now,
759 timespec_t *timeout)
760 {
761 model_t datamodel = get_udatamodel();
762
763 if (datamodel == DATAMODEL_NATIVE) {
764 if (copyin(timeout, ts, sizeof (timespec_t)))
765 return (EFAULT);
766 } else {
767 timespec32_t ts32;
768
769 if (copyin(timeout, &ts32, sizeof (timespec32_t)))
770 return (EFAULT);
771 TIMESPEC32_TO_TIMESPEC(ts, &ts32)
772 }
773
774 if (itimerspecfix(ts))
775 return (EINVAL);
776
777 /*
778 * Convert the timespec value into absolute time.
779 */
780 timespecadd(ts, now);
781 *tsp = ts;
782
783 return (0);
784 }
785
786 /*
787 * Undo structure comparator. We sort based on ksemid_t pointer.
788 */
789 static int
790 sem_undo_compar(const void *x, const void *y)
791 {
792 struct sem_undo *undo1 = (struct sem_undo *)x;
793 struct sem_undo *undo2 = (struct sem_undo *)y;
794
795 if (undo1->un_sp < undo2->un_sp)
796 return (-1);
797 if (undo1->un_sp > undo2->un_sp)
798 return (1);
799 return (0);
800 }
801
802 /*
803 * Helper function for semop - creates an undo structure and adds it to
804 * the process's avl tree and the semaphore's list.
805 */
806 static int
807 sem_undo_alloc(proc_t *pp, ksemid_t *sp, kmutex_t **lock,
808 struct sem_undo *template, struct sem_undo **un)
809 {
810 size_t size;
811 struct sem_undo *undo;
812 avl_tree_t *tree = NULL;
813 avl_index_t where;
814
815 mutex_exit(*lock);
816
817 size = SEM_UNDOSZ(sp->sem_nsems);
818 undo = kmem_zalloc(size, KM_SLEEP);
819 undo->un_proc = pp;
820 undo->un_sp = sp;
821
822 if (pp->p_semacct == NULL)
823 tree = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);
824
825 *lock = ipc_lock(sem_svc, sp->sem_perm.ipc_id);
826 if (IPC_FREE(&sp->sem_perm)) {
827 kmem_free(undo, size);
828 if (tree)
829 kmem_free(tree, sizeof (avl_tree_t));
830 return (EIDRM);
831 }
832
833 mutex_enter(&pp->p_lock);
834 if (tree) {
835 if (pp->p_semacct == NULL) {
836 avl_create(tree, sem_undo_compar,
837 sizeof (struct sem_undo),
838 offsetof(struct sem_undo, un_avl));
839 pp->p_semacct = tree;
840 } else {
841 kmem_free(tree, sizeof (avl_tree_t));
842 }
843 }
844
845 if (*un = avl_find(pp->p_semacct, template, &where)) {
846 mutex_exit(&pp->p_lock);
847 kmem_free(undo, size);
848 } else {
849 *un = undo;
850 avl_insert(pp->p_semacct, undo, where);
851 mutex_exit(&pp->p_lock);
852 list_insert_head(&sp->sem_undos, undo);
853 ipc_hold(sem_svc, (kipc_perm_t *)sp);
854 }
855
856
857 return (0);
858 }
859
860 /*
861 * semop - Semop system call.
862 */
863 static int
864 semop(int semid, struct sembuf *sops, size_t nsops, timespec_t *timeout)
865 {
866 ksemid_t *sp = NULL;
867 kmutex_t *lock;
868 struct sembuf *op; /* ptr to operation */
869 int i; /* loop control */
870 struct sem *semp; /* ptr to semaphore */
871 int error = 0;
872 struct sembuf *uops; /* ptr to copy of user ops */
873 struct sembuf x_sem; /* avoid kmem_alloc's */
874 timespec_t now, ts, *tsp = NULL;
875 int timecheck = 0;
876 int cvres, needundo, mode;
877 struct sem_undo *undo;
878 proc_t *pp = curproc;
879 int held = 0;
880
881 CPU_STATS_ADDQ(CPU, sys, sema, 1); /* bump semaphore op count */
882
883 /*
884 * To avoid the cost of copying in 'timeout' in the common
885 * case, we could only grab the time here and defer the copyin
886 * and associated computations until we are about to block.
887 *
888 * The down side to this is that we would then have to spin
889 * some goto top nonsense to avoid the copyin behind the semid
890 * lock. As a common use of timed semaphores is as an explicit
891 * blocking mechanism, this could incur a greater penalty.
892 *
893 * If we eventually decide that this would be a wise route to
894 * take, the deferrable functionality is completely contained
895 * in 'compute_timeout', and the interface is defined such that
896 * we can legally not validate 'timeout' if it is unused.
897 */
898 if (timeout != NULL) {
899 timecheck = timechanged;
900 gethrestime(&now);
901 if (error = compute_timeout(&tsp, &ts, &now, timeout))
902 return (set_errno(error));
903 }
904
905 /*
906 * Allocate space to hold the vector of semaphore ops. If
907 * there is only 1 operation we use a preallocated buffer on
908 * the stack for speed.
909 *
910 * Since we don't want to allow the user to allocate an
911 * arbitrary amount of kernel memory, we need to check against
912 * the number of operations allowed by the semaphore. We only
913 * bother doing this if the number of operations is larger than
914 * SEM_MAXUCOPS.
915 */
916 if (nsops == 1)
917 uops = &x_sem;
918 else if (nsops == 0)
919 return (0);
920 else if (nsops <= SEM_MAXUCOPS)
921 uops = kmem_alloc(nsops * sizeof (*uops), KM_SLEEP);
922
923 if (nsops > SEM_MAXUCOPS) {
924 if ((lock = ipc_lookup(sem_svc, semid,
925 (kipc_perm_t **)&sp)) == NULL)
926 return (set_errno(EFAULT));
927
928 if (nsops > sp->sem_maxops) {
929 mutex_exit(lock);
930 return (set_errno(E2BIG));
931 }
932 held = 1;
933 ipc_hold(sem_svc, (kipc_perm_t *)sp);
934 mutex_exit(lock);
935
936 uops = kmem_alloc(nsops * sizeof (*uops), KM_SLEEP);
937 if (copyin(sops, uops, nsops * sizeof (*op))) {
938 error = EFAULT;
939 (void) ipc_lock(sem_svc, sp->sem_perm.ipc_id);
940 goto semoperr;
941 }
942
943 lock = ipc_lock(sem_svc, sp->sem_perm.ipc_id);
944 if (IPC_FREE(&sp->sem_perm)) {
945 error = EIDRM;
946 goto semoperr;
947 }
948 } else {
949 /*
950 * This could be interleaved with the above code, but
951 * keeping them separate improves readability.
952 */
953 if (copyin(sops, uops, nsops * sizeof (*op))) {
954 error = EFAULT;
955 goto semoperr_unlocked;
956 }
957
958 if ((lock = ipc_lookup(sem_svc, semid,
959 (kipc_perm_t **)&sp)) == NULL) {
960 error = EINVAL;
961 goto semoperr_unlocked;
962 }
963
964 if (nsops > sp->sem_maxops) {
965 error = E2BIG;
966 goto semoperr;
967 }
968 }
969
970 /*
971 * Scan all operations. Verify that sem #s are in range and
972 * this process is allowed the requested operations. If any
973 * operations are marked SEM_UNDO, find (or allocate) the undo
974 * structure for this process and semaphore.
975 */
976 needundo = 0;
977 mode = 0;
978 for (i = 0, op = uops; i++ < nsops; op++) {
979 mode |= op->sem_op ? SEM_A : SEM_R;
980 if (op->sem_num >= sp->sem_nsems) {
981 error = EFBIG;
982 goto semoperr;
983 }
984 if ((op->sem_flg & SEM_UNDO) && op->sem_op)
985 needundo = 1;
986 }
987 if (error = ipcperm_access(&sp->sem_perm, mode, CRED()))
988 goto semoperr;
989
990 if (needundo) {
991 struct sem_undo template;
992
993 template.un_sp = sp;
994 mutex_enter(&pp->p_lock);
995 if (pp->p_semacct)
996 undo = avl_find(pp->p_semacct, &template, NULL);
997 else
998 undo = NULL;
999 mutex_exit(&pp->p_lock);
1000 if (undo == NULL) {
1001 if (!held) {
1002 held = 1;
1003 ipc_hold(sem_svc, (kipc_perm_t *)sp);
1004 }
1005 if (error = sem_undo_alloc(pp, sp, &lock, &template,
1006 &undo))
1007 goto semoperr;
1008
1009 /* sem_undo_alloc unlocks the semaphore */
1010 if (error = ipcperm_access(&sp->sem_perm, mode, CRED()))
1011 goto semoperr;
1012 }
1013 }
1014
1015 check:
1016 /*
1017 * Loop waiting for the operations to be satisfied atomically.
1018 * Actually, do the operations and undo them if a wait is needed
1019 * or an error is detected.
1020 */
1021 for (i = 0; i < nsops; i++) {
1022 op = &uops[i];
1023 semp = &sp->sem_base[op->sem_num];
1024
1025 /*
1026 * Raise the semaphore (i.e. sema_v)
1027 */
1028 if (op->sem_op > 0) {
1029 if (op->sem_op + (int)semp->semval > USHRT_MAX ||
1030 ((op->sem_flg & SEM_UNDO) &&
1031 (error = sem_undo_add(op->sem_op, op->sem_num,
1032 undo)))) {
1033 if (i)
1034 sem_rollback(sp, uops, i, undo);
1035 if (error == 0)
1036 error = ERANGE;
1037 goto semoperr;
1038 }
1039 semp->semval += op->sem_op;
1040 /*
1041 * If we are only incrementing the semaphore value
1042 * by one on a binary semaphore, we can cv_signal.
1043 */
1044 if (semp->semncnt) {
1045 if (op->sem_op == 1 && sp->sem_binary)
1046 cv_signal(&semp->semncnt_cv);
1047 else
1048 cv_broadcast(&semp->semncnt_cv);
1049 }
1050 if (semp->semzcnt && !semp->semval)
1051 cv_broadcast(&semp->semzcnt_cv);
1052 continue;
1053 }
1054
1055 /*
1056 * Lower the semaphore (i.e. sema_p)
1057 */
1058 if (op->sem_op < 0) {
1059 if (semp->semval >= (unsigned)(-op->sem_op)) {
1060 if ((op->sem_flg & SEM_UNDO) &&
1061 (error = sem_undo_add(op->sem_op,
1062 op->sem_num, undo))) {
1063 if (i)
1064 sem_rollback(sp, uops, i, undo);
1065 goto semoperr;
1066 }
1067 semp->semval += op->sem_op;
1068 if (semp->semzcnt && !semp->semval)
1069 cv_broadcast(&semp->semzcnt_cv);
1070 continue;
1071 }
1072 if (i)
1073 sem_rollback(sp, uops, i, undo);
1074 if (op->sem_flg & IPC_NOWAIT) {
1075 error = EAGAIN;
1076 goto semoperr;
1077 }
1078
1079 /*
1080 * Mark the semaphore set as not a binary type
1081 * if we are decrementing the value by more than 1.
1082 *
1083 * V operations will resort to cv_broadcast
1084 * for this set because there are too many weird
1085 * cases that have to be caught.
1086 */
1087 if (op->sem_op < -1)
1088 sp->sem_binary = 0;
1089 if (!held) {
1090 held = 1;
1091 ipc_hold(sem_svc, (kipc_perm_t *)sp);
1092 }
1093 semp->semncnt++;
1094 cvres = cv_waituntil_sig(&semp->semncnt_cv, lock,
1095 tsp, timecheck);
1096 lock = ipc_relock(sem_svc, sp->sem_perm.ipc_id, lock);
1097
1098 if (!IPC_FREE(&sp->sem_perm)) {
1099 ASSERT(semp->semncnt != 0);
1100 semp->semncnt--;
1101 if (cvres > 0) /* normal wakeup */
1102 goto check;
1103 }
1104
1105 /* EINTR or EAGAIN overrides EIDRM */
1106 if (cvres == 0)
1107 error = EINTR;
1108 else if (cvres < 0)
1109 error = EAGAIN;
1110 else
1111 error = EIDRM;
1112 goto semoperr;
1113 }
1114
1115 /*
1116 * Wait for zero value
1117 */
1118 if (semp->semval) {
1119 if (i)
1120 sem_rollback(sp, uops, i, undo);
1121 if (op->sem_flg & IPC_NOWAIT) {
1122 error = EAGAIN;
1123 goto semoperr;
1124 }
1125
1126 if (!held) {
1127 held = 1;
1128 ipc_hold(sem_svc, (kipc_perm_t *)sp);
1129 }
1130 semp->semzcnt++;
1131 cvres = cv_waituntil_sig(&semp->semzcnt_cv, lock,
1132 tsp, timecheck);
1133 lock = ipc_relock(sem_svc, sp->sem_perm.ipc_id, lock);
1134
1135 /*
1136 * Don't touch semp if the semaphores have been removed.
1137 */
1138 if (!IPC_FREE(&sp->sem_perm)) {
1139 ASSERT(semp->semzcnt != 0);
1140 semp->semzcnt--;
1141 if (cvres > 0) /* normal wakeup */
1142 goto check;
1143 }
1144
1145 /* EINTR or EAGAIN overrides EIDRM */
1146 if (cvres == 0)
1147 error = EINTR;
1148 else if (cvres < 0)
1149 error = EAGAIN;
1150 else
1151 error = EIDRM;
1152 goto semoperr;
1153 }
1154 }
1155
1156 /* All operations succeeded. Update sempid for accessed semaphores. */
1157 for (i = 0, op = uops; i++ < nsops;
1158 sp->sem_base[(op++)->sem_num].sempid = pp->p_pid)
1159 ;
1160 sp->sem_otime = gethrestime_sec();
1161 if (held)
1162 ipc_rele(sem_svc, (kipc_perm_t *)sp);
1163 else
1164 mutex_exit(lock);
1165
1166 /* Before leaving, deallocate the buffer that held the user semops */
1167 if (nsops != 1)
1168 kmem_free(uops, sizeof (*uops) * nsops);
1169 return (0);
1170
1171 /*
1172 * Error return labels
1173 */
1174 semoperr:
1175 if (held)
1176 ipc_rele(sem_svc, (kipc_perm_t *)sp);
1177 else
1178 mutex_exit(lock);
1179
1180 semoperr_unlocked:
1181
1182 /* Before leaving, deallocate the buffer that held the user semops */
1183 if (nsops != 1)
1184 kmem_free(uops, sizeof (*uops) * nsops);
1185 return (set_errno(error));
1186 }
1187
1188 /*
1189 * semsys - System entry point for semctl, semget, and semop system calls.
1190 */
1191 static int
1192 semsys(int opcode, uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4)
1193 {
1194 int error;
1195
1196 switch (opcode) {
1197 case SEMCTL:
1198 error = semctl((int)a1, (uint_t)a2, (int)a3, a4);
1199 break;
1200 case SEMGET:
1201 error = semget((key_t)a1, (int)a2, (int)a3);
1202 break;
1203 case SEMOP:
1204 error = semop((int)a1, (struct sembuf *)a2, (size_t)a3, 0);
1205 break;
1206 case SEMIDS:
1207 error = semids((int *)a1, (uint_t)a2, (uint_t *)a3);
1208 break;
1209 case SEMTIMEDOP:
1210 error = semop((int)a1, (struct sembuf *)a2, (size_t)a3,
1211 (timespec_t *)a4);
1212 break;
1213 default:
1214 error = set_errno(EINVAL);
1215 break;
1216 }
1217 return (error);
1218 }