1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T             */
  26 /*      All Rights Reserved                                     */
  27 
  28 
  29 /*
  30  * Common Inter-Process Communication routines.
  31  *
  32  * Overview
  33  * --------
  34  *
  35  * The System V inter-process communication (IPC) facilities provide
  36  * three services, message queues, semaphore arrays, and shared memory
 * segments, which are managed using filesystem-like namespaces.
  38  * Unlike a filesystem, these namespaces aren't mounted and accessible
  39  * via a path -- a special API is used to interact with the different
  40  * facilities (nothing precludes a VFS-based interface, but the
  41  * standards require the special APIs).  Furthermore, these special
  42  * APIs don't use file descriptors, nor do they have an equivalent.
  43  * This means that every operation which acts on an object needs to
 * perform the equivalent of a lookup, which in turn means that every
  45  * operation can fail if the specified object doesn't exist in the
  46  * facility's namespace.
  47  *
  48  * Objects
  49  * -------
  50  *
  51  * Each object in a namespace has a unique ID, which is assigned by the
  52  * system and is used to identify the object when performing operations
  53  * on it.  An object can also have a key, which is selected by the user
  54  * at allocation time and is used as a primitive rendezvous mechanism.
  55  * An object without a key is said to have a "private" key.
  56  *
  57  * To perform an operation on an object given its key, one must first
  58  * perform a lookup and obtain its ID.  The ID is then used to identify
  59  * the object when performing the operation.  If the object has a
  60  * private key, the ID must be known or obtained by other means.
  61  *
  62  * Each object in the namespace has a creator uid and gid, as well as
  63  * an owner uid and gid.  Both are initialized with the ruid and rgid
  64  * of the process which created the object.  The creator or current
  65  * owner has the ability to change the owner of the object.
  66  *
  67  * Each object in the namespace has a set of file-like permissions,
  68  * which, in conjunction with the creator and owner uid and gid,
  69  * control read and write access to the object (execute is ignored).
  70  *
  71  * Each object also has a creator project and zone, which are used to
  72  * account for its resource usage.
  73  *
  74  * Operations
  75  * ----------
  76  *
  77  * There are five operations which all three facilities have in
  78  * common: GET, SET, STAT, RMID, and IDS.
  79  *
  80  * GET, like open, is used to allocate a new object or obtain an
  81  * existing one (using its key).  It takes a key, a set of flags and
  82  * mode bits, and optionally facility-specific arguments.  If the key
  83  * is IPC_PRIVATE, a new object with the requested mode bits and
  84  * facility-specific attributes is created.  If the key isn't
  85  * IPC_PRIVATE, the GET will attempt to look up the specified key and
 * either return that or create a new object depending on the state of the
  87  * IPC_CREAT and IPC_EXCL flags, much like open.  If GET needs to
  88  * allocate an object, it can fail if there is insufficient space in
  89  * the namespace (the maximum number of ids for the facility has been
  90  * exceeded) or if the facility-specific initialization fails.  If GET
  91  * finds an object it can return, it can still fail if that object's
  92  * permissions or facility-specific attributes are less than those
  93  * requested.
  94  *
  95  * SET is used to adjust facility-specific parameters of an object, in
  96  * addition to the owner uid and gid, and mode bits.  It can fail if
  97  * the caller isn't the creator or owner.
  98  *
  99  * STAT is used to obtain information about an object including the
 * general attributes described above as well as facility-specific
 101  * information.  It can fail if the caller doesn't have read
 102  * permission.
 103  *
 104  * RMID removes an object from the namespace.  Subsequent operations
 105  * using the object's ID or key will fail (until another object is
 106  * created with the same key or ID).  Since an RMID may be performed
 107  * asynchronously with other operations, it is possible that other
 108  * threads and/or processes will have references to the object.  While
 109  * a facility may have actions which need to be performed at RMID time,
 110  * only when all references are dropped can the object be destroyed.
 111  * RMID will fail if the caller isn't the creator or owner.
 112  *
 113  * IDS obtains a list of all IDs in a facility's namespace.  There are
 114  * no facility-specific behaviors of IDS.
 115  *
 116  * Design
 117  * ------
 118  *
 119  * Because some IPC facilities provide services whose operations must
 120  * scale, a mechanism which allows fast, concurrent access to
 121  * individual objects is needed.  Of primary importance is object
 122  * lookup based on ID (SET, STAT, others).  Allocation (GET),
 123  * deallocation (RMID), ID enumeration (IDS), and key lookups (GET) are
 124  * lesser concerns, but should be implemented in such a way that ID
 125  * lookup isn't affected (at least not in the common case).
 126  *
 127  * Starting from the bottom up, each object is represented by a
 128  * structure, the first member of which must be a kipc_perm_t.  The
 129  * kipc_perm_t contains the information described above in "Objects", a
 130  * reference count (since the object may continue to exist after it has
 131  * been removed from the namespace), as well as some additional
 132  * metadata used to manage data structure membership.  These objects
 133  * are dynamically allocated.
 134  *
 135  * Above the objects is a power-of-two sized table of ID slots.  Each
 136  * slot contains a pointer to an object, a sequence number, and a
 137  * lock.  An object's ID is a function of its slot's index in the table
 138  * and its slot's sequence number.  Every time a slot is released (via
 139  * RMID) its sequence number is increased.  Strictly speaking, the
 140  * sequence number is unnecessary.  However, checking the sequence
 141  * number after a lookup provides a certain degree of robustness
 142  * against the use of stale IDs (useful since nothing else does).  When
 143  * the table fills up, it is resized (see Locking, below).
 144  *
 145  * Of an ID's 31 bits (an ID is, as defined by the standards, a signed
 146  * int) the top IPC_SEQ_BITS are used for the sequence number with the
 147  * remainder holding the index into the table.  The size of the table
 148  * is therefore bounded at 2 ^ (31 - IPC_SEQ_BITS) slots.
 149  *
 150  * Managing this table is the ipc_service structure.  It contains a
 151  * pointer to the dynamically allocated ID table, a namespace-global
 152  * lock, an id_space for managing the free space in the table, and
 153  * sundry other metadata necessary for the maintenance of the
 154  * namespace.  An AVL tree of all keyed objects in the table (sorted by
 155  * key) is used for key lookups.  An unordered doubly linked list of
 156  * all objects in the namespace (keyed or not) is maintained to
 157  * facilitate ID enumeration.
 158  *
 159  * To help visualize these relationships, here's a picture of a
 160  * namespace with a table of size 8 containing three objects
 161  * (IPC_SEQ_BITS = 28):
 162  *
 163  *
 164  * +-ipc_service_t--+
 165  * | table          *---\
 166  * | keys           *---+----------------------\
 167  * | all ids        *--\|                      |
 168  * |                |  ||                      |
 169  * +----------------+  ||                      |
 170  *                     ||                      |
 171  * /-------------------/|                      |
 172  * |    /---------------/                      |
 173  * |    |                                      |
 174  * |    v                                      |
 175  * |  +-0------+-1------+-2------+-3------+-4--+---+-5------+-6------+-7------+
 176  * |  | Seq=3  |        |        | Seq=1  |    :   |        |        | Seq=6  |
 177  * |  |        |        |        |        |    :   |        |        |        |
 178  * |  +-*------+--------+--------+-*------+----+---+--------+--------+-*------+
 179  * |    |                          |           |                       |
 180  * |    |                      /---/           |      /----------------/
 181  * |    |                      |               |      |
 182  * |    v                      v               |      v
 183  * |  +-kipc_perm_t-+        +-kipc_perm_t-+   |    +-kipc_perm_t-+
 184  * |  | id=0x30     |        | id=0x13     |   |    | id=0x67     |
 185  * |  | key=0xfeed  |        | key=0xbeef  |   |    | key=0xcafe  |
 186  * \->| [list]      |<------>| [list]      |<------>| [list]      |
 187  * /->| [avl left]  x   /--->| [avl left]  x   \--->| [avl left]  *---\
 188  * |  | [avl right] x   |    | [avl right] x        | [avl right] *---+-\
 189  * |  |             |   |    |             |        |             |   | |
 190  * |  +-------------+   |    +-------------+        +-------------+   | |
 191  * |                    \---------------------------------------------/ |
 192  * \--------------------------------------------------------------------/
 193  *
 194  * Locking
 195  * -------
 196  *
 197  * There are three locks (or sets of locks) which are used to ensure
 198  * correctness: the slot locks, the namespace lock, and p_lock (needed
 199  * when checking resource controls).  Their ordering is
 200  *
 201  *   namespace lock -> slot lock 0 -> ... -> slot lock t -> p_lock
 202  *
 203  * Generally speaking, the namespace lock is used to protect allocation
 204  * and removal from the namespace, ID enumeration, and resizing the ID
 205  * table.  Specifically:
 206  *
 207  * - write access to all fields of the ipc_service structure
 208  * - read access to all variable fields of ipc_service except
 209  *   ipcs_tabsz (table size) and ipcs_table (the table pointer)
 210  * - read/write access to ipc_avl, ipc_list in visible objects'
 211  *   kipc_perm structures (i.e. objects which have been removed from
 212  *   the namespace don't have this restriction)
 213  * - write access to ipct_seq and ipct_data in the table entries
 214  *
 215  * A slot lock by itself is meaningless (except when resizing).  Of
 216  * greater interest conceptually is the notion of an ID lock -- a
 217  * "virtual lock" which refers to whichever slot lock an object's ID
 218  * currently hashes to.
 219  *
 220  * An ID lock protects all objects with that ID.  Normally there will
 221  * only be one such object: the one pointed to by the locked slot.
 222  * However, if an object is removed from the namespace but retains
 223  * references (e.g. an attached shared memory segment which has been
 224  * RMIDed), it continues to use the lock associated with its original
 225  * ID.  While this can result in increased contention, operations which
 226  * require taking the ID lock of removed objects are infrequent.
 227  *
 228  * Specifically, an ID lock protects the contents of an object's
 229  * structure, including the contents of the embedded kipc_perm
 230  * structure (but excluding those fields protected by the namespace
 231  * lock).  It also protects the ipct_seq and ipct_data fields in its
 232  * slot (it is really a slot lock, after all).
 233  *
 234  * Recall that the table is resizable.  To avoid requiring every ID
 235  * lookup to take a global lock, a scheme much like that employed for
 236  * file descriptors (see the comment above UF_ENTER in user.h) is
 237  * used.  Note that the sequence number and data pointer are protected
 238  * by both the namespace lock and their slot lock.  When the table is
 239  * resized, the following operations take place:
 240  *
 241  *   1) A new table is allocated.
 242  *   2) The global lock is taken.
 243  *   3) All old slots are locked, in order.
 244  *   4) The first half of the new slots are locked.
 245  *   5) All table entries are copied to the new table, and cleared from
 246  *      the old table.
 247  *   6) The ipc_service structure is updated to point to the new table.
 248  *   7) The ipc_service structure is updated with the new table size.
 249  *   8) All slot locks (old and new) are dropped.
 250  *
 251  * Because the slot locks are embedded in the table, ID lookups and
 * other operations which require taking a slot lock need to verify
 253  * that the lock taken wasn't part of a stale table.  This is
 254  * accomplished by checking the table size before and after
 255  * dereferencing the table pointer and taking the lock: if the size
 256  * changes, the lock must be dropped and reacquired.  It is this
 257  * additional work which distinguishes an ID lock from a slot lock.
 258  *
 259  * Because we can't guarantee that threads aren't accessing the old
 260  * tables' locks, they are never deallocated.  To prevent spurious
 261  * reports of memory leaks, a pointer to the discarded table is stored
 262  * in the new one in step 5.  (Theoretically ipcs_destroy will delete
 263  * the discarded tables, but it is only ever called from a failed _init
 264  * invocation; i.e. when there aren't any.)
 265  *
 266  * Interfaces
 267  * ----------
 268  *
 269  * The following interfaces are provided by the ipc module for use by
 270  * the individual IPC facilities:
 271  *
 272  * ipcperm_access
 273  *
 274  *   Given an object and a cred structure, determines if the requested
 275  *   access type is allowed.
 276  *
 277  * ipcperm_set, ipcperm_stat,
 278  * ipcperm_set64, ipcperm_stat64
 279  *
 280  *   Performs the common portion of an STAT or SET operation.  All
 281  *   (except stat and stat64) can fail, so they should be called before
 282  *   any facility-specific non-reversible changes are made to an
 283  *   object.  Similarly, the set operations have side effects, so they
 284  *   should only be called once the possibility of a facility-specific
 285  *   failure is eliminated.
 286  *
 287  * ipcs_create
 288  *
 289  *   Creates an IPC namespace for use by an IPC facility.
 290  *
 291  * ipcs_destroy
 292  *
 293  *   Destroys an IPC namespace.
 294  *
 295  * ipcs_lock, ipcs_unlock
 296  *
 297  *   Takes the namespace lock.  Ideally such access wouldn't be
 298  *   necessary, but there may be facility-specific data protected by
 299  *   this lock (e.g. project-wide resource consumption).
 300  *
 301  * ipc_lock
 302  *
 303  *   Takes the lock associated with an ID.  Can't fail.
 304  *
 305  * ipc_relock
 306  *
 307  *   Like ipc_lock, but takes a pointer to a held lock.  Drops the lock
 308  *   unless it is the one that would have been returned by ipc_lock.
 309  *   Used after calls to cv_wait.
 310  *
 311  * ipc_lookup
 312  *
 313  *   Performs an ID lookup, returns with the ID lock held.  Fails if
 314  *   the ID doesn't exist in the namespace.
 315  *
 316  * ipc_hold
 317  *
 318  *   Takes a reference on an object.
 319  *
 320  * ipc_rele
 321  *
 322  *   Releases a reference on an object, and drops the object's lock.
 323  *   Calls the object's destructor if last reference is being
 324  *   released.
 325  *
 326  * ipc_rele_locked
 327  *
 328  *   Releases a reference on an object.  Doesn't drop lock, and may
 329  *   only be called when there is more than one reference to the
 330  *   object.
 331  *
 332  * ipc_get, ipc_commit_begin, ipc_commit_end, ipc_cleanup
 333  *
 334  *   Components of a GET operation.  ipc_get performs a key lookup,
 335  *   allocating an object if the key isn't found (returning with the
 336  *   namespace lock and p_lock held), and returning the existing object
 337  *   if it is (with the object lock held).  ipc_get doesn't modify the
 338  *   namespace.
 339  *
 340  *   ipc_commit_begin begins the process of inserting an object
 341  *   allocated by ipc_get into the namespace, and can fail.  If
 342  *   successful, it returns with the namespace lock and p_lock held.
 343  *   ipc_commit_end completes the process of inserting an object into
 344  *   the namespace and can't fail.  The facility can call ipc_cleanup
 345  *   at any time following a successful ipc_get and before
 346  *   ipc_commit_end or a failed ipc_commit_begin to fail the
 347  *   allocation.  Pseudocode for the suggested GET implementation:
 348  *
 349  *   top:
 350  *
 351  *     ipc_get
 352  *
 353  *     if failure
 354  *       return
 355  *
 356  *     if found {
 357  *
 358  *       if object meets criteria
 359  *         unlock object and return success
 360  *       else
 361  *         unlock object and return failure
 362  *
 363  *     } else {
 364  *
 365  *       perform resource control tests
 366  *       drop namespace lock, p_lock
 367  *       if failure
 368  *         ipc_cleanup
 369  *
 370  *       perform facility-specific initialization
 371  *       if failure {
 372  *         facility-specific cleanup
 373  *         ipc_cleanup
 374  *       }
 375  *
 376  *       ( At this point the object should be destructible using the
 377  *         destructor given to ipcs_create )
 378  *
 379  *       ipc_commit_begin
 380  *       if retry
 381  *         goto top
 382  *       else if failure
 383  *         return
 384  *
 385  *       perform facility-specific resource control tests/allocations
 386  *       if failure
 387  *         ipc_cleanup
 388  *
 389  *       ipc_commit_end
 390  *       perform any infallible post-creation actions, unlock, and return
 391  *
 392  *     }
 393  *
 394  * ipc_rmid
 395  *
 *   Performs the common portion of an RMID operation -- looks up an ID,
 *   removes it, and calls a facility-specific function to do
 398  *   RMID-time cleanup on the private portions of the object.
 399  *
 400  * ipc_ids
 401  *
 402  *   Performs the common portion of an IDS operation.
 403  *
 404  */
 405 
 406 #include <sys/types.h>
 407 #include <sys/param.h>
 408 #include <sys/cred.h>
 409 #include <sys/policy.h>
 410 #include <sys/proc.h>
 411 #include <sys/user.h>
 412 #include <sys/ipc.h>
 413 #include <sys/ipc_impl.h>
 414 #include <sys/errno.h>
 415 #include <sys/systm.h>
 416 #include <sys/list.h>
 417 #include <sys/atomic.h>
 418 #include <sys/zone.h>
 419 #include <sys/task.h>
 420 #include <sys/modctl.h>
 421 
 422 #include <c2/audit.h>
 423 
/*
 * Module-type descriptor for this module.  It references mod_miscops
 * and supplies the description string "common ipc code"; modlinkage
 * (below) points at it.
 */
static struct modlmisc modlmisc = {
        &mod_miscops,
        "common ipc code",
};
 428 
/*
 * Linkage structure handed to mod_install(), mod_remove() and
 * mod_info() by _init(), _fini() and _info() respectively.  It is
 * initialized with MODREV_1, a pointer to modlmisc, and a trailing NULL.
 */
static struct modlinkage modlinkage = {
        MODREV_1, (void *)&modlmisc, NULL
};
 432 
 433 
 434 int
 435 _init(void)
 436 {
 437         return (mod_install(&modlinkage));
 438 }
 439 
 440 int
 441 _fini(void)
 442 {
 443         return (mod_remove(&modlinkage));
 444 }
 445 
 446 int
 447 _info(struct modinfo *modinfop)
 448 {
 449         return (mod_info(&modlinkage, modinfop));
 450 }
 451 
 452 
 453 /*
 454  * Check message, semaphore, or shared memory access permissions.
 455  *
 456  * This routine verifies the requested access permission for the current
 457  * process.  The zone ids are compared, and the appropriate bits are
 458  * checked corresponding to owner, group (including the list of
 459  * supplementary groups), or everyone.  Zero is returned on success.
 460  * On failure, the security policy is asked to check to override the
 461  * permissions check; the policy will either return 0 for access granted
 462  * or EACCES.
 463  *
 464  * Access to objects in other zones requires that the caller be in the
 465  * global zone and have the appropriate IPC_DAC_* privilege, regardless
 466  * of whether the uid or gid match those of the object.  Note that
 467  * cross-zone accesses will normally never get here since they'll
 468  * fail in ipc_lookup or ipc_get.
 469  *
 470  * The arguments must be set up as follows:
 471  *      p - Pointer to permission structure to verify
 472  *      mode - Desired access permissions
 473  */
 474 int
 475 ipcperm_access(kipc_perm_t *p, int mode, cred_t *cr)
 476 {
 477         int shifts = 0;
 478         uid_t uid = crgetuid(cr);
 479         zoneid_t zoneid = getzoneid();
 480 
 481         if (p->ipc_zoneid == zoneid) {
 482                 if (uid != p->ipc_uid && uid != p->ipc_cuid) {
 483                         shifts += 3;
 484                         if (!groupmember(p->ipc_gid, cr) &&
 485                             !groupmember(p->ipc_cgid, cr))
 486                                 shifts += 3;
 487                 }
 488 
 489                 mode &= ~(p->ipc_mode << shifts);
 490 
 491                 if (mode == 0)
 492                         return (0);
 493         } else if (zoneid != GLOBAL_ZONEID)
 494                 return (EACCES);
 495 
 496         return (secpolicy_ipc_access(cr, p, mode));
 497 }
 498 
 499 /*
 500  * There are two versions of the ipcperm_set/stat functions:
 501  *   ipcperm_???        - for use with IPC_SET/STAT
 *   ipcperm_???64      - for use with IPC_SET64/STAT64
 503  *
 504  * These functions encapsulate the common portions (copying, permission
 505  * checks, and auditing) of the set/stat operations.  All, except for
 * stat and stat64 which are void, return 0 on success or a non-zero
 507  * errno value on error.
 508  */
 509 
 510 int
 511 ipcperm_set(ipc_service_t *service, struct cred *cr,
 512     kipc_perm_t *kperm, struct ipc_perm *perm, model_t model)
 513 {
 514         STRUCT_HANDLE(ipc_perm, lperm);
 515         uid_t uid;
 516         gid_t gid;
 517         mode_t mode;
 518         zone_t *zone;
 519 
 520         ASSERT(IPC_LOCKED(service, kperm));
 521 
 522         STRUCT_SET_HANDLE(lperm, model, perm);
 523         uid = STRUCT_FGET(lperm, uid);
 524         gid = STRUCT_FGET(lperm, gid);
 525         mode = STRUCT_FGET(lperm, mode);
 526 
 527         if (secpolicy_ipc_owner(cr, kperm) != 0)
 528                 return (EPERM);
 529 
 530         zone = crgetzone(cr);
 531         if (!VALID_UID(uid, zone) || !VALID_GID(gid, zone))
 532                 return (EINVAL);
 533 
 534         kperm->ipc_uid = uid;
 535         kperm->ipc_gid = gid;
 536         kperm->ipc_mode = (mode & 0777) | (kperm->ipc_mode & ~0777);
 537 
 538         if (AU_AUDITING())
 539                 audit_ipcget(service->ipcs_atype, kperm);
 540 
 541         return (0);
 542 }
 543 
 544 void
 545 ipcperm_stat(struct ipc_perm *perm, kipc_perm_t *kperm, model_t model)
 546 {
 547         STRUCT_HANDLE(ipc_perm, lperm);
 548 
 549         STRUCT_SET_HANDLE(lperm, model, perm);
 550         STRUCT_FSET(lperm, uid, kperm->ipc_uid);
 551         STRUCT_FSET(lperm, gid, kperm->ipc_gid);
 552         STRUCT_FSET(lperm, cuid, kperm->ipc_cuid);
 553         STRUCT_FSET(lperm, cgid, kperm->ipc_cgid);
 554         STRUCT_FSET(lperm, mode, kperm->ipc_mode);
 555         STRUCT_FSET(lperm, seq, 0);
 556         STRUCT_FSET(lperm, key, kperm->ipc_key);
 557 }
 558 
 559 int
 560 ipcperm_set64(ipc_service_t *service, struct cred *cr,
 561     kipc_perm_t *kperm, ipc_perm64_t *perm64)
 562 {
 563         zone_t *zone;
 564 
 565         ASSERT(IPC_LOCKED(service, kperm));
 566 
 567         if (secpolicy_ipc_owner(cr, kperm) != 0)
 568                 return (EPERM);
 569 
 570         zone = crgetzone(cr);
 571         if (!VALID_UID(perm64->ipcx_uid, zone) ||
 572             !VALID_GID(perm64->ipcx_gid, zone))
 573                 return (EINVAL);
 574 
 575         kperm->ipc_uid = perm64->ipcx_uid;
 576         kperm->ipc_gid = perm64->ipcx_gid;
 577         kperm->ipc_mode = (perm64->ipcx_mode & 0777) |
 578             (kperm->ipc_mode & ~0777);
 579 
 580         if (AU_AUDITING())
 581                 audit_ipcget(service->ipcs_atype, kperm);
 582 
 583         return (0);
 584 }
 585 
 586 void
 587 ipcperm_stat64(ipc_perm64_t *perm64, kipc_perm_t *kperm)
 588 {
 589         perm64->ipcx_uid = kperm->ipc_uid;
 590         perm64->ipcx_gid = kperm->ipc_gid;
 591         perm64->ipcx_cuid = kperm->ipc_cuid;
 592         perm64->ipcx_cgid = kperm->ipc_cgid;
 593         perm64->ipcx_mode = kperm->ipc_mode;
 594         perm64->ipcx_key = kperm->ipc_key;
 595         perm64->ipcx_projid = kperm->ipc_proj->kpj_id;
 596         perm64->ipcx_zoneid = kperm->ipc_zoneid;
 597 }
 598 
 599 
 600 /*
 601  * ipc key comparator.
 602  */
 603 static int
 604 ipc_key_compar(const void *a, const void *b)
 605 {
 606         kipc_perm_t *aperm = (kipc_perm_t *)a;
 607         kipc_perm_t *bperm = (kipc_perm_t *)b;
 608         int ak = aperm->ipc_key;
 609         int bk = bperm->ipc_key;
 610         zoneid_t az;
 611         zoneid_t bz;
 612 
 613         ASSERT(ak != IPC_PRIVATE);
 614         ASSERT(bk != IPC_PRIVATE);
 615 
 616         /*
 617          * Compare key first, then zoneid.  This optimizes performance for
 618          * systems with only one zone, since the zone checks will only be
 619          * made when the keys match.
 620          */
 621         if (ak < bk)
 622                 return (-1);
 623         if (ak > bk)
 624                 return (1);
 625 
 626         /* keys match */
 627         az = aperm->ipc_zoneid;
 628         bz = bperm->ipc_zoneid;
 629         if (az < bz)
 630                 return (-1);
 631         if (az > bz)
 632                 return (1);
 633         return (0);
 634 }
 635 
 636 /*
 637  * Create an ipc service.
 638  */
 639 ipc_service_t *
 640 ipcs_create(const char *name, rctl_hndl_t proj_rctl, rctl_hndl_t zone_rctl,
 641     size_t size, ipc_func_t *dtor, ipc_func_t *rmid, int audit_type,
 642     size_t rctl_offset)
 643 {
 644         ipc_service_t *result;
 645 
 646         result = kmem_alloc(sizeof (ipc_service_t), KM_SLEEP);
 647 
 648         mutex_init(&result->ipcs_lock, NULL, MUTEX_ADAPTIVE, NULL);
 649         result->ipcs_count = 0;
 650         avl_create(&result->ipcs_keys, ipc_key_compar, size, 0);
 651         result->ipcs_tabsz = IPC_IDS_MIN;
 652         result->ipcs_table =
 653             kmem_zalloc(IPC_IDS_MIN * sizeof (ipc_slot_t), KM_SLEEP);
 654         result->ipcs_ssize = size;
 655         result->ipcs_ids = id_space_create(name, 0, IPC_IDS_MIN);
 656         result->ipcs_dtor = dtor;
 657         result->ipcs_rmid = rmid;
 658         result->ipcs_proj_rctl = proj_rctl;
 659         result->ipcs_zone_rctl = zone_rctl;
 660         result->ipcs_atype = audit_type;
 661         ASSERT(rctl_offset < sizeof (ipc_rqty_t));
 662         result->ipcs_rctlofs = rctl_offset;
 663         list_create(&result->ipcs_usedids, sizeof (kipc_perm_t),
 664             offsetof(kipc_perm_t, ipc_list));
 665 
 666         return (result);
 667 }
 668 
 669 /*
 670  * Destroy an ipc service.
 671  */
 672 void
 673 ipcs_destroy(ipc_service_t *service)
 674 {
 675         ipc_slot_t *slot, *next;
 676 
 677         mutex_enter(&service->ipcs_lock);
 678 
 679         ASSERT(service->ipcs_count == 0);
 680         avl_destroy(&service->ipcs_keys);
 681         list_destroy(&service->ipcs_usedids);
 682         id_space_destroy(service->ipcs_ids);
 683 
 684         for (slot = service->ipcs_table; slot; slot = next) {
 685                 next = slot[0].ipct_chain;
 686                 kmem_free(slot, service->ipcs_tabsz * sizeof (ipc_slot_t));
 687                 service->ipcs_tabsz >>= 1;
 688         }
 689 
 690         mutex_destroy(&service->ipcs_lock);
 691         kmem_free(service, sizeof (ipc_service_t));
 692 }
 693 
 694 /*
 695  * Takes the service lock.
 696  */
 697 void
 698 ipcs_lock(ipc_service_t *service)
 699 {
 700         mutex_enter(&service->ipcs_lock);
 701 }
 702 
 703 /*
 704  * Releases the service lock.
 705  */
 706 void
 707 ipcs_unlock(ipc_service_t *service)
 708 {
 709         mutex_exit(&service->ipcs_lock);
 710 }
 711 
 712 
/*
 * Locks the specified ID.  Returns the ID's ID table index.
 *
 * No global lock is taken here.  The table size is sampled before the
 * table pointer is dereferenced and is compared again once the slot
 * lock is held.  If the two samples differ, the lock just taken is
 * dropped and the whole attempt is repeated with the new size.  On
 * return the lock of slot `index' is held by the caller.
 */
static int
ipc_lock_internal(ipc_service_t *service, uint_t id)
{
        uint_t  tabsz;
        uint_t  index;
        kmutex_t *mutex;

        for (;;) {
                /* First sample of the table size. */
                tabsz = service->ipcs_tabsz;
                /*
                 * NOTE(review): presumably keeps the size read above
                 * ordered ahead of the table pointer read below --
                 * confirm against the resize path.
                 */
                membar_consumer();
                /* tabsz - 1 serves as the mask selecting the slot. */
                index = id & (tabsz - 1);
                mutex = &service->ipcs_table[index].ipct_lock;
                mutex_enter(mutex);
                /* Size unchanged while we were getting the lock: done. */
                if (tabsz == service->ipcs_tabsz)
                        break;
                /* The size moved underneath us; drop the lock and retry. */
                mutex_exit(mutex);
        }

        return (index);
}
 736 
 737 /*
 738  * Locks the specified ID.  Returns a pointer to the ID's lock.
 739  */
 740 kmutex_t *
 741 ipc_lock(ipc_service_t *service, int id)
 742 {
 743         uint_t index;
 744 
 745         /*
 746          * These assertions don't reflect requirements of the code
 747          * which follows, but they should never fail nonetheless.
 748          */
 749         ASSERT(id >= 0);
 750         ASSERT(IPC_INDEX(id) < service->ipcs_tabsz);
 751         index = ipc_lock_internal(service, id);
 752 
 753         return (&service->ipcs_table[index].ipct_lock);
 754 }
 755 
 756 /*
 757  * Checks to see if the held lock provided is the current lock for the
 758  * specified id.  If so, we return it instead of dropping it and
 759  * returning the result of ipc_lock.  This is intended to speed up cv
 760  * wakeups where we are left holding a lock which could be stale, but
 761  * probably isn't.
 762  */
 763 kmutex_t *
 764 ipc_relock(ipc_service_t *service, int id, kmutex_t *lock)
 765 {
 766         ASSERT(id >= 0);
 767         ASSERT(IPC_INDEX(id) < service->ipcs_tabsz);
 768         ASSERT(MUTEX_HELD(lock));
 769 
 770         if (&service->ipcs_table[IPC_INDEX(id)].ipct_lock == lock)
 771                 return (lock);
 772 
 773         mutex_exit(lock);
 774         return (ipc_lock(service, id));
 775 }
 776 
 777 /*
 778  * Performs an ID lookup.  If the ID doesn't exist or has been removed,
 779  * or isn't visible to the caller (because of zones), NULL is returned.
 780  * Otherwise, a pointer to the ID's perm structure and held ID lock are
 781  * returned.
 782  */
 783 kmutex_t *
 784 ipc_lookup(ipc_service_t *service, int id, kipc_perm_t **perm)
 785 {
 786         kipc_perm_t *result;
 787         uint_t index;
 788 
 789         /*
 790          * There is no need to check to see if id is in-range (i.e.
 791          * positive and fits into the table).  If it is out-of-range,
 792          * the id simply won't match the object's.
 793          */
 794 
 795         index = ipc_lock_internal(service, id);
 796         result = service->ipcs_table[index].ipct_data;
 797         if (result == NULL || result->ipc_id != (uint_t)id ||
 798             !HASZONEACCESS(curproc, result->ipc_zoneid)) {
 799                 mutex_exit(&service->ipcs_table[index].ipct_lock);
 800                 return (NULL);
 801         }
 802 
 803         ASSERT(IPC_SEQ(id) == service->ipcs_table[index].ipct_seq);
 804 
 805         *perm = result;
 806         if (AU_AUDITING())
 807                 audit_ipc(service->ipcs_atype, id, result);
 808 
 809         return (&service->ipcs_table[index].ipct_lock);
 810 }
 811 
 812 /*
 813  * Increase the reference count on an ID.
 814  */
 815 /*ARGSUSED*/
 816 void
 817 ipc_hold(ipc_service_t *s, kipc_perm_t *perm)
 818 {
 819         ASSERT(IPC_INDEX(perm->ipc_id) < s->ipcs_tabsz);
 820         ASSERT(IPC_LOCKED(s, perm));
 821         perm->ipc_ref++;
 822 }
 823 
 824 /*
 825  * Decrease the reference count on an ID and drops the ID's lock.
 826  * Destroys the ID if the new reference count is zero.
 827  */
 828 void
 829 ipc_rele(ipc_service_t *s, kipc_perm_t *perm)
 830 {
 831         int nref;
 832 
 833         ASSERT(IPC_INDEX(perm->ipc_id) < s->ipcs_tabsz);
 834         ASSERT(IPC_LOCKED(s, perm));
 835         ASSERT(perm->ipc_ref > 0);
 836 
 837         nref = --perm->ipc_ref;
 838         mutex_exit(&s->ipcs_table[IPC_INDEX(perm->ipc_id)].ipct_lock);
 839 
 840         if (nref == 0) {
 841                 ASSERT(IPC_FREE(perm));         /* ipc_rmid clears IPC_ALLOC */
 842                 s->ipcs_dtor(perm);
 843                 project_rele(perm->ipc_proj);
 844                 zone_rele_ref(&perm->ipc_zone_ref, ZONE_REF_IPC);
 845                 kmem_free(perm, s->ipcs_ssize);
 846         }
 847 }
 848 
 849 /*
 850  * Decrease the reference count on an ID, but don't drop the ID lock.
 851  * Used in cases where one thread needs to remove many references (on
 852  * behalf of other parties).
 853  */
 854 void
 855 ipc_rele_locked(ipc_service_t *s, kipc_perm_t *perm)
 856 {
 857         ASSERT(perm->ipc_ref > 1);
 858         ASSERT(IPC_INDEX(perm->ipc_id) < s->ipcs_tabsz);
 859         ASSERT(IPC_LOCKED(s, perm));
 860 
 861         perm->ipc_ref--;
 862 }
 863 
 864 
 865 /*
 866  * Internal function to grow the service ID table.
 867  */
 868 static int
 869 ipc_grow(ipc_service_t *service)
 870 {
 871         ipc_slot_t *new, *old;
 872         int i, oldsize, newsize;
 873 
 874         ASSERT(MUTEX_HELD(&service->ipcs_lock));
 875         ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
 876 
 877         if (service->ipcs_tabsz == IPC_IDS_MAX)
 878                 return (ENOSPC);
 879 
 880         oldsize = service->ipcs_tabsz;
 881         newsize = oldsize << 1;
 882         new = kmem_zalloc(newsize * sizeof (ipc_slot_t), KM_NOSLEEP);
 883         if (new == NULL)
 884                 return (ENOSPC);
 885 
 886         old = service->ipcs_table;
 887         for (i = 0; i < oldsize; i++) {
 888                 mutex_enter(&old[i].ipct_lock);
 889                 mutex_enter(&new[i].ipct_lock);
 890 
 891                 new[i].ipct_seq = old[i].ipct_seq;
 892                 new[i].ipct_data = old[i].ipct_data;
 893                 old[i].ipct_data = NULL;
 894         }
 895 
 896         new[0].ipct_chain = old;
 897         service->ipcs_table = new;
 898         membar_producer();
 899         service->ipcs_tabsz = newsize;
 900 
 901         for (i = 0; i < oldsize; i++) {
 902                 mutex_exit(&old[i].ipct_lock);
 903                 mutex_exit(&new[i].ipct_lock);
 904         }
 905 
 906         id_space_extend(service->ipcs_ids, oldsize, service->ipcs_tabsz);
 907 
 908         return (0);
 909 }
 910 
 911 
 912 static int
 913 ipc_keylookup(ipc_service_t *service, key_t key, int flag, kipc_perm_t **permp)
 914 {
 915         kipc_perm_t *perm = NULL;
 916         avl_index_t where;
 917         kipc_perm_t template;
 918 
 919         ASSERT(MUTEX_HELD(&service->ipcs_lock));
 920 
 921         template.ipc_key = key;
 922         template.ipc_zoneid = getzoneid();
 923         if (perm = avl_find(&service->ipcs_keys, &template, &where)) {
 924                 ASSERT(!IPC_FREE(perm));
 925                 if ((flag & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL))
 926                         return (EEXIST);
 927                 if ((flag & 0777) & ~perm->ipc_mode) {
 928                         if (AU_AUDITING())
 929                                 audit_ipcget(NULL, (void *)perm);
 930                         return (EACCES);
 931                 }
 932                 *permp = perm;
 933                 return (0);
 934         } else if (flag & IPC_CREAT) {
 935                 *permp = NULL;
 936                 return (0);
 937         }
 938         return (ENOENT);
 939 }
 940 
 941 static int
 942 ipc_alloc_test(ipc_service_t *service, proc_t *pp)
 943 {
 944         ASSERT(MUTEX_HELD(&service->ipcs_lock));
 945 
 946         /*
 947          * Resizing the table first would result in a cleaner code
 948          * path, but would also allow a user to (permanently) double
 949          * the id table size in cases where the allocation would be
 950          * denied.  Hence we test the rctl first.
 951          */
 952 retry:
 953         mutex_enter(&pp->p_lock);
 954         if ((rctl_test(service->ipcs_proj_rctl, pp->p_task->tk_proj->kpj_rctls,
 955             pp, 1, RCA_SAFE) & RCT_DENY) ||
 956             (rctl_test(service->ipcs_zone_rctl, pp->p_zone->zone_rctls,
 957             pp, 1, RCA_SAFE) & RCT_DENY)) {
 958                 mutex_exit(&pp->p_lock);
 959                 return (ENOSPC);
 960         }
 961 
 962         if (service->ipcs_count == service->ipcs_tabsz) {
 963                 int error;
 964 
 965                 mutex_exit(&pp->p_lock);
 966                 if (error = ipc_grow(service))
 967                         return (error);
 968                 goto retry;
 969         }
 970 
 971         return (0);
 972 }
 973 
 974 /*
 975  * Given a key, search for or create the associated identifier.
 976  *
 977  * If IPC_CREAT is specified and the key isn't found, or if the key is
 978  * equal to IPC_PRIVATE, we return 0 and place a pointer to a newly
 979  * allocated object structure in permp.  A pointer to the held service
 980  * lock is placed in lockp.  ipc_mode's IPC_ALLOC bit is clear.
 981  *
 982  * If the key is found and no error conditions arise, we return 0 and
 983  * place a pointer to the existing object structure in permp.  A
 984  * pointer to the held ID lock is placed in lockp.  ipc_mode's
 985  * IPC_ALLOC bit is set.
 986  *
 987  * Otherwise, a non-zero errno value is returned.
 988  */
 989 int
 990 ipc_get(ipc_service_t *service, key_t key, int flag, kipc_perm_t **permp,
 991     kmutex_t **lockp)
 992 {
 993         kipc_perm_t     *perm = NULL;
 994         proc_t          *pp = curproc;
 995         int             error, index;
 996         cred_t          *cr = CRED();
 997 
 998         if (key != IPC_PRIVATE) {
 999 
1000                 mutex_enter(&service->ipcs_lock);
1001                 error = ipc_keylookup(service, key, flag, &perm);
1002                 if (perm != NULL)
1003                         index = ipc_lock_internal(service, perm->ipc_id);
1004                 mutex_exit(&service->ipcs_lock);
1005 
1006                 if (error) {
1007                         ASSERT(perm == NULL);
1008                         return (error);
1009                 }
1010 
1011                 if (perm) {
1012                         ASSERT(!IPC_FREE(perm));
1013                         *permp = perm;
1014                         *lockp = &service->ipcs_table[index].ipct_lock;
1015                         return (0);
1016                 }
1017 
1018                 /* Key not found; fall through */
1019         }
1020 
1021         perm = kmem_zalloc(service->ipcs_ssize, KM_SLEEP);
1022 
1023         mutex_enter(&service->ipcs_lock);
1024         if (error = ipc_alloc_test(service, pp)) {
1025                 mutex_exit(&service->ipcs_lock);
1026                 kmem_free(perm, service->ipcs_ssize);
1027                 return (error);
1028         }
1029 
1030         perm->ipc_cuid = perm->ipc_uid = crgetuid(cr);
1031         perm->ipc_cgid = perm->ipc_gid = crgetgid(cr);
1032         perm->ipc_zoneid = getzoneid();
1033         perm->ipc_mode = flag & 0777;
1034         perm->ipc_key = key;
1035         perm->ipc_ref = 1;
1036         perm->ipc_id = IPC_ID_INVAL;
1037         *permp = perm;
1038         *lockp = &service->ipcs_lock;
1039 
1040         return (0);
1041 }
1042 
1043 /*
1044  * Attempts to add the a newly created ID to the global namespace.  If
1045  * creating it would cause an error, we return the error.  If there is
1046  * the possibility that we could obtain the existing ID and return it
1047  * to the user, we return EAGAIN.  Otherwise, we return 0 with p_lock
1048  * and the service lock held.
1049  *
1050  * Since this should be only called after all initialization has been
1051  * completed, on failure we automatically invoke the destructor for the
1052  * object and deallocate the memory associated with it.
1053  */
1054 int
1055 ipc_commit_begin(ipc_service_t *service, key_t key, int flag,
1056     kipc_perm_t *newperm)
1057 {
1058         kipc_perm_t *perm;
1059         int error;
1060         proc_t *pp = curproc;
1061 
1062         ASSERT(newperm->ipc_ref == 1);
1063         ASSERT(IPC_FREE(newperm));
1064 
1065         /*
1066          * Set ipc_proj and ipc_zone_ref so that future calls to ipc_cleanup()
1067          * clean up the necessary state.  This must be done before the
1068          * potential call to ipcs_dtor() below.
1069          */
1070         newperm->ipc_proj = pp->p_task->tk_proj;
1071         zone_init_ref(&newperm->ipc_zone_ref);
1072         zone_hold_ref(pp->p_zone, &newperm->ipc_zone_ref, ZONE_REF_IPC);
1073 
1074         mutex_enter(&service->ipcs_lock);
1075         /*
1076          * Ensure that no-one has raced with us and created the key.
1077          */
1078         if ((key != IPC_PRIVATE) &&
1079             (((error = ipc_keylookup(service, key, flag, &perm)) != 0) ||
1080             (perm != NULL))) {
1081                 error = error ? error : EAGAIN;
1082                 goto errout;
1083         }
1084 
1085         /*
1086          * Ensure that no-one has raced with us and used the last of
1087          * the permissible ids, or the last of the free spaces in the
1088          * id table.
1089          */
1090         if (error = ipc_alloc_test(service, pp))
1091                 goto errout;
1092 
1093         ASSERT(MUTEX_HELD(&service->ipcs_lock));
1094         ASSERT(MUTEX_HELD(&pp->p_lock));
1095 
1096         return (0);
1097 errout:
1098         mutex_exit(&service->ipcs_lock);
1099         service->ipcs_dtor(newperm);
1100         zone_rele_ref(&newperm->ipc_zone_ref, ZONE_REF_IPC);
1101         kmem_free(newperm, service->ipcs_ssize);
1102         return (error);
1103 }
1104 
1105 /*
1106  * Commit the ID allocation transaction.  Called with p_lock and the
1107  * service lock held, both of which are dropped.  Returns the held ID
1108  * lock so the caller can extract the ID and perform ipcget auditing.
1109  */
1110 kmutex_t *
1111 ipc_commit_end(ipc_service_t *service, kipc_perm_t *perm)
1112 {
1113         ipc_slot_t *slot;
1114         avl_index_t where;
1115         int index;
1116         void *loc;
1117 
1118         ASSERT(MUTEX_HELD(&service->ipcs_lock));
1119         ASSERT(MUTEX_HELD(&curproc->p_lock));
1120 
1121         (void) project_hold(perm->ipc_proj);
1122         mutex_exit(&curproc->p_lock);
1123 
1124         /*
1125          * Pick out our slot.
1126          */
1127         service->ipcs_count++;
1128         index = id_alloc(service->ipcs_ids);
1129         ASSERT(index < service->ipcs_tabsz);
1130         slot = &service->ipcs_table[index];
1131         mutex_enter(&slot->ipct_lock);
1132         ASSERT(slot->ipct_data == NULL);
1133 
1134         /*
1135          * Update the perm structure.
1136          */
1137         perm->ipc_mode |= IPC_ALLOC;
1138         perm->ipc_id = (slot->ipct_seq << IPC_SEQ_SHIFT) | index;
1139 
1140         /*
1141          * Push into global visibility.
1142          */
1143         slot->ipct_data = perm;
1144         if (perm->ipc_key != IPC_PRIVATE) {
1145                 loc = avl_find(&service->ipcs_keys, perm, &where);
1146                 ASSERT(loc == NULL);
1147                 avl_insert(&service->ipcs_keys, perm, where);
1148         }
1149         list_insert_head(&service->ipcs_usedids, perm);
1150 
1151         /*
1152          * Update resource consumption.
1153          */
1154         IPC_PROJ_USAGE(perm, service) += 1;
1155         IPC_ZONE_USAGE(perm, service) += 1;
1156 
1157         mutex_exit(&service->ipcs_lock);
1158         return (&slot->ipct_lock);
1159 }
1160 
1161 /*
1162  * Clean up function, in case the allocation fails.  If called between
1163  * ipc_lookup and ipc_commit_begin, perm->ipc_proj will be 0 and we
1164  * merely free the perm structure.  If called after ipc_commit_begin,
1165  * we also drop locks and call the ID's destructor.
1166  */
1167 void
1168 ipc_cleanup(ipc_service_t *service, kipc_perm_t *perm)
1169 {
1170         ASSERT(IPC_FREE(perm));
1171         if (perm->ipc_proj) {
1172                 mutex_exit(&curproc->p_lock);
1173                 mutex_exit(&service->ipcs_lock);
1174                 service->ipcs_dtor(perm);
1175         }
1176         if (perm->ipc_zone_ref.zref_zone != NULL)
1177                 zone_rele_ref(&perm->ipc_zone_ref, ZONE_REF_IPC);
1178         kmem_free(perm, service->ipcs_ssize);
1179 }
1180 
1181 
1182 /*
1183  * Common code to remove an IPC object.  This should be called after
1184  * all permissions checks have been performed, and with the service
1185  * and ID locked.  Note that this does not remove the object from
1186  * the ipcs_usedids list (this needs to be done by the caller before
1187  * dropping the service lock).
1188  */
1189 static void
1190 ipc_remove(ipc_service_t *service, kipc_perm_t *perm)
1191 {
1192         int id = perm->ipc_id;
1193         int index;
1194 
1195         ASSERT(MUTEX_HELD(&service->ipcs_lock));
1196         ASSERT(IPC_LOCKED(service, perm));
1197 
1198         index = IPC_INDEX(id);
1199 
1200         service->ipcs_table[index].ipct_data = NULL;
1201 
1202         if (perm->ipc_key != IPC_PRIVATE)
1203                 avl_remove(&service->ipcs_keys, perm);
1204         list_remove(&service->ipcs_usedids, perm);
1205         perm->ipc_mode &= ~IPC_ALLOC;
1206 
1207         id_free(service->ipcs_ids, index);
1208 
1209         if (service->ipcs_table[index].ipct_seq++ == IPC_SEQ_MASK)
1210                 service->ipcs_table[index].ipct_seq = 0;
1211         service->ipcs_count--;
1212         ASSERT(IPC_PROJ_USAGE(perm, service) > 0);
1213         ASSERT(IPC_ZONE_USAGE(perm, service) > 0);
1214         IPC_PROJ_USAGE(perm, service) -= 1;
1215         IPC_ZONE_USAGE(perm, service) -= 1;
1216         ASSERT(service->ipcs_count || ((IPC_PROJ_USAGE(perm, service) == 0) &&
1217             (IPC_ZONE_USAGE(perm, service) == 0)));
1218 }
1219 
1220 
1221 /*
1222  * Common code to perform an IPC_RMID.  Returns an errno value on
1223  * failure, 0 on success.
1224  */
1225 int
1226 ipc_rmid(ipc_service_t *service, int id, cred_t *cr)
1227 {
1228         kipc_perm_t *perm;
1229         kmutex_t *lock;
1230 
1231         mutex_enter(&service->ipcs_lock);
1232 
1233         lock = ipc_lookup(service, id, &perm);
1234         if (lock == NULL) {
1235                 mutex_exit(&service->ipcs_lock);
1236                 return (EINVAL);
1237         }
1238 
1239         ASSERT(service->ipcs_count > 0);
1240 
1241         if (secpolicy_ipc_owner(cr, perm) != 0) {
1242                 mutex_exit(lock);
1243                 mutex_exit(&service->ipcs_lock);
1244                 return (EPERM);
1245         }
1246 
1247         /*
1248          * Nothing can fail from this point on.
1249          */
1250         ipc_remove(service, perm);
1251         mutex_exit(&service->ipcs_lock);
1252 
1253         /* perform any per-service removal actions */
1254         service->ipcs_rmid(perm);
1255 
1256         ipc_rele(service, perm);
1257 
1258         return (0);
1259 }
1260 
1261 /*
1262  * Implementation for shmids, semids, and msgids.  buf is the address
1263  * of the user buffer, nids is the size, and pnids is a pointer to
1264  * where we write the actual number of ids that [would] have been
1265  * copied out.
1266  */
1267 int
1268 ipc_ids(ipc_service_t *service, int *buf, uint_t nids, uint_t *pnids)
1269 {
1270         kipc_perm_t *perm;
1271         size_t  idsize = 0;
1272         int     error = 0;
1273         int     idcount;
1274         int     *ids;
1275         int     numids = 0;
1276         zoneid_t zoneid = getzoneid();
1277         int     global = INGLOBALZONE(curproc);
1278 
1279         if (buf == NULL)
1280                 nids = 0;
1281 
1282         /*
1283          * Get an accurate count of the total number of ids, and allocate a
1284          * staging buffer.  Since ipcs_count is always sane, we don't have
1285          * to take ipcs_lock for our first guess.  If there are no ids, or
1286          * we're in the global zone and the number of ids is greater than
1287          * the size of the specified buffer, we shunt to the end.  Otherwise,
1288          * we go through the id list looking for (and counting) what is
1289          * visible in the specified zone.
1290          */
1291         idcount = service->ipcs_count;
1292         for (;;) {
1293                 if ((global && idcount > nids) || idcount == 0) {
1294                         numids = idcount;
1295                         nids = 0;
1296                         goto out;
1297                 }
1298 
1299                 idsize = idcount * sizeof (int);
1300                 ids = kmem_alloc(idsize, KM_SLEEP);
1301 
1302                 mutex_enter(&service->ipcs_lock);
1303                 if (idcount >= service->ipcs_count)
1304                         break;
1305                 idcount = service->ipcs_count;
1306                 mutex_exit(&service->ipcs_lock);
1307 
1308                 if (idsize != 0) {
1309                         kmem_free(ids, idsize);
1310                         idsize = 0;
1311                 }
1312         }
1313 
1314         for (perm = list_head(&service->ipcs_usedids); perm != NULL;
1315             perm = list_next(&service->ipcs_usedids, perm)) {
1316                 ASSERT(!IPC_FREE(perm));
1317                 if (global || perm->ipc_zoneid == zoneid)
1318                         ids[numids++] = perm->ipc_id;
1319         }
1320         mutex_exit(&service->ipcs_lock);
1321 
1322         /*
1323          * If there isn't enough space to hold all of the ids, just
1324          * return the number of ids without copying out any of them.
1325          */
1326         if (nids < numids)
1327                 nids = 0;
1328 
1329 out:
1330         if (suword32(pnids, (uint32_t)numids) ||
1331             (nids != 0 && copyout(ids, buf, numids * sizeof (int))))
1332                 error = EFAULT;
1333         if (idsize != 0)
1334                 kmem_free(ids, idsize);
1335         return (error);
1336 }
1337 
1338 /*
1339  * Destroy IPC objects from the given service that are associated with
1340  * the given zone.
1341  *
1342  * We can't hold on to the service lock when freeing objects, so we
1343  * first search the service and move all the objects to a private
1344  * list, then walk through and free them after dropping the lock.
1345  */
1346 void
1347 ipc_remove_zone(ipc_service_t *service, zoneid_t zoneid)
1348 {
1349         kipc_perm_t *perm, *next;
1350         list_t rmlist;
1351         kmutex_t *lock;
1352 
1353         list_create(&rmlist, sizeof (kipc_perm_t),
1354             offsetof(kipc_perm_t, ipc_list));
1355 
1356         mutex_enter(&service->ipcs_lock);
1357         for (perm = list_head(&service->ipcs_usedids); perm != NULL;
1358             perm = next) {
1359                 next = list_next(&service->ipcs_usedids, perm);
1360                 if (perm->ipc_zoneid != zoneid)
1361                         continue;
1362 
1363                 /*
1364                  * Remove the object from the service, then put it on
1365                  * the removal list so we can defer the call to
1366                  * ipc_rele (which will actually free the structure).
1367                  * We need to do this since the destructor may grab
1368                  * the service lock.
1369                  */
1370                 ASSERT(!IPC_FREE(perm));
1371                 lock = ipc_lock(service, perm->ipc_id);
1372                 ipc_remove(service, perm);
1373                 mutex_exit(lock);
1374                 list_insert_tail(&rmlist, perm);
1375         }
1376         mutex_exit(&service->ipcs_lock);
1377 
1378         /*
1379          * Now that we've dropped the service lock, loop through the
1380          * private list freeing removed objects.
1381          */
1382         for (perm = list_head(&rmlist); perm != NULL; perm = next) {
1383                 next = list_next(&rmlist, perm);
1384                 list_remove(&rmlist, perm);
1385 
1386                 (void) ipc_lock(service, perm->ipc_id);
1387 
1388                 /* perform any per-service removal actions */
1389                 service->ipcs_rmid(perm);
1390 
1391                 /* release reference */
1392                 ipc_rele(service, perm);
1393         }
1394 
1395         list_destroy(&rmlist);
1396 }