/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2000-2001 by Sun Microsystems, Inc.
 * All rights reserved.
 */

#pragma ident   "%Z%%M% %I%     %E% SMI"

#include <sys/types.h>
#include <synch.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <fcntl.h>
#include <errno.h>
#include <dhcpmsg.h>
#include <unistd.h>
#include <dhcp_svc_private.h>

#include "container.h"

/*
 * Container locking code -- warning: serious pain ahead.
 *
 * This code synchronizes access to a given container across multiple
 * threads in this (dsvclockd) process, and optionally synchronizes across
 * multiple instances of dsvclockd running on different hosts.  The
 * synchronization allows multiple readers or a single writer at one time.
 *
 * Since by definition there is at most one dsvclockd running per host and
 * all requests by all threads in all processes running on that host funnel
 * into it, this code effectively synchronizes access to a given container
 * across all threads in all processes running on a given host.  This means
 * that the optional synchronization across multiple instances of dsvclockd
 * on different hosts provides true cross-host synchronization for all
 * threads in all processes on all cooperating machines (though all hosts
 * must have write access to a common directory).
 *
 * The container synchronization here should be viewed as a two-step
 * process, where the first step is optional:
 *
 *      1. Synchronize access across the set of cooperating dsvclockd's
 *         on multiple hosts.  This is known as acquiring the host lock.
 *
 *      2. Synchronize access across the set of threads running inside
 *         this dsvclockd process.  This is known as acquiring the
 *         intra-process lock.
 *
 * In order to implement the first (host lock) step, we use fcntl()-based
 * file locking on a file inside an NFS-shared directory and rely on NFS to
 * do our synchronization for us.  Note that this can only be used to
 * implement the first step since fcntl()-based locks are process locks,
 * and the effects of using these locks with multiple threads are not
 * defined.  Furthermore, note that this means some fancy footwork is
 * required to ensure that only one thread in a given dsvclockd process
 * tries to acquire the fcntl() lock for that process.
 *
 * In order to implement the second step, we use custom-made reader-writer
 * locks since the stock Solaris ones don't quite have the semantics we
 * need -- in particular, we need to relax the requirement that the thread
 * which acquired the lock is the one releasing it.
 *
 * Lock ordering guidelines:
 *
 * For the most part, this code does not acquire more than one container
 * lock at a time -- whenever feasible, please do the same.  If you must
 * acquire more than one lock at a time, the correct order is:
 *
 *      1. cn_nholds_lock
 *      2. cn_lock
 *      3. cn_hlock_lock
 */
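
/*
 * For illustration only: a minimal sketch of how a caller is expected to
 * drive this interface.  The enclosing caller, its error handling, and the
 * container path below are hypothetical; the real callers live elsewhere
 * in dsvclockd:
 *
 *        dsvcd_container_t *cn;
 *
 *        cn = cn_create("/var/dhcp/SUNWfiles1_dhcptab", B_TRUE);
 *        if (cn == NULL)
 *                return;
 *        if (cn_wrlock(cn, B_FALSE) == DSVC_SUCCESS) {
 *                ... operate on the container with exclusive access ...
 *                (void) cn_unlock(cn);
 *        }
 *        cn_destroy(cn);
 *
 * cn_rdlock() is driven analogously when only shared access is needed.
 */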

static int host_lock(dsvcd_container_t *, int, boolean_t);
static int host_unlock(dsvcd_container_t *);
static unsigned int cn_nlocks(dsvcd_container_t *);

/*
 * Create a container identified by `cn_id'; returns an instance of the new
 * container upon success, or NULL on failure.  Note that `cn_id' is
 * treated as a pathname and thus must be unique across all containers,
 * container versions, and datastores -- additionally, if `crosshost' is
 * set, then the directory containing `cn_id' must be mounted on all
 * cooperating hosts.
 */
dsvcd_container_t *
cn_create(const char *cn_id, boolean_t crosshost)
{
        dsvcd_container_t *cn;

        dhcpmsg(MSG_VERBOSE, "creating %scontainer synchpoint `%s'", crosshost ?
            "crosshost " : "", cn_id);

        cn = calloc(1, sizeof (dsvcd_container_t));
        if (cn == NULL)
                return (NULL);

        cn->cn_id = strdup(cn_id);
        if (cn->cn_id == NULL) {
                free(cn);
                return (NULL);
        }

        (void) mutex_init(&cn->cn_lock, USYNC_THREAD, NULL);
        (void) mutex_init(&cn->cn_hlock_lock, USYNC_THREAD, NULL);
        (void) mutex_init(&cn->cn_nholds_lock, USYNC_THREAD, NULL);

        (void) cond_init(&cn->cn_hlockcv, USYNC_THREAD, NULL);

        cn->cn_whead      = NULL;
        cn->cn_wtail      = NULL;
        cn->cn_nholds     = 0;
        cn->cn_closing    = B_FALSE;
        cn->cn_crosshost  = crosshost;
        cn->cn_hlockstate = CN_HUNLOCKED;
        cn->cn_hlockcount = 0;

        return (cn);
}

/*
 * Destroy container `cn'; wait a decent amount of time for activity on the
 * container to quiesce first.  If the caller has not prohibited other
 * threads from calling into the container yet, this may take a long time.
 */
void
cn_destroy(dsvcd_container_t *cn)
{
        unsigned int    attempts;
        unsigned int    nstalelocks;

        dhcpmsg(MSG_VERBOSE, "destroying container synchpoint `%s'", cn->cn_id);

        (void) mutex_lock(&cn->cn_lock);
        cn->cn_closing = B_TRUE;
        (void) mutex_unlock(&cn->cn_lock);

        /*
         * Wait for up to CN_DESTROY_WAIT seconds for all the lock holders
         * to relinquish their locks.  If the container has locks that seem
         * to be stale, then warn the user before destroying it.  The locks
         * will be unlocked automatically when we exit.
         */
        for (attempts = 0; attempts < CN_DESTROY_WAIT; attempts++) {
                nstalelocks = cn_nlocks(cn);
                if (nstalelocks == 0)
                        break;

                (void) sleep(1);
        }

        if (nstalelocks == 1) {
                dhcpmsg(MSG_WARNING, "unlocking stale lock on "
                    "container `%s'", cn->cn_id);
        } else if (nstalelocks != 0) {
                dhcpmsg(MSG_WARNING, "unlocking %d stale locks on "
                    "container `%s'", nstalelocks, cn->cn_id);
        }

        (void) cond_destroy(&cn->cn_hlockcv);
        (void) mutex_destroy(&cn->cn_nholds_lock);
        (void) mutex_destroy(&cn->cn_hlock_lock);
        (void) mutex_destroy(&cn->cn_lock);

        free(cn->cn_id);
        free(cn);
}
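
/*
 * A note on the intra-process lock state used below: cn_nholds encodes the
 * reader-writer lock, where 0 means the lock is not held, -1 means it is
 * held by a single writer, and a positive value is the number of readers
 * currently holding it (see cn_locktype()).
 */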

/*
 * Wait (block) until a lock of type `locktype' is obtained on container
 * `cn'.  Returns a DSVC_* return code; if DSVC_SUCCESS is returned, then
 * the lock is held upon return.  Must be called with the container's
 * cn_nholds_lock held on entry; returns with it unlocked.
 */
static int
cn_wait_for_lock(dsvcd_container_t *cn, dsvcd_locktype_t locktype)
{
        dsvcd_waitlist_t        waititem;
        int                     retval = DSVC_SUCCESS;

        assert(MUTEX_HELD(&cn->cn_nholds_lock));
        assert(cn->cn_nholds != 0);

        waititem.wl_next = NULL;
        waititem.wl_prev = NULL;
        waititem.wl_locktype = locktype;
        (void) cond_init(&waititem.wl_cv, USYNC_THREAD, NULL);

        /*
         * Chain our stack-local waititem onto the list; this keeps us from
         * having to worry about allocation failures and also makes it easy
         * for cn_unlock() to just pull us off the list without worrying
         * about freeing the memory.
         *
         * Note that we can do this because by definition we are blocked in
         * this function until we are signalled.
         */
        if (cn->cn_whead != NULL) {
                waititem.wl_prev = cn->cn_wtail;
                cn->cn_wtail->wl_next = &waititem;
                cn->cn_wtail = &waititem;
        } else {
                cn->cn_whead = &waititem;
                cn->cn_wtail = &waititem;
        }

        do {
                if (cond_wait(&waititem.wl_cv, &cn->cn_nholds_lock) != 0) {
                        dhcpmsg(MSG_DEBUG, "cn_wait_for_lock: cond_wait error");
                        retval = DSVC_INTERNAL;
                        break;
                }
        } while ((locktype == DSVCD_RDLOCK && cn->cn_nholds == -1) ||
            (locktype == DSVCD_WRLOCK && cn->cn_nholds != 0));

        (void) cond_destroy(&waititem.wl_cv);

        assert(MUTEX_HELD(&cn->cn_nholds_lock));

        /*
         * We got woken up; pull ourselves off of the local waitlist.
         */
        if (waititem.wl_prev != NULL)
                waititem.wl_prev->wl_next = waititem.wl_next;
        else
                cn->cn_whead = waititem.wl_next;

        if (waititem.wl_next != NULL)
                waititem.wl_next->wl_prev = waititem.wl_prev;
        else
                cn->cn_wtail = waititem.wl_prev;

        if (retval == DSVC_SUCCESS) {
                if (locktype == DSVCD_WRLOCK)
                        cn->cn_nholds = -1;
                else
                        cn->cn_nholds++;
        }

        /*
         * If we just acquired a read lock and the next waiter is waiting
         * for a read lock too, signal the waiter.  Note that we wake each
         * reader up one-by-one like this to avoid excessive contention on
         * cn_nholds_lock.
         */
        if (locktype == DSVCD_RDLOCK && cn->cn_whead != NULL &&
            cn->cn_whead->wl_locktype == DSVCD_RDLOCK)
                (void) cond_signal(&cn->cn_whead->wl_cv);

        (void) mutex_unlock(&cn->cn_nholds_lock);
        return (retval);
}

/*
 * Lock container `cn' for reader (shared) access.  If the container cannot
 * be locked immediately (there is currently a writer lock held or a writer
 * lock waiting for the lock), then if `nonblock' is B_TRUE, DSVC_BUSY is
 * returned.  Otherwise, block until the lock can be obtained.  Returns a
 * DSVC_* code.
 */
int
cn_rdlock(dsvcd_container_t *cn, boolean_t nonblock)
{
        int     retval;

        /*
         * The container is going away; no new lock requests.
         */
        (void) mutex_lock(&cn->cn_lock);
        if (cn->cn_closing) {
                (void) mutex_unlock(&cn->cn_lock);
                return (DSVC_SYNCH_ERR);
        }
        (void) mutex_unlock(&cn->cn_lock);

        /*
         * See if we can grab the lock without having to block; only
         * possible if we can acquire the host lock without blocking, if
         * the lock is not currently owned by a writer and if there are no
         * writers currently enqueued for accessing this lock (we know that
         * if there's a waiter it must be a writer since this code doesn't
         * enqueue readers until there's a writer enqueued).  We enqueue
         * these requests to improve fairness.
         */
        (void) mutex_lock(&cn->cn_nholds_lock);

        if (cn->cn_nholds != -1 && cn->cn_whead == NULL &&
            host_lock(cn, F_RDLCK, B_TRUE) == DSVC_SUCCESS) {
                cn->cn_nholds++;
                (void) mutex_unlock(&cn->cn_nholds_lock);
                return (DSVC_SUCCESS);
        }

        (void) mutex_unlock(&cn->cn_nholds_lock);

        /*
         * Cannot grab the lock without blocking somewhere; wait until we
         * can grab the host lock, then with that lock held obtain our
         * intra-process lock.
         */
        if (nonblock)
                return (DSVC_BUSY);
        retval = host_lock(cn, F_RDLCK, B_FALSE);
        if (retval != DSVC_SUCCESS)
                return (retval);

        /*
         * We've got the host read lock; if there aren't any writers
         * currently contending for our intra-process lock then succeed
         * immediately.  It's possible for there to be waiters but for
         * nholds to be zero via the following scenario:
         *
         *      1. The last holder of a lock unlocks, dropping nholds to
         *         zero and signaling the head waiter on the waitlist.
         *
         *      2. The last holder drops cn_nholds_lock.
         *
         *      3. We acquire cn_nholds_lock before the signaled waiter
         *         does.
         *
         * Note that this case won't cause a deadlock even if we didn't
         * check for it here (when the waiter finally gets cn_nholds_lock,
         * it'll find that the waitlist is once again non-NULL, and signal
         * us).  However, as an optimization, handle the case here.
         */
        (void) mutex_lock(&cn->cn_nholds_lock);
        if (cn->cn_nholds != -1 &&
            (cn->cn_whead == NULL || cn->cn_nholds == 0)) {
                cn->cn_nholds++;
                (void) mutex_unlock(&cn->cn_nholds_lock);
                return (DSVC_SUCCESS);
        }

        /* cn_wait_for_lock() will drop cn_nholds_lock */
        retval = cn_wait_for_lock(cn, DSVCD_RDLOCK);
        if (retval != DSVC_SUCCESS) {
                (void) host_unlock(cn);
                return (retval);
        }
        return (DSVC_SUCCESS);
}

/*
 * Lock container `cn' for writer (exclusive) access.  If the container
 * cannot be locked immediately (there are currently readers or a writer),
 * then if `nonblock' is B_TRUE, DSVC_BUSY is returned.  Otherwise, block
 * until the lock can be obtained.  Returns a DSVC_* code.
 */
int
cn_wrlock(dsvcd_container_t *cn, boolean_t nonblock)
{
        int     retval;

        /*
         * The container is going away; no new lock requests.
         */
        (void) mutex_lock(&cn->cn_lock);
        if (cn->cn_closing) {
                (void) mutex_unlock(&cn->cn_lock);
                return (DSVC_SYNCH_ERR);
        }
        (void) mutex_unlock(&cn->cn_lock);

        /*
         * See if we can grab the lock without having to block; only
         * possible if the lock is not currently held within our process
         * and we can immediately acquire the host lock.
         */
        (void) mutex_lock(&cn->cn_nholds_lock);

        if (cn->cn_nholds == 0 &&
            host_lock(cn, F_WRLCK, B_TRUE) == DSVC_SUCCESS) {
                cn->cn_nholds = -1;
                (void) mutex_unlock(&cn->cn_nholds_lock);
                return (DSVC_SUCCESS);
        }

        (void) mutex_unlock(&cn->cn_nholds_lock);

        /*
         * Cannot grab the lock without blocking somewhere; wait until we
         * can grab the host lock, then with that lock held obtain our
         * intra-process lock.
         */
        if (nonblock)
                return (DSVC_BUSY);
        retval = host_lock(cn, F_WRLCK, B_FALSE);
        if (retval != DSVC_SUCCESS)
                return (retval);

        /*
         * We've got the host lock; if no other thread in our process
         * currently holds the intra-process lock then succeed immediately.
         */
        (void) mutex_lock(&cn->cn_nholds_lock);
        if (cn->cn_nholds == 0) {
                cn->cn_nholds = -1;
                (void) mutex_unlock(&cn->cn_nholds_lock);
                return (DSVC_SUCCESS);
        }

        /* cn_wait_for_lock() will drop cn_nholds_lock */
        retval = cn_wait_for_lock(cn, DSVCD_WRLOCK);
        if (retval != DSVC_SUCCESS) {
                (void) host_unlock(cn);
                return (retval);
        }
        return (DSVC_SUCCESS);
}

/*
 * Unlock a reader or writer lock on container `cn'; returns a DSVC_* code.
 */
int
cn_unlock(dsvcd_container_t *cn)
{
        (void) mutex_lock(&cn->cn_nholds_lock);

        if (cn->cn_nholds == 0) {
                (void) mutex_unlock(&cn->cn_nholds_lock);
                return (DSVC_SYNCH_ERR);
        }

        if (cn->cn_nholds != -1 && cn->cn_nholds != 1) {
                cn->cn_nholds--;
                (void) host_unlock(cn);
                (void) mutex_unlock(&cn->cn_nholds_lock);
                return (DSVC_SUCCESS);
        }

        /*
         * The last reader or a writer just unlocked -- signal the first
         * waiter.  To avoid a thundering herd, we only signal the first
         * waiter, even if there are multiple readers ready to go --
         * instead, each reader is responsible for signaling the next
         * in cn_wait_for_lock().
         */
        cn->cn_nholds = 0;
        if (cn->cn_whead != NULL)
                (void) cond_signal(&cn->cn_whead->wl_cv);

        (void) host_unlock(cn);
        (void) mutex_unlock(&cn->cn_nholds_lock);

        return (DSVC_SUCCESS);
}

/*
 * Find out what kind of lock is on `cn'.  Note that this is just a
 * snapshot in time and without additional locks the answer may be invalid
 * by the time the function returns.
 */
dsvcd_locktype_t
cn_locktype(dsvcd_container_t *cn)
{
        int nholds;

        (void) mutex_lock(&cn->cn_nholds_lock);
        nholds = cn->cn_nholds;
        (void) mutex_unlock(&cn->cn_nholds_lock);

        if (nholds == 0)
                return (DSVCD_NOLOCK);
        else if (nholds > 0)
                return (DSVCD_RDLOCK);
        else
                return (DSVCD_WRLOCK);
}

/*
 * Obtain a lock of type `locktype' on container `cn' such that we have
 * shared or exclusive access to this container across all hosts.  If
 * `nonblock' is B_TRUE and the lock cannot be obtained, return DSVC_BUSY.
 * If the lock is already held, the number of instances of the lock
 * "checked out" by this host is incremented.
 */
static int
host_lock(dsvcd_container_t *cn, int locktype, boolean_t nonblock)
{
        struct flock    flock;
        int             fd;
        char            *basename, lockpath[MAXPATHLEN];
        int             error;

        if (!cn->cn_crosshost)
                return (DSVC_SUCCESS);

        /*
         * Before we wait for a while, see if the container is going away;
         * if so, fail now so the container can drain more quickly.
         */
        (void) mutex_lock(&cn->cn_lock);
        if (cn->cn_closing) {
                (void) mutex_unlock(&cn->cn_lock);
                return (DSVC_SYNCH_ERR);
        }
        (void) mutex_unlock(&cn->cn_lock);

        /*
         * Note that we only wait if (1) there's already a thread trying to
         * grab the host lock on our host or if (2) this host currently
         * holds a host shared lock and we need an exclusive lock.  Note
         * that we do *not* wait in the following situations:
         *
         *      * This host holds an exclusive host lock and another
         *        exclusive host lock request comes in.  We rely on the
         *        intra-process lock to do the synchronization.
         *
         *      * This host holds an exclusive host lock and a shared host
         *        lock request comes in.  Since this host already has
         *        exclusive access, we already implicitly hold the shared
         *        host lock as far as this host is concerned, so just rely
         *        on the intra-process lock to do the synchronization.
         *
         * These semantics make sense as long as one remembers that the
         * host lock merely provides exclusive or shared access for a given
         * host or set of hosts -- that is, exclusive access is exclusive
         * access for that machine, not for the given request.
         */
        (void) mutex_lock(&cn->cn_hlock_lock);

        while (cn->cn_hlockstate == CN_HPENDING ||
            (cn->cn_hlockstate == CN_HRDLOCKED && locktype == F_WRLCK)) {
                if (nonblock) {
                        (void) mutex_unlock(&cn->cn_hlock_lock);
                        return (DSVC_BUSY);
                }

                if (cond_wait(&cn->cn_hlockcv, &cn->cn_hlock_lock) != 0) {
                        (void) mutex_unlock(&cn->cn_hlock_lock);
                        return (DSVC_SYNCH_ERR);
                }
        }

        if (cn->cn_hlockstate == CN_HRDLOCKED ||
            cn->cn_hlockstate == CN_HWRLOCKED) {
                /*
                 * Already locked; just bump the held lock count.
                 */
                assert(cn->cn_hlockcount > 0);
                cn->cn_hlockcount++;
                (void) mutex_unlock(&cn->cn_hlock_lock);
                return (DSVC_SUCCESS);
        }

        /*
         * We're the thread that's going to try to acquire the host lock.
         */

        assert(cn->cn_hlockcount == 0);

        /*
         * Create the lock file as a hidden file in the directory named by
         * cn_id.  So if cn_id is /var/dhcp/SUNWfiles1_dhcptab, we want the
         * lock file to be /var/dhcp/.SUNWfiles1_dhcptab.lock.  Please, no
         * giggles about the snprintf().
         */
        basename = strrchr(cn->cn_id, '/');
        if (basename == NULL)
                basename = cn->cn_id;
        else
                basename++;

        (void) snprintf(lockpath, MAXPATHLEN, "%.*s.%s.lock",
            basename - cn->cn_id, cn->cn_id, basename);
        fd = open(lockpath, O_RDWR|O_CREAT, 0600);
        if (fd == -1) {
                (void) mutex_unlock(&cn->cn_hlock_lock);
                return (DSVC_SYNCH_ERR);
        }

        cn->cn_hlockstate = CN_HPENDING;
        (void) mutex_unlock(&cn->cn_hlock_lock);

        flock.l_len     = 0;
        flock.l_type    = locktype;
        flock.l_start   = 0;
        flock.l_whence  = SEEK_SET;

        if (fcntl(fd, nonblock ? F_SETLK : F_SETLKW, &flock) == -1) {
                /*
                 * For some reason we couldn't acquire the lock.  Reset the
                 * host lock state to "unlocked" and signal another thread
                 * (if there's one waiting) to pick up where we left off.
                 */
                error = errno;
                (void) mutex_lock(&cn->cn_hlock_lock);
                cn->cn_hlockstate = CN_HUNLOCKED;
                (void) cond_signal(&cn->cn_hlockcv);
                (void) mutex_unlock(&cn->cn_hlock_lock);
                (void) close(fd);
                return (error == EAGAIN ? DSVC_BUSY : DSVC_SYNCH_ERR);
        }

        /*
         * Got the lock; wake up all the waiters since they can all succeed.
         */
        (void) mutex_lock(&cn->cn_hlock_lock);
        cn->cn_hlockstate = (locktype == F_WRLCK ? CN_HWRLOCKED : CN_HRDLOCKED);
        cn->cn_hlockcount++;
        cn->cn_hlockfd = fd;
        (void) cond_broadcast(&cn->cn_hlockcv);
        (void) mutex_unlock(&cn->cn_hlock_lock);

        return (DSVC_SUCCESS);
}

/*
 * Unlock a checked out instance of a shared or exclusive lock on container
 * `cn'; if the number of checked out instances goes to zero, then the host
 * lock is unlocked so that other hosts may compete for it.
 */
static int
host_unlock(dsvcd_container_t *cn)
{
        struct flock    flock;

        if (!cn->cn_crosshost)
                return (DSVC_SUCCESS);

        assert(cn->cn_hlockcount > 0);

        (void) mutex_lock(&cn->cn_hlock_lock);
        if (cn->cn_hlockcount > 1) {
                /*
                 * Not the last unlock by this host; just decrement the
                 * held lock count.
                 */
                cn->cn_hlockcount--;
                (void) mutex_unlock(&cn->cn_hlock_lock);
                return (DSVC_SUCCESS);
        }

        flock.l_len     = 0;
        flock.l_type    = F_UNLCK;
        flock.l_start   = 0;
        flock.l_whence  = SEEK_SET;

        if (fcntl(cn->cn_hlockfd, F_SETLK, &flock) == -1) {
                (void) mutex_unlock(&cn->cn_hlock_lock);
                return (DSVC_SYNCH_ERR);
        }

        /*
         * Note that we don't unlink the lockfile for a number of reasons,
         * the most blatant reason being:
         *
         *      1. Several hosts lock the lockfile for shared access.
         *      2. One host unlocks the lockfile and unlinks it (here).
         *      3. Another host comes in, goes to exclusively lock the
         *         lockfile, finds no lockfile, and creates a new one
         *         (meanwhile, the other hosts are still accessing the
         *         container through the unlinked lockfile).
         *
         * We could put in some hairy code to try to unlink lockfiles
         * elsewhere (when possible), but it hardly seems worth it since
         * inodes are cheap.
         */

        (void) close(cn->cn_hlockfd);
        cn->cn_hlockcount = 0;
        cn->cn_hlockstate = CN_HUNLOCKED;
        /*
         * We need to signal `cn_hlockcv' in case there are threads which
         * are waiting on it to attempt fcntl() exclusive access (see the
         * comments in host_lock() for more details about this case).
         */
        (void) cond_signal(&cn->cn_hlockcv);
        (void) mutex_unlock(&cn->cn_hlock_lock);

        return (DSVC_SUCCESS);
}

/*
 * Return the number of locks currently held for container `cn'.
 */
static unsigned int
cn_nlocks(dsvcd_container_t *cn)
{
        unsigned int nlocks;

        (void) mutex_lock(&cn->cn_nholds_lock);
        (void) mutex_lock(&cn->cn_hlock_lock);

        switch (cn->cn_nholds) {
        case 0:
                nlocks = cn->cn_hlockcount;
                break;
        case -1:
                nlocks = 1;
                break;
        default:
                nlocks = cn->cn_nholds;
                break;
        }

        dhcpmsg(MSG_DEBUG, "cn_nlocks: nholds=%d hlockstate=%d hlockcount=%d",
            cn->cn_nholds, cn->cn_hlockstate, cn->cn_hlockcount);

        (void) mutex_unlock(&cn->cn_hlock_lock);
        (void) mutex_unlock(&cn->cn_nholds_lock);

        return (nlocks);
}