1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * zoneadmd manages zones; one zoneadmd process is launched for each
  28  * non-global zone on the system.  This daemon juggles four jobs:
  29  *
  30  * - Implement setup and teardown of the zone "virtual platform": mount and
  31  *   unmount filesystems; create and destroy network interfaces; communicate
  32  *   with devfsadmd to lay out devices for the zone; instantiate the zone
  33  *   console device; configure process runtime attributes such as resource
  34  *   controls, pool bindings, fine-grained privileges.
  35  *
  36  * - Launch the zone's init(1M) process.
  37  *
  38  * - Implement a door server; clients (like zoneadm) connect to the door
  39  *   server and request zone state changes.  The kernel is also a client of
  40  *   this door server.  A request to halt or reboot the zone which originates
  41  *   *inside* the zone results in a door upcall from the kernel into zoneadmd.
  42  *
  43  *   One minor problem is that messages emitted by zoneadmd need to be passed
  44  *   back to the zoneadm process making the request.  These messages need to
  45  *   be rendered in the client's locale; so, this is passed in as part of the
  46  *   request.  The exception is the kernel upcall to zoneadmd, in which case
  47  *   messages are syslog'd.
  48  *
  49  *   To make all of this work, the Makefile adds -a to xgettext to extract *all*
  50  *   strings, and an exclusion file (zoneadmd.xcl) is used to exclude those
  51  *   strings which do not need to be translated.
  52  *
  53  * - Act as a console server for zlogin -C processes; see comments in zcons.c
  54  *   for more information about the zone console architecture.
  55  *
  56  * DESIGN NOTES
  57  *
  58  * Restart:
  59  *   A chief design constraint of zoneadmd is that it should be restartable in
  60  *   the case that the administrator kills it off, or it suffers a fatal error,
  61  *   without the running zone being impacted; this is akin to being able to
  62  *   reboot the service processor of a server without affecting the OS instance.
  63  */
  64 
  65 #include <sys/param.h>
  66 #include <sys/mman.h>
  67 #include <sys/types.h>
  68 #include <sys/stat.h>
  69 #include <sys/sysmacros.h>
  70 
  71 #include <bsm/adt.h>
  72 #include <bsm/adt_event.h>
  73 
  74 #include <alloca.h>
  75 #include <assert.h>
  76 #include <errno.h>
  77 #include <door.h>
  78 #include <fcntl.h>
  79 #include <locale.h>
  80 #include <signal.h>
  81 #include <stdarg.h>
  82 #include <stdio.h>
  83 #include <stdlib.h>
  84 #include <string.h>
  85 #include <strings.h>
  86 #include <synch.h>
  87 #include <syslog.h>
  88 #include <thread.h>
  89 #include <unistd.h>
  90 #include <wait.h>
  91 #include <limits.h>
  92 #include <zone.h>
  93 #include <libbrand.h>
  94 #include <sys/brand.h>
  95 #include <libcontract.h>
  96 #include <libcontract_priv.h>
  97 #include <sys/brand.h>
  98 #include <sys/contract/process.h>
  99 #include <sys/ctfs.h>
 100 #include <libdladm.h>
 101 #include <sys/dls_mgmt.h>
 102 
 103 #include <libzonecfg.h>
 104 #include <zonestat_impl.h>
 105 #include "zoneadmd.h"
 106 
 107 static char *progname;
 108 char *zone_name;        /* zone which we are managing */
 109 char pool_name[MAXNAMELEN];
 110 char default_brand[MAXNAMELEN];
 111 char brand_name[MAXNAMELEN];
 112 boolean_t zone_isnative;
 113 boolean_t zone_iscluster;
 114 boolean_t zone_islabeled;
 115 static zoneid_t zone_id;
 116 dladm_handle_t dld_handle = NULL;
 117 
 118 static char pre_statechg_hook[2 * MAXPATHLEN];
 119 static char post_statechg_hook[2 * MAXPATHLEN];
 120 char query_hook[2 * MAXPATHLEN];
 121 
 122 zlog_t logsys;
 123 
 124 mutex_t lock = DEFAULTMUTEX;    /* to serialize stuff */
 125 mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */
 126 
 127 static sema_t scratch_sem;      /* for scratch zones */
 128 
 129 static char     zone_door_path[MAXPATHLEN];
 130 static int      zone_door = -1;
 131 
 132 boolean_t in_death_throes = B_FALSE;    /* daemon is dying */
 133 boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */
 134 
 135 #if !defined(TEXT_DOMAIN)               /* should be defined by cc -D */
 136 #define TEXT_DOMAIN     "SYS_TEST"      /* Use this only if it wasn't */
 137 #endif
 138 
 139 #define DEFAULT_LOCALE  "C"
 140 
 141 static const char *
 142 z_cmd_name(zone_cmd_t zcmd)
 143 {
 144         /* This list needs to match the enum in sys/zone.h */
 145         static const char *zcmdstr[] = {
 146                 "ready", "boot", "forceboot", "reboot", "halt",
 147                 "note_uninstalling", "mount", "forcemount", "unmount"
 148         };
 149 
 150         if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr))
 151                 return ("unknown");
 152         else
 153                 return (zcmdstr[(int)zcmd]);
 154 }
 155 
 156 static char *
 157 get_execbasename(char *execfullname)
 158 {
 159         char *last_slash, *execbasename;
 160 
 161         /* guard against '/' at end of command invocation */
 162         for (;;) {
 163                 last_slash = strrchr(execfullname, '/');
 164                 if (last_slash == NULL) {
 165                         execbasename = execfullname;
 166                         break;
 167                 } else {
 168                         execbasename = last_slash + 1;
 169                         if (*execbasename == '\0') {
 170                                 *last_slash = '\0';
 171                                 continue;
 172                         }
 173                         break;
 174                 }
 175         }
 176         return (execbasename);
 177 }
 178 
 179 static void
 180 usage(void)
 181 {
 182         (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname);
 183         (void) fprintf(stderr,
 184             gettext("\tNote: %s should not be run directly.\n"), progname);
 185         exit(2);
 186 }
 187 
 188 /* ARGSUSED */
 189 static void
 190 sigchld(int sig)
 191 {
 192 }
 193 
 194 char *
 195 localize_msg(char *locale, const char *msg)
 196 {
 197         char *out;
 198 
 199         (void) mutex_lock(&msglock);
 200         (void) setlocale(LC_MESSAGES, locale);
 201         out = gettext(msg);
 202         (void) setlocale(LC_MESSAGES, DEFAULT_LOCALE);
 203         (void) mutex_unlock(&msglock);
 204         return (out);
 205 }
 206 
 207 /* PRINTFLIKE3 */
 208 void
 209 zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...)
 210 {
 211         va_list alist;
 212         char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */
 213         char *bp;
 214         int saved_errno = errno;
 215 
 216         if (zlogp == NULL)
 217                 return;
 218         if (zlogp == &logsys)
 219                 (void) snprintf(buf, sizeof (buf), "[zone '%s'] ",
 220                     zone_name);
 221         else
 222                 buf[0] = '\0';
 223         bp = &(buf[strlen(buf)]);
 224 
 225         /*
 226          * In theory, the locale pointer should be set to either "C" or a
 227          * char array, so it should never be NULL
 228          */
 229         assert(zlogp->locale != NULL);
 230         /* Locale is per process, but we are multi-threaded... */
 231         fmt = localize_msg(zlogp->locale, fmt);
 232 
 233         va_start(alist, fmt);
 234         (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist);
 235         va_end(alist);
 236         bp = &(buf[strlen(buf)]);
 237         if (use_strerror)
 238                 (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s",
 239                     strerror(saved_errno));
 240         if (zlogp == &logsys) {
 241                 (void) syslog(LOG_ERR, "%s", buf);
 242         } else if (zlogp->logfile != NULL) {
 243                 (void) fprintf(zlogp->logfile, "%s\n", buf);
 244         } else {
 245                 size_t buflen;
 246                 size_t copylen;
 247 
 248                 buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf);
 249                 copylen = MIN(buflen, zlogp->loglen);
 250                 zlogp->log += copylen;
 251                 zlogp->loglen -= copylen;
 252         }
 253 }
 254 
 255 /*
 256  * Emit a warning for any boot arguments which are unrecognized.  Since
 257  * Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we
 258  * put the arguments into an argv style array, use getopt to process them,
 259  * and put the resultant argument string back into outargs.
 260  *
 261  * During the filtering, we pull out any arguments which are truly "boot"
 262  * arguments, leaving only those which are to be passed intact to the
 263  * progenitor process.  The one we support at the moment is -i, which
 264  * indicates to the kernel which program should be launched as 'init'.
 265  *
 266  * A return of Z_INVAL indicates specifically that the arguments are
 267  * not valid; this is a non-fatal error.  Except for Z_OK, all other return
 268  * values are treated as fatal.
 269  */
 270 static int
 271 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
 272     char *init_file, char *badarg)
 273 {
 274         int argc = 0, argc_save;
 275         int i;
 276         int err;
 277         char *arg, *lasts, **argv = NULL, **argv_save;
 278         char zonecfg_args[BOOTARGS_MAX];
 279         char scratchargs[BOOTARGS_MAX], *sargs;
 280         char c;
 281 
 282         bzero(outargs, BOOTARGS_MAX);
 283         bzero(badarg, BOOTARGS_MAX);
 284 
 285         /*
 286          * If the user didn't specify transient boot arguments, check
 287          * to see if there were any specified in the zone configuration,
 288          * and use them if applicable.
 289          */
 290         if (inargs == NULL || inargs[0] == '\0')  {
 291                 zone_dochandle_t handle;
 292                 if ((handle = zonecfg_init_handle()) == NULL) {
 293                         zerror(zlogp, B_TRUE,
 294                             "getting zone configuration handle");
 295                         return (Z_BAD_HANDLE);
 296                 }
 297                 err = zonecfg_get_snapshot_handle(zone_name, handle);
 298                 if (err != Z_OK) {
 299                         zerror(zlogp, B_FALSE,
 300                             "invalid configuration snapshot");
 301                         zonecfg_fini_handle(handle);
 302                         return (Z_BAD_HANDLE);
 303                 }
 304 
 305                 bzero(zonecfg_args, sizeof (zonecfg_args));
 306                 (void) zonecfg_get_bootargs(handle, zonecfg_args,
 307                     sizeof (zonecfg_args));
 308                 inargs = zonecfg_args;
 309                 zonecfg_fini_handle(handle);
 310         }
 311 
 312         if (strlen(inargs) >= BOOTARGS_MAX) {
 313                 zerror(zlogp, B_FALSE, "boot argument string too long");
 314                 return (Z_INVAL);
 315         }
 316 
 317         (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
 318         sargs = scratchargs;
 319         while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
 320                 sargs = NULL;
 321                 argc++;
 322         }
 323 
 324         if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) {
 325                 zerror(zlogp, B_FALSE, "memory allocation failed");
 326                 return (Z_NOMEM);
 327         }
 328 
 329         argv_save = argv;
 330         argc_save = argc;
 331 
 332         (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
 333         sargs = scratchargs;
 334         i = 0;
 335         while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
 336                 sargs = NULL;
 337                 if ((argv[i] = strdup(arg)) == NULL) {
 338                         err = Z_NOMEM;
 339                         zerror(zlogp, B_FALSE, "memory allocation failed");
 340                         goto done;
 341                 }
 342                 i++;
 343         }
 344 
 345         /*
 346          * We preserve compatibility with the Solaris system boot behavior,
 347          * which allows:
 348          *
 349          *      # reboot kernel/unix -s -m verbose
 350          *
 351          * In this example, kernel/unix tells the booter what file to
 352          * boot.  We don't want reboot in a zone to be gratuitously different,
 353          * so we silently ignore the boot file, if necessary.
 354          */
 355         if (argv[0] == NULL)
 356                 goto done;
 357 
 358         assert(argv[0][0] != ' ');
 359         assert(argv[0][0] != '\t');
 360 
 361         if (argv[0][0] != '-' && argv[0][0] != '\0') {
 362                 argv = &argv[1];
 363                 argc--;
 364         }
 365 
 366         optind = 0;
 367         opterr = 0;
 368         err = Z_OK;
 369         while ((c = getopt(argc, argv, "fi:m:s")) != -1) {
 370                 switch (c) {
 371                 case 'i':
 372                         /*
 373                          * -i is handled by the runtime and is not passed
 374                          * along to userland
 375                          */
 376                         (void) strlcpy(init_file, optarg, MAXPATHLEN);
 377                         break;
 378                 case 'f':
 379                         /* This has already been processed by zoneadm */
 380                         break;
 381                 case 'm':
 382                 case 's':
 383                         /* These pass through unmolested */
 384                         (void) snprintf(outargs, BOOTARGS_MAX,
 385                             "%s -%c %s ", outargs, c, optarg ? optarg : "");
 386                         break;
 387                 case '?':
 388                         /*
 389                          * We warn about unknown arguments but pass them
 390                          * along anyway-- if someone wants to develop their
 391                          * own init replacement, they can pass it whatever
 392                          * args they want.
 393                          */
 394                         err = Z_INVAL;
 395                         (void) snprintf(outargs, BOOTARGS_MAX,
 396                             "%s -%c", outargs, optopt);
 397                         (void) snprintf(badarg, BOOTARGS_MAX,
 398                             "%s -%c", badarg, optopt);
 399                         break;
 400                 }
 401         }
 402 
 403         /*
 404          * For Solaris Zones we warn about and discard non-option arguments.
 405          * Hence 'boot foo bar baz gub' --> 'boot'.  However, to be similar
 406          * to the kernel, we concat up all the other remaining boot args.
 407          * and warn on them as a group.
 408          */
 409         if (optind < argc) {
 410                 err = Z_INVAL;
 411                 while (optind < argc) {
 412                         (void) snprintf(badarg, BOOTARGS_MAX, "%s%s%s",
 413                             badarg, strlen(badarg) > 0 ? " " : "",
 414                             argv[optind]);
 415                         optind++;
 416                 }
 417                 zerror(zlogp, B_FALSE, "WARNING: Unused or invalid boot "
 418                     "arguments `%s'.", badarg);
 419         }
 420 
 421 done:
 422         for (i = 0; i < argc_save; i++) {
 423                 if (argv_save[i] != NULL)
 424                         free(argv_save[i]);
 425         }
 426         free(argv_save);
 427         return (err);
 428 }
 429 
 430 
 431 static int
 432 mkzonedir(zlog_t *zlogp)
 433 {
 434         struct stat st;
 435         /*
 436          * We must create and lock everyone but root out of ZONES_TMPDIR
 437          * since anyone can open any UNIX domain socket, regardless of
 438          * its file system permissions.  Sigh...
 439          */
 440         if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
 441                 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR);
 442                 return (-1);
 443         }
 444         /* paranoia */
 445         if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) {
 446                 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR);
 447                 return (-1);
 448         }
 449         (void) chmod(ZONES_TMPDIR, S_IRWXU);
 450         return (0);
 451 }
 452 
 453 /*
 454  * Run the brand's pre-state change callback, if it exists.
 455  */
 456 static int
 457 brand_prestatechg(zlog_t *zlogp, int state, int cmd)
 458 {
 459         char cmdbuf[2 * MAXPATHLEN];
 460         const char *altroot;
 461 
 462         if (pre_statechg_hook[0] == '\0')
 463                 return (0);
 464 
 465         altroot = zonecfg_get_root();
 466         if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", pre_statechg_hook,
 467             state, cmd, altroot) > sizeof (cmdbuf))
 468                 return (-1);
 469 
 470         if (do_subproc(zlogp, cmdbuf, NULL) != 0)
 471                 return (-1);
 472 
 473         return (0);
 474 }
 475 
 476 /*
 477  * Run the brand's post-state change callback, if it exists.
 478  */
 479 static int
 480 brand_poststatechg(zlog_t *zlogp, int state, int cmd)
 481 {
 482         char cmdbuf[2 * MAXPATHLEN];
 483         const char *altroot;
 484 
 485         if (post_statechg_hook[0] == '\0')
 486                 return (0);
 487 
 488         altroot = zonecfg_get_root();
 489         if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", post_statechg_hook,
 490             state, cmd, altroot) > sizeof (cmdbuf))
 491                 return (-1);
 492 
 493         if (do_subproc(zlogp, cmdbuf, NULL) != 0)
 494                 return (-1);
 495 
 496         return (0);
 497 }
 498 
 499 /*
 500  * Notify zonestatd of the new zone.  If zonestatd is not running, this
 501  * will do nothing.
 502  */
 503 static void
 504 notify_zonestatd(zoneid_t zoneid)
 505 {
 506         int cmd[2];
 507         int fd;
 508         door_arg_t params;
 509 
 510         fd = open(ZS_DOOR_PATH, O_RDONLY);
 511         if (fd < 0)
 512                 return;
 513 
 514         cmd[0] = ZSD_CMD_NEW_ZONE;
 515         cmd[1] = zoneid;
 516         params.data_ptr = (char *)&cmd;
 517         params.data_size = sizeof (cmd);
 518         params.desc_ptr = NULL;
 519         params.desc_num = 0;
 520         params.rbuf = NULL;
 521         params.rsize = NULL;
 522         (void) door_call(fd, &params);
 523         (void) close(fd);
 524 }
 525 
 526 /*
 527  * Bring a zone up to the pre-boot "ready" stage.  The mount_cmd argument is
 528  * 'true' if this is being invoked as part of the processing for the "mount"
 529  * subcommand.
 530  */
 531 static int
 532 zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate)
 533 {
 534         int err;
 535 
 536         if (brand_prestatechg(zlogp, zstate, Z_READY) != 0)
 537                 return (-1);
 538 
 539         if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) {
 540                 zerror(zlogp, B_FALSE, "unable to create snapshot: %s",
 541                     zonecfg_strerror(err));
 542                 goto bad;
 543         }
 544 
 545         if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) {
 546                 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
 547                         zerror(zlogp, B_FALSE, "destroying snapshot: %s",
 548                             zonecfg_strerror(err));
 549                 goto bad;
 550         }
 551         if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) {
 552                 bringup_failure_recovery = B_TRUE;
 553                 (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE);
 554                 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
 555                         zerror(zlogp, B_FALSE, "destroying snapshot: %s",
 556                             zonecfg_strerror(err));
 557                 goto bad;
 558         }
 559 
 560         if (brand_poststatechg(zlogp, zstate, Z_READY) != 0)
 561                 goto bad;
 562 
 563         return (0);
 564 
 565 bad:
 566         /*
 567          * If something goes wrong, we up the zones's state to the target
 568          * state, READY, and then invoke the hook as if we're halting.
 569          */
 570         (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT);
 571         return (-1);
 572 }
 573 
 574 int
 575 init_template(void)
 576 {
 577         int fd;
 578         int err = 0;
 579 
 580         fd = open64(CTFS_ROOT "/process/template", O_RDWR);
 581         if (fd == -1)
 582                 return (-1);
 583 
 584         /*
 585          * For now, zoneadmd doesn't do anything with the contract.
 586          * Deliver no events, don't inherit, and allow it to be orphaned.
 587          */
 588         err |= ct_tmpl_set_critical(fd, 0);
 589         err |= ct_tmpl_set_informative(fd, 0);
 590         err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
 591         err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
 592         if (err || ct_tmpl_activate(fd)) {
 593                 (void) close(fd);
 594                 return (-1);
 595         }
 596 
 597         return (fd);
 598 }
 599 
 600 typedef struct fs_callback {
 601         zlog_t          *zlogp;
 602         zoneid_t        zoneid;
 603         boolean_t       mount_cmd;
 604 } fs_callback_t;
 605 
 606 static int
 607 mount_early_fs(void *data, const char *spec, const char *dir,
 608     const char *fstype, const char *opt)
 609 {
 610         zlog_t *zlogp = ((fs_callback_t *)data)->zlogp;
 611         zoneid_t zoneid = ((fs_callback_t *)data)->zoneid;
 612         boolean_t mount_cmd = ((fs_callback_t *)data)->mount_cmd;
 613         char rootpath[MAXPATHLEN];
 614         pid_t child;
 615         int child_status;
 616         int tmpl_fd;
 617         int rv;
 618         ctid_t ct;
 619 
 620         /* determine the zone rootpath */
 621         if (mount_cmd) {
 622                 char zonepath[MAXPATHLEN];
 623                 char luroot[MAXPATHLEN];
 624 
 625                 if (zone_get_zonepath(zone_name,
 626                     zonepath, sizeof (zonepath)) != Z_OK) {
 627                         zerror(zlogp, B_FALSE, "unable to determine zone path");
 628                         return (-1);
 629                 }
 630 
 631                 (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath);
 632                 resolve_lofs(zlogp, luroot, sizeof (luroot));
 633                 (void) strlcpy(rootpath, luroot, sizeof (rootpath));
 634         } else {
 635                 if (zone_get_rootpath(zone_name,
 636                     rootpath, sizeof (rootpath)) != Z_OK) {
 637                         zerror(zlogp, B_FALSE, "unable to determine zone root");
 638                         return (-1);
 639                 }
 640         }
 641 
 642         if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, fstype)) < 0) {
 643                 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point",
 644                     rootpath, dir);
 645                 return (-1);
 646         } else if (rv > 0) {
 647                 /* The mount point path doesn't exist, create it now. */
 648                 if (make_one_dir(zlogp, rootpath, dir,
 649                     DEFAULT_DIR_MODE, DEFAULT_DIR_USER,
 650                     DEFAULT_DIR_GROUP) != 0) {
 651                         zerror(zlogp, B_FALSE, "failed to create mount point");
 652                         return (-1);
 653                 }
 654 
 655                 /*
 656                  * Now this might seem weird, but we need to invoke
 657                  * valid_mount_path() again.  Why?  Because it checks
 658                  * to make sure that the mount point path is canonical,
 659                  * which it can only do if the path exists, so now that
 660                  * we've created the path we have to verify it again.
 661                  */
 662                 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir,
 663                     fstype)) < 0) {
 664                         zerror(zlogp, B_FALSE,
 665                             "%s%s is not a valid mount point", rootpath, dir);
 666                         return (-1);
 667                 }
 668         }
 669 
 670         if ((tmpl_fd = init_template()) == -1) {
 671                 zerror(zlogp, B_TRUE, "failed to create contract");
 672                 return (-1);
 673         }
 674 
 675         if ((child = fork()) == -1) {
 676                 (void) ct_tmpl_clear(tmpl_fd);
 677                 (void) close(tmpl_fd);
 678                 zerror(zlogp, B_TRUE, "failed to fork");
 679                 return (-1);
 680 
 681         } else if (child == 0) {        /* child */
 682                 char opt_buf[MAX_MNTOPT_STR];
 683                 int optlen = 0;
 684                 int mflag = MS_DATA;
 685 
 686                 (void) ct_tmpl_clear(tmpl_fd);
 687                 /*
 688                  * Even though there are no procs running in the zone, we
 689                  * do this for paranoia's sake.
 690                  */
 691                 (void) closefrom(0);
 692 
 693                 if (zone_enter(zoneid) == -1) {
 694                         _exit(errno);
 695                 }
 696                 if (opt != NULL) {
 697                         /*
 698                          * The mount() system call is incredibly annoying.
 699                          * If options are specified, we need to copy them
 700                          * into a temporary buffer since the mount() system
 701                          * call will overwrite the options string.  It will
 702                          * also fail if the new option string it wants to
 703                          * write is bigger than the one we passed in, so
 704                          * you must pass in a buffer of the maximum possible
 705                          * option string length.  sigh.
 706                          */
 707                         (void) strlcpy(opt_buf, opt, sizeof (opt_buf));
 708                         opt = opt_buf;
 709                         optlen = MAX_MNTOPT_STR;
 710                         mflag = MS_OPTIONSTR;
 711                 }
 712                 if (mount(spec, dir, mflag, fstype, NULL, 0, opt, optlen) != 0)
 713                         _exit(errno);
 714                 _exit(0);
 715         }
 716 
 717         /* parent */
 718         if (contract_latest(&ct) == -1)
 719                 ct = -1;
 720         (void) ct_tmpl_clear(tmpl_fd);
 721         (void) close(tmpl_fd);
 722         if (waitpid(child, &child_status, 0) != child) {
 723                 /* unexpected: we must have been signalled */
 724                 (void) contract_abandon_id(ct);
 725                 return (-1);
 726         }
 727         (void) contract_abandon_id(ct);
 728         if (WEXITSTATUS(child_status) != 0) {
 729                 errno = WEXITSTATUS(child_status);
 730                 zerror(zlogp, B_TRUE, "mount of %s failed", dir);
 731                 return (-1);
 732         }
 733 
 734         return (0);
 735 }
 736 
 737 /*
 738  * If retstr is not NULL, the output of the subproc is returned in the str,
 739  * otherwise it is output using zerror().  Any memory allocated for retstr
 740  * should be freed by the caller.
 741  */
 742 int
 743 do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr)
 744 {
 745         char buf[1024];         /* arbitrary large amount */
 746         char *inbuf;
 747         FILE *file;
 748         int status;
 749         int rd_cnt;
 750 
 751         if (retstr != NULL) {
 752                 if ((*retstr = malloc(1024)) == NULL) {
 753                         zerror(zlogp, B_FALSE, "out of memory");
 754                         return (-1);
 755                 }
 756                 inbuf = *retstr;
 757                 rd_cnt = 0;
 758         } else {
 759                 inbuf = buf;
 760         }
 761 
 762         file = popen(cmdbuf, "r");
 763         if (file == NULL) {
 764                 zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf);
 765                 return (-1);
 766         }
 767 
 768         while (fgets(inbuf, 1024, file) != NULL) {
 769                 if (retstr == NULL) {
 770                         if (zlogp != &logsys)
 771                                 zerror(zlogp, B_FALSE, "%s", inbuf);
 772                 } else {
 773                         char *p;
 774 
 775                         rd_cnt += 1024 - 1;
 776                         if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) {
 777                                 zerror(zlogp, B_FALSE, "out of memory");
 778                                 (void) pclose(file);
 779                                 return (-1);
 780                         }
 781 
 782                         *retstr = p;
 783                         inbuf = *retstr + rd_cnt;
 784                 }
 785         }
 786         status = pclose(file);
 787 
 788         if (WIFSIGNALED(status)) {
 789                 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to "
 790                     "signal %d", cmdbuf, WTERMSIG(status));
 791                 return (-1);
 792         }
 793         assert(WIFEXITED(status));
 794         if (WEXITSTATUS(status) == ZEXIT_EXEC) {
 795                 zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf);
 796                 return (-1);
 797         }
 798         return (WEXITSTATUS(status));
 799 }
 800 
 801 static int
 802 zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
 803 {
 804         zoneid_t zoneid;
 805         struct stat st;
 806         char zpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN];
 807         char nbootargs[BOOTARGS_MAX];
 808         char cmdbuf[MAXPATHLEN];
 809         fs_callback_t cb;
 810         brand_handle_t bh;
 811         zone_iptype_t iptype;
 812         boolean_t links_loaded = B_FALSE;
 813         dladm_status_t status;
 814         char errmsg[DLADM_STRSIZE];
 815         int err;
 816 
 817         if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0)
 818                 return (-1);
 819 
 820         if ((zoneid = getzoneidbyname(zone_name)) == -1) {
 821                 zerror(zlogp, B_TRUE, "unable to get zoneid");
 822                 goto bad;
 823         }
 824 
 825         cb.zlogp = zlogp;
 826         cb.zoneid = zoneid;
 827         cb.mount_cmd = B_FALSE;
 828 
 829         /* Get a handle to the brand info for this zone */
 830         if ((bh = brand_open(brand_name)) == NULL) {
 831                 zerror(zlogp, B_FALSE, "unable to determine zone brand");
 832                 goto bad;
 833         }
 834 
 835         /*
 836          * Get the list of filesystems to mount from the brand
 837          * configuration.  These mounts are done via a thread that will
 838          * enter the zone, so they are done from within the context of the
 839          * zone.
 840          */
 841         if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) {
 842                 zerror(zlogp, B_FALSE, "unable to mount filesystems");
 843                 brand_close(bh);
 844                 goto bad;
 845         }
 846 
 847         /*
 848          * Get the brand's boot callback if it exists.
 849          */
 850         if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
 851                 zerror(zlogp, B_FALSE, "unable to determine zone path");
 852                 brand_close(bh);
 853                 goto bad;
 854         }
 855         (void) strcpy(cmdbuf, EXEC_PREFIX);
 856         if (brand_get_boot(bh, zone_name, zpath, cmdbuf + EXEC_LEN,
 857             sizeof (cmdbuf) - EXEC_LEN) != 0) {
 858                 zerror(zlogp, B_FALSE,
 859                     "unable to determine branded zone's boot callback");
 860                 brand_close(bh);
 861                 goto bad;
 862         }
 863 
 864         /* Get the path for this zone's init(1M) (or equivalent) process.  */
 865         if (brand_get_initname(bh, init_file, MAXPATHLEN) != 0) {
 866                 zerror(zlogp, B_FALSE,
 867                     "unable to determine zone's init(1M) location");
 868                 brand_close(bh);
 869                 goto bad;
 870         }
 871 
 872         brand_close(bh);
 873 
 874         err = filter_bootargs(zlogp, bootargs, nbootargs, init_file,
 875             bad_boot_arg);
 876         if (err == Z_INVAL)
 877                 eventstream_write(Z_EVT_ZONE_BADARGS);
 878         else if (err != Z_OK)
 879                 goto bad;
 880 
 881         assert(init_file[0] != '\0');
 882 
 883         /* Try to anticipate possible problems: Make sure init is executable. */
 884         if (zone_get_rootpath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
 885                 zerror(zlogp, B_FALSE, "unable to determine zone root");
 886                 goto bad;
 887         }
 888 
 889         (void) snprintf(initpath, sizeof (initpath), "%s%s", zpath, init_file);
 890 
 891         if (stat(initpath, &st) == -1) {
 892                 zerror(zlogp, B_TRUE, "could not stat %s", initpath);
 893                 goto bad;
 894         }
 895 
 896         if ((st.st_mode & S_IXUSR) == 0) {
 897                 zerror(zlogp, B_FALSE, "%s is not executable", initpath);
 898                 goto bad;
 899         }
 900 
 901         /*
 902          * Exclusive stack zones interact with the dlmgmtd running in the
 903          * global zone.  dladm_zone_boot() tells dlmgmtd that this zone is
 904          * booting, and loads its datalinks from the zone's datalink
 905          * configuration file.
 906          */
 907         if (vplat_get_iptype(zlogp, &iptype) == 0 && iptype == ZS_EXCLUSIVE) {
 908                 status = dladm_zone_boot(dld_handle, zoneid);
 909                 if (status != DLADM_STATUS_OK) {
 910                         zerror(zlogp, B_FALSE, "unable to load zone datalinks: "
 911                             " %s", dladm_status2str(status, errmsg));
 912                         goto bad;
 913                 }
 914                 links_loaded = B_TRUE;
 915         }
 916 
 917         /*
 918          * If there is a brand 'boot' callback, execute it now to give the
 919          * brand one last chance to do any additional setup before the zone
 920          * is booted.
 921          */
 922         if ((strlen(cmdbuf) > EXEC_LEN) &&
 923             (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) {
 924                 zerror(zlogp, B_FALSE, "%s failed", cmdbuf);
 925                 goto bad;
 926         }
 927 
 928         if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) {
 929                 zerror(zlogp, B_TRUE, "could not set zone boot file");
 930                 goto bad;
 931         }
 932 
 933         if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) {
 934                 zerror(zlogp, B_TRUE, "could not set zone boot arguments");
 935                 goto bad;
 936         }
 937 
 938         /*
 939          * Inform zonestatd of a new zone so that it can install a door for
 940          * the zone to contact it.
 941          */
 942         notify_zonestatd(zone_id);
 943 
 944         if (zone_boot(zoneid) == -1) {
 945                 zerror(zlogp, B_TRUE, "unable to boot zone");
 946                 goto bad;
 947         }
 948 
 949         if (brand_poststatechg(zlogp, zstate, Z_BOOT) != 0)
 950                 goto bad;
 951 
 952         return (0);
 953 
 954 bad:
 955         /*
 956          * If something goes wrong, we up the zones's state to the target
 957          * state, RUNNING, and then invoke the hook as if we're halting.
 958          */
 959         (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT);
 960         if (links_loaded)
 961                 (void) dladm_zone_halt(dld_handle, zoneid);
 962         return (-1);
 963 }
 964 
 965 static int
 966 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate)
 967 {
 968         int err;
 969 
 970         if (brand_prestatechg(zlogp, zstate, Z_HALT) != 0)
 971                 return (-1);
 972 
 973         if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) {
 974                 if (!bringup_failure_recovery)
 975                         zerror(zlogp, B_FALSE, "unable to destroy zone");
 976                 return (-1);
 977         }
 978 
 979         if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
 980                 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
 981                     zonecfg_strerror(err));
 982 
 983         if (brand_poststatechg(zlogp, zstate, Z_HALT) != 0)
 984                 return (-1);
 985 
 986         return (0);
 987 }
 988 
 989 /*
 990  * Generate AUE_zone_state for a command that boots a zone.
 991  */
 992 static void
 993 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val,
 994     char *new_state)
 995 {
 996         adt_session_data_t      *ah;
 997         adt_event_data_t        *event;
 998         int                     pass_fail, fail_reason;
 999 
1000         if (!adt_audit_enabled())
1001                 return;
1002 
1003         if (return_val == 0) {
1004                 pass_fail = ADT_SUCCESS;
1005                 fail_reason = ADT_SUCCESS;
1006         } else {
1007                 pass_fail = ADT_FAILURE;
1008                 fail_reason = ADT_FAIL_VALUE_PROGRAM;
1009         }
1010 
1011         if (adt_start_session(&ah, NULL, 0)) {
1012                 zerror(zlogp, B_TRUE, gettext("audit failure."));
1013                 return;
1014         }
1015         if (adt_set_from_ucred(ah, uc, ADT_NEW)) {
1016                 zerror(zlogp, B_TRUE, gettext("audit failure."));
1017                 (void) adt_end_session(ah);
1018                 return;
1019         }
1020 
1021         event = adt_alloc_event(ah, ADT_zone_state);
1022         if (event == NULL) {
1023                 zerror(zlogp, B_TRUE, gettext("audit failure."));
1024                 (void) adt_end_session(ah);
1025                 return;
1026         }
1027         event->adt_zone_state.zonename = zone_name;
1028         event->adt_zone_state.new_state = new_state;
1029 
1030         if (adt_put_event(event, pass_fail, fail_reason))
1031                 zerror(zlogp, B_TRUE, gettext("audit failure."));
1032 
1033         adt_free_event(event);
1034 
1035         (void) adt_end_session(ah);
1036 }
1037 
1038 /*
1039  * The main routine for the door server that deals with zone state transitions.
1040  */
1041 /* ARGSUSED */
1042 static void
1043 server(void *cookie, char *args, size_t alen, door_desc_t *dp,
1044     uint_t n_desc)
1045 {
1046         ucred_t *uc = NULL;
1047         const priv_set_t *eset;
1048 
1049         zone_state_t zstate;
1050         zone_cmd_t cmd;
1051         zone_cmd_arg_t *zargp;
1052 
1053         boolean_t kernelcall;
1054 
1055         int rval = -1;
1056         uint64_t uniqid;
1057         zoneid_t zoneid = -1;
1058         zlog_t zlog;
1059         zlog_t *zlogp;
1060         zone_cmd_rval_t *rvalp;
1061         size_t rlen = getpagesize(); /* conservative */
1062         fs_callback_t cb;
1063         brand_handle_t bh;
1064 
1065         /* LINTED E_BAD_PTR_CAST_ALIGN */
1066         zargp = (zone_cmd_arg_t *)args;
1067 
1068         /*
1069          * When we get the door unref message, we've fdetach'd the door, and
1070          * it is time for us to shut down zoneadmd.
1071          */
1072         if (zargp == DOOR_UNREF_DATA) {
1073                 /*
1074                  * See comment at end of main() for info on the last rites.
1075                  */
1076                 exit(0);
1077         }
1078 
1079         if (zargp == NULL) {
1080                 (void) door_return(NULL, 0, 0, 0);
1081         }
1082 
1083         rvalp = alloca(rlen);
1084         bzero(rvalp, rlen);
1085         zlog.logfile = NULL;
1086         zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1;
1087         zlog.buf = rvalp->errbuf;
1088         zlog.log = zlog.buf;
1089         /* defer initialization of zlog.locale until after credential check */
1090         zlogp = &zlog;
1091 
1092         if (alen != sizeof (zone_cmd_arg_t)) {
1093                 /*
1094                  * This really shouldn't be happening.
1095                  */
1096                 zerror(&logsys, B_FALSE, "argument size (%d bytes) "
1097                     "unexpected (expected %d bytes)", alen,
1098                     sizeof (zone_cmd_arg_t));
1099                 goto out;
1100         }
1101         cmd = zargp->cmd;
1102 
1103         if (door_ucred(&uc) != 0) {
1104                 zerror(&logsys, B_TRUE, "door_ucred");
1105                 goto out;
1106         }
1107         eset = ucred_getprivset(uc, PRIV_EFFECTIVE);
1108         if (ucred_getzoneid(uc) != GLOBAL_ZONEID ||
1109             (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) :
1110             ucred_geteuid(uc) != 0)) {
1111                 zerror(&logsys, B_FALSE, "insufficient privileges");
1112                 goto out;
1113         }
1114 
1115         kernelcall = ucred_getpid(uc) == 0;
1116 
1117         /*
1118          * This is safe because we only use a zlog_t throughout the
1119          * duration of a door call; i.e., by the time the pointer
1120          * might become invalid, the door call would be over.
1121          */
1122         zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale;
1123 
1124         (void) mutex_lock(&lock);
1125 
1126         /*
1127          * Once we start to really die off, we don't want more connections.
1128          */
1129         if (in_death_throes) {
1130                 (void) mutex_unlock(&lock);
1131                 ucred_free(uc);
1132                 (void) door_return(NULL, 0, 0, 0);
1133                 thr_exit(NULL);
1134         }
1135 
1136         /*
1137          * Check for validity of command.
1138          */
1139         if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_FORCEBOOT &&
1140             cmd != Z_REBOOT && cmd != Z_HALT && cmd != Z_NOTE_UNINSTALLING &&
1141             cmd != Z_MOUNT && cmd != Z_FORCEMOUNT && cmd != Z_UNMOUNT) {
1142                 zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd);
1143                 goto out;
1144         }
1145 
1146         if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) {
1147                 /*
1148                  * Can't happen
1149                  */
1150                 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d",
1151                     cmd);
1152                 goto out;
1153         }
1154         /*
1155          * We ignore the possibility of someone calling zone_create(2)
1156          * explicitly; all requests must come through zoneadmd.
1157          */
1158         if (zone_get_state(zone_name, &zstate) != Z_OK) {
1159                 /*
1160                  * Something terribly wrong happened
1161                  */
1162                 zerror(&logsys, B_FALSE, "unable to determine state of zone");
1163                 goto out;
1164         }
1165 
1166         if (kernelcall) {
1167                 /*
1168                  * Kernel-initiated requests may lose their validity if the
1169                  * zone_t the kernel was referring to has gone away.
1170                  */
1171                 if ((zoneid = getzoneidbyname(zone_name)) == -1 ||
1172                     zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid,
1173                     sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) {
1174                         /*
1175                          * We're not talking about the same zone. The request
1176                          * must have arrived too late.  Return error.
1177                          */
1178                         rval = -1;
1179                         goto out;
1180                 }
1181                 zlogp = &logsys;    /* Log errors to syslog */
1182         }
1183 
1184         /*
1185          * If we are being asked to forcibly mount or boot a zone, we
1186          * pretend that an INCOMPLETE zone is actually INSTALLED.
1187          */
1188         if (zstate == ZONE_STATE_INCOMPLETE &&
1189             (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT))
1190                 zstate = ZONE_STATE_INSTALLED;
1191 
1192         switch (zstate) {
1193         case ZONE_STATE_CONFIGURED:
1194         case ZONE_STATE_INCOMPLETE:
1195                 /*
1196                  * Not our area of expertise; we just print a nice message
1197                  * and die off.
1198                  */
1199                 zerror(zlogp, B_FALSE,
1200                     "%s operation is invalid for zones in state '%s'",
1201                     z_cmd_name(cmd), zone_state_str(zstate));
1202                 break;
1203 
1204         case ZONE_STATE_INSTALLED:
1205                 switch (cmd) {
1206                 case Z_READY:
1207                         rval = zone_ready(zlogp, Z_MNT_BOOT, zstate);
1208                         if (rval == 0)
1209                                 eventstream_write(Z_EVT_ZONE_READIED);
1210                         break;
1211                 case Z_BOOT:
1212                 case Z_FORCEBOOT:
1213                         eventstream_write(Z_EVT_ZONE_BOOTING);
1214                         if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
1215                             == 0) {
1216                                 rval = zone_bootup(zlogp, zargp->bootbuf,
1217                                     zstate);
1218                         }
1219                         audit_put_record(zlogp, uc, rval, "boot");
1220                         if (rval != 0) {
1221                                 bringup_failure_recovery = B_TRUE;
1222                                 (void) zone_halt(zlogp, B_FALSE, B_FALSE,
1223                                     zstate);
1224                                 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1225                         }
1226                         break;
1227                 case Z_HALT:
1228                         if (kernelcall) /* Invalid; can't happen */
1229                                 abort();
1230                         /*
1231                          * We could have two clients racing to halt this
1232                          * zone; the second client loses, but his request
1233                          * doesn't fail, since the zone is now in the desired
1234                          * state.
1235                          */
1236                         zerror(zlogp, B_FALSE, "zone is already halted");
1237                         rval = 0;
1238                         break;
1239                 case Z_REBOOT:
1240                         if (kernelcall) /* Invalid; can't happen */
1241                                 abort();
1242                         zerror(zlogp, B_FALSE, "%s operation is invalid "
1243                             "for zones in state '%s'", z_cmd_name(cmd),
1244                             zone_state_str(zstate));
1245                         rval = -1;
1246                         break;
1247                 case Z_NOTE_UNINSTALLING:
1248                         if (kernelcall) /* Invalid; can't happen */
1249                                 abort();
1250                         /*
1251                          * Tell the console to print out a message about this.
1252                          * Once it does, we will be in_death_throes.
1253                          */
1254                         eventstream_write(Z_EVT_ZONE_UNINSTALLING);
1255                         break;
1256                 case Z_MOUNT:
1257                 case Z_FORCEMOUNT:
1258                         if (kernelcall) /* Invalid; can't happen */
1259                                 abort();
1260                         if (!zone_isnative && !zone_iscluster &&
1261                             !zone_islabeled) {
1262                                 /*
1263                                  * -U mounts the zone without lofs mounting
1264                                  * zone file systems back into the scratch
1265                                  * zone.  This is required when mounting
1266                                  * non-native branded zones.
1267                                  */
1268                                 (void) strlcpy(zargp->bootbuf, "-U",
1269                                     BOOTARGS_MAX);
1270                         }
1271 
1272                         rval = zone_ready(zlogp,
1273                             strcmp(zargp->bootbuf, "-U") == 0 ?
1274                             Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate);
1275                         if (rval != 0)
1276                                 break;
1277 
1278                         eventstream_write(Z_EVT_ZONE_READIED);
1279 
1280                         /*
1281                          * Get a handle to the default brand info.
1282                          * We must always use the default brand file system
1283                          * list when mounting the zone.
1284                          */
1285                         if ((bh = brand_open(default_brand)) == NULL) {
1286                                 rval = -1;
1287                                 break;
1288                         }
1289 
1290                         /*
1291                          * Get the list of filesystems to mount from
1292                          * the brand configuration.  These mounts are done
1293                          * via a thread that will enter the zone, so they
1294                          * are done from within the context of the zone.
1295                          */
1296                         cb.zlogp = zlogp;
1297                         cb.zoneid = zone_id;
1298                         cb.mount_cmd = B_TRUE;
1299                         rval = brand_platform_iter_mounts(bh,
1300                             mount_early_fs, &cb);
1301 
1302                         brand_close(bh);
1303 
1304                         /*
1305                          * Ordinarily, /dev/fd would be mounted inside the zone
1306                          * by svc:/system/filesystem/usr:default, but since
1307                          * we're not booting the zone, we need to do this
1308                          * manually.
1309                          */
1310                         if (rval == 0)
1311                                 rval = mount_early_fs(&cb,
1312                                     "fd", "/dev/fd", "fd", NULL);
1313                         break;
1314                 case Z_UNMOUNT:
1315                         if (kernelcall) /* Invalid; can't happen */
1316                                 abort();
1317                         zerror(zlogp, B_FALSE, "zone is already unmounted");
1318                         rval = 0;
1319                         break;
1320                 }
1321                 break;
1322 
1323         case ZONE_STATE_READY:
1324                 switch (cmd) {
1325                 case Z_READY:
1326                         /*
1327                          * We could have two clients racing to ready this
1328                          * zone; the second client loses, but his request
1329                          * doesn't fail, since the zone is now in the desired
1330                          * state.
1331                          */
1332                         zerror(zlogp, B_FALSE, "zone is already ready");
1333                         rval = 0;
1334                         break;
1335                 case Z_BOOT:
1336                         (void) strlcpy(boot_args, zargp->bootbuf,
1337                             sizeof (boot_args));
1338                         eventstream_write(Z_EVT_ZONE_BOOTING);
1339                         rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
1340                         audit_put_record(zlogp, uc, rval, "boot");
1341                         if (rval != 0) {
1342                                 bringup_failure_recovery = B_TRUE;
1343                                 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1344                                     zstate);
1345                                 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1346                         }
1347                         boot_args[0] = '\0';
1348                         break;
1349                 case Z_HALT:
1350                         if (kernelcall) /* Invalid; can't happen */
1351                                 abort();
1352                         if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
1353                             != 0)
1354                                 break;
1355                         eventstream_write(Z_EVT_ZONE_HALTED);
1356                         break;
1357                 case Z_REBOOT:
1358                 case Z_NOTE_UNINSTALLING:
1359                 case Z_MOUNT:
1360                 case Z_UNMOUNT:
1361                         if (kernelcall) /* Invalid; can't happen */
1362                                 abort();
1363                         zerror(zlogp, B_FALSE, "%s operation is invalid "
1364                             "for zones in state '%s'", z_cmd_name(cmd),
1365                             zone_state_str(zstate));
1366                         rval = -1;
1367                         break;
1368                 }
1369                 break;
1370 
1371         case ZONE_STATE_MOUNTED:
1372                 switch (cmd) {
1373                 case Z_UNMOUNT:
1374                         if (kernelcall) /* Invalid; can't happen */
1375                                 abort();
1376                         rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate);
1377                         if (rval == 0) {
1378                                 eventstream_write(Z_EVT_ZONE_HALTED);
1379                                 (void) sema_post(&scratch_sem);
1380                         }
1381                         break;
1382                 default:
1383                         if (kernelcall) /* Invalid; can't happen */
1384                                 abort();
1385                         zerror(zlogp, B_FALSE, "%s operation is invalid "
1386                             "for zones in state '%s'", z_cmd_name(cmd),
1387                             zone_state_str(zstate));
1388                         rval = -1;
1389                         break;
1390                 }
1391                 break;
1392 
1393         case ZONE_STATE_RUNNING:
1394         case ZONE_STATE_SHUTTING_DOWN:
1395         case ZONE_STATE_DOWN:
1396                 switch (cmd) {
1397                 case Z_READY:
1398                         if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
1399                             != 0)
1400                                 break;
1401                         if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) == 0)
1402                                 eventstream_write(Z_EVT_ZONE_READIED);
1403                         else
1404                                 eventstream_write(Z_EVT_ZONE_HALTED);
1405                         break;
1406                 case Z_BOOT:
1407                         /*
1408                          * We could have two clients racing to boot this
1409                          * zone; the second client loses, but his request
1410                          * doesn't fail, since the zone is now in the desired
1411                          * state.
1412                          */
1413                         zerror(zlogp, B_FALSE, "zone is already booted");
1414                         rval = 0;
1415                         break;
1416                 case Z_HALT:
1417                         if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
1418                             != 0)
1419                                 break;
1420                         eventstream_write(Z_EVT_ZONE_HALTED);
1421                         break;
1422                 case Z_REBOOT:
1423                         (void) strlcpy(boot_args, zargp->bootbuf,
1424                             sizeof (boot_args));
1425                         eventstream_write(Z_EVT_ZONE_REBOOTING);
1426                         if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
1427                             != 0) {
1428                                 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1429                                 boot_args[0] = '\0';
1430                                 break;
1431                         }
1432                         if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
1433                             != 0) {
1434                                 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1435                                 boot_args[0] = '\0';
1436                                 break;
1437                         }
1438                         rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
1439                         audit_put_record(zlogp, uc, rval, "reboot");
1440                         if (rval != 0) {
1441                                 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1442                                     zstate);
1443                                 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1444                         }
1445                         boot_args[0] = '\0';
1446                         break;
1447                 case Z_NOTE_UNINSTALLING:
1448                 case Z_MOUNT:
1449                 case Z_UNMOUNT:
1450                         zerror(zlogp, B_FALSE, "%s operation is invalid "
1451                             "for zones in state '%s'", z_cmd_name(cmd),
1452                             zone_state_str(zstate));
1453                         rval = -1;
1454                         break;
1455                 }
1456                 break;
1457         default:
1458                 abort();
1459         }
1460 
1461         /*
1462          * Because the state of the zone may have changed, we make sure
1463          * to wake the console poller, which is in charge of initiating
1464          * the shutdown procedure as necessary.
1465          */
1466         eventstream_write(Z_EVT_NULL);
1467 
1468 out:
1469         (void) mutex_unlock(&lock);
1470         if (kernelcall) {
1471                 rvalp = NULL;
1472                 rlen = 0;
1473         } else {
1474                 rvalp->rval = rval;
1475         }
1476         if (uc != NULL)
1477                 ucred_free(uc);
1478         (void) door_return((char *)rvalp, rlen, NULL, 0);
1479         thr_exit(NULL);
1480 }
1481 
1482 static int
1483 setup_door(zlog_t *zlogp)
1484 {
1485         if ((zone_door = door_create(server, NULL,
1486             DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) {
1487                 zerror(zlogp, B_TRUE, "%s failed", "door_create");
1488                 return (-1);
1489         }
1490         (void) fdetach(zone_door_path);
1491 
1492         if (fattach(zone_door, zone_door_path) != 0) {
1493                 zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path);
1494                 (void) door_revoke(zone_door);
1495                 (void) fdetach(zone_door_path);
1496                 zone_door = -1;
1497                 return (-1);
1498         }
1499         return (0);
1500 }
1501 
1502 /*
1503  * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this
1504  * is where zoneadmd itself will check to see that another instance of
1505  * zoneadmd isn't already controlling this zone.
1506  *
1507  * The idea here is that we want to open the path to which we will
1508  * attach our door, lock it, and then make sure that no-one has beat us
1509  * to fattach(3c)ing onto it.
1510  *
1511  * fattach(3c) is really a mount, so there are actually two possible
1512  * vnodes we could be dealing with.  Our strategy is as follows:
1513  *
1514  * - If the file we opened is a regular file (common case):
1515  *      There is no fattach(3c)ed door, so we have a chance of becoming
1516  *      the managing zoneadmd. We attempt to lock the file: if it is
1517  *      already locked, that means someone else raced us here, so we
1518  *      lose and give up.  zoneadm(1m) will try to contact the zoneadmd
1519  *      that beat us to it.
1520  *
1521  * - If the file we opened is a namefs file:
1522  *      This means there is already an established door fattach(3c)'ed
1523  *      to the rendezvous path.  We've lost the race, so we give up.
1524  *      Note that in this case we also try to grab the file lock, and
1525  *      will succeed in acquiring it since the vnode locked by the
1526  *      "winning" zoneadmd was a regular one, and the one we locked was
1527  *      the fattach(3c)'ed door node.  At any rate, no harm is done, and
1528  *      we just return to zoneadm(1m) which knows to retry.
1529  */
1530 static int
1531 make_daemon_exclusive(zlog_t *zlogp)
1532 {
1533         int doorfd = -1;
1534         int err, ret = -1;
1535         struct stat st;
1536         struct flock flock;
1537         zone_state_t zstate;
1538 
1539 top:
1540         if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
1541                 zerror(zlogp, B_FALSE, "failed to get zone state: %s",
1542                     zonecfg_strerror(err));
1543                 goto out;
1544         }
1545         if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR,
1546             S_IREAD|S_IWRITE)) < 0) {
1547                 zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path);
1548                 goto out;
1549         }
1550         if (fstat(doorfd, &st) < 0) {
1551                 zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path);
1552                 goto out;
1553         }
1554         /*
1555          * Lock the file to synchronize with other zoneadmd
1556          */
1557         flock.l_type = F_WRLCK;
1558         flock.l_whence = SEEK_SET;
1559         flock.l_start = (off_t)0;
1560         flock.l_len = (off_t)0;
1561         if (fcntl(doorfd, F_SETLK, &flock) < 0) {
1562                 /*
1563                  * Someone else raced us here and grabbed the lock file
1564                  * first.  A warning here is inappropriate since nothing
1565                  * went wrong.
1566                  */
1567                 goto out;
1568         }
1569 
1570         if (strcmp(st.st_fstype, "namefs") == 0) {
1571                 struct door_info info;
1572 
1573                 /*
1574                  * There is already something fattach()'ed to this file.
1575                  * Lets see what the door is up to.
1576                  */
1577                 if (door_info(doorfd, &info) == 0 && info.di_target != -1) {
1578                         /*
1579                          * Another zoneadmd process seems to be in
1580                          * control of the situation and we don't need to
1581                          * be here.  A warning here is inappropriate
1582                          * since nothing went wrong.
1583                          *
1584                          * If the door has been revoked, the zoneadmd
1585                          * process currently managing the zone is going
1586                          * away.  We'll return control to zoneadm(1m)
1587                          * which will try again (by which time zoneadmd
1588                          * will hopefully have exited).
1589                          */
1590                         goto out;
1591                 }
1592 
1593                 /*
1594                  * If we got this far, there's a fattach(3c)'ed door
1595                  * that belongs to a process that has exited, which can
1596                  * happen if the previous zoneadmd died unexpectedly.
1597                  *
1598                  * Let user know that something is amiss, but that we can
1599                  * recover; if the zone is in the installed state, then don't
1600                  * message, since having a running zoneadmd isn't really
1601                  * expected/needed.  We want to keep occurences of this message
1602                  * limited to times when zoneadmd is picking back up from a
1603                  * zoneadmd that died while the zone was in some non-trivial
1604                  * state.
1605                  */
1606                 if (zstate > ZONE_STATE_INSTALLED) {
1607                         zerror(zlogp, B_FALSE,
1608                             "zone '%s': WARNING: zone is in state '%s', but "
1609                             "zoneadmd does not appear to be available; "
1610                             "restarted zoneadmd to recover.",
1611                             zone_name, zone_state_str(zstate));
1612                 }
1613 
1614                 (void) fdetach(zone_door_path);
1615                 (void) close(doorfd);
1616                 goto top;
1617         }
1618         ret = 0;
1619 out:
1620         (void) close(doorfd);
1621         return (ret);
1622 }
1623 
1624 /*
1625  * Setup the brand's pre and post state change callbacks, as well as the
1626  * query callback, if any of these exist.
1627  */
1628 static int
1629 brand_callback_init(brand_handle_t bh, char *zone_name)
1630 {
1631         char zpath[MAXPATHLEN];
1632 
1633         if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK)
1634                 return (-1);
1635 
1636         (void) strlcpy(pre_statechg_hook, EXEC_PREFIX,
1637             sizeof (pre_statechg_hook));
1638 
1639         if (brand_get_prestatechange(bh, zone_name, zpath,
1640             pre_statechg_hook + EXEC_LEN,
1641             sizeof (pre_statechg_hook) - EXEC_LEN) != 0)
1642                 return (-1);
1643 
1644         if (strlen(pre_statechg_hook) <= EXEC_LEN)
1645                 pre_statechg_hook[0] = '\0';
1646 
1647         (void) strlcpy(post_statechg_hook, EXEC_PREFIX,
1648             sizeof (post_statechg_hook));
1649 
1650         if (brand_get_poststatechange(bh, zone_name, zpath,
1651             post_statechg_hook + EXEC_LEN,
1652             sizeof (post_statechg_hook) - EXEC_LEN) != 0)
1653                 return (-1);
1654 
1655         if (strlen(post_statechg_hook) <= EXEC_LEN)
1656                 post_statechg_hook[0] = '\0';
1657 
1658         (void) strlcpy(query_hook, EXEC_PREFIX,
1659             sizeof (query_hook));
1660 
1661         if (brand_get_query(bh, zone_name, zpath, query_hook + EXEC_LEN,
1662             sizeof (query_hook) - EXEC_LEN) != 0)
1663                 return (-1);
1664 
1665         if (strlen(query_hook) <= EXEC_LEN)
1666                 query_hook[0] = '\0';
1667 
1668         return (0);
1669 }
1670 
1671 int
1672 main(int argc, char *argv[])
1673 {
1674         int opt;
1675         zoneid_t zid;
1676         priv_set_t *privset;
1677         zone_state_t zstate;
1678         char parents_locale[MAXPATHLEN];
1679         brand_handle_t bh;
1680         int err;
1681 
1682         pid_t pid;
1683         sigset_t blockset;
1684         sigset_t block_cld;
1685 
1686         struct {
1687                 sema_t sem;
1688                 int status;
1689                 zlog_t log;
1690         } *shstate;
1691         size_t shstatelen = getpagesize();
1692 
1693         zlog_t errlog;
1694         zlog_t *zlogp;
1695 
1696         int ctfd;
1697 
1698         progname = get_execbasename(argv[0]);
1699 
1700         /*
1701          * Make sure stderr is unbuffered
1702          */
1703         (void) setbuffer(stderr, NULL, 0);
1704 
1705         /*
1706          * Get out of the way of mounted filesystems, since we will daemonize
1707          * soon.
1708          */
1709         (void) chdir("/");
1710 
1711         /*
1712          * Use the default system umask per PSARC 1998/110 rather than
1713          * anything that may have been set by the caller.
1714          */
1715         (void) umask(CMASK);
1716 
1717         /*
1718          * Initially we want to use our parent's locale.
1719          */
1720         (void) setlocale(LC_ALL, "");
1721         (void) textdomain(TEXT_DOMAIN);
1722         (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL),
1723             sizeof (parents_locale));
1724 
1725         /*
1726          * This zlog_t is used for writing to stderr
1727          */
1728         errlog.logfile = stderr;
1729         errlog.buflen = errlog.loglen = 0;
1730         errlog.buf = errlog.log = NULL;
1731         errlog.locale = parents_locale;
1732 
1733         /*
1734          * We start off writing to stderr until we're ready to daemonize.
1735          */
1736         zlogp = &errlog;
1737 
1738         /*
1739          * Process options.
1740          */
1741         while ((opt = getopt(argc, argv, "R:z:")) != EOF) {
1742                 switch (opt) {
1743                 case 'R':
1744                         zonecfg_set_root(optarg);
1745                         break;
1746                 case 'z':
1747                         zone_name = optarg;
1748                         break;
1749                 default:
1750                         usage();
1751                 }
1752         }
1753 
1754         if (zone_name == NULL)
1755                 usage();
1756 
1757         /*
1758          * Because usage() prints directly to stderr, it has gettext()
1759          * wrapping, which depends on the locale.  But since zerror() calls
1760          * localize() which tweaks the locale, it is not safe to call zerror()
1761          * until after the last call to usage().  Fortunately, the last call
1762          * to usage() is just above and the first call to zerror() is just
1763          * below.  Don't mess this up.
1764          */
1765         if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) {
1766                 zerror(zlogp, B_FALSE, "cannot manage the %s zone",
1767                     GLOBAL_ZONENAME);
1768                 return (1);
1769         }
1770 
1771         if (zone_get_id(zone_name, &zid) != 0) {
1772                 zerror(zlogp, B_FALSE, "could not manage %s: %s", zone_name,
1773                     zonecfg_strerror(Z_NO_ZONE));
1774                 return (1);
1775         }
1776 
1777         if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
1778                 zerror(zlogp, B_FALSE, "failed to get zone state: %s",
1779                     zonecfg_strerror(err));
1780                 return (1);
1781         }
1782         if (zstate < ZONE_STATE_INCOMPLETE) {
1783                 zerror(zlogp, B_FALSE,
1784                     "cannot manage a zone which is in state '%s'",
1785                     zone_state_str(zstate));
1786                 return (1);
1787         }
1788 
1789         if (zonecfg_default_brand(default_brand,
1790             sizeof (default_brand)) != Z_OK) {
1791                 zerror(zlogp, B_FALSE, "unable to determine default brand");
1792                 return (1);
1793         }
1794 
1795         /* Get a handle to the brand info for this zone */
1796         if (zone_get_brand(zone_name, brand_name, sizeof (brand_name))
1797             != Z_OK) {
1798                 zerror(zlogp, B_FALSE, "unable to determine zone brand");
1799                 return (1);
1800         }
1801         zone_isnative = (strcmp(brand_name, NATIVE_BRAND_NAME) == 0);
1802         zone_islabeled = (strcmp(brand_name, LABELED_BRAND_NAME) == 0);
1803 
1804         /*
1805          * In the alternate root environment, the only supported
1806          * operations are mount and unmount.  In this case, just treat
1807          * the zone as native if it is cluster.  Cluster zones can be
1808          * native for the purpose of LU or upgrade, and the cluster
1809          * brand may not exist in the miniroot (such as in net install
1810          * upgrade).
1811          */
1812         if (strcmp(brand_name, CLUSTER_BRAND_NAME) == 0) {
1813                 zone_iscluster = B_TRUE;
1814                 if (zonecfg_in_alt_root()) {
1815                         (void) strlcpy(brand_name, default_brand,
1816                             sizeof (brand_name));
1817                 }
1818         } else {
1819                 zone_iscluster = B_FALSE;
1820         }
1821 
1822         if ((bh = brand_open(brand_name)) == NULL) {
1823                 zerror(zlogp, B_FALSE, "unable to open zone brand");
1824                 return (1);
1825         }
1826 
1827         /* Get state change brand hooks. */
1828         if (brand_callback_init(bh, zone_name) == -1) {
1829                 zerror(zlogp, B_TRUE,
1830                     "failed to initialize brand state change hooks");
1831                 brand_close(bh);
1832                 return (1);
1833         }
1834 
1835         brand_close(bh);
1836 
1837         /*
1838          * Check that we have all privileges.  It would be nice to pare
1839          * this down, but this is at least a first cut.
1840          */
1841         if ((privset = priv_allocset()) == NULL) {
1842                 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
1843                 return (1);
1844         }
1845 
1846         if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
1847                 zerror(zlogp, B_TRUE, "%s failed", "getppriv");
1848                 priv_freeset(privset);
1849                 return (1);
1850         }
1851 
1852         if (priv_isfullset(privset) == B_FALSE) {
1853                 zerror(zlogp, B_FALSE, "You lack sufficient privilege to "
1854                     "run this command (all privs required)");
1855                 priv_freeset(privset);
1856                 return (1);
1857         }
1858         priv_freeset(privset);
1859 
1860         if (mkzonedir(zlogp) != 0)
1861                 return (1);
1862 
1863         /*
1864          * Pre-fork: setup shared state
1865          */
1866         if ((shstate = (void *)mmap(NULL, shstatelen,
1867             PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) ==
1868             MAP_FAILED) {
1869                 zerror(zlogp, B_TRUE, "%s failed", "mmap");
1870                 return (1);
1871         }
1872         if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) {
1873                 zerror(zlogp, B_TRUE, "%s failed", "sema_init()");
1874                 (void) munmap((char *)shstate, shstatelen);
1875                 return (1);
1876         }
1877         shstate->log.logfile = NULL;
1878         shstate->log.buflen = shstatelen - sizeof (*shstate);
1879         shstate->log.loglen = shstate->log.buflen;
1880         shstate->log.buf = (char *)shstate + sizeof (*shstate);
1881         shstate->log.log = shstate->log.buf;
1882         shstate->log.locale = parents_locale;
1883         shstate->status = -1;
1884 
1885         /*
1886          * We need a SIGCHLD handler so the sema_wait() below will wake
1887          * up if the child dies without doing a sema_post().
1888          */
1889         (void) sigset(SIGCHLD, sigchld);
1890         /*
1891          * We must mask SIGCHLD until after we've coped with the fork
1892          * sufficiently to deal with it; otherwise we can race and
1893          * receive the signal before pid has been initialized
1894          * (yes, this really happens).
1895          */
1896         (void) sigemptyset(&block_cld);
1897         (void) sigaddset(&block_cld, SIGCHLD);
1898         (void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
1899 
1900         if ((ctfd = init_template()) == -1) {
1901                 zerror(zlogp, B_TRUE, "failed to create contract");
1902                 return (1);
1903         }
1904 
1905         /*
1906          * Do not let another thread localize a message while we are forking.
1907          */
1908         (void) mutex_lock(&msglock);
1909         pid = fork();
1910         (void) mutex_unlock(&msglock);
1911 
1912         /*
1913          * In all cases (parent, child, and in the event of an error) we
1914          * don't want to cause creation of contracts on subsequent fork()s.
1915          */
1916         (void) ct_tmpl_clear(ctfd);
1917         (void) close(ctfd);
1918 
1919         if (pid == -1) {
1920                 zerror(zlogp, B_TRUE, "could not fork");
1921                 return (1);
1922 
1923         } else if (pid > 0) { /* parent */
1924                 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1925                 /*
1926                  * This marks a window of vulnerability in which we receive
1927                  * the SIGCLD before falling into sema_wait (normally we would
1928                  * get woken up from sema_wait with EINTR upon receipt of
1929                  * SIGCLD).  So we may need to use some other scheme like
1930                  * sema_posting in the sigcld handler.
1931                  * blech
1932                  */
1933                 (void) sema_wait(&shstate->sem);
1934                 (void) sema_destroy(&shstate->sem);
1935                 if (shstate->status != 0)
1936                         (void) waitpid(pid, NULL, WNOHANG);
1937                 /*
1938                  * It's ok if we die with SIGPIPE.  It's not like we could have
1939                  * done anything about it.
1940                  */
1941                 (void) fprintf(stderr, "%s", shstate->log.buf);
1942                 _exit(shstate->status == 0 ? 0 : 1);
1943         }
1944 
1945         /*
1946          * The child charges on.
1947          */
1948         (void) sigset(SIGCHLD, SIG_DFL);
1949         (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1950 
1951         /*
1952          * SIGPIPE can be delivered if we write to a socket for which the
1953          * peer endpoint is gone.  That can lead to too-early termination
1954          * of zoneadmd, and that's not good eats.
1955          */
1956         (void) sigset(SIGPIPE, SIG_IGN);
1957         /*
1958          * Stop using stderr
1959          */
1960         zlogp = &shstate->log;
1961 
1962         /*
1963          * We don't need stdout/stderr from now on.
1964          */
1965         closefrom(0);
1966 
1967         /*
1968          * Initialize the syslog zlog_t.  This needs to be done after
1969          * the call to closefrom().
1970          */
1971         logsys.buf = logsys.log = NULL;
1972         logsys.buflen = logsys.loglen = 0;
1973         logsys.logfile = NULL;
1974         logsys.locale = DEFAULT_LOCALE;
1975 
1976         openlog("zoneadmd", LOG_PID, LOG_DAEMON);
1977 
1978         /*
1979          * The eventstream is used to publish state changes in the zone
1980          * from the door threads to the console I/O poller.
1981          */
1982         if (eventstream_init() == -1) {
1983                 zerror(zlogp, B_TRUE, "unable to create eventstream");
1984                 goto child_out;
1985         }
1986 
1987         (void) snprintf(zone_door_path, sizeof (zone_door_path),
1988             "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name);
1989 
1990         /*
1991          * See if another zoneadmd is running for this zone.  If not, then we
1992          * can now modify system state.
1993          */
1994         if (make_daemon_exclusive(zlogp) == -1)
1995                 goto child_out;
1996 
1997 
1998         /*
1999          * Create/join a new session; we need to be careful of what we do with
2000          * the console from now on so we don't end up being the session leader
2001          * for the terminal we're going to be handing out.
2002          */
2003         (void) setsid();
2004 
2005         /*
2006          * This thread shouldn't be receiving any signals; in particular,
2007          * SIGCHLD should be received by the thread doing the fork().
2008          */
2009         (void) sigfillset(&blockset);
2010         (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL);
2011 
2012         /*
2013          * Setup the console device and get ready to serve the console;
2014          * once this has completed, we're ready to let console clients
2015          * make an attempt to connect (they will block until
2016          * serve_console_sock() below gets called, and any pending
2017          * connection is accept()ed).
2018          */
2019         if (!zonecfg_in_alt_root() && init_console(zlogp) < 0)
2020                 goto child_out;
2021 
2022         /*
2023          * Take the lock now, so that when the door server gets going, we
2024          * are guaranteed that it won't take a request until we are sure
2025          * that everything is completely set up.  See the child_out: label
2026          * below to see why this matters.
2027          */
2028         (void) mutex_lock(&lock);
2029 
2030         /* Init semaphore for scratch zones. */
2031         if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) {
2032                 zerror(zlogp, B_TRUE,
2033                     "failed to initialize semaphore for scratch zone");
2034                 goto child_out;
2035         }
2036 
2037         /* open the dladm handle */
2038         if (dladm_open(&dld_handle) != DLADM_STATUS_OK) {
2039                 zerror(zlogp, B_FALSE, "failed to open dladm handle");
2040                 goto child_out;
2041         }
2042 
2043         /*
2044          * Note: door setup must occur *after* the console is setup.
2045          * This is so that as zlogin tests the door to see if zoneadmd
2046          * is ready yet, we know that the console will get serviced
2047          * once door_info() indicates that the door is "up".
2048          */
2049         if (setup_door(zlogp) == -1)
2050                 goto child_out;
2051 
2052         /*
2053          * Things seem OK so far; tell the parent process that we're done
2054          * with setup tasks.  This will cause the parent to exit, signalling
2055          * to zoneadm, zlogin, or whatever forked it that we are ready to
2056          * service requests.
2057          */
2058         shstate->status = 0;
2059         (void) sema_post(&shstate->sem);
2060         (void) munmap((char *)shstate, shstatelen);
2061         shstate = NULL;
2062 
2063         (void) mutex_unlock(&lock);
2064 
2065         /*
2066          * zlogp is now invalid, so reset it to the syslog logger.
2067          */
2068         zlogp = &logsys;
2069 
2070         /*
2071          * Now that we are free of any parents, switch to the default locale.
2072          */
2073         (void) setlocale(LC_ALL, DEFAULT_LOCALE);
2074 
2075         /*
2076          * At this point the setup portion of main() is basically done, so
2077          * we reuse this thread to manage the zone console.  When
2078          * serve_console() has returned, we are past the point of no return
2079          * in the life of this zoneadmd.
2080          */
2081         if (zonecfg_in_alt_root()) {
2082                 /*
2083                  * This is just awful, but mounted scratch zones don't (and
2084                  * can't) have consoles.  We just wait for unmount instead.
2085                  */
2086                 while (sema_wait(&scratch_sem) == EINTR)
2087                         ;
2088         } else {
2089                 serve_console(zlogp);
2090                 assert(in_death_throes);
2091         }
2092 
2093         /*
2094          * This is the next-to-last part of the exit interlock.  Upon calling
2095          * fdetach(), the door will go unreferenced; once any
2096          * outstanding requests (like the door thread doing Z_HALT) are
2097          * done, the door will get an UNREF notification; when it handles
2098          * the UNREF, the door server will cause the exit.  It's possible
2099          * that fdetach() can fail because the file is in use, in which
2100          * case we'll retry the operation.
2101          */
2102         assert(!MUTEX_HELD(&lock));
2103         for (;;) {
2104                 if ((fdetach(zone_door_path) == 0) || (errno != EBUSY))
2105                         break;
2106                 yield();
2107         }
2108 
2109         for (;;)
2110                 (void) pause();
2111 
2112 child_out:
2113         assert(pid == 0);
2114         if (shstate != NULL) {
2115                 shstate->status = -1;
2116                 (void) sema_post(&shstate->sem);
2117                 (void) munmap((char *)shstate, shstatelen);
2118         }
2119 
2120         /*
2121          * This might trigger an unref notification, but if so,
2122          * we are still holding the lock, so our call to exit will
2123          * ultimately win the race and will publish the right exit
2124          * code.
2125          */
2126         if (zone_door != -1) {
2127                 assert(MUTEX_HELD(&lock));
2128                 (void) door_revoke(zone_door);
2129                 (void) fdetach(zone_door_path);
2130         }
2131 
2132         if (dld_handle != NULL)
2133                 dladm_close(dld_handle);
2134 
2135         return (1); /* return from main() forcibly exits an MT process */
2136 }