1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright 2014 Nexenta Systems, Inc. All rights reserved. 25 * Copyright (c) 2016 by Delphix. All rights reserved. 26 * Copyright (c) 2011, Joyent Inc. All rights reserved. 27 */ 28 29 /* 30 * zoneadmd manages zones; one zoneadmd process is launched for each 31 * non-global zone on the system. This daemon juggles four jobs: 32 * 33 * - Implement setup and teardown of the zone "virtual platform": mount and 34 * unmount filesystems; create and destroy network interfaces; communicate 35 * with devfsadmd to lay out devices for the zone; instantiate the zone 36 * console device; configure process runtime attributes such as resource 37 * controls, pool bindings, fine-grained privileges. 38 * 39 * - Launch the zone's init(1M) process. 40 * 41 * - Implement a door server; clients (like zoneadm) connect to the door 42 * server and request zone state changes. The kernel is also a client of 43 * this door server. A request to halt or reboot the zone which originates 44 * *inside* the zone results in a door upcall from the kernel into zoneadmd. 
45 * 46 * One minor problem is that messages emitted by zoneadmd need to be passed 47 * back to the zoneadm process making the request. These messages need to 48 * be rendered in the client's locale; so, this is passed in as part of the 49 * request. The exception is the kernel upcall to zoneadmd, in which case 50 * messages are syslog'd. 51 * 52 * To make all of this work, the Makefile adds -a to xgettext to extract *all* 53 * strings, and an exclusion file (zoneadmd.xcl) is used to exclude those 54 * strings which do not need to be translated. 55 * 56 * - Act as a console server for zlogin -C processes; see comments in zcons.c 57 * for more information about the zone console architecture. 58 * 59 * DESIGN NOTES 60 * 61 * Restart: 62 * A chief design constraint of zoneadmd is that it should be restartable in 63 * the case that the administrator kills it off, or it suffers a fatal error, 64 * without the running zone being impacted; this is akin to being able to 65 * reboot the service processor of a server without affecting the OS instance. 
66 */ 67 68 #include <sys/param.h> 69 #include <sys/mman.h> 70 #include <sys/types.h> 71 #include <sys/stat.h> 72 #include <sys/sysmacros.h> 73 74 #include <bsm/adt.h> 75 #include <bsm/adt_event.h> 76 77 #include <alloca.h> 78 #include <assert.h> 79 #include <errno.h> 80 #include <door.h> 81 #include <fcntl.h> 82 #include <locale.h> 83 #include <signal.h> 84 #include <stdarg.h> 85 #include <stdio.h> 86 #include <stdlib.h> 87 #include <string.h> 88 #include <strings.h> 89 #include <synch.h> 90 #include <syslog.h> 91 #include <thread.h> 92 #include <unistd.h> 93 #include <wait.h> 94 #include <limits.h> 95 #include <zone.h> 96 #include <libbrand.h> 97 #include <sys/brand.h> 98 #include <libcontract.h> 99 #include <libcontract_priv.h> 100 #include <sys/brand.h> 101 #include <sys/contract/process.h> 102 #include <sys/ctfs.h> 103 #include <libdladm.h> 104 #include <sys/dls_mgmt.h> 105 #include <libscf.h> 106 107 #include <libzonecfg.h> 108 #include <zonestat_impl.h> 109 #include "zoneadmd.h" 110 111 static char *progname; 112 char *zone_name; /* zone which we are managing */ 113 char pool_name[MAXNAMELEN]; 114 char default_brand[MAXNAMELEN]; 115 char brand_name[MAXNAMELEN]; 116 boolean_t zone_isnative; 117 boolean_t zone_iscluster; 118 boolean_t zone_islabeled; 119 boolean_t shutdown_in_progress; 120 static zoneid_t zone_id; 121 static zoneid_t zone_did = 0; 122 dladm_handle_t dld_handle = NULL; 123 124 static char pre_statechg_hook[2 * MAXPATHLEN]; 125 static char post_statechg_hook[2 * MAXPATHLEN]; 126 char query_hook[2 * MAXPATHLEN]; 127 128 zlog_t logsys; 129 130 mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */ 131 mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */ 132 133 static sema_t scratch_sem; /* for scratch zones */ 134 135 static char zone_door_path[MAXPATHLEN]; 136 static int zone_door = -1; 137 138 boolean_t in_death_throes = B_FALSE; /* daemon is dying */ 139 boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */ 140 
141 #if !defined(TEXT_DOMAIN) /* should be defined by cc -D */ 142 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */ 143 #endif 144 145 #define DEFAULT_LOCALE "C" 146 147 static const char * 148 z_cmd_name(zone_cmd_t zcmd) 149 { 150 /* This list needs to match the enum in sys/zone.h */ 151 static const char *zcmdstr[] = { 152 "ready", "boot", "forceboot", "reboot", "halt", 153 "note_uninstalling", "mount", "forcemount", "unmount", 154 "shutdown" 155 }; 156 157 if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr)) 158 return ("unknown"); 159 else 160 return (zcmdstr[(int)zcmd]); 161 } 162 163 static char * 164 get_execbasename(char *execfullname) 165 { 166 char *last_slash, *execbasename; 167 168 /* guard against '/' at end of command invocation */ 169 for (;;) { 170 last_slash = strrchr(execfullname, '/'); 171 if (last_slash == NULL) { 172 execbasename = execfullname; 173 break; 174 } else { 175 execbasename = last_slash + 1; 176 if (*execbasename == '\0') { 177 *last_slash = '\0'; 178 continue; 179 } 180 break; 181 } 182 } 183 return (execbasename); 184 } 185 186 static void 187 usage(void) 188 { 189 (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname); 190 (void) fprintf(stderr, 191 gettext("\tNote: %s should not be run directly.\n"), progname); 192 exit(2); 193 } 194 195 /* ARGSUSED */ 196 static void 197 sigchld(int sig) 198 { 199 } 200 201 char * 202 localize_msg(char *locale, const char *msg) 203 { 204 char *out; 205 206 (void) mutex_lock(&msglock); 207 (void) setlocale(LC_MESSAGES, locale); 208 out = gettext(msg); 209 (void) setlocale(LC_MESSAGES, DEFAULT_LOCALE); 210 (void) mutex_unlock(&msglock); 211 return (out); 212 } 213 214 /* PRINTFLIKE3 */ 215 void 216 zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...) 
217 { 218 va_list alist; 219 char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */ 220 char *bp; 221 int saved_errno = errno; 222 223 if (zlogp == NULL) 224 return; 225 if (zlogp == &logsys) 226 (void) snprintf(buf, sizeof (buf), "[zone '%s'] ", 227 zone_name); 228 else 229 buf[0] = '\0'; 230 bp = &(buf[strlen(buf)]); 231 232 /* 233 * In theory, the locale pointer should be set to either "C" or a 234 * char array, so it should never be NULL 235 */ 236 assert(zlogp->locale != NULL); 237 /* Locale is per process, but we are multi-threaded... */ 238 fmt = localize_msg(zlogp->locale, fmt); 239 240 va_start(alist, fmt); 241 (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist); 242 va_end(alist); 243 bp = &(buf[strlen(buf)]); 244 if (use_strerror) 245 (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s", 246 strerror(saved_errno)); 247 if (zlogp == &logsys) { 248 (void) syslog(LOG_ERR, "%s", buf); 249 } else if (zlogp->logfile != NULL) { 250 (void) fprintf(zlogp->logfile, "%s\n", buf); 251 } else { 252 size_t buflen; 253 size_t copylen; 254 255 buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf); 256 copylen = MIN(buflen, zlogp->loglen); 257 zlogp->log += copylen; 258 zlogp->loglen -= copylen; 259 } 260 } 261 262 /* 263 * Emit a warning for any boot arguments which are unrecognized. Since 264 * Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we 265 * put the arguments into an argv style array, use getopt to process them, 266 * and put the resultant argument string back into outargs. 267 * 268 * During the filtering, we pull out any arguments which are truly "boot" 269 * arguments, leaving only those which are to be passed intact to the 270 * progenitor process. The one we support at the moment is -i, which 271 * indicates to the kernel which program should be launched as 'init'. 272 * 273 * A return of Z_INVAL indicates specifically that the arguments are 274 * not valid; this is a non-fatal error. 
Except for Z_OK, all other return 275 * values are treated as fatal. 276 */ 277 static int 278 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs, 279 char *init_file, char *badarg) 280 { 281 int argc = 0, argc_save; 282 int i; 283 int err; 284 char *arg, *lasts, **argv = NULL, **argv_save; 285 char zonecfg_args[BOOTARGS_MAX]; 286 char scratchargs[BOOTARGS_MAX], *sargs; 287 char c; 288 289 bzero(outargs, BOOTARGS_MAX); 290 bzero(badarg, BOOTARGS_MAX); 291 292 /* 293 * If the user didn't specify transient boot arguments, check 294 * to see if there were any specified in the zone configuration, 295 * and use them if applicable. 296 */ 297 if (inargs == NULL || inargs[0] == '\0') { 298 zone_dochandle_t handle; 299 if ((handle = zonecfg_init_handle()) == NULL) { 300 zerror(zlogp, B_TRUE, 301 "getting zone configuration handle"); 302 return (Z_BAD_HANDLE); 303 } 304 err = zonecfg_get_snapshot_handle(zone_name, handle); 305 if (err != Z_OK) { 306 zerror(zlogp, B_FALSE, 307 "invalid configuration snapshot"); 308 zonecfg_fini_handle(handle); 309 return (Z_BAD_HANDLE); 310 } 311 312 bzero(zonecfg_args, sizeof (zonecfg_args)); 313 (void) zonecfg_get_bootargs(handle, zonecfg_args, 314 sizeof (zonecfg_args)); 315 inargs = zonecfg_args; 316 zonecfg_fini_handle(handle); 317 } 318 319 if (strlen(inargs) >= BOOTARGS_MAX) { 320 zerror(zlogp, B_FALSE, "boot argument string too long"); 321 return (Z_INVAL); 322 } 323 324 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs)); 325 sargs = scratchargs; 326 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) { 327 sargs = NULL; 328 argc++; 329 } 330 331 if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) { 332 zerror(zlogp, B_FALSE, "memory allocation failed"); 333 return (Z_NOMEM); 334 } 335 336 argv_save = argv; 337 argc_save = argc; 338 339 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs)); 340 sargs = scratchargs; 341 i = 0; 342 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) { 343 sargs = 
NULL; 344 if ((argv[i] = strdup(arg)) == NULL) { 345 err = Z_NOMEM; 346 zerror(zlogp, B_FALSE, "memory allocation failed"); 347 goto done; 348 } 349 i++; 350 } 351 352 /* 353 * We preserve compatibility with the Solaris system boot behavior, 354 * which allows: 355 * 356 * # reboot kernel/unix -s -m verbose 357 * 358 * In this example, kernel/unix tells the booter what file to 359 * boot. We don't want reboot in a zone to be gratuitously different, 360 * so we silently ignore the boot file, if necessary. 361 */ 362 if (argv[0] == NULL) 363 goto done; 364 365 assert(argv[0][0] != ' '); 366 assert(argv[0][0] != '\t'); 367 368 if (argv[0][0] != '-' && argv[0][0] != '\0') { 369 argv = &argv[1]; 370 argc--; 371 } 372 373 optind = 0; 374 opterr = 0; 375 err = Z_OK; 376 while ((c = getopt(argc, argv, "fi:m:s")) != -1) { 377 switch (c) { 378 case 'i': 379 /* 380 * -i is handled by the runtime and is not passed 381 * along to userland 382 */ 383 (void) strlcpy(init_file, optarg, MAXPATHLEN); 384 break; 385 case 'f': 386 /* This has already been processed by zoneadm */ 387 break; 388 case 'm': 389 case 's': 390 /* These pass through unmolested */ 391 (void) snprintf(outargs, BOOTARGS_MAX, 392 "%s -%c %s ", outargs, c, optarg ? optarg : ""); 393 break; 394 case '?': 395 /* 396 * We warn about unknown arguments but pass them 397 * along anyway-- if someone wants to develop their 398 * own init replacement, they can pass it whatever 399 * args they want. 400 */ 401 err = Z_INVAL; 402 (void) snprintf(outargs, BOOTARGS_MAX, 403 "%s -%c", outargs, optopt); 404 (void) snprintf(badarg, BOOTARGS_MAX, 405 "%s -%c", badarg, optopt); 406 break; 407 } 408 } 409 410 /* 411 * For Solaris Zones we warn about and discard non-option arguments. 412 * Hence 'boot foo bar baz gub' --> 'boot'. However, to be similar 413 * to the kernel, we concat up all the other remaining boot args. 414 * and warn on them as a group. 
415 */ 416 if (optind < argc) { 417 err = Z_INVAL; 418 while (optind < argc) { 419 (void) snprintf(badarg, BOOTARGS_MAX, "%s%s%s", 420 badarg, strlen(badarg) > 0 ? " " : "", 421 argv[optind]); 422 optind++; 423 } 424 zerror(zlogp, B_FALSE, "WARNING: Unused or invalid boot " 425 "arguments `%s'.", badarg); 426 } 427 428 done: 429 for (i = 0; i < argc_save; i++) { 430 if (argv_save[i] != NULL) 431 free(argv_save[i]); 432 } 433 free(argv_save); 434 return (err); 435 } 436 437 438 static int 439 mkzonedir(zlog_t *zlogp) 440 { 441 struct stat st; 442 /* 443 * We must create and lock everyone but root out of ZONES_TMPDIR 444 * since anyone can open any UNIX domain socket, regardless of 445 * its file system permissions. Sigh... 446 */ 447 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) { 448 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR); 449 return (-1); 450 } 451 /* paranoia */ 452 if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) { 453 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR); 454 return (-1); 455 } 456 (void) chmod(ZONES_TMPDIR, S_IRWXU); 457 return (0); 458 } 459 460 /* 461 * Run the brand's pre-state change callback, if it exists. 462 */ 463 static int 464 brand_prestatechg(zlog_t *zlogp, int state, int cmd) 465 { 466 char cmdbuf[2 * MAXPATHLEN]; 467 const char *altroot; 468 469 if (pre_statechg_hook[0] == '\0') 470 return (0); 471 472 altroot = zonecfg_get_root(); 473 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", pre_statechg_hook, 474 state, cmd, altroot) > sizeof (cmdbuf)) 475 return (-1); 476 477 if (do_subproc(zlogp, cmdbuf, NULL) != 0) 478 return (-1); 479 480 return (0); 481 } 482 483 /* 484 * Run the brand's post-state change callback, if it exists. 
485 */ 486 static int 487 brand_poststatechg(zlog_t *zlogp, int state, int cmd) 488 { 489 char cmdbuf[2 * MAXPATHLEN]; 490 const char *altroot; 491 492 if (post_statechg_hook[0] == '\0') 493 return (0); 494 495 altroot = zonecfg_get_root(); 496 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", post_statechg_hook, 497 state, cmd, altroot) > sizeof (cmdbuf)) 498 return (-1); 499 500 if (do_subproc(zlogp, cmdbuf, NULL) != 0) 501 return (-1); 502 503 return (0); 504 } 505 506 /* 507 * Notify zonestatd of the new zone. If zonestatd is not running, this 508 * will do nothing. 509 */ 510 static void 511 notify_zonestatd(zoneid_t zoneid) 512 { 513 int cmd[2]; 514 int fd; 515 door_arg_t params; 516 517 fd = open(ZS_DOOR_PATH, O_RDONLY); 518 if (fd < 0) 519 return; 520 521 cmd[0] = ZSD_CMD_NEW_ZONE; 522 cmd[1] = zoneid; 523 params.data_ptr = (char *)&cmd; 524 params.data_size = sizeof (cmd); 525 params.desc_ptr = NULL; 526 params.desc_num = 0; 527 params.rbuf = NULL; 528 params.rsize = 0; 529 (void) door_call(fd, ¶ms); 530 (void) close(fd); 531 } 532 533 /* 534 * Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is 535 * 'true' if this is being invoked as part of the processing for the "mount" 536 * subcommand. 
537 */ 538 static int 539 zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate) 540 { 541 int err; 542 543 if (brand_prestatechg(zlogp, zstate, Z_READY) != 0) 544 return (-1); 545 546 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) { 547 zerror(zlogp, B_FALSE, "unable to create snapshot: %s", 548 zonecfg_strerror(err)); 549 goto bad; 550 } 551 552 if (zone_did == 0) 553 zone_did = zone_get_did(zone_name); 554 555 if ((zone_id = vplat_create(zlogp, mount_cmd, zone_did)) == -1) { 556 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 557 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 558 zonecfg_strerror(err)); 559 goto bad; 560 } 561 if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) { 562 bringup_failure_recovery = B_TRUE; 563 (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE); 564 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 565 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 566 zonecfg_strerror(err)); 567 goto bad; 568 } 569 570 if (brand_poststatechg(zlogp, zstate, Z_READY) != 0) 571 goto bad; 572 573 return (0); 574 575 bad: 576 /* 577 * If something goes wrong, we up the zones's state to the target 578 * state, READY, and then invoke the hook as if we're halting. 579 */ 580 (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT); 581 return (-1); 582 } 583 584 int 585 init_template(void) 586 { 587 int fd; 588 int err = 0; 589 590 fd = open64(CTFS_ROOT "/process/template", O_RDWR); 591 if (fd == -1) 592 return (-1); 593 594 /* 595 * For now, zoneadmd doesn't do anything with the contract. 596 * Deliver no events, don't inherit, and allow it to be orphaned. 
597 */ 598 err |= ct_tmpl_set_critical(fd, 0); 599 err |= ct_tmpl_set_informative(fd, 0); 600 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR); 601 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT); 602 if (err || ct_tmpl_activate(fd)) { 603 (void) close(fd); 604 return (-1); 605 } 606 607 return (fd); 608 } 609 610 typedef struct fs_callback { 611 zlog_t *zlogp; 612 zoneid_t zoneid; 613 boolean_t mount_cmd; 614 } fs_callback_t; 615 616 static int 617 mount_early_fs(void *data, const char *spec, const char *dir, 618 const char *fstype, const char *opt) 619 { 620 zlog_t *zlogp = ((fs_callback_t *)data)->zlogp; 621 zoneid_t zoneid = ((fs_callback_t *)data)->zoneid; 622 boolean_t mount_cmd = ((fs_callback_t *)data)->mount_cmd; 623 char rootpath[MAXPATHLEN]; 624 pid_t child; 625 int child_status; 626 int tmpl_fd; 627 int rv; 628 ctid_t ct; 629 630 /* determine the zone rootpath */ 631 if (mount_cmd) { 632 char zonepath[MAXPATHLEN]; 633 char luroot[MAXPATHLEN]; 634 635 if (zone_get_zonepath(zone_name, 636 zonepath, sizeof (zonepath)) != Z_OK) { 637 zerror(zlogp, B_FALSE, "unable to determine zone path"); 638 return (-1); 639 } 640 641 (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath); 642 resolve_lofs(zlogp, luroot, sizeof (luroot)); 643 (void) strlcpy(rootpath, luroot, sizeof (rootpath)); 644 } else { 645 if (zone_get_rootpath(zone_name, 646 rootpath, sizeof (rootpath)) != Z_OK) { 647 zerror(zlogp, B_FALSE, "unable to determine zone root"); 648 return (-1); 649 } 650 } 651 652 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, fstype)) < 0) { 653 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point", 654 rootpath, dir); 655 return (-1); 656 } else if (rv > 0) { 657 /* The mount point path doesn't exist, create it now. 
*/ 658 if (make_one_dir(zlogp, rootpath, dir, 659 DEFAULT_DIR_MODE, DEFAULT_DIR_USER, 660 DEFAULT_DIR_GROUP) != 0) { 661 zerror(zlogp, B_FALSE, "failed to create mount point"); 662 return (-1); 663 } 664 665 /* 666 * Now this might seem weird, but we need to invoke 667 * valid_mount_path() again. Why? Because it checks 668 * to make sure that the mount point path is canonical, 669 * which it can only do if the path exists, so now that 670 * we've created the path we have to verify it again. 671 */ 672 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, 673 fstype)) < 0) { 674 zerror(zlogp, B_FALSE, 675 "%s%s is not a valid mount point", rootpath, dir); 676 return (-1); 677 } 678 } 679 680 if ((tmpl_fd = init_template()) == -1) { 681 zerror(zlogp, B_TRUE, "failed to create contract"); 682 return (-1); 683 } 684 685 if ((child = fork()) == -1) { 686 (void) ct_tmpl_clear(tmpl_fd); 687 (void) close(tmpl_fd); 688 zerror(zlogp, B_TRUE, "failed to fork"); 689 return (-1); 690 691 } else if (child == 0) { /* child */ 692 char opt_buf[MAX_MNTOPT_STR]; 693 int optlen = 0; 694 int mflag = MS_DATA; 695 696 (void) ct_tmpl_clear(tmpl_fd); 697 /* 698 * Even though there are no procs running in the zone, we 699 * do this for paranoia's sake. 700 */ 701 (void) closefrom(0); 702 703 if (zone_enter(zoneid) == -1) { 704 _exit(errno); 705 } 706 if (opt != NULL) { 707 /* 708 * The mount() system call is incredibly annoying. 709 * If options are specified, we need to copy them 710 * into a temporary buffer since the mount() system 711 * call will overwrite the options string. It will 712 * also fail if the new option string it wants to 713 * write is bigger than the one we passed in, so 714 * you must pass in a buffer of the maximum possible 715 * option string length. sigh. 
716 */ 717 (void) strlcpy(opt_buf, opt, sizeof (opt_buf)); 718 opt = opt_buf; 719 optlen = MAX_MNTOPT_STR; 720 mflag = MS_OPTIONSTR; 721 } 722 if (mount(spec, dir, mflag, fstype, NULL, 0, opt, optlen) != 0) 723 _exit(errno); 724 _exit(0); 725 } 726 727 /* parent */ 728 if (contract_latest(&ct) == -1) 729 ct = -1; 730 (void) ct_tmpl_clear(tmpl_fd); 731 (void) close(tmpl_fd); 732 if (waitpid(child, &child_status, 0) != child) { 733 /* unexpected: we must have been signalled */ 734 (void) contract_abandon_id(ct); 735 return (-1); 736 } 737 (void) contract_abandon_id(ct); 738 if (WEXITSTATUS(child_status) != 0) { 739 errno = WEXITSTATUS(child_status); 740 zerror(zlogp, B_TRUE, "mount of %s failed", dir); 741 return (-1); 742 } 743 744 return (0); 745 } 746 747 /* 748 * If retstr is not NULL, the output of the subproc is returned in the str, 749 * otherwise it is output using zerror(). Any memory allocated for retstr 750 * should be freed by the caller. 751 */ 752 int 753 do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr) 754 { 755 char buf[1024]; /* arbitrary large amount */ 756 char *inbuf; 757 FILE *file; 758 int status; 759 int rd_cnt; 760 761 if (retstr != NULL) { 762 if ((*retstr = malloc(1024)) == NULL) { 763 zerror(zlogp, B_FALSE, "out of memory"); 764 return (-1); 765 } 766 inbuf = *retstr; 767 rd_cnt = 0; 768 } else { 769 inbuf = buf; 770 } 771 772 file = popen(cmdbuf, "r"); 773 if (file == NULL) { 774 zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf); 775 return (-1); 776 } 777 778 while (fgets(inbuf, 1024, file) != NULL) { 779 if (retstr == NULL) { 780 if (zlogp != &logsys) 781 zerror(zlogp, B_FALSE, "%s", inbuf); 782 } else { 783 char *p; 784 785 rd_cnt += 1024 - 1; 786 if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) { 787 zerror(zlogp, B_FALSE, "out of memory"); 788 (void) pclose(file); 789 return (-1); 790 } 791 792 *retstr = p; 793 inbuf = *retstr + rd_cnt; 794 } 795 } 796 status = pclose(file); 797 798 if (WIFSIGNALED(status)) { 799 
zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to " 800 "signal %d", cmdbuf, WTERMSIG(status)); 801 return (-1); 802 } 803 assert(WIFEXITED(status)); 804 if (WEXITSTATUS(status) == ZEXIT_EXEC) { 805 zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf); 806 return (-1); 807 } 808 return (WEXITSTATUS(status)); 809 } 810 811 static int 812 zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate) 813 { 814 zoneid_t zoneid; 815 struct stat st; 816 char zpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN]; 817 char nbootargs[BOOTARGS_MAX]; 818 char cmdbuf[MAXPATHLEN]; 819 fs_callback_t cb; 820 brand_handle_t bh; 821 zone_iptype_t iptype; 822 boolean_t links_loaded = B_FALSE; 823 dladm_status_t status; 824 char errmsg[DLADM_STRSIZE]; 825 int err; 826 boolean_t restart_init; 827 828 if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0) 829 return (-1); 830 831 if ((zoneid = getzoneidbyname(zone_name)) == -1) { 832 zerror(zlogp, B_TRUE, "unable to get zoneid"); 833 goto bad; 834 } 835 836 cb.zlogp = zlogp; 837 cb.zoneid = zoneid; 838 cb.mount_cmd = B_FALSE; 839 840 /* Get a handle to the brand info for this zone */ 841 if ((bh = brand_open(brand_name)) == NULL) { 842 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 843 goto bad; 844 } 845 846 /* 847 * Get the list of filesystems to mount from the brand 848 * configuration. These mounts are done via a thread that will 849 * enter the zone, so they are done from within the context of the 850 * zone. 851 */ 852 if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) { 853 zerror(zlogp, B_FALSE, "unable to mount filesystems"); 854 brand_close(bh); 855 goto bad; 856 } 857 858 /* 859 * Get the brand's boot callback if it exists. 
860 */ 861 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) { 862 zerror(zlogp, B_FALSE, "unable to determine zone path"); 863 brand_close(bh); 864 goto bad; 865 } 866 (void) strcpy(cmdbuf, EXEC_PREFIX); 867 if (brand_get_boot(bh, zone_name, zpath, cmdbuf + EXEC_LEN, 868 sizeof (cmdbuf) - EXEC_LEN) != 0) { 869 zerror(zlogp, B_FALSE, 870 "unable to determine branded zone's boot callback"); 871 brand_close(bh); 872 goto bad; 873 } 874 875 /* Get the path for this zone's init(1M) (or equivalent) process. */ 876 if (brand_get_initname(bh, init_file, MAXPATHLEN) != 0) { 877 zerror(zlogp, B_FALSE, 878 "unable to determine zone's init(1M) location"); 879 brand_close(bh); 880 goto bad; 881 } 882 883 /* See if this zone's brand should restart init if it dies. */ 884 restart_init = brand_restartinit(bh); 885 886 brand_close(bh); 887 888 err = filter_bootargs(zlogp, bootargs, nbootargs, init_file, 889 bad_boot_arg); 890 if (err == Z_INVAL) 891 eventstream_write(Z_EVT_ZONE_BADARGS); 892 else if (err != Z_OK) 893 goto bad; 894 895 assert(init_file[0] != '\0'); 896 897 /* Try to anticipate possible problems: Make sure init is executable. */ 898 if (zone_get_rootpath(zone_name, zpath, sizeof (zpath)) != Z_OK) { 899 zerror(zlogp, B_FALSE, "unable to determine zone root"); 900 goto bad; 901 } 902 903 (void) snprintf(initpath, sizeof (initpath), "%s%s", zpath, init_file); 904 905 if (stat(initpath, &st) == -1) { 906 zerror(zlogp, B_TRUE, "could not stat %s", initpath); 907 goto bad; 908 } 909 910 if ((st.st_mode & S_IXUSR) == 0) { 911 zerror(zlogp, B_FALSE, "%s is not executable", initpath); 912 goto bad; 913 } 914 915 /* 916 * Exclusive stack zones interact with the dlmgmtd running in the 917 * global zone. dladm_zone_boot() tells dlmgmtd that this zone is 918 * booting, and loads its datalinks from the zone's datalink 919 * configuration file. 
920 */ 921 if (vplat_get_iptype(zlogp, &iptype) == 0 && iptype == ZS_EXCLUSIVE) { 922 status = dladm_zone_boot(dld_handle, zoneid); 923 if (status != DLADM_STATUS_OK) { 924 zerror(zlogp, B_FALSE, "unable to load zone datalinks: " 925 " %s", dladm_status2str(status, errmsg)); 926 goto bad; 927 } 928 links_loaded = B_TRUE; 929 } 930 931 /* 932 * If there is a brand 'boot' callback, execute it now to give the 933 * brand one last chance to do any additional setup before the zone 934 * is booted. 935 */ 936 if ((strlen(cmdbuf) > EXEC_LEN) && 937 (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) { 938 zerror(zlogp, B_FALSE, "%s failed", cmdbuf); 939 goto bad; 940 } 941 942 if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) { 943 zerror(zlogp, B_TRUE, "could not set zone boot file"); 944 goto bad; 945 } 946 947 if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) { 948 zerror(zlogp, B_TRUE, "could not set zone boot arguments"); 949 goto bad; 950 } 951 952 if (!restart_init && zone_setattr(zoneid, ZONE_ATTR_INITNORESTART, 953 NULL, 0) == -1) { 954 zerror(zlogp, B_TRUE, "could not set zone init-no-restart"); 955 goto bad; 956 } 957 958 /* 959 * Inform zonestatd of a new zone so that it can install a door for 960 * the zone to contact it. 961 */ 962 notify_zonestatd(zone_id); 963 964 if (zone_boot(zoneid) == -1) { 965 zerror(zlogp, B_TRUE, "unable to boot zone"); 966 goto bad; 967 } 968 969 if (brand_poststatechg(zlogp, zstate, Z_BOOT) != 0) 970 goto bad; 971 972 return (0); 973 974 bad: 975 /* 976 * If something goes wrong, we up the zones's state to the target 977 * state, RUNNING, and then invoke the hook as if we're halting. 
978 */ 979 (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT); 980 if (links_loaded) 981 (void) dladm_zone_halt(dld_handle, zoneid); 982 return (-1); 983 } 984 985 static int 986 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate) 987 { 988 int err; 989 990 if (brand_prestatechg(zlogp, zstate, Z_HALT) != 0) 991 return (-1); 992 993 if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) { 994 if (!bringup_failure_recovery) 995 zerror(zlogp, B_FALSE, "unable to destroy zone"); 996 return (-1); 997 } 998 999 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 1000 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 1001 zonecfg_strerror(err)); 1002 1003 if (brand_poststatechg(zlogp, zstate, Z_HALT) != 0) 1004 return (-1); 1005 1006 return (0); 1007 } 1008 1009 static int 1010 zone_graceful_shutdown(zlog_t *zlogp) 1011 { 1012 zoneid_t zoneid; 1013 pid_t child; 1014 char cmdbuf[MAXPATHLEN]; 1015 brand_handle_t bh = NULL; 1016 char zpath[MAXPATHLEN]; 1017 ctid_t ct; 1018 int tmpl_fd; 1019 int child_status; 1020 1021 if (shutdown_in_progress) { 1022 zerror(zlogp, B_FALSE, "shutdown already in progress"); 1023 return (-1); 1024 } 1025 1026 if ((zoneid = getzoneidbyname(zone_name)) == -1) { 1027 zerror(zlogp, B_TRUE, "unable to get zoneid"); 1028 return (-1); 1029 } 1030 1031 /* Get a handle to the brand info for this zone */ 1032 if ((bh = brand_open(brand_name)) == NULL) { 1033 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 1034 return (-1); 1035 } 1036 1037 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) { 1038 zerror(zlogp, B_FALSE, "unable to determine zone path"); 1039 brand_close(bh); 1040 return (-1); 1041 } 1042 1043 /* 1044 * If there is a brand 'shutdown' callback, execute it now to give the 1045 * brand a chance to cleanup any custom configuration. 
1046 */ 1047 (void) strcpy(cmdbuf, EXEC_PREFIX); 1048 if (brand_get_shutdown(bh, zone_name, zpath, cmdbuf + EXEC_LEN, 1049 sizeof (cmdbuf) - EXEC_LEN) != 0 || strlen(cmdbuf) <= EXEC_LEN) { 1050 (void) strcat(cmdbuf, SHUTDOWN_DEFAULT); 1051 } 1052 brand_close(bh); 1053 1054 if ((tmpl_fd = init_template()) == -1) { 1055 zerror(zlogp, B_TRUE, "failed to create contract"); 1056 return (-1); 1057 } 1058 1059 if ((child = fork()) == -1) { 1060 (void) ct_tmpl_clear(tmpl_fd); 1061 (void) close(tmpl_fd); 1062 zerror(zlogp, B_TRUE, "failed to fork"); 1063 return (-1); 1064 } else if (child == 0) { 1065 (void) ct_tmpl_clear(tmpl_fd); 1066 if (zone_enter(zoneid) == -1) { 1067 _exit(errno); 1068 } 1069 _exit(execl("/bin/sh", "sh", "-c", cmdbuf, (char *)NULL)); 1070 } 1071 1072 if (contract_latest(&ct) == -1) 1073 ct = -1; 1074 (void) ct_tmpl_clear(tmpl_fd); 1075 (void) close(tmpl_fd); 1076 1077 if (waitpid(child, &child_status, 0) != child) { 1078 /* unexpected: we must have been signalled */ 1079 (void) contract_abandon_id(ct); 1080 return (-1); 1081 } 1082 1083 (void) contract_abandon_id(ct); 1084 if (WEXITSTATUS(child_status) != 0) { 1085 errno = WEXITSTATUS(child_status); 1086 zerror(zlogp, B_FALSE, "unable to shutdown zone"); 1087 return (-1); 1088 } 1089 1090 shutdown_in_progress = B_TRUE; 1091 1092 return (0); 1093 } 1094 1095 static int 1096 zone_wait_shutdown(zlog_t *zlogp) 1097 { 1098 zone_state_t zstate; 1099 uint64_t *tm = NULL; 1100 scf_simple_prop_t *prop = NULL; 1101 int timeout; 1102 int tries; 1103 int rc = -1; 1104 1105 /* Get default stop timeout from SMF framework */ 1106 timeout = SHUTDOWN_WAIT; 1107 if ((prop = scf_simple_prop_get(NULL, SHUTDOWN_FMRI, "stop", 1108 SCF_PROPERTY_TIMEOUT)) != NULL) { 1109 if ((tm = scf_simple_prop_next_count(prop)) != NULL) { 1110 if (tm != 0) 1111 timeout = *tm; 1112 } 1113 scf_simple_prop_free(prop); 1114 } 1115 1116 /* allow time for zone to shutdown cleanly */ 1117 for (tries = 0; tries < timeout; tries ++) { 1118 (void) 
sleep(1); 1119 if (zone_get_state(zone_name, &zstate) == Z_OK && 1120 zstate == ZONE_STATE_INSTALLED) { 1121 rc = 0; 1122 break; 1123 } 1124 } 1125 1126 if (rc != 0) 1127 zerror(zlogp, B_FALSE, "unable to shutdown zone"); 1128 1129 shutdown_in_progress = B_FALSE; 1130 1131 return (rc); 1132 } 1133 1134 1135 1136 /* 1137 * Generate AUE_zone_state for a command that boots a zone. 1138 */ 1139 static void 1140 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val, 1141 char *new_state) 1142 { 1143 adt_session_data_t *ah; 1144 adt_event_data_t *event; 1145 int pass_fail, fail_reason; 1146 1147 if (!adt_audit_enabled()) 1148 return; 1149 1150 if (return_val == 0) { 1151 pass_fail = ADT_SUCCESS; 1152 fail_reason = ADT_SUCCESS; 1153 } else { 1154 pass_fail = ADT_FAILURE; 1155 fail_reason = ADT_FAIL_VALUE_PROGRAM; 1156 } 1157 1158 if (adt_start_session(&ah, NULL, 0)) { 1159 zerror(zlogp, B_TRUE, gettext("audit failure.")); 1160 return; 1161 } 1162 if (adt_set_from_ucred(ah, uc, ADT_NEW)) { 1163 zerror(zlogp, B_TRUE, gettext("audit failure.")); 1164 (void) adt_end_session(ah); 1165 return; 1166 } 1167 1168 event = adt_alloc_event(ah, ADT_zone_state); 1169 if (event == NULL) { 1170 zerror(zlogp, B_TRUE, gettext("audit failure.")); 1171 (void) adt_end_session(ah); 1172 return; 1173 } 1174 event->adt_zone_state.zonename = zone_name; 1175 event->adt_zone_state.new_state = new_state; 1176 1177 if (adt_put_event(event, pass_fail, fail_reason)) 1178 zerror(zlogp, B_TRUE, gettext("audit failure.")); 1179 1180 adt_free_event(event); 1181 1182 (void) adt_end_session(ah); 1183 } 1184 1185 /* 1186 * The main routine for the door server that deals with zone state transitions. 
1187 */ 1188 /* ARGSUSED */ 1189 static void 1190 server(void *cookie, char *args, size_t alen, door_desc_t *dp, 1191 uint_t n_desc) 1192 { 1193 ucred_t *uc = NULL; 1194 const priv_set_t *eset; 1195 1196 zone_state_t zstate; 1197 zone_cmd_t cmd; 1198 zone_cmd_arg_t *zargp; 1199 1200 boolean_t kernelcall; 1201 1202 int rval = -1; 1203 uint64_t uniqid; 1204 zoneid_t zoneid = -1; 1205 zlog_t zlog; 1206 zlog_t *zlogp; 1207 zone_cmd_rval_t *rvalp; 1208 size_t rlen = getpagesize(); /* conservative */ 1209 fs_callback_t cb; 1210 brand_handle_t bh; 1211 boolean_t wait_shut = B_FALSE; 1212 1213 /* LINTED E_BAD_PTR_CAST_ALIGN */ 1214 zargp = (zone_cmd_arg_t *)args; 1215 1216 /* 1217 * When we get the door unref message, we've fdetach'd the door, and 1218 * it is time for us to shut down zoneadmd. 1219 */ 1220 if (zargp == DOOR_UNREF_DATA) { 1221 /* 1222 * See comment at end of main() for info on the last rites. 1223 */ 1224 exit(0); 1225 } 1226 1227 if (zargp == NULL) { 1228 (void) door_return(NULL, 0, 0, 0); 1229 } 1230 1231 rvalp = alloca(rlen); 1232 bzero(rvalp, rlen); 1233 zlog.logfile = NULL; 1234 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1; 1235 zlog.buf = rvalp->errbuf; 1236 zlog.log = zlog.buf; 1237 /* defer initialization of zlog.locale until after credential check */ 1238 zlogp = &zlog; 1239 1240 if (alen != sizeof (zone_cmd_arg_t)) { 1241 /* 1242 * This really shouldn't be happening. 1243 */ 1244 zerror(&logsys, B_FALSE, "argument size (%d bytes) " 1245 "unexpected (expected %d bytes)", alen, 1246 sizeof (zone_cmd_arg_t)); 1247 goto out; 1248 } 1249 cmd = zargp->cmd; 1250 1251 if (door_ucred(&uc) != 0) { 1252 zerror(&logsys, B_TRUE, "door_ucred"); 1253 goto out; 1254 } 1255 eset = ucred_getprivset(uc, PRIV_EFFECTIVE); 1256 if (ucred_getzoneid(uc) != GLOBAL_ZONEID || 1257 (eset != NULL ? 
!priv_ismember(eset, PRIV_SYS_CONFIG) : 1258 ucred_geteuid(uc) != 0)) { 1259 zerror(&logsys, B_FALSE, "insufficient privileges"); 1260 goto out; 1261 } 1262 1263 kernelcall = ucred_getpid(uc) == 0; 1264 1265 /* 1266 * This is safe because we only use a zlog_t throughout the 1267 * duration of a door call; i.e., by the time the pointer 1268 * might become invalid, the door call would be over. 1269 */ 1270 zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale; 1271 1272 (void) mutex_lock(&lock); 1273 1274 /* 1275 * Once we start to really die off, we don't want more connections. 1276 */ 1277 if (in_death_throes) { 1278 (void) mutex_unlock(&lock); 1279 ucred_free(uc); 1280 (void) door_return(NULL, 0, 0, 0); 1281 thr_exit(NULL); 1282 } 1283 1284 /* 1285 * Check for validity of command. 1286 */ 1287 if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_FORCEBOOT && 1288 cmd != Z_REBOOT && cmd != Z_SHUTDOWN && cmd != Z_HALT && 1289 cmd != Z_NOTE_UNINSTALLING && cmd != Z_MOUNT && 1290 cmd != Z_FORCEMOUNT && cmd != Z_UNMOUNT) { 1291 zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd); 1292 goto out; 1293 } 1294 1295 if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) { 1296 /* 1297 * Can't happen 1298 */ 1299 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d", 1300 cmd); 1301 goto out; 1302 } 1303 /* 1304 * We ignore the possibility of someone calling zone_create(2) 1305 * explicitly; all requests must come through zoneadmd. 1306 */ 1307 if (zone_get_state(zone_name, &zstate) != Z_OK) { 1308 /* 1309 * Something terribly wrong happened 1310 */ 1311 zerror(&logsys, B_FALSE, "unable to determine state of zone"); 1312 goto out; 1313 } 1314 1315 if (kernelcall) { 1316 /* 1317 * Kernel-initiated requests may lose their validity if the 1318 * zone_t the kernel was referring to has gone away. 
1319 */ 1320 if ((zoneid = getzoneidbyname(zone_name)) == -1 || 1321 zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid, 1322 sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) { 1323 /* 1324 * We're not talking about the same zone. The request 1325 * must have arrived too late. Return error. 1326 */ 1327 rval = -1; 1328 goto out; 1329 } 1330 zlogp = &logsys; /* Log errors to syslog */ 1331 } 1332 1333 /* 1334 * If we are being asked to forcibly mount or boot a zone, we 1335 * pretend that an INCOMPLETE zone is actually INSTALLED. 1336 */ 1337 if (zstate == ZONE_STATE_INCOMPLETE && 1338 (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT)) 1339 zstate = ZONE_STATE_INSTALLED; 1340 1341 switch (zstate) { 1342 case ZONE_STATE_CONFIGURED: 1343 case ZONE_STATE_INCOMPLETE: 1344 /* 1345 * Not our area of expertise; we just print a nice message 1346 * and die off. 1347 */ 1348 zerror(zlogp, B_FALSE, 1349 "%s operation is invalid for zones in state '%s'", 1350 z_cmd_name(cmd), zone_state_str(zstate)); 1351 break; 1352 1353 case ZONE_STATE_INSTALLED: 1354 switch (cmd) { 1355 case Z_READY: 1356 rval = zone_ready(zlogp, Z_MNT_BOOT, zstate); 1357 if (rval == 0) 1358 eventstream_write(Z_EVT_ZONE_READIED); 1359 break; 1360 case Z_BOOT: 1361 case Z_FORCEBOOT: 1362 eventstream_write(Z_EVT_ZONE_BOOTING); 1363 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, 1364 zstate)) == 0) { 1365 rval = zone_bootup(zlogp, zargp->bootbuf, 1366 zstate); 1367 } 1368 audit_put_record(zlogp, uc, rval, "boot"); 1369 if (rval != 0) { 1370 bringup_failure_recovery = B_TRUE; 1371 (void) zone_halt(zlogp, B_FALSE, B_FALSE, 1372 zstate); 1373 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1374 } 1375 break; 1376 case Z_SHUTDOWN: 1377 case Z_HALT: 1378 if (kernelcall) /* Invalid; can't happen */ 1379 abort(); 1380 /* 1381 * We could have two clients racing to halt this 1382 * zone; the second client loses, but its request 1383 * doesn't fail, since the zone is now in the desired 1384 * state. 
1385 */ 1386 zerror(zlogp, B_FALSE, "zone is already halted"); 1387 rval = 0; 1388 break; 1389 case Z_REBOOT: 1390 if (kernelcall) /* Invalid; can't happen */ 1391 abort(); 1392 zerror(zlogp, B_FALSE, "%s operation is invalid " 1393 "for zones in state '%s'", z_cmd_name(cmd), 1394 zone_state_str(zstate)); 1395 rval = -1; 1396 break; 1397 case Z_NOTE_UNINSTALLING: 1398 if (kernelcall) /* Invalid; can't happen */ 1399 abort(); 1400 /* 1401 * Tell the console to print out a message about this. 1402 * Once it does, we will be in_death_throes. 1403 */ 1404 eventstream_write(Z_EVT_ZONE_UNINSTALLING); 1405 break; 1406 case Z_MOUNT: 1407 case Z_FORCEMOUNT: 1408 if (kernelcall) /* Invalid; can't happen */ 1409 abort(); 1410 if (!zone_isnative && !zone_iscluster && 1411 !zone_islabeled) { 1412 /* 1413 * -U mounts the zone without lofs mounting 1414 * zone file systems back into the scratch 1415 * zone. This is required when mounting 1416 * non-native branded zones. 1417 */ 1418 (void) strlcpy(zargp->bootbuf, "-U", 1419 BOOTARGS_MAX); 1420 } 1421 1422 rval = zone_ready(zlogp, 1423 strcmp(zargp->bootbuf, "-U") == 0 ? 1424 Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate); 1425 if (rval != 0) 1426 break; 1427 1428 eventstream_write(Z_EVT_ZONE_READIED); 1429 1430 /* 1431 * Get a handle to the default brand info. 1432 * We must always use the default brand file system 1433 * list when mounting the zone. 1434 */ 1435 if ((bh = brand_open(default_brand)) == NULL) { 1436 rval = -1; 1437 break; 1438 } 1439 1440 /* 1441 * Get the list of filesystems to mount from 1442 * the brand configuration. These mounts are done 1443 * via a thread that will enter the zone, so they 1444 * are done from within the context of the zone. 
1445 */ 1446 cb.zlogp = zlogp; 1447 cb.zoneid = zone_id; 1448 cb.mount_cmd = B_TRUE; 1449 rval = brand_platform_iter_mounts(bh, 1450 mount_early_fs, &cb); 1451 1452 brand_close(bh); 1453 1454 /* 1455 * Ordinarily, /dev/fd would be mounted inside the zone 1456 * by svc:/system/filesystem/usr:default, but since 1457 * we're not booting the zone, we need to do this 1458 * manually. 1459 */ 1460 if (rval == 0) 1461 rval = mount_early_fs(&cb, 1462 "fd", "/dev/fd", "fd", NULL); 1463 break; 1464 case Z_UNMOUNT: 1465 if (kernelcall) /* Invalid; can't happen */ 1466 abort(); 1467 zerror(zlogp, B_FALSE, "zone is already unmounted"); 1468 rval = 0; 1469 break; 1470 } 1471 break; 1472 1473 case ZONE_STATE_READY: 1474 switch (cmd) { 1475 case Z_READY: 1476 /* 1477 * We could have two clients racing to ready this 1478 * zone; the second client loses, but its request 1479 * doesn't fail, since the zone is now in the desired 1480 * state. 1481 */ 1482 zerror(zlogp, B_FALSE, "zone is already ready"); 1483 rval = 0; 1484 break; 1485 case Z_BOOT: 1486 (void) strlcpy(boot_args, zargp->bootbuf, 1487 sizeof (boot_args)); 1488 eventstream_write(Z_EVT_ZONE_BOOTING); 1489 rval = zone_bootup(zlogp, zargp->bootbuf, zstate); 1490 audit_put_record(zlogp, uc, rval, "boot"); 1491 if (rval != 0) { 1492 bringup_failure_recovery = B_TRUE; 1493 (void) zone_halt(zlogp, B_FALSE, B_TRUE, 1494 zstate); 1495 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1496 } 1497 boot_args[0] = '\0'; 1498 break; 1499 case Z_HALT: 1500 if (kernelcall) /* Invalid; can't happen */ 1501 abort(); 1502 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate)) 1503 != 0) 1504 break; 1505 eventstream_write(Z_EVT_ZONE_HALTED); 1506 break; 1507 case Z_SHUTDOWN: 1508 case Z_REBOOT: 1509 case Z_NOTE_UNINSTALLING: 1510 case Z_MOUNT: 1511 case Z_UNMOUNT: 1512 if (kernelcall) /* Invalid; can't happen */ 1513 abort(); 1514 zerror(zlogp, B_FALSE, "%s operation is invalid " 1515 "for zones in state '%s'", z_cmd_name(cmd), 1516 
zone_state_str(zstate)); 1517 rval = -1; 1518 break; 1519 } 1520 break; 1521 1522 case ZONE_STATE_MOUNTED: 1523 switch (cmd) { 1524 case Z_UNMOUNT: 1525 if (kernelcall) /* Invalid; can't happen */ 1526 abort(); 1527 rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate); 1528 if (rval == 0) { 1529 eventstream_write(Z_EVT_ZONE_HALTED); 1530 (void) sema_post(&scratch_sem); 1531 } 1532 break; 1533 default: 1534 if (kernelcall) /* Invalid; can't happen */ 1535 abort(); 1536 zerror(zlogp, B_FALSE, "%s operation is invalid " 1537 "for zones in state '%s'", z_cmd_name(cmd), 1538 zone_state_str(zstate)); 1539 rval = -1; 1540 break; 1541 } 1542 break; 1543 1544 case ZONE_STATE_RUNNING: 1545 case ZONE_STATE_SHUTTING_DOWN: 1546 case ZONE_STATE_DOWN: 1547 switch (cmd) { 1548 case Z_READY: 1549 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate)) 1550 != 0) 1551 break; 1552 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) 1553 == 0) 1554 eventstream_write(Z_EVT_ZONE_READIED); 1555 else 1556 eventstream_write(Z_EVT_ZONE_HALTED); 1557 break; 1558 case Z_BOOT: 1559 /* 1560 * We could have two clients racing to boot this 1561 * zone; the second client loses, but its request 1562 * doesn't fail, since the zone is now in the desired 1563 * state. 
1564 */ 1565 zerror(zlogp, B_FALSE, "zone is already booted"); 1566 rval = 0; 1567 break; 1568 case Z_HALT: 1569 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate)) 1570 != 0) 1571 break; 1572 eventstream_write(Z_EVT_ZONE_HALTED); 1573 break; 1574 case Z_REBOOT: 1575 (void) strlcpy(boot_args, zargp->bootbuf, 1576 sizeof (boot_args)); 1577 eventstream_write(Z_EVT_ZONE_REBOOTING); 1578 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate)) 1579 != 0) { 1580 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1581 boot_args[0] = '\0'; 1582 break; 1583 } 1584 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) 1585 != 0) { 1586 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1587 boot_args[0] = '\0'; 1588 break; 1589 } 1590 rval = zone_bootup(zlogp, zargp->bootbuf, zstate); 1591 audit_put_record(zlogp, uc, rval, "reboot"); 1592 if (rval != 0) { 1593 (void) zone_halt(zlogp, B_FALSE, B_TRUE, 1594 zstate); 1595 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1596 } 1597 boot_args[0] = '\0'; 1598 break; 1599 case Z_SHUTDOWN: 1600 if ((rval = zone_graceful_shutdown(zlogp)) == 0) { 1601 wait_shut = B_TRUE; 1602 } 1603 break; 1604 case Z_NOTE_UNINSTALLING: 1605 case Z_MOUNT: 1606 case Z_UNMOUNT: 1607 zerror(zlogp, B_FALSE, "%s operation is invalid " 1608 "for zones in state '%s'", z_cmd_name(cmd), 1609 zone_state_str(zstate)); 1610 rval = -1; 1611 break; 1612 } 1613 break; 1614 default: 1615 abort(); 1616 } 1617 1618 /* 1619 * Because the state of the zone may have changed, we make sure 1620 * to wake the console poller, which is in charge of initiating 1621 * the shutdown procedure as necessary. 
1622 */ 1623 eventstream_write(Z_EVT_NULL); 1624 1625 out: 1626 (void) mutex_unlock(&lock); 1627 1628 /* Wait for the Z_SHUTDOWN commands to complete */ 1629 if (wait_shut) 1630 rval = zone_wait_shutdown(zlogp); 1631 1632 if (kernelcall) { 1633 rvalp = NULL; 1634 rlen = 0; 1635 } else { 1636 rvalp->rval = rval; 1637 } 1638 if (uc != NULL) 1639 ucred_free(uc); 1640 (void) door_return((char *)rvalp, rlen, NULL, 0); 1641 thr_exit(NULL); 1642 } 1643 1644 static int 1645 setup_door(zlog_t *zlogp) 1646 { 1647 if ((zone_door = door_create(server, NULL, 1648 DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) { 1649 zerror(zlogp, B_TRUE, "%s failed", "door_create"); 1650 return (-1); 1651 } 1652 (void) fdetach(zone_door_path); 1653 1654 if (fattach(zone_door, zone_door_path) != 0) { 1655 zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path); 1656 (void) door_revoke(zone_door); 1657 (void) fdetach(zone_door_path); 1658 zone_door = -1; 1659 return (-1); 1660 } 1661 return (0); 1662 } 1663 1664 /* 1665 * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this 1666 * is where zoneadmd itself will check to see that another instance of 1667 * zoneadmd isn't already controlling this zone. 1668 * 1669 * The idea here is that we want to open the path to which we will 1670 * attach our door, lock it, and then make sure that no-one has beat us 1671 * to fattach(3c)ing onto it. 1672 * 1673 * fattach(3c) is really a mount, so there are actually two possible 1674 * vnodes we could be dealing with. Our strategy is as follows: 1675 * 1676 * - If the file we opened is a regular file (common case): 1677 * There is no fattach(3c)ed door, so we have a chance of becoming 1678 * the managing zoneadmd. We attempt to lock the file: if it is 1679 * already locked, that means someone else raced us here, so we 1680 * lose and give up. zoneadm(1m) will try to contact the zoneadmd 1681 * that beat us to it. 
1682 * 1683 * - If the file we opened is a namefs file: 1684 * This means there is already an established door fattach(3c)'ed 1685 * to the rendezvous path. We've lost the race, so we give up. 1686 * Note that in this case we also try to grab the file lock, and 1687 * will succeed in acquiring it since the vnode locked by the 1688 * "winning" zoneadmd was a regular one, and the one we locked was 1689 * the fattach(3c)'ed door node. At any rate, no harm is done, and 1690 * we just return to zoneadm(1m) which knows to retry. 1691 */ 1692 static int 1693 make_daemon_exclusive(zlog_t *zlogp) 1694 { 1695 int doorfd = -1; 1696 int err, ret = -1; 1697 struct stat st; 1698 struct flock flock; 1699 zone_state_t zstate; 1700 1701 top: 1702 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 1703 zerror(zlogp, B_FALSE, "failed to get zone state: %s", 1704 zonecfg_strerror(err)); 1705 goto out; 1706 } 1707 if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR, 1708 S_IREAD|S_IWRITE)) < 0) { 1709 zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path); 1710 goto out; 1711 } 1712 if (fstat(doorfd, &st) < 0) { 1713 zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path); 1714 goto out; 1715 } 1716 /* 1717 * Lock the file to synchronize with other zoneadmd 1718 */ 1719 flock.l_type = F_WRLCK; 1720 flock.l_whence = SEEK_SET; 1721 flock.l_start = (off_t)0; 1722 flock.l_len = (off_t)0; 1723 if (fcntl(doorfd, F_SETLK, &flock) < 0) { 1724 /* 1725 * Someone else raced us here and grabbed the lock file 1726 * first. A warning here is inappropriate since nothing 1727 * went wrong. 1728 */ 1729 goto out; 1730 } 1731 1732 if (strcmp(st.st_fstype, "namefs") == 0) { 1733 struct door_info info; 1734 1735 /* 1736 * There is already something fattach()'ed to this file. 1737 * Lets see what the door is up to. 
1738 */ 1739 if (door_info(doorfd, &info) == 0 && info.di_target != -1) { 1740 /* 1741 * Another zoneadmd process seems to be in 1742 * control of the situation and we don't need to 1743 * be here. A warning here is inappropriate 1744 * since nothing went wrong. 1745 * 1746 * If the door has been revoked, the zoneadmd 1747 * process currently managing the zone is going 1748 * away. We'll return control to zoneadm(1m) 1749 * which will try again (by which time zoneadmd 1750 * will hopefully have exited). 1751 */ 1752 goto out; 1753 } 1754 1755 /* 1756 * If we got this far, there's a fattach(3c)'ed door 1757 * that belongs to a process that has exited, which can 1758 * happen if the previous zoneadmd died unexpectedly. 1759 * 1760 * Let user know that something is amiss, but that we can 1761 * recover; if the zone is in the installed state, then don't 1762 * message, since having a running zoneadmd isn't really 1763 * expected/needed. We want to keep occurences of this message 1764 * limited to times when zoneadmd is picking back up from a 1765 * zoneadmd that died while the zone was in some non-trivial 1766 * state. 1767 */ 1768 if (zstate > ZONE_STATE_INSTALLED) { 1769 zerror(zlogp, B_FALSE, 1770 "zone '%s': WARNING: zone is in state '%s', but " 1771 "zoneadmd does not appear to be available; " 1772 "restarted zoneadmd to recover.", 1773 zone_name, zone_state_str(zstate)); 1774 } 1775 1776 (void) fdetach(zone_door_path); 1777 (void) close(doorfd); 1778 goto top; 1779 } 1780 ret = 0; 1781 out: 1782 (void) close(doorfd); 1783 return (ret); 1784 } 1785 1786 /* 1787 * Setup the brand's pre and post state change callbacks, as well as the 1788 * query callback, if any of these exist. 
*/
/*
 * Each hook buffer (pre_statechg_hook, post_statechg_hook, query_hook) is
 * filled with EXEC_PREFIX followed by the command reported by the brand for
 * this zone and its zonepath.  A hook for which the brand supplies nothing
 * beyond the prefix is reset to the empty string.
 *
 * Returns 0 on success, or -1 if the zonepath or any of the three brand
 * lookups fails.
 */
static int
brand_callback_init(brand_handle_t bh, char *zone_name)
{
	char zpath[MAXPATHLEN];

	if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK)
		return (-1);

	(void) strlcpy(pre_statechg_hook, EXEC_PREFIX,
	    sizeof (pre_statechg_hook));

	if (brand_get_prestatechange(bh, zone_name, zpath,
	    pre_statechg_hook + EXEC_LEN,
	    sizeof (pre_statechg_hook) - EXEC_LEN) != 0)
		return (-1);

	/* nothing but the prefix: the brand defines no such hook */
	if (strlen(pre_statechg_hook) <= EXEC_LEN)
		pre_statechg_hook[0] = '\0';

	(void) strlcpy(post_statechg_hook, EXEC_PREFIX,
	    sizeof (post_statechg_hook));

	if (brand_get_poststatechange(bh, zone_name, zpath,
	    post_statechg_hook + EXEC_LEN,
	    sizeof (post_statechg_hook) - EXEC_LEN) != 0)
		return (-1);

	if (strlen(post_statechg_hook) <= EXEC_LEN)
		post_statechg_hook[0] = '\0';

	(void) strlcpy(query_hook, EXEC_PREFIX,
	    sizeof (query_hook));

	if (brand_get_query(bh, zone_name, zpath, query_hook + EXEC_LEN,
	    sizeof (query_hook) - EXEC_LEN) != 0)
		return (-1);

	if (strlen(query_hook) <= EXEC_LEN)
		query_hook[0] = '\0';

	return (0);
}

/*
 * zoneadmd entry point.
 *
 * Options: -z <zonename> (required) names the zone to manage and -R <root>
 * sets an alternate root via zonecfg_set_root().
 *
 * After validating the zone, its brand and the caller's privileges, the
 * process forks.  The parent waits on a semaphore in a shared anonymous
 * mapping, prints whatever the child logged into that mapping to stderr, and
 * exits with 0 if the child reported success and 1 otherwise.  The child
 * becomes the daemon: it makes sure no other zoneadmd manages the zone, sets
 * up the console and the door server, signals the parent, and then serves
 * the console (or, in an alternate root, waits for the scratch zone to be
 * unmounted).  On a setup failure the child reports status -1 to the parent
 * and returns 1.
 */
int
main(int argc, char *argv[])
{
	int opt;
	zoneid_t zid;
	priv_set_t *privset;
	zone_state_t zstate;
	char parents_locale[MAXPATHLEN];
	brand_handle_t bh;
	int err;

	pid_t pid;
	sigset_t blockset;
	sigset_t block_cld;

	/*
	 * State shared between parent and child across the fork; the log
	 * buffer occupies the rest of the mapping after this structure.
	 */
	struct {
		sema_t sem;
		int status;
		zlog_t log;
	} *shstate;
	size_t shstatelen = getpagesize();

	zlog_t errlog;
	zlog_t *zlogp;

	int ctfd;

	progname = get_execbasename(argv[0]);

	/*
	 * Make sure stderr is unbuffered
	 */
	(void) setbuffer(stderr, NULL, 0);

	/*
	 * Get out of the way of mounted filesystems, since we will daemonize
	 * soon.
	 */
	(void) chdir("/");

	/*
	 * Use the default system umask per PSARC 1998/110 rather than
	 * anything that may have been set by the caller.
	 */
	(void) umask(CMASK);

	/*
	 * Initially we want to use our parent's locale.
	 */
	(void) setlocale(LC_ALL, "");
	(void) textdomain(TEXT_DOMAIN);
	(void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL),
	    sizeof (parents_locale));

	/*
	 * This zlog_t is used for writing to stderr
	 */
	errlog.logfile = stderr;
	errlog.buflen = errlog.loglen = 0;
	errlog.buf = errlog.log = NULL;
	errlog.locale = parents_locale;

	/*
	 * We start off writing to stderr until we're ready to daemonize.
	 */
	zlogp = &errlog;

	/*
	 * Process options.
	 */
	while ((opt = getopt(argc, argv, "R:z:")) != EOF) {
		switch (opt) {
		case 'R':
			zonecfg_set_root(optarg);
			break;
		case 'z':
			zone_name = optarg;
			break;
		default:
			usage();
		}
	}

	if (zone_name == NULL)
		usage();

	/*
	 * Because usage() prints directly to stderr, it has gettext()
	 * wrapping, which depends on the locale.  But since zerror() calls
	 * localize() which tweaks the locale, it is not safe to call zerror()
	 * until after the last call to usage().  Fortunately, the last call
	 * to usage() is just above and the first call to zerror() is just
	 * below.  Don't mess this up.
	 */
	if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) {
		zerror(zlogp, B_FALSE, "cannot manage the %s zone",
		    GLOBAL_ZONENAME);
		return (1);
	}

	if (zone_get_id(zone_name, &zid) != 0) {
		zerror(zlogp, B_FALSE, "could not manage %s: %s", zone_name,
		    zonecfg_strerror(Z_NO_ZONE));
		return (1);
	}

	if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
		zerror(zlogp, B_FALSE, "failed to get zone state: %s",
		    zonecfg_strerror(err));
		return (1);
	}
	if (zstate < ZONE_STATE_INCOMPLETE) {
		zerror(zlogp, B_FALSE,
		    "cannot manage a zone which is in state '%s'",
		    zone_state_str(zstate));
		return (1);
	}

	if (zonecfg_default_brand(default_brand,
	    sizeof (default_brand)) != Z_OK) {
		zerror(zlogp, B_FALSE, "unable to determine default brand");
		return (1);
	}

	/* Get a handle to the brand info for this zone */
	if (zone_get_brand(zone_name, brand_name, sizeof (brand_name))
	    != Z_OK) {
		zerror(zlogp, B_FALSE, "unable to determine zone brand");
		return (1);
	}
	zone_isnative = (strcmp(brand_name, NATIVE_BRAND_NAME) == 0);
	zone_islabeled = (strcmp(brand_name, LABELED_BRAND_NAME) == 0);

	/*
	 * In the alternate root environment, the only supported
	 * operations are mount and unmount.  In this case, just treat
	 * the zone as native if it is cluster.  Cluster zones can be
	 * native for the purpose of LU or upgrade, and the cluster
	 * brand may not exist in the miniroot (such as in net install
	 * upgrade).
	 */
	if (strcmp(brand_name, CLUSTER_BRAND_NAME) == 0) {
		zone_iscluster = B_TRUE;
		if (zonecfg_in_alt_root()) {
			(void) strlcpy(brand_name, default_brand,
			    sizeof (brand_name));
		}
	} else {
		zone_iscluster = B_FALSE;
	}

	if ((bh = brand_open(brand_name)) == NULL) {
		zerror(zlogp, B_FALSE, "unable to open zone brand");
		return (1);
	}

	/* Get state change brand hooks. */
	if (brand_callback_init(bh, zone_name) == -1) {
		zerror(zlogp, B_TRUE,
		    "failed to initialize brand state change hooks");
		brand_close(bh);
		return (1);
	}

	brand_close(bh);

	/*
	 * Check that we have all privileges.  It would be nice to pare
	 * this down, but this is at least a first cut.
	 */
	if ((privset = priv_allocset()) == NULL) {
		zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
		return (1);
	}

	if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
		zerror(zlogp, B_TRUE, "%s failed", "getppriv");
		priv_freeset(privset);
		return (1);
	}

	if (priv_isfullset(privset) == B_FALSE) {
		zerror(zlogp, B_FALSE, "You lack sufficient privilege to "
		    "run this command (all privs required)");
		priv_freeset(privset);
		return (1);
	}
	priv_freeset(privset);

	if (mkzonedir(zlogp) != 0)
		return (1);

	/*
	 * Pre-fork: setup shared state
	 */
	if ((shstate = (void *)mmap(NULL, shstatelen,
	    PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) ==
	    MAP_FAILED) {
		zerror(zlogp, B_TRUE, "%s failed", "mmap");
		return (1);
	}
	if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) {
		zerror(zlogp, B_TRUE, "%s failed", "sema_init()");
		(void) munmap((char *)shstate, shstatelen);
		return (1);
	}
	shstate->log.logfile = NULL;
	shstate->log.buflen = shstatelen - sizeof (*shstate);
	shstate->log.loglen = shstate->log.buflen;
	shstate->log.buf = (char *)shstate + sizeof (*shstate);
	shstate->log.log = shstate->log.buf;
	shstate->log.locale = parents_locale;
	shstate->status = -1;

	/*
	 * We need a SIGCHLD handler so the sema_wait() below will wake
	 * up if the child dies without doing a sema_post().
	 */
	(void) sigset(SIGCHLD, sigchld);
	/*
	 * We must mask SIGCHLD until after we've coped with the fork
	 * sufficiently to deal with it; otherwise we can race and
	 * receive the signal before pid has been initialized
	 * (yes, this really happens).
	 */
	(void) sigemptyset(&block_cld);
	(void) sigaddset(&block_cld, SIGCHLD);
	(void) sigprocmask(SIG_BLOCK, &block_cld, NULL);

	/*
	 * The parent only needs stderr after the fork, so close other fd's
	 * that we inherited from zoneadm so that the parent doesn't have those
	 * open while waiting. The child will close the rest after the fork.
	 */
	closefrom(3);

	if ((ctfd = init_template()) == -1) {
		zerror(zlogp, B_TRUE, "failed to create contract");
		return (1);
	}

	/*
	 * Do not let another thread localize a message while we are forking.
	 */
	(void) mutex_lock(&msglock);
	pid = fork();
	(void) mutex_unlock(&msglock);

	/*
	 * In all cases (parent, child, and in the event of an error) we
	 * don't want to cause creation of contracts on subsequent fork()s.
	 */
	(void) ct_tmpl_clear(ctfd);
	(void) close(ctfd);

	if (pid == -1) {
		zerror(zlogp, B_TRUE, "could not fork");
		return (1);

	} else if (pid > 0) { /* parent */
		(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
		/*
		 * This marks a window of vulnerability in which we receive
		 * the SIGCLD before falling into sema_wait (normally we would
		 * get woken up from sema_wait with EINTR upon receipt of
		 * SIGCLD).  So we may need to use some other scheme like
		 * sema_posting in the sigcld handler.
		 * blech
		 */
		(void) sema_wait(&shstate->sem);
		(void) sema_destroy(&shstate->sem);
		if (shstate->status != 0)
			(void) waitpid(pid, NULL, WNOHANG);
		/*
		 * It's ok if we die with SIGPIPE.  It's not like we could have
		 * done anything about it.
		 */
		(void) fprintf(stderr, "%s", shstate->log.buf);
		_exit(shstate->status == 0 ? 0 : 1);
	}

	/*
	 * The child charges on.
	 */
	(void) sigset(SIGCHLD, SIG_DFL);
	(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);

	/*
	 * SIGPIPE can be delivered if we write to a socket for which the
	 * peer endpoint is gone.  That can lead to too-early termination
	 * of zoneadmd, and that's not good eats.
	 */
	(void) sigset(SIGPIPE, SIG_IGN);
	/*
	 * Stop using stderr
	 */
	zlogp = &shstate->log;

	/*
	 * We don't need stdout/stderr from now on.
	 */
	closefrom(0);

	/*
	 * Initialize the syslog zlog_t.  This needs to be done after
	 * the call to closefrom().
	 */
	logsys.buf = logsys.log = NULL;
	logsys.buflen = logsys.loglen = 0;
	logsys.logfile = NULL;
	logsys.locale = DEFAULT_LOCALE;

	openlog("zoneadmd", LOG_PID, LOG_DAEMON);

	/*
	 * The eventstream is used to publish state changes in the zone
	 * from the door threads to the console I/O poller.
	 */
	if (eventstream_init() == -1) {
		zerror(zlogp, B_TRUE, "unable to create eventstream");
		goto child_out;
	}

	(void) snprintf(zone_door_path, sizeof (zone_door_path),
	    "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name);

	/*
	 * See if another zoneadmd is running for this zone.  If not, then we
	 * can now modify system state.
	 */
	if (make_daemon_exclusive(zlogp) == -1)
		goto child_out;


	/*
	 * Create/join a new session; we need to be careful of what we do with
	 * the console from now on so we don't end up being the session leader
	 * for the terminal we're going to be handing out.
	 */
	(void) setsid();

	/*
	 * This thread shouldn't be receiving any signals; in particular,
	 * SIGCHLD should be received by the thread doing the fork().
	 */
	(void) sigfillset(&blockset);
	(void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL);

	/*
	 * Setup the console device and get ready to serve the console;
	 * once this has completed, we're ready to let console clients
	 * make an attempt to connect (they will block until
	 * serve_console_sock() below gets called, and any pending
	 * connection is accept()ed).
	 */
	if (!zonecfg_in_alt_root() && init_console(zlogp) < 0)
		goto child_out;

	/*
	 * Take the lock now, so that when the door server gets going, we
	 * are guaranteed that it won't take a request until we are sure
	 * that everything is completely set up.  See the child_out: label
	 * below to see why this matters.
	 */
	(void) mutex_lock(&lock);

	/* Init semaphore for scratch zones. */
	if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) {
		zerror(zlogp, B_TRUE,
		    "failed to initialize semaphore for scratch zone");
		goto child_out;
	}

	/* open the dladm handle */
	if (dladm_open(&dld_handle) != DLADM_STATUS_OK) {
		zerror(zlogp, B_FALSE, "failed to open dladm handle");
		goto child_out;
	}

	/*
	 * Note: door setup must occur *after* the console is setup.
	 * This is so that as zlogin tests the door to see if zoneadmd
	 * is ready yet, we know that the console will get serviced
	 * once door_info() indicates that the door is "up".
	 */
	if (setup_door(zlogp) == -1)
		goto child_out;

	/*
	 * Things seem OK so far; tell the parent process that we're done
	 * with setup tasks.  This will cause the parent to exit, signalling
	 * to zoneadm, zlogin, or whatever forked it that we are ready to
	 * service requests.
	 */
	shstate->status = 0;
	(void) sema_post(&shstate->sem);
	(void) munmap((char *)shstate, shstatelen);
	shstate = NULL;

	(void) mutex_unlock(&lock);

	/*
	 * zlogp is now invalid, so reset it to the syslog logger.
	 */
	zlogp = &logsys;

	/*
	 * Now that we are free of any parents, switch to the default locale.
	 */
	(void) setlocale(LC_ALL, DEFAULT_LOCALE);

	/*
	 * At this point the setup portion of main() is basically done, so
	 * we reuse this thread to manage the zone console.  When
	 * serve_console() has returned, we are past the point of no return
	 * in the life of this zoneadmd.
	 */
	if (zonecfg_in_alt_root()) {
		/*
		 * This is just awful, but mounted scratch zones don't (and
		 * can't) have consoles.  We just wait for unmount instead.
		 */
		while (sema_wait(&scratch_sem) == EINTR)
			;
	} else {
		serve_console(zlogp);
		assert(in_death_throes);
	}

	/*
	 * This is the next-to-last part of the exit interlock.  Upon calling
	 * fdetach(), the door will go unreferenced; once any
	 * outstanding requests (like the door thread doing Z_HALT) are
	 * done, the door will get an UNREF notification; when it handles
	 * the UNREF, the door server will cause the exit.  It's possible
	 * that fdetach() can fail because the file is in use, in which
	 * case we'll retry the operation.
	 */
	assert(!MUTEX_HELD(&lock));
	for (;;) {
		if ((fdetach(zone_door_path) == 0) || (errno != EBUSY))
			break;
		yield();
	}

	/* park this thread; the door server thread performs the exit */
	for (;;)
		(void) pause();

child_out:
	assert(pid == 0);

	/* report the setup failure to the waiting parent */
	shstate->status = -1;
	(void) sema_post(&shstate->sem);
	(void) munmap((char *)shstate, shstatelen);

	/*
	 * This might trigger an unref notification, but if so,
	 * we are still holding the lock, so our call to exit will
	 * ultimately win the race and will publish the right exit
	 * code.
	 */
	if (zone_door != -1) {
		assert(MUTEX_HELD(&lock));
		(void) door_revoke(zone_door);
		(void) fdetach(zone_door_path);
	}

	if (dld_handle != NULL)
		dladm_close(dld_handle);

	return (1); /* return from main() forcibly exits an MT process */
}