1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright (c) 2012, Joyent Inc. All rights reserved. 25 */ 26 27 /* 28 * zoneadmd manages zones; one zoneadmd process is launched for each 29 * non-global zone on the system. This daemon juggles four jobs: 30 * 31 * - Implement setup and teardown of the zone "virtual platform": mount and 32 * unmount filesystems; create and destroy network interfaces; communicate 33 * with devfsadmd to lay out devices for the zone; instantiate the zone 34 * console device; configure process runtime attributes such as resource 35 * controls, pool bindings, fine-grained privileges. 36 * 37 * - Launch the zone's init(1M) process. 38 * 39 * - Implement a door server; clients (like zoneadm) connect to the door 40 * server and request zone state changes. The kernel is also a client of 41 * this door server. A request to halt or reboot the zone which originates 42 * *inside* the zone results in a door upcall from the kernel into zoneadmd. 43 * 44 * One minor problem is that messages emitted by zoneadmd need to be passed 45 * back to the zoneadm process making the request. These messages need to 46 * be rendered in the client's locale; so, this is passed in as part of the 47 * request. The exception is the kernel upcall to zoneadmd, in which case 48 * messages are syslog'd. 49 * 50 * To make all of this work, the Makefile adds -a to xgettext to extract *all* 51 * strings, and an exclusion file (zoneadmd.xcl) is used to exclude those 52 * strings which do not need to be translated. 53 * 54 * - Act as a console server for zlogin -C processes; see comments in zcons.c 55 * for more information about the zone console architecture. 56 * 57 * DESIGN NOTES 58 * 59 * Restart: 60 * A chief design constraint of zoneadmd is that it should be restartable in 61 * the case that the administrator kills it off, or it suffers a fatal error, 62 * without the running zone being impacted; this is akin to being able to 63 * reboot the service processor of a server without affecting the OS instance. 64 */ 65 66 #include <sys/param.h> 67 #include <sys/mman.h> 68 #include <sys/types.h> 69 #include <sys/stat.h> 70 #include <sys/sysmacros.h> 71 72 #include <bsm/adt.h> 73 #include <bsm/adt_event.h> 74 75 #include <alloca.h> 76 #include <assert.h> 77 #include <errno.h> 78 #include <door.h> 79 #include <fcntl.h> 80 #include <locale.h> 81 #include <signal.h> 82 #include <stdarg.h> 83 #include <stdio.h> 84 #include <stdlib.h> 85 #include <string.h> 86 #include <strings.h> 87 #include <synch.h> 88 #include <syslog.h> 89 #include <thread.h> 90 #include <unistd.h> 91 #include <wait.h> 92 #include <limits.h> 93 #include <zone.h> 94 #include <libbrand.h> 95 #include <sys/brand.h> 96 #include <libcontract.h> 97 #include <libcontract_priv.h> 98 #include <sys/brand.h> 99 #include <sys/contract/process.h> 100 #include <sys/ctfs.h> 101 #include <libdladm.h> 102 #include <sys/dls_mgmt.h> 103 104 #include <libzonecfg.h> 105 #include <zonestat_impl.h> 106 #include "zoneadmd.h" 107 108 static char *progname; 109 char *zone_name; /* zone which we are managing */ 110 zone_dochandle_t snap_hndl; /* handle for snapshot created when ready */ 111 char pool_name[MAXNAMELEN]; 112 char default_brand[MAXNAMELEN]; 113 char brand_name[MAXNAMELEN]; 114 boolean_t zone_isnative; 115 boolean_t zone_iscluster; 116 boolean_t zone_islabeled; 117 static zoneid_t zone_id; 118 static zoneid_t zone_did = 0; 119 dladm_handle_t dld_handle = NULL; 120 121 char pre_statechg_hook[2 * MAXPATHLEN]; 122 char post_statechg_hook[2 * MAXPATHLEN]; 123 char query_hook[2 * MAXPATHLEN]; 124 125 zlog_t logsys; 126 127 mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */ 128 mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */ 129 130 static sema_t scratch_sem; /* for scratch zones */ 131 132 static char zone_door_path[MAXPATHLEN]; 133 static int zone_door = -1; 134 135 boolean_t in_death_throes = B_FALSE; /* daemon is dying */ 136 boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */ 137 138 #if !defined(TEXT_DOMAIN) /* should be defined by cc -D */ 139 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */ 140 #endif 141 142 #define DEFAULT_LOCALE "C" 143 144 #define RSRC_NET "net" 145 #define RSRC_DEV "device" 146 147 static const char * 148 z_cmd_name(zone_cmd_t zcmd) 149 { 150 /* This list needs to match the enum in sys/zone.h */ 151 static const char *zcmdstr[] = { 152 "ready", "boot", "forceboot", "reboot", "halt", 153 "note_uninstalling", "mount", "forcemount", "unmount" 154 }; 155 156 if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr)) 157 return ("unknown"); 158 else 159 return (zcmdstr[(int)zcmd]); 160 } 161 162 static char * 163 get_execbasename(char *execfullname) 164 { 165 char *last_slash, *execbasename; 166 167 /* guard against '/' at end of command invocation */ 168 for (;;) { 169 last_slash = strrchr(execfullname, '/'); 170 if (last_slash == NULL) { 171 execbasename = execfullname; 172 break; 173 } else { 174 execbasename = last_slash + 1; 175 if (*execbasename == '\0') { 176 *last_slash = '\0'; 177 continue; 178 } 179 break; 180 } 181 } 182 return (execbasename); 183 } 184 185 static void 186 usage(void) 187 { 188 (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname); 189 (void) fprintf(stderr, 190 gettext("\tNote: %s should not be run directly.\n"), progname); 191 exit(2); 192 } 193 194 /* ARGSUSED */ 195 static void 196 sigchld(int sig) 197 { 198 } 199 200 char * 201 localize_msg(char *locale, const char *msg) 202 { 203 char *out; 204 205 (void) mutex_lock(&msglock); 206 (void) setlocale(LC_MESSAGES, locale); 207 out = gettext(msg); 208 (void) setlocale(LC_MESSAGES, DEFAULT_LOCALE); 209 (void) mutex_unlock(&msglock); 210 return (out); 211 } 212 213 /* PRINTFLIKE3 */ 214 void 215 zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...) 216 { 217 va_list alist; 218 char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */ 219 char *bp; 220 int saved_errno = errno; 221 222 if (zlogp == NULL) 223 return; 224 if (zlogp == &logsys) 225 (void) snprintf(buf, sizeof (buf), "[zone '%s'] ", 226 zone_name); 227 else 228 buf[0] = '\0'; 229 bp = &(buf[strlen(buf)]); 230 231 /* 232 * In theory, the locale pointer should be set to either "C" or a 233 * char array, so it should never be NULL 234 */ 235 assert(zlogp->locale != NULL); 236 /* Locale is per process, but we are multi-threaded... */ 237 fmt = localize_msg(zlogp->locale, fmt); 238 239 va_start(alist, fmt); 240 (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist); 241 va_end(alist); 242 bp = &(buf[strlen(buf)]); 243 if (use_strerror) 244 (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s", 245 strerror(saved_errno)); 246 if (zlogp == &logsys) { 247 (void) syslog(LOG_ERR, "%s", buf); 248 } else if (zlogp->logfile != NULL) { 249 (void) fprintf(zlogp->logfile, "%s\n", buf); 250 } else { 251 size_t buflen; 252 size_t copylen; 253 254 buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf); 255 copylen = MIN(buflen, zlogp->loglen); 256 zlogp->log += copylen; 257 zlogp->loglen -= copylen; 258 } 259 } 260 261 /* 262 * Emit a warning for any boot arguments which are unrecognized. Since 263 * Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we 264 * put the arguments into an argv style array, use getopt to process them, 265 * and put the resultant argument string back into outargs. 266 * 267 * During the filtering, we pull out any arguments which are truly "boot" 268 * arguments, leaving only those which are to be passed intact to the 269 * progenitor process. The one we support at the moment is -i, which 270 * indicates to the kernel which program should be launched as 'init'. 271 * 272 * A return of Z_INVAL indicates specifically that the arguments are 273 * not valid; this is a non-fatal error. Except for Z_OK, all other return 274 * values are treated as fatal. 275 */ 276 static int 277 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs, 278 char *init_file, char *badarg) 279 { 280 int argc = 0, argc_save; 281 int i; 282 int err = Z_OK; 283 char *arg, *lasts, **argv = NULL, **argv_save; 284 char zonecfg_args[BOOTARGS_MAX]; 285 char scratchargs[BOOTARGS_MAX], *sargs; 286 char c; 287 288 bzero(outargs, BOOTARGS_MAX); 289 bzero(badarg, BOOTARGS_MAX); 290 291 /* 292 * If the user didn't specify transient boot arguments, check 293 * to see if there were any specified in the zone configuration, 294 * and use them if applicable. 295 */ 296 if (inargs == NULL || inargs[0] == '\0') { 297 bzero(zonecfg_args, sizeof (zonecfg_args)); 298 (void) zonecfg_get_bootargs(snap_hndl, zonecfg_args, 299 sizeof (zonecfg_args)); 300 inargs = zonecfg_args; 301 } 302 303 if (strlen(inargs) >= BOOTARGS_MAX) { 304 zerror(zlogp, B_FALSE, "boot argument string too long"); 305 return (Z_INVAL); 306 } 307 308 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs)); 309 sargs = scratchargs; 310 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) { 311 sargs = NULL; 312 argc++; 313 } 314 315 if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) { 316 zerror(zlogp, B_FALSE, "memory allocation failed"); 317 return (Z_NOMEM); 318 } 319 320 argv_save = argv; 321 argc_save = argc; 322 323 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs)); 324 sargs = scratchargs; 325 i = 0; 326 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) { 327 sargs = NULL; 328 if ((argv[i] = strdup(arg)) == NULL) { 329 err = Z_NOMEM; 330 zerror(zlogp, B_FALSE, "memory allocation failed"); 331 goto done; 332 } 333 i++; 334 } 335 336 /* 337 * We preserve compatibility with the Solaris system boot behavior, 338 * which allows: 339 * 340 * # reboot kernel/unix -s -m verbose 341 * 342 * In this example, kernel/unix tells the booter what file to 343 * boot. We don't want reboot in a zone to be gratuitously different, 344 * so we silently ignore the boot file, if necessary. 345 */ 346 if (argv[0] == NULL) 347 goto done; 348 349 assert(argv[0][0] != ' '); 350 assert(argv[0][0] != '\t'); 351 352 if (argv[0][0] != '-' && argv[0][0] != '\0') { 353 argv = &argv[1]; 354 argc--; 355 } 356 357 optind = 0; 358 opterr = 0; 359 err = Z_OK; 360 while ((c = getopt(argc, argv, "fi:m:s")) != -1) { 361 switch (c) { 362 case 'i': 363 /* 364 * -i is handled by the runtime and is not passed 365 * along to userland 366 */ 367 (void) strlcpy(init_file, optarg, MAXPATHLEN); 368 break; 369 case 'f': 370 /* This has already been processed by zoneadm */ 371 break; 372 case 'm': 373 case 's': 374 /* These pass through unmolested */ 375 (void) snprintf(outargs, BOOTARGS_MAX, 376 "%s -%c %s ", outargs, c, optarg ? optarg : ""); 377 break; 378 case '?': 379 /* 380 * We warn about unknown arguments but pass them 381 * along anyway-- if someone wants to develop their 382 * own init replacement, they can pass it whatever 383 * args they want. 384 */ 385 err = Z_INVAL; 386 (void) snprintf(outargs, BOOTARGS_MAX, 387 "%s -%c", outargs, optopt); 388 (void) snprintf(badarg, BOOTARGS_MAX, 389 "%s -%c", badarg, optopt); 390 break; 391 } 392 } 393 394 /* 395 * For Solaris Zones we warn about and discard non-option arguments. 396 * Hence 'boot foo bar baz gub' --> 'boot'. However, to be similar 397 * to the kernel, we concat up all the other remaining boot args. 398 * and warn on them as a group. 399 */ 400 if (optind < argc) { 401 err = Z_INVAL; 402 while (optind < argc) { 403 (void) snprintf(badarg, BOOTARGS_MAX, "%s%s%s", 404 badarg, strlen(badarg) > 0 ? " " : "", 405 argv[optind]); 406 optind++; 407 } 408 zerror(zlogp, B_FALSE, "WARNING: Unused or invalid boot " 409 "arguments `%s'.", badarg); 410 } 411 412 done: 413 for (i = 0; i < argc_save; i++) { 414 if (argv_save[i] != NULL) 415 free(argv_save[i]); 416 } 417 free(argv_save); 418 return (err); 419 } 420 421 422 static int 423 mkzonedir(zlog_t *zlogp) 424 { 425 struct stat st; 426 /* 427 * We must create and lock everyone but root out of ZONES_TMPDIR 428 * since anyone can open any UNIX domain socket, regardless of 429 * its file system permissions. Sigh... 430 */ 431 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) { 432 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR); 433 return (-1); 434 } 435 /* paranoia */ 436 if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) { 437 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR); 438 return (-1); 439 } 440 (void) chmod(ZONES_TMPDIR, S_IRWXU); 441 return (0); 442 } 443 444 /* 445 * Run the brand's pre-state change callback, if it exists. 446 */ 447 static int 448 brand_prestatechg(zlog_t *zlogp, int state, int cmd, boolean_t debug) 449 { 450 char cmdbuf[2 * MAXPATHLEN]; 451 const char *altroot; 452 453 if (pre_statechg_hook[0] == '\0') 454 return (0); 455 456 altroot = zonecfg_get_root(); 457 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", pre_statechg_hook, 458 state, cmd, altroot) > sizeof (cmdbuf)) 459 return (-1); 460 461 if (do_subproc(zlogp, cmdbuf, NULL, debug) != 0) 462 return (-1); 463 464 return (0); 465 } 466 467 /* 468 * Run the brand's post-state change callback, if it exists. 469 */ 470 static int 471 brand_poststatechg(zlog_t *zlogp, int state, int cmd, boolean_t debug) 472 { 473 char cmdbuf[2 * MAXPATHLEN]; 474 const char *altroot; 475 476 if (post_statechg_hook[0] == '\0') 477 return (0); 478 479 altroot = zonecfg_get_root(); 480 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", post_statechg_hook, 481 state, cmd, altroot) > sizeof (cmdbuf)) 482 return (-1); 483 484 if (do_subproc(zlogp, cmdbuf, NULL, debug) != 0) 485 return (-1); 486 487 return (0); 488 } 489 490 /* 491 * Notify zonestatd of the new zone. If zonestatd is not running, this 492 * will do nothing. 493 */ 494 static void 495 notify_zonestatd(zoneid_t zoneid) 496 { 497 int cmd[2]; 498 int fd; 499 door_arg_t params; 500 501 fd = open(ZS_DOOR_PATH, O_RDONLY); 502 if (fd < 0) 503 return; 504 505 cmd[0] = ZSD_CMD_NEW_ZONE; 506 cmd[1] = zoneid; 507 params.data_ptr = (char *)&cmd; 508 params.data_size = sizeof (cmd); 509 params.desc_ptr = NULL; 510 params.desc_num = 0; 511 params.rbuf = NULL; 512 params.rsize = NULL; 513 (void) door_call(fd, ¶ms); 514 (void) close(fd); 515 } 516 517 /* 518 * Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is 519 * 'true' if this is being invoked as part of the processing for the "mount" 520 * subcommand. 521 */ 522 static int 523 zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate, boolean_t debug) 524 { 525 int err; 526 boolean_t snapped = B_FALSE; 527 528 if ((snap_hndl = zonecfg_init_handle()) == NULL) { 529 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 530 goto bad; 531 } 532 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) { 533 zerror(zlogp, B_FALSE, "unable to create snapshot: %s", 534 zonecfg_strerror(err)); 535 goto bad; 536 } 537 snapped = B_TRUE; 538 539 if (zonecfg_get_snapshot_handle(zone_name, snap_hndl) != Z_OK) { 540 zerror(zlogp, B_FALSE, "invalid configuration snapshot"); 541 goto bad; 542 } 543 544 if (zone_did == 0) 545 zone_did = zone_get_did(zone_name); 546 547 if (brand_prestatechg(zlogp, zstate, Z_READY, debug) != 0) 548 goto bad; 549 550 if ((zone_id = vplat_create(zlogp, mount_cmd, zone_did)) == -1) 551 goto bad; 552 553 if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) { 554 bringup_failure_recovery = B_TRUE; 555 (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE, 556 debug); 557 goto bad; 558 } 559 560 if (brand_poststatechg(zlogp, zstate, Z_READY, debug) != 0) 561 goto bad; 562 563 return (0); 564 565 bad: 566 /* 567 * If something goes wrong, we up the zones's state to the target 568 * state, READY, and then invoke the hook as if we're halting. 569 */ 570 (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT, debug); 571 if (snapped) 572 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 573 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 574 zonecfg_strerror(err)); 575 zonecfg_fini_handle(snap_hndl); 576 snap_hndl = NULL; 577 return (-1); 578 } 579 580 int 581 init_template(void) 582 { 583 int fd; 584 int err = 0; 585 586 fd = open64(CTFS_ROOT "/process/template", O_RDWR); 587 if (fd == -1) 588 return (-1); 589 590 /* 591 * For now, zoneadmd doesn't do anything with the contract. 592 * Deliver no events, don't inherit, and allow it to be orphaned. 593 */ 594 err |= ct_tmpl_set_critical(fd, 0); 595 err |= ct_tmpl_set_informative(fd, 0); 596 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR); 597 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT); 598 if (err || ct_tmpl_activate(fd)) { 599 (void) close(fd); 600 return (-1); 601 } 602 603 return (fd); 604 } 605 606 typedef struct fs_callback { 607 zlog_t *zlogp; 608 zoneid_t zoneid; 609 boolean_t mount_cmd; 610 } fs_callback_t; 611 612 static int 613 mount_early_fs(void *data, const char *spec, const char *dir, 614 const char *fstype, const char *opt) 615 { 616 zlog_t *zlogp = ((fs_callback_t *)data)->zlogp; 617 zoneid_t zoneid = ((fs_callback_t *)data)->zoneid; 618 boolean_t mount_cmd = ((fs_callback_t *)data)->mount_cmd; 619 char rootpath[MAXPATHLEN]; 620 pid_t child; 621 int child_status; 622 int tmpl_fd; 623 int rv; 624 ctid_t ct; 625 626 /* determine the zone rootpath */ 627 if (mount_cmd) { 628 char zonepath[MAXPATHLEN]; 629 char luroot[MAXPATHLEN]; 630 631 if (zone_get_zonepath(zone_name, 632 zonepath, sizeof (zonepath)) != Z_OK) { 633 zerror(zlogp, B_FALSE, "unable to determine zone path"); 634 return (-1); 635 } 636 637 (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath); 638 resolve_lofs(zlogp, luroot, sizeof (luroot)); 639 (void) strlcpy(rootpath, luroot, sizeof (rootpath)); 640 } else { 641 if (zone_get_rootpath(zone_name, 642 rootpath, sizeof (rootpath)) != Z_OK) { 643 zerror(zlogp, B_FALSE, "unable to determine zone root"); 644 return (-1); 645 } 646 } 647 648 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, fstype)) < 0) { 649 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point", 650 rootpath, dir); 651 return (-1); 652 } else if (rv > 0) { 653 /* The mount point path doesn't exist, create it now. */ 654 if (make_one_dir(zlogp, rootpath, dir, 655 DEFAULT_DIR_MODE, DEFAULT_DIR_USER, 656 DEFAULT_DIR_GROUP) != 0) { 657 zerror(zlogp, B_FALSE, "failed to create mount point"); 658 return (-1); 659 } 660 661 /* 662 * Now this might seem weird, but we need to invoke 663 * valid_mount_path() again. Why? Because it checks 664 * to make sure that the mount point path is canonical, 665 * which it can only do if the path exists, so now that 666 * we've created the path we have to verify it again. 667 */ 668 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, 669 fstype)) < 0) { 670 zerror(zlogp, B_FALSE, 671 "%s%s is not a valid mount point", rootpath, dir); 672 return (-1); 673 } 674 } 675 676 if ((tmpl_fd = init_template()) == -1) { 677 zerror(zlogp, B_TRUE, "failed to create contract"); 678 return (-1); 679 } 680 681 if ((child = fork()) == -1) { 682 (void) ct_tmpl_clear(tmpl_fd); 683 (void) close(tmpl_fd); 684 zerror(zlogp, B_TRUE, "failed to fork"); 685 return (-1); 686 687 } else if (child == 0) { /* child */ 688 char opt_buf[MAX_MNTOPT_STR]; 689 int optlen = 0; 690 int mflag = MS_DATA; 691 692 (void) ct_tmpl_clear(tmpl_fd); 693 /* 694 * Even though there are no procs running in the zone, we 695 * do this for paranoia's sake. 696 */ 697 (void) closefrom(0); 698 699 if (zone_enter(zoneid) == -1) { 700 _exit(errno); 701 } 702 if (opt != NULL) { 703 /* 704 * The mount() system call is incredibly annoying. 705 * If options are specified, we need to copy them 706 * into a temporary buffer since the mount() system 707 * call will overwrite the options string. It will 708 * also fail if the new option string it wants to 709 * write is bigger than the one we passed in, so 710 * you must pass in a buffer of the maximum possible 711 * option string length. sigh. 712 */ 713 (void) strlcpy(opt_buf, opt, sizeof (opt_buf)); 714 opt = opt_buf; 715 optlen = MAX_MNTOPT_STR; 716 mflag = MS_OPTIONSTR; 717 } 718 if (mount(spec, dir, mflag, fstype, NULL, 0, opt, optlen) != 0) 719 _exit(errno); 720 _exit(0); 721 } 722 723 /* parent */ 724 if (contract_latest(&ct) == -1) 725 ct = -1; 726 (void) ct_tmpl_clear(tmpl_fd); 727 (void) close(tmpl_fd); 728 if (waitpid(child, &child_status, 0) != child) { 729 /* unexpected: we must have been signalled */ 730 (void) contract_abandon_id(ct); 731 return (-1); 732 } 733 (void) contract_abandon_id(ct); 734 if (WEXITSTATUS(child_status) != 0) { 735 errno = WEXITSTATUS(child_status); 736 zerror(zlogp, B_TRUE, "mount of %s failed", dir); 737 return (-1); 738 } 739 740 return (0); 741 } 742 743 /* 744 * env variable name format 745 * _ZONECFG_{resource name}_{identifying attr. name}_{property name} 746 * Any dashes (-) in the property names are replaced with underscore (_). 747 */ 748 static void 749 set_zonecfg_env(char *rsrc, char *attr, char *name, char *val) 750 { 751 char *p; 752 char nm[MAXNAMELEN]; 753 754 (void) snprintf(nm, sizeof (nm), "_ZONECFG_%s_%s_%s", rsrc, attr, name); 755 756 p = nm; 757 while ((p = strchr(p, '-')) != NULL) 758 *p++ = '_'; 759 760 (void) setenv(nm, val, 1); 761 } 762 763 /* 764 * Export zonecfg network and device properties into environment for the boot 765 * and state change hooks. 766 * If debug is true, export the brand hook debug env. variable as well. 767 * 768 * We could export more of the config in the future, as necessary. 769 */ 770 static int 771 setup_subproc_env(boolean_t debug) 772 { 773 int res; 774 struct zone_nwiftab ntab; 775 struct zone_devtab dtab; 776 char net_resources[MAXNAMELEN * 2]; 777 char dev_resources[MAXNAMELEN * 2]; 778 779 net_resources[0] = '\0'; 780 if ((res = zonecfg_setnwifent(snap_hndl)) != Z_OK) 781 goto done; 782 783 while (zonecfg_getnwifent(snap_hndl, &ntab) == Z_OK) { 784 struct zone_res_attrtab *rap; 785 char *phys; 786 787 phys = ntab.zone_nwif_physical; 788 789 (void) strlcat(net_resources, phys, sizeof (net_resources)); 790 (void) strlcat(net_resources, " ", sizeof (net_resources)); 791 792 set_zonecfg_env(RSRC_NET, phys, "physical", phys); 793 794 set_zonecfg_env(RSRC_NET, phys, "address", 795 ntab.zone_nwif_address); 796 set_zonecfg_env(RSRC_NET, phys, "allowed-address", 797 ntab.zone_nwif_allowed_address); 798 set_zonecfg_env(RSRC_NET, phys, "defrouter", 799 ntab.zone_nwif_defrouter); 800 set_zonecfg_env(RSRC_NET, phys, "global-nic", 801 ntab.zone_nwif_gnic); 802 set_zonecfg_env(RSRC_NET, phys, "mac-addr", ntab.zone_nwif_mac); 803 set_zonecfg_env(RSRC_NET, phys, "vlan-id", 804 ntab.zone_nwif_vlan_id); 805 806 for (rap = ntab.zone_nwif_attrp; rap != NULL; 807 rap = rap->zone_res_attr_next) 808 set_zonecfg_env(RSRC_NET, phys, rap->zone_res_attr_name, 809 rap->zone_res_attr_value); 810 nwifent_free_attrs(&ntab); 811 } 812 813 (void) setenv("_ZONECFG_net_resources", net_resources, 1); 814 815 (void) zonecfg_endnwifent(snap_hndl); 816 817 if ((res = zonecfg_setdevent(snap_hndl)) != Z_OK) 818 goto done; 819 820 while (zonecfg_getdevent(snap_hndl, &dtab) == Z_OK) { 821 struct zone_res_attrtab *rap; 822 char *match; 823 824 match = dtab.zone_dev_match; 825 826 (void) strlcat(dev_resources, match, sizeof (dev_resources)); 827 (void) strlcat(dev_resources, " ", sizeof (dev_resources)); 828 829 for (rap = dtab.zone_dev_attrp; rap != NULL; 830 rap = rap->zone_res_attr_next) 831 set_zonecfg_env(RSRC_DEV, match, 832 rap->zone_res_attr_name, rap->zone_res_attr_value); 833 } 834 835 (void) zonecfg_enddevent(snap_hndl); 836 837 if (debug) 838 (void) setenv("_ZONEADMD_brand_debug", "1", 1); 839 else 840 (void) setenv("_ZONEADMD_brand_debug", "", 1); 841 842 res = Z_OK; 843 844 done: 845 return (res); 846 } 847 848 void 849 nwifent_free_attrs(struct zone_nwiftab *np) 850 { 851 struct zone_res_attrtab *rap; 852 853 for (rap = np->zone_nwif_attrp; rap != NULL; ) { 854 struct zone_res_attrtab *tp = rap; 855 856 rap = rap->zone_res_attr_next; 857 free(tp); 858 } 859 } 860 861 /* 862 * If retstr is not NULL, the output of the subproc is returned in the str, 863 * otherwise it is output using zerror(). Any memory allocated for retstr 864 * should be freed by the caller. 865 */ 866 int 867 do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr, boolean_t debug) 868 { 869 char buf[1024]; /* arbitrary large amount */ 870 char *inbuf; 871 FILE *file; 872 int status; 873 int rd_cnt; 874 875 if (retstr != NULL) { 876 if ((*retstr = malloc(1024)) == NULL) { 877 zerror(zlogp, B_FALSE, "out of memory"); 878 return (-1); 879 } 880 inbuf = *retstr; 881 rd_cnt = 0; 882 } else { 883 inbuf = buf; 884 } 885 886 if (setup_subproc_env(debug) != Z_OK) { 887 zerror(zlogp, B_FALSE, "failed to setup environment"); 888 return (-1); 889 } 890 891 file = popen(cmdbuf, "r"); 892 if (file == NULL) { 893 zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf); 894 return (-1); 895 } 896 897 while (fgets(inbuf, 1024, file) != NULL) { 898 if (retstr == NULL) { 899 if (zlogp != &logsys) { 900 int last = strlen(inbuf) - 1; 901 902 if (inbuf[last] == '\n') 903 inbuf[last] = '\0'; 904 zerror(zlogp, B_FALSE, "%s", inbuf); 905 } 906 } else { 907 char *p; 908 909 rd_cnt += 1024 - 1; 910 if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) { 911 zerror(zlogp, B_FALSE, "out of memory"); 912 (void) pclose(file); 913 return (-1); 914 } 915 916 *retstr = p; 917 inbuf = *retstr + rd_cnt; 918 } 919 } 920 status = pclose(file); 921 922 if (WIFSIGNALED(status)) { 923 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to " 924 "signal %d", cmdbuf, WTERMSIG(status)); 925 return (-1); 926 } 927 assert(WIFEXITED(status)); 928 if (WEXITSTATUS(status) == ZEXIT_EXEC) { 929 zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf); 930 return (-1); 931 } 932 return (WEXITSTATUS(status)); 933 } 934 935 /* 936 * Get the path for this zone's init(1M) (or equivalent) process. First look 937 * for a zone-specific init-name attr, then get it from the brand. 938 */ 939 static int 940 get_initname(brand_handle_t bh, char *initname, int len) 941 { 942 struct zone_attrtab a; 943 944 bzero(&a, sizeof (a)); 945 (void) strlcpy(a.zone_attr_name, "init-name", 946 sizeof (a.zone_attr_name)); 947 948 if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK) { 949 (void) strlcpy(initname, a.zone_attr_value, len); 950 return (0); 951 } 952 953 return (brand_get_initname(bh, initname, len)); 954 } 955 956 /* 957 * Get the restart-init flag for this zone's init(1M) (or equivalent) process. 958 * First look for a zone-specific restart-init attr, then get it from the brand. 959 */ 960 static boolean_t 961 restartinit(brand_handle_t bh) 962 { 963 struct zone_attrtab a; 964 965 bzero(&a, sizeof (a)); 966 (void) strlcpy(a.zone_attr_name, "restart-init", 967 sizeof (a.zone_attr_name)); 968 969 if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK) { 970 if (strcmp(a.zone_attr_value, "false") == 0) 971 return (B_FALSE); 972 return (B_TRUE); 973 } 974 975 return (brand_restartinit(bh)); 976 } 977 978 static int 979 zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate, boolean_t debug) 980 { 981 zoneid_t zoneid; 982 struct stat st; 983 char zpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN]; 984 char nbootargs[BOOTARGS_MAX]; 985 char cmdbuf[MAXPATHLEN]; 986 fs_callback_t cb; 987 brand_handle_t bh; 988 zone_iptype_t iptype; 989 boolean_t links_loaded = B_FALSE; 990 dladm_status_t status; 991 char errmsg[DLADM_STRSIZE]; 992 int err; 993 boolean_t restart_init; 994 995 if (brand_prestatechg(zlogp, zstate, Z_BOOT, debug) != 0) 996 return (-1); 997 998 if ((zoneid = getzoneidbyname(zone_name)) == -1) { 999 zerror(zlogp, B_TRUE, "unable to get zoneid"); 1000 goto bad; 1001 } 1002 1003 cb.zlogp = zlogp; 1004 cb.zoneid = zoneid; 1005 cb.mount_cmd = B_FALSE; 1006 1007 /* Get a handle to the brand info for this zone */ 1008 if ((bh = brand_open(brand_name)) == NULL) { 1009 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 1010 goto bad; 1011 } 1012 1013 /* 1014 * Get the list of filesystems to mount from the brand 1015 * configuration. These mounts are done via a thread that will 1016 * enter the zone, so they are done from within the context of the 1017 * zone. 1018 */ 1019 if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) { 1020 zerror(zlogp, B_FALSE, "unable to mount filesystems"); 1021 brand_close(bh); 1022 goto bad; 1023 } 1024 1025 /* 1026 * Get the brand's boot callback if it exists. 1027 */ 1028 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) { 1029 zerror(zlogp, B_FALSE, "unable to determine zone path"); 1030 brand_close(bh); 1031 goto bad; 1032 } 1033 (void) strcpy(cmdbuf, EXEC_PREFIX); 1034 if (brand_get_boot(bh, zone_name, zpath, cmdbuf + EXEC_LEN, 1035 sizeof (cmdbuf) - EXEC_LEN) != 0) { 1036 zerror(zlogp, B_FALSE, 1037 "unable to determine branded zone's boot callback"); 1038 brand_close(bh); 1039 goto bad; 1040 } 1041 1042 /* Get the path for this zone's init(1M) (or equivalent) process. */ 1043 if (get_initname(bh, init_file, MAXPATHLEN) != 0) { 1044 zerror(zlogp, B_FALSE, 1045 "unable to determine zone's init(1M) location"); 1046 brand_close(bh); 1047 goto bad; 1048 } 1049 1050 /* See if we should restart init if it dies. */ 1051 restart_init = restartinit(bh); 1052 1053 brand_close(bh); 1054 1055 err = filter_bootargs(zlogp, bootargs, nbootargs, init_file, 1056 bad_boot_arg); 1057 if (err == Z_INVAL) 1058 eventstream_write(Z_EVT_ZONE_BADARGS); 1059 else if (err != Z_OK) 1060 goto bad; 1061 1062 assert(init_file[0] != '\0'); 1063 1064 /* Try to anticipate possible problems: Make sure init is executable. */ 1065 if (zone_get_rootpath(zone_name, zpath, sizeof (zpath)) != Z_OK) { 1066 zerror(zlogp, B_FALSE, "unable to determine zone root"); 1067 goto bad; 1068 } 1069 1070 (void) snprintf(initpath, sizeof (initpath), "%s%s", zpath, init_file); 1071 1072 if (stat(initpath, &st) == -1) { 1073 zerror(zlogp, B_TRUE, "could not stat %s", initpath); 1074 goto bad; 1075 } 1076 1077 if ((st.st_mode & S_IXUSR) == 0) { 1078 zerror(zlogp, B_FALSE, "%s is not executable", initpath); 1079 goto bad; 1080 } 1081 1082 /* 1083 * Exclusive stack zones interact with the dlmgmtd running in the 1084 * global zone. dladm_zone_boot() tells dlmgmtd that this zone is 1085 * booting, and loads its datalinks from the zone's datalink 1086 * configuration file. 1087 */ 1088 if (vplat_get_iptype(zlogp, &iptype) == 0 && iptype == ZS_EXCLUSIVE) { 1089 status = dladm_zone_boot(dld_handle, zoneid); 1090 if (status != DLADM_STATUS_OK) { 1091 zerror(zlogp, B_FALSE, "unable to load zone datalinks: " 1092 " %s", dladm_status2str(status, errmsg)); 1093 goto bad; 1094 } 1095 links_loaded = B_TRUE; 1096 } 1097 1098 /* 1099 * If there is a brand 'boot' callback, execute it now to give the 1100 * brand one last chance to do any additional setup before the zone 1101 * is booted. 1102 */ 1103 if ((strlen(cmdbuf) > EXEC_LEN) && 1104 (do_subproc(zlogp, cmdbuf, NULL, debug) != Z_OK)) { 1105 zerror(zlogp, B_FALSE, "%s failed", cmdbuf); 1106 goto bad; 1107 } 1108 1109 if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) { 1110 zerror(zlogp, B_TRUE, "could not set zone boot file"); 1111 goto bad; 1112 } 1113 1114 if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) { 1115 zerror(zlogp, B_TRUE, "could not set zone boot arguments"); 1116 goto bad; 1117 } 1118 1119 if (!restart_init && zone_setattr(zoneid, ZONE_ATTR_INITNORESTART, 1120 NULL, 0) == -1) { 1121 zerror(zlogp, B_TRUE, "could not set zone init-no-restart"); 1122 goto bad; 1123 } 1124 1125 /* 1126 * Inform zonestatd of a new zone so that it can install a door for 1127 * the zone to contact it. 1128 */ 1129 notify_zonestatd(zone_id); 1130 1131 if (zone_boot(zoneid) == -1) { 1132 zerror(zlogp, B_TRUE, "unable to boot zone"); 1133 goto bad; 1134 } 1135 1136 if (brand_poststatechg(zlogp, zstate, Z_BOOT, debug) != 0) 1137 goto bad; 1138 1139 /* Startup a thread to perform memory capping for the zone. */ 1140 create_mcap_thread(zlogp, zone_id); 1141 1142 return (0); 1143 1144 bad: 1145 /* 1146 * If something goes wrong, we up the zones's state to the target 1147 * state, RUNNING, and then invoke the hook as if we're halting. 1148 */ 1149 (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT, debug); 1150 if (links_loaded) 1151 (void) dladm_zone_halt(dld_handle, zoneid); 1152 return (-1); 1153 } 1154 1155 static int 1156 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate, 1157 boolean_t debug) 1158 { 1159 int err; 1160 1161 if (brand_prestatechg(zlogp, zstate, Z_HALT, debug) != 0) 1162 return (-1); 1163 1164 /* Shutting down, stop the memcap thread */ 1165 destroy_mcap_thread(); 1166 1167 if (vplat_teardown(zlogp, unmount_cmd, rebooting, debug) != 0) { 1168 if (!bringup_failure_recovery) 1169 zerror(zlogp, B_FALSE, "unable to destroy zone"); 1170 return (-1); 1171 } 1172 1173 if (brand_poststatechg(zlogp, zstate, Z_HALT, debug) != 0) 1174 return (-1); 1175 1176 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 1177 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 1178 zonecfg_strerror(err)); 1179 1180 zonecfg_fini_handle(snap_hndl); 1181 snap_hndl = NULL; 1182 1183 return (0); 1184 } 1185 1186 /* 1187 * Generate AUE_zone_state for a command that boots a zone. 1188 */ 1189 static void 1190 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val, 1191 char *new_state) 1192 { 1193 adt_session_data_t *ah; 1194 adt_event_data_t *event; 1195 int pass_fail, fail_reason; 1196 1197 if (!adt_audit_enabled()) 1198 return; 1199 1200 if (return_val == 0) { 1201 pass_fail = ADT_SUCCESS; 1202 fail_reason = ADT_SUCCESS; 1203 } else { 1204 pass_fail = ADT_FAILURE; 1205 fail_reason = ADT_FAIL_VALUE_PROGRAM; 1206 } 1207 1208 if (adt_start_session(&ah, NULL, 0)) { 1209 zerror(zlogp, B_TRUE, gettext("audit failure.")); 1210 return; 1211 } 1212 if (adt_set_from_ucred(ah, uc, ADT_NEW)) { 1213 zerror(zlogp, B_TRUE, gettext("audit failure.")); 1214 (void) adt_end_session(ah); 1215 return; 1216 } 1217 1218 event = adt_alloc_event(ah, ADT_zone_state); 1219 if (event == NULL) { 1220 zerror(zlogp, B_TRUE, gettext("audit failure.")); 1221 (void) adt_end_session(ah); 1222 return; 1223 } 1224 event->adt_zone_state.zonename = zone_name; 1225 event->adt_zone_state.new_state = new_state; 1226 1227 if (adt_put_event(event, pass_fail, fail_reason)) 1228 zerror(zlogp, B_TRUE, gettext("audit failure.")); 1229 1230 adt_free_event(event); 1231 1232 (void) adt_end_session(ah); 1233 } 1234 1235 /* 1236 * The main routine for the door server that deals with zone state transitions. 1237 */ 1238 /* ARGSUSED */ 1239 static void 1240 server(void *cookie, char *args, size_t alen, door_desc_t *dp, 1241 uint_t n_desc) 1242 { 1243 ucred_t *uc = NULL; 1244 const priv_set_t *eset; 1245 1246 zone_state_t zstate; 1247 zone_cmd_t cmd; 1248 boolean_t debug; 1249 zone_cmd_arg_t *zargp; 1250 1251 boolean_t kernelcall; 1252 1253 int rval = -1; 1254 uint64_t uniqid; 1255 zoneid_t zoneid = -1; 1256 zlog_t zlog; 1257 zlog_t *zlogp; 1258 zone_cmd_rval_t *rvalp; 1259 size_t rlen = getpagesize(); /* conservative */ 1260 fs_callback_t cb; 1261 brand_handle_t bh; 1262 1263 /* LINTED E_BAD_PTR_CAST_ALIGN */ 1264 zargp = (zone_cmd_arg_t *)args; 1265 1266 /* 1267 * When we get the door unref message, we've fdetach'd the door, and 1268 * it is time for us to shut down zoneadmd. 1269 */ 1270 if (zargp == DOOR_UNREF_DATA) { 1271 /* 1272 * See comment at end of main() for info on the last rites. 1273 */ 1274 exit(0); 1275 } 1276 1277 if (zargp == NULL) { 1278 (void) door_return(NULL, 0, 0, 0); 1279 } 1280 1281 rvalp = alloca(rlen); 1282 bzero(rvalp, rlen); 1283 zlog.logfile = NULL; 1284 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1; 1285 zlog.buf = rvalp->errbuf; 1286 zlog.log = zlog.buf; 1287 /* defer initialization of zlog.locale until after credential check */ 1288 zlogp = &zlog; 1289 1290 if (alen != sizeof (zone_cmd_arg_t)) { 1291 /* 1292 * This really shouldn't be happening. 1293 */ 1294 zerror(&logsys, B_FALSE, "argument size (%d bytes) " 1295 "unexpected (expected %d bytes)", alen, 1296 sizeof (zone_cmd_arg_t)); 1297 goto out; 1298 } 1299 cmd = zargp->cmd; 1300 debug = zargp->debug; 1301 1302 if (door_ucred(&uc) != 0) { 1303 zerror(&logsys, B_TRUE, "door_ucred"); 1304 goto out; 1305 } 1306 eset = ucred_getprivset(uc, PRIV_EFFECTIVE); 1307 if (ucred_getzoneid(uc) != GLOBAL_ZONEID || 1308 (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) : 1309 ucred_geteuid(uc) != 0)) { 1310 zerror(&logsys, B_FALSE, "insufficient privileges"); 1311 goto out; 1312 } 1313 1314 kernelcall = ucred_getpid(uc) == 0; 1315 1316 /* 1317 * This is safe because we only use a zlog_t throughout the 1318 * duration of a door call; i.e., by the time the pointer 1319 * might become invalid, the door call would be over. 1320 */ 1321 zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale; 1322 1323 (void) mutex_lock(&lock); 1324 1325 /* 1326 * Once we start to really die off, we don't want more connections. 1327 */ 1328 if (in_death_throes) { 1329 (void) mutex_unlock(&lock); 1330 ucred_free(uc); 1331 (void) door_return(NULL, 0, 0, 0); 1332 thr_exit(NULL); 1333 } 1334 1335 /* 1336 * Check for validity of command. 1337 */ 1338 if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_FORCEBOOT && 1339 cmd != Z_REBOOT && cmd != Z_HALT && cmd != Z_NOTE_UNINSTALLING && 1340 cmd != Z_MOUNT && cmd != Z_FORCEMOUNT && cmd != Z_UNMOUNT) { 1341 zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd); 1342 goto out; 1343 } 1344 1345 if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) { 1346 /* 1347 * Can't happen 1348 */ 1349 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d", 1350 cmd); 1351 goto out; 1352 } 1353 /* 1354 * We ignore the possibility of someone calling zone_create(2) 1355 * explicitly; all requests must come through zoneadmd. 1356 */ 1357 if (zone_get_state(zone_name, &zstate) != Z_OK) { 1358 /* 1359 * Something terribly wrong happened 1360 */ 1361 zerror(&logsys, B_FALSE, "unable to determine state of zone"); 1362 goto out; 1363 } 1364 1365 if (kernelcall) { 1366 /* 1367 * Kernel-initiated requests may lose their validity if the 1368 * zone_t the kernel was referring to has gone away. 1369 */ 1370 if ((zoneid = getzoneidbyname(zone_name)) == -1 || 1371 zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid, 1372 sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) { 1373 /* 1374 * We're not talking about the same zone. The request 1375 * must have arrived too late. Return error. 1376 */ 1377 rval = -1; 1378 goto out; 1379 } 1380 zlogp = &logsys; /* Log errors to syslog */ 1381 } 1382 1383 /* 1384 * If we are being asked to forcibly mount or boot a zone, we 1385 * pretend that an INCOMPLETE zone is actually INSTALLED. 1386 */ 1387 if (zstate == ZONE_STATE_INCOMPLETE && 1388 (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT)) 1389 zstate = ZONE_STATE_INSTALLED; 1390 1391 switch (zstate) { 1392 case ZONE_STATE_CONFIGURED: 1393 case ZONE_STATE_INCOMPLETE: 1394 /* 1395 * Not our area of expertise; we just print a nice message 1396 * and die off. 1397 */ 1398 zerror(zlogp, B_FALSE, 1399 "%s operation is invalid for zones in state '%s'", 1400 z_cmd_name(cmd), zone_state_str(zstate)); 1401 break; 1402 1403 case ZONE_STATE_INSTALLED: 1404 switch (cmd) { 1405 case Z_READY: 1406 rval = zone_ready(zlogp, Z_MNT_BOOT, zstate, debug); 1407 if (rval == 0) 1408 eventstream_write(Z_EVT_ZONE_READIED); 1409 break; 1410 case Z_BOOT: 1411 case Z_FORCEBOOT: 1412 eventstream_write(Z_EVT_ZONE_BOOTING); 1413 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate, 1414 debug)) == 0) { 1415 rval = zone_bootup(zlogp, zargp->bootbuf, 1416 zstate, debug); 1417 } 1418 audit_put_record(zlogp, uc, rval, "boot"); 1419 if (rval != 0) { 1420 bringup_failure_recovery = B_TRUE; 1421 (void) zone_halt(zlogp, B_FALSE, B_FALSE, 1422 zstate, debug); 1423 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1424 } 1425 break; 1426 case Z_HALT: 1427 if (kernelcall) /* Invalid; can't happen */ 1428 abort(); 1429 /* 1430 * We could have two clients racing to halt this 1431 * zone; the second client loses, but his request 1432 * doesn't fail, since the zone is now in the desired 1433 * state. 1434 */ 1435 zerror(zlogp, B_FALSE, "zone is already halted"); 1436 rval = 0; 1437 break; 1438 case Z_REBOOT: 1439 if (kernelcall) /* Invalid; can't happen */ 1440 abort(); 1441 zerror(zlogp, B_FALSE, "%s operation is invalid " 1442 "for zones in state '%s'", z_cmd_name(cmd), 1443 zone_state_str(zstate)); 1444 rval = -1; 1445 break; 1446 case Z_NOTE_UNINSTALLING: 1447 if (kernelcall) /* Invalid; can't happen */ 1448 abort(); 1449 /* 1450 * Tell the console to print out a message about this. 1451 * Once it does, we will be in_death_throes. 1452 */ 1453 eventstream_write(Z_EVT_ZONE_UNINSTALLING); 1454 break; 1455 case Z_MOUNT: 1456 case Z_FORCEMOUNT: 1457 if (kernelcall) /* Invalid; can't happen */ 1458 abort(); 1459 if (!zone_isnative && !zone_iscluster && 1460 !zone_islabeled) { 1461 /* 1462 * -U mounts the zone without lofs mounting 1463 * zone file systems back into the scratch 1464 * zone. This is required when mounting 1465 * non-native branded zones. 1466 */ 1467 (void) strlcpy(zargp->bootbuf, "-U", 1468 BOOTARGS_MAX); 1469 } 1470 1471 rval = zone_ready(zlogp, 1472 strcmp(zargp->bootbuf, "-U") == 0 ? 1473 Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate, debug); 1474 if (rval != 0) 1475 break; 1476 1477 eventstream_write(Z_EVT_ZONE_READIED); 1478 1479 /* 1480 * Get a handle to the default brand info. 1481 * We must always use the default brand file system 1482 * list when mounting the zone. 1483 */ 1484 if ((bh = brand_open(default_brand)) == NULL) { 1485 rval = -1; 1486 break; 1487 } 1488 1489 /* 1490 * Get the list of filesystems to mount from 1491 * the brand configuration. These mounts are done 1492 * via a thread that will enter the zone, so they 1493 * are done from within the context of the zone. 1494 */ 1495 cb.zlogp = zlogp; 1496 cb.zoneid = zone_id; 1497 cb.mount_cmd = B_TRUE; 1498 rval = brand_platform_iter_mounts(bh, 1499 mount_early_fs, &cb); 1500 1501 brand_close(bh); 1502 1503 /* 1504 * Ordinarily, /dev/fd would be mounted inside the zone 1505 * by svc:/system/filesystem/usr:default, but since 1506 * we're not booting the zone, we need to do this 1507 * manually. 1508 */ 1509 if (rval == 0) 1510 rval = mount_early_fs(&cb, 1511 "fd", "/dev/fd", "fd", NULL); 1512 break; 1513 case Z_UNMOUNT: 1514 if (kernelcall) /* Invalid; can't happen */ 1515 abort(); 1516 zerror(zlogp, B_FALSE, "zone is already unmounted"); 1517 rval = 0; 1518 break; 1519 } 1520 break; 1521 1522 case ZONE_STATE_READY: 1523 switch (cmd) { 1524 case Z_READY: 1525 /* 1526 * We could have two clients racing to ready this 1527 * zone; the second client loses, but his request 1528 * doesn't fail, since the zone is now in the desired 1529 * state. 1530 */ 1531 zerror(zlogp, B_FALSE, "zone is already ready"); 1532 rval = 0; 1533 break; 1534 case Z_BOOT: 1535 case Z_FORCEBOOT: 1536 (void) strlcpy(boot_args, zargp->bootbuf, 1537 sizeof (boot_args)); 1538 eventstream_write(Z_EVT_ZONE_BOOTING); 1539 rval = zone_bootup(zlogp, zargp->bootbuf, zstate, 1540 debug); 1541 audit_put_record(zlogp, uc, rval, "boot"); 1542 if (rval != 0) { 1543 bringup_failure_recovery = B_TRUE; 1544 (void) zone_halt(zlogp, B_FALSE, B_TRUE, 1545 zstate, debug); 1546 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1547 } 1548 boot_args[0] = '\0'; 1549 break; 1550 case Z_HALT: 1551 if (kernelcall) /* Invalid; can't happen */ 1552 abort(); 1553 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate, 1554 debug)) != 0) 1555 break; 1556 eventstream_write(Z_EVT_ZONE_HALTED); 1557 break; 1558 case Z_REBOOT: 1559 case Z_NOTE_UNINSTALLING: 1560 case Z_MOUNT: 1561 case Z_FORCEMOUNT: 1562 case Z_UNMOUNT: 1563 if (kernelcall) /* Invalid; can't happen */ 1564 abort(); 1565 zerror(zlogp, B_FALSE, "%s operation is invalid " 1566 "for zones in state '%s'", z_cmd_name(cmd), 1567 zone_state_str(zstate)); 1568 rval = -1; 1569 break; 1570 } 1571 break; 1572 1573 case ZONE_STATE_MOUNTED: 1574 switch (cmd) { 1575 case Z_UNMOUNT: 1576 if (kernelcall) /* Invalid; can't happen */ 1577 abort(); 1578 rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate, debug); 1579 if (rval == 0) { 1580 eventstream_write(Z_EVT_ZONE_HALTED); 1581 (void) sema_post(&scratch_sem); 1582 } 1583 break; 1584 default: 1585 if (kernelcall) /* Invalid; can't happen */ 1586 abort(); 1587 zerror(zlogp, B_FALSE, "%s operation is invalid " 1588 "for zones in state '%s'", z_cmd_name(cmd), 1589 zone_state_str(zstate)); 1590 rval = -1; 1591 break; 1592 } 1593 break; 1594 1595 case ZONE_STATE_RUNNING: 1596 case ZONE_STATE_SHUTTING_DOWN: 1597 case ZONE_STATE_DOWN: 1598 switch (cmd) { 1599 case Z_READY: 1600 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate, 1601 debug)) != 0) 1602 break; 1603 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate, 1604 debug)) == 0) 1605 eventstream_write(Z_EVT_ZONE_READIED); 1606 else 1607 eventstream_write(Z_EVT_ZONE_HALTED); 1608 break; 1609 case Z_BOOT: 1610 case Z_FORCEBOOT: 1611 /* 1612 * We could have two clients racing to boot this 1613 * zone; the second client loses, but his request 1614 * doesn't fail, since the zone is now in the desired 1615 * state. 1616 */ 1617 zerror(zlogp, B_FALSE, "zone is already booted"); 1618 rval = 0; 1619 break; 1620 case Z_HALT: 1621 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate, 1622 debug)) != 0) 1623 break; 1624 eventstream_write(Z_EVT_ZONE_HALTED); 1625 break; 1626 case Z_REBOOT: 1627 (void) strlcpy(boot_args, zargp->bootbuf, 1628 sizeof (boot_args)); 1629 eventstream_write(Z_EVT_ZONE_REBOOTING); 1630 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate, 1631 debug)) != 0) { 1632 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1633 boot_args[0] = '\0'; 1634 break; 1635 } 1636 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate, 1637 debug)) != 0) { 1638 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1639 boot_args[0] = '\0'; 1640 break; 1641 } 1642 rval = zone_bootup(zlogp, zargp->bootbuf, zstate, 1643 debug); 1644 audit_put_record(zlogp, uc, rval, "reboot"); 1645 if (rval != 0) { 1646 (void) zone_halt(zlogp, B_FALSE, B_TRUE, 1647 zstate, debug); 1648 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1649 } 1650 boot_args[0] = '\0'; 1651 break; 1652 case Z_NOTE_UNINSTALLING: 1653 case Z_MOUNT: 1654 case Z_FORCEMOUNT: 1655 case Z_UNMOUNT: 1656 zerror(zlogp, B_FALSE, "%s operation is invalid " 1657 "for zones in state '%s'", z_cmd_name(cmd), 1658 zone_state_str(zstate)); 1659 rval = -1; 1660 break; 1661 } 1662 break; 1663 default: 1664 abort(); 1665 } 1666 1667 /* 1668 * Because the state of the zone may have changed, we make sure 1669 * to wake the console poller, which is in charge of initiating 1670 * the shutdown procedure as necessary. 1671 */ 1672 eventstream_write(Z_EVT_NULL); 1673 1674 out: 1675 (void) mutex_unlock(&lock); 1676 if (kernelcall) { 1677 rvalp = NULL; 1678 rlen = 0; 1679 } else { 1680 rvalp->rval = rval; 1681 } 1682 if (uc != NULL) 1683 ucred_free(uc); 1684 (void) door_return((char *)rvalp, rlen, NULL, 0); 1685 thr_exit(NULL); 1686 } 1687 1688 static int 1689 setup_door(zlog_t *zlogp) 1690 { 1691 if ((zone_door = door_create(server, NULL, 1692 DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) { 1693 zerror(zlogp, B_TRUE, "%s failed", "door_create"); 1694 return (-1); 1695 } 1696 (void) fdetach(zone_door_path); 1697 1698 if (fattach(zone_door, zone_door_path) != 0) { 1699 zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path); 1700 (void) door_revoke(zone_door); 1701 (void) fdetach(zone_door_path); 1702 zone_door = -1; 1703 return (-1); 1704 } 1705 return (0); 1706 } 1707 1708 /* 1709 * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this 1710 * is where zoneadmd itself will check to see that another instance of 1711 * zoneadmd isn't already controlling this zone. 1712 * 1713 * The idea here is that we want to open the path to which we will 1714 * attach our door, lock it, and then make sure that no-one has beat us 1715 * to fattach(3c)ing onto it. 1716 * 1717 * fattach(3c) is really a mount, so there are actually two possible 1718 * vnodes we could be dealing with. Our strategy is as follows: 1719 * 1720 * - If the file we opened is a regular file (common case): 1721 * There is no fattach(3c)ed door, so we have a chance of becoming 1722 * the managing zoneadmd. We attempt to lock the file: if it is 1723 * already locked, that means someone else raced us here, so we 1724 * lose and give up. zoneadm(1m) will try to contact the zoneadmd 1725 * that beat us to it. 1726 * 1727 * - If the file we opened is a namefs file: 1728 * This means there is already an established door fattach(3c)'ed 1729 * to the rendezvous path. We've lost the race, so we give up. 1730 * Note that in this case we also try to grab the file lock, and 1731 * will succeed in acquiring it since the vnode locked by the 1732 * "winning" zoneadmd was a regular one, and the one we locked was 1733 * the fattach(3c)'ed door node. At any rate, no harm is done, and 1734 * we just return to zoneadm(1m) which knows to retry. 1735 */ 1736 static int 1737 make_daemon_exclusive(zlog_t *zlogp) 1738 { 1739 int doorfd = -1; 1740 int err, ret = -1; 1741 struct stat st; 1742 struct flock flock; 1743 zone_state_t zstate; 1744 1745 top: 1746 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 1747 zerror(zlogp, B_FALSE, "failed to get zone state: %s", 1748 zonecfg_strerror(err)); 1749 goto out; 1750 } 1751 if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR, 1752 S_IREAD|S_IWRITE)) < 0) { 1753 zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path); 1754 goto out; 1755 } 1756 if (fstat(doorfd, &st) < 0) { 1757 zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path); 1758 goto out; 1759 } 1760 /* 1761 * Lock the file to synchronize with other zoneadmd 1762 */ 1763 flock.l_type = F_WRLCK; 1764 flock.l_whence = SEEK_SET; 1765 flock.l_start = (off_t)0; 1766 flock.l_len = (off_t)0; 1767 if (fcntl(doorfd, F_SETLK, &flock) < 0) { 1768 /* 1769 * Someone else raced us here and grabbed the lock file 1770 * first. A warning here is inappropriate since nothing 1771 * went wrong. 1772 */ 1773 goto out; 1774 } 1775 1776 if (strcmp(st.st_fstype, "namefs") == 0) { 1777 struct door_info info; 1778 1779 /* 1780 * There is already something fattach()'ed to this file. 1781 * Lets see what the door is up to. 1782 */ 1783 if (door_info(doorfd, &info) == 0 && info.di_target != -1) { 1784 /* 1785 * Another zoneadmd process seems to be in 1786 * control of the situation and we don't need to 1787 * be here. A warning here is inappropriate 1788 * since nothing went wrong. 1789 * 1790 * If the door has been revoked, the zoneadmd 1791 * process currently managing the zone is going 1792 * away. We'll return control to zoneadm(1m) 1793 * which will try again (by which time zoneadmd 1794 * will hopefully have exited). 1795 */ 1796 goto out; 1797 } 1798 1799 /* 1800 * If we got this far, there's a fattach(3c)'ed door 1801 * that belongs to a process that has exited, which can 1802 * happen if the previous zoneadmd died unexpectedly. 1803 * 1804 * Let user know that something is amiss, but that we can 1805 * recover; if the zone is in the installed state, then don't 1806 * message, since having a running zoneadmd isn't really 1807 * expected/needed. We want to keep occurences of this message 1808 * limited to times when zoneadmd is picking back up from a 1809 * zoneadmd that died while the zone was in some non-trivial 1810 * state. 1811 */ 1812 if (zstate > ZONE_STATE_INSTALLED) { 1813 static zoneid_t zid; 1814 1815 zerror(zlogp, B_FALSE, 1816 "zone '%s': WARNING: zone is in state '%s', but " 1817 "zoneadmd does not appear to be available; " 1818 "restarted zoneadmd to recover.", 1819 zone_name, zone_state_str(zstate)); 1820 1821 /* 1822 * Startup a thread to perform memory capping for the 1823 * zone. 1824 */ 1825 if ((zid = getzoneidbyname(zone_name)) != -1) 1826 create_mcap_thread(zlogp, zid); 1827 1828 /* recover the global configuration snapshot */ 1829 if (snap_hndl == NULL) { 1830 if ((snap_hndl = zonecfg_init_handle()) 1831 == NULL || 1832 zonecfg_create_snapshot(zone_name) 1833 != Z_OK || 1834 zonecfg_get_snapshot_handle(zone_name, 1835 snap_hndl) != Z_OK) { 1836 zerror(zlogp, B_FALSE, "recovering " 1837 "zone configuration handle"); 1838 goto out; 1839 } 1840 } 1841 } 1842 1843 (void) fdetach(zone_door_path); 1844 (void) close(doorfd); 1845 goto top; 1846 } 1847 ret = 0; 1848 out: 1849 (void) close(doorfd); 1850 return (ret); 1851 } 1852 1853 /* 1854 * Run the query hook with the 'env' parameter. It should return a 1855 * string of tab-delimited key-value pairs, each of which should be set 1856 * in the environment. 1857 * 1858 * Because the env_vars string values become part of the environment, the 1859 * string is static and we don't free it. 1860 */ 1861 static int 1862 set_brand_env(zlog_t *zlogp) 1863 { 1864 int ret = 0; 1865 int err; 1866 boolean_t snapped = B_FALSE; 1867 static char *env_vars = NULL; 1868 char buf[2 * MAXPATHLEN]; 1869 1870 if (query_hook[0] == '\0' || env_vars != NULL) 1871 return (0); 1872 1873 if (snprintf(buf, sizeof (buf), "%s env", query_hook) > sizeof (buf)) 1874 return (-1); 1875 1876 if (snap_hndl == NULL) { 1877 if ((snap_hndl = zonecfg_init_handle()) == NULL) { 1878 zerror(zlogp, B_TRUE, 1879 "getting zone configuration handle"); 1880 return (-1); 1881 } 1882 snapped = B_TRUE; 1883 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) { 1884 zerror(zlogp, B_FALSE, "unable to create snapshot: %s", 1885 zonecfg_strerror(err)); 1886 ret = -1; 1887 goto done; 1888 } 1889 if ((err = zonecfg_get_snapshot_handle(zone_name, snap_hndl)) 1890 != Z_OK) { 1891 zerror(zlogp, B_FALSE, "unable to get snapshot: %s", 1892 zonecfg_strerror(err)); 1893 ret = -1; 1894 goto done; 1895 } 1896 } 1897 1898 if (do_subproc(zlogp, buf, &env_vars, B_FALSE) != 0) { 1899 ret = -1; 1900 goto done; 1901 } 1902 1903 if (env_vars != NULL) { 1904 char *sp; 1905 1906 sp = strtok(env_vars, "\t"); 1907 while (sp != NULL) { 1908 if (putenv(sp) != 0) { 1909 ret = -1; 1910 break; 1911 } 1912 sp = strtok(NULL, "\t"); 1913 } 1914 } 1915 1916 done: 1917 if (snapped) { 1918 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 1919 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 1920 zonecfg_strerror(err)); 1921 1922 zonecfg_fini_handle(snap_hndl); 1923 snap_hndl = NULL; 1924 } 1925 1926 return (ret); 1927 } 1928 1929 /* 1930 * Setup the brand's pre and post state change callbacks, as well as the 1931 * query callback, if any of these exist. 1932 */ 1933 static int 1934 brand_callback_init(brand_handle_t bh, char *zone_name) 1935 { 1936 char zpath[MAXPATHLEN]; 1937 1938 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) 1939 return (-1); 1940 1941 (void) strlcpy(pre_statechg_hook, EXEC_PREFIX, 1942 sizeof (pre_statechg_hook)); 1943 1944 if (brand_get_prestatechange(bh, zone_name, zpath, 1945 pre_statechg_hook + EXEC_LEN, 1946 sizeof (pre_statechg_hook) - EXEC_LEN) != 0) 1947 return (-1); 1948 1949 if (strlen(pre_statechg_hook) <= EXEC_LEN) 1950 pre_statechg_hook[0] = '\0'; 1951 1952 (void) strlcpy(post_statechg_hook, EXEC_PREFIX, 1953 sizeof (post_statechg_hook)); 1954 1955 if (brand_get_poststatechange(bh, zone_name, zpath, 1956 post_statechg_hook + EXEC_LEN, 1957 sizeof (post_statechg_hook) - EXEC_LEN) != 0) 1958 return (-1); 1959 1960 if (strlen(post_statechg_hook) <= EXEC_LEN) 1961 post_statechg_hook[0] = '\0'; 1962 1963 (void) strlcpy(query_hook, EXEC_PREFIX, 1964 sizeof (query_hook)); 1965 1966 if (brand_get_query(bh, zone_name, zpath, query_hook + EXEC_LEN, 1967 sizeof (query_hook) - EXEC_LEN) != 0) 1968 return (-1); 1969 1970 if (strlen(query_hook) <= EXEC_LEN) 1971 query_hook[0] = '\0'; 1972 1973 return (0); 1974 } 1975 1976 int 1977 main(int argc, char *argv[]) 1978 { 1979 int opt; 1980 zoneid_t zid; 1981 priv_set_t *privset; 1982 zone_state_t zstate; 1983 char parents_locale[MAXPATHLEN]; 1984 brand_handle_t bh; 1985 int err; 1986 1987 pid_t pid; 1988 sigset_t blockset; 1989 sigset_t block_cld; 1990 1991 struct { 1992 sema_t sem; 1993 int status; 1994 zlog_t log; 1995 } *shstate; 1996 size_t shstatelen = getpagesize(); 1997 1998 zlog_t errlog; 1999 zlog_t *zlogp; 2000 2001 int ctfd; 2002 2003 progname = get_execbasename(argv[0]); 2004 2005 /* 2006 * Make sure stderr is unbuffered 2007 */ 2008 (void) setbuffer(stderr, NULL, 0); 2009 2010 /* 2011 * Get out of the way of mounted filesystems, since we will daemonize 2012 * soon. 2013 */ 2014 (void) chdir("/"); 2015 2016 /* 2017 * Use the default system umask per PSARC 1998/110 rather than 2018 * anything that may have been set by the caller. 2019 */ 2020 (void) umask(CMASK); 2021 2022 /* 2023 * Initially we want to use our parent's locale. 2024 */ 2025 (void) setlocale(LC_ALL, ""); 2026 (void) textdomain(TEXT_DOMAIN); 2027 (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL), 2028 sizeof (parents_locale)); 2029 2030 /* 2031 * This zlog_t is used for writing to stderr 2032 */ 2033 errlog.logfile = stderr; 2034 errlog.buflen = errlog.loglen = 0; 2035 errlog.buf = errlog.log = NULL; 2036 errlog.locale = parents_locale; 2037 2038 /* 2039 * We start off writing to stderr until we're ready to daemonize. 2040 */ 2041 zlogp = &errlog; 2042 2043 /* 2044 * Process options. 2045 */ 2046 while ((opt = getopt(argc, argv, "R:z:")) != EOF) { 2047 switch (opt) { 2048 case 'R': 2049 zonecfg_set_root(optarg); 2050 break; 2051 case 'z': 2052 zone_name = optarg; 2053 break; 2054 default: 2055 usage(); 2056 } 2057 } 2058 2059 if (zone_name == NULL) 2060 usage(); 2061 2062 /* 2063 * Because usage() prints directly to stderr, it has gettext() 2064 * wrapping, which depends on the locale. But since zerror() calls 2065 * localize() which tweaks the locale, it is not safe to call zerror() 2066 * until after the last call to usage(). Fortunately, the last call 2067 * to usage() is just above and the first call to zerror() is just 2068 * below. Don't mess this up. 2069 */ 2070 if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) { 2071 zerror(zlogp, B_FALSE, "cannot manage the %s zone", 2072 GLOBAL_ZONENAME); 2073 return (1); 2074 } 2075 2076 if (zone_get_id(zone_name, &zid) != 0) { 2077 zerror(zlogp, B_FALSE, "could not manage %s: %s", zone_name, 2078 zonecfg_strerror(Z_NO_ZONE)); 2079 return (1); 2080 } 2081 2082 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 2083 zerror(zlogp, B_FALSE, "failed to get zone state: %s", 2084 zonecfg_strerror(err)); 2085 return (1); 2086 } 2087 if (zstate < ZONE_STATE_INCOMPLETE) { 2088 zerror(zlogp, B_FALSE, 2089 "cannot manage a zone which is in state '%s'", 2090 zone_state_str(zstate)); 2091 return (1); 2092 } 2093 2094 if (zonecfg_default_brand(default_brand, 2095 sizeof (default_brand)) != Z_OK) { 2096 zerror(zlogp, B_FALSE, "unable to determine default brand"); 2097 return (1); 2098 } 2099 2100 /* Get a handle to the brand info for this zone */ 2101 if (zone_get_brand(zone_name, brand_name, sizeof (brand_name)) 2102 != Z_OK) { 2103 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 2104 return (1); 2105 } 2106 zone_isnative = (strcmp(brand_name, NATIVE_BRAND_NAME) == 0); 2107 zone_islabeled = (strcmp(brand_name, LABELED_BRAND_NAME) == 0); 2108 2109 /* 2110 * In the alternate root environment, the only supported 2111 * operations are mount and unmount. In this case, just treat 2112 * the zone as native if it is cluster. Cluster zones can be 2113 * native for the purpose of LU or upgrade, and the cluster 2114 * brand may not exist in the miniroot (such as in net install 2115 * upgrade). 2116 */ 2117 if (strcmp(brand_name, CLUSTER_BRAND_NAME) == 0) { 2118 zone_iscluster = B_TRUE; 2119 if (zonecfg_in_alt_root()) { 2120 (void) strlcpy(brand_name, default_brand, 2121 sizeof (brand_name)); 2122 } 2123 } else { 2124 zone_iscluster = B_FALSE; 2125 } 2126 2127 if ((bh = brand_open(brand_name)) == NULL) { 2128 zerror(zlogp, B_FALSE, "unable to open zone brand"); 2129 return (1); 2130 } 2131 2132 /* Get state change brand hooks. */ 2133 if (brand_callback_init(bh, zone_name) == -1) { 2134 zerror(zlogp, B_TRUE, 2135 "failed to initialize brand state change hooks"); 2136 brand_close(bh); 2137 return (1); 2138 } 2139 2140 brand_close(bh); 2141 2142 /* 2143 * Check that we have all privileges. It would be nice to pare 2144 * this down, but this is at least a first cut. 2145 */ 2146 if ((privset = priv_allocset()) == NULL) { 2147 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset"); 2148 return (1); 2149 } 2150 2151 if (getppriv(PRIV_EFFECTIVE, privset) != 0) { 2152 zerror(zlogp, B_TRUE, "%s failed", "getppriv"); 2153 priv_freeset(privset); 2154 return (1); 2155 } 2156 2157 if (priv_isfullset(privset) == B_FALSE) { 2158 zerror(zlogp, B_FALSE, "You lack sufficient privilege to " 2159 "run this command (all privs required)"); 2160 priv_freeset(privset); 2161 return (1); 2162 } 2163 priv_freeset(privset); 2164 2165 if (set_brand_env(zlogp) != 0) { 2166 zerror(zlogp, B_FALSE, "Unable to setup brand's environment"); 2167 return (1); 2168 } 2169 2170 if (mkzonedir(zlogp) != 0) 2171 return (1); 2172 2173 /* 2174 * Pre-fork: setup shared state 2175 */ 2176 if ((shstate = (void *)mmap(NULL, shstatelen, 2177 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) == 2178 MAP_FAILED) { 2179 zerror(zlogp, B_TRUE, "%s failed", "mmap"); 2180 return (1); 2181 } 2182 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) { 2183 zerror(zlogp, B_TRUE, "%s failed", "sema_init()"); 2184 (void) munmap((char *)shstate, shstatelen); 2185 return (1); 2186 } 2187 shstate->log.logfile = NULL; 2188 shstate->log.buflen = shstatelen - sizeof (*shstate); 2189 shstate->log.loglen = shstate->log.buflen; 2190 shstate->log.buf = (char *)shstate + sizeof (*shstate); 2191 shstate->log.log = shstate->log.buf; 2192 shstate->log.locale = parents_locale; 2193 shstate->status = -1; 2194 2195 /* 2196 * We need a SIGCHLD handler so the sema_wait() below will wake 2197 * up if the child dies without doing a sema_post(). 2198 */ 2199 (void) sigset(SIGCHLD, sigchld); 2200 /* 2201 * We must mask SIGCHLD until after we've coped with the fork 2202 * sufficiently to deal with it; otherwise we can race and 2203 * receive the signal before pid has been initialized 2204 * (yes, this really happens). 2205 */ 2206 (void) sigemptyset(&block_cld); 2207 (void) sigaddset(&block_cld, SIGCHLD); 2208 (void) sigprocmask(SIG_BLOCK, &block_cld, NULL); 2209 2210 if ((ctfd = init_template()) == -1) { 2211 zerror(zlogp, B_TRUE, "failed to create contract"); 2212 return (1); 2213 } 2214 2215 /* 2216 * Do not let another thread localize a message while we are forking. 2217 */ 2218 (void) mutex_lock(&msglock); 2219 pid = fork(); 2220 (void) mutex_unlock(&msglock); 2221 2222 /* 2223 * In all cases (parent, child, and in the event of an error) we 2224 * don't want to cause creation of contracts on subsequent fork()s. 2225 */ 2226 (void) ct_tmpl_clear(ctfd); 2227 (void) close(ctfd); 2228 2229 if (pid == -1) { 2230 zerror(zlogp, B_TRUE, "could not fork"); 2231 return (1); 2232 2233 } else if (pid > 0) { /* parent */ 2234 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 2235 /* 2236 * This marks a window of vulnerability in which we receive 2237 * the SIGCLD before falling into sema_wait (normally we would 2238 * get woken up from sema_wait with EINTR upon receipt of 2239 * SIGCLD). So we may need to use some other scheme like 2240 * sema_posting in the sigcld handler. 2241 * blech 2242 */ 2243 (void) sema_wait(&shstate->sem); 2244 (void) sema_destroy(&shstate->sem); 2245 if (shstate->status != 0) 2246 (void) waitpid(pid, NULL, WNOHANG); 2247 /* 2248 * It's ok if we die with SIGPIPE. It's not like we could have 2249 * done anything about it. 2250 */ 2251 (void) fprintf(stderr, "%s", shstate->log.buf); 2252 _exit(shstate->status == 0 ? 0 : 1); 2253 } 2254 2255 /* 2256 * The child charges on. 2257 */ 2258 (void) sigset(SIGCHLD, SIG_DFL); 2259 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 2260 2261 /* 2262 * SIGPIPE can be delivered if we write to a socket for which the 2263 * peer endpoint is gone. That can lead to too-early termination 2264 * of zoneadmd, and that's not good eats. 2265 */ 2266 (void) sigset(SIGPIPE, SIG_IGN); 2267 /* 2268 * Stop using stderr 2269 */ 2270 zlogp = &shstate->log; 2271 2272 /* 2273 * We don't need stdout/stderr from now on. 2274 */ 2275 closefrom(0); 2276 2277 /* 2278 * Initialize the syslog zlog_t. This needs to be done after 2279 * the call to closefrom(). 2280 */ 2281 logsys.buf = logsys.log = NULL; 2282 logsys.buflen = logsys.loglen = 0; 2283 logsys.logfile = NULL; 2284 logsys.locale = DEFAULT_LOCALE; 2285 2286 openlog("zoneadmd", LOG_PID, LOG_DAEMON); 2287 2288 /* 2289 * The eventstream is used to publish state changes in the zone 2290 * from the door threads to the console I/O poller. 2291 */ 2292 if (eventstream_init() == -1) { 2293 zerror(zlogp, B_TRUE, "unable to create eventstream"); 2294 goto child_out; 2295 } 2296 2297 (void) snprintf(zone_door_path, sizeof (zone_door_path), 2298 "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name); 2299 2300 /* 2301 * See if another zoneadmd is running for this zone. If not, then we 2302 * can now modify system state. 2303 */ 2304 if (make_daemon_exclusive(zlogp) == -1) 2305 goto child_out; 2306 2307 2308 /* 2309 * Create/join a new session; we need to be careful of what we do with 2310 * the console from now on so we don't end up being the session leader 2311 * for the terminal we're going to be handing out. 2312 */ 2313 (void) setsid(); 2314 2315 /* 2316 * This thread shouldn't be receiving any signals; in particular, 2317 * SIGCHLD should be received by the thread doing the fork(). 2318 */ 2319 (void) sigfillset(&blockset); 2320 (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL); 2321 2322 /* 2323 * Setup the console device and get ready to serve the console; 2324 * once this has completed, we're ready to let console clients 2325 * make an attempt to connect (they will block until 2326 * serve_console_sock() below gets called, and any pending 2327 * connection is accept()ed). 2328 */ 2329 if (!zonecfg_in_alt_root() && init_console(zlogp) < 0) 2330 goto child_out; 2331 2332 /* 2333 * Take the lock now, so that when the door server gets going, we 2334 * are guaranteed that it won't take a request until we are sure 2335 * that everything is completely set up. See the child_out: label 2336 * below to see why this matters. 2337 */ 2338 (void) mutex_lock(&lock); 2339 2340 /* Init semaphore for scratch zones. */ 2341 if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) { 2342 zerror(zlogp, B_TRUE, 2343 "failed to initialize semaphore for scratch zone"); 2344 goto child_out; 2345 } 2346 2347 /* open the dladm handle */ 2348 if (dladm_open(&dld_handle) != DLADM_STATUS_OK) { 2349 zerror(zlogp, B_FALSE, "failed to open dladm handle"); 2350 goto child_out; 2351 } 2352 2353 /* 2354 * Note: door setup must occur *after* the console is setup. 2355 * This is so that as zlogin tests the door to see if zoneadmd 2356 * is ready yet, we know that the console will get serviced 2357 * once door_info() indicates that the door is "up". 2358 */ 2359 if (setup_door(zlogp) == -1) 2360 goto child_out; 2361 2362 /* 2363 * Things seem OK so far; tell the parent process that we're done 2364 * with setup tasks. This will cause the parent to exit, signalling 2365 * to zoneadm, zlogin, or whatever forked it that we are ready to 2366 * service requests. 2367 */ 2368 shstate->status = 0; 2369 (void) sema_post(&shstate->sem); 2370 (void) munmap((char *)shstate, shstatelen); 2371 shstate = NULL; 2372 2373 (void) mutex_unlock(&lock); 2374 2375 /* 2376 * zlogp is now invalid, so reset it to the syslog logger. 2377 */ 2378 zlogp = &logsys; 2379 2380 /* 2381 * Now that we are free of any parents, switch to the default locale. 2382 */ 2383 (void) setlocale(LC_ALL, DEFAULT_LOCALE); 2384 2385 /* 2386 * At this point the setup portion of main() is basically done, so 2387 * we reuse this thread to manage the zone console. When 2388 * serve_console() has returned, we are past the point of no return 2389 * in the life of this zoneadmd. 2390 */ 2391 if (zonecfg_in_alt_root()) { 2392 /* 2393 * This is just awful, but mounted scratch zones don't (and 2394 * can't) have consoles. We just wait for unmount instead. 2395 */ 2396 while (sema_wait(&scratch_sem) == EINTR) 2397 ; 2398 } else { 2399 serve_console(zlogp); 2400 assert(in_death_throes); 2401 } 2402 2403 /* 2404 * This is the next-to-last part of the exit interlock. Upon calling 2405 * fdetach(), the door will go unreferenced; once any 2406 * outstanding requests (like the door thread doing Z_HALT) are 2407 * done, the door will get an UNREF notification; when it handles 2408 * the UNREF, the door server will cause the exit. It's possible 2409 * that fdetach() can fail because the file is in use, in which 2410 * case we'll retry the operation. 2411 */ 2412 assert(!MUTEX_HELD(&lock)); 2413 for (;;) { 2414 if ((fdetach(zone_door_path) == 0) || (errno != EBUSY)) 2415 break; 2416 yield(); 2417 } 2418 2419 for (;;) 2420 (void) pause(); 2421 2422 child_out: 2423 assert(pid == 0); 2424 if (shstate != NULL) { 2425 shstate->status = -1; 2426 (void) sema_post(&shstate->sem); 2427 (void) munmap((char *)shstate, shstatelen); 2428 } 2429 2430 /* 2431 * This might trigger an unref notification, but if so, 2432 * we are still holding the lock, so our call to exit will 2433 * ultimately win the race and will publish the right exit 2434 * code. 2435 */ 2436 if (zone_door != -1) { 2437 assert(MUTEX_HELD(&lock)); 2438 (void) door_revoke(zone_door); 2439 (void) fdetach(zone_door_path); 2440 } 2441 2442 if (dld_handle != NULL) 2443 dladm_close(dld_handle); 2444 2445 return (1); /* return from main() forcibly exits an MT process */ 2446 }