1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
25 * Copyright (c) 2016 by Delphix. All rights reserved.
26 * Copyright (c) 2011, Joyent Inc. All rights reserved.
27 */
28
29 /*
30 * zoneadmd manages zones; one zoneadmd process is launched for each
31 * non-global zone on the system. This daemon juggles four jobs:
32 *
33 * - Implement setup and teardown of the zone "virtual platform": mount and
34 * unmount filesystems; create and destroy network interfaces; communicate
35 * with devfsadmd to lay out devices for the zone; instantiate the zone
36 * console device; configure process runtime attributes such as resource
37 * controls, pool bindings, fine-grained privileges.
38 *
39 * - Launch the zone's init(1M) process.
40 *
41 * - Implement a door server; clients (like zoneadm) connect to the door
42 * server and request zone state changes. The kernel is also a client of
43 * this door server. A request to halt or reboot the zone which originates
44 * *inside* the zone results in a door upcall from the kernel into zoneadmd.
45 *
46 * One minor problem is that messages emitted by zoneadmd need to be passed
47 * back to the zoneadm process making the request. These messages need to
48 * be rendered in the client's locale; so, this is passed in as part of the
49 * request. The exception is the kernel upcall to zoneadmd, in which case
50 * messages are syslog'd.
51 *
52 * To make all of this work, the Makefile adds -a to xgettext to extract *all*
53 * strings, and an exclusion file (zoneadmd.xcl) is used to exclude those
54 * strings which do not need to be translated.
55 *
56 * - Act as a console server for zlogin -C processes; see comments in zcons.c
57 * for more information about the zone console architecture.
58 *
59 * DESIGN NOTES
60 *
61 * Restart:
62 * A chief design constraint of zoneadmd is that it should be restartable in
63 * the case that the administrator kills it off, or it suffers a fatal error,
64 * without the running zone being impacted; this is akin to being able to
65 * reboot the service processor of a server without affecting the OS instance.
66 */
67
68 #include <sys/param.h>
69 #include <sys/mman.h>
70 #include <sys/types.h>
71 #include <sys/stat.h>
72 #include <sys/sysmacros.h>
73
74 #include <bsm/adt.h>
75 #include <bsm/adt_event.h>
76
77 #include <alloca.h>
78 #include <assert.h>
79 #include <errno.h>
80 #include <door.h>
81 #include <fcntl.h>
82 #include <locale.h>
83 #include <signal.h>
84 #include <stdarg.h>
85 #include <stdio.h>
86 #include <stdlib.h>
87 #include <string.h>
88 #include <strings.h>
89 #include <synch.h>
90 #include <syslog.h>
91 #include <thread.h>
92 #include <unistd.h>
93 #include <wait.h>
94 #include <limits.h>
95 #include <zone.h>
96 #include <libbrand.h>
97 #include <sys/brand.h>
98 #include <libcontract.h>
99 #include <libcontract_priv.h>
100 #include <sys/brand.h>
101 #include <sys/contract/process.h>
102 #include <sys/ctfs.h>
103 #include <libdladm.h>
104 #include <sys/dls_mgmt.h>
105 #include <libscf.h>
106
107 #include <libzonecfg.h>
108 #include <zonestat_impl.h>
109 #include "zoneadmd.h"
110
/* Program name as printed in the usage() message. */
static char *progname;
char *zone_name; /* zone which we are managing */
char pool_name[MAXNAMELEN];
char default_brand[MAXNAMELEN];
/* Brand name handed to brand_open() when booting or shutting down. */
char brand_name[MAXNAMELEN];
boolean_t zone_isnative;
boolean_t zone_iscluster;
boolean_t zone_islabeled;
/*
 * Set by zone_graceful_shutdown() once the shutdown command has been run
 * successfully; cleared again by zone_wait_shutdown().
 */
boolean_t shutdown_in_progress;
/* Zone id returned by vplat_create() in zone_ready(). */
static zoneid_t zone_id;
/* Filled in lazily from zone_get_did() by zone_ready() while still 0. */
static zoneid_t zone_did = 0;
dladm_handle_t dld_handle = NULL;

/*
 * Brand state-change hook commands.  An empty string means "no hook"; see
 * brand_prestatechg() and brand_poststatechg().
 */
static char pre_statechg_hook[2 * MAXPATHLEN];
static char post_statechg_hook[2 * MAXPATHLEN];
char query_hook[2 * MAXPATHLEN];

/* Log handle for which zerror() sends its output to syslog. */
zlog_t logsys;

mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */
mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */

static sema_t scratch_sem; /* for scratch zones */

static char zone_door_path[MAXPATHLEN];
static int zone_door = -1;

boolean_t in_death_throes = B_FALSE; /* daemon is dying */
/*
 * Set by zone_ready() when vplat_bringup() fails; zone_halt() then stays
 * quiet about a failing vplat_teardown().
 */
boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */

#if !defined(TEXT_DOMAIN) /* should be defined by cc -D */
#define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */
#endif

/* Locale that localize_msg() restores after translating a message. */
#define DEFAULT_LOCALE "C"
146
147 static const char *
148 z_cmd_name(zone_cmd_t zcmd)
149 {
150 /* This list needs to match the enum in sys/zone.h */
151 static const char *zcmdstr[] = {
152 "ready", "boot", "forceboot", "reboot", "halt",
153 "note_uninstalling", "mount", "forcemount", "unmount",
154 "shutdown"
155 };
156
157 if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr))
158 return ("unknown");
159 else
160 return (zcmdstr[(int)zcmd]);
161 }
162
/*
 * Return a pointer to the last path component of execfullname.  Trailing
 * '/' characters are removed by overwriting them with NUL, so the caller's
 * string is modified in place.  If no component follows a '/', or there is
 * no '/' at all, the (possibly shortened) string itself is returned.
 */
static char *
get_execbasename(char *execfullname)
{
	char *slash;

	/* guard against '/' at end of command invocation */
	while ((slash = strrchr(execfullname, '/')) != NULL) {
		if (slash[1] != '\0')
			return (slash + 1);
		*slash = '\0';
	}
	return (execfullname);
}
185
186 static void
187 usage(void)
188 {
189 (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname);
190 (void) fprintf(stderr,
191 gettext("\tNote: %s should not be run directly.\n"), progname);
192 exit(2);
193 }
194
/*
 * SIGCHLD handler.  The body is intentionally empty: no action is taken
 * when the signal is delivered.
 * NOTE(review): presumably installed only so that SIGCHLD is caught rather
 * than left at its default disposition -- confirm where it is registered.
 */
/* ARGSUSED */
static void
sigchld(int sig)
{
}
200
201 char *
202 localize_msg(char *locale, const char *msg)
203 {
204 char *out;
205
206 (void) mutex_lock(&msglock);
207 (void) setlocale(LC_MESSAGES, locale);
208 out = gettext(msg);
209 (void) setlocale(LC_MESSAGES, DEFAULT_LOCALE);
210 (void) mutex_unlock(&msglock);
211 return (out);
212 }
213
214 /* PRINTFLIKE3 */
215 void
216 zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...)
217 {
218 va_list alist;
219 char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */
220 char *bp;
221 int saved_errno = errno;
222
223 if (zlogp == NULL)
224 return;
225 if (zlogp == &logsys)
226 (void) snprintf(buf, sizeof (buf), "[zone '%s'] ",
227 zone_name);
228 else
229 buf[0] = '\0';
230 bp = &(buf[strlen(buf)]);
231
232 /*
233 * In theory, the locale pointer should be set to either "C" or a
234 * char array, so it should never be NULL
235 */
236 assert(zlogp->locale != NULL);
237 /* Locale is per process, but we are multi-threaded... */
238 fmt = localize_msg(zlogp->locale, fmt);
239
240 va_start(alist, fmt);
241 (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist);
242 va_end(alist);
243 bp = &(buf[strlen(buf)]);
244 if (use_strerror)
245 (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s",
246 strerror(saved_errno));
247 if (zlogp == &logsys) {
248 (void) syslog(LOG_ERR, "%s", buf);
249 } else if (zlogp->logfile != NULL) {
250 (void) fprintf(zlogp->logfile, "%s\n", buf);
251 } else {
252 size_t buflen;
253 size_t copylen;
254
255 buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf);
256 copylen = MIN(buflen, zlogp->loglen);
257 zlogp->log += copylen;
258 zlogp->loglen -= copylen;
259 }
260 }
261
262 /*
263 * Emit a warning for any boot arguments which are unrecognized. Since
264 * Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we
265 * put the arguments into an argv style array, use getopt to process them,
266 * and put the resultant argument string back into outargs.
267 *
268 * During the filtering, we pull out any arguments which are truly "boot"
269 * arguments, leaving only those which are to be passed intact to the
270 * progenitor process. The one we support at the moment is -i, which
271 * indicates to the kernel which program should be launched as 'init'.
272 *
273 * A return of Z_INVAL indicates specifically that the arguments are
274 * not valid; this is a non-fatal error. Except for Z_OK, all other return
275 * values are treated as fatal.
276 */
277 static int
278 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
279 char *init_file, char *badarg)
280 {
281 int argc = 0, argc_save;
282 int i;
283 int err;
284 char *arg, *lasts, **argv = NULL, **argv_save;
285 char zonecfg_args[BOOTARGS_MAX];
286 char scratchargs[BOOTARGS_MAX], *sargs;
287 char c;
288
289 bzero(outargs, BOOTARGS_MAX);
290 bzero(badarg, BOOTARGS_MAX);
291
292 /*
293 * If the user didn't specify transient boot arguments, check
294 * to see if there were any specified in the zone configuration,
295 * and use them if applicable.
296 */
297 if (inargs == NULL || inargs[0] == '\0') {
298 zone_dochandle_t handle;
299 if ((handle = zonecfg_init_handle()) == NULL) {
300 zerror(zlogp, B_TRUE,
301 "getting zone configuration handle");
302 return (Z_BAD_HANDLE);
303 }
304 err = zonecfg_get_snapshot_handle(zone_name, handle);
305 if (err != Z_OK) {
306 zerror(zlogp, B_FALSE,
307 "invalid configuration snapshot");
308 zonecfg_fini_handle(handle);
309 return (Z_BAD_HANDLE);
310 }
311
312 bzero(zonecfg_args, sizeof (zonecfg_args));
313 (void) zonecfg_get_bootargs(handle, zonecfg_args,
314 sizeof (zonecfg_args));
315 inargs = zonecfg_args;
316 zonecfg_fini_handle(handle);
317 }
318
319 if (strlen(inargs) >= BOOTARGS_MAX) {
320 zerror(zlogp, B_FALSE, "boot argument string too long");
321 return (Z_INVAL);
322 }
323
324 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
325 sargs = scratchargs;
326 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
327 sargs = NULL;
328 argc++;
329 }
330
331 if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) {
332 zerror(zlogp, B_FALSE, "memory allocation failed");
333 return (Z_NOMEM);
334 }
335
336 argv_save = argv;
337 argc_save = argc;
338
339 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
340 sargs = scratchargs;
341 i = 0;
342 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
343 sargs = NULL;
344 if ((argv[i] = strdup(arg)) == NULL) {
345 err = Z_NOMEM;
346 zerror(zlogp, B_FALSE, "memory allocation failed");
347 goto done;
348 }
349 i++;
350 }
351
352 /*
353 * We preserve compatibility with the Solaris system boot behavior,
354 * which allows:
355 *
356 * # reboot kernel/unix -s -m verbose
357 *
358 * In this example, kernel/unix tells the booter what file to
359 * boot. We don't want reboot in a zone to be gratuitously different,
360 * so we silently ignore the boot file, if necessary.
361 */
362 if (argv[0] == NULL)
363 goto done;
364
365 assert(argv[0][0] != ' ');
366 assert(argv[0][0] != '\t');
367
368 if (argv[0][0] != '-' && argv[0][0] != '\0') {
369 argv = &argv[1];
370 argc--;
371 }
372
373 optind = 0;
374 opterr = 0;
375 err = Z_OK;
376 while ((c = getopt(argc, argv, "fi:m:s")) != -1) {
377 switch (c) {
378 case 'i':
379 /*
380 * -i is handled by the runtime and is not passed
381 * along to userland
382 */
383 (void) strlcpy(init_file, optarg, MAXPATHLEN);
384 break;
385 case 'f':
386 /* This has already been processed by zoneadm */
387 break;
388 case 'm':
389 case 's':
390 /* These pass through unmolested */
391 (void) snprintf(outargs, BOOTARGS_MAX,
392 "%s -%c %s ", outargs, c, optarg ? optarg : "");
393 break;
394 case '?':
395 /*
396 * We warn about unknown arguments but pass them
397 * along anyway-- if someone wants to develop their
398 * own init replacement, they can pass it whatever
399 * args they want.
400 */
401 err = Z_INVAL;
402 (void) snprintf(outargs, BOOTARGS_MAX,
403 "%s -%c", outargs, optopt);
404 (void) snprintf(badarg, BOOTARGS_MAX,
405 "%s -%c", badarg, optopt);
406 break;
407 }
408 }
409
410 /*
411 * For Solaris Zones we warn about and discard non-option arguments.
412 * Hence 'boot foo bar baz gub' --> 'boot'. However, to be similar
413 * to the kernel, we concat up all the other remaining boot args.
414 * and warn on them as a group.
415 */
416 if (optind < argc) {
417 err = Z_INVAL;
418 while (optind < argc) {
419 (void) snprintf(badarg, BOOTARGS_MAX, "%s%s%s",
420 badarg, strlen(badarg) > 0 ? " " : "",
421 argv[optind]);
422 optind++;
423 }
424 zerror(zlogp, B_FALSE, "WARNING: Unused or invalid boot "
425 "arguments `%s'.", badarg);
426 }
427
428 done:
429 for (i = 0; i < argc_save; i++) {
430 if (argv_save[i] != NULL)
431 free(argv_save[i]);
432 }
433 free(argv_save);
434 return (err);
435 }
436
437
438 static int
439 mkzonedir(zlog_t *zlogp)
440 {
441 struct stat st;
442 /*
443 * We must create and lock everyone but root out of ZONES_TMPDIR
444 * since anyone can open any UNIX domain socket, regardless of
445 * its file system permissions. Sigh...
446 */
447 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
448 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR);
449 return (-1);
450 }
451 /* paranoia */
452 if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) {
453 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR);
454 return (-1);
455 }
456 (void) chmod(ZONES_TMPDIR, S_IRWXU);
457 return (0);
458 }
459
460 /*
461 * Run the brand's pre-state change callback, if it exists.
462 */
463 static int
464 brand_prestatechg(zlog_t *zlogp, int state, int cmd)
465 {
466 char cmdbuf[2 * MAXPATHLEN];
467 const char *altroot;
468
469 if (pre_statechg_hook[0] == '\0')
470 return (0);
471
472 altroot = zonecfg_get_root();
473 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", pre_statechg_hook,
474 state, cmd, altroot) > sizeof (cmdbuf))
475 return (-1);
476
477 if (do_subproc(zlogp, cmdbuf, NULL) != 0)
478 return (-1);
479
480 return (0);
481 }
482
483 /*
484 * Run the brand's post-state change callback, if it exists.
485 */
486 static int
487 brand_poststatechg(zlog_t *zlogp, int state, int cmd)
488 {
489 char cmdbuf[2 * MAXPATHLEN];
490 const char *altroot;
491
492 if (post_statechg_hook[0] == '\0')
493 return (0);
494
495 altroot = zonecfg_get_root();
496 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", post_statechg_hook,
497 state, cmd, altroot) > sizeof (cmdbuf))
498 return (-1);
499
500 if (do_subproc(zlogp, cmdbuf, NULL) != 0)
501 return (-1);
502
503 return (0);
504 }
505
506 /*
507 * Notify zonestatd of the new zone. If zonestatd is not running, this
508 * will do nothing.
509 */
510 static void
511 notify_zonestatd(zoneid_t zoneid)
512 {
513 int cmd[2];
514 int fd;
515 door_arg_t params;
516
517 fd = open(ZS_DOOR_PATH, O_RDONLY);
518 if (fd < 0)
519 return;
520
521 cmd[0] = ZSD_CMD_NEW_ZONE;
522 cmd[1] = zoneid;
523 params.data_ptr = (char *)&cmd;
524 params.data_size = sizeof (cmd);
525 params.desc_ptr = NULL;
526 params.desc_num = 0;
527 params.rbuf = NULL;
528 params.rsize = 0;
529 (void) door_call(fd, ¶ms);
530 (void) close(fd);
531 }
532
533 /*
534 * Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is
535 * 'true' if this is being invoked as part of the processing for the "mount"
536 * subcommand.
537 */
538 static int
539 zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate)
540 {
541 int err;
542
543 if (brand_prestatechg(zlogp, zstate, Z_READY) != 0)
544 return (-1);
545
546 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) {
547 zerror(zlogp, B_FALSE, "unable to create snapshot: %s",
548 zonecfg_strerror(err));
549 goto bad;
550 }
551
552 if (zone_did == 0)
553 zone_did = zone_get_did(zone_name);
554
555 if ((zone_id = vplat_create(zlogp, mount_cmd, zone_did)) == -1) {
556 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
557 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
558 zonecfg_strerror(err));
559 goto bad;
560 }
561 if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) {
562 bringup_failure_recovery = B_TRUE;
563 (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE);
564 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
565 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
566 zonecfg_strerror(err));
567 goto bad;
568 }
569
570 if (brand_poststatechg(zlogp, zstate, Z_READY) != 0)
571 goto bad;
572
573 return (0);
574
575 bad:
576 /*
577 * If something goes wrong, we up the zones's state to the target
578 * state, READY, and then invoke the hook as if we're halting.
579 */
580 (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT);
581 return (-1);
582 }
583
584 int
585 init_template(void)
586 {
587 int fd;
588 int err = 0;
589
590 fd = open64(CTFS_ROOT "/process/template", O_RDWR);
591 if (fd == -1)
592 return (-1);
593
594 /*
595 * For now, zoneadmd doesn't do anything with the contract.
596 * Deliver no events, don't inherit, and allow it to be orphaned.
597 */
598 err |= ct_tmpl_set_critical(fd, 0);
599 err |= ct_tmpl_set_informative(fd, 0);
600 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
601 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
602 if (err || ct_tmpl_activate(fd)) {
603 (void) close(fd);
604 return (-1);
605 }
606
607 return (fd);
608 }
609
/*
 * Context handed to mount_early_fs() through the opaque 'data' argument of
 * the brand mount iterator.
 */
typedef struct fs_callback {
	zlog_t *zlogp;		/* log handle for diagnostics */
	zoneid_t zoneid;	/* zone the mounting child process enters */
	boolean_t mount_cmd;	/* if set, mount under <zonepath>/lu */
} fs_callback_t;
615
616 static int
617 mount_early_fs(void *data, const char *spec, const char *dir,
618 const char *fstype, const char *opt)
619 {
620 zlog_t *zlogp = ((fs_callback_t *)data)->zlogp;
621 zoneid_t zoneid = ((fs_callback_t *)data)->zoneid;
622 boolean_t mount_cmd = ((fs_callback_t *)data)->mount_cmd;
623 char rootpath[MAXPATHLEN];
624 pid_t child;
625 int child_status;
626 int tmpl_fd;
627 int rv;
628 ctid_t ct;
629
630 /* determine the zone rootpath */
631 if (mount_cmd) {
632 char zonepath[MAXPATHLEN];
633 char luroot[MAXPATHLEN];
634
635 if (zone_get_zonepath(zone_name,
636 zonepath, sizeof (zonepath)) != Z_OK) {
637 zerror(zlogp, B_FALSE, "unable to determine zone path");
638 return (-1);
639 }
640
641 (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath);
642 resolve_lofs(zlogp, luroot, sizeof (luroot));
643 (void) strlcpy(rootpath, luroot, sizeof (rootpath));
644 } else {
645 if (zone_get_rootpath(zone_name,
646 rootpath, sizeof (rootpath)) != Z_OK) {
647 zerror(zlogp, B_FALSE, "unable to determine zone root");
648 return (-1);
649 }
650 }
651
652 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, fstype)) < 0) {
653 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point",
654 rootpath, dir);
655 return (-1);
656 } else if (rv > 0) {
657 /* The mount point path doesn't exist, create it now. */
658 if (make_one_dir(zlogp, rootpath, dir,
659 DEFAULT_DIR_MODE, DEFAULT_DIR_USER,
660 DEFAULT_DIR_GROUP) != 0) {
661 zerror(zlogp, B_FALSE, "failed to create mount point");
662 return (-1);
663 }
664
665 /*
666 * Now this might seem weird, but we need to invoke
667 * valid_mount_path() again. Why? Because it checks
668 * to make sure that the mount point path is canonical,
669 * which it can only do if the path exists, so now that
670 * we've created the path we have to verify it again.
671 */
672 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir,
673 fstype)) < 0) {
674 zerror(zlogp, B_FALSE,
675 "%s%s is not a valid mount point", rootpath, dir);
676 return (-1);
677 }
678 }
679
680 if ((tmpl_fd = init_template()) == -1) {
681 zerror(zlogp, B_TRUE, "failed to create contract");
682 return (-1);
683 }
684
685 if ((child = fork()) == -1) {
686 (void) ct_tmpl_clear(tmpl_fd);
687 (void) close(tmpl_fd);
688 zerror(zlogp, B_TRUE, "failed to fork");
689 return (-1);
690
691 } else if (child == 0) { /* child */
692 char opt_buf[MAX_MNTOPT_STR];
693 int optlen = 0;
694 int mflag = MS_DATA;
695
696 (void) ct_tmpl_clear(tmpl_fd);
697 /*
698 * Even though there are no procs running in the zone, we
699 * do this for paranoia's sake.
700 */
701 (void) closefrom(0);
702
703 if (zone_enter(zoneid) == -1) {
704 _exit(errno);
705 }
706 if (opt != NULL) {
707 /*
708 * The mount() system call is incredibly annoying.
709 * If options are specified, we need to copy them
710 * into a temporary buffer since the mount() system
711 * call will overwrite the options string. It will
712 * also fail if the new option string it wants to
713 * write is bigger than the one we passed in, so
714 * you must pass in a buffer of the maximum possible
715 * option string length. sigh.
716 */
717 (void) strlcpy(opt_buf, opt, sizeof (opt_buf));
718 opt = opt_buf;
719 optlen = MAX_MNTOPT_STR;
720 mflag = MS_OPTIONSTR;
721 }
722 if (mount(spec, dir, mflag, fstype, NULL, 0, opt, optlen) != 0)
723 _exit(errno);
724 _exit(0);
725 }
726
727 /* parent */
728 if (contract_latest(&ct) == -1)
729 ct = -1;
730 (void) ct_tmpl_clear(tmpl_fd);
731 (void) close(tmpl_fd);
732 if (waitpid(child, &child_status, 0) != child) {
733 /* unexpected: we must have been signalled */
734 (void) contract_abandon_id(ct);
735 return (-1);
736 }
737 (void) contract_abandon_id(ct);
738 if (WEXITSTATUS(child_status) != 0) {
739 errno = WEXITSTATUS(child_status);
740 zerror(zlogp, B_TRUE, "mount of %s failed", dir);
741 return (-1);
742 }
743
744 return (0);
745 }
746
747 /*
748 * If retstr is not NULL, the output of the subproc is returned in the str,
749 * otherwise it is output using zerror(). Any memory allocated for retstr
750 * should be freed by the caller.
751 */
752 int
753 do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr)
754 {
755 char buf[1024]; /* arbitrary large amount */
756 char *inbuf;
757 FILE *file;
758 int status;
759 int rd_cnt;
760
761 if (retstr != NULL) {
762 if ((*retstr = malloc(1024)) == NULL) {
763 zerror(zlogp, B_FALSE, "out of memory");
764 return (-1);
765 }
766 inbuf = *retstr;
767 rd_cnt = 0;
768 } else {
769 inbuf = buf;
770 }
771
772 file = popen(cmdbuf, "r");
773 if (file == NULL) {
774 zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf);
775 return (-1);
776 }
777
778 while (fgets(inbuf, 1024, file) != NULL) {
779 if (retstr == NULL) {
780 if (zlogp != &logsys)
781 zerror(zlogp, B_FALSE, "%s", inbuf);
782 } else {
783 char *p;
784
785 rd_cnt += 1024 - 1;
786 if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) {
787 zerror(zlogp, B_FALSE, "out of memory");
788 (void) pclose(file);
789 return (-1);
790 }
791
792 *retstr = p;
793 inbuf = *retstr + rd_cnt;
794 }
795 }
796 status = pclose(file);
797
798 if (WIFSIGNALED(status)) {
799 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to "
800 "signal %d", cmdbuf, WTERMSIG(status));
801 return (-1);
802 }
803 assert(WIFEXITED(status));
804 if (WEXITSTATUS(status) == ZEXIT_EXEC) {
805 zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf);
806 return (-1);
807 }
808 return (WEXITSTATUS(status));
809 }
810
811 static int
812 zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
813 {
814 zoneid_t zoneid;
815 struct stat st;
816 char zpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN];
817 char nbootargs[BOOTARGS_MAX];
818 char cmdbuf[MAXPATHLEN];
819 fs_callback_t cb;
820 brand_handle_t bh;
821 zone_iptype_t iptype;
822 boolean_t links_loaded = B_FALSE;
823 dladm_status_t status;
824 char errmsg[DLADM_STRSIZE];
825 int err;
826 boolean_t restart_init;
827
828 if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0)
829 return (-1);
830
831 if ((zoneid = getzoneidbyname(zone_name)) == -1) {
832 zerror(zlogp, B_TRUE, "unable to get zoneid");
833 goto bad;
834 }
835
836 cb.zlogp = zlogp;
837 cb.zoneid = zoneid;
838 cb.mount_cmd = B_FALSE;
839
840 /* Get a handle to the brand info for this zone */
841 if ((bh = brand_open(brand_name)) == NULL) {
842 zerror(zlogp, B_FALSE, "unable to determine zone brand");
843 goto bad;
844 }
845
846 /*
847 * Get the list of filesystems to mount from the brand
848 * configuration. These mounts are done via a thread that will
849 * enter the zone, so they are done from within the context of the
850 * zone.
851 */
852 if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) {
853 zerror(zlogp, B_FALSE, "unable to mount filesystems");
854 brand_close(bh);
855 goto bad;
856 }
857
858 /*
859 * Get the brand's boot callback if it exists.
860 */
861 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
862 zerror(zlogp, B_FALSE, "unable to determine zone path");
863 brand_close(bh);
864 goto bad;
865 }
866 (void) strcpy(cmdbuf, EXEC_PREFIX);
867 if (brand_get_boot(bh, zone_name, zpath, cmdbuf + EXEC_LEN,
868 sizeof (cmdbuf) - EXEC_LEN) != 0) {
869 zerror(zlogp, B_FALSE,
870 "unable to determine branded zone's boot callback");
871 brand_close(bh);
872 goto bad;
873 }
874
875 /* Get the path for this zone's init(1M) (or equivalent) process. */
876 if (brand_get_initname(bh, init_file, MAXPATHLEN) != 0) {
877 zerror(zlogp, B_FALSE,
878 "unable to determine zone's init(1M) location");
879 brand_close(bh);
880 goto bad;
881 }
882
883 /* See if this zone's brand should restart init if it dies. */
884 restart_init = brand_restartinit(bh);
885
886 brand_close(bh);
887
888 err = filter_bootargs(zlogp, bootargs, nbootargs, init_file,
889 bad_boot_arg);
890 if (err == Z_INVAL)
891 eventstream_write(Z_EVT_ZONE_BADARGS);
892 else if (err != Z_OK)
893 goto bad;
894
895 assert(init_file[0] != '\0');
896
897 /* Try to anticipate possible problems: Make sure init is executable. */
898 if (zone_get_rootpath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
899 zerror(zlogp, B_FALSE, "unable to determine zone root");
900 goto bad;
901 }
902
903 (void) snprintf(initpath, sizeof (initpath), "%s%s", zpath, init_file);
904
905 if (stat(initpath, &st) == -1) {
906 zerror(zlogp, B_TRUE, "could not stat %s", initpath);
907 goto bad;
908 }
909
910 if ((st.st_mode & S_IXUSR) == 0) {
911 zerror(zlogp, B_FALSE, "%s is not executable", initpath);
912 goto bad;
913 }
914
915 /*
916 * Exclusive stack zones interact with the dlmgmtd running in the
917 * global zone. dladm_zone_boot() tells dlmgmtd that this zone is
918 * booting, and loads its datalinks from the zone's datalink
919 * configuration file.
920 */
921 if (vplat_get_iptype(zlogp, &iptype) == 0 && iptype == ZS_EXCLUSIVE) {
922 status = dladm_zone_boot(dld_handle, zoneid);
923 if (status != DLADM_STATUS_OK) {
924 zerror(zlogp, B_FALSE, "unable to load zone datalinks: "
925 " %s", dladm_status2str(status, errmsg));
926 goto bad;
927 }
928 links_loaded = B_TRUE;
929 }
930
931 /*
932 * If there is a brand 'boot' callback, execute it now to give the
933 * brand one last chance to do any additional setup before the zone
934 * is booted.
935 */
936 if ((strlen(cmdbuf) > EXEC_LEN) &&
937 (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) {
938 zerror(zlogp, B_FALSE, "%s failed", cmdbuf);
939 goto bad;
940 }
941
942 if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) {
943 zerror(zlogp, B_TRUE, "could not set zone boot file");
944 goto bad;
945 }
946
947 if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) {
948 zerror(zlogp, B_TRUE, "could not set zone boot arguments");
949 goto bad;
950 }
951
952 if (!restart_init && zone_setattr(zoneid, ZONE_ATTR_INITNORESTART,
953 NULL, 0) == -1) {
954 zerror(zlogp, B_TRUE, "could not set zone init-no-restart");
955 goto bad;
956 }
957
958 /*
959 * Inform zonestatd of a new zone so that it can install a door for
960 * the zone to contact it.
961 */
962 notify_zonestatd(zone_id);
963
964 if (zone_boot(zoneid) == -1) {
965 zerror(zlogp, B_TRUE, "unable to boot zone");
966 goto bad;
967 }
968
969 if (brand_poststatechg(zlogp, zstate, Z_BOOT) != 0)
970 goto bad;
971
972 return (0);
973
974 bad:
975 /*
976 * If something goes wrong, we up the zones's state to the target
977 * state, RUNNING, and then invoke the hook as if we're halting.
978 */
979 (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT);
980 if (links_loaded)
981 (void) dladm_zone_halt(dld_handle, zoneid);
982 return (-1);
983 }
984
985 static int
986 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate)
987 {
988 int err;
989
990 if (brand_prestatechg(zlogp, zstate, Z_HALT) != 0)
991 return (-1);
992
993 if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) {
994 if (!bringup_failure_recovery)
995 zerror(zlogp, B_FALSE, "unable to destroy zone");
996 return (-1);
997 }
998
999 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
1000 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
1001 zonecfg_strerror(err));
1002
1003 if (brand_poststatechg(zlogp, zstate, Z_HALT) != 0)
1004 return (-1);
1005
1006 return (0);
1007 }
1008
1009 static int
1010 zone_graceful_shutdown(zlog_t *zlogp)
1011 {
1012 zoneid_t zoneid;
1013 pid_t child;
1014 char cmdbuf[MAXPATHLEN];
1015 brand_handle_t bh = NULL;
1016 char zpath[MAXPATHLEN];
1017 ctid_t ct;
1018 int tmpl_fd;
1019 int child_status;
1020
1021 if (shutdown_in_progress) {
1022 zerror(zlogp, B_FALSE, "shutdown already in progress");
1023 return (-1);
1024 }
1025
1026 if ((zoneid = getzoneidbyname(zone_name)) == -1) {
1027 zerror(zlogp, B_TRUE, "unable to get zoneid");
1028 return (-1);
1029 }
1030
1031 /* Get a handle to the brand info for this zone */
1032 if ((bh = brand_open(brand_name)) == NULL) {
1033 zerror(zlogp, B_FALSE, "unable to determine zone brand");
1034 return (-1);
1035 }
1036
1037 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
1038 zerror(zlogp, B_FALSE, "unable to determine zone path");
1039 brand_close(bh);
1040 return (-1);
1041 }
1042
1043 /*
1044 * If there is a brand 'shutdown' callback, execute it now to give the
1045 * brand a chance to cleanup any custom configuration.
1046 */
1047 (void) strcpy(cmdbuf, EXEC_PREFIX);
1048 if (brand_get_shutdown(bh, zone_name, zpath, cmdbuf + EXEC_LEN,
1049 sizeof (cmdbuf) - EXEC_LEN) != 0 || strlen(cmdbuf) <= EXEC_LEN) {
1050 (void) strcat(cmdbuf, SHUTDOWN_DEFAULT);
1051 }
1052 brand_close(bh);
1053
1054 if ((tmpl_fd = init_template()) == -1) {
1055 zerror(zlogp, B_TRUE, "failed to create contract");
1056 return (-1);
1057 }
1058
1059 if ((child = fork()) == -1) {
1060 (void) ct_tmpl_clear(tmpl_fd);
1061 (void) close(tmpl_fd);
1062 zerror(zlogp, B_TRUE, "failed to fork");
1063 return (-1);
1064 } else if (child == 0) {
1065 (void) ct_tmpl_clear(tmpl_fd);
1066 if (zone_enter(zoneid) == -1) {
1067 _exit(errno);
1068 }
1069 _exit(execl("/bin/sh", "sh", "-c", cmdbuf, (char *)NULL));
1070 }
1071
1072 if (contract_latest(&ct) == -1)
1073 ct = -1;
1074 (void) ct_tmpl_clear(tmpl_fd);
1075 (void) close(tmpl_fd);
1076
1077 if (waitpid(child, &child_status, 0) != child) {
1078 /* unexpected: we must have been signalled */
1079 (void) contract_abandon_id(ct);
1080 return (-1);
1081 }
1082
1083 (void) contract_abandon_id(ct);
1084 if (WEXITSTATUS(child_status) != 0) {
1085 errno = WEXITSTATUS(child_status);
1086 zerror(zlogp, B_FALSE, "unable to shutdown zone");
1087 return (-1);
1088 }
1089
1090 shutdown_in_progress = B_TRUE;
1091
1092 return (0);
1093 }
1094
1095 static int
1096 zone_wait_shutdown(zlog_t *zlogp)
1097 {
1098 zone_state_t zstate;
1099 uint64_t *tm = NULL;
1100 scf_simple_prop_t *prop = NULL;
1101 int timeout;
1102 int tries;
1103 int rc = -1;
1104
1105 /* Get default stop timeout from SMF framework */
1106 timeout = SHUTDOWN_WAIT;
1107 if ((prop = scf_simple_prop_get(NULL, SHUTDOWN_FMRI, "stop",
1108 SCF_PROPERTY_TIMEOUT)) != NULL) {
1109 if ((tm = scf_simple_prop_next_count(prop)) != NULL) {
1110 if (tm != 0)
1111 timeout = *tm;
1112 }
1113 scf_simple_prop_free(prop);
1114 }
1115
1116 /* allow time for zone to shutdown cleanly */
1117 for (tries = 0; tries < timeout; tries ++) {
1118 (void) sleep(1);
1119 if (zone_get_state(zone_name, &zstate) == Z_OK &&
1120 zstate == ZONE_STATE_INSTALLED) {
1121 rc = 0;
1122 break;
1123 }
1124 }
1125
1126 if (rc != 0)
1127 zerror(zlogp, B_FALSE, "unable to shutdown zone");
1128
1129 shutdown_in_progress = B_FALSE;
1130
1131 return (rc);
1132 }
1133
1134
1135
1136 /*
1137 * Generate AUE_zone_state for a command that boots a zone.
1138 */
1139 static void
1140 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val,
1141 char *new_state)
1142 {
1143 adt_session_data_t *ah;
1144 adt_event_data_t *event;
1145 int pass_fail, fail_reason;
1146
1147 if (!adt_audit_enabled())
1148 return;
1149
1150 if (return_val == 0) {
1151 pass_fail = ADT_SUCCESS;
1152 fail_reason = ADT_SUCCESS;
1153 } else {
1154 pass_fail = ADT_FAILURE;
1155 fail_reason = ADT_FAIL_VALUE_PROGRAM;
1156 }
1157
1158 if (adt_start_session(&ah, NULL, 0)) {
1159 zerror(zlogp, B_TRUE, gettext("audit failure."));
1160 return;
1161 }
1162 if (adt_set_from_ucred(ah, uc, ADT_NEW)) {
1163 zerror(zlogp, B_TRUE, gettext("audit failure."));
1164 (void) adt_end_session(ah);
1165 return;
1166 }
1167
1168 event = adt_alloc_event(ah, ADT_zone_state);
1169 if (event == NULL) {
1170 zerror(zlogp, B_TRUE, gettext("audit failure."));
1171 (void) adt_end_session(ah);
1172 return;
1173 }
1174 event->adt_zone_state.zonename = zone_name;
1175 event->adt_zone_state.new_state = new_state;
1176
1177 if (adt_put_event(event, pass_fail, fail_reason))
1178 zerror(zlogp, B_TRUE, gettext("audit failure."));
1179
1180 adt_free_event(event);
1181
1182 (void) adt_end_session(ah);
1183 }
1184
1185 /*
1186 * The main routine for the door server that deals with zone state transitions.
1187 */
1188 /* ARGSUSED */
1189 static void
1190 server(void *cookie, char *args, size_t alen, door_desc_t *dp,
1191 uint_t n_desc)
1192 {
1193 ucred_t *uc = NULL;
1194 const priv_set_t *eset;
1195
1196 zone_state_t zstate;
1197 zone_cmd_t cmd;
1198 zone_cmd_arg_t *zargp;
1199
1200 boolean_t kernelcall;
1201
1202 int rval = -1;
1203 uint64_t uniqid;
1204 zoneid_t zoneid = -1;
1205 zlog_t zlog;
1206 zlog_t *zlogp;
1207 zone_cmd_rval_t *rvalp;
1208 size_t rlen = getpagesize(); /* conservative */
1209 fs_callback_t cb;
1210 brand_handle_t bh;
1211 boolean_t wait_shut = B_FALSE;
1212
1213 /* LINTED E_BAD_PTR_CAST_ALIGN */
1214 zargp = (zone_cmd_arg_t *)args;
1215
1216 /*
1217 * When we get the door unref message, we've fdetach'd the door, and
1218 * it is time for us to shut down zoneadmd.
1219 */
1220 if (zargp == DOOR_UNREF_DATA) {
1221 /*
1222 * See comment at end of main() for info on the last rites.
1223 */
1224 exit(0);
1225 }
1226
1227 if (zargp == NULL) {
1228 (void) door_return(NULL, 0, 0, 0);
1229 }
1230
1231 rvalp = alloca(rlen);
1232 bzero(rvalp, rlen);
1233 zlog.logfile = NULL;
1234 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1;
1235 zlog.buf = rvalp->errbuf;
1236 zlog.log = zlog.buf;
1237 /* defer initialization of zlog.locale until after credential check */
1238 zlogp = &zlog;
1239
1240 if (alen != sizeof (zone_cmd_arg_t)) {
1241 /*
1242 * This really shouldn't be happening.
1243 */
1244 zerror(&logsys, B_FALSE, "argument size (%d bytes) "
1245 "unexpected (expected %d bytes)", alen,
1246 sizeof (zone_cmd_arg_t));
1247 goto out;
1248 }
1249 cmd = zargp->cmd;
1250
1251 if (door_ucred(&uc) != 0) {
1252 zerror(&logsys, B_TRUE, "door_ucred");
1253 goto out;
1254 }
1255 eset = ucred_getprivset(uc, PRIV_EFFECTIVE);
1256 if (ucred_getzoneid(uc) != GLOBAL_ZONEID ||
1257 (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) :
1258 ucred_geteuid(uc) != 0)) {
1259 zerror(&logsys, B_FALSE, "insufficient privileges");
1260 goto out;
1261 }
1262
1263 kernelcall = ucred_getpid(uc) == 0;
1264
1265 /*
1266 * This is safe because we only use a zlog_t throughout the
1267 * duration of a door call; i.e., by the time the pointer
1268 * might become invalid, the door call would be over.
1269 */
1270 zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale;
1271
1272 (void) mutex_lock(&lock);
1273
1274 /*
1275 * Once we start to really die off, we don't want more connections.
1276 */
1277 if (in_death_throes) {
1278 (void) mutex_unlock(&lock);
1279 ucred_free(uc);
1280 (void) door_return(NULL, 0, 0, 0);
1281 thr_exit(NULL);
1282 }
1283
1284 /*
1285 * Check for validity of command.
1286 */
1287 if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_FORCEBOOT &&
1288 cmd != Z_REBOOT && cmd != Z_SHUTDOWN && cmd != Z_HALT &&
1289 cmd != Z_NOTE_UNINSTALLING && cmd != Z_MOUNT &&
1290 cmd != Z_FORCEMOUNT && cmd != Z_UNMOUNT) {
1291 zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd);
1292 goto out;
1293 }
1294
1295 if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) {
1296 /*
1297 * Can't happen
1298 */
1299 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d",
1300 cmd);
1301 goto out;
1302 }
1303 /*
1304 * We ignore the possibility of someone calling zone_create(2)
1305 * explicitly; all requests must come through zoneadmd.
1306 */
1307 if (zone_get_state(zone_name, &zstate) != Z_OK) {
1308 /*
1309 * Something terribly wrong happened
1310 */
1311 zerror(&logsys, B_FALSE, "unable to determine state of zone");
1312 goto out;
1313 }
1314
1315 if (kernelcall) {
1316 /*
1317 * Kernel-initiated requests may lose their validity if the
1318 * zone_t the kernel was referring to has gone away.
1319 */
1320 if ((zoneid = getzoneidbyname(zone_name)) == -1 ||
1321 zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid,
1322 sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) {
1323 /*
1324 * We're not talking about the same zone. The request
1325 * must have arrived too late. Return error.
1326 */
1327 rval = -1;
1328 goto out;
1329 }
1330 zlogp = &logsys; /* Log errors to syslog */
1331 }
1332
1333 /*
1334 * If we are being asked to forcibly mount or boot a zone, we
1335 * pretend that an INCOMPLETE zone is actually INSTALLED.
1336 */
1337 if (zstate == ZONE_STATE_INCOMPLETE &&
1338 (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT))
1339 zstate = ZONE_STATE_INSTALLED;
1340
1341 switch (zstate) {
1342 case ZONE_STATE_CONFIGURED:
1343 case ZONE_STATE_INCOMPLETE:
1344 /*
1345 * Not our area of expertise; we just print a nice message
1346 * and die off.
1347 */
1348 zerror(zlogp, B_FALSE,
1349 "%s operation is invalid for zones in state '%s'",
1350 z_cmd_name(cmd), zone_state_str(zstate));
1351 break;
1352
1353 case ZONE_STATE_INSTALLED:
1354 switch (cmd) {
1355 case Z_READY:
1356 rval = zone_ready(zlogp, Z_MNT_BOOT, zstate);
1357 if (rval == 0)
1358 eventstream_write(Z_EVT_ZONE_READIED);
1359 break;
1360 case Z_BOOT:
1361 case Z_FORCEBOOT:
1362 eventstream_write(Z_EVT_ZONE_BOOTING);
1363 if ((rval = zone_ready(zlogp, Z_MNT_BOOT,
1364 zstate)) == 0) {
1365 rval = zone_bootup(zlogp, zargp->bootbuf,
1366 zstate);
1367 }
1368 audit_put_record(zlogp, uc, rval, "boot");
1369 if (rval != 0) {
1370 bringup_failure_recovery = B_TRUE;
1371 (void) zone_halt(zlogp, B_FALSE, B_FALSE,
1372 zstate);
1373 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1374 }
1375 break;
1376 case Z_SHUTDOWN:
1377 case Z_HALT:
1378 if (kernelcall) /* Invalid; can't happen */
1379 abort();
1380 /*
1381 * We could have two clients racing to halt this
1382 * zone; the second client loses, but its request
1383 * doesn't fail, since the zone is now in the desired
1384 * state.
1385 */
1386 zerror(zlogp, B_FALSE, "zone is already halted");
1387 rval = 0;
1388 break;
1389 case Z_REBOOT:
1390 if (kernelcall) /* Invalid; can't happen */
1391 abort();
1392 zerror(zlogp, B_FALSE, "%s operation is invalid "
1393 "for zones in state '%s'", z_cmd_name(cmd),
1394 zone_state_str(zstate));
1395 rval = -1;
1396 break;
1397 case Z_NOTE_UNINSTALLING:
1398 if (kernelcall) /* Invalid; can't happen */
1399 abort();
1400 /*
1401 * Tell the console to print out a message about this.
1402 * Once it does, we will be in_death_throes.
1403 */
1404 eventstream_write(Z_EVT_ZONE_UNINSTALLING);
1405 break;
1406 case Z_MOUNT:
1407 case Z_FORCEMOUNT:
1408 if (kernelcall) /* Invalid; can't happen */
1409 abort();
1410 if (!zone_isnative && !zone_iscluster &&
1411 !zone_islabeled) {
1412 /*
1413 * -U mounts the zone without lofs mounting
1414 * zone file systems back into the scratch
1415 * zone. This is required when mounting
1416 * non-native branded zones.
1417 */
1418 (void) strlcpy(zargp->bootbuf, "-U",
1419 BOOTARGS_MAX);
1420 }
1421
1422 rval = zone_ready(zlogp,
1423 strcmp(zargp->bootbuf, "-U") == 0 ?
1424 Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate);
1425 if (rval != 0)
1426 break;
1427
1428 eventstream_write(Z_EVT_ZONE_READIED);
1429
1430 /*
1431 * Get a handle to the default brand info.
1432 * We must always use the default brand file system
1433 * list when mounting the zone.
1434 */
1435 if ((bh = brand_open(default_brand)) == NULL) {
1436 rval = -1;
1437 break;
1438 }
1439
1440 /*
1441 * Get the list of filesystems to mount from
1442 * the brand configuration. These mounts are done
1443 * via a thread that will enter the zone, so they
1444 * are done from within the context of the zone.
1445 */
1446 cb.zlogp = zlogp;
1447 cb.zoneid = zone_id;
1448 cb.mount_cmd = B_TRUE;
1449 rval = brand_platform_iter_mounts(bh,
1450 mount_early_fs, &cb);
1451
1452 brand_close(bh);
1453
1454 /*
1455 * Ordinarily, /dev/fd would be mounted inside the zone
1456 * by svc:/system/filesystem/usr:default, but since
1457 * we're not booting the zone, we need to do this
1458 * manually.
1459 */
1460 if (rval == 0)
1461 rval = mount_early_fs(&cb,
1462 "fd", "/dev/fd", "fd", NULL);
1463 break;
1464 case Z_UNMOUNT:
1465 if (kernelcall) /* Invalid; can't happen */
1466 abort();
1467 zerror(zlogp, B_FALSE, "zone is already unmounted");
1468 rval = 0;
1469 break;
1470 }
1471 break;
1472
1473 case ZONE_STATE_READY:
1474 switch (cmd) {
1475 case Z_READY:
1476 /*
1477 * We could have two clients racing to ready this
1478 * zone; the second client loses, but its request
1479 * doesn't fail, since the zone is now in the desired
1480 * state.
1481 */
1482 zerror(zlogp, B_FALSE, "zone is already ready");
1483 rval = 0;
1484 break;
1485 case Z_BOOT:
1486 (void) strlcpy(boot_args, zargp->bootbuf,
1487 sizeof (boot_args));
1488 eventstream_write(Z_EVT_ZONE_BOOTING);
1489 rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
1490 audit_put_record(zlogp, uc, rval, "boot");
1491 if (rval != 0) {
1492 bringup_failure_recovery = B_TRUE;
1493 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1494 zstate);
1495 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1496 }
1497 boot_args[0] = '\0';
1498 break;
1499 case Z_HALT:
1500 if (kernelcall) /* Invalid; can't happen */
1501 abort();
1502 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
1503 != 0)
1504 break;
1505 eventstream_write(Z_EVT_ZONE_HALTED);
1506 break;
1507 case Z_SHUTDOWN:
1508 case Z_REBOOT:
1509 case Z_NOTE_UNINSTALLING:
1510 case Z_MOUNT:
1511 case Z_UNMOUNT:
1512 if (kernelcall) /* Invalid; can't happen */
1513 abort();
1514 zerror(zlogp, B_FALSE, "%s operation is invalid "
1515 "for zones in state '%s'", z_cmd_name(cmd),
1516 zone_state_str(zstate));
1517 rval = -1;
1518 break;
1519 }
1520 break;
1521
1522 case ZONE_STATE_MOUNTED:
1523 switch (cmd) {
1524 case Z_UNMOUNT:
1525 if (kernelcall) /* Invalid; can't happen */
1526 abort();
1527 rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate);
1528 if (rval == 0) {
1529 eventstream_write(Z_EVT_ZONE_HALTED);
1530 (void) sema_post(&scratch_sem);
1531 }
1532 break;
1533 default:
1534 if (kernelcall) /* Invalid; can't happen */
1535 abort();
1536 zerror(zlogp, B_FALSE, "%s operation is invalid "
1537 "for zones in state '%s'", z_cmd_name(cmd),
1538 zone_state_str(zstate));
1539 rval = -1;
1540 break;
1541 }
1542 break;
1543
1544 case ZONE_STATE_RUNNING:
1545 case ZONE_STATE_SHUTTING_DOWN:
1546 case ZONE_STATE_DOWN:
1547 switch (cmd) {
1548 case Z_READY:
1549 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
1550 != 0)
1551 break;
1552 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
1553 == 0)
1554 eventstream_write(Z_EVT_ZONE_READIED);
1555 else
1556 eventstream_write(Z_EVT_ZONE_HALTED);
1557 break;
1558 case Z_BOOT:
1559 /*
1560 * We could have two clients racing to boot this
1561 * zone; the second client loses, but its request
1562 * doesn't fail, since the zone is now in the desired
1563 * state.
1564 */
1565 zerror(zlogp, B_FALSE, "zone is already booted");
1566 rval = 0;
1567 break;
1568 case Z_HALT:
1569 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
1570 != 0)
1571 break;
1572 eventstream_write(Z_EVT_ZONE_HALTED);
1573 break;
1574 case Z_REBOOT:
1575 (void) strlcpy(boot_args, zargp->bootbuf,
1576 sizeof (boot_args));
1577 eventstream_write(Z_EVT_ZONE_REBOOTING);
1578 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
1579 != 0) {
1580 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1581 boot_args[0] = '\0';
1582 break;
1583 }
1584 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
1585 != 0) {
1586 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1587 boot_args[0] = '\0';
1588 break;
1589 }
1590 rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
1591 audit_put_record(zlogp, uc, rval, "reboot");
1592 if (rval != 0) {
1593 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1594 zstate);
1595 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1596 }
1597 boot_args[0] = '\0';
1598 break;
1599 case Z_SHUTDOWN:
1600 if ((rval = zone_graceful_shutdown(zlogp)) == 0) {
1601 wait_shut = B_TRUE;
1602 }
1603 break;
1604 case Z_NOTE_UNINSTALLING:
1605 case Z_MOUNT:
1606 case Z_UNMOUNT:
1607 zerror(zlogp, B_FALSE, "%s operation is invalid "
1608 "for zones in state '%s'", z_cmd_name(cmd),
1609 zone_state_str(zstate));
1610 rval = -1;
1611 break;
1612 }
1613 break;
1614 default:
1615 abort();
1616 }
1617
1618 /*
1619 * Because the state of the zone may have changed, we make sure
1620 * to wake the console poller, which is in charge of initiating
1621 * the shutdown procedure as necessary.
1622 */
1623 eventstream_write(Z_EVT_NULL);
1624
1625 out:
1626 (void) mutex_unlock(&lock);
1627
1628 /* Wait for the Z_SHUTDOWN commands to complete */
1629 if (wait_shut)
1630 rval = zone_wait_shutdown(zlogp);
1631
1632 if (kernelcall) {
1633 rvalp = NULL;
1634 rlen = 0;
1635 } else {
1636 rvalp->rval = rval;
1637 }
1638 if (uc != NULL)
1639 ucred_free(uc);
1640 (void) door_return((char *)rvalp, rlen, NULL, 0);
1641 thr_exit(NULL);
1642 }
1643
1644 static int
1645 setup_door(zlog_t *zlogp)
1646 {
1647 if ((zone_door = door_create(server, NULL,
1648 DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) {
1649 zerror(zlogp, B_TRUE, "%s failed", "door_create");
1650 return (-1);
1651 }
1652 (void) fdetach(zone_door_path);
1653
1654 if (fattach(zone_door, zone_door_path) != 0) {
1655 zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path);
1656 (void) door_revoke(zone_door);
1657 (void) fdetach(zone_door_path);
1658 zone_door = -1;
1659 return (-1);
1660 }
1661 return (0);
1662 }
1663
1664 /*
1665 * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this
1666 * is where zoneadmd itself will check to see that another instance of
1667 * zoneadmd isn't already controlling this zone.
1668 *
1669 * The idea here is that we want to open the path to which we will
1670 * attach our door, lock it, and then make sure that no-one has beat us
1671 * to fattach(3c)ing onto it.
1672 *
1673 * fattach(3c) is really a mount, so there are actually two possible
1674 * vnodes we could be dealing with. Our strategy is as follows:
1675 *
1676 * - If the file we opened is a regular file (common case):
1677 * There is no fattach(3c)ed door, so we have a chance of becoming
1678 * the managing zoneadmd. We attempt to lock the file: if it is
1679 * already locked, that means someone else raced us here, so we
1680 * lose and give up. zoneadm(1m) will try to contact the zoneadmd
1681 * that beat us to it.
1682 *
1683 * - If the file we opened is a namefs file:
1684 * This means there is already an established door fattach(3c)'ed
1685 * to the rendezvous path. We've lost the race, so we give up.
1686 * Note that in this case we also try to grab the file lock, and
1687 * will succeed in acquiring it since the vnode locked by the
1688 * "winning" zoneadmd was a regular one, and the one we locked was
1689 * the fattach(3c)'ed door node. At any rate, no harm is done, and
1690 * we just return to zoneadm(1m) which knows to retry.
1691 */
1692 static int
1693 make_daemon_exclusive(zlog_t *zlogp)
1694 {
1695 int doorfd = -1;
1696 int err, ret = -1;
1697 struct stat st;
1698 struct flock flock;
1699 zone_state_t zstate;
1700
1701 top:
1702 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
1703 zerror(zlogp, B_FALSE, "failed to get zone state: %s",
1704 zonecfg_strerror(err));
1705 goto out;
1706 }
1707 if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR,
1708 S_IREAD|S_IWRITE)) < 0) {
1709 zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path);
1710 goto out;
1711 }
1712 if (fstat(doorfd, &st) < 0) {
1713 zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path);
1714 goto out;
1715 }
1716 /*
1717 * Lock the file to synchronize with other zoneadmd
1718 */
1719 flock.l_type = F_WRLCK;
1720 flock.l_whence = SEEK_SET;
1721 flock.l_start = (off_t)0;
1722 flock.l_len = (off_t)0;
1723 if (fcntl(doorfd, F_SETLK, &flock) < 0) {
1724 /*
1725 * Someone else raced us here and grabbed the lock file
1726 * first. A warning here is inappropriate since nothing
1727 * went wrong.
1728 */
1729 goto out;
1730 }
1731
1732 if (strcmp(st.st_fstype, "namefs") == 0) {
1733 struct door_info info;
1734
1735 /*
1736 * There is already something fattach()'ed to this file.
1737 * Lets see what the door is up to.
1738 */
1739 if (door_info(doorfd, &info) == 0 && info.di_target != -1) {
1740 /*
1741 * Another zoneadmd process seems to be in
1742 * control of the situation and we don't need to
1743 * be here. A warning here is inappropriate
1744 * since nothing went wrong.
1745 *
1746 * If the door has been revoked, the zoneadmd
1747 * process currently managing the zone is going
1748 * away. We'll return control to zoneadm(1m)
1749 * which will try again (by which time zoneadmd
1750 * will hopefully have exited).
1751 */
1752 goto out;
1753 }
1754
1755 /*
1756 * If we got this far, there's a fattach(3c)'ed door
1757 * that belongs to a process that has exited, which can
1758 * happen if the previous zoneadmd died unexpectedly.
1759 *
1760 * Let user know that something is amiss, but that we can
1761 * recover; if the zone is in the installed state, then don't
1762 * message, since having a running zoneadmd isn't really
1763 * expected/needed. We want to keep occurences of this message
1764 * limited to times when zoneadmd is picking back up from a
1765 * zoneadmd that died while the zone was in some non-trivial
1766 * state.
1767 */
1768 if (zstate > ZONE_STATE_INSTALLED) {
1769 zerror(zlogp, B_FALSE,
1770 "zone '%s': WARNING: zone is in state '%s', but "
1771 "zoneadmd does not appear to be available; "
1772 "restarted zoneadmd to recover.",
1773 zone_name, zone_state_str(zstate));
1774 }
1775
1776 (void) fdetach(zone_door_path);
1777 (void) close(doorfd);
1778 goto top;
1779 }
1780 ret = 0;
1781 out:
1782 (void) close(doorfd);
1783 return (ret);
1784 }
1785
1786 /*
1787 * Setup the brand's pre and post state change callbacks, as well as the
1788 * query callback, if any of these exist.
1789 */
1790 static int
1791 brand_callback_init(brand_handle_t bh, char *zone_name)
1792 {
1793 char zpath[MAXPATHLEN];
1794
1795 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK)
1796 return (-1);
1797
1798 (void) strlcpy(pre_statechg_hook, EXEC_PREFIX,
1799 sizeof (pre_statechg_hook));
1800
1801 if (brand_get_prestatechange(bh, zone_name, zpath,
1802 pre_statechg_hook + EXEC_LEN,
1803 sizeof (pre_statechg_hook) - EXEC_LEN) != 0)
1804 return (-1);
1805
1806 if (strlen(pre_statechg_hook) <= EXEC_LEN)
1807 pre_statechg_hook[0] = '\0';
1808
1809 (void) strlcpy(post_statechg_hook, EXEC_PREFIX,
1810 sizeof (post_statechg_hook));
1811
1812 if (brand_get_poststatechange(bh, zone_name, zpath,
1813 post_statechg_hook + EXEC_LEN,
1814 sizeof (post_statechg_hook) - EXEC_LEN) != 0)
1815 return (-1);
1816
1817 if (strlen(post_statechg_hook) <= EXEC_LEN)
1818 post_statechg_hook[0] = '\0';
1819
1820 (void) strlcpy(query_hook, EXEC_PREFIX,
1821 sizeof (query_hook));
1822
1823 if (brand_get_query(bh, zone_name, zpath, query_hook + EXEC_LEN,
1824 sizeof (query_hook) - EXEC_LEN) != 0)
1825 return (-1);
1826
1827 if (strlen(query_hook) <= EXEC_LEN)
1828 query_hook[0] = '\0';
1829
1830 return (0);
1831 }
1832
1833 int
1834 main(int argc, char *argv[])
1835 {
1836 int opt;
1837 zoneid_t zid;
1838 priv_set_t *privset;
1839 zone_state_t zstate;
1840 char parents_locale[MAXPATHLEN];
1841 brand_handle_t bh;
1842 int err;
1843
1844 pid_t pid;
1845 sigset_t blockset;
1846 sigset_t block_cld;
1847
1848 struct {
1849 sema_t sem;
1850 int status;
1851 zlog_t log;
1852 } *shstate;
1853 size_t shstatelen = getpagesize();
1854
1855 zlog_t errlog;
1856 zlog_t *zlogp;
1857
1858 int ctfd;
1859
1860 progname = get_execbasename(argv[0]);
1861
1862 /*
1863 * Make sure stderr is unbuffered
1864 */
1865 (void) setbuffer(stderr, NULL, 0);
1866
1867 /*
1868 * Get out of the way of mounted filesystems, since we will daemonize
1869 * soon.
1870 */
1871 (void) chdir("/");
1872
1873 /*
1874 * Use the default system umask per PSARC 1998/110 rather than
1875 * anything that may have been set by the caller.
1876 */
1877 (void) umask(CMASK);
1878
1879 /*
1880 * Initially we want to use our parent's locale.
1881 */
1882 (void) setlocale(LC_ALL, "");
1883 (void) textdomain(TEXT_DOMAIN);
1884 (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL),
1885 sizeof (parents_locale));
1886
1887 /*
1888 * This zlog_t is used for writing to stderr
1889 */
1890 errlog.logfile = stderr;
1891 errlog.buflen = errlog.loglen = 0;
1892 errlog.buf = errlog.log = NULL;
1893 errlog.locale = parents_locale;
1894
1895 /*
1896 * We start off writing to stderr until we're ready to daemonize.
1897 */
1898 zlogp = &errlog;
1899
1900 /*
1901 * Process options.
1902 */
1903 while ((opt = getopt(argc, argv, "R:z:")) != EOF) {
1904 switch (opt) {
1905 case 'R':
1906 zonecfg_set_root(optarg);
1907 break;
1908 case 'z':
1909 zone_name = optarg;
1910 break;
1911 default:
1912 usage();
1913 }
1914 }
1915
1916 if (zone_name == NULL)
1917 usage();
1918
1919 /*
1920 * Because usage() prints directly to stderr, it has gettext()
1921 * wrapping, which depends on the locale. But since zerror() calls
1922 * localize() which tweaks the locale, it is not safe to call zerror()
1923 * until after the last call to usage(). Fortunately, the last call
1924 * to usage() is just above and the first call to zerror() is just
1925 * below. Don't mess this up.
1926 */
1927 if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) {
1928 zerror(zlogp, B_FALSE, "cannot manage the %s zone",
1929 GLOBAL_ZONENAME);
1930 return (1);
1931 }
1932
1933 if (zone_get_id(zone_name, &zid) != 0) {
1934 zerror(zlogp, B_FALSE, "could not manage %s: %s", zone_name,
1935 zonecfg_strerror(Z_NO_ZONE));
1936 return (1);
1937 }
1938
1939 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
1940 zerror(zlogp, B_FALSE, "failed to get zone state: %s",
1941 zonecfg_strerror(err));
1942 return (1);
1943 }
1944 if (zstate < ZONE_STATE_INCOMPLETE) {
1945 zerror(zlogp, B_FALSE,
1946 "cannot manage a zone which is in state '%s'",
1947 zone_state_str(zstate));
1948 return (1);
1949 }
1950
1951 if (zonecfg_default_brand(default_brand,
1952 sizeof (default_brand)) != Z_OK) {
1953 zerror(zlogp, B_FALSE, "unable to determine default brand");
1954 return (1);
1955 }
1956
1957 /* Get a handle to the brand info for this zone */
1958 if (zone_get_brand(zone_name, brand_name, sizeof (brand_name))
1959 != Z_OK) {
1960 zerror(zlogp, B_FALSE, "unable to determine zone brand");
1961 return (1);
1962 }
1963 zone_isnative = (strcmp(brand_name, NATIVE_BRAND_NAME) == 0);
1964 zone_islabeled = (strcmp(brand_name, LABELED_BRAND_NAME) == 0);
1965
1966 /*
1967 * In the alternate root environment, the only supported
1968 * operations are mount and unmount. In this case, just treat
1969 * the zone as native if it is cluster. Cluster zones can be
1970 * native for the purpose of LU or upgrade, and the cluster
1971 * brand may not exist in the miniroot (such as in net install
1972 * upgrade).
1973 */
1974 if (strcmp(brand_name, CLUSTER_BRAND_NAME) == 0) {
1975 zone_iscluster = B_TRUE;
1976 if (zonecfg_in_alt_root()) {
1977 (void) strlcpy(brand_name, default_brand,
1978 sizeof (brand_name));
1979 }
1980 } else {
1981 zone_iscluster = B_FALSE;
1982 }
1983
1984 if ((bh = brand_open(brand_name)) == NULL) {
1985 zerror(zlogp, B_FALSE, "unable to open zone brand");
1986 return (1);
1987 }
1988
1989 /* Get state change brand hooks. */
1990 if (brand_callback_init(bh, zone_name) == -1) {
1991 zerror(zlogp, B_TRUE,
1992 "failed to initialize brand state change hooks");
1993 brand_close(bh);
1994 return (1);
1995 }
1996
1997 brand_close(bh);
1998
1999 /*
2000 * Check that we have all privileges. It would be nice to pare
2001 * this down, but this is at least a first cut.
2002 */
2003 if ((privset = priv_allocset()) == NULL) {
2004 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
2005 return (1);
2006 }
2007
2008 if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
2009 zerror(zlogp, B_TRUE, "%s failed", "getppriv");
2010 priv_freeset(privset);
2011 return (1);
2012 }
2013
2014 if (priv_isfullset(privset) == B_FALSE) {
2015 zerror(zlogp, B_FALSE, "You lack sufficient privilege to "
2016 "run this command (all privs required)");
2017 priv_freeset(privset);
2018 return (1);
2019 }
2020 priv_freeset(privset);
2021
2022 if (mkzonedir(zlogp) != 0)
2023 return (1);
2024
2025 /*
2026 * Pre-fork: setup shared state
2027 */
2028 if ((shstate = (void *)mmap(NULL, shstatelen,
2029 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) ==
2030 MAP_FAILED) {
2031 zerror(zlogp, B_TRUE, "%s failed", "mmap");
2032 return (1);
2033 }
2034 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) {
2035 zerror(zlogp, B_TRUE, "%s failed", "sema_init()");
2036 (void) munmap((char *)shstate, shstatelen);
2037 return (1);
2038 }
2039 shstate->log.logfile = NULL;
2040 shstate->log.buflen = shstatelen - sizeof (*shstate);
2041 shstate->log.loglen = shstate->log.buflen;
2042 shstate->log.buf = (char *)shstate + sizeof (*shstate);
2043 shstate->log.log = shstate->log.buf;
2044 shstate->log.locale = parents_locale;
2045 shstate->status = -1;
2046
2047 /*
2048 * We need a SIGCHLD handler so the sema_wait() below will wake
2049 * up if the child dies without doing a sema_post().
2050 */
2051 (void) sigset(SIGCHLD, sigchld);
2052 /*
2053 * We must mask SIGCHLD until after we've coped with the fork
2054 * sufficiently to deal with it; otherwise we can race and
2055 * receive the signal before pid has been initialized
2056 * (yes, this really happens).
2057 */
2058 (void) sigemptyset(&block_cld);
2059 (void) sigaddset(&block_cld, SIGCHLD);
2060 (void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
2061
2062 /*
2063 * The parent only needs stderr after the fork, so close other fd's
2064 * that we inherited from zoneadm so that the parent doesn't have those
2065 * open while waiting. The child will close the rest after the fork.
2066 */
2067 closefrom(3);
2068
2069 if ((ctfd = init_template()) == -1) {
2070 zerror(zlogp, B_TRUE, "failed to create contract");
2071 return (1);
2072 }
2073
2074 /*
2075 * Do not let another thread localize a message while we are forking.
2076 */
2077 (void) mutex_lock(&msglock);
2078 pid = fork();
2079 (void) mutex_unlock(&msglock);
2080
2081 /*
2082 * In all cases (parent, child, and in the event of an error) we
2083 * don't want to cause creation of contracts on subsequent fork()s.
2084 */
2085 (void) ct_tmpl_clear(ctfd);
2086 (void) close(ctfd);
2087
2088 if (pid == -1) {
2089 zerror(zlogp, B_TRUE, "could not fork");
2090 return (1);
2091
2092 } else if (pid > 0) { /* parent */
2093 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2094 /*
2095 * This marks a window of vulnerability in which we receive
2096 * the SIGCLD before falling into sema_wait (normally we would
2097 * get woken up from sema_wait with EINTR upon receipt of
2098 * SIGCLD). So we may need to use some other scheme like
2099 * sema_posting in the sigcld handler.
2100 * blech
2101 */
2102 (void) sema_wait(&shstate->sem);
2103 (void) sema_destroy(&shstate->sem);
2104 if (shstate->status != 0)
2105 (void) waitpid(pid, NULL, WNOHANG);
2106 /*
2107 * It's ok if we die with SIGPIPE. It's not like we could have
2108 * done anything about it.
2109 */
2110 (void) fprintf(stderr, "%s", shstate->log.buf);
2111 _exit(shstate->status == 0 ? 0 : 1);
2112 }
2113
2114 /*
2115 * The child charges on.
2116 */
2117 (void) sigset(SIGCHLD, SIG_DFL);
2118 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2119
2120 /*
2121 * SIGPIPE can be delivered if we write to a socket for which the
2122 * peer endpoint is gone. That can lead to too-early termination
2123 * of zoneadmd, and that's not good eats.
2124 */
2125 (void) sigset(SIGPIPE, SIG_IGN);
2126 /*
2127 * Stop using stderr
2128 */
2129 zlogp = &shstate->log;
2130
2131 /*
2132 * We don't need stdout/stderr from now on.
2133 */
2134 closefrom(0);
2135
2136 /*
2137 * Initialize the syslog zlog_t. This needs to be done after
2138 * the call to closefrom().
2139 */
2140 logsys.buf = logsys.log = NULL;
2141 logsys.buflen = logsys.loglen = 0;
2142 logsys.logfile = NULL;
2143 logsys.locale = DEFAULT_LOCALE;
2144
2145 openlog("zoneadmd", LOG_PID, LOG_DAEMON);
2146
2147 /*
2148 * The eventstream is used to publish state changes in the zone
2149 * from the door threads to the console I/O poller.
2150 */
2151 if (eventstream_init() == -1) {
2152 zerror(zlogp, B_TRUE, "unable to create eventstream");
2153 goto child_out;
2154 }
2155
2156 (void) snprintf(zone_door_path, sizeof (zone_door_path),
2157 "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name);
2158
2159 /*
2160 * See if another zoneadmd is running for this zone. If not, then we
2161 * can now modify system state.
2162 */
2163 if (make_daemon_exclusive(zlogp) == -1)
2164 goto child_out;
2165
2166
2167 /*
2168 * Create/join a new session; we need to be careful of what we do with
2169 * the console from now on so we don't end up being the session leader
2170 * for the terminal we're going to be handing out.
2171 */
2172 (void) setsid();
2173
2174 /*
2175 * This thread shouldn't be receiving any signals; in particular,
2176 * SIGCHLD should be received by the thread doing the fork().
2177 */
2178 (void) sigfillset(&blockset);
2179 (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL);
2180
2181 /*
2182 * Setup the console device and get ready to serve the console;
2183 * once this has completed, we're ready to let console clients
2184 * make an attempt to connect (they will block until
2185 * serve_console_sock() below gets called, and any pending
2186 * connection is accept()ed).
2187 */
2188 if (!zonecfg_in_alt_root() && init_console(zlogp) < 0)
2189 goto child_out;
2190
2191 /*
2192 * Take the lock now, so that when the door server gets going, we
2193 * are guaranteed that it won't take a request until we are sure
2194 * that everything is completely set up. See the child_out: label
2195 * below to see why this matters.
2196 */
2197 (void) mutex_lock(&lock);
2198
2199 /* Init semaphore for scratch zones. */
2200 if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) {
2201 zerror(zlogp, B_TRUE,
2202 "failed to initialize semaphore for scratch zone");
2203 goto child_out;
2204 }
2205
2206 /* open the dladm handle */
2207 if (dladm_open(&dld_handle) != DLADM_STATUS_OK) {
2208 zerror(zlogp, B_FALSE, "failed to open dladm handle");
2209 goto child_out;
2210 }
2211
2212 /*
2213 * Note: door setup must occur *after* the console is setup.
2214 * This is so that as zlogin tests the door to see if zoneadmd
2215 * is ready yet, we know that the console will get serviced
2216 * once door_info() indicates that the door is "up".
2217 */
2218 if (setup_door(zlogp) == -1)
2219 goto child_out;
2220
2221 /*
2222 * Things seem OK so far; tell the parent process that we're done
2223 * with setup tasks. This will cause the parent to exit, signalling
2224 * to zoneadm, zlogin, or whatever forked it that we are ready to
2225 * service requests.
2226 */
2227 shstate->status = 0;
2228 (void) sema_post(&shstate->sem);
2229 (void) munmap((char *)shstate, shstatelen);
2230 shstate = NULL;
2231
2232 (void) mutex_unlock(&lock);
2233
2234 /*
2235 * zlogp is now invalid, so reset it to the syslog logger.
2236 */
2237 zlogp = &logsys;
2238
2239 /*
2240 * Now that we are free of any parents, switch to the default locale.
2241 */
2242 (void) setlocale(LC_ALL, DEFAULT_LOCALE);
2243
2244 /*
2245 * At this point the setup portion of main() is basically done, so
2246 * we reuse this thread to manage the zone console. When
2247 * serve_console() has returned, we are past the point of no return
2248 * in the life of this zoneadmd.
2249 */
2250 if (zonecfg_in_alt_root()) {
2251 /*
2252 * This is just awful, but mounted scratch zones don't (and
2253 * can't) have consoles. We just wait for unmount instead.
2254 */
2255 while (sema_wait(&scratch_sem) == EINTR)
2256 ;
2257 } else {
2258 serve_console(zlogp);
2259 assert(in_death_throes);
2260 }
2261
2262 /*
2263 * This is the next-to-last part of the exit interlock. Upon calling
2264 * fdetach(), the door will go unreferenced; once any
2265 * outstanding requests (like the door thread doing Z_HALT) are
2266 * done, the door will get an UNREF notification; when it handles
2267 * the UNREF, the door server will cause the exit. It's possible
2268 * that fdetach() can fail because the file is in use, in which
2269 * case we'll retry the operation.
2270 */
2271 assert(!MUTEX_HELD(&lock));
2272 for (;;) {
2273 if ((fdetach(zone_door_path) == 0) || (errno != EBUSY))
2274 break;
2275 yield();
2276 }
2277
2278 for (;;)
2279 (void) pause();
2280
2281 child_out:
2282 assert(pid == 0);
2283
2284 shstate->status = -1;
2285 (void) sema_post(&shstate->sem);
2286 (void) munmap((char *)shstate, shstatelen);
2287
2288 /*
2289 * This might trigger an unref notification, but if so,
2290 * we are still holding the lock, so our call to exit will
2291 * ultimately win the race and will publish the right exit
2292 * code.
2293 */
2294 if (zone_door != -1) {
2295 assert(MUTEX_HELD(&lock));
2296 (void) door_revoke(zone_door);
2297 (void) fdetach(zone_door_path);
2298 }
2299
2300 if (dld_handle != NULL)
2301 dladm_close(dld_handle);
2302
2303 return (1); /* return from main() forcibly exits an MT process */
2304 }