Print this page
10141 smatch fix for zoneadmd
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/cmd/zoneadmd/zoneadmd.c
+++ new/usr/src/cmd/zoneadmd/zoneadmd.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
25 25 * Copyright (c) 2016 by Delphix. All rights reserved.
26 26 */
27 27
28 28 /*
29 29 * zoneadmd manages zones; one zoneadmd process is launched for each
30 30 * non-global zone on the system. This daemon juggles four jobs:
31 31 *
32 32 * - Implement setup and teardown of the zone "virtual platform": mount and
33 33 * unmount filesystems; create and destroy network interfaces; communicate
34 34 * with devfsadmd to lay out devices for the zone; instantiate the zone
35 35 * console device; configure process runtime attributes such as resource
36 36 * controls, pool bindings, fine-grained privileges.
37 37 *
38 38 * - Launch the zone's init(1M) process.
39 39 *
40 40 * - Implement a door server; clients (like zoneadm) connect to the door
41 41 * server and request zone state changes. The kernel is also a client of
42 42 * this door server. A request to halt or reboot the zone which originates
43 43 * *inside* the zone results in a door upcall from the kernel into zoneadmd.
44 44 *
45 45 * One minor problem is that messages emitted by zoneadmd need to be passed
46 46 * back to the zoneadm process making the request. These messages need to
47 47 * be rendered in the client's locale; so, this is passed in as part of the
48 48 * request. The exception is the kernel upcall to zoneadmd, in which case
49 49 * messages are syslog'd.
50 50 *
51 51 * To make all of this work, the Makefile adds -a to xgettext to extract *all*
52 52 * strings, and an exclusion file (zoneadmd.xcl) is used to exclude those
53 53 * strings which do not need to be translated.
54 54 *
55 55 * - Act as a console server for zlogin -C processes; see comments in zcons.c
56 56 * for more information about the zone console architecture.
57 57 *
58 58 * DESIGN NOTES
59 59 *
60 60 * Restart:
61 61 * A chief design constraint of zoneadmd is that it should be restartable in
62 62 * the case that the administrator kills it off, or it suffers a fatal error,
63 63 * without the running zone being impacted; this is akin to being able to
64 64 * reboot the service processor of a server without affecting the OS instance.
65 65 */
66 66
67 67 #include <sys/param.h>
68 68 #include <sys/mman.h>
69 69 #include <sys/types.h>
70 70 #include <sys/stat.h>
71 71 #include <sys/sysmacros.h>
72 72
73 73 #include <bsm/adt.h>
74 74 #include <bsm/adt_event.h>
75 75
76 76 #include <alloca.h>
77 77 #include <assert.h>
78 78 #include <errno.h>
79 79 #include <door.h>
80 80 #include <fcntl.h>
81 81 #include <locale.h>
82 82 #include <signal.h>
83 83 #include <stdarg.h>
84 84 #include <stdio.h>
85 85 #include <stdlib.h>
86 86 #include <string.h>
87 87 #include <strings.h>
88 88 #include <synch.h>
89 89 #include <syslog.h>
90 90 #include <thread.h>
91 91 #include <unistd.h>
92 92 #include <wait.h>
93 93 #include <limits.h>
94 94 #include <zone.h>
95 95 #include <libbrand.h>
96 96 #include <sys/brand.h>
97 97 #include <libcontract.h>
98 98 #include <libcontract_priv.h>
99 99 #include <sys/brand.h>
100 100 #include <sys/contract/process.h>
101 101 #include <sys/ctfs.h>
102 102 #include <libdladm.h>
103 103 #include <sys/dls_mgmt.h>
104 104 #include <libscf.h>
105 105
106 106 #include <libzonecfg.h>
107 107 #include <zonestat_impl.h>
108 108 #include "zoneadmd.h"
109 109
110 110 static char *progname;
111 111 char *zone_name; /* zone which we are managing */
112 112 char pool_name[MAXNAMELEN];
113 113 char default_brand[MAXNAMELEN];
114 114 char brand_name[MAXNAMELEN];
115 115 boolean_t zone_isnative;
116 116 boolean_t zone_iscluster;
117 117 boolean_t zone_islabeled;
118 118 boolean_t shutdown_in_progress;
119 119 static zoneid_t zone_id;
120 120 dladm_handle_t dld_handle = NULL;
121 121
122 122 static char pre_statechg_hook[2 * MAXPATHLEN];
123 123 static char post_statechg_hook[2 * MAXPATHLEN];
124 124 char query_hook[2 * MAXPATHLEN];
125 125
126 126 zlog_t logsys;
127 127
128 128 mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */
129 129 mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */
130 130
131 131 static sema_t scratch_sem; /* for scratch zones */
132 132
133 133 static char zone_door_path[MAXPATHLEN];
134 134 static int zone_door = -1;
135 135
136 136 boolean_t in_death_throes = B_FALSE; /* daemon is dying */
137 137 boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */
138 138
139 139 #if !defined(TEXT_DOMAIN) /* should be defined by cc -D */
140 140 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */
141 141 #endif
142 142
143 143 #define DEFAULT_LOCALE "C"
144 144
145 145 static const char *
146 146 z_cmd_name(zone_cmd_t zcmd)
147 147 {
148 148 /* This list needs to match the enum in sys/zone.h */
149 149 static const char *zcmdstr[] = {
150 150 "ready", "boot", "forceboot", "reboot", "halt",
151 151 "note_uninstalling", "mount", "forcemount", "unmount",
152 152 "shutdown"
153 153 };
154 154
155 155 if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr))
156 156 return ("unknown");
157 157 else
158 158 return (zcmdstr[(int)zcmd]);
159 159 }
160 160
/*
 * Return a pointer to the final path component of execfullname.  Trailing
 * '/' characters are removed in place (the argument is modified), so an
 * invocation such as "/usr/lib/zoneadmd/" still yields "zoneadmd".  If no
 * slash remains, the (possibly shortened) argument itself is returned.
 */
static char *
get_execbasename(char *execfullname)
{
	char *slash;

	while ((slash = strrchr(execfullname, '/')) != NULL) {
		/* A non-empty component follows the last slash: done. */
		if (slash[1] != '\0')
			return (slash + 1);

		/* guard against '/' at end of command invocation */
		*slash = '\0';
	}

	return (execfullname);
}
183 183
184 184 static void
185 185 usage(void)
186 186 {
187 187 (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname);
188 188 (void) fprintf(stderr,
189 189 gettext("\tNote: %s should not be run directly.\n"), progname);
190 190 exit(2);
191 191 }
192 192
/*
 * SIGCHLD handler.  It intentionally performs no work.
 */
/* ARGSUSED */
static void
sigchld(int sig)
{
	/* nothing to do */
}
198 198
199 199 char *
200 200 localize_msg(char *locale, const char *msg)
201 201 {
202 202 char *out;
203 203
204 204 (void) mutex_lock(&msglock);
205 205 (void) setlocale(LC_MESSAGES, locale);
206 206 out = gettext(msg);
207 207 (void) setlocale(LC_MESSAGES, DEFAULT_LOCALE);
208 208 (void) mutex_unlock(&msglock);
209 209 return (out);
210 210 }
211 211
212 212 /* PRINTFLIKE3 */
213 213 void
214 214 zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...)
215 215 {
216 216 va_list alist;
217 217 char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */
218 218 char *bp;
219 219 int saved_errno = errno;
220 220
221 221 if (zlogp == NULL)
222 222 return;
223 223 if (zlogp == &logsys)
224 224 (void) snprintf(buf, sizeof (buf), "[zone '%s'] ",
225 225 zone_name);
226 226 else
227 227 buf[0] = '\0';
228 228 bp = &(buf[strlen(buf)]);
229 229
230 230 /*
231 231 * In theory, the locale pointer should be set to either "C" or a
232 232 * char array, so it should never be NULL
233 233 */
234 234 assert(zlogp->locale != NULL);
235 235 /* Locale is per process, but we are multi-threaded... */
236 236 fmt = localize_msg(zlogp->locale, fmt);
237 237
238 238 va_start(alist, fmt);
239 239 (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist);
240 240 va_end(alist);
241 241 bp = &(buf[strlen(buf)]);
242 242 if (use_strerror)
243 243 (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s",
244 244 strerror(saved_errno));
245 245 if (zlogp == &logsys) {
246 246 (void) syslog(LOG_ERR, "%s", buf);
247 247 } else if (zlogp->logfile != NULL) {
248 248 (void) fprintf(zlogp->logfile, "%s\n", buf);
249 249 } else {
250 250 size_t buflen;
251 251 size_t copylen;
252 252
253 253 buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf);
254 254 copylen = MIN(buflen, zlogp->loglen);
255 255 zlogp->log += copylen;
256 256 zlogp->loglen -= copylen;
257 257 }
258 258 }
259 259
260 260 /*
261 261 * Emit a warning for any boot arguments which are unrecognized. Since
262 262 * Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we
263 263 * put the arguments into an argv style array, use getopt to process them,
264 264 * and put the resultant argument string back into outargs.
265 265 *
266 266 * During the filtering, we pull out any arguments which are truly "boot"
267 267 * arguments, leaving only those which are to be passed intact to the
268 268 * progenitor process. The one we support at the moment is -i, which
269 269 * indicates to the kernel which program should be launched as 'init'.
270 270 *
271 271 * A return of Z_INVAL indicates specifically that the arguments are
272 272 * not valid; this is a non-fatal error. Except for Z_OK, all other return
273 273 * values are treated as fatal.
274 274 */
275 275 static int
276 276 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
277 277 char *init_file, char *badarg)
278 278 {
279 279 int argc = 0, argc_save;
280 280 int i;
281 281 int err;
282 282 char *arg, *lasts, **argv = NULL, **argv_save;
283 283 char zonecfg_args[BOOTARGS_MAX];
284 284 char scratchargs[BOOTARGS_MAX], *sargs;
285 285 char c;
286 286
287 287 bzero(outargs, BOOTARGS_MAX);
288 288 bzero(badarg, BOOTARGS_MAX);
289 289
290 290 /*
291 291 * If the user didn't specify transient boot arguments, check
292 292 * to see if there were any specified in the zone configuration,
293 293 * and use them if applicable.
294 294 */
295 295 if (inargs == NULL || inargs[0] == '\0') {
296 296 zone_dochandle_t handle;
297 297 if ((handle = zonecfg_init_handle()) == NULL) {
298 298 zerror(zlogp, B_TRUE,
299 299 "getting zone configuration handle");
300 300 return (Z_BAD_HANDLE);
301 301 }
302 302 err = zonecfg_get_snapshot_handle(zone_name, handle);
303 303 if (err != Z_OK) {
304 304 zerror(zlogp, B_FALSE,
305 305 "invalid configuration snapshot");
306 306 zonecfg_fini_handle(handle);
307 307 return (Z_BAD_HANDLE);
308 308 }
309 309
310 310 bzero(zonecfg_args, sizeof (zonecfg_args));
311 311 (void) zonecfg_get_bootargs(handle, zonecfg_args,
312 312 sizeof (zonecfg_args));
313 313 inargs = zonecfg_args;
314 314 zonecfg_fini_handle(handle);
315 315 }
316 316
317 317 if (strlen(inargs) >= BOOTARGS_MAX) {
318 318 zerror(zlogp, B_FALSE, "boot argument string too long");
319 319 return (Z_INVAL);
320 320 }
321 321
322 322 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
323 323 sargs = scratchargs;
324 324 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
325 325 sargs = NULL;
326 326 argc++;
327 327 }
328 328
329 329 if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) {
330 330 zerror(zlogp, B_FALSE, "memory allocation failed");
331 331 return (Z_NOMEM);
332 332 }
333 333
334 334 argv_save = argv;
335 335 argc_save = argc;
336 336
337 337 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
338 338 sargs = scratchargs;
339 339 i = 0;
340 340 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
341 341 sargs = NULL;
342 342 if ((argv[i] = strdup(arg)) == NULL) {
343 343 err = Z_NOMEM;
344 344 zerror(zlogp, B_FALSE, "memory allocation failed");
345 345 goto done;
346 346 }
347 347 i++;
348 348 }
349 349
350 350 /*
351 351 * We preserve compatibility with the Solaris system boot behavior,
352 352 * which allows:
353 353 *
354 354 * # reboot kernel/unix -s -m verbose
355 355 *
356 356 * In this example, kernel/unix tells the booter what file to
357 357 * boot. We don't want reboot in a zone to be gratuitously different,
358 358 * so we silently ignore the boot file, if necessary.
359 359 */
360 360 if (argv[0] == NULL)
361 361 goto done;
362 362
363 363 assert(argv[0][0] != ' ');
364 364 assert(argv[0][0] != '\t');
365 365
366 366 if (argv[0][0] != '-' && argv[0][0] != '\0') {
367 367 argv = &argv[1];
368 368 argc--;
369 369 }
370 370
371 371 optind = 0;
372 372 opterr = 0;
373 373 err = Z_OK;
374 374 while ((c = getopt(argc, argv, "fi:m:s")) != -1) {
375 375 switch (c) {
376 376 case 'i':
377 377 /*
378 378 * -i is handled by the runtime and is not passed
379 379 * along to userland
380 380 */
381 381 (void) strlcpy(init_file, optarg, MAXPATHLEN);
382 382 break;
383 383 case 'f':
384 384 /* This has already been processed by zoneadm */
385 385 break;
386 386 case 'm':
387 387 case 's':
388 388 /* These pass through unmolested */
389 389 (void) snprintf(outargs, BOOTARGS_MAX,
390 390 "%s -%c %s ", outargs, c, optarg ? optarg : "");
391 391 break;
392 392 case '?':
393 393 /*
394 394 * We warn about unknown arguments but pass them
395 395 * along anyway-- if someone wants to develop their
396 396 * own init replacement, they can pass it whatever
397 397 * args they want.
398 398 */
399 399 err = Z_INVAL;
400 400 (void) snprintf(outargs, BOOTARGS_MAX,
401 401 "%s -%c", outargs, optopt);
402 402 (void) snprintf(badarg, BOOTARGS_MAX,
403 403 "%s -%c", badarg, optopt);
404 404 break;
405 405 }
406 406 }
407 407
408 408 /*
409 409 * For Solaris Zones we warn about and discard non-option arguments.
410 410 * Hence 'boot foo bar baz gub' --> 'boot'. However, to be similar
411 411 * to the kernel, we concat up all the other remaining boot args.
412 412 * and warn on them as a group.
413 413 */
414 414 if (optind < argc) {
415 415 err = Z_INVAL;
416 416 while (optind < argc) {
417 417 (void) snprintf(badarg, BOOTARGS_MAX, "%s%s%s",
418 418 badarg, strlen(badarg) > 0 ? " " : "",
419 419 argv[optind]);
420 420 optind++;
421 421 }
422 422 zerror(zlogp, B_FALSE, "WARNING: Unused or invalid boot "
423 423 "arguments `%s'.", badarg);
424 424 }
425 425
426 426 done:
427 427 for (i = 0; i < argc_save; i++) {
428 428 if (argv_save[i] != NULL)
429 429 free(argv_save[i]);
430 430 }
431 431 free(argv_save);
432 432 return (err);
433 433 }
434 434
435 435
436 436 static int
437 437 mkzonedir(zlog_t *zlogp)
438 438 {
439 439 struct stat st;
440 440 /*
441 441 * We must create and lock everyone but root out of ZONES_TMPDIR
442 442 * since anyone can open any UNIX domain socket, regardless of
443 443 * its file system permissions. Sigh...
444 444 */
445 445 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
446 446 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR);
447 447 return (-1);
448 448 }
449 449 /* paranoia */
450 450 if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) {
451 451 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR);
452 452 return (-1);
453 453 }
454 454 (void) chmod(ZONES_TMPDIR, S_IRWXU);
455 455 return (0);
456 456 }
457 457
458 458 /*
459 459 * Run the brand's pre-state change callback, if it exists.
460 460 */
461 461 static int
462 462 brand_prestatechg(zlog_t *zlogp, int state, int cmd)
463 463 {
464 464 char cmdbuf[2 * MAXPATHLEN];
465 465 const char *altroot;
466 466
467 467 if (pre_statechg_hook[0] == '\0')
468 468 return (0);
469 469
470 470 altroot = zonecfg_get_root();
471 471 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", pre_statechg_hook,
472 472 state, cmd, altroot) > sizeof (cmdbuf))
473 473 return (-1);
474 474
475 475 if (do_subproc(zlogp, cmdbuf, NULL) != 0)
476 476 return (-1);
477 477
478 478 return (0);
479 479 }
480 480
481 481 /*
482 482 * Run the brand's post-state change callback, if it exists.
483 483 */
484 484 static int
485 485 brand_poststatechg(zlog_t *zlogp, int state, int cmd)
486 486 {
487 487 char cmdbuf[2 * MAXPATHLEN];
488 488 const char *altroot;
489 489
490 490 if (post_statechg_hook[0] == '\0')
491 491 return (0);
492 492
493 493 altroot = zonecfg_get_root();
494 494 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", post_statechg_hook,
495 495 state, cmd, altroot) > sizeof (cmdbuf))
496 496 return (-1);
497 497
498 498 if (do_subproc(zlogp, cmdbuf, NULL) != 0)
499 499 return (-1);
500 500
501 501 return (0);
502 502 }
503 503
504 504 /*
505 505 * Notify zonestatd of the new zone. If zonestatd is not running, this
506 506 * will do nothing.
507 507 */
508 508 static void
509 509 notify_zonestatd(zoneid_t zoneid)
510 510 {
511 511 int cmd[2];
512 512 int fd;
513 513 door_arg_t params;
514 514
515 515 fd = open(ZS_DOOR_PATH, O_RDONLY);
516 516 if (fd < 0)
517 517 return;
518 518
519 519 cmd[0] = ZSD_CMD_NEW_ZONE;
520 520 cmd[1] = zoneid;
521 521 params.data_ptr = (char *)&cmd;
522 522 params.data_size = sizeof (cmd);
523 523 params.desc_ptr = NULL;
524 524 params.desc_num = 0;
525 525 params.rbuf = NULL;
526 526 params.rsize = NULL;
527 527 (void) door_call(fd, ¶ms);
528 528 (void) close(fd);
529 529 }
530 530
531 531 /*
532 532 * Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is
533 533 * 'true' if this is being invoked as part of the processing for the "mount"
534 534 * subcommand.
535 535 */
536 536 static int
537 537 zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate)
538 538 {
539 539 int err;
540 540
541 541 if (brand_prestatechg(zlogp, zstate, Z_READY) != 0)
542 542 return (-1);
543 543
544 544 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) {
545 545 zerror(zlogp, B_FALSE, "unable to create snapshot: %s",
546 546 zonecfg_strerror(err));
547 547 goto bad;
548 548 }
549 549
550 550 if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) {
551 551 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
552 552 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
553 553 zonecfg_strerror(err));
554 554 goto bad;
555 555 }
556 556 if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) {
557 557 bringup_failure_recovery = B_TRUE;
558 558 (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE);
559 559 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
560 560 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
561 561 zonecfg_strerror(err));
562 562 goto bad;
563 563 }
564 564
565 565 if (brand_poststatechg(zlogp, zstate, Z_READY) != 0)
566 566 goto bad;
567 567
568 568 return (0);
569 569
570 570 bad:
571 571 /*
572 572 * If something goes wrong, we up the zones's state to the target
573 573 * state, READY, and then invoke the hook as if we're halting.
574 574 */
575 575 (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT);
576 576 return (-1);
577 577 }
578 578
579 579 int
580 580 init_template(void)
581 581 {
582 582 int fd;
583 583 int err = 0;
584 584
585 585 fd = open64(CTFS_ROOT "/process/template", O_RDWR);
586 586 if (fd == -1)
587 587 return (-1);
588 588
589 589 /*
590 590 * For now, zoneadmd doesn't do anything with the contract.
591 591 * Deliver no events, don't inherit, and allow it to be orphaned.
592 592 */
593 593 err |= ct_tmpl_set_critical(fd, 0);
594 594 err |= ct_tmpl_set_informative(fd, 0);
595 595 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
596 596 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
597 597 if (err || ct_tmpl_activate(fd)) {
598 598 (void) close(fd);
599 599 return (-1);
600 600 }
601 601
602 602 return (fd);
603 603 }
604 604
605 605 typedef struct fs_callback {
606 606 zlog_t *zlogp;
607 607 zoneid_t zoneid;
608 608 boolean_t mount_cmd;
609 609 } fs_callback_t;
610 610
611 611 static int
612 612 mount_early_fs(void *data, const char *spec, const char *dir,
613 613 const char *fstype, const char *opt)
614 614 {
615 615 zlog_t *zlogp = ((fs_callback_t *)data)->zlogp;
616 616 zoneid_t zoneid = ((fs_callback_t *)data)->zoneid;
617 617 boolean_t mount_cmd = ((fs_callback_t *)data)->mount_cmd;
618 618 char rootpath[MAXPATHLEN];
619 619 pid_t child;
620 620 int child_status;
621 621 int tmpl_fd;
622 622 int rv;
623 623 ctid_t ct;
624 624
625 625 /* determine the zone rootpath */
626 626 if (mount_cmd) {
627 627 char zonepath[MAXPATHLEN];
628 628 char luroot[MAXPATHLEN];
629 629
630 630 if (zone_get_zonepath(zone_name,
631 631 zonepath, sizeof (zonepath)) != Z_OK) {
632 632 zerror(zlogp, B_FALSE, "unable to determine zone path");
633 633 return (-1);
634 634 }
635 635
636 636 (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath);
637 637 resolve_lofs(zlogp, luroot, sizeof (luroot));
638 638 (void) strlcpy(rootpath, luroot, sizeof (rootpath));
639 639 } else {
640 640 if (zone_get_rootpath(zone_name,
641 641 rootpath, sizeof (rootpath)) != Z_OK) {
642 642 zerror(zlogp, B_FALSE, "unable to determine zone root");
643 643 return (-1);
644 644 }
645 645 }
646 646
647 647 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, fstype)) < 0) {
648 648 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point",
649 649 rootpath, dir);
650 650 return (-1);
651 651 } else if (rv > 0) {
652 652 /* The mount point path doesn't exist, create it now. */
653 653 if (make_one_dir(zlogp, rootpath, dir,
654 654 DEFAULT_DIR_MODE, DEFAULT_DIR_USER,
655 655 DEFAULT_DIR_GROUP) != 0) {
656 656 zerror(zlogp, B_FALSE, "failed to create mount point");
657 657 return (-1);
658 658 }
659 659
660 660 /*
661 661 * Now this might seem weird, but we need to invoke
662 662 * valid_mount_path() again. Why? Because it checks
663 663 * to make sure that the mount point path is canonical,
664 664 * which it can only do if the path exists, so now that
665 665 * we've created the path we have to verify it again.
666 666 */
667 667 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir,
668 668 fstype)) < 0) {
669 669 zerror(zlogp, B_FALSE,
670 670 "%s%s is not a valid mount point", rootpath, dir);
671 671 return (-1);
672 672 }
673 673 }
674 674
675 675 if ((tmpl_fd = init_template()) == -1) {
676 676 zerror(zlogp, B_TRUE, "failed to create contract");
677 677 return (-1);
678 678 }
679 679
680 680 if ((child = fork()) == -1) {
681 681 (void) ct_tmpl_clear(tmpl_fd);
682 682 (void) close(tmpl_fd);
683 683 zerror(zlogp, B_TRUE, "failed to fork");
684 684 return (-1);
685 685
686 686 } else if (child == 0) { /* child */
687 687 char opt_buf[MAX_MNTOPT_STR];
688 688 int optlen = 0;
689 689 int mflag = MS_DATA;
690 690
691 691 (void) ct_tmpl_clear(tmpl_fd);
692 692 /*
693 693 * Even though there are no procs running in the zone, we
694 694 * do this for paranoia's sake.
695 695 */
696 696 (void) closefrom(0);
697 697
698 698 if (zone_enter(zoneid) == -1) {
699 699 _exit(errno);
700 700 }
701 701 if (opt != NULL) {
702 702 /*
703 703 * The mount() system call is incredibly annoying.
704 704 * If options are specified, we need to copy them
705 705 * into a temporary buffer since the mount() system
706 706 * call will overwrite the options string. It will
707 707 * also fail if the new option string it wants to
708 708 * write is bigger than the one we passed in, so
709 709 * you must pass in a buffer of the maximum possible
710 710 * option string length. sigh.
711 711 */
712 712 (void) strlcpy(opt_buf, opt, sizeof (opt_buf));
713 713 opt = opt_buf;
714 714 optlen = MAX_MNTOPT_STR;
715 715 mflag = MS_OPTIONSTR;
716 716 }
717 717 if (mount(spec, dir, mflag, fstype, NULL, 0, opt, optlen) != 0)
718 718 _exit(errno);
719 719 _exit(0);
720 720 }
721 721
722 722 /* parent */
723 723 if (contract_latest(&ct) == -1)
724 724 ct = -1;
725 725 (void) ct_tmpl_clear(tmpl_fd);
726 726 (void) close(tmpl_fd);
727 727 if (waitpid(child, &child_status, 0) != child) {
728 728 /* unexpected: we must have been signalled */
729 729 (void) contract_abandon_id(ct);
730 730 return (-1);
731 731 }
732 732 (void) contract_abandon_id(ct);
733 733 if (WEXITSTATUS(child_status) != 0) {
734 734 errno = WEXITSTATUS(child_status);
735 735 zerror(zlogp, B_TRUE, "mount of %s failed", dir);
736 736 return (-1);
737 737 }
738 738
739 739 return (0);
740 740 }
741 741
742 742 /*
743 743 * If retstr is not NULL, the output of the subproc is returned in the str,
744 744 * otherwise it is output using zerror(). Any memory allocated for retstr
745 745 * should be freed by the caller.
746 746 */
747 747 int
748 748 do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr)
749 749 {
750 750 char buf[1024]; /* arbitrary large amount */
751 751 char *inbuf;
752 752 FILE *file;
753 753 int status;
754 754 int rd_cnt;
755 755
756 756 if (retstr != NULL) {
757 757 if ((*retstr = malloc(1024)) == NULL) {
758 758 zerror(zlogp, B_FALSE, "out of memory");
759 759 return (-1);
760 760 }
761 761 inbuf = *retstr;
762 762 rd_cnt = 0;
763 763 } else {
764 764 inbuf = buf;
765 765 }
766 766
767 767 file = popen(cmdbuf, "r");
768 768 if (file == NULL) {
769 769 zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf);
770 770 return (-1);
771 771 }
772 772
773 773 while (fgets(inbuf, 1024, file) != NULL) {
774 774 if (retstr == NULL) {
775 775 if (zlogp != &logsys)
776 776 zerror(zlogp, B_FALSE, "%s", inbuf);
777 777 } else {
778 778 char *p;
779 779
780 780 rd_cnt += 1024 - 1;
781 781 if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) {
782 782 zerror(zlogp, B_FALSE, "out of memory");
783 783 (void) pclose(file);
784 784 return (-1);
785 785 }
786 786
787 787 *retstr = p;
788 788 inbuf = *retstr + rd_cnt;
789 789 }
790 790 }
791 791 status = pclose(file);
792 792
793 793 if (WIFSIGNALED(status)) {
794 794 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to "
795 795 "signal %d", cmdbuf, WTERMSIG(status));
796 796 return (-1);
797 797 }
798 798 assert(WIFEXITED(status));
799 799 if (WEXITSTATUS(status) == ZEXIT_EXEC) {
800 800 zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf);
801 801 return (-1);
802 802 }
803 803 return (WEXITSTATUS(status));
804 804 }
805 805
806 806 static int
807 807 zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
808 808 {
809 809 zoneid_t zoneid;
810 810 struct stat st;
811 811 char zpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN];
812 812 char nbootargs[BOOTARGS_MAX];
813 813 char cmdbuf[MAXPATHLEN];
814 814 fs_callback_t cb;
815 815 brand_handle_t bh;
816 816 zone_iptype_t iptype;
817 817 boolean_t links_loaded = B_FALSE;
818 818 dladm_status_t status;
819 819 char errmsg[DLADM_STRSIZE];
820 820 int err;
821 821 boolean_t restart_init;
822 822
823 823 if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0)
824 824 return (-1);
825 825
826 826 if ((zoneid = getzoneidbyname(zone_name)) == -1) {
827 827 zerror(zlogp, B_TRUE, "unable to get zoneid");
828 828 goto bad;
829 829 }
830 830
831 831 cb.zlogp = zlogp;
832 832 cb.zoneid = zoneid;
833 833 cb.mount_cmd = B_FALSE;
834 834
835 835 /* Get a handle to the brand info for this zone */
836 836 if ((bh = brand_open(brand_name)) == NULL) {
837 837 zerror(zlogp, B_FALSE, "unable to determine zone brand");
838 838 goto bad;
839 839 }
840 840
841 841 /*
842 842 * Get the list of filesystems to mount from the brand
843 843 * configuration. These mounts are done via a thread that will
844 844 * enter the zone, so they are done from within the context of the
845 845 * zone.
846 846 */
847 847 if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) {
848 848 zerror(zlogp, B_FALSE, "unable to mount filesystems");
849 849 brand_close(bh);
850 850 goto bad;
851 851 }
852 852
853 853 /*
854 854 * Get the brand's boot callback if it exists.
855 855 */
856 856 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
857 857 zerror(zlogp, B_FALSE, "unable to determine zone path");
858 858 brand_close(bh);
859 859 goto bad;
860 860 }
861 861 (void) strcpy(cmdbuf, EXEC_PREFIX);
862 862 if (brand_get_boot(bh, zone_name, zpath, cmdbuf + EXEC_LEN,
863 863 sizeof (cmdbuf) - EXEC_LEN) != 0) {
864 864 zerror(zlogp, B_FALSE,
865 865 "unable to determine branded zone's boot callback");
866 866 brand_close(bh);
867 867 goto bad;
868 868 }
869 869
870 870 /* Get the path for this zone's init(1M) (or equivalent) process. */
871 871 if (brand_get_initname(bh, init_file, MAXPATHLEN) != 0) {
872 872 zerror(zlogp, B_FALSE,
873 873 "unable to determine zone's init(1M) location");
874 874 brand_close(bh);
875 875 goto bad;
876 876 }
877 877
878 878 /* See if this zone's brand should restart init if it dies. */
879 879 restart_init = brand_restartinit(bh);
880 880
881 881 brand_close(bh);
882 882
883 883 err = filter_bootargs(zlogp, bootargs, nbootargs, init_file,
884 884 bad_boot_arg);
885 885 if (err == Z_INVAL)
886 886 eventstream_write(Z_EVT_ZONE_BADARGS);
887 887 else if (err != Z_OK)
888 888 goto bad;
889 889
890 890 assert(init_file[0] != '\0');
891 891
892 892 /* Try to anticipate possible problems: Make sure init is executable. */
893 893 if (zone_get_rootpath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
894 894 zerror(zlogp, B_FALSE, "unable to determine zone root");
895 895 goto bad;
896 896 }
897 897
898 898 (void) snprintf(initpath, sizeof (initpath), "%s%s", zpath, init_file);
899 899
900 900 if (stat(initpath, &st) == -1) {
901 901 zerror(zlogp, B_TRUE, "could not stat %s", initpath);
902 902 goto bad;
903 903 }
904 904
905 905 if ((st.st_mode & S_IXUSR) == 0) {
906 906 zerror(zlogp, B_FALSE, "%s is not executable", initpath);
907 907 goto bad;
908 908 }
909 909
910 910 /*
911 911 * Exclusive stack zones interact with the dlmgmtd running in the
912 912 * global zone. dladm_zone_boot() tells dlmgmtd that this zone is
913 913 * booting, and loads its datalinks from the zone's datalink
914 914 * configuration file.
915 915 */
916 916 if (vplat_get_iptype(zlogp, &iptype) == 0 && iptype == ZS_EXCLUSIVE) {
917 917 status = dladm_zone_boot(dld_handle, zoneid);
918 918 if (status != DLADM_STATUS_OK) {
919 919 zerror(zlogp, B_FALSE, "unable to load zone datalinks: "
920 920 " %s", dladm_status2str(status, errmsg));
921 921 goto bad;
922 922 }
923 923 links_loaded = B_TRUE;
924 924 }
925 925
926 926 /*
927 927 * If there is a brand 'boot' callback, execute it now to give the
928 928 * brand one last chance to do any additional setup before the zone
929 929 * is booted.
930 930 */
931 931 if ((strlen(cmdbuf) > EXEC_LEN) &&
932 932 (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) {
933 933 zerror(zlogp, B_FALSE, "%s failed", cmdbuf);
934 934 goto bad;
935 935 }
936 936
937 937 if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) {
938 938 zerror(zlogp, B_TRUE, "could not set zone boot file");
939 939 goto bad;
940 940 }
941 941
942 942 if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) {
943 943 zerror(zlogp, B_TRUE, "could not set zone boot arguments");
944 944 goto bad;
945 945 }
946 946
947 947 if (!restart_init && zone_setattr(zoneid, ZONE_ATTR_INITNORESTART,
948 948 NULL, 0) == -1) {
949 949 zerror(zlogp, B_TRUE, "could not set zone init-no-restart");
950 950 goto bad;
951 951 }
952 952
953 953 /*
954 954 * Inform zonestatd of a new zone so that it can install a door for
955 955 * the zone to contact it.
956 956 */
957 957 notify_zonestatd(zone_id);
958 958
959 959 if (zone_boot(zoneid) == -1) {
960 960 zerror(zlogp, B_TRUE, "unable to boot zone");
961 961 goto bad;
962 962 }
963 963
964 964 if (brand_poststatechg(zlogp, zstate, Z_BOOT) != 0)
965 965 goto bad;
966 966
967 967 return (0);
968 968
969 969 bad:
970 970 /*
971 971 * If something goes wrong, we up the zones's state to the target
972 972 * state, RUNNING, and then invoke the hook as if we're halting.
973 973 */
974 974 (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT);
975 975 if (links_loaded)
976 976 (void) dladm_zone_halt(dld_handle, zoneid);
977 977 return (-1);
978 978 }
979 979
980 980 static int
981 981 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate)
982 982 {
983 983 int err;
984 984
985 985 if (brand_prestatechg(zlogp, zstate, Z_HALT) != 0)
986 986 return (-1);
987 987
988 988 if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) {
989 989 if (!bringup_failure_recovery)
990 990 zerror(zlogp, B_FALSE, "unable to destroy zone");
991 991 return (-1);
992 992 }
993 993
994 994 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
995 995 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
996 996 zonecfg_strerror(err));
997 997
998 998 if (brand_poststatechg(zlogp, zstate, Z_HALT) != 0)
999 999 return (-1);
1000 1000
1001 1001 return (0);
1002 1002 }
1003 1003
1004 1004 static int
1005 1005 zone_graceful_shutdown(zlog_t *zlogp)
1006 1006 {
1007 1007 zoneid_t zoneid;
1008 1008 pid_t child;
1009 1009 char cmdbuf[MAXPATHLEN];
1010 1010 brand_handle_t bh = NULL;
1011 1011 char zpath[MAXPATHLEN];
1012 1012 ctid_t ct;
1013 1013 int tmpl_fd;
1014 1014 int child_status;
1015 1015
1016 1016 if (shutdown_in_progress) {
1017 1017 zerror(zlogp, B_FALSE, "shutdown already in progress");
1018 1018 return (-1);
1019 1019 }
1020 1020
1021 1021 if ((zoneid = getzoneidbyname(zone_name)) == -1) {
1022 1022 zerror(zlogp, B_TRUE, "unable to get zoneid");
1023 1023 return (-1);
1024 1024 }
1025 1025
1026 1026 /* Get a handle to the brand info for this zone */
1027 1027 if ((bh = brand_open(brand_name)) == NULL) {
1028 1028 zerror(zlogp, B_FALSE, "unable to determine zone brand");
1029 1029 return (-1);
1030 1030 }
1031 1031
1032 1032 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
1033 1033 zerror(zlogp, B_FALSE, "unable to determine zone path");
1034 1034 brand_close(bh);
1035 1035 return (-1);
1036 1036 }
1037 1037
1038 1038 /*
1039 1039 * If there is a brand 'shutdown' callback, execute it now to give the
1040 1040 * brand a chance to cleanup any custom configuration.
1041 1041 */
1042 1042 (void) strcpy(cmdbuf, EXEC_PREFIX);
1043 1043 if (brand_get_shutdown(bh, zone_name, zpath, cmdbuf + EXEC_LEN,
1044 1044 sizeof (cmdbuf) - EXEC_LEN) != 0 || strlen(cmdbuf) <= EXEC_LEN) {
1045 1045 (void) strcat(cmdbuf, SHUTDOWN_DEFAULT);
1046 1046 }
1047 1047 brand_close(bh);
1048 1048
1049 1049 if ((tmpl_fd = init_template()) == -1) {
1050 1050 zerror(zlogp, B_TRUE, "failed to create contract");
1051 1051 return (-1);
1052 1052 }
1053 1053
1054 1054 if ((child = fork()) == -1) {
1055 1055 (void) ct_tmpl_clear(tmpl_fd);
1056 1056 (void) close(tmpl_fd);
1057 1057 zerror(zlogp, B_TRUE, "failed to fork");
1058 1058 return (-1);
1059 1059 } else if (child == 0) {
1060 1060 (void) ct_tmpl_clear(tmpl_fd);
1061 1061 if (zone_enter(zoneid) == -1) {
1062 1062 _exit(errno);
1063 1063 }
1064 1064 _exit(execl("/bin/sh", "sh", "-c", cmdbuf, (char *)NULL));
1065 1065 }
1066 1066
1067 1067 if (contract_latest(&ct) == -1)
1068 1068 ct = -1;
1069 1069 (void) ct_tmpl_clear(tmpl_fd);
1070 1070 (void) close(tmpl_fd);
1071 1071
1072 1072 if (waitpid(child, &child_status, 0) != child) {
1073 1073 /* unexpected: we must have been signalled */
1074 1074 (void) contract_abandon_id(ct);
1075 1075 return (-1);
1076 1076 }
1077 1077
1078 1078 (void) contract_abandon_id(ct);
1079 1079 if (WEXITSTATUS(child_status) != 0) {
1080 1080 errno = WEXITSTATUS(child_status);
1081 1081 zerror(zlogp, B_FALSE, "unable to shutdown zone");
1082 1082 return (-1);
1083 1083 }
1084 1084
1085 1085 shutdown_in_progress = B_TRUE;
1086 1086
1087 1087 return (0);
1088 1088 }
1089 1089
1090 1090 static int
1091 1091 zone_wait_shutdown(zlog_t *zlogp)
1092 1092 {
1093 1093 zone_state_t zstate;
1094 1094 uint64_t *tm = NULL;
1095 1095 scf_simple_prop_t *prop = NULL;
1096 1096 int timeout;
1097 1097 int tries;
1098 1098 int rc = -1;
1099 1099
1100 1100 /* Get default stop timeout from SMF framework */
1101 1101 timeout = SHUTDOWN_WAIT;
1102 1102 if ((prop = scf_simple_prop_get(NULL, SHUTDOWN_FMRI, "stop",
1103 1103 SCF_PROPERTY_TIMEOUT)) != NULL) {
1104 1104 if ((tm = scf_simple_prop_next_count(prop)) != NULL) {
1105 1105 if (tm != 0)
1106 1106 timeout = *tm;
1107 1107 }
1108 1108 scf_simple_prop_free(prop);
1109 1109 }
1110 1110
1111 1111 /* allow time for zone to shutdown cleanly */
1112 1112 for (tries = 0; tries < timeout; tries ++) {
1113 1113 (void) sleep(1);
1114 1114 if (zone_get_state(zone_name, &zstate) == Z_OK &&
1115 1115 zstate == ZONE_STATE_INSTALLED) {
1116 1116 rc = 0;
1117 1117 break;
1118 1118 }
1119 1119 }
1120 1120
1121 1121 if (rc != 0)
1122 1122 zerror(zlogp, B_FALSE, "unable to shutdown zone");
1123 1123
1124 1124 shutdown_in_progress = B_FALSE;
1125 1125
1126 1126 return (rc);
1127 1127 }
1128 1128
1129 1129
1130 1130
1131 1131 /*
1132 1132 * Generate AUE_zone_state for a command that boots a zone.
1133 1133 */
1134 1134 static void
1135 1135 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val,
1136 1136 char *new_state)
1137 1137 {
1138 1138 adt_session_data_t *ah;
1139 1139 adt_event_data_t *event;
1140 1140 int pass_fail, fail_reason;
1141 1141
1142 1142 if (!adt_audit_enabled())
1143 1143 return;
1144 1144
1145 1145 if (return_val == 0) {
1146 1146 pass_fail = ADT_SUCCESS;
1147 1147 fail_reason = ADT_SUCCESS;
1148 1148 } else {
1149 1149 pass_fail = ADT_FAILURE;
1150 1150 fail_reason = ADT_FAIL_VALUE_PROGRAM;
1151 1151 }
1152 1152
1153 1153 if (adt_start_session(&ah, NULL, 0)) {
1154 1154 zerror(zlogp, B_TRUE, gettext("audit failure."));
1155 1155 return;
1156 1156 }
1157 1157 if (adt_set_from_ucred(ah, uc, ADT_NEW)) {
1158 1158 zerror(zlogp, B_TRUE, gettext("audit failure."));
1159 1159 (void) adt_end_session(ah);
1160 1160 return;
1161 1161 }
1162 1162
1163 1163 event = adt_alloc_event(ah, ADT_zone_state);
1164 1164 if (event == NULL) {
1165 1165 zerror(zlogp, B_TRUE, gettext("audit failure."));
1166 1166 (void) adt_end_session(ah);
1167 1167 return;
1168 1168 }
1169 1169 event->adt_zone_state.zonename = zone_name;
1170 1170 event->adt_zone_state.new_state = new_state;
1171 1171
1172 1172 if (adt_put_event(event, pass_fail, fail_reason))
1173 1173 zerror(zlogp, B_TRUE, gettext("audit failure."));
1174 1174
1175 1175 adt_free_event(event);
1176 1176
1177 1177 (void) adt_end_session(ah);
1178 1178 }
1179 1179
1180 1180 /*
1181 1181 * The main routine for the door server that deals with zone state transitions.
1182 1182 */
1183 1183 /* ARGSUSED */
1184 1184 static void
1185 1185 server(void *cookie, char *args, size_t alen, door_desc_t *dp,
1186 1186 uint_t n_desc)
1187 1187 {
1188 1188 ucred_t *uc = NULL;
1189 1189 const priv_set_t *eset;
1190 1190
1191 1191 zone_state_t zstate;
1192 1192 zone_cmd_t cmd;
1193 1193 zone_cmd_arg_t *zargp;
1194 1194
1195 1195 boolean_t kernelcall;
1196 1196
1197 1197 int rval = -1;
1198 1198 uint64_t uniqid;
1199 1199 zoneid_t zoneid = -1;
1200 1200 zlog_t zlog;
1201 1201 zlog_t *zlogp;
1202 1202 zone_cmd_rval_t *rvalp;
1203 1203 size_t rlen = getpagesize(); /* conservative */
1204 1204 fs_callback_t cb;
1205 1205 brand_handle_t bh;
1206 1206 boolean_t wait_shut = B_FALSE;
1207 1207
1208 1208 /* LINTED E_BAD_PTR_CAST_ALIGN */
1209 1209 zargp = (zone_cmd_arg_t *)args;
1210 1210
1211 1211 /*
1212 1212 * When we get the door unref message, we've fdetach'd the door, and
1213 1213 * it is time for us to shut down zoneadmd.
1214 1214 */
1215 1215 if (zargp == DOOR_UNREF_DATA) {
1216 1216 /*
1217 1217 * See comment at end of main() for info on the last rites.
1218 1218 */
1219 1219 exit(0);
1220 1220 }
1221 1221
1222 1222 if (zargp == NULL) {
1223 1223 (void) door_return(NULL, 0, 0, 0);
1224 1224 }
1225 1225
1226 1226 rvalp = alloca(rlen);
1227 1227 bzero(rvalp, rlen);
1228 1228 zlog.logfile = NULL;
1229 1229 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1;
1230 1230 zlog.buf = rvalp->errbuf;
1231 1231 zlog.log = zlog.buf;
1232 1232 /* defer initialization of zlog.locale until after credential check */
1233 1233 zlogp = &zlog;
1234 1234
1235 1235 if (alen != sizeof (zone_cmd_arg_t)) {
1236 1236 /*
1237 1237 * This really shouldn't be happening.
1238 1238 */
1239 1239 zerror(&logsys, B_FALSE, "argument size (%d bytes) "
1240 1240 "unexpected (expected %d bytes)", alen,
1241 1241 sizeof (zone_cmd_arg_t));
1242 1242 goto out;
1243 1243 }
1244 1244 cmd = zargp->cmd;
1245 1245
1246 1246 if (door_ucred(&uc) != 0) {
1247 1247 zerror(&logsys, B_TRUE, "door_ucred");
1248 1248 goto out;
1249 1249 }
1250 1250 eset = ucred_getprivset(uc, PRIV_EFFECTIVE);
1251 1251 if (ucred_getzoneid(uc) != GLOBAL_ZONEID ||
1252 1252 (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) :
1253 1253 ucred_geteuid(uc) != 0)) {
1254 1254 zerror(&logsys, B_FALSE, "insufficient privileges");
1255 1255 goto out;
1256 1256 }
1257 1257
1258 1258 kernelcall = ucred_getpid(uc) == 0;
1259 1259
1260 1260 /*
1261 1261 * This is safe because we only use a zlog_t throughout the
1262 1262 * duration of a door call; i.e., by the time the pointer
1263 1263 * might become invalid, the door call would be over.
1264 1264 */
1265 1265 zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale;
1266 1266
1267 1267 (void) mutex_lock(&lock);
1268 1268
1269 1269 /*
1270 1270 * Once we start to really die off, we don't want more connections.
1271 1271 */
1272 1272 if (in_death_throes) {
1273 1273 (void) mutex_unlock(&lock);
1274 1274 ucred_free(uc);
1275 1275 (void) door_return(NULL, 0, 0, 0);
1276 1276 thr_exit(NULL);
1277 1277 }
1278 1278
1279 1279 /*
1280 1280 * Check for validity of command.
1281 1281 */
1282 1282 if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_FORCEBOOT &&
1283 1283 cmd != Z_REBOOT && cmd != Z_SHUTDOWN && cmd != Z_HALT &&
1284 1284 cmd != Z_NOTE_UNINSTALLING && cmd != Z_MOUNT &&
1285 1285 cmd != Z_FORCEMOUNT && cmd != Z_UNMOUNT) {
1286 1286 zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd);
1287 1287 goto out;
1288 1288 }
1289 1289
1290 1290 if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) {
1291 1291 /*
1292 1292 * Can't happen
1293 1293 */
1294 1294 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d",
1295 1295 cmd);
1296 1296 goto out;
1297 1297 }
1298 1298 /*
1299 1299 * We ignore the possibility of someone calling zone_create(2)
1300 1300 * explicitly; all requests must come through zoneadmd.
1301 1301 */
1302 1302 if (zone_get_state(zone_name, &zstate) != Z_OK) {
1303 1303 /*
1304 1304 * Something terribly wrong happened
1305 1305 */
1306 1306 zerror(&logsys, B_FALSE, "unable to determine state of zone");
1307 1307 goto out;
1308 1308 }
1309 1309
1310 1310 if (kernelcall) {
1311 1311 /*
1312 1312 * Kernel-initiated requests may lose their validity if the
1313 1313 * zone_t the kernel was referring to has gone away.
1314 1314 */
1315 1315 if ((zoneid = getzoneidbyname(zone_name)) == -1 ||
1316 1316 zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid,
1317 1317 sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) {
1318 1318 /*
1319 1319 * We're not talking about the same zone. The request
1320 1320 * must have arrived too late. Return error.
1321 1321 */
1322 1322 rval = -1;
1323 1323 goto out;
1324 1324 }
1325 1325 zlogp = &logsys; /* Log errors to syslog */
1326 1326 }
1327 1327
1328 1328 /*
1329 1329 * If we are being asked to forcibly mount or boot a zone, we
1330 1330 * pretend that an INCOMPLETE zone is actually INSTALLED.
1331 1331 */
1332 1332 if (zstate == ZONE_STATE_INCOMPLETE &&
1333 1333 (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT))
1334 1334 zstate = ZONE_STATE_INSTALLED;
1335 1335
1336 1336 switch (zstate) {
1337 1337 case ZONE_STATE_CONFIGURED:
1338 1338 case ZONE_STATE_INCOMPLETE:
1339 1339 /*
1340 1340 * Not our area of expertise; we just print a nice message
1341 1341 * and die off.
1342 1342 */
1343 1343 zerror(zlogp, B_FALSE,
1344 1344 "%s operation is invalid for zones in state '%s'",
1345 1345 z_cmd_name(cmd), zone_state_str(zstate));
1346 1346 break;
1347 1347
1348 1348 case ZONE_STATE_INSTALLED:
1349 1349 switch (cmd) {
1350 1350 case Z_READY:
1351 1351 rval = zone_ready(zlogp, Z_MNT_BOOT, zstate);
1352 1352 if (rval == 0)
1353 1353 eventstream_write(Z_EVT_ZONE_READIED);
1354 1354 break;
1355 1355 case Z_BOOT:
1356 1356 case Z_FORCEBOOT:
1357 1357 eventstream_write(Z_EVT_ZONE_BOOTING);
1358 1358 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
1359 1359 == 0) {
1360 1360 rval = zone_bootup(zlogp, zargp->bootbuf,
1361 1361 zstate);
1362 1362 }
1363 1363 audit_put_record(zlogp, uc, rval, "boot");
1364 1364 if (rval != 0) {
1365 1365 bringup_failure_recovery = B_TRUE;
1366 1366 (void) zone_halt(zlogp, B_FALSE, B_FALSE,
1367 1367 zstate);
1368 1368 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1369 1369 }
1370 1370 break;
1371 1371 case Z_SHUTDOWN:
1372 1372 case Z_HALT:
1373 1373 if (kernelcall) /* Invalid; can't happen */
1374 1374 abort();
1375 1375 /*
1376 1376 * We could have two clients racing to halt this
1377 1377 * zone; the second client loses, but its request
1378 1378 * doesn't fail, since the zone is now in the desired
1379 1379 * state.
1380 1380 */
1381 1381 zerror(zlogp, B_FALSE, "zone is already halted");
1382 1382 rval = 0;
1383 1383 break;
1384 1384 case Z_REBOOT:
1385 1385 if (kernelcall) /* Invalid; can't happen */
1386 1386 abort();
1387 1387 zerror(zlogp, B_FALSE, "%s operation is invalid "
1388 1388 "for zones in state '%s'", z_cmd_name(cmd),
1389 1389 zone_state_str(zstate));
1390 1390 rval = -1;
1391 1391 break;
1392 1392 case Z_NOTE_UNINSTALLING:
1393 1393 if (kernelcall) /* Invalid; can't happen */
1394 1394 abort();
1395 1395 /*
1396 1396 * Tell the console to print out a message about this.
1397 1397 * Once it does, we will be in_death_throes.
1398 1398 */
1399 1399 eventstream_write(Z_EVT_ZONE_UNINSTALLING);
1400 1400 break;
1401 1401 case Z_MOUNT:
1402 1402 case Z_FORCEMOUNT:
1403 1403 if (kernelcall) /* Invalid; can't happen */
1404 1404 abort();
1405 1405 if (!zone_isnative && !zone_iscluster &&
1406 1406 !zone_islabeled) {
1407 1407 /*
1408 1408 * -U mounts the zone without lofs mounting
1409 1409 * zone file systems back into the scratch
1410 1410 * zone. This is required when mounting
1411 1411 * non-native branded zones.
1412 1412 */
1413 1413 (void) strlcpy(zargp->bootbuf, "-U",
1414 1414 BOOTARGS_MAX);
1415 1415 }
1416 1416
1417 1417 rval = zone_ready(zlogp,
1418 1418 strcmp(zargp->bootbuf, "-U") == 0 ?
1419 1419 Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate);
1420 1420 if (rval != 0)
1421 1421 break;
1422 1422
1423 1423 eventstream_write(Z_EVT_ZONE_READIED);
1424 1424
1425 1425 /*
1426 1426 * Get a handle to the default brand info.
1427 1427 * We must always use the default brand file system
1428 1428 * list when mounting the zone.
1429 1429 */
1430 1430 if ((bh = brand_open(default_brand)) == NULL) {
1431 1431 rval = -1;
1432 1432 break;
1433 1433 }
1434 1434
1435 1435 /*
1436 1436 * Get the list of filesystems to mount from
1437 1437 * the brand configuration. These mounts are done
1438 1438 * via a thread that will enter the zone, so they
1439 1439 * are done from within the context of the zone.
1440 1440 */
1441 1441 cb.zlogp = zlogp;
1442 1442 cb.zoneid = zone_id;
1443 1443 cb.mount_cmd = B_TRUE;
1444 1444 rval = brand_platform_iter_mounts(bh,
1445 1445 mount_early_fs, &cb);
1446 1446
1447 1447 brand_close(bh);
1448 1448
1449 1449 /*
1450 1450 * Ordinarily, /dev/fd would be mounted inside the zone
1451 1451 * by svc:/system/filesystem/usr:default, but since
1452 1452 * we're not booting the zone, we need to do this
1453 1453 * manually.
1454 1454 */
1455 1455 if (rval == 0)
1456 1456 rval = mount_early_fs(&cb,
1457 1457 "fd", "/dev/fd", "fd", NULL);
1458 1458 break;
1459 1459 case Z_UNMOUNT:
1460 1460 if (kernelcall) /* Invalid; can't happen */
1461 1461 abort();
1462 1462 zerror(zlogp, B_FALSE, "zone is already unmounted");
1463 1463 rval = 0;
1464 1464 break;
1465 1465 }
1466 1466 break;
1467 1467
1468 1468 case ZONE_STATE_READY:
1469 1469 switch (cmd) {
1470 1470 case Z_READY:
1471 1471 /*
1472 1472 * We could have two clients racing to ready this
1473 1473 * zone; the second client loses, but its request
1474 1474 * doesn't fail, since the zone is now in the desired
1475 1475 * state.
1476 1476 */
1477 1477 zerror(zlogp, B_FALSE, "zone is already ready");
1478 1478 rval = 0;
1479 1479 break;
1480 1480 case Z_BOOT:
1481 1481 (void) strlcpy(boot_args, zargp->bootbuf,
1482 1482 sizeof (boot_args));
1483 1483 eventstream_write(Z_EVT_ZONE_BOOTING);
1484 1484 rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
1485 1485 audit_put_record(zlogp, uc, rval, "boot");
1486 1486 if (rval != 0) {
1487 1487 bringup_failure_recovery = B_TRUE;
1488 1488 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1489 1489 zstate);
1490 1490 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1491 1491 }
1492 1492 boot_args[0] = '\0';
1493 1493 break;
1494 1494 case Z_HALT:
1495 1495 if (kernelcall) /* Invalid; can't happen */
1496 1496 abort();
1497 1497 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
1498 1498 != 0)
1499 1499 break;
1500 1500 eventstream_write(Z_EVT_ZONE_HALTED);
1501 1501 break;
1502 1502 case Z_SHUTDOWN:
1503 1503 case Z_REBOOT:
1504 1504 case Z_NOTE_UNINSTALLING:
1505 1505 case Z_MOUNT:
1506 1506 case Z_UNMOUNT:
1507 1507 if (kernelcall) /* Invalid; can't happen */
1508 1508 abort();
1509 1509 zerror(zlogp, B_FALSE, "%s operation is invalid "
1510 1510 "for zones in state '%s'", z_cmd_name(cmd),
1511 1511 zone_state_str(zstate));
1512 1512 rval = -1;
1513 1513 break;
1514 1514 }
1515 1515 break;
1516 1516
1517 1517 case ZONE_STATE_MOUNTED:
1518 1518 switch (cmd) {
1519 1519 case Z_UNMOUNT:
1520 1520 if (kernelcall) /* Invalid; can't happen */
1521 1521 abort();
1522 1522 rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate);
1523 1523 if (rval == 0) {
1524 1524 eventstream_write(Z_EVT_ZONE_HALTED);
1525 1525 (void) sema_post(&scratch_sem);
1526 1526 }
1527 1527 break;
1528 1528 default:
1529 1529 if (kernelcall) /* Invalid; can't happen */
1530 1530 abort();
1531 1531 zerror(zlogp, B_FALSE, "%s operation is invalid "
1532 1532 "for zones in state '%s'", z_cmd_name(cmd),
1533 1533 zone_state_str(zstate));
1534 1534 rval = -1;
1535 1535 break;
1536 1536 }
1537 1537 break;
1538 1538
1539 1539 case ZONE_STATE_RUNNING:
1540 1540 case ZONE_STATE_SHUTTING_DOWN:
1541 1541 case ZONE_STATE_DOWN:
1542 1542 switch (cmd) {
1543 1543 case Z_READY:
1544 1544 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
1545 1545 != 0)
1546 1546 break;
1547 1547 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) == 0)
1548 1548 eventstream_write(Z_EVT_ZONE_READIED);
1549 1549 else
1550 1550 eventstream_write(Z_EVT_ZONE_HALTED);
1551 1551 break;
1552 1552 case Z_BOOT:
1553 1553 /*
1554 1554 * We could have two clients racing to boot this
1555 1555 * zone; the second client loses, but its request
1556 1556 * doesn't fail, since the zone is now in the desired
1557 1557 * state.
1558 1558 */
1559 1559 zerror(zlogp, B_FALSE, "zone is already booted");
1560 1560 rval = 0;
1561 1561 break;
1562 1562 case Z_HALT:
1563 1563 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
1564 1564 != 0)
1565 1565 break;
1566 1566 eventstream_write(Z_EVT_ZONE_HALTED);
1567 1567 break;
1568 1568 case Z_REBOOT:
1569 1569 (void) strlcpy(boot_args, zargp->bootbuf,
1570 1570 sizeof (boot_args));
1571 1571 eventstream_write(Z_EVT_ZONE_REBOOTING);
1572 1572 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
1573 1573 != 0) {
1574 1574 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1575 1575 boot_args[0] = '\0';
1576 1576 break;
1577 1577 }
1578 1578 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
1579 1579 != 0) {
1580 1580 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1581 1581 boot_args[0] = '\0';
1582 1582 break;
1583 1583 }
1584 1584 rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
1585 1585 audit_put_record(zlogp, uc, rval, "reboot");
1586 1586 if (rval != 0) {
1587 1587 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1588 1588 zstate);
1589 1589 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1590 1590 }
1591 1591 boot_args[0] = '\0';
1592 1592 break;
1593 1593 case Z_SHUTDOWN:
1594 1594 if ((rval = zone_graceful_shutdown(zlogp)) == 0) {
1595 1595 wait_shut = B_TRUE;
1596 1596 }
1597 1597 break;
1598 1598 case Z_NOTE_UNINSTALLING:
1599 1599 case Z_MOUNT:
1600 1600 case Z_UNMOUNT:
1601 1601 zerror(zlogp, B_FALSE, "%s operation is invalid "
1602 1602 "for zones in state '%s'", z_cmd_name(cmd),
1603 1603 zone_state_str(zstate));
1604 1604 rval = -1;
1605 1605 break;
1606 1606 }
1607 1607 break;
1608 1608 default:
1609 1609 abort();
1610 1610 }
1611 1611
1612 1612 /*
1613 1613 * Because the state of the zone may have changed, we make sure
1614 1614 * to wake the console poller, which is in charge of initiating
1615 1615 * the shutdown procedure as necessary.
1616 1616 */
1617 1617 eventstream_write(Z_EVT_NULL);
1618 1618
1619 1619 out:
1620 1620 (void) mutex_unlock(&lock);
1621 1621
1622 1622 /* Wait for the Z_SHUTDOWN commands to complete */
1623 1623 if (wait_shut)
1624 1624 rval = zone_wait_shutdown(zlogp);
1625 1625
1626 1626 if (kernelcall) {
1627 1627 rvalp = NULL;
1628 1628 rlen = 0;
1629 1629 } else {
1630 1630 rvalp->rval = rval;
1631 1631 }
1632 1632 if (uc != NULL)
1633 1633 ucred_free(uc);
1634 1634 (void) door_return((char *)rvalp, rlen, NULL, 0);
1635 1635 thr_exit(NULL);
1636 1636 }
1637 1637
1638 1638 static int
1639 1639 setup_door(zlog_t *zlogp)
1640 1640 {
1641 1641 if ((zone_door = door_create(server, NULL,
1642 1642 DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) {
1643 1643 zerror(zlogp, B_TRUE, "%s failed", "door_create");
1644 1644 return (-1);
1645 1645 }
1646 1646 (void) fdetach(zone_door_path);
1647 1647
1648 1648 if (fattach(zone_door, zone_door_path) != 0) {
1649 1649 zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path);
1650 1650 (void) door_revoke(zone_door);
1651 1651 (void) fdetach(zone_door_path);
1652 1652 zone_door = -1;
1653 1653 return (-1);
1654 1654 }
1655 1655 return (0);
1656 1656 }
1657 1657
1658 1658 /*
1659 1659 * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this
1660 1660 * is where zoneadmd itself will check to see that another instance of
1661 1661 * zoneadmd isn't already controlling this zone.
1662 1662 *
1663 1663 * The idea here is that we want to open the path to which we will
1664 1664 * attach our door, lock it, and then make sure that no-one has beat us
1665 1665 * to fattach(3c)ing onto it.
1666 1666 *
1667 1667 * fattach(3c) is really a mount, so there are actually two possible
1668 1668 * vnodes we could be dealing with. Our strategy is as follows:
1669 1669 *
1670 1670 * - If the file we opened is a regular file (common case):
1671 1671 * There is no fattach(3c)ed door, so we have a chance of becoming
1672 1672 * the managing zoneadmd. We attempt to lock the file: if it is
1673 1673 * already locked, that means someone else raced us here, so we
1674 1674 * lose and give up. zoneadm(1m) will try to contact the zoneadmd
1675 1675 * that beat us to it.
1676 1676 *
1677 1677 * - If the file we opened is a namefs file:
1678 1678 * This means there is already an established door fattach(3c)'ed
1679 1679 * to the rendezvous path. We've lost the race, so we give up.
1680 1680 * Note that in this case we also try to grab the file lock, and
1681 1681 * will succeed in acquiring it since the vnode locked by the
1682 1682 * "winning" zoneadmd was a regular one, and the one we locked was
1683 1683 * the fattach(3c)'ed door node. At any rate, no harm is done, and
1684 1684 * we just return to zoneadm(1m) which knows to retry.
1685 1685 */
1686 1686 static int
1687 1687 make_daemon_exclusive(zlog_t *zlogp)
1688 1688 {
1689 1689 int doorfd = -1;
1690 1690 int err, ret = -1;
1691 1691 struct stat st;
1692 1692 struct flock flock;
1693 1693 zone_state_t zstate;
1694 1694
1695 1695 top:
1696 1696 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
1697 1697 zerror(zlogp, B_FALSE, "failed to get zone state: %s",
1698 1698 zonecfg_strerror(err));
1699 1699 goto out;
1700 1700 }
1701 1701 if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR,
1702 1702 S_IREAD|S_IWRITE)) < 0) {
1703 1703 zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path);
1704 1704 goto out;
1705 1705 }
1706 1706 if (fstat(doorfd, &st) < 0) {
1707 1707 zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path);
1708 1708 goto out;
1709 1709 }
1710 1710 /*
1711 1711 * Lock the file to synchronize with other zoneadmd
1712 1712 */
1713 1713 flock.l_type = F_WRLCK;
1714 1714 flock.l_whence = SEEK_SET;
1715 1715 flock.l_start = (off_t)0;
1716 1716 flock.l_len = (off_t)0;
1717 1717 if (fcntl(doorfd, F_SETLK, &flock) < 0) {
1718 1718 /*
1719 1719 * Someone else raced us here and grabbed the lock file
1720 1720 * first. A warning here is inappropriate since nothing
1721 1721 * went wrong.
1722 1722 */
1723 1723 goto out;
1724 1724 }
1725 1725
1726 1726 if (strcmp(st.st_fstype, "namefs") == 0) {
1727 1727 struct door_info info;
1728 1728
1729 1729 /*
1730 1730 * There is already something fattach()'ed to this file.
1731 1731 * Lets see what the door is up to.
1732 1732 */
1733 1733 if (door_info(doorfd, &info) == 0 && info.di_target != -1) {
1734 1734 /*
1735 1735 * Another zoneadmd process seems to be in
1736 1736 * control of the situation and we don't need to
1737 1737 * be here. A warning here is inappropriate
1738 1738 * since nothing went wrong.
1739 1739 *
1740 1740 * If the door has been revoked, the zoneadmd
1741 1741 * process currently managing the zone is going
1742 1742 * away. We'll return control to zoneadm(1m)
1743 1743 * which will try again (by which time zoneadmd
1744 1744 * will hopefully have exited).
1745 1745 */
1746 1746 goto out;
1747 1747 }
1748 1748
1749 1749 /*
1750 1750 * If we got this far, there's a fattach(3c)'ed door
1751 1751 * that belongs to a process that has exited, which can
1752 1752 * happen if the previous zoneadmd died unexpectedly.
1753 1753 *
1754 1754 * Let user know that something is amiss, but that we can
1755 1755 * recover; if the zone is in the installed state, then don't
1756 1756 * message, since having a running zoneadmd isn't really
1757 1757 * expected/needed. We want to keep occurences of this message
1758 1758 * limited to times when zoneadmd is picking back up from a
1759 1759 * zoneadmd that died while the zone was in some non-trivial
1760 1760 * state.
1761 1761 */
1762 1762 if (zstate > ZONE_STATE_INSTALLED) {
1763 1763 zerror(zlogp, B_FALSE,
1764 1764 "zone '%s': WARNING: zone is in state '%s', but "
1765 1765 "zoneadmd does not appear to be available; "
1766 1766 "restarted zoneadmd to recover.",
1767 1767 zone_name, zone_state_str(zstate));
1768 1768 }
1769 1769
1770 1770 (void) fdetach(zone_door_path);
1771 1771 (void) close(doorfd);
1772 1772 goto top;
1773 1773 }
1774 1774 ret = 0;
1775 1775 out:
1776 1776 (void) close(doorfd);
1777 1777 return (ret);
1778 1778 }
1779 1779
1780 1780 /*
1781 1781 * Setup the brand's pre and post state change callbacks, as well as the
1782 1782 * query callback, if any of these exist.
1783 1783 */
1784 1784 static int
1785 1785 brand_callback_init(brand_handle_t bh, char *zone_name)
1786 1786 {
1787 1787 char zpath[MAXPATHLEN];
1788 1788
1789 1789 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK)
1790 1790 return (-1);
1791 1791
1792 1792 (void) strlcpy(pre_statechg_hook, EXEC_PREFIX,
1793 1793 sizeof (pre_statechg_hook));
1794 1794
1795 1795 if (brand_get_prestatechange(bh, zone_name, zpath,
1796 1796 pre_statechg_hook + EXEC_LEN,
1797 1797 sizeof (pre_statechg_hook) - EXEC_LEN) != 0)
1798 1798 return (-1);
1799 1799
1800 1800 if (strlen(pre_statechg_hook) <= EXEC_LEN)
1801 1801 pre_statechg_hook[0] = '\0';
1802 1802
1803 1803 (void) strlcpy(post_statechg_hook, EXEC_PREFIX,
1804 1804 sizeof (post_statechg_hook));
1805 1805
1806 1806 if (brand_get_poststatechange(bh, zone_name, zpath,
1807 1807 post_statechg_hook + EXEC_LEN,
1808 1808 sizeof (post_statechg_hook) - EXEC_LEN) != 0)
1809 1809 return (-1);
1810 1810
1811 1811 if (strlen(post_statechg_hook) <= EXEC_LEN)
1812 1812 post_statechg_hook[0] = '\0';
1813 1813
1814 1814 (void) strlcpy(query_hook, EXEC_PREFIX,
1815 1815 sizeof (query_hook));
1816 1816
1817 1817 if (brand_get_query(bh, zone_name, zpath, query_hook + EXEC_LEN,
1818 1818 sizeof (query_hook) - EXEC_LEN) != 0)
1819 1819 return (-1);
1820 1820
1821 1821 if (strlen(query_hook) <= EXEC_LEN)
1822 1822 query_hook[0] = '\0';
1823 1823
1824 1824 return (0);
1825 1825 }
1826 1826
1827 1827 int
1828 1828 main(int argc, char *argv[])
1829 1829 {
1830 1830 int opt;
1831 1831 zoneid_t zid;
1832 1832 priv_set_t *privset;
1833 1833 zone_state_t zstate;
1834 1834 char parents_locale[MAXPATHLEN];
1835 1835 brand_handle_t bh;
1836 1836 int err;
1837 1837
1838 1838 pid_t pid;
1839 1839 sigset_t blockset;
1840 1840 sigset_t block_cld;
1841 1841
1842 1842 struct {
1843 1843 sema_t sem;
1844 1844 int status;
1845 1845 zlog_t log;
1846 1846 } *shstate;
1847 1847 size_t shstatelen = getpagesize();
1848 1848
1849 1849 zlog_t errlog;
1850 1850 zlog_t *zlogp;
1851 1851
1852 1852 int ctfd;
1853 1853
1854 1854 progname = get_execbasename(argv[0]);
1855 1855
1856 1856 /*
1857 1857 * Make sure stderr is unbuffered
1858 1858 */
1859 1859 (void) setbuffer(stderr, NULL, 0);
1860 1860
1861 1861 /*
1862 1862 * Get out of the way of mounted filesystems, since we will daemonize
1863 1863 * soon.
1864 1864 */
1865 1865 (void) chdir("/");
1866 1866
1867 1867 /*
1868 1868 * Use the default system umask per PSARC 1998/110 rather than
1869 1869 * anything that may have been set by the caller.
1870 1870 */
1871 1871 (void) umask(CMASK);
1872 1872
1873 1873 /*
1874 1874 * Initially we want to use our parent's locale.
1875 1875 */
1876 1876 (void) setlocale(LC_ALL, "");
1877 1877 (void) textdomain(TEXT_DOMAIN);
1878 1878 (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL),
1879 1879 sizeof (parents_locale));
1880 1880
1881 1881 /*
1882 1882 * This zlog_t is used for writing to stderr
1883 1883 */
1884 1884 errlog.logfile = stderr;
1885 1885 errlog.buflen = errlog.loglen = 0;
1886 1886 errlog.buf = errlog.log = NULL;
1887 1887 errlog.locale = parents_locale;
1888 1888
1889 1889 /*
1890 1890 * We start off writing to stderr until we're ready to daemonize.
1891 1891 */
1892 1892 zlogp = &errlog;
1893 1893
1894 1894 /*
1895 1895 * Process options.
1896 1896 */
1897 1897 while ((opt = getopt(argc, argv, "R:z:")) != EOF) {
1898 1898 switch (opt) {
1899 1899 case 'R':
1900 1900 zonecfg_set_root(optarg);
1901 1901 break;
1902 1902 case 'z':
1903 1903 zone_name = optarg;
1904 1904 break;
1905 1905 default:
1906 1906 usage();
1907 1907 }
1908 1908 }
1909 1909
1910 1910 if (zone_name == NULL)
1911 1911 usage();
1912 1912
1913 1913 /*
1914 1914 * Because usage() prints directly to stderr, it has gettext()
1915 1915 * wrapping, which depends on the locale. But since zerror() calls
1916 1916 * localize() which tweaks the locale, it is not safe to call zerror()
1917 1917 * until after the last call to usage(). Fortunately, the last call
1918 1918 * to usage() is just above and the first call to zerror() is just
1919 1919 * below. Don't mess this up.
1920 1920 */
1921 1921 if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) {
1922 1922 zerror(zlogp, B_FALSE, "cannot manage the %s zone",
1923 1923 GLOBAL_ZONENAME);
1924 1924 return (1);
1925 1925 }
1926 1926
1927 1927 if (zone_get_id(zone_name, &zid) != 0) {
1928 1928 zerror(zlogp, B_FALSE, "could not manage %s: %s", zone_name,
1929 1929 zonecfg_strerror(Z_NO_ZONE));
1930 1930 return (1);
1931 1931 }
1932 1932
1933 1933 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
1934 1934 zerror(zlogp, B_FALSE, "failed to get zone state: %s",
1935 1935 zonecfg_strerror(err));
1936 1936 return (1);
1937 1937 }
1938 1938 if (zstate < ZONE_STATE_INCOMPLETE) {
1939 1939 zerror(zlogp, B_FALSE,
1940 1940 "cannot manage a zone which is in state '%s'",
1941 1941 zone_state_str(zstate));
1942 1942 return (1);
1943 1943 }
1944 1944
1945 1945 if (zonecfg_default_brand(default_brand,
1946 1946 sizeof (default_brand)) != Z_OK) {
1947 1947 zerror(zlogp, B_FALSE, "unable to determine default brand");
1948 1948 return (1);
1949 1949 }
1950 1950
1951 1951 /* Get a handle to the brand info for this zone */
1952 1952 if (zone_get_brand(zone_name, brand_name, sizeof (brand_name))
1953 1953 != Z_OK) {
1954 1954 zerror(zlogp, B_FALSE, "unable to determine zone brand");
1955 1955 return (1);
1956 1956 }
1957 1957 zone_isnative = (strcmp(brand_name, NATIVE_BRAND_NAME) == 0);
1958 1958 zone_islabeled = (strcmp(brand_name, LABELED_BRAND_NAME) == 0);
1959 1959
1960 1960 /*
1961 1961 * In the alternate root environment, the only supported
1962 1962 * operations are mount and unmount. In this case, just treat
1963 1963 * the zone as native if it is cluster. Cluster zones can be
1964 1964 * native for the purpose of LU or upgrade, and the cluster
1965 1965 * brand may not exist in the miniroot (such as in net install
1966 1966 * upgrade).
1967 1967 */
1968 1968 if (strcmp(brand_name, CLUSTER_BRAND_NAME) == 0) {
1969 1969 zone_iscluster = B_TRUE;
1970 1970 if (zonecfg_in_alt_root()) {
1971 1971 (void) strlcpy(brand_name, default_brand,
1972 1972 sizeof (brand_name));
1973 1973 }
1974 1974 } else {
1975 1975 zone_iscluster = B_FALSE;
1976 1976 }
1977 1977
1978 1978 if ((bh = brand_open(brand_name)) == NULL) {
1979 1979 zerror(zlogp, B_FALSE, "unable to open zone brand");
1980 1980 return (1);
1981 1981 }
1982 1982
1983 1983 /* Get state change brand hooks. */
1984 1984 if (brand_callback_init(bh, zone_name) == -1) {
1985 1985 zerror(zlogp, B_TRUE,
1986 1986 "failed to initialize brand state change hooks");
1987 1987 brand_close(bh);
1988 1988 return (1);
1989 1989 }
1990 1990
1991 1991 brand_close(bh);
1992 1992
1993 1993 /*
1994 1994 * Check that we have all privileges. It would be nice to pare
1995 1995 * this down, but this is at least a first cut.
1996 1996 */
1997 1997 if ((privset = priv_allocset()) == NULL) {
1998 1998 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
1999 1999 return (1);
2000 2000 }
2001 2001
2002 2002 if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
2003 2003 zerror(zlogp, B_TRUE, "%s failed", "getppriv");
2004 2004 priv_freeset(privset);
2005 2005 return (1);
2006 2006 }
2007 2007
2008 2008 if (priv_isfullset(privset) == B_FALSE) {
2009 2009 zerror(zlogp, B_FALSE, "You lack sufficient privilege to "
2010 2010 "run this command (all privs required)");
2011 2011 priv_freeset(privset);
2012 2012 return (1);
2013 2013 }
2014 2014 priv_freeset(privset);
2015 2015
2016 2016 if (mkzonedir(zlogp) != 0)
2017 2017 return (1);
2018 2018
2019 2019 /*
2020 2020 * Pre-fork: setup shared state
2021 2021 */
2022 2022 if ((shstate = (void *)mmap(NULL, shstatelen,
2023 2023 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) ==
2024 2024 MAP_FAILED) {
2025 2025 zerror(zlogp, B_TRUE, "%s failed", "mmap");
2026 2026 return (1);
2027 2027 }
2028 2028 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) {
2029 2029 zerror(zlogp, B_TRUE, "%s failed", "sema_init()");
2030 2030 (void) munmap((char *)shstate, shstatelen);
2031 2031 return (1);
2032 2032 }
2033 2033 shstate->log.logfile = NULL;
2034 2034 shstate->log.buflen = shstatelen - sizeof (*shstate);
2035 2035 shstate->log.loglen = shstate->log.buflen;
2036 2036 shstate->log.buf = (char *)shstate + sizeof (*shstate);
2037 2037 shstate->log.log = shstate->log.buf;
2038 2038 shstate->log.locale = parents_locale;
2039 2039 shstate->status = -1;
2040 2040
2041 2041 /*
2042 2042 * We need a SIGCHLD handler so the sema_wait() below will wake
2043 2043 * up if the child dies without doing a sema_post().
2044 2044 */
2045 2045 (void) sigset(SIGCHLD, sigchld);
2046 2046 /*
2047 2047 * We must mask SIGCHLD until after we've coped with the fork
2048 2048 * sufficiently to deal with it; otherwise we can race and
2049 2049 * receive the signal before pid has been initialized
2050 2050 * (yes, this really happens).
2051 2051 */
2052 2052 (void) sigemptyset(&block_cld);
2053 2053 (void) sigaddset(&block_cld, SIGCHLD);
2054 2054 (void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
2055 2055
2056 2056 /*
2057 2057 * The parent only needs stderr after the fork, so close other fd's
2058 2058 * that we inherited from zoneadm so that the parent doesn't have those
2059 2059 * open while waiting. The child will close the rest after the fork.
2060 2060 */
2061 2061 closefrom(3);
2062 2062
2063 2063 if ((ctfd = init_template()) == -1) {
2064 2064 zerror(zlogp, B_TRUE, "failed to create contract");
2065 2065 return (1);
2066 2066 }
2067 2067
2068 2068 /*
2069 2069 * Do not let another thread localize a message while we are forking.
2070 2070 */
2071 2071 (void) mutex_lock(&msglock);
2072 2072 pid = fork();
2073 2073 (void) mutex_unlock(&msglock);
2074 2074
2075 2075 /*
2076 2076 * In all cases (parent, child, and in the event of an error) we
2077 2077 * don't want to cause creation of contracts on subsequent fork()s.
2078 2078 */
2079 2079 (void) ct_tmpl_clear(ctfd);
2080 2080 (void) close(ctfd);
2081 2081
2082 2082 if (pid == -1) {
2083 2083 zerror(zlogp, B_TRUE, "could not fork");
2084 2084 return (1);
2085 2085
2086 2086 } else if (pid > 0) { /* parent */
2087 2087 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2088 2088 /*
2089 2089 * This marks a window of vulnerability in which we receive
2090 2090 * the SIGCLD before falling into sema_wait (normally we would
2091 2091 * get woken up from sema_wait with EINTR upon receipt of
2092 2092 * SIGCLD). So we may need to use some other scheme like
2093 2093 * sema_posting in the sigcld handler.
2094 2094 * blech
2095 2095 */
2096 2096 (void) sema_wait(&shstate->sem);
2097 2097 (void) sema_destroy(&shstate->sem);
2098 2098 if (shstate->status != 0)
2099 2099 (void) waitpid(pid, NULL, WNOHANG);
2100 2100 /*
2101 2101 * It's ok if we die with SIGPIPE. It's not like we could have
2102 2102 * done anything about it.
2103 2103 */
2104 2104 (void) fprintf(stderr, "%s", shstate->log.buf);
2105 2105 _exit(shstate->status == 0 ? 0 : 1);
2106 2106 }
2107 2107
2108 2108 /*
2109 2109 * The child charges on.
2110 2110 */
2111 2111 (void) sigset(SIGCHLD, SIG_DFL);
2112 2112 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2113 2113
2114 2114 /*
2115 2115 * SIGPIPE can be delivered if we write to a socket for which the
2116 2116 * peer endpoint is gone. That can lead to too-early termination
2117 2117 * of zoneadmd, and that's not good eats.
2118 2118 */
2119 2119 (void) sigset(SIGPIPE, SIG_IGN);
2120 2120 /*
2121 2121 * Stop using stderr
2122 2122 */
2123 2123 zlogp = &shstate->log;
2124 2124
2125 2125 /*
2126 2126 * We don't need stdout/stderr from now on.
2127 2127 */
2128 2128 closefrom(0);
2129 2129
2130 2130 /*
2131 2131 * Initialize the syslog zlog_t. This needs to be done after
2132 2132 * the call to closefrom().
2133 2133 */
2134 2134 logsys.buf = logsys.log = NULL;
2135 2135 logsys.buflen = logsys.loglen = 0;
2136 2136 logsys.logfile = NULL;
2137 2137 logsys.locale = DEFAULT_LOCALE;
2138 2138
2139 2139 openlog("zoneadmd", LOG_PID, LOG_DAEMON);
2140 2140
2141 2141 /*
2142 2142 * The eventstream is used to publish state changes in the zone
2143 2143 * from the door threads to the console I/O poller.
2144 2144 */
2145 2145 if (eventstream_init() == -1) {
2146 2146 zerror(zlogp, B_TRUE, "unable to create eventstream");
2147 2147 goto child_out;
2148 2148 }
2149 2149
2150 2150 (void) snprintf(zone_door_path, sizeof (zone_door_path),
2151 2151 "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name);
2152 2152
2153 2153 /*
2154 2154 * See if another zoneadmd is running for this zone. If not, then we
2155 2155 * can now modify system state.
2156 2156 */
2157 2157 if (make_daemon_exclusive(zlogp) == -1)
2158 2158 goto child_out;
2159 2159
2160 2160
2161 2161 /*
2162 2162 * Create/join a new session; we need to be careful of what we do with
2163 2163 * the console from now on so we don't end up being the session leader
2164 2164 * for the terminal we're going to be handing out.
2165 2165 */
2166 2166 (void) setsid();
2167 2167
2168 2168 /*
2169 2169 * This thread shouldn't be receiving any signals; in particular,
2170 2170 * SIGCHLD should be received by the thread doing the fork().
2171 2171 */
2172 2172 (void) sigfillset(&blockset);
2173 2173 (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL);
2174 2174
2175 2175 /*
2176 2176 * Setup the console device and get ready to serve the console;
2177 2177 * once this has completed, we're ready to let console clients
2178 2178 * make an attempt to connect (they will block until
2179 2179 * serve_console_sock() below gets called, and any pending
2180 2180 * connection is accept()ed).
2181 2181 */
2182 2182 if (!zonecfg_in_alt_root() && init_console(zlogp) < 0)
2183 2183 goto child_out;
2184 2184
2185 2185 /*
2186 2186 * Take the lock now, so that when the door server gets going, we
2187 2187 * are guaranteed that it won't take a request until we are sure
2188 2188 * that everything is completely set up. See the child_out: label
2189 2189 * below to see why this matters.
2190 2190 */
2191 2191 (void) mutex_lock(&lock);
2192 2192
2193 2193 /* Init semaphore for scratch zones. */
2194 2194 if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) {
2195 2195 zerror(zlogp, B_TRUE,
2196 2196 "failed to initialize semaphore for scratch zone");
2197 2197 goto child_out;
2198 2198 }
2199 2199
2200 2200 /* open the dladm handle */
2201 2201 if (dladm_open(&dld_handle) != DLADM_STATUS_OK) {
2202 2202 zerror(zlogp, B_FALSE, "failed to open dladm handle");
2203 2203 goto child_out;
2204 2204 }
2205 2205
2206 2206 /*
2207 2207 * Note: door setup must occur *after* the console is setup.
2208 2208 * This is so that as zlogin tests the door to see if zoneadmd
2209 2209 * is ready yet, we know that the console will get serviced
2210 2210 * once door_info() indicates that the door is "up".
2211 2211 */
2212 2212 if (setup_door(zlogp) == -1)
2213 2213 goto child_out;
2214 2214
2215 2215 /*
2216 2216 * Things seem OK so far; tell the parent process that we're done
2217 2217 * with setup tasks. This will cause the parent to exit, signalling
2218 2218 * to zoneadm, zlogin, or whatever forked it that we are ready to
2219 2219 * service requests.
2220 2220 */
2221 2221 shstate->status = 0;
2222 2222 (void) sema_post(&shstate->sem);
2223 2223 (void) munmap((char *)shstate, shstatelen);
2224 2224 shstate = NULL;
2225 2225
2226 2226 (void) mutex_unlock(&lock);
2227 2227
2228 2228 /*
2229 2229 * zlogp is now invalid, so reset it to the syslog logger.
2230 2230 */
2231 2231 zlogp = &logsys;
2232 2232
2233 2233 /*
2234 2234 * Now that we are free of any parents, switch to the default locale.
2235 2235 */
2236 2236 (void) setlocale(LC_ALL, DEFAULT_LOCALE);
2237 2237
2238 2238 /*
2239 2239 * At this point the setup portion of main() is basically done, so
2240 2240 * we reuse this thread to manage the zone console. When
2241 2241 * serve_console() has returned, we are past the point of no return
2242 2242 * in the life of this zoneadmd.
2243 2243 */
2244 2244 if (zonecfg_in_alt_root()) {
2245 2245 /*
2246 2246 * This is just awful, but mounted scratch zones don't (and
2247 2247 * can't) have consoles. We just wait for unmount instead.
2248 2248 */
2249 2249 while (sema_wait(&scratch_sem) == EINTR)
2250 2250 ;
2251 2251 } else {
2252 2252 serve_console(zlogp);
2253 2253 assert(in_death_throes);
2254 2254 }
2255 2255
2256 2256 /*
2257 2257 * This is the next-to-last part of the exit interlock. Upon calling
2258 2258 * fdetach(), the door will go unreferenced; once any
2259 2259 * outstanding requests (like the door thread doing Z_HALT) are
2260 2260 * done, the door will get an UNREF notification; when it handles
2261 2261 * the UNREF, the door server will cause the exit. It's possible
2262 2262 * that fdetach() can fail because the file is in use, in which
2263 2263 * case we'll retry the operation.
2264 2264 */
2265 2265 assert(!MUTEX_HELD(&lock));
2266 2266 for (;;) {
↓ open down ↓ |
2266 lines elided |
↑ open up ↑ |
2267 2267 if ((fdetach(zone_door_path) == 0) || (errno != EBUSY))
2268 2268 break;
2269 2269 yield();
2270 2270 }
2271 2271
2272 2272 for (;;)
2273 2273 (void) pause();
2274 2274
2275 2275 child_out:
2276 2276 assert(pid == 0);
2277 - if (shstate != NULL) {
2278 - shstate->status = -1;
2279 - (void) sema_post(&shstate->sem);
2280 - (void) munmap((char *)shstate, shstatelen);
2281 - }
2282 2277
2278 + shstate->status = -1;
2279 + (void) sema_post(&shstate->sem);
2280 + (void) munmap((char *)shstate, shstatelen);
2281 +
2283 2282 /*
2284 2283 * This might trigger an unref notification, but if so,
2285 2284 * we are still holding the lock, so our call to exit will
2286 2285 * ultimately win the race and will publish the right exit
2287 2286 * code.
2288 2287 */
2289 2288 if (zone_door != -1) {
2290 2289 assert(MUTEX_HELD(&lock));
2291 2290 (void) door_revoke(zone_door);
2292 2291 (void) fdetach(zone_door_path);
2293 2292 }
2294 2293
2295 2294 if (dld_handle != NULL)
2296 2295 dladm_close(dld_handle);
2297 2296
2298 2297 return (1); /* return from main() forcibly exits an MT process */
2299 2298 }
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX