Print this page
OS-192 zone_create() warning on headnode
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/cmd/zoneadmd/zoneadmd.c
+++ new/usr/src/cmd/zoneadmd/zoneadmd.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
↓ open down ↓ |
15 lines elided |
↑ open up ↑ |
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
25 25 * Copyright (c) 2016 by Delphix. All rights reserved.
26 + * Copyright (c) 2011, Joyent Inc. All rights reserved.
26 27 */
27 28
28 29 /*
29 30 * zoneadmd manages zones; one zoneadmd process is launched for each
30 31 * non-global zone on the system. This daemon juggles four jobs:
31 32 *
32 33 * - Implement setup and teardown of the zone "virtual platform": mount and
33 34 * unmount filesystems; create and destroy network interfaces; communicate
34 35 * with devfsadmd to lay out devices for the zone; instantiate the zone
35 36 * console device; configure process runtime attributes such as resource
36 37 * controls, pool bindings, fine-grained privileges.
37 38 *
38 39 * - Launch the zone's init(1M) process.
39 40 *
40 41 * - Implement a door server; clients (like zoneadm) connect to the door
41 42 * server and request zone state changes. The kernel is also a client of
42 43 * this door server. A request to halt or reboot the zone which originates
43 44 * *inside* the zone results in a door upcall from the kernel into zoneadmd.
44 45 *
45 46 * One minor problem is that messages emitted by zoneadmd need to be passed
46 47 * back to the zoneadm process making the request. These messages need to
47 48 * be rendered in the client's locale; so, this is passed in as part of the
48 49 * request. The exception is the kernel upcall to zoneadmd, in which case
49 50 * messages are syslog'd.
50 51 *
51 52 * To make all of this work, the Makefile adds -a to xgettext to extract *all*
52 53 * strings, and an exclusion file (zoneadmd.xcl) is used to exclude those
53 54 * strings which do not need to be translated.
54 55 *
55 56 * - Act as a console server for zlogin -C processes; see comments in zcons.c
56 57 * for more information about the zone console architecture.
57 58 *
58 59 * DESIGN NOTES
59 60 *
60 61 * Restart:
61 62 * A chief design constraint of zoneadmd is that it should be restartable in
62 63 * the case that the administrator kills it off, or it suffers a fatal error,
63 64 * without the running zone being impacted; this is akin to being able to
64 65 * reboot the service processor of a server without affecting the OS instance.
65 66 */
66 67
67 68 #include <sys/param.h>
68 69 #include <sys/mman.h>
69 70 #include <sys/types.h>
70 71 #include <sys/stat.h>
71 72 #include <sys/sysmacros.h>
72 73
73 74 #include <bsm/adt.h>
74 75 #include <bsm/adt_event.h>
75 76
76 77 #include <alloca.h>
77 78 #include <assert.h>
78 79 #include <errno.h>
79 80 #include <door.h>
80 81 #include <fcntl.h>
81 82 #include <locale.h>
82 83 #include <signal.h>
83 84 #include <stdarg.h>
84 85 #include <stdio.h>
85 86 #include <stdlib.h>
86 87 #include <string.h>
87 88 #include <strings.h>
88 89 #include <synch.h>
89 90 #include <syslog.h>
90 91 #include <thread.h>
91 92 #include <unistd.h>
92 93 #include <wait.h>
93 94 #include <limits.h>
94 95 #include <zone.h>
95 96 #include <libbrand.h>
96 97 #include <sys/brand.h>
97 98 #include <libcontract.h>
98 99 #include <libcontract_priv.h>
99 100 #include <sys/brand.h>
100 101 #include <sys/contract/process.h>
101 102 #include <sys/ctfs.h>
102 103 #include <libdladm.h>
103 104 #include <sys/dls_mgmt.h>
104 105 #include <libscf.h>
105 106
106 107 #include <libzonecfg.h>
107 108 #include <zonestat_impl.h>
108 109 #include "zoneadmd.h"
109 110
↓ open down ↓ |
74 lines elided |
↑ open up ↑ |
110 111 static char *progname;
111 112 char *zone_name; /* zone which we are managing */
112 113 char pool_name[MAXNAMELEN];
113 114 char default_brand[MAXNAMELEN];
114 115 char brand_name[MAXNAMELEN];
115 116 boolean_t zone_isnative;
116 117 boolean_t zone_iscluster;
117 118 boolean_t zone_islabeled;
118 119 boolean_t shutdown_in_progress;
119 120 static zoneid_t zone_id;
121 +static zoneid_t zone_did = 0;
120 122 dladm_handle_t dld_handle = NULL;
121 123
122 124 static char pre_statechg_hook[2 * MAXPATHLEN];
123 125 static char post_statechg_hook[2 * MAXPATHLEN];
124 126 char query_hook[2 * MAXPATHLEN];
125 127
126 128 zlog_t logsys;
127 129
128 130 mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */
129 131 mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */
130 132
131 133 static sema_t scratch_sem; /* for scratch zones */
132 134
133 135 static char zone_door_path[MAXPATHLEN];
134 136 static int zone_door = -1;
135 137
136 138 boolean_t in_death_throes = B_FALSE; /* daemon is dying */
137 139 boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */
138 140
139 141 #if !defined(TEXT_DOMAIN) /* should be defined by cc -D */
140 142 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */
141 143 #endif
142 144
143 145 #define DEFAULT_LOCALE "C"
144 146
145 147 static const char *
146 148 z_cmd_name(zone_cmd_t zcmd)
147 149 {
148 150 /* This list needs to match the enum in sys/zone.h */
149 151 static const char *zcmdstr[] = {
150 152 "ready", "boot", "forceboot", "reboot", "halt",
151 153 "note_uninstalling", "mount", "forcemount", "unmount",
152 154 "shutdown"
153 155 };
154 156
155 157 if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr))
156 158 return ("unknown");
157 159 else
158 160 return (zcmdstr[(int)zcmd]);
159 161 }
160 162
161 163 static char *
162 164 get_execbasename(char *execfullname)
163 165 {
164 166 char *last_slash, *execbasename;
165 167
166 168 /* guard against '/' at end of command invocation */
167 169 for (;;) {
168 170 last_slash = strrchr(execfullname, '/');
169 171 if (last_slash == NULL) {
170 172 execbasename = execfullname;
171 173 break;
172 174 } else {
173 175 execbasename = last_slash + 1;
174 176 if (*execbasename == '\0') {
175 177 *last_slash = '\0';
176 178 continue;
177 179 }
178 180 break;
179 181 }
180 182 }
181 183 return (execbasename);
182 184 }
183 185
184 186 static void
185 187 usage(void)
186 188 {
187 189 (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname);
188 190 (void) fprintf(stderr,
189 191 gettext("\tNote: %s should not be run directly.\n"), progname);
190 192 exit(2);
191 193 }
192 194
193 195 /* ARGSUSED */
194 196 static void
195 197 sigchld(int sig)
196 198 {
197 199 }
198 200
199 201 char *
200 202 localize_msg(char *locale, const char *msg)
201 203 {
202 204 char *out;
203 205
204 206 (void) mutex_lock(&msglock);
205 207 (void) setlocale(LC_MESSAGES, locale);
206 208 out = gettext(msg);
207 209 (void) setlocale(LC_MESSAGES, DEFAULT_LOCALE);
208 210 (void) mutex_unlock(&msglock);
209 211 return (out);
210 212 }
211 213
212 214 /* PRINTFLIKE3 */
213 215 void
214 216 zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...)
215 217 {
216 218 va_list alist;
217 219 char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */
218 220 char *bp;
219 221 int saved_errno = errno;
220 222
221 223 if (zlogp == NULL)
222 224 return;
223 225 if (zlogp == &logsys)
224 226 (void) snprintf(buf, sizeof (buf), "[zone '%s'] ",
225 227 zone_name);
226 228 else
227 229 buf[0] = '\0';
228 230 bp = &(buf[strlen(buf)]);
229 231
230 232 /*
231 233 * In theory, the locale pointer should be set to either "C" or a
232 234 * char array, so it should never be NULL
233 235 */
234 236 assert(zlogp->locale != NULL);
235 237 /* Locale is per process, but we are multi-threaded... */
236 238 fmt = localize_msg(zlogp->locale, fmt);
237 239
238 240 va_start(alist, fmt);
239 241 (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist);
240 242 va_end(alist);
241 243 bp = &(buf[strlen(buf)]);
242 244 if (use_strerror)
243 245 (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s",
244 246 strerror(saved_errno));
245 247 if (zlogp == &logsys) {
246 248 (void) syslog(LOG_ERR, "%s", buf);
247 249 } else if (zlogp->logfile != NULL) {
248 250 (void) fprintf(zlogp->logfile, "%s\n", buf);
249 251 } else {
250 252 size_t buflen;
251 253 size_t copylen;
252 254
253 255 buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf);
254 256 copylen = MIN(buflen, zlogp->loglen);
255 257 zlogp->log += copylen;
256 258 zlogp->loglen -= copylen;
257 259 }
258 260 }
259 261
260 262 /*
261 263 * Emit a warning for any boot arguments which are unrecognized. Since
262 264 * Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we
263 265 * put the arguments into an argv style array, use getopt to process them,
264 266 * and put the resultant argument string back into outargs.
265 267 *
266 268 * During the filtering, we pull out any arguments which are truly "boot"
267 269 * arguments, leaving only those which are to be passed intact to the
268 270 * progenitor process. The one we support at the moment is -i, which
269 271 * indicates to the kernel which program should be launched as 'init'.
270 272 *
271 273 * A return of Z_INVAL indicates specifically that the arguments are
272 274 * not valid; this is a non-fatal error. Except for Z_OK, all other return
273 275 * values are treated as fatal.
274 276 */
275 277 static int
276 278 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
277 279 char *init_file, char *badarg)
278 280 {
279 281 int argc = 0, argc_save;
280 282 int i;
281 283 int err;
282 284 char *arg, *lasts, **argv = NULL, **argv_save;
283 285 char zonecfg_args[BOOTARGS_MAX];
284 286 char scratchargs[BOOTARGS_MAX], *sargs;
285 287 char c;
286 288
287 289 bzero(outargs, BOOTARGS_MAX);
288 290 bzero(badarg, BOOTARGS_MAX);
289 291
290 292 /*
291 293 * If the user didn't specify transient boot arguments, check
292 294 * to see if there were any specified in the zone configuration,
293 295 * and use them if applicable.
294 296 */
295 297 if (inargs == NULL || inargs[0] == '\0') {
296 298 zone_dochandle_t handle;
297 299 if ((handle = zonecfg_init_handle()) == NULL) {
298 300 zerror(zlogp, B_TRUE,
299 301 "getting zone configuration handle");
300 302 return (Z_BAD_HANDLE);
301 303 }
302 304 err = zonecfg_get_snapshot_handle(zone_name, handle);
303 305 if (err != Z_OK) {
304 306 zerror(zlogp, B_FALSE,
305 307 "invalid configuration snapshot");
306 308 zonecfg_fini_handle(handle);
307 309 return (Z_BAD_HANDLE);
308 310 }
309 311
310 312 bzero(zonecfg_args, sizeof (zonecfg_args));
311 313 (void) zonecfg_get_bootargs(handle, zonecfg_args,
312 314 sizeof (zonecfg_args));
313 315 inargs = zonecfg_args;
314 316 zonecfg_fini_handle(handle);
315 317 }
316 318
317 319 if (strlen(inargs) >= BOOTARGS_MAX) {
318 320 zerror(zlogp, B_FALSE, "boot argument string too long");
319 321 return (Z_INVAL);
320 322 }
321 323
322 324 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
323 325 sargs = scratchargs;
324 326 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
325 327 sargs = NULL;
326 328 argc++;
327 329 }
328 330
329 331 if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) {
330 332 zerror(zlogp, B_FALSE, "memory allocation failed");
331 333 return (Z_NOMEM);
332 334 }
333 335
334 336 argv_save = argv;
335 337 argc_save = argc;
336 338
337 339 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
338 340 sargs = scratchargs;
339 341 i = 0;
340 342 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
341 343 sargs = NULL;
342 344 if ((argv[i] = strdup(arg)) == NULL) {
343 345 err = Z_NOMEM;
344 346 zerror(zlogp, B_FALSE, "memory allocation failed");
345 347 goto done;
346 348 }
347 349 i++;
348 350 }
349 351
350 352 /*
351 353 * We preserve compatibility with the Solaris system boot behavior,
352 354 * which allows:
353 355 *
354 356 * # reboot kernel/unix -s -m verbose
355 357 *
356 358 * In this example, kernel/unix tells the booter what file to
357 359 * boot. We don't want reboot in a zone to be gratuitously different,
358 360 * so we silently ignore the boot file, if necessary.
359 361 */
360 362 if (argv[0] == NULL)
361 363 goto done;
362 364
363 365 assert(argv[0][0] != ' ');
364 366 assert(argv[0][0] != '\t');
365 367
366 368 if (argv[0][0] != '-' && argv[0][0] != '\0') {
367 369 argv = &argv[1];
368 370 argc--;
369 371 }
370 372
371 373 optind = 0;
372 374 opterr = 0;
373 375 err = Z_OK;
374 376 while ((c = getopt(argc, argv, "fi:m:s")) != -1) {
375 377 switch (c) {
376 378 case 'i':
377 379 /*
378 380 * -i is handled by the runtime and is not passed
379 381 * along to userland
380 382 */
381 383 (void) strlcpy(init_file, optarg, MAXPATHLEN);
382 384 break;
383 385 case 'f':
384 386 /* This has already been processed by zoneadm */
385 387 break;
386 388 case 'm':
387 389 case 's':
388 390 /* These pass through unmolested */
389 391 (void) snprintf(outargs, BOOTARGS_MAX,
390 392 "%s -%c %s ", outargs, c, optarg ? optarg : "");
391 393 break;
392 394 case '?':
393 395 /*
394 396 * We warn about unknown arguments but pass them
395 397 * along anyway-- if someone wants to develop their
396 398 * own init replacement, they can pass it whatever
397 399 * args they want.
398 400 */
399 401 err = Z_INVAL;
400 402 (void) snprintf(outargs, BOOTARGS_MAX,
401 403 "%s -%c", outargs, optopt);
402 404 (void) snprintf(badarg, BOOTARGS_MAX,
403 405 "%s -%c", badarg, optopt);
404 406 break;
405 407 }
406 408 }
407 409
408 410 /*
409 411 * For Solaris Zones we warn about and discard non-option arguments.
410 412 * Hence 'boot foo bar baz gub' --> 'boot'. However, to be similar
411 413 * to the kernel, we concat up all the other remaining boot args.
412 414 * and warn on them as a group.
413 415 */
414 416 if (optind < argc) {
415 417 err = Z_INVAL;
416 418 while (optind < argc) {
417 419 (void) snprintf(badarg, BOOTARGS_MAX, "%s%s%s",
418 420 badarg, strlen(badarg) > 0 ? " " : "",
419 421 argv[optind]);
420 422 optind++;
421 423 }
422 424 zerror(zlogp, B_FALSE, "WARNING: Unused or invalid boot "
423 425 "arguments `%s'.", badarg);
424 426 }
425 427
426 428 done:
427 429 for (i = 0; i < argc_save; i++) {
428 430 if (argv_save[i] != NULL)
429 431 free(argv_save[i]);
430 432 }
431 433 free(argv_save);
432 434 return (err);
433 435 }
434 436
435 437
436 438 static int
437 439 mkzonedir(zlog_t *zlogp)
438 440 {
439 441 struct stat st;
440 442 /*
441 443 * We must create and lock everyone but root out of ZONES_TMPDIR
442 444 * since anyone can open any UNIX domain socket, regardless of
443 445 * its file system permissions. Sigh...
444 446 */
445 447 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
446 448 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR);
447 449 return (-1);
448 450 }
449 451 /* paranoia */
450 452 if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) {
451 453 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR);
452 454 return (-1);
453 455 }
454 456 (void) chmod(ZONES_TMPDIR, S_IRWXU);
455 457 return (0);
456 458 }
457 459
458 460 /*
459 461 * Run the brand's pre-state change callback, if it exists.
460 462 */
461 463 static int
462 464 brand_prestatechg(zlog_t *zlogp, int state, int cmd)
463 465 {
464 466 char cmdbuf[2 * MAXPATHLEN];
465 467 const char *altroot;
466 468
467 469 if (pre_statechg_hook[0] == '\0')
468 470 return (0);
469 471
470 472 altroot = zonecfg_get_root();
471 473 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", pre_statechg_hook,
472 474 state, cmd, altroot) > sizeof (cmdbuf))
473 475 return (-1);
474 476
475 477 if (do_subproc(zlogp, cmdbuf, NULL) != 0)
476 478 return (-1);
477 479
478 480 return (0);
479 481 }
480 482
481 483 /*
482 484 * Run the brand's post-state change callback, if it exists.
483 485 */
484 486 static int
485 487 brand_poststatechg(zlog_t *zlogp, int state, int cmd)
486 488 {
487 489 char cmdbuf[2 * MAXPATHLEN];
488 490 const char *altroot;
489 491
490 492 if (post_statechg_hook[0] == '\0')
491 493 return (0);
492 494
493 495 altroot = zonecfg_get_root();
494 496 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", post_statechg_hook,
495 497 state, cmd, altroot) > sizeof (cmdbuf))
496 498 return (-1);
497 499
498 500 if (do_subproc(zlogp, cmdbuf, NULL) != 0)
499 501 return (-1);
500 502
501 503 return (0);
502 504 }
503 505
504 506 /*
505 507 * Notify zonestatd of the new zone. If zonestatd is not running, this
506 508 * will do nothing.
507 509 */
508 510 static void
509 511 notify_zonestatd(zoneid_t zoneid)
510 512 {
511 513 int cmd[2];
512 514 int fd;
513 515 door_arg_t params;
514 516
515 517 fd = open(ZS_DOOR_PATH, O_RDONLY);
516 518 if (fd < 0)
517 519 return;
518 520
519 521 cmd[0] = ZSD_CMD_NEW_ZONE;
520 522 cmd[1] = zoneid;
521 523 params.data_ptr = (char *)&cmd;
522 524 params.data_size = sizeof (cmd);
523 525 params.desc_ptr = NULL;
524 526 params.desc_num = 0;
525 527 params.rbuf = NULL;
526 528 params.rsize = 0;
527 529 (void) door_call(fd, &params);
528 530 (void) close(fd);
529 531 }
530 532
531 533 /*
532 534 * Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is
533 535 * 'true' if this is being invoked as part of the processing for the "mount"
534 536 * subcommand.
535 537 */
536 538 static int
537 539 zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate)
538 540 {
539 541 int err;
↓ open down ↓ |
410 lines elided |
↑ open up ↑ |
540 542
541 543 if (brand_prestatechg(zlogp, zstate, Z_READY) != 0)
542 544 return (-1);
543 545
544 546 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) {
545 547 zerror(zlogp, B_FALSE, "unable to create snapshot: %s",
546 548 zonecfg_strerror(err));
547 549 goto bad;
548 550 }
549 551
550 - if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) {
552 + if (zone_did == 0)
553 + zone_did = zone_get_did(zone_name);
554 +
555 + if ((zone_id = vplat_create(zlogp, mount_cmd, zone_did)) == -1) {
551 556 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
552 557 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
553 558 zonecfg_strerror(err));
554 559 goto bad;
555 560 }
556 561 if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) {
557 562 bringup_failure_recovery = B_TRUE;
558 563 (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE);
559 564 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
560 565 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
561 566 zonecfg_strerror(err));
562 567 goto bad;
563 568 }
564 569
565 570 if (brand_poststatechg(zlogp, zstate, Z_READY) != 0)
566 571 goto bad;
567 572
568 573 return (0);
569 574
570 575 bad:
571 576 /*
571 576 * If something goes wrong, we up the zone's state to the target
573 578 * state, READY, and then invoke the hook as if we're halting.
574 579 */
575 580 (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT);
576 581 return (-1);
577 582 }
578 583
579 584 int
580 585 init_template(void)
581 586 {
582 587 int fd;
583 588 int err = 0;
584 589
585 590 fd = open64(CTFS_ROOT "/process/template", O_RDWR);
586 591 if (fd == -1)
587 592 return (-1);
588 593
589 594 /*
590 595 * For now, zoneadmd doesn't do anything with the contract.
591 596 * Deliver no events, don't inherit, and allow it to be orphaned.
592 597 */
593 598 err |= ct_tmpl_set_critical(fd, 0);
594 599 err |= ct_tmpl_set_informative(fd, 0);
595 600 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
596 601 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
597 602 if (err || ct_tmpl_activate(fd)) {
598 603 (void) close(fd);
599 604 return (-1);
600 605 }
601 606
602 607 return (fd);
603 608 }
604 609
605 610 typedef struct fs_callback {
606 611 zlog_t *zlogp;
607 612 zoneid_t zoneid;
608 613 boolean_t mount_cmd;
609 614 } fs_callback_t;
610 615
611 616 static int
612 617 mount_early_fs(void *data, const char *spec, const char *dir,
613 618 const char *fstype, const char *opt)
614 619 {
615 620 zlog_t *zlogp = ((fs_callback_t *)data)->zlogp;
616 621 zoneid_t zoneid = ((fs_callback_t *)data)->zoneid;
617 622 boolean_t mount_cmd = ((fs_callback_t *)data)->mount_cmd;
618 623 char rootpath[MAXPATHLEN];
619 624 pid_t child;
620 625 int child_status;
621 626 int tmpl_fd;
622 627 int rv;
623 628 ctid_t ct;
624 629
625 630 /* determine the zone rootpath */
626 631 if (mount_cmd) {
627 632 char zonepath[MAXPATHLEN];
628 633 char luroot[MAXPATHLEN];
629 634
630 635 if (zone_get_zonepath(zone_name,
631 636 zonepath, sizeof (zonepath)) != Z_OK) {
632 637 zerror(zlogp, B_FALSE, "unable to determine zone path");
633 638 return (-1);
634 639 }
635 640
636 641 (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath);
637 642 resolve_lofs(zlogp, luroot, sizeof (luroot));
638 643 (void) strlcpy(rootpath, luroot, sizeof (rootpath));
639 644 } else {
640 645 if (zone_get_rootpath(zone_name,
641 646 rootpath, sizeof (rootpath)) != Z_OK) {
642 647 zerror(zlogp, B_FALSE, "unable to determine zone root");
643 648 return (-1);
644 649 }
645 650 }
646 651
647 652 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, fstype)) < 0) {
648 653 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point",
649 654 rootpath, dir);
650 655 return (-1);
651 656 } else if (rv > 0) {
652 657 /* The mount point path doesn't exist, create it now. */
653 658 if (make_one_dir(zlogp, rootpath, dir,
654 659 DEFAULT_DIR_MODE, DEFAULT_DIR_USER,
655 660 DEFAULT_DIR_GROUP) != 0) {
656 661 zerror(zlogp, B_FALSE, "failed to create mount point");
657 662 return (-1);
658 663 }
659 664
660 665 /*
661 666 * Now this might seem weird, but we need to invoke
662 667 * valid_mount_path() again. Why? Because it checks
663 668 * to make sure that the mount point path is canonical,
664 669 * which it can only do if the path exists, so now that
665 670 * we've created the path we have to verify it again.
666 671 */
667 672 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir,
668 673 fstype)) < 0) {
669 674 zerror(zlogp, B_FALSE,
670 675 "%s%s is not a valid mount point", rootpath, dir);
671 676 return (-1);
672 677 }
673 678 }
674 679
675 680 if ((tmpl_fd = init_template()) == -1) {
676 681 zerror(zlogp, B_TRUE, "failed to create contract");
677 682 return (-1);
678 683 }
679 684
680 685 if ((child = fork()) == -1) {
681 686 (void) ct_tmpl_clear(tmpl_fd);
682 687 (void) close(tmpl_fd);
683 688 zerror(zlogp, B_TRUE, "failed to fork");
684 689 return (-1);
685 690
686 691 } else if (child == 0) { /* child */
687 692 char opt_buf[MAX_MNTOPT_STR];
688 693 int optlen = 0;
689 694 int mflag = MS_DATA;
690 695
691 696 (void) ct_tmpl_clear(tmpl_fd);
692 697 /*
693 698 * Even though there are no procs running in the zone, we
694 699 * do this for paranoia's sake.
695 700 */
696 701 (void) closefrom(0);
697 702
698 703 if (zone_enter(zoneid) == -1) {
699 704 _exit(errno);
700 705 }
701 706 if (opt != NULL) {
702 707 /*
703 708 * The mount() system call is incredibly annoying.
704 709 * If options are specified, we need to copy them
705 710 * into a temporary buffer since the mount() system
706 711 * call will overwrite the options string. It will
707 712 * also fail if the new option string it wants to
708 713 * write is bigger than the one we passed in, so
709 714 * you must pass in a buffer of the maximum possible
710 715 * option string length. sigh.
711 716 */
712 717 (void) strlcpy(opt_buf, opt, sizeof (opt_buf));
713 718 opt = opt_buf;
714 719 optlen = MAX_MNTOPT_STR;
715 720 mflag = MS_OPTIONSTR;
716 721 }
717 722 if (mount(spec, dir, mflag, fstype, NULL, 0, opt, optlen) != 0)
718 723 _exit(errno);
719 724 _exit(0);
720 725 }
721 726
722 727 /* parent */
723 728 if (contract_latest(&ct) == -1)
724 729 ct = -1;
725 730 (void) ct_tmpl_clear(tmpl_fd);
726 731 (void) close(tmpl_fd);
727 732 if (waitpid(child, &child_status, 0) != child) {
728 733 /* unexpected: we must have been signalled */
729 734 (void) contract_abandon_id(ct);
730 735 return (-1);
731 736 }
732 737 (void) contract_abandon_id(ct);
733 738 if (WEXITSTATUS(child_status) != 0) {
734 739 errno = WEXITSTATUS(child_status);
735 740 zerror(zlogp, B_TRUE, "mount of %s failed", dir);
736 741 return (-1);
737 742 }
738 743
739 744 return (0);
740 745 }
741 746
742 747 /*
743 748 * If retstr is not NULL, the output of the subproc is returned in the str,
744 749 * otherwise it is output using zerror(). Any memory allocated for retstr
745 750 * should be freed by the caller.
746 751 */
747 752 int
748 753 do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr)
749 754 {
750 755 char buf[1024]; /* arbitrary large amount */
751 756 char *inbuf;
752 757 FILE *file;
753 758 int status;
754 759 int rd_cnt;
755 760
756 761 if (retstr != NULL) {
757 762 if ((*retstr = malloc(1024)) == NULL) {
758 763 zerror(zlogp, B_FALSE, "out of memory");
759 764 return (-1);
760 765 }
761 766 inbuf = *retstr;
762 767 rd_cnt = 0;
763 768 } else {
764 769 inbuf = buf;
765 770 }
766 771
767 772 file = popen(cmdbuf, "r");
768 773 if (file == NULL) {
769 774 zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf);
770 775 return (-1);
771 776 }
772 777
773 778 while (fgets(inbuf, 1024, file) != NULL) {
774 779 if (retstr == NULL) {
775 780 if (zlogp != &logsys)
776 781 zerror(zlogp, B_FALSE, "%s", inbuf);
777 782 } else {
778 783 char *p;
779 784
780 785 rd_cnt += 1024 - 1;
781 786 if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) {
782 787 zerror(zlogp, B_FALSE, "out of memory");
783 788 (void) pclose(file);
784 789 return (-1);
785 790 }
786 791
787 792 *retstr = p;
788 793 inbuf = *retstr + rd_cnt;
789 794 }
790 795 }
791 796 status = pclose(file);
792 797
793 798 if (WIFSIGNALED(status)) {
794 799 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to "
795 800 "signal %d", cmdbuf, WTERMSIG(status));
796 801 return (-1);
797 802 }
798 803 assert(WIFEXITED(status));
799 804 if (WEXITSTATUS(status) == ZEXIT_EXEC) {
800 805 zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf);
801 806 return (-1);
802 807 }
803 808 return (WEXITSTATUS(status));
804 809 }
805 810
806 811 static int
807 812 zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
808 813 {
809 814 zoneid_t zoneid;
810 815 struct stat st;
811 816 char zpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN];
812 817 char nbootargs[BOOTARGS_MAX];
813 818 char cmdbuf[MAXPATHLEN];
814 819 fs_callback_t cb;
815 820 brand_handle_t bh;
816 821 zone_iptype_t iptype;
817 822 boolean_t links_loaded = B_FALSE;
818 823 dladm_status_t status;
819 824 char errmsg[DLADM_STRSIZE];
820 825 int err;
821 826 boolean_t restart_init;
822 827
823 828 if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0)
824 829 return (-1);
825 830
826 831 if ((zoneid = getzoneidbyname(zone_name)) == -1) {
827 832 zerror(zlogp, B_TRUE, "unable to get zoneid");
828 833 goto bad;
829 834 }
830 835
831 836 cb.zlogp = zlogp;
832 837 cb.zoneid = zoneid;
833 838 cb.mount_cmd = B_FALSE;
834 839
835 840 /* Get a handle to the brand info for this zone */
836 841 if ((bh = brand_open(brand_name)) == NULL) {
837 842 zerror(zlogp, B_FALSE, "unable to determine zone brand");
838 843 goto bad;
839 844 }
840 845
841 846 /*
842 847 * Get the list of filesystems to mount from the brand
843 848 * configuration. These mounts are done via a thread that will
844 849 * enter the zone, so they are done from within the context of the
845 850 * zone.
846 851 */
847 852 if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) {
848 853 zerror(zlogp, B_FALSE, "unable to mount filesystems");
849 854 brand_close(bh);
850 855 goto bad;
851 856 }
852 857
853 858 /*
854 859 * Get the brand's boot callback if it exists.
855 860 */
856 861 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
857 862 zerror(zlogp, B_FALSE, "unable to determine zone path");
858 863 brand_close(bh);
859 864 goto bad;
860 865 }
861 866 (void) strcpy(cmdbuf, EXEC_PREFIX);
862 867 if (brand_get_boot(bh, zone_name, zpath, cmdbuf + EXEC_LEN,
863 868 sizeof (cmdbuf) - EXEC_LEN) != 0) {
864 869 zerror(zlogp, B_FALSE,
865 870 "unable to determine branded zone's boot callback");
866 871 brand_close(bh);
867 872 goto bad;
868 873 }
869 874
870 875 /* Get the path for this zone's init(1M) (or equivalent) process. */
871 876 if (brand_get_initname(bh, init_file, MAXPATHLEN) != 0) {
872 877 zerror(zlogp, B_FALSE,
873 878 "unable to determine zone's init(1M) location");
874 879 brand_close(bh);
875 880 goto bad;
876 881 }
877 882
878 883 /* See if this zone's brand should restart init if it dies. */
879 884 restart_init = brand_restartinit(bh);
880 885
881 886 brand_close(bh);
882 887
883 888 err = filter_bootargs(zlogp, bootargs, nbootargs, init_file,
884 889 bad_boot_arg);
885 890 if (err == Z_INVAL)
886 891 eventstream_write(Z_EVT_ZONE_BADARGS);
887 892 else if (err != Z_OK)
888 893 goto bad;
889 894
890 895 assert(init_file[0] != '\0');
891 896
892 897 /* Try to anticipate possible problems: Make sure init is executable. */
893 898 if (zone_get_rootpath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
894 899 zerror(zlogp, B_FALSE, "unable to determine zone root");
895 900 goto bad;
896 901 }
897 902
898 903 (void) snprintf(initpath, sizeof (initpath), "%s%s", zpath, init_file);
899 904
900 905 if (stat(initpath, &st) == -1) {
901 906 zerror(zlogp, B_TRUE, "could not stat %s", initpath);
902 907 goto bad;
903 908 }
904 909
905 910 if ((st.st_mode & S_IXUSR) == 0) {
906 911 zerror(zlogp, B_FALSE, "%s is not executable", initpath);
907 912 goto bad;
908 913 }
909 914
910 915 /*
911 916 * Exclusive stack zones interact with the dlmgmtd running in the
912 917 * global zone. dladm_zone_boot() tells dlmgmtd that this zone is
913 918 * booting, and loads its datalinks from the zone's datalink
914 919 * configuration file.
915 920 */
916 921 if (vplat_get_iptype(zlogp, &iptype) == 0 && iptype == ZS_EXCLUSIVE) {
917 922 status = dladm_zone_boot(dld_handle, zoneid);
918 923 if (status != DLADM_STATUS_OK) {
919 924 zerror(zlogp, B_FALSE, "unable to load zone datalinks: "
920 925 " %s", dladm_status2str(status, errmsg));
921 926 goto bad;
922 927 }
923 928 links_loaded = B_TRUE;
924 929 }
925 930
926 931 /*
927 932 * If there is a brand 'boot' callback, execute it now to give the
928 933 * brand one last chance to do any additional setup before the zone
929 934 * is booted.
930 935 */
931 936 if ((strlen(cmdbuf) > EXEC_LEN) &&
932 937 (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) {
933 938 zerror(zlogp, B_FALSE, "%s failed", cmdbuf);
934 939 goto bad;
935 940 }
936 941
937 942 if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) {
938 943 zerror(zlogp, B_TRUE, "could not set zone boot file");
939 944 goto bad;
940 945 }
941 946
942 947 if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) {
943 948 zerror(zlogp, B_TRUE, "could not set zone boot arguments");
944 949 goto bad;
945 950 }
946 951
947 952 if (!restart_init && zone_setattr(zoneid, ZONE_ATTR_INITNORESTART,
948 953 NULL, 0) == -1) {
949 954 zerror(zlogp, B_TRUE, "could not set zone init-no-restart");
950 955 goto bad;
951 956 }
952 957
953 958 /*
954 959 * Inform zonestatd of a new zone so that it can install a door for
955 960 * the zone to contact it.
956 961 */
957 962 notify_zonestatd(zone_id);
958 963
959 964 if (zone_boot(zoneid) == -1) {
960 965 zerror(zlogp, B_TRUE, "unable to boot zone");
961 966 goto bad;
962 967 }
963 968
964 969 if (brand_poststatechg(zlogp, zstate, Z_BOOT) != 0)
965 970 goto bad;
966 971
967 972 return (0);
968 973
969 974 bad:
970 975 /*
971 976 * If something goes wrong, we up the zone's state to the target
972 977 * state, RUNNING, and then invoke the hook as if we're halting.
973 978 */
974 979 (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT);
975 980 if (links_loaded)
976 981 (void) dladm_zone_halt(dld_handle, zoneid);
977 982 return (-1);
978 983 }
979 984
980 985 static int
981 986 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate)
982 987 {
983 988 int err;
984 989
985 990 if (brand_prestatechg(zlogp, zstate, Z_HALT) != 0)
986 991 return (-1);
987 992
988 993 if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) {
989 994 if (!bringup_failure_recovery)
990 995 zerror(zlogp, B_FALSE, "unable to destroy zone");
991 996 return (-1);
992 997 }
993 998
994 999 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
995 1000 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
996 1001 zonecfg_strerror(err));
997 1002
998 1003 if (brand_poststatechg(zlogp, zstate, Z_HALT) != 0)
999 1004 return (-1);
1000 1005
1001 1006 return (0);
1002 1007 }
1003 1008
1004 1009 static int
1005 1010 zone_graceful_shutdown(zlog_t *zlogp)
1006 1011 {
1007 1012 zoneid_t zoneid;
1008 1013 pid_t child;
1009 1014 char cmdbuf[MAXPATHLEN];
1010 1015 brand_handle_t bh = NULL;
1011 1016 char zpath[MAXPATHLEN];
1012 1017 ctid_t ct;
1013 1018 int tmpl_fd;
1014 1019 int child_status;
1015 1020
1016 1021 if (shutdown_in_progress) {
1017 1022 zerror(zlogp, B_FALSE, "shutdown already in progress");
1018 1023 return (-1);
1019 1024 }
1020 1025
1021 1026 if ((zoneid = getzoneidbyname(zone_name)) == -1) {
1022 1027 zerror(zlogp, B_TRUE, "unable to get zoneid");
1023 1028 return (-1);
1024 1029 }
1025 1030
1026 1031 /* Get a handle to the brand info for this zone */
1027 1032 if ((bh = brand_open(brand_name)) == NULL) {
1028 1033 zerror(zlogp, B_FALSE, "unable to determine zone brand");
1029 1034 return (-1);
1030 1035 }
1031 1036
1032 1037 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
1033 1038 zerror(zlogp, B_FALSE, "unable to determine zone path");
1034 1039 brand_close(bh);
1035 1040 return (-1);
1036 1041 }
1037 1042
1038 1043 /*
1039 1044 * If there is a brand 'shutdown' callback, execute it now to give the
1040 1045 * brand a chance to cleanup any custom configuration.
1041 1046 */
1042 1047 (void) strcpy(cmdbuf, EXEC_PREFIX);
1043 1048 if (brand_get_shutdown(bh, zone_name, zpath, cmdbuf + EXEC_LEN,
1044 1049 sizeof (cmdbuf) - EXEC_LEN) != 0 || strlen(cmdbuf) <= EXEC_LEN) {
1045 1050 (void) strcat(cmdbuf, SHUTDOWN_DEFAULT);
1046 1051 }
1047 1052 brand_close(bh);
1048 1053
1049 1054 if ((tmpl_fd = init_template()) == -1) {
1050 1055 zerror(zlogp, B_TRUE, "failed to create contract");
1051 1056 return (-1);
1052 1057 }
1053 1058
1054 1059 if ((child = fork()) == -1) {
1055 1060 (void) ct_tmpl_clear(tmpl_fd);
1056 1061 (void) close(tmpl_fd);
1057 1062 zerror(zlogp, B_TRUE, "failed to fork");
1058 1063 return (-1);
1059 1064 } else if (child == 0) {
1060 1065 (void) ct_tmpl_clear(tmpl_fd);
1061 1066 if (zone_enter(zoneid) == -1) {
1062 1067 _exit(errno);
1063 1068 }
1064 1069 _exit(execl("/bin/sh", "sh", "-c", cmdbuf, (char *)NULL));
1065 1070 }
1066 1071
1067 1072 if (contract_latest(&ct) == -1)
1068 1073 ct = -1;
1069 1074 (void) ct_tmpl_clear(tmpl_fd);
1070 1075 (void) close(tmpl_fd);
1071 1076
1072 1077 if (waitpid(child, &child_status, 0) != child) {
1073 1078 /* unexpected: we must have been signalled */
1074 1079 (void) contract_abandon_id(ct);
1075 1080 return (-1);
1076 1081 }
1077 1082
1078 1083 (void) contract_abandon_id(ct);
1079 1084 if (WEXITSTATUS(child_status) != 0) {
1080 1085 errno = WEXITSTATUS(child_status);
1081 1086 zerror(zlogp, B_FALSE, "unable to shutdown zone");
1082 1087 return (-1);
1083 1088 }
1084 1089
1085 1090 shutdown_in_progress = B_TRUE;
1086 1091
1087 1092 return (0);
1088 1093 }
1089 1094
1090 1095 static int
1091 1096 zone_wait_shutdown(zlog_t *zlogp)
1092 1097 {
1093 1098 zone_state_t zstate;
1094 1099 uint64_t *tm = NULL;
1095 1100 scf_simple_prop_t *prop = NULL;
1096 1101 int timeout;
1097 1102 int tries;
1098 1103 int rc = -1;
1099 1104
1100 1105 /* Get default stop timeout from SMF framework */
1101 1106 timeout = SHUTDOWN_WAIT;
1102 1107 if ((prop = scf_simple_prop_get(NULL, SHUTDOWN_FMRI, "stop",
1103 1108 SCF_PROPERTY_TIMEOUT)) != NULL) {
1104 1109 if ((tm = scf_simple_prop_next_count(prop)) != NULL) {
1105 1110 if (tm != 0)
1106 1111 timeout = *tm;
1107 1112 }
1108 1113 scf_simple_prop_free(prop);
1109 1114 }
1110 1115
1111 1116 /* allow time for zone to shutdown cleanly */
1112 1117 for (tries = 0; tries < timeout; tries ++) {
1113 1118 (void) sleep(1);
1114 1119 if (zone_get_state(zone_name, &zstate) == Z_OK &&
1115 1120 zstate == ZONE_STATE_INSTALLED) {
1116 1121 rc = 0;
1117 1122 break;
1118 1123 }
1119 1124 }
1120 1125
1121 1126 if (rc != 0)
1122 1127 zerror(zlogp, B_FALSE, "unable to shutdown zone");
1123 1128
1124 1129 shutdown_in_progress = B_FALSE;
1125 1130
1126 1131 return (rc);
1127 1132 }
1128 1133
1129 1134
1130 1135
1131 1136 /*
1132 1137 * Generate AUE_zone_state for a command that boots a zone.
1133 1138 */
1134 1139 static void
1135 1140 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val,
1136 1141 char *new_state)
1137 1142 {
1138 1143 adt_session_data_t *ah;
1139 1144 adt_event_data_t *event;
1140 1145 int pass_fail, fail_reason;
1141 1146
1142 1147 if (!adt_audit_enabled())
1143 1148 return;
1144 1149
1145 1150 if (return_val == 0) {
1146 1151 pass_fail = ADT_SUCCESS;
1147 1152 fail_reason = ADT_SUCCESS;
1148 1153 } else {
1149 1154 pass_fail = ADT_FAILURE;
1150 1155 fail_reason = ADT_FAIL_VALUE_PROGRAM;
1151 1156 }
1152 1157
1153 1158 if (adt_start_session(&ah, NULL, 0)) {
1154 1159 zerror(zlogp, B_TRUE, gettext("audit failure."));
1155 1160 return;
1156 1161 }
1157 1162 if (adt_set_from_ucred(ah, uc, ADT_NEW)) {
1158 1163 zerror(zlogp, B_TRUE, gettext("audit failure."));
1159 1164 (void) adt_end_session(ah);
1160 1165 return;
1161 1166 }
1162 1167
1163 1168 event = adt_alloc_event(ah, ADT_zone_state);
1164 1169 if (event == NULL) {
1165 1170 zerror(zlogp, B_TRUE, gettext("audit failure."));
1166 1171 (void) adt_end_session(ah);
1167 1172 return;
1168 1173 }
1169 1174 event->adt_zone_state.zonename = zone_name;
1170 1175 event->adt_zone_state.new_state = new_state;
1171 1176
1172 1177 if (adt_put_event(event, pass_fail, fail_reason))
1173 1178 zerror(zlogp, B_TRUE, gettext("audit failure."));
1174 1179
1175 1180 adt_free_event(event);
1176 1181
1177 1182 (void) adt_end_session(ah);
1178 1183 }
1179 1184
1180 1185 /*
1181 1186 * The main routine for the door server that deals with zone state transitions.
1182 1187 */
1183 1188 /* ARGSUSED */
1184 1189 static void
1185 1190 server(void *cookie, char *args, size_t alen, door_desc_t *dp,
1186 1191 uint_t n_desc)
1187 1192 {
1188 1193 ucred_t *uc = NULL;
1189 1194 const priv_set_t *eset;
1190 1195
1191 1196 zone_state_t zstate;
1192 1197 zone_cmd_t cmd;
1193 1198 zone_cmd_arg_t *zargp;
1194 1199
1195 1200 boolean_t kernelcall;
1196 1201
1197 1202 int rval = -1;
1198 1203 uint64_t uniqid;
1199 1204 zoneid_t zoneid = -1;
1200 1205 zlog_t zlog;
1201 1206 zlog_t *zlogp;
1202 1207 zone_cmd_rval_t *rvalp;
1203 1208 size_t rlen = getpagesize(); /* conservative */
1204 1209 fs_callback_t cb;
1205 1210 brand_handle_t bh;
1206 1211 boolean_t wait_shut = B_FALSE;
1207 1212
1208 1213 /* LINTED E_BAD_PTR_CAST_ALIGN */
1209 1214 zargp = (zone_cmd_arg_t *)args;
1210 1215
1211 1216 /*
1212 1217 * When we get the door unref message, we've fdetach'd the door, and
1213 1218 * it is time for us to shut down zoneadmd.
1214 1219 */
1215 1220 if (zargp == DOOR_UNREF_DATA) {
1216 1221 /*
1217 1222 * See comment at end of main() for info on the last rites.
1218 1223 */
1219 1224 exit(0);
1220 1225 }
1221 1226
1222 1227 if (zargp == NULL) {
1223 1228 (void) door_return(NULL, 0, 0, 0);
1224 1229 }
1225 1230
1226 1231 rvalp = alloca(rlen);
1227 1232 bzero(rvalp, rlen);
1228 1233 zlog.logfile = NULL;
1229 1234 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1;
1230 1235 zlog.buf = rvalp->errbuf;
1231 1236 zlog.log = zlog.buf;
1232 1237 /* defer initialization of zlog.locale until after credential check */
1233 1238 zlogp = &zlog;
1234 1239
1235 1240 if (alen != sizeof (zone_cmd_arg_t)) {
1236 1241 /*
1237 1242 * This really shouldn't be happening.
1238 1243 */
1239 1244 zerror(&logsys, B_FALSE, "argument size (%d bytes) "
1240 1245 "unexpected (expected %d bytes)", alen,
1241 1246 sizeof (zone_cmd_arg_t));
1242 1247 goto out;
1243 1248 }
1244 1249 cmd = zargp->cmd;
1245 1250
1246 1251 if (door_ucred(&uc) != 0) {
1247 1252 zerror(&logsys, B_TRUE, "door_ucred");
1248 1253 goto out;
1249 1254 }
1250 1255 eset = ucred_getprivset(uc, PRIV_EFFECTIVE);
1251 1256 if (ucred_getzoneid(uc) != GLOBAL_ZONEID ||
1252 1257 (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) :
1253 1258 ucred_geteuid(uc) != 0)) {
1254 1259 zerror(&logsys, B_FALSE, "insufficient privileges");
1255 1260 goto out;
1256 1261 }
1257 1262
1258 1263 kernelcall = ucred_getpid(uc) == 0;
1259 1264
1260 1265 /*
1261 1266 * This is safe because we only use a zlog_t throughout the
1262 1267 * duration of a door call; i.e., by the time the pointer
1263 1268 * might become invalid, the door call would be over.
1264 1269 */
1265 1270 zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale;
1266 1271
1267 1272 (void) mutex_lock(&lock);
1268 1273
1269 1274 /*
1270 1275 * Once we start to really die off, we don't want more connections.
1271 1276 */
1272 1277 if (in_death_throes) {
1273 1278 (void) mutex_unlock(&lock);
1274 1279 ucred_free(uc);
1275 1280 (void) door_return(NULL, 0, 0, 0);
1276 1281 thr_exit(NULL);
1277 1282 }
1278 1283
1279 1284 /*
1280 1285 * Check for validity of command.
1281 1286 */
1282 1287 if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_FORCEBOOT &&
1283 1288 cmd != Z_REBOOT && cmd != Z_SHUTDOWN && cmd != Z_HALT &&
1284 1289 cmd != Z_NOTE_UNINSTALLING && cmd != Z_MOUNT &&
1285 1290 cmd != Z_FORCEMOUNT && cmd != Z_UNMOUNT) {
1286 1291 zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd);
1287 1292 goto out;
1288 1293 }
1289 1294
1290 1295 if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) {
1291 1296 /*
1292 1297 * Can't happen
1293 1298 */
1294 1299 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d",
1295 1300 cmd);
1296 1301 goto out;
1297 1302 }
1298 1303 /*
1299 1304 * We ignore the possibility of someone calling zone_create(2)
1300 1305 * explicitly; all requests must come through zoneadmd.
1301 1306 */
1302 1307 if (zone_get_state(zone_name, &zstate) != Z_OK) {
1303 1308 /*
1304 1309 * Something terribly wrong happened
1305 1310 */
1306 1311 zerror(&logsys, B_FALSE, "unable to determine state of zone");
1307 1312 goto out;
1308 1313 }
1309 1314
1310 1315 if (kernelcall) {
1311 1316 /*
1312 1317 * Kernel-initiated requests may lose their validity if the
1313 1318 * zone_t the kernel was referring to has gone away.
1314 1319 */
1315 1320 if ((zoneid = getzoneidbyname(zone_name)) == -1 ||
1316 1321 zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid,
1317 1322 sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) {
1318 1323 /*
1319 1324 * We're not talking about the same zone. The request
1320 1325 * must have arrived too late. Return error.
1321 1326 */
1322 1327 rval = -1;
1323 1328 goto out;
1324 1329 }
1325 1330 zlogp = &logsys; /* Log errors to syslog */
1326 1331 }
1327 1332
1328 1333 /*
1329 1334 * If we are being asked to forcibly mount or boot a zone, we
1330 1335 * pretend that an INCOMPLETE zone is actually INSTALLED.
1331 1336 */
1332 1337 if (zstate == ZONE_STATE_INCOMPLETE &&
1333 1338 (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT))
1334 1339 zstate = ZONE_STATE_INSTALLED;
1335 1340
1336 1341 switch (zstate) {
1337 1342 case ZONE_STATE_CONFIGURED:
1338 1343 case ZONE_STATE_INCOMPLETE:
1339 1344 /*
1340 1345 * Not our area of expertise; we just print a nice message
1341 1346 * and die off.
1342 1347 */
1343 1348 zerror(zlogp, B_FALSE,
1344 1349 "%s operation is invalid for zones in state '%s'",
1345 1350 z_cmd_name(cmd), zone_state_str(zstate));
1346 1351 break;
1347 1352
↓ open down ↓ |
787 lines elided |
↑ open up ↑ |
1348 1353 case ZONE_STATE_INSTALLED:
1349 1354 switch (cmd) {
1350 1355 case Z_READY:
1351 1356 rval = zone_ready(zlogp, Z_MNT_BOOT, zstate);
1352 1357 if (rval == 0)
1353 1358 eventstream_write(Z_EVT_ZONE_READIED);
1354 1359 break;
1355 1360 case Z_BOOT:
1356 1361 case Z_FORCEBOOT:
1357 1362 eventstream_write(Z_EVT_ZONE_BOOTING);
1358 - if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
1359 - == 0) {
1363 + if ((rval = zone_ready(zlogp, Z_MNT_BOOT,
1364 + zstate)) == 0) {
1360 1365 rval = zone_bootup(zlogp, zargp->bootbuf,
1361 1366 zstate);
1362 1367 }
1363 1368 audit_put_record(zlogp, uc, rval, "boot");
1364 1369 if (rval != 0) {
1365 1370 bringup_failure_recovery = B_TRUE;
1366 1371 (void) zone_halt(zlogp, B_FALSE, B_FALSE,
1367 1372 zstate);
1368 1373 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1369 1374 }
1370 1375 break;
1371 1376 case Z_SHUTDOWN:
1372 1377 case Z_HALT:
1373 1378 if (kernelcall) /* Invalid; can't happen */
1374 1379 abort();
1375 1380 /*
1376 1381 * We could have two clients racing to halt this
1377 1382 * zone; the second client loses, but its request
1378 1383 * doesn't fail, since the zone is now in the desired
1379 1384 * state.
1380 1385 */
1381 1386 zerror(zlogp, B_FALSE, "zone is already halted");
1382 1387 rval = 0;
1383 1388 break;
1384 1389 case Z_REBOOT:
1385 1390 if (kernelcall) /* Invalid; can't happen */
1386 1391 abort();
1387 1392 zerror(zlogp, B_FALSE, "%s operation is invalid "
1388 1393 "for zones in state '%s'", z_cmd_name(cmd),
1389 1394 zone_state_str(zstate));
1390 1395 rval = -1;
1391 1396 break;
1392 1397 case Z_NOTE_UNINSTALLING:
1393 1398 if (kernelcall) /* Invalid; can't happen */
1394 1399 abort();
1395 1400 /*
1396 1401 * Tell the console to print out a message about this.
1397 1402 * Once it does, we will be in_death_throes.
1398 1403 */
1399 1404 eventstream_write(Z_EVT_ZONE_UNINSTALLING);
1400 1405 break;
1401 1406 case Z_MOUNT:
1402 1407 case Z_FORCEMOUNT:
1403 1408 if (kernelcall) /* Invalid; can't happen */
1404 1409 abort();
1405 1410 if (!zone_isnative && !zone_iscluster &&
1406 1411 !zone_islabeled) {
1407 1412 /*
1408 1413 * -U mounts the zone without lofs mounting
1409 1414 * zone file systems back into the scratch
1410 1415 * zone. This is required when mounting
1411 1416 * non-native branded zones.
1412 1417 */
1413 1418 (void) strlcpy(zargp->bootbuf, "-U",
1414 1419 BOOTARGS_MAX);
1415 1420 }
1416 1421
1417 1422 rval = zone_ready(zlogp,
1418 1423 strcmp(zargp->bootbuf, "-U") == 0 ?
1419 1424 Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate);
1420 1425 if (rval != 0)
1421 1426 break;
1422 1427
1423 1428 eventstream_write(Z_EVT_ZONE_READIED);
1424 1429
1425 1430 /*
1426 1431 * Get a handle to the default brand info.
1427 1432 * We must always use the default brand file system
1428 1433 * list when mounting the zone.
1429 1434 */
1430 1435 if ((bh = brand_open(default_brand)) == NULL) {
1431 1436 rval = -1;
1432 1437 break;
1433 1438 }
1434 1439
1435 1440 /*
1436 1441 * Get the list of filesystems to mount from
1437 1442 * the brand configuration. These mounts are done
1438 1443 * via a thread that will enter the zone, so they
1439 1444 * are done from within the context of the zone.
1440 1445 */
1441 1446 cb.zlogp = zlogp;
1442 1447 cb.zoneid = zone_id;
1443 1448 cb.mount_cmd = B_TRUE;
1444 1449 rval = brand_platform_iter_mounts(bh,
1445 1450 mount_early_fs, &cb);
1446 1451
1447 1452 brand_close(bh);
1448 1453
1449 1454 /*
1450 1455 * Ordinarily, /dev/fd would be mounted inside the zone
1451 1456 * by svc:/system/filesystem/usr:default, but since
1452 1457 * we're not booting the zone, we need to do this
1453 1458 * manually.
1454 1459 */
1455 1460 if (rval == 0)
1456 1461 rval = mount_early_fs(&cb,
1457 1462 "fd", "/dev/fd", "fd", NULL);
1458 1463 break;
1459 1464 case Z_UNMOUNT:
1460 1465 if (kernelcall) /* Invalid; can't happen */
1461 1466 abort();
1462 1467 zerror(zlogp, B_FALSE, "zone is already unmounted");
1463 1468 rval = 0;
1464 1469 break;
1465 1470 }
1466 1471 break;
1467 1472
1468 1473 case ZONE_STATE_READY:
1469 1474 switch (cmd) {
1470 1475 case Z_READY:
1471 1476 /*
1472 1477 * We could have two clients racing to ready this
1473 1478 * zone; the second client loses, but its request
1474 1479 * doesn't fail, since the zone is now in the desired
1475 1480 * state.
1476 1481 */
1477 1482 zerror(zlogp, B_FALSE, "zone is already ready");
1478 1483 rval = 0;
1479 1484 break;
1480 1485 case Z_BOOT:
1481 1486 (void) strlcpy(boot_args, zargp->bootbuf,
1482 1487 sizeof (boot_args));
1483 1488 eventstream_write(Z_EVT_ZONE_BOOTING);
1484 1489 rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
1485 1490 audit_put_record(zlogp, uc, rval, "boot");
1486 1491 if (rval != 0) {
1487 1492 bringup_failure_recovery = B_TRUE;
1488 1493 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1489 1494 zstate);
1490 1495 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1491 1496 }
1492 1497 boot_args[0] = '\0';
1493 1498 break;
1494 1499 case Z_HALT:
1495 1500 if (kernelcall) /* Invalid; can't happen */
1496 1501 abort();
1497 1502 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
1498 1503 != 0)
1499 1504 break;
1500 1505 eventstream_write(Z_EVT_ZONE_HALTED);
1501 1506 break;
1502 1507 case Z_SHUTDOWN:
1503 1508 case Z_REBOOT:
1504 1509 case Z_NOTE_UNINSTALLING:
1505 1510 case Z_MOUNT:
1506 1511 case Z_UNMOUNT:
1507 1512 if (kernelcall) /* Invalid; can't happen */
1508 1513 abort();
1509 1514 zerror(zlogp, B_FALSE, "%s operation is invalid "
1510 1515 "for zones in state '%s'", z_cmd_name(cmd),
1511 1516 zone_state_str(zstate));
1512 1517 rval = -1;
1513 1518 break;
1514 1519 }
1515 1520 break;
1516 1521
1517 1522 case ZONE_STATE_MOUNTED:
1518 1523 switch (cmd) {
1519 1524 case Z_UNMOUNT:
1520 1525 if (kernelcall) /* Invalid; can't happen */
1521 1526 abort();
1522 1527 rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate);
1523 1528 if (rval == 0) {
1524 1529 eventstream_write(Z_EVT_ZONE_HALTED);
1525 1530 (void) sema_post(&scratch_sem);
1526 1531 }
1527 1532 break;
1528 1533 default:
1529 1534 if (kernelcall) /* Invalid; can't happen */
1530 1535 abort();
1531 1536 zerror(zlogp, B_FALSE, "%s operation is invalid "
1532 1537 "for zones in state '%s'", z_cmd_name(cmd),
1533 1538 zone_state_str(zstate));
1534 1539 rval = -1;
1535 1540 break;
1536 1541 }
↓ open down ↓ |
167 lines elided |
↑ open up ↑ |
1537 1542 break;
1538 1543
1539 1544 case ZONE_STATE_RUNNING:
1540 1545 case ZONE_STATE_SHUTTING_DOWN:
1541 1546 case ZONE_STATE_DOWN:
1542 1547 switch (cmd) {
1543 1548 case Z_READY:
1544 1549 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
1545 1550 != 0)
1546 1551 break;
1547 - if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) == 0)
1552 + if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
1553 + == 0)
1548 1554 eventstream_write(Z_EVT_ZONE_READIED);
1549 1555 else
1550 1556 eventstream_write(Z_EVT_ZONE_HALTED);
1551 1557 break;
1552 1558 case Z_BOOT:
1553 1559 /*
1554 1560 * We could have two clients racing to boot this
1555 1561 * zone; the second client loses, but its request
1556 1562 * doesn't fail, since the zone is now in the desired
1557 1563 * state.
1558 1564 */
1559 1565 zerror(zlogp, B_FALSE, "zone is already booted");
1560 1566 rval = 0;
1561 1567 break;
1562 1568 case Z_HALT:
1563 1569 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
1564 1570 != 0)
1565 1571 break;
1566 1572 eventstream_write(Z_EVT_ZONE_HALTED);
1567 1573 break;
1568 1574 case Z_REBOOT:
1569 1575 (void) strlcpy(boot_args, zargp->bootbuf,
1570 1576 sizeof (boot_args));
1571 1577 eventstream_write(Z_EVT_ZONE_REBOOTING);
1572 1578 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
1573 1579 != 0) {
1574 1580 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1575 1581 boot_args[0] = '\0';
1576 1582 break;
1577 1583 }
1578 1584 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
1579 1585 != 0) {
1580 1586 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1581 1587 boot_args[0] = '\0';
1582 1588 break;
1583 1589 }
1584 1590 rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
1585 1591 audit_put_record(zlogp, uc, rval, "reboot");
1586 1592 if (rval != 0) {
1587 1593 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1588 1594 zstate);
1589 1595 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1590 1596 }
1591 1597 boot_args[0] = '\0';
1592 1598 break;
1593 1599 case Z_SHUTDOWN:
1594 1600 if ((rval = zone_graceful_shutdown(zlogp)) == 0) {
1595 1601 wait_shut = B_TRUE;
1596 1602 }
1597 1603 break;
1598 1604 case Z_NOTE_UNINSTALLING:
1599 1605 case Z_MOUNT:
1600 1606 case Z_UNMOUNT:
1601 1607 zerror(zlogp, B_FALSE, "%s operation is invalid "
1602 1608 "for zones in state '%s'", z_cmd_name(cmd),
1603 1609 zone_state_str(zstate));
1604 1610 rval = -1;
1605 1611 break;
1606 1612 }
1607 1613 break;
1608 1614 default:
1609 1615 abort();
1610 1616 }
1611 1617
1612 1618 /*
1613 1619 * Because the state of the zone may have changed, we make sure
1614 1620 * to wake the console poller, which is in charge of initiating
1615 1621 * the shutdown procedure as necessary.
1616 1622 */
1617 1623 eventstream_write(Z_EVT_NULL);
1618 1624
1619 1625 out:
1620 1626 (void) mutex_unlock(&lock);
1621 1627
1622 1628 /* Wait for the Z_SHUTDOWN commands to complete */
1623 1629 if (wait_shut)
1624 1630 rval = zone_wait_shutdown(zlogp);
1625 1631
1626 1632 if (kernelcall) {
1627 1633 rvalp = NULL;
1628 1634 rlen = 0;
1629 1635 } else {
1630 1636 rvalp->rval = rval;
1631 1637 }
1632 1638 if (uc != NULL)
1633 1639 ucred_free(uc);
1634 1640 (void) door_return((char *)rvalp, rlen, NULL, 0);
1635 1641 thr_exit(NULL);
1636 1642 }
1637 1643
1638 1644 static int
1639 1645 setup_door(zlog_t *zlogp)
1640 1646 {
1641 1647 if ((zone_door = door_create(server, NULL,
1642 1648 DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) {
1643 1649 zerror(zlogp, B_TRUE, "%s failed", "door_create");
1644 1650 return (-1);
1645 1651 }
1646 1652 (void) fdetach(zone_door_path);
1647 1653
1648 1654 if (fattach(zone_door, zone_door_path) != 0) {
1649 1655 zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path);
1650 1656 (void) door_revoke(zone_door);
1651 1657 (void) fdetach(zone_door_path);
1652 1658 zone_door = -1;
1653 1659 return (-1);
1654 1660 }
1655 1661 return (0);
1656 1662 }
1657 1663
1658 1664 /*
1659 1665 * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this
1660 1666 * is where zoneadmd itself will check to see that another instance of
1661 1667 * zoneadmd isn't already controlling this zone.
1662 1668 *
1663 1669 * The idea here is that we want to open the path to which we will
1664 1670 * attach our door, lock it, and then make sure that no-one has beat us
1665 1671 * to fattach(3c)ing onto it.
1666 1672 *
1667 1673 * fattach(3c) is really a mount, so there are actually two possible
1668 1674 * vnodes we could be dealing with. Our strategy is as follows:
1669 1675 *
1670 1676 * - If the file we opened is a regular file (common case):
1671 1677 * There is no fattach(3c)ed door, so we have a chance of becoming
1672 1678 * the managing zoneadmd. We attempt to lock the file: if it is
1673 1679 * already locked, that means someone else raced us here, so we
1674 1680 * lose and give up. zoneadm(1m) will try to contact the zoneadmd
1675 1681 * that beat us to it.
1676 1682 *
1677 1683 * - If the file we opened is a namefs file:
1678 1684 * This means there is already an established door fattach(3c)'ed
1679 1685 * to the rendezvous path. We've lost the race, so we give up.
1680 1686 * Note that in this case we also try to grab the file lock, and
1681 1687 * will succeed in acquiring it since the vnode locked by the
1682 1688 * "winning" zoneadmd was a regular one, and the one we locked was
1683 1689 * the fattach(3c)'ed door node. At any rate, no harm is done, and
1684 1690 * we just return to zoneadm(1m) which knows to retry.
1685 1691 */
1686 1692 static int
1687 1693 make_daemon_exclusive(zlog_t *zlogp)
1688 1694 {
1689 1695 int doorfd = -1;
1690 1696 int err, ret = -1;
1691 1697 struct stat st;
1692 1698 struct flock flock;
1693 1699 zone_state_t zstate;
1694 1700
1695 1701 top:
1696 1702 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
1697 1703 zerror(zlogp, B_FALSE, "failed to get zone state: %s",
1698 1704 zonecfg_strerror(err));
1699 1705 goto out;
1700 1706 }
1701 1707 if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR,
1702 1708 S_IREAD|S_IWRITE)) < 0) {
1703 1709 zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path);
1704 1710 goto out;
1705 1711 }
1706 1712 if (fstat(doorfd, &st) < 0) {
1707 1713 zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path);
1708 1714 goto out;
1709 1715 }
1710 1716 /*
1711 1717 * Lock the file to synchronize with other zoneadmd
1712 1718 */
1713 1719 flock.l_type = F_WRLCK;
1714 1720 flock.l_whence = SEEK_SET;
1715 1721 flock.l_start = (off_t)0;
1716 1722 flock.l_len = (off_t)0;
1717 1723 if (fcntl(doorfd, F_SETLK, &flock) < 0) {
1718 1724 /*
1719 1725 * Someone else raced us here and grabbed the lock file
1720 1726 * first. A warning here is inappropriate since nothing
1721 1727 * went wrong.
1722 1728 */
1723 1729 goto out;
1724 1730 }
1725 1731
1726 1732 if (strcmp(st.st_fstype, "namefs") == 0) {
1727 1733 struct door_info info;
1728 1734
1729 1735 /*
1730 1736 * There is already something fattach()'ed to this file.
1731 1737 * Lets see what the door is up to.
1732 1738 */
1733 1739 if (door_info(doorfd, &info) == 0 && info.di_target != -1) {
1734 1740 /*
1735 1741 * Another zoneadmd process seems to be in
1736 1742 * control of the situation and we don't need to
1737 1743 * be here. A warning here is inappropriate
1738 1744 * since nothing went wrong.
1739 1745 *
1740 1746 * If the door has been revoked, the zoneadmd
1741 1747 * process currently managing the zone is going
1742 1748 * away. We'll return control to zoneadm(1m)
1743 1749 * which will try again (by which time zoneadmd
1744 1750 * will hopefully have exited).
1745 1751 */
1746 1752 goto out;
1747 1753 }
1748 1754
1749 1755 /*
1750 1756 * If we got this far, there's a fattach(3c)'ed door
1751 1757 * that belongs to a process that has exited, which can
1752 1758 * happen if the previous zoneadmd died unexpectedly.
1753 1759 *
1754 1760 * Let user know that something is amiss, but that we can
1755 1761 * recover; if the zone is in the installed state, then don't
1756 1762 * message, since having a running zoneadmd isn't really
1757 1763 * expected/needed. We want to keep occurences of this message
1758 1764 * limited to times when zoneadmd is picking back up from a
1759 1765 * zoneadmd that died while the zone was in some non-trivial
1760 1766 * state.
1761 1767 */
1762 1768 if (zstate > ZONE_STATE_INSTALLED) {
1763 1769 zerror(zlogp, B_FALSE,
1764 1770 "zone '%s': WARNING: zone is in state '%s', but "
1765 1771 "zoneadmd does not appear to be available; "
1766 1772 "restarted zoneadmd to recover.",
1767 1773 zone_name, zone_state_str(zstate));
1768 1774 }
1769 1775
1770 1776 (void) fdetach(zone_door_path);
1771 1777 (void) close(doorfd);
1772 1778 goto top;
1773 1779 }
1774 1780 ret = 0;
1775 1781 out:
1776 1782 (void) close(doorfd);
1777 1783 return (ret);
1778 1784 }
1779 1785
1780 1786 /*
1781 1787 * Setup the brand's pre and post state change callbacks, as well as the
1782 1788 * query callback, if any of these exist.
1783 1789 */
1784 1790 static int
1785 1791 brand_callback_init(brand_handle_t bh, char *zone_name)
1786 1792 {
1787 1793 char zpath[MAXPATHLEN];
1788 1794
1789 1795 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK)
1790 1796 return (-1);
1791 1797
1792 1798 (void) strlcpy(pre_statechg_hook, EXEC_PREFIX,
1793 1799 sizeof (pre_statechg_hook));
1794 1800
1795 1801 if (brand_get_prestatechange(bh, zone_name, zpath,
1796 1802 pre_statechg_hook + EXEC_LEN,
1797 1803 sizeof (pre_statechg_hook) - EXEC_LEN) != 0)
1798 1804 return (-1);
1799 1805
1800 1806 if (strlen(pre_statechg_hook) <= EXEC_LEN)
1801 1807 pre_statechg_hook[0] = '\0';
1802 1808
1803 1809 (void) strlcpy(post_statechg_hook, EXEC_PREFIX,
1804 1810 sizeof (post_statechg_hook));
1805 1811
1806 1812 if (brand_get_poststatechange(bh, zone_name, zpath,
1807 1813 post_statechg_hook + EXEC_LEN,
1808 1814 sizeof (post_statechg_hook) - EXEC_LEN) != 0)
1809 1815 return (-1);
1810 1816
1811 1817 if (strlen(post_statechg_hook) <= EXEC_LEN)
1812 1818 post_statechg_hook[0] = '\0';
1813 1819
1814 1820 (void) strlcpy(query_hook, EXEC_PREFIX,
1815 1821 sizeof (query_hook));
1816 1822
1817 1823 if (brand_get_query(bh, zone_name, zpath, query_hook + EXEC_LEN,
1818 1824 sizeof (query_hook) - EXEC_LEN) != 0)
1819 1825 return (-1);
1820 1826
1821 1827 if (strlen(query_hook) <= EXEC_LEN)
1822 1828 query_hook[0] = '\0';
1823 1829
1824 1830 return (0);
1825 1831 }
1826 1832
1827 1833 int
1828 1834 main(int argc, char *argv[])
1829 1835 {
1830 1836 int opt;
1831 1837 zoneid_t zid;
1832 1838 priv_set_t *privset;
1833 1839 zone_state_t zstate;
1834 1840 char parents_locale[MAXPATHLEN];
1835 1841 brand_handle_t bh;
1836 1842 int err;
1837 1843
1838 1844 pid_t pid;
1839 1845 sigset_t blockset;
1840 1846 sigset_t block_cld;
1841 1847
1842 1848 struct {
1843 1849 sema_t sem;
1844 1850 int status;
1845 1851 zlog_t log;
1846 1852 } *shstate;
1847 1853 size_t shstatelen = getpagesize();
1848 1854
1849 1855 zlog_t errlog;
1850 1856 zlog_t *zlogp;
1851 1857
1852 1858 int ctfd;
1853 1859
1854 1860 progname = get_execbasename(argv[0]);
1855 1861
1856 1862 /*
1857 1863 * Make sure stderr is unbuffered
1858 1864 */
1859 1865 (void) setbuffer(stderr, NULL, 0);
1860 1866
1861 1867 /*
1862 1868 * Get out of the way of mounted filesystems, since we will daemonize
1863 1869 * soon.
1864 1870 */
1865 1871 (void) chdir("/");
1866 1872
1867 1873 /*
1868 1874 * Use the default system umask per PSARC 1998/110 rather than
1869 1875 * anything that may have been set by the caller.
1870 1876 */
1871 1877 (void) umask(CMASK);
1872 1878
1873 1879 /*
1874 1880 * Initially we want to use our parent's locale.
1875 1881 */
1876 1882 (void) setlocale(LC_ALL, "");
1877 1883 (void) textdomain(TEXT_DOMAIN);
1878 1884 (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL),
1879 1885 sizeof (parents_locale));
1880 1886
1881 1887 /*
1882 1888 * This zlog_t is used for writing to stderr
1883 1889 */
1884 1890 errlog.logfile = stderr;
1885 1891 errlog.buflen = errlog.loglen = 0;
1886 1892 errlog.buf = errlog.log = NULL;
1887 1893 errlog.locale = parents_locale;
1888 1894
1889 1895 /*
1890 1896 * We start off writing to stderr until we're ready to daemonize.
1891 1897 */
1892 1898 zlogp = &errlog;
1893 1899
1894 1900 /*
1895 1901 * Process options.
1896 1902 */
1897 1903 while ((opt = getopt(argc, argv, "R:z:")) != EOF) {
1898 1904 switch (opt) {
1899 1905 case 'R':
1900 1906 zonecfg_set_root(optarg);
1901 1907 break;
1902 1908 case 'z':
1903 1909 zone_name = optarg;
1904 1910 break;
1905 1911 default:
1906 1912 usage();
1907 1913 }
1908 1914 }
1909 1915
1910 1916 if (zone_name == NULL)
1911 1917 usage();
1912 1918
1913 1919 /*
1914 1920 * Because usage() prints directly to stderr, it has gettext()
1915 1921 * wrapping, which depends on the locale. But since zerror() calls
1916 1922 * localize() which tweaks the locale, it is not safe to call zerror()
1917 1923 * until after the last call to usage(). Fortunately, the last call
1918 1924 * to usage() is just above and the first call to zerror() is just
1919 1925 * below. Don't mess this up.
1920 1926 */
1921 1927 if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) {
1922 1928 zerror(zlogp, B_FALSE, "cannot manage the %s zone",
1923 1929 GLOBAL_ZONENAME);
1924 1930 return (1);
1925 1931 }
1926 1932
1927 1933 if (zone_get_id(zone_name, &zid) != 0) {
1928 1934 zerror(zlogp, B_FALSE, "could not manage %s: %s", zone_name,
1929 1935 zonecfg_strerror(Z_NO_ZONE));
1930 1936 return (1);
1931 1937 }
1932 1938
1933 1939 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
1934 1940 zerror(zlogp, B_FALSE, "failed to get zone state: %s",
1935 1941 zonecfg_strerror(err));
1936 1942 return (1);
1937 1943 }
1938 1944 if (zstate < ZONE_STATE_INCOMPLETE) {
1939 1945 zerror(zlogp, B_FALSE,
1940 1946 "cannot manage a zone which is in state '%s'",
1941 1947 zone_state_str(zstate));
1942 1948 return (1);
1943 1949 }
1944 1950
1945 1951 if (zonecfg_default_brand(default_brand,
1946 1952 sizeof (default_brand)) != Z_OK) {
1947 1953 zerror(zlogp, B_FALSE, "unable to determine default brand");
1948 1954 return (1);
1949 1955 }
1950 1956
1951 1957 /* Get a handle to the brand info for this zone */
1952 1958 if (zone_get_brand(zone_name, brand_name, sizeof (brand_name))
1953 1959 != Z_OK) {
1954 1960 zerror(zlogp, B_FALSE, "unable to determine zone brand");
1955 1961 return (1);
1956 1962 }
1957 1963 zone_isnative = (strcmp(brand_name, NATIVE_BRAND_NAME) == 0);
1958 1964 zone_islabeled = (strcmp(brand_name, LABELED_BRAND_NAME) == 0);
1959 1965
1960 1966 /*
1961 1967 * In the alternate root environment, the only supported
1962 1968 * operations are mount and unmount. In this case, just treat
1963 1969 * the zone as native if it is cluster. Cluster zones can be
1964 1970 * native for the purpose of LU or upgrade, and the cluster
1965 1971 * brand may not exist in the miniroot (such as in net install
1966 1972 * upgrade).
1967 1973 */
1968 1974 if (strcmp(brand_name, CLUSTER_BRAND_NAME) == 0) {
1969 1975 zone_iscluster = B_TRUE;
1970 1976 if (zonecfg_in_alt_root()) {
1971 1977 (void) strlcpy(brand_name, default_brand,
1972 1978 sizeof (brand_name));
1973 1979 }
1974 1980 } else {
1975 1981 zone_iscluster = B_FALSE;
1976 1982 }
1977 1983
1978 1984 if ((bh = brand_open(brand_name)) == NULL) {
1979 1985 zerror(zlogp, B_FALSE, "unable to open zone brand");
1980 1986 return (1);
1981 1987 }
1982 1988
1983 1989 /* Get state change brand hooks. */
1984 1990 if (brand_callback_init(bh, zone_name) == -1) {
1985 1991 zerror(zlogp, B_TRUE,
1986 1992 "failed to initialize brand state change hooks");
1987 1993 brand_close(bh);
1988 1994 return (1);
1989 1995 }
1990 1996
1991 1997 brand_close(bh);
1992 1998
1993 1999 /*
1994 2000 * Check that we have all privileges. It would be nice to pare
1995 2001 * this down, but this is at least a first cut.
1996 2002 */
1997 2003 if ((privset = priv_allocset()) == NULL) {
1998 2004 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
1999 2005 return (1);
2000 2006 }
2001 2007
2002 2008 if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
2003 2009 zerror(zlogp, B_TRUE, "%s failed", "getppriv");
2004 2010 priv_freeset(privset);
2005 2011 return (1);
2006 2012 }
2007 2013
2008 2014 if (priv_isfullset(privset) == B_FALSE) {
2009 2015 zerror(zlogp, B_FALSE, "You lack sufficient privilege to "
2010 2016 "run this command (all privs required)");
2011 2017 priv_freeset(privset);
2012 2018 return (1);
2013 2019 }
2014 2020 priv_freeset(privset);
2015 2021
2016 2022 if (mkzonedir(zlogp) != 0)
2017 2023 return (1);
2018 2024
2019 2025 /*
2020 2026 * Pre-fork: set up shared state
2021 2027 */
2022 2028 if ((shstate = (void *)mmap(NULL, shstatelen,
2023 2029 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) ==
2024 2030 MAP_FAILED) {
2025 2031 zerror(zlogp, B_TRUE, "%s failed", "mmap");
2026 2032 return (1);
2027 2033 }
2028 2034 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) {
2029 2035 zerror(zlogp, B_TRUE, "%s failed", "sema_init()");
2030 2036 (void) munmap((char *)shstate, shstatelen);
2031 2037 return (1);
2032 2038 }
2033 2039 shstate->log.logfile = NULL;
2034 2040 shstate->log.buflen = shstatelen - sizeof (*shstate);
2035 2041 shstate->log.loglen = shstate->log.buflen;
2036 2042 shstate->log.buf = (char *)shstate + sizeof (*shstate);
2037 2043 shstate->log.log = shstate->log.buf;
2038 2044 shstate->log.locale = parents_locale;
2039 2045 shstate->status = -1;
2040 2046
2041 2047 /*
2042 2048 * We need a SIGCHLD handler so the sema_wait() below will wake
2043 2049 * up if the child dies without doing a sema_post().
2044 2050 */
2045 2051 (void) sigset(SIGCHLD, sigchld);
2046 2052 /*
2047 2053 * We must mask SIGCHLD until after we've coped with the fork
2048 2054 * sufficiently to deal with it; otherwise we can race and
2049 2055 * receive the signal before pid has been initialized
2050 2056 * (yes, this really happens).
2051 2057 */
2052 2058 (void) sigemptyset(&block_cld);
2053 2059 (void) sigaddset(&block_cld, SIGCHLD);
2054 2060 (void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
2055 2061
2056 2062 /*
2057 2063 * The parent only needs stderr after the fork, so close other fd's
2058 2064 * that we inherited from zoneadm so that the parent doesn't have those
2059 2065 * open while waiting. The child will close the rest after the fork.
2060 2066 */
2061 2067 closefrom(3);
2062 2068
2063 2069 if ((ctfd = init_template()) == -1) {
2064 2070 zerror(zlogp, B_TRUE, "failed to create contract");
2065 2071 return (1);
2066 2072 }
2067 2073
2068 2074 /*
2069 2075 * Do not let another thread localize a message while we are forking.
2070 2076 */
2071 2077 (void) mutex_lock(&msglock);
2072 2078 pid = fork();
2073 2079 (void) mutex_unlock(&msglock);
2074 2080
2075 2081 /*
2076 2082 * In all cases (parent, child, and in the event of an error) we
2077 2083 * don't want to cause creation of contracts on subsequent fork()s.
2078 2084 */
2079 2085 (void) ct_tmpl_clear(ctfd);
2080 2086 (void) close(ctfd);
2081 2087
2082 2088 if (pid == -1) {
2083 2089 zerror(zlogp, B_TRUE, "could not fork");
2084 2090 return (1);
2085 2091
2086 2092 } else if (pid > 0) { /* parent */
2087 2093 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2088 2094 /*
2089 2095 * This marks a window of vulnerability in which we may receive
2090 2096 * SIGCHLD before falling into sema_wait() (normally we would
2091 2097 * get woken up from sema_wait() with EINTR upon receipt of
2092 2098 * SIGCHLD). So we may need to use some other scheme, like
2093 2099 * calling sema_post() in the SIGCHLD handler.
2094 2100 * blech
2095 2101 */
2096 2102 (void) sema_wait(&shstate->sem);
2097 2103 (void) sema_destroy(&shstate->sem);
2098 2104 if (shstate->status != 0)
2099 2105 (void) waitpid(pid, NULL, WNOHANG);
2100 2106 /*
2101 2107 * It's ok if we die with SIGPIPE. It's not like we could have
2102 2108 * done anything about it.
2103 2109 */
2104 2110 (void) fprintf(stderr, "%s", shstate->log.buf);
2105 2111 _exit(shstate->status == 0 ? 0 : 1);
2106 2112 }
2107 2113
2108 2114 /*
2109 2115 * The child charges on.
2110 2116 */
2111 2117 (void) sigset(SIGCHLD, SIG_DFL);
2112 2118 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2113 2119
2114 2120 /*
2115 2121 * SIGPIPE can be delivered if we write to a socket for which the
2116 2122 * peer endpoint is gone. That can lead to too-early termination
2117 2123 * of zoneadmd, and that's not good eats.
2118 2124 */
2119 2125 (void) sigset(SIGPIPE, SIG_IGN);
2120 2126 /*
2121 2127 * Stop using stderr
2122 2128 */
2123 2129 zlogp = &shstate->log;
2124 2130
2125 2131 /*
2126 2132 * We don't need stdout/stderr from now on.
2127 2133 */
2128 2134 closefrom(0);
2129 2135
2130 2136 /*
2131 2137 * Initialize the syslog zlog_t. This needs to be done after
2132 2138 * the call to closefrom().
2133 2139 */
2134 2140 logsys.buf = logsys.log = NULL;
2135 2141 logsys.buflen = logsys.loglen = 0;
2136 2142 logsys.logfile = NULL;
2137 2143 logsys.locale = DEFAULT_LOCALE;
2138 2144
2139 2145 openlog("zoneadmd", LOG_PID, LOG_DAEMON);
2140 2146
2141 2147 /*
2142 2148 * The eventstream is used to publish state changes in the zone
2143 2149 * from the door threads to the console I/O poller.
2144 2150 */
2145 2151 if (eventstream_init() == -1) {
2146 2152 zerror(zlogp, B_TRUE, "unable to create eventstream");
2147 2153 goto child_out;
2148 2154 }
2149 2155
2150 2156 (void) snprintf(zone_door_path, sizeof (zone_door_path),
2151 2157 "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name);
2152 2158
2153 2159 /*
2154 2160 * See if another zoneadmd is running for this zone. If not, then we
2155 2161 * can now modify system state.
2156 2162 */
2157 2163 if (make_daemon_exclusive(zlogp) == -1)
2158 2164 goto child_out;
2159 2165
2160 2166
2161 2167 /*
2162 2168 * Create/join a new session; we need to be careful of what we do with
2163 2169 * the console from now on so we don't end up being the session leader
2164 2170 * for the terminal we're going to be handing out.
2165 2171 */
2166 2172 (void) setsid();
2167 2173
2168 2174 /*
2169 2175 * This thread shouldn't be receiving any signals; in particular,
2170 2176 * SIGCHLD should be received by the thread doing the fork().
2171 2177 */
2172 2178 (void) sigfillset(&blockset);
2173 2179 (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL);
2174 2180
2175 2181 /*
2176 2182 * Set up the console device and get ready to serve the console;
2177 2183 * once this has completed, we're ready to let console clients
2178 2184 * make an attempt to connect (they will block until
2179 2185 * serve_console_sock() below gets called, and any pending
2180 2186 * connection is accept()ed).
2181 2187 */
2182 2188 if (!zonecfg_in_alt_root() && init_console(zlogp) < 0)
2183 2189 goto child_out;
2184 2190
2185 2191 /*
2186 2192 * Take the lock now, so that when the door server gets going, we
2187 2193 * are guaranteed that it won't take a request until we are sure
2188 2194 * that everything is completely set up. See the child_out: label
2189 2195 * below to see why this matters.
2190 2196 */
2191 2197 (void) mutex_lock(&lock);
2192 2198
2193 2199 /* Init semaphore for scratch zones. */
2194 2200 if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) {
2195 2201 zerror(zlogp, B_TRUE,
2196 2202 "failed to initialize semaphore for scratch zone");
2197 2203 goto child_out;
2198 2204 }
2199 2205
2200 2206 /* open the dladm handle */
2201 2207 if (dladm_open(&dld_handle) != DLADM_STATUS_OK) {
2202 2208 zerror(zlogp, B_FALSE, "failed to open dladm handle");
2203 2209 goto child_out;
2204 2210 }
2205 2211
2206 2212 /*
2207 2213 * Note: door setup must occur *after* the console is set up.
2208 2214 * This is so that as zlogin tests the door to see if zoneadmd
2209 2215 * is ready yet, we know that the console will get serviced
2210 2216 * once door_info() indicates that the door is "up".
2211 2217 */
2212 2218 if (setup_door(zlogp) == -1)
2213 2219 goto child_out;
2214 2220
2215 2221 /*
2216 2222 * Things seem OK so far; tell the parent process that we're done
2217 2223 * with setup tasks. This will cause the parent to exit, signalling
2218 2224 * to zoneadm, zlogin, or whatever forked it that we are ready to
2219 2225 * service requests.
2220 2226 */
2221 2227 shstate->status = 0;
2222 2228 (void) sema_post(&shstate->sem);
2223 2229 (void) munmap((char *)shstate, shstatelen);
2224 2230 shstate = NULL;
2225 2231
2226 2232 (void) mutex_unlock(&lock);
2227 2233
2228 2234 /*
2229 2235 * zlogp is now invalid, so reset it to the syslog logger.
2230 2236 */
2231 2237 zlogp = &logsys;
2232 2238
2233 2239 /*
2234 2240 * Now that we are free of any parents, switch to the default locale.
2235 2241 */
2236 2242 (void) setlocale(LC_ALL, DEFAULT_LOCALE);
2237 2243
2238 2244 /*
2239 2245 * At this point the setup portion of main() is basically done, so
2240 2246 * we reuse this thread to manage the zone console. When
2241 2247 * serve_console() has returned, we are past the point of no return
2242 2248 * in the life of this zoneadmd.
2243 2249 */
2244 2250 if (zonecfg_in_alt_root()) {
2245 2251 /*
2246 2252 * This is just awful, but mounted scratch zones don't (and
2247 2253 * can't) have consoles. We just wait for unmount instead.
2248 2254 */
2249 2255 while (sema_wait(&scratch_sem) == EINTR)
2250 2256 ;
2251 2257 } else {
2252 2258 serve_console(zlogp);
2253 2259 assert(in_death_throes);
2254 2260 }
2255 2261
2256 2262 /*
2257 2263 * This is the next-to-last part of the exit interlock. Upon calling
2258 2264 * fdetach(), the door will go unreferenced; once any
2259 2265 * outstanding requests (like the door thread doing Z_HALT) are
2260 2266 * done, the door will get an UNREF notification; when it handles
2261 2267 * the UNREF, the door server will cause the exit. It's possible
2262 2268 * that fdetach() can fail because the file is in use, in which
2263 2269 * case we'll retry the operation.
2264 2270 */
2265 2271 assert(!MUTEX_HELD(&lock));
2266 2272 for (;;) {
2267 2273 if ((fdetach(zone_door_path) == 0) || (errno != EBUSY))
2268 2274 break;
2269 2275 yield();
2270 2276 }
2271 2277
2272 2278 for (;;)
2273 2279 (void) pause();
2274 2280
2275 2281 child_out:
2276 2282 assert(pid == 0);
2277 2283
2278 2284 shstate->status = -1;
2279 2285 (void) sema_post(&shstate->sem);
2280 2286 (void) munmap((char *)shstate, shstatelen);
2281 2287
2282 2288 /*
2283 2289 * This might trigger an unref notification, but if so,
2284 2290 * we are still holding the lock, so our call to exit will
2285 2291 * ultimately win the race and will publish the right exit
2286 2292 * code.
2287 2293 */
2288 2294 if (zone_door != -1) {
2289 2295 assert(MUTEX_HELD(&lock));
2290 2296 (void) door_revoke(zone_door);
2291 2297 (void) fdetach(zone_door_path);
2292 2298 }
2293 2299
2294 2300 if (dld_handle != NULL)
2295 2301 dladm_close(dld_handle);
2296 2302
2297 2303 return (1); /* return from main() forcibly exits an MT process */
2298 2304 }
↓ open down ↓ |
741 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX