Print this page
2594 implement graceful shutdown for local zones in zoneadm
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/cmd/zoneadmd/zoneadmd.c
+++ new/usr/src/cmd/zoneadmd/zoneadmd.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
↓ open down ↓ |
13 lines elided |
↑ open up ↑ |
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 + * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
24 25 */
25 26
26 27 /*
27 28 * zoneadmd manages zones; one zoneadmd process is launched for each
28 29 * non-global zone on the system. This daemon juggles four jobs:
29 30 *
30 31 * - Implement setup and teardown of the zone "virtual platform": mount and
31 32 * unmount filesystems; create and destroy network interfaces; communicate
32 33 * with devfsadmd to lay out devices for the zone; instantiate the zone
33 34 * console device; configure process runtime attributes such as resource
34 35 * controls, pool bindings, fine-grained privileges.
35 36 *
36 37 * - Launch the zone's init(1M) process.
37 38 *
38 39 * - Implement a door server; clients (like zoneadm) connect to the door
39 40 * server and request zone state changes. The kernel is also a client of
40 41 * this door server. A request to halt or reboot the zone which originates
41 42 * *inside* the zone results in a door upcall from the kernel into zoneadmd.
42 43 *
43 44 * One minor problem is that messages emitted by zoneadmd need to be passed
44 45 * back to the zoneadm process making the request. These messages need to
45 46 * be rendered in the client's locale; so, this is passed in as part of the
46 47 * request. The exception is the kernel upcall to zoneadmd, in which case
47 48 * messages are syslog'd.
48 49 *
49 50 * To make all of this work, the Makefile adds -a to xgettext to extract *all*
50 51 * strings, and an exclusion file (zoneadmd.xcl) is used to exclude those
51 52 * strings which do not need to be translated.
52 53 *
53 54 * - Act as a console server for zlogin -C processes; see comments in zcons.c
54 55 * for more information about the zone console architecture.
55 56 *
56 57 * DESIGN NOTES
57 58 *
58 59 * Restart:
59 60 * A chief design constraint of zoneadmd is that it should be restartable in
60 61 * the case that the administrator kills it off, or it suffers a fatal error,
61 62 * without the running zone being impacted; this is akin to being able to
62 63 * reboot the service processor of a server without affecting the OS instance.
63 64 */
64 65
65 66 #include <sys/param.h>
66 67 #include <sys/mman.h>
67 68 #include <sys/types.h>
68 69 #include <sys/stat.h>
69 70 #include <sys/sysmacros.h>
70 71
71 72 #include <bsm/adt.h>
72 73 #include <bsm/adt_event.h>
73 74
74 75 #include <alloca.h>
75 76 #include <assert.h>
76 77 #include <errno.h>
77 78 #include <door.h>
78 79 #include <fcntl.h>
79 80 #include <locale.h>
80 81 #include <signal.h>
81 82 #include <stdarg.h>
82 83 #include <stdio.h>
83 84 #include <stdlib.h>
84 85 #include <string.h>
85 86 #include <strings.h>
86 87 #include <synch.h>
87 88 #include <syslog.h>
88 89 #include <thread.h>
89 90 #include <unistd.h>
90 91 #include <wait.h>
91 92 #include <limits.h>
↓ open down ↓ |
58 lines elided |
↑ open up ↑ |
92 93 #include <zone.h>
93 94 #include <libbrand.h>
94 95 #include <sys/brand.h>
95 96 #include <libcontract.h>
96 97 #include <libcontract_priv.h>
97 98 #include <sys/brand.h>
98 99 #include <sys/contract/process.h>
99 100 #include <sys/ctfs.h>
100 101 #include <libdladm.h>
101 102 #include <sys/dls_mgmt.h>
103 +#include <libscf.h>
102 104
103 105 #include <libzonecfg.h>
104 106 #include <zonestat_impl.h>
105 107 #include "zoneadmd.h"
106 108
107 109 static char *progname;
108 110 char *zone_name; /* zone which we are managing */
109 111 char pool_name[MAXNAMELEN];
110 112 char default_brand[MAXNAMELEN];
111 113 char brand_name[MAXNAMELEN];
112 114 boolean_t zone_isnative;
113 115 boolean_t zone_iscluster;
114 116 boolean_t zone_islabeled;
117 +boolean_t shutdown_in_progress;
115 118 static zoneid_t zone_id;
116 119 dladm_handle_t dld_handle = NULL;
117 120
118 121 static char pre_statechg_hook[2 * MAXPATHLEN];
119 122 static char post_statechg_hook[2 * MAXPATHLEN];
120 123 char query_hook[2 * MAXPATHLEN];
121 124
122 125 zlog_t logsys;
123 126
124 127 mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */
125 128 mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */
126 129
127 130 static sema_t scratch_sem; /* for scratch zones */
128 131
129 132 static char zone_door_path[MAXPATHLEN];
130 133 static int zone_door = -1;
131 134
132 135 boolean_t in_death_throes = B_FALSE; /* daemon is dying */
133 136 boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */
134 137
135 138 #if !defined(TEXT_DOMAIN) /* should be defined by cc -D */
136 139 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */
↓ open down ↓ |
12 lines elided |
↑ open up ↑ |
137 140 #endif
138 141
139 142 #define DEFAULT_LOCALE "C"
140 143
141 144 static const char *
142 145 z_cmd_name(zone_cmd_t zcmd)
143 146 {
144 147 /* This list needs to match the enum in sys/zone.h */
145 148 static const char *zcmdstr[] = {
146 149 "ready", "boot", "forceboot", "reboot", "halt",
147 - "note_uninstalling", "mount", "forcemount", "unmount"
150 + "note_uninstalling", "mount", "forcemount", "unmount",
151 + "shutdown"
148 152 };
149 153
150 154 if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr))
151 155 return ("unknown");
152 156 else
153 157 return (zcmdstr[(int)zcmd]);
154 158 }
155 159
/*
 * Return a pointer to the final path component of execfullname.
 *
 * Trailing '/' characters are removed by overwriting them with '\0', so the
 * caller's buffer is modified in place.  The returned pointer refers into
 * that same buffer; if the string contains no '/', it is returned unchanged.
 */
static char *
get_execbasename(char *execfullname)
{
	char *slash;

	/* guard against '/' at end of command invocation */
	while ((slash = strrchr(execfullname, '/')) != NULL) {
		if (slash[1] != '\0')
			return (slash + 1);
		/* nothing follows this slash: chop it off and look again */
		*slash = '\0';
	}

	return (execfullname);
}
178 182
179 183 static void
180 184 usage(void)
181 185 {
182 186 (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname);
183 187 (void) fprintf(stderr,
184 188 gettext("\tNote: %s should not be run directly.\n"), progname);
185 189 exit(2);
186 190 }
187 191
/*
 * SIGCHLD handler.  Deliberately empty: the signal number is ignored and no
 * work is done here.
 */
/* ARGSUSED */
static void
sigchld(int sig)
{
	/* intentionally empty */
}
193 197
194 198 char *
195 199 localize_msg(char *locale, const char *msg)
196 200 {
197 201 char *out;
198 202
199 203 (void) mutex_lock(&msglock);
200 204 (void) setlocale(LC_MESSAGES, locale);
201 205 out = gettext(msg);
202 206 (void) setlocale(LC_MESSAGES, DEFAULT_LOCALE);
203 207 (void) mutex_unlock(&msglock);
204 208 return (out);
205 209 }
206 210
207 211 /* PRINTFLIKE3 */
208 212 void
209 213 zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...)
210 214 {
211 215 va_list alist;
212 216 char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */
213 217 char *bp;
214 218 int saved_errno = errno;
215 219
216 220 if (zlogp == NULL)
217 221 return;
218 222 if (zlogp == &logsys)
219 223 (void) snprintf(buf, sizeof (buf), "[zone '%s'] ",
220 224 zone_name);
221 225 else
222 226 buf[0] = '\0';
223 227 bp = &(buf[strlen(buf)]);
224 228
225 229 /*
226 230 * In theory, the locale pointer should be set to either "C" or a
227 231 * char array, so it should never be NULL
228 232 */
229 233 assert(zlogp->locale != NULL);
230 234 /* Locale is per process, but we are multi-threaded... */
231 235 fmt = localize_msg(zlogp->locale, fmt);
232 236
233 237 va_start(alist, fmt);
234 238 (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist);
235 239 va_end(alist);
236 240 bp = &(buf[strlen(buf)]);
237 241 if (use_strerror)
238 242 (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s",
239 243 strerror(saved_errno));
240 244 if (zlogp == &logsys) {
241 245 (void) syslog(LOG_ERR, "%s", buf);
242 246 } else if (zlogp->logfile != NULL) {
243 247 (void) fprintf(zlogp->logfile, "%s\n", buf);
244 248 } else {
245 249 size_t buflen;
246 250 size_t copylen;
247 251
248 252 buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf);
249 253 copylen = MIN(buflen, zlogp->loglen);
250 254 zlogp->log += copylen;
251 255 zlogp->loglen -= copylen;
252 256 }
253 257 }
254 258
255 259 /*
256 260 * Emit a warning for any boot arguments which are unrecognized. Since
257 261 * Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we
258 262 * put the arguments into an argv style array, use getopt to process them,
259 263 * and put the resultant argument string back into outargs.
260 264 *
261 265 * During the filtering, we pull out any arguments which are truly "boot"
262 266 * arguments, leaving only those which are to be passed intact to the
263 267 * progenitor process. The one we support at the moment is -i, which
264 268 * indicates to the kernel which program should be launched as 'init'.
265 269 *
266 270 * A return of Z_INVAL indicates specifically that the arguments are
267 271 * not valid; this is a non-fatal error. Except for Z_OK, all other return
268 272 * values are treated as fatal.
269 273 */
270 274 static int
271 275 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
272 276 char *init_file, char *badarg)
273 277 {
274 278 int argc = 0, argc_save;
275 279 int i;
276 280 int err;
277 281 char *arg, *lasts, **argv = NULL, **argv_save;
278 282 char zonecfg_args[BOOTARGS_MAX];
279 283 char scratchargs[BOOTARGS_MAX], *sargs;
280 284 char c;
281 285
282 286 bzero(outargs, BOOTARGS_MAX);
283 287 bzero(badarg, BOOTARGS_MAX);
284 288
285 289 /*
286 290 * If the user didn't specify transient boot arguments, check
287 291 * to see if there were any specified in the zone configuration,
288 292 * and use them if applicable.
289 293 */
290 294 if (inargs == NULL || inargs[0] == '\0') {
291 295 zone_dochandle_t handle;
292 296 if ((handle = zonecfg_init_handle()) == NULL) {
293 297 zerror(zlogp, B_TRUE,
294 298 "getting zone configuration handle");
295 299 return (Z_BAD_HANDLE);
296 300 }
297 301 err = zonecfg_get_snapshot_handle(zone_name, handle);
298 302 if (err != Z_OK) {
299 303 zerror(zlogp, B_FALSE,
300 304 "invalid configuration snapshot");
301 305 zonecfg_fini_handle(handle);
302 306 return (Z_BAD_HANDLE);
303 307 }
304 308
305 309 bzero(zonecfg_args, sizeof (zonecfg_args));
306 310 (void) zonecfg_get_bootargs(handle, zonecfg_args,
307 311 sizeof (zonecfg_args));
308 312 inargs = zonecfg_args;
309 313 zonecfg_fini_handle(handle);
310 314 }
311 315
312 316 if (strlen(inargs) >= BOOTARGS_MAX) {
313 317 zerror(zlogp, B_FALSE, "boot argument string too long");
314 318 return (Z_INVAL);
315 319 }
316 320
317 321 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
318 322 sargs = scratchargs;
319 323 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
320 324 sargs = NULL;
321 325 argc++;
322 326 }
323 327
324 328 if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) {
325 329 zerror(zlogp, B_FALSE, "memory allocation failed");
326 330 return (Z_NOMEM);
327 331 }
328 332
329 333 argv_save = argv;
330 334 argc_save = argc;
331 335
332 336 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
333 337 sargs = scratchargs;
334 338 i = 0;
335 339 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
336 340 sargs = NULL;
337 341 if ((argv[i] = strdup(arg)) == NULL) {
338 342 err = Z_NOMEM;
339 343 zerror(zlogp, B_FALSE, "memory allocation failed");
340 344 goto done;
341 345 }
342 346 i++;
343 347 }
344 348
345 349 /*
346 350 * We preserve compatibility with the Solaris system boot behavior,
347 351 * which allows:
348 352 *
349 353 * # reboot kernel/unix -s -m verbose
350 354 *
351 355 * In this example, kernel/unix tells the booter what file to
352 356 * boot. We don't want reboot in a zone to be gratuitously different,
353 357 * so we silently ignore the boot file, if necessary.
354 358 */
355 359 if (argv[0] == NULL)
356 360 goto done;
357 361
358 362 assert(argv[0][0] != ' ');
359 363 assert(argv[0][0] != '\t');
360 364
361 365 if (argv[0][0] != '-' && argv[0][0] != '\0') {
362 366 argv = &argv[1];
363 367 argc--;
364 368 }
365 369
366 370 optind = 0;
367 371 opterr = 0;
368 372 err = Z_OK;
369 373 while ((c = getopt(argc, argv, "fi:m:s")) != -1) {
370 374 switch (c) {
371 375 case 'i':
372 376 /*
373 377 * -i is handled by the runtime and is not passed
374 378 * along to userland
375 379 */
376 380 (void) strlcpy(init_file, optarg, MAXPATHLEN);
377 381 break;
378 382 case 'f':
379 383 /* This has already been processed by zoneadm */
380 384 break;
381 385 case 'm':
382 386 case 's':
383 387 /* These pass through unmolested */
384 388 (void) snprintf(outargs, BOOTARGS_MAX,
385 389 "%s -%c %s ", outargs, c, optarg ? optarg : "");
386 390 break;
387 391 case '?':
388 392 /*
389 393 * We warn about unknown arguments but pass them
390 394 * along anyway-- if someone wants to develop their
391 395 * own init replacement, they can pass it whatever
392 396 * args they want.
393 397 */
394 398 err = Z_INVAL;
395 399 (void) snprintf(outargs, BOOTARGS_MAX,
396 400 "%s -%c", outargs, optopt);
397 401 (void) snprintf(badarg, BOOTARGS_MAX,
398 402 "%s -%c", badarg, optopt);
399 403 break;
400 404 }
401 405 }
402 406
403 407 /*
404 408 * For Solaris Zones we warn about and discard non-option arguments.
405 409 * Hence 'boot foo bar baz gub' --> 'boot'. However, to be similar
406 410 * to the kernel, we concat up all the other remaining boot args.
407 411 * and warn on them as a group.
408 412 */
409 413 if (optind < argc) {
410 414 err = Z_INVAL;
411 415 while (optind < argc) {
412 416 (void) snprintf(badarg, BOOTARGS_MAX, "%s%s%s",
413 417 badarg, strlen(badarg) > 0 ? " " : "",
414 418 argv[optind]);
415 419 optind++;
416 420 }
417 421 zerror(zlogp, B_FALSE, "WARNING: Unused or invalid boot "
418 422 "arguments `%s'.", badarg);
419 423 }
420 424
421 425 done:
422 426 for (i = 0; i < argc_save; i++) {
423 427 if (argv_save[i] != NULL)
424 428 free(argv_save[i]);
425 429 }
426 430 free(argv_save);
427 431 return (err);
428 432 }
429 433
430 434
431 435 static int
432 436 mkzonedir(zlog_t *zlogp)
433 437 {
434 438 struct stat st;
435 439 /*
436 440 * We must create and lock everyone but root out of ZONES_TMPDIR
437 441 * since anyone can open any UNIX domain socket, regardless of
438 442 * its file system permissions. Sigh...
439 443 */
440 444 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
441 445 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR);
442 446 return (-1);
443 447 }
444 448 /* paranoia */
445 449 if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) {
446 450 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR);
447 451 return (-1);
448 452 }
449 453 (void) chmod(ZONES_TMPDIR, S_IRWXU);
450 454 return (0);
451 455 }
452 456
453 457 /*
454 458 * Run the brand's pre-state change callback, if it exists.
455 459 */
456 460 static int
457 461 brand_prestatechg(zlog_t *zlogp, int state, int cmd)
458 462 {
459 463 char cmdbuf[2 * MAXPATHLEN];
460 464 const char *altroot;
461 465
462 466 if (pre_statechg_hook[0] == '\0')
463 467 return (0);
464 468
465 469 altroot = zonecfg_get_root();
466 470 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", pre_statechg_hook,
467 471 state, cmd, altroot) > sizeof (cmdbuf))
468 472 return (-1);
469 473
470 474 if (do_subproc(zlogp, cmdbuf, NULL) != 0)
471 475 return (-1);
472 476
473 477 return (0);
474 478 }
475 479
476 480 /*
477 481 * Run the brand's post-state change callback, if it exists.
478 482 */
479 483 static int
480 484 brand_poststatechg(zlog_t *zlogp, int state, int cmd)
481 485 {
482 486 char cmdbuf[2 * MAXPATHLEN];
483 487 const char *altroot;
484 488
485 489 if (post_statechg_hook[0] == '\0')
486 490 return (0);
487 491
488 492 altroot = zonecfg_get_root();
489 493 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", post_statechg_hook,
490 494 state, cmd, altroot) > sizeof (cmdbuf))
491 495 return (-1);
492 496
493 497 if (do_subproc(zlogp, cmdbuf, NULL) != 0)
494 498 return (-1);
495 499
496 500 return (0);
497 501 }
498 502
499 503 /*
500 504 * Notify zonestatd of the new zone. If zonestatd is not running, this
501 505 * will do nothing.
502 506 */
503 507 static void
504 508 notify_zonestatd(zoneid_t zoneid)
505 509 {
506 510 int cmd[2];
507 511 int fd;
508 512 door_arg_t params;
509 513
510 514 fd = open(ZS_DOOR_PATH, O_RDONLY);
511 515 if (fd < 0)
512 516 return;
513 517
514 518 cmd[0] = ZSD_CMD_NEW_ZONE;
515 519 cmd[1] = zoneid;
516 520 params.data_ptr = (char *)&cmd;
517 521 params.data_size = sizeof (cmd);
518 522 params.desc_ptr = NULL;
519 523 params.desc_num = 0;
520 524 params.rbuf = NULL;
521 525 params.rsize = NULL;
522 526 (void) door_call(fd, ¶ms);
523 527 (void) close(fd);
524 528 }
525 529
526 530 /*
527 531 * Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is
528 532 * 'true' if this is being invoked as part of the processing for the "mount"
529 533 * subcommand.
530 534 */
531 535 static int
532 536 zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate)
533 537 {
534 538 int err;
535 539
536 540 if (brand_prestatechg(zlogp, zstate, Z_READY) != 0)
537 541 return (-1);
538 542
539 543 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) {
540 544 zerror(zlogp, B_FALSE, "unable to create snapshot: %s",
541 545 zonecfg_strerror(err));
542 546 goto bad;
543 547 }
544 548
545 549 if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) {
546 550 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
547 551 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
548 552 zonecfg_strerror(err));
549 553 goto bad;
550 554 }
551 555 if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) {
552 556 bringup_failure_recovery = B_TRUE;
553 557 (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE);
554 558 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
555 559 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
556 560 zonecfg_strerror(err));
557 561 goto bad;
558 562 }
559 563
560 564 if (brand_poststatechg(zlogp, zstate, Z_READY) != 0)
561 565 goto bad;
562 566
563 567 return (0);
564 568
565 569 bad:
566 570 /*
567 571 * If something goes wrong, we up the zones's state to the target
568 572 * state, READY, and then invoke the hook as if we're halting.
569 573 */
570 574 (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT);
571 575 return (-1);
572 576 }
573 577
574 578 int
575 579 init_template(void)
576 580 {
577 581 int fd;
578 582 int err = 0;
579 583
580 584 fd = open64(CTFS_ROOT "/process/template", O_RDWR);
581 585 if (fd == -1)
582 586 return (-1);
583 587
584 588 /*
585 589 * For now, zoneadmd doesn't do anything with the contract.
586 590 * Deliver no events, don't inherit, and allow it to be orphaned.
587 591 */
588 592 err |= ct_tmpl_set_critical(fd, 0);
589 593 err |= ct_tmpl_set_informative(fd, 0);
590 594 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
591 595 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
592 596 if (err || ct_tmpl_activate(fd)) {
593 597 (void) close(fd);
594 598 return (-1);
595 599 }
596 600
597 601 return (fd);
598 602 }
599 603
600 604 typedef struct fs_callback {
601 605 zlog_t *zlogp;
602 606 zoneid_t zoneid;
603 607 boolean_t mount_cmd;
604 608 } fs_callback_t;
605 609
606 610 static int
607 611 mount_early_fs(void *data, const char *spec, const char *dir,
608 612 const char *fstype, const char *opt)
609 613 {
610 614 zlog_t *zlogp = ((fs_callback_t *)data)->zlogp;
611 615 zoneid_t zoneid = ((fs_callback_t *)data)->zoneid;
612 616 boolean_t mount_cmd = ((fs_callback_t *)data)->mount_cmd;
613 617 char rootpath[MAXPATHLEN];
614 618 pid_t child;
615 619 int child_status;
616 620 int tmpl_fd;
617 621 int rv;
618 622 ctid_t ct;
619 623
620 624 /* determine the zone rootpath */
621 625 if (mount_cmd) {
622 626 char zonepath[MAXPATHLEN];
623 627 char luroot[MAXPATHLEN];
624 628
625 629 if (zone_get_zonepath(zone_name,
626 630 zonepath, sizeof (zonepath)) != Z_OK) {
627 631 zerror(zlogp, B_FALSE, "unable to determine zone path");
628 632 return (-1);
629 633 }
630 634
631 635 (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath);
632 636 resolve_lofs(zlogp, luroot, sizeof (luroot));
633 637 (void) strlcpy(rootpath, luroot, sizeof (rootpath));
634 638 } else {
635 639 if (zone_get_rootpath(zone_name,
636 640 rootpath, sizeof (rootpath)) != Z_OK) {
637 641 zerror(zlogp, B_FALSE, "unable to determine zone root");
638 642 return (-1);
639 643 }
640 644 }
641 645
642 646 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, fstype)) < 0) {
643 647 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point",
644 648 rootpath, dir);
645 649 return (-1);
646 650 } else if (rv > 0) {
647 651 /* The mount point path doesn't exist, create it now. */
648 652 if (make_one_dir(zlogp, rootpath, dir,
649 653 DEFAULT_DIR_MODE, DEFAULT_DIR_USER,
650 654 DEFAULT_DIR_GROUP) != 0) {
651 655 zerror(zlogp, B_FALSE, "failed to create mount point");
652 656 return (-1);
653 657 }
654 658
655 659 /*
656 660 * Now this might seem weird, but we need to invoke
657 661 * valid_mount_path() again. Why? Because it checks
658 662 * to make sure that the mount point path is canonical,
659 663 * which it can only do if the path exists, so now that
660 664 * we've created the path we have to verify it again.
661 665 */
662 666 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir,
663 667 fstype)) < 0) {
664 668 zerror(zlogp, B_FALSE,
665 669 "%s%s is not a valid mount point", rootpath, dir);
666 670 return (-1);
667 671 }
668 672 }
669 673
670 674 if ((tmpl_fd = init_template()) == -1) {
671 675 zerror(zlogp, B_TRUE, "failed to create contract");
672 676 return (-1);
673 677 }
674 678
675 679 if ((child = fork()) == -1) {
676 680 (void) ct_tmpl_clear(tmpl_fd);
677 681 (void) close(tmpl_fd);
678 682 zerror(zlogp, B_TRUE, "failed to fork");
679 683 return (-1);
680 684
681 685 } else if (child == 0) { /* child */
682 686 char opt_buf[MAX_MNTOPT_STR];
683 687 int optlen = 0;
684 688 int mflag = MS_DATA;
685 689
686 690 (void) ct_tmpl_clear(tmpl_fd);
687 691 /*
688 692 * Even though there are no procs running in the zone, we
689 693 * do this for paranoia's sake.
690 694 */
691 695 (void) closefrom(0);
692 696
693 697 if (zone_enter(zoneid) == -1) {
694 698 _exit(errno);
695 699 }
696 700 if (opt != NULL) {
697 701 /*
698 702 * The mount() system call is incredibly annoying.
699 703 * If options are specified, we need to copy them
700 704 * into a temporary buffer since the mount() system
701 705 * call will overwrite the options string. It will
702 706 * also fail if the new option string it wants to
703 707 * write is bigger than the one we passed in, so
704 708 * you must pass in a buffer of the maximum possible
705 709 * option string length. sigh.
706 710 */
707 711 (void) strlcpy(opt_buf, opt, sizeof (opt_buf));
708 712 opt = opt_buf;
709 713 optlen = MAX_MNTOPT_STR;
710 714 mflag = MS_OPTIONSTR;
711 715 }
712 716 if (mount(spec, dir, mflag, fstype, NULL, 0, opt, optlen) != 0)
713 717 _exit(errno);
714 718 _exit(0);
715 719 }
716 720
717 721 /* parent */
718 722 if (contract_latest(&ct) == -1)
719 723 ct = -1;
720 724 (void) ct_tmpl_clear(tmpl_fd);
721 725 (void) close(tmpl_fd);
722 726 if (waitpid(child, &child_status, 0) != child) {
723 727 /* unexpected: we must have been signalled */
724 728 (void) contract_abandon_id(ct);
725 729 return (-1);
726 730 }
727 731 (void) contract_abandon_id(ct);
728 732 if (WEXITSTATUS(child_status) != 0) {
729 733 errno = WEXITSTATUS(child_status);
730 734 zerror(zlogp, B_TRUE, "mount of %s failed", dir);
731 735 return (-1);
732 736 }
733 737
734 738 return (0);
735 739 }
736 740
737 741 /*
738 742 * If retstr is not NULL, the output of the subproc is returned in the str,
739 743 * otherwise it is output using zerror(). Any memory allocated for retstr
740 744 * should be freed by the caller.
741 745 */
742 746 int
743 747 do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr)
744 748 {
745 749 char buf[1024]; /* arbitrary large amount */
746 750 char *inbuf;
747 751 FILE *file;
748 752 int status;
749 753 int rd_cnt;
750 754
751 755 if (retstr != NULL) {
752 756 if ((*retstr = malloc(1024)) == NULL) {
753 757 zerror(zlogp, B_FALSE, "out of memory");
754 758 return (-1);
755 759 }
756 760 inbuf = *retstr;
757 761 rd_cnt = 0;
758 762 } else {
759 763 inbuf = buf;
760 764 }
761 765
762 766 file = popen(cmdbuf, "r");
763 767 if (file == NULL) {
764 768 zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf);
765 769 return (-1);
766 770 }
767 771
768 772 while (fgets(inbuf, 1024, file) != NULL) {
769 773 if (retstr == NULL) {
770 774 if (zlogp != &logsys)
771 775 zerror(zlogp, B_FALSE, "%s", inbuf);
772 776 } else {
773 777 char *p;
774 778
775 779 rd_cnt += 1024 - 1;
776 780 if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) {
777 781 zerror(zlogp, B_FALSE, "out of memory");
778 782 (void) pclose(file);
779 783 return (-1);
780 784 }
781 785
782 786 *retstr = p;
783 787 inbuf = *retstr + rd_cnt;
784 788 }
785 789 }
786 790 status = pclose(file);
787 791
788 792 if (WIFSIGNALED(status)) {
789 793 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to "
790 794 "signal %d", cmdbuf, WTERMSIG(status));
791 795 return (-1);
792 796 }
793 797 assert(WIFEXITED(status));
794 798 if (WEXITSTATUS(status) == ZEXIT_EXEC) {
795 799 zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf);
796 800 return (-1);
797 801 }
798 802 return (WEXITSTATUS(status));
799 803 }
800 804
801 805 static int
802 806 zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
803 807 {
804 808 zoneid_t zoneid;
805 809 struct stat st;
806 810 char zpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN];
807 811 char nbootargs[BOOTARGS_MAX];
808 812 char cmdbuf[MAXPATHLEN];
809 813 fs_callback_t cb;
810 814 brand_handle_t bh;
811 815 zone_iptype_t iptype;
812 816 boolean_t links_loaded = B_FALSE;
813 817 dladm_status_t status;
814 818 char errmsg[DLADM_STRSIZE];
815 819 int err;
816 820
817 821 if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0)
818 822 return (-1);
819 823
820 824 if ((zoneid = getzoneidbyname(zone_name)) == -1) {
821 825 zerror(zlogp, B_TRUE, "unable to get zoneid");
822 826 goto bad;
823 827 }
824 828
825 829 cb.zlogp = zlogp;
826 830 cb.zoneid = zoneid;
827 831 cb.mount_cmd = B_FALSE;
828 832
829 833 /* Get a handle to the brand info for this zone */
830 834 if ((bh = brand_open(brand_name)) == NULL) {
831 835 zerror(zlogp, B_FALSE, "unable to determine zone brand");
832 836 goto bad;
833 837 }
834 838
835 839 /*
836 840 * Get the list of filesystems to mount from the brand
837 841 * configuration. These mounts are done via a thread that will
838 842 * enter the zone, so they are done from within the context of the
839 843 * zone.
840 844 */
841 845 if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) {
842 846 zerror(zlogp, B_FALSE, "unable to mount filesystems");
843 847 brand_close(bh);
844 848 goto bad;
845 849 }
846 850
847 851 /*
848 852 * Get the brand's boot callback if it exists.
849 853 */
850 854 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
851 855 zerror(zlogp, B_FALSE, "unable to determine zone path");
852 856 brand_close(bh);
853 857 goto bad;
854 858 }
855 859 (void) strcpy(cmdbuf, EXEC_PREFIX);
856 860 if (brand_get_boot(bh, zone_name, zpath, cmdbuf + EXEC_LEN,
857 861 sizeof (cmdbuf) - EXEC_LEN) != 0) {
858 862 zerror(zlogp, B_FALSE,
859 863 "unable to determine branded zone's boot callback");
860 864 brand_close(bh);
861 865 goto bad;
862 866 }
863 867
864 868 /* Get the path for this zone's init(1M) (or equivalent) process. */
865 869 if (brand_get_initname(bh, init_file, MAXPATHLEN) != 0) {
866 870 zerror(zlogp, B_FALSE,
867 871 "unable to determine zone's init(1M) location");
868 872 brand_close(bh);
869 873 goto bad;
870 874 }
871 875
872 876 brand_close(bh);
873 877
874 878 err = filter_bootargs(zlogp, bootargs, nbootargs, init_file,
875 879 bad_boot_arg);
876 880 if (err == Z_INVAL)
877 881 eventstream_write(Z_EVT_ZONE_BADARGS);
878 882 else if (err != Z_OK)
879 883 goto bad;
880 884
881 885 assert(init_file[0] != '\0');
882 886
883 887 /* Try to anticipate possible problems: Make sure init is executable. */
884 888 if (zone_get_rootpath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
885 889 zerror(zlogp, B_FALSE, "unable to determine zone root");
886 890 goto bad;
887 891 }
888 892
889 893 (void) snprintf(initpath, sizeof (initpath), "%s%s", zpath, init_file);
890 894
891 895 if (stat(initpath, &st) == -1) {
892 896 zerror(zlogp, B_TRUE, "could not stat %s", initpath);
893 897 goto bad;
894 898 }
895 899
896 900 if ((st.st_mode & S_IXUSR) == 0) {
897 901 zerror(zlogp, B_FALSE, "%s is not executable", initpath);
898 902 goto bad;
899 903 }
900 904
901 905 /*
902 906 * Exclusive stack zones interact with the dlmgmtd running in the
903 907 * global zone. dladm_zone_boot() tells dlmgmtd that this zone is
904 908 * booting, and loads its datalinks from the zone's datalink
905 909 * configuration file.
906 910 */
907 911 if (vplat_get_iptype(zlogp, &iptype) == 0 && iptype == ZS_EXCLUSIVE) {
908 912 status = dladm_zone_boot(dld_handle, zoneid);
909 913 if (status != DLADM_STATUS_OK) {
910 914 zerror(zlogp, B_FALSE, "unable to load zone datalinks: "
911 915 " %s", dladm_status2str(status, errmsg));
912 916 goto bad;
913 917 }
914 918 links_loaded = B_TRUE;
915 919 }
916 920
917 921 /*
918 922 * If there is a brand 'boot' callback, execute it now to give the
919 923 * brand one last chance to do any additional setup before the zone
920 924 * is booted.
921 925 */
922 926 if ((strlen(cmdbuf) > EXEC_LEN) &&
923 927 (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) {
924 928 zerror(zlogp, B_FALSE, "%s failed", cmdbuf);
925 929 goto bad;
926 930 }
927 931
928 932 if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) {
929 933 zerror(zlogp, B_TRUE, "could not set zone boot file");
930 934 goto bad;
931 935 }
932 936
933 937 if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) {
934 938 zerror(zlogp, B_TRUE, "could not set zone boot arguments");
935 939 goto bad;
936 940 }
937 941
938 942 /*
939 943 * Inform zonestatd of a new zone so that it can install a door for
940 944 * the zone to contact it.
941 945 */
942 946 notify_zonestatd(zone_id);
943 947
944 948 if (zone_boot(zoneid) == -1) {
945 949 zerror(zlogp, B_TRUE, "unable to boot zone");
946 950 goto bad;
947 951 }
948 952
949 953 if (brand_poststatechg(zlogp, zstate, Z_BOOT) != 0)
950 954 goto bad;
951 955
952 956 return (0);
953 957
954 958 bad:
955 959 /*
956 960 * If something goes wrong, we up the zones's state to the target
957 961 * state, RUNNING, and then invoke the hook as if we're halting.
958 962 */
959 963 (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT);
960 964 if (links_loaded)
961 965 (void) dladm_zone_halt(dld_handle, zoneid);
962 966 return (-1);
963 967 }
964 968
965 969 static int
966 970 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate)
967 971 {
968 972 int err;
969 973
970 974 if (brand_prestatechg(zlogp, zstate, Z_HALT) != 0)
971 975 return (-1);
972 976
973 977 if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) {
974 978 if (!bringup_failure_recovery)
975 979 zerror(zlogp, B_FALSE, "unable to destroy zone");
976 980 return (-1);
977 981 }
978 982
↓ open down ↓ |
821 lines elided |
↑ open up ↑ |
979 983 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
980 984 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
981 985 zonecfg_strerror(err));
982 986
983 987 if (brand_poststatechg(zlogp, zstate, Z_HALT) != 0)
984 988 return (-1);
985 989
986 990 return (0);
987 991 }
988 992
993 +static int
994 +zone_graceful_shutdown(zlog_t *zlogp)
995 +{
996 + zoneid_t zoneid;
997 + pid_t child;
998 + char cmdbuf[MAXPATHLEN];
999 + brand_handle_t bh = NULL;
1000 + char zpath[MAXPATHLEN];
1001 + ctid_t ct;
1002 + int tmpl_fd;
1003 + int child_status;
1004 +
1005 + if (shutdown_in_progress) {
1006 + zerror(zlogp, B_FALSE, "shutdown already in progress");
1007 + return (-1);
1008 + }
1009 +
1010 + if ((zoneid = getzoneidbyname(zone_name)) == -1) {
1011 + zerror(zlogp, B_TRUE, "unable to get zoneid");
1012 + return (-1);
1013 + }
1014 +
1015 + /* Get a handle to the brand info for this zone */
1016 + if ((bh = brand_open(brand_name)) == NULL) {
1017 + zerror(zlogp, B_FALSE, "unable to determine zone brand");
1018 + return (-1);
1019 + }
1020 +
1021 + if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
1022 + zerror(zlogp, B_FALSE, "unable to determine zone path");
1023 + brand_close(bh);
1024 + return (-1);
1025 + }
1026 +
1027 + /*
1028 + * If there is a brand 'shutdown' callback, execute it now to give the
1029 + * brand a chance to cleanup any custom configuration.
1030 + */
1031 + (void) strcpy(cmdbuf, EXEC_PREFIX);
1032 + if (brand_get_shutdown(bh, zone_name, zpath, cmdbuf + EXEC_LEN,
1033 + sizeof (cmdbuf) - EXEC_LEN) != 0 || strlen(cmdbuf) <= EXEC_LEN) {
1034 + (void) strcat(cmdbuf, SHUTDOWN_DEFAULT);
1035 + }
1036 + brand_close(bh);
1037 +
1038 + if ((tmpl_fd = init_template()) == -1) {
1039 + zerror(zlogp, B_TRUE, "failed to create contract");
1040 + return (-1);
1041 + }
1042 +
1043 + if ((child = fork()) == -1) {
1044 + (void) ct_tmpl_clear(tmpl_fd);
1045 + (void) close(tmpl_fd);
1046 + zerror(zlogp, B_TRUE, "failed to fork");
1047 + return (-1);
1048 + } else if (child == 0) {
1049 + (void) ct_tmpl_clear(tmpl_fd);
1050 + if (zone_enter(zoneid) == -1) {
1051 + _exit(errno);
1052 + }
1053 + _exit(execl("/bin/sh", "sh", "-c", cmdbuf, (char *)NULL));
1054 + }
1055 +
1056 + if (contract_latest(&ct) == -1)
1057 + ct = -1;
1058 + (void) ct_tmpl_clear(tmpl_fd);
1059 + (void) close(tmpl_fd);
1060 +
1061 + if (waitpid(child, &child_status, 0) != child) {
1062 + /* unexpected: we must have been signalled */
1063 + (void) contract_abandon_id(ct);
1064 + return (-1);
1065 + }
1066 +
1067 + (void) contract_abandon_id(ct);
1068 + if (WEXITSTATUS(child_status) != 0) {
1069 + errno = WEXITSTATUS(child_status);
1070 + zerror(zlogp, B_FALSE, "unable to shutdown zone");
1071 + return (-1);
1072 + }
1073 +
1074 + shutdown_in_progress = B_TRUE;
1075 +
1076 + return (0);
1077 +}
1078 +
1079 +static int
1080 +zone_wait_shutdown(zlog_t *zlogp)
1081 +{
1082 + zone_state_t zstate;
1083 + uint64_t *tm = NULL;
1084 + scf_simple_prop_t *prop = NULL;
1085 + int timeout;
1086 + int tries;
1087 + int rc = -1;
1088 +
1089 + /* Get default stop timeout from SMF framework */
1090 + timeout = SHUTDOWN_WAIT;
1091 + if ((prop = scf_simple_prop_get(NULL, SHUTDOWN_FMRI, "stop",
1092 + SCF_PROPERTY_TIMEOUT)) != NULL) {
1093 + if ((tm = scf_simple_prop_next_count(prop)) != NULL) {
1094 + if (tm != 0)
1095 + timeout = *tm;
1096 + }
1097 + scf_simple_prop_free(prop);
1098 + }
1099 +
1100 + /* allow time for zone to shutdown cleanly */
1101 + for (tries = 0; tries < timeout; tries ++) {
1102 + (void) sleep(1);
1103 + if (zone_get_state(zone_name, &zstate) == Z_OK &&
1104 + zstate == ZONE_STATE_INSTALLED) {
1105 + rc = 0;
1106 + break;
1107 + }
1108 + }
1109 +
1110 + if (rc != 0)
1111 + zerror(zlogp, B_FALSE, "unable to shutdown zone");
1112 +
1113 + shutdown_in_progress = B_FALSE;
1114 +
1115 + return (rc);
1116 +}
1117 +
1118 +
1119 +
989 1120 /*
990 1121 * Generate AUE_zone_state for a command that boots a zone.
991 1122 */
992 1123 static void
993 1124 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val,
994 1125 char *new_state)
995 1126 {
996 1127 adt_session_data_t *ah;
997 1128 adt_event_data_t *event;
998 1129 int pass_fail, fail_reason;
999 1130
1000 1131 if (!adt_audit_enabled())
1001 1132 return;
1002 1133
1003 1134 if (return_val == 0) {
1004 1135 pass_fail = ADT_SUCCESS;
1005 1136 fail_reason = ADT_SUCCESS;
1006 1137 } else {
1007 1138 pass_fail = ADT_FAILURE;
1008 1139 fail_reason = ADT_FAIL_VALUE_PROGRAM;
1009 1140 }
1010 1141
1011 1142 if (adt_start_session(&ah, NULL, 0)) {
1012 1143 zerror(zlogp, B_TRUE, gettext("audit failure."));
1013 1144 return;
1014 1145 }
1015 1146 if (adt_set_from_ucred(ah, uc, ADT_NEW)) {
1016 1147 zerror(zlogp, B_TRUE, gettext("audit failure."));
1017 1148 (void) adt_end_session(ah);
1018 1149 return;
1019 1150 }
1020 1151
1021 1152 event = adt_alloc_event(ah, ADT_zone_state);
1022 1153 if (event == NULL) {
1023 1154 zerror(zlogp, B_TRUE, gettext("audit failure."));
1024 1155 (void) adt_end_session(ah);
1025 1156 return;
1026 1157 }
1027 1158 event->adt_zone_state.zonename = zone_name;
1028 1159 event->adt_zone_state.new_state = new_state;
1029 1160
1030 1161 if (adt_put_event(event, pass_fail, fail_reason))
1031 1162 zerror(zlogp, B_TRUE, gettext("audit failure."));
1032 1163
1033 1164 adt_free_event(event);
1034 1165
1035 1166 (void) adt_end_session(ah);
1036 1167 }
1037 1168
1038 1169 /*
1039 1170 * The main routine for the door server that deals with zone state transitions.
1040 1171 */
1041 1172 /* ARGSUSED */
1042 1173 static void
1043 1174 server(void *cookie, char *args, size_t alen, door_desc_t *dp,
1044 1175 uint_t n_desc)
1045 1176 {
1046 1177 ucred_t *uc = NULL;
1047 1178 const priv_set_t *eset;
1048 1179
1049 1180 zone_state_t zstate;
1050 1181 zone_cmd_t cmd;
1051 1182 zone_cmd_arg_t *zargp;
1052 1183
1053 1184 boolean_t kernelcall;
↓ open down ↓ |
55 lines elided |
↑ open up ↑ |
1054 1185
1055 1186 int rval = -1;
1056 1187 uint64_t uniqid;
1057 1188 zoneid_t zoneid = -1;
1058 1189 zlog_t zlog;
1059 1190 zlog_t *zlogp;
1060 1191 zone_cmd_rval_t *rvalp;
1061 1192 size_t rlen = getpagesize(); /* conservative */
1062 1193 fs_callback_t cb;
1063 1194 brand_handle_t bh;
1195 + boolean_t wait_shut = B_FALSE;
1064 1196
1065 1197 /* LINTED E_BAD_PTR_CAST_ALIGN */
1066 1198 zargp = (zone_cmd_arg_t *)args;
1067 1199
1068 1200 /*
1069 1201 * When we get the door unref message, we've fdetach'd the door, and
1070 1202 * it is time for us to shut down zoneadmd.
1071 1203 */
1072 1204 if (zargp == DOOR_UNREF_DATA) {
1073 1205 /*
1074 1206 * See comment at end of main() for info on the last rites.
1075 1207 */
1076 1208 exit(0);
1077 1209 }
1078 1210
1079 1211 if (zargp == NULL) {
1080 1212 (void) door_return(NULL, 0, 0, 0);
1081 1213 }
1082 1214
1083 1215 rvalp = alloca(rlen);
1084 1216 bzero(rvalp, rlen);
1085 1217 zlog.logfile = NULL;
1086 1218 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1;
1087 1219 zlog.buf = rvalp->errbuf;
1088 1220 zlog.log = zlog.buf;
1089 1221 /* defer initialization of zlog.locale until after credential check */
1090 1222 zlogp = &zlog;
1091 1223
1092 1224 if (alen != sizeof (zone_cmd_arg_t)) {
1093 1225 /*
1094 1226 * This really shouldn't be happening.
1095 1227 */
1096 1228 zerror(&logsys, B_FALSE, "argument size (%d bytes) "
1097 1229 "unexpected (expected %d bytes)", alen,
1098 1230 sizeof (zone_cmd_arg_t));
1099 1231 goto out;
1100 1232 }
1101 1233 cmd = zargp->cmd;
1102 1234
1103 1235 if (door_ucred(&uc) != 0) {
1104 1236 zerror(&logsys, B_TRUE, "door_ucred");
1105 1237 goto out;
1106 1238 }
1107 1239 eset = ucred_getprivset(uc, PRIV_EFFECTIVE);
1108 1240 if (ucred_getzoneid(uc) != GLOBAL_ZONEID ||
1109 1241 (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) :
1110 1242 ucred_geteuid(uc) != 0)) {
1111 1243 zerror(&logsys, B_FALSE, "insufficient privileges");
1112 1244 goto out;
1113 1245 }
1114 1246
1115 1247 kernelcall = ucred_getpid(uc) == 0;
1116 1248
1117 1249 /*
1118 1250 * This is safe because we only use a zlog_t throughout the
1119 1251 * duration of a door call; i.e., by the time the pointer
1120 1252 * might become invalid, the door call would be over.
1121 1253 */
1122 1254 zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale;
1123 1255
1124 1256 (void) mutex_lock(&lock);
1125 1257
1126 1258 /*
1127 1259 * Once we start to really die off, we don't want more connections.
1128 1260 */
1129 1261 if (in_death_throes) {
↓ open down ↓ |
56 lines elided |
↑ open up ↑ |
1130 1262 (void) mutex_unlock(&lock);
1131 1263 ucred_free(uc);
1132 1264 (void) door_return(NULL, 0, 0, 0);
1133 1265 thr_exit(NULL);
1134 1266 }
1135 1267
1136 1268 /*
1137 1269 * Check for validity of command.
1138 1270 */
1139 1271 if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_FORCEBOOT &&
1140 - cmd != Z_REBOOT && cmd != Z_HALT && cmd != Z_NOTE_UNINSTALLING &&
1141 - cmd != Z_MOUNT && cmd != Z_FORCEMOUNT && cmd != Z_UNMOUNT) {
1272 + cmd != Z_REBOOT && cmd != Z_SHUTDOWN && cmd != Z_HALT &&
1273 + cmd != Z_NOTE_UNINSTALLING && cmd != Z_MOUNT &&
1274 + cmd != Z_FORCEMOUNT && cmd != Z_UNMOUNT) {
1142 1275 zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd);
1143 1276 goto out;
1144 1277 }
1145 1278
1146 1279 if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) {
1147 1280 /*
1148 1281 * Can't happen
1149 1282 */
1150 1283 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d",
1151 1284 cmd);
1152 1285 goto out;
1153 1286 }
1154 1287 /*
1155 1288 * We ignore the possibility of someone calling zone_create(2)
1156 1289 * explicitly; all requests must come through zoneadmd.
1157 1290 */
1158 1291 if (zone_get_state(zone_name, &zstate) != Z_OK) {
1159 1292 /*
1160 1293 * Something terribly wrong happened
1161 1294 */
1162 1295 zerror(&logsys, B_FALSE, "unable to determine state of zone");
1163 1296 goto out;
1164 1297 }
1165 1298
1166 1299 if (kernelcall) {
1167 1300 /*
1168 1301 * Kernel-initiated requests may lose their validity if the
1169 1302 * zone_t the kernel was referring to has gone away.
1170 1303 */
1171 1304 if ((zoneid = getzoneidbyname(zone_name)) == -1 ||
1172 1305 zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid,
1173 1306 sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) {
1174 1307 /*
1175 1308 * We're not talking about the same zone. The request
1176 1309 * must have arrived too late. Return error.
1177 1310 */
1178 1311 rval = -1;
1179 1312 goto out;
1180 1313 }
1181 1314 zlogp = &logsys; /* Log errors to syslog */
1182 1315 }
1183 1316
1184 1317 /*
1185 1318 * If we are being asked to forcibly mount or boot a zone, we
1186 1319 * pretend that an INCOMPLETE zone is actually INSTALLED.
1187 1320 */
1188 1321 if (zstate == ZONE_STATE_INCOMPLETE &&
1189 1322 (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT))
1190 1323 zstate = ZONE_STATE_INSTALLED;
1191 1324
1192 1325 switch (zstate) {
1193 1326 case ZONE_STATE_CONFIGURED:
1194 1327 case ZONE_STATE_INCOMPLETE:
1195 1328 /*
1196 1329 * Not our area of expertise; we just print a nice message
1197 1330 * and die off.
1198 1331 */
1199 1332 zerror(zlogp, B_FALSE,
1200 1333 "%s operation is invalid for zones in state '%s'",
1201 1334 z_cmd_name(cmd), zone_state_str(zstate));
1202 1335 break;
1203 1336
1204 1337 case ZONE_STATE_INSTALLED:
1205 1338 switch (cmd) {
1206 1339 case Z_READY:
1207 1340 rval = zone_ready(zlogp, Z_MNT_BOOT, zstate);
1208 1341 if (rval == 0)
1209 1342 eventstream_write(Z_EVT_ZONE_READIED);
1210 1343 break;
1211 1344 case Z_BOOT:
1212 1345 case Z_FORCEBOOT:
1213 1346 eventstream_write(Z_EVT_ZONE_BOOTING);
1214 1347 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
1215 1348 == 0) {
1216 1349 rval = zone_bootup(zlogp, zargp->bootbuf,
↓ open down ↓ |
65 lines elided |
↑ open up ↑ |
1217 1350 zstate);
1218 1351 }
1219 1352 audit_put_record(zlogp, uc, rval, "boot");
1220 1353 if (rval != 0) {
1221 1354 bringup_failure_recovery = B_TRUE;
1222 1355 (void) zone_halt(zlogp, B_FALSE, B_FALSE,
1223 1356 zstate);
1224 1357 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1225 1358 }
1226 1359 break;
1360 + case Z_SHUTDOWN:
1227 1361 case Z_HALT:
1228 1362 if (kernelcall) /* Invalid; can't happen */
1229 1363 abort();
1230 1364 /*
1231 1365 * We could have two clients racing to halt this
1232 1366 * zone; the second client loses, but his request
1233 1367 * doesn't fail, since the zone is now in the desired
1234 1368 * state.
1235 1369 */
1236 1370 zerror(zlogp, B_FALSE, "zone is already halted");
1237 1371 rval = 0;
1238 1372 break;
1239 1373 case Z_REBOOT:
1240 1374 if (kernelcall) /* Invalid; can't happen */
1241 1375 abort();
1242 1376 zerror(zlogp, B_FALSE, "%s operation is invalid "
1243 1377 "for zones in state '%s'", z_cmd_name(cmd),
1244 1378 zone_state_str(zstate));
1245 1379 rval = -1;
1246 1380 break;
1247 1381 case Z_NOTE_UNINSTALLING:
1248 1382 if (kernelcall) /* Invalid; can't happen */
1249 1383 abort();
1250 1384 /*
1251 1385 * Tell the console to print out a message about this.
1252 1386 * Once it does, we will be in_death_throes.
1253 1387 */
1254 1388 eventstream_write(Z_EVT_ZONE_UNINSTALLING);
1255 1389 break;
1256 1390 case Z_MOUNT:
1257 1391 case Z_FORCEMOUNT:
1258 1392 if (kernelcall) /* Invalid; can't happen */
1259 1393 abort();
1260 1394 if (!zone_isnative && !zone_iscluster &&
1261 1395 !zone_islabeled) {
1262 1396 /*
1263 1397 * -U mounts the zone without lofs mounting
1264 1398 * zone file systems back into the scratch
1265 1399 * zone. This is required when mounting
1266 1400 * non-native branded zones.
1267 1401 */
1268 1402 (void) strlcpy(zargp->bootbuf, "-U",
1269 1403 BOOTARGS_MAX);
1270 1404 }
1271 1405
1272 1406 rval = zone_ready(zlogp,
1273 1407 strcmp(zargp->bootbuf, "-U") == 0 ?
1274 1408 Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate);
1275 1409 if (rval != 0)
1276 1410 break;
1277 1411
1278 1412 eventstream_write(Z_EVT_ZONE_READIED);
1279 1413
1280 1414 /*
1281 1415 * Get a handle to the default brand info.
1282 1416 * We must always use the default brand file system
1283 1417 * list when mounting the zone.
1284 1418 */
1285 1419 if ((bh = brand_open(default_brand)) == NULL) {
1286 1420 rval = -1;
1287 1421 break;
1288 1422 }
1289 1423
1290 1424 /*
1291 1425 * Get the list of filesystems to mount from
1292 1426 * the brand configuration. These mounts are done
1293 1427 * via a thread that will enter the zone, so they
1294 1428 * are done from within the context of the zone.
1295 1429 */
1296 1430 cb.zlogp = zlogp;
1297 1431 cb.zoneid = zone_id;
1298 1432 cb.mount_cmd = B_TRUE;
1299 1433 rval = brand_platform_iter_mounts(bh,
1300 1434 mount_early_fs, &cb);
1301 1435
1302 1436 brand_close(bh);
1303 1437
1304 1438 /*
1305 1439 * Ordinarily, /dev/fd would be mounted inside the zone
1306 1440 * by svc:/system/filesystem/usr:default, but since
1307 1441 * we're not booting the zone, we need to do this
1308 1442 * manually.
1309 1443 */
1310 1444 if (rval == 0)
1311 1445 rval = mount_early_fs(&cb,
1312 1446 "fd", "/dev/fd", "fd", NULL);
1313 1447 break;
1314 1448 case Z_UNMOUNT:
1315 1449 if (kernelcall) /* Invalid; can't happen */
1316 1450 abort();
1317 1451 zerror(zlogp, B_FALSE, "zone is already unmounted");
1318 1452 rval = 0;
1319 1453 break;
1320 1454 }
1321 1455 break;
1322 1456
1323 1457 case ZONE_STATE_READY:
1324 1458 switch (cmd) {
1325 1459 case Z_READY:
1326 1460 /*
1327 1461 * We could have two clients racing to ready this
1328 1462 * zone; the second client loses, but his request
1329 1463 * doesn't fail, since the zone is now in the desired
1330 1464 * state.
1331 1465 */
1332 1466 zerror(zlogp, B_FALSE, "zone is already ready");
1333 1467 rval = 0;
1334 1468 break;
1335 1469 case Z_BOOT:
1336 1470 (void) strlcpy(boot_args, zargp->bootbuf,
1337 1471 sizeof (boot_args));
1338 1472 eventstream_write(Z_EVT_ZONE_BOOTING);
1339 1473 rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
1340 1474 audit_put_record(zlogp, uc, rval, "boot");
1341 1475 if (rval != 0) {
1342 1476 bringup_failure_recovery = B_TRUE;
1343 1477 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1344 1478 zstate);
1345 1479 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1346 1480 }
↓ open down ↓ |
110 lines elided |
↑ open up ↑ |
1347 1481 boot_args[0] = '\0';
1348 1482 break;
1349 1483 case Z_HALT:
1350 1484 if (kernelcall) /* Invalid; can't happen */
1351 1485 abort();
1352 1486 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
1353 1487 != 0)
1354 1488 break;
1355 1489 eventstream_write(Z_EVT_ZONE_HALTED);
1356 1490 break;
1491 + case Z_SHUTDOWN:
1357 1492 case Z_REBOOT:
1358 1493 case Z_NOTE_UNINSTALLING:
1359 1494 case Z_MOUNT:
1360 1495 case Z_UNMOUNT:
1361 1496 if (kernelcall) /* Invalid; can't happen */
1362 1497 abort();
1363 1498 zerror(zlogp, B_FALSE, "%s operation is invalid "
1364 1499 "for zones in state '%s'", z_cmd_name(cmd),
1365 1500 zone_state_str(zstate));
1366 1501 rval = -1;
1367 1502 break;
1368 1503 }
1369 1504 break;
1370 1505
1371 1506 case ZONE_STATE_MOUNTED:
1372 1507 switch (cmd) {
1373 1508 case Z_UNMOUNT:
1374 1509 if (kernelcall) /* Invalid; can't happen */
1375 1510 abort();
1376 1511 rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate);
1377 1512 if (rval == 0) {
1378 1513 eventstream_write(Z_EVT_ZONE_HALTED);
1379 1514 (void) sema_post(&scratch_sem);
1380 1515 }
1381 1516 break;
1382 1517 default:
1383 1518 if (kernelcall) /* Invalid; can't happen */
1384 1519 abort();
1385 1520 zerror(zlogp, B_FALSE, "%s operation is invalid "
1386 1521 "for zones in state '%s'", z_cmd_name(cmd),
1387 1522 zone_state_str(zstate));
1388 1523 rval = -1;
1389 1524 break;
1390 1525 }
1391 1526 break;
1392 1527
1393 1528 case ZONE_STATE_RUNNING:
1394 1529 case ZONE_STATE_SHUTTING_DOWN:
1395 1530 case ZONE_STATE_DOWN:
1396 1531 switch (cmd) {
1397 1532 case Z_READY:
1398 1533 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
1399 1534 != 0)
1400 1535 break;
1401 1536 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) == 0)
1402 1537 eventstream_write(Z_EVT_ZONE_READIED);
1403 1538 else
1404 1539 eventstream_write(Z_EVT_ZONE_HALTED);
1405 1540 break;
1406 1541 case Z_BOOT:
1407 1542 /*
1408 1543 * We could have two clients racing to boot this
1409 1544 * zone; the second client loses, but his request
1410 1545 * doesn't fail, since the zone is now in the desired
1411 1546 * state.
1412 1547 */
1413 1548 zerror(zlogp, B_FALSE, "zone is already booted");
1414 1549 rval = 0;
1415 1550 break;
1416 1551 case Z_HALT:
1417 1552 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
1418 1553 != 0)
1419 1554 break;
1420 1555 eventstream_write(Z_EVT_ZONE_HALTED);
1421 1556 break;
1422 1557 case Z_REBOOT:
1423 1558 (void) strlcpy(boot_args, zargp->bootbuf,
1424 1559 sizeof (boot_args));
1425 1560 eventstream_write(Z_EVT_ZONE_REBOOTING);
1426 1561 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
1427 1562 != 0) {
1428 1563 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1429 1564 boot_args[0] = '\0';
1430 1565 break;
1431 1566 }
1432 1567 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
1433 1568 != 0) {
1434 1569 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1435 1570 boot_args[0] = '\0';
1436 1571 break;
↓ open down ↓ |
70 lines elided |
↑ open up ↑ |
1437 1572 }
1438 1573 rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
1439 1574 audit_put_record(zlogp, uc, rval, "reboot");
1440 1575 if (rval != 0) {
1441 1576 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1442 1577 zstate);
1443 1578 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1444 1579 }
1445 1580 boot_args[0] = '\0';
1446 1581 break;
1582 + case Z_SHUTDOWN:
1583 + if ((rval = zone_graceful_shutdown(zlogp)) == 0) {
1584 + wait_shut = B_TRUE;
1585 + }
1586 + break;
1447 1587 case Z_NOTE_UNINSTALLING:
1448 1588 case Z_MOUNT:
1449 1589 case Z_UNMOUNT:
1450 1590 zerror(zlogp, B_FALSE, "%s operation is invalid "
1451 1591 "for zones in state '%s'", z_cmd_name(cmd),
1452 1592 zone_state_str(zstate));
1453 1593 rval = -1;
1454 1594 break;
1455 1595 }
1456 1596 break;
1457 1597 default:
1458 1598 abort();
1459 1599 }
↓ open down ↓ |
3 lines elided |
↑ open up ↑ |
1460 1600
1461 1601 /*
1462 1602 * Because the state of the zone may have changed, we make sure
1463 1603 * to wake the console poller, which is in charge of initiating
1464 1604 * the shutdown procedure as necessary.
1465 1605 */
1466 1606 eventstream_write(Z_EVT_NULL);
1467 1607
1468 1608 out:
1469 1609 (void) mutex_unlock(&lock);
1610 +
1611 + /* Wait for the Z_SHUTDOWN commands to complete */
1612 + if (wait_shut)
1613 + rval = zone_wait_shutdown(zlogp);
1614 +
1470 1615 if (kernelcall) {
1471 1616 rvalp = NULL;
1472 1617 rlen = 0;
1473 1618 } else {
1474 1619 rvalp->rval = rval;
1475 1620 }
1476 1621 if (uc != NULL)
1477 1622 ucred_free(uc);
1478 1623 (void) door_return((char *)rvalp, rlen, NULL, 0);
1479 1624 thr_exit(NULL);
1480 1625 }
1481 1626
1482 1627 static int
1483 1628 setup_door(zlog_t *zlogp)
1484 1629 {
1485 1630 if ((zone_door = door_create(server, NULL,
1486 1631 DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) {
1487 1632 zerror(zlogp, B_TRUE, "%s failed", "door_create");
1488 1633 return (-1);
1489 1634 }
1490 1635 (void) fdetach(zone_door_path);
1491 1636
1492 1637 if (fattach(zone_door, zone_door_path) != 0) {
1493 1638 zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path);
1494 1639 (void) door_revoke(zone_door);
1495 1640 (void) fdetach(zone_door_path);
1496 1641 zone_door = -1;
1497 1642 return (-1);
1498 1643 }
1499 1644 return (0);
1500 1645 }
1501 1646
1502 1647 /*
1503 1648 * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this
1504 1649 * is where zoneadmd itself will check to see that another instance of
1505 1650 * zoneadmd isn't already controlling this zone.
1506 1651 *
1507 1652 * The idea here is that we want to open the path to which we will
1508 1653 * attach our door, lock it, and then make sure that no-one has beat us
1509 1654 * to fattach(3c)ing onto it.
1510 1655 *
1511 1656 * fattach(3c) is really a mount, so there are actually two possible
1512 1657 * vnodes we could be dealing with. Our strategy is as follows:
1513 1658 *
1514 1659 * - If the file we opened is a regular file (common case):
1515 1660 * There is no fattach(3c)ed door, so we have a chance of becoming
1516 1661 * the managing zoneadmd. We attempt to lock the file: if it is
1517 1662 * already locked, that means someone else raced us here, so we
1518 1663 * lose and give up. zoneadm(1m) will try to contact the zoneadmd
1519 1664 * that beat us to it.
1520 1665 *
1521 1666 * - If the file we opened is a namefs file:
1522 1667 * This means there is already an established door fattach(3c)'ed
1523 1668 * to the rendezvous path. We've lost the race, so we give up.
1524 1669 * Note that in this case we also try to grab the file lock, and
1525 1670 * will succeed in acquiring it since the vnode locked by the
1526 1671 * "winning" zoneadmd was a regular one, and the one we locked was
1527 1672 * the fattach(3c)'ed door node. At any rate, no harm is done, and
1528 1673 * we just return to zoneadm(1m) which knows to retry.
1529 1674 */
1530 1675 static int
1531 1676 make_daemon_exclusive(zlog_t *zlogp)
1532 1677 {
1533 1678 int doorfd = -1;
1534 1679 int err, ret = -1;
1535 1680 struct stat st;
1536 1681 struct flock flock;
1537 1682 zone_state_t zstate;
1538 1683
1539 1684 top:
1540 1685 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
1541 1686 zerror(zlogp, B_FALSE, "failed to get zone state: %s",
1542 1687 zonecfg_strerror(err));
1543 1688 goto out;
1544 1689 }
1545 1690 if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR,
1546 1691 S_IREAD|S_IWRITE)) < 0) {
1547 1692 zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path);
1548 1693 goto out;
1549 1694 }
1550 1695 if (fstat(doorfd, &st) < 0) {
1551 1696 zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path);
1552 1697 goto out;
1553 1698 }
1554 1699 /*
1555 1700 * Lock the file to synchronize with other zoneadmd
1556 1701 */
1557 1702 flock.l_type = F_WRLCK;
1558 1703 flock.l_whence = SEEK_SET;
1559 1704 flock.l_start = (off_t)0;
1560 1705 flock.l_len = (off_t)0;
1561 1706 if (fcntl(doorfd, F_SETLK, &flock) < 0) {
1562 1707 /*
1563 1708 * Someone else raced us here and grabbed the lock file
1564 1709 * first. A warning here is inappropriate since nothing
1565 1710 * went wrong.
1566 1711 */
1567 1712 goto out;
1568 1713 }
1569 1714
1570 1715 if (strcmp(st.st_fstype, "namefs") == 0) {
1571 1716 struct door_info info;
1572 1717
1573 1718 /*
1574 1719 * There is already something fattach()'ed to this file.
1575 1720 * Lets see what the door is up to.
1576 1721 */
1577 1722 if (door_info(doorfd, &info) == 0 && info.di_target != -1) {
1578 1723 /*
1579 1724 * Another zoneadmd process seems to be in
1580 1725 * control of the situation and we don't need to
1581 1726 * be here. A warning here is inappropriate
1582 1727 * since nothing went wrong.
1583 1728 *
1584 1729 * If the door has been revoked, the zoneadmd
1585 1730 * process currently managing the zone is going
1586 1731 * away. We'll return control to zoneadm(1m)
1587 1732 * which will try again (by which time zoneadmd
1588 1733 * will hopefully have exited).
1589 1734 */
1590 1735 goto out;
1591 1736 }
1592 1737
1593 1738 /*
1594 1739 * If we got this far, there's a fattach(3c)'ed door
1595 1740 * that belongs to a process that has exited, which can
1596 1741 * happen if the previous zoneadmd died unexpectedly.
1597 1742 *
1598 1743 * Let user know that something is amiss, but that we can
1599 1744 * recover; if the zone is in the installed state, then don't
1600 1745 * message, since having a running zoneadmd isn't really
1601 1746 * expected/needed. We want to keep occurences of this message
1602 1747 * limited to times when zoneadmd is picking back up from a
1603 1748 * zoneadmd that died while the zone was in some non-trivial
1604 1749 * state.
1605 1750 */
1606 1751 if (zstate > ZONE_STATE_INSTALLED) {
1607 1752 zerror(zlogp, B_FALSE,
1608 1753 "zone '%s': WARNING: zone is in state '%s', but "
1609 1754 "zoneadmd does not appear to be available; "
1610 1755 "restarted zoneadmd to recover.",
1611 1756 zone_name, zone_state_str(zstate));
1612 1757 }
1613 1758
1614 1759 (void) fdetach(zone_door_path);
1615 1760 (void) close(doorfd);
1616 1761 goto top;
1617 1762 }
1618 1763 ret = 0;
1619 1764 out:
1620 1765 (void) close(doorfd);
1621 1766 return (ret);
1622 1767 }
1623 1768
1624 1769 /*
1625 1770 * Setup the brand's pre and post state change callbacks, as well as the
1626 1771 * query callback, if any of these exist.
1627 1772 */
1628 1773 static int
1629 1774 brand_callback_init(brand_handle_t bh, char *zone_name)
1630 1775 {
1631 1776 char zpath[MAXPATHLEN];
1632 1777
1633 1778 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK)
1634 1779 return (-1);
1635 1780
1636 1781 (void) strlcpy(pre_statechg_hook, EXEC_PREFIX,
1637 1782 sizeof (pre_statechg_hook));
1638 1783
1639 1784 if (brand_get_prestatechange(bh, zone_name, zpath,
1640 1785 pre_statechg_hook + EXEC_LEN,
1641 1786 sizeof (pre_statechg_hook) - EXEC_LEN) != 0)
1642 1787 return (-1);
1643 1788
1644 1789 if (strlen(pre_statechg_hook) <= EXEC_LEN)
1645 1790 pre_statechg_hook[0] = '\0';
1646 1791
1647 1792 (void) strlcpy(post_statechg_hook, EXEC_PREFIX,
1648 1793 sizeof (post_statechg_hook));
1649 1794
1650 1795 if (brand_get_poststatechange(bh, zone_name, zpath,
1651 1796 post_statechg_hook + EXEC_LEN,
1652 1797 sizeof (post_statechg_hook) - EXEC_LEN) != 0)
1653 1798 return (-1);
1654 1799
1655 1800 if (strlen(post_statechg_hook) <= EXEC_LEN)
1656 1801 post_statechg_hook[0] = '\0';
1657 1802
1658 1803 (void) strlcpy(query_hook, EXEC_PREFIX,
1659 1804 sizeof (query_hook));
1660 1805
1661 1806 if (brand_get_query(bh, zone_name, zpath, query_hook + EXEC_LEN,
1662 1807 sizeof (query_hook) - EXEC_LEN) != 0)
1663 1808 return (-1);
1664 1809
1665 1810 if (strlen(query_hook) <= EXEC_LEN)
1666 1811 query_hook[0] = '\0';
1667 1812
1668 1813 return (0);
1669 1814 }
1670 1815
1671 1816 int
1672 1817 main(int argc, char *argv[])
1673 1818 {
1674 1819 int opt;
1675 1820 zoneid_t zid;
1676 1821 priv_set_t *privset;
1677 1822 zone_state_t zstate;
1678 1823 char parents_locale[MAXPATHLEN];
1679 1824 brand_handle_t bh;
1680 1825 int err;
1681 1826
1682 1827 pid_t pid;
1683 1828 sigset_t blockset;
1684 1829 sigset_t block_cld;
1685 1830
1686 1831 struct {
1687 1832 sema_t sem;
1688 1833 int status;
1689 1834 zlog_t log;
1690 1835 } *shstate;
1691 1836 size_t shstatelen = getpagesize();
1692 1837
1693 1838 zlog_t errlog;
1694 1839 zlog_t *zlogp;
1695 1840
1696 1841 int ctfd;
1697 1842
1698 1843 progname = get_execbasename(argv[0]);
1699 1844
1700 1845 /*
1701 1846 * Make sure stderr is unbuffered
1702 1847 */
1703 1848 (void) setbuffer(stderr, NULL, 0);
1704 1849
1705 1850 /*
1706 1851 * Get out of the way of mounted filesystems, since we will daemonize
1707 1852 * soon.
1708 1853 */
1709 1854 (void) chdir("/");
1710 1855
1711 1856 /*
1712 1857 * Use the default system umask per PSARC 1998/110 rather than
1713 1858 * anything that may have been set by the caller.
1714 1859 */
1715 1860 (void) umask(CMASK);
1716 1861
1717 1862 /*
1718 1863 * Initially we want to use our parent's locale.
1719 1864 */
1720 1865 (void) setlocale(LC_ALL, "");
1721 1866 (void) textdomain(TEXT_DOMAIN);
1722 1867 (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL),
1723 1868 sizeof (parents_locale));
1724 1869
1725 1870 /*
1726 1871 * This zlog_t is used for writing to stderr
1727 1872 */
1728 1873 errlog.logfile = stderr;
1729 1874 errlog.buflen = errlog.loglen = 0;
1730 1875 errlog.buf = errlog.log = NULL;
1731 1876 errlog.locale = parents_locale;
1732 1877
1733 1878 /*
1734 1879 * We start off writing to stderr until we're ready to daemonize.
1735 1880 */
1736 1881 zlogp = &errlog;
1737 1882
1738 1883 /*
1739 1884 * Process options.
1740 1885 */
1741 1886 while ((opt = getopt(argc, argv, "R:z:")) != EOF) {
1742 1887 switch (opt) {
1743 1888 case 'R':
1744 1889 zonecfg_set_root(optarg);
1745 1890 break;
1746 1891 case 'z':
1747 1892 zone_name = optarg;
1748 1893 break;
1749 1894 default:
1750 1895 usage();
1751 1896 }
1752 1897 }
1753 1898
1754 1899 if (zone_name == NULL)
1755 1900 usage();
1756 1901
1757 1902 /*
1758 1903 * Because usage() prints directly to stderr, it has gettext()
1759 1904 * wrapping, which depends on the locale. But since zerror() calls
1760 1905 * localize() which tweaks the locale, it is not safe to call zerror()
1761 1906 * until after the last call to usage(). Fortunately, the last call
1762 1907 * to usage() is just above and the first call to zerror() is just
1763 1908 * below. Don't mess this up.
1764 1909 */
1765 1910 if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) {
1766 1911 zerror(zlogp, B_FALSE, "cannot manage the %s zone",
1767 1912 GLOBAL_ZONENAME);
1768 1913 return (1);
1769 1914 }
1770 1915
1771 1916 if (zone_get_id(zone_name, &zid) != 0) {
1772 1917 zerror(zlogp, B_FALSE, "could not manage %s: %s", zone_name,
1773 1918 zonecfg_strerror(Z_NO_ZONE));
1774 1919 return (1);
1775 1920 }
1776 1921
1777 1922 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
1778 1923 zerror(zlogp, B_FALSE, "failed to get zone state: %s",
1779 1924 zonecfg_strerror(err));
1780 1925 return (1);
1781 1926 }
1782 1927 if (zstate < ZONE_STATE_INCOMPLETE) {
1783 1928 zerror(zlogp, B_FALSE,
1784 1929 "cannot manage a zone which is in state '%s'",
1785 1930 zone_state_str(zstate));
1786 1931 return (1);
1787 1932 }
1788 1933
1789 1934 if (zonecfg_default_brand(default_brand,
1790 1935 sizeof (default_brand)) != Z_OK) {
1791 1936 zerror(zlogp, B_FALSE, "unable to determine default brand");
1792 1937 return (1);
1793 1938 }
1794 1939
1795 1940 /* Get a handle to the brand info for this zone */
1796 1941 if (zone_get_brand(zone_name, brand_name, sizeof (brand_name))
1797 1942 != Z_OK) {
1798 1943 zerror(zlogp, B_FALSE, "unable to determine zone brand");
1799 1944 return (1);
1800 1945 }
1801 1946 zone_isnative = (strcmp(brand_name, NATIVE_BRAND_NAME) == 0);
1802 1947 zone_islabeled = (strcmp(brand_name, LABELED_BRAND_NAME) == 0);
1803 1948
1804 1949 /*
1805 1950 * In the alternate root environment, the only supported
1806 1951 * operations are mount and unmount. In this case, just treat
1807 1952 * the zone as native if it is cluster. Cluster zones can be
1808 1953 * native for the purpose of LU or upgrade, and the cluster
1809 1954 * brand may not exist in the miniroot (such as in net install
1810 1955 * upgrade).
1811 1956 */
1812 1957 if (strcmp(brand_name, CLUSTER_BRAND_NAME) == 0) {
1813 1958 zone_iscluster = B_TRUE;
1814 1959 if (zonecfg_in_alt_root()) {
1815 1960 (void) strlcpy(brand_name, default_brand,
1816 1961 sizeof (brand_name));
1817 1962 }
1818 1963 } else {
1819 1964 zone_iscluster = B_FALSE;
1820 1965 }
1821 1966
1822 1967 if ((bh = brand_open(brand_name)) == NULL) {
1823 1968 zerror(zlogp, B_FALSE, "unable to open zone brand");
1824 1969 return (1);
1825 1970 }
1826 1971
1827 1972 /* Get state change brand hooks. */
1828 1973 if (brand_callback_init(bh, zone_name) == -1) {
1829 1974 zerror(zlogp, B_TRUE,
1830 1975 "failed to initialize brand state change hooks");
1831 1976 brand_close(bh);
1832 1977 return (1);
1833 1978 }
1834 1979
1835 1980 brand_close(bh);
1836 1981
1837 1982 /*
1838 1983 * Check that we have all privileges. It would be nice to pare
1839 1984 * this down, but this is at least a first cut.
1840 1985 */
1841 1986 if ((privset = priv_allocset()) == NULL) {
1842 1987 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
1843 1988 return (1);
1844 1989 }
1845 1990
1846 1991 if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
1847 1992 zerror(zlogp, B_TRUE, "%s failed", "getppriv");
1848 1993 priv_freeset(privset);
1849 1994 return (1);
1850 1995 }
1851 1996
1852 1997 if (priv_isfullset(privset) == B_FALSE) {
1853 1998 zerror(zlogp, B_FALSE, "You lack sufficient privilege to "
1854 1999 "run this command (all privs required)");
1855 2000 priv_freeset(privset);
1856 2001 return (1);
1857 2002 }
1858 2003 priv_freeset(privset);
1859 2004
1860 2005 if (mkzonedir(zlogp) != 0)
1861 2006 return (1);
1862 2007
1863 2008 /*
1864 2009 * Pre-fork: set up shared state
1865 2010 */
1866 2011 if ((shstate = (void *)mmap(NULL, shstatelen,
1867 2012 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) ==
1868 2013 MAP_FAILED) {
1869 2014 zerror(zlogp, B_TRUE, "%s failed", "mmap");
1870 2015 return (1);
1871 2016 }
1872 2017 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) {
1873 2018 zerror(zlogp, B_TRUE, "%s failed", "sema_init()");
1874 2019 (void) munmap((char *)shstate, shstatelen);
1875 2020 return (1);
1876 2021 }
1877 2022 shstate->log.logfile = NULL;
1878 2023 shstate->log.buflen = shstatelen - sizeof (*shstate);
1879 2024 shstate->log.loglen = shstate->log.buflen;
1880 2025 shstate->log.buf = (char *)shstate + sizeof (*shstate);
1881 2026 shstate->log.log = shstate->log.buf;
1882 2027 shstate->log.locale = parents_locale;
1883 2028 shstate->status = -1;
1884 2029
1885 2030 /*
1886 2031 * We need a SIGCHLD handler so the sema_wait() below will wake
1887 2032 * up if the child dies without doing a sema_post().
1888 2033 */
1889 2034 (void) sigset(SIGCHLD, sigchld);
1890 2035 /*
1891 2036 * We must mask SIGCHLD until after we've coped with the fork
1892 2037 * sufficiently to deal with it; otherwise we can race and
1893 2038 * receive the signal before pid has been initialized
1894 2039 * (yes, this really happens).
1895 2040 */
1896 2041 (void) sigemptyset(&block_cld);
1897 2042 (void) sigaddset(&block_cld, SIGCHLD);
1898 2043 (void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
1899 2044
1900 2045 if ((ctfd = init_template()) == -1) {
1901 2046 zerror(zlogp, B_TRUE, "failed to create contract");
1902 2047 return (1);
1903 2048 }
1904 2049
1905 2050 /*
1906 2051 * Do not let another thread localize a message while we are forking.
1907 2052 */
1908 2053 (void) mutex_lock(&msglock);
1909 2054 pid = fork();
1910 2055 (void) mutex_unlock(&msglock);
1911 2056
1912 2057 /*
1913 2058 * In all cases (parent, child, and in the event of an error) we
1914 2059 * don't want to cause creation of contracts on subsequent fork()s.
1915 2060 */
1916 2061 (void) ct_tmpl_clear(ctfd);
1917 2062 (void) close(ctfd);
1918 2063
1919 2064 if (pid == -1) {
1920 2065 zerror(zlogp, B_TRUE, "could not fork");
1921 2066 return (1);
1922 2067
1923 2068 } else if (pid > 0) { /* parent */
1924 2069 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1925 2070 /*
1926 2071 * This marks a window of vulnerability in which we may receive
1927 2072 * SIGCHLD before falling into sema_wait() (normally we would
1928 2073 * get woken up from sema_wait() with EINTR upon receipt of
1929 2074 * SIGCHLD). So we may need to use some other scheme, like
1930 2075 * calling sema_post() in the sigchld handler.
1931 2076 * blech
1932 2077 */
1933 2078 (void) sema_wait(&shstate->sem);
1934 2079 (void) sema_destroy(&shstate->sem);
1935 2080 if (shstate->status != 0)
1936 2081 (void) waitpid(pid, NULL, WNOHANG);
1937 2082 /*
1938 2083 * It's ok if we die with SIGPIPE. It's not like we could have
1939 2084 * done anything about it.
1940 2085 */
1941 2086 (void) fprintf(stderr, "%s", shstate->log.buf);
1942 2087 _exit(shstate->status == 0 ? 0 : 1);
1943 2088 }
1944 2089
1945 2090 /*
1946 2091 * The child charges on.
1947 2092 */
1948 2093 (void) sigset(SIGCHLD, SIG_DFL);
1949 2094 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1950 2095
1951 2096 /*
1952 2097 * SIGPIPE can be delivered if we write to a socket for which the
1953 2098 * peer endpoint is gone. That can lead to too-early termination
1954 2099 * of zoneadmd, and that's not good eats.
1955 2100 */
1956 2101 (void) sigset(SIGPIPE, SIG_IGN);
1957 2102 /*
1958 2103 * Stop using stderr
1959 2104 */
1960 2105 zlogp = &shstate->log;
1961 2106
1962 2107 /*
1963 2108 * We don't need stdout/stderr from now on.
1964 2109 */
1965 2110 closefrom(0);
1966 2111
1967 2112 /*
1968 2113 * Initialize the syslog zlog_t. This needs to be done after
1969 2114 * the call to closefrom().
1970 2115 */
1971 2116 logsys.buf = logsys.log = NULL;
1972 2117 logsys.buflen = logsys.loglen = 0;
1973 2118 logsys.logfile = NULL;
1974 2119 logsys.locale = DEFAULT_LOCALE;
1975 2120
1976 2121 openlog("zoneadmd", LOG_PID, LOG_DAEMON);
1977 2122
1978 2123 /*
1979 2124 * The eventstream is used to publish state changes in the zone
1980 2125 * from the door threads to the console I/O poller.
1981 2126 */
1982 2127 if (eventstream_init() == -1) {
1983 2128 zerror(zlogp, B_TRUE, "unable to create eventstream");
1984 2129 goto child_out;
1985 2130 }
1986 2131
1987 2132 (void) snprintf(zone_door_path, sizeof (zone_door_path),
1988 2133 "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name);
1989 2134
1990 2135 /*
1991 2136 * See if another zoneadmd is running for this zone. If not, then we
1992 2137 * can now modify system state.
1993 2138 */
1994 2139 if (make_daemon_exclusive(zlogp) == -1)
1995 2140 goto child_out;
1996 2141
1997 2142
1998 2143 /*
1999 2144 * Create/join a new session; we need to be careful of what we do with
2000 2145 * the console from now on so we don't end up being the session leader
2001 2146 * for the terminal we're going to be handing out.
2002 2147 */
2003 2148 (void) setsid();
2004 2149
2005 2150 /*
2006 2151 * This thread shouldn't be receiving any signals; in particular,
2007 2152 * SIGCHLD should be received by the thread doing the fork().
2008 2153 */
2009 2154 (void) sigfillset(&blockset);
2010 2155 (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL);
2011 2156
2012 2157 /*
2013 2158 * Set up the console device and get ready to serve the console;
2014 2159 * once this has completed, we're ready to let console clients
2015 2160 * make an attempt to connect (they will block until
2016 2161 * serve_console_sock() below gets called, and any pending
2017 2162 * connection is accept()ed).
2018 2163 */
2019 2164 if (!zonecfg_in_alt_root() && init_console(zlogp) < 0)
2020 2165 goto child_out;
2021 2166
2022 2167 /*
2023 2168 * Take the lock now, so that when the door server gets going, we
2024 2169 * are guaranteed that it won't take a request until we are sure
2025 2170 * that everything is completely set up. See the child_out: label
2026 2171 * below to see why this matters.
2027 2172 */
2028 2173 (void) mutex_lock(&lock);
2029 2174
2030 2175 /* Init semaphore for scratch zones. */
2031 2176 if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) {
2032 2177 zerror(zlogp, B_TRUE,
2033 2178 "failed to initialize semaphore for scratch zone");
2034 2179 goto child_out;
2035 2180 }
2036 2181
2037 2182 /* open the dladm handle */
2038 2183 if (dladm_open(&dld_handle) != DLADM_STATUS_OK) {
2039 2184 zerror(zlogp, B_FALSE, "failed to open dladm handle");
2040 2185 goto child_out;
2041 2186 }
2042 2187
2043 2188 /*
2044 2189 * Note: door setup must occur *after* the console is set up.
2045 2190 * This is so that as zlogin tests the door to see if zoneadmd
2046 2191 * is ready yet, we know that the console will get serviced
2047 2192 * once door_info() indicates that the door is "up".
2048 2193 */
2049 2194 if (setup_door(zlogp) == -1)
2050 2195 goto child_out;
2051 2196
2052 2197 /*
2053 2198 * Things seem OK so far; tell the parent process that we're done
2054 2199 * with setup tasks. This will cause the parent to exit, signalling
2055 2200 * to zoneadm, zlogin, or whatever forked it that we are ready to
2056 2201 * service requests.
2057 2202 */
2058 2203 shstate->status = 0;
2059 2204 (void) sema_post(&shstate->sem);
2060 2205 (void) munmap((char *)shstate, shstatelen);
2061 2206 shstate = NULL;
2062 2207
2063 2208 (void) mutex_unlock(&lock);
2064 2209
2065 2210 /*
2066 2211 * zlogp is now invalid, so reset it to the syslog logger.
2067 2212 */
2068 2213 zlogp = &logsys;
2069 2214
2070 2215 /*
2071 2216 * Now that we are free of any parents, switch to the default locale.
2072 2217 */
2073 2218 (void) setlocale(LC_ALL, DEFAULT_LOCALE);
2074 2219
2075 2220 /*
2076 2221 * At this point the setup portion of main() is basically done, so
2077 2222 * we reuse this thread to manage the zone console. When
2078 2223 * serve_console() has returned, we are past the point of no return
2079 2224 * in the life of this zoneadmd.
2080 2225 */
2081 2226 if (zonecfg_in_alt_root()) {
2082 2227 /*
2083 2228 * This is just awful, but mounted scratch zones don't (and
2084 2229 * can't) have consoles. We just wait for unmount instead.
2085 2230 */
2086 2231 while (sema_wait(&scratch_sem) == EINTR)
2087 2232 ;
2088 2233 } else {
2089 2234 serve_console(zlogp);
2090 2235 assert(in_death_throes);
2091 2236 }
2092 2237
2093 2238 /*
2094 2239 * This is the next-to-last part of the exit interlock. Upon calling
2095 2240 * fdetach(), the door will go unreferenced; once any
2096 2241 * outstanding requests (like the door thread doing Z_HALT) are
2097 2242 * done, the door will get an UNREF notification; when it handles
2098 2243 * the UNREF, the door server will cause the exit. It's possible
2099 2244 * that fdetach() can fail because the file is in use, in which
2100 2245 * case we'll retry the operation.
2101 2246 */
2102 2247 assert(!MUTEX_HELD(&lock));
2103 2248 for (;;) {
2104 2249 if ((fdetach(zone_door_path) == 0) || (errno != EBUSY))
2105 2250 break;
2106 2251 yield();
2107 2252 }
2108 2253
2109 2254 for (;;)
2110 2255 (void) pause();
2111 2256
2112 2257 child_out:
2113 2258 assert(pid == 0);
2114 2259 if (shstate != NULL) {
2115 2260 shstate->status = -1;
2116 2261 (void) sema_post(&shstate->sem);
2117 2262 (void) munmap((char *)shstate, shstatelen);
2118 2263 }
2119 2264
2120 2265 /*
2121 2266 * This might trigger an unref notification, but if so,
2122 2267 * we are still holding the lock, so our call to exit will
2123 2268 * ultimately win the race and will publish the right exit
2124 2269 * code.
2125 2270 */
2126 2271 if (zone_door != -1) {
2127 2272 assert(MUTEX_HELD(&lock));
2128 2273 (void) door_revoke(zone_door);
2129 2274 (void) fdetach(zone_door_path);
2130 2275 }
2131 2276
2132 2277 if (dld_handle != NULL)
2133 2278 dladm_close(dld_handle);
2134 2279
2135 2280 return (1); /* return from main() forcibly exits an MT process */
2136 2281 }
↓ open down ↓ |
657 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX