1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2013 DEY Storage Systems, Inc.
  24  * Copyright (c) 2014 Gary Mills
  25  * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
  26  * Copyright 2019 Joyent, Inc.
  27  * Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
  28  */
  29 
  30 /*
  31  * zlogin provides three types of login which allow users in the global
  32  * zone to access non-global zones.
  33  *
  34  * - "interactive login" is similar to rlogin(1); for example, the user could
  35  *   issue 'zlogin my-zone' or 'zlogin -e ^ -l me my-zone'.   The user is
  36  *   granted a new pty (which is then shoved into the zone), and an I/O
  37  *   loop between parent and child processes takes care of the interactive
  38  *   session.  In this mode, login(1) (and its -c option, which means
  39  *   "already authenticated") is employed to take care of the initialization
  40  *   of the user's session.
  41  *
  42  * - "non-interactive login" is similar to su(1M); the user could issue
  43  *   'zlogin my-zone ls -l' and the command would be run as specified.
  44  *   In this mode, zlogin sets up pipes as the communication channel, and
  45  *   'su' is used to do the login setup work.
  46  *
  47  * - "console login" is the equivalent to accessing the tip line for a
  48  *   zone.  For example, the user can issue 'zlogin -C my-zone'.
  49  *   In this mode, zlogin contacts the zoneadmd process via unix domain
  50  *   socket.  If zoneadmd is not running, it starts it.  This allows the
  51  *   console to be available anytime the zone is installed, regardless of
  52  *   whether it is running.
  53  */
  54 
  55 #include <sys/socket.h>
  56 #include <sys/termios.h>
  57 #include <sys/utsname.h>
  58 #include <sys/stat.h>
  59 #include <sys/types.h>
  60 #include <sys/contract/process.h>
  61 #include <sys/ctfs.h>
  62 #include <sys/brand.h>
  63 #include <sys/wait.h>
  64 #include <alloca.h>
  65 #include <assert.h>
  66 #include <ctype.h>
  67 #include <paths.h>
  68 #include <door.h>
  69 #include <errno.h>
  70 #include <nss_dbdefs.h>
  71 #include <poll.h>
  72 #include <priv.h>
  73 #include <pwd.h>
  74 #include <unistd.h>
  75 #include <utmpx.h>
  76 #include <sac.h>
  77 #include <signal.h>
  78 #include <stdarg.h>
  79 #include <stdio.h>
  80 #include <stdlib.h>
  81 #include <string.h>
  82 #include <strings.h>
  83 #include <stropts.h>
  84 #include <wait.h>
  85 #include <zone.h>
  86 #include <fcntl.h>
  87 #include <libdevinfo.h>
  88 #include <libintl.h>
  89 #include <locale.h>
  90 #include <libzonecfg.h>
  91 #include <libcontract.h>
  92 #include <libbrand.h>
  93 #include <auth_list.h>
  94 #include <auth_attr.h>
  95 #include <secdb.h>
  96 
  97 static int masterfd;        /* console socket or /dev/ptmx master fd */
  98 static struct termios save_termios;     /* user's tty settings to restore */
  99 static struct termios effective_termios; /* raw settings + orig VEOF/VEOL */
 100 static int save_fd;         /* fd that save_termios was captured from */
 101 static struct winsize winsize;  /* stdin window size, copied to the slave */
 102 static volatile int dead;   /* set to 1 by sigcld() once the child is gone */
 103 static volatile pid_t child_pid = -1;   /* child watched by sigcld(); -1: none */
 104 static int interactive = 0; /* consulted by prefork_dropprivs() */
 105 static priv_set_t *dropprivs;   /* built pre-fork, applied post-fork */
 106 
 107 static int nocmdchar = 0;   /* nonzero: no escape-char processing on input */
 108 static int failsafe = 0;    /* nonzero: tolerate slave pty setup failures */
 109 static int disconnect = 0;  /* sent as last field of the console IDENT line */
 110 static char cmdchar = '~';  /* escape character recognized at line start */
 111 static int quiet = 0;       /* NOTE(review): not used in the visible code */
 112 
 113 static int pollerr = 0;     /* NOTE(review): not used in the visible code */
 114 
 115 static const char *pname;   /* program name; set by getpname() */
 116 static char *username;      /* NOTE(review): not used in the visible code */
 117 
 118 extern int __xpg4;      /* 0 if not an xpg4/6-compiled program */
 119 
 120 /*
 121  * When forced_login is true, the user is not prompted
 122  * for an authentication password in the target zone.
 123  */
 124 static boolean_t forced_login = B_FALSE;
 125 
 126 #if !defined(TEXT_DOMAIN)               /* should be defined by cc -D */
 127 #define TEXT_DOMAIN     "SYS_TEST"      /* Use this only if it wasn't */
 128 #endif
 129 
 130 #define SUPATH  "/usr/bin/su"   /* su binary; see non-interactive login above */
 131 #define FAILSAFESHELL   "/sbin/sh"      /* presumably shell for failsafe mode */
 132 #define DEFAULTSHELL    "/sbin/sh"      /* presumably fallback login shell */
 133 #define DEF_PATH        "/usr/sbin:/usr/bin"    /* presumably default PATH */
 134 
 135 #define CLUSTER_BRAND_NAME      "cluster"       /* brand name; use not shown here */
 136 
 137 /*
 138  * The ZLOGIN_BUFSIZ is larger than PIPE_BUF so we can be sure we're clearing
 139  * out the pipe when the child is exiting.  The ZLOGIN_RDBUFSIZ must be less
 140  * than ZLOGIN_BUFSIZ (because we share the buffer in doio).  This value is
 141  * also chosen in conjunction with the HI_WATER setting to make sure we
 142  * don't fill up the pipe.  We can write FIFOHIWAT (16k) into the pipe before
 143  * blocking.  By having ZLOGIN_RDBUFSIZ set to 1k and HI_WATER set to 8k, we
 144  * know we can always write a ZLOGIN_RDBUFSIZ chunk into the pipe when there
 145  * is less than HI_WATER data already in the pipe.
 146  */
 147 #define ZLOGIN_BUFSIZ   8192
 148 #define ZLOGIN_RDBUFSIZ 1024
 149 #define HI_WATER        8192
 150 
 151 /*
 152  * See canonify() below.  CANONIFY_LEN is the maximum length that a
 153  * "canonical" sequence will expand to (backslash, three octal digits, NUL).
 154  */
 155 #define CANONIFY_LEN 5
 156 
 157 static void
 158 usage(void)
 159 {
 160         (void) fprintf(stderr, gettext("usage: %s [ -dnQCES ] [ -e cmdchar ] "
 161             "[-l user] zonename [command [args ...] ]\n"), pname);
 162         exit(2);
 163 }
 164 
 165 static const char *
 166 getpname(const char *arg0)
 167 {
 168         const char *p = strrchr(arg0, '/');
 169 
 170         if (p == NULL)
 171                 p = arg0;
 172         else
 173                 p++;
 174 
 175         pname = p;
 176         return (p);
 177 }
 178 
 179 static void
 180 zerror(const char *fmt, ...)
 181 {
 182         va_list alist;
 183 
 184         (void) fprintf(stderr, "%s: ", pname);
 185         va_start(alist, fmt);
 186         (void) vfprintf(stderr, fmt, alist);
 187         va_end(alist);
 188         (void) fprintf(stderr, "\n");
 189 }
 190 
 191 static void
 192 zperror(const char *str)
 193 {
 194         const char *estr;
 195 
 196         if ((estr = strerror(errno)) != NULL)
 197                 (void) fprintf(stderr, "%s: %s: %s\n", pname, str, estr);
 198         else
 199                 (void) fprintf(stderr, "%s: %s: errno %d\n", pname, str, errno);
 200 }
 201 
 202 /*
 203  * The first part of our privilege dropping scheme needs to be called before
 204  * fork(), since we must have it for security; we don't want to be surprised
 205  * later that we couldn't allocate the privset.
 206  */
 207 static int
 208 prefork_dropprivs()
 209 {
 210         if ((dropprivs = priv_allocset()) == NULL)
 211                 return (1);
 212 
 213         priv_basicset(dropprivs);
 214         (void) priv_delset(dropprivs, PRIV_PROC_INFO);
 215         (void) priv_delset(dropprivs, PRIV_PROC_FORK);
 216         (void) priv_delset(dropprivs, PRIV_PROC_EXEC);
 217         (void) priv_delset(dropprivs, PRIV_FILE_LINK_ANY);
 218 
 219         /*
 220          * We need to keep the basic privilege PROC_SESSION and all unknown
 221          * basic privileges as well as the privileges PROC_ZONE and
 222          * PROC_OWNER in order to query session information and
 223          * send signals.
 224          */
 225         if (interactive == 0) {
 226                 (void) priv_addset(dropprivs, PRIV_PROC_ZONE);
 227                 (void) priv_addset(dropprivs, PRIV_PROC_OWNER);
 228         } else {
 229                 (void) priv_delset(dropprivs, PRIV_PROC_SESSION);
 230         }
 231 
 232         return (0);
 233 }
 234 
 235 /*
 236  * The second part of the privilege drop.  We are paranoid about being attacked
 237  * by the zone, so we drop all privileges.  This should prevent a compromise
 238  * which gets us to fork(), exec(), symlink(), etc.
 239  */
 240 static void
 241 postfork_dropprivs()
 242 {
 243         if ((setppriv(PRIV_SET, PRIV_PERMITTED, dropprivs)) == -1) {
 244                 zperror(gettext("Warning: could not set permitted privileges"));
 245         }
 246         if ((setppriv(PRIV_SET, PRIV_LIMIT, dropprivs)) == -1) {
 247                 zperror(gettext("Warning: could not set limit privileges"));
 248         }
 249         if ((setppriv(PRIV_SET, PRIV_INHERITABLE, dropprivs)) == -1) {
 250                 zperror(gettext("Warning: could not set inheritable "
 251                     "privileges"));
 252         }
 253 }
 254 
 255 /*
 256  * Create the unix domain socket and call the zoneadmd server; handshake
 257  * with it to determine whether it will allow us to connect.
 258  */
 259 static int
 260 get_console_master(const char *zname)
 261 {
 262         int sockfd = -1;
 263         struct sockaddr_un servaddr;
 264         char clientid[MAXPATHLEN];
 265         char handshake[MAXPATHLEN], c;
 266         int msglen;
 267         int i = 0, err = 0;
 268 
 269         if ((sockfd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
 270                 zperror(gettext("could not create socket"));
 271                 return (-1);
 272         }
 273 
 274         bzero(&servaddr, sizeof (servaddr));
 275         servaddr.sun_family = AF_UNIX;
 276         (void) snprintf(servaddr.sun_path, sizeof (servaddr.sun_path),
 277             "%s/%s.console_sock", ZONES_TMPDIR, zname);
 278 
 279         if (connect(sockfd, (struct sockaddr *)&servaddr,
 280             sizeof (servaddr)) == -1) {
 281                 zperror(gettext("Could not connect to zone console"));
 282                 goto bad;
 283         }
 284         masterfd = sockfd;
 285 
 286         msglen = snprintf(clientid, sizeof (clientid), "IDENT %lu %s %d\n",
 287             getpid(), setlocale(LC_MESSAGES, NULL), disconnect);
 288 
 289         if (msglen >= sizeof (clientid) || msglen < 0) {
 290                 zerror("protocol error");
 291                 goto bad;
 292         }
 293 
 294         if (write(masterfd, clientid, msglen) != msglen) {
 295                 zerror("protocol error");
 296                 goto bad;
 297         }
 298 
 299         bzero(handshake, sizeof (handshake));
 300 
 301         /*
 302          * Take care not to accumulate more than our fill, and leave room for
 303          * the NUL at the end.
 304          */
 305         while ((err = read(masterfd, &c, 1)) == 1) {
 306                 if (i >= (sizeof (handshake) - 1))
 307                         break;
 308                 if (c == '\n')
 309                         break;
 310                 handshake[i] = c;
 311                 i++;
 312         }
 313 
 314         /*
 315          * If something went wrong during the handshake we bail; perhaps
 316          * the server died off.
 317          */
 318         if (err == -1) {
 319                 zperror(gettext("Could not connect to zone console"));
 320                 goto bad;
 321         }
 322 
 323         if (strncmp(handshake, "OK", sizeof (handshake)) == 0)
 324                 return (0);
 325 
 326         zerror(gettext("Console is already in use by process ID %s."),
 327             handshake);
 328 bad:
 329         (void) close(sockfd);
 330         masterfd = -1;
 331         return (-1);
 332 }
 333 
 334 
 335 /*
 336  * Routines to handle pty creation upon zone entry and to shuttle I/O back
 337  * and forth between the two terminals.  We also compute and store the
 338  * name of the slave terminal associated with the master side.
 339  */
 340 static int
 341 get_master_pty()
 342 {
 343         if ((masterfd = open("/dev/ptmx", O_RDWR|O_NONBLOCK)) < 0) {
 344                 zperror(gettext("failed to obtain a pseudo-tty"));
 345                 return (-1);
 346         }
 347         if (tcgetattr(STDIN_FILENO, &save_termios) == -1) {
 348                 zperror(gettext("failed to get terminal settings from stdin"));
 349                 return (-1);
 350         }
 351         (void) ioctl(STDIN_FILENO, TIOCGWINSZ, (char *)&winsize);
 352 
 353         return (0);
 354 }
 355 
 356 /*
 357  * This is a bit tricky; normally a pts device will belong to the zone it
 358  * is granted to.  But in the case of "entering" a zone, we need to establish
 359  * the pty before entering the zone so that we can vector I/O to and from it
 360  * from the global zone.
 361  *
 362  * We use the zonept() call to let the ptm driver know what we are up to;
 363  * the only other hairy bit is the setting of zoneslavename (which happens
 364  * above, in get_master_pty()).
 365  */
 366 static int
 367 init_slave_pty(zoneid_t zoneid, char *devroot)
 368 {
 369         int slavefd = -1;
 370         char *slavename, zoneslavename[MAXPATHLEN];
 371 
 372         /*
 373          * Set slave permissions, zone the pts, then unlock it.
 374          */
 375         if (grantpt(masterfd) != 0) {
 376                 zperror(gettext("grantpt failed"));
 377                 return (-1);
 378         }
 379 
 380         if (unlockpt(masterfd) != 0) {
 381                 zperror(gettext("unlockpt failed"));
 382                 return (-1);
 383         }
 384 
 385         /*
 386          * We must open the slave side before zoning this pty; otherwise
 387          * the kernel would refuse us the open-- zoning a pty makes it
 388          * inaccessible to the global zone.  Note we are trying to open
 389          * the device node via the $ZONEROOT/dev path for this pty.
 390          *
 391          * Later we'll close the slave out when once we've opened it again
 392          * from within the target zone.  Blarg.
 393          */
 394         if ((slavename = ptsname(masterfd)) == NULL) {
 395                 zperror(gettext("failed to get name for pseudo-tty"));
 396                 return (-1);
 397         }
 398 
 399         (void) snprintf(zoneslavename, sizeof (zoneslavename), "%s%s",
 400             devroot, slavename);
 401 
 402         if ((slavefd = open(zoneslavename, O_RDWR)) < 0) {
 403                 zerror(gettext("failed to open %s: %s"), zoneslavename,
 404                     strerror(errno));
 405                 return (-1);
 406         }
 407 
 408         /*
 409          * Push hardware emulation (ptem), line discipline (ldterm),
 410          * and V7/4BSD/Xenix compatibility (ttcompat) modules.
 411          */
 412         if (ioctl(slavefd, I_PUSH, "ptem") == -1) {
 413                 zperror(gettext("failed to push ptem module"));
 414                 if (!failsafe)
 415                         goto bad;
 416         }
 417 
 418         /*
 419          * Anchor the stream to prevent malicious I_POPs; we prefer to do
 420          * this prior to entering the zone so that we can detect any errors
 421          * early, and so that we can set the anchor from the global zone.
 422          */
 423         if (ioctl(slavefd, I_ANCHOR) == -1) {
 424                 zperror(gettext("failed to set stream anchor"));
 425                 if (!failsafe)
 426                         goto bad;
 427         }
 428 
 429         if (ioctl(slavefd, I_PUSH, "ldterm") == -1) {
 430                 zperror(gettext("failed to push ldterm module"));
 431                 if (!failsafe)
 432                         goto bad;
 433         }
 434         if (ioctl(slavefd, I_PUSH, "ttcompat") == -1) {
 435                 zperror(gettext("failed to push ttcompat module"));
 436                 if (!failsafe)
 437                         goto bad;
 438         }
 439 
 440         /*
 441          * Propagate terminal settings from the external term to the new one.
 442          */
 443         if (tcsetattr(slavefd, TCSAFLUSH, &save_termios) == -1) {
 444                 zperror(gettext("failed to set terminal settings"));
 445                 if (!failsafe)
 446                         goto bad;
 447         }
 448         (void) ioctl(slavefd, TIOCSWINSZ, (char *)&winsize);
 449 
 450         if (zonept(masterfd, zoneid) != 0) {
 451                 zperror(gettext("could not set zoneid of pty"));
 452                 goto bad;
 453         }
 454 
 455         return (slavefd);
 456 
 457 bad:
 458         (void) close(slavefd);
 459         return (-1);
 460 }
 461 
 462 /*
 463  * Place terminal into raw mode.
 464  */
 465 static int
 466 set_tty_rawmode(int fd)
 467 {
 468         struct termios term;
 469         if (tcgetattr(fd, &term) < 0) {
 470                 zperror(gettext("failed to get user terminal settings"));
 471                 return (-1);
 472         }
 473 
 474         /* Stash for later, so we can revert back to previous mode */
 475         save_termios = term;
 476         save_fd = fd;
 477 
 478         /* disable 8->7 bit strip, start/stop, enable any char to restart */
 479         term.c_iflag &= ~(ISTRIP|IXON|IXANY);
 480         /* disable NL->CR, CR->NL, ignore CR, UPPER->lower */
 481         term.c_iflag &= ~(INLCR|ICRNL|IGNCR|IUCLC);
 482         /* disable output post-processing */
 483         term.c_oflag &= ~OPOST;
 484         /* disable canonical mode, signal chars, echo & extended functions */
 485         term.c_lflag &= ~(ICANON|ISIG|ECHO|IEXTEN);
 486 
 487         term.c_cc[VMIN] = 1;    /* byte-at-a-time */
 488         term.c_cc[VTIME] = 0;
 489 
 490         if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &term)) {
 491                 zperror(gettext("failed to set user terminal to raw mode"));
 492                 return (-1);
 493         }
 494 
 495         /*
 496          * We need to know the value of VEOF so that we can properly process for
 497          * client-side ~<EOF>.  But we have obliterated VEOF in term,
 498          * because VMIN overloads the same array slot in non-canonical mode.
 499          * Stupid @&^%!
 500          *
 501          * So here we construct the "effective" termios from the current
 502          * terminal settings, and the corrected VEOF and VEOL settings.
 503          */
 504         if (tcgetattr(STDIN_FILENO, &effective_termios) < 0) {
 505                 zperror(gettext("failed to get user terminal settings"));
 506                 return (-1);
 507         }
 508         effective_termios.c_cc[VEOF] = save_termios.c_cc[VEOF];
 509         effective_termios.c_cc[VEOL] = save_termios.c_cc[VEOL];
 510 
 511         return (0);
 512 }
 513 
 514 /*
 515  * Copy terminal window size from our terminal to the pts.
 516  */
 517 /*ARGSUSED*/
 518 static void
 519 sigwinch(int s)
 520 {
 521         struct winsize ws;
 522 
 523         if (ioctl(0, TIOCGWINSZ, &ws) == 0)
 524                 (void) ioctl(masterfd, TIOCSWINSZ, &ws);
 525 }
 526 
 527 static volatile int close_on_sig = -1;
 528 
 529 static void
 530 /*ARGSUSED*/
 531 sigcld(int s)
 532 {
 533         int status;
 534         pid_t pid;
 535 
 536         /*
 537          * Peek at the exit status.  If this isn't the process we cared
 538          * about, then just reap it.
 539          */
 540         if ((pid = waitpid(child_pid, &status, WNOHANG|WNOWAIT)) != -1) {
 541                 if (pid == child_pid &&
 542                     (WIFEXITED(status) || WIFSIGNALED(status))) {
 543                         dead = 1;
 544                         if (close_on_sig != -1) {
 545                                 (void) write(close_on_sig, "a", 1);
 546                                 (void) close(close_on_sig);
 547                                 close_on_sig = -1;
 548                         }
 549                 } else {
 550                         (void) waitpid(pid, &status, WNOHANG);
 551                 }
 552         }
 553 }
 554 
 555 /*
 556  * Some signals (currently, SIGINT) must be forwarded on to the process
 557  * group of the child process.
 558  */
 559 static void
 560 sig_forward(int s)
 561 {
 562         if (child_pid != -1) {
 563                 (void) sigsend(P_PGID, child_pid, s);
 564         }
 565 }
 566 
 567 /*
 568  * reset terminal settings for global environment
 569  */
 570 static void
 571 reset_tty()
 572 {
 573         (void) tcsetattr(save_fd, TCSADRAIN, &save_termios);
 574 }
 575 
 576 /*
 577  * Convert character to printable representation, for display with locally
 578  * echoed command characters (like when we need to display ~^D)
 579  */
 580 static void
 581 canonify(char c, char *cc)
 582 {
 583         if (isprint(c)) {
 584                 cc[0] = c;
 585                 cc[1] = '\0';
 586         } else if (c >= 0 && c <= 31) {   /* ^@ through ^_ */
 587                 cc[0] = '^';
 588                 cc[1] = c + '@';
 589                 cc[2] = '\0';
 590         } else {
 591                 cc[0] = '\\';
 592                 cc[1] = ((c >> 6) & 7) + '0';
 593                 cc[2] = ((c >> 3) & 7) + '0';
 594                 cc[3] = (c & 7) + '0';
 595                 cc[4] = '\0';
 596         }
 597 }
 598 
 599 /*
 600  * process_user_input watches the input stream for the escape sequence for
 601  * 'quit' (by default, tilde-period).  Because we might be fed just one
 602  * keystroke at a time, state associated with the user input (are we at the
 603  * beginning of the line?  are we locally echoing the next character?) is
 604  * maintained by beginning_of_line and local_echo across calls to the routine.
 605  * If the write to outfd fails, we'll try to read from infd in an attempt
 606  * to prevent deadlock between the two processes.
 607  *
 608  * This routine returns -1 when the 'quit' escape sequence has been issued,
 609  * or an error is encountered, 1 if stdin is EOF, and 0 otherwise.
 610  */
 611 static int
 612 process_user_input(int outfd, int infd)
 613 {
 614         static boolean_t beginning_of_line = B_TRUE;
 615         static boolean_t local_echo = B_FALSE;
 616         char ibuf[ZLOGIN_BUFSIZ];
 617         int nbytes;
 618         char *buf = ibuf;
 619 
 620         nbytes = read(STDIN_FILENO, ibuf, ZLOGIN_RDBUFSIZ);
 621         if (nbytes == -1 && (errno != EINTR || dead))
 622                 return (-1);
 623 
 624         if (nbytes == -1)       /* The read was interrupted. */
 625                 return (0);
 626 
 627         /* 0 read means EOF, close the pipe to the child */
 628         if (nbytes == 0)
 629                 return (1);
 630 
 631         for (char c = *buf; nbytes > 0; c = *buf, --nbytes) {
 632                 buf++;
 633                 if (beginning_of_line && !nocmdchar) {
 634                         beginning_of_line = B_FALSE;
 635                         if (c == cmdchar) {
 636                                 local_echo = B_TRUE;
 637                                 continue;
 638                         }
 639                 } else if (local_echo) {
 640                         local_echo = B_FALSE;
 641                         if (c == '.' || c == effective_termios.c_cc[VEOF]) {
 642                                 char cc[CANONIFY_LEN];
 643 
 644                                 canonify(c, cc);
 645                                 (void) write(STDOUT_FILENO, &cmdchar, 1);
 646                                 (void) write(STDOUT_FILENO, cc, strlen(cc));
 647                                 return (-1);
 648                         }
 649                 }
 650 retry:
 651                 if (write(outfd, &c, 1) <= 0) {
 652                         /*
 653                          * Since the fd we are writing to is opened with
 654                          * O_NONBLOCK it is possible to get EAGAIN if the
 655                          * pipe is full.  One way this could happen is if we
 656                          * are writing a lot of data into the pipe in this loop
 657                          * and the application on the other end is echoing that
 658                          * data back out to its stdout.  The output pipe can
 659                          * fill up since we are stuck here in this loop and not
 660                          * draining the other pipe.  We can try to read some of
 661                          * the data to see if we can drain the pipe so that the
 662                          * application can continue to make progress.  The read
 663                          * is non-blocking so we won't hang here.  We also wait
 664                          * a bit before retrying since there could be other
 665                          * reasons why the pipe is full and we don't want to
 666                          * continuously retry.
 667                          */
 668                         if (errno == EAGAIN) {
 669                                 struct timespec rqtp;
 670                                 int ln;
 671                                 char obuf[ZLOGIN_BUFSIZ];
 672 
 673                                 if ((ln = read(infd, obuf, ZLOGIN_BUFSIZ)) > 0)
 674                                         (void) write(STDOUT_FILENO, obuf, ln);
 675 
 676                                 /* sleep for 10 milliseconds */
 677                                 rqtp.tv_sec = 0;
 678                                 rqtp.tv_nsec = MSEC2NSEC(10);
 679                                 (void) nanosleep(&rqtp, NULL);
 680                                 if (!dead)
 681                                         goto retry;
 682                         }
 683 
 684                         return (-1);
 685                 }
 686                 beginning_of_line = (c == '\r' || c == '\n' ||
 687                     c == effective_termios.c_cc[VKILL] ||
 688                     c == effective_termios.c_cc[VEOL] ||
 689                     c == effective_termios.c_cc[VSUSP] ||
 690                     c == effective_termios.c_cc[VINTR]);
 691         }
 692         return (0);
 693 }
 694 
 695 /*
 696  * This function prevents deadlock between zlogin and the application in the
 697  * zone that it is talking to.  This can happen when we read from zlogin's
 698  * stdin and write the data down the pipe to the application.  If the pipe
 699  * is full, we'll block in the write.  Because zlogin could be blocked in
 700  * the write, it would never read the application's stdout/stderr so the
 701  * application can then block on those writes (when the pipe fills up).  If the
 702  * the application gets blocked this way, it can never get around to reading
 703  * its stdin so that zlogin can unblock from its write.  Once in this state,
 704  * the two processes are deadlocked.
 705  *
 706  * To prevent this, we want to verify that we can write into the pipe before we
 707  * read from our stdin.  If the pipe already is pretty full, we bypass the read
 708  * for now.  We'll circle back here again after the poll() so that we can
 709  * try again.  When this function is called, we already know there is data
 710  * ready to read on STDIN_FILENO.  We return -1 if there is a problem, 1 if
 711  * stdin is EOF, and 0 if everything is ok (even though we might not have
 712  * read/written any data into the pipe on this iteration).
 713  */
 714 static int
 715 process_raw_input(int stdin_fd, int appin_fd)
 716 {
 717         int cc;
 718         struct stat64 sb;
 719         char ibuf[ZLOGIN_RDBUFSIZ];
 720 
 721         /* Check how much data is already in the pipe */
 722         if (fstat64(appin_fd, &sb) == -1) {
 723                 perror("stat failed");
 724                 return (-1);
 725         }
 726 
 727         if (dead)
 728                 return (-1);
 729 
 730         /*
 731          * The pipe already has a lot of data in it,  don't write any more
 732          * right now.
 733          */
 734         if (sb.st_size >= HI_WATER)
 735                 return (0);
 736 
 737         cc = read(STDIN_FILENO, ibuf, ZLOGIN_RDBUFSIZ);
 738         if (cc == -1 && (errno != EINTR || dead))
 739                 return (-1);
 740 
 741         if (cc == -1)   /* The read was interrupted. */
 742                 return (0);
 743 
 744         /* 0 read means EOF, close the pipe to the child */
 745         if (cc == 0)
 746                 return (1);
 747 
 748         /*
 749          * stdin_fd is stdin of the target; so, the thing we'll write the user
 750          * data *to*.
 751          */
 752         if (write(stdin_fd, ibuf, cc) == -1)
 753                 return (-1);
 754 
 755         return (0);
 756 }
 757 
 758 /*
 759  * Write the output from the application running in the zone.  We can get
 760  * a signal during the write (usually it would be SIGCHLD when the application
 761  * has exited) so we loop to make sure we have written all of the data we read.
 762  */
 763 static int
 764 process_output(int in_fd, int out_fd)
 765 {
 766         int wrote = 0;
 767         int cc;
 768         char ibuf[ZLOGIN_BUFSIZ];
 769 
 770         cc = read(in_fd, ibuf, ZLOGIN_BUFSIZ);
 771         if (cc == -1 && (errno != EINTR || dead))
 772                 return (-1);
 773         if (cc == 0) {
 774                 /*
 775                  * A return value of 0 when calling read() on a terminal
 776                  * indicates end-of-file pre-XPG4 and no data available
 777                  * for XPG4 and above.
 778                  */
 779                 if (__xpg4 == 0)
 780                         return (-1);
 781                 return (0);
 782         }
 783         if (cc == -1)   /* The read was interrupted. */
 784                 return (0);
 785 
 786         do {
 787                 int len;
 788 
 789                 len = write(out_fd, ibuf + wrote, cc - wrote);
 790                 if (len == -1 && errno != EINTR)
 791                         return (-1);
 792                 if (len != -1)
 793                         wrote += len;
 794         } while (wrote < cc);
 795 
 796         return (0);
 797 }
 798 
 799 /*
 800  * This is the main I/O loop, and is shared across all zlogin modes.
 801  * Parameters:
 802  *      stdin_fd:  The fd representing 'stdin' for the slave side; input to
 803  *                 the zone will be written here.
 804  *
 805  *      appin_fd:  The fd representing the other end of the 'stdin' pipe (when
 806  *                 we're running non-interactive); used in process_raw_input
 807  *                 to ensure we don't fill up the application's stdin pipe.
 808  *
 809  *      stdout_fd: The fd representing 'stdout' for the slave side; output
 810  *                 from the zone will arrive here.
 811  *
 812  *      stderr_fd: The fd representing 'stderr' for the slave side; output
 813  *                 from the zone will arrive here.
 814  *
 815  *      raw_mode:  If TRUE, then no processing (for example, for '~.') will
 816  *                 be performed on the input coming from STDIN.
 817  *
 818  * stderr_fd may be specified as -1 if there is no stderr (only non-interactive
 819  * mode supplies a stderr).
 820  *
 821  */
 822 static void
 823 doio(int stdin_fd, int appin_fd, int stdout_fd, int stderr_fd, int sig_fd,
 824     boolean_t raw_mode)
 825 {
 826         struct pollfd pollfds[4];
 827         char ibuf[ZLOGIN_BUFSIZ];
 828         int cc, ret;
 829 
 830         /* read from stdout of zone and write to stdout of global zone */
 831         pollfds[0].fd = stdout_fd;
 832         pollfds[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI;
 833 
 834         /* read from stderr of zone and write to stderr of global zone */
 835         pollfds[1].fd = stderr_fd;
 836         pollfds[1].events = pollfds[0].events;
 837 
 838         /* read from stdin of global zone and write to stdin of zone */
 839         pollfds[2].fd = STDIN_FILENO;
 840         pollfds[2].events = pollfds[0].events;
 841 
 842         /* read from signalling pipe so we know when child dies */
 843         pollfds[3].fd = sig_fd;
 844         pollfds[3].events = pollfds[0].events;
 845 
 846         for (;;) {
 847                 pollfds[0].revents = pollfds[1].revents =
 848                     pollfds[2].revents = pollfds[3].revents = 0;
 849 
 850                 if (dead)
 851                         break;
 852 
 853                 /*
 854                  * There is a race condition here where we can receive the
 855                  * child death signal, set the dead flag, but since we have
 856                  * passed the test above, we would go into poll and hang.
 857                  * To avoid this we use the sig_fd as an additional poll fd.
 858                  * The signal handler writes into the other end of this pipe
 859                  * when the child dies so that the poll will always see that
 860                  * input and proceed.  We just loop around at that point and
 861                  * then notice the dead flag.
 862                  */
 863 
 864                 ret = poll(pollfds,
 865                     sizeof (pollfds) / sizeof (struct pollfd), -1);
 866 
 867                 if (ret == -1 && errno != EINTR) {
 868                         perror("poll failed");
 869                         break;
 870                 }
 871 
 872                 if (errno == EINTR && dead) {
 873                         break;
 874                 }
 875 
 876                 /* event from master side stdout */
 877                 if (pollfds[0].revents) {
 878                         if (pollfds[0].revents &
 879                             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 880                                 if (process_output(stdout_fd, STDOUT_FILENO)
 881                                     != 0)
 882                                         break;
 883                         } else {
 884                                 pollerr = pollfds[0].revents;
 885                                 break;
 886                         }
 887                 }
 888 
 889                 /* event from master side stderr */
 890                 if (pollfds[1].revents) {
 891                         if (pollfds[1].revents &
 892                             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 893                                 if (process_output(stderr_fd, STDERR_FILENO)
 894                                     != 0)
 895                                         break;
 896                         } else {
 897                                 pollerr = pollfds[1].revents;
 898                                 break;
 899                         }
 900                 }
 901 
 902                 /* event from user STDIN side */
 903                 if (pollfds[2].revents) {
 904                         if (pollfds[2].revents &
 905                             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 906                                 /*
 907                                  * stdin fd is stdin of the target; so,
 908                                  * the thing we'll write the user data *to*.
 909                                  *
 910                                  * Also, unlike on the output side, we
 911                                  * close the pipe on a zero-length message.
 912                                  */
 913                                 int res;
 914 
 915                                 if (raw_mode)
 916                                         res = process_raw_input(stdin_fd,
 917                                             appin_fd);
 918                                 else
 919                                         res = process_user_input(stdin_fd,
 920                                             stdout_fd);
 921 
 922                                 if (res < 0)
 923                                         break;
 924                                 if (res > 0) {
 925                                         /* EOF (close) child's stdin_fd */
 926                                         pollfds[2].fd = -1;
 927                                         while ((res = close(stdin_fd)) != 0 &&
 928                                             errno == EINTR)
 929                                                 ;
 930                                         if (res != 0)
 931                                                 break;
 932                                 }
 933 
 934                         } else if (raw_mode && pollfds[2].revents & POLLHUP) {
 935                                 /*
 936                                  * It's OK to get a POLLHUP on STDIN-- it
 937                                  * always happens if you do:
 938                                  *
 939                                  * echo foo | zlogin <zone> <command>
 940                                  *
 941                                  * We reset fd to -1 in this case to clear
 942                                  * the condition and close the pipe (EOF) to
 943                                  * the other side in order to wrap things up.
 944                                  */
 945                                 int res;
 946 
 947                                 pollfds[2].fd = -1;
 948                                 while ((res = close(stdin_fd)) != 0 &&
 949                                     errno == EINTR)
 950                                         ;
 951                                 if (res != 0)
 952                                         break;
 953                         } else {
 954                                 pollerr = pollfds[2].revents;
 955                                 break;
 956                         }
 957                 }
 958         }
 959 
 960         /*
 961          * We are in the midst of dying, but try to poll with a short
 962          * timeout to see if we can catch the last bit of I/O from the
 963          * children.
 964          */
 965 retry:
 966         pollfds[0].revents = pollfds[1].revents = 0;
 967         (void) poll(pollfds, 2, 100);
 968         if (pollfds[0].revents &
 969             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 970                 if ((cc = read(stdout_fd, ibuf, ZLOGIN_BUFSIZ)) > 0) {
 971                         (void) write(STDOUT_FILENO, ibuf, cc);
 972                         goto retry;
 973                 }
 974         }
 975         if (pollfds[1].revents &
 976             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 977                 if ((cc = read(stderr_fd, ibuf, ZLOGIN_BUFSIZ)) > 0) {
 978                         (void) write(STDERR_FILENO, ibuf, cc);
 979                         goto retry;
 980                 }
 981         }
 982 }
 983 
 984 /*
 985  * Fetch the user_cmd brand hook for getting a user's passwd(4) entry.
 986  */
 987 static const char *
 988 zone_get_user_cmd(brand_handle_t bh, const char *login, char *user_cmd,
 989     size_t len)
 990 {
 991         bzero(user_cmd, sizeof (user_cmd));
 992         if (brand_get_user_cmd(bh, login, user_cmd, len) != 0)
 993                 return (NULL);
 994 
 995         return (user_cmd);
 996 }
 997 
 998 /* From libc */
 999 extern int str2passwd(const char *, int, void *, char *, int);
1000 
1001 /*
1002  * exec() the user_cmd brand hook, and convert the output string to a
1003  * struct passwd.  This is to be called after zone_enter().
1004  *
1005  */
1006 static struct passwd *
1007 zone_get_user_pw(const char *user_cmd, struct passwd *pwent, char *pwbuf,
1008     int pwbuflen)
1009 {
1010         char pwline[NSS_BUFLEN_PASSWD];
1011         char *cin = NULL;
1012         FILE *fin;
1013         int status;
1014 
1015         assert(getzoneid() != GLOBAL_ZONEID);
1016 
1017         if ((fin = popen(user_cmd, "r")) == NULL)
1018                 return (NULL);
1019 
1020         while (cin == NULL && !feof(fin))
1021                 cin = fgets(pwline, sizeof (pwline), fin);
1022 
1023         if (cin == NULL) {
1024                 (void) pclose(fin);
1025                 return (NULL);
1026         }
1027 
1028         status = pclose(fin);
1029         if (!WIFEXITED(status))
1030                 return (NULL);
1031         if (WEXITSTATUS(status) != 0)
1032                 return (NULL);
1033 
1034         if (str2passwd(pwline, sizeof (pwline), pwent, pwbuf, pwbuflen) == 0)
1035                 return (pwent);
1036         else
1037                 return (NULL);
1038 }
1039 
1040 static char **
1041 zone_login_cmd(brand_handle_t bh, const char *login)
1042 {
1043         static char result_buf[ARG_MAX];
1044         char **new_argv, *ptr, *lasts;
1045         int n, a;
1046 
1047         /* Get the login command for the target zone. */
1048         bzero(result_buf, sizeof (result_buf));
1049 
1050         if (forced_login) {
1051                 if (brand_get_forcedlogin_cmd(bh, login,
1052                     result_buf, sizeof (result_buf)) != 0)
1053                         return (NULL);
1054         } else {
1055                 if (brand_get_login_cmd(bh, login,
1056                     result_buf, sizeof (result_buf)) != 0)
1057                         return (NULL);
1058         }
1059 
1060         /*
1061          * We got back a string that we'd like to execute.  But since
1062          * we're not doing the execution via a shell we'll need to convert
1063          * the exec string to an array of strings.  We'll do that here
1064          * but we're going to be very simplistic about it and break stuff
1065          * up based on spaces.  We're not even going to support any kind
1066          * of quoting or escape characters.  It's truly amazing that
1067          * there is no library function in OpenSolaris to do this for us.
1068          */
1069 
1070         /*
1071          * Be paranoid.  Since we're deliniating based on spaces make
1072          * sure there are no adjacent spaces.
1073          */
1074         if (strstr(result_buf, "  ") != NULL)
1075                 return (NULL);
1076 
1077         /* Remove any trailing whitespace.  */
1078         n = strlen(result_buf);
1079         if (result_buf[n - 1] == ' ')
1080                 result_buf[n - 1] = '\0';
1081 
1082         /* Count how many elements there are in the exec string. */
1083         ptr = result_buf;
1084         for (n = 2; ((ptr = strchr(ptr + 1, (int)' ')) != NULL); n++)
1085                 ;
1086 
1087         /* Allocate the argv array that we're going to return. */
1088         if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1089                 return (NULL);
1090 
1091         /* Tokenize the exec string and return. */
1092         a = 0;
1093         new_argv[a++] = result_buf;
1094         if (n > 2) {
1095                 (void) strtok_r(result_buf, " ", &lasts);
1096                 while ((new_argv[a++] = strtok_r(NULL, " ", &lasts)) != NULL)
1097                         ;
1098         } else {
1099                 new_argv[a++] = NULL;
1100         }
1101         assert(n == a);
1102         return (new_argv);
1103 }
1104 
1105 /*
1106  * Prepare argv array for exec'd process; if we're passing commands to the
1107  * new process, then use su(1M) to do the invocation.  Otherwise, use
1108  * 'login -z <from_zonename> -f' (-z is an undocumented option which tells
1109  * login that we're coming from another zone, and to disregard its CONSOLE
1110  * checks).
1111  */
1112 static char **
1113 prep_args(brand_handle_t bh, const char *login, char **argv)
1114 {
1115         int argc = 0, a = 0, i, n = -1;
1116         char **new_argv;
1117 
1118         if (argv != NULL) {
1119                 size_t subshell_len = 1;
1120                 char *subshell;
1121 
1122                 while (argv[argc] != NULL)
1123                         argc++;
1124 
1125                 for (i = 0; i < argc; i++) {
1126                         subshell_len += strlen(argv[i]) + 1;
1127                 }
1128                 if ((subshell = calloc(1, subshell_len)) == NULL)
1129                         return (NULL);
1130 
1131                 for (i = 0; i < argc; i++) {
1132                         (void) strcat(subshell, argv[i]);
1133                         (void) strcat(subshell, " ");
1134                 }
1135 
1136                 if (failsafe) {
1137                         n = 4;
1138                         if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1139                                 return (NULL);
1140 
1141                         new_argv[a++] = FAILSAFESHELL;
1142                 } else {
1143                         n = 5;
1144                         if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1145                                 return (NULL);
1146 
1147                         new_argv[a++] = SUPATH;
1148                         if (strcmp(login, "root") != 0) {
1149                                 new_argv[a++] = "-";
1150                                 n++;
1151                         }
1152                         new_argv[a++] = (char *)login;
1153                 }
1154                 new_argv[a++] = "-c";
1155                 new_argv[a++] = subshell;
1156                 new_argv[a++] = NULL;
1157                 assert(a == n);
1158         } else {
1159                 if (failsafe) {
1160                         n = 2;
1161                         if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1162                                 return (NULL);
1163                         new_argv[a++] = FAILSAFESHELL;
1164                         new_argv[a++] = NULL;
1165                         assert(n == a);
1166                 } else {
1167                         new_argv = zone_login_cmd(bh, login);
1168                 }
1169         }
1170 
1171         return (new_argv);
1172 }
1173 
/*
 * Helper routine for prep_env below: build a freshly allocated
 * "name=value" string.  Returns NULL if the allocation fails; otherwise
 * the caller owns the returned string.
 */
static char *
add_env(char *name, char *value)
{
        size_t len;
        char *entry;

        /* room for name, '=', value and the terminating NUL */
        len = strlen(name) + 1 + strlen(value) + 1;

        entry = malloc(len);
        if (entry != NULL)
                (void) snprintf(entry, len, "%s=%s", name, value);

        return (entry);
}
1189 
1190 /*
1191  * Prepare envp array for exec'd process.
1192  */
1193 static char **
1194 prep_env()
1195 {
1196         int e = 0, size = 1;
1197         char **new_env, *estr;
1198         char *term = getenv("TERM");
1199 
1200         size++; /* for $PATH */
1201         if (term != NULL)
1202                 size++;
1203 
1204         /*
1205          * In failsafe mode we set $HOME, since '-l' isn't valid in this mode.
1206          * We also set $SHELL, since neither login nor su will be around to do
1207          * it.
1208          */
1209         if (failsafe)
1210                 size += 2;
1211 
1212         if ((new_env = malloc(sizeof (char *) * size)) == NULL)
1213                 return (NULL);
1214 
1215         if ((estr = add_env("PATH", DEF_PATH)) == NULL)
1216                 return (NULL);
1217         new_env[e++] = estr;
1218 
1219         if (term != NULL) {
1220                 if ((estr = add_env("TERM", term)) == NULL)
1221                         return (NULL);
1222                 new_env[e++] = estr;
1223         }
1224 
1225         if (failsafe) {
1226                 if ((estr = add_env("HOME", "/")) == NULL)
1227                         return (NULL);
1228                 new_env[e++] = estr;
1229 
1230                 if ((estr = add_env("SHELL", FAILSAFESHELL)) == NULL)
1231                         return (NULL);
1232                 new_env[e++] = estr;
1233         }
1234 
1235         new_env[e++] = NULL;
1236 
1237         assert(e == size);
1238 
1239         return (new_env);
1240 }
1241 
1242 /*
1243  * Finish the preparation of the envp array for exec'd non-interactive
1244  * zlogins.  This is called in the child process *after* we zone_enter(), since
1245  * it derives things we can only know within the zone, such as $HOME, $SHELL,
1246  * etc.  We need only do this in the non-interactive, mode, since otherwise
1247  * login(1) will do it.  We don't do this in failsafe mode, since it presents
1248  * additional ways in which the command could fail, and we'd prefer to avoid
1249  * that.
1250  */
1251 static char **
1252 prep_env_noninteractive(const char *user_cmd, char **env)
1253 {
1254         size_t size;
1255         char **new_env;
1256         int e, i;
1257         char *estr;
1258         char varmail[LOGNAME_MAX + 11]; /* strlen(/var/mail/) = 10, NUL */
1259         char pwbuf[NSS_BUFLEN_PASSWD + 1];
1260         struct passwd pwent;
1261         struct passwd *pw = NULL;
1262 
1263         assert(env != NULL);
1264         assert(failsafe == 0);
1265 
1266         /*
1267          * Exec the "user_cmd" brand hook to get a pwent for the
1268          * login user.  If this fails, HOME will be set to "/", SHELL
1269          * will be set to $DEFAULTSHELL, and we will continue to exec
1270          * SUPATH <login> -c <cmd>.
1271          */
1272         pw = zone_get_user_pw(user_cmd, &pwent, pwbuf, sizeof (pwbuf));
1273 
1274         /*
1275          * Get existing envp size.
1276          */
1277         for (size = 0; env[size] != NULL; size++)
1278                 ;
1279 
1280         e = size;
1281 
1282         /*
1283          * Finish filling out the environment; we duplicate the environment
1284          * setup described in login(1), for lack of a better precedent.
1285          */
1286         if (pw != NULL)
1287                 size += 3;      /* LOGNAME, HOME, MAIL */
1288         else
1289                 size += 1;      /* HOME */
1290 
1291         size++; /* always fill in SHELL */
1292         size++; /* terminating NULL */
1293 
1294         if ((new_env = malloc(sizeof (char *) * size)) == NULL)
1295                 goto malloc_fail;
1296 
1297         /*
1298          * Copy existing elements of env into new_env.
1299          */
1300         for (i = 0; env[i] != NULL; i++) {
1301                 if ((new_env[i] = strdup(env[i])) == NULL)
1302                         goto malloc_fail;
1303         }
1304         assert(e == i);
1305 
1306         if (pw != NULL) {
1307                 if ((estr = add_env("LOGNAME", pw->pw_name)) == NULL)
1308                         goto malloc_fail;
1309                 new_env[e++] = estr;
1310 
1311                 if ((estr = add_env("HOME", pw->pw_dir)) == NULL)
1312                         goto malloc_fail;
1313                 new_env[e++] = estr;
1314 
1315                 if (chdir(pw->pw_dir) != 0)
1316                         zerror(gettext("Could not chdir to home directory "
1317                             "%s: %s"), pw->pw_dir, strerror(errno));
1318 
1319                 (void) snprintf(varmail, sizeof (varmail), "/var/mail/%s",
1320                     pw->pw_name);
1321                 if ((estr = add_env("MAIL", varmail)) == NULL)
1322                         goto malloc_fail;
1323                 new_env[e++] = estr;
1324         } else {
1325                 if ((estr = add_env("HOME", "/")) == NULL)
1326                         goto malloc_fail;
1327                 new_env[e++] = estr;
1328         }
1329 
1330         if (pw != NULL && strlen(pw->pw_shell) > 0) {
1331                 if ((estr = add_env("SHELL", pw->pw_shell)) == NULL)
1332                         goto malloc_fail;
1333                 new_env[e++] = estr;
1334         } else {
1335                 if ((estr = add_env("SHELL", DEFAULTSHELL)) == NULL)
1336                         goto malloc_fail;
1337                 new_env[e++] = estr;
1338         }
1339 
1340         new_env[e++] = NULL;    /* add terminating NULL */
1341 
1342         assert(e == size);
1343         return (new_env);
1344 
1345 malloc_fail:
1346         zperror(gettext("failed to allocate memory for process environment"));
1347         return (NULL);
1348 }
1349 
/*
 * Callback taking an opaque pointer to an int fd plus a candidate fd:
 * close the candidate unless it is the fd pointed to by slavefd.
 * Always returns 0.
 */
static int
close_func(void *slavefd, int fd)
{
        const int keep = *(int *)slavefd;

        if (fd == keep)
                return (0);

        (void) close(fd);
        return (0);
}
1357 
1358 static void
1359 set_cmdchar(char *cmdcharstr)
1360 {
1361         char c;
1362         long lc;
1363 
1364         if ((c = *cmdcharstr) != '\\') {
1365                 cmdchar = c;
1366                 return;
1367         }
1368 
1369         c = cmdcharstr[1];
1370         if (c == '\0' || c == '\\') {
1371                 cmdchar = '\\';
1372                 return;
1373         }
1374 
1375         if (c < '0' || c > '7') {
1376                 zerror(gettext("Unrecognized escape character option %s"),
1377                     cmdcharstr);
1378                 usage();
1379         }
1380 
1381         lc = strtol(cmdcharstr + 1, NULL, 8);
1382         if (lc < 0 || lc > 255) {
1383                 zerror(gettext("Octal escape character '%s' too large"),
1384                     cmdcharstr);
1385                 usage();
1386         }
1387         cmdchar = (char)lc;
1388 }
1389 
1390 static int
1391 setup_utmpx(char *slavename)
1392 {
1393         struct utmpx ut;
1394 
1395         bzero(&ut, sizeof (ut));
1396         (void) strncpy(ut.ut_user, ".zlogin", sizeof (ut.ut_user));
1397         (void) strncpy(ut.ut_line, slavename, sizeof (ut.ut_line));
1398         ut.ut_pid = getpid();
1399         ut.ut_id[0] = 'z';
1400         ut.ut_id[1] = ut.ut_id[2] = ut.ut_id[3] = (char)SC_WILDC;
1401         ut.ut_type = LOGIN_PROCESS;
1402         (void) time(&ut.ut_tv.tv_sec);
1403 
1404         if (makeutx(&ut) == NULL) {
1405                 zerror(gettext("makeutx failed"));
1406                 return (-1);
1407         }
1408         return (0);
1409 }
1410 
/*
 * Give up the lock file obtained by grab_lock_file() by closing its
 * descriptor.  Any error from close() is ignored.
 */
static void
release_lock_file(int lockfd)
{
        (void) close(lockfd);
}
1416 
1417 static int
1418 grab_lock_file(const char *zone_name, int *lockfd)
1419 {
1420         char pathbuf[PATH_MAX];
1421         struct flock flock;
1422 
1423         if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
1424                 zerror(gettext("could not mkdir %s: %s"), ZONES_TMPDIR,
1425                     strerror(errno));
1426                 return (-1);
1427         }
1428         (void) chmod(ZONES_TMPDIR, S_IRWXU);
1429         (void) snprintf(pathbuf, sizeof (pathbuf), "%s/%s.zoneadm.lock",
1430             ZONES_TMPDIR, zone_name);
1431 
1432         if ((*lockfd = open(pathbuf, O_RDWR|O_CREAT, S_IRUSR|S_IWUSR)) < 0) {
1433                 zerror(gettext("could not open %s: %s"), pathbuf,
1434                     strerror(errno));
1435                 return (-1);
1436         }
1437         /*
1438          * Lock the file to synchronize with other zoneadmds
1439          */
1440         flock.l_type = F_WRLCK;
1441         flock.l_whence = SEEK_SET;
1442         flock.l_start = (off_t)0;
1443         flock.l_len = (off_t)0;
1444         if (fcntl(*lockfd, F_SETLKW, &flock) < 0) {
1445                 zerror(gettext("unable to lock %s: %s"), pathbuf,
1446                     strerror(errno));
1447                 release_lock_file(*lockfd);
1448                 return (-1);
1449         }
1450         return (Z_OK);
1451 }
1452 
1453 static int
1454 start_zoneadmd(const char *zone_name)
1455 {
1456         pid_t retval;
1457         int pstatus = 0, error = -1, lockfd, doorfd;
1458         struct door_info info;
1459         char doorpath[MAXPATHLEN];
1460 
1461         (void) snprintf(doorpath, sizeof (doorpath), ZONE_DOOR_PATH, zone_name);
1462 
1463         if (grab_lock_file(zone_name, &lockfd) != Z_OK)
1464                 return (-1);
1465         /*
1466          * We must do the door check with the lock held.  Otherwise, we
1467          * might race against another zoneadm/zlogin process and wind
1468          * up with two processes trying to start zoneadmd at the same
1469          * time.  zoneadmd will detect this, and fail, but we prefer this
1470          * to be as seamless as is practical, from a user perspective.
1471          */
1472         if ((doorfd = open(doorpath, O_RDONLY)) < 0) {
1473                 if (errno != ENOENT) {
1474                         zerror("failed to open %s: %s", doorpath,
1475                             strerror(errno));
1476                         goto out;
1477                 }
1478         } else {
1479                 /*
1480                  * Seems to be working ok.
1481                  */
1482                 if (door_info(doorfd, &info) == 0 &&
1483                     ((info.di_attributes & DOOR_REVOKED) == 0)) {
1484                         error = 0;
1485                         goto out;
1486                 }
1487         }
1488 
1489         if ((child_pid = fork()) == -1) {
1490                 zperror(gettext("could not fork"));
1491                 goto out;
1492         } else if (child_pid == 0) {
1493                 /* child process */
1494                 (void) execl("/usr/lib/zones/zoneadmd", "zoneadmd", "-z",
1495                     zone_name, NULL);
1496                 zperror(gettext("could not exec zoneadmd"));
1497                 _exit(1);
1498         }
1499 
1500         /* parent process */
1501         do {
1502                 retval = waitpid(child_pid, &pstatus, 0);
1503         } while (retval != child_pid);
1504         if (WIFSIGNALED(pstatus) ||
1505             (WIFEXITED(pstatus) && WEXITSTATUS(pstatus) != 0)) {
1506                 zerror(gettext("could not start %s"), "zoneadmd");
1507                 goto out;
1508         }
1509         error = 0;
1510 out:
1511         release_lock_file(lockfd);
1512         (void) close(doorfd);
1513         return (error);
1514 }
1515 
1516 static int
1517 init_template(void)
1518 {
1519         int fd;
1520         int err = 0;
1521 
1522         fd = open64(CTFS_ROOT "/process/template", O_RDWR);
1523         if (fd == -1)
1524                 return (-1);
1525 
1526         /*
1527          * zlogin doesn't do anything with the contract.
1528          * Deliver no events, don't inherit, and allow it to be orphaned.
1529          */
1530         err |= ct_tmpl_set_critical(fd, 0);
1531         err |= ct_tmpl_set_informative(fd, 0);
1532         err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
1533         err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
1534         if (err || ct_tmpl_activate(fd)) {
1535                 (void) close(fd);
1536                 return (-1);
1537         }
1538 
1539         return (fd);
1540 }
1541 
1542 static int
1543 noninteractive_login(char *zonename, const char *user_cmd, zoneid_t zoneid,
1544     char **new_args, char **new_env)
1545 {
1546         pid_t retval;
1547         int stdin_pipe[2], stdout_pipe[2], stderr_pipe[2], dead_child_pipe[2];
1548         int child_status;
1549         int tmpl_fd;
1550         sigset_t block_cld;
1551 
1552         if ((tmpl_fd = init_template()) == -1) {
1553                 reset_tty();
1554                 zperror(gettext("could not create contract"));
1555                 return (1);
1556         }
1557 
1558         if (pipe(stdin_pipe) != 0) {
1559                 zperror(gettext("could not create STDIN pipe"));
1560                 return (1);
1561         }
1562         /*
1563          * When the user types ^D, we get a zero length message on STDIN.
1564          * We need to echo that down the pipe to send it to the other side;
1565          * but by default, pipes don't propagate zero-length messages.  We
1566          * toggle that behavior off using I_SWROPT.  See streamio(7i).
1567          */
1568         if (ioctl(stdin_pipe[0], I_SWROPT, SNDZERO) != 0) {
1569                 zperror(gettext("could not configure STDIN pipe"));
1570                 return (1);
1571 
1572         }
1573         if (pipe(stdout_pipe) != 0) {
1574                 zperror(gettext("could not create STDOUT pipe"));
1575                 return (1);
1576         }
1577         if (pipe(stderr_pipe) != 0) {
1578                 zperror(gettext("could not create STDERR pipe"));
1579                 return (1);
1580         }
1581 
1582         if (pipe(dead_child_pipe) != 0) {
1583                 zperror(gettext("could not create signalling pipe"));
1584                 return (1);
1585         }
1586         close_on_sig = dead_child_pipe[0];
1587 
1588         /*
1589          * If any of the pipe FD's winds up being less than STDERR, then we
1590          * have a mess on our hands-- and we are lacking some of the I/O
1591          * streams we would expect anyway.  So we bail.
1592          */
1593         if (stdin_pipe[0] <= STDERR_FILENO ||
1594             stdin_pipe[1] <= STDERR_FILENO ||
1595             stdout_pipe[0] <= STDERR_FILENO ||
1596             stdout_pipe[1] <= STDERR_FILENO ||
1597             stderr_pipe[0] <= STDERR_FILENO ||
1598             stderr_pipe[1] <= STDERR_FILENO ||
1599             dead_child_pipe[0] <= STDERR_FILENO ||
1600             dead_child_pipe[1] <= STDERR_FILENO) {
1601                 zperror(gettext("process lacks valid STDIN, STDOUT, STDERR"));
1602                 return (1);
1603         }
1604 
1605         if (prefork_dropprivs() != 0) {
1606                 zperror(gettext("could not allocate privilege set"));
1607                 return (1);
1608         }
1609 
1610         (void) sigset(SIGCLD, sigcld);
1611         (void) sigemptyset(&block_cld);
1612         (void) sigaddset(&block_cld, SIGCLD);
1613         (void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
1614 
1615         if ((child_pid = fork()) == -1) {
1616                 (void) ct_tmpl_clear(tmpl_fd);
1617                 (void) close(tmpl_fd);
1618                 zperror(gettext("could not fork"));
1619                 return (1);
1620         } else if (child_pid == 0) { /* child process */
1621                 (void) ct_tmpl_clear(tmpl_fd);
1622 
1623                 /*
1624                  * Do a dance to get the pipes hooked up as FD's 0, 1 and 2.
1625                  */
1626                 (void) close(STDIN_FILENO);
1627                 (void) close(STDOUT_FILENO);
1628                 (void) close(STDERR_FILENO);
1629                 (void) dup2(stdin_pipe[1], STDIN_FILENO);
1630                 (void) dup2(stdout_pipe[1], STDOUT_FILENO);
1631                 (void) dup2(stderr_pipe[1], STDERR_FILENO);
1632                 (void) closefrom(STDERR_FILENO + 1);
1633 
1634                 (void) sigset(SIGCLD, SIG_DFL);
1635                 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1636                 /*
1637                  * In case any of stdin, stdout or stderr are streams,
1638                  * anchor them to prevent malicious I_POPs.
1639                  */
1640                 (void) ioctl(STDIN_FILENO, I_ANCHOR);
1641                 (void) ioctl(STDOUT_FILENO, I_ANCHOR);
1642                 (void) ioctl(STDERR_FILENO, I_ANCHOR);
1643 
1644                 if (zone_enter(zoneid) == -1) {
1645                         zerror(gettext("could not enter zone %s: %s"),
1646                             zonename, strerror(errno));
1647                         _exit(1);
1648                 }
1649 
1650                 /*
1651                  * For non-native zones, tell libc where it can find locale
1652                  * specific getttext() messages.
1653                  */
1654                 if (access("/.SUNWnative/usr/lib/locale", R_OK) == 0)
1655                         (void) bindtextdomain(TEXT_DOMAIN,
1656                             "/.SUNWnative/usr/lib/locale");
1657                 else if (access("/native/usr/lib/locale", R_OK) == 0)
1658                         (void) bindtextdomain(TEXT_DOMAIN,
1659                             "/native/usr/lib/locale");
1660 
1661                 if (!failsafe)
1662                         new_env = prep_env_noninteractive(user_cmd, new_env);
1663 
1664                 if (new_env == NULL) {
1665                         _exit(1);
1666                 }
1667 
1668                 /*
1669                  * Move into a new process group; the zone_enter will have
1670                  * placed us into zsched's session, and we want to be in
1671                  * a unique process group.
1672                  */
1673                 (void) setpgid(getpid(), getpid());
1674 
1675                 /*
1676                  * The child needs to run as root to
1677                  * execute the su program.
1678                  */
1679                 if (setuid(0) == -1) {
1680                         zperror(gettext("insufficient privilege"));
1681                         return (1);
1682                 }
1683 
1684                 (void) execve(new_args[0], new_args, new_env);
1685                 zperror(gettext("exec failure"));
1686                 _exit(1);
1687         }
1688         /* parent */
1689 
1690         /* close pipe sides written by child */
1691         (void) close(stdout_pipe[1]);
1692         (void) close(stderr_pipe[1]);
1693 
1694         (void) sigset(SIGINT, sig_forward);
1695 
1696         postfork_dropprivs();
1697 
1698         (void) ct_tmpl_clear(tmpl_fd);
1699         (void) close(tmpl_fd);
1700 
1701         (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1702         doio(stdin_pipe[0], stdin_pipe[1], stdout_pipe[0], stderr_pipe[0],
1703             dead_child_pipe[1], B_TRUE);
1704         do {
1705                 retval = waitpid(child_pid, &child_status, 0);
1706                 if (retval == -1) {
1707                         child_status = 0;
1708                 }
1709         } while (retval != child_pid && errno != ECHILD);
1710 
1711         return (WEXITSTATUS(child_status));
1712 }
1713 
/*
 * Return the login name that corresponds to the real user ID of this
 * process.
 *
 * Authorizations are checked to restrict access based on the requested
 * operation and zone name.  It is assumed that the program is running
 * with all privileges, but that the real user ID is that of the user or
 * role on whose behalf we are operating, so the name returned here is the
 * one used for subsequent authorization checks.
 *
 * The returned string is a heap copy that stays valid for the rest of the
 * process lifetime; it is never NULL.  If the name cannot be determined
 * (or copied) an error is printed and the process exits with status 1.
 */
static char *
get_username(void)
{
        struct passwd *pwent;
        char *name;

        if ((pwent = getpwuid(getuid())) == NULL) {
                zerror(gettext("could not get user name."));
                _exit(1);
        }

        /*
         * getpwuid() returns a pointer into storage that later getpw*()
         * calls are allowed to overwrite, so hand back a private copy
         * rather than a pointer into that buffer.
         */
        if ((name = strdup(pwent->pw_name)) == NULL) {
                zerror(gettext("could not get user name."));
                _exit(1);
        }
        return (name);
}
1736 
1737 int
1738 main(int argc, char **argv)
1739 {
1740         int arg, console = 0;
1741         zoneid_t zoneid;
1742         zone_state_t st;
1743         char *login = "root";
1744         int lflag = 0;
1745         int nflag = 0;
1746         char *zonename = NULL;
1747         char **proc_args = NULL;
1748         char **new_args, **new_env;
1749         sigset_t block_cld;
1750         char devroot[MAXPATHLEN];
1751         char *slavename, slaveshortname[MAXPATHLEN];
1752         priv_set_t *privset;
1753         int tmpl_fd;
1754         char zonebrand[MAXNAMELEN];
1755         char default_brand[MAXNAMELEN];
1756         struct stat sb;
1757         char kernzone[ZONENAME_MAX];
1758         brand_handle_t bh;
1759         char user_cmd[MAXPATHLEN];
1760         char authname[MAXAUTHS];
1761 
1762         (void) setlocale(LC_ALL, "");
1763         (void) textdomain(TEXT_DOMAIN);
1764 
1765         (void) getpname(argv[0]);
1766         username = get_username();
1767 
1768         while ((arg = getopt(argc, argv, "dnECR:Se:l:Q")) != EOF) {
1769                 switch (arg) {
1770                 case 'C':
1771                         console = 1;
1772                         break;
1773                 case 'E':
1774                         nocmdchar = 1;
1775                         break;
1776                 case 'R':       /* undocumented */
1777                         if (*optarg != '/') {
1778                                 zerror(gettext("root path must be absolute."));
1779                                 exit(2);
1780                         }
1781                         if (stat(optarg, &sb) == -1 || !S_ISDIR(sb.st_mode)) {
1782                                 zerror(
1783                                     gettext("root path must be a directory."));
1784                                 exit(2);
1785                         }
1786                         zonecfg_set_root(optarg);
1787                         break;
1788                 case 'Q':
1789                         quiet = 1;
1790                         break;
1791                 case 'S':
1792                         failsafe = 1;
1793                         break;
1794                 case 'd':
1795                         disconnect = 1;
1796                         break;
1797                 case 'e':
1798                         set_cmdchar(optarg);
1799                         break;
1800                 case 'l':
1801                         login = optarg;
1802                         lflag = 1;
1803                         break;
1804                 case 'n':
1805                         nflag = 1;
1806                         break;
1807                 default:
1808                         usage();
1809                 }
1810         }
1811 
1812         if (console != 0) {
1813 
1814                 if (lflag != 0) {
1815                         zerror(gettext(
1816                             "-l may not be specified for console login"));
1817                         usage();
1818                 }
1819 
1820                 if (nflag != 0) {
1821                         zerror(gettext(
1822                             "-n may not be specified for console login"));
1823                         usage();
1824                 }
1825 
1826                 if (failsafe != 0) {
1827                         zerror(gettext(
1828                             "-S may not be specified for console login"));
1829                         usage();
1830                 }
1831 
1832                 if (zonecfg_in_alt_root()) {
1833                         zerror(gettext(
1834                             "-R may not be specified for console login"));
1835                         exit(2);
1836                 }
1837 
1838         }
1839 
1840         if (failsafe != 0 && lflag != 0) {
1841                 zerror(gettext("-l may not be specified for failsafe login"));
1842                 usage();
1843         }
1844 
1845         if (!console && disconnect != 0) {
1846                 zerror(gettext(
1847                     "-d may only be specified with console login"));
1848                 usage();
1849         }
1850 
1851         if (optind == (argc - 1)) {
1852                 /*
1853                  * zone name, no process name; this should be an interactive
1854                  * as long as STDIN is really a tty.
1855                  */
1856                 if (nflag != 0) {
1857                         zerror(gettext(
1858                             "-n may not be specified for interactive login"));
1859                         usage();
1860                 }
1861                 if (isatty(STDIN_FILENO))
1862                         interactive = 1;
1863                 zonename = argv[optind];
1864         } else if (optind < (argc - 1)) {
1865                 if (console) {
1866                         zerror(gettext("Commands may not be specified for "
1867                             "console login."));
1868                         usage();
1869                 }
1870                 /* zone name and process name, and possibly some args */
1871                 zonename = argv[optind];
1872                 proc_args = &argv[optind + 1];
1873                 interactive = 0;
1874         } else {
1875                 usage();
1876         }
1877 
1878         if (getzoneid() != GLOBAL_ZONEID) {
1879                 zerror(gettext("'%s' may only be used from the global zone"),
1880                     pname);
1881                 return (1);
1882         }
1883 
1884         if (strcmp(zonename, GLOBAL_ZONENAME) == 0) {
1885                 zerror(gettext("'%s' not applicable to the global zone"),
1886                     pname);
1887                 return (1);
1888         }
1889 
1890         if (zone_get_state(zonename, &st) != Z_OK) {
1891                 zerror(gettext("zone '%s' unknown"), zonename);
1892                 return (1);
1893         }
1894 
1895         if (st < ZONE_STATE_INSTALLED) {
1896                 zerror(gettext("cannot login to a zone which is '%s'"),
1897                     zone_state_str(st));
1898                 return (1);
1899         }
1900 
1901         /*
1902          * In both console and non-console cases, we require all privs.
1903          * In the console case, because we may need to startup zoneadmd.
1904          * In the non-console case in order to do zone_enter(2), zonept()
1905          * and other tasks.
1906          */
1907 
1908         if ((privset = priv_allocset()) == NULL) {
1909                 zperror(gettext("priv_allocset failed"));
1910                 return (1);
1911         }
1912 
1913         if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
1914                 zperror(gettext("getppriv failed"));
1915                 priv_freeset(privset);
1916                 return (1);
1917         }
1918 
1919         if (priv_isfullset(privset) == B_FALSE) {
1920                 zerror(gettext("You lack sufficient privilege to run "
1921                     "this command (all privs required)"));
1922                 priv_freeset(privset);
1923                 return (1);
1924         }
1925         priv_freeset(privset);
1926 
1927         /*
1928          * Check if user is authorized for requested usage of the zone
1929          */
1930 
1931         (void) snprintf(authname, MAXAUTHS, "%s%s%s",
1932             ZONE_MANAGE_AUTH, KV_OBJECT, zonename);
1933         if (chkauthattr(authname, username) == 0) {
1934                 if (console) {
1935                         zerror(gettext("%s is not authorized for console "
1936                             "access to  %s zone."),
1937                             username, zonename);
1938                         return (1);
1939                 } else {
1940                         (void) snprintf(authname, MAXAUTHS, "%s%s%s",
1941                             ZONE_LOGIN_AUTH, KV_OBJECT, zonename);
1942                         if (failsafe || !interactive) {
1943                                 zerror(gettext("%s is not authorized for  "
1944                                     "failsafe or non-interactive login "
1945                                     "to  %s zone."), username, zonename);
1946                                 return (1);
1947                         } else if (chkauthattr(authname, username) == 0) {
1948                                 zerror(gettext("%s is not authorized "
1949                                     " to login to %s zone."),
1950                                     username, zonename);
1951                                 return (1);
1952                         }
1953                 }
1954         } else {
1955                 forced_login = B_TRUE;
1956         }
1957 
1958         /*
1959          * The console is a separate case from the rest of the code; handle
1960          * it first.
1961          */
1962         if (console) {
1963                 /*
1964                  * Ensure that zoneadmd for this zone is running.
1965                  */
1966                 if (start_zoneadmd(zonename) == -1)
1967                         return (1);
1968 
1969                 /*
1970                  * Make contact with zoneadmd.
1971                  */
1972                 if (get_console_master(zonename) == -1)
1973                         return (1);
1974 
1975                 if (!quiet)
1976                         (void) printf(
1977                             gettext("[Connected to zone '%s' console]\n"),
1978                             zonename);
1979 
1980                 if (set_tty_rawmode(STDIN_FILENO) == -1) {
1981                         reset_tty();
1982                         zperror(gettext("failed to set stdin pty to raw mode"));
1983                         return (1);
1984                 }
1985 
1986                 (void) sigset(SIGWINCH, sigwinch);
1987                 (void) sigwinch(0);
1988 
1989                 /*
1990                  * Run the I/O loop until we get disconnected.
1991                  */
1992                 doio(masterfd, -1, masterfd, -1, -1, B_FALSE);
1993                 reset_tty();
1994                 if (!quiet)
1995                         (void) printf(
1996                             gettext("\n[Connection to zone '%s' console "
1997                             "closed]\n"), zonename);
1998 
1999                 return (0);
2000         }
2001 
2002         if (st != ZONE_STATE_RUNNING && st != ZONE_STATE_MOUNTED) {
2003                 zerror(gettext("login allowed only to running zones "
2004                     "(%s is '%s')."), zonename, zone_state_str(st));
2005                 return (1);
2006         }
2007 
2008         (void) strlcpy(kernzone, zonename, sizeof (kernzone));
2009         if (zonecfg_in_alt_root()) {
2010                 FILE *fp = zonecfg_open_scratch("", B_FALSE);
2011 
2012                 if (fp == NULL || zonecfg_find_scratch(fp, zonename,
2013                     zonecfg_get_root(), kernzone, sizeof (kernzone)) == -1) {
2014                         zerror(gettext("cannot find scratch zone %s"),
2015                             zonename);
2016                         if (fp != NULL)
2017                                 zonecfg_close_scratch(fp);
2018                         return (1);
2019                 }
2020                 zonecfg_close_scratch(fp);
2021         }
2022 
2023         if ((zoneid = getzoneidbyname(kernzone)) == -1) {
2024                 zerror(gettext("failed to get zoneid for zone '%s'"),
2025                     zonename);
2026                 return (1);
2027         }
2028 
2029         /*
2030          * We need the zone root path only if we are setting up a pty.
2031          */
2032         if (zone_get_devroot(zonename, devroot, sizeof (devroot)) == -1) {
2033                 zerror(gettext("could not get dev path for zone %s"),
2034                     zonename);
2035                 return (1);
2036         }
2037 
2038         if (zone_get_brand(zonename, zonebrand, sizeof (zonebrand)) != Z_OK) {
2039                 zerror(gettext("could not get brand for zone %s"), zonename);
2040                 return (1);
2041         }
2042         /*
2043          * In the alternate root environment, the only supported
2044          * operations are mount and unmount.  In this case, just treat
2045          * the zone as native if it is cluster.  Cluster zones can be
2046          * native for the purpose of LU or upgrade, and the cluster
2047          * brand may not exist in the miniroot (such as in net install
2048          * upgrade).
2049          */
2050         if (zonecfg_default_brand(default_brand,
2051             sizeof (default_brand)) != Z_OK) {
2052                 zerror(gettext("unable to determine default brand"));
2053                 return (1);
2054         }
2055         if (zonecfg_in_alt_root() &&
2056             strcmp(zonebrand, CLUSTER_BRAND_NAME) == 0) {
2057                 (void) strlcpy(zonebrand, default_brand, sizeof (zonebrand));
2058         }
2059 
2060         if ((bh = brand_open(zonebrand)) == NULL) {
2061                 zerror(gettext("could not open brand for zone %s"), zonename);
2062                 return (1);
2063         }
2064 
2065         if ((new_args = prep_args(bh, login, proc_args)) == NULL) {
2066                 zperror(gettext("could not assemble new arguments"));
2067                 brand_close(bh);
2068                 return (1);
2069         }
2070         /*
2071          * Get the brand specific user_cmd.  This command is used to get
2072          * a passwd(4) entry for login.
2073          */
2074         if (!interactive && !failsafe) {
2075                 if (zone_get_user_cmd(bh, login, user_cmd,
2076                     sizeof (user_cmd)) == NULL) {
2077                         zerror(gettext("could not get user_cmd for zone %s"),
2078                             zonename);
2079                         brand_close(bh);
2080                         return (1);
2081                 }
2082         }
2083         brand_close(bh);
2084 
2085         if ((new_env = prep_env()) == NULL) {
2086                 zperror(gettext("could not assemble new environment"));
2087                 return (1);
2088         }
2089 
2090         if (!interactive) {
2091                 if (nflag) {
2092                         int nfd;
2093 
2094                         if ((nfd = open(_PATH_DEVNULL, O_RDONLY)) < 0) {
2095                                 zperror(gettext("failed to open null device"));
2096                                 return (1);
2097                         }
2098                         if (nfd != STDIN_FILENO) {
2099                                 if (dup2(nfd, STDIN_FILENO) < 0) {
2100                                         zperror(gettext(
2101                                             "failed to dup2 null device"));
2102                                         return (1);
2103                                 }
2104                                 (void) close(nfd);
2105                         }
2106                         /* /dev/null is now standard input */
2107                 }
2108                 return (noninteractive_login(zonename, user_cmd, zoneid,
2109                     new_args, new_env));
2110         }
2111 
2112         if (zonecfg_in_alt_root()) {
2113                 zerror(gettext("cannot use interactive login with scratch "
2114                     "zone"));
2115                 return (1);
2116         }
2117 
2118         /*
2119          * Things are more complex in interactive mode; we get the
2120          * master side of the pty, then place the user's terminal into
2121          * raw mode.
2122          */
2123         if (get_master_pty() == -1) {
2124                 zerror(gettext("could not setup master pty device"));
2125                 return (1);
2126         }
2127 
2128         /*
2129          * Compute the "short name" of the pts.  /dev/pts/2 --> pts/2
2130          */
2131         if ((slavename = ptsname(masterfd)) == NULL) {
2132                 zperror(gettext("failed to get name for pseudo-tty"));
2133                 return (1);
2134         }
2135         if (strncmp(slavename, "/dev/", strlen("/dev/")) == 0)
2136                 (void) strlcpy(slaveshortname, slavename + strlen("/dev/"),
2137                     sizeof (slaveshortname));
2138         else
2139                 (void) strlcpy(slaveshortname, slavename,
2140                     sizeof (slaveshortname));
2141 
2142         if (!quiet)
2143                 (void) printf(gettext("[Connected to zone '%s' %s]\n"),
2144                     zonename, slaveshortname);
2145 
2146         if (set_tty_rawmode(STDIN_FILENO) == -1) {
2147                 reset_tty();
2148                 zperror(gettext("failed to set stdin pty to raw mode"));
2149                 return (1);
2150         }
2151 
2152         if (prefork_dropprivs() != 0) {
2153                 reset_tty();
2154                 zperror(gettext("could not allocate privilege set"));
2155                 return (1);
2156         }
2157 
2158         /*
2159          * We must mask SIGCLD until after we have coped with the fork
2160          * sufficiently to deal with it; otherwise we can race and receive the
2161          * signal before child_pid has been initialized (yes, this really
2162          * happens).
2163          */
2164         (void) sigset(SIGCLD, sigcld);
2165         (void) sigemptyset(&block_cld);
2166         (void) sigaddset(&block_cld, SIGCLD);
2167         (void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
2168 
2169         /*
2170          * We activate the contract template at the last minute to
2171          * avoid intermediate functions that could be using fork(2)
2172          * internally.
2173          */
2174         if ((tmpl_fd = init_template()) == -1) {
2175                 reset_tty();
2176                 zperror(gettext("could not create contract"));
2177                 return (1);
2178         }
2179 
2180         if ((child_pid = fork()) == -1) {
2181                 (void) ct_tmpl_clear(tmpl_fd);
2182                 reset_tty();
2183                 zperror(gettext("could not fork"));
2184                 return (1);
2185         } else if (child_pid == 0) { /* child process */
2186                 int slavefd, newslave;
2187 
2188                 (void) ct_tmpl_clear(tmpl_fd);
2189                 (void) close(tmpl_fd);
2190 
2191                 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2192 
2193                 if ((slavefd = init_slave_pty(zoneid, devroot)) == -1)
2194                         return (1);
2195 
2196                 /*
2197                  * Close all fds except for the slave pty.
2198                  */
2199                 (void) fdwalk(close_func, &slavefd);
2200 
2201                 /*
2202                  * Temporarily dup slavefd to stderr; that way if we have
2203                  * to print out that zone_enter failed, the output will
2204                  * have somewhere to go.
2205                  */
2206                 if (slavefd != STDERR_FILENO)
2207                         (void) dup2(slavefd, STDERR_FILENO);
2208 
2209                 if (zone_enter(zoneid) == -1) {
2210                         zerror(gettext("could not enter zone %s: %s"),
2211                             zonename, strerror(errno));
2212                         return (1);
2213                 }
2214 
2215                 if (slavefd != STDERR_FILENO)
2216                         (void) close(STDERR_FILENO);
2217 
2218                 /*
2219                  * We take pains to get this process into a new process
2220                  * group, and subsequently a new session.  In this way,
2221                  * we'll have a session which doesn't yet have a controlling
2222                  * terminal.  When we open the slave, it will become the
2223                  * controlling terminal; no PIDs concerning pgrps or sids
2224                  * will leak inappropriately into the zone.
2225                  */
2226                 (void) setpgrp();
2227 
2228                 /*
2229                  * We need the slave pty to be referenced from the zone's
2230                  * /dev in order to ensure that the devt's, etc are all
2231                  * correct.  Otherwise we break ttyname and the like.
2232                  */
2233                 if ((newslave = open(slavename, O_RDWR)) == -1) {
2234                         (void) close(slavefd);
2235                         return (1);
2236                 }
2237                 (void) close(slavefd);
2238                 slavefd = newslave;
2239 
2240                 /*
2241                  * dup the slave to the various FDs, so that when the
2242                  * spawned process does a write/read it maps to the slave
2243                  * pty.
2244                  */
2245                 (void) dup2(slavefd, STDIN_FILENO);
2246                 (void) dup2(slavefd, STDOUT_FILENO);
2247                 (void) dup2(slavefd, STDERR_FILENO);
2248                 if (slavefd != STDIN_FILENO && slavefd != STDOUT_FILENO &&
2249                     slavefd != STDERR_FILENO) {
2250                         (void) close(slavefd);
2251                 }
2252 
2253                 /*
2254                  * In failsafe mode, we don't use login(1), so don't try
2255                  * setting up a utmpx entry.
2256                  */
2257                 if (!failsafe)
2258                         if (setup_utmpx(slaveshortname) == -1)
2259                                 return (1);
2260 
2261                 /*
2262                  * The child needs to run as root to
2263                  * execute the brand's login program.
2264                  */
2265                 if (setuid(0) == -1) {
2266                         zperror(gettext("insufficient privilege"));
2267                         return (1);
2268                 }
2269 
2270                 (void) execve(new_args[0], new_args, new_env);
2271                 zperror(gettext("exec failure"));
2272                 return (1);
2273         }
2274 
2275         (void) ct_tmpl_clear(tmpl_fd);
2276         (void) close(tmpl_fd);
2277 
2278         /*
2279          * The rest is only for the parent process.
2280          */
2281         (void) sigset(SIGWINCH, sigwinch);
2282 
2283         postfork_dropprivs();
2284 
2285         (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2286         doio(masterfd, -1, masterfd, -1, -1, B_FALSE);
2287 
2288         reset_tty();
2289         if (!quiet)
2290                 (void) fprintf(stderr,
2291                     gettext("\n[Connection to zone '%s' %s closed]\n"),
2292                     zonename, slaveshortname);
2293 
2294         if (pollerr != 0) {
2295                 (void) fprintf(stderr, gettext("Error: connection closed due "
2296                     "to unexpected pollevents=0x%x.\n"), pollerr);
2297                 return (1);
2298         }
2299 
2300         return (0);
2301 }