1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2013 DEY Storage Systems, Inc.
  24  * Copyright (c) 2014 Gary Mills
  25  * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
  26  * Copyright 2019 Joyent, Inc.
  27  */
  28 
  29 /*
  30  * zlogin provides three types of login which allow users in the global
  31  * zone to access non-global zones.
  32  *
  33  * - "interactive login" is similar to rlogin(1); for example, the user could
  34  *   issue 'zlogin my-zone' or 'zlogin -e ^ -l me my-zone'.   The user is
  35  *   granted a new pty (which is then shoved into the zone), and an I/O
  36  *   loop between parent and child processes takes care of the interactive
  37  *   session.  In this mode, login(1) (and its -c option, which means
  38  *   "already authenticated") is employed to take care of the initialization
  39  *   of the user's session.
  40  *
  41  * - "non-interactive login" is similar to su(1M); the user could issue
  42  *   'zlogin my-zone ls -l' and the command would be run as specified.
  43  *   In this mode, zlogin sets up pipes as the communication channel, and
  44  *   'su' is used to do the login setup work.
  45  *
  46  * - "console login" is the equivalent to accessing the tip line for a
  47  *   zone.  For example, the user can issue 'zlogin -C my-zone'.
  48  *   In this mode, zlogin contacts the zoneadmd process via unix domain
  49  *   socket.  If zoneadmd is not running, it starts it.  This allows the
  50  *   console to be available anytime the zone is installed, regardless of
  51  *   whether it is running.
  52  */
  53 
  54 #include <sys/socket.h>
  55 #include <sys/termios.h>
  56 #include <sys/utsname.h>
  57 #include <sys/stat.h>
  58 #include <sys/types.h>
  59 #include <sys/contract/process.h>
  60 #include <sys/ctfs.h>
  61 #include <sys/brand.h>
  62 #include <sys/wait.h>
  63 #include <alloca.h>
  64 #include <assert.h>
  65 #include <ctype.h>
  66 #include <paths.h>
  67 #include <door.h>
  68 #include <errno.h>
  69 #include <nss_dbdefs.h>
  70 #include <poll.h>
  71 #include <priv.h>
  72 #include <pwd.h>
  73 #include <unistd.h>
  74 #include <utmpx.h>
  75 #include <sac.h>
  76 #include <signal.h>
  77 #include <stdarg.h>
  78 #include <stdio.h>
  79 #include <stdlib.h>
  80 #include <string.h>
  81 #include <strings.h>
  82 #include <stropts.h>
  83 #include <wait.h>
  84 #include <zone.h>
  85 #include <fcntl.h>
  86 #include <libdevinfo.h>
  87 #include <libintl.h>
  88 #include <locale.h>
  89 #include <libzonecfg.h>
  90 #include <libcontract.h>
  91 #include <libbrand.h>
  92 #include <auth_list.h>
  93 #include <auth_attr.h>
  94 #include <secdb.h>
  95 
  96 static int masterfd;
  97 static struct termios save_termios;
  98 static struct termios effective_termios;
  99 static int save_fd;
 100 static struct winsize winsize;
 101 static volatile int dead;
 102 static volatile pid_t child_pid = -1;
 103 static int interactive = 0;
 104 static priv_set_t *dropprivs;
 105 
 106 static int nocmdchar = 0;
 107 static int failsafe = 0;
 108 static int disconnect = 0;
 109 static char cmdchar = '~';
 110 static int quiet = 0;
 111 
 112 static int pollerr = 0;
 113 
 114 static const char *pname;
 115 static char *username;
 116 
 117 /*
 118  * When forced_login is true, the user is not prompted
 119  * for an authentication password in the target zone.
 120  */
 121 static boolean_t forced_login = B_FALSE;
 122 
 123 #if !defined(TEXT_DOMAIN)               /* should be defined by cc -D */
 124 #define TEXT_DOMAIN     "SYS_TEST"      /* Use this only if it wasn't */
 125 #endif
 126 
 127 #define SUPATH  "/usr/bin/su"
 128 #define FAILSAFESHELL   "/sbin/sh"
 129 #define DEFAULTSHELL    "/sbin/sh"
 130 #define DEF_PATH        "/usr/sbin:/usr/bin"
 131 
 132 #define CLUSTER_BRAND_NAME      "cluster"
 133 
 134 /*
 135  * The ZLOGIN_BUFSIZ is larger than PIPE_BUF so we can be sure we're clearing
 136  * out the pipe when the child is exiting.  The ZLOGIN_RDBUFSIZ must be less
 137  * than ZLOGIN_BUFSIZ (because we share the buffer in doio).  This value is
 138  * also chosen in conjunction with the HI_WATER setting to make sure we
 139  * don't fill up the pipe.  We can write FIFOHIWAT (16k) into the pipe before
 140  * blocking.  By having ZLOGIN_RDBUFSIZ set to 1k and HI_WATER set to 8k, we
 141  * know we can always write a ZLOGIN_RDBUFSIZ chunk into the pipe when there
 142  * is less than HI_WATER data already in the pipe.
 143  */
 144 #define ZLOGIN_BUFSIZ   8192
 145 #define ZLOGIN_RDBUFSIZ 1024
 146 #define HI_WATER        8192
 147 
 148 /*
 149  * See canonify() below.  CANONIFY_LEN is the maximum length that a
 150  * "canonical" sequence will expand to (backslash, three octal digits, NUL).
 151  */
 152 #define CANONIFY_LEN 5
 153 
 154 static void
 155 usage(void)
 156 {
 157         (void) fprintf(stderr, gettext("usage: %s [ -dnQCES ] [ -e cmdchar ] "
 158             "[-l user] zonename [command [args ...] ]\n"), pname);
 159         exit(2);
 160 }
 161 
 162 static const char *
 163 getpname(const char *arg0)
 164 {
 165         const char *p = strrchr(arg0, '/');
 166 
 167         if (p == NULL)
 168                 p = arg0;
 169         else
 170                 p++;
 171 
 172         pname = p;
 173         return (p);
 174 }
 175 
 176 static void
 177 zerror(const char *fmt, ...)
 178 {
 179         va_list alist;
 180 
 181         (void) fprintf(stderr, "%s: ", pname);
 182         va_start(alist, fmt);
 183         (void) vfprintf(stderr, fmt, alist);
 184         va_end(alist);
 185         (void) fprintf(stderr, "\n");
 186 }
 187 
 188 static void
 189 zperror(const char *str)
 190 {
 191         const char *estr;
 192 
 193         if ((estr = strerror(errno)) != NULL)
 194                 (void) fprintf(stderr, "%s: %s: %s\n", pname, str, estr);
 195         else
 196                 (void) fprintf(stderr, "%s: %s: errno %d\n", pname, str, errno);
 197 }
 198 
 199 /*
 200  * The first part of our privilege dropping scheme needs to be called before
 201  * fork(), since we must have it for security; we don't want to be surprised
 202  * later that we couldn't allocate the privset.
 203  */
 204 static int
 205 prefork_dropprivs()
 206 {
 207         if ((dropprivs = priv_allocset()) == NULL)
 208                 return (1);
 209 
 210         priv_basicset(dropprivs);
 211         (void) priv_delset(dropprivs, PRIV_PROC_INFO);
 212         (void) priv_delset(dropprivs, PRIV_PROC_FORK);
 213         (void) priv_delset(dropprivs, PRIV_PROC_EXEC);
 214         (void) priv_delset(dropprivs, PRIV_FILE_LINK_ANY);
 215 
 216         /*
 217          * We need to keep the basic privilege PROC_SESSION and all unknown
 218          * basic privileges as well as the privileges PROC_ZONE and
 219          * PROC_OWNER in order to query session information and
 220          * send signals.
 221          */
 222         if (interactive == 0) {
 223                 (void) priv_addset(dropprivs, PRIV_PROC_ZONE);
 224                 (void) priv_addset(dropprivs, PRIV_PROC_OWNER);
 225         } else {
 226                 (void) priv_delset(dropprivs, PRIV_PROC_SESSION);
 227         }
 228 
 229         return (0);
 230 }
 231 
 232 /*
 233  * The second part of the privilege drop.  We are paranoid about being attacked
 234  * by the zone, so we drop all privileges.  This should prevent a compromise
 235  * which gets us to fork(), exec(), symlink(), etc.
 236  */
 237 static void
 238 postfork_dropprivs()
 239 {
 240         if ((setppriv(PRIV_SET, PRIV_PERMITTED, dropprivs)) == -1) {
 241                 zperror(gettext("Warning: could not set permitted privileges"));
 242         }
 243         if ((setppriv(PRIV_SET, PRIV_LIMIT, dropprivs)) == -1) {
 244                 zperror(gettext("Warning: could not set limit privileges"));
 245         }
 246         if ((setppriv(PRIV_SET, PRIV_INHERITABLE, dropprivs)) == -1) {
 247                 zperror(gettext("Warning: could not set inheritable "
 248                     "privileges"));
 249         }
 250 }
 251 
 252 /*
 253  * Create the unix domain socket and call the zoneadmd server; handshake
 254  * with it to determine whether it will allow us to connect.
 255  */
 256 static int
 257 get_console_master(const char *zname)
 258 {
 259         int sockfd = -1;
 260         struct sockaddr_un servaddr;
 261         char clientid[MAXPATHLEN];
 262         char handshake[MAXPATHLEN], c;
 263         int msglen;
 264         int i = 0, err = 0;
 265 
 266         if ((sockfd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
 267                 zperror(gettext("could not create socket"));
 268                 return (-1);
 269         }
 270 
 271         bzero(&servaddr, sizeof (servaddr));
 272         servaddr.sun_family = AF_UNIX;
 273         (void) snprintf(servaddr.sun_path, sizeof (servaddr.sun_path),
 274             "%s/%s.console_sock", ZONES_TMPDIR, zname);
 275 
 276         if (connect(sockfd, (struct sockaddr *)&servaddr,
 277             sizeof (servaddr)) == -1) {
 278                 zperror(gettext("Could not connect to zone console"));
 279                 goto bad;
 280         }
 281         masterfd = sockfd;
 282 
 283         msglen = snprintf(clientid, sizeof (clientid), "IDENT %lu %s %d\n",
 284             getpid(), setlocale(LC_MESSAGES, NULL), disconnect);
 285 
 286         if (msglen >= sizeof (clientid) || msglen < 0) {
 287                 zerror("protocol error");
 288                 goto bad;
 289         }
 290 
 291         if (write(masterfd, clientid, msglen) != msglen) {
 292                 zerror("protocol error");
 293                 goto bad;
 294         }
 295 
 296         bzero(handshake, sizeof (handshake));
 297 
 298         /*
 299          * Take care not to accumulate more than our fill, and leave room for
 300          * the NUL at the end.
 301          */
 302         while ((err = read(masterfd, &c, 1)) == 1) {
 303                 if (i >= (sizeof (handshake) - 1))
 304                         break;
 305                 if (c == '\n')
 306                         break;
 307                 handshake[i] = c;
 308                 i++;
 309         }
 310 
 311         /*
 312          * If something went wrong during the handshake we bail; perhaps
 313          * the server died off.
 314          */
 315         if (err == -1) {
 316                 zperror(gettext("Could not connect to zone console"));
 317                 goto bad;
 318         }
 319 
 320         if (strncmp(handshake, "OK", sizeof (handshake)) == 0)
 321                 return (0);
 322 
 323         zerror(gettext("Console is already in use by process ID %s."),
 324             handshake);
 325 bad:
 326         (void) close(sockfd);
 327         masterfd = -1;
 328         return (-1);
 329 }
 330 
 331 
 332 /*
 333  * Routines to handle pty creation upon zone entry and to shuttle I/O back
 334  * and forth between the two terminals.  We also compute and store the
 335  * name of the slave terminal associated with the master side.
 336  */
 337 static int
 338 get_master_pty()
 339 {
 340         if ((masterfd = open("/dev/ptmx", O_RDWR|O_NONBLOCK)) < 0) {
 341                 zperror(gettext("failed to obtain a pseudo-tty"));
 342                 return (-1);
 343         }
 344         if (tcgetattr(STDIN_FILENO, &save_termios) == -1) {
 345                 zperror(gettext("failed to get terminal settings from stdin"));
 346                 return (-1);
 347         }
 348         (void) ioctl(STDIN_FILENO, TIOCGWINSZ, (char *)&winsize);
 349 
 350         return (0);
 351 }
 352 
 353 /*
 354  * This is a bit tricky; normally a pts device will belong to the zone it
 355  * is granted to.  But in the case of "entering" a zone, we need to establish
 356  * the pty before entering the zone so that we can vector I/O to and from it
 357  * from the global zone.
 358  *
 359  * We use the zonept() call to let the ptm driver know what we are up to;
 360  * the only other hairy bit is the setting of zoneslavename (which happens
 361  * above, in get_master_pty()).
 362  */
 363 static int
 364 init_slave_pty(zoneid_t zoneid, char *devroot)
 365 {
 366         int slavefd = -1;
 367         char *slavename, zoneslavename[MAXPATHLEN];
 368 
 369         /*
 370          * Set slave permissions, zone the pts, then unlock it.
 371          */
 372         if (grantpt(masterfd) != 0) {
 373                 zperror(gettext("grantpt failed"));
 374                 return (-1);
 375         }
 376 
 377         if (unlockpt(masterfd) != 0) {
 378                 zperror(gettext("unlockpt failed"));
 379                 return (-1);
 380         }
 381 
 382         /*
 383          * We must open the slave side before zoning this pty; otherwise
 384          * the kernel would refuse us the open-- zoning a pty makes it
 385          * inaccessible to the global zone.  Note we are trying to open
 386          * the device node via the $ZONEROOT/dev path for this pty.
 387          *
 388          * Later we'll close the slave out when once we've opened it again
 389          * from within the target zone.  Blarg.
 390          */
 391         if ((slavename = ptsname(masterfd)) == NULL) {
 392                 zperror(gettext("failed to get name for pseudo-tty"));
 393                 return (-1);
 394         }
 395 
 396         (void) snprintf(zoneslavename, sizeof (zoneslavename), "%s%s",
 397             devroot, slavename);
 398 
 399         if ((slavefd = open(zoneslavename, O_RDWR)) < 0) {
 400                 zerror(gettext("failed to open %s: %s"), zoneslavename,
 401                     strerror(errno));
 402                 return (-1);
 403         }
 404 
 405         /*
 406          * Push hardware emulation (ptem), line discipline (ldterm),
 407          * and V7/4BSD/Xenix compatibility (ttcompat) modules.
 408          */
 409         if (ioctl(slavefd, I_PUSH, "ptem") == -1) {
 410                 zperror(gettext("failed to push ptem module"));
 411                 if (!failsafe)
 412                         goto bad;
 413         }
 414 
 415         /*
 416          * Anchor the stream to prevent malicious I_POPs; we prefer to do
 417          * this prior to entering the zone so that we can detect any errors
 418          * early, and so that we can set the anchor from the global zone.
 419          */
 420         if (ioctl(slavefd, I_ANCHOR) == -1) {
 421                 zperror(gettext("failed to set stream anchor"));
 422                 if (!failsafe)
 423                         goto bad;
 424         }
 425 
 426         if (ioctl(slavefd, I_PUSH, "ldterm") == -1) {
 427                 zperror(gettext("failed to push ldterm module"));
 428                 if (!failsafe)
 429                         goto bad;
 430         }
 431         if (ioctl(slavefd, I_PUSH, "ttcompat") == -1) {
 432                 zperror(gettext("failed to push ttcompat module"));
 433                 if (!failsafe)
 434                         goto bad;
 435         }
 436 
 437         /*
 438          * Propagate terminal settings from the external term to the new one.
 439          */
 440         if (tcsetattr(slavefd, TCSAFLUSH, &save_termios) == -1) {
 441                 zperror(gettext("failed to set terminal settings"));
 442                 if (!failsafe)
 443                         goto bad;
 444         }
 445         (void) ioctl(slavefd, TIOCSWINSZ, (char *)&winsize);
 446 
 447         if (zonept(masterfd, zoneid) != 0) {
 448                 zperror(gettext("could not set zoneid of pty"));
 449                 goto bad;
 450         }
 451 
 452         return (slavefd);
 453 
 454 bad:
 455         (void) close(slavefd);
 456         return (-1);
 457 }
 458 
 459 /*
 460  * Place terminal into raw mode.
 461  */
 462 static int
 463 set_tty_rawmode(int fd)
 464 {
 465         struct termios term;
 466         if (tcgetattr(fd, &term) < 0) {
 467                 zperror(gettext("failed to get user terminal settings"));
 468                 return (-1);
 469         }
 470 
 471         /* Stash for later, so we can revert back to previous mode */
 472         save_termios = term;
 473         save_fd = fd;
 474 
 475         /* disable 8->7 bit strip, start/stop, enable any char to restart */
 476         term.c_iflag &= ~(ISTRIP|IXON|IXANY);
 477         /* disable NL->CR, CR->NL, ignore CR, UPPER->lower */
 478         term.c_iflag &= ~(INLCR|ICRNL|IGNCR|IUCLC);
 479         /* disable output post-processing */
 480         term.c_oflag &= ~OPOST;
 481         /* disable canonical mode, signal chars, echo & extended functions */
 482         term.c_lflag &= ~(ICANON|ISIG|ECHO|IEXTEN);
 483 
 484         term.c_cc[VMIN] = 1;    /* byte-at-a-time */
 485         term.c_cc[VTIME] = 0;
 486 
 487         if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &term)) {
 488                 zperror(gettext("failed to set user terminal to raw mode"));
 489                 return (-1);
 490         }
 491 
 492         /*
 493          * We need to know the value of VEOF so that we can properly process for
 494          * client-side ~<EOF>.  But we have obliterated VEOF in term,
 495          * because VMIN overloads the same array slot in non-canonical mode.
 496          * Stupid @&^%!
 497          *
 498          * So here we construct the "effective" termios from the current
 499          * terminal settings, and the corrected VEOF and VEOL settings.
 500          */
 501         if (tcgetattr(STDIN_FILENO, &effective_termios) < 0) {
 502                 zperror(gettext("failed to get user terminal settings"));
 503                 return (-1);
 504         }
 505         effective_termios.c_cc[VEOF] = save_termios.c_cc[VEOF];
 506         effective_termios.c_cc[VEOL] = save_termios.c_cc[VEOL];
 507 
 508         return (0);
 509 }
 510 
 511 /*
 512  * Copy terminal window size from our terminal to the pts.
 513  */
 514 /*ARGSUSED*/
 515 static void
 516 sigwinch(int s)
 517 {
 518         struct winsize ws;
 519 
 520         if (ioctl(0, TIOCGWINSZ, &ws) == 0)
 521                 (void) ioctl(masterfd, TIOCSWINSZ, &ws);
 522 }
 523 
 524 static volatile int close_on_sig = -1;
 525 
 526 static void
 527 /*ARGSUSED*/
 528 sigcld(int s)
 529 {
 530         int status;
 531         pid_t pid;
 532 
 533         /*
 534          * Peek at the exit status.  If this isn't the process we cared
 535          * about, then just reap it.
 536          */
 537         if ((pid = waitpid(child_pid, &status, WNOHANG|WNOWAIT)) != -1) {
 538                 if (pid == child_pid &&
 539                     (WIFEXITED(status) || WIFSIGNALED(status))) {
 540                         dead = 1;
 541                         if (close_on_sig != -1) {
 542                                 (void) write(close_on_sig, "a", 1);
 543                                 (void) close(close_on_sig);
 544                                 close_on_sig = -1;
 545                         }
 546                 } else {
 547                         (void) waitpid(pid, &status, WNOHANG);
 548                 }
 549         }
 550 }
 551 
 552 /*
 553  * Some signals (currently, SIGINT) must be forwarded on to the process
 554  * group of the child process.
 555  */
 556 static void
 557 sig_forward(int s)
 558 {
 559         if (child_pid != -1) {
 560                 (void) sigsend(P_PGID, child_pid, s);
 561         }
 562 }
 563 
 564 /*
 565  * reset terminal settings for global environment
 566  */
 567 static void
 568 reset_tty()
 569 {
 570         (void) tcsetattr(save_fd, TCSADRAIN, &save_termios);
 571 }
 572 
 573 /*
 574  * Convert character to printable representation, for display with locally
 575  * echoed command characters (like when we need to display ~^D)
 576  */
 577 static void
 578 canonify(char c, char *cc)
 579 {
 580         if (isprint(c)) {
 581                 cc[0] = c;
 582                 cc[1] = '\0';
 583         } else if (c >= 0 && c <= 31) {   /* ^@ through ^_ */
 584                 cc[0] = '^';
 585                 cc[1] = c + '@';
 586                 cc[2] = '\0';
 587         } else {
 588                 cc[0] = '\\';
 589                 cc[1] = ((c >> 6) & 7) + '0';
 590                 cc[2] = ((c >> 3) & 7) + '0';
 591                 cc[3] = (c & 7) + '0';
 592                 cc[4] = '\0';
 593         }
 594 }
 595 
 596 /*
 597  * process_user_input watches the input stream for the escape sequence for
 598  * 'quit' (by default, tilde-period).  Because we might be fed just one
 599  * keystroke at a time, state associated with the user input (are we at the
 600  * beginning of the line?  are we locally echoing the next character?) is
 601  * maintained by beginning_of_line and local_echo across calls to the routine.
 602  * If the write to outfd fails, we'll try to read from infd in an attempt
 603  * to prevent deadlock between the two processes.
 604  *
 605  * This routine returns -1 when the 'quit' escape sequence has been issued,
 606  * or an error is encountered, 1 if stdin is EOF, and 0 otherwise.
 607  */
 608 static int
 609 process_user_input(int outfd, int infd)
 610 {
 611         static boolean_t beginning_of_line = B_TRUE;
 612         static boolean_t local_echo = B_FALSE;
 613         char ibuf[ZLOGIN_BUFSIZ];
 614         int nbytes;
 615         char *buf = ibuf;
 616 
 617         nbytes = read(STDIN_FILENO, ibuf, ZLOGIN_RDBUFSIZ);
 618         if (nbytes == -1 && (errno != EINTR || dead))
 619                 return (-1);
 620 
 621         if (nbytes == -1)       /* The read was interrupted. */
 622                 return (0);
 623 
 624         /* 0 read means EOF, close the pipe to the child */
 625         if (nbytes == 0)
 626                 return (1);
 627 
 628         for (char c = *buf; nbytes > 0; c = *buf, --nbytes) {
 629                 buf++;
 630                 if (beginning_of_line && !nocmdchar) {
 631                         beginning_of_line = B_FALSE;
 632                         if (c == cmdchar) {
 633                                 local_echo = B_TRUE;
 634                                 continue;
 635                         }
 636                 } else if (local_echo) {
 637                         local_echo = B_FALSE;
 638                         if (c == '.' || c == effective_termios.c_cc[VEOF]) {
 639                                 char cc[CANONIFY_LEN];
 640 
 641                                 canonify(c, cc);
 642                                 (void) write(STDOUT_FILENO, &cmdchar, 1);
 643                                 (void) write(STDOUT_FILENO, cc, strlen(cc));
 644                                 return (-1);
 645                         }
 646                 }
 647 retry:
 648                 if (write(outfd, &c, 1) <= 0) {
 649                         /*
 650                          * Since the fd we are writing to is opened with
 651                          * O_NONBLOCK it is possible to get EAGAIN if the
 652                          * pipe is full.  One way this could happen is if we
 653                          * are writing a lot of data into the pipe in this loop
 654                          * and the application on the other end is echoing that
 655                          * data back out to its stdout.  The output pipe can
 656                          * fill up since we are stuck here in this loop and not
 657                          * draining the other pipe.  We can try to read some of
 658                          * the data to see if we can drain the pipe so that the
 659                          * application can continue to make progress.  The read
 660                          * is non-blocking so we won't hang here.  We also wait
 661                          * a bit before retrying since there could be other
 662                          * reasons why the pipe is full and we don't want to
 663                          * continuously retry.
 664                          */
 665                         if (errno == EAGAIN) {
 666                                 struct timespec rqtp;
 667                                 int ln;
 668                                 char obuf[ZLOGIN_BUFSIZ];
 669 
 670                                 if ((ln = read(infd, obuf, ZLOGIN_BUFSIZ)) > 0)
 671                                         (void) write(STDOUT_FILENO, obuf, ln);
 672 
 673                                 /* sleep for 10 milliseconds */
 674                                 rqtp.tv_sec = 0;
 675                                 rqtp.tv_nsec = MSEC2NSEC(10);
 676                                 (void) nanosleep(&rqtp, NULL);
 677                                 if (!dead)
 678                                         goto retry;
 679                         }
 680 
 681                         return (-1);
 682                 }
 683                 beginning_of_line = (c == '\r' || c == '\n' ||
 684                     c == effective_termios.c_cc[VKILL] ||
 685                     c == effective_termios.c_cc[VEOL] ||
 686                     c == effective_termios.c_cc[VSUSP] ||
 687                     c == effective_termios.c_cc[VINTR]);
 688         }
 689         return (0);
 690 }
 691 
 692 /*
 693  * This function prevents deadlock between zlogin and the application in the
 694  * zone that it is talking to.  This can happen when we read from zlogin's
 695  * stdin and write the data down the pipe to the application.  If the pipe
 696  * is full, we'll block in the write.  Because zlogin could be blocked in
 697  * the write, it would never read the application's stdout/stderr so the
 698  * application can then block on those writes (when the pipe fills up).  If the
 699  * the application gets blocked this way, it can never get around to reading
 700  * its stdin so that zlogin can unblock from its write.  Once in this state,
 701  * the two processes are deadlocked.
 702  *
 703  * To prevent this, we want to verify that we can write into the pipe before we
 704  * read from our stdin.  If the pipe already is pretty full, we bypass the read
 705  * for now.  We'll circle back here again after the poll() so that we can
 706  * try again.  When this function is called, we already know there is data
 707  * ready to read on STDIN_FILENO.  We return -1 if there is a problem, 1 if
 708  * stdin is EOF, and 0 if everything is ok (even though we might not have
 709  * read/written any data into the pipe on this iteration).
 710  */
 711 static int
 712 process_raw_input(int stdin_fd, int appin_fd)
 713 {
 714         int cc;
 715         struct stat64 sb;
 716         char ibuf[ZLOGIN_RDBUFSIZ];
 717 
 718         /* Check how much data is already in the pipe */
 719         if (fstat64(appin_fd, &sb) == -1) {
 720                 perror("stat failed");
 721                 return (-1);
 722         }
 723 
 724         if (dead)
 725                 return (-1);
 726 
 727         /*
 728          * The pipe already has a lot of data in it,  don't write any more
 729          * right now.
 730          */
 731         if (sb.st_size >= HI_WATER)
 732                 return (0);
 733 
 734         cc = read(STDIN_FILENO, ibuf, ZLOGIN_RDBUFSIZ);
 735         if (cc == -1 && (errno != EINTR || dead))
 736                 return (-1);
 737 
 738         if (cc == -1)   /* The read was interrupted. */
 739                 return (0);
 740 
 741         /* 0 read means EOF, close the pipe to the child */
 742         if (cc == 0)
 743                 return (1);
 744 
 745         /*
 746          * stdin_fd is stdin of the target; so, the thing we'll write the user
 747          * data *to*.
 748          */
 749         if (write(stdin_fd, ibuf, cc) == -1)
 750                 return (-1);
 751 
 752         return (0);
 753 }
 754 
 755 /*
 756  * Write the output from the application running in the zone.  We can get
 757  * a signal during the write (usually it would be SIGCHLD when the application
 758  * has exited) so we loop to make sure we have written all of the data we read.
 759  */
 760 static int
 761 process_output(int in_fd, int out_fd)
 762 {
 763         int wrote = 0;
 764         int cc;
 765         char ibuf[ZLOGIN_BUFSIZ];
 766 
 767         cc = read(in_fd, ibuf, ZLOGIN_BUFSIZ);
 768         if (cc == -1 && (errno != EINTR || dead))
 769                 return (-1);
 770         if (cc == 0)    /* EOF */
 771                 return (-1);
 772         if (cc == -1)   /* The read was interrupted. */
 773                 return (0);
 774 
 775         do {
 776                 int len;
 777 
 778                 len = write(out_fd, ibuf + wrote, cc - wrote);
 779                 if (len == -1 && errno != EINTR)
 780                         return (-1);
 781                 if (len != -1)
 782                         wrote += len;
 783         } while (wrote < cc);
 784 
 785         return (0);
 786 }
 787 
 788 /*
 789  * This is the main I/O loop, and is shared across all zlogin modes.
 790  * Parameters:
 791  *      stdin_fd:  The fd representing 'stdin' for the slave side; input to
 792  *                 the zone will be written here.
 793  *
 794  *      appin_fd:  The fd representing the other end of the 'stdin' pipe (when
 795  *                 we're running non-interactive); used in process_raw_input
 796  *                 to ensure we don't fill up the application's stdin pipe.
 797  *
 798  *      stdout_fd: The fd representing 'stdout' for the slave side; output
 799  *                 from the zone will arrive here.
 800  *
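 *      stderr_fd: The fd representing 'stderr' for the slave side; output
 *                 from the zone will arrive here.
 *
 *      sig_fd:    The read end of the signalling pipe.  The signal handler
 *                 writes into the other end when the child dies, so that a
 *                 poll() which is already in progress wakes up.
 *
 *      raw_mode:  If TRUE, then no processing (for example, for '~.') will
 *                 be performed on the input coming from STDIN.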
 806  *
 807  * stderr_fd may be specified as -1 if there is no stderr (only non-interactive
 808  * mode supplies a stderr).
 809  *
 810  */
 811 static void
 812 doio(int stdin_fd, int appin_fd, int stdout_fd, int stderr_fd, int sig_fd,
 813     boolean_t raw_mode)
 814 {
 815         struct pollfd pollfds[4];
 816         char ibuf[ZLOGIN_BUFSIZ];
 817         int cc, ret;
 818 
 819         /* read from stdout of zone and write to stdout of global zone */
 820         pollfds[0].fd = stdout_fd;
 821         pollfds[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI;
 822 
 823         /* read from stderr of zone and write to stderr of global zone */
 824         pollfds[1].fd = stderr_fd;
 825         pollfds[1].events = pollfds[0].events;
 826 
 827         /* read from stdin of global zone and write to stdin of zone */
 828         pollfds[2].fd = STDIN_FILENO;
 829         pollfds[2].events = pollfds[0].events;
 830 
 831         /* read from signalling pipe so we know when child dies */
 832         pollfds[3].fd = sig_fd;
 833         pollfds[3].events = pollfds[0].events;
 834 
 835         for (;;) {
 836                 pollfds[0].revents = pollfds[1].revents =
 837                     pollfds[2].revents = pollfds[3].revents = 0;
 838 
 839                 if (dead)
 840                         break;
 841 
 842                 /*
 843                  * There is a race condition here where we can receive the
 844                  * child death signal, set the dead flag, but since we have
 845                  * passed the test above, we would go into poll and hang.
 846                  * To avoid this we use the sig_fd as an additional poll fd.
 847                  * The signal handler writes into the other end of this pipe
 848                  * when the child dies so that the poll will always see that
 849                  * input and proceed.  We just loop around at that point and
 850                  * then notice the dead flag.
 851                  */
 852 
 853                 ret = poll(pollfds,
 854                     sizeof (pollfds) / sizeof (struct pollfd), -1);
 855 
 856                 if (ret == -1 && errno != EINTR) {
 857                         perror("poll failed");
 858                         break;
 859                 }
 860 
 861                 if (errno == EINTR && dead) {
 862                         break;
 863                 }
 864 
 865                 /* event from master side stdout */
 866                 if (pollfds[0].revents) {
 867                         if (pollfds[0].revents &
 868                             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 869                                 if (process_output(stdout_fd, STDOUT_FILENO)
 870                                     != 0)
 871                                         break;
 872                         } else {
 873                                 pollerr = pollfds[0].revents;
 874                                 break;
 875                         }
 876                 }
 877 
 878                 /* event from master side stderr */
 879                 if (pollfds[1].revents) {
 880                         if (pollfds[1].revents &
 881                             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 882                                 if (process_output(stderr_fd, STDERR_FILENO)
 883                                     != 0)
 884                                         break;
 885                         } else {
 886                                 pollerr = pollfds[1].revents;
 887                                 break;
 888                         }
 889                 }
 890 
 891                 /* event from user STDIN side */
 892                 if (pollfds[2].revents) {
 893                         if (pollfds[2].revents &
 894                             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 895                                 /*
 896                                  * stdin fd is stdin of the target; so,
 897                                  * the thing we'll write the user data *to*.
 898                                  *
 899                                  * Also, unlike on the output side, we
 900                                  * close the pipe on a zero-length message.
 901                                  */
 902                                 int res;
 903 
 904                                 if (raw_mode)
 905                                         res = process_raw_input(stdin_fd,
 906                                             appin_fd);
 907                                 else
 908                                         res = process_user_input(stdin_fd,
 909                                             stdout_fd);
 910 
 911                                 if (res < 0)
 912                                         break;
 913                                 if (res > 0) {
 914                                         /* EOF (close) child's stdin_fd */
 915                                         pollfds[2].fd = -1;
 916                                         while ((res = close(stdin_fd)) != 0 &&
 917                                             errno == EINTR)
 918                                                 ;
 919                                         if (res != 0)
 920                                                 break;
 921                                 }
 922 
 923                         } else if (raw_mode && pollfds[2].revents & POLLHUP) {
 924                                 /*
 925                                  * It's OK to get a POLLHUP on STDIN-- it
 926                                  * always happens if you do:
 927                                  *
 928                                  * echo foo | zlogin <zone> <command>
 929                                  *
 930                                  * We reset fd to -1 in this case to clear
 931                                  * the condition and close the pipe (EOF) to
 932                                  * the other side in order to wrap things up.
 933                                  */
 934                                 int res;
 935 
 936                                 pollfds[2].fd = -1;
 937                                 while ((res = close(stdin_fd)) != 0 &&
 938                                     errno == EINTR)
 939                                         ;
 940                                 if (res != 0)
 941                                         break;
 942                         } else {
 943                                 pollerr = pollfds[2].revents;
 944                                 break;
 945                         }
 946                 }
 947         }
 948 
 949         /*
 950          * We are in the midst of dying, but try to poll with a short
 951          * timeout to see if we can catch the last bit of I/O from the
 952          * children.
 953          */
 954 retry:
 955         pollfds[0].revents = pollfds[1].revents = 0;
 956         (void) poll(pollfds, 2, 100);
 957         if (pollfds[0].revents &
 958             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 959                 if ((cc = read(stdout_fd, ibuf, ZLOGIN_BUFSIZ)) > 0) {
 960                         (void) write(STDOUT_FILENO, ibuf, cc);
 961                         goto retry;
 962                 }
 963         }
 964         if (pollfds[1].revents &
 965             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 966                 if ((cc = read(stderr_fd, ibuf, ZLOGIN_BUFSIZ)) > 0) {
 967                         (void) write(STDERR_FILENO, ibuf, cc);
 968                         goto retry;
 969                 }
 970         }
 971 }
 972 
 973 /*
 974  * Fetch the user_cmd brand hook for getting a user's passwd(4) entry.
 975  */
 976 static const char *
 977 zone_get_user_cmd(brand_handle_t bh, const char *login, char *user_cmd,
 978     size_t len)
 979 {
 980         bzero(user_cmd, sizeof (user_cmd));
 981         if (brand_get_user_cmd(bh, login, user_cmd, len) != 0)
 982                 return (NULL);
 983 
 984         return (user_cmd);
 985 }
 986 
 987 /* From libc */
 988 extern int str2passwd(const char *, int, void *, char *, int);
 989 
 990 /*
 991  * exec() the user_cmd brand hook, and convert the output string to a
 992  * struct passwd.  This is to be called after zone_enter().
 993  *
 994  */
 995 static struct passwd *
 996 zone_get_user_pw(const char *user_cmd, struct passwd *pwent, char *pwbuf,
 997     int pwbuflen)
 998 {
 999         char pwline[NSS_BUFLEN_PASSWD];
1000         char *cin = NULL;
1001         FILE *fin;
1002         int status;
1003 
1004         assert(getzoneid() != GLOBAL_ZONEID);
1005 
1006         if ((fin = popen(user_cmd, "r")) == NULL)
1007                 return (NULL);
1008 
1009         while (cin == NULL && !feof(fin))
1010                 cin = fgets(pwline, sizeof (pwline), fin);
1011 
1012         if (cin == NULL) {
1013                 (void) pclose(fin);
1014                 return (NULL);
1015         }
1016 
1017         status = pclose(fin);
1018         if (!WIFEXITED(status))
1019                 return (NULL);
1020         if (WEXITSTATUS(status) != 0)
1021                 return (NULL);
1022 
1023         if (str2passwd(pwline, sizeof (pwline), pwent, pwbuf, pwbuflen) == 0)
1024                 return (pwent);
1025         else
1026                 return (NULL);
1027 }
1028 
1029 static char **
1030 zone_login_cmd(brand_handle_t bh, const char *login)
1031 {
1032         static char result_buf[ARG_MAX];
1033         char **new_argv, *ptr, *lasts;
1034         int n, a;
1035 
1036         /* Get the login command for the target zone. */
1037         bzero(result_buf, sizeof (result_buf));
1038 
1039         if (forced_login) {
1040                 if (brand_get_forcedlogin_cmd(bh, login,
1041                     result_buf, sizeof (result_buf)) != 0)
1042                         return (NULL);
1043         } else {
1044                 if (brand_get_login_cmd(bh, login,
1045                     result_buf, sizeof (result_buf)) != 0)
1046                         return (NULL);
1047         }
1048 
1049         /*
1050          * We got back a string that we'd like to execute.  But since
1051          * we're not doing the execution via a shell we'll need to convert
1052          * the exec string to an array of strings.  We'll do that here
1053          * but we're going to be very simplistic about it and break stuff
1054          * up based on spaces.  We're not even going to support any kind
1055          * of quoting or escape characters.  It's truly amazing that
1056          * there is no library function in OpenSolaris to do this for us.
1057          */
1058 
1059         /*
1060          * Be paranoid.  Since we're deliniating based on spaces make
1061          * sure there are no adjacent spaces.
1062          */
1063         if (strstr(result_buf, "  ") != NULL)
1064                 return (NULL);
1065 
1066         /* Remove any trailing whitespace.  */
1067         n = strlen(result_buf);
1068         if (result_buf[n - 1] == ' ')
1069                 result_buf[n - 1] = '\0';
1070 
1071         /* Count how many elements there are in the exec string. */
1072         ptr = result_buf;
1073         for (n = 2; ((ptr = strchr(ptr + 1, (int)' ')) != NULL); n++)
1074                 ;
1075 
1076         /* Allocate the argv array that we're going to return. */
1077         if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1078                 return (NULL);
1079 
1080         /* Tokenize the exec string and return. */
1081         a = 0;
1082         new_argv[a++] = result_buf;
1083         if (n > 2) {
1084                 (void) strtok_r(result_buf, " ", &lasts);
1085                 while ((new_argv[a++] = strtok_r(NULL, " ", &lasts)) != NULL)
1086                         ;
1087         } else {
1088                 new_argv[a++] = NULL;
1089         }
1090         assert(n == a);
1091         return (new_argv);
1092 }
1093 
1094 /*
1095  * Prepare argv array for exec'd process; if we're passing commands to the
1096  * new process, then use su(1M) to do the invocation.  Otherwise, use
1097  * 'login -z <from_zonename> -f' (-z is an undocumented option which tells
1098  * login that we're coming from another zone, and to disregard its CONSOLE
1099  * checks).
1100  */
1101 static char **
1102 prep_args(brand_handle_t bh, const char *login, char **argv)
1103 {
1104         int argc = 0, a = 0, i, n = -1;
1105         char **new_argv;
1106 
1107         if (argv != NULL) {
1108                 size_t subshell_len = 1;
1109                 char *subshell;
1110 
1111                 while (argv[argc] != NULL)
1112                         argc++;
1113 
1114                 for (i = 0; i < argc; i++) {
1115                         subshell_len += strlen(argv[i]) + 1;
1116                 }
1117                 if ((subshell = calloc(1, subshell_len)) == NULL)
1118                         return (NULL);
1119 
1120                 for (i = 0; i < argc; i++) {
1121                         (void) strcat(subshell, argv[i]);
1122                         (void) strcat(subshell, " ");
1123                 }
1124 
1125                 if (failsafe) {
1126                         n = 4;
1127                         if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1128                                 return (NULL);
1129 
1130                         new_argv[a++] = FAILSAFESHELL;
1131                 } else {
1132                         n = 5;
1133                         if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1134                                 return (NULL);
1135 
1136                         new_argv[a++] = SUPATH;
1137                         if (strcmp(login, "root") != 0) {
1138                                 new_argv[a++] = "-";
1139                                 n++;
1140                         }
1141                         new_argv[a++] = (char *)login;
1142                 }
1143                 new_argv[a++] = "-c";
1144                 new_argv[a++] = subshell;
1145                 new_argv[a++] = NULL;
1146                 assert(a == n);
1147         } else {
1148                 if (failsafe) {
1149                         n = 2;
1150                         if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1151                                 return (NULL);
1152                         new_argv[a++] = FAILSAFESHELL;
1153                         new_argv[a++] = NULL;
1154                         assert(n == a);
1155                 } else {
1156                         new_argv = zone_login_cmd(bh, login);
1157                 }
1158         }
1159 
1160         return (new_argv);
1161 }
1162 
/*
 * Build a freshly allocated "name=value" string for use as an environment
 * entry.  Returns NULL if the allocation fails; otherwise the caller owns
 * the returned string.
 */
static char *
add_env(char *name, char *value)
{
        size_t namelen = strlen(name);
        size_t vallen = strlen(value);
        char *entry;

        /* name, '=', value, NUL */
        entry = malloc(namelen + vallen + 2);
        if (entry != NULL) {
                (void) memcpy(entry, name, namelen);
                entry[namelen] = '=';
                /* vallen + 1 also copies value's terminating NUL */
                (void) memcpy(entry + namelen + 1, value, vallen + 1);
        }

        return (entry);
}
1178 
1179 /*
1180  * Prepare envp array for exec'd process.
1181  */
1182 static char **
1183 prep_env()
1184 {
1185         int e = 0, size = 1;
1186         char **new_env, *estr;
1187         char *term = getenv("TERM");
1188 
1189         size++; /* for $PATH */
1190         if (term != NULL)
1191                 size++;
1192 
1193         /*
1194          * In failsafe mode we set $HOME, since '-l' isn't valid in this mode.
1195          * We also set $SHELL, since neither login nor su will be around to do
1196          * it.
1197          */
1198         if (failsafe)
1199                 size += 2;
1200 
1201         if ((new_env = malloc(sizeof (char *) * size)) == NULL)
1202                 return (NULL);
1203 
1204         if ((estr = add_env("PATH", DEF_PATH)) == NULL)
1205                 return (NULL);
1206         new_env[e++] = estr;
1207 
1208         if (term != NULL) {
1209                 if ((estr = add_env("TERM", term)) == NULL)
1210                         return (NULL);
1211                 new_env[e++] = estr;
1212         }
1213 
1214         if (failsafe) {
1215                 if ((estr = add_env("HOME", "/")) == NULL)
1216                         return (NULL);
1217                 new_env[e++] = estr;
1218 
1219                 if ((estr = add_env("SHELL", FAILSAFESHELL)) == NULL)
1220                         return (NULL);
1221                 new_env[e++] = estr;
1222         }
1223 
1224         new_env[e++] = NULL;
1225 
1226         assert(e == size);
1227 
1228         return (new_env);
1229 }
1230 
1231 /*
1232  * Finish the preparation of the envp array for exec'd non-interactive
1233  * zlogins.  This is called in the child process *after* we zone_enter(), since
1234  * it derives things we can only know within the zone, such as $HOME, $SHELL,
1235  * etc.  We need only do this in the non-interactive, mode, since otherwise
1236  * login(1) will do it.  We don't do this in failsafe mode, since it presents
1237  * additional ways in which the command could fail, and we'd prefer to avoid
1238  * that.
1239  */
1240 static char **
1241 prep_env_noninteractive(const char *user_cmd, char **env)
1242 {
1243         size_t size;
1244         char **new_env;
1245         int e, i;
1246         char *estr;
1247         char varmail[LOGNAME_MAX + 11]; /* strlen(/var/mail/) = 10, NUL */
1248         char pwbuf[NSS_BUFLEN_PASSWD + 1];
1249         struct passwd pwent;
1250         struct passwd *pw = NULL;
1251 
1252         assert(env != NULL);
1253         assert(failsafe == 0);
1254 
1255         /*
1256          * Exec the "user_cmd" brand hook to get a pwent for the
1257          * login user.  If this fails, HOME will be set to "/", SHELL
1258          * will be set to $DEFAULTSHELL, and we will continue to exec
1259          * SUPATH <login> -c <cmd>.
1260          */
1261         pw = zone_get_user_pw(user_cmd, &pwent, pwbuf, sizeof (pwbuf));
1262 
1263         /*
1264          * Get existing envp size.
1265          */
1266         for (size = 0; env[size] != NULL; size++)
1267                 ;
1268 
1269         e = size;
1270 
1271         /*
1272          * Finish filling out the environment; we duplicate the environment
1273          * setup described in login(1), for lack of a better precedent.
1274          */
1275         if (pw != NULL)
1276                 size += 3;      /* LOGNAME, HOME, MAIL */
1277         else
1278                 size += 1;      /* HOME */
1279 
1280         size++; /* always fill in SHELL */
1281         size++; /* terminating NULL */
1282 
1283         if ((new_env = malloc(sizeof (char *) * size)) == NULL)
1284                 goto malloc_fail;
1285 
1286         /*
1287          * Copy existing elements of env into new_env.
1288          */
1289         for (i = 0; env[i] != NULL; i++) {
1290                 if ((new_env[i] = strdup(env[i])) == NULL)
1291                         goto malloc_fail;
1292         }
1293         assert(e == i);
1294 
1295         if (pw != NULL) {
1296                 if ((estr = add_env("LOGNAME", pw->pw_name)) == NULL)
1297                         goto malloc_fail;
1298                 new_env[e++] = estr;
1299 
1300                 if ((estr = add_env("HOME", pw->pw_dir)) == NULL)
1301                         goto malloc_fail;
1302                 new_env[e++] = estr;
1303 
1304                 if (chdir(pw->pw_dir) != 0)
1305                         zerror(gettext("Could not chdir to home directory "
1306                             "%s: %s"), pw->pw_dir, strerror(errno));
1307 
1308                 (void) snprintf(varmail, sizeof (varmail), "/var/mail/%s",
1309                     pw->pw_name);
1310                 if ((estr = add_env("MAIL", varmail)) == NULL)
1311                         goto malloc_fail;
1312                 new_env[e++] = estr;
1313         } else {
1314                 if ((estr = add_env("HOME", "/")) == NULL)
1315                         goto malloc_fail;
1316                 new_env[e++] = estr;
1317         }
1318 
1319         if (pw != NULL && strlen(pw->pw_shell) > 0) {
1320                 if ((estr = add_env("SHELL", pw->pw_shell)) == NULL)
1321                         goto malloc_fail;
1322                 new_env[e++] = estr;
1323         } else {
1324                 if ((estr = add_env("SHELL", DEFAULTSHELL)) == NULL)
1325                         goto malloc_fail;
1326                 new_env[e++] = estr;
1327         }
1328 
1329         new_env[e++] = NULL;    /* add terminating NULL */
1330 
1331         assert(e == size);
1332         return (new_env);
1333 
1334 malloc_fail:
1335         zperror(gettext("failed to allocate memory for process environment"));
1336         return (NULL);
1337 }
1338 
/*
 * Per-descriptor callback: close fd unless it is the one descriptor the
 * caller wants kept, which is passed by address through slavefd.  Always
 * returns 0; the result of close() is ignored.
 */
static int
close_func(void *slavefd, int fd)
{
        const int keep = *(int *)slavefd;

        if (fd == keep)
                return (0);

        (void) close(fd);
        return (0);
}
1346 
1347 static void
1348 set_cmdchar(char *cmdcharstr)
1349 {
1350         char c;
1351         long lc;
1352 
1353         if ((c = *cmdcharstr) != '\\') {
1354                 cmdchar = c;
1355                 return;
1356         }
1357 
1358         c = cmdcharstr[1];
1359         if (c == '\0' || c == '\\') {
1360                 cmdchar = '\\';
1361                 return;
1362         }
1363 
1364         if (c < '0' || c > '7') {
1365                 zerror(gettext("Unrecognized escape character option %s"),
1366                     cmdcharstr);
1367                 usage();
1368         }
1369 
1370         lc = strtol(cmdcharstr + 1, NULL, 8);
1371         if (lc < 0 || lc > 255) {
1372                 zerror(gettext("Octal escape character '%s' too large"),
1373                     cmdcharstr);
1374                 usage();
1375         }
1376         cmdchar = (char)lc;
1377 }
1378 
1379 static int
1380 setup_utmpx(char *slavename)
1381 {
1382         struct utmpx ut;
1383 
1384         bzero(&ut, sizeof (ut));
1385         (void) strncpy(ut.ut_user, ".zlogin", sizeof (ut.ut_user));
1386         (void) strncpy(ut.ut_line, slavename, sizeof (ut.ut_line));
1387         ut.ut_pid = getpid();
1388         ut.ut_id[0] = 'z';
1389         ut.ut_id[1] = ut.ut_id[2] = ut.ut_id[3] = (char)SC_WILDC;
1390         ut.ut_type = LOGIN_PROCESS;
1391         (void) time(&ut.ut_tv.tv_sec);
1392 
1393         if (makeutx(&ut) == NULL) {
1394                 zerror(gettext("makeutx failed"));
1395                 return (-1);
1396         }
1397         return (0);
1398 }
1399 
/*
 * Give up the lock file opened by grab_lock_file() by closing its
 * descriptor.  This is best effort: the result of close() is ignored.
 */
static void
release_lock_file(int lockfd)
{
        (void) close(lockfd);
}
1405 
1406 static int
1407 grab_lock_file(const char *zone_name, int *lockfd)
1408 {
1409         char pathbuf[PATH_MAX];
1410         struct flock flock;
1411 
1412         if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
1413                 zerror(gettext("could not mkdir %s: %s"), ZONES_TMPDIR,
1414                     strerror(errno));
1415                 return (-1);
1416         }
1417         (void) chmod(ZONES_TMPDIR, S_IRWXU);
1418         (void) snprintf(pathbuf, sizeof (pathbuf), "%s/%s.zoneadm.lock",
1419             ZONES_TMPDIR, zone_name);
1420 
1421         if ((*lockfd = open(pathbuf, O_RDWR|O_CREAT, S_IRUSR|S_IWUSR)) < 0) {
1422                 zerror(gettext("could not open %s: %s"), pathbuf,
1423                     strerror(errno));
1424                 return (-1);
1425         }
1426         /*
1427          * Lock the file to synchronize with other zoneadmds
1428          */
1429         flock.l_type = F_WRLCK;
1430         flock.l_whence = SEEK_SET;
1431         flock.l_start = (off_t)0;
1432         flock.l_len = (off_t)0;
1433         if (fcntl(*lockfd, F_SETLKW, &flock) < 0) {
1434                 zerror(gettext("unable to lock %s: %s"), pathbuf,
1435                     strerror(errno));
1436                 release_lock_file(*lockfd);
1437                 return (-1);
1438         }
1439         return (Z_OK);
1440 }
1441 
1442 static int
1443 start_zoneadmd(const char *zone_name)
1444 {
1445         pid_t retval;
1446         int pstatus = 0, error = -1, lockfd, doorfd;
1447         struct door_info info;
1448         char doorpath[MAXPATHLEN];
1449 
1450         (void) snprintf(doorpath, sizeof (doorpath), ZONE_DOOR_PATH, zone_name);
1451 
1452         if (grab_lock_file(zone_name, &lockfd) != Z_OK)
1453                 return (-1);
1454         /*
1455          * We must do the door check with the lock held.  Otherwise, we
1456          * might race against another zoneadm/zlogin process and wind
1457          * up with two processes trying to start zoneadmd at the same
1458          * time.  zoneadmd will detect this, and fail, but we prefer this
1459          * to be as seamless as is practical, from a user perspective.
1460          */
1461         if ((doorfd = open(doorpath, O_RDONLY)) < 0) {
1462                 if (errno != ENOENT) {
1463                         zerror("failed to open %s: %s", doorpath,
1464                             strerror(errno));
1465                         goto out;
1466                 }
1467         } else {
1468                 /*
1469                  * Seems to be working ok.
1470                  */
1471                 if (door_info(doorfd, &info) == 0 &&
1472                     ((info.di_attributes & DOOR_REVOKED) == 0)) {
1473                         error = 0;
1474                         goto out;
1475                 }
1476         }
1477 
1478         if ((child_pid = fork()) == -1) {
1479                 zperror(gettext("could not fork"));
1480                 goto out;
1481         } else if (child_pid == 0) {
1482                 /* child process */
1483                 (void) execl("/usr/lib/zones/zoneadmd", "zoneadmd", "-z",
1484                     zone_name, NULL);
1485                 zperror(gettext("could not exec zoneadmd"));
1486                 _exit(1);
1487         }
1488 
1489         /* parent process */
1490         do {
1491                 retval = waitpid(child_pid, &pstatus, 0);
1492         } while (retval != child_pid);
1493         if (WIFSIGNALED(pstatus) ||
1494             (WIFEXITED(pstatus) && WEXITSTATUS(pstatus) != 0)) {
1495                 zerror(gettext("could not start %s"), "zoneadmd");
1496                 goto out;
1497         }
1498         error = 0;
1499 out:
1500         release_lock_file(lockfd);
1501         (void) close(doorfd);
1502         return (error);
1503 }
1504 
1505 static int
1506 init_template(void)
1507 {
1508         int fd;
1509         int err = 0;
1510 
1511         fd = open64(CTFS_ROOT "/process/template", O_RDWR);
1512         if (fd == -1)
1513                 return (-1);
1514 
1515         /*
1516          * zlogin doesn't do anything with the contract.
1517          * Deliver no events, don't inherit, and allow it to be orphaned.
1518          */
1519         err |= ct_tmpl_set_critical(fd, 0);
1520         err |= ct_tmpl_set_informative(fd, 0);
1521         err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
1522         err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
1523         if (err || ct_tmpl_activate(fd)) {
1524                 (void) close(fd);
1525                 return (-1);
1526         }
1527 
1528         return (fd);
1529 }
1530 
1531 static int
1532 noninteractive_login(char *zonename, const char *user_cmd, zoneid_t zoneid,
1533     char **new_args, char **new_env)
1534 {
1535         pid_t retval;
1536         int stdin_pipe[2], stdout_pipe[2], stderr_pipe[2], dead_child_pipe[2];
1537         int child_status;
1538         int tmpl_fd;
1539         sigset_t block_cld;
1540 
1541         if ((tmpl_fd = init_template()) == -1) {
1542                 reset_tty();
1543                 zperror(gettext("could not create contract"));
1544                 return (1);
1545         }
1546 
1547         if (pipe(stdin_pipe) != 0) {
1548                 zperror(gettext("could not create STDIN pipe"));
1549                 return (1);
1550         }
1551         /*
1552          * When the user types ^D, we get a zero length message on STDIN.
1553          * We need to echo that down the pipe to send it to the other side;
1554          * but by default, pipes don't propagate zero-length messages.  We
1555          * toggle that behavior off using I_SWROPT.  See streamio(7i).
1556          */
1557         if (ioctl(stdin_pipe[0], I_SWROPT, SNDZERO) != 0) {
1558                 zperror(gettext("could not configure STDIN pipe"));
1559                 return (1);
1560 
1561         }
1562         if (pipe(stdout_pipe) != 0) {
1563                 zperror(gettext("could not create STDOUT pipe"));
1564                 return (1);
1565         }
1566         if (pipe(stderr_pipe) != 0) {
1567                 zperror(gettext("could not create STDERR pipe"));
1568                 return (1);
1569         }
1570 
1571         if (pipe(dead_child_pipe) != 0) {
1572                 zperror(gettext("could not create signalling pipe"));
1573                 return (1);
1574         }
1575         close_on_sig = dead_child_pipe[0];
1576 
1577         /*
1578          * If any of the pipe FD's winds up being less than STDERR, then we
1579          * have a mess on our hands-- and we are lacking some of the I/O
1580          * streams we would expect anyway.  So we bail.
1581          */
1582         if (stdin_pipe[0] <= STDERR_FILENO ||
1583             stdin_pipe[1] <= STDERR_FILENO ||
1584             stdout_pipe[0] <= STDERR_FILENO ||
1585             stdout_pipe[1] <= STDERR_FILENO ||
1586             stderr_pipe[0] <= STDERR_FILENO ||
1587             stderr_pipe[1] <= STDERR_FILENO ||
1588             dead_child_pipe[0] <= STDERR_FILENO ||
1589             dead_child_pipe[1] <= STDERR_FILENO) {
1590                 zperror(gettext("process lacks valid STDIN, STDOUT, STDERR"));
1591                 return (1);
1592         }
1593 
1594         if (prefork_dropprivs() != 0) {
1595                 zperror(gettext("could not allocate privilege set"));
1596                 return (1);
1597         }
1598 
1599         (void) sigset(SIGCLD, sigcld);
1600         (void) sigemptyset(&block_cld);
1601         (void) sigaddset(&block_cld, SIGCLD);
1602         (void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
1603 
1604         if ((child_pid = fork()) == -1) {
1605                 (void) ct_tmpl_clear(tmpl_fd);
1606                 (void) close(tmpl_fd);
1607                 zperror(gettext("could not fork"));
1608                 return (1);
1609         } else if (child_pid == 0) { /* child process */
1610                 (void) ct_tmpl_clear(tmpl_fd);
1611 
1612                 /*
1613                  * Do a dance to get the pipes hooked up as FD's 0, 1 and 2.
1614                  */
1615                 (void) close(STDIN_FILENO);
1616                 (void) close(STDOUT_FILENO);
1617                 (void) close(STDERR_FILENO);
1618                 (void) dup2(stdin_pipe[1], STDIN_FILENO);
1619                 (void) dup2(stdout_pipe[1], STDOUT_FILENO);
1620                 (void) dup2(stderr_pipe[1], STDERR_FILENO);
1621                 (void) closefrom(STDERR_FILENO + 1);
1622 
1623                 (void) sigset(SIGCLD, SIG_DFL);
1624                 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1625                 /*
1626                  * In case any of stdin, stdout or stderr are streams,
1627                  * anchor them to prevent malicious I_POPs.
1628                  */
1629                 (void) ioctl(STDIN_FILENO, I_ANCHOR);
1630                 (void) ioctl(STDOUT_FILENO, I_ANCHOR);
1631                 (void) ioctl(STDERR_FILENO, I_ANCHOR);
1632 
1633                 if (zone_enter(zoneid) == -1) {
1634                         zerror(gettext("could not enter zone %s: %s"),
1635                             zonename, strerror(errno));
1636                         _exit(1);
1637                 }
1638 
1639                 /*
1640                  * For non-native zones, tell libc where it can find locale
1641                  * specific getttext() messages.
1642                  */
1643                 if (access("/.SUNWnative/usr/lib/locale", R_OK) == 0)
1644                         (void) bindtextdomain(TEXT_DOMAIN,
1645                             "/.SUNWnative/usr/lib/locale");
1646                 else if (access("/native/usr/lib/locale", R_OK) == 0)
1647                         (void) bindtextdomain(TEXT_DOMAIN,
1648                             "/native/usr/lib/locale");
1649 
1650                 if (!failsafe)
1651                         new_env = prep_env_noninteractive(user_cmd, new_env);
1652 
1653                 if (new_env == NULL) {
1654                         _exit(1);
1655                 }
1656 
1657                 /*
1658                  * Move into a new process group; the zone_enter will have
1659                  * placed us into zsched's session, and we want to be in
1660                  * a unique process group.
1661                  */
1662                 (void) setpgid(getpid(), getpid());
1663 
1664                 /*
1665                  * The child needs to run as root to
1666                  * execute the su program.
1667                  */
1668                 if (setuid(0) == -1) {
1669                         zperror(gettext("insufficient privilege"));
1670                         return (1);
1671                 }
1672 
1673                 (void) execve(new_args[0], new_args, new_env);
1674                 zperror(gettext("exec failure"));
1675                 _exit(1);
1676         }
1677         /* parent */
1678 
1679         /* close pipe sides written by child */
1680         (void) close(stdout_pipe[1]);
1681         (void) close(stderr_pipe[1]);
1682 
1683         (void) sigset(SIGINT, sig_forward);
1684 
1685         postfork_dropprivs();
1686 
1687         (void) ct_tmpl_clear(tmpl_fd);
1688         (void) close(tmpl_fd);
1689 
1690         (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1691         doio(stdin_pipe[0], stdin_pipe[1], stdout_pipe[0], stderr_pipe[0],
1692             dead_child_pipe[1], B_TRUE);
1693         do {
1694                 retval = waitpid(child_pid, &child_status, 0);
1695                 if (retval == -1) {
1696                         child_status = 0;
1697                 }
1698         } while (retval != child_pid && errno != ECHILD);
1699 
1700         return (WEXITSTATUS(child_status));
1701 }
1702 
/*
 * Return the name of the user on whose behalf zlogin is running.
 *
 * Authorizations are checked to restrict access based on the requested
 * operation and zone name.  It is assumed that the program is running
 * with all privileges, but that the real user ID is that of the user or
 * role on whose behalf we are operating.  So we start by getting the
 * username (looked up from the real UID) that will be used for subsequent
 * authorization checks.
 *
 * getpwuid() hands back a pointer into storage that later getpw*() calls
 * are allowed to overwrite, so the name is duplicated before it is
 * returned.  The copy is heap-allocated and is meant to live for the rest
 * of the process; callers are not expected to free it.
 *
 * This function does not return on failure: if the password entry cannot
 * be found or the name cannot be duplicated, an error is reported and the
 * process exits with status 1.
 */
static char *
get_username(void)
{
        struct passwd *nptr;
        char *name;

        if ((nptr = getpwuid(getuid())) == NULL ||
            (name = strdup(nptr->pw_name)) == NULL) {
                zerror(gettext("could not get user name."));
                _exit(1);
        }
        return (name);
}
1725 
1726 int
1727 main(int argc, char **argv)
1728 {
1729         int arg, console = 0;
1730         zoneid_t zoneid;
1731         zone_state_t st;
1732         char *login = "root";
1733         int lflag = 0;
1734         int nflag = 0;
1735         char *zonename = NULL;
1736         char **proc_args = NULL;
1737         char **new_args, **new_env;
1738         sigset_t block_cld;
1739         char devroot[MAXPATHLEN];
1740         char *slavename, slaveshortname[MAXPATHLEN];
1741         priv_set_t *privset;
1742         int tmpl_fd;
1743         char zonebrand[MAXNAMELEN];
1744         char default_brand[MAXNAMELEN];
1745         struct stat sb;
1746         char kernzone[ZONENAME_MAX];
1747         brand_handle_t bh;
1748         char user_cmd[MAXPATHLEN];
1749         char authname[MAXAUTHS];
1750 
1751         (void) setlocale(LC_ALL, "");
1752         (void) textdomain(TEXT_DOMAIN);
1753 
1754         (void) getpname(argv[0]);
1755         username = get_username();
1756 
1757         while ((arg = getopt(argc, argv, "dnECR:Se:l:Q")) != EOF) {
1758                 switch (arg) {
1759                 case 'C':
1760                         console = 1;
1761                         break;
1762                 case 'E':
1763                         nocmdchar = 1;
1764                         break;
1765                 case 'R':       /* undocumented */
1766                         if (*optarg != '/') {
1767                                 zerror(gettext("root path must be absolute."));
1768                                 exit(2);
1769                         }
1770                         if (stat(optarg, &sb) == -1 || !S_ISDIR(sb.st_mode)) {
1771                                 zerror(
1772                                     gettext("root path must be a directory."));
1773                                 exit(2);
1774                         }
1775                         zonecfg_set_root(optarg);
1776                         break;
1777                 case 'Q':
1778                         quiet = 1;
1779                         break;
1780                 case 'S':
1781                         failsafe = 1;
1782                         break;
1783                 case 'd':
1784                         disconnect = 1;
1785                         break;
1786                 case 'e':
1787                         set_cmdchar(optarg);
1788                         break;
1789                 case 'l':
1790                         login = optarg;
1791                         lflag = 1;
1792                         break;
1793                 case 'n':
1794                         nflag = 1;
1795                         break;
1796                 default:
1797                         usage();
1798                 }
1799         }
1800 
1801         if (console != 0) {
1802 
1803                 if (lflag != 0) {
1804                         zerror(gettext(
1805                             "-l may not be specified for console login"));
1806                         usage();
1807                 }
1808 
1809                 if (nflag != 0) {
1810                         zerror(gettext(
1811                             "-n may not be specified for console login"));
1812                         usage();
1813                 }
1814 
1815                 if (failsafe != 0) {
1816                         zerror(gettext(
1817                             "-S may not be specified for console login"));
1818                         usage();
1819                 }
1820 
1821                 if (zonecfg_in_alt_root()) {
1822                         zerror(gettext(
1823                             "-R may not be specified for console login"));
1824                         exit(2);
1825                 }
1826 
1827         }
1828 
1829         if (failsafe != 0 && lflag != 0) {
1830                 zerror(gettext("-l may not be specified for failsafe login"));
1831                 usage();
1832         }
1833 
1834         if (!console && disconnect != 0) {
1835                 zerror(gettext(
1836                     "-d may only be specified with console login"));
1837                 usage();
1838         }
1839 
1840         if (optind == (argc - 1)) {
1841                 /*
1842                  * zone name, no process name; this should be an interactive
1843                  * as long as STDIN is really a tty.
1844                  */
1845                 if (nflag != 0) {
1846                         zerror(gettext(
1847                             "-n may not be specified for interactive login"));
1848                         usage();
1849                 }
1850                 if (isatty(STDIN_FILENO))
1851                         interactive = 1;
1852                 zonename = argv[optind];
1853         } else if (optind < (argc - 1)) {
1854                 if (console) {
1855                         zerror(gettext("Commands may not be specified for "
1856                             "console login."));
1857                         usage();
1858                 }
1859                 /* zone name and process name, and possibly some args */
1860                 zonename = argv[optind];
1861                 proc_args = &argv[optind + 1];
1862                 interactive = 0;
1863         } else {
1864                 usage();
1865         }
1866 
1867         if (getzoneid() != GLOBAL_ZONEID) {
1868                 zerror(gettext("'%s' may only be used from the global zone"),
1869                     pname);
1870                 return (1);
1871         }
1872 
1873         if (strcmp(zonename, GLOBAL_ZONENAME) == 0) {
1874                 zerror(gettext("'%s' not applicable to the global zone"),
1875                     pname);
1876                 return (1);
1877         }
1878 
1879         if (zone_get_state(zonename, &st) != Z_OK) {
1880                 zerror(gettext("zone '%s' unknown"), zonename);
1881                 return (1);
1882         }
1883 
1884         if (st < ZONE_STATE_INSTALLED) {
1885                 zerror(gettext("cannot login to a zone which is '%s'"),
1886                     zone_state_str(st));
1887                 return (1);
1888         }
1889 
1890         /*
1891          * In both console and non-console cases, we require all privs.
1892          * In the console case, because we may need to startup zoneadmd.
1893          * In the non-console case in order to do zone_enter(2), zonept()
1894          * and other tasks.
1895          */
1896 
1897         if ((privset = priv_allocset()) == NULL) {
1898                 zperror(gettext("priv_allocset failed"));
1899                 return (1);
1900         }
1901 
1902         if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
1903                 zperror(gettext("getppriv failed"));
1904                 priv_freeset(privset);
1905                 return (1);
1906         }
1907 
1908         if (priv_isfullset(privset) == B_FALSE) {
1909                 zerror(gettext("You lack sufficient privilege to run "
1910                     "this command (all privs required)"));
1911                 priv_freeset(privset);
1912                 return (1);
1913         }
1914         priv_freeset(privset);
1915 
1916         /*
1917          * Check if user is authorized for requested usage of the zone
1918          */
1919 
1920         (void) snprintf(authname, MAXAUTHS, "%s%s%s",
1921             ZONE_MANAGE_AUTH, KV_OBJECT, zonename);
1922         if (chkauthattr(authname, username) == 0) {
1923                 if (console) {
1924                         zerror(gettext("%s is not authorized for console "
1925                             "access to  %s zone."),
1926                             username, zonename);
1927                         return (1);
1928                 } else {
1929                         (void) snprintf(authname, MAXAUTHS, "%s%s%s",
1930                             ZONE_LOGIN_AUTH, KV_OBJECT, zonename);
1931                         if (failsafe || !interactive) {
1932                                 zerror(gettext("%s is not authorized for  "
1933                                     "failsafe or non-interactive login "
1934                                     "to  %s zone."), username, zonename);
1935                                 return (1);
1936                         } else if (chkauthattr(authname, username) == 0) {
1937                                 zerror(gettext("%s is not authorized "
1938                                     " to login to %s zone."),
1939                                     username, zonename);
1940                                 return (1);
1941                         }
1942                 }
1943         } else {
1944                 forced_login = B_TRUE;
1945         }
1946 
1947         /*
1948          * The console is a separate case from the rest of the code; handle
1949          * it first.
1950          */
1951         if (console) {
1952                 /*
1953                  * Ensure that zoneadmd for this zone is running.
1954                  */
1955                 if (start_zoneadmd(zonename) == -1)
1956                         return (1);
1957 
1958                 /*
1959                  * Make contact with zoneadmd.
1960                  */
1961                 if (get_console_master(zonename) == -1)
1962                         return (1);
1963 
1964                 if (!quiet)
1965                         (void) printf(
1966                             gettext("[Connected to zone '%s' console]\n"),
1967                             zonename);
1968 
1969                 if (set_tty_rawmode(STDIN_FILENO) == -1) {
1970                         reset_tty();
1971                         zperror(gettext("failed to set stdin pty to raw mode"));
1972                         return (1);
1973                 }
1974 
1975                 (void) sigset(SIGWINCH, sigwinch);
1976                 (void) sigwinch(0);
1977 
1978                 /*
1979                  * Run the I/O loop until we get disconnected.
1980                  */
1981                 doio(masterfd, -1, masterfd, -1, -1, B_FALSE);
1982                 reset_tty();
1983                 if (!quiet)
1984                         (void) printf(
1985                             gettext("\n[Connection to zone '%s' console "
1986                             "closed]\n"), zonename);
1987 
1988                 return (0);
1989         }
1990 
1991         if (st != ZONE_STATE_RUNNING && st != ZONE_STATE_MOUNTED) {
1992                 zerror(gettext("login allowed only to running zones "
1993                     "(%s is '%s')."), zonename, zone_state_str(st));
1994                 return (1);
1995         }
1996 
1997         (void) strlcpy(kernzone, zonename, sizeof (kernzone));
1998         if (zonecfg_in_alt_root()) {
1999                 FILE *fp = zonecfg_open_scratch("", B_FALSE);
2000 
2001                 if (fp == NULL || zonecfg_find_scratch(fp, zonename,
2002                     zonecfg_get_root(), kernzone, sizeof (kernzone)) == -1) {
2003                         zerror(gettext("cannot find scratch zone %s"),
2004                             zonename);
2005                         if (fp != NULL)
2006                                 zonecfg_close_scratch(fp);
2007                         return (1);
2008                 }
2009                 zonecfg_close_scratch(fp);
2010         }
2011 
2012         if ((zoneid = getzoneidbyname(kernzone)) == -1) {
2013                 zerror(gettext("failed to get zoneid for zone '%s'"),
2014                     zonename);
2015                 return (1);
2016         }
2017 
2018         /*
2019          * We need the zone root path only if we are setting up a pty.
2020          */
2021         if (zone_get_devroot(zonename, devroot, sizeof (devroot)) == -1) {
2022                 zerror(gettext("could not get dev path for zone %s"),
2023                     zonename);
2024                 return (1);
2025         }
2026 
2027         if (zone_get_brand(zonename, zonebrand, sizeof (zonebrand)) != Z_OK) {
2028                 zerror(gettext("could not get brand for zone %s"), zonename);
2029                 return (1);
2030         }
2031         /*
2032          * In the alternate root environment, the only supported
2033          * operations are mount and unmount.  In this case, just treat
2034          * the zone as native if it is cluster.  Cluster zones can be
2035          * native for the purpose of LU or upgrade, and the cluster
2036          * brand may not exist in the miniroot (such as in net install
2037          * upgrade).
2038          */
2039         if (zonecfg_default_brand(default_brand,
2040             sizeof (default_brand)) != Z_OK) {
2041                 zerror(gettext("unable to determine default brand"));
2042                 return (1);
2043         }
2044         if (zonecfg_in_alt_root() &&
2045             strcmp(zonebrand, CLUSTER_BRAND_NAME) == 0) {
2046                 (void) strlcpy(zonebrand, default_brand, sizeof (zonebrand));
2047         }
2048 
2049         if ((bh = brand_open(zonebrand)) == NULL) {
2050                 zerror(gettext("could not open brand for zone %s"), zonename);
2051                 return (1);
2052         }
2053 
2054         if ((new_args = prep_args(bh, login, proc_args)) == NULL) {
2055                 zperror(gettext("could not assemble new arguments"));
2056                 brand_close(bh);
2057                 return (1);
2058         }
2059         /*
2060          * Get the brand specific user_cmd.  This command is used to get
2061          * a passwd(4) entry for login.
2062          */
2063         if (!interactive && !failsafe) {
2064                 if (zone_get_user_cmd(bh, login, user_cmd,
2065                     sizeof (user_cmd)) == NULL) {
2066                         zerror(gettext("could not get user_cmd for zone %s"),
2067                             zonename);
2068                         brand_close(bh);
2069                         return (1);
2070                 }
2071         }
2072         brand_close(bh);
2073 
2074         if ((new_env = prep_env()) == NULL) {
2075                 zperror(gettext("could not assemble new environment"));
2076                 return (1);
2077         }
2078 
2079         if (!interactive) {
2080                 if (nflag) {
2081                         int nfd;
2082 
2083                         if ((nfd = open(_PATH_DEVNULL, O_RDONLY)) < 0) {
2084                                 zperror(gettext("failed to open null device"));
2085                                 return (1);
2086                         }
2087                         if (nfd != STDIN_FILENO) {
2088                                 if (dup2(nfd, STDIN_FILENO) < 0) {
2089                                         zperror(gettext(
2090                                             "failed to dup2 null device"));
2091                                         return (1);
2092                                 }
2093                                 (void) close(nfd);
2094                         }
2095                         /* /dev/null is now standard input */
2096                 }
2097                 return (noninteractive_login(zonename, user_cmd, zoneid,
2098                     new_args, new_env));
2099         }
2100 
2101         if (zonecfg_in_alt_root()) {
2102                 zerror(gettext("cannot use interactive login with scratch "
2103                     "zone"));
2104                 return (1);
2105         }
2106 
2107         /*
2108          * Things are more complex in interactive mode; we get the
2109          * master side of the pty, then place the user's terminal into
2110          * raw mode.
2111          */
2112         if (get_master_pty() == -1) {
2113                 zerror(gettext("could not setup master pty device"));
2114                 return (1);
2115         }
2116 
2117         /*
2118          * Compute the "short name" of the pts.  /dev/pts/2 --> pts/2
2119          */
2120         if ((slavename = ptsname(masterfd)) == NULL) {
2121                 zperror(gettext("failed to get name for pseudo-tty"));
2122                 return (1);
2123         }
2124         if (strncmp(slavename, "/dev/", strlen("/dev/")) == 0)
2125                 (void) strlcpy(slaveshortname, slavename + strlen("/dev/"),
2126                     sizeof (slaveshortname));
2127         else
2128                 (void) strlcpy(slaveshortname, slavename,
2129                     sizeof (slaveshortname));
2130 
2131         if (!quiet)
2132                 (void) printf(gettext("[Connected to zone '%s' %s]\n"),
2133                     zonename, slaveshortname);
2134 
2135         if (set_tty_rawmode(STDIN_FILENO) == -1) {
2136                 reset_tty();
2137                 zperror(gettext("failed to set stdin pty to raw mode"));
2138                 return (1);
2139         }
2140 
2141         if (prefork_dropprivs() != 0) {
2142                 reset_tty();
2143                 zperror(gettext("could not allocate privilege set"));
2144                 return (1);
2145         }
2146 
2147         /*
2148          * We must mask SIGCLD until after we have coped with the fork
2149          * sufficiently to deal with it; otherwise we can race and receive the
2150          * signal before child_pid has been initialized (yes, this really
2151          * happens).
2152          */
2153         (void) sigset(SIGCLD, sigcld);
2154         (void) sigemptyset(&block_cld);
2155         (void) sigaddset(&block_cld, SIGCLD);
2156         (void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
2157 
2158         /*
2159          * We activate the contract template at the last minute to
2160          * avoid intermediate functions that could be using fork(2)
2161          * internally.
2162          */
2163         if ((tmpl_fd = init_template()) == -1) {
2164                 reset_tty();
2165                 zperror(gettext("could not create contract"));
2166                 return (1);
2167         }
2168 
2169         if ((child_pid = fork()) == -1) {
2170                 (void) ct_tmpl_clear(tmpl_fd);
2171                 reset_tty();
2172                 zperror(gettext("could not fork"));
2173                 return (1);
2174         } else if (child_pid == 0) { /* child process */
2175                 int slavefd, newslave;
2176 
2177                 (void) ct_tmpl_clear(tmpl_fd);
2178                 (void) close(tmpl_fd);
2179 
2180                 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2181 
2182                 if ((slavefd = init_slave_pty(zoneid, devroot)) == -1)
2183                         return (1);
2184 
2185                 /*
2186                  * Close all fds except for the slave pty.
2187                  */
2188                 (void) fdwalk(close_func, &slavefd);
2189 
2190                 /*
2191                  * Temporarily dup slavefd to stderr; that way if we have
2192                  * to print out that zone_enter failed, the output will
2193                  * have somewhere to go.
2194                  */
2195                 if (slavefd != STDERR_FILENO)
2196                         (void) dup2(slavefd, STDERR_FILENO);
2197 
2198                 if (zone_enter(zoneid) == -1) {
2199                         zerror(gettext("could not enter zone %s: %s"),
2200                             zonename, strerror(errno));
2201                         return (1);
2202                 }
2203 
2204                 if (slavefd != STDERR_FILENO)
2205                         (void) close(STDERR_FILENO);
2206 
2207                 /*
2208                  * We take pains to get this process into a new process
2209                  * group, and subsequently a new session.  In this way,
2210                  * we'll have a session which doesn't yet have a controlling
2211                  * terminal.  When we open the slave, it will become the
2212                  * controlling terminal; no PIDs concerning pgrps or sids
2213                  * will leak inappropriately into the zone.
2214                  */
2215                 (void) setpgrp();
2216 
2217                 /*
2218                  * We need the slave pty to be referenced from the zone's
2219                  * /dev in order to ensure that the devt's, etc are all
2220                  * correct.  Otherwise we break ttyname and the like.
2221                  */
2222                 if ((newslave = open(slavename, O_RDWR)) == -1) {
2223                         (void) close(slavefd);
2224                         return (1);
2225                 }
2226                 (void) close(slavefd);
2227                 slavefd = newslave;
2228 
2229                 /*
2230                  * dup the slave to the various FDs, so that when the
2231                  * spawned process does a write/read it maps to the slave
2232                  * pty.
2233                  */
2234                 (void) dup2(slavefd, STDIN_FILENO);
2235                 (void) dup2(slavefd, STDOUT_FILENO);
2236                 (void) dup2(slavefd, STDERR_FILENO);
2237                 if (slavefd != STDIN_FILENO && slavefd != STDOUT_FILENO &&
2238                     slavefd != STDERR_FILENO) {
2239                         (void) close(slavefd);
2240                 }
2241 
2242                 /*
2243                  * In failsafe mode, we don't use login(1), so don't try
2244                  * setting up a utmpx entry.
2245                  */
2246                 if (!failsafe)
2247                         if (setup_utmpx(slaveshortname) == -1)
2248                                 return (1);
2249 
2250                 /*
2251                  * The child needs to run as root to
2252                  * execute the brand's login program.
2253                  */
2254                 if (setuid(0) == -1) {
2255                         zperror(gettext("insufficient privilege"));
2256                         return (1);
2257                 }
2258 
2259                 (void) execve(new_args[0], new_args, new_env);
2260                 zperror(gettext("exec failure"));
2261                 return (1);
2262         }
2263 
2264         (void) ct_tmpl_clear(tmpl_fd);
2265         (void) close(tmpl_fd);
2266 
2267         /*
2268          * The rest is only for the parent process.
2269          */
2270         (void) sigset(SIGWINCH, sigwinch);
2271 
2272         postfork_dropprivs();
2273 
2274         (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2275         doio(masterfd, -1, masterfd, -1, -1, B_FALSE);
2276 
2277         reset_tty();
2278         if (!quiet)
2279                 (void) fprintf(stderr,
2280                     gettext("\n[Connection to zone '%s' %s closed]\n"),
2281                     zonename, slaveshortname);
2282 
2283         if (pollerr != 0) {
2284                 (void) fprintf(stderr, gettext("Error: connection closed due "
2285                     "to unexpected pollevents=0x%x.\n"), pollerr);
2286                 return (1);
2287         }
2288 
2289         return (0);
2290 }