1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2013 DEY Storage Systems, Inc.
  24  * Copyright (c) 2014 Gary Mills
  25  * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
  26  * Copyright 2019 Joyent, Inc.
  27  * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
  28  */
  29 
  30 /*
  31  * zlogin provides three types of login which allow users in the global
  32  * zone to access non-global zones.
  33  *
  34  * - "interactive login" is similar to rlogin(1); for example, the user could
  35  *   issue 'zlogin my-zone' or 'zlogin -e ^ -l me my-zone'.   The user is
  36  *   granted a new pty (which is then shoved into the zone), and an I/O
  37  *   loop between parent and child processes takes care of the interactive
  38  *   session.  In this mode, login(1) (and its -c option, which means
  39  *   "already authenticated") is employed to take care of the initialization
  40  *   of the user's session.
  41  *
  42  * - "non-interactive login" is similar to su(1M); the user could issue
  43  *   'zlogin my-zone ls -l' and the command would be run as specified.
  44  *   In this mode, zlogin sets up pipes as the communication channel, and
  45  *   'su' is used to do the login setup work.
  46  *
  47  * - "console login" is the equivalent to accessing the tip line for a
  48  *   zone.  For example, the user can issue 'zlogin -C my-zone'.
  49  *   In this mode, zlogin contacts the zoneadmd process via unix domain
  50  *   socket.  If zoneadmd is not running, it starts it.  This allows the
  51  *   console to be available anytime the zone is installed, regardless of
  52  *   whether it is running.
  53  */
  54 
  55 #include <sys/socket.h>
  56 #include <sys/termios.h>
  57 #include <sys/utsname.h>
  58 #include <sys/stat.h>
  59 #include <sys/types.h>
  60 #include <sys/contract/process.h>
  61 #include <sys/ctfs.h>
  62 #include <sys/brand.h>
  63 #include <sys/wait.h>
  64 #include <alloca.h>
  65 #include <assert.h>
  66 #include <ctype.h>
  67 #include <paths.h>
  68 #include <door.h>
  69 #include <errno.h>
  70 #include <nss_dbdefs.h>
  71 #include <poll.h>
  72 #include <priv.h>
  73 #include <pwd.h>
  74 #include <unistd.h>
  75 #include <utmpx.h>
  76 #include <sac.h>
  77 #include <signal.h>
  78 #include <stdarg.h>
  79 #include <stdio.h>
  80 #include <stdlib.h>
  81 #include <string.h>
  82 #include <strings.h>
  83 #include <stropts.h>
  84 #include <wait.h>
  85 #include <zone.h>
  86 #include <fcntl.h>
  87 #include <libdevinfo.h>
  88 #include <libintl.h>
  89 #include <locale.h>
  90 #include <libzonecfg.h>
  91 #include <libcontract.h>
  92 #include <libbrand.h>
  93 #include <auth_list.h>
  94 #include <auth_attr.h>
  95 #include <secdb.h>
  96 
/*
 * Descriptor for the zone-facing side of the session: the /dev/ptmx master
 * (get_master_pty) or the zoneadmd console socket (get_console_master).
 */
static int masterfd;
/* Terminal settings stashed before going raw; restored by reset_tty(). */
static struct termios save_termios;
/* Raw-mode settings with the saved VEOF/VEOL values patched back in. */
static struct termios effective_termios;
/* Descriptor that save_termios is written back to by reset_tty(). */
static int save_fd;
/* Window size read from our stdin and copied to the slave pty. */
static struct winsize winsize;
/* Set to 1 by sigcld() once the child we care about has terminated. */
static volatile int dead;
/* Child watched by sigcld() and signalled by sig_forward(); -1 if none. */
static volatile pid_t child_pid = -1;
/* Selects which privilege set prefork_dropprivs() builds. */
static int interactive = 0;
/* Built by prefork_dropprivs(), applied by postfork_dropprivs(). */
static priv_set_t *dropprivs;

/* Non-zero stops process_user_input() from interpreting cmdchar. */
static int nocmdchar = 0;
/* Non-zero lets init_slave_pty() carry on after STREAMS/termios failures. */
static int failsafe = 0;
/* Sent to zoneadmd as the last field of the IDENT handshake line. */
static int disconnect = 0;
/* Escape character recognised at the beginning of a line. */
static char cmdchar = '~';
/* NOTE(review): not referenced in this part of the file. */
static int quiet = 0;

/* revents recorded by doio() when poll reports something other than input. */
static int pollerr = 0;

/* Program name used as the prefix of diagnostics; set by getpname(). */
static const char *pname;
/* NOTE(review): not referenced in this part of the file. */
static char *username;

/*
 * When forced_login is true, the user is not prompted
 * for an authentication password in the target zone.
 */
static boolean_t forced_login = B_FALSE;

#if !defined(TEXT_DOMAIN)               /* should be defined by cc -D */
#define TEXT_DOMAIN     "SYS_TEST"      /* Use this only if it wasn't */
#endif

#define SUPATH  "/usr/bin/su"
#define FAILSAFESHELL   "/sbin/sh"
#define DEFAULTSHELL    "/sbin/sh"
#define DEF_PATH        "/usr/sbin:/usr/bin"

#define CLUSTER_BRAND_NAME      "cluster"

/*
 * The ZLOGIN_BUFSIZ is larger than PIPE_BUF so we can be sure we're clearing
 * out the pipe when the child is exiting.  The ZLOGIN_RDBUFSIZ must be less
 * than ZLOGIN_BUFSIZ (because we share the buffer in doio).  This value is
 * also chosen in conjunction with the HI_WATER setting to make sure we
 * don't fill up the pipe.  We can write FIFOHIWAT (16k) into the pipe before
 * blocking.  By having ZLOGIN_RDBUFSIZ set to 1k and HI_WATER set to 8k, we
 * know we can always write a ZLOGIN_RDBUFSIZ chunk into the pipe when there
 * is less than HI_WATER data already in the pipe.
 */
#define ZLOGIN_BUFSIZ   8192
#define ZLOGIN_RDBUFSIZ 1024
#define HI_WATER        8192

/*
 * See canonify() below.  CANONIFY_LEN is the maximum length that a
 * "canonical" sequence will expand to (backslash, three octal digits, NUL).
 */
#define CANONIFY_LEN 5
 154 
 155 static void
 156 usage(void)
 157 {
 158         (void) fprintf(stderr, gettext("usage: %s [ -dnQCES ] [ -e cmdchar ] "
 159             "[-l user] zonename [command [args ...] ]\n"), pname);
 160         exit(2);
 161 }
 162 
 163 static const char *
 164 getpname(const char *arg0)
 165 {
 166         const char *p = strrchr(arg0, '/');
 167 
 168         if (p == NULL)
 169                 p = arg0;
 170         else
 171                 p++;
 172 
 173         pname = p;
 174         return (p);
 175 }
 176 
 177 static void
 178 zerror(const char *fmt, ...)
 179 {
 180         va_list alist;
 181 
 182         (void) fprintf(stderr, "%s: ", pname);
 183         va_start(alist, fmt);
 184         (void) vfprintf(stderr, fmt, alist);
 185         va_end(alist);
 186         (void) fprintf(stderr, "\n");
 187 }
 188 
 189 static void
 190 zperror(const char *str)
 191 {
 192         const char *estr;
 193 
 194         if ((estr = strerror(errno)) != NULL)
 195                 (void) fprintf(stderr, "%s: %s: %s\n", pname, str, estr);
 196         else
 197                 (void) fprintf(stderr, "%s: %s: errno %d\n", pname, str, errno);
 198 }
 199 
 200 /*
 201  * The first part of our privilege dropping scheme needs to be called before
 202  * fork(), since we must have it for security; we don't want to be surprised
 203  * later that we couldn't allocate the privset.
 204  */
 205 static int
 206 prefork_dropprivs()
 207 {
 208         if ((dropprivs = priv_allocset()) == NULL)
 209                 return (1);
 210 
 211         priv_basicset(dropprivs);
 212         (void) priv_delset(dropprivs, PRIV_PROC_INFO);
 213         (void) priv_delset(dropprivs, PRIV_PROC_FORK);
 214         (void) priv_delset(dropprivs, PRIV_PROC_EXEC);
 215         (void) priv_delset(dropprivs, PRIV_FILE_LINK_ANY);
 216 
 217         /*
 218          * We need to keep the basic privilege PROC_SESSION and all unknown
 219          * basic privileges as well as the privileges PROC_ZONE and
 220          * PROC_OWNER in order to query session information and
 221          * send signals.
 222          */
 223         if (interactive == 0) {
 224                 (void) priv_addset(dropprivs, PRIV_PROC_ZONE);
 225                 (void) priv_addset(dropprivs, PRIV_PROC_OWNER);
 226         } else {
 227                 (void) priv_delset(dropprivs, PRIV_PROC_SESSION);
 228         }
 229 
 230         return (0);
 231 }
 232 
 233 /*
 234  * The second part of the privilege drop.  We are paranoid about being attacked
 235  * by the zone, so we drop all privileges.  This should prevent a compromise
 236  * which gets us to fork(), exec(), symlink(), etc.
 237  */
 238 static void
 239 postfork_dropprivs()
 240 {
 241         if ((setppriv(PRIV_SET, PRIV_PERMITTED, dropprivs)) == -1) {
 242                 zperror(gettext("Warning: could not set permitted privileges"));
 243         }
 244         if ((setppriv(PRIV_SET, PRIV_LIMIT, dropprivs)) == -1) {
 245                 zperror(gettext("Warning: could not set limit privileges"));
 246         }
 247         if ((setppriv(PRIV_SET, PRIV_INHERITABLE, dropprivs)) == -1) {
 248                 zperror(gettext("Warning: could not set inheritable "
 249                     "privileges"));
 250         }
 251 }
 252 
 253 /*
 254  * Create the unix domain socket and call the zoneadmd server; handshake
 255  * with it to determine whether it will allow us to connect.
 256  */
 257 static int
 258 get_console_master(const char *zname)
 259 {
 260         int sockfd = -1;
 261         struct sockaddr_un servaddr;
 262         char clientid[MAXPATHLEN];
 263         char handshake[MAXPATHLEN], c;
 264         int msglen;
 265         int i = 0, err = 0;
 266 
 267         if ((sockfd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
 268                 zperror(gettext("could not create socket"));
 269                 return (-1);
 270         }
 271 
 272         bzero(&servaddr, sizeof (servaddr));
 273         servaddr.sun_family = AF_UNIX;
 274         (void) snprintf(servaddr.sun_path, sizeof (servaddr.sun_path),
 275             "%s/%s.console_sock", ZONES_TMPDIR, zname);
 276 
 277         if (connect(sockfd, (struct sockaddr *)&servaddr,
 278             sizeof (servaddr)) == -1) {
 279                 zperror(gettext("Could not connect to zone console"));
 280                 goto bad;
 281         }
 282         masterfd = sockfd;
 283 
 284         msglen = snprintf(clientid, sizeof (clientid), "IDENT %lu %s %d\n",
 285             getpid(), setlocale(LC_MESSAGES, NULL), disconnect);
 286 
 287         if (msglen >= sizeof (clientid) || msglen < 0) {
 288                 zerror("protocol error");
 289                 goto bad;
 290         }
 291 
 292         if (write(masterfd, clientid, msglen) != msglen) {
 293                 zerror("protocol error");
 294                 goto bad;
 295         }
 296 
 297         bzero(handshake, sizeof (handshake));
 298 
 299         /*
 300          * Take care not to accumulate more than our fill, and leave room for
 301          * the NUL at the end.
 302          */
 303         while ((err = read(masterfd, &c, 1)) == 1) {
 304                 if (i >= (sizeof (handshake) - 1))
 305                         break;
 306                 if (c == '\n')
 307                         break;
 308                 handshake[i] = c;
 309                 i++;
 310         }
 311 
 312         /*
 313          * If something went wrong during the handshake we bail; perhaps
 314          * the server died off.
 315          */
 316         if (err == -1) {
 317                 zperror(gettext("Could not connect to zone console"));
 318                 goto bad;
 319         }
 320 
 321         if (strncmp(handshake, "OK", sizeof (handshake)) == 0)
 322                 return (0);
 323 
 324         zerror(gettext("Console is already in use by process ID %s."),
 325             handshake);
 326 bad:
 327         (void) close(sockfd);
 328         masterfd = -1;
 329         return (-1);
 330 }
 331 
 332 
 333 /*
 334  * Routines to handle pty creation upon zone entry and to shuttle I/O back
 335  * and forth between the two terminals.  We also compute and store the
 336  * name of the slave terminal associated with the master side.
 337  */
 338 static int
 339 get_master_pty()
 340 {
 341         if ((masterfd = open("/dev/ptmx", O_RDWR|O_NONBLOCK)) < 0) {
 342                 zperror(gettext("failed to obtain a pseudo-tty"));
 343                 return (-1);
 344         }
 345         if (tcgetattr(STDIN_FILENO, &save_termios) == -1) {
 346                 zperror(gettext("failed to get terminal settings from stdin"));
 347                 return (-1);
 348         }
 349         (void) ioctl(STDIN_FILENO, TIOCGWINSZ, (char *)&winsize);
 350 
 351         return (0);
 352 }
 353 
 354 /*
 355  * This is a bit tricky; normally a pts device will belong to the zone it
 356  * is granted to.  But in the case of "entering" a zone, we need to establish
 357  * the pty before entering the zone so that we can vector I/O to and from it
 358  * from the global zone.
 359  *
 360  * We use the zonept() call to let the ptm driver know what we are up to;
 361  * the only other hairy bit is the setting of zoneslavename (which happens
 362  * above, in get_master_pty()).
 363  */
 364 static int
 365 init_slave_pty(zoneid_t zoneid, char *devroot)
 366 {
 367         int slavefd = -1;
 368         char *slavename, zoneslavename[MAXPATHLEN];
 369 
 370         /*
 371          * Set slave permissions, zone the pts, then unlock it.
 372          */
 373         if (grantpt(masterfd) != 0) {
 374                 zperror(gettext("grantpt failed"));
 375                 return (-1);
 376         }
 377 
 378         if (unlockpt(masterfd) != 0) {
 379                 zperror(gettext("unlockpt failed"));
 380                 return (-1);
 381         }
 382 
 383         /*
 384          * We must open the slave side before zoning this pty; otherwise
 385          * the kernel would refuse us the open-- zoning a pty makes it
 386          * inaccessible to the global zone.  Note we are trying to open
 387          * the device node via the $ZONEROOT/dev path for this pty.
 388          *
 389          * Later we'll close the slave out when once we've opened it again
 390          * from within the target zone.  Blarg.
 391          */
 392         if ((slavename = ptsname(masterfd)) == NULL) {
 393                 zperror(gettext("failed to get name for pseudo-tty"));
 394                 return (-1);
 395         }
 396 
 397         (void) snprintf(zoneslavename, sizeof (zoneslavename), "%s%s",
 398             devroot, slavename);
 399 
 400         if ((slavefd = open(zoneslavename, O_RDWR)) < 0) {
 401                 zerror(gettext("failed to open %s: %s"), zoneslavename,
 402                     strerror(errno));
 403                 return (-1);
 404         }
 405 
 406         /*
 407          * Push hardware emulation (ptem), line discipline (ldterm),
 408          * and V7/4BSD/Xenix compatibility (ttcompat) modules.
 409          */
 410         if (ioctl(slavefd, I_PUSH, "ptem") == -1) {
 411                 zperror(gettext("failed to push ptem module"));
 412                 if (!failsafe)
 413                         goto bad;
 414         }
 415 
 416         /*
 417          * Anchor the stream to prevent malicious I_POPs; we prefer to do
 418          * this prior to entering the zone so that we can detect any errors
 419          * early, and so that we can set the anchor from the global zone.
 420          */
 421         if (ioctl(slavefd, I_ANCHOR) == -1) {
 422                 zperror(gettext("failed to set stream anchor"));
 423                 if (!failsafe)
 424                         goto bad;
 425         }
 426 
 427         if (ioctl(slavefd, I_PUSH, "ldterm") == -1) {
 428                 zperror(gettext("failed to push ldterm module"));
 429                 if (!failsafe)
 430                         goto bad;
 431         }
 432         if (ioctl(slavefd, I_PUSH, "ttcompat") == -1) {
 433                 zperror(gettext("failed to push ttcompat module"));
 434                 if (!failsafe)
 435                         goto bad;
 436         }
 437 
 438         /*
 439          * Propagate terminal settings from the external term to the new one.
 440          */
 441         if (tcsetattr(slavefd, TCSAFLUSH, &save_termios) == -1) {
 442                 zperror(gettext("failed to set terminal settings"));
 443                 if (!failsafe)
 444                         goto bad;
 445         }
 446         (void) ioctl(slavefd, TIOCSWINSZ, (char *)&winsize);
 447 
 448         if (zonept(masterfd, zoneid) != 0) {
 449                 zperror(gettext("could not set zoneid of pty"));
 450                 goto bad;
 451         }
 452 
 453         return (slavefd);
 454 
 455 bad:
 456         (void) close(slavefd);
 457         return (-1);
 458 }
 459 
 460 /*
 461  * Place terminal into raw mode.
 462  */
 463 static int
 464 set_tty_rawmode(int fd)
 465 {
 466         struct termios term;
 467         if (tcgetattr(fd, &term) < 0) {
 468                 zperror(gettext("failed to get user terminal settings"));
 469                 return (-1);
 470         }
 471 
 472         /* Stash for later, so we can revert back to previous mode */
 473         save_termios = term;
 474         save_fd = fd;
 475 
 476         /* disable 8->7 bit strip, start/stop, enable any char to restart */
 477         term.c_iflag &= ~(ISTRIP|IXON|IXANY);
 478         /* disable NL->CR, CR->NL, ignore CR, UPPER->lower */
 479         term.c_iflag &= ~(INLCR|ICRNL|IGNCR|IUCLC);
 480         /* disable output post-processing */
 481         term.c_oflag &= ~OPOST;
 482         /* disable canonical mode, signal chars, echo & extended functions */
 483         term.c_lflag &= ~(ICANON|ISIG|ECHO|IEXTEN);
 484 
 485         term.c_cc[VMIN] = 1;    /* byte-at-a-time */
 486         term.c_cc[VTIME] = 0;
 487 
 488         if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &term)) {
 489                 zperror(gettext("failed to set user terminal to raw mode"));
 490                 return (-1);
 491         }
 492 
 493         /*
 494          * We need to know the value of VEOF so that we can properly process for
 495          * client-side ~<EOF>.  But we have obliterated VEOF in term,
 496          * because VMIN overloads the same array slot in non-canonical mode.
 497          * Stupid @&^%!
 498          *
 499          * So here we construct the "effective" termios from the current
 500          * terminal settings, and the corrected VEOF and VEOL settings.
 501          */
 502         if (tcgetattr(STDIN_FILENO, &effective_termios) < 0) {
 503                 zperror(gettext("failed to get user terminal settings"));
 504                 return (-1);
 505         }
 506         effective_termios.c_cc[VEOF] = save_termios.c_cc[VEOF];
 507         effective_termios.c_cc[VEOL] = save_termios.c_cc[VEOL];
 508 
 509         return (0);
 510 }
 511 
 512 /*
 513  * Copy terminal window size from our terminal to the pts.
 514  */
 515 /*ARGSUSED*/
 516 static void
 517 sigwinch(int s)
 518 {
 519         struct winsize ws;
 520 
 521         if (ioctl(0, TIOCGWINSZ, &ws) == 0)
 522                 (void) ioctl(masterfd, TIOCSWINSZ, &ws);
 523 }
 524 
 525 static volatile int close_on_sig = -1;
 526 
 527 static void
 528 /*ARGSUSED*/
 529 sigcld(int s)
 530 {
 531         int status;
 532         pid_t pid;
 533 
 534         /*
 535          * Peek at the exit status.  If this isn't the process we cared
 536          * about, then just reap it.
 537          */
 538         if ((pid = waitpid(child_pid, &status, WNOHANG|WNOWAIT)) != -1) {
 539                 if (pid == child_pid &&
 540                     (WIFEXITED(status) || WIFSIGNALED(status))) {
 541                         dead = 1;
 542                         if (close_on_sig != -1) {
 543                                 (void) write(close_on_sig, "a", 1);
 544                                 (void) close(close_on_sig);
 545                                 close_on_sig = -1;
 546                         }
 547                 } else {
 548                         (void) waitpid(pid, &status, WNOHANG);
 549                 }
 550         }
 551 }
 552 
 553 /*
 554  * Some signals (currently, SIGINT) must be forwarded on to the process
 555  * group of the child process.
 556  */
 557 static void
 558 sig_forward(int s)
 559 {
 560         if (child_pid != -1) {
 561                 (void) sigsend(P_PGID, child_pid, s);
 562         }
 563 }
 564 
 565 /*
 566  * reset terminal settings for global environment
 567  */
 568 static void
 569 reset_tty()
 570 {
 571         (void) tcsetattr(save_fd, TCSADRAIN, &save_termios);
 572 }
 573 
 574 /*
 575  * Convert character to printable representation, for display with locally
 576  * echoed command characters (like when we need to display ~^D)
 577  */
 578 static void
 579 canonify(char c, char *cc)
 580 {
 581         if (isprint(c)) {
 582                 cc[0] = c;
 583                 cc[1] = '\0';
 584         } else if (c >= 0 && c <= 31) {   /* ^@ through ^_ */
 585                 cc[0] = '^';
 586                 cc[1] = c + '@';
 587                 cc[2] = '\0';
 588         } else {
 589                 cc[0] = '\\';
 590                 cc[1] = ((c >> 6) & 7) + '0';
 591                 cc[2] = ((c >> 3) & 7) + '0';
 592                 cc[3] = (c & 7) + '0';
 593                 cc[4] = '\0';
 594         }
 595 }
 596 
 597 /*
 598  * process_user_input watches the input stream for the escape sequence for
 599  * 'quit' (by default, tilde-period).  Because we might be fed just one
 600  * keystroke at a time, state associated with the user input (are we at the
 601  * beginning of the line?  are we locally echoing the next character?) is
 602  * maintained by beginning_of_line and local_echo across calls to the routine.
 603  * If the write to outfd fails, we'll try to read from infd in an attempt
 604  * to prevent deadlock between the two processes.
 605  *
 606  * This routine returns -1 when the 'quit' escape sequence has been issued,
 607  * or an error is encountered, 1 if stdin is EOF, and 0 otherwise.
 608  */
 609 static int
 610 process_user_input(int outfd, int infd)
 611 {
 612         static boolean_t beginning_of_line = B_TRUE;
 613         static boolean_t local_echo = B_FALSE;
 614         char ibuf[ZLOGIN_BUFSIZ];
 615         int nbytes;
 616         char *buf = ibuf;
 617 
 618         nbytes = read(STDIN_FILENO, ibuf, ZLOGIN_RDBUFSIZ);
 619         if (nbytes == -1 && (errno != EINTR || dead))
 620                 return (-1);
 621 
 622         if (nbytes == -1)       /* The read was interrupted. */
 623                 return (0);
 624 
 625         /* 0 read means EOF, close the pipe to the child */
 626         if (nbytes == 0)
 627                 return (1);
 628 
 629         for (char c = *buf; nbytes > 0; c = *buf, --nbytes) {
 630                 buf++;
 631                 if (beginning_of_line && !nocmdchar) {
 632                         beginning_of_line = B_FALSE;
 633                         if (c == cmdchar) {
 634                                 local_echo = B_TRUE;
 635                                 continue;
 636                         }
 637                 } else if (local_echo) {
 638                         local_echo = B_FALSE;
 639                         if (c == '.' || c == effective_termios.c_cc[VEOF]) {
 640                                 char cc[CANONIFY_LEN];
 641 
 642                                 canonify(c, cc);
 643                                 (void) write(STDOUT_FILENO, &cmdchar, 1);
 644                                 (void) write(STDOUT_FILENO, cc, strlen(cc));
 645                                 return (-1);
 646                         }
 647                 }
 648 retry:
 649                 if (write(outfd, &c, 1) <= 0) {
 650                         /*
 651                          * Since the fd we are writing to is opened with
 652                          * O_NONBLOCK it is possible to get EAGAIN if the
 653                          * pipe is full.  One way this could happen is if we
 654                          * are writing a lot of data into the pipe in this loop
 655                          * and the application on the other end is echoing that
 656                          * data back out to its stdout.  The output pipe can
 657                          * fill up since we are stuck here in this loop and not
 658                          * draining the other pipe.  We can try to read some of
 659                          * the data to see if we can drain the pipe so that the
 660                          * application can continue to make progress.  The read
 661                          * is non-blocking so we won't hang here.  We also wait
 662                          * a bit before retrying since there could be other
 663                          * reasons why the pipe is full and we don't want to
 664                          * continuously retry.
 665                          */
 666                         if (errno == EAGAIN) {
 667                                 struct timespec rqtp;
 668                                 int ln;
 669                                 char obuf[ZLOGIN_BUFSIZ];
 670 
 671                                 if ((ln = read(infd, obuf, ZLOGIN_BUFSIZ)) > 0)
 672                                         (void) write(STDOUT_FILENO, obuf, ln);
 673 
 674                                 /* sleep for 10 milliseconds */
 675                                 rqtp.tv_sec = 0;
 676                                 rqtp.tv_nsec = MSEC2NSEC(10);
 677                                 (void) nanosleep(&rqtp, NULL);
 678                                 if (!dead)
 679                                         goto retry;
 680                         }
 681 
 682                         return (-1);
 683                 }
 684                 beginning_of_line = (c == '\r' || c == '\n' ||
 685                     c == effective_termios.c_cc[VKILL] ||
 686                     c == effective_termios.c_cc[VEOL] ||
 687                     c == effective_termios.c_cc[VSUSP] ||
 688                     c == effective_termios.c_cc[VINTR]);
 689         }
 690         return (0);
 691 }
 692 
 693 /*
 694  * This function prevents deadlock between zlogin and the application in the
 695  * zone that it is talking to.  This can happen when we read from zlogin's
 696  * stdin and write the data down the pipe to the application.  If the pipe
 697  * is full, we'll block in the write.  Because zlogin could be blocked in
 698  * the write, it would never read the application's stdout/stderr so the
 699  * application can then block on those writes (when the pipe fills up).  If the
 700  * the application gets blocked this way, it can never get around to reading
 701  * its stdin so that zlogin can unblock from its write.  Once in this state,
 702  * the two processes are deadlocked.
 703  *
 704  * To prevent this, we want to verify that we can write into the pipe before we
 705  * read from our stdin.  If the pipe already is pretty full, we bypass the read
 706  * for now.  We'll circle back here again after the poll() so that we can
 707  * try again.  When this function is called, we already know there is data
 708  * ready to read on STDIN_FILENO.  We return -1 if there is a problem, 1 if
 709  * stdin is EOF, and 0 if everything is ok (even though we might not have
 710  * read/written any data into the pipe on this iteration).
 711  */
 712 static int
 713 process_raw_input(int stdin_fd, int appin_fd)
 714 {
 715         int cc;
 716         struct stat64 sb;
 717         char ibuf[ZLOGIN_RDBUFSIZ];
 718 
 719         /* Check how much data is already in the pipe */
 720         if (fstat64(appin_fd, &sb) == -1) {
 721                 perror("stat failed");
 722                 return (-1);
 723         }
 724 
 725         if (dead)
 726                 return (-1);
 727 
 728         /*
 729          * The pipe already has a lot of data in it,  don't write any more
 730          * right now.
 731          */
 732         if (sb.st_size >= HI_WATER)
 733                 return (0);
 734 
 735         cc = read(STDIN_FILENO, ibuf, ZLOGIN_RDBUFSIZ);
 736         if (cc == -1 && (errno != EINTR || dead))
 737                 return (-1);
 738 
 739         if (cc == -1)   /* The read was interrupted. */
 740                 return (0);
 741 
 742         /* 0 read means EOF, close the pipe to the child */
 743         if (cc == 0)
 744                 return (1);
 745 
 746         /*
 747          * stdin_fd is stdin of the target; so, the thing we'll write the user
 748          * data *to*.
 749          */
 750         if (write(stdin_fd, ibuf, cc) == -1)
 751                 return (-1);
 752 
 753         return (0);
 754 }
 755 
 756 /*
 757  * Write the output from the application running in the zone.  We can get
 758  * a signal during the write (usually it would be SIGCHLD when the application
 759  * has exited) so we loop to make sure we have written all of the data we read.
 760  */
 761 static int
 762 process_output(int in_fd, int out_fd)
 763 {
 764         int wrote = 0;
 765         int cc;
 766         char ibuf[ZLOGIN_BUFSIZ];
 767 
 768         cc = read(in_fd, ibuf, ZLOGIN_BUFSIZ);
 769         if (cc == -1 && (errno != EINTR || dead))
 770                 return (-1);
 771         if (cc == 0)
 772                 return (-1);    /* EOF */
 773         if (cc == -1)   /* The read was interrupted. */
 774                 return (0);
 775 
 776         do {
 777                 int len;
 778 
 779                 len = write(out_fd, ibuf + wrote, cc - wrote);
 780                 if (len == -1 && errno != EINTR)
 781                         return (-1);
 782                 if (len != -1)
 783                         wrote += len;
 784         } while (wrote < cc);
 785 
 786         return (0);
 787 }
 788 
 789 /*
 790  * This is the main I/O loop, and is shared across all zlogin modes.
 791  * Parameters:
 792  *      stdin_fd:  The fd representing 'stdin' for the slave side; input to
 793  *                 the zone will be written here.
 794  *
 795  *      appin_fd:  The fd representing the other end of the 'stdin' pipe (when
 796  *                 we're running non-interactive); used in process_raw_input
 797  *                 to ensure we don't fill up the application's stdin pipe.
 798  *
 799  *      stdout_fd: The fd representing 'stdout' for the slave side; output
 800  *                 from the zone will arrive here.
 801  *
 802  *      stderr_fd: The fd representing 'stderr' for the slave side; output
 803  *                 from the zone will arrive here.
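 *
 *      sig_fd:    The fd for the read side of the signalling pipe; the
 *                 child-death signal handler writes into the other end, so
 *                 that the poll() in the loop wakes up when the child dies.
 *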
 805  *      raw_mode:  If TRUE, then no processing (for example, for '~.') will
 806  *                 be performed on the input coming from STDIN.
 807  *
 808  * stderr_fd may be specified as -1 if there is no stderr (only non-interactive
 809  * mode supplies a stderr).
 810  *
 811  */
 812 static void
 813 doio(int stdin_fd, int appin_fd, int stdout_fd, int stderr_fd, int sig_fd,
 814     boolean_t raw_mode)
 815 {
 816         struct pollfd pollfds[4];
 817         char ibuf[ZLOGIN_BUFSIZ];
 818         int cc, ret;
 819 
 820         /* read from stdout of zone and write to stdout of global zone */
 821         pollfds[0].fd = stdout_fd;
 822         pollfds[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI;
 823 
 824         /* read from stderr of zone and write to stderr of global zone */
 825         pollfds[1].fd = stderr_fd;
 826         pollfds[1].events = pollfds[0].events;
 827 
 828         /* read from stdin of global zone and write to stdin of zone */
 829         pollfds[2].fd = STDIN_FILENO;
 830         pollfds[2].events = pollfds[0].events;
 831 
 832         /* read from signalling pipe so we know when child dies */
 833         pollfds[3].fd = sig_fd;
 834         pollfds[3].events = pollfds[0].events;
 835 
 836         for (;;) {
 837                 pollfds[0].revents = pollfds[1].revents =
 838                     pollfds[2].revents = pollfds[3].revents = 0;
 839 
 840                 if (dead)
 841                         break;
 842 
 843                 /*
 844                  * There is a race condition here where we can receive the
 845                  * child death signal, set the dead flag, but since we have
 846                  * passed the test above, we would go into poll and hang.
 847                  * To avoid this we use the sig_fd as an additional poll fd.
 848                  * The signal handler writes into the other end of this pipe
 849                  * when the child dies so that the poll will always see that
 850                  * input and proceed.  We just loop around at that point and
 851                  * then notice the dead flag.
 852                  */
 853 
 854                 ret = poll(pollfds,
 855                     sizeof (pollfds) / sizeof (struct pollfd), -1);
 856 
 857                 if (ret == -1 && errno != EINTR) {
 858                         perror("poll failed");
 859                         break;
 860                 }
 861 
 862                 if (errno == EINTR && dead) {
 863                         break;
 864                 }
 865 
 866                 /* event from master side stdout */
 867                 if (pollfds[0].revents) {
 868                         if (pollfds[0].revents &
 869                             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 870                                 if (process_output(stdout_fd, STDOUT_FILENO)
 871                                     != 0)
 872                                         break;
 873                         } else {
 874                                 pollerr = pollfds[0].revents;
 875                                 break;
 876                         }
 877                 }
 878 
 879                 /* event from master side stderr */
 880                 if (pollfds[1].revents) {
 881                         if (pollfds[1].revents &
 882                             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 883                                 if (process_output(stderr_fd, STDERR_FILENO)
 884                                     != 0)
 885                                         break;
 886                         } else {
 887                                 pollerr = pollfds[1].revents;
 888                                 break;
 889                         }
 890                 }
 891 
 892                 /* event from user STDIN side */
 893                 if (pollfds[2].revents) {
 894                         if (pollfds[2].revents &
 895                             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 896                                 /*
 897                                  * stdin fd is stdin of the target; so,
 898                                  * the thing we'll write the user data *to*.
 899                                  *
 900                                  * Also, unlike on the output side, we
 901                                  * close the pipe on a zero-length message.
 902                                  */
 903                                 int res;
 904 
 905                                 if (raw_mode)
 906                                         res = process_raw_input(stdin_fd,
 907                                             appin_fd);
 908                                 else
 909                                         res = process_user_input(stdin_fd,
 910                                             stdout_fd);
 911 
 912                                 if (res < 0)
 913                                         break;
 914                                 if (res > 0) {
 915                                         /* EOF (close) child's stdin_fd */
 916                                         pollfds[2].fd = -1;
 917                                         while ((res = close(stdin_fd)) != 0 &&
 918                                             errno == EINTR)
 919                                                 ;
 920                                         if (res != 0)
 921                                                 break;
 922                                 }
 923 
 924                         } else if (raw_mode && pollfds[2].revents & POLLHUP) {
 925                                 /*
 926                                  * It's OK to get a POLLHUP on STDIN-- it
 927                                  * always happens if you do:
 928                                  *
 929                                  * echo foo | zlogin <zone> <command>
 930                                  *
 931                                  * We reset fd to -1 in this case to clear
 932                                  * the condition and close the pipe (EOF) to
 933                                  * the other side in order to wrap things up.
 934                                  */
 935                                 int res;
 936 
 937                                 pollfds[2].fd = -1;
 938                                 while ((res = close(stdin_fd)) != 0 &&
 939                                     errno == EINTR)
 940                                         ;
 941                                 if (res != 0)
 942                                         break;
 943                         } else {
 944                                 pollerr = pollfds[2].revents;
 945                                 break;
 946                         }
 947                 }
 948         }
 949 
 950         /*
 951          * We are in the midst of dying, but try to poll with a short
 952          * timeout to see if we can catch the last bit of I/O from the
 953          * children.
 954          */
 955 retry:
 956         pollfds[0].revents = pollfds[1].revents = 0;
 957         (void) poll(pollfds, 2, 100);
 958         if (pollfds[0].revents &
 959             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 960                 if ((cc = read(stdout_fd, ibuf, ZLOGIN_BUFSIZ)) > 0) {
 961                         (void) write(STDOUT_FILENO, ibuf, cc);
 962                         goto retry;
 963                 }
 964         }
 965         if (pollfds[1].revents &
 966             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 967                 if ((cc = read(stderr_fd, ibuf, ZLOGIN_BUFSIZ)) > 0) {
 968                         (void) write(STDERR_FILENO, ibuf, cc);
 969                         goto retry;
 970                 }
 971         }
 972 }
 973 
 974 /*
 975  * Fetch the user_cmd brand hook for getting a user's passwd(4) entry.
 976  */
 977 static const char *
 978 zone_get_user_cmd(brand_handle_t bh, const char *login, char *user_cmd,
 979     size_t len)
 980 {
 981         bzero(user_cmd, sizeof (user_cmd));
 982         if (brand_get_user_cmd(bh, login, user_cmd, len) != 0)
 983                 return (NULL);
 984 
 985         return (user_cmd);
 986 }
 987 
 988 /* From libc */
 989 extern int str2passwd(const char *, int, void *, char *, int);
 990 
 991 /*
 992  * exec() the user_cmd brand hook, and convert the output string to a
 993  * struct passwd.  This is to be called after zone_enter().
 994  *
 995  */
 996 static struct passwd *
 997 zone_get_user_pw(const char *user_cmd, struct passwd *pwent, char *pwbuf,
 998     int pwbuflen)
 999 {
1000         char pwline[NSS_BUFLEN_PASSWD];
1001         char *cin = NULL;
1002         FILE *fin;
1003         int status;
1004 
1005         assert(getzoneid() != GLOBAL_ZONEID);
1006 
1007         if ((fin = popen(user_cmd, "r")) == NULL)
1008                 return (NULL);
1009 
1010         while (cin == NULL && !feof(fin))
1011                 cin = fgets(pwline, sizeof (pwline), fin);
1012 
1013         if (cin == NULL) {
1014                 (void) pclose(fin);
1015                 return (NULL);
1016         }
1017 
1018         status = pclose(fin);
1019         if (!WIFEXITED(status))
1020                 return (NULL);
1021         if (WEXITSTATUS(status) != 0)
1022                 return (NULL);
1023 
1024         if (str2passwd(pwline, sizeof (pwline), pwent, pwbuf, pwbuflen) == 0)
1025                 return (pwent);
1026         else
1027                 return (NULL);
1028 }
1029 
1030 static char **
1031 zone_login_cmd(brand_handle_t bh, const char *login)
1032 {
1033         static char result_buf[ARG_MAX];
1034         char **new_argv, *ptr, *lasts;
1035         int n, a;
1036 
1037         /* Get the login command for the target zone. */
1038         bzero(result_buf, sizeof (result_buf));
1039 
1040         if (forced_login) {
1041                 if (brand_get_forcedlogin_cmd(bh, login,
1042                     result_buf, sizeof (result_buf)) != 0)
1043                         return (NULL);
1044         } else {
1045                 if (brand_get_login_cmd(bh, login,
1046                     result_buf, sizeof (result_buf)) != 0)
1047                         return (NULL);
1048         }
1049 
1050         /*
1051          * We got back a string that we'd like to execute.  But since
1052          * we're not doing the execution via a shell we'll need to convert
1053          * the exec string to an array of strings.  We'll do that here
1054          * but we're going to be very simplistic about it and break stuff
1055          * up based on spaces.  We're not even going to support any kind
1056          * of quoting or escape characters.  It's truly amazing that
1057          * there is no library function in OpenSolaris to do this for us.
1058          */
1059 
1060         /*
1061          * Be paranoid.  Since we're deliniating based on spaces make
1062          * sure there are no adjacent spaces.
1063          */
1064         if (strstr(result_buf, "  ") != NULL)
1065                 return (NULL);
1066 
1067         /* Remove any trailing whitespace.  */
1068         n = strlen(result_buf);
1069         if (result_buf[n - 1] == ' ')
1070                 result_buf[n - 1] = '\0';
1071 
1072         /* Count how many elements there are in the exec string. */
1073         ptr = result_buf;
1074         for (n = 2; ((ptr = strchr(ptr + 1, (int)' ')) != NULL); n++)
1075                 ;
1076 
1077         /* Allocate the argv array that we're going to return. */
1078         if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1079                 return (NULL);
1080 
1081         /* Tokenize the exec string and return. */
1082         a = 0;
1083         new_argv[a++] = result_buf;
1084         if (n > 2) {
1085                 (void) strtok_r(result_buf, " ", &lasts);
1086                 while ((new_argv[a++] = strtok_r(NULL, " ", &lasts)) != NULL)
1087                         ;
1088         } else {
1089                 new_argv[a++] = NULL;
1090         }
1091         assert(n == a);
1092         return (new_argv);
1093 }
1094 
1095 /*
1096  * Prepare argv array for exec'd process; if we're passing commands to the
1097  * new process, then use su(1M) to do the invocation.  Otherwise, use
1098  * 'login -z <from_zonename> -f' (-z is an undocumented option which tells
1099  * login that we're coming from another zone, and to disregard its CONSOLE
1100  * checks).
1101  */
1102 static char **
1103 prep_args(brand_handle_t bh, const char *login, char **argv)
1104 {
1105         int argc = 0, a = 0, i, n = -1;
1106         char **new_argv;
1107 
1108         if (argv != NULL) {
1109                 size_t subshell_len = 1;
1110                 char *subshell;
1111 
1112                 while (argv[argc] != NULL)
1113                         argc++;
1114 
1115                 for (i = 0; i < argc; i++) {
1116                         subshell_len += strlen(argv[i]) + 1;
1117                 }
1118                 if ((subshell = calloc(1, subshell_len)) == NULL)
1119                         return (NULL);
1120 
1121                 for (i = 0; i < argc; i++) {
1122                         (void) strcat(subshell, argv[i]);
1123                         (void) strcat(subshell, " ");
1124                 }
1125 
1126                 if (failsafe) {
1127                         n = 4;
1128                         if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1129                                 return (NULL);
1130 
1131                         new_argv[a++] = FAILSAFESHELL;
1132                 } else {
1133                         n = 5;
1134                         if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1135                                 return (NULL);
1136 
1137                         new_argv[a++] = SUPATH;
1138                         if (strcmp(login, "root") != 0) {
1139                                 new_argv[a++] = "-";
1140                                 n++;
1141                         }
1142                         new_argv[a++] = (char *)login;
1143                 }
1144                 new_argv[a++] = "-c";
1145                 new_argv[a++] = subshell;
1146                 new_argv[a++] = NULL;
1147                 assert(a == n);
1148         } else {
1149                 if (failsafe) {
1150                         n = 2;
1151                         if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1152                                 return (NULL);
1153                         new_argv[a++] = FAILSAFESHELL;
1154                         new_argv[a++] = NULL;
1155                         assert(n == a);
1156                 } else {
1157                         new_argv = zone_login_cmd(bh, login);
1158                 }
1159         }
1160 
1161         return (new_argv);
1162 }
1163 
/*
 * Helper routine for prep_env below: returns a newly malloc'd string of
 * the form "name=value", or NULL if the allocation fails.
 */
static char *
add_env(char *name, char *value)
{
        size_t namelen = strlen(name);
        size_t vallen = strlen(value);
        char *entry;

        /* name, '=', value, NUL */
        entry = malloc(namelen + vallen + 2);
        if (entry == NULL)
                return (NULL);

        (void) memcpy(entry, name, namelen);
        entry[namelen] = '=';
        /* Copying vallen + 1 bytes brings the terminating NUL along. */
        (void) memcpy(entry + namelen + 1, value, vallen + 1);
        return (entry);
}
1179 
1180 /*
1181  * Prepare envp array for exec'd process.
1182  */
1183 static char **
1184 prep_env()
1185 {
1186         int e = 0, size = 1;
1187         char **new_env, *estr;
1188         char *term = getenv("TERM");
1189 
1190         size++; /* for $PATH */
1191         if (term != NULL)
1192                 size++;
1193 
1194         /*
1195          * In failsafe mode we set $HOME, since '-l' isn't valid in this mode.
1196          * We also set $SHELL, since neither login nor su will be around to do
1197          * it.
1198          */
1199         if (failsafe)
1200                 size += 2;
1201 
1202         if ((new_env = malloc(sizeof (char *) * size)) == NULL)
1203                 return (NULL);
1204 
1205         if ((estr = add_env("PATH", DEF_PATH)) == NULL)
1206                 return (NULL);
1207         new_env[e++] = estr;
1208 
1209         if (term != NULL) {
1210                 if ((estr = add_env("TERM", term)) == NULL)
1211                         return (NULL);
1212                 new_env[e++] = estr;
1213         }
1214 
1215         if (failsafe) {
1216                 if ((estr = add_env("HOME", "/")) == NULL)
1217                         return (NULL);
1218                 new_env[e++] = estr;
1219 
1220                 if ((estr = add_env("SHELL", FAILSAFESHELL)) == NULL)
1221                         return (NULL);
1222                 new_env[e++] = estr;
1223         }
1224 
1225         new_env[e++] = NULL;
1226 
1227         assert(e == size);
1228 
1229         return (new_env);
1230 }
1231 
1232 /*
1233  * Finish the preparation of the envp array for exec'd non-interactive
1234  * zlogins.  This is called in the child process *after* we zone_enter(), since
1235  * it derives things we can only know within the zone, such as $HOME, $SHELL,
1236  * etc.  We need only do this in the non-interactive, mode, since otherwise
1237  * login(1) will do it.  We don't do this in failsafe mode, since it presents
1238  * additional ways in which the command could fail, and we'd prefer to avoid
1239  * that.
1240  */
1241 static char **
1242 prep_env_noninteractive(const char *user_cmd, char **env)
1243 {
1244         size_t size;
1245         char **new_env;
1246         int e, i;
1247         char *estr;
1248         char varmail[LOGNAME_MAX + 11]; /* strlen(/var/mail/) = 10, NUL */
1249         char pwbuf[NSS_BUFLEN_PASSWD + 1];
1250         struct passwd pwent;
1251         struct passwd *pw = NULL;
1252 
1253         assert(env != NULL);
1254         assert(failsafe == 0);
1255 
1256         /*
1257          * Exec the "user_cmd" brand hook to get a pwent for the
1258          * login user.  If this fails, HOME will be set to "/", SHELL
1259          * will be set to $DEFAULTSHELL, and we will continue to exec
1260          * SUPATH <login> -c <cmd>.
1261          */
1262         pw = zone_get_user_pw(user_cmd, &pwent, pwbuf, sizeof (pwbuf));
1263 
1264         /*
1265          * Get existing envp size.
1266          */
1267         for (size = 0; env[size] != NULL; size++)
1268                 ;
1269 
1270         e = size;
1271 
1272         /*
1273          * Finish filling out the environment; we duplicate the environment
1274          * setup described in login(1), for lack of a better precedent.
1275          */
1276         if (pw != NULL)
1277                 size += 3;      /* LOGNAME, HOME, MAIL */
1278         else
1279                 size += 1;      /* HOME */
1280 
1281         size++; /* always fill in SHELL */
1282         size++; /* terminating NULL */
1283 
1284         if ((new_env = malloc(sizeof (char *) * size)) == NULL)
1285                 goto malloc_fail;
1286 
1287         /*
1288          * Copy existing elements of env into new_env.
1289          */
1290         for (i = 0; env[i] != NULL; i++) {
1291                 if ((new_env[i] = strdup(env[i])) == NULL)
1292                         goto malloc_fail;
1293         }
1294         assert(e == i);
1295 
1296         if (pw != NULL) {
1297                 if ((estr = add_env("LOGNAME", pw->pw_name)) == NULL)
1298                         goto malloc_fail;
1299                 new_env[e++] = estr;
1300 
1301                 if ((estr = add_env("HOME", pw->pw_dir)) == NULL)
1302                         goto malloc_fail;
1303                 new_env[e++] = estr;
1304 
1305                 if (chdir(pw->pw_dir) != 0)
1306                         zerror(gettext("Could not chdir to home directory "
1307                             "%s: %s"), pw->pw_dir, strerror(errno));
1308 
1309                 (void) snprintf(varmail, sizeof (varmail), "/var/mail/%s",
1310                     pw->pw_name);
1311                 if ((estr = add_env("MAIL", varmail)) == NULL)
1312                         goto malloc_fail;
1313                 new_env[e++] = estr;
1314         } else {
1315                 if ((estr = add_env("HOME", "/")) == NULL)
1316                         goto malloc_fail;
1317                 new_env[e++] = estr;
1318         }
1319 
1320         if (pw != NULL && strlen(pw->pw_shell) > 0) {
1321                 if ((estr = add_env("SHELL", pw->pw_shell)) == NULL)
1322                         goto malloc_fail;
1323                 new_env[e++] = estr;
1324         } else {
1325                 if ((estr = add_env("SHELL", DEFAULTSHELL)) == NULL)
1326                         goto malloc_fail;
1327                 new_env[e++] = estr;
1328         }
1329 
1330         new_env[e++] = NULL;    /* add terminating NULL */
1331 
1332         assert(e == size);
1333         return (new_env);
1334 
1335 malloc_fail:
1336         zperror(gettext("failed to allocate memory for process environment"));
1337         return (NULL);
1338 }
1339 
/*
 * Close 'fd' unless it is the descriptor that 'slavefd' points to.
 * Always returns 0; the result of close() is ignored.
 */
static int
close_func(void *slavefd, int fd)
{
        const int keep_fd = *(int *)slavefd;

        if (fd == keep_fd)
                return (0);

        (void) close(fd);
        return (0);
}
1347 
1348 static void
1349 set_cmdchar(char *cmdcharstr)
1350 {
1351         char c;
1352         long lc;
1353 
1354         if ((c = *cmdcharstr) != '\\') {
1355                 cmdchar = c;
1356                 return;
1357         }
1358 
1359         c = cmdcharstr[1];
1360         if (c == '\0' || c == '\\') {
1361                 cmdchar = '\\';
1362                 return;
1363         }
1364 
1365         if (c < '0' || c > '7') {
1366                 zerror(gettext("Unrecognized escape character option %s"),
1367                     cmdcharstr);
1368                 usage();
1369         }
1370 
1371         lc = strtol(cmdcharstr + 1, NULL, 8);
1372         if (lc < 0 || lc > 255) {
1373                 zerror(gettext("Octal escape character '%s' too large"),
1374                     cmdcharstr);
1375                 usage();
1376         }
1377         cmdchar = (char)lc;
1378 }
1379 
1380 static int
1381 setup_utmpx(char *slavename)
1382 {
1383         struct utmpx ut;
1384 
1385         bzero(&ut, sizeof (ut));
1386         (void) strncpy(ut.ut_user, ".zlogin", sizeof (ut.ut_user));
1387         (void) strncpy(ut.ut_line, slavename, sizeof (ut.ut_line));
1388         ut.ut_pid = getpid();
1389         ut.ut_id[0] = 'z';
1390         ut.ut_id[1] = ut.ut_id[2] = ut.ut_id[3] = (char)SC_WILDC;
1391         ut.ut_type = LOGIN_PROCESS;
1392         (void) time(&ut.ut_tv.tv_sec);
1393 
1394         if (makeutx(&ut) == NULL) {
1395                 zerror(gettext("makeutx failed"));
1396                 return (-1);
1397         }
1398         return (0);
1399 }
1400 
/*
 * Give up the lock file obtained through grab_lock_file() by closing its
 * descriptor; a failure of close() is ignored.
 */
static void
release_lock_file(int lockfd)
{
        (void) close(lockfd);
}
1406 
1407 static int
1408 grab_lock_file(const char *zone_name, int *lockfd)
1409 {
1410         char pathbuf[PATH_MAX];
1411         struct flock flock;
1412 
1413         if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
1414                 zerror(gettext("could not mkdir %s: %s"), ZONES_TMPDIR,
1415                     strerror(errno));
1416                 return (-1);
1417         }
1418         (void) chmod(ZONES_TMPDIR, S_IRWXU);
1419         (void) snprintf(pathbuf, sizeof (pathbuf), "%s/%s.zoneadm.lock",
1420             ZONES_TMPDIR, zone_name);
1421 
1422         if ((*lockfd = open(pathbuf, O_RDWR|O_CREAT, S_IRUSR|S_IWUSR)) < 0) {
1423                 zerror(gettext("could not open %s: %s"), pathbuf,
1424                     strerror(errno));
1425                 return (-1);
1426         }
1427         /*
1428          * Lock the file to synchronize with other zoneadmds
1429          */
1430         flock.l_type = F_WRLCK;
1431         flock.l_whence = SEEK_SET;
1432         flock.l_start = (off_t)0;
1433         flock.l_len = (off_t)0;
1434         if (fcntl(*lockfd, F_SETLKW, &flock) < 0) {
1435                 zerror(gettext("unable to lock %s: %s"), pathbuf,
1436                     strerror(errno));
1437                 release_lock_file(*lockfd);
1438                 return (-1);
1439         }
1440         return (Z_OK);
1441 }
1442 
1443 static int
1444 start_zoneadmd(const char *zone_name)
1445 {
1446         pid_t retval;
1447         int pstatus = 0, error = -1, lockfd, doorfd;
1448         struct door_info info;
1449         char doorpath[MAXPATHLEN];
1450 
1451         (void) snprintf(doorpath, sizeof (doorpath), ZONE_DOOR_PATH, zone_name);
1452 
1453         if (grab_lock_file(zone_name, &lockfd) != Z_OK)
1454                 return (-1);
1455         /*
1456          * We must do the door check with the lock held.  Otherwise, we
1457          * might race against another zoneadm/zlogin process and wind
1458          * up with two processes trying to start zoneadmd at the same
1459          * time.  zoneadmd will detect this, and fail, but we prefer this
1460          * to be as seamless as is practical, from a user perspective.
1461          */
1462         if ((doorfd = open(doorpath, O_RDONLY)) < 0) {
1463                 if (errno != ENOENT) {
1464                         zerror("failed to open %s: %s", doorpath,
1465                             strerror(errno));
1466                         goto out;
1467                 }
1468         } else {
1469                 /*
1470                  * Seems to be working ok.
1471                  */
1472                 if (door_info(doorfd, &info) == 0 &&
1473                     ((info.di_attributes & DOOR_REVOKED) == 0)) {
1474                         error = 0;
1475                         goto out;
1476                 }
1477         }
1478 
1479         if ((child_pid = fork()) == -1) {
1480                 zperror(gettext("could not fork"));
1481                 goto out;
1482         } else if (child_pid == 0) {
1483                 /* child process */
1484                 (void) execl("/usr/lib/zones/zoneadmd", "zoneadmd", "-z",
1485                     zone_name, NULL);
1486                 zperror(gettext("could not exec zoneadmd"));
1487                 _exit(1);
1488         }
1489 
1490         /* parent process */
1491         do {
1492                 retval = waitpid(child_pid, &pstatus, 0);
1493         } while (retval != child_pid);
1494         if (WIFSIGNALED(pstatus) ||
1495             (WIFEXITED(pstatus) && WEXITSTATUS(pstatus) != 0)) {
1496                 zerror(gettext("could not start %s"), "zoneadmd");
1497                 goto out;
1498         }
1499         error = 0;
1500 out:
1501         release_lock_file(lockfd);
1502         (void) close(doorfd);
1503         return (error);
1504 }
1505 
1506 static int
1507 init_template(void)
1508 {
1509         int fd;
1510         int err = 0;
1511 
1512         fd = open64(CTFS_ROOT "/process/template", O_RDWR);
1513         if (fd == -1)
1514                 return (-1);
1515 
1516         /*
1517          * zlogin doesn't do anything with the contract.
1518          * Deliver no events, don't inherit, and allow it to be orphaned.
1519          */
1520         err |= ct_tmpl_set_critical(fd, 0);
1521         err |= ct_tmpl_set_informative(fd, 0);
1522         err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
1523         err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
1524         if (err || ct_tmpl_activate(fd)) {
1525                 (void) close(fd);
1526                 return (-1);
1527         }
1528 
1529         return (fd);
1530 }
1531 
1532 static int
1533 noninteractive_login(char *zonename, const char *user_cmd, zoneid_t zoneid,
1534     char **new_args, char **new_env)
1535 {
1536         pid_t retval;
1537         int stdin_pipe[2], stdout_pipe[2], stderr_pipe[2], dead_child_pipe[2];
1538         int child_status;
1539         int tmpl_fd;
1540         sigset_t block_cld;
1541 
1542         if ((tmpl_fd = init_template()) == -1) {
1543                 reset_tty();
1544                 zperror(gettext("could not create contract"));
1545                 return (1);
1546         }
1547 
1548         if (pipe(stdin_pipe) != 0) {
1549                 zperror(gettext("could not create STDIN pipe"));
1550                 return (1);
1551         }
1552         /*
1553          * When the user types ^D, we get a zero length message on STDIN.
1554          * We need to echo that down the pipe to send it to the other side;
1555          * but by default, pipes don't propagate zero-length messages.  We
1556          * toggle that behavior off using I_SWROPT.  See streamio(7i).
1557          */
1558         if (ioctl(stdin_pipe[0], I_SWROPT, SNDZERO) != 0) {
1559                 zperror(gettext("could not configure STDIN pipe"));
1560                 return (1);
1561 
1562         }
1563         if (pipe(stdout_pipe) != 0) {
1564                 zperror(gettext("could not create STDOUT pipe"));
1565                 return (1);
1566         }
1567         if (pipe(stderr_pipe) != 0) {
1568                 zperror(gettext("could not create STDERR pipe"));
1569                 return (1);
1570         }
1571 
1572         if (pipe(dead_child_pipe) != 0) {
1573                 zperror(gettext("could not create signalling pipe"));
1574                 return (1);
1575         }
1576         close_on_sig = dead_child_pipe[0];
1577 
1578         /*
1579          * If any of the pipe FD's winds up being less than STDERR, then we
1580          * have a mess on our hands-- and we are lacking some of the I/O
1581          * streams we would expect anyway.  So we bail.
1582          */
1583         if (stdin_pipe[0] <= STDERR_FILENO ||
1584             stdin_pipe[1] <= STDERR_FILENO ||
1585             stdout_pipe[0] <= STDERR_FILENO ||
1586             stdout_pipe[1] <= STDERR_FILENO ||
1587             stderr_pipe[0] <= STDERR_FILENO ||
1588             stderr_pipe[1] <= STDERR_FILENO ||
1589             dead_child_pipe[0] <= STDERR_FILENO ||
1590             dead_child_pipe[1] <= STDERR_FILENO) {
1591                 zperror(gettext("process lacks valid STDIN, STDOUT, STDERR"));
1592                 return (1);
1593         }
1594 
1595         if (prefork_dropprivs() != 0) {
1596                 zperror(gettext("could not allocate privilege set"));
1597                 return (1);
1598         }
1599 
1600         (void) sigset(SIGCLD, sigcld);
1601         (void) sigemptyset(&block_cld);
1602         (void) sigaddset(&block_cld, SIGCLD);
1603         (void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
1604 
1605         if ((child_pid = fork()) == -1) {
1606                 (void) ct_tmpl_clear(tmpl_fd);
1607                 (void) close(tmpl_fd);
1608                 zperror(gettext("could not fork"));
1609                 return (1);
1610         } else if (child_pid == 0) { /* child process */
1611                 (void) ct_tmpl_clear(tmpl_fd);
1612 
1613                 /*
1614                  * Do a dance to get the pipes hooked up as FD's 0, 1 and 2.
1615                  */
1616                 (void) close(STDIN_FILENO);
1617                 (void) close(STDOUT_FILENO);
1618                 (void) close(STDERR_FILENO);
1619                 (void) dup2(stdin_pipe[1], STDIN_FILENO);
1620                 (void) dup2(stdout_pipe[1], STDOUT_FILENO);
1621                 (void) dup2(stderr_pipe[1], STDERR_FILENO);
1622                 (void) closefrom(STDERR_FILENO + 1);
1623 
1624                 (void) sigset(SIGCLD, SIG_DFL);
1625                 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1626                 /*
1627                  * In case any of stdin, stdout or stderr are streams,
1628                  * anchor them to prevent malicious I_POPs.
1629                  */
1630                 (void) ioctl(STDIN_FILENO, I_ANCHOR);
1631                 (void) ioctl(STDOUT_FILENO, I_ANCHOR);
1632                 (void) ioctl(STDERR_FILENO, I_ANCHOR);
1633 
1634                 if (zone_enter(zoneid) == -1) {
1635                         zerror(gettext("could not enter zone %s: %s"),
1636                             zonename, strerror(errno));
1637                         _exit(1);
1638                 }
1639 
1640                 /*
1641                  * For non-native zones, tell libc where it can find locale
1642                  * specific getttext() messages.
1643                  */
1644                 if (access("/.SUNWnative/usr/lib/locale", R_OK) == 0)
1645                         (void) bindtextdomain(TEXT_DOMAIN,
1646                             "/.SUNWnative/usr/lib/locale");
1647                 else if (access("/native/usr/lib/locale", R_OK) == 0)
1648                         (void) bindtextdomain(TEXT_DOMAIN,
1649                             "/native/usr/lib/locale");
1650 
1651                 if (!failsafe)
1652                         new_env = prep_env_noninteractive(user_cmd, new_env);
1653 
1654                 if (new_env == NULL) {
1655                         _exit(1);
1656                 }
1657 
1658                 /*
1659                  * Move into a new process group; the zone_enter will have
1660                  * placed us into zsched's session, and we want to be in
1661                  * a unique process group.
1662                  */
1663                 (void) setpgid(getpid(), getpid());
1664 
1665                 /*
1666                  * The child needs to run as root to
1667                  * execute the su program.
1668                  */
1669                 if (setuid(0) == -1) {
1670                         zperror(gettext("insufficient privilege"));
1671                         return (1);
1672                 }
1673 
1674                 (void) execve(new_args[0], new_args, new_env);
1675                 zperror(gettext("exec failure"));
1676                 _exit(1);
1677         }
1678         /* parent */
1679 
1680         /* close pipe sides written by child */
1681         (void) close(stdout_pipe[1]);
1682         (void) close(stderr_pipe[1]);
1683 
1684         (void) sigset(SIGINT, sig_forward);
1685 
1686         postfork_dropprivs();
1687 
1688         (void) ct_tmpl_clear(tmpl_fd);
1689         (void) close(tmpl_fd);
1690 
1691         (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1692         doio(stdin_pipe[0], stdin_pipe[1], stdout_pipe[0], stderr_pipe[0],
1693             dead_child_pipe[1], B_TRUE);
1694         do {
1695                 retval = waitpid(child_pid, &child_status, 0);
1696                 if (retval == -1) {
1697                         child_status = 0;
1698                 }
1699         } while (retval != child_pid && errno != ECHILD);
1700 
1701         return (WEXITSTATUS(child_status));
1702 }
1703 
/*
 * Return the name of the user identified by our real user ID.
 *
 * The name is returned in freshly allocated storage that is private to the
 * caller, so it remains valid no matter how many password database lookups
 * are performed afterwards.  It is intended to live for the rest of the
 * process and is never freed here.
 *
 * On any failure an error is reported with zerror() and the process
 * terminates via _exit(1); this function never returns NULL.
 */
static char *
get_username(void)
{
        struct passwd *nptr;
        char *name;
        size_t len;

        /*
         * Authorizations are checked to restrict access based on the
         * requested operation and zone name.  It is assumed that the
         * program is running with all privileges, but that the real
         * user ID is that of the user or role on whose behalf we are
         * operating.  So we start by getting the username that will be
         * used for subsequent authorization checks.
         */
        if ((nptr = getpwuid(getuid())) == NULL) {
                zerror(gettext("could not get user name."));
                _exit(1);
        }

        /*
         * getpwuid() hands back a pointer into storage that later
         * getpw*() calls are allowed to overwrite, so take a copy rather
         * than returning nptr->pw_name directly.
         */
        len = strlen(nptr->pw_name) + 1;
        if ((name = malloc(len)) == NULL) {
                zerror(gettext("could not get user name."));
                _exit(1);
        }
        (void) memcpy(name, nptr->pw_name, len);

        return (name);
}
1726 
1727 int
1728 main(int argc, char **argv)
1729 {
1730         int arg, console = 0;
1731         zoneid_t zoneid;
1732         zone_state_t st;
1733         char *login = "root";
1734         int lflag = 0;
1735         int nflag = 0;
1736         char *zonename = NULL;
1737         char **proc_args = NULL;
1738         char **new_args, **new_env;
1739         sigset_t block_cld;
1740         char devroot[MAXPATHLEN];
1741         char *slavename, slaveshortname[MAXPATHLEN];
1742         priv_set_t *privset;
1743         int tmpl_fd;
1744         char zonebrand[MAXNAMELEN];
1745         char default_brand[MAXNAMELEN];
1746         struct stat sb;
1747         char kernzone[ZONENAME_MAX];
1748         brand_handle_t bh;
1749         char user_cmd[MAXPATHLEN];
1750         char authname[MAXAUTHS];
1751 
1752         (void) setlocale(LC_ALL, "");
1753         (void) textdomain(TEXT_DOMAIN);
1754 
1755         (void) getpname(argv[0]);
1756         username = get_username();
1757 
1758         while ((arg = getopt(argc, argv, "dnECR:Se:l:Q")) != EOF) {
1759                 switch (arg) {
1760                 case 'C':
1761                         console = 1;
1762                         break;
1763                 case 'E':
1764                         nocmdchar = 1;
1765                         break;
1766                 case 'R':       /* undocumented */
1767                         if (*optarg != '/') {
1768                                 zerror(gettext("root path must be absolute."));
1769                                 exit(2);
1770                         }
1771                         if (stat(optarg, &sb) == -1 || !S_ISDIR(sb.st_mode)) {
1772                                 zerror(
1773                                     gettext("root path must be a directory."));
1774                                 exit(2);
1775                         }
1776                         zonecfg_set_root(optarg);
1777                         break;
1778                 case 'Q':
1779                         quiet = 1;
1780                         break;
1781                 case 'S':
1782                         failsafe = 1;
1783                         break;
1784                 case 'd':
1785                         disconnect = 1;
1786                         break;
1787                 case 'e':
1788                         set_cmdchar(optarg);
1789                         break;
1790                 case 'l':
1791                         login = optarg;
1792                         lflag = 1;
1793                         break;
1794                 case 'n':
1795                         nflag = 1;
1796                         break;
1797                 default:
1798                         usage();
1799                 }
1800         }
1801 
1802         if (console != 0) {
1803 
1804                 if (lflag != 0) {
1805                         zerror(gettext(
1806                             "-l may not be specified for console login"));
1807                         usage();
1808                 }
1809 
1810                 if (nflag != 0) {
1811                         zerror(gettext(
1812                             "-n may not be specified for console login"));
1813                         usage();
1814                 }
1815 
1816                 if (failsafe != 0) {
1817                         zerror(gettext(
1818                             "-S may not be specified for console login"));
1819                         usage();
1820                 }
1821 
1822                 if (zonecfg_in_alt_root()) {
1823                         zerror(gettext(
1824                             "-R may not be specified for console login"));
1825                         exit(2);
1826                 }
1827 
1828         }
1829 
1830         if (failsafe != 0 && lflag != 0) {
1831                 zerror(gettext("-l may not be specified for failsafe login"));
1832                 usage();
1833         }
1834 
1835         if (!console && disconnect != 0) {
1836                 zerror(gettext(
1837                     "-d may only be specified with console login"));
1838                 usage();
1839         }
1840 
1841         if (optind == (argc - 1)) {
1842                 /*
1843                  * zone name, no process name; this should be an interactive
1844                  * as long as STDIN is really a tty.
1845                  */
1846                 if (nflag != 0) {
1847                         zerror(gettext(
1848                             "-n may not be specified for interactive login"));
1849                         usage();
1850                 }
1851                 if (isatty(STDIN_FILENO))
1852                         interactive = 1;
1853                 zonename = argv[optind];
1854         } else if (optind < (argc - 1)) {
1855                 if (console) {
1856                         zerror(gettext("Commands may not be specified for "
1857                             "console login."));
1858                         usage();
1859                 }
1860                 /* zone name and process name, and possibly some args */
1861                 zonename = argv[optind];
1862                 proc_args = &argv[optind + 1];
1863                 interactive = 0;
1864         } else {
1865                 usage();
1866         }
1867 
1868         if (getzoneid() != GLOBAL_ZONEID) {
1869                 zerror(gettext("'%s' may only be used from the global zone"),
1870                     pname);
1871                 return (1);
1872         }
1873 
1874         if (strcmp(zonename, GLOBAL_ZONENAME) == 0) {
1875                 zerror(gettext("'%s' not applicable to the global zone"),
1876                     pname);
1877                 return (1);
1878         }
1879 
1880         if (zone_get_state(zonename, &st) != Z_OK) {
1881                 zerror(gettext("zone '%s' unknown"), zonename);
1882                 return (1);
1883         }
1884 
1885         if (st < ZONE_STATE_INSTALLED) {
1886                 zerror(gettext("cannot login to a zone which is '%s'"),
1887                     zone_state_str(st));
1888                 return (1);
1889         }
1890 
1891         /*
1892          * In both console and non-console cases, we require all privs.
1893          * In the console case, because we may need to startup zoneadmd.
1894          * In the non-console case in order to do zone_enter(2), zonept()
1895          * and other tasks.
1896          */
1897 
1898         if ((privset = priv_allocset()) == NULL) {
1899                 zperror(gettext("priv_allocset failed"));
1900                 return (1);
1901         }
1902 
1903         if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
1904                 zperror(gettext("getppriv failed"));
1905                 priv_freeset(privset);
1906                 return (1);
1907         }
1908 
1909         if (priv_isfullset(privset) == B_FALSE) {
1910                 zerror(gettext("You lack sufficient privilege to run "
1911                     "this command (all privs required)"));
1912                 priv_freeset(privset);
1913                 return (1);
1914         }
1915         priv_freeset(privset);
1916 
1917         /*
1918          * Check if user is authorized for requested usage of the zone
1919          */
1920 
1921         (void) snprintf(authname, MAXAUTHS, "%s%s%s",
1922             ZONE_MANAGE_AUTH, KV_OBJECT, zonename);
1923         if (chkauthattr(authname, username) == 0) {
1924                 if (console) {
1925                         zerror(gettext("%s is not authorized for console "
1926                             "access to  %s zone."),
1927                             username, zonename);
1928                         return (1);
1929                 } else {
1930                         (void) snprintf(authname, MAXAUTHS, "%s%s%s",
1931                             ZONE_LOGIN_AUTH, KV_OBJECT, zonename);
1932                         if (failsafe || !interactive) {
1933                                 zerror(gettext("%s is not authorized for  "
1934                                     "failsafe or non-interactive login "
1935                                     "to  %s zone."), username, zonename);
1936                                 return (1);
1937                         } else if (chkauthattr(authname, username) == 0) {
1938                                 zerror(gettext("%s is not authorized "
1939                                     " to login to %s zone."),
1940                                     username, zonename);
1941                                 return (1);
1942                         }
1943                 }
1944         } else {
1945                 forced_login = B_TRUE;
1946         }
1947 
1948         /*
1949          * The console is a separate case from the rest of the code; handle
1950          * it first.
1951          */
1952         if (console) {
1953                 /*
1954                  * Ensure that zoneadmd for this zone is running.
1955                  */
1956                 if (start_zoneadmd(zonename) == -1)
1957                         return (1);
1958 
1959                 /*
1960                  * Make contact with zoneadmd.
1961                  */
1962                 if (get_console_master(zonename) == -1)
1963                         return (1);
1964 
1965                 if (!quiet)
1966                         (void) printf(
1967                             gettext("[Connected to zone '%s' console]\n"),
1968                             zonename);
1969 
1970                 if (set_tty_rawmode(STDIN_FILENO) == -1) {
1971                         reset_tty();
1972                         zperror(gettext("failed to set stdin pty to raw mode"));
1973                         return (1);
1974                 }
1975 
1976                 (void) sigset(SIGWINCH, sigwinch);
1977                 (void) sigwinch(0);
1978 
1979                 /*
1980                  * Run the I/O loop until we get disconnected.
1981                  */
1982                 doio(masterfd, -1, masterfd, -1, -1, B_FALSE);
1983                 reset_tty();
1984                 if (!quiet)
1985                         (void) printf(
1986                             gettext("\n[Connection to zone '%s' console "
1987                             "closed]\n"), zonename);
1988 
1989                 return (0);
1990         }
1991 
1992         if (st != ZONE_STATE_RUNNING && st != ZONE_STATE_MOUNTED) {
1993                 zerror(gettext("login allowed only to running zones "
1994                     "(%s is '%s')."), zonename, zone_state_str(st));
1995                 return (1);
1996         }
1997 
1998         (void) strlcpy(kernzone, zonename, sizeof (kernzone));
1999         if (zonecfg_in_alt_root()) {
2000                 FILE *fp = zonecfg_open_scratch("", B_FALSE);
2001 
2002                 if (fp == NULL || zonecfg_find_scratch(fp, zonename,
2003                     zonecfg_get_root(), kernzone, sizeof (kernzone)) == -1) {
2004                         zerror(gettext("cannot find scratch zone %s"),
2005                             zonename);
2006                         if (fp != NULL)
2007                                 zonecfg_close_scratch(fp);
2008                         return (1);
2009                 }
2010                 zonecfg_close_scratch(fp);
2011         }
2012 
2013         if ((zoneid = getzoneidbyname(kernzone)) == -1) {
2014                 zerror(gettext("failed to get zoneid for zone '%s'"),
2015                     zonename);
2016                 return (1);
2017         }
2018 
2019         /*
2020          * We need the zone root path only if we are setting up a pty.
2021          */
2022         if (zone_get_devroot(zonename, devroot, sizeof (devroot)) == -1) {
2023                 zerror(gettext("could not get dev path for zone %s"),
2024                     zonename);
2025                 return (1);
2026         }
2027 
2028         if (zone_get_brand(zonename, zonebrand, sizeof (zonebrand)) != Z_OK) {
2029                 zerror(gettext("could not get brand for zone %s"), zonename);
2030                 return (1);
2031         }
2032         /*
2033          * In the alternate root environment, the only supported
2034          * operations are mount and unmount.  In this case, just treat
2035          * the zone as native if it is cluster.  Cluster zones can be
2036          * native for the purpose of LU or upgrade, and the cluster
2037          * brand may not exist in the miniroot (such as in net install
2038          * upgrade).
2039          */
2040         if (zonecfg_default_brand(default_brand,
2041             sizeof (default_brand)) != Z_OK) {
2042                 zerror(gettext("unable to determine default brand"));
2043                 return (1);
2044         }
2045         if (zonecfg_in_alt_root() &&
2046             strcmp(zonebrand, CLUSTER_BRAND_NAME) == 0) {
2047                 (void) strlcpy(zonebrand, default_brand, sizeof (zonebrand));
2048         }
2049 
2050         if ((bh = brand_open(zonebrand)) == NULL) {
2051                 zerror(gettext("could not open brand for zone %s"), zonename);
2052                 return (1);
2053         }
2054 
2055         if ((new_args = prep_args(bh, login, proc_args)) == NULL) {
2056                 zperror(gettext("could not assemble new arguments"));
2057                 brand_close(bh);
2058                 return (1);
2059         }
2060         /*
2061          * Get the brand specific user_cmd.  This command is used to get
2062          * a passwd(4) entry for login.
2063          */
2064         if (!interactive && !failsafe) {
2065                 if (zone_get_user_cmd(bh, login, user_cmd,
2066                     sizeof (user_cmd)) == NULL) {
2067                         zerror(gettext("could not get user_cmd for zone %s"),
2068                             zonename);
2069                         brand_close(bh);
2070                         return (1);
2071                 }
2072         }
2073         brand_close(bh);
2074 
2075         if ((new_env = prep_env()) == NULL) {
2076                 zperror(gettext("could not assemble new environment"));
2077                 return (1);
2078         }
2079 
2080         if (!interactive) {
2081                 if (nflag) {
2082                         int nfd;
2083 
2084                         if ((nfd = open(_PATH_DEVNULL, O_RDONLY)) < 0) {
2085                                 zperror(gettext("failed to open null device"));
2086                                 return (1);
2087                         }
2088                         if (nfd != STDIN_FILENO) {
2089                                 if (dup2(nfd, STDIN_FILENO) < 0) {
2090                                         zperror(gettext(
2091                                             "failed to dup2 null device"));
2092                                         return (1);
2093                                 }
2094                                 (void) close(nfd);
2095                         }
2096                         /* /dev/null is now standard input */
2097                 }
2098                 return (noninteractive_login(zonename, user_cmd, zoneid,
2099                     new_args, new_env));
2100         }
2101 
2102         if (zonecfg_in_alt_root()) {
2103                 zerror(gettext("cannot use interactive login with scratch "
2104                     "zone"));
2105                 return (1);
2106         }
2107 
2108         /*
2109          * Things are more complex in interactive mode; we get the
2110          * master side of the pty, then place the user's terminal into
2111          * raw mode.
2112          */
2113         if (get_master_pty() == -1) {
2114                 zerror(gettext("could not setup master pty device"));
2115                 return (1);
2116         }
2117 
2118         /*
2119          * Compute the "short name" of the pts.  /dev/pts/2 --> pts/2
2120          */
2121         if ((slavename = ptsname(masterfd)) == NULL) {
2122                 zperror(gettext("failed to get name for pseudo-tty"));
2123                 return (1);
2124         }
2125         if (strncmp(slavename, "/dev/", strlen("/dev/")) == 0)
2126                 (void) strlcpy(slaveshortname, slavename + strlen("/dev/"),
2127                     sizeof (slaveshortname));
2128         else
2129                 (void) strlcpy(slaveshortname, slavename,
2130                     sizeof (slaveshortname));
2131 
2132         if (!quiet)
2133                 (void) printf(gettext("[Connected to zone '%s' %s]\n"),
2134                     zonename, slaveshortname);
2135 
2136         if (set_tty_rawmode(STDIN_FILENO) == -1) {
2137                 reset_tty();
2138                 zperror(gettext("failed to set stdin pty to raw mode"));
2139                 return (1);
2140         }
2141 
2142         if (prefork_dropprivs() != 0) {
2143                 reset_tty();
2144                 zperror(gettext("could not allocate privilege set"));
2145                 return (1);
2146         }
2147 
2148         /*
2149          * We must mask SIGCLD until after we have coped with the fork
2150          * sufficiently to deal with it; otherwise we can race and receive the
2151          * signal before child_pid has been initialized (yes, this really
2152          * happens).
2153          */
2154         (void) sigset(SIGCLD, sigcld);
2155         (void) sigemptyset(&block_cld);
2156         (void) sigaddset(&block_cld, SIGCLD);
2157         (void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
2158 
2159         /*
2160          * We activate the contract template at the last minute to
2161          * avoid intermediate functions that could be using fork(2)
2162          * internally.
2163          */
2164         if ((tmpl_fd = init_template()) == -1) {
2165                 reset_tty();
2166                 zperror(gettext("could not create contract"));
2167                 return (1);
2168         }
2169 
2170         if ((child_pid = fork()) == -1) {
2171                 (void) ct_tmpl_clear(tmpl_fd);
2172                 reset_tty();
2173                 zperror(gettext("could not fork"));
2174                 return (1);
2175         } else if (child_pid == 0) { /* child process */
2176                 int slavefd, newslave;
2177 
2178                 (void) ct_tmpl_clear(tmpl_fd);
2179                 (void) close(tmpl_fd);
2180 
2181                 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2182 
2183                 if ((slavefd = init_slave_pty(zoneid, devroot)) == -1)
2184                         return (1);
2185 
2186                 /*
2187                  * Close all fds except for the slave pty.
2188                  */
2189                 (void) fdwalk(close_func, &slavefd);
2190 
2191                 /*
2192                  * Temporarily dup slavefd to stderr; that way if we have
2193                  * to print out that zone_enter failed, the output will
2194                  * have somewhere to go.
2195                  */
2196                 if (slavefd != STDERR_FILENO)
2197                         (void) dup2(slavefd, STDERR_FILENO);
2198 
2199                 if (zone_enter(zoneid) == -1) {
2200                         zerror(gettext("could not enter zone %s: %s"),
2201                             zonename, strerror(errno));
2202                         return (1);
2203                 }
2204 
2205                 if (slavefd != STDERR_FILENO)
2206                         (void) close(STDERR_FILENO);
2207 
2208                 /*
2209                  * We take pains to get this process into a new process
2210                  * group, and subsequently a new session.  In this way,
2211                  * we'll have a session which doesn't yet have a controlling
2212                  * terminal.  When we open the slave, it will become the
2213                  * controlling terminal; no PIDs concerning pgrps or sids
2214                  * will leak inappropriately into the zone.
2215                  */
2216                 (void) setpgrp();
2217 
2218                 /*
2219                  * We need the slave pty to be referenced from the zone's
2220                  * /dev in order to ensure that the devt's, etc are all
2221                  * correct.  Otherwise we break ttyname and the like.
2222                  */
2223                 if ((newslave = open(slavename, O_RDWR)) == -1) {
2224                         (void) close(slavefd);
2225                         return (1);
2226                 }
2227                 (void) close(slavefd);
2228                 slavefd = newslave;
2229 
2230                 /*
2231                  * dup the slave to the various FDs, so that when the
2232                  * spawned process does a write/read it maps to the slave
2233                  * pty.
2234                  */
2235                 (void) dup2(slavefd, STDIN_FILENO);
2236                 (void) dup2(slavefd, STDOUT_FILENO);
2237                 (void) dup2(slavefd, STDERR_FILENO);
2238                 if (slavefd != STDIN_FILENO && slavefd != STDOUT_FILENO &&
2239                     slavefd != STDERR_FILENO) {
2240                         (void) close(slavefd);
2241                 }
2242 
2243                 /*
2244                  * In failsafe mode, we don't use login(1), so don't try
2245                  * setting up a utmpx entry.
2246                  */
2247                 if (!failsafe)
2248                         if (setup_utmpx(slaveshortname) == -1)
2249                                 return (1);
2250 
2251                 /*
2252                  * The child needs to run as root to
2253                  * execute the brand's login program.
2254                  */
2255                 if (setuid(0) == -1) {
2256                         zperror(gettext("insufficient privilege"));
2257                         return (1);
2258                 }
2259 
2260                 (void) execve(new_args[0], new_args, new_env);
2261                 zperror(gettext("exec failure"));
2262                 return (1);
2263         }
2264 
2265         (void) ct_tmpl_clear(tmpl_fd);
2266         (void) close(tmpl_fd);
2267 
2268         /*
2269          * The rest is only for the parent process.
2270          */
2271         (void) sigset(SIGWINCH, sigwinch);
2272 
2273         postfork_dropprivs();
2274 
2275         (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2276         doio(masterfd, -1, masterfd, -1, -1, B_FALSE);
2277 
2278         reset_tty();
2279         if (!quiet)
2280                 (void) fprintf(stderr,
2281                     gettext("\n[Connection to zone '%s' %s closed]\n"),
2282                     zonename, slaveshortname);
2283 
2284         if (pollerr != 0) {
2285                 (void) fprintf(stderr, gettext("Error: connection closed due "
2286                     "to unexpected pollevents=0x%x.\n"), pollerr);
2287                 return (1);
2288         }
2289 
2290         return (0);
2291 }