1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2013 DEY Storage Systems, Inc.
  24  */
  25 
  26 /*
  27  * zlogin provides three types of login which allow users in the global
  28  * zone to access non-global zones.
  29  *
  30  * - "interactive login" is similar to rlogin(1); for example, the user could
  31  *   issue 'zlogin my-zone' or 'zlogin -e ^ -l me my-zone'.   The user is
  32  *   granted a new pty (which is then shoved into the zone), and an I/O
  33  *   loop between parent and child processes takes care of the interactive
  34  *   session.  In this mode, login(1) (and its -c option, which means
  35  *   "already authenticated") is employed to take care of the initialization
  36  *   of the user's session.
  37  *
  38  * - "non-interactive login" is similar to su(1M); the user could issue
  39  *   'zlogin my-zone ls -l' and the command would be run as specified.
  40  *   In this mode, zlogin sets up pipes as the communication channel, and
  41  *   'su' is used to do the login setup work.
  42  *
  43  * - "console login" is the equivalent to accessing the tip line for a
  44  *   zone.  For example, the user can issue 'zlogin -C my-zone'.
  45  *   In this mode, zlogin contacts the zoneadmd process via unix domain
  46  *   socket.  If zoneadmd is not running, it starts it.  This allows the
  47  *   console to be available anytime the zone is installed, regardless of
  48  *   whether it is running.
  49  */
  50 
  51 #include <sys/socket.h>
  52 #include <sys/termios.h>
  53 #include <sys/utsname.h>
  54 #include <sys/stat.h>
  55 #include <sys/types.h>
  56 #include <sys/contract/process.h>
  57 #include <sys/ctfs.h>
  58 #include <sys/brand.h>
  59 #include <sys/wait.h>
  60 #include <alloca.h>
  61 #include <assert.h>
  62 #include <ctype.h>
  63 #include <door.h>
  64 #include <errno.h>
  65 #include <nss_dbdefs.h>
  66 #include <poll.h>
  67 #include <priv.h>
  68 #include <pwd.h>
  69 #include <unistd.h>
  70 #include <utmpx.h>
  71 #include <sac.h>
  72 #include <signal.h>
  73 #include <stdarg.h>
  74 #include <stdio.h>
  75 #include <stdlib.h>
  76 #include <string.h>
  77 #include <strings.h>
  78 #include <stropts.h>
  79 #include <wait.h>
  80 #include <zone.h>
  81 #include <fcntl.h>
  82 #include <libdevinfo.h>
  83 #include <libintl.h>
  84 #include <locale.h>
  85 #include <libzonecfg.h>
  86 #include <libcontract.h>
  87 #include <libbrand.h>
  88 #include <auth_list.h>
  89 #include <auth_attr.h>
  90 #include <secdb.h>
  91 
/*
 * Master side of the session: either the socket connected to the zone
 * console (see get_console_master()) or the /dev/ptmx master (see
 * get_master_pty()).  sigwinch() pushes window size changes to it.
 */
static int masterfd;
/* Terminal settings captured from the user's terminal; see reset_tty(). */
static struct termios save_termios;
/*
 * Raw-mode settings with the original VEOF/VEOL characters patched back
 * in; see set_tty_rawmode().
 */
static struct termios effective_termios;
/* The fd whose settings are held in save_termios; see set_tty_rawmode(). */
static int save_fd;
/* Window size of the user's terminal, propagated to the slave pty. */
static struct winsize winsize;
/* Set by sigcld() once the child we care about has exited or been killed. */
static volatile int dead;
/* The child watched by sigcld() and signalled by sig_forward(); -1 if none. */
static volatile pid_t child_pid = -1;
/* Selects which privilege set prefork_dropprivs() builds. */
static int interactive = 0;
/* Privilege set allocated by prefork_dropprivs(), applied after fork(). */
static priv_set_t *dropprivs;

/* Nonzero disables command character processing in process_user_input(). */
static int nocmdchar = 0;
/* Nonzero makes init_slave_pty() tolerate most pty setup failures. */
static int failsafe = 0;
/* The command (escape) character recognized at the beginning of a line. */
static char cmdchar = '~';
/* NOTE(review): presumably suppresses informational output -- confirm. */
static int quiet = 0;

/* The poll() revents recorded by doio() when it sees a non-input event. */
static int pollerr = 0;

/* Program name used to prefix messages; set by getpname(). */
static const char *pname;
/* NOTE(review): presumably the login name given with -l -- confirm. */
static char *username;

/*
 * When forced_login is true, the user is not prompted
 * for an authentication password in the target zone.
 */
static boolean_t forced_login = B_FALSE;
 117 
#if !defined(TEXT_DOMAIN)               /* should be defined by cc -D */
#define TEXT_DOMAIN     "SYS_TEST"      /* Use this only if it wasn't */
#endif

/*
 * Paths used when setting up a login.  'su' does the login setup work for
 * non-interactive logins (see the comment at the top of this file).
 * NOTE(review): the users of the remaining definitions are not nearby;
 * their names suggest the shell for failsafe logins, the fallback shell,
 * and the default PATH -- confirm at the points of use.
 */
#define SUPATH  "/usr/bin/su"
#define FAILSAFESHELL   "/sbin/sh"
#define DEFAULTSHELL    "/sbin/sh"
#define DEF_PATH        "/usr/sbin:/usr/bin"

/* NOTE(review): presumably the brand name of cluster zones -- confirm. */
#define CLUSTER_BRAND_NAME      "cluster"

/*
 * The ZLOGIN_BUFSIZ is larger than PIPE_BUF so we can be sure we're clearing
 * out the pipe when the child is exiting.  The ZLOGIN_RDBUFSIZ must be less
 * than ZLOGIN_BUFSIZ (because we share the buffer in doio).  This value is
 * also chosen in conjunction with the HI_WATER setting to make sure we
 * don't fill up the pipe.  We can write FIFOHIWAT (16k) into the pipe before
 * blocking.  By having ZLOGIN_RDBUFSIZ set to 1k and HI_WATER set to 8k, we
 * know we can always write a ZLOGIN_RDBUFSIZ chunk into the pipe when there
 * is less than HI_WATER data already in the pipe.
 */
#define ZLOGIN_BUFSIZ   8192
#define ZLOGIN_RDBUFSIZ 1024
#define HI_WATER        8192

/*
 * See canonify() below.  CANONIFY_LEN is the maximum length that a
 * "canonical" sequence will expand to (backslash, three octal digits, NUL).
 */
#define CANONIFY_LEN 5
 148 
 149 static void
 150 usage(void)
 151 {
 152         (void) fprintf(stderr, gettext("usage: %s [ -QCES ] [ -e cmdchar ] "
 153             "[-l user] zonename [command [args ...] ]\n"), pname);
 154         exit(2);
 155 }
 156 
 157 static const char *
 158 getpname(const char *arg0)
 159 {
 160         const char *p = strrchr(arg0, '/');
 161 
 162         if (p == NULL)
 163                 p = arg0;
 164         else
 165                 p++;
 166 
 167         pname = p;
 168         return (p);
 169 }
 170 
 171 static void
 172 zerror(const char *fmt, ...)
 173 {
 174         va_list alist;
 175 
 176         (void) fprintf(stderr, "%s: ", pname);
 177         va_start(alist, fmt);
 178         (void) vfprintf(stderr, fmt, alist);
 179         va_end(alist);
 180         (void) fprintf(stderr, "\n");
 181 }
 182 
 183 static void
 184 zperror(const char *str)
 185 {
 186         const char *estr;
 187 
 188         if ((estr = strerror(errno)) != NULL)
 189                 (void) fprintf(stderr, "%s: %s: %s\n", pname, str, estr);
 190         else
 191                 (void) fprintf(stderr, "%s: %s: errno %d\n", pname, str, errno);
 192 }
 193 
 194 /*
 195  * The first part of our privilege dropping scheme needs to be called before
 196  * fork(), since we must have it for security; we don't want to be surprised
 197  * later that we couldn't allocate the privset.
 198  */
 199 static int
 200 prefork_dropprivs()
 201 {
 202         if ((dropprivs = priv_allocset()) == NULL)
 203                 return (1);
 204 
 205         priv_basicset(dropprivs);
 206         (void) priv_delset(dropprivs, PRIV_PROC_INFO);
 207         (void) priv_delset(dropprivs, PRIV_PROC_FORK);
 208         (void) priv_delset(dropprivs, PRIV_PROC_EXEC);
 209         (void) priv_delset(dropprivs, PRIV_FILE_LINK_ANY);
 210 
 211         /*
 212          * We need to keep the basic privilege PROC_SESSION and all unknown
 213          * basic privileges as well as the privileges PROC_ZONE and
 214          * PROC_OWNER in order to query session information and
 215          * send signals.
 216          */
 217         if (interactive == 0) {
 218                 (void) priv_addset(dropprivs, PRIV_PROC_ZONE);
 219                 (void) priv_addset(dropprivs, PRIV_PROC_OWNER);
 220         } else {
 221                 (void) priv_delset(dropprivs, PRIV_PROC_SESSION);
 222         }
 223 
 224         return (0);
 225 }
 226 
 227 /*
 228  * The second part of the privilege drop.  We are paranoid about being attacked
 229  * by the zone, so we drop all privileges.  This should prevent a compromise
 230  * which gets us to fork(), exec(), symlink(), etc.
 231  */
 232 static void
 233 postfork_dropprivs()
 234 {
 235         if ((setppriv(PRIV_SET, PRIV_PERMITTED, dropprivs)) == -1) {
 236                 zperror(gettext("Warning: could not set permitted privileges"));
 237         }
 238         if ((setppriv(PRIV_SET, PRIV_LIMIT, dropprivs)) == -1) {
 239                 zperror(gettext("Warning: could not set limit privileges"));
 240         }
 241         if ((setppriv(PRIV_SET, PRIV_INHERITABLE, dropprivs)) == -1) {
 242                 zperror(gettext("Warning: could not set inheritable "
 243                     "privileges"));
 244         }
 245 }
 246 
 247 /*
 248  * Create the unix domain socket and call the zoneadmd server; handshake
 249  * with it to determine whether it will allow us to connect.
 250  */
 251 static int
 252 get_console_master(const char *zname)
 253 {
 254         int sockfd = -1;
 255         struct sockaddr_un servaddr;
 256         char clientid[MAXPATHLEN];
 257         char handshake[MAXPATHLEN], c;
 258         int msglen;
 259         int i = 0, err = 0;
 260 
 261         if ((sockfd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
 262                 zperror(gettext("could not create socket"));
 263                 return (-1);
 264         }
 265 
 266         bzero(&servaddr, sizeof (servaddr));
 267         servaddr.sun_family = AF_UNIX;
 268         (void) snprintf(servaddr.sun_path, sizeof (servaddr.sun_path),
 269             "%s/%s.console_sock", ZONES_TMPDIR, zname);
 270 
 271         if (connect(sockfd, (struct sockaddr *)&servaddr,
 272             sizeof (servaddr)) == -1) {
 273                 zperror(gettext("Could not connect to zone console"));
 274                 goto bad;
 275         }
 276         masterfd = sockfd;
 277 
 278         msglen = snprintf(clientid, sizeof (clientid), "IDENT %lu %s\n",
 279             getpid(), setlocale(LC_MESSAGES, NULL));
 280 
 281         if (msglen >= sizeof (clientid) || msglen < 0) {
 282                 zerror("protocol error");
 283                 goto bad;
 284         }
 285 
 286         if (write(masterfd, clientid, msglen) != msglen) {
 287                 zerror("protocol error");
 288                 goto bad;
 289         }
 290 
 291         bzero(handshake, sizeof (handshake));
 292 
 293         /*
 294          * Take care not to accumulate more than our fill, and leave room for
 295          * the NUL at the end.
 296          */
 297         while ((err = read(masterfd, &c, 1)) == 1) {
 298                 if (i >= (sizeof (handshake) - 1))
 299                         break;
 300                 if (c == '\n')
 301                         break;
 302                 handshake[i] = c;
 303                 i++;
 304         }
 305 
 306         /*
 307          * If something went wrong during the handshake we bail; perhaps
 308          * the server died off.
 309          */
 310         if (err == -1) {
 311                 zperror(gettext("Could not connect to zone console"));
 312                 goto bad;
 313         }
 314 
 315         if (strncmp(handshake, "OK", sizeof (handshake)) == 0)
 316                 return (0);
 317 
 318         zerror(gettext("Console is already in use by process ID %s."),
 319             handshake);
 320 bad:
 321         (void) close(sockfd);
 322         masterfd = -1;
 323         return (-1);
 324 }
 325 
 326 
 327 /*
 328  * Routines to handle pty creation upon zone entry and to shuttle I/O back
 329  * and forth between the two terminals.  We also compute and store the
 330  * name of the slave terminal associated with the master side.
 331  */
 332 static int
 333 get_master_pty()
 334 {
 335         if ((masterfd = open("/dev/ptmx", O_RDWR|O_NONBLOCK)) < 0) {
 336                 zperror(gettext("failed to obtain a pseudo-tty"));
 337                 return (-1);
 338         }
 339         if (tcgetattr(STDIN_FILENO, &save_termios) == -1) {
 340                 zperror(gettext("failed to get terminal settings from stdin"));
 341                 return (-1);
 342         }
 343         (void) ioctl(STDIN_FILENO, TIOCGWINSZ, (char *)&winsize);
 344 
 345         return (0);
 346 }
 347 
 348 /*
 349  * This is a bit tricky; normally a pts device will belong to the zone it
 350  * is granted to.  But in the case of "entering" a zone, we need to establish
 351  * the pty before entering the zone so that we can vector I/O to and from it
 352  * from the global zone.
 353  *
 354  * We use the zonept() call to let the ptm driver know what we are up to;
 355  * the only other hairy bit is the setting of zoneslavename (which happens
 356  * above, in get_master_pty()).
 357  */
 358 static int
 359 init_slave_pty(zoneid_t zoneid, char *devroot)
 360 {
 361         int slavefd = -1;
 362         char *slavename, zoneslavename[MAXPATHLEN];
 363 
 364         /*
 365          * Set slave permissions, zone the pts, then unlock it.
 366          */
 367         if (grantpt(masterfd) != 0) {
 368                 zperror(gettext("grantpt failed"));
 369                 return (-1);
 370         }
 371 
 372         if (unlockpt(masterfd) != 0) {
 373                 zperror(gettext("unlockpt failed"));
 374                 return (-1);
 375         }
 376 
 377         /*
 378          * We must open the slave side before zoning this pty; otherwise
 379          * the kernel would refuse us the open-- zoning a pty makes it
 380          * inaccessible to the global zone.  Note we are trying to open
 381          * the device node via the $ZONEROOT/dev path for this pty.
 382          *
 383          * Later we'll close the slave out when once we've opened it again
 384          * from within the target zone.  Blarg.
 385          */
 386         if ((slavename = ptsname(masterfd)) == NULL) {
 387                 zperror(gettext("failed to get name for pseudo-tty"));
 388                 return (-1);
 389         }
 390 
 391         (void) snprintf(zoneslavename, sizeof (zoneslavename), "%s%s",
 392             devroot, slavename);
 393 
 394         if ((slavefd = open(zoneslavename, O_RDWR)) < 0) {
 395                 zerror(gettext("failed to open %s: %s"), zoneslavename,
 396                     strerror(errno));
 397                 return (-1);
 398         }
 399 
 400         /*
 401          * Push hardware emulation (ptem), line discipline (ldterm),
 402          * and V7/4BSD/Xenix compatibility (ttcompat) modules.
 403          */
 404         if (ioctl(slavefd, I_PUSH, "ptem") == -1) {
 405                 zperror(gettext("failed to push ptem module"));
 406                 if (!failsafe)
 407                         goto bad;
 408         }
 409 
 410         /*
 411          * Anchor the stream to prevent malicious I_POPs; we prefer to do
 412          * this prior to entering the zone so that we can detect any errors
 413          * early, and so that we can set the anchor from the global zone.
 414          */
 415         if (ioctl(slavefd, I_ANCHOR) == -1) {
 416                 zperror(gettext("failed to set stream anchor"));
 417                 if (!failsafe)
 418                         goto bad;
 419         }
 420 
 421         if (ioctl(slavefd, I_PUSH, "ldterm") == -1) {
 422                 zperror(gettext("failed to push ldterm module"));
 423                 if (!failsafe)
 424                         goto bad;
 425         }
 426         if (ioctl(slavefd, I_PUSH, "ttcompat") == -1) {
 427                 zperror(gettext("failed to push ttcompat module"));
 428                 if (!failsafe)
 429                         goto bad;
 430         }
 431 
 432         /*
 433          * Propagate terminal settings from the external term to the new one.
 434          */
 435         if (tcsetattr(slavefd, TCSAFLUSH, &save_termios) == -1) {
 436                 zperror(gettext("failed to set terminal settings"));
 437                 if (!failsafe)
 438                         goto bad;
 439         }
 440         (void) ioctl(slavefd, TIOCSWINSZ, (char *)&winsize);
 441 
 442         if (zonept(masterfd, zoneid) != 0) {
 443                 zperror(gettext("could not set zoneid of pty"));
 444                 goto bad;
 445         }
 446 
 447         return (slavefd);
 448 
 449 bad:
 450         (void) close(slavefd);
 451         return (-1);
 452 }
 453 
 454 /*
 455  * Place terminal into raw mode.
 456  */
 457 static int
 458 set_tty_rawmode(int fd)
 459 {
 460         struct termios term;
 461         if (tcgetattr(fd, &term) < 0) {
 462                 zperror(gettext("failed to get user terminal settings"));
 463                 return (-1);
 464         }
 465 
 466         /* Stash for later, so we can revert back to previous mode */
 467         save_termios = term;
 468         save_fd = fd;
 469 
 470         /* disable 8->7 bit strip, start/stop, enable any char to restart */
 471         term.c_iflag &= ~(ISTRIP|IXON|IXANY);
 472         /* disable NL->CR, CR->NL, ignore CR, UPPER->lower */
 473         term.c_iflag &= ~(INLCR|ICRNL|IGNCR|IUCLC);
 474         /* disable output post-processing */
 475         term.c_oflag &= ~OPOST;
 476         /* disable canonical mode, signal chars, echo & extended functions */
 477         term.c_lflag &= ~(ICANON|ISIG|ECHO|IEXTEN);
 478 
 479         term.c_cc[VMIN] = 1;    /* byte-at-a-time */
 480         term.c_cc[VTIME] = 0;
 481 
 482         if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &term)) {
 483                 zperror(gettext("failed to set user terminal to raw mode"));
 484                 return (-1);
 485         }
 486 
 487         /*
 488          * We need to know the value of VEOF so that we can properly process for
 489          * client-side ~<EOF>.  But we have obliterated VEOF in term,
 490          * because VMIN overloads the same array slot in non-canonical mode.
 491          * Stupid @&^%!
 492          *
 493          * So here we construct the "effective" termios from the current
 494          * terminal settings, and the corrected VEOF and VEOL settings.
 495          */
 496         if (tcgetattr(STDIN_FILENO, &effective_termios) < 0) {
 497                 zperror(gettext("failed to get user terminal settings"));
 498                 return (-1);
 499         }
 500         effective_termios.c_cc[VEOF] = save_termios.c_cc[VEOF];
 501         effective_termios.c_cc[VEOL] = save_termios.c_cc[VEOL];
 502 
 503         return (0);
 504 }
 505 
 506 /*
 507  * Copy terminal window size from our terminal to the pts.
 508  */
 509 /*ARGSUSED*/
 510 static void
 511 sigwinch(int s)
 512 {
 513         struct winsize ws;
 514 
 515         if (ioctl(0, TIOCGWINSZ, &ws) == 0)
 516                 (void) ioctl(masterfd, TIOCSWINSZ, &ws);
 517 }
 518 
 519 static volatile int close_on_sig = -1;
 520 
 521 static void
 522 /*ARGSUSED*/
 523 sigcld(int s)
 524 {
 525         int status;
 526         pid_t pid;
 527 
 528         /*
 529          * Peek at the exit status.  If this isn't the process we cared
 530          * about, then just reap it.
 531          */
 532         if ((pid = waitpid(child_pid, &status, WNOHANG|WNOWAIT)) != -1) {
 533                 if (pid == child_pid &&
 534                     (WIFEXITED(status) || WIFSIGNALED(status))) {
 535                         dead = 1;
 536                         if (close_on_sig != -1) {
 537                                 (void) write(close_on_sig, "a", 1);
 538                                 (void) close(close_on_sig);
 539                                 close_on_sig = -1;
 540                         }
 541                 } else {
 542                         (void) waitpid(pid, &status, WNOHANG);
 543                 }
 544         }
 545 }
 546 
 547 /*
 548  * Some signals (currently, SIGINT) must be forwarded on to the process
 549  * group of the child process.
 550  */
 551 static void
 552 sig_forward(int s)
 553 {
 554         if (child_pid != -1) {
 555                 pid_t pgid = getpgid(child_pid);
 556                 if (pgid != -1)
 557                         (void) sigsend(P_PGID, pgid, s);
 558         }
 559 }
 560 
 561 /*
 562  * reset terminal settings for global environment
 563  */
 564 static void
 565 reset_tty()
 566 {
 567         (void) tcsetattr(save_fd, TCSADRAIN, &save_termios);
 568 }
 569 
 570 /*
 571  * Convert character to printable representation, for display with locally
 572  * echoed command characters (like when we need to display ~^D)
 573  */
 574 static void
 575 canonify(char c, char *cc)
 576 {
 577         if (isprint(c)) {
 578                 cc[0] = c;
 579                 cc[1] = '\0';
 580         } else if (c >= 0 && c <= 31) {   /* ^@ through ^_ */
 581                 cc[0] = '^';
 582                 cc[1] = c + '@';
 583                 cc[2] = '\0';
 584         } else {
 585                 cc[0] = '\\';
 586                 cc[1] = ((c >> 6) & 7) + '0';
 587                 cc[2] = ((c >> 3) & 7) + '0';
 588                 cc[3] = (c & 7) + '0';
 589                 cc[4] = '\0';
 590         }
 591 }
 592 
 593 /*
 594  * process_user_input watches the input stream for the escape sequence for
 595  * 'quit' (by default, tilde-period).  Because we might be fed just one
 596  * keystroke at a time, state associated with the user input (are we at the
 597  * beginning of the line?  are we locally echoing the next character?) is
 598  * maintained by beginning_of_line and local_echo across calls to the routine.
 599  * If the write to outfd fails, we'll try to read from infd in an attempt
 600  * to prevent deadlock between the two processes.
 601  *
 602  * This routine returns -1 when the 'quit' escape sequence has been issued,
 603  * or an error is encountered, 1 if stdin is EOF, and 0 otherwise.
 604  */
 605 static int
 606 process_user_input(int outfd, int infd)
 607 {
 608         static boolean_t beginning_of_line = B_TRUE;
 609         static boolean_t local_echo = B_FALSE;
 610         char ibuf[ZLOGIN_BUFSIZ];
 611         int nbytes;
 612         char *buf = ibuf;
 613         char c = *buf;
 614 
 615         nbytes = read(STDIN_FILENO, ibuf, ZLOGIN_RDBUFSIZ);
 616         if (nbytes == -1 && (errno != EINTR || dead))
 617                 return (-1);
 618 
 619         if (nbytes == -1)       /* The read was interrupted. */
 620                 return (0);
 621 
 622         /* 0 read means EOF, close the pipe to the child */
 623         if (nbytes == 0)
 624                 return (1);
 625 
 626         for (c = *buf; nbytes > 0; c = *buf, --nbytes) {
 627                 buf++;
 628                 if (beginning_of_line && !nocmdchar) {
 629                         beginning_of_line = B_FALSE;
 630                         if (c == cmdchar) {
 631                                 local_echo = B_TRUE;
 632                                 continue;
 633                         }
 634                 } else if (local_echo) {
 635                         local_echo = B_FALSE;
 636                         if (c == '.' || c == effective_termios.c_cc[VEOF]) {
 637                                 char cc[CANONIFY_LEN];
 638 
 639                                 canonify(c, cc);
 640                                 (void) write(STDOUT_FILENO, &cmdchar, 1);
 641                                 (void) write(STDOUT_FILENO, cc, strlen(cc));
 642                                 return (-1);
 643                         }
 644                 }
 645 retry:
 646                 if (write(outfd, &c, 1) <= 0) {
 647                         /*
 648                          * Since the fd we are writing to is opened with
 649                          * O_NONBLOCK it is possible to get EAGAIN if the
 650                          * pipe is full.  One way this could happen is if we
 651                          * are writing a lot of data into the pipe in this loop
 652                          * and the application on the other end is echoing that
 653                          * data back out to its stdout.  The output pipe can
 654                          * fill up since we are stuck here in this loop and not
 655                          * draining the other pipe.  We can try to read some of
 656                          * the data to see if we can drain the pipe so that the
 657                          * application can continue to make progress.  The read
 658                          * is non-blocking so we won't hang here.  We also wait
 659                          * a bit before retrying since there could be other
 660                          * reasons why the pipe is full and we don't want to
 661                          * continuously retry.
 662                          */
 663                         if (errno == EAGAIN) {
 664                                 struct timespec rqtp;
 665                                 int ln;
 666                                 char obuf[ZLOGIN_BUFSIZ];
 667 
 668                                 if ((ln = read(infd, obuf, ZLOGIN_BUFSIZ)) > 0)
 669                                         (void) write(STDOUT_FILENO, obuf, ln);
 670 
 671                                 /* sleep for 10 milliseconds */
 672                                 rqtp.tv_sec = 0;
 673                                 rqtp.tv_nsec = 10 * (NANOSEC / MILLISEC);
 674                                 (void) nanosleep(&rqtp, NULL);
 675                                 if (!dead)
 676                                         goto retry;
 677                         }
 678 
 679                         return (-1);
 680                 }
 681                 beginning_of_line = (c == '\r' || c == '\n' ||
 682                     c == effective_termios.c_cc[VKILL] ||
 683                     c == effective_termios.c_cc[VEOL] ||
 684                     c == effective_termios.c_cc[VSUSP] ||
 685                     c == effective_termios.c_cc[VINTR]);
 686         }
 687         return (0);
 688 }
 689 
 690 /*
 691  * This function prevents deadlock between zlogin and the application in the
 692  * zone that it is talking to.  This can happen when we read from zlogin's
 693  * stdin and write the data down the pipe to the application.  If the pipe
 694  * is full, we'll block in the write.  Because zlogin could be blocked in
 695  * the write, it would never read the application's stdout/stderr so the
 696  * application can then block on those writes (when the pipe fills up).  If the
 697  * the application gets blocked this way, it can never get around to reading
 698  * its stdin so that zlogin can unblock from its write.  Once in this state,
 699  * the two processes are deadlocked.
 700  *
 701  * To prevent this, we want to verify that we can write into the pipe before we
 702  * read from our stdin.  If the pipe already is pretty full, we bypass the read
 703  * for now.  We'll circle back here again after the poll() so that we can
 704  * try again.  When this function is called, we already know there is data
 705  * ready to read on STDIN_FILENO.  We return -1 if there is a problem, 1 if
 706  * stdin is EOF, and 0 if everything is ok (even though we might not have
 707  * read/written any data into the pipe on this iteration).
 708  */
 709 static int
 710 process_raw_input(int stdin_fd, int appin_fd)
 711 {
 712         int cc;
 713         struct stat64 sb;
 714         char ibuf[ZLOGIN_RDBUFSIZ];
 715 
 716         /* Check how much data is already in the pipe */
 717         if (fstat64(appin_fd, &sb) == -1) {
 718                 perror("stat failed");
 719                 return (-1);
 720         }
 721 
 722         if (dead)
 723                 return (-1);
 724 
 725         /*
 726          * The pipe already has a lot of data in it,  don't write any more
 727          * right now.
 728          */
 729         if (sb.st_size >= HI_WATER)
 730                 return (0);
 731 
 732         cc = read(STDIN_FILENO, ibuf, ZLOGIN_RDBUFSIZ);
 733         if (cc == -1 && (errno != EINTR || dead))
 734                 return (-1);
 735 
 736         if (cc == -1)   /* The read was interrupted. */
 737                 return (0);
 738 
 739         /* 0 read means EOF, close the pipe to the child */
 740         if (cc == 0)
 741                 return (1);
 742 
 743         /*
 744          * stdin_fd is stdin of the target; so, the thing we'll write the user
 745          * data *to*.
 746          */
 747         if (write(stdin_fd, ibuf, cc) == -1)
 748                 return (-1);
 749 
 750         return (0);
 751 }
 752 
 753 /*
 754  * Write the output from the application running in the zone.  We can get
 755  * a signal during the write (usually it would be SIGCHLD when the application
 756  * has exited) so we loop to make sure we have written all of the data we read.
 757  */
 758 static int
 759 process_output(int in_fd, int out_fd)
 760 {
 761         int wrote = 0;
 762         int cc;
 763         char ibuf[ZLOGIN_BUFSIZ];
 764 
 765         cc = read(in_fd, ibuf, ZLOGIN_BUFSIZ);
 766         if (cc == -1 && (errno != EINTR || dead))
 767                 return (-1);
 768         if (cc == 0)    /* EOF */
 769                 return (-1);
 770         if (cc == -1)   /* The read was interrupted. */
 771                 return (0);
 772 
 773         do {
 774                 int len;
 775 
 776                 len = write(out_fd, ibuf + wrote, cc - wrote);
 777                 if (len == -1 && errno != EINTR)
 778                         return (-1);
 779                 if (len != -1)
 780                         wrote += len;
 781         } while (wrote < cc);
 782 
 783         return (0);
 784 }
 785 
 786 /*
 787  * This is the main I/O loop, and is shared across all zlogin modes.
 788  * Parameters:
 789  *      stdin_fd:  The fd representing 'stdin' for the slave side; input to
 790  *                 the zone will be written here.
 791  *
 792  *      appin_fd:  The fd representing the other end of the 'stdin' pipe (when
 793  *                 we're running non-interactive); used in process_raw_input
 794  *                 to ensure we don't fill up the application's stdin pipe.
 795  *
 796  *      stdout_fd: The fd representing 'stdout' for the slave side; output
 797  *                 from the zone will arrive here.
 798  *
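 *      stderr_fd: The fd representing 'stderr' for the slave side; output
 *                 from the zone will arrive here.
 *
 *      sig_fd:    The read side of the signalling pipe.  The signal handler
 *                 writes into the other end when the child dies, so that
 *                 poll() always returns and the loop notices the dead flag.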
 801  *
 802  *      raw_mode:  If TRUE, then no processing (for example, for '~.') will
 803  *                 be performed on the input coming from STDIN.
 804  *
 805  * stderr_fd may be specified as -1 if there is no stderr (only non-interactive
 806  * mode supplies a stderr).
 807  *
 808  */
 809 static void
 810 doio(int stdin_fd, int appin_fd, int stdout_fd, int stderr_fd, int sig_fd,
 811     boolean_t raw_mode)
 812 {
 813         struct pollfd pollfds[4];
 814         char ibuf[ZLOGIN_BUFSIZ];
 815         int cc, ret;
 816 
 817         /* read from stdout of zone and write to stdout of global zone */
 818         pollfds[0].fd = stdout_fd;
 819         pollfds[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI;
 820 
 821         /* read from stderr of zone and write to stderr of global zone */
 822         pollfds[1].fd = stderr_fd;
 823         pollfds[1].events = pollfds[0].events;
 824 
 825         /* read from stdin of global zone and write to stdin of zone */
 826         pollfds[2].fd = STDIN_FILENO;
 827         pollfds[2].events = pollfds[0].events;
 828 
 829         /* read from signalling pipe so we know when child dies */
 830         pollfds[3].fd = sig_fd;
 831         pollfds[3].events = pollfds[0].events;
 832 
 833         for (;;) {
 834                 pollfds[0].revents = pollfds[1].revents =
 835                     pollfds[2].revents = pollfds[3].revents = 0;
 836 
 837                 if (dead)
 838                         break;
 839 
 840                 /*
 841                  * There is a race condition here where we can receive the
 842                  * child death signal, set the dead flag, but since we have
 843                  * passed the test above, we would go into poll and hang.
 844                  * To avoid this we use the sig_fd as an additional poll fd.
 845                  * The signal handler writes into the other end of this pipe
 846                  * when the child dies so that the poll will always see that
 847                  * input and proceed.  We just loop around at that point and
 848                  * then notice the dead flag.
 849                  */
 850 
 851                 ret = poll(pollfds,
 852                     sizeof (pollfds) / sizeof (struct pollfd), -1);
 853 
 854                 if (ret == -1 && errno != EINTR) {
 855                         perror("poll failed");
 856                         break;
 857                 }
 858 
 859                 if (errno == EINTR && dead) {
 860                         break;
 861                 }
 862 
 863                 /* event from master side stdout */
 864                 if (pollfds[0].revents) {
 865                         if (pollfds[0].revents &
 866                             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 867                                 if (process_output(stdout_fd, STDOUT_FILENO)
 868                                     != 0)
 869                                         break;
 870                         } else {
 871                                 pollerr = pollfds[0].revents;
 872                                 break;
 873                         }
 874                 }
 875 
 876                 /* event from master side stderr */
 877                 if (pollfds[1].revents) {
 878                         if (pollfds[1].revents &
 879                             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 880                                 if (process_output(stderr_fd, STDERR_FILENO)
 881                                     != 0)
 882                                         break;
 883                         } else {
 884                                 pollerr = pollfds[1].revents;
 885                                 break;
 886                         }
 887                 }
 888 
 889                 /* event from user STDIN side */
 890                 if (pollfds[2].revents) {
 891                         if (pollfds[2].revents &
 892                             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 893                                 /*
 894                                  * stdin fd is stdin of the target; so,
 895                                  * the thing we'll write the user data *to*.
 896                                  *
 897                                  * Also, unlike on the output side, we
 898                                  * close the pipe on a zero-length message.
 899                                  */
 900                                 int res;
 901 
 902                                 if (raw_mode)
 903                                         res = process_raw_input(stdin_fd,
 904                                             appin_fd);
 905                                 else
 906                                         res = process_user_input(stdin_fd,
 907                                             stdout_fd);
 908 
 909                                 if (res < 0)
 910                                         break;
 911                                 if (res > 0) {
 912                                         /* EOF (close) child's stdin_fd */
 913                                         pollfds[2].fd = -1;
 914                                         while ((res = close(stdin_fd)) != 0 &&
 915                                             errno == EINTR)
 916                                                 ;
 917                                         if (res != 0)
 918                                                 break;
 919                                 }
 920 
 921                         } else if (raw_mode && pollfds[2].revents & POLLHUP) {
 922                                 /*
 923                                  * It's OK to get a POLLHUP on STDIN-- it
 924                                  * always happens if you do:
 925                                  *
 926                                  * echo foo | zlogin <zone> <command>
 927                                  *
 928                                  * We reset fd to -1 in this case to clear
 929                                  * the condition and close the pipe (EOF) to
 930                                  * the other side in order to wrap things up.
 931                                  */
 932                                 int res;
 933 
 934                                 pollfds[2].fd = -1;
 935                                 while ((res = close(stdin_fd)) != 0 &&
 936                                     errno == EINTR)
 937                                         ;
 938                                 if (res != 0)
 939                                         break;
 940                         } else {
 941                                 pollerr = pollfds[2].revents;
 942                                 break;
 943                         }
 944                 }
 945         }
 946 
 947         /*
 948          * We are in the midst of dying, but try to poll with a short
 949          * timeout to see if we can catch the last bit of I/O from the
 950          * children.
 951          */
 952 retry:
 953         pollfds[0].revents = pollfds[1].revents = 0;
 954         (void) poll(pollfds, 2, 100);
 955         if (pollfds[0].revents &
 956             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 957                 if ((cc = read(stdout_fd, ibuf, ZLOGIN_BUFSIZ)) > 0) {
 958                         (void) write(STDOUT_FILENO, ibuf, cc);
 959                         goto retry;
 960                 }
 961         }
 962         if (pollfds[1].revents &
 963             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 964                 if ((cc = read(stderr_fd, ibuf, ZLOGIN_BUFSIZ)) > 0) {
 965                         (void) write(STDERR_FILENO, ibuf, cc);
 966                         goto retry;
 967                 }
 968         }
 969 }
 970 
 971 /*
 972  * Fetch the user_cmd brand hook for getting a user's passwd(4) entry.
 973  */
 974 static const char *
 975 zone_get_user_cmd(brand_handle_t bh, const char *login, char *user_cmd,
 976     size_t len)
 977 {
 978         bzero(user_cmd, sizeof (user_cmd));
 979         if (brand_get_user_cmd(bh, login, user_cmd, len) != 0)
 980                 return (NULL);
 981 
 982         return (user_cmd);
 983 }
 984 
 985 /* From libc */
 986 extern int str2passwd(const char *, int, void *, char *, int);
 987 
 988 /*
 989  * exec() the user_cmd brand hook, and convert the output string to a
 990  * struct passwd.  This is to be called after zone_enter().
 991  *
 992  */
 993 static struct passwd *
 994 zone_get_user_pw(const char *user_cmd, struct passwd *pwent, char *pwbuf,
 995     int pwbuflen)
 996 {
 997         char pwline[NSS_BUFLEN_PASSWD];
 998         char *cin = NULL;
 999         FILE *fin;
1000         int status;
1001 
1002         assert(getzoneid() != GLOBAL_ZONEID);
1003 
1004         if ((fin = popen(user_cmd, "r")) == NULL)
1005                 return (NULL);
1006 
1007         while (cin == NULL && !feof(fin))
1008                 cin = fgets(pwline, sizeof (pwline), fin);
1009 
1010         if (cin == NULL) {
1011                 (void) pclose(fin);
1012                 return (NULL);
1013         }
1014 
1015         status = pclose(fin);
1016         if (!WIFEXITED(status))
1017                 return (NULL);
1018         if (WEXITSTATUS(status) != 0)
1019                 return (NULL);
1020 
1021         if (str2passwd(pwline, sizeof (pwline), pwent, pwbuf, pwbuflen) == 0)
1022                 return (pwent);
1023         else
1024                 return (NULL);
1025 }
1026 
1027 static char **
1028 zone_login_cmd(brand_handle_t bh, const char *login)
1029 {
1030         static char result_buf[ARG_MAX];
1031         char **new_argv, *ptr, *lasts;
1032         int n, a;
1033 
1034         /* Get the login command for the target zone. */
1035         bzero(result_buf, sizeof (result_buf));
1036 
1037         if (forced_login) {
1038                 if (brand_get_forcedlogin_cmd(bh, login,
1039                     result_buf, sizeof (result_buf)) != 0)
1040                         return (NULL);
1041         } else {
1042                 if (brand_get_login_cmd(bh, login,
1043                     result_buf, sizeof (result_buf)) != 0)
1044                         return (NULL);
1045         }
1046 
1047         /*
1048          * We got back a string that we'd like to execute.  But since
1049          * we're not doing the execution via a shell we'll need to convert
1050          * the exec string to an array of strings.  We'll do that here
1051          * but we're going to be very simplistic about it and break stuff
1052          * up based on spaces.  We're not even going to support any kind
1053          * of quoting or escape characters.  It's truly amazing that
1054          * there is no library function in OpenSolaris to do this for us.
1055          */
1056 
1057         /*
1058          * Be paranoid.  Since we're deliniating based on spaces make
1059          * sure there are no adjacent spaces.
1060          */
1061         if (strstr(result_buf, "  ") != NULL)
1062                 return (NULL);
1063 
1064         /* Remove any trailing whitespace.  */
1065         n = strlen(result_buf);
1066         if (result_buf[n - 1] == ' ')
1067                 result_buf[n - 1] = '\0';
1068 
1069         /* Count how many elements there are in the exec string. */
1070         ptr = result_buf;
1071         for (n = 2; ((ptr = strchr(ptr + 1, (int)' ')) != NULL); n++)
1072                 ;
1073 
1074         /* Allocate the argv array that we're going to return. */
1075         if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1076                 return (NULL);
1077 
1078         /* Tokenize the exec string and return. */
1079         a = 0;
1080         new_argv[a++] = result_buf;
1081         if (n > 2) {
1082                 (void) strtok_r(result_buf, " ", &lasts);
1083                 while ((new_argv[a++] = strtok_r(NULL, " ", &lasts)) != NULL)
1084                         ;
1085         } else {
1086                 new_argv[a++] = NULL;
1087         }
1088         assert(n == a);
1089         return (new_argv);
1090 }
1091 
1092 /*
1093  * Prepare argv array for exec'd process; if we're passing commands to the
1094  * new process, then use su(1M) to do the invocation.  Otherwise, use
1095  * 'login -z <from_zonename> -f' (-z is an undocumented option which tells
1096  * login that we're coming from another zone, and to disregard its CONSOLE
1097  * checks).
1098  */
1099 static char **
1100 prep_args(brand_handle_t bh, const char *login, char **argv)
1101 {
1102         int argc = 0, a = 0, i, n = -1;
1103         char **new_argv;
1104 
1105         if (argv != NULL) {
1106                 size_t subshell_len = 1;
1107                 char *subshell;
1108 
1109                 while (argv[argc] != NULL)
1110                         argc++;
1111 
1112                 for (i = 0; i < argc; i++) {
1113                         subshell_len += strlen(argv[i]) + 1;
1114                 }
1115                 if ((subshell = calloc(1, subshell_len)) == NULL)
1116                         return (NULL);
1117 
1118                 for (i = 0; i < argc; i++) {
1119                         (void) strcat(subshell, argv[i]);
1120                         (void) strcat(subshell, " ");
1121                 }
1122 
1123                 if (failsafe) {
1124                         n = 4;
1125                         if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1126                                 return (NULL);
1127 
1128                         new_argv[a++] = FAILSAFESHELL;
1129                 } else {
1130                         n = 5;
1131                         if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1132                                 return (NULL);
1133 
1134                         new_argv[a++] = SUPATH;
1135                         if (strcmp(login, "root") != 0) {
1136                                 new_argv[a++] = "-";
1137                                 n++;
1138                         }
1139                         new_argv[a++] = (char *)login;
1140                 }
1141                 new_argv[a++] = "-c";
1142                 new_argv[a++] = subshell;
1143                 new_argv[a++] = NULL;
1144                 assert(a == n);
1145         } else {
1146                 if (failsafe) {
1147                         n = 2;
1148                         if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1149                                 return (NULL);
1150                         new_argv[a++] = FAILSAFESHELL;
1151                         new_argv[a++] = NULL;
1152                         assert(n == a);
1153                 } else {
1154                         new_argv = zone_login_cmd(bh, login);
1155                 }
1156         }
1157 
1158         return (new_argv);
1159 }
1160 
/*
 * Helper routine for prep_env below: return a freshly malloc()ed
 * "name=value" string, or NULL if the allocation fails.
 */
static char *
add_env(char *name, char *value)
{
        size_t namelen = strlen(name);
        size_t vallen = strlen(value);
        char *entry;

        /* room for name, '=', value and the terminating NUL */
        entry = malloc(namelen + vallen + 2);
        if (entry != NULL) {
                (void) memcpy(entry, name, namelen);
                entry[namelen] = '=';
                /* vallen + 1 also copies value's NUL terminator */
                (void) memcpy(entry + namelen + 1, value, vallen + 1);
        }

        return (entry);
}
1176 
1177 /*
1178  * Prepare envp array for exec'd process.
1179  */
1180 static char **
1181 prep_env()
1182 {
1183         int e = 0, size = 1;
1184         char **new_env, *estr;
1185         char *term = getenv("TERM");
1186 
1187         size++; /* for $PATH */
1188         if (term != NULL)
1189                 size++;
1190 
1191         /*
1192          * In failsafe mode we set $HOME, since '-l' isn't valid in this mode.
1193          * We also set $SHELL, since neither login nor su will be around to do
1194          * it.
1195          */
1196         if (failsafe)
1197                 size += 2;
1198 
1199         if ((new_env = malloc(sizeof (char *) * size)) == NULL)
1200                 return (NULL);
1201 
1202         if ((estr = add_env("PATH", DEF_PATH)) == NULL)
1203                 return (NULL);
1204         new_env[e++] = estr;
1205 
1206         if (term != NULL) {
1207                 if ((estr = add_env("TERM", term)) == NULL)
1208                         return (NULL);
1209                 new_env[e++] = estr;
1210         }
1211 
1212         if (failsafe) {
1213                 if ((estr = add_env("HOME", "/")) == NULL)
1214                         return (NULL);
1215                 new_env[e++] = estr;
1216 
1217                 if ((estr = add_env("SHELL", FAILSAFESHELL)) == NULL)
1218                         return (NULL);
1219                 new_env[e++] = estr;
1220         }
1221 
1222         new_env[e++] = NULL;
1223 
1224         assert(e == size);
1225 
1226         return (new_env);
1227 }
1228 
1229 /*
1230  * Finish the preparation of the envp array for exec'd non-interactive
1231  * zlogins.  This is called in the child process *after* we zone_enter(), since
1232  * it derives things we can only know within the zone, such as $HOME, $SHELL,
1233  * etc.  We need only do this in the non-interactive, mode, since otherwise
1234  * login(1) will do it.  We don't do this in failsafe mode, since it presents
1235  * additional ways in which the command could fail, and we'd prefer to avoid
1236  * that.
1237  */
1238 static char **
1239 prep_env_noninteractive(const char *user_cmd, char **env)
1240 {
1241         size_t size;
1242         char **new_env;
1243         int e, i;
1244         char *estr;
1245         char varmail[LOGNAME_MAX + 11]; /* strlen(/var/mail/) = 10, NUL */
1246         char pwbuf[NSS_BUFLEN_PASSWD + 1];
1247         struct passwd pwent;
1248         struct passwd *pw = NULL;
1249 
1250         assert(env != NULL);
1251         assert(failsafe == 0);
1252 
1253         /*
1254          * Exec the "user_cmd" brand hook to get a pwent for the
1255          * login user.  If this fails, HOME will be set to "/", SHELL
1256          * will be set to $DEFAULTSHELL, and we will continue to exec
1257          * SUPATH <login> -c <cmd>.
1258          */
1259         pw = zone_get_user_pw(user_cmd, &pwent, pwbuf, sizeof (pwbuf));
1260 
1261         /*
1262          * Get existing envp size.
1263          */
1264         for (size = 0; env[size] != NULL; size++)
1265                 ;
1266 
1267         e = size;
1268 
1269         /*
1270          * Finish filling out the environment; we duplicate the environment
1271          * setup described in login(1), for lack of a better precedent.
1272          */
1273         if (pw != NULL)
1274                 size += 3;      /* LOGNAME, HOME, MAIL */
1275         else
1276                 size += 1;      /* HOME */
1277 
1278         size++; /* always fill in SHELL */
1279         size++; /* terminating NULL */
1280 
1281         if ((new_env = malloc(sizeof (char *) * size)) == NULL)
1282                 goto malloc_fail;
1283 
1284         /*
1285          * Copy existing elements of env into new_env.
1286          */
1287         for (i = 0; env[i] != NULL; i++) {
1288                 if ((new_env[i] = strdup(env[i])) == NULL)
1289                         goto malloc_fail;
1290         }
1291         assert(e == i);
1292 
1293         if (pw != NULL) {
1294                 if ((estr = add_env("LOGNAME", pw->pw_name)) == NULL)
1295                         goto malloc_fail;
1296                 new_env[e++] = estr;
1297 
1298                 if ((estr = add_env("HOME", pw->pw_dir)) == NULL)
1299                         goto malloc_fail;
1300                 new_env[e++] = estr;
1301 
1302                 if (chdir(pw->pw_dir) != 0)
1303                         zerror(gettext("Could not chdir to home directory "
1304                             "%s: %s"), pw->pw_dir, strerror(errno));
1305 
1306                 (void) snprintf(varmail, sizeof (varmail), "/var/mail/%s",
1307                     pw->pw_name);
1308                 if ((estr = add_env("MAIL", varmail)) == NULL)
1309                         goto malloc_fail;
1310                 new_env[e++] = estr;
1311         } else {
1312                 if ((estr = add_env("HOME", "/")) == NULL)
1313                         goto malloc_fail;
1314                 new_env[e++] = estr;
1315         }
1316 
1317         if (pw != NULL && strlen(pw->pw_shell) > 0) {
1318                 if ((estr = add_env("SHELL", pw->pw_shell)) == NULL)
1319                         goto malloc_fail;
1320                 new_env[e++] = estr;
1321         } else {
1322                 if ((estr = add_env("SHELL", DEFAULTSHELL)) == NULL)
1323                         goto malloc_fail;
1324                 new_env[e++] = estr;
1325         }
1326 
1327         new_env[e++] = NULL;    /* add terminating NULL */
1328 
1329         assert(e == size);
1330         return (new_env);
1331 
1332 malloc_fail:
1333         zperror(gettext("failed to allocate memory for process environment"));
1334         return (NULL);
1335 }
1336 
/*
 * Callback that closes fd unless it is the descriptor whose number is
 * stored in the int that slavefd points at.  Always returns 0.
 */
static int
close_func(void *slavefd, int fd)
{
        const int keep = *(int *)slavefd;

        if (fd == keep)
                return (0);

        (void) close(fd);
        return (0);
}
1344 
1345 static void
1346 set_cmdchar(char *cmdcharstr)
1347 {
1348         char c;
1349         long lc;
1350 
1351         if ((c = *cmdcharstr) != '\\') {
1352                 cmdchar = c;
1353                 return;
1354         }
1355 
1356         c = cmdcharstr[1];
1357         if (c == '\0' || c == '\\') {
1358                 cmdchar = '\\';
1359                 return;
1360         }
1361 
1362         if (c < '0' || c > '7') {
1363                 zerror(gettext("Unrecognized escape character option %s"),
1364                     cmdcharstr);
1365                 usage();
1366         }
1367 
1368         lc = strtol(cmdcharstr + 1, NULL, 8);
1369         if (lc < 0 || lc > 255) {
1370                 zerror(gettext("Octal escape character '%s' too large"),
1371                     cmdcharstr);
1372                 usage();
1373         }
1374         cmdchar = (char)lc;
1375 }
1376 
1377 static int
1378 setup_utmpx(char *slavename)
1379 {
1380         struct utmpx ut;
1381 
1382         bzero(&ut, sizeof (ut));
1383         (void) strncpy(ut.ut_user, ".zlogin", sizeof (ut.ut_user));
1384         (void) strncpy(ut.ut_line, slavename, sizeof (ut.ut_line));
1385         ut.ut_pid = getpid();
1386         ut.ut_id[0] = 'z';
1387         ut.ut_id[1] = ut.ut_id[2] = ut.ut_id[3] = (char)SC_WILDC;
1388         ut.ut_type = LOGIN_PROCESS;
1389         (void) time(&ut.ut_tv.tv_sec);
1390 
1391         if (makeutx(&ut) == NULL) {
1392                 zerror(gettext("makeutx failed"));
1393                 return (-1);
1394         }
1395         return (0);
1396 }
1397 
/*
 * Give up the lock file obtained through grab_lock_file() by closing its
 * descriptor; any error from close() is ignored.
 */
static void
release_lock_file(int lockfd)
{
        /* Closing the descriptor also drops the fcntl() lock held on it. */
        (void) close(lockfd);
}
1403 
1404 static int
1405 grab_lock_file(const char *zone_name, int *lockfd)
1406 {
1407         char pathbuf[PATH_MAX];
1408         struct flock flock;
1409 
1410         if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
1411                 zerror(gettext("could not mkdir %s: %s"), ZONES_TMPDIR,
1412                     strerror(errno));
1413                 return (-1);
1414         }
1415         (void) chmod(ZONES_TMPDIR, S_IRWXU);
1416         (void) snprintf(pathbuf, sizeof (pathbuf), "%s/%s.zoneadm.lock",
1417             ZONES_TMPDIR, zone_name);
1418 
1419         if ((*lockfd = open(pathbuf, O_RDWR|O_CREAT, S_IRUSR|S_IWUSR)) < 0) {
1420                 zerror(gettext("could not open %s: %s"), pathbuf,
1421                     strerror(errno));
1422                 return (-1);
1423         }
1424         /*
1425          * Lock the file to synchronize with other zoneadmds
1426          */
1427         flock.l_type = F_WRLCK;
1428         flock.l_whence = SEEK_SET;
1429         flock.l_start = (off_t)0;
1430         flock.l_len = (off_t)0;
1431         if (fcntl(*lockfd, F_SETLKW, &flock) < 0) {
1432                 zerror(gettext("unable to lock %s: %s"), pathbuf,
1433                     strerror(errno));
1434                 release_lock_file(*lockfd);
1435                 return (-1);
1436         }
1437         return (Z_OK);
1438 }
1439 
1440 static int
1441 start_zoneadmd(const char *zone_name)
1442 {
1443         pid_t retval;
1444         int pstatus = 0, error = -1, lockfd, doorfd;
1445         struct door_info info;
1446         char doorpath[MAXPATHLEN];
1447 
1448         (void) snprintf(doorpath, sizeof (doorpath), ZONE_DOOR_PATH, zone_name);
1449 
1450         if (grab_lock_file(zone_name, &lockfd) != Z_OK)
1451                 return (-1);
1452         /*
1453          * We must do the door check with the lock held.  Otherwise, we
1454          * might race against another zoneadm/zlogin process and wind
1455          * up with two processes trying to start zoneadmd at the same
1456          * time.  zoneadmd will detect this, and fail, but we prefer this
1457          * to be as seamless as is practical, from a user perspective.
1458          */
1459         if ((doorfd = open(doorpath, O_RDONLY)) < 0) {
1460                 if (errno != ENOENT) {
1461                         zerror("failed to open %s: %s", doorpath,
1462                             strerror(errno));
1463                         goto out;
1464                 }
1465         } else {
1466                 /*
1467                  * Seems to be working ok.
1468                  */
1469                 if (door_info(doorfd, &info) == 0 &&
1470                     ((info.di_attributes & DOOR_REVOKED) == 0)) {
1471                         error = 0;
1472                         goto out;
1473                 }
1474         }
1475 
1476         if ((child_pid = fork()) == -1) {
1477                 zperror(gettext("could not fork"));
1478                 goto out;
1479         } else if (child_pid == 0) {
1480                 /* child process */
1481                 (void) execl("/usr/lib/zones/zoneadmd", "zoneadmd", "-z",
1482                     zone_name, NULL);
1483                 zperror(gettext("could not exec zoneadmd"));
1484                 _exit(1);
1485         }
1486 
1487         /* parent process */
1488         do {
1489                 retval = waitpid(child_pid, &pstatus, 0);
1490         } while (retval != child_pid);
1491         if (WIFSIGNALED(pstatus) ||
1492             (WIFEXITED(pstatus) && WEXITSTATUS(pstatus) != 0)) {
1493                 zerror(gettext("could not start %s"), "zoneadmd");
1494                 goto out;
1495         }
1496         error = 0;
1497 out:
1498         release_lock_file(lockfd);
1499         (void) close(doorfd);
1500         return (error);
1501 }
1502 
1503 static int
1504 init_template(void)
1505 {
1506         int fd;
1507         int err = 0;
1508 
1509         fd = open64(CTFS_ROOT "/process/template", O_RDWR);
1510         if (fd == -1)
1511                 return (-1);
1512 
1513         /*
1514          * zlogin doesn't do anything with the contract.
1515          * Deliver no events, don't inherit, and allow it to be orphaned.
1516          */
1517         err |= ct_tmpl_set_critical(fd, 0);
1518         err |= ct_tmpl_set_informative(fd, 0);
1519         err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
1520         err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
1521         if (err || ct_tmpl_activate(fd)) {
1522                 (void) close(fd);
1523                 return (-1);
1524         }
1525 
1526         return (fd);
1527 }
1528 
1529 static int
1530 noninteractive_login(char *zonename, const char *user_cmd, zoneid_t zoneid,
1531     char **new_args, char **new_env)
1532 {
1533         pid_t retval;
1534         int stdin_pipe[2], stdout_pipe[2], stderr_pipe[2], dead_child_pipe[2];
1535         int child_status;
1536         int tmpl_fd;
1537         sigset_t block_cld;
1538 
1539         if ((tmpl_fd = init_template()) == -1) {
1540                 reset_tty();
1541                 zperror(gettext("could not create contract"));
1542                 return (1);
1543         }
1544 
1545         if (pipe(stdin_pipe) != 0) {
1546                 zperror(gettext("could not create STDIN pipe"));
1547                 return (1);
1548         }
1549         /*
1550          * When the user types ^D, we get a zero length message on STDIN.
1551          * We need to echo that down the pipe to send it to the other side;
1552          * but by default, pipes don't propagate zero-length messages.  We
1553          * toggle that behavior off using I_SWROPT.  See streamio(7i).
1554          */
1555         if (ioctl(stdin_pipe[0], I_SWROPT, SNDZERO) != 0) {
1556                 zperror(gettext("could not configure STDIN pipe"));
1557                 return (1);
1558 
1559         }
1560         if (pipe(stdout_pipe) != 0) {
1561                 zperror(gettext("could not create STDOUT pipe"));
1562                 return (1);
1563         }
1564         if (pipe(stderr_pipe) != 0) {
1565                 zperror(gettext("could not create STDERR pipe"));
1566                 return (1);
1567         }
1568 
1569         if (pipe(dead_child_pipe) != 0) {
1570                 zperror(gettext("could not create signalling pipe"));
1571                 return (1);
1572         }
1573         close_on_sig = dead_child_pipe[0];
1574 
1575         /*
1576          * If any of the pipe FD's winds up being less than STDERR, then we
1577          * have a mess on our hands-- and we are lacking some of the I/O
1578          * streams we would expect anyway.  So we bail.
1579          */
1580         if (stdin_pipe[0] <= STDERR_FILENO ||
1581             stdin_pipe[1] <= STDERR_FILENO ||
1582             stdout_pipe[0] <= STDERR_FILENO ||
1583             stdout_pipe[1] <= STDERR_FILENO ||
1584             stderr_pipe[0] <= STDERR_FILENO ||
1585             stderr_pipe[1] <= STDERR_FILENO ||
1586             dead_child_pipe[0] <= STDERR_FILENO ||
1587             dead_child_pipe[1] <= STDERR_FILENO) {
1588                 zperror(gettext("process lacks valid STDIN, STDOUT, STDERR"));
1589                 return (1);
1590         }
1591 
1592         if (prefork_dropprivs() != 0) {
1593                 zperror(gettext("could not allocate privilege set"));
1594                 return (1);
1595         }
1596 
1597         (void) sigset(SIGCLD, sigcld);
1598         (void) sigemptyset(&block_cld);
1599         (void) sigaddset(&block_cld, SIGCLD);
1600         (void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
1601 
1602         if ((child_pid = fork()) == -1) {
1603                 (void) ct_tmpl_clear(tmpl_fd);
1604                 (void) close(tmpl_fd);
1605                 zperror(gettext("could not fork"));
1606                 return (1);
1607         } else if (child_pid == 0) { /* child process */
1608                 (void) ct_tmpl_clear(tmpl_fd);
1609 
1610                 /*
1611                  * Do a dance to get the pipes hooked up as FD's 0, 1 and 2.
1612                  */
1613                 (void) close(STDIN_FILENO);
1614                 (void) close(STDOUT_FILENO);
1615                 (void) close(STDERR_FILENO);
1616                 (void) dup2(stdin_pipe[1], STDIN_FILENO);
1617                 (void) dup2(stdout_pipe[1], STDOUT_FILENO);
1618                 (void) dup2(stderr_pipe[1], STDERR_FILENO);
1619                 (void) closefrom(STDERR_FILENO + 1);
1620 
1621                 (void) sigset(SIGCLD, SIG_DFL);
1622                 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1623                 /*
1624                  * In case any of stdin, stdout or stderr are streams,
1625                  * anchor them to prevent malicious I_POPs.
1626                  */
1627                 (void) ioctl(STDIN_FILENO, I_ANCHOR);
1628                 (void) ioctl(STDOUT_FILENO, I_ANCHOR);
1629                 (void) ioctl(STDERR_FILENO, I_ANCHOR);
1630 
1631                 if (zone_enter(zoneid) == -1) {
1632                         zerror(gettext("could not enter zone %s: %s"),
1633                             zonename, strerror(errno));
1634                         _exit(1);
1635                 }
1636 
1637                 /*
1638                  * For non-native zones, tell libc where it can find locale
1639                  * specific getttext() messages.
1640                  */
1641                 if (access("/.SUNWnative/usr/lib/locale", R_OK) == 0)
1642                         (void) bindtextdomain(TEXT_DOMAIN,
1643                             "/.SUNWnative/usr/lib/locale");
1644                 else if (access("/native/usr/lib/locale", R_OK) == 0)
1645                         (void) bindtextdomain(TEXT_DOMAIN,
1646                             "/native/usr/lib/locale");
1647 
1648                 if (!failsafe)
1649                         new_env = prep_env_noninteractive(user_cmd, new_env);
1650 
1651                 if (new_env == NULL) {
1652                         _exit(1);
1653                 }
1654 
1655                 /*
1656                  * Move into a new process group; the zone_enter will have
1657                  * placed us into zsched's session, and we want to be in
1658                  * a unique process group.
1659                  */
1660                 (void) setpgid(getpid(), getpid());
1661 
1662                 /*
1663                  * The child needs to run as root to
1664                  * execute the su program.
1665                  */
1666                 if (setuid(0) == -1) {
1667                         zperror(gettext("insufficient privilege"));
1668                         return (1);
1669                 }
1670 
1671                 (void) execve(new_args[0], new_args, new_env);
1672                 zperror(gettext("exec failure"));
1673                 _exit(1);
1674         }
1675         /* parent */
1676 
1677         /* close pipe sides written by child */
1678         (void) close(stdout_pipe[1]);
1679         (void) close(stderr_pipe[1]);
1680 
1681         (void) sigset(SIGINT, sig_forward);
1682 
1683         postfork_dropprivs();
1684 
1685         (void) ct_tmpl_clear(tmpl_fd);
1686         (void) close(tmpl_fd);
1687 
1688         (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1689         doio(stdin_pipe[0], stdin_pipe[1], stdout_pipe[0], stderr_pipe[0],
1690             dead_child_pipe[1], B_TRUE);
1691         do {
1692                 retval = waitpid(child_pid, &child_status, 0);
1693                 if (retval == -1) {
1694                         child_status = 0;
1695                 }
1696         } while (retval != child_pid && errno != ECHILD);
1697 
1698         return (WEXITSTATUS(child_status));
1699 }
1700 
/*
 * Return the login name belonging to the real user ID of this process.
 *
 * Authorizations are checked to restrict access based on the requested
 * operation and zone name.  It is assumed that the program is running
 * with all privileges, but that the real user ID is that of the user or
 * role on whose behalf we are operating.  The name looked up here is the
 * one used for the subsequent authorization checks.
 *
 * getpwuid() hands back a pointer into static storage which any later
 * getpw*() call is allowed to overwrite, so the name is duplicated and
 * the caller receives its own heap copy.  The copy is never freed here;
 * the caller owns it.
 *
 * This function never returns NULL: if the passwd entry cannot be found
 * or the copy cannot be allocated, a diagnostic is printed and the
 * process terminates with status 1.
 */
static char *
get_username(void)
{
        struct passwd *pwent;
        char *name;

        if ((pwent = getpwuid(getuid())) == NULL ||
            (name = strdup(pwent->pw_name)) == NULL) {
                zerror(gettext("could not get user name."));
                _exit(1);
        }
        return (name);
}
1723 
1724 int
1725 main(int argc, char **argv)
1726 {
1727         int arg, console = 0;
1728         zoneid_t zoneid;
1729         zone_state_t st;
1730         char *login = "root";
1731         int lflag = 0;
1732         char *zonename = NULL;
1733         char **proc_args = NULL;
1734         char **new_args, **new_env;
1735         sigset_t block_cld;
1736         char devroot[MAXPATHLEN];
1737         char *slavename, slaveshortname[MAXPATHLEN];
1738         priv_set_t *privset;
1739         int tmpl_fd;
1740         char zonebrand[MAXNAMELEN];
1741         char default_brand[MAXNAMELEN];
1742         struct stat sb;
1743         char kernzone[ZONENAME_MAX];
1744         brand_handle_t bh;
1745         char user_cmd[MAXPATHLEN];
1746         char authname[MAXAUTHS];
1747 
1748         (void) setlocale(LC_ALL, "");
1749         (void) textdomain(TEXT_DOMAIN);
1750 
1751         (void) getpname(argv[0]);
1752         username = get_username();
1753 
1754         while ((arg = getopt(argc, argv, "ECR:Se:l:Q")) != EOF) {
1755                 switch (arg) {
1756                 case 'C':
1757                         console = 1;
1758                         break;
1759                 case 'E':
1760                         nocmdchar = 1;
1761                         break;
1762                 case 'R':       /* undocumented */
1763                         if (*optarg != '/') {
1764                                 zerror(gettext("root path must be absolute."));
1765                                 exit(2);
1766                         }
1767                         if (stat(optarg, &sb) == -1 || !S_ISDIR(sb.st_mode)) {
1768                                 zerror(
1769                                     gettext("root path must be a directory."));
1770                                 exit(2);
1771                         }
1772                         zonecfg_set_root(optarg);
1773                         break;
1774                 case 'Q':
1775                         quiet = 1;
1776                         break;
1777                 case 'S':
1778                         failsafe = 1;
1779                         break;
1780                 case 'e':
1781                         set_cmdchar(optarg);
1782                         break;
1783                 case 'l':
1784                         login = optarg;
1785                         lflag = 1;
1786                         break;
1787                 default:
1788                         usage();
1789                 }
1790         }
1791 
1792         if (console != 0 && lflag != 0) {
1793                 zerror(gettext("-l may not be specified for console login"));
1794                 usage();
1795         }
1796 
1797         if (console != 0 && failsafe != 0) {
1798                 zerror(gettext("-S may not be specified for console login"));
1799                 usage();
1800         }
1801 
1802         if (console != 0 && zonecfg_in_alt_root()) {
1803                 zerror(gettext("-R may not be specified for console login"));
1804                 exit(2);
1805         }
1806 
1807         if (failsafe != 0 && lflag != 0) {
1808                 zerror(gettext("-l may not be specified for failsafe login"));
1809                 usage();
1810         }
1811 
1812         if (optind == (argc - 1)) {
1813                 /*
1814                  * zone name, no process name; this should be an interactive
1815                  * as long as STDIN is really a tty.
1816                  */
1817                 if (isatty(STDIN_FILENO))
1818                         interactive = 1;
1819                 zonename = argv[optind];
1820         } else if (optind < (argc - 1)) {
1821                 if (console) {
1822                         zerror(gettext("Commands may not be specified for "
1823                             "console login."));
1824                         usage();
1825                 }
1826                 /* zone name and process name, and possibly some args */
1827                 zonename = argv[optind];
1828                 proc_args = &argv[optind + 1];
1829                 interactive = 0;
1830         } else {
1831                 usage();
1832         }
1833 
1834         if (getzoneid() != GLOBAL_ZONEID) {
1835                 zerror(gettext("'%s' may only be used from the global zone"),
1836                     pname);
1837                 return (1);
1838         }
1839 
1840         if (strcmp(zonename, GLOBAL_ZONENAME) == 0) {
1841                 zerror(gettext("'%s' not applicable to the global zone"),
1842                     pname);
1843                 return (1);
1844         }
1845 
1846         if (zone_get_state(zonename, &st) != Z_OK) {
1847                 zerror(gettext("zone '%s' unknown"), zonename);
1848                 return (1);
1849         }
1850 
1851         if (st < ZONE_STATE_INSTALLED) {
1852                 zerror(gettext("cannot login to a zone which is '%s'"),
1853                     zone_state_str(st));
1854                 return (1);
1855         }
1856 
1857         /*
1858          * In both console and non-console cases, we require all privs.
1859          * In the console case, because we may need to startup zoneadmd.
1860          * In the non-console case in order to do zone_enter(2), zonept()
1861          * and other tasks.
1862          */
1863 
1864         if ((privset = priv_allocset()) == NULL) {
1865                 zperror(gettext("priv_allocset failed"));
1866                 return (1);
1867         }
1868 
1869         if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
1870                 zperror(gettext("getppriv failed"));
1871                 priv_freeset(privset);
1872                 return (1);
1873         }
1874 
1875         if (priv_isfullset(privset) == B_FALSE) {
1876                 zerror(gettext("You lack sufficient privilege to run "
1877                     "this command (all privs required)"));
1878                 priv_freeset(privset);
1879                 return (1);
1880         }
1881         priv_freeset(privset);
1882 
1883         /*
1884          * Check if user is authorized for requested usage of the zone
1885          */
1886 
1887         (void) snprintf(authname, MAXAUTHS, "%s%s%s",
1888             ZONE_MANAGE_AUTH, KV_OBJECT, zonename);
1889         if (chkauthattr(authname, username) == 0) {
1890                 if (console) {
1891                         zerror(gettext("%s is not authorized for console "
1892                             "access to  %s zone."),
1893                             username, zonename);
1894                         return (1);
1895                 } else {
1896                         (void) snprintf(authname, MAXAUTHS, "%s%s%s",
1897                             ZONE_LOGIN_AUTH, KV_OBJECT, zonename);
1898                         if (failsafe || !interactive) {
1899                                 zerror(gettext("%s is not authorized for  "
1900                                     "failsafe or non-interactive login "
1901                                     "to  %s zone."), username, zonename);
1902                                 return (1);
1903                         } else if (chkauthattr(authname, username) == 0) {
1904                                 zerror(gettext("%s is not authorized "
1905                                     " to login to %s zone."),
1906                                     username, zonename);
1907                                 return (1);
1908                         }
1909                 }
1910         } else {
1911                 forced_login = B_TRUE;
1912         }
1913 
1914         /*
1915          * The console is a separate case from the rest of the code; handle
1916          * it first.
1917          */
1918         if (console) {
1919                 /*
1920                  * Ensure that zoneadmd for this zone is running.
1921                  */
1922                 if (start_zoneadmd(zonename) == -1)
1923                         return (1);
1924 
1925                 /*
1926                  * Make contact with zoneadmd.
1927                  */
1928                 if (get_console_master(zonename) == -1)
1929                         return (1);
1930 
1931                 if (!quiet)
1932                         (void) printf(
1933                             gettext("[Connected to zone '%s' console]\n"),
1934                             zonename);
1935 
1936                 if (set_tty_rawmode(STDIN_FILENO) == -1) {
1937                         reset_tty();
1938                         zperror(gettext("failed to set stdin pty to raw mode"));
1939                         return (1);
1940                 }
1941 
1942                 (void) sigset(SIGWINCH, sigwinch);
1943                 (void) sigwinch(0);
1944 
1945                 /*
1946                  * Run the I/O loop until we get disconnected.
1947                  */
1948                 doio(masterfd, -1, masterfd, -1, -1, B_FALSE);
1949                 reset_tty();
1950                 if (!quiet)
1951                         (void) printf(
1952                             gettext("\n[Connection to zone '%s' console "
1953                             "closed]\n"), zonename);
1954 
1955                 return (0);
1956         }
1957 
1958         if (st != ZONE_STATE_RUNNING && st != ZONE_STATE_MOUNTED) {
1959                 zerror(gettext("login allowed only to running zones "
1960                     "(%s is '%s')."), zonename, zone_state_str(st));
1961                 return (1);
1962         }
1963 
1964         (void) strlcpy(kernzone, zonename, sizeof (kernzone));
1965         if (zonecfg_in_alt_root()) {
1966                 FILE *fp = zonecfg_open_scratch("", B_FALSE);
1967 
1968                 if (fp == NULL || zonecfg_find_scratch(fp, zonename,
1969                     zonecfg_get_root(), kernzone, sizeof (kernzone)) == -1) {
1970                         zerror(gettext("cannot find scratch zone %s"),
1971                             zonename);
1972                         if (fp != NULL)
1973                                 zonecfg_close_scratch(fp);
1974                         return (1);
1975                 }
1976                 zonecfg_close_scratch(fp);
1977         }
1978 
1979         if ((zoneid = getzoneidbyname(kernzone)) == -1) {
1980                 zerror(gettext("failed to get zoneid for zone '%s'"),
1981                     zonename);
1982                 return (1);
1983         }
1984 
1985         /*
1986          * We need the zone root path only if we are setting up a pty.
1987          */
1988         if (zone_get_devroot(zonename, devroot, sizeof (devroot)) == -1) {
1989                 zerror(gettext("could not get dev path for zone %s"),
1990                     zonename);
1991                 return (1);
1992         }
1993 
1994         if (zone_get_brand(zonename, zonebrand, sizeof (zonebrand)) != Z_OK) {
1995                 zerror(gettext("could not get brand for zone %s"), zonename);
1996                 return (1);
1997         }
1998         /*
1999          * In the alternate root environment, the only supported
2000          * operations are mount and unmount.  In this case, just treat
2001          * the zone as native if it is cluster.  Cluster zones can be
2002          * native for the purpose of LU or upgrade, and the cluster
2003          * brand may not exist in the miniroot (such as in net install
2004          * upgrade).
2005          */
2006         if (zonecfg_default_brand(default_brand,
2007             sizeof (default_brand)) != Z_OK) {
2008                 zerror(gettext("unable to determine default brand"));
2009                 return (1);
2010         }
2011         if (zonecfg_in_alt_root() &&
2012             strcmp(zonebrand, CLUSTER_BRAND_NAME) == 0) {
2013                 (void) strlcpy(zonebrand, default_brand, sizeof (zonebrand));
2014         }
2015 
2016         if ((bh = brand_open(zonebrand)) == NULL) {
2017                 zerror(gettext("could not open brand for zone %s"), zonename);
2018                 return (1);
2019         }
2020 
2021         if ((new_args = prep_args(bh, login, proc_args)) == NULL) {
2022                 zperror(gettext("could not assemble new arguments"));
2023                 brand_close(bh);
2024                 return (1);
2025         }
2026         /*
2027          * Get the brand specific user_cmd.  This command is used to get
2028          * a passwd(4) entry for login.
2029          */
2030         if (!interactive && !failsafe) {
2031                 if (zone_get_user_cmd(bh, login, user_cmd,
2032                     sizeof (user_cmd)) == NULL) {
2033                         zerror(gettext("could not get user_cmd for zone %s"),
2034                             zonename);
2035                         brand_close(bh);
2036                         return (1);
2037                 }
2038         }
2039         brand_close(bh);
2040 
2041         if ((new_env = prep_env()) == NULL) {
2042                 zperror(gettext("could not assemble new environment"));
2043                 return (1);
2044         }
2045 
2046         if (!interactive)
2047                 return (noninteractive_login(zonename, user_cmd, zoneid,
2048                     new_args, new_env));
2049 
2050         if (zonecfg_in_alt_root()) {
2051                 zerror(gettext("cannot use interactive login with scratch "
2052                     "zone"));
2053                 return (1);
2054         }
2055 
2056         /*
2057          * Things are more complex in interactive mode; we get the
2058          * master side of the pty, then place the user's terminal into
2059          * raw mode.
2060          */
2061         if (get_master_pty() == -1) {
2062                 zerror(gettext("could not setup master pty device"));
2063                 return (1);
2064         }
2065 
2066         /*
2067          * Compute the "short name" of the pts.  /dev/pts/2 --> pts/2
2068          */
2069         if ((slavename = ptsname(masterfd)) == NULL) {
2070                 zperror(gettext("failed to get name for pseudo-tty"));
2071                 return (1);
2072         }
2073         if (strncmp(slavename, "/dev/", strlen("/dev/")) == 0)
2074                 (void) strlcpy(slaveshortname, slavename + strlen("/dev/"),
2075                     sizeof (slaveshortname));
2076         else
2077                 (void) strlcpy(slaveshortname, slavename,
2078                     sizeof (slaveshortname));
2079 
2080         if (!quiet)
2081                 (void) printf(gettext("[Connected to zone '%s' %s]\n"),
2082                     zonename, slaveshortname);
2083 
2084         if (set_tty_rawmode(STDIN_FILENO) == -1) {
2085                 reset_tty();
2086                 zperror(gettext("failed to set stdin pty to raw mode"));
2087                 return (1);
2088         }
2089 
2090         if (prefork_dropprivs() != 0) {
2091                 reset_tty();
2092                 zperror(gettext("could not allocate privilege set"));
2093                 return (1);
2094         }
2095 
2096         /*
2097          * We must mask SIGCLD until after we have coped with the fork
2098          * sufficiently to deal with it; otherwise we can race and receive the
2099          * signal before child_pid has been initialized (yes, this really
2100          * happens).
2101          */
2102         (void) sigset(SIGCLD, sigcld);
2103         (void) sigemptyset(&block_cld);
2104         (void) sigaddset(&block_cld, SIGCLD);
2105         (void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
2106 
2107         /*
2108          * We activate the contract template at the last minute to
2109          * avoid intermediate functions that could be using fork(2)
2110          * internally.
2111          */
2112         if ((tmpl_fd = init_template()) == -1) {
2113                 reset_tty();
2114                 zperror(gettext("could not create contract"));
2115                 return (1);
2116         }
2117 
2118         if ((child_pid = fork()) == -1) {
2119                 (void) ct_tmpl_clear(tmpl_fd);
2120                 reset_tty();
2121                 zperror(gettext("could not fork"));
2122                 return (1);
2123         } else if (child_pid == 0) { /* child process */
2124                 int slavefd, newslave;
2125 
2126                 (void) ct_tmpl_clear(tmpl_fd);
2127                 (void) close(tmpl_fd);
2128 
2129                 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2130 
2131                 if ((slavefd = init_slave_pty(zoneid, devroot)) == -1)
2132                         return (1);
2133 
2134                 /*
2135                  * Close all fds except for the slave pty.
2136                  */
2137                 (void) fdwalk(close_func, &slavefd);
2138 
2139                 /*
2140                  * Temporarily dup slavefd to stderr; that way if we have
2141                  * to print out that zone_enter failed, the output will
2142                  * have somewhere to go.
2143                  */
2144                 if (slavefd != STDERR_FILENO)
2145                         (void) dup2(slavefd, STDERR_FILENO);
2146 
2147                 if (zone_enter(zoneid) == -1) {
2148                         zerror(gettext("could not enter zone %s: %s"),
2149                             zonename, strerror(errno));
2150                         return (1);
2151                 }
2152 
2153                 if (slavefd != STDERR_FILENO)
2154                         (void) close(STDERR_FILENO);
2155 
2156                 /*
2157                  * We take pains to get this process into a new process
2158                  * group, and subsequently a new session.  In this way,
2159                  * we'll have a session which doesn't yet have a controlling
2160                  * terminal.  When we open the slave, it will become the
2161                  * controlling terminal; no PIDs concerning pgrps or sids
2162                  * will leak inappropriately into the zone.
2163                  */
2164                 (void) setpgrp();
2165 
2166                 /*
2167                  * We need the slave pty to be referenced from the zone's
2168                  * /dev in order to ensure that the devt's, etc are all
2169                  * correct.  Otherwise we break ttyname and the like.
2170                  */
2171                 if ((newslave = open(slavename, O_RDWR)) == -1) {
2172                         (void) close(slavefd);
2173                         return (1);
2174                 }
2175                 (void) close(slavefd);
2176                 slavefd = newslave;
2177 
2178                 /*
2179                  * dup the slave to the various FDs, so that when the
2180                  * spawned process does a write/read it maps to the slave
2181                  * pty.
2182                  */
2183                 (void) dup2(slavefd, STDIN_FILENO);
2184                 (void) dup2(slavefd, STDOUT_FILENO);
2185                 (void) dup2(slavefd, STDERR_FILENO);
2186                 if (slavefd != STDIN_FILENO && slavefd != STDOUT_FILENO &&
2187                     slavefd != STDERR_FILENO) {
2188                         (void) close(slavefd);
2189                 }
2190 
2191                 /*
2192                  * In failsafe mode, we don't use login(1), so don't try
2193                  * setting up a utmpx entry.
2194                  *
2195                  * A branded zone may have very different utmpx semantics.
2196                  * At the moment, we only have two brand types:
2197                  * Solaris-like (native, sn1) and Linux.  In the Solaris
2198                  * case, we know exactly how to do the necessary utmpx
2199                  * setup.  Fortunately for us, the Linux /bin/login is
2200                  * prepared to deal with a non-initialized utmpx entry, so
2201                  * we can simply skip it.  If future brands don't fall into
2202                  * either category, we'll have to add a per-brand utmpx
2203                  * setup hook.
2204                  */
2205                 if (!failsafe && (strcmp(zonebrand, "lx") != 0))
2206                         if (setup_utmpx(slaveshortname) == -1)
2207                                 return (1);
2208 
2209                 /*
2210                  * The child needs to run as root to
2211                  * execute the brand's login program.
2212                  */
2213                 if (setuid(0) == -1) {
2214                         zperror(gettext("insufficient privilege"));
2215                         return (1);
2216                 }
2217 
2218                 (void) execve(new_args[0], new_args, new_env);
2219                 zperror(gettext("exec failure"));
2220                 return (1);
2221         }
2222 
2223         (void) ct_tmpl_clear(tmpl_fd);
2224         (void) close(tmpl_fd);
2225 
2226         /*
2227          * The rest is only for the parent process.
2228          */
2229         (void) sigset(SIGWINCH, sigwinch);
2230 
2231         postfork_dropprivs();
2232 
2233         (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2234         doio(masterfd, -1, masterfd, -1, -1, B_FALSE);
2235 
2236         reset_tty();
2237         if (!quiet)
2238                 (void) fprintf(stderr,
2239                     gettext("\n[Connection to zone '%s' %s closed]\n"),
2240                     zonename, slaveshortname);
2241 
2242         if (pollerr != 0) {
2243                 (void) fprintf(stderr, gettext("Error: connection closed due "
2244                     "to unexpected pollevents=0x%x.\n"), pollerr);
2245                 return (1);
2246         }
2247 
2248         return (0);
2249 }