1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2013 Gary Mills
  24  *
  25  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
  26  */
  27 
  28 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  29 /*        All Rights Reserved   */
  30 
  31 /*
  32  * University Copyright- Copyright (c) 1982, 1986, 1988
  33  * The Regents of the University of California
  34  * All Rights Reserved
  35  *
  36  * University Acknowledgment- Portions of this document are derived from
  37  * software developed by the University of California, Berkeley, and its
  38  * contributors.
  39  */
  40 
  41 /*
  42  * init(1M) is the general process spawning program.  Its primary job is to
  43  * start and restart svc.startd for smf(5).  For backwards-compatibility it also
  44  * spawns and respawns processes according to /etc/inittab and the current
  45  * run-level.  It reads /etc/default/init for general configuration.
  46  *
  47  * To change run-levels the system administrator runs init from the command
  48  * line with a level name.  init signals svc.startd via libscf and directs the
  49  * zone's init (pid 1 in the global zone) what to do by sending it a signal;
  50  * these signal numbers are commonly referred to in the code as 'states'.  Valid
  51  * run-levels are [sS0123456].  Additionally, init can be given directives
  52  * [qQabc], which indicate actions to be taken pertaining to /etc/inittab.
  53  *
  54  * When init processes inittab entries, it finds processes that are to be
  55  * spawned at various run-levels.  inittab contains the set of the levels for
  56  * which each inittab entry is valid.
  57  *
  58  * State File and Restartability
  59  *   Premature exit by init(1M) is handled as a special case by the kernel:
  60  *   init(1M) will be immediately re-executed, retaining its original PID.  (PID
  61  *   1 in the global zone.)  To track the processes it has previously spawned,
  62  *   as well as other mutable state, init(1M) regularly updates a state file
  63  *   such that its subsequent invocations have knowledge of its various
  64  *   dependent processes and duties.
  65  *
  66  * Process Contracts
  67  *   We start svc.startd(1M) in a contract and transfer inherited contracts when
  68  *   restarting it.  Everything else is started using the legacy contract
  69  *   template, and the created contracts are abandoned when they become empty.
  70  *
  71  * utmpx Entry Handling
  72  *   Because init(1M) no longer governs the startup process, its knowledge of
  73  *   when utmpx becomes writable is indirect.  However, spawned processes
  74  *   expect to be constructed with valid utmpx entries.  As a result, attempts
  75  *   to write normal entries will be retried until successful.
  76  *
  77  * Maintenance Mode
  78  *   In certain failure scenarios, init(1M) will enter a maintenance mode, in
  79  *   which it invokes sulogin(1M) to allow the operator an opportunity to
  80  *   repair the system.  Normally, this operation is performed as a
  81  *   fork(2)-exec(2)-waitpid(3C) sequence with the parent waiting for repair or
  82  *   diagnosis to be completed.  In the cases that fork(2) requests themselves
  83  *   fail, init(1M) will directly execute sulogin(1M), and allow the kernel to
  84  *   restart init(1M) on exit from the operator session.
  85  *
  86  *   One scenario where init(1M) enters its maintenance mode is when
  87  *   svc.startd(1M) begins to fail rapidly, defined as when the average time
  88  *   between recent failures drops below a given threshold.
  89  */
  90 
  91 #include <sys/contract/process.h>
  92 #include <sys/ctfs.h>
  93 #include <sys/stat.h>
  94 #include <sys/statvfs.h>
  95 #include <sys/stropts.h>
  96 #include <sys/systeminfo.h>
  97 #include <sys/time.h>
  98 #include <sys/termios.h>
  99 #include <sys/tty.h>
 100 #include <sys/types.h>
 101 #include <sys/utsname.h>
 102 
 103 #include <bsm/adt_event.h>
 104 #include <bsm/libbsm.h>
 105 #include <security/pam_appl.h>
 106 
 107 #include <assert.h>
 108 #include <ctype.h>
 109 #include <dirent.h>
 110 #include <errno.h>
 111 #include <fcntl.h>
 112 #include <libcontract.h>
 113 #include <libcontract_priv.h>
 114 #include <libintl.h>
 115 #include <libscf.h>
 116 #include <libscf_priv.h>
 117 #include <poll.h>
 118 #include <procfs.h>
 119 #include <signal.h>
 120 #include <stdarg.h>
 121 #include <stdio.h>
 122 #include <stdio_ext.h>
 123 #include <stdlib.h>
 124 #include <string.h>
 125 #include <strings.h>
 126 #include <syslog.h>
 127 #include <time.h>
 128 #include <ulimit.h>
 129 #include <unistd.h>
 130 #include <utmpx.h>
 131 #include <wait.h>
 132 #include <zone.h>
 133 #include <ucontext.h>
 134 
 135 #undef  sleep
 136 
     /* fioctl: issue ioctl() on the file descriptor behind stdio stream p. */
 137 #define fioctl(p, sptr, cmd)    ioctl(fileno(p), sptr, cmd)
     /* min: the smaller of a and b; either argument may be evaluated twice. */
 138 #define min(a, b)               (((a) < (b)) ? (a) : (b))
 139 
 140 #define TRUE    1
 141 #define FALSE   0
 142 #define FAILURE -1
 143 
 144 #define UT_USER_SZ      32      /* Size of a utmpx ut_user field */
 145 #define UT_LINE_SZ      32      /* Size of a utmpx ut_line field */
 146 
 147 /*
 148  * SLEEPTIME    The number of seconds "init" sleeps between wakeups if
 149  *              nothing else requires this "init" wakeup.
 150  */
 151 #define SLEEPTIME       (5 * 60)
 152 
 153 /*
 154  * MAXCMDL      The maximum length of a command string in inittab.
 155  */
 156 #define MAXCMDL 512
 157 
 158 /*
 159  * EXEC         The length of the prefix string added to all commands
 160  *              found in inittab.
 161  */
 162 #define EXEC    (sizeof ("exec ") - 1)
 163 
 164 /*
 165  * TWARN        The amount of time between warning signal, SIGTERM,
 166  *              and the fatal kill signal, SIGKILL.
 167  */
 168 #define TWARN   5
 169 
 170 #define id_eq(x, y)     ((x[0] == y[0] && x[1] == y[1] && x[2] == y[2] &&\
 171                         x[3] == y[3]) ? TRUE : FALSE)
 172 
 173 /*
 174  * The kernel's default umask is 022 these days; since some processes inherit
 175  * their umask from init, init will set it from CMASK in /etc/default/init.
 176  * init gets the default umask from the kernel; it sets it to 022 whenever
 177  * it wants to create a file and reverts to CMASK afterwards.
 178  */
 179 
 180 static int cmask;
 181 
 182 /*
 183  * The following definitions, concluding with the 'lvls' array, provide a
 184  * common mapping between level-name (like 'S'), signal number (state),
 185  * run-level mask, and specific properties associated with a run-level.
 186  * This array should be accessed using the routines lvlname_to_state(),
 187  * lvlname_to_mask(), state_to_mask(), and state_to_flags().
 188  */
 189 
 190 /*
 191  * Correspondence of signals to init actions.
 192  */
 193 #define LVLQ            SIGHUP
 194 #define LVL0            SIGINT
 195 #define LVL1            SIGQUIT
 196 #define LVL2            SIGILL
 197 #define LVL3            SIGTRAP
 198 #define LVL4            SIGIOT
 199 #define LVL5            SIGEMT
 200 #define LVL6            SIGFPE
 201 #define SINGLE_USER     SIGBUS
 202 #define LVLa            SIGSEGV
 203 #define LVLb            SIGSYS
 204 #define LVLc            SIGPIPE
 205 
 206 /*
 207  * Bit Mask for each level.  Used to determine legal levels.
 208  */
 209 #define MASK0   0x0001
 210 #define MASK1   0x0002
 211 #define MASK2   0x0004
 212 #define MASK3   0x0008
 213 #define MASK4   0x0010
 214 #define MASK5   0x0020
 215 #define MASK6   0x0040
 216 #define MASKSU  0x0080
 217 #define MASKa   0x0100
 218 #define MASKb   0x0200
 219 #define MASKc   0x0400
 220 
 221 #define MASK_NUMERIC (MASK0 | MASK1 | MASK2 | MASK3 | MASK4 | MASK5 | MASK6)
 222 #define MASK_abc (MASKa | MASKb | MASKc)
 223 
 224 /*
 225  * Flags to indicate properties of various states.
 226  */
 227 #define LSEL_RUNLEVEL   0x0001  /* runlevels you can transition to */
 228 
     /*
      * One row of the 'lvls' table: ties a level name to its signal number
      * (state), its run-level mask and its property flags.  The table's
      * initializers are positional, so the field order here matters.
      */
 229 typedef struct lvl {
 230         int     lvl_state;      /* signal number used as the state */
 231         int     lvl_mask;       /* MASK* bit for the level, or 0 */
 232         char    lvl_name;       /* level name character, e.g. 'S' */
 233         int     lvl_flags;      /* LSEL_* property flags */
 234 } lvl_t;
 235 
     /*
      * The level table; columns are state, mask, name, flags.  The upper- and
      * lower-case spellings of a name ('Q'/'q', 'S'/'s') share one state and
      * mask.  Only the numeric levels and single-user carry LSEL_RUNLEVEL;
      * Q/q has no mask at all, and a/b/c have masks but no flags.
      */
 236 static lvl_t lvls[] = {
 237         { LVLQ,         0,      'Q', 0                                  },
 238         { LVLQ,         0,      'q', 0                                  },
 239         { LVL0,         MASK0,  '0', LSEL_RUNLEVEL                      },
 240         { LVL1,         MASK1,  '1', LSEL_RUNLEVEL                      },
 241         { LVL2,         MASK2,  '2', LSEL_RUNLEVEL                      },
 242         { LVL3,         MASK3,  '3', LSEL_RUNLEVEL                      },
 243         { LVL4,         MASK4,  '4', LSEL_RUNLEVEL                      },
 244         { LVL5,         MASK5,  '5', LSEL_RUNLEVEL                      },
 245         { LVL6,         MASK6,  '6', LSEL_RUNLEVEL                      },
 246         { SINGLE_USER,  MASKSU, 'S', LSEL_RUNLEVEL                      },
 247         { SINGLE_USER,  MASKSU, 's', LSEL_RUNLEVEL                      },
 248         { LVLa,         MASKa,  'a', 0                                  },
 249         { LVLb,         MASKb,  'b', 0                                  },
 250         { LVLc,         MASKc,  'c', 0                                  }
 251 };
 252 
     /* Number of rows in lvls[]. */
 253 #define LVL_NELEMS (sizeof (lvls) / sizeof (lvl_t))
 254 
 255 /*
 256  * Legal action field values.
 257  */
 258 #define OFF             0       /* Kill process if on, else ignore */
 259 #define RESPAWN         1       /* Continuously restart process when it dies */
 260 #define ONDEMAND        RESPAWN /* Respawn for a, b, c type processes */
 261 #define ONCE            2       /* Start process, do not respawn when dead */
 262 #define WAIT            3       /* Perform once and wait to complete */
 263 #define BOOT            4       /* Start at boot time only */
 264 #define BOOTWAIT        5       /* Start at boot time and wait to complete */
 265 #define POWERFAIL       6       /* Start on powerfail */
 266 #define POWERWAIT       7       /* Start and wait for complete on powerfail */
 267 #define INITDEFAULT     8       /* Default level "init" should start at */
 268 #define SYSINIT         9       /* Actions performed before init speaks */
 269 
 270 #define M_OFF           0001
 271 #define M_RESPAWN       0002
 272 #define M_ONDEMAND      M_RESPAWN
 273 #define M_ONCE          0004
 274 #define M_WAIT          0010
 275 #define M_BOOT          0020
 276 #define M_BOOTWAIT      0040
 277 #define M_PF            0100
 278 #define M_PWAIT         0200
 279 #define M_INITDEFAULT   0400
 280 #define M_SYSINIT       01000
 281 
 282 /* States for the inittab parser in getcmd(). */
 283 #define ID      1
 284 #define LEVELS  2
 285 #define ACTION  3
 286 #define COMMAND 4
 287 #define COMMENT 5
 288 
 289 /*
 290  * inittab entry id constants
 291  */
 292 #define INITTAB_ENTRY_ID_SIZE 4
 293 #define INITTAB_ENTRY_ID_STR_FORMAT "%.4s"      /* if INITTAB_ENTRY_ID_SIZE */
 294                                                 /* changes, this should */
 295                                                 /* change accordingly */
 296 
 297 /*
 298  * Init can be in any of three main states, "normal" mode where it is
 299  * processing entries for the lines file in a normal fashion, "boot" mode,
 300  * where it is only interested in the boot actions, and "powerfail" mode,
 301  * where it is only interested in powerfail related actions. The following
 302  * masks declare the legal actions for each mode.
 303  */
 304 #define NORMAL_MODES    (M_OFF | M_RESPAWN | M_ONCE | M_WAIT)
 305 #define BOOT_MODES      (M_BOOT | M_BOOTWAIT)
 306 #define PF_MODES        (M_PF | M_PWAIT)
 307 
     /*
      * One slot of init's process table.  The bits that may appear in p_flags
      * are described in the comment that follows this structure.
      */
 308 struct PROC_TABLE {
 309         char    p_id[INITTAB_ENTRY_ID_SIZE];    /* Four letter unique id of */
 310                                                 /* process */
 311         pid_t   p_pid;          /* Process id */
 312         short   p_count;        /* How many respawns of this command in */
 313                                 /*   the current series */
 314         long    p_time;         /* Start time for a series of respawns */
 315         short   p_flags;        /* OCCUPIED, LIVING, ... (see below) */
 316         short   p_exit;         /* Exit status of a process which died */
 317 };
 318 
 319 /*
 320  * Flags for the "p_flags" word of a PROC_TABLE entry:
 321  *
 322  *      OCCUPIED        This slot in init's proc table is in use.
 323  *
 324  *      LIVING          Process is alive.
 325  *
 326  *      NOCLEANUP       efork() is not allowed to cleanup this entry even
 327  *                      if process is dead.
 328  *
 329  *      NAMED           This process has a name, i.e. came from inittab.
 330  *
 331  *      DEMANDREQUEST   Process started by a "telinit [abc]" command.  Processes
 332  *                      formed this way are respawnable and immune to level
 333  *                      changes as long as their entry exists in inittab.
 334  *
 335  *      TOUCHED         Flag used by remv() to determine whether it has looked
 336  *                      at an entry while checking for processes to be killed.
 337  *
 338  *      WARNED          Flag used by remv() to mark processes that have been
 339  *                      sent the SIGTERM signal.  If they don't die in 5
 340  *                      seconds, they are sent the SIGKILL signal.
 341  *
 342  *      KILLED          Flag used by remv() to mark procs that have been sent
 343  *                      the SIGTERM and SIGKILL signals.
 344  *
 345  *      PF_MASK         Bitwise or of legal flags, for sanity checking.
 346  */
 347 #define OCCUPIED        01
 348 #define LIVING          02
 349 #define NOCLEANUP       04
 350 #define NAMED           010
 351 #define DEMANDREQUEST   020
 352 #define TOUCHED         040
 353 #define WARNED          0100
 354 #define KILLED          0200
 355 #define PF_MASK         0377
 356 
 357 /*
 358  * Respawn limits for processes that are to be respawned:
 359  *
 360  *      SPAWN_INTERVAL  The number of seconds over which "init" will try to
 361  *                      respawn a process SPAWN_LIMIT times before it gets mad.
 362  *
 363  *      SPAWN_LIMIT     The number of respawns "init" will attempt in
 364  *                      SPAWN_INTERVAL seconds before it generates an
 365  *                      error message and inhibits further tries for
 366  *                      INHIBIT seconds.
 367  *
 368  *      INHIBIT         The number of seconds "init" ignores an entry it had
 369  *                      trouble spawning unless a "telinit Q" is received.
 370  */
 371 
 372 #define SPAWN_INTERVAL  (2*60)
 373 #define SPAWN_LIMIT     10
 374 #define INHIBIT         (5*60)
 375 
 376 /*
 377  * The maximum number of decimal digits for an id_t.  (ceil(log10 (max_id)))
 378  */
 379 #define ID_MAX_STR_LEN  10
 380 
 381 #define NULLPROC        ((struct PROC_TABLE *)(0))
 382 #define NO_ROOM         ((struct PROC_TABLE *)(FAILURE))
 383 
     /*
      * One inittab entry broken into its id, levels, action and command.
      * NOTE(review): presumably filled in by getcmd() -- confirm.
      */
 384 struct CMD_LINE {
 385         char c_id[INITTAB_ENTRY_ID_SIZE];       /* Four letter unique id of */
 386                                                 /* process to be affected by */
 387                                                 /* action */
 388         short c_levels; /* Mask of legal levels for process (MASK* bits) */
 389         short c_action; /* Mask for type of action required; presumably */
     				/* one of the M_* values -- confirm */
 390         char *c_command; /* Pointer to init command */
 391 };
 392 
     /*
      * A request to add or remove one pid.
      * NOTE(review): presumably the record format received on Pfd (the
      * INITPIPE) -- confirm against the reader.
      */
 393 struct  pidrec {
 394         int     pd_type;        /* Command type: ADDPID or REMPID */
 395         pid_t   pd_pid;         /* pid to add or remove */
 396 };
 397 
 398 /*
 399  * pd_type's
 400  */
 401 #define ADDPID  1
 402 #define REMPID  2
 403 
     /*
      * Node of a singly linked list of pids being watched, chained through
      * pl_next.  Plhead and Plfree are the two list heads.
      * NOTE(review): presumably the in-use list and the free list
      * respectively -- confirm.
      */
 404 static struct   pidlist {
 405         pid_t   pl_pid;         /* pid to watch for */
 406         int     pl_dflag;       /* Flag indicating SIGCLD from this pid */
 407         short   pl_exit;        /* Exit status of proc */
 408         struct  pidlist *pl_next; /* Next in list */
 409 } *Plhead, *Plfree;
 410 
 411 /*
 412  * The following structure contains a set of modes for /dev/syscon
 413  * and should match the default contents of /etc/ioctl.syscon.  It should also
 414  * be kept in-sync with base_termios in uts/common/io/ttcompat.c.
 415  */
 416 static struct termios   dflt_termios = {
 417         BRKINT|ICRNL|IXON|IMAXBEL,                      /* iflag */
 418         OPOST|ONLCR|TAB3,                               /* oflag */
 419         CS8|CREAD|B9600,                                /* cflag */
 420         ISIG|ICANON|ECHO|ECHOE|ECHOK|ECHOCTL|ECHOKE|IEXTEN, /* lflag */
 421         CINTR, CQUIT, CERASE, CKILL, CEOF, 0, 0, 0,
 422         0, 0, 0, 0, 0, 0, 0, 0,
 423         0, 0, 0
 424 };
 425 
 426 static struct termios   stored_syscon_termios;
 427 static int              write_ioctl = 0;        /* Rewrite /etc/ioctl.syscon */
 428 
     /*
      * Reasons why init was woken.  The one-bit flags in w_flags share storage
      * with the integer w_mask.
      * NOTE(review): presumably so that all reasons can be tested or cleared
      * in one operation through w_mask -- confirm against the users.
      */
 429 static union WAKEUP {
 430         struct WAKEFLAGS {
 431                 unsigned w_usersignal : 1;      /* User sent signal to "init" */
 432                 unsigned w_childdeath : 1;      /* An "init" child died */
 433                 unsigned w_powerhit : 1;        /* OS experienced powerfail */
 434         }       w_flags;
 435         int w_mask;             /* Overlay of every flag in w_flags */
 436 } wakeup;
 437 
 438 
     /*
      * init's mutable state, reached through the g_state pointer.
      * ist_proc_table is declared with a single element.
      * NOTE(review): presumably the structure is allocated larger than its
      * declared size (see g_state_sz and increase_proc_table_size()) so the
      * table can hold ist_num_proc slots, and this is the image kept in the
      * state file -- confirm before changing the layout.
      */
 439 struct init_state {
 440         int                     ist_runlevel;   /* see cur_state */
 441         int                     ist_num_proc;   /* see num_proc */
 442         int                     ist_utmpx_ok;   /* see utmpx_ok */
 443         struct PROC_TABLE       ist_proc_table[1]; /* see proc_table */
 444 };
 445 
     /* Shorthand for the fields of *g_state; g_state must be non-NULL. */
 446 #define cur_state       (g_state->ist_runlevel)
 447 #define num_proc        (g_state->ist_num_proc)
 448 #define proc_table      (g_state->ist_proc_table)
 449 #define utmpx_ok        (g_state->ist_utmpx_ok)
 450 
 451 /* Contract cookies. */
 452 #define ORDINARY_COOKIE         0
 453 #define STARTD_COOKIE           1
 454 
 455 
 456 #ifndef NDEBUG
 457 #define bad_error(func, err)    {                                       \
 458         (void) fprintf(stderr, "%s:%d: %s() failed with unexpected "    \
 459             "error %d.  Aborting.\n", __FILE__, __LINE__, (func), (err)); \
 460         abort();                                                        \
 461 }
 462 #else
 463 #define bad_error(func, err)    abort()
 464 #endif
 465 
 466 
 467 /*
 468  * Useful file and device names.
 469  */
 470 static char *CONSOLE      = "/dev/console";     /* Real system console */
 471 static char *INITPIPE_DIR = "/var/run";
 472 static char *INITPIPE     = "/var/run/initpipe";
 473 
 474 #define INIT_STATE_DIR "/etc/svc/volatile"
 475 static const char * const init_state_file = INIT_STATE_DIR "/init.state";
 476 static const char * const init_next_state_file =
 477         INIT_STATE_DIR "/init-next.state";
 478 
 479 static const int init_num_proc = 20;    /* Initial size of process table. */
 480 
 481 static char *UTMPX       = UTMPX_FILE;          /* Snapshot record file */
 482 static char *WTMPX       = WTMPX_FILE;          /* Long term record file */
 483 static char *INITTAB     = "/etc/inittab";      /* Script file for "init" */
 484 static char *SYSTTY      = "/dev/systty";       /* System Console */
 485 static char *SYSCON      = "/dev/syscon";       /* Virtual System console */
 486 static char *IOCTLSYSCON = "/etc/ioctl.syscon"; /* Last syscon modes */
 487 static char *ENVFILE     = "/etc/default/init"; /* Default env. */
 488 static char *SU = "/etc/sulogin";       /* Super-user program for single user */
 489 static char *SH = "/sbin/sh";           /* Standard shell */
 490 
 491 /*
 492  * Default Path.  /sbin is included in path only during sysinit phase
 493  */
 494 #define DEF_PATH        "PATH=/usr/sbin:/usr/bin"
 495 #define INIT_PATH       "PATH=/sbin:/usr/sbin:/usr/bin"
 496 
 497 static int      prior_state;
 498 static int      prev_state;     /* State "init" was in last time it woke */
 499 static int      new_state;      /* State user wants "init" to go to. */
 500 static int      lvlq_received;  /* Explicit request to examine state */
 501 static int      op_modes = BOOT_MODES; /* Current state of "init" */
 502 static int      Gchild = 0;     /* Flag to indicate "godchild" died, set in */
 503                                 /*   childeath() and cleared in cleanaux() */
 504 static int      Pfd = -1;       /* fd to receive pids thru */
 505 static unsigned int     spawncnt, pausecnt;
 506 static int      rsflag;         /* Set if a respawn has taken place */
 507 static volatile int time_up;    /* Flag set to TRUE by the alarm interrupt */
 508                                 /* routine each time an alarm interrupt */
 509                                 /* takes place. */
 510 static int      sflg = 0;       /* Set if we were booted -s to single user */
 511 static int      rflg = 0;       /* Set if booted -r, reconfigure devices */
 512 static int      bflg = 0;       /* Set if booted -b, don't run rc scripts */
 513 static pid_t    init_pid;       /* PID of "one true" init for current zone */
 514 
 515 static struct init_state *g_state = NULL;
 516 static size_t   g_state_sz;
 517 static int      booting = 1;    /* Set while we're booting. */
 518 
 519 /*
 520  * Array for default global environment.
 521  */
 522 #define MAXENVENT       24      /* Max number of default env variables + 1 */
 523                                 /* init can use three itself, so this leaves */
 524                                 /* 20 for the administrator in ENVFILE. */
 525 static char     *glob_envp[MAXENVENT];  /* Array of environment strings */
 526 static int      glob_envn;              /* Number of environment strings */
 527 
 528 
 529 static struct pollfd    poll_fds[1];
 530 static int              poll_nfds = 0;  /* poll_fds is uninitialized */
 531 
 532 /*
 533  * Contracts constants
 534  */
 535 #define SVC_INIT_PREFIX "init:/"
 536 #define SVC_AUX_SIZE (INITTAB_ENTRY_ID_SIZE + 1)
 537 #define SVC_FMRI_SIZE (sizeof (SVC_INIT_PREFIX) + INITTAB_ENTRY_ID_SIZE)
 538 
 539 static int      legacy_tmpl = -1;       /* fd for legacy contract template */
 540 static int      startd_tmpl = -1;       /* fd for svc.startd's template */
 541 static char     startd_svc_aux[SVC_AUX_SIZE];
 542 
 543 static char     startd_cline[256] = ""; /* svc.startd's command line */
 544 static int      do_restart_startd = 1;  /* Whether to restart svc.startd. */
 545 static char     *smf_options = NULL;    /* Options to give to startd. */
 546 static int      smf_debug = 0;          /* Messages for debugging smf(5) */
 547 static time_t   init_boot_time;         /* Substitute for kernel boot time. */
 548 
 549 #define NSTARTD_FAILURE_TIMES   3               /* trigger after 3 failures */
 550 #define STARTD_FAILURE_RATE_NS  5000000000LL    /* 1 failure/5 seconds */
 551 
 552 static hrtime_t startd_failure_time[NSTARTD_FAILURE_TIMES];
 553 static uint_t   startd_failure_index;
 554 
 555 
 556 static char     *prog_name(char *);
 557 static int      state_to_mask(int);
 558 static int      lvlname_to_mask(char, int *);
 559 static void     lscf_set_runlevel(char);
 560 static int      state_to_flags(int);
 561 static char     state_to_name(int);
 562 static int      lvlname_to_state(char);
 563 static int      getcmd(struct CMD_LINE *, char *);
 564 static int      realcon();
 565 static int      spawn_processes();
 566 static int      get_ioctl_syscon();
 567 static int      account(short, struct PROC_TABLE *, char *);
 568 static void     alarmclk();
 569 static void     childeath(int);
 570 static void     cleanaux();
 571 static void     clearent(pid_t, short);
 572 static void     console(boolean_t, char *, ...);
 573 static void     init_signals(void);
 574 static void     setup_pipe();
 575 static void     killproc(pid_t);
 576 static void     init_env();
 577 static void     boot_init();
 578 static void     powerfail();
 579 static void     remv();
 580 static void     write_ioctl_syscon();
 581 static void     spawn(struct PROC_TABLE *, struct CMD_LINE *);
 582 static void     setimer(int);
 583 static void     siglvl(int, siginfo_t *, ucontext_t *);
 584 static void     sigpoll(int);
 585 static void     enter_maintenance(void);
 586 static void     timer(int);
 587 static void     userinit(int, char **);
 588 static void     notify_pam_dead(struct utmpx *);
 589 static long     waitproc(struct PROC_TABLE *);
 590 static struct PROC_TABLE *efork(int, struct PROC_TABLE *, int);
 591 static struct PROC_TABLE *findpslot(struct CMD_LINE *);
 592 static void     increase_proc_table_size();
 593 static void     st_init();
 594 static void     st_write();
 595 static void     contracts_init();
 596 static void     contract_event(struct pollfd *);
 597 static int      startd_run(const char *, int, ctid_t);
 598 static void     startd_record_failure();
 599 static int      startd_failure_rate_critical();
 600 static char     *audit_boot_msg();
 601 static int      audit_put_record(int, int, char *);
 602 static void     update_boot_archive(int new_state);
 603 
 604 int
 605 main(int argc, char *argv[])
 606 {
 607         int     chg_lvl_flag = FALSE, print_banner = FALSE;
 608         int     may_need_audit = 1;
 609         int     c;
 610         char    *msg;
 611 
 612         /* Get a timestamp for use as boot time, if needed. */
 613         (void) time(&init_boot_time);
 614 
 615         /* Get the default umask */
 616         cmask = umask(022);
 617         (void) umask(cmask);
 618 
 619         /* Parse the arguments to init. Check for single user */
 620         opterr = 0;
 621         while ((c = getopt(argc, argv, "brsm:")) != EOF) {
 622                 switch (c) {
 623                 case 'b':
 624                         rflg = 0;
 625                         bflg = 1;
 626                         if (!sflg)
 627                                 sflg++;
 628                         break;
 629                 case 'r':
 630                         bflg = 0;
 631                         rflg++;
 632                         break;
 633                 case 's':
 634                         if (!bflg)
 635                                 sflg++;
 636                         break;
 637                 case 'm':
 638                         smf_options = optarg;
 639                         smf_debug = (strstr(smf_options, "debug") != NULL);
 640                         break;
 641                 }
 642         }
 643 
 644         /*
 645          * Determine if we are the main init, or a user invoked init, whose job
 646          * it is to inform init to change levels or perform some other action.
 647          */
 648         if (zone_getattr(getzoneid(), ZONE_ATTR_INITPID, &init_pid,
 649             sizeof (init_pid)) != sizeof (init_pid)) {
 650                 (void) fprintf(stderr, "could not get pid for init\n");
 651                 return (1);
 652         }
 653 
 654         /*
 655          * If this PID is not the same as the "true" init for the zone, then we
 656          * must be in 'user' mode.
 657          */
 658         if (getpid() != init_pid) {
 659                 userinit(argc, argv);
 660         }
 661 
 662         if (getzoneid() != GLOBAL_ZONEID) {
 663                 print_banner = TRUE;
 664         }
 665 
 666         /*
 667          * Initialize state (and set "booting").
 668          */
 669         st_init();
 670 
 671         if (booting && print_banner) {
 672                 struct utsname un;
 673                 char buf[BUFSIZ], *isa;
 674                 long ret;
 675                 int bits = 32;
 676 
 677                 /*
 678                  * We want to print the boot banner as soon as
 679                  * possible.  In the global zone, the kernel does it,
 680                  * but we do not have that luxury in non-global zones,
 681                  * so we will print it here.
 682                  */
 683                 (void) uname(&un);
 684                 ret = sysinfo(SI_ISALIST, buf, sizeof (buf));
 685                 if (ret != -1L && ret <= sizeof (buf)) {
 686                         for (isa = strtok(buf, " "); isa;
 687                             isa = strtok(NULL, " ")) {
 688                                 if (strcmp(isa, "sparcv9") == 0 ||
 689                                     strcmp(isa, "amd64") == 0) {
 690                                         bits = 64;
 691                                         break;
 692                                 }
 693                         }
 694                 }
 695 
 696                 console(B_FALSE,
 697                     "\n\n%s Release %s Version %s %d-bit\r\n",
 698                     un.sysname, un.release, un.version, bits);
 699                 console(B_FALSE,
 700                     "Copyright (c) 1983, 2010, Oracle and/or its affiliates."
 701                     " All rights reserved.\r\n");
 702         }
 703 
 704         /*
 705          * Get the ioctl settings for /dev/syscon from /etc/ioctl.syscon
 706          * so that it can be brought up in the state it was in when the
 707          * system went down; or set to defaults if ioctl.syscon isn't
 708          * valid.
 709          *
 710          * This needs to be done even if we're restarting so reset_modes()
 711          * will work in case we need to go down to single user mode.
 712          */
 713         write_ioctl = get_ioctl_syscon();
 714 
 715         /*
 716          * Set up all signals to be caught or ignored as appropriate.
 717          */
 718         init_signals();
 719 
 720         /* Load glob_envp from ENVFILE. */
 721         init_env();
 722 
 723         contracts_init();
 724 
 725         if (!booting) {
 726                 /* cur_state should have been read in. */
 727 
 728                 op_modes = NORMAL_MODES;
 729 
 730                 /* Rewrite the ioctl file if it was bad. */
 731                 if (write_ioctl)
 732                         write_ioctl_syscon();
 733         } else {
 734                 /*
 735                  * It's fine to boot up with state as zero, because
 736                  * startd will later tell us the real state.
 737                  */
 738                 cur_state = 0;
 739                 op_modes = BOOT_MODES;
 740 
 741                 boot_init();
 742         }
 743 
 744         prev_state = prior_state = cur_state;
 745 
 746         setup_pipe();
 747 
 748         /*
 749          * Here is the beginning of the main process loop.
 750          */
 751         for (;;) {
 752                 if (lvlq_received) {
 753                         setup_pipe();
 754                         lvlq_received = B_FALSE;
 755                 }
 756 
 757                 /*
 758                  * Clean up any accounting records for dead "godchildren".
 759                  */
 760                 if (Gchild)
 761                         cleanaux();
 762 
 763                 /*
 764                  * If in "normal" mode, check all living processes and initiate
 765                  * kill sequence on those that should not be there anymore.
 766                  */
 767                 if (op_modes == NORMAL_MODES && cur_state != LVLa &&
 768                     cur_state != LVLb && cur_state != LVLc)
 769                         remv();
 770 
 771                 /*
 772                  * If a change in run levels is the reason we awoke, now do
 773                  * the accounting to report the change in the utmp file.
 774                  * Also report the change on the system console.
 775                  */
 776                 if (chg_lvl_flag) {
 777                         chg_lvl_flag = FALSE;
 778 
 779                         if (state_to_flags(cur_state) & LSEL_RUNLEVEL) {
 780                                 char rl = state_to_name(cur_state);
 781 
 782                                 if (rl != -1)
 783                                         lscf_set_runlevel(rl);
 784                         }
 785 
 786                         may_need_audit = 1;
 787                 }
 788 
 789                 /*
 790                  * Scan the inittab file and spawn and respawn processes that
 791                  * should be alive in the current state. If inittab does not
 792                  * exist default to  single user mode.
 793                  */
 794                 if (spawn_processes() == FAILURE) {
 795                         prior_state = prev_state;
 796                         cur_state = SINGLE_USER;
 797                 }
 798 
 799                 /* If any respawns occurred, take note. */
 800                 if (rsflag) {
 801                         rsflag = 0;
 802                         spawncnt++;
 803                 }
 804 
 805                 /*
 806                  * If a powerfail signal was received during the last
 807                  * sequence, set mode to powerfail.  When spawn_processes() is
 808                  * entered the first thing it does is to check "powerhit".  If
 809                  * it is in PF_MODES then it clears "powerhit" and does
 810                  * a powerfail sequence.  If it is not in PF_MODES, then it
 811                  * puts itself in PF_MODES and then clears "powerhit".  Should
 812                  * "powerhit" get set again while spawn_processes() is working
 813                  * on a powerfail sequence, the following code  will see that
 814                  * spawn_processes() tries to execute the powerfail sequence
 815                  * again.  This guarantees that the powerfail sequence will be
 816                  * successfully completed before further processing takes
 817                  * place.
 818                  */
 819                 if (wakeup.w_flags.w_powerhit) {
 820                         op_modes = PF_MODES;
 821                         /*
 822                          * Make sure that cur_state != prev_state so that
 823                          * ONCE and WAIT types work.
 824                          */
 825                         prev_state = 0;
 826                 } else if (op_modes != NORMAL_MODES) {
 827                         /*
 828                          * If spawn_processes() was not just called while in
 829                          * normal mode, we set the mode to normal and it will
 830                          * be called again to check normal modes.  If we have
 831                          * just finished a powerfail sequence with prev_state
 832                          * equal to zero, we set prev_state equal to cur_state
 833                          * before the next pass through.
 834                          */
 835                         if (op_modes == PF_MODES)
 836                                 prev_state = cur_state;
 837                         op_modes = NORMAL_MODES;
 838                 } else if (cur_state == LVLa || cur_state == LVLb ||
 839                     cur_state == LVLc) {
 840                         /*
 841                          * If it was a change of levels that awakened us and the
 842                          * new level is one of the demand levels then reset
 843                          * cur_state to the previous state and do another scan
 844                          * to take care of the usual respawn actions.
 845                          */
 846                         cur_state = prior_state;
 847                         prior_state = prev_state;
 848                         prev_state = cur_state;
 849                 } else {
 850                         prev_state = cur_state;
 851 
 852                         if (wakeup.w_mask == 0) {
 853                                 int ret;
 854 
 855                                 if (may_need_audit && (cur_state == LVL3)) {
 856                                         msg = audit_boot_msg();
 857 
 858                                         may_need_audit = 0;
 859                                         (void) audit_put_record(ADT_SUCCESS,
 860                                             ADT_SUCCESS, msg);
 861                                         free(msg);
 862                                 }
 863 
 864                                 /*
 865                                  * "init" is finished with all actions for
 866                                  * the current wakeup.
 867                                  */
 868                                 ret = poll(poll_fds, poll_nfds,
 869                                     SLEEPTIME * MILLISEC);
 870                                 pausecnt++;
 871                                 if (ret > 0)
 872                                         contract_event(&poll_fds[0]);
 873                                 else if (ret < 0 && errno != EINTR)
 874                                         console(B_TRUE, "poll() error: %s\n",
 875                                             strerror(errno));
 876                         }
 877 
 878                         if (wakeup.w_flags.w_usersignal) {
 879                                 /*
 880                                  * Install the new level.  This could be a real
 881                                  * change in levels  or a telinit [Q|a|b|c] or
 882                                  * just a telinit to the same level at which
 883                                  * we are running.
 884                                  */
 885                                 if (new_state != cur_state) {
 886                                         if (new_state == LVLa ||
 887                                             new_state == LVLb ||
 888                                             new_state == LVLc) {
 889                                                 prev_state = prior_state;
 890                                                 prior_state = cur_state;
 891                                                 cur_state = new_state;
 892                                         } else {
 893                                                 prev_state = cur_state;
 894                                                 if (cur_state >= 0)
 895                                                         prior_state = cur_state;
 896                                                 cur_state = new_state;
 897                                                 chg_lvl_flag = TRUE;
 898                                         }
 899                                 }
 900 
 901                                 new_state = 0;
 902                         }
 903 
 904                         if (wakeup.w_flags.w_powerhit)
 905                                 op_modes = PF_MODES;
 906 
 907                         /*
 908                          * Clear all wakeup reasons.
 909                          */
 910                         wakeup.w_mask = 0;
 911                 }
 912         }
 913 
 914         /*NOTREACHED*/
 915 }
 916 
 917 static void
 918 update_boot_archive(int new_state)
 919 {
 920         if (new_state != LVL0 && new_state != LVL5 && new_state != LVL6)
 921                 return;
 922 
 923         if (getzoneid() != GLOBAL_ZONEID)
 924                 return;
 925 
 926         (void) system("/sbin/bootadm -ea update_all");
 927 }
 928 
 929 /*
 930  * void enter_maintenance()
 931  *   A simple invocation of sulogin(1M), with no baggage, in the case that we
 932  *   are unable to activate svc.startd(1M).  We fork; the child runs sulogin;
 933  *   we wait for it to exit.
 934  */
 935 static void
 936 enter_maintenance()
 937 {
 938         struct PROC_TABLE       *su_process;
 939 
 940         console(B_FALSE, "Requesting maintenance mode\n"
 941             "(See /lib/svc/share/README for additional information.)\n");
 942         (void) sighold(SIGCLD);
 943         while ((su_process = efork(M_OFF, NULLPROC, NOCLEANUP)) == NO_ROOM)
 944                 (void) pause();
 945         (void) sigrelse(SIGCLD);
 946         if (su_process == NULLPROC) {
 947                 int fd;
 948 
 949                 (void) fclose(stdin);
 950                 (void) fclose(stdout);
 951                 (void) fclose(stderr);
 952                 closefrom(0);
 953 
 954                 fd = open(SYSCON, O_RDWR | O_NOCTTY);
 955                 if (fd >= 0) {
 956                         (void) dup2(fd, 1);
 957                         (void) dup2(fd, 2);
 958                 } else {
 959                         /*
 960                          * Need to issue an error message somewhere.
 961                          */
 962                         syslog(LOG_CRIT, "init[%d]: cannot open %s; %s\n",
 963                             getpid(), SYSCON, strerror(errno));
 964                 }
 965 
 966                 /*
 967                  * Execute the "su" program.
 968                  */
 969                 (void) execle(SU, SU, "-", (char *)0, glob_envp);
 970                 console(B_TRUE, "execle of %s failed: %s\n", SU,
 971                     strerror(errno));
 972                 timer(5);
 973                 exit(1);
 974         }
 975 
 976         /*
 977          * If we are the parent, wait around for the child to die
 978          * or for "init" to be signaled to change levels.
 979          */
 980         while (waitproc(su_process) == FAILURE) {
 981                 /*
 982                  * All other reasons for waking are ignored when in
 983                  * single-user mode.  The only child we are interested
 984                  * in is being waited for explicitly by waitproc().
 985                  */
 986                 wakeup.w_mask = 0;
 987         }
 988 }
 989 
/*
 * remv() scans through "proc_table" and performs cleanup.  If
 * there is a process in the table, which shouldn't be here at
 * the current run level, then remv() kills the process.
 *
 * The work is done in four passes:
 *   1. Clear the TOUCHED flag on every slot.
 *   2. Walk the inittab entries returned by getcmd().  For the occupied slot
 *      whose id matches an entry, either start killing the process (when it
 *      may not run at cur_state) or mark it TOUCHED (when it may).
 *   3. Only when cur_state differs from prev_state: give processes that were
 *      sent SIGTERM up to TWARN seconds to exit, then send SIGKILL to those
 *      that are still marked LIVING.
 *   4. Kill living NAMED processes that were neither TOUCHED nor already
 *      KILLED, and do DEAD_PROCESS accounting for dead NAMED ones, clearing
 *      the slot if it was not TOUCHED.
 *
 * st_write() is called after pass 2 and again at the end.
 */
static void
remv()
{
        struct PROC_TABLE       *process;
        struct CMD_LINE         cmd;
        char                    cmd_string[MAXCMDL];
        int                     change_level;

        /* A level change selects the SIGTERM/wait/SIGKILL sequence below. */
        change_level = (cur_state != prev_state ? TRUE : FALSE);

        /*
         * Clear the TOUCHED flag on all entries so that when we have
         * finished scanning inittab, we will be able to tell if we
         * have any processes for which there is no entry in inittab.
         */
        for (process = proc_table;
            (process < proc_table + num_proc); process++) {
                process->p_flags &= ~TOUCHED;
        }

        /*
         * Scan all inittab entries.
         */
        while (getcmd(&cmd, &cmd_string[0]) == TRUE) {
                /* Scan for process which goes with this entry in inittab. */
                for (process = proc_table;
                    (process < proc_table + num_proc); process++) {
                        /* Skip free slots and slots with a different id. */
                        if ((process->p_flags & OCCUPIED) == 0 ||
                            !id_eq(process->p_id, cmd.c_id))
                                continue;

                        /*
                         * This slot contains the process we are looking for.
                         */

                        /*
                         * Is the cur_state SINGLE_USER or is this process
                         * marked as "off" or was this proc started by some
                         * mechanism other than LVL{a|b|c} and the current level
                         * does not support this process?
                         */
                        if (cur_state == SINGLE_USER ||
                            cmd.c_action == M_OFF ||
                            ((cmd.c_levels & state_to_mask(cur_state)) == 0 &&
                            (process->p_flags & DEMANDREQUEST) == 0)) {
                                if (process->p_flags & LIVING) {
                                        /*
                                         * Touch this entry so we know we have
                                         * treated it.  Note that procs which
                                         * are already dead at this point and
                                         * should not be restarted are left
                                         * untouched.  This causes their slot to
                                         * be freed later after dead accounting
                                         * is done.
                                         */
                                        process->p_flags |= TOUCHED;

                                        /*
                                         * KILLED guards against signalling
                                         * the same process again on a later
                                         * call.
                                         */
                                        if ((process->p_flags & KILLED) == 0) {
                                                if (change_level) {
                                                        /*
                                                         * Polite request; the
                                                         * WARNED flag is what
                                                         * the wait loop below
                                                         * looks for.
                                                         */
                                                        process->p_flags
                                                            |= WARNED;
                                                        (void) kill(
                                                            process->p_pid,
                                                            SIGTERM);
                                                } else {
                                                        /*
                                                         * Fork a killing proc
                                                         * so "init" can
                                                         * continue without
                                                         * having to pause for
                                                         * TWARN seconds.
                                                         */
                                                        killproc(
                                                            process->p_pid);
                                                }
                                                process->p_flags |= KILLED;
                                        }
                                }
                        } else {
                                /*
                                 * Process can exist at current level.  If it is
                                 * still alive or a DEMANDREQUEST we touch it so
                                 * it will be left alone.  Otherwise we leave it
                                 * untouched so it will be accounted for and
                                 * cleaned up later in remv().  Dead
                                 * DEMANDREQUESTs will be accounted but not
                                 * freed.  NOCLEANUP slots are touched as well,
                                 * so they are not cleared either.
                                 */
                                if (process->p_flags &
                                    (LIVING|NOCLEANUP|DEMANDREQUEST))
                                        process->p_flags |= TOUCHED;
                        }

                        /* Only the first matching slot is considered. */
                        break;
                }
        }

        st_write();

        /*
         * If this was a change of levels call, scan through the
         * process table for processes that were warned to die.  If any
         * are found that haven't left yet, sleep for TWARN seconds and
         * then send final terminations to any that haven't died yet.
         */
        if (change_level) {

                /*
                 * Set the alarm for TWARN seconds on the assumption
                 * that there will be some that need to be waited for.
                 * This won't harm anything except we are guaranteed to
                 * wakeup in TWARN seconds whether we need to or not.
                 */
                setimer(TWARN);

                /*
                 * Scan for processes which should be dying.  We hope they
                 * will die without having to be sent a SIGKILL signal.
                 */
                for (process = proc_table;
                    (process < proc_table + num_proc); process++) {
                        /*
                         * If this process should die, hasn't yet, and the
                         * TWARN time hasn't expired yet, wait for process
                         * to die or for timer to expire.
                         *
                         * NOTE(review): this relies on time_up and the LIVING
                         * flag being updated elsewhere (presumably by signal
                         * handlers) while we sit in pause() -- confirm.
                         */
                        while (time_up == FALSE &&
                            (process->p_flags & (WARNED|LIVING|OCCUPIED)) ==
                            (WARNED|LIVING|OCCUPIED))
                                (void) pause();

                        /* Timer expired: stop waiting on the remaining slots. */
                        if (time_up == TRUE)
                                break;
                }

                /*
                 * If we reached the end of the table without the timer
                 * expiring, then there are no procs which will have to be
                 * sent the SIGKILL signal.  If the timer has expired, then
                 * it is necessary to scan the table again and send signals
                 * to all processes which aren't going away nicely.
                 */
                if (time_up == TRUE) {
                        for (process = proc_table;
                            (process < proc_table + num_proc); process++) {
                                if ((process->p_flags &
                                    (WARNED|LIVING|OCCUPIED)) ==
                                    (WARNED|LIVING|OCCUPIED))
                                        (void) kill(process->p_pid, SIGKILL);
                        }
                }
                /* Done waiting; hand setimer() a zero interval. */
                setimer(0);
        }

        /*
         * Rescan the proc_table for two kinds of entry, those marked LIVING,
         * NAMED, which don't have an entry in inittab (haven't been TOUCHED
         * by the above scanning), and haven't been sent kill signals, and
         * those entries marked not LIVING, NAMED.  The former procs are killed.
         * The latter have DEAD_PROCESS accounting done and the slot cleared.
         */
        for (process = proc_table;
            (process < proc_table + num_proc); process++) {
                if ((process->p_flags & (LIVING|NAMED|TOUCHED|KILLED|OCCUPIED))
                    == (LIVING|NAMED|OCCUPIED)) {
                        killproc(process->p_pid);
                        process->p_flags |= KILLED;
                } else if ((process->p_flags & (LIVING|NAMED|OCCUPIED)) ==
                    (NAMED|OCCUPIED)) {
                        (void) account(DEAD_PROCESS, process, NULL);
                        /*
                         * If this named proc hasn't been TOUCHED, then free the
                         * space. It has either died of its own accord, but
                         * isn't respawnable or it was killed because it
                         * shouldn't exist at this level.
                         */
                        if ((process->p_flags & TOUCHED) == 0)
                                process->p_flags = 0;
                }
        }

        st_write();
}
1178 
1179 /*
1180  * Extract the svc.startd command line and whether to restart it from its
1181  * inittab entry.
1182  */
1183 /*ARGSUSED*/
1184 static void
1185 process_startd_line(struct CMD_LINE *cmd, char *cmd_string)
1186 {
1187         size_t sz;
1188 
1189         /* Save the command line. */
1190         if (sflg || rflg) {
1191                 /* Also append -r or -s. */
1192                 (void) strlcpy(startd_cline, cmd_string, sizeof (startd_cline));
1193                 (void) strlcat(startd_cline, " -", sizeof (startd_cline));
1194                 if (sflg)
1195                         sz = strlcat(startd_cline, "s", sizeof (startd_cline));
1196                 if (rflg)
1197                         sz = strlcat(startd_cline, "r", sizeof (startd_cline));
1198         } else {
1199                 sz = strlcpy(startd_cline, cmd_string, sizeof (startd_cline));
1200         }
1201 
1202         if (sz >= sizeof (startd_cline)) {
1203                 console(B_TRUE,
1204                     "svc.startd command line too long.  Ignoring.\n");
1205                 startd_cline[0] = '\0';
1206                 return;
1207         }
1208 }
1209 
1210 /*
1211  * spawn_processes() scans inittab for entries which should be run at this
1212  * mode.  Processes which should be running but are not, are started.
1213  */
1214 static int
1215 spawn_processes()
1216 {
1217         struct PROC_TABLE               *pp;
1218         struct CMD_LINE                 cmd;
1219         char                            cmd_string[MAXCMDL];
1220         short                           lvl_mask;
1221         int                             status;
1222 
1223         /*
1224          * First check the "powerhit" flag.  If it is set, make sure the modes
1225          * are PF_MODES and clear the "powerhit" flag.  Avoid the possible race
1226          * on the "powerhit" flag by disallowing a new powerfail interrupt
1227          * between the test of the powerhit flag and the clearing of it.
1228          */
1229         if (wakeup.w_flags.w_powerhit) {
1230                 wakeup.w_flags.w_powerhit = 0;
1231                 op_modes = PF_MODES;
1232         }
1233         lvl_mask = state_to_mask(cur_state);
1234 
1235         /*
1236          * Scan through all the entries in inittab.
1237          */
1238         while ((status = getcmd(&cmd, &cmd_string[0])) == TRUE) {
1239                 if (id_eq(cmd.c_id, "smf")) {
1240                         process_startd_line(&cmd, cmd_string);
1241                         continue;
1242                 }
1243 
1244 retry_for_proc_slot:
1245 
1246                 /*
1247                  * Find out if there is a process slot for this entry already.
1248                  */
1249                 if ((pp = findpslot(&cmd)) == NULLPROC) {
1250                         /*
1251                          * we've run out of proc table entries
1252                          * increase proc_table.
1253                          */
1254                         increase_proc_table_size();
1255 
1256                         /*
1257                          * Retry now as we have an empty proc slot.
1258                          * In case increase_proc_table_size() fails,
1259                          * we will keep retrying.
1260                          */
1261                         goto retry_for_proc_slot;
1262                 }
1263 
1264                 /*
1265                  * If there is an entry, and it is marked as DEMANDREQUEST,
1266                  * one of the levels a, b, or c is in its levels mask, and
1267                  * the action field is ONDEMAND and ONDEMAND is a permissable
1268                  * mode, and the process is dead, then respawn it.
1269                  */
1270                 if (((pp->p_flags & (LIVING|DEMANDREQUEST)) == DEMANDREQUEST) &&
1271                     (cmd.c_levels & MASK_abc) &&
1272                     (cmd.c_action & op_modes) == M_ONDEMAND) {
1273                         spawn(pp, &cmd);
1274                         continue;
1275                 }
1276 
1277                 /*
1278                  * If the action is not an action we are interested in,
1279                  * skip the entry.
1280                  */
1281                 if ((cmd.c_action & op_modes) == 0 || pp->p_flags & LIVING ||
1282                     (cmd.c_levels & lvl_mask) == 0)
1283                         continue;
1284 
1285                 /*
1286                  * If the modes are the normal modes (ONCE, WAIT, RESPAWN, OFF,
1287                  * ONDEMAND) and the action field is either OFF or the action
1288                  * field is ONCE or WAIT and the current level is the same as
1289                  * the last level, then skip this entry.  ONCE and WAIT only
1290                  * get run when the level changes.
1291                  */
1292                 if (op_modes == NORMAL_MODES &&
1293                     (cmd.c_action == M_OFF ||
1294                     (cmd.c_action & (M_ONCE|M_WAIT)) &&
1295                     cur_state == prev_state))
1296                         continue;
1297 
1298                 /*
1299                  * At this point we are interested in performing the action for
1300                  * this entry.  Actions fall into two categories, spinning off
1301                  * a process and not waiting, and spinning off a process and
1302                  * waiting for it to die.  If the action is ONCE, RESPAWN,
1303                  * ONDEMAND, POWERFAIL, or BOOT we don't wait for the process
1304                  * to die, for all other actions we do wait.
1305                  */
1306                 if (cmd.c_action & (M_ONCE | M_RESPAWN | M_PF | M_BOOT)) {
1307                         spawn(pp, &cmd);
1308 
1309                 } else {
1310                         spawn(pp, &cmd);
1311                         while (waitproc(pp) == FAILURE)
1312                                 ;
1313                         (void) account(DEAD_PROCESS, pp, NULL);
1314                         pp->p_flags = 0;
1315                 }
1316         }
1317         return (status);
1318 }
1319 
1320 /*
1321  * spawn() spawns a shell, inserts the information about the process
1322  * process into the proc_table, and does the startup accounting.
1323  */
1324 static void
1325 spawn(struct PROC_TABLE *process, struct CMD_LINE *cmd)
1326 {
1327         int             i;
1328         int             modes, maxfiles;
1329         time_t          now;
1330         struct PROC_TABLE tmproc, *oprocess;
1331 
1332         /*
1333          * The modes to be sent to efork() are 0 unless we are
1334          * spawning a LVLa, LVLb, or LVLc entry or we will be
1335          * waiting for the death of the child before continuing.
1336          */
1337         modes = NAMED;
1338         if (process->p_flags & DEMANDREQUEST || cur_state == LVLa ||
1339             cur_state == LVLb || cur_state == LVLc)
1340                 modes |= DEMANDREQUEST;
1341         if ((cmd->c_action & (M_SYSINIT | M_WAIT | M_BOOTWAIT | M_PWAIT)) != 0)
1342                 modes |= NOCLEANUP;
1343 
1344         /*
1345          * If this is a respawnable process, check the threshold
1346          * information to avoid excessive respawns.
1347          */
1348         if (cmd->c_action & M_RESPAWN) {
1349                 /*
1350                  * Add NOCLEANUP to all respawnable commands so that the
1351                  * information about the frequency of respawns isn't lost.
1352                  */
1353                 modes |= NOCLEANUP;
1354                 (void) time(&now);
1355 
1356                 /*
1357                  * If no time is assigned, then this is the first time
1358                  * this command is being processed in this series.  Assign
1359                  * the current time.
1360                  */
1361                 if (process->p_time == 0L)
1362                         process->p_time = now;
1363 
1364                 if (process->p_count++ == SPAWN_LIMIT) {
1365 
1366                         if ((now - process->p_time) < SPAWN_INTERVAL) {
1367                                 /*
1368                                  * Process is respawning too rapidly.  Print
1369                                  * message and refuse to respawn it for now.
1370                                  */
1371                                 console(B_TRUE, "Command is respawning too "
1372                                     "rapidly. Check for possible errors.\n"
1373                                     "id:%4s \"%s\"\n",
1374                                     &cmd->c_id[0], &cmd->c_command[EXEC]);
1375                                 return;
1376                         }
1377                         process->p_time = now;
1378                         process->p_count = 0;
1379 
1380                 } else if (process->p_count > SPAWN_LIMIT) {
1381                         /*
1382                          * If process has been respawning too rapidly and
1383                          * the inhibit time limit hasn't expired yet, we
1384                          * refuse to respawn.
1385                          */
1386                         if (now - process->p_time < SPAWN_INTERVAL + INHIBIT)
1387                                 return;
1388                         process->p_time = now;
1389                         process->p_count = 0;
1390                 }
1391                 rsflag = TRUE;
1392         }
1393 
1394         /*
1395          * Spawn a child process to execute this command.
1396          */
1397         (void) sighold(SIGCLD);
1398         oprocess = process;
1399         while ((process = efork(cmd->c_action, oprocess, modes)) == NO_ROOM)
1400                 (void) pause();
1401 
1402         if (process == NULLPROC) {
1403 
1404                 /*
1405                  * We are the child.  We must make sure we get a different
1406                  * file pointer for our references to utmpx.  Otherwise our
1407                  * seeks and reads will compete with those of the parent.
1408                  */
1409                 endutxent();
1410 
1411                 /*
1412                  * Perform the accounting for the beginning of a process.
1413                  * Note that all processes are initially "INIT_PROCESS"es.
1414                  */
1415                 tmproc.p_id[0] = cmd->c_id[0];
1416                 tmproc.p_id[1] = cmd->c_id[1];
1417                 tmproc.p_id[2] = cmd->c_id[2];
1418                 tmproc.p_id[3] = cmd->c_id[3];
1419                 tmproc.p_pid = getpid();
1420                 tmproc.p_exit = 0;
1421                 (void) account(INIT_PROCESS, &tmproc,
1422                     prog_name(&cmd->c_command[EXEC]));
1423                 maxfiles = ulimit(UL_GDESLIM, 0);
1424                 for (i = 0; i < maxfiles; i++)
1425                         (void) fcntl(i, F_SETFD, FD_CLOEXEC);
1426 
1427                 /*
1428                  * Now exec a shell with the -c option and the command
1429                  * from inittab.
1430                  */
1431                 (void) execle(SH, "INITSH", "-c", cmd->c_command, (char *)0,
1432                     glob_envp);
1433                 console(B_TRUE, "Command\n\"%s\"\n failed to execute.  errno "
1434                     "= %d (exec of shell failed)\n", cmd->c_command, errno);
1435 
1436                 /*
1437                  * Don't come back so quickly that "init" doesn't have a
1438                  * chance to finish putting this child in "proc_table".
1439                  */
1440                 timer(20);
1441                 exit(1);
1442 
1443         }
1444 
1445         /*
1446          * We are the parent.  Insert the necessary
1447          * information in the proc_table.
1448          */
1449         process->p_id[0] = cmd->c_id[0];
1450         process->p_id[1] = cmd->c_id[1];
1451         process->p_id[2] = cmd->c_id[2];
1452         process->p_id[3] = cmd->c_id[3];
1453 
1454         st_write();
1455 
1456         (void) sigrelse(SIGCLD);
1457 }
1458 
1459 /*
1460  * findpslot() finds the old slot in the process table for the
1461  * command with the same id, or it finds an empty slot.
1462  */
1463 static struct PROC_TABLE *
1464 findpslot(struct CMD_LINE *cmd)
1465 {
1466         struct PROC_TABLE       *process;
1467         struct PROC_TABLE       *empty = NULLPROC;
1468 
1469         for (process = proc_table;
1470             (process < proc_table + num_proc); process++) {
1471                 if (process->p_flags & OCCUPIED &&
1472                     id_eq(process->p_id, cmd->c_id))
1473                         break;
1474 
1475                 /*
1476                  * If the entry is totally empty and "empty" is still 0,
1477                  * remember where this hole is and make sure the slot is
1478                  * zeroed out.
1479                  */
1480                 if (empty == NULLPROC && (process->p_flags & OCCUPIED) == 0) {
1481                         empty = process;
1482                         process->p_id[0] = '\0';
1483                         process->p_id[1] = '\0';
1484                         process->p_id[2] = '\0';
1485                         process->p_id[3] = '\0';
1486                         process->p_pid = 0;
1487                         process->p_time = 0L;
1488                         process->p_count = 0;
1489                         process->p_flags = 0;
1490                         process->p_exit = 0;
1491                 }
1492         }
1493 
1494         /*
1495          * If there is no entry for this slot, then there should be an
1496          * empty slot.  If there is no empty slot, then we've run out
1497          * of proc_table space.  If the latter is true, empty will be
1498          * NULL and the caller will have to complain.
1499          */
1500         if (process == (proc_table + num_proc))
1501                 process = empty;
1502 
1503         return (process);
1504 }
1505 
1506 /*
1507  * getcmd() parses lines from inittab.  Each time it finds a command line
1508  * it will return TRUE as well as fill the passed CMD_LINE structure and
1509  * the shell command string.  When the end of inittab is reached, FALSE
1510  * is returned inittab is automatically opened if it is not currently open
1511  * and is closed when the end of the file is reached.
1512  */
1513 static FILE *fp_inittab = NULL;
1514 
1515 static int
1516 getcmd(struct CMD_LINE *cmd, char *shcmd)
1517 {
1518         char    *ptr;
1519         int     c, lastc, state;
1520         char    *ptr1;
1521         int     answer, i, proceed;
1522         struct  stat    sbuf;
1523         static char *actions[] = {
1524                 "off", "respawn", "ondemand", "once", "wait", "boot",
1525                 "bootwait", "powerfail", "powerwait", "initdefault",
1526                 "sysinit",
1527         };
1528         static short act_masks[] = {
1529                 M_OFF, M_RESPAWN, M_ONDEMAND, M_ONCE, M_WAIT, M_BOOT,
1530                 M_BOOTWAIT, M_PF, M_PWAIT, M_INITDEFAULT, M_SYSINIT,
1531         };
1532         /*
1533          * Only these actions will be allowed for entries which
1534          * are specified for single-user mode.
1535          */
1536         short su_acts = M_INITDEFAULT | M_PF | M_PWAIT | M_WAIT;
1537 
1538         if (fp_inittab == NULL) {
1539                 /*
1540                  * Before attempting to open inittab we stat it to make
1541                  * sure it currently exists and is not empty.  We try
1542                  * several times because someone may have temporarily
1543                  * unlinked or truncated the file.
1544                  */
1545                 for (i = 0; i < 3; i++) {
1546                         if (stat(INITTAB, &sbuf) == -1) {
1547                                 if (i == 2) {
1548                                         console(B_TRUE,
1549                                             "Cannot stat %s, errno: %d\n",
1550                                             INITTAB, errno);
1551                                         return (FAILURE);
1552                                 } else {
1553                                         timer(3);
1554                                 }
1555                         } else if (sbuf.st_size < 10) {
1556                                 if (i == 2) {
1557                                         console(B_TRUE,
1558                                             "%s truncated or corrupted\n",
1559                                             INITTAB);
1560                                         return (FAILURE);
1561                                 } else {
1562                                         timer(3);
1563                                 }
1564                         } else {
1565                                 break;
1566                         }
1567                 }
1568 
1569                 /*
1570                  * If unable to open inittab, print error message and
1571                  * return FAILURE to caller.
1572                  */
1573                 if ((fp_inittab = fopen(INITTAB, "r")) == NULL) {
1574                         console(B_TRUE, "Cannot open %s errno: %d\n", INITTAB,
1575                             errno);
1576                         return (FAILURE);
1577                 }
1578         }
1579 
1580         /*
1581          * Keep getting commands from inittab until you find a
1582          * good one or run out of file.
1583          */
1584         for (answer = FALSE; answer == FALSE; ) {
1585                 /*
1586                  * Zero out the cmd itself before trying next line.
1587                  */
1588                 bzero(cmd, sizeof (struct CMD_LINE));
1589 
1590                 /*
1591                  * Read in lines of inittab, parsing at colons, until a line is
1592                  * read in which doesn't end with a backslash.  Do not start if
1593                  * the first character read is an EOF.  Note that this means
1594                  * that lines which don't end in a newline are still processed,
1595                  * since the "for" will terminate normally once started,
1596                  * regardless of whether line terminates with a newline or EOF.
1597                  */
1598                 state = FAILURE;
1599                 if ((c = fgetc(fp_inittab)) == EOF) {
1600                         answer = FALSE;
1601                         (void) fclose(fp_inittab);
1602                         fp_inittab = NULL;
1603                         break;
1604                 }
1605 
1606                 for (proceed = TRUE, ptr = shcmd, state = ID, lastc = '\0';
1607                     proceed && c != EOF;
1608                     lastc = c, c = fgetc(fp_inittab)) {
1609                         /* If we're not in the FAILURE state and haven't */
1610                         /* yet reached the shell command field, process  */
1611                         /* the line, otherwise just look for a real end  */
1612                         /* of line.                                      */
1613                         if (state != FAILURE && state != COMMAND) {
1614                         /*
1615                          * Squeeze out spaces and tabs.
1616                          */
1617                         if (c == ' ' || c == '\t')
1618                                 continue;
1619 
1620                         /*
1621                          * Ignore characters in a comment, except for the \n.
1622                          */
1623                         if (state == COMMENT) {
1624                                 if (c == '\n') {
1625                                         lastc = ' ';
1626                                         break;
1627                                 } else {
1628                                         continue;
1629                                 }
1630                         }
1631 
1632                         /*
1633                          * Detect comments (lines whose first non-whitespace
1634                          * character is '#') by checking that we're at the
1635                          * beginning of a line, have seen a '#', and haven't
1636                          * yet accumulated any characters.
1637                          */
1638                         if (state == ID && c == '#' && ptr == shcmd) {
1639                                 state = COMMENT;
1640                                 continue;
1641                         }
1642 
1643                         /*
1644                          * If the character is a ':', then check the
1645                          * previous field for correctness and advance
1646                          * to the next field.
1647                          */
1648                         if (c == ':') {
1649                                 switch (state) {
1650 
1651                                 case ID :
1652                                 /*
1653                                  * Check to see that there are only
1654                                  * 1 to 4 characters for the id.
1655                                  */
1656                                 if ((i = ptr - shcmd) < 1 || i > 4) {
1657                                         state = FAILURE;
1658                                 } else {
1659                                         bcopy(shcmd, &cmd->c_id[0], i);
1660                                         ptr = shcmd;
1661                                         state = LEVELS;
1662                                 }
1663                                 break;
1664 
1665                                 case LEVELS :
1666                                 /*
1667                                  * Build a mask for all the levels for
1668                                  * which this command will be legal.
1669                                  */
1670                                 for (cmd->c_levels = 0, ptr1 = shcmd;
1671                                     ptr1 < ptr; ptr1++) {
1672                                         int mask;
1673                                         if (lvlname_to_mask(*ptr1,
1674                                             &mask) == -1) {
1675                                                 state = FAILURE;
1676                                                 break;
1677                                         }
1678                                         cmd->c_levels |= mask;
1679                                 }
1680                                 if (state != FAILURE) {
1681                                         state = ACTION;
1682                                         ptr = shcmd;    /* Reset the buffer */
1683                                 }
1684                                 break;
1685 
1686                                 case ACTION :
1687                                 /*
1688                                  * Null terminate the string in shcmd buffer and
1689                                  * then try to match against legal actions.  If
1690                                  * the field is of length 0, then the default of
1691                                  * "RESPAWN" is used if the id is numeric,
1692                                  * otherwise the default is "OFF".
1693                                  */
1694                                 if (ptr == shcmd) {
1695                                         if (isdigit(cmd->c_id[0]) &&
1696                                             (cmd->c_id[1] == '\0' ||
1697                                             isdigit(cmd->c_id[1])) &&
1698                                             (cmd->c_id[2] == '\0' ||
1699                                             isdigit(cmd->c_id[2])) &&
1700                                             (cmd->c_id[3] == '\0' ||
1701                                             isdigit(cmd->c_id[3])))
1702                                                 cmd->c_action = M_RESPAWN;
1703                                         else
1704                                                 cmd->c_action = M_OFF;
1705                                 } else {
1706                                         for (cmd->c_action = 0, i = 0,
1707                                             *ptr = '\0';
1708                                             i <
1709                                             sizeof (actions)/sizeof (char *);
1710                                             i++) {
1711                                         if (strcmp(shcmd, actions[i]) == 0) {
1712                                                 if ((cmd->c_levels & MASKSU) &&
1713                                                     !(act_masks[i] & su_acts))
1714                                                         cmd->c_action = 0;
1715                                                 else
1716                                                         cmd->c_action =
1717                                                             act_masks[i];
1718                                                 break;
1719                                         }
1720                                         }
1721                                 }
1722 
1723                                 /*
1724                                  * If the action didn't match any legal action,
1725                                  * set state to FAILURE.
1726                                  */
1727                                 if (cmd->c_action == 0) {
1728                                         state = FAILURE;
1729                                 } else {
1730                                         state = COMMAND;
1731                                         (void) strcpy(shcmd, "exec ");
1732                                 }
1733                                 ptr = shcmd + EXEC;
1734                                 break;
1735                                 }
1736                                 continue;
1737                         }
1738                 }
1739 
1740                 /* If the character is a '\n', then this is the end of a */
1741                 /* line.  If the '\n' wasn't preceded by a backslash, */
1742                 /* it is also the end of an inittab command.  If it was */
1743                 /* preceded by a backslash then the next line is a */
1744                 /* continuation.  Note that the continuation '\n' falls */
1745                 /* through and is treated like other characters and is */
1746                 /* stored in the shell command line. */
1747                 if (c == '\n' && lastc != '\\') {
1748                         proceed = FALSE;
1749                         *ptr = '\0';
1750                         break;
1751                 }
1752 
1753                 /* For all other characters just stuff them into the */
1754                 /* command as long as there aren't too many of them. */
1755                 /* Make sure there is room for a terminating '\0' also. */
1756                 if (ptr >= shcmd + MAXCMDL - 1)
1757                         state = FAILURE;
1758                 else
1759                         *ptr++ = (char)c;
1760 
1761                 /* If the character we just stored was a quoted */
1762                 /* backslash, then change "c" to '\0', so that this     */
1763                 /* backslash will not cause a subsequent '\n' to appear */
1764                 /* quoted.  In otherwords '\' '\' '\n' is the real end */
1765                 /* of a command, while '\' '\n' is a continuation. */
1766                 if (c == '\\' && lastc == '\\')
1767                         c = '\0';
1768                 }
1769 
1770                 /*
1771                  * Make sure all the fields are properly specified
1772                  * for a good command line.
1773                  */
1774                 if (state == COMMAND) {
1775                         answer = TRUE;
1776                         cmd->c_command = shcmd;
1777 
1778                         /*
1779                          * If no default level was supplied, insert
1780                          * all numerical levels.
1781                          */
1782                         if (cmd->c_levels == 0)
1783                                 cmd->c_levels = MASK_NUMERIC;
1784 
1785                         /*
1786                          * If no action has been supplied, declare this
1787                          * entry to be OFF.
1788                          */
1789                         if (cmd->c_action == 0)
1790                                 cmd->c_action = M_OFF;
1791 
1792                         /*
1793                          * If no shell command has been supplied, make sure
1794                          * there is a null string in the command field.
1795                          */
1796                         if (ptr == shcmd + EXEC)
1797                                 *shcmd = '\0';
1798                 } else
1799                         answer = FALSE;
1800 
1801                 /*
1802                  * If we have reached the end of inittab, then close it
1803                  * and quit trying to find a good command line.
1804                  */
1805                 if (c == EOF) {
1806                         (void) fclose(fp_inittab);
1807                         fp_inittab = NULL;
1808                         break;
1809                 }
1810         }
1811         return (answer);
1812 }
1813 
1814 /*
1815  * lvlname_to_state(): convert the character name of a state to its level
1816  * (its corresponding signal number).
1817  */
1818 static int
1819 lvlname_to_state(char name)
1820 {
1821         int i;
1822         for (i = 0; i < LVL_NELEMS; i++) {
1823                 if (lvls[i].lvl_name == name)
1824                         return (lvls[i].lvl_state);
1825         }
1826         return (-1);
1827 }
1828 
1829 /*
1830  * state_to_name(): convert the level to the character name.
1831  */
1832 static char
1833 state_to_name(int state)
1834 {
1835         int i;
1836         for (i = 0; i < LVL_NELEMS; i++) {
1837                 if (lvls[i].lvl_state == state)
1838                         return (lvls[i].lvl_name);
1839         }
1840         return (-1);
1841 }
1842 
1843 /*
1844  * state_to_mask(): return the mask corresponding to a signal number
1845  */
1846 static int
1847 state_to_mask(int state)
1848 {
1849         int i;
1850         for (i = 0; i < LVL_NELEMS; i++) {
1851                 if (lvls[i].lvl_state == state)
1852                         return (lvls[i].lvl_mask);
1853         }
1854         return (0);     /* return 0, since that represents an empty mask */
1855 }
1856 
1857 /*
1858  * lvlname_to_mask(): return the mask corresponding to a levels character name
1859  */
1860 static int
1861 lvlname_to_mask(char name, int *mask)
1862 {
1863         int i;
1864         for (i = 0; i < LVL_NELEMS; i++) {
1865                 if (lvls[i].lvl_name == name) {
1866                         *mask = lvls[i].lvl_mask;
1867                         return (0);
1868                 }
1869         }
1870         return (-1);
1871 }
1872 
1873 /*
1874  * state_to_flags(): return the flags corresponding to a runlevel.  These
1875  * indicate properties of that runlevel.
1876  */
1877 static int
1878 state_to_flags(int state)
1879 {
1880         int i;
1881         for (i = 0; i < LVL_NELEMS; i++) {
1882                 if (lvls[i].lvl_state == state)
1883                         return (lvls[i].lvl_flags);
1884         }
1885         return (0);
1886 }
1887 
1888 /*
1889  * killproc() creates a child which kills the process specified by pid.
1890  */
1891 void
1892 killproc(pid_t pid)
1893 {
1894         struct PROC_TABLE       *process;
1895 
1896         (void) sighold(SIGCLD);
1897         while ((process = efork(M_OFF, NULLPROC, 0)) == NO_ROOM)
1898                 (void) pause();
1899         (void) sigrelse(SIGCLD);
1900 
1901         if (process == NULLPROC) {
1902                 /*
1903                  * efork() sets all signal handlers to the default, so reset
1904                  * the ALRM handler to make timer() work as expected.
1905                  */
1906                 (void) sigset(SIGALRM, alarmclk);
1907 
1908                 /*
1909                  * We are the child.  Try to terminate the process nicely
1910                  * first using SIGTERM and if it refuses to die in TWARN
1911                  * seconds kill it with SIGKILL.
1912                  */
1913                 (void) kill(pid, SIGTERM);
1914                 (void) timer(TWARN);
1915                 (void) kill(pid, SIGKILL);
1916                 (void) exit(0);
1917         }
1918 }
1919 
1920 /*
1921  * Set up the default environment for all procs to be forked from init.
1922  * Read the values from the /etc/default/init file, except for PATH.  If
1923  * there's not enough room in the environment array, the environment
1924  * lines that don't fit are silently discarded.
1925  */
1926 void
1927 init_env()
1928 {
1929         char    line[MAXCMDL];
1930         FILE    *fp;
1931         int     inquotes, length, wslength;
1932         char    *tokp, *cp1, *cp2;
1933 
1934         glob_envp[0] = malloc((unsigned)(strlen(DEF_PATH)+2));
1935         (void) strcpy(glob_envp[0], DEF_PATH);
1936         glob_envn = 1;
1937 
1938         if (rflg) {
1939                 glob_envp[1] =
1940                     malloc((unsigned)(strlen("_DVFS_RECONFIG=YES")+2));
1941                 (void) strcpy(glob_envp[1], "_DVFS_RECONFIG=YES");
1942                 ++glob_envn;
1943         } else if (bflg == 1) {
1944                 glob_envp[1] =
1945                     malloc((unsigned)(strlen("RB_NOBOOTRC=YES")+2));
1946                 (void) strcpy(glob_envp[1], "RB_NOBOOTRC=YES");
1947                 ++glob_envn;
1948         }
1949 
1950         if ((fp = fopen(ENVFILE, "r")) == NULL) {
1951                 console(B_TRUE,
1952                     "Cannot open %s. Environment not initialized.\n",
1953                     ENVFILE);
1954         } else {
1955                 while (fgets(line, MAXCMDL - 1, fp) != NULL &&
1956                     glob_envn < MAXENVENT - 2) {
1957                         /*
1958                          * Toss newline
1959                          */
1960                         length = strlen(line);
1961                         if (line[length - 1] == '\n')
1962                                 line[length - 1] = '\0';
1963 
1964                         /*
1965                          * Ignore blank or comment lines.
1966                          */
1967                         if (line[0] == '#' || line[0] == '\0' ||
1968                             (wslength = strspn(line, " \t\n")) ==
1969                             strlen(line) ||
1970                             strchr(line, '#') == line + wslength)
1971                                 continue;
1972 
1973                         /*
1974                          * First make a pass through the line and change
1975                          * any non-quoted semi-colons to blanks so they
1976                          * will be treated as token separators below.
1977                          */
1978                         inquotes = 0;
1979                         for (cp1 = line; *cp1 != '\0'; cp1++) {
1980                                 if (*cp1 == '"') {
1981                                         if (inquotes == 0)
1982                                                 inquotes = 1;
1983                                         else
1984                                                 inquotes = 0;
1985                                 } else if (*cp1 == ';') {
1986                                         if (inquotes == 0)
1987                                                 *cp1 = ' ';
1988                                 }
1989                         }
1990 
1991                         /*
1992                          * Tokens within the line are separated by blanks
1993                          *  and tabs.  For each token in the line which
1994                          * contains a '=' we strip out any quotes and then
1995                          * stick the token in the environment array.
1996                          */
1997                         if ((tokp = strtok(line, " \t")) == NULL)
1998                                 continue;
1999                         do {
2000                                 if (strchr(tokp, '=') == NULL)
2001                                         continue;
2002                                 length = strlen(tokp);
2003                                 while ((cp1 = strpbrk(tokp, "\"\'")) != NULL) {
2004                                         for (cp2 = cp1;
2005                                             cp2 < &tokp[length]; cp2++)
2006                                                 *cp2 = *(cp2 + 1);
2007                                         length--;
2008                                 }
2009 
2010                                 if (strncmp(tokp, "CMASK=",
2011                                     sizeof ("CMASK=") - 1) == 0) {
2012                                         long t;
2013 
2014                                         /* We know there's an = */
2015                                         t = strtol(strchr(tokp, '=') + 1, NULL,
2016                                             8);
2017 
2018                                         /* Sanity */
2019                                         if (t <= 077 && t >= 0)
2020                                                 cmask = (int)t;
2021                                         (void) umask(cmask);
2022                                         continue;
2023                                 }
2024                                 glob_envp[glob_envn] =
2025                                     malloc((unsigned)(length + 1));
2026                                 (void) strcpy(glob_envp[glob_envn], tokp);
2027                                 if (++glob_envn >= MAXENVENT - 1)
2028                                         break;
2029                         } while ((tokp = strtok(NULL, " \t")) != NULL);
2030                 }
2031 
2032                 /*
2033                  * Append a null pointer to the environment array
2034                  * to mark its end.
2035                  */
2036                 glob_envp[glob_envn] = NULL;
2037                 (void) fclose(fp);
2038         }
2039 }
2040 
2041 /*
2042  * boot_init(): Do initialization things that should be done at boot.
2043  */
2044 void
2045 boot_init()
2046 {
2047         int i;
2048         struct PROC_TABLE *process, *oprocess;
2049         struct CMD_LINE cmd;
2050         char    line[MAXCMDL];
2051         char    svc_aux[SVC_AUX_SIZE];
2052         char    init_svc_fmri[SVC_FMRI_SIZE];
2053         char *old_path;
2054         int maxfiles;
2055 
2056         /* Use INIT_PATH for sysinit cmds */
2057         old_path = glob_envp[0];
2058         glob_envp[0] = malloc((unsigned)(strlen(INIT_PATH)+2));
2059         (void) strcpy(glob_envp[0], INIT_PATH);
2060 
2061         /*
2062          * Scan inittab(4) and process the special svc.startd entry, initdefault
2063          * and sysinit entries.
2064          */
2065         while (getcmd(&cmd, &line[0]) == TRUE) {
2066                 if (startd_tmpl >= 0 && id_eq(cmd.c_id, "smf")) {
2067                         process_startd_line(&cmd, line);
2068                         (void) snprintf(startd_svc_aux, SVC_AUX_SIZE,
2069                             INITTAB_ENTRY_ID_STR_FORMAT, cmd.c_id);
2070                 } else if (cmd.c_action == M_INITDEFAULT) {
2071                         /*
2072                          * initdefault is no longer meaningful, as the SMF
2073                          * milestone controls what (legacy) run level we
2074                          * boot to.
2075                          */
2076                         console(B_TRUE,
2077                             "Ignoring legacy \"initdefault\" entry.\n");
2078                 } else if (cmd.c_action == M_SYSINIT) {
2079                         /*
2080                          * Execute the "sysinit" entry and wait for it to
2081                          * complete.  No bookkeeping is performed on these
2082                          * entries because we avoid writing to the file system
2083                          * until after there has been an chance to check it.
2084                          */
2085                         if (process = findpslot(&cmd)) {
2086                                 (void) sighold(SIGCLD);
2087                                 (void) snprintf(svc_aux, SVC_AUX_SIZE,
2088                                     INITTAB_ENTRY_ID_STR_FORMAT, cmd.c_id);
2089                                 (void) snprintf(init_svc_fmri, SVC_FMRI_SIZE,
2090                                     SVC_INIT_PREFIX INITTAB_ENTRY_ID_STR_FORMAT,
2091                                     cmd.c_id);
2092                                 if (legacy_tmpl >= 0) {
2093                                         (void) ct_pr_tmpl_set_svc_fmri(
2094                                             legacy_tmpl, init_svc_fmri);
2095                                         (void) ct_pr_tmpl_set_svc_aux(
2096                                             legacy_tmpl, svc_aux);
2097                                 }
2098 
2099                                 for (oprocess = process;
2100                                     (process = efork(M_OFF, oprocess,
2101                                     (NAMED|NOCLEANUP))) == NO_ROOM;
2102                                     /* CSTYLED */)
2103                                         ;
2104                                 (void) sigrelse(SIGCLD);
2105 
2106                                 if (process == NULLPROC) {
2107                                         maxfiles = ulimit(UL_GDESLIM, 0);
2108 
2109                                         for (i = 0; i < maxfiles; i++)
2110                                                 (void) fcntl(i, F_SETFD,
2111                                                     FD_CLOEXEC);
2112                                         (void) execle(SH, "INITSH", "-c",
2113                                             cmd.c_command,
2114                                             (char *)0, glob_envp);
2115                                         console(B_TRUE,
2116 "Command\n\"%s\"\n failed to execute.  errno = %d (exec of shell failed)\n",
2117                                             cmd.c_command, errno);
2118                                         exit(1);
2119                                 } else
2120                                         while (waitproc(process) == FAILURE)
2121                                                 ;
2122                                 process->p_flags = 0;
2123                                 st_write();
2124                         }
2125                 }
2126         }
2127 
2128         /* Restore the path. */
2129         free(glob_envp[0]);
2130         glob_envp[0] = old_path;
2131 
2132         /*
2133          * This will enable st_write() to complain about init_state_file.
2134          */
2135         booting = 0;
2136 
2137         /*
2138          * If the /etc/ioctl.syscon didn't exist or had invalid contents write
2139          * out a correct version.
2140          */
2141         if (write_ioctl)
2142                 write_ioctl_syscon();
2143 
2144         /*
2145          * Start svc.startd(1M), which does most of the work.
2146          */
2147         if (startd_cline[0] != '\0' && startd_tmpl >= 0) {
2148                 /* Start svc.startd. */
2149                 if (startd_run(startd_cline, startd_tmpl, 0) == -1)
2150                         cur_state = SINGLE_USER;
2151         } else {
2152                 console(B_TRUE, "Absent svc.startd entry or bad "
2153                     "contract template.  Not starting svc.startd.\n");
2154                 enter_maintenance();
2155         }
2156 }
2157 
2158 /*
2159  * init_signals(): Initialize all signals to either be caught or ignored.
2160  */
2161 void
2162 init_signals(void)
2163 {
2164         struct sigaction act;
2165         int i;
2166 
2167         /*
2168          * Start by ignoring all signals, then selectively re-enable some.
2169          * The SIG_IGN disposition will only affect asynchronous signals:
2170          * any signal that we trigger synchronously that doesn't end up
2171          * being handled by siglvl() will be forcibly delivered by the kernel.
2172          */
2173         for (i = SIGHUP; i <= SIGRTMAX; i++)
2174                 (void) sigset(i, SIG_IGN);
2175 
2176         /*
2177          * Handle all level-changing signals using siglvl() and set sa_mask so
2178          * that all level-changing signals are blocked while in siglvl().
2179          */
2180         act.sa_handler = siglvl;
2181         act.sa_flags = SA_SIGINFO;
2182         (void) sigemptyset(&act.sa_mask);
2183 
2184         (void) sigaddset(&act.sa_mask, LVLQ);
2185         (void) sigaddset(&act.sa_mask, LVL0);
2186         (void) sigaddset(&act.sa_mask, LVL1);
2187         (void) sigaddset(&act.sa_mask, LVL2);
2188         (void) sigaddset(&act.sa_mask, LVL3);
2189         (void) sigaddset(&act.sa_mask, LVL4);
2190         (void) sigaddset(&act.sa_mask, LVL5);
2191         (void) sigaddset(&act.sa_mask, LVL6);
2192         (void) sigaddset(&act.sa_mask, SINGLE_USER);
2193         (void) sigaddset(&act.sa_mask, LVLa);
2194         (void) sigaddset(&act.sa_mask, LVLb);
2195         (void) sigaddset(&act.sa_mask, LVLc);
2196 
2197         (void) sigaction(LVLQ, &act, NULL);
2198         (void) sigaction(LVL0, &act, NULL);
2199         (void) sigaction(LVL1, &act, NULL);
2200         (void) sigaction(LVL2, &act, NULL);
2201         (void) sigaction(LVL3, &act, NULL);
2202         (void) sigaction(LVL4, &act, NULL);
2203         (void) sigaction(LVL5, &act, NULL);
2204         (void) sigaction(LVL6, &act, NULL);
2205         (void) sigaction(SINGLE_USER, &act, NULL);
2206         (void) sigaction(LVLa, &act, NULL);
2207         (void) sigaction(LVLb, &act, NULL);
2208         (void) sigaction(LVLc, &act, NULL);
2209 
2210         (void) sigset(SIGALRM, alarmclk);
2211         alarmclk();
2212 
2213         (void) sigset(SIGCLD, childeath);
2214         (void) sigset(SIGPWR, powerfail);
2215 }
2216 
2217 /*
2218  * Set up pipe for "godchildren". If the file exists and is a pipe just open
2219  * it. Else, if the file system is r/w create it.  Otherwise, defer its
2220  * creation and open until after /var/run has been mounted.  This function is
2221  * only called on startup and when explicitly requested via LVLQ.
2222  */
2223 void
2224 setup_pipe()
2225 {
2226         struct stat stat_buf;
2227         struct statvfs statvfs_buf;
2228         struct sigaction act;
2229 
2230         /*
2231          * Always close the previous pipe descriptor as the mounted filesystems
2232          * may have changed.
2233          */
2234         if (Pfd >= 0)
2235                 (void) close(Pfd);
2236 
2237         if ((stat(INITPIPE, &stat_buf) == 0) &&
2238             ((stat_buf.st_mode & (S_IFMT|S_IRUSR)) == (S_IFIFO|S_IRUSR)))
2239                 Pfd = open(INITPIPE, O_RDWR | O_NDELAY);
2240         else
2241                 if ((statvfs(INITPIPE_DIR, &statvfs_buf) == 0) &&
2242                     ((statvfs_buf.f_flag & ST_RDONLY) == 0)) {
2243                         (void) unlink(INITPIPE);
2244                         (void) mknod(INITPIPE, S_IFIFO | 0600, 0);
2245                         Pfd = open(INITPIPE, O_RDWR | O_NDELAY);
2246                 }
2247 
2248         if (Pfd >= 0) {
2249                 (void) ioctl(Pfd, I_SETSIG, S_INPUT);
2250                 /*
2251                  * Read pipe in message discard mode.
2252                  */
2253                 (void) ioctl(Pfd, I_SRDOPT, RMSGD);
2254 
2255                 act.sa_handler = sigpoll;
2256                 act.sa_flags = 0;
2257                 (void) sigemptyset(&act.sa_mask);
2258                 (void) sigaddset(&act.sa_mask, SIGCLD);
2259                 (void) sigaction(SIGPOLL, &act, NULL);
2260         }
2261 }
2262 
2263 /*
2264  * siglvl - handle an asynchronous signal from init(1M) telling us that we
2265  * should change the current run level.  We set new_state accordingly.
2266  */
2267 void
2268 siglvl(int sig, siginfo_t *sip, ucontext_t *ucp)
2269 {
2270         struct PROC_TABLE *process;
2271         struct sigaction act;
2272 
2273         /*
2274          * If the signal was from the kernel (rather than init(1M)) then init
2275          * itself tripped the signal.  That is, we might have a bug and tripped
2276          * a real SIGSEGV instead of receiving it as an alias for SIGLVLa.  In
2277          * such a case we reset the disposition to SIG_DFL, block all signals
2278          * in uc_mask but the current one, and return to the interrupted ucp
2279          * to effect an appropriate death.  The kernel will then restart us.
2280          *
2281          * The one exception to SI_FROMKERNEL() is SIGFPE (a.k.a. LVL6), which
2282          * the kernel can send us when it wants to effect an orderly reboot.
2283          * For this case we must also verify si_code is zero, rather than a
2284          * code such as FPE_INTDIV which a bug might have triggered.
2285          */
2286         if (sip != NULL && SI_FROMKERNEL(sip) &&
2287             (sig != SIGFPE || sip->si_code == 0)) {
2288 
2289                 (void) sigemptyset(&act.sa_mask);
2290                 act.sa_handler = SIG_DFL;
2291                 act.sa_flags = 0;
2292                 (void) sigaction(sig, &act, NULL);
2293 
2294                 (void) sigfillset(&ucp->uc_sigmask);
2295                 (void) sigdelset(&ucp->uc_sigmask, sig);
2296                 ucp->uc_flags |= UC_SIGMASK;
2297 
2298                 (void) setcontext(ucp);
2299         }
2300 
2301         /*
2302          * If the signal received is a LVLQ signal, do not really
2303          * change levels, just restate the current level.  If the
2304          * signal is not a LVLQ, set the new level to the signal
2305          * received.
2306          */
2307         if (sig == LVLQ) {
2308                 new_state = cur_state;
2309                 lvlq_received = B_TRUE;
2310         } else {
2311                 new_state = sig;
2312         }
2313 
2314         /*
2315          * Clear all times and repeat counts in the process table
2316          * since either the level is changing or the user has editted
2317          * the inittab file and wants us to look at it again.
2318          * If the user has fixed a typo, we don't want residual timing
2319          * data preventing the fixed command line from executing.
2320          */
2321         for (process = proc_table;
2322             (process < proc_table + num_proc); process++) {
2323                 process->p_time = 0L;
2324                 process->p_count = 0;
2325         }
2326 
2327         /*
2328          * Set the flag to indicate that a "user signal" was received.
2329          */
2330         wakeup.w_flags.w_usersignal = 1;
2331 }
2332 
2333 
/*
 * alarmclk(): note that the alarm timer has gone off by setting the global
 * time_up flag.  init_signals() installs this function as the SIGALRM
 * handler and also calls it once directly.
 */
static void
alarmclk()
{
        time_up = TRUE;
}
2342 
2343 /*
2344  * childeath_single():
2345  *
2346  * This used to be the SIGCLD handler and it was set with signal()
2347  * (as opposed to sigset()).  When a child exited we'd come to the
2348  * handler, wait for the child, and reenable the handler with
2349  * signal() just before returning.  The implementation of signal()
2350  * checks with waitid() for waitable children and sends a SIGCLD
2351  * if there are some.  If children are exiting faster than the
2352  * handler can run we keep sending signals and the handler never
2353  * gets to return and eventually the stack runs out and init dies.
2354  * To prevent that we set the handler with sigset() so the handler
2355  * doesn't need to be reset, and in childeath() (see below) we
2356  * call childeath_single() as long as there are children to be
2357  * waited for.  If a child exits while init is in the handler a
2358  * SIGCLD will be pending and delivered on return from the handler.
2359  * If the child was already waited for the handler will have nothing
2360  * to do and return, otherwise the child will be waited for.
2361  */
2362 static void
2363 childeath_single(pid_t pid, int status)
2364 {
2365         struct PROC_TABLE       *process;
2366         struct pidlist          *pp;
2367 
2368         /*
2369          * Scan the process table to see if we are interested in this process.
2370          */
2371         for (process = proc_table;
2372             (process < proc_table + num_proc); process++) {
2373                 if ((process->p_flags & (LIVING|OCCUPIED)) ==
2374                     (LIVING|OCCUPIED) && process->p_pid == pid) {
2375 
2376                         /*
2377                          * Mark this process as having died and store the exit
2378                          * status.  Also set the wakeup flag for a dead child
2379                          * and break out of the loop.
2380                          */
2381                         process->p_flags &= ~LIVING;
2382                         process->p_exit = (short)status;
2383                         wakeup.w_flags.w_childdeath = 1;
2384 
2385                         return;
2386                 }
2387         }
2388 
2389         /*
2390          * No process was found above, look through auxiliary list.
2391          */
2392         (void) sighold(SIGPOLL);
2393         pp = Plhead;
2394         while (pp) {
2395                 if (pid > pp->pl_pid) {
2396                         /*
2397                          * Keep on looking.
2398                          */
2399                         pp = pp->pl_next;
2400                         continue;
2401                 } else if (pid < pp->pl_pid) {
2402                         /*
2403                          * Not in the list.
2404                          */
2405                         break;
2406                 } else {
2407                         /*
2408                          * This is a dead "godchild".
2409                          */
2410                         pp->pl_dflag = 1;
2411                         pp->pl_exit = (short)status;
2412                         wakeup.w_flags.w_childdeath = 1;
2413                         Gchild = 1;     /* Notice to call cleanaux(). */
2414                         break;
2415                 }
2416         }
2417 
2418         (void) sigrelse(SIGPOLL);
2419 }
2420 
/*
 * childeath(): SIGCLD handler.  Reaps every child that can currently be
 * waited for without blocking and hands each one to childeath_single().
 */
/* ARGSUSED */
static void
childeath(int signo)
{
        int status;
        pid_t pid;

        for (;;) {
                pid = waitpid(-1, &status, WNOHANG);
                if (pid <= 0)
                        break;
                childeath_single(pid, status);
        }
}
2431 
/*
 * powerfail(): installed by init_signals() as the SIGPWR handler.  Adjusts
 * our nice value and sets the w_powerhit wakeup flag.
 */
static void
powerfail()
{
        (void) nice(-19);       /* negative increment: ask for higher priority */
        wakeup.w_flags.w_powerhit = 1;
}
2438 
2439 /*
2440  * efork() forks a child and the parent inserts the process in its table
2441  * of processes that are directly a result of forks that it has performed.
2442  * The child just changes the "global" with the process id for this process
2443  * to it's new value.
2444  * If efork() is called with a pointer into the proc_table it uses that slot,
2445  * otherwise it searches for a free slot.  Regardless of how it was called,
2446  * it returns the pointer to the proc_table entry
2447  *
2448  * The SIGCLD signal is blocked (held) before calling efork()
2449  * and is unblocked (released) after efork() returns.
2450  *
2451  * Ideally, this should be rewritten to use modern signal semantics.
2452  */
2453 static struct PROC_TABLE *
2454 efork(int action, struct PROC_TABLE *process, int modes)
2455 {
2456         pid_t   childpid;
2457         struct PROC_TABLE *proc;
2458         int             i;
2459         /*
2460          * Freshen up the proc_table, removing any entries for dead processes
2461          * that don't have NOCLEANUP set.  Perform the necessary accounting.
2462          */
2463         for (proc = proc_table; (proc < proc_table + num_proc); proc++) {
2464                 if ((proc->p_flags & (OCCUPIED|LIVING|NOCLEANUP)) ==
2465                     (OCCUPIED)) {
2466                         /*
2467                          * Is this a named process?
2468                          * If so, do the necessary bookkeeping.
2469                          */
2470                         if (proc->p_flags & NAMED)
2471                                 (void) account(DEAD_PROCESS, proc, NULL);
2472 
2473                         /*
2474                          * Free this entry for new usage.
2475                          */
2476                         proc->p_flags = 0;
2477                 }
2478         }
2479 
2480         while ((childpid = fork()) == FAILURE) {
2481                 /*
2482                  * Shorten the alarm timer in case someone else's child dies
2483                  * and free up a slot in the process table.
2484                  */
2485                 setimer(5);
2486 
2487                 /*
2488                  * Wait for some children to die.  Since efork()
2489                  * is always called with SIGCLD blocked, unblock
2490                  * it here so that child death signals can come in.
2491                  */
2492                 (void) sigrelse(SIGCLD);
2493                 (void) pause();
2494                 (void) sighold(SIGCLD);
2495                 setimer(0);
2496         }
2497 
2498         if (childpid != 0) {
2499 
2500                 if (process == NULLPROC) {
2501                         /*
2502                          * No proc table pointer specified so search
2503                          * for a free slot.
2504                          */
2505                         for (process = proc_table;  process->p_flags != 0 &&
2506                             (process < proc_table + num_proc); process++)
2507                                         ;
2508 
2509                         if (process == (proc_table + num_proc)) {
2510                                 int old_proc_table_size = num_proc;
2511 
2512                                 /* Increase the process table size */
2513                                 increase_proc_table_size();
2514                                 if (old_proc_table_size == num_proc) {
2515                                         /* didn't grow: memory failure */
2516                                         return (NO_ROOM);
2517                                 } else {
2518                                         process =
2519                                             proc_table + old_proc_table_size;
2520                                 }
2521                         }
2522 
2523                         process->p_time = 0L;
2524                         process->p_count = 0;
2525                 }
2526                 process->p_id[0] = '\0';
2527                 process->p_id[1] = '\0';
2528                 process->p_id[2] = '\0';
2529                 process->p_id[3] = '\0';
2530                 process->p_pid = childpid;
2531                 process->p_flags = (LIVING | OCCUPIED | modes);
2532                 process->p_exit = 0;
2533 
2534                 st_write();
2535         } else {
2536                 if ((action & (M_WAIT | M_BOOTWAIT)) == 0)
2537                         (void) setpgrp();
2538 
2539                 process = NULLPROC;
2540 
2541                 /*
2542                  * Reset all signals to the system defaults.
2543                  */
2544                 for (i = SIGHUP; i <= SIGRTMAX; i++)
2545                         (void) sigset(i, SIG_DFL);
2546 
2547                 /*
2548                  * POSIX B.2.2.2 advises that init should set SIGTTOU,
2549                  * SIGTTIN, and SIGTSTP to SIG_IGN.
2550                  *
2551                  * Make sure that SIGXCPU and SIGXFSZ also remain ignored,
2552                  * for backward compatibility.
2553                  */
2554                 (void) sigset(SIGTTIN, SIG_IGN);
2555                 (void) sigset(SIGTTOU, SIG_IGN);
2556                 (void) sigset(SIGTSTP, SIG_IGN);
2557                 (void) sigset(SIGXCPU, SIG_IGN);
2558                 (void) sigset(SIGXFSZ, SIG_IGN);
2559         }
2560         return (process);
2561 }
2562 
2563 
2564 /*
2565  * waitproc() waits for a specified process to die.  For this function to
2566  * work, the specified process must already in the proc_table.  waitproc()
2567  * returns the exit status of the specified process when it dies.
2568  */
2569 static long
2570 waitproc(struct PROC_TABLE *process)
2571 {
2572         int             answer;
2573         sigset_t        oldmask, newmask, zeromask;
2574 
2575         (void) sigemptyset(&zeromask);
2576         (void) sigemptyset(&newmask);
2577 
2578         (void) sigaddset(&newmask, SIGCLD);
2579 
2580         /* Block SIGCLD and save the current signal mask */
2581         if (sigprocmask(SIG_BLOCK, &newmask, &oldmask) < 0)
2582                 perror("SIG_BLOCK error");
2583 
2584         /*
2585          * Wait around until the process dies.
2586          */
2587         if (process->p_flags & LIVING)
2588                 (void) sigsuspend(&zeromask);
2589 
2590         /* Reset signal mask to unblock SIGCLD */
2591         if (sigprocmask(SIG_SETMASK, &oldmask, NULL) < 0)
2592                 perror("SIG_SETMASK error");
2593 
2594         if (process->p_flags & LIVING)
2595                 return (FAILURE);
2596 
2597         /*
2598          * Make sure to only return 16 bits so that answer will always
2599          * be positive whenever the process of interest really died.
2600          */
2601         answer = (process->p_exit & 0xffff);
2602 
2603         /*
2604          * Free the slot in the proc_table.
2605          */
2606         process->p_flags = 0;
2607         return (answer);
2608 }
2609 
2610 /*
2611  * notify_pam_dead(): calls into the PAM framework to close the given session.
2612  */
2613 static void
2614 notify_pam_dead(struct utmpx *up)
2615 {
2616         pam_handle_t *pamh;
2617         char user[sizeof (up->ut_user) + 1];
2618         char ttyn[sizeof (up->ut_line) + 1];
2619         char host[sizeof (up->ut_host) + 1];
2620 
2621         /*
2622          * PAM does not take care of updating utmpx/wtmpx.
2623          */
2624         (void) snprintf(user, sizeof (user), "%s", up->ut_user);
2625         (void) snprintf(ttyn, sizeof (ttyn), "%s", up->ut_line);
2626         (void) snprintf(host, sizeof (host), "%s", up->ut_host);
2627 
2628         if (pam_start("init", user, NULL, &pamh) == PAM_SUCCESS)  {
2629                 (void) pam_set_item(pamh, PAM_TTY, ttyn);
2630                 (void) pam_set_item(pamh, PAM_RHOST, host);
2631                 (void) pam_close_session(pamh, 0);
2632                 (void) pam_end(pamh, PAM_SUCCESS);
2633         }
2634 }
2635 
2636 /*
2637  * Check you can access utmpx (As / may be read-only and
2638  * /var may not be mounted yet).
2639  */
2640 static int
2641 access_utmpx(void)
2642 {
2643         do {
2644                 utmpx_ok = (access(UTMPX, R_OK|W_OK) == 0);
2645         } while (!utmpx_ok && errno == EINTR);
2646 
2647         return (utmpx_ok);
2648 }
2649 
2650 /*
2651  * account() updates entries in utmpx and appends new entries to the end of
2652  * wtmpx (assuming they exist).  The program argument indicates the name of
2653  * program if INIT_PROCESS, otherwise should be NULL.
2654  *
2655  * account() only blocks for INIT_PROCESS requests.
2656  *
2657  * Returns non-zero if write failed.
2658  */
2659 static int
2660 account(short state, struct PROC_TABLE *process, char *program)
2661 {
2662         struct utmpx utmpbuf, *u, *oldu;
2663         int tmplen;
2664         char fail_buf[UT_LINE_SZ];
2665         sigset_t block, unblock;
2666 
2667         if (!utmpx_ok && !access_utmpx()) {
2668                 return (-1);
2669         }
2670 
2671         /*
2672          * Set up the prototype for the utmp structure we want to write.
2673          */
2674         u = &utmpbuf;
2675         (void) memset(u, 0, sizeof (struct utmpx));
2676 
2677         /*
2678          * Fill in the various fields of the utmp structure.
2679          */
2680         u->ut_id[0] = process->p_id[0];
2681         u->ut_id[1] = process->p_id[1];
2682         u->ut_id[2] = process->p_id[2];
2683         u->ut_id[3] = process->p_id[3];
2684         u->ut_pid = process->p_pid;
2685 
2686         /*
2687          * Fill the "ut_exit" structure.
2688          */
2689         u->ut_exit.e_termination = WTERMSIG(process->p_exit);
2690         u->ut_exit.e_exit = WEXITSTATUS(process->p_exit);
2691         u->ut_type = state;
2692 
2693         (void) time(&u->ut_tv.tv_sec);
2694 
2695         /*
2696          * Block signals for utmp update.
2697          */
2698         (void) sigfillset(&block);
2699         (void) sigprocmask(SIG_BLOCK, &block, &unblock);
2700 
2701         /*
2702          * See if there already is such an entry in the "utmpx" file.
2703          */
2704         setutxent();    /* Start at beginning of utmpx file. */
2705 
2706         if ((oldu = getutxid(u)) != NULL) {
2707                 /*
2708                  * Copy in the old "user", "line" and "host" fields
2709                  * to our new structure.
2710                  */
2711                 bcopy(oldu->ut_user, u->ut_user, sizeof (u->ut_user));
2712                 bcopy(oldu->ut_line, u->ut_line, sizeof (u->ut_line));
2713                 bcopy(oldu->ut_host, u->ut_host, sizeof (u->ut_host));
2714                 u->ut_syslen = (tmplen = strlen(u->ut_host)) ?
2715                     min(tmplen + 1, sizeof (u->ut_host)) : 0;
2716 
2717                 if (oldu->ut_type == USER_PROCESS && state == DEAD_PROCESS) {
2718                         notify_pam_dead(oldu);
2719                 }
2720         }
2721 
2722         /*
2723          * Perform special accounting. Insert the special string into the
2724          * ut_line array. For INIT_PROCESSes put in the name of the
2725          * program in the "ut_user" field.
2726          */
2727         switch (state) {
2728         case INIT_PROCESS:
2729                 (void) strncpy(u->ut_user, program, sizeof (u->ut_user));
2730                 (void) strcpy(fail_buf, "INIT_PROCESS");
2731                 break;
2732 
2733         default:
2734                 (void) strlcpy(fail_buf, u->ut_id, sizeof (u->ut_id) + 1);
2735                 break;
2736         }
2737 
2738         /*
2739          * Write out the updated entry to utmpx file.
2740          */
2741         if (pututxline(u) == NULL) {
2742                 console(B_TRUE, "Failed write of utmpx entry: \"%s\": %s\n",
2743                     fail_buf, strerror(errno));
2744                 endutxent();
2745                 (void) sigprocmask(SIG_SETMASK, &unblock, NULL);
2746                 return (-1);
2747         }
2748 
2749         /*
2750          * If we're able to write to utmpx, then attempt to add to the
2751          * end of the wtmpx file.
2752          */
2753         updwtmpx(WTMPX, u);
2754 
2755         endutxent();
2756 
2757         (void) sigprocmask(SIG_SETMASK, &unblock, NULL);
2758 
2759         return (0);
2760 }
2761 
2762 static void
2763 clearent(pid_t pid, short status)
2764 {
2765         struct utmpx *up;
2766         sigset_t block, unblock;
2767 
2768         /*
2769          * Block signals for utmp update.
2770          */
2771         (void) sigfillset(&block);
2772         (void) sigprocmask(SIG_BLOCK, &block, &unblock);
2773 
2774         /*
2775          * No error checking for now.
2776          */
2777 
2778         setutxent();
2779         while (up = getutxent()) {
2780                 if (up->ut_pid == pid) {
2781                         if (up->ut_type == DEAD_PROCESS) {
2782                                 /*
2783                                  * Cleaned up elsewhere.
2784                                  */
2785                                 continue;
2786                         }
2787 
2788                         notify_pam_dead(up);
2789 
2790                         up->ut_type = DEAD_PROCESS;
2791                         up->ut_exit.e_termination = WTERMSIG(status);
2792                         up->ut_exit.e_exit = WEXITSTATUS(status);
2793                         (void) time(&up->ut_tv.tv_sec);
2794 
2795                         (void) pututxline(up);
2796                         /*
2797                          * Now attempt to add to the end of the
2798                          * wtmp and wtmpx files.  Do not create
2799                          * if they don't already exist.
2800                          */
2801                         updwtmpx(WTMPX, up);
2802 
2803                         break;
2804                 }
2805         }
2806 
2807         endutxent();
2808         (void) sigprocmask(SIG_SETMASK, &unblock, NULL);
2809 }
2810 
2811 /*
2812  * prog_name() searches for the word or unix path name and
2813  * returns a pointer to the last element of the pathname.
2814  */
2815 static char *
2816 prog_name(char *string)
2817 {
2818         char    *ptr, *ptr2;
2819         static char word[UT_USER_SZ + 1];
2820 
2821         /*
2822          * Search for the first word skipping leading spaces and tabs.
2823          */
2824         while (*string == ' ' || *string == '\t')
2825                 string++;
2826 
2827         /*
2828          * If the first non-space non-tab character is not one allowed in
2829          * a word, return a pointer to a null string, otherwise parse the
2830          * pathname.
2831          */
2832         if (*string != '.' && *string != '/' && *string != '_' &&
2833             (*string < 'a' || *string > 'z') &&
2834             (*string < 'A' || * string > 'Z') &&
2835             (*string < '0' || *string > '9'))
2836                 return ("");
2837 
2838         /*
2839          * Parse the pathname looking forward for '/', ' ', '\t', '\n' or
2840          * '\0'.  Each time a '/' is found, move "ptr" to one past the
2841          * '/', thus when a ' ', '\t', '\n', or '\0' is found, "ptr" will
2842          * point to the last element of the pathname.
2843          */
2844         for (ptr = string; *string != ' ' && *string != '\t' &&
2845             *string != '\n' && *string != '\0'; string++) {
2846                 if (*string == '/')
2847                         ptr = string+1;
2848         }
2849 
2850         /*
2851          * Copy out up to the size of the "ut_user" array into "word",
2852          * null terminate it and return a pointer to it.
2853          */
2854         for (ptr2 = &word[0]; ptr2 < &word[UT_USER_SZ] &&
2855             ptr < string; /* CSTYLED */)
2856                 *ptr2++ = *ptr++;
2857 
2858         *ptr2 = '\0';
2859         return (&word[0]);
2860 }
2861 
2862 
2863 /*
2864  * realcon() returns a nonzero value if there is a character device
2865  * associated with SYSCON that has the same device number as CONSOLE.
2866  */
2867 static int
2868 realcon()
2869 {
2870         struct stat sconbuf, conbuf;
2871 
2872         if (stat(SYSCON, &sconbuf) != -1 &&
2873             stat(CONSOLE, &conbuf) != -1 &&
2874             S_ISCHR(sconbuf.st_mode) &&
2875             S_ISCHR(conbuf.st_mode) &&
2876             sconbuf.st_rdev == conbuf.st_rdev) {
2877                 return (1);
2878         } else {
2879                 return (0);
2880         }
2881 }
2882 
2883 
2884 /*
2885  * get_ioctl_syscon() retrieves the SYSCON settings from the IOCTLSYSCON file.
2886  * Returns true if the IOCTLSYSCON file needs to be written (with
2887  * write_ioctl_syscon() below)
2888  */
2889 static int
2890 get_ioctl_syscon()
2891 {
2892         FILE    *fp;
2893         unsigned int    iflags, oflags, cflags, lflags, ldisc, cc[18];
2894         int             i, valid_format = 0;
2895 
2896         /*
2897          * Read in the previous modes for SYSCON from IOCTLSYSCON.
2898          */
2899         if ((fp = fopen(IOCTLSYSCON, "r")) == NULL) {
2900                 stored_syscon_termios = dflt_termios;
2901                 console(B_TRUE,
2902                     "warning:%s does not exist, default settings assumed\n",
2903                     IOCTLSYSCON);
2904         } else {
2905 
2906                 i = fscanf(fp,
2907             "%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x",
2908                     &iflags, &oflags, &cflags, &lflags,
2909                     &cc[0], &cc[1], &cc[2], &cc[3], &cc[4], &cc[5], &cc[6],
2910                     &cc[7], &cc[8], &cc[9], &cc[10], &cc[11], &cc[12], &cc[13],
2911                     &cc[14], &cc[15], &cc[16], &cc[17]);
2912 
2913                 if (i == 22) {
2914                         stored_syscon_termios.c_iflag = iflags;
2915                         stored_syscon_termios.c_oflag = oflags;
2916                         stored_syscon_termios.c_cflag = cflags;
2917                         stored_syscon_termios.c_lflag = lflags;
2918                         for (i = 0; i < 18; i++)
2919                                 stored_syscon_termios.c_cc[i] = (char)cc[i];
2920                         valid_format = 1;
2921                 } else if (i == 13) {
2922                 rewind(fp);
2923                 i = fscanf(fp, "%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x",
2924                     &iflags, &oflags, &cflags, &lflags, &ldisc, &cc[0], &cc[1],
2925                     &cc[2], &cc[3], &cc[4], &cc[5], &cc[6], &cc[7]);
2926 
2927                 /*
2928                  * If the file is formatted properly, use the values to
2929                  * initialize the console terminal condition.
2930                  */
2931                 stored_syscon_termios.c_iflag = (ushort_t)iflags;
2932                 stored_syscon_termios.c_oflag = (ushort_t)oflags;
2933                 stored_syscon_termios.c_cflag = (ushort_t)cflags;
2934                 stored_syscon_termios.c_lflag = (ushort_t)lflags;
2935                 for (i = 0; i < 8; i++)
2936                         stored_syscon_termios.c_cc[i] = (char)cc[i];
2937                 valid_format = 1;
2938                 }
2939                 (void) fclose(fp);
2940 
2941                 /* If the file is badly formatted, use the default settings. */
2942                 if (!valid_format)
2943                         stored_syscon_termios = dflt_termios;
2944         }
2945 
2946         /* If the file had a bad format, rewrite it later. */
2947         return (!valid_format);
2948 }
2949 
2950 
2951 static void
2952 write_ioctl_syscon()
2953 {
2954         FILE *fp;
2955         int i;
2956 
2957         (void) unlink(SYSCON);
2958         (void) link(SYSTTY, SYSCON);
2959         (void) umask(022);
2960         fp = fopen(IOCTLSYSCON, "w");
2961 
2962         (void) fprintf(fp, "%x:%x:%x:%x:0", stored_syscon_termios.c_iflag,
2963             stored_syscon_termios.c_oflag, stored_syscon_termios.c_cflag,
2964             stored_syscon_termios.c_lflag);
2965         for (i = 0; i < 8; ++i)
2966                 (void) fprintf(fp, ":%x", stored_syscon_termios.c_cc[i]);
2967         (void) putc('\n', fp);
2968 
2969         (void) fflush(fp);
2970         (void) fsync(fileno(fp));
2971         (void) fclose(fp);
2972         (void) umask(cmask);
2973 }
2974 
2975 
2976 /*
2977  * void console(boolean_t, char *, ...)
2978  *   Outputs the requested message to the system console.  Note that the number
2979  *   of arguments passed to console() should be determined by the print format.
2980  *
2981  *   The "prefix" parameter indicates whether or not "INIT: " should precede the
2982  *   message.
2983  *
2984  *   To make sure we write to the console in a sane fashion, we use the modes
2985  *   we keep in stored_syscon_termios (which we read out of /etc/ioctl.syscon).
2986  *   Afterwards we restore whatever modes were already there.
2987  */
2988 /* PRINTFLIKE2 */
2989 static void
2990 console(boolean_t prefix, char *format, ...)
2991 {
2992         char    outbuf[BUFSIZ];
2993         va_list args;
2994         int fd, getret;
2995         struct termios old_syscon_termios;
2996         FILE *f;
2997 
2998         /*
2999          * We open SYSCON anew each time in case it has changed (see
3000          * userinit()).
3001          */
3002         if ((fd = open(SYSCON, O_RDWR | O_NOCTTY)) < 0 ||
3003             (f = fdopen(fd, "r+")) == NULL) {
3004                 if (prefix)
3005                         syslog(LOG_WARNING, "INIT: ");
3006                 va_start(args, format);
3007                 vsyslog(LOG_WARNING, format, args);
3008                 va_end(args);
3009                 if (fd >= 0)
3010                         (void) close(fd);
3011                 return;
3012         }
3013         setbuf(f, &outbuf[0]);
3014 
3015         getret = tcgetattr(fd, &old_syscon_termios);
3016         old_syscon_termios.c_cflag &= ~HUPCL;
3017         if (realcon())
3018                 /* Don't overwrite cflag of real console. */
3019                 stored_syscon_termios.c_cflag = old_syscon_termios.c_cflag;
3020 
3021         stored_syscon_termios.c_cflag &= ~HUPCL;
3022 
3023         (void) tcsetattr(fd, TCSANOW, &stored_syscon_termios);
3024 
3025         if (prefix)
3026                 (void) fprintf(f, "\nINIT: ");
3027         va_start(args, format);
3028         (void) vfprintf(f, format, args);
3029         va_end(args);
3030 
3031         if (getret == 0)
3032                 (void) tcsetattr(fd, TCSADRAIN, &old_syscon_termios);
3033 
3034         (void) fclose(f);
3035 }
3036 
3037 /*
3038  * timer() is a substitute for sleep() which uses alarm() and pause().
3039  */
3040 static void
3041 timer(int waitime)
3042 {
3043         setimer(waitime);
3044         while (time_up == FALSE)
3045                 (void) pause();
3046 }
3047 
3048 static void
3049 setimer(int timelimit)
3050 {
3051         alarmclk();
3052         (void) alarm(timelimit);
3053         time_up = (timelimit ? FALSE : TRUE);
3054 }
3055 
/*
 * get_or_add_startd() makes "inst" refer to the SCF_SERVICE_STARTD instance,
 * creating the service and/or the instance when they do not exist.
 *
 * The FMRI is first decoded in one step.  Only if that fails with
 * SCF_ERROR_NOT_FOUND is the path walked piece by piece: the local scope,
 * the "system/svc/restarter" service, and finally the "default" instance,
 * adding the service or the instance if the lookup does not find it.
 * SCF_ERROR_DELETED at a later step jumps back to an earlier one, and
 * SCF_ERROR_EXISTS from an add jumps back to the corresponding lookup.
 *
 * If the local scope itself is missing a message is written to stderr and
 * the process exits with status 1.  Errors not listed in a switch go to
 * bad_error().
 *
 * Returns 0 on success.
 *
 * Fails with
 *   ENOMEM - out of memory
 *   ECONNABORTED - repository connection broken
 *   EPERM - permission denied
 *   EACCES - backend access denied
 *   EROFS - backend readonly
 */
static int
get_or_add_startd(scf_instance_t *inst)
{
        scf_handle_t *h;
        scf_scope_t *scope = NULL;
        scf_service_t *svc = NULL;
        int ret = 0;

        h = scf_instance_handle(inst);

        /* Common case: the instance already exists and decodes directly. */
        if (scf_handle_decode_fmri(h, SCF_SERVICE_STARTD, NULL, NULL, inst,
            NULL, NULL, SCF_DECODE_FMRI_EXACT) == 0)
                return (0);

        switch (scf_error()) {
        case SCF_ERROR_CONNECTION_BROKEN:
                return (ECONNABORTED);

        /* Not there yet: continue below and build it up piece by piece. */
        case SCF_ERROR_NOT_FOUND:
                break;

        case SCF_ERROR_HANDLE_MISMATCH:
        case SCF_ERROR_INVALID_ARGUMENT:
        case SCF_ERROR_CONSTRAINT_VIOLATED:
        default:
                bad_error("scf_handle_decode_fmri", scf_error());
        }

        /* Make sure we're right, since we're adding piece-by-piece. */
        assert(strcmp(SCF_SERVICE_STARTD,
            "svc:/system/svc/restarter:default") == 0);

        /* A NULL from either create call is reported to the caller as ENOMEM. */
        if ((scope = scf_scope_create(h)) == NULL ||
            (svc = scf_service_create(h)) == NULL) {
                ret = ENOMEM;
                goto out;
        }

get_scope:
        if (scf_handle_get_scope(h, SCF_SCOPE_LOCAL, scope) != 0) {
                switch (scf_error()) {
                case SCF_ERROR_CONNECTION_BROKEN:
                        ret = ECONNABORTED;
                        goto out;

                case SCF_ERROR_NOT_FOUND:
                        (void) fputs(gettext(
                            "smf(5) repository missing local scope.\n"),
                            stderr);
                        exit(1);
                        /* NOTREACHED */

                case SCF_ERROR_HANDLE_MISMATCH:
                case SCF_ERROR_INVALID_ARGUMENT:
                default:
                        bad_error("scf_handle_get_scope", scf_error());
                }
        }

get_svc:
        if (scf_scope_get_service(scope, "system/svc/restarter", svc) != 0) {
                switch (scf_error()) {
                case SCF_ERROR_CONNECTION_BROKEN:
                        ret = ECONNABORTED;
                        goto out;

                case SCF_ERROR_DELETED:
                        goto get_scope;

                /* Service is missing: leave the switch and add it. */
                case SCF_ERROR_NOT_FOUND:
                        break;

                case SCF_ERROR_HANDLE_MISMATCH:
                case SCF_ERROR_INVALID_ARGUMENT:
                case SCF_ERROR_NOT_SET:
                default:
                        bad_error("scf_scope_get_service", scf_error());
                }

                /*
                 * Also a jump target: the instance steps below come back
                 * here when they get SCF_ERROR_DELETED.
                 */
add_svc:
                if (scf_scope_add_service(scope, "system/svc/restarter", svc) !=
                    0) {
                        switch (scf_error()) {
                        case SCF_ERROR_CONNECTION_BROKEN:
                                ret = ECONNABORTED;
                                goto out;

                        /* Someone else added it meanwhile: look it up. */
                        case SCF_ERROR_EXISTS:
                                goto get_svc;

                        case SCF_ERROR_PERMISSION_DENIED:
                                ret = EPERM;
                                goto out;

                        case SCF_ERROR_BACKEND_ACCESS:
                                ret = EACCES;
                                goto out;

                        case SCF_ERROR_BACKEND_READONLY:
                                ret = EROFS;
                                goto out;

                        case SCF_ERROR_HANDLE_MISMATCH:
                        case SCF_ERROR_INVALID_ARGUMENT:
                        case SCF_ERROR_NOT_SET:
                        default:
                                bad_error("scf_scope_add_service", scf_error());
                        }
                }
        }

get_inst:
        if (scf_service_get_instance(svc, "default", inst) != 0) {
                switch (scf_error()) {
                case SCF_ERROR_CONNECTION_BROKEN:
                        ret = ECONNABORTED;
                        goto out;

                case SCF_ERROR_DELETED:
                        goto add_svc;

                /* Instance is missing: leave the switch and add it. */
                case SCF_ERROR_NOT_FOUND:
                        break;

                case SCF_ERROR_HANDLE_MISMATCH:
                case SCF_ERROR_INVALID_ARGUMENT:
                case SCF_ERROR_NOT_SET:
                default:
                        bad_error("scf_service_get_instance", scf_error());
                }

                if (scf_service_add_instance(svc, "default", inst) !=
                    0) {
                        switch (scf_error()) {
                        case SCF_ERROR_CONNECTION_BROKEN:
                                ret = ECONNABORTED;
                                goto out;

                        case SCF_ERROR_DELETED:
                                goto add_svc;

                        /* Someone else added it meanwhile: look it up. */
                        case SCF_ERROR_EXISTS:
                                goto get_inst;

                        case SCF_ERROR_PERMISSION_DENIED:
                                ret = EPERM;
                                goto out;

                        case SCF_ERROR_BACKEND_ACCESS:
                                ret = EACCES;
                                goto out;

                        case SCF_ERROR_BACKEND_READONLY:
                                ret = EROFS;
                                goto out;

                        case SCF_ERROR_HANDLE_MISMATCH:
                        case SCF_ERROR_INVALID_ARGUMENT:
                        case SCF_ERROR_NOT_SET:
                        default:
                                bad_error("scf_service_add_instance",
                                    scf_error());
                        }
                }
        }

        ret = 0;

        /*
         * scope and svc were only needed for the walk; the result lives in
         * the caller's inst.  Both may still be NULL when we arrive here
         * from the failed-create path above.
         */
out:
        scf_service_destroy(svc);
        scf_scope_destroy(scope);
        return (ret);
}
3237 
3238 /*
3239  * Fails with
3240  *   ECONNABORTED - repository connection broken
3241  *   ECANCELED - the transaction's property group was deleted
3242  */
3243 static int
3244 transaction_add_set(scf_transaction_t *tx, scf_transaction_entry_t *ent,
3245     const char *pname, scf_type_t type)
3246 {
3247 change_type:
3248         if (scf_transaction_property_change_type(tx, ent, pname, type) == 0)
3249                 return (0);
3250 
3251         switch (scf_error()) {
3252         case SCF_ERROR_CONNECTION_BROKEN:
3253                 return (ECONNABORTED);
3254 
3255         case SCF_ERROR_DELETED:
3256                 return (ECANCELED);
3257 
3258         case SCF_ERROR_NOT_FOUND:
3259                 goto new;
3260 
3261         case SCF_ERROR_HANDLE_MISMATCH:
3262         case SCF_ERROR_INVALID_ARGUMENT:
3263         case SCF_ERROR_NOT_BOUND:
3264         case SCF_ERROR_NOT_SET:
3265         default:
3266                 bad_error("scf_transaction_property_change_type", scf_error());
3267         }
3268 
3269 new:
3270         if (scf_transaction_property_new(tx, ent, pname, type) == 0)
3271                 return (0);
3272 
3273         switch (scf_error()) {
3274         case SCF_ERROR_CONNECTION_BROKEN:
3275                 return (ECONNABORTED);
3276 
3277         case SCF_ERROR_DELETED:
3278                 return (ECANCELED);
3279 
3280         case SCF_ERROR_EXISTS:
3281                 goto change_type;
3282 
3283         case SCF_ERROR_HANDLE_MISMATCH:
3284         case SCF_ERROR_INVALID_ARGUMENT:
3285         case SCF_ERROR_NOT_BOUND:
3286         case SCF_ERROR_NOT_SET:
3287         default:
3288                 bad_error("scf_transaction_property_new", scf_error());
3289                 /* NOTREACHED */
3290         }
3291 }
3292 
3293 static void
3294 scferr(void)
3295 {
3296         switch (scf_error()) {
3297         case SCF_ERROR_NO_MEMORY:
3298                 console(B_TRUE, gettext("Out of memory.\n"));
3299                 break;
3300 
3301         case SCF_ERROR_CONNECTION_BROKEN:
3302                 console(B_TRUE, gettext(
3303                     "Connection to smf(5) repository server broken.\n"));
3304                 break;
3305 
3306         case SCF_ERROR_NO_RESOURCES:
3307                 console(B_TRUE, gettext(
3308                     "smf(5) repository server is out of memory.\n"));
3309                 break;
3310 
3311         case SCF_ERROR_PERMISSION_DENIED:
3312                 console(B_TRUE, gettext("Insufficient privileges.\n"));
3313                 break;
3314 
3315         default:
3316                 console(B_TRUE, gettext("libscf error: %s\n"),
3317                     scf_strerror(scf_error()));
3318         }
3319 }
3320 
/*
 * lscf_set_runlevel() stores the run-level character "rl" as the astring
 * property "runlevel" in the SCF_PG_OPTIONS_OVR property group of the
 * instance obtained from get_or_add_startd().
 *
 * A fresh repository handle is created and bound for the duration of the
 * call.  The instance and the property group are created if they are
 * missing, and the steps are retried from the appropriate label when the
 * repository reports that an object was deleted or already exists.
 *
 * Nothing is returned.  Failures that are not retried are reported on the
 * console (via scferr() or console()) and the function just cleans up;
 * errors not listed in a switch go to bad_error().
 */
static void
lscf_set_runlevel(char rl)
{
        scf_handle_t *h;
        scf_instance_t *inst = NULL;
        scf_propertygroup_t *pg = NULL;
        scf_transaction_t *tx = NULL;
        scf_transaction_entry_t *ent = NULL;
        scf_value_t *val = NULL;
        char buf[2];
        int r;

        /* Without a handle there is nothing to clean up: return directly. */
        h = scf_handle_create(SCF_VERSION);
        if (h == NULL) {
                scferr();
                return;
        }

        if (scf_handle_bind(h) != 0) {
                switch (scf_error()) {
                case SCF_ERROR_NO_SERVER:
                        console(B_TRUE,
                            gettext("smf(5) repository server not running.\n"));
                        goto bail;

                default:
                        scferr();
                        goto bail;
                }
        }

        /*
         * Allocate every libscf object up front.  The pointers start out
         * NULL, so the bail path can destroy them all unconditionally.
         */
        if ((inst = scf_instance_create(h)) == NULL ||
            (pg = scf_pg_create(h)) == NULL ||
            (val = scf_value_create(h)) == NULL ||
            (tx = scf_transaction_create(h)) == NULL ||
            (ent = scf_entry_create(h)) == NULL) {
                scferr();
                goto bail;
        }

get_inst:
        r = get_or_add_startd(inst);
        switch (r) {
        case 0:
                break;

        case ENOMEM:
        case ECONNABORTED:
        case EPERM:
        case EACCES:
        case EROFS:
                scferr();
                goto bail;
        default:
                bad_error("get_or_add_startd", r);
        }

get_pg:
        if (scf_instance_get_pg(inst, SCF_PG_OPTIONS_OVR, pg) != 0) {
                switch (scf_error()) {
                case SCF_ERROR_CONNECTION_BROKEN:
                        scferr();
                        goto bail;

                /* The instance went away: fetch or re-create it. */
                case SCF_ERROR_DELETED:
                        goto get_inst;

                /* No such property group: leave the switch and add it. */
                case SCF_ERROR_NOT_FOUND:
                        break;

                case SCF_ERROR_HANDLE_MISMATCH:
                case SCF_ERROR_INVALID_ARGUMENT:
                case SCF_ERROR_NOT_SET:
                default:
                        bad_error("scf_instance_get_pg", scf_error());
                }

                /*
                 * Also a jump target: the transaction loop below comes back
                 * here when the property group turns out to be deleted.
                 */
add_pg:
                if (scf_instance_add_pg(inst, SCF_PG_OPTIONS_OVR,
                    SCF_PG_OPTIONS_OVR_TYPE, SCF_PG_OPTIONS_OVR_FLAGS, pg) !=
                    0) {
                        switch (scf_error()) {
                        case SCF_ERROR_CONNECTION_BROKEN:
                        case SCF_ERROR_PERMISSION_DENIED:
                        case SCF_ERROR_BACKEND_ACCESS:
                                scferr();
                                goto bail;

                        case SCF_ERROR_DELETED:
                                goto get_inst;

                        case SCF_ERROR_EXISTS:
                                goto get_pg;

                        case SCF_ERROR_HANDLE_MISMATCH:
                        case SCF_ERROR_INVALID_ARGUMENT:
                        case SCF_ERROR_NOT_SET:
                        default:
                                bad_error("scf_instance_add_pg", scf_error());
                        }
                }
        }

        /* The value to store is the run level as a one-character string. */
        buf[0] = rl;
        buf[1] = '\0';
        r = scf_value_set_astring(val, buf);
        assert(r == 0);

        /*
         * Transaction loop.  It ends when the commit returns 1, or through
         * one of the gotos.  When the commit returns 0 the transaction is
         * reset, the property group is refreshed with scf_pg_update() and
         * the whole transaction is attempted again.
         */
        for (;;) {
                if (scf_transaction_start(tx, pg) != 0) {
                        switch (scf_error()) {
                        case SCF_ERROR_CONNECTION_BROKEN:
                        case SCF_ERROR_PERMISSION_DENIED:
                        case SCF_ERROR_BACKEND_ACCESS:
                                scferr();
                                goto bail;

                        case SCF_ERROR_DELETED:
                                goto add_pg;

                        case SCF_ERROR_HANDLE_MISMATCH:
                        case SCF_ERROR_NOT_BOUND:
                        case SCF_ERROR_IN_USE:
                        case SCF_ERROR_NOT_SET:
                        default:
                                bad_error("scf_transaction_start", scf_error());
                        }
                }

                r = transaction_add_set(tx, ent, "runlevel", SCF_TYPE_ASTRING);
                switch (r) {
                case 0:
                        break;

                case ECONNABORTED:
                        scferr();
                        goto bail;

                /* Property group was deleted under us: re-add it. */
                case ECANCELED:
                        scf_transaction_reset(tx);
                        goto add_pg;

                default:
                        bad_error("transaction_add_set", r);
                }

                r = scf_entry_add_value(ent, val);
                assert(r == 0);

                r = scf_transaction_commit(tx);
                if (r == 1)
                        break;

                if (r != 0) {
                        switch (scf_error()) {
                        case SCF_ERROR_CONNECTION_BROKEN:
                        case SCF_ERROR_PERMISSION_DENIED:
                        case SCF_ERROR_BACKEND_ACCESS:
                        case SCF_ERROR_BACKEND_READONLY:
                                scferr();
                                goto bail;

                        case SCF_ERROR_DELETED:
                                scf_transaction_reset(tx);
                                goto add_pg;

                        case SCF_ERROR_INVALID_ARGUMENT:
                        case SCF_ERROR_NOT_BOUND:
                        case SCF_ERROR_NOT_SET:
                        default:
                                bad_error("scf_transaction_commit",
                                    scf_error());
                        }
                }

                /* r == 0: refresh our view of the group and try again. */
                scf_transaction_reset(tx);
                (void) scf_pg_update(pg);
        }

        /* Common exit for success and failure once the handle exists. */
bail:
        scf_transaction_destroy(tx);
        scf_entry_destroy(ent);
        scf_value_destroy(val);
        scf_pg_destroy(pg);
        scf_instance_destroy(inst);

        (void) scf_handle_unbind(h);
        scf_handle_destroy(h);
}
3510 
3511 /*
3512  * Function to handle requests from users to main init running as process 1.
3513  */
3514 static void
3515 userinit(int argc, char **argv)
3516 {
3517         FILE    *fp;
3518         char    *ln;
3519         int     init_signal;
3520         struct stat     sconbuf, conbuf;
3521         const char *usage_msg = "Usage: init [0123456SsQqabc]\n";
3522 
3523         /*
3524          * We are a user invoked init.  Is there an argument and is it
3525          * a single character?  If not, print usage message and quit.
3526          */
3527         if (argc != 2 || argv[1][1] != '\0') {
3528                 (void) fprintf(stderr, usage_msg);
3529                 exit(0);
3530         }
3531 
3532         if ((init_signal = lvlname_to_state((char)argv[1][0])) == -1) {
3533                 (void) fprintf(stderr, usage_msg);
3534                 (void) audit_put_record(ADT_FAILURE, ADT_FAIL_VALUE_BAD_CMD,
3535                     argv[1]);
3536                 exit(1);
3537         }
3538 
3539         if (init_signal == SINGLE_USER) {
3540                 /*
3541                  * Make sure this process is talking to a legal tty line
3542                  * and that /dev/syscon is linked to this line.
3543                  */
3544                 ln = ttyname(0);        /* Get the name of tty */
3545                 if (ln == NULL) {
3546                         (void) fprintf(stderr,
3547                             "Standard input not a tty line\n");
3548                         (void) audit_put_record(ADT_FAILURE,
3549                             ADT_FAIL_VALUE_BAD_TTY, argv[1]);
3550                         exit(1);
3551                 }
3552 
3553                 if ((stat(ln, &sconbuf) != -1) &&
3554                     (stat(SYSCON, &conbuf) == -1 ||
3555                     sconbuf.st_rdev != conbuf.st_rdev)) {
3556                         /*
3557                          * /dev/syscon needs to change.
3558                          * Unlink /dev/syscon and relink it to the current line.
3559                          */
3560                         if (lstat(SYSCON, &conbuf) != -1 &&
3561                             unlink(SYSCON) == FAILURE) {
3562                                 perror("Can't unlink /dev/syscon");
3563                                 (void) fprintf(stderr,
3564                                     "Run command on the system console.\n");
3565                                 (void) audit_put_record(ADT_FAILURE,
3566                                     ADT_FAIL_VALUE_PROGRAM, argv[1]);
3567                                 exit(1);
3568                         }
3569                         if (symlink(ln, SYSCON) == FAILURE) {
3570                                 (void) fprintf(stderr,
3571                                     "Can't symlink /dev/syscon to %s: %s", ln,
3572                                     strerror(errno));
3573 
3574                                 /* Try to leave a syscon */
3575                                 (void) link(SYSTTY, SYSCON);
3576                                 (void) audit_put_record(ADT_FAILURE,
3577                                     ADT_FAIL_VALUE_PROGRAM, argv[1]);
3578                                 exit(1);
3579                         }
3580 
3581                         /*
3582                          * Try to leave a message on system console saying where
3583                          * /dev/syscon is currently connected.
3584                          */
3585                         if ((fp = fopen(SYSTTY, "r+")) != NULL) {
3586                                 (void) fprintf(fp,
3587                                     "\n****     SYSCON CHANGED TO %s    ****\n",
3588                                     ln);
3589                                 (void) fclose(fp);
3590                         }
3591                 }
3592         }
3593 
3594         update_boot_archive(init_signal);
3595 
3596         (void) audit_put_record(ADT_SUCCESS, ADT_SUCCESS, argv[1]);
3597 
3598         /*
3599          * Signal init; init will take care of telling svc.startd.
3600          */
3601         if (kill(init_pid, init_signal) == FAILURE) {
3602                 (void) fprintf(stderr, "Must be super-user\n");
3603                 (void) audit_put_record(ADT_FAILURE,
3604                     ADT_FAIL_VALUE_AUTH, argv[1]);
3605                 exit(1);
3606         }
3607 
3608         exit(0);
3609 }
3610 
3611 
#define DELTA   25      /* Number of pidlist elements to allocate at a time */

/*
 * sigpoll() drains struct pidrec records from Pfd and applies each one to
 * the pid list headed by Plhead.  Judging by its name and signature it is
 * presumably installed as the SIGPOLL handler -- confirm at the place where
 * handlers are set up.
 *
 *   ADDPID  inserts pd_pid into the list, which is kept in increasing pid
 *           order.  A pid that is already present is not added twice.
 *   REMPID  removes pd_pid from the list if it is there.
 *   other   is reported on the console as a bad message.
 *
 * List nodes come from, and go back to, the free list Plfree.  When the free
 * list is empty DELTA nodes are allocated in one calloc() call.
 *
 * The function returns when Pfd is not open (negative) or as soon as a
 * read() does not deliver one complete record.  The argument is unused.
 */
/* ARGSUSED */
void
sigpoll(int n)
{
        struct pidrec prec;
        struct pidrec *p = &prec;
        struct pidlist *plp;
        struct pidlist *tp, *savetp;
        int i;

        if (Pfd < 0) {
                return;
        }

        for (;;) {
                /*
                 * Important Note: Either read will really fail (in which case
                 * return is all we can do) or will get EAGAIN (Pfd was opened
                 * O_NDELAY), in which case we also want to return.
                 * Always return from here!
                 */
                if (read(Pfd, p, sizeof (struct pidrec)) !=
                                                sizeof (struct pidrec)) {
                        return;
                }
                switch (p->pd_type) {

                case ADDPID:
                        /*
                         * New "godchild", add to list.
                         */
                        if (Plfree == NULL) {
                                plp = (struct pidlist *)calloc(DELTA,
                                    sizeof (struct pidlist));
                                if (plp == NULL) {
                                        /* Can't save pid */
                                        break;
                                }
                                /*
                                 * Point at 2nd record allocated, we'll use plp.
                                 */
                                tp = plp + 1;
                                /*
                                 * Link them into a chain.
                                 * The last node needs no store: calloc()
                                 * zeroed the memory, which this code relies
                                 * on to terminate the chain.
                                 */
                                Plfree = tp;
                                for (i = 0; i < DELTA - 2; i++) {
                                        tp->pl_next = tp + 1;
                                        tp++;
                                }
                        } else {
                                /* Pop the first node off the free list. */
                                plp = Plfree;
                                Plfree = plp->pl_next;
                        }
                        plp->pl_pid = p->pd_pid;
                        plp->pl_dflag = 0;
                        plp->pl_next = NULL;
                        /*
                         * Note - pid list is kept in increasing order of pids.
                         */
                        if (Plhead == NULL) {
                                Plhead = plp;
                                /* Back up to read next record */
                                break;
                        } else {
                                /*
                                 * Walk the list; savetp trails one node
                                 * behind tp.  tp is non-NULL after the loop
                                 * exactly when one of the "break" branches
                                 * was taken.
                                 */
                                savetp = tp = Plhead;
                                while (tp) {
                                        if (plp->pl_pid > tp->pl_pid) {
                                                savetp = tp;
                                                tp = tp->pl_next;
                                                continue;
                                        } else if (plp->pl_pid < tp->pl_pid) {
                                                /* Insert in front of tp. */
                                                if (tp == Plhead) {
                                                        plp->pl_next = Plhead;
                                                        Plhead = plp;
                                                } else {
                                                        plp->pl_next =
                                                            savetp->pl_next;
                                                        savetp->pl_next = plp;
                                                }
                                                break;
                                        } else {
                                                /* Already in list! */
                                                /* Give the node back. */
                                                plp->pl_next = Plfree;
                                                Plfree = plp;
                                                break;
                                        }
                                }
                                if (tp == NULL) {
                                        /* Add to end of list */
                                        savetp->pl_next = plp;
                                }
                        }
                        /* Back up to read next record. */
                        break;

                case REMPID:
                        /*
                         * This one was handled by someone else,
                         * purge it from the list.
                         */
                        if (Plhead == NULL) {
                                /* Back up to read next record. */
                                break;
                        }
                        savetp = tp = Plhead;
                        while (tp) {
                                if (p->pd_pid > tp->pl_pid) {
                                        /* Keep on looking. */
                                        savetp = tp;
                                        tp = tp->pl_next;
                                        continue;
                                } else if (p->pd_pid < tp->pl_pid) {
                                        /* Not in list. */
                                        break;
                                } else {
                                        /* Found it. */
                                        /* Unlink, then push on free list. */
                                        if (tp == Plhead)
                                                Plhead = tp->pl_next;
                                        else
                                                savetp->pl_next = tp->pl_next;
                                        tp->pl_next = Plfree;
                                        Plfree = tp;
                                        break;
                                }
                        }
                        /* Back up to read next record. */
                        break;
                default:
                        console(B_TRUE, "Bad message on initpipe\n");
                        break;
                }
        }
}
3748 
3749 
3750 static void
3751 cleanaux()
3752 {
3753         struct pidlist *savep, *p;
3754         pid_t   pid;
3755         short   status;
3756 
3757         (void) sighold(SIGCLD);
3758         Gchild = 0;     /* Note - Safe to do this here since no SIGCLDs */
3759         (void) sighold(SIGPOLL);
3760         savep = p = Plhead;
3761         while (p) {
3762                 if (p->pl_dflag) {
3763                         /*
3764                          * Found an entry to delete,
3765                          * remove it from list first.
3766                          */
3767                         pid = p->pl_pid;
3768                         status = p->pl_exit;
3769                         if (p == Plhead) {
3770                                 Plhead = p->pl_next;
3771                                 p->pl_next = Plfree;
3772                                 Plfree = p;
3773                                 savep = p = Plhead;
3774                         } else {
3775                                 savep->pl_next = p->pl_next;
3776                                 p->pl_next = Plfree;
3777                                 Plfree = p;
3778                                 p = savep->pl_next;
3779                         }
3780                         clearent(pid, status);
3781                         continue;
3782                 }
3783                 savep = p;
3784                 p = p->pl_next;
3785         }
3786         (void) sigrelse(SIGPOLL);
3787         (void) sigrelse(SIGCLD);
3788 }
3789 
3790 
3791 /*
3792  * /etc/inittab has more entries and we have run out of room in the proc_table
3793  * array. Double the size of proc_table to accomodate the extra entries.
3794  */
3795 static void
3796 increase_proc_table_size()
3797 {
3798         sigset_t block, unblock;
3799         void *ptr;
3800         size_t delta = num_proc * sizeof (struct PROC_TABLE);
3801 
3802 
3803         /*
3804          * Block signals for realloc.
3805          */
3806         (void) sigfillset(&block);
3807         (void) sigprocmask(SIG_BLOCK, &block, &unblock);
3808 
3809 
3810         /*
3811          * On failure we just return because callers of this function check
3812          * for failure.
3813          */
3814         do
3815                 ptr = realloc(g_state, g_state_sz + delta);
3816         while (ptr == NULL && errno == EAGAIN)
3817                 ;
3818 
3819         if (ptr != NULL) {
3820                 /* ensure that the new part is initialized to zero */
3821                 bzero((caddr_t)ptr + g_state_sz, delta);
3822 
3823                 g_state = ptr;
3824                 g_state_sz += delta;
3825                 num_proc <<= 1;
3826         }
3827 
3828 
3829         /* unblock our signals before returning */
3830         (void) sigprocmask(SIG_SETMASK, &unblock, NULL);
3831 }
3832 
3833 
3834 
3835 /*
3836  * Sanity check g_state.
3837  */
3838 static int
3839 st_sane()
3840 {
3841         int i;
3842         struct PROC_TABLE *ptp;
3843 
3844 
3845         /* Note: cur_state is encoded as a signal number */
3846         if (cur_state < 1 || cur_state == 9 || cur_state > 13)
3847                 return (0);
3848 
3849         /* Check num_proc */
3850         if (g_state_sz != sizeof (struct init_state) + (num_proc - 1) *
3851             sizeof (struct PROC_TABLE))
3852                 return (0);
3853 
3854         /* Check proc_table */
3855         for (i = 0, ptp = proc_table; i < num_proc; ++i, ++ptp) {
3856                 /* skip unoccupied entries */
3857                 if (!(ptp->p_flags & OCCUPIED))
3858                         continue;
3859 
3860                 /* p_flags has no bits outside of PF_MASK */
3861                 if (ptp->p_flags & ~(PF_MASK))
3862                         return (0);
3863 
3864                 /* 5 <= pid <= MAXPID */
3865                 if (ptp->p_pid < 5 || ptp->p_pid > MAXPID)
3866                         return (0);
3867 
3868                 /* p_count >= 0 */
3869                 if (ptp->p_count < 0)
3870                         return (0);
3871 
3872                 /* p_time >= 0 */
3873                 if (ptp->p_time < 0)
3874                         return (0);
3875         }
3876 
3877         return (1);
3878 }
3879 
3880 /*
3881  * Initialize our state.
3882  *
3883  * If the system just booted, then init_state_file, which is located on an
3884  * everpresent tmpfs filesystem, should not exist.
3885  *
3886  * If we were restarted, then init_state_file should exist, in
3887  * which case we'll read it in, sanity check it, and use it.
3888  *
3889  * Note: You can't call console() until proc_table is ready.
3890  */
3891 void
3892 st_init()
3893 {
3894         struct stat stb;
3895         int ret, st_fd, insane = 0;
3896         size_t to_be_read;
3897         char *ptr;
3898 
3899 
3900         booting = 1;
3901 
3902         do {
3903                 /*
3904                  * If we can exclusively create the file, then we're the
3905                  * initial invocation of init(1M).
3906                  */
3907                 st_fd = open(init_state_file, O_RDWR | O_CREAT | O_EXCL,
3908                     S_IRUSR | S_IWUSR);
3909         } while (st_fd == -1 && errno == EINTR);
3910         if (st_fd != -1)
3911                 goto new_state;
3912 
3913         booting = 0;
3914 
3915         do {
3916                 st_fd = open(init_state_file, O_RDWR, S_IRUSR | S_IWUSR);
3917         } while (st_fd == -1 && errno == EINTR);
3918         if (st_fd == -1)
3919                 goto new_state;
3920 
3921         /* Get the size of the file. */
3922         do
3923                 ret = fstat(st_fd, &stb);
3924         while (ret == -1 && errno == EINTR)
3925                 ;
3926         if (ret == -1)
3927                 goto new_state;
3928 
3929         do
3930                 g_state = malloc(stb.st_size);
3931         while (g_state == NULL && errno == EAGAIN)
3932                 ;
3933         if (g_state == NULL)
3934                 goto new_state;
3935 
3936         to_be_read = stb.st_size;
3937         ptr = (char *)g_state;
3938         while (to_be_read > 0) {
3939                 ssize_t read_ret;
3940 
3941                 read_ret = read(st_fd, ptr, to_be_read);
3942                 if (read_ret < 0) {
3943                         if (errno == EINTR)
3944                                 continue;
3945 
3946                         goto new_state;
3947                 }
3948 
3949                 to_be_read -= read_ret;
3950                 ptr += read_ret;
3951         }
3952 
3953         (void) close(st_fd);
3954 
3955         g_state_sz = stb.st_size;
3956 
3957         if (st_sane()) {
3958                 console(B_TRUE, "Restarting.\n");
3959                 return;
3960         }
3961 
3962         insane = 1;
3963 
3964 new_state:
3965         if (st_fd >= 0)
3966                 (void) close(st_fd);
3967         else
3968                 (void) unlink(init_state_file);
3969 
3970         if (g_state != NULL)
3971                 free(g_state);
3972 
3973         /* Something went wrong, so allocate new state. */
3974         g_state_sz = sizeof (struct init_state) +
3975             ((init_num_proc - 1) * sizeof (struct PROC_TABLE));
3976         do
3977                 g_state = calloc(1, g_state_sz);
3978         while (g_state == NULL && errno == EAGAIN)
3979                 ;
3980         if (g_state == NULL) {
3981                 /* Fatal error! */
3982                 exit(errno);
3983         }
3984 
3985         g_state->ist_runlevel = -1;
3986         num_proc = init_num_proc;
3987 
3988         if (!booting) {
3989                 console(B_TRUE, "Restarting.\n");
3990 
3991                 /* Overwrite the bad state file. */
3992                 st_write();
3993 
3994                 if (!insane) {
3995                         console(B_TRUE,
3996                             "Error accessing persistent state file `%s'.  "
3997                             "Ignored.\n", init_state_file);
3998                 } else {
3999                         console(B_TRUE,
4000                             "Persistent state file `%s' is invalid and was "
4001                             "ignored.\n", init_state_file);
4002                 }
4003         }
4004 }
4005 
4006 /*
4007  * Write g_state out to the state file.
4008  */
4009 void
4010 st_write()
4011 {
4012         static int complained = 0;
4013 
4014         int st_fd;
4015         char *cp;
4016         size_t sz;
4017         ssize_t ret;
4018 
4019 
4020         do {
4021                 st_fd = open(init_next_state_file,
4022                     O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
4023         } while (st_fd < 0 && errno == EINTR);
4024         if (st_fd < 0)
4025                 goto err;
4026 
4027         cp = (char *)g_state;
4028         sz = g_state_sz;
4029         while (sz > 0) {
4030                 ret = write(st_fd, cp, sz);
4031                 if (ret < 0) {
4032                         if (errno == EINTR)
4033                                 continue;
4034 
4035                         goto err;
4036                 }
4037 
4038                 sz -= ret;
4039                 cp += ret;
4040         }
4041 
4042         (void) close(st_fd);
4043         st_fd = -1;
4044         if (rename(init_next_state_file, init_state_file)) {
4045                 (void) unlink(init_next_state_file);
4046                 goto err;
4047         }
4048         complained = 0;
4049 
4050         return;
4051 
4052 err:
4053         if (st_fd >= 0)
4054                 (void) close(st_fd);
4055 
4056         if (!booting && !complained) {
4057                 /*
4058                  * Only complain after the filesystem should have come up.
4059                  * And only do it once so we don't loop between console()
4060                  * & efork().
4061                  */
4062                 complained = 1;
4063                 if (st_fd)
4064                         console(B_TRUE, "Couldn't write persistent state "
4065                             "file `%s'.\n", init_state_file);
4066                 else
4067                         console(B_TRUE, "Couldn't move persistent state "
4068                             "file `%s' to `%s'.\n", init_next_state_file,
4069                             init_state_file);
4070         }
4071 }
4072 
4073 /*
4074  * Create a contract with these parameters.
4075  */
4076 static int
4077 contract_make_template(uint_t info, uint_t critical, uint_t fatal,
4078     uint64_t cookie)
4079 {
4080         int fd, err;
4081 
4082         char *ioctl_tset_emsg =
4083             "Couldn't set \"%s\" contract template parameter: %s.\n";
4084 
4085         do
4086                 fd = open64(CTFS_ROOT "/process/template", O_RDWR);
4087         while (fd < 0 && errno == EINTR)
4088                 ;
4089         if (fd < 0) {
4090                 console(B_TRUE, "Couldn't create process template: %s.\n",
4091                     strerror(errno));
4092                 return (-1);
4093         }
4094 
4095         if (err = ct_pr_tmpl_set_param(fd, CT_PR_INHERIT | CT_PR_REGENT))
4096                 console(B_TRUE, "Contract set template inherit, regent "
4097                     "failed: %s.\n", strerror(err));
4098 
4099         /*
4100          * These errors result in a misconfigured template, which is better
4101          * than no template at all, so warn but don't abort.
4102          */
4103         if (err = ct_tmpl_set_informative(fd, info))
4104                 console(B_TRUE, ioctl_tset_emsg, "informative", strerror(err));
4105 
4106         if (err = ct_tmpl_set_critical(fd, critical))
4107                 console(B_TRUE, ioctl_tset_emsg, "critical", strerror(err));
4108 
4109         if (err = ct_pr_tmpl_set_fatal(fd, fatal))
4110                 console(B_TRUE, ioctl_tset_emsg, "fatal", strerror(err));
4111 
4112         if (err = ct_tmpl_set_cookie(fd, cookie))
4113                 console(B_TRUE, ioctl_tset_emsg, "cookie", strerror(err));
4114 
4115         (void) fcntl(fd, F_SETFD, FD_CLOEXEC);
4116 
4117         return (fd);
4118 }
4119 
4120 /*
4121  * Create the templates and open an event file descriptor.  We use dup2(2) to
4122  * get these descriptors away from the stdin/stdout/stderr group.
4123  */
4124 static void
4125 contracts_init()
4126 {
4127         int err, fd;
4128 
4129         /*
4130          * Create & configure a legacy template.  We only want empty events so
4131          * we know when to abandon them.
4132          */
4133         legacy_tmpl = contract_make_template(0, CT_PR_EV_EMPTY, CT_PR_EV_HWERR,
4134             ORDINARY_COOKIE);
4135         if (legacy_tmpl >= 0) {
4136                 err = ct_tmpl_activate(legacy_tmpl);
4137                 if (err != 0) {
4138                         (void) close(legacy_tmpl);
4139                         legacy_tmpl = -1;
4140                         console(B_TRUE,
4141                             "Couldn't activate legacy template (%s); "
4142                             "legacy services will be in init's contract.\n",
4143                             strerror(err));
4144                 }
4145         } else
4146                 console(B_TRUE,
4147                     "Legacy services will be in init's contract.\n");
4148 
4149         if (dup2(legacy_tmpl, 255) == -1) {
4150                 console(B_TRUE, "Could not duplicate legacy template: %s.\n",
4151                     strerror(errno));
4152         } else {
4153                 (void) close(legacy_tmpl);
4154                 legacy_tmpl = 255;
4155         }
4156 
4157         (void) fcntl(legacy_tmpl, F_SETFD, FD_CLOEXEC);
4158 
4159         startd_tmpl = contract_make_template(0, CT_PR_EV_EMPTY,
4160             CT_PR_EV_HWERR | CT_PR_EV_SIGNAL | CT_PR_EV_CORE, STARTD_COOKIE);
4161 
4162         if (dup2(startd_tmpl, 254) == -1) {
4163                 console(B_TRUE, "Could not duplicate startd template: %s.\n",
4164                     strerror(errno));
4165         } else {
4166                 (void) close(startd_tmpl);
4167                 startd_tmpl = 254;
4168         }
4169 
4170         (void) fcntl(startd_tmpl, F_SETFD, FD_CLOEXEC);
4171 
4172         if (legacy_tmpl < 0 && startd_tmpl < 0) {
4173                 /* The creation errors have already been reported. */
4174                 console(B_TRUE,
4175                     "Ignoring contract events.  Core smf(5) services will not "
4176                     "be restarted.\n");
4177                 return;
4178         }
4179 
4180         /*
4181          * Open an event endpoint.
4182          */
4183         do
4184                 fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
4185         while (fd < 0 && errno == EINTR)
4186                 ;
4187         if (fd < 0) {
4188                 console(B_TRUE,
4189                     "Couldn't open process pbundle: %s.  Core smf(5) services "
4190                     "will not be restarted.\n", strerror(errno));
4191                 return;
4192         }
4193 
4194         if (dup2(fd, 253) == -1) {
4195                 console(B_TRUE, "Could not duplicate process bundle: %s.\n",
4196                     strerror(errno));
4197         } else {
4198                 (void) close(fd);
4199                 fd = 253;
4200         }
4201 
4202         (void) fcntl(fd, F_SETFD, FD_CLOEXEC);
4203 
4204         /* Reset in case we've been restarted. */
4205         (void) ct_event_reset(fd);
4206 
4207         poll_fds[0].fd = fd;
4208         poll_fds[0].events = POLLIN;
4209         poll_nfds = 1;
4210 }
4211 
4212 static int
4213 contract_getfile(ctid_t id, const char *name, int oflag)
4214 {
4215         int fd;
4216 
4217         do
4218                 fd = contract_open(id, "process", name, oflag);
4219         while (fd < 0 && errno == EINTR)
4220                 ;
4221 
4222         if (fd < 0)
4223                 console(B_TRUE, "Couldn't open %s for contract %ld: %s.\n",
4224                     name, id, strerror(errno));
4225 
4226         return (fd);
4227 }
4228 
4229 static int
4230 contract_cookie(ctid_t id, uint64_t *cp)
4231 {
4232         int fd, err;
4233         ct_stathdl_t sh;
4234 
4235         fd = contract_getfile(id, "status", O_RDONLY);
4236         if (fd < 0)
4237                 return (-1);
4238 
4239         err = ct_status_read(fd, CTD_COMMON, &sh);
4240         if (err != 0) {
4241                 console(B_TRUE, "Couldn't read status of contract %ld: %s.\n",
4242                     id, strerror(err));
4243                 (void) close(fd);
4244                 return (-1);
4245         }
4246 
4247         (void) close(fd);
4248 
4249         *cp = ct_status_get_cookie(sh);
4250 
4251         ct_status_free(sh);
4252         return (0);
4253 }
4254 
4255 static void
4256 contract_ack(ct_evthdl_t e)
4257 {
4258         int fd;
4259 
4260         if (ct_event_get_flags(e) & CTE_INFO)
4261                 return;
4262 
4263         fd = contract_getfile(ct_event_get_ctid(e), "ctl", O_WRONLY);
4264         if (fd < 0)
4265                 return;
4266 
4267         (void) ct_ctl_ack(fd, ct_event_get_evid(e));
4268         (void) close(fd);
4269 }
4270 
4271 /*
4272  * Process a contract event.
4273  */
4274 static void
4275 contract_event(struct pollfd *poll)
4276 {
4277         ct_evthdl_t e;
4278         int err;
4279         ctid_t ctid;
4280 
4281         if (!(poll->revents & POLLIN)) {
4282                 if (poll->revents & POLLERR)
4283                         console(B_TRUE,
4284                             "Unknown poll error on my process contract "
4285                             "pbundle.\n");
4286                 return;
4287         }
4288 
4289         err = ct_event_read(poll->fd, &e);
4290         if (err != 0) {
4291                 console(B_TRUE, "Error retrieving contract event: %s.\n",
4292                     strerror(err));
4293                 return;
4294         }
4295 
4296         ctid = ct_event_get_ctid(e);
4297 
4298         if (ct_event_get_type(e) == CT_PR_EV_EMPTY) {
4299                 uint64_t cookie;
4300                 int ret, abandon = 1;
4301 
4302                 /* If it's svc.startd, restart it.  Else, abandon. */
4303                 ret = contract_cookie(ctid, &cookie);
4304 
4305                 if (ret == 0) {
4306                         if (cookie == STARTD_COOKIE &&
4307                             do_restart_startd) {
4308                                 if (smf_debug)
4309                                         console(B_TRUE, "Restarting "
4310                                             "svc.startd.\n");
4311 
4312                                 /*
4313                                  * Account for the failure.  If the failure rate
4314                                  * exceeds a threshold, then drop to maintenance
4315                                  * mode.
4316                                  */
4317                                 startd_record_failure();
4318                                 if (startd_failure_rate_critical())
4319                                         enter_maintenance();
4320 
4321                                 if (startd_tmpl < 0)
4322                                         console(B_TRUE,
4323                                             "Restarting svc.startd in "
4324                                             "improper contract (bad "
4325                                             "template).\n");
4326 
4327                                 (void) startd_run(startd_cline, startd_tmpl,
4328                                     ctid);
4329 
4330                                 abandon = 0;
4331                         }
4332                 }
4333 
4334                 if (abandon && (err = contract_abandon_id(ctid))) {
4335                         console(B_TRUE, "Couldn't abandon contract %ld: %s.\n",
4336                             ctid, strerror(err));
4337                 }
4338 
4339                 /*
4340                  * No need to acknowledge the event since either way the
4341                  * originating contract should be abandoned.
4342                  */
4343         } else {
4344                 console(B_TRUE,
4345                     "Received contract event of unexpected type %d from "
4346                     "contract %ld.\n", ct_event_get_type(e), ctid);
4347 
4348                 if ((ct_event_get_flags(e) & (CTE_INFO | CTE_ACK)) == 0)
4349                         /* Allow unexpected critical events to be released. */
4350                         contract_ack(e);
4351         }
4352 
4353         ct_event_free(e);
4354 }
4355 
4356 /*
4357  * svc.startd(1M) Management
4358  */
4359 
4360 /*
4361  * (Re)start svc.startd(1M).  old_ctid should be the contract ID of the old
4362  * contract, or 0 if we're starting it for the first time.  If wait is true
4363  * we'll wait for and return the exit value of the child.
4364  */
4365 static int
4366 startd_run(const char *cline, int tmpl, ctid_t old_ctid)
4367 {
4368         int err, i, ret, did_activate;
4369         pid_t pid;
4370         struct stat sb;
4371 
4372         if (cline[0] == '\0')
4373                 return (-1);
4374 
4375         /*
4376          * Don't restart startd if the system is rebooting or shutting down.
4377          */
4378         do {
4379                 ret = stat("/etc/svc/volatile/resetting", &sb);
4380         } while (ret == -1 && errno == EINTR);
4381 
4382         if (ret == 0) {
4383                 if (smf_debug)
4384                         console(B_TRUE, "Quiescing for reboot.\n");
4385                 (void) pause();
4386                 return (-1);
4387         }
4388 
4389         err = ct_pr_tmpl_set_transfer(tmpl, old_ctid);
4390         if (err == EINVAL) {
4391                 console(B_TRUE, "Remake startd_tmpl; reattempt transfer.\n");
4392                 tmpl = startd_tmpl = contract_make_template(0, CT_PR_EV_EMPTY,
4393                     CT_PR_EV_HWERR, STARTD_COOKIE);
4394 
4395                 err = ct_pr_tmpl_set_transfer(tmpl, old_ctid);
4396         }
4397         if (err != 0) {
4398                 console(B_TRUE,
4399                     "Couldn't set transfer parameter of contract template: "
4400                     "%s.\n", strerror(err));
4401         }
4402 
4403         if ((err = ct_pr_tmpl_set_svc_fmri(startd_tmpl,
4404             SCF_SERVICE_STARTD)) != 0)
4405                 console(B_TRUE,
4406                     "Can not set svc_fmri in contract template: %s\n",
4407                     strerror(err));
4408         if ((err = ct_pr_tmpl_set_svc_aux(startd_tmpl,
4409             startd_svc_aux)) != 0)
4410                 console(B_TRUE,
4411                     "Can not set svc_aux in contract template: %s\n",
4412                     strerror(err));
4413         did_activate = !(ct_tmpl_activate(tmpl));
4414         if (!did_activate)
4415                 console(B_TRUE,
4416                     "Template activation failed; not starting \"%s\" in "
4417                     "proper contract.\n", cline);
4418 
4419         /* Hold SIGCLD so we can wait if necessary. */
4420         (void) sighold(SIGCLD);
4421 
4422         while ((pid = fork()) < 0) {
4423                 if (errno == EPERM) {
4424                         console(B_TRUE, "Insufficient permission to fork.\n");
4425 
4426                         /* Now that's a doozy. */
4427                         exit(1);
4428                 }
4429 
4430                 console(B_TRUE,
4431                     "fork() for svc.startd failed: %s.  Will retry in 1 "
4432                     "second...\n", strerror(errno));
4433 
4434                 (void) sleep(1);
4435 
4436                 /* Eventually give up? */
4437         }
4438 
4439         if (pid == 0) {
4440                 /* child */
4441 
4442                 /* See the comment in efork() */
4443                 for (i = SIGHUP; i <= SIGRTMAX; ++i) {
4444                         if (i == SIGTTOU || i == SIGTTIN || i == SIGTSTP)
4445                                 (void) sigset(i, SIG_IGN);
4446                         else
4447                                 (void) sigset(i, SIG_DFL);
4448                 }
4449 
4450                 if (smf_options != NULL) {
4451                         /* Put smf_options in the environment. */
4452                         glob_envp[glob_envn] =
4453                             malloc(sizeof ("SMF_OPTIONS=") - 1 +
4454                             strlen(smf_options) + 1);
4455 
4456                         if (glob_envp[glob_envn] != NULL) {
4457                                 /* LINTED */
4458                                 (void) sprintf(glob_envp[glob_envn],
4459                                     "SMF_OPTIONS=%s", smf_options);
4460                                 glob_envp[glob_envn+1] = NULL;
4461                         } else {
4462                                 console(B_TRUE,
4463                                     "Could not set SMF_OPTIONS (%s).\n",
4464                                     strerror(errno));
4465                         }
4466                 }
4467 
4468                 if (smf_debug)
4469                         console(B_TRUE, "Executing svc.startd\n");
4470 
4471                 (void) execle(SH, "INITSH", "-c", cline, NULL, glob_envp);
4472 
4473                 console(B_TRUE, "Could not exec \"%s\" (%s).\n", SH,
4474                     strerror(errno));
4475 
4476                 exit(1);
4477         }
4478 
4479         /* parent */
4480 
4481         if (did_activate) {
4482                 if (legacy_tmpl < 0 || ct_tmpl_activate(legacy_tmpl) != 0)
4483                         (void) ct_tmpl_clear(tmpl);
4484         }
4485 
4486         /* Clear the old_ctid reference so the kernel can reclaim it. */
4487         if (old_ctid != 0)
4488                 (void) ct_pr_tmpl_set_transfer(tmpl, 0);
4489 
4490         (void) sigrelse(SIGCLD);
4491 
4492         return (0);
4493 }
4494 
4495 /*
4496  * void startd_record_failure(void)
4497  *   Place the current time in our circular array of svc.startd failures.
4498  */
4499 void
4500 startd_record_failure()
4501 {
4502         int index = startd_failure_index++ % NSTARTD_FAILURE_TIMES;
4503 
4504         startd_failure_time[index] = gethrtime();
4505 }
4506 
4507 /*
4508  * int startd_failure_rate_critical(void)
4509  *   Return true if the average failure interval is less than the permitted
4510  *   interval.  Implicit success if insufficient measurements for an average
4511  *   exist.
4512  */
4513 int
4514 startd_failure_rate_critical()
4515 {
4516         int n = startd_failure_index;
4517         hrtime_t avg_ns = 0;
4518 
4519         if (startd_failure_index < NSTARTD_FAILURE_TIMES)
4520                 return (0);
4521 
4522         avg_ns =
4523             (startd_failure_time[(n - 1) % NSTARTD_FAILURE_TIMES] -
4524             startd_failure_time[n % NSTARTD_FAILURE_TIMES]) /
4525             NSTARTD_FAILURE_TIMES;
4526 
4527         return (avg_ns < STARTD_FAILURE_RATE_NS);
4528 }
4529 
4530 /*
4531  * returns string that must be free'd
4532  */
4533 
4534 static char
4535 *audit_boot_msg()
4536 {
4537         char            *b, *p;
4538         char            desc[] = "booted";
4539         zoneid_t        zid = getzoneid();
4540 
4541         b = malloc(sizeof (desc) + MAXNAMELEN + 3);
4542         if (b == NULL)
4543                 return (b);
4544 
4545         p = b;
4546         p += strlcpy(p, desc, sizeof (desc));
4547         if (zid != GLOBAL_ZONEID) {
4548                 p += strlcpy(p, ": ", 3);
4549                 (void) getzonenamebyid(zid, p, MAXNAMELEN);
4550         }
4551         return (b);
4552 }
4553 
4554 /*
4555  * Generate AUE_init_solaris audit record.  Return 1 if
4556  * auditing is enabled in case the caller cares.
4557  *
4558  * In the case of userint() or a local zone invocation of
4559  * one_true_init, the process initially contains the audit
4560  * characteristics of the process that invoked init.  The first pass
4561  * through here uses those characteristics then for the case of
4562  * one_true_init in a local zone, clears them so subsequent system
4563  * state changes won't be attributed to the person who booted the
4564  * zone.
4565  */
4566 static int
4567 audit_put_record(int pass_fail, int status, char *msg)
4568 {
4569         adt_session_data_t      *ah;
4570         adt_event_data_t        *event;
4571 
4572         if (!adt_audit_enabled())
4573                 return (0);
4574 
4575         /*
4576          * the PROC_DATA picks up the context to tell whether this is
4577          * an attributed record (auid = -2 is unattributed)
4578          */
4579         if (adt_start_session(&ah, NULL, ADT_USE_PROC_DATA)) {
4580                 console(B_TRUE, "audit failure:  %s\n", strerror(errno));
4581                 return (1);
4582         }
4583         event = adt_alloc_event(ah, ADT_init_solaris);
4584         if (event == NULL) {
4585                 console(B_TRUE, "audit failure:  %s\n", strerror(errno));
4586                 (void) adt_end_session(ah);
4587                 return (1);
4588         }
4589         event->adt_init_solaris.info = msg;  /* NULL is ok here */
4590 
4591         if (adt_put_event(event, pass_fail, status)) {
4592                 console(B_TRUE, "audit failure:  %s\n", strerror(errno));
4593                 (void) adt_end_session(ah);
4594                 return (1);
4595         }
4596         adt_free_event(event);
4597 
4598         (void) adt_end_session(ah);
4599 
4600         return (1);
4601 }