illumos-omnios Old usr/src/cmd/svc/startd/restarter.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * restarter.c - service manipulation
  28  *
  29  * This component manages services whose restarter is svc.startd, the standard
  30  * restarter.  It translates restarter protocol events from the graph engine
  31  * into actions on processes, as a delegated restarter would do.
  32  *
  33  * The master restarter manages a number of always-running threads:
  34  *   - restarter event thread: events from the graph engine
  35  *   - timeout thread: thread to fire queued timeouts
  36  *   - contract thread: thread to handle contract events
  37  *   - wait thread: thread to handle wait-based services
  38  *
  39  * The other threads are created as-needed:
  40  *   - per-instance method threads
  41  *   - per-instance event processing threads
  42  *
  43  * The interaction of all threads must result in the following conditions
  44  * being satisfied (on a per-instance basis):
  45  *   - restarter events must be processed in order
  46  *   - method execution must be serialized
  47  *   - instance delete must be held until outstanding methods are complete
  48  *   - contract events shouldn't be processed while a method is running
  49  *   - timeouts should fire even when a method is running
  50  *
  51  * Service instances are represented by restarter_inst_t's and are kept in the
  52  * instance_list list.
  53  *
  54  * Service States
  55  *   The current state of a service instance is kept in
  56  *   restarter_inst_t->ri_i.i_state.  If transition to a new state could take
  57  *   some time, then before we effect the transition we set
  58  *   restarter_inst_t->ri_i.i_next_state to the target state, and afterwards we
  59  *   rotate i_next_state to i_state and set i_next_state to
  60  *   RESTARTER_STATE_NONE.  So usually i_next_state is _NONE when ri_lock is not
  61  *   held.  The exception is when we launch methods, which are done with
  62  *   a separate thread.  To keep any other threads from grabbing ri_lock before
  63  *   method_thread() does, we set ri_method_thread to the thread id of the
  64  *   method thread, and when it is nonzero any thread with a different thread id
  65  *   waits on ri_method_cv.
  66  *
  67  * Method execution is serialized by blocking on ri_method_cv in
  68  * inst_lookup_by_id() and waiting for a 0 value of ri_method_thread.  This
  69  * also prevents the instance structure from being deleted until all
  70  * outstanding operations such as method_thread() have finished.
  71  *
  72  * Lock ordering:
  73  *
  74  * dgraph_lock [can be held when taking:]
  75  *   utmpx_lock
  76  *   dictionary->dict_lock
  77  *   st->st_load_lock
  78  *   wait_info_lock
  79  *   ru->restarter_update_lock
  80  *     restarter_queue->rpeq_lock
  81  *   instance_list.ril_lock
  82  *     inst->ri_lock
  83  *   st->st_configd_live_lock
  84  *
  85  * instance_list.ril_lock
  86  *   graph_queue->gpeq_lock
  87  *   gu->gu_lock
  88  *   st->st_configd_live_lock
  89  *   dictionary->dict_lock
  90  *   inst->ri_lock
  91  *     graph_queue->gpeq_lock
  92  *     gu->gu_lock
  93  *     tu->tu_lock
  94  *     tq->tq_lock
  95  *     inst->ri_queue_lock
  96  *       wait_info_lock
  97  *       bp->cb_lock
  98  *     utmpx_lock
  99  *
 100  * single_user_thread_lock
 101  *   wait_info_lock
 102  *   utmpx_lock
 103  *
 104  * gu_freeze_lock
 105  *
 106  * logbuf_mutex nests inside pretty much everything.
 107  */
 108 
 109 #include <sys/contract/process.h>
 110 #include <sys/ctfs.h>
 111 #include <sys/stat.h>
 112 #include <sys/time.h>
 113 #include <sys/types.h>
 114 #include <sys/uio.h>
 115 #include <sys/wait.h>
 116 #include <assert.h>
 117 #include <errno.h>
 118 #include <fcntl.h>
 119 #include <libcontract.h>
 120 #include <libcontract_priv.h>
 121 #include <libintl.h>
 122 #include <librestart.h>
 123 #include <librestart_priv.h>
 124 #include <libuutil.h>
 125 #include <limits.h>
 126 #include <poll.h>
 127 #include <port.h>
 128 #include <pthread.h>
 129 #include <stdarg.h>
 130 #include <stdio.h>
 131 #include <strings.h>
 132 #include <unistd.h>
 133 
 134 #include "startd.h"
 135 #include "protocol.h"
 136 
 137 static uu_list_pool_t *restarter_instance_pool;
 138 static restarter_instance_list_t instance_list;
 139 
 140 static uu_list_pool_t *restarter_queue_pool;
 141 
 142 /*
 143  * Function used to reset the restart times for an instance, when
 144  * an administrative task comes along and essentially makes the times
 145  * in this array ineffective.
 146  */
 147 static void
 148 reset_start_times(restarter_inst_t *inst)
 149 {
 150         inst->ri_start_index = 0;
 151         bzero(inst->ri_start_time, sizeof (inst->ri_start_time));
 152 }
 153 
 154 /*ARGSUSED*/
 155 static int
 156 restarter_instance_compare(const void *lc_arg, const void *rc_arg,
 157     void *private)
 158 {
 159         int lc_id = ((const restarter_inst_t *)lc_arg)->ri_id;
 160         int rc_id = *(int *)rc_arg;
 161 
 162         if (lc_id > rc_id)
 163                 return (1);
 164         if (lc_id < rc_id)
 165                 return (-1);
 166         return (0);
 167 }
 168 
 169 static restarter_inst_t *
 170 inst_lookup_by_name(const char *name)
 171 {
 172         int id;
 173 
 174         id = dict_lookup_byname(name);
 175         if (id == -1)
 176                 return (NULL);
 177 
 178         return (inst_lookup_by_id(id));
 179 }
 180 
 181 restarter_inst_t *
 182 inst_lookup_by_id(int id)
 183 {
 184         restarter_inst_t *inst;
 185 
 186         MUTEX_LOCK(&instance_list.ril_lock);
 187         inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
 188         if (inst != NULL)
 189                 MUTEX_LOCK(&inst->ri_lock);
 190         MUTEX_UNLOCK(&instance_list.ril_lock);
 191 
 192         if (inst != NULL) {
 193                 while (inst->ri_method_thread != 0 &&
 194                     !pthread_equal(inst->ri_method_thread, pthread_self())) {
 195                         ++inst->ri_method_waiters;
 196                         (void) pthread_cond_wait(&inst->ri_method_cv,
 197                             &inst->ri_lock);
 198                         assert(inst->ri_method_waiters > 0);
 199                         --inst->ri_method_waiters;
 200                 }
 201         }
 202 
 203         return (inst);
 204 }
 205 
 206 static restarter_inst_t *
 207 inst_lookup_queue(const char *name)
 208 {
 209         int id;
 210         restarter_inst_t *inst;
 211 
 212         id = dict_lookup_byname(name);
 213         if (id == -1)
 214                 return (NULL);
 215 
 216         MUTEX_LOCK(&instance_list.ril_lock);
 217         inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
 218         if (inst != NULL)
 219                 MUTEX_LOCK(&inst->ri_queue_lock);
 220         MUTEX_UNLOCK(&instance_list.ril_lock);
 221 
 222         return (inst);
 223 }
 224 
 225 const char *
 226 service_style(int flags)
 227 {
 228         switch (flags & RINST_STYLE_MASK) {
 229         case RINST_CONTRACT:    return ("contract");
 230         case RINST_TRANSIENT:   return ("transient");
 231         case RINST_WAIT:        return ("wait");
 232 
 233         default:
 234 #ifndef NDEBUG
 235                 uu_warn("%s:%d: Bad flags 0x%x.\n", __FILE__, __LINE__, flags);
 236 #endif
 237                 abort();
 238                 /* NOTREACHED */
 239         }
 240 }
 241 
 242 /*
 243  * Fails with ECONNABORTED or ECANCELED.
 244  */
 245 static int
 246 check_contract(restarter_inst_t *inst, boolean_t primary,
 247     scf_instance_t *scf_inst)
 248 {
 249         ctid_t *ctidp;
 250         int fd, r;
 251 
 252         ctidp = primary ? &inst->ri_i.i_primary_ctid :
 253             &inst->ri_i.i_transient_ctid;
 254 
 255         assert(*ctidp >= 1);
 256 
 257         fd = contract_open(*ctidp, NULL, "status", O_RDONLY);
 258         if (fd >= 0) {
 259                 r = close(fd);
 260                 assert(r == 0);
 261                 return (0);
 262         }
 263 
 264         r = restarter_remove_contract(scf_inst, *ctidp, primary ?
 265             RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT);
 266         switch (r) {
 267         case 0:
 268         case ECONNABORTED:
 269         case ECANCELED:
 270                 *ctidp = 0;
 271                 return (r);
 272 
 273         case ENOMEM:
 274                 uu_die("Out of memory\n");
 275                 /* NOTREACHED */
 276 
 277         case EPERM:
 278                 uu_die("Insufficient privilege.\n");
 279                 /* NOTREACHED */
 280 
 281         case EACCES:
 282                 uu_die("Repository backend access denied.\n");
 283                 /* NOTREACHED */
 284 
 285         case EROFS:
 286                 log_error(LOG_INFO, "Could not remove unusable contract id %ld "
 287                     "for %s from repository.\n", *ctidp, inst->ri_i.i_fmri);
 288                 return (0);
 289 
 290         case EINVAL:
 291         case EBADF:
 292         default:
 293                 assert(0);
 294                 abort();
 295                 /* NOTREACHED */
 296         }
 297 }
 298 
 299 static int stop_instance(scf_handle_t *, restarter_inst_t *, stop_cause_t);
 300 
 301 /*
 302  * int restarter_insert_inst(scf_handle_t *, char *)
 303  *   If the inst is already in the restarter list, return its id.  If the inst
 304  *   is not in the restarter list, initialize a restarter_inst_t, initialize its
 305  *   states, insert it into the list, and return 0.
 306  *
 307  *   Fails with
 308  *     ENOENT - name is not in the repository
 309  */
 310 static int
 311 restarter_insert_inst(scf_handle_t *h, const char *name)
 312 {
 313         int id, r;
 314         restarter_inst_t *inst;
 315         uu_list_index_t idx;
 316         scf_service_t *scf_svc;
 317         scf_instance_t *scf_inst;
 318         scf_snapshot_t *snap = NULL;
 319         scf_propertygroup_t *pg;
 320         char *svc_name, *inst_name;
 321         char logfilebuf[PATH_MAX];
 322         char *c;
 323         boolean_t do_commit_states;
 324         restarter_instance_state_t state, next_state;
 325         protocol_states_t *ps;
 326         pid_t start_pid;
 327         restarter_str_t reason = restarter_str_insert_in_graph;
 328 
 329         MUTEX_LOCK(&instance_list.ril_lock);
 330 
 331         /*
 332          * We don't use inst_lookup_by_name() here because we want the lookup
 333          * & insert to be atomic.
 334          */
 335         id = dict_lookup_byname(name);
 336         if (id != -1) {
 337                 inst = uu_list_find(instance_list.ril_instance_list, &id, NULL,
 338                     &idx);
 339                 if (inst != NULL) {
 340                         MUTEX_UNLOCK(&instance_list.ril_lock);
 341                         return (0);
 342                 }
 343         }
 344 
 345         /* Allocate an instance */
 346         inst = startd_zalloc(sizeof (restarter_inst_t));
 347         inst->ri_utmpx_prefix = startd_alloc(max_scf_value_size);
 348         inst->ri_utmpx_prefix[0] = '\0';
 349 
 350         inst->ri_i.i_fmri = startd_alloc(strlen(name) + 1);
 351         (void) strcpy((char *)inst->ri_i.i_fmri, name);
 352 
 353         inst->ri_queue = startd_list_create(restarter_queue_pool, inst, 0);
 354 
 355         /*
 356          * id shouldn't be -1 since we use the same dictionary as graph.c, but
 357          * just in case.
 358          */
 359         inst->ri_id = (id != -1 ? id : dict_insert(name));
 360 
 361         special_online_hooks_get(name, &inst->ri_pre_online_hook,
 362             &inst->ri_post_online_hook, &inst->ri_post_offline_hook);
 363 
 364         scf_svc = safe_scf_service_create(h);
 365         scf_inst = safe_scf_instance_create(h);
 366         pg = safe_scf_pg_create(h);
 367         svc_name = startd_alloc(max_scf_name_size);
 368         inst_name = startd_alloc(max_scf_name_size);
 369 
 370 rep_retry:
 371         if (snap != NULL)
 372                 scf_snapshot_destroy(snap);
 373         if (inst->ri_logstem != NULL)
 374                 startd_free(inst->ri_logstem, PATH_MAX);
 375         if (inst->ri_common_name != NULL)
 376                 startd_free(inst->ri_common_name, max_scf_value_size);
 377         if (inst->ri_C_common_name != NULL)
 378                 startd_free(inst->ri_C_common_name, max_scf_value_size);
 379         snap = NULL;
 380         inst->ri_logstem = NULL;
 381         inst->ri_common_name = NULL;
 382         inst->ri_C_common_name = NULL;
 383 
 384         if (scf_handle_decode_fmri(h, name, NULL, scf_svc, scf_inst, NULL,
 385             NULL, SCF_DECODE_FMRI_EXACT) != 0) {
 386                 switch (scf_error()) {
 387                 case SCF_ERROR_CONNECTION_BROKEN:
 388                         libscf_handle_rebind(h);
 389                         goto rep_retry;
 390 
 391                 case SCF_ERROR_NOT_FOUND:
 392                         goto deleted;
 393                 }
 394 
 395                 uu_die("Can't decode FMRI %s: %s\n", name,
 396                     scf_strerror(scf_error()));
 397         }
 398 
 399         /*
 400          * If there's no running snapshot, then we execute using the editing
 401          * snapshot.  Pending snapshots will be taken later.
 402          */
 403         snap = libscf_get_running_snapshot(scf_inst);
 404 
 405         if ((scf_service_get_name(scf_svc, svc_name, max_scf_name_size) < 0) ||
 406             (scf_instance_get_name(scf_inst, inst_name, max_scf_name_size) <
 407             0)) {
 408                 switch (scf_error()) {
 409                 case SCF_ERROR_NOT_SET:
 410                         break;
 411 
 412                 case SCF_ERROR_CONNECTION_BROKEN:
 413                         libscf_handle_rebind(h);
 414                         goto rep_retry;
 415 
 416                 default:
 417                         assert(0);
 418                         abort();
 419                 }
 420 
 421                 goto deleted;
 422         }
 423 
 424         (void) snprintf(logfilebuf, PATH_MAX, "%s:%s", svc_name, inst_name);
 425         for (c = logfilebuf; *c != '\0'; c++)
 426                 if (*c == '/')
 427                         *c = '-';
 428 
 429         inst->ri_logstem = startd_alloc(PATH_MAX);
 430         (void) snprintf(inst->ri_logstem, PATH_MAX, "%s%s", logfilebuf,
 431             LOG_SUFFIX);
 432 
 433         /*
 434          * If the restarter group is missing, use uninit/none.  Otherwise,
 435          * we're probably being restarted & don't want to mess up the states
 436          * that are there.
 437          */
 438         state = RESTARTER_STATE_UNINIT;
 439         next_state = RESTARTER_STATE_NONE;
 440 
 441         r = scf_instance_get_pg(scf_inst, SCF_PG_RESTARTER, pg);
 442         if (r != 0) {
 443                 switch (scf_error()) {
 444                 case SCF_ERROR_CONNECTION_BROKEN:
 445                         libscf_handle_rebind(h);
 446                         goto rep_retry;
 447 
 448                 case SCF_ERROR_NOT_SET:
 449                         goto deleted;
 450 
 451                 case SCF_ERROR_NOT_FOUND:
 452                         /*
 453                          * This shouldn't happen since the graph engine should
 454                          * have initialized the state to uninitialized/none if
 455                          * there was no restarter pg.  In case somebody
 456                          * deleted it, though....
 457                          */
 458                         do_commit_states = B_TRUE;
 459                         break;
 460 
 461                 default:
 462                         assert(0);
 463                         abort();
 464                 }
 465         } else {
 466                 r = libscf_read_states(pg, &state, &next_state);
 467                 if (r != 0) {
 468                         do_commit_states = B_TRUE;
 469                 } else {
 470                         if (next_state != RESTARTER_STATE_NONE) {
 471                                 /*
 472                                  * Force next_state to _NONE since we
 473                                  * don't look for method processes.
 474                                  */
 475                                 next_state = RESTARTER_STATE_NONE;
 476                                 do_commit_states = B_TRUE;
 477                         } else {
 478                                 /*
 479                                  * The reason for transition will depend on
 480                                  * state.
 481                                  */
 482                                 if (st->st_initial == 0)
 483                                         reason = restarter_str_startd_restart;
 484                                 else if (state == RESTARTER_STATE_MAINT)
 485                                         reason = restarter_str_bad_repo_state;
 486                                 /*
 487                                  * Inform the restarter of our state without
 488                                  * changing the STIME in the repository.
 489                                  */
 490                                 ps = startd_alloc(sizeof (*ps));
 491                                 inst->ri_i.i_state = ps->ps_state = state;
 492                                 inst->ri_i.i_next_state = ps->ps_state_next =
 493                                     next_state;
 494                                 ps->ps_reason = reason;
 495 
 496                                 graph_protocol_send_event(inst->ri_i.i_fmri,
 497                                     GRAPH_UPDATE_STATE_CHANGE, ps);
 498 
 499                                 do_commit_states = B_FALSE;
 500                         }
 501                 }
 502         }
 503 
 504         switch (libscf_get_startd_properties(scf_inst, snap, &inst->ri_flags,
 505             &inst->ri_utmpx_prefix)) {
 506         case 0:
 507                 break;
 508 
 509         case ECONNABORTED:
 510                 libscf_handle_rebind(h);
 511                 goto rep_retry;
 512 
 513         case ECANCELED:
 514                 goto deleted;
 515 
 516         case ENOENT:
 517                 /*
 518                  * This is odd, because the graph engine should have required
 519                  * the general property group.  So we'll just use default
 520                  * flags in anticipation of the graph engine sending us
 521                  * REMOVE_INSTANCE when it finds out that the general property
 522                  * group has been deleted.
 523                  */
 524                 inst->ri_flags = RINST_CONTRACT;
 525                 break;
 526 
 527         default:
 528                 assert(0);
 529                 abort();
 530         }
 531 
 532         switch (libscf_get_template_values(scf_inst, snap,
 533             &inst->ri_common_name, &inst->ri_C_common_name)) {
 534         case 0:
 535                 break;
 536 
 537         case ECONNABORTED:
 538                 libscf_handle_rebind(h);
 539                 goto rep_retry;
 540 
 541         case ECANCELED:
 542                 goto deleted;
 543 
 544         case ECHILD:
 545         case ENOENT:
 546                 break;
 547 
 548         default:
 549                 assert(0);
 550                 abort();
 551         }
 552 
 553         switch (libscf_read_method_ids(h, scf_inst, inst->ri_i.i_fmri,
 554             &inst->ri_i.i_primary_ctid, &inst->ri_i.i_transient_ctid,
 555             &start_pid)) {
 556         case 0:
 557                 break;
 558 
 559         case ECONNABORTED:
 560                 libscf_handle_rebind(h);
 561                 goto rep_retry;
 562 
 563         case ECANCELED:
 564                 goto deleted;
 565 
 566         default:
 567                 assert(0);
 568                 abort();
 569         }
 570 
 571         if (inst->ri_i.i_primary_ctid >= 1) {
 572                 contract_hash_store(inst->ri_i.i_primary_ctid, inst->ri_id);
 573 
 574                 switch (check_contract(inst, B_TRUE, scf_inst)) {
 575                 case 0:
 576                         break;
 577 
 578                 case ECONNABORTED:
 579                         libscf_handle_rebind(h);
 580                         goto rep_retry;
 581 
 582                 case ECANCELED:
 583                         goto deleted;
 584 
 585                 default:
 586                         assert(0);
 587                         abort();
 588                 }
 589         }
 590 
 591         if (inst->ri_i.i_transient_ctid >= 1) {
 592                 switch (check_contract(inst, B_FALSE, scf_inst)) {
 593                 case 0:
 594                         break;
 595 
 596                 case ECONNABORTED:
 597                         libscf_handle_rebind(h);
 598                         goto rep_retry;
 599 
 600                 case ECANCELED:
 601                         goto deleted;
 602 
 603                 default:
 604                         assert(0);
 605                         abort();
 606                 }
 607         }
 608 
 609         /* No more failures we live through, so add it to the list. */
 610         (void) pthread_mutex_init(&inst->ri_lock, &mutex_attrs);
 611         (void) pthread_mutex_init(&inst->ri_queue_lock, &mutex_attrs);
 612         MUTEX_LOCK(&inst->ri_lock);
 613         MUTEX_LOCK(&inst->ri_queue_lock);
 614 
 615         (void) pthread_cond_init(&inst->ri_method_cv, NULL);
 616 
 617         uu_list_node_init(inst, &inst->ri_link, restarter_instance_pool);
 618         uu_list_insert(instance_list.ril_instance_list, inst, idx);
 619         MUTEX_UNLOCK(&instance_list.ril_lock);
 620 
 621         if (start_pid != -1 &&
 622             (inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT) {
 623                 int ret;
 624                 ret = wait_register(start_pid, inst->ri_i.i_fmri, 0, 1);
 625                 if (ret == -1) {
 626                         /*
 627                          * Implication:  if we can't reregister the
 628                          * instance, we will start another one.  Two
 629                          * instances may or may not result in a resource
 630                          * conflict.
 631                          */
 632                         log_error(LOG_WARNING,
 633                             "%s: couldn't reregister %ld for wait\n",
 634                             inst->ri_i.i_fmri, start_pid);
 635                 } else if (ret == 1) {
 636                         /*
 637                          * Leading PID has exited.
 638                          */
 639                         (void) stop_instance(h, inst, RSTOP_EXIT);
 640                 }
 641         }
 642 
 643 
 644         scf_pg_destroy(pg);
 645 
 646         if (do_commit_states)
 647                 (void) restarter_instance_update_states(h, inst, state,
 648                     next_state, RERR_NONE, reason);
 649 
 650         log_framework(LOG_DEBUG, "%s is a %s-style service\n", name,
 651             service_style(inst->ri_flags));
 652 
 653         MUTEX_UNLOCK(&inst->ri_queue_lock);
 654         MUTEX_UNLOCK(&inst->ri_lock);
 655 
 656         startd_free(svc_name, max_scf_name_size);
 657         startd_free(inst_name, max_scf_name_size);
 658         scf_snapshot_destroy(snap);
 659         scf_instance_destroy(scf_inst);
 660         scf_service_destroy(scf_svc);
 661 
 662         log_framework(LOG_DEBUG, "%s: inserted instance into restarter list\n",
 663             name);
 664 
 665         return (0);
 666 
 667 deleted:
 668         MUTEX_UNLOCK(&instance_list.ril_lock);
 669         startd_free(inst_name, max_scf_name_size);
 670         startd_free(svc_name, max_scf_name_size);
 671         if (snap != NULL)
 672                 scf_snapshot_destroy(snap);
 673         scf_pg_destroy(pg);
 674         scf_instance_destroy(scf_inst);
 675         scf_service_destroy(scf_svc);
 676         startd_free((void *)inst->ri_i.i_fmri, strlen(inst->ri_i.i_fmri) + 1);
 677         uu_list_destroy(inst->ri_queue);
 678         if (inst->ri_logstem != NULL)
 679                 startd_free(inst->ri_logstem, PATH_MAX);
 680         if (inst->ri_common_name != NULL)
 681                 startd_free(inst->ri_common_name, max_scf_value_size);
 682         if (inst->ri_C_common_name != NULL)
 683                 startd_free(inst->ri_C_common_name, max_scf_value_size);
 684         startd_free(inst->ri_utmpx_prefix, max_scf_value_size);
 685         startd_free(inst, sizeof (restarter_inst_t));
 686         return (ENOENT);
 687 }
 688 
 689 static void
 690 restarter_delete_inst(restarter_inst_t *ri)
 691 {
 692         int id;
 693         restarter_inst_t *rip;
 694         void *cookie = NULL;
 695         restarter_instance_qentry_t *e;
 696 
 697         assert(MUTEX_HELD(&ri->ri_lock));
 698 
 699         /*
 700          * Must drop the instance lock so we can pick up the instance_list
 701          * lock & remove the instance.
 702          */
 703         id = ri->ri_id;
 704         MUTEX_UNLOCK(&ri->ri_lock);
 705 
 706         MUTEX_LOCK(&instance_list.ril_lock);
 707 
 708         rip = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
 709         if (rip == NULL) {
 710                 MUTEX_UNLOCK(&instance_list.ril_lock);
 711                 return;
 712         }
 713 
 714         assert(ri == rip);
 715 
 716         uu_list_remove(instance_list.ril_instance_list, ri);
 717 
 718         log_framework(LOG_DEBUG, "%s: deleted instance from restarter list\n",
 719             ri->ri_i.i_fmri);
 720 
 721         MUTEX_UNLOCK(&instance_list.ril_lock);
 722 
 723         /*
 724          * We can lock the instance without holding the instance_list lock
 725          * since we removed the instance from the list.
 726          */
 727         MUTEX_LOCK(&ri->ri_lock);
 728         MUTEX_LOCK(&ri->ri_queue_lock);
 729 
 730         if (ri->ri_i.i_primary_ctid >= 1)
 731                 contract_hash_remove(ri->ri_i.i_primary_ctid);
 732 
 733         while (ri->ri_method_thread != 0 || ri->ri_method_waiters > 0)
 734                 (void) pthread_cond_wait(&ri->ri_method_cv, &ri->ri_lock);
 735 
 736         while ((e = uu_list_teardown(ri->ri_queue, &cookie)) != NULL)
 737                 startd_free(e, sizeof (*e));
 738         uu_list_destroy(ri->ri_queue);
 739 
 740         startd_free((void *)ri->ri_i.i_fmri, strlen(ri->ri_i.i_fmri) + 1);
 741         startd_free(ri->ri_logstem, PATH_MAX);
 742         if (ri->ri_common_name != NULL)
 743                 startd_free(ri->ri_common_name, max_scf_value_size);
 744         if (ri->ri_C_common_name != NULL)
 745                 startd_free(ri->ri_C_common_name, max_scf_value_size);
 746         startd_free(ri->ri_utmpx_prefix, max_scf_value_size);
 747         (void) pthread_mutex_destroy(&ri->ri_lock);
 748         (void) pthread_mutex_destroy(&ri->ri_queue_lock);
 749         startd_free(ri, sizeof (restarter_inst_t));
 750 }
 751 
 752 /*
 753  * instance_is_wait_style()
 754  *
 755  *   Returns 1 if the given instance is a "wait-style" service instance.
 756  */
 757 int
 758 instance_is_wait_style(restarter_inst_t *inst)
 759 {
 760         assert(MUTEX_HELD(&inst->ri_lock));
 761         return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT);
 762 }
 763 
 764 /*
 765  * instance_is_transient_style()
 766  *
 767  *   Returns 1 if the given instance is a transient service instance.
 768  */
 769 int
 770 instance_is_transient_style(restarter_inst_t *inst)
 771 {
 772         assert(MUTEX_HELD(&inst->ri_lock));
 773         return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_TRANSIENT);
 774 }
 775 
 776 /*
 777  * instance_in_transition()
 778  * Returns 1 if instance is in transition, 0 if not
 779  */
 780 int
 781 instance_in_transition(restarter_inst_t *inst)
 782 {
 783         assert(MUTEX_HELD(&inst->ri_lock));
 784         if (inst->ri_i.i_next_state == RESTARTER_STATE_NONE)
 785                 return (0);
 786         return (1);
 787 }
 788 
 789 /*
 790  * returns 1 if instance is already started, 0 if not
 791  */
 792 static int
 793 instance_started(restarter_inst_t *inst)
 794 {
 795         int ret;
 796 
 797         assert(MUTEX_HELD(&inst->ri_lock));
 798 
 799         if (inst->ri_i.i_state == RESTARTER_STATE_ONLINE ||
 800             inst->ri_i.i_state == RESTARTER_STATE_DEGRADED)
 801                 ret = 1;
 802         else
 803                 ret = 0;
 804 
 805         return (ret);
 806 }
 807 
 808 /*
 809  * Returns
 810  *   0 - success
 811  *   ECONNRESET - success, but h was rebound
 812  */
 813 int
 814 restarter_instance_update_states(scf_handle_t *h, restarter_inst_t *ri,
 815     restarter_instance_state_t new_state,
 816     restarter_instance_state_t new_state_next, restarter_error_t err,
 817     restarter_str_t reason)
 818 {
 819         protocol_states_t *states;
 820         int e;
 821         uint_t retry_count = 0, msecs = ALLOC_DELAY;
 822         boolean_t rebound = B_FALSE;
 823         int prev_state_online;
 824         int state_online;
 825 
 826         assert(MUTEX_HELD(&ri->ri_lock));
 827 
 828         prev_state_online = instance_started(ri);
 829 
 830 retry:
 831         e = _restarter_commit_states(h, &ri->ri_i, new_state, new_state_next,
 832             restarter_get_str_short(reason));
 833         switch (e) {
 834         case 0:
 835                 break;
 836 
 837         case ENOMEM:
 838                 ++retry_count;
 839                 if (retry_count < ALLOC_RETRY) {
 840                         (void) poll(NULL, 0, msecs);
 841                         msecs *= ALLOC_DELAY_MULT;
 842                         goto retry;
 843                 }
 844 
 845                 /* Like startd_alloc(). */
 846                 uu_die("Insufficient memory.\n");
 847                 /* NOTREACHED */
 848 
 849         case ECONNABORTED:
 850                 libscf_handle_rebind(h);
 851                 rebound = B_TRUE;
 852                 goto retry;
 853 
 854         case EPERM:
 855         case EACCES:
 856         case EROFS:
 857                 log_error(LOG_NOTICE, "Could not commit state change for %s "
 858                     "to repository: %s.\n", ri->ri_i.i_fmri, strerror(e));
 859                 /* FALLTHROUGH */
 860 
 861         case ENOENT:
 862                 ri->ri_i.i_state = new_state;
 863                 ri->ri_i.i_next_state = new_state_next;
 864                 break;
 865 
 866         case EINVAL:
 867         default:
 868                 bad_error("_restarter_commit_states", e);
 869         }
 870 
 871         states = startd_alloc(sizeof (protocol_states_t));
 872         states->ps_state = new_state;
 873         states->ps_state_next = new_state_next;
 874         states->ps_err = err;
 875         states->ps_reason = reason;
 876         graph_protocol_send_event(ri->ri_i.i_fmri, GRAPH_UPDATE_STATE_CHANGE,
 877             (void *)states);
 878 
 879         state_online = instance_started(ri);
 880 
 881         if (prev_state_online && !state_online)
 882                 ri->ri_post_offline_hook();
 883         else if (!prev_state_online && state_online)
 884                 ri->ri_post_online_hook();
 885 
 886         return (rebound ? ECONNRESET : 0);
 887 }
 888 
 889 void
 890 restarter_mark_pending_snapshot(const char *fmri, uint_t flag)
 891 {
 892         restarter_inst_t *inst;
 893 
 894         assert(flag == RINST_RETAKE_RUNNING || flag == RINST_RETAKE_START);
 895 
 896         inst = inst_lookup_by_name(fmri);
 897         if (inst == NULL)
 898                 return;
 899 
 900         inst->ri_flags |= flag;
 901 
 902         MUTEX_UNLOCK(&inst->ri_lock);
 903 }
 904 
 905 static void
 906 restarter_take_pending_snapshots(scf_handle_t *h)
 907 {
 908         restarter_inst_t *inst;
 909         int r;
 910 
 911         MUTEX_LOCK(&instance_list.ril_lock);
 912 
 913         for (inst = uu_list_first(instance_list.ril_instance_list);
 914             inst != NULL;
 915             inst = uu_list_next(instance_list.ril_instance_list, inst)) {
 916                 const char *fmri;
 917                 scf_instance_t *sinst = NULL;
 918 
 919                 MUTEX_LOCK(&inst->ri_lock);
 920 
 921                 /*
 922                  * This is where we'd check inst->ri_method_thread and if it
 923                  * were nonzero we'd wait in anticipation of another thread
 924                  * executing a method for inst.  Doing so with the instance_list
 925                  * locked, though, leads to deadlock.  Since taking a snapshot
 926                  * during that window won't hurt anything, we'll just continue.
 927                  */
 928 
 929                 fmri = inst->ri_i.i_fmri;
 930 
 931                 if (inst->ri_flags & RINST_RETAKE_RUNNING) {
 932                         scf_snapshot_t *rsnap;
 933 
 934                         (void) libscf_fmri_get_instance(h, fmri, &sinst);
 935 
 936                         rsnap = libscf_get_or_make_running_snapshot(sinst,
 937                             fmri, B_FALSE);
 938 
 939                         scf_instance_destroy(sinst);
 940 
 941                         if (rsnap != NULL)
 942                                 inst->ri_flags &= ~RINST_RETAKE_RUNNING;
 943 
 944                         scf_snapshot_destroy(rsnap);
 945                 }
 946 
 947                 if (inst->ri_flags & RINST_RETAKE_START) {
 948                         switch (r = libscf_snapshots_poststart(h, fmri,
 949                             B_FALSE)) {
 950                         case 0:
 951                         case ENOENT:
 952                                 inst->ri_flags &= ~RINST_RETAKE_START;
 953                                 break;
 954 
 955                         case ECONNABORTED:
 956                                 break;
 957 
 958                         case EACCES:
 959                         default:
 960                                 bad_error("libscf_snapshots_poststart", r);
 961                         }
 962                 }
 963 
 964                 MUTEX_UNLOCK(&inst->ri_lock);
 965         }
 966 
 967         MUTEX_UNLOCK(&instance_list.ril_lock);
 968 }
 969 
 970 /* ARGSUSED */
 971 void *
 972 restarter_post_fsminimal_thread(void *unused)
 973 {
 974         scf_handle_t *h;
 975         int r;
 976 
 977         h = libscf_handle_create_bound_loop();
 978 
 979         for (;;) {
 980                 r = libscf_create_self(h);
 981                 if (r == 0)
 982                         break;
 983 
 984                 assert(r == ECONNABORTED);
 985                 libscf_handle_rebind(h);
 986         }
 987 
 988         restarter_take_pending_snapshots(h);
 989 
 990         (void) scf_handle_unbind(h);
 991         scf_handle_destroy(h);
 992 
 993         return (NULL);
 994 }
 995 
 996 /*
 997  * int stop_instance()
 998  *
 999  *   Stop the instance identified by the instance given as the second argument,
1000  *   for the cause stated.
1001  *
1002  *   Returns
1003  *     0 - success
1004  *     -1 - inst is in transition
1005  */
1006 static int
1007 stop_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1008     stop_cause_t cause)
1009 {
1010         fork_info_t *info;
1011         const char *cp;
1012         int err;
1013         restarter_error_t re;
1014         restarter_str_t reason;
1015 
1016         assert(MUTEX_HELD(&inst->ri_lock));
1017         assert(inst->ri_method_thread == 0);
1018 
1019         switch (cause) {
1020         case RSTOP_EXIT:
1021                 re = RERR_RESTART;
1022                 reason = restarter_str_ct_ev_exit;
1023                 cp = "all processes in service exited";
1024                 break;
1025         case RSTOP_CORE:
1026                 re = RERR_FAULT;
1027                 reason = restarter_str_ct_ev_core;
1028                 cp = "process dumped core";
1029                 break;
1030         case RSTOP_SIGNAL:
1031                 re = RERR_FAULT;
1032                 reason = restarter_str_ct_ev_signal;
1033                 cp = "process received fatal signal from outside the service";
1034                 break;
1035         case RSTOP_HWERR:
1036                 re = RERR_FAULT;
1037                 reason = restarter_str_ct_ev_hwerr;
1038                 cp = "process killed due to uncorrectable hardware error";
1039                 break;
1040         case RSTOP_DEPENDENCY:
1041                 re = RERR_RESTART;
1042                 reason = restarter_str_dependency_activity;
1043                 cp = "dependency activity requires stop";
1044                 break;
1045         case RSTOP_DISABLE:
1046                 re = RERR_RESTART;
1047                 reason = restarter_str_disable_request;
1048                 cp = "service disabled";
1049                 break;
1050         case RSTOP_RESTART:
1051                 re = RERR_RESTART;
1052                 reason = restarter_str_restart_request;
1053                 cp = "service restarting";
1054                 break;
1055         default:
1056 #ifndef NDEBUG
1057                 (void) fprintf(stderr, "Unknown cause %d at %s:%d.\n",
1058                     cause, __FILE__, __LINE__);
1059 #endif
1060                 abort();
1061         }
1062 
1063         /* Services in the disabled and maintenance state are ignored */
1064         if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1065             inst->ri_i.i_state == RESTARTER_STATE_DISABLED) {
1066                 log_framework(LOG_DEBUG,
1067                     "%s: stop_instance -> is maint/disabled\n",
1068                     inst->ri_i.i_fmri);
1069                 return (0);
1070         }
1071 
1072         /* Already stopped instances are left alone */
1073         if (instance_started(inst) == 0) {
1074                 log_framework(LOG_DEBUG, "Restarter: %s is already stopped.\n",
1075                     inst->ri_i.i_fmri);
1076                 return (0);
1077         }
1078 
1079         if (instance_in_transition(inst)) {
1080                 /* requeue event by returning -1 */
1081                 log_framework(LOG_DEBUG,
1082                     "Restarter: Not stopping %s, in transition.\n",
1083                     inst->ri_i.i_fmri);
1084                 return (-1);
1085         }
1086 
1087         log_instance(inst, B_TRUE, "Stopping because %s.", cp);
1088 
1089         log_framework(re == RERR_FAULT ? LOG_INFO : LOG_DEBUG,
1090             "%s: Instance stopping because %s.\n", inst->ri_i.i_fmri, cp);
1091 
1092         if (instance_is_wait_style(inst) && cause == RSTOP_EXIT) {
1093                 /*
1094                  * No need to stop instance, as child has exited; remove
1095                  * contract and move the instance to the offline state.
1096                  */
1097                 switch (err = restarter_instance_update_states(local_handle,
1098                     inst, inst->ri_i.i_state, RESTARTER_STATE_OFFLINE, re,
1099                     reason)) {
1100                 case 0:
1101                 case ECONNRESET:
1102                         break;
1103 
1104                 default:
1105                         bad_error("restarter_instance_update_states", err);
1106                 }
1107 
1108                 (void) update_fault_count(inst, FAULT_COUNT_RESET);
1109                 reset_start_times(inst);
1110 
1111                 if (inst->ri_i.i_primary_ctid != 0) {
1112                         inst->ri_m_inst =
1113                             safe_scf_instance_create(local_handle);
1114                         inst->ri_mi_deleted = B_FALSE;
1115 
1116                         libscf_reget_instance(inst);
1117                         method_remove_contract(inst, B_TRUE, B_TRUE);
1118 
1119                         scf_instance_destroy(inst->ri_m_inst);
1120                         inst->ri_m_inst = NULL;
1121                 }
1122 
1123                 switch (err = restarter_instance_update_states(local_handle,
1124                     inst, inst->ri_i.i_next_state, RESTARTER_STATE_NONE, re,
1125                     reason)) {
1126                 case 0:
1127                 case ECONNRESET:
1128                         break;
1129 
1130                 default:
1131                         bad_error("restarter_instance_update_states", err);
1132                 }
1133 
1134                 return (0);
1135         } else if (instance_is_wait_style(inst) && re == RERR_RESTART) {
1136                 /*
1137                  * Stopping a wait service through means other than the pid
1138                  * exiting should keep wait_thread() from restarting the
1139                  * service, by removing it from the wait list.
1140                  * We cannot remove it right now otherwise the process will
1141                  * end up <defunct> so mark it to be ignored.
1142                  */
1143                 wait_ignore_by_fmri(inst->ri_i.i_fmri);
1144         }
1145 
1146         switch (err = restarter_instance_update_states(local_handle, inst,
1147             inst->ri_i.i_state, inst->ri_i.i_enabled ? RESTARTER_STATE_OFFLINE :
1148             RESTARTER_STATE_DISABLED, RERR_NONE, reason)) {
1149         case 0:
1150         case ECONNRESET:
1151                 break;
1152 
1153         default:
1154                 bad_error("restarter_instance_update_states", err);
1155         }
1156 
1157         info = startd_zalloc(sizeof (fork_info_t));
1158 
1159         info->sf_id = inst->ri_id;
1160         info->sf_method_type = METHOD_STOP;
1161         info->sf_event_type = re;
1162         info->sf_reason = reason;
1163         inst->ri_method_thread = startd_thread_create(method_thread, info);
1164 
1165         return (0);
1166 }
1167 
1168 /*
1169  * Returns
1170  *   ENOENT - fmri is not in instance_list
1171  *   0 - success
1172  *   ECONNRESET - success, though handle was rebound
1173  *   -1 - instance is in transition
1174  */
1175 int
1176 stop_instance_fmri(scf_handle_t *h, const char *fmri, uint_t flags)
1177 {
1178         restarter_inst_t *rip;
1179         int r;
1180 
1181         rip = inst_lookup_by_name(fmri);
1182         if (rip == NULL)
1183                 return (ENOENT);
1184 
1185         r = stop_instance(h, rip, flags);
1186 
1187         MUTEX_UNLOCK(&rip->ri_lock);
1188 
1189         return (r);
1190 }
1191 
1192 static void
1193 unmaintain_instance(scf_handle_t *h, restarter_inst_t *rip,
1194     unmaint_cause_t cause)
1195 {
1196         ctid_t ctid;
1197         scf_instance_t *inst;
1198         int r;
1199         uint_t tries = 0, msecs = ALLOC_DELAY;
1200         const char *cp;
1201         restarter_str_t reason;
1202 
1203         assert(MUTEX_HELD(&rip->ri_lock));
1204 
1205         if (rip->ri_i.i_state != RESTARTER_STATE_MAINT) {
1206                 log_error(LOG_DEBUG, "Restarter: "
1207                     "Ignoring maintenance off command because %s is not in the "
1208                     "maintenance state.\n", rip->ri_i.i_fmri);
1209                 return;
1210         }
1211 
1212         switch (cause) {
1213         case RUNMAINT_CLEAR:
1214                 cp = "clear requested";
1215                 reason = restarter_str_clear_request;
1216                 break;
1217         case RUNMAINT_DISABLE:
1218                 cp = "disable requested";
1219                 reason = restarter_str_disable_request;
1220                 break;
1221         default:
1222 #ifndef NDEBUG
1223                 (void) fprintf(stderr, "Uncaught case for %d at %s:%d.\n",
1224                     cause, __FILE__, __LINE__);
1225 #endif
1226                 abort();
1227         }
1228 
1229         log_instance(rip, B_TRUE, "Leaving maintenance because %s.",
1230             cp);
1231         log_framework(LOG_DEBUG, "%s: Instance leaving maintenance because "
1232             "%s.\n", rip->ri_i.i_fmri, cp);
1233 
1234         (void) restarter_instance_update_states(h, rip, RESTARTER_STATE_UNINIT,
1235             RESTARTER_STATE_NONE, RERR_RESTART, reason);
1236 
1237         /*
1238          * If we did ADMIN_MAINT_ON_IMMEDIATE, then there might still be
1239          * a primary contract.
1240          */
1241         if (rip->ri_i.i_primary_ctid == 0)
1242                 return;
1243 
1244         ctid = rip->ri_i.i_primary_ctid;
1245         contract_abandon(ctid);
1246         rip->ri_i.i_primary_ctid = 0;
1247 
1248 rep_retry:
1249         switch (r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst)) {
1250         case 0:
1251                 break;
1252 
1253         case ECONNABORTED:
1254                 libscf_handle_rebind(h);
1255                 goto rep_retry;
1256 
1257         case ENOENT:
1258                 /* Must have been deleted. */
1259                 return;
1260 
1261         case EINVAL:
1262         case ENOTSUP:
1263         default:
1264                 bad_error("libscf_handle_rebind", r);
1265         }
1266 
1267 again:
1268         r = restarter_remove_contract(inst, ctid, RESTARTER_CONTRACT_PRIMARY);
1269         switch (r) {
1270         case 0:
1271                 break;
1272 
1273         case ENOMEM:
1274                 ++tries;
1275                 if (tries < ALLOC_RETRY) {
1276                         (void) poll(NULL, 0, msecs);
1277                         msecs *= ALLOC_DELAY_MULT;
1278                         goto again;
1279                 }
1280 
1281                 uu_die("Insufficient memory.\n");
1282                 /* NOTREACHED */
1283 
1284         case ECONNABORTED:
1285                 scf_instance_destroy(inst);
1286                 libscf_handle_rebind(h);
1287                 goto rep_retry;
1288 
1289         case ECANCELED:
1290                 break;
1291 
1292         case EPERM:
1293         case EACCES:
1294         case EROFS:
1295                 log_error(LOG_INFO,
1296                     "Could not remove contract id %lu for %s (%s).\n", ctid,
1297                     rip->ri_i.i_fmri, strerror(r));
1298                 break;
1299 
1300         case EINVAL:
1301         case EBADF:
1302         default:
1303                 bad_error("restarter_remove_contract", r);
1304         }
1305 
1306         scf_instance_destroy(inst);
1307 }
1308 
1309 /*
1310  * enable_inst()
1311  *   Set inst->ri_i.i_enabled.  Expects 'e' to be _ENABLE, _DISABLE, or
1312  *   _ADMIN_DISABLE.  If the event is _ENABLE and inst is uninitialized or
1313  *   disabled, move it to offline.  If the event is _DISABLE or
1314  *   _ADMIN_DISABLE, make sure inst will move to disabled.
1315  *
1316  *   Returns
1317  *     0 - success
1318  *     ECONNRESET - h was rebound
1319  */
1320 static int
1321 enable_inst(scf_handle_t *h, restarter_inst_t *inst,
1322     restarter_instance_qentry_t *riq)
1323 {
1324         restarter_instance_state_t state;
1325         restarter_event_type_t e = riq->riq_type;
1326         restarter_str_t reason = restarter_str_per_configuration;
1327         int r;
1328 
1329         assert(MUTEX_HELD(&inst->ri_lock));
1330         assert(e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE ||
1331             e == RESTARTER_EVENT_TYPE_DISABLE ||
1332             e == RESTARTER_EVENT_TYPE_ENABLE);
1333         assert(instance_in_transition(inst) == 0);
1334 
1335         state = inst->ri_i.i_state;
1336 
1337         if (e == RESTARTER_EVENT_TYPE_ENABLE) {
1338                 inst->ri_i.i_enabled = 1;
1339 
1340                 if (state == RESTARTER_STATE_UNINIT ||
1341                     state == RESTARTER_STATE_DISABLED) {
1342                         /*
1343                          * B_FALSE: Don't log an error if the log_instance()
1344                          * fails because it will fail on the miniroot before
1345                          * install-discovery runs.
1346                          */
1347                         log_instance(inst, B_FALSE, "Enabled.");
1348                         log_framework(LOG_DEBUG, "%s: Instance enabled.\n",
1349                             inst->ri_i.i_fmri);
1350 
1351                         /*
1352                          * If we are coming from DISABLED, it was obviously an
1353                          * enable request. If we are coming from UNINIT, it may
1354                          * have been a sevice in MAINT that was cleared.
1355                          */
1356                         if (riq->riq_reason == restarter_str_clear_request)
1357                                 reason = restarter_str_clear_request;
1358                         else if (state == RESTARTER_STATE_DISABLED)
1359                                 reason = restarter_str_enable_request;
1360                         (void) restarter_instance_update_states(h, inst,
1361                             RESTARTER_STATE_OFFLINE, RESTARTER_STATE_NONE,
1362                             RERR_NONE, reason);
1363                 } else {
1364                         log_framework(LOG_DEBUG, "Restarter: "
1365                             "Not changing state of %s for enable command.\n",
1366                             inst->ri_i.i_fmri);
1367                 }
1368         } else {
1369                 inst->ri_i.i_enabled = 0;
1370 
1371                 switch (state) {
1372                 case RESTARTER_STATE_ONLINE:
1373                 case RESTARTER_STATE_DEGRADED:
1374                         r = stop_instance(h, inst, RSTOP_DISABLE);
1375                         return (r == ECONNRESET ? 0 : r);
1376 
1377                 case RESTARTER_STATE_OFFLINE:
1378                 case RESTARTER_STATE_UNINIT:
1379                         if (inst->ri_i.i_primary_ctid != 0) {
1380                                 inst->ri_m_inst = safe_scf_instance_create(h);
1381                                 inst->ri_mi_deleted = B_FALSE;
1382 
1383                                 libscf_reget_instance(inst);
1384                                 method_remove_contract(inst, B_TRUE, B_TRUE);
1385 
1386                                 scf_instance_destroy(inst->ri_m_inst);
1387                         }
1388                         /* B_FALSE: See log_instance(..., "Enabled."); above */
1389                         log_instance(inst, B_FALSE, "Disabled.");
1390                         log_framework(LOG_DEBUG, "%s: Instance disabled.\n",
1391                             inst->ri_i.i_fmri);
1392 
1393                         /*
1394                          * If we are coming from OFFLINE, it was obviously a
1395                          * disable request. But if we are coming from
1396                          * UNINIT, it may have been a disable request for a
1397                          * service in MAINT.
1398                          */
1399                         if (riq->riq_reason == restarter_str_disable_request ||
1400                             state == RESTARTER_STATE_OFFLINE)
1401                                 reason = restarter_str_disable_request;
1402                         (void) restarter_instance_update_states(h, inst,
1403                             RESTARTER_STATE_DISABLED, RESTARTER_STATE_NONE,
1404                             RERR_RESTART, reason);
1405                         return (0);
1406 
1407                 case RESTARTER_STATE_DISABLED:
1408                         break;
1409 
1410                 case RESTARTER_STATE_MAINT:
1411                         /*
1412                          * We only want to pull the instance out of maintenance
1413                          * if the disable is on adminstrative request.  The
1414                          * graph engine sends _DISABLE events whenever a
1415                          * service isn't in the disabled state, and we don't
1416                          * want to pull the service out of maintenance if,
1417                          * for example, it is there due to a dependency cycle.
1418                          */
1419                         if (e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE)
1420                                 unmaintain_instance(h, inst, RUNMAINT_DISABLE);
1421                         break;
1422 
1423                 default:
1424 #ifndef NDEBUG
1425                         (void) fprintf(stderr, "Restarter instance %s has "
1426                             "unknown state %d.\n", inst->ri_i.i_fmri, state);
1427 #endif
1428                         abort();
1429                 }
1430         }
1431 
1432         return (0);
1433 }
1434 
1435 static void
1436 start_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1437     int32_t reason)
1438 {
1439         fork_info_t *info;
1440         restarter_str_t new_reason;
1441 
1442         assert(MUTEX_HELD(&inst->ri_lock));
1443         assert(instance_in_transition(inst) == 0);
1444         assert(inst->ri_method_thread == 0);
1445 
1446         log_framework(LOG_DEBUG, "%s: trying to start instance\n",
1447             inst->ri_i.i_fmri);
1448 
1449         /*
1450          * We want to keep the original reason for restarts and clear actions
1451          */
1452         switch (reason) {
1453         case restarter_str_restart_request:
1454         case restarter_str_clear_request:
1455                 new_reason = reason;
1456                 break;
1457         default:
1458                 new_reason = restarter_str_dependencies_satisfied;
1459         }
1460 
1461         /* Services in the disabled and maintenance state are ignored */
1462         if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1463             inst->ri_i.i_state == RESTARTER_STATE_DISABLED ||
1464             inst->ri_i.i_enabled == 0) {
1465                 log_framework(LOG_DEBUG,
1466                     "%s: start_instance -> is maint/disabled\n",
1467                     inst->ri_i.i_fmri);
1468                 return;
1469         }
1470 
1471         /* Already started instances are left alone */
1472         if (instance_started(inst) == 1) {
1473                 log_framework(LOG_DEBUG,
1474                     "%s: start_instance -> is already started\n",
1475                     inst->ri_i.i_fmri);
1476                 return;
1477         }
1478 
1479         log_framework(LOG_DEBUG, "%s: starting instance.\n", inst->ri_i.i_fmri);
1480 
1481         (void) restarter_instance_update_states(local_handle, inst,
1482             inst->ri_i.i_state, RESTARTER_STATE_ONLINE, RERR_NONE, new_reason);
1483 
1484         info = startd_zalloc(sizeof (fork_info_t));
1485 
1486         info->sf_id = inst->ri_id;
1487         info->sf_method_type = METHOD_START;
1488         info->sf_event_type = RERR_NONE;
1489         info->sf_reason = new_reason;
1490         inst->ri_method_thread = startd_thread_create(method_thread, info);
1491 }
1492 
1493 static int
1494 event_from_tty(scf_handle_t *h, restarter_inst_t *rip)
1495 {
1496         scf_instance_t *inst;
1497         int ret = 0;
1498 
1499         if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
1500                 return (-1);
1501 
1502         ret = restarter_inst_ractions_from_tty(inst);
1503 
1504         scf_instance_destroy(inst);
1505         return (ret);
1506 }
1507 
1508 static void
1509 maintain_instance(scf_handle_t *h, restarter_inst_t *rip, int immediate,
1510     restarter_str_t reason)
1511 {
1512         fork_info_t *info;
1513         scf_instance_t *scf_inst = NULL;
1514 
1515         assert(MUTEX_HELD(&rip->ri_lock));
1516         assert(reason != restarter_str_none);
1517         assert(rip->ri_method_thread == 0);
1518 
1519         log_instance(rip, B_TRUE, "Stopping for maintenance due to %s.",
1520             restarter_get_str_short(reason));
1521         log_framework(LOG_DEBUG, "%s: stopping for maintenance due to %s.\n",
1522             rip->ri_i.i_fmri, restarter_get_str_short(reason));
1523 
1524         /* Services in the maintenance state are ignored */
1525         if (rip->ri_i.i_state == RESTARTER_STATE_MAINT) {
1526                 log_framework(LOG_DEBUG,
1527                     "%s: maintain_instance -> is already in maintenance\n",
1528                     rip->ri_i.i_fmri);
1529                 return;
1530         }
1531 
1532         /*
1533          * If reason state is restarter_str_service_request and
1534          * restarter_actions/auxiliary_fmri property is set with a valid fmri,
1535          * copy the fmri to restarter/auxiliary_fmri so svcs -x can use.
1536          */
1537         if (reason == restarter_str_service_request &&
1538             libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &scf_inst) == 0) {
1539                 if (restarter_inst_validate_ractions_aux_fmri(scf_inst) == 0) {
1540                         if (restarter_inst_set_aux_fmri(scf_inst))
1541                                 log_framework(LOG_DEBUG, "%s: "
1542                                     "restarter_inst_set_aux_fmri failed: ",
1543                                     rip->ri_i.i_fmri);
1544                 } else {
1545                         log_framework(LOG_DEBUG, "%s: "
1546                             "restarter_inst_validate_ractions_aux_fmri "
1547                             "failed: ", rip->ri_i.i_fmri);
1548 
1549                         if (restarter_inst_reset_aux_fmri(scf_inst))
1550                                 log_framework(LOG_DEBUG, "%s: "
1551                                     "restarter_inst_reset_aux_fmri failed: ",
1552                                     rip->ri_i.i_fmri);
1553                 }
1554                 scf_instance_destroy(scf_inst);
1555         }
1556 
1557         if (immediate || !instance_started(rip)) {
1558                 if (rip->ri_i.i_primary_ctid != 0) {
1559                         rip->ri_m_inst = safe_scf_instance_create(h);
1560                         rip->ri_mi_deleted = B_FALSE;
1561 
1562                         libscf_reget_instance(rip);
1563                         method_remove_contract(rip, B_TRUE, B_TRUE);
1564 
1565                         scf_instance_destroy(rip->ri_m_inst);
1566                 }
1567 
1568                 (void) restarter_instance_update_states(h, rip,
1569                     RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_RESTART,
1570                     reason);
1571                 return;
1572         }
1573 
1574         (void) restarter_instance_update_states(h, rip, rip->ri_i.i_state,
1575             RESTARTER_STATE_MAINT, RERR_NONE, reason);
1576 
1577         log_transition(rip, MAINT_REQUESTED);
1578 
1579         info = startd_zalloc(sizeof (*info));
1580         info->sf_id = rip->ri_id;
1581         info->sf_method_type = METHOD_STOP;
1582         info->sf_event_type = RERR_RESTART;
1583         info->sf_reason = reason;
1584         rip->ri_method_thread = startd_thread_create(method_thread, info);
1585 }
1586 
1587 static void
1588 refresh_instance(scf_handle_t *h, restarter_inst_t *rip)
1589 {
1590         scf_instance_t *inst;
1591         scf_snapshot_t *snap;
1592         fork_info_t *info;
1593         int r;
1594 
1595         assert(MUTEX_HELD(&rip->ri_lock));
1596 
1597         log_instance(rip, B_TRUE, "Rereading configuration.");
1598         log_framework(LOG_DEBUG, "%s: rereading configuration.\n",
1599             rip->ri_i.i_fmri);
1600 
1601 rep_retry:
1602         r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst);
1603         switch (r) {
1604         case 0:
1605                 break;
1606 
1607         case ECONNABORTED:
1608                 libscf_handle_rebind(h);
1609                 goto rep_retry;
1610 
1611         case ENOENT:
1612                 /* Must have been deleted. */
1613                 return;
1614 
1615         case EINVAL:
1616         case ENOTSUP:
1617         default:
1618                 bad_error("libscf_fmri_get_instance", r);
1619         }
1620 
1621         snap = libscf_get_running_snapshot(inst);
1622 
1623         r = libscf_get_startd_properties(inst, snap, &rip->ri_flags,
1624             &rip->ri_utmpx_prefix);
1625         switch (r) {
1626         case 0:
1627                 log_framework(LOG_DEBUG, "%s is a %s-style service\n",
1628                     rip->ri_i.i_fmri, service_style(rip->ri_flags));
1629                 break;
1630 
1631         case ECONNABORTED:
1632                 scf_instance_destroy(inst);
1633                 scf_snapshot_destroy(snap);
1634                 libscf_handle_rebind(h);
1635                 goto rep_retry;
1636 
1637         case ECANCELED:
1638         case ENOENT:
1639                 /* Succeed in anticipation of REMOVE_INSTANCE. */
1640                 break;
1641 
1642         default:
1643                 bad_error("libscf_get_startd_properties", r);
1644         }
1645 
1646         if (instance_started(rip)) {
1647                 /* Refresh does not change the state. */
1648                 (void) restarter_instance_update_states(h, rip,
1649                     rip->ri_i.i_state, rip->ri_i.i_state, RERR_NONE,
1650                     restarter_str_refresh);
1651 
1652                 info = startd_zalloc(sizeof (*info));
1653                 info->sf_id = rip->ri_id;
1654                 info->sf_method_type = METHOD_REFRESH;
1655                 info->sf_event_type = RERR_REFRESH;
1656                 info->sf_reason = NULL;
1657 
1658                 assert(rip->ri_method_thread == 0);
1659                 rip->ri_method_thread =
1660                     startd_thread_create(method_thread, info);
1661         }
1662 
1663         scf_snapshot_destroy(snap);
1664         scf_instance_destroy(inst);
1665 }
1666 
/*
 * Printable names for restarter events, indexed by event number.  The
 * order presumably mirrors the restarter event type enumeration; confirm
 * against its definition before adding or reordering entries.
 */
const char *event_names[] = {
        "INVALID",                      /* 0 */
        "ADD_INSTANCE",                 /* 1 */
        "REMOVE_INSTANCE",              /* 2 */
        "ENABLE",                       /* 3 */
        "DISABLE",                      /* 4 */
        "ADMIN_DEGRADED",               /* 5 */
        "ADMIN_REFRESH",                /* 6 */
        "ADMIN_RESTART",                /* 7 */
        "ADMIN_MAINT_OFF",              /* 8 */
        "ADMIN_MAINT_ON",               /* 9 */
        "ADMIN_MAINT_ON_IMMEDIATE",     /* 10 */
        "STOP",                         /* 11 */
        "START",                        /* 12 */
        "DEPENDENCY_CYCLE",             /* 13 */
        "INVALID_DEPENDENCY",           /* 14 */
        "ADMIN_DISABLE",                /* 15 */
        "STOP_RESET"                    /* 16 */
};
1673 
/*
 * void *restarter_process_events()
 *
 *   Called in a separate thread to process the events on an instance's
 *   queue.  Empties the queue completely, and tries to keep the thread
 *   around for a little while after the queue is empty to save on
 *   startup costs.
 *
 *   arg is the FMRI string of the instance whose queue is to be drained.
 *   This thread owns that string and free()s it before exiting.  The
 *   return value is always NULL.
 *
 *   Locking: the instance is looked up twice per event.
 *   inst_lookup_queue() hands back the instance with ri_queue_lock held,
 *   and inst_lookup_by_name() hands it back with ri_lock held.  The queue
 *   lock is dropped while an event is being processed and re-taken (by a
 *   fresh lookup) before the event is unlinked from the queue.
 */
static void *
restarter_process_events(void *arg)
{
        scf_handle_t *h;
        restarter_instance_qentry_t *event;
        restarter_inst_t *rip;
        char *fmri = (char *)arg;
        struct timespec to;

        assert(fmri != NULL);

        /* Repository handle used for all event processing in this thread. */
        h = libscf_handle_create_bound_loop();

        /* grab the queue lock */
        rip = inst_lookup_queue(fmri);
        if (rip == NULL)
                goto out;

again:

        /*
         * The event stays at the head of the queue while it is processed;
         * it is only removed and freed at the bottom of the loop body.
         */
        while ((event = uu_list_first(rip->ri_queue)) != NULL) {
                restarter_inst_t *inst;

                /* drop the queue lock */
                MUTEX_UNLOCK(&rip->ri_queue_lock);

                /*
                 * Grab the inst lock -- this waits until any outstanding
                 * method finishes running.
                 */
                inst = inst_lookup_by_name(fmri);
                if (inst == NULL) {
                        /* Getting deleted in the middle isn't an error. */
                        goto cont;
                }

                assert(instance_in_transition(inst) == 0);

                /* process the event */
                switch (event->riq_type) {
                case RESTARTER_EVENT_TYPE_ENABLE:
                case RESTARTER_EVENT_TYPE_DISABLE:
                        (void) enable_inst(h, inst, event);
                        break;

                case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
                        /* Only forget the start history if the disable worked. */
                        if (enable_inst(h, inst, event) == 0)
                                reset_start_times(inst);
                        break;

                case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
                        /*
                         * Skip the ri_lock unlock below by jumping straight
                         * to cont; inst must not be touched after this call.
                         * NOTE(review): presumably restarter_delete_inst()
                         * disposes of the instance and its lock -- confirm.
                         */
                        restarter_delete_inst(inst);
                        inst = NULL;
                        goto cont;

                case RESTARTER_EVENT_TYPE_STOP_RESET:
                        reset_start_times(inst);
                        /* FALLTHROUGH */
                case RESTARTER_EVENT_TYPE_STOP:
                        (void) stop_instance(h, inst, RSTOP_DEPENDENCY);
                        break;

                case RESTARTER_EVENT_TYPE_START:
                        start_instance(h, inst, event->riq_reason);
                        break;

                case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
                        maintain_instance(h, inst, 0,
                            restarter_str_dependency_cycle);
                        break;

                case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
                        maintain_instance(h, inst, 0,
                            restarter_str_invalid_dependency);
                        break;

                /*
                 * The two ADMIN_MAINT_ON variants differ only in the third
                 * argument to maintain_instance() (0 vs. 1); the reason
                 * string is chosen from the result of event_from_tty().
                 */
                case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
                        if (event_from_tty(h, inst) == 0)
                                maintain_instance(h, inst, 0,
                                    restarter_str_service_request);
                        else
                                maintain_instance(h, inst, 0,
                                    restarter_str_administrative_request);
                        break;

                case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
                        if (event_from_tty(h, inst) == 0)
                                maintain_instance(h, inst, 1,
                                    restarter_str_service_request);
                        else
                                maintain_instance(h, inst, 1,
                                    restarter_str_administrative_request);
                        break;

                case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
                        unmaintain_instance(h, inst, RUNMAINT_CLEAR);
                        reset_start_times(inst);
                        break;

                case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
                        refresh_instance(h, inst);
                        break;

                case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
                        /* Not implemented: the request is logged and dropped. */
                        log_framework(LOG_WARNING, "Restarter: "
                            "%s command (for %s) unimplemented.\n",
                            event_names[event->riq_type], inst->ri_i.i_fmri);
                        break;

                case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
                        if (!instance_started(inst)) {
                                log_framework(LOG_DEBUG, "Restarter: "
                                    "Not restarting %s; not running.\n",
                                    inst->ri_i.i_fmri);
                        } else {
                                /*
                                 * Stop the instance.  If it can be restarted,
                                 * the graph engine will send a new event.
                                 */
                                if (stop_instance(h, inst, RSTOP_RESTART) == 0)
                                        reset_start_times(inst);
                        }
                        break;

                /*
                 * ADD_INSTANCE is handled synchronously by
                 * restarter_event_thread() and is never queued, so seeing
                 * it (or any unknown type) here is fatal.
                 */
                case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
                default:
#ifndef NDEBUG
                        uu_warn("%s:%d: Bad restarter event %d.  "
                            "Aborting.\n", __FILE__, __LINE__, event->riq_type);
#endif
                        abort();
                }

                /* Every case that reaches here still holds ri_lock. */
                assert(inst != NULL);
                MUTEX_UNLOCK(&inst->ri_lock);

cont:
                /* grab the queue lock */
                rip = inst_lookup_queue(fmri);
                /*
                 * The instance is gone; exit without unlinking the event.
                 * NOTE(review): presumably the queue and its entries were
                 * released along with the instance -- confirm.
                 */
                if (rip == NULL)
                        goto out;

                /* delete the event */
                uu_list_remove(rip->ri_queue, event);
                startd_free(event, sizeof (restarter_instance_qentry_t));
        }

        assert(rip != NULL);

        /*
         * Try to preserve the thread for a little while for future use.
         * The wait is bounded at three seconds and its result is ignored;
         * the queue is simply re-examined afterwards.
         */
        to.tv_sec = 3;
        to.tv_nsec = 0;
        (void) pthread_cond_reltimedwait_np(&rip->ri_queue_cv,
            &rip->ri_queue_lock, &to);

        if (uu_list_first(rip->ri_queue) != NULL)
                goto again;

        /*
         * Clear ri_queue_thread while still holding the queue lock, so
         * that restarter_event_thread() starts a new thread for the next
         * event it queues.
         */
        rip->ri_queue_thread = 0;
        MUTEX_UNLOCK(&rip->ri_queue_lock);
out:
        (void) scf_handle_unbind(h);
        scf_handle_destroy(h);
        free(fmri);
        return (NULL);
}
1850 
1851 static int
1852 is_admin_event(restarter_event_type_t t) {
1853 
1854         switch (t) {
1855         case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1856         case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1857         case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1858         case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1859         case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1860         case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1861                 return (1);
1862         default:
1863                 return (0);
1864         }
1865 }
1866 
1867 static void
1868 restarter_queue_event(restarter_inst_t *ri, restarter_protocol_event_t *e)
1869 {
1870         restarter_instance_qentry_t *qe;
1871         int r;
1872 
1873         assert(MUTEX_HELD(&ri->ri_queue_lock));
1874         assert(!MUTEX_HELD(&ri->ri_lock));
1875 
1876         qe = startd_zalloc(sizeof (restarter_instance_qentry_t));
1877         qe->riq_type = e->rpe_type;
1878         qe->riq_reason = e->rpe_reason;
1879 
1880         uu_list_node_init(qe, &qe->riq_link, restarter_queue_pool);
1881         r = uu_list_insert_before(ri->ri_queue, NULL, qe);
1882         assert(r == 0);
1883 }
1884 
1885 /*
1886  * void *restarter_event_thread()
1887  *
1888  *  Handle incoming graph events by placing them on a per-instance
1889  *  queue.  We can't lock the main part of the instance structure, so
1890  *  just modify the seprarately locked event queue portion.
1891  */
1892 /*ARGSUSED*/
1893 static void *
1894 restarter_event_thread(void *unused)
1895 {
1896         scf_handle_t *h;
1897 
1898         /*
1899          * This is a new thread, and thus, gets its own handle
1900          * to the repository.
1901          */
1902         h = libscf_handle_create_bound_loop();
1903 
1904         MUTEX_LOCK(&ru->restarter_update_lock);
1905 
1906         /*CONSTCOND*/
1907         while (1) {
1908                 restarter_protocol_event_t *e;
1909 
1910                 while (ru->restarter_update_wakeup == 0)
1911                         (void) pthread_cond_wait(&ru->restarter_update_cv,
1912                             &ru->restarter_update_lock);
1913 
1914                 ru->restarter_update_wakeup = 0;
1915 
1916                 while ((e = restarter_event_dequeue()) != NULL) {
1917                         restarter_inst_t *rip;
1918                         char *fmri;
1919 
1920                         MUTEX_UNLOCK(&ru->restarter_update_lock);
1921 
1922                         /*
1923                          * ADD_INSTANCE is special: there's likely no
1924                          * instance structure yet, so we need to handle the
1925                          * addition synchronously.
1926                          */
1927                         switch (e->rpe_type) {
1928                         case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
1929                                 if (restarter_insert_inst(h, e->rpe_inst) != 0)
1930                                         log_error(LOG_INFO, "Restarter: "
1931                                             "Could not add %s.\n", e->rpe_inst);
1932 
1933                                 MUTEX_LOCK(&st->st_load_lock);
1934                                 if (--st->st_load_instances == 0)
1935                                         (void) pthread_cond_broadcast(
1936                                             &st->st_load_cv);
1937                                 MUTEX_UNLOCK(&st->st_load_lock);
1938 
1939                                 goto nolookup;
1940                         }
1941 
1942                         /*
1943                          * Lookup the instance, locking only the event queue.
1944                          * Can't grab ri_lock here because it might be held
1945                          * by a long-running method.
1946                          */
1947                         rip = inst_lookup_queue(e->rpe_inst);
1948                         if (rip == NULL) {
1949                                 log_error(LOG_INFO, "Restarter: "
1950                                     "Ignoring %s command for unknown service "
1951                                     "%s.\n", event_names[e->rpe_type],
1952                                     e->rpe_inst);
1953                                 goto nolookup;
1954                         }
1955 
1956                         /* Keep ADMIN events from filling up the queue. */
1957                         if (is_admin_event(e->rpe_type) &&
1958                             uu_list_numnodes(rip->ri_queue) >
1959                             RINST_QUEUE_THRESHOLD) {
1960                                 MUTEX_UNLOCK(&rip->ri_queue_lock);
1961                                 log_instance(rip, B_TRUE, "Instance event "
1962                                     "queue overflow.  Dropping administrative "
1963                                     "request.");
1964                                 log_framework(LOG_DEBUG, "%s: Instance event "
1965                                     "queue overflow.  Dropping administrative "
1966                                     "request.\n", rip->ri_i.i_fmri);
1967                                 goto nolookup;
1968                         }
1969 
1970                         /* Now add the event to the instance queue. */
1971                         restarter_queue_event(rip, e);
1972 
1973                         if (rip->ri_queue_thread == 0) {
1974                                 /*
1975                                  * Start a thread if one isn't already
1976                                  * running.
1977                                  */
1978                                 fmri = safe_strdup(e->rpe_inst);
1979                                 rip->ri_queue_thread =  startd_thread_create(
1980                                     restarter_process_events, (void *)fmri);
1981                         } else {
1982                                 /*
1983                                  * Signal the existing thread that there's
1984                                  * a new event.
1985                                  */
1986                                 (void) pthread_cond_broadcast(
1987                                     &rip->ri_queue_cv);
1988                         }
1989 
1990                         MUTEX_UNLOCK(&rip->ri_queue_lock);
1991 nolookup:
1992                         restarter_event_release(e);
1993 
1994                         MUTEX_LOCK(&ru->restarter_update_lock);
1995                 }
1996         }
1997 
1998         /*
1999          * Unreachable for now -- there's currently no graceful cleanup
2000          * called on exit().
2001          */
2002         (void) scf_handle_unbind(h);
2003         scf_handle_destroy(h);
2004         return (NULL);
2005 }
2006 
2007 static restarter_inst_t *
2008 contract_to_inst(ctid_t ctid)
2009 {
2010         restarter_inst_t *inst;
2011         int id;
2012 
2013         id = lookup_inst_by_contract(ctid);
2014         if (id == -1)
2015                 return (NULL);
2016 
2017         inst = inst_lookup_by_id(id);
2018         if (inst != NULL) {
2019                 /*
2020                  * Since ri_lock isn't held by the contract id lookup, this
2021                  * instance may have been restarted and now be in a new
2022                  * contract, making the old contract no longer valid for this
2023                  * instance.
2024                  */
2025                 if (ctid != inst->ri_i.i_primary_ctid) {
2026                         MUTEX_UNLOCK(&inst->ri_lock);
2027                         inst = NULL;
2028                 }
2029         }
2030         return (inst);
2031 }
2032 
2033 /*
2034  * void contract_action()
2035  *   Take action on contract events.
2036  */
2037 static void
2038 contract_action(scf_handle_t *h, restarter_inst_t *inst, ctid_t id,
2039     uint32_t type)
2040 {
2041         const char *fmri = inst->ri_i.i_fmri;
2042 
2043         assert(MUTEX_HELD(&inst->ri_lock));
2044 
2045         /*
2046          * If startd has stopped this contract, there is no need to
2047          * stop it again.
2048          */
2049         if (inst->ri_i.i_primary_ctid > 0 &&
2050             inst->ri_i.i_primary_ctid_stopped)
2051                 return;
2052 
2053         if ((type & (CT_PR_EV_EMPTY | CT_PR_EV_CORE | CT_PR_EV_SIGNAL
2054             | CT_PR_EV_HWERR)) == 0) {
2055                 /*
2056                  * There shouldn't be other events, since that's not how we set
2057                  * the terms. Thus, just log an error and drive on.
2058                  */
2059                 log_framework(LOG_NOTICE,
2060                     "%s: contract %ld received unexpected critical event "
2061                     "(%d)\n", fmri, id, type);
2062                 return;
2063         }
2064 
2065         assert(instance_in_transition(inst) == 0);
2066 
2067         if (instance_is_wait_style(inst)) {
2068                 /*
2069                  * We ignore all events; if they impact the
2070                  * process we're monitoring, then the
2071                  * wait_thread will stop the instance.
2072                  */
2073                 log_framework(LOG_DEBUG,
2074                     "%s: ignoring contract event on wait-style service\n",
2075                     fmri);
2076         } else {
2077                 /*
2078                  * A CT_PR_EV_EMPTY event is an RSTOP_EXIT request.
2079                  */
2080                 switch (type) {
2081                 case CT_PR_EV_EMPTY:
2082                         (void) stop_instance(h, inst, RSTOP_EXIT);
2083                         break;
2084                 case CT_PR_EV_CORE:
2085                         (void) stop_instance(h, inst, RSTOP_CORE);
2086                         break;
2087                 case CT_PR_EV_SIGNAL:
2088                         (void) stop_instance(h, inst, RSTOP_SIGNAL);
2089                         break;
2090                 case CT_PR_EV_HWERR:
2091                         (void) stop_instance(h, inst, RSTOP_HWERR);
2092                         break;
2093                 }
2094         }
2095 }
2096 
2097 /*
2098  * void *restarter_contract_event_thread(void *)
2099  *   Listens to the process contract bundle for critical events, taking action
2100  *   on events from contracts we know we are responsible for.
2101  */
2102 /*ARGSUSED*/
2103 static void *
2104 restarter_contracts_event_thread(void *unused)
2105 {
2106         int fd, err;
2107         scf_handle_t *local_handle;
2108 
2109         /*
2110          * Await graph load completion.  That is, stop here, until we've scanned
2111          * the repository for contract - instance associations.
2112          */
2113         MUTEX_LOCK(&st->st_load_lock);
2114         while (!(st->st_load_complete && st->st_load_instances == 0))
2115                 (void) pthread_cond_wait(&st->st_load_cv, &st->st_load_lock);
2116         MUTEX_UNLOCK(&st->st_load_lock);
2117 
2118         /*
2119          * This is a new thread, and thus, gets its own handle
2120          * to the repository.
2121          */
2122         if ((local_handle = libscf_handle_create_bound(SCF_VERSION)) == NULL)
2123                 uu_die("Unable to bind a new repository handle: %s\n",
2124                     scf_strerror(scf_error()));
2125 
2126         fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
2127         if (fd == -1)
2128                 uu_die("process bundle open failed");
2129 
2130         /*
2131          * Make sure we get all events (including those generated by configd
2132          * before this thread was started).
2133          */
2134         err = ct_event_reset(fd);
2135         assert(err == 0);
2136 
2137         for (;;) {
2138                 int efd, sfd;
2139                 ct_evthdl_t ev;
2140                 uint32_t type;
2141                 ctevid_t evid;
2142                 ct_stathdl_t status;
2143                 ctid_t ctid;
2144                 restarter_inst_t *inst;
2145                 uint64_t cookie;
2146 
2147                 if (err = ct_event_read_critical(fd, &ev)) {
2148                         log_error(LOG_WARNING,
2149                             "Error reading next contract event: %s",
2150                             strerror(err));
2151                         continue;
2152                 }
2153 
2154                 evid = ct_event_get_evid(ev);
2155                 ctid = ct_event_get_ctid(ev);
2156                 type = ct_event_get_type(ev);
2157 
2158                 /* Fetch cookie. */
2159                 if ((sfd = contract_open(ctid, "process", "status", O_RDONLY))
2160                     < 0) {
2161                         ct_event_free(ev);
2162                         continue;
2163                 }
2164 
2165                 if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
2166                         log_framework(LOG_WARNING, "Could not get status for "
2167                             "contract %ld: %s\n", ctid, strerror(err));
2168 
2169                         startd_close(sfd);
2170                         ct_event_free(ev);
2171                         continue;
2172                 }
2173 
2174                 cookie = ct_status_get_cookie(status);
2175 
2176                 log_framework(LOG_DEBUG, "Received event %d for ctid %ld "
2177                     "cookie %lld\n", type, ctid, cookie);
2178 
2179                 ct_status_free(status);
2180 
2181                 startd_close(sfd);
2182 
2183                 /*
2184                  * svc.configd(1M) restart handling performed by the
2185                  * fork_configd_thread.  We don't acknowledge, as that thread
2186                  * will do so.
2187                  */
2188                 if (cookie == CONFIGD_COOKIE) {
2189                         ct_event_free(ev);
2190                         continue;
2191                 }
2192 
2193                 inst = NULL;
2194                 if (storing_contract != 0 &&
2195                     (inst = contract_to_inst(ctid)) == NULL) {
2196                         /*
2197                          * This can happen for two reasons:
2198                          * - method_run() has not yet stored the
2199                          *    the contract into the internal hash table.
2200                          * - we receive an EMPTY event for an abandoned
2201                          *    contract.
2202                          * If there is any contract in the process of
2203                          * being stored into the hash table then re-read
2204                          * the event later.
2205                          */
2206                         log_framework(LOG_DEBUG,
2207                             "Reset event %d for unknown "
2208                             "contract id %ld\n", type, ctid);
2209 
2210                         /* don't go too fast */
2211                         (void) poll(NULL, 0, 100);
2212 
2213                         (void) ct_event_reset(fd);
2214                         ct_event_free(ev);
2215                         continue;
2216                 }
2217 
2218                 /*
2219                  * Do not call contract_to_inst() again if first
2220                  * call succeeded.
2221                  */
2222                 if (inst == NULL)
2223                         inst = contract_to_inst(ctid);
2224                 if (inst == NULL) {
2225                         /*
2226                          * This can happen if we receive an EMPTY
2227                          * event for an abandoned contract.
2228                          */
2229                         log_framework(LOG_DEBUG,
2230                             "Received event %d for unknown contract id "
2231                             "%ld\n", type, ctid);
2232                 } else {
2233                         log_framework(LOG_DEBUG,
2234                             "Received event %d for contract id "
2235                             "%ld (%s)\n", type, ctid,
2236                             inst->ri_i.i_fmri);
2237 
2238                         contract_action(local_handle, inst, ctid, type);
2239 
2240                         MUTEX_UNLOCK(&inst->ri_lock);
2241                 }
2242 
2243                 efd = contract_open(ct_event_get_ctid(ev), "process", "ctl",
2244                     O_WRONLY);
2245                 if (efd != -1) {
2246                         (void) ct_ctl_ack(efd, evid);
2247                         startd_close(efd);
2248                 }
2249 
2250                 ct_event_free(ev);
2251 
2252         }
2253 
2254         /*NOTREACHED*/
2255         return (NULL);
2256 }
2257 
/*
 * Timeout queue, processed by restarter_timeouts_event_thread().
 * Both the queue and its list pool are created by timeout_init().
 */
timeout_queue_t *timeouts;
static uu_list_pool_t *timeout_pool;

/*
 * Wakeup channel for the timeout thread.  timeout_insert() sets tu_wakeup
 * and broadcasts on tu_cv under tu_lock; the timeout thread waits on tu_cv
 * while tu_wakeup is zero and clears it once woken.
 */
typedef struct timeout_update {
        pthread_mutex_t         tu_lock;
        pthread_cond_t          tu_cv;
        int                     tu_wakeup;
} timeout_update_t;

/* Allocated and initialized by timeout_init(). */
timeout_update_t *tu;
2271 
/*
 * NULL-terminated list of instance FMRIs for which is_timeout_ovr()
 * reports a timeout override ("Using infinite timeout").
 */
static const char *timeout_ovr_svcs[] = {
        "svc:/system/manifest-import:default",
        "svc:/network/initial:default",
        "svc:/network/service:default",
        "svc:/system/rmtmpfiles:default",
        "svc:/network/loopback:default",
        "svc:/network/physical:default",
        "svc:/system/device/local:default",
        "svc:/system/metainit:default",
        "svc:/system/filesystem/usr:default",
        "svc:/system/filesystem/minimal:default",
        "svc:/system/filesystem/local:default",
        NULL
};
2286 
2287 int
2288 is_timeout_ovr(restarter_inst_t *inst)
2289 {
2290         int i;
2291 
2292         for (i = 0; timeout_ovr_svcs[i] != NULL; ++i) {
2293                 if (strcmp(inst->ri_i.i_fmri, timeout_ovr_svcs[i]) == 0) {
2294                         log_instance(inst, B_TRUE, "Timeout override by "
2295                             "svc.startd.  Using infinite timeout.");
2296                         return (1);
2297                 }
2298         }
2299 
2300         return (0);
2301 }
2302 
2303 /*ARGSUSED*/
2304 static int
2305 timeout_compare(const void *lc_arg, const void *rc_arg, void *private)
2306 {
2307         hrtime_t t1 = ((const timeout_entry_t *)lc_arg)->te_timeout;
2308         hrtime_t t2 = ((const timeout_entry_t *)rc_arg)->te_timeout;
2309 
2310         if (t1 > t2)
2311                 return (1);
2312         else if (t1 < t2)
2313                 return (-1);
2314         return (0);
2315 }
2316 
2317 void
2318 timeout_init()
2319 {
2320         timeouts = startd_zalloc(sizeof (timeout_queue_t));
2321 
2322         (void) pthread_mutex_init(&timeouts->tq_lock, &mutex_attrs);
2323 
2324         timeout_pool = startd_list_pool_create("timeouts",
2325             sizeof (timeout_entry_t), offsetof(timeout_entry_t, te_link),
2326             timeout_compare, UU_LIST_POOL_DEBUG);
2327         assert(timeout_pool != NULL);
2328 
2329         timeouts->tq_list = startd_list_create(timeout_pool,
2330             timeouts, UU_LIST_SORTED);
2331         assert(timeouts->tq_list != NULL);
2332 
2333         tu = startd_zalloc(sizeof (timeout_update_t));
2334         (void) pthread_cond_init(&tu->tu_cv, NULL);
2335         (void) pthread_mutex_init(&tu->tu_lock, &mutex_attrs);
2336 }
2337 
2338 void
2339 timeout_insert(restarter_inst_t *inst, ctid_t cid, uint64_t timeout_sec)
2340 {
2341         hrtime_t now, timeout;
2342         timeout_entry_t *entry;
2343         uu_list_index_t idx;
2344 
2345         assert(MUTEX_HELD(&inst->ri_lock));
2346 
2347         now = gethrtime();
2348 
2349         /*
2350          * If we overflow LLONG_MAX, we're never timing out anyways, so
2351          * just return.
2352          */
2353         if (timeout_sec >= (LLONG_MAX - now) / 1000000000LL) {
2354                 log_instance(inst, B_TRUE, "timeout_seconds too large, "
2355                     "treating as infinite.");
2356                 return;
2357         }
2358 
2359         /* hrtime is in nanoseconds. Convert timeout_sec. */
2360         timeout = now + (timeout_sec * 1000000000LL);
2361 
2362         entry = startd_alloc(sizeof (timeout_entry_t));
2363         entry->te_timeout = timeout;
2364         entry->te_ctid = cid;
2365         entry->te_fmri = safe_strdup(inst->ri_i.i_fmri);
2366         entry->te_logstem = safe_strdup(inst->ri_logstem);
2367         entry->te_fired = 0;
2368         /* Insert the calculated timeout time onto the queue. */
2369         MUTEX_LOCK(&timeouts->tq_lock);
2370         (void) uu_list_find(timeouts->tq_list, entry, NULL, &idx);
2371         uu_list_node_init(entry, &entry->te_link, timeout_pool);
2372         uu_list_insert(timeouts->tq_list, entry, idx);
2373         MUTEX_UNLOCK(&timeouts->tq_lock);
2374 
2375         assert(inst->ri_timeout == NULL);
2376         inst->ri_timeout = entry;
2377 
2378         MUTEX_LOCK(&tu->tu_lock);
2379         tu->tu_wakeup = 1;
2380         (void) pthread_cond_broadcast(&tu->tu_cv);
2381         MUTEX_UNLOCK(&tu->tu_lock);
2382 }
2383 
2384 
2385 void
2386 timeout_remove(restarter_inst_t *inst, ctid_t cid)
2387 {
2388         assert(MUTEX_HELD(&inst->ri_lock));
2389 
2390         if (inst->ri_timeout == NULL)
2391                 return;
2392 
2393         assert(inst->ri_timeout->te_ctid == cid);
2394 
2395         MUTEX_LOCK(&timeouts->tq_lock);
2396         uu_list_remove(timeouts->tq_list, inst->ri_timeout);
2397         MUTEX_UNLOCK(&timeouts->tq_lock);
2398 
2399         free(inst->ri_timeout->te_fmri);
2400         free(inst->ri_timeout->te_logstem);
2401         startd_free(inst->ri_timeout, sizeof (timeout_entry_t));
2402         inst->ri_timeout = NULL;
2403 }
2404 
2405 static int
2406 timeout_now()
2407 {
2408         timeout_entry_t *e;
2409         hrtime_t now;
2410         int ret;
2411 
2412         now = gethrtime();
2413 
2414         /*
2415          * Walk through the (sorted) timeouts list.  While the timeout
2416          * at the head of the list is <= the current time, kill the
2417          * method.
2418          */
2419         MUTEX_LOCK(&timeouts->tq_lock);
2420 
2421         for (e = uu_list_first(timeouts->tq_list);
2422             e != NULL && e->te_timeout <= now;
2423             e = uu_list_next(timeouts->tq_list, e)) {
2424                 log_framework(LOG_WARNING, "%s: Method or service exit timed "
2425                     "out.  Killing contract %ld.\n", e->te_fmri, e->te_ctid);
2426                 log_instance_fmri(e->te_fmri, e->te_logstem, B_TRUE,
2427                     "Method or service exit timed out.  Killing contract %ld.",
2428                     e->te_ctid);
2429                 e->te_fired = 1;
2430                 (void) contract_kill(e->te_ctid, SIGKILL, e->te_fmri);
2431         }
2432 
2433         if (uu_list_numnodes(timeouts->tq_list) > 0)
2434                 ret = 0;
2435         else
2436                 ret = -1;
2437 
2438         MUTEX_UNLOCK(&timeouts->tq_lock);
2439 
2440         return (ret);
2441 }
2442 
2443 /*
2444  * void *restarter_timeouts_event_thread(void *)
2445  *   Responsible for monitoring the method timeouts.  This thread must
2446  *   be started before any methods are called.
2447  */
2448 /*ARGSUSED*/
2449 static void *
2450 restarter_timeouts_event_thread(void *unused)
2451 {
2452         /*
2453          * Timeouts are entered on a priority queue, which is processed by
2454          * this thread.  As timeouts are specified in seconds, we'll do
2455          * the necessary processing every second, as long as the queue
2456          * is not empty.
2457          */
2458 
2459         /*CONSTCOND*/
2460         while (1) {
2461                 /*
2462                  * As long as the timeout list isn't empty, process it
2463                  * every second.
2464                  */
2465                 if (timeout_now() == 0) {
2466                         (void) sleep(1);
2467                         continue;
2468                 }
2469 
2470                 /* The list is empty, wait until we have more timeouts. */
2471                 MUTEX_LOCK(&tu->tu_lock);
2472 
2473                 while (tu->tu_wakeup == 0)
2474                         (void) pthread_cond_wait(&tu->tu_cv, &tu->tu_lock);
2475 
2476                 tu->tu_wakeup = 0;
2477                 MUTEX_UNLOCK(&tu->tu_lock);
2478         }
2479 
2480         return (NULL);
2481 }
2482 
2483 void
2484 restarter_start()
2485 {
2486         (void) startd_thread_create(restarter_timeouts_event_thread, NULL);
2487         (void) startd_thread_create(restarter_event_thread, NULL);
2488         (void) startd_thread_create(restarter_contracts_event_thread, NULL);
2489         (void) startd_thread_create(wait_thread, NULL);
2490 }
2491 
2492 
2493 void
2494 restarter_init()
2495 {
2496         restarter_instance_pool = startd_list_pool_create("restarter_instances",
2497             sizeof (restarter_inst_t), offsetof(restarter_inst_t,
2498             ri_link), restarter_instance_compare, UU_LIST_POOL_DEBUG);
2499         (void) memset(&instance_list, 0, sizeof (instance_list));
2500 
2501         (void) pthread_mutex_init(&instance_list.ril_lock, &mutex_attrs);
2502         instance_list.ril_instance_list = startd_list_create(
2503             restarter_instance_pool, &instance_list, UU_LIST_SORTED);
2504 
2505         restarter_queue_pool = startd_list_pool_create(
2506             "restarter_instance_queue", sizeof (restarter_instance_qentry_t),
2507             offsetof(restarter_instance_qentry_t,  riq_link), NULL,
2508             UU_LIST_POOL_DEBUG);
2509 
2510         contract_list_pool = startd_list_pool_create(
2511             "contract_list", sizeof (contract_entry_t),
2512             offsetof(contract_entry_t,  ce_link), NULL,
2513             UU_LIST_POOL_DEBUG);
2514         contract_hash_init();
2515 
2516         log_framework(LOG_DEBUG, "Initialized restarter\n");
2517 }