illumos-gate New usr/src/cmd/svc/startd/restarter.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2019 Joyent, Inc.
  25  */
  26 
  27 /*
  28  * restarter.c - service manipulation
  29  *
  30  * This component manages services whose restarter is svc.startd, the standard
  31  * restarter.  It translates restarter protocol events from the graph engine
  32  * into actions on processes, as a delegated restarter would do.
  33  *
  34  * The master restarter manages a number of always-running threads:
  35  *   - restarter event thread: events from the graph engine
  36  *   - timeout thread: thread to fire queued timeouts
  37  *   - contract thread: thread to handle contract events
  38  *   - wait thread: thread to handle wait-based services
  39  *
  40  * The other threads are created as-needed:
  41  *   - per-instance method threads
  42  *   - per-instance event processing threads
  43  *
  44  * The interaction of all threads must result in the following conditions
  45  * being satisfied (on a per-instance basis):
  46  *   - restarter events must be processed in order
  47  *   - method execution must be serialized
  48  *   - instance delete must be held until outstanding methods are complete
  49  *   - contract events shouldn't be processed while a method is running
  50  *   - timeouts should fire even when a method is running
  51  *
  52  * Service instances are represented by restarter_inst_t's and are kept in the
  53  * instance_list list.
  54  *
  55  * Service States
  56  *   The current state of a service instance is kept in
  57  *   restarter_inst_t->ri_i.i_state.  If transition to a new state could take
  58  *   some time, then before we effect the transition we set
  59  *   restarter_inst_t->ri_i.i_next_state to the target state, and afterwards we
  60  *   rotate i_next_state to i_state and set i_next_state to
  61  *   RESTARTER_STATE_NONE.  So usually i_next_state is _NONE when ri_lock is not
  62  *   held.  The exception is when we launch methods, which are done with
  63  *   a separate thread.  To keep any other threads from grabbing ri_lock before
  64  *   method_thread() does, we set ri_method_thread to the thread id of the
  65  *   method thread, and when it is nonzero any thread with a different thread id
  66  *   waits on ri_method_cv.
  67  *
  68  * Method execution is serialized by blocking on ri_method_cv in
  69  * inst_lookup_by_id() and waiting for a 0 value of ri_method_thread.  This
  70  * also prevents the instance structure from being deleted until all
  71  * outstanding operations such as method_thread() have finished.
  72  *
  73  * Lock ordering:
  74  *
  75  * dgraph_lock [can be held when taking:]
  76  *   utmpx_lock
  77  *   dictionary->dict_lock
  78  *   st->st_load_lock
  79  *   wait_info_lock
  80  *   ru->restarter_update_lock
  81  *     restarter_queue->rpeq_lock
  82  *   instance_list.ril_lock
  83  *     inst->ri_lock
  84  *   st->st_configd_live_lock
  85  *
  86  * instance_list.ril_lock
  87  *   graph_queue->gpeq_lock
  88  *   gu->gu_lock
  89  *   st->st_configd_live_lock
  90  *   dictionary->dict_lock
  91  *   inst->ri_lock
  92  *     graph_queue->gpeq_lock
  93  *     gu->gu_lock
  94  *     tu->tu_lock
  95  *     tq->tq_lock
  96  *     inst->ri_queue_lock
  97  *       wait_info_lock
  98  *       bp->cb_lock
  99  *     utmpx_lock
 100  *
 101  * single_user_thread_lock
 102  *   wait_info_lock
 103  *   utmpx_lock
 104  *
 105  * gu_freeze_lock
 106  *
 107  * logbuf_mutex nests inside pretty much everything.
 108  */
 109 
 110 #include <sys/contract/process.h>
 111 #include <sys/ctfs.h>
 112 #include <sys/stat.h>
 113 #include <sys/time.h>
 114 #include <sys/types.h>
 115 #include <sys/uio.h>
 116 #include <sys/wait.h>
 117 #include <assert.h>
 118 #include <errno.h>
 119 #include <fcntl.h>
 120 #include <libcontract.h>
 121 #include <libcontract_priv.h>
 122 #include <libintl.h>
 123 #include <librestart.h>
 124 #include <librestart_priv.h>
 125 #include <libuutil.h>
 126 #include <limits.h>
 127 #include <poll.h>
 128 #include <port.h>
 129 #include <pthread.h>
 130 #include <stdarg.h>
 131 #include <stdio.h>
 132 #include <strings.h>
 133 #include <unistd.h>
 134 
 135 #include "startd.h"
 136 #include "protocol.h"
 137 
/*
 * uu_list pool used to initialize the ri_link node of every
 * restarter_inst_t before it is inserted into instance_list.
 */
static uu_list_pool_t *restarter_instance_pool;
/*
 * The restarter's instances.  The code in this file looks up, inserts into
 * and removes from ril_instance_list only while holding ril_lock.
 */
static restarter_instance_list_t instance_list;

/* uu_list pool from which each instance's ri_queue list is created. */
static uu_list_pool_t *restarter_queue_pool;

#define WT_SVC_ERR_THROTTLE     1       /* 1 sec delay for erroring wait svc */
 144 
 145 /*
 146  * Function used to reset the restart times for an instance, when
 147  * an administrative task comes along and essentially makes the times
 148  * in this array ineffective.
 149  */
 150 static void
 151 reset_start_times(restarter_inst_t *inst)
 152 {
 153         inst->ri_start_index = 0;
 154         bzero(inst->ri_start_time, sizeof (inst->ri_start_time));
 155 }
 156 
 157 /*ARGSUSED*/
 158 static int
 159 restarter_instance_compare(const void *lc_arg, const void *rc_arg,
 160     void *private)
 161 {
 162         int lc_id = ((const restarter_inst_t *)lc_arg)->ri_id;
 163         int rc_id = *(int *)rc_arg;
 164 
 165         if (lc_id > rc_id)
 166                 return (1);
 167         if (lc_id < rc_id)
 168                 return (-1);
 169         return (0);
 170 }
 171 
 172 static restarter_inst_t *
 173 inst_lookup_by_name(const char *name)
 174 {
 175         int id;
 176 
 177         id = dict_lookup_byname(name);
 178         if (id == -1)
 179                 return (NULL);
 180 
 181         return (inst_lookup_by_id(id));
 182 }
 183 
 184 restarter_inst_t *
 185 inst_lookup_by_id(int id)
 186 {
 187         restarter_inst_t *inst;
 188 
 189         MUTEX_LOCK(&instance_list.ril_lock);
 190         inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
 191         if (inst != NULL)
 192                 MUTEX_LOCK(&inst->ri_lock);
 193         MUTEX_UNLOCK(&instance_list.ril_lock);
 194 
 195         if (inst != NULL) {
 196                 while (inst->ri_method_thread != 0 &&
 197                     !pthread_equal(inst->ri_method_thread, pthread_self())) {
 198                         ++inst->ri_method_waiters;
 199                         (void) pthread_cond_wait(&inst->ri_method_cv,
 200                             &inst->ri_lock);
 201                         assert(inst->ri_method_waiters > 0);
 202                         --inst->ri_method_waiters;
 203                 }
 204         }
 205 
 206         return (inst);
 207 }
 208 
 209 static restarter_inst_t *
 210 inst_lookup_queue(const char *name)
 211 {
 212         int id;
 213         restarter_inst_t *inst;
 214 
 215         id = dict_lookup_byname(name);
 216         if (id == -1)
 217                 return (NULL);
 218 
 219         MUTEX_LOCK(&instance_list.ril_lock);
 220         inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
 221         if (inst != NULL)
 222                 MUTEX_LOCK(&inst->ri_queue_lock);
 223         MUTEX_UNLOCK(&instance_list.ril_lock);
 224 
 225         return (inst);
 226 }
 227 
 228 const char *
 229 service_style(int flags)
 230 {
 231         switch (flags & RINST_STYLE_MASK) {
 232         case RINST_CONTRACT:    return ("contract");
 233         case RINST_TRANSIENT:   return ("transient");
 234         case RINST_WAIT:        return ("wait");
 235 
 236         default:
 237 #ifndef NDEBUG
 238                 uu_warn("%s:%d: Bad flags 0x%x.\n", __FILE__, __LINE__, flags);
 239 #endif
 240                 abort();
 241                 /* NOTREACHED */
 242         }
 243 }
 244 
 245 /*
 246  * Fails with ECONNABORTED or ECANCELED.
 247  */
 248 static int
 249 check_contract(restarter_inst_t *inst, boolean_t primary,
 250     scf_instance_t *scf_inst)
 251 {
 252         ctid_t *ctidp;
 253         int fd, r;
 254 
 255         ctidp = primary ? &inst->ri_i.i_primary_ctid :
 256             &inst->ri_i.i_transient_ctid;
 257 
 258         assert(*ctidp >= 1);
 259 
 260         fd = contract_open(*ctidp, NULL, "status", O_RDONLY);
 261         if (fd >= 0) {
 262                 r = close(fd);
 263                 assert(r == 0);
 264                 return (0);
 265         }
 266 
 267         r = restarter_remove_contract(scf_inst, *ctidp, primary ?
 268             RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT);
 269         switch (r) {
 270         case 0:
 271         case ECONNABORTED:
 272         case ECANCELED:
 273                 *ctidp = 0;
 274                 return (r);
 275 
 276         case ENOMEM:
 277                 uu_die("Out of memory\n");
 278                 /* NOTREACHED */
 279 
 280         case EPERM:
 281                 uu_die("Insufficient privilege.\n");
 282                 /* NOTREACHED */
 283 
 284         case EACCES:
 285                 uu_die("Repository backend access denied.\n");
 286                 /* NOTREACHED */
 287 
 288         case EROFS:
 289                 log_error(LOG_INFO, "Could not remove unusable contract id %ld "
 290                     "for %s from repository.\n", *ctidp, inst->ri_i.i_fmri);
 291                 return (0);
 292 
 293         case EINVAL:
 294         case EBADF:
 295         default:
 296                 assert(0);
 297                 abort();
 298                 /* NOTREACHED */
 299         }
 300 }
 301 
 302 static int stop_instance(scf_handle_t *, restarter_inst_t *, stop_cause_t);
 303 
 304 /*
 305  * int restarter_insert_inst(scf_handle_t *, char *)
 306  *   If the inst is already in the restarter list, return its id.  If the inst
 307  *   is not in the restarter list, initialize a restarter_inst_t, initialize its
 308  *   states, insert it into the list, and return 0.
 309  *
 310  *   Fails with
 311  *     ENOENT - name is not in the repository
 312  */
 313 static int
 314 restarter_insert_inst(scf_handle_t *h, const char *name)
 315 {
 316         int id, r;
 317         restarter_inst_t *inst;
 318         uu_list_index_t idx;
 319         scf_service_t *scf_svc;
 320         scf_instance_t *scf_inst;
 321         scf_snapshot_t *snap = NULL;
 322         scf_propertygroup_t *pg;
 323         char *svc_name, *inst_name;
 324         char logfilebuf[PATH_MAX];
 325         char *c;
 326         boolean_t do_commit_states;
 327         restarter_instance_state_t state, next_state;
 328         protocol_states_t *ps;
 329         pid_t start_pid;
 330         restarter_str_t reason = restarter_str_insert_in_graph;
 331 
 332         MUTEX_LOCK(&instance_list.ril_lock);
 333 
 334         /*
 335          * We don't use inst_lookup_by_name() here because we want the lookup
 336          * & insert to be atomic.
 337          */
 338         id = dict_lookup_byname(name);
 339         if (id != -1) {
 340                 inst = uu_list_find(instance_list.ril_instance_list, &id, NULL,
 341                     &idx);
 342                 if (inst != NULL) {
 343                         MUTEX_UNLOCK(&instance_list.ril_lock);
 344                         return (0);
 345                 }
 346         }
 347 
 348         /* Allocate an instance */
 349         inst = startd_zalloc(sizeof (restarter_inst_t));
 350         inst->ri_utmpx_prefix = startd_alloc(max_scf_value_size);
 351         inst->ri_utmpx_prefix[0] = '\0';
 352 
 353         inst->ri_i.i_fmri = startd_alloc(strlen(name) + 1);
 354         (void) strcpy((char *)inst->ri_i.i_fmri, name);
 355 
 356         inst->ri_queue = startd_list_create(restarter_queue_pool, inst, 0);
 357 
 358         /*
 359          * id shouldn't be -1 since we use the same dictionary as graph.c, but
 360          * just in case.
 361          */
 362         inst->ri_id = (id != -1 ? id : dict_insert(name));
 363 
 364         special_online_hooks_get(name, &inst->ri_pre_online_hook,
 365             &inst->ri_post_online_hook, &inst->ri_post_offline_hook);
 366 
 367         scf_svc = safe_scf_service_create(h);
 368         scf_inst = safe_scf_instance_create(h);
 369         pg = safe_scf_pg_create(h);
 370         svc_name = startd_alloc(max_scf_name_size);
 371         inst_name = startd_alloc(max_scf_name_size);
 372 
 373 rep_retry:
 374         if (snap != NULL)
 375                 scf_snapshot_destroy(snap);
 376         if (inst->ri_logstem != NULL)
 377                 startd_free(inst->ri_logstem, PATH_MAX);
 378         if (inst->ri_common_name != NULL)
 379                 free(inst->ri_common_name);
 380         if (inst->ri_C_common_name != NULL)
 381                 free(inst->ri_C_common_name);
 382         snap = NULL;
 383         inst->ri_logstem = NULL;
 384         inst->ri_common_name = NULL;
 385         inst->ri_C_common_name = NULL;
 386 
 387         if (scf_handle_decode_fmri(h, name, NULL, scf_svc, scf_inst, NULL,
 388             NULL, SCF_DECODE_FMRI_EXACT) != 0) {
 389                 switch (scf_error()) {
 390                 case SCF_ERROR_CONNECTION_BROKEN:
 391                         libscf_handle_rebind(h);
 392                         goto rep_retry;
 393 
 394                 case SCF_ERROR_NOT_FOUND:
 395                         goto deleted;
 396                 }
 397 
 398                 uu_die("Can't decode FMRI %s: %s\n", name,
 399                     scf_strerror(scf_error()));
 400         }
 401 
 402         /*
 403          * If there's no running snapshot, then we execute using the editing
 404          * snapshot.  Pending snapshots will be taken later.
 405          */
 406         snap = libscf_get_running_snapshot(scf_inst);
 407 
 408         if ((scf_service_get_name(scf_svc, svc_name, max_scf_name_size) < 0) ||
 409             (scf_instance_get_name(scf_inst, inst_name, max_scf_name_size) <
 410             0)) {
 411                 switch (scf_error()) {
 412                 case SCF_ERROR_NOT_SET:
 413                         break;
 414 
 415                 case SCF_ERROR_CONNECTION_BROKEN:
 416                         libscf_handle_rebind(h);
 417                         goto rep_retry;
 418 
 419                 default:
 420                         assert(0);
 421                         abort();
 422                 }
 423 
 424                 goto deleted;
 425         }
 426 
 427         (void) snprintf(logfilebuf, PATH_MAX, "%s:%s", svc_name, inst_name);
 428         for (c = logfilebuf; *c != '\0'; c++)
 429                 if (*c == '/')
 430                         *c = '-';
 431 
 432         inst->ri_logstem = startd_alloc(PATH_MAX);
 433         (void) snprintf(inst->ri_logstem, PATH_MAX, "%s%s", logfilebuf,
 434             LOG_SUFFIX);
 435 
 436         /*
 437          * If the restarter group is missing, use uninit/none.  Otherwise,
 438          * we're probably being restarted & don't want to mess up the states
 439          * that are there.
 440          */
 441         state = RESTARTER_STATE_UNINIT;
 442         next_state = RESTARTER_STATE_NONE;
 443 
 444         r = scf_instance_get_pg(scf_inst, SCF_PG_RESTARTER, pg);
 445         if (r != 0) {
 446                 switch (scf_error()) {
 447                 case SCF_ERROR_CONNECTION_BROKEN:
 448                         libscf_handle_rebind(h);
 449                         goto rep_retry;
 450 
 451                 case SCF_ERROR_NOT_SET:
 452                         goto deleted;
 453 
 454                 case SCF_ERROR_NOT_FOUND:
 455                         /*
 456                          * This shouldn't happen since the graph engine should
 457                          * have initialized the state to uninitialized/none if
 458                          * there was no restarter pg.  In case somebody
 459                          * deleted it, though....
 460                          */
 461                         do_commit_states = B_TRUE;
 462                         break;
 463 
 464                 default:
 465                         assert(0);
 466                         abort();
 467                 }
 468         } else {
 469                 r = libscf_read_states(pg, &state, &next_state);
 470                 if (r != 0) {
 471                         do_commit_states = B_TRUE;
 472                 } else {
 473                         if (next_state != RESTARTER_STATE_NONE) {
 474                                 /*
 475                                  * Force next_state to _NONE since we
 476                                  * don't look for method processes.
 477                                  */
 478                                 next_state = RESTARTER_STATE_NONE;
 479                                 do_commit_states = B_TRUE;
 480                         } else {
 481                                 /*
 482                                  * The reason for transition will depend on
 483                                  * state.
 484                                  */
 485                                 if (st->st_initial == 0)
 486                                         reason = restarter_str_startd_restart;
 487                                 else if (state == RESTARTER_STATE_MAINT)
 488                                         reason = restarter_str_bad_repo_state;
 489                                 /*
 490                                  * Inform the restarter of our state without
 491                                  * changing the STIME in the repository.
 492                                  */
 493                                 ps = startd_alloc(sizeof (*ps));
 494                                 inst->ri_i.i_state = ps->ps_state = state;
 495                                 inst->ri_i.i_next_state = ps->ps_state_next =
 496                                     next_state;
 497                                 ps->ps_reason = reason;
 498 
 499                                 graph_protocol_send_event(inst->ri_i.i_fmri,
 500                                     GRAPH_UPDATE_STATE_CHANGE, ps);
 501 
 502                                 do_commit_states = B_FALSE;
 503                         }
 504                 }
 505         }
 506 
 507         switch (libscf_get_startd_properties(scf_inst, snap, &inst->ri_flags,
 508             &inst->ri_utmpx_prefix)) {
 509         case 0:
 510                 break;
 511 
 512         case ECONNABORTED:
 513                 libscf_handle_rebind(h);
 514                 goto rep_retry;
 515 
 516         case ECANCELED:
 517                 goto deleted;
 518 
 519         case ENOENT:
 520                 /*
 521                  * This is odd, because the graph engine should have required
 522                  * the general property group.  So we'll just use default
 523                  * flags in anticipation of the graph engine sending us
 524                  * REMOVE_INSTANCE when it finds out that the general property
 525                  * group has been deleted.
 526                  */
 527                 inst->ri_flags = RINST_CONTRACT;
 528                 break;
 529 
 530         default:
 531                 assert(0);
 532                 abort();
 533         }
 534 
 535         r = libscf_get_template_values(scf_inst, snap,
 536             &inst->ri_common_name, &inst->ri_C_common_name);
 537 
 538         /*
 539          * Copy our names to smaller buffers to reduce our memory footprint.
 540          */
 541         if (inst->ri_common_name != NULL) {
 542                 char *tmp = safe_strdup(inst->ri_common_name);
 543                 startd_free(inst->ri_common_name, max_scf_value_size);
 544                 inst->ri_common_name = tmp;
 545         }
 546 
 547         if (inst->ri_C_common_name != NULL) {
 548                 char *tmp = safe_strdup(inst->ri_C_common_name);
 549                 startd_free(inst->ri_C_common_name, max_scf_value_size);
 550                 inst->ri_C_common_name = tmp;
 551         }
 552 
 553         switch (r) {
 554         case 0:
 555                 break;
 556 
 557         case ECONNABORTED:
 558                 libscf_handle_rebind(h);
 559                 goto rep_retry;
 560 
 561         case ECANCELED:
 562                 goto deleted;
 563 
 564         case ECHILD:
 565         case ENOENT:
 566                 break;
 567 
 568         default:
 569                 assert(0);
 570                 abort();
 571         }
 572 
 573         switch (libscf_read_method_ids(h, scf_inst, inst->ri_i.i_fmri,
 574             &inst->ri_i.i_primary_ctid, &inst->ri_i.i_transient_ctid,
 575             &start_pid)) {
 576         case 0:
 577                 break;
 578 
 579         case ECONNABORTED:
 580                 libscf_handle_rebind(h);
 581                 goto rep_retry;
 582 
 583         case ECANCELED:
 584                 goto deleted;
 585 
 586         default:
 587                 assert(0);
 588                 abort();
 589         }
 590 
 591         if (inst->ri_i.i_primary_ctid >= 1) {
 592                 contract_hash_store(inst->ri_i.i_primary_ctid, inst->ri_id);
 593 
 594                 switch (check_contract(inst, B_TRUE, scf_inst)) {
 595                 case 0:
 596                         break;
 597 
 598                 case ECONNABORTED:
 599                         libscf_handle_rebind(h);
 600                         goto rep_retry;
 601 
 602                 case ECANCELED:
 603                         goto deleted;
 604 
 605                 default:
 606                         assert(0);
 607                         abort();
 608                 }
 609         }
 610 
 611         if (inst->ri_i.i_transient_ctid >= 1) {
 612                 switch (check_contract(inst, B_FALSE, scf_inst)) {
 613                 case 0:
 614                         break;
 615 
 616                 case ECONNABORTED:
 617                         libscf_handle_rebind(h);
 618                         goto rep_retry;
 619 
 620                 case ECANCELED:
 621                         goto deleted;
 622 
 623                 default:
 624                         assert(0);
 625                         abort();
 626                 }
 627         }
 628 
 629         /* No more failures we live through, so add it to the list. */
 630         (void) pthread_mutex_init(&inst->ri_lock, &mutex_attrs);
 631         (void) pthread_mutex_init(&inst->ri_queue_lock, &mutex_attrs);
 632         MUTEX_LOCK(&inst->ri_lock);
 633         MUTEX_LOCK(&inst->ri_queue_lock);
 634 
 635         (void) pthread_cond_init(&inst->ri_method_cv, NULL);
 636 
 637         uu_list_node_init(inst, &inst->ri_link, restarter_instance_pool);
 638         uu_list_insert(instance_list.ril_instance_list, inst, idx);
 639         MUTEX_UNLOCK(&instance_list.ril_lock);
 640 
 641         if (start_pid != -1 &&
 642             (inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT) {
 643                 int ret;
 644                 ret = wait_register(start_pid, inst->ri_i.i_fmri, 0, 1);
 645                 if (ret == -1) {
 646                         /*
 647                          * Implication:  if we can't reregister the
 648                          * instance, we will start another one.  Two
 649                          * instances may or may not result in a resource
 650                          * conflict.
 651                          */
 652                         log_error(LOG_WARNING,
 653                             "%s: couldn't reregister %ld for wait\n",
 654                             inst->ri_i.i_fmri, start_pid);
 655                 } else if (ret == 1) {
 656                         /*
 657                          * Leading PID has exited.
 658                          */
 659                         (void) stop_instance(h, inst, RSTOP_EXIT);
 660                 }
 661         }
 662 
 663 
 664         scf_pg_destroy(pg);
 665 
 666         if (do_commit_states)
 667                 (void) restarter_instance_update_states(h, inst, state,
 668                     next_state, RERR_NONE, reason);
 669 
 670         log_framework(LOG_DEBUG, "%s is a %s-style service\n", name,
 671             service_style(inst->ri_flags));
 672 
 673         MUTEX_UNLOCK(&inst->ri_queue_lock);
 674         MUTEX_UNLOCK(&inst->ri_lock);
 675 
 676         startd_free(svc_name, max_scf_name_size);
 677         startd_free(inst_name, max_scf_name_size);
 678         scf_snapshot_destroy(snap);
 679         scf_instance_destroy(scf_inst);
 680         scf_service_destroy(scf_svc);
 681 
 682         log_framework(LOG_DEBUG, "%s: inserted instance into restarter list\n",
 683             name);
 684 
 685         return (0);
 686 
 687 deleted:
 688         MUTEX_UNLOCK(&instance_list.ril_lock);
 689         startd_free(inst_name, max_scf_name_size);
 690         startd_free(svc_name, max_scf_name_size);
 691         if (snap != NULL)
 692                 scf_snapshot_destroy(snap);
 693         scf_pg_destroy(pg);
 694         scf_instance_destroy(scf_inst);
 695         scf_service_destroy(scf_svc);
 696         startd_free((void *)inst->ri_i.i_fmri, strlen(inst->ri_i.i_fmri) + 1);
 697         uu_list_destroy(inst->ri_queue);
 698         if (inst->ri_logstem != NULL)
 699                 startd_free(inst->ri_logstem, PATH_MAX);
 700         if (inst->ri_common_name != NULL)
 701                 free(inst->ri_common_name);
 702         if (inst->ri_C_common_name != NULL)
 703                 free(inst->ri_C_common_name);
 704         startd_free(inst->ri_utmpx_prefix, max_scf_value_size);
 705         startd_free(inst, sizeof (restarter_inst_t));
 706         return (ENOENT);
 707 }
 708 
 709 static void
 710 restarter_delete_inst(restarter_inst_t *ri)
 711 {
 712         int id;
 713         restarter_inst_t *rip;
 714         void *cookie = NULL;
 715         restarter_instance_qentry_t *e;
 716 
 717         assert(MUTEX_HELD(&ri->ri_lock));
 718 
 719         /*
 720          * Must drop the instance lock so we can pick up the instance_list
 721          * lock & remove the instance.
 722          */
 723         id = ri->ri_id;
 724         MUTEX_UNLOCK(&ri->ri_lock);
 725 
 726         MUTEX_LOCK(&instance_list.ril_lock);
 727 
 728         rip = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
 729         if (rip == NULL) {
 730                 MUTEX_UNLOCK(&instance_list.ril_lock);
 731                 return;
 732         }
 733 
 734         assert(ri == rip);
 735 
 736         uu_list_remove(instance_list.ril_instance_list, ri);
 737 
 738         log_framework(LOG_DEBUG, "%s: deleted instance from restarter list\n",
 739             ri->ri_i.i_fmri);
 740 
 741         MUTEX_UNLOCK(&instance_list.ril_lock);
 742 
 743         /*
 744          * We can lock the instance without holding the instance_list lock
 745          * since we removed the instance from the list.
 746          */
 747         MUTEX_LOCK(&ri->ri_lock);
 748         MUTEX_LOCK(&ri->ri_queue_lock);
 749 
 750         if (ri->ri_i.i_primary_ctid >= 1)
 751                 contract_hash_remove(ri->ri_i.i_primary_ctid);
 752 
 753         while (ri->ri_method_thread != 0 || ri->ri_method_waiters > 0)
 754                 (void) pthread_cond_wait(&ri->ri_method_cv, &ri->ri_lock);
 755 
 756         while ((e = uu_list_teardown(ri->ri_queue, &cookie)) != NULL)
 757                 startd_free(e, sizeof (*e));
 758         uu_list_destroy(ri->ri_queue);
 759 
 760         startd_free((void *)ri->ri_i.i_fmri, strlen(ri->ri_i.i_fmri) + 1);
 761         startd_free(ri->ri_logstem, PATH_MAX);
 762         if (ri->ri_common_name != NULL)
 763                 free(ri->ri_common_name);
 764         if (ri->ri_C_common_name != NULL)
 765                 free(ri->ri_C_common_name);
 766         startd_free(ri->ri_utmpx_prefix, max_scf_value_size);
 767         (void) pthread_mutex_destroy(&ri->ri_lock);
 768         (void) pthread_mutex_destroy(&ri->ri_queue_lock);
 769         startd_free(ri, sizeof (restarter_inst_t));
 770 }
 771 
 772 /*
 773  * instance_is_wait_style()
 774  *
 775  *   Returns 1 if the given instance is a "wait-style" service instance.
 776  */
 777 int
 778 instance_is_wait_style(restarter_inst_t *inst)
 779 {
 780         assert(MUTEX_HELD(&inst->ri_lock));
 781         return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT);
 782 }
 783 
 784 /*
 785  * instance_is_transient_style()
 786  *
 787  *   Returns 1 if the given instance is a transient service instance.
 788  */
 789 int
 790 instance_is_transient_style(restarter_inst_t *inst)
 791 {
 792         assert(MUTEX_HELD(&inst->ri_lock));
 793         return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_TRANSIENT);
 794 }
 795 
 796 /*
 797  * instance_in_transition()
 798  * Returns 1 if instance is in transition, 0 if not
 799  */
 800 int
 801 instance_in_transition(restarter_inst_t *inst)
 802 {
 803         assert(MUTEX_HELD(&inst->ri_lock));
 804         if (inst->ri_i.i_next_state == RESTARTER_STATE_NONE)
 805                 return (0);
 806         return (1);
 807 }
 808 
 809 /*
 810  * returns 1 if instance is already started, 0 if not
 811  */
 812 static int
 813 instance_started(restarter_inst_t *inst)
 814 {
 815         int ret;
 816 
 817         assert(MUTEX_HELD(&inst->ri_lock));
 818 
 819         if (inst->ri_i.i_state == RESTARTER_STATE_ONLINE ||
 820             inst->ri_i.i_state == RESTARTER_STATE_DEGRADED)
 821                 ret = 1;
 822         else
 823                 ret = 0;
 824 
 825         return (ret);
 826 }
 827 
 828 /*
 829  * Returns
 830  *   0 - success
 831  *   ECONNRESET - success, but h was rebound
 832  */
 833 int
 834 restarter_instance_update_states(scf_handle_t *h, restarter_inst_t *ri,
 835     restarter_instance_state_t new_state,
 836     restarter_instance_state_t new_state_next, restarter_error_t err,
 837     restarter_str_t reason)
 838 {
 839         protocol_states_t *states;
 840         int e;
 841         uint_t retry_count = 0, msecs = ALLOC_DELAY;
 842         boolean_t rebound = B_FALSE;
 843         int prev_state_online;
 844         int state_online;
 845 
 846         assert(MUTEX_HELD(&ri->ri_lock));
 847 
 848         prev_state_online = instance_started(ri);
 849 
 850 retry:
 851         e = _restarter_commit_states(h, &ri->ri_i, new_state, new_state_next,
 852             restarter_get_str_short(reason));
 853         switch (e) {
 854         case 0:
 855                 break;
 856 
 857         case ENOMEM:
 858                 ++retry_count;
 859                 if (retry_count < ALLOC_RETRY) {
 860                         (void) poll(NULL, 0, msecs);
 861                         msecs *= ALLOC_DELAY_MULT;
 862                         goto retry;
 863                 }
 864 
 865                 /* Like startd_alloc(). */
 866                 uu_die("Insufficient memory.\n");
 867                 /* NOTREACHED */
 868 
 869         case ECONNABORTED:
 870                 libscf_handle_rebind(h);
 871                 rebound = B_TRUE;
 872                 goto retry;
 873 
 874         case EPERM:
 875         case EACCES:
 876         case EROFS:
 877                 log_error(LOG_NOTICE, "Could not commit state change for %s "
 878                     "to repository: %s.\n", ri->ri_i.i_fmri, strerror(e));
 879                 /* FALLTHROUGH */
 880 
 881         case ENOENT:
 882                 ri->ri_i.i_state = new_state;
 883                 ri->ri_i.i_next_state = new_state_next;
 884                 break;
 885 
 886         case EINVAL:
 887         default:
 888                 bad_error("_restarter_commit_states", e);
 889         }
 890 
 891         states = startd_alloc(sizeof (protocol_states_t));
 892         states->ps_state = new_state;
 893         states->ps_state_next = new_state_next;
 894         states->ps_err = err;
 895         states->ps_reason = reason;
 896         graph_protocol_send_event(ri->ri_i.i_fmri, GRAPH_UPDATE_STATE_CHANGE,
 897             (void *)states);
 898 
 899         state_online = instance_started(ri);
 900 
 901         if (prev_state_online && !state_online)
 902                 ri->ri_post_offline_hook();
 903         else if (!prev_state_online && state_online)
 904                 ri->ri_post_online_hook();
 905 
 906         return (rebound ? ECONNRESET : 0);
 907 }
 908 
 909 void
 910 restarter_mark_pending_snapshot(const char *fmri, uint_t flag)
 911 {
 912         restarter_inst_t *inst;
 913 
 914         assert(flag == RINST_RETAKE_RUNNING || flag == RINST_RETAKE_START);
 915 
 916         inst = inst_lookup_by_name(fmri);
 917         if (inst == NULL)
 918                 return;
 919 
 920         inst->ri_flags |= flag;
 921 
 922         MUTEX_UNLOCK(&inst->ri_lock);
 923 }
 924 
 925 static void
 926 restarter_take_pending_snapshots(scf_handle_t *h)
 927 {
 928         restarter_inst_t *inst;
 929         int r;
 930 
 931         MUTEX_LOCK(&instance_list.ril_lock);
 932 
 933         for (inst = uu_list_first(instance_list.ril_instance_list);
 934             inst != NULL;
 935             inst = uu_list_next(instance_list.ril_instance_list, inst)) {
 936                 const char *fmri;
 937                 scf_instance_t *sinst = NULL;
 938 
 939                 MUTEX_LOCK(&inst->ri_lock);
 940 
 941                 /*
 942                  * This is where we'd check inst->ri_method_thread and if it
 943                  * were nonzero we'd wait in anticipation of another thread
 944                  * executing a method for inst.  Doing so with the instance_list
 945                  * locked, though, leads to deadlock.  Since taking a snapshot
 946                  * during that window won't hurt anything, we'll just continue.
 947                  */
 948 
 949                 fmri = inst->ri_i.i_fmri;
 950 
 951                 if (inst->ri_flags & RINST_RETAKE_RUNNING) {
 952                         scf_snapshot_t *rsnap;
 953 
 954                         (void) libscf_fmri_get_instance(h, fmri, &sinst);
 955 
 956                         rsnap = libscf_get_or_make_running_snapshot(sinst,
 957                             fmri, B_FALSE);
 958 
 959                         scf_instance_destroy(sinst);
 960 
 961                         if (rsnap != NULL)
 962                                 inst->ri_flags &= ~RINST_RETAKE_RUNNING;
 963 
 964                         scf_snapshot_destroy(rsnap);
 965                 }
 966 
 967                 if (inst->ri_flags & RINST_RETAKE_START) {
 968                         switch (r = libscf_snapshots_poststart(h, fmri,
 969                             B_FALSE)) {
 970                         case 0:
 971                         case ENOENT:
 972                                 inst->ri_flags &= ~RINST_RETAKE_START;
 973                                 break;
 974 
 975                         case ECONNABORTED:
 976                                 break;
 977 
 978                         case EACCES:
 979                         default:
 980                                 bad_error("libscf_snapshots_poststart", r);
 981                         }
 982                 }
 983 
 984                 MUTEX_UNLOCK(&inst->ri_lock);
 985         }
 986 
 987         MUTEX_UNLOCK(&instance_list.ril_lock);
 988 }
 989 
 990 /* ARGSUSED */
 991 void *
 992 restarter_post_fsminimal_thread(void *unused)
 993 {
 994         scf_handle_t *h;
 995         int r;
 996 
 997         (void) pthread_setname_np(pthread_self(), "restarter_post_fsmin");
 998 
 999         h = libscf_handle_create_bound_loop();
1000 
1001         for (;;) {
1002                 r = libscf_create_self(h);
1003                 if (r == 0)
1004                         break;
1005 
1006                 assert(r == ECONNABORTED);
1007                 libscf_handle_rebind(h);
1008         }
1009 
1010         restarter_take_pending_snapshots(h);
1011 
1012         (void) scf_handle_unbind(h);
1013         scf_handle_destroy(h);
1014 
1015         return (NULL);
1016 }
1017 
1018 /*
1019  * int stop_instance()
1020  *
1021  *   Stop the instance identified by the instance given as the second argument,
1022  *   for the cause stated.
1023  *
1024  *   Returns
1025  *     0 - success
1026  *     -1 - inst is in transition
1027  */
1028 static int
1029 stop_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1030     stop_cause_t cause)
1031 {
1032         fork_info_t *info;
1033         const char *cp;
1034         int err;
1035         restarter_error_t re;
1036         restarter_str_t reason;
1037         restarter_instance_state_t new_state;
1038 
1039         assert(MUTEX_HELD(&inst->ri_lock));
1040         assert(inst->ri_method_thread == 0);
1041 
1042         switch (cause) {
1043         case RSTOP_EXIT:
1044                 re = RERR_RESTART;
1045                 reason = restarter_str_ct_ev_exit;
1046                 cp = "all processes in service exited";
1047                 break;
1048         case RSTOP_ERR_CFG:
1049                 re = RERR_FAULT;
1050                 reason = restarter_str_method_failed;
1051                 cp = "service exited with a configuration error";
1052                 break;
1053         case RSTOP_ERR_EXIT:
1054                 re = RERR_RESTART;
1055                 reason = restarter_str_ct_ev_exit;
1056                 cp = "service exited with an error";
1057                 break;
1058         case RSTOP_CORE:
1059                 re = RERR_FAULT;
1060                 reason = restarter_str_ct_ev_core;
1061                 cp = "process dumped core";
1062                 break;
1063         case RSTOP_SIGNAL:
1064                 re = RERR_FAULT;
1065                 reason = restarter_str_ct_ev_signal;
1066                 cp = "process received fatal signal from outside the service";
1067                 break;
1068         case RSTOP_HWERR:
1069                 re = RERR_FAULT;
1070                 reason = restarter_str_ct_ev_hwerr;
1071                 cp = "process killed due to uncorrectable hardware error";
1072                 break;
1073         case RSTOP_DEPENDENCY:
1074                 re = RERR_RESTART;
1075                 reason = restarter_str_dependency_activity;
1076                 cp = "dependency activity requires stop";
1077                 break;
1078         case RSTOP_DISABLE:
1079                 re = RERR_RESTART;
1080                 reason = restarter_str_disable_request;
1081                 cp = "service disabled";
1082                 break;
1083         case RSTOP_RESTART:
1084                 re = RERR_RESTART;
1085                 reason = restarter_str_restart_request;
1086                 cp = "service restarting";
1087                 break;
1088         default:
1089 #ifndef NDEBUG
1090                 (void) fprintf(stderr, "Unknown cause %d at %s:%d.\n",
1091                     cause, __FILE__, __LINE__);
1092 #endif
1093                 abort();
1094         }
1095 
1096         /* Services in the disabled and maintenance state are ignored */
1097         if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1098             inst->ri_i.i_state == RESTARTER_STATE_DISABLED) {
1099                 log_framework(LOG_DEBUG,
1100                     "%s: stop_instance -> is maint/disabled\n",
1101                     inst->ri_i.i_fmri);
1102                 return (0);
1103         }
1104 
1105         /* Already stopped instances are left alone */
1106         if (instance_started(inst) == 0) {
1107                 log_framework(LOG_DEBUG, "Restarter: %s is already stopped.\n",
1108                     inst->ri_i.i_fmri);
1109                 return (0);
1110         }
1111 
1112         if (instance_in_transition(inst)) {
1113                 /* requeue event by returning -1 */
1114                 log_framework(LOG_DEBUG,
1115                     "Restarter: Not stopping %s, in transition.\n",
1116                     inst->ri_i.i_fmri);
1117                 return (-1);
1118         }
1119 
1120         log_instance(inst, B_TRUE, "Stopping because %s.", cp);
1121 
1122         log_framework(re == RERR_FAULT ? LOG_INFO : LOG_DEBUG,
1123             "%s: Instance stopping because %s.\n", inst->ri_i.i_fmri, cp);
1124 
1125         if (instance_is_wait_style(inst) &&
1126             (cause == RSTOP_EXIT ||
1127             cause == RSTOP_ERR_CFG ||
1128             cause == RSTOP_ERR_EXIT)) {
1129                 /*
1130                  * No need to stop instance, as child has exited; remove
1131                  * contract and move the instance to the offline state.
1132                  */
1133                 switch (err = restarter_instance_update_states(local_handle,
1134                     inst, inst->ri_i.i_state, RESTARTER_STATE_OFFLINE, re,
1135                     reason)) {
1136                 case 0:
1137                 case ECONNRESET:
1138                         break;
1139 
1140                 default:
1141                         bad_error("restarter_instance_update_states", err);
1142                 }
1143 
1144                 if (cause == RSTOP_ERR_EXIT) {
1145                         /*
1146                          * The RSTOP_ERR_EXIT cause is set via the
1147                          * wait_thread -> wait_remove code path when we have
1148                          * a "wait" style svc that exited with an error. If
1149                          * the svc is failing too quickly, we throttle it so
1150                          * that we don't restart it more than once/second.
1151                          * Since we know we're running in the wait thread its
1152                          * ok to throttle it right here.
1153                          */
1154                         (void) update_fault_count(inst, FAULT_COUNT_INCR);
1155                         if (method_rate_critical(inst)) {
1156                                 log_instance(inst, B_TRUE, "Failing too "
1157                                     "quickly, throttling.");
1158                                 (void) sleep(WT_SVC_ERR_THROTTLE);
1159                         }
1160                 } else {
1161                         (void) update_fault_count(inst, FAULT_COUNT_RESET);
1162                         reset_start_times(inst);
1163                 }
1164 
1165                 if (inst->ri_i.i_primary_ctid != 0) {
1166                         inst->ri_m_inst =
1167                             safe_scf_instance_create(local_handle);
1168                         inst->ri_mi_deleted = B_FALSE;
1169 
1170                         libscf_reget_instance(inst);
1171                         method_remove_contract(inst, B_TRUE, B_TRUE);
1172 
1173                         scf_instance_destroy(inst->ri_m_inst);
1174                         inst->ri_m_inst = NULL;
1175                 }
1176 
1177                 switch (err = restarter_instance_update_states(local_handle,
1178                     inst, inst->ri_i.i_next_state, RESTARTER_STATE_NONE, re,
1179                     reason)) {
1180                 case 0:
1181                 case ECONNRESET:
1182                         break;
1183 
1184                 default:
1185                         bad_error("restarter_instance_update_states", err);
1186                 }
1187 
1188                 if (cause != RSTOP_ERR_CFG)
1189                         return (0);
1190         } else if (instance_is_wait_style(inst) && re == RERR_RESTART) {
1191                 /*
1192                  * Stopping a wait service through means other than the pid
1193                  * exiting should keep wait_thread() from restarting the
1194                  * service, by removing it from the wait list.
1195                  * We cannot remove it right now otherwise the process will
1196                  * end up <defunct> so mark it to be ignored.
1197                  */
1198                 wait_ignore_by_fmri(inst->ri_i.i_fmri);
1199         }
1200 
1201         /*
1202          * There are some configuration errors which we cannot detect until we
1203          * try to run the method.  For example, see exec_method() where the
1204          * restarter_set_method_context() call can return SMF_EXIT_ERR_CONFIG
1205          * in several cases. If this happens for a "wait-style" svc,
1206          * wait_remove() sets the cause as RSTOP_ERR_CFG so that we can detect
1207          * the configuration error and go into maintenance, even though it is
1208          * a "wait-style" svc.
1209          */
1210         if (cause == RSTOP_ERR_CFG)
1211                 new_state = RESTARTER_STATE_MAINT;
1212         else
1213                 new_state = inst->ri_i.i_enabled ?
1214                     RESTARTER_STATE_OFFLINE : RESTARTER_STATE_DISABLED;
1215 
1216         switch (err = restarter_instance_update_states(local_handle, inst,
1217             inst->ri_i.i_state, new_state, RERR_NONE, reason)) {
1218         case 0:
1219         case ECONNRESET:
1220                 break;
1221 
1222         default:
1223                 bad_error("restarter_instance_update_states", err);
1224         }
1225 
1226         info = startd_zalloc(sizeof (fork_info_t));
1227 
1228         info->sf_id = inst->ri_id;
1229         info->sf_method_type = METHOD_STOP;
1230         info->sf_event_type = re;
1231         info->sf_reason = reason;
1232         inst->ri_method_thread = startd_thread_create(method_thread, info);
1233 
1234         return (0);
1235 }
1236 
1237 /*
1238  * Returns
1239  *   ENOENT - fmri is not in instance_list
1240  *   0 - success
1241  *   ECONNRESET - success, though handle was rebound
1242  *   -1 - instance is in transition
1243  */
1244 int
1245 stop_instance_fmri(scf_handle_t *h, const char *fmri, uint_t flags)
1246 {
1247         restarter_inst_t *rip;
1248         int r;
1249 
1250         rip = inst_lookup_by_name(fmri);
1251         if (rip == NULL)
1252                 return (ENOENT);
1253 
1254         r = stop_instance(h, rip, flags);
1255 
1256         MUTEX_UNLOCK(&rip->ri_lock);
1257 
1258         return (r);
1259 }
1260 
1261 static void
1262 unmaintain_instance(scf_handle_t *h, restarter_inst_t *rip,
1263     unmaint_cause_t cause)
1264 {
1265         ctid_t ctid;
1266         scf_instance_t *inst;
1267         int r;
1268         uint_t tries = 0, msecs = ALLOC_DELAY;
1269         const char *cp;
1270         restarter_str_t reason;
1271 
1272         assert(MUTEX_HELD(&rip->ri_lock));
1273 
1274         if (rip->ri_i.i_state != RESTARTER_STATE_MAINT) {
1275                 log_error(LOG_DEBUG, "Restarter: "
1276                     "Ignoring maintenance off command because %s is not in the "
1277                     "maintenance state.\n", rip->ri_i.i_fmri);
1278                 return;
1279         }
1280 
1281         switch (cause) {
1282         case RUNMAINT_CLEAR:
1283                 cp = "clear requested";
1284                 reason = restarter_str_clear_request;
1285                 break;
1286         case RUNMAINT_DISABLE:
1287                 cp = "disable requested";
1288                 reason = restarter_str_disable_request;
1289                 break;
1290         default:
1291 #ifndef NDEBUG
1292                 (void) fprintf(stderr, "Uncaught case for %d at %s:%d.\n",
1293                     cause, __FILE__, __LINE__);
1294 #endif
1295                 abort();
1296         }
1297 
1298         log_instance(rip, B_TRUE, "Leaving maintenance because %s.",
1299             cp);
1300         log_framework(LOG_DEBUG, "%s: Instance leaving maintenance because "
1301             "%s.\n", rip->ri_i.i_fmri, cp);
1302 
1303         (void) restarter_instance_update_states(h, rip, RESTARTER_STATE_UNINIT,
1304             RESTARTER_STATE_NONE, RERR_RESTART, reason);
1305 
1306         /*
1307          * If we did ADMIN_MAINT_ON_IMMEDIATE, then there might still be
1308          * a primary contract.
1309          */
1310         if (rip->ri_i.i_primary_ctid == 0)
1311                 return;
1312 
1313         ctid = rip->ri_i.i_primary_ctid;
1314         contract_abandon(ctid);
1315         rip->ri_i.i_primary_ctid = 0;
1316 
1317 rep_retry:
1318         switch (r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst)) {
1319         case 0:
1320                 break;
1321 
1322         case ECONNABORTED:
1323                 libscf_handle_rebind(h);
1324                 goto rep_retry;
1325 
1326         case ENOENT:
1327                 /* Must have been deleted. */
1328                 return;
1329 
1330         case EINVAL:
1331         case ENOTSUP:
1332         default:
1333                 bad_error("libscf_handle_rebind", r);
1334         }
1335 
1336 again:
1337         r = restarter_remove_contract(inst, ctid, RESTARTER_CONTRACT_PRIMARY);
1338         switch (r) {
1339         case 0:
1340                 break;
1341 
1342         case ENOMEM:
1343                 ++tries;
1344                 if (tries < ALLOC_RETRY) {
1345                         (void) poll(NULL, 0, msecs);
1346                         msecs *= ALLOC_DELAY_MULT;
1347                         goto again;
1348                 }
1349 
1350                 uu_die("Insufficient memory.\n");
1351                 /* NOTREACHED */
1352 
1353         case ECONNABORTED:
1354                 scf_instance_destroy(inst);
1355                 libscf_handle_rebind(h);
1356                 goto rep_retry;
1357 
1358         case ECANCELED:
1359                 break;
1360 
1361         case EPERM:
1362         case EACCES:
1363         case EROFS:
1364                 log_error(LOG_INFO,
1365                     "Could not remove contract id %lu for %s (%s).\n", ctid,
1366                     rip->ri_i.i_fmri, strerror(r));
1367                 break;
1368 
1369         case EINVAL:
1370         case EBADF:
1371         default:
1372                 bad_error("restarter_remove_contract", r);
1373         }
1374 
1375         scf_instance_destroy(inst);
1376 }
1377 
1378 /*
1379  * enable_inst()
1380  *   Set inst->ri_i.i_enabled.  Expects 'e' to be _ENABLE, _DISABLE, or
1381  *   _ADMIN_DISABLE.  If the event is _ENABLE and inst is uninitialized or
1382  *   disabled, move it to offline.  If the event is _DISABLE or
1383  *   _ADMIN_DISABLE, make sure inst will move to disabled.
1384  *
1385  *   Returns
1386  *     0 - success
1387  *     ECONNRESET - h was rebound
1388  */
1389 static int
1390 enable_inst(scf_handle_t *h, restarter_inst_t *inst,
1391     restarter_instance_qentry_t *riq)
1392 {
1393         restarter_instance_state_t state;
1394         restarter_event_type_t e = riq->riq_type;
1395         restarter_str_t reason = restarter_str_per_configuration;
1396         int r;
1397 
1398         assert(MUTEX_HELD(&inst->ri_lock));
1399         assert(e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE ||
1400             e == RESTARTER_EVENT_TYPE_DISABLE ||
1401             e == RESTARTER_EVENT_TYPE_ENABLE);
1402         assert(instance_in_transition(inst) == 0);
1403 
1404         state = inst->ri_i.i_state;
1405 
1406         if (e == RESTARTER_EVENT_TYPE_ENABLE) {
1407                 inst->ri_i.i_enabled = 1;
1408 
1409                 if (state == RESTARTER_STATE_UNINIT ||
1410                     state == RESTARTER_STATE_DISABLED) {
1411                         /*
1412                          * B_FALSE: Don't log an error if the log_instance()
1413                          * fails because it will fail on the miniroot before
1414                          * install-discovery runs.
1415                          */
1416                         log_instance(inst, B_FALSE, "Enabled.");
1417                         log_framework(LOG_DEBUG, "%s: Instance enabled.\n",
1418                             inst->ri_i.i_fmri);
1419 
1420                         /*
1421                          * If we are coming from DISABLED, it was obviously an
1422                          * enable request. If we are coming from UNINIT, it may
1423                          * have been a sevice in MAINT that was cleared.
1424                          */
1425                         if (riq->riq_reason == restarter_str_clear_request)
1426                                 reason = restarter_str_clear_request;
1427                         else if (state == RESTARTER_STATE_DISABLED)
1428                                 reason = restarter_str_enable_request;
1429                         (void) restarter_instance_update_states(h, inst,
1430                             RESTARTER_STATE_OFFLINE, RESTARTER_STATE_NONE,
1431                             RERR_NONE, reason);
1432                 } else {
1433                         log_framework(LOG_DEBUG, "Restarter: "
1434                             "Not changing state of %s for enable command.\n",
1435                             inst->ri_i.i_fmri);
1436                 }
1437         } else {
1438                 inst->ri_i.i_enabled = 0;
1439 
1440                 switch (state) {
1441                 case RESTARTER_STATE_ONLINE:
1442                 case RESTARTER_STATE_DEGRADED:
1443                         r = stop_instance(h, inst, RSTOP_DISABLE);
1444                         return (r == ECONNRESET ? 0 : r);
1445 
1446                 case RESTARTER_STATE_OFFLINE:
1447                 case RESTARTER_STATE_UNINIT:
1448                         if (inst->ri_i.i_primary_ctid != 0) {
1449                                 inst->ri_m_inst = safe_scf_instance_create(h);
1450                                 inst->ri_mi_deleted = B_FALSE;
1451 
1452                                 libscf_reget_instance(inst);
1453                                 method_remove_contract(inst, B_TRUE, B_TRUE);
1454 
1455                                 scf_instance_destroy(inst->ri_m_inst);
1456                         }
1457                         /* B_FALSE: See log_instance(..., "Enabled."); above */
1458                         log_instance(inst, B_FALSE, "Disabled.");
1459                         log_framework(LOG_DEBUG, "%s: Instance disabled.\n",
1460                             inst->ri_i.i_fmri);
1461 
1462                         /*
1463                          * If we are coming from OFFLINE, it was obviously a
1464                          * disable request. But if we are coming from
1465                          * UNINIT, it may have been a disable request for a
1466                          * service in MAINT.
1467                          */
1468                         if (riq->riq_reason == restarter_str_disable_request ||
1469                             state == RESTARTER_STATE_OFFLINE)
1470                                 reason = restarter_str_disable_request;
1471                         (void) restarter_instance_update_states(h, inst,
1472                             RESTARTER_STATE_DISABLED, RESTARTER_STATE_NONE,
1473                             RERR_RESTART, reason);
1474                         return (0);
1475 
1476                 case RESTARTER_STATE_DISABLED:
1477                         break;
1478 
1479                 case RESTARTER_STATE_MAINT:
1480                         /*
1481                          * We only want to pull the instance out of maintenance
1482                          * if the disable is on adminstrative request.  The
1483                          * graph engine sends _DISABLE events whenever a
1484                          * service isn't in the disabled state, and we don't
1485                          * want to pull the service out of maintenance if,
1486                          * for example, it is there due to a dependency cycle.
1487                          */
1488                         if (e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE)
1489                                 unmaintain_instance(h, inst, RUNMAINT_DISABLE);
1490                         break;
1491 
1492                 default:
1493 #ifndef NDEBUG
1494                         (void) fprintf(stderr, "Restarter instance %s has "
1495                             "unknown state %d.\n", inst->ri_i.i_fmri, state);
1496 #endif
1497                         abort();
1498                 }
1499         }
1500 
1501         return (0);
1502 }
1503 
1504 static void
1505 start_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1506     int32_t reason)
1507 {
1508         fork_info_t *info;
1509         restarter_str_t new_reason;
1510 
1511         assert(MUTEX_HELD(&inst->ri_lock));
1512         assert(instance_in_transition(inst) == 0);
1513         assert(inst->ri_method_thread == 0);
1514 
1515         log_framework(LOG_DEBUG, "%s: trying to start instance\n",
1516             inst->ri_i.i_fmri);
1517 
1518         /*
1519          * We want to keep the original reason for restarts and clear actions
1520          */
1521         switch (reason) {
1522         case restarter_str_restart_request:
1523         case restarter_str_clear_request:
1524                 new_reason = reason;
1525                 break;
1526         default:
1527                 new_reason = restarter_str_dependencies_satisfied;
1528         }
1529 
1530         /* Services in the disabled and maintenance state are ignored */
1531         if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1532             inst->ri_i.i_state == RESTARTER_STATE_DISABLED ||
1533             inst->ri_i.i_enabled == 0) {
1534                 log_framework(LOG_DEBUG,
1535                     "%s: start_instance -> is maint/disabled\n",
1536                     inst->ri_i.i_fmri);
1537                 return;
1538         }
1539 
1540         /* Already started instances are left alone */
1541         if (instance_started(inst) == 1) {
1542                 log_framework(LOG_DEBUG,
1543                     "%s: start_instance -> is already started\n",
1544                     inst->ri_i.i_fmri);
1545                 return;
1546         }
1547 
1548         log_framework(LOG_DEBUG, "%s: starting instance.\n", inst->ri_i.i_fmri);
1549 
1550         (void) restarter_instance_update_states(local_handle, inst,
1551             inst->ri_i.i_state, RESTARTER_STATE_ONLINE, RERR_NONE, new_reason);
1552 
1553         info = startd_zalloc(sizeof (fork_info_t));
1554 
1555         info->sf_id = inst->ri_id;
1556         info->sf_method_type = METHOD_START;
1557         info->sf_event_type = RERR_NONE;
1558         info->sf_reason = new_reason;
1559         inst->ri_method_thread = startd_thread_create(method_thread, info);
1560 }
1561 
1562 static int
1563 event_from_tty(scf_handle_t *h, restarter_inst_t *rip)
1564 {
1565         scf_instance_t *inst;
1566         int ret = 0;
1567 
1568         if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
1569                 return (-1);
1570 
1571         ret = restarter_inst_ractions_from_tty(inst);
1572 
1573         scf_instance_destroy(inst);
1574         return (ret);
1575 }
1576 
1577 static boolean_t
1578 restart_dump(scf_handle_t *h, restarter_inst_t *rip)
1579 {
1580         scf_instance_t *inst;
1581         boolean_t ret = B_FALSE;
1582 
1583         if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
1584                 return (-1);
1585 
1586         if (restarter_inst_dump(inst) == 1)
1587                 ret = B_TRUE;
1588 
1589         scf_instance_destroy(inst);
1590         return (ret);
1591 }
1592 
1593 static void
1594 maintain_instance(scf_handle_t *h, restarter_inst_t *rip, int immediate,
1595     restarter_str_t reason)
1596 {
1597         fork_info_t *info;
1598         scf_instance_t *scf_inst = NULL;
1599 
1600         assert(MUTEX_HELD(&rip->ri_lock));
1601         assert(reason != restarter_str_none);
1602         assert(rip->ri_method_thread == 0);
1603 
1604         log_instance(rip, B_TRUE, "Stopping for maintenance due to %s.",
1605             restarter_get_str_short(reason));
1606         log_framework(LOG_DEBUG, "%s: stopping for maintenance due to %s.\n",
1607             rip->ri_i.i_fmri, restarter_get_str_short(reason));
1608 
1609         /* Services in the maintenance state are ignored */
1610         if (rip->ri_i.i_state == RESTARTER_STATE_MAINT) {
1611                 log_framework(LOG_DEBUG,
1612                     "%s: maintain_instance -> is already in maintenance\n",
1613                     rip->ri_i.i_fmri);
1614                 return;
1615         }
1616 
1617         /*
1618          * If reason state is restarter_str_service_request and
1619          * restarter_actions/auxiliary_fmri property is set with a valid fmri,
1620          * copy the fmri to restarter/auxiliary_fmri so svcs -x can use.
1621          */
1622         if (reason == restarter_str_service_request &&
1623             libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &scf_inst) == 0) {
1624                 if (restarter_inst_validate_ractions_aux_fmri(scf_inst) == 0) {
1625                         if (restarter_inst_set_aux_fmri(scf_inst))
1626                                 log_framework(LOG_DEBUG, "%s: "
1627                                     "restarter_inst_set_aux_fmri failed: ",
1628                                     rip->ri_i.i_fmri);
1629                 } else {
1630                         log_framework(LOG_DEBUG, "%s: "
1631                             "restarter_inst_validate_ractions_aux_fmri "
1632                             "failed: ", rip->ri_i.i_fmri);
1633 
1634                         if (restarter_inst_reset_aux_fmri(scf_inst))
1635                                 log_framework(LOG_DEBUG, "%s: "
1636                                     "restarter_inst_reset_aux_fmri failed: ",
1637                                     rip->ri_i.i_fmri);
1638                 }
1639                 scf_instance_destroy(scf_inst);
1640         }
1641 
1642         if (immediate || !instance_started(rip)) {
1643                 if (rip->ri_i.i_primary_ctid != 0) {
1644                         rip->ri_m_inst = safe_scf_instance_create(h);
1645                         rip->ri_mi_deleted = B_FALSE;
1646 
1647                         libscf_reget_instance(rip);
1648                         method_remove_contract(rip, B_TRUE, B_TRUE);
1649 
1650                         scf_instance_destroy(rip->ri_m_inst);
1651                 }
1652 
1653                 (void) restarter_instance_update_states(h, rip,
1654                     RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_RESTART,
1655                     reason);
1656                 return;
1657         }
1658 
1659         (void) restarter_instance_update_states(h, rip, rip->ri_i.i_state,
1660             RESTARTER_STATE_MAINT, RERR_NONE, reason);
1661 
1662         log_transition(rip, MAINT_REQUESTED);
1663 
1664         info = startd_zalloc(sizeof (*info));
1665         info->sf_id = rip->ri_id;
1666         info->sf_method_type = METHOD_STOP;
1667         info->sf_event_type = RERR_RESTART;
1668         info->sf_reason = reason;
1669         rip->ri_method_thread = startd_thread_create(method_thread, info);
1670 }
1671 
1672 static void
1673 refresh_instance(scf_handle_t *h, restarter_inst_t *rip)
1674 {
1675         scf_instance_t *inst;
1676         scf_snapshot_t *snap;
1677         fork_info_t *info;
1678         int r;
1679 
1680         assert(MUTEX_HELD(&rip->ri_lock));
1681 
1682         log_instance(rip, B_TRUE, "Rereading configuration.");
1683         log_framework(LOG_DEBUG, "%s: rereading configuration.\n",
1684             rip->ri_i.i_fmri);
1685 
1686 rep_retry:
1687         r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst);
1688         switch (r) {
1689         case 0:
1690                 break;
1691 
1692         case ECONNABORTED:
1693                 libscf_handle_rebind(h);
1694                 goto rep_retry;
1695 
1696         case ENOENT:
1697                 /* Must have been deleted. */
1698                 return;
1699 
1700         case EINVAL:
1701         case ENOTSUP:
1702         default:
1703                 bad_error("libscf_fmri_get_instance", r);
1704         }
1705 
1706         snap = libscf_get_running_snapshot(inst);
1707 
1708         r = libscf_get_startd_properties(inst, snap, &rip->ri_flags,
1709             &rip->ri_utmpx_prefix);
1710         switch (r) {
1711         case 0:
1712                 log_framework(LOG_DEBUG, "%s is a %s-style service\n",
1713                     rip->ri_i.i_fmri, service_style(rip->ri_flags));
1714                 break;
1715 
1716         case ECONNABORTED:
1717                 scf_instance_destroy(inst);
1718                 scf_snapshot_destroy(snap);
1719                 libscf_handle_rebind(h);
1720                 goto rep_retry;
1721 
1722         case ECANCELED:
1723         case ENOENT:
1724                 /* Succeed in anticipation of REMOVE_INSTANCE. */
1725                 break;
1726 
1727         default:
1728                 bad_error("libscf_get_startd_properties", r);
1729         }
1730 
1731         if (instance_started(rip)) {
1732                 /* Refresh does not change the state. */
1733                 (void) restarter_instance_update_states(h, rip,
1734                     rip->ri_i.i_state, rip->ri_i.i_state, RERR_NONE,
1735                     restarter_str_refresh);
1736 
1737                 info = startd_zalloc(sizeof (*info));
1738                 info->sf_id = rip->ri_id;
1739                 info->sf_method_type = METHOD_REFRESH;
1740                 info->sf_event_type = RERR_REFRESH;
1741                 info->sf_reason = 0;
1742 
1743                 assert(rip->ri_method_thread == 0);
1744                 rip->ri_method_thread =
1745                     startd_thread_create(method_thread, info);
1746         }
1747 
1748         scf_snapshot_destroy(snap);
1749         scf_instance_destroy(inst);
1750 }
1751 
/*
 * Printable names for restarter events.  The table is indexed by the
 * numeric event type (see the event_names[...->riq_type] and
 * event_names[...->rpe_type] lookups in this file), so the order of the
 * entries must not change.
 */
const char *event_names[] = {
	"INVALID",
	"ADD_INSTANCE",
	"REMOVE_INSTANCE",
	"ENABLE",
	"DISABLE",
	"ADMIN_DEGRADED",
	"ADMIN_REFRESH",
	"ADMIN_RESTART",
	"ADMIN_MAINT_OFF",
	"ADMIN_MAINT_ON",
	"ADMIN_MAINT_ON_IMMEDIATE",
	"STOP",
	"START",
	"DEPENDENCY_CYCLE",
	"INVALID_DEPENDENCY",
	"ADMIN_DISABLE",
	"STOP_RESET"
};
1758 
1759 /*
1760  * void *restarter_process_events()
1761  *
1762  *   Called in a separate thread to process the events on an instance's
1763  *   queue.  Empties the queue completely, and tries to keep the thread
1764  *   around for a little while after the queue is empty to save on
1765  *   startup costs.
1766  */
1767 static void *
1768 restarter_process_events(void *arg)
1769 {
1770         scf_handle_t *h;
1771         restarter_instance_qentry_t *event;
1772         restarter_inst_t *rip;
1773         char *fmri = (char *)arg;
1774         struct timespec to;
1775 
1776         (void) pthread_setname_np(pthread_self(), "restarter_process_events");
1777 
1778         assert(fmri != NULL);
1779 
1780         h = libscf_handle_create_bound_loop();
1781 
1782         /* grab the queue lock */
1783         rip = inst_lookup_queue(fmri);
1784         if (rip == NULL)
1785                 goto out;
1786 
1787 again:
1788 
1789         while ((event = uu_list_first(rip->ri_queue)) != NULL) {
1790                 restarter_inst_t *inst;
1791 
1792                 /* drop the queue lock */
1793                 MUTEX_UNLOCK(&rip->ri_queue_lock);
1794 
1795                 /*
1796                  * Grab the inst lock -- this waits until any outstanding
1797                  * method finishes running.
1798                  */
1799                 inst = inst_lookup_by_name(fmri);
1800                 if (inst == NULL) {
1801                         /* Getting deleted in the middle isn't an error. */
1802                         goto cont;
1803                 }
1804 
1805                 assert(instance_in_transition(inst) == 0);
1806 
1807                 /* process the event */
1808                 switch (event->riq_type) {
1809                 case RESTARTER_EVENT_TYPE_ENABLE:
1810                 case RESTARTER_EVENT_TYPE_DISABLE:
1811                         (void) enable_inst(h, inst, event);
1812                         break;
1813 
1814                 case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
1815                         if (enable_inst(h, inst, event) == 0)
1816                                 reset_start_times(inst);
1817                         break;
1818 
1819                 case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
1820                         restarter_delete_inst(inst);
1821                         inst = NULL;
1822                         goto cont;
1823 
1824                 case RESTARTER_EVENT_TYPE_STOP_RESET:
1825                         reset_start_times(inst);
1826                         /* FALLTHROUGH */
1827                 case RESTARTER_EVENT_TYPE_STOP:
1828                         (void) stop_instance(h, inst, RSTOP_DEPENDENCY);
1829                         break;
1830 
1831                 case RESTARTER_EVENT_TYPE_START:
1832                         start_instance(h, inst, event->riq_reason);
1833                         break;
1834 
1835                 case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1836                         maintain_instance(h, inst, 0,
1837                             restarter_str_dependency_cycle);
1838                         break;
1839 
1840                 case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1841                         maintain_instance(h, inst, 0,
1842                             restarter_str_invalid_dependency);
1843                         break;
1844 
1845                 case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1846                         if (event_from_tty(h, inst) == 0)
1847                                 maintain_instance(h, inst, 0,
1848                                     restarter_str_service_request);
1849                         else
1850                                 maintain_instance(h, inst, 0,
1851                                     restarter_str_administrative_request);
1852                         break;
1853 
1854                 case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1855                         if (event_from_tty(h, inst) == 0)
1856                                 maintain_instance(h, inst, 1,
1857                                     restarter_str_service_request);
1858                         else
1859                                 maintain_instance(h, inst, 1,
1860                                     restarter_str_administrative_request);
1861                         break;
1862 
1863                 case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1864                         unmaintain_instance(h, inst, RUNMAINT_CLEAR);
1865                         reset_start_times(inst);
1866                         break;
1867 
1868                 case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1869                         refresh_instance(h, inst);
1870                         break;
1871 
1872                 case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1873                         log_framework(LOG_WARNING, "Restarter: "
1874                             "%s command (for %s) unimplemented.\n",
1875                             event_names[event->riq_type], inst->ri_i.i_fmri);
1876                         break;
1877 
1878                 case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1879                         if (!instance_started(inst)) {
1880                                 log_framework(LOG_DEBUG, "Restarter: "
1881                                     "Not restarting %s; not running.\n",
1882                                     inst->ri_i.i_fmri);
1883                         } else {
1884                                 /*
1885                                  * Stop the instance.  If it can be restarted,
1886                                  * the graph engine will send a new event.
1887                                  */
1888                                 if (restart_dump(h, inst)) {
1889                                         (void) contract_kill(
1890                                             inst->ri_i.i_primary_ctid, SIGABRT,
1891                                             inst->ri_i.i_fmri);
1892                                 } else if (stop_instance(h, inst,
1893                                     RSTOP_RESTART) == 0) {
1894                                         reset_start_times(inst);
1895                                 }
1896                         }
1897                         break;
1898 
1899                 case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
1900                 default:
1901 #ifndef NDEBUG
1902                         uu_warn("%s:%d: Bad restarter event %d.  "
1903                             "Aborting.\n", __FILE__, __LINE__, event->riq_type);
1904 #endif
1905                         abort();
1906                 }
1907 
1908                 assert(inst != NULL);
1909                 MUTEX_UNLOCK(&inst->ri_lock);
1910 
1911 cont:
1912                 /* grab the queue lock */
1913                 rip = inst_lookup_queue(fmri);
1914                 if (rip == NULL)
1915                         goto out;
1916 
1917                 /* delete the event */
1918                 uu_list_remove(rip->ri_queue, event);
1919                 startd_free(event, sizeof (restarter_instance_qentry_t));
1920         }
1921 
1922         assert(rip != NULL);
1923 
1924         /*
1925          * Try to preserve the thread for a little while for future use.
1926          */
1927         to.tv_sec = 3;
1928         to.tv_nsec = 0;
1929         (void) pthread_cond_reltimedwait_np(&rip->ri_queue_cv,
1930             &rip->ri_queue_lock, &to);
1931 
1932         if (uu_list_first(rip->ri_queue) != NULL)
1933                 goto again;
1934 
1935         rip->ri_queue_thread = 0;
1936         MUTEX_UNLOCK(&rip->ri_queue_lock);
1937 
1938 out:
1939         (void) scf_handle_unbind(h);
1940         scf_handle_destroy(h);
1941         free(fmri);
1942         return (NULL);
1943 }
1944 
1945 static int
1946 is_admin_event(restarter_event_type_t t)
1947 {
1948         switch (t) {
1949         case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1950         case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1951         case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1952         case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1953         case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1954         case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1955                 return (1);
1956         default:
1957                 return (0);
1958         }
1959 }
1960 
1961 static void
1962 restarter_queue_event(restarter_inst_t *ri, restarter_protocol_event_t *e)
1963 {
1964         restarter_instance_qentry_t *qe;
1965         int r;
1966 
1967         assert(MUTEX_HELD(&ri->ri_queue_lock));
1968         assert(!MUTEX_HELD(&ri->ri_lock));
1969 
1970         qe = startd_zalloc(sizeof (restarter_instance_qentry_t));
1971         qe->riq_type = e->rpe_type;
1972         qe->riq_reason = e->rpe_reason;
1973 
1974         uu_list_node_init(qe, &qe->riq_link, restarter_queue_pool);
1975         r = uu_list_insert_before(ri->ri_queue, NULL, qe);
1976         assert(r == 0);
1977 }
1978 
1979 /*
1980  * void *restarter_event_thread()
1981  *
1982  *  Handle incoming graph events by placing them on a per-instance
1983  *  queue.  We can't lock the main part of the instance structure, so
1984  *  just modify the seprarately locked event queue portion.
1985  */
1986 /*ARGSUSED*/
1987 static void *
1988 restarter_event_thread(void *unused)
1989 {
1990         scf_handle_t *h;
1991 
1992         (void) pthread_setname_np(pthread_self(), "restarter_event");
1993 
1994         /*
1995          * This is a new thread, and thus, gets its own handle
1996          * to the repository.
1997          */
1998         h = libscf_handle_create_bound_loop();
1999 
2000         MUTEX_LOCK(&ru->restarter_update_lock);
2001 
2002         /*CONSTCOND*/
2003         while (1) {
2004                 restarter_protocol_event_t *e;
2005 
2006                 while (ru->restarter_update_wakeup == 0)
2007                         (void) pthread_cond_wait(&ru->restarter_update_cv,
2008                             &ru->restarter_update_lock);
2009 
2010                 ru->restarter_update_wakeup = 0;
2011 
2012                 while ((e = restarter_event_dequeue()) != NULL) {
2013                         restarter_inst_t *rip;
2014                         char *fmri;
2015 
2016                         MUTEX_UNLOCK(&ru->restarter_update_lock);
2017 
2018                         /*
2019                          * ADD_INSTANCE is special: there's likely no
2020                          * instance structure yet, so we need to handle the
2021                          * addition synchronously.
2022                          */
2023                         switch (e->rpe_type) {
2024                         case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
2025                                 if (restarter_insert_inst(h, e->rpe_inst) != 0)
2026                                         log_error(LOG_INFO, "Restarter: "
2027                                             "Could not add %s.\n", e->rpe_inst);
2028 
2029                                 MUTEX_LOCK(&st->st_load_lock);
2030                                 if (--st->st_load_instances == 0)
2031                                         (void) pthread_cond_broadcast(
2032                                             &st->st_load_cv);
2033                                 MUTEX_UNLOCK(&st->st_load_lock);
2034 
2035                                 goto nolookup;
2036                         }
2037 
2038                         /*
2039                          * Lookup the instance, locking only the event queue.
2040                          * Can't grab ri_lock here because it might be held
2041                          * by a long-running method.
2042                          */
2043                         rip = inst_lookup_queue(e->rpe_inst);
2044                         if (rip == NULL) {
2045                                 log_error(LOG_INFO, "Restarter: "
2046                                     "Ignoring %s command for unknown service "
2047                                     "%s.\n", event_names[e->rpe_type],
2048                                     e->rpe_inst);
2049                                 goto nolookup;
2050                         }
2051 
2052                         /* Keep ADMIN events from filling up the queue. */
2053                         if (is_admin_event(e->rpe_type) &&
2054                             uu_list_numnodes(rip->ri_queue) >
2055                             RINST_QUEUE_THRESHOLD) {
2056                                 MUTEX_UNLOCK(&rip->ri_queue_lock);
2057                                 log_instance(rip, B_TRUE, "Instance event "
2058                                     "queue overflow.  Dropping administrative "
2059                                     "request.");
2060                                 log_framework(LOG_DEBUG, "%s: Instance event "
2061                                     "queue overflow.  Dropping administrative "
2062                                     "request.\n", rip->ri_i.i_fmri);
2063                                 goto nolookup;
2064                         }
2065 
2066                         /* Now add the event to the instance queue. */
2067                         restarter_queue_event(rip, e);
2068 
2069                         if (rip->ri_queue_thread == 0) {
2070                                 /*
2071                                  * Start a thread if one isn't already
2072                                  * running.
2073                                  */
2074                                 fmri = safe_strdup(e->rpe_inst);
2075                                 rip->ri_queue_thread =  startd_thread_create(
2076                                     restarter_process_events, (void *)fmri);
2077                         } else {
2078                                 /*
2079                                  * Signal the existing thread that there's
2080                                  * a new event.
2081                                  */
2082                                 (void) pthread_cond_broadcast(
2083                                     &rip->ri_queue_cv);
2084                         }
2085 
2086                         MUTEX_UNLOCK(&rip->ri_queue_lock);
2087 nolookup:
2088                         restarter_event_release(e);
2089 
2090                         MUTEX_LOCK(&ru->restarter_update_lock);
2091                 }
2092         }
2093 }
2094 
2095 static restarter_inst_t *
2096 contract_to_inst(ctid_t ctid)
2097 {
2098         restarter_inst_t *inst;
2099         int id;
2100 
2101         id = lookup_inst_by_contract(ctid);
2102         if (id == -1)
2103                 return (NULL);
2104 
2105         inst = inst_lookup_by_id(id);
2106         if (inst != NULL) {
2107                 /*
2108                  * Since ri_lock isn't held by the contract id lookup, this
2109                  * instance may have been restarted and now be in a new
2110                  * contract, making the old contract no longer valid for this
2111                  * instance.
2112                  */
2113                 if (ctid != inst->ri_i.i_primary_ctid) {
2114                         MUTEX_UNLOCK(&inst->ri_lock);
2115                         inst = NULL;
2116                 }
2117         }
2118         return (inst);
2119 }
2120 
2121 /*
2122  * void contract_action()
2123  *   Take action on contract events.
2124  */
2125 static void
2126 contract_action(scf_handle_t *h, restarter_inst_t *inst, ctid_t id,
2127     uint32_t type)
2128 {
2129         const char *fmri = inst->ri_i.i_fmri;
2130 
2131         assert(MUTEX_HELD(&inst->ri_lock));
2132 
2133         /*
2134          * If startd has stopped this contract, there is no need to
2135          * stop it again.
2136          */
2137         if (inst->ri_i.i_primary_ctid > 0 &&
2138             inst->ri_i.i_primary_ctid_stopped)
2139                 return;
2140 
2141         if ((type & (CT_PR_EV_EMPTY | CT_PR_EV_CORE | CT_PR_EV_SIGNAL
2142             | CT_PR_EV_HWERR)) == 0) {
2143                 /*
2144                  * There shouldn't be other events, since that's not how we set
2145                  * the terms. Thus, just log an error and drive on.
2146                  */
2147                 log_framework(LOG_NOTICE,
2148                     "%s: contract %ld received unexpected critical event "
2149                     "(%d)\n", fmri, id, type);
2150                 return;
2151         }
2152 
2153         assert(instance_in_transition(inst) == 0);
2154 
2155         if (instance_is_wait_style(inst)) {
2156                 /*
2157                  * We ignore all events; if they impact the
2158                  * process we're monitoring, then the
2159                  * wait_thread will stop the instance.
2160                  */
2161                 log_framework(LOG_DEBUG,
2162                     "%s: ignoring contract event on wait-style service\n",
2163                     fmri);
2164         } else {
2165                 /*
2166                  * A CT_PR_EV_EMPTY event is an RSTOP_EXIT request.
2167                  */
2168                 switch (type) {
2169                 case CT_PR_EV_EMPTY:
2170                         (void) stop_instance(h, inst, RSTOP_EXIT);
2171                         break;
2172                 case CT_PR_EV_CORE:
2173                         (void) stop_instance(h, inst, RSTOP_CORE);
2174                         break;
2175                 case CT_PR_EV_SIGNAL:
2176                         (void) stop_instance(h, inst, RSTOP_SIGNAL);
2177                         break;
2178                 case CT_PR_EV_HWERR:
2179                         (void) stop_instance(h, inst, RSTOP_HWERR);
2180                         break;
2181                 }
2182         }
2183 }
2184 
2185 /*
2186  * void *restarter_contract_event_thread(void *)
2187  *   Listens to the process contract bundle for critical events, taking action
2188  *   on events from contracts we know we are responsible for.
2189  */
2190 /*ARGSUSED*/
2191 static void *
2192 restarter_contracts_event_thread(void *unused)
2193 {
2194         int fd, err;
2195         scf_handle_t *local_handle;
2196 
2197         (void) pthread_setname_np(pthread_self(), "restarter_contracts_event");
2198 
2199         /*
2200          * Await graph load completion.  That is, stop here, until we've scanned
2201          * the repository for contract - instance associations.
2202          */
2203         MUTEX_LOCK(&st->st_load_lock);
2204         while (!(st->st_load_complete && st->st_load_instances == 0))
2205                 (void) pthread_cond_wait(&st->st_load_cv, &st->st_load_lock);
2206         MUTEX_UNLOCK(&st->st_load_lock);
2207 
2208         /*
2209          * This is a new thread, and thus, gets its own handle
2210          * to the repository.
2211          */
2212         if ((local_handle = libscf_handle_create_bound(SCF_VERSION)) == NULL)
2213                 uu_die("Unable to bind a new repository handle: %s\n",
2214                     scf_strerror(scf_error()));
2215 
2216         fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
2217         if (fd == -1)
2218                 uu_die("process bundle open failed");
2219 
2220         /*
2221          * Make sure we get all events (including those generated by configd
2222          * before this thread was started).
2223          */
2224         err = ct_event_reset(fd);
2225         assert(err == 0);
2226 
2227         for (;;) {
2228                 int efd, sfd;
2229                 ct_evthdl_t ev;
2230                 uint32_t type;
2231                 ctevid_t evid;
2232                 ct_stathdl_t status;
2233                 ctid_t ctid;
2234                 restarter_inst_t *inst;
2235                 uint64_t cookie;
2236 
2237                 if (err = ct_event_read_critical(fd, &ev)) {
2238                         log_error(LOG_WARNING,
2239                             "Error reading next contract event: %s",
2240                             strerror(err));
2241                         continue;
2242                 }
2243 
2244                 evid = ct_event_get_evid(ev);
2245                 ctid = ct_event_get_ctid(ev);
2246                 type = ct_event_get_type(ev);
2247 
2248                 /* Fetch cookie. */
2249                 if ((sfd = contract_open(ctid, "process", "status", O_RDONLY))
2250                     < 0) {
2251                         ct_event_free(ev);
2252                         continue;
2253                 }
2254 
2255                 if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
2256                         log_framework(LOG_WARNING, "Could not get status for "
2257                             "contract %ld: %s\n", ctid, strerror(err));
2258 
2259                         startd_close(sfd);
2260                         ct_event_free(ev);
2261                         continue;
2262                 }
2263 
2264                 cookie = ct_status_get_cookie(status);
2265 
2266                 log_framework(LOG_DEBUG, "Received event %d for ctid %ld "
2267                     "cookie %lld\n", type, ctid, cookie);
2268 
2269                 ct_status_free(status);
2270 
2271                 startd_close(sfd);
2272 
2273                 /*
2274                  * svc.configd(1M) restart handling performed by the
2275                  * fork_configd_thread.  We don't acknowledge, as that thread
2276                  * will do so.
2277                  */
2278                 if (cookie == CONFIGD_COOKIE) {
2279                         ct_event_free(ev);
2280                         continue;
2281                 }
2282 
2283                 inst = NULL;
2284                 if (storing_contract != 0 &&
2285                     (inst = contract_to_inst(ctid)) == NULL) {
2286                         /*
2287                          * This can happen for two reasons:
2288                          * - method_run() has not yet stored the
2289                          *    the contract into the internal hash table.
2290                          * - we receive an EMPTY event for an abandoned
2291                          *    contract.
2292                          * If there is any contract in the process of
2293                          * being stored into the hash table then re-read
2294                          * the event later.
2295                          */
2296                         log_framework(LOG_DEBUG,
2297                             "Reset event %d for unknown "
2298                             "contract id %ld\n", type, ctid);
2299 
2300                         /* don't go too fast */
2301                         (void) poll(NULL, 0, 100);
2302 
2303                         (void) ct_event_reset(fd);
2304                         ct_event_free(ev);
2305                         continue;
2306                 }
2307 
2308                 /*
2309                  * Do not call contract_to_inst() again if first
2310                  * call succeeded.
2311                  */
2312                 if (inst == NULL)
2313                         inst = contract_to_inst(ctid);
2314                 if (inst == NULL) {
2315                         /*
2316                          * This can happen if we receive an EMPTY
2317                          * event for an abandoned contract.
2318                          */
2319                         log_framework(LOG_DEBUG,
2320                             "Received event %d for unknown contract id "
2321                             "%ld\n", type, ctid);
2322                 } else {
2323                         log_framework(LOG_DEBUG,
2324                             "Received event %d for contract id "
2325                             "%ld (%s)\n", type, ctid,
2326                             inst->ri_i.i_fmri);
2327 
2328                         contract_action(local_handle, inst, ctid, type);
2329 
2330                         MUTEX_UNLOCK(&inst->ri_lock);
2331                 }
2332 
2333                 efd = contract_open(ct_event_get_ctid(ev), "process", "ctl",
2334                     O_WRONLY);
2335                 if (efd != -1) {
2336                         (void) ct_ctl_ack(efd, evid);
2337                         startd_close(efd);
2338                 }
2339 
2340                 ct_event_free(ev);
2341 
2342         }
2343 
2344         /*NOTREACHED*/
2345         return (NULL);
2346 }
2347 
2348 /*
2349  * Timeout queue, processed by restarter_timeouts_event_thread().
2350  */
2351 timeout_queue_t *timeouts;
2352 static uu_list_pool_t *timeout_pool;
2353 
2354 typedef struct timeout_update {
2355         pthread_mutex_t         tu_lock;
2356         pthread_cond_t          tu_cv;
2357         int                     tu_wakeup;
2358 } timeout_update_t;
2359 
2360 timeout_update_t *tu;
2361 
/*
 * FMRIs for which is_timeout_ovr() reports a timeout override ("Using
 * infinite timeout").  The list is terminated by a NULL entry.
 */
static const char *timeout_ovr_svcs[] = {
	"svc:/system/manifest-import:default",
	"svc:/network/initial:default",
	"svc:/network/service:default",
	"svc:/system/rmtmpfiles:default",
	"svc:/network/loopback:default",
	"svc:/network/physical:default",
	"svc:/system/device/local:default",
	"svc:/system/filesystem/usr:default",
	"svc:/system/filesystem/minimal:default",
	"svc:/system/filesystem/local:default",
	NULL	/* end of list */
};
2375 
2376 int
2377 is_timeout_ovr(restarter_inst_t *inst)
2378 {
2379         int i;
2380 
2381         for (i = 0; timeout_ovr_svcs[i] != NULL; ++i) {
2382                 if (strcmp(inst->ri_i.i_fmri, timeout_ovr_svcs[i]) == 0) {
2383                         log_instance(inst, B_TRUE, "Timeout override by "
2384                             "svc.startd.  Using infinite timeout.");
2385                         return (1);
2386                 }
2387         }
2388 
2389         return (0);
2390 }
2391 
2392 /*ARGSUSED*/
2393 static int
2394 timeout_compare(const void *lc_arg, const void *rc_arg, void *private)
2395 {
2396         hrtime_t t1 = ((const timeout_entry_t *)lc_arg)->te_timeout;
2397         hrtime_t t2 = ((const timeout_entry_t *)rc_arg)->te_timeout;
2398 
2399         if (t1 > t2)
2400                 return (1);
2401         else if (t1 < t2)
2402                 return (-1);
2403         return (0);
2404 }
2405 
2406 void
2407 timeout_init()
2408 {
2409         timeouts = startd_zalloc(sizeof (timeout_queue_t));
2410 
2411         (void) pthread_mutex_init(&timeouts->tq_lock, &mutex_attrs);
2412 
2413         timeout_pool = startd_list_pool_create("timeouts",
2414             sizeof (timeout_entry_t), offsetof(timeout_entry_t, te_link),
2415             timeout_compare, UU_LIST_POOL_DEBUG);
2416         assert(timeout_pool != NULL);
2417 
2418         timeouts->tq_list = startd_list_create(timeout_pool,
2419             timeouts, UU_LIST_SORTED);
2420         assert(timeouts->tq_list != NULL);
2421 
2422         tu = startd_zalloc(sizeof (timeout_update_t));
2423         (void) pthread_cond_init(&tu->tu_cv, NULL);
2424         (void) pthread_mutex_init(&tu->tu_lock, &mutex_attrs);
2425 }
2426 
2427 void
2428 timeout_insert(restarter_inst_t *inst, ctid_t cid, uint64_t timeout_sec)
2429 {
2430         hrtime_t now, timeout;
2431         timeout_entry_t *entry;
2432         uu_list_index_t idx;
2433 
2434         assert(MUTEX_HELD(&inst->ri_lock));
2435 
2436         now = gethrtime();
2437 
2438         /*
2439          * If we overflow LLONG_MAX, we're never timing out anyways, so
2440          * just return.
2441          */
2442         if (timeout_sec >= (LLONG_MAX - now) / 1000000000LL) {
2443                 log_instance(inst, B_TRUE, "timeout_seconds too large, "
2444                     "treating as infinite.");
2445                 return;
2446         }
2447 
2448         /* hrtime is in nanoseconds. Convert timeout_sec. */
2449         timeout = now + (timeout_sec * 1000000000LL);
2450 
2451         entry = startd_alloc(sizeof (timeout_entry_t));
2452         entry->te_timeout = timeout;
2453         entry->te_ctid = cid;
2454         entry->te_fmri = safe_strdup(inst->ri_i.i_fmri);
2455         entry->te_logstem = safe_strdup(inst->ri_logstem);
2456         entry->te_fired = 0;
2457         /* Insert the calculated timeout time onto the queue. */
2458         MUTEX_LOCK(&timeouts->tq_lock);
2459         (void) uu_list_find(timeouts->tq_list, entry, NULL, &idx);
2460         uu_list_node_init(entry, &entry->te_link, timeout_pool);
2461         uu_list_insert(timeouts->tq_list, entry, idx);
2462         MUTEX_UNLOCK(&timeouts->tq_lock);
2463 
2464         assert(inst->ri_timeout == NULL);
2465         inst->ri_timeout = entry;
2466 
2467         MUTEX_LOCK(&tu->tu_lock);
2468         tu->tu_wakeup = 1;
2469         (void) pthread_cond_broadcast(&tu->tu_cv);
2470         MUTEX_UNLOCK(&tu->tu_lock);
2471 }
2472 
2473 
2474 void
2475 timeout_remove(restarter_inst_t *inst, ctid_t cid)
2476 {
2477         assert(MUTEX_HELD(&inst->ri_lock));
2478 
2479         if (inst->ri_timeout == NULL)
2480                 return;
2481 
2482         assert(inst->ri_timeout->te_ctid == cid);
2483 
2484         MUTEX_LOCK(&timeouts->tq_lock);
2485         uu_list_remove(timeouts->tq_list, inst->ri_timeout);
2486         MUTEX_UNLOCK(&timeouts->tq_lock);
2487 
2488         free(inst->ri_timeout->te_fmri);
2489         free(inst->ri_timeout->te_logstem);
2490         startd_free(inst->ri_timeout, sizeof (timeout_entry_t));
2491         inst->ri_timeout = NULL;
2492 }
2493 
2494 static int
2495 timeout_now()
2496 {
2497         timeout_entry_t *e;
2498         hrtime_t now;
2499         int ret;
2500 
2501         now = gethrtime();
2502 
2503         /*
2504          * Walk through the (sorted) timeouts list.  While the timeout
2505          * at the head of the list is <= the current time, kill the
2506          * method.
2507          */
2508         MUTEX_LOCK(&timeouts->tq_lock);
2509 
2510         for (e = uu_list_first(timeouts->tq_list);
2511             e != NULL && e->te_timeout <= now;
2512             e = uu_list_next(timeouts->tq_list, e)) {
2513                 log_framework(LOG_WARNING, "%s: Method or service exit timed "
2514                     "out.  Killing contract %ld.\n", e->te_fmri, e->te_ctid);
2515                 log_instance_fmri(e->te_fmri, e->te_logstem, B_TRUE,
2516                     "Method or service exit timed out.  Killing contract %ld.",
2517                     e->te_ctid);
2518                 e->te_fired = 1;
2519                 (void) contract_kill(e->te_ctid, SIGKILL, e->te_fmri);
2520         }
2521 
2522         if (uu_list_numnodes(timeouts->tq_list) > 0)
2523                 ret = 0;
2524         else
2525                 ret = -1;
2526 
2527         MUTEX_UNLOCK(&timeouts->tq_lock);
2528 
2529         return (ret);
2530 }
2531 
2532 /*
2533  * void *restarter_timeouts_event_thread(void *)
2534  *   Responsible for monitoring the method timeouts.  This thread must
2535  *   be started before any methods are called.
2536  */
2537 /*ARGSUSED*/
2538 static void *
2539 restarter_timeouts_event_thread(void *unused)
2540 {
2541         /*
2542          * Timeouts are entered on a priority queue, which is processed by
2543          * this thread.  As timeouts are specified in seconds, we'll do
2544          * the necessary processing every second, as long as the queue
2545          * is not empty.
2546          */
2547 
2548         (void) pthread_setname_np(pthread_self(), "restarter_timeouts_event");
2549 
2550         /*CONSTCOND*/
2551         while (1) {
2552                 /*
2553                  * As long as the timeout list isn't empty, process it
2554                  * every second.
2555                  */
2556                 if (timeout_now() == 0) {
2557                         (void) sleep(1);
2558                         continue;
2559                 }
2560 
2561                 /* The list is empty, wait until we have more timeouts. */
2562                 MUTEX_LOCK(&tu->tu_lock);
2563 
2564                 while (tu->tu_wakeup == 0)
2565                         (void) pthread_cond_wait(&tu->tu_cv, &tu->tu_lock);
2566 
2567                 tu->tu_wakeup = 0;
2568                 MUTEX_UNLOCK(&tu->tu_lock);
2569         }
2570 
2571         return (NULL);
2572 }
2573 
2574 void
2575 restarter_start()
2576 {
2577         (void) startd_thread_create(restarter_timeouts_event_thread, NULL);
2578         (void) startd_thread_create(restarter_event_thread, NULL);
2579         (void) startd_thread_create(restarter_contracts_event_thread, NULL);
2580         (void) startd_thread_create(wait_thread, NULL);
2581 }
2582 
2583 
2584 void
2585 restarter_init()
2586 {
2587         restarter_instance_pool = startd_list_pool_create("restarter_instances",
2588             sizeof (restarter_inst_t), offsetof(restarter_inst_t,
2589             ri_link), restarter_instance_compare, UU_LIST_POOL_DEBUG);
2590         (void) memset(&instance_list, 0, sizeof (instance_list));
2591 
2592         (void) pthread_mutex_init(&instance_list.ril_lock, &mutex_attrs);
2593         instance_list.ril_instance_list = startd_list_create(
2594             restarter_instance_pool, &instance_list, UU_LIST_SORTED);
2595 
2596         restarter_queue_pool = startd_list_pool_create(
2597             "restarter_instance_queue", sizeof (restarter_instance_qentry_t),
2598             offsetof(restarter_instance_qentry_t,  riq_link), NULL,
2599             UU_LIST_POOL_DEBUG);
2600 
2601         contract_list_pool = startd_list_pool_create(
2602             "contract_list", sizeof (contract_entry_t),
2603             offsetof(contract_entry_t,  ce_link), NULL,
2604             UU_LIST_POOL_DEBUG);
2605         contract_hash_init();
2606 
2607         log_framework(LOG_DEBUG, "Initialized restarter\n");
2608 }