illumos-gate Old usr/src/cmd/svc/startd/restarter.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  25  */
  26 
  27 /*
  28  * restarter.c - service manipulation
  29  *
  30  * This component manages services whose restarter is svc.startd, the standard
  31  * restarter.  It translates restarter protocol events from the graph engine
  32  * into actions on processes, as a delegated restarter would do.
  33  *
  34  * The master restarter manages a number of always-running threads:
  35  *   - restarter event thread: events from the graph engine
  36  *   - timeout thread: thread to fire queued timeouts
  37  *   - contract thread: thread to handle contract events
  38  *   - wait thread: thread to handle wait-based services
  39  *
  40  * The other threads are created as-needed:
  41  *   - per-instance method threads
  42  *   - per-instance event processing threads
  43  *
  44  * The interaction of all threads must result in the following conditions
  45  * being satisfied (on a per-instance basis):
  46  *   - restarter events must be processed in order
  47  *   - method execution must be serialized
  48  *   - instance delete must be held until outstanding methods are complete
  49  *   - contract events shouldn't be processed while a method is running
  50  *   - timeouts should fire even when a method is running
  51  *
  52  * Service instances are represented by restarter_inst_t's and are kept in the
  53  * instance_list list.
  54  *
  55  * Service States
  56  *   The current state of a service instance is kept in
  57  *   restarter_inst_t->ri_i.i_state.  If transition to a new state could take
  58  *   some time, then before we effect the transition we set
  59  *   restarter_inst_t->ri_i.i_next_state to the target state, and afterwards we
  60  *   rotate i_next_state to i_state and set i_next_state to
  61  *   RESTARTER_STATE_NONE.  So usually i_next_state is _NONE when ri_lock is not
  62  *   held.  The exception is when we launch methods, which are done with
  63  *   a separate thread.  To keep any other threads from grabbing ri_lock before
  64  *   method_thread() does, we set ri_method_thread to the thread id of the
  65  *   method thread, and when it is nonzero any thread with a different thread id
  66  *   waits on ri_method_cv.
  67  *
  68  * Method execution is serialized by blocking on ri_method_cv in
  69  * inst_lookup_by_id() and waiting for a 0 value of ri_method_thread.  This
  70  * also prevents the instance structure from being deleted until all
  71  * outstanding operations such as method_thread() have finished.
  72  *
  73  * Lock ordering:
  74  *
  75  * dgraph_lock [can be held when taking:]
  76  *   utmpx_lock
  77  *   dictionary->dict_lock
  78  *   st->st_load_lock
  79  *   wait_info_lock
  80  *   ru->restarter_update_lock
  81  *     restarter_queue->rpeq_lock
  82  *   instance_list.ril_lock
  83  *     inst->ri_lock
  84  *   st->st_configd_live_lock
  85  *
  86  * instance_list.ril_lock
  87  *   graph_queue->gpeq_lock
  88  *   gu->gu_lock
  89  *   st->st_configd_live_lock
  90  *   dictionary->dict_lock
  91  *   inst->ri_lock
  92  *     graph_queue->gpeq_lock
  93  *     gu->gu_lock
  94  *     tu->tu_lock
  95  *     tq->tq_lock
  96  *     inst->ri_queue_lock
  97  *       wait_info_lock
  98  *       bp->cb_lock
  99  *     utmpx_lock
 100  *
 101  * single_user_thread_lock
 102  *   wait_info_lock
 103  *   utmpx_lock
 104  *
 105  * gu_freeze_lock
 106  *
 107  * logbuf_mutex nests inside pretty much everything.
 108  */
 109 
 110 #include <sys/contract/process.h>
 111 #include <sys/ctfs.h>
 112 #include <sys/stat.h>
 113 #include <sys/time.h>
 114 #include <sys/types.h>
 115 #include <sys/uio.h>
 116 #include <sys/wait.h>
 117 #include <assert.h>
 118 #include <errno.h>
 119 #include <fcntl.h>
 120 #include <libcontract.h>
 121 #include <libcontract_priv.h>
 122 #include <libintl.h>
 123 #include <librestart.h>
 124 #include <librestart_priv.h>
 125 #include <libuutil.h>
 126 #include <limits.h>
 127 #include <poll.h>
 128 #include <port.h>
 129 #include <pthread.h>
 130 #include <stdarg.h>
 131 #include <stdio.h>
 132 #include <strings.h>
 133 #include <unistd.h>
 134 
 135 #include "startd.h"
 136 #include "protocol.h"
 137 
  138 static uu_list_pool_t *restarter_instance_pool;	/* pool used to init each instance's ri_link node */
  139 static restarter_instance_list_t instance_list;	/* list of all instances (ril_instance_list), guarded by ril_lock */
  140 
  141 static uu_list_pool_t *restarter_queue_pool;	/* pool passed to startd_list_create() for each instance's ri_queue */
  142 
  143 #define WT_SVC_ERR_THROTTLE     1       /* 1 sec delay for erroring wait svc */
 144 
 145 /*
 146  * Function used to reset the restart times for an instance, when
 147  * an administrative task comes along and essentially makes the times
 148  * in this array ineffective.
 149  */
 150 static void
 151 reset_start_times(restarter_inst_t *inst)
 152 {
 153         inst->ri_start_index = 0;
 154         bzero(inst->ri_start_time, sizeof (inst->ri_start_time));
 155 }
 156 
 157 /*ARGSUSED*/
 158 static int
 159 restarter_instance_compare(const void *lc_arg, const void *rc_arg,
 160     void *private)
 161 {
 162         int lc_id = ((const restarter_inst_t *)lc_arg)->ri_id;
 163         int rc_id = *(int *)rc_arg;
 164 
 165         if (lc_id > rc_id)
 166                 return (1);
 167         if (lc_id < rc_id)
 168                 return (-1);
 169         return (0);
 170 }
 171 
 172 static restarter_inst_t *
 173 inst_lookup_by_name(const char *name)
 174 {
 175         int id;
 176 
 177         id = dict_lookup_byname(name);
 178         if (id == -1)
 179                 return (NULL);
 180 
 181         return (inst_lookup_by_id(id));
 182 }
 183 
 184 restarter_inst_t *
 185 inst_lookup_by_id(int id)
 186 {
 187         restarter_inst_t *inst;
 188 
 189         MUTEX_LOCK(&instance_list.ril_lock);
 190         inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
 191         if (inst != NULL)
 192                 MUTEX_LOCK(&inst->ri_lock);
 193         MUTEX_UNLOCK(&instance_list.ril_lock);
 194 
 195         if (inst != NULL) {
 196                 while (inst->ri_method_thread != 0 &&
 197                     !pthread_equal(inst->ri_method_thread, pthread_self())) {
 198                         ++inst->ri_method_waiters;
 199                         (void) pthread_cond_wait(&inst->ri_method_cv,
 200                             &inst->ri_lock);
 201                         assert(inst->ri_method_waiters > 0);
 202                         --inst->ri_method_waiters;
 203                 }
 204         }
 205 
 206         return (inst);
 207 }
 208 
 209 static restarter_inst_t *
 210 inst_lookup_queue(const char *name)
 211 {
 212         int id;
 213         restarter_inst_t *inst;
 214 
 215         id = dict_lookup_byname(name);
 216         if (id == -1)
 217                 return (NULL);
 218 
 219         MUTEX_LOCK(&instance_list.ril_lock);
 220         inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
 221         if (inst != NULL)
 222                 MUTEX_LOCK(&inst->ri_queue_lock);
 223         MUTEX_UNLOCK(&instance_list.ril_lock);
 224 
 225         return (inst);
 226 }
 227 
 228 const char *
 229 service_style(int flags)
 230 {
 231         switch (flags & RINST_STYLE_MASK) {
 232         case RINST_CONTRACT:    return ("contract");
 233         case RINST_TRANSIENT:   return ("transient");
 234         case RINST_WAIT:        return ("wait");
 235 
 236         default:
 237 #ifndef NDEBUG
 238                 uu_warn("%s:%d: Bad flags 0x%x.\n", __FILE__, __LINE__, flags);
 239 #endif
 240                 abort();
 241                 /* NOTREACHED */
 242         }
 243 }
 244 
 245 /*
 246  * Fails with ECONNABORTED or ECANCELED.
 247  */
 248 static int
 249 check_contract(restarter_inst_t *inst, boolean_t primary,
 250     scf_instance_t *scf_inst)
 251 {
 252         ctid_t *ctidp;
 253         int fd, r;
 254 
 255         ctidp = primary ? &inst->ri_i.i_primary_ctid :
 256             &inst->ri_i.i_transient_ctid;
 257 
 258         assert(*ctidp >= 1);
 259 
 260         fd = contract_open(*ctidp, NULL, "status", O_RDONLY);
 261         if (fd >= 0) {
 262                 r = close(fd);
 263                 assert(r == 0);
 264                 return (0);
 265         }
 266 
 267         r = restarter_remove_contract(scf_inst, *ctidp, primary ?
 268             RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT);
 269         switch (r) {
 270         case 0:
 271         case ECONNABORTED:
 272         case ECANCELED:
 273                 *ctidp = 0;
 274                 return (r);
 275 
 276         case ENOMEM:
 277                 uu_die("Out of memory\n");
 278                 /* NOTREACHED */
 279 
 280         case EPERM:
 281                 uu_die("Insufficient privilege.\n");
 282                 /* NOTREACHED */
 283 
 284         case EACCES:
 285                 uu_die("Repository backend access denied.\n");
 286                 /* NOTREACHED */
 287 
 288         case EROFS:
 289                 log_error(LOG_INFO, "Could not remove unusable contract id %ld "
 290                     "for %s from repository.\n", *ctidp, inst->ri_i.i_fmri);
 291                 return (0);
 292 
 293         case EINVAL:
 294         case EBADF:
 295         default:
 296                 assert(0);
 297                 abort();
 298                 /* NOTREACHED */
 299         }
 300 }
 301 
 302 static int stop_instance(scf_handle_t *, restarter_inst_t *, stop_cause_t);
 303 
 304 /*
 305  * int restarter_insert_inst(scf_handle_t *, char *)
 306  *   If the inst is already in the restarter list, return its id.  If the inst
 307  *   is not in the restarter list, initialize a restarter_inst_t, initialize its
 308  *   states, insert it into the list, and return 0.
 309  *
 310  *   Fails with
 311  *     ENOENT - name is not in the repository
 312  */
 313 static int
 314 restarter_insert_inst(scf_handle_t *h, const char *name)
 315 {
 316         int id, r;
 317         restarter_inst_t *inst;
 318         uu_list_index_t idx;
 319         scf_service_t *scf_svc;
 320         scf_instance_t *scf_inst;
 321         scf_snapshot_t *snap = NULL;
 322         scf_propertygroup_t *pg;
 323         char *svc_name, *inst_name;
 324         char logfilebuf[PATH_MAX];
 325         char *c;
 326         boolean_t do_commit_states;
 327         restarter_instance_state_t state, next_state;
 328         protocol_states_t *ps;
 329         pid_t start_pid;
 330         restarter_str_t reason = restarter_str_insert_in_graph;
 331 
 332         MUTEX_LOCK(&instance_list.ril_lock);
 333 
 334         /*
 335          * We don't use inst_lookup_by_name() here because we want the lookup
 336          * & insert to be atomic.
 337          */
 338         id = dict_lookup_byname(name);
 339         if (id != -1) {
 340                 inst = uu_list_find(instance_list.ril_instance_list, &id, NULL,
 341                     &idx);
 342                 if (inst != NULL) {
 343                         MUTEX_UNLOCK(&instance_list.ril_lock);
 344                         return (0);
 345                 }
 346         }
 347 
 348         /* Allocate an instance */
 349         inst = startd_zalloc(sizeof (restarter_inst_t));
 350         inst->ri_utmpx_prefix = startd_alloc(max_scf_value_size);
 351         inst->ri_utmpx_prefix[0] = '\0';
 352 
 353         inst->ri_i.i_fmri = startd_alloc(strlen(name) + 1);
 354         (void) strcpy((char *)inst->ri_i.i_fmri, name);
 355 
 356         inst->ri_queue = startd_list_create(restarter_queue_pool, inst, 0);
 357 
 358         /*
 359          * id shouldn't be -1 since we use the same dictionary as graph.c, but
 360          * just in case.
 361          */
 362         inst->ri_id = (id != -1 ? id : dict_insert(name));
 363 
 364         special_online_hooks_get(name, &inst->ri_pre_online_hook,
 365             &inst->ri_post_online_hook, &inst->ri_post_offline_hook);
 366 
 367         scf_svc = safe_scf_service_create(h);
 368         scf_inst = safe_scf_instance_create(h);
 369         pg = safe_scf_pg_create(h);
 370         svc_name = startd_alloc(max_scf_name_size);
 371         inst_name = startd_alloc(max_scf_name_size);
 372 
 373 rep_retry:
 374         if (snap != NULL)
 375                 scf_snapshot_destroy(snap);
 376         if (inst->ri_logstem != NULL)
 377                 startd_free(inst->ri_logstem, PATH_MAX);
 378         if (inst->ri_common_name != NULL)
 379                 free(inst->ri_common_name);
 380         if (inst->ri_C_common_name != NULL)
 381                 free(inst->ri_C_common_name);
 382         snap = NULL;
 383         inst->ri_logstem = NULL;
 384         inst->ri_common_name = NULL;
 385         inst->ri_C_common_name = NULL;
 386 
 387         if (scf_handle_decode_fmri(h, name, NULL, scf_svc, scf_inst, NULL,
 388             NULL, SCF_DECODE_FMRI_EXACT) != 0) {
 389                 switch (scf_error()) {
 390                 case SCF_ERROR_CONNECTION_BROKEN:
 391                         libscf_handle_rebind(h);
 392                         goto rep_retry;
 393 
 394                 case SCF_ERROR_NOT_FOUND:
 395                         goto deleted;
 396                 }
 397 
 398                 uu_die("Can't decode FMRI %s: %s\n", name,
 399                     scf_strerror(scf_error()));
 400         }
 401 
 402         /*
 403          * If there's no running snapshot, then we execute using the editing
 404          * snapshot.  Pending snapshots will be taken later.
 405          */
 406         snap = libscf_get_running_snapshot(scf_inst);
 407 
 408         if ((scf_service_get_name(scf_svc, svc_name, max_scf_name_size) < 0) ||
 409             (scf_instance_get_name(scf_inst, inst_name, max_scf_name_size) <
 410             0)) {
 411                 switch (scf_error()) {
 412                 case SCF_ERROR_NOT_SET:
 413                         break;
 414 
 415                 case SCF_ERROR_CONNECTION_BROKEN:
 416                         libscf_handle_rebind(h);
 417                         goto rep_retry;
 418 
 419                 default:
 420                         assert(0);
 421                         abort();
 422                 }
 423 
 424                 goto deleted;
 425         }
 426 
 427         (void) snprintf(logfilebuf, PATH_MAX, "%s:%s", svc_name, inst_name);
 428         for (c = logfilebuf; *c != '\0'; c++)
 429                 if (*c == '/')
 430                         *c = '-';
 431 
 432         inst->ri_logstem = startd_alloc(PATH_MAX);
 433         (void) snprintf(inst->ri_logstem, PATH_MAX, "%s%s", logfilebuf,
 434             LOG_SUFFIX);
 435 
 436         /*
 437          * If the restarter group is missing, use uninit/none.  Otherwise,
 438          * we're probably being restarted & don't want to mess up the states
 439          * that are there.
 440          */
 441         state = RESTARTER_STATE_UNINIT;
 442         next_state = RESTARTER_STATE_NONE;
 443 
 444         r = scf_instance_get_pg(scf_inst, SCF_PG_RESTARTER, pg);
 445         if (r != 0) {
 446                 switch (scf_error()) {
 447                 case SCF_ERROR_CONNECTION_BROKEN:
 448                         libscf_handle_rebind(h);
 449                         goto rep_retry;
 450 
 451                 case SCF_ERROR_NOT_SET:
 452                         goto deleted;
 453 
 454                 case SCF_ERROR_NOT_FOUND:
 455                         /*
 456                          * This shouldn't happen since the graph engine should
 457                          * have initialized the state to uninitialized/none if
 458                          * there was no restarter pg.  In case somebody
 459                          * deleted it, though....
 460                          */
 461                         do_commit_states = B_TRUE;
 462                         break;
 463 
 464                 default:
 465                         assert(0);
 466                         abort();
 467                 }
 468         } else {
 469                 r = libscf_read_states(pg, &state, &next_state);
 470                 if (r != 0) {
 471                         do_commit_states = B_TRUE;
 472                 } else {
 473                         if (next_state != RESTARTER_STATE_NONE) {
 474                                 /*
 475                                  * Force next_state to _NONE since we
 476                                  * don't look for method processes.
 477                                  */
 478                                 next_state = RESTARTER_STATE_NONE;
 479                                 do_commit_states = B_TRUE;
 480                         } else {
 481                                 /*
 482                                  * The reason for transition will depend on
 483                                  * state.
 484                                  */
 485                                 if (st->st_initial == 0)
 486                                         reason = restarter_str_startd_restart;
 487                                 else if (state == RESTARTER_STATE_MAINT)
 488                                         reason = restarter_str_bad_repo_state;
 489                                 /*
 490                                  * Inform the restarter of our state without
 491                                  * changing the STIME in the repository.
 492                                  */
 493                                 ps = startd_alloc(sizeof (*ps));
 494                                 inst->ri_i.i_state = ps->ps_state = state;
 495                                 inst->ri_i.i_next_state = ps->ps_state_next =
 496                                     next_state;
 497                                 ps->ps_reason = reason;
 498 
 499                                 graph_protocol_send_event(inst->ri_i.i_fmri,
 500                                     GRAPH_UPDATE_STATE_CHANGE, ps);
 501 
 502                                 do_commit_states = B_FALSE;
 503                         }
 504                 }
 505         }
 506 
 507         switch (libscf_get_startd_properties(scf_inst, snap, &inst->ri_flags,
 508             &inst->ri_utmpx_prefix)) {
 509         case 0:
 510                 break;
 511 
 512         case ECONNABORTED:
 513                 libscf_handle_rebind(h);
 514                 goto rep_retry;
 515 
 516         case ECANCELED:
 517                 goto deleted;
 518 
 519         case ENOENT:
 520                 /*
 521                  * This is odd, because the graph engine should have required
 522                  * the general property group.  So we'll just use default
 523                  * flags in anticipation of the graph engine sending us
 524                  * REMOVE_INSTANCE when it finds out that the general property
 525                  * group has been deleted.
 526                  */
 527                 inst->ri_flags = RINST_CONTRACT;
 528                 break;
 529 
 530         default:
 531                 assert(0);
 532                 abort();
 533         }
 534 
 535         r = libscf_get_template_values(scf_inst, snap,
 536             &inst->ri_common_name, &inst->ri_C_common_name);
 537 
 538         /*
 539          * Copy our names to smaller buffers to reduce our memory footprint.
 540          */
 541         if (inst->ri_common_name != NULL) {
 542                 char *tmp = safe_strdup(inst->ri_common_name);
 543                 startd_free(inst->ri_common_name, max_scf_value_size);
 544                 inst->ri_common_name = tmp;
 545         }
 546 
 547         if (inst->ri_C_common_name != NULL) {
 548                 char *tmp = safe_strdup(inst->ri_C_common_name);
 549                 startd_free(inst->ri_C_common_name, max_scf_value_size);
 550                 inst->ri_C_common_name = tmp;
 551         }
 552 
 553         switch (r) {
 554         case 0:
 555                 break;
 556 
 557         case ECONNABORTED:
 558                 libscf_handle_rebind(h);
 559                 goto rep_retry;
 560 
 561         case ECANCELED:
 562                 goto deleted;
 563 
 564         case ECHILD:
 565         case ENOENT:
 566                 break;
 567 
 568         default:
 569                 assert(0);
 570                 abort();
 571         }
 572 
 573         switch (libscf_read_method_ids(h, scf_inst, inst->ri_i.i_fmri,
 574             &inst->ri_i.i_primary_ctid, &inst->ri_i.i_transient_ctid,
 575             &start_pid)) {
 576         case 0:
 577                 break;
 578 
 579         case ECONNABORTED:
 580                 libscf_handle_rebind(h);
 581                 goto rep_retry;
 582 
 583         case ECANCELED:
 584                 goto deleted;
 585 
 586         default:
 587                 assert(0);
 588                 abort();
 589         }
 590 
 591         if (inst->ri_i.i_primary_ctid >= 1) {
 592                 contract_hash_store(inst->ri_i.i_primary_ctid, inst->ri_id);
 593 
 594                 switch (check_contract(inst, B_TRUE, scf_inst)) {
 595                 case 0:
 596                         break;
 597 
 598                 case ECONNABORTED:
 599                         libscf_handle_rebind(h);
 600                         goto rep_retry;
 601 
 602                 case ECANCELED:
 603                         goto deleted;
 604 
 605                 default:
 606                         assert(0);
 607                         abort();
 608                 }
 609         }
 610 
 611         if (inst->ri_i.i_transient_ctid >= 1) {
 612                 switch (check_contract(inst, B_FALSE, scf_inst)) {
 613                 case 0:
 614                         break;
 615 
 616                 case ECONNABORTED:
 617                         libscf_handle_rebind(h);
 618                         goto rep_retry;
 619 
 620                 case ECANCELED:
 621                         goto deleted;
 622 
 623                 default:
 624                         assert(0);
 625                         abort();
 626                 }
 627         }
 628 
 629         /* No more failures we live through, so add it to the list. */
 630         (void) pthread_mutex_init(&inst->ri_lock, &mutex_attrs);
 631         (void) pthread_mutex_init(&inst->ri_queue_lock, &mutex_attrs);
 632         MUTEX_LOCK(&inst->ri_lock);
 633         MUTEX_LOCK(&inst->ri_queue_lock);
 634 
 635         (void) pthread_cond_init(&inst->ri_method_cv, NULL);
 636 
 637         uu_list_node_init(inst, &inst->ri_link, restarter_instance_pool);
 638         uu_list_insert(instance_list.ril_instance_list, inst, idx);
 639         MUTEX_UNLOCK(&instance_list.ril_lock);
 640 
 641         if (start_pid != -1 &&
 642             (inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT) {
 643                 int ret;
 644                 ret = wait_register(start_pid, inst->ri_i.i_fmri, 0, 1);
 645                 if (ret == -1) {
 646                         /*
 647                          * Implication:  if we can't reregister the
 648                          * instance, we will start another one.  Two
 649                          * instances may or may not result in a resource
 650                          * conflict.
 651                          */
 652                         log_error(LOG_WARNING,
 653                             "%s: couldn't reregister %ld for wait\n",
 654                             inst->ri_i.i_fmri, start_pid);
 655                 } else if (ret == 1) {
 656                         /*
 657                          * Leading PID has exited.
 658                          */
 659                         (void) stop_instance(h, inst, RSTOP_EXIT);
 660                 }
 661         }
 662 
 663 
 664         scf_pg_destroy(pg);
 665 
 666         if (do_commit_states)
 667                 (void) restarter_instance_update_states(h, inst, state,
 668                     next_state, RERR_NONE, reason);
 669 
 670         log_framework(LOG_DEBUG, "%s is a %s-style service\n", name,
 671             service_style(inst->ri_flags));
 672 
 673         MUTEX_UNLOCK(&inst->ri_queue_lock);
 674         MUTEX_UNLOCK(&inst->ri_lock);
 675 
 676         startd_free(svc_name, max_scf_name_size);
 677         startd_free(inst_name, max_scf_name_size);
 678         scf_snapshot_destroy(snap);
 679         scf_instance_destroy(scf_inst);
 680         scf_service_destroy(scf_svc);
 681 
 682         log_framework(LOG_DEBUG, "%s: inserted instance into restarter list\n",
 683             name);
 684 
 685         return (0);
 686 
 687 deleted:
 688         MUTEX_UNLOCK(&instance_list.ril_lock);
 689         startd_free(inst_name, max_scf_name_size);
 690         startd_free(svc_name, max_scf_name_size);
 691         if (snap != NULL)
 692                 scf_snapshot_destroy(snap);
 693         scf_pg_destroy(pg);
 694         scf_instance_destroy(scf_inst);
 695         scf_service_destroy(scf_svc);
 696         startd_free((void *)inst->ri_i.i_fmri, strlen(inst->ri_i.i_fmri) + 1);
 697         uu_list_destroy(inst->ri_queue);
 698         if (inst->ri_logstem != NULL)
 699                 startd_free(inst->ri_logstem, PATH_MAX);
 700         if (inst->ri_common_name != NULL)
 701                 free(inst->ri_common_name);
 702         if (inst->ri_C_common_name != NULL)
 703                 free(inst->ri_C_common_name);
 704         startd_free(inst->ri_utmpx_prefix, max_scf_value_size);
 705         startd_free(inst, sizeof (restarter_inst_t));
 706         return (ENOENT);
 707 }
 708 
 709 static void
 710 restarter_delete_inst(restarter_inst_t *ri)
 711 {
 712         int id;
 713         restarter_inst_t *rip;
 714         void *cookie = NULL;
 715         restarter_instance_qentry_t *e;
 716 
 717         assert(MUTEX_HELD(&ri->ri_lock));
 718 
 719         /*
 720          * Must drop the instance lock so we can pick up the instance_list
 721          * lock & remove the instance.
 722          */
 723         id = ri->ri_id;
 724         MUTEX_UNLOCK(&ri->ri_lock);
 725 
 726         MUTEX_LOCK(&instance_list.ril_lock);
 727 
 728         rip = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
 729         if (rip == NULL) {
 730                 MUTEX_UNLOCK(&instance_list.ril_lock);
 731                 return;
 732         }
 733 
 734         assert(ri == rip);
 735 
 736         uu_list_remove(instance_list.ril_instance_list, ri);
 737 
 738         log_framework(LOG_DEBUG, "%s: deleted instance from restarter list\n",
 739             ri->ri_i.i_fmri);
 740 
 741         MUTEX_UNLOCK(&instance_list.ril_lock);
 742 
 743         /*
 744          * We can lock the instance without holding the instance_list lock
 745          * since we removed the instance from the list.
 746          */
 747         MUTEX_LOCK(&ri->ri_lock);
 748         MUTEX_LOCK(&ri->ri_queue_lock);
 749 
 750         if (ri->ri_i.i_primary_ctid >= 1)
 751                 contract_hash_remove(ri->ri_i.i_primary_ctid);
 752 
 753         while (ri->ri_method_thread != 0 || ri->ri_method_waiters > 0)
 754                 (void) pthread_cond_wait(&ri->ri_method_cv, &ri->ri_lock);
 755 
 756         while ((e = uu_list_teardown(ri->ri_queue, &cookie)) != NULL)
 757                 startd_free(e, sizeof (*e));
 758         uu_list_destroy(ri->ri_queue);
 759 
 760         startd_free((void *)ri->ri_i.i_fmri, strlen(ri->ri_i.i_fmri) + 1);
 761         startd_free(ri->ri_logstem, PATH_MAX);
 762         if (ri->ri_common_name != NULL)
 763                 free(ri->ri_common_name);
 764         if (ri->ri_C_common_name != NULL)
 765                 free(ri->ri_C_common_name);
 766         startd_free(ri->ri_utmpx_prefix, max_scf_value_size);
 767         (void) pthread_mutex_destroy(&ri->ri_lock);
 768         (void) pthread_mutex_destroy(&ri->ri_queue_lock);
 769         startd_free(ri, sizeof (restarter_inst_t));
 770 }
 771 
 772 /*
 773  * instance_is_wait_style()
 774  *
 775  *   Returns 1 if the given instance is a "wait-style" service instance.
 776  */
 777 int
 778 instance_is_wait_style(restarter_inst_t *inst)
 779 {
 780         assert(MUTEX_HELD(&inst->ri_lock));
 781         return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT);
 782 }
 783 
 784 /*
 785  * instance_is_transient_style()
 786  *
 787  *   Returns 1 if the given instance is a transient service instance.
 788  */
 789 int
 790 instance_is_transient_style(restarter_inst_t *inst)
 791 {
 792         assert(MUTEX_HELD(&inst->ri_lock));
 793         return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_TRANSIENT);
 794 }
 795 
 796 /*
 797  * instance_in_transition()
 798  * Returns 1 if instance is in transition, 0 if not
 799  */
 800 int
 801 instance_in_transition(restarter_inst_t *inst)
 802 {
 803         assert(MUTEX_HELD(&inst->ri_lock));
 804         if (inst->ri_i.i_next_state == RESTARTER_STATE_NONE)
 805                 return (0);
 806         return (1);
 807 }
 808 
 809 /*
 810  * returns 1 if instance is already started, 0 if not
 811  */
 812 static int
 813 instance_started(restarter_inst_t *inst)
 814 {
 815         int ret;
 816 
 817         assert(MUTEX_HELD(&inst->ri_lock));
 818 
 819         if (inst->ri_i.i_state == RESTARTER_STATE_ONLINE ||
 820             inst->ri_i.i_state == RESTARTER_STATE_DEGRADED)
 821                 ret = 1;
 822         else
 823                 ret = 0;
 824 
 825         return (ret);
 826 }
 827 
 828 /*
 829  * Returns
 830  *   0 - success
 831  *   ECONNRESET - success, but h was rebound
 832  */
 833 int
 834 restarter_instance_update_states(scf_handle_t *h, restarter_inst_t *ri,
 835     restarter_instance_state_t new_state,
 836     restarter_instance_state_t new_state_next, restarter_error_t err,
 837     restarter_str_t reason)
 838 {
 839         protocol_states_t *states;
 840         int e;
 841         uint_t retry_count = 0, msecs = ALLOC_DELAY;
 842         boolean_t rebound = B_FALSE;
 843         int prev_state_online;
 844         int state_online;
 845 
 846         assert(MUTEX_HELD(&ri->ri_lock));
 847 
 848         prev_state_online = instance_started(ri);
 849 
 850 retry:
 851         e = _restarter_commit_states(h, &ri->ri_i, new_state, new_state_next,
 852             restarter_get_str_short(reason));
 853         switch (e) {
 854         case 0:
 855                 break;
 856 
 857         case ENOMEM:
 858                 ++retry_count;
 859                 if (retry_count < ALLOC_RETRY) {
 860                         (void) poll(NULL, 0, msecs);
 861                         msecs *= ALLOC_DELAY_MULT;
 862                         goto retry;
 863                 }
 864 
 865                 /* Like startd_alloc(). */
 866                 uu_die("Insufficient memory.\n");
 867                 /* NOTREACHED */
 868 
 869         case ECONNABORTED:
 870                 libscf_handle_rebind(h);
 871                 rebound = B_TRUE;
 872                 goto retry;
 873 
 874         case EPERM:
 875         case EACCES:
 876         case EROFS:
 877                 log_error(LOG_NOTICE, "Could not commit state change for %s "
 878                     "to repository: %s.\n", ri->ri_i.i_fmri, strerror(e));
 879                 /* FALLTHROUGH */
 880 
 881         case ENOENT:
 882                 ri->ri_i.i_state = new_state;
 883                 ri->ri_i.i_next_state = new_state_next;
 884                 break;
 885 
 886         case EINVAL:
 887         default:
 888                 bad_error("_restarter_commit_states", e);
 889         }
 890 
 891         states = startd_alloc(sizeof (protocol_states_t));
 892         states->ps_state = new_state;
 893         states->ps_state_next = new_state_next;
 894         states->ps_err = err;
 895         states->ps_reason = reason;
 896         graph_protocol_send_event(ri->ri_i.i_fmri, GRAPH_UPDATE_STATE_CHANGE,
 897             (void *)states);
 898 
 899         state_online = instance_started(ri);
 900 
 901         if (prev_state_online && !state_online)
 902                 ri->ri_post_offline_hook();
 903         else if (!prev_state_online && state_online)
 904                 ri->ri_post_online_hook();
 905 
 906         return (rebound ? ECONNRESET : 0);
 907 }
 908 
 909 void
 910 restarter_mark_pending_snapshot(const char *fmri, uint_t flag)
 911 {
 912         restarter_inst_t *inst;
 913 
 914         assert(flag == RINST_RETAKE_RUNNING || flag == RINST_RETAKE_START);
 915 
 916         inst = inst_lookup_by_name(fmri);
 917         if (inst == NULL)
 918                 return;
 919 
 920         inst->ri_flags |= flag;
 921 
 922         MUTEX_UNLOCK(&inst->ri_lock);
 923 }
 924 
 925 static void
 926 restarter_take_pending_snapshots(scf_handle_t *h)
 927 {
 928         restarter_inst_t *inst;
 929         int r;
 930 
 931         MUTEX_LOCK(&instance_list.ril_lock);
 932 
 933         for (inst = uu_list_first(instance_list.ril_instance_list);
 934             inst != NULL;
 935             inst = uu_list_next(instance_list.ril_instance_list, inst)) {
 936                 const char *fmri;
 937                 scf_instance_t *sinst = NULL;
 938 
 939                 MUTEX_LOCK(&inst->ri_lock);
 940 
 941                 /*
 942                  * This is where we'd check inst->ri_method_thread and if it
 943                  * were nonzero we'd wait in anticipation of another thread
 944                  * executing a method for inst.  Doing so with the instance_list
 945                  * locked, though, leads to deadlock.  Since taking a snapshot
 946                  * during that window won't hurt anything, we'll just continue.
 947                  */
 948 
 949                 fmri = inst->ri_i.i_fmri;
 950 
 951                 if (inst->ri_flags & RINST_RETAKE_RUNNING) {
 952                         scf_snapshot_t *rsnap;
 953 
 954                         (void) libscf_fmri_get_instance(h, fmri, &sinst);
 955 
 956                         rsnap = libscf_get_or_make_running_snapshot(sinst,
 957                             fmri, B_FALSE);
 958 
 959                         scf_instance_destroy(sinst);
 960 
 961                         if (rsnap != NULL)
 962                                 inst->ri_flags &= ~RINST_RETAKE_RUNNING;
 963 
 964                         scf_snapshot_destroy(rsnap);
 965                 }
 966 
 967                 if (inst->ri_flags & RINST_RETAKE_START) {
 968                         switch (r = libscf_snapshots_poststart(h, fmri,
 969                             B_FALSE)) {
 970                         case 0:
 971                         case ENOENT:
 972                                 inst->ri_flags &= ~RINST_RETAKE_START;
 973                                 break;
 974 
 975                         case ECONNABORTED:
 976                                 break;
 977 
 978                         case EACCES:
 979                         default:
 980                                 bad_error("libscf_snapshots_poststart", r);
 981                         }
 982                 }
 983 
 984                 MUTEX_UNLOCK(&inst->ri_lock);
 985         }
 986 
 987         MUTEX_UNLOCK(&instance_list.ril_lock);
 988 }
 989 
 990 /* ARGSUSED */
 991 void *
 992 restarter_post_fsminimal_thread(void *unused)
 993 {
 994         scf_handle_t *h;
 995         int r;
 996 
 997         h = libscf_handle_create_bound_loop();
 998 
 999         for (;;) {
1000                 r = libscf_create_self(h);
1001                 if (r == 0)
1002                         break;
1003 
1004                 assert(r == ECONNABORTED);
1005                 libscf_handle_rebind(h);
1006         }
1007 
1008         restarter_take_pending_snapshots(h);
1009 
1010         (void) scf_handle_unbind(h);
1011         scf_handle_destroy(h);
1012 
1013         return (NULL);
1014 }
1015 
1016 /*
1017  * int stop_instance()
1018  *
1019  *   Stop the instance identified by the instance given as the second argument,
1020  *   for the cause stated.
1021  *
1022  *   Returns
1023  *     0 - success
1024  *     -1 - inst is in transition
1025  */
1026 static int
1027 stop_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1028     stop_cause_t cause)
1029 {
1030         fork_info_t *info;
1031         const char *cp;
1032         int err;
1033         restarter_error_t re;
1034         restarter_str_t reason;
1035         restarter_instance_state_t new_state;
1036 
1037         assert(MUTEX_HELD(&inst->ri_lock));
1038         assert(inst->ri_method_thread == 0);
1039 
1040         switch (cause) {
1041         case RSTOP_EXIT:
1042                 re = RERR_RESTART;
1043                 reason = restarter_str_ct_ev_exit;
1044                 cp = "all processes in service exited";
1045                 break;
1046         case RSTOP_ERR_CFG:
1047                 re = RERR_FAULT;
1048                 reason = restarter_str_method_failed;
1049                 cp = "service exited with a configuration error";
1050                 break;
1051         case RSTOP_ERR_EXIT:
1052                 re = RERR_RESTART;
1053                 reason = restarter_str_ct_ev_exit;
1054                 cp = "service exited with an error";
1055                 break;
1056         case RSTOP_CORE:
1057                 re = RERR_FAULT;
1058                 reason = restarter_str_ct_ev_core;
1059                 cp = "process dumped core";
1060                 break;
1061         case RSTOP_SIGNAL:
1062                 re = RERR_FAULT;
1063                 reason = restarter_str_ct_ev_signal;
1064                 cp = "process received fatal signal from outside the service";
1065                 break;
1066         case RSTOP_HWERR:
1067                 re = RERR_FAULT;
1068                 reason = restarter_str_ct_ev_hwerr;
1069                 cp = "process killed due to uncorrectable hardware error";
1070                 break;
1071         case RSTOP_DEPENDENCY:
1072                 re = RERR_RESTART;
1073                 reason = restarter_str_dependency_activity;
1074                 cp = "dependency activity requires stop";
1075                 break;
1076         case RSTOP_DISABLE:
1077                 re = RERR_RESTART;
1078                 reason = restarter_str_disable_request;
1079                 cp = "service disabled";
1080                 break;
1081         case RSTOP_RESTART:
1082                 re = RERR_RESTART;
1083                 reason = restarter_str_restart_request;
1084                 cp = "service restarting";
1085                 break;
1086         default:
1087 #ifndef NDEBUG
1088                 (void) fprintf(stderr, "Unknown cause %d at %s:%d.\n",
1089                     cause, __FILE__, __LINE__);
1090 #endif
1091                 abort();
1092         }
1093 
1094         /* Services in the disabled and maintenance state are ignored */
1095         if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1096             inst->ri_i.i_state == RESTARTER_STATE_DISABLED) {
1097                 log_framework(LOG_DEBUG,
1098                     "%s: stop_instance -> is maint/disabled\n",
1099                     inst->ri_i.i_fmri);
1100                 return (0);
1101         }
1102 
1103         /* Already stopped instances are left alone */
1104         if (instance_started(inst) == 0) {
1105                 log_framework(LOG_DEBUG, "Restarter: %s is already stopped.\n",
1106                     inst->ri_i.i_fmri);
1107                 return (0);
1108         }
1109 
1110         if (instance_in_transition(inst)) {
1111                 /* requeue event by returning -1 */
1112                 log_framework(LOG_DEBUG,
1113                     "Restarter: Not stopping %s, in transition.\n",
1114                     inst->ri_i.i_fmri);
1115                 return (-1);
1116         }
1117 
1118         log_instance(inst, B_TRUE, "Stopping because %s.", cp);
1119 
1120         log_framework(re == RERR_FAULT ? LOG_INFO : LOG_DEBUG,
1121             "%s: Instance stopping because %s.\n", inst->ri_i.i_fmri, cp);
1122 
1123         if (instance_is_wait_style(inst) &&
1124             (cause == RSTOP_EXIT ||
1125             cause == RSTOP_ERR_CFG ||
1126             cause == RSTOP_ERR_EXIT)) {
1127                 /*
1128                  * No need to stop instance, as child has exited; remove
1129                  * contract and move the instance to the offline state.
1130                  */
1131                 switch (err = restarter_instance_update_states(local_handle,
1132                     inst, inst->ri_i.i_state, RESTARTER_STATE_OFFLINE, re,
1133                     reason)) {
1134                 case 0:
1135                 case ECONNRESET:
1136                         break;
1137 
1138                 default:
1139                         bad_error("restarter_instance_update_states", err);
1140                 }
1141 
1142                 if (cause == RSTOP_ERR_EXIT) {
1143                         /*
1144                          * The RSTOP_ERR_EXIT cause is set via the
1145                          * wait_thread -> wait_remove code path when we have
1146                          * a "wait" style svc that exited with an error. If
1147                          * the svc is failing too quickly, we throttle it so
1148                          * that we don't restart it more than once/second.
1149                          * Since we know we're running in the wait thread its
1150                          * ok to throttle it right here.
1151                          */
1152                         (void) update_fault_count(inst, FAULT_COUNT_INCR);
1153                         if (method_rate_critical(inst)) {
1154                                 log_instance(inst, B_TRUE, "Failing too "
1155                                     "quickly, throttling.");
1156                                 (void) sleep(WT_SVC_ERR_THROTTLE);
1157                         }
1158                 } else {
1159                         (void) update_fault_count(inst, FAULT_COUNT_RESET);
1160                         reset_start_times(inst);
1161                 }
1162 
1163                 if (inst->ri_i.i_primary_ctid != 0) {
1164                         inst->ri_m_inst =
1165                             safe_scf_instance_create(local_handle);
1166                         inst->ri_mi_deleted = B_FALSE;
1167 
1168                         libscf_reget_instance(inst);
1169                         method_remove_contract(inst, B_TRUE, B_TRUE);
1170 
1171                         scf_instance_destroy(inst->ri_m_inst);
1172                         inst->ri_m_inst = NULL;
1173                 }
1174 
1175                 switch (err = restarter_instance_update_states(local_handle,
1176                     inst, inst->ri_i.i_next_state, RESTARTER_STATE_NONE, re,
1177                     reason)) {
1178                 case 0:
1179                 case ECONNRESET:
1180                         break;
1181 
1182                 default:
1183                         bad_error("restarter_instance_update_states", err);
1184                 }
1185 
1186                 if (cause != RSTOP_ERR_CFG)
1187                         return (0);
1188         } else if (instance_is_wait_style(inst) && re == RERR_RESTART) {
1189                 /*
1190                  * Stopping a wait service through means other than the pid
1191                  * exiting should keep wait_thread() from restarting the
1192                  * service, by removing it from the wait list.
1193                  * We cannot remove it right now otherwise the process will
1194                  * end up <defunct> so mark it to be ignored.
1195                  */
1196                 wait_ignore_by_fmri(inst->ri_i.i_fmri);
1197         }
1198 
1199         /*
1200          * There are some configuration errors which we cannot detect until we
1201          * try to run the method.  For example, see exec_method() where the
1202          * restarter_set_method_context() call can return SMF_EXIT_ERR_CONFIG
1203          * in several cases. If this happens for a "wait-style" svc,
1204          * wait_remove() sets the cause as RSTOP_ERR_CFG so that we can detect
1205          * the configuration error and go into maintenance, even though it is
1206          * a "wait-style" svc.
1207          */
1208         if (cause == RSTOP_ERR_CFG)
1209                 new_state = RESTARTER_STATE_MAINT;
1210         else
1211                 new_state = inst->ri_i.i_enabled ?
1212                     RESTARTER_STATE_OFFLINE : RESTARTER_STATE_DISABLED;
1213 
1214         switch (err = restarter_instance_update_states(local_handle, inst,
1215             inst->ri_i.i_state, new_state, RERR_NONE, reason)) {
1216         case 0:
1217         case ECONNRESET:
1218                 break;
1219 
1220         default:
1221                 bad_error("restarter_instance_update_states", err);
1222         }
1223 
1224         info = startd_zalloc(sizeof (fork_info_t));
1225 
1226         info->sf_id = inst->ri_id;
1227         info->sf_method_type = METHOD_STOP;
1228         info->sf_event_type = re;
1229         info->sf_reason = reason;
1230         inst->ri_method_thread = startd_thread_create(method_thread, info);
1231 
1232         return (0);
1233 }
1234 
1235 /*
1236  * Returns
1237  *   ENOENT - fmri is not in instance_list
1238  *   0 - success
1239  *   ECONNRESET - success, though handle was rebound
1240  *   -1 - instance is in transition
1241  */
1242 int
1243 stop_instance_fmri(scf_handle_t *h, const char *fmri, uint_t flags)
1244 {
1245         restarter_inst_t *rip;
1246         int r;
1247 
1248         rip = inst_lookup_by_name(fmri);
1249         if (rip == NULL)
1250                 return (ENOENT);
1251 
1252         r = stop_instance(h, rip, flags);
1253 
1254         MUTEX_UNLOCK(&rip->ri_lock);
1255 
1256         return (r);
1257 }
1258 
1259 static void
1260 unmaintain_instance(scf_handle_t *h, restarter_inst_t *rip,
1261     unmaint_cause_t cause)
1262 {
1263         ctid_t ctid;
1264         scf_instance_t *inst;
1265         int r;
1266         uint_t tries = 0, msecs = ALLOC_DELAY;
1267         const char *cp;
1268         restarter_str_t reason;
1269 
1270         assert(MUTEX_HELD(&rip->ri_lock));
1271 
1272         if (rip->ri_i.i_state != RESTARTER_STATE_MAINT) {
1273                 log_error(LOG_DEBUG, "Restarter: "
1274                     "Ignoring maintenance off command because %s is not in the "
1275                     "maintenance state.\n", rip->ri_i.i_fmri);
1276                 return;
1277         }
1278 
1279         switch (cause) {
1280         case RUNMAINT_CLEAR:
1281                 cp = "clear requested";
1282                 reason = restarter_str_clear_request;
1283                 break;
1284         case RUNMAINT_DISABLE:
1285                 cp = "disable requested";
1286                 reason = restarter_str_disable_request;
1287                 break;
1288         default:
1289 #ifndef NDEBUG
1290                 (void) fprintf(stderr, "Uncaught case for %d at %s:%d.\n",
1291                     cause, __FILE__, __LINE__);
1292 #endif
1293                 abort();
1294         }
1295 
1296         log_instance(rip, B_TRUE, "Leaving maintenance because %s.",
1297             cp);
1298         log_framework(LOG_DEBUG, "%s: Instance leaving maintenance because "
1299             "%s.\n", rip->ri_i.i_fmri, cp);
1300 
1301         (void) restarter_instance_update_states(h, rip, RESTARTER_STATE_UNINIT,
1302             RESTARTER_STATE_NONE, RERR_RESTART, reason);
1303 
1304         /*
1305          * If we did ADMIN_MAINT_ON_IMMEDIATE, then there might still be
1306          * a primary contract.
1307          */
1308         if (rip->ri_i.i_primary_ctid == 0)
1309                 return;
1310 
1311         ctid = rip->ri_i.i_primary_ctid;
1312         contract_abandon(ctid);
1313         rip->ri_i.i_primary_ctid = 0;
1314 
1315 rep_retry:
1316         switch (r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst)) {
1317         case 0:
1318                 break;
1319 
1320         case ECONNABORTED:
1321                 libscf_handle_rebind(h);
1322                 goto rep_retry;
1323 
1324         case ENOENT:
1325                 /* Must have been deleted. */
1326                 return;
1327 
1328         case EINVAL:
1329         case ENOTSUP:
1330         default:
1331                 bad_error("libscf_handle_rebind", r);
1332         }
1333 
1334 again:
1335         r = restarter_remove_contract(inst, ctid, RESTARTER_CONTRACT_PRIMARY);
1336         switch (r) {
1337         case 0:
1338                 break;
1339 
1340         case ENOMEM:
1341                 ++tries;
1342                 if (tries < ALLOC_RETRY) {
1343                         (void) poll(NULL, 0, msecs);
1344                         msecs *= ALLOC_DELAY_MULT;
1345                         goto again;
1346                 }
1347 
1348                 uu_die("Insufficient memory.\n");
1349                 /* NOTREACHED */
1350 
1351         case ECONNABORTED:
1352                 scf_instance_destroy(inst);
1353                 libscf_handle_rebind(h);
1354                 goto rep_retry;
1355 
1356         case ECANCELED:
1357                 break;
1358 
1359         case EPERM:
1360         case EACCES:
1361         case EROFS:
1362                 log_error(LOG_INFO,
1363                     "Could not remove contract id %lu for %s (%s).\n", ctid,
1364                     rip->ri_i.i_fmri, strerror(r));
1365                 break;
1366 
1367         case EINVAL:
1368         case EBADF:
1369         default:
1370                 bad_error("restarter_remove_contract", r);
1371         }
1372 
1373         scf_instance_destroy(inst);
1374 }
1375 
1376 /*
1377  * enable_inst()
1378  *   Set inst->ri_i.i_enabled.  Expects 'e' to be _ENABLE, _DISABLE, or
1379  *   _ADMIN_DISABLE.  If the event is _ENABLE and inst is uninitialized or
1380  *   disabled, move it to offline.  If the event is _DISABLE or
1381  *   _ADMIN_DISABLE, make sure inst will move to disabled.
1382  *
1383  *   Returns
1384  *     0 - success
1385  *     ECONNRESET - h was rebound
1386  */
1387 static int
1388 enable_inst(scf_handle_t *h, restarter_inst_t *inst,
1389     restarter_instance_qentry_t *riq)
1390 {
1391         restarter_instance_state_t state;
1392         restarter_event_type_t e = riq->riq_type;
1393         restarter_str_t reason = restarter_str_per_configuration;
1394         int r;
1395 
1396         assert(MUTEX_HELD(&inst->ri_lock));
1397         assert(e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE ||
1398             e == RESTARTER_EVENT_TYPE_DISABLE ||
1399             e == RESTARTER_EVENT_TYPE_ENABLE);
1400         assert(instance_in_transition(inst) == 0);
1401 
1402         state = inst->ri_i.i_state;
1403 
1404         if (e == RESTARTER_EVENT_TYPE_ENABLE) {
1405                 inst->ri_i.i_enabled = 1;
1406 
1407                 if (state == RESTARTER_STATE_UNINIT ||
1408                     state == RESTARTER_STATE_DISABLED) {
1409                         /*
1410                          * B_FALSE: Don't log an error if the log_instance()
1411                          * fails because it will fail on the miniroot before
1412                          * install-discovery runs.
1413                          */
1414                         log_instance(inst, B_FALSE, "Enabled.");
1415                         log_framework(LOG_DEBUG, "%s: Instance enabled.\n",
1416                             inst->ri_i.i_fmri);
1417 
1418                         /*
1419                          * If we are coming from DISABLED, it was obviously an
1420                          * enable request. If we are coming from UNINIT, it may
1421                          * have been a sevice in MAINT that was cleared.
1422                          */
1423                         if (riq->riq_reason == restarter_str_clear_request)
1424                                 reason = restarter_str_clear_request;
1425                         else if (state == RESTARTER_STATE_DISABLED)
1426                                 reason = restarter_str_enable_request;
1427                         (void) restarter_instance_update_states(h, inst,
1428                             RESTARTER_STATE_OFFLINE, RESTARTER_STATE_NONE,
1429                             RERR_NONE, reason);
1430                 } else {
1431                         log_framework(LOG_DEBUG, "Restarter: "
1432                             "Not changing state of %s for enable command.\n",
1433                             inst->ri_i.i_fmri);
1434                 }
1435         } else {
1436                 inst->ri_i.i_enabled = 0;
1437 
1438                 switch (state) {
1439                 case RESTARTER_STATE_ONLINE:
1440                 case RESTARTER_STATE_DEGRADED:
1441                         r = stop_instance(h, inst, RSTOP_DISABLE);
1442                         return (r == ECONNRESET ? 0 : r);
1443 
1444                 case RESTARTER_STATE_OFFLINE:
1445                 case RESTARTER_STATE_UNINIT:
1446                         if (inst->ri_i.i_primary_ctid != 0) {
1447                                 inst->ri_m_inst = safe_scf_instance_create(h);
1448                                 inst->ri_mi_deleted = B_FALSE;
1449 
1450                                 libscf_reget_instance(inst);
1451                                 method_remove_contract(inst, B_TRUE, B_TRUE);
1452 
1453                                 scf_instance_destroy(inst->ri_m_inst);
1454                         }
1455                         /* B_FALSE: See log_instance(..., "Enabled."); above */
1456                         log_instance(inst, B_FALSE, "Disabled.");
1457                         log_framework(LOG_DEBUG, "%s: Instance disabled.\n",
1458                             inst->ri_i.i_fmri);
1459 
1460                         /*
1461                          * If we are coming from OFFLINE, it was obviously a
1462                          * disable request. But if we are coming from
1463                          * UNINIT, it may have been a disable request for a
1464                          * service in MAINT.
1465                          */
1466                         if (riq->riq_reason == restarter_str_disable_request ||
1467                             state == RESTARTER_STATE_OFFLINE)
1468                                 reason = restarter_str_disable_request;
1469                         (void) restarter_instance_update_states(h, inst,
1470                             RESTARTER_STATE_DISABLED, RESTARTER_STATE_NONE,
1471                             RERR_RESTART, reason);
1472                         return (0);
1473 
1474                 case RESTARTER_STATE_DISABLED:
1475                         break;
1476 
1477                 case RESTARTER_STATE_MAINT:
1478                         /*
1479                          * We only want to pull the instance out of maintenance
1480                          * if the disable is on adminstrative request.  The
1481                          * graph engine sends _DISABLE events whenever a
1482                          * service isn't in the disabled state, and we don't
1483                          * want to pull the service out of maintenance if,
1484                          * for example, it is there due to a dependency cycle.
1485                          */
1486                         if (e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE)
1487                                 unmaintain_instance(h, inst, RUNMAINT_DISABLE);
1488                         break;
1489 
1490                 default:
1491 #ifndef NDEBUG
1492                         (void) fprintf(stderr, "Restarter instance %s has "
1493                             "unknown state %d.\n", inst->ri_i.i_fmri, state);
1494 #endif
1495                         abort();
1496                 }
1497         }
1498 
1499         return (0);
1500 }
1501 
1502 static void
1503 start_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1504     int32_t reason)
1505 {
1506         fork_info_t *info;
1507         restarter_str_t new_reason;
1508 
1509         assert(MUTEX_HELD(&inst->ri_lock));
1510         assert(instance_in_transition(inst) == 0);
1511         assert(inst->ri_method_thread == 0);
1512 
1513         log_framework(LOG_DEBUG, "%s: trying to start instance\n",
1514             inst->ri_i.i_fmri);
1515 
1516         /*
1517          * We want to keep the original reason for restarts and clear actions
1518          */
1519         switch (reason) {
1520         case restarter_str_restart_request:
1521         case restarter_str_clear_request:
1522                 new_reason = reason;
1523                 break;
1524         default:
1525                 new_reason = restarter_str_dependencies_satisfied;
1526         }
1527 
1528         /* Services in the disabled and maintenance state are ignored */
1529         if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1530             inst->ri_i.i_state == RESTARTER_STATE_DISABLED ||
1531             inst->ri_i.i_enabled == 0) {
1532                 log_framework(LOG_DEBUG,
1533                     "%s: start_instance -> is maint/disabled\n",
1534                     inst->ri_i.i_fmri);
1535                 return;
1536         }
1537 
1538         /* Already started instances are left alone */
1539         if (instance_started(inst) == 1) {
1540                 log_framework(LOG_DEBUG,
1541                     "%s: start_instance -> is already started\n",
1542                     inst->ri_i.i_fmri);
1543                 return;
1544         }
1545 
1546         log_framework(LOG_DEBUG, "%s: starting instance.\n", inst->ri_i.i_fmri);
1547 
1548         (void) restarter_instance_update_states(local_handle, inst,
1549             inst->ri_i.i_state, RESTARTER_STATE_ONLINE, RERR_NONE, new_reason);
1550 
1551         info = startd_zalloc(sizeof (fork_info_t));
1552 
1553         info->sf_id = inst->ri_id;
1554         info->sf_method_type = METHOD_START;
1555         info->sf_event_type = RERR_NONE;
1556         info->sf_reason = new_reason;
1557         inst->ri_method_thread = startd_thread_create(method_thread, info);
1558 }
1559 
1560 static int
1561 event_from_tty(scf_handle_t *h, restarter_inst_t *rip)
1562 {
1563         scf_instance_t *inst;
1564         int ret = 0;
1565 
1566         if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
1567                 return (-1);
1568 
1569         ret = restarter_inst_ractions_from_tty(inst);
1570 
1571         scf_instance_destroy(inst);
1572         return (ret);
1573 }
1574 
1575 static boolean_t
1576 restart_dump(scf_handle_t *h, restarter_inst_t *rip)
1577 {
1578         scf_instance_t *inst;
1579         boolean_t ret = B_FALSE;
1580 
1581         if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
1582                 return (-1);
1583 
1584         if (restarter_inst_dump(inst) == 1)
1585                 ret = B_TRUE;
1586 
1587         scf_instance_destroy(inst);
1588         return (ret);
1589 }
1590 
1591 static void
1592 maintain_instance(scf_handle_t *h, restarter_inst_t *rip, int immediate,
1593     restarter_str_t reason)
1594 {
1595         fork_info_t *info;
1596         scf_instance_t *scf_inst = NULL;
1597 
1598         assert(MUTEX_HELD(&rip->ri_lock));
1599         assert(reason != restarter_str_none);
1600         assert(rip->ri_method_thread == 0);
1601 
1602         log_instance(rip, B_TRUE, "Stopping for maintenance due to %s.",
1603             restarter_get_str_short(reason));
1604         log_framework(LOG_DEBUG, "%s: stopping for maintenance due to %s.\n",
1605             rip->ri_i.i_fmri, restarter_get_str_short(reason));
1606 
1607         /* Services in the maintenance state are ignored */
1608         if (rip->ri_i.i_state == RESTARTER_STATE_MAINT) {
1609                 log_framework(LOG_DEBUG,
1610                     "%s: maintain_instance -> is already in maintenance\n",
1611                     rip->ri_i.i_fmri);
1612                 return;
1613         }
1614 
1615         /*
1616          * If reason state is restarter_str_service_request and
1617          * restarter_actions/auxiliary_fmri property is set with a valid fmri,
1618          * copy the fmri to restarter/auxiliary_fmri so svcs -x can use.
1619          */
1620         if (reason == restarter_str_service_request &&
1621             libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &scf_inst) == 0) {
1622                 if (restarter_inst_validate_ractions_aux_fmri(scf_inst) == 0) {
1623                         if (restarter_inst_set_aux_fmri(scf_inst))
1624                                 log_framework(LOG_DEBUG, "%s: "
1625                                     "restarter_inst_set_aux_fmri failed: ",
1626                                     rip->ri_i.i_fmri);
1627                 } else {
1628                         log_framework(LOG_DEBUG, "%s: "
1629                             "restarter_inst_validate_ractions_aux_fmri "
1630                             "failed: ", rip->ri_i.i_fmri);
1631 
1632                         if (restarter_inst_reset_aux_fmri(scf_inst))
1633                                 log_framework(LOG_DEBUG, "%s: "
1634                                     "restarter_inst_reset_aux_fmri failed: ",
1635                                     rip->ri_i.i_fmri);
1636                 }
1637                 scf_instance_destroy(scf_inst);
1638         }
1639 
1640         if (immediate || !instance_started(rip)) {
1641                 if (rip->ri_i.i_primary_ctid != 0) {
1642                         rip->ri_m_inst = safe_scf_instance_create(h);
1643                         rip->ri_mi_deleted = B_FALSE;
1644 
1645                         libscf_reget_instance(rip);
1646                         method_remove_contract(rip, B_TRUE, B_TRUE);
1647 
1648                         scf_instance_destroy(rip->ri_m_inst);
1649                 }
1650 
1651                 (void) restarter_instance_update_states(h, rip,
1652                     RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_RESTART,
1653                     reason);
1654                 return;
1655         }
1656 
1657         (void) restarter_instance_update_states(h, rip, rip->ri_i.i_state,
1658             RESTARTER_STATE_MAINT, RERR_NONE, reason);
1659 
1660         log_transition(rip, MAINT_REQUESTED);
1661 
1662         info = startd_zalloc(sizeof (*info));
1663         info->sf_id = rip->ri_id;
1664         info->sf_method_type = METHOD_STOP;
1665         info->sf_event_type = RERR_RESTART;
1666         info->sf_reason = reason;
1667         rip->ri_method_thread = startd_thread_create(method_thread, info);
1668 }
1669 
1670 static void
1671 refresh_instance(scf_handle_t *h, restarter_inst_t *rip)
1672 {
1673         scf_instance_t *inst;
1674         scf_snapshot_t *snap;
1675         fork_info_t *info;
1676         int r;
1677 
1678         assert(MUTEX_HELD(&rip->ri_lock));
1679 
1680         log_instance(rip, B_TRUE, "Rereading configuration.");
1681         log_framework(LOG_DEBUG, "%s: rereading configuration.\n",
1682             rip->ri_i.i_fmri);
1683 
1684 rep_retry:
1685         r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst);
1686         switch (r) {
1687         case 0:
1688                 break;
1689 
1690         case ECONNABORTED:
1691                 libscf_handle_rebind(h);
1692                 goto rep_retry;
1693 
1694         case ENOENT:
1695                 /* Must have been deleted. */
1696                 return;
1697 
1698         case EINVAL:
1699         case ENOTSUP:
1700         default:
1701                 bad_error("libscf_fmri_get_instance", r);
1702         }
1703 
1704         snap = libscf_get_running_snapshot(inst);
1705 
1706         r = libscf_get_startd_properties(inst, snap, &rip->ri_flags,
1707             &rip->ri_utmpx_prefix);
1708         switch (r) {
1709         case 0:
1710                 log_framework(LOG_DEBUG, "%s is a %s-style service\n",
1711                     rip->ri_i.i_fmri, service_style(rip->ri_flags));
1712                 break;
1713 
1714         case ECONNABORTED:
1715                 scf_instance_destroy(inst);
1716                 scf_snapshot_destroy(snap);
1717                 libscf_handle_rebind(h);
1718                 goto rep_retry;
1719 
1720         case ECANCELED:
1721         case ENOENT:
1722                 /* Succeed in anticipation of REMOVE_INSTANCE. */
1723                 break;
1724 
1725         default:
1726                 bad_error("libscf_get_startd_properties", r);
1727         }
1728 
1729         if (instance_started(rip)) {
1730                 /* Refresh does not change the state. */
1731                 (void) restarter_instance_update_states(h, rip,
1732                     rip->ri_i.i_state, rip->ri_i.i_state, RERR_NONE,
1733                     restarter_str_refresh);
1734 
1735                 info = startd_zalloc(sizeof (*info));
1736                 info->sf_id = rip->ri_id;
1737                 info->sf_method_type = METHOD_REFRESH;
1738                 info->sf_event_type = RERR_REFRESH;
1739                 info->sf_reason = NULL;
1740 
1741                 assert(rip->ri_method_thread == 0);
1742                 rip->ri_method_thread =
1743                     startd_thread_create(method_thread, info);
1744         }
1745 
1746         scf_snapshot_destroy(snap);
1747         scf_instance_destroy(inst);
1748 }
1749 
/*
 * Printable names of the restarter events.  The table is indexed by the
 * numeric event type when events are logged.
 */
const char *event_names[] = {
        "INVALID",
        "ADD_INSTANCE",
        "REMOVE_INSTANCE",
        "ENABLE",
        "DISABLE",
        "ADMIN_DEGRADED",
        "ADMIN_REFRESH",
        "ADMIN_RESTART",
        "ADMIN_MAINT_OFF",
        "ADMIN_MAINT_ON",
        "ADMIN_MAINT_ON_IMMEDIATE",
        "STOP",
        "START",
        "DEPENDENCY_CYCLE",
        "INVALID_DEPENDENCY",
        "ADMIN_DISABLE",
        "STOP_RESET"
};
1756 
1757 /*
1758  * void *restarter_process_events()
1759  *
1760  *   Called in a separate thread to process the events on an instance's
1761  *   queue.  Empties the queue completely, and tries to keep the thread
1762  *   around for a little while after the queue is empty to save on
1763  *   startup costs.
1764  */
1765 static void *
1766 restarter_process_events(void *arg)
1767 {
1768         scf_handle_t *h;
1769         restarter_instance_qentry_t *event;
1770         restarter_inst_t *rip;
1771         char *fmri = (char *)arg;
1772         struct timespec to;
1773 
1774         assert(fmri != NULL);
1775 
1776         h = libscf_handle_create_bound_loop();
1777 
1778         /* grab the queue lock */
1779         rip = inst_lookup_queue(fmri);
1780         if (rip == NULL)
1781                 goto out;
1782 
1783 again:
1784 
1785         while ((event = uu_list_first(rip->ri_queue)) != NULL) {
1786                 restarter_inst_t *inst;
1787 
1788                 /* drop the queue lock */
1789                 MUTEX_UNLOCK(&rip->ri_queue_lock);
1790 
1791                 /*
1792                  * Grab the inst lock -- this waits until any outstanding
1793                  * method finishes running.
1794                  */
1795                 inst = inst_lookup_by_name(fmri);
1796                 if (inst == NULL) {
1797                         /* Getting deleted in the middle isn't an error. */
1798                         goto cont;
1799                 }
1800 
1801                 assert(instance_in_transition(inst) == 0);
1802 
1803                 /* process the event */
1804                 switch (event->riq_type) {
1805                 case RESTARTER_EVENT_TYPE_ENABLE:
1806                 case RESTARTER_EVENT_TYPE_DISABLE:
1807                         (void) enable_inst(h, inst, event);
1808                         break;
1809 
1810                 case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
1811                         if (enable_inst(h, inst, event) == 0)
1812                                 reset_start_times(inst);
1813                         break;
1814 
1815                 case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
1816                         restarter_delete_inst(inst);
1817                         inst = NULL;
1818                         goto cont;
1819 
1820                 case RESTARTER_EVENT_TYPE_STOP_RESET:
1821                         reset_start_times(inst);
1822                         /* FALLTHROUGH */
1823                 case RESTARTER_EVENT_TYPE_STOP:
1824                         (void) stop_instance(h, inst, RSTOP_DEPENDENCY);
1825                         break;
1826 
1827                 case RESTARTER_EVENT_TYPE_START:
1828                         start_instance(h, inst, event->riq_reason);
1829                         break;
1830 
1831                 case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1832                         maintain_instance(h, inst, 0,
1833                             restarter_str_dependency_cycle);
1834                         break;
1835 
1836                 case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1837                         maintain_instance(h, inst, 0,
1838                             restarter_str_invalid_dependency);
1839                         break;
1840 
1841                 case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1842                         if (event_from_tty(h, inst) == 0)
1843                                 maintain_instance(h, inst, 0,
1844                                     restarter_str_service_request);
1845                         else
1846                                 maintain_instance(h, inst, 0,
1847                                     restarter_str_administrative_request);
1848                         break;
1849 
1850                 case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1851                         if (event_from_tty(h, inst) == 0)
1852                                 maintain_instance(h, inst, 1,
1853                                     restarter_str_service_request);
1854                         else
1855                                 maintain_instance(h, inst, 1,
1856                                     restarter_str_administrative_request);
1857                         break;
1858 
1859                 case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1860                         unmaintain_instance(h, inst, RUNMAINT_CLEAR);
1861                         reset_start_times(inst);
1862                         break;
1863 
1864                 case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1865                         refresh_instance(h, inst);
1866                         break;
1867 
1868                 case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1869                         log_framework(LOG_WARNING, "Restarter: "
1870                             "%s command (for %s) unimplemented.\n",
1871                             event_names[event->riq_type], inst->ri_i.i_fmri);
1872                         break;
1873 
1874                 case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1875                         if (!instance_started(inst)) {
1876                                 log_framework(LOG_DEBUG, "Restarter: "
1877                                     "Not restarting %s; not running.\n",
1878                                     inst->ri_i.i_fmri);
1879                         } else {
1880                                 /*
1881                                  * Stop the instance.  If it can be restarted,
1882                                  * the graph engine will send a new event.
1883                                  */
1884                                 if (restart_dump(h, inst)) {
1885                                         (void) contract_kill(
1886                                             inst->ri_i.i_primary_ctid, SIGABRT,
1887                                             inst->ri_i.i_fmri);
1888                                 } else if (stop_instance(h, inst,
1889                                     RSTOP_RESTART) == 0) {
1890                                         reset_start_times(inst);
1891                                 }
1892                         }
1893                         break;
1894 
1895                 case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
1896                 default:
1897 #ifndef NDEBUG
1898                         uu_warn("%s:%d: Bad restarter event %d.  "
1899                             "Aborting.\n", __FILE__, __LINE__, event->riq_type);
1900 #endif
1901                         abort();
1902                 }
1903 
1904                 assert(inst != NULL);
1905                 MUTEX_UNLOCK(&inst->ri_lock);
1906 
1907 cont:
1908                 /* grab the queue lock */
1909                 rip = inst_lookup_queue(fmri);
1910                 if (rip == NULL)
1911                         goto out;
1912 
1913                 /* delete the event */
1914                 uu_list_remove(rip->ri_queue, event);
1915                 startd_free(event, sizeof (restarter_instance_qentry_t));
1916         }
1917 
1918         assert(rip != NULL);
1919 
1920         /*
1921          * Try to preserve the thread for a little while for future use.
1922          */
1923         to.tv_sec = 3;
1924         to.tv_nsec = 0;
1925         (void) pthread_cond_reltimedwait_np(&rip->ri_queue_cv,
1926             &rip->ri_queue_lock, &to);
1927 
1928         if (uu_list_first(rip->ri_queue) != NULL)
1929                 goto again;
1930 
1931         rip->ri_queue_thread = 0;
1932         MUTEX_UNLOCK(&rip->ri_queue_lock);
1933 
1934 out:
1935         (void) scf_handle_unbind(h);
1936         scf_handle_destroy(h);
1937         free(fmri);
1938         return (NULL);
1939 }
1940 
1941 static int
1942 is_admin_event(restarter_event_type_t t) {
1943 
1944         switch (t) {
1945         case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1946         case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1947         case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1948         case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1949         case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1950         case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1951                 return (1);
1952         default:
1953                 return (0);
1954         }
1955 }
1956 
1957 static void
1958 restarter_queue_event(restarter_inst_t *ri, restarter_protocol_event_t *e)
1959 {
1960         restarter_instance_qentry_t *qe;
1961         int r;
1962 
1963         assert(MUTEX_HELD(&ri->ri_queue_lock));
1964         assert(!MUTEX_HELD(&ri->ri_lock));
1965 
1966         qe = startd_zalloc(sizeof (restarter_instance_qentry_t));
1967         qe->riq_type = e->rpe_type;
1968         qe->riq_reason = e->rpe_reason;
1969 
1970         uu_list_node_init(qe, &qe->riq_link, restarter_queue_pool);
1971         r = uu_list_insert_before(ri->ri_queue, NULL, qe);
1972         assert(r == 0);
1973 }
1974 
1975 /*
1976  * void *restarter_event_thread()
1977  *
1978  *  Handle incoming graph events by placing them on a per-instance
1979  *  queue.  We can't lock the main part of the instance structure, so
1980  *  just modify the seprarately locked event queue portion.
1981  */
1982 /*ARGSUSED*/
1983 static void *
1984 restarter_event_thread(void *unused)
1985 {
1986         scf_handle_t *h;
1987 
1988         /*
1989          * This is a new thread, and thus, gets its own handle
1990          * to the repository.
1991          */
1992         h = libscf_handle_create_bound_loop();
1993 
1994         MUTEX_LOCK(&ru->restarter_update_lock);
1995 
1996         /*CONSTCOND*/
1997         while (1) {
1998                 restarter_protocol_event_t *e;
1999 
2000                 while (ru->restarter_update_wakeup == 0)
2001                         (void) pthread_cond_wait(&ru->restarter_update_cv,
2002                             &ru->restarter_update_lock);
2003 
2004                 ru->restarter_update_wakeup = 0;
2005 
2006                 while ((e = restarter_event_dequeue()) != NULL) {
2007                         restarter_inst_t *rip;
2008                         char *fmri;
2009 
2010                         MUTEX_UNLOCK(&ru->restarter_update_lock);
2011 
2012                         /*
2013                          * ADD_INSTANCE is special: there's likely no
2014                          * instance structure yet, so we need to handle the
2015                          * addition synchronously.
2016                          */
2017                         switch (e->rpe_type) {
2018                         case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
2019                                 if (restarter_insert_inst(h, e->rpe_inst) != 0)
2020                                         log_error(LOG_INFO, "Restarter: "
2021                                             "Could not add %s.\n", e->rpe_inst);
2022 
2023                                 MUTEX_LOCK(&st->st_load_lock);
2024                                 if (--st->st_load_instances == 0)
2025                                         (void) pthread_cond_broadcast(
2026                                             &st->st_load_cv);
2027                                 MUTEX_UNLOCK(&st->st_load_lock);
2028 
2029                                 goto nolookup;
2030                         }
2031 
2032                         /*
2033                          * Lookup the instance, locking only the event queue.
2034                          * Can't grab ri_lock here because it might be held
2035                          * by a long-running method.
2036                          */
2037                         rip = inst_lookup_queue(e->rpe_inst);
2038                         if (rip == NULL) {
2039                                 log_error(LOG_INFO, "Restarter: "
2040                                     "Ignoring %s command for unknown service "
2041                                     "%s.\n", event_names[e->rpe_type],
2042                                     e->rpe_inst);
2043                                 goto nolookup;
2044                         }
2045 
2046                         /* Keep ADMIN events from filling up the queue. */
2047                         if (is_admin_event(e->rpe_type) &&
2048                             uu_list_numnodes(rip->ri_queue) >
2049                             RINST_QUEUE_THRESHOLD) {
2050                                 MUTEX_UNLOCK(&rip->ri_queue_lock);
2051                                 log_instance(rip, B_TRUE, "Instance event "
2052                                     "queue overflow.  Dropping administrative "
2053                                     "request.");
2054                                 log_framework(LOG_DEBUG, "%s: Instance event "
2055                                     "queue overflow.  Dropping administrative "
2056                                     "request.\n", rip->ri_i.i_fmri);
2057                                 goto nolookup;
2058                         }
2059 
2060                         /* Now add the event to the instance queue. */
2061                         restarter_queue_event(rip, e);
2062 
2063                         if (rip->ri_queue_thread == 0) {
2064                                 /*
2065                                  * Start a thread if one isn't already
2066                                  * running.
2067                                  */
2068                                 fmri = safe_strdup(e->rpe_inst);
2069                                 rip->ri_queue_thread =  startd_thread_create(
2070                                     restarter_process_events, (void *)fmri);
2071                         } else {
2072                                 /*
2073                                  * Signal the existing thread that there's
2074                                  * a new event.
2075                                  */
2076                                 (void) pthread_cond_broadcast(
2077                                     &rip->ri_queue_cv);
2078                         }
2079 
2080                         MUTEX_UNLOCK(&rip->ri_queue_lock);
2081 nolookup:
2082                         restarter_event_release(e);
2083 
2084                         MUTEX_LOCK(&ru->restarter_update_lock);
2085                 }
2086         }
2087 
2088         /*
2089          * Unreachable for now -- there's currently no graceful cleanup
2090          * called on exit().
2091          */
2092         (void) scf_handle_unbind(h);
2093         scf_handle_destroy(h);
2094         return (NULL);
2095 }
2096 
2097 static restarter_inst_t *
2098 contract_to_inst(ctid_t ctid)
2099 {
2100         restarter_inst_t *inst;
2101         int id;
2102 
2103         id = lookup_inst_by_contract(ctid);
2104         if (id == -1)
2105                 return (NULL);
2106 
2107         inst = inst_lookup_by_id(id);
2108         if (inst != NULL) {
2109                 /*
2110                  * Since ri_lock isn't held by the contract id lookup, this
2111                  * instance may have been restarted and now be in a new
2112                  * contract, making the old contract no longer valid for this
2113                  * instance.
2114                  */
2115                 if (ctid != inst->ri_i.i_primary_ctid) {
2116                         MUTEX_UNLOCK(&inst->ri_lock);
2117                         inst = NULL;
2118                 }
2119         }
2120         return (inst);
2121 }
2122 
2123 /*
2124  * void contract_action()
2125  *   Take action on contract events.
2126  */
2127 static void
2128 contract_action(scf_handle_t *h, restarter_inst_t *inst, ctid_t id,
2129     uint32_t type)
2130 {
2131         const char *fmri = inst->ri_i.i_fmri;
2132 
2133         assert(MUTEX_HELD(&inst->ri_lock));
2134 
2135         /*
2136          * If startd has stopped this contract, there is no need to
2137          * stop it again.
2138          */
2139         if (inst->ri_i.i_primary_ctid > 0 &&
2140             inst->ri_i.i_primary_ctid_stopped)
2141                 return;
2142 
2143         if ((type & (CT_PR_EV_EMPTY | CT_PR_EV_CORE | CT_PR_EV_SIGNAL
2144             | CT_PR_EV_HWERR)) == 0) {
2145                 /*
2146                  * There shouldn't be other events, since that's not how we set
2147                  * the terms. Thus, just log an error and drive on.
2148                  */
2149                 log_framework(LOG_NOTICE,
2150                     "%s: contract %ld received unexpected critical event "
2151                     "(%d)\n", fmri, id, type);
2152                 return;
2153         }
2154 
2155         assert(instance_in_transition(inst) == 0);
2156 
2157         if (instance_is_wait_style(inst)) {
2158                 /*
2159                  * We ignore all events; if they impact the
2160                  * process we're monitoring, then the
2161                  * wait_thread will stop the instance.
2162                  */
2163                 log_framework(LOG_DEBUG,
2164                     "%s: ignoring contract event on wait-style service\n",
2165                     fmri);
2166         } else {
2167                 /*
2168                  * A CT_PR_EV_EMPTY event is an RSTOP_EXIT request.
2169                  */
2170                 switch (type) {
2171                 case CT_PR_EV_EMPTY:
2172                         (void) stop_instance(h, inst, RSTOP_EXIT);
2173                         break;
2174                 case CT_PR_EV_CORE:
2175                         (void) stop_instance(h, inst, RSTOP_CORE);
2176                         break;
2177                 case CT_PR_EV_SIGNAL:
2178                         (void) stop_instance(h, inst, RSTOP_SIGNAL);
2179                         break;
2180                 case CT_PR_EV_HWERR:
2181                         (void) stop_instance(h, inst, RSTOP_HWERR);
2182                         break;
2183                 }
2184         }
2185 }
2186 
2187 /*
2188  * void *restarter_contract_event_thread(void *)
2189  *   Listens to the process contract bundle for critical events, taking action
2190  *   on events from contracts we know we are responsible for.
2191  */
2192 /*ARGSUSED*/
2193 static void *
2194 restarter_contracts_event_thread(void *unused)
2195 {
2196         int fd, err;
2197         scf_handle_t *local_handle;
2198 
2199         /*
2200          * Await graph load completion.  That is, stop here, until we've scanned
2201          * the repository for contract - instance associations.
2202          */
2203         MUTEX_LOCK(&st->st_load_lock);
2204         while (!(st->st_load_complete && st->st_load_instances == 0))
2205                 (void) pthread_cond_wait(&st->st_load_cv, &st->st_load_lock);
2206         MUTEX_UNLOCK(&st->st_load_lock);
2207 
2208         /*
2209          * This is a new thread, and thus, gets its own handle
2210          * to the repository.
2211          */
2212         if ((local_handle = libscf_handle_create_bound(SCF_VERSION)) == NULL)
2213                 uu_die("Unable to bind a new repository handle: %s\n",
2214                     scf_strerror(scf_error()));
2215 
2216         fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
2217         if (fd == -1)
2218                 uu_die("process bundle open failed");
2219 
2220         /*
2221          * Make sure we get all events (including those generated by configd
2222          * before this thread was started).
2223          */
2224         err = ct_event_reset(fd);
2225         assert(err == 0);
2226 
2227         for (;;) {
2228                 int efd, sfd;
2229                 ct_evthdl_t ev;
2230                 uint32_t type;
2231                 ctevid_t evid;
2232                 ct_stathdl_t status;
2233                 ctid_t ctid;
2234                 restarter_inst_t *inst;
2235                 uint64_t cookie;
2236 
2237                 if (err = ct_event_read_critical(fd, &ev)) {
2238                         log_error(LOG_WARNING,
2239                             "Error reading next contract event: %s",
2240                             strerror(err));
2241                         continue;
2242                 }
2243 
2244                 evid = ct_event_get_evid(ev);
2245                 ctid = ct_event_get_ctid(ev);
2246                 type = ct_event_get_type(ev);
2247 
2248                 /* Fetch cookie. */
2249                 if ((sfd = contract_open(ctid, "process", "status", O_RDONLY))
2250                     < 0) {
2251                         ct_event_free(ev);
2252                         continue;
2253                 }
2254 
2255                 if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
2256                         log_framework(LOG_WARNING, "Could not get status for "
2257                             "contract %ld: %s\n", ctid, strerror(err));
2258 
2259                         startd_close(sfd);
2260                         ct_event_free(ev);
2261                         continue;
2262                 }
2263 
2264                 cookie = ct_status_get_cookie(status);
2265 
2266                 log_framework(LOG_DEBUG, "Received event %d for ctid %ld "
2267                     "cookie %lld\n", type, ctid, cookie);
2268 
2269                 ct_status_free(status);
2270 
2271                 startd_close(sfd);
2272 
2273                 /*
2274                  * svc.configd(1M) restart handling performed by the
2275                  * fork_configd_thread.  We don't acknowledge, as that thread
2276                  * will do so.
2277                  */
2278                 if (cookie == CONFIGD_COOKIE) {
2279                         ct_event_free(ev);
2280                         continue;
2281                 }
2282 
2283                 inst = NULL;
2284                 if (storing_contract != 0 &&
2285                     (inst = contract_to_inst(ctid)) == NULL) {
2286                         /*
2287                          * This can happen for two reasons:
2288                          * - method_run() has not yet stored the
2289                          *    the contract into the internal hash table.
2290                          * - we receive an EMPTY event for an abandoned
2291                          *    contract.
2292                          * If there is any contract in the process of
2293                          * being stored into the hash table then re-read
2294                          * the event later.
2295                          */
2296                         log_framework(LOG_DEBUG,
2297                             "Reset event %d for unknown "
2298                             "contract id %ld\n", type, ctid);
2299 
2300                         /* don't go too fast */
2301                         (void) poll(NULL, 0, 100);
2302 
2303                         (void) ct_event_reset(fd);
2304                         ct_event_free(ev);
2305                         continue;
2306                 }
2307 
2308                 /*
2309                  * Do not call contract_to_inst() again if first
2310                  * call succeeded.
2311                  */
2312                 if (inst == NULL)
2313                         inst = contract_to_inst(ctid);
2314                 if (inst == NULL) {
2315                         /*
2316                          * This can happen if we receive an EMPTY
2317                          * event for an abandoned contract.
2318                          */
2319                         log_framework(LOG_DEBUG,
2320                             "Received event %d for unknown contract id "
2321                             "%ld\n", type, ctid);
2322                 } else {
2323                         log_framework(LOG_DEBUG,
2324                             "Received event %d for contract id "
2325                             "%ld (%s)\n", type, ctid,
2326                             inst->ri_i.i_fmri);
2327 
2328                         contract_action(local_handle, inst, ctid, type);
2329 
2330                         MUTEX_UNLOCK(&inst->ri_lock);
2331                 }
2332 
2333                 efd = contract_open(ct_event_get_ctid(ev), "process", "ctl",
2334                     O_WRONLY);
2335                 if (efd != -1) {
2336                         (void) ct_ctl_ack(efd, evid);
2337                         startd_close(efd);
2338                 }
2339 
2340                 ct_event_free(ev);
2341 
2342         }
2343 
2344         /*NOTREACHED*/
2345         return (NULL);
2346 }
2347 
2348 /*
2349  * Timeout queue, processed by restarter_timeouts_event_thread().
2350  */
2351 timeout_queue_t *timeouts;
2352 static uu_list_pool_t *timeout_pool;
2353 
2354 typedef struct timeout_update {
2355         pthread_mutex_t         tu_lock;
2356         pthread_cond_t          tu_cv;
2357         int                     tu_wakeup;
2358 } timeout_update_t;
2359 
2360 timeout_update_t *tu;
2361 
2362 static const char *timeout_ovr_svcs[] = {
2363         "svc:/system/manifest-import:default",
2364         "svc:/network/initial:default",
2365         "svc:/network/service:default",
2366         "svc:/system/rmtmpfiles:default",
2367         "svc:/network/loopback:default",
2368         "svc:/network/physical:default",
2369         "svc:/system/device/local:default",
2370         "svc:/system/filesystem/usr:default",
2371         "svc:/system/filesystem/minimal:default",
2372         "svc:/system/filesystem/local:default",
2373         NULL
2374 };
2375 
2376 int
2377 is_timeout_ovr(restarter_inst_t *inst)
2378 {
2379         int i;
2380 
2381         for (i = 0; timeout_ovr_svcs[i] != NULL; ++i) {
2382                 if (strcmp(inst->ri_i.i_fmri, timeout_ovr_svcs[i]) == 0) {
2383                         log_instance(inst, B_TRUE, "Timeout override by "
2384                             "svc.startd.  Using infinite timeout.");
2385                         return (1);
2386                 }
2387         }
2388 
2389         return (0);
2390 }
2391 
2392 /*ARGSUSED*/
2393 static int
2394 timeout_compare(const void *lc_arg, const void *rc_arg, void *private)
2395 {
2396         hrtime_t t1 = ((const timeout_entry_t *)lc_arg)->te_timeout;
2397         hrtime_t t2 = ((const timeout_entry_t *)rc_arg)->te_timeout;
2398 
2399         if (t1 > t2)
2400                 return (1);
2401         else if (t1 < t2)
2402                 return (-1);
2403         return (0);
2404 }
2405 
2406 void
2407 timeout_init()
2408 {
2409         timeouts = startd_zalloc(sizeof (timeout_queue_t));
2410 
2411         (void) pthread_mutex_init(&timeouts->tq_lock, &mutex_attrs);
2412 
2413         timeout_pool = startd_list_pool_create("timeouts",
2414             sizeof (timeout_entry_t), offsetof(timeout_entry_t, te_link),
2415             timeout_compare, UU_LIST_POOL_DEBUG);
2416         assert(timeout_pool != NULL);
2417 
2418         timeouts->tq_list = startd_list_create(timeout_pool,
2419             timeouts, UU_LIST_SORTED);
2420         assert(timeouts->tq_list != NULL);
2421 
2422         tu = startd_zalloc(sizeof (timeout_update_t));
2423         (void) pthread_cond_init(&tu->tu_cv, NULL);
2424         (void) pthread_mutex_init(&tu->tu_lock, &mutex_attrs);
2425 }
2426 
2427 void
2428 timeout_insert(restarter_inst_t *inst, ctid_t cid, uint64_t timeout_sec)
2429 {
2430         hrtime_t now, timeout;
2431         timeout_entry_t *entry;
2432         uu_list_index_t idx;
2433 
2434         assert(MUTEX_HELD(&inst->ri_lock));
2435 
2436         now = gethrtime();
2437 
2438         /*
2439          * If we overflow LLONG_MAX, we're never timing out anyways, so
2440          * just return.
2441          */
2442         if (timeout_sec >= (LLONG_MAX - now) / 1000000000LL) {
2443                 log_instance(inst, B_TRUE, "timeout_seconds too large, "
2444                     "treating as infinite.");
2445                 return;
2446         }
2447 
2448         /* hrtime is in nanoseconds. Convert timeout_sec. */
2449         timeout = now + (timeout_sec * 1000000000LL);
2450 
2451         entry = startd_alloc(sizeof (timeout_entry_t));
2452         entry->te_timeout = timeout;
2453         entry->te_ctid = cid;
2454         entry->te_fmri = safe_strdup(inst->ri_i.i_fmri);
2455         entry->te_logstem = safe_strdup(inst->ri_logstem);
2456         entry->te_fired = 0;
2457         /* Insert the calculated timeout time onto the queue. */
2458         MUTEX_LOCK(&timeouts->tq_lock);
2459         (void) uu_list_find(timeouts->tq_list, entry, NULL, &idx);
2460         uu_list_node_init(entry, &entry->te_link, timeout_pool);
2461         uu_list_insert(timeouts->tq_list, entry, idx);
2462         MUTEX_UNLOCK(&timeouts->tq_lock);
2463 
2464         assert(inst->ri_timeout == NULL);
2465         inst->ri_timeout = entry;
2466 
2467         MUTEX_LOCK(&tu->tu_lock);
2468         tu->tu_wakeup = 1;
2469         (void) pthread_cond_broadcast(&tu->tu_cv);
2470         MUTEX_UNLOCK(&tu->tu_lock);
2471 }
2472 
2473 
2474 void
2475 timeout_remove(restarter_inst_t *inst, ctid_t cid)
2476 {
2477         assert(MUTEX_HELD(&inst->ri_lock));
2478 
2479         if (inst->ri_timeout == NULL)
2480                 return;
2481 
2482         assert(inst->ri_timeout->te_ctid == cid);
2483 
2484         MUTEX_LOCK(&timeouts->tq_lock);
2485         uu_list_remove(timeouts->tq_list, inst->ri_timeout);
2486         MUTEX_UNLOCK(&timeouts->tq_lock);
2487 
2488         free(inst->ri_timeout->te_fmri);
2489         free(inst->ri_timeout->te_logstem);
2490         startd_free(inst->ri_timeout, sizeof (timeout_entry_t));
2491         inst->ri_timeout = NULL;
2492 }
2493 
2494 static int
2495 timeout_now()
2496 {
2497         timeout_entry_t *e;
2498         hrtime_t now;
2499         int ret;
2500 
2501         now = gethrtime();
2502 
2503         /*
2504          * Walk through the (sorted) timeouts list.  While the timeout
2505          * at the head of the list is <= the current time, kill the
2506          * method.
2507          */
2508         MUTEX_LOCK(&timeouts->tq_lock);
2509 
2510         for (e = uu_list_first(timeouts->tq_list);
2511             e != NULL && e->te_timeout <= now;
2512             e = uu_list_next(timeouts->tq_list, e)) {
2513                 log_framework(LOG_WARNING, "%s: Method or service exit timed "
2514                     "out.  Killing contract %ld.\n", e->te_fmri, e->te_ctid);
2515                 log_instance_fmri(e->te_fmri, e->te_logstem, B_TRUE,
2516                     "Method or service exit timed out.  Killing contract %ld.",
2517                     e->te_ctid);
2518                 e->te_fired = 1;
2519                 (void) contract_kill(e->te_ctid, SIGKILL, e->te_fmri);
2520         }
2521 
2522         if (uu_list_numnodes(timeouts->tq_list) > 0)
2523                 ret = 0;
2524         else
2525                 ret = -1;
2526 
2527         MUTEX_UNLOCK(&timeouts->tq_lock);
2528 
2529         return (ret);
2530 }
2531 
2532 /*
2533  * void *restarter_timeouts_event_thread(void *)
2534  *   Responsible for monitoring the method timeouts.  This thread must
2535  *   be started before any methods are called.
2536  */
2537 /*ARGSUSED*/
2538 static void *
2539 restarter_timeouts_event_thread(void *unused)
2540 {
2541         /*
2542          * Timeouts are entered on a priority queue, which is processed by
2543          * this thread.  As timeouts are specified in seconds, we'll do
2544          * the necessary processing every second, as long as the queue
2545          * is not empty.
2546          */
2547 
2548         /*CONSTCOND*/
2549         while (1) {
2550                 /*
2551                  * As long as the timeout list isn't empty, process it
2552                  * every second.
2553                  */
2554                 if (timeout_now() == 0) {
2555                         (void) sleep(1);
2556                         continue;
2557                 }
2558 
2559                 /* The list is empty, wait until we have more timeouts. */
2560                 MUTEX_LOCK(&tu->tu_lock);
2561 
2562                 while (tu->tu_wakeup == 0)
2563                         (void) pthread_cond_wait(&tu->tu_cv, &tu->tu_lock);
2564 
2565                 tu->tu_wakeup = 0;
2566                 MUTEX_UNLOCK(&tu->tu_lock);
2567         }
2568 
2569         return (NULL);
2570 }
2571 
2572 void
2573 restarter_start()
2574 {
2575         (void) startd_thread_create(restarter_timeouts_event_thread, NULL);
2576         (void) startd_thread_create(restarter_event_thread, NULL);
2577         (void) startd_thread_create(restarter_contracts_event_thread, NULL);
2578         (void) startd_thread_create(wait_thread, NULL);
2579 }
2580 
2581 
2582 void
2583 restarter_init()
2584 {
2585         restarter_instance_pool = startd_list_pool_create("restarter_instances",
2586             sizeof (restarter_inst_t), offsetof(restarter_inst_t,
2587             ri_link), restarter_instance_compare, UU_LIST_POOL_DEBUG);
2588         (void) memset(&instance_list, 0, sizeof (instance_list));
2589 
2590         (void) pthread_mutex_init(&instance_list.ril_lock, &mutex_attrs);
2591         instance_list.ril_instance_list = startd_list_create(
2592             restarter_instance_pool, &instance_list, UU_LIST_SORTED);
2593 
2594         restarter_queue_pool = startd_list_pool_create(
2595             "restarter_instance_queue", sizeof (restarter_instance_qentry_t),
2596             offsetof(restarter_instance_qentry_t,  riq_link), NULL,
2597             UU_LIST_POOL_DEBUG);
2598 
2599         contract_list_pool = startd_list_pool_create(
2600             "contract_list", sizeof (contract_entry_t),
2601             offsetof(contract_entry_t,  ce_link), NULL,
2602             UU_LIST_POOL_DEBUG);
2603         contract_hash_init();
2604 
2605         log_framework(LOG_DEBUG, "Initialized restarter\n");
2606 }