1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  *
  25  * Copyright 2016 RackTop Systems.
  26  */
  27 
  28 
  29 /*
  30  * transition.c - Graph State Machine
  31  *
  32  * The graph state machine is implemented here, with a typical approach
  33  * of a function per state.  Separating the implementation allows more
  34  * clarity into the actions taken on notification of state change, as well
  35  * as a place for future expansion including hooks for configurable actions.
  36  * All functions are called with dgraph_lock held.
  37  *
  38  * The start action for this state machine is not explicit.  The states
  39  * (ONLINE and DEGRADED) which need to know when they're entering the state
  40  * due to a daemon restart implement this understanding by checking for
  41  * transition from uninitialized.  In the future, this would likely be better
  42  * as an explicit start action instead of relying on an overloaded transition.
  43  *
  44  * All gt_enter functions use the same set of return codes.
  45  *    0              success
  46  *    ECONNABORTED   repository connection aborted
  47  */
  48 
  49 #include "startd.h"
  50 
  51 static int
  52 gt_running(restarter_instance_state_t state)
  53 {
  54         if (state == RESTARTER_STATE_ONLINE ||
  55             state == RESTARTER_STATE_DEGRADED)
  56                 return (1);
  57 
  58         return (0);
  59 }
  60 
  61 static int
  62 gt_enter_uninit(scf_handle_t *h, graph_vertex_t *v,
  63     restarter_instance_state_t old_state, restarter_error_t rerr)
  64 {
  65         int err;
  66         scf_instance_t *inst;
  67 
  68         /* Initialize instance by refreshing it. */
  69 
  70         err = libscf_fmri_get_instance(h, v->gv_name, &inst);
  71         switch (err) {
  72         case 0:
  73                 break;
  74 
  75         case ECONNABORTED:
  76                 return (ECONNABORTED);
  77 
  78         case ENOENT:
  79                 return (0);
  80 
  81         case EINVAL:
  82         case ENOTSUP:
  83         default:
  84                 bad_error("libscf_fmri_get_instance", err);
  85         }
  86 
  87         err = refresh_vertex(v, inst);
  88         if (err == 0)
  89                 graph_enable_by_vertex(v, v->gv_flags & GV_ENABLED, 0);
  90 
  91         scf_instance_destroy(inst);
  92 
  93         /* If the service was running, propagate a stop event. */
  94         if (gt_running(old_state)) {
  95                 log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
  96                     v->gv_name);
  97 
  98                 graph_transition_propagate(v, PROPAGATE_STOP, rerr);
  99         }
 100 
 101         graph_transition_sulogin(RESTARTER_STATE_UNINIT, old_state);
 102         return (0);
 103 }
 104 
 105 /* ARGSUSED */
 106 static int
 107 gt_enter_maint(scf_handle_t *h, graph_vertex_t *v,
 108     restarter_instance_state_t old_state, restarter_error_t rerr)
 109 {
 110         int to_offline = v->gv_flags & GV_TOOFFLINE;
 111 
 112         /*
 113          * If the service was running, propagate a stop event.  If the
 114          * service was not running the maintenance transition may satisfy
 115          * optional dependencies and should be propagated to determine
 116          * whether new dependents are satisfiable.
 117          * Instances that transition to maintenance and have the GV_TOOFFLINE
 118          * flag are special because they can expose new subtree leaves so
 119          * propagate the offline to the instance dependencies.
 120          */
 121 
 122         /* instance transitioning to maintenance is considered disabled */
 123         v->gv_flags &= ~GV_TODISABLE;
 124         v->gv_flags &= ~GV_TOOFFLINE;
 125 
 126         if (gt_running(old_state)) {
 127                 /*
 128                  * Handle state change during instance disabling.
 129                  * Propagate offline to the new exposed leaves.
 130                  */
 131                 if (to_offline) {
 132                         log_framework(LOG_DEBUG, "%s removed from subtree\n",
 133                             v->gv_name);
 134 
 135                         graph_offline_subtree_leaves(v, (void *)h);
 136                 }
 137 
 138                 log_framework(LOG_DEBUG, "Propagating maintenance (stop) of "
 139                     "%s.\n", v->gv_name);
 140 
 141                 graph_transition_propagate(v, PROPAGATE_STOP, rerr);
 142 
 143                 /*
 144                  * The maintenance transition may satisfy optional_all/restart
 145                  * dependencies and should be propagated to determine
 146                  * whether new dependents are satisfiable.
 147                  */
 148                 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
 149         } else {
 150                 log_framework(LOG_DEBUG, "Propagating maintenance of %s.\n",
 151                     v->gv_name);
 152 
 153                 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
 154         }
 155 
 156         graph_transition_sulogin(RESTARTER_STATE_MAINT, old_state);
 157         return (0);
 158 }
 159 
 160 /* ARGSUSED */
 161 static int
 162 gt_enter_offline(scf_handle_t *h, graph_vertex_t *v,
 163     restarter_instance_state_t old_state, restarter_error_t rerr)
 164 {
 165         int to_offline = v->gv_flags & GV_TOOFFLINE;
 166 
 167         v->gv_flags &= ~GV_TOOFFLINE;
 168 
 169         /*
 170          * If the instance should be enabled, see if we can start it.
 171          * Otherwise send a disable command.
 172          * If a instance has the GV_TOOFFLINE flag set then it must
 173          * remains offline until the disable process completes.
 174          */
 175         if (v->gv_flags & GV_ENABLED) {
 176                 if (to_offline == 0)
 177                         graph_start_if_satisfied(v);
 178         } else {
 179                 if (gt_running(old_state) && v->gv_post_disable_f)
 180                         v->gv_post_disable_f();
 181 
 182                 vertex_send_event(v, RESTARTER_EVENT_TYPE_DISABLE);
 183         }
 184 
 185         /*
 186          * If the service was running, propagate a stop event.  If the
 187          * service was not running the offline transition may satisfy
 188          * optional dependencies and should be propagated to determine
 189          * whether new dependents are satisfiable.
 190          * Instances that transition to offline and have the GV_TOOFFLINE flag
 191          * are special because they can expose new subtree leaves so propagate
 192          * the offline to the instance dependencies.
 193          */
 194         if (gt_running(old_state)) {
 195                 /*
 196                  * Handle state change during instance disabling.
 197                  * Propagate offline to the new exposed leaves.
 198                  */
 199                 if (to_offline) {
 200                         log_framework(LOG_DEBUG, "%s removed from subtree\n",
 201                             v->gv_name);
 202 
 203                         graph_offline_subtree_leaves(v, (void *)h);
 204                 }
 205 
 206                 log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
 207                     v->gv_name);
 208 
 209                 graph_transition_propagate(v, PROPAGATE_STOP, rerr);
 210 
 211                 /*
 212                  * The offline transition may satisfy require_any/restart
 213                  * dependencies and should be propagated to determine
 214                  * whether new dependents are satisfiable.
 215                  */
 216                 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
 217         } else {
 218                 log_framework(LOG_DEBUG, "Propagating offline of %s.\n",
 219                     v->gv_name);
 220 
 221                 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
 222         }
 223 
 224         graph_transition_sulogin(RESTARTER_STATE_OFFLINE, old_state);
 225         return (0);
 226 }
 227 
 228 /* ARGSUSED */
 229 static int
 230 gt_enter_disabled(scf_handle_t *h, graph_vertex_t *v,
 231     restarter_instance_state_t old_state, restarter_error_t rerr)
 232 {
 233         int to_offline = v->gv_flags & GV_TOOFFLINE;
 234 
 235         v->gv_flags &= ~GV_TODISABLE;
 236         v->gv_flags &= ~GV_TOOFFLINE;
 237 
 238         /*
 239          * If the instance should be disabled, no problem.  Otherwise,
 240          * send an enable command, which should result in the instance
 241          * moving to OFFLINE unless the instance is part of a subtree
 242          * (non root) and in this case the result is unpredictable.
 243          */
 244         if (v->gv_flags & GV_ENABLED) {
 245                 vertex_send_event(v, RESTARTER_EVENT_TYPE_ENABLE);
 246         } else if (gt_running(old_state) && v->gv_post_disable_f) {
 247                 v->gv_post_disable_f();
 248         }
 249 
 250         /*
 251          * If the service was running, propagate this as a stop.  If the
 252          * service was not running the disabled transition may satisfy
 253          * optional dependencies and should be propagated to determine
 254          * whether new dependents are satisfiable.
 255          */
 256         if (gt_running(old_state)) {
 257                 /*
 258                  * We need to propagate the offline to new exposed leaves in
 259                  * case we've just disabled an instance that was part of a
 260                  * subtree.
 261                  */
 262                 if (to_offline) {
 263                         log_framework(LOG_DEBUG, "%s removed from subtree\n",
 264                             v->gv_name);
 265 
 266                         /*
 267                          * Handle state change during instance disabling.
 268                          * Propagate offline to the new exposed leaves.
 269                          */
 270                         graph_offline_subtree_leaves(v, (void *)h);
 271                 }
 272 
 273 
 274                 log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
 275                     v->gv_name);
 276 
 277                 graph_transition_propagate(v, PROPAGATE_STOP, rerr);
 278 
 279                 /*
 280                  * The disable transition may satisfy optional_all/restart
 281                  * dependencies and should be propagated to determine
 282                  * whether new dependents are satisfiable.
 283                  */
 284                 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
 285         } else {
 286                 log_framework(LOG_DEBUG, "Propagating disable of %s.\n",
 287                     v->gv_name);
 288 
 289                 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
 290         }
 291 
 292         graph_transition_sulogin(RESTARTER_STATE_DISABLED, old_state);
 293         return (0);
 294 }
 295 
 296 static int
 297 gt_internal_online_or_degraded(scf_handle_t *h, graph_vertex_t *v,
 298     restarter_instance_state_t old_state, restarter_error_t rerr)
 299 {
 300         int r;
 301 
 302         /*
 303          * If the instance has just come up, update the start
 304          * snapshot.
 305          */
 306         if (gt_running(old_state) == 0) {
 307                 /*
 308                  * Don't fire if we're just recovering state
 309                  * after a restart.
 310                  */
 311                 if (old_state != RESTARTER_STATE_UNINIT &&
 312                     v->gv_post_online_f)
 313                         v->gv_post_online_f();
 314 
 315                 r = libscf_snapshots_poststart(h, v->gv_name, B_TRUE);
 316                 switch (r) {
 317                 case 0:
 318                 case ENOENT:
 319                         /*
 320                          * If ENOENT, the instance must have been
 321                          * deleted.  Pretend we were successful since
 322                          * we should get a delete event later.
 323                          */
 324                         break;
 325 
 326                 case ECONNABORTED:
 327                         return (ECONNABORTED);
 328 
 329                 case EACCES:
 330                 case ENOTSUP:
 331                 default:
 332                         bad_error("libscf_snapshots_poststart", r);
 333                 }
 334         }
 335 
 336         if (!(v->gv_flags & GV_ENABLED)) {
 337                 vertex_send_event(v, RESTARTER_EVENT_TYPE_DISABLE);
 338         } else if (v->gv_flags & GV_TOOFFLINE) {
 339                 /*
 340                  * If the vertex has the GV_TOOFFLINE flag set then that's
 341                  * because the instance was transitioning from offline to
 342                  * online and the reverse disable algorithm doesn't offline
 343                  * those instances because it was already appearing offline.
 344                  * Try to do it now.
 345                  */
 346                 if (insubtree_dependents_down(v))
 347                         offline_vertex(v);
 348         }
 349 
 350         if (gt_running(old_state) == 0) {
 351                 log_framework(LOG_DEBUG, "Propagating start of %s.\n",
 352                     v->gv_name);
 353 
 354                 graph_transition_propagate(v, PROPAGATE_START, rerr);
 355         } else if (rerr == RERR_REFRESH) {
 356                 /* For refresh we'll get a message sans state change */
 357 
 358                 log_framework(LOG_DEBUG, "Propagating refresh of %s.\n",
 359                     v->gv_name);
 360 
 361                 graph_transition_propagate(v, PROPAGATE_STOP, rerr);
 362         }
 363 
 364         return (0);
 365 }
 366 
 367 static int
 368 gt_enter_online(scf_handle_t *h, graph_vertex_t *v,
 369     restarter_instance_state_t old_state, restarter_error_t rerr)
 370 {
 371         int r;
 372 
 373         r = gt_internal_online_or_degraded(h, v, old_state, rerr);
 374         if (r != 0)
 375                 return (r);
 376 
 377         graph_transition_sulogin(RESTARTER_STATE_ONLINE, old_state);
 378         return (0);
 379 }
 380 
 381 static int
 382 gt_enter_degraded(scf_handle_t *h, graph_vertex_t *v,
 383     restarter_instance_state_t old_state, restarter_error_t rerr)
 384 {
 385         int r;
 386 
 387         r = gt_internal_online_or_degraded(h, v, old_state, rerr);
 388         if (r != 0)
 389                 return (r);
 390 
 391         graph_transition_sulogin(RESTARTER_STATE_DEGRADED, old_state);
 392         return (0);
 393 }
 394 
 395 /*
 396  * gt_transition() implements the state transition for the graph
 397  * state machine.  It can return:
 398  *    0              success
 399  *    ECONNABORTED   repository connection aborted
 400  *
 401  * v->gv_state should be set to the state we're transitioning to before
 402  * calling this function.
 403  */
 404 int
 405 gt_transition(scf_handle_t *h, graph_vertex_t *v, restarter_error_t rerr,
 406     restarter_instance_state_t old_state)
 407 {
 408         int err;
 409         int lost_repository = 0;
 410 
 411         /*
 412          * If there's a common set of work to be done on exit from the
 413          * old_state, include it as a separate set of functions here.  For
 414          * now there's no such work, so there are no gt_exit functions.
 415          */
 416 
 417         err = vertex_subgraph_dependencies_shutdown(h, v, old_state);
 418         switch (err) {
 419         case 0:
 420                 break;
 421 
 422         case ECONNABORTED:
 423                 lost_repository = 1;
 424                 break;
 425 
 426         default:
 427                 bad_error("vertex_subgraph_dependencies_shutdown", err);
 428         }
 429 
 430         /*
 431          * Now call the appropriate gt_enter function for the new state.
 432          */
 433         switch (v->gv_state) {
 434         case RESTARTER_STATE_UNINIT:
 435                 err = gt_enter_uninit(h, v, old_state, rerr);
 436                 break;
 437 
 438         case RESTARTER_STATE_DISABLED:
 439                 err = gt_enter_disabled(h, v, old_state, rerr);
 440                 break;
 441 
 442         case RESTARTER_STATE_OFFLINE:
 443                 err = gt_enter_offline(h, v, old_state, rerr);
 444                 break;
 445 
 446         case RESTARTER_STATE_ONLINE:
 447                 err = gt_enter_online(h, v, old_state, rerr);
 448                 break;
 449 
 450         case RESTARTER_STATE_DEGRADED:
 451                 err = gt_enter_degraded(h, v, old_state, rerr);
 452                 break;
 453 
 454         case RESTARTER_STATE_MAINT:
 455                 err = gt_enter_maint(h, v, old_state, rerr);
 456                 break;
 457 
 458         default:
 459                 /* Shouldn't be in an invalid state. */
 460 #ifndef NDEBUG
 461                 uu_warn("%s:%d: Invalid state %d.\n", __FILE__, __LINE__,
 462                     v->gv_state);
 463 #endif
 464                 abort();
 465         }
 466 
 467         switch (err) {
 468         case 0:
 469                 break;
 470 
 471         case ECONNABORTED:
 472                 lost_repository = 1;
 473                 break;
 474 
 475         default:
 476 #ifndef NDEBUG
 477                 uu_warn("%s:%d: "
 478                     "gt_enter_%s() failed with unexpected error %d.\n",
 479                     __FILE__, __LINE__, instance_state_str[v->gv_state], err);
 480 #endif
 481                 abort();
 482         }
 483 
 484         return (lost_repository ? ECONNABORTED : 0);
 485 }