1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 
  27 /*
  28  * transition.c - Graph State Machine
  29  *
  30  * The graph state machine is implemented here, with a typical approach
  31  * of a function per state.  Separating the implementation allows more
  32  * clarity into the actions taken on notification of state change, as well
  33  * as a place for future expansion including hooks for configurable actions.
  34  * All functions are called with dgraph_lock held.
  35  *
  36  * The start action for this state machine is not explicit.  The states
  37  * (ONLINE and DEGRADED) which need to know when they're entering the state
  38  * due to a daemon restart implement this understanding by checking for
  39  * transition from uninitialized.  In the future, this would likely be better
  40  * as an explicit start action instead of relying on an overloaded transition.
  41  *
  42  * All gt_enter functions use the same set of return codes.
  43  *    0              success
  44  *    ECONNABORTED   repository connection aborted
  45  */
  46 
  47 #include "startd.h"
  48 
  49 static int
  50 gt_running(restarter_instance_state_t state)
  51 {
  52         if (state == RESTARTER_STATE_ONLINE ||
  53             state == RESTARTER_STATE_DEGRADED)
  54                 return (1);
  55 
  56         return (0);
  57 }
  58 
  59 static int
  60 gt_enter_uninit(scf_handle_t *h, graph_vertex_t *v,
  61     restarter_instance_state_t old_state, restarter_error_t rerr)
  62 {
  63         int err;
  64         scf_instance_t *inst;
  65 
  66         /* Initialize instance by refreshing it. */
  67 
  68         err = libscf_fmri_get_instance(h, v->gv_name, &inst);
  69         switch (err) {
  70         case 0:
  71                 break;
  72 
  73         case ECONNABORTED:
  74                 return (ECONNABORTED);
  75 
  76         case ENOENT:
  77                 return (0);
  78 
  79         case EINVAL:
  80         case ENOTSUP:
  81         default:
  82                 bad_error("libscf_fmri_get_instance", err);
  83         }
  84 
  85         err = refresh_vertex(v, inst);
  86         if (err == 0)
  87                 graph_enable_by_vertex(v, v->gv_flags & GV_ENABLED, 0);
  88 
  89         scf_instance_destroy(inst);
  90 
  91         /* If the service was running, propagate a stop event. */
  92         if (gt_running(old_state)) {
  93                 log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
  94                     v->gv_name);
  95 
  96                 graph_transition_propagate(v, PROPAGATE_STOP, rerr);
  97         }
  98 
  99         graph_transition_sulogin(RESTARTER_STATE_UNINIT, old_state);
 100         return (0);
 101 }
 102 
 103 /* ARGSUSED */
 104 static int
 105 gt_enter_maint(scf_handle_t *h, graph_vertex_t *v,
 106     restarter_instance_state_t old_state, restarter_error_t rerr)
 107 {
 108         int to_offline = v->gv_flags & GV_TOOFFLINE;
 109 
 110         /*
 111          * If the service was running, propagate a stop event.  If the
 112          * service was not running the maintenance transition may satisfy
 113          * optional dependencies and should be propagated to determine
 114          * whether new dependents are satisfiable.
 115          * Instances that transition to maintenance and have the GV_TOOFFLINE
 116          * flag are special because they can expose new subtree leaves so
 117          * propagate the offline to the instance dependencies.
 118          */
 119 
 120         /* instance transitioning to maintenance is considered disabled */
 121         v->gv_flags &= ~GV_TODISABLE;
 122         v->gv_flags &= ~GV_TOOFFLINE;
 123 
 124         if (gt_running(old_state)) {
 125                 /*
 126                  * Handle state change during instance disabling.
 127                  * Propagate offline to the new exposed leaves.
 128                  */
 129                 if (to_offline) {
 130                         log_framework(LOG_DEBUG, "%s removed from subtree\n",
 131                             v->gv_name);
 132 
 133                         graph_offline_subtree_leaves(v, (void *)h);
 134                 }
 135 
 136                 log_framework(LOG_DEBUG, "Propagating maintenance (stop) of "
 137                     "%s.\n", v->gv_name);
 138 
 139                 graph_transition_propagate(v, PROPAGATE_STOP, rerr);
 140         } else {
 141                 log_framework(LOG_DEBUG, "Propagating maintenance of %s.\n",
 142                     v->gv_name);
 143 
 144                 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
 145         }
 146 
 147         graph_transition_sulogin(RESTARTER_STATE_MAINT, old_state);
 148         return (0);
 149 }
 150 
 151 /* ARGSUSED */
 152 static int
 153 gt_enter_offline(scf_handle_t *h, graph_vertex_t *v,
 154     restarter_instance_state_t old_state, restarter_error_t rerr)
 155 {
 156         int to_offline = v->gv_flags & GV_TOOFFLINE;
 157 
 158         v->gv_flags &= ~GV_TOOFFLINE;
 159 
 160         /*
 161          * If the instance should be enabled, see if we can start it.
 162          * Otherwise send a disable command.
 163          * If a instance has the GV_TOOFFLINE flag set then it must
 164          * remains offline until the disable process completes.
 165          */
 166         if (v->gv_flags & GV_ENABLED) {
 167                 if (to_offline == 0)
 168                         graph_start_if_satisfied(v);
 169         } else {
 170                 if (gt_running(old_state) && v->gv_post_disable_f)
 171                         v->gv_post_disable_f();
 172 
 173                 vertex_send_event(v, RESTARTER_EVENT_TYPE_DISABLE);
 174         }
 175 
 176         /*
 177          * If the service was running, propagate a stop event.  If the
 178          * service was not running the offline transition may satisfy
 179          * optional dependencies and should be propagated to determine
 180          * whether new dependents are satisfiable.
 181          * Instances that transition to offline and have the GV_TOOFFLINE flag
 182          * are special because they can expose new subtree leaves so propagate
 183          * the offline to the instance dependencies.
 184          */
 185         if (gt_running(old_state)) {
 186                 /*
 187                  * Handle state change during instance disabling.
 188                  * Propagate offline to the new exposed leaves.
 189                  */
 190                 if (to_offline) {
 191                         log_framework(LOG_DEBUG, "%s removed from subtree\n",
 192                             v->gv_name);
 193 
 194                         graph_offline_subtree_leaves(v, (void *)h);
 195                 }
 196 
 197                 log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
 198                     v->gv_name);
 199 
 200                 graph_transition_propagate(v, PROPAGATE_STOP, rerr);
 201 
 202                 /*
 203                  * The offline transition may satisfy require_any/restart
 204                  * dependencies and should be propagated to determine
 205                  * whether new dependents are satisfiable.
 206                  */
 207                 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
 208         } else {
 209                 log_framework(LOG_DEBUG, "Propagating offline of %s.\n",
 210                     v->gv_name);
 211 
 212                 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
 213         }
 214 
 215         graph_transition_sulogin(RESTARTER_STATE_OFFLINE, old_state);
 216         return (0);
 217 }
 218 
 219 /* ARGSUSED */
 220 static int
 221 gt_enter_disabled(scf_handle_t *h, graph_vertex_t *v,
 222     restarter_instance_state_t old_state, restarter_error_t rerr)
 223 {
 224         int to_offline = v->gv_flags & GV_TOOFFLINE;
 225 
 226         v->gv_flags &= ~GV_TODISABLE;
 227         v->gv_flags &= ~GV_TOOFFLINE;
 228 
 229         /*
 230          * If the instance should be disabled, no problem.  Otherwise,
 231          * send an enable command, which should result in the instance
 232          * moving to OFFLINE unless the instance is part of a subtree
 233          * (non root) and in this case the result is unpredictable.
 234          */
 235         if (v->gv_flags & GV_ENABLED) {
 236                 vertex_send_event(v, RESTARTER_EVENT_TYPE_ENABLE);
 237         } else if (gt_running(old_state) && v->gv_post_disable_f) {
 238                 v->gv_post_disable_f();
 239         }
 240 
 241         /*
 242          * If the service was running, propagate this as a stop.  If the
 243          * service was not running the disabled transition may satisfy
 244          * optional dependencies and should be propagated to determine
 245          * whether new dependents are satisfiable.
 246          */
 247         if (gt_running(old_state)) {
 248                 /*
 249                  * We need to propagate the offline to new exposed leaves in
 250                  * case we've just disabled an instance that was part of a
 251                  * subtree.
 252                  */
 253                 if (to_offline) {
 254                         log_framework(LOG_DEBUG, "%s removed from subtree\n",
 255                             v->gv_name);
 256 
 257                         /*
 258                          * Handle state change during instance disabling.
 259                          * Propagate offline to the new exposed leaves.
 260                          */
 261                         graph_offline_subtree_leaves(v, (void *)h);
 262                 }
 263 
 264 
 265                 log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
 266                     v->gv_name);
 267 
 268                 graph_transition_propagate(v, PROPAGATE_STOP, rerr);
 269 
 270         } else {
 271                 log_framework(LOG_DEBUG, "Propagating disable of %s.\n",
 272                     v->gv_name);
 273 
 274                 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
 275         }
 276 
 277         graph_transition_sulogin(RESTARTER_STATE_DISABLED, old_state);
 278         return (0);
 279 }
 280 
 281 static int
 282 gt_internal_online_or_degraded(scf_handle_t *h, graph_vertex_t *v,
 283     restarter_instance_state_t old_state, restarter_error_t rerr)
 284 {
 285         int r;
 286 
 287         /*
 288          * If the instance has just come up, update the start
 289          * snapshot.
 290          */
 291         if (gt_running(old_state) == 0) {
 292                 /*
 293                  * Don't fire if we're just recovering state
 294                  * after a restart.
 295                  */
 296                 if (old_state != RESTARTER_STATE_UNINIT &&
 297                     v->gv_post_online_f)
 298                         v->gv_post_online_f();
 299 
 300                 r = libscf_snapshots_poststart(h, v->gv_name, B_TRUE);
 301                 switch (r) {
 302                 case 0:
 303                 case ENOENT:
 304                         /*
 305                          * If ENOENT, the instance must have been
 306                          * deleted.  Pretend we were successful since
 307                          * we should get a delete event later.
 308                          */
 309                         break;
 310 
 311                 case ECONNABORTED:
 312                         return (ECONNABORTED);
 313 
 314                 case EACCES:
 315                 case ENOTSUP:
 316                 default:
 317                         bad_error("libscf_snapshots_poststart", r);
 318                 }
 319         }
 320 
 321         if (!(v->gv_flags & GV_ENABLED)) {
 322                 vertex_send_event(v, RESTARTER_EVENT_TYPE_DISABLE);
 323         } else if (v->gv_flags & GV_TOOFFLINE) {
 324                 /*
 325                  * If the vertex has the GV_TOOFFLINE flag set then that's
 326                  * because the instance was transitioning from offline to
 327                  * online and the reverse disable algorithm doesn't offline
 328                  * those instances because it was already appearing offline.
 329                  * So do it now.
 330                  */
 331                 offline_vertex(v);
 332         }
 333 
 334         if (gt_running(old_state) == 0) {
 335                 log_framework(LOG_DEBUG, "Propagating start of %s.\n",
 336                     v->gv_name);
 337 
 338                 graph_transition_propagate(v, PROPAGATE_START, rerr);
 339         } else if (rerr == RERR_REFRESH) {
 340                 /* For refresh we'll get a message sans state change */
 341 
 342                 log_framework(LOG_DEBUG, "Propagating refresh of %s.\n",
 343                     v->gv_name);
 344 
 345                 graph_transition_propagate(v, PROPAGATE_STOP, rerr);
 346         }
 347 
 348         return (0);
 349 }
 350 
 351 static int
 352 gt_enter_online(scf_handle_t *h, graph_vertex_t *v,
 353     restarter_instance_state_t old_state, restarter_error_t rerr)
 354 {
 355         int r;
 356 
 357         r = gt_internal_online_or_degraded(h, v, old_state, rerr);
 358         if (r != 0)
 359                 return (r);
 360 
 361         graph_transition_sulogin(RESTARTER_STATE_ONLINE, old_state);
 362         return (0);
 363 }
 364 
 365 static int
 366 gt_enter_degraded(scf_handle_t *h, graph_vertex_t *v,
 367     restarter_instance_state_t old_state, restarter_error_t rerr)
 368 {
 369         int r;
 370 
 371         r = gt_internal_online_or_degraded(h, v, old_state, rerr);
 372         if (r != 0)
 373                 return (r);
 374 
 375         graph_transition_sulogin(RESTARTER_STATE_DEGRADED, old_state);
 376         return (0);
 377 }
 378 
 379 /*
 380  * gt_transition() implements the state transition for the graph
 381  * state machine.  It can return:
 382  *    0              success
 383  *    ECONNABORTED   repository connection aborted
 384  *
 385  * v->gv_state should be set to the state we're transitioning to before
 386  * calling this function.
 387  */
 388 int
 389 gt_transition(scf_handle_t *h, graph_vertex_t *v, restarter_error_t rerr,
 390     restarter_instance_state_t old_state)
 391 {
 392         int err;
 393         int lost_repository = 0;
 394 
 395         /*
 396          * If there's a common set of work to be done on exit from the
 397          * old_state, include it as a separate set of functions here.  For
 398          * now there's no such work, so there are no gt_exit functions.
 399          */
 400 
 401         err = vertex_subgraph_dependencies_shutdown(h, v, old_state);
 402         switch (err) {
 403         case 0:
 404                 break;
 405 
 406         case ECONNABORTED:
 407                 lost_repository = 1;
 408                 break;
 409 
 410         default:
 411                 bad_error("vertex_subgraph_dependencies_shutdown", err);
 412         }
 413 
 414         /*
 415          * Now call the appropriate gt_enter function for the new state.
 416          */
 417         switch (v->gv_state) {
 418         case RESTARTER_STATE_UNINIT:
 419                 err = gt_enter_uninit(h, v, old_state, rerr);
 420                 break;
 421 
 422         case RESTARTER_STATE_DISABLED:
 423                 err = gt_enter_disabled(h, v, old_state, rerr);
 424                 break;
 425 
 426         case RESTARTER_STATE_OFFLINE:
 427                 err = gt_enter_offline(h, v, old_state, rerr);
 428                 break;
 429 
 430         case RESTARTER_STATE_ONLINE:
 431                 err = gt_enter_online(h, v, old_state, rerr);
 432                 break;
 433 
 434         case RESTARTER_STATE_DEGRADED:
 435                 err = gt_enter_degraded(h, v, old_state, rerr);
 436                 break;
 437 
 438         case RESTARTER_STATE_MAINT:
 439                 err = gt_enter_maint(h, v, old_state, rerr);
 440                 break;
 441 
 442         default:
 443                 /* Shouldn't be in an invalid state. */
 444 #ifndef NDEBUG
 445                 uu_warn("%s:%d: Invalid state %d.\n", __FILE__, __LINE__,
 446                     v->gv_state);
 447 #endif
 448                 abort();
 449         }
 450 
 451         switch (err) {
 452         case 0:
 453                 break;
 454 
 455         case ECONNABORTED:
 456                 lost_repository = 1;
 457                 break;
 458 
 459         default:
 460 #ifndef NDEBUG
 461                 uu_warn("%s:%d: "
 462                     "gt_enter_%s() failed with unexpected error %d.\n",
 463                     __FILE__, __LINE__, instance_state_str[v->gv_state], err);
 464 #endif
 465                 abort();
 466         }
 467 
 468         return (lost_repository ? ECONNABORTED : 0);
 469 }