1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 27 /* 28 * transition.c - Graph State Machine 29 * 30 * The graph state machine is implemented here, with a typical approach 31 * of a function per state. Separating the implementation allows more 32 * clarity into the actions taken on notification of state change, as well 33 * as a place for future expansion including hooks for configurable actions. 34 * All functions are called with dgraph_lock held. 35 * 36 * The start action for this state machine is not explicit. The states 37 * (ONLINE and DEGRADED) which need to know when they're entering the state 38 * due to a daemon restart implement this understanding by checking for 39 * transition from uninitialized. In the future, this would likely be better 40 * as an explicit start action instead of relying on an overloaded transition. 41 * 42 * All gt_enter functions use the same set of return codes. 43 * 0 success 44 * ECONNABORTED repository connection aborted 45 */ 46 47 #include "startd.h" 48 49 static int 50 gt_running(restarter_instance_state_t state) 51 { 52 if (state == RESTARTER_STATE_ONLINE || 53 state == RESTARTER_STATE_DEGRADED) 54 return (1); 55 56 return (0); 57 } 58 59 static int 60 gt_enter_uninit(scf_handle_t *h, graph_vertex_t *v, 61 restarter_instance_state_t old_state, restarter_error_t rerr) 62 { 63 int err; 64 scf_instance_t *inst; 65 66 /* Initialize instance by refreshing it. */ 67 68 err = libscf_fmri_get_instance(h, v->gv_name, &inst); 69 switch (err) { 70 case 0: 71 break; 72 73 case ECONNABORTED: 74 return (ECONNABORTED); 75 76 case ENOENT: 77 return (0); 78 79 case EINVAL: 80 case ENOTSUP: 81 default: 82 bad_error("libscf_fmri_get_instance", err); 83 } 84 85 err = refresh_vertex(v, inst); 86 if (err == 0) 87 graph_enable_by_vertex(v, v->gv_flags & GV_ENABLED, 0); 88 89 scf_instance_destroy(inst); 90 91 /* If the service was running, propagate a stop event. */ 92 if (gt_running(old_state)) { 93 log_framework(LOG_DEBUG, "Propagating stop of %s.\n", 94 v->gv_name); 95 96 graph_transition_propagate(v, PROPAGATE_STOP, rerr); 97 } 98 99 graph_transition_sulogin(RESTARTER_STATE_UNINIT, old_state); 100 return (0); 101 } 102 103 /* ARGSUSED */ 104 static int 105 gt_enter_maint(scf_handle_t *h, graph_vertex_t *v, 106 restarter_instance_state_t old_state, restarter_error_t rerr) 107 { 108 int to_offline = v->gv_flags & GV_TOOFFLINE; 109 110 /* 111 * If the service was running, propagate a stop event. If the 112 * service was not running the maintenance transition may satisfy 113 * optional dependencies and should be propagated to determine 114 * whether new dependents are satisfiable. 115 * Instances that transition to maintenance and have the GV_TOOFFLINE 116 * flag are special because they can expose new subtree leaves so 117 * propagate the offline to the instance dependencies. 118 */ 119 120 /* instance transitioning to maintenance is considered disabled */ 121 v->gv_flags &= ~GV_TODISABLE; 122 v->gv_flags &= ~GV_TOOFFLINE; 123 124 if (gt_running(old_state)) { 125 /* 126 * Handle state change during instance disabling. 127 * Propagate offline to the new exposed leaves. 128 */ 129 if (to_offline) { 130 log_framework(LOG_DEBUG, "%s removed from subtree\n", 131 v->gv_name); 132 133 graph_offline_subtree_leaves(v, (void *)h); 134 } 135 136 log_framework(LOG_DEBUG, "Propagating maintenance (stop) of " 137 "%s.\n", v->gv_name); 138 139 graph_transition_propagate(v, PROPAGATE_STOP, rerr); 140 } else { 141 log_framework(LOG_DEBUG, "Propagating maintenance of %s.\n", 142 v->gv_name); 143 144 graph_transition_propagate(v, PROPAGATE_SAT, rerr); 145 } 146 147 graph_transition_sulogin(RESTARTER_STATE_MAINT, old_state); 148 return (0); 149 } 150 151 /* ARGSUSED */ 152 static int 153 gt_enter_offline(scf_handle_t *h, graph_vertex_t *v, 154 restarter_instance_state_t old_state, restarter_error_t rerr) 155 { 156 int to_offline = v->gv_flags & GV_TOOFFLINE; 157 158 v->gv_flags &= ~GV_TOOFFLINE; 159 160 /* 161 * If the instance should be enabled, see if we can start it. 162 * Otherwise send a disable command. 163 * If a instance has the GV_TOOFFLINE flag set then it must 164 * remains offline until the disable process completes. 165 */ 166 if (v->gv_flags & GV_ENABLED) { 167 if (to_offline == 0) 168 graph_start_if_satisfied(v); 169 } else { 170 if (gt_running(old_state) && v->gv_post_disable_f) 171 v->gv_post_disable_f(); 172 173 vertex_send_event(v, RESTARTER_EVENT_TYPE_DISABLE); 174 } 175 176 /* 177 * If the service was running, propagate a stop event. If the 178 * service was not running the offline transition may satisfy 179 * optional dependencies and should be propagated to determine 180 * whether new dependents are satisfiable. 181 * Instances that transition to offline and have the GV_TOOFFLINE flag 182 * are special because they can expose new subtree leaves so propagate 183 * the offline to the instance dependencies. 184 */ 185 if (gt_running(old_state)) { 186 /* 187 * Handle state change during instance disabling. 188 * Propagate offline to the new exposed leaves. 189 */ 190 if (to_offline) { 191 log_framework(LOG_DEBUG, "%s removed from subtree\n", 192 v->gv_name); 193 194 graph_offline_subtree_leaves(v, (void *)h); 195 } 196 197 log_framework(LOG_DEBUG, "Propagating stop of %s.\n", 198 v->gv_name); 199 200 graph_transition_propagate(v, PROPAGATE_STOP, rerr); 201 202 /* 203 * The offline transition may satisfy require_any/restart 204 * dependencies and should be propagated to determine 205 * whether new dependents are satisfiable. 206 */ 207 graph_transition_propagate(v, PROPAGATE_SAT, rerr); 208 } else { 209 log_framework(LOG_DEBUG, "Propagating offline of %s.\n", 210 v->gv_name); 211 212 graph_transition_propagate(v, PROPAGATE_SAT, rerr); 213 } 214 215 graph_transition_sulogin(RESTARTER_STATE_OFFLINE, old_state); 216 return (0); 217 } 218 219 /* ARGSUSED */ 220 static int 221 gt_enter_disabled(scf_handle_t *h, graph_vertex_t *v, 222 restarter_instance_state_t old_state, restarter_error_t rerr) 223 { 224 int to_offline = v->gv_flags & GV_TOOFFLINE; 225 226 v->gv_flags &= ~GV_TODISABLE; 227 v->gv_flags &= ~GV_TOOFFLINE; 228 229 /* 230 * If the instance should be disabled, no problem. Otherwise, 231 * send an enable command, which should result in the instance 232 * moving to OFFLINE unless the instance is part of a subtree 233 * (non root) and in this case the result is unpredictable. 234 */ 235 if (v->gv_flags & GV_ENABLED) { 236 vertex_send_event(v, RESTARTER_EVENT_TYPE_ENABLE); 237 } else if (gt_running(old_state) && v->gv_post_disable_f) { 238 v->gv_post_disable_f(); 239 } 240 241 /* 242 * If the service was running, propagate this as a stop. If the 243 * service was not running the disabled transition may satisfy 244 * optional dependencies and should be propagated to determine 245 * whether new dependents are satisfiable. 246 */ 247 if (gt_running(old_state)) { 248 /* 249 * We need to propagate the offline to new exposed leaves in 250 * case we've just disabled an instance that was part of a 251 * subtree. 252 */ 253 if (to_offline) { 254 log_framework(LOG_DEBUG, "%s removed from subtree\n", 255 v->gv_name); 256 257 /* 258 * Handle state change during instance disabling. 259 * Propagate offline to the new exposed leaves. 260 */ 261 graph_offline_subtree_leaves(v, (void *)h); 262 } 263 264 265 log_framework(LOG_DEBUG, "Propagating stop of %s.\n", 266 v->gv_name); 267 268 graph_transition_propagate(v, PROPAGATE_STOP, rerr); 269 270 } else { 271 log_framework(LOG_DEBUG, "Propagating disable of %s.\n", 272 v->gv_name); 273 274 graph_transition_propagate(v, PROPAGATE_SAT, rerr); 275 } 276 277 graph_transition_sulogin(RESTARTER_STATE_DISABLED, old_state); 278 return (0); 279 } 280 281 static int 282 gt_internal_online_or_degraded(scf_handle_t *h, graph_vertex_t *v, 283 restarter_instance_state_t old_state, restarter_error_t rerr) 284 { 285 int r; 286 287 /* 288 * If the instance has just come up, update the start 289 * snapshot. 290 */ 291 if (gt_running(old_state) == 0) { 292 /* 293 * Don't fire if we're just recovering state 294 * after a restart. 295 */ 296 if (old_state != RESTARTER_STATE_UNINIT && 297 v->gv_post_online_f) 298 v->gv_post_online_f(); 299 300 r = libscf_snapshots_poststart(h, v->gv_name, B_TRUE); 301 switch (r) { 302 case 0: 303 case ENOENT: 304 /* 305 * If ENOENT, the instance must have been 306 * deleted. Pretend we were successful since 307 * we should get a delete event later. 308 */ 309 break; 310 311 case ECONNABORTED: 312 return (ECONNABORTED); 313 314 case EACCES: 315 case ENOTSUP: 316 default: 317 bad_error("libscf_snapshots_poststart", r); 318 } 319 } 320 321 if (!(v->gv_flags & GV_ENABLED)) { 322 vertex_send_event(v, RESTARTER_EVENT_TYPE_DISABLE); 323 } else if (v->gv_flags & GV_TOOFFLINE) { 324 /* 325 * If the vertex has the GV_TOOFFLINE flag set then that's 326 * because the instance was transitioning from offline to 327 * online and the reverse disable algorithm doesn't offline 328 * those instances because it was already appearing offline. 329 * So do it now. 330 */ 331 offline_vertex(v); 332 } 333 334 if (gt_running(old_state) == 0) { 335 log_framework(LOG_DEBUG, "Propagating start of %s.\n", 336 v->gv_name); 337 338 graph_transition_propagate(v, PROPAGATE_START, rerr); 339 } else if (rerr == RERR_REFRESH) { 340 /* For refresh we'll get a message sans state change */ 341 342 log_framework(LOG_DEBUG, "Propagating refresh of %s.\n", 343 v->gv_name); 344 345 graph_transition_propagate(v, PROPAGATE_STOP, rerr); 346 } 347 348 return (0); 349 } 350 351 static int 352 gt_enter_online(scf_handle_t *h, graph_vertex_t *v, 353 restarter_instance_state_t old_state, restarter_error_t rerr) 354 { 355 int r; 356 357 r = gt_internal_online_or_degraded(h, v, old_state, rerr); 358 if (r != 0) 359 return (r); 360 361 graph_transition_sulogin(RESTARTER_STATE_ONLINE, old_state); 362 return (0); 363 } 364 365 static int 366 gt_enter_degraded(scf_handle_t *h, graph_vertex_t *v, 367 restarter_instance_state_t old_state, restarter_error_t rerr) 368 { 369 int r; 370 371 r = gt_internal_online_or_degraded(h, v, old_state, rerr); 372 if (r != 0) 373 return (r); 374 375 graph_transition_sulogin(RESTARTER_STATE_DEGRADED, old_state); 376 return (0); 377 } 378 379 /* 380 * gt_transition() implements the state transition for the graph 381 * state machine. It can return: 382 * 0 success 383 * ECONNABORTED repository connection aborted 384 * 385 * v->gv_state should be set to the state we're transitioning to before 386 * calling this function. 387 */ 388 int 389 gt_transition(scf_handle_t *h, graph_vertex_t *v, restarter_error_t rerr, 390 restarter_instance_state_t old_state) 391 { 392 int err; 393 int lost_repository = 0; 394 395 /* 396 * If there's a common set of work to be done on exit from the 397 * old_state, include it as a separate set of functions here. For 398 * now there's no such work, so there are no gt_exit functions. 399 */ 400 401 err = vertex_subgraph_dependencies_shutdown(h, v, old_state); 402 switch (err) { 403 case 0: 404 break; 405 406 case ECONNABORTED: 407 lost_repository = 1; 408 break; 409 410 default: 411 bad_error("vertex_subgraph_dependencies_shutdown", err); 412 } 413 414 /* 415 * Now call the appropriate gt_enter function for the new state. 416 */ 417 switch (v->gv_state) { 418 case RESTARTER_STATE_UNINIT: 419 err = gt_enter_uninit(h, v, old_state, rerr); 420 break; 421 422 case RESTARTER_STATE_DISABLED: 423 err = gt_enter_disabled(h, v, old_state, rerr); 424 break; 425 426 case RESTARTER_STATE_OFFLINE: 427 err = gt_enter_offline(h, v, old_state, rerr); 428 break; 429 430 case RESTARTER_STATE_ONLINE: 431 err = gt_enter_online(h, v, old_state, rerr); 432 break; 433 434 case RESTARTER_STATE_DEGRADED: 435 err = gt_enter_degraded(h, v, old_state, rerr); 436 break; 437 438 case RESTARTER_STATE_MAINT: 439 err = gt_enter_maint(h, v, old_state, rerr); 440 break; 441 442 default: 443 /* Shouldn't be in an invalid state. */ 444 #ifndef NDEBUG 445 uu_warn("%s:%d: Invalid state %d.\n", __FILE__, __LINE__, 446 v->gv_state); 447 #endif 448 abort(); 449 } 450 451 switch (err) { 452 case 0: 453 break; 454 455 case ECONNABORTED: 456 lost_repository = 1; 457 break; 458 459 default: 460 #ifndef NDEBUG 461 uu_warn("%s:%d: " 462 "gt_enter_%s() failed with unexpected error %d.\n", 463 __FILE__, __LINE__, instance_state_str[v->gv_state], err); 464 #endif 465 abort(); 466 } 467 468 return (lost_repository ? ECONNABORTED : 0); 469 }