1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 *
25 * Copyright 2016 RackTop Systems.
26 */
27
28
29 /*
30 * transition.c - Graph State Machine
31 *
32 * The graph state machine is implemented here, with a typical approach
33 * of a function per state. Separating the implementation allows more
34 * clarity into the actions taken on notification of state change, as well
35 * as a place for future expansion including hooks for configurable actions.
36 * All functions are called with dgraph_lock held.
37 *
38 * The start action for this state machine is not explicit. The states
39 * (ONLINE and DEGRADED) which need to know when they're entering the state
40 * due to a daemon restart implement this understanding by checking for
41 * transition from uninitialized. In the future, this would likely be better
42 * as an explicit start action instead of relying on an overloaded transition.
43 *
44 * All gt_enter functions use the same set of return codes.
45 * 0 success
46 * ECONNABORTED repository connection aborted
47 */
48
49 #include "startd.h"
50
51 static int
52 gt_running(restarter_instance_state_t state)
53 {
54 if (state == RESTARTER_STATE_ONLINE ||
55 state == RESTARTER_STATE_DEGRADED)
56 return (1);
57
58 return (0);
59 }
60
61 static int
62 gt_enter_uninit(scf_handle_t *h, graph_vertex_t *v,
63 restarter_instance_state_t old_state, restarter_error_t rerr)
64 {
65 int err;
66 scf_instance_t *inst;
67
68 /* Initialize instance by refreshing it. */
69
70 err = libscf_fmri_get_instance(h, v->gv_name, &inst);
71 switch (err) {
72 case 0:
73 break;
74
75 case ECONNABORTED:
76 return (ECONNABORTED);
77
78 case ENOENT:
79 return (0);
80
81 case EINVAL:
82 case ENOTSUP:
83 default:
84 bad_error("libscf_fmri_get_instance", err);
85 }
86
87 err = refresh_vertex(v, inst);
88 if (err == 0)
89 graph_enable_by_vertex(v, v->gv_flags & GV_ENABLED, 0);
90
91 scf_instance_destroy(inst);
92
93 /* If the service was running, propagate a stop event. */
94 if (gt_running(old_state)) {
95 log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
96 v->gv_name);
97
98 graph_transition_propagate(v, PROPAGATE_STOP, rerr);
99 }
100
101 graph_transition_sulogin(RESTARTER_STATE_UNINIT, old_state);
102 return (0);
103 }
104
105 /* ARGSUSED */
106 static int
107 gt_enter_maint(scf_handle_t *h, graph_vertex_t *v,
108 restarter_instance_state_t old_state, restarter_error_t rerr)
109 {
110 int to_offline = v->gv_flags & GV_TOOFFLINE;
111
112 /*
113 * If the service was running, propagate a stop event. If the
114 * service was not running the maintenance transition may satisfy
115 * optional dependencies and should be propagated to determine
116 * whether new dependents are satisfiable.
117 * Instances that transition to maintenance and have the GV_TOOFFLINE
118 * flag are special because they can expose new subtree leaves so
119 * propagate the offline to the instance dependencies.
120 */
121
122 /* instance transitioning to maintenance is considered disabled */
123 v->gv_flags &= ~GV_TODISABLE;
124 v->gv_flags &= ~GV_TOOFFLINE;
125
126 if (gt_running(old_state)) {
127 /*
128 * Handle state change during instance disabling.
129 * Propagate offline to the new exposed leaves.
130 */
131 if (to_offline) {
132 log_framework(LOG_DEBUG, "%s removed from subtree\n",
133 v->gv_name);
134
135 graph_offline_subtree_leaves(v, (void *)h);
136 }
137
138 log_framework(LOG_DEBUG, "Propagating maintenance (stop) of "
139 "%s.\n", v->gv_name);
140
141 graph_transition_propagate(v, PROPAGATE_STOP, rerr);
142
143 /*
144 * The maintenance transition may satisfy optional_all/restart
145 * dependencies and should be propagated to determine
146 * whether new dependents are satisfiable.
147 */
148 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
149 } else {
150 log_framework(LOG_DEBUG, "Propagating maintenance of %s.\n",
151 v->gv_name);
152
153 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
154 }
155
156 graph_transition_sulogin(RESTARTER_STATE_MAINT, old_state);
157 return (0);
158 }
159
160 /* ARGSUSED */
161 static int
162 gt_enter_offline(scf_handle_t *h, graph_vertex_t *v,
163 restarter_instance_state_t old_state, restarter_error_t rerr)
164 {
165 int to_offline = v->gv_flags & GV_TOOFFLINE;
166 int to_disable = v->gv_flags & GV_TODISABLE;
167
168 v->gv_flags &= ~GV_TOOFFLINE;
169
170 /*
171 * If the instance should be enabled, see if we can start it.
172 * Otherwise send a disable command.
173 * If a instance has the GV_TOOFFLINE flag set then it must
174 * remains offline until the disable process completes.
175 */
176 if (v->gv_flags & GV_ENABLED) {
177 if (to_offline == 0 && to_disable == 0)
178 graph_start_if_satisfied(v);
179 } else {
180 if (gt_running(old_state) && v->gv_post_disable_f)
181 v->gv_post_disable_f();
182
183 vertex_send_event(v, RESTARTER_EVENT_TYPE_DISABLE);
184 }
185
186 /*
187 * If the service was running, propagate a stop event. If the
188 * service was not running the offline transition may satisfy
189 * optional dependencies and should be propagated to determine
190 * whether new dependents are satisfiable.
191 * Instances that transition to offline and have the GV_TOOFFLINE flag
192 * are special because they can expose new subtree leaves so propagate
193 * the offline to the instance dependencies.
194 */
195 if (gt_running(old_state)) {
196 /*
197 * Handle state change during instance disabling.
198 * Propagate offline to the new exposed leaves.
199 */
200 if (to_offline) {
201 log_framework(LOG_DEBUG, "%s removed from subtree\n",
202 v->gv_name);
203
204 graph_offline_subtree_leaves(v, (void *)h);
205 }
206
207 log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
208 v->gv_name);
209
210 graph_transition_propagate(v, PROPAGATE_STOP, rerr);
211
212 /*
213 * The offline transition may satisfy require_any/restart
214 * dependencies and should be propagated to determine
215 * whether new dependents are satisfiable.
216 */
217 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
218 } else {
219 log_framework(LOG_DEBUG, "Propagating offline of %s.\n",
220 v->gv_name);
221
222 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
223 }
224
225 graph_transition_sulogin(RESTARTER_STATE_OFFLINE, old_state);
226 return (0);
227 }
228
229 /* ARGSUSED */
230 static int
231 gt_enter_disabled(scf_handle_t *h, graph_vertex_t *v,
232 restarter_instance_state_t old_state, restarter_error_t rerr)
233 {
234 int to_offline = v->gv_flags & GV_TOOFFLINE;
235
236 v->gv_flags &= ~GV_TODISABLE;
237 v->gv_flags &= ~GV_TOOFFLINE;
238
239 /*
240 * If the instance should be disabled, no problem. Otherwise,
241 * send an enable command, which should result in the instance
242 * moving to OFFLINE unless the instance is part of a subtree
243 * (non root) and in this case the result is unpredictable.
244 */
245 if (v->gv_flags & GV_ENABLED) {
246 vertex_send_event(v, RESTARTER_EVENT_TYPE_ENABLE);
247 } else if (gt_running(old_state) && v->gv_post_disable_f) {
248 v->gv_post_disable_f();
249 }
250
251 /*
252 * If the service was running, propagate this as a stop. If the
253 * service was not running the disabled transition may satisfy
254 * optional dependencies and should be propagated to determine
255 * whether new dependents are satisfiable.
256 */
257 if (gt_running(old_state)) {
258 /*
259 * We need to propagate the offline to new exposed leaves in
260 * case we've just disabled an instance that was part of a
261 * subtree.
262 */
263 if (to_offline) {
264 log_framework(LOG_DEBUG, "%s removed from subtree\n",
265 v->gv_name);
266
267 /*
268 * Handle state change during instance disabling.
269 * Propagate offline to the new exposed leaves.
270 */
271 graph_offline_subtree_leaves(v, (void *)h);
272 }
273
274
275 log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
276 v->gv_name);
277
278 graph_transition_propagate(v, PROPAGATE_STOP, rerr);
279
280 /*
281 * The disable transition may satisfy optional_all/restart
282 * dependencies and should be propagated to determine
283 * whether new dependents are satisfiable.
284 */
285 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
286 } else {
287 log_framework(LOG_DEBUG, "Propagating disable of %s.\n",
288 v->gv_name);
289
290 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
291 }
292
293 graph_transition_sulogin(RESTARTER_STATE_DISABLED, old_state);
294 return (0);
295 }
296
297 static int
298 gt_internal_online_or_degraded(scf_handle_t *h, graph_vertex_t *v,
299 restarter_instance_state_t old_state, restarter_error_t rerr)
300 {
301 int r;
302
303 /*
304 * If the instance has just come up, update the start
305 * snapshot.
306 */
307 if (gt_running(old_state) == 0) {
308 /*
309 * Don't fire if we're just recovering state
310 * after a restart.
311 */
312 if (old_state != RESTARTER_STATE_UNINIT &&
313 v->gv_post_online_f)
314 v->gv_post_online_f();
315
316 r = libscf_snapshots_poststart(h, v->gv_name, B_TRUE);
317 switch (r) {
318 case 0:
319 case ENOENT:
320 /*
321 * If ENOENT, the instance must have been
322 * deleted. Pretend we were successful since
323 * we should get a delete event later.
324 */
325 break;
326
327 case ECONNABORTED:
328 return (ECONNABORTED);
329
330 case EACCES:
331 case ENOTSUP:
332 default:
333 bad_error("libscf_snapshots_poststart", r);
334 }
335 }
336
337 if (!(v->gv_flags & GV_ENABLED)) {
338 vertex_send_event(v, RESTARTER_EVENT_TYPE_DISABLE);
339 } else if (v->gv_flags & GV_TOOFFLINE) {
340 /*
341 * If the vertex has the GV_TOOFFLINE flag set then that's
342 * because the instance was transitioning from offline to
343 * online and the reverse disable algorithm doesn't offline
344 * those instances because it was already appearing offline.
345 * So do it now.
346 */
347 offline_vertex(v);
348 }
349
350 if (gt_running(old_state) == 0) {
351 log_framework(LOG_DEBUG, "Propagating start of %s.\n",
352 v->gv_name);
353
354 graph_transition_propagate(v, PROPAGATE_START, rerr);
355 } else if (rerr == RERR_REFRESH) {
356 /* For refresh we'll get a message sans state change */
357
358 log_framework(LOG_DEBUG, "Propagating refresh of %s.\n",
359 v->gv_name);
360
361 graph_transition_propagate(v, PROPAGATE_STOP, rerr);
362 }
363
364 return (0);
365 }
366
367 static int
368 gt_enter_online(scf_handle_t *h, graph_vertex_t *v,
369 restarter_instance_state_t old_state, restarter_error_t rerr)
370 {
371 int r;
372
373 r = gt_internal_online_or_degraded(h, v, old_state, rerr);
374 if (r != 0)
375 return (r);
376
377 graph_transition_sulogin(RESTARTER_STATE_ONLINE, old_state);
378 return (0);
379 }
380
381 static int
382 gt_enter_degraded(scf_handle_t *h, graph_vertex_t *v,
383 restarter_instance_state_t old_state, restarter_error_t rerr)
384 {
385 int r;
386
387 r = gt_internal_online_or_degraded(h, v, old_state, rerr);
388 if (r != 0)
389 return (r);
390
391 graph_transition_sulogin(RESTARTER_STATE_DEGRADED, old_state);
392 return (0);
393 }
394
395 /*
396 * gt_transition() implements the state transition for the graph
397 * state machine. It can return:
398 * 0 success
399 * ECONNABORTED repository connection aborted
400 *
401 * v->gv_state should be set to the state we're transitioning to before
402 * calling this function.
403 */
404 int
405 gt_transition(scf_handle_t *h, graph_vertex_t *v, restarter_error_t rerr,
406 restarter_instance_state_t old_state)
407 {
408 int err;
409 int lost_repository = 0;
410
411 /*
412 * If there's a common set of work to be done on exit from the
413 * old_state, include it as a separate set of functions here. For
414 * now there's no such work, so there are no gt_exit functions.
415 */
416
417 err = vertex_subgraph_dependencies_shutdown(h, v, old_state);
418 switch (err) {
419 case 0:
420 break;
421
422 case ECONNABORTED:
423 lost_repository = 1;
424 break;
425
426 default:
427 bad_error("vertex_subgraph_dependencies_shutdown", err);
428 }
429
430 /*
431 * Now call the appropriate gt_enter function for the new state.
432 */
433 switch (v->gv_state) {
434 case RESTARTER_STATE_UNINIT:
435 err = gt_enter_uninit(h, v, old_state, rerr);
436 break;
437
438 case RESTARTER_STATE_DISABLED:
439 err = gt_enter_disabled(h, v, old_state, rerr);
440 break;
441
442 case RESTARTER_STATE_OFFLINE:
443 err = gt_enter_offline(h, v, old_state, rerr);
444 break;
445
446 case RESTARTER_STATE_ONLINE:
447 err = gt_enter_online(h, v, old_state, rerr);
448 break;
449
450 case RESTARTER_STATE_DEGRADED:
451 err = gt_enter_degraded(h, v, old_state, rerr);
452 break;
453
454 case RESTARTER_STATE_MAINT:
455 err = gt_enter_maint(h, v, old_state, rerr);
456 break;
457
458 default:
459 /* Shouldn't be in an invalid state. */
460 #ifndef NDEBUG
461 uu_warn("%s:%d: Invalid state %d.\n", __FILE__, __LINE__,
462 v->gv_state);
463 #endif
464 abort();
465 }
466
467 switch (err) {
468 case 0:
469 break;
470
471 case ECONNABORTED:
472 lost_repository = 1;
473 break;
474
475 default:
476 #ifndef NDEBUG
477 uu_warn("%s:%d: "
478 "gt_enter_%s() failed with unexpected error %d.\n",
479 __FILE__, __LINE__, instance_state_str[v->gv_state], err);
480 #endif
481 abort();
482 }
483
484 return (lost_repository ? ECONNABORTED : 0);
485 }