Print this page
7267 SMF is fast and loose with optional dependencies
Reviewed by: Dan McDonald <danmcd@omniti.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Albert Lee <trisk@omniti.com>
Reviewed by: Gordon Ross <gordon.w.ross@gmail.com>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/cmd/svc/startd/transition.c
+++ new/usr/src/cmd/svc/startd/transition.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
↓ open down ↓ |
13 lines elided |
↑ open up ↑ |
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 + *
25 + * Copyright 2016 RackTop Systems.
24 26 */
25 27
26 28
27 29 /*
28 30 * transition.c - Graph State Machine
29 31 *
30 32 * The graph state machine is implemented here, with a typical approach
31 33 * of a function per state. Separating the implementation allows more
32 34 * clarity into the actions taken on notification of state change, as well
33 35 * as a place for future expansion including hooks for configurable actions.
34 36 * All functions are called with dgraph_lock held.
35 37 *
36 38 * The start action for this state machine is not explicit. The states
37 39 * (ONLINE and DEGRADED) which need to know when they're entering the state
38 40 * due to a daemon restart implement this understanding by checking for
39 41 * transition from uninitialized. In the future, this would likely be better
40 42 * as an explicit start action instead of relying on an overloaded transition.
41 43 *
42 44 * All gt_enter functions use the same set of return codes.
43 45 * 0 success
44 46 * ECONNABORTED repository connection aborted
45 47 */
46 48
47 49 #include "startd.h"
48 50
49 51 static int
50 52 gt_running(restarter_instance_state_t state)
51 53 {
52 54 if (state == RESTARTER_STATE_ONLINE ||
53 55 state == RESTARTER_STATE_DEGRADED)
54 56 return (1);
55 57
56 58 return (0);
57 59 }
58 60
59 61 static int
60 62 gt_enter_uninit(scf_handle_t *h, graph_vertex_t *v,
61 63 restarter_instance_state_t old_state, restarter_error_t rerr)
62 64 {
63 65 int err;
64 66 scf_instance_t *inst;
65 67
66 68 /* Initialize instance by refreshing it. */
67 69
68 70 err = libscf_fmri_get_instance(h, v->gv_name, &inst);
69 71 switch (err) {
70 72 case 0:
71 73 break;
72 74
73 75 case ECONNABORTED:
74 76 return (ECONNABORTED);
75 77
76 78 case ENOENT:
77 79 return (0);
78 80
79 81 case EINVAL:
80 82 case ENOTSUP:
81 83 default:
82 84 bad_error("libscf_fmri_get_instance", err);
83 85 }
84 86
85 87 err = refresh_vertex(v, inst);
86 88 if (err == 0)
87 89 graph_enable_by_vertex(v, v->gv_flags & GV_ENABLED, 0);
88 90
89 91 scf_instance_destroy(inst);
90 92
91 93 /* If the service was running, propagate a stop event. */
92 94 if (gt_running(old_state)) {
93 95 log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
94 96 v->gv_name);
95 97
96 98 graph_transition_propagate(v, PROPAGATE_STOP, rerr);
97 99 }
98 100
99 101 graph_transition_sulogin(RESTARTER_STATE_UNINIT, old_state);
100 102 return (0);
101 103 }
102 104
103 105 /* ARGSUSED */
104 106 static int
105 107 gt_enter_maint(scf_handle_t *h, graph_vertex_t *v,
106 108 restarter_instance_state_t old_state, restarter_error_t rerr)
107 109 {
108 110 int to_offline = v->gv_flags & GV_TOOFFLINE;
109 111
110 112 /*
111 113 * If the service was running, propagate a stop event. If the
112 114 * service was not running the maintenance transition may satisfy
113 115 * optional dependencies and should be propagated to determine
114 116 * whether new dependents are satisfiable.
115 117 * Instances that transition to maintenance and have the GV_TOOFFLINE
116 118 * flag are special because they can expose new subtree leaves so
117 119 * propagate the offline to the instance dependencies.
118 120 */
119 121
120 122 /* instance transitioning to maintenance is considered disabled */
121 123 v->gv_flags &= ~GV_TODISABLE;
122 124 v->gv_flags &= ~GV_TOOFFLINE;
123 125
124 126 if (gt_running(old_state)) {
125 127 /*
126 128 * Handle state change during instance disabling.
127 129 * Propagate offline to the new exposed leaves.
128 130 */
129 131 if (to_offline) {
↓ open down ↓ |
96 lines elided |
↑ open up ↑ |
130 132 log_framework(LOG_DEBUG, "%s removed from subtree\n",
131 133 v->gv_name);
132 134
133 135 graph_offline_subtree_leaves(v, (void *)h);
134 136 }
135 137
136 138 log_framework(LOG_DEBUG, "Propagating maintenance (stop) of "
137 139 "%s.\n", v->gv_name);
138 140
139 141 graph_transition_propagate(v, PROPAGATE_STOP, rerr);
142 +
143 + /*
144 + * The maintenance transition may satisfy optional_all/restart
145 + * dependencies and should be propagated to determine
146 + * whether new dependents are satisfiable.
147 + */
148 + graph_transition_propagate(v, PROPAGATE_SAT, rerr);
140 149 } else {
141 150 log_framework(LOG_DEBUG, "Propagating maintenance of %s.\n",
142 151 v->gv_name);
143 152
144 153 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
145 154 }
146 155
147 156 graph_transition_sulogin(RESTARTER_STATE_MAINT, old_state);
148 157 return (0);
149 158 }
150 159
151 160 /* ARGSUSED */
152 161 static int
153 162 gt_enter_offline(scf_handle_t *h, graph_vertex_t *v,
154 163 restarter_instance_state_t old_state, restarter_error_t rerr)
155 164 {
156 165 int to_offline = v->gv_flags & GV_TOOFFLINE;
166 + int to_disable = v->gv_flags & GV_TODISABLE;
157 167
158 168 v->gv_flags &= ~GV_TOOFFLINE;
159 169
160 170 /*
161 171 * If the instance should be enabled, see if we can start it.
162 172 * Otherwise send a disable command.
163 173 * If a instance has the GV_TOOFFLINE flag set then it must
164 174 * remains offline until the disable process completes.
165 175 */
166 176 if (v->gv_flags & GV_ENABLED) {
167 - if (to_offline == 0)
177 + if (to_offline == 0 && to_disable == 0)
168 178 graph_start_if_satisfied(v);
169 179 } else {
170 180 if (gt_running(old_state) && v->gv_post_disable_f)
171 181 v->gv_post_disable_f();
172 182
173 183 vertex_send_event(v, RESTARTER_EVENT_TYPE_DISABLE);
174 184 }
175 185
176 186 /*
177 187 * If the service was running, propagate a stop event. If the
178 188 * service was not running the offline transition may satisfy
179 189 * optional dependencies and should be propagated to determine
180 190 * whether new dependents are satisfiable.
181 191 * Instances that transition to offline and have the GV_TOOFFLINE flag
182 192 * are special because they can expose new subtree leaves so propagate
183 193 * the offline to the instance dependencies.
184 194 */
185 195 if (gt_running(old_state)) {
186 196 /*
187 197 * Handle state change during instance disabling.
188 198 * Propagate offline to the new exposed leaves.
189 199 */
190 200 if (to_offline) {
191 201 log_framework(LOG_DEBUG, "%s removed from subtree\n",
192 202 v->gv_name);
193 203
194 204 graph_offline_subtree_leaves(v, (void *)h);
195 205 }
196 206
197 207 log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
198 208 v->gv_name);
199 209
200 210 graph_transition_propagate(v, PROPAGATE_STOP, rerr);
201 211
202 212 /*
203 213 * The offline transition may satisfy require_any/restart
204 214 * dependencies and should be propagated to determine
205 215 * whether new dependents are satisfiable.
206 216 */
207 217 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
208 218 } else {
209 219 log_framework(LOG_DEBUG, "Propagating offline of %s.\n",
210 220 v->gv_name);
211 221
212 222 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
213 223 }
214 224
215 225 graph_transition_sulogin(RESTARTER_STATE_OFFLINE, old_state);
216 226 return (0);
217 227 }
218 228
219 229 /* ARGSUSED */
220 230 static int
221 231 gt_enter_disabled(scf_handle_t *h, graph_vertex_t *v,
222 232 restarter_instance_state_t old_state, restarter_error_t rerr)
223 233 {
224 234 int to_offline = v->gv_flags & GV_TOOFFLINE;
225 235
226 236 v->gv_flags &= ~GV_TODISABLE;
227 237 v->gv_flags &= ~GV_TOOFFLINE;
228 238
229 239 /*
230 240 * If the instance should be disabled, no problem. Otherwise,
231 241 * send an enable command, which should result in the instance
232 242 * moving to OFFLINE unless the instance is part of a subtree
233 243 * (non root) and in this case the result is unpredictable.
234 244 */
235 245 if (v->gv_flags & GV_ENABLED) {
236 246 vertex_send_event(v, RESTARTER_EVENT_TYPE_ENABLE);
237 247 } else if (gt_running(old_state) && v->gv_post_disable_f) {
238 248 v->gv_post_disable_f();
239 249 }
240 250
241 251 /*
242 252 * If the service was running, propagate this as a stop. If the
243 253 * service was not running the disabled transition may satisfy
244 254 * optional dependencies and should be propagated to determine
245 255 * whether new dependents are satisfiable.
246 256 */
247 257 if (gt_running(old_state)) {
248 258 /*
249 259 * We need to propagate the offline to new exposed leaves in
250 260 * case we've just disabled an instance that was part of a
251 261 * subtree.
252 262 */
253 263 if (to_offline) {
254 264 log_framework(LOG_DEBUG, "%s removed from subtree\n",
255 265 v->gv_name);
256 266
257 267 /*
258 268 * Handle state change during instance disabling.
259 269 * Propagate offline to the new exposed leaves.
↓ open down ↓ |
82 lines elided |
↑ open up ↑ |
260 270 */
261 271 graph_offline_subtree_leaves(v, (void *)h);
262 272 }
263 273
264 274
265 275 log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
266 276 v->gv_name);
267 277
268 278 graph_transition_propagate(v, PROPAGATE_STOP, rerr);
269 279
280 + /*
281 + * The disable transition may satisfy optional_all/restart
282 + * dependencies and should be propagated to determine
283 + * whether new dependents are satisfiable.
284 + */
285 + graph_transition_propagate(v, PROPAGATE_SAT, rerr);
270 286 } else {
271 287 log_framework(LOG_DEBUG, "Propagating disable of %s.\n",
272 288 v->gv_name);
273 289
274 290 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
275 291 }
276 292
277 293 graph_transition_sulogin(RESTARTER_STATE_DISABLED, old_state);
278 294 return (0);
279 295 }
280 296
281 297 static int
282 298 gt_internal_online_or_degraded(scf_handle_t *h, graph_vertex_t *v,
283 299 restarter_instance_state_t old_state, restarter_error_t rerr)
284 300 {
285 301 int r;
286 302
287 303 /*
288 304 * If the instance has just come up, update the start
289 305 * snapshot.
290 306 */
291 307 if (gt_running(old_state) == 0) {
292 308 /*
293 309 * Don't fire if we're just recovering state
294 310 * after a restart.
295 311 */
296 312 if (old_state != RESTARTER_STATE_UNINIT &&
297 313 v->gv_post_online_f)
298 314 v->gv_post_online_f();
299 315
300 316 r = libscf_snapshots_poststart(h, v->gv_name, B_TRUE);
301 317 switch (r) {
302 318 case 0:
303 319 case ENOENT:
304 320 /*
305 321 * If ENOENT, the instance must have been
306 322 * deleted. Pretend we were successful since
307 323 * we should get a delete event later.
308 324 */
309 325 break;
310 326
311 327 case ECONNABORTED:
312 328 return (ECONNABORTED);
313 329
314 330 case EACCES:
315 331 case ENOTSUP:
316 332 default:
317 333 bad_error("libscf_snapshots_poststart", r);
318 334 }
319 335 }
320 336
321 337 if (!(v->gv_flags & GV_ENABLED)) {
322 338 vertex_send_event(v, RESTARTER_EVENT_TYPE_DISABLE);
323 339 } else if (v->gv_flags & GV_TOOFFLINE) {
324 340 /*
325 341 * If the vertex has the GV_TOOFFLINE flag set then that's
326 342 * because the instance was transitioning from offline to
327 343 * online and the reverse disable algorithm doesn't offline
328 344 * those instances because it was already appearing offline.
329 345 * So do it now.
330 346 */
331 347 offline_vertex(v);
332 348 }
333 349
334 350 if (gt_running(old_state) == 0) {
335 351 log_framework(LOG_DEBUG, "Propagating start of %s.\n",
336 352 v->gv_name);
337 353
338 354 graph_transition_propagate(v, PROPAGATE_START, rerr);
339 355 } else if (rerr == RERR_REFRESH) {
340 356 /* For refresh we'll get a message sans state change */
341 357
342 358 log_framework(LOG_DEBUG, "Propagating refresh of %s.\n",
343 359 v->gv_name);
344 360
345 361 graph_transition_propagate(v, PROPAGATE_STOP, rerr);
346 362 }
347 363
348 364 return (0);
349 365 }
350 366
351 367 static int
352 368 gt_enter_online(scf_handle_t *h, graph_vertex_t *v,
353 369 restarter_instance_state_t old_state, restarter_error_t rerr)
354 370 {
355 371 int r;
356 372
357 373 r = gt_internal_online_or_degraded(h, v, old_state, rerr);
358 374 if (r != 0)
359 375 return (r);
360 376
361 377 graph_transition_sulogin(RESTARTER_STATE_ONLINE, old_state);
362 378 return (0);
363 379 }
364 380
365 381 static int
366 382 gt_enter_degraded(scf_handle_t *h, graph_vertex_t *v,
367 383 restarter_instance_state_t old_state, restarter_error_t rerr)
368 384 {
369 385 int r;
370 386
371 387 r = gt_internal_online_or_degraded(h, v, old_state, rerr);
372 388 if (r != 0)
373 389 return (r);
374 390
375 391 graph_transition_sulogin(RESTARTER_STATE_DEGRADED, old_state);
376 392 return (0);
377 393 }
378 394
379 395 /*
380 396 * gt_transition() implements the state transition for the graph
381 397 * state machine. It can return:
382 398 * 0 success
383 399 * ECONNABORTED repository connection aborted
384 400 *
385 401 * v->gv_state should be set to the state we're transitioning to before
386 402 * calling this function.
387 403 */
388 404 int
389 405 gt_transition(scf_handle_t *h, graph_vertex_t *v, restarter_error_t rerr,
390 406 restarter_instance_state_t old_state)
391 407 {
392 408 int err;
393 409 int lost_repository = 0;
394 410
395 411 /*
396 412 * If there's a common set of work to be done on exit from the
397 413 * old_state, include it as a separate set of functions here. For
398 414 * now there's no such work, so there are no gt_exit functions.
399 415 */
400 416
401 417 err = vertex_subgraph_dependencies_shutdown(h, v, old_state);
402 418 switch (err) {
403 419 case 0:
404 420 break;
405 421
406 422 case ECONNABORTED:
407 423 lost_repository = 1;
408 424 break;
409 425
410 426 default:
411 427 bad_error("vertex_subgraph_dependencies_shutdown", err);
412 428 }
413 429
414 430 /*
415 431 * Now call the appropriate gt_enter function for the new state.
416 432 */
417 433 switch (v->gv_state) {
418 434 case RESTARTER_STATE_UNINIT:
419 435 err = gt_enter_uninit(h, v, old_state, rerr);
420 436 break;
421 437
422 438 case RESTARTER_STATE_DISABLED:
423 439 err = gt_enter_disabled(h, v, old_state, rerr);
424 440 break;
425 441
426 442 case RESTARTER_STATE_OFFLINE:
427 443 err = gt_enter_offline(h, v, old_state, rerr);
428 444 break;
429 445
430 446 case RESTARTER_STATE_ONLINE:
431 447 err = gt_enter_online(h, v, old_state, rerr);
432 448 break;
433 449
434 450 case RESTARTER_STATE_DEGRADED:
435 451 err = gt_enter_degraded(h, v, old_state, rerr);
436 452 break;
437 453
438 454 case RESTARTER_STATE_MAINT:
439 455 err = gt_enter_maint(h, v, old_state, rerr);
440 456 break;
441 457
442 458 default:
443 459 /* Shouldn't be in an invalid state. */
444 460 #ifndef NDEBUG
445 461 uu_warn("%s:%d: Invalid state %d.\n", __FILE__, __LINE__,
446 462 v->gv_state);
447 463 #endif
448 464 abort();
449 465 }
450 466
451 467 switch (err) {
452 468 case 0:
453 469 break;
454 470
455 471 case ECONNABORTED:
456 472 lost_repository = 1;
457 473 break;
458 474
459 475 default:
460 476 #ifndef NDEBUG
461 477 uu_warn("%s:%d: "
462 478 "gt_enter_%s() failed with unexpected error %d.\n",
463 479 __FILE__, __LINE__, instance_state_str[v->gv_state], err);
464 480 #endif
465 481 abort();
466 482 }
467 483
468 484 return (lost_repository ? ECONNABORTED : 0);
469 485 }
↓ open down ↓ |
190 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX