Print this page
7267 SMF is fast and loose with optional dependencies (fixes)
Reviewed by: Dan McDonald <danmcd@omniti.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Albert Lee <trisk@omniti.com>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/cmd/svc/startd/transition.c
+++ new/usr/src/cmd/svc/startd/transition.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 *
25 25 * Copyright 2016 RackTop Systems.
26 26 */
27 27
28 28
29 29 /*
30 30 * transition.c - Graph State Machine
31 31 *
32 32 * The graph state machine is implemented here, with a typical approach
33 33 * of a function per state. Separating the implementation allows more
34 34 * clarity into the actions taken on notification of state change, as well
35 35 * as a place for future expansion including hooks for configurable actions.
36 36 * All functions are called with dgraph_lock held.
37 37 *
38 38 * The start action for this state machine is not explicit. The states
39 39 * (ONLINE and DEGRADED) which need to know when they're entering the state
40 40 * due to a daemon restart implement this understanding by checking for
41 41 * transition from uninitialized. In the future, this would likely be better
42 42 * as an explicit start action instead of relying on an overloaded transition.
43 43 *
44 44 * All gt_enter functions use the same set of return codes.
45 45 * 0 success
46 46 * ECONNABORTED repository connection aborted
47 47 */
48 48
49 49 #include "startd.h"
50 50
51 51 static int
52 52 gt_running(restarter_instance_state_t state)
53 53 {
54 54 if (state == RESTARTER_STATE_ONLINE ||
55 55 state == RESTARTER_STATE_DEGRADED)
56 56 return (1);
57 57
58 58 return (0);
59 59 }
60 60
61 61 static int
62 62 gt_enter_uninit(scf_handle_t *h, graph_vertex_t *v,
63 63 restarter_instance_state_t old_state, restarter_error_t rerr)
64 64 {
65 65 int err;
66 66 scf_instance_t *inst;
67 67
68 68 /* Initialize instance by refreshing it. */
69 69
70 70 err = libscf_fmri_get_instance(h, v->gv_name, &inst);
71 71 switch (err) {
72 72 case 0:
73 73 break;
74 74
75 75 case ECONNABORTED:
76 76 return (ECONNABORTED);
77 77
78 78 case ENOENT:
79 79 return (0);
80 80
81 81 case EINVAL:
82 82 case ENOTSUP:
83 83 default:
84 84 bad_error("libscf_fmri_get_instance", err);
85 85 }
86 86
87 87 err = refresh_vertex(v, inst);
88 88 if (err == 0)
89 89 graph_enable_by_vertex(v, v->gv_flags & GV_ENABLED, 0);
90 90
91 91 scf_instance_destroy(inst);
92 92
93 93 /* If the service was running, propagate a stop event. */
94 94 if (gt_running(old_state)) {
95 95 log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
96 96 v->gv_name);
97 97
98 98 graph_transition_propagate(v, PROPAGATE_STOP, rerr);
99 99 }
100 100
101 101 graph_transition_sulogin(RESTARTER_STATE_UNINIT, old_state);
102 102 return (0);
103 103 }
104 104
105 105 /* ARGSUSED */
106 106 static int
107 107 gt_enter_maint(scf_handle_t *h, graph_vertex_t *v,
108 108 restarter_instance_state_t old_state, restarter_error_t rerr)
109 109 {
110 110 int to_offline = v->gv_flags & GV_TOOFFLINE;
111 111
112 112 /*
113 113 * If the service was running, propagate a stop event. If the
114 114 * service was not running the maintenance transition may satisfy
115 115 * optional dependencies and should be propagated to determine
116 116 * whether new dependents are satisfiable.
117 117 * Instances that transition to maintenance and have the GV_TOOFFLINE
118 118 * flag are special because they can expose new subtree leaves so
119 119 * propagate the offline to the instance dependencies.
120 120 */
121 121
122 122 /* instance transitioning to maintenance is considered disabled */
123 123 v->gv_flags &= ~GV_TODISABLE;
124 124 v->gv_flags &= ~GV_TOOFFLINE;
125 125
126 126 if (gt_running(old_state)) {
127 127 /*
128 128 * Handle state change during instance disabling.
129 129 * Propagate offline to the newly exposed leaves.
130 130 */
131 131 if (to_offline) {
132 132 log_framework(LOG_DEBUG, "%s removed from subtree\n",
133 133 v->gv_name);
134 134
135 135 graph_offline_subtree_leaves(v, (void *)h);
136 136 }
137 137
138 138 log_framework(LOG_DEBUG, "Propagating maintenance (stop) of "
139 139 "%s.\n", v->gv_name);
140 140
141 141 graph_transition_propagate(v, PROPAGATE_STOP, rerr);
142 142
143 143 /*
144 144 * The maintenance transition may satisfy optional_all/restart
145 145 * dependencies and should be propagated to determine
146 146 * whether new dependents are satisfiable.
147 147 */
148 148 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
149 149 } else {
150 150 log_framework(LOG_DEBUG, "Propagating maintenance of %s.\n",
151 151 v->gv_name);
152 152
153 153 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
154 154 }
155 155
↓ open down ↓ |
155 lines elided |
↑ open up ↑ |
156 156 graph_transition_sulogin(RESTARTER_STATE_MAINT, old_state);
157 157 return (0);
158 158 }
159 159
160 160 /* ARGSUSED */
161 161 static int
162 162 gt_enter_offline(scf_handle_t *h, graph_vertex_t *v,
163 163 restarter_instance_state_t old_state, restarter_error_t rerr)
164 164 {
165 165 int to_offline = v->gv_flags & GV_TOOFFLINE;
166 + int to_disable = v->gv_flags & GV_TODISABLE;
166 167
167 168 v->gv_flags &= ~GV_TOOFFLINE;
168 169
169 170 /*
170 171 * If the instance should be enabled, see if we can start it.
171 172 * Otherwise send a disable command.
172 173 * If an instance has the GV_TOOFFLINE flag set then it must
173 174 * remain offline until the disable process completes.
174 175 */
175 176 if (v->gv_flags & GV_ENABLED) {
176 - if (to_offline == 0)
177 + if (to_offline == 0 && to_disable == 0)
177 178 graph_start_if_satisfied(v);
178 179 } else {
179 180 if (gt_running(old_state) && v->gv_post_disable_f)
180 181 v->gv_post_disable_f();
181 182
182 183 vertex_send_event(v, RESTARTER_EVENT_TYPE_DISABLE);
183 184 }
184 185
185 186 /*
186 187 * If the service was running, propagate a stop event. If the
187 188 * service was not running the offline transition may satisfy
188 189 * optional dependencies and should be propagated to determine
189 190 * whether new dependents are satisfiable.
190 191 * Instances that transition to offline and have the GV_TOOFFLINE flag
191 192 * are special because they can expose new subtree leaves so propagate
192 193 * the offline to the instance dependencies.
193 194 */
194 195 if (gt_running(old_state)) {
195 196 /*
196 197 * Handle state change during instance disabling.
197 198 * Propagate offline to the newly exposed leaves.
198 199 */
199 200 if (to_offline) {
200 201 log_framework(LOG_DEBUG, "%s removed from subtree\n",
201 202 v->gv_name);
202 203
203 204 graph_offline_subtree_leaves(v, (void *)h);
204 205 }
205 206
206 207 log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
207 208 v->gv_name);
208 209
209 210 graph_transition_propagate(v, PROPAGATE_STOP, rerr);
210 211
211 212 /*
212 213 * The offline transition may satisfy require_any/restart
213 214 * dependencies and should be propagated to determine
214 215 * whether new dependents are satisfiable.
215 216 */
216 217 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
217 218 } else {
218 219 log_framework(LOG_DEBUG, "Propagating offline of %s.\n",
219 220 v->gv_name);
220 221
221 222 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
222 223 }
223 224
224 225 graph_transition_sulogin(RESTARTER_STATE_OFFLINE, old_state);
225 226 return (0);
226 227 }
227 228
228 229 /* ARGSUSED */
229 230 static int
230 231 gt_enter_disabled(scf_handle_t *h, graph_vertex_t *v,
231 232 restarter_instance_state_t old_state, restarter_error_t rerr)
232 233 {
233 234 int to_offline = v->gv_flags & GV_TOOFFLINE;
234 235
235 236 v->gv_flags &= ~GV_TODISABLE;
236 237 v->gv_flags &= ~GV_TOOFFLINE;
237 238
238 239 /*
239 240 * If the instance should be disabled, no problem. Otherwise,
240 241 * send an enable command, which should result in the instance
241 242 * moving to OFFLINE unless the instance is part of a subtree
242 243 * (non root) and in this case the result is unpredictable.
243 244 */
244 245 if (v->gv_flags & GV_ENABLED) {
245 246 vertex_send_event(v, RESTARTER_EVENT_TYPE_ENABLE);
246 247 } else if (gt_running(old_state) && v->gv_post_disable_f) {
247 248 v->gv_post_disable_f();
248 249 }
249 250
250 251 /*
251 252 * If the service was running, propagate this as a stop. If the
252 253 * service was not running the disabled transition may satisfy
253 254 * optional dependencies and should be propagated to determine
254 255 * whether new dependents are satisfiable.
255 256 */
256 257 if (gt_running(old_state)) {
257 258 /*
258 259 * We need to propagate the offline to newly exposed leaves in
259 260 * case we've just disabled an instance that was part of a
260 261 * subtree.
261 262 */
262 263 if (to_offline) {
263 264 log_framework(LOG_DEBUG, "%s removed from subtree\n",
264 265 v->gv_name);
265 266
266 267 /*
267 268 * Handle state change during instance disabling.
268 269 * Propagate offline to the newly exposed leaves.
269 270 */
270 271 graph_offline_subtree_leaves(v, (void *)h);
271 272 }
272 273
273 274
274 275 log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
275 276 v->gv_name);
276 277
277 278 graph_transition_propagate(v, PROPAGATE_STOP, rerr);
278 279
279 280 /*
280 281 * The disable transition may satisfy optional_all/restart
281 282 * dependencies and should be propagated to determine
282 283 * whether new dependents are satisfiable.
283 284 */
284 285 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
285 286 } else {
286 287 log_framework(LOG_DEBUG, "Propagating disable of %s.\n",
287 288 v->gv_name);
288 289
289 290 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
290 291 }
291 292
292 293 graph_transition_sulogin(RESTARTER_STATE_DISABLED, old_state);
293 294 return (0);
294 295 }
295 296
296 297 static int
297 298 gt_internal_online_or_degraded(scf_handle_t *h, graph_vertex_t *v,
298 299 restarter_instance_state_t old_state, restarter_error_t rerr)
299 300 {
300 301 int r;
301 302
302 303 /*
303 304 * If the instance has just come up, update the start
304 305 * snapshot.
305 306 */
306 307 if (gt_running(old_state) == 0) {
307 308 /*
308 309 * Don't fire if we're just recovering state
309 310 * after a restart.
310 311 */
311 312 if (old_state != RESTARTER_STATE_UNINIT &&
312 313 v->gv_post_online_f)
313 314 v->gv_post_online_f();
314 315
315 316 r = libscf_snapshots_poststart(h, v->gv_name, B_TRUE);
316 317 switch (r) {
317 318 case 0:
318 319 case ENOENT:
319 320 /*
320 321 * If ENOENT, the instance must have been
321 322 * deleted. Pretend we were successful since
322 323 * we should get a delete event later.
323 324 */
324 325 break;
325 326
326 327 case ECONNABORTED:
327 328 return (ECONNABORTED);
328 329
329 330 case EACCES:
330 331 case ENOTSUP:
331 332 default:
332 333 bad_error("libscf_snapshots_poststart", r);
333 334 }
334 335 }
335 336
336 337 if (!(v->gv_flags & GV_ENABLED)) {
337 338 vertex_send_event(v, RESTARTER_EVENT_TYPE_DISABLE);
338 339 } else if (v->gv_flags & GV_TOOFFLINE) {
339 340 /*
340 341 * If the vertex has the GV_TOOFFLINE flag set then that's
341 342 * because the instance was transitioning from offline to
342 343 * online and the reverse disable algorithm doesn't offline
343 344 * those instances because it was already appearing offline.
344 345 * So do it now.
345 346 */
346 347 offline_vertex(v);
347 348 }
348 349
349 350 if (gt_running(old_state) == 0) {
350 351 log_framework(LOG_DEBUG, "Propagating start of %s.\n",
351 352 v->gv_name);
352 353
353 354 graph_transition_propagate(v, PROPAGATE_START, rerr);
354 355 } else if (rerr == RERR_REFRESH) {
355 356 /* For refresh we'll get a message sans state change */
356 357
357 358 log_framework(LOG_DEBUG, "Propagating refresh of %s.\n",
358 359 v->gv_name);
359 360
360 361 graph_transition_propagate(v, PROPAGATE_STOP, rerr);
361 362 }
362 363
363 364 return (0);
364 365 }
365 366
366 367 static int
367 368 gt_enter_online(scf_handle_t *h, graph_vertex_t *v,
368 369 restarter_instance_state_t old_state, restarter_error_t rerr)
369 370 {
370 371 int r;
371 372
372 373 r = gt_internal_online_or_degraded(h, v, old_state, rerr);
373 374 if (r != 0)
374 375 return (r);
375 376
376 377 graph_transition_sulogin(RESTARTER_STATE_ONLINE, old_state);
377 378 return (0);
378 379 }
379 380
380 381 static int
381 382 gt_enter_degraded(scf_handle_t *h, graph_vertex_t *v,
382 383 restarter_instance_state_t old_state, restarter_error_t rerr)
383 384 {
384 385 int r;
385 386
386 387 r = gt_internal_online_or_degraded(h, v, old_state, rerr);
387 388 if (r != 0)
388 389 return (r);
389 390
390 391 graph_transition_sulogin(RESTARTER_STATE_DEGRADED, old_state);
391 392 return (0);
392 393 }
393 394
394 395 /*
395 396 * gt_transition() implements the state transition for the graph
396 397 * state machine. It can return:
397 398 * 0 success
398 399 * ECONNABORTED repository connection aborted
399 400 *
400 401 * v->gv_state should be set to the state we're transitioning to before
401 402 * calling this function.
402 403 */
403 404 int
404 405 gt_transition(scf_handle_t *h, graph_vertex_t *v, restarter_error_t rerr,
405 406 restarter_instance_state_t old_state)
406 407 {
407 408 int err;
408 409 int lost_repository = 0;
409 410
410 411 /*
411 412 * If there's a common set of work to be done on exit from the
412 413 * old_state, include it as a separate set of functions here. For
413 414 * now there's no such work, so there are no gt_exit functions.
414 415 */
415 416
416 417 err = vertex_subgraph_dependencies_shutdown(h, v, old_state);
417 418 switch (err) {
418 419 case 0:
419 420 break;
420 421
421 422 case ECONNABORTED:
422 423 lost_repository = 1;
423 424 break;
424 425
425 426 default:
426 427 bad_error("vertex_subgraph_dependencies_shutdown", err);
427 428 }
428 429
429 430 /*
430 431 * Now call the appropriate gt_enter function for the new state.
431 432 */
432 433 switch (v->gv_state) {
433 434 case RESTARTER_STATE_UNINIT:
434 435 err = gt_enter_uninit(h, v, old_state, rerr);
435 436 break;
436 437
437 438 case RESTARTER_STATE_DISABLED:
438 439 err = gt_enter_disabled(h, v, old_state, rerr);
439 440 break;
440 441
441 442 case RESTARTER_STATE_OFFLINE:
442 443 err = gt_enter_offline(h, v, old_state, rerr);
443 444 break;
444 445
445 446 case RESTARTER_STATE_ONLINE:
446 447 err = gt_enter_online(h, v, old_state, rerr);
447 448 break;
448 449
449 450 case RESTARTER_STATE_DEGRADED:
450 451 err = gt_enter_degraded(h, v, old_state, rerr);
451 452 break;
452 453
453 454 case RESTARTER_STATE_MAINT:
454 455 err = gt_enter_maint(h, v, old_state, rerr);
455 456 break;
456 457
457 458 default:
458 459 /* Shouldn't be in an invalid state. */
459 460 #ifndef NDEBUG
460 461 uu_warn("%s:%d: Invalid state %d.\n", __FILE__, __LINE__,
461 462 v->gv_state);
462 463 #endif
463 464 abort();
464 465 }
465 466
466 467 switch (err) {
467 468 case 0:
468 469 break;
469 470
470 471 case ECONNABORTED:
471 472 lost_repository = 1;
472 473 break;
473 474
474 475 default:
475 476 #ifndef NDEBUG
476 477 uu_warn("%s:%d: "
477 478 "gt_enter_%s() failed with unexpected error %d.\n",
478 479 __FILE__, __LINE__, instance_state_str[v->gv_state], err);
479 480 #endif
480 481 abort();
481 482 }
482 483
483 484 return (lost_repository ? ECONNABORTED : 0);
484 485 }
↓ open down ↓ |
298 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX