Print this page
7928 Add support for SMF_EXIT_TEMP_TRANSIENT
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/cmd/svc/startd/method.c
+++ new/usr/src/cmd/svc/startd/method.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
↓ open down ↓ |
14 lines elided |
↑ open up ↑ |
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright 2011 Joyent Inc.
25 + * Copyright 2017 RackTop Systems.
25 26 */
26 27
27 28 /*
28 29 * method.c - method execution functions
29 30 *
30 31 * This file contains the routines needed to run a method: a fork(2)-exec(2)
31 32 * invocation monitored using either the contract filesystem or waitpid(2).
32 33 * (Plain fork1(2) support is provided in fork.c.)
33 34 *
34 35 * Contract Transfer
35 36 * When we restart a service, we want to transfer any contracts that the old
36 37 * service's contract inherited. This means that (a) we must not abandon the
37 38 * old contract when the service dies and (b) we must write the id of the old
38 39 * contract into the terms of the new contract. There should be limits to
39 40 * (a), though, since we don't want to keep the contract around forever. To
40 41 * this end we'll say that services in the offline state may have a contract
40 41 * to be transferred and services in the disabled or maintenance states cannot.
42 43 * This means that when a service transitions from online (or degraded) to
43 44 * offline, the contract should be preserved, and when the service transitions
44 45 * from offline to online (i.e., the start method), we'll transfer inherited
45 46 * contracts.
46 47 */
47 48
48 49 #include <sys/contract/process.h>
49 50 #include <sys/ctfs.h>
50 51 #include <sys/stat.h>
51 52 #include <sys/time.h>
52 53 #include <sys/types.h>
53 54 #include <sys/uio.h>
54 55 #include <sys/wait.h>
55 56 #include <alloca.h>
56 57 #include <assert.h>
57 58 #include <errno.h>
58 59 #include <fcntl.h>
59 60 #include <libcontract.h>
60 61 #include <libcontract_priv.h>
61 62 #include <libgen.h>
62 63 #include <librestart.h>
63 64 #include <libscf.h>
64 65 #include <limits.h>
65 66 #include <port.h>
66 67 #include <sac.h>
67 68 #include <signal.h>
68 69 #include <stdlib.h>
69 70 #include <string.h>
70 71 #include <strings.h>
71 72 #include <unistd.h>
72 73 #include <atomic.h>
73 74 #include <poll.h>
74 75 #include <libscf_priv.h>
75 76
76 77 #include "startd.h"
77 78
78 79 #define SBIN_SH "/sbin/sh"
79 80
80 81 /*
81 82 * Used to tell if contracts are in the process of being
82 83 * stored into the svc.startd internal hash table.
83 84 */
84 85 volatile uint16_t storing_contract = 0;
85 86
86 87 /*
87 88 * Mapping from restart_on method-type to contract events. Must correspond to
88 89 * enum method_restart_t.
89 90 */
90 91 static uint_t method_events[] = {
91 92 /* METHOD_RESTART_ALL */
92 93 CT_PR_EV_HWERR | CT_PR_EV_SIGNAL | CT_PR_EV_CORE | CT_PR_EV_EMPTY,
93 94 /* METHOD_RESTART_EXTERNAL_FAULT */
94 95 CT_PR_EV_HWERR | CT_PR_EV_SIGNAL,
95 96 /* METHOD_RESTART_ANY_FAULT */
96 97 CT_PR_EV_HWERR | CT_PR_EV_SIGNAL | CT_PR_EV_CORE
97 98 };
98 99
99 100 /*
100 101 * method_record_start(restarter_inst_t *)
101 102 * Record a service start for rate limiting. Place the current time
102 103 * in the circular array of instance starts.
103 104 */
104 105 static void
105 106 method_record_start(restarter_inst_t *inst)
106 107 {
107 108 int index = inst->ri_start_index++ % RINST_START_TIMES;
108 109
109 110 inst->ri_start_time[index] = gethrtime();
110 111 }
111 112
112 113 /*
113 114 * method_rate_critical(restarter_inst_t *)
114 115 * Return true if the average start interval is less than the permitted
115 116 * interval. The implicit interval defaults to RINST_FAILURE_RATE_NS and
116 117 * RINST_START_TIMES but may be overridden with the svc properties
117 118 * startd/critical_failure_count and startd/critical_failure_period
118 119 * which represent the number of failures to consider and the amount of
119 120 * time in seconds in which that number may occur, respectively. Note that
120 121 * this time is measured as of the transition to 'enabled' rather than wall
121 122 * clock time.
122 123 * Implicit success if insufficient measurements for an average exist.
123 124 */
124 125 int
125 126 method_rate_critical(restarter_inst_t *inst)
126 127 {
127 128 hrtime_t critical_failure_period;
128 129 uint_t critical_failure_count = RINST_START_TIMES;
129 130 uint_t n = inst->ri_start_index;
130 131 hrtime_t avg_ns = 0;
131 132 uint64_t scf_fr, scf_st;
132 133 scf_propvec_t *prop = NULL;
133 134 scf_propvec_t restart_critical[] = {
134 135 { "critical_failure_period", NULL, SCF_TYPE_INTEGER, NULL, 0 },
135 136 { "critical_failure_count", NULL, SCF_TYPE_INTEGER, NULL, 0 },
136 137 { NULL }
137 138 };
138 139
139 140 if (instance_is_wait_style(inst))
140 141 critical_failure_period = RINST_WT_SVC_FAILURE_RATE_NS;
141 142 else
142 143 critical_failure_period = RINST_FAILURE_RATE_NS;
143 144
144 145 restart_critical[0].pv_ptr = &scf_fr;
145 146 restart_critical[1].pv_ptr = &scf_st;
146 147
147 148 if (scf_read_propvec(inst->ri_i.i_fmri, "startd",
148 149 B_TRUE, restart_critical, &prop) != SCF_FAILED) {
149 150 /*
150 151 * critical_failure_period is expressed
151 152 * in seconds but tracked in ns
152 153 */
153 154 critical_failure_period = (hrtime_t)scf_fr * NANOSEC;
154 155 critical_failure_count = (uint_t)scf_st;
155 156 }
156 157 if (inst->ri_start_index < critical_failure_count)
157 158 return (0);
158 159
159 160 avg_ns =
160 161 (inst->ri_start_time[(n - 1) % critical_failure_count] -
161 162 inst->ri_start_time[n % critical_failure_count]) /
162 163 (critical_failure_count - 1);
163 164
164 165 return (avg_ns < critical_failure_period);
165 166 }
166 167
167 168 /*
168 169 * int method_is_transient()
169 170 * Determine if the method for the given instance is transient,
170 171 * from a contract perspective. Return 1 if it is, and 0 if it isn't.
171 172 */
↓ open down ↓ |
137 lines elided |
↑ open up ↑ |
172 173 static int
173 174 method_is_transient(restarter_inst_t *inst, int type)
174 175 {
175 176 if (instance_is_transient_style(inst) || type != METHOD_START)
176 177 return (1);
177 178 else
178 179 return (0);
179 180 }
180 181
181 182 /*
183 + * int method_failed()
184 + * Return 1 if the exit_code indicates failure (not all non-zero
185 + * exit codes do) otherwise return 0.
186 + */
187 +static int
188 +method_failed(int exit_code)
189 +{
190 + if (exit_code != 0 && exit_code != SMF_EXIT_TEMP_TRANSIENT)
191 + return (1);
192 + else
193 + return (0);
194 +}
195 +
196 +/*
182 197 * void method_store_contract()
183 198 * Store the newly created contract id into local structures and
184 199 * the repository. If the repository connection is broken it is rebound.
185 200 */
186 201 static void
187 202 method_store_contract(restarter_inst_t *inst, int type, ctid_t *cid)
188 203 {
189 204 int r;
190 205 boolean_t primary;
191 206
192 207 if (errno = contract_latest(cid))
193 208 uu_die("%s: Couldn't get new contract's id", inst->ri_i.i_fmri);
194 209
195 210 primary = !method_is_transient(inst, type);
196 211
197 212 if (!primary) {
198 213 if (inst->ri_i.i_transient_ctid != 0) {
199 214 log_framework(LOG_INFO,
200 215 "%s: transient ctid expected to be 0 but "
201 216 "was set to %ld\n", inst->ri_i.i_fmri,
202 217 inst->ri_i.i_transient_ctid);
203 218 }
204 219
205 220 inst->ri_i.i_transient_ctid = *cid;
206 221 } else {
207 222 if (inst->ri_i.i_primary_ctid != 0) {
208 223 /*
209 224 * There was an old contract that we transferred.
210 225 * Remove it.
211 226 */
212 227 method_remove_contract(inst, B_TRUE, B_FALSE);
213 228 }
214 229
215 230 if (inst->ri_i.i_primary_ctid != 0) {
216 231 log_framework(LOG_INFO,
217 232 "%s: primary ctid expected to be 0 but "
218 233 "was set to %ld\n", inst->ri_i.i_fmri,
219 234 inst->ri_i.i_primary_ctid);
220 235 }
221 236
222 237 inst->ri_i.i_primary_ctid = *cid;
223 238 inst->ri_i.i_primary_ctid_stopped = 0;
224 239
225 240 log_framework(LOG_DEBUG, "Storing primary contract %ld for "
226 241 "%s.\n", *cid, inst->ri_i.i_fmri);
227 242
228 243 contract_hash_store(*cid, inst->ri_id);
229 244 }
230 245
231 246 again:
232 247 if (inst->ri_mi_deleted)
233 248 return;
234 249
235 250 r = restarter_store_contract(inst->ri_m_inst, *cid, primary ?
236 251 RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT);
237 252 switch (r) {
238 253 case 0:
239 254 break;
240 255
241 256 case ECANCELED:
242 257 inst->ri_mi_deleted = B_TRUE;
243 258 break;
244 259
245 260 case ECONNABORTED:
246 261 libscf_handle_rebind(scf_instance_handle(inst->ri_m_inst));
247 262 /* FALLTHROUGH */
248 263
249 264 case EBADF:
250 265 libscf_reget_instance(inst);
251 266 goto again;
252 267
253 268 case ENOMEM:
254 269 case EPERM:
255 270 case EACCES:
256 271 case EROFS:
257 272 uu_die("%s: Couldn't store contract id %ld",
258 273 inst->ri_i.i_fmri, *cid);
259 274 /* NOTREACHED */
260 275
261 276 case EINVAL:
262 277 default:
263 278 bad_error("restarter_store_contract", r);
264 279 }
265 280 }
266 281
267 282 /*
268 283 * void method_remove_contract()
269 284 * Remove any non-permanent contracts from internal structures and
270 285 * the repository, then abandon them.
271 286 * Returns
272 287 * 0 - success
273 288 * ECANCELED - inst was deleted from the repository
274 289 *
275 290 * If the repository connection was broken, it is rebound.
276 291 */
277 292 void
278 293 method_remove_contract(restarter_inst_t *inst, boolean_t primary,
279 294 boolean_t abandon)
280 295 {
281 296 ctid_t * const ctidp = primary ? &inst->ri_i.i_primary_ctid :
282 297 &inst->ri_i.i_transient_ctid;
283 298
284 299 int r;
285 300
286 301 assert(*ctidp != 0);
287 302
288 303 log_framework(LOG_DEBUG, "Removing %s contract %lu for %s.\n",
289 304 primary ? "primary" : "transient", *ctidp, inst->ri_i.i_fmri);
290 305
291 306 if (abandon)
292 307 contract_abandon(*ctidp);
293 308
294 309 again:
295 310 if (inst->ri_mi_deleted) {
296 311 r = ECANCELED;
297 312 goto out;
298 313 }
299 314
300 315 r = restarter_remove_contract(inst->ri_m_inst, *ctidp, primary ?
301 316 RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT);
302 317 switch (r) {
303 318 case 0:
304 319 break;
305 320
306 321 case ECANCELED:
307 322 inst->ri_mi_deleted = B_TRUE;
308 323 break;
309 324
310 325 case ECONNABORTED:
311 326 libscf_handle_rebind(scf_instance_handle(inst->ri_m_inst));
312 327 /* FALLTHROUGH */
313 328
314 329 case EBADF:
315 330 libscf_reget_instance(inst);
316 331 goto again;
317 332
318 333 case ENOMEM:
319 334 case EPERM:
320 335 case EACCES:
321 336 case EROFS:
322 337 log_error(LOG_INFO, "%s: Couldn't remove contract id %ld: "
323 338 "%s.\n", inst->ri_i.i_fmri, *ctidp, strerror(r));
324 339 break;
325 340
326 341 case EINVAL:
327 342 default:
328 343 bad_error("restarter_remove_contract", r);
329 344 }
330 345
331 346 out:
332 347 if (primary)
333 348 contract_hash_remove(*ctidp);
334 349
335 350 *ctidp = 0;
336 351 }
337 352
338 353 static const char *method_names[] = { "start", "stop", "refresh" };
339 354
340 355 /*
341 356 * int method_ready_contract(restarter_inst_t *, int, method_restart_t, int)
342 357 *
343 358 * Activate a contract template for the type method of inst. type,
344 359 * restart_on, and cte_mask dictate the critical events term of the contract.
345 360 * Returns
346 361 * 0 - success
347 362 * ECANCELED - inst has been deleted from the repository
348 363 */
349 364 static int
350 365 method_ready_contract(restarter_inst_t *inst, int type,
351 366 method_restart_t restart_on, uint_t cte_mask)
352 367 {
353 368 int tmpl, err, istrans, iswait, ret;
354 369 uint_t cevents, fevents;
355 370
356 371 /*
357 372 * Correctly supporting wait-style services is tricky without
358 373 * rearchitecting startd to cope with multiple event sources
359 374 * simultaneously trying to stop an instance. Until a better
360 375 * solution is implemented, we avoid this problem for
361 376 * wait-style services by making contract events fatal and
362 377 * letting the wait code alone handle stopping the service.
363 378 */
364 379 iswait = instance_is_wait_style(inst);
365 380 istrans = method_is_transient(inst, type);
366 381
367 382 tmpl = open64(CTFS_ROOT "/process/template", O_RDWR);
368 383 if (tmpl == -1)
369 384 uu_die("Could not create contract template");
370 385
371 386 /*
372 387 * We assume non-login processes are unlikely to create
373 388 * multiple process groups, and set CT_PR_PGRPONLY for all
374 389 * wait-style services' contracts.
375 390 */
376 391 err = ct_pr_tmpl_set_param(tmpl, CT_PR_INHERIT | CT_PR_REGENT |
377 392 (iswait ? CT_PR_PGRPONLY : 0));
378 393 assert(err == 0);
379 394
380 395 if (istrans) {
381 396 cevents = 0;
382 397 fevents = 0;
383 398 } else {
384 399 assert(restart_on >= 0);
385 400 assert(restart_on <= METHOD_RESTART_ANY_FAULT);
386 401 cevents = method_events[restart_on] & ~cte_mask;
387 402 fevents = iswait ?
388 403 (method_events[restart_on] & ~cte_mask & CT_PR_ALLFATAL) :
389 404 0;
390 405 }
391 406
392 407 err = ct_tmpl_set_critical(tmpl, cevents);
393 408 assert(err == 0);
394 409
395 410 err = ct_tmpl_set_informative(tmpl, 0);
396 411 assert(err == 0);
397 412 err = ct_pr_tmpl_set_fatal(tmpl, fevents);
398 413 assert(err == 0);
399 414
400 415 err = ct_tmpl_set_cookie(tmpl, istrans ? METHOD_OTHER_COOKIE :
401 416 METHOD_START_COOKIE);
402 417 assert(err == 0);
403 418
404 419 if (type == METHOD_START && inst->ri_i.i_primary_ctid != 0) {
405 420 ret = ct_pr_tmpl_set_transfer(tmpl, inst->ri_i.i_primary_ctid);
406 421 switch (ret) {
407 422 case 0:
408 423 break;
409 424
410 425 case ENOTEMPTY:
411 426 /* No contracts for you! */
412 427 method_remove_contract(inst, B_TRUE, B_TRUE);
413 428 if (inst->ri_mi_deleted) {
414 429 ret = ECANCELED;
415 430 goto out;
416 431 }
417 432 break;
418 433
419 434 case EINVAL:
420 435 case ESRCH:
421 436 case EACCES:
422 437 default:
423 438 bad_error("ct_pr_tmpl_set_transfer", ret);
424 439 }
425 440 }
426 441
427 442 err = ct_pr_tmpl_set_svc_fmri(tmpl, inst->ri_i.i_fmri);
428 443 assert(err == 0);
429 444 err = ct_pr_tmpl_set_svc_aux(tmpl, method_names[type]);
430 445 assert(err == 0);
431 446
432 447 err = ct_tmpl_activate(tmpl);
433 448 assert(err == 0);
434 449
435 450 ret = 0;
436 451
437 452 out:
438 453 err = close(tmpl);
439 454 assert(err == 0);
440 455
441 456 return (ret);
442 457 }
443 458
444 459 static void
445 460 exec_method(const restarter_inst_t *inst, int type, const char *method,
446 461 struct method_context *mcp, uint8_t need_session)
447 462 {
448 463 char *cmd;
449 464 const char *errf;
450 465 char **nenv;
451 466 int rsmc_errno = 0;
452 467
453 468 cmd = uu_msprintf("exec %s", method);
454 469
455 470 if (inst->ri_utmpx_prefix[0] != '\0' && inst->ri_utmpx_prefix != NULL)
456 471 (void) utmpx_mark_init(getpid(), inst->ri_utmpx_prefix);
457 472
458 473 setlog(inst->ri_logstem);
459 474 log_instance(inst, B_FALSE, "Executing %s method (\"%s\").",
460 475 method_names[type], method);
461 476
462 477 if (need_session)
463 478 (void) setpgrp();
464 479
465 480 /* Set credentials. */
466 481 rsmc_errno = restarter_set_method_context(mcp, &errf);
467 482 if (rsmc_errno != 0) {
468 483 log_instance(inst, B_FALSE,
469 484 "svc.startd could not set context for method: ");
470 485
471 486 if (rsmc_errno == -1) {
472 487 if (strcmp(errf, "core_set_process_path") == 0) {
473 488 log_instance(inst, B_FALSE,
474 489 "Could not set corefile path.");
475 490 } else if (strcmp(errf, "setproject") == 0) {
476 491 log_instance(inst, B_FALSE, "%s: a resource "
477 492 "control assignment failed", errf);
478 493 } else if (strcmp(errf, "pool_set_binding") == 0) {
479 494 log_instance(inst, B_FALSE, "%s: a system "
480 495 "error occurred", errf);
481 496 } else {
482 497 #ifndef NDEBUG
483 498 uu_warn("%s:%d: Bad function name \"%s\" for "
484 499 "error %d from "
485 500 "restarter_set_method_context().\n",
486 501 __FILE__, __LINE__, errf, rsmc_errno);
487 502 #endif
488 503 abort();
489 504 }
490 505
491 506 exit(1);
492 507 }
493 508
494 509 if (errf != NULL && strcmp(errf, "pool_set_binding") == 0) {
495 510 switch (rsmc_errno) {
496 511 case ENOENT:
497 512 log_instance(inst, B_FALSE, "%s: the pool "
498 513 "could not be found", errf);
499 514 break;
500 515
501 516 case EBADF:
502 517 log_instance(inst, B_FALSE, "%s: the "
503 518 "configuration is invalid", errf);
504 519 break;
505 520
506 521 case EINVAL:
507 522 log_instance(inst, B_FALSE, "%s: pool name "
508 523 "\"%s\" is invalid", errf,
509 524 mcp->resource_pool);
510 525 break;
511 526
512 527 default:
513 528 #ifndef NDEBUG
514 529 uu_warn("%s:%d: Bad error %d for function %s "
515 530 "in restarter_set_method_context().\n",
516 531 __FILE__, __LINE__, rsmc_errno, errf);
517 532 #endif
518 533 abort();
519 534 }
520 535
521 536 exit(SMF_EXIT_ERR_CONFIG);
522 537 }
523 538
524 539 if (errf != NULL && strcmp(errf, "chdir") == 0) {
525 540 switch (rsmc_errno) {
526 541 case EACCES:
527 542 case EFAULT:
528 543 case EIO:
529 544 case ELOOP:
530 545 case ENAMETOOLONG:
531 546 case ENOENT:
532 547 case ENOLINK:
533 548 case ENOTDIR:
534 549 log_instance(inst, B_FALSE, "%s: %s (\"%s\")",
535 550 errf,
536 551 strerror(rsmc_errno), mcp->working_dir);
537 552 break;
538 553
539 554 default:
540 555 #ifndef NDEBUG
541 556 uu_warn("%s:%d: Bad error %d for function %s "
542 557 "in restarter_set_method_context().\n",
543 558 __FILE__, __LINE__, rsmc_errno, errf);
544 559 #endif
545 560 abort();
546 561 }
547 562
548 563 exit(SMF_EXIT_ERR_CONFIG);
549 564 }
550 565
551 566 if (errf != NULL) {
552 567 errno = rsmc_errno;
553 568 perror(errf);
554 569
555 570 switch (rsmc_errno) {
556 571 case EINVAL:
557 572 case EPERM:
558 573 case ENOENT:
559 574 case ENAMETOOLONG:
560 575 case ERANGE:
561 576 case ESRCH:
562 577 exit(SMF_EXIT_ERR_CONFIG);
563 578 /* NOTREACHED */
564 579
565 580 default:
566 581 exit(1);
567 582 }
568 583 }
569 584
570 585 switch (rsmc_errno) {
571 586 case ENOMEM:
572 587 log_instance(inst, B_FALSE, "Out of memory.");
573 588 exit(1);
574 589 /* NOTREACHED */
575 590
576 591 case ENOENT:
577 592 log_instance(inst, B_FALSE, "Missing passwd entry for "
578 593 "user.");
579 594 exit(SMF_EXIT_ERR_CONFIG);
580 595 /* NOTREACHED */
581 596
582 597 default:
583 598 #ifndef NDEBUG
584 599 uu_warn("%s:%d: Bad miscellaneous error %d from "
585 600 "restarter_set_method_context().\n", __FILE__,
586 601 __LINE__, rsmc_errno);
587 602 #endif
588 603 abort();
589 604 }
590 605 }
591 606
592 607 nenv = set_smf_env(mcp->env, mcp->env_sz, NULL, inst,
593 608 method_names[type]);
594 609
595 610 log_preexec();
596 611
597 612 (void) execle(SBIN_SH, SBIN_SH, "-c", cmd, NULL, nenv);
598 613
599 614 exit(10);
600 615 }
601 616
602 617 static void
603 618 write_status(restarter_inst_t *inst, const char *mname, int stat)
604 619 {
605 620 int r;
606 621
607 622 again:
608 623 if (inst->ri_mi_deleted)
609 624 return;
610 625
611 626 r = libscf_write_method_status(inst->ri_m_inst, mname, stat);
612 627 switch (r) {
613 628 case 0:
614 629 break;
615 630
616 631 case ECONNABORTED:
617 632 libscf_reget_instance(inst);
618 633 goto again;
619 634
620 635 case ECANCELED:
621 636 inst->ri_mi_deleted = 1;
622 637 break;
623 638
624 639 case EPERM:
625 640 case EACCES:
626 641 case EROFS:
627 642 log_framework(LOG_INFO, "Could not write exit status "
628 643 "for %s method of %s: %s.\n", mname,
629 644 inst->ri_i.i_fmri, strerror(r));
630 645 break;
631 646
632 647 case ENAMETOOLONG:
633 648 default:
634 649 bad_error("libscf_write_method_status", r);
635 650 }
636 651 }
637 652
638 653 /*
639 654 * int method_run()
640 655 * Execute the type method of instp. If it requires a fork(), wait for it
641 656 * to return and return its exit code in *exit_code. Otherwise set
642 657 * *exit_code to 0 if the method succeeds & -1 if it fails. If the
643 658 * repository connection is broken, it is rebound, but inst may not be
644 659 * reset.
645 660 * Returns
646 661 * 0 - success
647 662 * EINVAL - A correct method or method context couldn't be retrieved.
648 663 * EIO - Contract kill failed.
649 664 * EFAULT - Method couldn't be executed successfully.
650 665 * ELOOP - Retry threshold exceeded.
651 666 * ECANCELED - inst was deleted from the repository before method was run
652 667 * ERANGE - Timeout retry threshold exceeded.
653 668 * EAGAIN - Failed due to external cause, retry.
654 669 */
655 670 int
656 671 method_run(restarter_inst_t **instp, int type, int *exit_code)
657 672 {
658 673 char *method;
659 674 int ret_status;
660 675 pid_t pid;
661 676 method_restart_t restart_on;
662 677 uint_t cte_mask;
663 678 uint8_t need_session;
664 679 scf_handle_t *h;
665 680 scf_snapshot_t *snap;
666 681 const char *mname;
667 682 mc_error_t *m_error;
668 683 struct method_context *mcp;
669 684 int result = 0, timeout_fired = 0;
670 685 int sig, r;
671 686 boolean_t transient;
672 687 uint64_t timeout;
673 688 uint8_t timeout_retry;
674 689 ctid_t ctid;
675 690 int ctfd = -1;
676 691 restarter_inst_t *inst = *instp;
677 692 int id = inst->ri_id;
678 693 int forkerr;
679 694
680 695 assert(MUTEX_HELD(&inst->ri_lock));
681 696 assert(instance_in_transition(inst));
682 697
683 698 if (inst->ri_mi_deleted)
684 699 return (ECANCELED);
685 700
686 701 *exit_code = 0;
687 702
688 703 assert(0 <= type && type <= 2);
689 704 mname = method_names[type];
690 705
691 706 if (type == METHOD_START)
692 707 inst->ri_pre_online_hook();
693 708
694 709 h = scf_instance_handle(inst->ri_m_inst);
695 710
696 711 snap = scf_snapshot_create(h);
697 712 if (snap == NULL ||
698 713 scf_instance_get_snapshot(inst->ri_m_inst, "running", snap) != 0) {
699 714 log_framework(LOG_DEBUG,
700 715 "Could not get running snapshot for %s. "
701 716 "Using editing version to run method %s.\n",
702 717 inst->ri_i.i_fmri, mname);
703 718 scf_snapshot_destroy(snap);
704 719 snap = NULL;
705 720 }
706 721
707 722 /*
708 723 * After this point, we may be logging to the instance log.
709 724 * Make sure we've noted where that log is as a property of
710 725 * the instance.
711 726 */
712 727 r = libscf_note_method_log(inst->ri_m_inst, st->st_log_prefix,
713 728 inst->ri_logstem);
714 729 if (r != 0) {
715 730 log_framework(LOG_WARNING,
716 731 "%s: couldn't note log location: %s\n",
717 732 inst->ri_i.i_fmri, strerror(r));
718 733 }
719 734
720 735 if ((method = libscf_get_method(h, type, inst, snap, &restart_on,
721 736 &cte_mask, &need_session, &timeout, &timeout_retry)) == NULL) {
722 737 if (errno == LIBSCF_PGROUP_ABSENT) {
723 738 log_framework(LOG_DEBUG,
724 739 "%s: instance has no method property group '%s'.\n",
725 740 inst->ri_i.i_fmri, mname);
726 741 if (type == METHOD_REFRESH)
727 742 log_instance(inst, B_TRUE, "No '%s' method "
728 743 "defined. Treating as :true.", mname);
729 744 else
730 745 log_instance(inst, B_TRUE, "Method property "
731 746 "group '%s' is not present.", mname);
732 747 scf_snapshot_destroy(snap);
733 748 return (0);
734 749 } else if (errno == LIBSCF_PROPERTY_ABSENT) {
735 750 log_framework(LOG_DEBUG,
736 751 "%s: instance has no '%s/exec' method property.\n",
737 752 inst->ri_i.i_fmri, mname);
738 753 log_instance(inst, B_TRUE, "Method property '%s/exec "
739 754 "is not present.", mname);
740 755 scf_snapshot_destroy(snap);
741 756 return (0);
742 757 } else {
743 758 log_error(LOG_WARNING,
744 759 "%s: instance libscf_get_method failed\n",
745 760 inst->ri_i.i_fmri);
746 761 scf_snapshot_destroy(snap);
747 762 return (EINVAL);
748 763 }
749 764 }
750 765
751 766 /* open service contract if stopping a non-transient service */
752 767 if (type == METHOD_STOP && (!instance_is_transient_style(inst))) {
753 768 if (inst->ri_i.i_primary_ctid == 0) {
754 769 /* service is not running, nothing to stop */
755 770 log_framework(LOG_DEBUG, "%s: instance has no primary "
756 771 "contract, no service to stop.\n",
757 772 inst->ri_i.i_fmri);
758 773 scf_snapshot_destroy(snap);
759 774 return (0);
760 775 }
761 776 if ((ctfd = contract_open(inst->ri_i.i_primary_ctid, "process",
762 777 "events", O_RDONLY)) < 0) {
763 778 result = EFAULT;
764 779 log_instance(inst, B_TRUE, "Could not open service "
765 780 "contract %ld. Stop method not run.",
766 781 inst->ri_i.i_primary_ctid);
767 782 goto out;
768 783 }
769 784 }
770 785
771 786 if (restarter_is_null_method(method)) {
772 787 log_framework(LOG_DEBUG, "%s: null method succeeds\n",
773 788 inst->ri_i.i_fmri);
774 789
775 790 log_instance(inst, B_TRUE, "Executing %s method (null).",
776 791 mname);
777 792
778 793 if (type == METHOD_START)
779 794 write_status(inst, mname, 0);
780 795 goto out;
781 796 }
782 797
783 798 sig = restarter_is_kill_method(method);
784 799 if (sig >= 0) {
785 800
786 801 if (inst->ri_i.i_primary_ctid == 0) {
787 802 log_error(LOG_ERR, "%s: :kill with no contract\n",
788 803 inst->ri_i.i_fmri);
789 804 log_instance(inst, B_TRUE, "Invalid use of \":kill\" "
790 805 "as stop method for transient service.");
791 806 result = EINVAL;
792 807 goto out;
793 808 }
794 809
795 810 log_framework(LOG_DEBUG,
796 811 "%s: :killing contract with signal %d\n",
797 812 inst->ri_i.i_fmri, sig);
798 813
799 814 log_instance(inst, B_TRUE, "Executing %s method (:kill).",
800 815 mname);
801 816
802 817 if (contract_kill(inst->ri_i.i_primary_ctid, sig,
803 818 inst->ri_i.i_fmri) != 0) {
804 819 result = EIO;
805 820 goto out;
806 821 } else
807 822 goto assured_kill;
808 823 }
809 824
810 825 log_framework(LOG_DEBUG, "%s: forking to run method %s\n",
811 826 inst->ri_i.i_fmri, method);
812 827
813 828 m_error = restarter_get_method_context(RESTARTER_METHOD_CONTEXT_VERSION,
814 829 inst->ri_m_inst, snap, mname, method, &mcp);
815 830
816 831 if (m_error != NULL) {
817 832 log_instance(inst, B_TRUE, "%s", m_error->msg);
818 833 restarter_mc_error_destroy(m_error);
819 834 result = EINVAL;
820 835 goto out;
821 836 }
822 837
823 838 r = method_ready_contract(inst, type, restart_on, cte_mask);
824 839 if (r != 0) {
825 840 assert(r == ECANCELED);
826 841 assert(inst->ri_mi_deleted);
827 842 restarter_free_method_context(mcp);
828 843 result = ECANCELED;
829 844 goto out;
830 845 }
831 846
832 847 /*
833 848 * Validate safety of method contexts, to save children work.
834 849 */
835 850 if (!restarter_rm_libs_loadable())
836 851 log_framework(LOG_DEBUG, "%s: method contexts limited "
837 852 "to root-accessible libraries\n", inst->ri_i.i_fmri);
838 853
839 854 /*
840 855 * For wait-style svc, sanity check that method exists to prevent an
841 856 * infinite loop.
842 857 */
843 858 if (instance_is_wait_style(inst) && type == METHOD_START) {
844 859 char *pend;
845 860 struct stat64 sbuf;
846 861
847 862 /*
848 863 * We need to handle start method strings that have arguments,
849 864 * such as '/lib/svc/method/console-login %i'.
850 865 */
851 866 if ((pend = strchr(method, ' ')) != NULL)
852 867 *pend = '\0';
853 868
854 869 if (*method == '/' && stat64(method, &sbuf) == -1 &&
855 870 errno == ENOENT) {
856 871 log_instance(inst, B_TRUE, "Missing start method (%s), "
857 872 "changing state to maintenance.", method);
858 873 restarter_free_method_context(mcp);
859 874 result = ENOENT;
860 875 goto out;
861 876 }
862 877 if (pend != NULL)
863 878 *pend = ' ';
864 879 }
865 880
866 881 /*
867 882 * If the service is restarting too quickly, send it to
868 883 * maintenance.
869 884 */
870 885 if (type == METHOD_START) {
871 886 method_record_start(inst);
872 887 if (method_rate_critical(inst) &&
873 888 !instance_is_wait_style(inst)) {
874 889 log_instance(inst, B_TRUE, "Restarting too quickly, "
875 890 "changing state to maintenance.");
876 891 result = ELOOP;
877 892 restarter_free_method_context(mcp);
878 893 goto out;
879 894 }
880 895 }
881 896
882 897 atomic_add_16(&storing_contract, 1);
883 898 pid = startd_fork1(&forkerr);
884 899 if (pid == 0)
885 900 exec_method(inst, type, method, mcp, need_session);
886 901
887 902 if (pid == -1) {
888 903 atomic_add_16(&storing_contract, -1);
889 904 if (forkerr == EAGAIN)
890 905 result = EAGAIN;
891 906 else
892 907 result = EFAULT;
893 908
894 909 log_error(LOG_WARNING,
895 910 "%s: Couldn't fork to execute method %s: %s\n",
896 911 inst->ri_i.i_fmri, method, strerror(forkerr));
897 912
898 913 restarter_free_method_context(mcp);
899 914 goto out;
900 915 }
901 916
902 917
903 918 /*
904 919 * Get the contract id, decide whether it is primary or transient, and
905 920 * stash it in inst & the repository.
906 921 */
907 922 method_store_contract(inst, type, &ctid);
908 923 atomic_add_16(&storing_contract, -1);
909 924
910 925 restarter_free_method_context(mcp);
911 926
912 927 /*
913 928 * Similarly for the start method PID.
914 929 */
915 930 if (type == METHOD_START && !inst->ri_mi_deleted)
916 931 (void) libscf_write_start_pid(inst->ri_m_inst, pid);
917 932
918 933 if (instance_is_wait_style(inst) && type == METHOD_START) {
919 934 /* Wait style instances don't get timeouts on start methods. */
920 935 if (wait_register(pid, inst->ri_i.i_fmri, 1, 0)) {
921 936 log_error(LOG_WARNING,
922 937 "%s: couldn't register %ld for wait\n",
923 938 inst->ri_i.i_fmri, pid);
924 939 result = EFAULT;
925 940 goto contract_out;
926 941 }
927 942 write_status(inst, mname, 0);
928 943
929 944 } else {
930 945 int r, err;
931 946 time_t start_time;
932 947 time_t end_time;
933 948
934 949 /*
935 950 * Because on upgrade/live-upgrade we may have no chance
936 951 * to override faulty timeout values on the way to
937 952 * manifest import, all services on the path to manifest
938 953 * import are treated the same as INFINITE timeout services.
939 954 */
940 955
941 956 start_time = time(NULL);
942 957 if (timeout != METHOD_TIMEOUT_INFINITE && !is_timeout_ovr(inst))
943 958 timeout_insert(inst, ctid, timeout);
944 959 else
945 960 timeout = METHOD_TIMEOUT_INFINITE;
946 961
947 962 /* Unlock the instance while waiting for the method. */
948 963 MUTEX_UNLOCK(&inst->ri_lock);
949 964
950 965 do {
951 966 r = waitpid(pid, &ret_status, NULL);
952 967 } while (r == -1 && errno == EINTR);
953 968 if (r == -1)
954 969 err = errno;
955 970
956 971 /* Re-grab the lock. */
957 972 inst = inst_lookup_by_id(id);
958 973
959 974 /*
960 975 * inst can't be removed, as the removal thread waits
961 976 * for completion of this one.
962 977 */
963 978 assert(inst != NULL);
964 979 *instp = inst;
965 980
966 981 if (inst->ri_timeout != NULL && inst->ri_timeout->te_fired)
967 982 timeout_fired = 1;
968 983
969 984 timeout_remove(inst, ctid);
970 985
971 986 log_framework(LOG_DEBUG,
972 987 "%s method for %s exited with status %d.\n", mname,
973 988 inst->ri_i.i_fmri, WEXITSTATUS(ret_status));
974 989
975 990 if (r == -1) {
976 991 log_error(LOG_WARNING,
977 992 "Couldn't waitpid() for %s method of %s (%s).\n",
978 993 mname, inst->ri_i.i_fmri, strerror(err));
979 994 result = EFAULT;
980 995 goto contract_out;
981 996 }
982 997
983 998 if (type == METHOD_START)
984 999 write_status(inst, mname, ret_status);
985 1000
986 1001 /* return ERANGE if this service doesn't retry on timeout */
987 1002 if (timeout_fired == 1 && timeout_retry == 0) {
988 1003 result = ERANGE;
989 1004 goto contract_out;
990 1005 }
991 1006
992 1007 if (!WIFEXITED(ret_status)) {
993 1008 /*
994 1009 * If method didn't exit itself (it was killed by an
995 1010 * external entity, etc.), consider the entire
996 1011 * method_run as failed.
997 1012 */
998 1013 if (WIFSIGNALED(ret_status)) {
999 1014 char buf[SIG2STR_MAX];
1000 1015 (void) sig2str(WTERMSIG(ret_status), buf);
1001 1016
1002 1017 log_error(LOG_WARNING, "%s: Method \"%s\" "
1003 1018 "failed due to signal %s.\n",
1004 1019 inst->ri_i.i_fmri, method, buf);
1005 1020 log_instance(inst, B_TRUE, "Method \"%s\" "
1006 1021 "failed due to signal %s.", mname, buf);
1007 1022 } else {
1008 1023 log_error(LOG_WARNING, "%s: Method \"%s\" "
1009 1024 "failed with exit status %d.\n",
1010 1025 inst->ri_i.i_fmri, method,
↓ open down ↓ |
819 lines elided |
↑ open up ↑ |
1011 1026 WEXITSTATUS(ret_status));
1012 1027 log_instance(inst, B_TRUE, "Method \"%s\" "
1013 1028 "failed with exit status %d.", mname,
1014 1029 WEXITSTATUS(ret_status));
1015 1030 }
1016 1031 result = EAGAIN;
1017 1032 goto contract_out;
1018 1033 }
1019 1034
1020 1035 *exit_code = WEXITSTATUS(ret_status);
1021 - if (*exit_code != 0) {
1036 + if (method_failed(*exit_code) != 0) {
1022 1037 log_error(LOG_WARNING,
1023 1038 "%s: Method \"%s\" failed with exit status %d.\n",
1024 1039 inst->ri_i.i_fmri, method, WEXITSTATUS(ret_status));
1025 1040 }
1026 1041
1042 + if (type == METHOD_STOP &&
1043 + *exit_code == SMF_EXIT_TEMP_TRANSIENT) {
1044 + log_instance(inst, B_TRUE, "Invalid use of "
1045 + "\"$SMF_EXIT_TEMP_TRANSIENT\" in stop method.");
1046 + result = EINVAL;
1047 + goto contract_out;
1048 + }
1049 +
1027 1050 log_instance(inst, B_TRUE, "Method \"%s\" exited with status "
1028 1051 "%d.", mname, *exit_code);
1029 1052
1030 - if (*exit_code != 0)
1053 + if (method_failed(*exit_code) != 0)
1031 1054 goto contract_out;
1032 1055
1033 1056 end_time = time(NULL);
1034 1057
1035 1058 /* Give service contract remaining seconds to empty */
1036 1059 if (timeout != METHOD_TIMEOUT_INFINITE)
1037 1060 timeout -= (end_time - start_time);
1038 1061 }
1039 1062
1040 1063 assured_kill:
1041 1064 /*
1042 1065 * For stop methods, assure that the service contract has emptied
1043 1066 * before returning.
1044 1067 */
1045 1068 if (type == METHOD_STOP && (!instance_is_transient_style(inst)) &&
1046 1069 !(contract_is_empty(inst->ri_i.i_primary_ctid))) {
1047 1070 int times = 0;
1048 1071
1049 1072 if (timeout != METHOD_TIMEOUT_INFINITE)
1050 1073 timeout_insert(inst, inst->ri_i.i_primary_ctid,
1051 1074 timeout);
1052 1075
1053 1076 for (;;) {
1054 1077 /*
1055 1078 * Check frequently at first, then back off. This
1056 1079 * keeps startd from idling while shutting down.
1057 1080 */
1058 1081 if (times < 20) {
1059 1082 (void) poll(NULL, 0, 5);
1060 1083 times++;
1061 1084 } else {
1062 1085 (void) poll(NULL, 0, 100);
1063 1086 }
1064 1087 if (contract_is_empty(inst->ri_i.i_primary_ctid))
1065 1088 break;
1066 1089 }
1067 1090
1068 1091 if (timeout != METHOD_TIMEOUT_INFINITE)
1069 1092 if (inst->ri_timeout->te_fired)
1070 1093 result = EFAULT;
1071 1094
1072 1095 timeout_remove(inst, inst->ri_i.i_primary_ctid);
1073 1096 }
1074 1097
1075 1098 contract_out:
1076 1099 /* Abandon contracts for transient methods & methods that fail. */
1077 1100 transient = method_is_transient(inst, type);
1078 1101 if ((transient || *exit_code != 0 || result != 0) &&
1079 1102 (restarter_is_kill_method(method) < 0))
1080 1103 method_remove_contract(inst, !transient, B_TRUE);
1081 1104
1082 1105 out:
1083 1106 if (ctfd >= 0)
1084 1107 (void) close(ctfd);
1085 1108 scf_snapshot_destroy(snap);
1086 1109 free(method);
1087 1110 return (result);
1088 1111 }
1089 1112
1090 1113 /*
1091 1114 * The method thread executes a service method to effect a state transition.
1092 1115 * The next_state of info->sf_id should be non-_NONE on entrance, and it will
1093 1116 * be _NONE on exit (state will either be what next_state was (on success), or
1094 1117 * it will be _MAINT (on error)).
1095 1118 *
1096 1119 * There are six classes of methods to consider: start & other (stop, refresh)
1097 1120 * for each of "normal" services, wait services, and transient services. For
1098 1121 * each, the method must be fetched from the repository & executed. fork()ed
1099 1122 * methods must be waited on, except for the start method of wait services
1100 1123 * (which must be registered with the wait subsystem via wait_register()). If
1101 1124 * the method succeeded (returned 0), then for start methods its contract
1102 1125 * should be recorded as the primary contract for the service. For other
1103 1126 * methods, it should be abandoned. If the method fails, then depending on
1104 1127 * the failure, either the method should be reexecuted or the service should
1105 1128 * be put into maintenance. Either way the contract should be abandoned.
 *
 * arg is a fork_info_t. It is released with startd_free() just before this
 * function returns, and the only return statement returns NULL.
 *
 * NOTE(review): in the new revision "succeeded" is decided by
 * method_failed(exit_code) == 0 rather than by exit_code == 0. Presumably
 * this lets SMF_EXIT_TEMP_TRANSIENT count as success; confirm against the
 * definition of method_failed(), which is not shown here.
1106 1129 */
1107 1130 void *
1108 1131 method_thread(void *arg)
1109 1132 {
1110 1133 fork_info_t *info = arg;
1111 1134 restarter_inst_t *inst;
1112 1135 scf_handle_t *local_handle;
1113 1136 scf_instance_t *s_inst = NULL;
/*
 * NOTE(review): exit_code is not initialized here and is read on the
 * failure paths below. Verify that method_run() stores to it on every
 * return path.
 */
1114 1137 int r, exit_code;
1115 1138 boolean_t retryable;
1116 1139 restarter_str_t reason;
1117 1140
1118 1141 assert(0 <= info->sf_method_type && info->sf_method_type <= 2);
1119 1142
1120 1143 /* Get (and lock) the restarter_inst_t. */
1121 1144 inst = inst_lookup_by_id(info->sf_id);
1122 1145
1123 1146 assert(inst->ri_method_thread != 0);
1124 1147 assert(instance_in_transition(inst) == 1);
1125 1148
1126 1149 /*
1127 1150 * We cannot leave this function with inst in transition, because
1128 1151 * protocol.c withholds messages for inst otherwise.
1129 1152 */
1130 1153
1131 1154 log_framework(LOG_DEBUG, "method_thread() running %s method for %s.\n",
1132 1155 method_names[info->sf_method_type], inst->ri_i.i_fmri);
1133 1156
1134 1157 local_handle = libscf_handle_create_bound_loop();
1135 1158
1136 1159 rebind_retry:
1137 1160 /* get scf_instance_t */
1138 1161 switch (r = libscf_fmri_get_instance(local_handle, inst->ri_i.i_fmri,
1139 1162 &s_inst)) {
1140 1163 case 0:
1141 1164 break;
1142 1165
1143 1166 case ECONNABORTED:
/* Rebind the handle, then repeat the lookup from rebind_retry. */
1144 1167 libscf_handle_rebind(local_handle);
1145 1168 goto rebind_retry;
1146 1169
1147 1170 case ENOENT:
1148 1171 /*
1149 1172 * It's not there, but we need to call this so protocol.c
1150 1173 * doesn't think it's in transition anymore.
1151 1174 */
1152 1175 (void) restarter_instance_update_states(local_handle, inst,
1153 1176 inst->ri_i.i_state, RESTARTER_STATE_NONE, RERR_NONE,
1154 1177 restarter_str_none);
1155 1178 goto out;
1156 1179
1157 1180 case EINVAL:
1158 1181 case ENOTSUP:
1159 1182 default:
1160 1183 bad_error("libscf_fmri_get_instance", r);
1161 1184 }
↓ open down ↓ |
121 lines elided |
↑ open up ↑ |
1162 1185
1163 1186 inst->ri_m_inst = s_inst;
1164 1187 inst->ri_mi_deleted = B_FALSE;
1165 1188
/*
 * Re-entered from the failure path below when the failure is retryable
 * and update_fault_count(inst, FAULT_COUNT_INCR) does not return 1.
 */
1166 1189 retry:
1167 1190 if (info->sf_method_type == METHOD_START)
1168 1191 log_transition(inst, START_REQUESTED);
1169 1192
1170 1193 r = method_run(&inst, info->sf_method_type, &exit_code);
1171 1194
/*
 * Success requires method_run() to return 0 and method_failed() to
 * return 0 for the exit code (the old revision required the exit
 * code itself to be 0).
 */
1172 - if (r == 0 && exit_code == 0) {
1195 + if (r == 0 && method_failed(exit_code) == 0) {
1173 1196 /* Success! */
1174 1197 assert(inst->ri_i.i_next_state != RESTARTER_STATE_NONE);
1175 1198
1176 1199 /*
1177 1200 * When a stop method succeeds, remove the primary contract of
1178 1201 * the service, unless we're going to offline, in which case
1179 1202 * retain the contract so we can transfer inherited contracts to
1180 1203 * the replacement service.
1181 1204 */
1182 1205
1183 1206 if (info->sf_method_type == METHOD_STOP &&
1184 1207 inst->ri_i.i_primary_ctid != 0) {
1185 1208 if (inst->ri_i.i_next_state == RESTARTER_STATE_OFFLINE)
1186 1209 inst->ri_i.i_primary_ctid_stopped = 1;
1187 1210 else
1188 1211 method_remove_contract(inst, B_TRUE, B_TRUE);
1189 1212 }
1190 1213 /*
1191 1214 * We don't care whether the handle was rebound because this is
1192 1215 * the last thing we do with it.
1193 1216 */
1194 1217 (void) restarter_instance_update_states(local_handle, inst,
1195 1218 inst->ri_i.i_next_state, RESTARTER_STATE_NONE,
1196 1219 info->sf_event_type, info->sf_reason);
1197 1220
1198 1221 (void) update_fault_count(inst, FAULT_COUNT_RESET);
1199 1222
1200 1223 goto out;
1201 1224 }
1202 1225
1203 1226 /* Failure. Retry or go to maintenance. */
1204 1227
/*
 * Any method_run() error other than EAGAIN is never retried. When r
 * is 0 or EAGAIN, the method's exit code decides: the four
 * SMF_EXIT_ERR_* codes listed below are not retried, everything
 * else is.
 */
1205 1228 if (r != 0 && r != EAGAIN) {
1206 1229 retryable = B_FALSE;
1207 1230 } else {
1208 1231 switch (exit_code) {
1209 1232 case SMF_EXIT_ERR_CONFIG:
1210 1233 case SMF_EXIT_ERR_NOSMF:
1211 1234 case SMF_EXIT_ERR_PERM:
1212 1235 case SMF_EXIT_ERR_FATAL:
1213 1236 retryable = B_FALSE;
1214 1237 break;
1215 1238
1216 1239 default:
1217 1240 retryable = B_TRUE;
1218 1241 }
1219 1242 }
1220 1243
1221 1244 if (retryable && update_fault_count(inst, FAULT_COUNT_INCR) != 1)
1222 1245 goto retry;
1223 1246
1224 1247 /* maintenance */
/* The logged transition is chosen from r first, then from exit_code. */
1225 1248 if (r == ELOOP)
1226 1249 log_transition(inst, START_FAILED_REPEATEDLY);
1227 1250 else if (r == ERANGE)
1228 1251 log_transition(inst, START_FAILED_TIMEOUT_FATAL);
1229 1252 else if (exit_code == SMF_EXIT_ERR_CONFIG)
1230 1253 log_transition(inst, START_FAILED_CONFIGURATION);
1231 1254 else if (exit_code == SMF_EXIT_ERR_FATAL)
1232 1255 log_transition(inst, START_FAILED_FATAL);
1233 1256 else
1234 1257 log_transition(inst, START_FAILED_OTHER);
1235 1258
/*
 * Reason string recorded with the maintenance state: ELOOP takes
 * precedence; a retryable failure that reaches this point has used
 * up its retries; anything else is a plain method failure.
 */
1236 1259 if (r == ELOOP) {
1237 1260 reason = restarter_str_restarting_too_quickly;
1238 1261 } else if (retryable) {
1239 1262 reason = restarter_str_fault_threshold_reached;
1240 1263 } else {
1241 1264 reason = restarter_str_method_failed;
1242 1265 }
1243 1266
1244 1267 (void) restarter_instance_update_states(local_handle, inst,
1245 1268 RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_FAULT,
1246 1269 reason);
1247 1270
/*
 * Call method_remove_contract() only for non-transient methods that
 * still have a primary contract id recorded.
 */
1248 1271 if (!method_is_transient(inst, info->sf_method_type) &&
1249 1272 inst->ri_i.i_primary_ctid != 0)
1250 1273 method_remove_contract(inst, B_TRUE, B_TRUE);
1251 1274
/* Common exit: clear the method-thread marker, wake waiters, clean up. */
1252 1275 out:
1253 1276 inst->ri_method_thread = 0;
1254 1277
1255 1278 /*
1256 1279 * Unlock the mutex after broadcasting to avoid a race condition
1257 1280 * with restarter_delete_inst() when the 'inst' structure is freed.
1258 1281 */
1259 1282 (void) pthread_cond_broadcast(&inst->ri_method_cv);
1260 1283 MUTEX_UNLOCK(&inst->ri_lock);
1261 1284
1262 1285 scf_instance_destroy(s_inst);
1263 1286 scf_handle_destroy(local_handle);
1264 1287 startd_free(info, sizeof (fork_info_t));
1265 1288 return (NULL);
1266 1289 }
↓ open down ↓ |
84 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX