5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2011 Joyent Inc.
25 */
26
27 /*
28 * method.c - method execution functions
29 *
30 * This file contains the routines needed to run a method: a fork(2)-exec(2)
31 * invocation monitored using either the contract filesystem or waitpid(2).
32 * (Plain fork1(2) support is provided in fork.c.)
33 *
34 * Contract Transfer
35 * When we restart a service, we want to transfer any contracts that the old
36 * service's contract inherited. This means that (a) we must not abandon the
37 * old contract when the service dies and (b) we must write the id of the old
38 * contract into the terms of the new contract. There should be limits to
39 * (a), though, since we don't want to keep the contract around forever. To
40 * this end we'll say that services in the offline state may have a contract
41 * to be transferred and services in the disabled or maintenance states cannot.
42 * This means that when a service transitions from online (or degraded) to
43 * offline, the contract should be preserved, and when the service transitions
44 * from offline to online (i.e., the start method), we'll transfer inherited
162 (critical_failure_count - 1);
163
164 return (avg_ns < critical_failure_period);
165 }
166
167 /*
168 * int method_is_transient()
169 * Determine if the method for the given instance is transient,
170 * from a contract perspective. Return 1 if it is, and 0 if it isn't.
171 */
172 static int
173 method_is_transient(restarter_inst_t *inst, int type)
174 {
175 if (instance_is_transient_style(inst) || type != METHOD_START)
176 return (1);
177 else
178 return (0);
179 }
180
181 /*
182 * void method_store_contract()
183 * Store the newly created contract id into local structures and
184 * the repository. If the repository connection is broken it is rebound.
185 */
186 static void
187 method_store_contract(restarter_inst_t *inst, int type, ctid_t *cid)
188 {
189 int r;
190 boolean_t primary;
191
192 if (errno = contract_latest(cid))
193 uu_die("%s: Couldn't get new contract's id", inst->ri_i.i_fmri);
194
195 primary = !method_is_transient(inst, type);
196
197 if (!primary) {
198 if (inst->ri_i.i_transient_ctid != 0) {
199 log_framework(LOG_INFO,
200 "%s: transient ctid expected to be 0 but "
201 "was set to %ld\n", inst->ri_i.i_fmri,
1001
1002 log_error(LOG_WARNING, "%s: Method \"%s\" "
1003 "failed due to signal %s.\n",
1004 inst->ri_i.i_fmri, method, buf);
1005 log_instance(inst, B_TRUE, "Method \"%s\" "
1006 "failed due to signal %s.", mname, buf);
1007 } else {
1008 log_error(LOG_WARNING, "%s: Method \"%s\" "
1009 "failed with exit status %d.\n",
1010 inst->ri_i.i_fmri, method,
1011 WEXITSTATUS(ret_status));
1012 log_instance(inst, B_TRUE, "Method \"%s\" "
1013 "failed with exit status %d.", mname,
1014 WEXITSTATUS(ret_status));
1015 }
1016 result = EAGAIN;
1017 goto contract_out;
1018 }
1019
1020 *exit_code = WEXITSTATUS(ret_status);
1021 if (*exit_code != 0) {
1022 log_error(LOG_WARNING,
1023 "%s: Method \"%s\" failed with exit status %d.\n",
1024 inst->ri_i.i_fmri, method, WEXITSTATUS(ret_status));
1025 }
1026
1027 log_instance(inst, B_TRUE, "Method \"%s\" exited with status "
1028 "%d.", mname, *exit_code);
1029
1030 if (*exit_code != 0)
1031 goto contract_out;
1032
1033 end_time = time(NULL);
1034
1035 /* Give service contract remaining seconds to empty */
1036 if (timeout != METHOD_TIMEOUT_INFINITE)
1037 timeout -= (end_time - start_time);
1038 }
1039
1040 assured_kill:
1041 /*
1042 * For stop methods, assure that the service contract has emptied
1043 * before returning.
1044 */
1045 if (type == METHOD_STOP && (!instance_is_transient_style(inst)) &&
1046 !(contract_is_empty(inst->ri_i.i_primary_ctid))) {
1047 int times = 0;
1048
1049 if (timeout != METHOD_TIMEOUT_INFINITE)
1050 timeout_insert(inst, inst->ri_i.i_primary_ctid,
1152 (void) restarter_instance_update_states(local_handle, inst,
1153 inst->ri_i.i_state, RESTARTER_STATE_NONE, RERR_NONE,
1154 restarter_str_none);
1155 goto out;
1156
1157 case EINVAL:
1158 case ENOTSUP:
1159 default:
1160 bad_error("libscf_fmri_get_instance", r);
1161 }
1162
1163 inst->ri_m_inst = s_inst;
1164 inst->ri_mi_deleted = B_FALSE;
1165
1166 retry:
1167 if (info->sf_method_type == METHOD_START)
1168 log_transition(inst, START_REQUESTED);
1169
1170 r = method_run(&inst, info->sf_method_type, &exit_code);
1171
1172 if (r == 0 && exit_code == 0) {
1173 /* Success! */
1174 assert(inst->ri_i.i_next_state != RESTARTER_STATE_NONE);
1175
1176 /*
1177 * When a stop method succeeds, remove the primary contract of
1178 * the service, unless we're going to offline, in which case
1179 * retain the contract so we can transfer inherited contracts to
1180 * the replacement service.
1181 */
1182
1183 if (info->sf_method_type == METHOD_STOP &&
1184 inst->ri_i.i_primary_ctid != 0) {
1185 if (inst->ri_i.i_next_state == RESTARTER_STATE_OFFLINE)
1186 inst->ri_i.i_primary_ctid_stopped = 1;
1187 else
1188 method_remove_contract(inst, B_TRUE, B_TRUE);
1189 }
1190 /*
1191 * We don't care whether the handle was rebound because this is
1192 * the last thing we do with it.
|
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2011 Joyent Inc.
25 * Copyright 2017 RackTop Systems.
26 */
27
28 /*
29 * method.c - method execution functions
30 *
31 * This file contains the routines needed to run a method: a fork(2)-exec(2)
32 * invocation monitored using either the contract filesystem or waitpid(2).
33 * (Plain fork1(2) support is provided in fork.c.)
34 *
35 * Contract Transfer
36 * When we restart a service, we want to transfer any contracts that the old
37 * service's contract inherited. This means that (a) we must not abandon the
38 * old contract when the service dies and (b) we must write the id of the old
39 * contract into the terms of the new contract. There should be limits to
40 * (a), though, since we don't want to keep the contract around forever. To
41 * this end we'll say that services in the offline state may have a contract
42 * to be transferred and services in the disabled or maintenance states cannot.
43 * This means that when a service transitions from online (or degraded) to
44 * offline, the contract should be preserved, and when the service transitions
45 * from offline to online (i.e., the start method), we'll transfer inherited
163 (critical_failure_count - 1);
164
165 return (avg_ns < critical_failure_period);
166 }
167
168 /*
169 * int method_is_transient()
170 * Determine if the method for the given instance is transient,
171 * from a contract perspective. Return 1 if it is, and 0 if it isn't.
172 */
173 static int
174 method_is_transient(restarter_inst_t *inst, int type)
175 {
176 if (instance_is_transient_style(inst) || type != METHOD_START)
177 return (1);
178 else
179 return (0);
180 }
181
182 /*
183 * int method_failed()
184 * Return 1 if the exit_code indicates failure (not all non-zero
185 * exit codes do) otherwise return 0.
186 */
187 static int
188 method_failed(int exit_code)
189 {
190 if (exit_code != 0 && exit_code != SMF_EXIT_TEMP_TRANSIENT)
191 return (1);
192 else
193 return (0);
194 }
195
196 /*
197 * void method_store_contract()
198 * Store the newly created contract id into local structures and
199 * the repository. If the repository connection is broken it is rebound.
200 */
201 static void
202 method_store_contract(restarter_inst_t *inst, int type, ctid_t *cid)
203 {
204 int r;
205 boolean_t primary;
206
207 if (errno = contract_latest(cid))
208 uu_die("%s: Couldn't get new contract's id", inst->ri_i.i_fmri);
209
210 primary = !method_is_transient(inst, type);
211
212 if (!primary) {
213 if (inst->ri_i.i_transient_ctid != 0) {
214 log_framework(LOG_INFO,
215 "%s: transient ctid expected to be 0 but "
216 "was set to %ld\n", inst->ri_i.i_fmri,
1016
1017 log_error(LOG_WARNING, "%s: Method \"%s\" "
1018 "failed due to signal %s.\n",
1019 inst->ri_i.i_fmri, method, buf);
1020 log_instance(inst, B_TRUE, "Method \"%s\" "
1021 "failed due to signal %s.", mname, buf);
1022 } else {
1023 log_error(LOG_WARNING, "%s: Method \"%s\" "
1024 "failed with exit status %d.\n",
1025 inst->ri_i.i_fmri, method,
1026 WEXITSTATUS(ret_status));
1027 log_instance(inst, B_TRUE, "Method \"%s\" "
1028 "failed with exit status %d.", mname,
1029 WEXITSTATUS(ret_status));
1030 }
1031 result = EAGAIN;
1032 goto contract_out;
1033 }
1034
1035 *exit_code = WEXITSTATUS(ret_status);
1036 if (method_failed(*exit_code) != 0) {
1037 log_error(LOG_WARNING,
1038 "%s: Method \"%s\" failed with exit status %d.\n",
1039 inst->ri_i.i_fmri, method, WEXITSTATUS(ret_status));
1040 }
1041
1042 if (type == METHOD_STOP &&
1043 *exit_code == SMF_EXIT_TEMP_TRANSIENT) {
1044 log_instance(inst, B_TRUE, "Invalid use of "
1045 "\"$SMF_EXIT_TEMP_TRANSIENT\" in stop method.");
1046 result = EINVAL;
1047 goto contract_out;
1048 }
1049
1050 log_instance(inst, B_TRUE, "Method \"%s\" exited with status "
1051 "%d.", mname, *exit_code);
1052
1053 if (method_failed(*exit_code) != 0)
1054 goto contract_out;
1055
1056 end_time = time(NULL);
1057
1058 /* Give service contract remaining seconds to empty */
1059 if (timeout != METHOD_TIMEOUT_INFINITE)
1060 timeout -= (end_time - start_time);
1061 }
1062
1063 assured_kill:
1064 /*
1065 * For stop methods, assure that the service contract has emptied
1066 * before returning.
1067 */
1068 if (type == METHOD_STOP && (!instance_is_transient_style(inst)) &&
1069 !(contract_is_empty(inst->ri_i.i_primary_ctid))) {
1070 int times = 0;
1071
1072 if (timeout != METHOD_TIMEOUT_INFINITE)
1073 timeout_insert(inst, inst->ri_i.i_primary_ctid,
1175 (void) restarter_instance_update_states(local_handle, inst,
1176 inst->ri_i.i_state, RESTARTER_STATE_NONE, RERR_NONE,
1177 restarter_str_none);
1178 goto out;
1179
1180 case EINVAL:
1181 case ENOTSUP:
1182 default:
1183 bad_error("libscf_fmri_get_instance", r);
1184 }
1185
1186 inst->ri_m_inst = s_inst;
1187 inst->ri_mi_deleted = B_FALSE;
1188
1189 retry:
1190 if (info->sf_method_type == METHOD_START)
1191 log_transition(inst, START_REQUESTED);
1192
1193 r = method_run(&inst, info->sf_method_type, &exit_code);
1194
1195 if (r == 0 && method_failed(exit_code) == 0) {
1196 /* Success! */
1197 assert(inst->ri_i.i_next_state != RESTARTER_STATE_NONE);
1198
1199 /*
1200 * When a stop method succeeds, remove the primary contract of
1201 * the service, unless we're going to offline, in which case
1202 * retain the contract so we can transfer inherited contracts to
1203 * the replacement service.
1204 */
1205
1206 if (info->sf_method_type == METHOD_STOP &&
1207 inst->ri_i.i_primary_ctid != 0) {
1208 if (inst->ri_i.i_next_state == RESTARTER_STATE_OFFLINE)
1209 inst->ri_i.i_primary_ctid_stopped = 1;
1210 else
1211 method_remove_contract(inst, B_TRUE, B_TRUE);
1212 }
1213 /*
1214 * We don't care whether the handle was rebound because this is
1215 * the last thing we do with it.
|