Print this page
7928 Add support for SMF_EXIT_TEMP_TRANSIENT


   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2011 Joyent Inc.

  25  */
  26 
  27 /*
  28  * method.c - method execution functions
  29  *
  30  * This file contains the routines needed to run a method:  a fork(2)-exec(2)
  31  * invocation monitored using either the contract filesystem or waitpid(2).
  32  * (Plain fork1(2) support is provided in fork.c.)
  33  *
  34  * Contract Transfer
  35  *   When we restart a service, we want to transfer any contracts that the old
  36  *   service's contract inherited.  This means that (a) we must not abandon the
  37  *   old contract when the service dies and (b) we must write the id of the old
  38  *   contract into the terms of the new contract.  There should be limits to
  39  *   (a), though, since we don't want to keep the contract around forever.  To
  40  *   this end we'll say that services in the offline state may have a contract
  41  *   to be transferred and services in the disabled or maintenance states cannot.
  42  *   This means that when a service transitions from online (or degraded) to
  43  *   offline, the contract should be preserved, and when the service transitions
  44  *   from offline to online (i.e., the start method), we'll transfer inherited


 162             (critical_failure_count - 1);
 163 
 164         return (avg_ns < critical_failure_period);
 165 }
 166 
 167 /*
 168  * int method_is_transient()
 169  *   Return 1 if the method is transient from a contract perspective, i.e.
 170  *   instance_is_transient_style(inst) or type != METHOD_START.  Else return 0.
 171  */
 172 static int
 173 method_is_transient(restarter_inst_t *inst, int type)
 174 {
 175         if (instance_is_transient_style(inst) || type != METHOD_START)
 176                 return (1);
 177         else
 178                 return (0);
 179 }
 180 
 181 /*














 182  * void method_store_contract()
 183  *   Store the newly created contract id into local structures and
 184  *   the repository.  If the repository connection is broken it is rebound.
 185  */
 186 static void
 187 method_store_contract(restarter_inst_t *inst, int type, ctid_t *cid)
 188 {
 189         int r;
 190         boolean_t primary;
 191 
 192         if (errno = contract_latest(cid))
 193                 uu_die("%s: Couldn't get new contract's id", inst->ri_i.i_fmri);
 194 
 195         primary = !method_is_transient(inst, type);
 196 
 197         if (!primary) {
 198                 if (inst->ri_i.i_transient_ctid != 0) {
 199                         log_framework(LOG_INFO,
 200                             "%s: transient ctid expected to be 0 but "
 201                             "was set to %ld\n", inst->ri_i.i_fmri,


1001 
1002                                 log_error(LOG_WARNING, "%s: Method \"%s\" "
1003                                     "failed due to signal %s.\n",
1004                                     inst->ri_i.i_fmri, method, buf);
1005                                 log_instance(inst, B_TRUE, "Method \"%s\" "
1006                                     "failed due to signal %s.", mname, buf);
1007                         } else {
1008                                 log_error(LOG_WARNING, "%s: Method \"%s\" "
1009                                     "failed with exit status %d.\n",
1010                                     inst->ri_i.i_fmri, method,
1011                                     WEXITSTATUS(ret_status));
1012                                 log_instance(inst, B_TRUE, "Method \"%s\" "
1013                                     "failed with exit status %d.", mname,
1014                                     WEXITSTATUS(ret_status));
1015                         }
1016                         result = EAGAIN;
1017                         goto contract_out;
1018                 }
1019 
1020                 *exit_code = WEXITSTATUS(ret_status);
1021                 if (*exit_code != 0) {
1022                         log_error(LOG_WARNING,
1023                             "%s: Method \"%s\" failed with exit status %d.\n",
1024                             inst->ri_i.i_fmri, method, WEXITSTATUS(ret_status));
1025                 }
1026 








1027                 log_instance(inst, B_TRUE, "Method \"%s\" exited with status "
1028                     "%d.", mname, *exit_code);
1029 
1030                 if (*exit_code != 0)
1031                         goto contract_out;
1032 
1033                 end_time = time(NULL);
1034 
1035                 /* Give service contract remaining seconds to empty */
1036                 if (timeout != METHOD_TIMEOUT_INFINITE)
1037                         timeout -= (end_time - start_time);
1038         }
1039 
1040 assured_kill:
1041         /*
1042          * For stop methods, assure that the service contract has emptied
1043          * before returning.
1044          */
1045         if (type == METHOD_STOP && (!instance_is_transient_style(inst)) &&
1046             !(contract_is_empty(inst->ri_i.i_primary_ctid))) {
1047                 int times = 0;
1048 
1049                 if (timeout != METHOD_TIMEOUT_INFINITE)
1050                         timeout_insert(inst, inst->ri_i.i_primary_ctid,


1152                 (void) restarter_instance_update_states(local_handle, inst,
1153                     inst->ri_i.i_state, RESTARTER_STATE_NONE, RERR_NONE,
1154                     restarter_str_none);
1155                 goto out;
1156 
1157         case EINVAL:
1158         case ENOTSUP:
1159         default:
1160                 bad_error("libscf_fmri_get_instance", r);
1161         }
1162 
1163         inst->ri_m_inst = s_inst;
1164         inst->ri_mi_deleted = B_FALSE;
1165 
1166 retry:
1167         if (info->sf_method_type == METHOD_START)
1168                 log_transition(inst, START_REQUESTED);
1169 
1170         r = method_run(&inst, info->sf_method_type, &exit_code);
1171 
1172         if (r == 0 && exit_code == 0) {
1173                 /* Success! */
1174                 assert(inst->ri_i.i_next_state != RESTARTER_STATE_NONE);
1175 
1176                 /*
1177                  * When a stop method succeeds, remove the primary contract of
1178                  * the service, unless we're going to offline, in which case
1179                  * retain the contract so we can transfer inherited contracts to
1180                  * the replacement service.
1181                  */
1182 
1183                 if (info->sf_method_type == METHOD_STOP &&
1184                     inst->ri_i.i_primary_ctid != 0) {
1185                         if (inst->ri_i.i_next_state == RESTARTER_STATE_OFFLINE)
1186                                 inst->ri_i.i_primary_ctid_stopped = 1;
1187                         else
1188                                 method_remove_contract(inst, B_TRUE, B_TRUE);
1189                 }
1190                 /*
1191                  * We don't care whether the handle was rebound because this is
1192                  * the last thing we do with it.




   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2011 Joyent Inc.
  25  * Copyright 2017 RackTop Systems.
  26  */
  27 
  28 /*
  29  * method.c - method execution functions
  30  *
  31  * This file contains the routines needed to run a method:  a fork(2)-exec(2)
  32  * invocation monitored using either the contract filesystem or waitpid(2).
  33  * (Plain fork1(2) support is provided in fork.c.)
  34  *
  35  * Contract Transfer
  36  *   When we restart a service, we want to transfer any contracts that the old
  37  *   service's contract inherited.  This means that (a) we must not abandon the
  38  *   old contract when the service dies and (b) we must write the id of the old
  39  *   contract into the terms of the new contract.  There should be limits to
  40  *   (a), though, since we don't want to keep the contract around forever.  To
  41  *   this end we'll say that services in the offline state may have a contract
  42  *   to be transferred and services in the disabled or maintenance states cannot.
  43  *   This means that when a service transitions from online (or degraded) to
  44  *   offline, the contract should be preserved, and when the service transitions
  45  *   from offline to online (i.e., the start method), we'll transfer inherited


 163             (critical_failure_count - 1);
 164 
 165         return (avg_ns < critical_failure_period);
 166 }
 167 
 168 /*
 169  * int method_is_transient()
 170  *   Return 1 if the method is transient from a contract perspective, i.e.
 171  *   instance_is_transient_style(inst) or type != METHOD_START.  Else return 0.
 172  */
 173 static int
 174 method_is_transient(restarter_inst_t *inst, int type)
 175 {
 176         if (instance_is_transient_style(inst) || type != METHOD_START)
 177                 return (1);
 178         else
 179                 return (0);
 180 }
 181 
 182 /*
 183  * int method_failed()
 184  *   Return 1 if exit_code indicates failure, i.e. it is neither 0 nor
 185  *   SMF_EXIT_TEMP_TRANSIENT; otherwise return 0.
 186  */
 187 static int
 188 method_failed(int exit_code)
 189 {
 190         if (exit_code != 0 && exit_code != SMF_EXIT_TEMP_TRANSIENT)
 191                 return (1);
 192         else
 193                 return (0);
 194 }
 195 
 196 /*
 197  * void method_store_contract()
 198  *   Store the newly created contract id into local structures and
 199  *   the repository.  If the repository connection is broken it is rebound.
 200  */
 201 static void
 202 method_store_contract(restarter_inst_t *inst, int type, ctid_t *cid)
 203 {
 204         int r;
 205         boolean_t primary;
 206 
 207         if (errno = contract_latest(cid))
 208                 uu_die("%s: Couldn't get new contract's id", inst->ri_i.i_fmri);
 209 
 210         primary = !method_is_transient(inst, type);
 211 
 212         if (!primary) {
 213                 if (inst->ri_i.i_transient_ctid != 0) {
 214                         log_framework(LOG_INFO,
 215                             "%s: transient ctid expected to be 0 but "
 216                             "was set to %ld\n", inst->ri_i.i_fmri,


1016 
1017                                 log_error(LOG_WARNING, "%s: Method \"%s\" "
1018                                     "failed due to signal %s.\n",
1019                                     inst->ri_i.i_fmri, method, buf);
1020                                 log_instance(inst, B_TRUE, "Method \"%s\" "
1021                                     "failed due to signal %s.", mname, buf);
1022                         } else {
1023                                 log_error(LOG_WARNING, "%s: Method \"%s\" "
1024                                     "failed with exit status %d.\n",
1025                                     inst->ri_i.i_fmri, method,
1026                                     WEXITSTATUS(ret_status));
1027                                 log_instance(inst, B_TRUE, "Method \"%s\" "
1028                                     "failed with exit status %d.", mname,
1029                                     WEXITSTATUS(ret_status));
1030                         }
1031                         result = EAGAIN;
1032                         goto contract_out;
1033                 }
1034 
1035                 *exit_code = WEXITSTATUS(ret_status);
1036                 if (method_failed(*exit_code) != 0) {
1037                         log_error(LOG_WARNING,
1038                             "%s: Method \"%s\" failed with exit status %d.\n",
1039                             inst->ri_i.i_fmri, method, WEXITSTATUS(ret_status));
1040                 }
1041 
1042                 if (type == METHOD_STOP &&
1043                     *exit_code == SMF_EXIT_TEMP_TRANSIENT) {
1044                         log_instance(inst, B_TRUE, "Invalid use of "
1045                             "\"$SMF_EXIT_TEMP_TRANSIENT\" in stop method.");
1046                         result = EINVAL;
1047                         goto contract_out;
1048                 }
1049 
1050                 log_instance(inst, B_TRUE, "Method \"%s\" exited with status "
1051                     "%d.", mname, *exit_code);
1052 
1053                 if (method_failed(*exit_code) != 0)
1054                         goto contract_out;
1055 
1056                 end_time = time(NULL);
1057 
1058                 /* Give service contract remaining seconds to empty */
1059                 if (timeout != METHOD_TIMEOUT_INFINITE)
1060                         timeout -= (end_time - start_time);
1061         }
1062 
1063 assured_kill:
1064         /*
1065          * For stop methods, assure that the service contract has emptied
1066          * before returning.
1067          */
1068         if (type == METHOD_STOP && (!instance_is_transient_style(inst)) &&
1069             !(contract_is_empty(inst->ri_i.i_primary_ctid))) {
1070                 int times = 0;
1071 
1072                 if (timeout != METHOD_TIMEOUT_INFINITE)
1073                         timeout_insert(inst, inst->ri_i.i_primary_ctid,


1175                 (void) restarter_instance_update_states(local_handle, inst,
1176                     inst->ri_i.i_state, RESTARTER_STATE_NONE, RERR_NONE,
1177                     restarter_str_none);
1178                 goto out;
1179 
1180         case EINVAL:
1181         case ENOTSUP:
1182         default:
1183                 bad_error("libscf_fmri_get_instance", r);
1184         }
1185 
1186         inst->ri_m_inst = s_inst;
1187         inst->ri_mi_deleted = B_FALSE;
1188 
1189 retry:
1190         if (info->sf_method_type == METHOD_START)
1191                 log_transition(inst, START_REQUESTED);
1192 
1193         r = method_run(&inst, info->sf_method_type, &exit_code);
1194 
1195         if (r == 0 && method_failed(exit_code) == 0) {
1196                 /* Success! */
1197                 assert(inst->ri_i.i_next_state != RESTARTER_STATE_NONE);
1198 
1199                 /*
1200                  * When a stop method succeeds, remove the primary contract of
1201                  * the service, unless we're going to offline, in which case
1202                  * retain the contract so we can transfer inherited contracts to
1203                  * the replacement service.
1204                  */
1205 
1206                 if (info->sf_method_type == METHOD_STOP &&
1207                     inst->ri_i.i_primary_ctid != 0) {
1208                         if (inst->ri_i.i_next_state == RESTARTER_STATE_OFFLINE)
1209                                 inst->ri_i.i_primary_ctid_stopped = 1;
1210                         else
1211                                 method_remove_contract(inst, B_TRUE, B_TRUE);
1212                 }
1213                 /*
1214                  * We don't care whether the handle was rebound because this is
1215                  * the last thing we do with it.