Print this page
7711 SMF: Finish implementing support for degraded state


   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2013, Joyent, Inc. All rights reserved.

  25  */
  26 
  27 /*
  28  * restarter.c - service manipulation
  29  *
  30  * This component manages services whose restarter is svc.startd, the standard
  31  * restarter.  It translates restarter protocol events from the graph engine
  32  * into actions on processes, as a delegated restarter would do.
  33  *
  34  * The master restarter manages a number of always-running threads:
  35  *   - restarter event thread: events from the graph engine
  36  *   - timeout thread: thread to fire queued timeouts
  37  *   - contract thread: thread to handle contract events
  38  *   - wait thread: thread to handle wait-based services
  39  *
  40  * The other threads are created as-needed:
  41  *   - per-instance method threads
  42  *   - per-instance event processing threads
  43  *
  44  * The interaction of all threads must result in the following conditions


1730                 /* Refresh does not change the state. */
1731                 (void) restarter_instance_update_states(h, rip,
1732                     rip->ri_i.i_state, rip->ri_i.i_state, RERR_NONE,
1733                     restarter_str_refresh);
1734 
1735                 info = startd_zalloc(sizeof (*info));
1736                 info->sf_id = rip->ri_id;
1737                 info->sf_method_type = METHOD_REFRESH;
1738                 info->sf_event_type = RERR_REFRESH;
1739                 info->sf_reason = NULL;
1740 
1741                 assert(rip->ri_method_thread == 0);
1742                 rip->ri_method_thread =
1743                     startd_thread_create(method_thread, info);
1744         }
1745 
1746         scf_snapshot_destroy(snap);
1747         scf_instance_destroy(inst);
1748 }
1749 



















































/*
 * Printable names for restarter events.  The table is looked up with an
 * event's riq_type as the index, so the order of the entries presumably
 * has to match the event type enumeration declared elsewhere -- confirm
 * against that declaration before adding or reordering entries.
 */
const char *event_names[] = {
        "INVALID",                      /*  0 */
        "ADD_INSTANCE",                 /*  1 */
        "REMOVE_INSTANCE",              /*  2 */
        "ENABLE",                       /*  3 */
        "DISABLE",                      /*  4 */
        "ADMIN_DEGRADED",               /*  5 */
        "ADMIN_REFRESH",                /*  6 */
        "ADMIN_RESTART",                /*  7 */
        "ADMIN_MAINT_OFF",              /*  8 */
        "ADMIN_MAINT_ON",               /*  9 */
        "ADMIN_MAINT_ON_IMMEDIATE",     /* 10 */
        "STOP",                         /* 11 */
        "START",                        /* 12 */
        "DEPENDENCY_CYCLE",             /* 13 */
        "INVALID_DEPENDENCY",           /* 14 */
        "ADMIN_DISABLE",                /* 15 */
        "STOP_RESET"                    /* 16 */
};
1756 
1757 /*
1758  * void *restarter_process_events()
1759  *
1760  *   Called in a separate thread to process the events on an instance's
1761  *   queue.  Empties the queue completely, and tries to keep the thread
1762  *   around for a little while after the queue is empty to save on
1763  *   startup costs.
1764  */
1765 static void *
1766 restarter_process_events(void *arg)
1767 {
1768         scf_handle_t *h;
1769         restarter_instance_qentry_t *event;
1770         restarter_inst_t *rip;
1771         char *fmri = (char *)arg;
1772         struct timespec to;
1773 
1774         assert(fmri != NULL);


1849 
1850                 case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1851                         if (event_from_tty(h, inst) == 0)
1852                                 maintain_instance(h, inst, 1,
1853                                     restarter_str_service_request);
1854                         else
1855                                 maintain_instance(h, inst, 1,
1856                                     restarter_str_administrative_request);
1857                         break;
1858 
1859                 case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1860                         unmaintain_instance(h, inst, RUNMAINT_CLEAR);
1861                         reset_start_times(inst);
1862                         break;
1863 
1864                 case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1865                         refresh_instance(h, inst);
1866                         break;
1867 
1868                 case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1869                         log_framework(LOG_WARNING, "Restarter: "
1870                             "%s command (for %s) unimplemented.\n",
1871                             event_names[event->riq_type], inst->ri_i.i_fmri);




1872                         break;
1873 

1874                 case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1875                         if (!instance_started(inst)) {
1876                                 log_framework(LOG_DEBUG, "Restarter: "
1877                                     "Not restarting %s; not running.\n",
1878                                     inst->ri_i.i_fmri);
1879                         } else {
1880                                 /*
1881                                  * Stop the instance.  If it can be restarted,
1882                                  * the graph engine will send a new event.
1883                                  */
1884                                 if (restart_dump(h, inst)) {
1885                                         (void) contract_kill(
1886                                             inst->ri_i.i_primary_ctid, SIGABRT,
1887                                             inst->ri_i.i_fmri);
1888                                 } else if (stop_instance(h, inst,
1889                                     RSTOP_RESTART) == 0) {
1890                                         reset_start_times(inst);
1891                                 }
1892                         }
1893                         break;


1930 
1931         rip->ri_queue_thread = 0;
1932         MUTEX_UNLOCK(&rip->ri_queue_lock);
1933 
1934 out:
1935         (void) scf_handle_unbind(h);
1936         scf_handle_destroy(h);
1937         free(fmri);
1938         return (NULL);
1939 }
1940 
1941 static int
1942 is_admin_event(restarter_event_type_t t) {
1943 
1944         switch (t) {
1945         case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1946         case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1947         case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1948         case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1949         case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:


1950         case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1951                 return (1);
1952         default:
1953                 return (0);
1954         }
1955 }
1956 
1957 static void
1958 restarter_queue_event(restarter_inst_t *ri, restarter_protocol_event_t *e)
1959 {
1960         restarter_instance_qentry_t *qe;
1961         int r;
1962 
1963         assert(MUTEX_HELD(&ri->ri_queue_lock));
1964         assert(!MUTEX_HELD(&ri->ri_lock));
1965 
1966         qe = startd_zalloc(sizeof (restarter_instance_qentry_t));
1967         qe->riq_type = e->rpe_type;
1968         qe->riq_reason = e->rpe_reason;
1969 




   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  25  * Copyright 2017 RackTop Systems.
  26  */
  27 
  28 /*
  29  * restarter.c - service manipulation
  30  *
  31  * This component manages services whose restarter is svc.startd, the standard
  32  * restarter.  It translates restarter protocol events from the graph engine
  33  * into actions on processes, as a delegated restarter would do.
  34  *
  35  * The master restarter manages a number of always-running threads:
  36  *   - restarter event thread: events from the graph engine
  37  *   - timeout thread: thread to fire queued timeouts
  38  *   - contract thread: thread to handle contract events
  39  *   - wait thread: thread to handle wait-based services
  40  *
  41  * The other threads are created as-needed:
  42  *   - per-instance method threads
  43  *   - per-instance event processing threads
  44  *
  45  * The interaction of all threads must result in the following conditions


1731                 /* Refresh does not change the state. */
1732                 (void) restarter_instance_update_states(h, rip,
1733                     rip->ri_i.i_state, rip->ri_i.i_state, RERR_NONE,
1734                     restarter_str_refresh);
1735 
1736                 info = startd_zalloc(sizeof (*info));
1737                 info->sf_id = rip->ri_id;
1738                 info->sf_method_type = METHOD_REFRESH;
1739                 info->sf_event_type = RERR_REFRESH;
1740                 info->sf_reason = NULL;
1741 
1742                 assert(rip->ri_method_thread == 0);
1743                 rip->ri_method_thread =
1744                     startd_thread_create(method_thread, info);
1745         }
1746 
1747         scf_snapshot_destroy(snap);
1748         scf_instance_destroy(inst);
1749 }
1750 
1751 static void
1752 degrade_instance(scf_handle_t *h, restarter_inst_t *rip, restarter_str_t reason)
1753 {
1754         scf_instance_t *scf_inst = NULL;
1755 
1756         assert(MUTEX_HELD(&rip->ri_lock));
1757 
1758         log_instance(rip, B_TRUE, "Marking degraded due to %s.",
1759             restarter_get_str_short(reason));
1760         log_framework(LOG_DEBUG, "%s: marking degraded due to %s.\n",
1761             rip->ri_i.i_fmri, restarter_get_str_short(reason));
1762 
1763         /* Services that aren't online are ignored */
1764         if (rip->ri_i.i_state != RESTARTER_STATE_ONLINE) {
1765                 log_framework(LOG_DEBUG,
1766                     "%s: degrade_instance -> is not online\n",
1767                     rip->ri_i.i_fmri);
1768                 return;
1769         }
1770 
1771         /*
1772          * If reason state is restarter_str_service_request and
1773          * restarter_actions/auxiliary_fmri property is set with a valid fmri,
1774          * copy the fmri to restarter/auxiliary_fmri so svcs -x can use.
1775          */
1776         if (reason == restarter_str_service_request &&
1777             libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &scf_inst) == 0) {
1778                 if (restarter_inst_validate_ractions_aux_fmri(scf_inst) == 0) {
1779                         if (restarter_inst_set_aux_fmri(scf_inst))
1780                                 log_framework(LOG_DEBUG, "%s: "
1781                                     "restarter_inst_set_aux_fmri failed: ",
1782                                     rip->ri_i.i_fmri);
1783                 } else {
1784                         log_framework(LOG_DEBUG, "%s: "
1785                             "restarter_inst_validate_ractions_aux_fmri "
1786                             "failed: ", rip->ri_i.i_fmri);
1787 
1788                         if (restarter_inst_reset_aux_fmri(scf_inst))
1789                                 log_framework(LOG_DEBUG, "%s: "
1790                                     "restarter_inst_reset_aux_fmri failed: ",
1791                                     rip->ri_i.i_fmri);
1792                 }
1793                 scf_instance_destroy(scf_inst);
1794         }
1795 
1796         (void) restarter_instance_update_states(h, rip,
1797             RESTARTER_STATE_DEGRADED, RESTARTER_STATE_NONE, RERR_NONE, reason);
1798 
1799         log_transition(rip, DEGRADE_REQUESTED);
1800 }
1801 
/*
 * Printable names for restarter events.  The table is meant to be looked
 * up by event type, so the order of the entries presumably has to match
 * the event type enumeration declared elsewhere -- confirm against that
 * declaration before adding or reordering entries.
 */
const char *event_names[] = {
        "INVALID",                      /*  0 */
        "ADD_INSTANCE",                 /*  1 */
        "REMOVE_INSTANCE",              /*  2 */
        "ENABLE",                       /*  3 */
        "DISABLE",                      /*  4 */
        "ADMIN_RESTORE",                /*  5 */
        "ADMIN_DEGRADED",               /*  6 */
        "ADMIN_DEGRADE_IMMEDIATE",      /*  7 */
        "ADMIN_REFRESH",                /*  8 */
        "ADMIN_RESTART",                /*  9 */
        "ADMIN_MAINT_OFF",              /* 10 */
        "ADMIN_MAINT_ON",               /* 11 */
        "ADMIN_MAINT_ON_IMMEDIATE",     /* 12 */
        "STOP",                         /* 13 */
        "START",                        /* 14 */
        "DEPENDENCY_CYCLE",             /* 15 */
        "INVALID_DEPENDENCY",           /* 16 */
        "ADMIN_DISABLE",                /* 17 */
        "STOP_RESET"                    /* 18 */
};
1809 
1810 /*
1811  * void *restarter_process_events()
1812  *
1813  *   Called in a separate thread to process the events on an instance's
1814  *   queue.  Empties the queue completely, and tries to keep the thread
1815  *   around for a little while after the queue is empty to save on
1816  *   startup costs.
1817  */
1818 static void *
1819 restarter_process_events(void *arg)
1820 {
1821         scf_handle_t *h;
1822         restarter_instance_qentry_t *event;
1823         restarter_inst_t *rip;
1824         char *fmri = (char *)arg;
1825         struct timespec to;
1826 
1827         assert(fmri != NULL);


1902 
1903                 case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1904                         if (event_from_tty(h, inst) == 0)
1905                                 maintain_instance(h, inst, 1,
1906                                     restarter_str_service_request);
1907                         else
1908                                 maintain_instance(h, inst, 1,
1909                                     restarter_str_administrative_request);
1910                         break;
1911 
1912                 case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1913                         unmaintain_instance(h, inst, RUNMAINT_CLEAR);
1914                         reset_start_times(inst);
1915                         break;
1916 
1917                 case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1918                         refresh_instance(h, inst);
1919                         break;
1920 
1921                 case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1922                 case RESTARTER_EVENT_TYPE_ADMIN_DEGRADE_IMMEDIATE:
1923                         if (event_from_tty(h, inst) == 0)
1924                                 degrade_instance(h, inst,
1925                                     restarter_str_service_request);
1926                         else
1927                                 degrade_instance(h, inst,
1928                                     restarter_str_administrative_request);
1929                         break;
1930 
1931                 case RESTARTER_EVENT_TYPE_ADMIN_RESTORE:
1932                 case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1933                         if (!instance_started(inst)) {
1934                                 log_framework(LOG_DEBUG, "Restarter: "
1935                                     "Not restarting %s; not running.\n",
1936                                     inst->ri_i.i_fmri);
1937                         } else {
1938                                 /*
1939                                  * Stop the instance.  If it can be restarted,
1940                                  * the graph engine will send a new event.
1941                                  */
1942                                 if (restart_dump(h, inst)) {
1943                                         (void) contract_kill(
1944                                             inst->ri_i.i_primary_ctid, SIGABRT,
1945                                             inst->ri_i.i_fmri);
1946                                 } else if (stop_instance(h, inst,
1947                                     RSTOP_RESTART) == 0) {
1948                                         reset_start_times(inst);
1949                                 }
1950                         }
1951                         break;


1988 
1989         rip->ri_queue_thread = 0;
1990         MUTEX_UNLOCK(&rip->ri_queue_lock);
1991 
1992 out:
1993         (void) scf_handle_unbind(h);
1994         scf_handle_destroy(h);
1995         free(fmri);
1996         return (NULL);
1997 }
1998 
1999 static int
2000 is_admin_event(restarter_event_type_t t) {
2001 
2002         switch (t) {
2003         case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
2004         case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
2005         case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
2006         case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
2007         case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
2008         case RESTARTER_EVENT_TYPE_ADMIN_DEGRADE_IMMEDIATE:
2009         case RESTARTER_EVENT_TYPE_ADMIN_RESTORE:
2010         case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
2011                 return (1);
2012         default:
2013                 return (0);
2014         }
2015 }
2016 
2017 static void
2018 restarter_queue_event(restarter_inst_t *ri, restarter_protocol_event_t *e)
2019 {
2020         restarter_instance_qentry_t *qe;
2021         int r;
2022 
2023         assert(MUTEX_HELD(&ri->ri_queue_lock));
2024         assert(!MUTEX_HELD(&ri->ri_lock));
2025 
2026         qe = startd_zalloc(sizeof (restarter_instance_qentry_t));
2027         qe->riq_type = e->rpe_type;
2028         qe->riq_reason = e->rpe_reason;
2029