1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  * Copyright (c) 2011 Bayard G. Bell. All rights reserved.
  25  * Copyright 2012 Milan Jurik. All rights reserved.
  26  */
  27 
  28 /*
  29  * System call to checkpoint and resume the currently running kernel
  30  */
  31 #include <sys/types.h>
  32 #include <sys/errno.h>
  33 #include <sys/modctl.h>
  34 #include <sys/syscall.h>
  35 #include <sys/cred.h>
  36 #include <sys/uadmin.h>
  37 #include <sys/cmn_err.h>
  38 #include <sys/systm.h>
  39 #include <sys/cpr.h>
  40 #include <sys/swap.h>
  41 #include <sys/vfs.h>
  42 #include <sys/autoconf.h>
  43 #include <sys/machsystm.h>
  44 
  45 extern int i_cpr_is_supported(int sleeptype);
  46 extern int cpr_is_ufs(struct vfs *);
  47 extern int cpr_is_zfs(struct vfs *);
  48 extern int cpr_check_spec_statefile(void);
  49 extern int cpr_reusable_mount_check(void);
  50 extern int i_cpr_reusable_supported(void);
  51 extern int i_cpr_reusefini(void);
  52 extern struct mod_ops mod_miscops;
  53 
  54 extern int cpr_init(int);
  55 extern void cpr_done(void);
  56 extern void i_cpr_stop_other_cpus(void);
  57 extern int i_cpr_power_down(int);
  58 
  59 #if defined(__sparc)
  60 extern void cpr_forget_cprconfig(void);
  61 #endif
  62 
  63 static struct modlmisc modlmisc = {
  64         &mod_miscops, "checkpoint resume"
  65 };
  66 
  67 static struct modlinkage modlinkage = {
  68         MODREV_1, (void *)&modlmisc, NULL
  69 };
  70 
  71 int cpr_reusable_mode;
  72 
  73 kmutex_t        cpr_slock;      /* cpr serial lock */
  74 cpr_t           cpr_state;
  75 int             cpr_debug;
  76 int             cpr_test_mode; /* true if called via uadmin testmode */
  77 int             cpr_test_point = LOOP_BACK_NONE;        /* cpr test point */
  78 int             cpr_mp_enable = 0;      /* set to 1 to enable MP suspend */
  79 major_t         cpr_device = 0;         /* major number for S3 on one device */
  80 
  81 /*
  82  * All the loadable module related code follows
  83  */
  84 int
  85 _init(void)
  86 {
  87         register int e;
  88 
  89         if ((e = mod_install(&modlinkage)) == 0) {
  90                 mutex_init(&cpr_slock, NULL, MUTEX_DEFAULT, NULL);
  91         }
  92         return (e);
  93 }
  94 
  95 int
  96 _fini(void)
  97 {
  98         register int e;
  99 
 100         if ((e = mod_remove(&modlinkage)) == 0) {
 101                 mutex_destroy(&cpr_slock);
 102         }
 103         return (e);
 104 }
 105 
 106 int
 107 _info(struct modinfo *modinfop)
 108 {
 109         return (mod_info(&modlinkage, modinfop));
 110 }
 111 
 112 static
 113 int
 114 atoi(char *p)
 115 {
 116         int     i;
 117 
 118         i = (*p++ - '0');
 119 
 120         while (*p != '\0')
 121                 i = 10 * i + (*p++ - '0');
 122 
 123         return (i);
 124 }
 125 
 126 int
 127 cpr(int fcn, void *mdep)
 128 {
 129 
 130 #if defined(__sparc)
 131         static const char noswapstr[] = "reusable statefile requires "
 132             "that no swap area be configured.\n";
 133         static const char blockstr[] = "reusable statefile must be "
 134             "a block device.  See power.conf(4) and pmconfig(1M).\n";
 135         static const char normalfmt[] = "cannot run normal "
 136             "checkpoint/resume when in reusable statefile mode. "
 137             "use uadmin A_FREEZE AD_REUSEFINI (uadmin %d %d) "
 138             "to exit reusable statefile mode.\n";
 139         static const char modefmt[] = "%s in reusable mode.\n";
 140 #endif
 141         register int rc = 0;
 142         int cpr_sleeptype;
 143 
 144         /*
 145          * First, reject commands that we don't (yet) support on this arch.
 146          * This is easier to understand broken out like this than grotting
 147          * through the second switch below.
 148          */
 149 
 150         switch (fcn) {
 151 #if defined(__sparc)
 152         case AD_CHECK_SUSPEND_TO_RAM:
 153         case AD_SUSPEND_TO_RAM:
 154                 return (ENOTSUP);
 155         case AD_CHECK_SUSPEND_TO_DISK:
 156         case AD_SUSPEND_TO_DISK:
 157         case AD_CPR_REUSEINIT:
 158         case AD_CPR_NOCOMPRESS:
 159         case AD_CPR_FORCE:
 160         case AD_CPR_REUSABLE:
 161         case AD_CPR_REUSEFINI:
 162         case AD_CPR_TESTZ:
 163         case AD_CPR_TESTNOZ:
 164         case AD_CPR_TESTHALT:
 165         case AD_CPR_SUSP_DEVICES:
 166                 cpr_sleeptype = CPR_TODISK;
 167                 break;
 168 #endif
 169 #if defined(__x86)
 170         case AD_CHECK_SUSPEND_TO_DISK:
 171         case AD_SUSPEND_TO_DISK:
 172         case AD_CPR_REUSEINIT:
 173         case AD_CPR_NOCOMPRESS:
 174         case AD_CPR_FORCE:
 175         case AD_CPR_REUSABLE:
 176         case AD_CPR_REUSEFINI:
 177         case AD_CPR_TESTZ:
 178         case AD_CPR_TESTNOZ:
 179         case AD_CPR_TESTHALT:
 180         case AD_CPR_PRINT:
 181                 return (ENOTSUP);
 182         /* The DEV_* values need to be removed after sys-syspend is fixed */
 183         case DEV_CHECK_SUSPEND_TO_RAM:
 184         case DEV_SUSPEND_TO_RAM:
 185         case AD_CPR_SUSP_DEVICES:
 186         case AD_CHECK_SUSPEND_TO_RAM:
 187         case AD_SUSPEND_TO_RAM:
 188         case AD_LOOPBACK_SUSPEND_TO_RAM_PASS:
 189         case AD_LOOPBACK_SUSPEND_TO_RAM_FAIL:
 190         case AD_FORCE_SUSPEND_TO_RAM:
 191         case AD_DEVICE_SUSPEND_TO_RAM:
 192                 cpr_sleeptype = CPR_TORAM;
 193                 break;
 194 #endif
 195         }
 196 #if defined(__sparc)
 197         /*
 198          * Need to know if we're in reusable mode, but we will likely have
 199          * rebooted since REUSEINIT, so we have to get the info from the
 200          * file system
 201          */
 202         if (!cpr_reusable_mode)
 203                 cpr_reusable_mode = cpr_get_reusable_mode();
 204 
 205         cpr_forget_cprconfig();
 206 #endif
 207 
 208         switch (fcn) {
 209 
 210 #if defined(__sparc)
 211         case AD_CPR_REUSEINIT:
 212                 if (!i_cpr_reusable_supported())
 213                         return (ENOTSUP);
 214                 if (!cpr_statefile_is_spec()) {
 215                         cpr_err(CE_CONT, blockstr);
 216                         return (EINVAL);
 217                 }
 218                 if ((rc = cpr_check_spec_statefile()) != 0)
 219                         return (rc);
 220                 if (swapinfo) {
 221                         cpr_err(CE_CONT, noswapstr);
 222                         return (EINVAL);
 223                 }
 224                 cpr_test_mode = 0;
 225                 break;
 226 
 227         case AD_CPR_NOCOMPRESS:
 228         case AD_CPR_COMPRESS:
 229         case AD_CPR_FORCE:
 230                 if (cpr_reusable_mode) {
 231                         cpr_err(CE_CONT, normalfmt, A_FREEZE, AD_REUSEFINI);
 232                         return (ENOTSUP);
 233                 }
 234                 cpr_test_mode = 0;
 235                 break;
 236 
 237         case AD_CPR_REUSABLE:
 238                 if (!i_cpr_reusable_supported())
 239                         return (ENOTSUP);
 240                 if (!cpr_statefile_is_spec()) {
 241                         cpr_err(CE_CONT, blockstr);
 242                         return (EINVAL);
 243                 }
 244                 if ((rc = cpr_check_spec_statefile()) != 0)
 245                         return (rc);
 246                 if (swapinfo) {
 247                         cpr_err(CE_CONT, noswapstr);
 248                         return (EINVAL);
 249                 }
 250                 if ((rc = cpr_reusable_mount_check()) != 0)
 251                         return (rc);
 252                 cpr_test_mode = 0;
 253                 break;
 254 
 255         case AD_CPR_REUSEFINI:
 256                 if (!i_cpr_reusable_supported())
 257                         return (ENOTSUP);
 258                 cpr_test_mode = 0;
 259                 break;
 260 
 261         case AD_CPR_TESTZ:
 262         case AD_CPR_TESTNOZ:
 263         case AD_CPR_TESTHALT:
 264                 if (cpr_reusable_mode) {
 265                         cpr_err(CE_CONT, normalfmt, A_FREEZE, AD_REUSEFINI);
 266                         return (ENOTSUP);
 267                 }
 268                 cpr_test_mode = 1;
 269                 break;
 270 
 271         case AD_CPR_CHECK:
 272                 if (!i_cpr_is_supported(cpr_sleeptype) || cpr_reusable_mode)
 273                         return (ENOTSUP);
 274                 return (0);
 275 
 276         case AD_CPR_PRINT:
 277                 CPR_STAT_EVENT_END("POST CPR DELAY");
 278                 cpr_stat_event_print();
 279                 return (0);
 280 #endif
 281 
 282         case AD_CPR_DEBUG0:
 283                 cpr_debug = 0;
 284                 return (0);
 285 
 286         case AD_CPR_DEBUG1:
 287         case AD_CPR_DEBUG2:
 288         case AD_CPR_DEBUG3:
 289         case AD_CPR_DEBUG4:
 290         case AD_CPR_DEBUG5:
 291         case AD_CPR_DEBUG7:
 292         case AD_CPR_DEBUG8:
 293                 cpr_debug |= CPR_DEBUG_BIT(fcn);
 294                 return (0);
 295 
 296         case AD_CPR_DEBUG9:
 297                 cpr_debug |= CPR_DEBUG6;
 298                 return (0);
 299 
 300         /* The DEV_* values need to be removed after sys-syspend is fixed */
 301         case DEV_CHECK_SUSPEND_TO_RAM:
 302         case DEV_SUSPEND_TO_RAM:
 303         case AD_CHECK_SUSPEND_TO_RAM:
 304         case AD_SUSPEND_TO_RAM:
 305                 cpr_test_point = LOOP_BACK_NONE;
 306                 break;
 307 
 308         case AD_LOOPBACK_SUSPEND_TO_RAM_PASS:
 309                 cpr_test_point = LOOP_BACK_PASS;
 310                 break;
 311 
 312         case AD_LOOPBACK_SUSPEND_TO_RAM_FAIL:
 313                 cpr_test_point = LOOP_BACK_FAIL;
 314                 break;
 315 
 316         case AD_FORCE_SUSPEND_TO_RAM:
 317                 cpr_test_point = FORCE_SUSPEND_TO_RAM;
 318                 break;
 319 
 320         case AD_DEVICE_SUSPEND_TO_RAM:
 321                 if (mdep == NULL) {
 322                         /* Didn't pass enough arguments */
 323                         return (EINVAL);
 324                 }
 325                 cpr_test_point = DEVICE_SUSPEND_TO_RAM;
 326                 cpr_device = (major_t)atoi((char *)mdep);
 327                 break;
 328 
 329         case AD_CPR_SUSP_DEVICES:
 330                 cpr_test_point = FORCE_SUSPEND_TO_RAM;
 331                 if (cpr_suspend_devices(ddi_root_node()) != DDI_SUCCESS)
 332                         cmn_err(CE_WARN,
 333                             "Some devices did not suspend "
 334                             "and may be unusable");
 335                 (void) cpr_resume_devices(ddi_root_node(), 0);
 336                 return (0);
 337 
 338         default:
 339                 return (ENOTSUP);
 340         }
 341 
 342         if (!i_cpr_is_supported(cpr_sleeptype))
 343                 return (ENOTSUP);
 344 
 345 #if defined(__sparc)
 346         if ((cpr_sleeptype == CPR_TODISK &&
 347             !cpr_is_ufs(rootvfs) && !cpr_is_zfs(rootvfs)))
 348                 return (ENOTSUP);
 349 #endif
 350 
 351         if (fcn == AD_CHECK_SUSPEND_TO_RAM ||
 352             fcn == DEV_CHECK_SUSPEND_TO_RAM) {
 353                 ASSERT(i_cpr_is_supported(cpr_sleeptype));
 354                 return (0);
 355         }
 356 
 357 #if defined(__sparc)
 358         if (fcn == AD_CPR_REUSEINIT) {
 359                 if (mutex_tryenter(&cpr_slock) == 0)
 360                         return (EBUSY);
 361                 if (cpr_reusable_mode) {
 362                         cpr_err(CE_CONT, modefmt, "already");
 363                         mutex_exit(&cpr_slock);
 364                         return (EBUSY);
 365                 }
 366                 rc = i_cpr_reuseinit();
 367                 mutex_exit(&cpr_slock);
 368                 return (rc);
 369         }
 370 
 371         if (fcn == AD_CPR_REUSEFINI) {
 372                 if (mutex_tryenter(&cpr_slock) == 0)
 373                         return (EBUSY);
 374                 if (!cpr_reusable_mode) {
 375                         cpr_err(CE_CONT, modefmt, "not");
 376                         mutex_exit(&cpr_slock);
 377                         return (EINVAL);
 378                 }
 379                 rc = i_cpr_reusefini();
 380                 mutex_exit(&cpr_slock);
 381                 return (rc);
 382         }
 383 #endif
 384 
 385         /*
 386          * acquire cpr serial lock and init cpr state structure.
 387          */
 388         if (rc = cpr_init(fcn))
 389                 return (rc);
 390 
 391 #if defined(__sparc)
 392         if (fcn == AD_CPR_REUSABLE) {
 393                 if ((rc = i_cpr_check_cprinfo()) != 0)  {
 394                         mutex_exit(&cpr_slock);
 395                         return (rc);
 396                 }
 397         }
 398 #endif
 399 
 400         /*
 401          * Call the main cpr routine. If we are successful, we will be coming
 402          * down from the resume side, otherwise we are still in suspend.
 403          */
 404         cpr_err(CE_CONT, "System is being suspended");
 405         if (rc = cpr_main(cpr_sleeptype)) {
 406                 CPR->c_flags |= C_ERROR;
 407                 PMD(PMD_SX, ("cpr: Suspend operation failed.\n"))
 408                 cpr_err(CE_NOTE, "Suspend operation failed.");
 409         } else if (CPR->c_flags & C_SUSPENDING) {
 410 
 411                 /*
 412                  * In the suspend to RAM case, by the time we get
 413                  * control back we're already resumed
 414                  */
 415                 if (cpr_sleeptype == CPR_TORAM) {
 416                         PMD(PMD_SX, ("cpr: cpr CPR_TORAM done\n"))
 417                         cpr_done();
 418                         return (rc);
 419                 }
 420 
 421 #if defined(__sparc)
 422 
 423                 PMD(PMD_SX, ("cpr: Suspend operation succeeded.\n"))
 424                 /*
 425                  * Back from a successful checkpoint
 426                  */
 427                 if (fcn == AD_CPR_TESTZ || fcn == AD_CPR_TESTNOZ) {
 428                         mdboot(0, AD_BOOT, "", B_FALSE);
 429                         /* NOTREACHED */
 430                 }
 431 
 432                 /* make sure there are no more changes to the device tree */
 433                 PMD(PMD_SX, ("cpr: dev tree freeze\n"))
 434                 devtree_freeze();
 435 
 436                 /*
 437                  * stop other cpus and raise our priority.  since there is only
 438                  * one active cpu after this, and our priority will be too high
 439                  * for us to be preempted, we're essentially single threaded
 440                  * from here on out.
 441                  */
 442                 PMD(PMD_SX, ("cpr: stop other cpus\n"))
 443                 i_cpr_stop_other_cpus();
 444                 PMD(PMD_SX, ("cpr: spl6\n"))
 445                 (void) spl6();
 446 
 447                 /*
 448                  * try and reset leaf devices.  reset_leaves() should only
 449                  * be called when there are no other threads that could be
 450                  * accessing devices
 451                  */
 452                 PMD(PMD_SX, ("cpr: reset leaves\n"))
 453                 reset_leaves();
 454 
 455                 /*
 456                  * If i_cpr_power_down() succeeds, it'll not return
 457                  *
 458                  * Drives with write-cache enabled need to flush
 459                  * their cache.
 460                  */
 461                 if (fcn != AD_CPR_TESTHALT) {
 462                         PMD(PMD_SX, ("cpr: power down\n"))
 463                         (void) i_cpr_power_down(cpr_sleeptype);
 464                 }
 465                 ASSERT(cpr_sleeptype == CPR_TODISK);
 466                 /* currently CPR_TODISK comes back via a boot path */
 467                 CPR_DEBUG(CPR_DEBUG1, "(Done. Please Switch Off)\n");
 468                 halt(NULL);
 469                 /* NOTREACHED */
 470 #endif
 471         }
 472         PMD(PMD_SX, ("cpr: cpr done\n"))
 473         cpr_done();
 474         return (rc);
 475 }