1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * PSMI 1.1 extensions are supported only in 2.6 and later versions.
  28  * PSMI 1.2 extensions are supported only in 2.7 and later versions.
  29  * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
  30  * PSMI 1.5 extensions are supported in Solaris Nevada.
  31  * PSMI 1.6 extensions are supported in Solaris Nevada.
  32  * PSMI 1.7 extensions are supported in Solaris Nevada.
  33  */
  34 #define PSMI_1_7
  35 
  36 #include <sys/processor.h>
  37 #include <sys/time.h>
  38 #include <sys/psm.h>
  39 #include <sys/smp_impldefs.h>
  40 #include <sys/cram.h>
  41 #include <sys/acpi/acpi.h>
  42 #include <sys/acpica.h>
  43 #include <sys/psm_common.h>
  44 #include <sys/apic.h>
  45 #include <sys/pit.h>
  46 #include <sys/ddi.h>
  47 #include <sys/sunddi.h>
  48 #include <sys/ddi_impldefs.h>
  49 #include <sys/pci.h>
  50 #include <sys/promif.h>
  51 #include <sys/x86_archext.h>
  52 #include <sys/cpc_impl.h>
  53 #include <sys/uadmin.h>
  54 #include <sys/panic.h>
  55 #include <sys/debug.h>
  56 #include <sys/archsystm.h>
  57 #include <sys/trap.h>
  58 #include <sys/machsystm.h>
  59 #include <sys/sysmacros.h>
  60 #include <sys/cpuvar.h>
  61 #include <sys/rm_platter.h>
  62 #include <sys/privregs.h>
  63 #include <sys/note.h>
  64 #include <sys/pci_intr_lib.h>
  65 #include <sys/spl.h>
  66 #include <sys/clock.h>
  67 #include <sys/dditypes.h>
  68 #include <sys/sunddi.h>
  69 #include <sys/x_call.h>
  70 #include <sys/reboot.h>
  71 #include <sys/hpet.h>
  72 #include <sys/apic_common.h>
  73 #include <sys/apic_timer.h>
  74 
/*
 * Forward declarations of the two file-local routines that are installed in
 * the last two slots of apic_nointrmap_ops further down in this file.
 */
static void     apic_record_ioapic_rdt(void *intrmap_private,
                    ioapic_rdt_t *irdt);
static void     apic_record_msi(void *intrmap_private, msi_regs_t *mregs);

/*
 * Common routines between pcplusmp & apix (taken from apic.c).
 */

int     apic_clkinit(int);
hrtime_t apic_gethrtime(void);
void    apic_send_ipi(int, int);
void    apic_set_idlecpu(processorid_t);
void    apic_unset_idlecpu(processorid_t);
void    apic_shutdown(int, int);
void    apic_preshutdown(int, int);
processorid_t   apic_get_next_processorid(processorid_t);

/* Coarse time source; see apic_coarse_hrtime. */
hrtime_t apic_gettime();
  93 
/*
 * IOAPIC EOI handling method for apix.  Starts out as
 * APIC_MUL_IOAPIC_PCPLUSMP and is overridden by apic_ioapic_method_probe()
 * when one of the apix methods applies.
 */
enum apic_ioapic_method_type apix_mul_ioapic_method = APIC_MUL_IOAPIC_PCPLUSMP;

/* Now the ones for Dynamic Interrupt distribution */
int     apic_enable_dynamic_migration = 0;

/*
 * maximum loop count when sending Start IPIs.
 * Bounds the AV_PENDING polling loop that follows the INIT IPI in
 * apic_cpu_send_SIPI().
 */
int apic_sipi_max_loop_count = 0x1000;

/*
 * These variables are frequently accessed in apic_intr_enter(),
 * apic_intr_exit and apic_setspl, so group them together
 */
volatile uint32_t *apicadr =  NULL;     /* virtual addr of local APIC   */
int apic_setspl_delay = 1;              /* apic_setspl - delay enable   */
int apic_clkvect;

/* vector at which error interrupts come in */
int apic_errvect;
int apic_enable_error_intr = 1;
/*
 * Number of tenmicrosec() delays apic_error_intr() performs after printing
 * an error; that handler doubles it while it is below 500.
 */
int apic_error_display_delay = 100;

/* vector at which performance counter overflow interrupts come in */
int apic_cpcovf_vect;
int apic_enable_cpcovf_intr = 1;

/*
 * vector at which CMCI interrupts come in
 * Written to APIC_CMCI_VECT by apic_cmci_enable() and, with AV_MASK or'ed in,
 * by apic_cmci_disable().
 */
int apic_cmci_vect;
extern int cmi_enable_cmci;
extern void cmi_cmci_trap(void);

kmutex_t cmci_cpu_setup_lock;   /* protects cmci_cpu_setup_registered */
int cmci_cpu_setup_registered;

/* number of CPUs in power-on transition state */
static int apic_poweron_cnt = 0;
lock_t apic_mode_switch_lock;
 130 
 131 /*
 132  * Patchable global variables.
 133  */
int     apic_forceload = 0;

int     apic_coarse_hrtime = 1;         /* 0 - use accurate slow gethrtime() */

int     apic_flat_model = 0;            /* 0 - clustered. 1 - flat */
/* non-zero: apic_nmi_intr() panics instead of just printing a message */
int     apic_panic_on_nmi = 0;
/* non-zero: apic_error_intr() panics on any reported APIC error */
int     apic_panic_on_apic_error = 0;

int     apic_verbose = 0;       /* 0x1ff */

#ifdef DEBUG
/* non-zero: apic_error_intr() calls debug_enter() when an error is seen */
int     apic_debug = 0;
int     apic_restrict_vector = 0;

int     apic_debug_msgbuf[APIC_DEBUG_MSGBUFSIZE];
int     apic_debug_msgbufindex = 0;

#endif /* DEBUG */

uint_t apic_nticks = 0;
uint_t apic_skipped_redistribute = 0;

/*
 * State shared by apic_gettime() and apic_gethrtime().  Readers spin while
 * apic_hrtime_stamp is odd and retry when it changes underneath them;
 * apic_gethrtime_lock serializes the body of apic_gethrtime().
 */
uint_t last_count_read = 0;
lock_t  apic_gethrtime_lock;
volatile int    apic_hrtime_stamp = 0;
volatile hrtime_t apic_nsec_since_boot = 0;

/* last value returned by apic_gethrtime(); used to keep it from going back */
static  hrtime_t        apic_last_hrtime = 0;
/* times apic_gethrtime() computed a value below apic_last_hrtime */
int             apic_hrtime_error = 0;
/* times the remote read in apic_gethrtime() came back invalid */
int             apic_remote_hrterr = 0;
/* NMIs counted by apic_nmi_intr() while holding apic_nmi_lock */
int             apic_num_nmis = 0;
/* accumulated (or'ed) error status bits seen by apic_error_intr() */
int             apic_apic_error = 0;
int             apic_num_apic_errors = 0;
int             apic_num_cksum_errors = 0;

/* APIC_ERR_* flag bits (APIC_ERR_APIC_ERROR, APIC_ERR_NMI) */
int     apic_error = 0;

/* set to 1 by apic_cpu_start() before it sends the start-up IPIs */
static  int     apic_cmos_ssb_set = 0;

/* use to make sure only one cpu handles the nmi */
lock_t  apic_nmi_lock;
/* use to make sure only one cpu handles the error interrupt */
lock_t  apic_error_lock;
 177 
/*
 * Table of (control, data) byte pairs for the "aspen" BMC.  Going by the
 * per-entry comments, the first eight entries make up a SET_WATCHDOG_TIMER
 * request whose timer action is "power down", and the last two entries a
 * RESET_WATCHDOG_TIMER request.
 * NOTE(review): the code that walks this table is not in this part of the
 * file - presumably the shutdown path; confirm.
 */
static  struct {
        uchar_t cntl;
        uchar_t data;
} aspen_bmc[] = {
        { CC_SMS_WR_START,      0x18 },         /* NetFn/LUN */
        { CC_SMS_WR_NEXT,       0x24 },         /* Cmd SET_WATCHDOG_TIMER */
        { CC_SMS_WR_NEXT,       0x84 },         /* DataByte 1: SMS/OS no log */
        { CC_SMS_WR_NEXT,       0x2 },          /* DataByte 2: Power Down */
        { CC_SMS_WR_NEXT,       0x0 },          /* DataByte 3: no pre-timeout */
        { CC_SMS_WR_NEXT,       0x0 },          /* DataByte 4: timer expir. */
        { CC_SMS_WR_NEXT,       0xa },          /* DataByte 5: init countdown */
        { CC_SMS_WR_END,        0x0 },          /* DataByte 6: init countdown */

        { CC_SMS_WR_START,      0x18 },         /* NetFn/LUN */
        { CC_SMS_WR_END,        0x22 }          /* Cmd RESET_WATCHDOG_TIMER */
};
 194 
/*
 * Table of (port, data) pairs for the "sitka" BMC.  It carries the same two
 * requests as aspen_bmc (SET_WATCHDOG_TIMER, then RESET_WATCHDOG_TIMER), but
 * here the SMS_WRITE_START / SMS_WRITE_END markers are separate entries
 * aimed at SMS_COMMAND_REGISTER, while payload bytes go to SMS_DATA_REGISTER.
 * NOTE(review): the code that walks this table is not in this part of the
 * file - presumably the shutdown path; confirm.
 */
static  struct {
        int     port;
        uchar_t data;
} sitka_bmc[] = {
        { SMS_COMMAND_REGISTER, SMS_WRITE_START },
        { SMS_DATA_REGISTER,    0x18 },         /* NetFn/LUN */
        { SMS_DATA_REGISTER,    0x24 },         /* Cmd SET_WATCHDOG_TIMER */
        { SMS_DATA_REGISTER,    0x84 },         /* DataByte 1: SMS/OS no log */
        { SMS_DATA_REGISTER,    0x2 },          /* DataByte 2: Power Down */
        { SMS_DATA_REGISTER,    0x0 },          /* DataByte 3: no pre-timeout */
        { SMS_DATA_REGISTER,    0x0 },          /* DataByte 4: timer expir. */
        { SMS_DATA_REGISTER,    0xa },          /* DataByte 5: init countdown */
        { SMS_COMMAND_REGISTER, SMS_WRITE_END },
        { SMS_DATA_REGISTER,    0x0 },          /* DataByte 6: init countdown */

        { SMS_COMMAND_REGISTER, SMS_WRITE_START },
        { SMS_DATA_REGISTER,    0x18 },         /* NetFn/LUN */
        { SMS_COMMAND_REGISTER, SMS_WRITE_END },
        { SMS_DATA_REGISTER,    0x22 }          /* Cmd RESET_WATCHDOG_TIMER */
};
 215 
/*
 * Patchable global variables.
 * apic_kmdb_on_nmi is consulted by apic_nmi_intr(): when it is set and
 * psm_debugger() is true, the handler enters the debugger instead of
 * panicking or printing.
 */
int             apic_kmdb_on_nmi = 0;           /* 0 - no, 1 - yes enter kmdb */
uint32_t        apic_divide_reg_init = 0;       /* 0 - divide by 2 */
 219 
/*
 * default apic ops without interrupt remapping
 *
 * The first five slots are filled with return_instr, cast to each slot's
 * function-pointer type; the last two slots use the file-local
 * apic_record_ioapic_rdt() and apic_record_msi().  The initializer is
 * positional, so its order must follow the member order of
 * apic_intrmap_ops_t (declared outside this file).
 */
static apic_intrmap_ops_t apic_nointrmap_ops = {
        (int (*)(int))return_instr,
        (void (*)(int))return_instr,
        (void (*)(void **, dev_info_t *, uint16_t, int, uchar_t))return_instr,
        (void (*)(void *, void *, uint16_t, int))return_instr,
        (void (*)(void **))return_instr,
        apic_record_ioapic_rdt,
        apic_record_msi,
};
 230 
/* Active interrupt-remapping ops; defaults to the no-remapping table. */
apic_intrmap_ops_t *apic_vt_ops = &apic_nointrmap_ops;
/* Per-CPU local APIC information, indexed by processor id in this file. */
apic_cpus_info_t        *apic_cpus = NULL;
cpuset_t        apic_cpumask;
uint_t          apic_picinit_called;

/*
 * Flag to indicate that we need to shut down all processors
 * While it is set, apic_nmi_intr() only disables the local APIC and returns.
 */
static uint_t   apic_shutdown_processors;
 238 
 239 /*
 240  * Probe the ioapic method for apix module. Called in apic_probe_common()
 241  */
 242 int
 243 apic_ioapic_method_probe()
 244 {
 245         if (apix_enable == 0)
 246                 return (PSM_SUCCESS);
 247 
 248         /*
 249          * Set IOAPIC EOI handling method. The priority from low to high is:
 250          *      1. IOxAPIC: with EOI register
 251          *      2. IOMMU interrupt mapping
 252          *      3. Mask-Before-EOI method for systems without boot
 253          *      interrupt routing, such as systems with only one IOAPIC;
 254          *      NVIDIA CK8-04/MCP55 systems; systems with bridge solution
 255          *      which disables the boot interrupt routing already.
 256          *      4. Directed EOI
 257          */
 258         if (apic_io_ver[0] >= 0x20)
 259                 apix_mul_ioapic_method = APIC_MUL_IOAPIC_IOXAPIC;
 260         if ((apic_io_max == 1) || (apic_nvidia_io_max == apic_io_max))
 261                 apix_mul_ioapic_method = APIC_MUL_IOAPIC_MASK;
 262         if (apic_directed_EOI_supported())
 263                 apix_mul_ioapic_method = APIC_MUL_IOAPIC_DEOI;
 264 
 265         /* fall back to pcplusmp */
 266         if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_PCPLUSMP) {
 267                 /* make sure apix is after pcplusmp in /etc/mach */
 268                 apix_enable = 0; /* go ahead with pcplusmp install next */
 269                 return (PSM_FAILURE);
 270         }
 271 
 272         return (PSM_SUCCESS);
 273 }
 274 
 275 /*
 276  * handler for APIC Error interrupt. Just print a warning and continue
 277  */
 278 int
 279 apic_error_intr()
 280 {
 281         uint_t  error0, error1, error;
 282         uint_t  i;
 283 
 284         /*
 285          * We need to write before read as per 7.4.17 of system prog manual.
 286          * We do both and or the results to be safe
 287          */
 288         error0 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
 289         apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
 290         error1 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
 291         error = error0 | error1;
 292 
 293         /*
 294          * Clear the APIC error status (do this on all cpus that enter here)
 295          * (two writes are required due to the semantics of accessing the
 296          * error status register.)
 297          */
 298         apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
 299         apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
 300 
 301         /*
 302          * Prevent more than 1 CPU from handling error interrupt causing
 303          * double printing (interleave of characters from multiple
 304          * CPU's when using prom_printf)
 305          */
 306         if (lock_try(&apic_error_lock) == 0)
 307                 return (error ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
 308         if (error) {
 309 #if     DEBUG
 310                 if (apic_debug)
 311                         debug_enter("pcplusmp: APIC Error interrupt received");
 312 #endif /* DEBUG */
 313                 if (apic_panic_on_apic_error)
 314                         cmn_err(CE_PANIC,
 315                             "APIC Error interrupt on CPU %d. Status = %x",
 316                             psm_get_cpu_id(), error);
 317                 else {
 318                         if ((error & ~APIC_CS_ERRORS) == 0) {
 319                                 /* cksum error only */
 320                                 apic_error |= APIC_ERR_APIC_ERROR;
 321                                 apic_apic_error |= error;
 322                                 apic_num_apic_errors++;
 323                                 apic_num_cksum_errors++;
 324                         } else {
 325                                 /*
 326                                  * prom_printf is the best shot we have of
 327                                  * something which is problem free from
 328                                  * high level/NMI type of interrupts
 329                                  */
 330                                 prom_printf("APIC Error interrupt on CPU %d. "
 331                                     "Status 0 = %x, Status 1 = %x\n",
 332                                     psm_get_cpu_id(), error0, error1);
 333                                 apic_error |= APIC_ERR_APIC_ERROR;
 334                                 apic_apic_error |= error;
 335                                 apic_num_apic_errors++;
 336                                 for (i = 0; i < apic_error_display_delay; i++) {
 337                                         tenmicrosec();
 338                                 }
 339                                 /*
 340                                  * provide more delay next time limited to
 341                                  * roughly 1 clock tick time
 342                                  */
 343                                 if (apic_error_display_delay < 500)
 344                                         apic_error_display_delay *= 2;
 345                         }
 346                 }
 347                 lock_clear(&apic_error_lock);
 348                 return (DDI_INTR_CLAIMED);
 349         } else {
 350                 lock_clear(&apic_error_lock);
 351                 return (DDI_INTR_UNCLAIMED);
 352         }
 353 }
 354 
/*
 * Turn off the mask bit in the performance counter Local Vector Table entry.
 *
 * Done as a read-modify-write of APIC_PCINT_VECT through apic_reg_ops, so
 * every bit other than APIC_LVT_MASK is written back unchanged.
 */
void
apic_cpcovf_mask_clear(void)
{
        apic_reg_ops->apic_write(APIC_PCINT_VECT,
            (apic_reg_ops->apic_read(APIC_PCINT_VECT) & ~APIC_LVT_MASK));
}
 364 
/*
 * Cross-call target used by cmci_cpu_setup(): writes apic_cmci_vect (without
 * AV_MASK) to the APIC_CMCI_VECT register of the CPU it runs on.  The three
 * arguments are unused.  Always returns 0.
 */
/*ARGSUSED*/
static int
apic_cmci_enable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
{
        apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect);
        return (0);
}
 372 
/*
 * Cross-call target used by cmci_cpu_setup(): writes apic_cmci_vect with
 * AV_MASK or'ed in to the APIC_CMCI_VECT register of the CPU it runs on.
 * The three arguments are unused.  Always returns 0.
 */
/*ARGSUSED*/
static int
apic_cmci_disable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
{
        apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect | AV_MASK);
        return (0);
}
 380 
 381 /*ARGSUSED*/
 382 int
 383 cmci_cpu_setup(cpu_setup_t what, int cpuid, void *arg)
 384 {
 385         cpuset_t        cpu_set;
 386 
 387         CPUSET_ONLY(cpu_set, cpuid);
 388 
 389         switch (what) {
 390                 case CPU_ON:
 391                         xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
 392                             (xc_func_t)apic_cmci_enable);
 393                         break;
 394 
 395                 case CPU_OFF:
 396                         xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
 397                             (xc_func_t)apic_cmci_disable);
 398                         break;
 399 
 400                 default:
 401                         break;
 402         }
 403 
 404         return (0);
 405 }
 406 
/*
 * Quiesce the local APIC of the current CPU: raise the task priority to
 * APIC_MASK_ALL, write AV_MASK to the timer, both local interrupt entries,
 * the error entry and the performance counter entry, and finally write
 * APIC_SPUR_INTR to the spurious interrupt register.
 * Called from apic_nmi_intr() when apic_shutdown_processors is set.
 */
static void
apic_disable_local_apic(void)
{
        apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);
        apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK);

        /* local intr reg 0 */
        apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK);

        /* disable NMI */
        apic_reg_ops->apic_write(APIC_INT_VECT1, AV_MASK);

        /* and error interrupt */
        apic_reg_ops->apic_write(APIC_ERR_VECT, AV_MASK);

        /* and perf counter intr */
        apic_reg_ops->apic_write(APIC_PCINT_VECT, AV_MASK);

        apic_reg_ops->apic_write(APIC_SPUR_INT_REG, APIC_SPUR_INTR);
}
 427 
/*
 * Send the INIT / start-up IPI sequence to the CPU with processor id cpun.
 *
 *   cpun   - index into apic_cpus[] of the target CPU
 *   start  - when true, BIOS_SHUTDOWN is first written to the CMOS SSB
 *            location (apic_cpu_start() passes B_TRUE, apic_cpu_stop()
 *            passes B_FALSE)
 *
 * Sequence: INIT assert, bounded wait for the pending bit (xAPIC mode only),
 * INIT de-assert, 20 ms wait and - for integrated APICs only - two start-up
 * IPIs, each followed by a 200 microsecond wait.  Interrupts are disabled on
 * the calling CPU for the whole sequence.
 */
static void
apic_cpu_send_SIPI(processorid_t cpun, boolean_t start)
{
        int             loop_count;
        uint32_t        vector;
        uint_t          apicid;
        ulong_t         iflag;

        apicid =  apic_cpus[cpun].aci_local_id;

        /*
         * Interrupts on current CPU will be disabled during the
         * steps in order to avoid unwanted side effects from
         * executing interrupt handlers on a problematic BIOS.
         */
        iflag = intr_clear();

        if (start) {
                outb(CMOS_ADDR, SSB);
                outb(CMOS_DATA, BIOS_SHUTDOWN);
        }

        /*
         * According to X2APIC specification in section '2.3.5.1' of
         * Interrupt Command Register Semantics, the semantics of
         * programming the Interrupt Command Register to dispatch an interrupt
         * is simplified. A single MSR write to the 64-bit ICR is required
         * for dispatching an interrupt. Specifically, with the 64-bit MSR
         * interface to ICR, system software is not required to check the
         * status of the delivery status bit prior to writing to the ICR
         * to send an IPI. With the removal of the Delivery Status bit,
         * system software no longer has a reason to read the ICR. It remains
         * readable only to aid in debugging.
         */
#ifdef  DEBUG
        APIC_AV_PENDING_SET();
#else
        if (apic_mode == LOCAL_APIC) {
                APIC_AV_PENDING_SET();
        }
#endif /* DEBUG */

        /* for integrated - make sure there is one INIT IPI in buffer */
        /* for external - it will wake up the cpu */
        apic_reg_ops->apic_write_int_cmd(apicid, AV_ASSERT | AV_RESET);

        /*
         * If only 1 CPU is installed, PENDING bit will not go low, hence
         * the bound of apic_sipi_max_loop_count iterations.  The pending bit
         * is only polled in xAPIC mode (apic_mode == LOCAL_APIC).
         */
        for (loop_count = apic_sipi_max_loop_count; loop_count; loop_count--) {
                if (apic_mode == LOCAL_APIC &&
                    apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING)
                        apic_ret();
                else
                        break;
        }

        apic_reg_ops->apic_write_int_cmd(apicid, AV_DEASSERT | AV_RESET);
        drv_usecwait(20000);            /* 20 milli sec */

        if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) {
                /* integrated apic */

                /* start-up vector: page number of rm_platter_pa, masked */
                vector = (rm_platter_pa >> MMU_PAGESHIFT) &
                    (APIC_VECTOR_MASK | APIC_IPL_MASK);

                /* to offset the INIT IPI queue up in the buffer */
                apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP);
                drv_usecwait(200);              /* 200 micro sec */

                /*
                 * send the second SIPI (Startup IPI) as recommended by Intel
                 * software development manual.
                 */
                apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP);
                drv_usecwait(200);      /* 200 micro sec */
        }

        intr_restore(iflag);
}
 506 
 507 /*ARGSUSED1*/
 508 int
 509 apic_cpu_start(processorid_t cpun, caddr_t arg)
 510 {
 511         ASSERT(MUTEX_HELD(&cpu_lock));
 512 
 513         if (!apic_cpu_in_range(cpun)) {
 514                 return (EINVAL);
 515         }
 516 
 517         /*
 518          * Switch to apic_common_send_ipi for safety during starting other CPUs.
 519          */
 520         if (apic_mode == LOCAL_X2APIC) {
 521                 apic_switch_ipi_callback(B_TRUE);
 522         }
 523 
 524         apic_cmos_ssb_set = 1;
 525         apic_cpu_send_SIPI(cpun, B_TRUE);
 526 
 527         return (0);
 528 }
 529 
/*
 * Put CPU into halted state with interrupts disabled.
 *
 *   cpun - processor id of the CPU to stop
 *   arg  - unused
 *
 * The caller must hold cpu_lock, and the CPU is expected to be offline,
 * quiesced and not enabled already (asserted below).
 *
 * Returns EINVAL when cpun is out of range, ENOTSUP when the CPU's local
 * APIC version is below APIC_INTEGRATED_VERS, the error from xc_flush_cpu()
 * when that fails (the CPU is then marked ready again), and 0 on success.
 */
/*ARGSUSED1*/
int
apic_cpu_stop(processorid_t cpun, caddr_t arg)
{
        int             rc;
        cpu_t           *cp;
        extern cpuset_t cpu_ready_set;
        extern void cpu_idle_intercept_cpu(cpu_t *cp);

        ASSERT(MUTEX_HELD(&cpu_lock));

        if (!apic_cpu_in_range(cpun)) {
                return (EINVAL);
        }
        if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) {
                return (ENOTSUP);
        }

        cp = cpu_get(cpun);
        ASSERT(cp != NULL);
        ASSERT((cp->cpu_flags & CPU_OFFLINE) != 0);
        ASSERT((cp->cpu_flags & CPU_QUIESCED) != 0);
        ASSERT((cp->cpu_flags & CPU_ENABLE) == 0);

        /* Clear CPU_READY flag to disable cross calls. */
        cp->cpu_flags &= ~CPU_READY;
        CPUSET_ATOMIC_DEL(cpu_ready_set, cpun);
        rc = xc_flush_cpu(cp);
        if (rc != 0) {
                /* Flush failed: undo both changes made above and bail out. */
                CPUSET_ATOMIC_ADD(cpu_ready_set, cpun);
                cp->cpu_flags |= CPU_READY;
                return (rc);
        }

        /* Intercept target CPU at a safe point before powering it off. */
        cpu_idle_intercept_cpu(cp);

        /* Same IPI sequence as a start, but without the CMOS SSB write. */
        apic_cpu_send_SIPI(cpun, B_FALSE);
        cp->cpu_flags &= ~CPU_RUNNING;

        return (0);
}
 575 
 576 int
 577 apic_cpu_ops(psm_cpu_request_t *reqp)
 578 {
 579         if (reqp == NULL) {
 580                 return (EINVAL);
 581         }
 582 
 583         switch (reqp->pcr_cmd) {
 584         case PSM_CPU_ADD:
 585                 return (apic_cpu_add(reqp));
 586 
 587         case PSM_CPU_REMOVE:
 588                 return (apic_cpu_remove(reqp));
 589 
 590         case PSM_CPU_STOP:
 591                 return (apic_cpu_stop(reqp->req.cpu_stop.cpuid,
 592                     reqp->req.cpu_stop.ctx));
 593 
 594         default:
 595                 return (ENOTSUP);
 596         }
 597 }
 598 
/*
 * Debug-only tunables.  None of them is referenced in this part of the file.
 * NOTE(review): presumably consumed by the interrupt entry code of the PSM
 * modules - confirm before changing the defaults.
 */
#ifdef  DEBUG
int     apic_break_on_cpu = 9;
int     apic_stretch_interrupts = 0;
int     apic_stretch_ISR = 1 << 3;        /* IPL of 3 matches nothing now */
#endif /* DEBUG */
 604 
/*
 * generates an interprocessor interrupt to another CPU. Any changes made to
 * this routine must be accompanied by similar changes to
 * apic_common_send_ipi().
 *
 *   cpun - index into apic_cpus[] of the target CPU
 *   ipl  - index into apic_resv_vector[] that selects the vector to send
 *
 * Neither argument is range-checked here; only the resulting vector is
 * asserted to lie within [APIC_BASE_VECT, APIC_SPUR_INTR].
 */
void
apic_send_ipi(int cpun, int ipl)
{
        int vector;
        ulong_t flag;

        vector = apic_resv_vector[ipl];

        ASSERT((vector >= APIC_BASE_VECT) && (vector <= APIC_SPUR_INTR));

        /* Keep interrupts off between APIC_AV_PENDING_SET and the write. */
        flag = intr_clear();

        APIC_AV_PENDING_SET();

        apic_reg_ops->apic_write_int_cmd(apic_cpus[cpun].aci_local_id,
            vector);

        intr_restore(flag);
}
 629 
 630 
/*
 * Intentionally empty: this implementation does nothing when told that
 * cpun is idle.
 */
/*ARGSUSED*/
void
apic_set_idlecpu(processorid_t cpun)
{
}
 636 
/*
 * Intentionally empty: counterpart of apic_set_idlecpu(), which is empty
 * as well.
 */
/*ARGSUSED*/
void
apic_unset_idlecpu(processorid_t cpun)
{
}
 642 
 643 
/*
 * Empty function.  It is called from the body of the busy-wait loops in this
 * file (apic_cpu_send_SIPI(), apic_gettime(), apic_gethrtime()).
 */
void
apic_ret()
{
}
 648 
/*
 * If apic_coarse_hrtime == 1, then apic_gettime() is used instead of
 * apic_gethrtime().  This trades accuracy for performance.
 */
 653 
 654 hrtime_t
 655 apic_gettime()
 656 {
 657         int old_hrtime_stamp;
 658         hrtime_t temp;
 659 
 660         /*
 661          * In one-shot mode, we do not keep time, so if anyone
 662          * calls psm_gettime() directly, we vector over to
 663          * gethrtime().
 664          * one-shot mode MUST NOT be enabled if this psm is the source of
 665          * hrtime.
 666          */
 667 
 668         if (apic_oneshot)
 669                 return (gethrtime());
 670 
 671 
 672 gettime_again:
 673         while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
 674                 apic_ret();
 675 
 676         temp = apic_nsec_since_boot;
 677 
 678         if (apic_hrtime_stamp != old_hrtime_stamp) {    /* got an interrupt */
 679                 goto gettime_again;
 680         }
 681         return (temp);
 682 }
 683 
/*
 * Here we return the number of nanoseconds since booting.  Note every
 * clock interrupt increments apic_nsec_since_boot by the appropriate
 * amount.
 *
 * The result is apic_nsec_since_boot plus the time that corresponds to the
 * ticks already consumed from the timer (apic_hertz_count minus the current
 * count).  The current count is read directly when the calling CPU's local
 * APIC id equals that of apic_cpus[0], and through a remote read command
 * otherwise.  Interrupts are disabled and apic_gethrtime_lock is held while
 * the value is computed.
 *
 * The returned value never drops below the previously returned one
 * (apic_last_hrtime).  When the remote read comes back invalid,
 * apic_last_hrtime itself is returned.
 */
hrtime_t
apic_gethrtime(void)
{
        int curr_timeval, countval, elapsed_ticks;
        int old_hrtime_stamp, status;
        hrtime_t temp;
        uint32_t cpun;
        ulong_t oflags;

        /*
         * In one-shot mode, we do not keep time, so if anyone
         * calls psm_gethrtime() directly, we vector over to
         * gethrtime().
         * one-shot mode MUST NOT be enabled if this psm is the source of
         * hrtime.
         */

        if (apic_oneshot)
                return (gethrtime());

        oflags = intr_clear();  /* prevent migration */

        /* local APIC id of this CPU; shifted into place in xAPIC mode */
        cpun = apic_reg_ops->apic_read(APIC_LID_REG);
        if (apic_mode == LOCAL_APIC)
                cpun >>= APIC_ID_BIT_OFFSET;

        lock_set(&apic_gethrtime_lock);

gethrtime_again:
        /* spin for as long as the stamp is odd */
        while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
                apic_ret();

        /*
         * Check to see which CPU we are on.  Note the time is kept on
         * the local APIC of CPU 0.  If on CPU 0, simply read the current
         * counter.  If on another CPU, issue a remote read command to CPU 0.
         */
        if (cpun == apic_cpus[0].aci_local_id) {
                countval = apic_reg_ops->apic_read(APIC_CURR_COUNT);
        } else {
#ifdef  DEBUG
                APIC_AV_PENDING_SET();
#else
                if (apic_mode == LOCAL_APIC)
                        APIC_AV_PENDING_SET();
#endif /* DEBUG */

                apic_reg_ops->apic_write_int_cmd(
                    apic_cpus[0].aci_local_id, APIC_CURR_ADD | AV_REMOTE);

                /* wait for the remote read to complete */
                while ((status = apic_reg_ops->apic_read(APIC_INT_CMD1))
                    & AV_READ_PENDING) {
                        apic_ret();
                }

                if (status & AV_REMOTE_STATUS)      /* 1 = valid */
                        countval = apic_reg_ops->apic_read(APIC_REMOTE_READ);
                else {  /* 0 = invalid */
                        apic_remote_hrterr++;
                        /*
                         * return last hrtime right now, will need more
                         * testing if change to retry
                         */
                        temp = apic_last_hrtime;

                        lock_clear(&apic_gethrtime_lock);

                        intr_restore(oflags);

                        return (temp);
                }
        }
        /*
         * A count above the last one read is replaced by 0 (i.e. all of
         * apic_hertz_count counts as elapsed); otherwise remember the count.
         * NOTE(review): presumably this covers a counter reload between two
         * reads - confirm.
         */
        if (countval > last_count_read)
                countval = 0;
        else
                last_count_read = countval;

        elapsed_ticks = apic_hertz_count - countval;

        curr_timeval = APIC_TICKS_TO_NSECS(elapsed_ticks);
        temp = apic_nsec_since_boot + curr_timeval;

        if (apic_hrtime_stamp != old_hrtime_stamp) {    /* got an interrupt */
                /* we might have clobbered last_count_read. Restore it */
                last_count_read = apic_hertz_count;
                goto gethrtime_again;
        }

        if (temp < apic_last_hrtime) {
                /* return last hrtime if error occurs */
                apic_hrtime_error++;
                temp = apic_last_hrtime;
        }
        else
                apic_last_hrtime = temp;

        lock_clear(&apic_gethrtime_lock);
        intr_restore(oflags);

        return (temp);
}
 790 
 791 /* apic NMI handler */
 792 /*ARGSUSED*/
 793 void
 794 apic_nmi_intr(caddr_t arg, struct regs *rp)
 795 {
 796         if (apic_shutdown_processors) {
 797                 apic_disable_local_apic();
 798                 return;
 799         }
 800 
 801         apic_error |= APIC_ERR_NMI;
 802 
 803         if (!lock_try(&apic_nmi_lock))
 804                 return;
 805         apic_num_nmis++;
 806 
 807         if (apic_kmdb_on_nmi && psm_debugger()) {
 808                 debug_enter("NMI received: entering kmdb\n");
 809         } else if (apic_panic_on_nmi) {
 810                 /* Keep panic from entering kmdb. */
 811                 nopanicdebug = 1;
 812                 panic("NMI received\n");
 813         } else {
 814                 /*
 815                  * prom_printf is the best shot we have of something which is
 816                  * problem free from high level/NMI type of interrupts
 817                  */
 818                 prom_printf("NMI received\n");
 819         }
 820 
 821         lock_clear(&apic_nmi_lock);
 822 }
 823 
 824 processorid_t
 825 apic_get_next_processorid(processorid_t cpu_id)
 826 {
 827 
 828         int i;
 829 
 830         if (cpu_id == -1)
 831                 return ((processorid_t)0);
 832 
 833         for (i = cpu_id + 1; i < NCPU; i++) {
 834                 if (apic_cpu_in_range(i))
 835                         return (i);
 836         }
 837 
 838         return ((processorid_t)-1);
 839 }
 840 
 841 int
 842 apic_cpu_add(psm_cpu_request_t *reqp)
 843 {
 844         int i, rv = 0;
 845         ulong_t iflag;
 846         boolean_t first = B_TRUE;
 847         uchar_t localver;
 848         uint32_t localid, procid;
 849         processorid_t cpuid = (processorid_t)-1;
 850         mach_cpu_add_arg_t *ap;
 851 
 852         ASSERT(reqp != NULL);
 853         reqp->req.cpu_add.cpuid = (processorid_t)-1;
 854 
 855         /* Check whether CPU hotplug is supported. */
 856         if (!plat_dr_support_cpu() || apic_max_nproc == -1) {
 857                 return (ENOTSUP);
 858         }
 859 
 860         ap = (mach_cpu_add_arg_t *)reqp->req.cpu_add.argp;
 861         switch (ap->type) {
 862         case MACH_CPU_ARG_LOCAL_APIC:
 863                 localid = ap->arg.apic.apic_id;
 864                 procid = ap->arg.apic.proc_id;
 865                 if (localid >= 255 || procid > 255) {
 866                         cmn_err(CE_WARN,
 867                             "!apic: apicid(%u) or procid(%u) is invalid.",
 868                             localid, procid);
 869                         return (EINVAL);
 870                 }
 871                 break;
 872 
 873         case MACH_CPU_ARG_LOCAL_X2APIC:
 874                 localid = ap->arg.apic.apic_id;
 875                 procid = ap->arg.apic.proc_id;
 876                 if (localid >= UINT32_MAX) {
 877                         cmn_err(CE_WARN,
 878                             "!apic: x2apicid(%u) is invalid.", localid);
 879                         return (EINVAL);
 880                 } else if (localid >= 255 && apic_mode == LOCAL_APIC) {
 881                         cmn_err(CE_WARN, "!apic: system is in APIC mode, "
 882                             "can't support x2APIC processor.");
 883                         return (ENOTSUP);
 884                 }
 885                 break;
 886 
 887         default:
 888                 cmn_err(CE_WARN,
 889                     "!apic: unknown argument type %d to apic_cpu_add().",
 890                     ap->type);
 891                 return (EINVAL);
 892         }
 893 
 894         /* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. */
 895         iflag = intr_clear();
 896         lock_set(&apic_ioapic_lock);
 897 
 898         /* Check whether local APIC id already exists. */
 899         for (i = 0; i < apic_nproc; i++) {
 900                 if (!CPU_IN_SET(apic_cpumask, i))
 901                         continue;
 902                 if (apic_cpus[i].aci_local_id == localid) {
 903                         lock_clear(&apic_ioapic_lock);
 904                         intr_restore(iflag);
 905                         cmn_err(CE_WARN,
 906                             "!apic: local apic id %u already exists.",
 907                             localid);
 908                         return (EEXIST);
 909                 } else if (apic_cpus[i].aci_processor_id == procid) {
 910                         lock_clear(&apic_ioapic_lock);
 911                         intr_restore(iflag);
 912                         cmn_err(CE_WARN,
 913                             "!apic: processor id %u already exists.",
 914                             (int)procid);
 915                         return (EEXIST);
 916                 }
 917 
 918                 /*
 919                  * There's no local APIC version number available in MADT table,
 920                  * so assume that all CPUs are homogeneous and use local APIC
 921                  * version number of the first existing CPU.
 922                  */
 923                 if (first) {
 924                         first = B_FALSE;
 925                         localver = apic_cpus[i].aci_local_ver;
 926                 }
 927         }
 928         ASSERT(first == B_FALSE);
 929 
 930         /*
 931          * Try to assign the same cpuid if APIC id exists in the dirty cache.
 932          */
 933         for (i = 0; i < apic_max_nproc; i++) {
 934                 if (CPU_IN_SET(apic_cpumask, i)) {
 935                         ASSERT((apic_cpus[i].aci_status & APIC_CPU_FREE) == 0);
 936                         continue;
 937                 }
 938                 ASSERT(apic_cpus[i].aci_status & APIC_CPU_FREE);
 939                 if ((apic_cpus[i].aci_status & APIC_CPU_DIRTY) &&
 940                     apic_cpus[i].aci_local_id == localid &&
 941                     apic_cpus[i].aci_processor_id == procid) {
 942                         cpuid = i;
 943                         break;
 944                 }
 945         }
 946 
 947         /* Avoid the dirty cache and allocate fresh slot if possible. */
 948         if (cpuid == (processorid_t)-1) {
 949                 for (i = 0; i < apic_max_nproc; i++) {
 950                         if ((apic_cpus[i].aci_status & APIC_CPU_FREE) &&
 951                             (apic_cpus[i].aci_status & APIC_CPU_DIRTY) == 0) {
 952                                 cpuid = i;
 953                                 break;
 954                         }
 955                 }
 956         }
 957 
 958         /* Try to find any free slot as last resort. */
 959         if (cpuid == (processorid_t)-1) {
 960                 for (i = 0; i < apic_max_nproc; i++) {
 961                         if (apic_cpus[i].aci_status & APIC_CPU_FREE) {
 962                                 cpuid = i;
 963                                 break;
 964                         }
 965                 }
 966         }
 967 
 968         if (cpuid == (processorid_t)-1) {
 969                 lock_clear(&apic_ioapic_lock);
 970                 intr_restore(iflag);
 971                 cmn_err(CE_NOTE,
 972                     "!apic: failed to allocate cpu id for processor %u.",
 973                     procid);
 974                 rv = EAGAIN;
 975         } else if (ACPI_FAILURE(acpica_map_cpu(cpuid, procid))) {
 976                 lock_clear(&apic_ioapic_lock);
 977                 intr_restore(iflag);
 978                 cmn_err(CE_NOTE,
 979                     "!apic: failed to build mapping for processor %u.",
 980                     procid);
 981                 rv = EBUSY;
 982         } else {
 983                 ASSERT(cpuid >= 0 && cpuid < NCPU);
 984                 ASSERT(cpuid < apic_max_nproc && cpuid < max_ncpus);
 985                 bzero(&apic_cpus[cpuid], sizeof (apic_cpus[0]));
 986                 apic_cpus[cpuid].aci_processor_id = procid;
 987                 apic_cpus[cpuid].aci_local_id = localid;
 988                 apic_cpus[cpuid].aci_local_ver = localver;
 989                 CPUSET_ATOMIC_ADD(apic_cpumask, cpuid);
 990                 if (cpuid >= apic_nproc) {
 991                         apic_nproc = cpuid + 1;
 992                 }
 993                 lock_clear(&apic_ioapic_lock);
 994                 intr_restore(iflag);
 995                 reqp->req.cpu_add.cpuid = cpuid;
 996         }
 997 
 998         return (rv);
 999 }
1000 
1001 int
1002 apic_cpu_remove(psm_cpu_request_t *reqp)
1003 {
1004         int i;
1005         ulong_t iflag;
1006         processorid_t cpuid;
1007 
1008         /* Check whether CPU hotplug is supported. */
1009         if (!plat_dr_support_cpu() || apic_max_nproc == -1) {
1010                 return (ENOTSUP);
1011         }
1012 
1013         cpuid = reqp->req.cpu_remove.cpuid;
1014 
1015         /* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. */
1016         iflag = intr_clear();
1017         lock_set(&apic_ioapic_lock);
1018 
1019         if (!apic_cpu_in_range(cpuid)) {
1020                 lock_clear(&apic_ioapic_lock);
1021                 intr_restore(iflag);
1022                 cmn_err(CE_WARN,
1023                     "!apic: cpuid %d doesn't exist in apic_cpus array.",
1024                     cpuid);
1025                 return (ENODEV);
1026         }
1027         ASSERT((apic_cpus[cpuid].aci_status & APIC_CPU_FREE) == 0);
1028 
1029         if (ACPI_FAILURE(acpica_unmap_cpu(cpuid))) {
1030                 lock_clear(&apic_ioapic_lock);
1031                 intr_restore(iflag);
1032                 return (ENOENT);
1033         }
1034 
1035         if (cpuid == apic_nproc - 1) {
1036                 /*
1037                  * We are removing the highest numbered cpuid so we need to
1038                  * find the next highest cpuid as the new value for apic_nproc.
1039                  */
1040                 for (i = apic_nproc; i > 0; i--) {
1041                         if (CPU_IN_SET(apic_cpumask, i - 1)) {
1042                                 apic_nproc = i;
1043                                 break;
1044                         }
1045                 }
1046                 /* at least one CPU left */
1047                 ASSERT(i > 0);
1048         }
1049         CPUSET_ATOMIC_DEL(apic_cpumask, cpuid);
1050         /* mark slot as free and keep it in the dirty cache */
1051         apic_cpus[cpuid].aci_status = APIC_CPU_FREE | APIC_CPU_DIRTY;
1052 
1053         lock_clear(&apic_ioapic_lock);
1054         intr_restore(iflag);
1055 
1056         return (0);
1057 }
1058 
/*
 * Return the number of APIC clock ticks elapsed for 8254 to decrement
 * (APIC_TIME_COUNT + pit_ticks_adj) ticks.
 *
 * addr is used together with apicadr and APIC_CURR_COUNT to derive the
 * register passed to apic_reg_ops->apic_read().  On return *pit_ticks_adj
 * holds how far the 8254 counter had moved past the intended stopping point
 * when the measurement ended.  The whole measurement runs with interrupts
 * disabled.
 */
uint_t
apic_calibrate(volatile uint32_t *addr, uint16_t *pit_ticks_adj)
{
        uint8_t         pit_tick_lo;
        uint16_t        pit_tick, target_pit_tick;
        uint32_t        start_apic_tick, end_apic_tick;
        ulong_t         iflag;
        uint32_t        reg;

        /*
         * Pointer arithmetic: the offset of the current-count register
         * relative to apicadr (presumably in 32-bit register units, given
         * addr is a uint32_t pointer -- confirm against apicadr's type).
         */
        reg = addr + APIC_CURR_COUNT - apicadr;

        iflag = intr_clear();

        /*
         * Read the 16-bit counter as two byte reads from the same port (low
         * byte first) until the value is at least APIC_TIME_MIN and the low
         * byte lies strictly between APIC_LB_MIN and APIC_LB_MAX.
         */
        do {
                pit_tick_lo = inb(PITCTR0_PORT);
                pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
        } while (pit_tick < APIC_TIME_MIN ||
            pit_tick_lo <= APIC_LB_MIN || pit_tick_lo >= APIC_LB_MAX);

        /*
         * Wait for the 8254 to decrement by 5 ticks to ensure
         * we didn't start in the middle of a tick.
         * Compare with 0x10 for the wrap around case.
         */
        target_pit_tick = pit_tick - 5;
        do {
                pit_tick_lo = inb(PITCTR0_PORT);
                pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
        } while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);

        start_apic_tick = apic_reg_ops->apic_read(reg);

        /*
         * Wait for the 8254 to decrement by
         * (APIC_TIME_COUNT + pit_ticks_adj) ticks
         */
        target_pit_tick = pit_tick - APIC_TIME_COUNT;
        do {
                pit_tick_lo = inb(PITCTR0_PORT);
                pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
        } while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);

        end_apic_tick = apic_reg_ops->apic_read(reg);

        /* The loop exits with pit_tick <= target; report the overshoot. */
        *pit_ticks_adj = target_pit_tick - pit_tick;

        intr_restore(iflag);

        /*
         * start minus end: this is positive when the register counts down
         * (presumably the APIC current-count register does -- confirm).
         */
        return (start_apic_tick - end_apic_tick);
}
1113 
1114 /*
1115  * Initialise the APIC timer on the local APIC of CPU 0 to the desired
1116  * frequency.  Note at this stage in the boot sequence, the boot processor
1117  * is the only active processor.
1118  * hertz value of 0 indicates a one-shot mode request.  In this case
1119  * the function returns the resolution (in nanoseconds) for the hardware
1120  * timer interrupt.  If one-shot mode capability is not available,
1121  * the return value will be 0. apic_enable_oneshot is a global switch
1122  * for disabling the functionality.
1123  * A non-zero positive value for hertz indicates a periodic mode request.
1124  * In this case the hardware will be programmed to generate clock interrupts
1125  * at hertz frequency and returns the resolution of interrupts in
1126  * nanosecond.
1127  */
1128 
1129 int
1130 apic_clkinit(int hertz)
1131 {
1132         int             ret;
1133 
1134         apic_int_busy_mark = (apic_int_busy_mark *
1135             apic_sample_factor_redistribution) / 100;
1136         apic_int_free_mark = (apic_int_free_mark *
1137             apic_sample_factor_redistribution) / 100;
1138         apic_diff_for_redistribution = (apic_diff_for_redistribution *
1139             apic_sample_factor_redistribution) / 100;
1140 
1141         ret = apic_timer_init(hertz);
1142         return (ret);
1143 
1144 }
1145 
/*
 * apic_preshutdown:
 * Called early in shutdown whilst we can still access filesystems to do
 * things like loading modules which will be required to complete shutdown
 * after filesystems are all unmounted.
 *
 * The only action taken here is to emit a trace of the arguments, the
 * configured power-off method and the ACPI enable flag through
 * APIC_VERBOSE_POWEROFF.
 */
void
apic_preshutdown(int cmd, int fcn)
{
        APIC_VERBOSE_POWEROFF(("apic_preshutdown(%d,%d); m=%d a=%d\n",
            cmd, fcn, apic_poweroff_method, apic_enable_acpi));
}
1158 
/*
 * apic_shutdown:
 * Quiesce the interrupt hardware and, for a power-off request, try to turn
 * the machine off.
 *
 * For every (cmd, fcn): hpet_acpi_fini() is called, an NMI is sent to all
 * other CPUs with apic_shutdown_processors set, the CMOS shutdown byte is
 * cleared if apic_cmos_ssb_set, I/O APIC redirection is disabled, the IMCR
 * is switched back (if present) and the local APIC is disabled.
 *
 * For cmd == A_SHUTDOWN only: ACPI is disabled unless fcn is AD_POWEROFF or
 * AD_FASTREBOOT; for AD_FASTREBOOT a reset IPI is written for all other
 * CPUs; for AD_POWEROFF the method selected by apic_poweroff_method is
 * carried out and the function then waits seven seconds for power to drop.
 */
void
apic_shutdown(int cmd, int fcn)
{
        int restarts, attempts;
        int i;
        uchar_t byte;
        ulong_t iflag;

        hpet_acpi_fini();

        /* Send NMI to all CPUs except self to do per processor shutdown */
        iflag = intr_clear();
#ifdef  DEBUG
        APIC_AV_PENDING_SET();
#else
        if (apic_mode == LOCAL_APIC)
                APIC_AV_PENDING_SET();
#endif /* DEBUG */
        /* Set before the NMI is issued, while interrupts are still off. */
        apic_shutdown_processors = 1;
        apic_reg_ops->apic_write(APIC_INT_CMD1,
            AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF);

        /* restore cmos shutdown byte before reboot */
        if (apic_cmos_ssb_set) {
                outb(CMOS_ADDR, SSB);
                outb(CMOS_DATA, 0);
        }

        ioapic_disable_redirection();

        /*      disable apic mode if imcr present       */
        if (apic_imcrp) {
                outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
                outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_PIC);
        }

        apic_disable_local_apic();

        intr_restore(iflag);

        /* remainder of function is for shutdown cases only */
        if (cmd != A_SHUTDOWN)
                return;

        /*
         * Switch system back into Legacy-Mode if using ACPI and
         * not powering-off.  Some BIOSes need to remain in ACPI-mode
         * for power-off to succeed (Dell Dimension 4600)
         * Do not disable ACPI while doing fastreboot
         */
        if (apic_enable_acpi && fcn != AD_POWEROFF && fcn != AD_FASTREBOOT)
                (void) AcpiDisable();

        /* Fast reboot: issue a reset request to every CPU except this one. */
        if (fcn == AD_FASTREBOOT) {
                apic_reg_ops->apic_write(APIC_INT_CMD1,
                    AV_ASSERT | AV_RESET | AV_SH_ALL_EXCSELF);
        }

        /* remainder of function is for shutdown+poweroff case only */
        if (fcn != AD_POWEROFF)
                return;

        switch (apic_poweroff_method) {
                case APIC_POWEROFF_VIA_RTC:

                        /* select the extended NVRAM bank in the RTC */
                        outb(CMOS_ADDR, RTC_REGA);
                        byte = inb(CMOS_DATA);
                        outb(CMOS_DATA, (byte | EXT_BANK));

                        outb(CMOS_ADDR, PFR_REG);

                        /* for Predator must toggle the PAB bit */
                        byte = inb(CMOS_DATA);

                        /*
                         * clear power active bar, wakeup alarm and
                         * kickstart
                         */
                        byte &= ~(PAB_CBIT | WF_FLAG | KS_FLAG);
                        outb(CMOS_DATA, byte);

                        /* delay before next write */
                        drv_usecwait(1000);

                        /* for S40 the following would suffice */
                        byte = inb(CMOS_DATA);

                        /* power active bar control bit */
                        byte |= PAB_CBIT;
                        outb(CMOS_DATA, byte);

                        break;

                case APIC_POWEROFF_VIA_ASPEN_BMC:
                        restarts = 0;
restart_aspen_bmc:
                        /* At most two passes: the third entry gives up. */
                        if (++restarts == 3)
                                break;
                        attempts = 0;
                        /*
                         * Poll until the busy bits clear, waiting 1ms after
                         * each busy poll; the fourth busy poll restarts the
                         * whole sequence.
                         */
                        do {
                                byte = inb(MISMIC_FLAG_REGISTER);
                                byte &= MISMIC_BUSY_MASK;
                                if (byte != 0) {
                                        drv_usecwait(1000);
                                        if (attempts >= 3)
                                                goto restart_aspen_bmc;
                                        ++attempts;
                                }
                        } while (byte != 0);
                        outb(MISMIC_CNTL_REGISTER, CC_SMS_GET_STATUS);
                        byte = inb(MISMIC_FLAG_REGISTER);
                        byte |= 0x1;
                        outb(MISMIC_FLAG_REGISTER, byte);
                        i = 0;
                        /* Send each cntl/data pair from the aspen_bmc table. */
                        for (; i < (sizeof (aspen_bmc)/sizeof (aspen_bmc[0]));
                            i++) {
                                attempts = 0;
                                do {
                                        byte = inb(MISMIC_FLAG_REGISTER);
                                        byte &= MISMIC_BUSY_MASK;
                                        if (byte != 0) {
                                                drv_usecwait(1000);
                                                if (attempts >= 3)
                                                        goto restart_aspen_bmc;
                                                ++attempts;
                                        }
                                } while (byte != 0);
                                outb(MISMIC_CNTL_REGISTER, aspen_bmc[i].cntl);
                                outb(MISMIC_DATA_REGISTER, aspen_bmc[i].data);
                                byte = inb(MISMIC_FLAG_REGISTER);
                                byte |= 0x1;
                                outb(MISMIC_FLAG_REGISTER, byte);
                        }
                        break;

                case APIC_POWEROFF_VIA_SITKA_BMC:
                        restarts = 0;
restart_sitka_bmc:
                        /* At most two passes: the third entry gives up. */
                        if (++restarts == 3)
                                break;
                        attempts = 0;
                        /*
                         * Poll while the state field reports a read or write
                         * state, with the same 1ms wait and restart-on-fourth
                         * busy poll rule as above.
                         */
                        do {
                                byte = inb(SMS_STATUS_REGISTER);
                                byte &= SMS_STATE_MASK;
                                if ((byte == SMS_READ_STATE) ||
                                    (byte == SMS_WRITE_STATE)) {
                                        drv_usecwait(1000);
                                        if (attempts >= 3)
                                                goto restart_sitka_bmc;
                                        ++attempts;
                                }
                        } while ((byte == SMS_READ_STATE) ||
                            (byte == SMS_WRITE_STATE));
                        outb(SMS_COMMAND_REGISTER, SMS_GET_STATUS);
                        i = 0;
                        /* Write each port/data pair from the sitka_bmc table. */
                        for (; i < (sizeof (sitka_bmc)/sizeof (sitka_bmc[0]));
                            i++) {
                                attempts = 0;
                                do {
                                        byte = inb(SMS_STATUS_REGISTER);
                                        byte &= SMS_IBF_MASK;
                                        if (byte != 0) {
                                                drv_usecwait(1000);
                                                if (attempts >= 3)
                                                        goto restart_sitka_bmc;
                                                ++attempts;
                                        }
                                } while (byte != 0);
                                outb(sitka_bmc[i].port, sitka_bmc[i].data);
                        }
                        break;

                case APIC_POWEROFF_NONE:

                        /* If no APIC direct method, we will try using ACPI */
                        if (apic_enable_acpi) {
                                /* A return of 1 skips the wait below. */
                                if (acpi_poweroff() == 1)
                                        return;
                        } else
                                return;

                        break;
        }
        /*
         * Wait a limited time here for power to go off.
         * If the power does not go off, then there was a
         * problem and we should continue to the halt which
         * prints a message for the user to press a key to
         * reboot.
         */
        drv_usecwait(7000000); /* wait seven seconds */

}
1353 
/*
 * Handle for a DDI periodic callback.  It is not referenced in this part of
 * the file; presumably it is assigned where the periodic work is
 * registered -- confirm against its users.
 */
ddi_periodic_t apic_periodic_id;
1355 
1356 /*
1357  * The following functions are in the platform specific file so that they
1358  * can be different functions depending on whether we are running on
1359  * bare metal or a hypervisor.
1360  */
1361 
/*
 * map an apic for memory-mapped access
 *
 * Thin wrapper around psm_map_phys(): addr, len and flags are passed
 * through unchanged and the resulting mapping is returned as a pointer to
 * 32-bit registers.
 */
uint32_t *
mapin_apic(uint32_t addr, size_t len, int flags)
{
        return ((void *)psm_map_phys(addr, len, flags));
}
1370 
/*
 * map an I/O apic for memory-mapped access; in this file it is mapped
 * exactly like a local apic, by delegating to mapin_apic().
 */
uint32_t *
mapin_ioapic(uint32_t addr, size_t len, int flags)
{
        return (mapin_apic(addr, len, flags));
}
1376 
/*
 * unmap an apic
 *
 * Thin wrapper around psm_unmap_phys(); addr and len are passed through
 * unchanged.
 */
void
mapout_apic(caddr_t addr, size_t len)
{
        psm_unmap_phys(addr, len);
}
1385 
/*
 * unmap an I/O apic; in this file it is unmapped exactly like a local
 * apic, by delegating to mapout_apic().
 */
void
mapout_ioapic(caddr_t addr, size_t len)
{
        mapout_apic(addr, len);
}
1391 
1392 uint32_t
1393 ioapic_read(int ioapic_ix, uint32_t reg)
1394 {
1395         volatile uint32_t *ioapic;
1396 
1397         ioapic = apicioadr[ioapic_ix];
1398         ioapic[APIC_IO_REG] = reg;
1399         return (ioapic[APIC_IO_DATA]);
1400 }
1401 
1402 void
1403 ioapic_write(int ioapic_ix, uint32_t reg, uint32_t value)
1404 {
1405         volatile uint32_t *ioapic;
1406 
1407         ioapic = apicioadr[ioapic_ix];
1408         ioapic[APIC_IO_REG] = reg;
1409         ioapic[APIC_IO_DATA] = value;
1410 }
1411 
1412 void
1413 ioapic_write_eoi(int ioapic_ix, uint32_t value)
1414 {
1415         volatile uint32_t *ioapic;
1416 
1417         ioapic = apicioadr[ioapic_ix];
1418         ioapic[APIC_IO_EOI] = value;
1419 }
1420 
1421 /*
1422  * Round-robin algorithm to find the next CPU with interrupts enabled.
1423  * It can't share the same static variable apic_next_bind_cpu with
1424  * apic_get_next_bind_cpu(), since that will cause all interrupts to be
1425  * bound to CPU1 at boot time.  During boot, only CPU0 is online with
1426  * interrupts enabled when apic_get_next_bind_cpu() and apic_find_cpu()
1427  * are called.  However, the pcplusmp driver assumes that there will be
1428  * boot_ncpus CPUs configured eventually so it tries to distribute all
1429  * interrupts among CPU0 - CPU[boot_ncpus - 1].  Thus to prevent all
1430  * interrupts being targetted at CPU1, we need to use a dedicated static
1431  * variable for find_next_cpu() instead of sharing apic_next_bind_cpu.
1432  */
1433 
1434 processorid_t
1435 apic_find_cpu(int flag)
1436 {
1437         int i;
1438         static processorid_t acid = 0;
1439 
1440         /* Find the first CPU with the passed-in flag set */
1441         for (i = 0; i < apic_nproc; i++) {
1442                 if (++acid >= apic_nproc) {
1443                         acid = 0;
1444                 }
1445                 if (apic_cpu_in_range(acid) &&
1446                     (apic_cpus[acid].aci_status & flag)) {
1447                         break;
1448                 }
1449         }
1450 
1451         ASSERT((apic_cpus[acid].aci_status & flag) != 0);
1452         return (acid);
1453 }
1454 
1455 /*
1456  * Switch between safe and x2APIC IPI sending method.
1457  * CPU may power on in xapic mode or x2apic mode. If CPU needs to send IPI to
1458  * other CPUs before entering x2APIC mode, it still needs to xAPIC method.
1459  * Before sending StartIPI to target CPU, psm_send_ipi will be changed to
1460  * apic_common_send_ipi, which detects current local APIC mode and use right
1461  * method to send IPI. If some CPUs fail to start up, apic_poweron_cnt
1462  * won't return to zero, so apic_common_send_ipi will always be used.
1463  * psm_send_ipi can't be simply changed back to x2apic_send_ipi if some CPUs
1464  * failed to start up because those failed CPUs may recover itself later at
1465  * unpredictable time.
1466  */
1467 void
1468 apic_switch_ipi_callback(boolean_t enter)
1469 {
1470         ulong_t iflag;
1471         struct psm_ops *pops = psmops;
1472 
1473         iflag = intr_clear();
1474         lock_set(&apic_mode_switch_lock);
1475         if (enter) {
1476                 ASSERT(apic_poweron_cnt >= 0);
1477                 if (apic_poweron_cnt == 0) {
1478                         pops->psm_send_ipi = apic_common_send_ipi;
1479                         send_dirintf = pops->psm_send_ipi;
1480                 }
1481                 apic_poweron_cnt++;
1482         } else {
1483                 ASSERT(apic_poweron_cnt > 0);
1484                 apic_poweron_cnt--;
1485                 if (apic_poweron_cnt == 0) {
1486                         pops->psm_send_ipi = x2apic_send_ipi;
1487                         send_dirintf = pops->psm_send_ipi;
1488                 }
1489         }
1490         lock_clear(&apic_mode_switch_lock);
1491         intr_restore(iflag);
1492 }
1493 
1494 void
1495 apic_intrmap_init(int apic_mode)
1496 {
1497         int suppress_brdcst_eoi = 0;
1498 
1499         if (psm_vt_ops != NULL) {
1500                 /*
1501                  * Since X2APIC requires the use of interrupt remapping
1502                  * (though this is not documented explicitly in the Intel
1503                  * documentation (yet)), initialize interrupt remapping
1504                  * support before initializing the X2APIC unit.
1505                  */
1506                 if (((apic_intrmap_ops_t *)psm_vt_ops)->
1507                     apic_intrmap_init(apic_mode) == DDI_SUCCESS) {
1508 
1509                         apic_vt_ops = psm_vt_ops;
1510 
1511                         /*
1512                          * We leverage the interrupt remapping engine to
1513                          * suppress broadcast EOI; thus we must send the
1514                          * directed EOI with the directed-EOI handler.
1515                          */
1516                         if (apic_directed_EOI_supported() == 0) {
1517                                 suppress_brdcst_eoi = 1;
1518                         }
1519 
1520                         apic_vt_ops->apic_intrmap_enable(suppress_brdcst_eoi);
1521 
1522                         if (apic_detect_x2apic()) {
1523                                 apic_enable_x2apic();
1524                         }
1525 
1526                         if (apic_directed_EOI_supported() == 0) {
1527                                 apic_set_directed_EOI_handler();
1528                         }
1529                 }
1530         }
1531 }
1532 
1533 /*ARGSUSED*/
1534 static void
1535 apic_record_ioapic_rdt(void *intrmap_private, ioapic_rdt_t *irdt)
1536 {
1537         irdt->ir_hi <<= APIC_ID_BIT_OFFSET;
1538 }
1539 
1540 /*ARGSUSED*/
1541 static void
1542 apic_record_msi(void *intrmap_private, msi_regs_t *mregs)
1543 {
1544         mregs->mr_addr = MSI_ADDR_HDR |
1545             (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
1546             (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT) |
1547             (mregs->mr_addr << MSI_ADDR_DEST_SHIFT);
1548         mregs->mr_data = (MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) |
1549             mregs->mr_data;
1550 }
1551 
/*
 * Functions from apic_introp.c
 *
 * These functions are used by apic_intr_ops().
 */
1557 
/*
 * MSI support flag:
 * reflects whether MSI is supported at the APIC level;
 * it can also be patched through /etc/system.
 *
 *  0 = default value - not yet known; apic_check_msi_support() must be
 *      called to find out, and the flag is then set accordingly
 *  1 = supported
 * -1 = not supported
 */
int     apic_support_msi = 0;

/* Multiple vector support for MSI-X (set to 1 by default) */
int     apic_msix_enable = 1;

/* Multiple vector support for MSI (set to 1 by default) */
int     apic_multi_msi_enable = 1;
1575 
1576 /*
1577  * check whether the system supports MSI
1578  *
1579  * If PCI-E capability is found, then this must be a PCI-E system.
1580  * Since MSI is required for PCI-E system, it returns PSM_SUCCESS
1581  * to indicate this system supports MSI.
1582  */
1583 int
1584 apic_check_msi_support()
1585 {
1586         dev_info_t *cdip;
1587         char dev_type[16];
1588         int dev_len;
1589 
1590         DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support:\n"));
1591 
1592         /*
1593          * check whether the first level children of root_node have
1594          * PCI-E capability
1595          */
1596         for (cdip = ddi_get_child(ddi_root_node()); cdip != NULL;
1597             cdip = ddi_get_next_sibling(cdip)) {
1598 
1599                 DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: cdip: 0x%p,"
1600                     " driver: %s, binding: %s, nodename: %s\n", (void *)cdip,
1601                     ddi_driver_name(cdip), ddi_binding_name(cdip),
1602                     ddi_node_name(cdip)));
1603                 dev_len = sizeof (dev_type);
1604                 if (ddi_getlongprop_buf(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
1605                     "device_type", (caddr_t)dev_type, &dev_len)
1606                     != DDI_PROP_SUCCESS)
1607                         continue;
1608                 if (strcmp(dev_type, "pciex") == 0)
1609                         return (PSM_SUCCESS);
1610         }
1611 
1612         /* MSI is not supported on this system */
1613         DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: no 'pciex' "
1614             "device_type found\n"));
1615         return (PSM_FAILURE);
1616 }
1617 
/*
 * apic_pci_msi_unconfigure:
 *
 * Clear the MSI or MSI-X programming of device rdip.  For MSI the
 * multiple-message-enable field, the address and the data registers are
 * zeroed; for MSI-X, table entry `inum' is masked and its data and address
 * are zeroed.  Any other `type' is ignored.
 *
 * This and the next interface are copied from pci_intr_lib.c.
 * Do ensure that these two files stay in sync.
 * These needed to be copied over here to avoid a deadlock situation on
 * certain mp systems that use MSI interrupts.
 *
 * IMPORTANT regarding these interfaces:
 * i) are called only for MSI/X interrupts.
 * ii) called with interrupts disabled, and must not block
 */
void
apic_pci_msi_unconfigure(dev_info_t *rdip, int type, int inum)
{
        ushort_t                msi_ctrl;
        int                     cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
        ddi_acc_handle_t        handle = i_ddi_get_pci_config_handle(rdip);

        ASSERT((handle != NULL) && (cap_ptr != 0));

        if (type == DDI_INTR_TYPE_MSI) {
                /* Clear the multiple-message-enable field. */
                msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
                msi_ctrl &= (~PCI_MSI_MME_MASK);
                pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
                pci_config_put32(handle, cap_ptr + PCI_MSI_ADDR_OFFSET, 0);

                /*
                 * The data register offset depends on whether the 64-bit
                 * bit is set in the control word; in the 64-bit case the
                 * upper address dword is cleared as well.
                 */
                if (msi_ctrl &  PCI_MSI_64BIT_MASK) {
                        pci_config_put16(handle,
                            cap_ptr + PCI_MSI_64BIT_DATA, 0);
                        pci_config_put32(handle,
                            cap_ptr + PCI_MSI_ADDR_OFFSET + 4, 0);
                } else {
                        pci_config_put16(handle,
                            cap_ptr + PCI_MSI_32BIT_DATA, 0);
                }

        } else if (type == DDI_INTR_TYPE_MSIX) {
                uintptr_t       off;
                uint32_t        mask;
                ddi_intr_msix_t *msix_p = i_ddi_get_msix(rdip);

                ASSERT(msix_p != NULL);

                /* Offset into "inum"th entry in the MSI-X table & mask it */
                off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
                    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;

                mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);

                /* Set bit 0 of the vector control word. */
                ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask | 1));

                /* Offset into the "inum"th entry in the MSI-X table */
                off = (uintptr_t)msix_p->msix_tbl_addr +
                    (inum * PCI_MSIX_VECTOR_SIZE);

                /* Reset the "data" and "addr" bits */
                ddi_put32(msix_p->msix_tbl_hdl,
                    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), 0);
                ddi_put64(msix_p->msix_tbl_hdl, (uint64_t *)off, 0);
        }
}
1680 
/*
 * apic_pci_msi_disable_mode:
 *
 * Clear the MSI or MSI-X enable bit in the control register of device rdip
 * if it is currently set.  The register is only written when the bit was
 * set; any other `type' is ignored.
 */
void
apic_pci_msi_disable_mode(dev_info_t *rdip, int type)
{
        ushort_t                msi_ctrl;
        int                     cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
        ddi_acc_handle_t        handle = i_ddi_get_pci_config_handle(rdip);

        ASSERT((handle != NULL) && (cap_ptr != 0));

        if (type == DDI_INTR_TYPE_MSI) {
                msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
                /* Already disabled: nothing to write. */
                if (!(msi_ctrl & PCI_MSI_ENABLE_BIT))
                        return;

                msi_ctrl &= ~PCI_MSI_ENABLE_BIT;    /* MSI disable */
                pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);

        } else if (type == DDI_INTR_TYPE_MSIX) {
                msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
                if (msi_ctrl & PCI_MSIX_ENABLE_BIT) {
                        /* MSI-X disable */
                        msi_ctrl &= ~PCI_MSIX_ENABLE_BIT;
                        pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
                            msi_ctrl);
                }
        }
}
1710 
/*
 * Return the local APIC id recorded in apic_cpus[] for cpuid.
 * cpuid must be below apic_nproc; this is only checked by ASSERT.
 */
uint32_t
apic_get_localapicid(uint32_t cpuid)
{
        ASSERT(cpuid < apic_nproc && apic_cpus != NULL);

        return (apic_cpus[cpuid].aci_local_id);
}
1718 
/*
 * Return the I/O APIC id recorded in apic_io_id[] for index ioapicindex.
 * ioapicindex must be below MAX_IO_APIC; this is only checked by ASSERT.
 */
uchar_t
apic_get_ioapicid(uchar_t ioapicindex)
{
        ASSERT(ioapicindex < MAX_IO_APIC);

        return (apic_io_id[ioapicindex]);
}