Print this page
    
PANKOVs restructure
    
      
        | Split | Close | 
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/i86pc/io/apix/apix.c
          +++ new/usr/src/uts/i86pc/io/apix/apix.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   */
  25   25  /*
  26   26   * Copyright (c) 2010, Intel Corporation.
  27   27   * All rights reserved.
  28   28   */
  29   29  /*
  30   30   * Copyright (c) 2013, Joyent, Inc.  All rights reserved.
  31   31   */
  32   32  
  33   33  /*
  34   34   * To understand how the apix module interacts with the interrupt subsystem read
  35   35   * the theory statement in uts/i86pc/os/intr.c.
  36   36   */
  37   37  
  38   38  /*
  39   39   * PSMI 1.1 extensions are supported only in 2.6 and later versions.
  40   40   * PSMI 1.2 extensions are supported only in 2.7 and later versions.
  41   41   * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
  42   42   * PSMI 1.5 extensions are supported in Solaris Nevada.
  
    | ↓ open down ↓ | 42 lines elided | ↑ open up ↑ | 
  43   43   * PSMI 1.6 extensions are supported in Solaris Nevada.
  44   44   * PSMI 1.7 extensions are supported in Solaris Nevada.
  45   45   */
  46   46  #define PSMI_1_7
  47   47  
  48   48  #include <sys/processor.h>
  49   49  #include <sys/time.h>
  50   50  #include <sys/psm.h>
  51   51  #include <sys/smp_impldefs.h>
  52   52  #include <sys/cram.h>
  53      -#include <sys/acpi/acpi.h>
       53 +#include <acpica/include/acpi.h>
  54   54  #include <sys/acpica.h>
  55   55  #include <sys/psm_common.h>
  56   56  #include <sys/pit.h>
  57   57  #include <sys/ddi.h>
  58   58  #include <sys/sunddi.h>
  59   59  #include <sys/ddi_impldefs.h>
  60   60  #include <sys/pci.h>
  61   61  #include <sys/promif.h>
  62   62  #include <sys/x86_archext.h>
  63   63  #include <sys/cpc_impl.h>
  64   64  #include <sys/uadmin.h>
  65   65  #include <sys/panic.h>
  66   66  #include <sys/debug.h>
  67   67  #include <sys/archsystm.h>
  68   68  #include <sys/trap.h>
  69   69  #include <sys/machsystm.h>
  70   70  #include <sys/sysmacros.h>
  71   71  #include <sys/cpuvar.h>
  72   72  #include <sys/rm_platter.h>
  73   73  #include <sys/privregs.h>
  74   74  #include <sys/note.h>
  75   75  #include <sys/pci_intr_lib.h>
  76   76  #include <sys/spl.h>
  77   77  #include <sys/clock.h>
  78   78  #include <sys/cyclic.h>
  79   79  #include <sys/dditypes.h>
  80   80  #include <sys/sunddi.h>
  81   81  #include <sys/x_call.h>
  82   82  #include <sys/reboot.h>
  83   83  #include <sys/mach_intr.h>
  84   84  #include <sys/apix.h>
  85   85  #include <sys/apix_irm_impl.h>
  86   86  
  87   87  static int apix_probe();
  88   88  static void apix_init();
  89   89  static void apix_picinit(void);
  90   90  static int apix_intr_enter(int, int *);
  91   91  static void apix_intr_exit(int, int);
  92   92  static void apix_setspl(int);
  93   93  static int apix_disable_intr(processorid_t);
  94   94  static void apix_enable_intr(processorid_t);
  95   95  static int apix_get_clkvect(int);
  96   96  static int apix_get_ipivect(int, int);
  97   97  static void apix_post_cyclic_setup(void *);
  98   98  static int apix_post_cpu_start();
  99   99  static int apix_intr_ops(dev_info_t *, ddi_intr_handle_impl_t *,
 100  100      psm_intr_op_t, int *);
 101  101  
 102  102  /*
 103  103   * Helper functions for apix_intr_ops()
 104  104   */
 105  105  static void apix_redistribute_compute(void);
 106  106  static int apix_get_pending(apix_vector_t *);
 107  107  static apix_vector_t *apix_get_req_vector(ddi_intr_handle_impl_t *, ushort_t);
 108  108  static int apix_get_intr_info(ddi_intr_handle_impl_t *, apic_get_intr_t *);
 109  109  static char *apix_get_apic_type(void);
 110  110  static int apix_intx_get_pending(int);
 111  111  static void apix_intx_set_mask(int irqno);
 112  112  static void apix_intx_clear_mask(int irqno);
 113  113  static int apix_intx_get_shared(int irqno);
 114  114  static void apix_intx_set_shared(int irqno, int delta);
 115  115  static apix_vector_t *apix_intx_xlate_vector(dev_info_t *, int,
 116  116      struct intrspec *);
 117  117  static int apix_intx_alloc_vector(dev_info_t *, int, struct intrspec *);
 118  118  
 119  119  extern int apic_clkinit(int);
 120  120  
 121  121  /* IRM initialization for APIX PSM module */
 122  122  extern void apix_irm_init(void);
 123  123  
 124  124  extern int irm_enable;
 125  125  
  126  126  /*
  127  127   *      Local static data
             *
             *      apix_ops is the psm_ops table for this module.  It is handed
             *      to the PSM framework through apix_psm_info and is also
             *      exported through the psmops pointer.  The initializer is
             *      positional: a NULL entry means this module supplies no
             *      routine for that slot (the trailing comment names the slot).
             *      Entries with an apic_ prefix are not defined in the visible
             *      part of this file; presumably they come from the common APIC
             *      code.  _init() overwrites the psm_gethrtime slot with
             *      apic_gettime when apic_coarse_hrtime is set.
  128  128   */
  129  129  static struct   psm_ops apix_ops = {
  130  130          apix_probe,
  131  131  
  132  132          apix_init,
  133  133          apix_picinit,
  134  134          apix_intr_enter,
  135  135          apix_intr_exit,
  136  136          apix_setspl,
  137  137          apix_addspl,
  138  138          apix_delspl,
  139  139          apix_disable_intr,
  140  140          apix_enable_intr,
  141  141          NULL,                   /* psm_softlvl_to_irq */
  142  142          NULL,                   /* psm_set_softintr */
  143  143  
  144  144          apic_set_idlecpu,
  145  145          apic_unset_idlecpu,
  146  146  
  147  147          apic_clkinit,
  148  148          apix_get_clkvect,
  149  149          NULL,                   /* psm_hrtimeinit */
  150  150          apic_gethrtime,
  151  151  
  152  152          apic_get_next_processorid,
  153  153          apic_cpu_start,
  154  154          apix_post_cpu_start,
  155  155          apic_shutdown,
  156  156          apix_get_ipivect,
  157  157          apic_send_ipi,
  158  158  
  159  159          NULL,                   /* psm_translate_irq */
  160  160          NULL,                   /* psm_notify_error */
  161  161          NULL,                   /* psm_notify_func */
  162  162          apic_timer_reprogram,
  163  163          apic_timer_enable,
  164  164          apic_timer_disable,
  165  165          apix_post_cyclic_setup,
  166  166          apic_preshutdown,
  167  167          apix_intr_ops,          /* Advanced DDI Interrupt framework */
  168  168          apic_state,             /* save, restore apic state for S3 */
  169  169          apic_cpu_ops,           /* CPU control interface. */
  170  170  };
 171  171  
 172  172  struct psm_ops *psmops = &apix_ops;
 173  173  
  174  174  static struct   psm_info apix_psm_info = {
            /*
             * Module description passed to psm_mod_init(), psm_mod_fini() and
             * psm_mod_info() by _init(), _fini() and _info() below.
             */
  175  175          PSM_INFO_VER01_7,                       /* version */
  176  176          PSM_OWN_EXCLUSIVE,                      /* ownership */
  177  177          &apix_ops,                              /* operation */
  178  178          APIX_NAME,                              /* machine name */
  179  179          "apix MPv1.4 compatible",               /* presumably p_mach_idstring, which apix_probe() passes to apic_probe_common() */
  180  180  };
 181  181  
 182  182  static void *apix_hdlp;
 183  183  
 184  184  static int apix_is_enabled = 0;
 185  185  
 186  186  /*
 187  187   * Flag to indicate if APIX is to be enabled only for platforms
 188  188   * with specific hw feature(s).
 189  189   */
 190  190  int apix_hw_chk_enable = 1;
 191  191  
 192  192  /*
 193  193   * Hw features that are checked for enabling APIX support.
 194  194   */
 195  195  #define APIX_SUPPORT_X2APIC     0x00000001
 196  196  uint_t apix_supported_hw = APIX_SUPPORT_X2APIC;
 197  197  
 198  198  /*
 199  199   * apix_lock is used for cpu selection and vector re-binding
 200  200   */
 201  201  lock_t apix_lock;
 202  202  apix_impl_t *apixs[NCPU];
 203  203  /*
 204  204   * Mapping between device interrupt and the allocated vector. Indexed
 205  205   * by major number.
 206  206   */
 207  207  apix_dev_vector_t **apix_dev_vector;
 208  208  /*
 209  209   * Mapping between device major number and cpu id. It gets used
 210  210   * when interrupt binding policy round robin with affinity is
 211  211   * applied. With that policy, devices with the same major number
 212  212   * will be bound to the same CPU.
 213  213   */
 214  214  processorid_t *apix_major_to_cpu;       /* major to cpu mapping */
 215  215  kmutex_t apix_mutex;    /* for apix_dev_vector & apix_major_to_cpu */
 216  216  
 217  217  int apix_nipis = 16;    /* Maximum number of IPIs */
 218  218  /*
 219  219   * Maximum number of vectors in a CPU that can be used for interrupt
 220  220   * allocation (including IPIs and the reserved vectors).
 221  221   */
 222  222  int apix_cpu_nvectors = APIX_NVECTOR;
 223  223  
 224  224  /* gcpu.h */
 225  225  
 226  226  extern void apic_do_interrupt(struct regs *rp, trap_trace_rec_t *ttp);
 227  227  extern void apic_change_eoi();
 228  228  
 229  229  /*
 230  230   *      This is the loadable module wrapper
 231  231   */
 232  232  
 233  233  int
 234  234  _init(void)
 235  235  {
 236  236          if (apic_coarse_hrtime)
 237  237                  apix_ops.psm_gethrtime = &apic_gettime;
 238  238          return (psm_mod_init(&apix_hdlp, &apix_psm_info));
 239  239  }
 240  240  
 241  241  int
 242  242  _fini(void)
 243  243  {
 244  244          return (psm_mod_fini(&apix_hdlp, &apix_psm_info));
 245  245  }
 246  246  
 247  247  int
 248  248  _info(struct modinfo *modinfop)
 249  249  {
 250  250          return (psm_mod_info(&apix_hdlp, &apix_psm_info, modinfop));
 251  251  }
 252  252  
 253  253  static int
 254  254  apix_probe()
 255  255  {
 256  256          int rval;
 257  257  
 258  258          if (apix_enable == 0)
 259  259                  return (PSM_FAILURE);
 260  260  
 261  261          /* check for hw features if specified  */
 262  262          if (apix_hw_chk_enable) {
 263  263                  /* check if x2APIC mode is supported */
 264  264                  if ((apix_supported_hw & APIX_SUPPORT_X2APIC) ==
 265  265                      APIX_SUPPORT_X2APIC) {
 266  266                          if (!((apic_local_mode() == LOCAL_X2APIC) ||
 267  267                              apic_detect_x2apic())) {
 268  268                                  /* x2APIC mode is not supported in the hw */
 269  269                                  apix_enable = 0;
 270  270                          }
 271  271                  }
 272  272                  if (apix_enable == 0)
 273  273                          return (PSM_FAILURE);
 274  274          }
 275  275  
 276  276          rval = apic_probe_common(apix_psm_info.p_mach_idstring);
 277  277          if (rval == PSM_SUCCESS)
 278  278                  apix_is_enabled = 1;
 279  279          else
 280  280                  apix_is_enabled = 0;
 281  281          return (rval);
 282  282  }
 283  283  
 284  284  /*
 285  285   * Initialize the data structures needed by pcplusmpx module.
 286  286   * Specifically, the data structures used by addspl() and delspl()
 287  287   * routines.
 288  288   */
 289  289  static void
 290  290  apix_softinit()
 291  291  {
 292  292          int i, *iptr;
 293  293          apix_impl_t *hdlp;
 294  294          int nproc;
 295  295  
 296  296          nproc = max(apic_nproc, apic_max_nproc);
 297  297  
 298  298          hdlp = kmem_zalloc(nproc * sizeof (apix_impl_t), KM_SLEEP);
 299  299          for (i = 0; i < nproc; i++) {
 300  300                  apixs[i] = &hdlp[i];
 301  301                  apixs[i]->x_cpuid = i;
 302  302                  LOCK_INIT_CLEAR(&apixs[i]->x_lock);
 303  303          }
 304  304  
 305  305          /* cpu 0 is always up (for now) */
 306  306          apic_cpus[0].aci_status = APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE;
 307  307  
 308  308          iptr = (int *)&apic_irq_table[0];
 309  309          for (i = 0; i <= APIC_MAX_VECTOR; i++) {
 310  310                  apic_level_intr[i] = 0;
 311  311                  *iptr++ = NULL;
 312  312          }
 313  313          mutex_init(&airq_mutex, NULL, MUTEX_DEFAULT, NULL);
 314  314  
 315  315          apix_dev_vector = kmem_zalloc(sizeof (apix_dev_vector_t *) * devcnt,
 316  316              KM_SLEEP);
 317  317  
 318  318          if (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) {
 319  319                  apix_major_to_cpu = kmem_zalloc(sizeof (int) * devcnt,
 320  320                      KM_SLEEP);
 321  321                  for (i = 0; i < devcnt; i++)
 322  322                          apix_major_to_cpu[i] = IRQ_UNINIT;
 323  323          }
 324  324  
 325  325          mutex_init(&apix_mutex, NULL, MUTEX_DEFAULT, NULL);
 326  326  }
 327  327  
 328  328  static int
 329  329  apix_get_pending_spl(void)
 330  330  {
 331  331          int cpuid = CPU->cpu_id;
 332  332  
 333  333          return (bsrw_insn(apixs[cpuid]->x_intr_pending));
 334  334  }
 335  335  
 336  336  static uintptr_t
 337  337  apix_get_intr_handler(int cpu, short vec)
 338  338  {
 339  339          apix_vector_t *apix_vector;
 340  340  
 341  341          ASSERT(cpu < apic_nproc && vec < APIX_NVECTOR);
 342  342          if (cpu >= apic_nproc)
 343  343                  return (NULL);
 344  344  
 345  345          apix_vector = apixs[cpu]->x_vectbl[vec];
 346  346  
 347  347          return ((uintptr_t)(apix_vector->v_autovect));
 348  348  }
 349  349  
 350  350  #if defined(__amd64)
 351  351  static unsigned char dummy_cpu_pri[MAXIPL + 1] = {
 352  352          0, 0, 0, 0, 0, 0, 0, 0,
 353  353          0, 0, 0, 0, 0, 0, 0, 0, 0
 354  354  };
 355  355  #endif
 356  356  
 357  357  static void
 358  358  apix_init()
 359  359  {
 360  360          extern void (*do_interrupt_common)(struct regs *, trap_trace_rec_t *);
 361  361  
 362  362          APIC_VERBOSE(INIT, (CE_CONT, "apix: psm_softinit\n"));
 363  363  
 364  364          do_interrupt_common = apix_do_interrupt;
 365  365          addintr = apix_add_avintr;
 366  366          remintr = apix_rem_avintr;
 367  367          get_pending_spl = apix_get_pending_spl;
 368  368          get_intr_handler = apix_get_intr_handler;
 369  369          psm_get_localapicid = apic_get_localapicid;
 370  370          psm_get_ioapicid = apic_get_ioapicid;
 371  371  
 372  372          apix_softinit();
 373  373  #if defined(__amd64)
 374  374          /*
 375  375           * Make cpu-specific interrupt info point to cr8pri vector
 376  376           */
 377  377          CPU->cpu_pri_data = dummy_cpu_pri;
 378  378  #else
 379  379          if (cpuid_have_cr8access(CPU))
 380  380                  apic_have_32bit_cr8 = 1;
 381  381  #endif  /* __amd64 */
 382  382  
 383  383          /*
 384  384           * Initialize IRM pool parameters
 385  385           */
 386  386          if (irm_enable) {
 387  387                  int     i;
 388  388                  int     lowest_irq;
 389  389                  int     highest_irq;
 390  390  
 391  391                  /* number of CPUs present */
 392  392                  apix_irminfo.apix_ncpus = apic_nproc;
 393  393                  /* total number of entries in all of the IOAPICs present */
 394  394                  lowest_irq = apic_io_vectbase[0];
 395  395                  highest_irq = apic_io_vectend[0];
 396  396                  for (i = 1; i < apic_io_max; i++) {
 397  397                          if (apic_io_vectbase[i] < lowest_irq)
 398  398                                  lowest_irq = apic_io_vectbase[i];
 399  399                          if (apic_io_vectend[i] > highest_irq)
 400  400                                  highest_irq = apic_io_vectend[i];
 401  401                  }
 402  402                  apix_irminfo.apix_ioapic_max_vectors =
 403  403                      highest_irq - lowest_irq + 1;
 404  404                  /*
 405  405                   * Number of available per-CPU vectors excluding
 406  406                   * reserved vectors for Dtrace, int80, system-call,
 407  407                   * fast-trap, etc.
 408  408                   */
 409  409                  apix_irminfo.apix_per_cpu_vectors = APIX_NAVINTR -
 410  410                      APIX_SW_RESERVED_VECTORS;
 411  411  
 412  412                  /* Number of vectors (pre) allocated (SCI and HPET) */
 413  413                  apix_irminfo.apix_vectors_allocated = 0;
 414  414                  if (apic_hpet_vect != -1)
 415  415                          apix_irminfo.apix_vectors_allocated++;
 416  416                  if (apic_sci_vect != -1)
 417  417                          apix_irminfo.apix_vectors_allocated++;
 418  418          }
 419  419  }
 420  420  
  421  421  static void
  422  422  apix_init_intr()
  423  423  {
                    /*
                     * Program the local APIC of the calling CPU (identified by
                     * psm_get_cpu_id()): destination format and destination
                     * registers (MMIO mode only), the spurious interrupt vector
                     * register, the timer/LINT0/LINT1 LVT entries, and then the
                     * performance counter, thermal, error and CMCI vectors.
                     *
                     * The vectors for the perf counter overflow, error and CMCI
                     * interrupts are allocated with apix_get_ipivect() and their
                     * handlers installed with add_avintr() only while the
                     * corresponding global (apic_cpcovf_vect, apic_errvect,
                     * apic_cmci_vect) is still 0; once set, a call only writes
                     * the existing vector to the register.
                     *
                     * Called from apix_picinit().
                     */
  424  424          processorid_t   cpun = psm_get_cpu_id();
  425  425          uint_t nlvt;
  426  426          uint32_t svr = AV_UNIT_ENABLE | APIC_SPUR_INTR;
  427  427          extern void cmi_cmci_trap(void);
  428  428  
                    /*
                     * Task register is set to APIC_MASK_ALL for the duration of
                     * the setup and written back to 0 at the end of the function.
                     */
  429  429          apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);
  430  430  
  431  431          if (apic_mode == LOCAL_APIC) {
  432  432                  /*
  433  433                   * We are running APIC in MMIO mode.
  434  434                   */
  435  435                  if (apic_flat_model) {
  436  436                          apic_reg_ops->apic_write(APIC_FORMAT_REG,
  437  437                              APIC_FLAT_MODEL);
  438  438                  } else {
  439  439                          apic_reg_ops->apic_write(APIC_FORMAT_REG,
  440  440                              APIC_CLUSTER_MODEL);
  441  441                  }
  442  442  
                            /*
                             * NOTE(review): the shift count is the cpu id; this
                             * presumably relies on the cpu count being small in
                             * MMIO mode -- confirm.
                             */
  443  443                  apic_reg_ops->apic_write(APIC_DEST_REG,
  444  444                      AV_HIGH_ORDER >> cpun);
  445  445          }
  446  446  
  447  447          if (apic_directed_EOI_supported()) {
  448  448                  /*
  449  449                   * Setting the 12th bit in the Spurious Interrupt Vector
  450  450                   * Register suppresses broadcast EOIs generated by the local
  451  451                   * APIC. The suppression of broadcast EOIs happens only when
  452  452                   * interrupts are level-triggered.
  453  453                   */
  454  454                  svr |= APIC_SVR_SUPPRESS_BROADCAST_EOI;
  455  455          }
  456  456  
  457  457          /* need to enable APIC before unmasking NMI */
  458  458          apic_reg_ops->apic_write(APIC_SPUR_INT_REG, svr);
  459  459  
  460  460          /*
  461  461           * Presence of an invalid vector with delivery mode AV_FIXED can
  462  462           * cause an error interrupt, even if the entry is masked...so
  463  463           * write a valid vector to LVT entries along with the mask bit
  464  464           */
  465  465  
  466  466          /* All APICs have timer and LINT0/1 */
  467  467          apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK|APIC_RESV_IRQ);
  468  468          apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK|APIC_RESV_IRQ);
  469  469          apic_reg_ops->apic_write(APIC_INT_VECT1, AV_NMI);       /* enable NMI */
  470  470  
  471  471          /*
  472  472           * On integrated APICs, the number of LVT entries is
  473  473           * 'Max LVT entry' + 1; on 82489DX's (non-integrated
  474  474           * APICs), nlvt is "3" (LINT0, LINT1, and timer)
  475  475           */
  476  476  
  477  477          if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) {
  478  478                  nlvt = 3;
  479  479          } else {
  480  480                  nlvt = ((apic_reg_ops->apic_read(APIC_VERS_REG) >> 16) &
  481  481                      0xFF) + 1;
  482  482          }
  483  483  
  484  484          if (nlvt >= 5) {
  485  485                  /* Enable performance counter overflow interrupt */
  486  486  
                            /* without the MSR feature the overflow interrupt is turned off */
  487  487                  if (!is_x86_feature(x86_featureset, X86FSET_MSR))
  488  488                          apic_enable_cpcovf_intr = 0;
  489  489                  if (apic_enable_cpcovf_intr) {
  490  490                          if (apic_cpcovf_vect == 0) {
  491  491                                  int ipl = APIC_PCINT_IPL;
  492  492  
  493  493                                  apic_cpcovf_vect = apix_get_ipivect(ipl, -1);
  494  494                                  ASSERT(apic_cpcovf_vect);
  495  495  
  496  496                                  (void) add_avintr(NULL, ipl,
  497  497                                      (avfunc)kcpc_hw_overflow_intr,
  498  498                                      "apic pcint", apic_cpcovf_vect,
  499  499                                      NULL, NULL, NULL, NULL);
  500  500                                  kcpc_hw_overflow_intr_installed = 1;
  501  501                                  kcpc_hw_enable_cpc_intr =
  502  502                                      apic_cpcovf_mask_clear;
  503  503                          }
  504  504                          apic_reg_ops->apic_write(APIC_PCINT_VECT,
  505  505                              apic_cpcovf_vect);
  506  506                  }
  507  507          }
  508  508  
  509  509          if (nlvt >= 6) {
  510  510                  /* Only mask TM intr if the BIOS apparently doesn't use it */
  511  511  
  512  512                  uint32_t lvtval;
  513  513  
                            /*
                             * The entry is left alone only when it is unmasked and
                             * its delivery mode is AV_SMI.
                             */
  514  514                  lvtval = apic_reg_ops->apic_read(APIC_THERM_VECT);
  515  515                  if (((lvtval & AV_MASK) == AV_MASK) ||
  516  516                      ((lvtval & AV_DELIV_MODE) != AV_SMI)) {
  517  517                          apic_reg_ops->apic_write(APIC_THERM_VECT,
  518  518                              AV_MASK|APIC_RESV_IRQ);
  519  519                  }
  520  520          }
  521  521  
  522  522          /* Enable error interrupt */
  523  523  
  524  524          if (nlvt >= 4 && apic_enable_error_intr) {
  525  525                  if (apic_errvect == 0) {
  526  526                          int ipl = 0xf;  /* get highest priority intr */
  527  527                          apic_errvect = apix_get_ipivect(ipl, -1);
  528  528                          ASSERT(apic_errvect);
  529  529                          /*
  530  530                           * Not PSMI compliant, but we are going to merge
  531  531                           * with ON anyway
  532  532                           */
  533  533                          (void) add_avintr(NULL, ipl,
  534  534                              (avfunc)apic_error_intr, "apic error intr",
  535  535                              apic_errvect, NULL, NULL, NULL, NULL);
  536  536                  }
  537  537                  apic_reg_ops->apic_write(APIC_ERR_VECT, apic_errvect);
                            /*
                             * NOTE(review): the error status register is written
                             * twice back to back; presumably deliberate to clear
                             * stale status -- confirm before collapsing.
                             */
  538  538                  apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
  539  539                  apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
  540  540          }
  541  541  
  542  542          /* Enable CMCI interrupt */
  543  543          if (cmi_enable_cmci) {
                            /*
                             * Register cmci_cpu_setup exactly once; the
                             * registration itself is done under cpu_lock.
                             */
  544  544                  mutex_enter(&cmci_cpu_setup_lock);
  545  545                  if (cmci_cpu_setup_registered == 0) {
  546  546                          mutex_enter(&cpu_lock);
  547  547                          register_cpu_setup_func(cmci_cpu_setup, NULL);
  548  548                          mutex_exit(&cpu_lock);
  549  549                          cmci_cpu_setup_registered = 1;
  550  550                  }
  551  551                  mutex_exit(&cmci_cpu_setup_lock);
  552  552  
  553  553                  if (apic_cmci_vect == 0) {
  554  554                          int ipl = 0x2;
  555  555                          apic_cmci_vect = apix_get_ipivect(ipl, -1);
  556  556                          ASSERT(apic_cmci_vect);
  557  557  
  558  558                          (void) add_avintr(NULL, ipl,
  559  559                              (avfunc)cmi_cmci_trap, "apic cmci intr",
  560  560                              apic_cmci_vect, NULL, NULL, NULL, NULL);
  561  561                  }
  562  562                  apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect);
  563  563          }
  564  564  
                    /* setup done: write the task register back to 0 */
  565  565          apic_reg_ops->apic_write_task_reg(0);
  566  566  }
 567  567  
 568  568  static void
 569  569  apix_picinit(void)
 570  570  {
 571  571          int i, j;
 572  572          uint_t isr;
 573  573  
 574  574          APIC_VERBOSE(INIT, (CE_CONT, "apix: psm_picinit\n"));
 575  575  
 576  576          /*
 577  577           * initialize interrupt remapping before apic
 578  578           * hardware initialization
 579  579           */
 580  580          apic_intrmap_init(apic_mode);
 581  581          if (apic_vt_ops == psm_vt_ops)
 582  582                  apix_mul_ioapic_method = APIC_MUL_IOAPIC_IIR;
 583  583  
 584  584          /*
 585  585           * On UniSys Model 6520, the BIOS leaves vector 0x20 isr
 586  586           * bit on without clearing it with EOI.  Since softint
 587  587           * uses vector 0x20 to interrupt itself, so softint will
 588  588           * not work on this machine.  In order to fix this problem
 589  589           * a check is made to verify all the isr bits are clear.
 590  590           * If not, EOIs are issued to clear the bits.
 591  591           */
 592  592          for (i = 7; i >= 1; i--) {
 593  593                  isr = apic_reg_ops->apic_read(APIC_ISR_REG + (i * 4));
 594  594                  if (isr != 0)
 595  595                          for (j = 0; ((j < 32) && (isr != 0)); j++)
 596  596                                  if (isr & (1 << j)) {
 597  597                                          apic_reg_ops->apic_write(
 598  598                                              APIC_EOI_REG, 0);
 599  599                                          isr &= ~(1 << j);
 600  600                                          apic_error |= APIC_ERR_BOOT_EOI;
 601  601                                  }
 602  602          }
 603  603  
 604  604          /* set a flag so we know we have run apic_picinit() */
 605  605          apic_picinit_called = 1;
 606  606          LOCK_INIT_CLEAR(&apic_gethrtime_lock);
 607  607          LOCK_INIT_CLEAR(&apic_ioapic_lock);
 608  608          LOCK_INIT_CLEAR(&apic_error_lock);
 609  609          LOCK_INIT_CLEAR(&apic_mode_switch_lock);
 610  610  
 611  611          picsetup();      /* initialise the 8259 */
 612  612  
 613  613          /* add nmi handler - least priority nmi handler */
 614  614          LOCK_INIT_CLEAR(&apic_nmi_lock);
 615  615  
 616  616          if (!psm_add_nmintr(0, (avfunc) apic_nmi_intr,
 617  617              "apix NMI handler", (caddr_t)NULL))
 618  618                  cmn_err(CE_WARN, "apix: Unable to add nmi handler");
 619  619  
 620  620          apix_init_intr();
 621  621  
 622  622          /* enable apic mode if imcr present */
 623  623          if (apic_imcrp) {
 624  624                  outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
 625  625                  outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC);
 626  626          }
 627  627  
 628  628          ioapix_init_intr(IOAPIC_MASK);
 629  629  
 630  630          /* setup global IRM pool if applicable */
 631  631          if (irm_enable)
 632  632                  apix_irm_init();
 633  633  }
 634  634  
 635  635  static __inline__ void
 636  636  apix_send_eoi(void)
 637  637  {
 638  638          if (apic_mode == LOCAL_APIC)
 639  639                  LOCAL_APIC_WRITE_REG(APIC_EOI_REG, 0);
 640  640          else
 641  641                  X2APIC_WRITE(APIC_EOI_REG, 0);
 642  642  }
 643  643  
  644  644  /*
  645  645   * platform_intr_enter
  646  646   *
  647  647   *      Called at the beginning of the interrupt service routine, but unlike
  648  648   *      pcplusmp, does not mask interrupts. An EOI is given to the interrupt
  649  649   *      controller to enable other HW interrupts but interrupts are still
  650  650   *      masked by the IF flag.
  651  651   *
  652  652   *      Return -1 for spurious interrupts
             *
             *      ipl is compared against the priority of the incoming vector
             *      to decide whether the per-CPU "current ipl" state is raised.
             *      vectorp is in/out: on entry it holds the vector with
             *      APIC_BASE_VECT already subtracted by the caller; on return it
             *      holds that value with APIC_BASE_VECT added back.
             *
             *      Returns:
             *        APIC_INT_SPURIOUS    for the spurious vector (no EOI is sent);
             *        apix_rebindinfo.i_pri when no vector entry exists and
             *                             APIX_IS_FAKE_INTR(vector) is true;
             *        -1                   when no vector entry exists otherwise;
             *        the entry's v_pri    in all other cases.
  653  653   *
  654  654   */
  655  655  static int
  656  656  apix_intr_enter(int ipl, int *vectorp)
  657  657  {
  658  658          struct cpu *cpu = CPU;
  659  659          uint32_t cpuid = CPU->cpu_id;
  660  660          apic_cpus_info_t *cpu_infop;
  661  661          uchar_t vector;
  662  662          apix_vector_t *vecp;
  663  663          int nipl = -1;
  664  664  
  665  665          /*
  666  666           * The real vector delivered is (*vectorp + 0x20), but our caller
  667  667           * subtracts 0x20 from the vector before passing it to us.
  668  668           * (That's why APIC_BASE_VECT is 0x20.)
  669  669           */
  670  670          vector = *vectorp = (uchar_t)*vectorp + APIC_BASE_VECT;
  671  671  
  672  672          cpu_infop = &apic_cpus[cpuid];
                    /* spurious vector: count it and return without sending an EOI */
  673  673          if (vector == APIC_SPUR_INTR) {
  674  674                  cpu_infop->aci_spur_cnt++;
  675  675                  return (APIC_INT_SPURIOUS);
  676  676          }
  677  677  
                    /*
                     * No entry for this vector on this CPU: still send the EOI.
                     * nipl stays -1 unless the vector is the "fake" interrupt, in
                     * which case the priority comes from apix_rebindinfo.
                     */
  678  678          vecp = xv_vector(cpuid, vector);
  679  679          if (vecp == NULL) {
  680  680                  if (APIX_IS_FAKE_INTR(vector))
  681  681                          nipl = apix_rebindinfo.i_pri;
  682  682                  apix_send_eoi();
  683  683                  return (nipl);
  684  684          }
  685  685          nipl = vecp->v_pri;
  686  686  
  687  687          /* if interrupted by the clock, increment apic_nsec_since_boot */
  688  688          if (vector == (apic_clkvect + APIC_BASE_VECT)) {
  689  689                  if (!apic_oneshot) {
  690  690                          /* NOTE: this is not MT aware */
                                    /*
                                     * apic_hrtime_stamp is bumped before and after
                                     * the update; presumably so a reader can detect
                                     * an update in progress -- confirm against the
                                     * gethrtime code.
                                     */
  691  691                          apic_hrtime_stamp++;
  692  692                          apic_nsec_since_boot += apic_nsec_per_intr;
  693  693                          apic_hrtime_stamp++;
  694  694                          last_count_read = apic_hertz_count;
  695  695                          apix_redistribute_compute();
  696  696                  }
  697  697  
                            /* clock path returns here; the aci_* state below is not touched */
  698  698                  apix_send_eoi();
  699  699  
  700  700                  return (nipl);
  701  701          }
  702  702  
  703  703          ASSERT(vecp->v_state != APIX_STATE_OBSOLETED);
  704  704  
  705  705          /* pre-EOI handling for level-triggered interrupts */
  706  706          if (!APIX_IS_DIRECTED_EOI(apix_mul_ioapic_method) &&
  707  707              (vecp->v_type & APIX_TYPE_FIXED) && apic_level_intr[vecp->v_inum])
  708  708                  apix_level_intr_pre_eoi(vecp->v_inum);
  709  709  
  710  710          /* send back EOI */
  711  711          apix_send_eoi();
  712  712  
                    /*
                     * Record the vector being serviced at this priority.  The
                     * current ipl and the in-progress mask are raised only when
                     * the new priority is above both the caller's ipl and the
                     * CPU's base spl.
                     */
  713  713          cpu_infop->aci_current[nipl] = vector;
  714  714          if ((nipl > ipl) && (nipl > cpu->cpu_base_spl)) {
  715  715                  cpu_infop->aci_curipl = (uchar_t)nipl;
  716  716                  cpu_infop->aci_ISR_in_progress |= 1 << nipl;
  717  717          }
  718  718  
  719  719  #ifdef  DEBUG
                    /* DEBUG only: log the interrupt, except for vectors at or above APIX_IPI_MIN */
  720  720          if (vector >= APIX_IPI_MIN)
  721  721                  return (nipl);  /* skip IPI */
  722  722  
  723  723          APIC_DEBUG_BUF_PUT(vector);
  724  724          APIC_DEBUG_BUF_PUT(vecp->v_inum);
  725  725          APIC_DEBUG_BUF_PUT(nipl);
  726  726          APIC_DEBUG_BUF_PUT(psm_get_cpu_id());
  727  727          if ((apic_stretch_interrupts) && (apic_stretch_ISR & (1 << nipl)))
  728  728                  drv_usecwait(apic_stretch_interrupts);
  729  729  #endif /* DEBUG */
  730  730  
  731  731          return (nipl);
  732  732  }
 733  733  
 734  734  /*
 735  735   * Any changes made to this function must also change X2APIC
 736  736   * version of intr_exit.
 737  737   */
 738  738  static void
 739  739  apix_intr_exit(int prev_ipl, int arg2)
 740  740  {
 741  741          int cpuid = psm_get_cpu_id();
 742  742          apic_cpus_info_t *cpu_infop = &apic_cpus[cpuid];
 743  743          apix_impl_t *apixp = apixs[cpuid];
 744  744  
 745  745          UNREFERENCED_1PARAMETER(arg2);
 746  746  
 747  747          cpu_infop->aci_curipl = (uchar_t)prev_ipl;
 748  748          /* ISR above current pri could not be in progress */
 749  749          cpu_infop->aci_ISR_in_progress &= (2 << prev_ipl) - 1;
 750  750  
 751  751          if (apixp->x_obsoletes != NULL) {
 752  752                  if (APIX_CPU_LOCK_HELD(cpuid))
 753  753                          return;
 754  754  
 755  755                  APIX_ENTER_CPU_LOCK(cpuid);
 756  756                  (void) apix_obsolete_vector(apixp->x_obsoletes);
 757  757                  APIX_LEAVE_CPU_LOCK(cpuid);
 758  758          }
 759  759  }
 760  760  
 761  761  /*
 762  762   * The pcplusmp setspl code uses the TPR to mask all interrupts at or below the
 763  763   * given ipl, but apix never uses the TPR and we never mask a subset of the
 764  764   * interrupts. They are either all blocked by the IF flag or all can come in.
 765  765   *
 766  766   * For setspl, we mask all interrupts for XC_HI_PIL (15), otherwise, interrupts
 767  767   * can come in if currently enabled by the IF flag. This table shows the state
 768  768   * of the IF flag when we leave this function.
 769  769   *
 770  770   *    curr IF | ipl == 15       ipl != 15
 771  771   *    --------+---------------------------
 772  772   *       0    |    0                0
 773  773   *       1    |    0                1
 774  774   */
 775  775  static void
 776  776  apix_setspl(int ipl)
 777  777  {
 778  778          /*
 779  779           * Interrupts at ipl above this cannot be in progress, so the following
 780  780           * mask is ok.
 781  781           */
 782  782          apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1;
 783  783  
 784  784          if (ipl == XC_HI_PIL)
 785  785                  cli();
 786  786  }
 787  787  
 788  788  int
 789  789  apix_addspl(int virtvec, int ipl, int min_ipl, int max_ipl)
 790  790  {
 791  791          uint32_t cpuid = APIX_VIRTVEC_CPU(virtvec);
 792  792          uchar_t vector = (uchar_t)APIX_VIRTVEC_VECTOR(virtvec);
 793  793          apix_vector_t *vecp = xv_vector(cpuid, vector);
 794  794  
 795  795          UNREFERENCED_3PARAMETER(ipl, min_ipl, max_ipl);
 796  796          ASSERT(vecp != NULL && LOCK_HELD(&apix_lock));
 797  797  
 798  798          if (vecp->v_type == APIX_TYPE_FIXED)
 799  799                  apix_intx_set_shared(vecp->v_inum, 1);
 800  800  
 801  801          /* There are more interrupts, so it's already been enabled */
 802  802          if (vecp->v_share > 1)
 803  803                  return (PSM_SUCCESS);
 804  804  
 805  805          /* return if it is not hardware interrupt */
 806  806          if (vecp->v_type == APIX_TYPE_IPI)
 807  807                  return (PSM_SUCCESS);
 808  808  
 809  809          /*
 810  810           * if apix_picinit() has not been called yet, just return.
 811  811           * At the end of apic_picinit(), we will call setup_io_intr().
 812  812           */
 813  813          if (!apic_picinit_called)
 814  814                  return (PSM_SUCCESS);
 815  815  
 816  816          (void) apix_setup_io_intr(vecp);
 817  817  
 818  818          return (PSM_SUCCESS);
 819  819  }
 820  820  
 821  821  int
 822  822  apix_delspl(int virtvec, int ipl, int min_ipl, int max_ipl)
 823  823  {
 824  824          uint32_t cpuid = APIX_VIRTVEC_CPU(virtvec);
 825  825          uchar_t vector = (uchar_t)APIX_VIRTVEC_VECTOR(virtvec);
 826  826          apix_vector_t *vecp = xv_vector(cpuid, vector);
 827  827  
 828  828          UNREFERENCED_3PARAMETER(ipl, min_ipl, max_ipl);
 829  829          ASSERT(vecp != NULL && LOCK_HELD(&apix_lock));
 830  830  
 831  831          if (vecp->v_type == APIX_TYPE_FIXED)
 832  832                  apix_intx_set_shared(vecp->v_inum, -1);
 833  833  
 834  834          /* There are more interrupts */
 835  835          if (vecp->v_share > 1)
 836  836                  return (PSM_SUCCESS);
 837  837  
 838  838          /* return if it is not hardware interrupt */
 839  839          if (vecp->v_type == APIX_TYPE_IPI)
 840  840                  return (PSM_SUCCESS);
 841  841  
 842  842          if (!apic_picinit_called) {
 843  843                  cmn_err(CE_WARN, "apix: delete 0x%x before apic init",
 844  844                      virtvec);
 845  845                  return (PSM_SUCCESS);
 846  846          }
 847  847  
 848  848          apix_disable_vector(vecp);
 849  849  
 850  850          return (PSM_SUCCESS);
 851  851  }
 852  852  
 853  853  /*
 854  854   * Try and disable all interrupts. We just assign interrupts to other
 855  855   * processors based on policy. If any were bound by user request, we
 856  856   * let them continue and return failure. We do not bother to check
 857  857   * for cache affinity while rebinding.
 858  858   */
 859  859  static int
 860  860  apix_disable_intr(processorid_t cpun)
 861  861  {
 862  862          apix_impl_t *apixp = apixs[cpun];
 863  863          apix_vector_t *vecp, *newp;
 864  864          int bindcpu, i, hardbound = 0, errbound = 0, ret, loop, type;
 865  865  
 866  866          lock_set(&apix_lock);
 867  867  
 868  868          apic_cpus[cpun].aci_status &= ~APIC_CPU_INTR_ENABLE;
 869  869          apic_cpus[cpun].aci_curipl = 0;
 870  870  
 871  871          /* if this is for SUSPEND operation, skip rebinding */
 872  872          if (apic_cpus[cpun].aci_status & APIC_CPU_SUSPEND) {
 873  873                  for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
 874  874                          vecp = apixp->x_vectbl[i];
 875  875                          if (!IS_VECT_ENABLED(vecp))
 876  876                                  continue;
 877  877  
 878  878                          apix_disable_vector(vecp);
 879  879                  }
 880  880                  lock_clear(&apix_lock);
 881  881                  return (PSM_SUCCESS);
 882  882          }
 883  883  
 884  884          for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
 885  885                  vecp = apixp->x_vectbl[i];
 886  886                  if (!IS_VECT_ENABLED(vecp))
 887  887                          continue;
 888  888  
 889  889                  if (vecp->v_flags & APIX_VECT_USER_BOUND) {
 890  890                          hardbound++;
 891  891                          continue;
 892  892                  }
 893  893                  type = vecp->v_type;
 894  894  
 895  895                  /*
 896  896                   * If there are bound interrupts on this cpu, then
 897  897                   * rebind them to other processors.
 898  898                   */
 899  899                  loop = 0;
 900  900                  do {
 901  901                          bindcpu = apic_find_cpu(APIC_CPU_INTR_ENABLE);
 902  902  
 903  903                          if (type != APIX_TYPE_MSI)
 904  904                                  newp = apix_set_cpu(vecp, bindcpu, &ret);
 905  905                          else
 906  906                                  newp = apix_grp_set_cpu(vecp, bindcpu, &ret);
 907  907                  } while ((newp == NULL) && (loop++ < apic_nproc));
 908  908  
 909  909                  if (loop >= apic_nproc) {
 910  910                          errbound++;
 911  911                          cmn_err(CE_WARN, "apix: failed to rebind vector %x/%x",
 912  912                              vecp->v_cpuid, vecp->v_vector);
 913  913                  }
 914  914          }
 915  915  
 916  916          lock_clear(&apix_lock);
 917  917  
 918  918          if (hardbound || errbound) {
 919  919                  cmn_err(CE_WARN, "Could not disable interrupts on %d"
 920  920                      "due to user bound interrupts or failed operation",
 921  921                      cpun);
 922  922                  return (PSM_FAILURE);
 923  923          }
 924  924  
 925  925          return (PSM_SUCCESS);
 926  926  }
 927  927  
 928  928  /*
 929  929   * Bind interrupts to specified CPU
 930  930   */
 931  931  static void
 932  932  apix_enable_intr(processorid_t cpun)
 933  933  {
 934  934          apix_vector_t *vecp;
 935  935          int i, ret;
 936  936          processorid_t n;
 937  937  
 938  938          lock_set(&apix_lock);
 939  939  
 940  940          apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE;
 941  941  
 942  942          /* interrupt enabling for system resume */
 943  943          if (apic_cpus[cpun].aci_status & APIC_CPU_SUSPEND) {
 944  944                  for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
 945  945                          vecp = xv_vector(cpun, i);
 946  946                          if (!IS_VECT_ENABLED(vecp))
 947  947                                  continue;
 948  948  
 949  949                          apix_enable_vector(vecp);
 950  950                  }
 951  951                  apic_cpus[cpun].aci_status &= ~APIC_CPU_SUSPEND;
 952  952          }
 953  953  
 954  954          for (n = 0; n < apic_nproc; n++) {
 955  955                  if (!apic_cpu_in_range(n) || n == cpun ||
 956  956                      (apic_cpus[n].aci_status & APIC_CPU_INTR_ENABLE) == 0)
 957  957                          continue;
 958  958  
 959  959                  for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
 960  960                          vecp = xv_vector(n, i);
 961  961                          if (!IS_VECT_ENABLED(vecp) ||
 962  962                              vecp->v_bound_cpuid != cpun)
 963  963                                  continue;
 964  964  
 965  965                          if (vecp->v_type != APIX_TYPE_MSI)
 966  966                                  (void) apix_set_cpu(vecp, cpun, &ret);
 967  967                          else
 968  968                                  (void) apix_grp_set_cpu(vecp, cpun, &ret);
 969  969                  }
 970  970          }
 971  971  
 972  972          lock_clear(&apix_lock);
 973  973  }
 974  974  
 975  975  /*
 976  976   * Allocate vector for IPI
 977  977   * type == -1 indicates it is an internal request. Do not change
 978  978   * resv_vector for these requests.
 979  979   */
 980  980  static int
 981  981  apix_get_ipivect(int ipl, int type)
 982  982  {
 983  983          uchar_t vector;
 984  984  
 985  985          if ((vector = apix_alloc_ipi(ipl)) > 0) {
 986  986                  if (type != -1)
 987  987                          apic_resv_vector[ipl] = vector;
 988  988                  return (vector);
 989  989          }
 990  990          apic_error |= APIC_ERR_GET_IPIVECT_FAIL;
 991  991          return (-1);    /* shouldn't happen */
 992  992  }
 993  993  
 994  994  static int
 995  995  apix_get_clkvect(int ipl)
 996  996  {
 997  997          int vector;
 998  998  
 999  999          if ((vector = apix_get_ipivect(ipl, -1)) == -1)
1000 1000                  return (-1);
1001 1001  
1002 1002          apic_clkvect = vector - APIC_BASE_VECT;
1003 1003          APIC_VERBOSE(IPI, (CE_CONT, "apix: clock vector = %x\n",
1004 1004              apic_clkvect));
1005 1005          return (vector);
1006 1006  }
1007 1007  
1008 1008  static int
1009 1009  apix_post_cpu_start()
1010 1010  {
1011 1011          int cpun;
1012 1012          static int cpus_started = 1;
1013 1013  
1014 1014          /* We know this CPU + BSP  started successfully. */
1015 1015          cpus_started++;
1016 1016  
1017 1017          /*
1018 1018           * On BSP we would have enabled X2APIC, if supported by processor,
1019 1019           * in acpi_probe(), but on AP we do it here.
1020 1020           *
1021 1021           * We enable X2APIC mode only if BSP is running in X2APIC & the
1022 1022           * local APIC mode of the current CPU is MMIO (xAPIC).
1023 1023           */
1024 1024          if (apic_mode == LOCAL_X2APIC && apic_detect_x2apic() &&
1025 1025              apic_local_mode() == LOCAL_APIC) {
1026 1026                  apic_enable_x2apic();
1027 1027          }
1028 1028  
1029 1029          /*
1030 1030           * Switch back to x2apic IPI sending method for performance when target
1031 1031           * CPU has entered x2apic mode.
1032 1032           */
1033 1033          if (apic_mode == LOCAL_X2APIC) {
1034 1034                  apic_switch_ipi_callback(B_FALSE);
1035 1035          }
1036 1036  
1037 1037          splx(ipltospl(LOCK_LEVEL));
1038 1038          apix_init_intr();
1039 1039  
1040 1040          /*
1041 1041           * since some systems don't enable the internal cache on the non-boot
1042 1042           * cpus, so we have to enable them here
1043 1043           */
1044 1044          setcr0(getcr0() & ~(CR0_CD | CR0_NW));
1045 1045  
1046 1046  #ifdef  DEBUG
1047 1047          APIC_AV_PENDING_SET();
1048 1048  #else
1049 1049          if (apic_mode == LOCAL_APIC)
1050 1050                  APIC_AV_PENDING_SET();
1051 1051  #endif  /* DEBUG */
1052 1052  
1053 1053          /*
1054 1054           * We may be booting, or resuming from suspend; aci_status will
1055 1055           * be APIC_CPU_INTR_ENABLE if coming from suspend, so we add the
1056 1056           * APIC_CPU_ONLINE flag here rather than setting aci_status completely.
1057 1057           */
1058 1058          cpun = psm_get_cpu_id();
1059 1059          apic_cpus[cpun].aci_status |= APIC_CPU_ONLINE;
1060 1060  
1061 1061          apic_reg_ops->apic_write(APIC_DIVIDE_REG, apic_divide_reg_init);
1062 1062  
1063 1063          return (PSM_SUCCESS);
1064 1064  }
1065 1065  
1066 1066  /*
1067 1067   * If this module needs a periodic handler for the interrupt distribution, it
1068 1068   * can be added here. The argument to the periodic handler is not currently
1069 1069   * used, but is reserved for future.
1070 1070   */
1071 1071  static void
1072 1072  apix_post_cyclic_setup(void *arg)
1073 1073  {
1074 1074          UNREFERENCED_1PARAMETER(arg);
1075 1075  
1076 1076          cyc_handler_t cyh;
1077 1077          cyc_time_t cyt;
1078 1078  
1079 1079          /* cpu_lock is held */
1080 1080          /* set up a periodic handler for intr redistribution */
1081 1081  
1082 1082          /*
1083 1083           * In peridoc mode intr redistribution processing is done in
1084 1084           * apic_intr_enter during clk intr processing
1085 1085           */
1086 1086          if (!apic_oneshot)
1087 1087                  return;
1088 1088  
1089 1089          /*
1090 1090           * Register a periodical handler for the redistribution processing.
1091 1091           * Though we would generally prefer to use the DDI interface for
1092 1092           * periodic handler invocation, ddi_periodic_add(9F), we are
1093 1093           * unfortunately already holding cpu_lock, which ddi_periodic_add will
1094 1094           * attempt to take for us.  Thus, we add our own cyclic directly:
1095 1095           */
1096 1096          cyh.cyh_func = (void (*)(void *))apix_redistribute_compute;
1097 1097          cyh.cyh_arg = NULL;
1098 1098          cyh.cyh_level = CY_LOW_LEVEL;
1099 1099  
1100 1100          cyt.cyt_when = 0;
1101 1101          cyt.cyt_interval = apic_redistribute_sample_interval;
1102 1102  
1103 1103          apic_cyclic_id = cyclic_add(&cyh, &cyt);
1104 1104  }
1105 1105  
1106 1106  /*
1107 1107   * Called the first time we enable x2apic mode on this cpu.
1108 1108   * Update some of the function pointers to use x2apic routines.
1109 1109   */
1110 1110  void
1111 1111  x2apic_update_psm()
1112 1112  {
1113 1113          struct psm_ops *pops = &apix_ops;
1114 1114  
1115 1115          ASSERT(pops != NULL);
1116 1116  
1117 1117          /*
1118 1118           * The pcplusmp module's version of x2apic_update_psm makes additional
1119 1119           * changes that we do not have to make here. It needs to make those
1120 1120           * changes because pcplusmp relies on the TPR register and the means of
1121 1121           * addressing that changes when using the local apic versus the x2apic.
1122 1122           * It's also worth noting that the apix driver specific function end up
1123 1123           * being apix_foo as opposed to apic_foo and x2apic_foo.
1124 1124           */
1125 1125          pops->psm_send_ipi = x2apic_send_ipi;
1126 1126  
1127 1127          send_dirintf = pops->psm_send_ipi;
1128 1128  
1129 1129          apic_mode = LOCAL_X2APIC;
1130 1130          apic_change_ops();
1131 1131  }
1132 1132  
1133 1133  /*
1134 1134   * This function provides external interface to the nexus for all
1135 1135   * functionalities related to the new DDI interrupt framework.
1136 1136   *
1137 1137   * Input:
1138 1138   * dip     - pointer to the dev_info structure of the requested device
1139 1139   * hdlp    - pointer to the internal interrupt handle structure for the
1140 1140   *           requested interrupt
1141 1141   * intr_op - opcode for this call
1142 1142   * result  - pointer to the integer that will hold the result to be
1143 1143   *           passed back if return value is PSM_SUCCESS
1144 1144   *
1145 1145   * Output:
1146 1146   * return value is either PSM_SUCCESS or PSM_FAILURE
1147 1147   */
1148 1148  static int
1149 1149  apix_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp,
1150 1150      psm_intr_op_t intr_op, int *result)
1151 1151  {
1152 1152          int             cap;
1153 1153          apix_vector_t   *vecp, *newvecp;
1154 1154          struct intrspec *ispec, intr_spec;
1155 1155          processorid_t target;
1156 1156  
1157 1157          ispec = &intr_spec;
1158 1158          ispec->intrspec_pri = hdlp->ih_pri;
1159 1159          ispec->intrspec_vec = hdlp->ih_inum;
1160 1160          ispec->intrspec_func = hdlp->ih_cb_func;
1161 1161  
1162 1162          switch (intr_op) {
1163 1163          case PSM_INTR_OP_ALLOC_VECTORS:
1164 1164                  switch (hdlp->ih_type) {
1165 1165                  case DDI_INTR_TYPE_MSI:
1166 1166                          /* allocate MSI vectors */
1167 1167                          *result = apix_alloc_msi(dip, hdlp->ih_inum,
1168 1168                              hdlp->ih_scratch1,
1169 1169                              (int)(uintptr_t)hdlp->ih_scratch2);
1170 1170                          break;
1171 1171                  case DDI_INTR_TYPE_MSIX:
1172 1172                          /* allocate MSI-X vectors */
1173 1173                          *result = apix_alloc_msix(dip, hdlp->ih_inum,
1174 1174                              hdlp->ih_scratch1,
1175 1175                              (int)(uintptr_t)hdlp->ih_scratch2);
1176 1176                          break;
1177 1177                  case DDI_INTR_TYPE_FIXED:
1178 1178                          /* allocate or share vector for fixed */
1179 1179                          if ((ihdl_plat_t *)hdlp->ih_private == NULL) {
1180 1180                                  return (PSM_FAILURE);
1181 1181                          }
1182 1182                          ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
1183 1183                          *result = apix_intx_alloc_vector(dip, hdlp->ih_inum,
1184 1184                              ispec);
1185 1185                          break;
1186 1186                  default:
1187 1187                          return (PSM_FAILURE);
1188 1188                  }
1189 1189                  break;
1190 1190          case PSM_INTR_OP_FREE_VECTORS:
1191 1191                  apix_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1,
1192 1192                      hdlp->ih_type);
1193 1193                  break;
1194 1194          case PSM_INTR_OP_XLATE_VECTOR:
1195 1195                  /*
1196 1196                   * Vectors are allocated by ALLOC and freed by FREE.
1197 1197                   * XLATE finds and returns APIX_VIRTVEC_VECTOR(cpu, vector).
1198 1198                   */
1199 1199                  *result = APIX_INVALID_VECT;
1200 1200                  vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1201 1201                  if (vecp != NULL) {
1202 1202                          *result = APIX_VIRTVECTOR(vecp->v_cpuid,
1203 1203                              vecp->v_vector);
1204 1204                          break;
1205 1205                  }
1206 1206  
1207 1207                  /*
1208 1208                   * No vector to device mapping exists. If this is FIXED type
1209 1209                   * then check if this IRQ is already mapped for another device
1210 1210                   * then return the vector number for it (i.e. shared IRQ case).
1211 1211                   * Otherwise, return PSM_FAILURE.
1212 1212                   */
1213 1213                  if (hdlp->ih_type == DDI_INTR_TYPE_FIXED) {
1214 1214                          vecp = apix_intx_xlate_vector(dip, hdlp->ih_inum,
1215 1215                              ispec);
1216 1216                          *result = (vecp == NULL) ? APIX_INVALID_VECT :
1217 1217                              APIX_VIRTVECTOR(vecp->v_cpuid, vecp->v_vector);
1218 1218                  }
1219 1219                  if (*result == APIX_INVALID_VECT)
1220 1220                          return (PSM_FAILURE);
1221 1221                  break;
1222 1222          case PSM_INTR_OP_GET_PENDING:
1223 1223                  vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1224 1224                  if (vecp == NULL)
1225 1225                          return (PSM_FAILURE);
1226 1226  
1227 1227                  *result = apix_get_pending(vecp);
1228 1228                  break;
1229 1229          case PSM_INTR_OP_CLEAR_MASK:
1230 1230                  if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
1231 1231                          return (PSM_FAILURE);
1232 1232  
1233 1233                  vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1234 1234                  if (vecp == NULL)
1235 1235                          return (PSM_FAILURE);
1236 1236  
1237 1237                  apix_intx_clear_mask(vecp->v_inum);
1238 1238                  break;
1239 1239          case PSM_INTR_OP_SET_MASK:
1240 1240                  if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
1241 1241                          return (PSM_FAILURE);
1242 1242  
1243 1243                  vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1244 1244                  if (vecp == NULL)
1245 1245                          return (PSM_FAILURE);
1246 1246  
1247 1247                  apix_intx_set_mask(vecp->v_inum);
1248 1248                  break;
1249 1249          case PSM_INTR_OP_GET_SHARED:
1250 1250                  if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
1251 1251                          return (PSM_FAILURE);
1252 1252  
1253 1253                  vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1254 1254                  if (vecp == NULL)
1255 1255                          return (PSM_FAILURE);
1256 1256  
1257 1257                  *result = apix_intx_get_shared(vecp->v_inum);
1258 1258                  break;
1259 1259          case PSM_INTR_OP_SET_PRI:
1260 1260                  /*
1261 1261                   * Called prior to adding the interrupt handler or when
1262 1262                   * an interrupt handler is unassigned.
1263 1263                   */
1264 1264                  if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
1265 1265                          return (PSM_SUCCESS);
1266 1266  
1267 1267                  if (apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type) == NULL)
1268 1268                          return (PSM_FAILURE);
1269 1269  
1270 1270                  break;
1271 1271          case PSM_INTR_OP_SET_CPU:
1272 1272          case PSM_INTR_OP_GRP_SET_CPU:
1273 1273                  /*
1274 1274                   * The interrupt handle given here has been allocated
1275 1275                   * specifically for this command, and ih_private carries
1276 1276                   * a CPU value.
1277 1277                   */
1278 1278                  *result = EINVAL;
1279 1279                  target = (int)(intptr_t)hdlp->ih_private;
1280 1280                  if (!apic_cpu_in_range(target)) {
1281 1281                          DDI_INTR_IMPLDBG((CE_WARN,
1282 1282                              "[grp_]set_cpu: cpu out of range: %d\n", target));
1283 1283                          return (PSM_FAILURE);
1284 1284                  }
1285 1285  
1286 1286                  lock_set(&apix_lock);
1287 1287  
1288 1288                  vecp = apix_get_req_vector(hdlp, hdlp->ih_flags);
1289 1289                  if (!IS_VECT_ENABLED(vecp)) {
1290 1290                          DDI_INTR_IMPLDBG((CE_WARN,
1291 1291                              "[grp]_set_cpu: invalid vector 0x%x\n",
1292 1292                              hdlp->ih_vector));
1293 1293                          lock_clear(&apix_lock);
1294 1294                          return (PSM_FAILURE);
1295 1295                  }
1296 1296  
1297 1297                  *result = 0;
1298 1298  
1299 1299                  if (intr_op == PSM_INTR_OP_SET_CPU)
1300 1300                          newvecp = apix_set_cpu(vecp, target, result);
1301 1301                  else
1302 1302                          newvecp = apix_grp_set_cpu(vecp, target, result);
1303 1303  
1304 1304                  lock_clear(&apix_lock);
1305 1305  
1306 1306                  if (newvecp == NULL) {
1307 1307                          *result = EIO;
1308 1308                          return (PSM_FAILURE);
1309 1309                  }
1310 1310                  newvecp->v_bound_cpuid = target;
1311 1311                  hdlp->ih_vector = APIX_VIRTVECTOR(newvecp->v_cpuid,
1312 1312                      newvecp->v_vector);
1313 1313                  break;
1314 1314  
1315 1315          case PSM_INTR_OP_GET_INTR:
1316 1316                  /*
1317 1317                   * The interrupt handle given here has been allocated
1318 1318                   * specifically for this command, and ih_private carries
1319 1319                   * a pointer to a apic_get_intr_t.
1320 1320                   */
1321 1321                  if (apix_get_intr_info(hdlp, hdlp->ih_private) != PSM_SUCCESS)
1322 1322                          return (PSM_FAILURE);
1323 1323                  break;
1324 1324  
1325 1325          case PSM_INTR_OP_CHECK_MSI:
1326 1326                  /*
1327 1327                   * Check MSI/X is supported or not at APIC level and
1328 1328                   * masked off the MSI/X bits in hdlp->ih_type if not
1329 1329                   * supported before return.  If MSI/X is supported,
1330 1330                   * leave the ih_type unchanged and return.
1331 1331                   *
1332 1332                   * hdlp->ih_type passed in from the nexus has all the
1333 1333                   * interrupt types supported by the device.
1334 1334                   */
1335 1335                  if (apic_support_msi == 0) {    /* uninitialized */
1336 1336                          /*
1337 1337                           * if apic_support_msi is not set, call
1338 1338                           * apic_check_msi_support() to check whether msi
1339 1339                           * is supported first
1340 1340                           */
1341 1341                          if (apic_check_msi_support() == PSM_SUCCESS)
1342 1342                                  apic_support_msi = 1;   /* supported */
1343 1343                          else
1344 1344                                  apic_support_msi = -1;  /* not-supported */
1345 1345                  }
1346 1346                  if (apic_support_msi == 1) {
1347 1347                          if (apic_msix_enable)
1348 1348                                  *result = hdlp->ih_type;
1349 1349                          else
1350 1350                                  *result = hdlp->ih_type & ~DDI_INTR_TYPE_MSIX;
1351 1351                  } else
1352 1352                          *result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
1353 1353                              DDI_INTR_TYPE_MSIX);
1354 1354                  break;
1355 1355          case PSM_INTR_OP_GET_CAP:
1356 1356                  cap = DDI_INTR_FLAG_PENDING;
1357 1357                  if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
1358 1358                          cap |= DDI_INTR_FLAG_MASKABLE;
1359 1359                  *result = cap;
1360 1360                  break;
1361 1361          case PSM_INTR_OP_APIC_TYPE:
1362 1362                  ((apic_get_type_t *)(hdlp->ih_private))->avgi_type =
1363 1363                      apix_get_apic_type();
1364 1364                  ((apic_get_type_t *)(hdlp->ih_private))->avgi_num_intr =
1365 1365                      APIX_IPI_MIN;
1366 1366                  ((apic_get_type_t *)(hdlp->ih_private))->avgi_num_cpu =
1367 1367                      apic_nproc;
1368 1368                  hdlp->ih_ver = apic_get_apic_version();
1369 1369                  break;
1370 1370          case PSM_INTR_OP_SET_CAP:
1371 1371          default:
1372 1372                  return (PSM_FAILURE);
1373 1373          }
1374 1374  
1375 1375          return (PSM_SUCCESS);
1376 1376  }
1377 1377  
1378 1378  static void
1379 1379  apix_cleanup_busy(void)
1380 1380  {
1381 1381          int i, j;
1382 1382          apix_vector_t *vecp;
1383 1383  
1384 1384          for (i = 0; i < apic_nproc; i++) {
1385 1385                  if (!apic_cpu_in_range(i))
1386 1386                          continue;
1387 1387                  apic_cpus[i].aci_busy = 0;
1388 1388                  for (j = APIX_AVINTR_MIN; j < APIX_AVINTR_MAX; j++) {
1389 1389                          if ((vecp = xv_vector(i, j)) != NULL)
1390 1390                                  vecp->v_busy = 0;
1391 1391                  }
1392 1392          }
1393 1393  }
1394 1394  
1395 1395  static void
1396 1396  apix_redistribute_compute(void)
1397 1397  {
1398 1398          int     i, j, max_busy;
1399 1399  
1400 1400          if (!apic_enable_dynamic_migration)
1401 1401                  return;
1402 1402  
1403 1403          if (++apic_nticks == apic_sample_factor_redistribution) {
1404 1404                  /*
1405 1405                   * Time to call apic_intr_redistribute().
1406 1406                   * reset apic_nticks. This will cause max_busy
1407 1407                   * to be calculated below and if it is more than
1408 1408                   * apic_int_busy, we will do the whole thing
1409 1409                   */
1410 1410                  apic_nticks = 0;
1411 1411          }
1412 1412          max_busy = 0;
1413 1413          for (i = 0; i < apic_nproc; i++) {
1414 1414                  if (!apic_cpu_in_range(i))
1415 1415                          continue;
1416 1416                  /*
1417 1417                   * Check if curipl is non zero & if ISR is in
1418 1418                   * progress
1419 1419                   */
1420 1420                  if (((j = apic_cpus[i].aci_curipl) != 0) &&
1421 1421                      (apic_cpus[i].aci_ISR_in_progress & (1 << j))) {
1422 1422  
1423 1423                          int     vect;
1424 1424                          apic_cpus[i].aci_busy++;
1425 1425                          vect = apic_cpus[i].aci_current[j];
1426 1426                          apixs[i]->x_vectbl[vect]->v_busy++;
1427 1427                  }
1428 1428  
1429 1429                  if (!apic_nticks &&
1430 1430                      (apic_cpus[i].aci_busy > max_busy))
1431 1431                          max_busy = apic_cpus[i].aci_busy;
1432 1432          }
1433 1433          if (!apic_nticks) {
1434 1434                  if (max_busy > apic_int_busy_mark) {
1435 1435                  /*
1436 1436                   * We could make the following check be
1437 1437                   * skipped > 1 in which case, we get a
1438 1438                   * redistribution at half the busy mark (due to
1439 1439                   * double interval). Need to be able to collect
1440 1440                   * more empirical data to decide if that is a
1441 1441                   * good strategy. Punt for now.
1442 1442                   */
1443 1443                          apix_cleanup_busy();
1444 1444                          apic_skipped_redistribute = 0;
1445 1445                  } else
1446 1446                          apic_skipped_redistribute++;
1447 1447          }
1448 1448  }
1449 1449  
1450 1450  /*
1451 1451   * intr_ops() service routines
1452 1452   */
1453 1453  
1454 1454  static int
1455 1455  apix_get_pending(apix_vector_t *vecp)
1456 1456  {
1457 1457          int bit, index, irr, pending;
1458 1458  
1459 1459          /* need to get on the bound cpu */
1460 1460          mutex_enter(&cpu_lock);
1461 1461          affinity_set(vecp->v_cpuid);
1462 1462  
1463 1463          index = vecp->v_vector / 32;
1464 1464          bit = vecp->v_vector % 32;
1465 1465          irr = apic_reg_ops->apic_read(APIC_IRR_REG + index);
1466 1466  
1467 1467          affinity_clear();
1468 1468          mutex_exit(&cpu_lock);
1469 1469  
1470 1470          pending = (irr & (1 << bit)) ? 1 : 0;
1471 1471          if (!pending && vecp->v_type == APIX_TYPE_FIXED)
1472 1472                  pending = apix_intx_get_pending(vecp->v_inum);
1473 1473  
1474 1474          return (pending);
1475 1475  }
1476 1476  
1477 1477  static apix_vector_t *
1478 1478  apix_get_req_vector(ddi_intr_handle_impl_t *hdlp, ushort_t flags)
1479 1479  {
1480 1480          apix_vector_t *vecp;
1481 1481          processorid_t cpuid;
1482 1482          int32_t virt_vec = 0;
1483 1483  
1484 1484          switch (flags & PSMGI_INTRBY_FLAGS) {
1485 1485          case PSMGI_INTRBY_IRQ:
1486 1486                  return (apix_intx_get_vector(hdlp->ih_vector));
1487 1487          case PSMGI_INTRBY_VEC:
1488 1488                  virt_vec = (virt_vec == 0) ? hdlp->ih_vector : virt_vec;
1489 1489  
1490 1490                  cpuid = APIX_VIRTVEC_CPU(virt_vec);
1491 1491                  if (!apic_cpu_in_range(cpuid))
1492 1492                          return (NULL);
1493 1493  
1494 1494                  vecp = xv_vector(cpuid, APIX_VIRTVEC_VECTOR(virt_vec));
1495 1495                  break;
1496 1496          case PSMGI_INTRBY_DEFAULT:
1497 1497                  vecp = apix_get_dev_map(hdlp->ih_dip, hdlp->ih_inum,
1498 1498                      hdlp->ih_type);
1499 1499                  break;
1500 1500          default:
1501 1501                  return (NULL);
1502 1502          }
1503 1503  
1504 1504          return (vecp);
1505 1505  }
1506 1506  
1507 1507  static int
1508 1508  apix_get_intr_info(ddi_intr_handle_impl_t *hdlp,
1509 1509      apic_get_intr_t *intr_params_p)
1510 1510  {
1511 1511          apix_vector_t *vecp;
1512 1512          struct autovec *av_dev;
1513 1513          int i;
1514 1514  
1515 1515          vecp = apix_get_req_vector(hdlp, intr_params_p->avgi_req_flags);
1516 1516          if (IS_VECT_FREE(vecp)) {
1517 1517                  intr_params_p->avgi_num_devs = 0;
1518 1518                  intr_params_p->avgi_cpu_id = 0;
1519 1519                  intr_params_p->avgi_req_flags = 0;
1520 1520                  return (PSM_SUCCESS);
1521 1521          }
1522 1522  
1523 1523          if (intr_params_p->avgi_req_flags & PSMGI_REQ_CPUID) {
1524 1524                  intr_params_p->avgi_cpu_id = vecp->v_cpuid;
1525 1525  
1526 1526                  /* Return user bound info for intrd. */
1527 1527                  if (intr_params_p->avgi_cpu_id & IRQ_USER_BOUND) {
1528 1528                          intr_params_p->avgi_cpu_id &= ~IRQ_USER_BOUND;
1529 1529                          intr_params_p->avgi_cpu_id |= PSMGI_CPU_USER_BOUND;
1530 1530                  }
1531 1531          }
1532 1532  
1533 1533          if (intr_params_p->avgi_req_flags & PSMGI_REQ_VECTOR)
1534 1534                  intr_params_p->avgi_vector = vecp->v_vector;
1535 1535  
1536 1536          if (intr_params_p->avgi_req_flags &
1537 1537              (PSMGI_REQ_NUM_DEVS | PSMGI_REQ_GET_DEVS))
1538 1538                  /* Get number of devices from apic_irq table shared field. */
1539 1539                  intr_params_p->avgi_num_devs = vecp->v_share;
1540 1540  
1541 1541          if (intr_params_p->avgi_req_flags &  PSMGI_REQ_GET_DEVS) {
1542 1542  
1543 1543                  intr_params_p->avgi_req_flags  |= PSMGI_REQ_NUM_DEVS;
1544 1544  
1545 1545                  /* Some devices have NULL dip.  Don't count these. */
1546 1546                  if (intr_params_p->avgi_num_devs > 0) {
1547 1547                          for (i = 0, av_dev = vecp->v_autovect; av_dev;
1548 1548                              av_dev = av_dev->av_link) {
1549 1549                                  if (av_dev->av_vector && av_dev->av_dip)
1550 1550                                          i++;
1551 1551                          }
1552 1552                          intr_params_p->avgi_num_devs =
1553 1553                              (uint8_t)MIN(intr_params_p->avgi_num_devs, i);
1554 1554                  }
1555 1555  
1556 1556                  /* There are no viable dips to return. */
1557 1557                  if (intr_params_p->avgi_num_devs == 0) {
1558 1558                          intr_params_p->avgi_dip_list = NULL;
1559 1559  
1560 1560                  } else {        /* Return list of dips */
1561 1561  
1562 1562                          /* Allocate space in array for that number of devs. */
1563 1563                          intr_params_p->avgi_dip_list = kmem_zalloc(
1564 1564                              intr_params_p->avgi_num_devs *
1565 1565                              sizeof (dev_info_t *),
1566 1566                              KM_NOSLEEP);
1567 1567                          if (intr_params_p->avgi_dip_list == NULL) {
1568 1568                                  DDI_INTR_IMPLDBG((CE_WARN,
1569 1569                                      "apix_get_vector_intr_info: no memory"));
1570 1570                                  return (PSM_FAILURE);
1571 1571                          }
1572 1572  
1573 1573                          /*
1574 1574                           * Loop through the device list of the autovec table
1575 1575                           * filling in the dip array.
1576 1576                           *
1577 1577                           * Note that the autovect table may have some special
1578 1578                           * entries which contain NULL dips.  These will be
1579 1579                           * ignored.
1580 1580                           */
1581 1581                          for (i = 0, av_dev = vecp->v_autovect; av_dev;
1582 1582                              av_dev = av_dev->av_link) {
1583 1583                                  if (av_dev->av_vector && av_dev->av_dip)
1584 1584                                          intr_params_p->avgi_dip_list[i++] =
1585 1585                                              av_dev->av_dip;
1586 1586                          }
1587 1587                  }
1588 1588          }
1589 1589  
1590 1590          return (PSM_SUCCESS);
1591 1591  }
1592 1592  
1593 1593  static char *
1594 1594  apix_get_apic_type(void)
1595 1595  {
1596 1596          return (apix_psm_info.p_mach_idstring);
1597 1597  }
1598 1598  
1599 1599  apix_vector_t *
1600 1600  apix_set_cpu(apix_vector_t *vecp, int new_cpu, int *result)
1601 1601  {
1602 1602          apix_vector_t *newp = NULL;
1603 1603          dev_info_t *dip;
1604 1604          int inum, cap_ptr;
1605 1605          ddi_acc_handle_t handle;
1606 1606          ddi_intr_msix_t *msix_p = NULL;
1607 1607          ushort_t msix_ctrl;
1608 1608          uintptr_t off;
1609 1609          uint32_t mask;
1610 1610  
1611 1611          ASSERT(LOCK_HELD(&apix_lock));
1612 1612          *result = ENXIO;
1613 1613  
1614 1614          /* Fail if this is an MSI intr and is part of a group. */
1615 1615          if (vecp->v_type == APIX_TYPE_MSI) {
1616 1616                  if (i_ddi_intr_get_current_nintrs(APIX_GET_DIP(vecp)) > 1)
1617 1617                          return (NULL);
1618 1618                  else
1619 1619                          return (apix_grp_set_cpu(vecp, new_cpu, result));
1620 1620          }
1621 1621  
1622 1622          /*
1623 1623           * Mask MSI-X. It's unmasked when MSI-X gets enabled.
1624 1624           */
1625 1625          if (vecp->v_type == APIX_TYPE_MSIX && IS_VECT_ENABLED(vecp)) {
1626 1626                  if ((dip = APIX_GET_DIP(vecp)) == NULL)
1627 1627                          return (NULL);
1628 1628                  inum = vecp->v_devp->dv_inum;
1629 1629  
1630 1630                  handle = i_ddi_get_pci_config_handle(dip);
1631 1631                  cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
1632 1632                  msix_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
1633 1633                  if ((msix_ctrl & PCI_MSIX_FUNCTION_MASK) == 0) {
1634 1634                          /*
1635 1635                           * Function is not masked, then mask "inum"th
1636 1636                           * entry in the MSI-X table
1637 1637                           */
1638 1638                          msix_p = i_ddi_get_msix(dip);
1639 1639                          off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
1640 1640                              PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
1641 1641                          mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
1642 1642                          ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off,
1643 1643                              mask | 1);
1644 1644                  }
1645 1645          }
1646 1646  
1647 1647          *result = 0;
1648 1648          if ((newp = apix_rebind(vecp, new_cpu, 1)) == NULL)
1649 1649                  *result = EIO;
1650 1650  
1651 1651          /* Restore mask bit */
1652 1652          if (msix_p != NULL)
1653 1653                  ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, mask);
1654 1654  
1655 1655          return (newp);
1656 1656  }
1657 1657  
1658 1658  /*
1659 1659   * Set cpu for MSIs
1660 1660   */
1661 1661  apix_vector_t *
1662 1662  apix_grp_set_cpu(apix_vector_t *vecp, int new_cpu, int *result)
1663 1663  {
1664 1664          apix_vector_t *newp, *vp;
1665 1665          uint32_t orig_cpu = vecp->v_cpuid;
1666 1666          int orig_vect = vecp->v_vector;
1667 1667          int i, num_vectors, cap_ptr, msi_mask_off;
1668 1668          uint32_t msi_pvm;
1669 1669          ushort_t msi_ctrl;
1670 1670          ddi_acc_handle_t handle;
1671 1671          dev_info_t *dip;
1672 1672  
1673 1673          APIC_VERBOSE(INTR, (CE_CONT, "apix_grp_set_cpu: oldcpu: %x, vector: %x,"
1674 1674              " newcpu:%x\n", vecp->v_cpuid, vecp->v_vector, new_cpu));
1675 1675  
1676 1676          ASSERT(LOCK_HELD(&apix_lock));
1677 1677  
1678 1678          *result = ENXIO;
1679 1679  
1680 1680          if (vecp->v_type != APIX_TYPE_MSI) {
1681 1681                  DDI_INTR_IMPLDBG((CE_WARN, "set_grp: intr not MSI\n"));
1682 1682                  return (NULL);
1683 1683          }
1684 1684  
1685 1685          if ((dip = APIX_GET_DIP(vecp)) == NULL)
1686 1686                  return (NULL);
1687 1687  
1688 1688          num_vectors = i_ddi_intr_get_current_nintrs(dip);
1689 1689          if ((num_vectors < 1) || ((num_vectors - 1) & orig_vect)) {
1690 1690                  APIC_VERBOSE(INTR, (CE_WARN,
1691 1691                      "set_grp: base vec not part of a grp or not aligned: "
1692 1692                      "vec:0x%x, num_vec:0x%x\n", orig_vect, num_vectors));
1693 1693                  return (NULL);
1694 1694          }
1695 1695  
1696 1696          if (vecp->v_inum != apix_get_min_dev_inum(dip, vecp->v_type))
1697 1697                  return (NULL);
1698 1698  
1699 1699          *result = EIO;
1700 1700          for (i = 1; i < num_vectors; i++) {
1701 1701                  if ((vp = xv_vector(orig_cpu, orig_vect + i)) == NULL)
1702 1702                          return (NULL);
1703 1703  #ifdef DEBUG
1704 1704                  /*
1705 1705                   * Sanity check: CPU and dip is the same for all entries.
1706 1706                   * May be called when first msi to be enabled, at this time
1707 1707                   * add_avintr() is not called for other msi
1708 1708                   */
1709 1709                  if ((vp->v_share != 0) &&
1710 1710                      ((APIX_GET_DIP(vp) != dip) ||
1711 1711                      (vp->v_cpuid != vecp->v_cpuid))) {
1712 1712                          APIC_VERBOSE(INTR, (CE_WARN,
1713 1713                              "set_grp: cpu or dip for vec 0x%x difft than for "
1714 1714                              "vec 0x%x\n", orig_vect, orig_vect + i));
1715 1715                          APIC_VERBOSE(INTR, (CE_WARN,
1716 1716                              "  cpu: %d vs %d, dip: 0x%p vs 0x%p\n", orig_cpu,
1717 1717                              vp->v_cpuid, (void *)dip,
1718 1718                              (void *)APIX_GET_DIP(vp)));
1719 1719                          return (NULL);
1720 1720                  }
1721 1721  #endif /* DEBUG */
1722 1722          }
1723 1723  
1724 1724          cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
1725 1725          handle = i_ddi_get_pci_config_handle(dip);
1726 1726          msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1727 1727  
1728 1728          /* MSI Per vector masking is supported. */
1729 1729          if (msi_ctrl & PCI_MSI_PVM_MASK) {
1730 1730                  if (msi_ctrl &  PCI_MSI_64BIT_MASK)
1731 1731                          msi_mask_off = cap_ptr + PCI_MSI_64BIT_MASKBITS;
1732 1732                  else
1733 1733                          msi_mask_off = cap_ptr + PCI_MSI_32BIT_MASK;
1734 1734                  msi_pvm = pci_config_get32(handle, msi_mask_off);
1735 1735                  pci_config_put32(handle, msi_mask_off, (uint32_t)-1);
1736 1736                  APIC_VERBOSE(INTR, (CE_CONT,
1737 1737                      "set_grp: pvm supported.  Mask set to 0x%x\n",
1738 1738                      pci_config_get32(handle, msi_mask_off)));
1739 1739          }
1740 1740  
1741 1741          if ((newp = apix_rebind(vecp, new_cpu, num_vectors)) != NULL)
1742 1742                  *result = 0;
1743 1743  
1744 1744          /* Reenable vectors if per vector masking is supported. */
1745 1745          if (msi_ctrl & PCI_MSI_PVM_MASK) {
1746 1746                  pci_config_put32(handle, msi_mask_off, msi_pvm);
1747 1747                  APIC_VERBOSE(INTR, (CE_CONT,
1748 1748                      "set_grp: pvm supported.  Mask restored to 0x%x\n",
1749 1749                      pci_config_get32(handle, msi_mask_off)));
1750 1750          }
1751 1751  
1752 1752          return (newp);
1753 1753  }
1754 1754  
1755 1755  void
1756 1756  apix_intx_set_vector(int irqno, uint32_t cpuid, uchar_t vector)
1757 1757  {
1758 1758          apic_irq_t *irqp;
1759 1759  
1760 1760          mutex_enter(&airq_mutex);
1761 1761          irqp = apic_irq_table[irqno];
1762 1762          irqp->airq_cpu = cpuid;
1763 1763          irqp->airq_vector = vector;
1764 1764          apic_record_rdt_entry(irqp, irqno);
1765 1765          mutex_exit(&airq_mutex);
1766 1766  }
1767 1767  
1768 1768  apix_vector_t *
1769 1769  apix_intx_get_vector(int irqno)
1770 1770  {
1771 1771          apic_irq_t *irqp;
1772 1772          uint32_t cpuid;
1773 1773          uchar_t vector;
1774 1774  
1775 1775          mutex_enter(&airq_mutex);
1776 1776          irqp = apic_irq_table[irqno & 0xff];
1777 1777          if (IS_IRQ_FREE(irqp) || (irqp->airq_cpu == IRQ_UNINIT)) {
1778 1778                  mutex_exit(&airq_mutex);
1779 1779                  return (NULL);
1780 1780          }
1781 1781          cpuid = irqp->airq_cpu;
1782 1782          vector = irqp->airq_vector;
1783 1783          mutex_exit(&airq_mutex);
1784 1784  
1785 1785          return (xv_vector(cpuid, vector));
1786 1786  }
1787 1787  
1788 1788  /*
1789 1789   * Must called with interrupts disabled and apic_ioapic_lock held
1790 1790   */
1791 1791  void
1792 1792  apix_intx_enable(int irqno)
1793 1793  {
1794 1794          uchar_t ioapicindex, intin;
1795 1795          apic_irq_t *irqp = apic_irq_table[irqno];
1796 1796          ioapic_rdt_t irdt;
1797 1797          apic_cpus_info_t *cpu_infop;
1798 1798          apix_vector_t *vecp = xv_vector(irqp->airq_cpu, irqp->airq_vector);
1799 1799  
1800 1800          ASSERT(LOCK_HELD(&apic_ioapic_lock) && !IS_IRQ_FREE(irqp));
1801 1801  
1802 1802          ioapicindex = irqp->airq_ioapicindex;
1803 1803          intin = irqp->airq_intin_no;
1804 1804          cpu_infop =  &apic_cpus[irqp->airq_cpu];
1805 1805  
1806 1806          irdt.ir_lo = AV_PDEST | AV_FIXED | irqp->airq_rdt_entry;
1807 1807          irdt.ir_hi = cpu_infop->aci_local_id;
1808 1808  
1809 1809          apic_vt_ops->apic_intrmap_alloc_entry(&vecp->v_intrmap_private, NULL,
1810 1810              vecp->v_type, 1, ioapicindex);
1811 1811          apic_vt_ops->apic_intrmap_map_entry(vecp->v_intrmap_private,
1812 1812              (void *)&irdt, vecp->v_type, 1);
1813 1813          apic_vt_ops->apic_intrmap_record_rdt(vecp->v_intrmap_private, &irdt);
1814 1814  
1815 1815          /* write RDT entry high dword - destination */
1816 1816          WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapicindex, intin,
1817 1817              irdt.ir_hi);
1818 1818  
1819 1819          /* Write the vector, trigger, and polarity portion of the RDT */
1820 1820          WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapicindex, intin, irdt.ir_lo);
1821 1821  
1822 1822          vecp->v_state = APIX_STATE_ENABLED;
1823 1823  
1824 1824          APIC_VERBOSE_IOAPIC((CE_CONT, "apix_intx_enable: ioapic 0x%x"
1825 1825              " intin 0x%x rdt_low 0x%x rdt_high 0x%x\n",
1826 1826              ioapicindex, intin, irdt.ir_lo, irdt.ir_hi));
1827 1827  }
1828 1828  
1829 1829  /*
1830 1830   * Must called with interrupts disabled and apic_ioapic_lock held
1831 1831   */
1832 1832  void
1833 1833  apix_intx_disable(int irqno)
1834 1834  {
1835 1835          apic_irq_t *irqp = apic_irq_table[irqno];
1836 1836          int ioapicindex, intin;
1837 1837  
1838 1838          ASSERT(LOCK_HELD(&apic_ioapic_lock) && !IS_IRQ_FREE(irqp));
1839 1839          /*
1840 1840           * The assumption here is that this is safe, even for
1841 1841           * systems with IOAPICs that suffer from the hardware
1842 1842           * erratum because all devices have been quiesced before
1843 1843           * they unregister their interrupt handlers.  If that
1844 1844           * assumption turns out to be false, this mask operation
1845 1845           * can induce the same erratum result we're trying to
1846 1846           * avoid.
1847 1847           */
1848 1848          ioapicindex = irqp->airq_ioapicindex;
1849 1849          intin = irqp->airq_intin_no;
1850 1850          ioapic_write(ioapicindex, APIC_RDT_CMD + 2 * intin, AV_MASK);
1851 1851  
1852 1852          APIC_VERBOSE_IOAPIC((CE_CONT, "apix_intx_disable: ioapic 0x%x"
1853 1853              " intin 0x%x\n", ioapicindex, intin));
1854 1854  }
1855 1855  
1856 1856  void
1857 1857  apix_intx_free(int irqno)
1858 1858  {
1859 1859          apic_irq_t *irqp;
1860 1860  
1861 1861          mutex_enter(&airq_mutex);
1862 1862          irqp = apic_irq_table[irqno];
1863 1863  
1864 1864          if (IS_IRQ_FREE(irqp)) {
1865 1865                  mutex_exit(&airq_mutex);
1866 1866                  return;
1867 1867          }
1868 1868  
1869 1869          irqp->airq_mps_intr_index = FREE_INDEX;
1870 1870          irqp->airq_cpu = IRQ_UNINIT;
1871 1871          irqp->airq_vector = APIX_INVALID_VECT;
1872 1872          mutex_exit(&airq_mutex);
1873 1873  }
1874 1874  
#ifdef DEBUG
/*
 * DEBUG-only event counters maintained by apix_intx_rebind():
 * delivery status still pending after the wait, Remote IRR wait
 * exhausted, and Remote IRR still set after the edge/level toggle.
 */
int apix_intr_deliver_timeouts = 0;
int apix_intr_rirr_timeouts = 0;
int apix_intr_rirr_reset_failure = 0;
#endif
/* Upper bound on Remote IRR wait passes in apix_intx_rebind(). */
int apix_max_reps_irr_pending = 10;

/* Read the low dword of an RDT entry and keep only the requested bits. */
#define GET_RDT_BITS(ioapic, intin, bits)       \
        (READ_IOAPIC_RDT_ENTRY_LOW_DWORD((ioapic), (intin)) & (bits))
/* 5000 microseconds expressed in clock ticks; passed to delay(). */
#define APIX_CHECK_IRR_DELAY    drv_usectohz(5000)
1885 1885  
1886 1886  int
1887 1887  apix_intx_rebind(int irqno, processorid_t cpuid, uchar_t vector)
1888 1888  {
1889 1889          apic_irq_t *irqp = apic_irq_table[irqno];
1890 1890          ulong_t iflag;
1891 1891          int waited, ioapic_ix, intin_no, level, repeats, rdt_entry, masked;
1892 1892  
1893 1893          ASSERT(irqp != NULL);
1894 1894  
1895 1895          iflag = intr_clear();
1896 1896          lock_set(&apic_ioapic_lock);
1897 1897  
1898 1898          ioapic_ix = irqp->airq_ioapicindex;
1899 1899          intin_no = irqp->airq_intin_no;
1900 1900          level = apic_level_intr[irqno];
1901 1901  
1902 1902          /*
1903 1903           * Wait for the delivery status bit to be cleared. This should
1904 1904           * be a very small amount of time.
1905 1905           */
1906 1906          repeats = 0;
1907 1907          do {
1908 1908                  repeats++;
1909 1909  
1910 1910                  for (waited = 0; waited < apic_max_reps_clear_pending;
1911 1911                      waited++) {
1912 1912                          if (GET_RDT_BITS(ioapic_ix, intin_no, AV_PENDING) == 0)
1913 1913                                  break;
1914 1914                  }
1915 1915                  if (!level)
1916 1916                          break;
1917 1917  
1918 1918                  /*
1919 1919                   * Mask the RDT entry for level-triggered interrupts.
1920 1920                   */
1921 1921                  irqp->airq_rdt_entry |= AV_MASK;
1922 1922                  rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1923 1923                      intin_no);
1924 1924                  if ((masked = (rdt_entry & AV_MASK)) == 0) {
1925 1925                          /* Mask it */
1926 1926                          WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix, intin_no,
1927 1927                              AV_MASK | rdt_entry);
1928 1928                  }
1929 1929  
1930 1930                  /*
1931 1931                   * If there was a race and an interrupt was injected
1932 1932                   * just before we masked, check for that case here.
1933 1933                   * Then, unmask the RDT entry and try again.  If we're
1934 1934                   * on our last try, don't unmask (because we want the
1935 1935                   * RDT entry to remain masked for the rest of the
1936 1936                   * function).
1937 1937                   */
1938 1938                  rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1939 1939                      intin_no);
1940 1940                  if ((masked == 0) && ((rdt_entry & AV_PENDING) != 0) &&
1941 1941                      (repeats < apic_max_reps_clear_pending)) {
1942 1942                          /* Unmask it */
1943 1943                          WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1944 1944                              intin_no, rdt_entry & ~AV_MASK);
1945 1945                          irqp->airq_rdt_entry &= ~AV_MASK;
1946 1946                  }
1947 1947          } while ((rdt_entry & AV_PENDING) &&
1948 1948              (repeats < apic_max_reps_clear_pending));
1949 1949  
1950 1950  #ifdef DEBUG
1951 1951          if (GET_RDT_BITS(ioapic_ix, intin_no, AV_PENDING) != 0)
1952 1952                  apix_intr_deliver_timeouts++;
1953 1953  #endif
1954 1954  
1955 1955          if (!level || !APIX_IS_MASK_RDT(apix_mul_ioapic_method))
1956 1956                  goto done;
1957 1957  
1958 1958          /*
1959 1959           * wait for remote IRR to be cleared for level-triggered
1960 1960           * interrupts
1961 1961           */
1962 1962          repeats = 0;
1963 1963          do {
1964 1964                  repeats++;
1965 1965  
1966 1966                  for (waited = 0; waited < apic_max_reps_clear_pending;
1967 1967                      waited++) {
1968 1968                          if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR)
1969 1969                              == 0)
1970 1970                                  break;
1971 1971                  }
1972 1972  
1973 1973                  if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR) != 0) {
1974 1974                          lock_clear(&apic_ioapic_lock);
1975 1975                          intr_restore(iflag);
1976 1976  
1977 1977                          delay(APIX_CHECK_IRR_DELAY);
1978 1978  
1979 1979                          iflag = intr_clear();
1980 1980                          lock_set(&apic_ioapic_lock);
1981 1981                  }
1982 1982          } while (repeats < apix_max_reps_irr_pending);
1983 1983  
1984 1984          if (repeats >= apix_max_reps_irr_pending) {
1985 1985  #ifdef DEBUG
1986 1986                  apix_intr_rirr_timeouts++;
1987 1987  #endif
1988 1988  
1989 1989                  /*
1990 1990                   * If we waited and the Remote IRR bit is still not cleared,
1991 1991                   * AND if we've invoked the timeout APIC_REPROGRAM_MAX_TIMEOUTS
1992 1992                   * times for this interrupt, try the last-ditch workaround:
1993 1993                   */
1994 1994                  if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR) != 0) {
1995 1995                          /*
1996 1996                           * Trying to clear the bit through normal
1997 1997                           * channels has failed.  So as a last-ditch
1998 1998                           * effort, try to set the trigger mode to
1999 1999                           * edge, then to level.  This has been
2000 2000                           * observed to work on many systems.
2001 2001                           */
2002 2002                          WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
2003 2003                              intin_no,
2004 2004                              READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
2005 2005                              intin_no) & ~AV_LEVEL);
2006 2006                          WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
2007 2007                              intin_no,
2008 2008                              READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
2009 2009                              intin_no) | AV_LEVEL);
2010 2010                  }
2011 2011  
2012 2012                  if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR) != 0) {
2013 2013  #ifdef DEBUG
2014 2014                          apix_intr_rirr_reset_failure++;
2015 2015  #endif
2016 2016                          lock_clear(&apic_ioapic_lock);
2017 2017                          intr_restore(iflag);
2018 2018                          prom_printf("apix: Remote IRR still "
2019 2019                              "not clear for IOAPIC %d intin %d.\n"
2020 2020                              "\tInterrupts to this pin may cease "
2021 2021                              "functioning.\n", ioapic_ix, intin_no);
2022 2022                          return (1);     /* return failure */
2023 2023                  }
2024 2024          }
2025 2025  
2026 2026  done:
2027 2027          /* change apic_irq_table */
2028 2028          lock_clear(&apic_ioapic_lock);
2029 2029          intr_restore(iflag);
2030 2030          apix_intx_set_vector(irqno, cpuid, vector);
2031 2031          iflag = intr_clear();
2032 2032          lock_set(&apic_ioapic_lock);
2033 2033  
2034 2034          /* reprogramme IO-APIC RDT entry */
2035 2035          apix_intx_enable(irqno);
2036 2036  
2037 2037          lock_clear(&apic_ioapic_lock);
2038 2038          intr_restore(iflag);
2039 2039  
2040 2040          return (0);
2041 2041  }
2042 2042  
2043 2043  static int
2044 2044  apix_intx_get_pending(int irqno)
2045 2045  {
2046 2046          apic_irq_t *irqp;
2047 2047          int intin, ioapicindex, pending;
2048 2048          ulong_t iflag;
2049 2049  
2050 2050          mutex_enter(&airq_mutex);
2051 2051          irqp = apic_irq_table[irqno];
2052 2052          if (IS_IRQ_FREE(irqp)) {
2053 2053                  mutex_exit(&airq_mutex);
2054 2054                  return (0);
2055 2055          }
2056 2056  
2057 2057          /* check IO-APIC delivery status */
2058 2058          intin = irqp->airq_intin_no;
2059 2059          ioapicindex = irqp->airq_ioapicindex;
2060 2060          mutex_exit(&airq_mutex);
2061 2061  
2062 2062          iflag = intr_clear();
2063 2063          lock_set(&apic_ioapic_lock);
2064 2064  
2065 2065          pending = (READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapicindex, intin) &
2066 2066              AV_PENDING) ? 1 : 0;
2067 2067  
2068 2068          lock_clear(&apic_ioapic_lock);
2069 2069          intr_restore(iflag);
2070 2070  
2071 2071          return (pending);
2072 2072  }
2073 2073  
2074 2074  /*
2075 2075   * This function will mask the interrupt on the I/O APIC
2076 2076   */
2077 2077  static void
2078 2078  apix_intx_set_mask(int irqno)
2079 2079  {
2080 2080          int intin, ioapixindex, rdt_entry;
2081 2081          ulong_t iflag;
2082 2082          apic_irq_t *irqp;
2083 2083  
2084 2084          mutex_enter(&airq_mutex);
2085 2085          irqp = apic_irq_table[irqno];
2086 2086  
2087 2087          ASSERT(irqp->airq_mps_intr_index != FREE_INDEX);
2088 2088  
2089 2089          intin = irqp->airq_intin_no;
2090 2090          ioapixindex = irqp->airq_ioapicindex;
2091 2091          mutex_exit(&airq_mutex);
2092 2092  
2093 2093          iflag = intr_clear();
2094 2094          lock_set(&apic_ioapic_lock);
2095 2095  
2096 2096          rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin);
2097 2097  
2098 2098          /* clear mask */
2099 2099          WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin,
2100 2100              (AV_MASK | rdt_entry));
2101 2101  
2102 2102          lock_clear(&apic_ioapic_lock);
2103 2103          intr_restore(iflag);
2104 2104  }
2105 2105  
2106 2106  /*
2107 2107   * This function will clear the mask for the interrupt on the I/O APIC
2108 2108   */
2109 2109  static void
2110 2110  apix_intx_clear_mask(int irqno)
2111 2111  {
2112 2112          int intin, ioapixindex, rdt_entry;
2113 2113          ulong_t iflag;
2114 2114          apic_irq_t *irqp;
2115 2115  
2116 2116          mutex_enter(&airq_mutex);
2117 2117          irqp = apic_irq_table[irqno];
2118 2118  
2119 2119          ASSERT(irqp->airq_mps_intr_index != FREE_INDEX);
2120 2120  
2121 2121          intin = irqp->airq_intin_no;
2122 2122          ioapixindex = irqp->airq_ioapicindex;
2123 2123          mutex_exit(&airq_mutex);
2124 2124  
2125 2125          iflag = intr_clear();
2126 2126          lock_set(&apic_ioapic_lock);
2127 2127  
2128 2128          rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin);
2129 2129  
2130 2130          /* clear mask */
2131 2131          WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin,
2132 2132              ((~AV_MASK) & rdt_entry));
2133 2133  
2134 2134          lock_clear(&apic_ioapic_lock);
2135 2135          intr_restore(iflag);
2136 2136  }
2137 2137  
2138 2138  /*
2139 2139   * For level-triggered interrupt, mask the IRQ line. Mask means
2140 2140   * new interrupts will not be delivered. The interrupt already
2141 2141   * accepted by a local APIC is not affected
2142 2142   */
2143 2143  void
2144 2144  apix_level_intr_pre_eoi(int irq)
2145 2145  {
2146 2146          apic_irq_t *irqp = apic_irq_table[irq];
2147 2147          int apic_ix, intin_ix;
2148 2148  
2149 2149          if (irqp == NULL)
2150 2150                  return;
2151 2151  
2152 2152          ASSERT(apic_level_intr[irq] == TRIGGER_MODE_LEVEL);
2153 2153  
2154 2154          lock_set(&apic_ioapic_lock);
2155 2155  
2156 2156          intin_ix = irqp->airq_intin_no;
2157 2157          apic_ix = irqp->airq_ioapicindex;
2158 2158  
2159 2159          if (irqp->airq_cpu != CPU->cpu_id) {
2160 2160                  if (!APIX_IS_MASK_RDT(apix_mul_ioapic_method))
2161 2161                          ioapic_write_eoi(apic_ix, irqp->airq_vector);
2162 2162                  lock_clear(&apic_ioapic_lock);
2163 2163                  return;
2164 2164          }
2165 2165  
2166 2166          if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_IOXAPIC) {
2167 2167                  /*
2168 2168                   * This is a IOxAPIC and there is EOI register:
2169 2169                   *      Change the vector to reserved unused vector, so that
2170 2170                   *      the EOI from Local APIC won't clear the Remote IRR for
2171 2171                   *      this level trigger interrupt. Instead, we'll manually
2172 2172                   *      clear it in apix_post_hardint() after ISR handling.
2173 2173                   */
2174 2174                  WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_ix,
2175 2175                      (irqp->airq_rdt_entry & (~0xff)) | APIX_RESV_VECTOR);
2176 2176          } else {
2177 2177                  WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_ix,
2178 2178                      AV_MASK | irqp->airq_rdt_entry);
2179 2179          }
2180 2180  
2181 2181          lock_clear(&apic_ioapic_lock);
2182 2182  }
2183 2183  
2184 2184  /*
2185 2185   * For level-triggered interrupt, unmask the IRQ line
2186 2186   * or restore the original vector number.
2187 2187   */
2188 2188  void
2189 2189  apix_level_intr_post_dispatch(int irq)
2190 2190  {
2191 2191          apic_irq_t *irqp = apic_irq_table[irq];
2192 2192          int apic_ix, intin_ix;
2193 2193  
2194 2194          if (irqp == NULL)
2195 2195                  return;
2196 2196  
2197 2197          lock_set(&apic_ioapic_lock);
2198 2198  
2199 2199          intin_ix = irqp->airq_intin_no;
2200 2200          apic_ix = irqp->airq_ioapicindex;
2201 2201  
2202 2202          if (APIX_IS_DIRECTED_EOI(apix_mul_ioapic_method)) {
2203 2203                  /*
2204 2204                   * Already sent EOI back to Local APIC.
2205 2205                   * Send EOI to IO-APIC
2206 2206                   */
2207 2207                  ioapic_write_eoi(apic_ix, irqp->airq_vector);
2208 2208          } else {
2209 2209                  /* clear the mask or restore the vector */
2210 2210                  WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_ix,
2211 2211                      irqp->airq_rdt_entry);
2212 2212  
2213 2213                  /* send EOI to IOxAPIC */
2214 2214                  if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_IOXAPIC)
2215 2215                          ioapic_write_eoi(apic_ix, irqp->airq_vector);
2216 2216          }
2217 2217  
2218 2218          lock_clear(&apic_ioapic_lock);
2219 2219  }
2220 2220  
2221 2221  static int
2222 2222  apix_intx_get_shared(int irqno)
2223 2223  {
2224 2224          apic_irq_t *irqp;
2225 2225          int share;
2226 2226  
2227 2227          mutex_enter(&airq_mutex);
2228 2228          irqp = apic_irq_table[irqno];
2229 2229          if (IS_IRQ_FREE(irqp) || (irqp->airq_cpu == IRQ_UNINIT)) {
2230 2230                  mutex_exit(&airq_mutex);
2231 2231                  return (0);
2232 2232          }
2233 2233          share = irqp->airq_share;
2234 2234          mutex_exit(&airq_mutex);
2235 2235  
2236 2236          return (share);
2237 2237  }
2238 2238  
2239 2239  static void
2240 2240  apix_intx_set_shared(int irqno, int delta)
2241 2241  {
2242 2242          apic_irq_t *irqp;
2243 2243  
2244 2244          mutex_enter(&airq_mutex);
2245 2245          irqp = apic_irq_table[irqno];
2246 2246          if (IS_IRQ_FREE(irqp)) {
2247 2247                  mutex_exit(&airq_mutex);
2248 2248                  return;
2249 2249          }
2250 2250          irqp->airq_share += delta;
2251 2251          mutex_exit(&airq_mutex);
2252 2252  }
2253 2253  
2254 2254  /*
2255 2255   * Setup IRQ table. Return IRQ no or -1 on failure
2256 2256   */
2257 2257  static int
2258 2258  apix_intx_setup(dev_info_t *dip, int inum, int irqno,
2259 2259      struct apic_io_intr *intrp, struct intrspec *ispec, iflag_t *iflagp)
2260 2260  {
2261 2261          int origirq = ispec->intrspec_vec;
2262 2262          int newirq;
2263 2263          short intr_index;
2264 2264          uchar_t ipin, ioapic, ioapicindex;
2265 2265          apic_irq_t *irqp;
2266 2266  
2267 2267          UNREFERENCED_1PARAMETER(inum);
2268 2268  
2269 2269          if (intrp != NULL) {
2270 2270                  intr_index = (short)(intrp - apic_io_intrp);
2271 2271                  ioapic = intrp->intr_destid;
2272 2272                  ipin = intrp->intr_destintin;
2273 2273  
2274 2274                  /* Find ioapicindex. If destid was ALL, we will exit with 0. */
2275 2275                  for (ioapicindex = apic_io_max - 1; ioapicindex; ioapicindex--)
2276 2276                          if (apic_io_id[ioapicindex] == ioapic)
2277 2277                                  break;
2278 2278                  ASSERT((ioapic == apic_io_id[ioapicindex]) ||
2279 2279                      (ioapic == INTR_ALL_APIC));
2280 2280  
2281 2281                  /* check whether this intin# has been used by another irqno */
2282 2282                  if ((newirq = apic_find_intin(ioapicindex, ipin)) != -1)
2283 2283                          return (newirq);
2284 2284  
2285 2285          } else if (iflagp != NULL) {    /* ACPI */
2286 2286                  intr_index = ACPI_INDEX;
2287 2287                  ioapicindex = acpi_find_ioapic(irqno);
2288 2288                  ASSERT(ioapicindex != 0xFF);
2289 2289                  ioapic = apic_io_id[ioapicindex];
2290 2290                  ipin = irqno - apic_io_vectbase[ioapicindex];
2291 2291  
2292 2292                  if (apic_irq_table[irqno] &&
2293 2293                      apic_irq_table[irqno]->airq_mps_intr_index == ACPI_INDEX) {
2294 2294                          ASSERT(apic_irq_table[irqno]->airq_intin_no == ipin &&
2295 2295                              apic_irq_table[irqno]->airq_ioapicindex ==
2296 2296                              ioapicindex);
2297 2297                          return (irqno);
2298 2298                  }
2299 2299  
2300 2300          } else {        /* default configuration */
2301 2301                  intr_index = DEFAULT_INDEX;
2302 2302                  ioapicindex = 0;
2303 2303                  ioapic = apic_io_id[ioapicindex];
2304 2304                  ipin = (uchar_t)irqno;
2305 2305          }
2306 2306  
2307 2307          /* allocate a new IRQ no */
2308 2308          if ((irqp = apic_irq_table[irqno]) == NULL) {
2309 2309                  irqp = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP);
2310 2310                  apic_irq_table[irqno] = irqp;
2311 2311          } else {
2312 2312                  if (irqp->airq_mps_intr_index != FREE_INDEX) {
2313 2313                          newirq = apic_allocate_irq(apic_first_avail_irq);
2314 2314                          if (newirq == -1) {
2315 2315                                  return (-1);
2316 2316                          }
2317 2317                          irqno = newirq;
2318 2318                          irqp = apic_irq_table[irqno];
2319 2319                          ASSERT(irqp != NULL);
2320 2320                  }
2321 2321          }
2322 2322          apic_max_device_irq = max(irqno, apic_max_device_irq);
2323 2323          apic_min_device_irq = min(irqno, apic_min_device_irq);
2324 2324  
2325 2325          irqp->airq_mps_intr_index = intr_index;
2326 2326          irqp->airq_ioapicindex = ioapicindex;
2327 2327          irqp->airq_intin_no = ipin;
2328 2328          irqp->airq_dip = dip;
2329 2329          irqp->airq_origirq = (uchar_t)origirq;
2330 2330          if (iflagp != NULL)
2331 2331                  irqp->airq_iflag = *iflagp;
2332 2332          irqp->airq_cpu = IRQ_UNINIT;
2333 2333          irqp->airq_vector = 0;
2334 2334  
2335 2335          return (irqno);
2336 2336  }
2337 2337  
2338 2338  /*
2339 2339   * Setup IRQ table for non-pci devices. Return IRQ no or -1 on error
2340 2340   */
2341 2341  static int
2342 2342  apix_intx_setup_nonpci(dev_info_t *dip, int inum, int bustype,
2343 2343      struct intrspec *ispec)
2344 2344  {
2345 2345          int irqno = ispec->intrspec_vec;
2346 2346          int newirq, i;
2347 2347          iflag_t intr_flag;
2348 2348          ACPI_SUBTABLE_HEADER    *hp;
2349 2349          ACPI_MADT_INTERRUPT_OVERRIDE *isop;
2350 2350          struct apic_io_intr *intrp;
2351 2351  
2352 2352          if (!apic_enable_acpi || apic_use_acpi_madt_only) {
2353 2353                  int busid;
2354 2354  
2355 2355                  if (bustype == 0)
2356 2356                          bustype = eisa_level_intr_mask ? BUS_EISA : BUS_ISA;
2357 2357  
2358 2358                  /* loop checking BUS_ISA/BUS_EISA */
2359 2359                  for (i = 0; i < 2; i++) {
2360 2360                          if (((busid = apic_find_bus_id(bustype)) != -1) &&
2361 2361                              ((intrp = apic_find_io_intr_w_busid(irqno, busid))
2362 2362                              != NULL)) {
2363 2363                                  return (apix_intx_setup(dip, inum, irqno,
2364 2364                                      intrp, ispec, NULL));
2365 2365                          }
2366 2366                          bustype = (bustype == BUS_EISA) ? BUS_ISA : BUS_EISA;
2367 2367                  }
2368 2368  
2369 2369                  /* fall back to default configuration */
2370 2370                  return (-1);
2371 2371          }
2372 2372  
2373 2373          /* search iso entries first */
2374 2374          if (acpi_iso_cnt != 0) {
2375 2375                  hp = (ACPI_SUBTABLE_HEADER *)acpi_isop;
2376 2376                  i = 0;
2377 2377                  while (i < acpi_iso_cnt) {
2378 2378                          if (hp->Type == ACPI_MADT_TYPE_INTERRUPT_OVERRIDE) {
2379 2379                                  isop = (ACPI_MADT_INTERRUPT_OVERRIDE *) hp;
2380 2380                                  if (isop->Bus == 0 &&
2381 2381                                      isop->SourceIrq == irqno) {
2382 2382                                          newirq = isop->GlobalIrq;
2383 2383                                          intr_flag.intr_po = isop->IntiFlags &
2384 2384                                              ACPI_MADT_POLARITY_MASK;
2385 2385                                          intr_flag.intr_el = (isop->IntiFlags &
2386 2386                                              ACPI_MADT_TRIGGER_MASK) >> 2;
2387 2387                                          intr_flag.bustype = BUS_ISA;
2388 2388  
2389 2389                                          return (apix_intx_setup(dip, inum,
2390 2390                                              newirq, NULL, ispec, &intr_flag));
2391 2391                                  }
2392 2392                                  i++;
2393 2393                          }
2394 2394                          hp = (ACPI_SUBTABLE_HEADER *)(((char *)hp) +
2395 2395                              hp->Length);
2396 2396                  }
2397 2397          }
2398 2398          intr_flag.intr_po = INTR_PO_ACTIVE_HIGH;
2399 2399          intr_flag.intr_el = INTR_EL_EDGE;
2400 2400          intr_flag.bustype = BUS_ISA;
2401 2401          return (apix_intx_setup(dip, inum, irqno, NULL, ispec, &intr_flag));
2402 2402  }
2403 2403  
2404 2404  
2405 2405  /*
2406 2406   * Setup IRQ table for pci devices. Return IRQ no or -1 on error
2407 2407   */
2408 2408  static int
2409 2409  apix_intx_setup_pci(dev_info_t *dip, int inum, int bustype,
2410 2410      struct intrspec *ispec)
2411 2411  {
2412 2412          int busid, devid, pci_irq;
2413 2413          ddi_acc_handle_t cfg_handle;
2414 2414          uchar_t ipin;
2415 2415          iflag_t intr_flag;
2416 2416          struct apic_io_intr *intrp;
2417 2417  
2418 2418          if (acpica_get_bdf(dip, &busid, &devid, NULL) != 0)
2419 2419                  return (-1);
2420 2420  
2421 2421          if (busid == 0 && apic_pci_bus_total == 1)
2422 2422                  busid = (int)apic_single_pci_busid;
2423 2423  
2424 2424          if (pci_config_setup(dip, &cfg_handle) != DDI_SUCCESS)
2425 2425                  return (-1);
2426 2426          ipin = pci_config_get8(cfg_handle, PCI_CONF_IPIN) - PCI_INTA;
2427 2427          pci_config_teardown(&cfg_handle);
2428 2428  
2429 2429          if (apic_enable_acpi && !apic_use_acpi_madt_only) {     /* ACPI */
2430 2430                  if (apic_acpi_translate_pci_irq(dip, busid, devid,
2431 2431                      ipin, &pci_irq, &intr_flag) != ACPI_PSM_SUCCESS)
2432 2432                          return (-1);
2433 2433  
2434 2434                  intr_flag.bustype = (uchar_t)bustype;
2435 2435                  return (apix_intx_setup(dip, inum, pci_irq, NULL, ispec,
2436 2436                      &intr_flag));
2437 2437          }
2438 2438  
2439 2439          /* MP configuration table */
2440 2440          pci_irq = ((devid & 0x1f) << 2) | (ipin & 0x3);
2441 2441          if ((intrp = apic_find_io_intr_w_busid(pci_irq, busid)) == NULL) {
2442 2442                  pci_irq = apic_handle_pci_pci_bridge(dip, devid, ipin, &intrp);
2443 2443                  if (pci_irq == -1)
2444 2444                          return (-1);
2445 2445          }
2446 2446  
2447 2447          return (apix_intx_setup(dip, inum, pci_irq, intrp, ispec, NULL));
2448 2448  }
2449 2449  
2450 2450  /*
2451 2451   * Translate and return IRQ no
2452 2452   */
2453 2453  static int
2454 2454  apix_intx_xlate_irq(dev_info_t *dip, int inum, struct intrspec *ispec)
2455 2455  {
2456 2456          int newirq, irqno = ispec->intrspec_vec;
2457 2457          int parent_is_pci_or_pciex = 0, child_is_pciex = 0;
2458 2458          int bustype = 0, dev_len;
2459 2459          char dev_type[16];
2460 2460  
2461 2461          if (apic_defconf) {
2462 2462                  mutex_enter(&airq_mutex);
2463 2463                  goto defconf;
2464 2464          }
2465 2465  
2466 2466          if ((dip == NULL) || (!apic_irq_translate && !apic_enable_acpi)) {
2467 2467                  mutex_enter(&airq_mutex);
2468 2468                  goto nonpci;
2469 2469          }
2470 2470  
2471 2471          /*
2472 2472           * use ddi_getlongprop_buf() instead of ddi_prop_lookup_string()
2473 2473           * to avoid extra buffer allocation.
2474 2474           */
2475 2475          dev_len = sizeof (dev_type);
2476 2476          if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ddi_get_parent(dip),
2477 2477              DDI_PROP_DONTPASS, "device_type", (caddr_t)dev_type,
2478 2478              &dev_len) == DDI_PROP_SUCCESS) {
2479 2479                  if ((strcmp(dev_type, "pci") == 0) ||
2480 2480                      (strcmp(dev_type, "pciex") == 0))
2481 2481                          parent_is_pci_or_pciex = 1;
2482 2482          }
2483 2483  
2484 2484          if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
2485 2485              DDI_PROP_DONTPASS, "compatible", (caddr_t)dev_type,
2486 2486              &dev_len) == DDI_PROP_SUCCESS) {
2487 2487                  if (strstr(dev_type, "pciex"))
2488 2488                          child_is_pciex = 1;
2489 2489          }
2490 2490  
2491 2491          mutex_enter(&airq_mutex);
2492 2492  
2493 2493          if (parent_is_pci_or_pciex) {
2494 2494                  bustype = child_is_pciex ? BUS_PCIE : BUS_PCI;
2495 2495                  newirq = apix_intx_setup_pci(dip, inum, bustype, ispec);
2496 2496                  if (newirq != -1)
2497 2497                          goto done;
2498 2498                  bustype = 0;
2499 2499          } else if (strcmp(dev_type, "isa") == 0)
2500 2500                  bustype = BUS_ISA;
2501 2501          else if (strcmp(dev_type, "eisa") == 0)
2502 2502                  bustype = BUS_EISA;
2503 2503  
2504 2504  nonpci:
2505 2505          newirq = apix_intx_setup_nonpci(dip, inum, bustype, ispec);
2506 2506          if (newirq != -1)
2507 2507                  goto done;
2508 2508  
2509 2509  defconf:
2510 2510          newirq = apix_intx_setup(dip, inum, irqno, NULL, ispec, NULL);
2511 2511          if (newirq == -1) {
2512 2512                  mutex_exit(&airq_mutex);
2513 2513                  return (-1);
2514 2514          }
2515 2515  done:
2516 2516          ASSERT(apic_irq_table[newirq]);
2517 2517          mutex_exit(&airq_mutex);
2518 2518          return (newirq);
2519 2519  }
2520 2520  
2521 2521  static int
2522 2522  apix_intx_alloc_vector(dev_info_t *dip, int inum, struct intrspec *ispec)
2523 2523  {
2524 2524          int irqno;
2525 2525          apix_vector_t *vecp;
2526 2526  
2527 2527          if ((irqno = apix_intx_xlate_irq(dip, inum, ispec)) == -1)
2528 2528                  return (0);
2529 2529  
2530 2530          if ((vecp = apix_alloc_intx(dip, inum, irqno)) == NULL)
2531 2531                  return (0);
2532 2532  
2533 2533          DDI_INTR_IMPLDBG((CE_CONT, "apix_intx_alloc_vector: dip=0x%p name=%s "
2534 2534              "irqno=0x%x cpuid=%d vector=0x%x\n",
2535 2535              (void *)dip, ddi_driver_name(dip), irqno,
2536 2536              vecp->v_cpuid, vecp->v_vector));
2537 2537  
2538 2538          return (1);
2539 2539  }
2540 2540  
2541 2541  /*
2542 2542   * Return the vector number if the translated IRQ for this device
2543 2543   * has a vector mapping setup. If no IRQ setup exists or no vector is
2544 2544   * allocated to it then return 0.
2545 2545   */
2546 2546  static apix_vector_t *
2547 2547  apix_intx_xlate_vector(dev_info_t *dip, int inum, struct intrspec *ispec)
2548 2548  {
2549 2549          int irqno;
2550 2550          apix_vector_t *vecp;
2551 2551  
2552 2552          /* get the IRQ number */
2553 2553          if ((irqno = apix_intx_xlate_irq(dip, inum, ispec)) == -1)
2554 2554                  return (NULL);
2555 2555  
2556 2556          /* get the vector number if a vector is allocated to this irqno */
2557 2557          vecp = apix_intx_get_vector(irqno);
2558 2558  
2559 2559          return (vecp);
2560 2560  }
2561 2561  
2562 2562  /* stub function */
2563 2563  int
2564 2564  apix_loaded(void)
2565 2565  {
2566 2566          return (apix_is_enabled);
2567 2567  }
  
    | ↓ open down ↓ | 2504 lines elided | ↑ open up ↑ | 
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX