illumos Wdiff usr/src/uts/sun4/os/intr.c

Print this page

OS-2366 ddi_periodic_add(9F) is entirely rubbish (MORE)

Split	Close
Expand all
Collapse all

          --- old/usr/src/uts/sun4/os/intr.c
          +++ new/usr/src/uts/sun4/os/intr.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   */
  25   25  /*
  26   26   * Copyright (c) 2013, Joyent, Inc.  All rights reserved.
  27   27   */
  28   28  
  29   29  #include <sys/sysmacros.h>
  30   30  #include <sys/stack.h>
  31   31  #include <sys/cpuvar.h>
  32   32  #include <sys/ivintr.h>
  33   33  #include <sys/intreg.h>
  34   34  #include <sys/membar.h>
  35   35  #include <sys/kmem.h>
  36   36  #include <sys/intr.h>
  37   37  #include <sys/sunddi.h>
  38   38  #include <sys/sunndi.h>

↓ open down ↓

38 lines elided

↑ open up ↑

  39   39  #include <sys/cmn_err.h>
  40   40  #include <sys/privregs.h>
  41   41  #include <sys/systm.h>
  42   42  #include <sys/archsystm.h>
  43   43  #include <sys/machsystm.h>
  44   44  #include <sys/x_call.h>
  45   45  #include <vm/seg_kp.h>
  46   46  #include <sys/debug.h>
  47   47  #include <sys/cyclic.h>
  48   48  #include <sys/kdi_impl.h>
  49      -#include <sys/ddi_timer.h>
       49 +#include <sys/ddi_periodic.h>
  50   50  
  51   51  #include <sys/cpu_sgnblk_defs.h>
  52   52  
  53   53  /* Global locks which protect the interrupt distribution lists */
  54   54  static kmutex_t intr_dist_lock;
  55   55  static kmutex_t intr_dist_cpu_lock;
  56   56  
  57   57  /* Head of the interrupt distribution lists */
  58   58  static struct intr_dist *intr_dist_head = NULL;
  59   59  static struct intr_dist *intr_dist_whead = NULL;

  60   60  
  61   61  static uint64_t siron_inum[DDI_IPL_10]; /* software interrupt numbers */
  62   62  uint64_t *siron_cpu_inum = NULL;
  63   63  uint64_t siron_poke_cpu_inum;
  64   64  static int siron_cpu_setup(cpu_setup_t, int, void *);
  65   65  extern uint_t softlevel1();
  66   66  
  67   67  static uint64_t siron1_inum; /* backward compatibility */
  68   68  uint64_t poke_cpu_inum;
  69   69  uint_t poke_cpu_intr(caddr_t arg1, caddr_t arg2);
  70   70  uint_t siron_poke_cpu_intr(caddr_t arg1, caddr_t arg2);
  71   71  
  72   72  /*
  73   73   * Variable to enable/disable printing a message when an invalid vecintr
  74   74   * is received.
  75   75   */
  76   76  uint_t ignore_invalid_vecintr = 0;
  77   77  
  78   78  /*
  79   79   * Note:-
  80   80   * siron_pending was originally created to prevent a resource over consumption
  81   81   * bug in setsoftint(exhaustion of interrupt pool free list).
  82   82   * Its original intention is obsolete with the use of iv_pending in
  83   83   * setsoftint. However, siron_pending stayed around, acting as a second
  84   84   * gatekeeper preventing soft interrupts from being queued. In this capacity,
  85   85   * it can lead to hangs on MP systems, where due to global visibility issues
  86   86   * it can end up set while iv_pending is reset, preventing soft interrupts from
  87   87   * ever being processed. In addition to its gatekeeper role, intr_init also
  88   88   * uses it to flag the situation where siron() was called before siron_inum has
  89   89   * been defined.
  90   90   *
  91   91   * siron() does not need an extra gatekeeper; any cpu that wishes should be
  92   92   * allowed to queue a soft interrupt. It is softint()'s job to ensure
  93   93   * correct handling of the queues. Therefore, siron_pending has been
  94   94   * stripped of its gatekeeper task, retaining only its intr_init job, where
  95   95   * it indicates that there is a pending need to call siron().
  96   96   */
  97   97  static int siron_pending[DDI_IPL_10]; /* software interrupt pending flags */
  98   98  static int siron1_pending; /* backward compatibility */
  99   99  
 100  100  int intr_policy = INTR_WEIGHTED_DIST;   /* interrupt distribution policy */
 101  101  int intr_dist_debug = 0;
 102  102  int32_t intr_dist_weight_max = 1;
 103  103  int32_t intr_dist_weight_maxmax = 1000;
 104  104  int intr_dist_weight_maxfactor = 2;
 105  105  #define INTR_DEBUG(args) if (intr_dist_debug) cmn_err args
 106  106  
 107  107  /*
 108  108   * intr_init() - Interrupt initialization
 109  109   *      Initialize the system's interrupt vector table.
 110  110   */
 111  111  void
 112  112  intr_init(cpu_t *cp)
 113  113  {
 114  114          int i;
 115  115          extern uint_t softlevel1();
 116  116  
 117  117          init_ivintr();
 118  118          REGISTER_BBUS_INTR();
 119  119  
 120  120          /*
 121  121           * Register these software interrupts for ddi timer.
 122  122           * Software interrupts up to the level 10 are supported.
 123  123           */
 124  124          for (i = DDI_IPL_1; i <= DDI_IPL_10; i++) {
 125  125                  siron_inum[i - 1] = add_softintr(i,
 126  126                      (softintrfunc)ddi_periodic_softintr,
 127  127                      (caddr_t)(uintptr_t)(i), SOFTINT_ST);
 128  128          }
 129  129  
 130  130          siron1_inum = add_softintr(PIL_1, softlevel1, 0, SOFTINT_ST);
 131  131          poke_cpu_inum = add_softintr(PIL_13, poke_cpu_intr, 0, SOFTINT_MT);
 132  132          siron_poke_cpu_inum = add_softintr(PIL_13,
 133  133              siron_poke_cpu_intr, 0, SOFTINT_MT);
 134  134          cp->cpu_m.poke_cpu_outstanding = B_FALSE;
 135  135  
 136  136          mutex_init(&intr_dist_lock, NULL, MUTEX_DEFAULT, NULL);
 137  137          mutex_init(&intr_dist_cpu_lock, NULL, MUTEX_DEFAULT, NULL);
 138  138  
 139  139          /*
 140  140           * A soft interrupt may have been requested prior to the initialization
 141  141           * of soft interrupts.  Soft interrupts can't be dispatched until after
 142  142           * intr_init(), so we have to wait until now before we can dispatch the
 143  143           * pending soft interrupt (if any).
 144  144           */
 145  145          for (i = DDI_IPL_1; i <= DDI_IPL_10; i++) {
 146  146                  if (siron_pending[i-1]) {
 147  147                          siron_pending[i-1] = 0;
 148  148                          sir_on(i);
 149  149                  }
 150  150          }
 151  151          if (siron1_pending) {
 152  152                  siron1_pending = 0;
 153  153                  siron();
 154  154          }
 155  155  }
 156  156  
 157  157  /*
 158  158   * poke_cpu_intr - fall through when poke_cpu calls
 159  159   */
 160  160  /* ARGSUSED */
 161  161  uint_t
 162  162  poke_cpu_intr(caddr_t arg1, caddr_t arg2)
 163  163  {
 164  164          CPU->cpu_m.poke_cpu_outstanding = B_FALSE;
 165  165          membar_stld_stst();
 166  166          return (1);
 167  167  }
 168  168  
 169  169  /*
 170  170   * Trigger software interrupts dedicated to ddi timer.
 171  171   */
 172  172  void
 173  173  sir_on(int level)
 174  174  {
 175  175          ASSERT(level >= DDI_IPL_1 && level <= DDI_IPL_10);
 176  176          if (siron_inum[level-1])
 177  177                  setsoftint(siron_inum[level-1]);
 178  178          else
 179  179                  siron_pending[level-1] = 1;
 180  180  }
 181  181  
 182  182  /*
 183  183   * kmdb uses siron (and thus setsoftint) while the world is stopped in order to
 184  184   * inform its driver component that there's work to be done.  We need to keep
 185  185   * DTrace from instrumenting kmdb's siron and setsoftint.  We duplicate siron,
 186  186   * giving kmdb's version a kdi_ prefix to keep DTrace at bay.  The
 187  187   * implementation of setsoftint is complicated enough that we don't want to
 188  188   * duplicate it, but at the same time we don't want to preclude tracing either.
 189  189   * The meat of setsoftint() therefore goes into kdi_setsoftint, with
 190  190   * setsoftint() implemented as a wrapper.  This allows tracing, while still
 191  191   * providing a way for kmdb to sneak in unmolested.
 192  192   */
 193  193  void
 194  194  kdi_siron(void)
 195  195  {
 196  196          if (siron1_inum != 0)
 197  197                  kdi_setsoftint(siron1_inum);
 198  198          else
 199  199                  siron1_pending = 1;
 200  200  }
 201  201  
 202  202  void
 203  203  setsoftint(uint64_t inum)
 204  204  {
 205  205          kdi_setsoftint(inum);
 206  206  }
 207  207  
 208  208  /*
 209  209   * Generates softlevel1 interrupt on current CPU if it
 210  210   * is not pending already.
 211  211   */
 212  212  void
 213  213  siron(void)
 214  214  {
 215  215          uint64_t inum;
 216  216  
 217  217          if (siron1_inum != 0) {
 218  218                  /*
 219  219                   * Once siron_cpu_inum has been allocated, we can
 220  220                   * use per-CPU siron inum.
 221  221                   */
 222  222                  if (siron_cpu_inum && siron_cpu_inum[CPU->cpu_id] != 0)
 223  223                          inum = siron_cpu_inum[CPU->cpu_id];
 224  224                  else
 225  225                          inum = siron1_inum;
 226  226  
 227  227                  setsoftint(inum);
 228  228          } else
 229  229                  siron1_pending = 1;
 230  230  }
 231  231  
 232  232  
 233  233  static void
 234  234  siron_init(void)
 235  235  {
 236  236          /*
 237  237           * We just allocate memory for per-cpu siron right now. Rest of
 238  238           * the work is done when CPU is configured.
 239  239           */
 240  240          siron_cpu_inum = kmem_zalloc(sizeof (uint64_t) * NCPU, KM_SLEEP);
 241  241  }
 242  242  
 243  243  /*
 244  244   * This routine creates per-CPU siron inum for CPUs which are
 245  245   * configured during boot.
 246  246   */
 247  247  void
 248  248  siron_mp_init()
 249  249  {
 250  250          cpu_t *c;
 251  251  
 252  252          /*
 253  253           * Get the memory for per-CPU siron inums
 254  254           */
 255  255          siron_init();
 256  256  
 257  257          mutex_enter(&cpu_lock);
 258  258          c = cpu_list;
 259  259          do {
 260  260                  (void) siron_cpu_setup(CPU_CONFIG, c->cpu_id, NULL);
 261  261          } while ((c = c->cpu_next) != cpu_list);
 262  262  
 263  263          register_cpu_setup_func(siron_cpu_setup, NULL);
 264  264          mutex_exit(&cpu_lock);
 265  265  }
 266  266  
 267  267  /*
 268  268   * siron_poke_cpu_intr - cross-call handler.
 269  269   */
 270  270  /* ARGSUSED */
 271  271  uint_t
 272  272  siron_poke_cpu_intr(caddr_t arg1, caddr_t arg2)
 273  273  {
 274  274          /* generate level1 softint */
 275  275          siron();
 276  276          return (1);
 277  277  }
 278  278  
 279  279  /*
 280  280   * This routine generates a cross-call on target CPU(s).
 281  281   */
 282  282  void
 283  283  siron_poke_cpu(cpuset_t poke)
 284  284  {
 285  285          int cpuid = CPU->cpu_id;
 286  286  
 287  287          if (CPU_IN_SET(poke, cpuid)) {
 288  288                  siron();
 289  289                  CPUSET_DEL(poke, cpuid);
 290  290                  if (CPUSET_ISNULL(poke))
 291  291                          return;
 292  292          }
 293  293  
 294  294          xt_some(poke, setsoftint_tl1, siron_poke_cpu_inum, 0);
 295  295  }
 296  296  
 297  297  /*
 298  298   * This callback function allows us to create per-CPU siron inum.
 299  299   */
 300  300  /* ARGSUSED */
 301  301  static int
 302  302  siron_cpu_setup(cpu_setup_t what, int id, void *arg)
 303  303  {
 304  304          cpu_t *cp = cpu[id];
 305  305  
 306  306          ASSERT(MUTEX_HELD(&cpu_lock));
 307  307          ASSERT(cp != NULL);
 308  308  
 309  309          switch (what) {
 310  310          case CPU_CONFIG:
 311  311                  siron_cpu_inum[cp->cpu_id] = add_softintr(PIL_1,
 312  312                      (softintrfunc)softlevel1, 0, SOFTINT_ST);
 313  313                  break;
 314  314          case CPU_UNCONFIG:
 315  315                  (void) rem_softintr(siron_cpu_inum[cp->cpu_id]);
 316  316                  siron_cpu_inum[cp->cpu_id] = 0;
 317  317                  break;
 318  318          default:
 319  319                  break;
 320  320          }
 321  321  
 322  322          return (0);
 323  323  }
 324  324  
 325  325  /*
 326  326   * no_ivintr()
 327  327   *      called by setvecint_tl1() through sys_trap()
 328  328   *      vector interrupt received but not valid or not
 329  329   *      registered in intr_vec_table
 330  330   *      considered as a spurious mondo interrupt
 331  331   */
 332  332  /* ARGSUSED */
 333  333  void
 334  334  no_ivintr(struct regs *rp, int inum, int pil)
 335  335  {
 336  336          if (!ignore_invalid_vecintr)
 337  337                  cmn_err(CE_WARN, "invalid vector intr: number 0x%x, pil 0x%x",
 338  338                      inum, pil);
 339  339  
 340  340  #ifdef DEBUG_VEC_INTR
 341  341          prom_enter_mon();
 342  342  #endif /* DEBUG_VEC_INTR */
 343  343  }
 344  344  
 345  345  void
 346  346  intr_dequeue_req(uint_t pil, uint64_t inum)
 347  347  {
 348  348          intr_vec_t      *iv, *next, *prev;
 349  349          struct machcpu  *mcpu;
 350  350          uint32_t        clr;
 351  351          processorid_t   cpu_id;
 352  352          extern uint_t   getpstate(void);
 353  353  
 354  354          ASSERT((getpstate() & PSTATE_IE) == 0);
 355  355  
 356  356          mcpu = &CPU->cpu_m;
 357  357          cpu_id = CPU->cpu_id;
 358  358  
 359  359          iv = (intr_vec_t *)inum;
 360  360          prev = NULL;
 361  361          next = mcpu->intr_head[pil];
 362  362  
 363  363          /* Find a matching entry in the list */
 364  364          while (next != NULL) {
 365  365                  if (next == iv)
 366  366                          break;
 367  367                  prev = next;
 368  368                  next = IV_GET_PIL_NEXT(next, cpu_id);
 369  369          }
 370  370  
 371  371          if (next != NULL) {
 372  372                  intr_vec_t      *next_iv = IV_GET_PIL_NEXT(next, cpu_id);
 373  373  
 374  374                  /* Remove entry from list */
 375  375                  if (prev != NULL)
 376  376                          IV_SET_PIL_NEXT(prev, cpu_id, next_iv); /* non-head */
 377  377                  else
 378  378                          mcpu->intr_head[pil] = next_iv; /* head */
 379  379  
 380  380                  if (next_iv == NULL)
 381  381                          mcpu->intr_tail[pil] = prev; /* tail */
 382  382          }
 383  383  
 384  384          /* Clear pending interrupts at this level if the list is empty */
 385  385          if (mcpu->intr_head[pil] == NULL) {
 386  386                  clr = 1 << pil;
 387  387                  if (pil == PIL_14)
 388  388                          clr |= (TICK_INT_MASK | STICK_INT_MASK);
 389  389                  wr_clr_softint(clr);
 390  390          }
 391  391  }
 392  392  
 393  393  
 394  394  /*
 395  395   * Send a directed interrupt of specified interrupt number id to a cpu.
 396  396   */
 397  397  void
 398  398  send_dirint(
 399  399          int cpuix,              /* cpu to be interrupted */
 400  400          int intr_id)            /* interrupt number id */
 401  401  {
 402  402          xt_one(cpuix, setsoftint_tl1, intr_id, 0);
 403  403  }
 404  404  
 405  405  /*
 406  406   * Take the specified CPU out of participation in interrupts.
 407  407   *      Called by p_online(2) when a processor is being taken off-line.
 408  408   *      This allows interrupt threads being handled on the processor to
 409  409   *      complete before the processor is idled.
 410  410   */
 411  411  int
 412  412  cpu_disable_intr(struct cpu *cp)
 413  413  {
 414  414          ASSERT(MUTEX_HELD(&cpu_lock));
 415  415  
 416  416          /*
 417  417           * Turn off the CPU_ENABLE flag before calling the redistribution
 418  418           * function, since it checks for this in the cpu flags.
 419  419           */
 420  420          cp->cpu_flags &= ~CPU_ENABLE;
 421  421  
 422  422          intr_redist_all_cpus();
 423  423  
 424  424          return (0);
 425  425  }
 426  426  
 427  427  /*
 428  428   * Allow the specified CPU to participate in interrupts.
 429  429   *      Called by p_online(2) if a processor could not be taken off-line
 430  430   *      because of bound threads, in order to resume processing interrupts.
 431  431   *      Also called after starting a processor.
 432  432   */
 433  433  void
 434  434  cpu_enable_intr(struct cpu *cp)
 435  435  {
 436  436          ASSERT(MUTEX_HELD(&cpu_lock));
 437  437  
 438  438          cp->cpu_flags |= CPU_ENABLE;
 439  439  
 440  440          intr_redist_all_cpus();
 441  441  }
 442  442  
 443  443  /*
 444  444   * Add function to callback list for intr_redist_all_cpus.  We keep two lists,
 445  445   * one for weighted callbacks and one for normal callbacks. Weighted callbacks
 446  446   * are issued to redirect interrupts of a specified weight, from heavy to
 447  447   * light.  This allows all the interrupts of a given weight to be redistributed
 448  448   * for all weighted nexus drivers prior to those of less weight.
 449  449   */
 450  450  static void
 451  451  intr_dist_add_list(struct intr_dist **phead, void (*func)(void *), void *arg)
 452  452  {
 453  453          struct intr_dist *new = kmem_alloc(sizeof (*new), KM_SLEEP);
 454  454          struct intr_dist *iptr;
 455  455          struct intr_dist **pptr;
 456  456  
 457  457          ASSERT(func);
 458  458          new->func = func;
 459  459          new->arg = arg;
 460  460          new->next = NULL;
 461  461  
 462  462          /* Add to tail so that redistribution occurs in original order. */
 463  463          mutex_enter(&intr_dist_lock);
 464  464          for (iptr = *phead, pptr = phead; iptr != NULL;
 465  465              pptr = &iptr->next, iptr = iptr->next) {
 466  466                  /* check for problems as we locate the tail */
 467  467                  if ((iptr->func == func) && (iptr->arg == arg)) {
 468  468                          cmn_err(CE_PANIC, "intr_dist_add_list(): duplicate");
 469  469                          /*NOTREACHED*/
 470  470                  }
 471  471          }
 472  472          *pptr = new;
 473  473  
 474  474          mutex_exit(&intr_dist_lock);
 475  475  }
 476  476  
 477  477  void
 478  478  intr_dist_add(void (*func)(void *), void *arg)
 479  479  {
 480  480          intr_dist_add_list(&intr_dist_head, (void (*)(void *))func, arg);
 481  481  }
 482  482  
 483  483  void
 484  484  intr_dist_add_weighted(void (*func)(void *, int32_t, int32_t), void *arg)
 485  485  {
 486  486          intr_dist_add_list(&intr_dist_whead, (void (*)(void *))func, arg);
 487  487  }
 488  488  
 489  489  /*
 490  490   * Search for the interrupt distribution structure with the specified
 491  491   * mondo vec reg in the interrupt distribution list. If a match is found,
 492  492   * then delete the entry from the list. The caller is responsible for
 493  493   * modifying the mondo vector registers.
 494  494   */
 495  495  static void
 496  496  intr_dist_rem_list(struct intr_dist **headp, void (*func)(void *), void *arg)
 497  497  {
 498  498          struct intr_dist *iptr;
 499  499          struct intr_dist **vect;
 500  500  
 501  501          mutex_enter(&intr_dist_lock);
 502  502          for (iptr = *headp, vect = headp;
 503  503              iptr != NULL; vect = &iptr->next, iptr = iptr->next) {
 504  504                  if ((iptr->func == func) && (iptr->arg == arg)) {
 505  505                          *vect = iptr->next;
 506  506                          kmem_free(iptr, sizeof (struct intr_dist));
 507  507                          mutex_exit(&intr_dist_lock);
 508  508                          return;
 509  509                  }
 510  510          }
 511  511  
 512  512          if (!panicstr)
 513  513                  cmn_err(CE_PANIC, "intr_dist_rem_list: not found");
 514  514          mutex_exit(&intr_dist_lock);
 515  515  }
 516  516  
 517  517  void
 518  518  intr_dist_rem(void (*func)(void *), void *arg)
 519  519  {
 520  520          intr_dist_rem_list(&intr_dist_head, (void (*)(void *))func, arg);
 521  521  }
 522  522  
 523  523  void
 524  524  intr_dist_rem_weighted(void (*func)(void *, int32_t, int32_t), void *arg)
 525  525  {
 526  526          intr_dist_rem_list(&intr_dist_whead, (void (*)(void *))func, arg);
 527  527  }
 528  528  
 529  529  /*
 530  530   * Initiate interrupt redistribution.  Redistribution improves the isolation
 531  531   * associated with interrupt weights by ordering operations from heavy weight
 532  532   * to light weight.  When a CPU's orientation changes relative to interrupts,
 533  533   * there is *always* a redistribution to accommodate this change (call to
 534  534   * intr_redist_all_cpus()).  As devices (not CPUs) attach/detach it is possible
 535  535   * that a redistribution could improve the quality of an initialization. For
 536  536   * example, if you are not using a NIC it may not be attached with s10 (devfs).
 537  537   * If you then configure the NIC (ifconfig), this may cause the NIC to attach
 538  538   * and plumb interrupts.  The CPU assignment for the NIC's interrupts is
 539  539   * occurring late, so optimal "isolation" relative to weight is not occurring.
 540  540   * The same applies to detach, although in this case doing the redistribution
 541  541   * might improve "spread" for medium weight devices since the "isolation" of
 542  542   * a higher weight device may no longer be present.
 543  543   *
 544  544   * NB: We should provide a utility to trigger redistribution (ala "intradm -r").
 545  545   *
 546  546   * NB: There is risk associated with automatically triggering execution of the
 547  547   * redistribution code at arbitrary times. The risk comes from the fact that
 548  548   * there is a lot of low-level hardware interaction associated with a
 549  549   * redistribution.  At some point we may want this code to perform automatic
 550  550   * redistribution (redistribution thread; trigger timeout when add/remove
 551  551   * weight delta is large enough, and call cv_signal from timeout - causing
 552  552   * thread to call i_ddi_intr_redist_all_cpus()) but this is considered too
 553  553   * risky at this time.
 554  554   */
 555  555  void
 556  556  i_ddi_intr_redist_all_cpus()
 557  557  {
 558  558          mutex_enter(&cpu_lock);
 559  559          INTR_DEBUG((CE_CONT, "intr_dist: i_ddi_intr_redist_all_cpus\n"));
 560  560          intr_redist_all_cpus();
 561  561          mutex_exit(&cpu_lock);
 562  562  }
 563  563  
 564  564  /*
 565  565   * Redistribute all interrupts
 566  566   *
 567  567   * This function redistributes all interrupting devices, running the
 568  568   * parent callback functions for each node.
 569  569   */
 570  570  void
 571  571  intr_redist_all_cpus(void)
 572  572  {
 573  573          struct cpu *cp;
 574  574          struct intr_dist *iptr;
 575  575          int32_t weight, max_weight;
 576  576  
 577  577          ASSERT(MUTEX_HELD(&cpu_lock));
 578  578          mutex_enter(&intr_dist_lock);
 579  579  
 580  580          /*
 581  581           * zero cpu_intr_weight on all cpus - it is safe to traverse
 582  582           * cpu_list since we hold cpu_lock.
 583  583           */
 584  584          cp = cpu_list;
 585  585          do {
 586  586                  cp->cpu_intr_weight = 0;
 587  587          } while ((cp = cp->cpu_next) != cpu_list);
 588  588  
 589  589          /*
 590  590           * Assume that this redistribution may encounter a device weight
 591  591           * via driver.conf tuning of "ddi-intr-weight" that is at most
 592  592           * intr_dist_weight_maxfactor times larger.
 593  593           */
 594  594          max_weight = intr_dist_weight_max * intr_dist_weight_maxfactor;
 595  595          if (max_weight > intr_dist_weight_maxmax)
 596  596                  max_weight = intr_dist_weight_maxmax;
 597  597          intr_dist_weight_max = 1;
 598  598  
 599  599          INTR_DEBUG((CE_CONT, "intr_dist: "
 600  600              "intr_redist_all_cpus: %d-0\n", max_weight));
 601  601  
 602  602          /*
 603  603           * Redistribute weighted, from heavy to light.  The callback that
 604  604           * specifies a weight equal to weight_max should redirect all
 605  605           * interrupts of weight weight_max or greater [weight_max, inf.).
 606  606           * Interrupts of lesser weight should be processed on the call with
 607  607           * the matching weight. This allows all the heavier weight interrupts
 608  608           * on all weighted busses (multiple pci busses) to be redirected prior
 609  609           * to any lesser weight interrupts.
 610  610           */
 611  611          for (weight = max_weight; weight >= 0; weight--)
 612  612                  for (iptr = intr_dist_whead; iptr != NULL; iptr = iptr->next)
 613  613                          ((void (*)(void *, int32_t, int32_t))iptr->func)
 614  614                              (iptr->arg, max_weight, weight);
 615  615  
 616  616          /* redistribute normal (non-weighted) interrupts */
 617  617          for (iptr = intr_dist_head; iptr != NULL; iptr = iptr->next)
 618  618                  ((void (*)(void *))iptr->func)(iptr->arg);
 619  619          mutex_exit(&intr_dist_lock);
 620  620  }
 621  621  
 622  622  void
 623  623  intr_redist_all_cpus_shutdown(void)
 624  624  {
 625  625          intr_policy = INTR_CURRENT_CPU;
 626  626          intr_redist_all_cpus();
 627  627  }
 628  628  
 629  629  /*
 630  630   * Determine what CPU to target, based on interrupt policy.
 631  631   *
 632  632   * INTR_FLAT_DIST: hold a current CPU pointer in a static variable and
 633  633   *      advance through interrupt enabled cpus (round-robin).
 634  634   *
 635  635   * INTR_WEIGHTED_DIST: search for an enabled CPU with the lowest
 636  636   *      cpu_intr_weight, round robin when all equal.
 637  637   *
 638  638   *      Weighted interrupt distribution provides two things: "spread" of weight
 639  639   *      (associated with algorithm itself) and "isolation" (associated with a
 640  640   *      particular device weight). A redistribution is what provides optimal
 641  641   *      "isolation" of heavy weight interrupts, optimal "spread" of weight
 642  642   *      (relative to what came before) is always occurring.
 643  643   *
 644  644   *      An interrupt weight is a subjective number that represents the
 645  645   *      percentage of a CPU required to service a device's interrupts: the
 646  646   *      default weight is 0% (however the algorithm still maintains
 647  647   *      round-robin), a network interface controller (NIC) may have a large
 648  648   *      weight (35%). Interrupt weight only has meaning relative to the
 649  649   *      interrupt weight of other devices: a CPU can be weighted more than
 650  650   *      100%, and a single device might consume more than 100% of a CPU.
 651  651   *
 652  652   *      A coarse interrupt weight can be defined by the parent nexus driver
 653  653   *      based on bus specific information, like pci class codes. A nexus
 654  654   *      driver that supports device interrupt weighting for its children
 655  655   *      should call intr_dist_cpuid_add/rem_device_weight(), which adds
 656  656   *      and removes the weight of a device from the CPU that an interrupt
 657  657   *      is directed at.  The quality of initialization improves when the
 658  658   *      device interrupt weights more accurately reflect actual run-time weights,
 659  659   *      and as the assignments are ordered from heavy to light.
 660  660   *
 661  661   *      The implementation also supports interrupt weight being specified in
 662  662   *      driver.conf files via the property "ddi-intr-weight", which takes
 663  663   *      precedence over the nexus supplied weight.  This support is added to
 664  664   *      permit possible tweaking in the product in response to customer
 665  665   *      problems. This is not a formal or committed interface.
 666  666   *
 667  667   *      While a weighted approach chooses the CPU providing the best spread
 668  668   *      given past weights, less than optimal isolation can result in cases
 669  669   *      where heavy weight devices show up last. The nexus driver's interrupt
 670  670   *      redistribution logic should use intr_dist_add/rem_weighted so that
 671  671   *      interrupts can be redistributed heavy first for optimal isolation.
 672  672   */
 673  673  uint32_t
 674  674  intr_dist_cpuid(void)
 675  675  {
 676  676          static struct cpu       *curr_cpu;
 677  677          struct cpu              *start_cpu;
 678  678          struct cpu              *new_cpu;
 679  679          struct cpu              *cp;
 680  680          int                     cpuid = -1;
 681  681  
 682  682          /* Establish exclusion for curr_cpu and cpu_intr_weight manipulation */
 683  683          mutex_enter(&intr_dist_cpu_lock);
 684  684  
 685  685          switch (intr_policy) {
 686  686          case INTR_CURRENT_CPU:
 687  687                  cpuid = CPU->cpu_id;
 688  688                  break;
 689  689  
 690  690          case INTR_BOOT_CPU:
 691  691                  panic("INTR_BOOT_CPU no longer supported.");
 692  692                  /*NOTREACHED*/
 693  693  
 694  694          case INTR_FLAT_DIST:
 695  695          case INTR_WEIGHTED_DIST:
 696  696          default:
 697  697                  /*
 698  698                   * Ensure that curr_cpu is valid - cpu_next will be NULL if
 699  699                   * the cpu has been deleted (cpu structs are never freed).
 700  700                   */
 701  701                  if (curr_cpu == NULL || curr_cpu->cpu_next == NULL)
 702  702                          curr_cpu = CPU;
 703  703  
 704  704                  /*
 705  705                   * Advance to online CPU after curr_cpu (round-robin). For
 706  706                   * INTR_WEIGHTED_DIST we choose the cpu with the lightest
 707  707                   * weight.  For a nexus that does not support weight the
 708  708                   * default weight of zero is used. We degrade to round-robin
 709  709                   * behavior among equal weights.  The default weight is zero
 710  710                   * and round-robin behavior continues.
 711  711                   *
 712  712                   * Disable preemption while traversing cpu_next_onln to
 713  713                   * ensure the list does not change.  This works because
 714  714                   * modifiers of this list and other lists in a struct cpu
 715  715                   * call pause_cpus() before making changes.
 716  716                   */
 717  717                  kpreempt_disable();
 718  718                  cp = start_cpu = curr_cpu->cpu_next_onln;
 719  719                  new_cpu = NULL;
 720  720                  do {
 721  721                          /* Skip CPUs with interrupts disabled */
 722  722                          if ((cp->cpu_flags & CPU_ENABLE) == 0)
 723  723                                  continue;
 724  724  
 725  725                          if (intr_policy == INTR_FLAT_DIST) {
 726  726                                  /* select CPU */
 727  727                                  new_cpu = cp;
 728  728                                  break;
 729  729                          } else if ((new_cpu == NULL) ||
 730  730                              (cp->cpu_intr_weight < new_cpu->cpu_intr_weight)) {
 731  731                                  /* Choose if lighter weight */
 732  732                                  new_cpu = cp;
 733  733                          }
 734  734                  } while ((cp = cp->cpu_next_onln) != start_cpu);
 735  735                  ASSERT(new_cpu);
 736  736                  cpuid = new_cpu->cpu_id;
 737  737  
 738  738                  INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: "
 739  739                      "targeted\n", cpuid, new_cpu->cpu_intr_weight));
 740  740  
 741  741                  /* update static pointer for next round-robin */
 742  742                  curr_cpu = new_cpu;
 743  743                  kpreempt_enable();
 744  744                  break;
 745  745          }
 746  746          mutex_exit(&intr_dist_cpu_lock);
 747  747          return (cpuid);
 748  748  }
 749  749  
 750  750  /*
 751  751   * Add or remove the weight of a device from a CPU's interrupt weight.
 752  752   *
 753  753   * We expect nexus drivers to call intr_dist_cpuid_add/rem_device_weight for
 754  754   * their children to improve the overall quality of interrupt initialization.
 755  755   *
 756  756   * If a nexus shares the CPU returned by a single intr_dist_cpuid() call
 757  757   * among multiple devices (sharing ino) then the nexus should call
 758  758   * intr_dist_cpuid_add/rem_device_weight for each device separately. Devices
 759  759   * that share must specify the same cpuid.
 760  760   *
 761  761   * If a nexus driver is unable to determine the cpu at remove_intr time
 762  762   * for some of its interrupts, then it should not call add_device_weight -
 763  763   * intr_dist_cpuid will still provide round-robin.
 764  764   *
 765  765   * An established device weight (from dev_info node) takes precedence over
 766  766   * the weight passed in.  If a device weight is not already established
 767  767   * then the passed in nexus weight is established.
 768  768   */
 769  769  void
 770  770  intr_dist_cpuid_add_device_weight(uint32_t cpuid,
 771  771      dev_info_t *dip, int32_t nweight)
 772  772  {
 773  773          int32_t         eweight;
 774  774  
 775  775          /*
 776  776           * For non-weighted policy everything has weight of zero (and we get
 777  777           * round-robin distribution from intr_dist_cpuid).
 778  778           * NB: intr_policy is limited to this file. A weighted nexus driver is
 779  779           * calls this rouitne even if intr_policy has been patched to
 780  780           * INTR_FLAG_DIST.
 781  781           */
 782  782          ASSERT(dip);
 783  783          if (intr_policy != INTR_WEIGHTED_DIST)
 784  784                  return;
 785  785  
 786  786          eweight = i_ddi_get_intr_weight(dip);
 787  787          INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: +%2d/%2d for "
 788  788              "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight,
 789  789              nweight, eweight, ddi_driver_name(ddi_get_parent(dip)),
 790  790              ddi_get_instance(ddi_get_parent(dip)),
 791  791              ddi_driver_name(dip), ddi_get_instance(dip)));
 792  792  
 793  793          /* if no establish weight, establish nexus weight */
 794  794          if (eweight < 0) {
 795  795                  if (nweight > 0)
 796  796                          (void) i_ddi_set_intr_weight(dip, nweight);
 797  797                  else
 798  798                          nweight = 0;
 799  799          } else
 800  800                  nweight = eweight;      /* use established weight */
 801  801  
 802  802          /* Establish exclusion for cpu_intr_weight manipulation */
 803  803          mutex_enter(&intr_dist_cpu_lock);
 804  804          cpu[cpuid]->cpu_intr_weight += nweight;
 805  805  
 806  806          /* update intr_dist_weight_max */
 807  807          if (nweight > intr_dist_weight_max)
 808  808                  intr_dist_weight_max = nweight;
 809  809          mutex_exit(&intr_dist_cpu_lock);
 810  810  }
 811  811  
 812  812  void
 813  813  intr_dist_cpuid_rem_device_weight(uint32_t cpuid, dev_info_t *dip)
 814  814  {
 815  815          struct cpu      *cp;
 816  816          int32_t         weight;
 817  817  
 818  818          ASSERT(dip);
 819  819          if (intr_policy != INTR_WEIGHTED_DIST)
 820  820                  return;
 821  821  
 822  822          /* remove weight of device from cpu */
 823  823          weight = i_ddi_get_intr_weight(dip);
 824  824          if (weight < 0)
 825  825                  weight = 0;
 826  826          INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: -%2d    for "
 827  827              "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight, weight,
 828  828              ddi_driver_name(ddi_get_parent(dip)),
 829  829              ddi_get_instance(ddi_get_parent(dip)),
 830  830              ddi_driver_name(dip), ddi_get_instance(dip)));
 831  831  
 832  832          /* Establish exclusion for cpu_intr_weight manipulation */
 833  833          mutex_enter(&intr_dist_cpu_lock);
 834  834          cp = cpu[cpuid];
 835  835          cp->cpu_intr_weight -= weight;
 836  836          if (cp->cpu_intr_weight < 0)
 837  837                  cp->cpu_intr_weight = 0;        /* sanity */
 838  838          mutex_exit(&intr_dist_cpu_lock);
 839  839  }
 840  840  
 841  841  ulong_t
 842  842  create_softint(uint_t pil, uint_t (*func)(caddr_t, caddr_t), caddr_t arg1)
 843  843  {
 844  844          uint64_t inum;
 845  845  
 846  846          inum = add_softintr(pil, func, arg1, SOFTINT_MT);
 847  847          return ((ulong_t)inum);
 848  848  }
 849  849  
 850  850  void
 851  851  invoke_softint(processorid_t cpuid, ulong_t hdl)
 852  852  {
 853  853          uint64_t inum = hdl;
 854  854  
 855  855          if (cpuid == CPU->cpu_id)
 856  856                  setsoftint(inum);
 857  857          else
 858  858                  xt_one(cpuid, setsoftint_tl1, inum, 0);
 859  859  }
 860  860  
 861  861  void
 862  862  remove_softint(ulong_t hdl)
 863  863  {
 864  864          uint64_t inum = hdl;
 865  865  
 866  866          (void) rem_softintr(inum);
 867  867  }
 868  868  
 869  869  void
 870  870  sync_softint(cpuset_t set)
 871  871  {
 872  872          xt_sync(set);
 873  873  }

↓ open down ↓

814 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX