illumos Wdiff usr/src/uts/sun4/os/intr.c

Print this page

OS-2366 ddi_periodic_add(9F) is entirely rubbish

Split	Close
Expand all
Collapse all

          --- old/usr/src/uts/sun4/os/intr.c
          +++ new/usr/src/uts/sun4/os/intr.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.

↓ open down ↓

14 lines elided

↑ open up ↑

  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   */
       25 +/*
       26 + * Copyright (c) 2013, Joyent, Inc.  All rights reserved.
       27 + */
  25   28  
  26   29  #include <sys/sysmacros.h>
  27   30  #include <sys/stack.h>
  28   31  #include <sys/cpuvar.h>
  29   32  #include <sys/ivintr.h>
  30   33  #include <sys/intreg.h>
  31   34  #include <sys/membar.h>
  32   35  #include <sys/kmem.h>
  33   36  #include <sys/intr.h>
  34   37  #include <sys/sunddi.h>

  35   38  #include <sys/sunndi.h>
  36   39  #include <sys/cmn_err.h>
  37   40  #include <sys/privregs.h>
  38   41  #include <sys/systm.h>
  39   42  #include <sys/archsystm.h>
  40   43  #include <sys/machsystm.h>
  41   44  #include <sys/x_call.h>
  42   45  #include <vm/seg_kp.h>
  43   46  #include <sys/debug.h>
  44   47  #include <sys/cyclic.h>
  45   48  #include <sys/kdi_impl.h>
  46   49  #include <sys/ddi_timer.h>
  47   50  
  48   51  #include <sys/cpu_sgnblk_defs.h>
  49   52  
  50   53  /* Global locks which protect the interrupt distribution lists */
  51   54  static kmutex_t intr_dist_lock;
  52   55  static kmutex_t intr_dist_cpu_lock;
  53   56  
  54   57  /* Head of the interrupt distribution lists */
  55   58  static struct intr_dist *intr_dist_head = NULL;
  56   59  static struct intr_dist *intr_dist_whead = NULL;
  57   60  
  58   61  static uint64_t siron_inum[DDI_IPL_10]; /* software interrupt numbers */
  59   62  uint64_t *siron_cpu_inum = NULL;
  60   63  uint64_t siron_poke_cpu_inum;
  61   64  static int siron_cpu_setup(cpu_setup_t, int, void *);
  62   65  extern uint_t softlevel1();
  63   66  
  64   67  static uint64_t siron1_inum; /* backward compatibility */
  65   68  uint64_t poke_cpu_inum;
  66   69  uint_t poke_cpu_intr(caddr_t arg1, caddr_t arg2);
  67   70  uint_t siron_poke_cpu_intr(caddr_t arg1, caddr_t arg2);
  68   71  
  69   72  /*
  70   73   * Variable to enable/disable printing a message when an invalid vecintr
  71   74   * is received.
  72   75   */
  73   76  uint_t ignore_invalid_vecintr = 0;
  74   77  
  75   78  /*
  76   79   * Note:-
  77   80   * siron_pending was originally created to prevent a resource over consumption
  78   81   * bug in setsoftint (exhaustion of interrupt pool free list).
  79   82   * Its original intention is obsolete with the use of iv_pending in
  80   83   * setsoftint. However, siron_pending stayed around, acting as a second
  81   84   * gatekeeper preventing soft interrupts from being queued. In this capacity,
  82   85   * it can lead to hangs on MP systems, where due to global visibility issues
  83   86   * it can end up set while iv_pending is reset, preventing soft interrupts from
  84   87   * ever being processed. In addition to its gatekeeper role, intr_init also
  85   88   * uses it to flag the situation where siron() was called before siron_inum has
  86   89   * been defined.
  87   90   *
  88   91   * siron() does not need an extra gatekeeper; any cpu that wishes should be
  89   92   * allowed to queue a soft interrupt. It is softint()'s job to ensure
  90   93   * correct handling of the queues. Therefore, siron_pending has been
  91   94   * stripped of its gatekeeper task, retaining only its intr_init job, where
  92   95   * it indicates that there is a pending need to call siron().
  93   96   */
  94   97  static int siron_pending[DDI_IPL_10]; /* software interrupt pending flags */
  95   98  static int siron1_pending; /* backward compatibility */
  96   99  
  97  100  int intr_policy = INTR_WEIGHTED_DIST;   /* interrupt distribution policy */
  98  101  int intr_dist_debug = 0;
  99  102  int32_t intr_dist_weight_max = 1;
 100  103  int32_t intr_dist_weight_maxmax = 1000;
 101  104  int intr_dist_weight_maxfactor = 2;
 102  105  #define INTR_DEBUG(args) if (intr_dist_debug) cmn_err args
 103  106  
 104  107  /*
 105  108   * intr_init() - Interrupt initialization
 106  109   *      Initialize the system's interrupt vector table.
 107  110   */
 108  111  void
 109  112  intr_init(cpu_t *cp)
 110  113  {
 111  114          int i;

↓ open down ↓

77 lines elided

↑ open up ↑

 112  115          extern uint_t softlevel1();
 113  116  
 114  117          init_ivintr();
 115  118          REGISTER_BBUS_INTR();
 116  119  
 117  120          /*
 118  121           * Register these software interrupts for ddi timer.
 119  122           * Software interrupts up to the level 10 are supported.
 120  123           */
 121  124          for (i = DDI_IPL_1; i <= DDI_IPL_10; i++) {
 122      -                siron_inum[i-1] = add_softintr(i, (softintrfunc)timer_softintr,
      125 +                siron_inum[i - 1] = add_softintr(i,
      126 +                    (softintrfunc)ddi_periodic_softintr,
 123  127                      (caddr_t)(uintptr_t)(i), SOFTINT_ST);
 124  128          }
 125  129  
 126  130          siron1_inum = add_softintr(PIL_1, softlevel1, 0, SOFTINT_ST);
 127  131          poke_cpu_inum = add_softintr(PIL_13, poke_cpu_intr, 0, SOFTINT_MT);
 128  132          siron_poke_cpu_inum = add_softintr(PIL_13,
 129  133              siron_poke_cpu_intr, 0, SOFTINT_MT);
 130  134          cp->cpu_m.poke_cpu_outstanding = B_FALSE;
 131  135  
 132  136          mutex_init(&intr_dist_lock, NULL, MUTEX_DEFAULT, NULL);

 133  137          mutex_init(&intr_dist_cpu_lock, NULL, MUTEX_DEFAULT, NULL);
 134  138  
 135  139          /*
 136  140           * A soft interrupt may have been requested prior to the initialization
 137  141           * of soft interrupts.  Soft interrupts can't be dispatched until after
 138  142           * intr_init(), so we have to wait until now before we can dispatch the
 139  143           * pending soft interrupt (if any).
 140  144           */
 141  145          for (i = DDI_IPL_1; i <= DDI_IPL_10; i++) {
 142  146                  if (siron_pending[i-1]) {
 143  147                          siron_pending[i-1] = 0;
 144  148                          sir_on(i);
 145  149                  }
 146  150          }
 147  151          if (siron1_pending) {
 148  152                  siron1_pending = 0;
 149  153                  siron();
 150  154          }
 151  155  }
 152  156  
 153  157  /*
 154  158   * poke_cpu_intr - fall through when poke_cpu calls
 155  159   */
 156  160  /* ARGSUSED */
 157  161  uint_t
 158  162  poke_cpu_intr(caddr_t arg1, caddr_t arg2)
 159  163  {
 160  164          CPU->cpu_m.poke_cpu_outstanding = B_FALSE;
 161  165          membar_stld_stst();
 162  166          return (1);
 163  167  }
 164  168  
 165  169  /*
 166  170   * Trigger software interrupts dedicated to ddi timer.
 167  171   */
 168  172  void
 169  173  sir_on(int level)
 170  174  {
 171  175          ASSERT(level >= DDI_IPL_1 && level <= DDI_IPL_10);
 172  176          if (siron_inum[level-1])
 173  177                  setsoftint(siron_inum[level-1]);
 174  178          else
 175  179                  siron_pending[level-1] = 1;
 176  180  }
 177  181  
 178  182  /*
 179  183   * kmdb uses siron (and thus setsoftint) while the world is stopped in order to
 180  184   * inform its driver component that there's work to be done.  We need to keep
 181  185   * DTrace from instrumenting kmdb's siron and setsoftint.  We duplicate siron,
 182  186   * giving kmdb's version a kdi_ prefix to keep DTrace at bay.  The
 183  187   * implementation of setsoftint is complicated enough that we don't want to
 184  188   * duplicate it, but at the same time we don't want to preclude tracing either.
 185  189   * The meat of setsoftint() therefore goes into kdi_setsoftint, with
 186  190   * setsoftint() implemented as a wrapper.  This allows tracing, while still
 187  191   * providing a way for kmdb to sneak in unmolested.
 188  192   */
 189  193  void
 190  194  kdi_siron(void)
 191  195  {
 192  196          if (siron1_inum != 0)
 193  197                  kdi_setsoftint(siron1_inum);
 194  198          else
 195  199                  siron1_pending = 1;
 196  200  }
 197  201  
 198  202  void
 199  203  setsoftint(uint64_t inum)
 200  204  {
 201  205          kdi_setsoftint(inum);
 202  206  }
 203  207  
 204  208  /*
 205  209   * Generates softlevel1 interrupt on current CPU if it
 206  210   * is not pending already.
 207  211   */
 208  212  void
 209  213  siron(void)
 210  214  {
 211  215          uint64_t inum;
 212  216  
 213  217          if (siron1_inum != 0) {
 214  218                  /*
 215  219                   * Once siron_cpu_inum has been allocated, we can
 216  220                   * use per-CPU siron inum.
 217  221                   */
 218  222                  if (siron_cpu_inum && siron_cpu_inum[CPU->cpu_id] != 0)
 219  223                          inum = siron_cpu_inum[CPU->cpu_id];
 220  224                  else
 221  225                          inum = siron1_inum;
 222  226  
 223  227                  setsoftint(inum);
 224  228          } else
 225  229                  siron1_pending = 1;
 226  230  }
 227  231  
 228  232  
 229  233  static void
 230  234  siron_init(void)
 231  235  {
 232  236          /*
 233  237           * We just allocate memory for per-cpu siron right now. Rest of
 234  238           * the work is done when CPU is configured.
 235  239           */
 236  240          siron_cpu_inum = kmem_zalloc(sizeof (uint64_t) * NCPU, KM_SLEEP);
 237  241  }
 238  242  
 239  243  /*
 240  244   * This routine creates per-CPU siron inum for CPUs which are
 241  245   * configured during boot.
 242  246   */
 243  247  void
 244  248  siron_mp_init()
 245  249  {
 246  250          cpu_t *c;
 247  251  
 248  252          /*
 249  253           * Get the memory for per-CPU siron inums
 250  254           */
 251  255          siron_init();
 252  256  
 253  257          mutex_enter(&cpu_lock);
 254  258          c = cpu_list;
 255  259          do {
 256  260                  (void) siron_cpu_setup(CPU_CONFIG, c->cpu_id, NULL);
 257  261          } while ((c = c->cpu_next) != cpu_list);
 258  262  
 259  263          register_cpu_setup_func(siron_cpu_setup, NULL);
 260  264          mutex_exit(&cpu_lock);
 261  265  }
 262  266  
 263  267  /*
 264  268   * siron_poke_cpu_intr - cross-call handler.
 265  269   */
 266  270  /* ARGSUSED */
 267  271  uint_t
 268  272  siron_poke_cpu_intr(caddr_t arg1, caddr_t arg2)
 269  273  {
 270  274          /* generate level1 softint */
 271  275          siron();
 272  276          return (1);
 273  277  }
 274  278  
 275  279  /*
 276  280   * This routine generates a cross-call on target CPU(s).
 277  281   */
 278  282  void
 279  283  siron_poke_cpu(cpuset_t poke)
 280  284  {
 281  285          int cpuid = CPU->cpu_id;
 282  286  
 283  287          if (CPU_IN_SET(poke, cpuid)) {
 284  288                  siron();
 285  289                  CPUSET_DEL(poke, cpuid);
 286  290                  if (CPUSET_ISNULL(poke))
 287  291                          return;
 288  292          }
 289  293  
 290  294          xt_some(poke, setsoftint_tl1, siron_poke_cpu_inum, 0);
 291  295  }
 292  296  
 293  297  /*
 294  298   * This callback function allows us to create per-CPU siron inum.
 295  299   */
 296  300  /* ARGSUSED */
 297  301  static int
 298  302  siron_cpu_setup(cpu_setup_t what, int id, void *arg)
 299  303  {
 300  304          cpu_t *cp = cpu[id];
 301  305  
 302  306          ASSERT(MUTEX_HELD(&cpu_lock));
 303  307          ASSERT(cp != NULL);
 304  308  
 305  309          switch (what) {
 306  310          case CPU_CONFIG:
 307  311                  siron_cpu_inum[cp->cpu_id] = add_softintr(PIL_1,
 308  312                      (softintrfunc)softlevel1, 0, SOFTINT_ST);
 309  313                  break;
 310  314          case CPU_UNCONFIG:
 311  315                  (void) rem_softintr(siron_cpu_inum[cp->cpu_id]);
 312  316                  siron_cpu_inum[cp->cpu_id] = 0;
 313  317                  break;
 314  318          default:
 315  319                  break;
 316  320          }
 317  321  
 318  322          return (0);
 319  323  }
 320  324  
 321  325  /*
 322  326   * no_ivintr()
 323  327   *      called by setvecint_tl1() through sys_trap()
 324  328   *      vector interrupt received but not valid or not
 325  329   *      registered in intr_vec_table
 326  330   *      considered as a spurious mondo interrupt
 327  331   */
 328  332  /* ARGSUSED */
 329  333  void
 330  334  no_ivintr(struct regs *rp, int inum, int pil)
 331  335  {
 332  336          if (!ignore_invalid_vecintr)
 333  337                  cmn_err(CE_WARN, "invalid vector intr: number 0x%x, pil 0x%x",
 334  338                      inum, pil);
 335  339  
 336  340  #ifdef DEBUG_VEC_INTR
 337  341          prom_enter_mon();
 338  342  #endif /* DEBUG_VEC_INTR */
 339  343  }
 340  344  
 341  345  void
 342  346  intr_dequeue_req(uint_t pil, uint64_t inum)
 343  347  {
 344  348          intr_vec_t      *iv, *next, *prev;
 345  349          struct machcpu  *mcpu;
 346  350          uint32_t        clr;
 347  351          processorid_t   cpu_id;
 348  352          extern uint_t   getpstate(void);
 349  353  
 350  354          ASSERT((getpstate() & PSTATE_IE) == 0);
 351  355  
 352  356          mcpu = &CPU->cpu_m;
 353  357          cpu_id = CPU->cpu_id;
 354  358  
 355  359          iv = (intr_vec_t *)inum;
 356  360          prev = NULL;
 357  361          next = mcpu->intr_head[pil];
 358  362  
 359  363          /* Find a matching entry in the list */
 360  364          while (next != NULL) {
 361  365                  if (next == iv)
 362  366                          break;
 363  367                  prev = next;
 364  368                  next = IV_GET_PIL_NEXT(next, cpu_id);
 365  369          }
 366  370  
 367  371          if (next != NULL) {
 368  372                  intr_vec_t      *next_iv = IV_GET_PIL_NEXT(next, cpu_id);
 369  373  
 370  374                  /* Remove entry from list */
 371  375                  if (prev != NULL)
 372  376                          IV_SET_PIL_NEXT(prev, cpu_id, next_iv); /* non-head */
 373  377                  else
 374  378                          mcpu->intr_head[pil] = next_iv; /* head */
 375  379  
 376  380                  if (next_iv == NULL)
 377  381                          mcpu->intr_tail[pil] = prev; /* tail */
 378  382          }
 379  383  
 380  384          /* Clear pending interrupts at this level if the list is empty */
 381  385          if (mcpu->intr_head[pil] == NULL) {
 382  386                  clr = 1 << pil;
 383  387                  if (pil == PIL_14)
 384  388                          clr |= (TICK_INT_MASK | STICK_INT_MASK);
 385  389                  wr_clr_softint(clr);
 386  390          }
 387  391  }
 388  392  
 389  393  
 390  394  /*
 391  395   * Send a directed interrupt of specified interrupt number id to a cpu.
 392  396   */
 393  397  void
 394  398  send_dirint(
 395  399          int cpuix,              /* cpu to be interrupted */
 396  400          int intr_id)            /* interrupt number id */
 397  401  {
 398  402          xt_one(cpuix, setsoftint_tl1, intr_id, 0);
 399  403  }
 400  404  
 401  405  /*
 402  406   * Take the specified CPU out of participation in interrupts.
 403  407   *      Called by p_online(2) when a processor is being taken off-line.
 404  408   *      This allows interrupt threads being handled on the processor to
 405  409   *      complete before the processor is idled.
 406  410   */
 407  411  int
 408  412  cpu_disable_intr(struct cpu *cp)
 409  413  {
 410  414          ASSERT(MUTEX_HELD(&cpu_lock));
 411  415  
 412  416          /*
 413  417           * Turn off the CPU_ENABLE flag before calling the redistribution
 414  418           * function, since it checks for this in the cpu flags.
 415  419           */
 416  420          cp->cpu_flags &= ~CPU_ENABLE;
 417  421  
 418  422          intr_redist_all_cpus();
 419  423  
 420  424          return (0);
 421  425  }
 422  426  
 423  427  /*
 424  428   * Allow the specified CPU to participate in interrupts.
 425  429   *      Called by p_online(2) if a processor could not be taken off-line
 426  430   *      because of bound threads, in order to resume processing interrupts.
 427  431   *      Also called after starting a processor.
 428  432   */
 429  433  void
 430  434  cpu_enable_intr(struct cpu *cp)
 431  435  {
 432  436          ASSERT(MUTEX_HELD(&cpu_lock));
 433  437  
 434  438          cp->cpu_flags |= CPU_ENABLE;
 435  439  
 436  440          intr_redist_all_cpus();
 437  441  }
 438  442  
 439  443  /*
 440  444   * Add function to callback list for intr_redist_all_cpus.  We keep two lists,
 441  445   * one for weighted callbacks and one for normal callbacks. Weighted callbacks
 442  446   * are issued to redirect interrupts of a specified weight, from heavy to
 443  447   * light.  This allows all the interrupts of a given weight to be redistributed
 444  448   * for all weighted nexus drivers prior to those of less weight.
 445  449   */
 446  450  static void
 447  451  intr_dist_add_list(struct intr_dist **phead, void (*func)(void *), void *arg)
 448  452  {
 449  453          struct intr_dist *new = kmem_alloc(sizeof (*new), KM_SLEEP);
 450  454          struct intr_dist *iptr;
 451  455          struct intr_dist **pptr;
 452  456  
 453  457          ASSERT(func);
 454  458          new->func = func;
 455  459          new->arg = arg;
 456  460          new->next = NULL;
 457  461  
 458  462          /* Add to tail so that redistribution occurs in original order. */
 459  463          mutex_enter(&intr_dist_lock);
 460  464          for (iptr = *phead, pptr = phead; iptr != NULL;
 461  465              pptr = &iptr->next, iptr = iptr->next) {
 462  466                  /* check for problems as we locate the tail */
 463  467                  if ((iptr->func == func) && (iptr->arg == arg)) {
 464  468                          cmn_err(CE_PANIC, "intr_dist_add_list(): duplicate");
 465  469                          /*NOTREACHED*/
 466  470                  }
 467  471          }
 468  472          *pptr = new;
 469  473  
 470  474          mutex_exit(&intr_dist_lock);
 471  475  }
 472  476  
 473  477  void
 474  478  intr_dist_add(void (*func)(void *), void *arg)
 475  479  {
 476  480          intr_dist_add_list(&intr_dist_head, (void (*)(void *))func, arg);
 477  481  }
 478  482  
 479  483  void
 480  484  intr_dist_add_weighted(void (*func)(void *, int32_t, int32_t), void *arg)
 481  485  {
 482  486          intr_dist_add_list(&intr_dist_whead, (void (*)(void *))func, arg);
 483  487  }
 484  488  
 485  489  /*
 486  490   * Search for the interrupt distribution structure with the specified
 487  491   * mondo vec reg in the interrupt distribution list. If a match is found,
 488  492   * then delete the entry from the list. The caller is responsible for
 489  493   * modifying the mondo vector registers.
 490  494   */
 491  495  static void
 492  496  intr_dist_rem_list(struct intr_dist **headp, void (*func)(void *), void *arg)
 493  497  {
 494  498          struct intr_dist *iptr;
 495  499          struct intr_dist **vect;
 496  500  
 497  501          mutex_enter(&intr_dist_lock);
 498  502          for (iptr = *headp, vect = headp;
 499  503              iptr != NULL; vect = &iptr->next, iptr = iptr->next) {
 500  504                  if ((iptr->func == func) && (iptr->arg == arg)) {
 501  505                          *vect = iptr->next;
 502  506                          kmem_free(iptr, sizeof (struct intr_dist));
 503  507                          mutex_exit(&intr_dist_lock);
 504  508                          return;
 505  509                  }
 506  510          }
 507  511  
 508  512          if (!panicstr)
 509  513                  cmn_err(CE_PANIC, "intr_dist_rem_list: not found");
 510  514          mutex_exit(&intr_dist_lock);
 511  515  }
 512  516  
 513  517  void
 514  518  intr_dist_rem(void (*func)(void *), void *arg)
 515  519  {
 516  520          intr_dist_rem_list(&intr_dist_head, (void (*)(void *))func, arg);
 517  521  }
 518  522  
 519  523  void
 520  524  intr_dist_rem_weighted(void (*func)(void *, int32_t, int32_t), void *arg)
 521  525  {
 522  526          intr_dist_rem_list(&intr_dist_whead, (void (*)(void *))func, arg);
 523  527  }
 524  528  
 525  529  /*
 526  530   * Initiate interrupt redistribution.  Redistribution improves the isolation
 527  531   * associated with interrupt weights by ordering operations from heavy weight
 528  532   * to light weight.  When a CPU's orientation changes relative to interrupts,
 529  533   * there is *always* a redistribution to accommodate this change (call to
 530  534   * intr_redist_all_cpus()).  As devices (not CPUs) attach/detach it is possible
 531  535   * that a redistribution could improve the quality of an initialization. For
 532  536   * example, if you are not using a NIC it may not be attached with s10 (devfs).
 533  537   * If you then configure the NIC (ifconfig), this may cause the NIC to attach
 534  538   * and plumb interrupts.  The CPU assignment for the NIC's interrupts is
 535  539   * occurring late, so optimal "isolation" relative to weight is not occurring.
 536  540   * The same applies to detach, although in this case doing the redistribution
 537  541   * might improve "spread" for medium weight devices since the "isolation" of
 538  542   * a higher weight device may no longer be present.
 539  543   *
 540  544   * NB: We should provide a utility to trigger redistribution (ala "intradm -r").
 541  545   *
 542  546   * NB: There is risk associated with automatically triggering execution of the
 543  547   * redistribution code at arbitrary times. The risk comes from the fact that
 544  548   * there is a lot of low-level hardware interaction associated with a
 545  549   * redistribution.  At some point we may want this code to perform automatic
 546  550   * redistribution (redistribution thread; trigger timeout when add/remove
 547  551   * weight delta is large enough, and call cv_signal from timeout - causing
 548  552   * thread to call i_ddi_intr_redist_all_cpus()) but this is considered too
 549  553   * risky at this time.
 550  554   */
 551  555  void
 552  556  i_ddi_intr_redist_all_cpus()
 553  557  {
 554  558          mutex_enter(&cpu_lock);
 555  559          INTR_DEBUG((CE_CONT, "intr_dist: i_ddi_intr_redist_all_cpus\n"));
 556  560          intr_redist_all_cpus();
 557  561          mutex_exit(&cpu_lock);
 558  562  }
 559  563  
 560  564  /*
 561  565   * Redistribute all interrupts
 562  566   *
 563  567   * This function redistributes all interrupting devices, running the
 564  568   * parent callback functions for each node.
 565  569   */
 566  570  void
 567  571  intr_redist_all_cpus(void)
 568  572  {
 569  573          struct cpu *cp;
 570  574          struct intr_dist *iptr;
 571  575          int32_t weight, max_weight;
 572  576  
 573  577          ASSERT(MUTEX_HELD(&cpu_lock));
 574  578          mutex_enter(&intr_dist_lock);
 575  579  
 576  580          /*
 577  581           * zero cpu_intr_weight on all cpus - it is safe to traverse
 578  582           * cpu_list since we hold cpu_lock.
 579  583           */
 580  584          cp = cpu_list;
 581  585          do {
 582  586                  cp->cpu_intr_weight = 0;
 583  587          } while ((cp = cp->cpu_next) != cpu_list);
 584  588  
 585  589          /*
 586  590           * Assume that this redistribution may encounter a device weight
 587  591           * via driver.conf tuning of "ddi-intr-weight" that is at most
 588  592           * intr_dist_weight_maxfactor times larger.
 589  593           */
 590  594          max_weight = intr_dist_weight_max * intr_dist_weight_maxfactor;
 591  595          if (max_weight > intr_dist_weight_maxmax)
 592  596                  max_weight = intr_dist_weight_maxmax;
 593  597          intr_dist_weight_max = 1;
 594  598  
 595  599          INTR_DEBUG((CE_CONT, "intr_dist: "
 596  600              "intr_redist_all_cpus: %d-0\n", max_weight));
 597  601  
 598  602          /*
 599  603           * Redistribute weighted, from heavy to light.  The callback that
 600  604           * specifies a weight equal to weight_max should redirect all
 601  605           * interrupts of weight weight_max or greater [weight_max, inf.).
 602  606           * Interrupts of lesser weight should be processed on the call with
 603  607           * the matching weight. This allows all the heavier weight interrupts
 604  608           * on all weighted busses (multiple pci busses) to be redirected prior
 605  609           * to any lesser weight interrupts.
 606  610           */
 607  611          for (weight = max_weight; weight >= 0; weight--)
 608  612                  for (iptr = intr_dist_whead; iptr != NULL; iptr = iptr->next)
 609  613                          ((void (*)(void *, int32_t, int32_t))iptr->func)
 610  614                              (iptr->arg, max_weight, weight);
 611  615  
 612  616          /* redistribute normal (non-weighted) interrupts */
 613  617          for (iptr = intr_dist_head; iptr != NULL; iptr = iptr->next)
 614  618                  ((void (*)(void *))iptr->func)(iptr->arg);
 615  619          mutex_exit(&intr_dist_lock);
 616  620  }
 617  621  
 618  622  void
 619  623  intr_redist_all_cpus_shutdown(void)
 620  624  {
 621  625          intr_policy = INTR_CURRENT_CPU;
 622  626          intr_redist_all_cpus();
 623  627  }
 624  628  
 625  629  /*
 626  630   * Determine what CPU to target, based on interrupt policy.
 627  631   *
 628  632   * INTR_FLAT_DIST: hold a current CPU pointer in a static variable and
 629  633   *      advance through interrupt enabled cpus (round-robin).
 630  634   *
 631  635   * INTR_WEIGHTED_DIST: search for an enabled CPU with the lowest
 632  636   *      cpu_intr_weight, round robin when all equal.
 633  637   *
 634  638   *      Weighted interrupt distribution provides two things: "spread" of weight
 635  639   *      (associated with algorithm itself) and "isolation" (associated with a
 636  640   *      particular device weight). A redistribution is what provides optimal
 637  641   *      "isolation" of heavy weight interrupts, optimal "spread" of weight
 638  642   *      (relative to what came before) is always occurring.
 639  643   *
 640  644   *      An interrupt weight is a subjective number that represents the
 641  645   *      percentage of a CPU required to service a device's interrupts: the
 642  646   *      default weight is 0% (however the algorithm still maintains
 643  647   *      round-robin), a network interface controller (NIC) may have a large
 644  648   *      weight (35%). Interrupt weight only has meaning relative to the
 645  649   *      interrupt weight of other devices: a CPU can be weighted more than
 646  650   *      100%, and a single device might consume more than 100% of a CPU.
 647  651   *
 648  652   *      A coarse interrupt weight can be defined by the parent nexus driver
 649  653   *      based on bus specific information, like pci class codes. A nexus
 650  654   *      driver that supports device interrupt weighting for its children
 651  655   *      should call intr_dist_cpuid_add/rem_device_weight(), which adds
 652  656   *      and removes the weight of a device from the CPU that an interrupt
 653  657   *      is directed at.  The quality of initialization improves when the
 654  658   *      device interrupt weights more accurately reflect actual run-time weights,
 655  659   *      and as the assignments are ordered from heavy to light.
 656  660   *
 657  661   *      The implementation also supports interrupt weight being specified in
 658  662   *      driver.conf files via the property "ddi-intr-weight", which takes
 659  663   *      precedence over the nexus supplied weight.  This support is added to
 660  664   *      permit possible tweaking in the product in response to customer
 661  665   *      problems. This is not a formal or committed interface.
 662  666   *
 663  667   *      While a weighted approach chooses the CPU providing the best spread
 664  668   *      given past weights, less than optimal isolation can result in cases
 665  669   *      where heavy weight devices show up last. The nexus driver's interrupt
 666  670   *      redistribution logic should use intr_dist_add/rem_weighted so that
 667  671   *      interrupts can be redistributed heavy first for optimal isolation.
 668  672   */
 669  673  uint32_t
 670  674  intr_dist_cpuid(void)
 671  675  {
 672  676          static struct cpu       *curr_cpu;
 673  677          struct cpu              *start_cpu;
 674  678          struct cpu              *new_cpu;
 675  679          struct cpu              *cp;
 676  680          int                     cpuid = -1;
 677  681  
 678  682          /* Establish exclusion for curr_cpu and cpu_intr_weight manipulation */
 679  683          mutex_enter(&intr_dist_cpu_lock);
 680  684  
 681  685          switch (intr_policy) {
 682  686          case INTR_CURRENT_CPU:
 683  687                  cpuid = CPU->cpu_id;
 684  688                  break;
 685  689  
 686  690          case INTR_BOOT_CPU:
 687  691                  panic("INTR_BOOT_CPU no longer supported.");
 688  692                  /*NOTREACHED*/
 689  693  
 690  694          case INTR_FLAT_DIST:
 691  695          case INTR_WEIGHTED_DIST:
 692  696          default:
 693  697                  /*
 694  698                   * Ensure that curr_cpu is valid - cpu_next will be NULL if
 695  699                   * the cpu has been deleted (cpu structs are never freed).
 696  700                   */
 697  701                  if (curr_cpu == NULL || curr_cpu->cpu_next == NULL)
 698  702                          curr_cpu = CPU;
 699  703  
 700  704                  /*
 701  705                   * Advance to online CPU after curr_cpu (round-robin). For
 702  706                   * INTR_WEIGHTED_DIST we choose the cpu with the lightest
 703  707                   * weight.  For a nexus that does not support weight the
 704  708                   * default weight of zero is used. We degrade to round-robin
 705  709                   * behavior among equal weights.  The default weight is zero
 706  710                   * and round-robin behavior continues.
 707  711                   *
 708  712                   * Disable preemption while traversing cpu_next_onln to
 709  713                   * ensure the list does not change.  This works because
 710  714                   * modifiers of this list and other lists in a struct cpu
 711  715                   * call pause_cpus() before making changes.
 712  716                   */
 713  717                  kpreempt_disable();
 714  718                  cp = start_cpu = curr_cpu->cpu_next_onln;
 715  719                  new_cpu = NULL;
 716  720                  do {
 717  721                          /* Skip CPUs with interrupts disabled */
 718  722                          if ((cp->cpu_flags & CPU_ENABLE) == 0)
 719  723                                  continue;
 720  724  
 721  725                          if (intr_policy == INTR_FLAT_DIST) {
 722  726                                  /* select CPU */
 723  727                                  new_cpu = cp;
 724  728                                  break;
 725  729                          } else if ((new_cpu == NULL) ||
 726  730                              (cp->cpu_intr_weight < new_cpu->cpu_intr_weight)) {
 727  731                                  /* Choose if lighter weight */
 728  732                                  new_cpu = cp;
 729  733                          }
 730  734                  } while ((cp = cp->cpu_next_onln) != start_cpu);
 731  735                  ASSERT(new_cpu);
 732  736                  cpuid = new_cpu->cpu_id;
 733  737  
 734  738                  INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: "
 735  739                      "targeted\n", cpuid, new_cpu->cpu_intr_weight));
 736  740  
 737  741                  /* update static pointer for next round-robin */
 738  742                  curr_cpu = new_cpu;
 739  743                  kpreempt_enable();
 740  744                  break;
 741  745          }
 742  746          mutex_exit(&intr_dist_cpu_lock);
 743  747          return (cpuid);
 744  748  }
 745  749  
 746  750  /*
 747  751   * Add or remove the the weight of a device from a CPUs interrupt weight.
 748  752   *
 749  753   * We expect nexus drivers to call intr_dist_cpuid_add/rem_device_weight for
 750  754   * their children to improve the overall quality of interrupt initialization.
 751  755   *
 752  756   * If a nexues shares the CPU returned by a single intr_dist_cpuid() call
 753  757   * among multiple devices (sharing ino) then the nexus should call
 754  758   * intr_dist_cpuid_add/rem_device_weight for each device separately. Devices
 755  759   * that share must specify the same cpuid.
 756  760   *
 757  761   * If a nexus driver is unable to determine the cpu at remove_intr time
 758  762   * for some of its interrupts, then it should not call add_device_weight -
 759  763   * intr_dist_cpuid will still provide round-robin.
 760  764   *
 761  765   * An established device weight (from dev_info node) takes precedence over
 762  766   * the weight passed in.  If a device weight is not already established
 763  767   * then the passed in nexus weight is established.
 764  768   */
 765  769  void
 766  770  intr_dist_cpuid_add_device_weight(uint32_t cpuid,
 767  771      dev_info_t *dip, int32_t nweight)
 768  772  {
 769  773          int32_t         eweight;
 770  774  
 771  775          /*
 772  776           * For non-weighted policy everything has weight of zero (and we get
 773  777           * round-robin distribution from intr_dist_cpuid).
 774  778           * NB: intr_policy is limited to this file. A weighted nexus driver is
 775  779           * calls this rouitne even if intr_policy has been patched to
 776  780           * INTR_FLAG_DIST.
 777  781           */
 778  782          ASSERT(dip);
 779  783          if (intr_policy != INTR_WEIGHTED_DIST)
 780  784                  return;
 781  785  
 782  786          eweight = i_ddi_get_intr_weight(dip);
 783  787          INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: +%2d/%2d for "
 784  788              "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight,
 785  789              nweight, eweight, ddi_driver_name(ddi_get_parent(dip)),
 786  790              ddi_get_instance(ddi_get_parent(dip)),
 787  791              ddi_driver_name(dip), ddi_get_instance(dip)));
 788  792  
 789  793          /* if no establish weight, establish nexus weight */
 790  794          if (eweight < 0) {
 791  795                  if (nweight > 0)
 792  796                          (void) i_ddi_set_intr_weight(dip, nweight);
 793  797                  else
 794  798                          nweight = 0;
 795  799          } else
 796  800                  nweight = eweight;      /* use established weight */
 797  801  
 798  802          /* Establish exclusion for cpu_intr_weight manipulation */
 799  803          mutex_enter(&intr_dist_cpu_lock);
 800  804          cpu[cpuid]->cpu_intr_weight += nweight;
 801  805  
 802  806          /* update intr_dist_weight_max */
 803  807          if (nweight > intr_dist_weight_max)
 804  808                  intr_dist_weight_max = nweight;
 805  809          mutex_exit(&intr_dist_cpu_lock);
 806  810  }
 807  811  
 808  812  void
 809  813  intr_dist_cpuid_rem_device_weight(uint32_t cpuid, dev_info_t *dip)
 810  814  {
 811  815          struct cpu      *cp;
 812  816          int32_t         weight;
 813  817  
 814  818          ASSERT(dip);
 815  819          if (intr_policy != INTR_WEIGHTED_DIST)
 816  820                  return;
 817  821  
 818  822          /* remove weight of device from cpu */
 819  823          weight = i_ddi_get_intr_weight(dip);
 820  824          if (weight < 0)
 821  825                  weight = 0;
 822  826          INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: -%2d    for "
 823  827              "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight, weight,
 824  828              ddi_driver_name(ddi_get_parent(dip)),
 825  829              ddi_get_instance(ddi_get_parent(dip)),
 826  830              ddi_driver_name(dip), ddi_get_instance(dip)));
 827  831  
 828  832          /* Establish exclusion for cpu_intr_weight manipulation */
 829  833          mutex_enter(&intr_dist_cpu_lock);
 830  834          cp = cpu[cpuid];
 831  835          cp->cpu_intr_weight -= weight;
 832  836          if (cp->cpu_intr_weight < 0)
 833  837                  cp->cpu_intr_weight = 0;        /* sanity */
 834  838          mutex_exit(&intr_dist_cpu_lock);
 835  839  }
 836  840  
 837  841  ulong_t
 838  842  create_softint(uint_t pil, uint_t (*func)(caddr_t, caddr_t), caddr_t arg1)
 839  843  {
 840  844          uint64_t inum;
 841  845  
 842  846          inum = add_softintr(pil, func, arg1, SOFTINT_MT);
 843  847          return ((ulong_t)inum);
 844  848  }
 845  849  
 846  850  void
 847  851  invoke_softint(processorid_t cpuid, ulong_t hdl)
 848  852  {
 849  853          uint64_t inum = hdl;
 850  854  
 851  855          if (cpuid == CPU->cpu_id)
 852  856                  setsoftint(inum);
 853  857          else
 854  858                  xt_one(cpuid, setsoftint_tl1, inum, 0);
 855  859  }
 856  860  
 857  861  void
 858  862  remove_softint(ulong_t hdl)
 859  863  {
 860  864          uint64_t inum = hdl;
 861  865  
 862  866          (void) rem_softintr(inum);
 863  867  }
 864  868  
 865  869  void
 866  870  sync_softint(cpuset_t set)
 867  871  {
 868  872          xt_sync(set);
 869  873  }

↓ open down ↓

737 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX