11584 ::xcall would be useful
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>

--- old/usr/src/uts/i86pc/os/x_call.c
+++ new/usr/src/uts/i86pc/os/x_call.c
[ 17 lines elided ]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   */
  25   25  /*
  26   26   * Copyright (c) 2010, Intel Corporation.
  27   27   * All rights reserved.
       28 + * Copyright 2018 Joyent, Inc.
  28   29   */
  29   30  
  30   31  #include <sys/types.h>
  31   32  #include <sys/param.h>
  32   33  #include <sys/t_lock.h>
  33   34  #include <sys/thread.h>
  34   35  #include <sys/cpuvar.h>
  35   36  #include <sys/x_call.h>
  36   37  #include <sys/xc_levels.h>
  37   38  #include <sys/cpu.h>
[ 34 lines elided ]
  72   73   * CPUs to initiate cross calls to intersecting sets of CPUs at the same time.
  73   74   * The cross call processing by the CPUs will happen in any order with only
  74   75   * a guarantee, for xc_call() and xc_sync(), that an initiator won't return
  75   76   * from cross calls before all slaves have invoked the function.
  76   77   *
  77   78   * The reason for this asynchronous approach is to allow for fast global
  78   79   * TLB shootdowns. If all CPUs, say N, tried to do a global TLB invalidation
  79   80   * on a different Virtual Address at the same time, the old code required
  80   81   * N squared IPIs. With this method, depending on timing, it could happen
  81   82   * with just N IPIs.
  82      - */
  83      -
  84      -/*
  85      - * The default is to not enable collecting counts of IPI information, since
  86      - * the updating of shared cachelines could cause excess bus traffic.
  87      - */
  88      -uint_t xc_collect_enable = 0;
  89      -uint64_t xc_total_cnt = 0;      /* total #IPIs sent for cross calls */
  90      -uint64_t xc_multi_cnt = 0;      /* # times we piggy backed on another IPI */
  91      -
  92      -/*
  93      - * Values for message states. Here are the normal transitions. A transition
  94      - * of "->" happens in the slave cpu and "=>" happens in the master cpu as
  95      - * the messages are passed back and forth.
  96   83   *
       84 + * Here are the normal transitions for XC_MSG_* values in ->xc_command. A
       85 + * transition of "->" happens in the slave cpu and "=>" happens in the master
       86 + * cpu as the messages are passed back and forth.
       87 + *
  97   88   * FREE => ASYNC ->                       DONE => FREE
  98   89   * FREE => CALL ->                        DONE => FREE
  99   90   * FREE => SYNC -> WAITING => RELEASED -> DONE => FREE
 100   91   *
 101      - * The interesing one above is ASYNC. You might ask, why not go directly
 102      - * to FREE, instead of DONE. If it did that, it might be possible to exhaust
       92 + * The interesting one above is ASYNC. You might ask, why not go directly
       93 + * to FREE, instead of DONE? If it did that, it might be possible to exhaust
 103   94   * the master's xc_free list if a master can generate ASYNC messages faster
 104   95   * than the slave can process them. That could be handled with more complicated
 105   96   * logic. However, since nothing important uses ASYNC, I've not bothered.
 106   97   */
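
To make the contract above concrete, here is a minimal usage sketch. The handler and the wrapper function are hypothetical, and the xc_call()/xc_sync() signatures are assumed from sys/x_call.h rather than shown in this diff:

    static int
    do_tlbflush(xc_arg_t a1, xc_arg_t a2, xc_arg_t a3)
    {
            /* hypothetical handler: invalidate whatever mapping a1 names */
            return (0);
    }

    static void
    example(cpuset_t set, caddr_t va)
    {
            /* returns only after every CPU in 'set' has run do_tlbflush() */
            xc_call((xc_arg_t)va, 0, 0, CPUSET2BV(set), do_tlbflush);

            /* as above, but slaves also handshake (SYNC -> WAITING => RELEASED) */
            xc_sync((xc_arg_t)va, 0, 0, CPUSET2BV(set), do_tlbflush);
    }
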
 107      -#define XC_MSG_FREE     (0)     /* msg in xc_free queue */
 108      -#define XC_MSG_ASYNC    (1)     /* msg in slave xc_msgbox */
 109      -#define XC_MSG_CALL     (2)     /* msg in slave xc_msgbox */
 110      -#define XC_MSG_SYNC     (3)     /* msg in slave xc_msgbox */
 111      -#define XC_MSG_WAITING  (4)     /* msg in master xc_msgbox or xc_waiters */
 112      -#define XC_MSG_RELEASED (5)     /* msg in slave xc_msgbox */
 113      -#define XC_MSG_DONE     (6)     /* msg in master xc_msgbox */
 114   98  
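
The XC_MSG_* definitions are removed from this file; their destination is not shown in this hunk, but presumably it is somewhere ::xcall can also see them. For reference while reading the transitions above, the values being relocated are:

    #define XC_MSG_FREE     (0)     /* msg in xc_free queue */
    #define XC_MSG_ASYNC    (1)     /* msg in slave xc_msgbox */
    #define XC_MSG_CALL     (2)     /* msg in slave xc_msgbox */
    #define XC_MSG_SYNC     (3)     /* msg in slave xc_msgbox */
    #define XC_MSG_WAITING  (4)     /* msg in master xc_msgbox or xc_waiters */
    #define XC_MSG_RELEASED (5)     /* msg in slave xc_msgbox */
    #define XC_MSG_DONE     (6)     /* msg in master xc_msgbox */
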
 115   99  /*
      100 + * The default is to not enable collecting counts of IPI information, since
      101 + * the updating of shared cachelines could cause excess bus traffic.
      102 + */
      103 +uint_t xc_collect_enable = 0;
      104 +uint64_t xc_total_cnt = 0;      /* total #IPIs sent for cross calls */
       105 +uint64_t xc_multi_cnt = 0;      /* # times we piggybacked on another IPI */
      106 +
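
A sketch of how a knob like this is typically consumed (not the verbatim kernel code): the update is skipped by default, so the shared cachelines are only written when someone has asked for the counts.

    /* sketch only: count an IPI if statistics collection is enabled */
    if (xc_collect_enable)
            atomic_inc_64(&xc_total_cnt);
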
      107 +/*
 116  108   * We allow for one high priority message at a time to happen in the system.
 117  109   * This is used for panic, kmdb, etc., so no locking is done.
 118  110   */
 119  111  static volatile cpuset_t xc_priority_set_store;
 120  112  static volatile ulong_t *xc_priority_set = CPUSET2BV(xc_priority_set_store);
 121  113  static xc_data_t xc_priority_data;
 122  114  
 123  115  /*
 124      - * Wrappers to avoid C compiler warnings due to volatile. The atomic bit
 125      - * operations don't accept volatile bit vectors - which is a bit silly.
 126      - */
 127      -#define XC_BT_SET(vector, b)    BT_ATOMIC_SET((ulong_t *)(vector), (b))
 128      -#define XC_BT_CLEAR(vector, b)  BT_ATOMIC_CLEAR((ulong_t *)(vector), (b))
 129      -
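
With the XC_BT_* wrappers gone, the code below calls BT_ATOMIC_SET() and BT_ATOMIC_CLEAR() on the priority set directly. As a portable, self-contained illustration (not the illumos definition) of what an atomic bit-vector set and clear amount to:

    #include <stdatomic.h>
    #include <limits.h>

    #define BPW     (sizeof (unsigned long) * CHAR_BIT)     /* bits per word */

    static void
    bv_atomic_set(_Atomic unsigned long *vec, unsigned int b)
    {
            atomic_fetch_or(&vec[b / BPW], 1UL << (b % BPW));
    }

    static void
    bv_atomic_clear(_Atomic unsigned long *vec, unsigned int b)
    {
            atomic_fetch_and(&vec[b / BPW], ~(1UL << (b % BPW)));
    }
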
 130      -/*
 131  116   * Decrement a CPU's work count
 132  117   */
 133  118  static void
 134  119  xc_decrement(struct machcpu *mcpu)
 135  120  {
 136  121          atomic_dec_32(&mcpu->xc_work_cnt);
 137  122  }
 138  123  
 139  124  /*
 140  125   * Increment a CPU's work count and return the old value
[ 45 lines elided ]
 186  171                  old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
 187  172                  if (old_head == NULL)
 188  173                          return (old_head);
 189  174          } while (atomic_cas_ptr(queue, old_head, old_head->xc_next) !=
 190  175              old_head);
 191  176          old_head->xc_next = NULL;
 192  177          return (old_head);
 193  178  }
 194  179  
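
xc_extract() above is a lock-free pop from a singly linked list. Here is a self-contained C11 rendition of the same pattern, for illustration only; note that the kernel version can safely dereference old_head->xc_next between the load and the CAS only because messages are never freed while they can still sit on a queue:

    #include <stdatomic.h>
    #include <stddef.h>

    struct node {
            struct node *next;
    };

    static struct node *
    pop(struct node *_Atomic *head)
    {
            struct node *old;

            do {
                    old = atomic_load(head);
                    if (old == NULL)
                            return (NULL);
            } while (!atomic_compare_exchange_strong(head, &old, old->next));
            old->next = NULL;
            return (old);
    }
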
 195  180  /*
      181 + * Extract the next message from the CPU's queue, and place the message in
      182 + * .xc_curmsg.  The latter is solely to make debugging (and ::xcall) more
      183 + * useful.
      184 + */
      185 +static xc_msg_t *
      186 +xc_get(void)
      187 +{
      188 +        struct machcpu *mcpup = &CPU->cpu_m;
      189 +        xc_msg_t *msg = xc_extract(&mcpup->xc_msgbox);
      190 +        mcpup->xc_curmsg = msg;
      191 +        return (msg);
      192 +}
      193 +
      194 +/*
 196  195   * Initialize the machcpu fields used for cross calls
 197  196   */
 198  197  static uint_t xc_initialized = 0;
 199  198  
 200  199  void
 201  200  xc_init_cpu(struct cpu *cpup)
 202  201  {
 203  202          xc_msg_t *msg;
 204  203          int c;
 205  204  
[ 115 lines elided ]
 321  320          xc_arg_t a2;
 322  321          xc_arg_t a3;
 323  322          uint_t rc = DDI_INTR_UNCLAIMED;
 324  323  
 325  324          while (mcpup->xc_work_cnt != 0) {
 326  325                  rc = DDI_INTR_CLAIMED;
 327  326  
 328  327                  /*
 329  328                   * We may have to wait for a message to arrive.
 330  329                   */
 331      -                for (msg = NULL; msg == NULL;
 332      -                    msg = xc_extract(&mcpup->xc_msgbox)) {
      330 +                for (msg = NULL; msg == NULL; msg = xc_get()) {
 333  331  
 334  332                          /*
  335  333                           * Always check for and handle a priority message.
 336  334                           */
 337  335                          if (BT_TEST(xc_priority_set, CPU->cpu_id)) {
 338  336                                  func = xc_priority_data.xc_func;
 339  337                                  a1 = xc_priority_data.xc_a1;
 340  338                                  a2 = xc_priority_data.xc_a2;
 341  339                                  a3 = xc_priority_data.xc_a3;
 342      -                                XC_BT_CLEAR(xc_priority_set, CPU->cpu_id);
      340 +                                BT_ATOMIC_CLEAR(xc_priority_set, CPU->cpu_id);
 343  341                                  xc_decrement(mcpup);
 344  342                                  func(a1, a2, a3);
 345  343                                  if (mcpup->xc_work_cnt == 0)
 346  344                                          return (rc);
 347  345                          }
 348  346  
 349  347                          /*
 350  348                           * wait for a message to arrive
 351  349                           */
 352  350                          SMT_PAUSE();
[ 83 lines elided ]
 436  434                          break;
 437  435  
 438  436                  case XC_MSG_FREE:
 439  437                          panic("free message 0x%p in msgbox", (void *)msg);
 440  438                          break;
 441  439  
 442  440                  default:
 443  441                          panic("bad message 0x%p in msgbox", (void *)msg);
 444  442                          break;
 445  443                  }
      444 +
      445 +                CPU->cpu_m.xc_curmsg = NULL;
 446  446          }
 447  447          return (rc);
 448  448  }
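
Taken together, xc_get() and the new assignment at the bottom of the loop bracket each message: it is visible in the CPU's xc_curmsg from dequeue until its command has been handled, which is what gives a debugger facility such as ::xcall something to report. In outline, with process() as a hypothetical stand-in for the switch on xc_command:

    for (;;) {
            msg = xc_get();                 /* dequeue; publishes xc_curmsg */
            if (msg == NULL)
                    break;
            process(msg);                   /* handle msg->xc_command */
            CPU->cpu_m.xc_curmsg = NULL;    /* processing complete */
    }
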
 449  449  
 450  450  /*
 451  451   * Initiate cross call processing.
 452  452   */
 453  453  static void
 454  454  xc_common(
 455  455          xc_func_t func,
[ 118 lines elided ]
 574  574                          SMT_PAUSE();
 575  575  
 576  576                  /*
 577  577                   * Some CPU did not respond to a previous priority request. It's
 578  578                   * probably deadlocked with interrupts blocked or some such
 579  579                   * problem. We'll just erase the previous request - which was
 580  580                   * most likely a kmdb_enter that has already expired - and plow
 581  581                   * ahead.
 582  582                   */
 583  583                  if (BT_TEST(xc_priority_set, c)) {
 584      -                        XC_BT_CLEAR(xc_priority_set, c);
      584 +                        BT_ATOMIC_CLEAR(xc_priority_set, c);
 585  585                          if (cpup->cpu_m.xc_work_cnt > 0)
 586  586                                  xc_decrement(&cpup->cpu_m);
 587  587                  }
 588  588          }
 589  589  
 590  590          /*
 591  591           * fill in cross call data
 592  592           */
 593  593          xc_priority_data.xc_func = func;
 594  594          xc_priority_data.xc_a1 = arg1;
[ 5 lines elided ]
 600  600           * We'll always IPI, plus bang on the xc_msgbox for i86_mwait()
 601  601           */
 602  602          for (c = 0; c < max_ncpus; ++c) {
 603  603                  if (!BT_TEST(set, c))
 604  604                          continue;
 605  605                  cpup = cpu[c];
 606  606                  if (cpup == NULL || !(cpup->cpu_flags & CPU_READY) ||
 607  607                      cpup == CPU)
 608  608                          continue;
 609  609                  (void) xc_increment(&cpup->cpu_m);
 610      -                XC_BT_SET(xc_priority_set, c);
      610 +                BT_ATOMIC_SET(xc_priority_set, c);
 611  611                  send_dirint(c, XC_HI_PIL);
 612  612                  for (i = 0; i < 10; ++i) {
 613  613                          (void) atomic_cas_ptr(&cpup->cpu_m.xc_msgbox,
 614  614                              cpup->cpu_m.xc_msgbox, cpup->cpu_m.xc_msgbox);
 615  615                  }
 616  616          }
 617  617  }
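
The trailing atomic_cas_ptr() loop deserves a note: it stores back the very value it just read, so it changes nothing. Per the comment before the loop ("bang on the xc_msgbox for i86_mwait()"), the point is the write itself: a CPU idling in MWAIT while monitoring its xc_msgbox cacheline is woken by any store to that line, whether or not the IPI gets through. Annotated:

    /* same loop as above; the store, not the value, is what matters */
    for (i = 0; i < 10; ++i) {
            (void) atomic_cas_ptr(&cpup->cpu_m.xc_msgbox,
                cpup->cpu_m.xc_msgbox, cpup->cpu_m.xc_msgbox);
    }
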
 618  618  
 619  619  /*
 620  620   * Do cross call to all other CPUs with absolutely no waiting or handshaking.
[ 88 lines elided ]