11584 ::xcall would be useful
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>

--- old/usr/src/uts/i86pc/os/x_call.c
+++ new/usr/src/uts/i86pc/os/x_call.c
[ 17 lines elided ]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   */
  25   25  /*
  26   26   * Copyright (c) 2010, Intel Corporation.
  27   27   * All rights reserved.
       28 + * Copyright 2018 Joyent, Inc.
  28   29   */
  29   30  
  30   31  #include <sys/types.h>
  31   32  #include <sys/param.h>
  32   33  #include <sys/t_lock.h>
  33   34  #include <sys/thread.h>
  34   35  #include <sys/cpuvar.h>
  35   36  #include <sys/x_call.h>
  36   37  #include <sys/xc_levels.h>
  37   38  #include <sys/cpu.h>
[ 34 lines elided ]
  72   73   * CPUs to initiate cross calls to intersecting sets of CPUs at the same time.
  73   74   * The cross call processing by the CPUs will happen in any order with only
  74   75   * a guarantee, for xc_call() and xc_sync(), that an initiator won't return
  75   76   * from cross calls before all slaves have invoked the function.
  76   77   *
  77   78   * The reason for this asynchronous approach is to allow for fast global
  78   79   * TLB shootdowns. If all CPUs, say N, tried to do a global TLB invalidation
  79   80   * on a different Virtual Address at the same time, the old code required
  80   81   * N squared IPIs. With this method, depending on timing, it could happen
  81   82   * with just N IPIs.
  82      - */
  83      -
  84      -/*
  85      - * The default is to not enable collecting counts of IPI information, since
  86      - * the updating of shared cachelines could cause excess bus traffic.
  87      - */
  88      -uint_t xc_collect_enable = 0;
  89      -uint64_t xc_total_cnt = 0;      /* total #IPIs sent for cross calls */
  90      -uint64_t xc_multi_cnt = 0;      /* # times we piggy backed on another IPI */
  91      -
  92      -/*
  93      - * Values for message states. Here are the normal transitions. A transition
  94      - * of "->" happens in the slave cpu and "=>" happens in the master cpu as
  95      - * the messages are passed back and forth.
  96   83   *
       84 + * Here are the normal transitions for XC_MSG_* values in ->xc_command. A
       85 + * transition of "->" happens in the slave cpu and "=>" happens in the master
       86 + * cpu as the messages are passed back and forth.
       87 + *
  97   88   * FREE => ASYNC ->                       DONE => FREE
  98   89   * FREE => CALL ->                        DONE => FREE
  99   90   * FREE => SYNC -> WAITING => RELEASED -> DONE => FREE
 100   91   *
 101      - * The interesing one above is ASYNC. You might ask, why not go directly
 102      - * to FREE, instead of DONE. If it did that, it might be possible to exhaust
       92 + * The interesting one above is ASYNC. You might ask, why not go directly
       93 + * to FREE, instead of DONE? If it did that, it might be possible to exhaust
 103   94   * the master's xc_free list if a master can generate ASYNC messages faster
 104   95   * than the slave can process them. That could be handled with more complicated
 105   96   * logic. However, since nothing important uses ASYNC, I've not bothered.
 106   97   */
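
To make the contract above concrete, here is a minimal usage sketch. The handler and the wrapper function are hypothetical, and the xc_call()/xc_sync() signatures are assumed from sys/x_call.h rather than shown in this diff:

    static int
    do_tlbflush(xc_arg_t a1, xc_arg_t a2, xc_arg_t a3)
    {
            /* hypothetical handler: invalidate whatever mapping a1 names */
            return (0);
    }

    static void
    example(cpuset_t set, caddr_t va)
    {
            /* returns only after every CPU in 'set' has run do_tlbflush() */
            xc_call((xc_arg_t)va, 0, 0, CPUSET2BV(set), do_tlbflush);

            /* as above, but slaves also handshake (SYNC -> WAITING => RELEASED) */
            xc_sync((xc_arg_t)va, 0, 0, CPUSET2BV(set), do_tlbflush);
    }
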
 107      -#define XC_MSG_FREE     (0)     /* msg in xc_free queue */
 108      -#define XC_MSG_ASYNC    (1)     /* msg in slave xc_msgbox */
 109      -#define XC_MSG_CALL     (2)     /* msg in slave xc_msgbox */
 110      -#define XC_MSG_SYNC     (3)     /* msg in slave xc_msgbox */
 111      -#define XC_MSG_WAITING  (4)     /* msg in master xc_msgbox or xc_waiters */
 112      -#define XC_MSG_RELEASED (5)     /* msg in slave xc_msgbox */
 113      -#define XC_MSG_DONE     (6)     /* msg in master xc_msgbox */
 114   98  
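
The XC_MSG_* definitions are removed from this file; their destination is not shown in this hunk, but presumably it is somewhere ::xcall can also see them. For reference while reading the transitions above, the values being relocated are:

    #define XC_MSG_FREE     (0)     /* msg in xc_free queue */
    #define XC_MSG_ASYNC    (1)     /* msg in slave xc_msgbox */
    #define XC_MSG_CALL     (2)     /* msg in slave xc_msgbox */
    #define XC_MSG_SYNC     (3)     /* msg in slave xc_msgbox */
    #define XC_MSG_WAITING  (4)     /* msg in master xc_msgbox or xc_waiters */
    #define XC_MSG_RELEASED (5)     /* msg in slave xc_msgbox */
    #define XC_MSG_DONE     (6)     /* msg in master xc_msgbox */
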
 115   99  /*
      100 + * The default is to not enable collecting counts of IPI information, since
      101 + * the updating of shared cachelines could cause excess bus traffic.
      102 + */
      103 +uint_t xc_collect_enable = 0;
      104 +uint64_t xc_total_cnt = 0;      /* total #IPIs sent for cross calls */
       105 +uint64_t xc_multi_cnt = 0;      /* # times we piggybacked on another IPI */
      106 +
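
A sketch of how a knob like this is typically consumed (not the verbatim kernel code): the update is skipped by default, so the shared cachelines are only written when someone has asked for the counts.

    /* sketch only: count an IPI if statistics collection is enabled */
    if (xc_collect_enable)
            atomic_inc_64(&xc_total_cnt);
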
      107 +/*
 116  108   * We allow for one high priority message at a time to happen in the system.
 117  109   * This is used for panic, kmdb, etc., so no locking is done.
 118  110   */
 119  111  static volatile cpuset_t xc_priority_set_store;
 120  112  static volatile ulong_t *xc_priority_set = CPUSET2BV(xc_priority_set_store);
 121  113  static xc_data_t xc_priority_data;
 122  114  
 123  115  /*
 124      - * Wrappers to avoid C compiler warnings due to volatile. The atomic bit
 125      - * operations don't accept volatile bit vectors - which is a bit silly.
 126      - */
 127      -#define XC_BT_SET(vector, b)    BT_ATOMIC_SET((ulong_t *)(vector), (b))
 128      -#define XC_BT_CLEAR(vector, b)  BT_ATOMIC_CLEAR((ulong_t *)(vector), (b))
 129      -
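
With the XC_BT_* wrappers gone, the code below calls BT_ATOMIC_SET() and BT_ATOMIC_CLEAR() on the priority set directly. As a portable, self-contained illustration (not the illumos definition) of what an atomic bit-vector set and clear amount to:

    #include <stdatomic.h>
    #include <limits.h>

    #define BPW     (sizeof (unsigned long) * CHAR_BIT)     /* bits per word */

    static void
    bv_atomic_set(_Atomic unsigned long *vec, unsigned int b)
    {
            atomic_fetch_or(&vec[b / BPW], 1UL << (b % BPW));
    }

    static void
    bv_atomic_clear(_Atomic unsigned long *vec, unsigned int b)
    {
            atomic_fetch_and(&vec[b / BPW], ~(1UL << (b % BPW)));
    }
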
 130      -/*
 131  116   * Decrement a CPU's work count
 132  117   */
 133  118  static void
 134  119  xc_decrement(struct machcpu *mcpu)
 135  120  {
 136  121          atomic_dec_32(&mcpu->xc_work_cnt);
 137  122  }
 138  123  
 139  124  /*
 140  125   * Increment a CPU's work count and return the old value
[ 45 lines elided ]
 186  171                  old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
 187  172                  if (old_head == NULL)
 188  173                          return (old_head);
 189  174          } while (atomic_cas_ptr(queue, old_head, old_head->xc_next) !=
 190  175              old_head);
 191  176          old_head->xc_next = NULL;
 192  177          return (old_head);
 193  178  }
 194  179  
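
xc_extract() above is a lock-free pop from a singly linked list. Here is a self-contained C11 rendition of the same pattern, for illustration only; note that the kernel version can safely dereference old_head->xc_next between the load and the CAS only because messages are never freed while they can still sit on a queue:

    #include <stdatomic.h>
    #include <stddef.h>

    struct node {
            struct node *next;
    };

    static struct node *
    pop(struct node *_Atomic *head)
    {
            struct node *old;

            do {
                    old = atomic_load(head);
                    if (old == NULL)
                            return (NULL);
            } while (!atomic_compare_exchange_strong(head, &old, old->next));
            old->next = NULL;
            return (old);
    }
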
 195  180  /*
      181 + * Extract the next message from the CPU's queue, and place the message in
      182 + * .xc_curmsg.  The latter is solely to make debugging (and ::xcall) more
      183 + * useful.
      184 + */
      185 +static xc_msg_t *
      186 +xc_get(void)
      187 +{
      188 +        struct machcpu *mcpup = &CPU->cpu_m;
      189 +        xc_msg_t *msg = xc_extract(&mcpup->xc_msgbox);
      190 +        mcpup->xc_curmsg = msg;
      191 +        return (msg);
      192 +}
      193 +
      194 +/*
 196  195   * Initialize the machcpu fields used for cross calls
 197  196   */
 198  197  static uint_t xc_initialized = 0;
 199  198  
 200  199  void
 201  200  xc_init_cpu(struct cpu *cpup)
 202  201  {
 203  202          xc_msg_t *msg;
 204  203          int c;
 205  204  
[ 115 lines elided ]
 321  320          xc_arg_t a2;
 322  321          xc_arg_t a3;
 323  322          uint_t rc = DDI_INTR_UNCLAIMED;
 324  323  
 325  324          while (mcpup->xc_work_cnt != 0) {
 326  325                  rc = DDI_INTR_CLAIMED;
 327  326  
 328  327                  /*
 329  328                   * We may have to wait for a message to arrive.
 330  329                   */
 331      -                for (msg = NULL; msg == NULL;
 332      -                    msg = xc_extract(&mcpup->xc_msgbox)) {
      330 +                for (msg = NULL; msg == NULL; msg = xc_get()) {
 333  331  
 334  332                          /*
  335  333                           * Always check for and handle a priority message.
 336  334                           */
 337  335                          if (BT_TEST(xc_priority_set, CPU->cpu_id)) {
 338  336                                  func = xc_priority_data.xc_func;
 339  337                                  a1 = xc_priority_data.xc_a1;
 340  338                                  a2 = xc_priority_data.xc_a2;
 341  339                                  a3 = xc_priority_data.xc_a3;
 342      -                                XC_BT_CLEAR(xc_priority_set, CPU->cpu_id);
      340 +                                BT_ATOMIC_CLEAR(xc_priority_set, CPU->cpu_id);
 343  341                                  xc_decrement(mcpup);
 344  342                                  func(a1, a2, a3);
 345  343                                  if (mcpup->xc_work_cnt == 0)
 346  344                                          return (rc);
 347  345                          }
 348  346  
 349  347                          /*
 350  348                           * wait for a message to arrive
 351  349                           */
 352  350                          SMT_PAUSE();
[ 83 lines elided ]
 436  434                          break;
 437  435  
 438  436                  case XC_MSG_FREE:
 439  437                          panic("free message 0x%p in msgbox", (void *)msg);
 440  438                          break;
 441  439  
 442  440                  default:
 443  441                          panic("bad message 0x%p in msgbox", (void *)msg);
 444  442                          break;
 445  443                  }
      444 +
      445 +                CPU->cpu_m.xc_curmsg = NULL;
 446  446          }
 447  447          return (rc);
 448  448  }
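
Taken together, xc_get() and the new assignment at the bottom of the loop bracket each message: it is visible in the CPU's xc_curmsg from dequeue until its command has been handled, which is what gives a debugger facility such as ::xcall something to report. In outline, with process() as a hypothetical stand-in for the switch on xc_command:

    for (;;) {
            msg = xc_get();                 /* dequeue; publishes xc_curmsg */
            if (msg == NULL)
                    break;
            process(msg);                   /* handle msg->xc_command */
            CPU->cpu_m.xc_curmsg = NULL;    /* processing complete */
    }
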
 449  449  
 450  450  /*
 451  451   * Initiate cross call processing.
 452  452   */
 453  453  static void
 454  454  xc_common(
 455  455          xc_func_t func,
[ 118 lines elided ]
 574  574                          SMT_PAUSE();
 575  575  
 576  576                  /*
 577  577                   * Some CPU did not respond to a previous priority request. It's
 578  578                   * probably deadlocked with interrupts blocked or some such
 579  579                   * problem. We'll just erase the previous request - which was
 580  580                   * most likely a kmdb_enter that has already expired - and plow
 581  581                   * ahead.
 582  582                   */
 583  583                  if (BT_TEST(xc_priority_set, c)) {
 584      -                        XC_BT_CLEAR(xc_priority_set, c);
      584 +                        BT_ATOMIC_CLEAR(xc_priority_set, c);
 585  585                          if (cpup->cpu_m.xc_work_cnt > 0)
 586  586                                  xc_decrement(&cpup->cpu_m);
 587  587                  }
 588  588          }
 589  589  
 590  590          /*
 591  591           * fill in cross call data
 592  592           */
 593  593          xc_priority_data.xc_func = func;
 594  594          xc_priority_data.xc_a1 = arg1;
[ 5 lines elided ]
 600  600           * We'll always IPI, plus bang on the xc_msgbox for i86_mwait()
 601  601           */
 602  602          for (c = 0; c < max_ncpus; ++c) {
 603  603                  if (!BT_TEST(set, c))
 604  604                          continue;
 605  605                  cpup = cpu[c];
 606  606                  if (cpup == NULL || !(cpup->cpu_flags & CPU_READY) ||
 607  607                      cpup == CPU)
 608  608                          continue;
 609  609                  (void) xc_increment(&cpup->cpu_m);
 610      -                XC_BT_SET(xc_priority_set, c);
      610 +                BT_ATOMIC_SET(xc_priority_set, c);
 611  611                  send_dirint(c, XC_HI_PIL);
 612  612                  for (i = 0; i < 10; ++i) {
 613  613                          (void) atomic_cas_ptr(&cpup->cpu_m.xc_msgbox,
 614  614                              cpup->cpu_m.xc_msgbox, cpup->cpu_m.xc_msgbox);
 615  615                  }
 616  616          }
 617  617  }
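
The trailing atomic_cas_ptr() loop deserves a note: it stores back the very value it just read, so it changes nothing. Per the comment before the loop ("bang on the xc_msgbox for i86_mwait()"), the point is the write itself: a CPU idling in MWAIT while monitoring its xc_msgbox cacheline is woken by any store to that line, whether or not the IPI gets through. Annotated:

    /* same loop as above; the store, not the value, is what matters */
    for (i = 0; i < 10; ++i) {
            (void) atomic_cas_ptr(&cpup->cpu_m.xc_msgbox,
                cpup->cpu_m.xc_msgbox, cpup->cpu_m.xc_msgbox);
    }
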
 618  618  
 619  619  /*
 620  620   * Do cross call to all other CPUs with absolutely no waiting or handshaking.
[ 88 lines elided ]